format(time_str)) submit_all = pd.DataFrame() for phase in range(0, conf.now_phase + 1): print('----------------------- phase:{} -------------------------'. format(phase)) if conf.is_recall_cached: one_phase_recall_item_df = \ pd.read_csv(conf.recall_cache_path.format(phase), dtype={'user_id': np.int, 'item_id': np.int}) one_phase_recall_item_df.loc[:, 'user_id'] = one_phase_recall_item_df[ 'user_id'].astype(np.str) one_phase_recall_item_df.loc[:, 'item_id'] = one_phase_recall_item_df[ 'item_id'].astype(np.str) if conf.subsampling: one_phase_recall_item_df = utils.subsampling_user( one_phase_recall_item_df, conf.subsampling) print('load recall items: phase:{} shape:{}'.format( phase, one_phase_recall_item_df.shape[0])) else: if os.path.exists(conf.total_sim_list_path): item_sim_list = pickle.load( open(conf.total_sim_list_path, 'rb')) else: raise Exception('no total item_sim_list') qitme_df = utils.read_qtime(conf.test_path, phase) # raise Exception('qtime召回结果文件不存在') _, recom_item = recall.items_recommod_5164( qitme_df, item_sim_list, all_phase_click_no_qtime, list(hot_df['item_id'])) one_phase_recall_item_df = pd.DataFrame( recom_item, columns=['user_id', 'item_id', 'sim'])
columns=[ 'img_vec{}'.format(i) for i in range(item_img_embedding_dim) ]) ], axis=1) if conf.is_click_cached: all_phase_click_666 = pd.read_csv(conf.click_cache_path, dtype={ 'user_id': np.str, 'item_id': np.str }) ''' sampling ''' if conf.subsampling: all_phase_click_666 = utils.subsampling_user( all_phase_click_666, conf.subsampling) print('load all click, shape:{}'.format(all_phase_click_666.shape)) else: all_phase_click_org = pd.DataFrame() for phase in range(0, conf.now_phase + 1): one_phase_train_click = utils.read_train_click( conf.train_path, phase) one_phase_test_click = utils.read_test_click(conf.test_path, phase) one_phase_qtime = utils.read_qtime(conf.test_path, phase) one_phase_test_click['phase'] = str(phase) one_phase_test_click['train_or_test'] = 'test' one_phase_train_click['phase'] = str(phase) one_phase_train_click['train_or_test'] = 'train' one_phase_qtime['phase'] = str(phase) one_phase_qtime['train_or_test'] = 'predict'
]) ], axis=1) submit_all = pd.DataFrame() click_all = pd.DataFrame() whole_click = pd.DataFrame() for phase in range(0, now_phase + 1): print('----------------------- phase:{} -------------------------'. format(phase)) click_train = utils.read_train_click(train_path, phase) click_test = utils.read_test_click(test_path, phase) ''' sampling ''' if subsampling: click_train = utils.subsampling_user(click_train, subsampling) click_test = utils.subsampling_user(click_test, subsampling) click = click_train.append(click_test) if flag_append: click_all = click_all.append(click) else: click_all = click click_all = click_all.sort_values('time') click_all = click_all.drop_duplicates(['user_id', 'item_id', 'time'], keep='last') # train、test重新划分,并去重 set_pred = set(click_test['user_id'])