예제 #1
0
       format(time_str))
 # Accumulator frame created before the per-phase loop (it is not filled
 # in the lines visible here).
 submit_all = pd.DataFrame()
 for phase in range(0, conf.now_phase + 1):
     print('----------------------- phase:{} -------------------------'.
           format(phase))
     if conf.is_recall_cached:
         # Parse the ids as integers first and only then convert them to
         # strings. The builtin ``int``/``str`` replace the ``np.int``/
         # ``np.str`` aliases, which were removed in NumPy 1.24.
         one_phase_recall_item_df = pd.read_csv(
             conf.recall_cache_path.format(phase),
             dtype={'user_id': int, 'item_id': int})
         # ``astype`` returns a new frame with both id columns as strings,
         # avoiding an in-place ``.loc`` assignment that has to upcast an
         # integer column.
         one_phase_recall_item_df = one_phase_recall_item_df.astype(
             {'user_id': str, 'item_id': str})
         if conf.subsampling:
             one_phase_recall_item_df = utils.subsampling_user(
                 one_phase_recall_item_df, conf.subsampling)
         print('load recall items: phase:{} shape:{}'.format(
             phase, one_phase_recall_item_df.shape[0]))
     else:
         # No cached recall result: rebuild it from the pickled item
         # similarity list, which must already exist on disk.
         if not os.path.exists(conf.total_sim_list_path):
             raise Exception('no total item_sim_list')
         # NOTE(review): pickle can execute arbitrary code while loading;
         # only point total_sim_list_path at files this project produced.
         with open(conf.total_sim_list_path, 'rb') as sim_file:
             item_sim_list = pickle.load(sim_file)
         qitme_df = utils.read_qtime(conf.test_path, phase)
         # Only the second element of the returned pair is used here.
         _, recom_item = recall.items_recommod_5164(
             qitme_df, item_sim_list, all_phase_click_no_qtime,
             list(hot_df['item_id']))
         one_phase_recall_item_df = pd.DataFrame(
             recom_item, columns=['user_id', 'item_id', 'sim'])
예제 #2
0
파일: train.py 프로젝트: lacus577/name
                     columns=[
                         'img_vec{}'.format(i)
                         for i in range(item_img_embedding_dim)
                     ])
    ],
                             axis=1)

    # Obtain the click data: either read the cached CSV, or start reading
    # the per-phase train / test / qtime files.
    if conf.is_click_cached:
        # Read both id columns as strings. The builtin ``str`` replaces the
        # ``np.str`` alias, which was removed in NumPy 1.24.
        all_phase_click_666 = pd.read_csv(conf.click_cache_path,
                                          dtype={
                                              'user_id': str,
                                              'item_id': str
                                          })
        # sampling
        if conf.subsampling:
            all_phase_click_666 = utils.subsampling_user(
                all_phase_click_666, conf.subsampling)
        print('load all click, shape:{}'.format(all_phase_click_666.shape))
    else:
        all_phase_click_org = pd.DataFrame()
        for phase in range(0, conf.now_phase + 1):
            one_phase_train_click = utils.read_train_click(
                conf.train_path, phase)
            one_phase_test_click = utils.read_test_click(conf.test_path, phase)
            one_phase_qtime = utils.read_qtime(conf.test_path, phase)

            # Tag every row with the phase it came from (as a string) and
            # with the kind of file it was read from.
            phase_label = str(phase)
            one_phase_test_click['phase'] = phase_label
            one_phase_test_click['train_or_test'] = 'test'
            one_phase_train_click['phase'] = phase_label
            one_phase_train_click['train_or_test'] = 'train'
            one_phase_qtime['phase'] = phase_label
            one_phase_qtime['train_or_test'] = 'predict'
예제 #3
0
파일: train.py 프로젝트: lacus577/name
                     ])
    ],
                             axis=1)

    # Frames created before the per-phase loop; only click_all is updated
    # in the lines visible here.
    submit_all = pd.DataFrame()
    click_all = pd.DataFrame()
    whole_click = pd.DataFrame()
    for phase in range(0, now_phase + 1):
        print('----------------------- phase:{} -------------------------'.
              format(phase))

        click_train = utils.read_train_click(train_path, phase)
        click_test = utils.read_test_click(test_path, phase)
        # sampling
        if subsampling:
            click_train = utils.subsampling_user(click_train, subsampling)
            click_test = utils.subsampling_user(click_test, subsampling)

        # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is
        # the equivalent (original row labels are kept).
        click = pd.concat([click_train, click_test])

        # With flag_append the clicks accumulate across phases; otherwise
        # only the current phase's clicks are kept.
        if flag_append:
            click_all = pd.concat([click_all, click])
        else:
            click_all = click

        # Order by time, then drop exact (user, item, time) repeats,
        # keeping the last occurrence.
        click_all = click_all.sort_values('time')
        click_all = click_all.drop_duplicates(['user_id', 'item_id', 'time'],
                                              keep='last')

        # Re-split train/test and de-duplicate.
        # Distinct user ids present in this phase's test clicks
        # (presumably the users to predict for -- confirm downstream).
        set_pred = set(click_test['user_id'])