Beispiel #1
0
 def init(self, data, attr, bpr_k=None, bpr_args=None, bpr_model=None):
     """Store the input data and set up the BPR model, its arguments and sampler.

     Args:
         data: SciPy CSC sparse matrix; its shape is read as
             ``(num_users, num_items)``.
         attr: Stored as ``self.attr``; its shape is not validated here.
         bpr_k: First argument for a newly built ``bpr.BPR`` (presumably the
             latent dimension -- confirm against the ``bpr`` module).
             Defaults to ``num_users // 5``. Ignored when ``bpr_model`` is
             given.
         bpr_args: ``bpr.BPRArgs`` used for a newly built model. Defaults to
             ``BPRArgs(0.01, 1.0, 0.02125, 0.00355, 0.00355)``. Ignored when
             ``bpr_model`` is given.
         bpr_model: Existing model to reuse. When given, ``self.bpr_k`` and
             ``self.bpr_args`` are rebuilt from the model's own attributes.

     Raises:
         AssertionError: If ``data`` is not a CSC sparse matrix.
     """
     assert sp.isspmatrix_csc(data)
     self.data = data
     self.num_users, self.num_items = data.shape
     self.attr = attr
     if bpr_model is None:
         # Floor division keeps the default an integer on Python 3 too;
         # true division would hand a float to bpr.BPR.
         self.bpr_k = self.num_users // 5 if bpr_k is None else bpr_k
         if bpr_args is None:
             # Positional order matches the BPRArgs call in the else branch:
             # learning rate, bias / user / positive-item / negative-item
             # regularization.
             self.bpr_args = bpr.BPRArgs(0.01, 1.0, 0.02125, 0.00355,
                                         0.00355)
         else:
             self.bpr_args = bpr_args
         self.bpr_model = bpr.BPR(self.bpr_k, self.bpr_args)
     else:
         # Reuse the supplied model and mirror its hyper-parameters so that
         # self.bpr_k / self.bpr_args stay consistent with it.
         self.bpr_model = bpr_model
         self.bpr_k = bpr_model.D
         self.bpr_args = bpr.BPRArgs(
             bpr_model.learning_rate,
             bpr_model.bias_regularization,
             bpr_model.user_regularization,
             bpr_model.positive_item_regularization,
             bpr_model.negative_item_regularization,
             bpr_model.update_negative_item_factors)
     self.sampler = bpr.UniformUserUniformItem()
# Script: train one BPR model per side (male / female) and combine their
# per-row rank matrices.
# NOTE(review): `male_train_raw` is used below but is not defined in the
# visible part of this script -- presumably loaded from a male training CSV
# the same way as `female_train_raw`; confirm.
female_train_raw = pd.read_csv('input/female_train.csv', header=None).values

# Rows whose third column equals 2 (presumably the "match" label, judging by
# the variable name -- confirm against the data description).
male_train_match = male_train_raw[male_train_raw[:, 2]==2]

male_test_raw = pd.read_csv('input/male_test.csv', header=None).values
male_test_match = male_test_raw[male_test_raw[:, 2]==2]

# Distinct ids taken from column 0 and column 1 of the matched male training
# rows. Both sets are derived from `male_train_raw`.
male_set = set(male_train_raw[male_train_raw[:, 2]==2, 0])
female_set = set(male_train_raw[male_train_raw[:, 2]==2, 1])

# Assign each id a contiguous integer index.
male_to_index = dict(zip(male_set, range(len(male_set))))
female_to_index = dict(zip(female_set, range(len(female_set))))

# The index maps are passed in and rebound from the helper's return value
# (presumably extended with ids not seen before -- confirm in utils).
male_train, male_to_index, female_to_index = utils.load_data_from_array(
    male_train_raw, male_to_index, female_to_index)
male_bpr = bpr.BPR(rank=50, n_users=len(male_to_index),
              n_items=len(female_to_index), match_weight=1)
male_bpr.train(male_train, epochs=3000)

# Same procedure for the female data. The model is sized with the same
# male/female index maps (n_users from male_to_index, n_items from
# female_to_index), as returned by this second load.
female_train, male_to_index, female_to_index = utils.load_data_from_array(
    female_train_raw, male_to_index, female_to_index)
female_bpr = bpr.BPR(rank=50, n_users=len(male_to_index),
              n_items=len(female_to_index), match_weight=1)
female_bpr.train(female_train, epochs=3000)

male_prediction = male_bpr.prediction_to_matrix()
female_prediction = female_bpr.prediction_to_matrix()

# Double argsort converts scores into ranks (0 = smallest). The inner call
# sorts along axis 1 and the outer one along NumPy's default last axis, so
# for 2-D inputs the ranks are computed within each row.
# NOTE(review): assumes both prediction matrices are 2-D with identical
# shape and orientation, otherwise the sum below is not meaningful -- confirm.
male_prediction_scale = np.argsort(np.argsort(male_prediction, axis=1))
female_prediction_scale = np.argsort(np.argsort(female_prediction, axis=1))
male_prediction_plus_scale = male_prediction_scale + female_prediction_scale
Beispiel #3
0
# Script: load the tag-click train/test CSVs, train a BPR model on the
# training data and build its predictions as a dict.
# Data files ==========================
train_file = 'input/tag_click/train.csv'
test_file = 'input/tag_click/test.csv'
# Output file===========================
# NOTE(review): prediction_file is not written to in the visible part of
# this script.
prediction_file = 'output/tag_click_pre.json'

train_frame = pd.read_csv(train_file)
test_frame = pd.read_csv(test_file)

# The first call builds the user/item index maps; the second call receives
# those maps and rebinds them from its return value (presumably extended
# with ids that only occur in the test set -- confirm in utils).
training_data, users_to_index, items_to_index = utils.load_data_from_array(
        train_frame.values)
testing_data, users_to_index, items_to_index = utils.load_data_from_array(
        test_frame.values, users_to_index, items_to_index)

# The model is sized from the index maps as they stand after both loads.
# NOTE(review): this rebinds the name `bpr` from the module to the model
# instance, so the `bpr` module is no longer reachable under that name
# afterwards.
bpr = bpr.BPR(10, len(users_to_index.keys()), len(items_to_index.keys()))

bpr.train(training_data, epochs=50)


prediction = bpr.prediction_to_dict()


def data_to_dict(training_data):
    """Group ``(user, item)`` pairs into a nested ``{user: {item: 1}}`` dict.

    Args:
        training_data: Iterable of two-element rows ``(user, item)``.

    Returns:
        A dict mapping each user to a dict whose keys are the items paired
        with that user and whose values are all ``1``.

    Raises:
        ValueError: If a row does not unpack into exactly two values.
    """
    items_by_user = {}
    for user_id, item_id in training_data:
        # setdefault creates the inner dict the first time a user is seen.
        items_by_user.setdefault(user_id, {})[item_id] = 1
    return items_by_user
Beispiel #4
0
        # Model parameters
        'n_users': 6040,
        'n_items': 3705,
        'k': 20,
        'lr_u': 0.01,
        'lr_i': 0.01,
        'lr_j': 0.01,
        'regularizers': dict(au=1e-1, av=1e-1),
        # Sampling parameters
        'sample_method': 'Uniform',
        # training loop parameters
        'max_epochs': 20,
        'early_stop_threshold': 0.001,
        'early_stopping_lag': 0
    }
    model = bpr.BPR(**best_config_params)
    rd = data_bpr.prep_data()
    train_list_uniform = rd.get_training_list(rd.data.reset_index(), 'uniform')
    model.fit_early_stop(train_list_uniform, best_epoch)

    test_random = rd.load_sessions_file(config.config.RANDOM_TEST_PATH)
    test_random['bitClassification'] = test_random.apply(
        lambda row: infer_triple(model, row['UserID'], row['Item1'], row[
            'Item2']),
        axis=1)
    test_random.to_csv(config.config.RANDOM_TEST_OUT)

    model = bpr.BPR(**best_config_params)
    train_list_popularity = rd.get_training_list(rd.data.reset_index(),
                                                 'distribution')
    model.fit_early_stop(train_list_popularity, best_epoch)