def init(self, data, attr, bpr_k=None, bpr_args=None, bpr_model=None):
    """Store the training data and set up (or adopt) the underlying BPR model.

    NOTE(review): this is presumably the class constructor (``__init__``)
    with its underscores lost in formatting -- confirm before renaming.

    Args:
        data: interaction matrix of shape (num_users, num_items); must be a
            scipy CSC sparse matrix (checked with an assertion).
        attr: stored unchanged as ``self.attr``; its shape is not validated.
        bpr_k: first argument passed to ``bpr.BPR`` when a new model is
            built. Defaults to ``num_users // 5``. Ignored when
            ``bpr_model`` is given.
        bpr_args: ``bpr.BPRArgs`` used when a new model is built. Defaults
            to ``BPRArgs(0.01, 1.0, 0.02125, 0.00355, 0.00355)``. Ignored
            when ``bpr_model`` is given.
        bpr_model: an existing model to reuse. When given, ``self.bpr_k``
            and ``self.bpr_args`` are read back from the model's attributes.

    Raises:
        AssertionError: if ``data`` is not a CSC sparse matrix.
    """
    assert sp.isspmatrix_csc(data)
    self.data = data
    self.num_users, self.num_items = data.shape
    self.attr = attr
    # Shape checks on ``attr`` are intentionally disabled:
    # assert attr.shape[0] >= self.num_items
    # _, self.num_attrs = attr.shape

    if bpr_model is None:
        # Floor division keeps the default an integer on Python 3 as well
        # (true division would silently produce a float here).
        self.bpr_k = bpr_k if bpr_k is not None else self.num_users // 5
        if bpr_args is None:
            self.bpr_args = bpr.BPRArgs(0.01, 1.0, 0.02125, 0.00355, 0.00355)
        else:
            self.bpr_args = bpr_args
        self.bpr_model = bpr.BPR(self.bpr_k, self.bpr_args)
    else:
        # Adopt the supplied model and mirror its hyper-parameters locally.
        self.bpr_model = bpr_model
        self.bpr_k = bpr_model.D
        self.bpr_args = bpr.BPRArgs(
            bpr_model.learning_rate,
            bpr_model.bias_regularization,
            bpr_model.user_regularization,
            bpr_model.positive_item_regularization,
            bpr_model.negative_item_regularization,
            bpr_model.update_negative_item_factors,
        )

    self.sampler = bpr.UniformUserUniformItem()
# Train one BPR model per side (male / female) and combine their per-row
# rank-scaled predictions.
# NOTE: ``male_train_raw`` is expected to have been loaded earlier in the script.


def _fit_bpr(train_data):
    """Build a rank-50 BPR sized to the current index maps and train it."""
    model = bpr.BPR(rank=50,
                    n_users=len(male_to_index),
                    n_items=len(female_to_index),
                    match_weight=1)
    model.train(train_data, epochs=3000)
    return model


def _rank_scale(scores):
    """Double argsort: the inner sort runs along axis 1, the outer along the last axis."""
    return np.argsort(np.argsort(scores, axis=1))


female_train_raw = pd.read_csv('input/female_train.csv', header=None).values

# Rows whose third column equals 2 (presumably "match" events -- confirm).
train_is_match = male_train_raw[:, 2] == 2
male_train_match = male_train_raw[train_is_match]

male_test_raw = pd.read_csv('input/male_test.csv', header=None).values
male_test_match = male_test_raw[male_test_raw[:, 2] == 2]

# Ids seen in matched training rows, mapped to consecutive integer indices.
male_set = set(male_train_raw[train_is_match, 0])
female_set = set(male_train_raw[train_is_match, 1])
male_to_index = {male_id: idx for idx, male_id in enumerate(male_set)}
female_to_index = {female_id: idx for idx, female_id in enumerate(female_set)}

# Male-side model.
male_train, male_to_index, female_to_index = utils.load_data_from_array(
    male_train_raw, male_to_index, female_to_index)
male_bpr = _fit_bpr(male_train)

# Female-side model; it reuses the index maps returned by the previous load.
female_train, male_to_index, female_to_index = utils.load_data_from_array(
    female_train_raw, male_to_index, female_to_index)
female_bpr = _fit_bpr(female_train)

male_prediction = male_bpr.prediction_to_matrix()
female_prediction = female_bpr.prediction_to_matrix()

# Replace raw scores with their within-row ranks, then add both sides.
male_prediction_scale = _rank_scale(male_prediction)
female_prediction_scale = _rank_scale(female_prediction)
male_prediction_plus_scale = male_prediction_scale + female_prediction_scale
# Input data files
train_file = 'input/tag_click/train.csv'
test_file = 'input/tag_click/test.csv'
# Output file
prediction_file = 'output/tag_click_pre.json'

train_frame = pd.read_csv(train_file)
test_frame = pd.read_csv(test_file)

# Index the training rows first, then pass the resulting maps to the test load.
training_data, users_to_index, items_to_index = utils.load_data_from_array(
    train_frame.values)
testing_data, users_to_index, items_to_index = utils.load_data_from_array(
    test_frame.values, users_to_index, items_to_index)

# NOTE: this rebinds the name ``bpr`` from the imported module to the model
# instance; the module is no longer reachable through ``bpr`` afterwards.
bpr = bpr.BPR(10, len(users_to_index), len(items_to_index))
bpr.train(training_data, epochs=50)
prediction = bpr.prediction_to_dict()


def data_to_dict(training_data):
    """Group (user, item) pairs into a nested ``{user: {item: 1}}`` mapping.

    Args:
        training_data: iterable of two-element rows ``(user, item)``.

    Returns:
        dict mapping each user to a dict whose keys are that user's items
        and whose values are all ``1``.
    """
    by_user = {}
    for user, item in training_data:
        by_user.setdefault(user, {})[item] = 1
    return by_user
# Model parameters 'n_users': 6040, 'n_items': 3705, 'k': 20, 'lr_u': 0.01, 'lr_i': 0.01, 'lr_j': 0.01, 'regularizers': dict(au=1e-1, av=1e-1), # Model parameters 'sample_method': 'Uniform', # training loop parameters 'max_epochs': 20, 'early_stop_threshold': 0.001, 'early_stopping_lag': 0 } model = bpr.BPR(**best_config_params) rd = data_bpr.prep_data() train_list_uniform = rd.get_training_list(rd.data.reset_index(), 'uniform') model.fit_early_stop(train_list_uniform, best_epoch) test_random = rd.load_sessions_file(config.config.RANDOM_TEST_PATH) test_random['bitClassification'] = test_random.apply( lambda row: infer_triple(model, row['UserID'], row['Item1'], row[ 'Item2']), axis=1) test_random.to_csv(config.config.RANDOM_TEST_OUT) model = bpr.BPR(**best_config_params) train_list_popularity = rd.get_training_list(rd.data.reset_index(), 'distribution') model.fit_early_stop(train_list_popularity, best_epoch)