def evaluate(self):
    """
    Generate recommendations and evaluate them with precision, recall, coverage, etc.
    :return:
    """
    print("Evaluation start ...")
    test_user_items = dict()
    # Recommendations
    recommed_dict = dict()
    for user, v in self.testSet.items():
        recommed = self.recommend(user)
        recommed_dict.setdefault(user, list())
        for item, score in recommed:
            recommed_dict[user].append(item)
        test_user_items[user] = list(v.keys())

    item_popularity = dict()
    for user, v in self.trainSet.items():
        items = v.keys()
        for item in items:
            if item in item_popularity:
                item_popularity[item] += 1
            else:
                item_popularity.setdefault(item, 1)

    precision = metric.precision(recommed_dict, test_user_items)
    recall = metric.recall(recommed_dict, test_user_items)
    coverage = metric.coverage(recommed_dict, self.item_set)
    popularity = metric.popularity(item_popularity, recommed_dict)
    print("precision:{:.4f}, recall:{:.4f}, coverage:{:.4f}, popularity:{:.4f}"
          .format(precision, recall, coverage, popularity))
    hit = metric.hit(recommed_dict, test_user_items)
    print(hit)
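Every snippet in this section funnels into the same shared `metric` helper module, which is not shown here. As a reference point, below is a minimal sketch consistent with the call sites above (per-user recommendation lists compared against per-user test items, item-space coverage, and mean log-popularity of recommended items); the actual implementation may differ in details.

# A minimal sketch of the assumed `metric` helpers; formulas follow the standard
# top-N definitions and the argument order used at the call sites above.
import math


def precision(recommend_dict, test_user_items):
    """Fraction of recommended items that appear in the user's test items."""
    hit_count, total = 0, 0
    for user, rec_items in recommend_dict.items():
        true_items = set(test_user_items.get(user, []))
        hit_count += len(set(rec_items) & true_items)
        total += len(rec_items)
    return hit_count / total if total else 0.0


def recall(recommend_dict, test_user_items):
    """Fraction of the user's test items that were recommended."""
    hit_count, total = 0, 0
    for user, rec_items in recommend_dict.items():
        true_items = set(test_user_items.get(user, []))
        hit_count += len(set(rec_items) & true_items)
        total += len(true_items)
    return hit_count / total if total else 0.0


def coverage(recommend_dict, item_set):
    """Share of the full item space that is ever recommended."""
    recommended = set()
    for rec_items in recommend_dict.values():
        recommended.update(rec_items)
    return len(recommended) / len(item_set) if item_set else 0.0


def popularity(item_popularity, recommend_dict):
    """Average log-popularity of recommended items (lower means more long-tail)."""
    ret, count = 0.0, 0
    for rec_items in recommend_dict.values():
        for item in rec_items:
            ret += math.log(1 + item_popularity.get(item, 0))
            count += 1
    return ret / count if count else 0.0


def hit(recommend_dict, test_user_items):
    """Total number of hits across all users."""
    return sum(len(set(rec) & set(test_user_items.get(u, [])))
               for u, rec in recommend_dict.items())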
def test(train_data, test_data, user_size, item_size, user_bought, item_set,
         item_popularity):
    """Test the trained NCF model."""
    with tf.Session() as sess:
        iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                                   train_data.output_shapes)

        model = ncf_model.NCF(FLAGS.embedding_size, user_size, item_size, FLAGS.lr,
                              FLAGS.optim, FLAGS.initializer, FLAGS.loss_func,
                              FLAGS.activation, FLAGS.regularizer, iterator,
                              FLAGS.topK, FLAGS.dropout, is_training=True)
        model.build()

        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt:
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            raise ValueError("No model!")

        sess.run(model.iterator.make_initializer(test_data))
        model.is_training = False
        model.get_data()
        start_time = time.time()

        HR, MRR, NDCG = [], [], []
        recommed_dict = {}
        test_user_items = {}
        try:
            while True:
                prediction, items, user = model.step(sess, None)

                recommed_dict.setdefault(user, prediction)
                test_user_items.setdefault(user, user_bought[user])

                label = int(items[0])
                HR.append(metrics.hit(label, prediction))
                MRR.append(metrics.mrr(label, prediction))
                NDCG.append(metrics.ndcg(label, prediction))
        except tf.errors.OutOfRangeError:
            hr = np.array(HR).mean()
            mrr = np.array(MRR).mean()
            ndcg = np.array(NDCG).mean()
            print("HR is %.3f, MRR is %.3f, NDCG is %.3f" % (hr, mrr, ndcg))

            precision = metric.precision(recommed_dict, test_user_items)
            recall = metric.recall(recommed_dict, test_user_items)
            coverage = metric.coverage(recommed_dict, item_set)
            popularity = metric.popularity(item_popularity, recommed_dict)
            print("precision is %.3f, recall is %.3f, coverage is %.3f, popularity is %.3f"
                  % (precision, recall, coverage, popularity))
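Note that the NCF loop uses a separate `metrics` module for the per-sample leave-one-out measures (`prediction` is a ranked top-K item list, `label` the single held-out item), while the set-based measures still come from `metric`. A minimal sketch of what `metrics.hit`, `metrics.mrr`, and `metrics.ndcg` are assumed to compute; the real module may differ:

# Assumed per-sample ranking metrics for the leave-one-out protocol above.
import numpy as np


def hit(label, prediction):
    """1 if the held-out item appears anywhere in the top-K list, else 0."""
    return 1 if label in prediction else 0


def mrr(label, prediction):
    """Reciprocal rank of the held-out item (0 if it is missing)."""
    if label in prediction:
        return 1.0 / (list(prediction).index(label) + 1)
    return 0.0


def ndcg(label, prediction):
    """NDCG with a single relevant item: 1 / log2(rank + 1), 0 if missing."""
    if label in prediction:
        rank = list(prediction).index(label) + 1
        return 1.0 / np.log2(rank + 1)
    return 0.0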
def main():
    print("Testing the performance of ALS...")
    # Load data
    train, test, user_set, item_set = read_rating_data(train_rate=0.7)

    # Map each test-set user to the set of items they gave positive feedback on
    test_user_items = dict()
    test_uids = set()
    for user, item, _ in test:
        test_uids.add(user)
        if user not in test_user_items:
            test_user_items[user] = set()
        test_user_items[user].add(item)
    test_uids = list(test_uids)

    item_popularity = dict()
    for user, item, _ in train:
        if item in item_popularity:
            item_popularity[item] += 1
        else:
            item_popularity.setdefault(item, 1)

    # Train model
    model = ALS()
    model.fit(train, k=3, max_iter=10)
    print("Showing the predictions of users...")

    # Predictions
    predictions = model.predict(test_uids, n_items=10)
    # user_ids = range(1, 5)
    # predictions = model.predict(user_ids, n_items=2)

    recommed_dict = {}
    for user_id, prediction in zip(test_uids, predictions):
        recommed_dict.setdefault(user_id, list())
        for item_id, score in prediction:
            recommed_dict[user_id].append(item_id)

    precision = metric.precision(recommed_dict, test_user_items)
    recall = metric.recall(recommed_dict, test_user_items)
    coverage = metric.coverage(recommed_dict, item_set)
    popularity = metric.popularity(item_popularity, recommed_dict)
    print("precision:{:.4f}, recall:{:.4f}, coverage:{:.4f}, popularity:{:.4f}"
          .format(precision, recall, coverage, popularity))
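main() assumes that read_rating_data() returns (user, item, rating) triples split by train_rate, plus the full user and item sets. A hedged sketch of that contract, assuming a tab-separated MovieLens-style ratings file and a random split; the path, format, and split strategy are assumptions, not the repository's actual loader:

# Sketch of the assumed data contract for read_rating_data(); details are illustrative.
import random


def read_rating_data(path="data/u.data", train_rate=0.7, seed=42):
    train, test = [], []
    user_set, item_set = set(), set()
    rng = random.Random(seed)
    with open(path) as f:
        for line in f:
            user, item, rating = line.split()[:3]
            record = (int(user), int(item), float(rating))
            user_set.add(record[0])
            item_set.add(record[1])
            if rng.random() < train_rate:
                train.append(record)
            else:
                test.append(record)
    return train, test, user_set, item_set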
def evaluate(self, train_data, test_data, item_set):
    """
    Generate recommendations and evaluate them with precision, recall, coverage, etc.
    :return:
    """
    print("Evaluation start ...")
    test_user_items = dict()
    test_uids = set()
    for user, item, _ in test_data:
        test_uids.add(user)
        if user not in test_user_items:
            test_user_items[user] = set()
        test_user_items[user].add(item)
    test_uids = list(test_uids)

    item_popularity = dict()
    for user, item, _ in train_data:
        if item in item_popularity:
            item_popularity[item] += 1
        else:
            item_popularity.setdefault(item, 1)

    recommed_dict = {}
    for uid in test_uids:
        recommeds = self.recommend(uid, 10)
        item_ids = [rec[0] for rec in recommeds]
        recommed_dict.setdefault(uid, item_ids)

    precision = metric.precision(recommed_dict, test_user_items)
    recall = metric.recall(recommed_dict, test_user_items)
    coverage = metric.coverage(recommed_dict, item_set)
    popularity = metric.popularity(item_popularity, recommed_dict)
    print("precision:{:.4f}, recall:{:.4f}, coverage:{:.4f}, popularity:{:.4f}"
          .format(precision, recall, coverage, popularity))
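For context, this evaluate() is meant to be called on a fitted model that exposes recommend(uid, n) returning (item_id, score) pairs. A hypothetical usage; the model class name here is a stand-in, not the repository's actual API:

# Illustrative only: LFM is a placeholder for whichever model owns this evaluate().
train, test, user_set, item_set = read_rating_data(train_rate=0.7)
model = LFM()
model.fit(train)
model.evaluate(train, test, item_set)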
def train():
    data = load_data()
    item_set = set(data['movie_id'].unique())
    SEQ_LEN = 50

    # 1. Label-encode the sparse features, and process the sequence features
    #    with `gen_data_set` and `gen_model_input`
    features = ['user_id', 'movie_id', 'gender', 'age', 'occupation', 'zip']
    feature_max_idx = {}
    for feature in features:
        lbe = LabelEncoder()
        data[feature] = lbe.fit_transform(data[feature]) + 1
        feature_max_idx[feature] = data[feature].max() + 1

    user_profile = data[["user_id", "gender", "age", "occupation", "zip"]].drop_duplicates('user_id')
    item_profile = data[["movie_id"]].drop_duplicates('movie_id')
    user_profile.set_index("user_id", inplace=True)
    user_item_list = data.groupby("user_id")['movie_id'].apply(list)

    train_set, test_set = gen_data_set(data, 0)
    train_model_input, train_label = gen_model_input(train_set, user_profile, SEQ_LEN)
    test_model_input, test_label = gen_model_input(test_set, user_profile, SEQ_LEN)

    # 2. Count the unique values of each sparse field and generate the feature config,
    #    including the sequence-feature embedding
    embedding_dim = 16

    user_feature_columns = [SparseFeat('user_id', feature_max_idx['user_id'], embedding_dim),
                            SparseFeat("gender", feature_max_idx['gender'], embedding_dim),
                            SparseFeat("age", feature_max_idx['age'], embedding_dim),
                            SparseFeat("occupation", feature_max_idx['occupation'], embedding_dim),
                            SparseFeat("zip", feature_max_idx['zip'], embedding_dim),
                            VarLenSparseFeat(SparseFeat('hist_movie_id', feature_max_idx['movie_id'], embedding_dim,
                                                        embedding_name="movie_id"), SEQ_LEN, 'mean', 'hist_len'),
                            ]

    item_feature_columns = [SparseFeat('movie_id', feature_max_idx['movie_id'], embedding_dim)]

    # 3. Define the model and train
    K.set_learning_phase(True)

    import tensorflow as tf
    if tf.__version__ >= '2.0.0':
        tf.compat.v1.disable_eager_execution()

    model = YoutubeDNN(user_feature_columns, item_feature_columns, num_sampled=5,
                       user_dnn_hidden_units=(64, embedding_dim))
    model.compile(optimizer="adam", loss=sampledsoftmaxloss)  # "binary_crossentropy")

    history = model.fit(train_model_input, train_label,  # train_label,
                        batch_size=256, epochs=50, verbose=1, validation_split=0.0, )

    # 4. Generate user features for testing and full item features for retrieval
    test_user_model_input = test_model_input
    all_item_model_input = {"movie_id": item_profile['movie_id'].values}

    user_embedding_model = Model(inputs=model.user_input, outputs=model.user_embedding)
    item_embedding_model = Model(inputs=model.item_input, outputs=model.item_embedding)

    user_embs = user_embedding_model.predict(test_user_model_input, batch_size=2 ** 12)
    # user_embs = user_embs[:, i, :]  # i in [0, k_max) if MIND
    item_embs = item_embedding_model.predict(all_item_model_input, batch_size=2 ** 12)

    # print(user_embs)
    # print(item_embs)

    # 5. [Optional] ANN search by faiss and evaluate the result
    test_true_label = {line[0]: [line[2]] for line in test_set}

    index = faiss.IndexFlatIP(embedding_dim)
    # faiss.normalize_L2(item_embs)
    index.add(item_embs)
    # faiss.normalize_L2(user_embs)
    D, I = index.search(np.ascontiguousarray(user_embs), 10)

    recommed_dict = {}
    for i, uid in enumerate(test_user_model_input['user_id']):
        recommed_dict.setdefault(uid, [])
        try:
            pred = [item_profile['movie_id'].values[x] for x in I[i]]
            recommed_dict[uid] = pred
        except IndexError:
            print(i)

    # Ground truth per test user: the held-out target item (ts[2] in test_set),
    # consistent with test_true_label above; ts[1] is the history sequence.
    test_user_items = dict()
    for ts in test_set:
        test_user_items.setdefault(ts[0], set()).add(ts[2])

    item_popularity = dict()
    for ts in train_set:
        for item in ts[1]:
            if item in item_popularity:
                item_popularity[item] += 1
            else:
                item_popularity.setdefault(item, 1)

    precision = metric.precision(recommed_dict, test_user_items)
    recall = metric.recall(recommed_dict, test_user_items)
    coverage = metric.coverage(recommed_dict, item_set)
    popularity = metric.popularity(item_popularity, recommed_dict)
    print("precision:{:.4f}, recall:{:.4f}, coverage:{:.4f}, popularity:{:.4f}"
          .format(precision, recall, coverage, popularity))
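    # Optional addition (not in the original code): test_true_label is built above but
    # never consumed. If a leave-one-out hit rate over the same faiss top-10 results is
    # also wanted, a sketch like this can be appended to the end of train():
    hits, total = 0, 0
    for uid, rec_items in recommed_dict.items():
        true_items = test_true_label.get(uid, [])
        if not true_items:
            continue
        total += 1
        if true_items[0] in rec_items:
            hits += 1
    print("HR@10:{:.4f}".format(hits / total if total else 0.0))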