Code example #1
    def evaluate(self):
        """
        产生推荐并通过准确率、召回率和覆盖率等进行评估
        :return:
        """
        print("Evaluation start ...")
        test_user_items = dict()
        # Generate recommendations for each test user
        recommed_dict = dict()
        for user, v in self.testSet.items():
            recommed = self.recommend(user)
            recommed_dict.setdefault(user, list())
            for item, score in recommed:
                recommed_dict[user].append(item)
            test_user_items[user] = list(v.keys())

        item_popularity = dict()
        for user, v in self.trainSet.items():
            items = v.keys()
            for item in items:
                item_popularity[item] = item_popularity.get(item, 0) + 1

        precision = metric.precision(recommed_dict, test_user_items)
        recall = metric.recall(recommed_dict, test_user_items)
        coverage = metric.coverage(recommed_dict, self.item_set)
        popularity = metric.popularity(item_popularity, recommed_dict)
        print(
            "precision:{:.4f}, recall:{:.4f}, coverage:{:.4f}, popularity:{:.4f}"
            .format(precision, recall, coverage, popularity))
        hit = metric.hit(recommed_dict, test_user_items)
        print(hit)
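
All of these examples delegate scoring to an external `metric` module that the snippets do not include. As a point of reference, here is a minimal sketch of what such helpers conventionally compute, given a dict of per-user top-N recommendation lists and a dict of per-user ground-truth items; the original module may differ in detail, so treat this as an assumption rather than the shipped code.

import math

def precision(recommend_dict, test_user_items):
    """Fraction of recommended items that appear in the user's test items."""
    hits, total = 0, 0
    for user, items in recommend_dict.items():
        truth = set(test_user_items.get(user, []))
        hits += sum(1 for item in items if item in truth)
        total += len(items)
    return hits / total if total else 0.0

def recall(recommend_dict, test_user_items):
    """Fraction of the user's test items that were recommended."""
    hits, total = 0, 0
    for user, items in recommend_dict.items():
        truth = set(test_user_items.get(user, []))
        hits += sum(1 for item in items if item in truth)
        total += len(truth)
    return hits / total if total else 0.0

def coverage(recommend_dict, item_set):
    """Fraction of the catalogue that appears in at least one list."""
    recommended = {item for items in recommend_dict.values() for item in items}
    return len(recommended) / len(item_set) if item_set else 0.0

def popularity(item_popularity, recommend_dict):
    """Mean log-popularity of recommended items (higher = less novel)."""
    total, count = 0.0, 0
    for items in recommend_dict.values():
        for item in items:
            total += math.log(1 + item_popularity.get(item, 0))
            count += 1
    return total / count if count else 0.0

def hit(recommend_dict, test_user_items):
    """Total count of recommended items the user actually interacted with."""
    return sum(len(set(items) & set(test_user_items.get(user, [])))
               for user, items in recommend_dict.items())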
Code example #2
def test(train_data, test_data, user_size, item_size, user_bought, item_set,
         item_popularity):
    """测试"""
    with tf.Session() as sess:
        iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                                   train_data.output_shapes)

        model = ncf_model.NCF(FLAGS.embedding_size,
                              user_size,
                              item_size,
                              FLAGS.lr,
                              FLAGS.optim,
                              FLAGS.initializer,
                              FLAGS.loss_func,
                              FLAGS.activation,
                              FLAGS.regularizer,
                              iterator,
                              FLAGS.topK,
                              FLAGS.dropout,
                              is_training=True)

        model.build()
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt:
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            raise ValueError("No model!")

        sess.run(model.iterator.make_initializer(test_data))
        model.is_training = False
        model.get_data()
        start_time = time.time()
        HR, MRR, NDCG = [], [], []
        recommed_dict = {}
        test_user_items = {}
        try:
            while True:
                prediction, items, user = model.step(sess, None)
                recommed_dict.setdefault(user, prediction)
                test_user_items.setdefault(user, user_bought[user])
                label = int(items[0])
                HR.append(metrics.hit(label, prediction))
                MRR.append(metrics.mrr(label, prediction))
                NDCG.append(metrics.ndcg(label, prediction))
        except tf.errors.OutOfRangeError:
            hr = np.array(HR).mean()
            mrr = np.array(MRR).mean()
            ndcg = np.array(NDCG).mean()

            print("HR is %.3f, MRR is %.3f, NDCG is %.3f" % (hr, mrr, ndcg))

            precision = metric.precision(recommed_dict, test_user_items)
            recall = metric.recall(recommed_dict, test_user_items)
            coverage = metric.coverage(recommed_dict, item_set)
            popularity = metric.popularity(item_popularity, recommed_dict)
            print(
                "precision is %.3f, recall is %.3f, coverage is %.3f, popularity is %.3f"
                % (precision, recall, coverage, popularity))
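
`metrics.hit`, `metrics.mrr` and `metrics.ndcg` above come from yet another external module and are applied per test example. Under the leave-one-out protocol this loop implies (one held-out item per user, with `prediction` being the ranked top-K item ids), they reduce to the sketch below; again, this is an assumption about the helpers, not their actual source.

import math

def hit(label, prediction):
    """1 if the held-out item made the top-K list, else 0."""
    return 1 if label in prediction else 0

def mrr(label, prediction):
    """Reciprocal rank of the held-out item; 0 if it is missing."""
    if label in prediction:
        return 1.0 / (list(prediction).index(label) + 1)
    return 0.0

def ndcg(label, prediction):
    """Binary NDCG with one relevant item: 1 / log2(rank + 2) for 0-based rank."""
    if label in prediction:
        return 1.0 / math.log2(list(prediction).index(label) + 2)
    return 0.0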
Code example #3
def main():
    print("Tesing the performance of ALS...")
    # Load data
    train, test, user_set, item_set = read_rating_data(train_rate=0.7)

    # Map each test-set user to the set of items they gave positive feedback on
    test_user_items = dict()
    test_uids = set()
    for user, item, _ in test:
        test_uids.add(user)
        if user not in test_user_items:
            test_user_items[user] = set()
        test_user_items[user].add(item)
    test_uids = list(test_uids)

    item_popularity = dict()
    for user, item, _ in train:
        item_popularity[item] = item_popularity.get(item, 0) + 1

    # Train model
    model = ALS()
    model.fit(train, k=3, max_iter=10)

    print("Showing the predictions of users...")
    # Predictions
    predictions = model.predict(test_uids, n_items=10)

    # user_ids = range(1, 5)
    # predictions = model.predict(user_ids, n_items=2)
    recommed_dict = {}
    for user_id, prediction in zip(test_uids, predictions):
        recommed_dict.setdefault(user_id, list())
        for item_id, score in prediction:
            recommed_dict[user_id].append(item_id)
    precision = metric.precision(recommed_dict, test_user_items)
    recall = metric.recall(recommed_dict, test_user_items)
    coverage = metric.coverage(recommed_dict, item_set)
    popularity = metric.popularity(item_popularity, recommed_dict)

    print("precision:{:.4f}, recall:{:.4f}, coverage:{:.4f}, popularity:{:.4f}".format(precision, recall, coverage, popularity))
Code example #4
    def evaluate(self, train_data, test_data, item_set):
        """
        产生推荐并通过准确率、召回率和覆盖率等进行评估
        :return:
        """
        print("Evaluation start ...")
        test_user_items = dict()
        test_uids = set()
        for user, item, _ in test_data:
            test_uids.add(user)
            if user not in test_user_items:
                test_user_items[user] = set()
            test_user_items[user].add(item)
        test_uids = list(test_uids)

        item_popularity = dict()

        for user, item, _ in train_data:
            item_popularity[item] = item_popularity.get(item, 0) + 1

        recommed_dict = {}
        for uid in test_uids:
            recommeds = self.recommend(uid, 10)
            item_ids = [rec[0] for rec in recommeds]
            recommed_dict.setdefault(uid, item_ids)

        precision = metric.precision(recommed_dict, test_user_items)
        recall = metric.recall(recommed_dict, test_user_items)
        coverage = metric.coverage(recommed_dict, item_set)
        popularity = metric.popularity(item_popularity, recommed_dict)
        print(
            "precision:{:.4f}, recall:{:.4f}, coverage:{:.4f}, popularity:{:.4f}"
            .format(precision, recall, coverage, popularity))
Code example #5
def train():
    data = load_data()
    item_set = set(data['movie_id'].unique())
    SEQ_LEN = 50

    # 1. Label-encode the sparse features and build sequence features with `gen_data_set` and `gen_model_input`
    features = ['user_id', 'movie_id', 'gender', 'age', 'occupation', 'zip']
    feature_max_idx = {}
    for feature in features:
        lbe = LabelEncoder()
        data[feature] = lbe.fit_transform(data[feature]) + 1
        feature_max_idx[feature] = data[feature].max() + 1

    user_profile = data[["user_id", "gender", "age", "occupation", "zip"]].drop_duplicates('user_id')

    item_profile = data[["movie_id"]].drop_duplicates('movie_id')

    user_profile.set_index("user_id", inplace=True)

    user_item_list = data.groupby("user_id")['movie_id'].apply(list)

    train_set, test_set = gen_data_set(data, 0)

    train_model_input, train_label = gen_model_input(train_set, user_profile, SEQ_LEN)

    test_model_input, test_label = gen_model_input(test_set, user_profile, SEQ_LEN)

    # 2. Count unique values for each sparse field and build the feature configs, including the sequence feature

    embedding_dim = 16

    user_feature_columns = [SparseFeat('user_id', feature_max_idx['user_id'], embedding_dim),
                            SparseFeat("gender", feature_max_idx['gender'], embedding_dim),
                            SparseFeat("age", feature_max_idx['age'], embedding_dim),
                            SparseFeat("occupation", feature_max_idx['occupation'], embedding_dim),
                            SparseFeat("zip", feature_max_idx['zip'], embedding_dim),
                            VarLenSparseFeat(SparseFeat('hist_movie_id', feature_max_idx['movie_id'], embedding_dim,
                                                        embedding_name="movie_id"), SEQ_LEN, 'mean', 'hist_len'),
                            ]

    item_feature_columns = [SparseFeat('movie_id', feature_max_idx['movie_id'], embedding_dim)]

    # 3. Define the model and train

    K.set_learning_phase(True)
    import tensorflow as tf
    if tf.__version__ >= '2.0.0':
        tf.compat.v1.disable_eager_execution()

    model = YoutubeDNN(user_feature_columns, item_feature_columns, num_sampled=5,
                       user_dnn_hidden_units=(64, embedding_dim))

    model.compile(optimizer="adam", loss=sampledsoftmaxloss)  # alternative: loss="binary_crossentropy"

    history = model.fit(train_model_input, train_label,
                        batch_size=256, epochs=50, verbose=1, validation_split=0.0, )

    # 4. Generate user features for testing and full item features for retrieval
    test_user_model_input = test_model_input
    all_item_model_input = {"movie_id": item_profile['movie_id'].values}

    user_embedding_model = Model(inputs=model.user_input, outputs=model.user_embedding)
    item_embedding_model = Model(inputs=model.item_input, outputs=model.item_embedding)

    user_embs = user_embedding_model.predict(test_user_model_input, batch_size=2 ** 12)
    # user_embs = user_embs[:, i, :]  # i in [0,k_max) if MIND
    item_embs = item_embedding_model.predict(all_item_model_input, batch_size=2 ** 12)

    # print(user_embs)
    # print(item_embs)

    # 5. [Optional] ANN search with faiss and evaluate the result

    test_true_label = {line[0]: [line[2]] for line in test_set}

    index = faiss.IndexFlatIP(embedding_dim)
    # faiss.normalize_L2(item_embs)
    index.add(item_embs)
    # faiss.normalize_L2(user_embs)
    D, I = index.search(np.ascontiguousarray(user_embs), 10)

    recommed_dict = {}
    for i, uid in enumerate(test_user_model_input['user_id']):
        recommed_dict.setdefault(uid, [])
        try:
            pred = [item_profile['movie_id'].values[x] for x in I[i]]
            recommed_dict[uid] = pred
        except IndexError:
            # A returned neighbour index fell outside the item table
            print(i)

    test_user_items = dict()
    for ts in test_set:
        if ts[0] not in test_user_items:
            test_user_items[ts[0]] = set(ts[1])
    item_popularity = dict()
    for ts in train_set:
        for item in ts[1]:
            item_popularity[item] = item_popularity.get(item, 0) + 1

    precision = metric.precision(recommed_dict, test_user_items)
    recall = metric.recall(recommed_dict, test_user_items)
    coverage = metric.coverage(recommed_dict, item_set)
    popularity = metric.popularity(item_popularity, recommed_dict)

    print("precision:{:.4f}, recall:{:.4f}, coverage:{:.4f}, popularity:{:.4f}".format(precision, recall, coverage,
                                                                                       popularity))
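
A closing note on the retrieval step: `faiss.IndexFlatIP` ranks by raw inner product, so the commented-out `faiss.normalize_L2` calls are exactly what would turn the search into cosine similarity. A self-contained sketch with toy embeddings (shapes and sizes are made up for illustration):

import faiss
import numpy as np

embedding_dim = 16
item_embs = np.random.rand(1000, embedding_dim).astype('float32')  # toy item vectors
user_embs = np.random.rand(5, embedding_dim).astype('float32')     # toy user vectors

# In-place L2 normalization makes inner product equal to cosine similarity
faiss.normalize_L2(item_embs)
faiss.normalize_L2(user_embs)

index = faiss.IndexFlatIP(embedding_dim)  # exact (brute-force) inner-product index
index.add(item_embs)                      # index every item vector
D, I = index.search(user_embs, 10)        # top-10 scores and row indices per user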