Example #1
def optimize(model, sampler, train, valid):
    """
    Optimize the model. TODO: implement early-stopping
    :param model: model to optimize
    :param sampler: mini-batch sampler
    :param train: train user-item matrix
    :param valid: validation user-item matrix
    :return: None
    """

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()  # saver used to checkpoint the variables

    save_dir = 'checkpoints/'  # checkpoint directory

    save_path = os.path.join(save_dir, 'model2')

    # if model.feature_projection is not None:
    #     # initialize item embedding with feature projection
    #     sess.run(tf.assign(model.item_embeddings, model.feature_projection))

    # sample some users on which to compute validation recall
    valid_users = numpy.random.choice(list(set(valid.nonzero()[0])),
                                      size=1000,
                                      replace=False)

    while True:
        # create evaluator on validation set
        validation_recall = RecallEvaluator(model, train, valid)
        # compute recall on validate set
        valid_recalls = []

        # compute recall in chunks to exploit the speedup provided by TensorFlow
        for user_chunk in toolz.partition_all(100, valid_users):
            valid_recalls.append(validation_recall.eval(sess, user_chunk))
        print("\nRecall on (sampled) validation set: {}".format(
            numpy.mean(valid_recalls)))
        # TODO: early stopping based on validation recall

        # train model
        losses = []
        # run n mini-batches
        for _ in tqdm(range(EVALUATION_EVERY_N_BATCHES), desc="Optimizing..."):
            user_pos, neg = sampler.next_batch()
            _, loss = sess.run(
                (model.optimize, model.loss), {
                    model.user_positive_items_pairs: user_pos,
                    model.negative_samples: neg
                })

            losses.append(loss)

        print("\nTraining loss {}".format(numpy.mean(losses)))

        saver.save(sess=sess, save_path=save_path)
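The TODO in the docstring above is left open. A minimal patience-based sketch of that early stopping (the should_stop helper, the patience value, and the recall_history list are illustrative, not part of the original project):

def should_stop(recall_history, patience=5):
    """Return True once the newest `patience` recall values contain
    no improvement over the best value seen before them."""
    if len(recall_history) <= patience:
        return False
    return max(recall_history[-patience:]) <= max(recall_history[:-patience])

# usage inside the while-loop above (sketch):
#     recall_history.append(numpy.mean(valid_recalls))
#     if should_stop(recall_history):
#         break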
Example #2
File: MAML.py Project: xxyy1/MAML
def optimize(model, sampler, train, test, args):
    """
    Optimize the model. TODO: implement early-stopping
    :param model: model to optimize
    :param sampler: mini-batch sampler
    :param train: train user-item matrix
    :param valid: validation user-item matrix
    :return: None
    """
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    #sess.graph.finalize()
    if model.feature_projection is not None:
        # initialize item embedding with feature projection
        sess.run(tf.assign(model.item_embeddings, model.feature_projection))
    # all test users, used to compute test-set recall
    test_users = numpy.asarray(list(set(test.nonzero()[0])), dtype=numpy.int32)
    testresult = RecallEvaluator(model, train, test)
    epoch = 0
    tempbest = 0
    while True:
        print('\nepochs:{}'.format(epoch))
        epoch += 1
        # TODO: early stopping based on validation recall
        # train model
        losses = []
        feature_losses = []
        # run n mini-batches
        for _ in tqdm(range(args.eva_batches), desc="Optimizing..."):
            user_pos, neg = sampler.next_batch()
            _, loss, feature_loss = sess.run(
                (model.optimize, model.loss, model.feature_loss), {
                    model.user_positive_items_pairs: user_pos,
                    model.negative_samples: neg
                })
            losses.append(loss)
            feature_losses.append(feature_loss)
        #tf.train.Saver.save(sess,'my-model')
        # compute recall, ndcg, hr, pr on the test set
        test_recalls, test_ndcg, test_hr, test_pr = [], [], [], []
        for user_chunk in toolz.partition_all(100, test_users):
            recalls, ndcgs, hit_ratios, precisions = testresult.eval(
                sess, user_chunk)
            test_recalls.extend(recalls)
            test_ndcg.extend(ndcgs)
            test_hr.extend(hit_ratios)
            test_pr.extend(precisions)
        print("\nresult on test set: ndcg:{}, recall:{}, hr:{}, pr:{}".format(
            numpy.mean(test_ndcg), numpy.mean(test_recalls),
            numpy.mean(test_hr), numpy.mean(test_pr)))
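Examples #1 and #2 both rely on toolz.partition_all to push users through the TensorFlow graph in fixed-size chunks; a tiny self-contained illustration of what it yields:

import toolz

chunks = list(toolz.partition_all(3, range(7)))
print(chunks)  # [(0, 1, 2), (3, 4, 5), (6,)] -- the last chunk may be short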
Example #3
def xnmt_evaluate(args):
    """"Returns the eval score (e.g. BLEU) of the hyp sents using reference trg sents
  """
    cols = args.evaluator.split("|")
    eval_type = cols[0]
    eval_param = {} if len(cols) == 1 else {
        key: value
        for key, value in [param.split("=") for param in cols[1].split()]
    }

    hyp_postprocess = lambda line: line.split()
    ref_postprocess = lambda line: line.split()
    if eval_type == "bleu":
        ngram = int(eval_param.get("ngram", 4))
        evaluator = BLEUEvaluator(ngram=int(ngram))
    elif eval_type == "wer":
        evaluator = WEREvaluator()
    elif eval_type == "cer":
        evaluator = CEREvaluator()
    elif eval_type == "recall":
        nbest = int(eval_param.get("nbest", 5))
        hyp_postprocess = lambda x: ast.literal_eval(x)
        ref_postprocess = lambda x: int(x)
        evaluator = RecallEvaluator(nbest=int(nbest))
    else:
        raise RuntimeError("Unknown evaluation metric {}".format(eval_type))

    ref_corpus = read_data(args.ref_file, post_process=ref_postprocess)
    hyp_corpus = read_data(args.hyp_file, post_process=hyp_postprocess)
    len_before = len(hyp_corpus)
    ref_corpus, hyp_corpus = zip(
        *filter(lambda x: NO_DECODING_ATTEMPTED not in x[1],
                zip(ref_corpus, hyp_corpus)))
    if len(ref_corpus) < len_before:
        print("> ignoring %s out of %s test sentences." %
              (len_before - len(ref_corpus), len_before))

    eval_score = evaluator.evaluate(ref_corpus, hyp_corpus)
    return eval_score
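The args.evaluator string packs the metric name and its parameters into a single value; a standalone sketch of the same parsing (the spec value is a made-up input):

spec = "bleu|ngram=4"
cols = spec.split("|")
eval_type = cols[0]
eval_param = {} if len(cols) == 1 else {
    key: value
    for key, value in (param.split("=") for param in cols[1].split())
}
print(eval_type, eval_param)  # bleu {'ngram': '4'}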
Example #4
def optimize(model, rating_sampler, social_sampler, train, valid, test):
    """
    Optimize the model.
    :param model: model to optimize
    :param rating_sampler: mini-batch sampler for rating part
    :param social_sampler: mini-batch sampler for social part
    :param train: train user-item matrix
    :param valid: validation user-item matrix
    :param test: test user-item matrix
    :return: None
    """
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # collect all users that have validation/test interactions
    valid_users = list(set(valid.nonzero()[0]))
    test_users = list(set(test.nonzero()[0]))

    best_val_hr_list = None
    best_val_ndcg_list = None
    test_hrs = None
    test_ndcgs = None
    best_val_ndcg_20 = -1
    epoch_count = 0
    endure_count = 0
    if manifold_name == 'Euclidean':
        num_user_chunk = 1000
    else:
        num_user_chunk = 100
    while True:
        epoch_count += 1
        endure_count += 1
        start = time.time()
        # create evaluator on validation set
        validation_recall = RecallEvaluator(model, train, valid)
        # compute hr and ndcg on validate set
        valid_hrs = [[], [], [], [], [], []]
        valid_ndcgs = [[], [], [], [], [], []]

        for user_chunk in toolz.partition_all(num_user_chunk, valid_users):
            hrs_1, hrs_5, hrs_10, hrs_15, hrs_20, hrs_50, ndcgs_1, ndcgs_5, ndcgs_10, ndcgs_15, ndcgs_20, ndcgs_50 = validation_recall.eval(
                sess, user_chunk)
            valid_hrs[0].extend(hrs_1)
            valid_hrs[1].extend(hrs_5)
            valid_hrs[2].extend(hrs_10)
            valid_hrs[3].extend(hrs_15)
            valid_hrs[4].extend(hrs_20)
            valid_hrs[5].extend(hrs_50)
            valid_ndcgs[0].extend(ndcgs_1)
            valid_ndcgs[1].extend(ndcgs_5)
            valid_ndcgs[2].extend(ndcgs_10)
            valid_ndcgs[3].extend(ndcgs_15)
            valid_ndcgs[4].extend(ndcgs_20)
            valid_ndcgs[5].extend(ndcgs_50)
        valid_hrs[0] = numpy.mean(valid_hrs[0])
        valid_hrs[1] = numpy.mean(valid_hrs[1])
        valid_hrs[2] = numpy.mean(valid_hrs[2])
        valid_hrs[3] = numpy.mean(valid_hrs[3])
        valid_hrs[4] = numpy.mean(valid_hrs[4])
        valid_hrs[5] = numpy.mean(valid_hrs[5])
        valid_ndcgs[0] = numpy.mean(valid_ndcgs[0])
        valid_ndcgs[1] = numpy.mean(valid_ndcgs[1])
        valid_ndcgs[2] = numpy.mean(valid_ndcgs[2])
        valid_ndcgs[3] = numpy.mean(valid_ndcgs[3])
        valid_ndcgs[4] = numpy.mean(valid_ndcgs[4])
        valid_ndcgs[5] = numpy.mean(valid_ndcgs[5])

        val_ndcg_20 = valid_ndcgs[-2]
        if val_ndcg_20 > best_val_ndcg_20:
            endure_count = 0
            best_val_ndcg_20 = val_ndcg_20
            best_val_hr_list = valid_hrs
            best_val_ndcg_list = valid_ndcgs
            test_hrs = [[], [], [], [], [], []]
            test_ndcgs = [[], [], [], [], [], []]
            test_recall = RecallEvaluator(model, train, test)

            for user_chunk in toolz.partition_all(num_user_chunk, test_users):
                hrs_1, hrs_5, hrs_10, hrs_15, hrs_20, hrs_50, ndcgs_1, ndcgs_5, ndcgs_10, ndcgs_15, ndcgs_20, ndcgs_50 = test_recall.eval(
                    sess, user_chunk)
                test_hrs[0].extend(hrs_1)
                test_hrs[1].extend(hrs_5)
                test_hrs[2].extend(hrs_10)
                test_hrs[3].extend(hrs_15)
                test_hrs[4].extend(hrs_20)
                test_hrs[5].extend(hrs_50)
                test_ndcgs[0].extend(ndcgs_1)
                test_ndcgs[1].extend(ndcgs_5)
                test_ndcgs[2].extend(ndcgs_10)
                test_ndcgs[3].extend(ndcgs_15)
                test_ndcgs[4].extend(ndcgs_20)
                test_ndcgs[5].extend(ndcgs_50)
            test_hrs[0] = numpy.mean(test_hrs[0])
            test_hrs[1] = numpy.mean(test_hrs[1])
            test_hrs[2] = numpy.mean(test_hrs[2])
            test_hrs[3] = numpy.mean(test_hrs[3])
            test_hrs[4] = numpy.mean(test_hrs[4])
            test_hrs[5] = numpy.mean(test_hrs[5])
            test_ndcgs[0] = numpy.mean(test_ndcgs[0])
            test_ndcgs[1] = numpy.mean(test_ndcgs[1])
            test_ndcgs[2] = numpy.mean(test_ndcgs[2])
            test_ndcgs[3] = numpy.mean(test_ndcgs[3])
            test_ndcgs[4] = numpy.mean(test_ndcgs[4])
            test_ndcgs[5] = numpy.mean(test_ndcgs[5])
        else:
            if endure_count >= 10:
                break

        print(
            "\n[Epoch %d] val HR: [%.4f,%.4f,%.4f,%.4f,%.4f,%.4f], val NDCG: [%.4f,%.4f,%.4f,%.4f,%.4f,%.4f], best val HR: [%.4f,%.4f,%.4f,%.4f,%.4f,%.4f]"
            ", best val NDCG: [%.4f,%.4f,%.4f,%.4f,%.4f,%.4f], test HR: [%.4f,%.4f,%.4f,%.4f,%.4f,%.4f], test NDCG: [%.4f,%.4f,%.4f,%.4f,%.4f,%.4f]"
            %
            (epoch_count, valid_hrs[0], valid_hrs[1], valid_hrs[2],
             valid_hrs[3], valid_hrs[4], valid_hrs[5], valid_ndcgs[0],
             valid_ndcgs[1], valid_ndcgs[2], valid_ndcgs[3], valid_ndcgs[4],
             valid_ndcgs[5], best_val_hr_list[0], best_val_hr_list[1],
             best_val_hr_list[2], best_val_hr_list[3], best_val_hr_list[4],
             best_val_hr_list[5], best_val_ndcg_list[0], best_val_ndcg_list[1],
             best_val_ndcg_list[2], best_val_ndcg_list[3],
             best_val_ndcg_list[4], best_val_ndcg_list[5], test_hrs[0],
             test_hrs[1], test_hrs[2], test_hrs[3], test_hrs[4], test_hrs[5],
             test_ndcgs[0], test_ndcgs[1], test_ndcgs[2], test_ndcgs[3],
             test_ndcgs[4], test_ndcgs[5]))

        # train model
        losses = []
        # run n mini-batches
        time1 = time.time()
        for _ in range(EVALUATION_EVERY_N_BATCHES):
            user_pos, neg = rating_sampler.next_batch()
            social_pos, social_neg = social_sampler.next_batch()
            _, loss = sess.run(
                (model.optimize, model.loss), {
                    model.user_positive_items_pairs: user_pos,
                    model.negative_samples: neg,
                    model.positive_social_pairs: social_pos,
                    model.negative_social_samples: social_neg
                })
            losses.append(loss)

        end = time.time()
        print('time1:', time1 - start, ' time2:', end - time1)
        print("\nTraining loss {} finished in {}s".format(
            numpy.mean(losses), end - start))
    print(
        "\nFinished. Best val HR: [%.4f,%.4f,%.4f,%.4f,%.4f,%.4f]"
        ", best val NDCG: [%.4f,%.4f,%.4f,%.4f,%.4f,%.4f], test HR: [%.4f,%.4f,%.4f,%.4f,%.4f,%.4f], test NDCG: [%.4f,%.4f,%.4f,%.4f,%.4f,%.4f]"
        % (best_val_hr_list[0], best_val_hr_list[1], best_val_hr_list[2],
           best_val_hr_list[3], best_val_hr_list[4], best_val_hr_list[5],
           best_val_ndcg_list[0], best_val_ndcg_list[1], best_val_ndcg_list[2],
           best_val_ndcg_list[3], best_val_ndcg_list[4], best_val_ndcg_list[5],
           test_hrs[0], test_hrs[1], test_hrs[2], test_hrs[3], test_hrs[4],
           test_hrs[5], test_ndcgs[0], test_ndcgs[1], test_ndcgs[2],
           test_ndcgs[3], test_ndcgs[4], test_ndcgs[5]))
    # hyp_user_embeddings, hyp_item_embeddings = sess.run(model.save_embeddings)
    # pkl.dump(hyp_user_embeddings, open(), 'wb'))
    # pkl.dump(hyp_item_embeddings, open(), 'wb'))
    # print('Embeddings Saved.')
    rating_sampler.close()
    social_sampler.close()
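The per-cutoff bookkeeping in this example repeats the same six-way pattern for HR and NDCG. A compact equivalent, as a sketch only: it assumes eval returns the six HR lists followed by the six NDCG lists, in the order shown above.

import numpy
import toolz

K_LIST = [1, 5, 10, 15, 20, 50]

def mean_hr_ndcg(evaluator, sess, users, chunk_size):
    """Accumulate per-user HR/NDCG for each cutoff, then average."""
    hrs = [[] for _ in K_LIST]
    ndcgs = [[] for _ in K_LIST]
    for chunk in toolz.partition_all(chunk_size, users):
        results = evaluator.eval(sess, chunk)  # 6 HR lists, then 6 NDCG lists
        for idx in range(len(K_LIST)):
            hrs[idx].extend(results[idx])
            ndcgs[idx].extend(results[idx + len(K_LIST)])
    return [numpy.mean(v) for v in hrs], [numpy.mean(v) for v in ndcgs]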
Example #5
def optimize(model, sampler, train, valid, test):
    """
    Optimize the model. TODO: implement early-stopping
    :param model: model to optimize
    :param sampler: mini-batch sampler
    :param train: train user-item matrix
    :param valid: validation user-item matrix
    :return: None
    """
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    if model.feature_projection is not None:
        # initialize item embedding with feature projection
        sess.run(tf.assign(model.item_embeddings, model.feature_projection))

    valid_users = list(set(valid.nonzero()[0]))

    stop_num = 10
    t_stop_num = 0
    stop_threshold = 0.005
    pre_recall = 0
    k_Mat = [5, 10, 20, 30, 40, 50]
    r_recalls = np.zeros([1, 6])
    r_precisions = np.zeros([1, 6])
    while True:
        # create evaluator on validation set
        validation_recall = RecallEvaluator(model, train, valid)
        valid_recalls, valid_precisions = validation_recall.eval(
            sess, valid_users, 50)

        # early stopping based on validation recall (plateau test below)
        # train model
        losses = []
        # run n mini-batches
        for _ in tqdm(range(EVALUATION_EVERY_N_BATCHES), desc="Optimizing..."):
            user_pos, neg = sampler.next_batch()

            _, loss = sess.run(
                (model.optimize, model.loss), {
                    model.user_positive_items_pairs: user_pos,
                    model.negative_samples: neg
                })

            losses.append(loss)

        if abs(np.mean(valid_recalls) - pre_recall) < stop_threshold:
            t_stop_num = t_stop_num + 1
        else:
            t_stop_num = 0
            pre_recall = np.mean(valid_recalls)
        if t_stop_num > stop_num:
            # performance evaluation based on test set
            test_recall = RecallEvaluator(model, train, test)
            test_users = list(set(test.nonzero()[0]))

            r_aupr = test_recall.eval_val(sess, test_users, test)

            for idx, k in enumerate(k_Mat):
                print("k = ", k, "\n")
                test_recall_r, test_precision_r = test_recall.eval(
                    sess, test_users, k)
                print("\nRecall on (sampled) test set: {}".format(
                    np.mean(test_recall_r)))
                print("\nPrecision on (sampled) test set: {}".format(
                    np.mean(test_precision_r)))
                r_recalls[:, idx] = np.mean(test_recall_r)
                r_precisions[:, idx] = np.mean(test_precision_r)
            break

    sess.close()
    return r_recalls, r_precisions, r_aupr
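Example #5 stops once validation recall moves by less than stop_threshold for more than stop_num consecutive evaluations. The same plateau test isolated into a reusable helper (a sketch; the class name and defaults mirror the constants above but are illustrative):

class PlateauStopper:
    """Signal a stop after more than `patience` consecutive evaluations
    whose metric moved by less than `threshold`."""

    def __init__(self, threshold=0.005, patience=10):
        self.threshold = threshold
        self.patience = patience
        self.previous = 0.0
        self.stall_count = 0

    def update(self, metric):
        if abs(metric - self.previous) < self.threshold:
            self.stall_count += 1
        else:
            self.stall_count = 0
            self.previous = metric
        return self.stall_count > self.patience

# stopper = PlateauStopper()
# if stopper.update(np.mean(valid_recalls)):
#     ...  # switch to the test-set evaluation, as above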
Example #6
def optimize(model, sampler, train, valid, total_batch):
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    saver = tf.train.Saver()

    saver.save(sess, os.path.join(model_saving_path, model_name))
    # collect all users with validation interactions (no sampling here)
    validation_users = set(valid.nonzero()[0])
    #validation_users = np.random.choice(list(set(valid.nonzero()[0])),size = len(valid_users),replace = False)
    # evaluation cutoffs (note: no early stopping is implemented below)
    k1, k2 = 30, 50
    epoch = 0
    Recall = RecallEvaluator(model=model,
                             train_user_item_matrix=train,
                             test_user_item_matrix=valid)

    while True:

        # Train model
        Loss = []
        for i in tqdm(range(total_batch), desc="Training..."):
            user_item_pairs, neg_item = sampler.next_batch()
            _, loss = sess.run(
                (model.optimize, model.loss), {
                    model.user_item_pos_pairs: user_item_pairs,
                    model.neg_items: neg_item
                })
            Loss.append(loss)
        print("epoch: {} loss: {}".format(epoch, np.mean(Loss)))
        epoch += 1
        recalls_k1, precisions_k1, ndcgs_k1 = [], [], []
        recalls_k2, precisions_k2, ndcgs_k2 = [], [], []
        _maps, _mrrs, _aucs = [], [], []
        #validation_users = np.random.choice(list(set(valid.nonzero()[0])),size = VALID_USERS_NUMBERS,
        #                                    replace = False)
        for users_chunk in toolz.partition_all(100, validation_users):
            precision_k1, recall_k1, ndcg_k1 = Recall.precision_recall_ndcg_k(
                sess=sess, users=users_chunk, k=k1)
            precisions_k1.extend(precision_k1)
            recalls_k1.extend(recall_k1)
            ndcgs_k1.extend(ndcg_k1)

            precision_k2, recall_k2, ndcg_k2 = Recall.precision_recall_ndcg_k(
                sess=sess, users=users_chunk, k=k2)
            precisions_k2.extend(precision_k2)
            recalls_k2.extend(recall_k2)
            ndcgs_k2.extend(ndcg_k2)

            _map, _mrr, _auc, _ = Recall.map_mrr_auc_ndcg(sess=sess,
                                                          users=users_chunk)
            _maps.extend(_map)
            _mrrs.extend(_mrr)
            _aucs.extend(_auc)
        print("+" * 20)
        print("P@" + str(k1) + ": {}".format(np.mean(precisions_k1)))
        print("R@" + str(k1) + ": {}".format(np.mean(recalls_k1)))
        print("NDCG@" + str(k1) + ": {}".format(np.mean(ndcgs_k1)))
        print("-" * 20)
        print("P@" + str(k2) + ": {}".format(np.mean(precisions_k2)))
        print("R@" + str(k2) + ": {}".format(np.mean(recalls_k2)))
        print("NDCG@" + str(k2) + ": {}".format(np.mean(ndcgs_k2)))
        print("-" * 20)
        print("MAP: {}".format(np.mean(_maps)))
        print("MRR: {}".format(np.mean(_mrrs)))
        print("AUC: {}".format(np.mean(_aucs)))
        print("+" * 20)
        saver.save(sess, os.path.join(model_saving_path, model_name))
    sess.close()
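Note that the while True: loop in this example never breaks, so the trailing sess.close() is unreachable and a checkpoint is written unconditionally every epoch. A sketch of a guarded save plus a stop signal that would fix both (the update_best helper and the patience value are illustrative):

def update_best(best, current, stall, patience=5):
    """Track a maximised metric; return (best, stall, should_stop)."""
    if current > best:
        return current, 0, False
    return best, stall + 1, stall + 1 >= patience

# inside the epoch loop, after computing the metrics (sketch):
#     best, stall, stop = update_best(best, np.mean(recalls_k2), stall)
#     if stall == 0:  # new best: checkpoint it
#         saver.save(sess, os.path.join(model_saving_path, model_name))
#     if stop:
#         break  # makes sess.close() reachable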
Example #7
def optimize(model,
             sampler,
             train,
             valid,
             test,
             args,
             item_neighbors,
             user_neighbors,
             early_stopping_n=5):
    """
    Optimize the model. DONETODO: implement early-stopping
    :param model: model to optimize
    :param sampler: mini-batch sampler
    :param train: train user-item matrix
    :param valid: validation user-item matrix
    :return: None
    """
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())
    if model.feature_projection is not None:
        # initialize item embedding with feature projection
        sess.run(tf.assign(model.item_embeddings, model.feature_projection))
    # all validation/test users, used for recall evaluation
    valid_users = numpy.asarray(list(set(valid.nonzero()[0])),
                                dtype=numpy.int32)
    test_users = numpy.asarray(list(set(test.nonzero()[0])), dtype=numpy.int32)
    validresult = RecallEvaluator(model, train, valid)
    testresult = RecallEvaluator(model, train, test)

    # A note on this setup: every block of mini-batches is followed by one
    # evaluation, and that unit is counted as one training epoch.
    # This continues the historical design of CML and MAML:
    # to speed things up, CML draws each mini-batch from the training set
    # in parallel worker processes,
    # so it is hard to tell when the whole training set has been traversed,
    # and the parallel sampler drops the last incomplete batch,
    # so an epoch cannot be defined in the traditional way.
    epoch = 0
    # counter used for early stopping
    fail_cnt = 0

    # best_ndcg=-100.0
    best_recall = -100.0
    # best_hr=-100.0
    # best_pr=-100.0
    saver = tf.train.Saver()

    while True:
        print('\nepochs:{}'.format(epoch), file=outputfile)
        epoch += 1
        # train model
        losses = []
        # run n mini-batches
        for _ in tqdm(range(args.eva_batches), desc="Optimizing..."):
            user_pos, neg = sampler.next_batch()
            # print("get next batch",datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3])

            # (N+N*W, 1)
            all_item_ids = numpy.concatenate(
                (user_pos[:, 1], numpy.reshape(neg, (-1))), axis=0)
            # print("concat all item",datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3])

            # first draw random integers, used below to index the samples;
            # in the datasets used in the experiments, the numbers of users
            # and items stay below 1,000,000, enough to cover every neighbor;
            # raise this bound for larger datasets
            all_item_neis_id_ranindex = numpy.random.randint(
                1000000, size=all_item_ids.shape[0])

            # take the modulo, then index into the neighbor list;
            # of the several sampling methods tried, this implementation was
            # the fastest, finishing in a few (or a few tens of) milliseconds,
            # while the alternatives were slower
            # (converting the user-neighbor lists to a numpy array for fancy
            # indexing was tried too, but was slower than per-element lookup);
            # the result is finally fed into TensorFlow
            # (N+N*W, 1)
            all_item_neis_id_sample = [
                item_neighbors[i][ranindex % len(item_neighbors[i])]
                for i, ranindex in zip(all_item_ids, all_item_neis_id_ranindex)
            ]
            # assert len(all_item_neis_id_sample)==all_item_ids.shape[0]
            # print("sample all item nei done",datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3])
            # assert len(all_item_neis_id_sample[0])==1

            # (N+N*W, 1)
            user_ids = numpy.concatenate(
                (user_pos[:, 0],
                 numpy.tile(numpy.expand_dims(user_pos[:, 0], 1),
                            (1, args.num_neg)).flatten()),
                axis=0)
            user_neis_id_ranindex = numpy.random.randint(
                1000000, size=user_ids.shape[0])
            # (N+N*W, 1)
            user_neis_id_sample = [
                user_neighbors[i][ranindex % len(user_neighbors[i])]
                for i, ranindex in zip(user_ids, user_neis_id_ranindex)
            ]
            # print("sample user nei done",datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f")[:-3])
            # assert len(user_neis_id_sample[0])==1
            # raise NotImplementedError

            _, loss = sess.run(
                (model.optimize, model.loss), {
                    model.user_positive_items_pairs: user_pos,
                    model.negative_samples: neg,
                    model.all_item_neis_id_sample: all_item_neis_id_sample,
                    model.user_neis_id_sample: user_neis_id_sample,
                })
            losses.append(loss)
        # training logs go both to stdout and to the log file
        print("\nTrain loss: {}".format(numpy.mean(losses)), file=outputfile)
        print("\nTrain loss: {}".format(numpy.mean(losses)))

        # evaluate on the validation set using recall only; the other metrics
        # are MAML leftovers that are simply zeroed out during evaluation,
        # no longer computed, to save time
        valid_recalls, valid_ndcg, valid_hr, valid_pr = [], [], [], []
        for user_chunk in toolz.partition_all(100, valid_users):
            recalls, ndcgs, hit_ratios, precisions = validresult.eval(
                sess, user_chunk, item_neighbors, user_neighbors)
            valid_recalls.extend(recalls)
            valid_ndcg.extend(ndcgs)
            valid_hr.extend(hit_ratios)
            valid_pr.extend(precisions)
        ndcg_mean = numpy.mean(valid_ndcg)
        recall_mean = numpy.mean(valid_recalls)
        hr_mean = numpy.mean(valid_hr)
        pr_mean = numpy.mean(valid_pr)
        print("\nresult on valid set: recall:{}".format(recall_mean),
              file=outputfile)
        print("\nresult on valid set: recall:{}".format(recall_mean))

        # check for overfitting on the validation set: if recall has not
        # improved after the given number of epochs (10 in the experiments),
        # trigger early stopping;
        # the model is saved each time a new best is reached, so that it can
        # be restored later for test-set evaluation
        if recall_mean <= best_recall:
            fail_cnt += 1
        else:
            # best_ndcg=ndcg_mean
            best_recall = recall_mean
            # best_hr=hr_mean
            # best_pr=pr_mean
            fail_cnt = 0
            saver.save(
                sess,
                os.path.join(os.getcwd(),
                             "models_{:%Y%m%d_%H%M%S}/".format(nowdate),
                             Filename + "_model.ckpt"))
            print("Best result!", file=outputfile)
            print("Best result!")
            # print(saver.last_checkpoints[-1])
        outputfile.flush()
        if fail_cnt >= early_stopping_n:
            break

    # restore the best model and evaluate it on the test set
    # saver.restore(sess, saver.last_checkpoints[-1])
    ckpt_state = tf.train.get_checkpoint_state(
        "models_{:%Y%m%d_%H%M%S}".format(nowdate))
    with open("test_pred.txt", 'w') as ftest:
        saver.restore(sess, ckpt_state.model_checkpoint_path)
        test_recalls, test_ndcg, test_hr, test_pr = [], [], [], []
        for user_chunk in toolz.partition_all(100, test_users):
            recalls, ndcgs, hit_ratios, precisions = testresult.eval(
                sess, user_chunk, item_neighbors, user_neighbors, ftest=ftest)
            test_recalls.extend(recalls)
            test_ndcg.extend(ndcgs)
            test_hr.extend(hit_ratios)
            test_pr.extend(precisions)
        ndcg_mean = numpy.mean(test_ndcg)
        recall_mean = numpy.mean(test_recalls)
        hr_mean = numpy.mean(test_hr)
        pr_mean = numpy.mean(test_pr)
        print("\nresult on test set: recall:{}".format(recall_mean),
              file=outputfile)
        print("\nresult on test set: recall:{}".format(recall_mean))
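The neighbor sampling in this example pre-draws large random integers once per batch and reduces each one modulo the corresponding neighbor list's length, which the comments report as the fastest of the approaches tried. The trick in isolation, on toy data (all names and values here are illustrative):

import numpy

neighbors = {0: [7, 8], 1: [3], 2: [5, 6, 9]}  # toy lists of uneven length
ids = numpy.array([2, 0, 2, 1])

rand = numpy.random.randint(1000000, size=ids.shape[0])  # one vectorized draw
sampled = [neighbors[i][r % len(neighbors[i])] for i, r in zip(ids, rand)]
print(sampled)  # e.g. [9, 7, 5, 3]: one (near-)uniform neighbor per id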
Example #8
def optimize(model,
             sampler,
             train,
             valid,
             test,
             train_exp_neg,
             valid_exp_neg,
             test_exp_neg,
             epochs=10):
    """
    Optimize the model. TODO: implement early-stopping
    :param model: model to optimize
    :param sampler: mini-batch sampler
    :param train: train user-item matrix
    :param valid: validation user-item matrix
    :param epochs: amount of epochs to run
    :return: None
    """
    merged_summary_op = tf.summary.merge_all()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        #if model.feature_projection is not None:
        # initialize item embedding with feature projection
        #    sess.run(tf.assign(model.item_embeddings, model.feature_projection))

        # sample some users (from the test split) to track recall during training
        test_users = numpy.random.choice(list(set(test.nonzero()[0])),
                                         size=1000,
                                         replace=False)

        # Various sections that would be needed for TensorBoard are commented
        # out because they do not work on the cluster

        # Initiate summary writer and give unique log dir to all
        #logs=str(os.getcwd())+'/train'
        users_name = "_not_named"
        #users_name=raw_input("Enter a name for this runs log:")

        #log_dir=logs+"/iters_"+str(epochs*EVALUATION_EVERY_N_BATCHES)+"_time_"+str(datetime.datetime.now()).replace(" ","_")+"__"+str(users_name)
        #if not os.path.exists(log_dir):
        #    os.makedirs(log_dir)
        # copy the metadata
        #copyfile(logs+"/projector_config.pbtxt", log_dir+"/projector_config.pbtxt")
        #create statistics of the run
        #stat_file=open(log_dir+'/stat_file.dat', 'w+')

        #train_writer = tf.summary.FileWriter(log_dir,
        #                                     graph=tf.get_default_graph())
        #saver = tf.train.Saver()
        # init history for plotting with matplotlib
        history = dict()
        history["Recall"] = []
        history["Prec"] = []
        history["NIT5"] = []
        history["NIT10"] = []

        for x in tqdm(range(epochs), desc='Epochs running...'):
            # create evaluator on validation set
            validation_recall = RecallEvaluator(model, train, test,
                                                train_exp_neg, test_exp_neg)
            # compute recall on validate set
            valid_recalls = []
            valid_precision_at_len_test = []
            exp_neg_items_in_top_k = []
            exp_neg_items_in_top_5 = []
            exp_neg_items_in_top_10 = []

            # compute recall in chunks to exploit the speedup provided by TensorFlow
            for user_chunk in toolz.partition_all(100, test_users):
                val_recall, val_precision_at_len_test, exp_neg_in_top_5, exp_neg_in_top_10 = validation_recall.eval(
                    sess, user_chunk, k=50)
                valid_recalls.extend([val_recall])
                exp_neg_items_in_top_5.extend([exp_neg_in_top_5])
                exp_neg_items_in_top_10.extend([exp_neg_in_top_10])
                valid_precision_at_len_test.extend([val_precision_at_len_test])
            flatten = lambda l: [item for sublist in l for item in sublist]
            his_rec, hist_prec, his_nit5, hist_nit10 = log_eval_stats_binned(
                flatten(valid_recalls), flatten(valid_precision_at_len_test),
                flatten(exp_neg_items_in_top_k),
                flatten(exp_neg_items_in_top_5),
                flatten(exp_neg_items_in_top_10))

            history["Recall"] += [his_rec]
            history["Prec"] += [hist_prec]
            history["NIT5"] += [his_nit5]
            history["NIT10"] += [hist_nit10]

            #NITK_summary=tf.summary.scalar("NITK", numpy.mean(exp_neg_items_in_top_k))
            # TODO: early stopping based on validation recall
            # train model
            losses = []
            model.merged_summary_op = tf.summary.merge_all()
            #train_writer.add_summary(model.merged_summary_op, x)
            # run n mini-batches
            for i in tqdm(range(EVALUATION_EVERY_N_BATCHES),
                          desc="Optimizing..."):
                user_pos, neg, user_exp_neg, pos_neg_pairs = sampler.next_batch()
                _, loss, summary = sess.run(
                    (model.optimize, model.loss, model.merged_summary_op), {
                        model.user_positive_items_pairs: user_pos,
                        model.negative_samples: neg,
                        model.user_exp_neg_items_pairs: user_exp_neg,
                        model.pos_neg_pairs: pos_neg_pairs
                    })
                #train_writer.add_summary(summary, i + (x * EVALUATION_EVERY_N_BATCHES))
                #saver.save(sess, os.path.join(log_dir, "model.ckpt"), i + (x * EVALUATION_EVERY_N_BATCHES))
                losses.append(loss)
            print "\nEpoch:" + str(x),
            print("\nTraining loss {}".format(numpy.mean(losses)))
        print(10 * "\n")
        print("Training has ended!")
        print(10 * "\n")

        # final evaluation on the held-out validation split

        validation_recall = RecallEvaluator(model, train, valid, train_exp_neg,
                                            valid_exp_neg)

        valid_users = numpy.random.choice(
            list(set(valid.nonzero()[0])),
            size=int(len(list(set(valid.nonzero()[0])))),
            replace=False)
        printValForAll(valid_users,
                       validation_recall,
                       sess,
                       k=10,
                       nI1=1,
                       nI2=5)
        printValForAll(valid_users,
                       validation_recall,
                       sess,
                       k=20,
                       nI1=10,
                       nI2=20)
        printValForAll(valid_users,
                       validation_recall,
                       sess,
                       k=50,
                       nI1=30,
                       nI2=40)
        printValForAll(valid_users,
                       validation_recall,
                       sess,
                       k=75,
                       nI1=50,
                       nI2=60)
        printValForAll(valid_users,
                       validation_recall,
                       sess,
                       k=100,
                       nI1=70,
                       nI2=80)

        #print 5*"\n"
        #stat_file.writelines("Val_recall at 50 on test set:{}".format(numpy.mean(val_recall))+"\n")
        #stat_file.writelines("NIT5 on test set:{}".format(numpy.mean(exp_neg_in_top_5))+"\n")
        #stat_file.writelines("NIT10 test set:{}".format(numpy.mean(exp_neg_in_top_10))+"\n")
        #stat_file.writelines("Precision at 50 test set: {}".format(numpy.mean(val_precision_at_len_test))+"\n")

        # print "Starting tsne"
        # print model.item_embeddings
        # tsne.tsne(model.item_embeddings, no_dims=2, initial_dims=100, perplexity=30.0)
    print "Starting summary:"
    # plot the current run
    #and ma a file
    pp = PdfPages('LastRunCMLEN_' + "" + '.pdf')
    plt.figure(1)
    plt.xlabel('Epochs, each epoch is iterations: ' +
               str(EVALUATION_EVERY_N_BATCHES))
    plt.title('Recall')
    plt.plot(history["Recall"])
    pp.savefig()

    plt.figure(2)
    plt.xlabel('Epochs, each epoch is iterations: ' +
               str(EVALUATION_EVERY_N_BATCHES))

    plt.title('Precision')
    plt.plot(history["Prec"])
    pp.savefig()

    plt.figure(3)
    plt.xlabel('Epochs, each epoch is iterations: ' +
               str(EVALUATION_EVERY_N_BATCHES))
    plt.title('NIT5')
    plt.plot(history["NIT5"])
    pp.savefig()

    plt.figure(4)
    plt.xlabel('Epochs, each epoch is iterations: ' +
               str(EVALUATION_EVERY_N_BATCHES))
    plt.title('NIT10')
    plt.plot(history["NIT10"])
    pp.savefig()

    # Close the PDF, the session, and the sampler
    pp.close()
    sess.close()
    sampler.close()

    #try:
    #    os.system("tensorboard --logdir="+logs)
    #    "Started tensorboard"
    #except:
    #    print "Sth with your log dir is wrong"

    return
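The four nearly identical plotting blocks above can be collapsed into a loop. A sketch that writes one figure per metric to the same kind of PDF (it assumes the history dict and the batches-per-epoch constant from above; the output path is illustrative):

from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt

def plot_history(history, batches_per_epoch, path='LastRunCMLEN_.pdf'):
    """One PDF page per metric curve, mirroring the figures above."""
    with PdfPages(path) as pp:
        for name in ("Recall", "Prec", "NIT5", "NIT10"):
            fig = plt.figure()
            plt.xlabel('Epochs, each epoch is iterations: ' +
                       str(batches_per_epoch))
            plt.title(name)
            plt.plot(history[name])
            pp.savefig(fig)
            plt.close(fig)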