Example #1
def test_graph(hyperparams, nodes, config=None):
    nodes = scoring_and_counting(hyperparams, nodes, config=config)
    metric_values = {
        "MRR_movie": metrics.mrr(nodes["movie_higher_values"]),
        "HITS@10_movie": metrics.hits_n(nodes["movie_higher_values"], 10),
        "HITS@3_movie": metrics.hits_n(nodes["movie_higher_values"], 3),
        "HITS@1_movie": metrics.hits_n(nodes["movie_higher_values"], 1),
        "MRR_r": metrics.mrr(nodes["rating_higher_values"]),
        "HITS@5_r": metrics.hits_n(nodes["rating_higher_values"], 5),
        "HITS@3_r": metrics.hits_n(nodes["rating_higher_values"], 3),
        "HITS@2_r": metrics.hits_n(nodes["rating_higher_values"], 2),
        "HITS@1_r": metrics.hits_n(nodes["rating_higher_values"], 1)
    }
    nodes.update(metric_values)

    summaries = [tf.summary.scalar(k, v) for k, v in metric_values.items()] + [
        tf.summary.histogram("rating score rankings",
                             nodes["rating_higher_values"]),
        tf.summary.histogram("movie score rankings",
                             nodes["movie_higher_values"])
    ]

    nodes["test_summary"] = tf.summary.merge(summaries)

    return nodes
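Note: the `metrics` module these listings call is not shown on this page. A minimal sketch of the semantics Example #1 appears to assume, where each entry of the `*_higher_values` tensors is the 1-based rank of the correct answer (one plus the number of candidates scored higher), could look like the following; the names and behaviour are inferred, not taken from the project:

import numpy as np

def mrr(ranks):
    """Mean reciprocal rank over a vector of 1-based ranks."""
    ranks = np.asarray(ranks, dtype=np.float64)
    return float(np.mean(1.0 / ranks))

def hits_n(ranks, n):
    """Fraction of queries whose correct answer lands in the top n."""
    ranks = np.asarray(ranks)
    return float(np.mean(ranks <= n))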
Example #2
def eval_reranker(res_fname="svm.test.res", pred_fname="svm.train.pred", 
                  format="trec",
                  th=50, 
                  verbose=False,
                  reranking_th=-100.0,
                  ignore_noanswer=False, ignore_allanswer=False):
	ir, svm = read_res_pred_files(res_fname, pred_fname, format, verbose,
	                              reranking_th=reranking_th,
	                              ignore_noanswer=ignore_noanswer,
	                              ignore_allanswer=ignore_allanswer)

	# evaluate IR
	prec_se = metrics.recall_of_1(ir, th)
	acc_se = metrics.accuracy(ir, th)
	acc_se1 = metrics.accuracy1(ir, th)
	acc_se2 = metrics.accuracy2(ir, th)

	# evaluate SVM
	prec_svm = metrics.recall_of_1(svm, th)
	acc_svm = metrics.accuracy(svm, th)
	acc_svm1 = metrics.accuracy1(svm, th)
	acc_svm2 = metrics.accuracy2(svm, th)

	mrr_se = metrics.mrr(ir, th)
	mrr_svm = metrics.mrr(svm, th)
	map_se = metrics.map(ir)
	map_svm = metrics.map(svm)

	avg_acc1_svm = metrics.avg_acc1(svm, th)
	avg_acc1_ir = metrics.avg_acc1(ir, th)

	'''
	print "%13s %5s" %("IR", "SVM")
	print "MRR: %5.2f %5.2f" %(mrr_se, mrr_svm)
	print "MAP: %5.4f %5.4f" %(map_se, map_svm)
	print "AvgRec: %5.2f %5.2f" %(avg_acc1_ir, avg_acc1_svm)
	print "%16s %6s  %14s %6s  %14s %6s  %12s %4s" % ("IR", "SVM", "IR", "SVM", "IR", "SVM", "IR", "SVM")
	'''
	rec1_se = -10
	rec1_svm = -10
	for i, (p_se, p_svm, a_se, a_svm, a_se1, a_svm1, a_se2, a_svm2) in enumerate(zip(prec_se, prec_svm, acc_se, acc_svm, acc_se1, acc_svm1, acc_se2, acc_svm2), 1):
		#print "REC-1@%02d: %6.2f %6.2f  ACC@%02d: %6.2f %6.2f  AC1@%02d: %6.2f %6.2f  AC2@%02d: %4.0f %4.0f" %(i, p_se, p_svm, i, a_se, a_svm, i, a_se1, a_svm1, i, a_se2, a_svm2)
		if (rec1_se<-5):
			rec1_se = p_se
			rec1_svm = p_svm


	'''
	print "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks were questions have at most one correct answer)"
	print "ACC   - accuracy, i.e. number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
	print "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
	print "AC2   - the absolute number of correct answers at @X"
	'''

	print "Table view"
	print "	MRR	MAP	P@1"
	print "REF_FILE	%5.2f	%5.2f	%5.2f" % (mrr_se, map_se*100, rec1_se)
	print "SVM	%5.2f	%5.2f	%5.2f" % (mrr_svm, map_svm*100, rec1_svm)
Example #3
def eval_reranker(res_fname="svm.test.res",
                  pred_fname="svm.train.pred",
                  format="trec",
                  th=10,
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    ir, svm, conf_matrix = read_res_pred_files(res_fname,
                                               pred_fname,
                                               format,
                                               verbose,
                                               reranking_th=reranking_th,
                                               ignore_noanswer=ignore_noanswer)
    # Calculate standard P, R, F1, Acc
    acc = 1.0 * (
        conf_matrix['true']['true'] + conf_matrix['false']['false']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['false'] +
            conf_matrix['true']['false'] + conf_matrix['false']['true'])
    p = 0
    if (conf_matrix['true']['true'] + conf_matrix['false']['true']) > 0:
        p = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['true'])
    r = 0
    if (conf_matrix['true']['true'] + conf_matrix['true']['false']) > 0:
        r = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['true']['false'])
    f1 = 0
    if (p + r) > 0:
        f1 = 2.0 * p * r / (p + r)

    # evaluate IR
    prec_se = metrics.recall_of_1(ir, th)
    acc_se = metrics.accuracy(ir, th)
    acc_se1 = metrics.accuracy1(ir, th)
    acc_se2 = metrics.accuracy2(ir, th)

    # evaluate SVM
    prec_svm = metrics.recall_of_1(svm, th)
    acc_svm = metrics.accuracy(svm, th)
    acc_svm1 = metrics.accuracy1(svm, th)
    acc_svm2 = metrics.accuracy2(svm, th)

    mrr_se = metrics.mrr(ir, th)
    mrr_svm = metrics.mrr(svm, th)
    map_se = metrics.map(ir, th)
    map_svm = metrics.map(svm, th)

    avg_acc1_svm = metrics.avg_acc1(svm, th)
    avg_acc1_ir = metrics.avg_acc1(ir, th)

    print "acc\tf1\tMAP\tMRR\tAvgRec"
    print "%.4f %4.4f %4.4f %4.4f %4.4f" % (acc, f1, map_svm, mrr_svm,
                                            avg_acc1_svm)
Example #4
def eval_reranker(resPredIterable,
                  th=10,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    ir, svm, conf_matrix = read_res_pred_files(resPredIterable, reranking_th=reranking_th,
                                      ignore_noanswer=ignore_noanswer)        
    # Calculate standard P, R, F1, Acc
    acc = 1.0 * (conf_matrix['true']['true'] + conf_matrix['false']['false']) / (conf_matrix['true']['true'] + conf_matrix['false']['false'] + conf_matrix['true']['false'] + conf_matrix['false']['true'])
    p = 0
    if (conf_matrix['true']['true'] + conf_matrix['false']['true']) > 0:
        p = 1.0 * (conf_matrix['true']['true']) / (conf_matrix['true']['true'] + conf_matrix['false']['true'])
    r = 0
    if (conf_matrix['true']['true'] + conf_matrix['true']['false']) > 0:
        r = 1.0 * (conf_matrix['true']['true']) / (conf_matrix['true']['true'] + conf_matrix['true']['false'])
    f1 = 0
    if (p + r) > 0:
        f1 = 2.0 * p * r / (p + r)

    mrr_svm = metrics.mrr(svm, th)
    map_svm = metrics.map(svm, th)

    scores = {}
    scores['map'] = map_svm
    scores['accuracy'] = acc
    scores['precision'] = p
    scores['recall'] = r
    scores['f1'] = f1
    scores['mrr'] = mrr_svm
    return scores
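Note: in the reranker examples, `ir` and `svm` map each question id to its ranked list of relevance labels ("true"/"false"). As a minimal sketch of the MRR@th and MAP semantics these calls appear to assume (the percentage scaling is inferred from how the scores are printed; `map_score` is a hypothetical stand-in for `metrics.map`):

def mrr(ranked, th):
    """ranked: {qid: ["true"/"false", ...] in ranked order}; returns MRR as a percentage."""
    total = 0.0
    for labels in ranked.values():
        for rank, label in enumerate(labels[:th], 1):
            if label == "true":
                total += 1.0 / rank
                break
    return 100.0 * total / len(ranked)

def map_score(ranked):
    """Mean average precision over the ranked label lists."""
    ap_sum = 0.0
    for labels in ranked.values():
        hits, precisions = 0, []
        for rank, label in enumerate(labels, 1):
            if label == "true":
                hits += 1
                precisions.append(hits / float(rank))
        ap_sum += sum(precisions) / len(precisions) if precisions else 0.0
    return ap_sum / len(ranked)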
Example #5
def eval_search_engine(res_fname, format, th=10):
    ir = read_res_file(res_fname, format)

    # evaluate IR
    rec = metrics.recall_of_1(ir, th)
    acc = metrics.accuracy(ir, th)
    acc1 = metrics.accuracy1(ir, th)
    acc2 = metrics.accuracy2(ir, th)

    mrr = metrics.mrr(ir, th)

    print("%13s" % "IR")
    print("MRRof1: %5.2f" % mrr)
    for i, (r, a, a1, a2) in enumerate(zip(rec, acc, acc1, acc2), 1):
        print(
            "REC-1@%02d: %6.2f  ACC@%02d: %6.2f  AC1@%02d: %6.2f  AC2@%02d: %4.0f"
            % (i, r, i, a, i, a1, i, a2))
    print()
    print(
        "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks were questions have at most one correct answer)"
    )
    print(
        "ACC   - accuracy, i.e. number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
    )
    print(
        "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
    )
    print("AC2   - the absolute number of correct answers at @X")
Example #6
def eval_search_engine(res_fname, format, th=50):
	ir = read_res_file(res_fname, format)		

	# evaluate IR
	rec = metrics.recall_of_1(ir, th)
	acc = metrics.accuracy(ir, th)
	acc1 = metrics.accuracy1(ir, th)
	acc2 = metrics.accuracy2(ir, th)

	mrr = metrics.mrr(ir, th)

	# MAP
	map_ir = metrics.map(ir)


	print "%10s" %"IR"
	print "MRR: %5.2f" % mrr
	print "MAP: %5.2f" % map_ir
	for i, (r, a, a1, a2) in enumerate(zip(rec, acc, acc1, acc2), 1):
		print "REC-1@%02d: %6.2f  ACC@%02d: %6.2f  AC1@%02d: %6.2f  AC2@%02d: %4.0f" %(i, r, i, a, i, a1, i, a2)
	print
	print "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks were questions have at most one correct answer)"
	print "ACC   - accuracy, i.e. number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
	print "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
	print "AC2   - the absolute number of correct answers at @X"
Example #7
def get_cv_evaluation_results(qid_aid_label_list, y_pred):
    predictions_dict = get_cv_ranked_predictions_dict(qid_aid_label_list,
                                                      y_pred)
    logging.debug("Num of questions: %d" % (len(predictions_dict)))

    mrr_score = m.mrr(predictions_dict, 1000)
    map_score = m.map(predictions_dict) * 100
    p1_score = m.recall_of_1(predictions_dict, 1000)[0]

    return mrr_score, map_score, p1_score
Example #8
def infer(train_data, test_data, user_size, item_size):
	config = tf.ConfigProto()
	config.gpu_options.allow_growth = True

	with tf.Session(config=config) as sess:

		############################### CREATE MODEL #############################
		iterator = tf.data.Iterator.from_structure(train_data.output_types, 
								train_data.output_shapes)
		model = NCF.NCF(FLAGS.embedding_size, user_size, item_size,	FLAGS.lr, 
				FLAGS.optim, FLAGS.initializer, FLAGS.loss_func, FLAGS.activation, 
				FLAGS.regularizer, iterator, FLAGS.topK, FLAGS.dropout, is_training=True)
		model.build()
		# train_init_op = iterator.make_initializer(train_data)

		ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
		if ckpt:
			print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
			model.saver.restore(sess, ckpt.model_checkpoint_path)
		else:
			print("model files do not exist")
			exit(1)
		
		############################### Training ####################################
		 
		total_time = 0
		count = 0
		for epoch in range(FLAGS.epochs):
			 
		################################ EVALUATION ##################################
			sess.run(model.iterator.make_initializer(test_data))
			model.is_training = False
			HR, MRR, NDCG = [], [], []
			start_time = time.time()
			try:
				while True:
					prediction, label = model.step(sess, None)
					count = count + 1

					label = int(label[0])
					HR.append(metrics.hit(label, prediction))
					MRR.append(metrics.mrr(label, prediction))
					NDCG.append(metrics.ndcg(label, prediction))
			except tf.errors.OutOfRangeError:
				hr = np.array(HR).mean()
				mrr = np.array(MRR).mean()
				ndcg = np.array(NDCG).mean()
				print("Epoch %d testing  " %epoch + "Took: " + time.strftime("%H: %M: %S", 
									time.gmtime(time.time() - start_time)))
				print("HR is %.3f, MRR is %.3f, NDCG is %.3f" %(hr, mrr, ndcg))
			total_time += time.time() - start_time
		print("Total Epochs: %d on inference " %(epoch+1))
		print("Total recommendations: %d" % (count * FLAGS.batch_size))
		print("Approximate accelerator time in seconds is: %.2f" % total_time)
		print("Approximate accelerator performance in recommendations/second is: %.2f" % (float(count * FLAGS.batch_size)/float(total_time)))
Example #9
def eval_reranker(res_fname="svm.test.res", pred_fname="svm.train.pred", 
                  format="trec",
                  th=50, 
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
	ir, svm = read_res_pred_files(res_fname, pred_fname, format, verbose, 
		                              reranking_th=reranking_th, 
		                              ignore_noanswer=ignore_noanswer)		
	# evaluate IR
	prec_se = metrics.recall_of_1(ir, th)
	acc_se = metrics.accuracy(ir, th)
	acc_se1 = metrics.accuracy1(ir, th)
	acc_se2 = metrics.accuracy2(ir, th)

	# evaluate SVM
	prec_svm = metrics.recall_of_1(svm, th)
	acc_svm = metrics.accuracy(svm, th)
	acc_svm1 = metrics.accuracy1(svm, th)
	acc_svm2 = metrics.accuracy2(svm, th)

	mrr_se = metrics.mrr(ir, th)
	mrr_svm = metrics.mrr(svm, th)
	map_se = metrics.map(ir)
	map_svm = metrics.map(svm)

	avg_acc1_svm = metrics.avg_acc1(svm, th)
	avg_acc1_ir = metrics.avg_acc1(ir, th)

	print "%13s %5s" %("IR", "SVM")
	print "MRR: %5.2f %5.2f" %(mrr_se, mrr_svm)
	print "MAP: %5.4f %5.4f" %(map_se, map_svm)
	print "AvgRec: %5.2f %5.2f" %(avg_acc1_ir, avg_acc1_svm)
	print "%16s %6s  %14s %6s  %14s %6s  %12s %4s" % ("IR", "SVM", "IR", "SVM", "IR", "SVM", "IR", "SVM")
	for i, (p_se, p_svm, a_se, a_svm, a_se1, a_svm1, a_se2, a_svm2) in enumerate(zip(prec_se, prec_svm, acc_se, acc_svm, acc_se1, acc_svm1, acc_se2, acc_svm2), 1):
		print "REC-1@%02d: %6.2f %6.2f  ACC@%02d: %6.2f %6.2f  AC1@%02d: %6.2f %6.2f  AC2@%02d: %4.0f %4.0f" %(i, p_se, p_svm, i, a_se, a_svm, i, a_se1, a_svm1, i, a_se2, a_svm2)
	print
	print "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks were questions have at most one correct answer)"
	print "ACC   - accuracy, i.e. number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
	print "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
	print "AC2   - the absolute number of correct answers at @X"
Example #10
def get_evaluation_results(df,
                           y_pred,
                           skip_all_positives_and_all_negatives=True):
    predictions_dict = get_ranked_predictions_dict(
        df,
        y_pred,
        skip_all_positives_and_all_negatives=
        skip_all_positives_and_all_negatives)
    logging.debug("Num of questions: %d" % (len(predictions_dict)))

    mrr_score = m.mrr(predictions_dict, 1000)
    map_score = m.map(predictions_dict) * 100
    p1_score = m.recall_of_1(predictions_dict, 1000)[0]

    return mrr_score, map_score, p1_score
Example #11
def calc_metrics(file):
    all_predictions = []
    all_labels = []
    impressions = read_impressions_file(file)
    if sample_size > 0:
        impressions = random.sample(impressions, sample_size)
    for i, impression in enumerate(impressions):
        preds, labels = calc_impression(impression)
        all_predictions.append(preds)
        all_labels.append(labels)
        if i % 100 == 99:
            print("Completed {} / {}".format(i + 1, len(impressions)))

    metrics = {
        "auc": group_auc(all_predictions, all_labels),
        "mrr": mrr(all_predictions, all_labels),
        "ndcg@5": ndcg(all_predictions, all_labels, 5),
        "ndcg@10": ndcg(all_predictions, all_labels, 10)
    }
    return metrics
Example #12
def eval_model(model, data_loader, sample_prob=1.0, train=False):
    sample_data = data_loader.sample_valid_data(sample_prob, train=train)
    with torch.no_grad():

        all_predictions = []
        all_labels = []

        for impression in sample_data:
            user_ids, news_ids, _, _, _, labels = impression
            prediction = model(user_ids, news_ids).view(-1)
            all_predictions.append(prediction.detach().numpy())
            all_labels.append(labels.detach().numpy())

        metrics = {
            "auc": group_auc(all_predictions, all_labels),
            "mrr": mrr(all_predictions, all_labels),
            "ndcg@5": ndcg(all_predictions, all_labels, 5),
            "ndcg@10": ndcg(all_predictions, all_labels, 10)
        }
        print(metrics)
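Note: in these news-recommendation examples, `all_predictions` and `all_labels` are parallel lists with one score array and one 0/1 label array per impression. A sketch of an impression-averaged MRR under that assumption (this version scores the first relevant item; some implementations average the reciprocal ranks of all positives instead):

import numpy as np

def mrr(all_predictions, all_labels):
    scores = []
    for preds, labels in zip(all_predictions, all_labels):
        order = np.argsort(preds)[::-1]            # candidates sorted by score, best first
        ranked_labels = np.take(labels, order)
        positives = np.flatnonzero(ranked_labels)  # positions of the relevant items
        scores.append(1.0 / (positives[0] + 1) if positives.size else 0.0)
    return float(np.mean(scores))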
Example #13
def cal_score(ref_lines, probs):
    reranking_th = 0.0
    line_count = 0
    pred_lines = defaultdict(list)
    for ref_line in ref_lines:
        qid, aid, lbl = ref_line[0], ref_line[1], ref_line[2]
        pred_lines[qid].append((lbl, probs[line_count][0], aid))
        line_count += 1
    # for qid in pred_lines.keys():
    #     candidates = pred_lines[qid]
    #     if all(relevant == "false" for relevant, _, _ in candidates):
    #         del pred_lines[qid]
    for qid in pred_lines.keys():
        pred_sorted = pred_lines[qid]
        max_score = max([score for rel, score, aid in pred_sorted])
        if max_score >= reranking_th:
            pred_sorted = sorted(pred_sorted, key=itemgetter(1), reverse=True)
        pred_lines[qid] = [rel for rel, score, aid in pred_sorted]
    MAP = metrics.map(pred_lines, 10)
    MRR = metrics.mrr(pred_lines, 10)
    return MAP, MRR
Example #14
File: run.py Project: PTYin/ESRT
def test(model, sess, test_data, all_items_idx, user_bought):
    model.is_training = False
    model.test_first = True
    all_items_embed = []
    HR, MRR, NDCG = [], [], []

    ########################## GET ALL ITEM EMBEDDING ONCE ######################
    for sample in test_data.get_all_test():
        item_embed = model.step(sess, sample, None, None)
        all_items_embed.append(item_embed[0][0])

    model.test_first = False
    all_items_embed = np.array(all_items_embed)

    ########################## TEST FOR EACH USER QUERY PAIR #####################
    for sample in test_data.get_instance():
        item_indices = model.step(sess, sample, all_items_embed, None)[0]
        itemID = sample[3]
        reviewerID = sample[4]

        ranking_list = all_items_idx[item_indices].tolist()

        top_idx = []
        u_bought = user_bought[reviewerID] if reviewerID in user_bought else []
        while len(
                top_idx
        ) < FLAGS.topK:  # delete those items already bought by the user
            candidate_item = ranking_list.pop()
            if candidate_item not in u_bought or candidate_item == itemID:
                top_idx.append(candidate_item)
        top_idx = np.array(top_idx)

        HR.append(metrics.hit(itemID, top_idx))
        MRR.append(metrics.mrr(itemID, top_idx))
        NDCG.append(metrics.ndcg(itemID, top_idx))

    hr = np.array(HR).mean()
    mrr = np.array(MRR).mean()
    ndcg = np.array(NDCG).mean()
    print("HR is %.3f, MRR is %.3f, NDCG is %.3f" % (hr, mrr, ndcg))
Example #15
def train(train_data, test_data, user_size, item_size):
    with tf.Session() as sess:
        iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                                   train_data.output_shapes)

        model = NCF.NCF(FLAGS.embedding_size, user_size, item_size, FLAGS.lr,
                        FLAGS.optim, FLAGS.initializer, FLAGS.loss_func, FLAGS.activation,
                        FLAGS.regularizer, iterator, FLAGS.topK, FLAGS.dropout, is_training=True)

        model.build()

        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt:
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Creating model with fresh parameters.")
            sess.run(tf.global_variables_initializer())


        count = 0
        for epoch in range(FLAGS.epochs):
            sess.run(model.iterator.make_initializer(train_data))
            model.is_training = True
            model.get_data()
            start_time = time.time()

            try:
                while True:
                    model.step(sess, count)
                    count += 1
            except tf.errors.OutOfRangeError:
                print("Epoch %d training " % epoch + "Took: " + time.strftime("%H: %M: %S",
                                                                              time.gmtime(time.time() - start_time)))



            sess.run(model.iterator.make_initializer(test_data))
            model.is_training = False
            model.get_data()
            start_time = time.time()
            HR,MRR,NDCG = [],[],[]
            prediction, label = model.step(sess, None)
            try:
                while True:
                    prediction, label = model.step(sess, None)

                    label = int(label[0])
                    HR.append(metrics.hit(label, prediction))
                    MRR.append(metrics.mrr(label, prediction))
                    NDCG.append(metrics.ndcg(label, prediction))
            except tf.errors.OutOfRangeError:
                hr = np.array(HR).mean()
                mrr = np.array(MRR).mean()
                ndcg = np.array(NDCG).mean()
                print("Epoch %d testing  " % epoch + "Took: " + time.strftime("%H: %M: %S",
                                                                              time.gmtime(time.time() - start_time)))
                print("HR is %.3f, MRR is %.3f, NDCG is %.3f" % (hr, mrr, ndcg))

        ################################## SAVE MODEL ################################
        checkpoint_path = os.path.join(FLAGS.model_dir, "NCF.ckpt")
        model.saver.save(sess, checkpoint_path)
Example #16
def stats_cv(path=".",  format="trec", prefix="svm", th=50, verbose=False):
  mrrs_se = []
  mrrs_svm = []
  abs_mrrs = []
  rel_mrrs = []

  maps_se = []
  maps_svm = []
  abs_maps = []
  rel_maps = []

  recalls1_se = []
  recalls1_svm = []
  abs_recalls = []
  rel_recalls = []

  oracle_mrrs = []
  oracle_maps = []
  oracle_recs1 = []

  num_folds = 0
  
  print "%13s %5s %7s %7s" %("IR", "SVM", "(abs)", "(rel)")
  for fold in sorted(os.listdir(path)):
    currentFold = os.path.join(path, fold)
    if not os.path.isdir(currentFold):
      continue
    if not fold.startswith("fold"):
      logging.warn("Directories containing CV folds should start with 'fold'")
      continue
    print fold

    # Relevancy file
    res_fname = os.path.join(currentFold, "%s.test.res" % prefix)
    if not os.path.exists(res_fname):
      logging.error("Relevancy file not found: %s", res_fname)
      sys.exit(1)

    # Predictions file
    pred_fname = os.path.join(currentFold, "%s.pred" % prefix)
    if not os.path.exists(pred_fname):
      logging.error("SVM prediction file not found: %s", pred_fname)
      sys.exit(1)

    try:
      ir, svm = read_res_pred_files(res_fname, pred_fname, format, verbose)   
    except:
      logging.error("Failed to process input files: %s %s", res_fname, pred_fname)
      logging.error("Check that the input file format is correct")
      sys.exit(1)

    # MRR
    mrr_se = metrics.mrr(ir, th) or 1
    mrr_svm = metrics.mrr(svm, th) 
    mrrs_se.append(mrr_se)
    mrrs_svm.append(mrr_svm) 

    # improvement
    abs_mrr_diff = mrr_svm - mrr_se
    rel_mrr_diff = (mrr_svm - mrr_se)*100/mrr_se
    abs_mrrs.append(abs_mrr_diff)
    rel_mrrs.append(rel_mrr_diff)

    print "MRR: %5.2f %5.2f %+6.2f%% %+6.2f%%" % (mrr_se, mrr_svm, abs_mrr_diff, rel_mrr_diff) 

    # MAP
    map_se = metrics.map(ir) or 1
    map_svm = metrics.map(svm)
    maps_se.append(map_se) 
    maps_svm.append(map_svm)

    # improvement
    abs_map_diff = map_svm - map_se
    rel_map_diff = (map_svm - map_se)*100/map_se
    abs_maps.append(abs_map_diff)
    rel_maps.append(rel_map_diff)
    print "MAP: %5.2f %5.2f %+6.2f%% %+6.2f%%" % (map_se, map_svm, abs_map_diff, rel_map_diff) 

    # Recall-of-1@1
    rec_se = metrics.recall_of_1(ir, th)[0] or 1
    rec_svm = metrics.recall_of_1(svm, th)[0]
    recalls1_se.append(rec_se)
    recalls1_svm.append(rec_svm)

    # improvement
    abs_rec_diff = rec_svm - rec_se
    rel_rec_diff = (rec_svm - rec_se)*100/rec_se
    abs_recalls.append(abs_rec_diff)
    rel_recalls.append(rel_rec_diff)

    print "P@1: %5.2f %5.2f %+6.2f%% %+6.2f%%" % (rec_se, rec_svm, abs_rec_diff, rel_rec_diff)   

    num_folds += 1

    '''
    mrr_oracle = metrics.oracle_mrr(ir, th)
    map_oracle = metrics.oracle_map(ir)
    prec_oracle = metrics.oracle_precision(ir, th)[0]
    rec1_oracle = metrics.oracle_recall_of_1(ir, th)[0]

    oracle_mrrs.append(mrr_oracle)
    oracle_maps.append(map_oracle)
    oracle_recs1.append(rec1_oracle)

    print "Oracle MRR: %5.2f, Oracle MAP: %5.2f, Oracle prec: %5.2f, Oracle rec@1: %5.2f" % (mrr_oracle, map_oracle, prec_oracle, rec1_oracle)
    '''
  # mrrs
  avg_mrr_se, std_mrr_se = mean_and_std(mrrs_se)
  avg_mrr_svm, std_mrr_svm = mean_and_std(mrrs_svm)
  avg_abs_impr_mrr, std_abs_impr_mrr = mean_and_std(abs_mrrs)
  avg_rel_impr_mrr, std_rel_impr_mrr = mean_and_std(rel_mrrs)
  #oracle_avg_mrr, std_oracle_avg_mrr = mean_and_std(oracle_mrrs)

  # maps
  avg_map_se, std_map_se = mean_and_std(maps_se)
  avg_map_svm, std_map_svm = mean_and_std(maps_svm)
  avg_abs_impr_map, std_abs_impr_map = mean_and_std(abs_maps)
  avg_rel_impr_map, std_rel_impr_map = mean_and_std(rel_maps)
  #oracle_avg_map, std_oracle_avg_map = mean_and_std(oracle_maps)

  # recall
  avg_rec1_se, std_rec1_se = mean_and_std(recalls1_se)  # se 
  avg_rec1_svm, std_rec1_svm = mean_and_std(recalls1_svm)  # svm
  avg_abs_impr_rec1, std_abs_impr_rec1 = mean_and_std(abs_recalls)  # absolute
  avg_rel_impr_rec1, std_rel_impr_rec1 = mean_and_std(rel_recalls)  # relative
  #oracle_avg_rec1, std_oracle_avg_rec1 = mean_and_std(oracle_recs1)

  FMT = u"%3s: %5.2f \u00B1 %4.2f %5.2f \u00B1 %4.2f %+6.2f%% \u00B1 %4.2f %+6.2f%% \u00B1 %4.2f"
  #ORACLE_FMT = u"Oracle MRR: %5.2f \u00B1 %4.2f, Oracle MAP: %5.2f \u00B1 %4.2f, Oracle P@1: %5.2f \u00B1 %4.2f"
  print
  print "Averaged over %s folds" % num_folds
  print "%17s %12s %14s %14s" %("IR", "SVM", "(abs)", "(rel)")
  print FMT % ("MRR", avg_mrr_se, std_mrr_se, avg_mrr_svm, std_mrr_svm, avg_abs_impr_mrr, std_abs_impr_mrr, avg_rel_impr_mrr, std_rel_impr_mrr)
  print FMT % ("MAP", avg_map_se, std_map_se, avg_map_svm, std_map_svm, avg_abs_impr_map, std_abs_impr_map, avg_rel_impr_map, std_rel_impr_map)
  print FMT % ("P@1", avg_rec1_se, std_rec1_se, avg_rec1_svm, std_rec1_svm, avg_abs_impr_rec1, std_abs_impr_rec1, avg_rel_impr_rec1, std_rel_impr_rec1)
Example #17
def stats_cv(path=".",
             format="trec",
             prefix="svm",
             th=50,
             suf="",
             verbose=False,
             truth_file=None,
             ignore_noanswer=False,
             cut_truth_map_at_N=None):
    mrrs_se = []
    mrrs_svm = []
    abs_mrrs = []
    rel_mrrs = []

    maps_se = []
    maps_svm = []
    abs_maps = []
    rel_maps = []

    recalls1_se = []
    recalls1_svm = []
    abs_recalls = []
    rel_recalls = []

    num_folds = 0
    truth = read_truth_file(truth_file, format, cut_truth_map_at_N)
    print "%13s %5s %7s %7s" % ("IR", "SVM", "(abs)", "(rel)")
    for fold in sorted(os.listdir(path)):
        currentFold = os.path.join(path, fold)
        if not os.path.isdir(currentFold):
            continue
        if not fold.startswith("fold"):
            logging.warn(
                "Directories containing CV folds should start with 'fold'")
            continue
        print fold

        # Relevancy file
        res_fname = os.path.join(currentFold, "%s.relevancy" % prefix)
        if not os.path.exists(res_fname):
            logging.error("Relevancy file not found: %s", res_fname)
            sys.exit(1)

        # Predictions file
        pred_fname = os.path.join(currentFold, "%s.pred" % (prefix + suf))
        if not os.path.exists(pred_fname):
            logging.error("SVM prediction file not found: %s", pred_fname)
            sys.exit(1)

        try:
            ir, svm = read_res_pred_files(res_fname,
                                          pred_fname,
                                          format,
                                          verbose,
                                          ignore_noanswer=ignore_noanswer,
                                          truth_map=truth)
        except:
            logging.error("Failed to process input files: %s %s", res_fname,
                          pred_fname)
            logging.error("Check that the input file format is correct")
            sys.exit(1)

        # MRR
        mrr_se = metrics.mrr(ir, th)
        mrr_svm = metrics.mrr(svm, th)
        mrrs_se.append(mrr_se)
        mrrs_svm.append(mrr_svm)

        # improvement
        abs_mrr_diff = mrr_svm - mrr_se
        rel_mrr_diff = (mrr_svm - mrr_se) * 100 / mrr_se
        abs_mrrs.append(abs_mrr_diff)
        rel_mrrs.append(rel_mrr_diff)

        print "MRR: %5.2f %5.2f %+6.2f%% %+6.2f%%" % (
            mrr_se, mrr_svm, abs_mrr_diff, rel_mrr_diff)

        # MAP
        map_se = metrics.map(ir)
        map_svm = metrics.map(svm)
        maps_se.append(map_se)
        maps_svm.append(map_svm)
        # improvement
        abs_map_diff = map_svm - map_se
        rel_map_diff = (map_svm - map_se) * 100 / map_se
        abs_maps.append(abs_map_diff)
        rel_maps.append(rel_map_diff)
        print "MAP: %5.2f %5.2f %+6.2f%% %+6.2f%%" % (
            map_se * 100, map_svm * 100, abs_map_diff, rel_map_diff)

        # Recall-of-1@1
        rec_se = metrics.recall_of_1(ir, th)[0]
        rec_svm = metrics.recall_of_1(svm, th)[0]
        recalls1_se.append(rec_se)
        recalls1_svm.append(rec_svm)

        # improvement
        abs_rec_diff = rec_svm - rec_se
        rel_rec_diff = (rec_svm - rec_se) * 100 / rec_se
        abs_recalls.append(abs_rec_diff)
        rel_recalls.append(rel_rec_diff)

        print "P@1: %5.2f %5.2f %+6.2f%% %+6.2f%%" % (
            rec_se, rec_svm, abs_rec_diff, rel_rec_diff)

        num_folds += 1

    # mrrs
    avg_mrr_se, std_mrr_se = mean_and_std(mrrs_se)
    avg_mrr_svm, std_mrr_svm = mean_and_std(mrrs_svm)
    avg_abs_impr_mrr, std_abs_impr_mrr = mean_and_std(abs_mrrs)
    avg_rel_impr_mrr, std_rel_impr_mrr = mean_and_std(rel_mrrs)

    # maps
    avg_map_se, std_map_se = mean_and_std(maps_se)
    avg_map_svm, std_map_svm = mean_and_std(maps_svm)
    avg_abs_impr_map, std_abs_impr_map = mean_and_std(abs_maps)
    avg_rel_impr_map, std_rel_impr_map = mean_and_std(rel_maps)

    # recall
    avg_rec1_se, std_rec1_se = mean_and_std(recalls1_se)  # se
    avg_rec1_svm, std_rec1_svm = mean_and_std(recalls1_svm)  # svm
    avg_abs_impr_rec1, std_abs_impr_rec1 = mean_and_std(
        abs_recalls)  # absolute
    avg_rel_impr_rec1, std_rel_impr_rec1 = mean_and_std(
        rel_recalls)  # relative

    FMT = u"%3s: %5.2f \u00B1 %4.2f %5.2f \u00B1 %4.2f %+6.2f%% \u00B1 %4.2f %+6.2f%% \u00B1 %4.2f"
    print
    print "Averaged over %s folds" % num_folds
    print "%17s %12s %14s %14s" % ("IR", "SVM", "(abs)", "(rel)")
    print FMT % ("MRR", avg_mrr_se, std_mrr_se, avg_mrr_svm, std_mrr_svm,
                 avg_abs_impr_mrr, std_abs_impr_mrr, avg_rel_impr_mrr,
                 std_rel_impr_mrr)
    print FMT % ("MAP", avg_map_se * 100, std_map_se, avg_map_svm * 100,
                 std_map_svm, avg_abs_impr_map, std_abs_impr_map,
                 avg_rel_impr_map, std_rel_impr_map)
    print FMT % ("P@1", avg_rec1_se, std_rec1_se, avg_rec1_svm, std_rec1_svm,
                 avg_abs_impr_rec1, std_abs_impr_rec1, avg_rel_impr_rec1,
                 std_rel_impr_rec1)
    print "Table view"
    print "	MRR	MAP	P@1"
    print u"IR	%5.2f\u00B1%4.2f	%5.2f\u00B1%4.2f	 %5.2f\u00B1%4.2f" % (
        avg_mrr_se, std_mrr_se, avg_map_se * 100, std_map_se * 100,
        avg_rec1_se, std_rec1_se)
    print u"SVM	%5.2f\u00B1%4.2f	%5.2f\u00B1%4.2f	 %5.2f\u00B1%4.2f" % (
        avg_mrr_svm, std_mrr_svm, avg_map_svm * 100, std_map_svm * 100,
        avg_rec1_svm, std_rec1_svm)
Example #18
def evaluate():
  """Run evaluation on dev or test data."""
  add_inverse_edge = FLAGS.model in \
                     ["source_rel_attention", "source_path_attention"]
  if FLAGS.clueweb_data:
    train_graph = clueweb_text_graph.CWTextGraph(
        text_kg_file=FLAGS.clueweb_data,
        embeddings_file=FLAGS.clueweb_embeddings,
        sentence_vocab_file=FLAGS.clueweb_sentences,
        skip_new=True,
        kg_file=FLAGS.kg_file,
        add_reverse_graph=not add_inverse_edge,
        add_inverse_edge=add_inverse_edge,
        subsample=FLAGS.subsample_text_rels
    )
  elif FLAGS.text_kg_file:
    train_graph = text_graph.TextGraph(
        text_kg_file=FLAGS.text_kg_file,
        skip_new=True,
        max_text_len=FLAGS.max_text_len,
        max_vocab_size=FLAGS.max_vocab_size,
        min_word_freq=FLAGS.min_word_freq,
        kg_file=FLAGS.kg_file,
        add_reverse_graph=not add_inverse_edge,
        add_inverse_edge=add_inverse_edge,
        max_path_length=FLAGS.max_path_length
    )
  else:
    train_graph = graph.Graph(
        kg_file=FLAGS.kg_file,
        add_reverse_graph=not add_inverse_edge,
        add_inverse_edge=add_inverse_edge,
        max_path_length=FLAGS.max_path_length
    )
  # train_graph, _ = read_graph_data(
  #     kg_file=FLAGS.kg_file,
  #     add_reverse_graph=(FLAGS.model != "source_rel_attention"),
  #     add_inverse_edge=(FLAGS.model == "source_rel_attention"),
  #     mode="train", num_epochs=FLAGS.num_epochs, batchsize=FLAGS.batchsize,
  #     max_neighbors=FLAGS.max_neighbors,
  #     max_negatives=FLAGS.max_negatives
  # )
  val_graph = None
  if FLAGS.dev_kg_file:
    val_graph, eval_data = read_graph_data(
        kg_file=FLAGS.dev_kg_file,
        add_reverse_graph=not add_inverse_edge,
        add_inverse_edge=add_inverse_edge,
        # add_reverse_graph=False,
        # add_inverse_edge=False,
        mode="dev", num_epochs=1, batchsize=FLAGS.test_batchsize,
        max_neighbors=FLAGS.max_neighbors,
        max_negatives=FLAGS.max_negatives, train_graph=train_graph,
        text_kg_file=FLAGS.text_kg_file
    )
  if FLAGS.test_kg_file:
    _, eval_data = read_graph_data(
        kg_file=FLAGS.test_kg_file,
        add_reverse_graph=not add_inverse_edge,
        add_inverse_edge=add_inverse_edge,
        # add_reverse_graph=False,
        # add_inverse_edge=False,
        mode="test", num_epochs=1, batchsize=FLAGS.test_batchsize,
        max_neighbors=FLAGS.max_neighbors,
        max_negatives=None, train_graph=train_graph,
        text_kg_file=FLAGS.text_kg_file,
        val_graph=val_graph
    )
  if not FLAGS.dev_kg_file and not FLAGS.test_kg_file:
    raise ValueError("Evalution without a dev or test file!")

  iterator = eval_data.dataset.make_initializable_iterator()
  candidate_scores, candidates, labels, model, is_train_ph, inputs = \
    create_model(train_graph, iterator)

  # Create eval metrics
  # if FLAGS.dev_kg_file:
  batch_rr = metrics.mrr(candidate_scores, candidates, labels)
  mrr, mrr_update = tf.metrics.mean(batch_rr)
  mrr_summary = tf.summary.scalar("MRR", mrr)

  all_hits, all_hits_update, all_hits_summaries = [], [], []
  for k in [1, 3, 10]:
    batch_hits = metrics.hits_at_k(candidate_scores, candidates, labels, k=k)
    hits, hits_update = tf.metrics.mean(batch_hits)
    hits_summary = tf.summary.scalar("Hits_at_%d" % k, hits)
    all_hits.append(hits)
    all_hits_update.append(hits_update)
    all_hits_summaries.append(hits_summary)
  hits = tf.group(*all_hits)
  hits_update = tf.group(*all_hits_update)

  global_step = tf.Variable(0, name="global_step", trainable=False)
  current_step = tf.Variable(0, name="current_step", trainable=False,
                             collections=[tf.GraphKeys.LOCAL_VARIABLES])
  incr_current_step = tf.assign_add(current_step, 1)
  reset_current_step = tf.assign(current_step, 0)

  slim.get_or_create_global_step(graph=tf.get_default_graph())

  # best_hits = tf.Variable(0., trainable=False)
  # best_step = tf.Variable(0, trainable=False)
  # with tf.control_dependencies([hits]):
  #   update_best_hits = tf.cond(tf.greater(hits, best_hits),
  #                              lambda: tf.assign(best_hits, hits),
  #                              lambda: 0.)
  #   update_best_step = tf.cond(tf.greater(hits, best_hits),
  #                              lambda: tf.assign(best_step, global_step),
  #                              lambda: 0)
  # best_hits_summary = tf.summary.scalar("Best Hits@10", best_hits)
  # best_step_summary = tf.summary.scalar("Best Step", best_step)

  nexamples = eval_data.data_graph.tuple_store.shape[0]
  if eval_data.data_graph.add_reverse_graph:
    nexamples *= 2
  num_batches = math.ceil(nexamples / float(FLAGS.test_batchsize))
  local_init_op = tf.local_variables_initializer()

  if FLAGS.analyze:
    entity_names = utils.read_entity_name_mapping(FLAGS.entity_names_file)
    session = tf.Session()
    # summary_writer = tf.summary.FileWriter(FLAGS.output_dir, session.graph)
    init_op = tf.global_variables_initializer()
    session.run(init_op)
    session.run(local_init_op)
    saver = tf.train.Saver(tf.trainable_variables())
    ckpt_path = FLAGS.model_path + "/model.ckpt-%d" % FLAGS.global_step
    attention_probs = model["attention_encoder"].get_from_collection(
        "attention_probs"
    )
    if FLAGS.clueweb_data:
      s, nbrs_s, text_nbrs_s, text_nbrs_s_emb, r, candidates, _ = inputs
    elif FLAGS.text_kg_file:
      s, nbrs_s, text_nbrs_s, r, candidates, _ = inputs
    else:
      s, nbrs_s, r, candidates, _ = inputs
    saver.restore(session, ckpt_path)
    session.run(iterator.initializer)
    num_attention = 5
    nsteps = 0
    outf_correct = open(FLAGS.output_dir + "/analyze_correct.txt", "w+")
    outf_incorrect = open(
        FLAGS.output_dir + "/analyze_incorrect.txt", "w+"
    )
    ncorrect = 0
    analyze_outputs = [candidate_scores, s, nbrs_s, r, candidates, labels,
                       attention_probs]
    if FLAGS.text_kg_file:
      analyze_outputs.append(text_nbrs_s)
    while True:
      try:
        analyze_vals = session.run(analyze_outputs, {is_train_ph: False})
        if FLAGS.text_kg_file:
          cscores, se, nbrs, qr, cands, te, nbr_attention_probs, text_nbrs = \
            analyze_vals
        else:
          cscores, se, nbrs, qr, cands, te, nbr_attention_probs = analyze_vals
        # import pdb; pdb.set_trace()
        pred_ids = cscores.argmax(1)
        for i in range(se.shape[0]):
          sname = train_graph.inverse_entity_vocab[se[i]]
          if sname in entity_names:
            sname = entity_names[sname]
          rname = train_graph.inverse_relation_vocab[qr[i]]
          pred_target = cands[i, pred_ids[i]]
          pred_name = train_graph.inverse_entity_vocab[pred_target]
          if pred_name in entity_names:
            pred_name = entity_names[pred_name]
          tname = train_graph.inverse_entity_vocab[te[i][0]]
          if tname in entity_names:
            tname = entity_names[tname]
          if te[i][0] == pred_target:
            outf = outf_correct
            ncorrect += 1
          else:
            outf = outf_incorrect
          outf.write("\n(%d) %s, %s, ? \t Pred: %s \t Target: %s" %
                     (nsteps+i+1, sname, rname, pred_name, tname))
          top_nbrs_index = np.argsort(nbr_attention_probs[i, :])[::-1]
          outf.write("\nTop Nbrs:")
          for j in range(num_attention):
            nbr_index = top_nbrs_index[j]
            if nbr_index < FLAGS.max_neighbors:
              nbr_id = nbrs[i, nbr_index, :]
              nbr_name = ""
              for k in range(0, nbrs.shape[-1], 2):
                ent_name = train_graph.inverse_entity_vocab[nbr_id[k+1]]
                if ent_name in entity_names:
                  ent_name = entity_names[ent_name]
                rel_name = train_graph.inverse_relation_vocab[nbr_id[k]]
                nbr_name += "(%s, %s)" % (rel_name, ent_name)
            else:
              # Text Relation
              text_nbr_ids = text_nbrs[i, nbr_index - FLAGS.max_neighbors, :]
              text_nbr_ent = text_nbr_ids[0]
              ent_name = train_graph.inverse_entity_vocab[text_nbr_ent]
              if ent_name in entity_names:
                ent_name = entity_names[ent_name]
              rel_name = train_graph.get_relation_text(text_nbr_ids[1:])
              nbr_name = "(%s, %s)" % (rel_name, ent_name)
            outf.write("\n\t\t %s Prob: %.4f" %
                       (nbr_name, nbr_attention_probs[i, nbr_index]))
        nsteps += se.shape[0]
        tf.logging.info("Current hits@1: %.3f", ncorrect * 1.0 / (nsteps))

      except tf.errors.OutOfRangeError:
        break
    outf_correct.close()
    outf_incorrect.close()
    return

  class DataInitHook(tf.train.SessionRunHook):

    def after_create_session(self, sess, coord):
      sess.run(iterator.initializer)
      sess.run(reset_current_step)

  if FLAGS.test_only:
    ckpt_path = FLAGS.model_path + "/model.ckpt-%d" % FLAGS.global_step
    slim.evaluation.evaluate_once(
        master=FLAGS.master,
        checkpoint_path=ckpt_path,
        logdir=FLAGS.output_dir,
        variables_to_restore=tf.trainable_variables() + [global_step],
        initial_op=tf.group(local_init_op, iterator.initializer),
        # initial_op=iterator.initializer,
        num_evals=num_batches,
        eval_op=tf.group(mrr_update, hits_update, incr_current_step),
        eval_op_feed_dict={is_train_ph: False},
        final_op=tf.group(mrr, hits),
        final_op_feed_dict={is_train_ph: False},
        summary_op=tf.summary.merge([mrr_summary]+ all_hits_summaries),
        hooks=[DataInitHook(),
               tf.train.LoggingTensorHook(
                   {"mrr": mrr, "hits": hits, "step": current_step},
                   every_n_iter=1
               )]
    )
  else:
    slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.model_path,
        logdir=FLAGS.output_dir,
        variables_to_restore=tf.trainable_variables() + [global_step],
        initial_op=tf.group(local_init_op, iterator.initializer),
        # initial_op=iterator.initializer,
        num_evals=num_batches,
        eval_op=tf.group(mrr_update, hits_update, incr_current_step),
        eval_op_feed_dict={is_train_ph: False},
        final_op=tf.group(mrr, hits),
        final_op_feed_dict={is_train_ph: False},
        summary_op=tf.summary.merge([mrr_summary] +  all_hits_summaries),
        max_number_of_evaluations=None,
        eval_interval_secs=60,
        hooks=[DataInitHook(),
               tf.train.LoggingTensorHook(
                   {"mrr": mrr, "hits": hits, "step": current_step},
                   every_n_iter=1
               )]
    )
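Note: `metrics.mrr(candidate_scores, candidates, labels)` above produces one reciprocal rank per batch example, which `tf.metrics.mean` then averages. A numpy sketch of that quantity, assuming `candidate_scores` is [batch, num_candidates], `candidates` holds the candidate entity ids, `labels` holds the true target id per example, and the target is always among the candidates:

import numpy as np

def batch_reciprocal_rank(candidate_scores, candidates, labels):
    rrs = []
    for scores, cands, target in zip(candidate_scores, candidates, labels.reshape(-1)):
        order = np.argsort(scores)[::-1]          # candidate indices, best score first
        ranked_cands = np.asarray(cands)[order]
        rank = int(np.flatnonzero(ranked_cands == target)[0]) + 1
        rrs.append(1.0 / rank)
    return np.array(rrs)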
Example #19
def eval_reranker(res_fname="svm.test.res",
                  pred_fname="svm.train.pred",
                  format="trec",
                  th=10,
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    ir, svm, conf_matrix = read_res_pred_files(res_fname,
                                               pred_fname,
                                               format,
                                               verbose,
                                               reranking_th=reranking_th,
                                               ignore_noanswer=ignore_noanswer)
    # Calculate standard P, R, F1, Acc
    acc = 1.0 * (
        conf_matrix['true']['true'] + conf_matrix['false']['false']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['false'] +
            conf_matrix['true']['false'] + conf_matrix['false']['true'])
    p = 0
    if (conf_matrix['true']['true'] + conf_matrix['false']['true']) > 0:
        p = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['true'])
    r = 0
    if (conf_matrix['true']['true'] + conf_matrix['true']['false']) > 0:
        r = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['true']['false'])
    f1 = 0
    if (p + r) > 0:
        f1 = 2.0 * p * r / (p + r)

    # evaluate IR
    prec_se = metrics.recall_of_1(ir, th)
    acc_se = metrics.accuracy(ir, th)
    acc_se1 = metrics.accuracy1(ir, th)
    acc_se2 = metrics.accuracy2(ir, th)

    # evaluate SVM
    prec_svm = metrics.recall_of_1(svm, th)
    acc_svm = metrics.accuracy(svm, th)
    acc_svm1 = metrics.accuracy1(svm, th)
    acc_svm2 = metrics.accuracy2(svm, th)

    mrr_se = metrics.mrr(ir, th)
    mrr_svm = metrics.mrr(svm, th)
    map_se = metrics.map(ir, th)
    map_svm = metrics.map(svm, th)

    avg_acc1_svm = metrics.avg_acc1(svm, th)
    avg_acc1_ir = metrics.avg_acc1(ir, th)

    print("")
    print("*** Official score (MAP for SYS): %5.4f" % (map_svm))
    print("")
    print("")
    print("******************************")
    print("*** Classification results ***")
    print("******************************")
    print("")
    print("Acc = %5.4f" % (acc))
    print("P   = %5.4f" % (p))
    print("R   = %5.4f" % (r))
    print("F1  = %5.4f" % (f1))
    print("")
    print("")
    print("********************************")
    print("*** Detailed ranking results ***")
    print("********************************")
    print("")
    print("IR  -- Score for the output of the IR system (baseline).")
    print("SYS -- Score for the output of the tested system.")
    print("")
    print("%13s %5s" % ("IR", "SYS"))
    print("MAP   : %5.4f %5.4f" % (map_se, map_svm))
    print("AvgRec: %5.4f %5.4f" % (avg_acc1_ir, avg_acc1_svm))
    print("MRR   : %6.2f %6.2f" % (mrr_se, mrr_svm))
    print("%16s %6s  %14s %6s  %14s %6s  %12s %4s" %
          ("IR", "SYS", "IR", "SYS", "IR", "SYS", "IR", "SYS"))
    for i, (p_se, p_svm, a_se, a_svm, a_se1, a_svm1, a_se2,
            a_svm2) in enumerate(
                zip(prec_se, prec_svm, acc_se, acc_svm, acc_se1, acc_svm1,
                    acc_se2, acc_svm2), 1):
        print(
            "REC-1@%02d: %6.2f %6.2f  ACC@%02d: %6.2f %6.2f  AC1@%02d: %6.2f %6.2f  AC2@%02d: %4.0f %4.0f"
            % (i, p_se, p_svm, i, a_se, a_svm, i, a_se1, a_svm1, i, a_se2,
               a_svm2))
    print()
    print(
        "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks where questions have at most one correct answer)"
    )
    print(
        "ACC   - accuracy, i.e., number of correct answers retrieved at rank @X normalized by the rank and the total number of questions"
    )
    print(
        "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)"
    )
    print("AC2   - the absolute number of correct answers at @X")
Example #20
def train(train_data, test_data, n_user, n_item):
    with tf.Session() as sess:
        iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                                   train_data.output_shapes)

        model = NCF.NCF(FLAGS.embedding_size, n_user, n_item, FLAGS.lr,
                        FLAGS.optim, FLAGS.initializer, FLAGS.loss_func, FLAGS.activation,
                        FLAGS.regularizer, iterator, FLAGS.topK, FLAGS.dropout, is_training=True)

        model.build()

        # Load the parameters from a checkpoint if one exists; otherwise train from scratch
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
            # Load the model parameters
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Creating model with fresh parameters.")
            sess.run(tf.global_variables_initializer())

        count = 0
        # Train on the training set for FLAGS.epochs epochs
        for epoch in range(FLAGS.epochs):
            # Iterator over the training set
            sess.run(model.iterator.make_initializer(train_data))
            model.is_training = True
            model.get_data()
            start_time = time.time()

            try:
                while True:  # until the iterator is exhausted, i.e. one full pass over the training data

                    model.step(sess, count)
                    count += 1
            except tf.errors.OutOfRangeError:
                # Print how long this training epoch took
                print("Epoch %d training " % epoch + "Took: " + time.strftime("%H: %M: %S",
                                                                              time.gmtime(time.time() - start_time)))
            # Iterator over the test set
            sess.run(model.iterator.make_initializer(test_data))
            model.is_training = False
            model.get_data()
            start_time = time.time()
            HR, MRR, NDCG = [], [], []
            pred_item, gt_item = model.step(sess, None)
            try:
                while True:  # until the iterator is exhausted, i.e. one full pass over the test data
                    pred_item, gt_item = model.step(sess, None)
                    # Within a test batch every ground-truth item is identical, so take the first one
                    gt_item = int(gt_item[0])
                    HR.append(metrics.hit(gt_item, pred_item))
                    MRR.append(metrics.mrr(gt_item, pred_item))
                    NDCG.append(metrics.ndcg(gt_item, pred_item))
            # Average the evaluation metrics
            except tf.errors.OutOfRangeError:
                hr = np.array(HR).mean()
                mrr = np.array(MRR).mean()
                ndcg = np.array(NDCG).mean()
                print("Epoch %d testing  " % epoch + "Took: " + time.strftime("%H: %M: %S",
                                                                              time.gmtime(time.time() - start_time)))
                print("HR is %.3f, MRR is %.3f, NDCG is %.3f" % (hr, mrr, ndcg))

        # Save the model parameters
        checkpoint_path = os.path.join(FLAGS.model_dir, "NCF.ckpt")
        model.saver.save(sess, checkpoint_path)
Example #21
def eval_reranker(res_fname="svm.test.res",
                  pred_fname="svm.train.pred",
                  format="trec",
                  th=10,
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    ir, svm, conf_matrix = read_res_pred_files(res_fname,
                                               pred_fname,
                                               format,
                                               verbose,
                                               reranking_th=reranking_th,
                                               ignore_noanswer=ignore_noanswer)
    # Calculate standard P, R, F1, Acc
    acc = 1.0 * (
        conf_matrix['true']['true'] + conf_matrix['false']['false']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['false'] +
            conf_matrix['true']['false'] + conf_matrix['false']['true'])
    p = 0
    if (conf_matrix['true']['true'] + conf_matrix['false']['true']) > 0:
        p = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['false']['true'])
    r = 0
    if (conf_matrix['true']['true'] + conf_matrix['true']['false']) > 0:
        r = 1.0 * (conf_matrix['true']['true']) / (
            conf_matrix['true']['true'] + conf_matrix['true']['false'])
    f1 = 0
    if (p + r) > 0:
        f1 = 2.0 * p * r / (p + r)

    # evaluate IR
    prec_se = metrics.recall_of_1(ir, th)
    acc_se = metrics.accuracy(ir, th)
    acc_se1 = metrics.accuracy1(ir, th)
    acc_se2 = metrics.accuracy2(ir, th)

    # evaluate SVM
    prec_svm = metrics.recall_of_1(svm, th)
    acc_svm = metrics.accuracy(svm, th)
    acc_svm1 = metrics.accuracy1(svm, th)
    acc_svm2 = metrics.accuracy2(svm, th)

    mrr_se = metrics.mrr(ir, th)
    mrr_svm = metrics.mrr(svm, th)
    map_se = metrics.map(ir, th)
    map_svm = metrics.map(svm, th)

    avg_acc1_svm = metrics.avg_acc1(svm, th)
    avg_acc1_ir = metrics.avg_acc1(ir, th)

    #print ""
    #print "*** Official score (MAP for SYS): %5.4f" %(map_svm)
    #print ""
    #print ""
    #print "******************************"
    #print "*** Classification results ***"
    #print "******************************"
    #print ""
    #print "Acc = %5.4f" %(acc)
    #print "P   = %5.4f" %(p)
    #print "R   = %5.4f" %(r)
    #print "F1  = %5.4f" %(f1)
    #print ""
    #print ""
    #print "********************************"
    #print "*** Detailed ranking results ***"
    #print "********************************"
    #print ""
    #print "IR  -- Score for the output of the IR system (baseline)."
    #print "SYS -- Score for the output of the tested system."
    #print ""
    #print "%13s %5s" %("IR", "SYS")
    #print "MAP   : %5.4f %5.4f" %(map_se, map_svm)
    #print "AvgRec: %5.4f %5.4f" %(avg_acc1_ir, avg_acc1_svm)
    #print "MRR   : %6.2f %6.2f" %(mrr_se, mrr_svm)
    print "MAP   : %5.4f\tMRR   : %5.4f\tAvgRec: %5.4f" % (map_svm, mrr_svm,
                                                           avg_acc1_svm)
    #print "Acc   : %5.4f" %(acc)
    #print "P     : %5.4f" %(p)
    #print "R     : %5.4f" %(r)
    #print "F1    : %5.4f" %(f1)
    """
Example #22
def eval_reranker(res_fname="svm.test.res", pred_fname="svm.train.pred",
                  format="trec",
                  th=10,
                  verbose=False,
                  reranking_th=0.0,
                  ignore_noanswer=False):
    ir, svm, conf_matrix = read_res_pred_files(res_fname, pred_fname, format, verbose,
                                      reranking_th=reranking_th,
                                      ignore_noanswer=ignore_noanswer)
    # Calculate standard P, R, F1, Acc
    acc = 1.0 * (conf_matrix['true']['true'] + conf_matrix['false']['false']) / (conf_matrix['true']['true'] + conf_matrix['false']['false'] + conf_matrix['true']['false'] + conf_matrix['false']['true'])
    p = 0
    if (conf_matrix['true']['true'] + conf_matrix['false']['true']) > 0:
        p = 1.0 * (conf_matrix['true']['true']) / (conf_matrix['true']['true'] + conf_matrix['false']['true'])
    r = 0
    if (conf_matrix['true']['true'] + conf_matrix['true']['false']) > 0:
        r = 1.0 * (conf_matrix['true']['true']) / (conf_matrix['true']['true'] + conf_matrix['true']['false'])
    f1 = 0
    if (p + r) > 0:
        f1 = 2.0 * p * r / (p + r)

    # evaluate IR
    prec_se = metrics.recall_of_1(ir, th)
    acc_se = metrics.accuracy(ir, th)
    acc_se1 = metrics.accuracy1(ir, th)
    acc_se2 = metrics.accuracy2(ir, th)

    # evaluate SVM
    prec_svm = metrics.recall_of_1(svm, th)
    acc_svm = metrics.accuracy(svm, th)
    acc_svm1 = metrics.accuracy1(svm, th)
    acc_svm2 = metrics.accuracy2(svm, th)

    mrr_se = metrics.mrr(ir, th)
    mrr_svm = metrics.mrr(svm, th)
    map_se = metrics.map(ir, th)
    map_svm = metrics.map(svm, th)

    avg_acc1_svm = metrics.avg_acc1(svm, th)
    avg_acc1_ir = metrics.avg_acc1(ir, th)

    print ("")
    print ("*** Official score (MAP for SYS): %5.4f" %(map_svm))
    print ("")
    print ("")
    print( "******************************")
    print( "*** Classification results ***")
    print( "******************************")
    print( "")
    print( "Acc = %5.4f" %(acc))
    print( "P   = %5.4f" %(p))
    print( "R   = %5.4f" %(r))
    print( "F1  = %5.4f" %(f1))
    print( "")
    print( "")
    print( "********************************")
    print( "*** Detailed ranking results ***")
    print( "********************************")
    print( "")
    print( "IR  -- Score for the output of the IR system (baseline).")
    print( "SYS -- Score for the output of the tested system.")
    print( "")
    print( "%13s %5s" %("IR", "SYS"))
    print( "MAP   : %5.4f %5.4f" %(map_se, map_svm))
    print( "AvgRec: %5.4f %5.4f" %(avg_acc1_ir, avg_acc1_svm))
    print( "MRR   : %6.2f %6.2f" %(mrr_se, mrr_svm))
    print( "%16s %6s  %14s %6s  %14s %6s  %12s %4s" % ("IR", "SYS", "IR", "SYS", "IR", "SYS", "IR", "SYS"))
    for i, (p_se, p_svm, a_se, a_svm, a_se1, a_svm1, a_se2, a_svm2) in enumerate(zip(prec_se, prec_svm, acc_se, acc_svm, acc_se1, acc_svm1, acc_se2, acc_svm2), 1):
        print( "REC-1@%02d: %6.2f %6.2f  ACC@%02d: %6.2f %6.2f  AC1@%02d: %6.2f %6.2f  AC2@%02d: %4.0f %4.0f" %(i, p_se, p_svm, i, a_se, a_svm, i, a_se1, a_svm1, i, a_se2, a_svm2))

    print( "REC-1 - percentage of questions with at least 1 correct answer in the top @X positions (useful for tasks where questions have at most one correct answer)")
    print( "ACC   - accuracy, i.e., number of correct answers retrieved at rank @X normalized by the rank and the total number of questions")
    print( "AC1   - the number of correct answers at @X normalized by the number of maximum possible answers (perfect re-ranker)")
    print( "AC2   - the absolute number of correct answers at @X")

    return map_svm
Example #23
    ranks = []
    for i in rel_df.idx.unique():
        ddf = rel_df[rel_df.idx == i]
        ranked = ddf.sort_values(by='total', ascending=False)
        r = 1
        for label in ranked.is_gold:
            if label:
                ranks.append(r)
                r -= 1 # based on accepted eval method
            r += 1

    if alpha == 0.0: agg_zero_ranks.extend(ranks)
    if alpha == 0.5: agg_half_ranks.extend(ranks)
    if alpha == 1.0: agg_one_ranks.extend(ranks)

    amrr = mrr(ranks)

    if amrr > best_mrr:
        rel_best_ranks = ranks
        best_mrr = amrr
        best_arg_mrr = alpha

    # just for reporting
    hat10 = h_at_n(ranks, n=10)
    hat3 = h_at_n(ranks, n=3)
    hat1 = h_at_n(ranks, n=1)

    if int(alpha * 100) % 20 == 0:
        print('{}\t{}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}'.format(rl, alpha, amrr, hat10, hat3, hat1))

agg_best_ranks.extend(ranks)
Example #24
0
            saved_name = None

            N = assoc_model.vocab_size
            for ep in range(opts.epochs):
                # report
                if opts.v > 0:
                    timeprint('starting epoch {}'.format(ep + 1))
                iteration_losses.append(
                    train_iteration(opts, assoc_model, trainer, ep % 5 == 4,
                                    log_file))
                if opts.early_stopping:
                    timeprint('evaluating after epoch {}'.format(ep + 1))
                    insts, all_s_ranks, all_t_ranks = eval(
                        assoc_model, tr_graphs, te_graphs, opts, N)
                    # save model with epoch count and remove previous if exists
                    ep_mrr = mrr(all_s_ranks + all_t_ranks)
                    ep_h10 = h_at_n(all_s_ranks + all_t_ranks)
                    ep_h1 = h_at_n(all_s_ranks + all_t_ranks, n=1)
                    timeprint('mrr: {:.4f}, h@10: {:.4f}, h@1: {:.4f}'.format(
                        ep_mrr, ep_h10, ep_h1))
                    if len(dev_mrrs) < 1 or ep_mrr > min(dev_mrrs[-2:]):
                        if len(dev_mrrs) < 1 or ep_mrr > max(dev_mrrs):
                            best_insts = insts
                            best_all_s_ranks = all_s_ranks
                            best_all_t_ranks = all_t_ranks
                            last_saved_name = saved_name
                            saved_name = '{}-ep-{:02d}.dyn'.format(
                                opts.model_out, ep + 1)
                            timeprint('saving trained model to {}'.format(
                                saved_name))
                            assoc_model.save(saved_name)
            ranks = []
            for i in rel_df.idx.unique():
                ddf = rel_df[rel_df.idx == i]
                ranked = ddf.sort_values(by='total', ascending=False)
                r = 1
                for label in ranked.is_gold:
                    if label:
                        ranks.append(r)
                        r -= 1  # based on accepted eval method
                    r += 1

            if alpha == 0.0: agg_zero_ranks.extend(ranks)
            if alpha == 0.5: agg_half_ranks.extend(ranks)
            if alpha == 1.0: agg_one_ranks.extend(ranks)

            amrr = mrr(ranks)

            if amrr > best_mrr:
                rel_best_ranks = ranks
                best_mrr = amrr
                best_arg_mrr = alpha

            # just for reporting
            hat10 = h_at_n(ranks, n=10)
            hat3 = h_at_n(ranks, n=3)
            hat1 = h_at_n(ranks, n=1)

            if int(alpha * 100) % 20 == 0:
                print('{}\t{}\t{:.4f}\t{:.4f}\t{:.4f}\t{:.4f}'.format(
                    rl, alpha, amrr, hat10, hat3, hat1))
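
The `r -= 1` adjustment inside the ranking loop of this example implements the "filtered" convention noted in the comment: a gold item that has already been ranked is not allowed to push later gold items for the same query further down. A standalone sketch of the same idea, using only the DataFrame columns visible above (`idx`, `total`, `is_gold`); the helper name and the toy data are illustrative:

# Illustrative re-implementation of the gold-filtered ranking used above.
# Assumes a pandas DataFrame with the columns seen in the snippet:
#   idx (query id), total (score), is_gold (bool).
import pandas as pd

def gold_filtered_ranks(rel_df):
    ranks = []
    for i in rel_df.idx.unique():
        ranked = rel_df[rel_df.idx == i].sort_values(by='total', ascending=False)
        r = 1
        for is_gold in ranked.is_gold:
            if is_gold:
                ranks.append(r)
                # do not let this gold item push later gold items down (filtered setting)
                r -= 1
            r += 1
    return ranks

df = pd.DataFrame({
    'idx':     [0, 0, 0, 0],
    'total':   [0.9, 0.8, 0.7, 0.1],
    'is_gold': [True, True, False, True],
})
# Both top-scoring gold items get rank 1; the last gold item is ranked
# behind the single non-gold candidate only, so it gets rank 2.
print(gold_filtered_ranks(df))  # [1, 1, 2]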
Example #26
0
     log_file.write('====\n')
 iteration_losses = [] # will hold loss averages
 dev_mrrs = []
 saved_name = None
 
 N = assoc_model.vocab_size
 for ep in range(opts.epochs):
     # report
     if opts.v > 0:
         timeprint('starting epoch {}'.format(ep + 1)) 
     iteration_losses.append(train_iteration(opts, assoc_model, trainer, ep % 5 == 4, log_file))
     if opts.early_stopping:
         timeprint('evaluating after epoch {}'.format(ep+1))
         insts, all_s_ranks, all_t_ranks = eval(assoc_model, tr_graphs, te_graphs, opts, N)
         # save model with epoch count and remove previous if exists
         ep_mrr = mrr(all_s_ranks + all_t_ranks)
         ep_h10 = h_at_n(all_s_ranks + all_t_ranks)
         ep_h1 = h_at_n(all_s_ranks + all_t_ranks, n=1)
         timeprint('mrr: {:.4f}, h@10: {:.4f}, h@1: {:.4f}'.format(ep_mrr, ep_h10, ep_h1))
         if len(dev_mrrs) < 1 or ep_mrr > min(dev_mrrs[-2:]):
             if len(dev_mrrs) < 1 or ep_mrr > max(dev_mrrs):
                 best_insts = insts
                 best_all_s_ranks = all_s_ranks
                 best_all_t_ranks = all_t_ranks
                 last_saved_name = saved_name
                 saved_name = '{}-ep-{:02d}.dyn'.format(opts.model_out, ep + 1)
                 timeprint('saving trained model to {}'.format(saved_name))
                 assoc_model.save(saved_name)
                 # remove previous model(s)
                 if last_saved_name is not None:
                     os.remove(last_saved_name)
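
Examples #24 and #26 both use the dev-set MRR to decide how long to train and which checkpoint to keep: the nested condition keeps going while the current MRR beats the worse of the last two recorded dev MRRs, and a new checkpoint replaces the previous file only when the MRR improves on the best seen so far. The excerpts do not show where `dev_mrrs` is appended or how training is actually halted, so the following is only a sketch of that pattern with plain Python stand-ins (no DyNet model, no real evaluation):

# Generic sketch of the early-stopping / best-checkpoint pattern used above.
# `train_one_epoch`, `evaluate`, and `save_model` are assumed callables.
import os

def train_with_early_stopping(train_one_epoch, evaluate, save_model, epochs, model_out):
    dev_mrrs = []
    saved_name = None
    for ep in range(epochs):
        train_one_epoch(ep)
        ep_mrr = evaluate(ep)
        # stop when the dev MRR drops below the worse of the last two scores
        if dev_mrrs and ep_mrr <= min(dev_mrrs[-2:]):
            break
        # keep only the checkpoint of the best epoch so far
        if not dev_mrrs or ep_mrr > max(dev_mrrs):
            last_saved_name = saved_name
            saved_name = '{}-ep-{:02d}.dyn'.format(model_out, ep + 1)
            save_model(saved_name)
            if last_saved_name is not None and os.path.exists(last_saved_name):
                os.remove(last_saved_name)
        dev_mrrs.append(ep_mrr)
    return saved_name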
Example #27
0
def eval(prev_graphs, graphs, ergm, opts, N, log_file, rerank_file):
    writing = log_file is not None

    caches = (copy.deepcopy(ergm.cache),
              copy.deepcopy(ergm.feature_vals))

    rel_all_ranks = {}  # for final results
    rel_pre_ranks = {}  # for improvement analysis
    rel_erg_ranks = {}  # for ergm-alone analysis
    all_pre_ranks = []
    all_all_ranks = []
    all_erg_ranks = []
    insts = Counter()
    total_misses = Counter()
    overrides = Counter()
    rerank_ups = Counter()
    rerank_downs = Counter()
    erg_ups = Counter()
    erg_downs = Counter()
    rerank_diff = Counter()
    erg_diff = Counter()

    change_idx = 1

    rels_order = list(graphs.items())
    for rel, te_gr in rels_order:
        if rel == 'co_hypernym':
            continue

        # set up
        if writing:
            timeprint('testing relation {}'.format(rel))
            log_file.write('relation: {}\n'.format(rel))
        # add incrementally, eval each edge, revert
        tr_gr = prev_graphs[rel]  # to filter known connections
        s_assoc_cache = ergm.source_ranker_cache(rel)
        t_assoc_cache = ergm.target_ranker_cache(rel)
        override_rel = opts.rule_override and rel in SYMMETRIC_RELATIONS
        all_ranks = []
        pre_ranks = []
        erg_ranks = []
        if override_rel and writing:
            log_file.write('RELATION OVERRIDE\n')
        node_order = list(range(N))  # DO NOT RANDOMIZE THIS - NEED TO PREDICT BOTH SIDES
        for node in tqdm(node_order):
            s_trues, s_unch_loc_ranks, s_loc_gold_ranks, s_gold_reranked, s_gold_ergs, s_pls, change_idx = \
                node_loop(change_idx, ergm, rel, node, s_assoc_cache,
                          caches, tr_gr, te_gr, override_rel, opts.rerank, True, log_file, rerank_file)
            t_trues, t_unch_loc_ranks, t_loc_gold_ranks, t_gold_reranked, t_gold_ergs, t_pls, change_idx = \
                node_loop(change_idx, ergm, rel, node, t_assoc_cache,
                          caches, tr_gr, te_gr, override_rel, opts.rerank, False, log_file, rerank_file)

            total_trues = s_trues + t_trues
            insts[rel] += (total_trues)
            if override_rel:
                overrides[rel] += total_trues

            ulr = s_unch_loc_ranks + t_unch_loc_ranks
            lgr = s_loc_gold_ranks + t_loc_gold_ranks
            grr = s_gold_reranked + t_gold_reranked
            ger = s_gold_ergs + t_gold_ergs
            total_misses[rel] += (len(ulr))

            pre_ranks.extend(lgr)
            if override_rel:
                erg_ranks.extend(lgr)
                all_ranks.extend(lgr)
            else:
                all_ranks.extend(ulr + grr)
                erg_ranks.extend(ulr + ger)

            for pl in s_pls + t_pls:
                if pl[3] < pl[2]:
                    rerank_ups[rel] += 1
                if pl[3] > pl[2]:
                    rerank_downs[rel] += 1
                if pl[4] < pl[2]:
                    erg_ups[rel] += 1
                if pl[4] > pl[2]:
                    erg_downs[rel] += 1
                rerank_diff[rel] += (pl[2] - pl[3])
                erg_diff[rel] += (pl[2] - pl[4])

        rel_all_ranks[rel] = all_ranks
        rel_pre_ranks[rel] = pre_ranks
        rel_erg_ranks[rel] = erg_ranks

        all_all_ranks.extend(all_ranks)
        all_pre_ranks.extend(pre_ranks)
        all_erg_ranks.extend(erg_ranks)

    if writing:
        log_file.write('\nper relation:\n')
        for rel in list(graphs.keys()):
            if insts[rel] > 0 and insts[rel] - total_misses[rel] > 0:
                log_file.write('\n{}:\n'.format(rel))
                log_file.write('{} instances, {} misses\n'.format(insts[rel], total_misses[rel]))
                log_file.write('reranks: {} up, {} down\n'.format(rerank_ups[rel], rerank_downs[rel]))
                log_file.write('ERGM only: {} up, {} down\n'.format(erg_ups[rel], erg_downs[rel]))
                log_file.write('rank diff: {}, ERGM only: {}\n'.format(rerank_diff[rel], erg_diff[rel]))
                log_file.write('metrics: pre-rank\trerank\tERGM only\n')
                log_file.write('average rank: {:.5f}\t{:.5f}\t{:.5f}\n'.format(np.average(rel_pre_ranks[rel]),
                                                                               np.average(rel_all_ranks[rel]),
                                                                               np.average(rel_erg_ranks[rel])))
                log_file.write('mrr: {:.4f}\t{:.4f}\t{:.4f}\n'.format(mrr(rel_pre_ranks[rel]), mrr(rel_all_ranks[rel]),
                                                                      mrr(rel_erg_ranks[rel])))
                log_file.write(
                    'mq: {:.4f}\t{:.4f}\t{:.4f}\n'.format(mq(rel_pre_ranks[rel], N), mq(rel_all_ranks[rel], N),
                                                          mq(rel_erg_ranks[rel], N)))
                log_file.write('h@100: {:.5f}\t{:.5f}\t{:.5f}\n'.format(h_at_n(rel_pre_ranks[rel], n=100),
                                                                        h_at_n(rel_all_ranks[rel], n=100),
                                                                        h_at_n(rel_erg_ranks[rel], n=100)))
                log_file.write(
                    'h@10: {:.5f}\t{:.5f}\t{:.5f}\n'.format(h_at_n(rel_pre_ranks[rel]), h_at_n(rel_all_ranks[rel]),
                                                            h_at_n(rel_erg_ranks[rel])))
                log_file.write('h@1: {:.5f}\t{:.5f}\t{:.5f}\n'.format(h_at_n(rel_pre_ranks[rel], n=1),
                                                                      h_at_n(rel_all_ranks[rel], n=1),
                                                                      h_at_n(rel_erg_ranks[rel], n=1)))

        log_file.write('\ntotals:\n')
        log_file.write('total number of instances: {}\n'.format(sum(insts.values())))
        log_file.write('total misses: {}\n'.format(sum(total_misses.values())))
        log_file.write('overrides: {}\n'.format(sum(overrides.values())))
        log_file.write(
            'rerank improvements: {}; regressions: {}\n'.format(sum(rerank_ups.values()), sum(rerank_downs.values())))
        log_file.write(
            'only ERGM improvements: {}; regressions: {}\n'.format(sum(erg_ups.values()), sum(erg_downs.values())))
        log_file.write(
            'total rank diffs: rerank {}, only ERGM {}\n'.format(sum(rerank_diff.values()), sum(erg_diff.values())))

        log_file.write('metrics: pre-rank\trerank\tERGM only\n')
        log_file.write(
            'average rank: {:.5f}\t{:.5f}\t{:.5f}\n'.format(np.average(all_pre_ranks), np.average(all_all_ranks),
                                                            np.average(all_erg_ranks)))
        log_file.write(
            'mrr: {:.4f}\t{:.4f}\t{:.4f}\n'.format(mrr(all_pre_ranks), mrr(all_all_ranks), mrr(all_erg_ranks)))
        log_file.write(
            'mq: {:.4f}\t{:.4f}\t{:.4f}\n'.format(mq(all_pre_ranks, N), mq(all_all_ranks, N), mq(all_erg_ranks, N)))
        log_file.write(
            'h@100: {:.5f}\t{:.5f}\t{:.5f}\n'.format(h_at_n(all_pre_ranks, n=100), h_at_n(all_all_ranks, n=100),
                                                     h_at_n(all_erg_ranks, n=100)))
        log_file.write('h@10: {:.5f}\t{:.5f}\t{:.5f}\n'.format(h_at_n(all_pre_ranks), h_at_n(all_all_ranks),
                                                               h_at_n(all_erg_ranks)))
        log_file.write('h@1: {:.5f}\t{:.5f}\t{:.5f}\n'.format(h_at_n(all_pre_ranks, n=1), h_at_n(all_all_ranks, n=1),
                                                              h_at_n(all_erg_ranks, n=1)))

    print('number of instances:', sum(insts.values()))
    print('total misses:', sum(total_misses.values()))
    print('overrides:', sum(overrides.values()))
    print('average rank:', np.average(all_all_ranks))
    print('mrr: {:.4f}'.format(mrr(all_all_ranks)))
    print('mq:', mq(all_all_ranks, N))
    print('h@100: {:.5f}'.format(h_at_n(all_all_ranks, n=100)))
    print('h@10: {:.5f}'.format(h_at_n(all_all_ranks)))
    print('h@1: {:.5f}'.format(h_at_n(all_all_ranks, n=1)))

    return mrr(all_all_ranks), h_at_n(all_all_ranks, n=10), h_at_n(all_all_ranks, n=3), h_at_n(all_all_ranks, n=1)
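
Example #27 reports all results through `mrr` and `h_at_n` applied to lists of 1-based gold ranks. The helpers below are a minimal sketch of what such functions conventionally compute (the `n=10` default is inferred from the `h@10` printouts; the repository's actual implementations, including `mq`, are not shown here and may differ):

# Plausible minimal versions of the rank-based helpers used in Example #27.
# These are assumptions based on how the functions are called, not the repo's code.

def mrr(ranks):
    """Mean reciprocal rank of a list of 1-based gold ranks."""
    if not ranks:
        return 0.0
    return sum(1.0 / r for r in ranks) / len(ranks)

def h_at_n(ranks, n=10):
    """Hits@n: fraction of gold ranks that fall within the top n."""
    if not ranks:
        return 0.0
    return sum(1 for r in ranks if r <= n) / float(len(ranks))

print(mrr([1, 2, 10]))          # (1 + 0.5 + 0.1) / 3 = 0.5333...
print(h_at_n([1, 2, 10], n=3))  # 2 of the 3 ranks are <= 3 -> 0.6667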
Example #28
0
def train(train_data, test_data, user_size, item_size):
	config = tf.ConfigProto()
	config.gpu_options.allow_growth = True

	with tf.Session(config=config) as sess:

		############################### CREATE MODEL #############################
		iterator = tf.data.Iterator.from_structure(train_data.output_types, 
								train_data.output_shapes)
		model = NCF.NCF(FLAGS.embedding_size, user_size, item_size,	FLAGS.lr, 
				FLAGS.optim, FLAGS.initializer, FLAGS.loss_func, FLAGS.activation, 
				FLAGS.regularizer, iterator, FLAGS.topK, FLAGS.dropout, is_training=True)
		model.build()
		# train_init_op = iterator.make_initializer(train_data)

		ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
		if ckpt:
			print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
			model.saver.restore(sess, ckpt.model_checkpoint_path)
		else:
			print("Creating model with fresh parameters.")
			sess.run(tf.global_variables_initializer())
		
		############################### Training ####################################
		count = 0
		for epoch in range(FLAGS.epochs):
			sess.run(model.iterator.make_initializer(train_data))	
			model.is_training = True
			start_time = time.time()

			try:
				while True:
					model.step(sess, count)
					count += 1
			except tf.errors.OutOfRangeError:
				print("Epoch %d training " %epoch + "Took: " + time.strftime("%H: %M: %S", 
									time.gmtime(time.time() - start_time)))

		################################ EVALUATION ##################################
			sess.run(model.iterator.make_initializer(test_data))
			model.is_training = False
			start_time = time.time()
			HR, MRR, NDCG = [], [], []

			try:
				while True:
					prediction, label = model.step(sess, None)

					label = int(label[0])
					HR.append(metrics.hit(label, prediction))
					MRR.append(metrics.mrr(label, prediction))
					NDCG.append(metrics.ndcg(label, prediction))
			except tf.errors.OutOfRangeError:
				hr = np.array(HR).mean()
				mrr = np.array(MRR).mean()
				ndcg = np.array(NDCG).mean()
				print("Epoch %d testing  " %epoch + "Took: " + time.strftime("%H: %M: %S", 
									time.gmtime(time.time() - start_time)))
				print("HR is %.3f, MRR is %.3f, NDCG is %.3f" %(hr, mrr, ndcg))

		################################## SAVE MODEL ################################
		checkpoint_path = os.path.join(FLAGS.model_dir, "NCF.ckpt")
		model.saver.save(sess, checkpoint_path)
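
Example #28 averages `metrics.hit`, `metrics.mrr`, and `metrics.ndcg` over test users, where `label` is the ground-truth item id and `prediction` is the ranked top-K recommendation list for that user. The helpers below are a sketch in the style commonly used for NCF evaluation; their exact signatures in this repository's `metrics` module are an assumption:

import numpy as np

# Hypothetical per-query metrics in the style used by Example #28.
# `pred_items` is assumed to be the ranked list of top-K item ids for one user.

def hit(gt_item, pred_items):
    return 1.0 if gt_item in pred_items else 0.0

def mrr(gt_item, pred_items):
    if gt_item in pred_items:
        return 1.0 / (list(pred_items).index(gt_item) + 1)
    return 0.0

def ndcg(gt_item, pred_items):
    # single relevant item -> DCG = 1 / log2(rank + 1), IDCG = 1
    if gt_item in pred_items:
        rank = list(pred_items).index(gt_item) + 1
        return 1.0 / np.log2(rank + 1)
    return 0.0

print(hit(42, [7, 42, 3]), mrr(42, [7, 42, 3]), ndcg(42, [7, 42, 3]))
# 1.0 0.5 0.6309...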