def calculate_evaluation_parameters(output_directory, classifier):
    """Evaluate one classifier and print its metrics to stdout.

    Calls ``evaluation.evaluator(output_directory, classifier)`` and unpacks
    its result as ``(accuracy, recall, precision, error_rate)``.

    Args:
        output_directory: Forwarded unchanged to ``evaluation.evaluator``.
        classifier: Classifier name; concatenated into the header line, so it
            must be a ``str``.

    Returns:
        None. The metrics are only printed.
    """
    metrics = evaluation.evaluator(output_directory, classifier)
    accuracy, recall, precision, error_rate = metrics

    print('Parameters for ' + classifier + ':')
    labelled_values = (
        ('accuracy is: ', accuracy),
        ('recall is: ', recall),
        ('precision is: ', precision),
    )
    for label, value in labelled_values:
        print(label, value)
    # The error rate is the only metric printed with a trailing percent sign.
    print('error_rate is: ', error_rate, '%')
def calculate_evaluation_parameters(output_directory, classifier):
    """Print accuracy, recall, precision and error rate for a classifier.

    The four values are obtained from
    ``evaluation.evaluator(output_directory, classifier)``, which must return
    them in exactly that order.

    Args:
        output_directory: Passed straight through to ``evaluation.evaluator``.
        classifier: Name of the classifier (a ``str``); shown in the header.

    Returns:
        None.
    """
    scores = evaluation.evaluator(output_directory, classifier)
    accuracy, recall, precision, error_rate = scores

    # One tuple per output line; each tuple is expanded into print()'s
    # positional arguments so the default separator is applied as usual.
    report = [
        ('Parameters for ' + classifier + ':',),
        ('accuracy is: ', accuracy),
        ('recall is: ', recall),
        ('precision is: ', precision),
        ('error_rate is: ', error_rate, '%'),
    ]
    for fields in report:
        print(*fields)
def __init__(self, sess, data):
    """Configure the model from ``args``, build its graph and summary inputs.

    Hyper-parameters are read from the module-level ``args`` namespace. After
    ``self._init_graph()`` has run, one float32 placeholder is created per
    tracked metric and stored on ``self`` under the metric's name. Every
    placeholder except ``train_reg_loss`` is also registered as a
    ``tf.summary.scalar`` with that same name.

    Args:
        sess: Session object, stored as ``self.sess``.
        data: Dataset object, stored as ``self.data``; its ``n_users``
            attribute determines the batch size (``n_users // 10``).
    """

    def _summarised(tag):
        # Placeholder first, then its scalar summary, so ops are created in
        # the same order as when both statements are written out by hand.
        holder = tf.placeholder(tf.float32)
        tf.summary.scalar(tag, holder)
        return holder

    self.alg_type = args.alg_type
    self.emb_dim = args.embed_size
    self.data = data
    self.evaluator = evaluator()
    self.batch_size = int(data.n_users // 10)
    self.learning_rate = args.lr
    self.random_seed = args.seed
    self.initializer = self._set_initializer(args.initializer)
    self.optimizer = self._set_optimizer(args.optimizer)
    self.epochs = args.epoch
    self.decay = args.decay
    # NOTE(review): eval() executes whatever string args.ks holds. If that
    # string can come from an untrusted source, ast.literal_eval would be the
    # safer way to parse it.
    self.ks = eval(args.ks)
    self.sess = sess

    self._init_graph()
    print("Initialized graph")

    with tf.name_scope('TRAIN_LOSS'):
        for tag in ('train_loss', 'train_mf_loss', 'train_emb_loss'):
            setattr(self, tag, _summarised(tag))
        # NOTE(review): the regularisation loss gets a placeholder but no
        # scalar summary - confirm whether that omission is intentional.
        self.train_reg_loss = tf.placeholder(tf.float32)
        self.merged_train_loss = tf.summary.merge(
            tf.get_collection(tf.GraphKeys.SUMMARIES, 'TRAIN_LOSS'))

    with tf.name_scope('TEST_ACC'):
        eval_method = args.eval_method
        if eval_method == 'loo':
            method_tags = (
                'test_hr_first', 'test_mrr_first',
                'test_hr_last', 'test_mrr_last',
            )
        elif eval_method == 'fold':
            method_tags = (
                'test_precision_first', 'test_recall_first', 'test_f1_first',
                'test_precision_last', 'test_recall_last', 'test_f1_last',
            )
        else:
            method_tags = ()
        # NOTE(review): the NDCG inputs are created for every evaluation
        # method here; confirm against the feeding code that they are not
        # meant to be 'fold'-only.
        for tag in method_tags + ('test_ndcg_first', 'test_ndcg_last'):
            setattr(self, tag, _summarised(tag))
        self.merged_test_acc = tf.summary.merge(
            tf.get_collection(tf.GraphKeys.SUMMARIES, 'TEST_ACC'))
def __init__(self, emb_dim, epochs, batch_size, learning_rate, topk,
             savefile_path):
    """Store the hyper-parameters, load the data and build the graph.

    Args:
        emb_dim: Stored as ``self.emb_dim``.
        epochs: Stored as ``self.epochs``.
        batch_size: Stored as ``self.batch_size``.
        learning_rate: Stored as ``self.learning_rate``.
        topk: Stored as ``self.topk``.
        savefile_path: Stored as ``self.savefile_path``.

    Besides the arguments, a few fixed settings are set here (dropout off,
    keep probability 0.6, L2 on, seed 2021), and the data set is obtained
    from ``LoadMovieLens()``.
    """
    # Caller-supplied hyper-parameters needed before the graph is built.
    self.emb_dim, self.epochs = emb_dim, epochs
    self.batch_size, self.learning_rate = batch_size, learning_rate

    # Fixed settings that are not exposed through the constructor.
    self.use_dropout, self.keep_prob = False, 0.6
    self.use_l2 = True
    self.random_seed = 2021

    self.data = LoadMovieLens()

    self._init_graph()
    # NOTE(review): the ``random`` module is seeded only after the graph has
    # been built; if _init_graph() draws from ``random``, those draws are not
    # covered by this seed - confirm the ordering is intended.
    random.seed(self.random_seed)

    self.topk = topk
    self.evaluator = evaluator()
    self.savefile_path = savefile_path
def __init__(self, dataframe_path, test_path, userid_column_name,
             itemid_column_name, ks):
    """Score a most-popular-items recommender and print the metrics per k.

    Every test user is recommended the same list: the ``k`` most frequent
    item ids of the training frame. Precision, recall, F1 and NDCG are
    computed per user with ``evaluator.evaluate_one_user`` and averaged.

    Args:
        dataframe_path: Path of the training CSV without its ``.txt`` suffix.
        test_path: Path of the test CSV without its ``.txt`` suffix.
        userid_column_name: Name of the user-id column in both frames.
        itemid_column_name: Name of the item-id column in both frames.
        ks: Iterable of cut-offs to evaluate.

    Raises:
        ZeroDivisionError: If the test frame contains no users.
    """
    self.dataframe = pd.read_csv(dataframe_path + '.txt', sep=',')
    self.test_df = pd.read_csv(test_path + '.txt', sep=',')
    self.evaluator = evaluator()

    # Item ids ordered from most to least frequent.
    pop_series = self.dataframe[itemid_column_name].value_counts()

    # Neither the ground truth nor the set of test users depends on k, so
    # both are computed once instead of once per cut-off.
    user_ground_truth_dict = construct_user_ground_truth_dict(
        self.test_df, userid_column_name, itemid_column_name)
    test_users = self.test_df[userid_column_name].unique()

    for k in ks:
        k_pop_ids = pop_series.head(k).index.values.tolist()

        precs, recs, f1s, ndcgs = [], [], [], []
        for user in test_users:
            # Called through the class with the instance passed explicitly,
            # exactly as the surrounding code base does.
            prec, rec, f1, ndcg = evaluator.evaluate_one_user(
                self.evaluator, k_pop_ids, user_ground_truth_dict[user])
            precs.append(prec)
            recs.append(rec)
            f1s.append(f1)
            ndcgs.append(ndcg)

        precision_value = sum(precs) / len(precs)
        recall_value = sum(recs) / len(recs)
        f1_value = sum(f1s) / len(f1s)
        ndcg_value = sum(ndcgs) / len(ndcgs)

        print(k)
        print(f"Precision: {precision_value}")
        print(f"Recall: {recall_value}")
        print(f"F1: {f1_value}")
        print(f"NDCG: {ndcg_value}")
def get_score(pred, tgt, testing, eval_type="unlabeled"):
    """Score ``pred`` against ``tgt`` and return labeled/unlabeled results.

    Args:
        pred: Predicted structure, forwarded to ``evaluator``.
        tgt: Reference structure, forwarded to ``evaluator``; its ``ID`` is
            printed when ``testing`` is truthy.
        testing: When truthy, the evaluator runs with ``verbose`` and
            ``units`` enabled.
        eval_type: Evaluation type requested from ``evaluator``. Only the
            value ``"labeled"`` makes the labeled results come from the
            ``"labeled"`` scores; any other value reuses ``"unlabeled"``.

    Returns:
        The 4-tuple ``(labeled, unlabeled, labeled_remote,
        unlabeled_remote)`` as produced by ``get_results``.
    """
    print_verbose = True  # hard-wired switch for the diagnostic output
    detailed = bool(testing and print_verbose)
    if detailed:
        print(tgt.ID)

    # NOTE(review): eval_types receives the bare string - writing it as
    # ``(eval_type)`` would not make it a 1-tuple either. Confirm whether
    # evaluator expects ``(eval_type,)`` instead.
    score = evaluator(pred, tgt, eval_types=eval_type,
                      verbose=detailed, units=detailed)

    unlabeled, unlabeled_remote = get_results(score, "unlabeled")
    labeled_key = "labeled" if eval_type == "labeled" else "unlabeled"
    labeled, labeled_remote = get_results(score, labeled_key)
    return labeled, unlabeled, labeled_remote, unlabeled_remote
def test(self, t_x1, t_x2, t_y_raw, t_y_gold, idx2tag, idx2char, unk_chars,
         sub_dict, trans_dict, sess, transducer, ensemble=None,
         batch_size=100, sent_seg=False, bias=-1, outpath=None,
         trans_type='mix', test_result_path=None):
    """Tag the test data, score the result and report the scores.

    ``t_x1[0]`` and ``t_x2[0]`` are modified in place: an index found in
    ``sub_dict`` is replaced by its mapped value, otherwise an index found in
    ``unk_chars`` is replaced by 1 (presumably the unknown-character index -
    TODO confirm).

    Args:
        t_x1, t_x2: Input index sequences; ``t_x1 + t_x2`` is fed to
            ``self.predict``.
        t_y_raw: Raw reference, given to the evaluator when ``sent_seg``.
        t_y_gold: Gold reference given to the evaluator.
        idx2tag: Index-to-tag mapping used by ``toolbox.decode_tags``.
        idx2char: Index-to-character mapping used to decode ``t_x1[0]``.
        unk_chars: Indices to replace by 1.
        sub_dict: Index substitutions applied before ``unk_chars``.
        trans_dict: Forwarded to ``toolbox.generate_output``.
        sess: Sequence of sessions; passed whole to ``self.predict``, and
            ``sess[-1]`` is used for the transducer.
        transducer: Optional transducer; when given, a callable built on
            ``self.define_transducer_dict`` is handed to the output generator.
        ensemble: Forwarded to ``self.predict``.
        batch_size: Forwarded to ``self.predict``.
        sent_seg: When true, sentence-segmentation scores (``scores[3:6]``)
            are evaluated and reported in addition to word segmentation.
        bias: Negative selects argmax decoding; a non-negative value applies
            ``toolbox.biased_out`` when ``self.crf == 0``.
        outpath: Optional file receiving ``prediction_out[0]``, one per line.
        trans_type: Forwarded to ``toolbox.generate_output``.
        test_result_path: Optional file receiving the scores.

    Returns:
        None. Scores are printed and optionally written to disk.
    """
    # Decoded before the indices below are rewritten in place.
    chars = toolbox.decode_chars_new(t_x1[0], idx2char)
    gold_out = t_y_gold

    for sentences in (t_x1[0], t_x2[0]):
        for sentence in sentences:
            for j, n in enumerate(sentence):
                if n in sub_dict:
                    sentence[j] = sub_dict[n]
                elif n in unk_chars:
                    sentence[j] = 1

    transducer_dict = None
    if transducer is not None:
        char2idx = {v: k for k, v in idx2char.items()}

        def transducer_dict(trans_str):
            return self.define_transducer_dict(trans_str, char2idx,
                                               sess[-1], transducer)

    argmax = bias < 0

    t_x = t_x1 + t_x2
    prediction = self.predict(
        data_v=t_x, sess=sess,
        model=self.input_v1[0] + self.input_v2[0] + self.output[0],
        index=0, argmax=argmax, batch_size=batch_size, ensemble=ensemble)

    if bias >= 0 and self.crf == 0:
        prediction = [toolbox.biased_out(prediction[0], bias)]

    predictions = toolbox.decode_tags(prediction, idx2tag)

    if self.is_space == 'sea':
        prediction_out, raw_out = toolbox.generate_output_sea(
            chars, predictions)
    else:
        prediction_out, raw_out = toolbox.generate_output(
            chars, predictions, trans_dict, transducer_dict,
            trans_type=trans_type)

    if sent_seg:
        scores = evaluation.evaluator(prediction_out, gold_out, raw_out,
                                      t_y_raw)
    else:
        scores = evaluation.evaluator(prediction_out, gold_out, verbose=True)

    # ``with`` closes the files even when a write fails.
    if outpath is not None:
        with codecs.open(outpath, 'w', encoding='utf-8') as wt:
            for pre in prediction_out[0]:
                wt.write(pre + '\n')

    if test_result_path is not None:
        with codecs.open(test_result_path, 'w', encoding='utf-8') as wt:
            if sent_seg:
                wt.write('Sentence segmentation:' + '\n')
                wt.write('F score: %f' % scores[5] + '\n')
                wt.write('Precision: %f' % scores[3] + '\n')
                wt.write('Recall: %f\n' % scores[4] + '\n')
                wt.write('Word segmentation:' + '\n')
                wt.write('F score: %f' % scores[2] + '\n')
                wt.write('Precision: %f' % scores[0] + '\n')
                wt.write('Recall: %f\n' % scores[1] + '\n')
            else:
                wt.write('F score: %f' % scores[2] + '\n')
                wt.write('Precision: %f' % scores[0] + '\n')
                wt.write('Recall: %f\n' % scores[1] + '\n')
                wt.write('True negative rate: %f' % scores[3] + '\n')

    # print() with one pre-formatted argument produces the same output under
    # Python 2 and Python 3.
    print('Evaluation scores:')
    if sent_seg:
        print('Sentence segmentation:')
        print('F score: %f' % scores[5])
        print('Precision: %f' % scores[3])
        print('Recall: %f\n' % scores[4])
        print('Word segmentation:')
        print('F score: %f' % scores[2])
        print('Precision: %f' % scores[0])
        print('Recall: %f\n' % scores[1])
    else:
        print('Precision: %f' % scores[0])
        print('Recall: %f' % scores[1])
        print('F score: %f' % scores[2])
        print('True negative rate: %f' % scores[3])
def train(self, t_x1, t_x2, t_y, v_x1, v_x2, v_y_raw, v_y_gold, idx2tag,
          idx2char, unk_chars, trans_dict, sess, epochs, trained_model,
          transducer=None, lr=0.05, decay=0.05, decay_step=1, sent_seg=False,
          outpath=None):
    """Train for ``epochs`` epochs, validating and checkpointing after each.

    After every epoch the validation data is tagged and scored. When the
    score beats the best one so far, the model is saved to ``trained_model``
    and, if ``outpath`` is given, the validation predictions are written
    there. The score is ``scores[2]`` (word-segmentation F), multiplied by
    ``scores[5]`` (sentence-segmentation F) when ``sent_seg`` is set.

    The training and validation inputs are modified in place: every index
    found in ``unk_chars`` is replaced by 1.

    Args:
        t_x1, t_x2: Bucketed training inputs; element 0 of each is rewritten.
        t_y: Bucketed training targets.
        v_x1, v_x2: Validation inputs; element 0 of each is rewritten.
        v_y_raw: Raw validation reference, used when ``sent_seg``.
        v_y_gold: Gold validation reference.
        idx2tag: Index-to-tag mapping used by ``toolbox.decode_tags``.
        idx2char: Index-to-character mapping used to decode ``v_x1[0]``.
        unk_chars: Indices to replace by 1.
        trans_dict: Forwarded to ``toolbox.generate_output``.
        sess: Sequence of sessions; ``sess[0]`` trains and saves,
            ``sess[-1]`` serves the transducer.
        epochs: Number of epochs to run.
        trained_model: Checkpoint path given to ``self.saver.save``.
        transducer: Optional transducer used when generating output.
        lr: Initial learning rate.
        decay: Learning-rate decay factor; no decay when not positive.
        decay_step: The rate is recomputed on epochs divisible by this.
        sent_seg: Whether sentence segmentation is evaluated as well.
        outpath: Optional file receiving the best validation predictions.

    Returns:
        None. Progress and the best scores are printed.
    """
    lr_r = lr
    best_epoch = 0
    best_score = [0] * 6

    # Decoded before unknown characters are replaced below.
    chars = toolbox.decode_chars_new(v_x1[0], idx2char)

    for sentences in (v_x1[0], v_x2[0]):
        for sentence in sentences:
            for j, n in enumerate(sentence):
                if n in unk_chars:
                    sentence[j] = 1

    # Training data carries one extra nesting level (buckets). Shapes seen in
    # a debugging session for t_x1[0]: 7 buckets of (5719, 50), (5473, 100),
    # (3135, 150), (1323, 200), (538, 250), (351, 300) and (3, 300).
    for buckets in (t_x1[0], t_x2[0]):
        for bucket in buckets:
            for sentence in bucket:
                for j, n in enumerate(sentence):
                    if n in unk_chars:
                        sentence[j] = 1

    transducer_dict = None
    if transducer is not None:
        char2idx = {k: v for v, k in idx2char.items()}

        def transducer_dict(trans_str):
            return self.define_transducer_dict(trans_str, char2idx,
                                               sess[-1], transducer)

    # print() with one pre-formatted argument produces the same output under
    # Python 2 and Python 3.
    for epoch in range(epochs):
        print('epoch: %d' % (epoch + 1))
        t = time()
        if epoch % decay_step == 0 and decay > 0:
            lr_r = lr / (1 + decay * (epoch / decay_step))

        data_list = t_x1 + t_x2 + t_y
        # zip() is an iterator on Python 3; random.shuffle needs a list.
        samples = list(zip(*data_list))
        random.shuffle(samples)

        for sample in samples:
            # The bucket is selected by the length of its first sentence.
            c_len = len(sample[0][0])
            idx = self.bucket_dit[c_len]
            model = (self.input_v1[idx] + self.input_v2[idx]
                     + self.output_[idx])
            Batch.train(sess=sess[0], model=model,
                        batch_size_h=self.batch_size_h,
                        batch_size=self.real_batches[idx],
                        config=self.train_step[idx], lr=self.l_rate,
                        lrv=lr_r, dr=self.drop_out, drv=self.drop_out_v,
                        data=list(sample), verbose=False,
                        num_gpus=self.num_gpus)

        # Validation data is predicted as a single bucket.
        predictions = []
        c_len = len(v_x1[0][0])
        idx = self.bucket_dit[c_len]
        data_v = v_x1 + v_x2
        b_prediction = self.predict(
            data_v, sess=sess,
            model=self.input_v1[idx] + self.input_v2[idx] + self.output[idx],
            index=idx, argmax=True, batch_size=200)
        b_prediction = toolbox.decode_tags(b_prediction, idx2tag)
        predictions.append(b_prediction)
        predictions = list(zip(*predictions))
        predictions = toolbox.merge_bucket(predictions)

        if self.is_space == 'sea':
            prediction_out, raw_out = toolbox.generate_output_sea(
                chars, predictions)
        else:
            prediction_out, raw_out = toolbox.generate_output(
                chars, predictions, trans_dict, transducer_dict)

        if sent_seg:
            scores = evaluation.evaluator(prediction_out, v_y_gold, raw_out,
                                          v_y_raw)
            c_score = scores[2] * scores[5]
            c_best_score = best_score[2] * best_score[5]
        else:
            scores = evaluation.evaluator(prediction_out, v_y_gold)
            c_score = scores[2]
            c_best_score = best_score[2]

        if c_score > c_best_score:
            best_epoch = epoch + 1
            best_score = scores
            self.saver.save(sess[0], trained_model, write_meta_graph=False)
            if outpath is not None:
                # ``with`` closes the file even when a write fails.
                with codecs.open(outpath, 'w', encoding='utf-8') as wt:
                    for pre in prediction_out[0]:
                        wt.write(pre + '\n')

        if sent_seg:
            print('Sentence segmentation:')
            print('F score: %f\n' % scores[5])
            print('Word segmentation:')
            print('F score: %f' % scores[2])
        else:
            print('F score: %f' % c_score)
        print('Time consumed: %d seconds' % int(time() - t))

    print('Training is finished!')
    if sent_seg:
        print('Sentence segmentation:')
        print('Best F score: %f' % best_score[5])
        print('Best Precision: %f' % best_score[3])
        print('Best Recall: %f\n' % best_score[4])
        print('Word segmentation:')
        print('Best F score: %f' % best_score[2])
        print('Best Precision: %f' % best_score[0])
        print('Best Recall: %f\n' % best_score[1])
    else:
        print('Best F score: %f' % best_score[2])
        print('Best Precision: %f' % best_score[0])
        print('Best Recall: %f\n' % best_score[1])
    print('Best epoch: %d' % best_epoch)
def train(self, t_x, t_y, v_x, v_y_raw, v_y_gold, idx2tag, idx2char,
          unk_chars, trans_dict, sess, epochs, trained_model,
          transducer=None, lr=0.05, decay=0.05, decay_step=1, sent_seg=False,
          outpath=None):
    """Train for ``epochs`` epochs, validating and checkpointing after each.

    After every epoch the validation data is tagged and scored. When the
    score beats the best one so far, the model is saved to ``trained_model``
    (including the meta graph) and, if ``outpath`` is given, the validation
    predictions are written there. The score is ``scores[2]``
    (word-segmentation F), multiplied by ``scores[5]``
    (sentence-segmentation F) when ``sent_seg`` is set.

    ``t_x[0]`` and ``v_x[0]`` are modified in place: every index found in
    ``unk_chars`` is replaced by 1.

    Args:
        t_x: Bucketed training inputs.
        t_y: Bucketed training targets.
        v_x: Validation inputs.
        v_y_raw: Raw validation reference, used when ``sent_seg``.
        v_y_gold: Gold validation reference.
        idx2tag: Index-to-tag mapping used by ``toolbox.decode_tags``.
        idx2char: Index-to-character mapping used to decode ``v_x[0]``.
        unk_chars: Indices to replace by 1.
        trans_dict: Forwarded to ``toolbox.generate_output``.
        sess: Sequence of sessions; ``sess[0]`` trains and saves,
            ``sess[-1]`` serves the transducer.
        epochs: Number of epochs to run.
        trained_model: Checkpoint path given to ``self.saver.save``.
        transducer: Optional transducer used when generating output.
        lr: Initial learning rate.
        decay: Learning-rate decay factor; no decay when not positive.
        decay_step: The rate is recomputed on epochs divisible by this.
        sent_seg: Whether sentence segmentation is evaluated as well.
        outpath: Optional file receiving the best validation predictions.

    Returns:
        None. Progress and the best scores are printed.
    """
    lr_r = lr
    best_epoch = 0
    best_score = [0] * 6

    # Decoded before unknown characters are replaced below.
    chars = toolbox.decode_chars(v_x[0], idx2char)

    for sentence in v_x[0]:
        for j, n in enumerate(sentence):
            if n in unk_chars:
                sentence[j] = 1

    # Training data carries one extra nesting level (buckets).
    for bucket in t_x[0]:
        for sentence in bucket:
            for j, n in enumerate(sentence):
                if n in unk_chars:
                    sentence[j] = 1

    transducer_dict = None
    if transducer is not None:
        char2idx = {k: v for v, k in idx2char.items()}

        def transducer_dict(trans_str):
            return self.define_transducer_dict(trans_str, char2idx,
                                               sess[-1], transducer)

    for epoch in range(epochs):
        print('epoch: %d' % (epoch + 1))
        sys.stdout.flush()
        t = time()
        if epoch % decay_step == 0 and decay > 0:
            lr_r = lr / (1 + decay * (epoch / decay_step))

        data_list = t_x + t_y
        samples = list(zip(*data_list))
        random.shuffle(samples)

        for sample in samples:
            # The bucket is selected by the length of its first sentence.
            c_len = len(sample[0][0])
            idx = self.bucket_dit[c_len]
            real_batch_size = self.real_batches[idx]
            model = self.input_v[idx] + self.output_[idx]
            Batch.train(sess=sess[0], model=model,
                        batch_size=real_batch_size,
                        config=self.train_step[idx], lr=self.l_rate,
                        lrv=lr_r, dr=self.drop_out, drv=self.drop_out_v,
                        data=list(sample), verbose=False)

        # Validation data is predicted as a single bucket.
        predictions = []
        c_len = len(v_x[0][0])
        idx = self.bucket_dit[c_len]
        b_prediction = self.predict(
            data=v_x, sess=sess,
            model=self.input_v[idx] + self.output[idx],
            index=idx, argmax=True, batch_size=200)
        b_prediction = toolbox.decode_tags(b_prediction, idx2tag)
        predictions.append(b_prediction)
        predictions = list(zip(*predictions))
        predictions = toolbox.merge_bucket(predictions)

        if self.is_space == 'sea':
            prediction_out, raw_out = toolbox.generate_output_sea(
                chars, predictions)
        else:
            prediction_out, raw_out = toolbox.generate_output(
                chars, predictions, trans_dict, transducer_dict)

        if sent_seg:
            scores = evaluation.evaluator(prediction_out, v_y_gold, raw_out,
                                          v_y_raw)
            c_score = scores[2] * scores[5]
            c_best_score = best_score[2] * best_score[5]
        else:
            scores = evaluation.evaluator(prediction_out, v_y_gold)
            c_score = scores[2]
            c_best_score = best_score[2]

        if c_score > c_best_score:
            best_epoch = epoch + 1
            best_score = scores
            self.saver.save(sess[0], trained_model, write_meta_graph=True)
            if outpath is not None:
                # ``with`` closes the file even when a write fails.
                with codecs.open(outpath, 'w', encoding='utf-8') as wt:
                    for pre in prediction_out[0]:
                        wt.write(pre + '\n')

        if sent_seg:
            print('Sentence segmentation F-score: %f' % scores[5])
            print('Word segmentation F-score: %f' % scores[2])
        else:
            print('F score: %f' % c_score)
        print('Time consumed: %d seconds\n' % int(time() - t))
        sys.stdout.flush()

    print('Training is finished!')
    if sent_seg:
        print('Sentence segmentation:')
        print('Best F score: %f' % best_score[5])
        print('Best Precision: %f' % best_score[3])
        print('Best Recall: %f\n' % best_score[4])
        print('Word segmentation:')
        print('Best F score: %f' % best_score[2])
        print('Best Precision: %f' % best_score[0])
        print('Best Recall: %f\n' % best_score[1])
    else:
        print('Best F score: %f' % best_score[2])
        print('Best Precision: %f' % best_score[0])
        print('Best Recall: %f\n' % best_score[1])
    print('Best epoch: %d' % best_epoch)
def __init__(self, dataframe_path, loo_path, userid_column_name,
             itemid_column_name, ks):
    """Score a context-aware popularity recommender on leave-one-out data.

    For every row of the leave-one-out frame, the items occurring in rows
    with the same context values are ranked by frequency; that ranking is the
    recommendation for the row's user. For each cut-off ``k`` the ranking is
    cut to ``k`` items, or padded with the globally most popular items of the
    training frame when the context supplies fewer than ``k``. HR, NDCG and
    MRR come from ``evaluator.evaluate_loo_no_sort`` and are printed.

    Each cut-off is derived from the full context ranking, so the result for
    one ``k`` does not depend on which other values of ``k`` were evaluated
    before it.

    Args:
        dataframe_path: File name under ``Data/`` of the training CSV; also
            selects the context columns.
        loo_path: File name under ``Data/`` of the leave-one-out CSV.
        userid_column_name: Name of the user-id column.
        itemid_column_name: Name of the item-id column.
        ks: Iterable of cut-offs to evaluate.
    """
    if dataframe_path == 'FRAPPEloo_out.txt':
        self.context_list = ['weekday', 'timeofday', 'isweekend', 'weather']
    elif dataframe_path in ('yelpon', 'yelpnc'):
        self.context_list = ['month', 'day_of_week', 'timeofday', 'hour']
    # NOTE(review): any other dataframe_path leaves ``context_list`` unset
    # here; unless it is defined elsewhere, the row loop below fails with
    # AttributeError.

    self.dataframe = pd.read_csv('Data/' + dataframe_path, sep=',')
    self.loo_df = pd.read_csv('Data/' + loo_path, sep=',')
    self.evaluator = evaluator()

    # Globally popular items, used as filler when a context has too few.
    pop_series = self.dataframe[itemid_column_name].value_counts()
    pop_ids = pop_series.index.values.tolist()

    # user id -> all items seen in that user's context, most frequent first.
    context_ranking = {}
    for _, row in self.loo_df.iterrows():
        # Narrow the frame to the interactions sharing this row's context.
        context_df = self.loo_df
        for context in self.context_list:
            context_df = context_df[context_df[context] == row[context]]
        pop_context_series = context_df[itemid_column_name].value_counts()
        context_ranking[row[userid_column_name]] = (
            pop_context_series.index.values.tolist())

    user_ground_truth_dict = construct_user_ground_truth_dict(
        self.loo_df, userid_column_name, itemid_column_name)

    for k in ks:
        # A fresh list per user and per k: slicing copies, so the full
        # context ranking stays available for the next cut-off.
        user_top_k_dict = {}
        for user, ranked_items in context_ranking.items():
            top_k = ranked_items[:k]
            if len(top_k) < k:
                already_listed = set(top_k)
                for item in pop_ids:
                    if len(top_k) >= k:
                        break
                    if item not in already_listed:
                        top_k.append(item)
                        already_listed.add(item)
            user_top_k_dict[user] = top_k

        hr, ndcg, mrr = evaluator.evaluate_loo_no_sort(
            self.evaluator, user_top_k_dict, user_ground_truth_dict, k)
        print(k)
        print(f"HR: {hr}")
        print(f"NDCG: {ndcg}")
        print(f"MRR: {mrr}")
def evalute_score(pred, tgt):
    """Evaluate ``pred`` against ``tgt`` and print the "unlabeled" scores.

    Args:
        pred: Predicted structure, forwarded to ``evaluator``.
        tgt: Reference structure, forwarded to ``evaluator``.

    Returns:
        None. The result object's own ``print`` method does the reporting.
    """
    eval_kind = "unlabeled"
    # NOTE(review): eval_types receives the bare string - ``("unlabeled")``
    # is not a 1-tuple. Confirm whether evaluator expects ``("unlabeled",)``.
    # For debugging, evaluator can additionally be called with verbose=True
    # and units=True.
    result = evaluator(pred, tgt, eval_types=eval_kind)
    result.print(eval_kind)