def api(label_tags, test_y, y_scores, all_ids):
    # keep only samples that have at least one positive tag (zero-labeled samples removed)
    eval_samples = []
    for sample in range(test_y.shape[0]):
        if (test_y[sample, :] == np.ones(test_y.shape[1])).any():
            eval_samples.append(sample)
    test_y, y_scores = test_y[eval_samples, :], y_scores[eval_samples, :]

    ev = Evaluation(y_scores, None, test_y)

    # for each evaluated query, collect the names of the 10 highest-scoring tags, best first
    all_rankedat10_tags = []
    query_ids = []
    for sample_id, sample_output in zip(eval_samples, y_scores):
        q_id = all_ids[sample_id]
        query_ids.append(q_id)
        cols = np.argsort(sample_output)[-10:]
        rankedat10_tags = []
        for col in cols[::-1]:
            label_name = label_tags[col]
            rankedat10_tags.append(label_name)
        all_rankedat10_tags.append(rankedat10_tags)

    # per-query metric lists (one value per evaluated sample)
    all_Pat5, all_Pat10, all_Rat5, all_Rat10 = \
        ev.Precision(5, True), ev.Precision(10, True), ev.Recall(5, True), ev.Recall(10, True)
    upper_bounds_pat5 = ev.upper_bound(5, True)
    upper_bounds_pat10 = ev.upper_bound(10, True)
    all_MAP = ev.MeanAveragePrecision(True)
    assert len(all_Pat5) == len(all_rankedat10_tags)

    R = (query_ids, all_rankedat10_tags, list(all_Pat5), list(all_Pat10), list(all_Rat5), list(all_Rat10),
         upper_bounds_pat5, upper_bounds_pat10, all_MAP)

    # write one tab-separated row per query: id, true tags, top-10 predicted tags, metrics
    raw_corpus = myio.read_corpus(args.corpus_w_tags, with_tags=True)
    with open(args.results_file, 'w') as f:
        for i in range(len(R[0])):
            query_id, rankedat10_tags, Pat5, Pat10, Rat5, Rat10, UB5, UB10, MAP = \
                R[0][i], R[1][i], R[2][i], R[3][i], R[4][i], R[5][i], R[6][i], R[7][i], R[8][i]
            real_tags = raw_corpus[str(query_id)][2]
            real_tags = list(set(real_tags) & set(label_tags))
            real_tags = " ".join([str(x) for x in real_tags])
            rankedat10_tags = " ".join([str(x) for x in rankedat10_tags])
            f.write("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
                query_id, real_tags, rankedat10_tags, Pat5, Pat10, Rat5, Rat10, UB5, UB10, MAP))
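# Illustrative sketch only (not part of the original pipeline): the functions in
# this file repeat two patterns -- dropping samples whose target row is all zeros
# ("remove ill evaluation") and ranking each sample's top-10 tags with np.argsort.
# Assuming targets are a multi-hot numpy array and scores are per-tag
# probabilities, the same steps can be written in vectorized form; the toy arrays
# below are made up for the example and the function is never called.
def _filter_and_rank_sketch():
    import numpy as np  # numpy is used throughout this file; local import keeps the sketch self-contained

    dummy_y = np.array([[1, 0, 1], [0, 0, 0], [0, 1, 0]])                # multi-hot targets
    dummy_scores = np.array([[.9, .1, .4], [.2, .3, .5], [.1, .8, .6]])  # per-tag scores

    keep = dummy_y.any(axis=1)                       # samples with at least one positive tag
    filtered_y, filtered_scores = dummy_y[keep], dummy_scores[keep]

    # column indices of the highest-scoring tags, best first (at most 10)
    ranked = np.argsort(filtered_scores, axis=1)[:, ::-1][:, :10]
    return filtered_y, ranked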
def evaluate(test_y, y_scores, verbose=0, tag_names=None):
    """------------------------------------------remove ill evaluation-------------------------------------------"""
    # eval_labels = []
    # for label in range(test_y.shape[1]):
    #     if (test_y[:, label] == np.ones(test_y.shape[0])).any():
    #         eval_labels.append(label)
    eval_samples = []
    for sample in range(test_y.shape[0]):
        if (test_y[sample, :] == np.ones(test_y.shape[1])).any():
            eval_samples.append(sample)
    test_y, y_scores = test_y[eval_samples, :], y_scores[eval_samples, :]
    # test_y, y_scores = test_y[:, eval_labels], y_scores[:, eval_labels]

    ev = Evaluation(y_scores, None, test_y)

    EVAL_LABELS = set()
    for sample_id, sample_scores in zip(eval_samples, y_scores):
        cols = np.argsort(sample_scores)[-10:]
        for col in cols[::-1]:
            label_name = tag_names[col]
            EVAL_LABELS.add(label_name)

    mat = ev.ConfusionMatrix(5)
    eval_labels = list(EVAL_LABELS & set(TOP50LABELS))
    print_matrix(
        mat,
        tag_names,
        'Confusion:True Tag on x-axis, False Tag on y-axis',
        some_labels=eval_labels,
    )

    if verbose:
        print 'P@1: {}\tP@3: {}\tP@5: {}\tP@10: {}\tR@1: {}\tR@3: {}\tR@5: {}\tR@10: {}\tUBP@5: {}\tUBP@10: {}\tMAP: {}\n'.format(
            ev.Precision(1), ev.Precision(3), ev.Precision(5), ev.Precision(10),
            ev.Recall(1), ev.Recall(3), ev.Recall(5), ev.Recall(10),
            ev.upper_bound(5), ev.upper_bound(10), ev.MeanAveragePrecision())

    return ev.Recall(10)
def evaluate(test_x, test_y, model):
    """------------------------------------------remove ill evaluation-------------------------------------------"""
    # keep only samples that have at least one positive tag
    eval_samples = []
    for sample in range(test_y.shape[0]):
        if (test_y[sample, :] == np.ones(test_y.shape[1])).any():
            eval_samples.append(sample)
    print '\n{} samples out of {} will be evaluated (zero-labeled-samples removed).'.format(
        len(eval_samples), test_y.shape[0])
    print type(test_y), test_y.shape
    test_x = test_x[eval_samples, :]
    test_y = test_y[eval_samples, :]
    # test_y = test_y[:, eval_labels]
    print test_x.shape, test_x.dtype, type(test_x), test_y.shape, test_y.dtype, type(test_y)
    """------------------------------------------remove ill evaluation-------------------------------------------"""

    y_scores = model.predict_proba(test_x)  # probability for each class
    predictions = model.predict(test_x)     # 1 or 0 for each class
    ev = Evaluation(y_scores, predictions, test_y)
    print 'P@1: {}\tP@3: {}\tP@5: {}\tP@10: {}\tR@1: {}\tR@3: {}\tR@5: {}\tR@10: {}\tUBP@5: {}\tUBP@10: {}\tMAP: {}\n'.format(
        ev.Precision(1), ev.Precision(3), ev.Precision(5), ev.Precision(10),
        ev.Recall(1), ev.Recall(3), ev.Recall(5), ev.Recall(10),
        ev.upper_bound(5), ev.upper_bound(10), ev.MeanAveragePrecision()
    )

    """------------------------------------------remove ill evaluation-------------------------------------------"""
    # keep only labels that occur in at least one evaluated sample
    print 'outputs before ', y_scores.shape
    eval_labels = []
    for label in range(test_y.shape[1]):
        if (test_y[:, label] == np.ones(test_y.shape[0])).any():
            eval_labels.append(label)
    print '\n{} labels out of {} will be evaluated (zero-sampled-labels removed).'.format(
        len(eval_labels), test_y.shape[1])
    y_scores, predictions, targets = y_scores[:, eval_labels], predictions[:, eval_labels], test_y[:, eval_labels]
    print 'outputs after ', y_scores.shape

    ev = Evaluation(y_scores, predictions, targets)
    print 'precision recall f1 macro: {}'.format(ev.precision_recall_fscore('macro'))
    print 'precision recall f1 micro: {}'.format(ev.precision_recall_fscore('micro'))
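# Hedged usage sketch for the evaluate(test_x, test_y, model) defined directly
# above: any estimator exposing predict_proba() and predict() over a multi-hot
# label matrix fits the interface, e.g. a scikit-learn one-vs-rest wrapper. The
# variable names (train_x, train_y, test_x, test_y) and the choice of
# LogisticRegression are assumptions made for this example, not the model used
# in the original code; the function is never called.
def _sklearn_usage_sketch(train_x, train_y, test_x, test_y):
    from sklearn.multiclass import OneVsRestClassifier
    from sklearn.linear_model import LogisticRegression

    clf = OneVsRestClassifier(LogisticRegression())
    clf.fit(train_x, train_y)       # train_y: (n_samples, n_tags) 0/1 indicator matrix
    evaluate(test_x, test_y, clf)   # prints P@k / R@k / MAP and macro/micro P-R-F1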
def evaluate(self, data, tag_names, folder, session):
    # collect model scores and gold tags batch by batch
    all_ids = []
    eval_func = self.predict_func
    outputs, targets = [], []
    for ids, idts, idbs, tags in data:
        all_ids += ids
        output = eval_func(idts, idbs, session)
        outputs.append(output)
        targets.append(tags)
    outputs = np.vstack(outputs)
    targets = np.vstack(targets).astype(np.int32)  # it was dtype object

    """------------------------------------------remove ill evaluation-------------------------------------------"""
    # keep only samples that have at least one positive tag
    eval_samples = []
    for sample in range(targets.shape[0]):
        if (targets[sample, :] == np.ones(targets.shape[1])).any():
            eval_samples.append(sample)
    print '\n{} samples out of {} will be evaluated (zero-labeled-samples removed).'.format(
        len(eval_samples), outputs.shape[0])
    outputs, targets = outputs[eval_samples, :], targets[eval_samples, :]
    """------------------------------------------remove ill evaluation-------------------------------------------"""

    ev = Evaluation(outputs, None, targets)

    # for each evaluated query, collect the names of the 10 highest-scoring tags, best first
    all_rankedat10_tags = []
    query_ids = []
    # EVAL_LABELS = set()
    for sample_id, sample_output in zip(eval_samples, outputs):
        q_id = all_ids[sample_id]
        query_ids.append(q_id)
        cols = np.argsort(sample_output)[-10:]
        rankedat10_tags = []
        for col in cols[::-1]:
            # label_id = eval_labels[col]
            # label_name = tag_names[label_id]
            label_name = tag_names[col]
            # EVAL_LABELS.add(label_name)
            rankedat10_tags.append(label_name)
        all_rankedat10_tags.append(rankedat10_tags)
    # eval_labels = list(EVAL_LABELS & set(TOP50LABELS))

    # per-query metric lists (one value per evaluated sample)
    all_Pat5, all_Pat10, all_Rat5, all_Rat10 = \
        ev.Precision(5, True), ev.Precision(10, True), ev.Recall(5, True), ev.Recall(10, True)
    upper_bounds_pat5 = ev.upper_bound(5, True)
    upper_bounds_pat10 = ev.upper_bound(10, True)
    all_MAP = ev.MeanAveragePrecision(True)
    assert len(all_Pat5) == len(all_rankedat10_tags)

    # mat = ev.ConfusionMatrix(5)
    # print_matrix(
    #     mat,
    #     tag_names,
    #     'Confusion:True Tag on x-axis, False Tag on y-axis',
    #     folder,
    #     some_labels=eval_labels,
    # )
    # mat = ev.CorrelationMatrix()
    # print_matrix(
    #     mat,
    #     tag_names,
    #     'Correlation: True Tag on both axis',
    #     folder,
    #     some_labels=eval_labels
    # )

    print 'average: P@5: {} P@10: {} R@5: {} R@10: {} UBP@5: {} UBP@10: {} MAP: {}'.format(
        ev.Precision(5), ev.Precision(10), ev.Recall(5), ev.Recall(10),
        ev.upper_bound(5), ev.upper_bound(10), ev.MeanAveragePrecision())

    """------------------------------------------remove ill evaluation-------------------------------------------"""
    # keep only labels that occur in at least one evaluated sample
    print 'outputs before ', outputs.shape
    eval_labels = []
    for label in range(targets.shape[1]):
        if (targets[:, label] == np.ones(targets.shape[0])).any():
            eval_labels.append(label)
    print '\n{} labels out of {} will be evaluated (zero-sampled-labels removed).'.format(
        len(eval_labels), targets.shape[1])
    outputs, targets = outputs[:, eval_labels], targets[:, eval_labels]
    print 'outputs after ', outputs.shape

    # threshold probabilities at 0.5 to get hard 0/1 predictions
    predictions = np.where(outputs > 0.5, np.ones_like(outputs), np.zeros_like(outputs))
    ev = Evaluation(outputs, predictions, targets)
    print 'precision recall f1 macro: {}'.format(ev.precision_recall_fscore('macro'))
    print 'precision recall f1 micro: {}'.format(ev.precision_recall_fscore('micro'))

    return query_ids, all_rankedat10_tags, list(all_Pat5), list(all_Pat10), list(all_Rat5), list(all_Rat10), \
        upper_bounds_pat5, upper_bounds_pat10, all_MAP
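# Hedged standalone sketch of the final step in the method above: probabilities
# are thresholded at 0.5 to get hard 0/1 predictions before asking Evaluation
# for macro/micro precision-recall-F1. Assuming that call behaves like
# scikit-learn's precision_recall_fscore_support (an assumption, not confirmed
# by the original code), roughly equivalent numbers could be computed as below;
# the function is never called.
def _threshold_and_prf_sketch(outputs, targets):
    import numpy as np
    from sklearn.metrics import precision_recall_fscore_support

    predictions = (outputs > 0.5).astype(np.int32)  # 0/1 matrix, same shape as targets
    macro = precision_recall_fscore_support(targets, predictions, average='macro')
    micro = precision_recall_fscore_support(targets, predictions, average='micro')
    return macro, micro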