def bestfWA(eta, mu, lossfunc, method):
    k = eta.shape[0]
    fFC = np.zeros(k)
    fFS = -1
    if method == 'F':
        # res = scipy.optimize.minimize(weightedAvg, (1, 1), args=(eta, mu), bounds=((-1, 1), (-1, 1)))
        ini = np.random.uniform(-1, 1, k)
        bounds = ((-1, 1),) * k
        res = scipy.optimize.minimize(precwrap, ini, args=(eta, mu), bounds=bounds)
        fFC = res.x
        fFS = metrics.precision(fFC, eta, mu)
    elif method == 'T':
        # Brute force: enumerate all 2^k sign vectors in {-1, 1}^k.
        for i in range(pow(2, k)):
            f = np.array([(i >> b) & 1 for b in range(k)], dtype=float)
            f = 2 * f - 1
            loss = metrics.precision(f, eta, mu)
            if loss > fFS:
                fFC = f
                fFS = loss
    return (fFC, fFS)
def evaluate(path):
    cad = read_dataset('cad.csv')
    rgbd = read_dataset('rgbd.csv')
    freqs = freq_count(cad)
    results = load_results(path, rgbd, cad)

    mP = 0.0
    mR = 0.0
    mF = 0.0
    mAP = 0.0
    mNDCG = 0.0
    mNNT1 = 0.0
    mNNT2 = 0.0
    for (queried, retrieved) in results:
        f = freqs[queried[0]]
        x = categories_to_rel(queried, retrieved)[:f]
        # Sum up the retrieval scores
        mP += precision(x)
        mR += recall(x, f)
        mF += f1score(x, f)
        mNDCG += ndcg(x)
        mAP += average_precision(x, f)
        mNNT1 += nnt1(x, f)
        mNNT2 += nnt2(x, f)

    n = len(results)
    print('num queries:', n)
    print('mean precision:', mP / n)
    print('mean recall:', mR / n)
    print('mean F1:', mF / n)
    print('mean AP:', mAP / n)
    print('mean NDCG: ', mNDCG / n)
    print('mean NNT1: ', mNNT1 / n)
    print('mean NNT2: ', mNNT2 / n)

    # Plot PR-curve
    cutoff = 1000
    mean_precisions = np.zeros(cutoff, np.float64)
    mean_recalls = np.zeros(cutoff, np.float64)
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        x = np.pad(x, (0, cutoff - len(x)), 'constant', constant_values=(0))
        precisions = []
        recalls = []
        for k, _ in enumerate(x):
            p = precision(x[:k + 1])
            r = recall(x[:k + 1], freqs[queried[0]])
            precisions.append(p)
            recalls.append(r)
        mean_precisions += precisions
        mean_recalls += recalls

    mean_precisions /= len(results)
    mean_recalls /= len(results)
    plt.plot(mean_recalls, mean_precisions)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.axis([0, 1, 0, 1.05])
    plt.show()
def evaluate(path):
    queries = read_dataset('queries.csv')
    targets = read_dataset('targets.csv')
    freqs = freq_count(targets)
    results = load_results(path, queries, targets)
    cutoff = 1000

    precisions = []
    recalls = []
    f1scores = []
    aps = []
    gains = []
    nnt1s = []
    nnt2s = []
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        p = precision(x)
        r = recall(x, freqs[queried[0]])
        f = f1score(x, freqs[queried[0]])
        g = ndcg(x)
        ap = average_precision(x, freqs[queried[0]])
        t1 = nnt1(x, freqs[queried[0]])
        t2 = nnt2(x, freqs[queried[0]])
        precisions.append(p)
        recalls.append(r)
        f1scores.append(f)
        gains.append(g)
        aps.append(ap)
        nnt1s.append(t1)
        nnt2s.append(t2)

    print('mean precision:', numpy.mean(precisions))
    print('mean recall:', numpy.mean(recalls))
    print('mean F1 score:', numpy.mean(f1scores))
    print('mAP:', numpy.mean(aps))
    print('mean NDCG:', numpy.mean(gains))
    print('mean nearest neighbor:', numpy.mean(nnt1s), numpy.mean(nnt2s))

    # plot precision-recall curve
    mean_precisions = numpy.zeros(cutoff, numpy.float64)
    mean_recalls = numpy.zeros(cutoff, numpy.float64)
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        x = numpy.pad(x, (0, cutoff - len(x)), 'constant', constant_values=(0))
        precisions = []
        recalls = []
        for k, _ in enumerate(x):
            p = precision(x[:k + 1])
            r = recall(x[:k + 1], freqs[queried[0]])
            precisions.append(p)
            recalls.append(r)
        mean_precisions += precisions
        mean_recalls += recalls

    mean_precisions /= len(results)
    mean_recalls /= len(results)
    plt.plot(mean_recalls, mean_precisions)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.axis([0, 1, 0, 1.05])
    plt.show()
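The retrieval-evaluation scripts above assume small helpers that score a binary relevance vector x (1 = relevant, 0 = not) against the class frequency f. A minimal sketch of what such helpers might look like — these definitions are assumptions, not the project's actual implementations:

import numpy as np

def precision(x):
    # fraction of retrieved items that are relevant
    x = np.asarray(x, dtype=np.float64)
    return x.sum() / len(x) if len(x) else 0.0

def recall(x, f):
    # fraction of the f relevant items that were retrieved
    return np.asarray(x, dtype=np.float64).sum() / f if f else 0.0

def f1score(x, f):
    p, r = precision(x), recall(x, f)
    return 2 * p * r / (p + r) if (p + r) else 0.0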
def evaluate_test_set(session, tags, preds, fnames, lines, batch_limit=None):
    batch_num = 0
    num_sequences = 0
    p_tp_total, p_fp_total, r_tp_total, r_fn_total = 0, 0, 0, 0
    p_tp_total_binary, p_fp_total_binary, r_tp_total_binary, r_fn_total_binary = 0, 0, 0, 0
    while True:
        try:
            # Train binary, eval binary setting
            y, y_, filenames, line_nums = \
                session.run([tags, preds, fnames, lines])
            p_tp, p_fp = metrics.precision(reader, y, y_, counts=True)
            r_tp, r_fn = metrics.recall(reader, y, y_, counts=True)
            p_tp_total += p_tp
            p_fp_total += p_fp
            r_tp_total += r_tp
            r_fn_total += r_fn

            # Train all tags, eval binary setting
            p_tp_binary, p_fp_binary = metrics.precision(reader, y, y_,
                                                         binary=True, counts=True)
            r_tp_binary, r_fn_binary = metrics.recall(reader, y, y_,
                                                      binary=True, counts=True)
            p_tp_total_binary += p_tp_binary
            p_fp_total_binary += p_fp_binary
            r_tp_total_binary += r_tp_binary
            r_fn_total_binary += r_fn_binary

            # TODO: Train binary, eval binary setting
            num_sequences += len(y)
            batch_num += 1
            if batch_num == batch_limit:
                break
        except tf.errors.OutOfRangeError:
            print('test queue is empty')
            break

    if p_tp_total:
        precision = p_tp_total / (p_tp_total + p_fp_total)
        recall = r_tp_total / (r_tp_total + r_fn_total)
        f1 = metrics.f1(precision, recall)
        precision_binary = p_tp_total_binary / (p_tp_total_binary + p_fp_total_binary)
        recall_binary = r_tp_total_binary / (r_tp_total_binary + r_fn_total_binary)
        f1_binary = metrics.f1(precision_binary, recall_binary)
        print('Evaluated {} sequences from test set'.format(num_sequences))
        print('Precision: ', precision)
        print('Recall: ', recall)
        print('f1: ', f1)
        print('Precision Binary: ', precision_binary)
        print('Recall Binary: ', recall_binary)
        print('f1 Binary: ', f1_binary)
def test(self):
    tf.global_variables_initializer().run()
    self.saver = tf.train.Saver()
    could_load, checkpoint_counter = self.load(self.checkpoint_dir)
    if could_load:
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    test_feed_dict = {
        self.test_inptus: self.test_x,
        self.test_labels: self.test_y
    }
    summary_str, test_loss, test_accuracy, p, t = self.sess.run(
        [
            self.test_summary, self.test_loss, self.test_accuracy,
            self.test_plab, self.test_tlab
        ],
        feed_dict=test_feed_dict)

    import metrics
    print("test_accuracy: {}".format(test_accuracy))
    with open('resnet.txt', 'a') as f:  # open the results file for appending
        f.write(
            str(self.i) + '-' + str(self.j) + ',' +
            str(metrics.accuracy(t, p)) + ',' +
            str(metrics.precision(t, p)) + ',' +
            str(metrics.recall(t, p)) + ',' +
            str(metrics.f1score(t, p)) + ',' +
            str(metrics.ft(t, p)) + '\n')
def classification_report(y_true, y_pred):
    print('--------------------------------')
    print('Accuracy -', metrics.accuracy(y_true, y_pred))
    print('Recall -', metrics.recall(y_true, y_pred))
    print('Precision -', metrics.precision(y_true, y_pred))
    print('F1 score -', metrics.f1_score(y_true, y_pred))
    print('--------------------------------')
def test_small_cross():
    ds = DataSet('../datasets/', 'test', 'small-cross')
    print('DS: {}; iterations: {}'.format(ds.name, ds.set_count))
    for i in range(1, ds.set_count + 1):
        print("ITER #{}".format(i))
        trn, tst = ds.get_dataset(i)
        print('\tTRAIN: {}'.format(trn))
        print('\tTEST: {}'.format(tst))
        trns, tsts = utils.get_edges_set(trn), utils.get_edges_set(tst)
        scores = get_small_scores()
        auc_res_tot = mtr.auc(ds.vx_count, trns, tsts, scores)
        auc_res_010 = mtr.auc(ds.vx_count, trns, tsts, scores, 10)
        auc_res_100 = mtr.auc(ds.vx_count, trns, tsts, scores, 100)
        auc_res_01k = mtr.auc(ds.vx_count, trns, tsts, scores, 1000)
        # auc_res_10k = mtr.auc(ds.vx_count, trns, tsts, scores, 10000)
        # auc_res_1ck = mtr.auc(ds.vx_count, trns, tsts, scores, 100000)
        # auc_res_01m = mtr.auc(ds.vx_count, trns, tsts, scores, 1000000)
        prc_res_002 = mtr.precision(ds.vx_count, trns, tsts, scores, 2)
        print('\tMETRICS:')
        print('\t\t-> AUC___TOT: {:.04}'.format(auc_res_tot))  # expected: 0.67
        print('\t\t-> AUC____10: {:.04}'.format(auc_res_010))
        print('\t\t-> AUC___100: {:.04}'.format(auc_res_100))
        print('\t\t-> AUC____1K: {:.04}'.format(auc_res_01k))
        # print('\t\t-> AUC___10K: {:.04}'.format(auc_res_10k))
        # print('\t\t-> AUC__100K: {:.04}'.format(auc_res_1ck))
        # print('\t\t-> AUC____1M: {:.04}'.format(auc_res_01m))
        print('\t\t-> PREC____2: {:.04}'.format(prc_res_002))  # expected: 0.50
        print()
def reduce_by_kMIQP(algoname, res, source_file, save_path=None):
    outputs = []
    k = 10
    pd = []
    div = []
    xa = []
    for li in np.arange(0, 1, 0.05):
        lamb = li
        xa.append(lamb)
        for one_tuple in tqdm(res, ncols=77):
            r, M = _preprocess(one_tuple)
            if algoname == 'greedy':
                vx, max_res = greedy_kMIQP(r, M, lamb, k=k)
            elif algoname == 'gurobi':
                vx, max_res = gurobi_kMIQP(r, M, lamb, k=k)
                # vx, max_res = kMIQP(r, M, lamb, k=k)
            groundtruth, preds, scores = one_tuple
            preds = [preds[x] for x in vx]
            outputs.append((groundtruth, preds, max_res))
        if save_path is None:
            prec, jacc = 0.0, 0.0
            for groundtruth, preds, scores in outputs:
                preds = preds[:k]
                jacc += jaccard(preds)
                prec += precision(groundtruth, preds)
            pd.append(prec * 100 / len(outputs))
            div.append(jacc / len(outputs))
        else:
            with open(save_path, 'wb') as f:
                pickle.dump(outputs, f, pickle.HIGHEST_PROTOCOL)
    return xa, pd, div
def evaluate(path):
    queries = read_dataset('queries.csv')
    targets = read_dataset('targets.csv')
    freqs = freq_count(targets)
    results = load_results(path, queries, targets)
    cutoff = 1000

    precisions = []
    recalls = []
    f1scores = []
    aps = []
    gains = []
    nnt1s = []
    nnt2s = []
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        p = precision(x)
        r = recall(x, freqs[queried[0]])
        f = f1score(x, freqs[queried[0]])
        g = ndcg(x)
        ap = average_precision(x, freqs[queried[0]])
        t1 = nnt1(x, freqs[queried[0]])
        t2 = nnt2(x, freqs[queried[0]])
        precisions.append(p)
        recalls.append(r)
        f1scores.append(f)
        gains.append(g)
        aps.append(ap)
        nnt1s.append(t1)
        nnt2s.append(t2)
        # per-query scores
        print('precision:', p)
        print('recall:', r)
        print('F1 score:', f)
        print('average precision:', ap)
        print('NDCG:', g)
        print('nearest neighbor:', t1, t2)
def f1_score(y_true, y_pred):
    from metrics import precision, recall
    y_true = K.cast(y_true, dtype='float32')
    y_pred = K.cast(y_pred, dtype='float32')
    # use distinct local names so the imported functions aren't shadowed
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return 2 * ((p * r) / (p + r + K.epsilon()))
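f1_score above composes backend-level precision and recall. A minimal sketch of batch-wise helpers consistent with that usage, in the style of the classic Keras metrics — an assumption about what the imported metrics module provides:

from tensorflow.keras import backend as K

def precision(y_true, y_pred):
    # true positives / predicted positives, after rounding predictions to {0, 1}
    tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    pred_pos = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return tp / (pred_pos + K.epsilon())

def recall(y_true, y_pred):
    # true positives / actual positives
    tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_pos = K.sum(K.round(K.clip(y_true, 0, 1)))
    return tp / (actual_pos + K.epsilon())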
def eval_classifier(classifier, x, y):
    y_pred = classifier.predict(x)
    conf = metrics.conf_matrix(y_pred, y)
    accuracy = metrics.accuracy(y_pred, y)
    precision = metrics.precision(y_pred, y)
    recall = metrics.recall(y_pred, y)
    f1_score = metrics.f_score(y_pred, y, beta=1)
    avg_prec = np.mean(precision)
    avg_rec = np.mean(recall)
    avg_f1 = np.mean(f1_score)

    print("Confusion Matrix: ")
    print(conf)
    print("Accuracy:")
    print(accuracy)
    print("Precision:")
    print(precision)
    print(f"Average Precision: {avg_prec}")
    print("Recall:")
    print(recall)
    print(f"Average Recall: {avg_rec}")
    print("F1_score:")
    print(f1_score)
    print(f"Average F1 Score: {avg_f1}")
def check_engine_quality(self, query_num, list_of_docs):
    """
    :param query_num: query id
    :param list_of_docs: ranked documents returned for the query
    :return: no return. Prints metrics of the query: precision, recall, MAP.
    """
    benchmark_path = "data\\benchmark_lbls_train.csv"
    df = pd.read_csv(benchmark_path)
    df_prec = df[df['query'] == query_num]
    df_prec = df_prec[df_prec['tweet'].isin(list_of_docs)]
    dict_for_data = df_prec.set_index('tweet')['y_true'].to_dict()

    rmv_lst = []
    ranking = []
    # Add to list for rank
    for doc in list_of_docs:
        try:
            ranking.append(dict_for_data[int(doc)])
        except (KeyError, ValueError):
            rmv_lst.append(doc)
    for d in rmv_lst:
        list_of_docs.remove(d)

    data_df = pd.DataFrame({
        'query': query_num,
        'tweet': list_of_docs,
        'y_true': ranking
    })
    df_rec = df[df['query'] == query_num]
    recall_total = len(df_rec[df_rec['y_true'] == 1.0])
    # print("total relevant docs found with tag 1:", len(data_df[data_df['y_true'] == 1.0]))
    # print("total non-relevant docs found with tag 0:", len(data_df[data_df['y_true'] == 0]))
    # print("found total of", len(df_prec), "tagged docs")

    # Calculate and print
    prec5 = metrics.precision_at_n(data_df, query_num, 5)
    prec10 = metrics.precision_at_n(data_df, query_num, 10)
    prec50 = metrics.precision_at_n(data_df, query_num, 50)
    prec_total = metrics.precision(data_df, True, query_number=query_num)
    map_of_query = metrics.map(data_df)
    recall_val = metrics.recall_single(data_df, recall_total, query_num)

    self.map_list.append(map_of_query)
    self.prec5_list.append(prec5)
    self.prec10_list.append(prec10)
    self.prec50_list.append(prec50)
    self.prec_total_list.append(prec_total)
    self.recall_list.append(recall_val)

    print()
    print("precision at 5 of query", query_num, "is:", prec5)
    print("precision at 10 of query", query_num, "is:", prec10)
    print("precision at 50 of query", query_num, "is:", prec50)
    print("precision of query", query_num, "is:", prec_total)
    print("recall of query", query_num, "is:", recall_val)
    print("map of query", query_num, "is:", map_of_query)
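precision_at_n above operates on a labeled results DataFrame. A plausible sketch of such a helper, where precision@n is the mean of y_true over the top-n ranked rows — the exact definition is an assumption:

def precision_at_n(df, query_number, n):
    # assumed definition: mean relevance label over the n highest-ranked
    # results of the given query (df rows are assumed to be in rank order)
    top = df[df['query'] == query_number].head(n)
    return top['y_true'].mean() if len(top) else 0.0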
def reduce_by_kMIQP(res, source_file, save_path=None):
    outputs = []
    k = 10
    pd = []
    div = []
    xa = []
    for li in np.arange(0, 10, 0.5):
        lamb = li
        xa.append(lamb)
        for one_tuple in tqdm(res, ncols=77):
            r, M = _preprocess(one_tuple)
            vx, max_res = kMIQP(r, M, lamb, k=k)
            groundtruth, preds, scores = one_tuple
            preds = [preds[x] for x in vx]
            outputs.append((groundtruth, preds, max_res))
        if save_path is None:
            prec, jacc = 0.0, 0.0
            for groundtruth, preds, scores in outputs:
                jacc += jaccard(preds)
                prec += precision(groundtruth, preds)
            pd.append(prec * 100 / len(outputs))
            div.append(jacc / len(outputs))
        else:
            with open(save_path, 'wb') as f:
                pickle.dump(outputs, f, pickle.HIGHEST_PROTOCOL)

    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    freqpd = [0.2599] * 20
    freqdiv = [0.0444] * 20
    l1, = ax1.plot(xa, freqpd, label='freq-p@' + str(k),
                   color='darkviolet', marker='o')
    l2, = ax1.plot(xa, pd, label='gurobi-p@' + str(k), color='r', marker='o')
    # ax1.legend(loc=1)
    ax1.set_ylabel('p@' + str(k))
    ax2 = ax1.twinx()
    l3, = ax2.plot(xa, div, label="gurobi-diversity", color='g', marker='*')
    l4, = ax2.plot(xa, freqdiv, label="freq-diversity", color='y', marker='*')
    # ax2.legend(loc=2)
    ax2.set_ylabel('diversity')
    ax1.set_xlabel('lamb(ele)')
    print(xa)
    print(pd)
    print(div)
    my_xticks = np.arange(0, 101, 10)
    plt.xticks(my_xticks)
    plt.legend(handles=[l1, l2, l3, l4], loc='best')
    plt.show()
def compute_all_metrics(execution_id, path_input, path_output, formula, append):
    """
    Computes all metrics and persists them in a CSV.

    Args:
        execution_id (int): identifier of the execution
        path_input (string): path of the file that contains the classifications
        path_output (string): path of the file that will persist the metrics
        formula (string): mean_max | mean_mean
        append (boolean): true | false
    """
    from metrics import accuracy, precision, recall, f1, specificity

    # loading results
    with open(path_input) as data_file:
        data = json.load(data_file)

    # computing metrics (strict > / < so the equality case reaches the raise)
    tp = tn = fp = fn = 0
    for i in range(0, len(data)):
        if (data[i]['values'][formula]['positive'] >
                data[i]['values'][formula]['negative']):
            if data[i]['values']['label'] == 'positive':
                tp += 1
            else:
                fp += 1
        elif (data[i]['values'][formula]['positive'] <
              data[i]['values'][formula]['negative']):
            if (data[i]['values']['label'] == 'negative'):
                tn += 1
            else:
                fn += 1
        else:
            raise Exception(
                "Positive similarity equals negative similarity for news " +
                data[i]['id'])

    accuracy = accuracy(tp, tn, fp, fn)
    recall = recall(tp, fn)
    precision = precision(tp, fp)
    f1 = f1(precision, recall)
    specificity = specificity(tn, fp)

    # persisting the results
    with open(path_output, 'a' if append else 'w') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        if (not append):
            spamwriter.writerow([
                'execution_id', 'tp', 'tn', 'fp', 'fn', 'accuracy',
                'precision', 'recall', 'f1', 'specificity'
            ])
        spamwriter.writerow([
            execution_id, tp, tn, fp, fn, accuracy, precision, recall, f1,
            specificity
        ])
def supervised_eval(self, train_or_valid):
    data = self.dataset.get_labeled_data(train_or_valid)
    if data is None:
        raise ValueError('no labeled examples present in dataset')
    X_labeled, y_true, _ = data
    y_pred = self.model.predict(X_labeled)
    p, r, ac, g, auc = metrics.precision(y_true, y_pred), metrics.recall(y_true, y_pred), \
        metrics.accuracy(y_true, y_pred), metrics.g_means(y_true, y_pred), \
        metrics.auc(y_true, y_pred)
    self.metrics[train_or_valid].append((p, r, ac, g, auc))
def calculate_best_threshold(df):
    thresholds = np.linspace(0, 1, 51)
    precisions_per_threshold = []
    for threshold in thresholds:
        precisions = []
        for idx in df.index:
            mask = df.loc[idx].masks
            prediction = df.loc[idx].predictions
            prediction_mask = np.int32(prediction > threshold)
            precisions.append(precision(prediction_mask, mask))
        precisions_per_threshold.append(np.mean(precisions))
    return thresholds[np.argmax(precisions_per_threshold)]
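calculate_best_threshold (and calculate_precisions further down) treat precision(prediction_mask, mask) as a per-image mask-agreement score. A plausible sketch is the Kaggle-style precision averaged over IoU thresholds; this exact definition is an assumption, not the project's implementation:

import numpy as np

def precision(pred_mask, mask, thresholds=np.arange(0.5, 1.0, 0.05)):
    # mean over IoU thresholds of whether the predicted mask "hits" the target
    pred, true = pred_mask > 0, mask > 0
    if not true.any() and not pred.any():
        return 1.0  # empty prediction for an empty target counts as correct
    inter = np.logical_and(pred, true).sum()
    union = np.logical_or(pred, true).sum()
    iou = inter / union if union else 0.0
    return float(np.mean(iou > thresholds))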
def test_1(self):
    actual = [1, 1, 0, 1, 1, 1, 0, 0, 1, 1]
    predicted = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
    tp, fn, fp, tn = metrics.confusion_matrix(actual, predicted)
    self.assertEqual(tp, 3)
    self.assertEqual(fn, 4)
    self.assertEqual(fp, 1)
    self.assertEqual(tn, 2)
    self.assertEqual(metrics.accuracy(actual, predicted), 0.5)
    self.assertEqual(metrics.precision(actual, predicted), 3 / 4)
    self.assertEqual(metrics.recall(actual, predicted), 3 / 7)
    self.assertEqual(metrics.f1(actual, predicted), 6 / 11)
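A minimal metrics module consistent with every assertion in test_1 (tp=3, fn=4, fp=1, tn=2; precision 3/4, recall 3/7, f1 6/11) — a sketch, not necessarily the implementation under test:

def confusion_matrix(actual, predicted):
    tp = sum(1 for a, p in zip(actual, predicted) if a == 1 and p == 1)
    fn = sum(1 for a, p in zip(actual, predicted) if a == 1 and p == 0)
    fp = sum(1 for a, p in zip(actual, predicted) if a == 0 and p == 1)
    tn = sum(1 for a, p in zip(actual, predicted) if a == 0 and p == 0)
    return tp, fn, fp, tn

def accuracy(actual, predicted):
    tp, fn, fp, tn = confusion_matrix(actual, predicted)
    return (tp + tn) / (tp + fn + fp + tn)

def precision(actual, predicted):
    tp, _, fp, _ = confusion_matrix(actual, predicted)
    return tp / (tp + fp)

def recall(actual, predicted):
    tp, fn, _, _ = confusion_matrix(actual, predicted)
    return tp / (tp + fn)

def f1(actual, predicted):
    p, r = precision(actual, predicted), recall(actual, predicted)
    return 2 * p * r / (p + r)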
def test(engine, options):
    queries = pd.read_csv(os.path.join('data', 'queries_train.tsv'), sep='\t')
    bench_lbls = pd.read_csv(os.path.join('data', 'benchmark_lbls_train.csv'),
                             dtype={'query': int, 'tweet': str, 'y_true': int})
    q2n_relevant = bench_lbls.groupby('query')['y_true'].sum().to_dict()

    queries_results = []
    q_times = []
    for i, row in queries.iterrows():
        q_id = row['query_id']
        q_keywords = row['keywords']
        start_time = time.time()
        q_n_res, q_res = engine.search(q_keywords, options['methods'])
        end_time = time.time()
        q_time = end_time - start_time
        q_times.append(q_time)
        queries_results.extend([(q_id, str(doc_id)) for doc_id in q_res])
        if q_time > 10:
            print(f'Query time exceeded: {options}')

    queries_results = pd.DataFrame(queries_results, columns=['query', 'tweet'])
    q_results_labeled = pd.merge(queries_results, bench_lbls,
                                 on=['query', 'tweet'], how='inner',
                                 suffixes=('_result', '_bench'))

    options['max_q_time'] = max(q_times)
    options['avg_q_time'] = sum(q_times) / len(q_times)
    options['MAP'] = metrics.map(q_results_labeled)
    options['precision'] = metrics.precision(q_results_labeled)
    options['precision@5'] = metrics.precision(
        q_results_labeled.groupby('query').head(5))
    options['precision@10'] = metrics.precision(
        q_results_labeled.groupby('query').head(10))
    options['precision@50'] = metrics.precision(
        q_results_labeled.groupby('query').head(50))
    options['recall'] = metrics.recall(q_results_labeled, q2n_relevant)
    save_to_csv(options)
def calculate_metrics(self):
    # Retrieved documents
    recovered_documents = []
    count_docs = self.tableWidget_results.rowCount()
    for item in range(count_docs):
        document = self.tableWidget_results.item(item, 1).text()
        recovered_documents.append(document)

    # Relevant documents
    relevant_documents = []
    count_docs = self.tableWidget_relevant.rowCount()
    for item in range(count_docs):
        document = self.tableWidget_relevant.item(item, 1).text()
        relevant_box = self.tableWidget_relevant.cellWidget(item, 2)
        mark_box = relevant_box.findChildren(QCheckBox)[0]
        if mark_box.isChecked():
            relevant_documents.append(document)

    # Relevant retrieved documents
    rel = set(relevant_documents)
    rec = set(recovered_documents)
    recovered_relevant_documents = list(rel.intersection(rec))

    RR = len(recovered_relevant_documents)
    REC = len(recovered_documents)
    REL = len(relevant_documents)

    index = self.comboBox_medida.currentIndex()
    if index == 0:
        value = metrics.precision(RR, REC)
    elif index == 1:
        value = metrics.recobrado(RR, REL)
    elif index == 2:
        try:
            beta = float(self.lineEdit_beta.text())
            value = metrics.e_medida(RR, REL, REC, beta)
        except Exception:
            self.lineEdit_beta.setText("0")
            value = metrics.e_medida(RR, REL, REC, 0)
    elif index == 3:
        value = metrics.f_medida(RR, REL, REC)
    else:
        value = metrics.r_presicion(RR, REC, REL)
    self.lineEdit_medida.setText(str(value))
def post(self):
    global current_scene
    score = 0
    print('Submitted scene %s' % current_scene)
    if self.scene_exists(current_scene):
        return {
            'message': "Scene {} already exists.".format(current_scene)
        }, 400

    correct_dict = self.fetch_correct_result()
    if not correct_dict:
        return {
            'message': "There is no result data for scene {}.".format(current_scene)
        }, 400

    your_dict = request.get_json()
    submission_time = datetime.datetime.now()
    print(' Correct prediction', correct_dict)
    your_dict = {str(k): int(v) for k, v in your_dict.items()}
    print(' Your prediction', your_dict)
    sys.stdout.flush()

    score = 0
    score2 = 0
    score3 = 0
    if your_dict:
        # score = Benchmark.diff_dicts(correct_dict, your_dict)
        score = metrics.accuracy(correct_dict, your_dict)
        score2 = metrics.precision(correct_dict, your_dict)
        score3 = metrics.recall(correct_dict, your_dict)
    print("scene accuracy", score)
    print("scene precision", score2)
    print("scene recall", score3)

    submission_result = {
        'scene': current_scene,
        'accuracy': score,
        'precision': score2,
        'recall': score3
    }
    try:
        self.insert(submission_result, submission_time)
    except Exception:
        return {
            'message': 'An error occurred while inserting the item'
        }, 500
    return {
        'Your score for this scene is ': submission_result['accuracy']
    }, 201
def precisions(self):
    predictions = self.one_hot
    size = len(self._classes)
    with tf.compat.v1.name_scope("Precisions"):
        ps = []
        ops = []
        for i, c in enumerate(self._classes):
            mask = tf.one_hot([i], size, axis=-1)
            p, op = precision(labels=self.target,
                              predictions=predictions,
                              weights=mask)
            tf.compat.v1.summary.scalar("c{}_{}".format(i, c), p * 100)
            ps.append(p)
            ops.append(op)
        return ps, ops
def calculate_precisions(df):
    df["precisions"] = [
        precision(pm, m) for pm, m in zip(df.prediction_masks, df.masks)
    ]
    df["precisions_otsu"] = [
        precision(pm, m) for pm, m in zip(df.prediction_masks_otsu, df.masks)
    ]
    df["precisions_crf"] = [
        precision(pm, m) for pm, m in zip(df.prediction_masks_crf, df.masks)
    ]
    df["precisions_best"] = [
        precision(pm, m) for pm, m in zip(df.prediction_masks_best, df.masks)
    ]

    mask_threshold_per_cc = {}
    for cc, cc_df in df.groupby("predictions_cc"):
        mask_threshold_per_cc[cc] = calculate_best_threshold(cc_df)
    df["precisions_cc"] = [
        precision(np.int32(p > mask_threshold_per_cc[cc]), m)
        for p, m, cc in zip(df.predictions, df.masks, df.predictions_cc)
    ]
    return mask_threshold_per_cc
def precisions(self):
    predictions = self.one_hot
    size = len(self._classes)
    with tf.name_scope("Precisions"):
        ps = []
        ops = []
        for i, c in enumerate(self._classes):
            mask = tf.one_hot([i], size, axis=-1)
            p, op = precision(labels=self.target,
                              predictions=predictions,
                              weights=mask,
                              updates_collections=tf.GraphKeys.UPDATE_OPS)
            tf.summary.scalar("c_{}".format(c), p * 100)
            ps.append(p)
            ops.append(op)
        return ps, ops
def _watch_converge(res):
    random.shuffle(res)
    tqdmInput = tqdm(res, ncols=77, leave=True)
    prec, jacc = 0.0, 0.0
    for iter, one_tuple in enumerate(tqdmInput):
        r, M = _preprocess(one_tuple)
        vx, max_res = kMIQP(r, M, lamb=1.0, k=10)
        groundtruth, preds, scores = one_tuple
        preds = [preds[x] for x in vx]
        prec += precision(groundtruth, preds)
        jacc += jaccard(preds)
        tqdmInput.set_description('Prec@10: %.3f%% Div: %.3f' %
                                  (prec * 100 / (iter + 1), jacc / (iter + 1)))
def active_simulation_eval(self):
    import warnings
    data = self.dataset.get_unlabeled_data()
    if data is None:
        # warn (the original built a UserWarning without raising it)
        warnings.warn(
            'all examples have been labeled; this eval mode works '
            'if there is an unlabeled pool of data in `simulate` mode')
        return
    X_unlabeled, unlabeled_indexes = data
    # get unlabeled examples' labels in simulation with `y_ideal`
    y_true = self.dataset.y_ideal[unlabeled_indexes]
    y_pred = self.model.predict(X_unlabeled)
    p, r, ac, g, auc = metrics.precision(y_true, y_pred), metrics.recall(y_true, y_pred), \
        metrics.accuracy(y_true, y_pred), metrics.g_means(y_true, y_pred), \
        metrics.auc(y_true, y_pred)
    self.metrics['simulate'].append((p, r, ac, g, auc))
def results_to_metrics(results, methods, ref_motifs):
    _, _, ref_labels = motif.unpack_motif(ref_motifs)

    metric_dict = dict.fromkeys(methods)
    for m in methods:
        obs_motifs = results[m]
        _, _, obs_labels = motif.unpack_motif(obs_motifs)

        this_edit = metrics.edit_distance(obs_labels, ref_labels)
        this_recall = metrics.recall(obs_motifs, ref_motifs)
        this_precis = metrics.precision(obs_motifs, ref_motifs)
        this_f = metrics.f_measure(obs_motifs, ref_motifs)
        this_bm = metrics.boundary_distance(obs_motifs, ref_motifs)
        metric_dict[m] = [this_edit, this_recall, this_precis, this_f, this_bm]

    return metric_dict
def test(model, test_inputs, test_labels):
    """
    :param model: tf.keras.Model inherited data type
        model being trained
    :param test_inputs: Numpy Array - shape (num_images, imsize, imsize, channels)
        input images to test on
    :param test_labels: Numpy Array - shape (num_images, 2)
        ground truth labels, one-hot encoded
    :return: float, float, float, float
        returns dice score, sensitivity value (0.5 threshold), specificity
        value (0.5 threshold), and precision value, all in the range [0, 1]
    """
    BATCH_SZ = model.batch_size
    indices = np.arange(test_inputs.shape[0]).tolist()
    all_logits = None
    for i in range(0, test_labels.shape[0], BATCH_SZ):
        images = test_inputs[indices[i:i + BATCH_SZ]]
        logits = model(images)
        if all_logits is None:
            all_logits = logits
        else:
            all_logits = np.concatenate([all_logits, logits], axis=0)
    # NOTE: this breaks if the dataset size isn't divisible by the batch size,
    # because the loop above never gets predictions for the remainder.

    sensitivity_val1 = sensitivity(test_labels, all_logits, threshold=0.15)
    sensitivity_val2 = sensitivity(test_labels, all_logits, threshold=0.3)
    sensitivity_val3 = sensitivity(test_labels, all_logits, threshold=0.5)
    specificity_val1 = specificity(test_labels, all_logits, threshold=0.15)
    specificity_val2 = specificity(test_labels, all_logits, threshold=0.3)
    specificity_val3 = specificity(test_labels, all_logits, threshold=0.5)
    dice = dice_coef(test_labels, all_logits)
    precision_val = precision(test_labels, all_logits)

    print("Sensitivity 0.15: {}, Sensitivity 0.3: {}, Sensitivity 0.5: {}".format(
        sensitivity_val1, sensitivity_val2, sensitivity_val3))
    print("Specificity 0.15: {}, Specificity 0.3: {}, Specificity 0.5: {}".format(
        specificity_val1, specificity_val2, specificity_val3))
    print("DICE: {}, Precision: {}".format(dice, precision_val))
    return dice.numpy(), sensitivity_val3, specificity_val3, precision_val
def train(model, generator, verbose=False):
    """Trains the model for one epoch.

    :param model: tf.keras.Model inherited data type
        model being trained
    :param generator: BalancedDataGenerator
        a data generator which runs preprocessing and returns batches accessed
        by integer indexing (i.e. generator[0] returns the first batch of
        inputs and labels)
    :param verbose: boolean
        whether to output the dice score every few batches
    :return: list
        list of losses from every batch of training
    """
    BATCH_SZ = model.batch_size
    train_steps = generator.steps_per_epoch
    loss_list = []
    for i in range(0, train_steps, 1):
        images, labels = generator[i]
        with tf.GradientTape() as tape:
            logits = model(images)
            loss = model.loss_function(labels, logits)
        if i % 4 == 0 and verbose:
            sensitivity_val = sensitivity(labels, logits)
            specificity_val = specificity(labels, logits)
            precision_val = precision(labels, logits)
            train_dice = dice_coef(labels, logits)
            print("Scores on training batch after {} training steps".format(i))
            print("Sensitivity: {}, Specificity: {}".format(
                sensitivity_val, specificity_val))
            print("Precision: {}, DICE: {}\n".format(precision_val, train_dice))
        loss_list.append(loss)
        gradients = tape.gradient(loss, model.trainable_variables)
        model.optimizer.apply_gradients(
            zip(gradients, model.trainable_variables))
    return loss_list
def metric_fn(label_ids, predict, num_labels, answer_num):
    mask = tf.sequence_mask(answer_num, FLAGS.max_answer_num)
    precision = metrics.precision(label_ids, predict,
                                  num_classes=num_labels,
                                  weights=mask, pos_indices=[1])
    recall = metrics.recall(label_ids, predict,
                            num_classes=num_labels,
                            weights=mask, pos_indices=[1])
    f1_score = metrics.f1(label_ids, predict,
                          num_classes=num_labels,
                          weights=mask, pos_indices=[1])
    return {
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score
    }
def main(flag, k):
    if flag == 'clo':
        source_path = '../data/bundle_clo.pkl'
    elif flag == 'ele':
        source_path = '../data/bundle_ele.pkl'
    else:
        assert False

    with open(source_path, 'rb') as f:
        train_set = pickle.load(f)
        test_set = pickle.load(f)
        cate_list = pickle.load(f)
        bundle_map = pickle.load(f)
        (user_count, item_count, cate_count, bundle_count,
         bundle_rank, _) = pickle.load(f)
        gen_groundtruth_data = pickle.load(f)

    freq = Counter()
    for t in train_set:
        if len(bundle_map[t[2]]) >= 2:
            t = bundle_map[t[2]]
            freq.update(subsets(t))
            # for i in range(len(t)):
            #     for j in range(i+1, len(t)):
            #         freq.update([tuple([t[i], t[j]])])

    preds = freq.most_common(k)
    preds = [[i for i in t[0]] for t in preds]
    total, jacc, prec = 0, jaccard(preds), 0.0
    for uid, hist, pos in gen_groundtruth_data:
        groundtruth = list(bundle_map[pos])
        prec += precision(groundtruth, preds)
        total += 1
    print(flag, 'P@%d: %.4f%%\tDiv: %.4f' % (k, prec * 100 / total, -jacc))
def main(argv=None):  # pylint: disable=unused-argument
    # load imageset
    train_set_folder = os.path.join(module_dir, os.path.pardir,
                                    os.path.pardir, 'data/ocr/train')
    test_set_folder = os.path.join(module_dir, os.path.pardir,
                                   os.path.pardir, 'data/ocr/test')

    # Extract it into numpy arrays.
    train_data, train_labels = load_imageset(train_set_folder,
                                             to_img_size=(28, 28, 1),
                                             ext='png')
    test_data, test_labels = load_imageset(test_set_folder,
                                           to_img_size=(28, 28, 1),
                                           ext='png')

    height = train_data.shape[1]
    width = train_data.shape[2]
    channel = (train_data.shape[3] if train_data.ndim > 3 else 1)
    label_max = np.amax(train_labels)
    label_min = np.amin(train_labels)
    num_labels = label_max - label_min + 1

    # Generate a validation set.
    train_data, train_labels, validation_data, validation_labels = split_cv(
        train_data, train_labels, 0.1)
    num_epochs = NUM_EPOCHS
    train_size = train_labels.shape[0]

    # This is where training samples and labels are fed to the graph.
    # These placeholder nodes will be fed a batch of training data at each
    # training step using the {feed_dict} argument to the Run() call below.
    train_data_node = tf.placeholder(
        tf.float32, shape=(BATCH_SIZE, height, width, channel))
    train_labels_node = tf.placeholder(tf.int64, shape=(BATCH_SIZE,))
    eval_data = tf.placeholder(
        tf.float32, shape=(EVAL_BATCH_SIZE, height, width, channel))

    # The variables below hold all the trainable weights. They are passed an
    # initial value which will be assigned when we call:
    # {tf.initialize_all_variables().run()}
    conv1_weights = tf.Variable(
        tf.truncated_normal([5, 5, channel, 32],  # 5x5 filter, depth 32.
                            stddev=0.1, seed=SEED),
        name="conv1_weights")
    conv1_biases = tf.Variable(tf.zeros([32]), name="conv1_biases")
    conv2_weights = tf.Variable(
        tf.truncated_normal([5, 5, 32, 64], stddev=0.1, seed=SEED),
        name="conv2_weights")
    conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]),
                               name="conv2_biases")
    fc1_weights = tf.Variable(  # fully connected, depth 512.
        tf.truncated_normal([height // 4 * width // 4 * 64, 512],
                            stddev=0.1, seed=SEED),
        name="fc1_weights")
    fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]), name="fc1_biases")
    fc2_weights = tf.Variable(
        tf.truncated_normal([512, num_labels], stddev=0.1, seed=SEED),
        name="fc2_weights")
    fc2_biases = tf.Variable(tf.constant(0.1, shape=[num_labels]),
                             name="fc2_biases")

    # We will replicate the model structure for the training subgraph, as well
    # as the evaluation subgraphs, while sharing the trainable parameters.
    def lenet2(data, train=False):
        """LeNet2 definition."""
        # 2D convolution, with 'SAME' padding (i.e. the output feature map has
        # the same size as the input). Note that {strides} is a 4D array whose
        # shape matches the data layout: [n, h, w, c].
        conv1 = tf.nn.conv2d(data, conv1_weights,
                             strides=[1, 1, 1, 1], padding='SAME')
        # Bias and rectified linear non-linearity.
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))
        # Max pooling. The kernel size spec {ksize} also follows the layout of
        # the data. Here we have a pooling window of 2, and a stride of 2.
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='SAME')
        conv2 = tf.nn.conv2d(pool1, conv2_weights,
                             strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='SAME')
        # Reshape the feature map cuboid into a 2D matrix to feed it to the
        # fully connected layers.
        pool_shape = pool2.get_shape().as_list()
        reshape = tf.reshape(
            pool2,
            [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
        # Fully connected layer. Note that the '+' operation automatically
        # broadcasts the biases.
        fc1 = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
        # Add a 50% dropout during training only. Dropout also scales
        # activations such that no rescaling is needed at evaluation time.
        if train:
            fc1 = tf.nn.dropout(fc1, 0.5, seed=SEED)
        return tf.matmul(fc1, fc2_weights) + fc2_biases

    # Training computation: logits + cross-entropy loss.
    logits = lenet2(train_data_node, True)
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits, train_labels_node))

    # L2 regularization for the fully connected parameters.
    regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                    tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
    # Add the regularization term to the loss.
    loss += 5e-4 * regularizers

    # Optimizer: set up a variable that's incremented once per batch and
    # controls the learning rate decay.
    batch = tf.Variable(0)
    # Decay once per epoch, using an exponential schedule starting at 0.01.
    learning_rate = tf.train.exponential_decay(
        0.01,                # Base learning rate.
        batch * BATCH_SIZE,  # Current index into the dataset.
        train_size,          # Decay step.
        0.95,                # Decay rate.
        staircase=True)
    # Use simple momentum for the optimization.
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(
        loss, global_step=batch)

    # Predictions for the current training minibatch.
    train_prediction = tf.nn.softmax(logits)
    # Predictions for the test and validation, which we'll compute less often.
    eval_prediction = tf.nn.softmax(lenet2(eval_data))

    # Small utility function to evaluate a dataset by feeding batches of data
    # to {eval_data} and pulling the results from {eval_predictions}.
    # Saves memory and enables this to run on smaller GPUs.
    def eval_in_batches(data, sess):
        """Get all predictions for a dataset by running it in small batches."""
        size = data.shape[0]
        if size < EVAL_BATCH_SIZE:
            raise ValueError("batch size for evals larger than dataset: %d" % size)
        predictions = np.ndarray(shape=(size, num_labels), dtype=np.float32)
        for begin in xrange(0, size, EVAL_BATCH_SIZE):
            end = begin + EVAL_BATCH_SIZE
            if end <= size:
                predictions[begin:end, :] = sess.run(
                    eval_prediction,
                    feed_dict={eval_data: data[begin:end, ...]})
            else:
                batch_predictions = sess.run(
                    eval_prediction,
                    feed_dict={eval_data: data[-EVAL_BATCH_SIZE:, ...]})
                predictions[begin:, :] = batch_predictions[begin - size:, :]
        return predictions

    # Create a local session to run the training.
    start_time = time.time()
    model_dir = os.path.join(module_dir, os.path.pardir, os.path.pardir,
                             'models')
    with tf.Session() as sess:
        # Run all the initializers to prepare the trainable parameters.
        tf.initialize_all_variables().run()
        # Import base model weights
        saver = tf.train.Saver([conv1_weights, conv1_biases,
                                conv2_weights, conv2_biases,
                                fc1_weights, fc1_biases])
        ckpt = tf.train.get_checkpoint_state(os.path.join(model_dir, 'base'))
        if ckpt and ckpt.model_checkpoint_path:
            logger.info("Continue training from the model {}".format(
                ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)
        # for var in tf.trainable_variables():
        #     logger.info(var.eval())
        logger.info('Initialized!')
        # Loop through training steps.
        for step in xrange(int(num_epochs * train_size) // BATCH_SIZE):
            # Compute the offset of the current minibatch in the data.
            # Note that we could use better randomization across epochs.
            offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
            batch_data = train_data[offset:(offset + BATCH_SIZE), ...]
            batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
            # This dictionary maps the batch data (as a numpy array) to the
            # node in the graph it should be fed to.
            feed_dict = {train_data_node: batch_data,
                         train_labels_node: batch_labels}
            # Run the graph and fetch some of the nodes.
            _, l, lr, predictions = sess.run(
                [optimizer, loss, learning_rate, train_prediction],
                feed_dict=feed_dict)
            if step % EVAL_FREQUENCY == 0:
                elapsed_time = time.time() - start_time
                start_time = time.time()
                logger.info('Step %d (epoch %.2f), %.1f ms' %
                            (step, float(step) * BATCH_SIZE / train_size,
                             1000 * elapsed_time / EVAL_FREQUENCY))
                logger.info('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
                logger.info('Minibatch error: %.1f%%' %
                            error_rate(predictions, batch_labels))
                logger.info('Validation error: %.1f%%' % error_rate(
                    eval_in_batches(validation_data, sess), validation_labels))
                sys.stdout.flush()

        # Finally print the result!
        test_precision = precision(eval_in_batches(test_data, sess),
                                   test_labels)
        logger.info('Test precision: %.1f%%' % test_precision)

        # Model persistence
        saver = tf.train.Saver([conv1_weights, conv1_biases,
                                conv2_weights, conv2_biases,
                                fc1_weights, fc1_biases,
                                fc2_weights, fc2_biases])
        model_path = os.path.join(model_dir, "finetuned",
                                  "lenet_finetuned.ckpt")
        save_path = saver.save(sess, model_path)
        logger.info("Model saved in file: %s" % save_path)
clf = None
clf = RandomForestClassifier(n_estimators=p, criterion='entropy',
                             max_depth=14, min_samples_split=20, n_jobs=2)
clf.fit(train_X, train_y)
results.append(metrics.predict_table(clf, test_X, test_y))

result = pd.concat(results)
matrix = metrics.confusion_matrix(result)

clases = matrix.columns.tolist()
precisions = [metrics.precision(matrix, c) for c in clases]
recalls = [metrics.recall(matrix, c) for c in clases]
f_scores = [metrics.f_score(matrix, c) for c in clases]
w_score = metrics.weighted_f_score(matrix)

# f = open(result_dir + str(max_depth) + ' ' + str(min_samples_split) + '.txt', 'w')
f = open(result_dir + str(p) + '.txt', 'w')
f.write('F_score by class')
f.write('\n')
f.write(str(f_scores))
f.write('\n')
f.write('\n')
f.write('Weighted average: ')
f.write(str(w_score))
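The per-class scores above assume a confusion-matrix DataFrame. A hedged sketch of metrics.precision / metrics.recall in that style, assuming rows are actual classes and columns are predicted classes (the orientation is an assumption):

def precision(matrix, c):
    # of everything predicted as class c, the fraction that is actually c
    predicted_c = matrix[c].sum()
    return matrix.loc[c, c] / predicted_c if predicted_c else 0.0

def recall(matrix, c):
    # of everything that truly is class c, the fraction predicted as c
    actual_c = matrix.loc[c].sum()
    return matrix.loc[c, c] / actual_c if actual_c else 0.0

def f_score(matrix, c, beta=1.0):
    p, r = precision(matrix, c), recall(matrix, c)
    return (1 + beta ** 2) * p * r / (beta ** 2 * p + r) if (p + r) else 0.0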