def evaluate(path):
    cad = read_dataset('cad.csv')
    rgbd = read_dataset('rgbd.csv')
    freqs = freq_count(cad)
    results = load_results(path, rgbd, cad)

    mP = 0.0
    mR = 0.0
    mF = 0.0
    mAP = 0.0
    mNDCG = 0.0
    mNNT1 = 0.0
    mNNT2 = 0.0
    for (queried, retrieved) in results:
        f = freqs[queried[0]]
        x = categories_to_rel(queried, retrieved)[:f]
        # Sum up the retrieval scores
        mP += precision(x)
        mR += recall(x, f)
        mF += f1score(x, f)
        mNDCG += ndcg(x)
        mAP += average_precision(x, f)
        mNNT1 += nnt1(x, f)
        mNNT2 += nnt2(x, f)

    n = len(results)
    print('num queries:', n)
    print('mean precision:', mP / n)
    print('mean recall:', mR / n)
    print('mean F1:', mF / n)
    print('mean AP:', mAP / n)
    print('mean NDCG:', mNDCG / n)
    print('mean NNT1:', mNNT1 / n)
    print('mean NNT2:', mNNT2 / n)

    # Plot PR-curve
    cutoff = 1000
    mean_precisions = np.zeros(cutoff, np.float64)
    mean_recalls = np.zeros(cutoff, np.float64)
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        x = np.pad(x, (0, cutoff - len(x)), 'constant', constant_values=(0))
        precisions = []
        recalls = []
        for k, _ in enumerate(x):
            p = precision(x[:k + 1])
            r = recall(x[:k + 1], freqs[queried[0]])
            precisions.append(p)
            recalls.append(r)
        mean_precisions += precisions
        mean_recalls += recalls

    mean_precisions /= len(results)
    mean_recalls /= len(results)
    plt.plot(mean_recalls, mean_precisions)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.axis([0, 1, 0, 1.05])
    plt.show()
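# The evaluate() function above assumes rank-based metric helpers that take a binary
# relevance list (1 = the retrieved item matches the query's category, 0 = it does not)
# and, where needed, the total number of relevant items. The bodies below are a minimal
# illustrative sketch of such helpers, not the original implementation; only the names
# precision, recall and average_precision come from the snippet.
def precision(rel):
    # fraction of retrieved items that are relevant
    return sum(rel) / len(rel) if len(rel) else 0.0

def recall(rel, num_relevant):
    # fraction of all relevant items that were retrieved
    return sum(rel) / num_relevant if num_relevant else 0.0

def average_precision(rel, num_relevant):
    # mean of precision@k over the ranks k at which a relevant item appears
    hits, total = 0, 0.0
    for k, r in enumerate(rel, start=1):
        if r:
            hits += 1
            total += hits / k
    return total / num_relevant if num_relevant else 0.0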
def test(test_set, model):
    print("starting testing...")
    start_time = time.time()
    model.eval()
    predictions, references = [], []
    with torch.no_grad():
        for i in range(len(test_set)):
            Y, T, data = test_set.get_candidate(i)
            Y = Y.to(device)
            T = T.to(device)
            ids = model.ranking(Y, T).data
            candidate = []
            comments = list(data['candidate'].keys())
            for id in ids:
                candidate.append(comments[id])
            predictions.append(candidate)
            references.append(data['candidate'])
            if i % 100 == 0:
                print(i)
    recall_1 = recall(predictions, references, 1)
    recall_5 = recall(predictions, references, 5)
    recall_10 = recall(predictions, references, 10)
    mr = mean_rank(predictions, references)
    mrr = mean_reciprocal_rank(predictions, references)
    s = "r1={}, r5={}, r10={}, mr={}, mrr={}"
    print(s.format(recall_1, recall_5, recall_10, mr, mrr))
    print("testing time:", time.time() - start_time)
def evaluate(path):
    queries = read_dataset('queries.csv')
    targets = read_dataset('targets.csv')
    freqs = freq_count(targets)
    results = load_results(path, queries, targets)
    cutoff = 1000

    precisions = []
    recalls = []
    f1scores = []
    aps = []
    gains = []
    nnt1s = []
    nnt2s = []
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        p = precision(x)
        r = recall(x, freqs[queried[0]])
        f = f1score(x, freqs[queried[0]])
        g = ndcg(x)
        ap = average_precision(x, freqs[queried[0]])
        t1 = nnt1(x, freqs[queried[0]])
        t2 = nnt2(x, freqs[queried[0]])
        precisions.append(p)
        recalls.append(r)
        f1scores.append(f)
        gains.append(g)
        aps.append(ap)
        nnt1s.append(t1)
        nnt2s.append(t2)

    print('mean precision:', numpy.mean(precisions))
    print('mean recall:', numpy.mean(recalls))
    print('mean F1 score:', numpy.mean(f1scores))
    print('mAP:', numpy.mean(aps))
    print('mean NDCG:', numpy.mean(gains))
    print('mean nearest neighbor:', numpy.mean(nnt1s), numpy.mean(nnt2s))

    # plot precision-recall curve
    mean_precisions = numpy.zeros(cutoff, numpy.float64)
    mean_recalls = numpy.zeros(cutoff, numpy.float64)
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        x = numpy.pad(x, (0, cutoff - len(x)), 'constant', constant_values=(0))
        precisions = []
        recalls = []
        for k, _ in enumerate(x):
            p = precision(x[:k + 1])
            r = recall(x[:k + 1], freqs[queried[0]])
            precisions.append(p)
            recalls.append(r)
        mean_precisions += precisions
        mean_recalls += recalls

    mean_precisions /= len(results)
    mean_recalls /= len(results)
    plt.plot(mean_recalls, mean_precisions)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.axis([0, 1, 0, 1.05])
    plt.show()
def evaluate_test_set(session, tags, preds, fnames, lines, batch_limit=None):
    batch_num = 0
    num_sequences = 0
    p_tp_total, p_fp_total, r_tp_total, r_fn_total = 0, 0, 0, 0
    p_tp_total_binary, p_fp_total_binary, r_tp_total_binary, r_fn_total_binary = 0, 0, 0, 0
    while True:
        try:
            # Train binary, eval binary setting
            y, y_, filenames, line_nums = \
                session.run([tags, preds, fnames, lines])
            p_tp, p_fp = metrics.precision(reader, y, y_, counts=True)
            r_tp, r_fn = metrics.recall(reader, y, y_, counts=True)
            p_tp_total += p_tp
            p_fp_total += p_fp
            r_tp_total += r_tp
            r_fn_total += r_fn

            # Train all tags, eval binary setting
            p_tp_binary, p_fp_binary = metrics.precision(reader, y, y_, binary=True, counts=True)
            r_tp_binary, r_fn_binary = metrics.recall(reader, y, y_, binary=True, counts=True)
            p_tp_total_binary += p_tp_binary
            p_fp_total_binary += p_fp_binary
            r_tp_total_binary += r_tp_binary
            r_fn_total_binary += r_fn_binary

            # TODO: Train binary, eval binary setting
            num_sequences += len(y)
            batch_num += 1
            if batch_num == batch_limit:
                break
        except tf.errors.OutOfRangeError:
            print 'test queue is empty'
            break

    if p_tp_total:
        precision = p_tp_total / (p_tp_total + p_fp_total)
        recall = r_tp_total / (r_tp_total + r_fn_total)
        f1 = metrics.f1(precision, recall)
        precision_binary = p_tp_total_binary / (p_tp_total_binary + p_fp_total_binary)
        recall_binary = r_tp_total_binary / (r_tp_total_binary + r_fn_total_binary)
        f1_binary = metrics.f1(precision_binary, recall_binary)
        print 'Evaluated {} sequences from test set'.format(num_sequences)
        print 'Precision: ', precision
        print 'Recall: ', recall
        print 'f1: ', f1
        print 'Precision Binary: ', precision_binary
        print 'Recall Binary: ', recall_binary
        print 'f1 Binary: ', f1_binary
def classification_report(y_true, y_pred):
    print('--------------------------------')
    print('Accuracy -', metrics.accuracy(y_true, y_pred))
    print('Recall -', metrics.recall(y_true, y_pred))
    print('Precision -', metrics.precision(y_true, y_pred))
    print('F1 score -', metrics.f1_score(y_true, y_pred))
    print('--------------------------------')
def test(self):
    tf.global_variables_initializer().run()
    self.saver = tf.train.Saver()
    could_load, checkpoint_counter = self.load(self.checkpoint_dir)
    if could_load:
        print(" [*] Load SUCCESS")
    else:
        print(" [!] Load failed...")

    test_feed_dict = {
        self.test_inptus: self.test_x,
        self.test_labels: self.test_y
    }
    summary_str, test_loss, test_accuracy, p, t = self.sess.run(
        [
            self.test_summary, self.test_loss, self.test_accuracy,
            self.test_plab, self.test_tlab
        ],
        feed_dict=test_feed_dict)

    import metrics
    print("test_accuracy: {}".format(test_accuracy))
    with open('resnet.txt', 'a') as f:  # open the results log file in append mode
        f.write(
            str(self.i) + '-' + str(self.j) + ',' +
            str(metrics.accuracy(t, p)) + ',' +
            str(metrics.precision(t, p)) + ',' +
            str(metrics.recall(t, p)) + ',' +
            str(metrics.f1score(t, p)) + ',' +
            str(metrics.ft(t, p)) + '\n')
def f1_score(y_true, y_pred):
    from metrics import precision, recall
    y_true = K.cast(y_true, dtype='float32')
    y_pred = K.cast(y_pred, dtype='float32')
    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
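# f1_score above imports precision and recall from a local metrics module. A common
# Keras-backend formulation of those two functions is sketched here as an assumption
# for illustration; it is not necessarily the project's own implementation.
from keras import backend as K

def precision(y_true, y_pred):
    # true positives over predicted positives, with rounding to binarize predictions
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())

def recall(y_true, y_pred):
    # true positives over actual positives
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())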
def evaluate(path):
    queries = read_dataset('queries.csv')
    targets = read_dataset('targets.csv')
    freqs = freq_count(targets)
    results = load_results(path, queries, targets)
    cutoff = 1000

    precisions = []
    recalls = []
    f1scores = []
    aps = []
    gains = []
    nnt1s = []
    nnt2s = []
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        p = precision(x)
        r = recall(x, freqs[queried[0]])
        f = f1score(x, freqs[queried[0]])
        g = ndcg(x)
        ap = average_precision(x, freqs[queried[0]])
        t1 = nnt1(x, freqs[queried[0]])
        t2 = nnt2(x, freqs[queried[0]])
        precisions.append(p)
        recalls.append(r)
        f1scores.append(f)
        gains.append(g)
        aps.append(ap)
        nnt1s.append(t1)
        nnt2s.append(t2)
        print('precision:', p)
        print('recall:', r)
        print('F1 score:', f)
        print('average precision:', ap)
        print('NDCG:', g)
        print('nearest neighbor:', t1, t2)
def eval_classifier(classifier, x, y):
    y_pred = classifier.predict(x)
    conf = metrics.conf_matrix(y_pred, y)
    accuracy = metrics.accuracy(y_pred, y)
    precision = metrics.precision(y_pred, y)
    recall = metrics.recall(y_pred, y)
    f1_score = metrics.f_score(y_pred, y, beta=1)
    avg_prec = np.mean(precision)
    avg_rec = np.mean(recall)
    avg_f1 = np.mean(f1_score)

    print("Confusion Matrix: ")
    print(conf)
    print("Accuracy:")
    print(accuracy)
    print("Precision:")
    print(precision)
    print(f"Average Precision: {avg_prec}")
    print("Recall:")
    print(recall)
    print(f"Average Recall: {avg_rec}")
    print("F1_score:")
    print(f1_score)
    print(f"Average F1 Score: {avg_f1}")
def validation_end(self, outputs):
    # OPTIONAL
    avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
    avg_acc_loss = torch.stack([x['accuracy'] for x in outputs]).mean()
    if self.mode == Train_Mode.TEACHER:
        self.embeddings_all = torch.cat(self.embeddings_all).cpu()
        self.labels_all = torch.cat(self.labels_all).cpu()
        rec = recall(self.embeddings_all, self.labels_all, K=self.K)
        log_metrics = {
            "recall": rec[0],
            "val_loss": avg_loss.item(),
        }
    elif self.mode == Train_Mode.STUDENT:
        avg_triplet_loss = torch.stack([x['val_triplet_loss'] for x in outputs]).mean()
        avg_angle_loss = torch.stack([x['val_angle_loss'] for x in outputs]).mean()
        avg_dist_loss = torch.stack([x['val_dist_loss'] for x in outputs]).mean()
        log_metrics = {
            "val_triplet_loss": avg_triplet_loss.item(),
            "val_angle_loss": avg_angle_loss.item(),
            "val_dist_loss": avg_dist_loss.item(),
            "val_accuracy": avg_acc_loss.item(),
            "val_loss": avg_loss.item(),
        }

    self.embeddings_all, self.labels_all = [], []
    self.train_step = 0
    self.train_num_correct = 0
    self.val_step = 0
    self.val_num_correct = 0
    return {'val_loss': avg_loss, 'log': log_metrics}
def compute_all_metrics(execution_id, path_input, path_output, formula, append):
    """
    Computes all metrics and persists them in a CSV file.

    Args:
        execution_id (int): identifier of the execution
        path_input (string): path of the file that contains the classifications
        path_output (string): path of the file that will persist the metrics
        formula (string): mean_max | mean_mean
        append (boolean): true | false
    """
    from metrics import accuracy, precision, recall, f1, specificity

    # loading results
    with open(path_input) as data_file:
        data = json.load(data_file)

    # computing metrics
    tp = tn = fp = fn = 0
    for i in range(0, len(data)):
        if (data[i]['values'][formula]['positive'] >=
                data[i]['values'][formula]['negative']):
            if data[i]['values']['label'] == 'positive':
                tp += 1
            else:
                fp += 1
        elif (data[i]['values'][formula]['positive'] <
              data[i]['values'][formula]['negative']):
            if (data[i]['values']['label'] == 'negative'):
                tn += 1
            else:
                fn += 1
        else:
            raise Exception(
                "Positive similarity equals to negative similarity to news " +
                data[i]['id'])

    accuracy = accuracy(tp, tn, fp, fn)
    recall = recall(tp, fn)
    precision = precision(tp, fp)
    f1 = f1(precision, recall)
    specificity = specificity(tn, fp)

    # persisting the results
    with open(path_output, 'a' if append else 'w') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        if (not append):
            spamwriter.writerow([
                'execution_id', 'tp', 'tn', 'fp', 'fn', 'accuracy',
                'precision', 'recall', 'f1', 'specificity'
            ])
        spamwriter.writerow([
            execution_id, tp, tn, fp, fn, accuracy, precision, recall, f1,
            specificity
        ])
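# compute_all_metrics() expects count-based helpers with the call signatures used above:
# accuracy(tp, tn, fp, fn), recall(tp, fn), precision(tp, fp), f1(precision, recall) and
# specificity(tn, fp). The bodies below are a sketch written as an assumption for
# illustration; the project's metrics module may differ in details such as zero handling.
def accuracy(tp, tn, fp, fn):
    return (tp + tn) / (tp + tn + fp + fn)

def recall(tp, fn):
    return tp / (tp + fn) if (tp + fn) else 0.0

def precision(tp, fp):
    return tp / (tp + fp) if (tp + fp) else 0.0

def f1(precision, recall):
    return 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0

def specificity(tn, fp):
    return tn / (tn + fp) if (tn + fp) else 0.0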
def supervised_eval(self, train_or_valid):
    data = self.dataset.get_labeled_data(train_or_valid)
    if data is None:
        raise ValueError('no labeled examples present in dataset')
    X_labeled, y_true, _ = data
    y_pred = self.model.predict(X_labeled)
    p, r, ac, g, auc = metrics.precision(y_true, y_pred), metrics.recall(y_true, y_pred), \
        metrics.accuracy(y_true, y_pred), metrics.g_means(y_true, y_pred), \
        metrics.auc(y_true, y_pred)
    self.metrics[train_or_valid].append((p, r, ac, g, auc))
def test_1(self):
    actual = [1, 1, 0, 1, 1, 1, 0, 0, 1, 1]
    predicted = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
    tp, fn, fp, tn = metrics.confusion_matrix(actual, predicted)
    self.assertEqual(tp, 3)
    self.assertEqual(fn, 4)
    self.assertEqual(fp, 1)
    self.assertEqual(tn, 2)
    self.assertEqual(metrics.accuracy(actual, predicted), 0.5)
    self.assertEqual(metrics.precision(actual, predicted), 3/4)
    self.assertEqual(metrics.recall(actual, predicted), 3/7)
    self.assertEqual(metrics.f1(actual, predicted), 6/11)
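# A minimal metrics module consistent with the expectations encoded in test_1 above
# (confusion_matrix returns counts in the order tp, fn, fp, tn). This is a sketch of
# what the module under test might look like, not the project's actual code.
def confusion_matrix(actual, predicted):
    tp = sum(1 for a, p in zip(actual, predicted) if a == 1 and p == 1)
    fn = sum(1 for a, p in zip(actual, predicted) if a == 1 and p == 0)
    fp = sum(1 for a, p in zip(actual, predicted) if a == 0 and p == 1)
    tn = sum(1 for a, p in zip(actual, predicted) if a == 0 and p == 0)
    return tp, fn, fp, tn

def accuracy(actual, predicted):
    tp, fn, fp, tn = confusion_matrix(actual, predicted)
    return (tp + tn) / (tp + tn + fp + fn)

def precision(actual, predicted):
    tp, fn, fp, tn = confusion_matrix(actual, predicted)
    return tp / (tp + fp)

def recall(actual, predicted):
    tp, fn, fp, tn = confusion_matrix(actual, predicted)
    return tp / (tp + fn)

def f1(actual, predicted):
    # harmonic mean written as 2*tp / (2*tp + fp + fn)
    tp, fn, fp, tn = confusion_matrix(actual, predicted)
    return 2 * tp / (2 * tp + fp + fn)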
def post(self):
    global current_scene
    score = 0
    print('Submitted scene %s' % current_scene)
    if self.scene_exists(current_scene):
        return {
            'message': "Scene {} already exists.".format(current_scene)
        }, 400

    correct_dict = self.fetch_correct_result()
    if not correct_dict:
        return {
            'message': "There is no result data for scene {}.".format(current_scene)
        }, 400

    your_dict = request.get_json()
    submission_time = datetime.datetime.now()
    print(' Correct prediction', correct_dict)
    your_dict = {str(k): int(v) for k, v in your_dict.items()}
    print(' Your prediction', your_dict)
    sys.stdout.flush()

    score = 0
    score2 = 0
    score3 = 0
    if your_dict:
        # score = Benchmark.diff_dicts(correct_dict, your_dict)
        score = metrics.accuracy(correct_dict, your_dict)
        score2 = metrics.precision(correct_dict, your_dict)
        score3 = metrics.recall(correct_dict, your_dict)
        print("scene accuracy", score)
        print("scene precision", score2)
        print("scene recall", score3)

    submission_result = {
        'scene': current_scene,
        'accuracy': score,
        'precision': score2,
        'recall': score3
    }
    try:
        self.insert(submission_result, submission_time)
    except:
        return {
            'message': 'An error occurred while inserting the item'
        }, 500
    return {
        'Your score for this scene is ': submission_result['accuracy']
    }, 201
def recalls(self):
    predictions = self.one_hot
    size = len(self._classes)
    with tf.compat.v1.name_scope("Recalls"):
        rs = []
        ops = []
        for i, c in enumerate(self._classes):
            mask = tf.one_hot([i], size, axis=-1)
            r, op = recall(labels=self.target,
                           predictions=predictions,
                           weights=mask)
            tf.compat.v1.summary.scalar("c{}_{}".format(i, c), r * 100)
            rs.append(r)
            ops.append(op)
    return rs, ops
def results_to_metrics(results, methods, ref_motifs):
    _, _, ref_labels = motif.unpack_motif(ref_motifs)
    metric_dict = dict.fromkeys(methods)
    for m in methods:
        obs_motifs = results[m]
        _, _, obs_labels = motif.unpack_motif(obs_motifs)

        this_edit = metrics.edit_distance(obs_labels, ref_labels)
        this_recall = metrics.recall(obs_motifs, ref_motifs)
        this_precis = metrics.precision(obs_motifs, ref_motifs)
        this_f = metrics.f_measure(obs_motifs, ref_motifs)
        this_bm = metrics.boundary_distance(obs_motifs, ref_motifs)
        metric_dict[m] = [this_edit, this_recall, this_precis, this_f, this_bm]
    return metric_dict
def active_simulation_eval(self):
    data = self.dataset.get_unlabeled_data()
    if data is None:
        UserWarning(
            'all examples have been labeled; this eval mode works '
            'if there is an unlabeled pool of data in `simulate` mode'
        )
        return
    X_unlabeled, unlabeled_indexes = data
    # get the unlabeled examples' labels in simulation with `y_ideal`
    y_true = self.dataset.y_ideal[unlabeled_indexes]
    y_pred = self.model.predict(X_unlabeled)
    p, r, ac, g, auc = metrics.precision(y_true, y_pred), metrics.recall(y_true, y_pred), \
        metrics.accuracy(y_true, y_pred), metrics.g_means(y_true, y_pred), \
        metrics.auc(y_true, y_pred)
    self.metrics['simulate'].append((p, r, ac, g, auc))
def recalls(self):
    predictions = self.one_hot
    size = len(self._classes)
    with tf.name_scope("Recalls"):
        rs = []
        ops = []
        for i, c in enumerate(self._classes):
            mask = tf.one_hot([i], size, axis=-1)
            r, op = recall(labels=self.target,
                           predictions=predictions,
                           weights=mask,
                           updates_collections=tf.GraphKeys.UPDATE_OPS)
            tf.summary.scalar("c_{}".format(c), r * 100)
            rs.append(r)
            ops.append(op)
    return rs, ops
def validation_end(self, outputs):
    # OPTIONAL
    avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
    if self.mode == Train_Mode.TEACHER:
        self.embeddings_all = torch.cat(self.embeddings_all).cpu()
        self.labels_all = torch.cat(self.labels_all).cpu()
        rec = recall(self.embeddings_all, self.labels_all, K=self.K)
        log_metrics = {
            "recall": rec[0],
            "val_loss": avg_loss.item(),
        }

    self.embeddings_all, self.labels_all = [], []
    return {'val_loss': avg_loss, 'log': log_metrics}
def compute_all_metrics(execution_id, path_input, path_output, formula, append):
    """
    Computes all metrics and persists them in a CSV file.

    Args:
        execution_id (int): identifier of the execution
        path_input (string): path of the file that contains the classifications
        path_output (string): path of the file that will persist the metrics
        formula (string): mean_max | mean_mean
        append (boolean): true | false
    """
    from metrics import accuracy, precision, recall, f1, specificity

    # loading results
    with open(path_input) as data_file:
        data = json.load(data_file)

    # computing metrics
    tp = tn = fp = fn = 0
    for i in range(0, len(data)):
        if (data[i]['values'][formula]['positive'] >=
                data[i]['values'][formula]['negative']):
            if data[i]['values']['label'] == 'positive':
                tp += 1
            else:
                fp += 1
        elif (data[i]['values'][formula]['positive'] <
              data[i]['values'][formula]['negative']):
            if (data[i]['values']['label'] == 'negative'):
                tn += 1
            else:
                fn += 1
        else:
            raise Exception("Positive similarity equals to negative similarity to news " + data[i]['id'])

    accuracy = accuracy(tp, tn, fp, fn)
    recall = recall(tp, fn)
    precision = precision(tp, fp)
    f1 = f1(precision, recall)
    specificity = specificity(tn, fp)

    # persisting the results
    with open(path_output, 'a' if append else 'w') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        if (not append):
            spamwriter.writerow(
                ['execution_id', 'tp', 'tn', 'fp', 'fn', 'accuracy',
                 'precision', 'recall', 'f1', 'specificity'])
        spamwriter.writerow([execution_id, tp, tn, fp, fn, accuracy,
                             precision, recall, f1, specificity])
def test(engine, options):
    queries = pd.read_csv(os.path.join('data', 'queries_train.tsv'), sep='\t')
    bench_lbls = pd.read_csv(os.path.join('data', 'benchmark_lbls_train.csv'),
                             dtype={'query': int, 'tweet': str, 'y_true': int})
    q2n_relevant = bench_lbls.groupby('query')['y_true'].sum().to_dict()

    queries_results = []
    q_times = []
    for i, row in queries.iterrows():
        q_id = row['query_id']
        q_keywords = row['keywords']
        start_time = time.time()
        q_n_res, q_res = engine.search(q_keywords, options['methods'])
        end_time = time.time()
        q_time = end_time - start_time
        q_times.append(q_time)
        queries_results.extend([(q_id, str(doc_id)) for doc_id in q_res])
        if q_time > 10:
            print(f'Query time exceeded: {options}')

    queries_results = pd.DataFrame(queries_results, columns=['query', 'tweet'])
    q_results_labeled = pd.merge(queries_results, bench_lbls,
                                 on=['query', 'tweet'], how='inner',
                                 suffixes=('_result', '_bench'))

    options['max_q_time'] = max(q_times)
    options['avg_q_time'] = sum(q_times) / len(q_times)
    options['MAP'] = metrics.map(q_results_labeled)
    options['precision'] = metrics.precision(q_results_labeled)
    options['precision@5'] = metrics.precision(q_results_labeled.groupby('query').head(5))
    options['precision@10'] = metrics.precision(q_results_labeled.groupby('query').head(10))
    options['precision@50'] = metrics.precision(q_results_labeled.groupby('query').head(50))
    options['recall'] = metrics.recall(q_results_labeled, q2n_relevant)
    save_to_csv(options)
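# One plausible reading of metrics.precision / metrics.recall as used in test() above:
# per-query precision and recall over the merged labeled results, averaged across queries.
# This sketch is an assumption for illustration only; the project's metrics module may
# define these differently.
import pandas as pd

def precision(df):
    # df has one row per (query, tweet) pair with a binary y_true relevance label
    return df.groupby('query')['y_true'].mean().mean()

def recall(df, q2n_relevant):
    # q2n_relevant maps each query id to its total number of relevant tweets
    hits = df.groupby('query')['y_true'].sum()
    return (hits / pd.Series(q2n_relevant)).mean()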
def metric_fn(label_ids, predict, num_labels, answer_num):
    mask = tf.sequence_mask(answer_num, FLAGS.max_answer_num)
    precision = metrics.precision(label_ids, predict, num_classes=num_labels,
                                  weights=mask, pos_indices=[1])
    recall = metrics.recall(label_ids, predict, num_classes=num_labels,
                            weights=mask, pos_indices=[1])
    f1_score = metrics.f1(label_ids, predict, num_classes=num_labels,
                          weights=mask, pos_indices=[1])
    return {
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score
    }
def calculate_metrics(patient_id, spacing, label_arr_org, pred_arr_org,
                      HAUSDORFF_PERCENT, OVERLAP_TOLERANCE,
                      SURFACE_DICE_TOLERANCE):
    """
    Metric calculation cleanup in test.py.
    """
    result = {}
    result["patient_id"] = patient_id
    # result["precision"] = precision(label_arr_org, pred_arr_org)
    result["recall"] = recall(label_arr_org, pred_arr_org)
    result["jaccard"] = jaccard(label_arr_org, pred_arr_org)
    result["dice"] = dice(label_arr_org, pred_arr_org)
    result["segmentation_score"] = segmentation_score(label_arr_org, pred_arr_org, spacing)

    bbox_metrics = calculate_bbox_metrics(label_arr_org, pred_arr_org, spacing)
    result = append_helper(
        result, ["x_distance", "y_distance", "z_distance", "distance"], bbox_metrics)

    surface_dice_metrics = surface_dice(label_arr_org, pred_arr_org, spacing,
                                        HAUSDORFF_PERCENT, OVERLAP_TOLERANCE,
                                        SURFACE_DICE_TOLERANCE)
    result = append_helper(result, [
        "average_surface_distance_gt_to_pr",
        "average_surface_distance_pr_to_gt",
        "robust_hausdorff",
        "overlap_fraction_gt_with_pr",
        "overlap_fraction_pr_with_gt",
        "surface_dice"
    ], surface_dice_metrics)

    # get bbox center (indices) of prediction for the next segmentation step
    for axes in ["X", "Y", "Z"]:
        for location in ["min", "center", "max", "length"]:
            result["prediction_{}_{}".format(axes, location)] = \
                bbox_metrics["prediction_bbox_metrics"][axes][location]
    return result, result["dice"], bbox_metrics
user_profile = train[test_user].indices  # items the user interacted with in the train split
relevant_items = test[test_user].indices
if len(relevant_items) > 0:
    neval += 1
    #
    # TODO: Here you can write to file the recommendations for each user in the test split.
    # WARNING: there is a catch with the item idx!
    #
    # this will rank *all* items
    recommended_items = recommender.recommend(user_profile, exclude_seen=True)
    # use this to have the *top-k* recommended items
    # (warning: this can underestimate ROC-AUC for small k)
    # recommended_items = recommender.recommend(user_profile, k=at, exclude_seen=True)
    roc_auc_ += roc_auc(recommended_items, relevant_items)
    precision_ += precision(recommended_items, relevant_items, at=at)
    recall_ += recall(recommended_items, relevant_items, at=at)
    map_ += map(recommended_items, relevant_items, at=at)
    mrr_ += rr(recommended_items, relevant_items, at=at)
    ndcg_ += ndcg(recommended_items, relevant_items, relevance=test[test_user].data, at=at)

roc_auc_ /= neval
precision_ /= neval
recall_ /= neval
map_ /= neval
mrr_ /= neval
ndcg_ /= neval

logger.info('Ranking quality')
logger.info('ROC-AUC: {:.4f}'.format(roc_auc_))
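# The accumulation loop above assumes ranking metrics that take the recommended item ids,
# the ground-truth relevant ids, and a cutoff `at`. The functions below are a sketch of
# precision@k and recall@k under that assumption, written for illustration; they are not
# the repository's own metrics.
import numpy as np

def precision(recommended_items, relevant_items, at=None):
    # fraction of the top-`at` recommendations that are relevant
    hits = np.in1d(recommended_items[:at], relevant_items)
    return hits.mean() if len(hits) else 0.0

def recall(recommended_items, relevant_items, at=None):
    # fraction of the relevant items found in the top-`at` recommendations
    hits = np.in1d(recommended_items[:at], relevant_items)
    return hits.sum() / len(relevant_items) if len(relevant_items) else 0.0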
def evaluating(self, model, dataset, split):
    """
    input:
        model: (object) pytorch model
        dataset: (object) dataset
        split: (str) split of dataset in ['train', 'val', 'test']
    return [overall_accuracy, precision, recall, f1-score, jaccard, kappa]
    """
    args = self.args
    oa, precision, recall, f1, jac, kappa = 0, 0, 0, 0, 0, 0
    model.eval()
    data_loader = DataLoader(dataset, args.batch_size, num_workers=4, shuffle=False)
    batch_iterator = iter(data_loader)
    steps = len(dataset) // args.batch_size

    start = time.time()
    for step in range(steps):
        x, y = next(batch_iterator)
        x = Variable(x, volatile=True)
        y = Variable(y, volatile=True)
        if args.cuda:
            x = x.cuda()
            y = y.cuda()
        # calculate pixel accuracy of generator
        gen_y = model(x)
        if self.is_multi:
            gen_y = gen_y[0]
        oa += metrics.overall_accuracy(gen_y.data, y.data)
        precision += metrics.precision(gen_y.data, y.data)
        recall += metrics.recall(gen_y.data, y.data)
        f1 += metrics.f1_score(gen_y.data, y.data)
        jac += metrics.jaccard(gen_y.data, y.data)
        kappa += metrics.kappa(gen_y.data, y.data)
    _time = time.time() - start

    if not os.path.exists(os.path.join(Logs_DIR, 'statistic')):
        os.makedirs(os.path.join(Logs_DIR, 'statistic'))

    # recording performance of the model
    nb_samples = steps * args.batch_size
    basic_info = [self.date, self.method, self.epoch, self.iter, nb_samples, _time]
    basic_info_names = ['date', 'method', 'epochs', 'iters', 'nb_samples', 'time(sec)']

    perform = [round(idx / steps, 3) for idx in [oa, precision, recall, f1, jac, kappa]]
    perform_names = ["overall_accuracy", "precision", "recall", "f1-score", "jaccard", "kappa"]

    cur_log = pd.DataFrame([basic_info + perform],
                           columns=basic_info_names + perform_names)
    # save performance
    if os.path.exists(os.path.join(Logs_DIR, 'statistic', "{}.csv".format(split))):
        logs = pd.read_csv(os.path.join(Logs_DIR, 'statistic', "{}.csv".format(split)))
    else:
        logs = pd.DataFrame([])
    logs = logs.append(cur_log, ignore_index=True)
    logs.to_csv(os.path.join(Logs_DIR, 'statistic', "{}.csv".format(split)),
                index=False, float_format='%.3f')
f"{engine_module} results have MAP value of {results_map}." ) if results_map <= 0 or results_map > 1: logging.error( f'{engine_module} results MAP value is out of range (0,1).' ) # test that the average across queries of precision, # precision@5, precision@10, precision@50, and recall # is in [0,1]. prec, p5, p10, p50, recall = \ metrics.precision(q_results_labeled), \ metrics.precision(q_results_labeled.groupby('query').head(5)), \ metrics.precision(q_results_labeled.groupby('query').head(10)), \ metrics.precision(q_results_labeled.groupby('query').head(50)), \ metrics.recall(q_results_labeled, q2n_relevant) logging.debug( f"{engine_module} results produced average precision of {prec}." ) logging.debug( f"{engine_module} results produced average precision@5 of {p5}." ) logging.debug( f"{engine_module} results produced average precision@10 of {p10}." ) logging.debug( f"{engine_module} results produced average precision@50 of {p50}." ) logging.debug( f"{engine_module} results produced average recall of {recall}." )
def predict(loss_fn, model, data_set, data_loader, counting=False):
    """
    Validate after training an epoch
    Note:
    """
    model.eval()

    true_positives = []
    predicted_positives = []
    possible_positives = []
    union_areas = []
    loss = []
    for bc_cnt, bc_data in enumerate(data_loader):
        if counting:
            print('%d/%d' % (bc_cnt, len(data_set) // data_loader.batch_size))
        imgs, masks, _ = bc_data
        imgs = Variable(imgs).cuda()
        masks = Variable(masks).cuda()
        # labels = Variable(labels).cuda()

        outputs = model(imgs)
        # outputs = outputs.view(-1, outputs.size()[2], outputs.size()[3])
        # print(outputs.size(), masks.size())
        # if outputs.size() != masks.size():
        #     outputs = F.upsample(outputs, size=masks.size()[-2:], mode='bilinear')

        mask_loss = torch.zeros(1).cuda()
        for o in outputs:
            o = o.view(-1, o.size()[2], o.size()[3])
            mask_loss = mask_loss + float(loss_fn(o, masks))
        # loss = criterion(outputs, masks)
        loss.append(mask_loss)
        # loss.append(loss_fn(outputs, masks))

        # outputs = F.softmax(model(imgs), dim=1)
        # if outputs.size() != masks.size():
        #     outputs = F.upsample(outputs, size=masks.size()[-2:], mode='bilinear')
        # _, outputs = torch.max(outputs, dim=1)

        output = outputs[-1]
        output = output.view(-1, output.size()[2], output.size()[3])
        output = output.cpu().data.numpy()
        # labels = labels.cpu().data.numpy()
        masks = masks.cpu().data.numpy()
        imgs = imgs.cpu().data.numpy()

        true_positive, predicted_positive, possible_positive, union_area = metrics_pred(
            output, imgs, masks)
        true_positives += true_positive
        predicted_positives += predicted_positive
        possible_positives += possible_positive
        union_areas += union_area

    precisions = precision(true_positives, predicted_positives)
    recalls = recall(true_positives, possible_positives)
    f1_scores = f1_score(recalls, precisions)
    loss = torch.tensor(loss)
    return precisions, recalls, f1_scores, loss.mean()
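# predict() above aggregates per-image counts and then calls count-based helpers with the
# signatures precision(true_positives, predicted_positives), recall(true_positives,
# possible_positives) and f1_score(recalls, precisions). A sketch under those assumptions,
# for illustration only; the project's own metrics module may differ.
def precision(true_positives, predicted_positives):
    tp, pp = sum(true_positives), sum(predicted_positives)
    return tp / pp if pp else 0.0

def recall(true_positives, possible_positives):
    tp, ap = sum(true_positives), sum(possible_positives)
    return tp / ap if ap else 0.0

def f1_score(recalls, precisions):
    # harmonic mean of the aggregate precision and recall scalars
    return 2 * precisions * recalls / (precisions + recalls) if (precisions + recalls) else 0.0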
def run():
    while True:
        trial = pull_pending()
        if trial is None:
            break

        params = eval(trial['Parameters'])
        logging.info(trial)

        dataset = load(trial['Dataset'])
        fold = int(trial['Fold']) - 1
        (X_train, y_train), (X_test, y_test) = dataset[fold][0], dataset[fold][1]

        n_minority = Counter(y_train).most_common()[1][1]
        n_majority = Counter(y_train).most_common()[0][1]
        imblearn_ratios = [
            ((n_majority - n_minority) * ratio + n_minority) / n_majority
            for ratio in [0.5, 0.75, 1.0]
        ]

        clf = {
            'NB': NB(),
            'KNN': KNN(),
            'SVM': SVM(gamma='scale'),
            'CART': CART()
        }[params['classifier']]

        if (trial['Algorithm'] is None) or (trial['Algorithm'] == 'None'):
            algorithm = None
        else:
            algorithms = {
                'AKNN': ResamplingCV(AKNN, clf, n_neighbors=[1, 3, 5, 7]),
                'Bord': ResamplingCV(SMOTE, clf, kind=['borderline1'],
                                     k_neighbors=[1, 3, 5, 7, 9],
                                     m_neighbors=[5, 10, 15],
                                     sampling_strategy=imblearn_ratios),
                'CC': ResamplingCV(CC, clf, sampling_strategy=imblearn_ratios),
                'CNN': ResamplingCV(CNN, clf, n_neighbors=[1, 3, 5, 7]),
                'ENN': ResamplingCV(ENN, clf, n_neighbors=[1, 3, 5, 7]),
                'IHT': ResamplingCV(IHT, clf, sampling_strategy=imblearn_ratios, cv=[2]),
                'NCL': ResamplingCV(NCL, clf, n_neighbors=[1, 3, 5, 7]),
                'NM': ResamplingCV(NM, clf, n_neighbors=[1, 3, 5, 7]),
                'OSS': ResamplingCV(OSS, clf, n_neighbors=[1, 3, 5, 7]),
                'RBO': ResamplingCV(RBO, clf, gamma=[0.01, 0.1, 1.0, 10.0],
                                    ratio=[0.5, 0.75, 1.0]),
                'RBU': ResamplingCV(RBU, clf, gamma=params.get('gamma'),
                                    ratio=params.get('ratio')),
                'RENN': ResamplingCV(RENN, clf, n_neighbors=[1, 3, 5, 7]),
                'ROS': ResamplingCV(ROS, clf, sampling_strategy=imblearn_ratios),
                'RUS': ResamplingCV(RUS, clf, sampling_strategy=imblearn_ratios),
                'SMOTE': ResamplingCV(SMOTE, clf, k_neighbors=[1, 3, 5, 7, 9],
                                      sampling_strategy=imblearn_ratios),
                'SMOTE+ENN': ResamplingCV(
                    SMOTEENN, clf,
                    smote=[SMOTE(k_neighbors=k) for k in [1, 3, 5, 7, 9]],
                    sampling_strategy=imblearn_ratios),
                'SMOTE+TL': ResamplingCV(
                    SMOTETomek, clf,
                    smote=[SMOTE(k_neighbors=k) for k in [1, 3, 5, 7, 9]],
                    sampling_strategy=imblearn_ratios),
                'TL': TL(),
            }
            algorithm = algorithms.get(trial['Algorithm'])
            if algorithm is None:
                raise NotImplementedError

        if algorithm is not None:
            X_train, y_train = algorithm.fit_sample(X_train, y_train)

        clf = clf.fit(X_train, y_train)
        predictions = clf.predict(X_test)

        scores = {
            'Precision': metrics.precision(y_test, predictions),
            'Recall': metrics.recall(y_test, predictions),
            'F-measure': metrics.f_measure(y_test, predictions),
            'AUC': metrics.auc(y_test, predictions),
            'G-mean': metrics.g_mean(y_test, predictions)
        }
        submit_result(trial, scores)
def post(self):
    # signal.alarm(0)
    # signal.alarm(5)
    global current_scene, latency, overall_latency
    scene_latency = datetime.datetime.utcnow() - latency
    if overall_latency:
        overall_latency += scene_latency
    else:
        overall_latency = scene_latency
    print("Latency for scene %s was %s" % (current_scene, scene_latency))
    # timeout = int(os.getenv("BENCHMARK_POST_TIMEOUT", default=10))
    # watchdog.reset_and_extend(timeout)

    score = 0
    print('Submitted scene %s' % current_scene)
    if self.scene_exists(current_scene):
        return {
            'message': "Scene {} already exists.".format(current_scene)
        }, 400

    correct_dict = self.fetch_correct_result()
    if not correct_dict:
        return {"message": "Please request at least one scene first"}, 400

    your_dict = request.get_json()
    submission_time = datetime.datetime.utcnow()
    try:
        your_dict = {str(k): int(v) for k, v in your_dict.items()}
    except ValueError:
        return {
            'message': "Your result json should be in the format {'object_name': 1} with the key as an object name."
        }, 400
    except AttributeError:
        return {
            'message': "Your result json is incorrect. Specify it like: {'object_name': 1}"
        }, 400

    print(' Correct prediction', correct_dict)
    print(' Your prediction', your_dict)
    sys.stdout.flush()

    score = 0
    score2 = 0
    score3 = 0
    if your_dict:
        # score = Benchmark.diff_dicts(correct_dict, your_dict)
        score = metrics.accuracy(correct_dict, your_dict)
        score2 = metrics.precision(correct_dict, your_dict)
        score3 = metrics.recall(correct_dict, your_dict)
        print("scene accuracy", score)
        print("scene precision", score2)
        print("scene recall", score3)

    submission_result = {
        'scene': current_scene,
        'accuracy': score,
        'precision': score2,
        'recall': score3
    }
    try:
        self.insert(submission_result, submission_time)
    except:
        return {
            'message': 'An error occurred while inserting the item'
        }, 500
    return {
        'Your score for this scene is ': submission_result['accuracy']
    }, 201
# For each confidence percentage
for i in xrange(100):
    # Get the predictions whose confidence exceeds a given threshold
    porcentaje = float(i) / 100
    aux = result[result['trust'] > porcentaje]

    # matrix = metrics.confusion_matrix(aux)
    matrix = metrics.hard_matrix(aux)

    # If the precision is less than zero, no data exceeded that confidence level
    precision = metrics.accuracy(matrix, clase)
    if precision >= 0:
        valores_accuracy.append(precision)
        valores_recall.append(metrics.recall(matrix, clase))
        x_values.append(porcentaje)

    # If the f_score is less than zero, no data exceeded that confidence level
    f_score = metrics.f_score(matrix, clase)
    if f_score >= 0:
        valores_fscore.append(f_score)
        x_values_fscore.append(porcentaje)

# graf(clase, x_values, valores_accuracy, 'Accuracy')
graf(clase, x_values, valores_recall, 'Recall')
# graf(clase, x_values_fscore, valores_fscore, 'F-Score')

print 'a'
plt.show()
def test_precision_recall_tradeoff(VGG11, CIFAR10):
    assert ((precision(VGG11, CIFAR10, 1, 'cuda:0') >= 0.95) or
            (recall(VGG11, CIFAR10, 1, 'cuda:0') >= 0.95))
def model_save_load(name, model, x, y=None):
    model_name = name + '.pkl'
    if model_name not in os.listdir('data/model'):
        model.fit(X=x, y=y)
        joblib.dump(model, 'data/model/' + model_name)
        return model
    else:
        model = joblib.load('data/model/' + model_name)
        return model


dg = dg()
for i in range(10):
    for j in range(10):
        train_data, test_data, train_labels, test_labels = dg.dsift_only(j)
        svm = SVC(kernel='poly', degree=3)
        svm = model_save_load('svm' + str(i) + '-' + str(j), svm,
                              train_data.reshape([train_data.shape[0], -1]),
                              train_labels)
        plab = p = svm.predict(test_data.reshape([test_data.shape[0], -1])).reshape(-1, 1).tolist()
        t = test_labels.reshape(-1, 1).tolist()
        print(str(metrics.accuracy(t, p)))
        with open('denseSIFTsvm.txt', 'a') as f:  # open the results log file in append mode
            f.write(
                str(i) + '-' + str(j) + ',' +
                str(metrics.accuracy(t, p)) + ',' +
                str(metrics.precision(t, p)) + ',' +
                str(metrics.recall(t, p)) + ',' +
                str(metrics.f1score(t, p)) + ',' +
                str(metrics.ft(t, p)) + '\n')
clf = None
clf = RandomForestClassifier(n_estimators=p, criterion='entropy',
                             max_depth=14, min_samples_split=20, n_jobs=2)
clf.fit(train_X, train_y)
results.append(metrics.predict_table(clf, test_X, test_y))

result = pd.concat(results)
matrix = metrics.confusion_matrix(result)
clases = matrix.columns.tolist()

precisions = [metrics.precision(matrix, c) for c in clases]
recalls = [metrics.recall(matrix, c) for c in clases]
f_scores = [metrics.f_score(matrix, c) for c in clases]
w_score = metrics.weighted_f_score(matrix)

# f = open(result_dir + str(max_depth) + ' ' + str(min_samples_split) + '.txt', 'w')
f = open(result_dir + str(p) + '.txt', 'w')
f.write('F_score by class')
f.write('\n')
f.write(str(f_scores))
f.write('\n')
f.write('\n')
f.write('Weighted average: ')
f.write(str(w_score))