Example #1
def evaluate(path):
    cad = read_dataset('cad.csv')
    rgbd = read_dataset('rgbd.csv')
    freqs = freq_count(cad)
    results = load_results(path, rgbd, cad)

    mP = 0.0
    mR = 0.0
    mF = 0.0
    mAP = 0.0
    mNDCG = 0.0
    mNNT1 = 0.0
    mNNT2 = 0.0

    for (queried, retrieved) in results:
        f = freqs[queried[0]]
        x = categories_to_rel(queried, retrieved)[:f]
        # Sum up the retrieval scores
        mP += precision(x)
        mR += recall(x, f)
        mF += f1score(x, f)
        mNDCG += ndcg(x)
        mAP += average_precision(x, f)
        mNNT1 += nnt1(x, f)
        mNNT2 += nnt2(x, f)

    n = len(results)
    print('num queries:', n)
    print('mean precision:', mP / n)
    print('mean recall:', mR / n)
    print('mean F1:', mF / n)
    print('mean AP:', mAP / n)
    print('mean NDCG: ', mNDCG / n)
    print('mean NNT1: ', mNNT1 / n)
    print('mean NNT2: ', mNNT2 / n)

    # Plot PR-curve
    cutoff = 1000
    mean_precisions = np.zeros(cutoff, np.float64)
    mean_recalls = np.zeros(cutoff, np.float64)
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        x = np.pad(x, (0, cutoff - len(x)), 'constant', constant_values=(0))
        precisions = []
        recalls = []
        for k, _ in enumerate(x):
            p = precision(x[:k + 1])
            r = recall(x[:k + 1], freqs[queried[0]])
            precisions.append(p)
            recalls.append(r)
        mean_precisions += precisions
        mean_recalls += recalls
    mean_precisions /= len(results)
    mean_recalls /= len(results)

    plt.plot(mean_recalls, mean_precisions)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.axis([0, 1, 0, 1.05])
    plt.show()
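Examples #1, #3, and #8 all assume a metrics module in which precision, recall, and average_precision take a 0/1 relevance list and, where needed, the total number of relevant items. A minimal sketch of such helpers, included only to make the calling convention concrete (these implementations are an assumption, not code from the project above):

import numpy as np

def precision(rel):
    # Fraction of retrieved items that are relevant; rel is a 0/1 relevance list.
    rel = np.asarray(rel, dtype=np.float64)
    return float(rel.sum() / len(rel)) if len(rel) else 0.0

def recall(rel, num_relevant):
    # Fraction of all relevant items that appear in the retrieved list.
    rel = np.asarray(rel, dtype=np.float64)
    return float(rel.sum() / num_relevant) if num_relevant else 0.0

def average_precision(rel, num_relevant):
    # Mean of precision@k over the ranks k at which a relevant item occurs.
    rel = np.asarray(rel, dtype=np.float64)
    if not num_relevant:
        return 0.0
    hits = [precision(rel[:k + 1]) for k in range(len(rel)) if rel[k]]
    return float(np.sum(hits)) / num_relevant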
Example #2
File: run.py Project: zingp/cbot
def test(test_set, model):
    print("starting testing...")
    start_time = time.time()
    model.eval()
    predictions, references = [], []
    with torch.no_grad():
        for i in range(len(test_set)):
            Y, T, data = test_set.get_candidate(i)
            Y = Y.to(device)
            T = T.to(device)
            ids = model.ranking(Y, T).data

            candidate = []
            comments = list(data['candidate'].keys())
            for id in ids:
                candidate.append(comments[id])
            predictions.append(candidate)
            references.append(data['candidate'])
            if i % 100 == 0:
                print(i)

    recall_1 = recall(predictions, references, 1)
    recall_5 = recall(predictions, references, 5)
    recall_10 = recall(predictions, references, 10)
    mr = mean_rank(predictions, references)
    mrr = mean_reciprocal_rank(predictions, references)
    s = "r1={}, r5={}, r10={}, mr={}, mrr={}"
    print(s.format(recall_1, recall_5, recall_10, mr, mrr))

    print("testing time:", time.time() - start_time)
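Example #2 reports recall@k, mean rank, and MRR over ranked candidate lists. The exact reference format in that project is not visible here; the sketch below assumes each reference is a dict mapping candidate comments to relevance scores and treats the highest-scored candidate as the single ground truth, purely to illustrate the three metrics:

def recall(predictions, references, k):
    # Hit@k: fraction of queries whose ground-truth candidate appears in the top k.
    hits = 0
    for ranked, ref in zip(predictions, references):
        target = max(ref, key=ref.get)  # assumed: best-scored candidate is the ground truth
        hits += int(target in ranked[:k])
    return hits / len(predictions)

def mean_rank(predictions, references):
    # Average 1-based rank of the ground-truth candidate.
    ranks = [ranked.index(max(ref, key=ref.get)) + 1
             for ranked, ref in zip(predictions, references)]
    return sum(ranks) / len(ranks)

def mean_reciprocal_rank(predictions, references):
    # Average of 1 / rank of the ground-truth candidate.
    ranks = [ranked.index(max(ref, key=ref.get)) + 1
             for ranked, ref in zip(predictions, references)]
    return sum(1.0 / r for r in ranks) / len(ranks)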
Example #3
def evaluate(path):
    queries = read_dataset('queries.csv')
    targets = read_dataset('targets.csv')
    freqs = freq_count(targets)
    results = load_results(path, queries, targets)
    cutoff = 1000
    precisions = []
    recalls = []
    f1scores = []
    aps = []
    gains = []
    nnt1s = []
    nnt2s = []
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        p = precision(x)
        r = recall(x, freqs[queried[0]])
        f = f1score(x, freqs[queried[0]])
        g = ndcg(x)
        ap = average_precision(x, freqs[queried[0]])
        t1 = nnt1(x, freqs[queried[0]])
        t2 = nnt2(x, freqs[queried[0]])
        precisions.append(p)
        recalls.append(r)
        f1scores.append(f)
        gains.append(g)
        aps.append(ap)
        nnt1s.append(t1)
        nnt2s.append(t2)
    print('mean precision:', numpy.mean(precisions))
    print('mean recall:', numpy.mean(recalls))
    print('mean F1 score:', numpy.mean(f1scores))
    print('mAP:', numpy.mean(aps))
    print('mean NDCG:', numpy.mean(gains))
    print('mean nearest neighbor:', numpy.mean(nnt1s), numpy.mean(nnt2s))

    # plot precision-recall curve
    mean_precisions = numpy.zeros(cutoff, numpy.float64)
    mean_recalls = numpy.zeros(cutoff, numpy.float64)
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        x = numpy.pad(x, (0, cutoff - len(x)), 'constant', constant_values=(0))
        precisions = []
        recalls = []
        for k, _ in enumerate(x):
            p = precision(x[:k + 1])
            r = recall(x[:k + 1], freqs[queried[0]])
            precisions.append(p)
            recalls.append(r)
        mean_precisions += precisions
        mean_recalls += recalls
    mean_precisions /= len(results)
    mean_recalls /= len(results)
    plt.plot(mean_recalls, mean_precisions)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.axis([0, 1, 0, 1.05])
    plt.show()
Example #4
def evaluate_test_set(session, tags, preds, fnames, lines, batch_limit=None):
    batch_num = 0
    num_sequences = 0
    p_tp_total, p_fp_total, r_tp_total, r_fn_total = 0, 0, 0, 0
    p_tp_total_binary, p_fp_total_binary, r_tp_total_binary, r_fn_total_binary = 0, 0, 0, 0

    while True:
        try:

            #Train binary, eval binary setting
            y, y_, filenames, line_nums = \
                session.run([tags, preds, fnames, lines])
            p_tp, p_fp = metrics.precision(reader, y, y_, counts=True)
            r_tp, r_fn = metrics.recall(reader, y, y_, counts=True)
            p_tp_total += p_tp
            p_fp_total += p_fp
            r_tp_total += r_tp
            r_fn_total += r_fn

            #Train All tags, eval binary setting
            p_tp_binary, p_fp_binary = metrics.precision(reader, y, y_, binary=True, counts=True)
            r_tp_binary, r_fn_binary = metrics.recall(reader, y, y_, binary=True , counts=True)
            p_tp_total_binary += p_tp_binary
            p_fp_total_binary += p_fp_binary
            r_tp_total_binary += r_tp_binary
            r_fn_total_binary += r_fn_binary

            #TODO: Train binary, eval binary setting
            

            num_sequences += len(y)
            batch_num += 1
            if batch_num == batch_limit:
                break
        except tf.errors.OutOfRangeError:
            print('test queue is empty')
            break

    if p_tp_total:
        precision = p_tp_total / (p_tp_total + p_fp_total)
        recall = r_tp_total / (r_tp_total + r_fn_total)
        f1 = metrics.f1(precision, recall)

        precision_binary = p_tp_total_binary / (p_tp_total_binary + p_fp_total_binary)
        recall_binary = r_tp_total_binary / (r_tp_total_binary + r_fn_total_binary)
        f1_binary = metrics.f1(precision_binary, recall_binary)

        print('Evaluated {} sequences from test set'.format(num_sequences))
        print('Precision:  ', precision)
        print('Recall:     ', recall)
        print('f1:         ', f1)

        print('Precision Binary:  ', precision_binary)
        print('Recall Binary:     ', recall_binary)
        print('f1 Binary:         ', f1_binary)
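Example #4 sums true-positive/false-positive/false-negative counts per batch and divides only once at the end, i.e. it micro-averages over the whole test set. The project's metrics.precision and metrics.recall take a reader plus binary/counts flags, an API not reproduced here; the sketch below only illustrates the counts-returning idea on plain label arrays (names and signatures are assumptions):

import numpy as np

def precision_counts(y_true, y_pred, positive=1):
    # Returns (tp, fp) for one batch so the caller can micro-average later.
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    tp = int(np.sum((y_pred == positive) & (y_true == positive)))
    fp = int(np.sum((y_pred == positive) & (y_true != positive)))
    return tp, fp

def recall_counts(y_true, y_pred, positive=1):
    # Returns (tp, fn) for one batch.
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    tp = int(np.sum((y_pred == positive) & (y_true == positive)))
    fn = int(np.sum((y_pred != positive) & (y_true == positive)))
    return tp, fn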
Example #5
def classification_report(y_true, y_pred):
    print('--------------------------------')
    print('Accuracy -', metrics.accuracy(y_true, y_pred))
    print('Recall -', metrics.recall(y_true, y_pred))
    print('Precision -', metrics.precision(y_true, y_pred))
    print('F1 score -', metrics.f1_score(y_true, y_pred))
    print('--------------------------------')
Example #6
    def test(self):
        tf.global_variables_initializer().run()

        self.saver = tf.train.Saver()
        could_load, checkpoint_counter = self.load(self.checkpoint_dir)

        if could_load:
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

        test_feed_dict = {
            self.test_inptus: self.test_x,
            self.test_labels: self.test_y
        }

        summary_str, test_loss, test_accuracy, p, t = self.sess.run(
            [
                self.test_summary, self.test_loss, self.test_accuracy,
                self.test_plab, self.test_tlab
            ],
            feed_dict=test_feed_dict)
        import metrics
        print("test_accuracy: {}".format(test_accuracy))
        with open('resnet.txt', 'a') as f:  # set up the file object
            f.write(
                str(self.i) + '-' + str(self.j) + ',' +
                str(metrics.accuracy(t, p)) + ',' +
                str(metrics.precision(t, p)) + ',' +
                str(metrics.recall(t, p)) + ',' + str(metrics.f1score(t, p)) +
                ',' + str(metrics.ft(t, p)) + '\n')
Example #7
def f1_score(y_true, y_pred):
    from metrics import precision, recall
    y_true = K.cast(y_true, dtype='float32')
    y_pred = K.cast(y_pred, dtype='float32')
    prec = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    return 2 * ((prec * rec) / (prec + rec + K.epsilon()))
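Example #7 wraps precision and recall helpers that operate on Keras tensors. A common formulation of such helpers, shown for context and assumed rather than taken from that metrics module:

from tensorflow.keras import backend as K

def precision(y_true, y_pred):
    # Batch-wise precision: predicted positives that are truly positive.
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())

def recall(y_true, y_pred):
    # Batch-wise recall: actual positives that were predicted positive.
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())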
Example #8
def evaluate(path):
    queries = read_dataset('queries.csv')
    targets = read_dataset('targets.csv')
    freqs = freq_count(targets)
    results = load_results(path, queries, targets)
    cutoff = 1000
    precisions = []
    recalls = []
    f1scores = []
    aps = []
    gains = []
    nnt1s = []
    nnt2s = []
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        p = precision(x)
        r = recall(x, freqs[queried[0]])
        f = f1score(x, freqs[queried[0]])
        g = ndcg(x)
        ap = average_precision(x, freqs[queried[0]])
        t1 = nnt1(x, freqs[queried[0]])
        t2 = nnt2(x, freqs[queried[0]])
        precisions.append(p)
        recalls.append(r)
        f1scores.append(f)
        gains.append(g)
        aps.append(ap)
        nnt1s.append(t1)
        nnt2s.append(t2)
        print('precision:', p)
        print('recall:', r)
        print('F1 score:', f)
        print('average precision:', ap)
        print('NDCG:', g)
        print('nearest neighbor:', t1, t2)
Example #9
def eval_classifier(classifier, x, y):
    y_pred = classifier.predict(x)

    conf = metrics.conf_matrix(y_pred, y)
    accuracy = metrics.accuracy(y_pred, y)
    precision = metrics.precision(y_pred, y)
    recall = metrics.recall(y_pred, y)
    f1_score = metrics.f_score(y_pred, y, beta=1)
    avg_prec = np.mean(precision)
    avg_rec = np.mean(recall)
    avg_f1 = np.mean(f1_score)

    print("Confusion Matrix: ")
    print(conf)
    print("Accuracy:")
    print(accuracy)
    print("Precision:")
    print(precision)
    print(f"Average Precision: {avg_prec}")
    print("Recall:")
    print(recall)
    print(f"Average Recall: {avg_rec}")
    print("F1_score:")
    print(f1_score)
    print(f"Average F1 Score: {avg_f1}")
Example #10
    def validation_end(self, outputs):
        # OPTIONAL
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
        avg_acc_loss = torch.stack([x['accuracy'] for x in outputs]).mean()

        if self.mode == Train_Mode.TEACHER:
            self.embeddings_all = torch.cat(self.embeddings_all).cpu()
            self.labels_all = torch.cat(self.labels_all).cpu()
            rec = recall(self.embeddings_all, self.labels_all, K=self.K)

            log_metrics = {
                    "recall" : rec[0],
                    "val_loss": avg_loss.item(),
            }
        elif self.mode == Train_Mode.STUDENT:
            avg_triplet_loss = torch.stack([x['val_triplet_loss'] for x in outputs]).mean()
            avg_angle_loss = torch.stack([x['val_angle_loss'] for x in outputs]).mean()
            avg_dist_loss = torch.stack([x['val_dist_loss'] for x in outputs]).mean()
            log_metrics = {
                    "val_triplet_loss" : avg_triplet_loss.item(),
                    "val_angle_loss": avg_angle_loss.item(),
                    "val_dist_loss": avg_dist_loss.item(),
                    "val_accuracy": avg_acc_loss.item(),
                    "val_loss": avg_loss.item(),
            }
        
        self.embeddings_all, self.labels_all = [], []

        self.train_step = 0
        self.train_num_correct = 0

        self.val_step = 0
        self.val_num_correct = 0

        return { 'val_loss': avg_loss, 'log': log_metrics}
Example #11
def compute_all_metrics(execution_id, path_input, path_output, formula,
                        append):
    """
    Computes all metrics and persists them in a CSV.

    Args:
        execution_id (int): identifier of the execution
        path_input (string): path of the file that contains the classifications
        path_output (string): path of the file that will persist the metrics
        formula (string): mean_max | mean_mean
        append (boolean): true | false
    """
    from metrics import accuracy, precision, recall, f1, specificity

    # loading results
    with open(path_input) as data_file:
        data = json.load(data_file)

    # computing metrics
    tp = tn = fp = fn = 0
    for i in range(0, len(data)):
        if (data[i]['values'][formula]['positive'] >=
                data[i]['values'][formula]['negative']):
            if data[i]['values']['label'] == 'positive':
                tp += 1
            else:
                fp += 1
        elif (data[i]['values'][formula]['positive'] <
              data[i]['values'][formula]['negative']):
            if (data[i]['values']['label'] == 'negative'):
                tn += 1
            else:
                fn += 1
        else:
            raise Exception(
                "Positive similarity equals negative similarity for news " +
                data[i]['id'])

    accuracy = accuracy(tp, tn, fp, fn)
    recall = recall(tp, fn)
    precision = precision(tp, fp)
    f1 = f1(precision, recall)
    specificity = specificity(tn, fp)

    # persisting the results
    with open(path_output, 'a' if append else 'w') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        if (not append):
            spamwriter.writerow([
                'execution_id', 'tp', 'tn', 'fp', 'fn', 'accuracy',
                'precision', 'recall', 'f1', 'specificity'
            ])
        spamwriter.writerow([
            execution_id, tp, tn, fp, fn, accuracy, precision, recall, f1,
            specificity
        ])
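The accuracy, precision, recall, f1, and specificity imported in Example #11 clearly operate on raw confusion-matrix counts, since the call sites pass tp/tn/fp/fn directly. A minimal sketch with matching signatures; the zero-division guards are my own assumption:

def accuracy(tp, tn, fp, fn):
    total = tp + tn + fp + fn
    return (tp + tn) / total if total else 0.0

def precision(tp, fp):
    return tp / (tp + fp) if (tp + fp) else 0.0

def recall(tp, fn):
    return tp / (tp + fn) if (tp + fn) else 0.0

def f1(precision, recall):
    return 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0

def specificity(tn, fp):
    return tn / (tn + fp) if (tn + fp) else 0.0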
Example #12
 def supervised_eval(self, train_or_valid):
     data = self.dataset.get_labeled_data(train_or_valid)
     if data is None:
         raise ValueError('no labeled examples present in dataset')
     X_labeled, y_true, _ = data
     y_pred = self.model.predict(X_labeled)
     p, r, ac, g, auc = metrics.precision(y_true, y_pred),metrics.recall(y_true, y_pred),\
                        metrics.accuracy(y_true, y_pred), metrics.g_means(y_true, y_pred),\
                        metrics.auc(y_true, y_pred)
     self.metrics[train_or_valid].append((p, r, ac, g, auc))
Example #13
	def test_1(self):
		actual = [1, 1, 0, 1, 1, 1, 0, 0, 1, 1]
		predicted = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
		tp, fn, fp, tn = metrics.confusion_matrix(actual, predicted)
		self.assertEqual(tp, 3)
		self.assertEqual(fn, 4)
		self.assertEqual(fp, 1)
		self.assertEqual(tn, 2)
		self.assertEqual(metrics.accuracy(actual, predicted), 0.5)
		self.assertEqual(metrics.precision(actual, predicted), 3/4)
		self.assertEqual(metrics.recall(actual, predicted), 3/7)
		self.assertEqual(metrics.f1(actual, predicted), 6/11)
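The assertions in Example #13 pin down the expected behaviour of the metrics module on paired binary label lists. An implementation that satisfies exactly those assertions (an illustration, not the project's actual code):

def confusion_matrix(actual, predicted):
    # Returns (tp, fn, fp, tn) for binary label lists, matching the test's unpacking order.
    tp = sum(1 for a, p in zip(actual, predicted) if a == 1 and p == 1)
    fn = sum(1 for a, p in zip(actual, predicted) if a == 1 and p == 0)
    fp = sum(1 for a, p in zip(actual, predicted) if a == 0 and p == 1)
    tn = sum(1 for a, p in zip(actual, predicted) if a == 0 and p == 0)
    return tp, fn, fp, tn

def accuracy(actual, predicted):
    tp, fn, fp, tn = confusion_matrix(actual, predicted)
    return (tp + tn) / len(actual)

def precision(actual, predicted):
    tp, fn, fp, tn = confusion_matrix(actual, predicted)
    return tp / (tp + fp)

def recall(actual, predicted):
    tp, fn, fp, tn = confusion_matrix(actual, predicted)
    return tp / (tp + fn)

def f1(actual, predicted):
    p, r = precision(actual, predicted), recall(actual, predicted)
    return 2 * p * r / (p + r)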
Example #14
    def post(self):
        global current_scene
        score = 0

        print('Submitted scene %s' % current_scene)
        if self.scene_exists(current_scene):
            return {
                'message': "Scene {} already exists.".format(current_scene)
            }, 400
        correct_dict = self.fetch_correct_result()
        if not correct_dict:
            return {
                'message':
                "There is no result data for scene {}.".format(
                    current_scene)
            }, 400
        your_dict = request.get_json()
        submission_time = datetime.datetime.now()
        print(' Correct prediction', correct_dict)
        your_dict = {str(k): int(v) for k, v in your_dict.items()}
        print(' Your prediction', your_dict)
        sys.stdout.flush()
        score = 0
        score2 = 0
        score3 = 0
        if your_dict:
            #score = Benchmark.diff_dicts(correct_dict, your_dict)
            score = metrics.accuracy(correct_dict, your_dict)
            score2 = metrics.precision(correct_dict, your_dict)
            score3 = metrics.recall(correct_dict, your_dict)
            print("scene accuracy", score)
            print("scene precision", score2)
            print("scene recall", score3)

        submission_result = {
            'scene': current_scene,
            'accuracy': score,
            'precision': score2,
            'recall': score3
        }
        try:
            self.insert(submission_result, submission_time)
        except Exception:
            return {
                'message': 'An error occurred while inserting the item'
            }, 500

        return {
            'Your score for this scene is ': submission_result['accuracy']
        }, 201
Example #15
 def recalls(self):
     predictions = self.one_hot
     size = len(self._classes)
     with tf.compat.v1.name_scope("Recalls"):
         rs = []
         ops = []
         for i, c in enumerate(self._classes):
             mask = tf.one_hot([i], size, axis=-1)
             r, op = recall(labels=self.target,
                            predictions=predictions,
                            weights=mask)
             tf.compat.v1.summary.scalar("c{}_{}".format(i, c), r * 100)
             rs.append(r)
             ops.append(op)
         return rs, ops
Example #16
def results_to_metrics(results, methods, ref_motifs):
    _, _, ref_labels = motif.unpack_motif(ref_motifs)
    metric_dict = dict.fromkeys(methods)

    for m in methods:
        obs_motifs = results[m]
        _, _, obs_labels = motif.unpack_motif(obs_motifs)

        this_edit = metrics.edit_distance(obs_labels, ref_labels)
        this_recall = metrics.recall(obs_motifs, ref_motifs)
        this_precis = metrics.precision(obs_motifs, ref_motifs)
        this_f = metrics.f_measure(obs_motifs, ref_motifs)
        this_bm = metrics.boundary_distance(obs_motifs, ref_motifs)
        metric_dict[m] = [this_edit, this_recall, this_precis, this_f, this_bm]

    return metric_dict
Example #17
 def active_simulation_eval(self):
     data = self.dataset.get_unlabeled_data()
     if data is None:
         # requires `import warnings` at module scope
         warnings.warn(
             'all examples have been labeled; this eval mode works '
             'if there is unlabeled pool of data in `simulate` mode'
         )
         return
     X_unlabeled, unlabeled_indexes = data
     # get unlabeled examples labels in simulation with `y_ideal`
     y_true = self.dataset.y_ideal[unlabeled_indexes]
     y_pred = self.model.predict(X_unlabeled)
     p, r, ac, g, auc = metrics.precision(y_true, y_pred),metrics.recall(y_true, y_pred),\
                        metrics.accuracy(y_true, y_pred), metrics.g_means(y_true, y_pred),\
                        metrics.auc(y_true, y_pred)
     self.metrics['simulate'].append((p, r, ac, g, auc))
Example #18
 def recalls(self):
     predictions = self.one_hot
     size = len(self._classes)
     with tf.name_scope("Recalls"):
         rs = []
         ops = []
         for i, c in enumerate(self._classes):
             mask = tf.one_hot([i], size, axis=-1)
             r, op = recall(labels=self.target,
                            predictions=predictions,
                            weights=mask,
                            updates_collections=tf.GraphKeys.UPDATE_OPS)
             tf.summary.scalar("c_{}".format(c), r * 100)
             rs.append(r)
             ops.append(op)
         return rs, ops
Example #19
    def validation_end(self, outputs):
        # OPTIONAL
        avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()

        if self.mode == Train_Mode.TEACHER:
            self.embeddings_all = torch.cat(self.embeddings_all).cpu()
            self.labels_all = torch.cat(self.labels_all).cpu()
            rec = recall(self.embeddings_all, self.labels_all, K=self.K)

            log_metrics = {
                    "recall" : rec[0],
                    "val_loss": avg_loss.item(),
            }
        
        self.embeddings_all, self.labels_all = [], []

        return { 'val_loss': avg_loss, 'log': log_metrics}
Example #20
def compute_all_metrics(execution_id, path_input, path_output, formula, append):
    """
    Computes all metrics and persists them in a CSV.

    Args:
        execution_id (int): identifier of the execution
        path_input (string): path of the file that contains the classifications
        path_output (string): path of the file that will persist the metrics
        formula (string): mean_max | mean_mean
        append (boolean): true | false
    """
    from metrics import accuracy, precision, recall, f1, specificity

    # loading results
    with open(path_input) as data_file:
        data = json.load(data_file)

    # computing metrics
    tp = tn = fp = fn = 0
    for i in range(0, len(data)):
        if (data[i]['values'][formula]['positive'] >= data[i]['values'][formula]['negative']):
            if data[i]['values']['label'] == 'positive':
                tp += 1
            else:
                fp += 1
        elif (data[i]['values'][formula]['positive'] < data[i]['values'][formula]['negative']):
            if (data[i]['values']['label'] == 'negative'):
                tn += 1
            else:
                fn += 1
        else:
            raise Exception("Positive similarity equals negative similarity for news " + data[i]['id'])

    accuracy = accuracy(tp, tn, fp, fn)
    recall = recall(tp, fn)
    precision = precision(tp, fp)
    f1 = f1(precision, recall)
    specificity = specificity(tn, fp)

    # persisting the results
    with open(path_output, 'a' if append else 'w') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        if (not append):
            spamwriter.writerow(
                ['execution_id', 'tp', 'tn', 'fp', 'fn', 'accuracy', 'precision', 'recall', 'f1', 'specificity'])
        spamwriter.writerow([execution_id, tp, tn, fp, fn, accuracy, precision, recall, f1, specificity])
Example #21
def test(engine, options):
    queries = pd.read_csv(os.path.join('data', 'queries_train.tsv'), sep='\t')
    bench_lbls = pd.read_csv(os.path.join('data', 'benchmark_lbls_train.csv'),
                             dtype={
                                 'query': int,
                                 'tweet': str,
                                 'y_true': int
                             })
    q2n_relevant = bench_lbls.groupby('query')['y_true'].sum().to_dict()
    queries_results = []
    q_times = []
    for i, row in queries.iterrows():
        q_id = row['query_id']
        q_keywords = row['keywords']
        start_time = time.time()
        q_n_res, q_res = engine.search(q_keywords, options['methods'])
        end_time = time.time()
        q_time = end_time - start_time
        q_times.append(q_time)
        queries_results.extend([(q_id, str(doc_id)) for doc_id in q_res])
        if q_time > 10:
            print(f'Query time exceeded: {options}')
    queries_results = pd.DataFrame(queries_results, columns=['query', 'tweet'])
    q_results_labeled = pd.merge(queries_results,
                                 bench_lbls,
                                 on=['query', 'tweet'],
                                 how='inner',
                                 suffixes=('_result', '_bench'))
    options['max_q_time'] = max(q_times)
    options['avg_q_time'] = sum(q_times) / len(q_times)
    options['MAP'] = metrics.map(q_results_labeled)
    options['precision'] = metrics.precision(q_results_labeled)
    options['precision@5'] = metrics.precision(
        q_results_labeled.groupby('query').head(5))
    options['precision@10'] = metrics.precision(
        q_results_labeled.groupby('query').head(10))
    options['precision@50'] = metrics.precision(
        q_results_labeled.groupby('query').head(50))
    options['recall'] = metrics.recall(q_results_labeled, q2n_relevant)
    save_to_csv(options)
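Example #21 merges the engine's ranked results with benchmark labels and then evaluates the merged DataFrame. One plausible reading of the DataFrame-based precision and recall helpers, written here as an assumption about their semantics (per-query averaging is a guess):

import pandas as pd

def precision(df):
    # Fraction of returned tweets judged relevant, averaged per query.
    return df.groupby('query')['y_true'].mean().mean()

def recall(df, q2n_relevant):
    # Relevant tweets retrieved per query divided by total relevant, averaged over queries.
    retrieved = df.groupby('query')['y_true'].sum()
    totals = pd.Series(q2n_relevant)
    return (retrieved / totals.reindex(retrieved.index)).mean()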
Example #22
 def metric_fn(label_ids, predict, num_labels, answer_num):
     mask = tf.sequence_mask(answer_num, FLAGS.max_answer_num)
     precision = metrics.precision(label_ids,
                                   predict,
                                   num_classes=num_labels,
                                   weights=mask,
                                   pos_indices=[1])
     recall = metrics.recall(label_ids,
                             predict,
                             num_classes=num_labels,
                             weights=mask,
                             pos_indices=[1])
     f1_score = metrics.f1(label_ids,
                           predict,
                           num_classes=num_labels,
                           weights=mask,
                           pos_indices=[1])
     return {
         "precision": precision,
         "recall": recall,
         "f1_score": f1_score
     }
Example #23
def calculate_metrics(patient_id, spacing, label_arr_org, pred_arr_org,
                      HAUSDORFF_PERCENT, OVERLAP_TOLERANCE,
                      SURFACE_DICE_TOLERANCE):
    """
    metric calculation cleanup in test.py.
    """
    result = {}
    result["patient_id"] = patient_id
    #
    result["precision"] = precision(label_arr_org, pred_arr_org)
    result["recall"] = recall(label_arr_org, pred_arr_org)
    result["jaccard"] = jaccard(label_arr_org, pred_arr_org)
    result["dice"] = dice(label_arr_org, pred_arr_org)
    result["segmentation_score"] = segmentation_score(label_arr_org,
                                                      pred_arr_org, spacing)
    bbox_metrics = calculate_bbox_metrics(label_arr_org, pred_arr_org, spacing)
    result = append_helper(
        result, ["x_distance", "y_distance", "z_distance", "distance"],
        bbox_metrics)
    surface_dice_metrics = surface_dice(label_arr_org, pred_arr_org, spacing,
                                        HAUSDORFF_PERCENT, OVERLAP_TOLERANCE,
                                        SURFACE_DICE_TOLERANCE)
    result = append_helper(result, [
        "average_surface_distance_gt_to_pr",
        "average_surface_distance_pr_to_gt", "robust_hausdorff",
        "overlap_fraction_gt_with_pr", "overlap_fraction_pr_with_gt",
        "surface_dice"
    ], surface_dice_metrics)
    # get bbox center (indices) of prediction for next segmentation step
    for axes in ["X", "Y", "Z"]:
        for location in ["min", "center", "max", "length"]:
            result["prediction_{}_{}".format(
                axes, location
            )] = bbox_metrics["prediction_bbox_metrics"][axes][location]

    return result, result["dice"], bbox_metrics
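Example #23 calls precision and recall on a ground-truth label array and a predicted mask. A voxel-wise reading of those two helpers, sketched only as an assumption about their semantics:

import numpy as np

def precision(label_arr, pred_arr):
    # Voxel-wise precision between binary masks: predicted foreground that is correct.
    tp = np.logical_and(label_arr > 0, pred_arr > 0).sum()
    fp = np.logical_and(label_arr == 0, pred_arr > 0).sum()
    return float(tp) / (tp + fp) if (tp + fp) else 0.0

def recall(label_arr, pred_arr):
    # Voxel-wise recall between binary masks: ground-truth foreground that was found.
    tp = np.logical_and(label_arr > 0, pred_arr > 0).sum()
    fn = np.logical_and(label_arr > 0, pred_arr == 0).sum()
    return float(tp) / (tp + fn) if (tp + fn) else 0.0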
Example #24
    user_profile = train[test_user].indices  # what is this doing here?
    relevant_items = test[test_user].indices
    if len(relevant_items) > 0:
        neval += 1
        #
        # TODO: Here you can write to file the recommendations for each user in the test split.
        # WARNING: there is a catch with the item idx!
        #
        # this will rank *all* items
        recommended_items = recommender.recommend(user_profile,
                                                  exclude_seen=True)
        # use this to have the *top-k* recommended items (warning: this can underestimate ROC-AUC for small k)
        # recommended_items = recommender.recommend(user_profile, k=at, exclude_seen=True)
        roc_auc_ += roc_auc(recommended_items, relevant_items)
        precision_ += precision(recommended_items, relevant_items, at=at)
        recall_ += recall(recommended_items, relevant_items, at=at)
        map_ += map(recommended_items, relevant_items, at=at)
        mrr_ += rr(recommended_items, relevant_items, at=at)
        ndcg_ += ndcg(recommended_items,
                      relevant_items,
                      relevance=test[test_user].data,
                      at=at)
roc_auc_ /= neval
precision_ /= neval
recall_ /= neval
map_ /= neval
mrr_ /= neval
ndcg_ /= neval

logger.info('Ranking quality')
logger.info('ROC-AUC: {:.4f}'.format(roc_auc_))
Example #25
    def evaluating(self, model, dataset, split):
        """
          input:
            model: (object) pytorch model
            dataset: (object) dataset
            split: (str) split of dataset in ['train', 'val', 'test']
          return [overall_accuracy, precision, recall, f1-score, jaccard, kappa]
        """
        args = self.args
        oa, precision, recall, f1, jac, kappa = 0, 0, 0, 0, 0, 0
        model.eval()
        data_loader = DataLoader(dataset,
                                 args.batch_size,
                                 num_workers=4,
                                 shuffle=False)
        batch_iterator = iter(data_loader)
        steps = len(dataset) // args.batch_size

        start = time.time()
        for step in range(steps):
            x, y = next(batch_iterator)
            x = Variable(x, volatile=True)
            y = Variable(y, volatile=True)
            if args.cuda:
                x = x.cuda()
                y = y.cuda()
            # calculate pixel accuracy of generator
            gen_y = model(x)
            if self.is_multi:
                gen_y = gen_y[0]
            oa += metrics.overall_accuracy(gen_y.data, y.data)
            precision += metrics.precision(gen_y.data, y.data)
            recall += metrics.recall(gen_y.data, y.data)
            f1 += metrics.f1_score(gen_y.data, y.data)
            jac += metrics.jaccard(gen_y.data, y.data)
            kappa += metrics.kappa(gen_y.data, y.data)

        _time = time.time() - start

        if not os.path.exists(os.path.join(Logs_DIR, 'statistic')):
            os.makedirs(os.path.join(Logs_DIR, 'statistic'))

        # recording performance of the model
        nb_samples = steps * args.batch_size
        basic_info = [
            self.date, self.method, self.epoch, self.iter, nb_samples, _time
        ]
        basic_info_names = [
            'date', 'method', 'epochs', 'iters', 'nb_samples', 'time(sec)'
        ]

        perform = [
            round(idx / steps, 3)
            for idx in [oa, precision, recall, f1, jac, kappa]
        ]
        perform_names = [
            "overall_accuracy", "precision", "recall", "f1-score", "jaccard",
            "kappa"
        ]
        cur_log = pd.DataFrame([basic_info + perform],
                               columns=basic_info_names + perform_names)
        # save performance
        if os.path.exists(
                os.path.join(Logs_DIR, 'statistic', "{}.csv".format(split))):
            logs = pd.read_csv(
                os.path.join(Logs_DIR, 'statistic', "{}.csv".format(split)))
        else:
            logs = pd.DataFrame([])
        logs = logs.append(cur_log, ignore_index=True)
        logs.to_csv(os.path.join(Logs_DIR, 'statistic',
                                 "{}.csv".format(split)),
                    index=False,
                    float_format='%.3f')
Example #26
                    logging.debug(
                        f"{engine_module} results have MAP value of {results_map}."
                    )
                    if results_map <= 0 or results_map > 1:
                        logging.error(
                            f'{engine_module} results MAP value is out of range (0,1).'
                        )

                    # test that the average across queries of precision,
                    # precision@5, precision@10, precision@50, and recall
                    # is in [0,1].
                    prec, p5, p10, p50, recall = \
                        metrics.precision(q_results_labeled), \
                        metrics.precision(q_results_labeled.groupby('query').head(5)), \
                        metrics.precision(q_results_labeled.groupby('query').head(10)), \
                        metrics.precision(q_results_labeled.groupby('query').head(50)), \
                        metrics.recall(q_results_labeled, q2n_relevant)
                    logging.debug(
                        f"{engine_module} results produced average precision of {prec}."
                    )
                    logging.debug(
                        f"{engine_module} results produced average precision@5 of {p5}."
                    )
                    logging.debug(
                        f"{engine_module} results produced average precision@10 of {p10}."
                    )
                    logging.debug(
                        f"{engine_module} results produced average precision@50 of {p50}."
                    )
                    logging.debug(
                        f"{engine_module} results produced average recall of {recall}."
                    )
Example #27
def predict(loss_fn, model, data_set, data_loader, counting=False):
    """ Validate after training an epoch
    Note:
    """
    model.eval()

    true_positives = []
    predicted_positives = []
    possible_positives = []
    union_areas = []
    loss = []
    for bc_cnt, bc_data in enumerate(data_loader):
        if counting:
            print('%d/%d' % (bc_cnt, len(data_set) // data_loader.batch_size))
        imgs, masks, _ = bc_data
        imgs = Variable(imgs).cuda()
        masks = Variable(masks).cuda()
        # labels = Variable(labels).cuda()

        outputs = model(imgs)

        # outputs = outputs.view(-1, outputs.size()[2], outputs.size()[3])

        # print outputs.size(), masks.size()
        # if outputs.size() != masks.size():
        #     outputs = F.upsample(outputs, size=masks.size()[-2:], mode='bilinear')
        mask_loss = torch.zeros(1).cuda()
        for o in outputs:
            o = o.view(-1, o.size()[2], o.size()[3])

            mask_loss = mask_loss + float(loss_fn(o, masks))

        # mask_loss = mask_loss
        # loss = criterion(outputs, masks)

        loss.append(mask_loss)
        # loss.append(loss_fn(outputs, masks))
        # outputs = F.softmax(model(imgs), dim=1)
        # if outputs.size() != masks.size():
        #     outputs = F.upsample(outputs, size=masks.size()[-2:], mode='bilinear')
        #
        # _, outputs = torch.max(outputs, dim=1)
        output = outputs[-1]
        output = output.view(-1, output.size()[2], output.size()[3])

        output = output.cpu().data.numpy()
        # labels = labels.cpu().data.numpy()
        masks = masks.cpu().data.numpy()
        imgs = imgs.cpu().data.numpy()

        true_positive, predicted_positive, possible_positive, union_area = metrics_pred(
            output, imgs, masks)

        true_positives += true_positive
        predicted_positives += predicted_positive
        possible_positives += possible_positive
        union_areas += union_area
    precisions = precision(true_positives, predicted_positives)
    recalls = recall(true_positives, possible_positives)
    f1_scores = f1_score(recalls, precisions)
    loss = torch.tensor(loss)
    return precisions, recalls, f1_scores, loss.mean()
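Example #27 accumulates per-image counts and computes aggregate scores once at the end. A sketch of count-based precision, recall, and F1 helpers consistent with that calling pattern (names, argument order, and micro-averaging are assumptions):

import numpy as np

def precision(true_positives, predicted_positives):
    # Micro-averaged precision from per-image counts.
    tp, pp = np.sum(true_positives), np.sum(predicted_positives)
    return float(tp) / pp if pp else 0.0

def recall(true_positives, possible_positives):
    # Micro-averaged recall from per-image counts.
    tp, ap = np.sum(true_positives), np.sum(possible_positives)
    return float(tp) / ap if ap else 0.0

def f1_score(recalls, precisions):
    # Harmonic mean of the aggregate recall and precision.
    denom = recalls + precisions
    return 2 * recalls * precisions / denom if denom else 0.0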
Example #28
def run():
    while True:
        trial = pull_pending()

        if trial is None:
            break

        params = eval(trial['Parameters'])

        logging.info(trial)

        dataset = load(trial['Dataset'])
        fold = int(trial['Fold']) - 1

        (X_train, y_train), (X_test,
                             y_test) = dataset[fold][0], dataset[fold][1]

        n_minority = Counter(y_train).most_common()[1][1]
        n_majority = Counter(y_train).most_common()[0][1]

        imblearn_ratios = [
            ((n_majority - n_minority) * ratio + n_minority) / n_majority
            for ratio in [0.5, 0.75, 1.0]
        ]

        clf = {
            'NB': NB(),
            'KNN': KNN(),
            'SVM': SVM(gamma='scale'),
            'CART': CART()
        }[params['classifier']]

        if (trial['Algorithm'] is None) or (trial['Algorithm'] == 'None'):
            algorithm = None
        else:
            algorithms = {
                'AKNN':
                ResamplingCV(AKNN, clf, n_neighbors=[1, 3, 5, 7]),
                'Bord':
                ResamplingCV(SMOTE,
                             clf,
                             kind=['borderline1'],
                             k_neighbors=[1, 3, 5, 7, 9],
                             m_neighbors=[5, 10, 15],
                             sampling_strategy=imblearn_ratios),
                'CC':
                ResamplingCV(CC, clf, sampling_strategy=imblearn_ratios),
                'CNN':
                ResamplingCV(CNN, clf, n_neighbors=[1, 3, 5, 7]),
                'ENN':
                ResamplingCV(ENN, clf, n_neighbors=[1, 3, 5, 7]),
                'IHT':
                ResamplingCV(IHT,
                             clf,
                             sampling_strategy=imblearn_ratios,
                             cv=[2]),
                'NCL':
                ResamplingCV(NCL, clf, n_neighbors=[1, 3, 5, 7]),
                'NM':
                ResamplingCV(NM, clf, n_neighbors=[1, 3, 5, 7]),
                'OSS':
                ResamplingCV(OSS, clf, n_neighbors=[1, 3, 5, 7]),
                'RBO':
                ResamplingCV(RBO,
                             clf,
                             gamma=[0.01, 0.1, 1.0, 10.0],
                             ratio=[0.5, 0.75, 1.0]),
                'RBU':
                ResamplingCV(RBU,
                             clf,
                             gamma=params.get('gamma'),
                             ratio=params.get('ratio')),
                'RENN':
                ResamplingCV(RENN, clf, n_neighbors=[1, 3, 5, 7]),
                'ROS':
                ResamplingCV(ROS, clf, sampling_strategy=imblearn_ratios),
                'RUS':
                ResamplingCV(RUS, clf, sampling_strategy=imblearn_ratios),
                'SMOTE':
                ResamplingCV(SMOTE,
                             clf,
                             k_neighbors=[1, 3, 5, 7, 9],
                             sampling_strategy=imblearn_ratios),
                'SMOTE+ENN':
                ResamplingCV(
                    SMOTEENN,
                    clf,
                    smote=[SMOTE(k_neighbors=k) for k in [1, 3, 5, 7, 9]],
                    sampling_strategy=imblearn_ratios),
                'SMOTE+TL':
                ResamplingCV(
                    SMOTETomek,
                    clf,
                    smote=[SMOTE(k_neighbors=k) for k in [1, 3, 5, 7, 9]],
                    sampling_strategy=imblearn_ratios),
                'TL':
                TL(),
            }

            algorithm = algorithms.get(trial['Algorithm'])

            if algorithm is None:
                raise NotImplementedError

        if algorithm is not None:
            X_train, y_train = algorithm.fit_sample(X_train, y_train)

        clf = clf.fit(X_train, y_train)
        predictions = clf.predict(X_test)

        scores = {
            'Precision': metrics.precision(y_test, predictions),
            'Recall': metrics.recall(y_test, predictions),
            'F-measure': metrics.f_measure(y_test, predictions),
            'AUC': metrics.auc(y_test, predictions),
            'G-mean': metrics.g_mean(y_test, predictions)
        }

        submit_result(trial, scores)
Example #29
    def post(self):

        #signal.alarm(0)
        #signal.alarm(5)
        global current_scene, latency, overall_latency
        scene_latency = datetime.datetime.utcnow() - latency
        if overall_latency:
            overall_latency += scene_latency
        else:  #None
            overall_latency = scene_latency
        print("Latency for scene %s was %s" % (current_scene, scene_latency))
        #timeout = int(os.getenv("BENCHMARK_POST_TIMEOUT", default=10))
        #watchdog.reset_and_extend(timeout)
        score = 0

        print('Submitted scene %s' % current_scene)
        if self.scene_exists(current_scene):
            return {
                'message': "Scene {} already exists.".format(current_scene)
            }, 400
        correct_dict = self.fetch_correct_result()
        if not correct_dict:
            return {"message": "Please request at least one scene first"}, 400
        your_dict = request.get_json()
        submission_time = datetime.datetime.utcnow()

        try:
            your_dict = {str(k): int(v) for k, v in your_dict.items()}
        except ValueError:
            return {
                'message':
                "Your result json should be in format {'object_name':'1'} with key as an object name."
            }, 400
        except AttributeError:
            return {
                'message':
                "Your result json is incorrect. Specify it like: {'object_name:1'}"
            }, 400

        print(' Correct prediction', correct_dict)
        print(' Your prediction', your_dict)
        sys.stdout.flush()
        score = 0
        score2 = 0
        score3 = 0
        if your_dict:
            #score = Benchmark.diff_dicts(correct_dict, your_dict)
            score = metrics.accuracy(correct_dict, your_dict)
            score2 = metrics.precision(correct_dict, your_dict)
            score3 = metrics.recall(correct_dict, your_dict)
            print("scene accuracy", score)
            print("scene precision", score2)
            print("scene recall", score3)

        submission_result = {
            'scene': current_scene,
            'accuracy': score,
            'precision': score2,
            'recall': score3
        }
        try:
            self.insert(submission_result, submission_time)
        except Exception:
            return {
                'message': 'An error occurred while inserting the item'
            }, 500

        return {
            'Your score for this scene is ': submission_result['accuracy']
        }, 201
Example #30
		# For each confidence percentage
		for i in range(100):

			# Keep only the predictions whose confidence exceeds this threshold
			porcentaje = float(i) / 100

			aux = result[result['trust'] > porcentaje]

			# matrix = metrics.confusion_matrix(aux)
			matrix = metrics.hard_matrix(aux)

			# A negative accuracy means no data exceeded this confidence level
			precision = metrics.accuracy(matrix, clase)
			if precision >= 0:
				valores_accuracy.append(precision)
				valores_recall.append(metrics.recall(matrix, clase))
				x_values.append(porcentaje)

			# A negative f_score means no data exceeded this confidence level
			f_score = metrics.f_score(matrix, clase)
			if f_score >= 0:
				valores_fscore.append(f_score)
				x_values_fscore.append(porcentaje)

		#graf(clase, x_values, valores_accuracy, 'Accuracy')
		graf(clase, x_values, valores_recall, 'Recall')
		#graf(clase, x_values_fscore, valores_fscore, 'F-Score')
		print('a')

	plt.show()
Example #31
def test_precision_recall_tradeoff(VGG11, CIFAR10):
    assert (precision(VGG11, CIFAR10, 1, 'cuda:0') >= 0.95) or (recall(VGG11, CIFAR10, 1, 'cuda:0') >= 0.95)
Example #32
def model_save_load(name, model, x, y=None):
    model_name = name + '.pkl'
    if model_name not in os.listdir('data/model'):
        model.fit(X=x, y=y)
        joblib.dump(model, 'data/model/' + model_name)
        return model
    else:
        model = joblib.load('data/model/' + model_name)
        return model


dg = dg()
for i in range(10):
    for j in range(10):
        train_data, test_data, train_labels, test_labels = dg.dsift_only(j)
        svm = SVC(kernel='poly', degree=3)
        svm = model_save_load('svm' + str(i) + '-' + str(j), svm,
                              train_data.reshape([train_data.shape[0], -1]),
                              train_labels)
        plab = p = svm.predict(test_data.reshape([test_data.shape[0],
                                                  -1])).reshape(-1,
                                                                1).tolist()
        t = test_labels.reshape(-1, 1).tolist()
        print(str(metrics.accuracy(t, p)))
        with open('denseSIFTsvm.txt', 'a') as f:  # set up the file object
            f.write(
                str(i) + '-' + str(j) + ',' + str(metrics.accuracy(t, p)) +
                ',' + str(metrics.precision(t, p)) + ',' +
                str(metrics.recall(t, p)) + ',' + str(metrics.f1score(t, p)) +
                ',' + str(metrics.ft(t, p)) + '\n')
Example #33
            clf = None
            clf = RandomForestClassifier(n_estimators=p, criterion='entropy',
                                         max_depth=14, min_samples_split=20,
                                         n_jobs=2)
            
            clf.fit(train_X, train_y)
            results.append(metrics.predict_table(clf, test_X, test_y))
            

        result = pd.concat(results)

        matrix = metrics.confusion_matrix(result)

        clases = matrix.columns.tolist()
        precisions = [metrics.precision(matrix, c) for c in clases]
        recalls = [metrics.recall(matrix, c) for c in clases]
        f_scores = [metrics.f_score(matrix, c) for c in clases]

        w_score = metrics.weighted_f_score(matrix)

        # f = open(result_dir + str(max_depth) + ' ' + str(min_samples_split) + '.txt', 'w')
        f = open(result_dir + str(p) + '.txt', 'w')

        f.write('F_score by class')
        f.write('\n')
        f.write(str(f_scores))
        f.write('\n')
        f.write('\n')
        f.write('Weighted average: ')
        f.write(str(w_score))