Example #1
def bestfWA(eta, mu, lossfunc, method):
    k = eta.shape[0]
    fFC = np.zeros(k)
    fFS = -1
    if method == 'F':
        # res = scipy.optimize.minimize(weightedAvg, (1, 1), args=(eta, mu), bounds=((-1, 1), (-1, 1)))
        ini = np.random.uniform(-1, 1, k)
        bounds = tuple((-1, 1) for _ in range(k))
        res = scipy.optimize.minimize(precwrap,
                                      ini,
                                      args=(eta, mu),
                                      bounds=bounds)
        fFC = res.x
        fFS = metrics.precision(fFC, eta, mu)

    elif method == 'T':
        f = np.ones(k)

        for i in range(pow(2, k)):
            # brute force over the 2**k sign vectors: bit b of i selects
            # 0 or 1 for component b, then map {0, 1} -> {-1, +1}
            f = np.array([(i >> b) & 1 for b in range(k)], dtype=float)
            f = 2 * f - 1
            loss = metrics.precision(f, eta, mu)
            if loss > fFS:
                fFC = f
                fFS = loss
    return (fFC, fFS)
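The brute-force branch above enumerates all 2**k sign vectors in {-1, +1}^k by reading the bits of the loop index. A minimal standalone sketch of just that enumeration (independent of the precwrap/metrics.precision helpers, which are not shown here):

import numpy as np

k = 3  # illustrative dimension
for i in range(2 ** k):
    # bit b of i selects 0 or 1 for component b, then map {0, 1} -> {-1, +1}
    f = 2 * np.array([(i >> b) & 1 for b in range(k)]) - 1
    print(f)  # [-1 -1 -1], [ 1 -1 -1], ..., [ 1  1  1]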
Example #2
def evaluate(path):
    cad = read_dataset('cad.csv')
    rgbd = read_dataset('rgbd.csv')
    freqs = freq_count(cad)
    results = load_results(path, rgbd, cad)

    mP = 0.0
    mR = 0.0
    mF = 0.0
    mAP = 0.0
    mNDCG = 0.0
    mNNT1 = 0.0
    mNNT2 = 0.0

    for (queried, retrieved) in results:
        f = freqs[queried[0]]
        x = categories_to_rel(queried, retrieved)[:f]
        # Sum up the retrieval scores
        mP += precision(x)
        mR += recall(x, f)
        mF += f1score(x, f)
        mNDCG += ndcg(x)
        mAP += average_precision(x, f)
        mNNT1 += nnt1(x, f)
        mNNT2 += nnt2(x, f)

    n = len(results)
    print('num queries:', n)
    print('mean precision:', mP / n)
    print('mean recall:', mR / n)
    print('mean F1:', mF / n)
    print('mean AP:', mAP / n)
    print('mean NDCG: ', mNDCG / n)
    print('mean NNT1: ', mNNT1 / n)
    print('mean NNT2: ', mNNT2 / n)

    # Plot PR-curve
    cutoff = 1000
    mean_precisions = np.zeros(cutoff, np.float64)
    mean_recalls = np.zeros(cutoff, np.float64)
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        x = np.pad(x, (0, cutoff - len(x)), 'constant', constant_values=(0))
        precisions = []
        recalls = []
        for k, _ in enumerate(x):
            p = precision(x[:k + 1])
            r = recall(x[:k + 1], freqs[queried[0]])
            precisions.append(p)
            recalls.append(r)
        mean_precisions += precisions
        mean_recalls += recalls
    mean_precisions /= len(results)
    mean_recalls /= len(results)

    plt.plot(mean_recalls, mean_precisions)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.axis([0, 1, 0, 1.05])
    plt.show()
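Examples #2, #3 and #9 assume rank-based helpers that take a binary relevance vector x (1 means the item at that rank is relevant) and the number of relevant targets f. A minimal sketch of what a few of those helpers could look like, using the standard retrieval definitions (the project's actual implementations may differ):

import numpy as np

def precision(x):
    # fraction of retrieved items that are relevant
    x = np.asarray(x, dtype=np.float64)
    return x.sum() / len(x) if len(x) else 0.0

def recall(x, f):
    # fraction of the f relevant items that were retrieved
    return np.asarray(x, dtype=np.float64).sum() / f if f else 0.0

def f1score(x, f):
    p, r = precision(x), recall(x, f)
    return 2 * p * r / (p + r) if (p + r) else 0.0

def average_precision(x, f):
    # mean of precision@k over the ranks k where a relevant item appears
    x = np.asarray(x, dtype=np.float64)
    hits = np.cumsum(x)
    return sum(hits[k] / (k + 1) for k in range(len(x)) if x[k]) / f if f else 0.0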
Example #3
def evaluate(path):
    queries = read_dataset('queries.csv')
    targets = read_dataset('targets.csv')
    freqs = freq_count(targets)
    results = load_results(path, queries, targets)
    cutoff = 1000
    precisions = []
    recalls = []
    f1scores = []
    aps = []
    gains = []
    nnt1s = []
    nnt2s = []
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        p = precision(x)
        r = recall(x, freqs[queried[0]])
        f = f1score(x, freqs[queried[0]])
        g = ndcg(x)
        ap = average_precision(x, freqs[queried[0]])
        t1 = nnt1(x, freqs[queried[0]])
        t2 = nnt2(x, freqs[queried[0]])
        precisions.append(p)
        recalls.append(r)
        f1scores.append(f)
        gains.append(g)
        aps.append(ap)
        nnt1s.append(t1)
        nnt2s.append(t2)
    print('mean precision:', numpy.mean(precisions))
    print('mean recall:', numpy.mean(recalls))
    print('mean F1 score:', numpy.mean(f1scores))
    print('mAP:', numpy.mean(aps))
    print('mean NDCG:', numpy.mean(gains))
    print('mean nearest neighbor:', numpy.mean(nnt1s), numpy.mean(nnt2s))

    # plot precision-recall curve
    mean_precisions = numpy.zeros(cutoff, numpy.float64)
    mean_recalls = numpy.zeros(cutoff, numpy.float64)
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        x = numpy.pad(x, (0, cutoff - len(x)), 'constant', constant_values=(0))
        precisions = []
        recalls = []
        for k, _ in enumerate(x):
            p = precision(x[:k + 1])
            r = recall(x[:k + 1], freqs[queried[0]])
            precisions.append(p)
            recalls.append(r)
        mean_precisions += precisions
        mean_recalls += recalls
    mean_precisions /= len(results)
    mean_recalls /= len(results)
    plt.plot(mean_recalls, mean_precisions)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.axis([0, 1, 0, 1.05])
    plt.show()
Example #4
def evaluate_test_set(session, tags, preds, fnames, lines, batch_limit=None):
    batch_num = 0
    num_sequences = 0
    p_tp_total, p_fp_total, r_tp_total, r_fn_total = 0, 0, 0, 0
    p_tp_total_binary, p_fp_total_binary, r_tp_total_binary, r_fn_total_binary = 0, 0, 0, 0

    while True:
        try:

            #Train binary, eval binary setting
            y, y_, filenames, line_nums = \
                session.run([tags, preds, fnames, lines])
            p_tp, p_fp = metrics.precision(reader, y, y_, counts=True)
            r_tp, r_fn = metrics.recall(reader, y, y_, counts=True)
            p_tp_total += p_tp
            p_fp_total += p_fp
            r_tp_total += r_tp
            r_fn_total += r_fn

            #Train All tags, eval binary setting
            p_tp_binary, p_fp_binary = metrics.precision(reader, y, y_, binary=True, counts=True)
            r_tp_binary, r_fn_binary = metrics.recall(reader, y, y_, binary=True , counts=True)
            p_tp_total_binary += p_tp_binary
            p_fp_total_binary += p_fp_binary
            r_tp_total_binary += r_tp_binary
            r_fn_total_binary += r_fn_binary

            #TODO: Train binary, eval binary setting
            

            num_sequences += len(y)
            batch_num += 1
            if batch_num == batch_limit:
                break
        except tf.errors.OutOfRangeError:
            print('test queue is empty')
            break

    if p_tp_total:
        precision = p_tp_total / (p_tp_total + p_fp_total)
        recall = r_tp_total / (r_tp_total + r_fn_total)
        f1 = metrics.f1(precision, recall)

        precision_binary = p_tp_total_binary / (p_tp_total_binary + p_fp_total_binary)
        recall_binary = r_tp_total_binary / (r_tp_total_binary + r_fn_total_binary)
        f1_binary = metrics.f1(precision_binary, recall_binary)

        print('Evaluated {} sequences from test set'.format(num_sequences))
        print('Precision:  ', precision)
        print('Recall:     ', recall)
        print('f1:         ', f1)

        print('Precision Binary:  ', precision_binary)
        print('Recall Binary:     ', recall_binary)
        print('f1 Binary:         ', f1_binary)
Example #5
    def test(self):
        tf.global_variables_initializer().run()

        self.saver = tf.train.Saver()
        could_load, checkpoint_counter = self.load(self.checkpoint_dir)

        if could_load:
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

        test_feed_dict = {
            self.test_inptus: self.test_x,
            self.test_labels: self.test_y
        }

        summary_str, test_loss, test_accuracy, p, t = self.sess.run(
            [
                self.test_summary, self.test_loss, self.test_accuracy,
                self.test_plab, self.test_tlab
            ],
            feed_dict=test_feed_dict)
        import metrics
        print("test_accuracy: {}".format(test_accuracy))
        with open('resnet.txt', 'a') as f:  # open the results file for appending
            f.write(
                str(self.i) + '-' + str(self.j) + ',' +
                str(metrics.accuracy(t, p)) + ',' +
                str(metrics.precision(t, p)) + ',' +
                str(metrics.recall(t, p)) + ',' + str(metrics.f1score(t, p)) +
                ',' + str(metrics.ft(t, p)) + '\n')
Example #6
def classification_report(y_true, y_pred):
    print('--------------------------------')
    print('Accuracy -', metrics.accuracy(y_true, y_pred))
    print('Recall -', metrics.recall(y_true, y_pred))
    print('Precision -', metrics.precision(y_true, y_pred))
    print('F1 score -', metrics.f1_score(y_true, y_pred))
    print('--------------------------------')
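For reference, one plausible set of definitions behind a label-based metrics module like the one used in this report, built from the usual confusion-matrix counts (a sketch, not the project's actual code):

def _counts(y_true, y_pred):
    tp = sum(1 for t, p in zip(y_true, y_pred) if t == 1 and p == 1)
    fp = sum(1 for t, p in zip(y_true, y_pred) if t == 0 and p == 1)
    fn = sum(1 for t, p in zip(y_true, y_pred) if t == 1 and p == 0)
    tn = sum(1 for t, p in zip(y_true, y_pred) if t == 0 and p == 0)
    return tp, fp, fn, tn

def accuracy(y_true, y_pred):
    tp, fp, fn, tn = _counts(y_true, y_pred)
    return (tp + tn) / len(y_true)

def precision(y_true, y_pred):
    tp, fp, _, _ = _counts(y_true, y_pred)
    return tp / (tp + fp) if (tp + fp) else 0.0

def recall(y_true, y_pred):
    tp, _, fn, _ = _counts(y_true, y_pred)
    return tp / (tp + fn) if (tp + fn) else 0.0

def f1_score(y_true, y_pred):
    p, r = precision(y_true, y_pred), recall(y_true, y_pred)
    return 2 * p * r / (p + r) if (p + r) else 0.0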
Example #7
def test_small_cross():
    ds = DataSet('../datasets/', 'test', 'small-cross')
    print('DS: {}; iterations: {}'.format(ds.name, ds.set_count))
    for i in range(1, ds.set_count + 1):
        print("ITER #{}".format(i))
        trn, tst = ds.get_dataset(i)
        print('\tTRAIN: {}'.format(trn))
        print('\tTEST:  {}'.format(tst))

        trns, tsts = utils.get_edges_set(trn), utils.get_edges_set(tst)
        scores = get_small_scores()

        auc_res_tot = mtr.auc(ds.vx_count, trns, tsts, scores)
        auc_res_010 = mtr.auc(ds.vx_count, trns, tsts, scores, 10)
        auc_res_100 = mtr.auc(ds.vx_count, trns, tsts, scores, 100)
        auc_res_01k = mtr.auc(ds.vx_count, trns, tsts, scores, 1000)
        #        auc_res_10k = mtr.auc(ds.vx_count, trns, tsts, scores, 10000)
        #        auc_res_1ck = mtr.auc(ds.vx_count, trns, tsts, scores, 100000)
        #        auc_res_01m = mtr.auc(ds.vx_count, trns, tsts, scores, 1000000)
        prc_res_002 = mtr.precision(ds.vx_count, trns, tsts, scores, 2)

        print('\tMETRICS:')
        print('\t\t-> AUC___TOT: {:.04}'.format(auc_res_tot))  # expected: 0.67
        print('\t\t-> AUC____10: {:.04}'.format(auc_res_010))
        print('\t\t-> AUC___100: {:.04}'.format(auc_res_100))
        print('\t\t-> AUC____1K: {:.04}'.format(auc_res_01k))
        #        print('\t\t-> AUC___10K: {:.04}'.format(auc_res_10k))
        #        print('\t\t-> AUC__100K: {:.04}'.format(auc_res_1ck))
        #        print('\t\t-> AUC____1M: {:.04}'.format(auc_res_01m))
        print('\t\t-> PREC____2: {:.04}'.format(prc_res_002))  # expected: 0.50

    print()
Example #8
def reduce_by_kMIQP(algoname, res, source_file, save_path=None):
    outputs = []
    k = 10
    pd = []
    div = []
    xa = []
    for li in np.arange(0, 1, 0.05):
        lamb = li
        xa.append(lamb)
        for one_tuple in tqdm(res, ncols=77):
            r, M = _preprocess(one_tuple)
            if algoname == 'greedy':
                vx, max_res = greedy_kMIQP(r, M, lamb, k=k)
            elif algoname == 'gurobi':
                vx, max_res = gurobi_kMIQP(r, M, lamb, k=k)

            #vx, max_res = kMIQP(r, M, lamb, k=k)
            groundtruth, preds, scores = one_tuple
            preds = [preds[x] for x in vx]
            outputs.append((groundtruth, preds, max_res))
        if save_path is None:
            prec, jacc = 0.0, 0.0
            for groundtruth, preds, scores in outputs:
                preds = preds[:k]
                jacc += jaccard(preds)
                prec += precision(groundtruth, preds)
            pd.append(prec * 100 / len(outputs))
            div.append(jacc / len(outputs))
        else:
            with open(save_path, 'wb') as f:
                pickle.dump(outputs, f, pickle.HIGHEST_PROTOCOL)

    return xa, pd, div
Example #9
def evaluate(path):
    queries = read_dataset('queries.csv')
    targets = read_dataset('targets.csv')
    freqs = freq_count(targets)
    results = load_results(path, queries, targets)
    cutoff = 1000
    precisions = []
    recalls = []
    f1scores = []
    aps = []
    gains = []
    nnt1s = []
    nnt2s = []
    for (queried, retrieved) in results:
        x = categories_to_rel(queried, retrieved)[:cutoff]
        p = precision(x)
        r = recall(x, freqs[queried[0]])
        f = f1score(x, freqs[queried[0]])
        g = ndcg(x)
        ap = average_precision(x, freqs[queried[0]])
        t1 = nnt1(x, freqs[queried[0]])
        t2 = nnt2(x, freqs[queried[0]])
        precisions.append(p)
        recalls.append(r)
        f1scores.append(f)
        gains.append(g)
        aps.append(ap)
        nnt1s.append(t1)
        nnt2s.append(t2)
        print('precision:', p)
        print('recall:', r)
        print('F1 score:', f)
        print('average precision:', ap)
        print('NDCG:', g)
        print('nearest neighbor:', t1, t2)
Example #10
def f1_score(y_true, y_pred):
    from metrics import precision, recall
    y_true = K.cast(y_true, dtype='float32')
    y_pred = K.cast(y_pred, dtype='float32')
    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
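The precision and recall imported here operate on Keras tensors (note the K.cast calls and the K.epsilon in the denominator). A common backend-level sketch of such binary metrics, assuming tensorflow.keras (the imported metrics module may be implemented differently):

from tensorflow.keras import backend as K

def precision(y_true, y_pred):
    # true positives divided by predicted positives, on rounded predictions
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())

def recall(y_true, y_pred):
    # true positives divided by actual positives
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())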
def eval_classifier(classifier, x, y):
    y_pred = classifier.predict(x)

    conf = metrics.conf_matrix(y_pred, y)
    accuracy = metrics.accuracy(y_pred, y)
    precision = metrics.precision(y_pred, y)
    recall = metrics.recall(y_pred, y)
    f1_score = metrics.f_score(y_pred, y, beta=1)
    avg_prec = np.mean(precision)
    avg_rec = np.mean(recall)
    avg_f1 = np.mean(f1_score)

    print("Confusion Matrix: ")
    print(conf)
    print("Accuracy:")
    print(accuracy)
    print("Precision:")
    print(precision)
    print(f"Average Precision: {avg_prec}")
    print("Recall:")
    print(recall)
    print(f"Average Recall: {avg_rec}")
    print("F1_score:")
    print(f1_score)
    print(f"Average F1 Score: {avg_f1}")
    def check_engine_quality(self, query_num, list_of_docs):
        """
        :param query_num:
        :param list_of_docs:
        :return: no return. prints metrics of the query. precision, recall, map.
        """

        benchmark_path = "data\\benchmark_lbls_train.csv"
        df = pd.read_csv(benchmark_path)

        df_prec = df[df['query'] == query_num]
        df_prec = df_prec[df_prec['tweet'].isin(list_of_docs)]
        dict_for_data = df_prec.set_index('tweet')['y_true'].to_dict()

        rmv_lst = []

        ranking = []
        # Add to list for rank
        for doc in list_of_docs:
            try:
                ranking.append(dict_for_data[int(doc)])
            except:
                rmv_lst.append(doc)
        for d in rmv_lst:
            list_of_docs.remove(d)

        data_df = pd.DataFrame({
            'query': query_num,
            'tweet': list_of_docs,
            'y_true': ranking
        })

        df_rec = df[df['query'] == query_num]
        recall_total = len(df_rec[df_rec['y_true'] == 1.0])

        # print("total Relevant doc found with tag 1 :" , len (data_df[data_df['y_true'] == 1.0]))
        # print("total NON relevant doc found with tag 0 :" , len (data_df[data_df['y_true'] == 0]))
        # print("found total of", len(df_prec), "tagged docs")
        # Calculate and print
        prec5 = metrics.precision_at_n(data_df, query_num, 5)
        prec10 = metrics.precision_at_n(data_df, query_num, 10)
        prec50 = metrics.precision_at_n(data_df, query_num, 50)
        prec_total = metrics.precision(data_df, True, query_number=query_num)
        map_of_query = metrics.map(data_df)
        recall_val = metrics.recall_single(data_df, recall_total, query_num)
        self.map_list.append(map_of_query)
        self.prec5_list.append(prec5)
        self.prec10_list.append(prec10)
        self.prec50_list.append(prec50)
        self.prec_total_list.append(prec_total)
        self.recall_list.append(recall_val)

        print()
        print("precision at 5 of query", query_num, "is :", prec5)
        print("precision at 10 of query", query_num, "is :", prec10)
        print("precision at 50 of query", query_num, "is :", prec50)
        print("precision of query", query_num, "is :", prec_total)
        print("recall of query", query_num, "is :", recall_val)
        print("map of query", query_num, "is :", map_of_query)
Example #13
def reduce_by_kMIQP(res, source_file, save_path=None):
    outputs = []
    k = 10
    pd = []
    div = []
    xa = []
    for li in np.arange(0, 10, 0.5):
        lamb = li
        xa.append(lamb)
        for one_tuple in tqdm(res, ncols=77):
            r, M = _preprocess(one_tuple)
            vx, max_res = kMIQP(r, M, lamb, k=k)
            groundtruth, preds, scores = one_tuple
            preds = [preds[x] for x in vx]
            outputs.append((groundtruth, preds, max_res))
        if save_path is None:
            prec, jacc = 0.0, 0.0
            for groundtruth, preds, scores in outputs:
                jacc += jaccard(preds)
                prec += precision(groundtruth, preds)
            pd.append(prec * 100 / len(outputs))
            div.append(jacc / len(outputs))
        else:
            with open(save_path, 'wb') as f:
                pickle.dump(outputs, f, pickle.HIGHEST_PROTOCOL)
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    freqpd = [0.2599] * 20
    freqdiv = [0.0444] * 20
    l1, = ax1.plot(xa,
                   freqpd,
                   label='freq-p@' + str(k),
                   color='darkviolet',
                   marker='o')
    l2, = ax1.plot(xa, pd, label='gurobi-p@' + str(k), color='r', marker='o')
    #ax1.legend(loc=1)
    ax1.set_ylabel('p@' + str(k))
    ax2 = ax1.twinx()
    l3, = ax2.plot(xa, div, label="gurobi-diversity", color='g', marker='*')
    l4, = ax2.plot(xa, freqdiv, label="freq-diversity", color='y', marker='*')
    #ax2.legend(loc=2)
    ax2.set_ylabel('diversity')
    ax1.set_xlabel('lamb(ele)')
    print(xa)
    print(pd)
    print(div)

    my_xticks = np.arange(0, 101, 10)
    plt.xticks(my_xticks)
    plt.legend(handles=[
        l1,
        l2,
        l3,
        l4,
    ], loc='best')
    plt.show()
Example #14
def compute_all_metrics(execution_id, path_input, path_output, formula,
                        append):
    """
    Computes all metrics and persists them in a CSV file.

    Args:
        execution_id (int): identifier of the execution
        path_input (string): path of the file that contains the classifications
        path_output (string): path of the file that will persist the metrics
        formula (string): mean_max | mean_mean
        append (boolean): true | false
    """
    from metrics import accuracy, precision, recall, f1, specificity

    # loading results
    with open(path_input) as data_file:
        data = json.load(data_file)

    # computing metrics
    tp = tn = fp = fn = 0
    for i in range(0, len(data)):
        if (data[i]['values'][formula]['positive'] >=
                data[i]['values'][formula]['negative']):
            if data[i]['values']['label'] == 'positive':
                tp += 1
            else:
                fp += 1
        elif (data[i]['values'][formula]['positive'] <
              data[i]['values'][formula]['negative']):
            if (data[i]['values']['label'] == 'negative'):
                tn += 1
            else:
                fn += 1
        else:
            raise Exception(
                "Positive similarity equals to negative similarity to news " +
                data[i]['id'])

    accuracy = accuracy(tp, tn, fp, fn)
    recall = recall(tp, fn)
    precision = precision(tp, fp)
    f1 = f1(precision, recall)
    specificity = specificity(tn, fp)

    # persisting the results
    with open(path_output, 'a' if append else 'w') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        if (not append):
            spamwriter.writerow([
                'execution_id', 'tp', 'tn', 'fp', 'fn', 'accuracy',
                'precision', 'recall', 'f1', 'specificity'
            ])
        spamwriter.writerow([
            execution_id, tp, tn, fp, fn, accuracy, precision, recall, f1,
            specificity
        ])
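The helpers imported in this example take confusion-matrix counts directly rather than label vectors. One consistent set of definitions they could correspond to (a sketch under that assumption):

def accuracy(tp, tn, fp, fn):
    return (tp + tn) / (tp + tn + fp + fn)

def recall(tp, fn):
    return tp / (tp + fn) if (tp + fn) else 0.0

def precision(tp, fp):
    return tp / (tp + fp) if (tp + fp) else 0.0

def f1(precision, recall):
    return 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0

def specificity(tn, fp):
    return tn / (tn + fp) if (tn + fp) else 0.0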
Example #15
 def supervised_eval(self, train_or_valid):
     data = self.dataset.get_labeled_data(train_or_valid)
     if data is None:
         raise ValueError('no labeled examples present in dataset')
     X_labeled, y_true, _ = data
     y_pred = self.model.predict(X_labeled)
     p, r, ac, g, auc = metrics.precision(y_true, y_pred),metrics.recall(y_true, y_pred),\
                        metrics.accuracy(y_true, y_pred), metrics.g_means(y_true, y_pred),\
                        metrics.auc(y_true, y_pred)
     self.metrics[train_or_valid].append((p, r, ac, g, auc))
Example #16
def calculate_best_threshold(df):
    thresholds = np.linspace(0, 1, 51)
    precisions_per_threshold = []
    for threshold in thresholds:
        precisions = []
        for idx in df.index:
            mask = df.loc[idx].masks
            prediction = df.loc[idx].predictions
            prediction_mask = np.int32(prediction > threshold)
            precisions.append(precision(prediction_mask, mask))
        precisions_per_threshold.append(np.mean(precisions))
    return thresholds[np.argmax(precisions_per_threshold)]
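A small usage sketch of the threshold sweep above, with a hypothetical pixel-wise precision(prediction_mask, mask) helper and a toy two-row dataframe; both the helper and the column contents are illustrative assumptions, and the sketch presumes it is pasted alongside calculate_best_threshold:

import numpy as np
import pandas as pd

def precision(prediction_mask, mask):
    # hypothetical pixel-wise precision: true positives / predicted positives
    tp = np.logical_and(prediction_mask == 1, mask == 1).sum()
    predicted = (prediction_mask == 1).sum()
    return tp / predicted if predicted else 0.0

df = pd.DataFrame({
    "masks": [np.array([[1, 0], [0, 0]]), np.array([[1, 1], [0, 0]])],
    "predictions": [np.array([[0.9, 0.2], [0.1, 0.1]]),
                    np.array([[0.8, 0.4], [0.3, 0.1]])],
})
best_threshold = calculate_best_threshold(df)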
Example #17
	def test_1(self):
		actual = [1, 1, 0, 1, 1, 1, 0, 0, 1, 1]
		predicted = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
		tp, fn, fp, tn = metrics.confusion_matrix(actual, predicted)
		self.assertEqual(tp, 3)
		self.assertEqual(fn, 4)
		self.assertEqual(fp, 1)
		self.assertEqual(tn, 2)
		self.assertEqual(metrics.accuracy(actual, predicted), 0.5)
		self.assertEqual(metrics.precision(actual, predicted), 3/4)
		self.assertEqual(metrics.recall(actual, predicted), 3/7)
		self.assertEqual(metrics.f1(actual, predicted), 6/11)
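The expected values in this test follow directly from the confusion counts: the pairs give TP = 3 (indices 1, 3 and 5), FN = 4, FP = 1 (index 7) and TN = 2, hence accuracy = 5/10, precision = 3/4, recall = 3/7 and F1 = 2 * (3/4) * (3/7) / (3/4 + 3/7) = 6/11. A quick standalone check of that arithmetic:

actual    = [1, 1, 0, 1, 1, 1, 0, 0, 1, 1]
predicted = [0, 1, 0, 1, 0, 1, 0, 1, 0, 0]
tp = sum(a == 1 and p == 1 for a, p in zip(actual, predicted))  # 3
fn = sum(a == 1 and p == 0 for a, p in zip(actual, predicted))  # 4
fp = sum(a == 0 and p == 1 for a, p in zip(actual, predicted))  # 1
tn = sum(a == 0 and p == 0 for a, p in zip(actual, predicted))  # 2
prec, rec = tp / (tp + fp), tp / (tp + fn)
assert (tp, fn, fp, tn) == (3, 4, 1, 2)
assert abs(2 * prec * rec / (prec + rec) - 6 / 11) < 1e-12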
Example #18
def test(engine, options):
    queries = pd.read_csv(os.path.join('data', 'queries_train.tsv'), sep='\t')
    bench_lbls = pd.read_csv(os.path.join('data', 'benchmark_lbls_train.csv'),
                             dtype={
                                 'query': int,
                                 'tweet': str,
                                 'y_true': int
                             })
    q2n_relevant = bench_lbls.groupby('query')['y_true'].sum().to_dict()
    queries_results = []
    q_times = []
    for i, row in queries.iterrows():
        q_id = row['query_id']
        q_keywords = row['keywords']
        start_time = time.time()
        q_n_res, q_res = engine.search(q_keywords, options['methods'])
        end_time = time.time()
        q_time = end_time - start_time
        q_times.append(q_time)
        queries_results.extend([(q_id, str(doc_id)) for doc_id in q_res])
        if q_time > 10:
            print(f'Query time exceeded: {options}')
    queries_results = pd.DataFrame(queries_results, columns=['query', 'tweet'])
    q_results_labeled = pd.merge(queries_results,
                                 bench_lbls,
                                 on=['query', 'tweet'],
                                 how='inner',
                                 suffixes=('_result', '_bench'))
    options['max_q_time'] = max(q_times)
    options['avg_q_time'] = sum(q_times) / len(q_times)
    options['MAP'] = metrics.map(q_results_labeled)
    options['precision'] = metrics.precision(q_results_labeled)
    options['precision@5'] = metrics.precision(
        q_results_labeled.groupby('query').head(5))
    options['precision@10'] = metrics.precision(
        q_results_labeled.groupby('query').head(10))
    options['precision@50'] = metrics.precision(
        q_results_labeled.groupby('query').head(50))
    options['recall'] = metrics.recall(q_results_labeled, q2n_relevant)
    save_to_csv(options)
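Here metrics.precision and metrics.map operate on a dataframe of labeled results with query, tweet and y_true columns, and precision@k is obtained by simply truncating each query's results before calling the same function. A rough sketch of what such dataframe-level metrics might compute (an assumption about the project's metrics module, not its actual code):

def precision(df):
    # mean, over queries, of the fraction of returned tweets labeled relevant
    return df.groupby('query')['y_true'].mean().mean()

def recall(df, q2n_relevant):
    # relevant results returned divided by the total relevant per query, averaged
    per_query = df.groupby('query')['y_true'].sum()
    ratios = [per_query[q] / q2n_relevant[q] for q in per_query.index]
    return sum(ratios) / len(ratios)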
Example #19
    def calculate_metrics(self):
        # Retrieved documents
        recovered_documents = []

        count_docs = self.tableWidget_results.rowCount()

        for item in range(count_docs):
            document = self.tableWidget_results.item(item, 1).text()
            recovered_documents.append(document)

        # Relevant documents
        relevant_documents = []

        count_docs = self.tableWidget_relevant.rowCount()

        for item in range(count_docs):
            document = self.tableWidget_relevant.item(item, 1).text()
            relevant_box = self.tableWidget_relevant.cellWidget(item, 2)
            mark_box = relevant_box.findChildren(QCheckBox)[0]
            if mark_box.isChecked():
                relevant_documents.append(document)

        # Relevant retrieved documents
        rel = set(relevant_documents)
        rec = set(recovered_documents)
        recovered_relevant_documents = list(rel.intersection(rec))

        RR = len(recovered_relevant_documents)
        REC = len(recovered_documents)
        REL = len(relevant_documents)

        index = self.comboBox_medida.currentIndex()
        if index == 0:
            value = metrics.precision(RR, REC)
        elif index == 1:
            value = metrics.recobrado(RR, REL)
        elif index == 2:
            try:
                beta = float(self.lineEdit_beta.text())
                value = metrics.e_medida(RR, REL, REC, beta)
            except (Exception):
                self.lineEdit_beta.setText("0")
                value = metrics.e_medida(RR, REL, REC, 0)

        elif index == 3:
            value = metrics.f_medida(RR, REL, REC)
        else:
            value = metrics.r_presicion(RR, REC, REL)

        self.lineEdit_medida.setText(str(value))
Example #20
    def post(self):
        global current_scene
        score = 0

        print('Submitted scene %s' % current_scene)
        if self.scene_exists(current_scene):
            return {
                'message': "Scene {} already exist.".format(current_scene)
            }, 400
        correct_dict = self.fetch_correct_result()
        if not correct_dict:
            return {
                'message':
                "There is no reuslt dataexist for scene {}.".format(
                    current_scene)
            }, 400
        your_dict = request.get_json()
        submission_time = datetime.datetime.now()
        print(' Correct prediction', correct_dict)
        your_dict = {str(k): int(v) for k, v in your_dict.items()}
        print(' Your prediction', your_dict)
        sys.stdout.flush()
        score = 0
        score2 = 0
        score3 = 0
        if your_dict:
            #score = Benchmark.diff_dicts(correct_dict, your_dict)
            score = metrics.accuracy(correct_dict, your_dict)
            score2 = metrics.precision(correct_dict, your_dict)
            score3 = metrics.recall(correct_dict, your_dict)
            print("scene accuracy", score)
            print("scene precision", score2)
            print("scene recall", score3)

        submission_result = {
            'scene': current_scene,
            'accuracy': score,
            'precision': score2,
            'recall': score3
        }
        try:
            self.insert(submission_result, submission_time)
        except:
            return {
                'message': 'An error occurred while inserting the item'
            }, 500

        return {
            'Your score for this scene is ': submission_result['accuracy']
        }, 201
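In this endpoint the metrics compare two dicts keyed by object id with binary values (correct_dict vs. your_dict). A minimal sketch consistent with that call pattern (the real metrics module may compute these differently):

def accuracy(correct, predicted):
    keys = set(correct) & set(predicted)
    return sum(correct[k] == predicted[k] for k in keys) / len(keys) if keys else 0.0

def precision(correct, predicted):
    predicted_pos = [k for k in predicted if predicted[k] == 1 and k in correct]
    hits = sum(correct[k] == 1 for k in predicted_pos)
    return hits / len(predicted_pos) if predicted_pos else 0.0

def recall(correct, predicted):
    actual_pos = [k for k in correct if correct[k] == 1]
    hits = sum(predicted.get(k) == 1 for k in actual_pos)
    return hits / len(actual_pos) if actual_pos else 0.0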
Example #21
 def precisions(self):
     predictions = self.one_hot
     size = len(self._classes)
     with tf.compat.v1.name_scope("Precisions"):
         ps = []
         ops = []
         for i, c in enumerate(self._classes):
             mask = tf.one_hot([i], size, axis=-1)
             p, op = precision(labels=self.target,
                               predictions=predictions,
                               weights=mask)
             tf.compat.v1.summary.scalar("c{}_{}".format(i, c), p * 100)
             ps.append(p)
             ops.append(op)
         return ps, ops
Example #22
def calculate_precisions(df):
    df["precisions"] = [
        precision(pm, m) for pm, m in zip(df.prediction_masks, df.masks)
    ]
    df["precisions_otsu"] = [
        precision(pm, m) for pm, m in zip(df.prediction_masks_otsu, df.masks)
    ]
    df["precisions_crf"] = [
        precision(pm, m) for pm, m in zip(df.prediction_masks_crf, df.masks)
    ]
    df["precisions_best"] = [
        precision(pm, m) for pm, m in zip(df.prediction_masks_best, df.masks)
    ]

    mask_threshold_per_cc = {}
    for cc, cc_df in df.groupby("predictions_cc"):
        mask_threshold_per_cc[cc] = calculate_best_threshold(cc_df)

    df["precisions_cc"] = [
        precision(np.int32(p > mask_threshold_per_cc[cc]), m)
        for p, m, cc in zip(df.predictions, df.masks, df.predictions_cc)
    ]

    return mask_threshold_per_cc
Example #23
 def precisions(self):
     predictions = self.one_hot
     size = len(self._classes)
     with tf.name_scope("Precisions"):
         ps = []
         ops = []
         for i, c in enumerate(self._classes):
             mask = tf.one_hot([i], size, axis=-1)
             p, op = precision(labels=self.target,
                               predictions=predictions,
                               weights=mask,
                               updates_collections=tf.GraphKeys.UPDATE_OPS)
             tf.summary.scalar("c_{}".format(c), p * 100)
             ps.append(p)
             ops.append(op)
         return ps, ops
Example #24
def _watch_converge(res):
    random.shuffle(res)
    tqdmInput = tqdm(res, ncols=77, leave=True)
    prec, jacc = 0.0, 0.0
    for iter, one_tuple in enumerate(tqdmInput):
        r, M = _preprocess(one_tuple)
        vx, max_res = kMIQP(r, M, lamb=1.0, k=10)

        groundtruth, preds, scores = one_tuple
        preds = [preds[x] for x in vx]

        prec += precision(groundtruth, preds)
        jacc += jaccard(preds)

        tqdmInput.set_description('Prec@10: %.3f%% Div: %.3f' %
                                  (prec * 100 / (iter + 1), jacc / (iter + 1)))
Example #25
 def active_simulation_eval(self):
     data = self.dataset.get_unlabeled_data()
     if data is None:
         # actually issue the warning instead of constructing an unused UserWarning object
         import warnings
         warnings.warn(
             'all examples have been labeled; this eval mode only works '
             'if there is an unlabeled pool of data in `simulate` mode'
         )
         return
     X_unlabeled, unlabeled_indexes = data
     # get unlabeled examples labels in simulation with `y_ideal`
     y_true = self.dataset.y_ideal[unlabeled_indexes]
     y_pred = self.model.predict(X_unlabeled)
     p, r, ac, g, auc = metrics.precision(y_true, y_pred),metrics.recall(y_true, y_pred),\
                        metrics.accuracy(y_true, y_pred), metrics.g_means(y_true, y_pred),\
                        metrics.auc(y_true, y_pred)
     self.metrics['simulate'].append((p, r, ac, g, auc))
Example #26
def results_to_metrics(results, methods, ref_motifs):
    _, _, ref_labels = motif.unpack_motif(ref_motifs)
    metric_dict = dict.fromkeys(methods)

    for m in methods:
        obs_motifs = results[m]
        _, _, obs_labels = motif.unpack_motif(obs_motifs)

        this_edit = metrics.edit_distance(obs_labels, ref_labels)
        this_recall = metrics.recall(obs_motifs, ref_motifs)
        this_precis = metrics.precision(obs_motifs, ref_motifs)
        this_f = metrics.f_measure(obs_motifs, ref_motifs)
        this_bm = metrics.boundary_distance(obs_motifs, ref_motifs)
        metric_dict[m] = [this_edit, this_recall, this_precis, this_f, this_bm]

    return metric_dict
def test(model, test_inputs, test_labels):
    """
    :param model: tf.keras.Model inherited data type
        model being trained  
    :param test_input: Numpy Array - shape (num_images, imsize, imsize, channels)
        input images to test on
    :param test_labels: Numpy Array - shape (num_images, 2)
        ground truth labels one-hot encoded
    :return: float, float, float, float 
        returns dice score, sensitivity value (0.5 threshold), specificity value (0.5 threshold), 
        and precision value all of which are in the range [0,1]
    """
    BATCH_SZ = model.batch_size
    indices = np.arange(test_inputs.shape[0]).tolist()
    all_logits = None
    for i in range(0, test_labels.shape[0], BATCH_SZ):
        images = test_inputs[indices[i:i + BATCH_SZ]]
        logits = model(images)
        if type(all_logits) == type(None):
            all_logits = logits
        else:
            all_logits = np.concatenate([all_logits, logits], axis=0)
    """this should break if the dataset size isnt divisible by the batch size because
    the for loop it runs the batches on doesnt get predictions for the remainder"""
    sensitivity_val1 = sensitivity(test_labels, all_logits, threshold=0.15)
    sensitivity_val2 = sensitivity(test_labels, all_logits, threshold=0.3)
    sensitivity_val3 = sensitivity(test_labels, all_logits, threshold=0.5)
    specificity_val1 = specificity(test_labels, all_logits, threshold=0.15)
    specificity_val2 = specificity(test_labels, all_logits, threshold=0.3)
    specificity_val3 = specificity(test_labels, all_logits, threshold=0.5)

    dice = dice_coef(test_labels, all_logits)
    precision_val = precision(test_labels, all_logits)
    print(
        "Sensitivity 0.15: {}, Senstivity 0.3: {}, Senstivity 0.5: {}".format(
            sensitivity_val1, sensitivity_val2, sensitivity_val3))
    print("Specificity 0.15: {}, Specificity 0.3: {}, Specificity 0.5: {}".
          format(specificity_val1, specificity_val2, specificity_val3))
    print("DICE: {}, Precision: {}".format(dice, precision_val))

    return dice.numpy(), sensitivity_val3, specificity_val3, precision_val
def train(model, generator, verbose=False):
    """trains the model for one epoch

    :param model: tf.keras.Model inherited data type
        model being trained 
    :param generator: BalancedDataGenerator
        a datagenerator which runs preprocessing and returns batches accessed
        by integers indexing (i.e. generator[0] returns the first batch of inputs 
        and labels)
    :param verbose: boolean
        whether to output the dice score every batch
    :return: list
        list of losses from every batch of training
    """
    BATCH_SZ = model.batch_size
    train_steps = generator.steps_per_epoch
    loss_list = []
    for i in range(0, train_steps, 1):
        images, labels = generator[i]
        with tf.GradientTape() as tape:
            logits = model(images)
            loss = model.loss_function(labels, logits)
        if i % 4 == 0 and verbose:
            sensitivity_val = sensitivity(labels, logits)
            specificity_val = specificity(labels, logits)
            precision_val = precision(labels, logits)
            train_dice = dice_coef(labels, logits)
            print("Scores on training batch after {} training steps".format(i))
            print("Sensitivity1: {}, Specificity: {}".format(
                sensitivity_val, specificity_val))
            print("Precision: {}, DICE: {}\n".format(precision_val,
                                                     train_dice))

        loss_list.append(loss)
        gradients = tape.gradient(loss, model.trainable_variables)
        model.optimizer.apply_gradients(
            zip(gradients, model.trainable_variables))

    return loss_list
Example #30
 def metric_fn(label_ids, predict, num_labels, answer_num):
     mask = tf.sequence_mask(answer_num, FLAGS.max_answer_num)
     precision = metrics.precision(label_ids,
                                   predict,
                                   num_classes=num_labels,
                                   weights=mask,
                                   pos_indices=[1])
     recall = metrics.recall(label_ids,
                             predict,
                             num_classes=num_labels,
                             weights=mask,
                             pos_indices=[1])
     f1_score = metrics.f1(label_ids,
                           predict,
                           num_classes=num_labels,
                           weights=mask,
                           pos_indices=[1])
     return {
         "precision": precision,
         "recall": recall,
         "f1_score": f1_score
     }
Example #31
def main(flag, k):

  if flag == 'clo':
    source_path = '../data/bundle_clo.pkl'
  elif flag == 'ele':
    source_path = '../data/bundle_ele.pkl'
  else:
    assert False

  with open(source_path, 'rb') as f:
    train_set = pickle.load(f)
    test_set = pickle.load(f)
    cate_list = pickle.load(f)
    bundle_map = pickle.load(f)
    (user_count, item_count, cate_count, bundle_count, bundle_rank, _) = pickle.load(f)
    gen_groundtruth_data = pickle.load(f)

  freq = Counter()
  for t in train_set:
    if len(bundle_map[t[2]]) >= 2:
      t = bundle_map[t[2]]
      freq.update(subsets(t))
      # for i in range(len(t)):
      #   for j in range(i+1, len(t)):
      #     freq.update([tuple([t[i], t[j]])])

  preds = freq.most_common(k)
  preds = [[i for i in t[0]] for t in preds]

  total, jacc, prec = 0, jaccard(preds), 0.0
  for uid, hist, pos in gen_groundtruth_data:
    groundtruth = list(bundle_map[pos])
    prec += precision(groundtruth, preds)
    total += 1

  print(flag, 'P@%d: %.4f%%\tDiv: %.4f' % (k, prec*100/total, -jacc))
Example #32
def main(argv = None):  # pylint: disable=unused-argument
  # load imageset
  train_set_folder = os.path.join(module_dir, os.path.pardir, os.path.pardir, 'data/ocr/train')
  test_set_folder = os.path.join(module_dir, os.path.pardir, os.path.pardir, 'data/ocr/test')

  # Extract it into numpy arrays.
  train_data, train_labels = load_imageset(train_set_folder, to_img_size = (28, 28, 1), ext = 'png')
  test_data, test_labels = load_imageset(test_set_folder, to_img_size = (28, 28, 1), ext = 'png')

  height = train_data.shape[1]
  width = train_data.shape[2]
  channel = (train_data.shape[3] if train_data.ndim > 3 else 1)

  label_max = np.amax(train_labels)
  label_min = np.amin(train_labels)
  num_labels = label_max - label_min + 1

  # Generate a validation set.
  train_data, train_labels, validation_data, validation_labels = split_cv(train_data, train_labels, 0.1)

  num_epochs = NUM_EPOCHS
  train_size = train_labels.shape[0]

  # This is where training samples and labels are fed to the graph.
  # These placeholder nodes will be fed a batch of training data at each
  # training step using the {feed_dict} argument to the Run() call below.
  train_data_node = tf.placeholder(
      tf.float32,
      shape = (BATCH_SIZE, height, width, channel))
  train_labels_node = tf.placeholder(tf.int64, shape = (BATCH_SIZE,))

  eval_data = tf.placeholder(
      tf.float32,
      shape=(EVAL_BATCH_SIZE, height, width, channel))

  # The variables below hold all the trainable weights. They are passed an
  # initial value which will be assigned when we call:
  # {tf.initialize_all_variables().run()}
  conv1_weights = tf.Variable(
      tf.truncated_normal([5, 5, channel, 32],  # 5x5 filter, depth 32.
                          stddev = 0.1,
                          seed = SEED),
      name="conv1_weights")
  conv1_biases = tf.Variable(tf.zeros([32]), name = "conv1_biases")
  
  conv2_weights = tf.Variable(
      tf.truncated_normal([5, 5, 32, 64],
                          stddev = 0.1,
                          seed = SEED),
      name="conv2_weights")
  conv2_biases = tf.Variable(tf.constant(0.1, shape = [64]), name = "conv2_biases")
  
  fc1_weights = tf.Variable(  # fully connected, depth 512.
      tf.truncated_normal(
          [height // 4 * width // 4 * 64, 512],
          stddev = 0.1,
          seed = SEED),
      name = "fc1_weights")
  fc1_biases = tf.Variable(tf.constant(0.1, shape = [512]), name = "fc1_biases")
  
  fc2_weights = tf.Variable(
      tf.truncated_normal([512, num_labels],
                          stddev = 0.1,
                          seed = SEED),
      name = "fc2_weights")
  fc2_biases = tf.Variable(tf.constant(0.1, shape = [num_labels]), name = "fc2_biases")

  # We will replicate the model structure for the training subgraph, as well
  # as the evaluation subgraphs, while sharing the trainable parameters.
  def lenet2(data, train = False):
    """LeNet2 definition."""
    # 2D convolution, with 'SAME' padding (i.e. the output feature map has
    # the same size as the input). Note that {strides} is a 4D array whose
    # shape matches the data layout: [n, h, w, c].
    conv1 = tf.nn.conv2d(data,
                         conv1_weights,
                         strides = [1, 1, 1, 1],
                         padding = 'SAME')
    # Bias and rectified linear non-linearity.
    relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))
    # Max pooling. The kernel size spec {ksize} also follows the layout of
    # the data. Here we have a pooling window of 2, and a stride of 2.
    pool1 = tf.nn.max_pool(relu1,
                           ksize = [1, 2, 2, 1],
                           strides = [1, 2, 2, 1],
                           padding = 'SAME')
    conv2 = tf.nn.conv2d(pool1,
                         conv2_weights,
                         strides = [1, 1, 1, 1],
                         padding = 'SAME')
    relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
    pool2 = tf.nn.max_pool(relu2,
                           ksize = [1, 2, 2, 1],
                           strides = [1, 2, 2, 1],
                           padding = 'SAME')
    # Reshape the feature map cuboid into a 2D matrix to feed it to the
    # fully connected layers.
    pool_shape = pool2.get_shape().as_list()
    reshape = tf.reshape(pool2,
                         [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
    # Fully connected layer. Note that the '+' operation automatically
    # broadcasts the biases.
    fc1 = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
    # Add a 50% dropout during training only. Dropout also scales
    # activations such that no rescaling is needed at evaluation time.
    if train:
      fc1 = tf.nn.dropout(fc1, 0.5, seed = SEED)
    return tf.matmul(fc1, fc2_weights) + fc2_biases

  # Training computation: logits + cross-entropy loss.
  logits = lenet2(train_data_node, True)
  loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits, train_labels_node))

  # L2 regularization for the fully connected parameters.
  regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                  tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
  # Add the regularization term to the loss.
  loss += 5e-4 * regularizers

  # Optimizer: set up a variable that's incremented once per batch and
  # controls the learning rate decay.
  batch = tf.Variable(0)
  # Decay once per epoch, using an exponential schedule starting at 0.01.
  learning_rate = tf.train.exponential_decay(
      0.01,                # Base learning rate.
      batch * BATCH_SIZE,  # Current index into the dataset.
      train_size,          # Decay step.
      0.95,                # Decay rate.
      staircase=True)
  # Use simple momentum for the optimization.
  optimizer = tf.train.MomentumOptimizer(learning_rate,
                                         0.9).minimize(loss,
                                                       global_step = batch)

  # Predictions for the current training minibatch.
  train_prediction = tf.nn.softmax(logits)

  # Predictions for the test and validation, which we'll compute less often.
  eval_prediction = tf.nn.softmax(lenet2(eval_data))

  # Small utility function to evaluate a dataset by feeding batches of data to
  # {eval_data} and pulling the results from {eval_predictions}.
  # Saves memory and enables this to run on smaller GPUs.
  def eval_in_batches(data, sess):
    """Get all predictions for a dataset by running it in small batches."""
    size = data.shape[0]
    if size < EVAL_BATCH_SIZE:
      raise ValueError("batch size for evals larger than dataset: %d" % size)
    predictions = np.ndarray(shape = (size, num_labels), dtype = np.float32)
    for begin in xrange(0, size, EVAL_BATCH_SIZE):
      end = begin + EVAL_BATCH_SIZE
      if end <= size:
        predictions[begin:end, :] = sess.run(
            eval_prediction,
            feed_dict={eval_data: data[begin:end, ...]})
      else:
        batch_predictions = sess.run(
            eval_prediction,
            feed_dict={eval_data: data[-EVAL_BATCH_SIZE:, ...]})
        predictions[begin:, :] = batch_predictions[begin - size:, :]
    return predictions

  # Create a local session to run the training.
  start_time = time.time()
  model_dir = os.path.join(module_dir, os.path.pardir, os.path.pardir, 'models') 
  with tf.Session() as sess:
    # Run all the initializers to prepare the trainable parameters.
    tf.initialize_all_variables().run()
    # Import base model weights
    saver = tf.train.Saver([conv1_weights, conv1_biases, conv2_weights, conv2_biases, fc1_weights, fc1_biases])
    ckpt = tf.train.get_checkpoint_state(os.path.join(model_dir, 'base'))
    if ckpt and ckpt.model_checkpoint_path:
      logger.info("Continue training from the model {}".format(ckpt.model_checkpoint_path))
      saver.restore(sess, ckpt.model_checkpoint_path)
    # for var in tf.trainable_variables():
    #  logger.info(var.eval())

    logger.info('Initialized!')
    # Loop through training steps.
    for step in xrange(int(num_epochs * train_size) // BATCH_SIZE):
      # Compute the offset of the current minibatch in the data.
      # Note that we could use better randomization across epochs.
      offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
      batch_data = train_data[offset:(offset + BATCH_SIZE), ...]
      batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
      # This dictionary maps the batch data (as a numpy array) to the
      # node in the graph it should be fed to.
      feed_dict = {train_data_node: batch_data,
                   train_labels_node: batch_labels}
      # Run the graph and fetch some of the nodes.
      _, l, lr, predictions = sess.run(
          [optimizer, loss, learning_rate, train_prediction],
          feed_dict=feed_dict)
      if step % EVAL_FREQUENCY == 0:
        elapsed_time = time.time() - start_time
        start_time = time.time()
        logger.info('Step %d (epoch %.2f), %.1f ms' %
              (step, float(step) * BATCH_SIZE / train_size,
               1000 * elapsed_time / EVAL_FREQUENCY))
        logger.info('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
        logger.info('Minibatch error: %.1f%%' % error_rate(predictions, batch_labels))
        logger.info('Validation error: %.1f%%' % error_rate(
            eval_in_batches(validation_data, sess), validation_labels))
        sys.stdout.flush()
    # Finally print the result!
    test_precision = precision(eval_in_batches(test_data, sess), test_labels)
    logger.info('Test precision: %.1f%%' % test_precision)

    # Model persistence
    saver = tf.train.Saver([conv1_weights, conv1_biases, conv2_weights, conv2_biases, fc1_weights, fc1_biases, fc2_weights, fc2_biases])
    model_path = os.path.join(model_dir, "finetuned", "lenet_finetuned.ckpt")
    save_path = saver.save(sess, model_path)
    logger.info("Model saved in file: %s" % save_path)
Example #33
            clf = None
            clf = RandomForestClassifier(n_estimators=p, criterion='entropy',
                                         max_depth=14, min_samples_split=20,
                                         n_jobs=2)
            
            clf.fit(train_X, train_y)
            results.append(metrics.predict_table(clf, test_X, test_y))
            

        result = pd.concat(results)

        matrix = metrics.confusion_matrix(result)

        clases = matrix.columns.tolist()
        precisions = [metrics.precision(matrix, c) for c in clases]
        recalls = [metrics.recall(matrix, c) for c in clases]
        f_scores = [metrics.f_score(matrix, c) for c in clases]

        w_score = metrics.weighted_f_score(matrix)

        # f = open(result_dir + str(max_depth) + ' ' + str(min_samples_split) + '.txt', 'w')
        f = open(result_dir + str(p) + '.txt', 'w')

        f.write('F_score by class')
        f.write('\n')
        f.write(str(f_scores))
        f.write('\n')
        f.write('\n')
        f.write('Weighted average: ')
        f.write(str(w_score))