def auc_score(self, ground_truth, predictions, **kwargs):
        """ Calculate the AUC score for this particular trial.

        This will also calculate the F scores and ROC curves

        Args:
            ground_truth: vector of class labels
            predictions: vector of predicted class labels

        Returns:
            AUC score for this trial
        """

        # calculate f scores
        # scipy.stats.threshold has been removed; a plain comparison yields the same hard labels
        thresholded = (predictions[:, 1] > 0.5).astype(int)
        fhalf_score = metrics.fbeta_score(ground_truth.astype(int), thresholded, beta=0.5)
        f2_score = metrics.fbeta_score(ground_truth.astype(int), thresholded, beta=2)
        f1_score = metrics.fbeta_score(ground_truth.astype(int), thresholded, beta=1)

        # calculate ROC curve and AUC
        fpr, tpr, _ = metrics.roc_curve(ground_truth, predictions[:, 1])
        area = metrics.auc(fpr, tpr)

        self.fhalf_scores_.append(fhalf_score)
        self.f2scores_.append(f2_score)
        self.f1scores_.append(f1_score)
        self.rates_.append((fpr, tpr))
        self.aucs_.append(area)
        return area
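
# For reference: a minimal, self-contained sketch of the same computation with plain
# scikit-learn calls. The toy arrays and the 0.5 cutoff are assumptions, not taken
# from the class above.
import numpy as np
from sklearn import metrics

ground_truth = np.array([0, 0, 1, 1])          # assumed binary labels
proba_pos = np.array([0.1, 0.6, 0.4, 0.9])     # assumed positive-class probabilities

hard_labels = (proba_pos > 0.5).astype(int)
f_half = metrics.fbeta_score(ground_truth, hard_labels, beta=0.5)
f_one = metrics.fbeta_score(ground_truth, hard_labels, beta=1)
f_two = metrics.fbeta_score(ground_truth, hard_labels, beta=2)

fpr, tpr, _ = metrics.roc_curve(ground_truth, proba_pos)
area = metrics.auc(fpr, tpr)                   # same value as metrics.roc_auc_score(ground_truth, proba_pos)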
Example #2
def evaluate_model(y, y_pred, y_pred_prob, label, statistics, uniprot=None, verbose=0):
    y_pred_prob_1 = [x[1] for x in y_pred_prob]

    if uniprot:
        for u, p1, p2 in zip(uniprot, y, y_pred_prob_1):
            print("\t\t\tResult for {}, {} \n\t\t\t\tTrue: \t{} ||| Pred: \t{}".format(label, u, p1, p2))

    label_stats = compute_label_statistics(y, y_pred, labels=[0, 1])
    statistics.update_statistics(label, 'Accuracy', accuracy_score(y, y_pred))
    statistics.update_statistics(label, 'F (beta=0.5)', fbeta_score(y, y_pred, beta=0.5, labels=[0, 1], average='binary'))
    statistics.update_statistics(label, 'F (beta=1)', fbeta_score(y, y_pred, beta=1.0, labels=[0, 1], average='binary'))
    statistics.update_statistics(label, 'Specificity', label_stats[1]['specificity'])
    statistics.update_statistics(label, 'Recall', label_stats[1]['sensitivity'])
    statistics.update_statistics(label, 'Precision', label_stats[1]['precision'])
    statistics.update_statistics(label, 'FDR', label_stats[1]['fdr'])
    try:
        statistics.update_statistics(label, 'ROC-AUC', roc_auc_score(y, y_pred, average="weighted"))
    except (ValueError, AssertionError):
        statistics.update_statistics(label, 'ROC-AUC', 0.0)
    try:
        pr_auc = average_precision_score(y, y_pred, average="weighted")
        if str(pr_auc) == 'nan':
            pr_auc = 0.0
        statistics.update_statistics(label, 'PR-AUC', pr_auc)
    except (ValueError, AssertionError):
        statistics.update_statistics(label, 'PR-AUC', 0.0)

    if verbose:
        statistics.print_statistics(label)
    return statistics
Example #3
def test_sample_order_invariance():
    y_true, y_pred, _ = make_prediction(binary=True)

    y_true_shuffle, y_pred_shuffle = shuffle(y_true, y_pred,
                                             random_state=0)

    for metric in [accuracy_score,
                   hamming_loss,
                   zero_one_loss,
                   lambda y1, y2: zero_one_loss(y1, y2, normalize=False),
                   precision_score,
                   recall_score,
                   f1_score,
                   lambda y1, y2: fbeta_score(y1, y2, beta=2),
                   lambda y1, y2: fbeta_score(y1, y2, beta=0.5),
                   matthews_corrcoef,
                   mean_absolute_error,
                   mean_squared_error,
                   explained_variance_score,
                   r2_score]:

        assert_almost_equal(metric(y_true, y_pred),
                            metric(y_true_shuffle, y_pred_shuffle),
                            err_msg="%s is not sample order invariant"
                                    % metric)
Example #4
def test_precision_recall_f1_score_with_an_empty_prediction():
    y_true = np.array([[0, 1, 0, 0], [1, 0, 0, 0], [0, 1, 1, 0]])
    y_pred = np.array([[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 1, 0]])

    # true_pos = [ 0.  1.  1.  0.]
    # false_pos = [ 0.  0.  0.  1.]
    # false_neg = [ 1.  1.  0.  0.]
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                 average=None)
    assert_array_almost_equal(p, [0.0, 1.0, 1.0, 0.0], 2)
    assert_array_almost_equal(r, [0.0, 0.5, 1.0, 0.0], 2)
    assert_array_almost_equal(f, [0.0, 1 / 1.5, 1, 0.0], 2)
    assert_array_almost_equal(s, [1, 2, 1, 0], 2)

    f2 = fbeta_score(y_true, y_pred, beta=2, average=None)
    support = s
    assert_array_almost_equal(f2, [0, 0.55, 1, 0], 2)

    p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                 average="macro")
    assert_almost_equal(p, 0.5)
    assert_almost_equal(r, 1.5 / 4)
    assert_almost_equal(f, 2.5 / (4 * 1.5))
    assert_equal(s, None)
    assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                    average="macro"),
                        np.mean(f2))

    p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                 average="micro")
    assert_almost_equal(p, 2 / 3)
    assert_almost_equal(r, 0.5)
    assert_almost_equal(f, 2 / 3 / (2 / 3 + 0.5))
    assert_equal(s, None)
    assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                    average="micro"),
                        (1 + 4) * p * r / (4 * p + r))

    p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                 average="weighted")
    assert_almost_equal(p, 3 / 4)
    assert_almost_equal(r, 0.5)
    assert_almost_equal(f, (2 / 1.5 + 1) / 4)
    assert_equal(s, None)
    assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                    average="weighted"),
                        np.average(f2, weights=support))

    p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                 average="samples")
    # |h(x_i) inter y_i | = [0, 0, 2]
    # |y_i| = [1, 1, 2]
    # |h(x_i)| = [0, 1, 2]
    assert_almost_equal(p, 1 / 3)
    assert_almost_equal(r, 1 / 3)
    assert_almost_equal(f, 1 / 3)
    assert_equal(s, None)
    assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                    average="samples"),
                        0.333, 2)
Example #5
    def build_metric(self, logs):
        return {
            'val_loss': lambda y, y_hat: logs['val_loss'],
            'val_acc': lambda y, y_hat: logs['val_acc'],
            'val_f1': f1_score,
            # the original dict repeated the 'val_f1' key; the beta=0.5 entry gets its own key here
            'val_f05': lambda y, y_hat: fbeta_score(y, y_hat, beta=0.5),
            'val_f2': lambda y, y_hat: fbeta_score(y, y_hat, beta=2),
        }[self.metric_name]
Example #6
    def test_fbeta_score(self):
        result = self.df.metrics.fbeta_score(beta=0.5, average='weighted')
        expected = metrics.fbeta_score(self.target, self.pred, beta=0.5, average='weighted')
        self.assertEqual(result, expected)

        result = self.df.metrics.fbeta_score(beta=0.5, average='macro')
        expected = metrics.fbeta_score(self.target, self.pred,
                                       beta=0.5, average='macro')
        self.assertEqual(result, expected)

        result = self.df.metrics.fbeta_score(beta=0.5, average=None)
        expected = metrics.fbeta_score(self.target, self.pred, beta=0.5, average=None)
        self.assertTrue(isinstance(result, pdml.ModelSeries))
        self.assert_numpy_array_almost_equal(result.values, expected)
def train_predict(learner, sample_size, X_train, y_train, X_test, y_test): 
    '''
    inputs:
       - learner: the learning algorithm to be trained and predicted on
       - sample_size: the size of samples (number) to be drawn from training set
       - X_train: features training set
       - y_train: income training set
       - X_test: features testing set
       - y_test: income testing set
    '''
    
    results = {}
    

    # Fit the learner to the training data using slicing with 'sample_size'
    start = time() # Get start time
    learner.fit(X_train[:sample_size], y_train[:sample_size])
    end = time() # Get end time
    
    # Calculate the training time
    results['train_time'] = end - start
        
    # Get the predictions on the test set,
    # then get predictions on the first 300 training samples
    start = time() # Get start time
    predictions_test = learner.predict(X_test)
    predictions_train = learner.predict(X_train[:300])
    end = time() # Get end time
    
    # Calculate the total prediction time
    results['pred_time'] = end - start
            
    # Compute accuracy on the first 300 training samples
    results['acc_train'] = accuracy_score(y_train[:300], predictions_train[:300])
        
    # Compute accuracy on test set
    results['acc_test'] = accuracy_score(y_test, predictions_test)
    
    # Compute F-score on the first 300 training samples
    results['f_train'] = fbeta_score(y_train[:300], predictions_train[:300], beta=0.5)
        
    # Compute F-score on the test set
    results['f_test'] = fbeta_score(y_test, predictions_test, beta=0.5)
       
    # Success
    print "{} trained on {} samples.".format(learner.__class__.__name__, sample_size)
        
    # Return the results
    return results
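
# Usage sketch for train_predict above. The classifier and the synthetic data split are
# assumptions, and the module-level imports the function relies on (time, accuracy_score,
# fbeta_score) are assumed to be in place.
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
results = train_predict(DecisionTreeClassifier(random_state=0), 300,
                        X_train, y_train, X_test, y_test)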
    def testGetMetrics(self):
        negative_class = 0
        positive_class = 1
        actual = random.random_integers(negative_class,positive_class,100)
        judgments = [positive_class]*len(actual)
        beta = 2.0
        expected_metrics = []
        for i in range(len(actual)):
            expected_metrics.append(fbeta_score(actual, judgments, beta=beta))
            judgments[i] = negative_class
        expected_metrics.append(fbeta_score(actual, judgments, beta=beta))

        actual_metrics = getMetrics(actual, positive_class, beta)

        self.assertEqual(expected_metrics, actual_metrics)
Example #9
def evaluate_model(preds, testy):
    accuracy = metrics.accuracy_score(testy, preds)
    precision = metrics.precision_score(testy, preds)
    recall = metrics.recall_score(testy, preds)
    F1 = metrics.f1_score(testy, preds)
    Fbeta = metrics.fbeta_score(testy, preds, beta=2) # weighting recall stronger than precision
    print "Model summary: accuracy - ", accuracy,"precision - ",precision, "recall - ", recall, "Fbeta - ",Fbeta, "F1 - ",F1
Example #10
    def _created_model(self, X, Y, indices, i, model):
        # to assign an F-score weight to each classifier,
        # sample another subset of the data and use the model
        # we just trained to generate predictions
        beta = self.weighting
        n = X.shape[0]
        bagsize = len(indices)
        if beta or self.verbose:
            error_sample_indices = np.random.random_integers(0, n - 1, bagsize)
            error_subset = X[error_sample_indices, :]
            if self.feature_subsets:
                error_subset = error_subset[:, self.feature_subsets[i]]
            error_labels = Y[error_sample_indices]
            y_pred = model.predict(error_subset)

            if self.weighting:
                f_score = fbeta_score(error_labels, y_pred, beta=beta)
                self.weights[i] = f_score
            if self.verbose:
                print "Actual non-zero:", np.sum(error_labels != 0)
                num_pred_nz = np.sum(y_pred != 0)
                print "Predicted non-zero:", num_pred_nz
                pred_correct = (y_pred == error_labels)
                pred_nz = (y_pred != 0)
                num_true_nz = np.sum(pred_correct & pred_nz)
                print "True non-zero:", num_true_nz
                print "False non-zero:", num_pred_nz - num_true_nz
                print "---"
Example #11
def plot_precision_recall(performace_df, model, ax=None, beta=0.1):

    ax = ax or plt.gca()

    if isinstance(model, CalibratedClassifierCV):
        model = model.base_estimator

    thresholds = np.linspace(0, 1, model.n_estimators + 2)
    precision = []
    recall = []
    f_beta = []

    ax.axvline(0, color='lightgray')
    ax.axvline(1, color='lightgray')
    ax.axhline(0, color='lightgray')
    ax.axhline(1, color='lightgray')
    for threshold in thresholds:

        prediction = (performace_df.probabilities.values >= threshold).astype('int')
        label = performace_df.label.values

        precision.append(metrics.precision_score(label, prediction))
        recall.append(metrics.recall_score(label, prediction))
        f_beta.append(metrics.fbeta_score(label, prediction, beta=beta))

    ax.plot(thresholds, precision, label='precision')
    ax.plot(thresholds, recall, label='recall')
    ax.plot(thresholds, f_beta, label='$f_{{{:.2f}}}$'.format(beta))

    ax.legend()
    ax.set_xlabel('prediction threshold')
    ax.figure.tight_layout()
Example #12
def main(data_module):
    """Load data, train model and evaluate it."""
    data = data_module.load_data()
    model = create_model(data_module.n_classes, (data['x_train'].shape[1], ))
    print(model.summary())
    optimizer = get_optimizer({'optimizer': {'initial_lr': 0.001}})
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=[precision, recall, f1, accuracy])
    t0 = time.time()
    model.fit(data['x_train'], data['y_train'],
              batch_size=32,
              epochs=30,
              validation_data=(data['x_test'], data['y_test']),
              shuffle=True,
              # callbacks=callbacks
              )
    t1 = time.time()
    # res = get_tptnfpfn(model, data)
    preds = model.predict(data['x_test'])
    preds[preds >= 0.5] = 1
    preds[preds < 0.5] = 0
    t2 = time.time()
    print(("{clf_name:<30}: {acc:0.2f}% {f1:0.2f}% in {train_time:0.2f}s "
           "train / {test_time:0.2f}s test")
          .format(clf_name="MLP",
                  acc=(accuracy_score(y_true=data['y_test'], y_pred=preds) * 100),
                  f1=(fbeta_score(y_true=data['y_test'], y_pred=preds, beta=1, average="weighted") * 100),
                  train_time=(t1 - t0),
                  test_time=(t2 - t1)))
Example #13
def test_precision_recall_f1_score_binary():
    """Test Precision Recall and F1 Score for binary classification task"""
    y_true, y_pred, _ = make_prediction(binary=True)

    # detailed measures for each class
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)
    assert_array_almost_equal(p, [0.73, 0.85], 2)
    assert_array_almost_equal(r, [0.88, 0.68], 2)
    assert_array_almost_equal(f, [0.80, 0.76], 2)
    assert_array_equal(s, [25, 25])

    # individual scoring function that can be used for grid search: in the
    # binary class case the score is the value of the measure for the positive
    # class (e.g. label == 1)
    ps = precision_score(y_true, y_pred)
    assert_array_almost_equal(ps, 0.85, 2)

    rs = recall_score(y_true, y_pred)
    assert_array_almost_equal(rs, 0.68, 2)

    fs = f1_score(y_true, y_pred)
    assert_array_almost_equal(fs, 0.76, 2)

    assert_almost_equal(fbeta_score(y_true, y_pred, beta=2),
                        (1 + 2 ** 2) * ps * rs / (2 ** 2 * ps + rs), 2)
Example #14
def optimize_lda(corpus, dictionary, train, test, topics=5, max_topics=1200):  # train and test set must be lists of review-label pairs
  '''Runs LDA with an increasing number of topics; trains a decision tree
    classifier on each new LDA model and appends the F2 score and topic count
    to accuracy_list.'''
  accuracy_list = []
  while topics <= max_topics:
    start_time = time.clock()
    lda = ldamodel.LdaModel(corpus, id2word=dictionary, num_topics=topics)
    print 'lda !'
    x_train, y_train = topic_vector(train, lda, dictionary)
    classifier = tree.DecisionTreeClassifier()
    classifier.fit(x_train, y_train)
    print 'classified!'
    x_test, y_test = topic_vector(test, lda, dictionary)
    y_pred = list(classifier.predict(x_test)) 
    print 'predicted!'
    accuracy = metrics.fbeta_score(y_test, y_pred, beta=2)    # F2 Score
    accuracy_list.append([accuracy, topics])
    print 'accuracy! ', accuracy 
    #confusion = metrics.confusion_matrix(y_test, y_pred)
    #print 'accuracy ', metrics.accuracy_score(y_test, y_pred)
    #print 'precision ', metrics.precision_score(y_test, y_pred)
    #print 'recall ', metrics.recall_score(y_test, y_pred)
    #topics += 50
    if topics < 100:
      topics += 25
    else:
      topics += 100
    end_time = time.clock()
    print ('lda_%s time: %s' % (topics, (end_time - start_time)))
    #print y_test
    #print y_pred
  #save_thing(accuracy_list, 'accuracy')
  return accuracy_list
Example #15
def kfold_cv(cls, xs, ys, k):
    from sklearn import metrics
    pk = len(xs) / k
    prec = []
    rec = []
    for i in range(k):
        ki = pk * i
        kj = pk * (i + 1)
        xs_train = np.concatenate((xs[:ki,:], xs[kj:,:]))
        ys_train = np.concatenate((ys[:ki], ys[kj:]))
        xs_test = xs[ki:kj]
        ys_test = ys[ki:kj]

        if (ys_test == 1).sum() == 0: continue

        cls.fit(xs_train, ys_train)

        # score = cls.score(xs_test, ys_test)
        # # print '{}: {}'.format(i, score)
        # avg_score += score

        ys_pred = cls.predict(xs_test)
        # # print metrics.precision_score(ys_test, ys_pred, pos_label=0), \
        # #     metrics.recall_score(ys_test, ys_pred, pos_label=0)
        # print '-------'

        # fpr, tpr, thresholds = metrics.roc_curve(ys_test, ys_pred)
        # print metrics.auc(fpr, tpr)
        #print metrics.roc_auc_score(ys_test, ys_pred)
        # print metrics.precision_recall_curve(ys_test, ys_pred, pos_label=1)
        print metrics.fbeta_score(ys_test, ys_pred, beta=0.5)
        # print '--'

        # print xs_test
        # print y_pred
        # print ys_test
        # print '---'
        # (correct,) = (ys_test == ys_pred).sum(),
        # total = len(xs_test)
        # avg_score += float(correct) / total
        prec.append(metrics.f1_score(ys_test, ys_pred))
        rec.append(metrics.recall_score(ys_test, ys_pred))
    return (np.array(prec), np.array(rec))
Example #16
def run(y_true, y_pred):
    perf = {}
    
    perf['accuracy'] = accuracy_score(y_true, y_pred)
    perf['precision'] = precision_score(y_true, y_pred, average='micro')
    perf['recall'] = recall_score(y_true, y_pred, average='micro')
    perf['fbeta_score'] = fbeta_score(y_true, y_pred, average='macro', beta=1.0)
    perf['hamming_loss'] = hamming_loss(y_true, y_pred)
    perf['cm'] = confusion_matrix(y_true, y_pred)
    
    return perf
Example #17
def test_symmetry():
    """Test the symmetry of score and loss functions"""
    y_true, y_pred, _ = make_prediction(binary=True)

    # Symmetric metric
    for metric in [accuracy_score,
                   lambda y1, y2: accuracy_score(y1, y2, normalize=False),
                   zero_one_loss,
                   lambda y1, y2: zero_one_loss(y1, y2, normalize=False),
                   hamming_loss,
                   f1_score,
                   matthews_corrcoef,
                   mean_squared_error,
                   mean_absolute_error]:

        assert_equal(metric(y_true, y_pred),
                     metric(y_pred, y_true),
                     msg="%s is not symmetric" % metric)

    # Not symmetric metrics
    for metric in [precision_score,
                   recall_score,
                   lambda y1, y2: fbeta_score(y1, y2, beta=0.5),
                   lambda y1, y2: fbeta_score(y1, y2, beta=2),
                   explained_variance_score,
                   r2_score]:

        assert_true(metric(y_true, y_pred) != metric(y_pred, y_true),
                    msg="%s seems to be symmetric" % metric)

    # Deprecated metrics
    with warnings.catch_warnings(True):
        # Throw deprecated warning
        assert_equal(zero_one(y_true, y_pred),
                     zero_one(y_pred, y_true))

        assert_equal(zero_one(y_true, y_pred, normalize=False),
                     zero_one(y_pred, y_true, normalize=False))

        assert_equal(zero_one_score(y_true, y_pred),
                     zero_one_score(y_pred, y_true))
Example #18
def evaluate_crf_model(x, y, estimator, labels, uniprot=None, verbose=0):
    y_pred = np.asarray(estimator.predict(x))
    statistics = Statistics()
    statistics.update_statistics('all_labels', 'accuracy', estimator.score(x, y))

    bin_labels = [0, 1]
    for i, l in enumerate(labels):
        y_true_binary_l = y[:, i].astype(int)
        y_pred_binary_l = y_pred[:, i].astype(int)
        label_stats = compute_label_statistics(y_true_binary_l, y_pred_binary_l, labels=bin_labels)
        statistics.update_statistics(l, 'Accuracy', accuracy_score(y_true_binary_l, y_pred_binary_l))
        statistics.update_statistics(l, 'Specificity', label_stats[1]['specificity'])
        statistics.update_statistics(l, 'Recall', label_stats[1]['sensitivity'])
        statistics.update_statistics(l, 'Precision', label_stats[1]['precision'])
        statistics.update_statistics(l, 'FDR', label_stats[1]['fdr'])
        statistics.update_statistics(l, 'F-Score (beta=0.5)', fbeta_score(
            y_true_binary_l, y_pred_binary_l, beta=0.5, labels=bin_labels, average='binary'
        ))
        statistics.update_statistics(l, 'F-Score (beta=1)', fbeta_score(
            y_true_binary_l, y_pred_binary_l, beta=1.0, labels=bin_labels, average='binary'
        ))
        try:
            roc_auc = roc_auc_score(y_true_binary_l, y_pred_binary_l)  # "binary" is not a valid average here
            statistics.update_statistics(l, 'ROC-AUC', roc_auc)
        except (ValueError, AssertionError):
            statistics.update_statistics(l, 'ROC-AUC', np.NaN)
        try:
            pr_auc = average_precision_score(y_true_binary_l, y_pred_binary_l)  # "binary" is not a valid average here
            statistics.update_statistics(l, 'PR-AUC', pr_auc)
        except (ValueError, AssertionError):
            statistics.update_statistics(l, 'PR-AUC', np.NaN)

    if verbose:
        for l in labels:
            statistics.print_statistics(l)
    if uniprot and verbose:
        for u, p1, p2 in zip(uniprot, y, y_pred):
            print("\t\t\tResult for {} \n\t\t\t\tTrue: \t{} ||| Pred: \t{}".format(u, p1, p2))

    return statistics
Example #19
    def cv(self, data, X, labels, n_folds=5, random_state=42, verbose=True, poslabel='guess'):
        cv = StratifiedKFold(labels, n_folds, random_state=random_state)
        truths = np.array([None] * len(labels))
        preds = np.array([None] * len(labels))
        for train, test in cv:
            self.clf.fit(X[train], labels[train])
            preds[test] = self.clf.predict(X[test])
            truths[test] = labels[test]
        binary_truths = self.to_binary(truths, poslabel)
        binary_preds = self.to_binary(preds, poslabel)
        results = \
            {'accuracy': accuracy_score(truths, preds),
             'f1_pos': f1_score(binary_truths, binary_preds),
             'fbeta.01': fbeta_score(binary_truths, binary_preds, beta=.01),
             'fbeta.1': fbeta_score(binary_truths, binary_preds, beta=.1),
             'fbeta.3': fbeta_score(binary_truths, binary_preds, beta=.3),
             'fbeta.5': fbeta_score(binary_truths, binary_preds, beta=.5),
             'fbeta.7': fbeta_score(binary_truths, binary_preds, beta=.7),
             'fbeta2': fbeta_score(binary_truths, binary_preds, beta=2),
             'fbeta3': fbeta_score(binary_truths, binary_preds, beta=3),
             'fbeta5': fbeta_score(binary_truths, binary_preds, beta=5),
             'fbeta7': fbeta_score(binary_truths, binary_preds, beta=7),
             'fbeta10': fbeta_score(binary_truths, binary_preds, beta=10),
             'macro_f1': f1_score(truths, preds, average='macro', pos_label=None),
             'micro_f1': f1_score(truths, preds, average='micro', pos_label=None),
             'recall': recall_score(binary_truths, binary_preds),
             'precision': precision_score(binary_truths, binary_preds),
             'roc_auc': roc_auc_score(binary_truths, binary_preds)
             }
        if verbose:
            print(self.confusion(truths, preds, self.clf.classes_))
            print(classification_report(truths, preds))
            self.fit(X, labels)
            self.top_terms()
            print('\n')
            if data is not None:
                self.top_error_terms(truths, preds, X, data)
        return results
Example #20
File: ex8.py, Project: Catentropy/mylab
def find_threshold(fn,Xcv,ycv):
    """
    by cv testing on (Xcv, ycv)
    """
    dists = np.fromiter((fn(x) for x in Xcv), float)
    best = (0,0)    #threshold, f1_score
    print dists.min(), dists.max()
##    for t in dists:    # for each prob score
    for t in np.linspace(dists.min(), dists.max(), 100):
        preds = (dists < t).astype(int)
        f = fbeta_score(ycv, preds, beta=1.)
        if f > best[1]:
            best = (t,f)
    return best
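
# An alternative way to search the cutoff, sketched here rather than taken from the
# project above: sklearn.metrics.precision_recall_curve evaluates every candidate
# threshold at once, so the F-beta curve can be maximized directly. It assumes higher
# scores mean the positive class (the snippet above uses `dists < t`, the opposite
# orientation); `y_true` and `scores` are assumed inputs.
import numpy as np
from sklearn.metrics import precision_recall_curve

def best_fbeta_threshold(y_true, scores, beta=1.0):
    precision, recall, thresholds = precision_recall_curve(y_true, scores)
    precision, recall = precision[:-1], recall[:-1]   # drop the point with no threshold
    with np.errstate(divide='ignore', invalid='ignore'):
        fbeta = (1 + beta ** 2) * precision * recall / (beta ** 2 * precision + recall)
    fbeta = np.nan_to_num(fbeta)
    best = np.argmax(fbeta)
    return thresholds[best], fbeta[best]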
Example #21
    def getResult(self, predict, data_set):
        y_true, y_predict = control.calculate_entire_ds(predict, data_set)
        result = metrics.classification_report(y_true, y_predict)
        result += "\nAccuracy classification: %f\n" % metrics.accuracy_score(y_true, y_predict)
        result += "F1 score: %f\n" % metrics.f1_score(y_true, y_predict)
        result += "Fbeta score: %f\n" % metrics.fbeta_score(y_true, y_predict, beta=0.5)
        result += "Hamming loss: %f\n" % metrics.hamming_loss(y_true, y_predict)
        result += "Hinge loss: %f\n" % metrics.hinge_loss(y_true, y_predict)
        result += "Jaccard similarity: %f\n" % metrics.jaccard_similarity_score(y_true, y_predict)
        result += "Precision: %f\n" % metrics.precision_score(y_true, y_predict)
        result += "Recall: %f\n" % metrics.recall_score(y_true, y_predict)

        if self.is_binary():
            result += "Average precision: %f\n" % metrics.average_precision_score(y_true, y_predict)
            result += "Matthews correlation coefficient: %f\n" % metrics.matthews_corrcoef(y_true, y_predict)
            result += "Area Under the Curve: %f" % metrics.roc_auc_score(y_true, y_predict)

        return result
Example #22
def test_classification_scores():
    """Test classification scorers."""
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LinearSVC(random_state=0)
    clf.fit(X_train, y_train)

    for prefix, metric in [('f1', f1_score), ('precision', precision_score),
                           ('recall', recall_score)]:

        score1 = get_scorer('%s_weighted' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='weighted')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s_macro' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='macro')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s_micro' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=None,
                        average='micro')
        assert_almost_equal(score1, score2)

        score1 = get_scorer('%s' % prefix)(clf, X_test, y_test)
        score2 = metric(y_test, clf.predict(X_test), pos_label=1)
        assert_almost_equal(score1, score2)

    # test fbeta score that takes an argument
    scorer = make_scorer(fbeta_score, beta=2)
    score1 = scorer(clf, X_test, y_test)
    score2 = fbeta_score(y_test, clf.predict(X_test), beta=2,
                         average='weighted')
    assert_almost_equal(score1, score2)

    # test that custom scorer can be pickled
    unpickled_scorer = pickle.loads(pickle.dumps(scorer))
    score3 = unpickled_scorer(clf, X_test, y_test)
    assert_almost_equal(score1, score3)

    # smoke test the repr:
    repr(fbeta_score)
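
# Usage sketch for a scorer built this way; the estimator, data and parameter grid are
# arbitrary assumptions, not part of the test above.
from sklearn.datasets import make_blobs
from sklearn.metrics import fbeta_score, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC

X, y = make_blobs(random_state=0, centers=2)
f2_scorer = make_scorer(fbeta_score, beta=2)
grid = GridSearchCV(LinearSVC(random_state=0), param_grid={'C': [0.1, 1, 10]},
                    scoring=f2_scorer, cv=3)
grid.fit(X, y)
print(grid.best_params_, grid.best_score_)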
Example #23
    def on_epoch_end(self, epoch, logs={}):
        if 'iteration' in logs.keys() and logs['iteration'] % self.iteration_freq != 0:
            # If we've broken a large training set into smaller chunks, we don't
            # need to run the classification report after every chunk.
            return

        y_hat = self.model.predict_classes(self.x, verbose=0)
        fbeta = fbeta_score(self.y, y_hat, beta=0.5, average='weighted')
        report = classification_report(
                self.y, y_hat,
                labels=self.labels, target_names=self.target_names)

        if 'iteration' in logs.keys():
            self.logger("epoch {epoch} iteration {iteration} - val_fbeta(beta=0.5): {fbeta}".format(
                epoch=epoch, iteration=logs['iteration'], fbeta=fbeta))
        else:
            self.logger("epoch {epoch} - val_fbeta(beta=0.5): {fbeta}".format(
                epoch=epoch, fbeta=fbeta))

        self.logger(report)
Example #24
def automate_train_predict(learner, sample_size, X_train, y_train, X_test, y_test): 
    '''
    inputs:
       - learner: the learning algorithm to be trained and predicted on
       - sample_size: the size of samples (number) to be drawn from training set
       - X_train: features training set
       - y_train: income training set
       - X_test: features testing set
       - y_test: income testing set
    '''
    
    results = {}
    
    # TODO: Fit the learner to the training data using slicing with 'sample_size' using .fit(training_features[:], training_labels[:])
    start = time() # Get start time
    learner.fit(X_train[:sample_size],y_train[:sample_size])
    end = time() # Get end time
    
    # TODO: Calculate the training time
    results['train_time'] = end - start
        
    # TODO: Get the predictions on the test set(X_test),
    #       then get predictions on the first 300 training samples(X_train) using .predict()
    start = time() # Get start time
    predictions_test = learner.predict(X_test)
    end = time() # Get end time
    
    # TODO: Calculate the total prediction time
    results['pred_time'] = end - start
            
    # TODO: Compute accuracy on test set using accuracy_score()
    results['acc_test'] = accuracy_score(y_test,predictions_test)
    
    # TODO: Compute F-score on the test set which is y_test
    results['f_test'] = fbeta_score(y_test,predictions_test,beta=0.5)
       
    # Success
    print ("{} trained on {} samples. train {:.3f}sec predict {:.3f}sec fsctest {:.3f}".format(learner.__class__.__name__, sample_size,results['train_time'],results['pred_time'],results['f_test']))
        
    # Return the results
    return results
Example #25
def test_classification_scores():
    X, y = make_blobs(random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LinearSVC(random_state=0)
    clf.fit(X_train, y_train)
    score1 = SCORERS['f1'](clf, X_test, y_test)
    score2 = f1_score(y_test, clf.predict(X_test))
    assert_almost_equal(score1, score2)

    # test fbeta score that takes an argument
    scorer = Scorer(fbeta_score, beta=2)
    score1 = scorer(clf, X_test, y_test)
    score2 = fbeta_score(y_test, clf.predict(X_test), beta=2)
    assert_almost_equal(score1, score2)

    # test that custom scorer can be pickled
    unpickled_scorer = pickle.loads(pickle.dumps(scorer))
    score3 = unpickled_scorer(clf, X_test, y_test)
    assert_almost_equal(score1, score3)

    # smoke test the repr:
    repr(fbeta_score)
Example #26
def cross_validation_scores(X, y, clf): 
    y_pred = cross_validation.cross_val_predict(clf, X, y, cv=5)  # better than cross_val_score: less restricting
    y_true = y
    
    basic_metrics = metrics_from_confusion_matrix(y_true, y_pred)

    # TODO find a way to store this: XML, mongodb, logs...
    print "TP: %0.2f" % basic_metrics['TP']
    print "FP: %0.2f" % basic_metrics['FP']
    print "FN: %0.2f" % basic_metrics['FN']
    print "TN: %0.2f" % basic_metrics['TN']
    print "NPP: %0.2f" % basic_metrics['NPP']
    print "Specificity: %0.2f" % basic_metrics['specificity']
    print "Precision/PPP (basic): %0.2f" % basic_metrics['PPP']
    print "Precision/PPP (predefined):  %0.2f" % precision_score(y_true, y_pred) 
    print "Sensitivity/recall (basic): %0.2f" % basic_metrics['sensitivity']
    print "Sensitivity/recall (predefined):  %0.2f" % recall_score(y_true, y_pred)
    print "Accuracy (basic):  %0.2f" % basic_metrics['accuracy']
    print "Accuracy (predefined):  %0.2f" % accuracy_score(y_true, y_pred)
    print "F1 score (basic):  %0.2f" % basic_metrics['F1']
    print "F1 score (predefined):  %0.2f" % f1_score(y_true, y_pred)
    print "F2 score (basic):  %0.2f" % basic_metrics['F2']
    print "F2 score (predefined):  %0.2f" % fbeta_score(y_true, y_pred, 2)   
Example #27
def f025(y_true, y_pred):
    return fbeta_score(y_true, y_pred, average='binary', beta=0.25)
Example #28
print(predTest.argmax(axis=1)[misclassified_knn[2]])
plt.imshow(np.reshape(x_test[misclassified_knn[2]], (28, 28)))
plt.show()
neighbors3 = historyNeigh.kneighbors(x_test[misclassified_knn[2]].reshape(1, -1))
flattened3  = [val for sublist in neighbors3[1] for val in sublist]
for n in flattened3:
    print(y_train.argmax(axis=1)[n])
    plt.imshow(np.reshape(x_train[n], (28, 28)))
    plt.show()

# Confusion matrix and other metrics
cnf_matrix_KN = confusion_matrix(y_test.argmax(axis=1), predTest.argmax(axis=1))
plot_confusion_matrix(cnf_matrix_KN, classes=['0','1','2','3','4','5','6','7','8','9'],
                      title='Confusion matrix, without normalization')
print("Accuracy on test set:")
print(accuracy_score(y_test, predTest))

print("Precision score:")
average_precision_knn = average_precision_score(y_test, predTest)
print(average_precision_knn)

recall_knn = recall_score(y_test, predTest, average='micro')
print("Recall score:")
print(recall_knn)

fscore_knn = fbeta_score(y_test, predTest, beta=1, average='micro')
print("F-score:")
print(fscore_knn)

Example #29
def train_predict(learner, sample_size, X_train, y_train, X_test, y_test):
    '''
    inputs:
       - learner: the learning algorithm to be trained and predicted on
       - sample_size: the size of samples (number) to be drawn from training set
       - X_train: features training set
       - y_train: income training set
       - X_test: features testing set
       - y_test: income testing set
    '''

    results = {}

    # TODO: Fit the learner to the training data using slicing with 'sample_size' using .fit(training_features[:], training_labels[:])
    start = time()  # Get start time
    learner = learner.fit(X_train[:sample_size], y_train[:sample_size])
    end = time()  # Get end time

    # TODO: Calculate the training time
    results['train_time'] = (end - start)
    print('\nthe training time is: ', results['train_time'])

    # TODO: Get the predictions on the test set(X_test),
    #       then get predictions on the first 300 training samples(X_train) using .predict()
    start = time()  # Get start time
    predictions_test = learner.predict(X_test)
    predictions_train = learner.predict(X_train[:300])
    end = time()  # Get end time
    print('\nstart time is ', start)
    print('\nend time is ', end)
    print('\npredictions test shape is: ', predictions_test.shape)
    print('\npredictions train shape is: ', predictions_train.shape)

    # TODO: Calculate the total prediction time
    results['pred_time'] = (end - start)
    print('\nthe total prediction time is :', results['pred_time'])

    # TODO: Compute accuracy on the first 300 training samples which is y_train[:300]
    results['acc_train'] = accuracy_score(y_train[:300], predictions_train)
    print('\nthe accuracy on the first 300 is :', results['acc_train'])

    # TODO: Compute accuracy on test set using accuracy_score()
    results['acc_test'] = accuracy_score(y_test, predictions_test)
    print('\ntest accuracy score is: ', results['acc_test'])

    # TODO: Compute F-score on the first 300 training samples using fbeta_score()
    results['f_train'] = fbeta_score(y_train[:300],
                                     predictions_train,
                                     beta=0.75,
                                     average='micro')
    print('\nfirst 300 fbeta score is: ', results['f_train'])

    # TODO: Compute F-score on the test set which is y_test
    results['f_test'] = fbeta_score(y_test,
                                    predictions_test,
                                    beta=0.75,
                                    average='micro')
    print('\nfbeta test score is: ', results['f_test'])

    # Success
    print("\n{} trained on {} samples\n.".format(learner.__class__.__name__,
                                                 sample_size))

    # Return the results
    return results
Example #30
        }

        # print("LightGBM params:", lgbm_params)


        def f2_score(y_pred: np.array, data: Any) -> Any:
            y_true = data.get_label()
            y_pred = y_pred > gold_threshold

            if np.sum(y_pred) == 0:
                return 'f2', 0, True

            return 'f2', fbeta_score(y_true, y_pred, beta=2), True

        lgb_clf = lgb.train(lgbm_params,
                            lgtrain,
                            num_boost_round=2000,
                            valid_sets=[lgtrain, lgvalid],
                            valid_names=['train', 'valid'],
                            early_stopping_rounds=100,
                            verbose_eval=50,
                            feval=f2_score)

        val_pred = lgb_clf.predict(x_val)
        f2 = fbeta_score(y_val, val_pred > gold_threshold, beta=2)
        dprint(f2)
        filename = f'{model_dir}/lightgbm_f{level2_fold}_c{class_:04}_{f2:.04}.pkl'

        with open(filename, 'wb') as model_file:
            pickle.dump(lgb_clf, model_file)
def main(cv, scaler, PENALTY, SOLVER, C, N_JOBS, RANDOM_STATE):

    trainset = pd.read_csv("trainset_180314.csv").iloc[:, 1:]
    print(len(trainset))
    print(trainset.columns[5:].tolist())

    # continuous and categorical
    mains = ["user_coupon", "user_id", "coupon_id", "start_time", "is_used"]

    categorical = [
        'sex_1', 'sex_2', 'age_60', 'age_70', 'age_80', 'age_90', 'age_0',
        'city1', 'city2', 'city3', 'city4', 'city5', 'AppVerLast_2.1',
        'AppVerLast_2.2', 'AppVerLast_2.3', 'AppVerLast_2.4', 'AppVerLast_2.5',
        'AppVerLast_2.7', 'AppVerLast_2.8', 'covers_mon', 'covers_tue',
        'covers_wed', 'covers_thu', 'covers_fri', 'covers_sat', 'covers_sun',
        'type1', 'type6', 'Complaints', 'Eventsoperation',
        'NewUserCouponPackageByBD', 'PreUserCouponCode', 'RecallUserDaily',
        'home201603222253', 'home_dongbeiguan', 'home_jiangzhecai',
        'home_muqinjie', 'home_xiangcaiguan', 'preuser', 'shareuser', '商家拒单返券',
        '家厨发券', '活动赠券', '码兑券', '自运营赠券', '蒲公英受邀', 'CoupUseLast'
    ]

    conitnuous = [
        'kitchen_entropy', 'distance_median', 'distance_std',
        'user_longitude_median', 'user_longitude_std', 'user_latitude_median',
        'user_latitude_std', 'coupon_effective_days', 'money', 'max_money',
        'WeeklyCouponUsedCount', "BiWeeklyCouponUsedCount", 'WeeklyOrderCount',
        'BiWeeklyOrderCount', 'coupon_usage_rate', 'order_coupon_usage_rate',
        'coupon_type1_usage_rate', 'coupon_type6_usage_rate',
        'coupon_used_weekend_perc', 'order_weekend_perc', 'worth_money_median',
        'worth_money_std', 'InterCoup', 'InterOrder', 'Recency'
    ]

    # scaling
    X_train_continuous = scaler.fit_transform(trainset[conitnuous])
    trainset_scaled = pd.concat([
        trainset.loc[:, mains + categorical],
        pd.DataFrame(X_train_continuous, columns=conitnuous)
    ],
                                axis=1)

    # split train & dev
    split_date1 = "2016-04-15"
    split_date2 = "2016-04-22"
    split_date3 = "2016-04-29"
    split_date4 = "2016-05-06"

    trainset1 = trainset_scaled[trainset_scaled["start_time"] <= split_date1]
    devset1 = trainset_scaled[(trainset_scaled["start_time"] > split_date1)
                              & (trainset_scaled["start_time"] <= split_date2)]

    trainset2 = trainset_scaled[trainset_scaled["start_time"] <= split_date2]
    devset2 = trainset_scaled[(trainset_scaled["start_time"] > split_date2)
                              & (trainset_scaled["start_time"] <= split_date3)]

    trainset3 = trainset_scaled[trainset_scaled["start_time"] <= split_date3]
    devset3 = trainset_scaled[(trainset_scaled["start_time"] > split_date3)
                              & (trainset_scaled["start_time"] <= split_date4)]

    trainset4 = trainset_scaled[trainset_scaled["start_time"] <= split_date4]
    devset4 = trainset_scaled[trainset_scaled["start_time"] > split_date4]

    # shuffle trainset
    trainset1 = trainset1.iloc[shuffle(trainset1.index).tolist(), ]
    trainset2 = trainset2.iloc[shuffle(trainset2.index).tolist(), ]
    trainset3 = trainset3.iloc[shuffle(trainset3.index).tolist(), ]
    trainset4 = trainset4.iloc[shuffle(trainset4.index).tolist(), ]

    trainsets = [trainset1, trainset2, trainset3, trainset4]
    devsets = [devset1, devset2, devset3, devset4]

    X_trains, y_trains, X_devs, y_devs = [], [], [], []
    for i in trainsets:
        X_trains.append(i[i.columns[5:]])
        y_trains.append(i["is_used"])
    for i in devsets:
        X_devs.append(i[i.columns[5:]])
        y_devs.append(i["is_used"])

    ## 1. Logistic Regression

    res_lr = defaultdict(
        lambda: defaultdict(lambda: defaultdict(list)))
    res_lr["PENALTY"] = PENALTY
    res_lr["SCALER"] = SCALER
    res_lr["BALANCE"] = BALANCE

    evaluations = ["F05", "Precision", "Recall", "Mean_Pre", "AUC", "Accuracy"]
    for c in C:
        for ev in evaluations:
            res_lr[ev][str(c)] = []

    # train
    start_time = time.time()

    for c in C:
        start_time2 = time.time()

        for n in cv:
            lr = LogisticRegression(C=c,
                                    penalty=PENALTY,
                                    solver=SOLVER,
                                    class_weight={1: BALANCE},
                                    max_iter=MAX_ITER,
                                    random_state=RANDOM_STATE,
                                    n_jobs=N_JOBS)
            lr.fit(X_trains[n], y_trains[n])
            y_pred = lr.predict(X_devs[n])
            y_dev = y_devs[n]

            print("P: {}, CV: {}, C: {}".format(PENALTY, n, c))
            print(confusion_matrix(y_dev, y_pred, labels=[1, 0]))

            f05 = fbeta_score(y_dev, y_pred, beta=0.5, labels=[1, 0])
            precision = precision_score(y_dev, y_pred, labels=[1, 0])
            recall = recall_score(y_dev, y_pred, labels=[1, 0])
            mp = average_precision_score(y_dev, y_pred)
            auc = roc_auc_score(y_dev, y_pred)
            acc = accuracy_score(y_dev, y_pred)
            evaluations_res = [f05, precision, recall, mp, auc, acc]

            for i in range(len(evaluations)):
                print("{}: {}".format(evaluations[i], evaluations_res[i]))
                res_lr[evaluations[i]][str(c)].append(evaluations_res[i])
            print("\n")

        print("Finished c {} in {} sec\n".format(c, time.time() - start_time2))

    print("{} sec\n".format(time.time() - start_time))

    # average cv results
    for ev in evaluations:
        res_lr[ev] = {c: np.mean(res_lr[ev][c]) for c in res_lr[ev]}

    # save param output
    with open('res_lr_{}_{}_1v{}.json'.format(PENALTY, SCALER, BALANCE),
              'w') as f:
        json.dump(res_lr, f)
all_predicted_lines = []
all_target_lines = []
for doc in doc_test:
    predicted_lines = random_search.predict(doc.data)
    all_predicted_lines += list(predicted_lines)
    all_target_lines += list(doc.targets)

    predicted_doc = utils.classify_doc(predicted_lines)
    documents_predicted.append(predicted_doc)
    documents_target.append(doc.category)

print("Line by Line ")
print("Confusion Matrix: \n{}".format(
    confusion_matrix(all_target_lines, all_predicted_lines)))

accuracy = fbeta_score(all_target_lines,
                       all_predicted_lines,
                       average=None,
                       beta=2)
print("Accuracy: {}".format(accuracy))

doc_accuracy = fbeta_score(documents_target,
                           documents_predicted,
                           average=None,
                           beta=2)

print("Document Accuracy: {}".format(doc_accuracy))

print("Document Confusion Matrix: \n{}".format(
    confusion_matrix(documents_target, documents_predicted)))
Example #33
    def mf(x):
        p2 = np.zeros_like(p)
        for i in range(17):
            p2[:, i] = (p[:, i] > x[i]).astype(int)  # np.int has been removed from recent NumPy
        score = fbeta_score(y, p2, beta=2, average='samples')
        return score
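
# The objective above scores a vector of 17 per-class cutoffs. One common way to search
# it is a greedy per-class sweep, sketched here (not from the original project); `p`
# (predicted probabilities, shape [n_samples, 17]) and `y` (binary targets of the same
# shape) are assumed to exist, as in `mf` above.
import numpy as np
from sklearn.metrics import fbeta_score

def optimize_thresholds(y, p, n_classes=17, init=0.2):
    x = [init] * n_classes
    for i in range(n_classes):
        best_t, best_score = x[i], 0.0
        for t in np.linspace(0.05, 0.95, 19):
            x[i] = t
            p2 = (p > np.asarray(x)).astype(int)
            score = fbeta_score(y, p2, beta=2, average='samples')
            if score > best_score:
                best_t, best_score = t, score
        x[i] = best_t
    return x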
Example #34
pred_stances = cross_val_predict(vote_pipeline,
                                 train_data.Abstract,
                                 train_data.Stance,
                                 cv=cv)
print second_clf.named_steps
print first_clf.named_steps

print 80 * '='
print "TRAIN"
print 80 * '='

print classification_report(train_data.Stance, pred_stances, digits=4)

macro_f = fbeta_score(train_data.Stance,
                      pred_stances,
                      beta=1.0,
                      labels=['AGAINST', 'FAVOR', 'NONE'],
                      average='macro')

print 'macro-average of F-score(FAVOR) and F-score(AGAINST): {:.4f}\n'.format(
    macro_f)

print 80 * '='
print "VALIDATE"
print 80 * '='
print 'WORD2VEC VECTORS:', word2vec_ids[0]
print 80 * '='

vote_pipeline.fit(train_data.Abstract, train_data.Stance)

pred_stances = vote_pipeline.predict(validate_data.Abstract)
Example #35
def fbeta(true_label, prediction):
    return fbeta_score(true_label, prediction, beta=2, average='samples')
Example #36
# transform skewed data
skewed = ['capital-gain', 'capital-loss']
features_log_transformed = pd.DataFrame(data = features_raw)
features_log_transformed[skewed] = features_raw[skewed].apply(lambda x: np.log(x + 1))

# Normalize numerical features
scaler = MinMaxScaler() # default=(0, 1)
numerical = ['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
features_log_minmax_transform = pd.DataFrame(data = features_log_transformed)
features_log_minmax_transform[numerical] = scaler.fit_transform(features_log_transformed[numerical])

# One-hot encode categorical features
features_final = pd.get_dummies(features_log_minmax_transform)
income = income_raw.map({'>50K': 1, '<=50K': 0})

# Shuffle and Split data
X_train, X_test, y_train, y_test = train_test_split(features_final,
                                                    income,
                                                    test_size = 0.2,
                                                    random_state = 0)

# Evaluate Model Performance with fbeta = 0.5
fbeta = 0.5
best_clf = Models.evaluate_models(X_train,y_train,X_test,y_test,fbeta)
print("\n",best_clf.__class__.__name__)
best_clf = Models.optimize_best_model(best_clf,X_train,y_train,X_test,y_test)
model_predictions = best_clf.predict(X_test)
print("Final accuracy score on the testing data: {:.4f}".format(accuracy_score(y_test, model_predictions)))
print("Final F-score on the testing data: {:.4f}".format(fbeta_score(y_test, model_predictions, beta = 0.5)))
Example #37
def test_precision_recall_f1_score_multilabel_2():
    """ Test precision_recall_f1_score on a crafted multilabel example 2
    """
    # Second crafted example
    y_true_ll = [(1,), (2,), (2, 3)]
    y_pred_ll = [(4,), (4,), (2, 1)]
    lb = LabelBinarizer()
    lb.fit([range(1, 5)])
    y_true_bi = lb.transform(y_true_ll)
    y_pred_bi = lb.transform(y_pred_ll)

    for y_true, y_pred in [(y_true_ll, y_pred_ll), (y_true_bi, y_pred_bi)]:
        # tp = [ 0.  1.  0.  0.]
        # fp = [ 1.  0.  0.  2.]
        # fn = [ 1.  1.  1.  0.]

        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average=None)
        assert_array_almost_equal(p, [0.0, 1.0, 0.0, 0.0], 2)
        assert_array_almost_equal(r, [0.0, 0.5, 0.0, 0.0], 2)
        assert_array_almost_equal(f, [0.0, 0.66, 0.0, 0.0], 2)
        assert_array_almost_equal(s, [1, 2, 1, 0], 2)

        f2 = fbeta_score(y_true, y_pred, beta=2, average=None)
        support = s
        assert_array_almost_equal(f2, [0, 0.55, 0, 0], 2)

        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average="micro")
        assert_almost_equal(p, 0.25)
        assert_almost_equal(r, 0.25)
        assert_almost_equal(f, 2 * 0.25 * 0.25 / 0.5)
        assert_equal(s, None)
        assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                        average="micro"),
                            (1 + 4) * p * r / (4 * p + r))

        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average="macro")
        assert_almost_equal(p, 0.25)
        assert_almost_equal(r, 0.125)
        assert_almost_equal(f, 2 / 12)
        assert_equal(s, None)
        assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                        average="macro"),
                            np.mean(f2))

        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average="weighted")
        assert_almost_equal(p, 2 / 4)
        assert_almost_equal(r, 1 / 4)
        assert_almost_equal(f, 2 / 3 * 2 / 4)
        assert_equal(s, None)
        assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                        average="weighted"),
                            np.average(f2, weights=support))

        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average="samples")
        # Check weighted
        # |h(x_i) inter y_i | = [0, 0, 1]
        # |y_i| = [1, 1, 2]
        # |h(x_i)| = [1, 1, 2]

        assert_almost_equal(p, 1 / 6)
        assert_almost_equal(r, 1 / 6)
        assert_almost_equal(f, 2 / 4 * 1 / 3)
        assert_equal(s, None)
        assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                        average="samples"),
                            0.1666, 2)
# In[171]:

X = df.iloc[:, :df.shape[1]]
Y = st.iloc[:, 0]

# In[163]:

#Make pipeline of clf
clf_pipeline = make_pipeline(StandardScaler(), tree.DecisionTreeClassifier())
y_pred = cross_val_predict(clf_pipeline, X, Y, cv=10)
print('Accuracy score:', metrics.accuracy_score(Y, y_pred))
print('RMSE:', round(sqrt(mean_squared_error(Y, y_pred)), 2))
print('R_squared:', round(r2_score(Y, y_pred), 2))
print('Recall score:', metrics.recall_score(Y, y_pred))
print('Fbeta score:', fbeta_score(Y, y_pred, beta=1.5))
print('F1-score:', f1_score(Y, y_pred))

# # Filter Features by Variance

# In[164]:

var = df.var()
idx = []
for i in range(len(var)):
    if var[i] < 0.75:
        print('{:50} {}'.format(var.index[i], var[i]))
        idx.append(var.index[i])

# In[165]:
Example #39
def avg_fscore(y_true, y_pred):
    return fbeta_score(y_true, y_pred, average="macro", beta=0.5)
Example #40
    grid_fit = grid_obj.fit(X_train, y_train)

    # Get the estimator
    best_clf = grid_fit.best_estimator_

    # Make predictions using the unoptimized and optimized models
    predictions = (clf.fit(X_train, y_train)).predict(X_test)
    best_predictions = best_clf.predict(X_test)

    # Report the before-and-after scores
    print("Unoptimized model\n------")
    print("Accuracy score on testing data: {:.4f}".format(
        accuracy_score(y_test, predictions)))
    print("F-score on testing data: {:.4f}".format(
        fbeta_score(y_test, predictions, beta=0.5)))
    print("\nOptimized Model\n------")
    print("Final accuracy score on the testing data: {:.4f}".format(
        accuracy_score(y_test, best_predictions)))
    print("Final F-score on the testing data: {:.4f}".format(
        fbeta_score(y_test, best_predictions, beta=0.5)))

    pickle.dump(best_clf, open(filename, 'wb'))

importances = best_clf.feature_importances_

vs.feature_plot(importances, X_train, y_train)

X_train_reduced = X_train[X_train.columns.values[(
    np.argsort(importances)[::-1])[:5]]]
X_test_reduced = X_test[X_test.columns.values[(
Example #41
def f2_measure(y_true, y_pred):
    return fbeta_score(y_true, y_pred, beta=2)
Example #42
    def evaluate(
        self,
        sentences: Union[List[DataPoint], Dataset],
        out_path: Union[str, Path] = None,
        embedding_storage_mode: str = "none",
        mini_batch_size: int = 32,
        num_workers: int = 8,
    ) -> (Result, float):

        # read Dataset into data loader (if list of sentences passed, make Dataset first)
        if not isinstance(sentences, Dataset):
            sentences = SentenceDataset(sentences)
        data_loader = DataLoader(sentences,
                                 batch_size=mini_batch_size,
                                 num_workers=num_workers)

        # use scikit-learn to evaluate
        y_true = []
        y_pred = []

        with torch.no_grad():
            eval_loss = 0

            lines: List[str] = []
            batch_count: int = 0
            for batch in data_loader:

                batch_count += 1

                # remove previously predicted labels
                [sentence.remove_labels('predicted') for sentence in batch]

                # get the gold labels
                true_values_for_batch = [
                    sentence.get_labels(self.label_type) for sentence in batch
                ]

                # predict for batch
                loss = self.predict(
                    batch,
                    embedding_storage_mode=embedding_storage_mode,
                    mini_batch_size=mini_batch_size,
                    label_name='predicted',
                    return_loss=True)

                eval_loss += loss

                sentences_for_batch = [
                    sent.to_plain_string() for sent in batch
                ]

                # get the predicted labels
                predictions = [
                    sentence.get_labels('predicted') for sentence in batch
                ]

                for sentence, prediction, true_value in zip(
                        sentences_for_batch,
                        predictions,
                        true_values_for_batch,
                ):
                    eval_line = "{}\t{}\t{}\n".format(sentence, true_value,
                                                      prediction)
                    lines.append(eval_line)

                for predictions_for_sentence, true_values_for_sentence in zip(
                        predictions, true_values_for_batch):

                    true_values_for_sentence = [
                        label.value for label in true_values_for_sentence
                    ]
                    predictions_for_sentence = [
                        label.value for label in predictions_for_sentence
                    ]

                    y_true_instance = np.zeros(len(self.label_dictionary),
                                               dtype=int)
                    for i in range(len(self.label_dictionary)):
                        if self.label_dictionary.get_item_for_index(
                                i) in true_values_for_sentence:
                            y_true_instance[i] = 1
                    y_true.append(y_true_instance.tolist())

                    y_pred_instance = np.zeros(len(self.label_dictionary),
                                               dtype=int)
                    for i in range(len(self.label_dictionary)):
                        if self.label_dictionary.get_item_for_index(
                                i) in predictions_for_sentence:
                            y_pred_instance[i] = 1
                    y_pred.append(y_pred_instance.tolist())

                store_embeddings(batch, embedding_storage_mode)

            # remove predicted labels
            for sentence in sentences:
                sentence.annotation_layers['predicted'] = []

            if out_path is not None:
                with open(out_path, "w", encoding="utf-8") as outfile:
                    outfile.write("".join(lines))

            # make "classification report"
            target_names = []
            for i in range(len(self.label_dictionary)):
                target_names.append(
                    self.label_dictionary.get_item_for_index(i))
            classification_report = metrics.classification_report(
                y_true,
                y_pred,
                digits=4,
                target_names=target_names,
                zero_division=0)

            # get scores
            micro_f_score = round(
                metrics.fbeta_score(y_true,
                                    y_pred,
                                    beta=self.beta,
                                    average='micro',
                                    zero_division=0), 4)
            accuracy_score = round(metrics.accuracy_score(y_true, y_pred), 4)
            macro_f_score = round(
                metrics.fbeta_score(y_true,
                                    y_pred,
                                    beta=self.beta,
                                    average='macro',
                                    zero_division=0), 4)
            precision_score = round(
                metrics.precision_score(y_true,
                                        y_pred,
                                        average='macro',
                                        zero_division=0), 4)
            recall_score = round(
                metrics.recall_score(y_true,
                                     y_pred,
                                     average='macro',
                                     zero_division=0), 4)

            detailed_result = ("\nResults:"
                               f"\n- F-score (micro) {micro_f_score}"
                               f"\n- F-score (macro) {macro_f_score}"
                               f"\n- Accuracy {accuracy_score}"
                               '\n\nBy class:\n' + classification_report)

            # line for log file
            if not self.multi_label:
                log_header = "ACCURACY"
                log_line = f"\t{accuracy_score}"
            else:
                log_header = "PRECISION\tRECALL\tF1\tACCURACY"
                log_line = f"{precision_score}\t" \
                           f"{recall_score}\t" \
                           f"{macro_f_score}\t" \
                           f"{accuracy_score}"

            result = Result(
                main_score=micro_f_score,
                log_line=log_line,
                log_header=log_header,
                detailed_results=detailed_result,
            )

            eval_loss /= batch_count

            return result, eval_loss
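# A minimal standalone sketch (not part of the original class) of the multi-hot
# encoding and micro/macro F-beta scoring performed in the evaluate() method
# above; the label inventory and per-sentence label sets are made up.
import numpy as np
from sklearn import metrics as skmetrics

label_inventory = ['sports', 'politics', 'tech']            # stands in for self.label_dictionary
true_labels = [{'sports'}, {'politics', 'tech'}, {'tech'}]  # gold labels per sentence
pred_labels = [{'sports'}, {'tech'}, {'sports', 'tech'}]    # predicted labels per sentence

def to_multi_hot(label_sets, inventory):
    # one row per sentence, one column per label in the inventory
    return np.array([[1 if item in labels else 0 for item in inventory]
                     for labels in label_sets], dtype=int)

y_true_demo = to_multi_hot(true_labels, label_inventory)
y_pred_demo = to_multi_hot(pred_labels, label_inventory)

print(skmetrics.fbeta_score(y_true_demo, y_pred_demo, beta=1.0, average='micro', zero_division=0))
print(skmetrics.fbeta_score(y_true_demo, y_pred_demo, beta=1.0, average='macro', zero_division=0))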
Example #43
    def _get_fbeta_score(self, classifier, X_valid, y_valid):
        p_valid = classifier.predict(X_valid)
        return fbeta_score(y_valid,
                           np.array(p_valid) > 0.2,
                           beta=2,
                           average='samples')
Example #44
def classification(source, model, target_att, test_source = "", fs_task=False):
    # source -- Path to the file that is used to train.
    # model -- Object loaded from file with trained model.
    # target_att -- Name of the attribute in source that is considered as the target.
    # test_source -- Path to the file that is used to test.
    # fs_task -- Name of the feature selection algorithm to use (False disables feature selection).
    
    results = dict.fromkeys(["predictions", "score", "model", "features", "removed_features", "selected_features", "feature_importances", "metrics"])
    results["predictions"] = []
    
    # Basic metrics used for classification and feature selection evaluation.
    metrics = dict.fromkeys(["accuracy","recall","precision","f_measure","f_beta"])
    metrics["accuracy"] = []
    metrics["recall"] = []
    metrics["precision"] = []
    metrics["f_measure"] = []
    results["removed_features"] = []
    results["selected_features"] = []
    results["feature_importances"] = []
    # http://nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-unranked-retrieval-sets-1.html
    metrics["f_beta"] = []
    
    cfr = model
    print(model)
    
    # Object for reading train data and test data
    csv = csvhandling.CsvHandling()
    
    # Numpy array with values from source path without feature names and target values.
    train = csv.read_csv(source)
    
    # List of feature names
    features = csv.get_features(source)
    
    # Numpy array with target values
    target = csv.get_target(source,target_att)

    if test_source:
        
        # Numpy array with values from test_source path without feature names and target values.
        test = csv.read_csv(test_source)
        
        # Numpy array with test target values
        test_target = csv.get_target(test_source,target_att)
        
        if fs_task:
            # Pipeline with fitted model and feature selection filter or only fitted model.
            cfr = featureselection.get_fs_model(cfr, fs_task, train, target)
            
            original_features = features[:]
            if fs_task == "RFE":
                selected_features = []
            else:
                selected_features = featureselection.get_selected_features(cfr.named_steps["feature_selection"],original_features)
            removed_features = [i for i in features if not i in selected_features]
            results["removed_features"].append(removed_features)
            results["selected_features"].append(selected_features)
        else:
            cfr.fit(train, target)    
        prediction = cfr.predict(test)
        results["predictions"].append(prediction)
        metrics["accuracy"].append(mx.accuracy_score(test_target, prediction))
        metrics["precision"].append(mx.precision_score(test_target, prediction, average="macro"))
        metrics["recall"].append(mx.recall_score(test_target, prediction, average="macro"))
        metrics["f_measure"].append(mx.f1_score(test_target, prediction, average="macro"))
    else:
        # If there is no test data, cross-validation is used for model evaluation.
        cv = cross_validation.KFold(len(train), n_folds=5, shuffle=False, random_state=None)
        
        if fs_task == "RFE":
            # Pipeline with fitted model and feature selection filter or only fitted model.
            cfr = featureselection.get_fs_model(cfr, fs_task+"CV", train, target, cv)
            
            original_features = features[:]
            selected_features = featureselection.get_selected_features(cfr,original_features)
            removed_features = [i for i in features if not i in selected_features]
            results["removed_features"].append(removed_features)
            results["selected_features"].append(selected_features)
            for traincv, testcv in cv:
                test = train[testcv]
                test_target = target[testcv]
                prediction = cfr.predict(test)
                results["predictions"].append(prediction)
                metrics["accuracy"].append(mx.accuracy_score(test_target, prediction))
                metrics["precision"].append(mx.precision_score(test_target, prediction))
                metrics["recall"].append(mx.recall_score(test_target, prediction))
                metrics["f_measure"].append(mx.f1_score(test_target, prediction))
                metrics["f_beta"].append(mx.fbeta_score(test_target, prediction, 0.5))        
        else:
            for traincv, testcv in cv:
                # Repaired bug from http://stackoverflow.com/questions/19265097/why-does-cross-validation-for-randomforestregressor-fail-in-scikit-learn
                if fs_task:
                    cfr = featureselection.get_fs_model(cfr, fs_task, train[traincv], target[traincv])
                    original_features = features[:]
                    if fs_task == "fromModel":
                        selected_features = featureselection.get_selected_features(cfr,original_features)
                    else:
                        selected_features = featureselection.get_selected_features(cfr.named_steps["feature_selection"],original_features)
                    removed_features = [i for i in features if not i in selected_features]
                    results["removed_features"].append(removed_features)
                    results["selected_features"].append(selected_features)
                else:
                    cfr.fit(train[traincv], target[traincv])
                
                test = train[testcv]
                test_target = target[testcv]
                prediction = cfr.predict(test)
                
                results["predictions"].append(prediction)
                metrics["accuracy"].append(mx.accuracy_score(test_target, prediction))
                metrics["precision"].append(mx.precision_score(test_target, prediction))
                metrics["recall"].append(mx.recall_score(test_target, prediction))
                metrics["f_measure"].append(mx.f1_score(test_target, prediction))
                metrics["f_beta"].append(mx.fbeta_score(test_target, prediction, 0.5))
    results["score"] = cfr.score(test, test_target)
    results["model"] = cfr
    results["metrics"] = metrics
    return results
Example #45
def sample_run(df, anoms_ref, window_size=500, com=12):
    """
    This functions expects a dataframe df as mandatory argument.  
    The first column of the df should contain timestamps, the second machine IDs
    
    Keyword arguments:
    df: a pandas data frame with two columns: 1. timestamp, 2. value
    anoms_ref: reference anomaly detection results 
    window_size: the size of the window of data points that are used for anomaly detection
    com: decay in terms of center of mass (this approximates averageing over about twice as many hours)
    """

    n_epochs = 10
    p_anoms = .5

    def detect_ts_online(df_smooth, window_size, stop):
        is_anomaly = False
        run_time = 9999
        start_index = max(0, stop - window_size)
        df_win = df_smooth.iloc[start_index:stop, :]
        start_time = time.time()
        results = detect_ts(df_win,
                            alpha=0.05,
                            max_anoms=0.02,
                            only_last=None,
                            longterm=False,
                            e_value=False,
                            direction='both')
        run_time = time.time() - start_time
        if results['anoms'].shape[0] > 0:
            timestamp = df_win['timestamp'].tail(1).values[0]
            if timestamp == results['anoms'].tail(1)['timestamp'].values[0]:
                is_anomaly = True
        return is_anomaly, run_time

    def running_avg(ts, com=6):
        rm_o = np.zeros_like(ts)
        rm_o[0] = ts[0]

        for r in range(1, len(ts)):
            curr_com = float(min(com, r))
            rm_o[r] = rm_o[r - 1] + (ts[r] - rm_o[r - 1]) / (curr_com + 1)

        return rm_o

    # create arrays that will hold the results of batch AD (y_true) and online AD (y_pred)
    y_true = []
    y_pred = []
    run_times = []

    # check which unique machines, sensors, and timestamps we have in the dataset
    machineIDs = df['machineID'].unique()
    sensors = df.columns[2:]
    timestamps = df['datetime'].unique()[window_size:]

    # sample n_epochs random machines and sensors
    random_machines = np.random.choice(machineIDs, n_epochs)
    random_sensors = np.random.choice(sensors, n_epochs)

    # we initialize an array that will later hold a sample of timestamps
    random_timestamps = np.random.choice(timestamps, n_epochs)

    for i in range(0, n_epochs):
        # take a slice of the dataframe that only contains the measures of one random machine
        df_s = df[df['machineID'] == random_machines[i]]

        # smooth the values of one random sensor, using our running_avg function
        smooth_values = running_avg(df_s[random_sensors[i]].values, com)

        # create a data frame with two columns: timestamp, and smoothed values
        df_smooth = pd.DataFrame(data={
            'timestamp': df_s['datetime'].values,
            'value': smooth_values
        })

        # load the results of batch AD for this machine and sensor
        anoms_s = anoms_ref[((anoms_ref['machineID'] == random_machines[i]) &
                             (anoms_ref['errorID'] == random_sensors[i]))]

        # with probability p_anoms, use one of the known anomaly timestamps as the i-th test timestamp
        if np.random.random() < p_anoms:
            anoms_timestamps = anoms_s['datetime'].values
            np.random.shuffle(anoms_timestamps)
            counter = 0
            while anoms_timestamps[0] < timestamps[0]:
                if counter > 100:
                    return 0.0, 9999.0
                np.random.shuffle(anoms_timestamps)
                counter += 1
            random_timestamps[i] = anoms_timestamps[0]

        # select the test case
        test_case = df_smooth[df_smooth['timestamp'] == random_timestamps[i]]
        test_case_index = test_case.index.values[0]

        # check whether the batch AD found an anomaly at that timestamp (this becomes y_true for this epoch)
        y_true_i = random_timestamps[i] in anoms_s['datetime'].values

        # perform online AD, and write result to y_pred
        y_pred_i, run_times_i = detect_ts_online(df_smooth, window_size,
                                                 test_case_index)

        y_true.append(y_true_i)
        y_pred.append(y_pred_i)
        run_times.append(run_times_i)

    return fbeta_score(y_true, y_pred, beta=2), np.mean(run_times)
Example #46
        #        print(means_and_stds)
        end_ind = int(
            np.argwhere(
                np.isnan(stats_all['preds']['test'][min_ind, 0,
                                                    train_setSize, :]))[0]) - 1
        predictions = stats_all['preds']['test'][min_ind, cv_fold,
                                                 train_setSize, 0:end_ind]
        targets = stats_all['targets']['test'][min_ind, cv_fold, train_setSize,
                                               0:end_ind]
        probs = stats_all['probs']['test'][min_ind, cv_fold, train_setSize,
                                           0:end_ind]

        fpr, tpr, thresholds = metrics.roc_curve(targets, probs, pos_label=1)
        auc_score = metrics.auc(fpr, tpr)
        auc_scores[cv_fold, train_setSize] = auc_score
        f2beta_score = metrics.fbeta_score(targets, predictions, beta=2)

        if (train_setSize == (num_trainSetSizes - 1)):
            for epoch_num in range(0, num_epochs, 2):
                auc_scores_curve_train[cv_fold,
                                       epoch_num] = stats_all['auc']['train'][
                                           epoch_num, cv_fold, train_setSize]
                auc_scores_curve_val[cv_fold,
                                     epoch_num] = stats_all['auc']['val'][
                                         epoch_num, cv_fold, train_setSize]
                auc_scores_curve_test[cv_fold,
                                      epoch_num] = stats_all['auc']['test'][
                                          epoch_num, cv_fold, train_setSize]

                losses_curve_train[cv_fold,
                                   epoch_num] = stats_all['losses']['train'][
Example #47
def fbeta(_, predictions_binary, labels, parameters):
    return metrics.fbeta_score(labels, predictions_binary, **parameters)
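# An illustrative call (not from the original source) showing the keyword
# forwarding the wrapper above relies on: the `parameters` dict is unpacked
# straight into sklearn's fbeta_score. Labels and predictions are made up.
from sklearn import metrics

labels = [0, 1, 1, 0, 1]
predictions_binary = [0, 1, 0, 0, 1]
parameters = {'beta': 2.0, 'average': 'binary'}
print(metrics.fbeta_score(labels, predictions_binary, **parameters))  # ~0.714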
Example #48
    def _test(num_classes, threshold, multilabel, average):

        fbeta = FBeta(
            beta=2.0,
            num_classes=num_classes,
            threshold=threshold,
            multilabel=multilabel,
            average=average,
        )

        f1 = F1Score(
            num_classes=num_classes,
            threshold=threshold,
            multilabel=multilabel,
            average=average,
        )

        outputs = torch.randn(100, 4)
        targets = torch.randint(0, 4, size=(100, ))

        bs = _BaseInputHandler(
            num_classes=num_classes,
            average=average,
            threshold=0.5,
            multilabel=multilabel,
        )

        np_outputs, np_targets = bs._compute(outputs=outputs, targets=targets)

        fbeta.accumulate(outputs=outputs, targets=targets)
        f1.accumulate(outputs=outputs, targets=targets)

        fbeta_val = fbeta.value
        f1_val = f1.value

        assert fbeta.case_type == "multiclass"
        assert f1.case_type == "multiclass"

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UndefinedMetricWarning)
            fbeta_skm = fbeta_score(np_targets.numpy(),
                                    np_outputs.numpy(),
                                    average=average,
                                    beta=2.0)

            f1_skm = f1_score(np_targets.numpy(),
                              np_outputs.numpy(),
                              average=average)

        assert fbeta_skm == pytest.approx(fbeta_val.item())
        assert f1_skm == pytest.approx(f1_val.item())

        bs = 16
        iters = targets.shape[0] // bs + 1

        fbeta.reset()
        f1.reset()
        for i in range(iters):
            idx = i * bs

            fbeta.accumulate(outputs=outputs[idx:idx + bs],
                             targets=targets[idx:idx + bs])

            f1.accumulate(
                outputs=outputs[idx:idx + bs],
                targets=targets[idx:idx + bs],
            )

        f1_m = f1.value
        fbeta_m = fbeta.value

        assert f1_skm == pytest.approx(f1_m.item())

        assert fbeta_skm == pytest.approx(fbeta_m.item())
Example #49
    print('Precision:\t', precision)

    # The recall is the ratio 'tp / (tp + fn)' where 'tp' is the number of
    # true positives and 'fn' the number of false negatives. The recall is
    # intuitively the ability of the classifier to find all the positive samples.
    # The best value is 1 and the worst value is 0.
    recall = recall_score(y_true, y_hat)
    print('Recall:  \t', recall)

    # F1 score, also known as balanced F-score or F-measure
    # The F1 score can be interpreted as a weighted average of the precision and
    # recall, where an F1 score reaches its best value at 1 and worst score at 0.
    # The relative contribution of precision and recall to the F1 score are
    # equal. The formula for the F1 score is:
    #     F1 = 2 * (precision * recall) / (precision + recall)
    print('f1 score: \t', f1_score(y_true, y_hat))
    #print(2 * (precision * recall) / (precision + recall))

    # The F-beta score is the weighted harmonic mean of precision and recall,
    # reaching its optimal value at 1 and its worst value at 0.
    # The 'beta' parameter determines the weight of precision in the combined
    # score. 'beta < 1' lends more weight to precision, while 'beta > 1'
    # favors recall ('beta -> 0' considers only precision, 'beta -> inf' only recall).
    print('F-beta:')
    for beta in np.logspace(-3, 3, num=7, base=10):
        fbeta = fbeta_score(y_true, y_hat, beta=beta)
        print('\tbeta=%9.3f\tF-beta=%.5f' % (beta, fbeta))
        #print((1+beta**2)*precision*recall / (beta**2 * precision + recall))

    print(precision_recall_fscore_support(y_true, y_hat, beta=1))
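# A small verification (not from the original source) of the F-beta formula in
# the comments above, checked against sklearn on a made-up binary labelling.
from sklearn.metrics import fbeta_score, precision_score, recall_score

y_true_demo = [1, 1, 1, 1, 0, 0, 0, 0]
y_hat_demo = [1, 1, 0, 0, 1, 0, 0, 0]
p_demo = precision_score(y_true_demo, y_hat_demo)   # 2/3
r_demo = recall_score(y_true_demo, y_hat_demo)      # 1/2
beta_demo = 0.5
manual = (1 + beta_demo**2) * p_demo * r_demo / (beta_demo**2 * p_demo + r_demo)
print(manual, fbeta_score(y_true_demo, y_hat_demo, beta=beta_demo))  # both 0.625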
Example #50
    def _test(num_classes, threshold, multilabel, average):

        fbeta = FBeta(
            beta=2.0,
            num_classes=num_classes,
            threshold=threshold,
            multilabel=multilabel,
            average=average,
        )

        f1 = F1Score(
            num_classes=num_classes,
            threshold=threshold,
            multilabel=multilabel,
            average=average,
        )

        outputs = torch.randn(100, 1)
        targets = torch.randint(0, 2, size=(100, ))

        bs = _BaseInputHandler(
            num_classes=num_classes,
            average=average,
            threshold=0.5,
            multilabel=multilabel,
        )

        np_outputs, np_targets = bs._compute(outputs=outputs, targets=targets)

        fbeta.accumulate(outputs=outputs, targets=targets)
        f1.accumulate(outputs=outputs, targets=targets)

        fbeta_val = fbeta.value
        f1_val = f1.value

        assert fbeta.case_type == "binary"
        assert f1.case_type == "binary"

        fbeta_skm = fbeta_score(np_targets.numpy(),
                                np_outputs.numpy(),
                                average="binary",
                                beta=2.0)

        f1_skm = f1_score(np_targets.numpy(),
                          np_outputs.numpy(),
                          average="binary")

        assert fbeta_skm == pytest.approx(fbeta_val.item())
        assert f1_skm == pytest.approx(f1_val.item())

        bs = 16
        iters = targets.shape[0] // bs + 1

        fbeta.reset()
        f1.reset()
        for i in range(iters):
            idx = i * bs

            fbeta.accumulate(outputs=outputs[idx:idx + bs],
                             targets=targets[idx:idx + bs])

            f1.accumulate(
                outputs=outputs[idx:idx + bs],
                targets=targets[idx:idx + bs],
            )

        f1_m = f1.value
        fbeta_m = fbeta.value

        assert f1_skm == pytest.approx(f1_m.item())

        assert fbeta_skm == pytest.approx(fbeta_m.item())
Example #51
    def ki_test(self,
                ckpth,
                list_path,
                img_root,
                test_id,
                test_batch,
                ntype='',
                real_sn=False,
                test_each=False):
        """
        kinship identification test
        :return:
        """

        self.Net.load(ckpth)
        self.infer = partial(self.Net.inference, net_type=ntype)
        test_set = self.dloader(list_path,
                                img_root,
                                test_id,
                                transform=test_transform,
                                test=True,
                                test_each=test_each,
                                real_sn=real_sn)
        test_loader = DataLoader(test_set, batch_size=test_batch)
        total_pred = []
        total_label = []
        self.Net.net.eval()
        with torch.no_grad():
            for data in test_loader:
                images, labels, _, _ = data
                images, labels = images.to(self.device), labels.to(self.device)
                if ntype == 'cascade':
                    predicted = self.infer(images)
                else:
                    outputs = self.infer(images)
                    _, predicted = torch.max(outputs.data, 1)
                    predicted = predicted.cpu().data.numpy()
                labels = labels.cpu().data.numpy()
                total_pred = np.concatenate((total_pred, predicted), axis=0)
                total_label = np.concatenate((total_label, labels), axis=0)

        if real_sn:
            confu_m = confusion_matrix(total_label,
                                       total_pred,
                                       labels=[1, 2, 3, 4],
                                       normalize='true')
            f10_fd = fbeta_score(total_label,
                                 total_pred,
                                 labels=[1],
                                 beta=10,
                                 average='macro')
            f10_fs = fbeta_score(total_label,
                                 total_pred,
                                 labels=[2],
                                 beta=10,
                                 average='macro')
            f10_md = fbeta_score(total_label,
                                 total_pred,
                                 labels=[3],
                                 beta=10,
                                 average='macro')
            f10_ms = fbeta_score(total_label,
                                 total_pred,
                                 labels=[4],
                                 beta=10,
                                 average='macro')
            macro_f10 = fbeta_score(total_label,
                                    total_pred,
                                    beta=10,
                                    average='macro')
            acc = sum(total_label == total_pred) / len(total_label)
            return confu_m, f10_fd, f10_fs, f10_md, f10_ms, macro_f10, acc
        else:
            if test_each:
                confu_m = confusion_matrix(total_label,
                                           total_pred,
                                           labels=[1, 2, 3, 4],
                                           normalize='true')
                micro_f1 = f1_score(total_label, total_pred, average='macro')
                acc = sum(total_label == total_pred) / len(total_label)
                return confu_m, micro_f1, acc
            else:
                confu_m = confusion_matrix(total_label,
                                           total_pred,
                                           labels=[1, 2, 3, 4],
                                           normalize='true')
                micro_f1 = f1_score(total_label, total_pred, average='macro')
                acc = sum(total_label == total_pred) / len(total_label)
                return confu_m, micro_f1, acc
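# A standalone sketch (not from the original source) of the per-class F-beta
# trick used in ki_test above: passing labels=[k] with average='macro'
# restricts fbeta_score to a single class. The label codes are made up.
from sklearn.metrics import fbeta_score

y_true_demo = [1, 1, 2, 2, 3, 3, 4, 4]
y_pred_demo = [1, 2, 2, 2, 3, 4, 4, 4]
for k in (1, 2, 3, 4):
    print(k, fbeta_score(y_true_demo, y_pred_demo, labels=[k], beta=10, average='macro'))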
Example #52
#calculate the f0.5 measure
from sklearn.metrics import fbeta_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

#50% precision, perfect recall
y_true = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
y_pred = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
p = precision_score(y_true, y_pred)
r = recall_score(y_true, y_pred)
f = fbeta_score(y_true, y_pred, beta=0.5)
print('Result: p=%.3f, r=%.3f, f=%.3f' % (p, r, f))
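# Hand check (not from the original source): with precision 0.5 and recall 1.0,
# F0.5 = (1 + 0.5**2) * 0.5 * 1.0 / (0.5**2 * 0.5 + 1.0) = 0.625 / 1.125 ≈ 0.556,
# which matches the fbeta_score result printed above.
print((1 + 0.5**2) * 0.5 * 1.0 / (0.5**2 * 0.5 + 1.0))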
Example #53
def train_nn(i):
    trainindex = train_df[train_df['CVindices'] != i].index.tolist()
    valindex = train_df[train_df['CVindices'] == i].index.tolist()
    X_val_df = train_df.iloc[valindex, :]
    X_build, X_valid = train_data_224_3[trainindex, :], train_data_224_3[
        valindex, :]
    y_build, y_valid = train_target_224_3[trainindex, :], train_target_224_3[
        valindex, :]

    print('Split train: ', len(X_build), len(y_build))
    print('Split valid: ', len(X_valid), len(y_valid))
    model = resnet152_model(ROWS, COLUMNS, CHANNELS, num_classes=17)
    #        callbacks = [
    #            EarlyStopping(monitor='val_loss', patience=3, verbose=VERBOSEFLAG),
    #        ]
    callbacks = [
        ModelCheckpoint(MODEL_WEIGHTS_FILE,
                        monitor='val_loss',
                        save_best_only=True,
                        verbose=1)
    ]
    #sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
    adam = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6)
    model.compile(optimizer=adam,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    #        model.compile(loss='categorical_crossentropy', optimizer="adadelta", \
    #              metrics=["accuracy"])
    model.fit_generator(train_datagen.flow(X_build,
                                           y_build,
                                           batch_size=batch_size,
                                           shuffle=True),
                        samples_per_epoch=len(X_build),
                        nb_epoch=nb_epoch,
                        callbacks=callbacks,
                        validation_data=valid_datagen.flow(
                            X_valid, y_valid, batch_size=batch_size),
                        nb_val_samples=X_valid.shape[0],
                        verbose=VERBOSEFLAG)
    model.load_weights(MODEL_WEIGHTS_FILE)

    pred_cv = model.predict_generator(valid_datagen.flow(X_valid,
                                                         batch_size=batch_size,
                                                         shuffle=False),
                                      val_samples=X_valid.shape[0])
    print(
        'F2 Score : ',
        fbeta_score(y_valid,
                    np.array(pred_cv) > 0.2,
                    beta=2,
                    average='samples'))
    pred_cv = pd.DataFrame(pred_cv)
    pred_cv.head()
    pred_cv.columns = [
        "slash_burn", "clear", "blooming", "primary", "cloudy",
        "conventional_mine", "water", "haze", "cultivation", "partly_cloudy",
        "artisinal_mine", "habitation", "bare_ground", "blow_down",
        "agriculture", "road", "selective_logging"
    ]
    pred_cv["image_name"] = X_val_df.image_name.values

    sub_valfile = inDir + '/submissions/Prav.resnet152_01.fold' + str(
        i) + '.csv'
    pred_cv = pred_cv[[
        "image_name", "slash_burn", "clear", "blooming", "primary", "cloudy",
        "conventional_mine", "water", "haze", "cultivation", "partly_cloudy",
        "artisinal_mine", "habitation", "bare_ground", "blow_down",
        "agriculture", "road", "selective_logging"
    ]]
    pred_cv.to_csv(sub_valfile, index=False)
    pred_test = model.predict_generator(test_datagen.flow(
        test_data_224_3, batch_size=batch_size, shuffle=False),
                                        val_samples=test_data_224_3.shape[0])
    pred_test = pd.DataFrame(pred_test)
    pred_test.columns = [
        "slash_burn", "clear", "blooming", "primary", "cloudy",
        "conventional_mine", "water", "haze", "cultivation", "partly_cloudy",
        "artisinal_mine", "habitation", "bare_ground", "blow_down",
        "agriculture", "road", "selective_logging"
    ]
    pred_test["image_name"] = test_all.image_name.values
    pred_test = pred_test[[
        "image_name", "slash_burn", "clear", "blooming", "primary", "cloudy",
        "conventional_mine", "water", "haze", "cultivation", "partly_cloudy",
        "artisinal_mine", "habitation", "bare_ground", "blow_down",
        "agriculture", "road", "selective_logging"
    ]]
    sub_file = inDir + '/submissions/Prav.resnet152_01.fold' + str(
        i) + '-test' + '.csv'
    pred_test.to_csv(sub_file, index=False)
Example #54
    def evaluate(
        self,
        sentences: Union[List[Sentence], Dataset],
        out_path: Union[str, Path] = None,
        embedding_storage_mode: str = "none",
        mini_batch_size: int = 32,
        num_workers: int = 8,
    ) -> (Result, float):

        # read Dataset into data loader (if list of sentences passed, make Dataset first)
        if not isinstance(sentences, Dataset):
            sentences = SentenceDataset(sentences)
        data_loader = DataLoader(sentences, batch_size=mini_batch_size, num_workers=num_workers)

        # if span F1 needs to be used, use separate eval method
        if self._requires_span_F1_evaluation():
            return self._evaluate_with_span_F1(data_loader, embedding_storage_mode, mini_batch_size, out_path)

        # else, use scikit-learn to evaluate
        y_true = []
        y_pred = []
        labels = Dictionary(add_unk=False)

        eval_loss = 0
        batch_no: int = 0

        lines: List[str] = []

        for batch in data_loader:

            # predict for batch
            loss = self.predict(batch,
                                embedding_storage_mode=embedding_storage_mode,
                                mini_batch_size=mini_batch_size,
                                label_name='predicted',
                                return_loss=True)
            eval_loss += loss
            batch_no += 1

            for sentence in batch:

                for token in sentence:
                    # add gold tag
                    gold_tag = token.get_tag(self.tag_type).value
                    y_true.append(labels.add_item(gold_tag))

                    # add predicted tag
                    predicted_tag = token.get_tag('predicted').value
                    y_pred.append(labels.add_item(predicted_tag))

                    # for file output
                    lines.append(f'{token.text} {gold_tag} {predicted_tag}\n')

                lines.append('\n')

        if out_path:
            with open(Path(out_path), "w", encoding="utf-8") as outfile:
                outfile.write("".join(lines))

        eval_loss /= batch_no

        # use sklearn
        from sklearn import metrics

        # make "classification report"
        target_names = []
        for i in range(len(labels)):
            target_names.append(labels.get_item_for_index(i))
        classification_report = metrics.classification_report(y_true, y_pred, digits=4, target_names=target_names, zero_division=1)

        # get scores
        micro_f_score = round(metrics.fbeta_score(y_true, y_pred, beta=self.beta, average='micro'), 4)
        macro_f_score = round(metrics.fbeta_score(y_true, y_pred, beta=self.beta, average='macro'), 4)
        accuracy_score = round(metrics.accuracy_score(y_true, y_pred), 4)

        detailed_result = (
            "\nResults:"
            f"\n- F-score (micro) {macro_f_score}"
            f"\n- F-score (macro) {micro_f_score}"
            f"\n- Accuracy {accuracy_score}"
            '\n\nBy class:\n' + classification_report
        )

        # line for log file
        log_header = "ACCURACY"
        log_line = f"\t{accuracy_score}"

        result = Result(
            main_score=micro_f_score,
            log_line=log_line,
            log_header=log_header,
            detailed_results=detailed_result,
        )
        return result, eval_loss
Example #55
            total = np.add(total, inception_full_pred)
            avg = total / 12
            p_valid = np.vstack((p_valid, avg))
            targets = np.zeros(17)
            for t in tags.split(' '):
                targets[label_map[t]] = 1
            Y_valid.append(targets)

        Y_valid = np.array(Y_valid, np.uint8)
        p_valid = np.delete(p_valid, 0, axis=0)

        print(Y_valid)
        print(p_valid)
        print(
            fbeta_score(Y_valid,
                        np.array(p_valid) > 0.2,
                        beta=2,
                        average='samples'))

    constant_threshold = find_f2score_threshold(p_valid, Y_valid, verbose=True)

    # Find different thresholds for each label
    if find_thresholds:
        out = np.array(p_valid)
        threshold = np.arange(0.1, 0.9, 0.1)
        acc = []
        accuracies = []
        best_threshold = np.zeros(out.shape[1])
        for i in range(out.shape[1]):
            y_prob = np.array(out[:, i])
            for j in threshold:
                y_pred = [1 if prob >= j else 0 for prob in y_prob]
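# The per-label loop above is cut off in this excerpt. A self-contained sketch
# (with made-up data, not the original variables) of the same idea: for each
# label, pick the threshold that maximizes a chosen score, here F2.
import numpy as np
from sklearn.metrics import fbeta_score

rng = np.random.RandomState(0)
probs = rng.rand(50, 3)                       # made-up predicted probabilities, 3 labels
truth = (rng.rand(50, 3) > 0.5).astype(int)   # made-up multilabel ground truth

thresholds = np.arange(0.1, 0.9, 0.1)
best_threshold = np.zeros(probs.shape[1])
for col in range(probs.shape[1]):
    scores = [fbeta_score(truth[:, col], (probs[:, col] >= t).astype(int),
                          beta=2, zero_division=0)
              for t in thresholds]
    best_threshold[col] = thresholds[int(np.argmax(scores))]
print(best_threshold)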
Example #56
                   random_state=30)

# In[49]:

from sklearn.metrics import make_scorer, fbeta_score, accuracy_score
from sklearn import metrics

ada_income = (ada_Boosts.predict(X_test))

print(metrics.confusion_matrix(Y_test, ada_income))
print("Accuracy Score =", metrics.accuracy_score(Y_test, ada_income))

#print ("Accuracy score on testing data:{:.4f}".format(
#accuracy_score(Y_test, ada_income)))
print("F-score on testing data:{:.4f}".format(
    fbeta_score(Y_test, ada_income, beta=0.5)))

# In[97]:

from sklearn import metrics

adult_tree = tree.DecisionTreeClassifier(criterion='entropy',
                                         max_depth=4,
                                         max_leaf_nodes=5)
adult_tree.fit(X_train, Y_train)
predictions = adult_tree.predict(X_test)
print(metrics.confusion_matrix(Y_test, predictions))
print("Accuracy Score =", metrics.accuracy_score(Y_test, predictions))

#adult_tree = tree.DecisionTreeClassifier(criterion='entropy', max_depth=4, max_leaf_nodes=5)
Example #57
def test_precision_recall_f1_score_multilabel_1():
    """ Test precision_recall_f1_score on a crafted multilabel example
    """
    # First crafted example
    y_true_ll = [(0,), (1,), (2, 3)]
    y_pred_ll = [(1,), (1,), (2, 0)]
    lb = LabelBinarizer()
    lb.fit([range(4)])
    y_true_bi = lb.transform(y_true_ll)
    y_pred_bi = lb.transform(y_pred_ll)

    for y_true, y_pred in [(y_true_ll, y_pred_ll), (y_true_bi, y_pred_bi)]:

        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average=None)
        #tp = [0, 1, 1, 0]
        #fn = [1, 0, 0, 1]
        #fp = [1, 1, 0, 0]
        # Check per class

        assert_array_almost_equal(p, [0.0, 0.5, 1.0, 0.0], 2)
        assert_array_almost_equal(r, [0.0, 1.0, 1.0, 0.0], 2)
        assert_array_almost_equal(f, [0.0, 1 / 1.5, 1, 0.0], 2)
        assert_array_almost_equal(s, [1, 1, 1, 1], 2)

        f2 = fbeta_score(y_true, y_pred, beta=2, average=None)
        support = s
        assert_array_almost_equal(f2, [0, 0.83, 1, 0], 2)

        # Check macro
        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average="macro")
        assert_almost_equal(p, 1.5 / 4)
        assert_almost_equal(r, 0.5)
        assert_almost_equal(f, 2.5 / 1.5 * 0.25)
        assert_equal(s, None)
        assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                        average="macro"),
                            np.mean(f2))

        # Check micro
        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average="micro")
        assert_almost_equal(p, 0.5)
        assert_almost_equal(r, 0.5)
        assert_almost_equal(f, 0.5)
        assert_equal(s, None)
        assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                        average="micro"),
                            (1 + 4) * p * r / (4 * p + r))

        # Check weighted
        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average="weighted")
        assert_almost_equal(p, 1.5 / 4)
        assert_almost_equal(r, 0.5)
        assert_almost_equal(f, 2.5 / 1.5 * 0.25)
        assert_equal(s, None)
        assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                        average="weighted"),
                            np.average(f2, weights=support))
        # Check samples
        # |h(x_i) inter y_i | = [0, 1, 1]
        # |y_i| = [1, 1, 2]
        # |h(x_i)| = [1, 1, 2]
        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average="samples")
        assert_almost_equal(p, 0.5)
        assert_almost_equal(r, 0.5)
        assert_almost_equal(f, 0.5)
        assert_equal(s, None)
        assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                        average="samples"),
                            0.5)
Example #58
def f2_score(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return fbeta_score(y_true, y_pred, beta=2, average='samples')
Example #59
def test_precision_recall_f1_score_with_an_empty_prediction():
    y_true_ll = [(1,), (0,), (2, 1,)]
    y_pred_ll = [tuple(), (3,), (2, 1)]

    lb = LabelBinarizer()
    lb.fit([range(4)])
    y_true_bi = lb.transform(y_true_ll)
    y_pred_bi = lb.transform(y_pred_ll)

    for y_true, y_pred in [(y_true_ll, y_pred_ll), (y_true_bi, y_pred_bi)]:
        # true_pos = [ 0.  1.  1.  0.]
        # false_pos = [ 0.  0.  0.  1.]
        # false_neg = [ 1.  1.  0.  0.]
        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average=None)
        assert_array_almost_equal(p, [0.0, 1.0, 1.0, 0.0], 2)
        assert_array_almost_equal(r, [0.0, 0.5, 1.0, 0.0], 2)
        assert_array_almost_equal(f, [0.0, 1 / 1.5, 1, 0.0], 2)
        assert_array_almost_equal(s, [1, 2, 1, 0], 2)

        f2 = fbeta_score(y_true, y_pred, beta=2, average=None)
        support = s
        assert_array_almost_equal(f2, [0, 0.55, 1, 0], 2)

        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average="macro")
        assert_almost_equal(p, 0.5)
        assert_almost_equal(r, 1.5 / 4)
        assert_almost_equal(f, 2.5 / (4 * 1.5))
        assert_equal(s, None)
        assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                        average="macro"),
                            np.mean(f2))

        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average="micro")
        assert_almost_equal(p, 2 / 3)
        assert_almost_equal(r, 0.5)
        assert_almost_equal(f, 2 / 3 / (2 / 3 + 0.5))
        assert_equal(s, None)
        assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                        average="micro"),
                            (1 + 4) * p * r / (4 * p + r))

        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average="weighted")
        assert_almost_equal(p, 3 / 4)
        assert_almost_equal(r, 0.5)
        assert_almost_equal(f, (2 / 1.5 + 1) / 4)
        assert_equal(s, None)
        assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                        average="weighted"),
                            np.average(f2, weights=support))

        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average="samples")
        # |h(x_i) inter y_i | = [0, 0, 2]
        # |y_i| = [1, 1, 2]
        # |h(x_i)| = [0, 1, 2]
        assert_almost_equal(p, 1 / 3)
        assert_almost_equal(r, 1 / 3)
        assert_almost_equal(f, 1 / 3)
        assert_equal(s, None)
        assert_almost_equal(fbeta_score(y_true, y_pred, beta=2,
                                        average="samples"),
                            0.333, 2)
Example #60
def fbeta(model, X_valid, y_valid):
    p_valid = model.predict(X_valid)
    return fbeta_score(y_valid,
                       np.array(p_valid) > 0.2,
                       beta=2,
                       average='samples')
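# A made-up usage sketch (not from the original source) of the pattern above:
# binarize multilabel probabilities at 0.2 and score with sample-averaged F2.
import numpy as np
from sklearn.metrics import fbeta_score

y_valid_demo = np.array([[1, 0, 1], [0, 1, 0], [1, 1, 0]])
p_valid_demo = np.array([[0.9, 0.1, 0.4], [0.05, 0.8, 0.3], [0.6, 0.25, 0.1]])
print(fbeta_score(y_valid_demo, (p_valid_demo > 0.2).astype(int), beta=2, average='samples'))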