Example No. 1
def get_results(results, instance_of_datasets, classifier_name, y_true, y_pred, file_dump):
    tmp_ = {"y_pred": y_pred,
            "y_true": y_true,
            "accuracy": accuracy_score(y_true, y_pred),
            "precision_micro": precision_score(y_true, y_pred, average="micro"),
            "precision_macro": precision_score(y_true, y_pred, average="macro"),
            "recall_micro": recall_score(y_true, y_pred, average="micro"),
            "recall_macro": recall_score(y_true, y_pred, average="macro"),
            "f1_micro": f1_score(y_true, y_pred, average="micro"),
            "f1_macro": f1_score(y_true, y_pred, average="macro")}

    cPickle.dump(tmp_, gzip.open("%s/single_%s_%s_%s.zcp" % (dir_results, file_dump, instance_of_datasets, classifier_name), "wb+"))
    results[instance_of_datasets][classifier_name] = tmp_
    print(classifier_name,
          "accuracy", results[instance_of_datasets][classifier_name]["accuracy"],
          "f1 score_micro", results[instance_of_datasets][classifier_name]["f1_micro"],
          "precision_micro", results[instance_of_datasets][classifier_name]["precision_micro"],
          "recall_micro", results[instance_of_datasets][classifier_name]["recall_micro"],
          "f1 score_macro", results[instance_of_datasets][classifier_name]["f1_macro"],
          "precision_macro", results[instance_of_datasets][classifier_name]["precision_macro"],
          "recall_macro", results[instance_of_datasets][classifier_name]["recall_macro"]
          )
    cPickle.dump(results, gzip.open(dir_results+"/"+file_dump, "wb+"))
    return results
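A minimal, self-contained sketch (all labels invented, no pickling) of the micro- vs. macro-averaged metrics that get_results aggregates above:

from sklearn.metrics import accuracy_score, f1_score, precision_score

y_true = [0, 0, 1, 1, 2, 2]
y_pred = [0, 1, 1, 1, 2, 0]

print("accuracy        ", accuracy_score(y_true, y_pred))
# micro-averaging pools all classes into one count of TP/FP/FN
print("precision_micro ", precision_score(y_true, y_pred, average="micro"))
# macro-averaging scores each class separately, then takes the unweighted mean
print("precision_macro ", precision_score(y_true, y_pred, average="macro"))
print("f1_micro        ", f1_score(y_true, y_pred, average="micro"))
print("f1_macro        ", f1_score(y_true, y_pred, average="macro"))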
Example No. 2
def calc_fit(model, metric, train_x, train_y, test_x, test_y, p):
    # keep only the feature columns selected by the 0/1 mask `p`
    train_x = [list(compress(x, p)) for x in train_x]
    test_x = [list(compress(x, p)) for x in test_x]
    clf = model.fit(train_x, train_y)
    predictions = clf.predict(test_x)
    if metric == 'precision':
        return precision_score(test_y, predictions, labels=[0, 1])
    elif metric == 'recall':
        return recall_score(test_y, predictions, labels=[0, 1])
    elif metric == 'accuracy':
        return accuracy_score(test_y, predictions)
    return (precision_score(test_y, predictions, labels=[0, 1])
            + recall_score(test_y, predictions, labels=[0, 1])
            + accuracy_score(test_y, predictions))
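The key trick in calc_fit is using a 0/1 mask p with itertools.compress to select feature columns. A small sketch with invented data:

from itertools import compress

mask = [1, 0, 1, 0]                       # keep the 1st and 3rd features
row = [0.1, 0.2, 0.3, 0.4]
print(list(compress(row, mask)))          # -> [0.1, 0.3]

train_x = [[1, 2, 3, 4], [5, 6, 7, 8]]
train_x = [list(compress(x, mask)) for x in train_x]
print(train_x)                            # -> [[1, 3], [5, 7]]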
Example No. 3
def metric_overall_outlier(scores, weights, marks, title=None):
    from pyod.utils.utility import get_label_n
    from sklearn.metrics import roc_auc_score, precision_score, recall_score

    y_true = []
    weighted_scores = []
    for i in range(len(scores)):
        score = 0.0
        for w, s, m in zip(weights[i], scores[i], marks[i]):
            score += w * s

        # print(1 if 'n' in marks[i] else 0, score, scores[i], weights[i], marks[i])
        weighted_scores.append(score)
        y_true.append(1 if 1 in marks[i] else 0)

    pk, rk = [], []
    for k in range(1, len(y_true)):
        y_predict = get_label_n(y_true, weighted_scores, k)
        pk.append(precision_score(y_true, y_predict))
        rk.append(recall_score(y_true, y_predict))
    n = sum(y_true)
    print('overall@{}'.format(n), len(y_true), pk[n], rk[n], roc_auc_score(y_true, weighted_scores))

    if title is not None:
        fp_save = os.path.join('results', 'overall_' + title)
        # plot_curve('overall_{}_precision'.format(title), 'precision', list(range(1, len(y_true))), pk,
        #            fp_save=fp_save + '_precision.pdf')
        # plot_curve('overall_{}_recall'.format(title), 'recall', list(range(1, len(y_true))), rk,
        #            fp_save=fp_save + '_recall.pdf')
        plot_precision_recall(
            '', list(range(1, len(y_true))), pk, rk, path_save=fp_save + '.pdf'
        )
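A self-contained sketch of the precision@k / recall@k curve built above, using numpy to binarize the top-k scores instead of pyod's get_label_n (scores and labels are invented):

import numpy as np
from sklearn.metrics import precision_score, recall_score

y_true = np.array([0, 1, 0, 1, 0, 0, 1, 0])            # 1 = outlier
scores = np.array([0.1, 0.9, 0.3, 0.8, 0.2, 0.4, 0.7, 0.5])

pk, rk = [], []
for k in range(1, len(y_true)):
    threshold = np.sort(scores)[-k]                    # k-th largest score
    y_predict = (scores >= threshold).astype(int)      # label the top-k samples as outliers
    pk.append(precision_score(y_true, y_predict))
    rk.append(recall_score(y_true, y_predict))

n = int(y_true.sum())                                  # number of true outliers
print('precision@{} = {:.2f}, recall@{} = {:.2f}'.format(n, pk[n - 1], n, rk[n - 1]))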
Example No. 4
def metric_permission_based_outlier(scores, marks, target_labels, title=None):
    from pyod.utils.utility import get_label_n
    from sklearn.metrics import roc_auc_score, precision_score, recall_score

    for i in range(len(target_labels)):
        label_i = target_labels[i]

        scores_i, y_true = [], []
        for j in range(len(scores)):
            if marks[j][i] != 0:
                scores_i.append(scores[j][i])
                y_true.append(1 if marks[j][i] == 1 else 0)

        pk, rk = [], []
        for k in range(1, len(y_true)):
            y_predict = get_label_n(y_true, scores_i, k)
            pk.append(precision_score(y_true, y_predict))
            rk.append(recall_score(y_true, y_predict))

        n = sum(y_true) - 1
        if 0 <= n < len(pk):
            # print(y_true)
            # print(scores_i)
            print('{}@{}/{}'.format(label_i, n, len(scores_i)), pk[n], rk[n], roc_auc_score(y_true, scores_i))
        else:
            print('{}@{}/{}'.format(label_i, n, len(scores_i)), 0.0, 0.0, 0.0)

        if title is not None:
            fp_save = os.path.join('results_weighted', title)
            plot_curve('{}_{}_precision'.format(title, label_i), 'precision', list(range(1, len(y_true))), pk,
                       path_save=fp_save + '_{}_precision.pdf'.format(label_i))
            plot_curve('{}_{}_recall'.format(title, label_i), 'recall', list(range(1, len(y_true))), rk,
                       path_save=fp_save + '_{}_recall.pdf'.format(label_i))
Example No. 5
 def Predict(self, inp, labels, classifier, folds, name, paramdesc):
     X = inp
     y = labels
     X, y = X[y != 2], y[y != 2]
     n_samples, n_features = X.shape
     
     ###############################################################################
     # Classification and ROC analysis
     
     # Run classifier with cross-validation and plot ROC curves
     cv = StratifiedKFold(y, n_folds=folds)
     
     mean_tpr = 0.0
     mean_fpr = np.linspace(0, 1, 100)
     all_tpr = []
     
     _precision = 0.0
     _recall = 0.0
     _accuracy = 0.0
     _f1 = 0.0
     
     for i, (train, test) in enumerate(cv):
         probas_ = classifier.fit(X[train], y[train]).predict_proba(X[test])
         pred_ = classifier.predict(X[test])
         _precision += precision_score(y[test], pred_)
         _recall += recall_score(y[test], pred_)
         _accuracy += accuracy_score(y[test], pred_)
         _f1 += f1_score(y[test], pred_)
         # Compute ROC curve and area the curve
         fpr, tpr, thresholds = roc_curve(y[test], probas_[:, 1])
         mean_tpr += interp(mean_fpr, fpr, tpr)
         mean_tpr[0] = 0.0
         roc_auc = auc(fpr, tpr)
         plt.plot(fpr, tpr, lw=1, label='ROC fold %d (area = %0.2f)' % (i, roc_auc))
     
     _precision /= folds
     _recall /= folds
     _accuracy /= folds
     _f1 /= folds
     
     
     plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Luck')
     
     mean_tpr /= len(cv)
     mean_tpr[-1] = 1.0
     mean_auc = auc(mean_fpr, mean_tpr)
     plt.plot(mean_fpr, mean_tpr, 'k--',
              label='Mean ROC (area = %0.2f)' % mean_auc, lw=2)
     
     plt.xlim([-0.05, 1.05])
     plt.ylim([-0.05, 1.05])
     plt.xlabel('False Positive Rate')
     plt.ylabel('True Positive Rate')
     plt.title('Receiver operating characteristic - {0}'.format(name))
     plt.legend(loc="lower right")
     plt.savefig(self.configObject['outputdir'] + '/' + name + '.png')
     plt.close()
     
     result = self.OutputResult(name, paramdesc, len(inp), floor(labels.size / folds), _precision, _recall, _accuracy, _f1) 
     Announce(result)
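The snippet above uses the pre-0.18 StratifiedKFold(y, n_folds=...) API. A hedged sketch of the equivalent loop with the current sklearn.model_selection API, on generated toy data:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import StratifiedKFold

X, y = make_classification(n_samples=200, random_state=0)
classifier = LogisticRegression(max_iter=1000)

cv = StratifiedKFold(n_splits=5)
precision = recall = accuracy = f1 = 0.0
for train, test in cv.split(X, y):          # indices now come from split(X, y)
    pred = classifier.fit(X[train], y[train]).predict(X[test])
    precision += precision_score(y[test], pred)
    recall += recall_score(y[test], pred)
    accuracy += accuracy_score(y[test], pred)
    f1 += f1_score(y[test], pred)

folds = cv.get_n_splits()
print(precision / folds, recall / folds, accuracy / folds, f1 / folds)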
Example No. 6
def get_score(a, b_max):
    a_max = np.argmax(a, axis=-1)
    acc = accuracy_score(a_max, b_max)
    p = precision_score(a_max, b_max, average='macro')
    r = recall_score(a_max, b_max, average='macro')
    f1 = f1_score(a_max, b_max, average='macro')
    return acc, p, r, f1
Example No. 7
def run():
    paras = create_dataset()

    X = np.array(get_features(paras))
    Y = np.array(get_ys(paras))

    skf = StratifiedKFold(Y, n_folds=10)

    f = open('results/correct.txt', 'w')
    f2 = open('results/wrong.txt', 'w')

    accs = []
    precs = []
    recs = []
    f1s = []

    for train_index, test_index in skf:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]

        cv = CountVectorizer()
        X_train_counts = cv.fit_transform(X_train)

        tf_transformer = TfidfTransformer(use_idf=True).fit(X_train_counts)
        X_train_tfidf = tf_transformer.transform(X_train_counts)

        clf = DummyClassifier(strategy="most_frequent").fit(
            X_train_counts, y_train)

        X_test_counts = cv.transform(X_test)
        X_test_tfidf = tf_transformer.transform(X_test_counts)

        y_pred = clf.predict(X_test_counts)

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)

        accs.append(acc)
        precs.append(prec)
        recs.append(rec)
        f1s.append(f1)

        print 'Acc \t %s' % acc
        print 'Prec \t %s' % prec
        print 'Recall \t %s' % rec
        print 'F1 \t %s' % f1

        for para, (y_t, y_p) in zip(X_test, zip(y_test, y_pred)):
            if y_t == y_p:
                f.write('%s\n' % para)
            else:
                f2.write('%s\n' % para)

    print 'Avg Acc \t %s \t ' % np.mean(accs)
    print 'Avg Prec \t %s' % np.mean(precs)
    print 'Avg Recall \t %s' % np.mean(recs)
    print 'Avg F1 \t %s' % np.mean(f1s)
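The DummyClassifier(strategy="most_frequent") baseline above ignores its input features (note the TF-IDF matrix is computed but never used by the dummy), so it gives the floor a real text classifier has to beat. A tiny sketch on an invented corpus:

from sklearn.dummy import DummyClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, f1_score

docs = ["good movie", "bad movie", "great film", "nice film"]
labels = [1, 0, 1, 1]

X = CountVectorizer().fit_transform(docs)
clf = DummyClassifier(strategy="most_frequent").fit(X, labels)
pred = clf.predict(X)                      # always predicts the majority class (1)
print("baseline accuracy:", accuracy_score(labels, pred))
print("baseline F1:", f1_score(labels, pred))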
Example No. 8
 def metrics(y_true, y_predict):
     logger.info("计算分类指标...")
     F_value = f1_score(y_true, y_predict, average="weighted")
     Recall_value = recall_score(y_true, y_predict, average="weighted")
     Precision_value = precision_score(y_true,
                                       y_predict,
                                       average="weighted")
     return F_value, Recall_value, Precision_value
Example No. 9
def metric_permission_based_outlier(scores, marks, target_permissions, title=None):
    """Metric and print permission based outlier scores, i.e., precision/recall and AUC value.

    :param scores:
        List, scores(i, j) of each widget(i) in each permission(j).
    :param marks:
        List, outlier marks(i, j) of each widget(i) in each permission(j).
        The value could be 0 (not related to the permission), 1 (outlier), -1 (inlier).
    :param target_permissions:
        List of string, the `j`th permission name.
    :param title:
        String, file name used to save the plot, `None` means not to save.

    :return: None
    """
    from pyod.utils.utility import get_label_n
    from sklearn.metrics import roc_auc_score, precision_score, recall_score

    for i in range(len(target_permissions)):
        permission_i = target_permissions[i]

        # sort scores in each permission
        scores_i, y_true = [], []
        for j in range(len(scores)):
            if marks[j][i] != 0:
                scores_i.append(scores[j][i])
                y_true.append(1 if marks[j][i] == 1 else 0)

        # no positive or negative labels
        if sum(y_true) == len(scores_i) or sum(y_true) == 0:
            print('{}({}/{}), error'.format(
                permission_i, sum(y_true), len(scores_i)
            ))
            continue

        # compute precision, recall curve and auc value
        pk, rk = [], []
        for k in range(1, len(y_true)):
            y_predict = get_label_n(y_true, scores_i, k)
            pk.append(precision_score(y_true, y_predict))
            rk.append(recall_score(y_true, y_predict))
        auc = roc_auc_score(y_true, scores_i)

        # print top-k precision, recall, and AUC value
        k = sum(y_true)
        print('{}({}/{}), p/r: {}, AUC: {}'.format(
            permission_i, k, len(scores_i), round(pk[k - 1], 4), round(auc, 4)
        ))

        # save plot
        if title is not None:
            path_save = os.path.join('{}-{}.pdf'.format(title, permission_i))
            plot_precision_recall(
                permission_i, list(range(1, len(y_true))), pk, rk, path_save
            )
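A toy sketch (invented data) of the marks encoding that the docstring describes: per permission column, widgets marked 0 are dropped, mark 1 becomes label 1 (outlier), and mark -1 becomes label 0 (inlier):

marks = [
    [0, 1, -1],   # widget 0: unrelated / outlier / inlier for permissions 0..2
    [1, -1, 0],   # widget 1
    [-1, 1, 1],   # widget 2
]
scores = [
    [0.0, 0.9, 0.1],
    [0.8, 0.2, 0.0],
    [0.1, 0.7, 0.6],
]

i = 1  # look at the second permission column
scores_i, y_true = [], []
for widget_marks, widget_scores in zip(marks, scores):
    if widget_marks[i] != 0:              # drop widgets not related to this permission
        scores_i.append(widget_scores[i])
        y_true.append(1 if widget_marks[i] == 1 else 0)

print(scores_i)   # [0.9, 0.2, 0.7]
print(y_true)     # [1, 0, 1]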
Example No. 10
    def by_class_evaluation(attack_test_y,
                            target_y,
                            p,
                            attack_test_x,
                            labels=None):
        if labels is None:
            labels = np.unique(target_y)

        precisions = [
            precision_score(attack_test_y[target_y == c], p[target_y == c]) *
            100 for c in np.unique(target_y)
        ]
        accuracies = [
            accuracy_score(attack_test_y[target_y == c], p[target_y == c]) *
            100 for c in np.unique(target_y)
        ]
        f1_scores = [
            f1_score(attack_test_y[target_y == c], p[target_y == c]) * 100
            for c in np.unique(target_y)
        ]
        recalls = [
            recall_score(attack_test_y[target_y == c], p[target_y == c]) * 100
            for c in np.unique(target_y)
        ]
        c_train_accs = [
            accuracy_score(
                target_y[np.logical_and(target_y == c, attack_test_y == 1)],
                np.argmax(attack_test_x[np.logical_and(target_y == c,
                                                       attack_test_y == 1)],
                          axis=1)) * 100 for c in np.unique(target_y)
        ]
        c_test_accs = [
            accuracy_score(
                target_y[np.logical_and(target_y == c, attack_test_y == 0)],
                np.argmax(attack_test_x[np.logical_and(target_y == c,
                                                       attack_test_y == 0)],
                          axis=1)) * 100 for c in np.unique(target_y)
        ]

        x = PrettyTable()
        x.float_format = '.2'
        x.add_column("Class", labels)
        x.add_column('Target Accuracy Train', np.round(c_train_accs, 2))
        x.add_column('Target Accuracy Test', np.round(c_test_accs, 2))
        x.add_column("Attack Precision", np.round(precisions, 2))
        x.add_column("Attack Accuracy", np.round(accuracies, 2))
        x.add_column("Attack Recall", np.round(recalls, 2))
        x.add_column("Attack F-1 Score", np.round(f1_scores, 2))
        x.add_column(
            "Percentage of Data",
            np.round(
                np.array([
                    len(target_y[target_y == c]) / len(target_y) * 100
                    for c in np.unique(target_y)
                ]), 2))
        print(x.get_string(title='Per Class Evaluation'))
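A minimal sketch (toy arrays) of the per-class masking pattern used above: each metric is computed on the subset of rows whose true class equals c:

import numpy as np
from sklearn.metrics import precision_score

attack_test_y = np.array([1, 0, 1, 0, 1, 0])   # membership ground truth (1 = training member)
target_y = np.array([0, 0, 1, 1, 2, 2])        # class of each record
p = np.array([1, 0, 1, 1, 1, 1])               # attack predictions

for c in np.unique(target_y):
    mask = target_y == c
    print(c, precision_score(attack_test_y[mask], p[mask]))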
Example No. 11
    def __evaluate(self, modelFactory, x, y):
        """
        Perform the cross validation
        :param modelFactory: a factory that builds a model
        :param x: the evaluation data
        :param y: the evaluation classes
        """

        #Creating KFold
        kf = KFold(self.folds, shuffle=True, random_state=None)
        print(
            "=============================" + str(self.folds) +
            "-fold Cross-Validation training and testing ============================= \n"
        )
        i = 1
        # If the number of classes is not given, use the classes that we have
        if not self.numClasses:
            self.numClasses = len(set(y))
        # A list of results to be used to see how well the model is doing over the folds
        tableResults = []
        #Loop through the folds separation of data
        for trainIndex, testIndex in kf.split(x):
            # print(type(trainIndex))
            # Build a model adapter using a factory
            model = modelFactory.create()
            # A print to see if it is ok
            print(" ============== Fold ", i, "============")
            trainDocs, testDocs = x[trainIndex], x[testIndex]
            trainCats, testCats = y[trainIndex], y[testIndex]
            # If we want the categories to be represented as a binary array, here is where we do that
            # TODO: categorical class representation when evaluating the classes returned by the model
            # Using the adapter to fit our model
            model.fit(trainDocs,
                      trainCats,
                      epochs=self.epochs,
                      batch_size=len(trainIndex))
            # Predicting it
            pred = model.predict(testDocs, testCats)
            print(pred)
            # Getting the scores
            accuracy = accuracy_score(testCats, pred)
            recall = recall_score(testCats, pred, average='weighted')
            precision = precision_score(testCats, pred, average='weighted')
            f1 = f1_score(testCats, pred, average='weighted')
            #Appending it to the result table
            tableResults.append({
                'result': 'result',
                'accuracy': accuracy,
                'recall': recall,
                'precision': precision,
                'f1': f1
            })
            i += 1
        self.tableResults = tableResults
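As a hedged alternative to writing the KFold loop by hand, sklearn's cross_validate can collect the same four scores per fold (toy data generated here; the model adapter and factory classes above are not involved):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate

X, y = make_classification(n_samples=300, n_classes=3, n_informative=5, random_state=0)
scores = cross_validate(
    LogisticRegression(max_iter=1000), X, y, cv=5,
    scoring=['accuracy', 'recall_weighted', 'precision_weighted', 'f1_weighted'],
)
for name in ('test_accuracy', 'test_recall_weighted',
             'test_precision_weighted', 'test_f1_weighted'):
    print(name, scores[name].mean())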
Example No. 12
def train_and_eval(output,
                   ngram_range=(1, 1),
                   max_features=None,
                   max_df=1.0,
                   C=1.0):
    """Train and eval newsgroup classification.

    :param output: path where the trained pipeline is persisted (via joblib)
    :param ngram_range: ngram range
    :param max_features: the number of maximum features
    :param max_df: max document frequency ratio
    :param C: Inverse of regularization strength for LogisticRegression
    :return: metrics
    """
    # Loads train and test data.
    train_data = fetch_20newsgroups(subset='train')
    test_data = fetch_20newsgroups(subset='test')

    # Define the pipeline.
    pipeline = Pipeline([('tfidf', TfidfVectorizer()),
                         ('clf', LogisticRegression(multi_class='auto'))])

    # Set pipeline parameters.
    params = {
        'tfidf__ngram_range': ngram_range,
        'tfidf__max_features': max_features,
        'tfidf__max_df': max_df,
        'clf__C': C,
    }
    pipeline.set_params(**params)
    print(pipeline.get_params().keys())

    # Train the model.
    pipeline.fit(train_data.data, train_data.target)
    # Predict test data.
    start_time = time()
    predictions = pipeline.predict(test_data.data)
    inference_time = time() - start_time
    avg_inference_time = 1.0 * inference_time / len(test_data.target)
    print("Avg. inference time: {}".format(avg_inference_time))

    # Calculate the metrics.
    accuracy = accuracy_score(test_data.target, predictions)
    recall = recall_score(test_data.target, predictions, average='weighted')
    f1 = f1_score(test_data.target, predictions, average='weighted')
    metrics = {
        'accuracy': accuracy,
        'recall': recall,
        'f1': f1,
    }

    # Persist the model.
    joblib.dump(pipeline, output)

    return metrics
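A small sketch of the step__parameter naming convention that set_params relies on above (parameter values here are hypothetical):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

pipeline = Pipeline([('tfidf', TfidfVectorizer()),
                     ('clf', LogisticRegression())])

# each key is "<step name>__<estimator parameter>"
pipeline.set_params(tfidf__ngram_range=(1, 2), tfidf__max_df=0.9, clf__C=10.0)
print(pipeline.get_params()['tfidf__ngram_range'])    # (1, 2)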
Example No. 13
def classifier_evaluation(ytrue, ypred):
    """function compute key performance metrics
    """
    from sklearn.metrics.classification import (accuracy_score,
                                                precision_score, recall_score)

    return {
        "accuracy_score": accuracy_score(ytrue, ypred),
        "precision_score": precision_score(ytrue, ypred),
        "recall_score": recall_score(ytrue, ypred)
    }
Example No. 14
def show_metrics(model, X_enc, y_enc, show_confusion=False):
    pr = model.predict_classes(X_enc)
    yh = y_enc.argmax(2)
    fyh, fpr = decode_results(yh, pr)
    print('Accuracy:', accuracy_score(fyh, fpr))
    print('F1:', f1_score(fyh, fpr, average='weighted'))
    print('Precision (per class: %s)' % labels)
    print(precision_score(fyh, fpr, average=None))
    print('Recall (per class: %s)' % labels)
    print(recall_score(fyh, fpr, average=None))

    if show_confusion:
        print('Confusion matrix:')
        print(confusion_matrix(fyh, fpr))
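A sketch (invented labels) of what average=None returns: one precision/recall value per class, ordered by the sorted unique labels, which is what the per-class printouts above rely on:

from sklearn.metrics import precision_score, recall_score

y_true = [0, 0, 1, 1, 2, 2]
y_pred = [0, 1, 1, 1, 2, 2]
print(precision_score(y_true, y_pred, average=None))   # one value per class
print(recall_score(y_true, y_pred, average=None))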
Example No. 15
def run_grid_search(grid_search, show_evaluation=True):
    """ Run the GridSearch algorithm and compute evaluation metrics """
    X_train, X_test, y_train, y_test = split_dataset()

    grid_search.fit(X_train, y_train)
    # for key, value in grid_search.cv_results_.items():
    #     print key, value

    predictions = grid_search.predict(X_test)

    if show_evaluation:
        logger.debug("macro_recall: %s", recall_score(y_test, predictions, average="macro"))
        logger.debug(precision_recall_fscore_support(y_test, predictions))
        logger.debug(confusion_matrix(y_test, predictions))
Example No. 16
def balanced_accuracy_score(y_true, y_pred, sample_weight=None):
    """Compute the balanced pred
    The balanced pred is used in binary classification problems to deal
    with imbalanced datasets. It is defined as the arithmetic mean of sensitivity
    (true positive rate) and specificity (true negative rate), or the average_flat
    pred obtained on either class.
    The best value is 1 and the worst value is 0.
    Read more in the :ref:`User Guide <balanced_accuracy_score>`.
    Parameterspartial(power, exponent=2)
    ----------
    y_true : 1d array-like
        Ground truth (correct) target values.
    y_pred : 1d array-like
        Estimated targets as returned by a classifier.
    sample_weight : array-like of shape = [n_samples], optional
        Sample weights.
    Returns
    -------
    balanced_accuracy : float.
        The average_flat of sensitivity and specificity
    See also
    --------
    recall_score
    References
    ----------
    .. [1] Brodersen, K.H.; Ong, alpha.S.; Stephan, K.E.; Buhmann, J.M. (2010).
           The balanced pred and its posterior distribution.
           Proceedings of the 20th International Conference on Pattern Recognition,
           3121–24.
    Examples
    --------
    >>> from decog.metrics import balanced_accuracy_score
    >>> y_true = [0, 1, 0, 0, 1, 0]
    >>> y_pred = [0, 1, 0, 0, 0, 1]
    >>> balanced_accuracy_score(y_true, y_pred)
    0.625
    """
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)

    if y_type != 'binary':
        raise ValueError('Balanced accuracy is only meaningful '
                         'for binary classification problems.')
    # simply wrap the ``recall_score`` function
    return recall_score(y_true,
                        y_pred,
                        pos_label=None,
                        average='macro',
                        sample_weight=sample_weight)
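A quick check on invented labels that macro-averaged recall matches scikit-learn's own balanced_accuracy_score (available in sklearn.metrics since 0.20) for the binary case:

from sklearn.metrics import balanced_accuracy_score, recall_score

y_true = [0, 1, 0, 0, 1, 0]
y_pred = [0, 1, 0, 0, 0, 1]
print(recall_score(y_true, y_pred, average='macro'))   # 0.625
print(balanced_accuracy_score(y_true, y_pred))         # 0.625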
Example No. 17
def train_and_evaluate_model(model, X_train, Y_train, X_test, Y_test):
    train_start = datetime.now()
    model.fit(X_train, Y_train)
    train_duration_sec = (datetime.now() - train_start).seconds

    test_start = datetime.now()
    Y_pred = model.predict(X_test)
    test_duration_sec = (datetime.now() - test_start).seconds

    accuracy = accuracy_score(Y_test, Y_pred)
    precision = precision_score(Y_test, Y_pred, average="weighted")
    recall = recall_score(Y_test, Y_pred, average="weighted")
    return dict(accuracy=float(accuracy),
                precision=float(precision),
                recall=float(recall),
                train_duration_sec=train_duration_sec,
                test_duration_sec=test_duration_sec)
Example No. 18
def myaccuracy(raw_file, result_file):
    df = pd.read_csv(result_file,
                     sep='\t',
                     header=None,
                     names=['pred_0', 'pred_1'])
    test_df = pd.read_csv(raw_file,
                          sep='\t',
                          header=None,
                          names=['idx', 'question', 'relation', 'label'])

    df["pred"] = df.apply(lambda row: func(row["pred_1"], row["pred_0"]),
                          axis=1)
    f1 = f1_score(y_true=test_df.label, y_pred=df.pred)
    acc = accuracy_score(y_true=test_df.label, y_pred=df.pred)
    p = precision_score(y_true=test_df.label, y_pred=df.pred)
    r = recall_score(y_true=test_df.label, y_pred=df.pred)
    # print("accuracy: ", acc)
    # print("precision: ", p)
    # print("recall: ", r)
    # print("f1: ", f1)

    # df['idx'] = test_df.idx.map(lambda x: x.split('-')[0])
    df["idx"] = test_df.idx
    df["group_sort"] = df["pred_1"].groupby(df["idx"]).rank(ascending=0,
                                                            method="dense")
    df["candidate"] = test_df.relation

    # test_df['idx'] = test_df.idx.map(lambda x: x.split('-')[0])

    df.drop_duplicates(subset=['idx', 'group_sort'],
                       keep='first',
                       inplace=True)
    true_relation = test_df.loc[test_df["label"] == 1]
    pred_relation = df.loc[(df["group_sort"] == 1.0)]

    # print(pred_relation.tail())
    # print(true_relation.tail())
    new_df = pd.merge(true_relation, pred_relation, how="inner")
    new_df["correct"] = new_df.apply(
        lambda row: row["relation"] == row["candidate"], axis=1)
    c = new_df.loc[new_df["correct"] == True]
    correct = c.idx.count()
    total = new_df.idx.count()
    print("my_accuracy: {}, {}/{}".format(correct / total, correct, total))
Example No. 19
def scores(y_test, predictions, pp, clf):
    print()
    if pp == 'Y':
        print('Scores After Preprocessing :')
    else:
        print('Scores Before Preprocessing :')
    print('Classifier = {clf}'.format(clf=clf))
    print('Accuracy score = {accuracy}'.format(
        accuracy=accuracy_score(y_test, predictions)))
    print('Precision score = {precision}'.format(
        precision=precision_score(y_test, predictions)))
    print('Recall score = {recall}'.format(
        recall=recall_score(y_test, predictions)))
    print('F1 Score = {f1score}'.format(f1score=f1_score(y_test, predictions)))
    print('ROC AUC = {roc_auc}'.format(
        roc_auc=roc_auc_score(y_test, predictions)))
    print(confusion_matrix(y_test, predictions))
    print(classification_report(y_test, predictions))
    print()
Example No. 20
def metric_overall_outlier(scores, marks, title=None):
    """Metric global outlier results, i.e., precision/recall and AUC value.

    :param scores:
        List, summed scores of each widget(i).
    :param marks:
        List, outlier marks(i, j) of each widget(i) in each permission(j).
        The value could be 0 (not related to the permission), 1 (outlier), -1 (inlier).
        If there is one outlier in the related permission, then the widget is outlier.
    :param title:
        String, file name used to save the plot, `None` means not to save.

    :return: None
    """
    from pyod.utils.utility import get_label_n
    from sklearn.metrics import roc_auc_score, precision_score, recall_score

    # get global outlier mark
    y_true = [1 if 1 in marks[i] else 0 for i in range(len(scores))]

    # compute precision, recall curve and auc value
    pk, rk = [], []
    for k in range(1, len(y_true)):
        y_predict = get_label_n(y_true, scores, k)
        pk.append(precision_score(y_true, y_predict))
        rk.append(recall_score(y_true, y_predict))
    auc = roc_auc_score(y_true, scores)

    # print top-k precision, recall, and AUC value
    k = sum(y_true)
    print('overall({}/{}), p/r: {}, AUC: {}'.format(
        k, len(y_true), round(pk[k - 1], 4), round(auc, 4)
    ))

    # save plot
    if title is not None:
        path_save = os.path.join('{}.pdf'.format(title))
        plot_precision_recall(
            'Overall', list(range(1, len(y_true))), pk, rk, path_save
        )
Example No. 21
def get_classification_metrics(ground_truth_labels, predicted_labels):
    classification_metric_dict = dict({})
    classification_metric_dict['accuracy'] = accuracy_score(
        ground_truth_labels, predicted_labels)
    classification_metric_dict['precision'] = precision_score(
        ground_truth_labels, predicted_labels, average='weighted')
    classification_metric_dict['recall'] = recall_score(ground_truth_labels,
                                                        predicted_labels,
                                                        average='weighted')
    classification_metric_dict['f1_score'] = f1_score(ground_truth_labels,
                                                      predicted_labels,
                                                      average='weighted')
    classification_metric_dict['brier_score_loss'] = brier_score_loss(
        ground_truth_labels, predicted_labels)
    classification_metric_dict['matthews_corr_coef'] = matthews_corrcoef(
        ground_truth_labels, predicted_labels)
    classification_metric_dict['jaccard_score'] = jaccard_score(
        ground_truth_labels, predicted_labels, average='weighted')
    classification_metric_dict['cohen_kappa_score'] = cohen_kappa_score(
        ground_truth_labels, predicted_labels)

    return classification_metric_dict
Example No. 22
def run():
    paras, sents = create_dataset()
    
    X = np.array(get_features(paras))
    Y = np.array(get_ys(paras))
    
    
    print len(X[0])
    
    sents = np.array(sents)
    
    skf = StratifiedKFold(Y, n_folds=10)
    
    f = open('results/correct.txt','w')
    f2 = open('results/wrong.txt','w')
    
    accs = []
    precs = []
    recs = []
    f1s = []
    
    for train_index, test_index in skf:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = Y[train_index], Y[test_index]
    
        sent_train = sents[train_index]
        sent_test = sents[test_index]
        
#         cv = CountVectorizer(stop_words="english", ngram_range=(1,1), min_df = 5)
#         sent_train_counts = cv.fit_transform(sent_train)
#         
#         tf_transformer = TfidfTransformer(use_idf=True).fit(sent_train_counts)
#         sent_train_counts = tf_transformer.transform(sent_train_counts)
#         
#         sent_train_counts = sent_train_counts.toarray()
#         
#         print sent_train_counts.shape
#         print X_train.shape
# 
#         new_train = []
#         for i,j in zip(X_train, sent_train_counts):
#             new_train.append(np.append(i,j))
        
        #fs = SelectKBest(chi2, k=24)
        #X_train = fs.fit_transform(X_train, y_train)
        
        clf = LogisticRegression()
        
        clf.fit(X_train, y_train)
        
        print clf.coef_
        
#         
#         sent_test_counts = cv.transform(sent_test)
#         sent_test_counts = tf_transformer.transform(sent_test_counts)
#         
#         sent_test_counts = sent_test_counts.toarray()
#         
#         new_test = []
#         for i,j in zip(X_test, sent_test_counts):
#             new_test.append(np.append(i,j))
        
        #X_test = fs.transform(X_test)
        
        y_pred = clf.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        
        accs.append(acc)
        precs.append(prec)
        recs.append(rec)
        f1s.append(f1)
        
        print 'Acc \t %s' % acc
        print 'Prec \t %s' % prec
        print 'Recall \t %s' % rec
        print 'F1 \t %s' % f1
        
        for (index,test),(y_t, y_p) in zip(zip(test_index, X_test), zip(y_test, y_pred)):
            if y_t == y_p:
#                 if paras[index]['prev_para']:
#                     f.write('%s\n' % paras[index]['prev_para']['sents'])
                f.write('%s\n' % sents[index])
                f.write('%s\n' % (y_t))
            else:
#                 if paras[index]['prev_para']:
#                     f2.write('%s\n' % paras[index]['prev_para']['sents'])
                f2.write('%s\n' % sents[index])
                f2.write('%s\n' % (y_t))
        
    print 'Avg Acc \t %s \t ' % np.mean(accs)
    print 'Avg Prec \t %s' % np.mean(precs)
    print 'Avg Recall \t %s' % np.mean(recs)
    print 'Avg F1 \t %s' % np.mean(f1s)
Example No. 23
                  y[train_idx],
                  batch_size=cm.bs,
                  epochs=cm.n_ep,
                  verbose=0,
                  callbacks=[cm.custom_stopping(value=cm.loss, verbose=1)],
                  validation_data=(X_train, y[train_idx]))

        y_pred = model.predict(X_test)
        y_pred = np.argmax(y_pred, axis=1)

        y_true = np.argmax(y[test_idx], axis=1)

        acc_fold = accuracy_score(y_true, y_pred)
        avg_acc.append(acc_fold)

        recall_fold = recall_score(y_true, y_pred, average='macro')
        avg_recall.append(recall_fold)

        f1_fold = f1_score(y_true, y_pred, average='macro')
        avg_f1.append(f1_fold)

        print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}] at fold[{}]'.format(
            acc_fold, recall_fold, f1_fold, i))
        print('______________________________________________________')
        del model

    ic_acc = st.t.interval(0.9,
                           len(avg_acc) - 1,
                           loc=np.mean(avg_acc),
                           scale=st.sem(avg_acc))
    ic_recall = st.t.interval(0.9,
Example No. 24
    test_documents, "section", class_map, len(ipc_sections))

print(
    "=============================== Predicting test data ==============================="
)
# Predicting the class for each word vector in the database
real = []
pred = []
for doc, ipc in test_embedding_generator:
    result = model.predict_one(doc)
    pred.append(class_map[result])  #adding the result to the predicted vector
    real.append(class_map[numpy.argmax(
    ipc)])  #Adding the real value to the real class vector

#Calculating the metric F1, Precision, Accuracy and Recall
accuracy = accuracy_score(real, pred)
recall = recall_score(real, pred, average='weighted')
precision = precision_score(real, pred, average='weighted')
f1 = f1_score(real, pred, average='weighted')
print("Accuracy " + str(accuracy), "Recall " + str(recall),
      "Precision " + str(precision), "F1 " + str(f1))
result_string += "Accuracy " + str(accuracy) + " Recall " + str(
    recall) + " Precision " + str(precision) + " F1 " + str(f1) + "\n"
f = open(result_file_name, "w")
f.write("Database: " + training_documents_collection)
f.write("embedding matrix: " + str(maxWords) + " " + str(embeddingSize))
f.write("epochs: " + str(epochs))
f.write("layers : " + str(layers))
f.write(result_string)
f.close()
Example No. 25
    y_train, y_test = classes[train_index], classes[test_index]

    # train the model
    print(f'Generating Model {i}...')
    classifier = RandomForestClassifier(n_estimators=10,
                                        criterion='gini',
                                        random_state=iteracao).fit(
                                            x_train, y_train)

    # classifying the test set
    y_pred = classifier.predict(x_test)

    # performance metrics
    aux_accuracy += accuracy_score(y_test, y_pred)
    aux_f1_score += f1_score(y_test, y_pred)
    aux_precision += precision_score(y_test, y_pred)
    aux_recall += recall_score(y_test, y_pred)
    conf_matrices += np.asarray(confusion_matrix(y_test, y_pred))

    print(f'Model {i} finished and evaluated.')
    i += 1

# results
print(f'\nITERATION #{iteracao} -----------------------')
print(f'Accuracy = {aux_accuracy / k_fold.n_splits}')
print(f'F1 Score = {aux_f1_score / k_fold.n_splits}')
print(f'Precision = {aux_precision / k_fold.n_splits}')
print(f'Recall = {aux_recall / k_fold.n_splits}')
print(f'Examples x Attributes = {tf_idf.shape}')
print(f'Confusion Matrix = \n{np.array(list(conf_matrices))}')
        kerasAdapter.fit(dataTrainGenerator,
                         epochs=epochs,
                         batch_size=len(dataTrainGenerator),
                         validationDataGenerator=dataTestGenerator,
                         validationSteps=len(dataTestGenerator),
                         callbacks=[modelCheckpoint, configSaver])
        result = kerasAdapter.predict(dataTestGenerator,
                                      batch_size=parameters['batchSize'])
        testClasses = classes[testIndex]
        metrics = dict()
        metrics['fscore'] = f1_score(testClasses, result, average='weighted')
        metrics['precision'] = precision_score(testClasses,
                                               result,
                                               average='weighted')
        metrics['recall'] = recall_score(testClasses,
                                         result,
                                         average='weighted')
        metrics['auc'] = roc_auc_score(testClasses, result, average='weighted')

        metrics['fscore_b'] = f1_score(testClasses, result)
        metrics['precision_b'] = precision_score(testClasses, result)
        metrics['recall_b'] = recall_score(testClasses, result)
        metrics['auc_b'] = roc_auc_score(testClasses, result)

        metrics['kappa'] = cohen_kappa_score(testClasses, result)
        metrics['accuracy'] = accuracy_score(testClasses, result)
        tn, fp, fn, metrics['tp_rate'] = confusion_matrix(testClasses,
                                                          result).ravel()
        print(classification_report(testClasses, result))
        metrics["fold"] = i
        if dictWriter is None:
    sklearn_y_pred = sklearn_mnb.predict(X_test)

    assert (my_y_pred == sklearn_y_pred).all()

    ###### my defined fasttext
    train_data, test_data = train_test_split(processed_data[['label', 'item']],
                                             test_size=0.1,
                                             random_state=2020)

    fasttext = FastText(class_num=3, class_type='multi-class', ngram_range=2)
    fasttext.fit(train_data['item'], train_data['label'], epochs=5)
    y_pred = fasttext.predict(test_data['item'])
    y_true = fasttext.y_encoder.transform(test_data['label'])
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    macro_precision = precision_score(y_true, y_pred, average='macro')
    macro_recall = recall_score(y_true, y_pred, average='macro')

    ##### textCNN
    ## multi-class test
    train_data, test_data = train_test_split(
        processed_data[['subject', 'processed_item']],
        test_size=0.1,
        random_state=2020)

    text_cnn = TextCNN(class_num=4, class_type='multi-class')
    text_cnn.fit(train_data['processed_item'],
                 train_data['subject'],
                 validation_data=(test_data['processed_item'],
                                  test_data['subject']),
                 epochs=2)
    y_true = text_cnn.y_encoder.transform(test_data['subject'])
Example No. 28
    img_dir = 'C:/Users/Administrator/Desktop/Normal'
    for _ in os.listdir(img_dir):
        res = blur_detector.judge_blur_or_not(os.path.join(img_dir, _))
        print(res)
        if res['desc'] == 'Not Blurry':
            preds.append(0)
        else:
            preds.append(1)

        gts.append(0)

    img_dir = 'C:/Users/Administrator/Desktop/Blur'
    for _ in os.listdir(img_dir):
        res = blur_detector.judge_blur_or_not(os.path.join(img_dir, _))
        print(res)
        if res['desc'] == 'Not Blurry':
            preds.append(0)
        else:
            preds.append(1)

        gts.append(1)

    tok = time.time()
    print('FPS={}'.format(len(os.listdir(img_dir)) / (tok - tik)))

    print(confusion_matrix(gts, preds))
    print('Precision = %f' % precision_score(gts, preds))
    print('Recall = %f' % recall_score(gts, preds))
    print('Accuracy = %f' % accuracy_score(gts, preds))
Example No. 29
        test_idx = folds[i][1]

        X_train = X[train_idx]
        X_test = X[test_idx]

        X_train = feature_extraction(X_train)
        X_test = feature_extraction(X_test)

        clf = train_boosting(X_train, y[train_idx])

        tmp = clf.predict(X_test)

        acc_fold = accuracy_score(y[test_idx], tmp)
        avg_acc.append(acc_fold)

        recall_fold = recall_score(y[test_idx], tmp, average='macro')
        avg_recall.append(recall_fold)

        f1_fold = f1_score(y[test_idx], tmp, average='macro')
        avg_f1.append(f1_fold)

        print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}] at fold[{}]'.format(
            acc_fold, recall_fold, f1_fold, i))
        print('______________________________________________________')

    ic_acc = st.t.interval(0.9,
                           len(avg_acc) - 1,
                           loc=np.mean(avg_acc),
                           scale=st.sem(avg_acc))
    ic_recall = st.t.interval(0.9,
                              len(avg_recall) - 1,
Example No. 30
#     result = model.predict_one(doc)
#     pred.append(class_map[result]) #adding the result to the predicted vector
#     real.append(class_map[numpy.argmax(ipc)]) #Adding the real value to the real class vector

for doc in test_documents:
    result = model.predict_one(pickle.loads(doc['embedding']))
    pred.append(class_map[result]) #adding the result to the predicted vector
    real.append(doc['ipc_classes'][0][0])
    all_class.append(doc['ipc_classes'])

print(pred)
print(real)

#Calculating the metric F1, Precision, Accuracy and Recall
accuracy = accuracy_score(real, pred)
recall = recall_score(real, pred, average='weighted')
recall_per_class = recall_score(real, pred, average=None)
precision = precision_score(real, pred, average='weighted')
precision_per_class = precision_score(real, pred, average=None)
f1 = f1_score(real, pred, average='weighted')
f1_per_class = f1_score(real, pred, average=None)
results_per_class = dict()
for i in range(0, len(recall_per_class)):
    if not class_map[i] in results_per_class.keys():
        results_per_class[class_map[i]] = []
    results_per_class[class_map[i]].append(recall_per_class[i])
    results_per_class[class_map[i]].append(precision_per_class[i])
    results_per_class[class_map[i]].append(f1_per_class[i])


matrix = confusion_matrix(real, pred, labels=sorted(ipc_sections))  # list.sort() returns None; sorted() keeps the intended label order
Example No. 31
    for i in train:
        y_train.append(features[i][6])
        tmp = [features[i][0], features[i][1], features[i][2], features[i][3], features[i][4], features[i][5]]
        x_train.append(tmp)
        
    y_test = []
    x_test = []  
    for i in test:
        y_test.append(features[i][6])
        tmp = [features[i][0], features[i][1], features[i][2], features[i][3], features[i][4], features[i][5]]
        x_test.append(tmp)
        
    lr.fit(x_train, y_train)
    lrPredTest = lr.predict(x_test)
    lrPrecisionTest = precision_score(y_test, lrPredTest)
    lrRecallTest = recall_score(y_test, lrPredTest)
    lrF1Test = f1_score(y_test, lrPredTest)
    lrAvgPrecision += lrPrecisionTest
    lrAvgRecall += lrRecallTest
    lrAvgF1 += lrF1Test

print "log reg completed in ", time.time() - start, " s"
print "lr:\n Precision {}\n Recall {}\n F1 {}\n".format(lrAvgPrecision / 5, lrAvgRecall / 5, lrAvgF1 / 5)
  

start = time.time()
"""RANDOM FOREST"""
rf = RandomForestClassifier(n_estimators=100, min_samples_leaf=5)

rfAvgPrecision = 0.0
rfAvgRecall = 0.0
Example No. 32
            # Evaluate model and predict data on TEST 
            print("******Evaluating TEST set*********")
            rnn_model.load_weights(model_filename)

            y_test_predict = rnn_model.predict(X_test_, batch_size = BATCH_SIZE)
            y_test_predict = np.array(y_test_predict)
            y_test_predict = np.argmax(y_test_predict, axis=1)

            all_trainable_count = int(np.sum([K.count_params(p) for p in set(rnn_model.trainable_weights)]))

            MAE = metrics.mean_absolute_error(y_test, y_test_predict, sample_weight=None, multioutput='uniform_average')

            acc_fold = accuracy_score(y_test, y_test_predict)
            avg_acc.append(acc_fold)

            recall_fold = recall_score(y_test, y_test_predict, average='macro')
            avg_recall.append(recall_fold)

            f1_fold  = f1_score(y_test, y_test_predict, average='macro')
            avg_f1.append(f1_fold)

            with open(SAVE_DIR + '/results_model_with_self_attn_' + MODE + '.csv', 'a') as out_stream:
                out_stream.write(str(SEED) + ', ' + str(DATA_FILE[0:-4]) + ', ' + str(i) + ', ' + str(early_stopping_epoch) + ', ' + str(all_trainable_count) + ', ' + str(acc_fold) + ', ' + str(MAE) + ', ' + str(recall_fold) + ', ' + str(f1_fold) + '\n')


            print('Accuracy[{:.4f}] Recall[{:.4f}] F1[{:.4f}] at fold[{}]'.format(acc_fold, recall_fold, f1_fold, i))
            print('______________________________________________________')
            K.clear_session()

    ic_acc = st.t.interval(0.9, len(avg_acc) - 1, loc=np.mean(avg_acc), scale=st.sem(avg_acc))
    ic_recall = st.t.interval(0.9, len(avg_recall) - 1, loc=np.mean(avg_recall), scale=st.sem(avg_recall))
Example No. 33
def evaluate_results(result, target_def, n_shadow):
    attack_test_y = result['attack_test_y']
    attack_test_x = result['attack_test_x']
    preds = result['preds']
    target_y = result['target_y']
    target_x = result['target_x']
    labels = result['labels']

    #    INFORMATION ABOUT THE MODEL UNDER ATTACK
    x = PrettyTable(
        ['Model Definition', 'Training Accuracy', 'Testing Accuracy'])
    x.float_format = ".2"

    target_preds = np.argmax(attack_test_x, axis=1)
    train_acc = accuracy_score(target_y[attack_test_y == 1],
                               target_preds[attack_test_y == 1])
    test_acc = accuracy_score(target_y[attack_test_y == 0],
                              target_preds[attack_test_y == 0])

    x.add_row([
        target_def.split(os.path.dirname(os.getcwd()))[-1], train_acc * 100,
        test_acc * 100
    ])

    print(x.get_string(title='Target Model'))

    #    INFORMATION ABOUT THE OVERALL ATTACK EFFECTIVENESS
    cols = ['Num Shadow', 'Accuracy', 'Precision', 'Recall', 'F-1']
    x = PrettyTable(cols)
    x.float_format = ".2"

    p = np.argmax(preds, axis=1)
    x.add_row([
        n_shadow,
        accuracy_score(attack_test_y, p) * 100,
        precision_score(attack_test_y, p) * 100,
        recall_score(attack_test_y, p) * 100,
        f1_score(attack_test_y, p) * 100
    ])
    print(x.get_string(title='Attack Aggregate'))

    # noinspection PyShadowingNames
    def by_class_evaluation(attack_test_y,
                            target_y,
                            p,
                            attack_test_x,
                            labels=None):
        if labels is None:
            labels = np.unique(target_y)

        precisions = [
            precision_score(attack_test_y[target_y == c], p[target_y == c]) *
            100 for c in np.unique(target_y)
        ]
        accuracies = [
            accuracy_score(attack_test_y[target_y == c], p[target_y == c]) *
            100 for c in np.unique(target_y)
        ]
        f1_scores = [
            f1_score(attack_test_y[target_y == c], p[target_y == c]) * 100
            for c in np.unique(target_y)
        ]
        recalls = [
            recall_score(attack_test_y[target_y == c], p[target_y == c]) * 100
            for c in np.unique(target_y)
        ]
        c_train_accs = [
            accuracy_score(
                target_y[np.logical_and(target_y == c, attack_test_y == 1)],
                np.argmax(attack_test_x[np.logical_and(target_y == c,
                                                       attack_test_y == 1)],
                          axis=1)) * 100 for c in np.unique(target_y)
        ]
        c_test_accs = [
            accuracy_score(
                target_y[np.logical_and(target_y == c, attack_test_y == 0)],
                np.argmax(attack_test_x[np.logical_and(target_y == c,
                                                       attack_test_y == 0)],
                          axis=1)) * 100 for c in np.unique(target_y)
        ]

        x = PrettyTable()
        x.float_format = '.2'
        x.add_column("Class", labels)
        x.add_column('Target Accuracy Train', np.round(c_train_accs, 2))
        x.add_column('Target Accuracy Test', np.round(c_test_accs, 2))
        x.add_column("Attack Precision", np.round(precisions, 2))
        x.add_column("Attack Accuracy", np.round(accuracies, 2))
        x.add_column("Attack Recall", np.round(recalls, 2))
        x.add_column("Attack F-1 Score", np.round(f1_scores, 2))
        x.add_column(
            "Percentage of Data",
            np.round(
                np.array([
                    len(target_y[target_y == c]) / len(target_y) * 100
                    for c in np.unique(target_y)
                ]), 2))
        print(x.get_string(title='Per Class Evaluation'))

    by_class_evaluation(attack_test_y,
                        target_y,
                        p,
                        attack_test_x,
                        labels=labels)

    return {
        'attack_test_y': attack_test_y,
        'attack_test_x': attack_test_x,
        'preds': preds,
        'target_y': target_y,
        'target_x': target_x,
        'target_def': target_def,
        'n_shadow': n_shadow,
        'labels': labels
    }
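A tiny sketch of the PrettyTable reporting pattern used in evaluate_results, with made-up numbers:

from prettytable import PrettyTable

x = PrettyTable(['Num Shadow', 'Accuracy', 'Precision', 'Recall', 'F-1'])
x.float_format = '.2'
x.add_row([8, 71.3, 68.9, 74.2, 71.5])     # made-up numbers
print(x.get_string(title='Attack Aggregate'))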
Example No. 34
                yRec_list.append(data['classes'])
                xRec = np.concatenate([testRec, gyr], axis=-1)
                xRec_list.append(xRec)

        catal_classifier = Catal()
        catal_classifier.fit(X_train, y[train_idx])

        for i in range(len(missing_list)):
            miss = missing_list[i]
            y_pred = catal_classifier.predict(xRec_list[i])
            finalResult['acc'][miss].append(
                accuracy_score(yRec_list[i], y_pred))
            finalResult['f1'][miss].append(
                f1_score(yRec_list[i], y_pred, average='macro'))
            finalResult['rec'][miss].append(
                recall_score(yRec_list[i], y_pred, average='macro'))

    Result = dict()
    Result['acc'] = dict()
    Result['recall'] = dict()
    Result['f1'] = dict()

    for miss in missing_list:
        ic_acc = st.t.interval(0.9,
                               len(finalResult['acc'][miss]) - 1,
                               loc=np.mean(finalResult['acc'][miss]),
                               scale=st.sem(finalResult['acc'][miss]))
        ic_recall = st.t.interval(0.9,
                                  len(finalResult['rec'][miss]) - 1,
                                  loc=np.mean(finalResult['rec'][miss]),
                                  scale=st.sem(finalResult['rec'][miss]))