def test_value_counts():
    df = pd.DataFrame({
        'Height': [
            150, 150, 151, 151, 152, 155, 155, 157, 157, 157, 157, 158, 158,
            159, 159, 159, 160, 160, 162, 162, 163, 164, 165, 168, 169, 169,
            169, 170, 171, 171, 173, 173, 174, 176, 177, 177, 179, 179, 179,
            179, 179, 181, 181, 182, 183, 184, 186, 190, 190
        ],
        'Weight': [
            54, 55, 55, 47, 58, 53, 59, 60, 56, 55, 62, 56, 55, 55, 64, 61, 59,
            59, 63, 66, 64, 62, 66, 66, 72, 65, 75, 71, 70, 70, 75, 65, 79, 78,
            83, 75, 84, 78, 74, 75, 74, 90, 80, 81, 90, 81, 91, 87, 100
        ],
        'Size': [
            'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
            'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M',
            'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
            'L', 'L', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL'
        ],
        'SizePred': [
            'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S',
            'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M',
            'M', 'M', 'M', 'M', 'L', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L',
            'L', 'L', 'XL', 'L', 'L', 'XL', 'L', 'XL', 'XL', 'XL'
        ],
    })
    cm = ConfusionMatrix(df["Size"], df["SizePred"])
    assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

    assert (cm.true - df.Size.value_counts()).sum() == 0
    assert (cm.pred - df.SizePred.value_counts()).sum() == 0
    cm.print_stats()
Example #2
    def save_confusion_matrix(self, truth_res, pred_res):

        #truth_res = [self.label_map[i+1] for i in truth_res]
        #pred_res = [self.label_map[i+1] for i in pred_res]
        '''
        print(len(truth_res))
        print(len(pred_res))
        confusion_matrix = ConfusionMatrix(truth_res, pred_res)
        plt.figure(dpi=200, figsize=(10, 7))
        confusion_matrix.plot()
        plt.savefig(self.confusion_matrix_file_path)
        '''
        s = sklearn.metrics.confusion_matrix(truth_res, pred_res)
        list_label = self.label_map[1:]
        df_cm = pd.DataFrame(data=s, columns=list_label, index=list_label)
        plt.figure(dpi=100)

        heatmap = sns.heatmap(df_cm, annot=True, fmt='d')
        heatmap.yaxis.set_ticklabels(heatmap.yaxis.get_ticklabels(),
                                     rotation=70,
                                     ha='right',
                                     fontsize=5)
        heatmap.xaxis.set_ticklabels(heatmap.xaxis.get_ticklabels(),
                                     rotation=20,
                                     ha='right',
                                     fontsize=5)

        plt.savefig(self.confusion_matrix_file_path)

        confusion_matrix = ConfusionMatrix(truth_res, pred_res)
        confusion_matrix.print_stats()
def process_results(mode, file, thrshld):
    threshold = thrshld
    with open(file) as json_file:  
        data = json.load(json_file)
        accuracy = 0.0
        actual = []
        predicted = []
        for p in data['results']:
            labellingScore = int(p['labellingScore']) 
            score = float(p['score'])
            if labellingScore == 1 and score > threshold:
                accuracy = accuracy + 1
            elif labellingScore == 0 and score < threshold:
                accuracy = accuracy + 1
            if labellingScore == 1:
                actual.append(1)
            else:
                actual.append(0)
            if score > threshold:
                predicted.append(1)
            else:
                predicted.append(0)
    if mode == 1:
        cm = ConfusionMatrix(actual, predicted)
        cm.print_stats()
    return accuracy/len(data['results'])
Example #4
def compute_metrics(task_name, preds, labels):
    assert len(preds) == len(labels)
    if task_name == "cola":
        return {"mcc": matthews_corrcoef(labels, preds)}
    elif task_name == "sst-2":
        return {"acc": simple_accuracy(preds, labels)}
    elif task_name == "mrpc":
        return acc_and_f1(preds, labels)
    elif task_name == "sts-b":
        return pearson_and_spearman(preds, labels)
    elif task_name == "qqp":
        return acc_and_f1(preds, labels)
    elif task_name == "mnli":
        return {"acc": simple_accuracy(preds, labels)}
    elif task_name == "mnli-mm":
        return {"acc": simple_accuracy(preds, labels)}
    elif task_name == "qnli":
        return {"acc": simple_accuracy(preds, labels)}
    elif task_name == "rte":
        return {"acc": simple_accuracy(preds, labels)}
    elif task_name == "wnli":
        return {"acc": simple_accuracy(preds, labels)}
    elif task_name == "sa" or task_name == 'sa_csv':
        from pandas_ml import ConfusionMatrix
        pcm = ConfusionMatrix(labels, preds)
        pcm.print_stats()
        precision, recall, f1, _ = precision_recall_fscore_support(
            labels, preds, average='weighted')
        #return {"acc": simple_accuracy(preds, labels)}
        return {"acc": pcm.stats_overall['Accuracy']}
    elif task_name == "arg_mining":
        return {}
    else:
        raise KeyError(task_name)
    def test_pandas_confusion_cm_stats_integers(self):
        y_true = [600, 200, 200, 200, 200, 200, 200, 200, 500, 500, 500, 200, 200, 200, 200, 200, 200, 200, 200, 200]
        y_pred = [100, 200, 200, 100, 100, 200, 200, 200, 100, 200, 500, 100, 100, 100, 100, 100, 100, 100, 500, 200]
        print("y_true: %s" % y_true)
        print("y_pred: %s" % y_pred)
        cm = ConfusionMatrix(y_true, y_pred)
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

        assert isinstance(cm.stats(), OrderedDict)
        cm.print_stats()
Example #7
def metrics(y_true, y_pred, y_pred_proba=False):
    target_names = ['KEEP', 'UP', 'DOWN']

    if y_pred_proba is not False:
        print('Cross Entropy: {}'.format(log_loss(y_true, y_pred_proba)))
    print('Accuracy: {}'.format(accuracy_score(y_true, y_pred)))
    print('Coefficient Kappa: {}'.format(cohen_kappa_score(y_true, y_pred)))
    print('Report: {}'.format(
        classification_report(y_true, y_pred, target_names=target_names)))
    cm = ConfusionMatrix(y_true.tolist(), y_pred.tolist())
    cm.print_stats()
    def test_value_counts(self):
        df = pd.DataFrame({
            'Height': [150, 150, 151, 151, 152, 155, 155, 157, 157, 157, 157, 158, 158, 159, 159, 159, 160, 160, 162, 162, 163, 164, 165, 168, 169, 169, 169, 170, 171, 171, 173, 173, 174, 176, 177, 177, 179, 179, 179, 179, 179, 181, 181, 182, 183, 184, 186, 190, 190],
            'Weight': [54, 55, 55, 47, 58, 53, 59, 60, 56, 55, 62, 56, 55, 55, 64, 61, 59, 59, 63, 66, 64, 62, 66, 66, 72, 65, 75, 71, 70, 70, 75, 65, 79, 78, 83, 75, 84, 78, 74, 75, 74, 90, 80, 81, 90, 81, 91, 87, 100],
            'Size': ['S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL', 'XL'],
            'SizePred': ['S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'S', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'L', 'M', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'L', 'XL', 'L', 'L', 'XL', 'L', 'XL', 'XL', 'XL'],
        })
        cm = ConfusionMatrix(df["Size"], df["SizePred"])
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

        assert (cm.true - df.Size.value_counts()).sum() == 0
        assert (cm.pred - df.SizePred.value_counts()).sum() == 0
        cm.print_stats()
def Test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    cm_targets = []
    cm_predicted = []
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)

            cm_targets.extend(targets.cpu().numpy())
            cm_predicted.extend(predicted.cpu().numpy())

            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

            progress_bar(
                batch_idx, len(testloader),
                'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                (test_loss /
                 (batch_idx + 1), 100. * correct / total, correct, total))

    matrix = confusion_matrix(cm_targets, cm_predicted)
    plot_confusion_matrix(matrix, classes_str)

    from pandas_ml import ConfusionMatrix
    cm = ConfusionMatrix(cm_targets, cm_predicted)
    cm.print_stats()

    # Save checkpoint.
    acc = 100. * correct / total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/vggckpt.pth')
        best_acc = acc
Example #10
def accuracy(result):
    true = 0
    total = len(result)
    cm_expected = []
    cm_predicted = []
    for i in range(len(result)):
        if result[i][0] == result[i][1]:
            true += 1
        cm_expected.append(result[i][1])
        cm_predicted.append(result[i][0])
    misclassified = total - true
    cm = ConfusionMatrix(cm_expected, cm_predicted)
    cm.print_stats()
    print("----------------------------------------")
    return cm, total, true, misclassified, true/len(result)*100
Example #11
def plot_confusion_matrix(cls_pred):
    # This is called from print_test_accuracy() below.

    # cls_pred is an array of the predicted class-number for
    # all images in the test-set.

    # Get the true classifications for the test-set.
    cls_true = data.test.cls

    # Get the confusion matrix using sklearn.
    # cm = confusion_matrix(y_true=cls_true,
    #                   y_pred=cls_pred)
    cm = ConfusionMatrix(y_true=cls_true, y_pred=cls_pred)
    # Print the confusion matrix as text.
    #print(cm)
    cm.print_stats()
Example #12
    def confusion(self, inputs, targets):
        """Prints the confusion matrix
        :param inputs:
        :param targets:
        :return:
        """
        predicted = []
        expected = []

        for i, t in zip(inputs, targets.tolist()):
            self._forward(i)
            out = self._transform_output()
            predicted.append(out.index(max(out)))
            expected.append(t.index(max(t)))

        confusion_m = ConfusionMatrix(expected, predicted)
        confusion_m.print_stats()
Example #13
def calculate_accuracy(result):
    correct = 0
    wrong = 0
    # Calculating the accuracy
    for i in range(len(result)):
        if result[i] == y_actu[i]:
            correct += 1
        else:
            wrong += 1

    accuracy = correct / (correct + wrong)
    print("\nThe accuracy achieved is: " + str("%.2f" % (accuracy * 100)) +
          "%\n")
    print("Below is the confusion matrix:\n")
    cm = ConfusionMatrix(y_actu, result)
    print(cm)
    cm.print_stats()
    def confusion(self, inputs, targets):
        """Prints the confusion matrix
        :param inputs:
        :param targets:
        :return:
        """
        predicted = []
        expected = []

        # Produce confusion matrix arrays
        for i, t in zip(inputs, targets):
            self.forward(i)
            predicted.append(np.argmax(self.y))
            expected.append(np.argmax(t))

        confusion_m = ConfusionMatrix(expected, predicted)
        confusion_m.print_stats()
def random_forest(df):
    train, test = df[df['is_train'] == True], df[df['is_train'] == False]
    print('Number of observations in the training data:', len(train))
    print('Number of observations in the test data:', len(test))
    features = df.columns[1:]

    y = pd.factorize(train['Env'])[0]
    clf = RandomForestClassifier(n_estimators=500, random_state=0)
    clf.fit(train[features], y)

    target_names = df['Env'].unique()
    preds = target_names[clf.predict(test[features])]

    # confusion matrix array
    cmatrix = pd.crosstab(test['Env'],
                          preds,
                          rownames=['Actual Env'],
                          colnames=['Predicted Env'])

    #important features
    importance = list(zip(train[features], clf.feature_importances_))
    importance_sorted = sorted(importance, key=lambda x: x[1], reverse=True)

    ffeature = importance_sorted[:20]
    feature_df = pd.DataFrame(ffeature, columns=['feature', 'importance'])
    feature_df.to_csv(domain + '_features.csv')

    #Important features graphs
    ffeature.reverse()
    plt.plot([val[1] for val in ffeature], range(len(ffeature)), 'o')
    plt.hlines(range(len(ffeature)), [0], [val[1] for val in ffeature],
               linestyles='dotted',
               lw=2)
    plt.yticks(range(len(ffeature)), [val[0] for val in ffeature])
    plt.tight_layout()
    plt.savefig(domain + '_graph.png', dpi=300)
    ''' This 'cm' variable is what we used to create the confusion matrices. It uses
    pandas_ml, which is not compatible with pandas version 0.25; pandas needs
    to be downgraded to 0.24.2 for this to work. The 'cmatrix' variable above
    will also produce the confusion matrix, but pandas_ml was used originally
    to create the graph. If you comment these 3 lines out, the random forest function
    will still run and produce the important features. A commented sklearn-only
    alternative is sketched after cm.plot() below.'''
    cm = ConfusionMatrix(test["Env"].tolist(), preds)
    cm.print_stats()
    cm.plot()
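    # A hedged, sklearn-only alternative (assuming pandas_ml cannot be installed
    # next to a newer pandas): classification_report gives comparable per-class
    # precision/recall/F1 from the same arrays, without downgrading pandas.
    # from sklearn.metrics import classification_report
    # print(classification_report(test['Env'], preds))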
    def test_pandas_confusion_cm_stats_animals(self):
        y_true = ['rabbit', 'cat', 'rabbit', 'rabbit', 'cat', 'dog', 'dog', 'rabbit', 'rabbit', 'cat', 'dog', 'rabbit']
        y_pred = ['cat', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'dog', 'cat', 'rabbit', 'cat', 'rabbit', 'rabbit']
        print("y_true: %s" % y_true)
        print("y_pred: %s" % y_pred)

        cm = ConfusionMatrix(y_true, y_pred)
        assert isinstance(cm, pdml.confusion_matrix.LabeledConfusionMatrix)

        assert isinstance(cm.stats(), OrderedDict)
        assert cm.population == len(y_true)  # 12
        cm.print_stats()
        cm_stats = cm.stats()  # noqa

        assert cm.binarize("cat").TP == cm.get("cat")  # cm.get("cat", "cat")
        assert cm.binarize("cat").TP == 3
        assert cm.binarize("dog").TP == cm.get("dog")  # 1
        assert cm.binarize("rabbit").TP == cm.get("rabbit")  # 3
Example #18
def draw_heatmap(total_predictions, total_ground_truths):

    if len(total_predictions) == len(total_ground_truths):
        data = {
            'y_Actual': total_ground_truths,
            'y_Predicted': total_predictions
        }

        df = pd.DataFrame(data, columns=['y_Actual', 'y_Predicted'])

        confusion_matrix = pd.crosstab(df['y_Actual'],
                                       df['y_Predicted'],
                                       rownames=['Actual'],
                                       colnames=['Predicted'])
        sn.heatmap(confusion_matrix, annot=True, fmt='g', cmap='YlGnBu')
        Confusion_Matrix = ConfusionMatrix(df['y_Actual'], df['y_Predicted'])
        Confusion_Matrix.print_stats()
        plt.show()
Example #19
    def confusion(self, inputs, targets, nhidden):
        y_output = np.zeros((np.shape(inputs)[0], np.shape(targets)[1]))
        for h in range(np.shape(inputs)[0]):  #224 training samples

            hidden_temp = np.zeros(nhidden)
            output_temp = np.zeros(np.shape(targets)[1])
            for i in range(
                    np.shape(inputs)[1]):  #40 data in each training samples
                for j in range(nhidden):  #updating each hidden_temp
                    hidden_temp[
                        j] += self.weight_hidden_lst[i][j] * inputs[h][i]
            for bias_hidden in range(nhidden):
                hidden_temp[bias_hidden] += self.bias_hidden_lst[0][
                    bias_hidden]

            a_hidden = np.zeros((nhidden, 1))
            for a in range(nhidden):
                a_hidden[a] = 1 / (1 + np.exp(-self.beta * hidden_temp[a]))

            for jj in range(np.shape(a_hidden)[0]):  #12 (or something else)
                for k in range(np.shape(targets)[1]):  #to each output
                    output_temp[
                        k] += self.weight_output_lst[jj][k] * a_hidden[jj]
            for bias_output in range(np.shape(self.bias_output_lst)[0]):
                output_temp[bias_output] += self.bias_output_lst[0][
                    bias_output]

            for y in range(np.shape(targets)[1]):
                y_output[h][y] = 1 / (1 + np.exp(-self.beta * output_temp[y]))
        output_max = np.zeros(np.shape(targets)[0])
        validtargets_max = np.zeros(np.shape(targets)[0])
        for l in range(np.shape(targets)[0]):

            output_max[l] = np.argmax(y_output[l][:])
            validtargets_max[l] = np.argmax(targets[l][:])

        confusion_mtrx = ConfusionMatrix(validtargets_max, output_max)
        print(confusion_mtrx)
        confusion_mtrx.print_stats()
        confusion_mtrx.plot(backend='seaborn')
        plt.show()
Example #20
def plot_confusion_matrix_metrics(true_labels=None,
                                  predicted_labels=None,
                                  normalized=False,
                                  verbose=True):
    """
    Plot a confusion matrix given the known labels of the data (true_labels) and their corresponding predictions (predicted_labels).
    If normalized=True, the confusion matrix will bound its values in an interval between 0 and 1.
    
    Doesn't require plt.show(), just call this function at the end of a cell.
    
    :param true_labels: true values for labels
    :type true_labels: np.array
    :param predicted_labels: predicted label values
    :type predicted_labels: np.array
    :param normalized: bound the analysis in the interval [0, 1]
    :type normalized: boolean (default=False)
    """
    cm = ConfusionMatrix(true_labels, predicted_labels)
    cm.plot(cmap='GnBu', normalized=normalized)
    ax = plt.gca()
    label_dict = {"True": 1, "False": 0}
    str_labels = [
        'Digit {}'.format(label_dict.get(i.get_text(), i.get_text()))
        for i in ax.get_xticklabels()
    ]
    ax.set_xticklabels(str_labels, rotation=0, horizontalalignment='center')
    ax.set_yticklabels(str_labels)
    cm_array = cm.to_array()
    width, height = cm_array.shape
    for x in range(width):
        for y in range(height):
            plt.annotate(str(cm_array[x][y]),
                         xy=(y, x),
                         horizontalalignment='center',
                         verticalalignment='center')
    plt.show()
    print(cm)
    if verbose:
        print("===================================================")
        print("Evaluation metrics:")
        cm.print_stats()
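
A minimal usage sketch for the function above, assuming pandas_ml and matplotlib are importable in the same module (the toy label arrays below are illustrative and not part of the original snippet):

import numpy as np

y_true = np.array([1, 0, 1, 1, 0, 1, 0, 0])
y_hat = np.array([1, 0, 0, 1, 0, 1, 0, 1])
plot_confusion_matrix_metrics(true_labels=y_true,
                              predicted_labels=y_hat,
                              normalized=False,
                              verbose=True)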
Example #21
def plotconfusion(truth, predictions, path, label_dict, classes):
    """
    This function plots the confusion matrix and
    also prints useful statistics.

    :param truth: true labels
    :type truth: np array
    :param predictions: model predictions
    :type predictions: np array
    :param path: path to save image
    :type path: str
    :param label_dict: dict to transform int to str
    :type label_dict: dict
    :param classes: number of classes
    :type classes: int
    """
    acc = np.array(truth) == np.array(predictions)
    size = float(acc.shape[0])
    acc = np.sum(acc.astype("int32")) / size
    truth = [label_dict[i] for i in truth]
    predictions = [label_dict[i] for i in predictions]
    cm = ConfusionMatrix(truth, predictions)
    cm_array = cm.to_array()
    cm_diag = np.diag(cm_array)
    sizes_per_cat = []
    for n in range(cm_array.shape[0]):
        sizes_per_cat.append(np.sum(cm_array[n]))
    sizes_per_cat = np.array(sizes_per_cat)
    sizes_per_cat = sizes_per_cat.astype(np.float32)**-1
    recall = np.multiply(cm_diag, sizes_per_cat)
    print("\nRecall:{}".format(recall))
    print("\nRecall stats: mean = {0:.6f}, std = {1:.6f}\n".format(
        np.mean(recall),  # noqa
        np.std(recall)))  # noqa
    title = "Confusion matrix of {0} examples\n accuracy = {1:.6f}".format(
        int(size),  # noqa
        acc)
    plot_confusion_matrix(cm_array, classes, title=title, path=path)
    cm.print_stats()
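
A minimal call sketch for plotconfusion, assuming the module-level plot_confusion_matrix helper it relies on is available (the toy arrays and label_dict are illustrative, not from the original snippet):

import numpy as np

toy_truth = np.array([0, 1, 2, 1, 0, 2])
toy_preds = np.array([0, 1, 1, 1, 0, 2])
plotconfusion(toy_truth, toy_preds,
              path="confusion.png",
              label_dict={0: "cat", 1: "dog", 2: "rabbit"},
              classes=3)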
    def classifier_test(self, type, model_name):
        # for testing set
        if type == 1:
            y, x = self.test_list_letter, self.test_list_word
        # for validation
        else:
            y, x = self.validate_list_letter, self.validate_list_word

        # Now we need to load the learned model and check the accuracy in validation or testing data

        m = svm_load_model(model_name)
        number = m.get_nr_sv()
        y_label, p_acc, _ = svm_predict(y, x, m, '-b 1')
        # print y_label
        y_pred = pd.Series(y_label, name='Predicted')
        y_actual = pd.Series(y, name='Actual')
        df_CF = ConfusionMatrix(y_actual, y_pred)
        df_CF.print_stats()
        df_CF.plot(color='b')
        plt.show()

        return p_acc, number
Example #23
def calc_general_stats(rows):
    y_true = []
    y_pred = []
    for i in range(len(rows)):
        row = rows[i]
        className_true = row.split('/')[4]
        numBoxes = int(row.split(' ')[1])
        className_pred = 'none'
        if numBoxes > 0:
            className_pred = row.split(' ')[1 + numBoxes].split(',')[4]

        y_true.append(className_true)
        y_pred.append(className_pred)

    # stats
    cm = ConfusionMatrix(y_true, y_pred)
    cm.print_stats()
    cm.stats()

    # other report...
    target_names = [
        'arrabida', 'camara', 'clerigos', 'musica', 'none', 'serralves'
    ]
    print(classification_report(y_true, y_pred, target_names=target_names))

    # plot
    cm = confusion_matrix(y_true, y_pred)
    classes = ['arrabida', 'camara', 'clerigos', 'musica', 'none', 'serralves']
    df_cm = pd.DataFrame(cm, index=classes, columns=classes)
    plt.figure(figsize=(10, 7))
    sn.set(font_scale=1.4)
    ax = sn.heatmap(cm,
                    annot=True,
                    annot_kws={"size": 16},
                    yticklabels=classes,
                    xticklabels=classes,
                    cmap='Blues',
                    fmt='g')
    plt.show()
Example #24
def _confusion_matrix(net, data_set, sess_config):
    preds = []
    lbs = []
    count = 0
    orig_to_child, child_to_orig = load_child_labels(FLAGS.dataset_dir)
    with tf.Session(config=sess_config) as sess:
        inputs = data_set.batch_inputs()
        label_inputs = inputs[1]
        output_logits = net.output_logits(inputs[0])
        sess.run(tf.initialize_local_variables())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        if len(inputs) == 3:
            output_logits = output_logits[-1]
        output_softmax = tf.nn.softmax(output_logits)
        prediction = tf.argmax(output_softmax, axis=1)
        labels = tf.argmax(label_inputs, axis=1)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        while True:
            try:
                count += 1
                print("%s Evalutions..." % (count * FLAGS.batch_size))
                pred_values, label_values = sess.run([prediction, labels])
                preds.extend(
                    [child_to_orig[pd] for pd in pred_values.tolist()])
                lbs.extend([child_to_orig[lv] for lv in label_values.tolist()])
            except Exception:
                coord.request_stop()
                coord.join(threads)
                break

    cm = ConfusionMatrix(lbs, preds)
    cm.print_stats()
    cm.plot()
    plt.show()
def generate_confusion_matrix_stats(s_name, si_actual, si_prediction, s_model):
    confusion_matrix = pd.crosstab(si_actual,
                                   si_prediction,
                                   rownames=['Actual'],
                                   colnames=['Predicted'])
    plt.figure()
    sn.heatmap(confusion_matrix, annot=True, fmt='g')
    plt.savefig('confusion_matrix_ ' + s_name + '_' +
                str(i_prediction_horizon) + '_' + s_model + '.pdf')
    plt.savefig('confusion_matrix_ ' + s_name + '_' +
                str(i_prediction_horizon) + '_' + s_model + '.png')

    # generate and save additional ML stats
    Confusion_Matrix = ConfusionMatrix(si_actual, si_prediction)
    original = sys.stdout
    sys.stdout = open(
        "ml_stats_" + s_name + '_' + str(i_prediction_horizon) + '_' +
        s_model + ".txt", "w")

    Confusion_Matrix.print_stats()
    sys.stdout = original
      len(y_train.loc[dataframe['Class'] == 1]),
      len(y_train.loc[dataframe['Class'] == 1]) / len(y_train))
#Applying Logistic Regression Machine Learning Algorithm
logistic = linear_model.LogisticRegression(C=1e5)
#Fitting the Algorithm for X_train and y_train
logistic.fit(X_train, y_train)
dt = tree.DecisionTreeClassifier()
dt.fit(X_train, y_train)
classifier = KNeighborsClassifier(n_neighbors=5)
classifier.fit(X_train, y_train)
#Scoring
print("Using Logistivc Regression the Accuracy Score is: ",
      logistic.score(X_test, y_test))
print("Using Decision tree the Accuracy score is ", dt.score(X_test, y_test))
print("Using KNearestNeighbour the Accuracy score is ",
      classifier.score(X_test, y_test))
y_predicted = np.array(logistic.predict(X_test))
y_right = np.array(y_test)
#print y_test
#The confusion matrix (or error matrix) is one way to summarize the performance of a classifier
#  for binary classification tasks. This square matrix
# consists of columns and rows that list the number of instances as absolute or
# relative "actual class" vs. "predicted class" ratios.
#Plotting the Confusion matrix for y_right and y_predicted
confusion_matrix = ConfusionMatrix(y_right, y_predicted)
print("Confusion matrix:", confusion_matrix)
confusion_matrix.plot(normalized=True)
plt.show()
#printing the stats of Confusion matrix
confusion_matrix.print_stats()
Example #27
def test_model(test_model, test_dataloader):
    print("Testing started..")
    test_model.eval()
    correct = 0
    total = 0
    all_labels_d = torch.tensor([], dtype=torch.long).to(device)
    all_predictions_d = torch.tensor([], dtype=torch.long).to(device)
    all_predictions_probabilities_d = torch.tensor(
        [], dtype=torch.float).to(device)

    if batch_size == 1:
        all_timePerFrame_host = []

    else:
        print("Please set batch size to 1....")
        exit(0)

    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            frame_time_start = datetime.datetime.now()  # frame start time

            outputs = test_model(inputs)
            outputs = F.softmax(outputs, 1)
            #print(outputs)
            predicted_probability, predicted = torch.max(outputs.data, 1)

            frame_time_end = datetime.datetime.now()  # frame end time

            time_per_image = (frame_time_end -
                              frame_time_start).total_seconds()
            #print((predicted == labels).sum())
            total += labels.size(0)
            correct += (predicted == labels).sum()
            all_labels_d = torch.cat((all_labels_d, labels), 0)
            all_predictions_d = torch.cat((all_predictions_d, predicted), 0)
            all_predictions_probabilities_d = torch.cat(
                (all_predictions_probabilities_d, predicted_probability), 0)
            all_timePerFrame_host = all_timePerFrame_host + [time_per_image]

    print('copying some data back to cpu for generating confusion matrix...')
    y_true = all_labels_d.cpu()
    y_predicted = all_predictions_d.cpu()  # to('cpu')
    testset_predicted_probabilites = all_predictions_probabilities_d.cpu(
    )  # to('cpu')

    class_names = test_datasets.classes  # taking class names for plotting confusion matrix
    cm = confusion_matrix(y_true, y_predicted,
                          labels=target_number_labels)  # confusion matrix

    print('Accuracy of the network on the %d test images: %f %%' %
          (total, (100.0 * correct / total)))

    print(cm)

    print("taking class names to plot CM")

    print("Generating confution matrix")

    plot_confusion_matrix(cm, classes=class_names, title='my confusion matrix')
    #plot_confusion_matrix(cm, classes=target_number_labels, title='my confusion matrix')

    # print('confusion matrix saved to ', plot_dir)

    ##################################################################
    # classification report
    #################################################################
    #print(classification_report(y_true, y_predicted, target_names=target_number_labels))

    ##################################################################
    # Standard metrics for medico Task
    #################################################################
    print("Printing standard metric for medico task")

    print("Accuracy =", mtc.accuracy_score(y_true, y_predicted))
    print("Precision score =",
          mtc.precision_score(y_true, y_predicted, average="weighted"))
    print("Recall score =",
          mtc.recall_score(y_true, y_predicted, average="weighted"))
    print("F1 score =", mtc.f1_score(y_true, y_predicted, average="weighted"))
    print("Specificity =")
    print("MCC =", mtc.matthews_corrcoef(y_true, y_predicted))

    ##################################################################
    # Standard metrics for medico Task
    #################################################################
    print("Printing standard metric for medico task")

    print("1. Recall score (REC) =",
          mtc.recall_score(y_true, y_predicted, average="weighted"))
    print("2. Precision score (PREC) =",
          mtc.precision_score(y_true, y_predicted, average="weighted"))
    print("3. Specificity (SPEC) =")
    print("4. Accuracy (ACC) =", mtc.accuracy_score(y_true, y_predicted))
    print("5. Matthews correlation coefficient(MCC) =",
          mtc.matthews_corrcoef(y_true, y_predicted))

    print("6. F1 score (F1) =",
          mtc.f1_score(y_true, y_predicted, average="weighted"))

    panda_cm_data = ConfusionMatrix(y_true, y_predicted)
    panda_cm_data.print_stats()
    cm_dictionary = panda_cm_data.stats()
    print("cm _ dictionary saving")
    f = open(os.path.join(history_dir, "20_5_cm_dictionary.pkl"), "wb")
    pickle.dump(cm_dictionary['class'], f)
    f.close()

    print('Finished.. ')

    return y_predicted, testset_predicted_probabilites, all_timePerFrame_host
Example #28
th1flatt = th1.flatten()


# In[ ]:


th1res = th1.reshape(-1,3)


# In[ ]:


#with open('/home/omar/Desktop/cminitialtumorhdbscan','w') as f:
cmth = ConfusionMatrix(segarreshap,th1flatt)
cmth.print_stats()
#print(cm.print_stats(),file=f)
#if th1.all() == masknorm.all():
#   print('ventricles are highlighted')


# In[ ]:


# Group similar grey levels using 5 clusters
kmeanstime = time.time()
X = image_cols.reshape(-1,1)
#X = image_cols
k_m = cluster.KMeans(n_clusters=5, n_init= 30)

km_predict=k_m.fit(X)
Example #29
# predict on the test set
# ---------------------------------------------------
pred_y = logreg.predict(test_x)
# cf = confusion_matrix(test_y, pred_y, labels=['actual','predicted'])
labels=[0,1]
cf = confusion_matrix(test_y, pred_y, labels=labels)
print(cf)
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(logreg.score(test_x, test_y)))

# confusion matrix with details
# -----------------------------
ty=list(test_y)
py=list(pred_y)
cm1=ConfusionMatrix(py,ty)
print(cm1)
cm1.print_stats()
cm1.plot()

# Classification report : precision, recall, F-score
# ---------------------------------------------------
print(cr(test_y, pred_y))


#model number 2

# RFE (recursive feature elimination)
# -----------------------------------
logreg = LogisticRegression()

# sklearn.feature_selection.RFE
# (estimator, n_features_to_select=None, step=1, verbose=0)
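
# A hedged sketch of how RFE might wrap the estimator above; train_x / train_y
# are placeholder names for the training split, which is not shown in this snippet:
# from sklearn.feature_selection import RFE
# rfe = RFE(estimator=logreg, n_features_to_select=10, step=1)
# rfe.fit(train_x, train_y)
# print(rfe.support_, rfe.ranking_)
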
# set the default cut-off to 0.5
# and set predictions to 0 and 1
for i in range(0, length):
    if y_results[i] <= 0.5:
        y_results[i] = 0
    else:
        y_results[i] = 1

# accuracy score
print(accuracy_score(test_y, y_results) * 100)

# confusion matrix
cm = ConfusionMatrix(list(y_results), list(test_y))
print(cm)
cm.print_stats()

# Classification report : precision, recall, F-score
print(cr(test_y, y_results))

# draw the ROC curve
from sklearn import metrics
import matplotlib.pyplot as plt

fpr, tpr, threshold = metrics.roc_curve(test_y, y_results)
roc_auc = metrics.auc(fpr, tpr)
print(roc_auc)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
Example #31
def test_model(test_model, test_dataloader):
    print("Testing started..")
    test_model.eval()
    correct = 0
    total = 0
    all_labels_d = torch.tensor([], dtype=torch.long).to(device)
    all_predictions_d = torch.tensor([], dtype=torch.long).to(device)

    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = test_model(inputs)
            #outputs = (outputs1*0.6 + outputs2*0.4)/2
            _, predicted = torch.max(outputs.data, 1)
            print((predicted == labels).sum())
            total += labels.size(0)
            correct += (predicted == labels).sum()
            all_labels_d = torch.cat((all_labels_d, labels), 0)
            all_predictions_d = torch.cat((all_predictions_d, predicted), 0)

    print('copying some data back to cpu for generating confusion matrix...')
    testset_labels = all_labels_d.cpu()
    testset_predicted_labels = all_predictions_d.cpu()  # to('cpu')

    cm = confusion_matrix(testset_labels,
                          testset_predicted_labels)  # confusion matrix

    print('Accuracy of the network on the %d test images: %f %%' %
          (total, (100.0 * correct / total)))

    print(cm)

    print("taking class names to plot CM")

    class_names = test_datasets.classes  # taking class names for plotting confusion matrix

    print("Generating confution matrix")

    plot_confusion_matrix(cm, classes=class_names, title='my confusion matrix')

    print('confusion matrix saved to ', plot_dir)

    ##################################################################
    # classification report
    #################################################################
    print(
        classification_report(testset_labels,
                              testset_predicted_labels,
                              target_names=class_names))

    ##################################################################
    # Standard metrics for medico Task
    #################################################################
    print("Printing standard metric for medico task")

    weights = [
        1 / 53, 1 / 81, 1 / 138, 1 / 125, 1 / 134, 1 / 11, 1 / 125, 1 / 132,
        1 / 132, 1 / 4, 1 / 184, 1 / 72, 1 / 120, 1 / 39, 1 / 110, 1 / 138
    ]

    print(
        "1. Recall score (REC) =",
        mtc.recall_score(testset_labels,
                         testset_predicted_labels,
                         average="weighted"))
    print(
        "2. Precision score (PREC) =",
        mtc.precision_score(testset_labels,
                            testset_predicted_labels,
                            average="weighted"))
    print("3. Specificity (SPEC) =")
    print(
        "4. Accuracy (ACC) =",
        mtc.accuracy_score(testset_labels, testset_predicted_labels, weights))
    print("5. Matthews correlation coefficient(MCC) =",
          mtc.matthews_corrcoef(testset_labels, testset_predicted_labels))

    print(
        "6. F1 score (F1) =",
        mtc.f1_score(testset_labels,
                     testset_predicted_labels,
                     average="weighted"))

    panda_cm_data = ConfusionMatrix(testset_labels, testset_predicted_labels)
    panda_cm_data.print_stats()
    cm_dictionary = panda_cm_data.stats()
    print("cm _ dictionary saving")
    f = open(os.path.join(history_dir, "24_3_cm_dictionary.pkl"), "wb")
    pickle.dump(cm_dictionary['class'], f)
    f.close()

    print('Finished.. ')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
print("\nX_train:\n")
print(X_train.head())
print(X_train.shape)

print("\nX_test:\n")
print(X_test.head())
print(X_test.shape)

modelo = GaussianNB()
modelo.fit(X_train, y_train)

y_pred = modelo.predict(X_train)

#expected = y_train
#predicted = modelo.predict(X_train)

#print(metrics.classification_report(y_train, y_pred))
#print(metrics.confusion_matrix(y_train, y_pred))

print('Accuracy: {:.2f}'.format(modelo.score(X_train, y_train)))

#matriz = confusion_matrix(y_test, y_pred)
#print('Confusion matrix:')
#print(matriz)

#cm = ConfusionMatrix(X_test, y_pred)
cm = ConfusionMatrix(y_train, y_pred)
cm.print_stats()
Example #33
np.random.shuffle(data)

data = np.c_[np.ones(data.shape[0]), data]   # adding offset term


#%% Data
X = data[:ndat, :-1]
y = data[:ndat, -1]

#%% Prior
mean = np.zeros(X.shape[-1])
Sigma = np.eye(X.shape[-1]) * 100.
prior = LogReg(mean=mean, Sigma=Sigma)

#%% Estimation
for xt, yt in zip(X, y):
    prior.update(yt, xt)
    prior.log()
    
#%% Confusion matrix
CM = ConfusionMatrix(prior.true_vals, prior.binary_preds)
CM.print_stats()

#%% Plots
beta_log = np.array(prior.mean_log)

plt.figure(figsize=(8, 4))
plt.plot(prior.brier_score_log)
plt.xlabel('t')
plt.ylabel('Brier score')
plt.savefig('/tmp/l5-brier.png', bbox_inches='tight')