Example No. 1
 def plot_cm(self):
     cm = confusion_matrix(self.labels,
                           [int(x > 0.5) for x in self.preds])  # list, not map(): Py3 map objects break sklearn
     print(cm)
     plot_confusion_matrix(cm, ['Down', 'Up'],
                           normalize=False,
                           title="Confusion Matrix")
Example No. 2
    def train(self,
              params,
              save_path,
              max_steps=100000,
              early_stopping_rounds=100):
        print('Training')
        print('Train Size = {}'.format(len(self.data)))
        print('Features size = {}'.format(len(self.data[0])))

        train = xgb.DMatrix(self.data, self.labels)
        test = xgb.DMatrix(self.test_data, self.test_labels)

        watchlist = [(test, 'test')]
        clf = xgb.train(params,
                        train,
                        max_steps,
                        evals=watchlist,
                        early_stopping_rounds=early_stopping_rounds)
        joblib.dump(clf, save_path)
        cm = confusion_matrix(self.test_labels,
                              [int(x > 0.5) for x in clf.predict(test)])
        print(cm)
        plot_confusion_matrix(cm, ['Down', 'Up'],
                              normalize=False,
                              title="Confusion Matrix")
Example No. 3
def test(model):
    model.eval()
    test_loss = 0
    correct = 0

    preds = []
    for i, (data, labels) in enumerate(test_loader):
        #data = torch.from_numpy(data)
        data = data.to(device, dtype=torch.float)
        #labels = torch.from_numpy(np.array(labels))
        labels = labels.to(device, dtype=torch.long)
        with torch.no_grad():
            output = model(data)
        test_loss += loss_func(output, labels).item()
        _, pred = torch.max(output.data, 1)
        correct += (pred == labels).sum().item()
        preds.extend(pred.tolist())

    test_loss /= len(test_loader)  # convert summed batch losses to a mean
    print('Test set: Average loss: {:.4f}, Accuracy:{}/{} ({:.2f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    outfile.write(
        'Test set: Average loss: {:.4f}, Accuracy:{}/{} ({:.2f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset),
            100. * correct / len(test_loader.dataset)))
    cm.plot_confusion_matrix(np.array(preds),
                             np.array(test_dataset.label),
                             np.array(['0', '1', '2', '3', '4']),
                             normalize=True)
    return 100. * correct / len(test_loader.dataset)
Example No. 4
def store_conf_matrix_as_png(cnf_matrix, _classifier_name):

    import plot_confusion_matrix as p_CM

    print("SCMP :: CLF_Name_ ", _classifier_name)
    cm_labels = np.arange(len(cnf_matrix[0]))
    p_CM.plot_confusion_matrix(cnf_matrix, cm_labels, _classifier_name,
                               normalize=False, show=False)
Example No. 5
def main():
    print('__name__:', __name__)
    #confusion_matrix_minc_mat = sio.loadmat(sys.argv[1])
    #confusion_matrix_minc = confusion_matrix_minc_mat[confusion_matrix_minc_mat.keys()[1]]

    confusion_matrix_minc_probability_mat = sio.loadmat(sys.argv[1])
    confusion_matrix_minc_probability = np.around(
        np.array(confusion_matrix_minc_probability_mat['confusion_mat']) * 100,
        decimals=2)

    #print confusion_matrix_minc_probability

    class_names = ['airplane', 'bathtub', 'bed', 'bench', 'bookshelf', 'bottle', 'bowl', 'car', \
        'chair', 'cup', 'curtain', 'desk', 'door', 'dresser',  'flower_pot',\
        'glass_box', 'guitar', 'keyboard', 'lamp', 'laptop', 'mantel', 'monitor', \
        'night_stand', 'person', 'piano', 'plant', 'radio', 'range_hood', 'sink', \
        'sofa', 'stairs', 'stool', 'table', 'tent', 'toilet', 'tv_stand', 'vase', \
        'wardrobe', 'xbox']

    #class_names = ['brick', 'carpet', 'ceramic','fabric','foliage','food','glass','hair','leather','metal','mirror',\
    #              'other','painted','paper','plastic','polishedstone','skin','sky','stone','tile','wallpaper','water','wood']

    print(class_names)

    plt.figure()
    plot_confusion_matrix(confusion_matrix_minc_probability,
                          classes=class_names,
                          normalize=False,
                          title='Confusion matrix of MINC')
    plt.show()
Example No. 6
    def score(self):
        """
        Scoring on the test set.
        """
        print("\n\nModel evaluation.\n")
        self.model.eval()
        self.scores = []
        self.ground_truth = []
        preds = []
        truths = []
        for graph_pair in tqdm(self.testing_graphs):
            data = process_pair(graph_pair)
            self.ground_truth.append(calculate_normalized_ged(data))
            data = self.transfer_to_torch(data)
            target = data["target"]
            prediction = self.model(data)
            self.scores.append(calculate_loss(prediction, target))

            preds.append(0 if prediction.item() < 0.5 else 1)
            truths.append(int(data["target"].item()))
        self.print_evaluation()
        plot_confusion_matrix(np.array(truths),
                              np.array(preds),
                              np.array([0, 1]),
                              title='SimGNN confusion matrix')
Example No. 7
    def train(self):
        params = {}
        params['objective'] = 'multi:softprob'
        params['eta'] = 0.01
        params['num_class'] = 2
        params['max_depth'] = 20
        params['subsample'] = 0.05
        params['colsample_bytree'] = 0.05
        params['eval_metric'] = 'mlogloss'
        #params['scale_pos_weight'] = 10
        #params['silent'] = True
        #params['gpu_id'] = 0
        #params['max_bin'] = 16
        #params['tree_method'] = 'gpu_hist'

        train = xgb.DMatrix(self.data, self.labels)
        test = xgb.DMatrix(self.test_data, self.test_labels)

        watchlist = [(train, 'train'), (test, 'test')]
        clf = xgb.train(params,
                        train,
                        1000,
                        evals=watchlist,
                        early_stopping_rounds=100)
        joblib.dump(clf, 'models/clf.pkl')
        cm = confusion_matrix(self.test_labels,
                              [int(p[1] > .5) for p in clf.predict(test)])
        print(cm)
        plot_confusion_matrix(cm, ['Down', 'Up'],
                              normalize=True,
                              title="Confusion Matrix")
Example No. 8
def evaluate_plot(model, iterator, criterion, device, epoch, network_type):

    # set model to evaluation mode
    model.eval()

    # total loss of the epoch
    epoch_loss = 0

    tp = 0.0
    tn = 0.0
    fp = 0.0
    fn = 0.0
    predicted_total = np.array([])
    target_total = np.array([])

    with torch.no_grad():
        for i, batch in enumerate(iterator):
            src = batch[0].to(device)
            src = torch.unsqueeze(src, dim=1)
            trg = batch[1].long().to(device)

            res = model(src.float())

            # average loss of a batch
            loss = criterion(res.float(), trg)
            epoch_loss += loss.item()

            # take no mass as positive for convenience of unsupervised version
            _, predicted = torch.max(res, dim=1)
            tp += ((predicted == 1) & (trg == 1)).sum().item()
            tn += ((predicted == 0) & (trg == 0)).sum().item()
            fn += ((predicted == 0) & (trg == 1)).sum().item()
            fp += ((predicted == 1) & (trg == 0)).sum().item()
            predicted_total = np.concatenate(
                [predicted_total, predicted.cpu().numpy()])
            target_total = np.concatenate([target_total, trg.cpu().numpy()])

    precision = 100 * tp / (tp + fp)
    recall = 100 * tp / (tp + fn)
    accuracy = 100 * (tp + tn) / (tp + fp + tn + fn)

    predicted_total = predicted_total.astype("int32")
    target_total = target_total.astype("int32")

    # get result of the first and last epoch
    if (epoch == 0 or epoch == 59):
        pcm.plot_confusion_matrix(target_total, predicted_total,
                                  np.array(["mass", "no mass"]))
        plt.savefig('Results/CNN_' + network_type + '_epoch' + str(epoch + 1) +
                    '_result.png')
        pcm.plot_confusion_matrix(target_total,
                                  predicted_total,
                                  np.array(["mass", "no mass"]),
                                  normalize=True)
        plt.savefig('Results/CNN_' + network_type + '_epoch' + str(epoch + 1) +
                    '_normalized_result.png')
        plt.clf()

    return epoch_loss / len(iterator), accuracy, precision, recall
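The manual tp/fp/fn bookkeeping above agrees with scikit-learn's metrics; a
small self-contained cross-check on made-up labels:

import numpy as np
from sklearn.metrics import precision_score, recall_score

y_true = np.array([1, 0, 1, 1, 0])
y_pred = np.array([1, 0, 0, 1, 1])
tp = ((y_pred == 1) & (y_true == 1)).sum()
fp = ((y_pred == 1) & (y_true == 0)).sum()
fn = ((y_pred == 0) & (y_true == 1)).sum()
assert np.isclose(precision_score(y_true, y_pred), tp / (tp + fp))  # 2/3
assert np.isclose(recall_score(y_true, y_pred), tp / (tp + fn))     # 2/3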
Example No. 9
def gen_conf_matrix(w, y, class_names):
    l = len(y) // C  # C: number of classes, assumed defined at module level
    y_true_d = [1]*l + [2]*l + [3]*l
    y_d = [0] * len(y)
    y_pred_d = [0] * len(y)  # a separate list; aliasing y_d would let the loop overwrite it
    for i in range(len(y)):
        y_d[i] = np.matmul(w, y[i, :])
        y_pred_d[i] = find_nearest_ret_idx(sigmoid(y_d[i]), 1) + 1
    plot_confusion_matrix(y_true_d, y_pred_d, class_names)
Example No. 10
def classify(X_train, y_train, X_test, y_test, features):
    print('classifying ...')
    clf = RandomForestClassifier(n_estimators=1000)
    clf.fit(X_train, y_train)

    # print('\nFeature Importance')
    # i = 0
    # ft = list(features)
    # for fi in clf.feature_importances_:
    #     print(ft[i], '\t', fi)
    #     i += 1

    y_pred = clf.predict(X_test)

    # print(list(y_test))
    # print(list(y_pred))

    p, r, f, _ = precision_recall_fscore_support(y_test,
                                                 y_pred,
                                                 average='micro')
    a = accuracy_score(y_test, y_pred)
    print(a, p, r, f)

    #majority class label
    mc = most_common(list(y_test))
    y_pred_mc = [mc] * len(y_test)
    p, r, f, _ = precision_recall_fscore_support(y_test,
                                                 y_pred_mc,
                                                 average='micro')
    a = accuracy_score(y_test, y_pred_mc)
    print(a, p, r, f)

    # Compute confusion matrix
    print(set(y_test))
    print(set(y_pred))

    y_test_intents = []
    y_pred_intents = []
    for i in range(len(y_test)):
        y_test_intents.append(intent_list[y_test[i] - 1])
        y_pred_intents.append(intent_list[y_pred[i] - 1])
    class_names = list(set(y_test_intents))

    cnf_matrix = confusion_matrix(y_test_intents,
                                  y_pred_intents,
                                  labels=class_names)
    np.set_printoptions(precision=2)

    # Plot non-normalized confusion matrix

    plt.figure()
    plot_confusion_matrix(cnf_matrix,
                          classes=class_names,
                          title='Confusion matrix, without normalization')

    plt.show()
Example No. 11
def store_conf_matrix_as_png(cnf_matrix, _classifier_name):

    import plot_confusion_matrix as p_CM

    print("SCMP :: CLF_Name_ ", _classifier_name)
    cm_labels = np.arange(len(cnf_matrix[0]))
    p_CM.plot_confusion_matrix(cnf_matrix,
                               cm_labels,
                               _classifier_name,
                               normalize=False,
                               show=False)
Example No. 12
def test_performance(xx=_par_.xx, yy=_par_.yy, odir=_par_.odir):
    """
    Test the performance of both classifiers
    :param xx: numpy.ndarray of shape (n_samples, n_features), optional, "training data"
    :param yy: numpy.ndarray of shape (n_samples), optional, "training labels"
    :param odir: str, optional, "directory to store performance measures"
    """

    # init Scores classes to store some performance scores
    acc_onc, acc_rnc = Scores([accuracy_score]), Scores([accuracy_score])
    cm_onc, cm_rnc = ConfusionMatrix(_par_.labels), ConfusionMatrix(
        _par_.labels)

    # cross-validate with stratified randomized folds
    splits = ((xx[i], xx[j], yy[i], yy[j]) for i, j in StratifiedShuffleSplit(
        n_splits=100, test_size=0.33).split(xx, yy))

    for xx_train, xx_test, yy_train, yy_test in splits:

        # test own neighbor classifier
        _par_.onc.fit(xx_train, yy_train)
        yy_pred = _par_.onc.predict(xx_test)
        acc_onc.add_targets(yy_test, yy_pred)
        cm_onc.add_targets(yy_test, yy_pred)

        # test RadiusNeighborsClassifier
        _par_.rnc.fit(xx_train, yy_train)
        yy_pred = _par_.rnc.predict(xx_test)
        acc_rnc.add_targets(yy_test, yy_pred)
        cm_rnc.add_targets(yy_test, yy_pred)

    # print performance measures for both classifiers
    aprint('\nNeighborsClassifier:')
    aprint(acc_onc.get_mean())

    aprint('\nsklearn.neighbors.RadiusNeighborsClassifier:')
    aprint(acc_rnc.get_mean())

    # plot confusion matrices
    file = os.path.join(odir, 'cm.png')
    aprint('\nFor the confusion matrices see figure {}.'.format(file), fmt='w')
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
    plt.subplots_adjust(wspace=0.4)
    fig.suptitle('Confusion Matrix', fontsize=18)
    plot_confusion_matrix(ax1,
                          cm_onc.get_normed_cm(),
                          _par_.label_names,
                          title='NeighborsClassifier')
    plot_confusion_matrix(ax2,
                          cm_rnc.get_normed_cm(),
                          _par_.label_names,
                          title='sklearn.neighbors.RadiusNeighborsClassifier')
    fig.savefig(file, dpi=200)
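The splits generator above wraps StratifiedShuffleSplit, which yields index
arrays per fold; a minimal standalone sketch of that pattern on synthetic data:

import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

xx = np.random.rand(30, 4)     # synthetic features
yy = np.array([0, 1, 2] * 10)  # synthetic labels, 3 balanced classes
sss = StratifiedShuffleSplit(n_splits=5, test_size=0.33)
for train_idx, test_idx in sss.split(xx, yy):
    xx_train, xx_test = xx[train_idx], xx[test_idx]
    yy_train, yy_test = yy[train_idx], yy[test_idx]
    # each fold keeps the class proportions of yy in both halves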
Example No. 13
def test():
    args = parse_args()
    model = Networks.ResNet18_ARM___RAF()

    print("Loading pretrained weights...", args.checkpoint)
    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint["model_state_dict"], strict=False)

    data_transforms_test = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    test_dataset = RafDataSet(args.raf_path,
                              phase='test',
                              transform=data_transforms_test)
    test_size = len(test_dataset)
    print('Test set size:', test_size)

    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              num_workers=args.workers,
                                              shuffle=False,
                                              pin_memory=True)

    model = model.cuda()

    pre_labels = []
    gt_labels = []
    with torch.no_grad():
        bingo_cnt = 0
        model.eval()
        for batch_i, (imgs, targets, _) in enumerate(test_loader):
            outputs, _ = model(imgs.cuda())
            targets = targets.cuda()
            _, predicts = torch.max(outputs, 1)
            correct_or_not = torch.eq(predicts, targets)
            pre_labels += predicts.cpu().tolist()
            gt_labels += targets.cpu().tolist()
            bingo_cnt += correct_or_not.sum().cpu()

        acc = bingo_cnt.float() / float(test_size)
        acc = np.around(acc.numpy(), 4)
        print(f"Test accuracy: {acc:.4f}.")

    if args.plot_cm:
        cm = confusion_matrix(gt_labels, pre_labels)
        cm = np.array(cm)
        labels_name = ['SU', 'FE', 'DI', 'HA', 'SA', 'AN', "NE"]  # axis tick labels
        plot_confusion_matrix(cm, labels_name, 'RAF-DB', acc)
Example No. 14
 def train(self):
     print('Training')
     print('Train Size = {}'.format(len(self.data)))
     print('Features size = {}'.format(len(self.data[0])))
     clf = SGDClassifier(loss='hinge')
     clf.fit(self.data, self.labels)
     joblib.dump(clf, 'models/clf.pkl')
     cm = confusion_matrix(
         self.test_labels,
         [int(x > 0.5) for x in clf.predict(self.test_data)])
     print(cm)
     plot_confusion_matrix(cm, ['Down', 'Up'],
                           normalize=False,
                           title="Confusion Matrix")
Example No. 15
def print_performace_anomaly_detection_algorithm(test_true_labels, predicted_labels,
                                                 timestamp_T_true, timestamp_eva,
                                                 data_mode, if_with_mass_data):
    # Note: required_algorithm, Tag and accuracy_predicted_labels are assumed
    # to be module-level globals in the original source.

    class_names = np.array(['no mass', 'mass'])
    # Plot non-normalized confusion matrix
    pcm.plot_confusion_matrix(test_true_labels.astype('int'), predicted_labels.astype('int'), classes = class_names,
                          title='Confusion matrix, without normalization'+" (" + required_algorithm + ")")
    
    # Plot normalized confusion matrix
    pcm.plot_confusion_matrix(test_true_labels.astype('int'), predicted_labels.astype('int'), classes=class_names, normalize=True,
                          title='Normalized confusion matrix'+" (" + required_algorithm + ")")
    plt.show()
    
    fig = plt.figure(num = 5, figsize = (10,8))
    ax = fig.add_subplot(211)
    #plt.scatter(timestamp_T[:47128, 0], Tag, s = 1, c='blue', marker='x',alpha=0.5, label= 'true labels')
    ax.scatter(timestamp_T_true, Tag, s = 1, c='blue', marker='x',alpha=0.5, \
               label= 'true labels\n' + '( ' + 'if containing mass information in data:'+ str(if_with_mass_data) +' )')
    ax.legend(loc='center right')
    ax.set_xlabel("time", fontsize = 15, labelpad = 1)
    ax.set_ylabel("labels", fontsize = 15, labelpad = 1)
    ax.set_title("True Labels in All Dataset" + " (" + data_mode + ' / ' + data_mode + ")", fontsize = 15)
    
    ax = fig.add_subplot(212)
    plt.scatter(timestamp_eva, predicted_labels, s = 1, c='red', marker='x',alpha=0.5, \
                label= 'predicted labels\n' + '( '+ 'if containing mass information in data:'+ str(if_with_mass_data) +' )')
    ax.legend(loc='center right')
    ax.set_xlabel("time", fontsize = 15, labelpad = 1)
    ax.set_ylabel("labels", fontsize = 15, labelpad = 1)
    ax.set_title("Predicted Labels in Test Dataset" + " (" + data_mode + ' / '+ required_algorithm + ")", fontsize = 15)
    plt.subplots_adjust(wspace = 0.1, hspace = 0.25)
    plt.show()
    
    print("the set of predicted labels: ", set(predicted_labels), "the set of true labels", set(test_true_labels))  
    print('\nThe accuracy of '+ required_algorithm  + ' is: ', '{:.4f}'.format(accuracy_predicted_labels))  
    print('\nThe precision of ' + required_algorithm + ' is: ', '{:.4f}'.format(metrics.precision_score(test_true_labels, predicted_labels)))
    print('\nThe recall of ' + required_algorithm + ' is: ', '{:.4f}'.format(metrics.recall_score(test_true_labels, predicted_labels)))
    print('\nThe F1 of ' + required_algorithm + ' is: ', '{:.4f}'.format(metrics.f1_score(test_true_labels, predicted_labels)))
    
    np.set_printoptions(formatter={'float': '{: 0.4f}'.format})
    k = 0
    for metric_performace in ['precision', 'recall', 'F1', 'data_size']:
        print('\nThe ' + metric_performace +' of ' + required_algorithm  + ' is: ', \
          np.array(precision_recall_fscore_support(test_true_labels, predicted_labels))[k])
        k = k+1
        if k == 3:
            np.set_printoptions(edgeitems=3,infstr='inf', linewidth=75, nanstr='nan', precision=8, suppress=False, threshold=1000, formatter=None)
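The k-indexed loop above unpacks the tuple returned by
precision_recall_fscore_support, which yields four per-class arrays; a compact
sketch with toy labels:

from sklearn.metrics import precision_recall_fscore_support

p, r, f, s = precision_recall_fscore_support([0, 1, 1, 0], [0, 1, 0, 0])
print(p, r, f, s)  # per-class precision, recall, f-score, support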
Example No. 16
    def plot_cm_half_data(self):
        half_data_len = len(self.time_ordered_preds) // 2  # integer division for slicing
        cm = confusion_matrix(
            self.labels[:half_data_len],
            [int(x > .5) for x in self.preds[:half_data_len]])
        print(cm)
        plot_confusion_matrix(cm, ['Down', 'Up'],
                              normalize=False,
                              title="Confusion Matrix First Half")

        cm = confusion_matrix(
            self.labels[half_data_len:],
            [int(x > .5) for x in self.preds[half_data_len:]])
        print(cm)
        plot_confusion_matrix(cm, ['Down', 'Up'],
                              normalize=False,
                              title="Confusion Matrix Seconde Half")
Example No. 17
def compute_test():
    model.eval()
    #output = model(features, adj)
    #loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    test_output = torch.sigmoid(
        model.forward(features, adj, test_map1, test_map2))
    pred = np.where(test_output.data.numpy() < 0.5, 0, 1)
    print(
        "True Positive Rate:",
        recall_score(np.asarray(test_label), pred, average="micro",
                     labels=[1]))
    print(
        "False Positive Rate:", 1 - recall_score(
            np.asarray(test_label), pred, average="micro", labels=[0]))
    plot_confusion_matrix(np.asarray(test_label),
                          pred,
                          np.array([0, 1]),
                          title='Confusion matrix, without normalization')
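The FPR above relies on the identity FPR = 1 - TNR, with the true-negative
rate obtained as the recall of class 0; a tiny check on made-up labels:

import numpy as np
from sklearn.metrics import recall_score

y_true = np.array([0, 0, 1, 1, 0])
y_pred = np.array([0, 1, 1, 1, 0])
tpr = recall_score(y_true, y_pred, average="micro", labels=[1])      # 1.0
fpr = 1 - recall_score(y_true, y_pred, average="micro", labels=[0])  # ~0.33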
Example No. 18
def test_model():
    names=['cats', 'dogs']
    from plot_confusion_matrix import plot_confusion_matrix
    test_imgs, test_labels = next(test_batches)  # draw a batch from the test generator
    print('test set loaded: ', test_batches.classes.shape)
    plotImages(test_imgs)
    predictions = model.predict(x = test_batches, verbose=0)
    print('predictions acquired with tensor shape: ', predictions.shape)
    print('plotting confusion matrix and generating classification report')
    # test_batches must use shuffle=False so .classes aligns with the predictions
    cm = confusion_matrix(y_true=test_batches.classes, y_pred=np.argmax(predictions, axis=1))
    plot_confusion_matrix(cm, names, normalize=False, title='Confusion Matrix', cmap=plt.cm.Blues)
    print('Classification Report')
    report = classification_report(test_batches.classes, np.argmax(predictions, axis=1)
                                , target_names=names, output_dict=True)
    print(report)
    df = pd.DataFrame(report).transpose()
    df.insert(0, 'Index', ['cats', 'dogs', 'accuracy', 'macro avg', 'weighted avg' ])
    df.to_csv('classification_report.csv', index=False)
    print('\n\n classification report saved to csv file')
Example No. 19
def bench_k_means(estimator, init_method, train_data, test_data, test_labels):
    t0 = time()
    estimator.fit(train_data)
    k_labels = estimator.predict(test_data)
    print('init\t\ttime\tinertia\thomo\tcompl\tv-meas\tARI\tAMI\tsilhouette')
    print('%-9s\t%.2fs\t%i\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f' %
          (init_method, (time() - t0), estimator.inertia_,
           metrics.homogeneity_score(test_labels, k_labels),
           metrics.completeness_score(test_labels, k_labels),
           metrics.v_measure_score(test_labels, k_labels),
           metrics.adjusted_rand_score(test_labels, k_labels),
           metrics.adjusted_mutual_info_score(
               test_labels, k_labels, average_method='arithmetic'),
           metrics.silhouette_score(test_data,
                                    test_labels,
                                    metric='euclidean',
                                    sample_size=sample_size)))
    # initialize an array with the same dimensions as k_labels
    k_labels_matched = np.empty_like(k_labels)
    # For each cluster label, find and assign the best-matching truth label
    for k in np.unique(k_labels):
        match_nums = [
            np.sum((k_labels == k) & (test_labels == t))
            for t in np.unique(test_labels)
        ]
        k_labels_matched[k_labels == k] = np.unique(test_labels)[np.argmax(
            match_nums)]
    nerr_test = (k_labels_matched != test_labels).sum()
    print("recognition rate of test data = {:.1f}%".format(
        100 - 100 * float(nerr_test) / test_data.shape[0]))
    cm = confusion_matrix(test_labels, k_labels_matched)
    print("Confusion matrix:\n%s" % cm)
    plt.figure(figsize=(10, 10))
    class_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    plot_confusion_matrix(
        cm,
        classes=class_names,
        normalize=True,
        title='Normalized confusion matrix of initialization method ' +
        init_method)
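The matching loop above relabels each cluster with the majority ground-truth
label among its members; a toy illustration with hypothetical cluster ids:

import numpy as np

k_labels = np.array([0, 0, 1, 1, 1])  # cluster ids from k-means
truth = np.array([7, 7, 3, 3, 7])     # ground-truth digits
matched = np.empty_like(k_labels)
for k in np.unique(k_labels):
    counts = [np.sum((k_labels == k) & (truth == t)) for t in np.unique(truth)]
    matched[k_labels == k] = np.unique(truth)[np.argmax(counts)]
print(matched)  # [7 7 3 3 3]: cluster 1 is relabelled as digit 3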
Example No. 20
def show_confusion(generator):
    '''
    Plots confusion matrix for model predictions

    Parameters
    ----------
    generator: Keras ImageDataGenerator

    '''
    test_X = generator[0][0]
    test_y = generator.classes
    predicted_y = model.predict_classes(test_X)

    class_names = [
        'box elder beetle', 'spotted cucumber beetle', 'emerald ash borer',
        'Japanese beetle', 'ladybug', 'striped cucumber beetle'
    ]

    cnf_matrix = confusion_matrix(test_y, predicted_y)
    np.set_printoptions(precision=2)
    print(cnf_matrix)

    # Plot non-normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix,
                          classes=class_names,
                          title='Confusion matrix, without normalization')

    plt.savefig('./result_images/' + ts + 'confusion_matrix.png')

    # Plot normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix,
                          classes=class_names,
                          normalize=True,
                          title='Normalized confusion matrix')

    plt.savefig('./result_images/' + ts + 'normalized_confusion_matrix.png')
Example No. 21
def show_confusion(generator):
    '''
    Plots confusion matrix for model predictions

    Parameters
    ----------
    generator: Keras ImageDataGenerator

    '''
    test_X = generator[0][0]
    test_y = generator.classes

    probs = model.predict(test_X)
    indices = probs.argsort(axis = 1)
    top_prediction = np.flip(indices, 1)[:, 0]
    top_prediction.reshape(1, -1)

    class_names = ['box elder beetle', 'cucumber beetle','emerald ash borer',
                'Japanese beetle', 'ladybug', 'striped cucumber beetle']

    # Compute confusion matrix
    cnf_matrix = confusion_matrix(test_y, top_prediction)
    np.set_printoptions(precision=2)

    print(cnf_matrix)
    #Plot non-normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names,
                          title='Confusion matrix, without normalization')

    plt.savefig('./result_images/'+ts+'confusion_matrix.png')

    # Plot normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                          title='Normalized confusion matrix')

    plt.savefig('./result_images/'+ts+'normalized_confusion_matrix.png')
Example No. 22
def plot_confusion_matrix_u(test_labels, pred_labels, normalize, type):
    # Reference: https://scikit-learn.org/stable/auto_examples/model_selection
    #   /plot_confusion_matrix.html#sphx-glr-auto-examples-model-selection-plot-confusion-matrix-py

    plot_confusion_matrix(
        test_labels,
        pred_labels,
        classes=classes_dict,
        # normalize=normalize,
        title=type
    )

Example No. 23
def evaluate(prediction, target):
    """
    This function evaluates the model after training on training set and testing on testing set.
    Plots of the confusion matrices are also shown.
    """
    print('Evaluating Predictions...')
    # Compute confusion matrix
    cnf_matrix = confusion_matrix(target[0:len(prediction)], prediction)
    np.set_printoptions(precision=2)

    # Plot non-normalized confusion matrix
    plt.figure()
    class_names = ['Fatal', 'Serious', 'Slight']
    plot_confusion_matrix(cnf_matrix,
                          classes=class_names,
                          title='Confusion matrix, without normalization')

    # Plot normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix,
                          classes=class_names,
                          normalize=True,
                          title='Normalized confusion matrix')
    plt.show()
Example No. 24
def finalTest(netFE, netTask, test_data, test_label):
    text = ['left', 'right', 'rest']
    Loss = nn.CrossEntropyLoss()
    netFE.eval()
    netTask.eval()
    output = netTask(netFE(torch.FloatTensor(test_data).to(device)))
    pred = output.data.max(1)[1]
    #pred = voting(pred)
    loss = Loss(output, torch.tensor(test_label).to(device)).item()
    correct_num = sum(np.array(pred.cpu()) == test_label)  #.cpu()
    val_accuracy = correct_num / len(test_label)
    cmt = plot_confusion_matrix(test_label,
                                pred.cpu(),
                                np.array(text),
                                title='accuracy: {0}%'.format(val_accuracy *
                                                              100),
                                normalize=True,
                                show=False)

    return val_accuracy, loss, cmt
Example No. 25
def evaluate_complete(model, iterator, criterion, device, epoch, network_type,
                      timestamps):

    # set model to evaluation mode
    model.eval()

    # total loss of the epoch
    epoch_loss = 0

    # threshold of two kinds of data
    threshold = torch.tensor(0.01).to(device)

    tp = 0.0
    tn = 0.0
    fp = 0.0
    fn = 0.0
    tp_time = []
    tn_time = []
    fp_time = []
    fn_time = []
    timestamps = timestamps[:, 0, 0]
    predicted_total = np.array([])
    target_total = np.array([])

    with torch.no_grad():
        for i, batch in enumerate(iterator):
            src = batch[0].to(device)
            src = torch.unsqueeze(src, dim=1)
            trg = batch[1].long().to(device)

            res = model(src.float()).view(
                -1)  # reshape from (128, 1) to (128) to match target shape
            print(res)

            # average loss of a batch
            loss = criterion(res.float(), trg.float())
            epoch_loss += loss.item()

            # take no mass as positive for convenience
            predicted = torch.ones(res.size()[0]).long().to(device)
            for j in range(res.size()[0]):
                if (torch.abs(res[j] - torch.tensor(1).to(device)) >=
                        threshold):
                    predicted[j] = 0

            tp += ((predicted == 1) & (trg == 1)).sum().item()
            tn += ((predicted == 0) & (trg == 0)).sum().item()
            fn += ((predicted == 0) & (trg == 1)).sum().item()
            fp += ((predicted == 1) & (trg == 0)).sum().item()
            predicted_total = np.concatenate(
                [predicted_total, predicted.cpu().numpy()])
            target_total = np.concatenate([target_total, trg.cpu().numpy()])

    precision = 100 * tp / (tp + fp)
    recall = 100 * tp / (tp + fn)
    accuracy = 100 * (tp + tn) / (tp + fp + tn + fn)

    predicted_total = predicted_total.astype("int32")
    target_total = target_total.astype("int32")

    for i in range(len(timestamps)):
        if (predicted_total[i] == target_total[i]):
            if (predicted_total[i] == 1):
                tp_time.append(timestamps[i])
            else:
                tn_time.append(timestamps[i])
        elif (predicted_total[i] > target_total[i]):
            fp_time.append(timestamps[i])
        else:
            fn_time.append(timestamps[i])

    # get result of the first and last epoch
    if (epoch == 0 or epoch == 59):
        plt.subplot(211)
        plt.title("targets")
        plt.plot(timestamps, target_total, 'bo')
        plt.subplot(212)
        plt.title("prediction")
        plt.plot(tp_time, np.ones(len(tp_time)), 'ro', label="tp")
        plt.plot(tn_time, np.zeros(len(tn_time)), 'yo', label="tn")
        plt.plot(fp_time, np.ones(len(fp_time)), 'go', label="fp")
        plt.plot(fn_time, np.zeros(len(fn_time)), 'co', label="fn")
        plt.legend()
        plt.savefig("Results/CNN_" + network_type + "_epoch" + str(epoch + 1) +
                    "_prediction_target.png")
        pcm.plot_confusion_matrix(target_total, predicted_total,
                                  np.array(["mass", "no mass"]))
        plt.savefig('Results/CNN_' + network_type + '_epoch' + str(epoch + 1) +
                    '_result.png')
        pcm.plot_confusion_matrix(target_total,
                                  predicted_total,
                                  np.array(["mass", "no mass"]),
                                  normalize=True)
        plt.savefig('Results/CNN_' + network_type + '_epoch' + str(epoch + 1) +
                    '_normalized_result.png')
        plt.clf()

    return epoch_loss / len(iterator), accuracy, precision, recall
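The per-element thresholding loop above (predict 1 when the output lies within
threshold of 1) can be written as one tensor expression; a standalone sketch
with made-up outputs:

import torch

threshold = torch.tensor(0.01)
res = torch.tensor([0.995, 0.30, 1.004])  # hypothetical model outputs
predicted = (torch.abs(res - 1.0) < threshold).long()
print(predicted)  # tensor([1, 0, 1])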
Example No. 26
          validation_set=0.2,
          shuffle=True)

# read X_test and Y_test
X_test = pd.read_csv('./Preprocessed Data/X_test.csv', header=0)
Y_test = pd.read_csv('./Preprocessed Data/y_test.csv', header=0)

# predict the results using our trained model
Y_pred = model.predict_label(X_test)
Y_pred = Y_pred[:, 0].tolist()

print(classification_report(Y_test, Y_pred))

cnf_matrix = confusion_matrix(Y_test, Y_pred)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
cnf_fig = plt.figure()
plot_confusion_matrix(cnf_matrix,
                      classes=class_names,
                      title='Confusion matrix for DNN')

# Plot normalized confusion matrix
normalized_fig = plt.figure()
plot_confusion_matrix(cnf_matrix,
                      classes=class_names,
                      normalize=True,
                      title='Normalized confusion matrix for DNN')
plt.show()
cnf_fig.savefig('./Plots/DNN_conf_matrix.png')
normalized_fig.savefig('./Plots/DNN_normalized_conf_matrix.png')
Example No. 27
def evaluate(model, iterator_train, iterator, criterion, device, epoch,
             network_type):

    # set model to evaluation mode
    model.eval()

    # total loss of the epoch
    epoch_loss = 0

    # threshold of two kinds of data
    threshold = 50

    tp = 0.0
    tn = 0.0
    fp = 0.0
    fn = 0.0
    loss_total = np.empty((0, 10))
    predicted_total = np.empty(0)
    target_total = np.empty(0)

    # calculate baseline for test data regularization
    with torch.no_grad():
        for i, batch in enumerate(iterator_train):
            src = batch[0].to(device)  # data of shape (128, 10, 400)
            src = torch.unsqueeze(
                src, dim=1)  # add channel dimension (becomes(128, 1, 10, 400))
            trg = batch[1].to(device)  # label of shape (128, 10, 400)

            res = model(src.float()).view(
                -1, 10, 400)  # reshape result to match target shape
            src = torch.squeeze(src)

            # record the property-wise loss on training data in one batch:
            # each property's loss is the sum of its 400 per-feature absolute errors
            loss_train = torch.abs(src.float() - res).to(device)
            loss_train = torch.sum(loss_train, 2)
            loss_total = np.concatenate((loss_total, loss_train.cpu().numpy()))

        loss_total = loss_total.T
        # now loss_total is of shape (10, train size)
        # calculate property-wise loss mean and standard deviation matrix
        loss_mean = np.mean(loss_total, 1)
        loss_std = np.cov(loss_total)

        for i, batch in enumerate(iterator):
            src = batch[0].to(device)
            src = torch.unsqueeze(src, dim=1)
            trg = batch[1].long().to(device)

            res = model(src.float()).view(-1, 10,
                                          400)  # reshape to match target shape
            src = torch.squeeze(src)

            # property-wise loss on test data in one batch
            loss_test = torch.abs(src.float() - res).cpu().numpy()
            loss_test = np.sum(loss_test, 2)

            # normalize loss and predict
            # take no mass as positive for convenience
            loss_final = np.ndarray(res.size()[0])
            loss_test -= loss_mean
            for j in range(res.size()[0]):
                loss_final[j] = loss_test[j].dot(np.linalg.inv(loss_std)).dot(
                    loss_test[j].T)

            predicted = torch.ones(res.size()[0],
                                   dtype=torch.float).long().to(device)
            for k in range(res.size()[0]):
                if (np.abs(loss_final[k]) >= threshold):
                    predicted[k] = 0

            tp += ((predicted == 1) & (trg == 1)).sum().item()
            tn += ((predicted == 0) & (trg == 0)).sum().item()
            fn += ((predicted == 0) & (trg == 1)).sum().item()
            fp += ((predicted == 1) & (trg == 0)).sum().item()
            predicted_total = np.concatenate(
                [predicted_total, predicted.cpu().numpy()])
            target_total = np.concatenate([target_total, trg.cpu().numpy()])

    precision = 100 * tp / max((tp + fp), 1)
    recall = 100 * tp / max((tp + fn), 1)
    accuracy = 100 * (tp + tn) / (tp + fp + tn + fn)
    f1 = 2 * precision * recall / max((precision + recall), 1)

    predicted_total = predicted_total.astype("int32")
    target_total = target_total.astype("int32")

    # get result of the first and last epoch
    if (epoch == 0 or epoch == 59):
        pcm.plot_confusion_matrix(target_total, predicted_total,
                                  np.array(["mass", "no mass"]))
        plt.savefig('Results/CNN_' + network_type + '_epoch' + str(epoch + 1) +
                    '_result.png')
        pcm.plot_confusion_matrix(target_total,
                                  predicted_total,
                                  np.array(["mass", "no mass"]),
                                  normalize=True)
        plt.savefig('Results/CNN_' + network_type + '_epoch' + str(epoch + 1) +
                    '_normalized_result.png')
        plt.clf()

    return epoch_loss / len(iterator), accuracy, precision, recall, f1
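The score computed above is a Mahalanobis-style distance of a sample's
property-wise losses from their training mean; a self-contained sketch with
synthetic shapes matching the (10 properties x N samples) layout:

import numpy as np

rng = np.random.default_rng(0)
loss_train = rng.normal(size=(10, 500))    # (properties, training samples)
mean = loss_train.mean(axis=1)
cov_inv = np.linalg.inv(np.cov(loss_train))
x = rng.normal(size=10)                    # one test sample's losses
score = (x - mean) @ cov_inv @ (x - mean)  # large score => flagged as anomaly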
Example No. 28
    test_predict.append(pred_mets)  # pred_ctrl
    test_auc.append(feature[1])
#   check if the code delivers expected results; i.e., no deviation
#   print('Predicted {}'.format(number),': ', feature)

# print(test_predict)
# print(test_label)
########### DEFINE THE CONFUSION MATRIX ###########
# define the test label
cm = confusion_matrix(test_label, test_predict)
class_name = ['control', 'metastasis']  # sub-folders are ctrl and mets
n_classes = len(class_name)
print(n_classes)

plt.figure()
plot_confusion_matrix(cm, classes=class_name, title='Confusion Matrix')
#roc_curve(test_label, test_predict)
plt.show()

#skplt.metrics.plot_roc_curve(test_label[:],test_predict[:])
#plt.show()

# area = roc_auc_score(test_label,test_predict)
# print("Area Under the Curve: ", area)

# print('Predicted:', decode_predictions(preds))
# print: [[u'n02504458', u'African_elephant']]

fpr = dict()
tpr = dict()
roc_auc = dict()
Example No. 29
def classificarion_report_by_class(classifier, X, y,
                                   cv=None,
                                   score='f1',
                                   classes_names=None,
                                   parameters=None,
                                   title='classifier',
                                   jobs=-1):
    """Dada um dataset, encontra os melhores parametros do classificador
    baseado na variacao dos parametros passado para a funcao utilizando
    o RandomizedSearchCV com todo o dataset. Em seguida o classificador
    e treinado utilizando validacao
    cruzada e testado. Para cada classe do probleam e apresentado a media
    da acuracia revocacao e mendida f de cada fold.
    Um grafico da matrix de confusao tambem e mostrado.
    
    Args:
        classifier: a classifier.
        X: features list.
        y: class list.
        cv: a cross validation class.
        classes_names: a list containing the names of classes.
        
    Example:
        Best estimator:
        
        SVC(C=865.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
          gamma=0.0, kernel='linear', max_iter=-1, probability=False,
          random_state=None, shrinking=True, tol=0.001, verbose=False)
                                precision    recall f-measure   support
        Report:
        acara-bandeira-marmorizado       0.16      0.16      0.15         8
                       acara-disco       0.19      0.26      0.22         8
                    barbus-sumatra       0.18      0.20      0.17         8
                       barlus-ouro       0.16      0.19      0.17         8
                       carpa-media       0.09      0.12      0.10         8
                           cascudo       0.09      0.08      0.08         8
                           dourado       0.24      0.30      0.25         8
                           kinguio       0.48      0.46      0.46         8
             kinguio-cometa-calico       0.13      0.13      0.13         8
                   kinguio-korraco       0.10      0.11      0.10         8
                       mato-grosso       0.21      0.21      0.18         8
                   molinesia-preta       0.15      0.16      0.15         8
                             oscar       0.17      0.19      0.17         8
                      oscar-albino       0.21      0.38      0.23         8
                              pacu       0.28      0.26      0.26         8
                       paulistinha       0.25      0.07      0.11         8
                  piau-tres-pintas       0.26      0.26      0.25         8
                     platy-laranja       0.78      0.66      0.68         8
                        telescopio       0.31      0.28      0.28         8
                       tetra-negro       0.14      0.10      0.11         8
                       tricogaster       0.17      0.14      0.15         8
                          tucunare       0.11      0.06      0.07         8
                                                                            
                       avg / total       0.22      0.22      0.20       8.0
                       
            Confusion matrix
                                    Predicted class
                [[10  4  0  0  3  5  2  2  2  4  0  0  1  0  0  3  0  0  3  0  0  1]
            T   [ 0 10  1  0  3  1  0  0  2  4  1  0  2  1  4  1  2  0  3  0  4  1]
            r   [ 2  3  3  1  2  1  0  0  2  8  2  1  3  1  1  3  3  0  1  2  1  0]
            u   [ 3  1  2  8  3  0  1  2  1  0  5  3  1  2  2  0  1  2  0  2  1  0]
            e   [ 3  3  3  0  2  1  3  1  4  5  2  0  3  1  0  1  1  0  3  0  3  1]
                [ 5  3  3  0  1  3  3  2  1  1  0  1  3  3  1  1  7  0  0  0  1  1]
            c   [ 1  5  1  3  1  4  5  0  1  4  1  0  2  2  1  1  3  0  1  0  1  3]
            l   [ 7  1  1  1  1  1  0 14  1  0  1  2  1  0  0  1  0  5  0  2  0  1]
            a   [ 1  1  4  2  5  4  2  0  2  3  2  1  3  1  1  1  1  0  2  1  3  0]
            s   [ 1  6  5  1  4  2  1  1  4  4  1  3  1  1  0  0  0  0  2  2  1  0]
            s   [ 0  3  1  4  1  2  2  1  0  3  9  3  0  1  1  1  1  0  1  3  2  1]
                [ 2  2  2  3  4  1  0  1  1  3  6  3  4  1  0  1  1  0  1  3  1  0]
                [ 2  2  2  3  7  3  1  1  2  1  1  0  6  3  1  2  0  0  2  0  0  1]
                [ 1  5  2  2  0  0  0  0  1  2  1  2  1 10  2  1  3  0  1  2  1  3]
                [ 3  0  2  2  0  3  5  1  1  2  0  1  3  1  4  2  5  0  1  0  3  1]
                [ 6  4  0  6  1  3  8  1  0  0  1  2  0  0  0  0  3  1  2  1  1  0]
                [ 1  0  2  0  0  1  2  0  0  1  2  2  0  3  5  0  9  0  1  0  7  4]
                [ 0  0  0  3  0  0  1  5  0  1  1  1  0  1  0  1  0 25  0  1  0  0]
                [ 2  2  0  0  1  1  0  2  3  1  2  2  2  5  0  3  1  0 10  1  2  0]
                [ 3  0  1  5  1  0  0  4  3  1  4  2  2  0  0  3  0  4  2  3  1  1]
                [ 0  2  1  1  2  1  1  0  0  1  0  1  2  2  3  3  5  0  2  1  8  4]
                [ 1  2  2  1  0  1  3  0  0  2  2  2  1  5  2  1  8  0  1  0  4  2]]
            
            
    """
        
    #number of classes
    number_of_classes = len(classes_names)
    
    print "\nRunning RandomizedSearchCV...."
    grid = RandomizedSearchCV(classifier, parameters, cv = cv, scoring = score, n_jobs = jobs)
    grid.fit(X, y)
    print "Done!!"
    
    print ("\nBest estimator found:")
    print (grid.best_estimator_)
    
    # String used for the report's last line.
    last_line_heading = 'avg / total'

    # Report headers.
    headers = ["precision", "recall", "f-measure", "support"]

    # Width of the longest class name,
    width = max(len(cn) for cn in classes_names)
    # but at least as wide as the last line heading.
    width = max(width, len(last_line_heading))

    # Format columns to display scores.
    fmt = '%% %ds' % width  # first column: class name
    fmt += '  '
    fmt += ' '.join(['% 9s' for _ in headers])
    fmt += '\n'
    headers = [""] + headers
    report = fmt % tuple(headers)
    report += '\n'
    
    # Score averages for each class.
    avg_by_class = {}
    
    #Confusion matrix
    matrix_predicted = []
    matrix_true = []
    
    # Initialize each score average with 0.
    for i in range(len(classes_names)):
        avg_by_class[i] = {'precision': 0,
                           'recall': 0,
                           'f-measure': 0,
                           'support': 0}
    
    print "\nRunning cross validation..."
    #Cross validation folders.
    for i, (train, test) in enumerate(cv):
        print "Running fold %d..." % i
        # Best estimator found by the grid search.
        classifier = grid.best_estimator_
        
        classifier.fit(X[train], y[train])
        predicted = classifier.predict(X[test])
        
        matrix_predicted.extend(predicted)
        matrix_true.extend(y[test])
        
        # Per-class scores for this fold.
        p, r, f1, s = precision_recall_fscore_support(y[test],
                                                      predicted,
                                                      average = None)
        
        # Accumulate each fold's per-class scores.
        for index, label in enumerate(classes_names):
            avg_by_class[index]['precision'] += p[index]
            avg_by_class[index]['recall'] += r[index]
            avg_by_class[index]['f-measure'] += f1[index]
            avg_by_class[index]['support'] += s[index]
    
    avg_precision = 0.0
    avg_recall = 0.0
    avg_fscore = 0.0
    avg_support = 0.0
    
    #Scores for each class from all folds.
    for i in range(len(classes_names)):
        #Class name
        values = [classes_names[i]]
        
        # Final per-class averages.
        p = avg_by_class[i]['precision'] / len(cv)
        r = avg_by_class[i]['recall'] / len(cv)
        f = avg_by_class[i]['f-measure'] / len(cv)
        s = avg_by_class[i]['support'] / len(cv)
        
        #Format string to print.
        for v in (p, r, f):
            values += ["{0:0.2f}".format(v)]
        values += ["{0}".format(s)]
        
        avg_precision += p
        avg_recall += r
        avg_fscore += f
        avg_support += s
        report += fmt % tuple(values)
    report += '\n'
    
    # Overall classifier averages.
    values = [last_line_heading]
    for v in (avg_precision/number_of_classes,
              avg_recall/number_of_classes,
              avg_fscore/number_of_classes):
        values += ["{0:0.2f}".format(v)]
    values += ['{0}'.format(avg_support/number_of_classes)]
    report += fmt % tuple(values)
    
    print "Report:"
    print report
    
    #Confusion matrix
    cm = confusion_matrix(matrix_true, matrix_predicted)
    
    #plot confusion matrix.
    pcm.plot_confusion_matrix(cm, title = title, class_labels = classes_names)
    
    return grid.best_estimator_
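A minimal usage sketch for the function above, assuming the legacy (pre-0.18)
scikit-learn API in which cross-validation objects are directly iterable and
support len(); X, y and class_names are placeholders:

from sklearn.svm import SVC
from sklearn.cross_validation import StratifiedKFold

cv = StratifiedKFold(y, n_folds=5)
params = {'C': [1, 10, 100, 865], 'kernel': ['linear', 'rbf', 'poly']}
best = classificarion_report_by_class(SVC(), X, y, cv=cv,
                                      classes_names=class_names,
                                      parameters=params)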
Example No. 30
def main():
    version_dir = './v6/'  # needs trailing slash

    # validation split, both files with headers and the Happy column
    train_file = version_dir + 'trainData.csv'

    label_file = 'train_labels.csv'

    # train = pd.read_csv(train_file)[0:2000]
    # test = pd.read_csv(train_file)[2000:]
    #
    # y_train = pd.read_csv(label_file)[0:2000]['Label']
    # x_train = train.drop(['id'], axis=1)
    #
    # y_test = pd.read_csv(label_file)[2000:].Label
    # x_test = test.drop(['id'], axis=1)

    train = pd.read_csv(train_file)[1000:]
    test = pd.read_csv(train_file)[0:1000]

    y_train = pd.read_csv(label_file)[1000:]['Label']
    x_train = train.drop(['id'], axis=1)

    y_test = pd.read_csv(label_file)[0:1000].Label
    x_test = test.drop(['id'], axis=1)

    y_train_num = y_train
    x_train_num = x_train
    all_col_headers = list(train.columns.values)

    remove_cols = ['id']
    numeric_cols = [x for x in all_col_headers if x not in remove_cols]
    # remove_cols = ['label','instance weight','migration code-change in msa','migration code-change in reg','migration code-move within reg','migration prev res in sunbelt']

    cat_cols = [
        x for x in all_col_headers
        if x not in numeric_cols and x not in remove_cols
    ]
    used_cols = [x for x in all_col_headers if x not in remove_cols]

    # handle numerical features
    x_num_train = train[numeric_cols].to_numpy()  # .as_matrix() was removed from pandas
    x_num_test = test[numeric_cols].to_numpy()

    x_train_count = x_num_train.shape[0]
    x_test_count = x_num_test.shape[0]

    x_num_combined = np.concatenate((x_num_train, x_num_test),
                                    axis=0)  # 0 -row 1 - col

    # scale numeric features to <0,1>
    max_num = np.amax(x_num_combined, 0)

    x_num_combined = np.true_divide(
        x_num_combined,
        max_num)  # scale by max. truedivide needed for decimals
    x_train_num_scaled = x_num_combined[0:x_train_count]
    x_test_num_scaled = x_num_combined[x_train_count:]

    y_test_num = y_test.to_numpy()
    y_train_num = y_train.to_numpy()

    class_labels = ['Abbr', 'Human', 'Loc', 'Desc', 'Entity', 'Num']

    classifierType = "SVM"
    print "Classifier: " + classifierType

    if classifierType == "DT":
        classifier = DecisionTreeClassifier(min_samples_leaf=5)
        classifier.fit(x_train_num, y_train_num)
        # classifier.fit(np.concatenate((x_train_num, x_train_num), axis=0), np.concatenate((y_train_num, y_train_num), axis=0))
        tree.export_graphviz(classifier,
                             feature_names=used_cols,
                             out_file=version_dir + "weighted_tree.dot")

    elif classifierType == "Ada":
        # 200,0.01 - v1
        # 50, 0.1 - v2
        classifier = AdaBoostClassifier(n_estimators=50, learning_rate=1)
        classifier.fit(x_train_num, y_train_num)

    elif classifierType == "SVM":
        classifier = svm.SVC(probability=True, C=1, kernel='linear')
        classifier.fit(x_train_num, y_train_num)
        # classifier.fit(np.concatenate((x_train_num, x_train_num), axis=0), np.concatenate((y_train_num, y_train_num), axis=0))

    elif classifierType == "RF":
        classifier = RandomForestClassifier(n_estimators=200, n_jobs=-1)
        classifier.fit(x_train_num, y_train_num)

    elif classifierType == "NB":
        classifier = GaussianNB()
        classifier.fit(x_train_num, y_train_num)

    elif classifierType == "KNN":
        classifier = neighbors.KNeighborsClassifier(n_neighbors=1)
        classifier.fit(x_train_num, y_train_num)

    predicted_test = classifier.predict(x_test)
    predicted_train = classifier.predict(x_train)

    print "\nMetrics classification report - Test"
    print(metrics.classification_report(y_test_num, predicted_test))
    print "Confusion Matrix report - Test"
    test_confusion_matrix = metrics.confusion_matrix(y_test_num,
                                                     predicted_test)
    print test_confusion_matrix
    print "Test Correct predictions: ", np.trace(test_confusion_matrix)

    plot_confusion_matrix(test_confusion_matrix, class_labels)

    print ""

    print "\nMetrics classification report - Train"
    print metrics.classification_report(y_train_num, predicted_train)
    print "Confusion Matrix report - Train"
    train_confusion_matrix = metrics.confusion_matrix(y_train_num,
                                                      predicted_train)
    print train_confusion_matrix
    print "Train correct predictions: ", np.trace(train_confusion_matrix)

    outputHeaderRow = list(all_col_headers)
    outputHeaderRow.insert(0, "Predicted")
    outputHeaderRow.insert(0, "Actual")
    trainOutputData = np.column_stack(
        (y_train_num, predicted_train, train.to_numpy()))
    testOutputData = np.column_stack(
        (y_test_num, predicted_test, test.to_numpy()))

    trainOp_wthHeader = np.vstack((outputHeaderRow, trainOutputData))
    testOp_wthHeader = np.vstack((outputHeaderRow, testOutputData))

    np.savetxt(fname=version_dir + 'train_preds.csv',
               X=trainOp_wthHeader,
               delimiter=',',
               fmt="%s")
    np.savetxt(version_dir + 'test_preds.csv',
               testOp_wthHeader,
               delimiter=',',
               fmt="%s")

    print "\nFeature Importances"
    featImps = classifier.feature_importances_
    featNames = np.array(all_col_headers[1:]).flatten()
    featVals = np.column_stack((featImps, featNames))
    featVals = featVals[featVals[:, 0].argsort()[::-1]]

    print featVals
Example No. 31
preds = pd.DataFrame([
    list([r.sort_values(ascending=False)[:5].index.values])
    for i, r in probs.iterrows()
])

print "map@5:", mapk([[l] for l in exp_data_test_labels], preds[0], 5)
from sklearn.metrics import accuracy_score
print "accuracy is", accuracy_score(exp_data_test_labels, pred)

# Compute confusion matrix
class_names = exp_data_train_labels.unique()
cnf_matrix = confusion_matrix(exp_data_test_labels, pred)
np.set_printoptions(precision=2)

plt.figure()
plot_confusion_matrix(cnf_matrix,
                      classes=class_names,
                      title='KNN - Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix,
                      classes=class_names,
                      normalize=True,
                      title='KNN - Normalized confusion matrix')

plt.show()
"""
map@5: 0.245661767929
accuracy is 0.149976231175
"""
Example No. 32
def train_C(params):

    # -------------------
    #  Parameters
    # -------------------

    log(str(params), name=params['log_name'])

    # # Clear remaining model
    # network.clear(params['name']+'_R'+str(params['start_run']))

    # -------------------
    #  CUDA
    # -------------------

    cuda = torch.cuda.is_available()
    C_Loss = torch.nn.BCELoss()

    if cuda:
        C_Loss.cuda()
        floatTensor = torch.cuda.FloatTensor
        log("CUDA Training.", name=params['log_name'])
    else:
        floatTensor = torch.FloatTensor
        log("CPU Training.", name=params['log_name'])

    # -------------------
    #  Data scaling
    # -------------------
    '''
    XTL ... Training data labelled
    XTU ... Training data unlabelled
    
    XL  ... Labelled data
    XU  ... Unlabelled data
    XV  ... Validation data
    '''

    dset_L = params['dset_L']
    dset_V = params['dset_V']

    XTL, YTL = pp.get_data(params, dset_L)
    XV, YV = pp.get_data(params, dset_V)

    XTL = pp.scale_minmax(XTL)
    XV = pp.scale_minmax(XV)

    if params['ratio_V'] < 1.0:
        XV, YV = pp.select_random(XV, YV, params['ratio_V'])
        log("Selected %s of validation samples." %
            (format(params['ratio_V'], '0.2f')),
            name=params['log_name'])
    XV, YV = pp.get_tensor(XV, YV)

    # -------------------
    #  Load accuracy
    # -------------------

    mat_accuracy_C = network.load_R_Acc(params)

    # -------------------
    #  Start Training
    # -------------------

    YF = None
    PF = None

    for run in range(params['runs']):

        # -------------------
        #  Training Data
        # -------------------

        XL, YL = XTL, YTL

        if params['ratio_L'] < 1.0:
            XL, YL = pp.select_random(XL, YL, params['ratio_L'])
            log("Selected %s of labelled samples." %
                (format(params['ratio_L'], '0.2f')),
                name=params['log_name'])

        count_L = YL.shape[0]
        log("Number of labelled samples = %d." % (count_L),
            name=params['log_name'])

        dataloader = pp.get_dataloader(params, XL, YL)

        C = network.load_Ref(run, params)

        # -------------------
        #  Optimizers
        # -------------------

        optimizer_C = torch.optim.Adam(C.parameters(),
                                       lr=params['CLR'],
                                       betas=(params['CB1'], params['CB2']))

        # -------------------
        #  Training
        # -------------------

        if run >= params['start_run']:

            if params['oversampling']:
                XL, YL = pp.over_sampling(params, XL, YL)
                log("Oversampling: created %d new labelled samples." %
                    (XL.shape[0] - count_L),
                    name=params['log_name'])

            for epoch in range(params['epochs']):

                # Jump to start epoch
                if run == params['start_run']:
                    if epoch < params['start_epoch']:
                        continue

                running_loss_C = 0.0

                for i, data in enumerate(dataloader, 1):

                    loss_C = []

                    # -------------------
                    #  Train the classifier on real samples
                    # -------------------
                    X1, Y1 = data
                    optimizer_C.zero_grad()
                    P1 = C(X1)
                    loss = C_Loss(P1, Y1)
                    loss_C.append(loss)
                    loss.backward()
                    optimizer_C.step()

                    # -------------------
                    #  Calculate overall loss
                    # -------------------
                    running_loss_C += np.mean([loss.item() for loss in loss_C])

                # -------------------
                #  Post Epoch
                # -------------------

                logString = "[Run %d/%d] [Epoch %d/%d] [C loss: %f]" % (
                    run + 1, params['runs'], epoch + 1, params['epochs'],
                    running_loss_C / (i))
                log(logString, save=False, name=params['log_name'])

                if (epoch + 1) % params['save_step'] == 0:
                    # log("~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~|",save=False,name=params['log_name'])
                    idx = run, int(epoch / params['save_step']) + 1

                    # Predict labels
                    PV = C(XV)

                    acc_C_real = get_accuracy(PV, YV)
                    mat_accuracy_C[idx] = acc_C_real

                    logString = "[Run %d/%d] [Epoch %d/%d] [C acc: %f ]" % (
                        run + 1, params['runs'], epoch + 1, params['epochs'],
                        acc_C_real)
                    log(logString, save=True, name=params['log_name'])

                    network.save_Ref(params['name'], run, C)
                    network.save_R_Acc(params, mat_accuracy_C)

                    params['start_epoch'] = epoch + 1
                    network.save_Parameter(params)

                    # log("~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~|",save=False,name=params['log_name'])

            # End of Training Run
            params['start_run'] = run + 1
            params['start_epoch'] = 0
            network.save_Parameter(params)

        # -------------------
        #  Post Run
        # -------------------

        # Classify Validation data
        PC = C(XV).detach()

        if YF is None:
            YF = YV
            PF = PC
        else:
            YF = torch.cat((YF, YV), 0)
            PF = torch.cat((PF, PC), 0)

    # -------------------
    #  Post Training
    # -------------------

    timeline = np.arange(0, params['epochs'] + 1, params['save_step'])

    # -------------------
    #  Plot Accuracy
    # -------------------

    acc_C = np.mean(mat_accuracy_C, axis=0)

    fig, ax = plt.subplots()

    legend = []
    cmap = plt.get_cmap('gnuplot')
    indices = np.linspace(0, cmap.N, 7)
    colors = [cmap(int(i)) for i in indices]

    ax.plot(timeline, acc_C, c=colors[0], linestyle='solid')
    legend.append("Accuracy $A_C$")

    ax.set_xlim(0.0, params['epochs'])
    ax.set_ylim(0.0, 1.0)

    ax.legend(legend)
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Accuracy')

    ax.grid()
    save_fig(params, 'eval', fig)

    # -------------------
    #  Generate Confusion Matrix
    # -------------------

    YF = pp.one_hot_to_labels(params, YF)
    PF = pp.one_hot_to_labels(params, PF)

    con_mat = confusion_matrix(YF,
                               PF,
                               labels=None,
                               sample_weight=None,
                               normalize='true')
    plot_confusion_matrix(con_mat, params, name='C', title='Confusion matrix')

    # -------------------
    #  Log Results
    # -------------------

    log(" - " + params['name'] + ": [C acc: %f]" % (acc_C[-1]), name='results')