Example #1
def print_matrix(predictions):
    class_names = np.array([str(i) for i in range(1+np.max(
        np.concatenate([clusters_true2, predictions])))])
    plot_confusion_matrix(
        clusters_true2, predictions, classes=class_names,
        normalize=True, title='Normalized confusion matrix',
        path=DATASET_PATH + DATASET_NAME, should_save=True)
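
All of these examples call a project-specific plot_confusion_matrix helper from their own utils module, so the signature varies from repo to repo. For reference, here is a minimal sketch of what such a helper typically looks like, built on matplotlib and scikit-learn; the normalize/path/should_save parameters mirror the call in Example #1 above, and everything else is an assumption rather than any repo's actual implementation.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(y_true, y_pred, classes, normalize=False,
                          title='Confusion matrix', path=None,
                          should_save=False):
    """Minimal sketch of the kind of helper these examples assume."""
    cm = confusion_matrix(y_true, y_pred)
    if normalize:
        # Row-normalize so each true-class row sums to 1.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    fig.colorbar(im, ax=ax)
    ax.set(xticks=np.arange(len(classes)),
           yticks=np.arange(len(classes)),
           xticklabels=classes, yticklabels=classes,
           ylabel='True label', xlabel='Predicted label', title=title)
    if should_save and path is not None:
        fig.savefig(path + '_cm.png')
    return cm
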
Example #2
    def evaluate(self, X, y):
        """
        Evaluates the trained crf model.
        
        :param X:   test data: [[(word, pos), (word, pos)], [...]]
        :param y:   test labels
        :return:    evaluation result
        """
        y_pred = self.crf.predict(X)

        sent_level_acc = self.evaluate_sentence(y, y_pred)

        labels = flatten(y)
        y_pred = flatten(y_pred)

        print("F1 score:")
        print(
            sklearn.metrics.precision_recall_fscore_support(labels,
                                                            y_pred,
                                                            average='micro'))
        print()
        print("Accuracy:")
        print(sklearn.metrics.accuracy_score(labels, y_pred))
        print()
        print("Sentence level accuracy:")
        print(sent_level_acc)
        print()
        print("F1 score per class:")
        print(sklearn.metrics.precision_recall_fscore_support(labels, y_pred))
        print()
        print("Confusion matrix:")
        cfm = sklearn.metrics.confusion_matrix(labels, y_pred)

        plot_confusion_matrix(cfm, np.unique(labels))
Example #3
def get_results(clf,
                data,
                fit_time,
                feature_set,
                clf_name='',
                save_confusion=False):
    results = {}

    t0 = time.time()
    predicted = np.array([])
    for i in range(0, len(data['test']['X']), 128):  # go in chunks of size 128
        predicted_single = clf.predict(data['test']['X'][i:(i + 128)])
        predicted = np.append(predicted, predicted_single)
    t1 = time.time()
    cm = metrics.confusion_matrix(data['test']['y'], predicted)
    results['testing_time'] = t1 - t0
    results['accuracy'] = metrics.accuracy_score(data['test']['y'], predicted)

    print("classifier: %s" % clf_name)
    print("training time: %0.4fs" % fit_time)
    print("testing time: %0.4fs" % results['testing_time'])
    print("accuracy: %0.4f" % results['accuracy'])
    print("confusion matrix:\n%s" % cm)
    if save_confusion:
        path = './confusion_plots/%s_%s' % (clf_name, feature_set)
        title = '%s (accuracy: %0.2f)' % (clf_name, results['accuracy'])
        u.plot_confusion_matrix(cm, path, title=title)
    return results
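Example #4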
def performance_summary(y_test,
                        y_predicted,
                        output,
                        y_mapping=None,
                        y_labels=None):
    scores = {}
    scores['Accuracy'] = accuracy_score(y_test, y_predicted)
    scores['Precision'] = precision_score(y_test, y_predicted, average='macro')
    scores['Recall'] = recall_score(y_test, y_predicted, average='macro')
    scores['F1'] = f1_score(y_test, y_predicted, average='macro')
    print(scores)

    print(' - Detailed classification report:')
    if y_mapping is not None:
        y_test = list(map(y_mapping, y_test))
        y_predicted = list(map(y_mapping, y_predicted))
    detailed_report = classification_report(y_test, y_predicted)

    print(detailed_report, end='\n')
    with open(os.path.join(output, 'classification_report.txt'), 'w') as out:
        out.writelines(detailed_report)

    print(' - Saving confusion matrix')
    utils.plot_confusion_matrix(y_test,
                                y_predicted,
                                labels=y_labels,
                                output=os.path.join(output,
                                                    'confusion_matrix.pdf'))
Example #5
    def eval_fn(self, loader, device, train=False, confusion_m=False, criterion=None):
        """
        Evaluation method
        :param loader: data loader for either training or testing set
        :param device: torch device
        :param train: boolean to indicate if training or test set is used
        :param confusion_m: whether to plot a confusion matrix of the predictions
        :param criterion: optional loss function; if given, the average loss is tracked
        :return: (average accuracy, average loss) on the data
        """
        objs = AvgrageMeter()
        score = AvgrageMeter()
        self.eval()

        t = tqdm(loader)
        with torch.no_grad():
            for images, labels in t:
                images = images.to(device)
                labels = labels.to(device)

                outputs = self(images)
                acc, _ = accuracy(outputs, labels, topk=(1, 5))
                score.update(acc.item(), images.size(0))

                if criterion:
                    loss = criterion(outputs, labels)
                    objs.update(loss.data, images.size(0))

                if confusion_m:
                    # Plot confusion matrix
                    plot_confusion_matrix(labels.cpu(),
                                          outputs.topk(1, 1, True, True)[1].cpu(),
                                          normalize=True, title='Confusion matrix')

                t.set_description('(=> Test) Score: {:.4f}'.format(score.avg))

        return score.avg, objs.avg
Example #6
    def evaluate(self, X_val, y_val):
        # evaluate the model with the validation set
        model = load_model(self.model_file)
        scores = model.evaluate(X_val, y_val)
        print('val_loss: {}, val_acc: {}'.format(scores[0], scores[1]))

        y_true, y_pred = get_predictions_and_labels(model, X_val, y_val)
        cm = confusion_matrix(y_true, y_pred)
        cm_percent = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        df = pd.DataFrame(cm_percent,
                          index=self.EMOTIONS,
                          columns=self.EMOTIONS)
        df.index.name = 'Actual'
        df.columns.name = 'Predicted'
        df.to_csv(self.base_dir + self.model_dir + 'cm_val.csv',
                  float_format='%.4f')

        # plot percentage confusion matrix
        fig1, ax1 = plt.subplots()
        plot_confusion_matrix(cm_percent, class_names=self.EMOTIONS)
        plt.savefig(self.base_dir + self.model_dir + 'cm_percent_val.png',
                    format='png')
        # plot normal confusion matrix
        fig2, ax2 = plt.subplots()
        plot_confusion_matrix(cm,
                              float_display='.0f',
                              class_names=self.EMOTIONS)
        plt.savefig(self.base_dir + self.model_dir + 'cm_val.png',
                    format='png')

        plt.show()
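
Several of these examples (6, 8, 20, 27, 29) row-normalize the matrix by hand with cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]. On scikit-learn 0.22 or newer, confusion_matrix can produce the same normalized matrix directly; a small equivalent sketch with toy labels:

from sklearn.metrics import confusion_matrix

y_true = [0, 0, 1, 1, 1]
y_pred = [0, 1, 1, 1, 0]
# normalize='true' divides each row by its sum (scikit-learn >= 0.22),
# matching cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] above.
cm_percent = confusion_matrix(y_true, y_pred, normalize='true')
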
Example #7
    def do_viterbi(self):
        parameters = {}
        parameters["dataset"] = "A" if self.a_radio.isChecked() else "B"
        if self.split_radio.isChecked():
            parameters["test_days"] = self.days_spin.value()

        if self.sampling_radio.isChecked():
            parameters["n_samples"] = self.samples_spin.value()

        sample, predicted, accuracy = smarthouse(**parameters)

        plot_classification_report(sample, predicted)
        plt.figure(2)
        plot_confusion_matrix(
            sample,
            predicted,
            list(map(str, range(max(sample) + 1))),
            normalize=True,
        )

        sample_text, predicted_text = self.format_sequences(sample, predicted)

        self.accuracy_value_label.setText(f"{accuracy*100:.3f}")
        self.sample_textbrowser.setText(sample_text)
        self.predicted_textbrowser.setText(predicted_text)

        plt.show()
Example #8
    def compare_model(self, X_val, y_val):
        folder_list = [
            model_dir for model_dir in os.listdir(self.base_dir)
            if 'LSTM' in model_dir
        ]
        for folder in folder_list:
            filename = 'LSTM.h5'
            path = os.path.join(self.base_dir, folder, filename)
            model = load_model(path)
            scores = model.evaluate(X_val, y_val)
            print('model: {}, val_loss: {}, val_acc: {}'.format(
                folder, scores[0], scores[1]))

            y_true, y_pred = get_predictions_and_labels(model, X_val, y_val)
            cm = confusion_matrix(y_true, y_pred)
            cm_percent = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

            # plot percentage confusion matrix
            fig1, ax1 = plt.subplots()
            plot_confusion_matrix(cm_percent, class_names=self.EMOTIONS)
            plt.savefig(os.path.join(self.base_dir, folder,
                                     'cm_percent_test.png'),
                        format='png')
            # plot normal confusion matrix
            fig2, ax2 = plt.subplots()
            plot_confusion_matrix(cm,
                                  float_display='.0f',
                                  class_names=self.EMOTIONS)
            plt.savefig(os.path.join(self.base_dir, folder, 'cm_test.png'),
                        format='png')
Example #9
def main_pipeline():
    pp = pprint.PrettyPrinter(indent=4)

    pipe = Pipeline([
        ('scale', StandardScaler()),
        # ('classify', LinearSVC())
        ('classify', LogisticRegression(C=0.01, penalty='l1'))
    ])

    data = load_data()
    X, y = get_X_y(data)

    X_undersample, y_undersample = generate_undersample_rus(X, y)

    pipe.fit(X_undersample, y_undersample)

    # predict on source data
    y_pred = pipe.predict(X)

    # Compute confusion matrix
    cnf_matrix = confusion_matrix(y, y_pred)
    np.set_printoptions(precision=2)

    print("Recall metric in the testing dataset: ",
          cnf_matrix[1, 1] / (cnf_matrix[1, 0] + cnf_matrix[1, 1]))

    # Plot non-normalized confusion matrix
    class_names = [0, 1]
    plt.figure()
    plot_confusion_matrix(cnf_matrix,
                          classes=class_names,
                          title='Confusion matrix')
    plt.show()
    print('plt.show')
Example #10
def model(clf, Xtrain, Xtest, ytrain, ytest, modelname):
    """
    :param model: estimator
    """
    clf = clf
    clf.fit(Xtrain, ytrain)
    pred = clf.predict(Xtest)
    cm = confusion_matrix(ytest, pred)
    print('############################')
    print("the recall for", modelname, ' is:',
          cm[1, 1] / (cm[1, 1] + cm[1, 0]))
    print('############################')
    fig, ax = plt.subplots()
    u.plot_confusion_matrix(cm,
                            classes=np.unique(ytrain),
                            ax=ax,
                            title='resampled' + modelname)
    plt.show()
    #fig = plt.figure(figsize=(6,3))
    print("TP: ", cm[1, 1, ],
          "exceptional events transaction predicted exception")  #
    print("TN: ", cm[0, 0], "normal events predicted normal")
    print("FP: ", cm[0, 1], "normal events predicted exception")
    print("FN: ", cm[1, 0], "exceptional events predicted normal")
    # sns.heatmap(cm, cmap="coolwarm_r", annot=True, linewidths=0.5)
    # plt.title("Confusion_matrix")
    # plt.xlabel("Predicted_class")
    # plt.ylabel("Real class")
    # plt.show()
    print(
        "\n----------Classification Report------------------------------------"
    )
    print(classification_report(ytest, pred))
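
Examples #9 and #10 both read recall for the positive class out of the matrix as cm[1, 1] / (cm[1, 1] + cm[1, 0]). For binary labels, scikit-learn can unpack the cells and compute the score directly; a small sketch with toy data:

from sklearn.metrics import confusion_matrix, recall_score

y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]
# For binary labels, ravel() flattens the 2x2 matrix in this fixed order.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
# recall_score(pos_label=1) computes the same tp / (tp + fn) ratio.
print(recall_score(y_true, y_pred, pos_label=1), tp / (tp + fn))
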
Example #11
def visualize(m_test, x_test, y_test, model, variant=None):
    # viz accuracy
    print('predicting test set...')
    y_pred = model.predict([m_test, x_test], batch_size=48)
    conf_mat = confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))
    name = 'RNN'
    if variant is not None:
        name += '_' + variant
    plot_confusion_matrix(conf_mat, RARITIES, name)
Example #12
def predict(**kwargs):
    truth, predict, accuracy = smarthouse(**kwargs)
    print(sklearn.metrics.classification_report(truth, predict))
    conf_mat = sklearn.metrics.confusion_matrix(truth, predict)

    plot_confusion_matrix(truth,
                          predict,
                          list(map(str, range(max(truth)))),
                          normalize=True)
Example #13
def val(netG_A2B, netG_B2A, netD_A, netD_B, netGaze):
    global best_accuracy
    netG_B2A.eval()
    netGaze.eval()
    pred_all = np.array([], dtype='int64')
    target_all = np.array([], dtype='int64')

    for idx, (data, target) in enumerate(val_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data[:, :args.nc, :, :]), Variable(target)

        # do the forward pass
        data = gaze2gan(data, val_loader.dataset.mean[0:args.nc],
                        val_loader.dataset.std[0:args.nc])
        fake_data = netG_B2A(data)
        fake_data = gan2gaze(fake_data, val_loader.dataset.mean[0:args.nc],
                             val_loader.dataset.std[0:args.nc])
        scores = netGaze(fake_data.repeat(1, int(3 / args.nc), 1, 1))[0]
        scores = scores.view(-1, args.num_classes)
        pred = scores.data.max(1)[1]  # got the indices of the maximum, match them
        print('Done with image {} out of {}...'.format(
            min(args.batch_size * (idx + 1), len(val_loader.dataset)),
            len(val_loader.dataset)))
        pred_all = np.append(pred_all, pred.cpu().numpy())
        target_all = np.append(target_all, target.cpu().numpy())

    val_accuracy, _ = plot_confusion_matrix(target_all, pred_all,
                                            merged_activity_classes)
    print("\n------------------------")
    print("Validation accuracy = {:.2f}%\n------------------------".format(
        val_accuracy))
    with open(os.path.join(args.output_dir, "logs.txt"), "a") as f:
        f.write("\n------------------------\n")
        f.write(
            "Validation accuracy = {:.2f}%\n------------------------\n".format(
                val_accuracy))

    # now save the model if it has better accuracy than the best model seen so far
    if val_accuracy > best_accuracy:
        # save the model
        torch.save(netG_A2B.state_dict(),
                   os.path.join(args.output_dir, 'netG_A2B.pth'))
        torch.save(netG_B2A.state_dict(),
                   os.path.join(args.output_dir, 'netG_B2A.pth'))
        torch.save(netD_A.state_dict(),
                   os.path.join(args.output_dir, 'netD_A.pth'))
        torch.save(netD_B.state_dict(),
                   os.path.join(args.output_dir, 'netD_B.pth'))
        torch.save(netGaze.state_dict(),
                   os.path.join(args.output_dir, 'netGaze.pth'))
        best_accuracy, _ = plot_confusion_matrix(target_all, pred_all,
                                                 merged_activity_classes,
                                                 args.output_dir)

    return val_accuracy
Example #14
def evaluate_model(config, model, X_test, Y_test, savedir, combination):
    features = X_test.shape[2]
    targets = Y_test.shape[2]
    
    major_classes = ['idle', 'stop', 'go', 'clear']
    
    minor_classes = ["idle",
                     "stop_both-static", "stop_both-dynamic", "stop_left-static",
                     "stop_left-dynamic", "stop_right-static", "stop_right-dynamic",
                     "clear_left-static", "clear_right-static",
                     "go_both-static", "go_both-dynamic", "go_left-static",
                     "go_left-dynamic", "go_right-static", "go_right-dynamic"]
    
    predictions = model.predict(X_test)

    
    if int(config['training-mode']['subclasses']) == 1:
        classes_tcg = minor_classes
        predictions_bin = utils.binarize_predictions(X_test.reshape(-1, features),
                                                     Y_test.reshape(-1, targets),
                                                     predictions.reshape(-1, targets),
                                                     subclasses=True)
    else:
        classes_tcg = major_classes

        predictions_bin = utils.binarize_predictions(X_test.reshape(-1, features),
                                                     Y_test.reshape(-1, targets),
                                                     predictions.reshape(-1, targets))

    cnf_matrix = confusion_matrix(predictions_bin[1], predictions_bin[2], labels=classes_tcg)
    
    np.set_printoptions(precision=2)  
    plt.figure(figsize=(30, 12))
    plt.subplot(121)
    utils.plot_confusion_matrix(cnf_matrix, classes=classes_tcg, title='Confusion matrix, without normalization')
    
    plt.subplot(122)
    utils.plot_confusion_matrix(cnf_matrix, classes=classes_tcg, normalize=True,
                                title='Confusion matrix with normalization')

    plt.suptitle("confusion matrix")
    plt.subplots_adjust(top=0.88)

    plt.savefig(os.path.join(savedir, 'cm' + '_' + combination))
    
    unpadded_seq = utils.delete_pading(X_test.reshape(-1, X_test.shape[2]),
                                       Y_test.reshape(-1, targets),
                                       predictions.reshape(-1, targets))
    
    utils.plot_roc_multiclass(unpadded_seq[1], unpadded_seq[2],
                              targets, classes_tcg)
        
    plt.savefig(os.path.join(savedir, 'roc'+'_'+combination))
    
    plt.close('all')
Example #15
    def _train_party_classifier(self, force: bool = False):
        """
        Trains classifier learning to predict political party from moral relevance weight vectors.
        :param force: Trains and overwrites classifier even if already available.
        :return:
        """

        pp_model_path = "data/party_predictor.pkl"
        pp_predictor = None

        # Build model predicting political party from moral relevance vectors.
        if force or not os.path.isfile(pp_model_path):
            df = self._users_df.sample(frac=1)
            df.mv_scores = df.mv_scores.values / df.num_words.values
            df.loc[df.party == "Libertarians", "party"] = "Republican Party"
            class_names = ["Republican Party", "Democratic Party"]

            x = np.asarray([np.asarray(x) for x in df.mv_scores.values])
            le = preprocessing.LabelEncoder()
            le.fit(class_names)
            y = le.transform(df.party.values)

            for train_index, test_index in StratifiedShuffleSplit(
                    n_splits=1, test_size=0.5).split(x, y):
                x_train, x_test = x[train_index], x[test_index]
                y_train, y_test = y[train_index], y[test_index]

                pp_predictor = xgb.XGBClassifier(objective='binary:logistic',
                                                 colsample_bytree=0.7,
                                                 learning_rate=0.05,
                                                 n_estimators=6000,
                                                 n_jobs=0,
                                                 nthread=0)
                pp_predictor.fit(x_train, y_train)
                pickle.dump(pp_predictor, open(pp_model_path, "wb"))

                y_pred = pp_predictor.predict(x_test)
                print(
                    classification_report(y_test,
                                          y_pred,
                                          target_names=class_names))
                utils.plot_precision_recall_curve(y_test, y_pred)
                utils.plot_roc_curve(y_test, y_pred, 2)
                utils.plot_confusion_matrix(
                    y_test,
                    y_pred, ["Republican Party", "Democratic Party"],
                    title="Confusion Matrix")
                # scores = cross_val_score(pp_predictor, x, y, cv=20, scoring='f1_macro')
                # print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

        # Load built model.
        else:
            pp_predictor = pickle.load(open(
                pp_model_path, "rb"))  # pd.read_pickle(path=pp_model_path)

        return pp_predictor
Example #16
    def make_confusion_matrix(self):
        y_true = pd.read_csv(
            osp.join(self.ROOT, "input", self.raw_dirname,
                     "train.csv"))["label"].values
        y_pred = pd.read_csv(osp.join(self.val_preds_path,
                                      "oof_preds.csv"))["pred"].values
        cmx = confusion_matrix(y_true, y_pred)
        plot_confusion_matrix(cm=cmx,
                              classes=self.classes,
                              save_path=self.WORK_DIR)
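Example #17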
def evaluate_by_frame_state_level(predictions_idx, true_idx, stateList,
                                  model_name):
    acc = accuracy_score(predictions_idx, true_idx)
    print('Frame-by-frame at the state level: ', acc * 100, '%')

    # plot confusion matrix
    cm = confusion_matrix(true_idx, predictions_idx)
    plot_confusion_matrix(
        cm, len(stateList), model_name,
        'Confusion matrix for frame-by-frame at state level')
    return acc
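Example #18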
def test_vgg_dataset():
    global dataset_config

    dataset = dataset_config['ucm']

    img_path = tf.placeholder(tf.string)
    img_content = tf.read_file(img_path)
    img = tf.image.decode_image(img_content, channels=3)

    # img = tf.image.resize_image_with_crop_or_pad(img, config.IMG_W, config.IMG_H)
    img2 = tf.image.resize_nearest_neighbor([img],
                                            [config.IMG_H, config.IMG_W])
    # with tf.Session() as sess:
    #     mm2 = sess.run(img2,feed_dict={img_path:'hd_0613.jpg'})[0]
    #     print(mm2.shape)
    #     plt.imshow(mm2)
    #
    #     plt.show()
    x = tf.placeholder(tf.float32, shape=[1, config.IMG_W, config.IMG_H, 3])
    y_ = tf.placeholder(tf.int16, shape=[1, config.N_CLASSES])

    logits = VGG.VGG16N(x, config.N_CLASSES, False)

    predict = tf.argmax(logits, 1)
    # true_label = tf.argmax(label_batch, 1)
    # loss = tools.loss(logits, y_)
    # accuracy = tools.accuracy(logits, y_)
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(dataset['checkpoint_path'])
    matrix_confusion = np.zeros((dataset['n_class'], dataset['n_class']))
    if ckpt and ckpt.model_checkpoint_path:
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        print('step: ', global_step)
        i = 0
        with tf.Session() as sess:
            i = 0
            saver.restore(sess, ckpt.model_checkpoint_path)
            val_data_path = os.path.join(dataset['data_path'], 'validation')
            for val_class_name in os.listdir(val_data_path):
                class_path = os.path.join(val_data_path, val_class_name)
                class_index = dataset['class2label'][val_class_name]
                for val_img_name in os.listdir(class_path):
                    val_img_path = os.path.join(class_path, val_img_name)
                    img_content = sess.run(img2,
                                           feed_dict={img_path: val_img_path})
                    pre = sess.run(predict, feed_dict={x: img_content})
                    print(class_index, pre)
                    matrix_confusion[class_index][pre] += 1

        utils.plot_confusion_matrix(matrix_confusion,
                                    normalize=False,
                                    target_names=config.ucm_class,
                                    title="Confusion Matrix")
        np.savetxt('ucm_vgg_confusion_matrix', matrix_confusion)
Example #19
    def confusion_matrix(self, normalize=False):
        """ Plots a confusion matrix of the model
        """
        predictions = self.model.predict_generator(self.test_batches)
        predictions = np.argmax(predictions, axis=1)
        ground_truth = self.test_batches.classes
        classes = [*self.test_batches.class_indices]
        utils.plot_confusion_matrix(ground_truth,
                                    predictions,
                                    classes,
                                    normalize=normalize)
Example #20
def evaluate(X_test, y_test):
    # evaluate the model with validation set
    model = load_model('../MLP/mlp.h5')
    scores = model.evaluate(X_test, y_test)
    print('val_loss: {}, val_acc: {}'.format(scores[0], scores[1]))
    y_pred = model.predict_classes(X_test)
    cm = confusion_matrix(y_test, y_pred)
    cm_percent = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    # plot percentage confusion matrix
    plot_confusion_matrix(cm_percent, class_names=['Cat', 'Dog'])
    plt.savefig('../MLP/cm_percent_val.png', format='png')
    plt.show()
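
A note on Example #20: Sequential.predict_classes was removed from tf.keras in later TensorFlow releases (2.6+). If that line raises an AttributeError, the equivalent is an argmax over predict; a drop-in sketch:

import numpy as np

def predict_classes(model, X, batch_size=32):
    # Replacement for the removed Sequential.predict_classes:
    # argmax over the predicted class probabilities.
    return np.argmax(model.predict(X, batch_size=batch_size), axis=1)
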
Example #21
def knn_classify(X, y, neighbors=1, test_size=0.3, plot_conf_matrix=True):
    X_train, X_test, y_train, y_test = (
            train_test_split(X, y, random_state=0, test_size=test_size))

    knn_classifier = KNeighborsClassifier(n_neighbors=neighbors,
            algorithm='kd_tree')
    knn_classifier.fit(X_train, y_train)
    y_pred = knn_classifier.predict(X_test)

    if plot_conf_matrix:
        title = "KNN Classification with N={0}".format(neighbors)
        plot_confusion_matrix(y_test, y_pred, title)
Example #22
def evaluate(model, o_idx, output_dict=True, plot_matrix=False):
    with torch.no_grad():
        y_true = []
        y_pred = []
        for i in range(len(X_test)):
            numer_Y = [tag_to_ix[y] for y in y_test[i]]
            score, tag_seq = model(X_test[i][0], X_test[i][1])
            y_true.extend(numer_Y)
            y_pred.extend(tag_seq)

        y_true = np.array(y_true)
        y_pred = np.array(y_pred)

        exclude_o_idx = np.where(y_true != o_idx)
        y_pred_without_o = y_pred[exclude_o_idx]
        y_without_o = y_true[exclude_o_idx]

        y_pred_without_o_class = [id_to_tag[y] for y in y_pred_without_o]
        y_without_o_class = [id_to_tag[y] for y in y_without_o]

        #print(type(y_without_o),type(y_pred))
        #print(labels)

        perf = classification_report(y_without_o_class,
                                     y_pred_without_o_class,
                                     output_dict=output_dict,
                                     labels=labels)

        if plot_matrix:
            #print(__doc__)
            np.set_printoptions(precision=2)
            #print(len(X_test[0][0]),len(y_true),len(y_pred))
            f = open(error_path, 'w')
            acc = 0
            for i in range(len(X_test)):
                for j in range(len(X_test[i][0])):
                    if y_true[acc] != y_pred[acc]:
                        f.write(id2word[X_test[i][0][j][4].item()] + " " +
                                id_to_tag[y_true[acc]] + " " +
                                id_to_tag[y_pred[acc]] + "\n")
                    acc += 1
            f.close()

            utils.plot_confusion_matrix(y_true, y_pred,
                                        np.array(not_removed_label))
            #print(666,y_without_o,y_pred)
            utils.plot_confusion_matrix(y_true,
                                        y_pred,
                                        np.array(not_removed_label),
                                        normalize=True)
            plt.show()

    return perf
Example #23
def evaluate(model,
             o_idx,
             X_test,
             y_test,
             output_dict=True,
             plot_matrix=False):
    with torch.no_grad():
        y_true = []
        y_pred = []
        for i, text in enumerate(X_test):
            numer_Y = [tag_to_ix[y] for y in y_test[i]]
            raw_output = model(text)
            _, pred_Y = torch.max(raw_output, 1)
            y_true.extend(numer_Y)
            y_pred.extend(pred_Y)

        y_true = np.array(y_true)
        y_pred = np.array(y_pred)

        exclude_o_idx = np.where(y_true != o_idx)
        y_pred_without_o = y_pred[exclude_o_idx]
        y_without_o = y_true[exclude_o_idx]

        y_pred_without_o_class = [id_to_tag[y] for y in y_pred_without_o]
        y_without_o_class = [id_to_tag[y] for y in y_without_o]

        perf = classification_report(y_without_o_class,
                                     y_pred_without_o_class,
                                     output_dict=output_dict,
                                     labels=labels)

        if plot_matrix:
            np.set_printoptions(precision=2)
            f = open(error_path, 'w')
            acc = 0
            for idx, text in enumerate(X_test):
                for word in text:
                    if y_true[acc] != y_pred[acc]:
                        f.write(word + " " + id_to_tag[y_true[acc]] + " " +
                                id_to_tag[y_pred[acc]] + "\n")
                    acc += 1
            f.close()

            utils.plot_confusion_matrix(y_true, y_pred,
                                        np.array(not_removed_label))
            utils.plot_confusion_matrix(y_true,
                                        y_pred,
                                        np.array(not_removed_label),
                                        normalize=True)
            plt.show()

    return perf
Example #24
    def plot_confusion_matrix_figure(self, dirname, predict, targets, mods):
        """[绘制预测结果的混淆矩阵]

        Args:
            dirname ([str]): [存储图像的文件夹]
            predict ([二维array,(length, probality)]]): [网络的得到预测值]]
            targets ([一维array 或 二维array(onehot)]): [对应的真实标签]
            mods ([一维array]): 真实类别,str
        """
        cm = util.generate_confusion_matrix(predict, targets, mods)
        util.ensure_dir(dirname)
        util.plot_confusion_matrix(cm, dirname, mods)
        print("Figure 'Confusion Matrix' generated successfully")
Example #25
def plot_report(fig_name, plot_cm=False):
    """
    plot the comparison result using different ML methods
    :param fig_name: saved figure name
    :param plot_cm: whether to plot confusion matrix result
    :return: figure
    """
    dir = "log/peps mini"
    pattern = r'(internal|access|lock)\\\d{1,2}.csv$'
    pattern_valid = r'(3|6|9|12).csv$'
    utils.construct_set(dir, pattern, pattern_valid)
    X_train, X_valid, y_train, y_valid = utils.load_train_valid()
    methods = ["Logistic", "LDA", "QDA", "KNN", "SVM", "RF", "GBM", "MLP"]
    params = [
        None, None, None, {
            "n_neighbors": 10
        }, {
            "C": 0.25,
            "gamma": 0.5
        }, {
            "max_features": 2,
            "n_estimators": 100
        }, {
            "n_estimators": 400,
            "max_depth": 3
        }, {
            "hidden_layer_sizes": (16, 8)
        }
    ]
    df_report = pd.DataFrame()
    for method, param in zip(methods, params):
        cm, report_temp, classes = utils.train(X_train,
                                               X_valid,
                                               y_train,
                                               y_valid,
                                               method=method,
                                               param=param)
        df_report = df_report.append(report_temp, ignore_index=True)
        if plot_cm:
            plt.figure()
            utils.plot_confusion_matrix(cm, classes, normalize=True)
            plt.title(method)
            if not os.path.exists(dir_fig + '/methods/'):
                os.makedirs(dir_fig + '/methods/')
            plt.savefig(dir_fig + '/methods/' + method + '.png')
    df_report.set_index('method', inplace=True)
    df_report.plot(kind='bar', rot=0, figsize=(16, 6), ylim=(0.6, 1))
    plt.title(fig_name)
    if not os.path.exists(dir_fig):
        os.makedirs(dir_fig)
    plt.savefig(dir_fig + '/' + fig_name + '.png')
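Example #26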
def test_on_fer_test_set(fer_path, model_type="CustomVGG"):
    start_time = time()
    fer = pd.read_csv(fer_path)
    if "attribution" not in fer:
        raise Exception(
            "Fer not split between train/val/test. Please run split_fer script."
        )
    fer_test = fer[fer["attribution"] == "test"].reset_index()

    model = load_model(model_type=model_type)

    print("Loaded fer test set and model in {}s".format(
        round(time() - start_time, 2)))
    start_time = time()

    def preprocess_batch(pixelstring_batch, emotions_batch, DEVICE):
        if model_type == "CustomVGG":
            return preprocess_batch_custom_vgg(pixelstring_batch,
                                               emotions_batch, DEVICE, False,
                                               config["loss_mode"])
        elif model_type == "DenseSIFTHybrid":
            return preprocess_batch_dense_sift_hybrid(pixelstring_batch,
                                                      emotions_batch, DEVICE,
                                                      False,
                                                      config["loss_mode"])
        elif model_type == "SIFTHybrid":
            return preprocess_batch_sift_hybrid(pixelstring_batch,
                                                emotions_batch, DEVICE, False,
                                                config["loss_mode"])

    use_descriptors = (model_type == "DenseSIFTHybrid"
                       or model_type == "SIFTHybrid")
    dummy_weights = torch.FloatTensor([1] * len(config["catslist"])).to(
        DEVICE)  # we don't care about the test loss value here.
    proba, _, acc, cm1, cm2, acc_fact = evaluate(
        model,
        fer_test,
        preprocess_batch,
        dummy_weights,
        DEVICE,
        compute_cm=True,
        use_descriptors=use_descriptors)

    print("FINAL ACCURACY: {}".format(acc))
    print("Average predicted proba for right class: {}".format(proba))
    print("Duration on {} test faces: {}s".format(
        len(fer_test), round(time() - start_time, 2)))
    print("Accuracy with grouped classes : {}".format(acc_fact))
    print("Close the confusion matrices to end the script.")
    plot_confusion_matrix(cm1, config["catslist"])
    plot_confusion_matrix(cm2, ["bad", "good", "surprise", "neutral"])
Example #27
def evaluate(validation_generator):
    # evaluate the model with validation set
    y_true = np.array([0] * len(os.listdir('../data/validation/cats/')) +
                      [1] * len(os.listdir('../data/validation/dogs/')))
    model = load_model('../CNN/cnn.h5')
    print(model.summary())
    scores = model.evaluate_generator(validation_generator)
    print('val_loss: {}, val_acc: {}'.format(scores[0], scores[1]))
    y_pred = get_predictions(model, validation_generator)
    cm = confusion_matrix(y_true, y_pred)
    cm_percent = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    # plot percentage confusion matrix
    plot_confusion_matrix(cm_percent, class_names=['Cat', 'Dog'])
    plt.savefig('../MLP/cm_percent_val.png', format='png')
    plt.show()
Example #28
def test_utils(figs=False):
    label_names = ['a', 'b', 'c', 'd', 'e', 'f', 'g']
    predicted = [0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6]
    target    = [0, 0, 2, 3, 4, 5, 6, 0, 1, 1, 3, 4, 5, 6, 0, 1, 2, 2, 4, 5, 6]

    if figs:
        utils.plot_confusion_matrix(
            predicted, target, label_names
        )
    predicted, target = utils.label2name(
        predicted, target, label_names
    )
    print(predicted)
    print(target)
Example #29
def test_model(model,
               labels,
               data_name='sk_eigenjoint_nor_528',
               valid_segment_idx=650):
    data = data_dir + '/' + data_name
    file_paths = glob(data + "/*.npy")
    losses = []
    ground_ys = []
    pred_ys = []
    for index, file_path in enumerate(file_paths):
        if index >= valid_segment_idx:
            print('Predict ' + file_path)
            valid_x = np.load(file_path)
            valid_x = np.reshape(valid_x,
                                 newshape=(1, valid_x.shape[0],
                                           valid_x.shape[1]))
            valid_y = labels[index]
            #                valid_y = valid_y[:-1]
            pred_y = model.predict_on_batch(valid_x)
            file_name = str(index - valid_segment_idx)
            plot_fig(pred_y, valid_y, file_name, save_flag=True)

            pred_y = np.argmax(pred_y, axis=2)
            pred_y = pred_y.ravel()
            ground_ys.append(valid_y)
            pred_ys.append(pred_y)
            #            pred_y = clear_pred(pred_y)

            loss = eval_jaccard(valid_y, pred_y)
            losses.append(loss)
    ground_ys = np.concatenate(ground_ys)
    pred_ys = np.concatenate(pred_ys)

    print(classification_report(ground_ys, pred_ys))

    cnf_matrix = confusion_matrix(ground_ys, pred_ys)
    np.set_printoptions(precision=2)
    cnf_matrix = cnf_matrix.astype('float') / cnf_matrix.sum(
        axis=1)[:, np.newaxis]
    plt.figure()
    plot_confusion_matrix(cnf_matrix,
                          classes=range(21),
                          normalize=True,
                          title='Normalized confusion matrix')

    plt.show()
    #    print cnf_matrix

    return losses, cnf_matrix
Example #30
def eval(logdir):
    # Load graph
    model = Net1()

    # dataflow
    df = Net1DataFlow(hp.Test1.data_path, hp.Test1.batch_size)

    ckpt = tf.train.latest_checkpoint(logdir)

    pred_conf = PredictConfig(model=model,
                              input_names=get_eval_input_names(),
                              output_names=get_eval_output_names())
    if ckpt:
        pred_conf.session_init = SaverRestore(ckpt)
    predictor = OfflinePredictor(pred_conf)

    x_mfccs, y_ppgs = next(df().get_data())
    y_ppg_1d, pred_ppg_1d, summ_loss, summ_acc = predictor(x_mfccs, y_ppgs)

    # plot confusion matrix
    _, idx2phn = load_vocab()
    y_ppg_1d = [idx2phn[i] for i in y_ppg_1d]
    pred_ppg_1d = [idx2phn[i] for i in pred_ppg_1d]
    summ_cm = plot_confusion_matrix(y_ppg_1d, pred_ppg_1d, phns)

    writer = tf.summary.FileWriter(logdir)
    writer.add_summary(summ_loss)
    writer.add_summary(summ_acc)
    writer.add_summary(summ_cm)
    writer.close()
Example #31
def test(model, X_test, y_test, enc):

    y_test_pred = model.predict(X_test)
    y_test_pred = np.argmax(y_test_pred, axis=1)

    y_test_pred_labels = enc.inverse_transform(y_test_pred)

    cm = metrics.confusion_matrix(y_true=y_test,
                                  y_pred=y_test_pred_labels,
                                  labels=enc.classes_)
    plt.figure()
    utils.plot_confusion_matrix(cm, enc.classes_, normalize=False)
    plt.show()

    test_acc = metrics.accuracy_score(enc.transform(y_test), y_test_pred)
    print("Accuracy: {}".format(test_acc))
Example #32
def main(arguments):
    # load the features of the dataset
    features = datasets.load_breast_cancer().data

    # standardize the features
    features = StandardScaler().fit_transform(features)

    # get the number of features
    num_features = features.shape[1]

    # load the corresponding labels for the features
    labels = datasets.load_breast_cancer().target

    # transform the labels to {-1, +1}
    labels[labels == 0] = -1

    # split the dataset to 70/30 partition: 70% train, 30% test
    train_features, test_features, train_labels, test_labels = train_test_split(features, labels,
                                                                                test_size=0.3, stratify=labels)

    train_size = train_features.shape[0]
    test_size = test_features.shape[0]

    # slice the dataset as per the batch size
    train_features = train_features[:train_size - (train_size % BATCH_SIZE)]
    train_labels = train_labels[:train_size - (train_size % BATCH_SIZE)]
    test_features = test_features[:test_size - (test_size % BATCH_SIZE)]
    test_labels = test_labels[:test_size - (test_size % BATCH_SIZE)]

    # instantiate the SVM class
    model = SVM(alpha=LEARNING_RATE, batch_size=BATCH_SIZE, svm_c=arguments.svm_c, num_classes=NUM_CLASSES,
                num_features=num_features)

    # train the instantiated model
    model.train(epochs=arguments.num_epochs, log_path=arguments.log_path, train_data=[train_features, train_labels],
                train_size=train_features.shape[0], validation_data=[test_features, test_labels],
                validation_size=test_features.shape[0], result_path=arguments.result_path)

    test_conf, test_accuracy = utils.plot_confusion_matrix(phase='testing', path=arguments.result_path,
                                                           class_names=['benign', 'malignant'])

    print('True negatives : {}'.format(test_conf[0][0]))
    print('False negatives : {}'.format(test_conf[1][0]))
    print('True positives : {}'.format(test_conf[1][1]))
    print('False positives : {}'.format(test_conf[0][1]))
    print('Testing accuracy : {}'.format(test_accuracy))
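Example #33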
	num_classes = len(y.unique())

	# Store the predictions and true classes from every fold in a list
	y_pred_total = []
	y_test_total = []

	k_fold = cross_validation.StratifiedKFold(y, n_folds=10, indices=True)

	for train_indices, test_indices in k_fold:
		X_train = X.iloc[train_indices]
		y_train = y.iloc[train_indices]

		X_test = X.iloc[test_indices]
		y_test = y.iloc[test_indices]

		clf = tree.DecisionTreeClassifier(criterion='entropy')

		# Fit the model and predict
		clf = clf.fit(X_train, y_train)
		y_pred = clf.predict(X_test)

		y_pred_total += y_pred.tolist()
		y_test_total += y_test.tolist()

	precision = precision_score(y_test_total, y_pred_total, average=None)
	recall = recall_score(y_test_total, y_pred_total, average=None)
	f_score = f1_score(y_test_total, y_pred_total, average=None)

	plot_confusion_matrix(y_test_total, y_pred_total, 'Decision Tree Classifier', normed=True)
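Example #34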
                    recalls[label][median], label='%s vs rest' % genre_list[label])
            plot_roc(roc_scores[label][median], desc, tprs[label][median],
                     fprs[label][median], label='%s vs rest' % genre_list[label])

    all_pr_scores = np.asarray(pr_scores.values()).flatten()
    summary = (np.mean(scores), np.std(scores),
               np.mean(all_pr_scores), np.std(all_pr_scores))
    print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)


def create_model():
    from sklearn.linear_model.logistic import LogisticRegression
    clf = LogisticRegression()

    return clf


if __name__ == "__main__":
    X, y = read_fft(genre_list)

    train_avg, test_avg, cms = train_model(
        create_model, X, y, "Log Reg FFT", plot=True)

    cm_avg = np.mean(cms, axis=0)
    cm_norm = cm_avg / np.sum(cm_avg, axis=0)

    plot_confusion_matrix(cm_norm, genre_list, "fft",
                          "Confusion matrix of an FFT based classifier")
name = 'model%d.png' % np.random.randint(10e8)
path = '/tmp/%s' % name
# This command only saves the image of your model
plot(model, to_file=path, show_shapes=True, show_layer_names=True)
# Load and review the image (increase both dimensions of figsize
# if the image looks too small)
plt.figure(figsize=(8, 120))
plt.imshow(mpimg.imread(path))
plt.axis('off')
plt.show()

# Callback
checkpointer = ModelCheckpoint(filepath='seq_example_best_weights.hdf5', verbose=1, save_best_only=True)
history = model.fit(X_train, y_train, batch_size=BATCH_SIZE,
                    nb_epoch=3, verbose=1,
                    shuffle=True, validation_split=0.2,
                    callbacks=[checkpointer])

y = model.predict(X_test, batch_size=BATCH_SIZE, verbose=1)
# the predictions are probability values; select the class with the
# highest probability
y = np.argmax(y, axis=-1)
# convert one-hot encoded y_test to label also
y_test = np.argmax(y_test, axis=-1)
print('Test accuracy:', accuracy_score(y_test, y))
print('Classification report:', classification_report(y_test, y))
plt.figure()
plot_confusion_matrix(confusion_matrix(y_test, y),
                      labels=range(0, 4))
plt.show()
Example #36
    def do_classify(self, name):
        train_avg, test_avg, cms = self._classify_obj.train_model(name, plot=True)
        cm_avg = np.mean(cms, axis=0)
        cm_norm = cm_avg / np.sum(cm_avg, axis=0)
        name, desc = self._classify_obj.get_way_name()
        plot_confusion_matrix(cm_norm, AbstractSoundClassifyBase.genre_list, name, desc)
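Example #37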
            desc = "%s %s" % (name, genre_list[label])
            plot_roc(roc_scores[label][median], desc, tprs[label][median],
                     fprs[label][median], label='%s vs rest' % genre_list[label])

    all_pr_scores = np.asarray(pr_scores.values()).flatten()
    summary = (np.mean(scores), np.std(scores),
               np.mean(all_pr_scores), np.std(all_pr_scores))
    print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)


def create_model():
    from sklearn.linear_model.logistic import LogisticRegression
    clf = LogisticRegression()

    return clf


if __name__ == "__main__":
    X, y = read_ceps(genre_list)

    train_avg, test_avg, cms = train_model(
        create_model, X, y, "Log Reg CEPS", plot=True)

    cm_avg = np.mean(cms, axis=0)
    cm_norm = cm_avg / np.sum(cm_avg, axis=0)

    plot_confusion_matrix(cm_norm, genre_list, "ceps",
                          "Confusion matrix of a CEPS based classifier")
Example #38
            plot_roc_curves(roc_scores[label][median], desc, tprs[label][median],fprs[label][median], label='%s vs rest' % genre_list[label])

    all_pr_scores = np.asarray(pr_scores.values()).flatten()
    summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores), np.std(all_pr_scores))
    #print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    #save the trained model to disk
    joblib.dump(clf, 'saved_model/SVMFFT.pkl')
    
    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)


if __name__ == "__main__":
    start = timeit.default_timer()
    print()
    print(" Starting classification \n")
    print(" Classification running ... \n")
    X, y = read_fft(genre_list)
    print(X, y)
    train_avg, test_avg, cms = train_model(X, y, "fft", plot=True)
    cm_avg = np.mean(cms, axis=0)
    cm_norm = cm_avg / np.sum(cm_avg, axis=0)
    print(" Classification finished \n")
    stop = timeit.default_timer()
    print(" Total time taken (s) = ", (stop - start))
    print("\n Plotting confusion matrix ... \n")
    plot_confusion_matrix(cm_norm, genre_list, "fft",
                          "SVM FFT classifier - Confusion matrix")
    print(" All Done\n")
    print(" See plots in 'graphs' directory \n")
    
Example #39
            plot_roc_curves(roc_scores[label][median], desc, tprs[label][median],fprs[label][median], label='%s vs rest' % genre_list[label])

    all_pr_scores = np.asarray(pr_scores.values()).flatten()
    summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores), np.std(all_pr_scores))
    #print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    #save the trained model to disk
    joblib.dump(clf, 'saved_model/model_ceps.pkl')
    
    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)


if __name__ == "__main__":
    start = timeit.default_timer()
    print()
    print(" Starting classification \n")
    print(" Classification running ... \n")
    X, y = read_fft(genre_list)
    print(X, y)
    train_avg, test_avg, cms = train_model(X, y, "ceps", plot=True)
    cm_avg = np.mean(cms, axis=0)
    cm_norm = cm_avg / np.sum(cm_avg, axis=0)
    print(" Classification finished \n")
    stop = timeit.default_timer()
    print(" Total time taken (s) = ", (stop - start))
    print("\n Plotting confusion matrix ... \n")
    plot_confusion_matrix(cm_norm, genre_list, "ceps",
                          "CEPS classifier - Confusion matrix")
    print(" All Done\n")
    print(" See plots in 'graphs' directory \n")
    print " See plots in 'graphs' directory \n"