def train_and_save_model(model='xvector',
                         binary_class=False,
                         single_class='glass'):
    model = define_xvector()  # note: this overrides the 'model' argument
    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=0.001),
                  metrics=['acc',
                           km.precision(label=1),
                           km.recall(label=0)])
    model.summary()
    callback_list = [
        ModelCheckpoint(
            'checkpoint-{epoch:02d}.h5',
            monitor='loss',
            verbose=1,
            save_best_only=True,
            period=2
        ),  # checkpoint every 2 epochs, saving only when the loss improves
        ReduceLROnPlateau(
            monitor='loss', patience=3, verbose=1, min_lr=1e-6
        ),  # reduce the learning rate if the training loss plateaus
        CSVLogger(filename='training_log.csv'),  # logger to csv
        EarlyStopping(
            monitor='loss',
            patience=5)  # early stop if there's no improvement in the loss
    ]
    tr_data, tr_label, ts_data, ts_label = train_test_split()
    encoder = LabelBinarizer()
    tr_label = encoder.fit_transform(tr_label)
    ts_label = encoder.transform(ts_label)
    print(
        "Starting training process\nTraining data shape {}\nTraining label shape {}"
        .format(tr_data.shape, tr_label.shape))
    model.fit(tr_data,
              tr_label,
              batch_size=16,
              epochs=100,
              verbose=1,
              validation_split=0.2)
    model.save('5class_segmentYoutube_model.h5')
    pred = model.predict(ts_data)
    pred = encoder.inverse_transform(pred)
    ts_label = encoder.inverse_transform(ts_label)
    cm = confusion_matrix(y_target=ts_label, y_predicted=pred, binary=False)
    fig, ax = plot_confusion_matrix(conf_mat=cm, figsize=(10, 10))
    ax.set_xticklabels([''] + CLASS_TYPE, rotation=40, ha='right')
    ax.set_yticklabels([''] + CLASS_TYPE)
    plt.savefig("ConfusionMatrix_segment_youtube.png")
    plt.show()
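The LabelBinarizer round trip above is the part worth isolating: fit_transform turns string labels into the one-hot rows that categorical_crossentropy expects, and inverse_transform maps rows of predicted scores back to string labels via the argmax column. A minimal sketch, with illustrative class names rather than the actual CLASS_TYPE list:

# Sketch of the encode/decode round trip used above; class names are made up.
import numpy as np
from sklearn.preprocessing import LabelBinarizer

labels = np.array(['glass', 'gunshot', 'scream', 'glass'])
encoder = LabelBinarizer()
one_hot = encoder.fit_transform(labels)  # shape (4, 3): one column per sorted class

# inverse_transform takes the argmax of each row, so it can be applied
# directly to model.predict() output as well as to 0/1 targets
scores = np.array([[0.1, 0.8, 0.1],
                   [0.7, 0.2, 0.1]])
print(encoder.inverse_transform(scores))  # ['gunshot' 'glass']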
Example #2
def make_plots(train, test, pipelines):
    extensions = ['svg', 'eps', 'png']
    X_train, y_train = load_X_y(train)
    X_test, y_test = load_X_y(test)
    pipelines.sort()
    clf = pipelines._results[0]
    y_pred = clf.predict(X_test)
    classifiers_with_predict_proba = find_classifiers_with_predict_proba()    
    plt.clf()
    if clf.classifier.__class__.__name__ in classifiers_with_predict_proba:
        y_probas = clf.predict_proba(X_test)[:,1]
        fpr, tpr, _ = metrics.roc_curve(y_test, y_probas)
        plt.plot([0, 1], [0, 1], 'k--')
        plt.plot(fpr, tpr)
        plt.xlabel('False positive rate')
        plt.ylabel('True positive rate')   
        fig = plt.gcf()
        fig.set_size_inches(4,3)
        plt.tight_layout()
        for ext in extensions:
            plt.savefig("./figures/roc_curve."+ext)        
    else:
        print(clf.classifier.__class__.__name__,"not in predict proba list")    
    cm = confusion_matrix(y_target=y_test, 
                          y_predicted=y_pred, 
                          binary=True)
    plot_confusion_matrix(conf_mat=cm, colorbar=True)
    fig = plt.gcf()
    fig.set_size_inches(4,3)
    plt.tight_layout()
    for ext in extensions:
        plt.savefig("./figures/confusion_matrix."+ext)
    # plot_learning_curves(X_train, y_train, X_test, y_test,
Example #3
def init(X_train, y_train, X_test, y_test, index=0):

    # pca = PCA(n_components=2, whiten=True)
    # pca = pca.fit(X_train)

    # print('Explained variance percentage = %0.2f' % sum(pca.explained_variance_ratio_))
    # X_train = pca.transform(X_train)
    # X_test = pca.transform(X_test)

    from mlxtend.evaluate import confusion_matrix
    from mlxtend.plotting import plot_confusion_matrix

    oc_svm_clf = svm.OneClassSVM(nu=0.9, gamma=0.0001,
                                 kernel='linear')  # Obtained using grid search
    oc_svm_clf.fit(X_train, y_train)
    oc_svm_preds = oc_svm_clf.predict(X_test)

    cm = confusion_matrix(y_target=y_test,
                          y_predicted=oc_svm_preds,
                          binary=True)
    fig, ax = plot_confusion_matrix(conf_mat=cm)

    print(cm)

    # plt.savefig("confusion_matrix.pdf", format='pdf')
    plt.savefig("confusion_matrix" + str(index) + ".png", format='png')
 def saveConfusionMatrix(self, y_test, y_pred):
     cm = confusion_matrix(y_target=y_test, y_predicted=y_pred, binary=False)
     fig, ax = plot_confusion_matrix(conf_mat=cm)
     ax.set_title('RandomForest Confusion Matrix')
     plt.savefig('images/' + self.name + '_Confusion_Matrix.png')
     plt.show()
     plt.close()
def test_binary():
    y_targ = [1, 1, 1, 0, 0, 2, 0, 3]
    y_pred = [1, 0, 1, 0, 0, 2, 1, 3]
    x = np.array([[4, 1],
                  [1, 2]])
    y = confusion_matrix(y_targ, y_pred, binary=True, positive_label=1)
    assert_array_equal(x, y)
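The test above pins down what binary=True does: every label other than positive_label is folded into the negative class before the 2x2 matrix is built. Changing positive_label regroups the same data; a quick sketch:

# Same inputs as the test above, but treating class 2 as the positive label;
# labels 0, 1 and 3 all collapse into the negative class.
from mlxtend.evaluate import confusion_matrix

y_targ = [1, 1, 1, 0, 0, 2, 0, 3]
y_pred = [1, 0, 1, 0, 0, 2, 1, 3]
print(confusion_matrix(y_targ, y_pred, binary=True, positive_label=2))
# [[7 0]
#  [0 1]]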
Example #7
def multi_class_confision(imageResults):
    y_target = []
    y_predicted = []
    for result in imageResults:
        y_target.append(result[1][0])
        y_predicted.append(result[2][0])
    cm = confusion_matrix(y_target=y_target,
                          y_predicted=y_predicted,
                          binary=False)

    table = []
    for i in range(10):
        table.append([categoryDict[i]] + cm[i].tolist())

    result_file.write("Confusion Matrix:\n\n")
    result_file.write(
        tabulate(table,
                 headers=[''] + [categoryDict[i] for i in range(10)],
                 tablefmt='orgtbl'))
    result_file.write("\n")

    recall = np.diag(cm) / np.sum(cm, axis=1)
    precision = np.diag(cm) / np.sum(cm, axis=0)
    # overall recall and precision
    overall_recall = np.mean(recall)
    overall_precision = np.mean(precision)
    accuracy = np.trace(cm) / np.sum(cm)  # overall accuracy: correct predictions / total samples
    return recall, precision, overall_recall, overall_precision, accuracy
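The metric lines above follow the usual convention of rows as actual classes and columns as predicted classes. A hand-checkable sanity check on a small matrix:

# Sanity check of the formulas above on a 2x2 matrix (rows = actual).
import numpy as np

cm = np.array([[8, 2],
               [1, 9]])
recall = np.diag(cm) / np.sum(cm, axis=1)     # [0.8, 0.9]
precision = np.diag(cm) / np.sum(cm, axis=0)  # [8/9, 9/11]
accuracy = np.trace(cm) / np.sum(cm)          # 17/20 = 0.85
print(recall, precision, accuracy)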
def test(model, device, test_loader, criterion, epoch):
    # Test the model
    model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
    loss_list = []
    all_predicted = []
    all_labels = []
    classes = ('10_down', '04_fist_moved', '01_palm', '05_thumb', '02_l',
               '09_c', '08_palm_moved', '07_ok', '03_fist', '06_index')
    with torch.no_grad():
        correct = 0
        total = 0
        for i, (images, labels) in enumerate(test_loader):
            images = images.to(device)
            labels = labels.to(device)
            for item in labels.cpu().numpy():
                all_labels.append(item)
            labels = labels.long()
            labels = labels.view(-1, len(labels))[0]
            outputs = model(images)
            outputs = outputs.float()
            loss = criterion(outputs, labels)
            loss_list.append(loss.item())
            if i % 10 == 0:
                print('Validation Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, i * len(images), len(test_loader.dataset),
                    100. * i / len(test_loader), loss.item()))
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            for item in predicted.cpu().numpy():
                all_predicted.append(item)

        print('Test Accuracy of the model on the {} test images: {} %'.format(total, 100 * correct / total))
        accuracy = 100 * correct / total

        all_labels_array = np.array(all_labels).reshape(-1, )
        all_predicted_array = np.array(all_predicted).reshape(-1, )

        my_dict = dict(list(enumerate(classes)))

        # print(all_labels_array)
        # print(all_predicted_array)
        # print("My dict=", my_dict)
        all_labels_vect = np.vectorize(my_dict.get)(all_labels_array)
        # print(all_labels_vect)
        all_predicted_vect = np.vectorize(my_dict.get)(all_predicted_array)

        # Create CM From Data
        cm1 = ConfusionMatrix(predict_vector=all_predicted_vect, actual_vector=all_labels_vect)

        # Create CM From Data
        #cm1 = ConfusionMatrix(actual_vector=all_labels_array, predict_vector=all_predicted_array)
        cm = confusion_matrix(y_target=all_labels_array, y_predicted=all_predicted_array, binary=False)
        # print(cm.F1)
        # print(cm1)

        # print(type(cm.F1))
        # print(type(cm1))
    return accuracy, loss_list, cm, cm1
    def __str__(self):
        truth, prediction = self._fix_label_prediction_representation()
        distinct_values = {*truth.reshape((-1, ))}

        cmx = confusion_matrix(truth,
                               prediction,
                               binary=len(distinct_values) <= 2)
        return f"{cmx}"
Example #10
def dump():
    hate_speech = pd.read_csv(
        './twitter-hate-speech-classifier-DFE-a845520.csv',
        encoding='iso-8859-1')
    print('There are', len(hate_speech), 'data points.')
    hate_speech_subset = hate_speech.iloc[:, [19, 5, 6]]
    hate_speech_subset.columns = ['Tweets', 'Verdict', 'Confidence']

    le = preprocessing.LabelEncoder()
    le.fit(list(hate_speech_subset.Verdict.unique()))
    hate_speech_subset['Numeric_Verdict'] = le.transform(
        list(hate_speech_subset.Verdict.values))
    hate_speech_subset['Tweets'] = hate_speech_subset['Tweets'].map(
        lambda x: processTweet(x))

    text = hate_speech_subset['Tweets'].values
    vectorizer = CountVectorizer(ngram_range=(1, 2))
    vectorizer.fit(text)

    X = vectorizer.transform(text)
    y = hate_speech_subset['Numeric_Verdict'].values
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    cm = confusion_matrix(
        y_train,
        SVC(kernel='linear', probability=True).fit(X_train,
                                                   y_train).predict(X_train))
    fig, ax = plot_confusion_matrix(conf_mat=cm)
    plt.show()

    #0 : The tweet contains hate speech
    #1 : The tweet is not offensive
    #2 : The tweet uses offensive language but not hate speech

    print(X.shape)
    """
    param_grid = {"max_depth": [3, None],
                "n_estimators": [10, 50, 100],
                "max_features": [1, 3, 10],
                "min_samples_split": [2, 3, 10],
                "min_samples_leaf": [1, 3, 10],
                "bootstrap": [True, False],
                "criterion": ["gini", "entropy"]}

    grid_rf = GridSearchCV(RandomForestClassifier(),
                            param_grid=param_grid,
                            cv=10,
                            scoring='accuracy')
    grid_rf.fit(X_train, y_train)
    grid_rf.score(X_train, y_train)
    """
    clf_rfc = RandomForestClassifier()
    clf_rfc.fit(X_train, y_train)
    score = clf_rfc.score(X_test, y_test)
    print(score)

    pickle.dump(clf_rfc, open('pkl_objects/classifier.pkl', 'wb'), protocol=4)
    convert('pkl_objects/classifier.pkl')
def test_multiclass():
    y_targ = [1, 1, 1, 0, 0, 2, 0, 3]
    y_pred = [1, 0, 1, 0, 0, 2, 1, 3]
    x = np.array([[2, 1, 0, 0],
                  [1, 2, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]])
    y = confusion_matrix(y_targ, y_pred, binary=False, positive_label=1)
    assert_array_equal(x, y)
    def plot_confusion_matrix(self, figsize=(6, 6)) -> Tuple[Figure, Axis]:
        from mlxtend.plotting import plot_confusion_matrix

        truth, prediction = self._fix_label_prediction_representation()

        distinct_values = {*truth.reshape((-1, ))}
        cm = confusion_matrix(truth,
                              prediction,
                              binary=len(distinct_values) <= 2)
        return plot_confusion_matrix(cm, figsize=figsize)
Example #14
def plot_one_hot_encoded_confusion_matrix(
        df: pd.DataFrame, true_columns,
        prediction_columns) -> Tuple[Figure, Axis]:
    y_hat = df[prediction_columns].apply(lambda row: np.argmax(row),
                                         raw=True,
                                         axis=1)
    y = df[true_columns].apply(lambda row: np.argmax(row), raw=True, axis=1)

    cm = confusion_matrix(y.values, y_hat.values)
    return plot_confusion_matrix(cm, figsize=(12, 12))
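A usage sketch for the helper above; the DataFrame layout and column names here are invented, since the real ones depend on the caller:

# Hypothetical one-hot truth columns and raw score columns.
import pandas as pd

df = pd.DataFrame({'true_a': [1, 0, 0], 'true_b': [0, 1, 1],
                   'pred_a': [0.9, 0.2, 0.6], 'pred_b': [0.1, 0.8, 0.4]})
fig, ax = plot_one_hot_encoded_confusion_matrix(
    df,
    true_columns=['true_a', 'true_b'],
    prediction_columns=['pred_a', 'pred_b'])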
Example #15
 def plot_confusion_matrix(self, x_test, y_test, logger, *argv):
     try:
         estimator = self.estimator.best_estimator_
         cm = confusion_matrix(y_target=y_test,
                               y_predicted=estimator.predict(x_test),
                               binary=False)
         fig, ax = plot_confusion_matrix(conf_mat=cm, figsize=(15, 15))
         plt.savefig('../../plots/cm_' + str(argv[0]) + "_" + str(argv[1]) +
                     '.png')
         logger.info('Plotting confusion matrix completed')
     except Exception as e:
         logger.error('Failed in plot_confusion_matrix:' + str(e))
Example #16
def save_confusion_matrix(file_path, y_target, y_predicted, target_names=None, binary=False):
    
    cm = confusion_matrix(y_target, y_predicted, binary)

    fig, ax = plot_confusion_matrix(conf_mat=cm, colorbar=True, show_absolute=False, show_normed=True)

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names)
        plt.yticks(tick_marks, target_names)

    plt.savefig(file_path)
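A typical call might look like this; the labels, class names, and file path are invented for the sketch:

# Illustrative usage of save_confusion_matrix defined above.
y_true = [0, 1, 1, 0, 2]
y_hat = [0, 1, 0, 0, 2]
save_confusion_matrix('cm_normed.png', y_true, y_hat,
                      target_names=['bird', 'cat', 'dog'])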
def plot(actual: List[Any], predicted: List[Any], type: str, threshold: float):
    multi_class_cm = confusion_matrix(y_target=actual,
                                      y_predicted=predicted,
                                      binary=False)
    multi_class_plot, ax = plot_confusion_matrix(conf_mat=multi_class_cm,
                                                 class_names=GRADE_HIERARCHY,
                                                 colorbar=True,
                                                 show_absolute=True,
                                                 show_normed=True)
    multi_class_plot.suptitle(
        f"Multi Class Confusion Matrix for {type} Majority Sorting (λ = {threshold})",
        fontsize=10)
    multi_class_plot.savefig(f"confusion_matrix_{type}_{threshold}.png")
Example #18
def calculate_performance(observations: pd.DataFrame,
                          predictions: Union[Network, NetworkGroup],
                          sign: bool) -> Tuple[dict, float, float]:
    prediction_df = pd.DataFrame(
        np.zeros_like(observations.values),
        index=observations.index,
        columns=observations.columns,
    )
    if len(predictions.links):
        if isinstance(predictions, Network):
            table = predictions.get_adjacency_table("weight")
            for row in table.index:
                for col in table.columns:
                    prediction_df.loc[row, col] = table.loc[row, col]
        elif isinstance(predictions, NetworkGroup):
            vector_table = predictions.get_adjacency_vectors("weight")
            for row in vector_table.index:
                if row not in predictions.linkid_revmap:
                    continue
                else:
                    source, target = predictions.linkid_revmap[row][0][
                        -1].split("-")
                    # FIXME: Is this the right thing to do?
                    val = np.mean(vector_table.loc[row, :])
                    prediction_df.loc[source, target] = val
                    prediction_df.loc[target, source] = val
        else:
            raise ValueError("Unsupported predictions object")
        np.fill_diagonal(prediction_df.values, 0.0)
        prediction_df.fillna(0.0, inplace=True)
        if sign:
            prediction_df[prediction_df > 0] = 1
            prediction_df[prediction_df < 0] = -1
            prediction_df = prediction_df.astype(int)
        t_vec = observations.values.reshape(-1)
        p_vec = prediction_df.values.reshape(-1)
        cm = confusion_matrix(t_vec, p_vec, binary=True, positive_label=0)
        cm_fixed = [[cm[1, 1], cm[1, 0]], [cm[0, 1], cm[0, 0]]]
        cm_dict = {
            "tn": cm_fixed[0][0],
            "fp": cm_fixed[0][1],
            "fn": cm_fixed[1][0],
            "tp": cm_fixed[1][1],
        }
        precision = calculate_precision(cm_dict)
        sensitivity = calculate_sensitivity(cm_dict)
    else:
        cm_dict = {"tn": np.nan, "fp": np.nan, "fn": np.nan, "tp": np.nan}
        precision = np.nan
        sensitivity = np.nan
    return cm_dict, precision, sensitivity
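calculate_precision and calculate_sensitivity are not shown in this excerpt; given the cm_dict layout above, they are presumably the standard ratios, along these lines:

# Assumed helper definitions; not part of the original source.
def calculate_precision(cm_dict):
    # precision = TP / (TP + FP)
    denom = cm_dict["tp"] + cm_dict["fp"]
    return cm_dict["tp"] / denom if denom else float("nan")


def calculate_sensitivity(cm_dict):
    # sensitivity (recall) = TP / (TP + FN)
    denom = cm_dict["tp"] + cm_dict["fn"]
    return cm_dict["tp"] / denom if denom else float("nan")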
Example #19
    def __str__(self):
        from mlxtend.evaluate import confusion_matrix

        # get true and prediction data. It needs to be a one hot encoded 2D array [samples, class] where nr_classes >= 2
        tv, pv = clean_one_hot_classification(
            self.df[LABEL_COLUMN_NAME]._.values,
            self.df[PREDICTION_COLUMN_NAME]._.values)

        # confusion matrix needs integer encoding
        tv = np.apply_along_axis(np.argmax, 1, tv)
        pv = np.apply_along_axis(np.argmax, 1, pv)
        cm = confusion_matrix(tv, pv, binary=tv.max() < 2)

        return f"{cm}"
def testConfusion(clf, X, y):
    y_pred = clf.predict(X)
    score = clf.score(X, y)
    cm = confusion_matrix(y, y_pred)

    # Plot it
    classes = np.append(
        "", max(np.unique(y), np.unique(y_pred), key=lambda x: len(x)))
    fig, ax = plot_confusion_matrix(conf_mat=cm)
    ax.set_xticklabels(classes, rotation=90)
    ax.set_yticklabels(classes)
    ax.set_title("Binary testing error {:.2f}".format(score))
    plt.show()

    return y_pred, cm
def show_cm(targets, predictions):
	'''
	Shows a confusion matrix for model testing.

	:param targets: Numpy array containing targets
	:param predictions: Numpy array containing corresponding predictions
	:return: figure object containing confusion matrix
	'''
	cm = confusion_matrix(y_target=targets, 
						y_predicted=predictions, 
						binary=False)

	fig, ax = plot_confusion_matrix(conf_mat=cm)
	plt.show(block=True)

	return fig
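A minimal call, with toy arrays standing in for real model output:

# Toy usage of show_cm; the arrays are invented for the sketch.
import numpy as np

fig = show_cm(np.array([0, 1, 2, 1]), np.array([0, 2, 2, 1]))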
Example #22
def init(X_train, y_train, X_test, y_test, index=0):

    pca = PCA(n_components=2, whiten=True)
    pca = pca.fit(X_train)
    print("Treinando PCA...")

    print('Explained variance percentage = %0.2f' %
          sum(pca.explained_variance_ratio_))
    X_train = pca.transform(X_train)
    X_test = pca.transform(X_test)
    print("Transformando PCA...")

    classifier = KNeighborsClassifier(n_neighbors=19,
                                      weights="uniform",
                                      metric="euclidean",
                                      n_jobs=-1)

    print("Treinando classificador...")
    classifier.fit(X_train, y_train)

    print("Classificando...")
    y_predicted = classifier.predict(X_test)

    from mlxtend.evaluate import confusion_matrix
    from mlxtend.plotting import plot_confusion_matrix

    cm = confusion_matrix(y_target=y_test,
                          y_predicted=y_predicted,
                          binary=True)
    fig, ax = plot_confusion_matrix(conf_mat=cm)

    print(cm)

    # plt.savefig("confusion_matrix.pdf", format='pdf')
    plt.savefig("confusion_matrix" + str(index) + ".png", format='png')

    ###############################################
    ## Classification Report
    ###############################################

    from sklearn.metrics import classification_report

    c_report = classification_report(y_test, y_predicted)

    ### print values
    print("classification_report")
    print(c_report)
Example #23
def plot_confusion_matrix(df, figsize=(6, 6), **kwargs):
    from mlxtend.plotting import plot_confusion_matrix
    from mlxtend.evaluate import confusion_matrix

    # get true and prediction data. It needs to be a one hot encoded 2D array [samples, class] where nr_classes >= 2
    tv, pv = clean_one_hot_classification(df[LABEL_COLUMN_NAME]._.values,
                                          df[PREDICTION_COLUMN_NAME]._.values)

    # confusion matrix needs integer encoding
    tv = np.apply_along_axis(np.argmax, 1, tv)
    pv = np.apply_along_axis(np.argmax, 1, pv)

    # plot the confusion matrix
    cm = confusion_matrix(tv, pv, binary=tv.max() < 2)
    fig, ax = plot_confusion_matrix(cm, figsize=figsize)

    return fig
Example #24
count_vectorizer = CountVectorizer(stop_words='english')
count_train = count_vectorizer.fit_transform(x_train.values)
count_test = count_vectorizer.transform(x_test.values)
pred_test = OneVsRestClassifier(LinearSVC(random_state=0)).fit(count_train, y_train).predict(count_test)

# check how effective the model is
pred_testd = pd.DataFrame(pred_test, columns=list(data.columns.values)[2:len(data.columns.values)])
cols = list(data.columns.values)[2:len(data.columns.values)]
from mlxtend.evaluate import confusion_matrix
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_confusion_matrix

plt.subplot(3,3,3) 
for i in range(len(cols)):
    cm = confusion_matrix(y_target=y_test[cols[i]], 
                      y_predicted=pred_testd[cols[i]])
    
    fig, ax = plot_confusion_matrix(conf_mat=cm)
    plt.title(cols[i])
    plt.show()


for i in range(len(cols)):
    print(accuracy_score(y_test[cols[i]], pred_testd[cols[i]]))


for j in range(len(cols)):  
    uns = [i for i, v in enumerate(y_test[cols[j]]) if v == 1]
    ok= sum(pred_testd[cols[j]][uns]==1)/sum(y_test[cols[j]]==1)
    print(cols[j])
    print(ok)
    y_pred = []
    y_actual = []
    path = TEST

    for i in os.listdir(path):
        print(i)
        for f in os.listdir(os.path.join(path, i)):
            ext = os.path.splitext(f)[1]
            if ext == '.jpg' or ext == '.jpeg':
                y_pred.append(
                    recognise(str(IdentityMetadata(path, i, f)), database,
                              FRmodel))
                y_actual.append(i)
    print(y_pred)
    print(y_actual)
    cm = confusion_matrix(y_target=y_actual, y_predicted=y_pred, binary=False)
    fig, ax = plot_confusion_matrix(conf_mat=cm)
    plt.show()

else:
    FRmodel = faceRecoModel(input_shape=(3, 96, 96))
    load_weights_from_FaceNet(FRmodel)
    #FRmodel.load_weights("mytraining.h5")
    FRmodel.summary()
    fix(FRmodel)
    FRmodel.summary()

    in_a = Input(shape=(3, 96, 96))
    in_p = Input(shape=(3, 96, 96))
    in_n = Input(shape=(3, 96, 96))
    emb_a = FRmodel(in_a)
Example #26
model.add(Dense(5, input_dim=17, activation='relu'))
model.add(Dense(5, activation='sigmoid'))
model.add(Dense(1, activation='sigmoid'))  # softmax on a single unit always outputs 1.0; binary output needs sigmoid
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10)
scores = model.evaluate(x_train, y_train)
print(model.metrics_names[1], scores[1] * 100)
score = model.predict(x_test)
print(score)
score = score.round()
print(score)
y_target = list(y_test)
from mlxtend.evaluate import confusion_matrix

cm = confusion_matrix(
    y_target=[7, 4, 2, 1, 7, 4, 2, 6, 5, 3, 3, 4, 1, 1, 2, 1, 6, 1, 7, 2],
    y_predicted=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    binary=False)
'''y_actu = pd.Series([7, 4, 2, 1, 7, 4, 2, 6, 5, 3, 3, 4, 1, 1, 2, 1, 6, 1, 7, 2],name='Actual')
y_pred = pd.Series([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],name='Predicted')
df_confusion = pd.crosstab(y_actu, y_pred)
'''
print(cm)
import matplotlib.pyplot as plt
from mlxtend.evaluate import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix

fig, ax = plot_confusion_matrix(conf_mat=cm)
plt.show()
Example #27
    def classification(self, x, y):
        """Sampling"""
        sss = StratifiedShuffleSplit(n_splits=3, test_size=0.3, random_state=0)
        x_train = []
        x_test = []
        y_train = []
        y_test = []
        for train_index, test_index in sss.split(x, y):
            x_train, x_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]

        "----------------------------------LOGISTIC REGRESSION ----------------------------------"
        classifier_lr = LogisticRegression()
        classifier_lr.fit(x_train, y_train)
        y_predict_logistic = classifier_lr.predict(x_test)

        cm_logistic = metrics.confusion_matrix(y_test, y_predict_logistic)
        print("CONFUSION MATRIX TEST AND PREDICT")
        print(cm_logistic)
        sns.heatmap(cm_logistic, square=True)
        plt.show()
        print("Coefficient of determination on training set:",
              classifier_lr.score(x_train, y_train))

        acc_logistic = accuracy_score(y_test, y_predict_logistic)
        print("Accuracy Logistic Regression" + str(acc_logistic))
        f1_score_logistic = skl.metrics.f1_score(y_test,
                                                 y_predict_logistic,
                                                 average='macro')
        print("F1-score Logistic Regression: %f" % f1_score_logistic)
        precision_lr = precision_score(y_test,
                                       y_predict_logistic,
                                       pos_label=3,
                                       average='macro')

        print("Precision Logistic Regression " + str(precision_lr))
        recall_lr = recall_score(y_test,
                                 y_predict_logistic,
                                 pos_label=3,
                                 average='macro')

        print("Recall Logistic Regression " + str(recall_lr))

        "----------------------------------LINEAR SVN ----------------------------------"
        classifier_svc = svm.LinearSVC()
        classifier_svc.fit(x_train, y_train)
        y_pred_svc_linear = classifier_svc.predict(x_test)
        cm_svc_linear = confusion_matrix(y_test, y_pred_svc_linear)
        print(cm_svc_linear)

        acc_svc_linear = accuracy_score(y_test, y_pred_svc_linear)
        print("Accuracy SVN" + str(acc_svc_linear))
        f1_score_svn = skl.metrics.f1_score(y_test,
                                            y_pred_svc_linear,
                                            average='macro')
        print("F1-score Linear SVN: %f" % f1_score_svn)
        precision_svn = precision_score(y_test,
                                        y_pred_svc_linear,
                                        pos_label=3,
                                        average='macro')

        print("Precision Linear SVN " + str(precision_svn))
        recall_svn = recall_score(y_test,
                                  y_pred_svc_linear,
                                  pos_label=3,
                                  average='macro')

        print("Recall Linear SVN " + str(recall_svn))
        print(classifier_svc.coef_)
        print("classifier_svc.coef_")
        "----------------------------------PERCEPTRON ----------------------------------"
        clf_perceptron = Perceptron(max_iter=2, shuffle=False)  # max_iter replaces the removed n_iter argument
        clf_perceptron.fit(x_train, y_train)
        y_pred_perceptron = clf_perceptron.predict(x_test)
        cm_perceptron = confusion_matrix(y_test, y_pred_perceptron)
        print(cm_perceptron)
        acc_perceptron = accuracy_score(y_test, y_pred_perceptron)
        print("Accuracy Perceptron" + str(acc_perceptron))
        f1_score_perceptron = skl.metrics.f1_score(y_test,
                                                   y_pred_perceptron,
                                                   average='macro')
        print("F1-score Perceptron: %f" % f1_score_perceptron)
        precision_perceptron = precision_score(y_test,
                                               y_pred_perceptron,
                                               pos_label=3,
                                               average='macro')

        print("Precision Perceptron " + str(precision_perceptron))

        recall_perceptron = recall_score(y_test,
                                         y_pred_perceptron,
                                         pos_label=3,
                                         average='macro')

        print("Recall Perceptron " + str(recall_perceptron))
        """----------------------------------LINEAR REGRESSION ----------------------------------"""
        regression = linear_model.LinearRegression()
        regression.fit(x_train, y_train)
        y_pred_regression = regression.predict(x_test)
        # linear regression outputs are continuous, so round them to the
        # nearest class label before building a confusion matrix
        cm_regression = confusion_matrix(y_test, y_pred_regression.round().astype(int))
        print(cm_regression)
        #score = regression.score(x_test, y_test)
        print('Coefficients for Linear Regression: \n', regression.coef_)

        plt.figure()
        plt.plot(regression.coef_, color='navy', linestyle='--')
        plt.title('Coefficients for Linear Regression')
        plt.show()
Example #28
def cnn_recognition():

    # Load the data from chords.csv
    df = pd.read_csv('chords.csv')
    data = []
    for i in df.itertuples():
        # print(i[1])
        y, sr = librosa.core.load(i[1], duration=1.5)
        mfcc = librosa.feature.melspectrogram(y=y, sr=sr)  # a mel spectrogram, despite the variable name
        # print(mfcc.shape)
        if mfcc.shape == (128, 65):
            data.append((mfcc, i[3]))
    print("number of audio samples : " + str(len(data)))

    # Shuffle the data randomly and load to training and testing sets
    random.shuffle(data)
    train = data[:1405]
    test = data[1405:]

    # Zip takes iterables and returns tuples
    X_train, y_train = zip(*train)
    X_test, y_test = zip(*test)

    # Reshape each mel spectrogram to (128, 65, 1)
    X_train = np.array([x.reshape((128, 65, 1)) for x in X_train])
    X_test = np.array([x.reshape((128, 65, 1)) for x in X_test])

    # One hot encoding to model class_id
    y_train = np.array(to_categorical(y_train, 10))
    y_test = np.array(to_categorical(y_test, 10))

    # Building Sequential Model
    model = Sequential()
    input_shape = (128, 65, 1)
    model.add(Conv2D(24, (5, 5), strides=(1, 1), input_shape=input_shape))
    model.add(MaxPooling2D((4, 2), strides=(4, 2)))
    model.add(Activation('relu'))
    model.add(Conv2D(48, (5, 5), padding="valid"))
    model.add(MaxPooling2D((4, 2), strides=(4, 2)))
    model.add(Activation('relu'))
    model.add(Conv2D(48, (5, 5), padding="valid"))
    model.add(Activation('relu'))
    model.add(Flatten())
    model.add(Dropout(rate=0.5))
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(rate=0.5))
    model.add(Dense(10))
    model.add(Activation('softmax'))
    model.summary()

    model.compile(optimizer="Adam",
                  loss="categorical_crossentropy",
                  metrics=['accuracy'])

    # Train the Model
    hist = model.fit(x=X_train,
                     y=y_train,
                     epochs=40,
                     batch_size=30,
                     validation_data=(X_test, y_test))

    # Evaluation of the Model
    score = model.evaluate(x=X_test, y=y_test)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    img = 'Test_Loss_and_Test_Accuracy'
    y_pos = np.arange(2)
    column = ['Test Loss', 'Test Accuracy']
    plt.bar(y_pos, score, label='Loss and Accuracy')
    plt.xticks(y_pos, column)
    plt.ylabel('Percentage')
    plt.legend()
    plt.savefig('images/{}'.format(img))
    plt.show()

    # Saving the accuracy and loss of the model in txt file
    with open('model_accuracy_and_loss.txt', 'w') as f:
        f.write('Test Loss : ' + str(score[0]) + '\n')
        f.write('Test Accuracy : ' + str(score[1]))

    train_loss = hist.history['loss']
    validation_loss = hist.history['val_loss']
    train_acc = hist.history['accuracy']
    validation_acc = hist.history['val_accuracy']
    num_epochs = range(1, 41)

    #Save the model loss to result_images
    name1 = 'model_loss'
    # Plotting Model Loss
    plt.figure(1, figsize=(8, 6))
    plt.plot(num_epochs, train_loss)
    plt.plot(num_epochs, validation_loss)
    plt.xlabel('Number of Epochs')
    plt.ylabel('Loss')
    plt.title('Training Loss vs Validation Loss')
    plt.grid(True)
    plt.legend(['Training Loss', 'Validation Loss'])
    plt.savefig('images/{}'.format(name1))
    plt.show()

    # Saving the model accuracy to result_images
    name2 = 'model_accuracy'
    # Plotting Model Accuracy
    plt.figure(2, figsize=(8, 6))
    plt.plot(num_epochs, train_acc)
    plt.plot(num_epochs, validation_acc)
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.title('Training Accuracy vs Validation Accuracy')
    plt.grid(True)
    plt.legend(['Training Accuracy', 'Validation Accuracy'])
    plt.savefig('images/{}'.format(name2))
    plt.show()

    # Predicting the Model
    y_pred = model.predict_classes(X_test)
    label_id = np.argmax(y_test, axis=1)
    conf_matrix = confusion_matrix(label_id, y_pred, binary=False)
    print(conf_matrix)

    # Visualizing the performance of the Model
    name3 = 'confusion_matrix'
    plot_confusion_matrix(conf_mat=conf_matrix, class_names=chord_label)
    plt.title('Confusion Matrix')
    plt.savefig('images/{}'.format(name3))
    plt.show()

    # SAVE THE MODEL
    model.save('model.h5')
Example #29
    def plot_confusion_matrix(self, figsize=(12, 12)) -> plt.Figure:
        y = self.df[LABEL_COLUMN_NAME].apply(lambda row: np.argmax(row), raw=True, axis=1)
        y_hat = self.df[PREDICTION_COLUMN_NAME].apply(lambda row: np.argmax(row), raw=True, axis=1)

        cm = confusion_matrix(y.values, y_hat.values)
        return plot_confusion_matrix(cm, figsize=figsize)[0]
    X_test = test_data[:, 0:size_new - 2]
    y_test = test_data[:, size_new - 1]
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)
    model_rfc = RandomForestClassifier(n_estimators=40)
    model_rfc.fit(X_train, y_train)
    pred_rfc = model_rfc.predict(X_test)
    acc_rfc = 0
    for i in range(0, len(pred_rfc)):
        if pred_rfc[i] == y_test[i]:
            acc_rfc = acc_rfc + 1
    acc_rfc = acc_rfc / len(pred_rfc)
    print("rfc " + str(acc_rfc))

    cm = confusion_matrix(y_target=y_test, y_predicted=pred_rfc, binary=False)
    fig, ax = plot_confusion_matrix(conf_mat=cm)
    plt.show()

    model_knn = KNeighborsClassifier(n_neighbors=15)
    model_knn.fit(X_train, y_train)
    pred_knn = model_knn.predict(X_test)
    acc_knn = 0
    for i in range(0, len(pred_knn)):
        if pred_knn[i] == y_test[i]:
            acc_knn = acc_knn + 1
    acc_knn = acc_knn / len(pred_knn)
    print("knn " + str(acc_knn) + "\n")
    #print(X_test)

datafile.close()
def get_confusion_matrix_one_hot(runname, model_results, truth):
    '''model_results and truth should be in one-hot format, i.e., have >= 2 columns,
    where truth is 0/1 and the argmax along each row of model_results is the
    model's prediction
    '''
    mr = []
    mr2 = []
    mr3 = []
    print(model_results, truth)
    for x in model_results:
        mr.append(np.argmax(x))
        mr2.append(x)
    mr3 = label_binarize(mr, classes=[0, 1, 2])
    no_ev = min(len(mr), len(truth))
    print(no_ev)
    model_results = np.asarray(mr)[:no_ev]
    truth = np.asarray(truth)[:no_ev]
    print(np.shape(model_results), np.shape(truth))
    mr2 = mr2[:no_ev]
    mr3 = mr3[:no_ev]
    cm = confusion_matrix(y_target=truth,
                          y_predicted=np.rint(np.squeeze(model_results)),
                          binary=False)
    fig, ax = plot_confusion_matrix(conf_mat=cm, figsize=(5, 5))
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.savefig('/home/clarkr/Figures/' + runname + 'confmat.png')
    lw = 2
    n_classes = 3
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    t2 = label_binarize(truth, classes=[0, 1, 2])
    print(mr2[:100])
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(t2[:, i], np.asarray(mr2)[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))

    # Then interpolate all ROC curves at these points
    mean_tpr = np.zeros_like(all_fpr)
    for i in range(n_classes):
        mean_tpr += interp(all_fpr, fpr[i], tpr[i])

    # Finally average it and compute AUC
    mean_tpr /= n_classes

    fpr["macro"] = all_fpr
    tpr["macro"] = mean_tpr
    roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
    fpr["micro"], tpr["micro"], _ = roc_curve(t2.ravel(), mr3.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    # Plot all ROC curves
    plt.figure()
    plt.plot(fpr["micro"],
             tpr["micro"],
             label='Micro-average ROC curve (area = {0:0.2f})'
             ''.format(roc_auc["micro"]),
             color='deeppink',
             linestyle=':',
             linewidth=4)

    plt.plot(fpr["macro"],
             tpr["macro"],
             label='Macro-average ROC curve (area = {0:0.2f})'
             ''.format(roc_auc["macro"]),
             color='navy',
             linestyle=':',
             linewidth=4)

    colors = cycle(['aqua', 'darkorange', 'cornflowerblue'])
    for i, color in zip(range(n_classes), colors):
        plt.plot(fpr[i],
                 tpr[i],
                 color=color,
                 lw=lw,
                 label='ROC curve of class {0} (area = {1:0.2f})'
                 ''.format(i, roc_auc[i]))
    plt.legend(loc="lower right")
    plt.savefig('/home/clarkr/Figures/' + runname + '_roc.png')
    np.save('/home/clarkr/confmatdata/' + runname + '_fp.npy', fpr)
    np.save('/home/clarkr/confmatdata/' + runname + '_tp.npy', tpr)
    return cm
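The macro-averaging block in the middle of the function is worth isolating: pool every class's FPR grid, interpolate each class's TPR onto that shared grid, and average. A standalone sketch with invented per-class curves, using np.interp in place of the deprecated scipy interp:

# Standalone macro-average step with made-up per-class ROC points.
import numpy as np

fpr = {0: np.array([0.0, 0.2, 1.0]), 1: np.array([0.0, 0.5, 1.0])}
tpr = {0: np.array([0.0, 0.8, 1.0]), 1: np.array([0.0, 0.6, 1.0])}
n_classes = 2

all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)]))
mean_tpr = np.zeros_like(all_fpr)
for i in range(n_classes):
    mean_tpr += np.interp(all_fpr, fpr[i], tpr[i])
mean_tpr /= n_classes  # the macro-average ROC curve
print(all_fpr, mean_tpr)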
def CNN_function():
    x_train_label = []
    x_test_label = []
    x_predict_label = []

    x_train_dataset = []
    x_test_dataset = []
    x_predict_dataset = []

    X_label = pd.Categorical(X)
    categories = X_label.categories
    X_label = X_label.codes

    # Split the data into train and test dataset
    x_train_label, x_test_label, x_train_dataset, x_test_dataset = train_test_split(
        X_label, Y, test_size=0.19, random_state=3)

    # Split the training data further into training and prediction sets
    x_train_label, x_predict_label, x_train_dataset, x_predict_dataset = train_test_split(
        x_train_label, x_train_dataset, test_size=0.23, random_state=3)

    x_train_nmpy = np.array(x_train_label)
    x_test_nmpy = np.array(x_test_label)
    x_predict_nmpy = np.array(x_predict_label)

    # Normalize the pixel values of the train data and test data
    x_train_dataset = tf.keras.utils.normalize(x_train_dataset, axis=1)
    x_test_dataset = tf.keras.utils.normalize(x_test_dataset, axis=1)
    x_predict_dataset = tf.keras.utils.normalize(x_predict_dataset, axis=1)

    # Reshaping the dataset for input to neural network
    x_train_dataset = x_train_dataset.reshape(
        (x_train_dataset.shape[0], 28, 28, 1)).astype('float32')
    x_test_dataset = x_test_dataset.reshape(
        (x_test_dataset.shape[0], 28, 28, 1)).astype('float32')
    x_predict_dataset = x_predict_dataset.reshape(
        (x_predict_dataset.shape[0], 28, 28, 1)).astype('float32')

    # Sequential model for the network:
    # 2 convolution layers (30 and 15 filters, 6x6 and 3x3 kernels),
    # each followed by 2x2 max pooling, then dropout, a flatten layer,
    # two 128-unit dense layers, and a 10-way softmax output

    model = Sequential()
    model.add(Conv2D(30, (6, 6), input_shape=(28, 28, 1), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(15, (3, 3), activation='relu'))
    model.add(MaxPooling2D(2, 2))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(10, activation='softmax'))

    # Compilation of given model
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    num_epochs = 30

    # Training our model
    Model1 = model.fit(x_train_dataset,
                       x_train_nmpy,
                       epochs=num_epochs,
                       validation_data=(x_test_dataset, x_test_nmpy))

    plt.plot(Model1.history['accuracy'])
    plt.plot(Model1.history['val_accuracy'])

    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

    # Predictions and Testing Part

    predictions = model.predict(x_predict_dataset)

    pred = []

    for i in range(0, len(x_predict_label)):
        pred.append(np.argmax(predictions[i]))

    total = len(pred)
    n_p = 0
    for x in range(0, len(pred)):
        if (int(pred[x]) == int(x_predict_label[x])):
            n_p = n_p + 1

    print("The accuracy of test set: " + str((n_p / total) * 100))

    a = x_predict_label.tolist()

    cm = confusion_matrix(a, pred)
    print(cm)

    # Plotting the confusion matrix
    df_cm = pd.DataFrame(cm, range(10), range(10))
    sn.set(font_scale=1.2)  #for label size
    sn.heatmap(df_cm, annot=True, annot_kws={"size": 16})  # font size
    plt.show()