Ejemplo n.º 1
0
def plot_save(dist, label, model_name):
    """Plot the distribution of ``dist``, save it as a PNG and log its stats.

    The histogram (8 bins, with a fitted ``norm`` curve) is drawn on the
    current pyplot figure, which is cleared both before drawing and after
    saving.  The image goes to ``PLOT_FOLDER`` as
    ``pred_<label>_<model_name>.png``.  A row holding the model name, the
    label and the mean and standard deviation of ``dist`` is then handed to
    ``save_report_to_csv`` for ``acc_validation_report.csv``.

    Args:
        dist: Values to plot and summarise.
        label: Text used for the x axis, the file name and the report row.
        model_name: Model identifier; underscores become spaces and the text
            is upper-cased for the plot title.
    """
    # Start from an empty figure so earlier drawings do not leak in.
    plt.clf()
    sns.distplot(dist, fit=norm, kde=False, bins=8)
    plt.xlabel(label)
    plt.ylabel('Frequency')

    display_name = model_name.replace('_', ' ').upper()
    plt.title('Classifier Accuracy:' + display_name)

    image_path = PLOT_FOLDER + "pred_%s_%s.png" % (label, model_name)
    plt.savefig(image_path)
    plt.clf()

    summary_row = [model_name, label, np.mean(dist), np.std(dist)]
    save_report_to_csv(REPORT_FOLDER + 'acc_validation_report.csv', summary_row)
Ejemplo n.º 2
0
def plot_confusion_matrix (confusion_matrix_array):
    """Print, report and plot a 2x2 confusion matrix.

    The matrix is printed between start/end banners, its four cells are
    passed to ``save_report_to_csv`` (file name derived from
    ``VALIDATION_FILE``), and an annotated heatmap is saved as a PNG in
    ``PLOT_FOLDER``.

    Args:
        confusion_matrix_array: Indexable 2x2 matrix.  Cells ``[0][0]``,
            ``[0][1]``, ``[1][0]`` and ``[1][1]`` are reported; in the plot
            the rows are labelled 'Real' and the columns 'Predicted', with
            tick labels 'Non Political' / 'Political'.

    Returns:
        None.  The figure used for the heatmap is closed before returning.
    """
    print ('###### Start Confusion Matrix ####')

    print (confusion_matrix_array)

    report_path = (REPORT_FOLDER + get_model_name_by_file(VALIDATION_FILE)
                   + '_confusion_report.csv')
    save_report_to_csv (report_path, [
        'MultinomialNB',
        get_model_name_by_file(MODEL_FILE),
        confusion_matrix_array[0][0],
        confusion_matrix_array[0][1],
        confusion_matrix_array[1][0],
        confusion_matrix_array[1][1]
    ])

    print ('###### End Confusion Matrix ####')

    df_cm = pd.DataFrame(confusion_matrix_array, range(2), range(2))

    # Draw on a fresh, empty figure so the saved image contains only the
    # heatmap (no stray line plot or legend from an earlier plotting call).
    fig, ax = plt.subplots()
    try:
        sn.heatmap(df_cm, annot=True, fmt='g', ax=ax,
                   annot_kws={"size": 16})  # annotation font size

        # labels, title and ticks
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Real')

        ax.yaxis.set_ticklabels(['Non Political', 'Political'])
        ax.xaxis.set_ticklabels(['Non Political', 'Political'])

        # Strip the model-file suffix and the folder prefix to get a short
        # name usable in both the title and the output file name.
        model_name = MODEL_FILE
        model_name = model_name.replace ('.politics_ben.skl', '')
        model_name = model_name.replace (SKL_FOLDER, '')

        ax.set_title(model_name.replace ('_', ' ').upper())

        fig.savefig(PLOT_FOLDER + 'confusion_matrix_publica_' + model_name + '.png', dpi=400)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
Ejemplo n.º 3
0
def plot_save(dist, label):
    """Plot the distribution of ``dist`` for the CNN run and log its stats.

    An 8-bin histogram with a fitted ``norm`` curve is drawn on the current
    pyplot figure, saved to ``PLOT_FOLDER`` as
    ``pred_<label>_<RUN>_CNN.png`` and the figure is then cleared.  ``RUN``
    is ``H5_FILE`` with the ``.h5`` text and the ``H5_FOLDER`` text removed,
    upper-cased.  Finally a row with the mean and standard deviation of
    ``dist`` is handed to ``save_report_to_csv`` for
    ``acc_validation_report.csv``.

    Args:
        dist: Values to plot and summarise.
        label: Text used for the x axis, the file name and the report row.
    """
    sns.distplot(dist, fit=norm, kde=False, bins=8)
    plt.xlabel(label)
    plt.ylabel('Frequency')

    # Short run name: weights file without its extension and folder text.
    run_name = H5_FILE.replace('.h5', '').replace(H5_FOLDER, '')

    plt.title('Accuracy of CNN classifier: (%s)' % run_name.upper())
    image_name = "pred_%s_%s_CNN.png" % (label, run_name.upper())
    plt.savefig(PLOT_FOLDER + image_name)
    plt.clf()

    summary_row = ['CNN', label + ' ' + run_name, np.mean(dist), np.std(dist)]
    save_report_to_csv(REPORT_FOLDER + 'acc_validation_report.csv', summary_row)
Ejemplo n.º 4
0
def train_CNN(X,
              y,
              inp_dim,
              model,
              weights,
              epochs=EPOCHS,
              batch_size=BATCH_SIZE):
    """Cross-validate ``model`` over shuffled K-fold splits and report scores.

    For each of the ``NO_OF_FOLDS`` folds the model is re-initialised
    according to ``INITIALIZE_WEIGHTS_WITH``, trained batch by batch for
    ``epochs`` epochs and evaluated on the held-out fold.  Weighted, micro,
    macro and per-class precision / recall / F1 are collected per fold.  The
    fold averages of the weighted and micro scores are printed, and one row
    with the weighted, macro and per-class figures is passed to
    ``save_report_to_csv`` for ``CNN_training_report.csv``.

    Args:
        X: 2-D array of inputs; ``X.shape[1]`` is used as the sentence length
            and rows are selected with the fold index arrays.
        y: Array of labels aligned with the rows of ``X`` (one label per
            row -- it is reshaped to a single column before training).
        inp_dim: Unused; kept so the call signature stays unchanged.
        model: Model exposing ``layers``, ``train_on_batch`` and
            ``predict_on_batch``.
        weights: Weight matrix loaded into ``model.layers[0]`` when
            ``INITIALIZE_WEIGHTS_WITH == "word2vec"``.
        epochs: Number of passes over each training fold.
        batch_size: Batch size forwarded to ``batch_gen``.

    Returns:
        None.  If ``INITIALIZE_WEIGHTS_WITH`` is neither ``"word2vec"`` nor
        ``"random"``, "ERROR!" is printed and the function returns before
        any training or reporting happens.
    """
    cv_object = KFold(n_splits=NO_OF_FOLDS, shuffle=True, random_state=42)
    print(cv_object)
    p, r, f1 = [], [], []
    p1, r1, f11 = 0., 0., 0.
    p_class, r_class, f1_class = [], [], []
    sentence_len = X.shape[1]

    marcro_f1, macro_r, macro_p = [], [], []

    # The number of classes does not change during training, so compute it
    # once instead of rebuilding the label set for every batch.
    num_classes = len(set(tx_class))

    for train_index, test_index in cv_object.split(X):
        # Re-initialise before every fold: either load ``weights`` into the
        # first layer or let ``shuffle_weights`` handle the model.
        if INITIALIZE_WEIGHTS_WITH == "word2vec":
            model.layers[0].set_weights([weights])
        elif INITIALIZE_WEIGHTS_WITH == "random":
            shuffle_weights(model)
        else:
            print("ERROR!")
            return
        X_train, y_train = X[train_index], y[train_index]
        X_test, y_test = X[test_index], y[test_index]
        # Glue the labels on as a trailing column so that each batch yielded
        # by ``batch_gen`` carries its inputs and labels together.
        y_train = y_train.reshape((len(y_train), 1))
        X_temp = np.hstack((X_train, y_train))
        for _ in range(epochs):
            for X_batch in batch_gen(X_temp, batch_size):
                x = X_batch[:, :sentence_len]
                y_temp = X_batch[:, sentence_len]

                class_weights = None
                if SCALE_LOSS_FUN:
                    # Weight of a class = its share of the current batch.
                    class_weights = {
                        cw: np.where(y_temp == cw)[0].shape[0] /
                        float(len(y_temp))
                        for cw in range(num_classes)
                    }
                try:
                    y_temp = np_utils.to_categorical(y_temp,
                                                     num_classes=num_classes)
                except Exception as e:
                    # NOTE(review): the failure is only logged; training then
                    # continues with the unconverted labels -- confirm this
                    # best-effort behaviour is intended.
                    print(e)
                    print(y_temp)
                model.train_on_batch(x, y_temp, class_weight=class_weights)

        y_pred = model.predict_on_batch(X_test)
        y_pred = np.argmax(y_pred, axis=1)

        p.append(precision_score(y_test, y_pred, average='weighted'))
        p1 += precision_score(y_test, y_pred, average='micro')
        p_class.append(precision_score(y_test, y_pred, average=None))
        r.append(recall_score(y_test, y_pred, average='weighted'))
        r1 += recall_score(y_test, y_pred, average='micro')
        r_class.append(recall_score(y_test, y_pred, average=None))
        f1.append(f1_score(y_test, y_pred, average='weighted'))
        f11 += f1_score(y_test, y_pred, average='micro')
        f1_class.append(f1_score(y_test, y_pred, average=None))

        macro_p.append(precision_score(y_test, y_pred, average='macro'))
        macro_r.append(recall_score(y_test, y_pred, average='macro'))
        marcro_f1.append(f1_score(y_test, y_pred, average='macro'))

    # These lists hold the ``average='weighted'`` scores, so label them as
    # such (the macro scores live in macro_p / macro_r / marcro_f1).
    print("weighted results are")
    print("average precision is %f" % (np.array(p).mean()))
    print("average recall is %f" % (np.array(r).mean()))
    print("average f1 is %f" % (np.array(f1).mean()))

    # One row per fold, one column per class.
    p_by_class = np.array(p_class)
    r_by_class = np.array(r_class)
    f1_by_class = np.array(f1_class)

    save_report_to_csv(
        REPORT_FOLDER + 'CNN_training_report.csv',
        [
            'CNN',
            get_model_name_by_file(POLITICS_FILE),
            # weighted scores: mean and twice the standard deviation
            np.array(p).mean(),
            np.array(p).std() * 2,
            np.array(r).mean(),
            np.array(r).std() * 2,
            np.array(f1).mean(),
            np.array(f1).std() * 2,

            # macro scores: mean and twice the standard deviation
            np.array(macro_p).mean(),
            np.array(macro_p).std() * 2,
            np.array(macro_r).mean(),
            np.array(macro_r).std() * 2,
            np.array(marcro_f1).mean(),
            np.array(marcro_f1).std() * 2,

            # per-class scores (only classes 0 and 1 are reported)
            p_by_class[:, 0].mean(),
            p_by_class[:, 1].mean(),
            r_by_class[:, 0].mean(),
            r_by_class[:, 1].mean(),
            f1_by_class[:, 0].mean(),
            f1_by_class[:, 1].mean()
        ])

    print("micro results are")
    print("average precision is %f" % (p1 / NO_OF_FOLDS))
    print("average recall is %f" % (r1 / NO_OF_FOLDS))
    print("average f1 is %f" % (f11 / NO_OF_FOLDS))
Ejemplo n.º 5
0
    accuracy = accuracy_score (y_true, y_pred)

    save_report_to_csv (REPORT_FOLDER + 'validation_report.csv', [
        'MultinomialNB', 
        get_model_name_by_file(MODEL_FILE),
        get_model_name_by_file(VALIDATION_FILE),

        accuracy,

        p[0],
        p[1],
        r[0],
        r[1], 
        f1[0],
        f1[1],
        s[0],
        s[1],
        
        f1_macro,
        recall_macro,
        precision_macro,

        mean_auc, 
        std_auc,

        ff1,
        recall,
        precision
    ])

    print ('Confusion Matrix')
Ejemplo n.º 6
0
def classification_model(X, Y, model_type=None):
    """Fit a classifier, cross-validate it and write a training report.

    The data is shuffled with ``SEED``.  If ``load_hiperparameters`` yields
    nothing usable (``None`` or any other falsy value) a ``GridSearchCV``
    over ``param_grid[model_type]`` is fitted and its best estimator and
    parameters are printed and handed to ``save_hiperparameters``;
    otherwise ``get_model(model_type)`` is configured with the stored
    parameters.  Weighted and macro precision / recall / F1 are obtained
    with ``NO_OF_FOLDS``-fold cross-validation, per-class scores from
    cross-validated predictions, and everything is passed to
    ``save_report_to_csv`` for ``<model_type>_training_report.csv``.

    Args:
        X: Feature matrix.
        Y: Target labels aligned with ``X``.
        model_type: Key understood by ``get_model`` and ``param_grid``.  It
            is also concatenated into the report file name, so the default
            ``None`` is presumably not usable in practice -- verify against
            callers.

    Returns:
        The fitted model: the ``GridSearchCV`` object when a search was run,
        otherwise the estimator configured with the stored parameters.
    """
    X, Y = shuffle(X, Y, random_state=SEED)
    print("Model Type:", model_type)

    params = load_hiperparameters (POLITICS_FILE)

    # A single flag drives both building the search and reporting its
    # results, so an empty-but-not-None ``params`` cannot run the search and
    # then silently skip saving the best hyperparameters.
    use_grid_search = not params

    if use_grid_search:
        model = GridSearchCV(estimator=get_model(model_type), param_grid=param_grid[model_type], n_jobs=-1, verbose=3)
    else:
        model = get_model(model_type)
        model.set_params (**params)

    model.fit(X, Y)

    predictions = cross_val_predict(model, X, Y, cv=NO_OF_FOLDS)

    if use_grid_search:
        # Best effort: a failure to print or persist the search result is
        # logged and must not abort the evaluation below.
        try:
            print('\n Best estimator:')
            print(model.best_estimator_)
            save_hiperparameters (POLITICS_FILE, model.best_estimator_)

            print('\n Best hyperparameters:')
            print(model.best_params_)
        except Exception as error:
            print (error)
            print ('Nothind to do!')

    scores1 = cross_val_score(model, X, Y, cv=NO_OF_FOLDS, scoring='precision_weighted')

    print("Precision(avg): %0.3f (+/- %0.3f)" %(scores1.mean(), scores1.std() * 2))

    precision_score_mean = scores1.mean()
    precision_score_std = scores1.std() * 2

    scores2 = cross_val_score(model, X, Y, cv=NO_OF_FOLDS, scoring='recall_weighted')
    print("Recall(avg): %0.3f (+/- %0.3f)" % (scores2.mean(), scores2.std() * 2))

    recall_score_mean = scores2.mean()
    recall_score_std = scores2.std() * 2

    scores3 = cross_val_score(
        model, X, Y, cv=NO_OF_FOLDS, scoring='f1_weighted')
    print("F1-score(avg): %0.3f (+/- %0.3f)" %
          (scores3.mean(), scores3.std() * 2))

    f1_score_mean = scores3.mean()
    f1_score_std = scores3.std() * 2

    # Per-class metrics come from the cross-validated predictions.
    f1_class = f1_score(Y, predictions, average=None)
    r_class = recall_score(Y, predictions, average=None)
    p_class = precision_score(Y, predictions, average=None)

    f1_macro = cross_val_score(model, X, Y, cv=NO_OF_FOLDS, scoring='f1_macro')
    r_macro = cross_val_score(model, X, Y, cv=NO_OF_FOLDS, scoring='recall_macro')
    p_macro = cross_val_score(model, X, Y, cv=NO_OF_FOLDS, scoring='precision_macro')

    print (f1_class, r_class, p_class)

    save_report_to_csv (REPORT_FOLDER  + model_type +'_training_report.csv', [
        model_type,
        get_model_name_by_file(POLITICS_FILE),

        # weighted scores: mean and twice the standard deviation
        precision_score_mean,
        precision_score_std,
        recall_score_mean,
        recall_score_std,
        f1_score_mean,
        f1_score_std,

        # macro scores: mean and twice the standard deviation
        f1_macro.mean(),
        f1_macro.std() * 2,
        r_macro.mean(),
        r_macro.std() * 2,
        p_macro.mean(),
        p_macro.std() * 2,

        # per-class scores (only classes 0 and 1 are reported)
        f1_class[0],
        f1_class[1],
        r_class[0],
        r_class[1],
        p_class[0],
        p_class[1],
    ])

    return model