Code example #1
# Assumed imports for this fragment (not shown in the original)
import datetime as dt
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import (accuracy_score, cohen_kappa_score,
                             confusion_matrix, f1_score, matthews_corrcoef)


def Training(X, y, classifier, RF=False):

    cv = KFold(n_splits=10, shuffle=True, random_state=12345)
    start_time = dt.datetime.now()
    print('Start learning at {}'.format(str(start_time)))
    i = 0
    
    df_f1_micro = []
    df_f1_macro = []
    df_accuracy = []
    df_MCC = []

    for train_index, test_index in cv.split(X):
        i += 1
        classifier.fit(X[train_index], y[train_index])
        ypred = classifier.predict(X[test_index])
        kappa_score = cohen_kappa_score(y[test_index], ypred)
        confmat = confusion_matrix(y[test_index], ypred)
        f1micro = f1_score(y[test_index], ypred,average="micro")
        f1macro = f1_score(y[test_index], ypred,average="macro")
        accuracy = accuracy_score(y[test_index], ypred)
        MCC = matthews_corrcoef(y[test_index], ypred)
        print("\nKappa score\n" ,kappa_score,"\n")
        print("\n confmat\n",confmat,"\n")
        print("\n f1 micro\n", f1micro,"\n")
        print("\n f1 macro\n", f1macro,"\n")
        print("\n accuracy\n", accuracy,"\n")
        
        if i == 1:
            df_confmat = pd.DataFrame(confmat)
        else:
            # DataFrame.append was removed in pandas 2.0; use pd.concat
            df_confmat = pd.concat([df_confmat, pd.DataFrame(confmat)])

        df_f1_micro.append(f1micro)
        df_f1_macro.append(f1macro)
        df_accuracy.append(accuracy)
        df_MCC.append(MCC)
    
    df_f1_micro = pd.DataFrame(df_f1_micro,columns=["f1micro"])
    df_f1_macro = pd.DataFrame(df_f1_macro,columns=["f1macro"])
    df_accuracy = pd.DataFrame(df_accuracy,columns=["accuracy"])
    df_MCC = pd.DataFrame(df_MCC,columns = ["MCC"] ) 
    end_time = dt.datetime.now() 
    
    print('Stop learning {}'.format(str(end_time)))
    elapsed_time= end_time - start_time
    print('Elapsed learning {}'.format(str(elapsed_time)))
    
    if RF:
        # MakeTree is a project-specific helper (not shown in this fragment)
        MakeTree(0, classifier, X)
    
    return df_confmat, df_f1_micro, df_f1_macro, df_accuracy, df_MCC
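
A hypothetical call, assuming X and y are NumPy arrays already in scope (the classifier choice here is illustrative, not from the original):

# Hypothetical usage of Training with a random forest
from sklearn.ensemble import RandomForestClassifier

confmat, f1_micro, f1_macro, acc, mcc = Training(
    X, y, RandomForestClassifier(n_estimators=100), RF=True)
print(acc.mean())  # mean accuracy over the 10 folds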
Code example #2
def compute_results_from_AMT(auto_list, set_separator_index, file_annotations, in_score_a_column_idx=5, in_score_b_column_idx=7, in_term_column_idx=1):
    auto_s = build_automated_sets(auto_list, set_separator_index)
    dictionary_a = get_term_value_dictionary_AMT(file_annotations, in_score_a_column_idx, in_term_column_idx)
    dictionary_b = get_term_value_dictionary_AMT(file_annotations, in_score_b_column_idx, in_term_column_idx)
    
    dictionary_merge = {}
    for k in dictionary_a.keys():
        dictionary_merge[k] = np.mean([dictionary_a[k],dictionary_b[k]]) 
    ground_truth_rank = sorted(dictionary_merge, key=dictionary_merge.get, reverse=True)
    
#     ground_sets = build_ground_truth_sets(dictionary_merge, 3)
#     [p, r] = evaluate_results_top_bottom_classification(ground_sets, auto_s)
    
    tau_result = evaluate_results_top_bottom_rank(ground_truth_ranked_list=ground_truth_rank, auto_sets=auto_s)
    tau_result_ties = evaluate_results_top_bottom_optimistic(dictionary_merge, auto_s)
    tau_result_skip_ties = evaluate_results_top_bottom_skip_ties(dictionary_merge, auto_s)
    
    scores_literal_a = get_term_value_list_AMT(file_annotations, score_column_idx = in_score_a_column_idx)
    scores_literal_b = get_term_value_list_AMT(file_annotations, score_column_idx = in_score_b_column_idx)
    
    scores_a = [convert_score(s) for s in scores_literal_a]
    scores_b = [convert_score(s) for s in scores_literal_b]
    k_result = cohen_kappa_score(scores_a, scores_b)
    
    print(evaluate_results_true_tau_b(dictionary_merge, auto_s))
    
    return [k_result, tau_result_ties, tau_result_skip_ties]
Code example #3
def compute_agreement_AMT(file_annotations, in_score_a_column_idx=5, in_score_b_column_idx=7):
    scores_literal_a = get_term_value_list_AMT(file_annotations, score_column_idx = in_score_a_column_idx)
    scores_literal_b = get_term_value_list_AMT(file_annotations, score_column_idx = in_score_b_column_idx)
    
    scores_a = [convert_score(s) for s in scores_literal_a]
    scores_b = [convert_score(s) for s in scores_literal_b]
    
    
    k_result = cohen_kappa_score(scores_a, scores_b)
    k_percent = len([(x,y) for x, y in zip(scores_a, scores_b) if x == y]) / len(scores_a) 

    scores_a_rebased, scores_b_rebased = rebase_scores(scores_a, scores_b)
    
    k_permissive = cohen_kappa_score(scores_a_rebased, scores_b_rebased)
    k_permissive_percent = len([(x,y) for x, y in zip(scores_a_rebased, scores_b_rebased) if x == y]) / len(scores_a_rebased)
    
    return k_result, k_percent, k_permissive, k_permissive_percent
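
For reference, a minimal self-contained sketch of the two agreement figures this function reports, on toy scores (convert_score and rebase_scores are project helpers and are not reproduced here):

# Minimal sketch: kappa vs. raw percent agreement on toy annotator scores
from sklearn.metrics import cohen_kappa_score

scores_a = [1, 2, 2, 3, 1, 3]  # hypothetical annotator A
scores_b = [1, 2, 3, 3, 1, 2]  # hypothetical annotator B

kappa = cohen_kappa_score(scores_a, scores_b)
percent = sum(a == b for a, b in zip(scores_a, scores_b)) / len(scores_a)
print(kappa, percent)  # kappa (0.5) < raw agreement (0.67): kappa discounts chance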
Code example #4
    def training(self, Model, C=1, gamma=1):
        cv = KFold(n_splits=10, shuffle=True, random_state=12345)
        start_time = dt.datetime.now()
        print('Start learning at {}'.format(str(start_time)))
        i = 0

        df_f1_micro = []
        df_f1_macro = []
        df_accuracy = []
        df_MCC = []

        for train_index, test_index in cv.split(self.X):
            i += 1
            if Model == "SVMLinear":
                classifier = svm.LinearSVC(C=C)
                classifier.fit(self.X[train_index], self.y[train_index])
            else:
                classifier = svm.SVC(kernel='rbf', C=C, gamma=gamma)
                classifier.fit(self.X[train_index], self.y[train_index])
            ypred = classifier.predict(self.X[test_index])
            kappa_score = cohen_kappa_score(self.y[test_index], ypred)
            confmat = confusion_matrix(self.y[test_index], ypred)
            f1micro = f1_score(self.y[test_index], ypred, average="micro")
            f1macro = f1_score(self.y[test_index], ypred, average="macro")
            accuracy = accuracy_score(self.y[test_index], ypred)
            MCC = matthews_corrcoef(self.y[test_index], ypred)
            print("\nKappa score\n", kappa_score, "\n")
            print("\n confmat\n", confmat, "\n")
            print("\n f1 micro\n", f1micro, "\n")
            print("\n f1 macro\n", f1macro, "\n")
            print("\n accuracy\n", accuracy, "\n")

            if i == 1:
                df_confmat = pd.DataFrame(confmat)
            else:
                # DataFrame.append was removed in pandas 2.0; use pd.concat
                df_confmat = pd.concat([df_confmat, pd.DataFrame(confmat)])

            df_f1_micro.append(f1micro)
            df_f1_macro.append(f1macro)
            df_accuracy.append(accuracy)
            df_MCC.append(MCC)

        self.f1_micro = pd.DataFrame(df_f1_micro, columns=["f1micro"])
        self.f1_macro = pd.DataFrame(df_f1_macro, columns=["f1macro"])
        self.accuracy = pd.DataFrame(df_accuracy, columns=["accuracy"])
        self.MCC = pd.DataFrame(df_MCC, columns=["MCC"])
        self.confmat = df_confmat

        end_time = dt.datetime.now()
        print('Stop learning at {}'.format(str(end_time)))
        elapsed_time = end_time - start_time
        print('Elapsed learning time: {}'.format(str(elapsed_time)))

        return self.confmat, self.f1_micro, self.f1_macro, self.accuracy, self.MCC
Code example #5
def compute_agreement(file_annotation_a, file_annotation_b, in_score_column_idx):
    
    scores_a = get_term_value_list(file_annotation_a, score_column_idx = in_score_column_idx)
    scores_b = get_term_value_list(file_annotation_b, score_column_idx = in_score_column_idx)

    k_result = cohen_kappa_score(scores_a, scores_b)
    k_percent = len([(x,y) for x, y in zip(scores_a, scores_b) if x == y]) / len(scores_a)
    
    scores_a_rebased, scores_b_rebased = rebase_scores([int(a) for a in scores_a], [int(b) for b in scores_b])
    
    # Identical rebased lists are scored as perfect agreement directly: when
    # both raters use only a single category, cohen_kappa_score is 0/0 (NaN)
    if scores_a_rebased != scores_b_rebased:
        k_permissive = cohen_kappa_score(scores_a_rebased, scores_b_rebased)
    else:
        k_permissive = 1.0
        
    k_permissive_percent = len([(x,y) for x, y in zip(scores_a_rebased, scores_b_rebased) if x == y]) / len(scores_a_rebased)    
    
    return k_result, k_percent, k_permissive, k_permissive_percent
Code example #6
def classificationSummary(y_true, y_pred, class_names=None):
    """ Provide a comprehensive summary of classification performance similar to R's confusionMatrix """
    confMatrix = classification.confusion_matrix(y_true, y_pred)
    # As in R's confusionMatrix, the first class (row/column 0) is treated as positive
    TP = confMatrix[0, 0]
    FP = confMatrix[1, 0]
    TN = confMatrix[1, 1]
    FN = confMatrix[0, 1]
    N = TN + TP + FN + FP
    sensitivity = TP / (TP + FN)
    specificity = TN / (TN + FP)
    prevalence = (TP + FN) / N 
    PPV = TP / (TP + FP)
    NPV = TN / (TN + FN)
    BAC = (sensitivity + specificity) / 2
    
    metrics = [
        ('Accuracy', classification.accuracy_score(y_true, y_pred)),
        ('95% CI', None),
        ('No Information Rate', None),
        ('P-Value [Acc > NIR]', None),
        (None, None),
        ('Kappa', classification.cohen_kappa_score(y_true, y_pred)),
        ("Mcnemar's Test P-Value", None),
        (None, None),
        ('Sensitivity', sensitivity),
        ('Specificity', specificity),
        ('Pos Pred Value', PPV),
        ('Neg Pred Value', NPV),
        ('Prevalence', prevalence),
        ('Detection Rate', None),
        ('Detection Prevalence', None),
        ('Balanced Accuracy', BAC),
        ]

    print('Confusion Matrix and Statistics\n')
    _printConfusionMatrix(confMatrix, class_names)
    if len(set(y_true)) < 5:
        print(classification_report(y_true, y_pred, digits=4))
    
    fmt1 = '{{:>{}}} : {{:.3f}}'.format(max(len(m[0]) for m in metrics if m[0] is not None))
    fmt2 = '{{:>{}}} : {{}}'.format(max(len(m[0]) for m in metrics if m[0] is not None))
    for metric, value in metrics:
        if metric is None:
            print()
        elif value is None:
            pass
            # print(fmt2.format(metric, 'missing'))
        else:
            print(fmt1.format(metric, value))
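
A self-contained cross-check of the derived statistics using sklearn directly; note that sklearn's ravel() cell order is tn, fp, fn, tp (positive class = 1), unlike the class-0-as-positive indexing above:

# Toy binary example: recompute sensitivity/specificity from the confusion matrix
from sklearn.metrics import confusion_matrix

y_true = [0, 0, 0, 1, 1, 1, 1, 0]
y_pred = [0, 0, 1, 1, 1, 0, 1, 0]

tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
sensitivity = tp / (tp + fn)               # recall of the positive class
specificity = tn / (tn + fp)
ppv = tp / (tp + fp)                       # precision
npv = tn / (tn + fn)
balanced_accuracy = (sensitivity + specificity) / 2
print(sensitivity, specificity, ppv, npv, balanced_accuracy)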
Code example #7
def compute_results(auto_list, set_separator_index, file_annotation_a, file_annotation_b, in_score_column_idx, in_term_column_idx):
    auto_s = build_automated_sets(auto_list, set_separator_index)
    dictionary_a = get_term_value_dictionary(file_annotation_a, in_score_column_idx, in_term_column_idx)
    dictionary_b = get_term_value_dictionary(file_annotation_b, in_score_column_idx, in_term_column_idx)
      
    dictionary_merge = {}
    for k in dictionary_a.keys():
        dictionary_merge[k] = np.mean([dictionary_a[k],dictionary_b[k]]) 
    ground_truth_rank = sorted(dictionary_merge, key=dictionary_merge.get, reverse=True)
    
    tau_result = evaluate_results_top_bottom_rank(ground_truth_ranked_list=ground_truth_rank, auto_sets=auto_s)
    tau_result_ties = evaluate_results_top_bottom_optimistic(dictionary_merge, auto_s)
    tau_result_skip_ties = evaluate_results_top_bottom_skip_ties(dictionary_merge, auto_s)
    
    scores_a = get_term_value_list(file_annotation_a, score_column_idx = in_score_column_idx)
    scores_b = get_term_value_list(file_annotation_b, score_column_idx = in_score_column_idx)

    k_result = cohen_kappa_score(scores_a, scores_b)
    
    return [k_result, tau_result_ties, tau_result_skip_ties] 
Code example #8
def get_classification_metrics(ground_truth_labels, predicted_labels):
    classification_metric_dict = {}
    classification_metric_dict['accuracy'] = accuracy_score(
        ground_truth_labels, predicted_labels)
    classification_metric_dict['precision'] = precision_score(
        ground_truth_labels, predicted_labels, average='weighted')
    classification_metric_dict['recall'] = recall_score(ground_truth_labels,
                                                        predicted_labels,
                                                        average='weighted')
    classification_metric_dict['f1_score'] = f1_score(ground_truth_labels,
                                                      predicted_labels,
                                                      average='weighted')
    # brier_score_loss expects positive-class probabilities; with hard 0/1
    # labels (binary only) it reduces to 1 - accuracy
    classification_metric_dict['brier_score_loss'] = brier_score_loss(
        ground_truth_labels, predicted_labels)
    classification_metric_dict['matthews_corr_coef'] = matthews_corrcoef(
        ground_truth_labels, predicted_labels)
    classification_metric_dict['jaccard_score'] = jaccard_score(
        ground_truth_labels, predicted_labels, average='weighted')
    classification_metric_dict['cohen_kappa_score'] = cohen_kappa_score(
        ground_truth_labels, predicted_labels)

    return classification_metric_dict
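
A quick usage check on toy binary labels; all of the metric functions used above come from sklearn.metrics:

# Toy call (the imports mirror exactly what the function needs)
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, brier_score_loss, matthews_corrcoef,
                             jaccard_score, cohen_kappa_score)

y_true = [0, 1, 1, 0, 1, 0]
y_pred = [0, 1, 0, 0, 1, 1]
print(get_classification_metrics(y_true, y_pred))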
Code example #9
def main():
    samples, labels, _ = loader.load_img("radio_img")

    # add the fourth (color-channel) dimension -> shape (N, H, W, 1)
    samples = np.expand_dims(samples, axis=-1)

    print("shape = {}".format(samples.shape))
    inputShape = (samples.shape[1], samples.shape[2], samples.shape[3])
    print("inputShape = {}".format(inputShape))

    # class weights (modern sklearn requires keyword arguments here)
    class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                      classes=np.unique(labels),
                                                      y=labels)
    d_class_weights = dict(enumerate(class_weights))
    print("weights {}".format(d_class_weights))

    #one-hot encoding
    lb = LabelBinarizer()
    labels = lb.fit_transform(labels)
    classesNum = labels.shape[1]
    print("Classes: {}".format(classesNum))

    #split to training and test
    (trainSamples, testSamples, trainLabels,
     testLabels) = train_test_split(samples,
                                    labels,
                                    test_size=0.25,
                                    random_state=42)

    model = cnn_model(inputShape, classesNum)

    ## checkpoints
    #    checkpt1 = ModelCheckpoint(filepath='model.{epoch:02d}-{val_loss:.2f}.h5', save_best_only=True)
    #    checkpt2 = EarlyStopping(monitor='val_loss', patience=3)

    EPOCHS = 20
    BATCH = 50
    model.fit(
        trainSamples,
        trainLabels,
        batch_size=BATCH,
        epochs=EPOCHS,
        class_weight=d_class_weights,
        verbose=1,
        #callbacks = [checkpt1,checkpt2],
        validation_data=(testSamples, testLabels))

    cnnResults = model.predict(testSamples)

    print(
        confusion_matrix(testLabels.argmax(axis=1), cnnResults.argmax(axis=1)))
    print(
        classification_report(testLabels.argmax(axis=1),
                              cnnResults.argmax(axis=1)))
    cnnAcc = accuracy_score(testLabels.argmax(axis=1),
                            cnnResults.argmax(axis=1))
    print("Accuracy CNN: {:.2f}".format(cnnAcc))
    print("Cohen's Kappa {:.2f}".format(
        cohen_kappa_score(testLabels.argmax(axis=1),
                          cnnResults.argmax(axis=1))))
    input("")
Code example #10
# feature-vector branch (length-14 inputs)
flatmodel = Sequential()
flatmodel.add(Flatten(input_shape=(14, )))
flatmodel.add(Dense(50, activation='sigmoid'))

# concatenated model: merge the CNN branch with the feature-vector branch
combined = concatenate([cnnmodel.output, flatmodel.output])
combined = Dense(16, activation="sigmoid")(combined)
# sigmoid output; softmax is the more conventional pairing with categorical_crossentropy
combined = Dense(numClasses, activation="sigmoid")(combined)

model = Model(inputs=[cnnmodel.input, flatmodel.input], outputs=combined)

print(model.summary())

model.compile(loss='categorical_crossentropy',
              optimizer="adam",
              metrics=['accuracy'])

EPOCHS = 10
BATCH = 100
model.fit([samplesIMG, samplesCSV], labels, batch_size=BATCH, epochs=EPOCHS)

results = model.predict([samplesIMG, samplesCSV])

print(confusion_matrix(labels.argmax(axis=1), results.argmax(axis=1)))
print(classification_report(labels.argmax(axis=1), results.argmax(axis=1)))
print("Accuracy: {:.2f}".format(
    accuracy_score(labels.argmax(axis=1), results.argmax(axis=1))))
print("Cohen's Kappa {:.2f}".format(
    cohen_kappa_score(labels.argmax(axis=1), results.argmax(axis=1))))
input("")
Code example #11
        result = kerasAdapter.predict(dataTestGenerator,
                                      batch_size=parameters['batchSize'])
        testClasses = classes[testIndex]
        metrics = dict()
        metrics['fscore'] = f1_score(testClasses, result, average='weighted')
        metrics['precision'] = precision_score(testClasses,
                                               result,
                                               average='weighted')
        metrics['recall'] = recall_score(testClasses,
                                         result,
                                         average='weighted')
        metrics['auc'] = roc_auc_score(testClasses, result, average='weighted')

        metrics['fscore_b'] = f1_score(testClasses, result)
        metrics['precision_b'] = precision_score(testClasses, result)
        metrics['recall_b'] = recall_score(testClasses, result)
        metrics['auc_b'] = roc_auc_score(testClasses, result)

        metrics['kappa'] = cohen_kappa_score(testClasses, result)
        metrics['accuracy'] = accuracy_score(testClasses, result)
        # ravel() yields tn, fp, fn, tp; despite the key name, this stores the
        # raw true-positive count, not a rate
        tn, fp, fn, metrics['tp_rate'] = confusion_matrix(testClasses,
                                                          result).ravel()
        print(classification_report(testClasses, result))
        metrics["fold"] = i
        if dictWriter is None:
            dictWriter = csv.DictWriter(cvsFileHandler, metrics.keys())
        if metrics['fold'] == 0:
            dictWriter.writeheader()
        dictWriter.writerow(metrics)
        i += 1
Code example #12
            classifier_fname = 'classifiers/{}_{}_fold{}.pkl'.format(
                classifier_row['fname'].split('.')[0],
                classifier_row['classifier'], classifier_row['fold'])
            classifier = joblib.load(open(classifier_fname, 'rb'))
            try:
                predicted = classifier.predict(data_test)
                metrics = dict()

                metrics['fname'] = classifier_row['fname']
                metrics['classifier'] = classifier_row['classifier']
                metrics['accuracy'] = accuracy_score(classes_test, predicted)
                # ravel() yields tn, fp, fn, tp; tp / (tp + fn) is the
                # true-positive rate (recall)
                tn, fp, fn, metrics['tp_rate'] = confusion_matrix(
                    classes_test, predicted).ravel()
                metrics['tp_rate'] = metrics['tp_rate'] / (metrics['tp_rate'] +
                                                           fn)
                metrics['kappa'] = cohen_kappa_score(classes_test, predicted)
                metrics['auc'] = roc_auc_score(classes_test,
                                               predicted,
                                               average='weighted')
                metrics['fscore'] = f1_score(classes_test,
                                             predicted,
                                             average='weighted')
                metrics['macro_f'] = f1_score(classes_test,
                                              predicted,
                                              average='macro')

                # metrics['fscore'] = f1_score(classes_test, predicted, average='weighted')
                # metrics['precision'] = precision_score(classes_test, predicted, average='weighted')
                # metrics['recall'] = recall_score(classes_test, predicted, average='weighted')
                # metrics['auc'] = roc_auc_score(classes_test, predicted, average='weighted')
                #
Code example #13
File: main.py  Project: tae0086/tutorial2019
def main():

    #load data 
    file = "datasetA_3c.csv"
    dataframe = pandas.read_csv(file)
    dataset = dataframe.values
    samples = dataset[:,1:]
    labels = dataset[:,0]
    samples = np.array(samples)
    labels = np.array(labels)
    labels = labels.astype(str)

    print("Class distribution:")
    print(Counter(labels))

### choose k best attributes
#    from sklearn.feature_selection.univariate_selection import SelectKBest
#    newSamples = SelectKBest(k=100).fit_transform(samples, labels)
#    print(newSamples.shape) 
#    samples = newSamples

### Calculate weights for unbalanced classes
#    from sklearn.utils import class_weight
#    d_class_weights = None
#    class_weights = class_weight.compute_class_weight('balanced',np.unique(labels),labels)
#    print("Class weights:")
#    print(class_weights)
#    d_class_weights = dict(enumerate(class_weights))

### Normalize samples
#    from sklearn.preprocessing.data import normalize
#    normalize(samples)


    ## convert to one-hot encoding
    lb = LabelBinarizer()
    labels = lb.fit_transform(labels)
    classesNum = labels.shape[1]
    print ("Classes: {}".format(classesNum))

    trainSamples = samples
    trainLabels = labels
    testSamples = samples
    testLabels = labels
    
### Division into training and test samples
#    from sklearn.model_selection._split import train_test_split
#    (trainSamples, testSamples, trainLabels, testLabels) = train_test_split(samples, labels, test_size=0.25, random_state=42)
    
    
    model = Sequential()
    model.add(Dense(250, activation='sigmoid'))
    model.add(Dense(250, activation='sigmoid'))
    model.add(Dense(250, activation='sigmoid'))
    model.add(Dense(classesNum, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer="adam",metrics=['accuracy'])

    EPOCHS=50
    BATCH=50
    H = model.fit(trainSamples, trainLabels, batch_size=BATCH, epochs=EPOCHS
              #,class_weight=d_class_weights
              #,validation_data=(testSamples,testLabels)
              #,validation_split=0.1
              )
    mlpResults = model.predict(testSamples)

    print(confusion_matrix(testLabels.argmax(axis=1), mlpResults.argmax(axis=1)))
    print(classification_report(testLabels.argmax(axis=1), mlpResults.argmax(axis=1),target_names=lb.classes_))
    print("MLP Accuracy: {:.2f}".format(accuracy_score(testLabels.argmax(axis=1), mlpResults.argmax(axis=1))))
    print("Cohen's Kappa {:.2f}".format(cohen_kappa_score(testLabels.argmax(axis=1), mlpResults.argmax(axis=1))))

    N = np.arange(0, EPOCHS)
    plt.style.use("ggplot")
    plt.figure()
    plt.plot(N, H.history["loss"], label="train_loss")
    plt.plot(N, H.history["acc"], label="train_acc")
    #plt.plot(N, H.history["val_loss"], label="val_loss")
    #plt.plot(N, H.history["val_acc"], label="val_acc")
    
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()
    plt.show()
Code example #14
    bots.load()

    data = bots.data
    shp = np.shape(data)
    row = shp[0]
    col = shp[1]
    bands = shp[2]

    X = data.reshape(row * col, bands)

    bots_gt = Dataset('Botswana_gt', '.mat')
    bots_gt.load()
    y = bots_gt.data.reshape(row * col, 1)
    ind = np.where(y[:, 0] != 0)
    X = X[ind]
    y = y[y != 0]
    #Linear SVM with all bands and cv=5
    start_time = time.time()
    svm = SVC(kernel='linear')
    scores = cross_val_score(svm, X, y, cv=5)

    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
    predictions = cross_val_predict(svm, X, y, cv=5)
    kappa_score = cohen_kappa_score(y, predictions)
    print("Kappa coefficient: %0.2f" % kappa_score)
    #print(confusion_matrix(y_test,y_pred))
    print(classification_report(y, predictions))
    print("--- %s seconds ---" % (time.time() - start_time))


# stuff only to run when not called via 'import'
if __name__ == "__main__":
    main()
Code example #15
if os.path.exists('assets/tags.npy'):
    tags = np.load('assets/tags.npy')

# Combine training, validation and testing data
utt_Speaker = utt_Speaker_train + utt_Speaker_dev + utt_Speaker_test
utt_data = utt_train_data + utt_dev_data + utt_test_data
utt_id_data = utt_id_train_data + utt_id_dev_data + utt_id_test_data
utt_Emotion_data = utt_Emotion_train_data + utt_Emotion_dev_data + utt_Emotion_test_data
utt_Sentiment_data = utt_Sentiment_train_data + utt_Sentiment_dev_data + utt_Sentiment_test_data

# Evaluation of context model predictions
print('Accuracy comparison between context-based predictions: {}'.format(
    classification.accuracy_score(meld_elmo_con_out, meld_elmo_mean_con_out)))
print('Kappa (Cohen) score between context-based predictions: {}'.format(
    classification.cohen_kappa_score(meld_elmo_con_out,
                                     meld_elmo_mean_con_out)))
print(
    classification.classification_report(meld_elmo_con_out,
                                         meld_elmo_mean_con_out))
print('Spearman Correlation between context-based predictions: {}'.format(
    stats.spearmanr(meld_elmo_con_out, meld_elmo_mean_con_out)))
reliability_data = convert_predictions_to_indices(meld_elmo_con_out,
                                                  meld_elmo_non_con_out,
                                                  meld_elmo_mean_con_out,
                                                  meld_elmo_mean_non_con_out,
                                                  meld_elmo_top_con_out, tags)
k_alpha = alpha(reliability_data, level_of_measurement='nominal')
print("Krippendorff's alpha: {}".format(round(k_alpha, 6)))

fleiss_kappa_score = fleissKappa(reliability_data, 5)
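
For context, a minimal self-contained sketch of the alpha computation, assuming alpha comes from the krippendorff package (raters as rows, items as columns, np.nan for missing ratings); convert_predictions_to_indices and fleissKappa are project helpers:

# Minimal sketch: Krippendorff's alpha for three raters over five items
import numpy as np
import krippendorff

reliability_data = [[0, 1, 1, 0, np.nan],   # rater 1
                    [0, 1, 1, 0, 1],        # rater 2
                    [0, 1, 0, 0, 1]]        # rater 3
print(krippendorff.alpha(reliability_data=reliability_data,
                         level_of_measurement='nominal'))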
Code example #16
clf1.fit(X_train,Y_train)

# feature importances (an attribute, not a method)
print(clf1.feature_importances_)

predict = clf1.predict(X_test)


#cross val score
score1 = np.mean(cross_val_score(clf, X, Y, scoring='accuracy', cv=10))
print(score1)
# Metrics: accuracy (y_true first, then predictions)
print(accuracy_score(Y_test, predict))

#kappa score
score3 = cohen_kappa_score(Y_test,predict)
print(score3)
#recall score
score2=recall_score(Y_test, predict, average='macro') 
print(score2)


#resampling to get a better model
minor_class = [4,8,3]
major_class = [5,6,7]
df.info()
df_minor = df[df.quality.isin(minor_class)]
df_major = df[df.quality.isin(major_class)]

#Upsampling
df_minor_upsampled = resample(df_minor, replace=True, n_samples=len(df_major), random_state=123)
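
The fragment ends here; a plausible continuation (hypothetical, not part of the original) would concatenate the upsampled minority rows back with the majority rows:

# Hypothetical continuation: rebuild a balanced frame from the resampled parts
df_balanced = pd.concat([df_major, df_minor_upsampled])
print(df_balanced.quality.value_counts())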
Code example #17
def main():
    print("Loading samples and labels")
    samples, labels, _ = load_files("data")
    print("Loaded {} samples".format(samples.shape[0]))

    sequence_dim = 100
    print("Converting to sequences of length {}".format(sequence_dim))
    samples, labels = make_sequences(samples, labels, sequence_dim)

    print("Number of samples from sequences: {}".format(samples.shape[0]))

    lb = LabelBinarizer()
    labels = lb.fit_transform(labels)

    # flatten samples to 2-D, as required by the Decision Tree
    flatSamples = samples.reshape(samples.shape[0], -1)
    (trainSamples, testSamples, trainLabels,
     testLabels) = train_test_split(flatSamples,
                                    labels,
                                    test_size=0.25,
                                    random_state=42)

    print("=" * 20)
    print("Building DecisionTree model")
    model = DecisionTreeClassifier()
    model.fit(trainSamples, trainLabels)
    treeResults = model.predict(testSamples)
    print(
        confusion_matrix(testLabels.argmax(axis=1),
                         treeResults.argmax(axis=1)))
    print(
        classification_report(testLabels.argmax(axis=1),
                              treeResults.argmax(axis=1)))
    treeAcc = accuracy_score(testLabels.argmax(axis=1),
                             treeResults.argmax(axis=1))
    print("Accuracy Tree: {:.2f}".format(treeAcc))
    print("Cohen's Kappa {:.2f}".format(
        cohen_kappa_score(testLabels.argmax(axis=1),
                          treeResults.argmax(axis=1))))

    print("=" * 20)
    print("Building CNN model")

    (trainSamples, testSamples, trainLabels,
     testLabels) = train_test_split(samples,
                                    labels,
                                    test_size=0.25,
                                    random_state=42)
    inputShape = (samples.shape[1], samples.shape[2])
    model = Sequential()
    model.add(Conv1D(32, 10, padding="same", input_shape=inputShape))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    model.add(Conv1D(64, 10, padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))
    model.add(Conv1D(128, 10, padding="same"))
    model.add(Activation("relu"))
    model.add(Dropout(0.2))
    model.add(Flatten())  # input_shape is only meaningful on the first layer
    model.add(Dense(128, activation='sigmoid'))
    model.add(Dense(64, activation='sigmoid'))
    model.add(Dense(labels.shape[1], activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer="adam",
                  metrics=['accuracy'])

    EPOCHS = 10
    BATCH = 128
    model.fit(trainSamples,
              trainLabels,
              batch_size=BATCH,
              epochs=EPOCHS,
              validation_data=(testSamples, testLabels))

    cnnResults = model.predict(testSamples)

    print(
        confusion_matrix(testLabels.argmax(axis=1), cnnResults.argmax(axis=1)))
    print(
        classification_report(testLabels.argmax(axis=1),
                              cnnResults.argmax(axis=1),
                              target_names=lb.classes_))
    print("CNN Accuracy: {:.2f}".format(
        accuracy_score(testLabels.argmax(axis=1), cnnResults.argmax(axis=1))))
    print("Cohen's Kappa {:.2f}".format(
        cohen_kappa_score(testLabels.argmax(axis=1),
                          cnnResults.argmax(axis=1))))
    input("")
Code example #18
File: images_test.py  Project: tae0086/tutorial2019
def printResults(testLabels, testResults):
    print(confusion_matrix(testLabels.argmax(axis=1), testResults.argmax(axis=1)))
    print(classification_report(testLabels.argmax(axis=1), testResults.argmax(axis=1)))
    print("Cohen's Kappa: {}".format(cohen_kappa_score(testLabels.argmax(axis=1), testResults.argmax(axis=1))))
    return accuracy_score(testLabels.argmax(axis=1), testResults.argmax(axis=1))
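
A minimal usage sketch with toy one-hot arrays (numpy and the sklearn.metrics imports are assumed to be in scope, as in the project file):

# Toy call: two classes, four samples
import numpy as np

testLabels = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])          # one-hot truth
testResults = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4], [0.7, 0.3]])
acc = printResults(testLabels, testResults)
print("Accuracy: {:.2f}".format(acc))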
Code example #19
print("*" * 50)
print("Accuracy DT Gini : ", accuracy_score(y_test, y_pred_gini) * 100)
print("Accuracy DT Entropy: ", accuracy_score(y_test, y_pred_entropy) * 100)
print("Accuracy SVM: ", accuracy_score(y_test, y_pred_svm) * 100)
print("Accuracy RF: ", accuracy_score(y_test, y_pred_rf) * 100)
print("Accuracy KNN: ", accuracy_score(y_test, y_pred_knn) * 100)
print("Accuracy NB: ", accuracy_score(y_test, y_pred_nb) * 100)
print("Accuracy Bagging with Mode method: ",
      accuracy_score(y_test, final_pred) * 100)
print("Accuracy ADA: ", accuracy_score(y_test, y_pred_boost) * 100)
print("*" * 50)

#Printing results for our best model
print("ROC_AUC : ", roc_auc_score(y_test, y_pred_boost) * 100)
print("Accuracy K: ", cohen_kappa_score(y_test, y_pred_boost) * 100)

# ROC Graph
y_pred_score = classifier.predict_proba(X_test)
preds = y_pred_score[:, 1]
fpr, tpr, threshold = metrics.roc_curve(y_test, preds)
roc_auc = metrics.auc(fpr, tpr)

# method I: plt
import matplotlib.pyplot as plt
plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])