Example #1
def confusion_matrix_metrics(cm):
    """

    Parameters
    ----------
    cm : ndarray of shape (n_classes, n_classes) 
        scikit-learn confusion matrix object

    Returns
    -----------
    list containing confusion matrix
    sensitivity, specificity, ppv, npv
    """

    tn, fp, fn, tp = cm.ravel()

    # sensitivity -TP/P
    sensitivity = tp / (tp + fn)

    # specificity - TN/N
    specificity = tn / (tn + fp)

    # precision / positive predictive value - TP/(TP+FP)
    ppv = tp / (tp + fp)

    # negative predictive value - TN/(TN+FN)
    npv = tn / (tn + fn)

    return [sensitivity, specificity, ppv, npv]
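A minimal usage sketch for the helper above, assuming scikit-learn is installed and the function is in scope; the labels are invented for illustration:

from sklearn.metrics import confusion_matrix

y_true = [0, 0, 1, 1, 1, 0, 1, 0]   # hypothetical ground-truth labels
y_pred = [0, 1, 1, 1, 0, 0, 1, 0]   # hypothetical predictions

cm = confusion_matrix(y_true, y_pred)   # cm.ravel() order: tn, fp, fn, tp
sensitivity, specificity, ppv, npv = confusion_matrix_metrics(cm)
print(sensitivity, specificity, ppv, npv)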
Example #2
def intersection_over_union(confusion_matrix):
    """ Intersection-over-union metric for image segmentation """

    tn, fp, fn, tp = confusion_matrix.ravel()
    iou = tp / (tp + fn + fp)

    return iou
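A quick sanity check of the IoU helper on a hand-built 2x2 matrix (the counts below are invented):

import numpy as np

cm = np.array([[50, 5],    # tn, fp
               [10, 35]])  # fn, tp
print(intersection_over_union(cm))  # 35 / (35 + 10 + 5) = 0.7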
Example #3
def performance(method, confusion_matrix):
    conn = pymysql.connect(host='10.214.163.179', user='******', password='******', port=3306, database='dt_yc')
    cursor = conn.cursor()

    tn, fp, fn, tp = confusion_matrix.ravel()
    precision = float(tp / (tp + fp))
    recall = float(tp / (tp + fn))
    f1_score = float(2 * (recall * precision) / (recall + precision))

    if method == train_random_forest:
        method_name = 'random forest'
    elif method == train_decision_tree:
        method_name = 'decision tree'
    elif method == train_knn:
        method_name = 'knn'
    else:
        return

    current_date = get_current_date()
    year = int(current_date[0:4])
    month = int(current_date[5:7])
    insert = "INSERT INTO dt_yc.model_train_performance VALUES(%s, %s, %s, %s, %s, %s)"
    val = [method_name, year, month, precision, recall, f1_score]

    cursor.execute(insert, val)
    conn.commit()

    cursor.close()
    conn.close()
def printIndexes(confusion_matrix):
    tn, fp, fn, tp = confusion_matrix.ravel()
    acc = (tp + tn) / (tn + fp + fn + tp)
    err = 1 - acc
    ppv = (tp) / (tp + fp)
    npv = (tn) / (tn + fn)
    tpr = (tp) / (tp + fn)
    tnr = (tn) / (tn + fp)
    f1 = 2 * ((ppv * tpr) / (ppv + tpr))
    mcc = (tp * tn - fp * fn) / math.sqrt(
        (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

    dr = (tp) / (fn + tp)
    far = (fp) / (tn + fp)

    fnr = (fn) / (fn + tp)
    fpr = (fp) / (fp + tn)

    print("\n")
    print("Accuracy: ", acc)
    print("Error: ", err)
    print("Precision or Positive Predictive Value: ", ppv)
    print("Negative Predictive Value: ", npv)
    print("\n")
    print("Sensitivity, Recall, Hit Rate, or True Positive Rate: ", tpr)
    print("Specificity, Selectivity or True Negative Rate: ", tnr)
    print("False Positive Rate: ", fpr)
    print("False Negative Rate: ", fnr)
    print("\n")
    print("F1 score: ", f1)
    print("Matthews Correlation Coefficient: ", mcc)
    print("\n")
    print("DR: ", dr)
    print("FAR: ", far)
    print("\n")
Example #5
    def confusion_derivations(self, confusion_matrix, multi=True):
        """ Get derivations of confusion matrix """

        # Basic derivations
        if confusion_matrix.shape == (2, 2) and multi is False:
            # Binary
            TN, FP, FN, TP = confusion_matrix.ravel()
        else:
            # Multiclass
            FP = (confusion_matrix.sum(axis=0) -
                  np.diag(confusion_matrix)).astype(float)
            FN = (confusion_matrix.sum(axis=1) -
                  np.diag(confusion_matrix)).astype(float)
            TP = (np.diag(confusion_matrix)).astype(float)
            TN = (confusion_matrix.sum() - (FP + FN + TP)).astype(float)

        P = (TP + FN).astype(float)
        N = (TN + FP).astype(float)

        # Add everything to a dictionary
        metrics = {'P':P.astype(int),'N':N.astype(int), \
                   'TP':TP.astype(int),'FP':FP.astype(int),\
                   'TN':TN.astype(int),'FN':FN.astype(int)}
        # Recall
        metrics['TPR'] = TP / P
        # Specificity
        metrics['TNR'] = TN / N
        # Precision
        metrics['PPV'] = TP / (TP + FP)
        # Negative predictive value
        metrics['NPV'] = TN / (TN + FN)
        # False negative rate
        metrics['FNR'] = 1 - metrics['TPR']
        # False positive rate
        metrics['FPR'] = 1 - metrics['TNR']
        # False discovery rate
        metrics['FDR'] = 1 - metrics['PPV']
        # False Omission rate
        metrics['FOR'] = 1 - metrics['NPV']
        # Critical Success Index
        metrics['TS'] = TP / (TP + FN + FP)
        # Accuracy
        metrics['ACC'] = (TP + TN) / (P + N)
        # Balanced Accuracy
        metrics['BACC'] = (metrics['TPR'] + metrics['TNR']) / 2
        # Predicted positive condition rate
        metrics['PPCR'] = (TP + FP) / (TP + FP + TN + FN)
        # F1-score
        metrics['F1'] = 2 * (metrics['PPV'] * metrics['TPR']) / (
            metrics['PPV'] + metrics['TPR'])
        # Matthews correlation coefficient
        metrics['MCC'] = ((TP * TN) - (FP * FN)) / (np.sqrt(
            ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))))
        # Fowlkes-Mallows index
        metrics['FM'] = np.sqrt(metrics['PPV'] * metrics['TPR'])

        # Return metrics
        return metrics
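The multiclass branch above derives per-class TP, FP, FN and TN from row and column sums; a standalone sketch of the same idea on an invented 3-class matrix:

import numpy as np

cm = np.array([[10, 2, 1],
               [3, 15, 2],
               [0, 4, 13]])   # rows: true class, columns: predicted class (invented counts)

TP = np.diag(cm).astype(float)
FP = cm.sum(axis=0) - TP       # predicted as this class but actually another
FN = cm.sum(axis=1) - TP       # belongs to this class but predicted as another
TN = cm.sum() - (TP + FP + FN)

print(TP / (TP + FN))          # per-class recall (TPR), one value per class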
Example #6
    def unpack_confusion_matrix(self, confusion_matrix):
        # If the confusion matrix has only one element (a single class present), ravel() cannot be unpacked into four values, so the two cases are handled separately
        if len(confusion_matrix) > 1:
            _, FP, FN, TP = confusion_matrix.ravel()
        else:
            FP = 0
            FN = 0
            TP = 0

        return FP, FN, TP
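The branch above exists because a run containing a single class yields a 1x1 confusion matrix; a tiny illustration of that degenerate case (invented value):

import numpy as np

cm_one_class = np.array([[7]])   # only one class present in y_true and y_pred
print(len(cm_one_class))         # 1, so the helper above falls back to FP = FN = TP = 0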
Example #7
    def stratified_cross_validation(self, X_labelled, y_labelled, X_unlabelled,
                                    y, labelled_set, unlabelled_set):
        skf = StratifiedKFold(n_splits=10, random_state=None, shuffle=False)
        labels = np.copy(y)
        final_confusion_matrix = [[0, 0], [0, 0]]
        for train_index, test_index in skf.split(X_labelled, y_labelled):
            y = np.copy(labels)
            print("TRAIN:", train_index, "TEST:", test_index)
            X_train, X_test = X_labelled[train_index], X_labelled[test_index]
            y_train, y_test = y_labelled[train_index], y_labelled[test_index]
            labelled_set = train_index
            print("y shape before ", y.shape)
            print("Y before ", y)
            y = np.delete(y, test_index)
            print("y shape after ", y.shape)
            print("y after ", y)
            sample_rate = 0.2
            print("Y before ", labels)
            print("X train ", X_train.shape)
            print("X test ", X_test.shape)
            #final_labels, clf = semi_supervised_classification().pseudo_labelling(y, X_train, y_train, X_unlabelled, labelled_set, unlabelled_set, sample_rate)
            final_labels, clf = self.cl.label_propagation(
                X_train, y, X_unlabelled)
            print("Y after ", labels)
            pred_labels = clf.predict(X_test)
            print("pred_labels :", pred_labels, "\tReal labels: ", y_test)
            print(self.classification_rep(X_train, y_train, clf))
            confusion_matrix = self.confusion_mat(X_test, y_test, clf)
            print(confusion_matrix)
            tn, fp, fn, tp = confusion_matrix.ravel()
            print(tn, fp, fn, tp)
            final_confusion_matrix[0][0] += tn
            final_confusion_matrix[0][1] += fp
            final_confusion_matrix[1][0] += fn
            final_confusion_matrix[1][1] += tp

            print("Final confiusion matrix ", final_confusion_matrix)

        tn, fp, fn, tp = final_confusion_matrix[0][0], final_confusion_matrix[
            0][1], final_confusion_matrix[1][0], final_confusion_matrix[1][1]
        overall_precision = tp / (tp + fp)
        overall_recall = tp / (tp + fn)
        overall_accuracy = (tp + tn) / (tp + tn + fp + fn)
        overall_f1_score = 2 * overall_precision * overall_recall / (
            overall_precision + overall_recall)
        return np.array(
            final_confusion_matrix
        ), overall_precision, overall_recall, overall_accuracy, overall_f1_score
Example #8
def accuracy(confusion_matrix):
    tn, fp, fn, tp = confusion_matrix.ravel()
    PPV = tp / (tp + fp)
    sensitivity = tp / (tp + fn)
    specificity = tn / (tn + fp)
    NPV = tn / (fn + tn)
    pos_lr = sensitivity / (1 - specificity)
    neg_lr = (1 - sensitivity) / specificity
    recall = tp / (tp + fn)
    precision = tp / (tp + fp)
    f1 = (2 * precision * recall) / (precision + recall)
    print("*F1: %f  *Sensitivity: %f  *Specificity: %f  *PPV: %f  *NPV: %f  *Positive-LR: %f  *Negative-LR: %f" %
          (f1, sensitivity, specificity, PPV, NPV, pos_lr, neg_lr))
    diagonal_sum = confusion_matrix.trace()
    sum_of_all_elements = confusion_matrix.sum()
    return diagonal_sum / sum_of_all_elements
def SVMalgorithmRelevantNotRelevant(lists, gamma=1):
    vectors = list()
    target = list()
    for x in range(len(lists)):

        vectors.append(lists[x][1])
        target.append(lists[x][2])


    # Split the data into train and test sub-datasets.
    X_train, X_test, y_train, y_test = train_test_split(
        vectors, target, test_size=0.3,
        random_state=50)  # 70% training and 30% test
    clf = svm.SVC(kernel='linear', C=gamma)
    # Train the model using the training sets
    clf.fit(X_train, y_train)

    # Predict the response for test dataset
    y_pred = clf.predict(X_test)  # Predict class labels for the test data

    confusion_matrix = metrics.confusion_matrix(y_test, y_pred)

    (tn, fp, fn, tp) = confusion_matrix.ravel()
    accuracy = (tn + tp) / (tp + tn + fp + fn)
    print("CLASSIFICATION REPORT ====>")
    print("Accuracy: %0.2f" % (accuracy))
    print(classification_report(y_test, y_pred))

    print("Confusion Matrix:")
    print(confusion_matrix)

    print("Metrics\n")
    # Model Accuracy: how often is the classifier correct?
    print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
    # Model Precision: what percentage of positive tuples are labeled as such?
    print("Precision element y:", metrics.precision_score(y_test, y_pred))
    '''If the score distributions of the two classes simply mimic each other, AUC is 0.5. In other words, the model ranks instances no better than chance and has no discrimination capability at all.'''
    # Model Recall: what percentage of positive tuples are labelled as such?
    print("Recall:", metrics.recall_score(y_test, y_pred))
    probs = y_pred[:]
    y_test = y_test[:]
    auc = roc_auc_score(y_test, probs)
    print('AUC: %.2f' % auc)
    fpr, tpr, thresholds = roc_curve(y_test, probs)  #Get the ROC Curve.
    plot_roc_curve(fpr, tpr)  # Plot ROC Curve using our defined function

    return
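The AUC above is computed from hard 0/1 predictions rather than continuous scores. A sketch of a score-based alternative, assuming the same clf, X_test and y_test from the snippet and binary 0/1 labels:

scores = clf.decision_function(X_test)   # signed distance from the separating hyperplane
auc_from_scores = roc_auc_score(y_test, scores)
print('AUC from decision scores: %.2f' % auc_from_scores)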
Example #10
def get_stats(df):
    expected_label_column = list(df["Expected_label"])
    given_label_column = list(df["Given_label"])

    # Bind to a new name so the confusion_matrix function is not shadowed inside
    # this function (assigning to the same name here would raise UnboundLocalError).
    cm = confusion_matrix(expected_label_column,
                          given_label_column,
                          labels=["no", "yes"])
    tn, fp, fn, tp = cm.ravel()
    total = tn + fp + fn + tp

    accuracy = (tn + tp) / total
    print('Accuracy : ', accuracy)

    sensitivity = tp / (fn + tp)
    print('Sensitivity : ', sensitivity)

    specificity = tn / (tn + fp)
    print('Specificity : ', specificity)

    print((tn, fp, fn, tp))
def function_print_binary(y_test, y_pred):
    confusion_matrix = metrics.confusion_matrix(y_test, y_pred)
    (tn, fp, fn, tp) = confusion_matrix.ravel()
    accuracy = (tn + tp) / (tp + tn + fp + fn)
    names = ["L", "H"]
    print(
        classification_report(y_true=y_test, y_pred=y_pred,
                              target_names=names))
    print("-Accuracy:", metrics.accuracy_score(y_test, y_pred))
    # Model Precision: what percentage of positive tuples are labeled as such?
    print("-Precision element y:",
          metrics.precision_score(y_test, y_pred, average=None))

    # Model Recall: what percentage of positive tuples are labelled as such?
    print("-Recall:", metrics.recall_score(y_test, y_pred, average=None))
    probs = y_pred
    auc = roc_auc_score(y_test, probs)
    fpr, tpr, thresholds = roc_curve(y_test, probs)
    print("show information in roc with the information:")
    print('-AUC: %.2f' % auc)
    plot_roc_curve(fpr, tpr)
Example #12
            config.CUR_CLASSIFIER))
        predictions = cross_val_predict(estimator=cur_pipe,
                                        X=X_train,
                                        y=y_train,
                                        cv=cv_procedure,
                                        n_jobs=-1,
                                        verbose=config.VERBOSE)
        unique, counts = np.unique(predictions, return_counts=True)
        prediction_counts = dict(zip(unique, counts))
        print("Classifier predictions...")
        print(prediction_counts)

        # Bind to a new name so the confusion_matrix function is not shadowed
        # (rebinding the name inside a function would raise UnboundLocalError).
        cm = confusion_matrix(y_true=y_train,
                              y_pred=predictions,
                              normalize='all')
        tn, fp, fn, tp = cm.ravel()
        print("Confusion matrix results...")
        print("True Positive Rate: {}".format(tp))
        print("False Positive Rate: {}".format(fp))
        print("True Negative Rate: {}".format(tn))
        print("False Negative Rate: {}".format(fn))

        # We need to bypass the predictions made in the plot_confusion_matrix function so we create the confusion matrix display directly. See https://github.com/scikit-learn/scikit-learn/blob/95d4f0841/sklearn/metrics/_plot/confusion_matrix.py#L119
        disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                                      display_labels=display_labels)
        disp.plot(include_values=include_values,
                  cmap=cmap,
                  ax=ax,
                  xticks_rotation=xticks_rotation,
                  values_format=values_format)
        plt.show()
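With normalize='all' the four ravel() values are fractions of all samples, not true rates. A sketch of recovering the usual rates from raw counts, assuming the same y_train and predictions and that confusion_matrix comes from sklearn.metrics:

cm_counts = confusion_matrix(y_true=y_train, y_pred=predictions)  # unnormalized counts
tn, fp, fn, tp = cm_counts.ravel()
tpr = tp / (tp + fn)   # true positive rate (recall)
fpr = fp / (fp + tn)   # false positive rate
print("TPR: {:.3f}  FPR: {:.3f}".format(tpr, fpr))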
Example #13
    def recall(self, confusion_matrix):
        tn, fp, fn, tp = confusion_matrix.ravel()
        return tp / (tp + fn)
Example #14
def sensibility(confusion_matrix):
    tn, fp, fn, tp = confusion_matrix.ravel()

    return tp / (tp + fn)
print(f'Adaline - Previous weights: {oldWeights}')
print(f'Adaline - Current weights: {newWeights}')
print(f'Adaline - Number of epochs used: {qntEpochs}')
print('')

# Predicting values
predictedValues = []
for testInput in testInputs:
    predictedValues.append(a.predict_act_func(testInput))

# Printing the confusion matrix
print('----- Confusion Matrix -----')
confusion_matrix = confusion_matrix(testOutputs,predictedValues)
print(confusion_matrix)
tn, fp, fn, tp = confusion_matrix.ravel()  # scikit-learn ravel order: tn, fp, fn, tp

accuracy = round((tp+tn)/(tp+fp+tn+fn),2)
recall = round(tp/(tp+fn),2)
precision = round(tp/(tp+fp),2)
fscore = round(2 * ((precision * recall)/(precision + recall)),2)

fontDictionary = {
        'color': 'black',
        'size': 16
        }

print(a.error)

plt.figure()
plt.ylabel('Classified')
Example #16
for i in range(len(X)):
    d1 = distance.euclidean(X[i], centroids[0])
    d2 = distance.euclidean(X[i], centroids[1])

    if min(d1, d2) == d1:
        labels.append(entriod['centroid 1'])
    elif min(d1, d2) == d2:
        labels.append(entriod['centroid 2'])

accuracy = np.mean(np.array(labels) == Y)
print('accuracy is', round(accuracy, 4))

confusion_matrix = confusion_matrix(Y, labels)
print('confusion_matrix  is', confusion_matrix)

TN, FP, FN, TP = confusion_matrix.ravel()
TPR = TP / (TP + FN)
TNR = TN / (TN + FP)
dic = [TP, FP, TN, FN, round(accuracy, 6), round(TPR, 6), round(TNR, 6)]
result = pd.DataFrame(dic)
result.index = ['TP', 'FP', 'TN', 'FN', 'accuracy', 'TPR', 'TNR']

q35 = result.T

q2 = pd.read_csv('Q2.csv')
q2.drop(['Unnamed: 0'], axis=1, inplace=True)

q_all = pd.concat([q2, q35], axis=0)

q_all.index.name = 'index'
q_all.index = [
Example #17
    result = model.predict([test_sequences, test_POS, manual_feat_test])
    
    # the prediction of the algorithm is converted to a binary prediction using a classification threshold of 0.5
    # any floating point number output by our algorithm is converted to 0 if it is below 0.5, or to 1 if it is greater than or equal to 0.5
    
    y_score = binary_conversion(result,0.5)
    
    # evaluation metrics on the test set are computed 
    test_accuracy = accuracy_score(y_test,y_score)
    test_f1 = f1_score(y_test,y_score)
    cm = confusion_matrix(y_test, y_score)  # new name: avoid shadowing the confusion_matrix function
    precision = precision_score(y_test,y_score)
    recall = recall_score(y_test,y_score)
    
    print("Test accuracy: {}, Test F1 score: {}, with classification threshold 0.5".format(test_accuracy,test_f1))
    print(cm.ravel())
    
    #AUC is computed 
    
    fpr, tpr, _ = roc_curve(y_test, result)
    roc_auc = auc(fpr, tpr)
    print("Precision: {}, Recall: {}, AUC: {}".format(precision,recall,roc_auc))
	
    # A result dictionary is produced for this run
    result_dictionary = {'Test Accuracy': test_accuracy, 'Test F1': test_f1, 'Precision': precision, 'Recall': recall,
                         'Confusion': cm, 'AUC': roc_auc}
    result_list.append(result_dictionary)


print('Final results for the tests')
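binary_conversion is not defined in this excerpt; a minimal sketch of what a 0.5-threshold conversion could look like (a hypothetical stand-in, not the original implementation):

import numpy as np

def binary_conversion(scores, threshold=0.5):
    # Hypothetical helper: map continuous model outputs to 0/1 at the given threshold.
    return (np.asarray(scores) >= threshold).astype(int).ravel()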
Example #18
def specificity(confusion_matrix):
    tn, fp, fn, tp = confusion_matrix.ravel()

    return tn / (tn + fp)
Example #19
    def skfold_cv(self, X1, y1, X2, y2, response_labels, labelled_set, unlabelled_set, ppl, data, ngrams, semi_clf):
        
        skf = StratifiedKFold(n_splits=10, random_state=None, shuffle=False)
        labels = np.copy(y2)
        final_confusion_matrix = [[0,0],[0,0]] 
        X_labelled = X2[labelled_set]
        y_labelled = y2[labelled_set]
        X_unlabelled = X2[unlabelled_set]
        y_unlabelled = y2[unlabelled_set]
        i = 1
        for train_index, test_index in skf.split(X_labelled, y_labelled):
            print("Cross Validation iteration #",i)
            i+=1
            y2 = np.copy(labels)
            #print("TRAIN:", train_index, "TEST:", test_index)
            print("Train_index_shape ", train_index.shape, "\t Test index shape ", test_index.shape)
            X_train, X_test = X_labelled[train_index], X_labelled[test_index]
            y_train, y_test = y_labelled[train_index], y_labelled[test_index]
            response_labels_train = response_labels[train_index]
            response_labels_test = response_labels[test_index]
            
            X_train_clf1 = np.concatenate((X1, X_train),axis=0)
            y_train_clf1 = np.concatenate((y1, response_labels_train),axis=0)
            y_train_clf1 = y_train_clf1.astype(int)
            
            #labelled_set = train_index
            print("y shape before ", y2.shape)
            y2 = np.delete(y2, test_index)
            print("y shape after ", y2.shape)
            sample_rate=0.2
            
            #unique, counts = np.unique(y_train_clf1, return_counts=True)
            #print(dict(zip(unique, counts)))
            
            ppl.fit(X_train_clf1, y_train_clf1)
            y_test_pred = ppl.predict(X_test)
            #print(y_test_pred, "\n", y_test_pred.shape)
            
            y_unlabelled_pred = ppl.predict(X_unlabelled)
            print(y_unlabelled_pred, y_unlabelled_pred.shape)
            
            cl = classification()
            train_index_orig = labelled_set[train_index]
            test_index_orig = labelled_set[test_index]
            
            # Combining predicted response labels with the original ones to pass as a feature for vectorization
            combined_train_index_orig = np.concatenate((train_index_orig, test_index_orig, unlabelled_set),axis=0)
            response_label_pred = y_unlabelled_pred
            combined_response_labels =  np.concatenate((response_labels_train, response_labels_test, response_label_pred),axis=0)
            print(response_label_pred.shape, response_labels_train.shape)
            print(combined_train_index_orig.shape,combined_response_labels.shape)
            train_df_clf2 = data.iloc[combined_train_index_orig,:]
            
            #combined_response_labels = np.transpose(np.matrix(combined_response_labels))      
            combined_response_labels = pd.Series(combined_response_labels)
            response_required_label = combined_response_labels
            
            print("Shape before ", train_df_clf2.shape, combined_response_labels.shape)
            train_df_clf2 = train_df_clf2.assign(response_required_label= response_required_label.values)
            
            #print(dict(zip(combined_train_index_orig, combined_response_labels)))
            #print(train_df_clf2.iloc[130:180,])
            
            pipeline = Pipeline([
                # Use FeatureUnion to combine the deadline-weight, response-label and text features
                ('union', FeatureUnion(
                    transformer_list=[
            
                        # Pipeline for pulling the deadline_weight feature
                        ('deadline_ppl', Pipeline([
                            ('selector', Custom_features_2(key = 'deadline_weight')),
                        ])),
                        
                        ('response_label_ppl', Pipeline([
                            ('selector', Custom_features_2(key = 'response_required_label')),
                        ])),            
                        
                        # Pipeline for the standard bag-of-words model over the Text column
                        ('text_ppl', Pipeline([
                            ('selector', Custom_features(key = 'Text')),
                            ('tfidf',  TfidfVectorizer(ngram_range = ngrams, use_idf=True, smooth_idf=True, norm='l2')),
                        ])),
            
                    ],
            
                    # weight components in FeatureUnion
                    transformer_weights={
                        'deadline_ppl': 1.0,
                        'response_label_ppl':1.0,
                        'text_ppl': 1.0,
                    },
                )),
            ])
                
            X_vec = pipeline.fit_transform(train_df_clf2)
            #print(X_vec, X_vec.shape)
                        
            '''
            vectorizer = TfidfVectorizer(ngram_range=(1,3), norm='l2', use_idf=True, smooth_idf=True, sublinear_tf=False)
            X_vec = vectorizer.fit_transform(X)
            #print("Vec torized_text \n", X_vec)
            print(X_vec.shape)
            '''
            
            X_train_vec = X_vec[0:train_index.shape[0]]
            X_test_vec = X_vec[train_index.shape[0]:(train_index.shape[0]+test_index.shape[0])]
            X_unlabelled_vec = X_vec[-X_unlabelled.shape[0]:]
            print(X_vec.shape, X_train_vec.shape, X_unlabelled_vec.shape, X_test_vec.shape)  
            
            '''
            X_unlabelled_vec = X_vec[0: X_unlabelled.shape[0]]
            X_labelled_vec = X_vec[-X_labelled.shape[0]:]
            X_train_vec = X_labelled_vec[train_index]
            X_test_vec = X_labelled_vec[test_index]
            print(X_unlabelled_vec.shape, X_labelled_vec.shape)
            '''
            #print(X_unlabelled_vec.shape, X_labelled_vec.shape, y_train.shape)
            #print("XYZZZZZ \n", X_unlabelled_vec[0])
            #predicted_labels, prediction_confidence, clf = cl.linear_svc(X_train, y_train, X_test)
            y_ = np.concatenate((y_train, y_unlabelled), axis=0)  
            if(semi_clf == 'LS'):
                predicted_labels, clf = cl.label_spreading(X_train_vec, y_, X_unlabelled_vec)
            elif(semi_clf == 'EM'):
                predicted_labels, clf = cl.expectation_maximization(X_train_vec, y_train, X_unlabelled_vec)
            #print("final_labels :", predicted_labels, predicted_labels.shape)
            unique, counts = np.unique(predicted_labels, return_counts=True)
            print("Predicted label summary ", dict(zip(unique, counts)))
            y_pred = clf.predict(X_test_vec)
            #print(classification_report(y_test, y_pred))
            #print("Accuracy ", accuracy_score(y_test, y_pred))
            #print(sklearn.metrics.confusion_matrix(y_test, y_pred))
            print("pred_labels :", y_pred, "\tReal labels: ", y_test)
            confusion_matrix = sklearn.metrics.confusion_matrix(y_test, y_pred)
            print(confusion_matrix)
            print("Type is ", type(confusion_matrix))
            tn, fp, fn, tp = confusion_matrix.ravel()
            #print(tn, fp, fn, tp)
            final_confusion_matrix[0][0] += tn
            final_confusion_matrix[0][1] += fp
            final_confusion_matrix[1][0] += fn
            final_confusion_matrix[1][1] += tp

            #print("Final confiusion matrix ", final_confusion_matrix)  
        
        #tn, fp, fn, tp = final_confusion_matrix[0][0], final_confusion_matrix[0][1], final_confusion_matrix[1][0], final_confusion_matrix[1][1]
        tn, fp, fn, tp = np.array(final_confusion_matrix).ravel()
            
        u_precision = tp/(tp + fp)
        u_recall = tp/(tp + fn)
        u_f1_score = 2 * u_precision * u_recall / (u_precision + u_recall)
        
        non_u_precision = tn/(tn + fn)
        non_u_recall = tn/(tn + fp)
        non_u_f1_score = 2 * non_u_precision * non_u_recall / (non_u_precision + non_u_recall)
        
        
        accuracy = (tp + tn)/(tp + tn + fp + fn)
        
        return np.array(final_confusion_matrix), u_precision, u_recall, u_f1_score, non_u_precision, non_u_recall, non_u_f1_score, accuracy

        '''
            
            
            #unique, counts = np.unique(y_unlabelled_pred, return_counts=True)
            #print(dict(zip(unique, counts)))
            #sys.exit()
            
            
            final_labels, clf = semi_supervised_classification().pseudo_labelling(y, X_train, y_train, X_unlabelled, labelled_set, unlabelled_set, sample_rate)
            #final_labels, clf = self.cl.expectation_maximization(X_train, y_train, X_unlabelled)
            #final_labels, clf = self.cl.label_spreading(X_train, y, X_unlabelled)
            print("Y after ", labels)
            pred_labels = clf.predict(X_test)
            print("pred_labels :", pred_labels, "\tReal labels: ", y_test)
            print(self.classification_rep(X_train, y_train, clf))
            confusion_matrix = self.confusion_mat(X_test, y_test, clf)
            print(confusion_matrix)
            tn, fp, fn, tp = confusion_matrix.ravel()
            print(tn, fp, fn, tp)
            final_confusion_matrix[0][0] += tn
            final_confusion_matrix[0][1] += fp
            final_confusion_matrix[1][0] += fn
            final_confusion_matrix[1][1] += tp

            print("Final confiusion matrix ", final_confusion_matrix)  
        
        tp, fp, fn, tp = final_confusion_matrix[0][0], final_confusion_matrix[0][1], final_confusion_matrix[1][0], final_confusion_matrix[1][1]
        overall_precision = tp/(tp + fp)
        overall_recall = tp/(tp + fn)
        overall_accuracy = (tp + tn)/(tp + tn + fp + fn)
        overall_f1_score = 2 * overall_precision * overall_recall / (overall_precision + overall_recall)
        return np.array(final_confusion_matrix), overall_precision, overall_recall, overall_accuracy, overall_f1_score
            
         '''