Example #1
def GaussianClassifier(dataset, show=False):
    X, labels = dataset
    kernel = 1.0 * RBF(1.0)
    classifier = GaussianProcessClassifier(kernel=kernel,
                                           random_state=0).fit(X, labels)
    print("Training accuracy:", classifier.score(X, labels))
    new_labels = classifier.predict(X)

    conf = confusion_matrix(labels, new_labels)
    print(conf)
    conf = conf / conf.sum(axis=1, keepdims=True)  # row-normalize; keepdims avoids mis-broadcasting

    plt.matshow(conf)
    plt.title("Gaussian")
    plt.show()

    if show:
        for i in range(min(2, len(X[0]))):
            for j in range(i):
                xs = [row[i] for row in X]
                ys = [row[j] for row in X]

                paint(xs, ys, labels, "labels")
                paint(xs, ys, new_labels, "prediction")

        plt.show()
Example #2
def GP_Classifier(i):
    x_data, y_data = data_select(i)
    gpc = GaussianProcessClassifier(random_state=53)
    # split validation
    X_train, X_test, Y_train, Y_test = train_test_split(x_data,
                                                        y_data,
                                                        test_size=0.25,
                                                        random_state=53)
    gpc.fit(X_train, np.ravel(Y_train, order='C'))
    train_score = gpc.score(X_train, Y_train)
    test_score = gpc.score(X_test, Y_test)
    print('Train Acc: %.3f, Test Acc: %.3f' % (train_score, test_score))
    # K-fold validation
    kfold = model_selection.KFold(n_splits=10)
    results_kfold = model_selection.cross_val_score(gpc,
                                                    x_data,
                                                    np.ravel(y_data,
                                                             order='C'),
                                                    cv=kfold)
    print("Accuracy: %.2f%%" % (results_kfold.mean() * 100.0))
    # leave-one-out validation
    loocv = LeaveOneOut()
    results_loocv = model_selection.cross_val_score(gpc,
                                                    x_data,
                                                    np.ravel(y_data,
                                                             order='C'),
                                                    cv=loocv)
    print("Accuracy: %.2f%%" % (results_loocv.mean() * 100.0))
Example #3
def train_l2_gaussian(x_train, x_test, y_train, y_test):
    clf = GaussianProcessClassifier()
    clf.fit(x_train, y_train)

    if y_test is not None:
        print('GaussianProcessClassifier:', clf.score(x_test, y_test))
    else:
        print('GaussianProcessClassifier:', clf.score(x_train, y_train))
    return np.reshape(clf.predict(x_train), (-1, 1))
Example #4
def one_vs_rest_gauss_process_with_log():
    raw_frame = thal_data()
    x = raw_frame.drop(['thal', 'pressure', 'cholestoral', 'age', 'heart_rate'], axis=1)
    y = raw_frame['thal']
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=5)
    kernel = 1.0 * RBF(1.0)
    gpc = GaussianProcessClassifier(kernel=kernel, random_state=0,
                                    multi_class='one_vs_rest').fit(x_train, y_train)
    global train_score
    train_score.append(gpc.score(x_train, y_train))
    global test_score
    test_score.append(gpc.score(x_test, y_test))
Example #5
def train_l1_gaussian(x_train, x_test, y_train, y_test):
    clf = GaussianProcessClassifier(n_jobs=-1)
    clf.fit(x_train, y_train)

    if y_test is not None:
        print('GaussianProcessClassifier:', clf.score(x_test, y_test))
    else:
        print('GaussianProcessClassifier:', clf.score(x_train, y_train))
    train_res = np.reshape(clf.predict(x_train), (-1, 1))
    test_res = np.reshape(clf.predict(x_test), (-1, 1))
    return [train_res, test_res]
Example #6
def task3(feature_sets, label_sets):
    sets = ["A", "B", "crashes", "diabetes", "ionosphere"]
    kernel = 1.0 * RBF(1.0)
    for i in range(5):
        n = len(label_sets[i])
        m = np.linspace(10, .6 * n, num=10, dtype=int)
        div = int(n * .4)
        x_train = feature_sets[i][div:]
        x_test = feature_sets[i][:div]
        y_train = label_sets[i][div:]
        y_test = label_sets[i][:div]
        gpc_errors = []
        for j in range(10):
            gpc = GPC(kernel=kernel, random_state=0)
            gpc.fit(x_train[:m[j]], np.ravel(y_train[:m[j]]))
            gpc_errors.append(1 - gpc.score(x_test, np.ravel(y_test)))

        plt.plot(m, gpc_errors, label="GPC")
        plt.legend()
        plt.ylabel("Error")
        plt.xlabel("M value")
        plt.title(sets[i])
        plt.show()

    return
Example #7
    def compute_per_gaussian(self, max_iter=100):
        """Fit a Gaussian process classifier per feature."""
        # per feature (X was undefined in the original; self.X_train is assumed)
        for feature_index in range(int(len(self.X_train[0]) / 45)):
            X_train_mod = []
            # define training dataset: one feature column per example
            for example in range(len(self.X_train)):   # for each example (469)
                X_train_mod.append([self.X_train[example][self.epoch*self.neuron_num + self.counter]])

            X_test_mod = []
            # define testing dataset
            for example in range(len(self.X_test)):   # for each example (469)
                X_test_mod.append([self.X_test[example][self.epoch*self.neuron_num + self.counter]])

            clf = GPC(max_iter_predict=max_iter)  # GPC model
            clf.fit(X_train_mod, self.y_train)  # fit with only one feature
            score = clf.score(X_test_mod, self.y_test)

            self.features_accuracy.append(score)

            self.counter += 1
Example #8
    def compute_per_gaussian(self, max_iter=100):
        """Compute SVM per feature"""

        print(len(self.X_train))
        print(len(self.X_train[0]))

        # per feature
        for feature_index in range(len(self.X[0])):
            X_train_mod = []
            # define training dataset
            for example in range(len(self.X_train)):  # for each example (469)
                X_train_mod.append([self.X_train[example][self.counter]])

            X_test_mod = []
            # define testing dataset
            for example in range(len(self.X_test)):  # for each example (469)
                X_test_mod.append([self.X_test[example][self.counter]])

            clf = GPC(max_iter_predict=max_iter)  # GPC model
            clf.fit(X_train_mod, self.y_train)  # compute with only one feature
            score = clf.score(X_test_mod, self.y_test)

            self.features_accuracy.append(score)

            self.counter += 1
Example #9
def main():

    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')

    X_train, y_train = df_train.iloc[:, 2:], df_train.iloc[:, 0]
    X_test, y_test = df_test.iloc[:, 2:], df_test.iloc[:, 0]
    unique_labels = sorted(y_train.unique().tolist())

    clf = GaussianProcessClassifier(max_iter_predict=500, warm_start=True, n_jobs=-1)

    clf.fit(X_train, y_train)

    print("\n\n{}\n".format(clf.score(X_test, y_test)))

    y_predicted = clf.predict(X_test)

    print("Generating confusion matrix figure... \n")
    stdfunc.plot_confusion_matrix(y_test, y_predicted, ml_name='GP',
                                  classes=unique_labels,
                                  title='Confusion matrix for Gaussian Process evaluation')

    print("Generating classification report figure... \n")
    stdfunc.plot_classification_report(y_test, y_predicted, ml_name='GP',
                                       classes=unique_labels,
                                       title='Classification report for Gaussian Process evaluation')
Example #10
class GaussianProcess_(ProbabilisticModel):

    """GaussianProcess Classifier
    """

    def __init__(self, *args, **kwargs):
        self.model = GaussianProcessClassifier(*args, **kwargs)
        self.name = "gpc"        

    def train(self, dataset, *args, **kwargs):
        return self.model.fit(*(dataset.format_sklearn() + args), **kwargs)

    def predict(self, feature, *args, **kwargs):
        return self.model.predict(feature, *args, **kwargs)

    def score(self, testing_dataset, *args, **kwargs):
        return self.model.score(*(testing_dataset.format_sklearn() + args),
                                **kwargs)

    def predict_proba(self, feature, *args, **kwargs):
        return self.model.predict_proba(feature, *args, **kwargs)

    def feature_importances_(self):
        LOGGER.warning("GPC model does not support feature_importance")
        return None
    
    def get_params(self):
        return self.model.get_params()
Example #11
def gaussian_process_models(x_train, y_train):
    from sklearn.gaussian_process import GaussianProcessClassifier
    classifier1 = GaussianProcessClassifier()
    classifier1.fit(x_train, y_train)

    print('GaussianProcessClassifier training accuracy: ',
          classifier1.score(x_train, y_train))

    return classifier1
Example #12
def GPAL(X,
         Y,
         train_ind,
         candidate_ind,
         test_ind,
         sample='En',
         kernel='rbf',
         Niter=500,
         eta=10):
    ourRes = []
    train_index = train_ind.copy()
    test_index = test_ind.copy()
    candidate_index = candidate_ind.copy()
    varRes = []
    enRes = []
    for i in range(Niter):
        print(i)
        if kernel == 'linear':
            dotkernel = DotProduct(sigma_0=1)
            model = GPC(kernel=dotkernel)
        else:
            model = GPC()
        model.fit(X[train_index], Y[train_index])
        ourRes.append(model.score(X[test_index, :], Y[test_index]))
        print(ourRes[-1])
        if sample == 'rand':
            sampleIndex = np.random.randint(len(candidate_index))
        elif sample == 'En':
            proba = model.predict_proba(X[candidate_index, :])
            en = sp.stats.entropy(proba.T)
            sampleScore = en
            sampleIndex = np.argmax(sampleScore)
        elif sample == 'var':
            # predict_proba is called for its side effect: the per-class
            # estimators cache their latent variance (this assumes a patched
            # sklearn exposing a .var attribute; stock sklearn does not)
            model.predict_proba(X[candidate_index, :])
            meanVar = np.zeros(len(candidate_index))
            for tem in model.base_estimator_.estimators_:
                meanVar = meanVar + tem.var
            sampleIndex = np.argmax(meanVar)
        elif sample == 'varEN':
            proba = model.predict_proba(X[candidate_index, :])
            en = sp.stats.entropy(proba.T)
            meanVar = np.zeros(len(candidate_index))
            enRes.append(np.mean(en))

            for tem in model.base_estimator_.estimators_:
                meanVar = meanVar + tem.var
            sampleIndex = np.argmax(meanVar / len(np.unique(Y)) * eta + en)
            varRes.append(np.mean(meanVar))
            print('max var %f----selected var %f-----selected en %f ' %
                  (np.max(meanVar), meanVar[sampleIndex], en[sampleIndex]))
        sampleIndex = candidate_index[sampleIndex]
        train_index = train_index + [sampleIndex]
        candidate_index = [
            x for x in candidate_index if x not in [sampleIndex]
        ]
    return [ourRes, varRes, enRes]
Example #13
def compute(dataSet, dataRes, dataTest, dataTestID):
    gauss = GaussianProcessClassifier()
    gauss.fit(dataSet, dataRes)
    with open('gaussian.csv', 'w', newline='') as subFile:
        fileWriter = csv.writer(subFile, delimiter=',')
        fileWriter.writerow(['PassengerId', 'Survived'])

        for i, row in enumerate(dataTest):
            predict = gauss.predict([row])[0]
            fileWriter.writerow([dataTestID[i], predict])
    print('Gaussian', gauss.score(dataSet, dataRes))
Example #14
def job(i):
    results = pd.DataFrame()
    df_train = pd.read_csv("preprocessed_training_" + str(i) + ".csv")
    df_train = df_train.drop(["ID"], axis=1)
    y = df_train["Class"]
    X = df_train.drop(['Class'], axis=1)
    df_test = pd.read_csv("preprocessed_test_" + str(i) + ".csv")
    df_test = df_test.drop(["ID"], axis=1)
    X_p = df_test.drop(["Class"], axis=1)

    global n_estimators
    #n_estimators = [1, 8]
    global max_iter_predict
    global warm_start
    global multi_class
    #max_iter_predict = [1, 2]
    random_state = 1428

    for n in n_estimators:
        for d in max_iter_predict:
            for s in warm_start:
                for m in multi_class:
                    result_row = {}
                    # n comes from the n_estimators list but feeds
                    # n_restarts_optimizer below
                    result_row["n_estimators"] = n
                    result_row["fold"] = i
                    result_row["max_iter_predict"] = d
                    result_row["warm_start"] = s
                    result_row["multi_class"] = m

                    print(result_row)

                    gpc = GaussianProcessClassifier(n_restarts_optimizer=n,
                                                    max_iter_predict=d,
                                                    warm_start=s,
                                                    n_jobs=1,
                                                    multi_class=m)
                    gpc.fit(X, y)
                    predicted = gpc.predict(X_p)
                    result_row["score"] = round(
                        gpc.score(X_p, df_test["Class"]), 4)
                    confusion = confusion_matrix(df_test["Class"], predicted)
                    conf = pd.DataFrame(confusion)
                    conf.to_csv(target_path + "confusion_" + str(i) + "_" +
                                str(n) + "_" + str(d) + "_" + str(s) + "_" +
                                str(m) + "_" + ".csv",
                                index=False)
                    # DataFrame.append was removed in pandas 2.0
                    results = pd.concat([results, pd.DataFrame([result_row])],
                                        ignore_index=True)
    return results
Example #15
def gaussian_kernel(X, y, X_train, X_test, y_train, y_test):
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.gaussian_process.kernels import RBF
    kernel = 1.0 * RBF(1.0)
    classifier = GaussianProcessClassifier(kernel=kernel, random_state=0)
    model = classifier
    plot_model(model, 'Gaussian kernel', X, y)
    classifier.fit(X_train, y_train)

    # predictions
    y_pred = classifier.predict(X_test)

    # Evaluate model
    from sklearn.metrics import classification_report, confusion_matrix
    print("Confusion matrix for Gaussian")
    print(confusion_matrix(y_test, y_pred))
    print("Classification report for Gaussian")
    print(classification_report(y_test, y_pred))
    print("Training score for Gaussian RBF:", classifier.score(X_train, y_train))
Example #16
def OVA_OVO(param):

    print('Applying the ONE VS ALL multiclass method with a GAUSSIAN PROCESS CLASSIFIER')

    for i in lista_datasets:
        print('Dataset: ' + str(i))
        dataset = arff.loadarff('./datasets/' + str(i))
        df = pd.DataFrame(dataset[0])
        features = df.iloc[:, df.columns != 'class']
        labels = pd.factorize(df['class'])[0]

        X_train, X_test, Y_train, Y_test = train_test_split(features, labels, test_size=0.25)

        kernel = 1.0 * RBF(1.0)

        gpc = GaussianProcessClassifier(kernel=kernel, random_state=0, multi_class=param)
        gpc.fit(X_train, Y_train)
        print('Fraction correctly classified by the GAUSSIAN PROCESS CLASSIFIER ONE VS ALL')
        print(gpc.score(X_test, Y_test))

    print('--------------------------')
Example #17
class scikit_GaussianProcessClassifier(MLAlgo):
    def __init__(self):
        self.clf = GaussianProcessClassifier()
        self.className = self.__class__.__name__

    def train(self, train_data):
        train_X = train_data[:, :-1]
        train_Y = train_data[:, -1]
        self.clf.fit(train_X, train_Y)
        print("GaussianProcessClassifier model built.")
        return self.className + " Training finished...\n"

    def test(self, test_data):
        test_X = test_data[:, :-1]
        test_Y = test_data[:, -1]
        print("Accuracy: ", self.clf.score(test_X, test_Y))
        return self.className + " Testing finished...\n"

    def predict(self, predict_data):
        print("Predictions: ", self.clf.predict(predict_data))
        return self.className + " Prediction finished...\n"

    def cross_validate(self, train_data):
        X_ = train_data[:, :-1]
        Y_ = train_data[:, -1]
        predicted = cross_val_predict(self.clf, X_, Y_, cv=10)
        accuracy = metrics.accuracy_score(Y_, predicted)
        print("Cross-validation accuracy: ", accuracy)

        if accuracy > MLAlgo.cross_validate_accuracy:
            MLAlgo.cross_validate_accuracy = accuracy
            MLAlgo.classifier = self.clf
            MLAlgo.trained_instance = self

        return self.className + " Cross validation finished...\n"
Example #18
print(np.shape(labels_con))
labels=np.vstack((labels_ad,labels_con))
print(np.shape(labels))
data_train, data_test, labels_train, labels_test = train_test_split(lst, labels, test_size=0.20, random_state=42)


kernel = 1.0 * RBF(214)
'''
S=data_train
T=data_test
S /= S.std(axis=0)
T /= T.std(axis=0)
ica = FastICA(n_components=14)
S_ = ica.fit_transform(S)
T_=ica.fit_transform(T)
'''
gpc = GaussianProcessClassifier(kernel=kernel,
                                n_restarts_optimizer=5,
                                random_state=None,
                                multi_class="one_vs_rest",
                                max_iter_predict=100,
                                n_jobs=-1)
gpc=gpc.fit(data_train, labels_train)
print('')
print('accuracy on trainingset:',gpc.score(data_train,labels_train))
print('accuracy on testset:',gpc.score(data_test, labels_test))
print("confusion matrix for the training")
cm_train = confusion_matrix(labels_train, gpc.predict(data_train))
print(cm_train)
print(classification_report(labels_train, gpc.predict(data_train), labels=[0, 1]))
print("confusion matrix for the testing")
cm_test = confusion_matrix(labels_test, gpc.predict(data_test))
print(cm_test)
print('')
print(classification_report(labels_test, gpc.predict(data_test), labels=[0, 1]))
Example #19
mnist_trainset = datasets.MNIST(root='./data',
                                train=True,
                                download=False,
                                transform=torchvision.transforms.ToTensor())
mnist_testset = datasets.MNIST(root='./data',
                               train=False,
                               download=False,
                               transform=torchvision.transforms.ToTensor())


print(mnist_trainset.data.shape)
print(mnist_trainset.data[0:2000].view(-1, 28 * 28).shape)
kernel = 1.0 * RBF(28 * 28)

gpc = GaussianProcessClassifier(kernel=kernel,
                                n_restarts_optimizer=3,
                                random_state=None,
                                multi_class="one_vs_rest",
                                max_iter_predict=200,
                                n_jobs=-1)
gpc = gpc.fit(mnist_trainset.data[0:1000].view(-1, 28 * 28),
              mnist_trainset.targets[0:1000])

print(
    gpc.score(mnist_trainset.data[0:1000].view(-1, 28 * 28),
              mnist_trainset.targets[0:1000]))
print(
    gpc.score(mnist_testset.data[0:500].view(-1, 28 * 28),
              mnist_testset.targets[0:500]))
print(gpc.predict(mnist_trainset.data[0:20].view(-1, 28 * 28)))
print(mnist_trainset.targets[0:20])
print(gpc.predict(mnist_testset.data[0:20].view(-1, 28 * 28)))
print(mnist_testset.targets[0:20])
Example #20
ax.set_xlabel("1st eigenvector")
ax.w_xaxis.set_ticklabels([])
ax.set_ylabel("2nd eigenvector")
ax.w_yaxis.set_ticklabels([])
ax.set_zlabel("3rd eigenvector")
ax.w_zaxis.set_ticklabels([])
plt.show()

from sklearn.gaussian_process.kernels import RBF
kernel = 1.0 * RBF(1.0)
gpc = GaussianProcessClassifier(kernel=kernel,
                                multi_class='one_vs_one',
                                random_state=0).fit(X_, Y_)

# let's see how good our fit on the training set is
print(gpc.score(X_, Y_))

# create the TF neural net
# some hyperparams
training_epochs = 200

n_neurons_in_h1 = 10
n_neurons_in_h2 = 10
learning_rate = 0.01
dkl_loss_rate = 0.1

n_features = len(X[0])
labels_dim = 1
#############################################

# these placeholders serve as our input tensors
Example #21
# Quadratic Discriminant
qda = QuadraticDiscriminantAnalysis()
qda.fit(X_train, y_train)
print('QDA accuracy: ', qda.score(X_test, y_test))

# MLP classifier
mlp = MLPClassifier(hidden_layer_sizes=(100, ),
                    activation='logistic',
                    max_iter=5000)
mlp.fit(X_train, y_train)
print('MLP accuracy: ', mlp.score(X_test, y_test))

# Gaussian Process
gpc = GaussianProcessClassifier()
gpc.fit(X_train, y_train)
print('GPC accuracy: ', gpc.score(X_test, y_test))

# Random Forest Classifier
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)
print('RFC accuracy: ', rfc.score(X_test, y_test))

# Computes the Silhouette coefficient
print(
    'Silhouette coefficient: ',
    metrics.silhouette_score(novos_dados_pca.real.T,
                             target,
                             metric='euclidean'))
print()

#%%%%%%%%%%%%%%%%%%%% Supervised classification for PCA-KL features
Example #22
print(sv.score(result_test, y_test))

#rf = RandomForestClassifier()
#rf.fit(result,y)
#rf.score(result,y)
#rf.score(result_test,y_test)

# lr = LogisticRegressionCV()
# lr.fit(result,y)
# lr.score(result,y)
# lr.score(result_test,y_test)
#
# Specify Gaussian Processes with fixed and optimized hyperparameters
gp_opt = GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0))
gp_opt.fit(result, y)
print(gp_opt.score(result, y))
print(gp_opt.score(result_test, y_test))
# =============================================================================

################################################################################

clf = GradientBoostingClassifier(verbose=1)

# The list of hyper-parameters we want to optimize. For each one we define the bounds,
# the corresponding scikit-learn parameter name, as well as how to sample values
# from that dimension (`'log-uniform'` for the learning rate)
n_features = result.shape[1]

dim_max_depth = Integer(1, 35, name='max_depth')
dim_learning_rate = Real(10**-5, 10**0, "log-uniform", name='learning_rate')
dim_max_features = Integer(1, n_features, name='max_features')
Example #23
for name, gpc in gpList:
    start_time = time.time()

    # Training / Fitting
    print('\nName: ', name)
    gpc.fit(usedTrainX, usedTrainY)

    # Cross-validation
    #tenFoldCV = cross_val_score(gpc, usedTrainX, usedTrainY, cv=10, scoring='neg_log_loss', n_jobs=-1)
    cvScores = cross_val_score(gpc, usedTrainX, usedTrainY, cv=5, n_jobs=-1)
    #avgCV = np.mean(cvScores)
    print('Average of 5-fold CV: ', round(cvScores.mean(), 4) * 100, '%')
    print('Std of 5-fold CV: ', cvScores.std())

    # Testing
    score = gpc.score(usedTestX, usedTestY)
    print('Testing Score:\t', round(score, 4) * 100, '%')

    # Running Time
    print("Time(s):\t", round((time.time() - start_time) * 100) / 100)

    # Add to usedModelList
    usedModelList.append([name, cvScores.mean(), score, gpc, cvScores.std()])

#%% Measuring Training+CV+Testing Time for models in usedModelList
for name, m, s, model, std in usedModelList:
    start_time = time.time()

    # Training / Fitting
    print('\nName: ', name)
    model.fit(usedTrainX, usedTrainY)
Example #24
def clf_GAU(imputed_data_x, y, train_idx, test_idx):
    kernel = 1.0 * RBF(1.0)
    clf = GaussianProcessClassifier(kernel=kernel, random_state=0)
    clf.fit(imputed_data_x[train_idx], y[train_idx])
    score = clf.score(imputed_data_x[test_idx], y[test_idx])
    return np.round(score * 100, 4)
Example #25
def batch_Parametric_PCA():

    # Datasets
    X = skdata.load_iris()     # K = 95
    #X = skdata.fetch_openml(name='Engine1', version=1) # K = 235
    #X = skdata.fetch_openml(name='prnn_crabs', version=1) # K = 10
    #X = skdata.fetch_openml(name='analcatdata_happiness', version=1) # K = 53
    #X = skdata.fetch_openml(name='mux6', version=1) # K = 105
    #X = skdata.fetch_openml(name='threeOf9', version=1) # K = 385
    #X = skdata.fetch_openml(name='parity5', version=1) # K = 25
    #X = skdata.fetch_openml(name='sa-heart', version=1) # K = 74
    #X = skdata.fetch_openml(name='vertebra-column', version=1) # K = 305
    #X = skdata.fetch_openml(name='breast-tissue', version=2) # K = 5
    #X = skdata.fetch_openml(name='transplant', version=2)  # K = 65
    #X = skdata.fetch_openml(name='hayes-roth', version=2)  # K = 5
    #X = skdata.fetch_openml(name='plasma_retinol', version=2)  # K = 145
    #X = skdata.fetch_openml(name='aids', version=1) # K = 42
    #X = skdata.fetch_openml(name='lupus', version=1) # K = 37
    #X = skdata.fetch_openml(name='pwLinear', version=2)  # K = 135
    #X = skdata.fetch_openml(name='fruitfly', version=2) # K = 120
    #X = skdata.fetch_openml(name='pm10', version=2) # K = 485
    #X = skdata.fetch_openml(name='visualizing_livestock', version=1) # K = 125
    #X = skdata.fetch_openml(name='strikes', version=2)  # K = 130   

    dados = X['data']
    target = X['target']
    
    # Normalize data
    dados = preprocessing.scale(dados)

    n = dados.shape[0]
    m = dados.shape[1]

    print('N = ', n)
    print('M = ', m)

    inicio = 5
    incremento = 5

    lista_k = list(range(inicio, n, incremento))

    acuracias = []
    kappas_medios = []

    for k in lista_k:

        print('K = ', k)
        novos_dados = ParametricPCA(dados, k, 2, 'KL') 

        #%%%%%%%%%%%%%%%%% Parametric PCA
        print('Parametric PCA for supervised classification')
    
        # Split training data
        X_train, X_test, y_train, y_test = train_test_split(novos_dados.real.T, target, test_size=.4, random_state=42)
        acc = []
    
        # KNN
        neigh = KNeighborsClassifier(n_neighbors=7)
        neigh.fit(X_train, y_train) 
        s = neigh.score(X_test, y_test)
        kap = metrics.cohen_kappa_score(neigh.predict(X_test), y_test)
        acc.append(s)
        print('KNN accuracy: ', s)
    
        # SVM
        svm = SVC(gamma='auto')
        svm.fit(X_train, y_train)
        s = svm.score(X_test, y_test)
        kap = metrics.cohen_kappa_score(svm.predict(X_test), y_test)
        acc.append(s)
        print('SVM accuracy: ', s)

        # Naive Bayes
        nb = GaussianNB()
        nb.fit(X_train, y_train)
        s = nb.score(X_test, y_test)
        kap = metrics.cohen_kappa_score(nb.predict(X_test), y_test)
        acc.append(s)
        print('NB accuracy: ', s)

        # Decision Tree
        dt = DecisionTreeClassifier(random_state=0)
        dt.fit(X_train, y_train)
        s = dt.score(X_test, y_test)
        kap = metrics.cohen_kappa_score(dt.predict(X_test), y_test)
        acc.append(s)
        print('DT accuracy: ', s)

        # Quadratic Discriminant 
        qda = QuadraticDiscriminantAnalysis()
        qda.fit(X_train, y_train)
        s = qda.score(X_test, y_test)
        kap = metrics.cohen_kappa_score(qda.predict(X_test), y_test)
        acc.append(s)
        print('QDA accuracy: ', s)

        # MLP classifier
        mlp = MLPClassifier(hidden_layer_sizes=(100,), activation='logistic', max_iter=5000)
        mlp.fit(X_train, y_train)
        s = mlp.score(X_test, y_test)
        kap = metrics.cohen_kappa_score(mlp.predict(X_test), y_test)
        acc.append(s)
        print('MLP accuracy: ', s)

        # Gaussian Process
        gpc = GaussianProcessClassifier()
        gpc.fit(X_train, y_train)
        s = gpc.score(X_test, y_test)
        kap = metrics.cohen_kappa_score(gpc.predict(X_test), y_test)
        acc.append(s)
        print('GPC accuracy: ', s)

        # Random Forest Classifier
        rfc = RandomForestClassifier()
        rfc.fit(X_train, y_train)
        s = rfc.score(X_test, y_test)
        kap = metrics.cohen_kappa_score(rfc.predict(X_test), y_test)
        acc.append(s)
        print('RFC accuracy: ', s)
        
        acuracia = sum(acc)/len(acc)
        
        # Computes the Silhouette coefficient
        print('Silhouette coefficient: ', metrics.silhouette_score(novos_dados.real.T, target, metric='euclidean'))
        print('Average accuracy: ', acuracia)
        print()

        acuracias.append(acuracia)

    print('List of values for K: ', lista_k)
    print('Supervised classification accuracies: ', acuracias)
    acuracias = np.array(acuracias)
    print('Max Acc: ', acuracias.max())
    print('K* = ', lista_k[acuracias.argmax()])
    print()

    plt.figure(1)
    plt.plot(lista_k, acuracias)
    plt.title('Mean accuracies for different values of K (neighborhood)')
    plt.show()

    #%%%%%%%%%%%%% Dimensionality reduction methods
    # PCA
    novos_dados_pca = PCA(dados, 2)     
    # Kernel PCA
    model = KernelPCA(n_components=2, kernel='rbf')   
    novos_dados_kpca = model.fit_transform(dados)
    novos_dados_kpca = novos_dados_kpca.T
    # ISOMAP
    model = Isomap(n_neighbors=20, n_components=2)
    novos_dados_isomap = model.fit_transform(dados)
    novos_dados_isomap = novos_dados_isomap.T
    # LLE
    model = LocallyLinearEmbedding(n_neighbors=20, n_components=2)
    novos_dados_LLE = model.fit_transform(dados)
    novos_dados_LLE = novos_dados_LLE.T
    # Lap. Eig.
    model = SpectralEmbedding(n_neighbors=20, n_components=2)
    novos_dados_Lap = model.fit_transform(dados)
    novos_dados_Lap = novos_dados_Lap.T
    
    #%%%%%%%%%%%%%%%%% PCA 
    print('Results for PCA')
    
    # Split training data
    X_train, X_test, y_train, y_test = train_test_split(novos_dados_pca.real.T, target, test_size=.4, random_state=42)
    acc = []
       
    # KNN
    neigh = KNeighborsClassifier(n_neighbors=7)
    neigh.fit(X_train, y_train) 
    s = neigh.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(neigh.predict(X_test), y_test)
    acc.append(s)
    print('KNN accuracy: ', s)
        
    # SVM
    svm = SVC(gamma='auto')
    svm.fit(X_train, y_train)
    s = svm.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(svm.predict(X_test), y_test)
    acc.append(s)
    print('SVM accuracy: ', s)
    
    # Naive Bayes
    nb = GaussianNB()
    nb.fit(X_train, y_train)
    s = nb.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(nb.predict(X_test), y_test)
    acc.append(s)
    print('NB accuracy: ', s)
    
    # Decision Tree
    dt = DecisionTreeClassifier(random_state=0)
    dt.fit(X_train, y_train)
    s = dt.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(dt.predict(X_test), y_test)
    acc.append(s)
    print('DT accuracy: ', s)
    
    # Quadratic Discriminant 
    qda = QuadraticDiscriminantAnalysis()
    qda.fit(X_train, y_train)
    s = qda.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(qda.predict(X_test), y_test)
    acc.append(s)
    print('QDA accuracy: ', s)
    
    # MLP classifier
    mlp = MLPClassifier(hidden_layer_sizes=(100,), activation='logistic', max_iter=5000)
    mlp.fit(X_train, y_train)
    s = mlp.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(mlp.predict(X_test), y_test)
    acc.append(s)
    print('MLP accuracy: ', s)
    
    # Gaussian Process
    gpc = GaussianProcessClassifier()
    gpc.fit(X_train, y_train)
    s = gpc.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(gpc.predict(X_test), y_test)
    acc.append(s)
    print('GPC accuracy: ', s)
    
    # Random Forest Classifier
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    s = rfc.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(rfc.predict(X_test), y_test)
    acc.append(s)
    print('RFC accuracy: ', s)
    
    # Computes the Silhouette coefficient
    print('Silhouette coefficient: ', metrics.silhouette_score(novos_dados_pca.real.T, target, metric='euclidean'))
    print('Average accuracy: ', sum(acc)/len(acc))
    print()
        
    #%%%%%%%%%%%%%%%%% KPCA
    print('Results for KPCA')
    
    # Split training data
    X_train, X_test, y_train, y_test = train_test_split(novos_dados_kpca.real.T, target, test_size=.4, random_state=42)
    acc = []
        
    # KNN
    neigh = KNeighborsClassifier(n_neighbors=7)
    neigh.fit(X_train, y_train) 
    s = neigh.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(neigh.predict(X_test), y_test)
    acc.append(s)
    print('KNN accuracy: ', s)
    
    # SVM
    svm = SVC(gamma='auto')
    svm.fit(X_train, y_train)
    s = svm.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(svm.predict(X_test), y_test)
    acc.append(s)
    print('SVM accuracy: ', s)

    # Naive Bayes
    nb = GaussianNB()
    nb.fit(X_train, y_train)
    s = nb.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(nb.predict(X_test), y_test)
    acc.append(s)
    print('NB accuracy: ', s)

    # Decision Tree
    dt = DecisionTreeClassifier(random_state=0)
    dt.fit(X_train, y_train)
    s = dt.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(dt.predict(X_test), y_test)
    acc.append(s)
    print('DT accuracy: ', s)

    # Quadratic Discriminant 
    qda = QuadraticDiscriminantAnalysis()
    qda.fit(X_train, y_train)
    s = qda.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(qda.predict(X_test), y_test)
    acc.append(s)
    print('QDA accuracy: ', s)

    # MLP classifier
    mlp = MLPClassifier(hidden_layer_sizes=(100,), activation='logistic', max_iter=5000)
    mlp.fit(X_train, y_train)
    s = mlp.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(mlp.predict(X_test), y_test)
    acc.append(s)
    print('MLP accuracy: ', s)

    # Gaussian Process
    gpc = GaussianProcessClassifier()
    gpc.fit(X_train, y_train)
    s = gpc.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(gpc.predict(X_test), y_test)
    acc.append(s)
    print('GPC accuracy: ', s)

    # Random Forest Classifier
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    s = rfc.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(rfc.predict(X_test), y_test)
    acc.append(s)
    print('RFC accuracy: ', s)

    # Computes the Silhouette coefficient
    print('Silhouette coefficient: ', metrics.silhouette_score(novos_dados_kpca.real.T, target, metric='euclidean'))
    print('Average accuracy: ', sum(acc)/len(acc))
    print()
    
    #%%%%%%%%%%%%%%%%% ISOMAP
    print('Results for ISOMAP')
    
    # Split training data
    X_train, X_test, y_train, y_test = train_test_split(novos_dados_isomap.real.T, target, test_size=.4, random_state=42)
    acc = []
    
    # KNN
    neigh = KNeighborsClassifier(n_neighbors=7)
    neigh.fit(X_train, y_train) 
    s = neigh.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(neigh.predict(X_test), y_test)
    acc.append(s)
    print('KNN accuracy: ', s)
    
    # SVM
    svm = SVC(gamma='auto')
    svm.fit(X_train, y_train)
    s = svm.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(svm.predict(X_test), y_test)
    acc.append(s)
    print('SVM accuracy: ', s)

    # Naive Bayes
    nb = GaussianNB()
    nb.fit(X_train, y_train)
    s = nb.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(nb.predict(X_test), y_test)
    acc.append(s)
    print('NB accuracy: ', s)

    # Decision Tree
    dt = DecisionTreeClassifier(random_state=0)
    dt.fit(X_train, y_train)
    s = dt.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(dt.predict(X_test), y_test)
    acc.append(s)
    print('DT accuracy: ', s)

    # Quadratic Discriminant 
    qda = QuadraticDiscriminantAnalysis()
    qda.fit(X_train, y_train)
    s = qda.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(qda.predict(X_test), y_test)
    acc.append(s)
    print('QDA accuracy: ', s)

    # MLP classifier
    mlp = MLPClassifier(hidden_layer_sizes=(100,), activation='logistic', max_iter=5000)
    mlp.fit(X_train, y_train)
    s = mlp.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(mlp.predict(X_test), y_test)
    acc.append(s)
    print('MLP accuracy: ', s)

    # Gaussian Process
    gpc = GaussianProcessClassifier()
    gpc.fit(X_train, y_train)
    s = gpc.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(gpc.predict(X_test), y_test)
    acc.append(s)
    print('GPC accuracy: ', s)

    # Random Forest Classifier
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    s = rfc.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(rfc.predict(X_test), y_test)
    acc.append(s)
    print('RFC accuracy: ', s)

    # Computes the Silhouette coefficient
    print('Silhouette coefficient: ', metrics.silhouette_score(novos_dados_isomap.real.T, target, metric='euclidean'))
    print('Average accuracy: ', sum(acc)/len(acc))
    print()
    
    #%%%%%%%%%%%%%%%%% LLE
    print('Results for LLE')
    
    # Split training data
    X_train, X_test, y_train, y_test = train_test_split(novos_dados_LLE.real.T, target, test_size=.4, random_state=42)
    acc = []
    
    # KNN
    neigh = KNeighborsClassifier(n_neighbors=7)
    neigh.fit(X_train, y_train) 
    s = neigh.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(neigh.predict(X_test), y_test)
    acc.append(s)
    print('KNN accuracy: ', s)
    
    # SVM
    svm = SVC(gamma='auto')
    svm.fit(X_train, y_train)
    s = svm.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(svm.predict(X_test), y_test)
    acc.append(s)
    print('SVM accuracy: ', s)

    # Naive Bayes
    nb = GaussianNB()
    nb.fit(X_train, y_train)
    s = nb.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(nb.predict(X_test), y_test)
    acc.append(s)
    print('NB accuracy: ', s)

    # Decision Tree
    dt = DecisionTreeClassifier(random_state=0)
    dt.fit(X_train, y_train)
    s = dt.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(dt.predict(X_test), y_test)
    acc.append(s)
    print('DT accuracy: ', s)

    # Quadratic Discriminant 
    qda = QuadraticDiscriminantAnalysis()
    qda.fit(X_train, y_train)
    s = qda.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(qda.predict(X_test), y_test)
    acc.append(s)
    print('QDA accuracy: ', s)

    # MLP classifier
    mlp = MLPClassifier(hidden_layer_sizes=(100,), activation='logistic', max_iter=5000)
    mlp.fit(X_train, y_train)
    s = mlp.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(mlp.predict(X_test), y_test)
    acc.append(s)
    print('MLP accuracy: ', s)

    # Gaussian Process
    gpc = GaussianProcessClassifier()
    gpc.fit(X_train, y_train)
    s = gpc.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(gpc.predict(X_test), y_test)
    acc.append(s)
    print('GPC accuracy: ', s)

    # Random Forest Classifier
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    s = rfc.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(rfc.predict(X_test), y_test)
    acc.append(s)
    print('RFC accuracy: ', s)

    # Computes the Silhouette coefficient
    print('Silhouette coefficient: ', metrics.silhouette_score(novos_dados_LLE.real.T, target, metric='euclidean'))
    print('Average accuracy: ', sum(acc)/len(acc))
    print()
    
    #%%%%%%%%%%%%%%%%% Laplacian Eigenmaps
    print('Results for Laplacian Eigenmaps')
    
    # Split training data
    X_train, X_test, y_train, y_test = train_test_split(novos_dados_Lap.real.T, target, test_size=.4, random_state=42)
    acc = []
    
    # KNN
    neigh = KNeighborsClassifier(n_neighbors=7)
    neigh.fit(X_train, y_train) 
    s = neigh.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(neigh.predict(X_test), y_test)
    acc.append(s)
    print('KNN accuracy: ', s)
    
    # SVM
    svm = SVC(gamma='auto')
    svm.fit(X_train, y_train)
    s = svm.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(svm.predict(X_test), y_test)
    acc.append(s)
    print('SVM accuracy: ', s)

    # Naive Bayes
    nb = GaussianNB()
    nb.fit(X_train, y_train)
    s = nb.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(nb.predict(X_test), y_test)
    acc.append(s)
    print('NB accuracy: ', s)

    # Decision Tree
    dt = DecisionTreeClassifier(random_state=0)
    dt.fit(X_train, y_train)
    s = dt.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(dt.predict(X_test), y_test)
    acc.append(s)
    print('DT accuracy: ', s)

    # Quadratic Discriminant 
    qda = QuadraticDiscriminantAnalysis()
    qda.fit(X_train, y_train)
    s = qda.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(qda.predict(X_test), y_test)
    acc.append(s)
    print('QDA accuracy: ', s)

    # MLP classifier
    mlp = MLPClassifier(hidden_layer_sizes=(100,), activation='logistic', max_iter=5000)
    mlp.fit(X_train, y_train)
    s = mlp.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(mlp.predict(X_test), y_test)
    acc.append(s)
    print('MLP accuracy: ', s)

    # Gaussian Process
    gpc = GaussianProcessClassifier()
    gpc.fit(X_train, y_train)
    s = gpc.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(gpc.predict(X_test), y_test)
    acc.append(s)
    print('GPC accuracy: ', s)

    # Random Forest Classifier
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    s = rfc.score(X_test, y_test)
    kap = metrics.cohen_kappa_score(rfc.predict(X_test), y_test)
    acc.append(s)
    print('RFC accuracy: ', s)

    # Computes the Silhouette coefficient
    print('Silhouette coefficient: ', metrics.silhouette_score(novos_dados_Lap.real.T, target, metric='euclidean'))
    print('Average accuracy: ', sum(acc)/len(acc))
    print()
    
    batch_Parametric_PCA_cluster(X)
Example #26
Qda = QuadraticDiscriminantAnalysis()
Qda.fit(X_train, y_train)
print('Accuracy of QDA classifier on training set: {:.2f}'
     .format(Qda.score(X_train, y_train)))
print('Accuracy of QDA classifier on test set: {:.2f}'
     .format(Qda.score(X_test, y_test)))


# In[29]:


from sklearn.gaussian_process import GaussianProcessClassifier
gpc = GaussianProcessClassifier()
gpc.fit(X_train, y_train)
print('Accuracy of GPC classifier on training set: {:.2f}'
     .format(gpc.score(X_train, y_train)))
print('Accuracy of GPC classifier on test set: {:.2f}'
     .format(gpc.score(X_test, y_test)))


# In[30]:


svm2 = SVC(gamma=2, C=1)
svm2.fit(X_train, y_train)
print('Accuracy of svm2 classifier on training set: {:.2f}'
     .format(svm2.score(X_train, y_train)))
print('Accuracy of svm2 classifier on test set: {:.2f}'
     .format(svm2.score(X_test, y_test)))

Example #27
    for i in range(0, 50):
        print(colored(
            round(model.predict(np.expand_dims(X_train[i], axis=0))[0][0]),
            "green"),
              end="    ")
    print("")

elif train == "sk":
    # select whichever one you would like to use

    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import LinearSVC
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.gaussian_process import GaussianProcessClassifier
    from sklearn.ensemble import AdaBoostClassifier

    print(colored("[TRAIN] Training with sklearn", "green"))
    model = GaussianProcessClassifier()
    model.fit(X_train, y_train)
    print(colored("[TRAIN] sklearn complete", "green"))
    score = model.score(X_test, y_test)
    print(colored("Accuracy: {}".format(score), "cyan", attrs=['bold']))
    dump(model, 'model.joblib')

    for i in range(0, 50):
        print(colored(model.predict(np.expand_dims(X_train[i], axis=0)),
                      "green"),
              end="    ")
    print("")
Example #28
probs = probs[:, 1]
auc_SVM = roc_auc_score(labels_test, probs)  

#calculating AUC 
probs = ensemble.predict_proba(selected_features_data_test) 
probs = probs[:, 1]  
auc_ensemble = roc_auc_score(labels_test, probs)  
'''
#calculating AUC
probs = gpc.predict_proba(selected_features_data_test)
probs = probs[:, 1]
auc_GP = roc_auc_score(labels_test, probs)

print('')
print('training accuracy GP classifier:',
      gpc.score(selected_features_train_data, labels_train))
print("training accuracy SVM classifier:",
      metrics.accuracy_score(training_pred, labels_train))

print('test accuracy GP classifier:',
      gpc.score(selected_features_data_test, labels_test))
print('test accuracy SVM classifier:',
      metrics.accuracy_score(test_pred, labels_test))
print('test accuracy tree classifier:',
      metrics.accuracy_score(test_tree_pred, labels_test))
print('test accuracy ensemble classifier:',
      ensemble.score(selected_features_data_test, labels_test))

print('AUC GP classifier: %.2f' % auc_GP)
#print('AUC SVM classifier: %.2f' % auc_SVM)
#print('AUC ensemble classifier: %.2f' % auc_ensemble)
Example #29
# split into training and test set
train_set, test_set = train_test_split(parts_labeled, random_state=42)
# get X and Y values
X_train, X_test = [s[['corr_scaled','mass_scaled']].values for s in (train_set, test_set)]
y_train, y_test = [s['manual_label'].values for s in (train_set, test_set)]

#clf_scaler_path = '../output/pipeline/GPClassification/GPCclfRBF.p'
#with open(clf_scaler_path, 'rb') as f:
#    clf = pickle.load(f)
#    scaler = pickle.load(f)

# train a Gaussian process classifier with an RBF kernel (default)
clf = GaussianProcessClassifier(1.0 * RBF(1.0), random_state=42, n_jobs=-1)
clf.fit(X_train, y_train)
plot2dDecisionFunc(clf, X_train, y_train, save=save_dir+'prob_surfaceGPC.pdf')
print('test accuracy:', clf.score(X_test, y_test))
labels_pred = clf.predict_proba(X_test)[:,1]

# compute the F1 score: the harmonic mean of precision and recall,
# F1 = 2 * (precision * recall) / (precision + recall)
# see https://en.wikipedia.org/wiki/F1_score
prob_f1 = pd.DataFrame()
prob_thresh = np.linspace(0.1, 1, 90, endpoint=False)
prob_f1['prob_thresh'] = prob_thresh
f1score = np.array([metrics.precision_recall_fscore_support(y_test, labels_pred > thresh)[2]
                    for thresh in prob_thresh])
prob_f1['f1score_False'] = f1score[:, 0]
prob_f1['f1score_True'] = f1score[:, 1]
prob_f1.to_csv(save_dir+'prob_f1score.csv', index=False)

fig, ax = plt.subplots()
ax.plot(prob_f1.prob_thresh, prob_f1.f1score_False, color='r')
ax.plot(prob_f1.prob_thresh, prob_f1.f1score_True, color='b')
Example #30
    # initialize the target classifier and train it
    # clf = neighbors.KNeighborsClassifier(n_neighbors=3)
    #clf=SVC()
    clf = GaussianProcessClassifier(1.0 * RBF(1.0))
    #clf = DecisionTreeClassifier(max_depth=5)
    #clf=MLPClassifier(alpha=1)
    clf.fit(X_train, y_train)

    #Store the predicted values
    y_pred = clf.predict(X_test)

    # Calculate global accuracy
    accuracy = accuracy_score(y_test, y_pred)

    minority_y_test_index1 = np.where(y_test == 1)
    total_indexes = np.where(y_test >= 0)
    minority_y_test_index = minority_y_test_index1[0].tolist()

    y_pred_minority = []
    y_test_minority = []

    majority_test_index = total_indexes

    for item in minority_y_test_index:
        y_test_minority.append(y_test[item])