def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Fit a 3-nearest-neighbours classifier and report its accuracy figures.

    The model is trained on ``(x_tr, y_tr)``, scored on that same data and
    evaluated with 5-fold cross-validation; both figures are printed.  When
    ``x_te`` is supplied, labels are predicted for it and passed together
    with ``y_te`` to ``hp.check_accuracy``, whose first return value is kept
    as the test score.

    Returns:
        A ``(yhat, data_scores)`` pair.  ``yhat`` holds the predictions for
        ``x_te`` (``None`` when no test set was given).  ``data_scores`` is a
        NumPy array ``[cv mean, cv std, training accuracy, test score]``; the
        test score stays ``0`` when no test set was given.
    """
    model = KNeighborsClassifier(n_neighbors=3)
    model.fit(x_tr, y_tr)

    # Accuracy on the data the model was fitted on, then the 5-fold CV scores.
    train_accuracy = model.score(x_tr, y_tr)
    cv_scores = cross_val_score(model, x_tr, y_tr, cv=5)
    cv_mean = cv_scores.mean()
    cv_std = cv_scores.std()

    print("\n")
    print("KNN Accuracy = %3.4f" % (train_accuracy))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (cv_mean, cv_std * 2))

    # Defaults describe the "no test set" case; overwritten when one exists.
    yhat = None
    test_score = 0
    if x_te is not None:
        yhat = model.predict(x_te)
        test_score, _ = hp.check_accuracy(yhat, y_te)

    return yhat, np.array([cv_mean, cv_std, train_accuracy, test_score])
def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Train a 100-estimator bagging classifier and summarise how it scores.

    Prints the accuracy on the training data and the mean of the 5-fold
    cross-validation scores (with twice their standard deviation).  If a
    test matrix ``x_te`` is given, it is classified and the predictions are
    handed to ``hp.check_accuracy`` along with ``y_te``.

    Returns:
        ``(yhat, data_scores)`` where ``yhat`` is the prediction array for
        ``x_te`` or ``None``, and ``data_scores`` is
        ``np.array([cv mean, cv std, training accuracy, test score])`` with a
        test score of ``0`` when ``x_te`` is ``None``.
    """
    bagger = BaggingClassifier(n_estimators=100)
    bagger.fit(x_tr, y_tr)
    fit_accuracy = bagger.score(x_tr, y_tr)

    fold_scores = cross_val_score(bagger, x_tr, y_tr, cv=5)
    fold_mean, fold_std = fold_scores.mean(), fold_scores.std()

    print("\n")
    print("Bagging Accuracy = %3.4f" % (fit_accuracy))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (fold_mean, fold_std * 2))

    # Nothing to classify: report a zero test score and no predictions.
    if x_te is None:
        return None, np.array([fold_mean, fold_std, fit_accuracy, 0])

    yhat = bagger.predict(x_te)
    test_score, _ = hp.check_accuracy(yhat, y_te)
    return yhat, np.array([fold_mean, fold_std, fit_accuracy, test_score])
Exemple #3
0
def analysis(x_tr,y_tr,x_te=None,y_te=None):
    """Evaluate a depth-limited (max_depth=7) decision tree classifier.

    The tree is fitted on the training data, scored on that same data, and
    cross-validated with 5 folds; the figures are printed.  A supplied
    ``x_te`` is classified and scored through ``hp.check_accuracy`` against
    ``y_te``.

    Returns:
        ``(yhat, data_scores)``: the predictions for ``x_te`` (``None`` if it
        was not supplied) and a NumPy array laid out as
        ``[cv mean, cv std, training accuracy, test score]``.  The test score
        is ``0`` when there is no test set.
    """
    tree = DecisionTreeClassifier(max_depth=7)
    tree.fit(x_tr, y_tr)

    training_acc = tree.score(x_tr, y_tr)
    cv = cross_val_score(tree, x_tr, y_tr, cv=5)

    # The first three entries of the result are known at this point.
    summary = [cv.mean(), cv.std(), training_acc]

    print("\n")
    print("Decision Tree Accuracy = %3.4f" % (training_acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (summary[0], summary[1] * 2))

    have_test_set = x_te is not None
    yhat = tree.predict(x_te) if have_test_set else None
    # Only the first value returned by hp.check_accuracy is used.
    summary.append(hp.check_accuracy(yhat, y_te)[0] if have_test_set else 0)

    return yhat, np.array(summary)
Exemple #4
0
def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Run an RBF-kernel SVM (C=1) and collect its accuracy statistics.

    Steps: fit on ``(x_tr, y_tr)``, score on the training data, compute
    5-fold cross-validation scores, print both, and - only when ``x_te`` is
    provided - predict its labels and score them with
    ``hp.check_accuracy(yhat, y_te)``.

    Returns:
        Tuple ``(yhat, data_scores)``.  ``yhat`` is ``None`` without a test
        set.  ``data_scores`` is ``np.array`` of, in order, the CV mean, the
        CV standard deviation, the training accuracy and the test score
        (``0`` without a test set).
    """
    classifier = SVC(kernel='rbf', C=1)
    classifier.fit(x_tr, y_tr)

    accuracy_on_train = classifier.score(x_tr, y_tr)
    cv_results = cross_val_score(classifier, x_tr, y_tr, cv=5)
    mean_cv = cv_results.mean()
    std_cv = cv_results.std()

    print("\n")
    print("RBF SVM Accuracy = %3.4f" % (accuracy_on_train))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (mean_cv, std_cv * 2))

    if x_te is None:
        yhat, test_score = None, 0
    else:
        yhat = classifier.predict(x_te)
        # The second value returned by hp.check_accuracy is not needed here.
        test_score = hp.check_accuracy(yhat, y_te)[0]

    data_scores = np.array([mean_cv, std_cv, accuracy_on_train, test_score])
    return yhat, data_scores
Exemple #5
0
def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Evaluate a self-training (semi-supervised) random forest.

    ``x_tr`` / ``y_tr`` (and ``x_te`` / ``y_te`` when given) must expose a
    ``.values`` attribute, as pandas objects do.  The rows of ``x_te`` serve
    as the unlabeled pool that ``train_ssl`` may pseudo-label.

    Three things are computed with ``train_ssl``:

    * a 5-fold cross-validation score over the training data,
    * the accuracy on the full training data,
    * when ``x_te`` is given, predictions for it, which are scored with
      ``hp.check_accuracy(yhat, y_te)``.

    Previously ``x_te.values`` was read unconditionally, so calling the
    function with the default ``x_te=None`` raised ``AttributeError``.  An
    empty unlabeled pool is now used in that case, which makes ``train_ssl``
    skip its pseudo-labeling step.

    Returns:
        ``(yhat, data_scores)``.  ``yhat`` is the prediction array for
        ``x_te`` or ``None``.  ``data_scores`` is
        ``np.array([cv mean, cv std, training accuracy, test score])`` with a
        test score of ``0`` when no test set was given.
    """
    # Values of train_ssl's ``mode`` argument: 1 makes it compute an
    # accuracy, anything else makes it return ``None`` for the accuracy.
    SCORE_MODE = 1
    PREDICT_MODE = 0
    NUM_ITER = 2
    THRESHOLD = 0.9

    k_fold = KFold(n_splits=5)
    clf = RandomForestClassifier(n_estimators=100)

    x_tr_values = x_tr.values
    y_tr_values = y_tr.values

    # Unlabeled pool for self-training: the test rows when available,
    # otherwise an empty (0, n_features) array.
    if x_te is not None:
        x_unlabeled = x_te.values
    else:
        x_unlabeled = np.empty((0, x_tr_values.shape[1]))

    # K-fold cross-validation on the labeled training data.
    scores = []
    for train_indices, test_indices in k_fold.split(x_tr_values):
        score, _ = train_ssl(
            clf,
            x_tr_values[train_indices],  # labeled features
            y_tr_values[train_indices],  # labeled targets
            x_unlabeled,
            x_tr_values[test_indices],
            y_tr_values[test_indices],
            NUM_ITER,
            THRESHOLD,
            SCORE_MODE)
        scores.append(score)

    scores = np.array(scores)

    # Training accuracy: train and evaluate on the full training set.
    acc, _ = train_ssl(clf, x_tr_values, y_tr_values, x_unlabeled,
                       x_tr_values, y_tr_values, NUM_ITER, THRESHOLD,
                       SCORE_MODE)

    print("\n")
    print("SSL Accuracy = %3.4f" % (acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # Classify the test data, if any.
    test_score = 0
    if x_te is not None:
        _, yhat = train_ssl(clf, x_tr_values, y_tr_values, x_unlabeled,
                            x_te.values, y_te.values, NUM_ITER, THRESHOLD,
                            PREDICT_MODE)
        test_score, _ = hp.check_accuracy(yhat, y_te)
    else:
        yhat = None

    data_scores = np.array([scores.mean(), scores.std(), acc, test_score])

    return yhat, data_scores
Exemple #6
0
def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Evaluate a two-hidden-layer scikit-learn MLP on standardized features.

    A ``StandardScaler`` is fitted on ``x_tr`` and applied to ``x_tr`` and,
    when present, to ``x_te``.  The classifier is then fitted on the scaled
    training data, scored on it, and cross-validated with 5 folds; both
    figures are printed.  A supplied ``x_te`` is classified and scored with
    ``hp.check_accuracy(yhat, y_te)``.

    Previously ``x_te`` was scaled unconditionally, so the default
    ``x_te=None`` failed inside ``scaler.transform`` before the later
    ``x_te is not None`` check was ever reached.

    Returns:
        ``(yhat, data_scores)``.  ``yhat`` is the prediction array for
        ``x_te`` or ``None``.  ``data_scores`` is
        ``np.array([cv mean, cv std, training accuracy, test score])`` with a
        test score of ``0`` when no test set was given.
    """
    # NOTE(review): alpha is the L2 penalty; 1e5 is a very strong penalty
    # and may have been meant as 1e-5 - confirm before changing, it is kept
    # as-is here.  Per the scikit-learn docs ``momentum`` only applies to
    # solver='sgd', so it presumably has no effect with 'adam'.
    clf = MLPClassifier(solver='adam',
                        alpha=1e5,
                        hidden_layer_sizes=(100, 100),
                        random_state=1,
                        max_iter=500,
                        momentum=0.9,
                        learning_rate_init=0.002)

    # Scale features using statistics learned from the training data only.
    scaler = StandardScaler()
    scaler.fit(x_tr)
    x_tr = scaler.transform(x_tr)
    if x_te is not None:
        x_te = scaler.transform(x_te)

    # Train the model
    clf.fit(x_tr, y_tr)

    # Compute the training accuracy
    acc = clf.score(x_tr, y_tr)

    # Compute the CV scores
    scores = cross_val_score(clf, x_tr, y_tr, cv=5)

    print("\n")
    print("MLP_SK Accuracy = %3.4f" % (acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # Classify the test data, if any.
    test_score = 0
    if x_te is not None:
        yhat = clf.predict(x_te)
        test_score, _ = hp.check_accuracy(yhat, y_te)
    else:
        yhat = None

    data_scores = np.array([scores.mean(), scores.std(), acc, test_score])
    return yhat, data_scores
def analysis(x_tr,y_tr,x_te=None,y_te=None):
    """Evaluate a linear-kernel SVM (C=1).

    The model is fitted on ``(x_tr, y_tr)``, scored on the training data and
    cross-validated with 5 folds; both figures are printed.  A supplied
    ``x_te`` is classified and scored with ``hp.check_accuracy(yhat, y_te)``.

    An optional degree-3 polynomial feature expansion exists but is switched
    off (the original guarded it with ``if (0):``).  The disabled branch is
    kept behind a named flag, and it now skips a missing ``x_te`` and uses
    ``transform`` rather than a second ``fit_transform`` for the test data.

    Returns:
        ``(yhat, data_scores)``.  ``yhat`` is the prediction array for
        ``x_te`` or ``None``.  ``data_scores`` is
        ``np.array([cv mean, cv std, training accuracy, test score])`` with a
        test score of ``0`` when no test set was given.
    """
    # Experimental switch: map the inputs to polynomial combinations of the
    # features before fitting.  Disabled, so behavior matches the original.
    USE_POLY_FEATURES = False

    # Create the SVM classifier
    svm = SVC(kernel='linear', C=1)

    if USE_POLY_FEATURES:
        poly = PolynomialFeatures(degree=3)

        # Fit the expansion on the training data, then reuse it for the
        # test data (when there is any).
        x_tr = poly.fit_transform(x_tr)
        if x_te is not None:
            x_te = poly.transform(x_te)

    # Train the model
    svm.fit(x_tr, y_tr)

    # Compute the training accuracy
    acc = svm.score(x_tr, y_tr)

    # Compute the CV scores
    scores = cross_val_score(svm, x_tr, y_tr, cv=5)

    print("\n")
    print("Linear SVM Accuracy = %3.4f" % (acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # Classify the test data, if any.
    test_score = 0
    if x_te is not None:
        yhat = svm.predict(x_te)
        test_score, _ = hp.check_accuracy(yhat, y_te)
    else:
        yhat = None

    data_scores = np.array([scores.mean(), scores.std(), acc, test_score])
    return yhat, data_scores
Exemple #8
0
def train_ssl(clf, X_tr_lab, Y_tr_lab, X_tr_unlab, X_test, Y_test, NUM_ITER,
              THRESHOLD, mode):
    """Self-train ``clf`` with pseudo-labels, then predict ``X_test``.

    Each of the ``NUM_ITER`` rounds fits ``clf`` on the labeled set and then
    moves every unlabeled row whose highest predicted class probability
    exceeds ``THRESHOLD`` into the labeled set, labeled with that class.
    Rows moved in the final round are not used for another fit.

    Changes from the previous implementation:

    * The pseudo-label is ``clf.classes_[argmax]`` instead of the raw argmax
      column index.  The two only coincide when the class labels happen to
      be ``0..k-1``.
    * Probabilities are computed with one ``predict_proba`` call per round
      rather than one call per row.
    * The per-round ``clf.predict(X_test)`` whose result was never used has
      been removed.

    Args:
        clf: Classifier providing ``fit``, ``predict``, ``predict_proba``
            and, after fitting, ``classes_`` (as scikit-learn classifiers
            do).
        X_tr_lab, Y_tr_lab: Labeled training features and targets.  The
            caller's arrays are not modified; grown copies are used.
        X_tr_unlab: 2-D NumPy array of unlabeled rows; may be empty.
        X_test: Rows to predict after training.
        Y_test: Targets for ``X_test``; only read when ``mode == 1``.
        NUM_ITER: Number of fit / pseudo-label rounds.
        THRESHOLD: A row is pseudo-labeled only if its top class
            probability is strictly greater than this value.
        mode: ``1`` to also compute an accuracy; any other value skips it.

    Returns:
        ``(accuracy, Y_pred)``.  ``accuracy`` is the first value returned by
        ``hp.check_accuracy(Y_pred, Y_test)`` divided by 100 when
        ``mode == 1`` (presumably that helper reports a percentage - verify),
        otherwise ``None``.
    """
    for _ in range(NUM_ITER):
        clf.fit(X_tr_lab, Y_tr_lab)

        if X_tr_unlab.size == 0:
            continue

        # Most likely class (as a column index) and its probability for
        # every unlabeled row.
        probs = clf.predict_proba(X_tr_unlab)
        best = np.argmax(probs, axis=1)
        confident = probs[np.arange(len(best)), best] > THRESHOLD
        if not confident.any():
            continue

        # Move the confident rows to the labeled set; predict_proba columns
        # follow clf.classes_, which maps a column index back to its label.
        X_tr_lab = np.vstack((X_tr_lab, X_tr_unlab[confident]))
        Y_tr_lab = np.hstack((Y_tr_lab, clf.classes_[best[confident]]))
        X_tr_unlab = X_tr_unlab[~confident]

    Y_pred = clf.predict(X_test)

    if mode == 1:
        accuracy, _ = hp.check_accuracy(Y_pred, Y_test)
        accuracy = accuracy / 100
    else:
        accuracy = None

    return accuracy, Y_pred
def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Evaluate a five-hidden-layer TensorFlow ``DNNClassifier``.

    ``x_tr`` / ``y_tr`` (and ``x_te`` when given) must expose ``.values`` and
    be iterable by column name, as pandas objects are.  Every column of
    ``x_tr`` and ``x_te`` is passed through ``ti.normalize_column``.  The
    estimator is then trained and evaluated on 5 folds of the training data,
    trained again on the full training data and evaluated on it, and finally
    used to predict ``x_te`` when one is supplied.

    Previously the columns of ``x_te`` were iterated unconditionally, so the
    default ``x_te=None`` raised ``TypeError`` before any training happened.

    Returns:
        ``(yhat, data_scores)``.  ``yhat`` is the array of predicted classes
        for ``x_te`` or ``None``.  ``data_scores`` is
        ``np.array([cv mean, cv std, training accuracy, test score])`` with a
        test score of ``0`` when no test set was given.
    """
    # Set up to perform k-fold cross validation
    k_fold = KFold(n_splits=5)
    HIDDEN = [100, 100, 100, 100, 100]
    NUM_STEPS = 1000
    # 1 runs the cross-validation loop; any other value skips it, leaving
    # the CV score list empty.
    mode = 1

    feature_columns = [
        tf.feature_column.numeric_column("x", shape=[1, x_tr.values.shape[1]])
    ]

    # Normalize the data for the DNN
    for column in x_tr:
        x_tr = ti.normalize_column(x_tr, column)

    if x_te is not None:
        for column in x_te:
            x_te = ti.normalize_column(x_te, column)

    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=HIDDEN,
        optimizer=tf.train.AdamOptimizer(1e-4),
        n_classes=2,
        dropout=0.1)

    x_tr_values = x_tr.values
    y_tr_values = y_tr.values

    scores = []

    if mode == 1:
        # NOTE(review): the same estimator instance is trained in every fold
        # and again below.  tf.estimator appears to resume from its latest
        # checkpoint, so later folds likely start from weights that have
        # already seen their held-out rows - confirm, and consider building
        # a fresh estimator per fold.
        for train_indices, test_indices in k_fold.split(x_tr_values):
            fold_train_input_fn = tf.estimator.inputs.numpy_input_fn(
                x={"x": x_tr_values[train_indices]},
                y=y_tr_values[train_indices],
                num_epochs=None,
                batch_size=50,
                shuffle=True)

            classifier.train(input_fn=fold_train_input_fn, steps=NUM_STEPS)

            fold_eval_input_fn = tf.estimator.inputs.numpy_input_fn(
                x={"x": x_tr_values[test_indices]},
                y=y_tr_values[test_indices],
                num_epochs=1,
                shuffle=False)
            scores.append(
                classifier.evaluate(input_fn=fold_eval_input_fn)["accuracy"])

    scores = np.array(scores)

    # Train on the full training set, then measure the training accuracy.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": x_tr_values},
                                                        y=y_tr_values,
                                                        num_epochs=None,
                                                        batch_size=50,
                                                        shuffle=True)

    classifier.train(input_fn=train_input_fn, steps=NUM_STEPS)

    eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": x_tr_values},
                                                       y=y_tr_values,
                                                       num_epochs=1,
                                                       shuffle=False)

    acc = classifier.evaluate(input_fn=eval_input_fn)["accuracy"]

    print("\n")
    print("DNN Accuracy = %3.4f" % (acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # Classify the test data, if any.
    test_score = 0
    if x_te is not None:
        predict_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": x_te.values}, num_epochs=1, shuffle=False)

        predictions = classifier.predict(input_fn=predict_input_fn)
        yhat = np.array([int(p['classes']) for p in predictions])
        test_score, _ = hp.check_accuracy(yhat, y_te)
    else:
        yhat = None

    data_scores = np.array([scores.mean(), scores.std(), acc, test_score])
    return yhat, data_scores