def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Fit a 3-nearest-neighbour classifier and report its accuracy.

    Args:
        x_tr: Training features.
        y_tr: Training labels.
        x_te: Optional test features; without them nothing is predicted.
        y_te: Test labels, passed to ``hp.check_accuracy`` together with
            the predictions.

    Returns:
        ``(yhat, data_scores)``. ``yhat`` is the prediction for ``x_te``
        (``None`` when ``x_te`` is omitted). ``data_scores`` is the array
        ``[cv_mean, cv_std, train_accuracy, test_score]``; ``test_score``
        stays 0 when there is no test set.
    """
    model = KNeighborsClassifier(n_neighbors=3)
    model.fit(x_tr, y_tr)

    # Score on the very data the model was fitted on.
    train_acc = model.score(x_tr, y_tr)

    # Cross-validation (cv=5) over the training data.
    cv_scores = cross_val_score(model, x_tr, y_tr, cv=5)
    cv_mean = cv_scores.mean()
    cv_std = cv_scores.std()

    print("\n")
    print("KNN Accuracy = %3.4f" % (train_acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (cv_mean, cv_std * 2))

    # Only classify when a test set was supplied.
    yhat = None
    test_score = 0
    if x_te is not None:
        yhat = model.predict(x_te)
        test_score, _ = hp.check_accuracy(yhat, y_te)

    return yhat, np.array([cv_mean, cv_std, train_acc, test_score])
def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Fit a 100-estimator bagging classifier and report its accuracy.

    Args:
        x_tr: Training features.
        y_tr: Training labels.
        x_te: Optional test features; without them nothing is predicted.
        y_te: Test labels, passed to ``hp.check_accuracy`` together with
            the predictions.

    Returns:
        ``(yhat, data_scores)``. ``yhat`` is the prediction for ``x_te``
        (``None`` when ``x_te`` is omitted). ``data_scores`` is the array
        ``[cv_mean, cv_std, train_accuracy, test_score]``; ``test_score``
        stays 0 when there is no test set.
    """
    ensemble = BaggingClassifier(n_estimators=100)
    ensemble.fit(x_tr, y_tr)

    # Score on the very data the ensemble was fitted on.
    train_acc = ensemble.score(x_tr, y_tr)

    # Cross-validation (cv=5) over the training data.
    fold_scores = cross_val_score(ensemble, x_tr, y_tr, cv=5)
    fold_mean = fold_scores.mean()
    fold_std = fold_scores.std()

    print("\n")
    print("Bagging Accuracy = %3.4f" % (train_acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (fold_mean, fold_std * 2))

    # Only classify when a test set was supplied.
    yhat = None
    test_score = 0
    if x_te is not None:
        yhat = ensemble.predict(x_te)
        test_score, _ = hp.check_accuracy(yhat, y_te)

    return yhat, np.array([fold_mean, fold_std, train_acc, test_score])
def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Fit a depth-7 decision tree and report its accuracy.

    Args:
        x_tr: Training features.
        y_tr: Training labels.
        x_te: Optional test features; without them nothing is predicted.
        y_te: Test labels, passed to ``hp.check_accuracy`` together with
            the predictions.

    Returns:
        ``(yhat, data_scores)``. ``yhat`` is the prediction for ``x_te``
        (``None`` when ``x_te`` is omitted). ``data_scores`` is the array
        ``[cv_mean, cv_std, train_accuracy, test_score]``; ``test_score``
        stays 0 when there is no test set.
    """
    tree = DecisionTreeClassifier(max_depth=7)
    tree.fit(x_tr, y_tr)

    # Score on the very data the tree was fitted on.
    train_acc = tree.score(x_tr, y_tr)

    # Cross-validation (cv=5) over the training data.
    cv_result = cross_val_score(tree, x_tr, y_tr, cv=5)
    cv_mean = cv_result.mean()
    cv_std = cv_result.std()

    print("\n")
    print("Decision Tree Accuracy = %3.4f" % (train_acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (cv_mean, cv_std * 2))

    # Only classify when a test set was supplied.
    yhat = None
    test_score = 0
    if x_te is not None:
        yhat = tree.predict(x_te)
        test_score, _ = hp.check_accuracy(yhat, y_te)

    return yhat, np.array([cv_mean, cv_std, train_acc, test_score])
def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Fit an RBF-kernel SVM (C=1) and report its accuracy.

    Args:
        x_tr: Training features.
        y_tr: Training labels.
        x_te: Optional test features; without them nothing is predicted.
        y_te: Test labels, passed to ``hp.check_accuracy`` together with
            the predictions.

    Returns:
        ``(yhat, data_scores)``. ``yhat`` is the prediction for ``x_te``
        (``None`` when ``x_te`` is omitted). ``data_scores`` is the array
        ``[cv_mean, cv_std, train_accuracy, test_score]``; ``test_score``
        stays 0 when there is no test set.
    """
    classifier = SVC(kernel='rbf', C=1)
    classifier.fit(x_tr, y_tr)

    # Score on the very data the SVM was fitted on.
    train_acc = classifier.score(x_tr, y_tr)

    # Cross-validation (cv=5) over the training data.
    cv_scores = cross_val_score(classifier, x_tr, y_tr, cv=5)
    mean_cv = cv_scores.mean()
    std_cv = cv_scores.std()

    print("\n")
    print("RBF SVM Accuracy = %3.4f" % (train_acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (mean_cv, std_cv * 2))

    # Only classify when a test set was supplied.
    yhat = None
    test_score = 0
    if x_te is not None:
        yhat = classifier.predict(x_te)
        test_score, _ = hp.check_accuracy(yhat, y_te)

    return yhat, np.array([mean_cv, std_cv, train_acc, test_score])
def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Run semi-supervised (self-training) random-forest classification.

    The labeled training set is ``(x_tr, y_tr)``; the test features double
    as the unlabeled pool that ``train_ssl`` may pseudo-label.

    Args:
        x_tr: Training features; must expose ``.values``.
        y_tr: Training labels; must expose ``.values``.
        x_te: Optional test features (``.values`` is read). When omitted,
            self-training runs with an empty unlabeled pool and nothing is
            predicted.
        y_te: Test labels (``.values`` is read when ``x_te`` is given);
            also passed to ``hp.check_accuracy`` with the predictions.

    Returns:
        ``(yhat, data_scores)``. ``yhat`` is the prediction for ``x_te``
        (``None`` when ``x_te`` is omitted). ``data_scores`` is the array
        ``[cv_mean, cv_std, train_accuracy, test_score]``; ``test_score``
        is whatever ``hp.check_accuracy`` returns first and stays 0 when
        there is no test set.
    """
    NUM_ITER = 2
    THRESHOLD = 0.9
    SCORE_MODE = 1    # train_ssl computes an accuracy
    PREDICT_MODE = 0  # train_ssl skips scoring and only predicts

    k_fold = KFold(n_splits=5)
    clf = RandomForestClassifier(n_estimators=100)

    x_tr_values = x_tr.values
    y_tr_values = y_tr.values

    # Bug fix: the original read ``x_te.values`` before checking for None,
    # so the default ``x_te=None`` crashed. Use an empty pool with matching
    # trailing shape instead; train_ssl skips pseudo-labelling when the
    # pool has size 0.
    if x_te is not None:
        x_unlabeled = x_te.values
    else:
        x_unlabeled = np.empty((0,) + x_tr_values.shape[1:])

    # K-fold cross-validation over the labeled data.
    fold_scores = []
    for train_indices, test_indices in k_fold.split(x_tr_values):
        fold_score, _ = train_ssl(
            clf,
            x_tr_values[train_indices],  # labeled features
            y_tr_values[train_indices],  # labels
            x_unlabeled,
            x_tr_values[test_indices],
            y_tr_values[test_indices],
            NUM_ITER,
            THRESHOLD,
            SCORE_MODE)
        fold_scores.append(fold_score)
    scores = np.array(fold_scores)

    # Training accuracy: fit on, and score against, the full labeled set.
    acc, _ = train_ssl(clf, x_tr_values, y_tr_values, x_unlabeled,
                       x_tr_values, y_tr_values, NUM_ITER, THRESHOLD,
                       SCORE_MODE)

    print("\n")
    print("SSL Accuracy = %3.4f" % (acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # Classify the test data, if any.
    yhat = None
    test_score = 0
    if x_te is not None:
        _, yhat = train_ssl(clf, x_tr_values, y_tr_values, x_unlabeled,
                            x_unlabeled, y_te.values, NUM_ITER, THRESHOLD,
                            PREDICT_MODE)
        test_score, _ = hp.check_accuracy(yhat, y_te)

    data_scores = np.array([scores.mean(), scores.std(), acc, test_score])
    return yhat, data_scores
def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Fit a scikit-learn MLP on standardized features and report accuracy.

    Args:
        x_tr: Training features.
        y_tr: Training labels.
        x_te: Optional test features; without them nothing is predicted.
        y_te: Test labels, passed to ``hp.check_accuracy`` together with
            the predictions.

    Returns:
        ``(yhat, data_scores)``. ``yhat`` is the prediction for ``x_te``
        (``None`` when ``x_te`` is omitted). ``data_scores`` is the array
        ``[cv_mean, cv_std, train_accuracy, test_score]``; ``test_score``
        stays 0 when there is no test set.
    """
    # NOTE(review): alpha=1e5 is an extremely strong L2 penalty and looks
    # like a typo for 1e-5 -- confirm before changing, it alters results.
    # NOTE(review): per the scikit-learn docs ``momentum`` is only used by
    # solver='sgd', so it presumably has no effect with 'adam' -- confirm.
    clf = MLPClassifier(solver='adam',
                        alpha=1e5,
                        hidden_layer_sizes=(100, 100),
                        random_state=1,
                        max_iter=500,
                        momentum=0.9,
                        learning_rate_init=0.002)

    # Standardize with statistics learned from the training data only.
    # The scaler sees all of x_tr before cross-validation, so the CV folds
    # share scaling statistics.
    scaler = StandardScaler()
    scaler.fit(x_tr)
    x_tr = scaler.transform(x_tr)
    # Bug fix: the original transformed x_te unconditionally, which raised
    # for the default ``x_te=None``.
    if x_te is not None:
        x_te = scaler.transform(x_te)

    clf.fit(x_tr, y_tr)

    # Score on the very data the network was fitted on.
    acc = clf.score(x_tr, y_tr)

    # Cross-validation (cv=5) over the scaled training data.
    scores = cross_val_score(clf, x_tr, y_tr, cv=5)

    print("\n")
    print("MLP_SK Accuracy = %3.4f" % (acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # Classify the test data, if any.
    yhat = None
    test_score = 0
    if x_te is not None:
        yhat = clf.predict(x_te)
        test_score, _ = hp.check_accuracy(yhat, y_te)

    data_scores = np.array([scores.mean(), scores.std(), acc, test_score])
    return yhat, data_scores
def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Fit a linear-kernel SVM (C=1) and report its accuracy.

    Args:
        x_tr: Training features.
        y_tr: Training labels.
        x_te: Optional test features; without them nothing is predicted.
        y_te: Test labels, passed to ``hp.check_accuracy`` together with
            the predictions.

    Returns:
        ``(yhat, data_scores)``. ``yhat`` is the prediction for ``x_te``
        (``None`` when ``x_te`` is omitted). ``data_scores`` is the array
        ``[cv_mean, cv_std, train_accuracy, test_score]``; ``test_score``
        stays 0 when there is no test set.
    """
    classifier = SVC(kernel='linear', C=1)

    # Degree-3 polynomial feature expansion (Cover's theorem) is switched
    # off; this flag keeps the disabled code path around.
    # NOTE(review): if enabled, this branch would fail for x_te=None since
    # it transforms x_te unconditionally -- confirm before turning it on.
    expand_features = False
    if expand_features:
        poly = PolynomialFeatures(degree=3)
        x_tr = poly.fit_transform(x_tr)
        x_te = poly.fit_transform(x_te)

    classifier.fit(x_tr, y_tr)

    # Score on the very data the SVM was fitted on.
    train_acc = classifier.score(x_tr, y_tr)

    # Cross-validation (cv=5) over the training data.
    cv_scores = cross_val_score(classifier, x_tr, y_tr, cv=5)
    cv_mean = cv_scores.mean()
    cv_std = cv_scores.std()

    print("\n")
    print("Linear SVM Accuracy = %3.4f" % (train_acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (cv_mean, cv_std * 2))

    # Only classify when a test set was supplied.
    yhat = None
    test_score = 0
    if x_te is not None:
        yhat = classifier.predict(x_te)
        test_score, _ = hp.check_accuracy(yhat, y_te)

    return yhat, np.array([cv_mean, cv_std, train_acc, test_score])
def train_ssl(clf, X_tr_lab, Y_tr_lab, X_tr_unlab, X_test, Y_test, NUM_ITER,
              THRESHOLD, mode):
    """Self-train ``clf`` by pseudo-labelling confident unlabeled points.

    Each round fits ``clf`` on the labeled set, then moves every unlabeled
    row whose highest class probability exceeds ``THRESHOLD`` into the
    labeled set with the predicted class as its label. The caller's arrays
    are not modified; the grown sets live only inside this call.

    Args:
        clf: Classifier with ``fit``, ``predict`` and ``predict_proba``.
        X_tr_lab: Labeled training features (2-D array).
        Y_tr_lab: Labels for ``X_tr_lab``.
        X_tr_unlab: Unlabeled features (2-D array); may be empty.
        X_test: Features to predict after the last round.
        Y_test: Labels for ``X_test``; only read when ``mode == 1``.
        NUM_ITER: Number of fit / pseudo-label rounds.
        THRESHOLD: Probability a prediction must exceed to be accepted.
        mode: ``1`` to also score the prediction; anything else skips it.

    Returns:
        ``(accuracy, Y_pred)``. ``accuracy`` is the first value returned by
        ``hp.check_accuracy`` divided by 100 when ``mode == 1``, otherwise
        ``None``. ``Y_pred`` is ``clf.predict(X_test)`` from the classifier
        as fitted in the final round (points pseudo-labeled in that last
        round are not used for another fit).
    """
    for _ in range(NUM_ITER):
        clf.fit(X_tr_lab, Y_tr_lab)

        if X_tr_unlab.size > 0:
            # Score the whole unlabeled pool in one call instead of one
            # predict_proba call per row.
            proba = clf.predict_proba(X_tr_unlab)
            best = np.argmax(proba, axis=1)
            confident = proba[np.arange(proba.shape[0]), best] > THRESHOLD

            if confident.any():
                # Bug fix: ``best`` holds probability-column indices, not
                # labels. Map through ``classes_`` so the pseudo-labels are
                # right even when labels are not 0..k-1; fall back to the
                # raw index for classifiers that lack ``classes_``.
                classes = getattr(clf, "classes_", None)
                if classes is None:
                    pseudo_labels = best[confident]
                else:
                    pseudo_labels = np.asarray(classes)[best[confident]]

                # Move the confident rows from the unlabeled to the
                # labeled set, keeping their original order.
                X_tr_lab = np.vstack((X_tr_lab, X_tr_unlab[confident]))
                Y_tr_lab = np.hstack((Y_tr_lab, pseudo_labels))
                X_tr_unlab = X_tr_unlab[~confident]

    Y_pred = clf.predict(X_test)

    accuracy = None
    if mode == 1:
        accuracy, _ = hp.check_accuracy(Y_pred, Y_test)
        accuracy = accuracy / 100

    return accuracy, Y_pred
def analysis(x_tr, y_tr, x_te=None, y_te=None):
    """Train a TensorFlow ``DNNClassifier`` and report its accuracy.

    Args:
        x_tr: Training features; iterated column by column and read via
            ``.values``.
        y_tr: Training labels; read via ``.values``.
        x_te: Optional test features (same access pattern as ``x_tr``);
            without them nothing is predicted.
        y_te: Test labels, passed to ``hp.check_accuracy`` together with
            the predictions.

    Returns:
        ``(yhat, data_scores)``. ``yhat`` is an array of integer class
        predictions for ``x_te`` (``None`` when ``x_te`` is omitted).
        ``data_scores`` is the array
        ``[cv_mean, cv_std, train_accuracy, test_score]``; ``test_score``
        stays 0 when there is no test set.
    """
    HIDDEN = [100, 100, 100, 100, 100]
    NUM_STEPS = 1000
    BATCH_SIZE = 50
    run_cv = True  # local switch; when False the CV score array is empty

    k_fold = KFold(n_splits=5)

    feature_columns = [
        tf.feature_column.numeric_column("x",
                                         shape=[1, x_tr.values.shape[1]])
    ]

    # Normalize every column for the DNN.
    # NOTE(review): train and test are normalized by separate calls to
    # ti.normalize_column; whether they share statistics depends on that
    # helper -- confirm.
    for column in x_tr:
        x_tr = ti.normalize_column(x_tr, column)
    # Bug fix: the original iterated x_te unconditionally, which raised a
    # TypeError for the default ``x_te=None``.
    if x_te is not None:
        for column in x_te:
            x_te = ti.normalize_column(x_te, column)

    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=HIDDEN,
        optimizer=tf.train.AdamOptimizer(1e-4),
        n_classes=2,
        dropout=0.1)

    x_tr_values = x_tr.values
    y_tr_values = y_tr.values

    # K-fold cross-validation.
    # NOTE(review): the same estimator instance is trained in every fold
    # and again for the final fit below; tf.estimator training presumably
    # resumes from the previous checkpoint, so fold scores may not be
    # independent -- confirm.
    fold_scores = []
    if run_cv:
        for train_indices, test_indices in k_fold.split(x_tr_values):
            fit_input_fn = tf.estimator.inputs.numpy_input_fn(
                x={"x": x_tr_values[train_indices]},
                y=y_tr_values[train_indices],
                num_epochs=None,
                batch_size=BATCH_SIZE,
                shuffle=True)
            classifier.train(input_fn=fit_input_fn, steps=NUM_STEPS)

            eval_input_fn = tf.estimator.inputs.numpy_input_fn(
                x={"x": x_tr_values[test_indices]},
                y=y_tr_values[test_indices],
                num_epochs=1,
                shuffle=False)
            fold_scores.append(
                classifier.evaluate(input_fn=eval_input_fn)["accuracy"])
    scores = np.array(fold_scores)

    # Train on the full training set, then score on that same data.
    fit_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": x_tr_values},
        y=y_tr_values,
        num_epochs=None,
        batch_size=BATCH_SIZE,
        shuffle=True)
    classifier.train(input_fn=fit_input_fn, steps=NUM_STEPS)

    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": x_tr_values},
        y=y_tr_values,
        num_epochs=1,
        shuffle=False)
    acc = classifier.evaluate(input_fn=eval_input_fn)["accuracy"]

    print("\n")
    print("DNN Accuracy = %3.4f" % (acc))
    print("CV Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    # Classify the test data, if any.
    yhat = None
    test_score = 0
    if x_te is not None:
        predict_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={"x": x_te.values}, num_epochs=1, shuffle=False)
        predictions = classifier.predict(input_fn=predict_input_fn)
        # Each prediction is a dict; its 'classes' entry is cast to int.
        yhat = np.array([int(p['classes']) for p in predictions])
        test_score, _ = hp.check_accuracy(yhat, y_te)

    data_scores = np.array([scores.mean(), scores.std(), acc, test_score])
    return yhat, data_scores