import argparse
import json

import numpy as np

# split_data and ttest are project helpers defined elsewhere in this repository.

parser = argparse.ArgumentParser(
    description='T-test to see if TAN and naive are different')
parser.add_argument('-data', type=str, help='Data set path')
args = parser.parse_args()

data = args.data
data = data.replace('\r', '')  # Remove the carriage return (Windows line endings)

# Load the data
with open(data, "r") as read_file:
    data = json.load(read_file)
metadata = np.array(data['metadata']['features'])

# Build a list of lists containing accuracy for naive Bayes and TAN
accuracy = []
for i in range(0, 10):
    split = split_data(data, i, 10)
    train = split[0]
    test = split[1]
    accuracy.append(ttest(train, test, metadata))

accuracy = np.array(accuracy)
differences = accuracy.T[1] - accuracy.T[0]
print("Accuracy (NB, TAN): \n", accuracy)
print("Differences (TAN - NB): \n", differences)

# Calculate average accuracy difference
diff_avg = np.mean(differences)
# Calculate standard deviation of differences
diff_sd = np.std(differences)
# Calculate standard error from SD
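# Illustrative continuation (not part of the original excerpt): one way the
# standard-error comment above is typically completed for a paired t-test over
# the 10 folds. Note that np.std above uses ddof=0; the sample SD (ddof=1) is
# the usual choice for a t-test. The variable names below are assumptions.
from scipy import stats

n_folds = len(differences)
diff_se = diff_sd / np.sqrt(n_folds)                       # standard error from the SD
t_stat = diff_avg / diff_se                                # paired t-statistic
p_value = 2 * stats.t.sf(np.abs(t_stat), df=n_folds - 1)   # two-sided p-value
print("t = %.4f, p = %.4f" % (t_stat, p_value))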
method = ['DBSCAN']
# driver_main, impostor, n_clusters, selected_features, window_size, method = fc.general_parameters()
n_estimators = fc.if_parameters()
kernel, nu = fc.ocsvm_parameters()

for driver in driver_main:
    for ws in window_size:
        print("Building DF for Driver", driver, "with Window_Size", ws)
        data_final = fc.build_df_final(data_normalized, driver, ws, selected_features)

        print('Building DF with all impostors')
        data_impostor, impostores = fc.build_impostors_df(data_normalized, impostor, ws,
                                                          selected_features, driver)

        print('Doing data split')
        x_train, x_val = fc.split_data(data_final)

        for c in n_clusters:
            print('Create clusters')
            labels_train, centroid_train, x_train_class = fc.clusters_of_maneuvers(x_train, c)

            for m in method:
                if m == 'DBSCAN':
                    print('Training DBSCAN')
                    dbscan_list, eps_list = fc.train_model_dbscan(labels_train, centroid_train,
                                                                  x_train_class, x_val)

                    print('Doing predictions DBSCAN')
                    result = fc.test_model_dbscan(dbscan_list, data_final, data_impostor,
                                                  centroid_train, x_train_class)

                    print('Evaluating the results')
                    acc, min_man, media_man, max_man, deviation = fc.evaluating_result(result, ws)
import functions
from keras.layers import Dropout
from keras.models import model_from_json
from keras.models import load_model
from nltk.tokenize import RegexpTokenizer

path = '/home/mark/Research'
data_dir = path + '/data'
train = True
load_all = True

weight_matrix, word_index = functions.load_embeddings(data_dir + '/glove.6B.100d.txt')
data = functions.read_data(data_dir)
train, test, val = functions.split_data(data, .8, data_dir)
train = train.reset_index()
test = test.reset_index()
val = val.reset_index()
# max_length, avg_words, seq_length = functions.maxLen(data)

train_x = functions.pipeline(train, word_index, weight_matrix)
test_x = functions.pipeline(test, word_index, weight_matrix)
val_x = functions.pipeline(val, word_index, weight_matrix)

train_y = functions.labels(train)
test_y = functions.labels(test)
val_y = functions.labels(val)

print('Training data: ')
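# Illustrative sketch (not in this excerpt): a typical Keras model that the
# pre-trained GloVe weight_matrix loaded above could feed into. seq_length, the
# layer sizes, and the binary output layer are assumptions; the original model
# is not shown here.
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

seq_length = 100  # assumed padded sequence length of train_x / val_x

model = Sequential()
model.add(Embedding(input_dim=weight_matrix.shape[0], output_dim=weight_matrix.shape[1],
                    weights=[weight_matrix], input_length=seq_length, trainable=False))
model.add(LSTM(128))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(train_x, train_y, validation_data=(val_x, val_y), epochs=5, batch_size=64)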
# get data
import functions as f
# import cv2
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from sklearn import datasets, svm, metrics

data = f.get_array_from_images('images_no_copies')
dev, test = f.split_data(data, 0.2)
training_data, training_labels = f.reshape(dev)
test_data, test_labels = f.reshape(test)

classifier = svm.SVC(gamma=0.001)
classifier.fit(training_data, training_labels)
score = classifier.score(test_data, test_labels)
print(score)
"Вас вітає мережа магазинів WINTIME, якщо вы хочете дізнатися інформацію про магазин в вашому місті - введіть назву свого міста" ) print( "Команди: /all - подивитися доступні міста, /add - додати місто, /edit - редагувати місто, /delete - видалити місто" ) print("==" * 20) while True: user_input = input("Ввести дані ( для виходу - exit ): ") if user_input != "exit" and user_input not in options: get = wc.getInfoCities(user_input) elif user_input.strip() == "/add": next_input = input( "Ведіть назву міста, адресу, телефон та години роботи через кому (city,address,phone,time): " ) new_city = f.split_data(next_input) if len(new_city) < 4: print("Ви ввели недостатньо даних, спробуйте ще раз:(") else: add = wc.addCity(new_city) elif user_input.strip() == "/edit": # print("Введіть назву міста, яке ви хочете редагувати\nДоступні міста") # cities = wc.getCities() edit_city = input( "Введіть назву міста та НОВІ ДАНІ через кому (city, new address, new phone, new time):" ) upd_city = f.split_data(edit_city) print(upd_city) edit = wc.editCity(upd_city)
logging.config.dictConfig(config)
logger = logging.getLogger('base')

# 2
logger.info("setting metaparameters")
n_epochs = 10
learning_rate = 0.01
batch_size = 100
logger.info("n_epochs: {}, learning_rate: {}, batch_size: {}"
            .format(n_epochs, learning_rate, batch_size))

# 3
logger.info("data preparation")
X_np, y_np = functions.get_data()
m, n = X_np.shape
X_split, y_split, n_batches = functions.split_data(X_np, y_np, batch_size, m)

# 4
logger.info("starting construction phase")
X = tf.placeholder(tf.float32, shape=(None, n), name="X")
y = tf.placeholder(tf.float32, shape=(None, 1), name="y")
theta = tf.Variable(tf.random_uniform([n, 1], -1.0, 1.0), name="theta")
y_pred = tf.matmul(X, theta, name="predictions")

# 5
with tf.name_scope("loss") as scope:
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")
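# Illustrative continuation (not part of the original excerpt): the gradient
# descent training op and execution phase that typically follow this TF1-style
# construction phase. Only names defined above are reused; the optimizer choice
# is an assumption.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(mse)
init = tf.global_variables_initializer()

logger.info("starting execution phase")
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for batch in range(n_batches):
            sess.run(training_op,
                     feed_dict={X: X_split[batch], y: y_split[batch]})
    best_theta = theta.eval()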
def main():
    st.title("Machine Learning Binary Classification Web App")
    st.markdown("Classifying the mushrooms 🍄 to be Edible or Poisonous")
    st.markdown("(Configure classifier options on the left side menu)")
    st.sidebar.title("Machine Learning Binary Classification Web App")
    st.sidebar.markdown(
        "Classifying the mushrooms 🍄 to be Edible or Poisonous")

    df = load_data()

    if st.checkbox("Show raw data", True):
        st.subheader("Mushroom Data Set (Classification)")
        st.write(df)

    x_train, x_test, y_train, y_test = split_data(df)
    class_names = ['edible', 'poisonous']

    st.sidebar.subheader("Choose Classifier")
    classifier = st.sidebar.selectbox(
        "Classifier",
        ("Support Vector Machine (SVM)", "Logistic Regression", "Random Forest"))

    if classifier == "Support Vector Machine (SVM)":
        st.sidebar.subheader("Model Hyperparameters")
        C = st.sidebar.number_input(
            "C (Regularisation parameter)", 0.01, 10.0, step=0.01, key="C")
        kernel = st.sidebar.radio("Kernel", ("rbf", "linear"), key="kernel")
        gamma = st.sidebar.radio(
            "Gamma (Kernel Coefficient)", ("scale", "auto"), key="gamma")
        metrics = st.sidebar.multiselect("What metrics to plot?", (
            'Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve'), key='metrics')

        if st.sidebar.button("Classify", key='classify'):
            st.subheader("Support Vector Machine (SVM) Results: ")
            model = SVC(C=C, kernel=kernel, gamma=gamma)
            model.fit(x_train, y_train)
            accuracy = model.score(x_test, y_test)
            y_pred = model.predict(x_test)
            st.write("Accuracy: ", accuracy.round(2))
            st.write("Precision: ", precision_score(
                y_test, y_pred, labels=class_names).round(2))
            st.write("Recall: ", recall_score(
                y_test, y_pred, labels=class_names).round(2))
            plot_metrics(metrics, model, x_test, y_test, class_names)

    if classifier == "Logistic Regression":
        st.sidebar.subheader("Model Hyperparameters")
        C = st.sidebar.number_input(
            "C (Regularisation parameter)", 0.01, 10.0, step=0.01, key="C_LR")
        max_iter = st.sidebar.slider(
            "Maximum number of iterations", 100, 500, key='max_iter')
        metrics = st.sidebar.multiselect("What metrics to plot?", (
            'Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve'), key='metrics')

        if st.sidebar.button("Classify", key='classify'):
            st.subheader("Logistic Regression Results: ")
            model = LogisticRegression(C=C, max_iter=max_iter)
            model.fit(x_train, y_train)
            accuracy = model.score(x_test, y_test)
            y_pred = model.predict(x_test)
            st.write("Accuracy: ", accuracy.round(2))
            st.write("Precision: ", precision_score(
                y_test, y_pred, labels=class_names).round(2))
            st.write("Recall: ", recall_score(
                y_test, y_pred, labels=class_names).round(2))
            plot_metrics(metrics, model, x_test, y_test, class_names)

    if classifier == "Random Forest":
        st.sidebar.subheader("Model Hyperparameters")
        n_estimators = st.sidebar.number_input(
            "The number of trees in the forest", 100, 5000, step=10, key='n_estimators')
        max_depth = st.sidebar.number_input(
            "The maximum depth of the tree", 1, 20, step=1, key='max_depth')
        bootstrap = st.sidebar.radio(
            "Bootstrap samples when building trees", ('True', 'False'), key='bootstrap')
        metrics = st.sidebar.multiselect("What metrics to plot?", (
            'Confusion Matrix', 'ROC Curve', 'Precision-Recall Curve'), key='metrics')

        if st.sidebar.button("Classify", key='classify'):
            st.subheader("Random Forest Results: ")
            model = RandomForestClassifier(
                n_estimators=n_estimators, max_depth=max_depth,
                bootstrap=(bootstrap == 'True'),  # the radio returns a string; convert to bool
                n_jobs=-1)
            model.fit(x_train, y_train)
            accuracy = model.score(x_test, y_test)
            y_pred = model.predict(x_test)
            st.write("Accuracy: ", accuracy.round(2))
            st.write("Precision: ", precision_score(
                y_test, y_pred, labels=class_names).round(2))
            st.write("Recall: ", recall_score(
                y_test, y_pred, labels=class_names).round(2))
            plot_metrics(metrics, model, x_test, y_test, class_names)
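# plot_metrics() is called above but not shown in this excerpt. A minimal sketch
# of what such a helper could look like, assuming scikit-learn's Display API
# (>= 1.0) and Streamlit's st.pyplot; this is illustrative, not the original helper.
import streamlit as st
from sklearn.metrics import (ConfusionMatrixDisplay, RocCurveDisplay,
                             PrecisionRecallDisplay)


def plot_metrics(metrics_list, model, x_test, y_test, class_names):
    # Each selected metric is drawn onto its own matplotlib figure and handed to Streamlit.
    if 'Confusion Matrix' in metrics_list:
        st.subheader("Confusion Matrix")
        disp = ConfusionMatrixDisplay.from_estimator(
            model, x_test, y_test, display_labels=class_names)
        st.pyplot(disp.figure_)
    if 'ROC Curve' in metrics_list:
        st.subheader("ROC Curve")
        disp = RocCurveDisplay.from_estimator(model, x_test, y_test)
        st.pyplot(disp.figure_)
    if 'Precision-Recall Curve' in metrics_list:
        st.subheader("Precision-Recall Curve")
        disp = PrecisionRecallDisplay.from_estimator(model, x_test, y_test)
        st.pyplot(disp.figure_)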
# get data
from functions import get_all_data, split_data, reshape
import cv2
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from sklearn import datasets, svm, metrics
import os
import pickle

data = get_all_data()
dev, test = split_data(data, 0.2)
training_data, training_labels = reshape(dev)
test_data, test_labels = reshape(test)

classifier = svm.SVC(gamma=0.001)
classifier.fit(training_data, training_labels)

# save the model to disk
filename = 'model.sav'
pickle.dump(classifier, open(filename, 'wb'))

# score = classifier.score(test_data, test_labels)
# print(score)
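# Usage note (illustrative, not part of the original excerpt): the pickled
# classifier saved above can later be reloaded and scored on the held-out split,
# mirroring the commented-out lines.
import pickle

with open('model.sav', 'rb') as fh:
    loaded_classifier = pickle.load(fh)
print(loaded_classifier.score(test_data, test_labels))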
def run_regression(X, z, reg_string, polydegree, lambdas, N_bs, K, test_size, scale,
                   max_iter=50000):
    """
    Runs the selected regression method for the input design matrix, the given p's and
    lambdas, using the specified resampling methods. While there may be several ways this
    could have been done more optimally, the function exists because of a rather late
    attempt at restructuring the code to reduce the amount of duplicated regression code,
    which had escalated out of control and made debugging extremely difficult.

    :param X: (N, p) array containing input design matrix
    :param z: (N, 1) array containing data points
    :param reg_string: string containing the name of the regression method to be used
    :param polydegree: list/range of the different p-values to be used
    :param lambdas: array of all the lambda values to be used
    :param N_bs: int, number of Bootstraps
    :param K: int, number of folds in the Cross-Validation
    :param test_size: float, size of the test partition [0.0, 1.0]
    :param scale: list determining if the scaling is only by the mean, the std or both
                  [bool(mean), bool(std)]
    :param max_iter: maximum number of iterations for Lasso
    :return: a lot of arrays with the various results and different ways of representing the data
    """
    nlambdas = len(lambdas)  # number of lambdas
    p = polydegree[-1]       # the maximum p-value
    method = 4               # OLS method

    # Splitting into train and test, scaling the data
    X_train, X_test, z_train, z_test = fun.split_data(X, z, test_size=test_size)
    X_train_scaled = fun.scale_X(X_train, scale)
    X_test_scaled = fun.scale_X(X_test, scale)
    X_scaled = fun.scale_X(X, scale)

    # Bootstrap arrays
    bs_error_train = np.zeros((p, nlambdas))
    bs_error_test = np.zeros((p, nlambdas))
    bs_bias = np.zeros((p, nlambdas))
    bs_var = np.zeros((p, nlambdas))
    bs_error_train_opt = np.zeros((p, 2))
    bs_error_test_opt = np.zeros((p, 2))
    # First index is min(MSE) lmb for each p, second at lmb that yields total lowest MSE
    bs_bias_opt = np.zeros((p, 2))
    bs_var_opt = np.zeros((p, 2))
    bs_lmb_opt = np.zeros(p)

    # Cross-validation arrays
    cv_error_train = np.zeros((p, nlambdas))
    cv_error_test = np.zeros((p, nlambdas))
    cv_error_train_opt = np.zeros((p, 2))
    cv_error_test_opt = np.zeros((p, 2))
    cv_lmb_opt = np.zeros(p)

    # Setting up regression object to be used for regression (Lasso is dealt with later)
    reg_obj = reg.OrdinaryLeastSquares(method)  # default
    if reg_string == 'SKL':
        reg_obj = skl.LinearRegression()  # Testing with scikit-learn OLS
    elif reg_string == 'Ridge':
        reg_obj = reg.RidgeRegression()

    # Looping over all polynomial degrees in the analysis
    for degree in polydegree:
        # number of terms in the design matrix for the given degree
        n_poly = fun.polynom_N_terms(degree)
        print('p = %2d, np = %3d' % (degree, n_poly))

        # Setting up correct design matrices for the current degree
        X_train_bs = np.zeros((len(X_train_scaled), n_poly))
        X_test_bs = np.zeros((len(X_test_scaled), n_poly))
        X_cv = np.zeros((len(X_scaled), n_poly))

        # Filling the elements up to term n_poly
        X_train_bs[:, :] = X_train_scaled[:, 0:n_poly]
        X_test_bs[:, :] = X_test_scaled[:, 0:n_poly]
        X_cv[:, :] = X_scaled[:, 0:n_poly]

        # Looping over all the lambda values
        for i in range(nlambdas):
            lmb = lambdas[i]  # current lambda value

            # Printing out in order to gauge where we are
            if i % 10 == 0:
                print('i = %d, lmb = %.3e' % (i, lmb))

            # Updating the current lambda value for Ridge and Lasso
            if reg_string == 'Ridge':
                reg_obj.set_lambda(lmb)
            elif reg_string == 'Lasso':
                reg_obj = skl.Lasso(alpha=lmb, max_iter=max_iter,
                                    precompute=True, warm_start=True)

            # Bootstrap
            BS = res.Bootstrap(X_train_bs, X_test_bs, z_train, z_test, reg_obj)
            error_, bias_, var_, trainE_ = BS.compute(N_bs)  # performing the Bootstrap
            bs_error_test[degree - 1, i] = error_
            bs_bias[degree - 1, i] = bias_
            bs_var[degree - 1, i] = var_
            bs_error_train[degree - 1, i] = trainE_

            # Cross-validation
            CV = res.CrossValidation(X_cv, z, reg_obj)
            trainE, testE = CV.compute(K)  # performing the Cross-Validation
            cv_error_train[degree - 1, i] = trainE
            cv_error_test[degree - 1, i] = testE

        # Locating minimum MSE for each polynomial degree
        # Bootstrap
        index_bs = np.argmin(bs_error_test[degree - 1, :])
        bs_lmb_opt[degree - 1] = lambdas[index_bs]
        bs_error_train_opt[:, 0] = bs_error_train[:, index_bs]
        bs_error_test_opt[:, 0] = bs_error_test[:, index_bs]
        bs_bias_opt[:, 0] = bs_bias[:, index_bs]
        bs_var_opt[:, 0] = bs_var[:, index_bs]

        # Cross-validation
        index_cv = np.argmin(cv_error_test[degree - 1, :])
        cv_lmb_opt[degree - 1] = lambdas[index_cv]
        cv_error_train_opt[:, 0] = cv_error_train[:, index_cv]
        cv_error_test_opt[:, 0] = cv_error_test[:, index_cv]

    # Locate minimum MSE to see how it depends on lambda
    bs_min = np.unravel_index(np.argmin(bs_error_test), bs_error_test.shape)
    cv_min = np.unravel_index(np.argmin(cv_error_test), cv_error_test.shape)
    bs_best = [polydegree[bs_min[0]], lambdas[bs_min[1]]]
    cv_best = [polydegree[cv_min[0]], lambdas[cv_min[1]]]

    # Bootstrap
    bs_error_train_opt[:, 1] = bs_error_train[:, bs_min[1]]
    bs_error_test_opt[:, 1] = bs_error_test[:, bs_min[1]]
    bs_bias_opt[:, 1] = bs_bias[:, bs_min[1]]
    bs_var_opt[:, 1] = bs_var[:, bs_min[1]]

    # Cross-validation
    cv_error_train_opt[:, 1] = cv_error_train[:, cv_min[1]]
    cv_error_test_opt[:, 1] = cv_error_test[:, cv_min[1]]

    # This return is extremely large, sadly, and should have been improved upon;
    # it was just the fastest way of doing it when the code had to be restructured,
    # so better planning would be the better solution in the future.
    return (bs_error_train, bs_error_test, bs_bias, bs_var, bs_error_train_opt,
            bs_error_test_opt, bs_bias_opt, bs_var_opt, bs_lmb_opt,
            cv_error_train, cv_error_test, cv_error_train_opt, cv_error_test_opt,
            cv_lmb_opt, bs_min, bs_best, cv_min, cv_best)
########################################################################################################################

if run_mode == 'a':
    save = 'N%d_nf%d_p%d_noise%.2f_seed%d' % (N, n_franke, p, noise, seed)

    # Splitting into train and test data
    X_train, X_test, z_train, z_test = fun.split_data(X, z_ravel, test_size=test_size)
    # X_train, X_test, z_train, z_test = train_test_split(X, z_ravel, test_size=test_size)

    # Scaling the data
    X_train_scaled = fun.scale_X(X_train, scale)
    X_test_scaled = fun.scale_X(X_test, scale)

    # Plotting the Franke function
    fun.plot_surf(x_mesh, y_mesh, z_mesh, 'x', 'y', 'z',
                  'Franke function, $N$=%d, noise=%.2f' % (N, noise),