def get_consumptions(id_, solar, start, length):
    # Initialize data
    DFS = [data_original, data_forecast]
    load_ = get_data(id_, start, length, DFS[0])
    forecast_ = get_data(id_, start, length, DFS[1])
    return load_, forecast_
def trainByCnn():
    x_train, y_train, x_val, y_val, embedding_mat = process_data.get_data(cnum=1000, test_size=0.8)
    model = cnn.get_model(x_train, y_train, embedding_mat)
    model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS,
              validation_data=[x_val, y_val])
    model.save('../ckpt/cnn.h5')
def run():
    x_train, x_test, y_train, y_test = get_data()
    model = lin_reg(x_train, x_test, y_train, y_test)
    print("\nPHM Linear Regression")
    while True:
        user = input(
            "1. Train\n2. Normal_solve\n3. Predict\n4. Save\n5. Load\n6. Quit\n"
        )
        if user == '1':
            model.fit()
        elif user == '2':
            model.normal_fit()
        elif user == '3':
            model.predict()
        elif user == '4':
            model.save()
        elif user == '5':
            model.load()
        elif user == '6':
            break
    print("\n--------Linear Regression---------\n")
def trainByLstm():
    x_train, y_train, x_val, y_val, embedding_mat = process_data.get_data(cnum=10000, test_size=0.2)
    model = lsmcrf.get_model(x_train, y_train, embedding_mat)
    model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS,
              validation_data=[x_val, y_val])
    crfmodel_Weights = model.get_weights()
    with open('../ckpt/crfmodel_Weights.pkl', 'wb') as outp:
        pickle.dump(crfmodel_Weights, outp)
def get_data(file_arr):
    template, rate = process_data.get_data(file_arr)
    data = np.array(template[0])
    n = len(data[1])
    data = data.reshape(n, inputs)
    for idx in range(1, len(template)):
        sub_arr = np.array(template[idx])
        n = len(sub_arr[0])
        sub_arr = sub_arr.reshape(n, inputs)
        data = np.concatenate((data, sub_arr), axis=0)
    return data, rate
def train(args):
    '''Function for training the model: sets the GPU configuration, loads the data,
    creates the savers and loaders, performs training, and writes summaries.'''
    ## Setting the GPU configuration - reversed in order of nvidia-smi
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    ## Limit from taking the whole gpu
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    ## loading data
    train_data = get_data(args.img_size, args.dataset, is_train=True, debug=False)
    print('loaded data successfully...')

    ## model definition
    with tf.variable_scope('bc_gan'):
        model = Bicycle_GAN(args)
    print('Graph definition for model created...')

    ## Starting a session
    init = tf.global_variables_initializer()
    sess = tf.Session(config=config)
    sess.run(init)

    ## savers and loaders
    global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='bc_gan')
    trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='bc_gan')
    saver = tf.train.Saver(global_vars)
    loader = tf.train.Saver(global_vars)
    if args.pretrained_weights != "":
        loader.restore(sess, args.pretrained_weights)

    ## Summaries
    if not os.path.exists('./logs'):
        os.mkdir('./logs')
    logdir = os.path.join('./logs', 'bcgan')
    summary_writer = tf.summary.FileWriter(logdir, sess.graph)

    ## Training
    model.train(sess, train_data, saver, summary_writer)
    print("Model is trained ....")
def main():
    # step 1: get the data and define all the usual variables
    X, Y, d = get_data()
    # Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.03)
    X, Y = shuffle(X, Y)
    Xtrain, Ytrain = X[:-50], Y[:-50]
    Xtest, Ytest = X[-50:], Y[-50:]

    ann = ANN([500, 300])

    session = tf.InteractiveSession()
    ann.set_session(session)

    ann.fit(Xtrain, Ytrain, Xtest, Ytest, show_fig=True)

    print("Train accuracy:", ann.score(Xtrain, Ytrain))
    print("Test accuracy:", ann.score(Xtest, Ytest))
def test(args):
    '''Function for testing the model: sets the GPU configuration, loads the test data,
    loads the weights, performs testing, and writes summaries.'''
    ## Setting the GPU configuration
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    ## Limit from taking the whole gpu
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    ## loading data
    test_data = get_data(args.img_size, args.dataset, is_train=False, debug=False)

    ## model definition
    with tf.variable_scope('bc_gan'):
        model = Bicycle_GAN(args)

    ## Starting a session
    init = tf.global_variables_initializer()
    sess = tf.Session(config=config)
    sess.run(init)

    ## savers and loaders
    global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='bc_gan')
    trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='bc_gan')
    loader = tf.train.Saver(global_vars)
    if args.pretrained_weights is not None:
        loader.restore(sess, args.pretrained_weights)

    ## results
    if not os.path.exists('./results'):
        os.mkdir('./results')
    write_dir = os.path.join('results')

    ## Testing
    model.test(sess, test_data, write_dir)
    print("Testing complete ....")
def run():
    x_train, x_test, y_train, y_test = get_data()
    model = rf_model(x_train, x_test, y_train, y_test)
    print("\nPHM Random Forest Regression")
    while True:
        user = input("1. Train\n2. Predict\n3. Save\n4. Load\n5. Quit\n")
        if user == '1':
            model.train()
        elif user == '2':
            model.predict()
        elif user == '3':
            save(model)
        elif user == '4':
            load(model)
        elif user == '5':
            break
    print("\n-----------RF------------")
def run():
    x_train, x_test, y_train, y_test = get_data(features='previous_capacity', caps=7)
    model = SVR_model(x_train, x_test, y_train, y_test)
    print("\nSVR")
    while True:
        user = input("1. Train\n2. Predict\n3. Save\n4. Load\n5. Quit\n")
        if user == '1':
            model.fit()
        elif user == '2':
            model.predict()
        elif user == '3':
            save(model)
        elif user == '4':
            load(model)
        elif user == '5':
            break
    print("\n-----------SVR------------")
def run():
    x_train, x_test, y_train, y_test = get_data(features=['min_discharge_voltagem'])
    model = Poly_model(x_train, x_test, y_train, y_test)
    print("\nPoly_reg")
    while True:
        user = input("1. Train\n2. Predict\n3. Save\n4. Load\n5. Quit\n")
        if user == '1':
            model.fit()
        elif user == '2':
            model.predict()
        elif user == '3':
            save(model)
        elif user == '4':
            load(model)
        elif user == '5':
            break
    print("\n-----------Poly------------")
def cut_result_data(score):
    score = str(score).split(".")[1]
    data = process_data.get_data("data/test_result.csv")
    # greatest = []
    # for index, row in data.iterrows():
    #     if row[0] > row[1]:
    #         greatest.append(row[0])
    #     else:
    #         greatest.append(row[1])
    # data["Probability"] = greatest
    try:
        zero_score = get_column_accuracy(data, "0")
        one_score = get_column_accuracy(data, "1")
        print("Zero Column Score => \t", zero_score)
        print("One Column Score => \t", one_score)
        if zero_score > one_score:
            myData = {
                "id": data["result_id"].to_list(),
                "Probability": data["0"].to_list(),
            }
            myData = pd.DataFrame(myData)
        else:
            myData = {
                "id": data["result_id"].to_list(),
                "Probability": data["1"].to_list(),
            }
            myData = pd.DataFrame(myData)
    except:
        myData = {
            "id": data["result_id"].to_list(),
            "Probability": data["0"].to_list(),
        }
        myData = pd.DataFrame(myData)
    print(myData)
    myData.to_csv("data/test_results_" + score + ".csv", index=False)
    return
def main(argv):
    if len(argv) > 1:
        filename = argv[1]
    else:
        filename = 'a.csv'

    if os.path.exists(filename):
        basename, ext = filename.split('.')
        data = process_data.get_data(filename)
        predictor_pipeline = process_data.make_predictor_pipeline(do_one_hot=False)
        label_pipeline = process_data.make_label_pipeline()
        predictors_processed = predictor_pipeline.fit_transform(data)
        labels_processed = label_pipeline.fit_transform(data)
        display_data(predictors_processed, labels_processed, basename)
    else:
        print(filename + " doesn't exist.")
    return
def get_data(file_arr):
    template, rate = process_data.get_data(file_arr)
    data = np.array(template[0])
    n = len(data[1])
    data = data.reshape(n, inputs)
    # Ch2
    # ch2_arr = np.array(ch2[0])
    # n2 = len(ch2_arr[1])
    # ch2_arr = ch2_arr.reshape(n2, inputs)
    for idx in range(1, len(template)):
        sub_arr = np.array(template[idx])
        n = len(sub_arr[0])
        sub_arr = sub_arr.reshape(n, inputs)
        data = np.concatenate((data, sub_arr), axis=0)
    # for idx in range(1, len(ch2)):
    #     sub_arr2 = np.array(ch2[idx])
    #     n2 = len(sub_arr2[1])
    #     sub_arr2 = sub_arr2.reshape(n2, inputs)
    #     ch2_arr = np.concatenate((ch2_arr, sub_arr2), axis=0)
    return data, rate
    predicted_vol = decoder.predict(outputs)
    # print(np.max(predicted_vol), np.min(predicted_vol), np.mean(predicted_vol), np.median(predicted_vol))
    np.save('D:/Master-Thesis/water_collapse/code/Material_ENKF_full.npy', predicted_vol)
    return predicted_vol


if __name__ == "__main__":
    path, verify_rate, sequence_length, originalFile, destinationFile = variable_value()
    # -------------------------------------#
    print("Data Preprocessing...")
    vol = Pcd.get_data(path)
    dataset, verify = Pcd.train_and_vertify(vol, verify_rate)

    # predict and verify
    # scaler_data = MinMaxScaler()
    # scaler_vol = scaler_data.fit_transform(verify)
    # scaler_vol = verify
    print("Data Predicting...")
    print('dataset shape = ' + str(dataset.shape))
    print('vertify shape = ' + str(verify.shape))
    predicted_vol = predict_vol(verify, verify.shape[0] - sequence_length, sequence_length)
def main(argv):
    my_args = process_args(argv)  # my_args is a dict containing all opts mapped to their args
    basename, ext = my_args['DataFileName'].split('.')
    data = process_data.get_data(my_args['DataFileName'], ext)
    train_data, test_data = sklearn.model_selection.train_test_split(data, test_size=.20)

    # search for good fit and analysis
    label_pipeline = process_data.make_label_pipeline()
    # ravel() just reshapes the data for easier processing
    actual_train_labels = label_pipeline.fit_transform(train_data).values.ravel()

    if my_args["ModelType"] == "tree":
        fit_pipeline = make_decision_tree_fit_pipeline()
        fit_params = make_decision_tree_params()
    elif my_args["ModelType"] == "svm":
        fit_pipeline = make_svm_fit_pipeline()
        fit_params = make_svm_params()
    else:
        print("pick --model type")
        sys.exit(1)

    if my_args["SplitterType"] == "k-fold":
        cv = sklearn.model_selection.KFold(n_splits=my_args["Folds"])
    elif my_args["SplitterType"] == "stratified":
        cv = sklearn.model_selection.StratifiedKFold(n_splits=my_args["Folds"])
    else:
        print("pick --splitter type")
        sys.exit(1)

    if my_args["SearchType"] == "grid":
        search_grid = sklearn.model_selection.GridSearchCV(
            fit_pipeline, fit_params, scoring="f1_micro",
            n_jobs=-1, cv=cv, refit=True, verbose=1)
    elif my_args["SearchType"] == "random":
        search_grid = sklearn.model_selection.RandomizedSearchCV(
            fit_pipeline, fit_params, scoring="f1_micro",
            n_iter=my_args["Iterations"], n_jobs=-1, cv=cv, refit=True, verbose=1)
    else:
        print("pick --search type")
        sys.exit(1)

    search_grid.fit(train_data, actual_train_labels)

    # examine best parameters
    print("Best Score:", search_grid.best_score_)
    print("Best Params:", search_grid.best_params_)
    print()
    print()
    print()

    scores = sklearn.model_selection.cross_val_score(
        search_grid.best_estimator_, train_data, actual_train_labels,
        scoring="f1", cv=cv, n_jobs=-1)
    print("CV:", scores.mean(), scores.std())
    print()
    print()
    print()

    predicted_train_labels = search_grid.best_estimator_.predict(train_data)
    print("actual training labels", actual_train_labels)
    print("predicted training labels", predicted_train_labels)
    print("Training Labels Correct:", calculateCorrectLabels(actual_train_labels, predicted_train_labels))

    actual_test_labels = label_pipeline.fit_transform(test_data).values.ravel()
    predicted_test_labels = search_grid.best_estimator_.predict(test_data)
    print("actual test labels", actual_test_labels)
    print("predicted test labels", predicted_test_labels)
    print("Test Labels Correct:", calculateCorrectLabels(actual_test_labels, predicted_test_labels))
    return
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras import metrics
from util import y2indicator
from process_data import get_data
import matplotlib.pyplot as plt

# NOTE: do NOT name your file keras.py because it will conflict
# with importing keras
# installation is easy! just the usual "sudo pip(3) install keras"

# get the data, same as Theano + Tensorflow examples
# no need to split now, the fit() function will do it
X, Y, d = get_data()

# get shapes
N, D = X.shape

# by default Keras wants one-hot encoded labels
# there's another cost function we can use
# where we can just pass in the integer labels directly
# just like Tensorflow / Theano
Y = y2indicator(Y)

# the model will be a sequence of layers
model = Sequential()

# ANN with layers [29 (D)] -> [500] -> [300] -> [2]
model.add(Dense(units=500, input_dim=D))
model.add(Activation('relu'))
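# The snippet above stops after the first hidden layer. Below is a minimal sketch of
# how the remaining layers, compilation, and training might look, following the
# [D] -> [500] -> [300] -> [2] architecture described in the comment above.
# The optimizer, epoch count, batch size, and validation split are illustrative
# assumptions, not values taken from the original.
model.add(Dense(units=300))
model.add(Activation('relu'))
model.add(Dense(units=2))
model.add(Activation('softmax'))

# one-hot labels pair with categorical cross-entropy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
r = model.fit(X, Y, validation_split=0.2, epochs=15, batch_size=32)

# plot the loss curves recorded by fit()
plt.plot(r.history['loss'], label='loss')
plt.plot(r.history['val_loss'], label='val_loss')
plt.legend()
plt.show()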
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from process_data import get_data


def y2indicator(y, K):
    N = len(y)
    ind = np.zeros((N, K))
    for i in range(N):
        ind[i, y[i]] = 1
    return ind


Xtrain, Ytrain, Xtest, Ytest, datatrain, datatest = get_data()
D = Xtrain.shape[1]
K = len(set(Ytrain) | set(Ytest))
M = 100  # num hidden units

# convert to indicator
Ytrain_ind = y2indicator(Ytrain, K)
Ytest_ind = y2indicator(Ytest, K)

# randomly initialize weights
W1 = np.random.randn(D, M)
b1 = np.zeros(M)
W2 = np.random.randn(M, K)
b2 = np.zeros(K)
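# The snippet above ends after initializing the weights. A minimal sketch of how they
# might be trained with full-batch gradient descent and softmax cross-entropy follows;
# the learning rate and epoch count are illustrative assumptions, and the labels are
# assumed to be integers 0..K-1 (as implied by y2indicator above).
def softmax(a):
    expA = np.exp(a - a.max(axis=1, keepdims=True))
    return expA / expA.sum(axis=1, keepdims=True)


def forward(X, W1, b1, W2, b2):
    Z = np.tanh(X.dot(W1) + b1)           # hidden layer activations
    return softmax(Z.dot(W2) + b2), Z


lr = 1e-5
for epoch in range(1000):
    pY, Z = forward(Xtrain, W1, b1, W2, b2)
    # gradient of cross-entropy w.r.t. the output pre-activation is (pY - T)
    dOut = pY - Ytrain_ind
    dZ = dOut.dot(W2.T) * (1 - Z * Z)     # backprop through tanh
    W2 -= lr * Z.T.dot(dOut)
    b2 -= lr * dOut.sum(axis=0)
    W1 -= lr * Xtrain.T.dot(dZ)
    b1 -= lr * dZ.sum(axis=0)

pYtest, _ = forward(Xtest, W1, b1, W2, b2)
print("test classification rate:", np.mean(np.argmax(pYtest, axis=1) == Ytest))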
def getResults(ldf, rdf):
    ldf.insert(0, "Probability", "0")
    for (col_name, data) in ldf.iteritems():
        if (col_name == "Probability"):
            for i in range(len(data)):
                print(rdf['0'][i])
                ldf[col_name][i] = rdf['0'][i] * 100
        else:
            continue
    ldf = ldf[['playerID', 'Contestant', 'Probability']]
    return ldf


test_df = process_data.get_data("s40-test-updated.csv", "csv")
predictions_df = process_data.get_data("s40-predictions.csv", "csv")
results_df = getResults(test_df, predictions_df)
results_df.to_csv("s40-final-results.csv", index=False)
results = results_df.to_numpy()
# print("results np: ", results)

ids = []
contestants = []
prob = []
for i in range(len(results)):
    ids.append(results[i][0])
    contestants.append(results[i][1])
    prob.append(results[i][2])
def main(in_csv, batch_size, eps, mn):
    x_train, x_test, y_train, y_test = pd.get_data(in_csv)
    train_obo(x_train, y_train, bs=batch_size, ep=eps, mod_name="models/" + mn)
def run_sacssan(args):
    """Run SACSANN"""
    if args.test_chromosomes:
        test_chromosomes = [int(i) for i in args.test_chromosomes[0].split(",")]
    else:
        test_chromosomes = []

    if args.mode == "predict":
        if (args.intermediate_network_weights_path is None
                or args.smoothing_network_weights_path is None):
            raise ValueError(
                "Path to pre-trained weights needs to be specified in predict mode"
            )
        intermediate_classifier = pickle.load(
            open(args.intermediate_network_weights_path, "rb"))
        final_classifier = pickle.load(
            open(args.smoothing_network_weights_path, "rb"))
        intermediate_scaler = pickle.load(
            open(args.intermediate_scaler_path, "rb"))
        final_scaler = pickle.load(open(args.final_scaler_path, "rb"))
        _, chromosomes_lengths = process_data.format_test_data(
            args.features_path, test_chromosomes, scaler=intermediate_scaler)
        predict_compartments(
            intermediate_classifier,
            intermediate_scaler,
            final_scaler,
            final_classifier,
            test_chromosomes,
            chromosomes_lengths,
            args.features_path,
            args.output_folder,
        )
    else:
        train_chromosomes = [int(i) for i in args.train_chromosomes[0].split(",")]
        chromosomes = train_chromosomes + test_chromosomes
        possible_chrs = process_data.get_chromosome_list(args.genome)
        for i in range(len(chromosomes)):
            if chromosomes[i] not in possible_chrs:
                logger.warning(
                    f"Invalid chromosome, "
                    f"possible chromosomes for the input genome are {possible_chrs}"
                )
                sys.exit()

        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)

        (
            X_train,
            y_train,
            X_test,
            y_test,
            A_indexes,
            B_indexes,
            scaler,
            testChrLen,
        ) = process_data.get_data(
            args.labels_path,
            args.features_path,
            train_chromosomes,
            test_chromosomes,
            scaling=True,
            balance=True,
            save_model=args.save_model,
            output_folder=args.output_folder,
        )
        train_and_predict_compartments(
            args.features_path,
            train_chromosomes,
            test_chromosomes,
            testChrLen,
            X_train,
            y_train,
            X_test,
            y_test,
            A_indexes,
            B_indexes,
            scaler,
            args.output_folder,
            args.save_model,
        )
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tabulate import tabulate
from sklearn.utils import shuffle
from process_data import get_data

Xtrain, Ytrain, Xtest, Ytest, datatrain, datatest = get_data(regression=True)
X = Xtrain
Y = Ytrain

# normalize, keep original to unscale later
Yorig = Y
Y = (Y - np.min(Y)) / (np.max(Y) - np.min(Y))

D = X.shape[1]
K = len(set(Ytrain) | set(Ytest))
M = 10  # num hidden units

# layer 1
W = np.random.randn(D, M) / np.sqrt(D)
b = np.zeros(M)

# layer 2
V = np.random.randn(M) / np.sqrt(M)
c = 0
def draw_data():
    fig = plt.figure(1)
    ax = fig.add_subplot(111)
    # materialize the filter so the data can be traversed twice (Python 3 returns iterators)
    data = list(filter(lambda v: v[0] == 81390, get_data()))
    ax.plot([v[1] for v in data], [v[2] for v in data])
    # estimate for the weight vector W, found by minimizing (y - XW).T * (y - XW)
    W = np.matmul(np.linalg.inv(np.matmul(X.T, X)), np.matmul(X.T, Y))

    predictions = []
    # iterate over the test set
    for entry in range(len(X_)):
        # get prediction
        p = np.matmul(W.T, X_[entry])
        # threshold for binary classification
        if p >= 0.5:
            pred = 1
        else:
            pred = 0
        predictions.append(pred)
    # column vector so the subtraction below lines up with test_labels (shape (n, 1))
    predictions = np.array(predictions).reshape(-1, 1)

    # compute MSE
    error = np.sum(np.power(predictions - test_labels, 2)) / len(features)
    return (W, error, predictions)


# fetch and clean the data
pre_data = process_data.get_data()
data = process_data.process_data(pre_data)
features = data[0]
labels = data[1]

train_features = np.array(features[:80])
train_labels = np.array(labels[:80]).reshape(80, 1)
test_features = np.array(features[80:])
test_labels = np.array(labels[80:]).reshape(20, 1)

lr_pred = linear_regression(train_features, train_labels, test_features, test_labels)
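# A minimal alternative sketch: the same closed-form (normal-equation) estimate can be
# obtained with np.linalg.lstsq, which avoids explicitly inverting X.T @ X and is more
# numerically stable when features are nearly collinear. The names below reuse the
# variables already defined above; the comparison print is illustrative only.
W_lstsq, residuals, rank, sv = np.linalg.lstsq(train_features, train_labels, rcond=None)
test_scores = test_features @ W_lstsq              # raw regression outputs on the test set
test_preds = (test_scores >= 0.5).astype(int)      # same 0.5 threshold as above
print("lstsq weights match normal equation:", np.allclose(W_lstsq, lr_pred[0]))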
def run(N, T, D, pt, market, freq, seed, onlyprice=False, flat=False, real_data=-1):
    r = np.random.RandomState(seed)
    player_ids = r.choice(np.arange(126), N, replace=False)

    data_original = pd.read_csv(DATA, index_col='date', parse_dates=True)
    data_forcast = pd.read_csv(DATA_FORCAST, index_col='date', parse_dates=True)
    dfs_nosolar = [data_original, data_forcast]
    data_solar = pd.read_csv(DATA_SOLAR, index_col='date', parse_dates=True)
    data_solar_forcast = pd.read_csv(DATA_SOLAR_FORCAST, index_col='date', parse_dates=True)
    dfs_solar = [data_solar, data_solar_forcast]

    # real_data = int(real_data)
    # if real_data > 0:
    #     loads = get_data(real_data, D + 1, N, r)
    # else:
    #     loads = None

    players = {}
    for n in range(N):
        has_solar = n <= (N // 2)
        DFS = dfs_solar if has_solar else dfs_nosolar
        if real_data > 0:
            load_ = get_data(n, real_data, D, DFS[0])
            forcast_ = get_data(n, real_data, D, DFS[1])
        else:
            load_ = None
            forcast_ = None
        val = random_player(T, D, pt, r, flat, load=load_, forcast=forcast_, solar=has_solar)
        players[n] = val

    for p in range(N):
        players[p]['freq'] = freq

    CONFIG = {
        'ROUNDS': T * (D - 1) + 1,
        'SLICE': T,
        'RANDOM_STATE': r,
        'MARKET': market,
        'ONLYPRICE': onlyprice,
    }

    start = time.perf_counter()
    welfare, traded = core_loop(players, CONFIG)
    end = time.perf_counter() - start

    for k, pl in players.items():
        pl.pop('model', None)
        pl.pop('con', None)
        pl.pop('var', None)

    return (end, players, welfare, traded)
        D = len(self.gaussian)
        P = np.zeros((N, D))
        for i in self.labels:
            mean = self.gaussian[i]['mean']
            cov = self.gaussian[i]['cov']
            P[:, i] = mvn.logpdf(X, mean=mean, cov=cov) + np.log(self.prior[i])
        return np.argmax(P, axis=1)

    def score(self, X, Y):
        P = self.project(X)
        return np.mean(Y == P)


if __name__ == '__main__':
    X, Y = get_data()
    X, Y = shuffle(X, Y)
    N = len(Y) // 2
    Xtrain = X[:N]
    Ytrain = Y[:N]
    Xtest = X[N:]
    Ytest = Y[N:]

    model = Facial_Rec()
    model.fit(Xtrain, Ytrain)
    print('Train accuracy: ', model.score(Xtrain, Ytrain))
    print('Test accuracy: ', model.score(Xtest, Ytest))
    print()

    alphabet = np.array([chr(i) for i in range(65, 91)])
    idx = [22, 7, 8, 18, 11, 4, 17]
    delim = ''
    print(delim.join(alphabet[idx]))
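# The fragment above only shows the prediction side of this Gaussian Bayes classifier.
# A minimal sketch of how a fit() method might populate self.labels, self.gaussian, and
# self.prior (the class name, smoothing term, and 0..K-1 integer labels are assumptions,
# not taken from the original Facial_Rec implementation):
class GaussianBayesSketch:
    def fit(self, X, Y, smoothing=1e-2):
        self.labels = np.unique(Y)
        self.gaussian = {}
        self.prior = {}
        for c in self.labels:
            Xc = X[Y == c]
            self.gaussian[c] = {
                'mean': Xc.mean(axis=0),
                # full covariance with a small ridge for numerical stability
                'cov': np.cov(Xc.T) + np.eye(X.shape[1]) * smoothing,
            }
            self.prior[c] = len(Xc) / len(Y)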
import sys

import numpy as np
from sklearn.utils import shuffle
from process_data import get_data

# sys.argv[1] = learning_rate
# sys.argv[2] = iterations


def T_indicator(t, K):
    N = len(t)
    ind = np.zeros((N, K))
    for n in range(N):
        ind[n, t[n]] = 1
    return ind


# Get the data
X, t = get_data()
X, t = shuffle(X, t)
t = t.astype(np.int32)
# N = len(t)
D = X.shape[1]
M = 5
K = len(set(t))

X_train = X[:-100, :]
t_train = t[:-100]
T_train = T_indicator(t_train, K)

X_test = X[-100:, :]
t_test = t[-100:]
T_test = T_indicator(t_test, K)
def random_search():
    X, Y, data = get_data()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.75 * len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # Make copies of the small data (because variance matters?)
    Xtrain = np.concatenate((Xtrain, Xtrain, Xtrain), 0)
    Ytrain = np.concatenate((Ytrain, Ytrain, Ytrain), 0)
    print('size Xtrain: ' + str(Xtrain.shape))
    print('size Ytrain: ' + str(Ytrain.shape))
    print('size Xtest: ' + str(Xtest.shape))
    print('size Ytest: ' + str(Ytest.shape))

    # starting hyperparameters
    M = 20        # hidden units
    nHidden = 2   # hidden layers
    log_lr = -4   # learning rate
    log_l2 = -2   # l2 regularization, since we always want it to be positive
    max_tries = 30

    # loop through hyperparameter settings, starting from the initial values
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    validation_accuracies = []
    for _ in range(max_tries):
        print('on try: ' + str(_ + 1) + '/' + str(max_tries))
        model = ANN([M] * nHidden)
        # choose params randomly on log base 10 scale
        model.fit(Xtrain, Ytrain, learning_rate=10**log_lr, reg=10**log_l2,
                  mu=0.99, epochs=4000, show_fig=True)
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print(
            "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s (layers), %s (log_lr), %s (log_l2)"
            % (validation_accuracy, train_accuracy, [M] * nHidden, log_lr, log_l2))

        # keep track of all
        validation_accuracies.append(validation_accuracy)

        # keep the best parameters, then make modifications to them
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_lr = log_lr
            best_l2 = log_l2

        # select new hyperparams
        nHidden = best_nHidden + np.random.randint(-1, 2)  # -1, 0, or 1: add, remove, or keep the same number of layers
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2) * 10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)

    # TODO: save these in mongodb, then read them and see if we beat it, in a new file run forward on best params
    print("Best validation_accuracy:", best_validation_rate)
    print("Mean validation_accuracy:", np.mean(validation_accuracies))
    print("Best settings:")
    print("Best M (hidden units):", best_M)
    print("Best nHidden (hidden layers):", best_nHidden)
    print("Best learning_rate:", best_lr)
    print("Best l2 regularization:", best_l2)
def main(argv):
    my_args = process_args(argv)
    basename, ext = my_args['DataFileName'].split('.')
    data = process_data.get_data(my_args['DataFileName'])

    # search for good fit and analysis
    label_pipeline = process_data.make_label_pipeline()
    # ravel() just reshapes the data for easier processing
    actual_labels = label_pipeline.fit_transform(data).values.ravel()

    if my_args["ModelType"] == "tree":
        fit_pipeline = make_decision_tree_fit_pipeline()
        fit_params = make_decision_tree_params()
    elif my_args["ModelType"] == "svm":
        fit_pipeline = make_svm_fit_pipeline()
        fit_params = make_svm_params()
    elif my_args["ModelType"] == "bagging-tree":
        fit_pipeline = make_bagging_tree_fit_pipeline()
        fit_params = make_bagging_tree_params()
    elif my_args["ModelType"] == "adaboost-tree":
        fit_pipeline = make_adaboost_tree_fit_pipeline()
        fit_params = make_adaboost_tree_params()
    else:
        print("pick --model type")
        sys.exit(1)

    if my_args["SplitterType"] == "k-fold":
        cv = sklearn.model_selection.KFold(n_splits=my_args["Folds"])
    elif my_args["SplitterType"] == "stratified":
        cv = sklearn.model_selection.StratifiedKFold(n_splits=my_args["Folds"])
    else:
        print("pick --splitter type")
        sys.exit(1)

    if my_args["SearchType"] == "grid":
        search_grid = sklearn.model_selection.GridSearchCV(
            fit_pipeline, fit_params, scoring="f1_micro",
            n_jobs=-1, cv=cv, refit=True, verbose=1)
    elif my_args["SearchType"] == "random":
        search_grid = sklearn.model_selection.RandomizedSearchCV(
            fit_pipeline, fit_params, scoring="f1_micro",
            n_iter=my_args["Iterations"], n_jobs=-1, cv=cv, refit=True, verbose=1)
    else:
        print("pick --search type")
        sys.exit(1)

    search_grid.fit(data, actual_labels)

    # examine best parameters
    print("Best Score:", search_grid.best_score_)
    print("Best Params:", search_grid.best_params_)
    print()
    print()
    print()

    scores = sklearn.model_selection.cross_val_score(
        search_grid.best_estimator_, data, actual_labels,
        scoring="f1_micro", cv=cv, n_jobs=-1)
    print("CV:", scores.mean(), scores.std())
    print()
    print()
    print()

    predicted_labels = search_grid.best_estimator_.predict(data)
    cm = sklearn.metrics.confusion_matrix(actual_labels, predicted_labels)
    # sklearn's binary confusion matrix is laid out [[TN, FP], [FN, TP]]
    print("confusion_matrix:")
    print("TN: ", cm[0][0])
    print("FP: ", cm[0][1])
    print("FN: ", cm[1][0])
    print("TP: ", cm[1][1])

    f1_score = sklearn.metrics.f1_score(actual_labels, predicted_labels, average="micro")
    print()
    print("Precision:",
          sklearn.metrics.precision_score(actual_labels, predicted_labels, average="micro"))
    print("Recall:",
          sklearn.metrics.recall_score(actual_labels, predicted_labels, average="micro"))
    print("F1:", f1_score)

    test_data = process_data.get_data("data/test.csv")
    actual_test_labels = label_pipeline.fit_transform(test_data).values.ravel()
    predicted_test_labels = search_grid.best_estimator_.predict_proba(test_data)
    labels = pd.DataFrame(predicted_test_labels)
    ids = test_data.result_id.to_list()
    labels["result_id"] = ids
    labels.to_csv("data/test_result.csv", index=False)
    cut_data.cut_result_data(f1_score)
    return
import dash
import dash_html_components as html
import pandas as pd
import plotly.graph_objects as go
import process_data as process_data
from dash.dependencies import Input, Output
import plotly
import random
from collections import deque

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

df = process_data.get_data()
df = df[df.unit_number == 3]
# df = (df - df.mean()) / df.std()
df = (df - df.min()) / (df.max() - df.min())

# time_deque = deque(df['time'].tolist())
# sensor_data_deque = deque(df['sensor_3'].tolist())
time_deque = deque(maxlen=150)
time_deque = time_deque + deque(list(range(1, len(df['sensor_3'].tolist()) + 1)))
print(time_deque)
full_sensor_data = deque(df['sensor_3'].tolist())
import numpy as np
from process_data import get_data

# Data
X, T = get_data()

# Weights
M = 5
D = X.shape[1]
K = len(set(T))

W1 = np.random.randn(D, M)
b1 = np.zeros(M)
W2 = np.random.randn(M, K)
b2 = np.zeros(K)


def softmax(z):
    expZ = np.exp(z)
    return expZ / expZ.sum(axis=1, keepdims=True)


def forward(X, W1, W2, b1, b2):
    Z = X.dot(W1) + b1
    A = np.tanh(Z)
    # add the output bias after the matrix product (not inside it)
    return softmax(A.dot(W2) + b2)


def classification_rate(P, T):
    return np.mean(P == T)


Y = forward(X, W1, W2, b1, b2)
P = np.argmax(Y, axis=1)
print('Classification rate with random weights: {}'.format(classification_rate(P, T)))