def run_all2(self):
    """Train on the nine numbered trial files in turn, then test on the full file.

    For each trial 1..9 the "MIC" series is read from
    ``./Data/lob_datatrial000<k>.csv``, split with ``self.steps`` and passed
    to ``self.train``. Afterwards the "MIC" series from
    ``./Data/lob_data.csv`` is split the same way and passed to ``self.test``.
    """
    for trial in range(1, 10):
        print("epoch {} out of 9".format(trial))
        trial_file = "./Data/lob_datatrial000{}.csv".format(trial)
        trial_prices = data_handler.read_data(trial_file, "MIC")
        X, y = data_handler.split_data(trial_prices, self.steps)
        # Positional args 100 and 0 are forwarded as-is; presumably epochs and
        # verbosity (run_all passes verbose by keyword) -- confirm against train().
        self.train(X, y, 100, 0)

    full_file = "./Data/lob_data.csv"
    # The "TIME" series is read but never used below; the call is kept so the
    # file access behaviour stays the same.
    timestamps = data_handler.read_data(full_file, "TIME")
    full_prices = data_handler.read_data(full_file, "MIC")
    X, y = data_handler.split_data(full_prices, self.steps)
    self.test(X, y, verbose=1)
def process_numpy_data(file_path='data/', region='ME'):
    """
    Load one region's numpy array and turn each data split into a list.

    The array is read from ``file_path + region + '.npy'`` (plain string
    concatenation, so ``file_path`` must already end with a separator),
    split three ways by ``data_handler.split_data`` and each part is
    converted with ``data_handler.create_lists_of_data``.

    args:
        - file_path: path prefix of the numpy array data
        - region: which region will be processed (ie: 'ME')
    returns:
        - train_list: list containing training data
        - val_list: list containing validation data
        - test_list: list containing test data
    """
    array_file = file_path + region + '.npy'
    region_data = np.load(array_file)

    # three-way split: train / validation / test
    energy_train, energy_val, energy_test = data_handler.split_data(
        region_data)

    # per the original author, each list entry holds the data for one
    # encoder/decoder cycle
    to_lists = data_handler.create_lists_of_data
    train_list = to_lists(energy_train)
    val_list = to_lists(energy_val)
    test_list = to_lists(energy_test)
    return train_list, val_list, test_list
def get_fitness(in_sample_df, pop):
    """
    Return the testing accuracy, which is our fitness function.

    For every chromosome in ``pop`` the in-sample frame is reduced with
    ``translate_dna``, split into train/test parts, a model is built and
    trained by ``construct_model`` and then scored with ``tst.test_net``.

    Args:
        in_sample_df (data frame): in-sample data; column 0 is used as the
            target and the remaining columns as features.
        pop (iterable): chromosomes, each passed to ``translate_dna``.

    Returns:
        list: one ``tst.test_net`` result per chromosome, in ``pop`` order.
    """
    fitness = []
    for chromosome in pop:
        p_df = translate_dna(in_sample_df, chromosome)

        # 0.63 is forwarded to dh.split_data unchanged; presumably the
        # training fraction -- confirm against dh.split_data.
        train_df, test_df = dh.split_data(p_df, 0.63)

        # DataFrame.as_matrix() was removed in pandas 1.0; .values returns
        # the same ndarray and also exists in older pandas releases.
        training_array = train_df.values

        # split x (features) and y (target)
        x_array, y_array = training_array[:, 1:], training_array[:, 0]

        x = torch.tensor(x_array, dtype=torch.float, requires_grad=True)
        y = torch.tensor(y_array, dtype=torch.float, requires_grad=False)

        # construct and train model; only the trained model is needed here
        _, _, trained_model, _ = construct_model(x, y)

        # test the nn using test data
        fitness.append(tst.test_net(trained_model, test_df))

    return fitness
def run_all(self):
    """Run one train / test / save cycle on ``./Data/lob_data.csv``.

    The "MIC" series is split with ``self.steps``; X and y are each divided
    with a ``[9, 1]`` split ratio. Both X parts are reshaped to
    ``(-1, self.steps, 1)`` before ``self.train`` and ``self.test`` are
    called, and ``self.save`` runs last.
    """
    source_file = "./Data/lob_data.csv"
    # The "TIME" series is read but not used afterwards; the call is kept so
    # the file access behaviour stays the same.
    timestamps = data_handler.read_data(source_file, "TIME")
    prices = data_handler.read_data(source_file, "MIC")
    X, y = data_handler.split_data(prices, self.steps)

    split_ratio = [9, 1]
    window_shape = (-1, self.steps, 1)

    train_X, test_X = data_handler.split_train_test_data(X, split_ratio)
    train_X = train_X.reshape(window_shape)
    test_X = test_X.reshape(window_shape)

    train_y, test_y = data_handler.split_train_test_data(y, split_ratio)

    self.train(train_X, train_y, 200, verbose=1)
    self.test(test_X, test_y, verbose=1)
    self.save()
def main_GA(df):
    """
    Generate a network with the genetic algorithm.

    A random binary population of ``POP_SIZE`` chromosomes is evolved for
    ``N_GENERATIONS`` generations (fitness -> select -> crossover -> mutate).
    The fittest chromosome seen in the last generation is then retrained on
    the in-sample data and scored on the held-out validation split; both
    results are printed.

    Args:
        df (data frame): all data; column 0 is used as the target and every
            other column as a candidate input.
    """
    dna_size = len(df.columns) - 1  # dna for input vector
    pop = np.random.randint(2, size=(POP_SIZE, dna_size))

    # get validation data set (10% of data set)
    validation_df, in_sample_df = dh.split_data(df, 0.1)

    for g in range(N_GENERATIONS):
        fitness = get_fitness(in_sample_df, pop)
        # Indexing with the integer from argmax produces a fresh array, so the
        # in-place child assignment below does not overwrite the recorded best.
        generational_best = [pop[np.argmax(fitness), :]]
        print(" Most fitted DNA: ", generational_best[0], "best =", np.max(fitness),
              ", generational average =", np.mean(fitness))

        pop = select(pop, fitness)
        pop1 = copy.copy(pop)  # snapshot of the parents used for crossover
        children = 0
        for parent in pop:
            # grows towards 1 as fewer parents remain to be replaced
            pr_multiplier = min(10 / (len(pop) - children), 1)
            # produce a child by crossover operation
            child = crossover(parent, pop1, dna_size, pr_multiplier)
            # mutate child
            child = mutate(child, dna_size, pr_multiplier)
            # replace parent with its child
            parent[:] = child
            children += 1

    # validate best model in last generation but first need to train based on chromosome
    val_train_df = translate_dna(in_sample_df, generational_best[0])

    # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
    # same ndarray and also exists in older pandas releases.
    training_array = val_train_df.values
    x_array, y_array = training_array[:, 1:], training_array[:, 0]

    x = torch.tensor(x_array, dtype=torch.float, requires_grad=True)
    y = torch.tensor(y_array, dtype=torch.float, requires_grad=False)

    # construct and train model; only the trained model is needed here
    _, _, trained_model, _ = construct_model(x, y)

    val_test_df = translate_dna(validation_df, generational_best[0])
    validation_results = tst.test_net(trained_model, val_test_df)
    print("generational best: ", generational_best)
    print("validation results: " + str(validation_results))
def train(): X, y = load_char_data_and_labels() #print X vocab_list, vocab_dict, rev_vocab_dict = create_char_vocabulary(X) X, seq_lens = data_to_character_ids(X, vocab_dict) #print X train_X, train_y, train_seq_lens, valid_X, valid_y, valid_seq_lens = \ split_data(X, y, seq_lens) with tf.Session() as sess: # Load old model or create new one model = create_model(sess, FLAGS) # Train results returned = generate_epoch(train_X, train_y, train_seq_lens, FLAGS.num_epochs, FLAGS.batch_size) #print "THE PROBLEM : ", returned #for epoch_num, epoch in enumerate(returned): # print "EPOCH:", epoch_num , " \n ",epoch.next(), "\n\n" for epoch_num, epoch in enumerate(returned): print "EPOCH:", epoch_num sess.run(tf.assign(model.lr, FLAGS.learning_rate * \ (FLAGS.learning_rate_decay_factor ** epoch_num))) train_loss = [] train_accuracy = [] for batch_num, (batch_X, batch_y, batch_seq_lens) in enumerate(epoch): _, loss, accuracy = model.step(sess, batch_X, batch_seq_lens, batch_y, dropout=FLAGS.dropout, forward_only=False, sampling=False) train_loss.append(loss) train_accuracy.append(accuracy) print print "EPOCH %i SUMMARY" % epoch_num print "Training loss %.3f" % np.mean(train_loss) print "Training accuracy %.3f" % np.mean(train_accuracy) print "----------------------" # Validation results for valid_epoch_num, valid_epoch in enumerate( generate_epoch(valid_X, valid_y, valid_seq_lens, num_epochs=1, batch_size=FLAGS.batch_size)): valid_loss = [] valid_accuracy = [] for valid_batch_num, \ (valid_batch_X, valid_batch_y, valid_batch_seq_lens) in \ enumerate(valid_epoch): loss, accuracy = model.step(sess, valid_batch_X, valid_batch_seq_lens, valid_batch_y, dropout=0.0, forward_only=False, sampling=False) valid_loss.append(loss) valid_accuracy.append(accuracy) print "Validation loss %.3f" % np.mean(valid_loss) print "Validation accuracy %.3f" % np.mean(valid_accuracy) print "----------------------" # Save checkpoint every epoch. 
if not os.path.isdir(FLAGS.ckpt_dir): os.makedirs(FLAGS.ckpt_dir) checkpoint_path = os.path.join(FLAGS.ckpt_dir, "model.ckpt") print "Saving the model." model.saver.save(sess, checkpoint_path, global_step=model.global_step)
params = { 'axes.labelsize': 30, 'font.size': 30, 'legend.fontsize': 15, 'xtick.labelsize': 20, 'ytick.labelsize': 20, 'figure.figsize': [10, 8], 'font.family': "Arial" } plt.rcParams.update(params) if __name__ == '__main__': feature_names, features, labels = data_handler.get_data() training_features, training_labels, test_features, test_labels = data_handler.split_data( features, labels) svm_model = model.Model(training_features, training_labels, test_features, test_labels, feature_names) svm_model.train() svm_model.test() # confusion matrix and accuracy svm_model.evaluate() # ROC curve svc_disp = plot_roc_curve(svm_model.clf, svm_model.test_features, svm_model.test_labels) plt.plot([0, 1], [0, 1], linestyle='--', lw=2,
#A function to predict data def predict(model, data, label_scaler): data_x = data[:seq_length] data_x = np.expand_dims(data_x, axis=0) pred_y = model.predict(data_x) pred_y = label_scaler.inverse_transform(pred_y) return pred_y.reshape(output_seq_length, 1) raw_x, raw_y = data_handler.fetch_data() #Standardizing data to force it to lie on the same range scaler_x, x, scaler_y, y = data_handler.standardize_data(raw_x, raw_y) #Splitting data into training and testing sets train_x, test_x = data_handler.split_data(x) train_y, test_y = data_handler.split_data(y) #Define validation data as the entire training set validation_data = (test_x.reshape(-1, seq_length, 1), test_y.reshape(-1, output_seq_length, 1)) #Build model model, callbacks = build_model() #Accept inputs from command line train = sys.argv[1] if (train == 'train'): #Call the input generator to yield a batch of sequences input_gen = data_handler.generate_input_pipe(train_x, train_y, batch_size, seq_length, output_seq_length) #Train the model model.fit_generator(generator=input_gen,
def main_relevance(df_all):
    """
    Automatically reduce our network size by removing inputs that we deemed
    irrelevant by neuron relevance. We iteratively reduce the network until
    empty, returning best performing network (via sampling).

    For each input count, ``MODEL_SAMPLING`` models are trained on fresh
    train/test splits of the in-sample data; the mean standard error, the
    mean accuracy and the most accurate model are recorded under that input
    count. One input is then pruned from both the in-sample and validation
    frames and the process repeats until nothing more can be pruned.

    Args:
        df_all (data frame): all data; column 0 is used as the target and
            the remaining columns as inputs.

    Returns:
        The recorded model for the input count with the highest mean test
        accuracy.
    """
    training_results = dict()   # input count -> mean standard error
    test_results = dict()       # input count -> mean accuracy
    global_model = dict()       # input count -> best local model
    validation_pruned = dict()  # input count -> validation df with matching columns
    model_pruned = True

    # get validation data set (10% of data set)
    validation_df, in_sample_df = dh.split_data(df_all, 0.1)
    validation_pruned[len(in_sample_df.columns) - 1] = validation_df

    while len(in_sample_df.columns) - 1 >= 1 and model_pruned:
        n_inputs = len(in_sample_df.columns) - 1
        total_error = 0
        total_accuracy = 0
        local_best = [0, 0]  # [model, accuracy]

        for i in range(MODEL_SAMPLING):
            # get training and validation data sets
            train_df, test_df = dh.split_data(in_sample_df, 0.63)

            # DataFrame.as_matrix() was removed in pandas 1.0; .values returns
            # the same ndarray and also exists in older pandas releases.
            training_array = train_df.values

            # split x (features) and y (target)
            x_array, y_array = training_array[:, 1:], training_array[:, 0]

            x = torch.tensor(x_array, dtype=torch.float, requires_grad=True)
            y = torch.tensor(y_array, dtype=torch.float, requires_grad=False)

            # construct and train model
            model, optimiser, trained_model, model_se = construct_model(x, y)

            # test the nn using test data
            model_accuracy = tst.test_net(trained_model, test_df)

            # evaluate model data
            total_error = total_error + model_se
            total_accuracy = total_accuracy + model_accuracy
            if model_accuracy > local_best[1]:
                local_best = [trained_model, model_accuracy]

        # update dictionary and global best model
        training_results[n_inputs] = total_error / MODEL_SAMPLING
        test_results[n_inputs] = total_accuracy / MODEL_SAMPLING
        global_model[n_inputs] = local_best[0]

        # prune model based on relevance measure on inputs
        # NOTE(review): model, x, y and optimiser come from the LAST sampling
        # run above, not from the best one -- confirm this is intended.
        prune_input = trn.get_min_phat(model, x, y, optimiser)
        if prune_input is not None:
            in_sample_df = trn.remove_input(in_sample_df, prune_input)
            # After pruning, len(columns) equals the previous input count,
            # which is the key the matching validation frame was stored under.
            prior_val_df = validation_pruned[len(in_sample_df.columns)]
            validation_pruned[len(in_sample_df.columns) - 1] = trn.remove_input(
                prior_val_df, prune_input)
            print("Model has been pruned to include " + str(len(in_sample_df.columns) - 1)
                  + " inputs. Best performance was " + str(local_best[1])
                  + ". Average performance was " + str(total_accuracy / MODEL_SAMPLING))
            print(in_sample_df.columns.values)
        else:
            model_pruned = False

    # select best model, and test validation set
    best_acc = max(test_results, key=test_results.get)  # input count, not an accuracy
    best_model = global_model[best_acc]
    validation_results = tst.test_net(best_model, validation_pruned[best_acc])

    dh.plot_data(test_results, training_results)
    print("the best model had " + str(best_acc) + " inputs, with "
          + str(test_results[best_acc]) + " accuracy.")
    print("validation results: " + str(validation_results))

    return best_model
api_key='chx6zB1e9mVinjDrpbUs') files = ['kc1.csv', 'kc2.csv', 'pc1.csv'] for filename in files: df = load_csv(filename) #os dados estao como csv agora for i in range(len(df[0]) - 1): data_conversion_to_float(df, i) str_column_to_int(df, len(df[0]) - 1) split = int(0.7 * len(df)) kn = [1, 3] number_prototypes = [50, 100, 150, 200] lvq_training_set = [] knn_test_set = [] split_data(df, split, lvq_training_set, knn_test_set) lvq_training_set = np.array(lvq_training_set) knn_test_set = np.array(knn_test_set) lvq_training_set = data_balance(lvq_training_set) lrate = 0.1 epochs = 30 accuracy_lvq1_k1 = [] accuracy_lvq2_k1 = [] accuracy_lvq3_k1 = [] accuracy_lvq1_k3 = [] accuracy_lvq2_k3 = [] accuracy_lvq3_k3 = [] accuracy_knn = []
prices = data_handler.read_data("lob_datatrial0001.csv", "MIC") # splitting data into chunks of 4 steps = 59 reshape = True # X, y = data_handler.split_data(prices, steps, reshape) # split_ratio = [9,1] # train_X, test_X = data_handler.split_train_test_data(X, split_ratio) # train_X = train_X.reshape((-1, steps, 1)) # test_X = test_X.reshape((-1, steps, 1)) # train_y, test_y = data_handler.split_train_test_data(y, split_ratio) model = Vanilla_LSTM((steps, 1)) for i in range(9): print("epoch " + str(i + 1) + " out of 9") prices = data_handler.read_data( "lob_datatrial000" + str(i + 1) + ".csv", "MIC") X, y = data_handler.split_data(prices, steps, reshape) model.train(X, y, 200, 0) checkpoint_path = "./Models/vanilla.ckpt" # model.save_model(checkpoint_path) # model.test(test_X, test_y) prices = data_handler.read_data("lob_datatrial0010.csv", "MIC")
# Training script: builds the 4-layer CNN, fits it on the shuffled training
# data with a held-out cross-validation split, saves it and prints the
# training history.
from cnn import (get_1_layer_model, get_2_layers_model, get_3_layers_model,
                 get_4_layers_model, load_model, save_model)
from data_handler import get_data, shuffle_data, split_data
from performance import evaluate, predictions, show_plots
from utils import path_relative_to
from variables import BATCH_SIZE, EPOCHS, TRAIN_PERC, models_dir

# Name the model is saved under by save_model below.
MODEL_FILE_NAME = 'cnn_4_layers'

# Load the train/test data with normalisation requested.
(train_data, train_labels), (test_data, test_labels) = get_data(to_normalize=True)

# Shuffle data and labels together, then carve the cross-validation set out
# of the training data using TRAIN_PERC.
train_data, train_labels = shuffle_data(train_data, train_labels)
(train_data, train_labels), (xvalidate_data, xvalidate_labels) = split_data(
    train_data, train_labels, TRAIN_PERC)

# model = load_model(MODEL_FILE_NAME, models_dir())
# model = model or get_2_layers_model()
model = get_4_layers_model()
# NOTE(review): this prints whatever summary() returns after 'model'; if
# summary() prints by itself and returns None the output ends in "model None"
# -- confirm.
print('model', model.summary())

# Fit with the held-out split supplied as validation data.
results = model.fit(train_data, train_labels,
                    epochs=EPOCHS, batch_size=BATCH_SIZE,
                    validation_data=(xvalidate_data, xvalidate_labels))

save_model(model, MODEL_FILE_NAME, models_dir())

print('history', results.history)