log.addHandler(fh)

data_path = os.path.join(FILE_PATH, './model', MODEL_NAME)

try:
    ## load the cached dataset if it exists
    X = pickle.load(
        open(
            data_path + "/x_set" + str(lstm.IN_TIMESTEPS) +
            str(lstm.OUT_TIMESTEPS_RANGE[-1]) + ".pkl", "rb"))
    Y = pickle.load(
        open(
            data_path + "/y_set" + str(lstm.IN_TIMESTEPS) +
            str(lstm.OUT_TIMESTEPS_RANGE[-1]) + ".pkl", "rb"))
except (IOError, OSError):
    ## otherwise generate train/val/test datasets from the raw data
    X, Y = generate_data('./reg_fmt_datasets.pkl')

    ## and cache them for the next run
    os.mkdir(data_path)
    pickle.dump(
        X,
        open(
            data_path + "/x_set" + str(lstm.IN_TIMESTEPS) +
            str(lstm.OUT_TIMESTEPS_RANGE[-1]) + ".pkl", "wb"))
    pickle.dump(
        Y,
        open(
            data_path + "/y_set" + str(lstm.IN_TIMESTEPS) +
            str(lstm.OUT_TIMESTEPS_RANGE[-1]) + ".pkl", "wb"))
    print("Saved data successfully!")

## build the lstm model
from data_processing import generate_data

tf.logging.set_verbosity(tf.logging.INFO)

LOG_DIR = './ops_logs/sin'
TIMESTEPS = 3
RNN_LAYERS = [{'num_units': 5}]
DENSE_LAYERS = None
TRAINING_STEPS = 10000
PRINT_STEPS = TRAINING_STEPS // 10  # integer division: every_n_steps expects an int
BATCH_SIZE = 100

regressor = learn.SKCompat(
    learn.Estimator(
        model_fn=lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS),
        model_dir=LOG_DIR))

X, y = generate_data(
    np.sin,
    np.linspace(0, 100, 10000, dtype=np.float32),
    TIMESTEPS,
    seperate=False)

# create a validation monitor with early stopping
validation_monitor = learn.monitors.ValidationMonitor(
    X['val'], y['val'],
    every_n_steps=PRINT_STEPS,
    early_stopping_rounds=1000)

print(X['train'])
print(y['train'])

regressor.fit(
    X['train'], y['train'],
    monitors=[validation_monitor],
    batch_size=BATCH_SIZE,
    steps=TRAINING_STEPS)
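
# Not in the original: a hedged sketch of the natural next step -- scoring the
# fitted model on the held-out test split. It assumes regressor.predict()
# returns a flat array of predictions aligned with y['test'], which is how
# learn.SKCompat behaves for single-output regression model_fns.
predicted = regressor.predict(X['test'])
rmse = np.sqrt(((predicted - y['test']) ** 2).mean())
print("test RMSE: %f" % rmse)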
BATCH_SIZE = 100
LOG_DIR = './ops_logs/lstm/' + str(TIMESTEPS) + str(OUTPUT_TIMESTEPS)
# MODEL_SAVED_PATH = './snapshot/model.ckpt'
# MODEL_SAVED_DIR = './snapshot'


def mse(pred, true):
    # note: because of the square root this is actually RMSE, not MSE
    return np.sqrt(((pred - true) ** 2).mean())


## build the lstm model
model_fn = lstm_model(TIMESTEPS, RNN_LAYERS, DENSE_LAYERS)

## generate train/val/test datasets based on raw data
X, y = generate_data('./reg_fmt_datasets.pkl', TIMESTEPS, OUTPUT_TIMESTEPS)

estimator = learn.Estimator(model_fn=model_fn, model_dir=LOG_DIR)
# estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=LOG_DIR)
regressor = learn.SKCompat(estimator)

## create a validation monitor
validation_monitor = learn.monitors.ValidationMonitor(
    X['val'], y['val'], every_n_steps=PRINT_STEPS)

## fit the train dataset
regressor.fit(X['train'], y['train'],
              monitors=[validation_monitor],
              batch_size=BATCH_SIZE,
              steps=TRAINING_STEPS)
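
# Hedged sketch (not in the original): evaluate on the test split with the
# mse() helper defined above, assuming regressor.predict() returns an array
# shaped like y['test'].
predicted = regressor.predict(X['test'])
print("test RMSE: %f" % mse(predicted, y['test']))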
import numpy as np

from data_processing import process_data, generate_data, histogram
from Decision_stump import (one_dimension_decision_stump,
                            multi_dimension_decision_stump,
                            check_accuracy, Out_of_sample_error)

if __name__ == '__main__':
    # Q17, Q18: repeat the stump experiment 5000 times on random data
    E_in_list = []
    E_out_list = []
    for i in range(5000):
        X, Y = generate_data(10, 5)
        score, s, theta = one_dimension_decision_stump(X, Y)
        E_in_list.append((10 - float(score)) / 10)
        E_out_list.append(Out_of_sample_error(s, theta))
    histogram(E_in_list, 'question 17', 'in sample error', 'frequency')
    print("Question 17: average in sample error: %f" % (sum(E_in_list) / 5000))
    histogram(E_out_list, 'question 18', 'out of sample error', 'frequency')
    print("Question 18: average out of sample error: %f" % (sum(E_out_list) / 5000))

    # Q19, Q20: run the multi-dimensional stump on the provided datasets
    X_train, Y_train = process_data('./hw2_train.dat')
    X_test, Y_test = process_data('./hw2_test.dat')
    best_record, s, theta, index = multi_dimension_decision_stump(X_train, Y_train)
    print("Question 19: index: %d, h = %d * sign(x - %f), in sample error: %f"
          % (index, s, theta, (len(Y_train) - float(best_record)) / len(Y_train)))
    X_test_trans = np.transpose(X_test)
    accuracy = check_accuracy(s, theta, X_test_trans[index], Y_test)
    print("Question 20: out of sample error: %f" % (1 - accuracy))
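
# Hedged sketch (not part of the assignment modules): one way the imported
# one_dimension_decision_stump could be implemented. The hypothesis is
# h(x) = s * sign(x - theta); candidate thetas are the midpoints between
# sorted points (plus one below the minimum), and the stump that classifies
# the most examples correctly wins. The assumed return convention
# (correct count, s, theta) matches how the scores are consumed above.
def decision_stump_sketch(x, y):
    xs = np.sort(x)
    thetas = np.concatenate(([xs[0] - 1.0], (xs[:-1] + xs[1:]) / 2.0))
    best = (-1, 1, 0.0)  # (correct count, s, theta)
    for theta in thetas:
        for s in (1, -1):
            correct = int(np.sum(s * np.sign(x - theta) == y))
            if correct > best[0]:
                best = (correct, s, theta)
    return best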
# construct learner and train
encoder = RNN(input_dim=4, h_dim=100, o_dim=22, bptt_truncate=4,
              rnn_role='encoder')
decoder = RNN(input_dim=22, h_dim=100, o_dim=22, bptt_truncate=4,
              rnn_role='decoder')
seq_2_seq_learner = Seq2Seq(encoder, decoder)

# training data prep
data = generate_data(n=data_size, encoded=encoded)

loss = []
for i in range(epoch):
    print("\nEpoch {} ...".format(i))
    current_loss = seq_2_seq_learner.train(
        data,
        learning_rate=learning_rate,
        reverse_input=False,
        stop_vec=ptn_stop_vec,
        data_print_interval=data_print_interval)
    loss.append((i, current_loss))
    if i % epoch_print_interval == 0:
        print("Current loss after epoch {}: {}\n".format(i, current_loss))

# prepare test data
test_cdna_file = "data/cDNA_CFTR.fa"
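
# Hedged helper (not in the original): read the FASTA file into one sequence
# string, skipping '>' header lines, so the test sequence can be fed through
# the same encoding path as the training data.
def read_fasta(path):
    with open(path) as f:
        return "".join(line.strip() for line in f if not line.startswith(">"))

test_seq = read_fasta(test_cdna_file)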
os.makedirs(FLAGS.sample_dir)

# the current file path
FILE_PATH = os.path.dirname(__file__)
TASK_NAME_LIST = []

data_path = os.path.join(FILE_PATH, './model', FLAGS.model_name)

if FLAGS.run_mode == 0:  # '==', not 'is': identity comparison of ints is unreliable
    try:
        ## load the cached training dataset if it exists
        X = pickle.load(open(data_path + "/x_set_train" + ".pkl", "rb"))
        Y = pickle.load(open(data_path + "/y_set_train" + ".pkl", "rb"))
    except (IOError, OSError):
        ## otherwise generate train/val/test datasets from the raw data
        X, Y = generate_data('./reg_fmt_datasets_1spd.pkl')

        # save dataset
        # os.mkdir(data_path)
        pickle.dump(X, open(data_path + "/x_set_train" + ".pkl", "wb"))
        pickle.dump(Y, open(data_path + "/y_set_train" + ".pkl", "wb"))
        print("Saved data successfully!")
else:
    try:
        ## load the cached test dataset if it exists
        X = pickle.load(open(data_path + "/x_set_test" + ".pkl", "rb"))
        Y = pickle.load(open(data_path + "/y_set_test" + ".pkl", "rb"))
    except (IOError, OSError):
        ## otherwise generate train/val/test datasets from the raw data
        X, Y = generate_data('./reg_fmt_datasets_6spd.pkl')

        # save dataset
        # os.mkdir(data_path)
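        # Completing the truncated save step by mirroring the training branch
        # above (an assumption; the original cuts off here):
        pickle.dump(X, open(data_path + "/x_set_test" + ".pkl", "wb"))
        pickle.dump(Y, open(data_path + "/y_set_test" + ".pkl", "wb"))
        print("Saved data successfully!")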