def get_data_train_and_test_rnn(test_epochs, test_display_step, buy_threshold, sell_threshold, use_random_data):
    # GET DATA
    data_df = dml.get_all_ml_data()
    training_df = data_df[data_df.date < test_data_date].copy()
    test_df = data_df[data_df.date >= test_data_date].copy()
    del data_df

    # TRAIN
    training_data_class = td.TrainingData(training_df, feature_series_count, feature_count, label_count)
    # TODO: switch rnn to use batch data, testing below
    # fff, lll, ddd = training_data_class.get_batch(3)
    train_rnn(training_data_class, _model_path, use_random_data)

    # TEST
    testing_data_class = td.TrainingData(test_df, feature_series_count, feature_count, label_count)
    test_rnn(testing_data_class, test_epochs, test_display_step, buy_threshold, sell_threshold)

    merge_csv_files(_get_meta_prediction_files(), prediction_file)
def get_data_and_test_rnn(test_epochs, test_display_step, buy_threshold, sell_threshold, specific_file=None):
    # GET DATA
    data_df = dml.get_all_ml_data()
    test_df = data_df[data_df.date >= test_data_date].copy()
    del data_df

    # TEST
    testing_data_class = td.TrainingData(test_df, feature_series_count, feature_count, label_count)
    test_rnn(testing_data_class, test_epochs, test_display_step, buy_threshold, sell_threshold, specific_file)
def predict_rnn(after_date, specific_files=None):
    the_curr_time = datetime.datetime.now().strftime('%X')
    print_string = "Time: {}".format(the_curr_time)
    print("START PREDICTING MODEL...{}".format(print_string))

    data_df = dml.get_all_predictable_data()
    test_df = data_df[data_df.date >= after_date].copy()
    del data_df

    predict_data_cls = td.TrainingData(test_df, feature_series_count, feature_count, label_count)
    predictions_df = pd.DataFrame(columns=('model_file', 'date', 'ticker', 'prediction'))

    file_list = _get_rnn_model_files()
    if specific_files is None:
        print("Got these files:")
        print(file_list)
    else:
        file_list = specific_files

    for each_file in file_list:
        session, x, y, prediction, cost, tf_graph = restore_rnn(each_file)
        with tf_graph.as_default():
            feature_data = 'first run'
            while feature_data is not None:
                feature_data, label_data, descriptive_df = predict_data_cls.get_next_training_data(
                    until_exhausted=True)
                if feature_data is None:
                    print(" --- Data Exhausted --- ")
                    the_curr_time = datetime.datetime.now().strftime('%X')
                    print_string = "Time: {}".format(the_curr_time)
                    print(print_string)
                    break

                prediction_out = session.run([prediction], feed_dict={x: feature_data})[0]
                ticker = descriptive_df['ticker'].iloc[-1]
                data_date = descriptive_df['date'].iloc[-1]
                predictions_df.loc[predictions_df.shape[0]] = [
                    each_file, data_date, ticker, prediction_out[0][0]
                ]

    with open(
            _prediction_path + get_file_friendly_datetime_string() + "." + _prediction_filename,
            'wt') as f:
        f.write(predictions_df.to_csv())
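# --- Hedged usage sketch (not part of the original module) ---
# One plausible way to drive the entry points above from a __main__ guard. The epoch
# count, display step, thresholds, and cut-off date below are illustrative placeholders,
# not values taken from the project.
if __name__ == '__main__':
    # Train on rows before test_data_date, evaluate on the rest, then merge prediction CSVs.
    get_data_train_and_test_rnn(test_epochs=1, test_display_step=100,
                                buy_threshold=0.5, sell_threshold=-0.5,
                                use_random_data=False)
    # Score every restored model file on data newer than the given date.
    predict_rnn(after_date='2018-01-01')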
final_model = Model(input=[q, d_positive, d_negative], output=[merged])

# train
sgd = SGD()
print("compile network")
final_model.compile(loss={'triple_sentence_vector': dssm_loss}, optimizer=sgd, sparse_input=True)
# final_model.compile(loss={'triple_sentence_vector': dssm_loss}, optimizer=sgd, sparse_input=False)
# final_model.compile(loss={'triple_sentence_vector': dssm_loss_v2}, optimizer=sgd,
#                     mode=NanGuardMode(nan_is_error=True, inf_is_error=False, big_is_error=False))
# final_model.compile(loss={'triple_sentence_vector': dssm_loss_v2}, optimizer=sgd,
#                     mode=theano.compile.MonitorMode(post_func=detect_nan))
# final_model.compile(loss={'triple_sentence_vector': dssm_loss_v2}, optimizer=sgd,
#                     mode=DebugMode(check_py_code=False))

for epoch in range(20):
    training_data_set = training_data.TrainingData(
        combined_feature_file_name=training_data_file_name,
        sample_size=sample_size)
    final_model.fit_generator(
        training_data_set.generate_sparse_training_data(negative_d_num),
        samples_per_epoch=1024 * 1024 * 16, nb_epoch=1, verbose=1)
    # final_model.fit_generator(training_data_set.generate_dense_training_data(negative_d_num),
    #                           samples_per_epoch=1024 * 1024 * 16, nb_epoch=1, verbose=1)

    json_string = final_model.to_json()
    open('wec_model_architecture_v2.%d.json' % epoch, 'w').write(json_string)
    final_model.save_weights('wec_model_weights_v2.%d.h5' % epoch, overwrite=True)
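# --- Hedged sketch (assumption, not from the original code) ---
# Reloading one of the checkpoints written above with the same legacy Keras API the
# snippet uses (to_json / save_weights). model_from_json restores only the architecture;
# the custom dssm_loss (and any other custom objects) would still have to be supplied at
# compile time before reuse. The epoch index is an illustrative placeholder.
from keras.models import model_from_json

epoch_to_load = 19
with open('wec_model_architecture_v2.%d.json' % epoch_to_load) as f:
    restored_model = model_from_json(f.read())
restored_model.load_weights('wec_model_weights_v2.%d.h5' % epoch_to_load)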
def train(training_data_file_name="../data/combined_feature_1m", model_name="alpha",
          mini_batch_size=4096, eta=0.1, sample_size=64 * 1024):
    print_log("Initialize training data")
    training_data_set = training_data.TrainingData(
        combined_feature_file_name=training_data_file_name,
        sample_size=sample_size)
    negative_d_num = 4
    train_q = theano.shared(sp.csr_matrix((sample_size, training_data_set.feature_num),
                                           dtype=theano.config.floatX), borrow=True)
    train_d = theano.shared(sp.csr_matrix(((negative_d_num + 1) * sample_size, training_data_set.feature_num),
                                           dtype=theano.config.floatX), borrow=True)
    # train_q = theano.shared(np.asarray(np.empty((sample_size, training_data_set.feature_num)),
    #                                    dtype=theano.config.floatX), borrow=True)
    # train_d = theano.shared(np.asarray(np.empty(((negative_d_num+1)*sample_size, training_data_set.feature_num)),
    #                                    dtype=theano.config.floatX), borrow=True)

    q = TS.csr_matrix("q")
    d = TS.csr_matrix("d")
    # q = T.matrix("q")
    # d = T.matrix("d")

    print_log("Create training function")
    net = NetWork(q, d, training_data_set.feature_num, negative_d_num, mini_batch_size,
                  hidden_layer_output_num=[300, 300, 128])
    # indexes = generate_index(mini_batch_size, negative_d_num)
    # cost = net.cosine_layer.cost(indexes[0], indexes[1])
    # cost = net.cosine_layer.cost_v2()
    cost = net.cosine_layer.cost_v3()
    test = net.cosine_layer.test_v3()
    watch_input = net.hidden_layers[0].watch_input()
    grads = theano.grad(cost, net.params)
    # print_log("%s" % pp(grads[0]))
    # f = theano.function([q], grads[0])
    # pp(f.maker.fgraph.outputs[0])
    # theano.printing.pydotprint(grads[0], outfile="symbolic_graph_unopt.png", var_with_name_simple=True)
    updates = [(param, param - eta * grad)
               for param, grad in zip(net.params, grads)]
    # updates = [(param, param-1*eta*grad)
    #            for param, grad in zip(net.params[:1], grads[:1])]
    # updates += [(param, param-eta*grad)
    #             for param, grad in zip(net.params[1:], grads[1:])]

    i = T.lscalar()
    train = theano.function(
        [i], cost, updates=updates,
        givens={q: train_q[i * mini_batch_size:(i + 1) * mini_batch_size],
                d: train_d[i * mini_batch_size * (negative_d_num + 1):(i + 1) * mini_batch_size * (negative_d_num + 1)]})
    test = theano.function(
        [i], test,
        givens={q: train_q[i * mini_batch_size:(i + 1) * mini_batch_size],
                d: train_d[i * mini_batch_size * (negative_d_num + 1):(i + 1) * mini_batch_size * (negative_d_num + 1)]})
    watch_input = theano.function(
        [i], watch_input,
        givens={q: train_q[i * mini_batch_size:(i + 1) * mini_batch_size],
                d: train_d[i * mini_batch_size * (negative_d_num + 1):(i + 1) * mini_batch_size * (negative_d_num + 1)]})

    iteration = 0
    epochs = 1000
    for epoch in xrange(epochs):
        training_data_set.load_sparse_training_data_v4(train_q, train_d)
        # print_log("%s" % train_q.eval()[11])
        # print_log("%s" % train_d.eval()[55])
        # print_log("%s" % train_d.eval()[59])
        while size(train_q) > 0:
            iteration += 1
            num_training_batches = size(train_q) / mini_batch_size
            print_log("epoch: %d\titeration: %d" % (epoch, iteration))
            print_log("num_training_batches: %d" % num_training_batches)
            print_log("eta: %f" % eta)
            # for i in range(0, len(net.params), 2):
            #     print_log("layer %d w params:\n%s" % (i/2 + 1, np.asarray(net.params[i].eval())))
            #     print_log("layer %d b params:\n%s" % (i/2 + 1, np.asarray(net.params[i+1][:8].eval())))
            # for i in range(0, len(net.params), 1):
            #     print_log("layer %d w params:\n%s" % (i + 1, np.asarray(net.params[i].eval())))
            loss = 0
            for mini_batch_index in xrange(num_training_batches):
                if mini_batch_index == 0:
                    output_test = test(0)
                    print_log("test output:\n%s" % output_test[0])
                    print_log("validate:\t%f" % output_test[1])
                    continue
                # w_q, w_d = watch_input(mini_batch_index)
                # print_log("q\n%s" % w_q[11])
                # print_log("pd\n%s" % w_d[55])
                # print_log("nd\n%s" % w_d[59])
                loss += train(mini_batch_index)
            print_log("loss:\t%f" % (loss / num_training_batches))
            training_data_set.load_sparse_training_data_v4(train_q, train_d)

        model_file = open("../../model/dssm_%s_%d_%f_%d_%d.save" %
                          (model_name, mini_batch_size, eta, epoch, iteration), "wb")
        cPickle.dump(net, model_file)
        model_file.close()
        training_data_set.clear()
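# --- Hedged sketch (assumption, not from the original code) ---
# The checkpoints above are plain cPickle dumps of the NetWork instance, so restoring a
# trained model for later scoring can be as simple as unpickling it. The file name is an
# illustrative placeholder following the save pattern used in train().
import cPickle

with open("../../model/dssm_alpha_4096_0.100000_0_100.save", "rb") as model_file:
    restored_net = cPickle.load(model_file)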