def gather_features(data, feature_mask):
    """Return the columns of *data* whose index is listed in *feature_mask*.

    Args:
        data: 2-D array-like of shape (samples, features).
        feature_mask: iterable of feature-column indices to keep.

    Returns:
        ``data`` restricted to the selected columns (original column order
        is preserved; selection is by membership, not by the order of
        ``feature_mask``).
    """
    # Vectorized form of the original per-index membership loop; list() makes
    # the mask safe for sets/generators before np.isin sees it.
    mask = np.isin(np.arange(np.shape(data)[-1]), list(feature_mask))
    return data[:, mask]


# --- script section: load 0050 data and build train/validation splits ---
tf.reset_default_graph()

c = conf.config('trial_cnn_cls').config['common']
# Window = encoder input steps + prediction steps.
sample_window = c['input_step'] + c['predict_step']

tv_gen = dp.train_validation_generaotr()  # (sic) project class name
f = tv_gen._load_data(c['src_file_path'])
stock = tv_gen._selectData2array(f, ['0050'], None)

# Fixed chronological split at row 769; the last 3 columns are the labels
# (presumably one-hot direction classes — confirm against the data schema).
train = stock[:769]
validation = stock[769:]

train_data = train
train_label = train[:, -3:]
validation_data = validation
validation_label = validation[:, -3:]
def _fit_and_report(name, train_x, train_label, test_x, test_label):
    """Fit one XGBoost classifier on a feature family and print accuracies.

    Returns:
        (model, train_predictions, test_predictions)
    """
    model = xgb.XGBClassifier(max_depth=3, learning_rate=0.05,
                              n_estimators=500, silent=True)
    model.fit(train_x, train_label)
    y_train = model.predict(train_x)
    y_test = model.predict(test_x)
    print("Train Accuracy [{}]: ".format(name),
          accuracy_score(y_train, train_label))
    print("Validation Accuracy [{}]: ".format(name),
          accuracy_score(y_test, test_label))
    return model, y_train, y_test


def get_ens_model(lagday=5, model_temp=None):
    """Train per-feature-family XGBoost classifiers plus an ensemble on top.

    Args:
        lagday: prediction horizon in days; features/labels are shifted by
            this lag before fitting.
        model_temp: unused; kept for backward compatibility. (The original
            default was a shared ``xgb.XGBClassifier`` instance created at
            import time — a mutable-default bug — and was never referenced.)

    Returns:
        dict mapping feature-family name (plus ``'ensemble'``) to its
        fitted model.
    """
    print('**********Generate model for {} day***********'.format(lagday))

    c = conf.config('trial_cnn_cls').config['common']
    *_, meta = gu.read_metafile(c['meta_file_path'])
    tv_gen = dp.train_validation_generaotr()
    f = tv_gen._load_data(c['src_file_path'])
    data = tv_gen._selectData2array(f, f.index[:-4], None)

    # Day-over-day relative change of the first 4 (price) columns;
    # the +0.1 guards against division by zero.
    data_velocity = (data[1:, 0:4] - data[:-1, 0:4]) / (data[:-1, 0:4] + 0.1)
    data = data[1:]  # align prices with their velocities

    # Hold out the last 30 days for testing; flatten (day, stock, feature)
    # to (day*stock, feature): 94 raw features, 4 velocity features.
    train_sample = data[:-30]
    train_sample_v = data_velocity[:-30]
    flat_train_sample = np.reshape(
        np.transpose(train_sample, (0, 2, 1)), (-1, 94))
    flat_train_sample_velocity = np.reshape(
        np.transpose(train_sample_v, (0, 2, 1)), (-1, 4))

    test_sample = data[-30:]
    test_sample_v = data_velocity[-30:]
    flat_test_sample = np.reshape(
        np.transpose(test_sample, (0, 2, 1)), (-1, 94))
    flat_test_sample_velocity = np.reshape(
        np.transpose(test_sample_v, (0, 2, 1)), (-1, 4))

    # Extract every feature family once for train and once for test.
    fe_train = feature_extractor(flat_train_sample, flat_train_sample_velocity)
    train_features = {
        'ratio': fe_train.ratio(),
        'kdj_ratio': fe_train.kdj_ratio(),
        'ratio_velocity': fe_train.ratio_velocity(),
        'ud': fe_train.ud(),
        'kdj_macd_rssi_ratio': fe_train.kdj_macd_rssi_ratio(),
    }
    fe_test = feature_extractor(flat_test_sample, flat_test_sample_velocity)
    test_features = {
        'ratio': fe_test.ratio(),
        'kdj_ratio': fe_test.kdj_ratio(),
        'ratio_velocity': fe_test.ratio_velocity(),
        'ud': fe_test.ud(),
        'kdj_macd_rssi_ratio': fe_test.kdj_macd_rssi_ratio(),
    }

    # Raw 2-column labels: (col[-3] + col[-2], col[-1]).
    # NOTE(review): grounded only in the column arithmetic here; confirm the
    # semantic meaning of the last three columns against the data schema.
    train_label_raw = np.stack(
        (flat_train_sample[:, -3] + flat_train_sample[:, -2],
         flat_train_sample[:, -1]), axis=1)
    test_label_raw = np.stack(
        (flat_test_sample[:, -3] + flat_test_sample[:, -2],
         flat_test_sample[:, -1]), axis=1)

    model_dict = {}
    predict_dict = {}

    # 'ratio' also produces the lag-shifted labels shared by every family.
    # NOTE(review): assumes data_label_shift trims `lagday` trailing rows so
    # the `[:-lagday]` slices below stay aligned with train_label — confirm.
    train, train_label = data_label_shift(
        train_features['ratio'], train_label_raw, lag_day=lagday)
    test, test_label = data_label_shift(
        test_features['ratio'], test_label_raw, lag_day=lagday)
    train_label = np.argmax(train_label, axis=-1)
    test_label = np.argmax(test_label, axis=-1)

    model_dict['ratio'], y_tr, y_te = _fit_and_report(
        'ratio', train, train_label, test, test_label)
    predict_dict['ratio'] = [y_tr, y_te]

    for name in ('kdj_ratio', 'ratio_velocity', 'ud', 'kdj_macd_rssi_ratio'):
        model_dict[name], y_tr, y_te = _fit_and_report(
            name, train_features[name][:-lagday], train_label,
            test_features[name][:-lagday], test_label)
        predict_dict[name] = [y_tr, y_te]

    # Ensemble: each family's class predictions become one input column
    # (dict insertion order keeps columns in the training order above).
    predict_train = np.stack([p[0] for p in predict_dict.values()], axis=1)
    predict_test = np.stack([p[1] for p in predict_dict.values()], axis=1)

    model = xgb.XGBClassifier(max_depth=3, learning_rate=0.05,
                              n_estimators=10, silent=True)
    model.fit(predict_train, train_label)
    model_dict['ensemble'] = model
    y_xgb_train_ens = model.predict(predict_train)
    y_xgb_v_ens = model.predict(predict_test)
    print("Train Accuracy [Ens]: ",
          accuracy_score(y_xgb_train_ens, train_label))
    print("Validation Accuracy [Ens]: ",
          accuracy_score(y_xgb_v_ens, test_label))

    return model_dict
# NOTE(review): standalone TF1 training-script fragment (notebook-cell style);
# names below come from project-local modules.
import sys
sys.path.append('../')  # make sibling project packages importable
import tensorflow as tf
import hparam as conf                  # experiment hyper-parameter configs
import sessionWrapper as sesswrapper   # training-session helper
from utility import dataProcess as dp  # data loading / windowing helpers
import model_zoo as mz                 # model builders
import loss_func as l                  # loss functions

# Start from a clean TF1 graph (script may be re-run interactively).
tf.reset_default_graph()

c = conf.config(
    'test_onlyEnc_biderect_gru_nospecialstock_cls').config['common']
# Total window length = encoder input steps + prediction steps.
sample_window = c['input_step'] + c['predict_step']

tv_gen = dp.train_validation_generaotr()  # (sic) project class name
# Multi-stock train/validation split; *_raw variants carry the unprocessed
# counterparts — presumably unnormalized data; confirm in dataProcess.
train, validation, train_raw, validation_raw, _ = tv_gen.generate_train_val_set_mStock(
    c['src_file_path'], c['input_stocks'], c['input_step'],
    c['predict_step'], c['train_eval_ratio'], metafile=c['meta_file_path'])

# Infer the feature count from the data when the config leaves it unset.
if c['feature_size'] == None:
    c['feature_size'] = train.shape[-1]

#x = tf.placeholder(tf.float32, [None, c['input_step'], train.shape[-1]])
x = tf.placeholder(tf.float32, [None, c['input_step'], c['feature_size']])
# 3 output classes per predicted step (presumably down/flat/up — confirm).
y = tf.placeholder(tf.float32, [None, c['predict_step'], 3])

# Separate train (dropout 0.6) and eval (dropout 1.0) graphs.
decoder_output = mz.model_zoo(c, x, y, dropout=0.6, is_train=True).decoder_output
# NOTE(review): statement truncated in this chunk — continues past this view.
decoder_output_eval = mz.model_zoo(c, x, y, dropout=1.0,
# NOTE(review): synthetic-data experiment fragment; `tf` and `conf` are
# imported above this view.
import sessionWrapper as sesswrapper
import data_process_list as dp
import model_zoo as mz
import loss_func as l
import random
import math
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

# Containers for the generated samples.
train = []
validation = []

tf.reset_default_graph()
c = conf.config('test_clsModel').config['common']
# One sample covers input steps plus prediction steps.
sample_step = c['input_step'] + c['predict_step']

rand = 0
# 38500 / 35 = 1100 windows; a fresh random phase in [0, 2*pi) per window.
for i in range(0, 38500, 35):
    rand = random.random() * 2 * math.pi
    # if rand >= 1:
    #     rand = 0
    # else:
    #     rand += 0.1
    #
    # rand = rand * 2 * math.pi
    # First 1000 windows (i < 35000) presumably go to train; the loop body
    # continues beyond this chunk.
    if i < 35000:
# NOTE(review): fragment starts mid-function — `data_set`, `batch`,
# `sample_step`, `train_step` and `feature_size` are defined above this view;
# indentation below is reconstructed.
        # Draw a random window start and collect one (1, sample_step, F) slice.
        rnd = random.randint(0, len(data_set) - sample_step)
        tmpbatch = np.reshape(data_set[rnd:rnd + sample_step, :],
                              (1, sample_step, -1))
        batch.append(tmpbatch)
    batch = np.squeeze(np.array(batch))
    # Split each window into input steps (train) and label steps.
    train, label = np.split(batch, [train_step], axis=1)
    if feature_size == None:
        feature_size = np.shape(train)[-1]
    train = train[:, :, :feature_size]
    label = label[:, :, 0]  # labels keep only the first feature column
    return train, label


c = conf.config('baseline_random').config['common']
epoch = 0
batch_size = 32
while epoch < 100:
    epoch += 1
    # Shuffle only in the non-random sampling mode; `t_3` is defined
    # outside this view.
    if c['sample_type'] != 'random':
        np.random.shuffle(t_3)
    #Check variable reused
    # tvars = tf.trainable_variables()
    # tvars_vals = sess.run(tvars)
    # for var, val in zip(tvars, tvars_vals):
    #     print(var.name)
    # break
import init
import tensorflow as tf
import hparam as conf
import sessionWrapper as sesswrapper
import data_process_specialList as dp
import model_zoo as mz
import loss_func as l
import random
import math
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

tv_gen = dp.train_validation_generaotr()
c = conf.config('test_sampleMethod').config['common']
process_data = tv_gen._load_data(c['src_file_path'])
select_data = tv_gen._selectData2array(process_data, c['input_stocks'], None)

# Sanity check: _selectData2array output should match the raw data, with
# shape (period, features, len(c['input_stocks'])).
for sIdx in range(len(c['input_stocks'])):
    raw_data = process_data.loc[c['input_stocks'][sIdx]]
    selected_data = select_data[:, :, sIdx]
    test_result = []
    for i in range(len(selected_data)):
        # Row-wise equality between the raw frame and the selected array.
        test_result.append((raw_data.iloc[i] == selected_data[i]).all())
    # NOTE(review): `.any() == True` fires when ANY row matches; if the intent
    # is "all rows match", `.all()` seems intended — confirm against the
    # branch body, which is truncated below this view.
    if np.array(test_result).any() == True:
import data_process_list as dp
import tensorflow as tf
import os
import model_zoo as mz


def map_ud(softmax_output):
    """Map softmax class indices to signed direction values.

    Args:
        softmax_output: array of class scores; argmax is taken over the
            last axis.

    Returns:
        list with class 0 -> -1, 1 -> 0, 2 -> +1 (presumably
        down/flat/up — confirm against the label encoding).
    """
    ud_meta = {0: -1, 1: 0, 2: 1}
    ud_index = np.argmax(softmax_output, axis=-1)
    ud = [ud_meta[v] for v in ud_index]
    return ud


c = conf.config('baseline_2in1').config['common']
# Taiwan ETF ticker universe used for evaluation.
stocks = ['0050', '0051', '0052', '0053', '0054', '0055', '0056', '0057',
          '0058', '0059', '006201', '006203', '006204', '006208', '00690',
          '00692', '00701', '00713']
tv_gen = dp.train_validation_generaotr()
# Swap in the random-window sampler when configured.
if c['sample_type'] == 'random':
    tv_gen.generate_train_val_set = tv_gen.generate_train_val_set_random
eval_set = tv_gen.generate_test_set(c['src_file_path'], stocks, c['input_step'])


def load_ckpt(saver, sess, checkpoint_dir, ckpt_name=""):
    """Load the checkpoint. According to the scale, read different folder
    to load the models.

    NOTE(review): function truncated in this chunk — body continues past
    this view.
    """
    print(" [*] Reading checkpoints...")
# NOTE(review): fragment starts mid-function — `tmpbatch`, `batch`,
# `train_step` and `feature_size` are defined above this view; indentation
# below is reconstructed.
        batch.append(tmpbatch)
    batch = np.squeeze(np.array(batch))
    train, label = np.split(batch, [train_step], axis=1)
    if feature_size == None:
        feature_size = np.shape(train)[-1]
    #train = np.reshape(train[:,:,3], (batch_size, train_step, -1))
    train = train[:, :, :feature_size]
    # Labels keep columns 56 onward — unlike the sibling sampler that keeps
    # only column 0; presumably the class columns — confirm.
    label = label[:, :, 56:]
    return train, label


tf.reset_default_graph()
c = conf.config('test_onlyEnc_biderect_gru_cls').config['common']
#c['src_file_path'] = '../Data/all_feature_data.pkl'
tv_gen = dp.train_validation_generaotr()
# Use the random-window sampler when configured.
if 'random' in c['sample_type']:
    tv_gen.generate_train_val_set = tv_gen.generate_train_val_set_random
train, validation = tv_gen.generate_train_val_set(
    c['src_file_path'], c['input_stocks'], c['input_step'],
    c['predict_step'], c['train_eval_ratio'], c['train_period'])
Ndata = len(train)


# Remote training task, pinned to one GPU per invocation.
@ray.remote(num_gpus=1)
def Enc_gru_cls(conf, steps, train, validation, weights=None):
    # NOTE(review): writes into the module-level dict `c`; the parameter
    # `conf` shadows the `hparam` module alias used elsewhere — confirm the
    # mutation is intended. Function truncated in this chunk.
    c['input_step'] = conf['input_step']
import init
import hparam as conf
import tensorflow as tf
import data_process as dp
import os
import model_zoo as mz

c = conf.config('baseline').config['common']
# Fixed evaluation window (YYYYMMDD strings).
c['test_period'] = ['20130102', '20130402']
tv_gen = dp.train_validation_generaotr()
# train_eval_ratio = 0.0: everything in the period goes into the eval set.
evalSet, _ = tv_gen.generate_train_val_set(
    c['src_file_path'], c['input_stocks'], c['input_step'],
    c['predict_step'], 0.0, c['test_period'])
# Single full-batch evaluation pass.
c['batch_size'] = len(evalSet)


def load_ckpt(saver, sess, checkpoint_dir, ckpt_name=""):
    """Load the checkpoint. According to the scale, read different folder
    to load the models.

    NOTE(review): function truncated in this chunk — body continues past
    this view.
    """
    print(" [*] Reading checkpoints...")
    print(checkpoint_dir)
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
# NOTE(review): sine-wave sanity-test fragment; `sys` is imported above
# this view.
sys.path.append('../')  # make sibling project packages importable
import tensorflow as tf
import hparam as conf
import sessionWrapper as sesswrapper
import data_process_list as dp
import model_zoo as mz


def l2loss(x, y):
    """Mean of TF's L2 loss of the elementwise difference ``x - y``.

    NOTE(review): tf.nn.l2_loss already reduces to a scalar (sum(t**2)/2),
    so the reduce_mean is a no-op here — confirm whether a per-element mean
    was intended.
    """
    loss = tf.reduce_mean(tf.nn.l2_loss(x - y))
    return loss


c = conf.config('sin_test').config['common']

#tv_gen = dp.train_validation_generaotr()
#train, validation = tv_gen.generate_train_val_set(c['src_file_path'], c['input_stocks'], c['input_step'], c['predict_step'], c['train_eval_ratio'], c['train_period'])

import random
import math
import numpy as np

train = []
validation = []
# 1100 windows with a fresh random phase in [0, 2*pi) each.
for i in range(0, 38500, 35):
    rand = random.random() * 2 * math.pi
    # First 1000 windows (i < 35000) presumably go to train; the loop body
    # continues beyond this chunk.
    if i < 35000: