def _saveIndividualFeatureImportance(self, sess, dl, logdir, initial_state=None, delta=1e-6):
    """Estimate variable importance by finite differences: perturb each
    individual feature by `delta`, rerun the network, and record the mean
    absolute change in the SDF weight w."""
    w = self.getWeightWithData(sess, dl, initial_state=initial_state)
    gradients = np.zeros(shape=(self._individual_feature_dim,))
    time_start = time.time()
    for _, (I_macro, I, R, mask) in enumerate(dl.iterateOneEpoch(subEpoch=False)):
        for idx in range(self._individual_feature_dim):
            I_copy = copy.deepcopy(I)
            I_copy[mask, idx] += delta
            feed_dict = {self._I_macro_placeholder: I_macro,
                         self._I_placeholder: I_copy,
                         self._R_placeholder: R,
                         self._mask_placeholder: mask,
                         self._dropout_placeholder: 1.0}
            if self.model_params['use_rnn']:
                feed_dict[self._initial_state_placeholder] = initial_state
            w_idx, = sess.run(fetches=[self._w], feed_dict=feed_dict)
            gradients[idx] = np.mean(np.absolute(w_idx - w))
            time_last = time.time() - time_start
            time_est = time_last / (idx + 1) * self._individual_feature_dim
            deco_print('Calculating VI for %s\tElapse / Estimate: %.2fs / %.2fs'
                       % (dl.getIndividualFeatureByIdx(idx), time_last, time_est))
    gradients /= delta
    deco_print('Saving output in %s' % os.path.join(logdir, 'ave_absolute_gradient.npy'))
    np.save(os.path.join(logdir, 'ave_absolute_gradient.npy'), gradients)
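# --- Hypothetical downstream usage (sketch, not part of the repo) ---
# The function above writes a vector of mean absolute weight changes per
# feature, normalized by delta. One way to consume it is to rank features by
# sensitivity; `feature_names` is an assumed list aligned with the array.
import numpy as np

grads = np.load('logdir/ave_absolute_gradient.npy')
feature_names = ['feature_%d' % i for i in range(len(grads))]  # placeholder names
for i in np.argsort(grads)[::-1][:10]:                         # most sensitive first
    print('%s\t%.6f' % (feature_names[i], grads[i]))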
def _build_forward_pass_graph(self):
    if self.model_params['use_rnn']:
        with tf.variable_scope('RNN_Layer'):
            rnn_cell = create_rnn_cell(
                cell_type=self.model_params['cell_type_rnn'],
                num_units=self.model_params['num_units_rnn'],
                num_layers=self.model_params['num_layers_rnn'],
                dp_input_keep_prob=self._dropout_placeholder,
                dp_output_keep_prob=1.0)
            rnn_outputs, rnn_state = tf.nn.dynamic_rnn(
                cell=rnn_cell,
                inputs=self._rnn_input,
                initial_state=self._initial_state,
                dtype=tf.float32)
            self._macro_nn_input = tf.squeeze(rnn_outputs, axis=0)
            if self.model_params['cell_type_rnn'] == 'lstm':
                if self.model_params['num_layers_rnn'] == 1:
                    self._rnn_last_state = tf.concat([rnn_state.c, rnn_state.h], axis=1)
                else:
                    self._rnn_last_state = tf.concat(
                        [tf.concat([state_tuple.c, state_tuple.h], axis=1)
                         for state_tuple in rnn_state], axis=1)
            else:
                if self.model_params['num_layers_rnn'] == 1:
                    self._rnn_last_state = rnn_state
                else:
                    self._rnn_last_state = tf.concat(rnn_state, axis=1)
    else:
        self._macro_nn_input = self._I_macro_placeholder

    with tf.variable_scope('NN_Layer'):
        # Broadcast macro inputs across assets: T * N * macro_feature_dim
        I_macro_tile = tf.tile(tf.expand_dims(self._macro_nn_input, axis=1), [1, self._nSize, 1])
        I_macro_masked = tf.boolean_mask(I_macro_tile, mask=self._mask_placeholder)
        I_masked = tf.boolean_mask(self._I_placeholder, mask=self._mask_placeholder)
        # None * (macro_feature_dim + individual_feature_dim)
        I_concat = tf.concat([I_masked, I_macro_masked], axis=1)
        R_masked = tf.boolean_mask(self._R_placeholder, mask=self._mask_placeholder)
        h_l = I_concat
        for l in range(self.model_params['num_layers']):
            with tf.variable_scope('dense_layer_%d' % l):
                layer_l = Dense(units=self.model_params['hidden_dim'][l], activation=tf.nn.relu)
                h_l = layer_l(h_l)
                h_l = tf.nn.dropout(h_l, self._dropout_placeholder)
        with tf.variable_scope('last_dense_layer'):
            layer = Dense(units=1)
            w = layer(h_l)
            self._w = tf.reshape(w, shape=[-1])
        weighted_R_masked = R_masked * self._w
        N_i = tf.reduce_sum(tf.to_int32(self._mask_placeholder), axis=1)  # length-T vector
        weighted_R_split = tf.split(weighted_R_masked, num_or_size_splits=N_i)
        if 'normalize_w' in self.model_params and self.model_params['normalize_w']:
            deco_print('Normalize weight by N!')
            N_bar = tf.reduce_mean(N_i)
            self._SDF = tf.expand_dims(
                tf.concat([tf.reduce_sum(item, keepdims=True) for item in weighted_R_split], axis=0)
                / tf.to_float(N_i) * tf.to_float(N_bar), axis=1) + 1
        else:
            self._SDF = tf.expand_dims(
                tf.concat([tf.reduce_sum(item, keepdims=True) for item in weighted_R_split], axis=0),
                axis=1) + 1
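# --- Illustration (not part of the repo) ---
# The last block above assembles the SDF as M_{t+1} = 1 + sum_i w_{t,i} * R_{t+1,i},
# where the sum runs over the non-missing assets of each date. A minimal numpy
# sketch of the same split-and-sum reduction, with toy values:
import numpy as np

w_masked = np.array([0.2, -0.1, 0.3])      # network weights for masked assets
R_masked = np.array([0.01, 0.02, -0.01])   # matching excess returns
N_i = np.array([2, 1])                     # assets per date; sums to len(w_masked)
splits = np.split(w_masked * R_masked, np.cumsum(N_i)[:-1])
SDF = np.array([s.sum() for s in splits]) + 1.0   # one SDF realization per date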
def _build_train_op(self, loss, scope, loss_factor=1.0):
    """Construct a training op.

    Arguments:
        loss: scalar `Tensor` to minimize.
        scope: variable scope whose trainable variables are optimized.
        loss_factor: multiplier applied to the loss before optimization.
    """
    ### Trainable variables
    deco_print('Trainable variables (scope=%s)' % scope)
    total_params = 0
    trainable_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)
    for var in trainable_variables:
        var_params = 1
        for dim in var.get_shape():
            var_params *= dim.value
        total_params += var_params
        print('Name: {} and shape: {}'.format(var.name, var.get_shape()))
    deco_print('Number of parameters: %d' % total_params)
    ### Train optimizer
    if self._model_params['optimizer'] == 'Momentum':
        optimizer = lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.9)
    elif self._model_params['optimizer'] == 'AdaDelta':
        optimizer = lambda lr: tf.train.AdadeltaOptimizer(lr, rho=0.95, epsilon=1e-08)
    else:
        optimizer = self._model_params['optimizer']
    ### Learning rate decay
    if 'use_decay' in self._model_params and self._model_params['use_decay']:
        learning_rate_decay_fn = lambda lr, global_step: tf.train.exponential_decay(
            learning_rate=lr,
            global_step=global_step,
            decay_steps=self._model_params['decay_steps'],
            decay_rate=self._model_params['decay_rate'],
            staircase=True)
    else:
        learning_rate_decay_fn = None
    return tf.contrib.layers.optimize_loss(
        loss=loss * loss_factor,
        global_step=self._global_step,
        learning_rate=self._model_params['learning_rate'],
        optimizer=optimizer,
        gradient_noise_scale=None,
        gradient_multipliers=None,
        clip_gradients=self._model_params['max_grad_norm'] if 'max_grad_norm' in self._model_params else None,
        learning_rate_decay_fn=learning_rate_decay_fn,
        update_ops=None,
        variables=trainable_variables,
        name=None,
        summaries=None,
        colocate_gradients_with_ops=True,
        increment_global_step=True)
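# --- Note on the decay schedule (illustrative values, not from the config) ---
# With staircase=True, tf.train.exponential_decay applies
#     lr(step) = learning_rate * decay_rate ** (step // decay_steps),
# i.e. the rate drops by a factor of decay_rate every decay_steps steps.
def _staircase_lr(step, learning_rate=0.01, decay_rate=0.9, decay_steps=1000):
    # Pure-Python mirror of the schedule above; the parameter values are made up.
    return learning_rate * decay_rate ** (step // decay_steps)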
def loadSavedModel(self, sess, logdir):
    if tf.train.latest_checkpoint(logdir) is not None:
        saver = tf.train.Saver(max_to_keep=100)
        saver.restore(sess, tf.train.latest_checkpoint(logdir))
        deco_print('Restored checkpoint')
    else:
        deco_print('WARNING: Checkpoint not found! Use random initialization!')
        self.randomInitialization(sess)
def loadSavedModel(self, nFactors):
    if os.path.exists(os.path.join(self._logdir, 'model_%d.npz' % nFactors)):
        tmp = np.load(os.path.join(self._logdir, 'model_%d.npz' % nFactors))
        self._Lam = tmp['lam']
        self._w = tmp['w']
        self._k = tmp['k']
        deco_print('Model Restored!')
    else:
        self._Lam, self._w, self._k = None, None, None
        deco_print('WARNING: Model Not Found!')
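# --- Hypothetical counterpart (sketch, not the repo's saver) ---
# The loader above expects an .npz archive with keys 'lam', 'w', 'k'; such a
# file can be produced with np.savez. Paths and array contents are placeholders.
import os
import numpy as np

logdir, nFactors = '/tmp/ipca_logs', 3
Lam, w, k = np.eye(3), np.ones(3), np.arange(3)
np.savez(os.path.join(logdir, 'model_%d.npz' % nFactors), lam=Lam, w=w, k=k)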
def _load_macro_feature(self, pathMacroFeature, macroIdx=None, meanMacroFeature=None,
                        stdMacroFeature=None, normalizeMacroFeature=True):
    if pathMacroFeature is None:
        self._macroFeature = np.empty(shape=[self._dateCount, 0])
        self._meanMacroFeature = None
        self._stdMacroFeature = None
    else:
        tmp = np.load(pathMacroFeature)
        if macroIdx is None or macroIdx == 'all':
            macro_idx = np.arange(len(tmp['variable']))
        elif isinstance(macroIdx, list):
            macro_idx = np.sort(np.array(macroIdx, dtype=int))
        elif macroIdx == '178':
            # 178 selected variables: the first 124 plus indices 284-337
            macro_idx = np.sort(np.concatenate((np.arange(124), np.arange(284, 338))))
        else:
            macro_idx = []
            deco_print('WARNING: macroIdx not supported! Use no macro variables.')
        self._macroFeature = tmp['data'][:, macro_idx]
        if normalizeMacroFeature:
            if meanMacroFeature is None or stdMacroFeature is None:
                # No moments supplied: compute them from this split (training data)
                self._meanMacroFeature = self._macroFeature.mean(axis=0)
                self._stdMacroFeature = self._macroFeature.std(axis=0)
            else:
                # Reuse supplied (training) moments, e.g. for validation/test splits
                self._meanMacroFeature = meanMacroFeature
                self._stdMacroFeature = stdMacroFeature
            self._macroFeature -= self._meanMacroFeature
            self._macroFeature /= self._stdMacroFeature
        else:
            self._meanMacroFeature = None
            self._stdMacroFeature = None
        self._idx2var_macro, self._var2idx_macro = self._create_var_idx_associations(
            tmp['variable'][macro_idx])
    self._varCount_macro = self._macroFeature.shape[1]
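# --- Illustration of the normalization convention (not part of the repo) ---
# When validation/test layers pass in meanMacroFeature/stdMacroFeature taken
# from the training layer, every split ends up z-scored with the *training*
# moments. Minimal sketch with toy arrays:
import numpy as np

train = np.random.randn(100, 5)                 # toy macro features
mean, std = train.mean(axis=0), train.std(axis=0)
valid = np.random.randn(20, 5)
valid_normalized = (valid - mean) / std         # reuse train moments, as above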
def train(self, sess, model_valid, numEpoch=128, subEpoch=32):
    saver = tf.train.Saver(max_to_keep=128)
    if os.path.exists(self._logdir_nFactor):
        os.system('rm -rf %s' % self._logdir_nFactor)
    best_loss = float('inf')
    time_start = time.time()
    for epoch in range(numEpoch):
        deco_print('Doing Epoch %d' % epoch)
        for _ in range(subEpoch):
            feed_dict_train = {self._I_placeholder: self._I_data,
                               self._R_placeholder: self._R_data,
                               self._splits_placeholder: self._splits_data,
                               self._dropout_placeholder: self._dropout}
            sess.run(fetches=[self._train_op], feed_dict=feed_dict_train)
        loss_train_epoch = self.evalLoss(sess)
        loss_valid_epoch = model_valid.evalLoss(sess)
        if loss_valid_epoch < best_loss:
            best_loss = loss_valid_epoch
            deco_print('Saving current best checkpoint')
            saver.save(sess, save_path=os.path.join(self._logdir_nFactor, 'model-best'))
        time_elapse = time.time() - time_start
        time_est = time_elapse / (epoch + 1) * numEpoch
        deco_print('Epoch %d Train Loss: %0.4f' % (epoch, loss_train_epoch))
        deco_print('Epoch %d Valid Loss: %0.4f' % (epoch, loss_valid_epoch))
        deco_print('Epoch %d Elapse/Estimate: %0.2fs/%0.2fs' % (epoch, time_elapse, time_est))
        print('\n')
def main(_):
    with open(FLAGS.config, 'r') as file:
        config = json.load(file)
    deco_print('Read the following in config: ')
    print(json.dumps(config, indent=4))
    deco_print('Creating data layer')
    dl_train = DataInRamInputLayer(config['individual_feature_file'])
    dl_valid = DataInRamInputLayer(config['individual_feature_file_valid'])
    dl_test = DataInRamInputLayer(config['individual_feature_file_test'])
    deco_print('Data layer created')
    model = ModelIPCA_GDFFN(individual_feature_dim=config['individual_feature_dim'],
                            tSize=config['tSize_train'],
                            hidden_dims=config['hidden_dims'],
                            nFactor=FLAGS.nFactor,
                            lr=config['lr'],
                            dropout=config['dropout'],
                            logdir=FLAGS.logdir,
                            dl=dl_train,
                            is_train=True)
    model_valid = ModelIPCA_GDFFN(individual_feature_dim=config['individual_feature_dim'],
                                  tSize=config['tSize_valid'],
                                  hidden_dims=config['hidden_dims'],
                                  nFactor=FLAGS.nFactor,
                                  lr=config['lr'],
                                  dropout=config['dropout'],
                                  logdir=FLAGS.logdir,
                                  dl=dl_valid,
                                  is_train=False,
                                  force_var_reuse=True)
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess_config = tf.ConfigProto(gpu_options=gpu_options)
    sess = tf.Session(config=sess_config)
    if FLAGS.isTrain:
        model.randomInitialization(sess)
        model.train(sess, model_valid, numEpoch=config['num_epoch'], subEpoch=config['sub_epoch'])
    model_test = ModelIPCA_GDFFN(individual_feature_dim=config['individual_feature_dim'],
                                 tSize=config['tSize_test'],
                                 hidden_dims=config['hidden_dims'],
                                 nFactor=FLAGS.nFactor,
                                 lr=config['lr'],
                                 dropout=config['dropout'],
                                 logdir=FLAGS.logdir,
                                 dl=dl_test,
                                 is_train=False,
                                 force_var_reuse=True)
    model.loadSavedModel(sess)
    w = model.getMarkowitzWeight(sess)
    stats = pd.DataFrame(np.zeros((4, 3)), columns=['train', 'valid', 'test'],
                         index=['SR', 'UV', 'Alpha', 'Alpha_weighted'])
    stats.loc[:, 'train'] = model.calculateStatistics(sess, w)
    stats.loc[:, 'valid'] = model_valid.calculateStatistics(sess, w)
    stats.loc[:, 'test'] = model_test.calculateStatistics(sess, w)
    print(stats)
def randomInitialization(self, sess):
    sess.run(tf.global_variables_initializer())
    deco_print('Random initialization')
config = json.load(f)
config['global_batch_size'] = config['num_gpus'] * config['batch_size_per_gpu']
if 'selected_covariate_file_int' in config and config['selected_covariate_file_int'] and \
   'selected_covariate_file_float' in config and config['selected_covariate_file_float']:
    with open(config['selected_covariate_file_int'], 'r') as f:
        selected_int = list(json.load(f).values())
    with open(config['selected_covariate_file_float'], 'r') as f:
        selected_float = list(json.load(f).values())
    config['feature_dim_rnn'] = len(selected_int) + len(selected_float)
    config['feature_dim_ff'] = 291 - config['feature_dim_rnn']
else:
    selected_int = False
    selected_float = False
    config['feature_dim_rnn'] = 291
    config['feature_dim_ff'] = 0
deco_print('Read Following Config')
deco_print_dict(config)
###
### Create Data Layer
deco_print('Creating Data Layer...')
path = os.path.join(os.path.expanduser('~'), 'data/RNNdata')
if FLAGS.dataset == 'subprime':
    path = os.path.join(path, 'subprime_new')
elif FLAGS.dataset == 'prime':
    path = os.path.join(path, 'prime_new')
elif FLAGS.dataset == 'all':
    path = os.path.join(path, 'prime_subprime_new')
else:
    raise ValueError('Dataset Not Found!')
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from src.utils import deco_print, deco_print_dict, decide_boundary, construct_nonlinear_function
from src.model import Config, Model
from src.data_layer import DataInRamInputLayer

tf.flags.DEFINE_string('logdir', '', 'Path to save logs and checkpoints')
tf.flags.DEFINE_string('model', 'neural', 'neural/logistic')
tf.flags.DEFINE_string(
    'task', '',
    'Task: 1d_nonlinear/2d_nonlinear/2d_contour/3d_contour/3d_contour_slice')
tf.flags.DEFINE_string('plot_out', '', 'Path to save plots')
FLAGS = tf.flags.FLAGS

### Create Data Layer
deco_print('Creating Data Layer')
path = os.path.join(os.path.expanduser('~'), 'data/vol/Numpy_data_subprime_Test_new')
mode = 'analysis'
dl = DataInRamInputLayer(path=path, shuffle=False, load_file_list=False)
deco_print('Data Layer Created')
###
### Create Model
deco_print('Creating Model')
if FLAGS.model == 'neural':
    config = Config(feature_dim=291, num_category=7, batch_size=1, dropout=1.0)
elif FLAGS.model == 'logistic':
    config = Config(feature_dim=291, num_category=7, hidden_dim=[],
def train(self, sess, dl, dl_valid, logdir, model_valid, loss_weight=None, loss_weight_valid=None,
          dl_test=None, model_test=None, loss_weight_test=None, printOnConsole=True, printFreq=128,
          saveLog=True, saveBestFreq=128, ignoreEpoch=64):
    if self._mode != 'train':
        deco_print('ERROR: Model has no train op!')
        return
    ### validation on loss and sharpe
    logdir_loss = os.path.join(logdir, 'loss')
    logdir_sharpe = os.path.join(logdir, 'sharpe')
    os.system('mkdir -p ' + logdir_loss)
    os.system('mkdir -p ' + logdir_sharpe)
    ###
    saver = tf.train.Saver(max_to_keep=100)
    if saveLog:
        sw = tf.summary.FileWriter(logdir, sess.graph)
    best_valid_loss_unc = float('inf')
    best_valid_loss = float('inf')
    best_valid_sharpe_unc = float('-inf')
    best_valid_sharpe = float('-inf')
    sharpe_train = []
    sharpe_valid = []
    ### evaluate test data
    evaluate_test_data = False
    if dl_test is not None and model_test is not None:
        evaluate_test_data = True
        sharpe_test = []
    if self.model_params['use_rnn']:
        INITIAL_train = self.getZeroInitialState()
    else:
        INITIAL_train = None

    ### train unconditional loss
    time_start = time.time()
    deco_print('Start Training Unconditional Loss...')
    for epoch in range(self.model_params['num_epochs_unc']):
        for _, (I_macro, I, R, mask) in enumerate(dl.iterateOneEpoch(subEpoch=self.model_params['sub_epoch'])):
            feed_dict = {self._I_macro_placeholder: I_macro,
                         self._I_placeholder: I,
                         self._R_placeholder: R,
                         self._mask_placeholder: mask,
                         self._dropout_placeholder: self.model_params['dropout']}
            if self.model_params['weighted_loss']:
                feed_dict[self._loss_weight] = loss_weight
            if self.model_params['use_rnn']:
                feed_dict[self._initial_state_placeholder] = INITIAL_train
            sess.run(fetches=[self._train_model_op_unc], feed_dict=feed_dict)
        ### evaluate train loss / sharpe
        train_epoch_loss, INITIAL_valid = self.evaluate_loss(sess, dl, INITIAL_train, loss_weight)
        train_epoch_loss_residual = self.evaluate_loss_residual(sess, dl, INITIAL_train)
        train_epoch_sharpe = self.evaluate_sharpe(sess, dl, INITIAL_train)
        sharpe_train.append(train_epoch_sharpe)
        ### evaluate valid loss / sharpe
        valid_epoch_loss, INITIAL_test = model_valid.evaluate_loss(sess, dl_valid, INITIAL_valid, loss_weight_valid)
        valid_epoch_loss_residual = model_valid.evaluate_loss_residual(sess, dl_valid, INITIAL_valid)
        valid_epoch_sharpe = model_valid.evaluate_sharpe(sess, dl_valid, INITIAL_valid)
        sharpe_valid.append(valid_epoch_sharpe)
        ### evaluate test loss / sharpe
        if evaluate_test_data:
            test_epoch_loss, _ = model_test.evaluate_loss(sess, dl_test, INITIAL_test, loss_weight_test)
            test_epoch_loss_residual = model_test.evaluate_loss_residual(sess, dl_test, INITIAL_test)
            test_epoch_sharpe = model_test.evaluate_sharpe(sess, dl_test, INITIAL_test)
            sharpe_test.append(test_epoch_sharpe)
        ### print loss / sharpe
        if printOnConsole and epoch % printFreq == 0:
            print('\n\n')
            deco_print('Doing epoch %d' % epoch)
            if evaluate_test_data:
                deco_print('Epoch %d train/valid/test loss: %0.4f/%0.4f/%0.4f'
                           % (epoch, train_epoch_loss, valid_epoch_loss, test_epoch_loss))
                deco_print('Epoch %d train/valid/test loss (residual): %0.4f/%0.4f/%0.4f'
                           % (epoch, train_epoch_loss_residual, valid_epoch_loss_residual, test_epoch_loss_residual))
                deco_print('Epoch %d train/valid/test sharpe: %0.4f/%0.4f/%0.4f'
                           % (epoch, train_epoch_sharpe, valid_epoch_sharpe, test_epoch_sharpe))
            else:
                deco_print('Epoch %d train/valid loss: %0.4f/%0.4f' % (epoch, train_epoch_loss, valid_epoch_loss))
                deco_print('Epoch %d train/valid loss (residual): %0.4f/%0.4f'
                           % (epoch, train_epoch_loss_residual, valid_epoch_loss_residual))
                deco_print('Epoch %d train/valid sharpe: %0.4f/%0.4f'
                           % (epoch, train_epoch_sharpe, valid_epoch_sharpe))
        if saveLog:
            value_loss_train = summary_pb2.Summary.Value(tag='Train_epoch_loss', simple_value=train_epoch_loss)
            value_loss_residual_train = summary_pb2.Summary.Value(tag='Train_epoch_loss_residual',
                                                                  simple_value=train_epoch_loss_residual)
            value_loss_valid = summary_pb2.Summary.Value(tag='Valid_epoch_loss', simple_value=valid_epoch_loss)
            value_loss_residual_valid = summary_pb2.Summary.Value(tag='Valid_epoch_loss_residual',
                                                                  simple_value=valid_epoch_loss_residual)
            value_sharpe_train = summary_pb2.Summary.Value(tag='Train_epoch_sharpe', simple_value=train_epoch_sharpe)
            value_sharpe_valid = summary_pb2.Summary.Value(tag='Valid_epoch_sharpe', simple_value=valid_epoch_sharpe)
            if evaluate_test_data:
                value_loss_test = summary_pb2.Summary.Value(tag='Test_epoch_loss', simple_value=test_epoch_loss)
                value_loss_residual_test = summary_pb2.Summary.Value(tag='Test_epoch_loss_residual',
                                                                     simple_value=test_epoch_loss_residual)
                value_sharpe_test = summary_pb2.Summary.Value(tag='Test_epoch_sharpe', simple_value=test_epoch_sharpe)
                summary = summary_pb2.Summary(value=[value_loss_train, value_loss_valid, value_loss_test,
                                                     value_loss_residual_train, value_loss_residual_valid,
                                                     value_loss_residual_test, value_sharpe_train,
                                                     value_sharpe_valid, value_sharpe_test])
            else:
                summary = summary_pb2.Summary(value=[value_loss_train, value_loss_valid,
                                                     value_loss_residual_train, value_loss_residual_valid,
                                                     value_sharpe_train, value_sharpe_valid])
            sw.add_summary(summary, global_step=epoch)
            sw.flush()
        ### save epoch
        if epoch > ignoreEpoch:
            if valid_epoch_loss < best_valid_loss_unc:
                best_valid_loss_unc = valid_epoch_loss
                if printOnConsole and epoch % printFreq == 0:
                    deco_print('Saving current best checkpoint (loss)')
                saver.save(sess, save_path=os.path.join(logdir_loss, 'model-best'))
            if valid_epoch_sharpe > best_valid_sharpe_unc:
                best_valid_sharpe_unc = valid_epoch_sharpe
                if printOnConsole and epoch % printFreq == 0:
                    deco_print('Saving current best checkpoint (sharpe)')
                saver.save(sess, save_path=os.path.join(logdir_sharpe, 'model-best'))
            if saveBestFreq > 0 and (epoch + 1) % saveBestFreq == 0:
                path_epoch_loss = os.path.join(logdir_loss, 'UNC', str(epoch))
                path_best_loss = os.path.join(logdir_loss, 'model-best*')
                path_best_checkpoint_loss = os.path.join(logdir_loss, 'checkpoint')
                os.system('mkdir -p ' + path_epoch_loss)
                os.system('cp %s %s' % (path_best_loss, path_epoch_loss))
                os.system('cp %s %s' % (path_best_checkpoint_loss, path_epoch_loss))
                path_epoch_sharpe = os.path.join(logdir_sharpe, 'UNC', str(epoch))
                path_best_sharpe = os.path.join(logdir_sharpe, 'model-best*')
                path_best_checkpoint_sharpe = os.path.join(logdir_sharpe, 'checkpoint')
                os.system('mkdir -p ' + path_epoch_sharpe)
                os.system('cp %s %s' % (path_best_sharpe, path_epoch_sharpe))
                os.system('cp %s %s' % (path_best_checkpoint_sharpe, path_epoch_sharpe))
        ### time
        if printOnConsole and epoch % printFreq == 0:
            time_elapse = time.time() - time_start
            time_est = time_elapse / (epoch + 1) * self.model_params['num_epochs_unc']
            deco_print('Epoch %d Elapse/Estimate: %0.2fs/%0.2fs' % (epoch, time_elapse, time_est))
    deco_print('Training Unconditional Loss Finished!\n')

    ### update moment condition
    deco_print('Start Updating Moment Conditions...')
    self.loadSavedModel(sess, logdir_loss)
    for _, (I_macro, I, R, mask) in enumerate(dl.iterateOneEpoch(subEpoch=self.model_params['sub_epoch'])):
        best_moment_loss = float('-inf')
        feed_dict = {self._I_macro_placeholder: I_macro,
                     self._I_placeholder: I,
                     self._R_placeholder: R,
                     self._mask_placeholder: mask,
                     self._dropout_placeholder: self.model_params['dropout']}
        if self.model_params['weighted_loss']:
            feed_dict[self._loss_weight] = loss_weight
        if self.model_params['use_rnn']:
            feed_dict[self._initial_state_placeholder] = INITIAL_train
        for epoch in range(self.model_params['num_epochs_moment']):
            # The adversary maximizes the moment loss, so keep the checkpoint
            # with the largest loss seen so far.
            _, loss = sess.run(fetches=[self._update_moment_op, self._loss], feed_dict=feed_dict)
            if loss > best_moment_loss:
                best_moment_loss = loss
                if printOnConsole and epoch % printFreq == 0:
                    deco_print('Saving current best checkpoint (epoch %d)' % epoch)
                saver.save(sess, save_path=os.path.join(logdir_loss, 'model-best'))
    deco_print('Updating Moment Conditions Finished!\n')

    ### train conditional loss
    time_start = time.time()
    deco_print('Start Training Conditional Loss...')
    self.loadSavedModel(sess, logdir_loss)
    for epoch in range(self.model_params['num_epochs']):
        for _, (I_macro, I, R, mask) in enumerate(dl.iterateOneEpoch(subEpoch=self.model_params['sub_epoch'])):
            feed_dict = {self._I_macro_placeholder: I_macro,
                         self._I_placeholder: I,
                         self._R_placeholder: R,
                         self._mask_placeholder: mask,
                         self._dropout_placeholder: self.model_params['dropout']}
            if self.model_params['weighted_loss']:
                feed_dict[self._loss_weight] = loss_weight
            if self.model_params['use_rnn']:
                feed_dict[self._initial_state_placeholder] = INITIAL_train
            sess.run(fetches=[self._train_model_op], feed_dict=feed_dict)
        ### evaluate train loss / sharpe
        train_epoch_loss, INITIAL_valid = self.evaluate_loss(sess, dl, INITIAL_train, loss_weight)
        train_epoch_loss_residual = self.evaluate_loss_residual(sess, dl, INITIAL_train)
        train_epoch_sharpe = self.evaluate_sharpe(sess, dl, INITIAL_train)
        sharpe_train.append(train_epoch_sharpe)
        ### evaluate valid loss / sharpe
        valid_epoch_loss, INITIAL_test = model_valid.evaluate_loss(sess, dl_valid, INITIAL_valid, loss_weight_valid)
        valid_epoch_loss_residual = model_valid.evaluate_loss_residual(sess, dl_valid, INITIAL_valid)
        valid_epoch_sharpe = model_valid.evaluate_sharpe(sess, dl_valid, INITIAL_valid)
        sharpe_valid.append(valid_epoch_sharpe)
        ### evaluate test loss / sharpe
        if evaluate_test_data:
            test_epoch_loss, _ = model_test.evaluate_loss(sess, dl_test, INITIAL_test, loss_weight_test)
            test_epoch_loss_residual = model_test.evaluate_loss_residual(sess, dl_test, INITIAL_test)
            test_epoch_sharpe = model_test.evaluate_sharpe(sess, dl_test, INITIAL_test)
            sharpe_test.append(test_epoch_sharpe)
        ### print loss / sharpe
        if printOnConsole and epoch % printFreq == 0:
            print('\n\n')
            deco_print('Doing epoch %d' % epoch)
            if evaluate_test_data:
                deco_print('Epoch %d train/valid/test loss: %0.4f/%0.4f/%0.4f'
                           % (epoch, train_epoch_loss, valid_epoch_loss, test_epoch_loss))
                deco_print('Epoch %d train/valid/test loss (residual): %0.4f/%0.4f/%0.4f'
                           % (epoch, train_epoch_loss_residual, valid_epoch_loss_residual, test_epoch_loss_residual))
                deco_print('Epoch %d train/valid/test sharpe: %0.4f/%0.4f/%0.4f'
                           % (epoch, train_epoch_sharpe, valid_epoch_sharpe, test_epoch_sharpe))
            else:
                deco_print('Epoch %d train/valid loss: %0.4f/%0.4f' % (epoch, train_epoch_loss, valid_epoch_loss))
                deco_print('Epoch %d train/valid loss (residual): %0.4f/%0.4f'
                           % (epoch, train_epoch_loss_residual, valid_epoch_loss_residual))
                deco_print('Epoch %d train/valid sharpe: %0.4f/%0.4f'
                           % (epoch, train_epoch_sharpe, valid_epoch_sharpe))
        if saveLog:
            value_loss_train = summary_pb2.Summary.Value(tag='Train_epoch_loss', simple_value=train_epoch_loss)
            value_loss_residual_train = summary_pb2.Summary.Value(tag='Train_epoch_loss_residual',
                                                                  simple_value=train_epoch_loss_residual)
            value_loss_valid = summary_pb2.Summary.Value(tag='Valid_epoch_loss', simple_value=valid_epoch_loss)
            value_loss_residual_valid = summary_pb2.Summary.Value(tag='Valid_epoch_loss_residual',
                                                                  simple_value=valid_epoch_loss_residual)
            value_sharpe_train = summary_pb2.Summary.Value(tag='Train_epoch_sharpe', simple_value=train_epoch_sharpe)
            value_sharpe_valid = summary_pb2.Summary.Value(tag='Valid_epoch_sharpe', simple_value=valid_epoch_sharpe)
            if evaluate_test_data:
                value_loss_test = summary_pb2.Summary.Value(tag='Test_epoch_loss', simple_value=test_epoch_loss)
                value_loss_residual_test = summary_pb2.Summary.Value(tag='Test_epoch_loss_residual',
                                                                     simple_value=test_epoch_loss_residual)
                value_sharpe_test = summary_pb2.Summary.Value(tag='Test_epoch_sharpe', simple_value=test_epoch_sharpe)
                summary = summary_pb2.Summary(value=[value_loss_train, value_loss_valid, value_loss_test,
                                                     value_loss_residual_train, value_loss_residual_valid,
                                                     value_loss_residual_test, value_sharpe_train,
                                                     value_sharpe_valid, value_sharpe_test])
            else:
                summary = summary_pb2.Summary(value=[value_loss_train, value_loss_valid,
                                                     value_loss_residual_train, value_loss_residual_valid,
                                                     value_sharpe_train, value_sharpe_valid])
            sw.add_summary(summary, global_step=epoch + self.model_params['num_epochs_unc'])
            sw.flush()
        ### save epoch
        if epoch > ignoreEpoch:
            if valid_epoch_loss < best_valid_loss:
                best_valid_loss = valid_epoch_loss
                if printOnConsole and epoch % printFreq == 0:
                    deco_print('Saving current best checkpoint (loss)')
                saver.save(sess, save_path=os.path.join(logdir_loss, 'model-best'))
            if valid_epoch_sharpe > best_valid_sharpe:
                best_valid_sharpe = valid_epoch_sharpe
                if printOnConsole and epoch % printFreq == 0:
                    deco_print('Saving current best checkpoint (sharpe)')
                saver.save(sess, save_path=os.path.join(logdir_sharpe, 'model-best'))
            if saveBestFreq > 0 and (epoch + 1) % saveBestFreq == 0:
                path_epoch_loss = os.path.join(logdir_loss, 'GAN', str(epoch))
                path_best_loss = os.path.join(logdir_loss, 'model-best*')
                path_best_checkpoint_loss = os.path.join(logdir_loss, 'checkpoint')
                os.system('mkdir -p ' + path_epoch_loss)
                os.system('cp %s %s' % (path_best_loss, path_epoch_loss))
                os.system('cp %s %s' % (path_best_checkpoint_loss, path_epoch_loss))
                path_epoch_sharpe = os.path.join(logdir_sharpe, 'GAN', str(epoch))
                path_best_sharpe = os.path.join(logdir_sharpe, 'model-best*')
                path_best_checkpoint_sharpe = os.path.join(logdir_sharpe, 'checkpoint')
                os.system('mkdir -p ' + path_epoch_sharpe)
                os.system('cp %s %s' % (path_best_sharpe, path_epoch_sharpe))
                os.system('cp %s %s' % (path_best_checkpoint_sharpe, path_epoch_sharpe))
        ### time
        if printOnConsole and epoch % printFreq == 0:
            time_elapse = time.time() - time_start
            time_est = time_elapse / (epoch + 1) * self.model_params['num_epochs']
            deco_print('Epoch %d Elapse/Estimate: %0.2fs/%0.2fs' % (epoch, time_elapse, time_est))
    deco_print('Training Conditional Loss Finished!\n')

    ### save last epoch
    deco_print('Saving last checkpoint')
    saver.save(sess, save_path=os.path.join(logdir, 'model-last'))
    if evaluate_test_data:
        return sharpe_train, sharpe_valid, sharpe_test
    else:
        return sharpe_train, sharpe_valid
def main(_):
    with open(FLAGS.config + '/config_RF_' + FLAGS.task_id + '.json', 'r') as file:
        config = json.load(file)
    deco_print('Read the following in config: ')
    print(json.dumps(config, indent=4))
    deco_print('Creating data layer')
    dl = data_layer.DataInRamInputLayer(
        config['individual_feature_file'],
        pathMacroFeature=config['macro_feature_file'],
        macroIdx=config['macro_idx'])
    meanMacroFeature, stdMacroFeature = dl.getMacroFeatureMeanStd()
    dl_valid = data_layer.DataInRamInputLayer(
        config['individual_feature_file_valid'],
        pathMacroFeature=config['macro_feature_file_valid'],
        macroIdx=config['macro_idx'],
        meanMacroFeature=meanMacroFeature,
        stdMacroFeature=stdMacroFeature)
    dl_test = data_layer.DataInRamInputLayer(
        config['individual_feature_file_test'],
        pathMacroFeature=config['macro_feature_file_test'],
        macroIdx=config['macro_idx'],
        meanMacroFeature=meanMacroFeature,
        stdMacroFeature=stdMacroFeature)
    if config['weighted_loss']:
        loss_weight = dl.getDateCountList()
        loss_weight_valid = dl_valid.getDateCountList()
        loss_weight_test = dl_test.getDateCountList()
    else:
        loss_weight = None
        loss_weight_valid = None
        loss_weight_test = None
    deco_print('Data layer created')
    tf.reset_default_graph()
    global_step = tf.train.get_or_create_global_step()
    model = FeedForwardModelWithNA_Return(config, 'train', global_step=global_step)
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess_config = tf.ConfigProto(gpu_options=gpu_options)
    sess = tf.Session(config=sess_config)
    model.randomInitialization(sess)
    logdir_trial = os.path.join(FLAGS.logdir, 'RF_%s_Trial_%d' % (FLAGS.task_id, FLAGS.trial_id))
    os.system('mkdir -p ' + logdir_trial)
    sharpe_train, sharpe_valid, sharpe_test = model.train(
        sess, dl, dl_valid, logdir_trial,
        loss_weight=loss_weight,
        loss_weight_valid=loss_weight_valid,
        dl_test=dl_test,
        loss_weight_test=loss_weight_test,
        printOnConsole=FLAGS.printOnConsole,
        printFreq=FLAGS.printFreq,
        saveLog=FLAGS.saveLog)
    ### best model on sharpe
    idxBestEpoch = np.array(sharpe_valid).argmax()
    sharpe_train_best_sharpe = sharpe_train[idxBestEpoch]
    sharpe_valid_best_sharpe = sharpe_valid[idxBestEpoch]
    sharpe_test_best_sharpe = sharpe_test[idxBestEpoch]
    deco_print('SDF Portfolio Sharpe Ratio (Evaluated on Sharpe): Train %0.3f\tValid %0.3f\tTest %0.3f'
               % (sharpe_train_best_sharpe, sharpe_valid_best_sharpe, sharpe_test_best_sharpe))
def main(_):
    with open(FLAGS.config, 'r') as file:
        config = json.load(file)
    if 'macro_idx' not in config:
        config['macro_idx'] = None
    deco_print('Read the following in config: ')
    print(json.dumps(config, indent=4))
    deco_print('Creating data layer')
    dl = data_layer.DataInRamInputLayer(
        config['individual_feature_file'],
        pathMacroFeature=config['macro_feature_file'],
        macroIdx=config['macro_idx'])
    meanMacroFeature, stdMacroFeature = dl.getMacroFeatureMeanStd()
    dl_valid = data_layer.DataInRamInputLayer(
        config['individual_feature_file_valid'],
        pathMacroFeature=config['macro_feature_file_valid'],
        macroIdx=config['macro_idx'],
        meanMacroFeature=meanMacroFeature,
        stdMacroFeature=stdMacroFeature)
    dl_test = data_layer.DataInRamInputLayer(
        config['individual_feature_file_test'],
        pathMacroFeature=config['macro_feature_file_test'],
        macroIdx=config['macro_idx'],
        meanMacroFeature=meanMacroFeature,
        stdMacroFeature=stdMacroFeature)
    if config['weighted_loss']:
        loss_weight = dl.getDateCountList()
        loss_weight_valid = dl_valid.getDateCountList()
        loss_weight_test = dl_test.getDateCountList()
    else:
        loss_weight = None
        loss_weight_valid = None
        loss_weight_test = None
    deco_print('Data layer created')
    global_step = tf.train.get_or_create_global_step()
    model = FeedForwardModelWithNA_GAN(config, 'train', config['tSize'], global_step=global_step)
    model_valid = FeedForwardModelWithNA_GAN(config, 'valid', config['tSize_valid'],
                                             force_var_reuse=True, global_step=global_step)
    model_test = FeedForwardModelWithNA_GAN(config, 'test', config['tSize_test'],
                                            force_var_reuse=True, global_step=global_step)
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess_config = tf.ConfigProto(gpu_options=gpu_options)
    sess = tf.Session(config=sess_config)
    model.randomInitialization(sess)
    sharpe_train, sharpe_valid, sharpe_test = model.train(
        sess, dl, dl_valid, FLAGS.logdir, model_valid,
        loss_weight=loss_weight,
        loss_weight_valid=loss_weight_valid,
        dl_test=dl_test,
        model_test=model_test,
        loss_weight_test=loss_weight_test,
        printOnConsole=FLAGS.printOnConsole,
        printFreq=FLAGS.printFreq,
        saveLog=FLAGS.saveLog,
        saveBestFreq=FLAGS.saveBestFreq,
        ignoreEpoch=FLAGS.ignoreEpoch)
    ### best model on sharpe
    idxBestEpoch = np.array(sharpe_valid).argmax()
    sharpe_train_best_sharpe = sharpe_train[idxBestEpoch]
    sharpe_valid_best_sharpe = sharpe_valid[idxBestEpoch]
    sharpe_test_best_sharpe = sharpe_test[idxBestEpoch]
    deco_print('SDF Portfolio Sharpe Ratio (Evaluated on Sharpe): Train %0.3f\tValid %0.3f\tTest %0.3f'
               % (sharpe_train_best_sharpe, sharpe_valid_best_sharpe, sharpe_test_best_sharpe))
def train(self, sess, dl, dl_valid, logdir, loss_weight=None, loss_weight_valid=None,
          dl_test=None, loss_weight_test=None, printOnConsole=True, printFreq=128, saveLog=True):
    saver = tf.train.Saver(max_to_keep=100)
    if saveLog:
        sw = tf.summary.FileWriter(logdir, sess.graph)
    best_valid_loss = float('inf')
    sharpe_train = []
    sharpe_valid = []
    ### evaluate test data
    evaluate_test_data = False
    if dl_test is not None:
        evaluate_test_data = True
        sharpe_test = []
    time_start = time.time()
    for epoch in range(self.model_params['num_epochs']):
        for _, (I_macro, I, R, mask) in enumerate(dl.iterateOneEpoch(subEpoch=self.model_params['sub_epoch'])):
            fetches = [self._train_model_op]
            feed_dict = {self._I_macro_placeholder: I_macro,
                         self._I_placeholder: I,
                         self._R_placeholder: R,
                         self._mask_placeholder: mask,
                         self._dropout_placeholder: self.model_params['dropout']}
            if self.model_params['weighted_loss']:
                feed_dict[self._loss_weight] = loss_weight
            sess.run(fetches=fetches, feed_dict=feed_dict)
        ### evaluate train loss / sharpe
        train_epoch_loss = self.evaluate_loss(sess, dl, loss_weight)
        train_epoch_sharpe = self.evaluate_sharpe(sess, dl)
        sharpe_train.append(train_epoch_sharpe)
        ### evaluate valid loss / sharpe
        valid_epoch_loss = self.evaluate_loss(sess, dl_valid, loss_weight_valid)
        valid_epoch_sharpe = self.evaluate_sharpe(sess, dl_valid)
        sharpe_valid.append(valid_epoch_sharpe)
        ### evaluate test loss / sharpe
        if evaluate_test_data:
            test_epoch_loss = self.evaluate_loss(sess, dl_test, loss_weight_test)
            test_epoch_sharpe = self.evaluate_sharpe(sess, dl_test)
            sharpe_test.append(test_epoch_sharpe)
        ### print loss / sharpe
        if printOnConsole and epoch % printFreq == 0:
            print('\n\n')
            deco_print('Doing epoch %d' % epoch)
            if evaluate_test_data:
                deco_print('Epoch %d train/valid/test loss: %0.4f/%0.4f/%0.4f'
                           % (epoch, train_epoch_loss, valid_epoch_loss, test_epoch_loss))
                deco_print('Epoch %d train/valid/test sharpe: %0.4f/%0.4f/%0.4f'
                           % (epoch, train_epoch_sharpe, valid_epoch_sharpe, test_epoch_sharpe))
            else:
                deco_print('Epoch %d train/valid loss: %0.4f/%0.4f' % (epoch, train_epoch_loss, valid_epoch_loss))
                deco_print('Epoch %d train/valid sharpe: %0.4f/%0.4f' % (epoch, train_epoch_sharpe, valid_epoch_sharpe))
        if saveLog:
            value_loss_train = summary_pb2.Summary.Value(tag='Train_epoch_loss', simple_value=train_epoch_loss)
            value_loss_valid = summary_pb2.Summary.Value(tag='Valid_epoch_loss', simple_value=valid_epoch_loss)
            value_sharpe_train = summary_pb2.Summary.Value(tag='Train_epoch_sharpe', simple_value=train_epoch_sharpe)
            value_sharpe_valid = summary_pb2.Summary.Value(tag='Valid_epoch_sharpe', simple_value=valid_epoch_sharpe)
            if evaluate_test_data:
                value_loss_test = summary_pb2.Summary.Value(tag='Test_epoch_loss', simple_value=test_epoch_loss)
                value_sharpe_test = summary_pb2.Summary.Value(tag='Test_epoch_sharpe', simple_value=test_epoch_sharpe)
                summary = summary_pb2.Summary(value=[value_loss_train, value_loss_valid, value_loss_test,
                                                     value_sharpe_train, value_sharpe_valid, value_sharpe_test])
            else:
                summary = summary_pb2.Summary(value=[value_loss_train, value_loss_valid,
                                                     value_sharpe_train, value_sharpe_valid])
            sw.add_summary(summary, global_step=epoch)
            sw.flush()
        ### save epoch
        if valid_epoch_loss < best_valid_loss:
            best_valid_loss = valid_epoch_loss
            if printOnConsole and epoch % printFreq == 0:
                deco_print('Saving current best checkpoint')
            saver.save(sess, save_path=os.path.join(logdir, 'model-best'))
        ### time
        if printOnConsole and epoch % printFreq == 0:
            time_elapse = time.time() - time_start
            time_est = time_elapse / (epoch + 1) * self.model_params['num_epochs']
            deco_print('Epoch %d Elapse/Estimate: %0.2fs/%0.2fs' % (epoch, time_elapse, time_est))
    if evaluate_test_data:
        return sharpe_train, sharpe_valid, sharpe_test
    else:
        return sharpe_train, sharpe_valid
from tensorflow.core.framework import summary_pb2
from src.model import Config, Model
from src.data_layer import DataInRamInputLayer
from src.utils import deco_print, deco_print_dict, feature_ranking_loss, feature_ranking, \
    feature_ranking_pair, feature_ranking_trio

tf.flags.DEFINE_string('logdir', '', 'Path to save logs and checkpoints')
tf.flags.DEFINE_string(
    'mode', 'train',
    'Mode: train/test/grad_rank/sens_anlys/sens_anlys_pair/sens_anlys_trio')
tf.flags.DEFINE_integer('sample_size', -100, 'Number of samples')
tf.flags.DEFINE_integer('num_epochs', 50, 'Number of training epochs')
tf.flags.DEFINE_float('delta', 1.1, 'Delta')
FLAGS = tf.flags.FLAGS

### Create Data Layer
deco_print('Creating Data Layer')
if FLAGS.mode == 'train':
    path = os.path.join(os.path.expanduser('~'), 'data/vol/Numpy_data_subprime_new')
    dl = DataInRamInputLayer(path=path, shuffle=True)
    path_valid = os.path.join(os.path.expanduser('~'), 'data/vol/Numpy_data_subprime_Val_new')
    dl_valid = DataInRamInputLayer(path=path_valid, shuffle=False)
elif FLAGS.mode == 'test':
    path = os.path.join(os.path.expanduser('~'), 'data/vol/Numpy_data_subprime_Test_new')
    dl = DataInRamInputLayer(path=path, shuffle=False)
elif FLAGS.mode in ('grad_rank', 'sens_anlys', 'sens_anlys_pair', 'sens_anlys_trio'):
    path = os.path.join(os.path.expanduser('~'), 'data/vol/Numpy_data_subprime_Test_new')
    if FLAGS.sample_size == -100: