import sklearn.base
from tqdm import tqdm


def predict_close(clf, tickers, **kwargs):
    """
    Use clf (an untrained regressor) to predict the closing price for the
    validation data of each stock in 'tickers'. Additional keyword arguments
    are passed through when building the stock datasets.

    Args:
    --clf: An untrained sklearn regressor
    --tickers: A list of tickers to use
    --kwargs: Additional arguments for the StockDataset class

    Returns:
        A dictionary where each key is a ticker in 'tickers' and each value
        is itself a dictionary containing the following:
        -'v_pred': The predicted closing prices
        -'v_true': The actual closing prices
        (both as ndarrays).
    """
    results = {}
    for ticker in tqdm(tickers):
        # Build and split dataset
        ds = StockDataset(tickers=ticker, quiet=True, **kwargs)
        t_data, v_data, t_label, v_label = ds.split(label_field='Label')
        # Clone the regressor so each ticker is fit from scratch
        clf_clone = sklearn.base.clone(clf)
        # Fit to the training data
        clf_clone.fit(t_data, t_label)
        # Predict and store results
        v_pred = clf_clone.predict(v_data)
        results[ticker] = {'v_pred': v_pred, 'v_true': v_label}
    return results
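# Example usage (a minimal sketch, assuming a Ridge regressor works with the
# StockDataset features; the tickers and alpha value are illustrative only):
#
# from sklearn.linear_model import Ridge
#
# results = predict_close(Ridge(alpha=1.0), ['AAPL', 'MSFT'])
# for ticker, res in results.items():
#     mse = ((res['v_pred'] - res['v_true']) ** 2).mean()
#     print(f'{ticker}: validation MSE = {mse:.4f}')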
def predict_direction(clf, tickers, **kwargs):
    """
    Use clf (an untrained classifier) to predict the direction of change for
    the validation data of each stock in 'tickers'. Additional keyword
    arguments are passed through when building the stock datasets.

    Args:
    --clf: An untrained sklearn classifier
    --tickers: A list of tickers to use
    --kwargs: Additional arguments for the StockDataset class

    Returns:
        A dictionary where each key is a ticker in 'tickers' and each value
        is the accuracy of the predictions for that ticker.
    """
    results = {}
    for ticker in tqdm(tickers):
        # Build and split dataset
        ds = StockDataset(tickers=ticker, quiet=True, **kwargs)
        t_data, v_data, t_label, v_label = ds.split(label_field='Direction')
        # Clone the classifier so each ticker is fit from scratch
        clf_clone = sklearn.base.clone(clf)
        # Fit to the training data
        clf_clone.fit(t_data, t_label)
        # Predict and store the accuracy
        v_pred = clf_clone.predict(v_data)
        results[ticker] = mymetrics.direction_accuracy(v_label, v_pred)
    return results
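# Example usage (a minimal sketch, assuming a RandomForestClassifier works
# with the StockDataset features; hyperparameters are illustrative only):
#
# from sklearn.ensemble import RandomForestClassifier
#
# accuracies = predict_direction(
#     RandomForestClassifier(n_estimators=200), ['AAPL', 'MSFT'])
# for ticker, acc in accuracies.items():
#     print(f'{ticker}: direction accuracy = {acc:.3f}')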
import tensorflow as tf


def main(config, checkpoint):
    """
    Load a pretrained TensorFlow model and use it to infer from the
    pseudo-live intraday data.
    """
    # Get the live data
    data = _get_live_data()

    # Prepare it for the model
    X_test, y_test = StockDataset.prepare_for_test_single(data)

    conf = Config(config)
    root = conf.root
    ckptPath = _get_correct_checkpoint(root, checkpoint)
    if ckptPath:
        print(f"Using checkpoint {ckptPath.name}")
        # Get the meta file to restore the graph
        metaFile = _get_meta_file(ckptPath)

        # Restore the graph from the meta file
        gconf = get_tensorflow_config()
        with tf.Session(config=gconf) as sess:
            saver = tf.train.import_meta_graph(str(metaFile))
            saver.restore(sess, tf.train.latest_checkpoint(str(ckptPath)))
            # Note: do NOT run tf.global_variables_initializer() here;
            # it would overwrite the restored weights with random values.
            graph = tf.get_default_graph()
            print("Model restored!!")

            # Get the graph placeholders: inputs, targets, keep_prob
            inputs = graph.get_tensor_by_name("inputs:0")
            targets = graph.get_tensor_by_name("targets:0")
            kp = graph.get_tensor_by_name("keep_prob:0")
            output = graph.get_tensor_by_name("Linear/output:0")
            loss = Train.squared_loss(output, targets, "test_loss")

            # Prepare the feed dict and run inference
            test_data_feed = {inputs: X_test, targets: y_test, kp: 1.0}
            test_loss, test_pred = sess.run([loss, output],
                                            feed_dict=test_data_feed)
            print(test_loss)
            print(test_pred)
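# A minimal sketch of two helpers the function above relies on. These are
# assumptions about the rest of the repo, not its confirmed implementations:
# 'Train.squared_loss' is taken to be a named mean-squared-error op, and
# '_get_meta_file' to locate the '.meta' graph file in the checkpoint folder.
#
# @staticmethod
# def squared_loss(output, targets, name):
#     # MSE between predictions and targets, exposed as a named op
#     return tf.reduce_mean(tf.square(output - targets), name=name)
#
# def _get_meta_file(ckptPath):
#     from pathlib import Path
#     # Pick the first .meta file found in the checkpoint directory
#     return next(Path(ckptPath).glob("*.meta"))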
from itertools import product


def optimizer_ALSTM():
    # Hyperparameter grid: hidden units, lag lengths, regularizer strengths
    U = [4, 8, 16, 32]
    T = [2, 3, 4, 5, 10, 15]
    lamb = [1e-4, 1e-5, 1e-6, 0.]

    best_performance = 0.0
    best_config = None
    for u, t, l in product(U, T, lamb):

        class args:
            epochs = 10
            batch_size = 1024
            hidden_num = u
            lr = 0.01
            lags = t
            epsilon = None
            beta = None
            regularizer = l
            model_path = "weight/model.pt"
            namespace = "res/model"
            is_regression = False
            use_adversarial = False
            verbose = False

        train_dataset = StockDataset(lags=args.lags,
                                     is_train=True,
                                     is_regression=args.is_regression)
        test_dataset = StockDataset(lags=args.lags,
                                    is_train=False,
                                    is_regression=args.is_regression)

        # Average performance over 5 independent runs to reduce variance
        performances = []
        for _ in range(5):
            train = Train(args, dataset=train_dataset)
            train.run()
            performance = test(args, dataset=test_dataset)
            performances.append(performance)
        mean_perf = sum(performances) / len(performances)

        print("hidden: {} lags: {}, regularizer: {}, performance: {:.4}".format(
            u, t, l, mean_perf))
        if mean_perf > best_performance:
            best_performance = mean_perf
            best_config = args

    print(best_performance)
    print(best_config)
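# Note: the 'class args' pattern above is just a lightweight attribute
# namespace standing in for parsed command-line arguments. An equivalent,
# arguably more idiomatic form (a sketch, not from the original code):
#
# from types import SimpleNamespace
#
# args = SimpleNamespace(epochs=10, batch_size=1024, hidden_num=u, lr=0.01,
#                        lags=t, epsilon=None, beta=None, regularizer=l,
#                        model_path="weight/model.pt", namespace="res/model",
#                        is_regression=False, use_adversarial=False,
#                        verbose=False)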
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


def test(args, dataset=None):
    if dataset is None:
        dataset = StockDataset(lags=args.lags,
                               is_regression=args.is_regression,
                               is_train=False)
    test_loader = DataLoader(dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=2)

    model = Model()
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.cuda()
    model.load_state_dict(torch.load(args.model_path))
    model.eval()

    # MSE for regression targets, binary cross-entropy for direction labels
    loss_func = nn.MSELoss(
        reduction="mean") if args.is_regression else nn.BCELoss()

    total_loss = 0.0
    preds, trues = [], []
    # Inference only: no_grad avoids building the autograd graph
    with torch.no_grad():
        for batch, (X, y) in enumerate(test_loader):
            # 'device' is assumed to be a module-level torch.device
            X = X.to(device)
            y = y.to(device)
            score, _ = model(X)
            if not args.is_regression:
                # BCELoss expects probabilities, so squash the raw scores
                score = torch.sigmoid(score)
            trues += y.tolist()
            preds += score.tolist()
            loss = loss_func(score, y)
            total_loss += loss.item() * X.size(0)
    total_loss /= len(dataset)

    if args.verbose and not args.is_regression:
        template = "LOSS: {:.4}, ACC: {:.4}, MCC: {:.4}"
        print(template.format(total_loss,
                              get_Acc(preds, trues),
                              get_MCC(preds, trues)))
    if args.verbose and args.is_regression:
        print("LOSS: {:.4}".format(total_loss))

    if not args.is_regression:
        res = get_Acc(preds, trues)
        save(args.namespace, res)
        return res
    else:
        save(args.namespace, total_loss)
        return total_loss
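# A minimal sketch of the metric helpers used above, assuming 'get_Acc' and
# 'get_MCC' receive flat lists of sigmoid probabilities and binarize them at
# 0.5 (an assumption about the repo's helpers, not their confirmed code):
#
# from sklearn.metrics import accuracy_score, matthews_corrcoef
#
# def get_Acc(preds, trues):
#     labels = [1 if p >= 0.5 else 0 for p in preds]
#     return accuracy_score(trues, labels)
#
# def get_MCC(preds, trues):
#     labels = [1 if p >= 0.5 else 0 for p in preds]
#     return matthews_corrcoef(trues, labels)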
def __init__(self, args, dataset=None, model_cls=None):
    self.epochs = args.epochs
    self.verbose = args.verbose
    self.epsilon = args.epsilon
    self.beta = args.beta
    self.regularizer = args.regularizer
    self.is_regression = args.is_regression
    self.use_adversarial = args.use_adversarial

    if dataset is None:
        dataset = StockDataset(lags=args.lags,
                               is_regression=args.is_regression)
    # Hold out 20% of the data for validation
    data_len = len(dataset)
    self.train_num = int(data_len * 0.8)
    self.vali_num = data_len - self.train_num
    trainset, valiset = random_split(dataset,
                                     [self.train_num, self.vali_num])
    self.train_loader = DataLoader(trainset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=2)
    self.vali_loader = DataLoader(valiset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=2)
    self.batch_size = args.batch_size

    if model_cls is None:
        self.model = Model()
    else:
        self.model = model_cls()  # for the hyperparameter optimizer

    if torch.cuda.device_count() > 1:
        self.model = nn.DataParallel(self.model)
    self.model.cuda()

    # L2 regularization is applied through Adam's weight decay
    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                      lr=args.lr,
                                      weight_decay=self.regularizer)
    self.model_path = args.model_path
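# Example usage (a minimal sketch; the attribute names mirror the 'args'
# namespace built in optimizer_ALSTM above, with illustrative values):
#
# class args:
#     epochs = 10; batch_size = 1024; hidden_num = 16; lr = 0.01; lags = 5
#     epsilon = None; beta = None; regularizer = 1e-5
#     model_path = "weight/model.pt"; namespace = "res/model"
#     is_regression = False; use_adversarial = False; verbose = True
#
# trainer = Train(args)
# trainer.run()
# acc = test(args)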
def train(self, dataset: StockDataset):
    """
    Train the model.

    :param dataset: The sequential dataset
    :return:
    """
    # Initialize all the graph variables
    tf.global_variables_initializer().run()

    start_time = str(int(time.time()))
    global_step = 0
    EPOCHS = self.conf.ops['epochs']
    NUM_BATCHES = dataset.num_batches

    # Write the graph summary for TensorBoard
    with tf.summary.FileWriter("./LOGDIR") as gs:
        gs.add_graph(self.model.sess.graph)

    # Training loop
    for epoch in range(EPOCHS):
        epoch_step = 0
        # Returns an iterator over the training data only
        data = dataset.generate_for_one_epoch()

        total_training_loss = 0.0
        self.model.training = True
        with PixelBar(f'Epoch {epoch + 1}: ', max=NUM_BATCHES) as bar:
            bar.check_tty = False
            for batch_X, batch_y in data:
                global_step += 1
                epoch_step += 1
                # Training feed dict
                train_data_feed = {
                    self.model.inputs: batch_X,
                    self.model.targets: batch_y,
                    self.model.keep_prob:
                        1.0 - self.conf.layers['dropout_rate']
                }
                train_loss = self.model.sess.run(
                    [self.train_loss, self.optim], train_data_feed)
                # Accumulate before updating the bar so the displayed
                # loss includes the current batch
                total_training_loss += train_loss[0]
                bar.suffix = 'Total training Loss: {:.7e}'.format(
                    total_training_loss)
                bar.next()

        # Check the performance on the validation dataset
        val_data_feed = {
            self.model.inputs: dataset.X_val,
            self.model.targets: dataset.y_val,
            self.model.keep_prob: 1.0
        }
        self.model.training = False  # disable dropout for validation
        val_loss, val_pred = self.model.sess.run(
            [self.val_loss, self.model.pred], feed_dict=val_data_feed)

        print(f'\n\nEpoch: {epoch + 1}, '
              f'Training Loss: {total_training_loss / NUM_BATCHES}')
        print(f'Epoch: {epoch + 1}, Validation Loss: {val_loss}\n')

        if not self.save_model(self.conf.root, global_step, val_loss,
                               epoch + 1, 0.00001, start_time):
            print('Validation loss has not improved from the previous '
                  f'value {Train.VALID_LOSS}')
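# A minimal sketch of the checkpointing logic 'save_model' is assumed to
# implement (an assumption, not the repo's confirmed code): save only when
# the validation loss improves by at least 'delta', tracking the best loss
# seen so far in the class attribute Train.VALID_LOSS.
#
# def save_model(self, root, global_step, val_loss, epoch, delta, start_time):
#     import os
#     if val_loss < Train.VALID_LOSS - delta:
#         Train.VALID_LOSS = val_loss
#         ckpt_dir = os.path.join(root, f'run_{start_time}')
#         os.makedirs(ckpt_dir, exist_ok=True)
#         # 'self.saver' is a hypothetical tf.train.Saver held by the trainer
#         self.saver.save(self.model.sess,
#                         os.path.join(ckpt_dir, 'model'),
#                         global_step=global_step)
#         return True
#     return False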
if __name__ == '__main__':
    conf = Config('config.yaml')
    dataset = StockDataset(config=conf)
    trainer = Train(dataset, 'config.yaml')
    trainer.train(dataset)