Example #1
0
def predict_close(clf, tickers, **kwargs):
    """
    Fit a fresh clone of 'clf' for every ticker and predict the validation
    closing prices of that ticker.

    For each ticker a StockDataset is built (forwarding 'kwargs'), split with
    'Label' as the label field, and a clone of 'clf' is fitted on the training
    part before predicting on the validation part. 'clf' itself is never
    fitted.

    Args:
    --clf: An untrained sklearn regressor (used only as a template to clone)
    --tickers: An iterable of tickers to use
    --kwargs: Additional arguments for the StockDataset class

    Returns:
    A dictionary keyed by ticker. Each value is a dictionary with:
        -'v_pred': The predictions of the fitted clone on the validation data
        -'v_true': The validation labels returned by the dataset split
    """
    def _evaluate(symbol):
        # One independent dataset and one independent model per ticker
        dataset = StockDataset(tickers=symbol, quiet=True, **kwargs)
        train_x, valid_x, train_y, valid_y = dataset.split(label_field='Label')
        model = sklearn.base.clone(clf)
        model.fit(train_x, train_y)
        return {'v_pred': model.predict(valid_x), 'v_true': valid_y}

    return {symbol: _evaluate(symbol) for symbol in tqdm(tickers)}
Example #2
0
def predict_direction(clf, tickers, **kwargs):
    """
    Fit a fresh clone of 'clf' for every ticker and score how well it predicts
    the direction of change on that ticker's validation data.

    For each ticker a StockDataset is built (forwarding 'kwargs') and split
    with 'Direction' as the label field; the clone is fitted on the training
    part and its validation predictions are scored with
    mymetrics.direction_accuracy. 'clf' itself is never fitted.

    Args:
    --clf: An untrained sklearn classifier (used only as a template to clone)
    --tickers: An iterable of tickers to use
    --kwargs: Additional arguments for the StockDataset class

    Returns:
    A dictionary mapping each ticker to the value returned by
    mymetrics.direction_accuracy(true_labels, predictions) for that ticker.
    """
    accuracy_by_ticker = {}
    progress = tqdm(tickers)
    for symbol in progress:
        # Per-ticker dataset, split into train / validation parts
        splits = StockDataset(tickers=symbol, quiet=True,
                              **kwargs).split(label_field='Direction')
        train_x, valid_x, train_y, valid_y = splits

        # Independent, unfitted copy of the template estimator
        model = sklearn.base.clone(clf)
        model.fit(train_x, train_y)

        predicted = model.predict(valid_x)
        accuracy_by_ticker[symbol] = mymetrics.direction_accuracy(
            valid_y, predicted)
    return accuracy_by_ticker
Example #3
0
def main(config, checkpoint):
    """
    Load a pretrained TensorFlow model and run it on the pseudo-live
    intraday data, printing the test loss and the predictions.

    Args:
        config: Value passed to Config(...) to load the project configuration.
        checkpoint: Value passed to _get_correct_checkpoint(...) together with
            the configured root to select the checkpoint directory.

    Returns:
        None. If no checkpoint is resolved, a message is printed and the
        function returns without building a session.
    """

    # Get the live data and shape it for the model
    data = _get_live_data()
    X_test, y_test = StockDataset.prepare_for_test_single(data)

    conf = Config(config)
    ckptPath = _get_correct_checkpoint(conf.root, checkpoint)

    # Guard BEFORE touching ckptPath.name: the original dereferenced the path
    # first, so a missing checkpoint raised instead of being handled.
    if not ckptPath:
        print("No checkpoint found; nothing to restore.")
        return
    print(f"Using checkPoint {ckptPath.name}")

    # Get meta file to restore the graph
    metaFile = _get_meta_file(ckptPath)
    gconf = get_tensorflow_config()
    with tf.Session(config=gconf) as sess:
        # Rebuild the graph from the meta file
        saver = tf.train.import_meta_graph(str(metaFile))
        # Initialise first, restore second. Running the initializer AFTER
        # restore (as the original did) resets every variable to its initial
        # value and silently throws the trained weights away.
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, tf.train.latest_checkpoint(str(ckptPath)))
        graph = tf.get_default_graph()
        print("Model restored!!")

        # Look up the placeholders and the output tensor by name
        inputs = graph.get_tensor_by_name("inputs:0")
        targets = graph.get_tensor_by_name("targets:0")
        kp = graph.get_tensor_by_name("keep_prob:0")
        output = graph.get_tensor_by_name("Linear/output:0")

        loss = Train.squared_loss(output, targets, "test_loss")

        # keep_prob is fed as 1.0 for inference
        test_data_feed = {inputs: X_test, targets: y_test, kp: 1.0}
        test_loss, test_pred = sess.run([loss, output],
                                        feed_dict=test_data_feed)
        print(test_loss)
        print(test_pred)
Example #4
0
def optimizer_ALSTM():
    """
    Grid-search hidden size, lag count and weight-decay strength.

    Every combination of the three candidate lists is trained and evaluated
    five times; the mean of the five values returned by test(...) is the
    score of that combination. The best mean score and the configuration
    that produced it are printed at the end.

    The configuration is a types.SimpleNamespace so that it offers the same
    attribute access as before while printing its actual field values (the
    original used a throwaway class, whose printed form carried no
    hyperparameters).

    Returns:
        None. Results are reported via print.
    """
    from types import SimpleNamespace

    U = [4, 8, 16, 32]
    T = [2, 3, 4, 5, 10, 15]
    lamb = [1e-4, 1e-5, 1e-6, 0.]

    best_performance = 0.0
    best_config = None
    for hidden, lags, reg in product(U, T, lamb):
        args = SimpleNamespace(
            epochs=10,
            batch_size=1024,
            hidden_num=hidden,
            lr=0.01,
            lags=lags,
            epsilon=None,
            beta=None,
            regularizer=reg,
            model_path="weight/model.pt",
            namespace="res/model",
            is_regression=False,
            use_adversarial=False,
            verbose=False,
        )

        # Datasets depend only on lags / is_regression, so build them once
        # per configuration and reuse them across the repeated runs.
        train_dataset = StockDataset(lags=args.lags, is_train=True,
                                     is_regression=args.is_regression)
        test_dataset = StockDataset(lags=args.lags, is_train=False,
                                    is_regression=args.is_regression)

        performances = []
        for _ in range(5):
            Train(args, dataset=train_dataset).run()
            performances.append(test(args, dataset=test_dataset))
        mean_perf = sum(performances) / len(performances)
        print("hidden: {} lags: {}, regularizer: {}, performance: {:.4}".format(
            hidden, lags, reg, mean_perf))

        # Higher is better; a score must beat 0.0 to be recorded at all
        if mean_perf > best_performance:
            best_performance = mean_perf
            best_config = args
    print(best_performance)
    print(best_config)
Example #5
0
def test(args, dataset=None):
    """
    Evaluate the model stored at args.model_path on a test dataset.

    Args:
        args: Object exposing lags, is_regression, batch_size, model_path,
            verbose and namespace attributes.
        dataset: Dataset to evaluate on. When None, a StockDataset is built
            with is_train=False from args.lags and args.is_regression.

    Returns:
        For regression (args.is_regression true): the sample-weighted mean
        loss over the dataset. Otherwise: the value of get_Acc(preds, trues).
        The returned value is also passed to save(args.namespace, ...).
    """
    if dataset is None:
        dataset = StockDataset(lags=args.lags,
                               is_regression=args.is_regression,
                               is_train=False)
    test_loader = DataLoader(dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=2)

    model = Model()
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.cuda()
    model.load_state_dict(torch.load(args.model_path))
    model.eval()

    loss_func = nn.MSELoss(
        reduction="mean") if args.is_regression else nn.BCELoss()

    total_loss = 0.0
    preds, trues = [], []
    # Pure inference: disable autograd so no graph is built per batch
    with torch.no_grad():
        for X, y in test_loader:
            X = X.to(device)
            y = y.to(device)
            score, _ = model(X)
            if not args.is_regression:
                score = torch.sigmoid(score)
            trues += y.tolist()
            preds += score.tolist()
            loss = loss_func(score, y)
            # Batch loss is a mean; weight by batch size so the final
            # division by len(dataset) yields a per-sample average.
            total_loss += loss.item() * X.size(0)
    total_loss /= len(dataset)

    if args.is_regression:
        if args.verbose:
            print("LOSS: {:.4}".format(total_loss))
        save(args.namespace, total_loss)
        return total_loss

    # Accuracy is computed once and reused for both reporting and saving
    res = get_Acc(preds, trues)
    if args.verbose:
        template = "LOSS: {:.4}, ACC: {:.4}, MCC: {:.4}"
        print(template.format(total_loss, res, get_MCC(preds, trues)))
    save(args.namespace, res)
    return res
Example #6
0
    def __init__(self, args, dataset=None, model_cls=None):
        """
        Set up data loaders, model and optimizer for a training run.

        :param args: object exposing epochs, verbose, epsilon, beta,
            regularizer, is_regression, use_adversarial, lags, batch_size,
            lr and model_path attributes
        :param dataset: dataset to split into train / validation parts; when
            None a StockDataset is built from args.lags and args.is_regression
        :param model_cls: optional zero-argument model factory; Model is used
            when None
        """
        # Plain hyperparameters copied from args
        self.epochs = args.epochs
        self.verbose = args.verbose
        self.epsilon = args.epsilon
        self.beta = args.beta
        self.regularizer = args.regularizer
        self.is_regression = args.is_regression
        self.use_adversarial = args.use_adversarial

        full_set = dataset
        if full_set is None:
            full_set = StockDataset(lags=args.lags,
                                    is_regression=args.is_regression)

        # 80 % of the samples go to training, the remainder to validation
        total = len(full_set)
        self.train_num = int(total * 0.8)
        self.vali_num = total - self.train_num
        train_part, vali_part = random_split(
            full_set, [self.train_num, self.vali_num])

        # Only the training loader shuffles
        shared_opts = dict(batch_size=args.batch_size, num_workers=2)
        self.train_loader = DataLoader(train_part, shuffle=True, **shared_opts)
        self.vali_loader = DataLoader(vali_part, shuffle=False, **shared_opts)

        self.batch_size = args.batch_size

        # model_cls lets the hyperoptimizer inject its own model class
        build_model = Model if model_cls is None else model_cls
        self.model = build_model()

        multi_gpu = torch.cuda.device_count() > 1
        if multi_gpu:
            self.model = nn.DataParallel(self.model)
        self.model.cuda()

        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=args.lr,
                                          weight_decay=self.regularizer)
        self.model_path = args.model_path
    def train(self, dataset: StockDataset):
        """
        Train the model, validating and checkpointing once per epoch.

        Each epoch iterates the batches produced by
        dataset.generate_for_one_epoch(), runs the training op on every
        batch, then evaluates dataset.X_val / dataset.y_val once and hands
        the validation loss to self.save_model.

        Configuration is read from self.conf only (the earlier version mixed
        self.conf with a module-level ``conf``), and validation runs exactly
        once per epoch (a duplicated copy of the validation/checkpoint code
        has been removed).

        :param dataset: The sequential dataset
        :return: None
        """

        # Initialize all the DAG variables
        tf.global_variables_initializer().run()
        start_time = str(int(time.time()))
        global_step = 0
        EPOCHS = self.conf.ops['epochs']
        NUM_BATCHES = dataset.num_batches
        # Dropout is expressed as a keep probability for the graph
        train_keep_prob = 1.0 - self.conf.layers['dropout_rate']

        # Write the graph summary in tensorboard
        with tf.summary.FileWriter("./LOGDIR") as gs:
            gs.add_graph(self.model.sess.graph)

        # Training loop
        for epoch in range(EPOCHS):
            # Returns an iterator only for training data
            data = dataset.generate_for_one_epoch()
            total_training_loss = 0.0
            self.model.training = True
            with PixelBar(f'Epoch {epoch + 1}: ', max=NUM_BATCHES) as bar:
                bar.check_tty = False
                for batch_X, batch_y in data:
                    global_step += 1

                    # Training feed dict
                    train_data_feed = {
                        self.model.inputs: batch_X,
                        self.model.targets: batch_y,
                        self.model.keep_prob: train_keep_prob
                    }

                    train_loss = self.model.sess.run(
                        [self.train_loss, self.optim], train_data_feed)
                    # Accumulate first so the bar shows the loss including
                    # the batch that was just processed
                    total_training_loss += train_loss[0]
                    bar.suffix = 'Total training Loss: {:.7e}'.format(
                        total_training_loss)
                    bar.next()

                # Check the performance on the validation dataset
                # (keep_prob 1.0 disables dropout)
                val_data_feed = {
                    self.model.inputs: dataset.X_val,
                    self.model.targets: dataset.y_val,
                    self.model.keep_prob: 1.0
                }
                val_loss, _ = self.model.sess.run(
                    [self.val_loss, self.model.pred], feed_dict=val_data_feed)

                print(
                    f'\n\nEpoch: {epoch + 1}, Training Loss: {total_training_loss / NUM_BATCHES}'
                )
                print(f'Epoch: {epoch + 1}, Validation Loss: {val_loss}\n')
                # save_model returns a falsy value when it did not save
                if not self.save_model(self.conf.root, global_step, val_loss,
                                       epoch + 1, 0.00001, start_time):
                    print(
                        f'Validation loss has not improved from the previous value {Train.VALID_LOSS}'
                    )


# Script entry point: runs only when the file is executed directly.
if __name__ == '__main__':
    # Load the project configuration from the YAML file in the working dir.
    conf = Config('config.yaml')
    # Build the dataset from that configuration.
    dataset = StockDataset(config=conf)
    # Construct the trainer from the dataset and the same config file path.
    # NOTE(review): no call that starts training is visible in this view --
    # confirm whether it follows below or happens inside Train's constructor.
    trainer = Train(dataset, 'config.yaml')