Ejemplo n.º 1
0
    def _step(self, action):
        """
        Step the env.

        Actions should be portfolio weights [w0, w1, ...]:
        - wn is a portfolio weight from 0 to 1. The first entry is the cash
          bias.
        - The weights are clipped to [0, 1] and normalised to sum to 1 before
          they are handed to ``self.sim._step``; an all-zero action becomes
          [1, 0, ...] (everything in cash).

        Args:
            action: 1-D array-like with ``len(self.sim.asset_names) + 1``
                entries. Integer and boolean arrays are accepted. The
                caller's array is never modified.

        Returns:
            Tuple ``(observation, reward, done, info)`` where ``observation``
            is the source observation with a row of ones (cash) prepended on
            axis 0, ``reward`` comes from ``self.sim._step``, ``done`` is
            ``done1 or done2`` (source / simulator flags) and ``info`` is the
            simulator's info dict extended with ``'market_value'``,
            ``'date'``, ``'steps'`` and ``'next_obs'``.

        Raises:
            AssertionError: if the action has the wrong shape or the
                normalised weights are invalid (e.g. contain NaN).
        """
        action = np.asarray(action)
        np.testing.assert_almost_equal(action.shape,
                                       (len(self.sim.asset_names) + 1, ))

        # normalise just in case. np.clip returns a new array, so the
        # caller's action is left untouched.
        weights = np.clip(action, 0, 1)
        # The in-place operations below need a floating dtype; without this
        # cast an integer action (e.g. a one-hot [1, 0, 0]) makes ``/=`` raise
        # a casting error.
        if not np.issubdtype(weights.dtype, np.floating):
            weights = weights.astype(np.float64)

        weights /= (weights.sum() + eps)
        weights[0] += np.clip(
            1 - weights.sum(), 0,
            1)  # so if weights are all zeros we normalise to [1,0...]

        # Sanity checks on the normalised weights; NaN entries fail the
        # range comparison.
        assert ((weights >= 0) * (weights <= 1)).all(
        ), 'all action values should be between 0 and 1. Not %s' % weights
        np.testing.assert_almost_equal(
            np.sum(weights),
            1.0,
            3,
            err_msg='weights should sum to 1. action="%s"' % weights)

        observation, done1, ground_truth_obs = self.src._step()

        # concatenate observation with ones (the cash "asset")
        cash_observation = np.ones(
            (1, self.window_length, observation.shape[2]))
        observation = np.concatenate((cash_observation, observation), axis=0)

        cash_ground_truth = np.ones((1, 1, ground_truth_obs.shape[2]))
        ground_truth_obs = np.concatenate(
            (cash_ground_truth, ground_truth_obs), axis=0)

        # relative price vector of last observation day (close/open)
        close_price_vector = observation[:, -1, 3]
        open_price_vector = observation[:, -1, 0]
        y1 = close_price_vector / open_price_vector
        reward, info, done2 = self.sim._step(weights, y1)

        # calculate return for buy and hold a bit of each asset: cumulative
        # product of every recorded step's "return", including this one
        info['market_value'] = np.cumprod(
            [inf["return"] for inf in self.infos + [info]])[-1]
        # add dates
        info['date'] = index_to_date(self.start_idx + self.src.idx +
                                     self.src.step)
        info['steps'] = self.src.step
        info['next_obs'] = ground_truth_obs

        self.infos.append(info)

        return observation, reward, done1 or done2, info
Ejemplo n.º 2
0
    def _step(self, action):
        """ Step the environment by a vector of actions

        Each row of ``action`` is the portfolio of one model. Rows are
        clipped to [0, 1] and normalised to sum to 1 (an all-zero row becomes
        [1, 0, ...], i.e. everything in cash) before being passed to the
        matching simulator in ``self.sim``.

        Args:
            action: array-like of shape (num_models, num_stocks + 1). Integer
                and boolean arrays are accepted. The caller's array is never
                modified.

        Returns:
            Tuple ``(observation, rewards, done, info)``: ``observation`` is
            the source observation with a row of ones (cash) prepended on
            axis 0; ``rewards`` holds one reward per model; ``done`` is
            ``np.all(dones) or done1``; ``info`` maps every model name to its
            ``'portfolio_value'`` and also carries ``'return'``,
            ``'market_value'``, ``'date'``, ``'steps'`` and ``'next_obs'``.

        Raises:
            AssertionError: if the action has the wrong shape or the
                normalised weights are invalid (e.g. contain NaN).
        """
        action = np.asarray(action)
        assert action.ndim == 2, 'Action must be a two dimensional array with shape (num_models, num_stocks + 1)'
        assert action.shape[1] == len(self.sim[0].asset_names) + 1
        assert action.shape[0] == len(self.model_names)
        # normalise just in case. np.clip returns a new array, so the
        # caller's action is left untouched.
        weights = np.clip(action, 0, 1)
        # The in-place operations below need a floating dtype; without this
        # cast an integer action makes ``/=`` raise a casting error.
        if not np.issubdtype(weights.dtype, np.floating):
            weights = weights.astype(np.float64)
        weights /= (np.sum(weights, axis=1, keepdims=True) + eps)
        # so if weights are all zeros we normalise to [1,0...]
        weights[:, 0] += np.clip(1 - np.sum(weights, axis=1), 0, 1)
        # Sanity checks on the normalised weights; NaN entries fail the
        # range comparison.
        assert ((weights >= 0) * (weights <= 1)).all(), 'all action values should be between 0 and 1. Not %s' % weights
        np.testing.assert_almost_equal(np.sum(weights, axis=1), np.ones(shape=(weights.shape[0])), 3,
                                       err_msg='weights should sum to 1. action="%s"' % weights)
        observation, done1, ground_truth_obs = self.src._step()

        # concatenate observation with ones (the cash "asset")
        cash_observation = np.ones((1, self.window_length, observation.shape[2]))
        observation = np.concatenate((cash_observation, observation), axis=0)

        cash_ground_truth = np.ones((1, 1, ground_truth_obs.shape[2]))
        ground_truth_obs = np.concatenate((cash_ground_truth, ground_truth_obs), axis=0)

        # relative price vector of last observation day (close/open)
        close_price_vector = observation[:, -1, 3]
        open_price_vector = observation[:, -1, 0]
        y1 = close_price_vector / open_price_vector

        rewards = np.empty(shape=(weights.shape[0]))
        info = {}
        dones = np.empty(shape=(weights.shape[0]), dtype=bool)
        # Every model trades the same price move y1 with its own weights.
        for i, model_weights in enumerate(weights):
            reward, current_info, done2 = self.sim[i]._step(model_weights, y1)
            rewards[i] = reward
            info[self.model_names[i]] = current_info['portfolio_value']
            # Overwritten on each iteration: the last simulator's value wins.
            info['return'] = current_info['return']
            dones[i] = done2

        # calculate return for buy and hold a bit of each asset: cumulative
        # product of every recorded step's "return", including this one
        info['market_value'] = np.cumprod([inf["return"] for inf in self.infos + [info]])[-1]
        # add dates
        info['date'] = index_to_date(self.start_idx + self.src.idx + self.src.step)
        info['steps'] = self.src.step
        info['next_obs'] = ground_truth_obs

        self.infos.append(info)

        return observation, rewards, np.all(dones) or done1, info
    def _step(self, action):
        """
        Advance the environment by one step.

        ``action`` holds the portfolio weights [w0, w1, ...], one entry per
        asset plus a leading cash entry. It is converted to weights by
        ``self.action2weights`` and then applied by ``self.sim._step``.

        Returns a tuple ``(observation, reward, done, info)``: the
        observation is the source observation with a row of ones (cash)
        prepended on axis 0, ``done`` is ``done1 or done2`` (source and
        simulator flags) and ``info`` is the simulator's info dict extended
        with 'market_value', 'date', 'steps' and 'next_obs'.
        """
        actual_shape = action.shape
        expected_shape = (len(self.sim.asset_names) + 1, )
        np.testing.assert_almost_equal(actual_shape, expected_shape)

        # turn the raw action into portfolio weights
        weights = self.action2weights(action)

        observation, done1, ground_truth_obs = self.src._step()

        # prepend a constant-one "cash" row to the observation window...
        cash_window = np.ones((1, self.window_length, observation.shape[2]))
        observation = np.concatenate([cash_window, observation], axis=0)

        # ...and to the ground-truth observation
        cash_truth = np.ones((1, 1, ground_truth_obs.shape[2]))
        ground_truth_obs = np.concatenate([cash_truth, ground_truth_obs],
                                          axis=0)

        # relative price vector of the last observed day: close (column 3)
        # divided by open (column 0)
        last_day = observation[:, -1]
        y1 = last_day[:, 3] / last_day[:, 0]
        reward, info, done2 = self.sim._step(weights, y1)

        # buy-and-hold value: running product of every recorded step's
        # "return", including the current one
        returns_so_far = [past["return"] for past in self.infos]
        returns_so_far.append(info["return"])
        info['market_value'] = np.cumprod(returns_so_far)[-1]

        # date and step bookkeeping plus the ground truth for the next step
        info.update({
            'date': index_to_date(self.start_idx + self.src.idx +
                                  self.src.step),
            'steps': self.src.step,
            'next_obs': ground_truth_obs,
        })

        self.infos.append(info)

        done = done1 or done2
        return observation, reward, done, info
Ejemplo n.º 4
0
def _load_pickled_dataset(path):
    """Unpickle and return the dataset stored at ``path``."""
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only point this at trusted, locally generated dataset files.
    with open(path, 'rb') as fr:
        return pickle.load(fr, encoding='latin1')


def _select_history(history, abbreviation, stocks, time_slice):
    """Copy the ``time_slice`` window of every stock in ``stocks`` into a new array."""
    num_steps = len(range(*time_slice.indices(history.shape[1])))
    selected = np.empty(shape=(len(stocks), num_steps, history.shape[2]))
    for i, stock in enumerate(stocks):
        selected[i] = history[abbreviation.index(stock), time_slice, :]
    return selected


def visualise_Data():
    """Plot the training price history and evaluate the saved DDPG models.

    Steps, in order:
    1. Load the pickled price history and stock names, keeping the first
       four features of every time step.
    2. Split the history of all stocks into the first 1095 time steps
       (training) and the remainder (testing), printing the shapes.
    3. Show one figure per stock with feature column 1 of the training data.
    4. Build the imitation LSTM/CNN models and the DDPG models for every
       window length, loading their saved weights.
    5. Run ``test_model_multiple`` with the DDPG models only, on a
       ``MultiActionPortfolioEnv`` built from the training history.
    """
    history = _load_pickled_dataset('utils/datasets/all_eqw')
    abbreviation = _load_pickled_dataset('utils/datasets/stock_names')
    history = history[:, :, :4]
    full_period = slice(None, history.shape[1])

    # get target history (three hand-picked stocks, printed for inspection)
    target_stocks = ['BLK UN EQUITY', 'GS UN EQUITY', 'USB UN EQUITY']
    target_history = _select_history(history, abbreviation, target_stocks,
                                     full_period)
    for stock_history in target_history:
        print(stock_history)

    # collect testing data; the rows are looked up with the testing stock
    # names (this loop used to iterate over the target stocks by mistake)
    testing_stocks = [
        'AMG UN EQUITY',
        'BRK/B UN EQUITY',
        'MTB UN EQUITY',
    ]
    testing_history = _select_history(history, abbreviation, testing_stocks,
                                      full_period)

    # Dataset with all stocks, split by timestamp. The pickled data loaded
    # above is reused: the former read_stock_history(...) call and the second
    # pair of pickle loads only produced values that were overwritten or
    # identical.
    # All stocks are involved. We choose the first 3 years as training data
    num_training_time = 1095
    target_stocks = abbreviation
    target_history = _select_history(history, abbreviation, target_stocks,
                                     slice(None, num_training_time))
    print((target_history.shape))

    # and the remaining time steps as testing data.
    testing_stocks = abbreviation
    testing_history = _select_history(history, abbreviation, testing_stocks,
                                      slice(num_training_time, None))

    print((testing_history.shape))

    nb_classes = len(target_stocks) + 1
    print(target_history.shape)
    print(testing_history.shape)

    # one figure per stock; plt.show() is called for every figure
    date_list = [index_to_date(i) for i in range(target_history.shape[1])]
    x = range(target_history.shape[1])
    for i, stock_name in enumerate(target_stocks):
        plt.figure(i)
        plt.plot(
            x, target_history[i, :,
                              1])  # open, high, low, close = [0, 1, 2, 3]
        plt.xticks(x[::200], date_list[::200], rotation=30)
        plt.title(stock_name)
        plt.show()

    # common settings
    batch_size = 64
    action_bound = 1.
    tau = 1e-3
    models = []
    model_names = []
    window_length_lst = [3, 7, 14, 21]
    predictor_type_lst = ['cnn', 'lstm']
    use_batch_norm = True

    # imitation models: one LSTM and one CNN per window length
    for window_length in window_length_lst:
        name = 'imit_LSTM%3A window = {}'.format(window_length)
        model_name = 'imitation_lstm_window_{}'.format(window_length)
        model_names.append(model_name)
        # instantiate LSTM model
        lstm_model = StockLSTM(nb_classes,
                               window_length,
                               weights_file='weights/' + name + '.h5')
        lstm_model.build_model(load_weights=True)
        models.append(lstm_model)

        name = 'imit_CNN%3A window = {}'.format(window_length)
        model_name = 'imitation_cnn_window_{}'.format(window_length)
        model_names.append(model_name)
        # instantiate CNN model
        cnn_model = StockCNN(nb_classes,
                             window_length,
                             weights_file='weights/' + name + '.h5')
        cnn_model.build_model(load_weights=True)
        models.append(cnn_model)

    # Everything appended from here on is a DDPG model; remember where they
    # start so that only they are evaluated below.
    first_ddpg_index = len(models)

    # DDPG models: one per (window length, predictor type) combination, each
    # built in a fresh default graph with its own session
    for window_length in window_length_lst:
        for predictor_type in predictor_type_lst:
            name = 'DDPG_window_{}_predictor_{}'.format(
                window_length, predictor_type)
            model_names.append(name)
            tf.reset_default_graph()
            sess = tf.Session()
            tflearn.config.init_training_mode()
            action_dim = [nb_classes]
            state_dim = [nb_classes, window_length]
            variable_scope = get_variable_scope(window_length, predictor_type,
                                                use_batch_norm)
            with tf.variable_scope(variable_scope):
                actor = StockActor(sess, state_dim, action_dim, action_bound,
                                   1e-4, tau, batch_size, predictor_type,
                                   use_batch_norm)
                critic = StockCritic(
                    sess=sess,
                    state_dim=state_dim,
                    action_dim=action_dim,
                    tau=tau,
                    learning_rate=1e-3,
                    num_actor_vars=actor.get_num_trainable_vars(),
                    predictor_type=predictor_type,
                    use_batch_norm=use_batch_norm)
                actor_noise = OrnsteinUhlenbeckActionNoise(
                    mu=np.zeros(action_dim))

                model_save_path = get_model_path(window_length, predictor_type,
                                                 use_batch_norm)
                summary_path = get_result_path(window_length, predictor_type,
                                               use_batch_norm)

                ddpg_model = DDPG(None,
                                  sess,
                                  actor,
                                  critic,
                                  actor_noise,
                                  obs_normalizer=obs_normalizer,
                                  config_file='config/stock.json',
                                  model_save_path=model_save_path,
                                  summary_path=summary_path)
                ddpg_model.initialize(load_weights=True, verbose=False)
                models.append(ddpg_model)

    # instantiate the environment on the training history of all stocks and
    # evaluate the DDPG models only
    env = MultiActionPortfolioEnv(target_history,
                                  target_stocks,
                                  model_names[first_ddpg_index:],
                                  steps=500,
                                  sample_start_date='2012-10-30')

    test_model_multiple(env, models[first_ddpg_index:])