def test(test_data, model, tickers, randomize, num_rand_stocks=0):
    """
    the test function: test agent on test_data
    if randomize == True, we know we have randomized stocks in training, 
    so we test on num_rand_stocks randomly selected stocks from test_data
    if otherwise, we used the entire test_data set

    :param test_data: the testing set
    :param model: the trained model
    :param tickers: stocks tickers corresponding to test_data, including "CASH"
    :param randomize: boolean indicating whether we have randomized stocks
    :param num_rand_stocks: number of stocks randomized in training
    """
    if randomize:
        # the last element of tickers is "CASH"; exclude it from randomization
        rand_stock_indices = np.random.choice(len(tickers) - 1,
                                              num_rand_stocks,
                                              replace=False)
        # get the randomly selected stock names
        episode_tickers = [tickers[index] for index in rand_stock_indices]
        episode_tickers.append("CASH")
        # reshape to [num_rand_stocks, 1] so gather_nd selects whole stock rows
        rand_stock_indices = tf.reshape(rand_stock_indices,
                                        (len(rand_stock_indices), 1))
        # gather the selected rows into a new tensor so test_data is left unmodified
        episode_input = tf.gather_nd(test_data, rand_stock_indices)
    else:
        episode_input = test_data
        episode_tickers = tickers

    env = StockEnv(episode_input, episode_tickers, is_testing=True)
    states, actions, rewards = env.generate_episode(model)
    # regenerate until the episode is long enough to evaluate meaningfully
    min_testing_episode_len = 20
    while len(rewards) < min_testing_episode_len:
        print("test episode not long enough, regenerating")
        states, actions, rewards = env.generate_episode(model)
    print(f'final portfolio total value: {rewards[-1]}')
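

# A minimal usage sketch for test(); the data loader and model constructor
# shown here ("load_dataset", "PolicyGradientModel") are hypothetical
# placeholders, not names defined in this repo:
#
#     test_data, tickers = load_dataset("test")  # [num_stocks, num_days, datum_size]
#     model = PolicyGradientModel()
#     test(test_data, model, tickers, randomize=True, num_rand_stocks=5)
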
def train(train_data,
          model,
          tickers,
          randomize,
          num_rand_stocks=0,
          episode_max_days=200):
    """
    the train function, train the model for an entire epoch

    :param train_data: the preprocessed training data, of shape [num_stocks, num_days, datum_size]
    :param model: the model to be trained
    :param tickers: stock tickers corresponding to train_data, including "CASH"
    :param randomize: boolean indicating whether we have randomized stocks
    :param num_rand_stocks: number of stocks randomized in training
    :param episode_max_days: the maximum number of days of trading actions in an episode

    :return losses and rewards
    """
    num_days = train_data.shape[1]
    loss_list = []

    offset = model.past_num - 1  # extra days of price history needed at beginning
    start = 0  # start of price history slice (inclusive)
    end = start + episode_max_days + offset  # end of price history slice (exclusive)
    num_episodes = (num_days - offset) // episode_max_days
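    # Worked example (assuming past_num = 10, episode_max_days = 200,
    # num_days = 1000): offset = 9, so episode 0 slices days [0, 209) and
    # episode 1 slices days [200, 409); the `offset` extra leading days give
    # the model past_num days of price history before its first action, and
    # num_episodes = (1000 - 9) // 200 = 4.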

    # rewards (total portfolio value per step) collected across all episodes
    rewards_list = []

    for episode in range(num_episodes):
        print(f"Training episode {episode+1} of {num_episodes}")

        if randomize:
            # the last element of tickers is "CASH"; exclude it from randomization
            rand_stock_indices = np.random.choice(len(tickers) - 1,
                                                  num_rand_stocks,
                                                  replace=False)
            # get the randomly selected stock names
            episode_tickers = [tickers[index] for index in rand_stock_indices]
            episode_tickers.append("CASH")
            # reshape to [num_rand_stocks, 1] so gather_nd selects whole stock rows
            rand_stock_indices = tf.reshape(rand_stock_indices,
                                            (len(rand_stock_indices), 1))
            # gather the selected rows into a new tensor so train_data is left unmodified
            episode_input = tf.gather_nd(train_data, rand_stock_indices)
        else:
            episode_input = train_data
            episode_tickers = tickers

        # Slice of pricing history to generate this episode on
        episode_input = episode_input[:, start:end, :]
        start += episode_max_days
        end += episode_max_days

        # (REMOVED) randomize starting date in each episode
        # rand_start = randint(0, int(episode_max_days / 5))
        # episode_input = episode_input[:, rand_start:episode_max_days,:]

        # Environment hyperparameters; adjust as needed:
        env = StockEnv(episode_input,
                       episode_tickers,
                       interest_annual=0.1,
                       borrow_interest_annual=0.2,
                       transaction_penalty=0.0001)

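        # One REINFORCE-style update per episode: generate a trajectory,
        # discount its rewards, mix in replayed experience, and take one
        # optimizer step on the policy-gradient loss.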
        with tf.GradientTape() as tape:
            states, actions, rewards = env.generate_episode(model)
            rewards_list.extend(rewards)
            discounted_rewards = discount(rewards)
            model.remember(states, actions, discounted_rewards)
            (repl_states, repl_actions,
             repl_discounted_rewards) = model.experience_replay()
            model_loss = model.loss(repl_states, repl_actions,
                                    repl_discounted_rewards)
        gradients = tape.gradient(model_loss, model.trainable_variables)
        model.optimizer.apply_gradients(
            zip(gradients, model.trainable_variables))
        loss_list.append(model_loss.numpy())  # record this episode's loss
    return loss_list, rewards_list
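

# For reference, a minimal sketch of what the `discount` helper called in
# train() typically computes: standard discounted returns
# G_t = r_t + gamma * G_{t+1}, accumulated back-to-front. The project's
# actual `discount` is defined elsewhere and may differ (e.g. in its gamma
# or any normalization), so this illustrative version uses a distinct name:
def _discount_sketch(rewards, gamma=0.99):
    """Illustrative only: discounted returns over a reward sequence."""
    discounted = np.zeros(len(rewards))
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        discounted[t] = running
    return discounted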