# Example #1

import numpy as np

def Simulate(start, end, mode='train', ep_train=30, ep_start=0):

    # sector = ["Consumer Discretionary", "Consumer Staples", "Energy", "Financials", "Health Care", "Industrials",
    #           "Information Technology", "Materials","Real Estate","Telecommunication Services","Utilities"]
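    # choose exactly one sector / ticker group below and keep the other groups commented out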

    # group 1
    sector = "Information Technology"
    tickers = [
        'AAPL', 'AMAT', 'AMD', 'CSCO', 'EBAY', 'GLW', 'HPQ', 'IBM', 'INTC',
        'KLAC', 'MSFT', 'MU', 'NVDA', 'QCOM', 'TXN'
    ]

    # group 2
    # sector = "Consumer Discretionary"
    # tickers = ['AZO', 'BBY', 'DHI', 'F', 'GPS', 'GRMN', 'HOG', 'JWN', 'MAT', 'MCD', 'NKE', 'SBUX', 'TJX', 'TWX', 'YUM']
    #
    # group 3
    # sector = "Industrials"
    # tickers = ['BA', 'CAT', 'CTAS', 'EMR', 'FDX', 'GD', 'GE', 'LLL', 'LUV', 'MAS', 'MMM', 'NOC', 'RSG', 'UNP', 'WM']

    train_mode = 2
    BUFFER_SIZE = 200
    BATCH_SIZE = 10
    sample_bias = 1.05  # probability weighting of [sample_bias ** i for i in range(1,buffer_size-batch_size)]
    cash_bias = 0  # constant cash-bias input fed to the actor network
    dependent_factor = 0  # exponent applied to the external prediction signal (0 disables it, since x ** 0 == 1)
    if mode == "train":
        LRA = 2e-5  # Learning rate for Actor (training)
    else:
        LRA = 9e-5  # Learning rate for Actor (validating or testing)
    train_rolling_steps = 1
    test_rolling_steps = 0
    window_size = 20  # window size per input
    tickers_num = len(tickers)  # number of assets (excluding cash)
    action_size = tickers_num + 1  # portfolio weights for all assets plus cash
    feature_num = 11  # number of input features per asset (adjusted open, close, high, low, plus additional features)
    state_dim = (tickers_num, window_size, feature_num)  # number of assets x window size x number of features
    episode_reward = []  # reward of each episode = Cumulative Return by CNN / Cumulative Return by UCRP
    np.random.seed(1337)
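    # global step counter across episodes (reset per episode only in 'valid' mode, see below)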
    total_step = 0

    # Tensorflow GPU optimization
    sess = gpu_settings()

    # create network and replay buffer
    actor = ActorNetwork(sess, state_dim, action_size, BATCH_SIZE, LRA)
    buff = ReplayBuffer(BUFFER_SIZE, sample_bias)

    # start simulation
    print("Portfolio Management Simulation Experiment Start ({})".format(mode))
    print("{} period: {} to {}".format(mode, start, end))

    # iterate over the episodes
    for ep in range(ep_start + 1, ep_start + ep_train + 1):

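        # in training mode, saved weights are loaded only for the first episode of the run;
        # in validation/testing mode they are reloaded at the start of every episode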
        if not (mode == "train" and ep != ep_start + 1):
            load_weights(mode, actor, ep)

        train_tickers = prepare_tickers(train_mode, mode, tickers, sector,
                                        start, end)

        # construct a portfolio within the defined period
        P = Portfolio(train_tickers, start, end, mode=mode)
        if not P.consistent:
            exit(0)  # abort the run if the portfolio data are inconsistent

        # construct states
        state = prepare_states(P, window_size)
        predictions = extract_prediction()

        cumulated_return = 1

        # iterate over the defined period
        for step in range(len(state) - 2):

            # extract the last action
            if step == 0:
                last_action = np.array([0 for _ in range(state_dim[0])])
            else:
                last_action = np.array(P.portfolio_weights[-1][:state_dim[0]])

            prediction = np.power(predictions[step], dependent_factor)

            # generate action with a single-element batch (the batch consists of only one state)
            action = actor.model.predict([
                state[step].reshape(
                    [1, state_dim[2], state_dim[1], state_dim[0]]),
                last_action.reshape(1, state_dim[0]),
                np.array([[cash_bias]]),
                prediction.reshape(1, state_dim[0])
            ])
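            # action[0] is the proposed weight vector for this step (length action_size,
            # with the cash weight as the final entry)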

            # generate daily return
            day_return, future_price = P.calculate_return(
                action[0], last_action, step)

            # extract historical data to re-train the model at each time step
            experience_replay(actor, buff, BATCH_SIZE, mode, state, step,
                              future_price, last_action, cash_bias, prediction,
                              train_rolling_steps, test_rolling_steps, sess)

            cumulated_return = cumulated_return * day_return
            total_step += 1
            print("Episode {} Step {} Date {} Cumulated Return {} Day return {}".format(
                ep, total_step,
                P.df_normalized.index[P.start_index + step + 1].strftime('%Y-%m-%d'),
                cumulated_return, day_return))
            print(action[0])

        # No trading on the last day: move the whole portfolio to cash (weight 1) and set the return to 1
        P.portfolio_weights.append(
            np.array([0 for _ in range(tickers_num)] + [1]))

        # Generate the benchmark strategies: Uniform Constant Rebalanced Portfolio (UCRP)
        # and Uniform Buy-and-Hold Portfolio (UBHP)
        P.UCRP()
        P.UBHP()

        # calculate performance: ratio of the cumulative return to that of the UCRP benchmark
        performance = cumulated_return / np.array(P.portfolio_UCRP).cumprod()[-1]
        episode_reward.append(performance)

        # save the model and result
        save_weights_results(P, mode, actor, ep, performance)

        # clear the Portfolio and buffer
        P.clear()
        buff.erase()
        if mode == 'valid':
            total_step = 0

        print("")

    # save_epsoide_performance(mode, start, end, episode_reward)

    print("Finish")
    print("")

    return episode_reward
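

# Minimal usage sketch (commented out; the date strings and ranges below are placeholder
# assumptions -- pass whatever period and format your Portfolio data source expects):
# if __name__ == '__main__':
#     train_rewards = Simulate('2005-01-01', '2012-12-31', mode='train', ep_train=30)
#     test_rewards = Simulate('2013-01-01', '2016-12-31', mode='test', ep_train=1)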