Example #1

    def invest(self, data, window=30, debug=False):

        if len(data.keys()) == 0:
            return

        data.fillna(method="bfill", inplace=True)

        states = State(window, data)
        # print(data.head(2))
        # print(data.tail(2))
        # print(states.bench[0])
        # print(states.bench[1])
        # print(states.bench[-2])
        # print(states.bench[-1])

        self.state = np.array(
            [states.get_state(i) for i in range(window, len(data))])
        np.set_printoptions(suppress=True)
        # print(self.state)

        self.ror_history = np.full(len(data) - window, 0.)
        self.history = np.full(len(data) - window, 0.)
        self.r_actions = self.clf.predict(self.state[:, [0, 2, 3]])
        # print(self.r_actions)

        for i in range(window, len(data)):
            portfolio = self.cash + data.iloc[i] * self.shares
            # print(portfolio, '|', self.cash, '|', self.shares)
            self.one_step(data.iloc[i], debug, i, window)
            self.ror_history[
                i - window] = self.history[i - window] / self.invested - 1.

        # self.ror_history = (self.history - self.invested) / self.invested
        # [print(r) for r in self.ror_history]
        self.score = my_score(self.ror_history, states.bench)
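
The running rate of return above is just portfolio value measured against the cash put in. A minimal sketch of that bookkeeping, with illustrative names (cash, shares, invested are stand-ins, not necessarily the agent's actual fields):

import numpy as np

def rate_of_return(cash, shares, price, invested):
    # portfolio value = uninvested cash plus mark-to-market value of holdings;
    # 0.0 means break-even, 0.05 means +5% on the amount invested
    return (cash + shares * price) / invested - 1.0

print([rate_of_return(50.0, 1.0, p, 150.0) for p in np.array([100.0, 110.0, 95.0])])
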
Example #2

    def invest(self, data, window=30, debug=False):

        if len(data.keys()) == 0:
            return

        data.fillna(method="bfill", inplace=True)
        states = State(window, data)

        self.state = np.array(
            [states.get_state(i) for i in range(window, len(data))])
        np.set_printoptions(suppress=True)

        self.ror_history = np.full(len(data) - window, 0.)
        self.history = np.full(len(data) - window, 0.)

        self.r_actions = np.empty((len(data) - window, 3))
        self.r_actions[:, 0] = self.clf_hold.predict(self.state[:, [0, 2, 3]])
        self.r_actions[:, 1] = self.clf_sell.predict(self.state[:, [0, 2, 3]])
        self.r_actions[:, 2] = self.clf_buy.predict(self.state[:, [0, 2, 3]])
        # [print(a) for a in self.r_actions]
        self.r_actions = np.argmax(self.r_actions, axis=1)

        for i in range(window, len(data)):
            # portfolio = self.cash + data.iloc[i] * self.shares
            # print(portfolio, '|', self.cash, '|', self.shares)
            self.one_step(data.iloc[i], debug, i, window)
            self.ror_history[
                i - window] = self.history[i - window] / self.invested - 1.

        # self.ror_history = (self.history - self.invested) / self.invested
        # [print(r) for r in self.ror_history]
        self.score = my_score(self.ror_history, states.bench)
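
This variant scores each action with its own regressor (clf_hold, clf_sell, clf_buy) and takes the argmax per row. A self-contained sketch of that one-model-per-action pattern with scikit-learn (the estimator type and the synthetic targets are assumptions, not the repo's training setup):

import numpy as np
from sklearn.ensemble import RandomForestRegressor

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))                       # three state features, as in state[:, [0, 2, 3]]
targets = [rng.normal(size=200) for _ in range(3)]  # per-action reward estimates (synthetic)

# one regressor per action: 0=hold, 1=sell, 2=buy
models = [RandomForestRegressor(n_estimators=20, random_state=0).fit(X, t) for t in targets]

scores = np.column_stack([m.predict(X) for m in models])  # shape (n_samples, 3)
actions = np.argmax(scores, axis=1)                       # best-scoring action per step
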
Example #3

def best_of_best(model_dir='best_models_btc_eur/', debug=False):
    # start_date = '2017-10-01'
    # end_date = '2018-05-01'
    start_date = '2018-07-01'
    end_date = '2018-09-14'

    ticket = 'BTC-EUR'
    best_dir = model_dir

    data, max_agent, best_ror_agent = find_best_models(start_date,
                                                       end_date,
                                                       ticket=ticket,
                                                       dir_models=best_dir,
                                                       debug=debug)
    data = data[ticket].reset_index(drop=True).fillna(method="bfill")
    print(data.head(2))
    print(data.tail(2))

    window = 30
    states = State(window, data)
    print(states.bench[0])
    print(states.bench[1])
    print(states.bench[-2])
    print(states.bench[-1])
    print(len(data) - window)

    plt.plot(states.bench[window:], color='black')
    plt.plot(max_agent.ror_history, color='red')
    plt.plot(best_ror_agent.ror_history, color='blue')
    plt.legend(['bench', max_agent.model, best_ror_agent.model])
    plt.show()

    print('best(score):', max_agent.model)
    print('ror:', max_agent.ror_history[-1])
    print('portfolio:', max_agent.history[-1])

    print('best(ror):', best_ror_agent.model)
    print('ror:', best_ror_agent.ror_history[-1])
    print('portfolio:', best_ror_agent.history[-1])
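
states.bench is plotted as the baseline both agents are compared against. A common choice for such a baseline is buy-and-hold over the evaluation span; a sketch of that benchmark (an assumption about bench, not its actual definition in State):

import numpy as np

def buy_and_hold(prices, window):
    # cumulative return of buying once at the first evaluated day and holding
    prices = np.asarray(prices, dtype=float)
    return prices[window:] / prices[window] - 1.0

print(buy_and_hold([10., 11., 9., 12., 15.], window=1))  # [0., -0.18..., 0.09..., 0.36...]
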
Example #4
import sys

sys.path.insert(0, '../../../etf_data')
from etf_data_loader import load_all_data_from_file2

start_date = '2017-05-01'
end_date = '2018-05-01'

ticket = 'BTC-EUR'
window = 30
data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date, end_date)
print(start_date, ' - ', end_date)
data = data[ticket].reset_index(drop=True).fillna(method="bfill")
print(data.head(2))
print(data.tail(2))
print(len(data))

states = State(window, data)

t1 = time.time()
for _ in range(100):
    agent = CryptoRandomAgent(ticket, data, states,
                              r_actions=np.random.randint(0, 3, size=len(data) - states.window),
                              coef=0.5)
    agent.run()
print('time:', time.time() - t1)

print(len(agent.state))
print(len(agent.r_actions))
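
time.time() is good enough for a rough number, but for timing a tight loop like the 100-episode benchmark above, time.perf_counter() is the higher-resolution clock. A sketch:

import time

t0 = time.perf_counter()
for _ in range(100):
    pass  # run one CryptoRandomAgent episode here
print('time: %.3fs' % (time.perf_counter() - t0))
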
Example #5
def run_agent():
    start_date = '2011-01-01'
    end_date = '2018-09-14'

    ticket = 'BTC-EUR'

    data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date,
                                    end_date)
    data = data[data['date'] > start_date]
    data = data[data['date'] < end_date]

    print(start_date, ' - ', end_date, ', len =', len(data))
    data = data[data[ticket] > 0.]
    # backfill missing prices; reindex(method=...) without a new index is a no-op
    data = data.fillna(method='bfill')
    data.reset_index(inplace=True)
    data = data[ticket]

    print(data)

    window = 30
    states = State(window, data)
    print(states.bench[0])
    print(states.bench[1])
    print(states.bench[-2])
    print(states.bench[-1])
    print(len(data) - window)
    learning_rate = 0.1
    # model = policy.create_lstm_model(learning_rate)
    model = policy.create_dense_model(learning_rate)
    x = states.get_whole_state()[window:]
    # x = np.reshape(x, (x.shape[0], 1, x.shape[1]))
    rors = []
    losses = []
    discos = []
    print()
    max_ror = None

    for it in range(1000):
        predicted_action_proba = model.predict(x)
        actions = np.empty(predicted_action_proba.shape[0], dtype=np.int32)

        for i in range(predicted_action_proba.shape[0]):
            actions[i] = np.random.choice(3, p=predicted_action_proba[i])

        agent_evaluator = policy_evaluator.Agent(actions)
        agent_evaluator.run(data, states)

        rors.append(agent_evaluator.ror_history[-1])

        if max_ror is None or max_ror < rors[it]:
            max_ror = rors[it]
            print('saving at ror:', rors[it])
            model.save_weights('weights_temp.h5', overwrite=True)

        disco = agent_evaluator.disco_rewards - np.mean(
            agent_evaluator.disco_rewards)
        disco = disco / np.std(agent_evaluator.disco_rewards)
        # disco = np.reshape(disco, (disco.shape[0], 1))
        discos.append(disco[-1])
        # y = predicted_action_proba + learning_rate * disco

        y = np.zeros_like(predicted_action_proba)
        for i in range(predicted_action_proba.shape[0]):
            y[i][actions[i]] = disco[i]

        loss = model.fit(x,
                         y,
                         epochs=10,
                         verbose=0,
                         shuffle=True,
                         validation_split=0.3)
        losses.append(loss.history['loss'])
        print('\r[%d] %f | %f | %f | %f' %
              (it, rors[it], losses[it][-1], agent_evaluator.rewards[-1],
               disco[-1]),
              end='')

        # if loss.history['loss'][-1] <= 0. or np.isnan(loss.history['loss'][-1]):
        #     break
        if np.isnan(loss.history['loss'][-1]):
            print('loading model...')
            model.load_weights('weights_temp.h5')

    model.save_weights('weights.h5', overwrite=True)

    _, ax = plt.subplots(3, 1)
    ax[0].plot(rors)
    ax[0].set_title('rors')
    ax[1].plot(losses)
    ax[1].set_title('loss')
    ax[2].plot(discos)
    ax[2].set_title('disco')

    plt.show()
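
The training loop above is REINFORCE: the target y is zero everywhere except the action actually taken, which receives the normalized discounted reward, so fitting the softmax output nudges probability mass toward actions that outperformed the average. The same construction, vectorized (a sketch, not the repo's code):

import numpy as np

def reinforce_targets(action_proba, actions, advantages):
    # one-hot-like targets scaled by the advantage of the sampled action
    y = np.zeros_like(action_proba)
    y[np.arange(len(actions)), actions] = advantages
    return y

proba = np.array([[0.2, 0.3, 0.5], [0.6, 0.1, 0.3]])
print(reinforce_targets(proba, np.array([2, 0]), np.array([1.4, -0.7])))
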
Example #6
def run_agent():
    # start_date = '2011-01-01'
    start_date = '2018-01-01'
    end_date = '2018-09-14'

    ticket = 'BTC-EUR'

    data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date,
                                    end_date)
    data = data[data['date'] > start_date]
    data = data[data['date'] < end_date]

    print(start_date, ' - ', end_date, ', len =', len(data))
    data = data[data[ticket] > 0.]
    data = data.fillna(method='bfill')
    data.reset_index(inplace=True)
    data = data[ticket]

    window = 30
    learning_rate = 0.001

    timesteps = 7

    model = policy.create_lstm_model(learning_rate, timesteps)
    # model = policy.create_dense_model(learning_rate)
    # x = np.reshape(x, (x.shape[0], 1, x.shape[1]))
    all_rors = []
    all_losses = []
    all_discos = []
    print()
    print(
        '[episode][it/max it] ror | loss | reward | expected_reward | action')
    actions = {0: 'hold', 1: 'sell', 2: 'buy'}
    states = State(window, data)
    for episode in range(10):
        inputs = []
        labels = []
        losses = []
        discos = []
        rors = []

        for t in range(window + timesteps, len(data)):
            agent_evaluator = cont_policy_evaluator.RecordingAgent(
                data, states)
            # x = states.get_state(t)
            x = states.get_partial_state(t, timesteps)
            # lstm
            x = x.reshape((1, timesteps, 5))
            inputs.append(x)
            x = np.array(inputs).reshape((len(inputs), timesteps, 5))
            # dense input
            # x = x.reshape((1, 5))
            predicted_action_proba = model.predict(x)
            runs = predicted_action_proba.shape[0] - 1
            for run in range(predicted_action_proba.shape[0]):
                action = np.random.choice(3, 1,
                                          p=predicted_action_proba[run])[0]
                agent_evaluator.run(action, t - runs + run)
                # print(run, '|', action, '|', agent_evaluator.rewards[t - window-runs+run])

            index = t - window

            rors.append(agent_evaluator.ror_history[index])
            discos.append(agent_evaluator.disco_rewards[-1])

            # y = predicted_action_proba + learning_rate * agent_evaluator.disco_rewards
            y = predicted_action_proba * agent_evaluator.disco_rewards
            # print(y.shape)
            # labels.append(y.reshape((3,)))
            # y = np.array(labels)

            loss = model.fit(x,
                             y,
                             epochs=1,
                             verbose=0,
                             shuffle=True,
                             validation_split=0.3)

            if 'loss' in loss.history.keys():
                losses.append(loss.history['loss'])
                print('\r[%d][%d/%d] %f | %f | %f | %f | %s' %
                      (episode, t, len(data), rors[-1], losses[-1][-1],
                       np.mean(agent_evaluator.rewards),
                       agent_evaluator.disco_rewards[-1], actions[action]),
                      end='')
        all_losses.append(losses)
        all_discos.append(discos)
        all_rors.append(rors)

    model.save_weights('weights.h5', overwrite=True)

    _, ax = plt.subplots(3, 1)
    for ii in range(len(all_rors)):
        ax[0].plot(all_rors[ii], label=str(ii))
    ax[0].set_title('rors')
    for ii in range(len(all_losses)):
        ax[1].plot(all_losses[ii], label=str(ii))
    ax[1].set_title('loss')
    for ii in range(len(all_discos)):
        ax[2].plot(all_discos[ii], label=str(ii))
    ax[2].set_title('expected_reward')
    for axis in ax:
        axis.legend()

    plt.show()
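
The inner loop above grows x one window at a time by appending each (timesteps, 5) slice and re-stacking. The reshaping itself can be done in one pass; a sketch of building the (samples, timesteps, features) tensor an LSTM expects (the feature count 5 is taken from the example):

import numpy as np

def rolling_windows(features, timesteps):
    # overlapping windows: row t holds features[t - timesteps : t]
    return np.stack([features[t - timesteps:t]
                     for t in range(timesteps, len(features) + 1)])

feats = np.random.rand(40, 5)   # 40 days x 5 features
x = rolling_windows(feats, 7)   # timesteps = 7
print(x.shape)                  # (34, 7, 5)
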
Example #7
def run_agent():
    start_date = '2018-04-01'
    end_date = '2018-09-14'
    # start_date = '2011-01-01'
    # end_date = '2018-04-01'

    ticket = 'BTC-EUR'

    data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date,
                                    end_date)
    data = data[data['date'] > start_date]
    data = data[data['date'] < end_date]

    print(start_date, ' - ', end_date, ', len =', len(data))
    data = data[data[ticket] > 0.]
    data = data.fillna(method='bfill')
    data.reset_index(inplace=True)
    data = data[ticket]

    print(data)

    window = 30
    states = State(window, data)
    print(states.bench[0])
    print(states.bench[1])
    print(states.bench[-2])
    print(states.bench[-1])
    print(len(data) - window)

    # model = policy.create_dense_model(0.001)
    timesteps = 7
    model = policy.create_lstm_model(0.001, timesteps)
    model.load_weights('weights.h5')
    # x = states.get_whole_state()[window:]
    # x = np.reshape(x, (x.shape[0], timesteps, x.shape[1]))
    inputs = []
    for t in range(window + timesteps, len(data)):
        x = states.get_partial_state(t, timesteps)
        x = x.reshape((1, timesteps, 5))
        inputs.append(x)

    x = np.array(inputs).reshape((len(inputs), timesteps, 5))

    predicted_action_proba = model.predict(x)
    print(predicted_action_proba.shape)
    print(x.shape)
    # actions = np.empty(predicted_action_proba.shape[0], dtype=np.int32)
    agent_evaluator = cont_policy_evaluator.RecordingAgent(data, states)
    # for i in range(predicted_action_proba.shape[0]):
    #     actions[i] = np.random.choice(3, 1, p=predicted_action_proba[i])
    for run in range(predicted_action_proba.shape[0]):
        action = np.random.choice(3, 1, p=predicted_action_proba[run])[0]
        agent_evaluator.run(action, window + run)

    # agent_evaluator = policy_evaluator.Agent(actions)
    # agent_evaluator.run(data, states)

    print(agent_evaluator.ror_history[-1])

    plt.plot(agent_evaluator.ror_history)
    plt.title('ror')
    plt.show()

    _, ax = plt.subplots(2, 1)
    ax[0].plot(agent_evaluator.rewards)
    ax[1].plot(agent_evaluator.disco_rewards)
    plt.show()
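
Note that this evaluation samples each action from the predicted probabilities, so every run of the script trades differently; taking the argmax instead gives the deterministic greedy policy. A minimal illustration:

import numpy as np

proba = np.array([[0.1, 0.2, 0.7],
                  [0.5, 0.4, 0.1]])

greedy = np.argmax(proba, axis=1)                     # always [2, 0]
sampled = [np.random.choice(3, p=p) for p in proba]   # varies run to run
print(greedy, sampled)
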
Example #8

from etf_data_loader import load_all_data_from_file2

print('Loading price data...')
start_date = '2018-01-01'
end_date = '2018-05-01'

ticket = 'BTC-EUR'

data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date,
                                end_date)
print(start_date, ' - ', end_date)
data = data[[ticket]]
data = data.reset_index(drop=True)
data.fillna(method="bfill", inplace=True)

state = State(30, data)
print(state.get_state(10))

print(data.head(2))
print(data.tail(2))
print(len(data))
plt.plot(data)
plt.show()

action_map = {1: 'S', 2: 'B', 0: 'H'}

print('Loading data....')
x = np.load(
    '/home/martin/Projects/AI_playground/reinforcement_learning/crypto_market/data_ga_periodic/x_back.npy'
)
y = np.load(