def invest(self, data, window=30, debug=False):
    """Run the trained classifier over *data* and record portfolio performance.

    Predicts one action per row from state features, replays them through
    one_step(), and scores the resulting rate-of-return history against
    the benchmark.

    Parameters
    ----------
    data : price series; backfilled in place to remove NaNs.
    window : int, leading rows reserved for state warm-up.
    debug : bool, forwarded to one_step for verbose output.

    Side effects: populates self.state, self.r_actions, self.ror_history,
    self.history and self.score.  Returns None early on empty input.
    """
    if len(data.keys()) == 0:
        return
    # bfill() replaces the deprecated fillna(method="bfill") call
    data.bfill(inplace=True)
    states = State(window, data)
    self.state = np.array(
        [states.get_state(i) for i in range(window, len(data))])
    np.set_printoptions(suppress=True)
    self.ror_history = np.full(len(data) - window, 0.)
    self.history = np.full(len(data) - window, 0.)
    # columns 0, 2, 3 of the state are the features the classifier was trained on
    self.r_actions = self.clf.predict(self.state[:, [0, 2, 3]])
    for i in range(window, len(data)):
        # NOTE: the old per-iteration `portfolio` computation was unused
        # (debug leftover) and has been dropped.
        self.one_step(data.iloc[i], debug, i, window)
        self.ror_history[i - window] = \
            self.history[i - window] / self.invested - 1.
    self.score = my_score(self.ror_history, states.bench)
def invest(self, data, window=30, debug=False):
    """Run the three-classifier ensemble over *data* and record performance.

    One classifier scores each action (hold / sell / buy) on the state
    features; the per-row argmax becomes the action sequence that is then
    replayed through one_step().

    Parameters
    ----------
    data : price series; backfilled in place to remove NaNs.
    window : int, leading rows reserved for state warm-up.
    debug : bool, forwarded to one_step for verbose output.

    Side effects: populates self.state, self.r_actions, self.ror_history,
    self.history and self.score.  Returns None early on empty input.
    """
    if len(data.keys()) == 0:
        return
    # bfill() replaces the deprecated fillna(method="bfill") call
    data.bfill(inplace=True)
    states = State(window, data)
    self.state = np.array(
        [states.get_state(i) for i in range(window, len(data))])
    np.set_printoptions(suppress=True)
    self.ror_history = np.full(len(data) - window, 0.)
    self.history = np.full(len(data) - window, 0.)
    # hoist the feature slice: the original recomputed it for every classifier
    features = self.state[:, [0, 2, 3]]
    scores = np.empty((len(data) - window, 3))
    scores[:, 0] = self.clf_hold.predict(features)
    scores[:, 1] = self.clf_sell.predict(features)
    scores[:, 2] = self.clf_buy.predict(features)
    # per-row winner: 0=hold, 1=sell, 2=buy
    self.r_actions = np.argmax(scores, axis=1)
    for i in range(window, len(data)):
        self.one_step(data.iloc[i], debug, i, window)
        self.ror_history[i - window] = \
            self.history[i - window] / self.invested - 1.
    self.score = my_score(self.ror_history, states.bench)
def best_of_best(model_dir='best_models_btc_eur/', debug=False):
    """Evaluate saved models over a fixed BTC-EUR window and plot the winners.

    Runs find_best_models on a hard-coded date range, plots the benchmark
    against the best-score and best-ror agents, and prints their final
    ror / portfolio values.

    Parameters
    ----------
    model_dir : directory containing the candidate models.
    debug : bool, forwarded to find_best_models.
    """
    # start_date = '2017-10-01'
    # end_date = '2018-05-01'
    start_date = '2018-07-01'
    end_date = '2018-09-14'
    ticket = 'BTC-EUR'
    data, max_agent, best_ror_agent = find_best_models(
        start_date, end_date, ticket=ticket, dir_models=model_dir,
        debug=debug)
    # .bfill() replaces the deprecated fillna(method="bfill")
    data = data[ticket].reset_index(drop=True).bfill()
    print(data.head(2))
    print(data.tail(2))
    window = 30
    states = State(window, data)
    print(states.bench[0])
    print(states.bench[1])
    print(states.bench[-2])
    print(states.bench[-1])
    print(len(data) - window)
    plt.plot(states.bench[window:], color='black')
    plt.plot(max_agent.ror_history, color='red')
    plt.plot(best_ror_agent.ror_history, color='blue')
    plt.legend(['bench', max_agent.model, best_ror_agent.model])
    plt.show()
    print('best(score):', max_agent.model)
    print('ror:', max_agent.ror_history[-1])
    print('portfolio:', max_agent.history[-1])
    print('best(ror):', best_ror_agent.model)
    print('ror:', best_ror_agent.ror_history[-1])
    print('portfolio:', best_ror_agent.history[-1])
import sys

# make the shared etf_data loader importable from this script's location
sys.path.insert(0, '../../../etf_data')
from etf_data_loader import load_all_data_from_file2

start_date = '2017-05-01'
end_date = '2018-05-01'
ticket = 'BTC-EUR'
window = 30

data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date,
                                end_date)
print(start_date, ' - ', end_date)
# .bfill() replaces the deprecated fillna(method="bfill")
data = data[ticket].reset_index(drop=True).bfill()
print(data.head(2))
print(data.tail(2))
print(len(data))

states = State(window, data)

# benchmark: time 100 random agents over the same state history
t1 = time.time()
for _ in range(100):
    agent = CryptoRandomAgent(
        ticket, data, states,
        r_actions=np.random.randint(0, 3, size=len(data) - states.window),
        coef=0.5)
    agent.run()
print('time:', time.time() - t1)
print(len(agent.state))
print(len(agent.r_actions))
def run_agent():
    """Train a dense policy network on BTC-EUR daily closes with a
    REINFORCE-style loop.

    Each iteration samples one action per timestep from the model's
    predicted probabilities, evaluates the sequence with
    policy_evaluator.Agent, and fits the model toward targets weighted by
    the standardized discounted rewards.  The best weights seen (by final
    ror) are checkpointed to weights_temp.h5, the final weights go to
    weights.h5, and ror / loss / reward histories are plotted at the end.
    """
    start_date = '2011-01-01'
    end_date = '2018-09-14'
    ticket = 'BTC-EUR'
    data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date,
                                    end_date)
    data = data[data['date'] > str(start_date)]
    data = data[data['date'] < str(end_date)]
    print(start_date, " - ", end_date, " ,len = ", len(data))
    # keep only rows with a positive price for the ticket
    data = data[data[ticket] > 0.]
    # NOTE(review): reindex(method='bfill') without a new index looks like a
    # no-op; presumably fillna(method='bfill') was intended — confirm.
    data = data.reindex(method='bfill')
    data.reset_index(inplace=True)
    data = data[ticket]
    print(data)
    window = 30
    states = State(window, data)
    print(states.bench[0])
    print(states.bench[1])
    print(states.bench[-2])
    print(states.bench[-1])
    print(len(data) - window)
    learning_rate = 0.1
    # model = policy.create_lstm_model(learning_rate)
    model = policy.create_dense_model(learning_rate)
    x = states.get_whole_state()[window:]
    # x = np.reshape(x, (x.shape[0], 1, x.shape[1]))
    rors = []
    losses = []
    discos = []
    print()
    max_ror = None
    for it in range(1000):
        predicted_action_proba = model.predict(x)
        # sample one action (0/1/2) per timestep from the predicted distribution
        actions = np.empty(predicted_action_proba.shape[0], dtype=np.int32)
        for i in range(predicted_action_proba.shape[0]):
            actions[i] = np.random.choice(3, 1, p=predicted_action_proba[i])
        agent_evaluator = policy_evaluator.Agent(actions)
        agent_evaluator.run(data, states)
        rors.append(agent_evaluator.ror_history[-1])
        # checkpoint whenever the final rate-of-return improves
        if max_ror is None or max_ror < rors[it]:
            max_ror = rors[it]
            print('saving at ror:', rors[it])
            model.save_weights('weights_temp.h5', overwrite=True)
        # standardize the discounted rewards (zero mean, unit variance)
        disco = agent_evaluator.disco_rewards - np.mean(
            agent_evaluator.disco_rewards)
        disco = disco / np.std(agent_evaluator.disco_rewards)
        # disco = np.reshape(disco, (disco.shape[0], 1))
        discos.append(disco[-1])
        # y = predicted_action_proba + learning_rate * disco
        # target: only the sampled action's slot carries its standardized return
        y = np.zeros_like(predicted_action_proba)
        for i in range(predicted_action_proba.shape[0]):
            y[i][actions[i]] = disco[i]
        loss = model.fit(x, y, nb_epoch=10, verbose=0, shuffle=True,
                         validation_split=0.3)
        losses.append(loss.history['loss'])
        print('\r[%d] %f | %f | %f | %f' % (it, rors[it], losses[it][-1],
                                            agent_evaluator.rewards[-1],
                                            disco[-1]),
              end='')
        # if loss.history['loss'][-1] <= 0. or np.isnan(loss.history['loss'][-1]):
        #     break
        # recover from a diverged fit by reloading the last checkpoint
        if np.isnan(loss.history['loss'][-1]):
            print('loading model...')
            model.load_weights('weights_temp.h5')
    model.save_weights('weights.h5', overwrite=True)
    _, ax = plt.subplots(3, 1)
    ax[0].plot(rors)
    ax[0].set_title('rors')
    ax[1].plot(losses)
    ax[1].set_title('loss')
    ax[2].plot(discos)
    ax[2].set_title('disco')
    plt.show()
def run_agent():
    """Train an LSTM policy on BTC-EUR closes, episode by episode.

    For each step of each episode, the growing history of (timesteps, 5)
    state windows is fed to the model, one action per window is sampled
    from the predicted probabilities and replayed through
    cont_policy_evaluator.RecordingAgent, and the model is fit toward the
    probabilities scaled by the discounted rewards.  Saves the final
    weights to weights.h5 and plots per-episode ror / loss /
    expected-reward curves.
    """
    # start_date = '2011-01-01'
    start_date = '2018-01-01'
    end_date = '2018-09-14'
    ticket = 'BTC-EUR'
    data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date,
                                    end_date)
    data = data[data['date'] > str(start_date)]
    data = data[data['date'] < str(end_date)]
    print(start_date, " - ", end_date, " ,len = ", len(data))
    data = data[data[ticket] > 0.]
    # NOTE(review): reindex(method='bfill') without a new index looks like a
    # no-op; presumably fillna(method='bfill') was intended — confirm.
    data = data.reindex(method='bfill')
    data.reset_index(inplace=True)
    data = data[ticket]
    window = 30
    learning_rate = 0.001
    timesteps = 7
    model = policy.create_lstm_model(learning_rate, timesteps)
    # model = policy.create_dense_model(learning_rate)
    # x = np.reshape(x, (x.shape[0], 1, x.shape[1]))
    all_rors = []
    all_losses = []
    all_discos = []
    print()
    print(
        '[episode][it/max it] ror | loss | reward | expected_reward | action')
    actions = {0: 'hold', 1: 'sell', 2: 'buy'}
    states = State(window, data)
    for episode in range(10):
        input = []
        labels = []
        losses = []
        discos = []
        rors = []
        for t in range(window + timesteps, len(data)):
            # fresh evaluator every step: the entire action history so far
            # is re-sampled and replayed below
            agent_evaluator = cont_policy_evaluator.RecordingAgent(
                data, states)
            # x = states.get_state(t)
            x = states.get_partial_state(t, timesteps)
            # lstm
            x = x.reshape((1, timesteps, 5))
            input.append(x)
            x = np.array(input).reshape((len(input), timesteps, 5))
            # dense input
            # x = x.reshape((1, 5))
            predicted_action_proba = model.predict(x)
            runs = predicted_action_proba.shape[0] - 1
            # sample and replay one action per accumulated window
            for run in range(predicted_action_proba.shape[0]):
                action = np.random.choice(
                    3, 1, p=predicted_action_proba[run])[0]
                agent_evaluator.run(action, t - runs + run)
                # print(run, '|', action, '|', agent_evaluator.rewards[t - window-runs+run])
            index = t - window
            rors.append(agent_evaluator.ror_history[index])
            discos.append(agent_evaluator.disco_rewards[-1])
            # y = predicted_action_proba + learning_rate * agent_evaluator.disco_rewards
            # target: predicted probabilities scaled by discounted rewards
            y = predicted_action_proba * agent_evaluator.disco_rewards
            # print(y.shape)
            # labels.append(y.reshape((3,)))
            # y = np.array(labels)
            loss = model.fit(x, y, nb_epoch=1, verbose=0, shuffle=True,
                             validation_split=0.3)
            # guard: loss.history may lack a 'loss' entry — presumably when
            # the validation split leaves too few training samples; confirm
            if 'loss' in loss.history.keys():
                losses.append(loss.history['loss'])
                print('\r[%d][%d/%d] %f | %f | %f | %f | %s' %
                      (episode, t, len(data), rors[-1], losses[-1][-1],
                       np.mean(agent_evaluator.rewards),
                       agent_evaluator.disco_rewards[-1], actions[action]),
                      end='')
        all_losses.append(losses)
        all_discos.append(discos)
        all_rors.append(rors)
    model.save_weights('weights.h5', overwrite=True)
    _, ax = plt.subplots(3, 1)
    for ii in range(len(all_rors)):
        ax[0].plot(all_rors[ii], label=str(ii))
    ax[0].set_title('rors')
    for ii in range(len(all_losses)):
        ax[1].plot(all_losses[ii], label=str(ii))
    ax[1].set_title('loss')
    for ii in range(len(all_discos)):
        ax[2].plot(all_discos[ii], label=str(ii))
    ax[2].set_title('expected_reward')
    for axis in ax:
        axis.legend()
    plt.show()
def run_agent():
    """Evaluate the saved LSTM policy (weights.h5) on a BTC-EUR window.

    Builds one (timesteps, 5) state window per step, samples an action per
    step from the model's predicted probabilities, replays them through
    cont_policy_evaluator.RecordingAgent, and plots the resulting ror,
    reward and discounted-reward histories.
    """
    start_date = '2018-04-01'
    end_date = '2018-09-14'
    # start_date = '2011-01-01'
    # end_date = '2018-04-01'
    ticket = 'BTC-EUR'
    data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date,
                                    end_date)
    data = data[data['date'] > str(start_date)]
    data = data[data['date'] < str(end_date)]
    print(start_date, " - ", end_date, " ,len = ", len(data))
    data = data[data[ticket] > 0.]
    # NOTE(review): reindex(method='bfill') without a new index looks like a
    # no-op; presumably fillna(method='bfill') was intended — confirm.
    data = data.reindex(method='bfill')
    data.reset_index(inplace=True)
    data = data[ticket]
    print(data)
    window = 30
    states = State(window, data)
    print(states.bench[0])
    print(states.bench[1])
    print(states.bench[-2])
    print(states.bench[-1])
    print(len(data) - window)
    # model = policy.create_dense_model(0.001)
    timesteps = 7
    model = policy.create_lstm_model(0.001, timesteps)
    model.load_weights('weights.h5')
    # x = states.get_whole_state()[window:]
    # x = np.reshape(x, (x.shape[0], timesteps, x.shape[1]))
    # build one (timesteps, 5) window per evaluation step
    input = []
    for t in range(window + timesteps, len(data)):
        x = states.get_partial_state(t, timesteps)
        x = x.reshape((1, timesteps, 5))
        input.append(x)
    x = np.array(input).reshape((len(input), timesteps, 5))
    predicted_action_proba = model.predict(x)
    print(predicted_action_proba.shape)
    print(x.shape)
    # actions = np.empty(predicted_action_proba.shape[0], dtype=np.int32)
    agent_evaluator = cont_policy_evaluator.RecordingAgent(data, states)
    # for i in range(predicted_action_proba.shape[0]):
    #     actions[i] = np.random.choice(3, 1, p=predicted_action_proba[i])
    # sample an action per step from the predicted distribution and replay it
    for run in range(predicted_action_proba.shape[0]):
        action = np.random.choice(3, 1, p=predicted_action_proba[run])[0]
        agent_evaluator.run(action, window + run)
    # agent_evaluator = policy_evaluator.Agent(actions)
    # agent_evaluator.run(data, states)
    print(agent_evaluator.ror_history[-1])
    plt.plot(agent_evaluator.ror_history)
    plt.title('ror')
    plt.show()
    _, ax = plt.subplots(2, 1)
    ax[0].plot(agent_evaluator.rewards)
    ax[1].plot(agent_evaluator.disco_rewards)
    plt.show()
from etf_data_loader import load_all_data_from_file2 print('Loading price data...') start_date = '2018-01-01' end_date = '2018-05-01' ticket = 'BTC-EUR' data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date, end_date) print(start_date, ' - ', end_date) data = data[[ticket]] data = data.reset_index(drop=True) data.fillna(method="bfill", inplace=True) state = State(30, data) print(state.get_state(10)) print(data.head(2)) print(data.tail(2)) print(len(data)) plt.plot(data) plt.show() action_map = {1: 'S', 2: 'B', 0: 'H'} print('Loading data....') x = np.load( '/home/martin/Projects/AI_playground/reinforcement_learning/crypto_market/data_ga_periodic/x_back.npy' ) y = np.load(