def main():
    """Train a DQN agent on 1-minute EURUSD data via a BTgym environment.

    Runs `trials` episodes of at most `trial_len` steps each, updating the
    agent's target network after every environment step, rendering and
    saving the model after each trial.
    """
    env = BTgymEnv(
        filename='./data/DAT_ASCII_EURUSD_M1_2016.csv',
        # Observation is a single 4x4 'raw_state' matrix per step.
        state_shape={
            'raw_state': spaces.Box(low=-100, high=100, shape=(4, 4))
        },
        skip_frame=5,
        start_cash=100000,
        broker_commission=0.02,
        fixed_stake=100,
        drawdown_call=90,
        render_ylabel='Price Lines',
        render_size_episode=(12, 8),
        render_size_human=(8, 3.5),
        render_size_state=(10, 3.5),
        render_dpi=75,
        verbose=0,
    )
    gamma = 0.9  # NOTE(review): unused locally; presumably read inside DQN — confirm.
    epsilon = .95  # NOTE(review): unused locally as well.
    trials = 100
    trial_len = 1000
    # updateTargetNetwork = 1000
    dqn_agent = DQN(env=env)
    steps = []
    for trial in range(trials):
        # dqn_agent.model = load_model("./model.model")
        # env.reset() returns a dict; take the first entry's value as the state.
        cur_state = np.array(list(env.reset().items())[0][1])
        cur_state = np.reshape(cur_state, (4, 4, 1))
        for step in range(trial_len):
            action = dqn_agent.act(cur_state)
            new_state, reward, done, _ = env.step(action)
            # Scale intermediate rewards; flat -10 penalty on the terminal step.
            reward = reward * 10 if not done else -10
            new_state = list(new_state.items())[0][1]
            new_state = np.reshape(new_state, (4, 4, 1))
            dqn_agent.target_train()  # iterates target model
            cur_state = new_state
            if done:
                break
        print("Completed trial #{} ".format(trial))
        dqn_agent.render_all_modes(env)
        # Fixed: the original called "model.model".format(trial) — a no-op
        # format with no placeholder, so `trial` was silently discarded.
        # Same filename is kept; the misleading call is removed.
        dqn_agent.save_model("model.model")
def __init__(self, dir):
    """Build the trading environment and agent from a saved run directory.

    Args:
        dir: path to a run directory containing config.json with (at least)
            the keys 'data', 'capital', 'commission' and 'stake'.
    """
    self.dir = dir
    # Fixed: use a context manager so the config file handle is closed
    # deterministically (original left the bare open() result unclosed).
    with open('{}/config.json'.format(dir)) as config_file:
        self.config = json.load(config_file)
    self.sample_batch_size = 32
    self.episodes = 10000
    self.time_steps = 30
    self.features = 8
    self.input_shape = (30, 4)
    self.monitor = Monitor(dir)
    self.env = BTgymEnv(
        filename=self.config['data'],
        episode_duration={
            'days': 1,
            'hours': 0,
            'minutes': 0
        },
        strategy=MyStrategy,
        start_00=True,
        start_cash=self.config['capital'],
        broker_commission=self.config['commission'],
        fixed_stake=self.config['stake'],
        # drawdown_call=50,
        state_shape={
            'raw_state': spaces.Box(low=1, high=2, shape=self.input_shape),
            'indicator_states': spaces.Box(low=-1, high=10,
                                           shape=self.input_shape)
        },
        port=5006,
        data_port=4804,
        verbose=0,
    )
    # Agent input width = raw-state rows + indicator rows (the two state
    # blocks are concatenated before being fed to the agent).
    self.state_size = self.env.observation_space.shape['raw_state'][
        0] + self.env.observation_space.shape['indicator_states'][0]
    self.action_size = self.env.action_space.n
    self.agent = Agent(self.state_size, self.action_size, dir)
    print("Engine:{}".format(self.env.engine.broker.getcash()))
def main(filename):
    """Train a DQN agent on the given CSV price series via BTgym.

    Args:
        filename: path to the historical price CSV fed to BTgymEnv.
    """
    env = BTgymEnv(filename=filename,
                   # Observation is a single 30x4 'raw' matrix per step.
                   state_shape={'raw': spaces.Box(low=-100, high=100,
                                                  shape=(30, 4))},
                   skip_frame=5,
                   start_cash=100000,
                   broker_commission=0.02,
                   fixed_stake=100,
                   connect_timeout=180,
                   drawdown_call=90,
                   render_state_as_image=True,
                   render_ylabel='Price Lines',
                   render_size_episode=(12, 8),
                   render_size_human=(8, 3.5),
                   render_size_state=(10, 3.5),
                   render_dpi=75,
                   multiprocessing=1,
                   port=5000,
                   data_port=4999,
                   verbose=0,
                   )
    env.reset()  # <=== CORRECTED HERE: fake reset() tells data_master to start data_server_process
    gamma = 0.9  # NOTE(review): unused locally; presumably read inside DQN — confirm.
    epsilon = .95  # NOTE(review): unused locally as well.
    trials = 10
    trial_len = 1000
    # updateTargetNetwork = 1000
    dqn_agent = DQN(env=env)
    steps = []
    for trial in range(trials):
        # dqn_agent.model= load_model("./model.model")
        # env.reset() returns a dict; take the first entry's value as the state.
        cur_state = np.array(list(env.reset().items())[0][1])
        cur_state = np.reshape(cur_state, (30, 4, 1))
        for step in range(trial_len):
            action = dqn_agent.act(cur_state)
            new_state, reward, done, _ = env.step(action)
            # Scale intermediate rewards; flat -10 penalty on the terminal step.
            reward = reward * 10 if not done else -10
            new_state = list(new_state.items())[0][1]
            new_state = np.reshape(new_state, (30, 4, 1))
            dqn_agent.target_train()  # iterates target model
            cur_state = new_state
            if done:
                break
        print("Completed trial #{} ".format(trial))
        dqn_agent.render_all_modes(env)
        # Fixed: "model.model".format(trial) was a no-op format call with no
        # placeholder — `trial` was silently discarded. Filename unchanged.
        dqn_agent.save_model("model.model")
def testInitial(self):
    """Smoke test: drive one full episode with random actions, appending
    each transition to log.txt."""
    with open("log.txt", "a") as log_file:
        env = BTgymEnv(
            filename='./btgym/examples/data/DAT_ASCII_EURUSD_M1_2016.csv',
            start_cash=100,
            broker_commission=0.0001,
            leverage=10.0,
            fixed_stake=10)
        env.reset()
        done = False
        while not done:
            sampled_action = env.action_space.sample()
            obs, reward, done, info = env.step(sampled_action)
            log_file.write('action: {},reward: {},info: {}\n'.format(
                sampled_action, reward, info))
        env.close()
render_size_episode=(12,8), render_size_human=(8, 3.5), render_size_state=(10, 3.5), render_dpi=75, verbose=0, ) ''' env = BTgymEnv( filename='./data/sh601318.csv', start_weekdays={0, 1, 2, 3, 4}, episode_duration={ 'days': 90, 'hours': 0, 'minutes': 0 }, # Want to start every episode at the begiining of the day: start_00=True, time_gap={'days': 1}, start_cash=100, render_ylabel='Price Lines', render_size_episode=(12, 8), render_size_human=(8, 3.5), render_size_state=(10, 3.5), render_dpi=75, verbose=1, ) o = env.reset() take_some_steps(env, 10000) render_all_modes(env)
def __init__(self):
    """Assemble a backtrader Cerebro engine, a BTgym dataset and the gym
    observation/action spaces for a DevStrat_4_11-based environment."""
    self.MyCerebro = bt.Cerebro()
    self.MyCerebro.addstrategy(
        DevStrat_4_11,
        start_cash=2000,  # initial broker cash
        commission=0.0001,  # commission to imitate spread
        leverage=10.0,
        order_size=2000,  # fixed stake, mind leverage
        drawdown_call=10,  # max % to lose, in percent of initial cash
        target_call=10,  # max % to win, same
        skip_frame=10,
        gamma=0.99,
        reward_scale=7,  # gradient`s nitrox, touch with care!
        state_ext_scale=np.linspace(3e3, 1e3, num=5))
    # Extra observers surfaced in episode renders/statistics.
    self.MyCerebro.addobserver(Reward)
    self.MyCerebro.addobserver(Position)
    self.MyCerebro.addobserver(NormPnL)
    self.MyDataset = BTgymDataset(
        filename=
        "/Users/bluecharles/Desktop/data/DAT_ASCII_EURUSD_M1_2016.csv",
        start_weekdays=[0, 1, 2, 3, 4],  # sample episode starts Mon-Fri only
        start_00=True,  # align episode start to 00:00
        episode_duration={
            'days': 0,
            'hours': 23,
            'minutes': 55
        },
        time_gap={'hours': 5},
    )
    self._env = BTgymEnv(
        dataset=self.MyDataset,
        engine=self.MyCerebro,
        port=5555,
        render_enabled=False,
        verbose=0,
    )
    time_dim = 30  # external-state lookback window length
    avg_period = 20  # internal-state averaging window length
    self.observation_space = spaces.Dict({
        'external':
        spaces.Box(low=-100, high=100, shape=(time_dim, 1, 5),
                   dtype=np.float32),
        'internal':
        spaces.Box(low=-2, high=2, shape=(avg_period, 1, 6),
                   dtype=np.float32),
        # Scalar bookkeeping fields attached to every observation.
        'metadata':
        spaces.Dict({
            'type':
            spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
            'trial_num':
            spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
            'trial_type':
            spaces.Box(shape=(), low=0, high=1, dtype=np.uint32),
            'sample_num':
            spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
            'first_row':
            spaces.Box(shape=(), low=0, high=10**10, dtype=np.uint32),
            'timestamp':
            spaces.Box(shape=(), low=0, high=np.finfo(np.float64).max,
                       dtype=np.float64),
        })
    })
    self.action_space = spaces.Discrete(4)
# Environment over a headerless Shanghai-stock CSV (sh601318): 5-day samples
# may start on any weekday, with a 1-day gap tolerance between data rows.
env = BTgymEnv(
    filename='/home/aaron8tang/projects/btgym/examples/data/sh601318.csv',
    parsing_params={'header': None},
    trial_params={
        'start_weekdays': {0, 1, 2, 3, 4, 5, 6},
        'sample_duration': {'days': 5, 'hours': 0, 'minutes': 0},
        'start_00': False,
        'time_gap': {'days': 1, 'hours': 0},
        'test_period': {'days': 5, 'hours': 0, 'minutes': 0},
    },
    episode_params={
        'start_weekdays': {0, 1, 2, 3, 4, 5, 6},
        'sample_duration': {'days': 5, 'hours': 0, 'minutes': 0},
        'start_00': False,
        'time_gap': {'days': 1, 'hours': 0},
    },
    verbose=1,
)
def __init__(self):
    """Hyper-parameters plus the BTgym environment for a random-search
    style learner.

    The nb_directions / nb_best_directions pair appears to follow the
    Augmented Random Search convention (sample N perturbations, keep the
    best K) — confirm against the training loop.
    """
    self.nb_steps = 1000
    self.episode_length = 1000
    self.learning_rate = 0.02
    self.nb_directions = 16
    self.nb_best_directions = 16
    # Fixed: validate with an explicit exception instead of `assert`,
    # which is stripped when Python runs with -O. With the constants
    # above this check never fires, so runtime behavior is unchanged.
    if self.nb_best_directions > self.nb_directions:
        raise ValueError('nb_best_directions must not exceed nb_directions')
    self.noise = 0.03
    self.seed = 1
    # NOTE(review): despite its name, env_name holds the environment
    # instance itself, not a name string — confirm downstream usage
    # before renaming.
    self.env_name = BTgymEnv(
        filename='DAT_ASCII_EURUSD_M1_2016.csv',
        # This param is the only one changed:
        state_shape={
            'raw_state': spaces.Box(
                shape=(30, 4),
                low=-100,
                high=100,
                dtype=np.float32,
            ),
            # Scalar bookkeeping fields attached to every observation.
            'metadata': DictSpace(
                {
                    'type': spaces.Box(
                        shape=(), low=0, high=1, dtype=np.uint32
                    ),
                    'trial_num': spaces.Box(
                        shape=(), low=0, high=10 ** 10, dtype=np.uint32
                    ),
                    'trial_type': spaces.Box(
                        shape=(), low=0, high=1, dtype=np.uint32
                    ),
                    'sample_num': spaces.Box(
                        shape=(), low=0, high=10 ** 10, dtype=np.uint32
                    ),
                    'first_row': spaces.Box(
                        shape=(), low=0, high=10 ** 10, dtype=np.uint32
                    ),
                    'timestamp': spaces.Box(
                        shape=(), low=0, high=np.finfo(np.float64).max,
                        dtype=np.float64
                    ),
                }
            )
        },
        skip_frame=5,
        start_cash=100,
        render_ylabel='Price Lines',
        render_size_episode=(12, 8),
        render_size_human=(8, 3.5),
        render_size_state=(10, 3.5),
        render_dpi=75,
        verbose=0,
    )
class Forex:
    """DQN-style trading harness: wires a BTgym environment to an Agent,
    runs training episodes, and logs progress through a Monitor.

    Persistent run state lives in <dir>/config.json; keys read/written
    here: 'data', 'capital', 'commission', 'stake', 'episodes',
    'trained_episodes', 'steps'.
    """

    def __init__(self, dir):
        # Run directory; its config.json drives env and agent settings.
        self.dir = dir
        self.config = json.load(open('{}/config.json'.format(dir)))
        self.sample_batch_size = 32  # replay minibatch size
        self.episodes = 10000
        self.time_steps = 30
        self.features = 8
        self.input_shape = (30, 4)
        self.monitor = Monitor(dir)
        self.env = BTgymEnv(
            filename=self.config['data'],
            episode_duration={
                'days': 1,
                'hours': 0,
                'minutes': 0
            },
            strategy=MyStrategy,
            start_00=True,
            start_cash=self.config['capital'],
            broker_commission=self.config['commission'],
            fixed_stake=self.config['stake'],
            #drawdown_call=50,
            state_shape={
                'raw_state': spaces.Box(low=1, high=2,
                                        shape=self.input_shape),
                'indicator_states': spaces.Box(low=-1, high=10,
                                               shape=self.input_shape)
            },
            port=5006,
            data_port=4804,
            verbose=0,
        )
        # Agent input width = raw rows + indicator rows (see getFullState).
        self.state_size = self.env.observation_space.shape['raw_state'][
            0] + self.env.observation_space.shape['indicator_states'][0]
        self.action_size = self.env.action_space.n
        self.agent = Agent(self.state_size, self.action_size, dir)
        print("Engine:{}".format(self.env.engine.broker.getcash()))

    def run(self):
        """Train for the remaining configured episodes.

        The finally-block always persists agent weights and the updated
        config, even when training is interrupted by an exception.
        """
        try:
            episodes = 0
            for index_episode in range(self.config['episodes'] -
                                       self.config['trained_episodes']):
                state = self.env.reset()
                state = self.getFullState(state)
                state = np.reshape(state, (1, self.time_steps, 8))
                done = False
                index = 0
                final_cash = 0
                message = ""
                count = 0
                negativeReward = 0
                positiveReward = 0
                # Per-episode histogram of the 4 discrete actions taken.
                mapper = {
                    0: 0,
                    1: 0,
                    2: 0,
                    3: 0,
                }
                while not done:
                    action = self.agent.act(state)
                    next_state, reward, done, info = self.env.step(action)
                    mapper[action] += 1
                    if reward > 0:
                        positiveReward += reward
                    else:
                        negativeReward += reward
                    final_cash = info[0]['broker_cash']
                    message = info[0]['broker_message']
                    next_state = self.getFullState(next_state)
                    next_state = np.reshape(next_state,
                                            (1, self.time_steps, 8))
                    self.agent.remember(state, action, reward, next_state,
                                        done)
                    state = next_state
                    index += 1
                    self.config['steps'] += info[0]['step']
                    self.monitor.logstep({
                        "reward": reward,
                        "drawdown": info[0]['drawdown'],
                        'broker_value': info[0]['broker_value'],
                        "steps": self.config['steps']
                    })
                    # Snapshot a rendered frame every 100 global steps.
                    if self.config['steps'] % 100 == 0:
                        self.monitor.logimage(
                            feed_dict={
                                'human': self.env.render('human')[None, :],
                            },
                            global_step=self.config['steps'],
                        )
                episodes += 1
                episode_stat = self.env.get_stat()
                self.monitor.logepisode({
                    "reward": reward,
                    "cpu_time_sec": episode_stat['runtime'].total_seconds(),
                    "global_step": self.config['trained_episodes'] + episodes,
                    'broker_value': info[0]['broker_value'],
                    "episode": self.env.render('episode')[None, :]
                })
                # Green console line when the episode consumed all data.
                if "CLOSE, END OF DATA" == message:
                    print("\x1b[6;30;42m{}\t{} {} \t{} {}\t\t{}\x1b[0m".format(
                        time.strftime("%H:%M:%S"), index + 1, positiveReward,
                        int(final_cash), mapper, message))
                else:
                    if positiveReward > 0:
                        print("{}\t{} {} \t{} {}\t\t{}".format(
                            time.strftime("%H:%M:%S"), index + 1,
                            positiveReward, int(final_cash), mapper, message))
                # Experience replay after every episode.
                self.agent.replay(self.sample_batch_size)
        finally:
            self.config['trained_episodes'] += episodes
            self.agent.save_model()
            self.monitor.close()
            with open('{}/config.json'.format(self.dir), 'w') as outfile:
                json.dump(self.config, outfile)

    def getFullState(self, state):
        # Stack raw prices and indicators side by side along axis 1
        # (e.g. (30, 4) + (30, 4) -> (30, 8)).
        return np.concatenate((state['raw_state'], state['indicator_states']),
                              axis=1)

    def testInitial(self):
        # Standalone sanity check: fresh env, random policy, log to file.
        with open("log.txt", "a") as myfile:
            env = BTgymEnv(
                filename='./btgym/examples/data/DAT_ASCII_EURUSD_M1_2016.csv',
                start_cash=100,
                broker_commission=0.0001,
                leverage=10.0,
                fixed_stake=10)
            done = False
            o = env.reset()
            while not done:
                action = env.action_space.sample()
                obs, reward, done, info = env.step(action)
                myfile.write('action: {},reward: {},info: {}\n'.format(
                    action, reward, info))
            env.close()

    def test(self):
        """Evaluation pass: run one episode with agent actions, no learning."""
        state = self.env.reset()
        state = self.getFullState(state)
        state = np.reshape(state, (1, self.time_steps, 8))
        done = False
        index = 0
        final_cash = 0
        message = ""
        while not done:
            action = self.agent.act(state)
            next_state, reward, done, info = self.env.step(action)
            final_cash = info[0]['broker_cash']
            message = info[0]['broker_message']
            next_state = self.getFullState(next_state)
            next_state = np.reshape(next_state, (1, self.time_steps, 8))
            state = next_state
            index += 1
        # NOTE(review): placement of this summary print relative to the loop
        # was reconstructed from mangled formatting — confirm it runs once.
        print("\x1b[6;30;42m{}\t{} \t{} \t\t{}\x1b[0m".format(
            time.strftime("%H:%M:%S"), index + 1, final_cash, message))
from btgym import BTgymEnv

# Minimal smoke test: take two random actions in a default EURUSD env
# and print what comes back.
env = BTgymEnv(
    filename='/root/btgym/examples/data/DAT_ASCII_EURUSD_M1_2016.csv')
env.reset()
for _step in range(2):
    sampled = env.action_space.sample()  # random action
    obs, reward, done, info = env.step(sampled)
    print('ACTION: {}\nREWARD: {}\nINFO: {}'.format(sampled, reward, info))
    # print(info[0]['step'], info[0]['broker_value'])
env.close()