def test_stay(self):
    """A 'stay' action must leave both the cash and stock balances untouched."""
    self.env = Env(
        balance=250000,
        FX_DATA_FILE='../data/raw/FX_Demo/sample_USD_JPY_S5.pickle')
    result = self.env.step(action='stay', mount=1)
    assert result == {'success': 250000.0}
    assert self.env.stock_balance == 0
    assert self.env.balance == 250000
def test_buy(self):
    """Buying one unit should credit the stock balance and debit cash."""
    self.env = Env(
        balance=250000,
        FX_DATA_FILE='../data/raw/FX_Demo/sample_USD_JPY_S5.pickle')
    outcome = self.env.step(action='buy', mount=1)
    # Expected figures are fixtures tied to the first tick of the sample file.
    assert outcome == {'success': 249999.987}
    assert self.env.stock_balance == 1
    assert self.env.balance == 249888.668
def test_reset(self):
    """reset() must restore the initial balances after a buy/sell cycle."""
    self.env = Env(
        balance=250000,
        FX_DATA_FILE='../data/raw/FX_Demo/sample_USD_JPY_S5.pickle')
    for trade in ('buy', 'sell'):
        self.env.step(action=trade, mount=1)
    self.env.reset()
    assert self.env.stock_balance == 0
    assert self.env.balance == 250000
def test_sell(self):
    """Selling with no stock fails; selling previously bought stock succeeds."""
    self.env = Env(
        balance=250000,
        FX_DATA_FILE='../data/raw/FX_Demo/sample_USD_JPY_S5.pickle')
    outcome = self.env.step(action='sell', mount=1)
    assert outcome == {'fail': 0}
    self.env.step(action='buy', mount=1)
    outcome = self.env.step(action='sell', mount=1)
    # Expected figure is a fixture for the sample file's tick prices.
    assert outcome == {'success': 249999.98500000002}
    assert self.env.stock_balance == 0
    assert self.env.balance == 249999.98500000002
def test_evaluate(self):
    """evaluate() should return a non-trivial Q-value vector of length 3.

    Builds a fresh Env/Agent pair, compiles the model, and evaluates the
    (balance, stock_balance) state.
    """
    self.env = Env(
        balance=250000,
        FX_DATA_FILE='../data/raw/FX_Demo/sample_USD_JPY_S5.pickle')
    self.agent = Agent()
    self.agent.model.compile(optimizer=Adam(), loss="mse")
    state = (self.env.balance, self.env.stock_balance)
    y = self.agent.evaluate(state=state)
    assert y.shape == (3, )
    # Was `assert any(y) is True` — identity comparison against the True
    # singleton; the plain truth test is the idiomatic, equivalent assertion.
    assert any(y)
def test_train(self):
    """Smoke-test a full Trainer_priority run over the sample FX data."""
    env = Env(
        balance=250000,
        FX_DATA_FILE='../data/raw/FX_Demo/sample_USD_JPY_S5.pickle')
    agent = Agent(input_data_shape=(10, ))
    mount_agent = Agent(actions=10, input_data_shape=(10, ))
    print(len(env.fx_time_data_buy))
    last_index = len(env.fx_time_data_buy) - 2
    trainer = Trainer_priority(
        env, agent, mount_agent, data_end_index=last_index)
    trainer.train()
def test_act(self):
    """act() must pick one of the three discrete actions (0, 1 or 2)."""
    self.env = Env(
        balance=250000,
        FX_DATA_FILE='../data/raw/FX_Demo/sample_USD_JPY_S5.pickle')
    self.agent = Agent()
    self.agent.model.compile(optimizer=Adam(), loss="mse")
    state = (self.env.balance, self.env.stock_balance)
    action = self.agent.act(state, epsilon=0.1)
    # A membership test replaces the original or-chain, intermediate flag
    # and `is True` identity assertion — same check, one idiomatic line.
    assert action in (0, 1, 2)
def main():
    """Launch the DQN manager process and hand control to the AI controller.

    Two pipes connect manager and controller: one for play/predict traffic,
    one for training traffic; a shared byte flag signals training mode.
    """
    env = Env()
    play_end, predict_end = Pipe()
    manager_end, control_end = Pipe()
    training_flag = Value("b", True)
    manager = DQNManager(
        env.state_n, env.action_n, manager_end, predict_end, training_flag)
    controller = AIControl(env, control_end, play_end, training_flag)
    manager.start()
    controller.control_start()
    manager.join()
def main():
    """Parse command-line options and run reinforcement-learning training.

    The only option, ``--dataset_name``, is the path to the pickled FX rate
    file the Env loads.
    """
    parser = argparse.ArgumentParser(
        description='Execute train reinforcement learning.')
    parser.add_argument(
        '--dataset_name',
        type=str,
        default="../data/raw/FX_Demo/sample10000_USD_JPY_S5.pickle",
        # The original help text ("an integer for the accumulator") was a
        # leftover from the argparse tutorial; describe the real option.
        help='path to the pickled FX rate data file used for training')
    args = parser.parse_args()
    print(args.dataset_name)
    env = Env(balance=250000, FX_DATA_FILE=args.dataset_name)
    agent = Agent(input_data_shape=(10, ))
    mount_agent = Agent(actions=10, input_data_shape=(10, ))
    trainer = Trainer(
        env, agent, mount_agent, Adam(lr=1e-6),
        data_end_index=len(env.fx_time_data_buy) - 2)
    trainer.train()
def main():
    """Create the environment and start the AI control loop."""
    controller = AIControl(Env())
    controller.control_start()
class TestEnv(TestCase):
    """Exercise the Env trading primitives against the sample FX data file."""

    # Every test builds its own Env from the same fixture file so that the
    # expected balances below stay tied to the file's first ticks.
    _FIXTURE = '../data/raw/FX_Demo/sample_USD_JPY_S5.pickle'

    def _fresh_env(self):
        # Helper so each test starts from a known 250000 cash balance.
        return Env(balance=250000, FX_DATA_FILE=self._FIXTURE)

    def test_stay(self):
        """A 'stay' action leaves both balances untouched."""
        self.env = self._fresh_env()
        result = self.env.step(action='stay', mount=1)
        assert result == {'success': 250000.0}
        assert self.env.stock_balance == 0
        assert self.env.balance == 250000

    def test_buy(self):
        """Buying one unit credits stock and debits cash."""
        self.env = self._fresh_env()
        result = self.env.step(action='buy', mount=1)
        assert result == {'success': 249999.987}
        assert self.env.stock_balance == 1
        assert self.env.balance == 249888.668

    def test_sell(self):
        """Selling with no stock fails; selling held stock succeeds."""
        self.env = self._fresh_env()
        result = self.env.step(action='sell', mount=1)
        assert result == {'fail': 0}
        self.env.step(action='buy', mount=1)
        result = self.env.step(action='sell', mount=1)
        assert result == {'success': 249999.98500000002}
        assert self.env.stock_balance == 0
        assert self.env.balance == 249999.98500000002

    def test_reset(self):
        """reset() restores the initial balances after a buy/sell cycle."""
        self.env = self._fresh_env()
        for trade in ('buy', 'sell'):
            self.env.step(action=trade, mount=1)
        self.env.reset()
        assert self.env.stock_balance == 0
        assert self.env.balance == 250000
else: q_value = self.model.predict(history) return np.argmax(q_value[0]) def load_model(self, filename): self.model.load_weights(filename) def pre_processing(observe): processed_observe = np.uint8( resize(rgb2gray(observe), (84, 84), mode='constant') * 255) return processed_observe if __name__ == "__main__": env = Env() agent = TestAgent(action_size=6, env=env) agent.load_model("./save_model/supermario_per.h5") for e in range(EPISODES): done = False max_x = 0 now_x = 0 hold_frame = 0 before_max_x = 200 start_position = 500 step, score = 0, 0 observe = env.reset(start_position=start_position) state = pre_processing(observe)