class TestEnv(TestCase):
    """Check the buy / sell / stay / reset behaviour of ``Env``.

    Every test runs against a fresh ``Env`` created in ``setUp``. The
    expected monetary values are the ones produced with the sample data
    file referenced by ``FX_DATA_FILE``.
    """

    INITIAL_BALANCE = 250000
    # NOTE(review): relative path, resolved against the current working
    # directory -- the tests presumably have to be launched from the
    # directory that makes this path valid.
    FX_DATA_FILE = '../data/raw/FX_Demo/sample_USD_JPY_S5.pickle'

    def setUp(self):
        """Create the environment shared by the assertions of one test."""
        self.env = Env(
            balance=self.INITIAL_BALANCE,
            FX_DATA_FILE=self.FX_DATA_FILE)

    def _assert_step_result(self, result, status, expected_value):
        """Assert that ``result`` is ``{status: expected_value}``.

        The value is compared approximately because it is the outcome of
        floating-point arithmetic; an exact ``==`` would fail on harmless
        rounding differences.
        """
        self.assertEqual(list(result), [status])
        self.assertAlmostEqual(result[status], expected_value)

    def test_sell(self):
        """Selling with nothing held fails; selling after a buy succeeds."""
        sell_mount = self.env.step(action='sell', mount=1)
        self.assertEqual(sell_mount, {'fail': 0})

        self.env.step(action='buy', mount=1)
        sell_mount = self.env.step(action='sell', mount=1)
        self._assert_step_result(sell_mount, 'success', 249999.985)
        self.assertEqual(self.env.stock_balance, 0)
        self.assertAlmostEqual(self.env.balance, 249999.985)

    def test_buy(self):
        """Buying one unit succeeds and updates holdings and balance."""
        buy_mount = self.env.step(action='buy', mount=1)
        self._assert_step_result(buy_mount, 'success', 249999.987)
        self.assertEqual(self.env.stock_balance, 1)
        self.assertAlmostEqual(self.env.balance, 249888.668)

    def test_stay(self):
        """Staying succeeds and leaves holdings and balance untouched."""
        stay_mount = self.env.step(action='stay', mount=1)
        self._assert_step_result(stay_mount, 'success', 250000.0)
        self.assertEqual(self.env.stock_balance, 0)
        self.assertEqual(self.env.balance, self.INITIAL_BALANCE)

    def test_reset(self):
        """``reset`` restores the initial balance and clears holdings."""
        self.env.step(action='buy', mount=1)
        self.env.step(action='sell', mount=1)
        self.env.reset()
        self.assertEqual(self.env.stock_balance, 0)
        self.assertEqual(self.env.balance, self.INITIAL_BALANCE)
# Translate the agent's action index into the action id passed to
# env.step(): 0->0, 1->3, 2->4, 3->7, 4->8, anything else -> 11.
# The pairs are tried in order and the first match wins.
real_action = next(
    (
        env_action
        for agent_action, env_action in ((0, 0), (1, 3), (2, 4), (3, 7), (4, 8))
        if action == agent_action
    ),
    11,
)

# Advance the environment by one timestep with the selected action.
(observe, reward, done, clear, max_x, timeout, now_x) = env.step(real_action)

# Reaching x >= 8776 ends the episode with a +200 bonus
# (presumably the end of the level -- TODO confirm).
if now_x >= 8776:
    done = True
    reward += 200

# An episode that ended before that position gets a fixed reward of -30.
if done and now_x < 8776:
    reward = -30

# Scale the reward down by a factor of 100.
reward /= 100

# Reward clipping is currently disabled:
# reward = np.clip(reward, -1., 1.)
print(now_x)

# Preprocess the new observation and reshape it to (1, 84, 84, 1).
next_state = np.reshape([pre_processing(observe)], (1, 84, 84, 1))

# Put the new frame first, followed by the first three entries of the
# current history along its last axis.
previous_frames = history[:, :, :, :3]
next_history = np.append(next_state, previous_frames, axis=3)