コード例 #1
0
class TestEnv(TestCase):
    """Unit tests for Env's trading actions (buy / sell / stay / reset).

    Each test starts from a fresh environment with a fixed opening
    balance and the same sample FX price file, then checks the dict
    returned by ``Env.step`` and the resulting account state.
    """

    # Shared fixture parameters for every test.
    INITIAL_BALANCE = 250000
    DATA_FILE = '../data/raw/FX_Demo/sample_USD_JPY_S5.pickle'

    def _make_env(self):
        """Return a fresh Env with the standard test balance and data file."""
        return Env(
            balance=self.INITIAL_BALANCE,
            FX_DATA_FILE=self.DATA_FILE)

    def test_sell(self):
        """Selling with no holdings fails; selling after a buy succeeds."""
        self.env = self._make_env()
        # Selling without holding any stock must report a failure.
        sell_mount = self.env.step(action='sell', mount=1)
        self.assertEqual(sell_mount, {'fail': 0})
        # Buy one unit, then sell it back; balances must round-trip.
        self.env.step(action='buy', mount=1)
        sell_mount = self.env.step(action='sell', mount=1)
        self.assertEqual(sell_mount, {'success': 249999.98500000002})
        self.assertEqual(self.env.stock_balance, 0)
        self.assertEqual(self.env.balance, 249999.98500000002)

    def test_buy(self):
        """Buying one unit debits the balance and credits stock_balance."""
        self.env = self._make_env()
        buy_mount = self.env.step(action='buy', mount=1)
        self.assertEqual(buy_mount, {'success': 249999.987})
        self.assertEqual(self.env.stock_balance, 1)
        self.assertEqual(self.env.balance, 249888.668)

    def test_stay(self):
        """Staying leaves both the cash balance and holdings unchanged."""
        self.env = self._make_env()
        stay_mount = self.env.step(action='stay', mount=1)
        self.assertEqual(stay_mount, {'success': 250000.0})
        self.assertEqual(self.env.stock_balance, 0)
        self.assertEqual(self.env.balance, 250000)

    def test_reset(self):
        """reset() restores the opening balance and clears holdings."""
        self.env = self._make_env()
        self.env.step(action='buy', mount=1)
        self.env.step(action='sell', mount=1)
        self.env.reset()
        self.assertEqual(self.env.stock_balance, 0)
        self.assertEqual(self.env.balance, 250000)
コード例 #2
0
            # Map the agent's discrete action index (0-5) onto the
            # environment's real action code.
            # NOTE(review): the mapping 0->0, 1->3, 2->4, 3->7, 4->8,
            # other->11 presumably selects specific controller/button
            # combinations — confirm against the env's action table.
            if action == 0:
                real_action = 0
            elif action == 1:
                real_action = 3
            elif action == 2:
                real_action = 4
            elif action == 3:
                real_action = 7
            elif action == 4:
                real_action = 8
            else:
                real_action = 11

            # Advance the environment one timestep with the chosen action.
            observe, reward, done, clear, max_x, timeout, now_x = \
                env.step(real_action)

            # Reaching x >= 8776 grants a +200 bonus and ends the episode
            # (presumably 8776 is the goal x-coordinate — TODO confirm).
            if now_x >= 8776:
                reward += 200
                done = True

            # Episodes that terminate before the goal get a flat penalty.
            if done and now_x < 8776:
                reward = -30

            # Scale the reward down by 100 before it is consumed downstream.
            reward /= 100
            # reward = np.clip(reward, -1., 1.)
            print(now_x)
            # Preprocess the raw frame to an 84x84 single-channel image and
            # stack it with the three most recent frames from `history` to
            # build the next 4-frame state (shape (1, 84, 84, 4)).
            next_state = pre_processing(observe)
            next_state = np.reshape([next_state], (1, 84, 84, 1))
            next_history = np.append(next_state, history[:, :, :, :3], axis=3)