Example #1
0
class TestEnv(TestCase):
    """Unit tests for the trading Env's step/reset behaviour.

    NOTE(review): the expected balances below assume the fixed sample data
    file '../data/raw/FX_Demo/sample_USD_JPY_S5.pickle' — confirm the
    fixture is stable before changing any expected value.
    """

    # Shared fixture parameters so every test builds an identical environment.
    START_BALANCE = 250000
    FX_DATA_FILE = '../data/raw/FX_Demo/sample_USD_JPY_S5.pickle'

    def setUp(self):
        # Fresh environment per test keeps the tests independent; previously
        # this construction was copy-pasted into every test method.
        self.env = Env(
            balance=self.START_BALANCE,
            FX_DATA_FILE=self.FX_DATA_FILE)

    def test_sell(self):
        """Selling with no stock fails; selling after a buy succeeds."""
        # No stock held yet, so the sell is rejected.
        sell_mount = self.env.step(action='sell', mount=1)
        self.assertEqual(sell_mount, {'fail': 0})
        # Buy one unit, then the sell of that unit succeeds.
        self.env.step(action='buy', mount=1)
        sell_mount = self.env.step(action='sell', mount=1)
        self.assertEqual(sell_mount, {'success': 249999.98500000002})
        self.assertEqual(self.env.stock_balance, 0)
        self.assertEqual(self.env.balance, 249999.98500000002)

    def test_buy(self):
        """Buying one unit debits the balance and credits stock_balance."""
        buy_mount = self.env.step(action='buy', mount=1)
        self.assertEqual(buy_mount, {'success': 249999.987})
        self.assertEqual(self.env.stock_balance, 1)
        self.assertEqual(self.env.balance, 249888.668)

    def test_stay(self):
        """Staying leaves both the cash balance and stock balance unchanged."""
        stay_mount = self.env.step(action='stay', mount=1)
        self.assertEqual(stay_mount, {'success': 250000.0})
        self.assertEqual(self.env.stock_balance, 0)
        self.assertEqual(self.env.balance, 250000)

    def test_reset(self):
        """reset() restores the initial balance and clears held stock."""
        self.env.step(action='buy', mount=1)
        self.env.step(action='sell', mount=1)
        self.env.reset()
        self.assertEqual(self.env.stock_balance, 0)
        self.assertEqual(self.env.balance, 250000)
Example #2
0
if __name__ == "__main__":
    env = Env()
    agent = TestAgent(action_size=6, env=env)
    agent.load_model("./save_model/supermario_per.h5")

    for e in range(EPISODES):
        done = False
        max_x = 0
        now_x = 0
        hold_frame = 0
        before_max_x = 200

        start_position = 500
        step, score = 0, 0
        observe = env.reset(start_position=start_position)

        state = pre_processing(observe)
        history = np.stack((state, state, state, state), axis=2)
        history = np.reshape([history], (1, 84, 84, 4))

        action_count = 0
        real_action, action = 0, 0

        while not done:
            step += 1

            action = agent.get_action(history)
            if action == 0:
                real_action = 0
            elif action == 1: