Example #1
def test_generator(self):
    env = TradeEnv()
    board = env.new_sec()
    print(board.current_time)
    print(board.q_value)
    board = env.new_sec()
    print(board.q_value)
    print(board.current_time)
    board = env.new_sec()
    print(board.q_value)
    print(board.current_time)
Example #2
def __init__(self, buffer_size=BUFFER_SIZE):
    self.buffer_size = buffer_size
    self.experiences = deque(maxlen=self.buffer_size)
    self.q_values = deque(maxlen=self.buffer_size)
    self.reward = 0
    self.start_time = 0
    self.end_time = 0
    self.loss = None
    self.total_reward = 0
    self.duration = 0
    self.episode_no = 0
    self.env = TradeEnv()
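Both buffers above are bounded deques: once buffer_size entries have been appended, each further append silently discards the oldest entry, so memory stays constant during long runs. A minimal standalone illustration (the size 3 is arbitrary):

from collections import deque

buf = deque(maxlen=3)
for i in range(5):
    buf.append(i)
print(buf)  # deque([2, 3, 4], maxlen=3) -- the oldest items were dropped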
Example #3
def test_action1(self):
    env = TradeEnv()
    print(env.board.current_time)
    print(env.q_value)
    env.step(ACTION.NOP)

    print(env.board.current_time)
    print(env.q_value)
    env.step(ACTION.SELL_NOW)

    print(env.board.current_time)
    print(env.q_value)
    env.step(ACTION.BUY_NOW)
Example #4
def test_buy(self):
    env = TradeEnv()
    env.action_buy()
Example #5
def test_create(self):
    env = TradeEnv()
    self.assertIsNotNone(env)
Example #6
def test_action_sell(self):
    env = TradeEnv()

    env.step(ACTION.SELL)

    print('1')

    env.step(ACTION.BUY)

    print('2')
    env.step(ACTION.SELL)

    print('3')
    env.step(ACTION.BUY)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
Example #7
def test_action2(self):
    env = TradeEnv()
    env.step(ACTION.SELL)
    env.step(ACTION.BUY)
Example #8
def test_close(self):
    env = TradeEnv()
    env.close()
Example #9
def test_seed(self):
    env = TradeEnv()
    env.seed()
Example #10
def test_step(self):
    env = TradeEnv()
    env.step(1)
Example #11
def test_render(self):
    env = TradeEnv()
    env.render()
Example #12
def test_reset(self):
    env = TradeEnv()
    env.reset()
Example #13
def test_sell_now(self):
    env = TradeEnv()
    env.action_sell_now()
Example #14
def test_buy_now(self):
    env = TradeEnv()
    env.action_buy_now()
Example #15
def test_sell(self):
    env = TradeEnv()
    env.action_sell()
Example #16
def test_new_episode(self):
    env = TradeEnv()
    env.new_episode()
Example #17
def _init():
    e = TradeEnv()
    e.seed(seed + rank)
    return e
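This fragment refers to seed and rank from an enclosing scope, which matches the usual env-factory closure used to build vectorized environments. A hedged sketch of what that surrounding code could look like; make_env and the use of SubprocVecEnv from stable_baselines3 are assumptions, not taken from the original project:

from stable_baselines3.common.vec_env import SubprocVecEnv

def make_env(rank, seed=0):
    # Each worker process gets its own TradeEnv with a distinct seed.
    def _init():
        e = TradeEnv()
        e.seed(seed + rank)
        return e
    return _init

vec_env = SubprocVecEnv([make_env(rank=i) for i in range(4)])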
Example #18
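# Note: this class assumes TradeEnv, ACTION, QState, BUFFER_SIZE and NUM_OF_EPISODE
# come from the surrounding project; np is numpy and ak is presumably autokeras.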
class Trainer:
    def __init__(self, buffer_size=BUFFER_SIZE):
        self.buffer_size = buffer_size
        self.experiences = deque(maxlen=self.buffer_size)
        self.q_values = deque(maxlen=self.buffer_size)
        self.reward = 0
        self.start_time = 0
        self.end_time = 0
        self.loss = None
        self.total_reward = 0
        self.duration = 0
        self.episode_no = 0
        self.env = TradeEnv()

    def train(self, episode=NUM_OF_EPISODE):
        for i in range(episode):
            last_q_time = 0

            self.episode_begin(i, None)
            self.env.reset()
            n_state, reward, done, info = self.env.step(ACTION.NOP)
            s = n_state

            while True:
                if not self.env.q_value:
                    break

                if last_q_time != self.env.q_value.time:
                    last_q_time = self.env.q_value.time

                a = self.env.q_value.get_best_action()

                if a != ACTION.NOP:
                    print(self.env.q_value)

                n_state, reward, done, info = self.env.step(a)
                self.reward += reward

                if (n_state is not None) and (self.env.q_value is not None):
                    q = QState(n_state, self.env.q_value)
                    self.q_values.append(q)

                s = n_state

                if done:
                    break

            self.episode_end(i, s)
            np.savez_compressed('/tmp/q_values.npz', self.q_values)

            states = np.array([q.s for q in self.q_values])
            q_values = np.array([q.q.to_array() for q in self.q_values])

            states = states.reshape(states.shape)
            q_values = q_values.reshape(q_values.shape)

            np.savez_compressed('/tmp/q_stats.npz', s=states, q=q_values)

    def learning(self):
        npz = np.load('/tmp/q_stats.npz')

        states = npz['s']
        q_values = npz['q']

        print('stateshape', states.shape)
        print('qvalueshape', q_values.shape)

        #reg = ImageRegressor(output_dim=5, seed=12314, max_trials=3)
        reg = self.create_image_regressor()

        reg.fit(states, q_values, validation_split=0.2, epochs=2)

        model = reg.export_model()
        print(type(model))
        model.save('./auto_model.hd5')

    def create_image_regressor(self):
        input_node = ak.ImageInput()
        output_node = ak.ConvBlock()(input_node)
        output_node = ak.DenseBlock()(output_node)
        output_node = ak.RegressionHead()(output_node)

        reg = ak.AutoModel(inputs=input_node,
                           outputs=output_node,
                           max_trials=10)

        return reg

    def episode_begin(self, i: int, s):
        pass

    def episode_end(self, i: int, s):
        self.duration = float(self.end_time) - float(self.start_time)
        self.total_reward += self.reward

        print('<- EPISODE END ({:5d}) TOTAL {:5.2f}'.format(i, self.total_reward))
        self.episode_no += 1
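For context, a minimal sketch of how this Trainer might be driven end to end; the __main__ guard is illustrative and not part of the original source:

if __name__ == '__main__':
    trainer = Trainer()
    trainer.train()      # roll out episodes and dump /tmp/q_stats.npz
    trainer.learning()   # fit the ak image regressor on that dump and save the exported model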