def test_generator(self):
    env = TradeEnv()
    board = env.new_sec()
    print(board.current_time)
    print(board.q_value)
    board = env.new_sec()
    print(board.q_value)
    print(board.current_time)
    board = env.new_sec()
    print(board.q_value)
    print(board.current_time)
def test_action1(self):
    env = TradeEnv()
    print(env.board.current_time)
    print(env.q_value)
    env.step(ACTION.NOP)
    print(env.board.current_time)
    print(env.q_value)
    env.step(ACTION.SELL_NOW)
    print(env.board.current_time)
    print(env.q_value)
    env.step(ACTION.BUY_NOW)

def test_buy(self):
    env = TradeEnv()
    env.action_buy()

def test_create(self):
    env = TradeEnv()
    self.assertTrue(env is not None)

def test_action_sell(self):
    env = TradeEnv()
    env.step(ACTION.SELL)
    print('1')
    env.step(ACTION.BUY)
    print('2')
    env.step(ACTION.SELL)
    print('3')
    env.step(ACTION.BUY)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)
    env.step(ACTION.SELL)

def test_action2(self):
    env = TradeEnv()
    env.step(ACTION.SELL)
    env.step(ACTION.BUY)

def test_close(self):
    env = TradeEnv()
    env.close()

def test_seed(self):
    env = TradeEnv()
    env.seed()

def test_step(self):
    env = TradeEnv()
    env.step(1)

def test_render(self):
    env = TradeEnv()
    env.render()

def test_reset(self):
    env = TradeEnv()
    env.reset()

def test_sell_now(self):
    env = TradeEnv()
    env.action_sell_now()

def test_buy_now(self):
    env = TradeEnv()
    env.action_buy_now()

def test_sell(self):
    env = TradeEnv()
    env.action_sell()

def test_new_episode(self):
    env = TradeEnv()
    env.new_episode()
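# The methods above take `self` and use assertTrue, so they presumably belong to
# a unittest.TestCase subclass. A minimal sketch of such a wrapper and its entry
# point; the module path `trade_env` and the class name `TestTradeEnv` are
# assumptions for illustration, not taken from this repo.
import unittest

from trade_env import TradeEnv, ACTION  # assumed import path


class TestTradeEnv(unittest.TestCase):
    def test_create(self):
        env = TradeEnv()
        self.assertIsNotNone(env)

    def test_step_returns_gym_tuple(self):
        # Trainer.train() unpacks step() as (state, reward, done, info),
        # so the Gym-style 4-tuple can be asserted directly.
        env = TradeEnv()
        state, reward, done, info = env.step(ACTION.NOP)
        self.assertIsNotNone(state)


if __name__ == '__main__':
    unittest.main()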
def _init():
    e = TradeEnv()
    e.seed(seed + rank)
    return e
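# _init() closes over `seed` and `rank`, the usual pattern for a per-worker
# environment factory in vectorized training. A minimal sketch (an assumption,
# not code from this repo) of the enclosing factory such a closure usually
# lives in:
def make_env(rank, seed=0):
    def _init():
        e = TradeEnv()
        e.seed(seed + rank)
        return e
    return _init

# Usage: env_fns = [make_env(rank=i) for i in range(4)] builds four
# differently seeded environment constructors for a vector-env wrapper.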
class Trainer:
    def __init__(self, buffer_size=BUFFER_SIZE):
        self.buffer_size = buffer_size
        self.experiences = deque(maxlen=self.buffer_size)
        self.q_values = deque(maxlen=self.buffer_size)
        self.reward = 0
        self.start_time = 0
        self.end_time = 0
        self.loss = None
        self.total_reward = 0
        self.duration = 0
        self.episode_no = 0
        self.env = TradeEnv()

    def train(self, episode=NUM_OF_EPISODE):
        # Roll out episodes, recording (state, Q-value) pairs, then dump them
        # to /tmp for offline fitting in learning().
        for i in range(episode):
            last_q_time = 0
            self.episode_begin(i, None)
            self.env.reset()
            n_state, reward, done, info = self.env.step(ACTION.NOP)
            s = n_state
            while True:
                if not self.env.q_value:
                    break
                # Re-evaluate the action only when the Q-value timestamp changes.
                if last_q_time != self.env.q_value.time:
                    last_q_time = self.env.q_value.time
                    a = self.env.q_value.get_best_action()
                    if a != ACTION.NOP:
                        print(self.env.q_value)
                n_state, reward, done, info = self.env.step(a)
                self.reward += reward
                if (n_state is not None) and (self.env.q_value is not None):
                    q = QState(n_state, self.env.q_value)
                    self.q_values.append(q)
                s = n_state
                if done:
                    break
            self.episode_end(i, s)

        np.savez_compressed('/tmp/q_values.npz', self.q_values)

        states = np.array([q.s for q in self.q_values])
        q_values = np.array([q.q.to_array() for q in self.q_values])
        states = states.reshape(states.shape)
        q_values = q_values.reshape(q_values.shape)
        np.savez_compressed('/tmp/q_stats.npz', s=states, q=q_values)

    def learning(self):
        # Fit an AutoKeras image regressor on the recorded state/Q-value pairs.
        npz = np.load('/tmp/q_stats.npz')
        states = npz['s']
        q_values = npz['q']
        print('stateshape', states.shape)
        print('qvalueshape', q_values.shape)

        # reg = ImageRegressor(output_dim=5, seed=12314, max_trials=3)
        reg = self.create_image_regressor()
        reg.fit(states, q_values, validation_split=0.2, epochs=2)
        model = reg.export_model()
        print(type(model))
        model.save('./auto_model.hd5')

    def create_image_regressor(self):
        input_node = ak.ImageInput()
        output_node = ak.ConvBlock()(input_node)
        output_node = ak.DenseBlock()(output_node)
        output_node = ak.RegressionHead()(output_node)
        reg = ak.AutoModel(inputs=input_node, outputs=output_node, max_trials=10)
        return reg

    def episode_begin(self, i: int, s):
        pass

    def episode_end(self, i: int, s):
        self.duration = float(self.end_time) - float(self.start_time)
        self.total_reward += self.reward
        s = '<- EPISODE END ({:5d}) TOTAL{:5.2f}'.format(i, self.total_reward)
        self.episode_no += 1
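# A minimal usage sketch of the two-phase flow above (an illustration, not part
# of the repo): Trainer.train() rolls out episodes and writes /tmp/q_stats.npz,
# then Trainer.learning() fits the AutoKeras regressor on that file and saves
# ./auto_model.hd5.
if __name__ == '__main__':
    trainer = Trainer()
    trainer.train(episode=2)  # small episode count, just a smoke run
    trainer.learning()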