observation = market.reset()
while True:
    try:  # TODO: add callbacks?
        ## The agent chooses an action
        # (candles=9(mb=>(2,4)?), tickers=4, trades=2)
        # TODO: actions for a multi-symbol market
        action = agent.forward(observation)

        ## Execute the action
        observation, reward, done, info = market.step([action])  ## Get the response from the environment
        agent.backward(reward, terminal=done)

        ## If the terminal state was reached
        if done:
            observation = market.reset()
            agent.reset_states()
            done = False
            log.info('Is terminal state. Reset..')
            log.info('=' * 40)

        log.info('Tick: {t} | {info}'.format(t=tickcount, info=info))

        ## Checkpoint
        if tickcount % 100 == 0:
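
# The loop above assumes a Gym-style environment (reset()/step()) and a
# keras-rl-style agent (forward()/backward()/reset_states()). Below is a
# minimal interface sketch for illustration only; the name MarketStub and the
# docstrings are assumptions, not part of the original code.
class MarketStub:
    def reset(self):
        """Return the initial observation (e.g. candle/ticker/trade features)."""
        raise NotImplementedError

    def step(self, actions):
        """Apply the submitted list of actions and return (observation, reward, done, info)."""
        raise NotImplementedError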
# Imports assumed by the class below (keras / keras-rl). PrioritizedMemory,
# NoisyNetDense, WindowProcessor, getsize and getimageandzone are assumed to
# come from project-specific or forked keras-rl modules not shown here.
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy, GreedyQPolicy


class CenteringDqn:
    def __init__(self, **args):
        height, width = getsize()
        self.params = params
        self.params['width'] = width
        self.params['height'] = height
        self.params['num_training'] = args['numTraining']
        self.params['load_file'] = params['load_file']
        self._build_dqn_agent(self.params)
        if args['numTraining'] > 0:
            self._dqn.training = True
        else:
            self._dqn.training = False
        self.img = None
        self.zone = 0
        self.reward = 0
        self._dqn_action = None
        self.terminal = None
        self.accum_reward = 0
        self._train()

    def _build_dqn_agent(self, params):
        NB_ACTIONS = 7

        # Convolutional Q-network
        inputShape = (params['width'], params['height'], 3)
        model = Sequential()
        model.add(Conv2D(16, (3, 3), input_shape=inputShape, padding='same', activation='relu'))
        model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
        model.add(NoisyNetDense(16, activation='linear'))
        model.add(Flatten())
        model.add(NoisyNetDense(NB_ACTIONS, activation='linear'))
        model.summary()

        # Memory replay
        if not params['prio_memory']:
            print("Using Sequential memory")
            memory = SequentialMemory(limit=params['mem_size'], window_length=1)
        else:
            print("Using Prioritized memory")
            params['lr'] = params['lr'] / 4
            memory = PrioritizedMemory(limit=params['mem_size'], alpha=0.6,
                                       start_beta=0.5, end_beta=1.0,
                                       steps_annealed=params['annealing'],
                                       window_length=1)

        # Epsilon-greedy policy, linearly decreasing
        if not params['noisy_layer']:
            print("Using Annealed Eps Greedy policy")
            self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                               value_max=params['eps'],
                                               value_min=params['eps_final'],
                                               value_test=0.0,
                                               nb_steps=params['annealing'])
        # Or greedy policy in case of noisy layers
        else:
            print("Using Q Greedy policy (with noisy layer)")
            self.policy = GreedyQPolicy()

        # Keras-RL DQN agent
        self._dqn = DQNAgent(
            model=model,
            nb_actions=NB_ACTIONS,
            policy=self.policy,
            memory=memory,
            batch_size=params['batch_size'],
            processor=WindowProcessor(),
            enable_double_dqn=True,
            enable_dueling_network=True,
            nb_steps_warmup=params['train_start'],
            gamma=params['discount'],
            target_model_update=1000,
            train_interval=1,
            delta_clip=1.,
            custom_model_objects={"NoisyNetDense": NoisyNetDense})
        self._dqn.compile(Adam(lr=params['lr']), metrics=['mae'])

        if params['load_file']:
            print("file loaded")
            self._dqn.load_weights(params['load_file'])

    def _load_img(self):
        self.img, self.zone = getimageandzone()

    def _get_reward(self):
        # Reward table indexed by [zone][action]
        tab = [[20, 10, 5, 0, -5, -10, -20],
               [5, 20, 10, 0, -5, -10, -20],
               [-5, 5, 20, 0, -5, -10, -20],
               [-20, -10, 5, 20, 5, -10, -20],
               [-20, -10, -5, 0, 20, 5, -5],
               [-20, -10, -5, 0, 10, 20, 5],
               [-20, -10, -5, 0, 5, 10, 20]]
        if self._dqn_action is not None:
            self.reward = tab[self.zone][self._dqn_action]

    def _train(self):
        self.terminal = False
        for i in range(0, self.params['num_training']):
            # Generate an image and the corresponding zone
            self._load_img()
            if self._dqn_action is not None:
                # go backward
                self._dqn.backward(self.reward, self.terminal)
            # go forward
            self._dqn_action = self._dqn.forward(self.img)
            self._get_reward()
            self._dqn.step += 1
            self.accum_reward = self.accum_reward + self.reward
            print("Step : " + str(i) + " \treward : " + str(self.reward)
                  + " \taccumrwd : " + str(self.accum_reward)
                  + " \tzone : " + str(self.zone)
                  + " \taction : " + str(self._dqn_action))
            log_file = open("log.txt", 'a')
            log_file.write("Step : " + str(i) + " \treward : " + str(self.reward)
                           + " \taccumrwd : " + str(self.accum_reward)
                           + " \tzone : " + str(self.zone)
                           + " \taction : " + str(self._dqn_action) + "\n")
            log_file.close()

            # On the second-to-last loop iteration, flag the terminal state and save the weights
            if i == self.params['num_training'] - 2:
                self.terminal = True
                self._dqn.save_weights(params['save_file'])

            if i % 1000 == 0:
                self._dqn.save_weights(params['save_file'] + str(i), True)
                self.accum_reward = 0
                print("Model saved")
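
# Usage sketch (assumption, not part of the original): CenteringDqn reads a
# module-level `params` dict whose keys appear above and starts training from
# __init__. The values below are illustrative only; the save-file name is
# hypothetical.
if __name__ == '__main__':
    params = {
        'load_file': None,
        'save_file': 'centering_dqn_weights.h5f',
        'mem_size': 100000,
        'prio_memory': False,
        'noisy_layer': True,
        'eps': 1.0,
        'eps_final': 0.1,
        'annealing': 100000,
        'batch_size': 32,
        'train_start': 1000,
        'discount': 0.99,
        'lr': 0.00025,
    }
    # Builds the agent and immediately runs the training loop for 50000 steps.
    CenteringDqn(numTraining=50000)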