observation = market.reset()
tickcount = 0

while True:
    try:
        # TODO: add callbacks?

        ## Agent selects an action
        # (candles=9(mb=>(2,4)?), tickers=4, trades=2)
        # TODO: actions for multi-symbol markets
        action = agent.forward(observation)

        ## Execute the action
        observation, reward, done, info = market.step([action])

        ## Feed the environment's response back to the agent
        agent.backward(reward, terminal=done)

        ## If we reached the end of the episode
        if done:
            observation = market.reset()
            agent.reset_states()
            done = False
            log.info('Is terminal state. Reset..')
            log.info('=' * 40)

        tickcount += 1
        log.info('Tick: {t} | {info}'.format(t=tickcount, info=info))

        ## Checkpoint
        if tickcount % 100 == 0:
            # NOTE: the checkpoint body is truncated in the source; saving the
            # agent's weights here (filename is a placeholder) is an assumption.
            agent.save_weights('dqn_market_weights.h5f', overwrite=True)

    except KeyboardInterrupt:
        # NOTE: the original exception handler is also truncated; stopping
        # cleanly on Ctrl-C is an assumption.
        break
Example #2
# Assumed imports for this example: the stock Keras / keras-rl classes are
# listed below; NoisyNetDense, PrioritizedMemory, WindowProcessor, getsize and
# getimageandzone come from project-specific modules or a keras-rl fork and
# their import paths are not shown in the source.
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy, GreedyQPolicy


class CenteringDqn:
    def __init__(self, **args):
        height, width = getsize()

        # 'params' is assumed to be a module-level configuration dict.
        self.params = params
        self.params['width'] = width
        self.params['height'] = height
        self.params['num_training'] = args['numTraining']
        self.params['load_file'] = params['load_file']

        self._build_dqn_agent(self.params)

        # Train only when a positive number of training steps is requested.
        self._dqn.training = args['numTraining'] > 0

        self.img = None
        self.zone = 0
        self.reward = 0
        self._dqn_action = None
        self.terminal = None
        self.accum_reward = 0

        self._train()

    def _build_dqn_agent(self, params):
        NB_ACTIONS = 7

        # ----------------------------------------------------------------------------------------------------------------
        inputShape = (params['width'], params['height'], 3)

        model = Sequential()
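        # Small convolutional Q-network: two conv layers and a max-pool,
        # followed by NoisyNetDense layers (NoisyNet-style exploration); the
        # final layer outputs one Q-value per action.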
        model.add(
            Conv2D(16, (3, 3),
                   input_shape=inputShape,
                   padding='same',
                   activation='relu'))
        model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
        model.add(NoisyNetDense(16, activation='linear'))
        model.add(Flatten())
        model.add(NoisyNetDense(NB_ACTIONS, activation='linear'))

        model.summary()
        # ----------------------------------------------------------------------------------------------------------------

        # Memory replay
        if not params['prio_memory']:
            print("Using Sequential memory")
            memory = SequentialMemory(limit=params['mem_size'],
                                      window_length=1)
        else:
            print("Using Prioritized memory")
            params['lr'] = params['lr'] / 4
            memory = PrioritizedMemory(limit=params['mem_size'],
                                       alpha=0.6,
                                       start_beta=0.5,
                                       end_beta=1.0,
                                       steps_annealed=params['annealing'],
                                       window_length=1)

        # Epsilon Greedy policy, linearly decreasing
        if not params['noisy_layer']:
            print("Using Annealed Eps Greedy policy")
            self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                               attr='eps',
                                               value_max=params['eps'],
                                               value_min=params['eps_final'],
                                               value_test=0.0,
                                               nb_steps=params['annealing'])

        # Or Greedy policy in case of noisy layers
        else:
            print("Using Q Greedy policy (with noisy layer)")
            self.policy = GreedyQPolicy()

        # Keras DQN agent
        self._dqn = DQNAgent(
            model=model,
            nb_actions=NB_ACTIONS,
            policy=self.policy,
            memory=memory,
            batch_size=params['batch_size'],
            processor=WindowProcessor(),
            enable_double_dqn=True,
            enable_dueling_network=True,
            nb_steps_warmup=params['train_start'],
            gamma=params['discount'],
            target_model_update=1000,
            train_interval=1,
            delta_clip=1.,
            custom_model_objects={"NoisyNetDense": NoisyNetDense})

        self._dqn.compile(Adam(lr=params['lr']), metrics=['mae'])

        if params['load_file']:
            print("file loaded")
            self._dqn.load_weights(params['load_file'])

    def _load_img(self):
        self.img, self.zone = getimageandzone()

    def _get_reward(self):
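        # Reward lookup indexed as tab[zone][action]: the action matching the
        # current zone pays +20, with rewards tapering off and turning negative
        # the further the action is from the correct zone (interpretation
        # inferred from the values; not documented in the source).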
        tab = [[20, 10, 5, 0, -5, -10, -20], [5, 20, 10, 0, -5, -10, -20],
               [-5, 5, 20, 0, -5, -10, -20], [-20, -10, 5, 20, 5, -10, -20],
               [-20, -10, -5, 0, 20, 5, -5], [-20, -10, -5, 0, 10, 20, 5],
               [-20, -10, -5, 0, 5, 10, 20]]

        if self._dqn_action is not None:
            self.reward = tab[self.zone][self._dqn_action]

    def _train(self):
        self.terminal = False
        for i in range(0, self.params['num_training']):
            # Generate an image and the corresponding zone
            self._load_img()

            if self._dqn_action is not None:
                # Backward pass: feed the previous step's reward to the agent
                self._dqn.backward(self.reward, self.terminal)

            # Forward pass: pick the next action
            self._dqn_action = self._dqn.forward(self.img)
            self._get_reward()

            # Advance keras-rl's internal step counter (used e.g. by annealed policies)
            self._dqn.step += 1

            self.accum_reward = self.accum_reward + self.reward

            msg = "Step: {} \treward: {} \taccumrwd: {} \tzone: {} \taction: {}".format(
                i, self.reward, self.accum_reward, self.zone, self._dqn_action)
            print(msg)
            with open("log.txt", 'a') as log_file:
                log_file.write(msg + "\n")

            # On the second-to-last iteration, flag the terminal state so the
            # final backward() call sees it, and save the weights.
            if i == self.params['num_training'] - 2:
                self.terminal = True
                self._dqn.save_weights(self.params['save_file'])

            if i % 1000 == 0:
                self._dqn.save_weights(self.params['save_file'] + str(i), True)
                self.accum_reward = 0
                print("Model saved")