Example #1
class TrainedPlayer(AbstractPlayer):
    def __init__(self, env, number, file_name):
        super().__init__()
        self.env = env
        self.number = number
        self.action = 0
        self.nb_actions = spaces.Discrete(len(HAND)).n
        # build model.
        self.model = Sequential()
        self.model.add(
            Flatten(input_shape=(1, ) +
                    spaces.Box(low=0,
                               high=2,
                               shape=((PLAYER_NUM + 1) * 2, ROUND_NUM),
                               dtype='float32').shape))
        self.model.add(Dense(256))
        self.model.add(Activation('relu'))
        self.model.add(Dense(256))
        self.model.add(Activation('relu'))
        self.model.add(Dense(self.nb_actions))
        self.model.add(Activation('linear'))

        # configure agent.
        memory = SequentialMemory(limit=50000, window_length=1)
        policy = EpsGreedyQPolicy()
        self.dqn = DQNAgent(model=self.model,
                            nb_actions=self.nb_actions,
                            memory=memory,
                            nb_steps_warmup=1000,
                            target_model_update=1e-2,
                            policy=policy)
        self.dqn.compile(Adam(learning_rate=1e-3), metrics=[])
        self.dqn.load_weights(file_name)
        print('Model loading... complete')

    def play(self):
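        # Choose an action with the trained DQN; if that hand has already been used,
        # fall back to the nearest unused higher action, then the nearest lower one.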
        self.action = self.dqn.forward(self.env.get_observation(self.number))
        # print("Trained AI Action:{}".format(
        #     self.dqn.compute_q_values([self.env.get_observation(self.number)])
        #     ))
        # If an impossible (already used) action was selected
        if self.used[self.action] == 1:
            # re-select the nearest higher available one
            for i in range(self.action, len(HAND)):
                if self.used[i] == 0.:
                    self.action = i
                    break
        # If that still does not work
        if self.used[self.action] == 1:
            # re-select the nearest lower available one
            for i in range(self.action, -1, -1):
                if self.used[i] == 0.:
                    self.action = i
                    break
        self.used[self.action] = 1

        return self.action
Example #2
agent.load_weights('{p}/dqn_{fn}_weights.h5f'.format(p=PATH, fn=ENV_NAME))

## Train or evaluate
if TRAIN:
    agent.training = True

observation = market.reset()

while True:
    try:
        # TODO add callbacks?

        ## The agent selects an action
        # (candles=9(mb=>(2,4)?), tickers=4, trades=2)
        # TODO: actions for a multi-symbol market
        action = agent.forward(observation)

        ## Execute action
        observation, reward, done, info = market.step([action])

        ## Get the response from the environment
        agent.backward(reward, terminal=done)

        ## If we have reached the end of the episode
        if done:
            observation = market.reset()
            agent.reset_states()
            done = False
            log.info('Reached terminal state. Resetting..')
            log.info('='*40)
    except KeyboardInterrupt:
        # Minimal handler so the try block is complete; stop the loop on Ctrl+C.
        break
Example #3
    # Verify that the boot ROM exists
    if bootROM is not None and not os.path.exists(bootROM):
        print("Boot-ROM not found. Please copy the Boot-ROM to '%s'. Using a replacement in the meantime..." % bootROM)
        bootROM = None

    try:
        filename = "../ROMs/Pokemon Red.gb"

        # Start PyBoy and run loop
        pyboy = PyBoy(Window(scale=scale), filename, bootROM)
        step = 0
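        # Advance the emulator one frame per iteration; the loop exits once tick() signals that the emulator has stopped.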
        while not pyboy.tick():
            try:
                # ((160,144) * scale)-sized black/white array
                screen_array = pyboy.getScreenBuffer().getScreenBuffer()
                # print(screen_array.shape)
                observation = dqn.processor.process_observation(screen_array)
                action = dqn.forward(observation)
                pyboy.sendInput(actions[action])
            except Exception as e:
                print(e)
        pyboy.stop()

    except KeyboardInterrupt:
        print ("Interrupted by keyboard")
        pyboy.stop()
    except Exception as ex:
        traceback.print_exc()
Example #4
    dqn.compile(Adam(lr=0.001), metrics=['mae'])

    if args.train:
        # dqn.load_weights('dqn_traffic_weights.h5f')
        history = dqn.fit(env, nb_steps=100_000, visualize=False, verbose=2)

        env.close()
        dqn.save_weights('dqn_traffic_weights.h5f', overwrite=True)
        pandas.DataFrame(history.history['episode_reward']).plot(figsize=(16,
                                                                          5))
        plt.savefig('output.png')

    if args.test:
        dqn.load_weights(WEIGHTS_PATH)
        done = False
        step = 0
        obs = env.reset()
        while not done:
            action = dqn.forward(obs)
            obs, reward, done, _ = env.step(action)
            # print(env.get_trafficlight_phase('cp'))
            # env.get_road_info(road_id)
            print(f'Action: {action}')
            print(f'Observation: {obs}')
            print(f'Reward: {reward}')
            print(f'Done: {done}')
            print(f'Max Occupancy: {env.get_max_occupancy()}')
            step += 1

        env.close()
Example #5
class CenteringDqn:
    def __init__(self, **args):
        height, width = getsize()

        self.params = params
        self.params['width'] = width
        self.params['height'] = height
        self.params['num_training'] = args['numTraining']
        self.params['load_file'] = params['load_file']

        self._build_dqn_agent(self.params)

        if args['numTraining'] > 0:
            self._dqn.training = True
        else:
            self._dqn.training = False

        self.img = None
        self.zone = 0
        self.reward = 0
        self._dqn_action = None
        self.terminal = None
        self.accum_reward = 0

        self._train()

    def _build_dqn_agent(self, params):
        NB_ACTIONS = 7

        # ----------------------------------------------------------------------------------------------------------------
        inputShape = (params['width'], params['height'], 3)

        model = Sequential()
        model.add(
            Conv2D(16, (3, 3),
                   input_shape=inputShape,
                   padding='same',
                   activation='relu'))
        model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
        model.add(NoisyNetDense(16, activation='linear'))
        model.add(Flatten())
        model.add(NoisyNetDense(NB_ACTIONS, activation='linear'))

        model.summary()
        # ----------------------------------------------------------------------------------------------------------------

        # Memory replay
        if not params['prio_memory']:
            print("Using Sequential memory")
            memory = SequentialMemory(limit=params['mem_size'],
                                      window_length=1)
        else:
            print("Using Prioritized memory")
            params['lr'] = params['lr'] / 4
            memory = PrioritizedMemory(limit=params['mem_size'],
                                       alpha=0.6,
                                       start_beta=0.5,
                                       end_beta=1.0,
                                       steps_annealed=params['annealing'],
                                       window_length=1)

        # Epsilon Greedy policy, linearly decreasing
        if not params['noisy_layer']:
            print("Using Annealed Eps Greedy policy")
            self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                               attr='eps',
                                               value_max=params['eps'],
                                               value_min=params['eps_final'],
                                               value_test=0.0,
                                               nb_steps=params['annealing'])

        # Or Greedy policy in case of noisy layers
        else:
            print("Using Q Greedy policy (with noisy layer)")
            self.policy = GreedyQPolicy()

        # Keras DQN agent
        self._dqn = DQNAgent(
            model=model,
            nb_actions=NB_ACTIONS,
            policy=self.policy,
            memory=memory,
            batch_size=params['batch_size'],
            processor=WindowProcessor(),
            enable_double_dqn=True,
            enable_dueling_network=True,
            nb_steps_warmup=params['train_start'],
            gamma=params['discount'],
            target_model_update=1000,
            train_interval=1,
            delta_clip=1.,
            custom_model_objects={"NoisyNetDense": NoisyNetDense})

        self._dqn.compile(Adam(lr=params['lr']), metrics=['mae'])

        if params['load_file']:
            print("file loaded")
            self._dqn.load_weights(params['load_file'])

    def _load_img(self):
        self.img, self.zone = getimageandzone()

    def _get_reward(self):
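        # Reward lookup table indexed as tab[zone][action]: the diagonal (action matching
        # the zone) gives the maximum reward of 20, and values fall off with distance.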
        tab = [[20, 10, 5, 0, -5, -10, -20], [5, 20, 10, 0, -5, -10, -20],
               [-5, 5, 20, 0, -5, -10, -20], [-20, -10, 5, 20, 5, -10, -20],
               [-20, -10, -5, 0, 20, 5, -5], [-20, -10, -5, 0, 10, 20, 5],
               [-20, -10, -5, 0, 5, 10, 20]]

        if self._dqn_action is not None:
            self.reward = tab[self.zone][self._dqn_action]

    def _train(self):
        self.terminal = False
        for i in range(0, self.params['num_training']):
            # generate an image and the corresponding zone
            self._load_img()

            if self._dqn_action is not None:
                # go backward
                self._dqn.backward(self.reward, self.terminal)

            # go forward
            self._dqn_action = self._dqn.forward(self.img)
            self._get_reward()

            self._dqn.step += 1

            self.accum_reward = self.accum_reward + self.reward

            print("Setp : " + str(i) + " \treward : " + str(self.reward) +
                  " \taccumrwd : " + str(self.accum_reward) + " \tzone : " +
                  str(self.zone) + " \taction : " + str(self._dqn_action))
            log_file = open("log.txt", 'a')
            log_file.write("Setp : " + str(i) + " \treward : " +
                           str(self.reward) + " \taccumrwd : " +
                           str(self.accum_reward) + " \tzone : " +
                           str(self.zone) + " \taction : " +
                           str(self._dqn_action) + "\n")

            # On the second-to-last iteration, set terminal to True
            if i == self.params['num_training'] - 2:
                self.terminal = True
                self._dqn.save_weights(params['save_file'])

            if i % 1000 == 0:
                self._dqn.save_weights(params['save_file'] + str(i), True)
                self.accum_reward = 0
                print("Model saved")
Example #6
def startDummy(env, Comm, tryHard=False):
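    # Rebuild the network architecture used during training, load the saved weights,
    # and answer move requests from Comm in an endless loop.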
    
    nb_actions = env.action_space.n


    layer0Size = 4096
    layer1Size = 4096
    layer2Size = 4096
    layer3Size = 0
    layer4Size = 0
    layer5Size = 1

    # Next, we build a very simple model. 
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(layer0Size))
    model.add(LeakyReLU(alpha=0.003))
    model.add(Dense(layer1Size))
    model.add(LeakyReLU(alpha=0.003))
    model.add(Dense(layer2Size))
    model.add(LeakyReLU(alpha=0.003))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    #A little diagnosis of the model summary
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=800000, window_length=1)
    policy = GreedyQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, policy=policy, enable_dueling_network=True)
    dqn.compile(nadam(lr=0.001), metrics=['mae']) 

    # Load previously trained weights.
    previousfileLength = 0
    load_file_number = 39
    loadFile = "Larger_Memeory_BOARDSIZE_" + str(max_board_size) + "_DQN_LAYERS_" + str(layer0Size) + "_" + str(layer1Size) + "_" + str(layer2Size) + "_" + str(layer3Size) + "_" + str(layer4Size) + "_" + str(layer5Size) +  "_SAVENUMBER_" + str(load_file_number) + ".h5f"
    dqn.load_weights(loadFile)
        
    while True:
        data = None
        while data is None:
            data = Comm.getNewData()
        observation, notUsed, currSafeMoves, headButtSafeMoves, noStuckMoves, foodMoves = env.findObservation(data=data)
        action = dqn.forward(observation)
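        # Map the discrete action to a direction, then override it with the environment's
        # safety and food heuristics when the raw choice is unsafe.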
        if action == 0:
            moveChosen = 'left'
        elif action == 1:
            moveChosen = 'right'
        elif action == 2:
            moveChosen = 'up'
        elif action == 3:
            moveChosen = 'down'
        if moveChosen not in currSafeMoves and len(currSafeMoves) > 0:
            moveChosen = choice(currSafeMoves)
        if moveChosen not in noStuckMoves and len(noStuckMoves) > 0:
            moveChosen = choice(noStuckMoves)
        if moveChosen not in headButtSafeMoves and len(headButtSafeMoves) > 0:
            moveChosen = choice(headButtSafeMoves)
        
        if moveChosen not in foodMoves and len(foodMoves) > 0:
            moveChosen = choice(foodMoves)


        Comm.giveNewMove(moveChosen)
# Okay, now it's time to learn something! Visualization is disabled here because it
# slows down training quite a lot. You can always safely abort the training prematurely
# using Ctrl + C.
env.is_train = True

dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))
dqn.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

with open('dqn_action.json', 'w') as fw:
    observation = status.tolist()
    action = [
        float(actions[dqn.forward(np.array([obs]))]) for obs in observation
    ]
    json.dump({'observation': observation, 'action': action}, fw)

state_batch = status.reshape([-1, 1, 1])
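# Dump the Q-value of every action for each observed state to CSV for offline inspection.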
q_val = pd.DataFrame(dqn.compute_batch_q_values(state_batch))
q_val.to_csv('dqn_qvalue.csv')

env.is_train = False
env.plot_row = 1
env.plot_col = 5
# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)

env.plt.ioff()
env.plt.show()