Example #1
def make_dqn_rl_agent(processor: Processor_56x5,
                      nbr_layers=2,
                      enable_dueling_network: bool = False,
                      enable_double_dqn: bool = True):
    """
    
    :param processor: 
    :param nbr_layers: 
    :param enable_dueling_network:
    :param enable_double_dqn:
    :return: 
    """

    model = processor.create_model(nbr_layers=nbr_layers)
    test_policy = GreedyQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)

    dqn_agent = DQNAgent(model=model,
                         nb_actions=NBR_TICHU_ACTIONS,
                         memory=memory,
                         nb_steps_warmup=100,
                         target_model_update=1e-2,
                         test_policy=test_policy,
                         processor=processor,
                         enable_dueling_network=enable_dueling_network,
                         enable_double_dqn=enable_double_dqn)
    dqn_agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn_agent
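# Usage sketch for the factory above (illustrative only: assumes Processor_56x5 takes no
# constructor arguments and that `tichu_env` is a gym-compatible Tichu environment,
# neither of which is shown in this snippet):
processor = Processor_56x5()
agent = make_dqn_rl_agent(processor, nbr_layers=2)
agent.fit(tichu_env, nb_steps=50000, visualize=False, verbose=1)
agent.test(tichu_env, nb_episodes=5, visualize=False)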
    def agent(self):
        nb_actions = self.env.action_space.n
        model = self.build()
        print(model.summary())

        memory = SequentialMemory(limit=50000, window_length=1)
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=32,
                       enable_dueling_network=True,
                       target_model_update=1e-2,
                       policy=InformedBoltzmannGumbelQPolicy(self.env),
                       test_policy=InformedGreedyQPolicy(self.env),
                       batch_size=32,
                       train_interval=32)
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        if self.initial_weights_file is not None:
            try:
                dqn.load_weights(self.initial_weights_file)
            except Exception:
                # weights file missing or incompatible: just skip loading
                pass

        return dqn
    def __init__(self, shape, action_count: int):
        super().__init__()

        inp = Input(shape=shape)
        flat = Flatten()(inp)

        # Activation: relu, sigmoid, ...
        hidden1 = Dense(256, activation='relu')(flat)
        hidden2 = Dense(64, activation='relu')(hidden1)
        hidden3 = Dense(16, activation='relu')(hidden2)
        output = Dense(action_count, activation='softmax')(hidden3)

        self.model = Model(inputs=inp, outputs=output)
        print(self.model.summary())

        self.memory = SequentialMemory(limit=50000,
                                       window_length=WINDOW_LENGTH)
        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                           attr='eps',
                                           value_max=1.,
                                           value_min=.1,
                                           value_test=.05,
                                           nb_steps=1000)
        self.callbacks = self.build_callbacks("msnake")
        self.dqn = DQNAgent(model=self.model,
                            nb_actions=action_count,
                            memory=self.memory,
                            nb_steps_warmup=50,
                            target_model_update=1e-2,
                            policy=self.policy)

        Adam._name = "fix_bug"  # https://github.com/keras-rl/keras-rl/issues/345
        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-5), metrics=['mse'])
    def play(self, nb_episodes=5, render=False):
        """Let the agent play"""
        memory = SequentialMemory(limit=memory_limit, window_length=window_length)
        policy = TrumpPolicy()

        class CustomProcessor(Processor):
            """The agent and the environment"""

            def process_state_batch(self, batch):
                """
                Given a state batch, I want to remove the second dimension, because it's
                useless and prevents me from feeding the tensor into my CNN
                """
                return np.squeeze(batch, axis=1)

            def process_info(self, info):
                processed_info = info['player_data']
                if 'stack' in processed_info:
                    processed_info = {'x': 1}
                return processed_info

        nb_actions = self.env.action_space.n

        self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=nb_steps_warmup,
                            target_model_update=1e-2, policy=policy,
                            processor=CustomProcessor(),
                            batch_size=batch_size, train_interval=train_interval, enable_double_dqn=enable_double_dqn)
        self.dqn.compile(tf.keras.optimizers.Adam(lr=1e-3), metrics=['mae'])  # pylint: disable=no-member

        self.dqn.test(self.env, nb_episodes=nb_episodes, visualize=render)
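# Illustration (not part of the original example) of what CustomProcessor.process_state_batch
# removes: with window_length == 1, keras-rl hands the network batches shaped
# (batch_size, 1, observation_dim), and the middle axis gets squeezed away.
import numpy as np

batch = np.zeros((32, 1, 24))          # (batch_size, window_length, observation_dim)
squeezed = np.squeeze(batch, axis=1)   # -> (32, 24)
print(batch.shape, squeezed.shape)     # (32, 1, 24) (32, 24)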
Example #5
def test_double_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1, )))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=50,
                   target_model_update=1e-1,
                   policy=policy,
                   enable_double_dqn=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
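# Sketch of the double-DQN target that enable_double_dqn=True turns on (illustrative
# numpy only, not keras-rl internals verbatim): the online network chooses the action,
# the target network provides its value.
import numpy as np

gamma = 0.99
reward, done = 1.0, False
q_online_next = np.array([0.2, 0.8, 0.5])   # online-network estimates of Q(s', a)
q_target_next = np.array([0.3, 0.6, 0.9])   # target-network estimates of Q(s', a)

a_star = np.argmax(q_online_next)                                  # action picked by online net
td_target = reward + (1.0 - done) * gamma * q_target_next[a_star]  # value taken from target net
# vanilla DQN would instead use max(q_target_next) here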
Example #6
def run():
    env = game_env.MeleeEnv()
    nb_actions = env.action_space.shape[0]
    actor = build_network(env, nb_actions)
    critic, action_input = build_critic(env, nb_actions)
    memory = SequentialMemory(limit=25000, window_length=1)  # window_length is required by SequentialMemory
    # random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
    agent = DQNAgent(
        model=actor,
        nb_actions=nb_actions,
        memory=memory,
        batch_size=1000,
        nb_steps_warmup=100)
    # Leftover DDPG-style options from an earlier version of this example:
    # processor=Process(), window_length=4, critic_action_input=action_input,
    # nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
    # random_process=random_process, gamma=.95, target_model_update=1e-1,
    # delta_range=(-10., 10.)
    agent.compile(RMSprop(lr=.0005), metrics=['mae'])

    agent.fit(env,
              nb_steps=100000,
              visualize=True,
              verbose=1,
              nb_max_start_steps=100,
              start_step_policy=lambda x: np.random.randint(nb_actions))
    # After training is done, we save the final weights.
    agent.save_weights('ddpg_{}_weights.h5f'.format(
        str(random.randrange(0, 100000))),
                       overwrite=True)
def init_dqn(env, nb_actions):
    """ Initialize the DQN agent using the keras-rl package.

    :param env: the environment to be played, required to determine the input size
    :param nb_actions: number of actions
    :return: DQN Agent
    """
    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # compile agent
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=10,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.model_name = f"DQN"
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn
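# Usage sketch for init_dqn above (assumes gym is installed and that a discrete-action
# environment such as CartPole-v1 stands in for the actual task):
import gym

env = gym.make('CartPole-v1')
dqn = init_dqn(env, env.action_space.n)
dqn.fit(env, nb_steps=10000, visualize=False, verbose=1)
dqn.test(env, nb_episodes=5, visualize=False)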
    def __init__(self, env: gym.Env, logger=Logger()):
        nb_actions = env.action_space.shape[0]

        model = Sequential()
        model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('linear'))

        policy = BoltzmannQPolicy()
        memory = SequentialMemory(limit=100000, window_length=1)
        agent = DQNAgent(model=model,
                         nb_actions=nb_actions,
                         memory=memory,
                         nb_steps_warmup=10,
                         target_model_update=1e-2,
                         policy=policy)
        agent.compile(Adam(lr=1e-3), metrics=['mae'])
        self.agent = agent
        self.env = env
        super().__init__(env, logger)
Example #9
    def _build_dqn(nb_actions, nb_states):
        # build network
        model = Sequential()
        model.add(Flatten(input_shape=(1, nb_states)))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(16))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions, activation='linear'))

        # build agent
        memory = SequentialMemory(limit=10240, window_length=1)
        policy = BoltzmannQPolicy()
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=10,
                       enable_dueling_network=True,
                       dueling_type='avg',
                       target_model_update=1e-2,
                       policy=policy)
        dqn.compile(Adam(), metrics=['mae'])

        return dqn
def build_agent(model, actions):
    '''Build Agent'''
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=10000
    )
    memory = SequentialMemory(
        limit=1000000,
        window_length=3
    )
    DQN_agent = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=1000
    )
    DQN_agent.compile(optimizer=Adam(lr=0.00025), metrics=['mae', 'accuracy'])
    return DQN_agent
Example #11
def build_model(env, num_actions):
    input = Input(shape=(1, env.observation_space.shape[0]))
    x = Flatten()(input)
    x = Dense(128, activation='relu')(x)  #128
    x = Dense(64, activation='relu')(x)  #64
    x = Dense(32, activation='relu')(x)  #32
    output = Dense(num_actions, activation='linear')(x)
    model = Model(inputs=input, outputs=output)
    print(model.summary())

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=10000)
    # policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=num_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    return dqn
    def agent(self):
        nb_actions = self.env.action_space.n
        obs_dim = self.env.observation_space.shape
        model = Sequential()
        model.add(Flatten(input_shape=(1,) + obs_dim))
        model.add(Dense(nb_actions, activation='linear'))
        print(model.summary())

        memory = SequentialMemory(limit=50000, window_length=1)
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=256,
                       enable_dueling_network=True,
                       target_model_update=1e-2,
                       policy=InformedBoltzmannGumbelQPolicy(self.env),
                       test_policy=InformedGreedyQPolicy(self.env),
                       batch_size=128,
                       train_interval=128)
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        if self.initial_weights_file is not None:
            dqn.load_weights(self.initial_weights_file)
            self.train_episodes = 0

        return dqn
Example #13
    def __init__(self, state_dim, action_space, epsilon, lr):
        self._model = self._get_model(state_dim, action_space)
        # keras-rl's DQNAgent also requires nb_actions and a replay memory; the values
        # below assume a gym Discrete action space (adjust if action_space is already an int)
        self.agent = DQNAgent(self._model,
                              nb_actions=action_space.n,
                              memory=SequentialMemory(limit=50000, window_length=1),
                              policy=EpsGreedyQPolicy(epsilon),
                              test_policy=EpsGreedyQPolicy(eps=0.01))

        self.agent.compile(Adam(lr))
Example #14
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.05,
                                  value_test=.05,
                                  nb_steps=150_000)
    #policy = EpsGreedyQPolicy(eps=.1)
    #policy = GreedyQPolicy()

    #policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=30000, window_length=1)
    dqn = DQNAgent(model=model,
                   memory=memory,
                   policy=policy,
                   processor=processor,
                   nb_actions=actions,
                   nb_steps_warmup=100,
                   target_model_update=1e-3,
                   enable_double_dqn=True,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   batch_size=8,
                   gamma=.95)
    return dqn
def build_agent(model, nb_actions):
    """
    build an agent
    """
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=MAX_EPSILON,
        value_min=MIN_EPSILON,
        value_test=TEST_EPSILON,
        nb_steps=MAX_STEPS
    )
    memory = SequentialMemory(
        limit=MAX_STEPS,
        window_length=WINDOW_WIDTH
    )
    dqn = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=nb_actions,
        nb_steps_warmup=WARMUP_STEPS
    )
    dqn.compile(Adam(learning_rate=LEARNING_RATE), metrics=['mae'])
    return dqn
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., 
                                  value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                   enable_dueling_network=True, dueling_type='avg', nb_actions=actions, nb_steps_warmup=1000)
    return dqn
Example #18
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model,
                   memory=memory, 
                   policy=policy, 
                   nb_actions=actions, 
                   nb_steps_warmup=10, 
                   target_model_update=1e-2)
    return dqn
Example #19
    def get_agent(self):
        agent = DQNAgent(model=self.model,
                         policy=self.policy,
                         nb_steps_warmup=10,
                         target_model_update=1e-2,
                         nb_actions=self.action,
                         memory=self.memory,
                         enable_double_dqn=False)

        return agent
def build_agent(model, actions):
    """
    Builds an Epsilon Greedy Deep Q Learning Agent
    """
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1, value_min=0.1, value_test=0.2,
                                  nb_steps=10000)
    memory = SequentialMemory(limit=2000, window_length=3)

    dqn = DQNAgent(model, policy, memory=memory, enable_dueling_network=True, dueling_type='avg', nb_actions=actions,
                   nb_steps_warmup=1000)
    return dqn
Example #21
    def set_num_states(self, state_dimension: int, num_actions: int) -> None:
        model = self._build_model(state_dimension, num_actions)
        memory = SequentialMemory(limit=10000, window_length=1)
        self._internal_agent = DQNAgent(model=model,
                                        nb_actions=num_actions,
                                        memory=memory,
                                        nb_steps_warmup=1000,
                                        target_model_update=1000,
                                        gamma=0.99,
                                        delta_clip=1)

        self._internal_agent.compile(Adam(lr=0.0001), metrics=['mae'])
Example #22
def train(learn_rate, model_update_interval, steps):
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=50000,
                   target_model_update=model_update_interval,
                   policy=policy,
                   gamma=.99,
                   train_interval=4)
    dqn.compile(Adam(lr=learn_rate), metrics=['mae'])
    dqn.fit(env, nb_steps=steps, verbose=2, visualize=VISUALIZE)
    dqn.save_weights(SAVEFILE_FOLDER + "/dqn_pong_params.h5f", overwrite=True)
Example #23
def setupDQN(cfg, nb_actions, processor):
    image_in = Input(shape=cfg.input_shape, name='main_input')
    input_perm = Permute((2, 3, 1), input_shape=cfg.input_shape)(image_in)
    conv1 = Conv2D(32, (8, 8), activation="relu", strides=(4, 4),
                   name='conv1')(input_perm)
    conv2 = Conv2D(64, (4, 4), activation="relu", strides=(2, 2),
                   name='conv2')(conv1)
    conv3 = Conv2D(64, (3, 3), activation="relu", strides=(1, 1),
                   name='conv3')(conv2)
    conv_out = Flatten(name='flat_feat')(conv3)
    dense_out = Dense(512, activation='relu')(conv_out)
    q_out = Dense(nb_actions, activation='linear')(dense_out)
    model = Model(inputs=[image_in], outputs=[q_out])
    print(model.summary())
    # hstate_size = int(np.prod(conv3.shape[1:]))

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=cfg.memory_limit,
                              window_length=cfg.WINDOW_LENGTH)

    # Select a policy. We use eps-greedy action selection, which means that a random action is selected
    # with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
    # the agent initially explores the environment (high eps) and then gradually sticks to what it knows
    # (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
    # so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=cfg.nb_steps_annealed_policy)

    # The trade-off between exploration and exploitation is difficult and an on-going research topic.
    # If you want, you can experiment with the parameters or use a different policy. Another popular one
    # is Boltzmann-style exploration:
    # policy = BoltzmannQPolicy(tau=1.)
    # Feel free to give it a try!

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=cfg.nb_steps_warmup_dqn_agent,
                   gamma=.99,
                   target_model_update=cfg.target_model_update_dqn_agent,
                   train_interval=4,
                   delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    return dqn
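# Back-of-envelope version of the linear epsilon schedule described in the comment
# above (illustrative helper, not the LinearAnnealedPolicy source):
def annealed_eps(step, eps_max=1.0, eps_min=0.1, nb_steps=1_000_000):
    """Decay eps linearly from eps_max to eps_min over nb_steps, then hold it."""
    return max(eps_min, eps_max - (eps_max - eps_min) * step / nb_steps)

# annealed_eps(0) -> 1.0, annealed_eps(500_000) -> 0.55, annealed_eps(2_000_000) -> 0.1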
Example #24
    def initiate_agent(self, env):
        """initiate a deep Q agent"""
        tf.compat.v1.disable_eager_execution()
        self.env = env

        nb_actions = self.env.action_space.n

        self.model = Sequential()
        self.model.add(
            Dense(512, activation='relu', input_shape=env.observation_space))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(512, activation='relu'))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(512, activation='relu'))
        self.model.add(Dropout(0.2))
        self.model.add(Dense(nb_actions, activation='linear'))

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        memory = SequentialMemory(limit=memory_limit,
                                  window_length=window_length)
        policy = TrumpPolicy()

        class CustomProcessor(Processor):
            """The agent and the environment"""
            def process_state_batch(self, batch):
                """
                Given a state batch, I want to remove the second dimension, because it's
                useless and prevents me from feeding the tensor into my CNN
                """
                return np.squeeze(batch, axis=1)

            def process_info(self, info):
                processed_info = info['player_data']
                if 'stack' in processed_info:
                    processed_info = {'x': 1}
                return processed_info

        nb_actions = env.action_space.n

        self.dqn = DQNAgent(model=self.model,
                            nb_actions=nb_actions,
                            memory=memory,
                            nb_steps_warmup=nb_steps_warmup,
                            target_model_update=1e-2,
                            policy=policy,
                            processor=CustomProcessor(),
                            batch_size=batch_size,
                            train_interval=train_interval,
                            enable_double_dqn=enable_double_dqn)
        self.dqn.compile(tf.optimizers.Adam(lr=1e-3), metrics=['mae'])
Example #25
    def build_agent(self, mem_file=None, w_file=None):
        #Create a dummy env to get size of input/output.
        #Makes it simpler if we ever choose to update env shapes.
        env = TradingEnv([], "", [])
        np.random.seed(314)
        env.seed(314)

        nb_actions = env.action_space.n
        obs_dim = env.observation_space.shape[0]
        model = Sequential()
        model.add(
            LSTM(5, input_shape=(7, 4),
                 return_sequences=True))  # 4 features + 1 bias term. 5 neurons
        model.add(Activation('tanh'))
        model.add(LSTM(4))
        model.add(Activation('tanh'))
        model.add(Dropout(0.2))
        model.add(Dense(4))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('linear'))  # Best activation for BoltzmannQPolicy

        #policy = EpsGreedyQPolicy(eps=EPS_VAL) #Off policy
        policy = BoltzmannQPolicy()  #Off-policy
        test_policy = MaxBoltzmannQPolicy()  #On-policy
        memory = None
        if mem_file is None:
            memory = SequentialMemory(
                limit=50000,
                window_length=7)  ## returns observations of len (7,)
        else:
            (memory, memory.actions, memory.rewards, memory.terminals,
             memory.observations) = pickle.load(open(mem_file, "rb"))

        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       gamma=GAMMA_VAL,
                       nb_steps_warmup=100,
                       policy=policy,
                       test_policy=test_policy)
        dqn.compile("adam", metrics=['mse'])

        if w_file is not None:
            model.load_weights(w_file)

        return dqn, env, memory
    def __init__(self, shape, initial_randomness: float, action_count: int):
        super().__init__()

        model = Sequential()
        model.add(Input(shape=shape))
        model.add(Conv2D(8, (3, 3), activation='relu', input_shape=shape))
        model.add(Conv2D(16, (3, 3), activation='relu', input_shape=shape))
        model.add(Conv2D(32, (3, 3), activation='relu', input_shape=shape))
        model.add(Flatten())
        model.add(Dense(64, activation='relu'))
        model.add(Dense(512, activation='relu'))
        model.add(Dense(action_count, activation='softmax'))

        print(model.summary())

        self.model = model

        self.callbacks = self.build_callbacks("msnake")

        self.processor = RemoveDimensionProcessor()

        self.memory = SequentialMemory(limit=50000, window_length=1)

        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                           attr='eps',
                                           value_max=1.,
                                           value_min=.1,
                                           value_test=.05,
                                           nb_steps=1000)

        self.dqn = DQNAgent(model=self.model,
                            nb_actions=action_count,
                            memory=self.memory,
                            nb_steps_warmup=10,
                            target_model_update=1e-2,
                            policy=self.policy,
                            batch_size=1,
                            processor=self.processor)

        # https://github.com/keras-rl/keras-rl/issues/345
        Adam._name = "fix_bug"

        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-3), metrics=['mse'])

        self.initial_randomness = initial_randomness
Example #27
    def getAgent(self):
        agent = DQNAgent(
            model=self.model,
            policy=self.policy,
            nb_steps_warmup=10,
            target_model_update=1e-2,
            nb_actions=self.action,
            memory=self.memory,
            enable_double_dqn=False
        )
        return agent
Example #28
    def __init__(self,
                 env: gym.Env,
                 memory=SequentialMemory(limit=50000, window_length=1),
                 logger=Logger(),
                 boxes_resolution=10,
                 nb_steps_warmup=20,
                 hidden_layers=[16, 16, 16],
                 policy=BoltzmannQPolicy(),
                 target_model_update=1e-2,
                 optimizer=Adam(lr=1e-3)):

        self.env = env

        if isinstance(boxes_resolution, int):
            boxes_resolution = (boxes_resolution, ) * len(
                env.action_space.shape)

        self.boxes_resolution = boxes_resolution
        self.nb_actions = np.zeros(boxes_resolution).size

        model = Sequential()
        model.add(Flatten(input_shape=(1, ) +
                          env.observation_space.shape))  # TODO check this
        for l in hidden_layers:
            model.add(Dense(l, activation='relu'))
        model.add(Dense(self.nb_actions,
                        activation='linear'))  # TODO move this to util file?

        self.model = model
        print("dqn model summary :{0}".format(model.summary()))

        self.dqn = DQNAgent(model=model,
                            nb_actions=self.nb_actions,
                            memory=memory,
                            nb_steps_warmup=nb_steps_warmup,
                            target_model_update=target_model_update,
                            policy=policy,
                            processor=DqnProcessor(self.boxes_resolution,
                                                   env.action_space.low,
                                                   env.action_space.high))
        self.dqn.compile(optimizer=optimizer, metrics=['mae'])
        super().__init__(env, logger)
    def build_agent(self, model, actions, nb_steps):
        """
        building the deep q agent

        GAMMA:
        REWARD = r1 + gamma*r2 + gamma^2*r3 + gamma^3*r4 ...
        -> gamma defines penalty for future reward
        In general, most algorithms learn faster when they don't have to look too far into the future.
        So, it sometimes helps the performance to set gamma relatively low.
        for many problems a gamma of 0.9 or 0.95 is fine

        LAMBDA:
        The lambda parameter determines how much you bootstrap on earlier learned value versus using
        the current Monte Carlo roll-out. This implies a trade-off between more bias (low lambda)
        and more variance (high lambda).
        A general rule of thumb is to use a lambda equal to 0.9.
        However, it might be good just to try a few settings (e.g., 0, 0.5, 0.8, 0.9, 0.95 and 1.0)
        """
        policy = LinearAnnealedPolicy(
            EpsGreedyQPolicy(),  # takes current best action with prob (1 - epsilon)
            attr='eps',  # decay epsilon (= exploration) per agent step
            value_max=self.EPSILON_START,  # start value of epsilon (default = 1)
            value_min=self.EPSILON_END,  # final value of epsilon (default = 0)
            value_test=self.EPSILON_TEST,
            nb_steps=self.EPSILON_DECAY * nb_steps)
        memory = SequentialMemory(limit=self.SEQUENTIAL_MEMORY_LIMIT,
                                  window_length=1)
        build_agent = DQNAgent(model=model,
                               memory=memory,
                               policy=policy,
                               gamma=self.GAMMA,
                               batch_size=self.BATCH_SIZE,
                               nb_actions=actions,
                               nb_steps_warmup=1000,
                               target_model_update=self.TARGET_MODEL_UPDATE,
                               enable_double_dqn=False,
                               train_interval=4)
        return build_agent
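# Worked example of the discounted return described in the docstring above
# (REWARD = r1 + gamma*r2 + gamma^2*r3 + ...), with gamma = 0.95:
rewards = [1.0, 0.0, 0.0, 10.0]
gamma = 0.95
discounted_return = sum(gamma ** t * r for t, r in enumerate(rewards))
print(discounted_return)  # 1.0 + 0.95**3 * 10 = 9.57375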
Example #30
def get_agent(agent_type, model_type, lr):
    if agent_type == "sarsa":
        policy = BoltzmannQPolicy()
        model = get_model(model_type)
        agent = SARSAAgent(model=model,
                           policy=policy,
                           nb_actions=nb_actions,
                           nb_steps_warmup=10,
                           gamma=0.99)
        agent.compile(Adam(lr), metrics=['mae'])
        return agent
    elif agent_type == "dqn":
        policy = BoltzmannQPolicy()
        model = get_model(model_type)
        memory = SequentialMemory(limit=50000, window_length=1)
        agent = DQNAgent(model=model,
                         policy=policy,
                         nb_actions=nb_actions,
                         memory=memory,
                         nb_steps_warmup=10,
                         target_model_update=1e-2,
                         enable_double_dqn=True)
        agent.compile(Adam(lr), metrics=['mae'])
        return agent
    elif agent_type == "a2c":
        agent = A2CAgent(nb_actions,
                         len(env.observation_space.high),
                         nb_steps_warmup=10,
                         actor_lr=0.001,
                         critic_lr=0.005)
        agent.compile(Adam(lr))
        return agent
    elif agent_type == "ppo":
        pass
    else:
        print("Unsupported model")
        exit(1)