Python DQNAgent.DQNAgent Beispiele, rl.agents.DQNAgent.DQNAgent Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: keras_rl_utils.py Projekt: kalitsos/Tichu-1

def make_dqn_rl_agent(processor: Processor_56x5,
                      nbr_layers=2,
                      enable_dueling_network: bool = False,
                      enable_double_dqn: bool = True):
    """
    Assemble and compile a DQN agent around the network produced by ``processor``.

    :param processor: object whose ``create_model`` builds the network; it is
        also handed to the agent as its processor
    :param nbr_layers: forwarded to ``processor.create_model``
    :param enable_dueling_network: forwarded unchanged to ``DQNAgent``
    :param enable_double_dqn: forwarded unchanged to ``DQNAgent``
    :return: the compiled ``DQNAgent``
    """
    network = processor.create_model(nbr_layers=nbr_layers)
    greedy = GreedyQPolicy()
    replay = SequentialMemory(limit=50000, window_length=1)

    agent_kwargs = {
        'model': network,
        'nb_actions': NBR_TICHU_ACTIONS,
        'memory': replay,
        'nb_steps_warmup': 100,
        'target_model_update': 1e-2,
        'test_policy': greedy,
        'processor': processor,
        'enable_dueling_network': enable_dueling_network,
        'enable_double_dqn': enable_double_dqn,
    }
    agent = DQNAgent(**agent_kwargs)

    optimizer = Adam(lr=1e-3)
    agent.compile(optimizer, metrics=['mae'])
    return agent

Beispiel #2

0

Datei anzeigen

Datei: kerasrl.py Projekt: chairbender/fantasy-football-auction-ai

    def agent(self):
        """
        Build and compile the dueling DQN agent for ``self.env``.

        The network comes from ``self.build()``. If ``self.initial_weights_file``
        is set, loading it is attempted on a best-effort basis: a failure is
        logged and the agent is returned with the weights it already has.

        :return: the compiled ``DQNAgent``
        """
        import logging

        nb_actions = self.env.action_space.n
        model = self.build()
        # summary() prints the table itself and returns None, so wrapping it
        # in print() only added a stray "None" line.
        model.summary()

        memory = SequentialMemory(limit=50000, window_length=1)
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=32,
                       enable_dueling_network=True,
                       target_model_update=1e-2,
                       policy=InformedBoltzmannGumbelQPolicy(self.env),
                       test_policy=InformedGreedyQPolicy(self.env),
                       batch_size=32,
                       train_interval=32)
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        if self.initial_weights_file is not None:
            try:
                dqn.load_weights(self.initial_weights_file)
            except Exception as exc:
                # Deliberately broad so loading stays best-effort, but no
                # longer silent and no longer swallowing KeyboardInterrupt.
                logging.getLogger(__name__).warning(
                    "Could not load initial weights from %r: %s",
                    self.initial_weights_file, exc)

        return dqn

Beispiel #3

0

Datei anzeigen

Datei: DeepNetworkAgent.py Projekt: BogyMitutoyoCTL/AI-Preparation

    def __init__(self, shape, action_count: int):
        """
        Build a fully connected network and wrap it in a compiled DQN agent.

        :param shape: input shape handed to the Keras ``Input`` layer
        :param action_count: width of the output layer and number of actions
            given to the agent
        """
        super().__init__()

        inp = Input(shape=shape)
        flat = Flatten()(inp)

        # Activation: relu, sigmoid, ...
        hidden1 = Dense(256, activation='relu')(flat)
        hidden2 = Dense(64, activation='relu')(hidden1)
        hidden3 = Dense(16, activation='relu')(hidden2)
        # NOTE(review): the agent reads these outputs as Q-values; a softmax
        # head bounds them to [0, 1] - confirm this is intended rather than
        # a 'linear' activation.
        output = Dense(action_count, activation='softmax')(hidden3)

        self.model = Model(inputs=inp, outputs=output)
        # summary() prints by itself and returns None; print() around it
        # emitted a stray "None".
        self.model.summary()

        self.memory = SequentialMemory(limit=50000,
                                       window_length=WINDOW_LENGTH)
        # Epsilon is annealed linearly from 1.0 to 0.1 over 1000 steps;
        # 0.05 is used while testing.
        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                           attr='eps',
                                           value_max=1.,
                                           value_min=.1,
                                           value_test=.05,
                                           nb_steps=1000)
        self.callbacks = self.build_callbacks("msnake")
        self.dqn = DQNAgent(model=self.model,
                            nb_actions=action_count,
                            memory=self.memory,
                            nb_steps_warmup=50,
                            target_model_update=1e-2,
                            policy=self.policy)

        # Workaround for https://github.com/keras-rl/keras-rl/issues/345
        Adam._name = "fix_bug"
        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-5), metrics=['mse'])

Beispiel #4

0

Datei anzeigen

Datei: agent_keras_rl_dqn.py Projekt: MrStonkus/neuron_poker

    def play(self, nb_episodes=5, render=False):
        """
        Wrap ``self.model`` in a DQN agent and run it in test mode on ``self.env``.

        :param nb_episodes: number of test episodes passed to ``DQNAgent.test``
        :param render: passed to ``DQNAgent.test`` as ``visualize``
        """
        replay = SequentialMemory(limit=memory_limit, window_length=window_length)
        action_policy = TrumpPolicy()

        class CustomProcessor(Processor):
            """Adapts state batches and info dicts for the agent."""

            def process_state_batch(self, batch):
                """Return ``batch`` with its axis 1 squeezed away."""
                squeezed = np.squeeze(batch, axis=1)
                return squeezed

            def process_info(self, info):
                """Return ``{'x': 1}`` if the player data has a 'stack' entry, else the player data."""
                player_data = info['player_data']
                return {'x': 1} if 'stack' in player_data else player_data

        action_count = self.env.action_space.n

        self.dqn = DQNAgent(
            model=self.model,
            nb_actions=action_count,
            memory=replay,
            nb_steps_warmup=nb_steps_warmup,
            target_model_update=1e-2,
            policy=action_policy,
            processor=CustomProcessor(),
            batch_size=batch_size,
            train_interval=train_interval,
            enable_double_dqn=enable_double_dqn,
        )
        optimizer = tf.keras.optimizers.Adam(lr=1e-3)  # pylint: disable=no-member
        self.dqn.compile(optimizer, metrics=['mae'])

        self.dqn.test(self.env, nb_episodes=nb_episodes, visualize=render)

Beispiel #5

0

Datei anzeigen

def test_double_dqn():
    """Train a double DQN on the two-round environment and assert a mean test reward of 3."""
    env = TwoRoundDeterministicRewardEnv()
    # Seed every random source before anything is built.
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    action_count = env.action_space.n

    # A minimal two-layer network is enough for this environment.
    net = Sequential()
    net.add(Dense(16, input_shape=(1, )))
    net.add(Activation('relu'))
    net.add(Dense(action_count))
    net.add(Activation('linear'))

    replay = SequentialMemory(limit=1000, window_length=1)
    eps_policy = EpsGreedyQPolicy(eps=.1)
    agent_kwargs = dict(model=net,
                        nb_actions=action_count,
                        memory=replay,
                        nb_steps_warmup=50,
                        target_model_update=1e-1,
                        policy=eps_policy,
                        enable_double_dqn=True)
    agent = DQNAgent(**agent_kwargs)
    agent.compile(Adam(lr=1e-3))

    agent.fit(env, nb_steps=2000, visualize=False, verbose=0)
    # Switch exploration off before evaluating.
    eps_policy.eps = 0.
    result = agent.test(env, nb_episodes=20, visualize=False)
    mean_reward = np.mean(result.history['episode_reward'])
    assert_allclose(mean_reward, 3.)

Beispiel #6

0

Datei anzeigen

Datei: p3.py Projekt: Sciguymjm/athena

def run():
    """
    Train a DQN agent on ``MeleeEnv`` and save its weights.

    The weights file name carries a random integer suffix, so repeated runs
    normally write to different files.
    """
    env = game_env.MeleeEnv()
    # NOTE(review): DQN works on a discrete action count; shape[0] is kept
    # from the original - confirm it matches the environment's action space.
    nb_actions = env.action_space.shape[0]
    actor = build_network(env, nb_actions)
    # The critic is not used by the DQN agent below; the call is kept in case
    # build_critic has side effects.
    critic, action_input = build_critic(env, nb_actions)
    # keras-rl's SequentialMemory requires window_length; omitting it raises
    # a TypeError. 1 means one observation per state.
    memory = SequentialMemory(limit=25000, window_length=1)
    agent = DQNAgent(
        batch_size=1000,
        nb_actions=nb_actions,
        model=actor,
        memory=memory,
        nb_steps_warmup=100,
    )
    agent.compile(RMSprop(lr=.0005), metrics=['mae'])

    agent.fit(env,
              nb_steps=100000,
              visualize=True,
              verbose=1,
              nb_max_start_steps=100,
              start_step_policy=lambda x: np.random.randint(nb_actions))
    # After training is done, we save the final weights.
    agent.save_weights('ddpg_{}_weights.h5f'.format(
        str(random.randrange(0, 100000))),
                       overwrite=True)

Beispiel #7

0

Datei anzeigen

Datei: deep_learning_models.py Projekt: ADockhorn/Forward-Model-Learning-for-Motion-Control-Tasks

def init_dqn(env, nb_actions):
    """ Initialize the DQN agent using the keras-rl package.

    :param env: the environment to be played, required to determine the input size
    :param nb_actions: number of actions
    :return: compiled DQN agent with ``model_name`` set to "DQN"
    """
    # Three hidden layers of 16 ReLU units followed by a linear Q-value head.
    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    for _ in range(3):
        model.add(Dense(16))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    # summary() prints the table itself and returns None; wrapping it in
    # print() produced an extra "None" line.
    model.summary()

    # compile agent
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   nb_steps_warmup=10,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.model_name = "DQN"
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn

Beispiel #8

0

Datei anzeigen

Datei: deep_learners.py Projekt: Thomas0Gilles/LocomotionRL

    def __init__(self, env: gym.Env, logger=Logger()):
        """
        Create a compiled DQN agent for ``env`` and initialise the base class.

        :param env: environment whose observation and action spaces size the network
        :param logger: passed on to the base-class initialiser
        """
        # NOTE(review): the Logger() default is evaluated once, at definition
        # time, and shared by every instance that relies on it.
        action_count = env.action_space.shape[0]

        # Flatten -> 3 x (Dense(16) + ReLU) -> linear output layer.
        net = Sequential()
        net.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
        for _ in range(3):
            net.add(Dense(16))
            net.add(Activation('relu'))
        net.add(Dense(action_count))
        net.add(Activation('linear'))

        boltzmann = BoltzmannQPolicy()
        replay = SequentialMemory(limit=100000, window_length=1)
        dqn = DQNAgent(
            model=net,
            nb_actions=action_count,
            memory=replay,
            nb_steps_warmup=10,
            target_model_update=1e-2,
            policy=boltzmann,
        )
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        self.agent = dqn
        self.env = env
        super().__init__(env, logger)

Beispiel #9

0

Datei anzeigen

    def _build_dqn(nb_actions, nb_states):
        """
        Build and compile a dueling ('avg') DQN agent on a small dense network.

        :param nb_actions: number of outputs of the network and actions of the agent
        :param nb_states: length of the observation vector (input shape is ``(1, nb_states)``)
        :return: the compiled ``DQNAgent``
        """
        # Network: Flatten -> 3 x (Dense(16) + ReLU) -> linear output.
        net = Sequential()
        net.add(Flatten(input_shape=(1, nb_states)))
        for _ in range(3):
            net.add(Dense(16))
            net.add(Activation('relu'))
        net.add(Dense(nb_actions, activation='linear'))

        # Agent
        replay = SequentialMemory(limit=10240, window_length=1)
        boltzmann = BoltzmannQPolicy()
        agent_kwargs = dict(
            model=net,
            nb_actions=nb_actions,
            memory=replay,
            nb_steps_warmup=10,
            enable_dueling_network=True,
            dueling_type='avg',
            target_model_update=1e-2,
            policy=boltzmann,
        )
        agent = DQNAgent(**agent_kwargs)
        agent.compile(Adam(), metrics=['mae'])

        return agent

Beispiel #10

0

Datei anzeigen

Datei: train.py Projekt: YosriGFX/holbertonschool-machine_learning

def build_agent(model, actions):
    '''
    Create and compile a dueling DQN agent with annealed epsilon-greedy exploration.

    :param model: Keras model used as the Q-network
    :param actions: number of actions passed to the agent
    :return: the compiled ``DQNAgent``
    '''
    # Epsilon goes linearly from 1.0 to 0.1 over 10000 steps; 0.2 while testing.
    exploration = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                       attr='eps',
                                       value_max=1.,
                                       value_min=.1,
                                       value_test=.2,
                                       nb_steps=10000)
    replay = SequentialMemory(limit=1000000, window_length=3)

    agent_kwargs = dict(model=model,
                        memory=replay,
                        policy=exploration,
                        enable_dueling_network=True,
                        dueling_type='avg',
                        nb_actions=actions,
                        nb_steps_warmup=1000)
    agent = DQNAgent(**agent_kwargs)
    agent.compile(optimizer=Adam(lr=0.00025), metrics=['mae', 'accuracy'])
    return agent

Beispiel #11

0

Datei anzeigen

Datei: DQNTest.py Projekt: raphacosta27/GeoFriend2

def build_model(env, num_actions):
    """
    Build a dense Q-network for ``env`` and return it wrapped in a compiled DQN agent.

    Despite the name, the return value is the agent, not the bare Keras model.

    :param env: environment; ``env.observation_space.shape[0]`` sizes the input
    :param num_actions: number of network outputs and agent actions
    :return: the compiled ``DQNAgent``
    """
    # Named `inputs` rather than `input` so the builtin is not shadowed.
    inputs = Input(shape=(1, env.observation_space.shape[0]))
    x = Flatten()(inputs)
    x = Dense(128, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    output = Dense(num_actions, activation='linear')(x)
    model = Model(inputs=inputs, outputs=output)
    # summary() prints by itself and returns None; print() around it added
    # a stray "None" line.
    model.summary()

    memory = SequentialMemory(limit=50000, window_length=1)
    # Epsilon is annealed linearly from 1.0 to 0.1 over 10000 steps;
    # 0.05 is used while testing.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=10000)
    dqn = DQNAgent(model=model,
                   nb_actions=num_actions,
                   memory=memory,
                   nb_steps_warmup=100,
                   target_model_update=1e-2,
                   policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    return dqn

Beispiel #12

0

Datei anzeigen

Datei: kerasrl.py Projekt: chairbender/fantasy-football-auction-ai

    def agent(self):
        """
        Build and compile a dueling DQN agent with a single linear layer for ``self.env``.

        When ``self.initial_weights_file`` is set, the weights are loaded from
        it and ``self.train_episodes`` is reset to 0.

        :return: the compiled ``DQNAgent``
        """
        nb_actions = self.env.action_space.n
        obs_shape = self.env.observation_space.shape
        # `shape` is normally a tuple, so it must be concatenated onto the
        # window axis; `(1, shape)` would nest a tuple inside the input shape.
        if isinstance(obs_shape, int):
            obs_shape = (obs_shape, )
        model = Sequential()
        model.add(Flatten(input_shape=(1, ) + tuple(obs_shape)))
        model.add(Dense(nb_actions, activation='linear'))
        # summary() prints by itself and returns None; print() around it
        # added a stray "None" line.
        model.summary()

        memory = SequentialMemory(limit=50000, window_length=1)
        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       nb_steps_warmup=256,
                       enable_dueling_network=True,
                       target_model_update=1e-2,
                       policy=InformedBoltzmannGumbelQPolicy(self.env),
                       test_policy=InformedGreedyQPolicy(self.env),
                       batch_size=128,
                       train_interval=128)
        dqn.compile(Adam(lr=1e-3), metrics=['mae'])

        if self.initial_weights_file is not None:
            dqn.load_weights(self.initial_weights_file)
            self.train_episodes = 0

        return dqn

Beispiel #13

0

Datei anzeigen

    def __init__(self, state_dim, action_space, epsilon, lr):
        """
        Create the Q-network and a compiled DQN agent around it.

        :param state_dim: forwarded to ``self._get_model``
        :param action_space: forwarded to ``self._get_model``
        :param epsilon: argument of the training ``EpsGreedyQPolicy``
        :param lr: argument of the ``Adam`` optimizer
        """
        # NOTE(review): no nb_actions/memory are passed to DQNAgent here -
        # confirm the DQNAgent in scope does not require them.
        self._model = self._get_model(state_dim, action_space)
        train_policy = EpsGreedyQPolicy(epsilon)
        eval_policy = EpsGreedyQPolicy(eps=0.01)
        self.agent = DQNAgent(self._model,
                              policy=train_policy,
                              test_policy=eval_policy)

        optimizer = Adam(lr)
        self.agent.compile(optimizer)

Beispiel #14

0

Datei anzeigen

def build_agent(model, actions):
    """
    Create an (uncompiled) double, dueling DQN agent.

    ``processor`` is not a parameter; it is resolved from the enclosing scope.

    :param model: Keras model used as the Q-network
    :param actions: number of actions passed to the agent
    :return: the ``DQNAgent``; the caller still has to compile it
    """
    # Epsilon is annealed linearly from 1.0 to 0.05 over 150 000 steps.
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.05,
        value_test=.05,
        nb_steps=150_000,
    )
    replay = SequentialMemory(limit=30000, window_length=1)

    agent_kwargs = dict(
        model=model,
        memory=replay,
        policy=exploration,
        processor=processor,
        nb_actions=actions,
        nb_steps_warmup=100,
        target_model_update=1e-3,
        enable_double_dqn=True,
        enable_dueling_network=True,
        dueling_type='avg',
        batch_size=8,
        gamma=.95,
    )
    return DQNAgent(**agent_kwargs)

Beispiel #15

0

Datei anzeigen

Datei: train.py Projekt: AhmedOmi/holbertonschool-machine_learning

def build_agent(model, nb_actions):
    """
    Create and compile a dueling DQN agent configured from module-level constants.

    :param model: Keras model used as the Q-network
    :param nb_actions: number of actions passed to the agent
    :return: the compiled ``DQNAgent``
    """
    # Linear epsilon schedule driven by the *_EPSILON and MAX_STEPS constants.
    exploration = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                       attr='eps',
                                       value_max=MAX_EPSILON,
                                       value_min=MIN_EPSILON,
                                       value_test=TEST_EPSILON,
                                       nb_steps=MAX_STEPS)
    replay = SequentialMemory(limit=MAX_STEPS, window_length=WINDOW_WIDTH)

    agent = DQNAgent(model=model,
                     memory=replay,
                     policy=exploration,
                     enable_dueling_network=True,
                     dueling_type='avg',
                     nb_actions=nb_actions,
                     nb_steps_warmup=WARMUP_STEPS)
    optimizer = Adam(learning_rate=LEARNING_RATE)
    agent.compile(optimizer, metrics=['mae'])
    return agent

Beispiel #16

0

Datei anzeigen

Datei: agent_keras_rl_dqn.py Projekt: MrStonkus/neuron_poker

    def initiate_agent(self, env):
        """
        Build the Q-network and the compiled DQN agent for ``env``.

        Stores the environment, the network and the agent on ``self`` as
        ``env``, ``model`` and ``dqn``.

        :param env: environment providing ``action_space.n`` and ``observation_space``
        """
        tf.compat.v1.disable_eager_execution()
        self.env = env

        action_count = self.env.action_space.n

        # Three Dense(512, relu) layers, each followed by Dropout(0.2), then a linear head.
        self.model = Sequential()
        net = self.model
        net.add(Dense(512, activation='relu', input_shape=env.observation_space))
        net.add(Dropout(0.2))
        for _ in range(2):
            net.add(Dense(512, activation='relu'))
            net.add(Dropout(0.2))
        net.add(Dense(action_count, activation='linear'))

        # Configure and compile the agent.
        replay = SequentialMemory(limit=memory_limit, window_length=window_length)
        action_policy = TrumpPolicy()

        agent_kwargs = dict(
            model=self.model,
            nb_actions=action_count,
            memory=replay,
            nb_steps_warmup=nb_steps_warmup,
            target_model_update=1e-2,
            policy=action_policy,
            processor=CustomProcessor(),
            batch_size=batch_size,
            train_interval=train_interval,
            enable_double_dqn=enable_double_dqn,
        )
        self.dqn = DQNAgent(**agent_kwargs)
        self.dqn.compile(tf.keras.optimizers.Adam(lr=1e-3), metrics=['mae'])

Beispiel #17

0

Datei anzeigen

Datei: dz_19(pro).py Projekt: vladkudiurov89/Neural_Network_Projects

def build_agent(model, actions):
    """
    Create an (uncompiled) dueling DQN agent with annealed epsilon-greedy exploration.

    :param model: Keras model used as the Q-network
    :param actions: number of actions passed to the agent
    :return: the ``DQNAgent``; the caller still has to compile it
    """
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=10000,
    )
    replay = SequentialMemory(limit=1000, window_length=3)
    agent = DQNAgent(
        model=model,
        memory=replay,
        policy=exploration,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=1000,
    )
    return agent

Beispiel #18

0

Datei anzeigen

Datei: cartpole.py Projekt: aarav18/cartpole-rl

def build_agent(model, actions):
    """
    Create an (uncompiled) DQN agent with Boltzmann exploration.

    :param model: Keras model used as the Q-network
    :param actions: number of actions passed to the agent
    :return: the ``DQNAgent``; the caller still has to compile it
    """
    boltzmann = BoltzmannQPolicy()
    replay = SequentialMemory(limit=50000, window_length=1)
    agent_kwargs = dict(
        model=model,
        memory=replay,
        policy=boltzmann,
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )
    return DQNAgent(**agent_kwargs)

Beispiel #19

0

Datei anzeigen

    def get_agent(self):
        """
        Create the DQN agent from this object's model, policy and memory.

        Double DQN is explicitly switched off.

        :return: the (uncompiled) ``DQNAgent``
        """
        agent = DQNAgent(model=self.model,
                         policy=self.policy,
                         nb_steps_warmup=10,
                         target_model_update=1e-2,
                         nb_actions=self.action,
                         memory=self.memory,
                         # Was misspelled `enable_double_dnq`, which is not a
                         # DQNAgent keyword.
                         enable_double_dqn=False)

        return agent

Beispiel #20

0

Datei anzeigen

Datei: space_invaders_dqn.py Projekt: LaggyHammer/reinforcement-learning-projects

def build_agent(model, actions):
    """
    Create an (uncompiled) dueling DQN agent with annealed epsilon-greedy exploration.

    :param model: Keras model used as the Q-network
    :param actions: number of actions passed to the agent
    :return: the ``DQNAgent``; the caller still has to compile it
    """
    epsilon_schedule = dict(attr='eps',
                            value_max=1,
                            value_min=0.1,
                            value_test=0.2,
                            nb_steps=10000)
    exploration = LinearAnnealedPolicy(EpsGreedyQPolicy(), **epsilon_schedule)
    replay = SequentialMemory(limit=2000, window_length=3)

    # model and policy stay positional, exactly as the agent was called before.
    agent = DQNAgent(model,
                     exploration,
                     memory=replay,
                     enable_dueling_network=True,
                     dueling_type='avg',
                     nb_actions=actions,
                     nb_steps_warmup=1000)
    return agent

Beispiel #21

0

Datei anzeigen

    def set_num_states(self, state_dimension: int, num_actions: int) -> None:
        """
        Build the network and the compiled internal DQN agent.

        :param state_dimension: forwarded to ``self._build_model``
        :param num_actions: forwarded to ``self._build_model`` and used as the
            agent's number of actions
        """
        network = self._build_model(state_dimension, num_actions)
        replay = SequentialMemory(limit=10000, window_length=1)
        agent_kwargs = dict(
            model=network,
            nb_actions=num_actions,
            memory=replay,
            nb_steps_warmup=1000,
            target_model_update=1000,
            gamma=0.99,
            delta_clip=1,
        )
        self._internal_agent = DQNAgent(**agent_kwargs)

        optimizer = Adam(lr=0.0001)
        self._internal_agent.compile(optimizer, metrics=['mae'])

Beispiel #22

0

Datei anzeigen

Datei: train.py Projekt: GerPhoenix/pong

def train(learn_rate, model_update_interval, steps):
    """
    Train a DQN agent on the module-level ``env`` and save its weights.

    ``model``, ``nb_actions``, ``memory``, ``policy`` and ``env`` are taken
    from the enclosing scope.

    :param learn_rate: learning rate of the ``Adam`` optimizer
    :param model_update_interval: passed to the agent as ``target_model_update``
    :param steps: number of training steps
    """
    agent_kwargs = dict(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        nb_steps_warmup=50000,
        target_model_update=model_update_interval,
        policy=policy,
        gamma=.99,
        train_interval=4,
    )
    agent = DQNAgent(**agent_kwargs)
    agent.compile(Adam(lr=learn_rate), metrics=['mae'])
    agent.fit(env, nb_steps=steps, verbose=2, visualize=VISUALIZE)

    weights_path = SAVEFILE_FOLDER + "/dqn_pong_params.h5f"
    agent.save_weights(weights_path, overwrite=True)

Beispiel #23

0

Datei anzeigen

def setupDQN(cfg, nb_actions, processor):
    """
    Build a convolutional Q-network and return it wrapped in a compiled DQN agent.

    :param cfg: configuration object; this function reads ``input_shape``,
        ``memory_limit``, ``WINDOW_LENGTH``, ``nb_steps_annealed_policy``,
        ``nb_steps_warmup_dqn_agent`` and ``target_model_update_dqn_agent``
    :param nb_actions: number of network outputs and agent actions
    :param processor: keras-rl processor handed to the agent
    :return: the compiled ``DQNAgent``
    """
    image_in = Input(shape=cfg.input_shape, name='main_input')
    # Move axis 1 of the input to the last position before the convolutions.
    input_perm = Permute((2, 3, 1), input_shape=cfg.input_shape)(image_in)
    conv1 = Conv2D(32, (8, 8), activation="relu", strides=(4, 4),
                   name='conv1')(input_perm)
    conv2 = Conv2D(64, (4, 4), activation="relu", strides=(2, 2),
                   name='conv2')(conv1)
    conv3 = Conv2D(64, (3, 3), activation="relu", strides=(1, 1),
                   name='conv3')(conv2)
    conv_out = Flatten(name='flat_feat')(conv3)
    dense_out = Dense(512, activation='relu')(conv_out)
    q_out = Dense(nb_actions, activation='linear')(dense_out)
    model = Model(inputs=[image_in], outputs=[q_out])
    # summary() prints by itself and returns None; print() around it added
    # a stray "None" line.
    model.summary()

    memory = SequentialMemory(limit=cfg.memory_limit,
                              window_length=cfg.WINDOW_LENGTH)

    # Eps-greedy exploration: a random action is chosen with probability eps.
    # eps is annealed linearly from 1.0 to 0.1 over
    # cfg.nb_steps_annealed_policy steps; during testing eps is 0.05, so the
    # agent still takes some random actions.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=cfg.nb_steps_annealed_policy)

    dqn = DQNAgent(model=model,
                   nb_actions=nb_actions,
                   policy=policy,
                   memory=memory,
                   processor=processor,
                   nb_steps_warmup=cfg.nb_steps_warmup_dqn_agent,
                   gamma=.99,
                   target_model_update=cfg.target_model_update_dqn_agent,
                   train_interval=4,
                   delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    return dqn

Beispiel #24

0

Datei anzeigen

    def initiate_agent(self, env):
        """
        Build the Q-network and the compiled DQN agent for ``env``.

        Stores the environment, the network and the agent on ``self`` as
        ``env``, ``model`` and ``dqn``.

        :param env: environment providing ``action_space.n`` and ``observation_space``
        """
        tf.compat.v1.disable_eager_execution()
        self.env = env

        action_count = self.env.action_space.n

        # Three Dense(512, relu) layers, each followed by Dropout(0.2), then a linear head.
        self.model = Sequential()
        net = self.model
        net.add(Dense(512, activation='relu', input_shape=env.observation_space))
        net.add(Dropout(0.2))
        for _ in range(2):
            net.add(Dense(512, activation='relu'))
            net.add(Dropout(0.2))
        net.add(Dense(action_count, activation='linear'))

        # Configure and compile the agent.
        replay = SequentialMemory(limit=memory_limit,
                                  window_length=window_length)
        action_policy = TrumpPolicy()

        class CustomProcessor(Processor):
            """Adapts state batches and info dicts for the agent."""

            def process_state_batch(self, batch):
                """Return ``batch`` with its axis 1 squeezed away."""
                squeezed = np.squeeze(batch, axis=1)
                return squeezed

            def process_info(self, info):
                """Return ``{'x': 1}`` if the player data has a 'stack' entry, else the player data."""
                player_data = info['player_data']
                return {'x': 1} if 'stack' in player_data else player_data

        agent_kwargs = dict(
            model=self.model,
            nb_actions=action_count,
            memory=replay,
            nb_steps_warmup=nb_steps_warmup,
            target_model_update=1e-2,
            policy=action_policy,
            processor=CustomProcessor(),
            batch_size=batch_size,
            train_interval=train_interval,
            enable_double_dqn=enable_double_dqn,
        )
        self.dqn = DQNAgent(**agent_kwargs)
        self.dqn.compile(tf.optimizers.Adam(lr=1e-3), metrics=['mae'])

Beispiel #25

0

Datei anzeigen

    def build_agent(self, mem_file=None, w_file=None):
        """
        Build the LSTM Q-network, the replay memory and the compiled DQN agent.

        :param mem_file: optional path to a pickled tuple ``(memory, actions,
            rewards, terminals, observations)``; when omitted a fresh
            ``SequentialMemory`` is created
        :param w_file: optional path to model weights loaded after compiling
        :return: tuple ``(dqn, env, memory)``
        """
        # Create a dummy env to get the size of the output.
        # Makes it simpler if we ever choose to update env shapes.
        env = TradingEnv([], "", [])
        np.random.seed(314)
        env.seed(314)

        nb_actions = env.action_space.n
        model = Sequential()
        model.add(LSTM(5, input_shape=(7, 4), return_sequences=True))
        model.add(Activation('tanh'))
        model.add(LSTM(4))
        model.add(Activation('tanh'))
        model.add(Dropout(0.2))
        model.add(Dense(4))
        model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('linear'))

        policy = BoltzmannQPolicy()
        test_policy = MaxBoltzmannQPolicy()
        if mem_file is None:
            # window_length=7 matches the 7 time steps of the LSTM input shape.
            memory = SequentialMemory(limit=50000, window_length=7)
        else:
            # SECURITY: pickle.load can execute arbitrary code from the file;
            # only load memory dumps from a trusted source.
            # The context manager closes the file, which the previous
            # `pickle.load(open(...))` never did.
            with open(mem_file, "rb") as handle:
                (memory, memory.actions, memory.rewards, memory.terminals,
                 memory.observations) = pickle.load(handle)

        dqn = DQNAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       gamma=GAMMA_VAL,
                       nb_steps_warmup=100,
                       policy=policy,
                       test_policy=test_policy)
        dqn.compile("adam", metrics=['mse'])

        if w_file is not None:
            # NOTE(review): this loads into `model` only; confirm the agent's
            # target network does not also need refreshing (dqn.load_weights).
            model.load_weights(w_file)

        return dqn, env, memory

Beispiel #26

0

Datei anzeigen

Datei: ConvolutionalDeepNetworkAgentBenedikt.py Projekt: BogyMitutoyoCTL/AI-Preparation

    def __init__(self, shape, initial_randomness: float, action_count: int):
        """
        Build a convolutional network and wrap it in a compiled DQN agent.

        :param shape: input shape of the network
        :param initial_randomness: stored unchanged as ``self.initial_randomness``
        :param action_count: width of the output layer and number of actions
            given to the agent
        """
        super().__init__()

        model = Sequential()
        model.add(Input(shape=shape))
        model.add(Conv2D(8, (3, 3), activation='relu', input_shape=shape))
        model.add(Conv2D(16, (3, 3), activation='relu', input_shape=shape))
        model.add(Conv2D(32, (3, 3), activation='relu', input_shape=shape))
        model.add(Flatten())
        model.add(Dense(64, activation='relu'))
        model.add(Dense(512, activation='relu'))
        # NOTE(review): the agent reads these outputs as Q-values; a softmax
        # head bounds them to [0, 1] - confirm this is intended rather than
        # a 'linear' activation.
        model.add(Dense(action_count, activation='softmax'))

        # summary() prints by itself and returns None; print() around it
        # emitted a stray "None".
        model.summary()

        self.model = model

        self.callbacks = self.build_callbacks("msnake")

        self.processor = RemoveDimensionProcessor()

        self.memory = SequentialMemory(limit=50000, window_length=1)

        # Epsilon is annealed linearly from 1.0 to 0.1 over 1000 steps;
        # 0.05 is used while testing.
        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                                           attr='eps',
                                           value_max=1.,
                                           value_min=.1,
                                           value_test=.05,
                                           nb_steps=1000)

        self.dqn = DQNAgent(model=self.model,
                            nb_actions=action_count,
                            memory=self.memory,
                            nb_steps_warmup=10,
                            target_model_update=1e-2,
                            policy=self.policy,
                            batch_size=1,
                            processor=self.processor)

        # Workaround for https://github.com/keras-rl/keras-rl/issues/345
        Adam._name = "fix_bug"

        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-3), metrics=['mse'])

        self.initial_randomness = initial_randomness

Beispiel #27

0

Datei anzeigen

    def getAgent(self):
        """
        Create the DQN agent from this object's model, policy and memory.

        Double DQN is explicitly switched off.

        :return: the (uncompiled) ``DQNAgent``
        """
        agent = DQNAgent(
            model=self.model,
            policy=self.policy,
            nb_steps_warmup=10,
            target_model_update=1e-2,
            nb_actions=self.action,
            memory=self.memory,
            enable_double_dqn=False,
        )
        return agent

Beispiel #28

0

Datei anzeigen

Datei: agent.py Projekt: amaurySabran/TrackMaster

    def __init__(self,
                 env: gym.Env,
                 memory=None,
                 logger=None,
                 boxes_resolution=10,
                 nb_steps_warmup=20,
                 hidden_layers=None,
                 policy=None,
                 target_model_update=1e-2,
                 optimizer=None):
        """
        Build a dense Q-network over a discretised action space and compile a DQN agent.

        Defaults that used to be built once in the signature (memory, logger,
        policy, optimizer, layer list) are now created per call, so instances
        relying on the defaults no longer share the same objects.

        :param env: environment; its observation space sizes the input and its
            action space bounds are handed to ``DqnProcessor``
        :param memory: replay memory; defaults to a new
            ``SequentialMemory(limit=50000, window_length=1)``
        :param logger: passed to the base-class initialiser; defaults to a new ``Logger()``
        :param boxes_resolution: an int (repeated once per action dimension)
            or a per-dimension sequence; its product is the number of actions
        :param nb_steps_warmup: forwarded to ``DQNAgent``
        :param hidden_layers: widths of the hidden ReLU layers; defaults to ``[16, 16, 16]``
        :param policy: exploration policy; defaults to a new ``BoltzmannQPolicy()``
        :param target_model_update: forwarded to ``DQNAgent``
        :param optimizer: optimizer used to compile; defaults to a new ``Adam(lr=1e-3)``
        """
        if memory is None:
            memory = SequentialMemory(limit=50000, window_length=1)
        if logger is None:
            logger = Logger()
        if hidden_layers is None:
            hidden_layers = [16, 16, 16]
        if policy is None:
            policy = BoltzmannQPolicy()
        if optimizer is None:
            optimizer = Adam(lr=1e-3)

        self.env = env

        if isinstance(boxes_resolution, int):
            boxes_resolution = (boxes_resolution, ) * len(
                env.action_space.shape)

        self.boxes_resolution = boxes_resolution
        # Number of discrete actions = number of cells in the resolution grid.
        self.nb_actions = np.zeros(boxes_resolution).size

        model = Sequential()
        model.add(Flatten(input_shape=(1, ) +
                          env.observation_space.shape))  # TODO check this
        for units in hidden_layers:
            model.add(Dense(units, activation='relu'))
        model.add(Dense(self.nb_actions,
                        activation='linear'))  # TODO move this to util file?

        self.model = model
        # summary() prints by itself and returns None; formatting its result
        # into the message printed "dqn model summary :None".
        print("dqn model summary :")
        model.summary()

        self.dqn = DQNAgent(model=model,
                            nb_actions=self.nb_actions,
                            memory=memory,
                            nb_steps_warmup=nb_steps_warmup,
                            target_model_update=target_model_update,
                            policy=policy,
                            processor=DqnProcessor(self.boxes_resolution,
                                                   env.action_space.low,
                                                   env.action_space.high))
        self.dqn.compile(optimizer=optimizer, metrics=['mae'])
        super().__init__(env, logger)

Beispiel #29

0

Datei anzeigen

Datei: deep_q_agent.py Projekt: Bosmansc/custom_tetris_RL

    def build_agent(self, model, actions, nb_steps):
        """
        Create the (uncompiled) deep Q agent.

        Exploration is epsilon-greedy with epsilon annealed linearly from
        ``EPSILON_START`` to ``EPSILON_END`` over ``EPSILON_DECAY * nb_steps``
        agent steps; ``EPSILON_TEST`` is used while testing.

        ``GAMMA`` is the discount on future rewards:
        REWARD = r1 + gamma*r2 + gamma^2*r3 + gamma^3*r4 ...

        :param model: Keras model used as the Q-network
        :param actions: number of actions passed to the agent
        :param nb_steps: total number of training steps, used to scale the
            epsilon annealing length
        :return: the ``DQNAgent``; the caller still has to compile it
        """
        exploration = LinearAnnealedPolicy(
            EpsGreedyQPolicy(),
            attr='eps',
            value_max=self.EPSILON_START,
            value_min=self.EPSILON_END,
            value_test=self.EPSILON_TEST,
            nb_steps=self.EPSILON_DECAY * nb_steps,
        )
        replay = SequentialMemory(limit=self.SEQUENTIAL_MEMORY_LIMIT,
                                  window_length=1)
        agent_kwargs = dict(
            model=model,
            memory=replay,
            policy=exploration,
            gamma=self.GAMMA,
            batch_size=self.BATCH_SIZE,
            nb_actions=actions,
            nb_steps_warmup=1000,
            target_model_update=self.TARGET_MODEL_UPDATE,
            enable_double_dqn=False,
            train_interval=4,
        )
        agent = DQNAgent(**agent_kwargs)
        return agent

Beispiel #30

0

Datei anzeigen

def get_agent(agent_type, model_type, lr):
    """
    Create and compile the agent selected by ``agent_type``.

    ``nb_actions``, ``env`` and ``get_model`` are taken from the enclosing scope.

    :param agent_type: one of "sarsa", "dqn", "a2c" or "ppo"
    :param model_type: forwarded to ``get_model`` for the "sarsa" and "dqn" agents
    :param lr: learning rate handed to the ``Adam`` optimizer
    :return: the compiled agent, or ``None`` for "ppo", which is not implemented
    :raises SystemExit: with exit code 1 when ``agent_type`` is not recognised
    """
    # sys.exit replaces the bare exit() helper, which is only injected by the
    # site module and is not guaranteed to exist.
    import sys

    if agent_type == "sarsa":
        policy = BoltzmannQPolicy()
        model = get_model(model_type)
        agent = SARSAAgent(model=model,
                           policy=policy,
                           nb_actions=nb_actions,
                           nb_steps_warmup=10,
                           gamma=0.99)
        agent.compile(Adam(lr), metrics=['mae'])
        return agent

    if agent_type == "dqn":
        policy = BoltzmannQPolicy()
        model = get_model(model_type)
        memory = SequentialMemory(limit=50000, window_length=1)
        agent = DQNAgent(model=model,
                         policy=policy,
                         nb_actions=nb_actions,
                         memory=memory,
                         nb_steps_warmup=10,
                         target_model_update=1e-2,
                         enable_double_dqn=True)
        agent.compile(Adam(lr), metrics=['mae'])
        return agent

    if agent_type == "a2c":
        agent = A2CAgent(nb_actions,
                         len(env.observation_space.high),
                         nb_steps_warmup=10,
                         actor_lr=0.001,
                         critic_lr=0.005)
        agent.compile(Adam(lr))
        return agent

    if agent_type == "ppo":
        # Not implemented yet; callers receive None, as before.
        return None

    print("Unsupported model")
    sys.exit(1)