def make_dqn_rl_agent(processor: Processor_56x5, nbr_layers=2, enable_dueling_network: bool = False, enable_double_dqn: bool = True):
    """
    Build and compile a DQN agent whose network is created by ``processor``.

    :param processor: provides the Q-network through ``create_model`` and is
        also installed as the agent's processor.
    :param nbr_layers: forwarded to ``processor.create_model``.
    :param enable_dueling_network: forwarded to ``DQNAgent``.
    :param enable_double_dqn: forwarded to ``DQNAgent``.
    :return: the compiled ``DQNAgent``.
    """
    agent_kwargs = dict(
        model=processor.create_model(nbr_layers=nbr_layers),
        # Only the test-time policy is overridden; no training policy is passed.
        test_policy=GreedyQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        nb_actions=NBR_TICHU_ACTIONS,
        nb_steps_warmup=100,
        target_model_update=1e-2,
        processor=processor,
        enable_dueling_network=enable_dueling_network,
        enable_double_dqn=enable_double_dqn,
    )
    agent = DQNAgent(**agent_kwargs)
    agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return agent
def agent(self):
    """
    Build, compile and return the DQN agent for ``self.env``.

    The network comes from ``self.build()``. If ``self.initial_weights_file``
    is set, loading those weights is attempted on a best-effort basis: a
    failure is reported on stdout and the freshly initialised agent is
    returned anyway.

    :return: the compiled ``DQNAgent``.
    """
    nb_actions = self.env.action_space.n
    model = self.build()
    print(model.summary())

    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        nb_steps_warmup=32,
        enable_dueling_network=True,
        target_model_update=1e-2,
        policy=InformedBoltzmannGumbelQPolicy(self.env),
        test_policy=InformedGreedyQPolicy(self.env),
        batch_size=32,
        train_interval=32,
    )
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    if self.initial_weights_file is not None:
        try:
            dqn.load_weights(self.initial_weights_file)
        except Exception as exc:
            # Still best effort, but no longer a bare ``except``: Ctrl-C and
            # SystemExit propagate, and the reason for skipping is visible.
            print("Could not load initial weights from {!r}: {}".format(
                self.initial_weights_file, exc))
    return dqn
def __init__(self, shape, action_count: int):
    """
    Create the dense network, replay memory, exploration policy, callbacks
    and the compiled DQN agent, storing each on the instance.

    :param shape: input shape handed to the Keras ``Input`` layer.
    :param action_count: width of the output layer and number of agent actions.
    """
    super().__init__()

    # Network: flatten, then three relu layers of shrinking width.
    net_in = Input(shape=shape)
    features = Flatten()(net_in)
    for units in (256, 64, 16):
        features = Dense(units, activation='relu')(features)
    # NOTE(review): the head uses softmax; Q-value heads are usually linear.
    # Left as in the original -- confirm this is intended.
    net_out = Dense(action_count, activation='softmax')(features)
    self.model = Model(inputs=net_in, outputs=net_out)
    print(self.model.summary())

    self.memory = SequentialMemory(limit=50000, window_length=WINDOW_LENGTH)
    # Epsilon goes linearly from 1.0 to 0.1 over 1000 steps; 0.05 in test mode.
    self.policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=1000,
    )
    self.callbacks = self.build_callbacks("msnake")
    self.dqn = DQNAgent(
        model=self.model,
        nb_actions=action_count,
        memory=self.memory,
        nb_steps_warmup=50,
        target_model_update=1e-2,
        policy=self.policy,
    )

    # Workaround from https://github.com/keras-rl/keras-rl/issues/345
    Adam._name = "fix_bug"
    self.dqn.compile(Adam(lr=1e-5), metrics=['mse'])
def play(self, nb_episodes=5, render=False):
    """
    Wrap ``self.model`` in a DQN agent and run it in test mode on ``self.env``.

    ``memory_limit``, ``window_length``, ``nb_steps_warmup``, ``batch_size``,
    ``train_interval`` and ``enable_double_dqn`` are taken from the enclosing
    scope (not defined in this method).

    :param nb_episodes: number of test episodes.
    :param render: passed to ``DQNAgent.test`` as ``visualize``.
    """
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    policy = TrumpPolicy()

    class CustomProcessor(Processor):
        """Adapts state batches and step info for the agent."""

        def process_state_batch(self, batch):
            """Drop axis 1 of the batch so it can be fed to the network."""
            return np.squeeze(batch, axis=1)

        def process_info(self, info):
            """Return the player data, or ``{'x': 1}`` if it has a 'stack' key."""
            player_data = info['player_data']
            return {'x': 1} if 'stack' in player_data else player_data

    agent_kwargs = dict(
        model=self.model,
        nb_actions=self.env.action_space.n,
        memory=memory,
        nb_steps_warmup=nb_steps_warmup,
        target_model_update=1e-2,
        policy=policy,
        processor=CustomProcessor(),
        batch_size=batch_size,
        train_interval=train_interval,
        enable_double_dqn=enable_double_dqn,
    )
    self.dqn = DQNAgent(**agent_kwargs)
    self.dqn.compile(tf.keras.optimizers.Adam(lr=1e-3), metrics=['mae'])  # pylint: disable=no-member

    self.dqn.test(self.env, nb_episodes=nb_episodes, visualize=render)
def test_double_dqn():
    """Train a double-DQN on the two-round deterministic env and expect a mean test reward of 3."""
    seed = 123
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(seed)
    env.seed(seed)
    random.seed(seed)
    n_actions = env.action_space.n

    # Minimal Q-network: one hidden relu layer, linear output.
    q_net = Sequential([
        Dense(16, input_shape=(1, )),
        Activation('relu'),
        Dense(n_actions),
        Activation('linear'),
    ])

    replay = SequentialMemory(limit=1000, window_length=1)
    eps_policy = EpsGreedyQPolicy(eps=.1)
    agent = DQNAgent(
        model=q_net,
        nb_actions=n_actions,
        memory=replay,
        nb_steps_warmup=50,
        target_model_update=1e-1,
        policy=eps_policy,
        enable_double_dqn=True,
    )
    agent.compile(Adam(lr=1e-3))
    agent.fit(env, nb_steps=2000, visualize=False, verbose=0)

    # Switch exploration off before evaluating.
    eps_policy.eps = 0.
    result = agent.test(env, nb_episodes=20, visualize=False)
    mean_reward = np.mean(result.history['episode_reward'])
    assert_allclose(mean_reward, 3.)
def run():
    """
    Train a DQN agent on ``game_env.MeleeEnv`` and save its weights.

    The agent is trained for 100000 steps with rendering enabled. Each episode
    starts with up to 100 uniformly random actions (``nb_max_start_steps`` /
    ``start_step_policy``). The final weights go to a file whose name contains
    a random integer below 100000.
    """
    env = game_env.MeleeEnv()
    nb_actions = env.action_space.shape[0]

    actor = build_network(env, nb_actions)
    # NOTE(review): the critic is not used by the DQN agent below. The call is
    # kept only in case build_critic has side effects -- confirm and remove.
    build_critic(env, nb_actions)

    # keras-rl's Memory base class requires window_length; without it the
    # constructor raises TypeError.
    memory = SequentialMemory(limit=25000, window_length=1)

    agent = DQNAgent(
        batch_size=1000,
        nb_actions=nb_actions,
        model=actor,
        memory=memory,
        nb_steps_warmup=100,
    )
    agent.compile(RMSprop(lr=.0005), metrics=['mae'])
    agent.fit(
        env,
        nb_steps=100000,
        visualize=True,
        verbose=1,
        nb_max_start_steps=100,
        start_step_policy=lambda x: np.random.randint(nb_actions),
    )

    # After training is done, we save the final weights.
    agent.save_weights(
        'ddpg_{}_weights.h5f'.format(str(random.randrange(0, 100000))),
        overwrite=True,
    )
def init_dqn(env, nb_actions):
    """
    Create and compile a keras-rl DQN agent with a small dense network.

    :param env: environment; only ``env.observation_space.shape`` is read, to size the input
    :param nb_actions: number of actions (width of the output layer)
    :return: compiled DQN agent, tagged with ``model_name = "DQN"``
    """
    # Flatten the (window, observation) input, then three 16-unit relu layers.
    layers = [Flatten(input_shape=(1, ) + env.observation_space.shape)]
    for _ in range(3):
        layers.append(Dense(16))
        layers.append(Activation('relu'))
    layers.append(Dense(nb_actions))
    layers.append(Activation('linear'))

    q_net = Sequential()
    for layer in layers:
        q_net.add(layer)
    print(q_net.summary())

    replay = SequentialMemory(limit=50000, window_length=1)
    agent = DQNAgent(
        model=q_net,
        nb_actions=nb_actions,
        memory=replay,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        policy=BoltzmannQPolicy(),
    )
    agent.model_name = "DQN"
    agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return agent
def __init__(self, env: gym.Env, logger=Logger()):
    """
    Build a compiled DQN agent for ``env`` and hand ``env``/``logger`` to the base class.

    The number of actions is read from ``env.action_space.shape[0]``. The
    agent is stored as ``self.agent`` and the environment as ``self.env``
    before the base-class initialiser runs.
    """
    action_count = env.action_space.shape[0]

    # Flatten -> 3 x (Dense(16) + relu) -> linear output.
    net = Sequential()
    net.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    for _ in range(3):
        net.add(Dense(16))
        net.add(Activation('relu'))
    net.add(Dense(action_count))
    net.add(Activation('linear'))

    exploration = BoltzmannQPolicy()
    replay = SequentialMemory(limit=100000, window_length=1)
    dqn = DQNAgent(
        model=net,
        nb_actions=action_count,
        memory=replay,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        policy=exploration,
    )
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    self.agent = dqn
    self.env = env
    super().__init__(env, logger)
def _build_dqn(nb_actions, nb_states):
    """
    Return a compiled dueling DQN agent with a 3x16 dense network.

    :param nb_actions: number of actions (output width).
    :param nb_states: length of one observation; the input shape is ``(1, nb_states)``.
    """
    # Network
    net = Sequential()
    net.add(Flatten(input_shape=(1, nb_states)))
    for _ in range(3):
        net.add(Dense(16))
        net.add(Activation('relu'))
    net.add(Dense(nb_actions, activation='linear'))

    # Agent
    agent = DQNAgent(
        model=net,
        nb_actions=nb_actions,
        memory=SequentialMemory(limit=10240, window_length=1),
        nb_steps_warmup=10,
        enable_dueling_network=True,
        dueling_type='avg',
        target_model_update=1e-2,
        policy=BoltzmannQPolicy(),
    )
    agent.compile(Adam(), metrics=['mae'])
    return agent
def build_agent(model, actions):
    '''Return a compiled dueling DQN agent with linearly annealed epsilon-greedy exploration.'''
    # Epsilon: 1.0 -> 0.1 over 10000 steps, 0.2 in test mode.
    anneal = dict(attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
    exploration = LinearAnnealedPolicy(EpsGreedyQPolicy(), **anneal)

    replay = SequentialMemory(limit=1000000, window_length=3)

    agent = DQNAgent(
        model=model,
        memory=replay,
        policy=exploration,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=1000,
    )
    agent.compile(optimizer=Adam(lr=0.00025), metrics=['mae', 'accuracy'])
    return agent
def build_model(env, num_actions):
    """
    Build a 128-64-32 dense Q-network for ``env`` and return it wrapped in a compiled DQN agent.

    :param env: environment; ``env.observation_space.shape[0]`` sizes the input.
    :param num_actions: number of actions (output width).
    :return: compiled ``DQNAgent``.
    """
    net_in = Input(shape=(1, env.observation_space.shape[0]))
    hidden = Flatten()(net_in)
    for width in (128, 64, 32):
        hidden = Dense(width, activation='relu')(hidden)
    q_values = Dense(num_actions, activation='linear')(hidden)
    q_net = Model(inputs=net_in, outputs=q_values)
    print(q_net.summary())

    replay = SequentialMemory(limit=50000, window_length=1)
    # Epsilon: 1.0 -> 0.1 over 10000 steps, 0.05 in test mode.
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=10000,
    )
    agent = DQNAgent(
        model=q_net,
        nb_actions=num_actions,
        memory=replay,
        nb_steps_warmup=100,
        target_model_update=1e-2,
        policy=exploration,
    )
    agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return agent
def agent(self):
    """
    Build, compile and return a DQN agent with a single linear layer for ``self.env``.

    If ``self.initial_weights_file`` is set, the weights are loaded from it
    (errors propagate). ``self.train_episodes`` is reset to 0 as a side effect.

    :return: the compiled ``DQNAgent``.
    """
    nb_actions = self.env.action_space.n

    # observation_space.shape is normally a tuple. Nesting it as (1, shape)
    # gives an invalid input_shape, so concatenate instead. An int shape is
    # still accepted and yields (1, n) as before.
    obs_shape = self.env.observation_space.shape
    if isinstance(obs_shape, int):
        obs_shape = (obs_shape,)
    input_shape = (1,) + tuple(obs_shape)

    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        nb_steps_warmup=256,
        enable_dueling_network=True,
        target_model_update=1e-2,
        policy=InformedBoltzmannGumbelQPolicy(self.env),
        test_policy=InformedGreedyQPolicy(self.env),
        batch_size=128,
        train_interval=128,
    )
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    if self.initial_weights_file is not None:
        dqn.load_weights(self.initial_weights_file)

    self.train_episodes = 0
    return dqn
def __init__(self, state_dim, action_space, epsilon, lr):
    """
    Create the model via ``self._get_model`` and wrap it in a compiled ``DQNAgent``.

    :param state_dim: forwarded to ``self._get_model``.
    :param action_space: forwarded to ``self._get_model``.
    :param epsilon: exploration rate of the training-time epsilon-greedy policy.
    :param lr: passed positionally to ``Adam``.
    """
    self._model = self._get_model(state_dim, action_space)

    train_policy = EpsGreedyQPolicy(epsilon)
    # Evaluation keeps 1% random actions.
    eval_policy = EpsGreedyQPolicy(eps=0.01)
    # NOTE(review): no nb_actions/memory are passed here; keras-rl's DQNAgent
    # requires both -- confirm which DQNAgent class this is.
    self.agent = DQNAgent(self._model, policy=train_policy, test_policy=eval_policy)

    optimizer = Adam(lr)
    self.agent.compile(optimizer)
def build_agent(model, actions):
    """
    Return an uncompiled double + dueling DQN agent around ``model``.

    ``processor`` is taken from the enclosing scope (not defined in this function).

    :param model: Keras model used as the Q-network.
    :param actions: number of actions.
    """
    # Epsilon: 1.0 -> 0.05 over 150000 steps, 0.05 in test mode.
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.05,
        value_test=.05,
        nb_steps=150_000,
    )
    replay = SequentialMemory(limit=30000, window_length=1)

    agent_kwargs = dict(
        model=model,
        memory=replay,
        policy=exploration,
        processor=processor,
        nb_actions=actions,
        nb_steps_warmup=100,
        target_model_update=1e-3,
        enable_double_dqn=True,
        enable_dueling_network=True,
        dueling_type='avg',
        batch_size=8,
        gamma=.95,
    )
    return DQNAgent(**agent_kwargs)
def build_agent(model, nb_actions):
    """
    Return a compiled dueling DQN agent configured from the module constants.

    Epsilon is annealed linearly from MAX_EPSILON to MIN_EPSILON over
    MAX_STEPS steps (TEST_EPSILON in test mode). The replay memory holds
    MAX_STEPS entries with a window of WINDOW_WIDTH.
    """
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=MAX_EPSILON,
        value_min=MIN_EPSILON,
        value_test=TEST_EPSILON,
        nb_steps=MAX_STEPS,
    )
    replay = SequentialMemory(limit=MAX_STEPS, window_length=WINDOW_WIDTH)

    agent = DQNAgent(
        model=model,
        memory=replay,
        policy=exploration,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=nb_actions,
        nb_steps_warmup=WARMUP_STEPS,
    )
    optimizer = Adam(learning_rate=LEARNING_RATE)
    agent.compile(optimizer, metrics=['mae'])
    return agent
def initiate_agent(self, env):
    """
    Create the Q-network and a compiled DQN agent for ``env``.

    Eager execution is disabled first. The network is three 512-unit relu
    layers, each followed by 20% dropout, with a linear output. Results are
    stored on ``self.env``, ``self.model`` and ``self.dqn``.

    ``memory_limit``, ``window_length``, ``nb_steps_warmup``, ``batch_size``,
    ``train_interval``, ``enable_double_dqn`` and ``CustomProcessor`` are
    taken from the enclosing scope.
    """
    tf.compat.v1.disable_eager_execution()

    self.env = env
    nb_actions = self.env.action_space.n

    # NOTE(review): input_shape receives env.observation_space itself --
    # presumably this env stores a shape tuple there; confirm.
    net = Sequential()
    net.add(Dense(512, activation='relu', input_shape=env.observation_space))
    net.add(Dropout(0.2))
    for _ in range(2):
        net.add(Dense(512, activation='relu'))
        net.add(Dropout(0.2))
    net.add(Dense(nb_actions, activation='linear'))
    self.model = net

    # Configure and compile the agent.
    replay = SequentialMemory(limit=memory_limit, window_length=window_length)
    exploration = TrumpPolicy()
    nb_actions = env.action_space.n

    self.dqn = DQNAgent(
        model=self.model,
        nb_actions=nb_actions,
        memory=replay,
        nb_steps_warmup=nb_steps_warmup,
        target_model_update=1e-2,
        policy=exploration,
        processor=CustomProcessor(),
        batch_size=batch_size,
        train_interval=train_interval,
        enable_double_dqn=enable_double_dqn,
    )
    optimizer = tf.keras.optimizers.Adam(lr=1e-3)
    self.dqn.compile(optimizer, metrics=['mae'])
def build_agent(model, actions):
    """
    Return an uncompiled dueling DQN agent with annealed epsilon-greedy exploration.

    :param model: Keras model used as the Q-network.
    :param actions: number of actions.
    """
    # Epsilon: 1.0 -> 0.1 over 10000 steps, 0.2 in test mode.
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=10000,
    )
    replay = SequentialMemory(limit=1000, window_length=3)
    return DQNAgent(
        model=model,
        memory=replay,
        policy=exploration,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=1000,
    )
def build_agent(model, actions):
    """
    Return an uncompiled DQN agent with Boltzmann exploration.

    :param model: Keras model used as the Q-network.
    :param actions: number of actions.
    """
    exploration = BoltzmannQPolicy()
    replay = SequentialMemory(limit=50000, window_length=1)
    agent_kwargs = dict(
        model=model,
        memory=replay,
        policy=exploration,
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )
    return DQNAgent(**agent_kwargs)
def get_agent(self):
    """
    Return an uncompiled DQN agent built from ``self.model``, ``self.policy``,
    ``self.memory`` and ``self.action`` (the number of actions), with double
    DQN switched off.
    """
    agent = DQNAgent(
        model=self.model,
        policy=self.policy,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        nb_actions=self.action,
        memory=self.memory,
        # Was misspelled "enable_double_dnq": an unknown keyword that the
        # agent constructor rejects instead of disabling double DQN.
        enable_double_dqn=False,
    )
    return agent
def build_agent(model, actions):
    """
    Build an uncompiled dueling deep Q-learning agent with annealed
    epsilon-greedy exploration.

    :param model: Keras model used as the Q-network.
    :param actions: number of actions.
    """
    # Epsilon: 1 -> 0.1 over 10000 steps, 0.2 in test mode.
    anneal = dict(attr='eps', value_max=1, value_min=0.1, value_test=0.2, nb_steps=10000)
    exploration = LinearAnnealedPolicy(EpsGreedyQPolicy(), **anneal)
    replay = SequentialMemory(limit=2000, window_length=3)

    # model and policy are passed positionally, as in the original call.
    agent = DQNAgent(
        model,
        exploration,
        memory=replay,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=1000,
    )
    return agent
def set_num_states(self, state_dimension: int, num_actions: int) -> None:
    """
    Create the compiled internal DQN agent for the given problem size.

    The network comes from ``self._build_model``; the agent is stored on
    ``self._internal_agent``.

    :param state_dimension: forwarded to ``self._build_model``.
    :param num_actions: forwarded to ``self._build_model`` and used as the agent's action count.
    """
    q_net = self._build_model(state_dimension, num_actions)
    replay = SequentialMemory(limit=10000, window_length=1)

    agent_kwargs = dict(
        model=q_net,
        nb_actions=num_actions,
        memory=replay,
        nb_steps_warmup=1000,
        target_model_update=1000,
        gamma=0.99,
        delta_clip=1,
    )
    self._internal_agent = DQNAgent(**agent_kwargs)

    optimizer = Adam(lr=0.0001)
    self._internal_agent.compile(optimizer, metrics=['mae'])
def train(learn_rate, model_update_interval, steps):
    """
    Train a DQN agent on ``env`` and save its weights under SAVEFILE_FOLDER.

    ``model``, ``nb_actions``, ``memory``, ``policy``, ``env``, ``VISUALIZE``
    and ``SAVEFILE_FOLDER`` are taken from the enclosing scope.

    :param learn_rate: Adam learning rate.
    :param model_update_interval: passed to the agent as ``target_model_update``.
    :param steps: number of training steps.
    """
    agent_kwargs = dict(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        nb_steps_warmup=50000,
        target_model_update=model_update_interval,
        policy=policy,
        gamma=.99,
        train_interval=4,
    )
    agent = DQNAgent(**agent_kwargs)
    agent.compile(Adam(lr=learn_rate), metrics=['mae'])

    agent.fit(env, nb_steps=steps, verbose=2, visualize=VISUALIZE)

    weights_path = SAVEFILE_FOLDER + "/dqn_pong_params.h5f"
    agent.save_weights(weights_path, overwrite=True)
def setupDQN(cfg, nb_actions, processor):
    """
    Build a three-layer convolutional Q-network and return it inside a compiled DQN agent.

    :param cfg: configuration object; reads ``input_shape``, ``memory_limit``,
        ``WINDOW_LENGTH``, ``nb_steps_annealed_policy``,
        ``nb_steps_warmup_dqn_agent`` and ``target_model_update_dqn_agent``.
    :param nb_actions: number of actions (output width).
    :param processor: keras-rl processor handed to the agent.
    :return: compiled ``DQNAgent``.
    """
    frames = Input(shape=cfg.input_shape, name='main_input')
    # Move axis 1 of the input to the last position before the convolutions.
    x = Permute((2, 3, 1), input_shape=cfg.input_shape)(frames)
    x = Conv2D(32, (8, 8), activation="relu", strides=(4, 4), name='conv1')(x)
    x = Conv2D(64, (4, 4), activation="relu", strides=(2, 2), name='conv2')(x)
    x = Conv2D(64, (3, 3), activation="relu", strides=(1, 1), name='conv3')(x)
    x = Flatten(name='flat_feat')(x)
    x = Dense(512, activation='relu')(x)
    q_values = Dense(nb_actions, activation='linear')(x)
    q_net = Model(inputs=[frames], outputs=[q_values])
    print(q_net.summary())

    replay = SequentialMemory(limit=cfg.memory_limit, window_length=cfg.WINDOW_LENGTH)

    # Eps-greedy exploration: a random action is taken with probability eps.
    # Eps is annealed from 1.0 to 0.1 over cfg.nb_steps_annealed_policy steps,
    # so the agent explores first and then increasingly exploits. In test mode
    # eps is 0.05, so a few random actions remain and the agent cannot get
    # stuck. BoltzmannQPolicy(tau=1.) is a possible alternative.
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=cfg.nb_steps_annealed_policy,
    )

    agent = DQNAgent(
        model=q_net,
        nb_actions=nb_actions,
        policy=exploration,
        memory=replay,
        processor=processor,
        nb_steps_warmup=cfg.nb_steps_warmup_dqn_agent,
        gamma=.99,
        target_model_update=cfg.target_model_update_dqn_agent,
        train_interval=4,
        delta_clip=1.,
    )
    agent.compile(Adam(lr=.00025), metrics=['mae'])
    return agent
def initiate_agent(self, env):
    """
    Create the Q-network and a compiled DQN agent for ``env``.

    Eager execution is disabled first. The network is three 512-unit relu
    layers, each followed by 20% dropout, with a linear output. Results are
    stored on ``self.env``, ``self.model`` and ``self.dqn``.

    ``memory_limit``, ``window_length``, ``nb_steps_warmup``, ``batch_size``,
    ``train_interval`` and ``enable_double_dqn`` are taken from the enclosing scope.
    """
    tf.compat.v1.disable_eager_execution()

    self.env = env
    nb_actions = self.env.action_space.n

    # NOTE(review): input_shape receives env.observation_space itself --
    # presumably this env stores a shape tuple there; confirm.
    net = Sequential()
    net.add(Dense(512, activation='relu', input_shape=env.observation_space))
    net.add(Dropout(0.2))
    for _ in range(2):
        net.add(Dense(512, activation='relu'))
        net.add(Dropout(0.2))
    net.add(Dense(nb_actions, activation='linear'))
    self.model = net

    # Configure and compile the agent.
    replay = SequentialMemory(limit=memory_limit, window_length=window_length)
    exploration = TrumpPolicy()

    class CustomProcessor(Processor):
        """Adapts state batches and step info for the agent."""

        def process_state_batch(self, batch):
            """Drop axis 1 of the batch so it can be fed to the network."""
            return np.squeeze(batch, axis=1)

        def process_info(self, info):
            """Return the player data, or ``{'x': 1}`` if it has a 'stack' key."""
            player_data = info['player_data']
            return {'x': 1} if 'stack' in player_data else player_data

    nb_actions = env.action_space.n

    self.dqn = DQNAgent(
        model=self.model,
        nb_actions=nb_actions,
        memory=replay,
        nb_steps_warmup=nb_steps_warmup,
        target_model_update=1e-2,
        policy=exploration,
        processor=CustomProcessor(),
        batch_size=batch_size,
        train_interval=train_interval,
        enable_double_dqn=enable_double_dqn,
    )
    optimizer = tf.optimizers.Adam(lr=1e-3)
    self.dqn.compile(optimizer, metrics=['mae'])
def build_agent(self, mem_file=None, w_file=None):
    """
    Build a compiled LSTM-based DQN agent together with a dummy trading env.

    :param mem_file: optional path to a pickled replay memory, stored as the
        tuple ``(memory, actions, rewards, terminals, observations)``. When
        omitted, an empty ``SequentialMemory`` is created.
    :param w_file: optional path to model weights, loaded after compiling.
    :return: tuple ``(dqn, env, memory)``.
    """
    # Create a dummy env to get the number of actions.
    # Makes it simpler if we ever choose to update env shapes.
    env = TradingEnv([], "", [])
    np.random.seed(314)
    env.seed(314)
    nb_actions = env.action_space.n

    model = Sequential()
    model.add(LSTM(5, input_shape=(7, 4), return_sequences=True))
    model.add(Activation('tanh'))
    model.add(LSTM(4))
    model.add(Activation('tanh'))
    model.add(Dropout(0.2))
    model.add(Dense(4))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    policy = BoltzmannQPolicy()
    test_policy = MaxBoltzmannQPolicy()

    if mem_file is None:
        # window_length=7 matches the 7 time steps of the LSTM input shape.
        memory = SequentialMemory(limit=50000, window_length=7)
    else:
        # SECURITY: pickle can execute arbitrary code while loading; only
        # point mem_file at files this project wrote itself.
        # The context manager closes the file, which the original did not.
        with open(mem_file, "rb") as fh:
            memory, actions, rewards, terminals, observations = pickle.load(fh)
        # The ring buffers are stored next to the memory object; reattach them.
        memory.actions = actions
        memory.rewards = rewards
        memory.terminals = terminals
        memory.observations = observations

    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        gamma=GAMMA_VAL,
        nb_steps_warmup=100,
        policy=policy,
        test_policy=test_policy,
    )
    dqn.compile("adam", metrics=['mse'])

    if w_file is not None:
        model.load_weights(w_file)

    return dqn, env, memory
def __init__(self, shape, initial_randomness: float, action_count: int):
    """
    Create the convolutional network, callbacks, processor, replay memory,
    exploration policy and the compiled DQN agent, storing each on the instance.

    :param shape: input shape of the network.
    :param initial_randomness: stored unchanged on ``self.initial_randomness``.
    :param action_count: width of the output layer and number of agent actions.
    """
    super().__init__()

    net = Sequential()
    net.add(Input(shape=shape))
    # Three 3x3 convolutions with a growing number of filters.
    for filters in (8, 16, 32):
        net.add(Conv2D(filters, (3, 3), activation='relu', input_shape=shape))
    net.add(Flatten())
    for units in (64, 512):
        net.add(Dense(units, activation='relu'))
    # NOTE(review): the head uses softmax; Q-value heads are usually linear.
    # Left as in the original -- confirm this is intended.
    net.add(Dense(action_count, activation='softmax'))
    print(net.summary())
    self.model = net

    self.callbacks = self.build_callbacks("msnake")
    self.processor = RemoveDimensionProcessor()
    self.memory = SequentialMemory(limit=50000, window_length=1)
    # Epsilon goes linearly from 1.0 to 0.1 over 1000 steps; 0.05 in test mode.
    self.policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=1000,
    )
    self.dqn = DQNAgent(
        model=self.model,
        nb_actions=action_count,
        memory=self.memory,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        policy=self.policy,
        batch_size=1,
        processor=self.processor,
    )

    # Workaround from https://github.com/keras-rl/keras-rl/issues/345
    Adam._name = "fix_bug"
    self.dqn.compile(Adam(lr=1e-3), metrics=['mse'])

    self.initial_randomness = initial_randomness
def getAgent(self):
    """
    Return an uncompiled DQN agent built from ``self.model``, ``self.policy``,
    ``self.memory`` and ``self.action`` (the number of actions), with double
    DQN switched off.
    """
    # The unresolved merge-conflict markers were removed. Both sides passed
    # the same keyword arguments, only in a different order.
    agent = DQNAgent(
        model=self.model,
        policy=self.policy,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        nb_actions=self.action,
        memory=self.memory,
        enable_double_dqn=False,
    )
    return agent
def __init__(self, env: gym.Env, memory=None, logger=Logger(), boxes_resolution=10, nb_steps_warmup=20, hidden_layers=None, policy=None, target_model_update=1e-2, optimizer=None):
    """
    Build a DQN agent that acts on a discretised version of ``env``'s action space.

    :param env: environment; reads ``observation_space.shape`` and
        ``action_space.shape`` / ``.low`` / ``.high``.
    :param memory: replay memory. ``None`` (default) creates a fresh
        ``SequentialMemory(limit=50000, window_length=1)`` for this instance.
    :param logger: passed to the base-class initialiser.
    :param boxes_resolution: number of discrete boxes per action dimension.
        An int is repeated for every dimension; a sequence is used as given.
    :param nb_steps_warmup: forwarded to ``DQNAgent``.
    :param hidden_layers: widths of the relu hidden layers; ``None``
        (default) means ``(16, 16, 16)``.
    :param policy: exploration policy; ``None`` (default) creates a fresh
        ``BoltzmannQPolicy``.
    :param target_model_update: forwarded to ``DQNAgent``.
    :param optimizer: Keras optimizer; ``None`` (default) creates a fresh
        ``Adam(lr=1e-3)``.

    The stateful defaults used to be built once in the signature, so every
    instance relying on them shared one replay memory, one policy and one
    optimizer. They are now created per instance.
    """
    if memory is None:
        memory = SequentialMemory(limit=50000, window_length=1)
    if hidden_layers is None:
        hidden_layers = (16, 16, 16)
    if policy is None:
        policy = BoltzmannQPolicy()
    if optimizer is None:
        optimizer = Adam(lr=1e-3)

    self.env = env

    if isinstance(boxes_resolution, int):
        boxes_resolution = (boxes_resolution, ) * len(env.action_space.shape)
    self.boxes_resolution = boxes_resolution
    # One discrete action per box: the product of the per-dimension resolutions.
    self.nb_actions = int(np.prod(boxes_resolution))

    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    # TODO check this
    for width in hidden_layers:
        model.add(Dense(width, activation='relu'))
    model.add(Dense(self.nb_actions, activation='linear'))
    # TODO move this to util file?
    self.model = model
    print("dqn model summary :{0}".format(model.summary()))

    self.dqn = DQNAgent(
        model=model,
        nb_actions=self.nb_actions,
        memory=memory,
        nb_steps_warmup=nb_steps_warmup,
        target_model_update=target_model_update,
        policy=policy,
        processor=DqnProcessor(self.boxes_resolution,
                               env.action_space.low,
                               env.action_space.high),
    )
    self.dqn.compile(optimizer=optimizer, metrics=['mae'])

    super().__init__(env, logger)
def build_agent(self, model, actions, nb_steps):
    """
    Build the (uncompiled) deep Q agent.

    GAMMA is the discount factor:
    REWARD = r1 + gamma*r2 + gamma^2*r3 + gamma^3*r4 ...
    Lower values make future rewards count less. Learning is often faster
    when the agent does not look too far ahead; 0.9 or 0.95 works for many
    problems.

    Exploration is epsilon-greedy, with epsilon annealed linearly from
    ``self.EPSILON_START`` to ``self.EPSILON_END`` over
    ``self.EPSILON_DECAY * nb_steps`` agent steps (``self.EPSILON_TEST`` in
    test mode).

    :param model: Keras model used as the Q-network.
    :param actions: number of actions.
    :param nb_steps: planned number of training steps; scales the annealing length.
    :return: the ``DQNAgent``.
    """
    anneal_steps = self.EPSILON_DECAY * nb_steps
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),  # best action with probability (1 - epsilon)
        attr='eps',  # the attribute being decayed
        value_max=self.EPSILON_START,
        value_min=self.EPSILON_END,
        value_test=self.EPSILON_TEST,
        nb_steps=anneal_steps,
    )
    replay = SequentialMemory(limit=self.SEQUENTIAL_MEMORY_LIMIT, window_length=1)

    agent = DQNAgent(
        model=model,
        memory=replay,
        policy=exploration,
        gamma=self.GAMMA,
        batch_size=self.BATCH_SIZE,
        nb_actions=actions,
        nb_steps_warmup=1000,
        target_model_update=self.TARGET_MODEL_UPDATE,
        enable_double_dqn=False,
        train_interval=4,
    )
    return agent
def get_agent(agent_type, model_type, lr):
    """
    Create and compile the agent selected by ``agent_type``.

    ``nb_actions`` and ``env`` are taken from the enclosing scope.

    :param agent_type: one of ``"sarsa"``, ``"dqn"``, ``"a2c"`` or ``"ppo"``.
    :param model_type: forwarded to ``get_model`` (sarsa and dqn only).
    :param lr: passed positionally to ``Adam``.
    :return: the compiled agent, or ``None`` for ``"ppo"`` (not implemented).
    :raises SystemExit: with status 1 for any other ``agent_type``, after
        printing "Unsupported model".
    """
    if agent_type == "sarsa":
        policy = BoltzmannQPolicy()
        model = get_model(model_type)
        agent = SARSAAgent(model=model, policy=policy, nb_actions=nb_actions,
                           nb_steps_warmup=10, gamma=0.99)
        agent.compile(Adam(lr), metrics=['mae'])
        return agent

    if agent_type == "dqn":
        policy = BoltzmannQPolicy()
        model = get_model(model_type)
        memory = SequentialMemory(limit=50000, window_length=1)
        agent = DQNAgent(model=model, policy=policy, nb_actions=nb_actions,
                         memory=memory, nb_steps_warmup=10,
                         target_model_update=1e-2, enable_double_dqn=True)
        agent.compile(Adam(lr), metrics=['mae'])
        return agent

    if agent_type == "a2c":
        agent = A2CAgent(nb_actions, len(env.observation_space.high),
                         nb_steps_warmup=10, actor_lr=0.001, critic_lr=0.005)
        agent.compile(Adam(lr))
        return agent

    if agent_type == "ppo":
        # Not implemented yet. Callers get None, exactly as before, but now
        # explicitly rather than by falling off the end of the function.
        return None

    print("Unsupported model")
    # Same effect as exit(1), without relying on the helper that the `site`
    # module injects (absent under `python -S` and in some embedded setups).
    raise SystemExit(1)