def make_dqn_rl_agent(processor: Processor_56x5,
                      nbr_layers=2,
                      enable_dueling_network: bool = False,
                      enable_double_dqn: bool = True):
    """
    Build a keras-rl DQN agent for Tichu.

    :param processor: processor that converts game states and builds the network
    :param nbr_layers: number of hidden layers in the model created by the processor
    :param enable_dueling_network: whether to use the dueling network architecture
    :param enable_double_dqn: whether to use double DQN updates
    :return: compiled DQNAgent
    """
    model = processor.create_model(nbr_layers=nbr_layers)
    test_policy = GreedyQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn_agent = DQNAgent(model=model,
                         nb_actions=NBR_TICHU_ACTIONS,
                         memory=memory,
                         nb_steps_warmup=100,
                         target_model_update=1e-2,
                         test_policy=test_policy,
                         processor=processor,
                         enable_dueling_network=enable_dueling_network,
                         enable_double_dqn=enable_double_dqn)
    dqn_agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn_agent
def __init__(self, env: gym.Env, logger=Logger()):
    nb_actions = env.action_space.shape[0]

    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=100000, window_length=1)
    agent = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                     nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
    agent.compile(Adam(lr=1e-3), metrics=['mae'])

    self.agent = agent
    self.env = env
    super().__init__(env, logger)
def build_model(env, num_actions):
    inputs = Input(shape=(1, env.observation_space.shape[0]))
    x = Flatten()(inputs)
    x = Dense(128, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    output = Dense(num_actions, activation='linear')(x)
    model = Model(inputs=inputs, outputs=output)
    print(model.summary())

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.05, nb_steps=10000)
    # policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory,
                   nb_steps_warmup=100, target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn
def build_agent(model, actions):
    """Build the agent."""
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000000, window_length=3)
    dqn_agent = DQNAgent(model=model, memory=memory, policy=policy,
                         enable_dueling_network=True, dueling_type='avg',
                         nb_actions=actions, nb_steps_warmup=1000)
    dqn_agent.compile(optimizer=Adam(lr=0.00025), metrics=['mae', 'accuracy'])
    return dqn_agent
def init_dqn(env, nb_actions):
    """
    Initialize the DQN agent using the keras-rl package.

    :param env: the environment to be played, required to determine the input size
    :param nb_actions: number of actions
    :return: DQN agent
    """
    # Next, we build a very simple model.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    print(model.summary())

    # Configure and compile the agent.
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
    dqn.model_name = "DQN"
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    return dqn
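# Hedged usage sketch, not part of the original snippet: training and evaluating the
# agent returned by init_dqn with the standard keras-rl workflow. The step counts and
# weights filename below are illustrative assumptions only.
def train_and_evaluate(env, nb_steps=50000):
    dqn = init_dqn(env, env.action_space.n)
    dqn.fit(env, nb_steps=nb_steps, visualize=False, verbose=2)
    dqn.save_weights('dqn_weights.h5f', overwrite=True)
    dqn.test(env, nb_episodes=5, visualize=False)
    return dqn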
def _build_dqn(nb_actions, nb_states):
    # Build the network.
    model = Sequential()
    model.add(Flatten(input_shape=(1, nb_states)))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    # Build the agent.
    memory = SequentialMemory(limit=10240, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=10, enable_dueling_network=True,
                   dueling_type='avg', target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(), metrics=['mae'])
    return dqn
def build_agent(model, nb_actions):
    """Build an agent."""
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=MAX_EPSILON, value_min=MIN_EPSILON,
                                  value_test=TEST_EPSILON, nb_steps=MAX_STEPS)
    memory = SequentialMemory(limit=MAX_STEPS, window_length=WINDOW_WIDTH)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   enable_dueling_network=True, dueling_type='avg',
                   nb_actions=nb_actions, nb_steps_warmup=WARMUP_STEPS)
    dqn.compile(Adam(learning_rate=LEARNING_RATE), metrics=['mae'])
    return dqn
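# Hedged sketch, not part of the original snippet: plausible module-level constants
# that build_agent above relies on; the concrete values are illustrative assumptions.
MAX_EPSILON = 1.0       # initial exploration rate for the annealed eps-greedy policy
MIN_EPSILON = 0.1       # final exploration rate after annealing
TEST_EPSILON = 0.05     # exploration rate used during evaluation
MAX_STEPS = 1_000_000   # annealing horizon and replay-memory capacity
WINDOW_WIDTH = 4        # number of consecutive observations stacked per state
WARMUP_STEPS = 1_000    # steps collected before learning starts
LEARNING_RATE = 2.5e-4  # Adam learning rate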
def initiate_agent(self, env):
    """Initiate a deep Q agent."""
    tf.compat.v1.disable_eager_execution()

    self.env = env
    nb_actions = self.env.action_space.n

    self.model = Sequential()
    self.model.add(Dense(512, activation='relu', input_shape=env.observation_space))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(512, activation='relu'))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(512, activation='relu'))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(nb_actions, activation='linear'))

    # Finally, we configure and compile our agent. You can use every built-in Keras
    # optimizer and even the metrics!
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    policy = TrumpPolicy()

    self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions, memory=memory,
                        nb_steps_warmup=nb_steps_warmup, target_model_update=1e-2,
                        policy=policy, processor=CustomProcessor(),
                        batch_size=batch_size, train_interval=train_interval,
                        enable_double_dqn=enable_double_dqn)
    self.dqn.compile(tf.keras.optimizers.Adam(lr=1e-3), metrics=['mae'])
def play(self, nb_episodes=5, render=False):
    """Let the agent play."""
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    policy = TrumpPolicy()

    class CustomProcessor(Processor):
        """Adapts states and info dicts passed between the agent and the environment."""

        def process_state_batch(self, batch):
            """
            Given a state batch, remove the second dimension, because it is useless
            and prevents the tensor from being fed into the CNN.
            """
            return np.squeeze(batch, axis=1)

        def process_info(self, info):
            processed_info = info['player_data']
            if 'stack' in processed_info:
                processed_info = {'x': 1}
            return processed_info

    nb_actions = self.env.action_space.n
    self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions, memory=memory,
                        nb_steps_warmup=nb_steps_warmup, target_model_update=1e-2,
                        policy=policy, processor=CustomProcessor(),
                        batch_size=batch_size, train_interval=train_interval,
                        enable_double_dqn=enable_double_dqn)
    self.dqn.compile(tf.keras.optimizers.Adam(lr=1e-3), metrics=['mae'])

    # pylint: disable=no-member
    self.dqn.test(self.env, nb_episodes=nb_episodes, visualize=render)
class DeepAgent:
    """
    This algorithm uses a DQN agent that learns on its own, given only a gym
    environment. After quite some trouble with various error messages, this now at
    least runs and trains. It does not yet achieve good results.

    Best result: ???
    """

    def __init__(self, shape, action_count: int):
        super().__init__()
        inp = Input(shape=shape)
        flat = Flatten()(inp)
        # Activation: relu, sigmoid, ...
        hidden1 = Dense(256, activation='relu')(flat)
        hidden2 = Dense(64, activation='relu')(hidden1)
        hidden3 = Dense(16, activation='relu')(hidden2)
        output = Dense(action_count, activation='softmax')(hidden3)
        self.model = Model(inputs=inp, outputs=output)
        print(self.model.summary())

        self.memory = SequentialMemory(limit=50000, window_length=WINDOW_LENGTH)
        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                           value_max=1., value_min=.1,
                                           value_test=.05, nb_steps=1000)
        self.callbacks = self.build_callbacks("msnake")
        self.dqn = DQNAgent(model=self.model, nb_actions=action_count,
                            memory=self.memory, nb_steps_warmup=50,
                            target_model_update=1e-2, policy=self.policy)

        Adam._name = "fix_bug"  # https://github.com/keras-rl/keras-rl/issues/345
        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-5), metrics=['mse'])

    def build_callbacks(self, env_name):
        callbacks = []
        checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
        callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)]
        log_filename = 'dqn_{}_log.json'.format(env_name)
        callbacks += [FileLogger(log_filename, interval=100)]
        return callbacks
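# Hedged usage sketch, not part of the original snippet: how the callbacks built by
# build_callbacks are typically wired into keras-rl training. The environment, input
# shape and step count are illustrative assumptions.
def train_deep_agent(env, nb_steps=100000):
    # The model's input shape is assumed to include the observation window stacked by
    # SequentialMemory(window_length=WINDOW_LENGTH).
    agent = DeepAgent(shape=(WINDOW_LENGTH,) + env.observation_space.shape,
                      action_count=env.action_space.n)
    # ModelIntervalCheckpoint and FileLogger are passed through fit()'s callbacks list.
    agent.dqn.fit(env, nb_steps=nb_steps, callbacks=agent.callbacks,
                  visualize=False, verbose=2)
    return agent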
class QLearningAgent(Agent):
    def __init__(self, state_dim, action_space, epsilon, lr):
        self._model = self._get_model(state_dim, action_space)
        # keras-rl's DQNAgent also requires the number of actions and a replay memory;
        # action_space is assumed here to be the number of discrete actions, and the
        # memory size is an illustrative choice.
        memory = SequentialMemory(limit=50000, window_length=1)
        self.agent = DQNAgent(self._model,
                              nb_actions=action_space,
                              memory=memory,
                              policy=EpsGreedyQPolicy(eps=epsilon),
                              test_policy=EpsGreedyQPolicy(eps=0.01))
        self.agent.compile(Adam(lr))

    def model_summary(self):
        print(self._model.summary())
class DqnAgent(Agent):
    def __init__(self,
                 env: gym.Env,
                 memory=SequentialMemory(limit=50000, window_length=1),
                 logger=Logger(),
                 boxes_resolution=10,
                 nb_steps_warmup=20,
                 hidden_layers=[16, 16, 16],
                 policy=BoltzmannQPolicy(),
                 target_model_update=1e-2,
                 optimizer=Adam(lr=1e-3)):
        self.env = env
        if isinstance(boxes_resolution, int):
            boxes_resolution = (boxes_resolution, ) * len(env.action_space.shape)
        self.boxes_resolution = boxes_resolution
        self.nb_actions = np.zeros(boxes_resolution).size

        model = Sequential()
        model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))  # TODO check this
        for l in hidden_layers:
            model.add(Dense(l, activation='relu'))
        model.add(Dense(self.nb_actions, activation='linear'))
        # TODO move this to util file?
        self.model = model
        print("dqn model summary :{0}".format(model.summary()))

        self.dqn = DQNAgent(model=model,
                            nb_actions=self.nb_actions,
                            memory=memory,
                            nb_steps_warmup=nb_steps_warmup,
                            target_model_update=target_model_update,
                            policy=policy,
                            processor=DqnProcessor(self.boxes_resolution,
                                                   env.action_space.low,
                                                   env.action_space.high))
        self.dqn.compile(optimizer=optimizer, metrics=['mae'])
        super().__init__(env, logger)

    def act(self, state, explore):
        action = self.dqn.processor.process_action(self.dqn.forward(state))
        return action

    def train(self, nb_episodes=1000, verbose=2, visualize=True):
        self.dqn.fit(env=self.env, nb_steps=nb_episodes, verbose=verbose,
                     visualize=visualize)
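# Hedged sketch, not part of the original snippet: one plausible shape for the
# DqnProcessor referenced above. It maps the flat discrete action index chosen by the
# DQN back onto the continuous action space by interpolating over the
# boxes_resolution grid; the real implementation may differ.
import numpy as np
from rl.core import Processor


class DqnProcessorSketch(Processor):
    def __init__(self, boxes_resolution, low, high):
        self.boxes_resolution = boxes_resolution
        self.low = low
        self.high = high

    def process_action(self, action):
        # Unravel the flat index into one bin index per action dimension, then
        # interpolate linearly between the action-space bounds.
        idx = np.unravel_index(action, self.boxes_resolution)
        steps = np.maximum(np.array(self.boxes_resolution) - 1, 1)
        fractions = np.array(idx, dtype=float) / steps
        return self.low + fractions * (self.high - self.low)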
def set_num_states(self, state_dimension: int, num_actions: int) -> None:
    model = self._build_model(state_dimension, num_actions)
    memory = SequentialMemory(limit=10000, window_length=1)
    self._internal_agent = DQNAgent(model=model, nb_actions=num_actions,
                                    memory=memory, nb_steps_warmup=1000,
                                    target_model_update=1000, gamma=0.99,
                                    delta_clip=1)
    self._internal_agent.compile(Adam(lr=0.0001), metrics=['mae'])
def setupDQN(cfg, nb_actions, processor):
    image_in = Input(shape=cfg.input_shape, name='main_input')
    input_perm = Permute((2, 3, 1), input_shape=cfg.input_shape)(image_in)
    conv1 = Conv2D(32, (8, 8), activation="relu", strides=(4, 4), name='conv1')(input_perm)
    conv2 = Conv2D(64, (4, 4), activation="relu", strides=(2, 2), name='conv2')(conv1)
    conv3 = Conv2D(64, (3, 3), activation="relu", strides=(1, 1), name='conv3')(conv2)
    conv_out = Flatten(name='flat_feat')(conv3)
    dense_out = Dense(512, activation='relu')(conv_out)
    q_out = Dense(nb_actions, activation='linear')(dense_out)
    model = Model(inputs=[image_in], outputs=[q_out])
    print(model.summary())
    # hstate_size = int(np.prod(conv3.shape[1:]))

    # Finally, we configure and compile our agent. You can use every built-in Keras
    # optimizer and even the metrics!
    memory = SequentialMemory(limit=cfg.memory_limit, window_length=cfg.WINDOW_LENGTH)

    # Select a policy. We use eps-greedy action selection, which means that a random
    # action is selected with probability eps. We anneal eps from 1.0 to 0.1 over the
    # course of 1M steps. This is done so that the agent initially explores the
    # environment (high eps) and then gradually sticks to what it knows (low eps).
    # We also set a dedicated eps value that is used during testing. Note that we set
    # it to 0.05 so that the agent still performs some random actions. This ensures
    # that the agent cannot get stuck.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.05,
                                  nb_steps=cfg.nb_steps_annealed_policy)

    # The trade-off between exploration and exploitation is difficult and an on-going
    # research topic. If you want, you can experiment with the parameters or use a
    # different policy. Another popular one is Boltzmann-style exploration:
    # policy = BoltzmannQPolicy(tau=1.)
    # Feel free to give it a try!

    dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=cfg.nb_steps_warmup_dqn_agent,
                   gamma=.99, target_model_update=cfg.target_model_update_dqn_agent,
                   train_interval=4, delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])
    return dqn
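# Hedged illustration, not part of the original snippet: the schedule the annealed
# eps-greedy policy above follows during training. eps decays linearly from value_max
# to value_min over nb_steps and then stays at value_min; value_test is used only
# during evaluation. The default values here are assumptions.
def annealed_eps(step, value_max=1.0, value_min=0.1, nb_steps=1_000_000):
    return max(value_min, value_max - (value_max - value_min) * step / nb_steps)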
def initiate_agent(self, env):
    """Initiate a deep Q agent."""
    tf.compat.v1.disable_eager_execution()

    self.env = env
    nb_actions = self.env.action_space.n

    self.model = Sequential()
    self.model.add(Dense(512, activation='relu', input_shape=env.observation_space))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(512, activation='relu'))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(512, activation='relu'))
    self.model.add(Dropout(0.2))
    self.model.add(Dense(nb_actions, activation='linear'))

    # Finally, we configure and compile our agent. You can use every built-in Keras
    # optimizer and even the metrics!
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    policy = TrumpPolicy()

    class CustomProcessor(Processor):
        """Adapts states and info dicts passed between the agent and the environment."""

        def process_state_batch(self, batch):
            """
            Given a state batch, remove the second dimension, because it is useless
            and prevents the tensor from being fed into the CNN.
            """
            return np.squeeze(batch, axis=1)

        def process_info(self, info):
            processed_info = info['player_data']
            if 'stack' in processed_info:
                processed_info = {'x': 1}
            return processed_info

    self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions, memory=memory,
                        nb_steps_warmup=nb_steps_warmup, target_model_update=1e-2,
                        policy=policy, processor=CustomProcessor(),
                        batch_size=batch_size, train_interval=train_interval,
                        enable_double_dqn=enable_double_dqn)
    self.dqn.compile(tf.optimizers.Adam(lr=1e-3), metrics=['mae'])
def run():
    env = game_env.MeleeEnv()
    nb_actions = env.action_space.shape[0]
    actor = build_network(env, nb_actions)
    critic, action_input = build_critic(env, nb_actions)

    # window_length is a required argument of SequentialMemory; 1 is assumed here.
    memory = SequentialMemory(limit=25000, window_length=1)
    # random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3, size=nb_actions)
    agent = DQNAgent(batch_size=1000,
                     nb_actions=nb_actions,
                     model=actor,
                     # processor=Process(),
                     # window_length=4,
                     # critic_action_input=action_input,
                     memory=memory,
                     nb_steps_warmup=100)
    # Leftover DDPG-style parameters kept for reference:
    # nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
    # random_process=random_process, gamma=.95, target_model_update=1e-1,
    # delta_range=(-10., 10.)
    agent.compile(RMSprop(lr=.0005), metrics=['mae'])

    agent.fit(env, nb_steps=100000, visualize=True, verbose=1,
              nb_max_start_steps=100,
              start_step_policy=lambda x: np.random.randint(nb_actions))

    # After training is done, we save the final weights.
    agent.save_weights('ddpg_{}_weights.h5f'.format(str(random.randrange(0, 100000))),
                       overwrite=True)
class DeepAgentConvolution:
    """
    This algorithm uses a DQN agent that learns on its own, given only a gym
    environment. At the moment, it cannot successfully work with convolution:

        Error when checking input: expected input_1 to have 4 dimensions,
        but got array with shape (1, 1, 20, 10, 3)

    Best result: ???
    """

    def __init__(self, shape, action_count: int):
        super().__init__()
        inp = Input(shape=shape)

        # Convolution part (image recognition / feature extraction)
        conv = Conv2D(16, kernel_size=2, padding="same")(inp)
        conv = Conv2D(8, kernel_size=2)(conv)

        # Classification (decision making)
        flat = Flatten()(conv)
        # Activation: relu, sigmoid, ...
        hidden = Dense(256, activation='relu')(flat)
        hidden = Dense(64, activation='relu')(hidden)
        hidden = Dense(16, activation='relu')(hidden)
        output = Dense(action_count, activation='softmax')(hidden)
        self.model = Model(inputs=inp, outputs=output)
        print(self.model.summary())

        self.memory = SequentialMemory(limit=50000, window_length=WINDOW_LENGTH)
        self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                           value_max=1., value_min=.1,
                                           value_test=.05, nb_steps=1000)
        self.callbacks = self.build_callbacks("msnake")
        self.dqn = DQNAgent(model=self.model, nb_actions=action_count,
                            memory=self.memory, nb_steps_warmup=20,
                            target_model_update=1e-2, policy=self.policy)

        Adam._name = "fix_bug"  # https://github.com/keras-rl/keras-rl/issues/345
        # Metrics: mae, mse, accuracy
        # LR: learning rate
        self.dqn.compile(Adam(lr=1e-5), metrics=['mse'])

    def build_callbacks(self, env_name):
        callbacks = []
        checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
        callbacks += [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)]
        log_filename = 'dqn_{}_log.json'.format(env_name)
        callbacks += [FileLogger(log_filename, interval=100)]
        return callbacks
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.05, value_test=.05, nb_steps=150_000)
    # policy = EpsGreedyQPolicy(eps=.1)
    # policy = GreedyQPolicy()
    # policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=30000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, processor=processor,
                   nb_actions=actions, nb_steps_warmup=100, target_model_update=1e-3,
                   enable_double_dqn=True, enable_dueling_network=True,
                   dueling_type='avg', batch_size=8, gamma=.95)
    return dqn
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   enable_dueling_network=True, dueling_type='avg',
                   nb_actions=actions, nb_steps_warmup=1000)
    return dqn
def __init__(self, shape, initial_randomness: float, action_count: int):
    super().__init__()
    model = Sequential()
    model.add(Input(shape=shape))
    model.add(Conv2D(8, (3, 3), activation='relu', input_shape=shape))
    model.add(Conv2D(16, (3, 3), activation='relu', input_shape=shape))
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=shape))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(action_count, activation='softmax'))
    print(model.summary())
    self.model = model

    self.callbacks = self.build_callbacks("msnake")
    self.processor = RemoveDimensionProcessor()
    self.memory = SequentialMemory(limit=50000, window_length=1)
    self.policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                       value_min=.1, value_test=.05, nb_steps=1000)
    self.dqn = DQNAgent(model=self.model, nb_actions=action_count, memory=self.memory,
                        nb_steps_warmup=10, target_model_update=1e-2,
                        policy=self.policy, batch_size=1, processor=self.processor)

    # https://github.com/keras-rl/keras-rl/issues/345
    Adam._name = "fix_bug"
    # Metrics: mae, mse, accuracy
    # LR: learning rate
    self.dqn.compile(Adam(lr=1e-3), metrics=['mse'])
    self.initial_randomness = initial_randomness
def build_agent(self, mem_file=None, w_file=None):
    # Create a dummy env to get size of input/output.
    # Makes it simpler if we ever choose to update env shapes.
    env = TradingEnv([], "", [])
    np.random.seed(314)
    env.seed(314)
    nb_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]

    model = Sequential()
    model.add(LSTM(5, input_shape=(7, 4), return_sequences=True))  # 4 features + 1 bias term. 5 neurons
    model.add(Activation('tanh'))
    model.add(LSTM(4))
    model.add(Activation('tanh'))
    model.add(Dropout(0.2))
    model.add(Dense(4))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))  # Best activation for BoltzmannQPolicy

    # policy = EpsGreedyQPolicy(eps=EPS_VAL)  # Off-policy
    policy = BoltzmannQPolicy()  # Off-policy
    test_policy = MaxBoltzmannQPolicy()  # On-policy

    memory = None
    if mem_file is None:
        memory = SequentialMemory(limit=50000, window_length=7)  # returns observations of len (7,)
    else:
        (memory, memory.actions, memory.rewards, memory.terminals,
         memory.observations) = pickle.load(open(mem_file, "rb"))

    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, gamma=GAMMA_VAL,
                   nb_steps_warmup=100, policy=policy, test_policy=test_policy)
    dqn.compile("adam", metrics=['mse'])
    if w_file is not None:
        model.load_weights(w_file)
    return dqn, env, memory
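# Hedged sketch, not part of the original snippet: the saving counterpart implied by
# the mem_file branch above, pickling the memory together with its ring buffers in
# the same order that build_agent unpacks them.
import pickle


def save_memory(memory, mem_file):
    with open(mem_file, "wb") as f:
        pickle.dump((memory, memory.actions, memory.rewards,
                     memory.terminals, memory.observations), f)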
def test_double_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=50, target_model_update=1e-1, policy=policy,
                   enable_double_dqn=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
def get_agent(self):
    agent = DQNAgent(model=self.model, policy=self.policy, nb_steps_warmup=10,
                     target_model_update=1e-2, nb_actions=self.action,
                     memory=self.memory, enable_double_dqn=False)
    return agent
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, nb_actions=actions,
                   nb_steps_warmup=10, target_model_update=1e-2)
    return dqn
def train(learn_rate, model_update_interval, steps):
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=50000, target_model_update=model_update_interval,
                   policy=policy, gamma=.99, train_interval=4)
    dqn.compile(Adam(lr=learn_rate), metrics=['mae'])
    dqn.fit(env, nb_steps=steps, verbose=2, visualize=VISUALIZE)
    dqn.save_weights(SAVEFILE_FOLDER + "/dqn_pong_params.h5f", overwrite=True)
def build_agent(model, actions):
    """Builds an epsilon-greedy deep Q-learning agent."""
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1,
                                  value_min=0.1, value_test=0.2, nb_steps=10000)
    memory = SequentialMemory(limit=2000, window_length=3)
    dqn = DQNAgent(model, policy, memory=memory, enable_dueling_network=True,
                   dueling_type='avg', nb_actions=actions, nb_steps_warmup=1000)
    return dqn
def agent(self):
    nb_actions = self.env.action_space.n
    obs_dim = self.env.observation_space.shape

    model = Sequential()
    # input_shape must be a flat tuple of ints, so the observation shape is
    # concatenated rather than nested.
    model.add(Flatten(input_shape=(1,) + obs_dim))
    model.add(Dense(nb_actions, activation='linear'))
    print(model.summary())

    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=256, enable_dueling_network=True,
                   target_model_update=1e-2,
                   policy=InformedBoltzmannGumbelQPolicy(self.env),
                   test_policy=InformedGreedyQPolicy(self.env),
                   batch_size=128, train_interval=128)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    if self.initial_weights_file is not None:
        dqn.load_weights(self.initial_weights_file)
    self.train_episodes = 0
    return dqn
def agent(self):
    nb_actions = self.env.action_space.n
    model = self.build()
    print(model.summary())

    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=32, enable_dueling_network=True,
                   target_model_update=1e-2,
                   policy=InformedBoltzmannGumbelQPolicy(self.env),
                   test_policy=InformedGreedyQPolicy(self.env),
                   batch_size=32, train_interval=32)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    if self.initial_weights_file is not None:
        try:
            dqn.load_weights(self.initial_weights_file)
        except Exception:
            # Just skip loading if the weights cannot be read.
            pass
    return dqn
def test_duel_dqn():
    env = TwoRoundDeterministicRewardEnv()
    np.random.seed(123)
    env.seed(123)
    random.seed(123)
    nb_actions = env.action_space.n

    # Next, we build a very simple model.
    model = Sequential()
    model.add(Dense(16, input_shape=(1,)))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions, activation='linear'))

    memory = SequentialMemory(limit=1000, window_length=1)
    policy = EpsGreedyQPolicy(eps=.1)
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=50, target_model_update=1e-1, policy=policy,
                   enable_double_dqn=False, enable_dueling_network=True)
    dqn.compile(Adam(lr=1e-3))

    dqn.fit(env, nb_steps=2000, visualize=False, verbose=0)
    policy.eps = 0.
    h = dqn.test(env, nb_episodes=20, visualize=False)
    assert_allclose(np.mean(h.history['episode_reward']), 3.)
    # print(model.summary())
    print(model.output._keras_shape)
    return model


if __name__ == '__main__':
    env = myTGym(episode_type='0', percent_goal_profit=2, percent_stop_loss=5)
    # s1, s2, s3 = env.reset()
    # state = aggregate_state(s1, s2, s3)

    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    model = build_network()

    dqn = DQNAgent(model=model, nb_actions=2, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show,
    # but this slows down training quite a lot. You can always safely abort the
    # training prematurely using Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format('trading'), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)