def train(env, name, steps=25000, pretrained_path=None):
    agent = build_agent(env)

    # Load pre-trained weights optionally
    if pretrained_path is not None:
        agent.load_weights(pretrained_path)

    save_path = os.path.join("models", name)
    os.makedirs(save_path, exist_ok=False)
    os.makedirs(os.path.join(save_path, "checkpoints"), exist_ok=False)

    h = agent.fit(env, nb_steps=steps, visualize=False, verbose=2, callbacks=[
        ModelIntervalCheckpoint(
            os.path.join(save_path, "checkpoints", "chkpt_{step}.h5f"),
            interval=int(steps / 20), verbose=1),
        TensorBoard(log_dir=os.path.join("logs", name))
    ])

    with open(os.path.join(save_path, "history.pkl"), "wb") as f:
        pickle.dump(h.history, f)
    agent.save_weights(os.path.join(save_path, "last_weights.h5f"), overwrite=True)
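# A minimal usage sketch for the train() helper above (not part of the original
# snippet): assumes build_agent() returns a compiled keras-rl agent and that a
# Gym environment is available; "CartPole-v1" and the run names are illustrative.
import os
import gym

env = gym.make("CartPole-v1")
train(env, "cartpole-base")
# Fine-tune a second run from the first run's saved weights; note that
# os.makedirs(..., exist_ok=False) makes reusing an existing run name fail fast
# with FileExistsError instead of silently overwriting results.
train(env, "cartpole-finetune",
      pretrained_path=os.path.join("models", "cartpole-base", "last_weights.h5f"))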
def run_agent(queue_agent, model_name, mode="test", version=None):
    print("started new process")
    dqn = get_rl_agent(queue_agent)
    model_iteration = get_newest_version(model_name) if version is None else version
    transfer_weights_filename = Path(f"tmp/{model_name}-{model_iteration}/") / "dqn_test_weights.h5f"

    if mode == "train":
        model_path = Path(f"tmp/{model_name}-{model_iteration + 1}/")
        model_path.mkdir(parents=True)
        with open(model_path / "model_cfg.json", "w+") as fp:
            json.dump(dqn.get_config(), fp)
    else:
        model_path = Path(f"tmp/{model_name}-{model_iteration}/")
        if not model_path.exists():
            raise ValueError("Model does not exist")

    print(f"Using model {model_path}")

    view_distance = view_distance_from_dqn(dqn)
    env = RestrictedViewTronEnv(queue_agent, view_distance)

    weights_filename = str(model_path / 'dqn_test_weights.h5f')
    checkpoint_weights_filename = str(model_path / "dqn_test_weights_{step}.h5f")
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename,
                                         interval=500000 // TRAIN_DIV)]

    def train():
        if transfer_weights_filename.exists():
            dqn.load_weights(transfer_weights_filename)
            print(f"Transfer learning from old model loaded from '{transfer_weights_filename}'")
        dqn.fit(env, callbacks=callbacks, nb_steps=2000000 // TRAIN_DIV,
                log_interval=10000)
        dqn.save_weights(weights_filename, overwrite=True)
        dqn.save_weights("tmp/dqn_test_weights.h5f", overwrite=True)
        # dqn.test(env, nb_episodes=20, visualize=True)

    def opponent():
        while True:
            dqn.load_weights(weights_filename)
            dqn.test(env, nb_episodes=2000000, visualize=False, verbose=0)

    def test(steps=20, visualize=True):
        dqn.load_weights(weights_filename)
        dqn.test(env, nb_episodes=steps, visualize=visualize)

    if mode == "train":
        train()
    elif mode == "test":
        test(20, True)
    elif mode == "evaluate":
        test(1000, False)
    elif mode == "opponent":
        opponent()
    else:
        raise ValueError("Invalid mode")

    queue_agent.quit()
def start(self):
    if self.train:
        weights_filename = 'dqn_{}_weights.h5f'.format(self.env_id)
        if self.weights:
            self.agent.load_weights(weights_filename)
            print('...loading weights for {}'.format(self.env_id))
        checkpoint_weights_filename = 'dqn_' + self.env_id + '_weights_{step}.h5f'
        log_filename = 'dqn_{}_log.json'.format(self.env_id)
        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
        ]
        callbacks += [FileLogger(log_filename, interval=100)]
        self.agent.fit(self, callbacks=callbacks, nb_steps=self.training_steps,
                       log_interval=10000)
        self.agent.save_weights(weights_filename, overwrite=True)
        self.agent.test(self, nb_episodes=2, visualize=False)
        self.render()
    else:
        weights_filename = 'dqn_{}_weights.h5f'.format(self.env_id)
        self.agent.load_weights(weights_filename)
        self.agent.test(self, nb_episodes=2, visualize=False)
        self.render()
def start(self):
    weights_filename = '{}/dqn_weights/dqn_{}_weights.h5f'.format(
        self.cwd, self.env_name)
    if self.weights:
        self.agent.load_weights(weights_filename)
        print('...loading weights for {}'.format(self.env_name))
    if self.train:
        checkpoint_weights_filename = ('dqn_' + self.env_name +
                                       '_weights_{step}.h5f')
        checkpoint_weights_filename = ('{}/dqn_weights/'.format(self.cwd) +
                                       checkpoint_weights_filename)
        log_filename = '{}/dqn_weights/dqn_{}_log.json'.format(
            self.cwd, self.env_name)
        print('FileLogger: {}'.format(log_filename))
        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=250000)
        ]
        callbacks += [FileLogger(log_filename, interval=100)]
        print('Starting training...')
        self.agent.fit(self.env, callbacks=callbacks,
                       nb_steps=self.number_of_training_steps,
                       log_interval=10000, verbose=0,
                       visualize=self.visualize)
        print('Saving AGENT weights...')
        self.agent.save_weights(weights_filename, overwrite=True)
    else:
        print('Starting TEST...')
        self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
def train_model(seed=1, setup=0):
    np.random.seed(seed)

    if setup == 0:
        env = CameraControlEnv(a_p=0, a_r=0, e_thres=0)
    elif setup == 1:
        env = CameraControlEnv(a_p=0, a_r=0, e_thres=0.8)
    else:
        env = CameraControlEnv(a_p=0.5, a_r=0.2, e_thres=0.8)
    env.seed(seed)

    model = define_model(actions=7)
    memory = SequentialMemory(limit=10000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.0,
                                  value_min=0.1, value_test=0.05, nb_steps=95000)
    dqn = DQNAgent(model=model, nb_actions=7, policy=policy, memory=memory,
                   processor=None, nb_steps_warmup=500, gamma=0.95, delta_clip=1,
                   target_model_update=0.001, batch_size=32)
    dqn.compile(RMSprop(lr=.0001), metrics=['mae'])

    log_filename = 'results/drone_camera_control_log_' + str(setup) + '.json'
    model_checkpoint_filename = 'results/drone_camera_cnn_weights_' + str(setup) + '_{step}.model'
    callbacks = [ModelIntervalCheckpoint(model_checkpoint_filename, interval=5000)]
    callbacks += [FileLogger(log_filename, interval=1)]

    dqn.fit(env, nb_steps=100000, nb_max_episode_steps=100, verbose=2,
            visualize=False, log_interval=1, callbacks=callbacks)

    # After training is done, save the final weights.
    model_filename = 'models/drone_camera_cnn_' + str(setup) + '.model'
    dqn.save_weights(model_filename, overwrite=True)
def train(self):
    # Okay, now it's time to learn something! We capture the interrupt exception
    # so that training can be prematurely aborted. Notice that now you can use
    # the built-in Keras callbacks!
    weights_filename = self.get_weight_path(self.stock)
    checkpoint_weights_filename = self.get_weight_path(self.stock) + '_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(self.stock)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    # callbacks += [WandbLogger(
    #     project="stock-bot-v0"
    # )]

    self.dqn.fit(self.env, callbacks=callbacks, nb_steps=1750000,
                 log_interval=10000)

    # After training is done, we save the final weights one more time.
    self.dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    self.dqn.test(self.env, nb_episodes=10, visualize=False)
def rl_learn(a=None):
    keras.backend.clear_session()

    # build actor/critic network
    actor = actor_net(env)
    critic = critic_net(env)

    model_path = 'save_model/{}_weights.h5f'.format(agent_name)
    memory = SequentialMemory(limit=50000, window_length=1)
    histcallback = TrainHistoryLogCallback(file_path='save_model/',
                                           plot_interval=1000)
    chkpoint = ModelIntervalCheckpoint(filepath=model_path, interval=10000)

    # define policy
    policy_minigame = starcraft_multiagent_eGreedyPolicy(
        nb_agents=env.nb_agents, nb_actions=env.nb_actions)
    policy = LinearAnnealedPolicy(policy_minigame, attr='eps', value_max=1.,
                                  value_min=.1, value_test=.05, nb_steps=100000)
    test_policy = starcraft_multiagent_eGreedyPolicy(
        nb_agents=env.nb_agents, nb_actions=env.nb_actions, eps=0.05)

    agent = MA_DDPGAgent(nb_agents=env.nb_agents, nb_actions=env.nb_actions,
                         actor=actor, critic=critic, action_type='discrete',
                         critic_action_input=critic.inputs[2:4],
                         train_interval=10, batch_size=128, memory=memory,
                         nb_steps_warmup_critic=5000, reward_factor=0.1,
                         nb_steps_warmup_actor=5000, policy=policy,
                         test_policy=test_policy, gamma=.99,
                         target_model_update=1e-3)
    agent.compile([Adam(lr=5e-5), Adam(lr=5e-5)], metrics=['mae'])

    actor.summary()
    critic.summary()
    time.sleep(1)

    hist_train = agent.fit(env, nb_steps=6000000, nb_max_episode_steps=2000,
                           visualize=False, verbose=2,
                           callbacks=[histcallback, chkpoint])
    np.save('save_model/hist_train.npy', hist_train.history)

    # After training is done, we save the final weights.
    agent.save_weights(model_path, overwrite=True)
def build_callbacks(env_name):
    checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    return callbacks
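# A minimal wiring sketch for a build_callbacks() helper like the one above
# (assumed context: a compiled keras-rl DQNAgent named `dqn` and a Gym env
# already exist; the environment name and step counts are illustrative).
# ModelIntervalCheckpoint substitutes '{step}' each time it saves, while
# FileLogger appends training metrics to the JSON log.
callbacks = build_callbacks('CartPole-v1')
dqn.fit(env, callbacks=callbacks, nb_steps=50000, log_interval=10000)
dqn.save_weights('dqn_CartPole-v1_weights.h5f', overwrite=True)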
def build_callbacks(env_name):
    checkpoint_weights_filename = 'results/Swimmer/ddpg_' + env_name + '_weights_{step}.h5f'
    log_filename = 'results/Swimmer6/exp_5/ddpg_{}_log.json'.format(env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=5000)]
    return callbacks
def train(index, policy_nb_steps, fit_nb_steps):
    # Get the environment and extract the number of actions.
    print("Using environment", environment_name)
    environment = gym.make(environment_name)
    np.random.seed(666)
    nb_actions = environment.action_space.shape[0]

    # Build the model.
    v_model, mu_model, l_model = build_models((WINDOW_LENGTH,) + INPUT_SHAPE,
                                              nb_actions)
    v_model.summary()
    mu_model.summary()
    l_model.summary()

    # Finally, we configure and compile our agent. You can use every built-in
    # Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = CarRacingProcessor()
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3,
                                              size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions, V_model=v_model, L_model=l_model,
                     mu_model=mu_model, memory=memory, nb_steps_warmup=100,
                     random_process=random_process, gamma=.99,
                     target_model_update=1e-3, processor=processor)
    agent.compile(optimizers.Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    weights_filename = 'naf_{}_{}_weights.h5f'.format(environment_name, index)

    # Okay, now it's time to learn something! We capture the interrupt exception
    # so that training can be prematurely aborted. Notice that now you can use
    # the built-in Keras callbacks!
    checkpoint_weights_filename = 'naf_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'naf_{}_log.json'.format(environment_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [TensorboardCallback()]
    callbacks += [FileLogger(log_filename, interval=100)]

    agent.fit(environment,
              callbacks=callbacks,
              # nb_steps=1750000,
              nb_steps=fit_nb_steps,
              log_interval=10000,
              visualize="visualize" in sys.argv)

    # After training is done, we save the final weights one more time.
    agent.save_weights(weights_filename, overwrite=True)
def build_callbacks():
    checkpoint_weights_filename = PREFIX + '_weights_{step}.h5f'
    checkpoint_memory_filename = args.memory
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)]
    callbacks += [MemoryIntervalCheckpoint(checkpoint_memory_filename, interval=1000)]
    callbacks += [TensorBoard(log_dir='./logs', histogram_freq=0,
                              write_graph=False, write_grads=False,
                              write_images=False, embeddings_freq=0,
                              update_freq='epoch')]
    return callbacks
def build_callbacks(env_name, filename_pre, filename_exp):
    checkpoint_weights_filename = filename_pre + env_name + '_weights_{step}.h5f'
    log_filename = filename_pre + filename_exp + '/ddpg_{}_log.json'.format(env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=50000)
    ]
    callbacks += [FileLogger(log_filename, interval=50000)]
    return callbacks
def train(self):
    # checkpoint_weights_filename and log_filename are assumed to be
    # module-level constants alongside the interval settings below.
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename,
                                interval=INTERVAL_CALLBACK),
        FileLogger(log_filename, interval=FILE_LOGGER_INTERVAL)
    ]
    self.dqn.fit(self.env, callbacks=callbacks, nb_steps=NB_STEPS,
                 log_interval=FIT_LOG_INTERVAL)
def init_save_model_manager(self, outdir, interval):
    if interval is not None:
        model_dir = os.path.join(outdir, "models")
        # Create if necessary
        if not os.path.exists(model_dir):
            os.makedirs(model_dir, exist_ok=True)
        fp = os.path.join(model_dir, 'weights.{step:02d}.hdf5')
        return ModelIntervalCheckpoint(filepath=fp, interval=interval, verbose=1)
    return None
def start(self) -> None:
    """
    Entry point for agent training and testing

    :return: (void)
    """
    output_directory = os.path.join(self.cwd, 'dqn_weights')
    if not os.path.exists(output_directory):
        LOGGER.info('{} does not exist. Creating Directory.'.format(
            output_directory))
        os.mkdir(output_directory)

    weight_name = 'dqn_{}_{}_weights.h5f'.format(self.env_name,
                                                 self.neural_network_type)
    weights_filename = os.path.join(output_directory, weight_name)
    LOGGER.info("weights_filename: {}".format(weights_filename))

    if self.load_weights:
        LOGGER.info('...loading weights for {} from\n{}'.format(
            self.env_name, weights_filename))
        self.agent.load_weights(weights_filename)

    if self.train:
        step_chkpt = '{step}.h5f'
        step_chkpt = 'dqn_{}_weights_{}'.format(self.env_name, step_chkpt)
        checkpoint_weights_filename = os.path.join(self.cwd, 'dqn_weights',
                                                   step_chkpt)
        LOGGER.info("checkpoint_weights_filename: {}".format(
            checkpoint_weights_filename))
        log_filename = os.path.join(self.cwd, 'dqn_weights',
                                    'dqn_{}_log.json'.format(self.env_name))
        LOGGER.info('log_filename: {}'.format(log_filename))

        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=250000)
        ]
        callbacks += [FileLogger(log_filename, interval=100)]

        LOGGER.info('Starting training...')
        self.agent.fit(self.env, callbacks=callbacks,
                       nb_steps=self.number_of_training_steps,
                       log_interval=10000, verbose=0,
                       visualize=self.visualize)
        LOGGER.info("training over.")
        LOGGER.info('Saving AGENT weights...')
        self.agent.save_weights(weights_filename, overwrite=True)
        LOGGER.info("AGENT weights saved.")
    else:
        LOGGER.info('Starting TEST...')
        self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
def build_callbacks(): """ callbacks for the deep q agent """ checkpoint_weights_filename = 'model_checkpoints/dqn_weights_.h5f' log_filename = 'dqn_log.json' callbacks = [ ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500) ] callbacks += [FileLogger(log_filename, interval=100)] return callbacks
def fit_dqn(fit_steps: int, cont: bool):
    training_env = gym.make('adver-v0', target_label=1, test_mode=False)

    callbacks = [ModelIntervalCheckpoint(DQN_WEIGHTS_FILENAME,
                                         interval=WEIGHTS_CHECKPOINT)]
    callbacks += [FileLogger(DQN_LOG_FILENAME, interval=LOG_CHECKPOINT)]

    dqn = init_dqn(fit_steps)
    if cont:
        dqn.load_weights(DQN_WEIGHTS_FILENAME.format(step="final"))
        print("continue fitting from last checkpoint")

    dqn.fit(training_env, callbacks=callbacks, nb_steps=fit_steps,
            start_step_policy=start_policy, nb_max_start_steps=3)
    dqn.save_weights(DQN_WEIGHTS_FILENAME.format(step="final"), overwrite=True)
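# The snippet above reuses the '{step}' placeholder in DQN_WEIGHTS_FILENAME for
# both interval checkpoints (ModelIntervalCheckpoint fills in the step count)
# and the final save (formatted manually with step="final"). The concrete value
# below is only an illustrative assumption of what the constant might look like:
DQN_WEIGHTS_FILENAME = 'dqn_adver_weights_{step}.h5f'
DQN_WEIGHTS_FILENAME.format(step=50000)    # -> 'dqn_adver_weights_50000.h5f'
DQN_WEIGHTS_FILENAME.format(step='final')  # -> 'dqn_adver_weights_final.h5f'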
def build_callbacks(env_name):
    log_dir = 'logs'
    if not exists(log_dir):
        os.makedirs(log_dir)
    checkpoint_weights_filename = join(log_dir,
                                       'dqn_' + env_name + '_weights_{step}.h5f')
    log_filename = join(log_dir, 'dqn_{}_log.json'.format(env_name))
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=25000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    return callbacks
def train(self, steps=50000, interval=100, visualise=False):
    callbacks = [
        ModelIntervalCheckpoint(self.checkpoint_path, interval=10),
        FileLogger(self.log_path, interval=100),
    ]
    self.dqn.fit(env=self.env, nb_steps=steps, visualize=visualise,
                 callbacks=callbacks, log_interval=interval,
                 nb_max_episode_steps=self.env.total_steps - 1)
    self.dqn.save_weights(self.model_path, overwrite=True)
def trainDQN(cfg, env, dqn):
    # Okay, now it's time to learn something! We capture the interrupt exception
    # so that training can be prematurely aborted. Notice that you can use the
    # built-in Keras callbacks!
    callbacks = [
        ModelIntervalCheckpoint(cfg.checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(cfg.log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=cfg.nb_steps_dqn_fit,
            log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(cfg.weights_filename, overwrite=True)
def fit(self, env, num_steps, weights_path=None, visualize=False):
    callbacks = []
    if weights_path is not None:
        callbacks += [ModelIntervalCheckpoint(weights_path, interval=50000,
                                              verbose=1)]
    self.agent.fit(env=env,
                   nb_steps=num_steps,
                   action_repetition=opt.dqn_action_repetition,
                   callbacks=callbacks,
                   log_interval=opt.log_interval,
                   test_interval=opt.test_interval,
                   test_nb_episodes=opt.test_nb_episodes,
                   test_action_repetition=opt.dqn_action_repetition,
                   visualize=visualize,
                   test_visualize=visualize,
                   verbose=1)
def train(self, env):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.05, nb_steps=1000)
    amount_memory = 10000000
    memory = SequentialMemory(limit=amount_memory, window_length=WINDOW_LENGTH)
    processor = EmptyProcessor()
    self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions, policy=policy,
                        memory=memory, processor=processor, nb_steps_warmup=50,
                        gamma=.99, target_model_update=10000, train_interval=4,
                        delta_clip=1., enable_double_dqn=False)
    self.dqn.compile(Adam(lr=0.01), metrics=['mae'])

    weights_filename = 'dqn_{}_weights.h5f'.format(ENV_NAME)
    checkpoint_weights_filename = 'dqn_' + ENV_NAME + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(ENV_NAME)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=2500)]
    callbacks += [FileLogger(log_filename, interval=100)]

    # self.dqn.load_weights(ENV_NAME + 'weights.h5f')
    self.dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000,
                 visualize=True, verbose=2)

    # After training is done, we save the final weights one more time.
    self.dqn.save_weights(weights_filename, overwrite=True)
def train(self, env, nb_actions):
    weights_filename = 'dqn_{}_weights.h5f'.format(ENV_NAME)
    checkpoint_weights_filename = 'dqn_' + ENV_NAME + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(ENV_NAME)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=10000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    self.dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000,
                 visualize=True, verbose=3)

    # After training is done, we save the final weights one more time.
    self.dqn.save_weights(weights_filename, overwrite=True)
def train_dqn(agent, optimizer, train_episodes, episode_len, logdir, checkpoint,
              verbose_flag=1):
    tb_callback = [SubTensorBoard(logdir=logdir)]
    tb_callback += [ModelIntervalCheckpoint(checkpoint, 10000)]
    agent.compile(optimizer)
    env = gym.make('Tutankham-v4')
    agent.fit(env, visualize=False, nb_steps=train_episodes, verbose=verbose_flag,
              nb_max_episode_steps=episode_len, callbacks=tb_callback)
def fit(self, env, nb_steps):
    weights_dir = 'weights/{}'.format(self.mission_name)
    if not os.path.exists(weights_dir):
        os.makedirs(weights_dir)
    weights_path = os.path.join(weights_dir, '{}'.format(self.name))
    callbacks = [
        ModelIntervalCheckpoint(weights_path, interval=10000, verbose=1)
    ]
    self.agent.fit(env, nb_steps,
                   action_repetition=4,
                   callbacks=callbacks,
                   verbose=1,
                   log_interval=10000,
                   test_interval=10000,
                   test_nb_episodes=10,
                   test_action_repetition=4,
                   test_visualize=False)
def Run_DQL():
    model = build_model(14, 4)
    num_actions = 4
    policy = EpsGreedyQPolicy(0.1)
    env = BusEnv("DQL")
    env.seed(123)
    memory = SequentialMemory(limit=5000, window_length=1)
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory,
                   nb_steps_warmup=10, target_model_update=1e-3, policy=policy,
                   gamma=0.9, memory_interval=1)

    files = open("testDQL.csv", "w")
    files.write("kq\n")

    # create callbacks (note: callback3 is built but not passed to fit below)
    callbacks = CustomerTrainEpisodeLogger("DQL_5phut.csv")
    callback2 = ModelIntervalCheckpoint("weight_DQL.h5f", interval=50000)
    callback3 = TestLogger11(files)

    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    dqn.fit(env, nb_steps=94151, visualize=False, verbose=2,
            callbacks=[callbacks, callback2])
def main():
    # Get the environment and extract the number of actions.
    environment_name = "lawnmower-medium-obstacles-v0"
    environment = gym.make(environment_name)
    environment.print_description()
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model_cnn((WINDOW_LENGTH,) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Create sequential memory for memory replay.
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

    # Process environment inputs and outputs.
    processor = LawnmowerProcessor()

    # Use epsilon-greedy as our policy.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.05,
                                  nb_steps=int(STEPS * 0.8))

    # Instantiate and compile our agent.
    dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy,
                   memory=memory, processor=processor, nb_steps_warmup=50000,
                   gamma=.99, target_model_update=10000, train_interval=4,
                   delta_clip=1.)
    dqn.compile(optimizers.Adam(lr=.00025), metrics=['mae'])

    # Set up some callbacks for training.
    checkpoint_weights_filename = 'dqn_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(environment_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [TensorboardCallback(os.path.join("tensorboard", datetime_string))]
    callbacks += [FileLogger(log_filename, interval=100)]

    # Train the agent.
    dqn.fit(environment, callbacks=callbacks, nb_steps=STEPS, log_interval=10000)

    # Save the final network after training.
    weights_filename = 'dqn_{}_weights.h5f'.format(environment_name)
    dqn.save_weights(weights_filename, overwrite=True)

    # Run the agent.
    dqn.test(environment, nb_episodes=10, visualize=False)
def train_model(seed=1):
    np.random.seed(seed)
    env = CameraControlEnvCont()
    env.seed(seed)

    actor, critic, action_input = define_actor_critic_models(actions=3)
    memory = SequentialMemory(limit=10000, window_length=1)
    random_process = GaussianWhiteNoiseProcess(mu=0, sigma=0.1, sigma_min=0.01,
                                               n_steps_annealing=49000, size=3)
    agent = DDPGAgent(nb_actions=3, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=500, nb_steps_warmup_actor=500,
                      random_process=random_process, gamma=.1,
                      target_model_update=1e-3, batch_size=32)
    agent.compile([RMSprop(lr=.0001), RMSprop(lr=.01)], metrics=['mae'])

    log_filename = 'results/drone_camera_cont_control_log.json'
    model_checkpoint_filename = 'results/drone_camera_cont_cnn_weights_{step}.model'
    callbacks = [
        ModelIntervalCheckpoint(model_checkpoint_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=1)]

    agent.fit(env, nb_steps=50000, nb_max_episode_steps=100, verbose=2,
              visualize=False, log_interval=1, callbacks=callbacks)
def Run_FDQO():
    FDQO_method = Model_Deep_Q_Learning(14, 4)
    model = FDQO_method.build_model()

    # Create FDQO environment
    env = BusEnv("FDQO")
    env.seed(123)

    # create memory
    memory = SequentialMemory(limit=5000, window_length=1)

    # create policy
    policy = EpsGreedyQPolicy(0.0)

    # open output file
    files = open("testFDQO.csv", "w")
    files.write("kq\n")

    # create callbacks (note: callback3 is built but not passed to fit below)
    callbacks = CustomerTrainEpisodeLogger("FDQO_5phut.csv")
    callback2 = ModelIntervalCheckpoint("weight_FDQO.h5f", interval=50000)
    callback3 = TestLogger11(files)

    model.compile(Adam(lr=1e-3), metrics=['mae'])
    model.fit(env, nb_steps=94151, visualize=False, verbose=2,
              callbacks=[callbacks, callback2])
    files.close()
def create_dqn(model, log_interval=50000, model_name='dqn_agent_checkpoint',
               file_log_path='./logs/log.txt',
               tensorboard_path='./logs/tensorboard/'):
    model_path = './models/' + model_name + '.h5'
    file_logger = FileLogger(file_log_path, interval=log_interval)
    checkpoint = ModelIntervalCheckpoint(model_path, interval=log_interval)
    tensorboard = TensorboardLogger(tensorboard_path)
    callbacks = [file_logger, checkpoint, tensorboard]

    # Use the last 4 observations as the agent's state.
    history_length = 4
    memory = SequentialMemory(limit=500000, window_length=history_length)

    # Combine BoltzmannQPolicy and EpsGreedyQPolicy.
    policy = MaxBoltzmannQPolicy()
    # Start epsilon at 1.0 and anneal it every step so random exploration
    # tapers off once the map has been explored.
    policy = LinearAnnealedPolicy(inner_policy=policy, attr='eps',
                                  value_max=1.0, value_min=0.1,
                                  value_test=0.04, nb_steps=NUMBER_OF_STEPS)

    # Create an instance of DQNAgent from keras-rl
    dqn = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory,
                   policy=policy, processor=CustomProcessor(),
                   nb_steps_warmup=512, enable_dueling_network=True,
                   dueling_type='avg', target_model_update=5e2, batch_size=32)
    dqn.compile(Adam(lr=5e-4), metrics=['mae'])
    return dqn, callbacks
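# A minimal driver sketch for create_dqn() (assumed context: a Keras `model`,
# the module-level `env`, NUMBER_OF_STEPS, and CustomProcessor are in scope;
# the run name and interval below are illustrative).
dqn, callbacks = create_dqn(model, log_interval=10000, model_name='my_dqn_run')
dqn.fit(env, callbacks=callbacks, nb_steps=NUMBER_OF_STEPS, verbose=2)
# Persist the final weights alongside the interval checkpoints.
dqn.save_weights('./models/my_dqn_run_final.h5', overwrite=True)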