def test_deepq():
    """
    test DeepQ on atari
    """
    logger.configure()
    set_global_seeds(SEED)
    env = make_atari(ENV_ID)
    env = bench.Monitor(env, logger.get_dir())
    env = wrap_atari_dqn(env)

    model = DQN(env=env, policy=CnnPolicy, learning_rate=1e-4, buffer_size=10000,
                exploration_fraction=0.1, exploration_final_eps=0.01, train_freq=4,
                learning_starts=10000, target_network_update_freq=1000, gamma=0.99,
                prioritized_replay=True, prioritized_replay_alpha=0.6, checkpoint_freq=10000)
    model.learn(total_timesteps=NUM_TIMESTEPS)

    env.close()
    del model, env
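# The test above refers to module-level constants (SEED, ENV_ID, NUM_TIMESTEPS) that are not
# part of this snippet. The values below are illustrative placeholders for a short smoke test,
# not the original settings.
SEED = 0
ENV_ID = 'BreakoutNoFrameskip-v4'
NUM_TIMESTEPS = 2500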
def main(self, args):
    """
    Train and save the DQN model, for the cartpole problem

    :param args: (ArgumentParser) the input arguments
    """
    env = gym.make('CartPole-v1')
    # model = DQN(MlpPolicy, env, verbose=1)
    # model.load("cartpole_model.pkl")
    model = DQN(env=env, policy=CustomPolicy, learning_rate=1e-3, buffer_size=50000,
                exploration_fraction=0.01, exploration_final_eps=0.02, verbose=1)
    model.learn(total_timesteps=args.max_timesteps, callback=self.callback)

    print("Saving model to cartpole_model.pkl")
    model.save("cartpole_model.pkl")


# if __name__ == '__main__':
#     parser = argparse.ArgumentParser(description="Train DQN on cartpole")
#     parser.add_argument('--max-timesteps', default=100000000, type=int,
#                         help="Maximum number of timesteps")
#     args = parser.parse_args()
#     main(args)
def train_DQN(env, out_dir, seed=None, **kwargs):
    # Logs will be saved in log_dir/monitor.csv
    global output_dir
    output_dir = out_dir
    log_dir = os.path.join(out_dir, 'log')
    os.makedirs(log_dir, exist_ok=True)
    env = Monitor(env, log_dir + '/', allow_early_resets=True)

    global n_steps, best_mean_reward
    best_mean_reward, n_steps = -np.inf, 0

    # Split the policy and the number of timesteps from the remaining DQN keyword arguments
    policy = kwargs.pop('policy')
    n_timesteps = kwargs.pop('n_timesteps')

    model = DQN(policy, env, verbose=1, tensorboard_log=os.path.join(log_dir, 'tb'),
                full_tensorboard_log=True, checkpoint_path=log_dir, seed=seed, **kwargs)
    model.learn(total_timesteps=n_timesteps, callback=log_callback)

    return model
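# train_DQN above passes `log_callback` to model.learn, but the callback itself is not part of
# this snippet. A minimal sketch is given below, following the stable-baselines convention of
# callback(_locals, _globals) -> bool (returning False aborts training). It reuses the
# `output_dir`, `n_steps` and `best_mean_reward` globals set in train_DQN; the check interval
# and the saved file name are assumptions for illustration.
import os

import numpy as np
from stable_baselines.results_plotter import load_results, ts2xy


def log_callback(_locals, _globals):
    global n_steps, best_mean_reward
    # Check progress every 1000 calls
    if (n_steps + 1) % 1000 == 0:
        x, y = ts2xy(load_results(os.path.join(output_dir, 'log')), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                # Save the best model seen so far next to the monitor logs
                _locals['self'].save(os.path.join(output_dir, 'log', 'best_model.pkl'))
    n_steps += 1
    return True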
def main(args):
    """
    Train and save the DQN model for the slab installing task in the V-REP environment

    :param args: (ArgumentParser) the input arguments
    """
    # env = gym.make("CartPole-v0")
    # model = DQN(
    #     env=env,
    #     policy=MlpPolicy,
    #     verbose=1,
    #     learning_rate=1e-3,
    #     buffer_size=50000,
    #     exploration_fraction=0.1,
    #     exploration_final_eps=0.02,
    #     tensorboard_log='./log',
    # )
    # model.learn(total_timesteps=args.max_timesteps, callback=callback)
    # print("Saving model to cartpole_model.pkl")
    # model.save("cartpole_model.pkl")

    # env = Vrep_Env()
    env = gym.make('vrep-v0')

    model = DQN(
        env=env,
        gamma=0.95,
        policy=MlpPolicy,
        # policy=CustomPolicy,
        verbose=1,
        learning_rate=1e-4,
        buffer_size=50000,  # 5000
        train_freq=1,
        learning_starts=100,
        batch_size=64,  # 32
        checkpoint_freq=3000,
        checkpoint_path='./model/',
        target_network_update_freq=300,
        prioritized_replay=True,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        tensorboard_log='./log',
    )
    # path = './model/'
    # model = DQN.load(path + 'bk2_16/cartpole_model6000.pkl', env, tensorboard_log='./log')
    model.learn(total_timesteps=args.max_timesteps, callback=callback, log_interval=30)

    print("Saving model to slab_installing_model.pkl")
    model.save("slab_installing_model.pkl")
def train(env, fname):
    env.setRender(False)
    env.reset()
    start = time.time()

    model = DQN(
        env=env,
        policy=CustomPolicy,
        learning_rate=1e-3,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02
    )
    model.learn(total_timesteps=STEPS, callback=callback)

    # save trained model
    model.save(fname)
    print("Duration: %.1f min" % ((time.time() - start) / 60))
def main():
    """
    Run the atari test
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env', help='environment ID', default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--prioritized-replay-alpha', type=float, default=0.6)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('--checkpoint-freq', type=int, default=10000)
    parser.add_argument('--checkpoint-path', type=str, default=None)
    args = parser.parse_args()

    logger.configure()
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = wrap_atari_dqn(env)
    policy = partial(CnnPolicy, dueling=args.dueling == 1)

    model = DQN(
        env=env,
        policy=policy,
        learning_rate=1e-4,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=bool(args.prioritized),
        prioritized_replay_alpha=args.prioritized_replay_alpha,
        checkpoint_freq=args.checkpoint_freq,
        checkpoint_path=args.checkpoint_path,
    )
    model.learn(total_timesteps=args.num_timesteps)

    env.close()
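# Assumed entry point so the Atari script can be launched from the command line; the guard is
# not shown in the original snippet.
if __name__ == '__main__':
    main()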
def main(args):
    """
    Train and save the DQN model, for the cartpole problem

    :param args: (ArgumentParser) the input arguments
    """
    env = gym.make("CartPole-v0")

    model = DQN(
        env=env,
        policy=MlpPolicy,
        learning_rate=1e-3,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
    )
    model.learn(total_timesteps=args.max_timesteps, callback=callback)

    print("Saving model to cartpole_model.pkl")
    model.save("cartpole_model.pkl")
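# A short usage sketch for the model saved by main() above: reload it and run one greedy
# episode. DQN.load and model.predict are standard stable-baselines calls; the evaluation loop
# itself is an assumption for illustration.
import gym
from stable_baselines import DQN


def enjoy_cartpole():
    env = gym.make("CartPole-v0")
    model = DQN.load("cartpole_model.pkl", env=env)
    obs = env.reset()
    done = False
    episode_reward = 0.0
    while not done:
        action, _states = model.predict(obs)
        obs, reward, done, _info = env.step(action)
        episode_reward += reward
    print("Episode reward: {:.1f}".format(episode_reward))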
def main(args):
    """
    Train and save the DQN model, for the mountain car problem

    :param args: (ArgumentParser) the input arguments
    """
    env = gym.make("MountainCar-v0")

    # using layer norm policy here is important for parameter space noise!
    model = DQN(policy=CustomPolicy, env=env, learning_rate=1e-3, buffer_size=50000,
                exploration_fraction=0.1, exploration_final_eps=0.1, param_noise=True)
    model.learn(total_timesteps=args.max_timesteps)

    print("Saving model to mountaincar_model.pkl")
    model.save("mountaincar_model.pkl")
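# main() above relies on a CustomPolicy that is not defined in this snippet. Since the comment
# stresses that a layer-norm policy matters for parameter space noise, a minimal sketch could
# subclass the DQN feed-forward policy with layer_norm=True; the layer sizes are an assumption.
from stable_baselines.deepq.policies import FeedForwardPolicy


class CustomPolicy(FeedForwardPolicy):
    def __init__(self, *args, **kwargs):
        super(CustomPolicy, self).__init__(*args, **kwargs,
                                           layers=[64],
                                           layer_norm=True,
                                           feature_extraction="mlp")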
if __name__ == '__main__':
    env = SumoEnvironment(net_file='nets/2way-single-intersection/single-intersection.net.xml',
                          route_file='nets/2way-single-intersection/single-intersection-vhvh.rou.xml',
                          out_csv_name='outputs/2way-single-intersection/dqn-vhvh2-stable-mlp-bs',
                          single_agent=True,
                          use_gui=True,
                          num_seconds=100000,
                          time_to_load_vehicles=120,
                          max_depart_delay=0,
                          phases=[
                              traci.trafficlight.Phase(32000, 32000, 32000, "GGrrrrGGrrrr"),
                              traci.trafficlight.Phase(2000, 2000, 2000, "yyrrrryyrrrr"),
                              traci.trafficlight.Phase(32000, 32000, 32000, "rrGrrrrrGrrr"),
                              traci.trafficlight.Phase(2000, 2000, 2000, "rryrrrrryrrr"),
                              traci.trafficlight.Phase(32000, 32000, 32000, "rrrGGrrrrGGr"),
                              traci.trafficlight.Phase(2000, 2000, 2000, "rrryyrrrryyr"),
                              traci.trafficlight.Phase(32000, 32000, 32000, "rrrrrGrrrrrG"),
                              traci.trafficlight.Phase(2000, 2000, 2000, "rrrrryrrrrry")
                          ])

    model = DQN(
        env=env,
        policy=MlpPolicy,
        learning_rate=1e-3,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02
    )
    model.learn(total_timesteps=100000)
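    # Possible follow-up (not in the original snippet): persist the trained policy and release
    # the SUMO/traci connection. The output file name is an assumption.
    model.save("outputs/2way-single-intersection/dqn-2way-single-intersection.pkl")
    env.close()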
def train_deep(env_name='CartPole-v1', steps=10000, lr=5e-4, exploration_fraction=0.1,
               exploration_final_eps=0.02, log_dir='./Logs/', log_name=None):
    """
    Wrapper for training a network with DQN

    :param env_name: The name of the environment to load [String]
    :param steps: The number of time-steps to train for [Int]
    :param exploration_fraction: The exploration rate for the algorithm [double or whatever]
    :param exploration_final_eps: The final exploration rate after decay [double or whatever]
    :param lr: The learning rate for the algorithm [double or whatever]
    :param log_dir: The base log folder [String]
    :param log_name: Puts the logs in a subdir of this name [String]
    """
    # Generates a folder hierarchy for the logging:
    if log_name is None:
        log_dir = log_dir + env_name + '/' + 'DeepQ/deep_{0:.0E}'.format(lr) + '/'
    else:
        log_dir = log_dir + env_name + '/' + log_name + '/' + 'DeepQ/deep_{0:.0E}'.format(lr) + '/'
    init_logging(log_dir)

    # Generates an environment for the algorithm to train against
    env = DummyVecEnv([
        lambda: Monitor(gym.make(env_name), log_dir, allow_early_resets=True)
    ])

    # Sets up a modified callback function to be able to handle saving etc. (Not really needed)
    best_mean_reward, n_steps, hist_rew = -np.inf, 0, 0

    def callback(_locals, _globals):
        """
        Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)

        :param _locals: (dict)
        :param _globals: (dict)
        """
        nonlocal n_steps, best_mean_reward, hist_rew
        # Evaluate policy performance every 5 calls
        if (n_steps + 1) % 5 == 0:
            x, y = ts2xy(load_results(log_dir), 'timesteps')
            if len(x) > 0:
                # mean_rew_plot(y, len(x))
                hist_rew = y.copy()
                mean_reward = np.mean(y[-100:])
                if (n_steps + 1) % 100 == 0:
                    print(x[-1], 'timesteps')
                    print("Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                          .format(best_mean_reward, mean_reward))

                # New best model, you could save the agent here
                if mean_reward > best_mean_reward:
                    best_mean_reward = mean_reward
                    # Example for saving best model
                    print("Saving new best model")
                    _locals['self'].save(log_dir + "/deep_{0:.0E}.pkl".format(lr))
        n_steps += 1
        # Returning True keeps training running; returning False would abort it early
        return True

    # Creates the training model etc.
    dqn_nw = DQN('MlpPolicy', env, learning_rate=lr, exploration_fraction=exploration_fraction,
                 exploration_final_eps=exploration_final_eps, checkpoint_freq=2000,
                 learning_starts=1000, target_network_update_freq=500)

    # Starts the training:
    dqn_nw.learn(total_timesteps=steps, callback=callback)
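# train_deep above calls init_logging, which is not defined in this snippet; a minimal stand-in
# would simply create the log folder. The __main__ invocation is an illustrative example rather
# than part of the original script.
import os


def init_logging(log_dir):
    os.makedirs(log_dir, exist_ok=True)


if __name__ == '__main__':
    train_deep(env_name='CartPole-v1', steps=10000)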