Example No. 1
def test_deepq():
    """
    Test DeepQ on Atari
    """
    logger.configure()
    set_global_seeds(SEED)
    env = make_atari(ENV_ID)
    env = bench.Monitor(env, logger.get_dir())
    env = wrap_atari_dqn(env)

    model = DQN(env=env,
                policy=CnnPolicy,
                learning_rate=1e-4,
                buffer_size=10000,
                exploration_fraction=0.1,
                exploration_final_eps=0.01,
                train_freq=4,
                learning_starts=10000,
                target_network_update_freq=1000,
                gamma=0.99,
                prioritized_replay=True,
                prioritized_replay_alpha=0.6,
                checkpoint_freq=10000)
    model.learn(total_timesteps=NUM_TIMESTEPS)

    env.close()
    del model, env
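The test relies on imports and constants defined elsewhere in the module. A minimal sketch of what they might look like under stable-baselines 2.x; the constant values here are assumptions, not taken from the original project:

from stable_baselines import DQN, bench, logger
from stable_baselines.common import set_global_seeds
from stable_baselines.common.atari_wrappers import make_atari
from stable_baselines.deepq import CnnPolicy, wrap_atari_dqn

SEED = 0                           # hypothetical seed
ENV_ID = 'BreakoutNoFrameskip-v4'  # hypothetical environment id
NUM_TIMESTEPS = 2500               # hypothetical (short) test budget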
Example No. 2
    def main(self, args):
        """
        Train and save the DQN model for the cartpole problem

        :param args: (ArgumentParser) the input arguments
        """

        env = gym.make('CartPole-v1')
        # model = DQN(MlpPolicy, env, verbose=1)
        # model.load("cartpole_model.pkl")
        model = DQN(env=env,
                    policy=CustomPolicy,
                    learning_rate=1e-3,
                    buffer_size=50000,
                    exploration_fraction=0.01,
                    exploration_final_eps=0.02,
                    verbose=1)
        model.learn(total_timesteps=args.max_timesteps, callback=self.callback)

        print("Saving model to cartpole_model.pkl")
        model.save("cartpole_model.pkl")


#if __name__ == '__main__':
#parser = argparse.ArgumentParser(description="Train DQN on cartpole")
#parser.add_argument('--max-timesteps', default=100000000, type=int, help="Maximum number of timesteps")
#args = parser.parse_args()
#main(args)
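A minimal sketch of how the saved model could be loaded and evaluated afterwards; the episode loop below is illustrative and not part of the original snippet:

import gym
from stable_baselines import DQN

model = DQN.load("cartpole_model.pkl")
env = gym.make('CartPole-v1')

obs = env.reset()
for _ in range(1000):
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    if done:
        obs = env.reset()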
Example No. 3
def train_DQN(env, out_dir, seed=None, **kwargs):
    """
    Train a DQN agent on env; logs and checkpoints are written under out_dir.
    """
    # Logs will be saved in log_dir/monitor.csv
    global output_dir
    output_dir = out_dir
    log_dir = os.path.join(out_dir, 'log')
    os.makedirs(log_dir, exist_ok=True)
    env = Monitor(env, log_dir + '/', allow_early_resets=True)

    global n_steps, best_mean_reward
    best_mean_reward, n_steps = -np.inf, 0

    policy = kwargs.pop('policy')
    n_timesteps = kwargs.pop('n_timesteps')

    model = DQN(policy,
                env,
                verbose=1,
                tensorboard_log=os.path.join(log_dir, 'tb'),
                full_tensorboard_log=True,
                checkpoint_path=log_dir,
                seed=seed,
                **kwargs)

    model.learn(total_timesteps=n_timesteps, callback=log_callback)

    return model
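train_DQN passes a log_callback that is not shown. A minimal sketch of what such a callback could look like, assuming the usual stable-baselines result helpers and the module-level globals used above; the best-model file name is an assumption:

import os
import numpy as np
from stable_baselines.results_plotter import load_results, ts2xy

def log_callback(_locals, _globals):
    """Every 1000 steps, check the monitor results and keep the best model."""
    global n_steps, best_mean_reward
    if (n_steps + 1) % 1000 == 0:
        x, y = ts2xy(load_results(os.path.join(output_dir, 'log')), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                _locals['self'].save(os.path.join(output_dir, 'best_model.pkl'))
    n_steps += 1
    return True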
Example No. 4
def main(args):
    """
    Train and save the DQN model, for the cartpole problem

    :param args: (ArgumentParser) the input arguments
    """

    # env = gym.make("CartPole-v0")
    # model = DQN(
    #     env=env,
    #     policy=MlpPolicy,
    #     verbose=1,
    #     learning_rate=1e-3,
    #     buffer_size=50000,
    #     exploration_fraction=0.1,
    #     exploration_final_eps=0.02,
    #     tensorboard_log='./log',
    # )
    # model.learn(total_timesteps=args.max_timesteps, callback=callback)

    # print("Saving model to cartpole_model.pkl")
    # model.save("cartpole_model.pkl")

    # env = Vrep_Env()
    env = gym.make('vrep-v0')

    model = DQN(
        env=env,
        gamma=0.95,
        policy=MlpPolicy,
        #policy=CustomPolicy,
        verbose=1,
        learning_rate=1e-4,
        buffer_size=50000,  #5000
        train_freq=1,
        learning_starts=100,
        batch_size=64,  # 32
        checkpoint_freq=3000,
        checkpoint_path='./model/',
        target_network_update_freq=300,
        prioritized_replay=True,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        tensorboard_log='./log',
    )
    # path = './model/'
    # model = DQN.load(path+'bk2_16/cartpole_model6000.pkl', env, tensorboard_log='./log')
    model.learn(total_timesteps=args.max_timesteps,
                callback=callback,
                log_interval=30)

    print("Saving model to slab_installing_model.pkl")
    model.save("slab_installing_model.pkl")
Example No. 5
def train(env, fname):
    env.setRender(False)
    env.reset()
    
    start = time.time()
    model = DQN(
        env=env,
        policy=CustomPolicy,
        learning_rate=1e-3,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02
    )
    model.learn(total_timesteps=STEPS, callback=callback)

    # save trained model
    model.save(fname)
    print("Duration: %.1f" % ((time.time() - start)/60))
Example No. 6
def main():
    """
    Run the atari test
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--env',
                        help='environment ID',
                        default='BreakoutNoFrameskip-v4')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--prioritized', type=int, default=1)
    parser.add_argument('--dueling', type=int, default=1)
    parser.add_argument('--prioritized-replay-alpha', type=float, default=0.6)
    parser.add_argument('--num-timesteps', type=int, default=int(10e6))
    parser.add_argument('--checkpoint-freq', type=int, default=10000)
    parser.add_argument('--checkpoint-path', type=str, default=None)

    args = parser.parse_args()
    logger.configure()
    set_global_seeds(args.seed)
    env = make_atari(args.env)
    env = bench.Monitor(env, logger.get_dir())
    env = wrap_atari_dqn(env)
    policy = partial(CnnPolicy, dueling=args.dueling == 1)

    model = DQN(
        env=env,
        policy=policy,
        learning_rate=1e-4,
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
        prioritized_replay=bool(args.prioritized),
        prioritized_replay_alpha=args.prioritized_replay_alpha,
        checkpoint_freq=args.checkpoint_freq,
        checkpoint_path=args.checkpoint_path,
    )
    model.learn(total_timesteps=args.num_timesteps)

    env.close()
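partial(CnnPolicy, dueling=...) is one way to forward a keyword argument to the policy. Under stable-baselines 2.x the same effect can be sketched with DQN's policy_kwargs argument; only a few of the arguments from above are repeated here, the rest would stay unchanged:

model = DQN(
    env=env,
    policy=CnnPolicy,
    policy_kwargs=dict(dueling=args.dueling == 1),
    learning_rate=1e-4,
    buffer_size=10000,
    prioritized_replay=bool(args.prioritized),
)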
Example No. 7
def main(args):
    """
    Train and save the DQN model for the cartpole problem

    :param args: (ArgumentParser) the input arguments
    """
    env = gym.make("CartPole-v0")
    model = DQN(
        env=env,
        policy=MlpPolicy,
        learning_rate=1e-3,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
    )
    model.learn(total_timesteps=args.max_timesteps, callback=callback)

    print("Saving model to cartpole_model.pkl")
    model.save("cartpole_model.pkl")
Example No. 8
def main(args):
    """
    Train and save the DQN model for the mountain car problem

    :param args: (ArgumentParser) the input arguments
    """
    env = gym.make("MountainCar-v0")

    # using layer norm policy here is important for parameter space noise!
    model = DQN(policy=CustomPolicy,
                env=env,
                learning_rate=1e-3,
                buffer_size=50000,
                exploration_fraction=0.1,
                exploration_final_eps=0.1,
                param_noise=True)
    model.learn(total_timesteps=args.max_timesteps)

    print("Saving model to mountaincar_model.pkl")
    model.save("mountaincar_model.pkl")
Example No. 9
if __name__ == '__main__':

    env = SumoEnvironment(net_file='nets/2way-single-intersection/single-intersection.net.xml',
                          route_file='nets/2way-single-intersection/single-intersection-vhvh.rou.xml',
                          out_csv_name='outputs/2way-single-intersection/dqn-vhvh2-stable-mlp-bs',
                          single_agent=True,
                          use_gui=True,
                          num_seconds=100000,
                          time_to_load_vehicles=120,
                          max_depart_delay=0,
                          phases=[
                              traci.trafficlight.Phase(32000, 32000, 32000, "GGrrrrGGrrrr"),
                              traci.trafficlight.Phase(2000, 2000, 2000, "yyrrrryyrrrr"),
                              traci.trafficlight.Phase(32000, 32000, 32000, "rrGrrrrrGrrr"),
                              traci.trafficlight.Phase(2000, 2000, 2000, "rryrrrrryrrr"),
                              traci.trafficlight.Phase(32000, 32000, 32000, "rrrGGrrrrGGr"),
                              traci.trafficlight.Phase(2000, 2000, 2000, "rrryyrrrryyr"),
                              traci.trafficlight.Phase(32000, 32000, 32000, "rrrrrGrrrrrG"),
                              traci.trafficlight.Phase(2000, 2000, 2000, "rrrrryrrrrry")])

    model = DQN(
        env=env,
        policy=MlpPolicy,
        learning_rate=1e-3,
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.02
    )
    model.learn(total_timesteps=100000)
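The SUMO snippet trains the agent but never persists it. If the model is meant to be reused, a save call after learn would be the usual pattern; the file name below is an assumption:

model.save("dqn_2way-single-intersection.pkl")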
Example No. 10
def train_deep(env_name='CartPole-v1',
               steps=10000,
               lr=5e-4,
               exploration_fraction=0.1,
               exploration_final_eps=0.02,
               log_dir='./Logs/',
               log_name=None):
    """
    Wrapper for training a network with DQN

    :param env_name: The name of the environment to load [str]
    :param steps: The number of time-steps to train for [int]
    :param lr: The learning rate for the algorithm [float]
    :param exploration_fraction: The fraction of training over which the exploration rate is annealed [float]
    :param exploration_final_eps: The final exploration rate after decay [float]
    :param log_dir: The base log folder [str]
    :param log_name: Puts the logs in a subdirectory of this name [str]
    """

    # Generates a folder hierarchy for the logging:
    if log_name is None:
        log_dir = log_dir + env_name + '/' + 'DeepQ/deep_{0:.0E}'.format(
            lr) + '/'
    else:
        log_dir = log_dir + env_name + '/' + log_name + '/' + 'DeepQ/deep_{0:.0E}'.format(
            lr) + '/'
    init_logging(log_dir)

    # Generates an environment for the algorithm to train against
    env = DummyVecEnv([
        lambda: Monitor(gym.make(env_name), log_dir, allow_early_resets=True)
    ])

    # Sets up a modified callback function to handle saving etc. (not strictly needed)
    best_mean_reward, n_steps, hist_rew = -np.inf, 0, 0

    def callback(_locals, _globals):
        """
        Callback called at each step (for DQN and others) or after n steps (see ACER or PPO2)
        :param _locals: (dict)
        :param _globals: (dict)
        """
        nonlocal n_steps, best_mean_reward, hist_rew
        # Evaluate every 5 calls; stats are printed every 100 calls below
        if (n_steps + 1) % 5 == 0:
            # Evaluate policy performance
            x, y = ts2xy(load_results(log_dir), 'timesteps')
            if len(x) > 0:
                # mean_rew_plot(y, len(x))
                hist_rew = y.copy()
                mean_reward = np.mean(y[-100:])
                if (n_steps + 1) % 100 == 0:
                    print(x[-1], 'timesteps')
                    print(
                        "Best mean reward: {:.2f} - Last mean reward per episode: {:.2f}"
                        .format(best_mean_reward, mean_reward))

                # New best model, you could save the agent here
                if mean_reward > best_mean_reward:
                    best_mean_reward = mean_reward
                    # Example for saving best model
                    print("Saving new best model")
                    _locals['self'].save(log_dir +
                                         "/deep_{0:.0E}.pkl".format(lr))

        n_steps += 1
        # Returning False would abort training early in stable-baselines, so return True
        return True

    # Creates the training model etc.
    dqn_nw = DQN('MlpPolicy',
                 env,
                 learning_rate=lr,
                 exploration_fraction=exploration_fraction,
                 exploration_final_eps=exploration_final_eps,
                 checkpoint_freq=2000,
                 learning_starts=1000,
                 target_network_update_freq=500)

    # Starts the training:
    dqn_nw.learn(total_timesteps=steps, callback=callback)
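train_deep relies on several imports and an init_logging helper that are not shown. A hedged sketch of the likely imports plus a minimal invocation; init_logging is project-specific, so the version below is only a placeholder:

import os
import gym
import numpy as np
from stable_baselines import DQN
from stable_baselines.bench import Monitor
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.results_plotter import load_results, ts2xy

def init_logging(log_dir):
    # Placeholder for the project's own logging setup.
    os.makedirs(log_dir, exist_ok=True)

if __name__ == '__main__':
    train_deep(env_name='CartPole-v1', steps=50000, lr=5e-4)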