예제 #1
0
파일: run.py 프로젝트: mcgrche/DriveML
Original Authors: Sanjeevan Ahilan, Julian Villella, David Rusu
Modified By: Benedikt Kolbeinsson, Jiaze Sun, Pedro Castro
"""
import os

import gym
from hiway.sumo_scenario import SumoScenario

from policy import Policy, OBSERVATION_SPACE, ACTION_SPACE, reward, observation, action


# Directory of the scenario used for this test run.
scenario_path = '../tracks/2lane_sharp_bwd'

# Scenario rooted at that directory, with random_social_vehicle_count set to 10.
scenario = SumoScenario(scenario_root=scenario_path,
                        random_social_vehicle_count=10)

# Settings handed to the environment: the scenario, display/seed options and
# the spaces and reward/observation/action callables imported from `policy`.
_env_config = {
    'sumo_scenario': scenario,
    'headless': False,
    'visdom': False,
    'seed': 41,
    'max_step_length': 10000,
    'observation_space': OBSERVATION_SPACE,
    'action_space': ACTION_SPACE,
    'reward_function': reward,
    'observation_function': observation,
    'action_function': action,
}

# Instantiate the registered hiway competition environment.
env = gym.make('gym_hiway:hiway-competition-v0', config=_env_config)
예제 #2
0
파일: trainer.py 프로젝트: mcgrche/DriveML
 def __init__(self, env_config):
     """Pick this worker's scenario, then initialise the parent environment.

     The entry at position ``env_config.worker_index - 1`` of ``env_setting``
     supplies the scenario root (item 0) and the random social vehicle
     count (item 1). The resulting ``SumoScenario`` overwrites
     ``env_config['sumo_scenario']`` before the parent constructor is called.
     """
     # NOTE(review): a worker_index of 0 yields index -1 here; if
     # env_setting is a list that selects its last entry -- confirm intended.
     setting = env_setting[env_config.worker_index - 1]
     worker_scenario = SumoScenario(scenario_root=setting[0],
                                    random_social_vehicle_count=setting[1])
     env_config['sumo_scenario'] = worker_scenario
     super(MultiEnv, self).__init__(config=env_config)
예제 #3
0
파일: trainer.py 프로젝트: mcgrche/DriveML
def main(args):
    """Train a PPO agent with Ray Tune, giving each worker its own track.

    A table of (track directory, social vehicle count) pairs is built and a
    ``CompetitionEnv`` subclass picks one entry per worker based on
    ``env_config.worker_index``. After ``tune.run`` finishes, the ``model``
    directory of the best log dir (by ``episode_reward_max``) is copied next
    to this file unless a ``model`` directory already exists there.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``scenario``, ``num_social_vehicles``, ``num_workers``,
            ``headless``, ``result_dir``, ``checkpoint_num``,
            ``resume_training`` and ``num_samples``.

    Raises:
        ValueError: If ``args.num_workers`` is larger than the number of
            configured environment settings.
    """
    # Default scenario placed in env_config; MultiEnv replaces it per worker.
    sumo_scenario = SumoScenario(
        scenario_root=os.path.abspath(args.scenario),
        random_social_vehicle_count=args.num_social_vehicles)

    dataset_dir = os.path.abspath(
        os.path.join(os.path.dirname(__file__), '../..', 'tracks'))

    # Alternative track selections kept for reference:
    # tracks = ['1lane', '1lane_bwd', '1lane_sharp', '1lane_sharp_bwd',
    #           '1lane_new_a', '1lane_new_b', '1lane_new_c',
    #           '1lane_new_a_bwd', '1lane_new_b_bwd', '1lane_new_c_bwd',
    #           '2lane', '2lane_bwd', '2lane_sharp', '2lane_sharp_bwd',
    #           '2lane_new_a', '2lane_new_b', '2lane_new_c',
    #           '2lane_new_a_bwd', '2lane_new_b_bwd', '2lane_new_c_bwd',
    #           '3lane', '3lane_b', '3lane_bwd', '3lane_bwd_b', '3lane_sharp',
    #           '3lane_sharp_b', '3lane_sharp_bwd', '3lane_sharp_bwd_b',
    #           '3lane_new_a', '3lane_new_b', '3lane_new_c',
    #           '3lane_new_a_bwd', '3lane_new_b_bwd', '3lane_new_c_bwd']

    # tracks = ['2lane', '2lane_bwd', '2lane_sharp', '2lane_sharp_bwd',
    #           '2lane_new_a', '2lane_new_b', '2lane_new_c',
    #           '2lane_new_a_bwd', '2lane_new_b_bwd', '2lane_new_c_bwd']
    tracks = [
        '1lane', '2lane_sharp_bwd', '3lane_bwd', '3lane_sharp_bwd_b',
        '1lane_new_a', '2lane_new_b', '3lane_new_c', '1lane_new_c_bwd',
        '2lane_new_a_bwd', '3lane_new_a_bwd', '1lane_new_b', '2lane_new_c',
        '3lane_new_b_bwd', '3lane_sharp_b', '2lane_sharp_bwd'
    ]

    tracks_dir = [os.path.join(dataset_dir, track) for track in tracks]

    # Social vehicle count for the track at the same position in `tracks`.
    num_social_vehicles = [
        10, 10, 10, 50, 15, 15, 70, 20, 30, 100, 15, 50, 70, 25, 10
    ]

    env_setting = list(zip(tracks_dir, num_social_vehicles))

    # Each worker looks up env_setting[worker_index - 1], so there must be at
    # least as many settings as workers. Fail here, before Tune starts,
    # instead of with an IndexError inside a worker that max_failures would
    # keep retrying.
    # NOTE(review): assumes remote workers are numbered 1..num_workers --
    # confirm against the RLlib version in use.
    if args.num_workers > len(env_setting):
        raise ValueError(
            f"num_workers ({args.num_workers}) exceeds the number of "
            f"environment settings ({len(env_setting)})")

    # Train each worker with a different environment setting.
    class MultiEnv(CompetitionEnv):
        """CompetitionEnv whose scenario is chosen from the worker index."""

        def __init__(self, env_config):
            # A worker_index of 0 gives index -1, i.e. the last setting.
            track_dir, vehicle_count = env_setting[env_config.worker_index - 1]
            env_config['sumo_scenario'] = SumoScenario(
                scenario_root=track_dir,
                random_social_vehicle_count=vehicle_count)
            super(MultiEnv, self).__init__(config=env_config)

    tune_config = {
        'env':
        MultiEnv,
        'log_level':
        'WARN',
        'num_workers':
        args.num_workers,
        'horizon':
        1000,
        'env_config': {
            'seed': tune.randint(1000),
            'sumo_scenario': sumo_scenario,
            'headless': args.headless,
            'observation_space': OBSERVATION_SPACE,
            'action_space': ACTION_SPACE,
            'reward_function': tune.function(reward),
            'observation_function': tune.function(observation),
            'action_function': tune.function(action),
            'max_step_length': 1000,
        },
        'model': {
            'custom_model': MODEL_NAME,
            'fcnet_activation': "relu"
        },
        "callbacks": {
            "on_episode_start": on_episode_start,
            "on_episode_step": on_episode_step,
            "on_episode_end": on_episode_end
        },
        # These params are tuned from a fixed starting value.
        "lambda":
        0.95,
        "clip_param":
        0.2,
        # "lr": 0.0,
        # Pairs of [step count, learning rate].
        "lr_schedule": [[0, 1e-3], [3000000, 3e-4], [6000000, 1e-4],
                        [9000000, 3e-5], [12000000, 1e-5]],
        "num_sgd_iter":
        10,
        "sgd_minibatch_size":
        4096,
        "train_batch_size":
        131072
    }

    experiment_name = 'rllib_multi_env'

    # `checkpoint` is only consumed by the commented-out `restore=` argument
    # below; it is kept so restoring can be re-enabled by uncommenting it.
    result_dir = args.result_dir
    checkpoint = None
    if args.checkpoint_num is not None:
        checkpoint = ('{dir}/checkpoint_{n}/checkpoint-{n}'.format(
            dir=result_dir, n=args.checkpoint_num))

    log_dir = os.path.expanduser("~/ray_results")
    print(f"Checkpointing at {log_dir}")

    # for debugging e.g. with pycharm, turn on local_mode
    # ray.init(local_mode=True)

    # NOTE(review): `scheduler` is not assigned anywhere in this function (the
    # assignment below is commented out), so it must exist at module level or
    # the tune.run call raises NameError -- confirm.
    # scheduler = pbt if args.pbt else None
    analysis = tune.run(
        'PPO',
        name=experiment_name,
        stop={'time_total_s': 60 * 60 * 40},  # 40 hours
        checkpoint_freq=1,
        checkpoint_at_end=True,
        local_dir=log_dir,
        resume=args.resume_training,
        # restore=checkpoint,
        max_failures=1000,
        num_samples=args.num_samples,
        export_formats=['model', 'checkpoint'],
        config=tune_config,
        scheduler=scheduler,
    )

    print(analysis.dataframe().head())

    # Copy the exported model of the best log dir next to this file.
    logdir = analysis.get_best_logdir('episode_reward_max')
    model_path = os.path.join(logdir, 'model')
    dest_model_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   "model")

    # Never overwrite an existing model directory; report where the new one is.
    if not os.path.exists(dest_model_path):
        shutil.copytree(model_path, dest_model_path)
        print(f"wrote model to: {dest_model_path}")
    else:
        print(f"Model already exists at {dest_model_path} not overwriting")
        print(f"New model is stored at {model_path}")
예제 #4
0
def main(args):
    """Run the single-scenario PPO example and export the best model.

    Builds one ``SumoScenario`` from ``args.scenario`` and
    ``args.num_social_vehicles``, trains 'PPO' on ``CompetitionEnv`` through
    ``tune.run`` with a one-hour total time limit, prints the head of the
    results dataframe, and copies the ``model`` directory of the best log
    dir (by ``episode_reward_max``) next to this file unless a ``model``
    directory is already present there.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``scenario``, ``num_social_vehicles``, ``headless``,
            ``resume_training`` and ``num_samples``.
    """
    scenario = SumoScenario(
        scenario_root=os.path.abspath(args.scenario),
        random_social_vehicle_count=args.num_social_vehicles)

    # Settings forwarded to the environment constructor.
    env_settings = {
        'seed': tune.randint(1000),
        'sumo_scenario': scenario,
        'headless': args.headless,
        'observation_space': OBSERVATION_SPACE,
        'action_space': ACTION_SPACE,
        'reward_function': tune.function(reward),
        'observation_function': tune.function(observation),
        'action_function': tune.function(action),
    }

    # Per-episode hooks defined elsewhere in this module.
    episode_hooks = {
        "on_episode_start": on_episode_start,
        "on_episode_step": on_episode_step,
        "on_episode_end": on_episode_end
    }

    trainer_config = {
        'env': CompetitionEnv,
        'log_level': 'WARN',
        'num_workers': 2,
        'horizon': 5000,
        'env_config': env_settings,
        'model': {'custom_model': MODEL_NAME},
        "callbacks": episode_hooks,
    }

    one_hour_s = 60 * 60
    log_dir = os.path.expanduser("~/ray_results")
    print(f"Checkpointing at {log_dir}")

    results = tune.run(
        'PPO',
        name='rllib_example',
        stop={'time_total_s': one_hour_s},
        checkpoint_freq=1,
        checkpoint_at_end=True,
        local_dir=log_dir,
        resume=args.resume_training,
        max_failures=10,
        num_samples=args.num_samples,
        export_formats=['model', 'checkpoint'],
        config=trainer_config,
    )

    print(results.dataframe().head())

    # Source: exported model of the best log dir. Target: beside this file.
    best_logdir = results.get_best_logdir('episode_reward_max')
    model_path = os.path.join(best_logdir, 'model')
    script_dir = os.path.dirname(os.path.realpath(__file__))
    dest_model_path = os.path.join(script_dir, "model")

    # Leave an existing model directory untouched and just report locations.
    if os.path.exists(dest_model_path):
        print(f"Model already exists at {dest_model_path} not overwriting")
        print(f"New model is stored at {model_path}")
        return

    shutil.copytree(model_path, dest_model_path)
    print(f"wrote model to: {dest_model_path}")