Example #1
def make_env(env_id,
             env_type,
             subrank=0,
             seed=None,
             reward_scale=1.0,
             gamestate=None,
             flatten_dict_observations=True,
             wrapper_kwargs=None):
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=env_id,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE,
            state=gamestate)
    elif env_type == 'trafficenvs':
        # when it is a traffic environment, add a wrapper to set environment defaults
        print('making traffic envs')
        env = gym.make(env_id)
        if wrapper_kwargs:  # only wrap when parameters were actually provided
            env = TrafficParameterSetWrapper(env, wrapper_kwargs)

        env = SingleAgentWrapper(env)
    else:
        env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space,
                                                gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    #env = Monitor(env,
    #              logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
    #                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
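# A minimal usage sketch for make_env. The env id and wrapper kwargs below are borrowed
# from other examples on this page rather than from this snippet, so treat them as assumptions.
env = make_env('TrafficLight-v0', 'trafficenvs', subrank=0, seed=42,
               wrapper_kwargs={'penetration_rate': 0.5})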
Example #2
    def _thunk():
        env = gym.make(args.env_name)
        env.seed(args.seed + rank)

        env_args = {}
        env_args['state_representation'] = args.state_rep
        env_args['reward_type'] = args.reward_type
        env_args['penetration_rate'] = args.penetration_rate

        if not no_logging and not args.no_log_waiting_time and rank == 0:
            if args.save_dir != "":
                env_args['log_waiting_time'] = True
                env_args['logger_type'] = 'baselines_logger'
                #env_args['record_file'] = os.path.join(args.save_path, "waiting_time_process_"+str(rank+1)+".txt")
            else:
                print("No waiting time logging because no save directory is given")

        if args.penetration_type == 'linear':
            prm = PenetrationRateManager(
                trend='linear',
                transition_time=3 * 365,  # 3 years
                pr_start=0.1,
                pr_end=1)
            env_args['reset_manager'] = prm

        env = TrafficParameterSetWrapper(env, env_args).unwrapped

        if visual:
            env = TrafficVisualizationWrapper(env).unwrapped

        return env
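# A sketch of how thunks like _thunk above are typically consumed: baselines-style
# vectorized envs take a list of callables, one per worker process. make_env_fn(rank)
# is a hypothetical factory that builds and returns _thunk for a given rank; it is not
# part of this snippet.
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

num_processes = 4  # assumed worker count, for illustration only
envs = SubprocVecEnv([make_env_fn(rank) for rank in range(num_processes)])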
Example #3
#######################################################################

import gym
import gym_trafficlight
from gym_trafficlight.trafficenvs import TrafficEnv
from gym_trafficlight.wrappers import TrafficParameterSetWrapper
args = {}
if cmd_args.visual:
    args['visual'] = True
args['reward_present_form'] = 'reward'  # we use reward as opposed to penalty
if cmd_args.no_normalize_reward:
    args['normalize_reward'] = False
env = gym.make(cmd_args.env_name)
if cmd_args.delay:
    args['action_delay'] = cmd_args.delay
env = TrafficParameterSetWrapper(env, args)
env = env.unwrapped

# if gpu is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('checking device... the computation device used in the training is: ' +
      str(device))

######################################################################
# Replay Memory
# -------------
#
# We'll be using experience replay memory for training our DQN. It stores
# the transitions that the agent observes, allowing us to reuse this data
# later. By sampling from it randomly, the transitions that build up a
# batch are decorrelated. It has been shown that this greatly stabilizes
# and improves the DQN training procedure.
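# A minimal deque-based sketch of such a replay memory (this mirrors the standard
# PyTorch DQN tutorial pattern; it is not the ReplayMemory class used elsewhere on this page):
import random
from collections import deque, namedtuple

Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward'))

class SimpleReplayMemory:
    """Fixed-size buffer that stores transitions and samples decorrelated batches."""

    def __init__(self, capacity):
        self.memory = deque(maxlen=capacity)

    def push(self, *args):
        # Save a transition, evicting the oldest one when the buffer is full.
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        # Uniform random sampling decorrelates the transitions in a batch.
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)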
Example #4
import argparse
parser = argparse.ArgumentParser(description='Run Q learning for cloud rltl')
parser.add_argument('--visual', action='store_true', help='use visualization')
cmd_args = parser.parse_args()
import gym
import gym_trafficlight
from gym_trafficlight.trafficenvs import TrafficEnv
from gym_trafficlight.wrappers import TrafficParameterSetWrapper
args = TrafficEnv.get_default_init_parameters()
if cmd_args.visual:
    args['visual'] = True
print(args)
env = gym.make('TrafficLight-v0')
env = TrafficParameterSetWrapper(env, args)
#args={'penetration_rate': 0.5}
itr = 0
env.reset()
while itr < 3000:
    itr += 1
    next_state, reward, terminal, _ = env.step([0])
    # print (next_state)

env.reset()
Example #5
def main():
    parser = argparse.ArgumentParser(description='Run DQN on Traffic Lights')
    # parser.add_argument('--env', default='SpaceInvaders-v0', help='Atari env name')
    parser.add_argument('--mode', choices=['train', 'test'], default='train')
    # parser.add_argument(
    #     '-o', '--output', default='atari-v0', help='Directory to save data to')
    parser.add_argument('--seed', default=0, type=int, help='Random seed')
    parser.add_argument('--gpu', default=0, help='comma separated list of GPU(s) to use.')
    parser.add_argument('--cpu', action='store_true', help='use CPU')
    parser.add_argument('--load', help='load model.')
    parser.add_argument('--record', action='store_true', help='record results when loading.')
    # parser.add_argument('--render', action='store_true', default=False, help='render while testing')
    parser.add_argument('--pysumo', action='store_true', help='use pysumo')
    parser.add_argument('--dir_name', default='TL', help='directory name')
    parser.add_argument('--no_counter', action='store_true', default=False, help='do not append a counter to saved file names; note this may overwrite previous results')
    parser.add_argument('--penetration_rate', type=float, default=1., help='specify penetration rate')
    parser.add_argument('--sumo', action='store_true', help='force to use non-gui sumo')
    parser.add_argument('--whole_day', action='store_true', help='run over the whole day instead of a fixed time of day')
    parser.add_argument('--day_time', type=int, help='specify day time')
    parser.add_argument('--phase_representation', default='original', help='specify phase representation')
    parser.add_argument('--shared_model', action='store_true', help='use a common model between all agents')
    parser.add_argument('--simulator', choices=['original', 'osm'], default='original')
    parser.add_argument('--simple_inputs', action='store_true', help='use simplified inputs with fixed number of states (12)')
    parser.add_argument('--map', choices=['osm_3_intersections', 'osm_13_intersections', 'manhattan_small','manhattan'], default='osm_13_intersections')
    #parser.add_argument('--route', choices=['osm_3_intersections', 'osm_13_intersections', 'manhattan_small','manhattan'], default='osm_13_intersections')
    #parser.add_argument('--aggregated_reward', action='store_true', help='choose to combine waiting times to optimize waiting time on entire network instead of individually at each TL')
    parser.add_argument('--arrival_rate', default='1', help='arrival rate of cars')
    parser.add_argument('--unstationary_flow', action='store_true', help='use when the training flow is non-stationary')
    parser.add_argument('--dynamic_penetration', action='store_true', help='use a dynamic penetration rate')
    parser.add_argument('--reward_type', default='local', help='reward type')
    parser.add_argument('--evaluation_interval', default=5, type=int, help='how many episodes per evaluation')
    parser.add_argument('--env', default='TrafficLight-simple-medium-v0', help='env name')
    parser.add_argument('--test_while_learn', action='store_true', help='set agent mode to test-while-learn; this disables epsilon-greedy exploration')
    parser.add_argument('--log_waiting_time', action='store_true', help='turn on the env waiting-time logger')

    args = parser.parse_args()

    ## PARAMS ##

    num_episodes = 150
    episode_time = 3000  # must be less than 3600
    num_iterations = num_episodes * episode_time
    memory_size = 200000
    decay_steps = 100000
    target_update_freq = 3000
    lr = 0.0001
    num_burn_in = 10000
    train_freq = 1
    tl_state = 1

    window = 1  # total size of the state
    stride = 0  # stride/skip of states
    #pdb.set_trace()

    if args.pysumo:
        import libsumo
        visual = False
    else:
        import traci
        visual = True
    #
    # if args.simulator == 'original':
    #     env = Simulator(visual=visual,
    #                     episode_time=episode_time,
    #                     num_traffic_state = 26,
    #                     penetration_rate = args.penetration_rate,
    #                     #config_file= './map/OneIntersectionLuSTScenario-12408/traffic.sumocfg',
    #                     #standard_file_name ='./map/OneIntersectionLuSTScenario-12408/traffic-standard.rou.xml',
    #                     map_file='./map/OneIntersectionLuST-12408-stationary/8/traffic.net.xml',
    #                     route_file='./map/OneIntersectionLuST-12408-stationary/8/traffic.rou.xml',
    #                     #map_file='./map/LuxembougDetailed-DUE-12408/traffic.net.xml',
    #                     #route_file='./map/LuxembougDetailed-DUE-12408/traffic-8.rou.xml',
    #                     whole_day = args.whole_day,
    #                     flow_manager_file_prefix='./map/LuxembougDetailed-DUE-12408/traffic',
    #                     state_representation = args.phase_representation,
    #                     unstationary_flow = args.unstationary_flow,
    #                     traffic_light_module = TrafficLightLuxembourg,
    #                     tl_list = ['0'],
    #                     force_sumo = args.sumo)
    # elif args.simulator == 'osm':
    #     env = Simulator_osm(visual=visual,
    #                     episode_time=episode_time,
    #                     penetration_rate = args.penetration_rate,
    #                     map_file= args.map,
    #                     arrival_rate= args.arrival_rate,
    #                     simple = args.simple_inputs,
    #                     aggregated_reward = args.aggregated_reward)

    env = gym.make(args.env)
    #env = gym.make('TrafficLight-Lust12408-regular-time-v0')
    #env_args = TrafficEnv.get_default_init_parameters()
    env_args = {
        'visual':                 visual,
        'episode_time':           episode_time,
        #'num_traffic_state':      10,
        'penetration_rate':       args.penetration_rate,
        #config_file= './map/OneIntersectionLuSTScenario-12408/traffic.sumocfg',
        #standard_file_name ='./map/OneIntersectionLuSTScenario-12408/traffic-standard.rou.xml',
        #map_file='./map/OneIntersectionLuST-12408-stationary/8/traffic.net.xml',
        #route_file='./map/OneIntersectionLuST-12408-stationary/8/traffic.rou.xml',
        #map_file='./map/LuxembougDetailed-DUE-12408/traffic.net.xml',
        #route_file='./map/LuxembougDetailed-DUE-12408/traffic-8.rou.xml',
        'whole_day':              args.whole_day,
        #flow_manager_file_prefix:'./map/LuxembougDetailed-DUE-12408/traffic',
        #state_representation:   args.phase_representation,
        #unstationary_flow:      args.unstationary_flow,
        #traffic_light_module:   TrafficLightLuxembourg,
        #tl_list:                ['0'],
        'force_sumo':             args.sumo,
        'reward_type':            args.reward_type,
        'reward_present_form':    'penalty',
        'log_waiting_time':       args.log_waiting_time
        #'observation_processor':  process_observation
    }
    if args.dynamic_penetration:
        prm = PenetrationRateManager(
            trend='linear',
            transition_time=3 * 365,  # 3 years
            pr_start=0.1,
            pr_end=1)
        env_args['reset_manager'] = prm
        # With a dynamic penetration rate we normally want to disable epsilon-greedy,
        # so test_while_learn is enabled by default here.
        args.test_while_learn = True
        args.evaluation_interval = sys.maxsize
    ## This wrapper passes the parameters into the env; the wrapper re-inits the whole
    ## env with the new parameters, so by wrapping and then unwrapping we get a freshly initialized env.
    ## This is obviously not the best way to init the env. TODO: a more intuitive way to init the env
    env = TrafficParameterSetWrapper(env, env_args).unwrapped
    print(env.num_traffic_state)

    id_list = env.tl_id_list

    num_agents = len(id_list)
    #print num_agents
    #os.system("pause")
    #pdb.set_trace() #TODO delete after debugging
    if tl_state == 1:
        input_shape = (1, env.num_traffic_state)
        buffer_input_shape = (num_agents, 1, env.num_traffic_state)
        preprocessor = TLStatePreprocessor()
    elif tl_state == 2:
        input_shape = (4, 250)
        window = 1
        preprocessor = TLMAPPreprocessor()
    else:
        print('invalid state')
        return

    network = 'DQN'

    # choose device
    device = '/gpu:{}'.format(args.gpu)
    if args.cpu:
        device = '/cpu:0'

    env_name = 'SUMO'
    seed = args.seed
    # env.seed(seed)

    num_actions = env.action_space.n
    # print 'num_actions', num_actions

    # memory grows as it requires
    #This will assign the computation to CPU automatically whenever GPU is not available
    config = tf.ConfigProto(allow_soft_placement=True)

    #config = tf.ConfigProto()
    config.gpu_options.allow_growth=True

    sess = tf.Session(config=config)
    K.set_session(sess)

    with tf.device(device):
        model = create_model(window=window, input_shape=input_shape, num_actions=num_actions)
        # memory
        memory = ReplayMemory(max_size=memory_size, window_length=window, stride=stride, state_input=buffer_input_shape)
        # policy
        policy = LinearDecayGreedyEpsilonPolicy(start_value=1.0, end_value=0.05, num_steps=decay_steps, num_actions=num_actions)
        # optimizer
        adam = Adam(lr=lr)
        agent_list = []
        index = 0
        for id in id_list:
            # agent
            if not args.shared_model:
                model = create_model(window=window, input_shape=input_shape, num_actions=num_actions)
            agent = DQNAgent(
                model=model,
                preprocessor=preprocessor,
                memory=memory,
                policy=policy,
                gamma=0.99,
                target_update_freq=target_update_freq,
                num_burn_in=num_burn_in,
                train_freq=train_freq,
                batch_size=32,
                window_length=window,
                start_random_steps=20,
                num_actions=num_actions,
                env_name=env_name,
                network=network,
                name=id,
                index=index,
                input_shape=input_shape,
                stride=stride,
                test_while_learn=args.test_while_learn)
            index += 1

            # compile
            agent.compile(optimizer=adam, loss_func=mean_huber_loss, metrics=['mae'])
            agent_list.append(agent)

        agents = DQNAgents(
                agent_list,
                #model=model,
                preprocessor=preprocessor,
                memory=memory,
                #policy=policy,
                #gamma=0.9,
                #target_update_freq=target_update_freq,
                num_burn_in=num_burn_in,
                train_freq=train_freq,
                batch_size=32,
                #window_length=window,
                start_random_steps=20,
                #num_actions=num_actions,
                env_name=env_name,
                network=network,
                #name=id,
                input_shape=input_shape,
                stride=stride,
                evaluation_interval=args.evaluation_interval)

        if args.load:
            for agent in agents.agents:

                weight_name = args.load + '_' + agent.name + '.hdf5'
                agent.model.load_weights(weight_name)

        if args.mode == 'train':
            # log file
            logfile_name = network + '_log/'
            if not args.no_counter:
                logfile = get_output_folder(logfile_name, args.dir_name)
            else:
                logfile = logfile_name + args.dir_name
            writer = tf.summary.FileWriter(logfile, sess.graph)
            # weights file
            weights_file_name = network + '_weights/'
            if not args.no_counter:
                weights_file = get_output_folder(weights_file_name, args.dir_name)
            else:
                weights_file = weights_file_name + args.dir_name

            os.makedirs(weights_file)
            weights_file += '/'

            save_interval = num_iterations // 30  # save a checkpoint every 1/30 of the run
            # print 'start training....'
            agents.fit(env=env, num_iterations=num_iterations, save_interval=save_interval, writer=writer, weights_file=weights_file)

            # save weights
            for agent in agents.agents:
                file_name = '{}_{}_{}_weights_{}.hdf5'.format(network, env_name, num_iterations, agent.name)
                file_path = weights_file + file_name
                agent.model.save_weights(file_path)

        else:  # test
            if not args.load:
                print('please load a model')
                return
            num_episodes = 5
            if args.whole_day:
                env.flow_manager.travel_to_time(args.day_time)
                num_episodes = 5
                env.reset_to_same_time = True
            avg_reward,overall_waiting_time,equipped_waiting_time,unequipped_waiting_time = agents.evaluate(env=env, num_episodes=num_episodes)
            print('Evaluation Result for average of {} episodes'.format(num_episodes))
            print('average total reward: {} \noverall waiting time: {} \nequipped waiting time: {} \nunequipped waiting time: {}'\
                .format(avg_reward,overall_waiting_time,equipped_waiting_time,unequipped_waiting_time))

            if args.record:
                record_file_name = 'record.txt'
                with open(record_file_name, 'a') as f:
                    f.write('{}\t{}\t{}\n'.format(overall_waiting_time, equipped_waiting_time, unequipped_waiting_time))
            env.stop()
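# mean_huber_loss is referenced in agent.compile() above but not defined in this snippet.
# Below is a sketch of what a mean Huber loss usually computes, written with TF1-style ops
# to match the tf.Session/ConfigProto usage above; it is an assumption, not the repo's
# actual implementation.
import tensorflow as tf

def mean_huber_loss_sketch(y_true, y_pred, delta=1.0):
    # Quadratic for small errors, linear for large ones, averaged over the batch.
    err = tf.abs(y_true - y_pred)
    quadratic = tf.minimum(err, delta)
    linear = err - quadratic
    return tf.reduce_mean(0.5 * tf.square(quadratic) + delta * linear)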
Example #6
        sys.exit(1)

    if not cmd_args.saving_file:
        print('a saving file must be specified; use --saving_file')
        sys.exit(1)
    env = gym.make(cmd_args.env_name)
    args = {}
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if cmd_args.visual:
        args['visual'] = True
        # always use CPU when visualizing (is there a fix for this? is it necessary to fix it?)
        device = torch.device('cpu')
    if cmd_args.delay:
        args['action_delay'] = cmd_args.delay
    env = TrafficParameterSetWrapper(env, args)
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    env = env.unwrapped

    screen_height, screen_width = env.observation_space.shape
    n_actions = env.action_space.n
    target_net = DQN(screen_height, screen_width, n_actions).to(device)
    target_net.load_state_dict(torch.load(cmd_args.filename))
    t_waiting = 0
    for _ in range(0, cmd_args.n_trials):
        waiting_times = evaluate(target_net, env)
        t_waiting += waiting_times[0]
    avg_waiting_time = t_waiting / float(cmd_args.n_trials)
    with open(cmd_args.saving_file, 'a') as f:
        f.write('for env {}, delay {}, waiting time is: {}\n'.format(
            cmd_args.env_name, cmd_args.delay, avg_waiting_time))
## making environment:
env = gym.make('TrafficLight-v0')
################################ Changing Env Parameters ################################
## specify env parameters (this re-inits the env class and discards the original instance, so do it only at the initial stage):
# args = TrafficEnv.get_default_init_parameters()
# args.update({'penetration_rate': 0.5})
# env = TrafficParameterSetWrapper(env, args).unwrapped

################################ Running the Env ################################
# args = TrafficEnv.get_default_init_parameters()
# args.update({'state_representation': 'full'})
# env = TrafficParameterSetWrapper(env, args).unwrapped
# env.reset()
# for _ in range(0,1000):
#     action = env.action_space.sample()
#     next_state, reward, terminal, info = env.step(action)
#     print(next_state, reward, terminal)

################################ Visualizing the Environment ################################
## visualize the env (this will not run inside Docker; run it outside, on a machine with SUMO installed):
from gym_trafficlight.wrappers import TrafficVisualizationWrapper
args = TrafficEnv.get_default_init_parameters()
args.update({'state_representation': 'full'})
env = TrafficParameterSetWrapper(env, args).unwrapped
env = TrafficVisualizationWrapper(env).unwrapped
env.reset()
for _ in range(0, 1000):
    action = env.action_space.sample()
    next_state, reward, terminal, info = env.step(action)
    print(next_state, reward, terminal)
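# The loop above keeps stepping regardless of episode termination. A slightly more careful
# sketch (assuming the same step/reset API used throughout this page, and the stop() call
# seen in Example #5) resets on episode end and shuts the simulator down afterwards:
env.reset()
for _ in range(0, 1000):
    action = env.action_space.sample()
    next_state, reward, terminal, info = env.step(action)
    if terminal:
        env.reset()
env.stop()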