Example #1
def env_config(args):
    # preparing config
    # # for environment

    config = json.load(open(args.config))
    config["num_step"] = args.num_step

    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']

    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    config["state_time_span"] = args.state_time_span
    config["time_span"] = args.time_span

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())
    config['intersection_id'] = intersection_id
    config["thread_num"] = 1
    #phase_list = config['lane_phase_info'][intersection_id]['phase']
    #logging.info(phase_list)
    # config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1 # 1 is for the current phase. [vehicle_count for each start lane] + [current_phase]
    #config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane'])
    #config["action_size"] = len(phase_list)
    #config["batch_size"] = args.batch_size
    return config
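All of these snippets index the dict returned by parse_roadnet through config['lane_phase_info'][intersection_id] and read its 'phase' and 'start_lane' entries. As a rough sketch of the shape they assume (intersection and lane names are illustrative, not taken from a real roadnet):

# Illustrative sketch of the lane_phase_info structure the examples index into.
# Keys and nesting mirror the accesses above; concrete values depend on the
# roadnet file and on parse_roadnet's actual output.
lane_phase_info = {
    "intersection_1_1": {
        "phase": [1, 2, 3, 4, 5, 6, 7, 8],               # available signal phases
        "start_lane": ["road_0_1_0_0", "road_0_1_0_1"],  # incoming lanes (names made up)
        # ... whatever else parse_roadnet provides
    },
}

intersection_id = list(lane_phase_info.keys())[0]
state_size = len(lane_phase_info[intersection_id]["start_lane"]) + 1  # +1 for the current phase
action_size = len(lane_phase_info[intersection_id]["phase"])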
Example #2
def gen_env_config():
    # preparing config
    # for environment
    with open(args.config) as f:
        config = json.load(f)
    with open(config['cityflow_config_file']) as f:
        cityflow_config = json.load(f)

    roadnet_file = cityflow_config['dir'] + cityflow_config['roadnetFile']

    config["num_step"] = args.num_step
    config["state_time_span"] = args.state_time_span
    config["time_span"] = args.time_span

    config["lane_phase_info"] = parse_roadnet(roadnet_file)

    intersection_id = list(config['lane_phase_info'].keys())[0]
    # intersection_id = list(config['lane_phase_info'].keys())

    config["intersection_id"] = intersection_id
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    # logging.info(phase_list)

    config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane'])
    config["action_size"] = len(phase_list)
    config["batch_size"] = args.batch_size
    return config
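Note that gen_env_config reads a module-level args object rather than taking parameters. A minimal driver sketch, with flag names borrowed from the parsers in the later examples and default values assumed for illustration only:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--config', type=str, default='config/global_config.json')
parser.add_argument('--num_step', type=int, default=2000)
parser.add_argument('--state_time_span', type=int, default=5)   # assumed default
parser.add_argument('--time_span', type=int, default=30)        # assumed default
parser.add_argument('--batch_size', type=int, default=32)
args = parser.parse_args()  # module-level, so gen_env_config can see it

config = gen_env_config()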
Example #3
def main():
    logging.getLogger().setLevel(logging.INFO)
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=2000)
    parser.add_argument('--ckpt', type=str)
    parser.add_argument('--algo', type=str, default='DQN', choices=['DQN', 'DDQN', 'DuelDQN'], help='choose an algorithm')
    parser.add_argument('--batch_size', type=int, default=32, help='batch size for training')


    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = "intersection_1_1"
    config["intersection_id"] = intersection_id
    config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1  # 1 is for the current phase. [vehicle_count for each start lane] + [current_phase]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    config["action_size"] = len(phase_list)
    config["batch_size"] = args.batch_size
    
    logging.info(phase_list)

    # build cityflow environment
    env = CityFlowEnv(config)

    # build agent
    agent = DQNAgent(config)
    
    # inference
    agent.load(args.ckpt)
    env.reset()
    state = env.get_state()
    
    for i in range(args.num_step): 
        action = agent.choose_action(state) # index of action
        action_phase = phase_list[action] # actual action
        next_state, reward = env.step(action_phase) # one step

        state = next_state

        # logging
        logging.info("step:{}/{}, action:{}, reward:{}"
                        .format(i, args.num_step, action, reward))
Example #4
def generate_config(args):
    with open(args.config) as f:
        config = json.load(f)
    with open(config['cityflow_config_file']) as f:
        cityflow_config = json.load(f)
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["num_step"] = args.num_step
    config["lane_phase_info"] = parse_roadnet(roadnetFile)
    intersection_id = list(config['lane_phase_info'].keys())
    config["intersection_id"] = intersection_id
    config["state_time_span"] = args.state_time_span
    config["time_span"] = args.time_span
    config["thread_num"] = 1
    config["state_time_span"] = args.state_time_span
    config["time_span"] = args.time_span
    # phase_list = config['lane_phase_info'][intersection_id]['phase']
    # logging.info(phase_list)
    # config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1 # 1 is for the current phase. [vehicle_count for each start lane] + [current_phase]
    return config
Example #5
def env_config(config_env):
    # preparing config
    # # for environment
    config = json.load(open(config_env['config']))

    config["num_step"] = config_env['num_step']

    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)
    config["state_time_span"] = config_env['state_time_span']
    config["time_span"] = config_env['time_span']

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    # config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1 # 1 is for the current phase. [vehicle_count for each start lane] + [current_phase]
    config["state_size"] = len(
        config['lane_phase_info'][intersection_id]['start_lane'])
    config["action_size"] = len(phase_list)
    config["batch_size"] = config_env['batch_size']
    return config
Example #6
import os
from utility import parse_arguments

args = parse_arguments()
roadnet = 'data/{}/roadnet.json'.format(args.scenario)

if __name__ == "__main__":
    ## configuration for both environment and agent
    config = {
        'scenario': args.scenario,
        'data': 'data/{}'.format(args.scenario),
        'roadnet': roadnet,
        'flow': 'data/{}/flow.json'.format(args.scenario),
        #'replay_data_path': 'data/frontend/web',
        'num_step': args.num_step,
        'lane_phase_info': parse_roadnet(
            roadnet)  # get lane and phase mapping by parsing the roadnet
    }

    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    config['state_size'] = len(
        config['lane_phase_info'][intersection_id]['start_lane']) + 1
    config['action_size'] = len(phase_list)

    # add visible gpu if necessary
    os.environ["CUDA_VISIBLE_DEVICES"] = ''

    env = CityFlowEnv(config)
    agent = DQNAgent(config)

    # some parameters in dqn
Example #7
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')
    parser = argparse.ArgumentParser()
    # parser.add_argument('--scenario', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--config',
                        type=str,
                        default='config/global_config_multi.json',
                        help='config file')
    parser.add_argument('--algo',
                        type=str,
                        default='MDQN',
                        choices=[
                            'MDQN',
                        ],
                        help='choose an algorithm')
    parser.add_argument('--inference',
                        action="store_true",
                        help='inference or training')
    parser.add_argument('--ckpt', type=str, help='inference or training')
    parser.add_argument('--epoch',
                        type=int,
                        default=10,
                        help='number of training epochs')
    parser.add_argument(
        '--num_step',
        type=int,
        default=1500,
        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--save_freq',
                        type=int,
                        default=1,
                        help='model saving frequency')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='batchsize for training')
    parser.add_argument('--phase_step',
                        type=int,
                        default=15,
                        help='seconds of one phase')

    args = parser.parse_args()

    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']

    cityflow_config["saveReplay"] = True if args.inference else False
    json.dump(cityflow_config, open(config["cityflow_config_file"], 'w'))

    config["lane_phase_info"] = parse_roadnet(roadnetFile)
    config["batch_size"] = args.batch_size
    intersection_id = list(
        config['lane_phase_info'].keys())  # all intersections
    config["intersection_id"] = intersection_id
    phase_list = {
        id_: config["lane_phase_info"][id_]["phase"]
        for id_ in intersection_id
    }
    config["phase_list"] = phase_list

    model_dir = "model/{}_{}".format(args.algo, date)
    result_dir = "result/{}_{}".format(args.algo, date)
    config["result_dir"] = result_dir

    # parameters for training and inference
    EPISODES = args.epoch
    learning_start = 300
    update_model_freq = args.batch_size // 3
    update_target_model_freq = 300 // args.phase_step

    # make dirs
    if not os.path.exists("model"):
        os.makedirs("model")
    if not os.path.exists("result"):
        os.makedirs("result")
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)

    env = CityFlowEnvM(config["lane_phase_info"],
                       intersection_id,
                       num_step=config["num_step"],
                       thread_num=1,
                       cityflow_config_file=config["cityflow_config_file"])

    config["state_size"] = env.state_size
    if args.algo == 'MDQN':
        Magent = MDQNAgent(
            intersection_id,
            state_size=config["state_size"],
            batch_size=config["batch_size"],
            phase_list=config[
                "phase_list"],  # action_size is len(phase_list[id_])
            env=env)
    else:
        raise Exception("{} algorithm not implemented now".format(args.algo))

    if not args.inference:  # training
        total_step = 0
        episode_rewards = {id_: [] for id_ in intersection_id}
        episode_scores = {id_: [] for id_ in intersection_id}
        with tqdm(total=EPISODES * args.num_step) as pbar:
            for i in range(EPISODES):
                # print("episode: {}".format(i))
                env.reset()
                state = env.get_state()

                episode_length = 0
                episode_reward = {id_: 0
                                  for id_ in intersection_id
                                  }  # for every agent
                episode_score = {id_: 0
                                 for id_ in intersection_id}  # for every agent
                while episode_length < args.num_step:

                    action = Magent.choose_action(state)  # index of action
                    action_phase = {}
                    for id_, a in action.items():
                        action_phase[id_] = phase_list[id_][a]

                    next_state, reward = env.step(action_phase)  # one step
                    score = env.get_score()

                    # consistent time of every phase
                    for _ in range(args.phase_step - 1):
                        next_state, reward_ = env.step(action_phase)
                        score_ = env.get_score()
                        for id_ in intersection_id:
                            reward[id_] += reward_[id_]
                            score[id_] += score_[id_]

                    for id_ in intersection_id:
                        reward[id_] /= args.phase_step
                        score[id_] /= args.phase_step

                    for id_ in intersection_id:
                        episode_reward[id_] += reward[id_]
                        episode_score[id_] += score[id_]

                    episode_length += 1
                    total_step += 1
                    pbar.update(1)

                    # store to replay buffer
                    if episode_length > learning_start:
                        Magent.remember(state, action_phase, reward,
                                        next_state)

                    state = next_state

                    # training
                    if episode_length > learning_start and total_step % update_model_freq == 0:
                        if len(Magent.agents[
                                intersection_id[0]].memory) > args.batch_size:
                            Magent.replay()

                    # update target Q network
                    if episode_length > learning_start and total_step % update_target_model_freq == 0:
                        Magent.update_target_network()

                    # logging.info("\repisode:{}/{}, total_step:{}, action:{}, reward:{}"
                    #             .format(i+1, EPISODES, total_step, action, reward))
                    print_reward = {
                        '_'.join(k.split('_')[1:]): v
                        for k, v in reward.items()
                    }
                    pbar.set_description("t_st:{}, epi:{}, st:{}, r:{}".format(
                        total_step, i + 1, episode_length, print_reward))

                # compute episode mean reward
                for id_ in intersection_id:
                    episode_reward[id_] /= args.num_step

                # save episode rewards
                for id_ in intersection_id:
                    episode_rewards[id_].append(episode_reward[id_])
                    episode_scores[id_].append(episode_score[id_])

                print_episode_reward = {
                    '_'.join(k.split('_')[1:]): v
                    for k, v in episode_reward.items()
                }
                print_episode_score = {
                    '_'.join(k.split('_')[1:]): v
                    for k, v in episode_score.items()
                }
                print('\n')
                print("Episode:{}, Mean reward:{}, Score: {}".format(
                    i + 1, print_episode_reward, print_episode_score))

                # save model
                if (i + 1) % args.save_freq == 0:
                    if args.algo == 'MDQN':
                        # Magent.save(model_dir + "/{}-ckpt".format(args.algo), i+1)
                        Magent.save(model_dir +
                                    "/{}-{}.h5".format(args.algo, i + 1))

                    # save reward to file
                    df = pd.DataFrame(episode_rewards)
                    df.to_csv(result_dir + '/rewards.csv', index=None)

                    df = pd.DataFrame(episode_scores)
                    df.to_csv(result_dir + '/scores.csv', index=None)

                    # save figure
                    plot_data_lists(
                        [episode_rewards[id_] for id_ in intersection_id],
                        intersection_id,
                        figure_name=result_dir + '/rewards.pdf')
                    plot_data_lists(
                        [episode_scores[id_] for id_ in intersection_id],
                        intersection_id,
                        figure_name=result_dir + '/scores.pdf')

    else:  # inference
        Magent.load(args.ckpt)

        episode_reward = {id_: []
                          for id_ in intersection_id}  # for every agent
        episode_score = {id_: [] for id_ in intersection_id}  # for every agent

        state = env.get_state()
        for i in range(args.num_step):
            action = Magent.choose_action(state)  # index of action
            action_phase = {}

            for id_, a in action.items():
                action_phase[id_] = phase_list[id_][a]

            # one step #####
            next_state, reward = env.step(action_phase)  # one step
            score = env.get_score()

            for _ in range(args.phase_step - 1):
                next_state, reward_ = env.step(action_phase)
                score_ = env.get_score()
                for id_ in intersection_id:
                    reward[id_] += reward_[id_]
                    score[id_] += score_[id_]

            for id_ in intersection_id:
                reward[id_] /= args.phase_step
                score[id_] /= args.phase_step
            # one step #####

            for id_ in intersection_id:
                episode_reward[id_].append(reward[id_])
                episode_score[id_].append(score[id_])

            state = next_state

            print("step:{}/{}, action:{}, reward:{}, score:{}".format(
                i + 1, args.num_step, action, reward, score))

        mean_reward = {}
        mean_score = {}
        for id_ in intersection_id:
            mean_reward[id_] = np.mean(episode_reward[id_])
            mean_score[id_] = np.mean(episode_score[id_])
        print('\n')
        print("[Inference] Mean reward:{}, Mean score:{},".format(
            mean_reward, mean_score))
Example #8
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')
    parser = argparse.ArgumentParser()
    # parser.add_argument('--scenario', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--config',
                        type=str,
                        default='config/global_config.json',
                        help='config file')
    parser.add_argument('--algo',
                        type=str,
                        default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'],
                        help='choose an algorithm')
    parser.add_argument('--inference',
                        action="store_true",
                        help='inference or training')
    parser.add_argument('--ckpt', type=str, help='inference or training')
    parser.add_argument('--epoch',
                        type=int,
                        default=10,
                        help='number of training epochs')
    parser.add_argument(
        '--num_step',
        type=int,
        default=200,
        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--save_freq',
                        type=int,
                        default=1,
                        help='model saving frequency')
    parser.add_argument('--batch_size',
                        type=int,
                        default=64,
                        help='batchsize for training')
    parser.add_argument('--phase_step',
                        type=int,
                        default=15,
                        help='seconds of one phase')

    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step

    assert "1x1" in config[
        'cityflow_config_file'], "please use 1x1 config file for cityflow"

    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    config["intersection_id"] = intersection_id

    phase_list = config['lane_phase_info'][config["intersection_id"]]['phase']
    config["action_size"] = len(phase_list)
    config["batch_size"] = args.batch_size

    logging.info(phase_list)

    model_dir = "model/{}_{}".format(args.algo, date)
    result_dir = "result/{}_{}".format(args.algo, date)
    config["result_dir"] = result_dir

    # parameters for training and inference
    # batch_size = 32
    EPISODES = args.epoch
    learning_start = 300
    # update_model_freq = args.batch_size
    update_model_freq = 1
    update_target_model_freq = 10

    if not args.inference:
        # build cityflow environment
        cityflow_config["saveReplay"] = True
        json.dump(cityflow_config, open(config["cityflow_config_file"], 'w'))
        env = CityFlowEnv(
            lane_phase_info=config["lane_phase_info"],
            intersection_id=config["intersection_id"],  # for single agent
            num_step=args.num_step,
            cityflow_config_file=config["cityflow_config_file"])

        # build agent
        config["state_size"] = env.state_size
        if args.algo == 'DQN':
            agent = DQNAgent(intersection_id,
                             state_size=config["state_size"],
                             action_size=config["action_size"],
                             batch_size=config["batch_size"],
                             phase_list=phase_list,
                             env=env)

        elif args.algo == 'DDQN':
            agent = DDQNAgent(config)
        elif args.algo == 'DuelDQN':
            agent = DuelingDQNAgent(config)

        # make dirs
        if not os.path.exists("model"):
            os.makedirs("model")
        if not os.path.exists("result"):
            os.makedirs("result")
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        if not os.path.exists(result_dir):
            os.makedirs(result_dir)

        # training
        total_step = 0
        episode_rewards = []
        episode_scores = []
        with tqdm(total=EPISODES * args.num_step) as pbar:
            for i in range(EPISODES):
                # print("episode: {}".format(i))
                env.reset()
                state = env.get_state()

                episode_length = 0
                episode_reward = 0
                episode_score = 0
                while episode_length < args.num_step:

                    action = agent.choose_action_(state)  # index of action
                    action_phase = phase_list[action]  # actual action
                    # no yellow light
                    next_state, reward = env.step(action_phase)  # one step
                    # last_action_phase = action_phase
                    episode_length += 1
                    total_step += 1
                    episode_reward += reward
                    episode_score += env.get_score()

                    for _ in range(args.phase_step - 1):
                        next_state, reward_ = env.step(action_phase)
                        reward += reward_

                    reward /= args.phase_step

                    pbar.update(1)
                    # store to replay buffer
                    if episode_length > learning_start:
                        agent.remember(state, action_phase, reward, next_state)

                    state = next_state

                    # training
                    if episode_length > learning_start and total_step % update_model_freq == 0:
                        if len(agent.memory) > args.batch_size:
                            agent.replay()

                    # update target Q network
                    if episode_length > learning_start and total_step % update_target_model_freq == 0:
                        agent.update_target_network()

                    # logging
                    # logging.info("\repisode:{}/{}, total_step:{}, action:{}, reward:{}"
                    #             .format(i+1, EPISODES, total_step, action, reward))
                    pbar.set_description(
                        "total_step:{}, episode:{}, episode_step:{}, reward:{}"
                        .format(total_step, i + 1, episode_length, reward))

                # save episode rewards
                episode_rewards.append(
                    episode_reward /
                    args.num_step)  # record episode mean reward
                episode_scores.append(episode_score)
                print("score: {}, mean reward:{}".format(
                    episode_score, episode_reward / args.num_step))

                # save model
                if (i + 1) % args.save_freq == 0:
                    if args.algo != 'DuelDQN':
                        agent.model.save(model_dir +
                                         "/{}-{}.h5".format(args.algo, i + 1))
                    else:
                        agent.save(model_dir + "/{}-ckpt".format(args.algo),
                                   i + 1)

                    # save reward to file
                    df = pd.DataFrame({"rewards": episode_rewards})
                    df.to_csv(result_dir + '/rewards.csv', index=None)

                    df = pd.DataFrame({"rewards": episode_scores})
                    df.to_csv(result_dir + '/scores.csv', index=None)

                    # save figure
                    plot_data_lists([episode_rewards], ['episode reward'],
                                    figure_name=result_dir + '/rewards.pdf')
                    plot_data_lists([episode_scores], ['episode score'],
                                    figure_name=result_dir + '/scores.pdf')

    else:
        # inference
        cityflow_config["saveReplay"] = True
        json.dump(cityflow_config, open(config["cityflow_config_file"], 'w'))
        env = CityFlowEnv(
            lane_phase_info=config["lane_phase_info"],
            intersection_id=config["intersection_id"],  # for single agent
            num_step=args.num_step,
            cityflow_config_file=config["cityflow_config_file"])
        env.reset()

        # build agent
        config["state_size"] = env.state_size
        if args.algo == 'DQN':
            agent = DQNAgent(intersection_id,
                             state_size=config["state_size"],
                             action_size=config["action_size"],
                             batch_size=config["batch_size"],
                             phase_list=phase_list,
                             env=env)

        elif args.algo == 'DDQN':
            agent = DDQNAgent(config)
        elif args.algo == 'DuelDQN':
            agent = DuelingDQNAgent(config)
        agent.load(args.ckpt)

        state = env.get_state()
        scores = []
        for i in range(args.num_step):
            action = agent.choose_action(state)  # index of action
            action_phase = phase_list[action]  # actual action
            next_state, reward = env.step(action_phase)  # one step

            for _ in range(args.phase_step - 1):
                next_state, reward_ = env.step(action_phase)
                reward += reward_

            reward /= args.phase_step

            score = env.get_score()
            scores.append(score)
            state = next_state

            # logging
            logging.info("step:{}/{}, action:{}, reward:{}, score:{}".format(
                i + 1, args.num_step, action, reward, score))

        inf_result_dir = "result/" + args.ckpt.split("/")[1]
        df = pd.DataFrame({"inf_scores": scores})
        df.to_csv(inf_result_dir + '/inf_scores.csv', index=None)
        plot_data_lists([scores], ['inference scores'],
                        figure_name=inf_result_dir + '/inf_scores.pdf')
Example #9
def main():
    date = datetime.now().strftime('%Y%m%d_%H%M%S')
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=3000,
                        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--algo', type=str, default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'], help='choose an algorithm')
    parser.add_argument('--inference', action="store_true", help='inference or training')
    parser.add_argument('--ckpt', type=str, help='inference or training')
    parser.add_argument('--epoch', type=int, default=30, help='number of training epochs')
    parser.add_argument('--save_freq', type=int, default=100, help='model saving frequency')

    args = parser.parse_args()


    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    state_size = config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1
    #state_size = config["state_size"] = 25
    # the single dimension appended to the tail is for the current phase.
    # [vehicle_count for each start lane] + [current_phase]
    logging.info('state size:%s' % state_size)
    config["action_size"] = len(phase_list)
    phase_list = [1,2,3,4,5,6,7,8]
    # build cityflow environment
    env = CityFlowEnv(config)
    EPISODES = 1
    num_step = config['num_step']
    state_size = config['state_size']
    total_step = 0
    #num_step = 10
    with tqdm(total=EPISODES*args.num_step) as pbar:
        for i in range(1, EPISODES+1):
            logging.info('EPISODE >>:%s' % i)
            episode_length = 1
            env.reset()
            t=0
            state = env.get_state()
            state = np.array(list(state['start_lane_vehicle_count'].values()) + [
                state['current_phase']])  # a sample state definition
            # print ('state1:', state)
            state = np.reshape(state, [1, state_size])
            print('state2:', state)
            agent = QLAgent(starting_state=env.get_rl_state(),
                                 state_space=1,
                                 action_space=env.action_space,
                                 alpha=0.1,
                                 gamma=0.99,
                                 exploration_strategy=EpsilonGreedy(initial_epsilon=0.05,
                                                                    min_epsilon=0.005,
                                                                    decay=1.0))

            last_action = phase_list[agent.act(state)]
            print('last action:', last_action)

            print('episode_length:{}, num_step:{}'.format(episode_length, num_step))
            while episode_length < num_step:
                #logging.info('current state:%s' % state)
                logging.info('EPISODE LENGTH >>%s' % episode_length)
                action = agent.act(state)  # index of action
                logging.info('new action:%s' % action)
                action_phase = phase_list[action]  # actual action
                logging.info('action phase:>>%s' % action_phase)
                next_state, reward = env.step(action_phase)  # one step
                logging.info('STATE>>:%s' % next_state)
                logging.info('ACTION PHASE:{}'.format(action_phase))
                logging.info('ELAPSED TIME ON PHASE {} is {}'.format(env.current_phase, env.current_phase_time))
                logging.info('NORM ELAPSED TIME ON PHASE {} is {}'.format(env.current_phase, env.get_elapsed_time()))
                #for n_s in next_state.iteritems():
                #    logging.info(n_s)
                logging.info('REWARD:%s' % reward)

                # last_action_phase = action_phase
                episode_length += 1
                total_step += 1

                pbar.update(1)
                # store to replay buffer
                # prepare state
                agent.learn(new_state=env.get_rl_state(), reward=reward)

                env._compute_step_info()

                state = next_state
                logging.info("episode:{}/{}, total_step:{}, action:{}, reward:{}"
                             .format(i, EPISODES, total_step, action, reward))
                pbar.set_description("total_step:{total_step}, episode:{i}, episode_step:{episode_length}, "
                                     "reward:{reward}")

    env.save_csv()
Example #10
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')
    parser = argparse.ArgumentParser()
    # parser.add_argument('--scenario', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--config',
                        type=str,
                        default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=10**3)
    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    config["state_size"] = len(
        config['lane_phase_info'][intersection_id]['start_lane']
    ) + 1  # 1 is for the current phase. [vehicle_count for each start lane] + [current_phase]
    config["action_size"] = len(phase_list)

    # build cityflow environment
    env = CityFlowEnv(config)

    # build learner
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    coord = tf.train.Coordinator()

    reward_clip = agent_config.reward_clip[1]
    lock = threading.Lock()
    agent = IMPALAAgent(sess=sess,
                        name='global',
                        unroll=agent_config.unroll,
                        state_shape=agent_config.state_shape,
                        output_size=agent_config.output_size,
                        activation=agent_config.activation,
                        final_activation=agent_config.final_activation,
                        hidden=agent_config.hidden,
                        coef=agent_config.entropy_coef,
                        reward_clip=reward_clip)

    # build agents
    n_threads = 16

    thread_list = []

    for i in range(n_threads):
        single_agent = async_agent.Agent(session=sess,
                                         coord=coord,
                                         name='thread_{}'.format(i),
                                         global_network=agent,
                                         reward_clip=reward_clip,
                                         lock=lock)

        thread_list.append(single_agent)

    init = tf.global_variables_initializer()
    sess.run(init)

    for t in thread_list:
        t.start()

    # training
    batch_size = 32
    EPISODES = 11
    learning_start = 300
    update_model_freq = 300
    update_target_model_freq = 1500
    num_step = config['num_step']
    state_size = config['state_size']

    ### the dqn learning code
    if not os.path.exists("model"):
        os.makedirs("model")
    model_dir = "model/{}".format(date)
    os.makedirs(model_dir)

    total_step = 0
    for i in range(EPISODES):
        env.reset()
        state = env.get_state()
        state = np.array(
            list(state['start_lane_vehicle_count'].values()) +
            [state['current_phase']])
        state = np.reshape(state, [1, state_size])

        episode_length = 0
        while episode_length < num_step:
            action = agent.choose_action(state)  # index of action
            action_phase = phase_list[action]  # actual action
            # no yellow light
            next_state, reward = env.step(action_phase)  # one step
            last_action_phase = action_phase
            episode_length += 1
            total_step += 1

            # store to replay buffer
            next_state = np.array(
                list(next_state['start_lane_vehicle_count'].values()) +
                [next_state['current_phase']])
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action_phase, reward, next_state)

            state = next_state

            # training
            if total_step > learning_start and total_step % update_model_freq == 0:
                agent.replay()

            # update target Q network
            if total_step > learning_start and total_step % update_target_model_freq == 0:
                agent.update_target_network()

            # log
            logging.info(
                "episode:{}/{}, total_step:{}, action:{}, reward:{}".format(
                    i, EPISODES, total_step, action, reward))

        # save model
        if i % 10 == 0:
            agent.model.save(model_dir + "/dqn-{}.h5".format(i))
Example #11
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')
    parser = argparse.ArgumentParser()
    # parser.add_argument('--scenario', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--config',
                        type=str,
                        default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=10**3)
    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    config["state_size"] = len(
        config['lane_phase_info'][intersection_id]['start_lane']
    ) + 1  # 1 is for the current phase. [vehicle_count for each start lane] + [current_phase]
    config["action_size"] = len(phase_list)

    # build cityflow environment
    env = CityFlowEnv(config)

    # build agent
    agent = DQNAgent(config)

    # training
    batch_size = 32
    EPISODES = 11
    learning_start = 300
    update_model_freq = 300
    update_target_model_freq = 1500
    num_step = config['num_step']
    state_size = config['state_size']

    ### the dqn learning code
    if not os.path.exists("model"):
        os.makedirs("model")
    model_dir = "model/{}".format(date)
    os.makedirs(model_dir)

    total_step = 0
    for i in range(EPISODES):
        env.reset()
        state = env.get_state()
        state = np.array(
            list(state['start_lane_vehicle_count'].values()) +
            [state['current_phase']])
        state = np.reshape(state, [1, state_size])

        episode_length = 0
        while episode_length < num_step:
            action = agent.choose_action(state)  # index of action
            action_phase = phase_list[action]  # actual action
            # no yellow light
            next_state, reward = env.step(action_phase)  # one step
            last_action_phase = action_phase
            episode_length += 1
            total_step += 1

            # store to replay buffer
            next_state = np.array(
                list(next_state['start_lane_vehicle_count'].values()) +
                [next_state['current_phase']])
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action_phase, reward, next_state)

            state = next_state

            # training
            if total_step > learning_start and total_step % update_model_freq == 0:
                agent.replay()

            # update target Q network
            if total_step > learning_start and total_step % update_target_model_freq == 0:
                agent.update_target_network()

            # log
            logging.info(
                "episode:{}/{}, total_step:{}, action:{}, reward:{}".format(
                    i, EPISODES, total_step, action, reward))

        # save model
        if i % 10 == 0:
            agent.model.save(model_dir + "/dqn-{}.h5".format(i))