# Shared imports assumed by the script excerpts in this section; each excerpt
# comes from a file that declares these (plus its project-local modules, e.g.
# CityFlowEnv/CityFlowEnvM, the agent classes, parse_roadnet, plot_data_lists,
# and, for the IMPALA script below, tensorflow/agent_config/async_agent) at
# its top:
import argparse
import json
import logging
import os
import threading
from datetime import datetime

import numpy as np
import pandas as pd
from tqdm import tqdm


def env_config(args):
    # preparing config
    # for environment
    with open(args.config) as f:
        config = json.load(f)
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    with open(config['cityflow_config_file']) as f:
        cityflow_config = json.load(f)
    roadnet_file = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnet_file)
    config["state_time_span"] = args.state_time_span
    config["time_span"] = args.time_span

    # for agent: keep the ids of all intersections (multi-intersection roadnet)
    intersection_id = list(config['lane_phase_info'].keys())
    config['intersection_id'] = intersection_id
    config["thread_num"] = 1
    # Single-agent variant, kept for reference:
    # phase_list = config['lane_phase_info'][intersection_id]['phase']
    # config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1  # +1 for the current phase: [vehicle_count per start lane] + [current_phase]
    # config["action_size"] = len(phase_list)
    # config["batch_size"] = args.batch_size
    return config
def gen_env_config(args):
    # preparing config; `args` was missing from the signature in the excerpt,
    # but every field read below comes from it
    with open(args.config) as f:
        config = json.load(f)
    with open(config['cityflow_config_file']) as f:
        cityflow_config = json.load(f)
    roadnet_file = cityflow_config['dir'] + cityflow_config['roadnetFile']

    # for environment
    config["num_step"] = args.num_step
    config["state_time_span"] = args.state_time_span
    config["time_span"] = args.time_span
    config["lane_phase_info"] = parse_roadnet(roadnet_file)

    # for agent: single intersection, take the first id
    intersection_id = list(config['lane_phase_info'].keys())[0]
    # intersection_id = list(config['lane_phase_info'].keys())
    config["intersection_id"] = intersection_id
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    # logging.info(phase_list)
    config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane'])
    config["action_size"] = len(phase_list)
    config["batch_size"] = args.batch_size
    return config
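# Hedged usage sketch for gen_env_config (not from the original source): it
# builds an argparse namespace with the same flags the function reads, then
# constructs the environment. The defaults for state_time_span/time_span are
# assumptions; the other values are borrowed from the parsers in this section.
def _example_gen_env_config():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=2000)
    parser.add_argument('--state_time_span', type=int, default=5)   # assumed default
    parser.add_argument('--time_span', type=int, default=30)        # assumed default
    parser.add_argument('--batch_size', type=int, default=32)
    args = parser.parse_args([])  # empty argv: take the defaults
    config = gen_env_config(args)
    return CityFlowEnv(config)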
def main():
    logging.getLogger().setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=2000)
    parser.add_argument('--ckpt', type=str)
    parser.add_argument('--batch_size', type=int, default=32)  # missing from the excerpt but read below
    parser.add_argument('--algo', type=str, default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'],
                        help='choose an algorithm')
    args = parser.parse_args()

    # preparing config
    # for environment
    with open(args.config) as f:
        config = json.load(f)
    config["num_step"] = args.num_step
    with open(config['cityflow_config_file']) as f:
        cityflow_config = json.load(f)
    roadnet_file = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnet_file)

    # for agent
    intersection_id = "intersection_1_1"
    config["intersection_id"] = intersection_id
    config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1  # +1 for the current phase: [vehicle_count per start lane] + [current_phase]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    config["action_size"] = len(phase_list)
    config["batch_size"] = args.batch_size
    logging.info(phase_list)

    # build cityflow environment
    env = CityFlowEnv(config)

    # build agent
    agent = DQNAgent(config)

    # inference
    agent.load(args.ckpt)
    env.reset()
    state = env.get_state()
    for i in range(args.num_step):
        action = agent.choose_action(state)          # index of action
        action_phase = phase_list[action]            # actual action
        next_state, reward = env.step(action_phase)  # one step
        state = next_state

        # logging
        logging.info("step:{}/{}, action:{}, reward:{}".format(
            i, args.num_step, action, reward))
def generate_config(args):
    with open(args.config) as f:
        config = json.load(f)
    with open(config['cityflow_config_file']) as f:
        cityflow_config = json.load(f)
    roadnet_file = cityflow_config['dir'] + cityflow_config['roadnetFile']

    config["num_step"] = args.num_step
    config["lane_phase_info"] = parse_roadnet(roadnet_file)
    intersection_id = list(config['lane_phase_info'].keys())  # all intersections
    config["intersection_id"] = intersection_id
    config["state_time_span"] = args.state_time_span  # set once; the excerpt assigned these twice
    config["time_span"] = args.time_span
    config["thread_num"] = 1
    # phase_list = config['lane_phase_info'][intersection_id]['phase']
    # logging.info(phase_list)
    # config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1  # +1 for the current phase: [vehicle_count per start lane] + [current_phase]
    return config
def env_config(config_env):
    # preparing config
    # for environment
    with open(config_env['config']) as f:
        config = json.load(f)
    config["num_step"] = config_env['num_step']
    # config["replay_data_path"] = "replay"
    with open(config['cityflow_config_file']) as f:
        cityflow_config = json.load(f)
    roadnet_file = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnet_file)
    config["state_time_span"] = config_env['state_time_span']
    config["time_span"] = config_env['time_span']

    # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    # config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1  # +1 for the current phase: [vehicle_count per start lane] + [current_phase]
    config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane'])
    config["action_size"] = len(phase_list)
    config["batch_size"] = config_env['batch_size']
    return config
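# For orientation, a hedged sketch of the lane_phase_info mapping that
# parse_roadnet is assumed to return, inferred from how the config builders
# above index it (intersection ids and lane names are illustrative only):
EXAMPLE_LANE_PHASE_INFO = {
    "intersection_1_1": {
        "phase": [1, 2, 3, 4, 5, 6, 7, 8],               # available signal phases
        "start_lane": ["road_0_1_0_0", "road_0_1_0_1"],  # incoming lanes
    },
}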
import os

from utility import parse_arguments

args = parse_arguments()
roadnet = 'data/{}/roadnet.json'.format(args.scenario)

if __name__ == "__main__":
    ## configuration for both environment and agent
    config = {
        'scenario': args.scenario,
        'data': 'data/{}'.format(args.scenario),
        'roadnet': roadnet,
        'flow': 'data/{}/flow.json'.format(args.scenario),
        # 'replay_data_path': 'data/frontend/web',
        'num_step': args.num_step,
        'lane_phase_info': parse_roadnet(roadnet)  # lane and phase mapping parsed from the roadnet
    }

    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    config['state_size'] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1  # +1 for the current phase
    config['action_size'] = len(phase_list)

    # add visible GPU if necessary
    os.environ["CUDA_VISIBLE_DEVICES"] = ''

    env = CityFlowEnv(config)
    agent = DQNAgent(config)

    # some parameters in dqn
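    # The excerpt ends at the comment above; a hedged sketch of the parameter
    # block it introduces, with values borrowed from the DQN training scripts
    # later in this section (assumptions, not the original values):
    batch_size = 32
    EPISODES = 11
    learning_start = 300
    update_model_freq = 300
    update_target_model_freq = 1500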
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config_multi.json',
                        help='config file')
    parser.add_argument('--algo', type=str, default='MDQN',
                        choices=['MDQN'], help='choose an algorithm')
    parser.add_argument('--inference', action="store_true",
                        help='run inference instead of training')
    parser.add_argument('--ckpt', type=str, help='checkpoint path for inference')
    parser.add_argument('--epoch', type=int, default=10, help='number of training epochs')
    parser.add_argument('--num_step', type=int, default=1500,
                        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--save_freq', type=int, default=1, help='model saving frequency')
    parser.add_argument('--batch_size', type=int, default=32, help='batch size for training')
    parser.add_argument('--phase_step', type=int, default=15, help='seconds of one phase')
    args = parser.parse_args()

    with open(args.config) as f:
        config = json.load(f)
    config["num_step"] = args.num_step
    with open(config['cityflow_config_file']) as f:
        cityflow_config = json.load(f)
    roadnet_file = cityflow_config['dir'] + cityflow_config['roadnetFile']
    cityflow_config["saveReplay"] = args.inference  # only save replays when inferring
    with open(config["cityflow_config_file"], 'w') as f:
        json.dump(cityflow_config, f)

    config["lane_phase_info"] = parse_roadnet(roadnet_file)
    config["batch_size"] = args.batch_size
    intersection_id = list(config['lane_phase_info'].keys())  # all intersections
    config["intersection_id"] = intersection_id
    phase_list = {id_: config["lane_phase_info"][id_]["phase"]
                  for id_ in intersection_id}
    config["phase_list"] = phase_list

    model_dir = "model/{}_{}".format(args.algo, date)
    result_dir = "result/{}_{}".format(args.algo, date)
    config["result_dir"] = result_dir

    # parameters for training and inference
    EPISODES = args.epoch
    learning_start = 300
    update_model_freq = args.batch_size // 3
    update_target_model_freq = 300 // args.phase_step

    # make dirs
    for d in ("model", "result", model_dir, result_dir):
        if not os.path.exists(d):
            os.makedirs(d)

    env = CityFlowEnvM(config["lane_phase_info"],
                       intersection_id,
                       num_step=config["num_step"],
                       thread_num=1,
                       cityflow_config_file=config["cityflow_config_file"])
    config["state_size"] = env.state_size

    if args.algo == 'MDQN':
        Magent = MDQNAgent(intersection_id,
                           state_size=config["state_size"],
                           batch_size=config["batch_size"],
                           phase_list=config["phase_list"],  # action_size is len(phase_list[id_])
                           env=env)
    else:
        raise Exception("{} algorithm not implemented now".format(args.algo))

    if not args.inference:  # training
        total_step = 0
        episode_rewards = {id_: [] for id_ in intersection_id}
        episode_scores = {id_: [] for id_ in intersection_id}
        with tqdm(total=EPISODES * args.num_step) as pbar:
            for i in range(EPISODES):
                env.reset()
                state = env.get_state()

                episode_length = 0
                episode_reward = {id_: 0 for id_ in intersection_id}  # for every agent
                episode_score = {id_: 0 for id_ in intersection_id}   # for every agent
                while episode_length < args.num_step:
                    action = Magent.choose_action(state)  # index of action
                    action_phase = {}
                    for id_, a in action.items():
                        action_phase[id_] = phase_list[id_][a]

                    next_state, reward = env.step(action_phase)  # one step
                    score = env.get_score()

                    # hold every phase for a consistent amount of time
                    for _ in range(args.phase_step - 1):
                        next_state, reward_ = env.step(action_phase)
                        score_ = env.get_score()
                        for id_ in intersection_id:
                            reward[id_] += reward_[id_]
                            score[id_] += score_[id_]
                    for id_ in intersection_id:
                        reward[id_] /= args.phase_step
                        score[id_] /= args.phase_step

                    for id_ in intersection_id:
                        episode_reward[id_] += reward[id_]
                        episode_score[id_] += score[id_]
                    episode_length += 1
                    total_step += 1
                    pbar.update(1)

                    # store to replay buffer
                    if episode_length > learning_start:
                        Magent.remember(state, action_phase, reward, next_state)
                    state = next_state

                    # training
                    if episode_length > learning_start and total_step % update_model_freq == 0:
                        if len(Magent.agents[intersection_id[0]].memory) > args.batch_size:
                            Magent.replay()

                    # update target Q network
                    if episode_length > learning_start and total_step % update_target_model_freq == 0:
                        Magent.update_target_network()

                    print_reward = {'_'.join(k.split('_')[1:]): v for k, v in reward.items()}
                    pbar.set_description("t_st:{}, epi:{}, st:{}, r:{}".format(
                        total_step, i + 1, episode_length, print_reward))

                # compute episode mean reward
                for id_ in intersection_id:
                    episode_reward[id_] /= args.num_step

                # save episode rewards
                for id_ in intersection_id:
                    episode_rewards[id_].append(episode_reward[id_])
                    episode_scores[id_].append(episode_score[id_])

                print_episode_reward = {'_'.join(k.split('_')[1:]): v for k, v in episode_reward.items()}
                print_episode_score = {'_'.join(k.split('_')[1:]): v for k, v in episode_score.items()}
                print('\n')
                print("Episode:{}, Mean reward:{}, Score: {}".format(
                    i + 1, print_episode_reward, print_episode_score))

                # save model
                if (i + 1) % args.save_freq == 0:
                    if args.algo == 'MDQN':
                        # Magent.save(model_dir + "/{}-ckpt".format(args.algo), i + 1)
                        Magent.save(model_dir + "/{}-{}.h5".format(args.algo, i + 1))

                # save rewards and scores to file
                df = pd.DataFrame(episode_rewards)
                df.to_csv(result_dir + '/rewards.csv', index=None)
                df = pd.DataFrame(episode_scores)
                df.to_csv(result_dir + '/scores.csv', index=None)

                # save figures
                plot_data_lists([episode_rewards[id_] for id_ in intersection_id],
                                intersection_id,
                                figure_name=result_dir + '/rewards.pdf')
                plot_data_lists([episode_scores[id_] for id_ in intersection_id],
                                intersection_id,
                                figure_name=result_dir + '/scores.pdf')
    else:  # inference
        Magent.load(args.ckpt)
        episode_reward = {id_: [] for id_ in intersection_id}  # for every agent
        episode_score = {id_: [] for id_ in intersection_id}   # for every agent
        state = env.get_state()
        for i in range(args.num_step):
            action = Magent.choose_action(state)  # index of action
            action_phase = {}
            for id_, a in action.items():
                action_phase[id_] = phase_list[id_][a]

            # one step, holding the phase for phase_step ticks
            next_state, reward = env.step(action_phase)
            score = env.get_score()
            for _ in range(args.phase_step - 1):
                next_state, reward_ = env.step(action_phase)
                score_ = env.get_score()
                for id_ in intersection_id:
                    reward[id_] += reward_[id_]
                    score[id_] += score_[id_]
            for id_ in intersection_id:
                reward[id_] /= args.phase_step
                score[id_] /= args.phase_step

            for id_ in intersection_id:
                episode_reward[id_].append(reward[id_])
                episode_score[id_].append(score[id_])
            state = next_state
            print("step:{}/{}, action:{}, reward:{}, score:{}".format(
                i + 1, args.num_step, action, reward, score))

        mean_reward = {}
        mean_score = {}
        for id_ in intersection_id:
            mean_reward[id_] = np.mean(episode_reward[id_])
            mean_score[id_] = np.mean(episode_score[id_])
        print('\n')
        print("[Inference] Mean reward:{}, Mean score:{}".format(mean_reward, mean_score))
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json',
                        help='config file')
    parser.add_argument('--algo', type=str, default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'], help='choose an algorithm')
    parser.add_argument('--inference', action="store_true",
                        help='run inference instead of training')
    parser.add_argument('--ckpt', type=str, help='checkpoint path for inference')
    parser.add_argument('--epoch', type=int, default=10, help='number of training epochs')
    parser.add_argument('--num_step', type=int, default=200,
                        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--save_freq', type=int, default=1, help='model saving frequency')
    parser.add_argument('--batch_size', type=int, default=64, help='batch size for training')
    parser.add_argument('--phase_step', type=int, default=15, help='seconds of one phase')
    args = parser.parse_args()

    # preparing config
    # for environment
    with open(args.config) as f:
        config = json.load(f)
    config["num_step"] = args.num_step
    assert "1x1" in config['cityflow_config_file'], "please use 1x1 config file for cityflow"
    # config["replay_data_path"] = "replay"
    with open(config['cityflow_config_file']) as f:
        cityflow_config = json.load(f)
    roadnet_file = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnet_file)

    # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    config["intersection_id"] = intersection_id
    phase_list = config['lane_phase_info'][config["intersection_id"]]['phase']
    config["action_size"] = len(phase_list)
    config["batch_size"] = args.batch_size
    logging.info(phase_list)

    model_dir = "model/{}_{}".format(args.algo, date)
    result_dir = "result/{}_{}".format(args.algo, date)
    config["result_dir"] = result_dir

    # parameters for training and inference
    EPISODES = args.epoch
    learning_start = 300
    # update_model_freq = args.batch_size
    update_model_freq = 1
    update_target_model_freq = 10

    if not args.inference:
        # build cityflow environment
        cityflow_config["saveReplay"] = True
        with open(config["cityflow_config_file"], 'w') as f:
            json.dump(cityflow_config, f)
        env = CityFlowEnv(lane_phase_info=config["lane_phase_info"],
                          intersection_id=config["intersection_id"],  # single agent
                          num_step=args.num_step,
                          cityflow_config_file=config["cityflow_config_file"])

        # build agent
        config["state_size"] = env.state_size
        if args.algo == 'DQN':
            agent = DQNAgent(intersection_id,
                             state_size=config["state_size"],
                             action_size=config["action_size"],
                             batch_size=config["batch_size"],
                             phase_list=phase_list,
                             env=env)
        elif args.algo == 'DDQN':
            agent = DDQNAgent(config)
        elif args.algo == 'DuelDQN':
            agent = DuelingDQNAgent(config)

        # make dirs
        for d in ("model", "result", model_dir, result_dir):
            if not os.path.exists(d):
                os.makedirs(d)

        # training
        total_step = 0
        episode_rewards = []
        episode_scores = []
        with tqdm(total=EPISODES * args.num_step) as pbar:
            for i in range(EPISODES):
                env.reset()
                state = env.get_state()

                episode_length = 0
                episode_reward = 0
                episode_score = 0
                while episode_length < args.num_step:
                    action = agent.choose_action_(state)  # index of action
                    action_phase = phase_list[action]     # actual action

                    # no yellow light
                    next_state, reward = env.step(action_phase)  # one step
                    # last_action_phase = action_phase
                    episode_length += 1
                    total_step += 1
                    episode_reward += reward
                    episode_score += env.get_score()

                    # hold the phase for a consistent amount of time
                    for _ in range(args.phase_step - 1):
                        next_state, reward_ = env.step(action_phase)
                        reward += reward_
                    reward /= args.phase_step

                    pbar.update(1)
                    # store to replay buffer
                    if episode_length > learning_start:
                        agent.remember(state, action_phase, reward, next_state)
                    state = next_state

                    # training
                    if episode_length > learning_start and total_step % update_model_freq == 0:
                        if len(agent.memory) > args.batch_size:
                            agent.replay()

                    # update target Q network
                    if episode_length > learning_start and total_step % update_target_model_freq == 0:
                        agent.update_target_network()

                    pbar.set_description(
                        "total_step:{}, episode:{}, episode_step:{}, reward:{}".format(
                            total_step, i + 1, episode_length, reward))

                # save episode rewards
                episode_rewards.append(episode_reward / args.num_step)  # record episode mean reward
                episode_scores.append(episode_score)
                print("score: {}, mean reward:{}".format(
                    episode_score, episode_reward / args.num_step))

                # save model
                if (i + 1) % args.save_freq == 0:
                    if args.algo != 'DuelDQN':
                        agent.model.save(model_dir + "/{}-{}.h5".format(args.algo, i + 1))
                    else:
                        agent.save(model_dir + "/{}-ckpt".format(args.algo), i + 1)

                # save rewards and scores to file
                df = pd.DataFrame({"rewards": episode_rewards})
                df.to_csv(result_dir + '/rewards.csv', index=None)
                df = pd.DataFrame({"scores": episode_scores})  # key was mislabeled "rewards" in the excerpt
                df.to_csv(result_dir + '/scores.csv', index=None)

                # save figures
                plot_data_lists([episode_rewards], ['episode reward'],
                                figure_name=result_dir + '/rewards.pdf')
                plot_data_lists([episode_scores], ['episode score'],
                                figure_name=result_dir + '/scores.pdf')
    else:
        # inference
        cityflow_config["saveReplay"] = True
        with open(config["cityflow_config_file"], 'w') as f:
            json.dump(cityflow_config, f)
        env = CityFlowEnv(lane_phase_info=config["lane_phase_info"],
                          intersection_id=config["intersection_id"],  # single agent
                          num_step=args.num_step,
                          cityflow_config_file=config["cityflow_config_file"])
        env.reset()

        # build agent
        config["state_size"] = env.state_size
        if args.algo == 'DQN':
            agent = DQNAgent(intersection_id,
                             state_size=config["state_size"],
                             action_size=config["action_size"],
                             batch_size=config["batch_size"],
                             phase_list=phase_list,
                             env=env)
        elif args.algo == 'DDQN':
            agent = DDQNAgent(config)
        elif args.algo == 'DuelDQN':
            agent = DuelingDQNAgent(config)
        agent.load(args.ckpt)

        state = env.get_state()
        scores = []
        for i in range(args.num_step):
            action = agent.choose_action(state)          # index of action
            action_phase = phase_list[action]            # actual action
            next_state, reward = env.step(action_phase)  # one step
            for _ in range(args.phase_step - 1):
                next_state, reward_ = env.step(action_phase)
                reward += reward_
            reward /= args.phase_step

            score = env.get_score()
            scores.append(score)
            state = next_state

            # logging
            logging.info("step:{}/{}, action:{}, reward:{}, score:{}".format(
                i + 1, args.num_step, action, reward, score))

        inf_result_dir = "result/" + args.ckpt.split("/")[1]
        df = pd.DataFrame({"inf_scores": scores})
        df.to_csv(inf_result_dir + '/inf_scores.csv', index=None)
        plot_data_lists([scores], ['inference scores'],
                        figure_name=inf_result_dir + '/inf_scores.pdf')
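# The repeated existence checks before os.makedirs in the scripts above can be
# collapsed with exist_ok (Python 3.2+); a hedged sketch of an equivalent
# helper (the name is illustrative, not from the original source):
def make_run_dirs(model_dir, result_dir):
    for d in ("model", "result", model_dir, result_dir):
        os.makedirs(d, exist_ok=True)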
def main():
    date = datetime.now().strftime('%Y%m%d_%H%M%S')

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=3000,
                        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--algo', type=str, default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'], help='choose an algorithm')
    parser.add_argument('--inference', action="store_true",
                        help='run inference instead of training')
    parser.add_argument('--ckpt', type=str, help='checkpoint path for inference')
    parser.add_argument('--epoch', type=int, default=30, help='number of training epochs')
    parser.add_argument('--save_freq', type=int, default=100, help='model saving frequency')
    args = parser.parse_args()

    # preparing config
    # for environment
    with open(args.config) as f:
        config = json.load(f)
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    with open(config['cityflow_config_file']) as f:
        cityflow_config = json.load(f)
    roadnet_file = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnet_file)

    # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    state_size = config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1
    # state_size = config["state_size"] = 25
    # the single dimension appended to the tail is for the current phase:
    # [vehicle_count for each start lane] + [current_phase]
    logging.info('state size:%s' % state_size)
    config["action_size"] = len(phase_list)
    phase_list = [1, 2, 3, 4, 5, 6, 7, 8]  # override with a fixed 8-phase list

    # build cityflow environment
    env = CityFlowEnv(config)

    EPISODES = 1
    num_step = config['num_step']
    state_size = config['state_size']
    total_step = 0
    with tqdm(total=EPISODES * args.num_step) as pbar:
        for i in range(1, EPISODES + 1):
            logging.info('EPISODE >>:%s' % i)
            episode_length = 1
            env.reset()
            state = env.get_state()
            state = np.array(list(state['start_lane_vehicle_count'].values())
                             + [state['current_phase']])  # a sample state definition
            state = np.reshape(state, [1, state_size])
            print('state:', state)

            agent = QLAgent(starting_state=env.get_rl_state(),
                            state_space=1,
                            action_space=env.action_space,
                            alpha=0.1,
                            gamma=0.99,
                            exploration_strategy=EpsilonGreedy(initial_epsilon=0.05,
                                                               min_epsilon=0.005,
                                                               decay=1.0))
            last_action = phase_list[agent.act(state)]
            print('last action:', last_action)
            print('episode_length:{}, num_step:{}'.format(episode_length, num_step))

            while episode_length < num_step:
                logging.info('EPISODE LENGTH >>%s' % episode_length)
                action = agent.act(state)          # index of action
                logging.info('new action:%s' % action)
                action_phase = phase_list[action]  # actual action
                logging.info('action phase:>>%s' % action_phase)
                next_state, reward = env.step(action_phase)  # one step
                logging.info('STATE>>:%s' % next_state)
                logging.info('ACTION PHASE:{}'.format(action_phase))
                logging.info('ELAPSED TIME ON PHASE {} is {}'.format(
                    env.current_phase, env.current_phase_time))
                logging.info('NORM ELAPSED TIME ON PHASE {} is {}'.format(
                    env.current_phase, env.get_elapsed_time()))
                logging.info('REWARD:%s' % reward)

                # last_action_phase = action_phase
                episode_length += 1
                total_step += 1
                pbar.update(1)

                # Q-learning update on the new state
                agent.learn(new_state=env.get_rl_state(), reward=reward)
                env._compute_step_info()
                state = next_state

                logging.info("episode:{}/{}, total_step:{}, action:{}, reward:{}".format(
                    i, EPISODES, total_step, action, reward))
                # the excerpt passed this string without .format(), so the
                # placeholders printed literally; fixed here
                pbar.set_description(
                    "total_step:{}, episode:{}, episode_step:{}, reward:{}".format(
                        total_step, i, episode_length, reward))

            env.save_csv()
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=10**3)
    args = parser.parse_args()

    # preparing config
    # for environment
    with open(args.config) as f:
        config = json.load(f)
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    with open(config['cityflow_config_file']) as f:
        cityflow_config = json.load(f)
    roadnet_file = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnet_file)

    # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1  # +1 for the current phase: [vehicle_count per start lane] + [current_phase]
    config["action_size"] = len(phase_list)

    # build cityflow environment
    env = CityFlowEnv(config)

    # build learner
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    coord = tf.train.Coordinator()
    reward_clip = agent_config.reward_clip[1]
    lock = threading.Lock()
    agent = IMPALAAgent(sess=sess,
                        name='global',
                        unroll=agent_config.unroll,
                        state_shape=agent_config.state_shape,
                        output_size=agent_config.output_size,
                        activation=agent_config.activation,
                        final_activation=agent_config.final_activation,
                        hidden=agent_config.hidden,
                        coef=agent_config.entropy_coef,
                        reward_clip=reward_clip)

    # build worker agents
    n_threads = 16
    thread_list = []
    for i in range(n_threads):
        single_agent = async_agent.Agent(session=sess,
                                         coord=coord,
                                         name='thread_{}'.format(i),
                                         global_network=agent,
                                         reward_clip=reward_clip,
                                         lock=lock)
        thread_list.append(single_agent)

    init = tf.global_variables_initializer()
    sess.run(init)
    for t in thread_list:
        t.start()

    # training
    batch_size = 32
    EPISODES = 11
    learning_start = 300
    update_model_freq = 300
    update_target_model_freq = 1500
    num_step = config['num_step']
    state_size = config['state_size']

    ### the DQN learning code
    if not os.path.exists("model"):
        os.makedirs("model")
    model_dir = "model/{}".format(date)
    os.makedirs(model_dir)

    total_step = 0
    for i in range(EPISODES):
        env.reset()
        state = env.get_state()
        state = np.array(list(state['start_lane_vehicle_count'].values())
                         + [state['current_phase']])
        state = np.reshape(state, [1, state_size])

        episode_length = 0
        while episode_length < num_step:
            action = agent.choose_action(state)  # index of action
            action_phase = phase_list[action]    # actual action

            # no yellow light
            next_state, reward = env.step(action_phase)  # one step
            last_action_phase = action_phase
            episode_length += 1
            total_step += 1

            # store to replay buffer
            next_state = np.array(list(next_state['start_lane_vehicle_count'].values())
                                  + [next_state['current_phase']])
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action_phase, reward, next_state)
            state = next_state

            # training
            if total_step > learning_start and total_step % update_model_freq == 0:
                agent.replay()

            # update target Q network
            if total_step > learning_start and total_step % update_target_model_freq == 0:
                agent.update_target_network()

            # log
            logging.info("episode:{}/{}, total_step:{}, action:{}, reward:{}".format(
                i, EPISODES, total_step, action, reward))

        # save model
        if i % 10 == 0:
            agent.model.save(model_dir + "/dqn-{}.h5".format(i))
def main():
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=10**3)
    args = parser.parse_args()

    # preparing config
    # for environment
    with open(args.config) as f:
        config = json.load(f)
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    with open(config['cityflow_config_file']) as f:
        cityflow_config = json.load(f)
    roadnet_file = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnet_file)

    # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1  # +1 for the current phase: [vehicle_count per start lane] + [current_phase]
    config["action_size"] = len(phase_list)

    # build cityflow environment
    env = CityFlowEnv(config)

    # build agent
    agent = DQNAgent(config)

    # training
    batch_size = 32
    EPISODES = 11
    learning_start = 300
    update_model_freq = 300
    update_target_model_freq = 1500
    num_step = config['num_step']
    state_size = config['state_size']

    ### the DQN learning code
    if not os.path.exists("model"):
        os.makedirs("model")
    model_dir = "model/{}".format(date)
    os.makedirs(model_dir)

    total_step = 0
    for i in range(EPISODES):
        env.reset()
        state = env.get_state()
        state = np.array(list(state['start_lane_vehicle_count'].values())
                         + [state['current_phase']])
        state = np.reshape(state, [1, state_size])

        episode_length = 0
        while episode_length < num_step:
            action = agent.choose_action(state)  # index of action
            action_phase = phase_list[action]    # actual action

            # no yellow light
            next_state, reward = env.step(action_phase)  # one step
            last_action_phase = action_phase
            episode_length += 1
            total_step += 1

            # store to replay buffer
            next_state = np.array(list(next_state['start_lane_vehicle_count'].values())
                                  + [next_state['current_phase']])
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action_phase, reward, next_state)
            state = next_state

            # training
            if total_step > learning_start and total_step % update_model_freq == 0:
                agent.replay()

            # update target Q network
            if total_step > learning_start and total_step % update_target_model_freq == 0:
                agent.update_target_network()

            # log
            logging.info("episode:{}/{}, total_step:{}, action:{}, reward:{}".format(
                i, EPISODES, total_step, action, reward))

        # save model
        if i % 10 == 0:
            agent.model.save(model_dir + "/dqn-{}.h5".format(i))
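# Assumed entry point shared by the script excerpts above (the usual guard;
# not shown in the excerpts themselves):
if __name__ == '__main__':
    main()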