def __init__(self, configs):
    """Initialize the super-DQN agent.

    Merges the user config with defaults (creating a checkpoint
    directory for fresh train/simulate runs), caches hyperparameters,
    then builds the main/target SuperQNetwork pair with its Adadelta
    optimizer, hard-synced target, and step LR scheduler.

    Args:
        configs (dict): experiment configuration; must contain 'mode',
            'current_path', 'time_data', 'tl_rl_list',
            'traffic_node_info' and the hyperparameter keys read below.
    """
    super().__init__(configs)
    if configs['mode'] == 'train' or configs['mode'] == 'simulate':
        # Fresh run: create the model checkpoint directory.
        os.mkdir(
            os.path.join(self.configs['current_path'], 'training_data',
                         self.configs['time_data'], 'model'))
        self.configs = merge_dict(configs, DEFAULT_CONFIG)
    else:  # test
        self.configs = merge_dict_non_conflict(configs, DEFAULT_CONFIG)

    self.num_agent = len(self.configs['tl_rl_list'])
    self.state_space = self.configs['state_space']
    # action space
    # rate action space
    self.rate_action_space = self.configs['rate_action_space']
    # time action space
    self.time_action_space = self.configs['time_action_space']
    self.action_size = self.configs['action_size']
    self.gamma = self.configs['gamma']
    self.epsilon = self.configs['epsilon']
    self.criterion = nn.MSELoss()
    self.lr = self.configs['lr']
    self.lr_decay_rate = self.configs['lr_decay_rate']
    self.epsilon_decay_rate = self.configs['epsilon_decay_rate']
    self.batch_size = self.configs['batch_size']
    self.device = self.configs['device']
    self.running_loss = 0
    # self.writer=writer
    # NN composition
    # Sized per intersection: each node's phase count picks the rate
    # action space entry.
    self.rate_key_list = list()
    for key in self.configs['traffic_node_info'].keys():
        # In 'test' mode the reloaded config stores phase counts as str
        # keys; otherwise the raw int is used.
        if configs['mode'] == 'test':
            rate_key = str(
                self.configs['traffic_node_info'][key]['num_phase'])
        else:
            # BUGFIX: previously only mode == 'train' assigned rate_key,
            # so 'simulate' (accepted above) raised NameError below.
            rate_key = self.configs['traffic_node_info'][key]['num_phase']
        self.rate_key_list.append(rate_key)
    # NOTE(review): the networks are built once, with the rate_key of
    # the LAST traffic node — confirm all nodes share a phase count.
    self.mainSuperQNetwork = SuperQNetwork(
        self.state_space, self.rate_action_space[rate_key],
        self.time_action_space[0], self.configs)
    self.targetSuperQNetwork = SuperQNetwork(
        self.state_space, self.rate_action_space[rate_key],
        self.time_action_space[0], self.configs)
    # hard update, optimizer setting
    self.optimizer = optim.Adadelta(self.mainSuperQNetwork.parameters(),
                                    lr=self.configs['lr'])
    hard_update(self.targetSuperQNetwork, self.mainSuperQNetwork)
    self.lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer=self.optimizer,
        step_size=self.configs['lr_decay_period'],
        gamma=self.configs['lr_decay_rate'])
def train(flags, time_data, configs, sumoConfig):
    """Configure and launch a city-DQN training run under SUMO.

    Args:
        flags: parsed CLI flags (disp, algorithm, randomness, network).
        time_data (str): timestamp identifying this run.
        configs (dict): experiment configuration, mutated in place.
        sumoConfig (str): path to the .sumocfg file to run.
    """
    # check gui option
    if flags.disp:  # was `flags.disp == True`; truthiness is idiomatic
        sumoBinary = checkBinary('sumo-gui')
    else:
        sumoBinary = checkBinary('sumo')
    sumoCmd = [sumoBinary, "-c", sumoConfig, '--start']
    # configs setting
    configs['num_agent'] = len(configs['tl_rl_list'])
    configs['algorithm'] = flags.algorithm.lower()
    configs['randomness'] = flags.randomness
    print("training algorithm: ", configs['algorithm'])
    configs['action_size'] = 2
    # state space is decided in map.py
    # NOTE(review): only the grid network overrides state_space here;
    # confirm non-grid networks get theirs from the map configs.
    if flags.network.lower() == 'grid':
        configs['state_space'] = 10
    configs['model'] = 'city'
    from train import city_dqn_train
    from configs import SUPER_DQN_TRAFFIC_CONFIGS
    configs = merge_dict_non_conflict(configs, SUPER_DQN_TRAFFIC_CONFIGS)
    city_dqn_train(configs, time_data, sumoCmd)
def main(args):
    """Program entry point: seed RNGs, build the network description,
    then dispatch to train / test / simulate based on CLI flags.

    Args:
        args: raw command-line argument list passed to parse_args.
    """
    random_seed = 20000
    random.seed(random_seed)
    torch.manual_seed(random_seed)
    np.random.seed(random_seed)
    flags = parse_args(args)
    use_cuda = torch.cuda.is_available()
    # was `flags.gpu == True`; truthiness is idiomatic
    device = torch.device("cuda" if use_cuda and flags.gpu else "cpu")
    # device = torch.device('cpu')
    print("Using device: {}".format(device))
    configs = EXP_CONFIGS
    configs['device'] = str(device)
    configs['current_path'] = os.path.dirname(os.path.abspath(__file__))
    configs['mode'] = flags.mode.lower()
    time_data = time.strftime('%m-%d_%H-%M-%S', time.localtime(time.time()))
    configs['time_data'] = str(time_data)
    # create data/<mode>/ under the package dir if the data root exists
    if os.path.exists(os.path.join(os.path.dirname(__file__), 'data')):
        mode_dir = os.path.join(os.path.dirname(__file__), 'data',
                                configs['mode'])
        if not os.path.exists(mode_dir):  # was `== False`
            os.mkdir(mode_dir)
    configs['file_name'] = configs['time_data']
    # check the network
    configs['network'] = flags.network.lower()
    if configs['network'] == 'grid':
        # change this import to switch networks (to be revised)
        from Network.grid import GridNetwork
        configs['grid_num'] = 5
        configs['scale'] = 1
        if configs['mode'] == 'simulate':
            configs['file_name'] = '{}x{}grid'.format(configs['grid_num'],
                                                      configs['grid_num'])
        elif configs['mode'] == 'test':  # test
            configs['file_name'] = flags.replay_name.lower()
        # Generating Network
        network = GridNetwork(configs, grid_num=configs['grid_num'])
        network.generate_cfg(True, configs['mode'])
        NET_CONFIGS = network.get_configs()
        configs = merge_dict_non_conflict(configs, NET_CONFIGS)
        # Generating Network
    else:
        # load from map file
        print("Load from map file")
        from Network.map import MapNetwork
        # TODO: grid_num should be removed
        configs['grid_num'] = 3
        configs['num_lanes'] = 2
        configs['load_file_name'] = configs['network']
        mapnet = MapNetwork(configs)
        MAP_CONFIGS = mapnet.get_tl_from_xml()
        # was a manual `for key in MAP_CONFIGS.keys()` copy loop
        configs.update(MAP_CONFIGS)
        mapnet.gen_net_from_xml()
        mapnet.gen_rou_from_xml()
        mapnet.generate_cfg(True, configs['mode'])
        mapnet._generate_add_xml()
        # rendering scale per known map (was a chain of repeated ifs);
        # unknown maps leave 'scale' unset, same as before
        scale_by_network = {
            '3x3grid': str(1),
            '5x5grid': str(1),
            '5x5grid_v2': str(1.5),
            'dunsan': str(1),
            'dunsan_v2': str(0.8),
        }
        if configs['network'] in scale_by_network:
            configs['scale'] = scale_by_network[configs['network']]
        print("Scale:", configs['scale'])
    # check the environment
    if 'SUMO_HOME' in os.environ:
        tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
        sys.path.append(tools)
    else:
        sys.exit("please declare environment variable 'SUMO_HOME'")
    # check the mode
    if configs['mode'] == 'train':
        # init train setting
        configs['update_type'] = flags.update_type
        sumoConfig = os.path.join(configs['current_path'], 'training_data',
                                  time_data, 'net_data',
                                  configs['file_name'] + '_train.sumocfg')
        train(flags, time_data, configs, sumoConfig)
    elif configs['mode'] == 'test':
        configs['file_name'] = flags.replay_name
        configs['replay_name'] = configs['time_data']
        # NOTE(review): the test cfg path is built from THIS run's
        # time_data, not the replayed run's — confirm replays resolve.
        sumoConfig = os.path.join(configs['current_path'], 'training_data',
                                  time_data, 'net_data',
                                  configs['time_data'] + '_test.sumocfg')
        test(flags, configs, sumoConfig)
    else:  # simulate
        sumoConfig = os.path.join(configs['current_path'], 'Net_data',
                                  configs['file_name'] + '_simulate.sumocfg')
        simulate(flags, configs, sumoConfig)
def __init__(self, configs):
    """Initialize the hierarchical DQN agent.

    Merges the user config with defaults (creating a checkpoint
    directory for fresh train/simulate runs), caches hyperparameters,
    builds one shared main/target SuperQNetwork pair plus a per-
    intersection main/target QNetwork pair, hard-syncs all targets, and
    creates one Adam optimizer per intersection.

    Args:
        configs (dict): experiment configuration; must contain 'mode',
            'current_path', 'time_data', 'tl_rl_list',
            'traffic_node_info' and the hyperparameter keys read below.
    """
    super().__init__(configs)
    if configs['mode'] == 'train' or configs['mode'] == 'simulate':
        # Fresh run: create the model checkpoint directory.
        os.mkdir(
            os.path.join(self.configs['current_path'], 'training_data',
                         self.configs['time_data'], 'model'))
        self.configs = merge_dict(configs, DEFAULT_CONFIG)
    else:  # test
        self.configs = merge_dict_non_conflict(configs, DEFAULT_CONFIG)
    self.num_agent = len(self.configs['tl_rl_list'])
    self.state_space = self.configs['state_space']
    # action space
    # rate action space
    self.rate_action_space = self.configs['rate_action_space']
    # time action space
    self.time_action_space = self.configs['time_action_space']
    self.action_size = self.configs['action_size']
    self.gamma = self.configs['gamma']
    self.epsilon = self.configs['epsilon']
    self.criterion = nn.SmoothL1Loss()
    self.lr = self.configs['lr']
    self.lr_decay_rate = self.configs['lr_decay_rate']
    self.epsilon_decay_rate = self.configs['epsilon_decay_rate']
    self.batch_size = self.configs['batch_size']
    self.device = self.configs['device']
    self.running_loss = 0
    self.super_output_size = int(self.num_agent * 2)
    self.super_input_size = int(self.num_agent)
    # NN composition
    self.mainSuperQNetwork = SuperQNetwork(self.super_input_size,
                                           self.super_output_size,
                                           self.configs)
    self.targetSuperQNetwork = SuperQNetwork(self.super_input_size,
                                             self.super_output_size,
                                             self.configs)
    # Sized per intersection: each node's phase count picks the rate
    # action space entry.
    self.mainQNetwork = list()
    self.targetQNetwork = list()
    self.rate_key_list = list()
    for i, key in enumerate(self.configs['traffic_node_info'].keys()):
        # In 'test' mode the reloaded config stores phase counts as str
        # keys; otherwise the raw int is used.
        if configs['mode'] == 'test':
            rate_key = str(
                self.configs['traffic_node_info'][key]['num_phase'])
        else:
            # BUGFIX: previously only mode == 'train' assigned rate_key,
            # so 'simulate' (accepted above) raised NameError below.
            rate_key = self.configs['traffic_node_info'][key]['num_phase']
        self.rate_key_list.append(rate_key)
        self.mainQNetwork.append(
            QNetwork(self.super_output_size,
                     self.rate_action_space[rate_key],
                     self.time_action_space[i], self.configs))
        self.targetQNetwork.append(
            QNetwork(self.super_output_size,
                     self.rate_action_space[rate_key],
                     self.time_action_space[i], self.configs))
    # hard update, optimizer setting
    self.optimizer = list()
    hard_update(self.targetSuperQNetwork, self.mainSuperQNetwork)
    for targetQ, mainQ in zip(self.targetQNetwork, self.mainQNetwork):
        hard_update(targetQ, mainQ)
        # NOTE(review): the shared SuperQNetwork parameters are placed
        # in EVERY per-agent optimizer, so they receive one Adam step
        # per agent per update — confirm this is intentional.
        params = chain(self.mainSuperQNetwork.parameters(),
                       mainQ.parameters())
        self.optimizer.append(optim.Adam(params, lr=self.lr))
    # Network
    print("========SUPER NETWORK==========\n", self.mainSuperQNetwork)
    print("========NETWORK==========\n")
    for i in range(self.num_agent):
        print(self.mainQNetwork[i])