def __init__(self, configs):
    self.configs = merge_dict(configs, DEFAULT_CONFIG)  # self.configs=configs
    self.model = PolicyNet(self.configs)
    self.gamma = self.configs['gamma']
    self.lr = self.configs['lr']
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
    self.data = []  # per-episode rollout buffer
    # smallest representable double, used to keep normalization numerically stable
    self.eps = torch.tensor(np.finfo(np.double).eps.item())
    self.lr_decay_rate = 0.99
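
# NOTE: A minimal, hypothetical sketch (not the repository's actual update
# method) of how the gamma / eps / data fields above are typically used in a
# REINFORCE-style update. It assumes self.data holds (reward, log_prob) pairs
# appended during the episode; self.eps guards the return normalization
# against division by zero.
def _sketch_policy_update(self):
    returns = []
    G = 0.0
    # Accumulate discounted returns from the end of the episode backwards.
    for reward, _ in reversed(self.data):
        G = reward + self.gamma * G
        returns.insert(0, G)
    returns = torch.tensor(returns)
    returns = (returns - returns.mean()) / (returns.std() + self.eps)
    # Policy-gradient loss: maximize expected return via -log_prob * return.
    loss = torch.stack(
        [-log_prob * G for (_, log_prob), G in zip(self.data, returns)]).sum()
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    self.data = []  # clear the episode buffer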
def __init__(self, configs):
    super().__init__(configs)
    if configs['mode'] == 'train' or configs['mode'] == 'simulate':
        os.mkdir(
            os.path.join(self.configs['current_path'], 'training_data',
                         self.configs['time_data'], 'model'))
        self.configs = merge_dict(configs, DEFAULT_CONFIG)
    else:  # test
        self.configs = merge_dict_non_conflict(configs, DEFAULT_CONFIG)
    self.num_agent = len(self.configs['tl_rl_list'])
    self.state_space = self.configs['state_space']
    # action space
    self.rate_action_space = self.configs['rate_action_space']  # rate action space
    self.time_action_space = self.configs['time_action_space']  # time action space
    self.action_size = self.configs['action_size']
    self.gamma = self.configs['gamma']
    self.epsilon = self.configs['epsilon']
    self.criterion = nn.MSELoss()
    self.lr = self.configs['lr']
    self.lr_decay_rate = self.configs['lr_decay_rate']
    self.epsilon_decay_rate = self.configs['epsilon_decay_rate']
    self.batch_size = self.configs['batch_size']
    self.device = self.configs['device']
    self.running_loss = 0
    # self.writer=writer
    # NN composition
    # must be sized differently depending on each intersection's number of phases
    self.rate_key_list = list()
    for i, key in enumerate(self.configs['traffic_node_info'].keys()):
        if configs['mode'] == 'train':
            rate_key = self.configs['traffic_node_info'][key]['num_phase']
        elif configs['mode'] == 'test':
            rate_key = str(
                self.configs['traffic_node_info'][key]['num_phase'])
        self.rate_key_list.append(rate_key)
    self.mainSuperQNetwork = SuperQNetwork(
        self.state_space, self.rate_action_space[rate_key],
        self.time_action_space[0], self.configs)
    self.targetSuperQNetwork = SuperQNetwork(
        self.state_space, self.rate_action_space[rate_key],
        self.time_action_space[0], self.configs)
    # hard update, optimizer setting
    self.optimizer = optim.Adadelta(self.mainSuperQNetwork.parameters(),
                                    lr=self.configs['lr'])
    hard_update(self.targetSuperQNetwork, self.mainSuperQNetwork)
    self.lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer=self.optimizer,
        step_size=self.configs['lr_decay_period'],
        gamma=self.configs['lr_decay_rate'])
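
# NOTE: Hypothetical sketches of the helpers the constructors above assume
# (merge_dict, merge_dict_non_conflict, hard_update). The real implementations
# live elsewhere in this repository; these only show the conventional behaviour
# implied by how they are called here.
def merge_dict_sketch(user_configs, default_configs):
    # User-supplied values override the defaults.
    merged = dict(default_configs)
    merged.update(user_configs)
    return merged


def merge_dict_non_conflict_sketch(user_configs, default_configs):
    # Defaults only fill keys the user configs do not already define.
    merged = dict(user_configs)
    for key, value in default_configs.items():
        merged.setdefault(key, value)
    return merged


def hard_update_sketch(target, source):
    # Copy every parameter of the online network into the target network.
    target.load_state_dict(source.state_dict())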
def __init__(self, configs): super().__init__(configs) print("Current_Mode:", configs['mode']) if configs['mode'] == 'train': os.mkdir( os.path.join(self.configs['current_path'], 'training_data', self.configs['time_data'], 'model')) configs = merge_dict(configs, DEFAULT_CONFIG) self.configs = configs self.state_space = self.configs['state_space'] self.action_space = self.configs['action_space'] self.action_size = self.configs['action_size'] self.gamma = self.configs['gamma'] self.epsilon = self.configs['epsilon'] self.criterion = nn.MSELoss() self.lr = self.configs['lr'] self.lr_decay_rate = self.configs['lr_decay_rate'] self.epsilon_decay_rate = self.configs['epsilon_decay_rate'] self.experience_replay = ReplayMemory( self.configs['experience_replay_size']) self.batch_size = self.configs['batch_size'] self.num_agent = len(self.configs['tl_rl_list']) if self.configs['model'].lower() == 'frap': from Agent.Model.FRAP import FRAP model = FRAP( self.state_space * self.num_agent * self.configs['num_phase'], self.action_space * self.num_agent, self.configs['device']) # model.add_module('QNetwork', # QNetwork(self.state_space, self.action_space, self.configs)) else: model = QNetwork( self.state_space * self.configs['num_phase'] * self.num_agent, self.action_space * self.num_agent, self.configs) # 1개 네트워크용 model.to(self.configs['device']) self.mainQNetwork = deepcopy(model).to(self.configs['device']) print("========NETWORK==========\n", self.mainQNetwork) self.targetQNetwork = deepcopy(model).to(self.configs['device']) self.targetQNetwork.load_state_dict(self.mainQNetwork.state_dict()) self.optimizer = optim.Adam(self.mainQNetwork.parameters(), lr=self.lr) self.action = tuple() self.running_loss = 0 if self.configs['mode'] == 'train': self.mainQNetwork.train() elif self.configs['mode'] == 'test': self.mainQNetwork.eval() self.targetQNetwork.eval()
def __init__(self, configs):
    self.memory = Memory()
    self.configs = merge_dict(configs, DEFAULT_CONFIG)
    self.gamma = self.configs['gamma']
    self.eps_clip = self.configs['eps_clip']
    self.lr = self.configs['lr']
    self.lr_decay_rate = self.configs['lr_decay_rate']
    self.num_sgd_iter = self.configs['num_sgd_iter']
    self.vf_loss_coeff = self.configs['vf_loss_coeff']
    self.model = Net(self.memory, self.configs).to(self.configs['device'])
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
    self.entropy_coeff = self.configs['entropy_coeff']
    # old policy network, kept in sync for the PPO probability ratio
    self.model_old = Net(self.memory,
                         self.configs).to(self.configs['device'])
    self.model_old.load_state_dict(self.model.state_dict())
    self.running_loss = 0
    self.action = tuple()
    self.criterion = nn.MSELoss()
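
# NOTE: A minimal, hypothetical sketch of the PPO clipped objective that the
# hyperparameters above (eps_clip, vf_loss_coeff, entropy_coeff) feed into.
# It is not the repository's actual update routine; tensor shapes and the
# advantage estimator are assumed.
def _sketch_ppo_loss(self, log_probs, old_log_probs, advantages,
                     values, returns, entropy):
    ratio = torch.exp(log_probs - old_log_probs)  # pi_theta / pi_theta_old
    surr1 = ratio * advantages
    surr2 = torch.clamp(ratio, 1.0 - self.eps_clip,
                        1.0 + self.eps_clip) * advantages
    policy_loss = -torch.min(surr1, surr2).mean()
    value_loss = self.criterion(values, returns)  # MSE critic loss
    # Entropy bonus encourages exploration.
    return (policy_loss + self.vf_loss_coeff * value_loss
            - self.entropy_coeff * entropy.mean())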
def __init__(self, configs):
    super().__init__(configs)
    if configs['mode'] == 'train' or configs['mode'] == 'simulate':
        os.mkdir(
            os.path.join(self.configs['current_path'], 'training_data',
                         self.configs['time_data'], 'model'))
        self.configs = merge_dict(configs, DEFAULT_CONFIG)
    else:  # test
        self.configs = merge_dict_non_conflict(configs, DEFAULT_CONFIG)
    self.num_agent = len(self.configs['tl_rl_list'])
    self.state_space = self.configs['state_space']
    # action space
    self.rate_action_space = self.configs['rate_action_space']  # rate action space
    self.time_action_space = self.configs['time_action_space']  # time action space
    self.action_size = self.configs['action_size']
    self.gamma = self.configs['gamma']
    self.epsilon = self.configs['epsilon']
    self.criterion = nn.SmoothL1Loss()
    self.lr = self.configs['lr']
    self.lr_decay_rate = self.configs['lr_decay_rate']
    self.epsilon_decay_rate = self.configs['epsilon_decay_rate']
    self.batch_size = self.configs['batch_size']
    self.device = self.configs['device']
    self.running_loss = 0
    self.super_output_size = int(self.num_agent * 2)
    self.super_input_size = int(self.num_agent)
    # NN composition
    self.mainSuperQNetwork = SuperQNetwork(self.super_input_size,
                                           self.super_output_size,
                                           self.configs)
    self.targetSuperQNetwork = SuperQNetwork(self.super_input_size,
                                             self.super_output_size,
                                             self.configs)
    # must be sized differently depending on each intersection's number of phases
    self.mainQNetwork = list()
    self.targetQNetwork = list()
    self.rate_key_list = list()
    for i, key in enumerate(self.configs['traffic_node_info'].keys()):
        if configs['mode'] == 'train':
            rate_key = self.configs['traffic_node_info'][key]['num_phase']
        elif configs['mode'] == 'test':
            rate_key = str(
                self.configs['traffic_node_info'][key]['num_phase'])
        self.rate_key_list.append(rate_key)
        self.mainQNetwork.append(
            QNetwork(self.super_output_size,
                     self.rate_action_space[rate_key],
                     self.time_action_space[i], self.configs))
        self.targetQNetwork.append(
            QNetwork(self.super_output_size,
                     self.rate_action_space[rate_key],
                     self.time_action_space[i], self.configs))
    # hard update, optimizer setting
    self.optimizer = list()
    hard_update(self.targetSuperQNetwork, self.mainSuperQNetwork)
    for targetQ, mainQ in zip(self.targetQNetwork, self.mainQNetwork):
        hard_update(targetQ, mainQ)
        # each per-intersection optimizer also updates the shared SuperQNetwork
        params = chain(self.mainSuperQNetwork.parameters(),
                       mainQ.parameters())
        self.optimizer.append(optim.Adam(params, lr=self.lr))
    # Network
    print("========SUPER NETWORK==========\n", self.mainSuperQNetwork)
    print("========NETWORK==========\n")
    for i in range(self.num_agent):
        print(self.mainQNetwork[i])
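
# NOTE: Hypothetical sketch of how the per-intersection optimizers built above
# are typically stepped. Each optimizer i updates both the shared
# SuperQNetwork and intersection i's QNetwork; the target networks are then
# re-synced with hard_update. Loss computation is elided and assumed to yield
# one loss tensor per intersection.
def _sketch_optimize_step(self, losses):
    for i, loss in enumerate(losses):
        self.optimizer[i].zero_grad()
        loss.backward()
        self.optimizer[i].step()
    # A periodic hard update would copy the online weights into the targets.
    hard_update(self.targetSuperQNetwork, self.mainSuperQNetwork)
    for targetQ, mainQ in zip(self.targetQNetwork, self.mainQNetwork):
        hard_update(targetQ, mainQ)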