Example 1
    def __init__(self, configs):
        self.configs = merge_dict(configs, DEFAULT_CONFIG)
        # self.configs = configs
        self.model = PolicyNet(self.configs)
        self.gamma = self.configs['gamma']
        self.lr = self.configs['lr']
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        self.data = []
        self.eps = torch.tensor(np.finfo(np.double).eps.item())
        self.lr_decay_rate = 0.99
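
All of these examples depend on a project-local merge_dict helper (the later ones also on merge_dict_non_conflict) to combine the caller's configs with DEFAULT_CONFIG. Neither helper appears in these excerpts; the sketch below is a hypothetical reading in which user-supplied keys override the defaults, not the project's actual implementation:

# Hypothetical sketch of the merge helper assumed by these snippets.
# The real merge_dict / merge_dict_non_conflict may resolve conflicts
# differently (e.g. deep-merging nested dicts or raising on clashes).
def merge_dict(configs, default_config):
    merged = dict(default_config)   # start from the defaults
    merged.update(configs)          # user-supplied keys win on conflict
    return merged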
Example 2
    def __init__(self, configs):
        super().__init__(configs)
        if configs['mode'] == 'train' or configs['mode'] == 'simulate':
            os.mkdir(
                os.path.join(self.configs['current_path'], 'training_data',
                             self.configs['time_data'], 'model'))
            self.configs = merge_dict(configs, DEFAULT_CONFIG)
        else:  # test
            self.configs = merge_dict_non_conflict(configs, DEFAULT_CONFIG)
        self.num_agent = len(self.configs['tl_rl_list'])
        self.state_space = self.configs['state_space']

        # action space
        # rate action space
        self.rate_action_space = self.configs['rate_action_space']
        # time action space
        self.time_action_space = self.configs['time_action_space']
        self.action_size = self.configs['action_size']
        self.gamma = self.configs['gamma']
        self.epsilon = self.configs['epsilon']
        self.criterion = nn.MSELoss()
        self.lr = self.configs['lr']
        self.lr_decay_rate = self.configs['lr_decay_rate']
        self.epsilon_decay_rate = self.configs['epsilon_decay_rate']
        self.batch_size = self.configs['batch_size']
        self.device = self.configs['device']
        self.running_loss = 0
        # self.writer=writer

        # NN composition
        # this has to be set up differently depending on the size
        self.rate_key_list = list()
        for i, key in enumerate(self.configs['traffic_node_info'].keys()):
            if configs['mode'] == 'train':
                rate_key = self.configs['traffic_node_info'][key]['num_phase']
            elif configs['mode'] == 'test':
                rate_key = str(
                    self.configs['traffic_node_info'][key]['num_phase'])
            self.rate_key_list.append(rate_key)

        # NOTE: rate_key below is whichever value the loop above assigned last,
        # which implicitly assumes every traffic node shares the same num_phase
        self.mainSuperQNetwork = SuperQNetwork(
            self.state_space, self.rate_action_space[rate_key],
            self.time_action_space[0], self.configs)
        self.targetSuperQNetwork = SuperQNetwork(
            self.state_space, self.rate_action_space[rate_key],
            self.time_action_space[0], self.configs)
        # hard update, optimizer setting
        self.optimizer = optim.Adadelta(self.mainSuperQNetwork.parameters(),
                                        lr=self.configs['lr'])
        hard_update(self.targetSuperQNetwork, self.mainSuperQNetwork)
        self.lr_scheduler = optim.lr_scheduler.StepLR(
            optimizer=self.optimizer,
            step_size=self.configs['lr_decay_period'],
            gamma=self.configs['lr_decay_rate'])
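
Examples 2 and 5 call a hard_update utility to copy the online network's weights into the target network before training starts. The helper itself is not shown in these excerpts; the following is a minimal sketch of the conventional implementation of such a function, assuming it takes (target, source) in that order as the call sites suggest:

import torch

# Hypothetical sketch of the hard_update helper used above: copy every
# parameter of the source (main) network into the target network.
def hard_update(target, source):
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.copy_(s_param)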
Example 3
    def __init__(self, configs):
        super().__init__(configs)
        print("Current_Mode:", configs['mode'])
        if configs['mode'] == 'train':
            os.mkdir(
                os.path.join(self.configs['current_path'], 'training_data',
                             self.configs['time_data'], 'model'))
            configs = merge_dict(configs, DEFAULT_CONFIG)
        self.configs = configs
        self.state_space = self.configs['state_space']
        self.action_space = self.configs['action_space']
        self.action_size = self.configs['action_size']
        self.gamma = self.configs['gamma']
        self.epsilon = self.configs['epsilon']
        self.criterion = nn.MSELoss()
        self.lr = self.configs['lr']
        self.lr_decay_rate = self.configs['lr_decay_rate']
        self.epsilon_decay_rate = self.configs['epsilon_decay_rate']
        self.experience_replay = ReplayMemory(
            self.configs['experience_replay_size'])
        self.batch_size = self.configs['batch_size']
        self.num_agent = len(self.configs['tl_rl_list'])
        if self.configs['model'].lower() == 'frap':
            from Agent.Model.FRAP import FRAP
            model = FRAP(
                self.state_space * self.num_agent * self.configs['num_phase'],
                self.action_space * self.num_agent, self.configs['device'])
            # model.add_module('QNetwork',
            #                  QNetwork(self.state_space, self.action_space, self.configs))
        else:
            model = QNetwork(
                self.state_space * self.configs['num_phase'] * self.num_agent,
                self.action_space * self.num_agent, self.configs)  # for a single network
        model.to(self.configs['device'])
        self.mainQNetwork = deepcopy(model).to(self.configs['device'])
        print("========NETWORK==========\n", self.mainQNetwork)
        self.targetQNetwork = deepcopy(model).to(self.configs['device'])
        self.targetQNetwork.load_state_dict(self.mainQNetwork.state_dict())
        self.optimizer = optim.Adam(self.mainQNetwork.parameters(), lr=self.lr)
        self.action = tuple()
        self.running_loss = 0
        if self.configs['mode'] == 'train':
            self.mainQNetwork.train()
        elif self.configs['mode'] == 'test':
            self.mainQNetwork.eval()
        self.targetQNetwork.eval()
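
Example 3 is a fairly standard DQN setup: an online mainQNetwork, a frozen targetQNetwork, an epsilon for exploration, and a ReplayMemory for experience replay. How actions are actually drawn is not part of this excerpt; the sketch below shows a generic epsilon-greedy selection consistent with those fields (the get_action name and the tensor handling are assumptions, not taken from the original class):

import random
import torch

# Hypothetical epsilon-greedy action selection for a DQN-style agent.
# Assumes `state` is already a tensor shaped for mainQNetwork's input.
def get_action(agent, state):
    if random.random() < agent.epsilon:
        # explore: pick a uniformly random action index
        return random.randrange(agent.action_space)
    with torch.no_grad():
        q_values = agent.mainQNetwork(state.to(agent.configs['device']))
        # exploit: pick the action with the highest predicted Q-value
        return int(q_values.argmax(dim=-1).item())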
Example 4
    def __init__(self, configs):
        self.memory = Memory()
        self.configs = merge_dict(configs, DEFAULT_CONFIG)
        self.gamma = self.configs['gamma']
        self.eps_clip = self.configs['eps_clip']
        self.lr = self.configs['lr']
        self.lr_decay_rate = self.configs['lr_decay_rate']
        self.num_sgd_iter = self.configs['num_sgd_iter']
        self.vf_loss_coeff = self.configs['vf_loss_coeff']
        self.model = Net(self.memory, self.configs).to(self.configs['device'])
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        self.entropy_coeff = self.configs['entropy_coeff']
        self.model_old = Net(self.memory,
                             self.configs).to(self.configs['device'])
        self.model_old.load_state_dict(self.model.state_dict())
        self.running_loss = 0
        self.action = tuple()
        self.criterion = nn.MSELoss()
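
Example 4 (the Memory/model_old pair together with eps_clip, vf_loss_coeff, entropy_coeff and num_sgd_iter) has the shape of a PPO agent: the old policy is snapshotted and the new policy is optimized against a clipped surrogate objective, typically with num_sgd_iter passes over each collected batch. The update loop itself is not included in this excerpt; the sketch below illustrates the standard clipped-ratio loss those coefficients usually parameterize (tensor names such as ratios, advantages, values and returns are placeholders, not attributes of the original class):

import torch

# Hypothetical PPO-style clipped surrogate loss built from the coefficients
# stored in __init__ above; the actual loss in this project may differ.
def ppo_loss(agent, ratios, advantages, values, returns, entropy):
    # ratios = pi_new(a|s) / pi_old(a|s); advantages = estimated advantages
    clipped = torch.clamp(ratios, 1 - agent.eps_clip, 1 + agent.eps_clip)
    policy_loss = -torch.min(ratios * advantages, clipped * advantages).mean()
    value_loss = agent.criterion(values, returns)  # nn.MSELoss from __init__
    return (policy_loss
            + agent.vf_loss_coeff * value_loss
            - agent.entropy_coeff * entropy.mean())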
Example 5
    def __init__(self, configs):
        super().__init__(configs)
        if configs['mode'] == 'train' or configs['mode'] == 'simulate':
            os.mkdir(
                os.path.join(self.configs['current_path'], 'training_data',
                             self.configs['time_data'], 'model'))
            self.configs = merge_dict(configs, DEFAULT_CONFIG)
        else:  # test
            self.configs = merge_dict_non_conflict(configs, DEFAULT_CONFIG)
        self.num_agent = len(self.configs['tl_rl_list'])
        self.state_space = self.configs['state_space']

        # action space
        # rate action space
        self.rate_action_space = self.configs['rate_action_space']
        # time action space
        self.time_action_space = self.configs['time_action_space']
        self.action_size = self.configs['action_size']
        self.gamma = self.configs['gamma']
        self.epsilon = self.configs['epsilon']
        self.criterion = nn.SmoothL1Loss()
        self.lr = self.configs['lr']
        self.lr_decay_rate = self.configs['lr_decay_rate']
        self.epsilon_decay_rate = self.configs['epsilon_decay_rate']
        self.batch_size = self.configs['batch_size']
        self.device = self.configs['device']
        self.running_loss = 0
        self.super_output_size = int(self.num_agent * 2)
        self.super_input_size = int(self.num_agent)
        # NN composition
        self.mainSuperQNetwork = SuperQNetwork(self.super_input_size,
                                               self.super_output_size,
                                               self.configs)
        self.targetSuperQNetwork = SuperQNetwork(self.super_input_size,
                                                 self.super_output_size,
                                                 self.configs)
        # this has to be set up differently depending on the size
        self.mainQNetwork = list()
        self.targetQNetwork = list()
        self.rate_key_list = list()
        for i, key in enumerate(self.configs['traffic_node_info'].keys()):
            if configs['mode'] == 'train':
                rate_key = self.configs['traffic_node_info'][key]['num_phase']
            elif configs['mode'] == 'test':
                rate_key = str(
                    self.configs['traffic_node_info'][key]['num_phase'])
            self.rate_key_list.append(rate_key)
            self.mainQNetwork.append(
                QNetwork(self.super_output_size,
                         self.rate_action_space[rate_key],
                         self.time_action_space[i], self.configs))
            self.targetQNetwork.append(
                QNetwork(self.super_output_size,
                         self.rate_action_space[rate_key],
                         self.time_action_space[i], self.configs))

        # hard update, optimizer setting
        self.optimizer = list()
        hard_update(self.targetSuperQNetwork, self.mainSuperQNetwork)
        for targetQ, mainQ in zip(self.targetQNetwork, self.mainQNetwork):
            hard_update(targetQ, mainQ)
            params = chain(self.mainSuperQNetwork.parameters(),
                           mainQ.parameters())
            self.optimizer.append(optim.Adam(params, lr=self.lr))

        # Network
        print("========SUPER NETWORK==========\n", self.mainSuperQNetwork)
        print("========NETWORK==========\n")
        for i in range(self.num_agent):
            print(self.mainQNetwork[i])
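
Example 5 reads a fairly large number of keys from configs. For orientation, the sketch below lists the keys this __init__ itself touches (the base class consumed by super().__init__ may read more), with placeholder values that are illustrative guesses rather than project defaults; note that rate_action_space is indexed by each node's num_phase, as an int in 'train' mode and as a string in 'test' mode:

# Illustrative shape of the configs dict consumed by Example 5's __init__.
# Every value here is a placeholder, not a real project default.
configs = {
    'mode': 'train',                  # 'train', 'simulate' or 'test'
    'current_path': '.',
    'time_data': '2021-01-01_00-00-00',
    'tl_rl_list': ['n_0', 'n_1'],     # one entry per controlled intersection
    'state_space': 16,
    'rate_action_space': {4: 8},      # keyed by num_phase (string keys in test mode)
    'time_action_space': [5, 5],      # one entry per agent
    'action_size': 2,
    'gamma': 0.99,
    'epsilon': 1.0,
    'epsilon_decay_rate': 0.99,
    'lr': 1e-3,
    'lr_decay_rate': 0.99,
    'batch_size': 32,
    'device': 'cpu',
    'traffic_node_info': {'n_0': {'num_phase': 4}, 'n_1': {'num_phase': 4}},
}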