Ejemplo n.º 1
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixers, target copies and the two optimisers.

        Args:
            mac: Controller; must expose ``parameters()`` and
                ``action_encoder_params()``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``n_agents``, ``mixer``,
                ``role_mixer``, ``lr``, ``optim_alpha``, ``optim_eps``,
                ``learner_log_interval``, ``role_interval`` and ``device``.

        Raises:
            ValueError: If ``args.mixer`` or ``args.role_mixer`` is set to a
                value other than ``"vdn"`` or ``"qmix"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger
        self.n_agents = args.n_agents

        # Everything handed to the main optimiser is collected in this list.
        self.params = list(mac.parameters())

        self.last_target_update_episode = 0

        # Optional mixer; ``target_mixer`` only exists when one is configured.
        self.mixer = None
        mixer_kind = args.mixer
        if mixer_kind is not None:
            if mixer_kind == "vdn":
                self.mixer = VDNMixer()
            elif mixer_kind == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.params.extend(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # Optional role mixer, handled the same way as the mixer above.
        self.role_mixer = None
        role_mixer_kind = args.role_mixer
        if role_mixer_kind is not None:
            if role_mixer_kind == "vdn":
                self.role_mixer = VDNMixer()
            elif role_mixer_kind == "qmix":
                self.role_mixer = QMixer(args)
            else:
                raise ValueError("Role Mixer {} not recognised.".format(
                    args.role_mixer))
            self.params.extend(self.role_mixer.parameters())
            self.target_role_mixer = copy.deepcopy(self.role_mixer)

        # Both optimisers share the same RMSprop hyper-parameters.
        rmsprop_kwargs = dict(lr=args.lr,
                              alpha=args.optim_alpha,
                              eps=args.optim_eps)
        self.optimiser = RMSprop(params=self.params, **rmsprop_kwargs)

        # Deep-copying the whole controller is somewhat wasteful (it also
        # duplicates e.g. the action selector) but works for any MAC.
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1

        self.role_interval = args.role_interval
        self.device = self.args.device

        self.role_action_spaces_updated = True

        # The action encoder is trained by its own optimiser.
        self.action_encoder_params = list(self.mac.action_encoder_params())
        self.action_encoder_optimiser = RMSprop(
            params=self.action_encoder_params, **rmsprop_kwargs)
Ejemplo n.º 2
0
    def __init__(self, mac, scheme, logger, args):
        """Create control/execution critics, mixers and four optimisers.

        Args:
            mac: Controller; must expose ``agent_dlstm_parameters()``,
                ``latent_state_encoder_parameters()`` and
                ``agent_lstm_parameters()``.
            scheme: Forwarded to both critic constructors.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``n_agents``, ``n_actions``,
                ``learner_log_interval``, ``control_mixer``,
                ``execution_mixer``, the four ``*_lr`` values,
                ``optim_alpha`` and ``optim_eps``.

        Raises:
            ValueError: If ``args.control_mixer`` or ``args.execution_mixer``
                is set to a value other than ``"vdn"`` or ``"qmix"``.
        """
        # NOTE: this switches on autograd anomaly detection globally.
        torch.autograd.set_detect_anomaly(True)
        self.args = args
        self.n_agents = args.n_agents
        self.n_actions = args.n_actions
        self.mac = mac
        self.logger = logger

        self.last_target_update_step = 0
        self.critic_training_steps = 0

        self.log_stats_t = -self.args.learner_log_interval - 1

        # Critics and their target copies.
        self.control_critic = SCControlCritic(scheme, args)
        self.execution_critic = SCExecutionCritic(scheme, args)
        self.target_control_critic = copy.deepcopy(self.control_critic)
        self.target_execution_critic = copy.deepcopy(self.execution_critic)

        # One parameter list per optimiser.
        self.control_actor_params = [
            *self.mac.agent_dlstm_parameters(),
            *self.mac.latent_state_encoder_parameters(),
        ]
        self.execution_actor_params = list(self.mac.agent_lstm_parameters())
        self.control_critic_params = list(self.control_critic.parameters())
        self.execution_critic_params = list(self.execution_critic.parameters())

        # Control mixer: its parameters are trained with the control critic.
        self.control_mixer = None
        control_kind = args.control_mixer
        if control_kind is not None:
            if control_kind == "vdn":
                self.control_mixer = VDNMixer()
            elif control_kind == "qmix":
                self.control_mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.control_mixer))
            self.target_control_mixer = copy.deepcopy(self.control_mixer)
            self.control_critic_params.extend(self.control_mixer.parameters())

        # Execution mixer: note that "qmix" selects DirMixer here.
        self.execution_mixer = None
        execution_kind = args.execution_mixer
        if execution_kind is not None:
            if execution_kind == "vdn":
                self.execution_mixer = VDNMixer()
            elif execution_kind == "qmix":
                self.execution_mixer = DirMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.execution_mixer))
            self.target_execution_mixer = copy.deepcopy(self.execution_mixer)
            self.execution_critic_params.extend(self.execution_mixer.parameters())

        # All four optimisers share alpha/eps and differ only in lr and params.
        self.control_actor_optimiser = RMSprop(
            params=self.control_actor_params, lr=args.control_actor_lr,
            alpha=args.optim_alpha, eps=args.optim_eps)
        self.control_critic_optimiser = RMSprop(
            params=self.control_critic_params, lr=args.control_critic_lr,
            alpha=args.optim_alpha, eps=args.optim_eps)
        self.execution_actor_optimiser = RMSprop(
            params=self.execution_actor_params, lr=args.execution_actor_lr,
            alpha=args.optim_alpha, eps=args.optim_eps)
        self.execution_critic_optimiser = RMSprop(
            params=self.execution_critic_params, lr=args.execution_critic_lr,
            alpha=args.optim_alpha, eps=args.optim_eps)
Ejemplo n.º 3
0
    def __init__(self, mac, scheme, logger, args):
        """Set up the learner's target controller, optional mixer and optimiser.

        Args:
            mac: Controller exposing ``parameters()``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``n_agents``, ``n_actions``,
                ``learner_log_interval``, ``mixer``, ``optim`` and ``lr``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"`` or ``"qmix"``.
        """
        self.args = args
        self.n_agents = args.n_agents
        self.n_actions = args.n_actions
        self.mac = mac
        self.logger = logger

        self.last_target_update_step = 0
        self.critic_training_steps = 0

        self.log_stats_t = -self.args.learner_log_interval - 1

        self.target_mac = copy.deepcopy(mac)
        self.params = list(self.mac.parameters())

        # Always define the attribute so that ``self.mixer`` can be tested
        # against None even when no mixer is configured.
        self.mixer = None
        if args.mixer is not None:
            if args.mixer == "vdn":
                self.mixer = VDNMixer()
            elif args.mixer == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.params += list(self.mixer.parameters())

        # Any value other than 'adam' falls back to RMSprop.
        if self.args.optim == 'adam':
            self.optimiser = Adam(params=self.params, lr=args.lr)
        else:
            self.optimiser = RMSprop(params=self.params, lr=args.lr)
Ejemplo n.º 4
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, optimiser and target networks for this learner.

        Args:
            mac: Controller exposing ``parameters()`` and, when
                ``args.communicating`` is truthy, ``communication``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``mixer``, ``communicating``,
                ``lr``, ``optim_alpha``, ``optim_eps`` and
                ``learner_log_interval``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"``, ``"qmix"`` or ``"point_like"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        self.params = list(mac.parameters())

        self.last_target_update_episode = 0

        # Optional mixer; ``target_mixer`` only exists when one is configured.
        self.mixer = None
        mixer_kind = args.mixer
        if mixer_kind is not None:
            if mixer_kind == "vdn":
                self.mixer = VDNMixer()
            elif mixer_kind == "qmix":
                self.mixer = QMixer(args)
            elif mixer_kind == "point_like":
                self.mixer = PointLikeMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.params.extend(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # The communication module is trained by the same optimiser.
        if self.args.communicating:
            self.params.extend(self.mac.communication.parameters())

        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )

        # Deep-copying the controller is a bit wasteful (it also duplicates
        # e.g. the action selector) but works for any MAC.
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
        # NOTE: this switches on autograd anomaly detection globally.
        th.autograd.set_detect_anomaly(True)
Ejemplo n.º 5
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, optimiser, target networks and loss weights.

        Args:
            mac: Controller exposing ``parameters()`` and ``msg_rnn``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``mixer``, ``lr``, ``optim_alpha``,
                ``optim_eps`` and ``learner_log_interval``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"`` or ``"qmix"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger
        self.params = list(mac.parameters())

        self.last_target_update_episode = 0

        # Optional mixer; ``target_mixer`` only exists when one is configured.
        self.mixer = None
        mixer_kind = args.mixer
        if mixer_kind is not None:
            if mixer_kind == "vdn":
                self.mixer = VDNMixer()
            elif mixer_kind == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.params.extend(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # The message RNN's parameters are appended explicitly.
        self.params.extend(self.mac.msg_rnn.parameters())
        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )
        self.target_mac = copy.deepcopy(mac)
        self.log_stats_t = -self.args.learner_log_interval - 1

        # The beta weights from Algorithm 1.
        self.loss_weight = [0.5, 1, 1.5]
Ejemplo n.º 6
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, its target copy, the optimiser and target MAC.

        Args:
            mac: Controller exposing ``parameters()``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``use_cuda``, ``mixer``,
                ``optimizer``, ``lr``, ``optim_alpha``, ``optim_eps`` and
                ``learner_log_interval``.

        Raises:
            Exception: If ``args.mixer`` is not ``"qatten"``, ``"vdn"`` or
                ``"qmix"``.
        """
        self.args = args
        self.mac = mac   # the controller
        self.logger = logger

        self.last_target_update_episode = 0
        device_name = 'cuda' if args.use_cuda else 'cpu'
        self.device = th.device(device_name)
        self.params = list(mac.parameters())

        # A mixer is mandatory for this learner.
        mixer_kind = args.mixer
        if mixer_kind == "qatten":
            self.mixer = QattenMixer(args)
        elif mixer_kind == "vdn":
            self.mixer = VDNMixer()
        elif mixer_kind == "qmix":
            self.mixer = Mixer(args)
        else:
            raise Exception("mixer error")
        self.target_mixer = copy.deepcopy(self.mixer)
        self.params.extend(self.mixer.parameters())

        # Report the number of mixer parameters on stdout.
        print('Mixer的参数量为: ', end='')
        print(get_parameters_num(self.mixer.parameters()))

        # Any value other than 'adam' selects RMSprop.
        if self.args.optimizer != 'adam':
            self.optimiser = RMSprop(params=self.params, lr=args.lr,
                                     alpha=args.optim_alpha, eps=args.optim_eps)
        else:
            self.optimiser = Adam(params=self.params, lr=args.lr)

        # Deep-copying is a little wasteful (e.g. it duplicates the action
        # selector) but should work for any MAC.
        self.target_mac = copy.deepcopy(mac)
        # Initial value used by the logging-interval check.
        self.log_stats_t = -self.args.learner_log_interval - 1

        self.train_t = 0
Ejemplo n.º 7
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, optimiser and target networks for this learner.

        Args:
            mac: Controller exposing ``parameters()`` and
                ``rep_parameters()``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``mixer``, ``lr``, ``optim_alpha``,
                ``optim_eps`` and ``learner_log_interval``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"`` or ``"qmix"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        self.params = list(mac.parameters())

        self.last_target_update_episode = 0

        # Optional mixer; ``target_mixer`` only exists when one is configured.
        self.mixer = None
        mixer_kind = args.mixer
        if mixer_kind is not None:
            if mixer_kind == "vdn":
                self.mixer = VDNMixer()
            elif mixer_kind == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.params.extend(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # Kept for later use; these are NOT added to ``self.params`` here.
        self.rep_paras = mac.rep_parameters()

        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )

        # Deep-copying the controller is a bit wasteful (it also duplicates
        # e.g. the action selector) but works for any MAC.
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 8
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, optimiser and target networks for this learner.

        Args:
            mac: Controller exposing ``parameters()``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``use_cuda``, ``mixer``,
                ``optimizer``, ``lr``, ``optim_alpha``, ``optim_eps`` and
                ``learner_log_interval``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"`` or ``"qmix"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        self.params = list(mac.parameters())

        self.last_target_update_episode = 0
        device_name = 'cuda' if args.use_cuda else 'cpu'
        self.device = th.device(device_name)

        # Optional mixer; ``target_mixer`` only exists when one is configured.
        self.mixer = None
        mixer_kind = args.mixer
        if mixer_kind is not None:
            if mixer_kind == "vdn":
                self.mixer = VDNMixer()
            elif mixer_kind == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.params.extend(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # Any value other than 'adam' selects RMSprop.
        if self.args.optimizer != 'adam':
            self.optimiser = RMSprop(params=self.params, lr=args.lr,
                                     alpha=args.optim_alpha, eps=args.optim_eps)
        else:
            self.optimiser = Adam(params=self.params, lr=args.lr)

        # Deep-copying the controller is a bit wasteful (it also duplicates
        # e.g. the action selector) but works for any MAC.
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1

        self.train_t = 0
Ejemplo n.º 9
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, optimiser and target networks for this learner.

        Args:
            mac: Controller exposing ``parameters()`` and ``env_blender``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``normalization_const``, ``mixer``,
                ``lr``, ``optim_alpha``, ``optim_eps`` and
                ``learner_log_interval``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"`` or ``"qmix"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger
        self.regularization_const = self.args.normalization_const
        self.params = list(mac.parameters())

        self.last_target_update_episode = 0

        # Optional mixer; ``target_mixer`` only exists when one is configured.
        self.mixer = None
        mixer_kind = args.mixer
        if mixer_kind is not None:
            if mixer_kind == "vdn":
                self.mixer = VDNMixer()
            elif mixer_kind == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.params.extend(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # The env blender's parameters are appended explicitly.
        self.params.extend(self.mac.env_blender.parameters())
        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 10
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, optimiser and target networks for this learner.

        Args:
            mac: Controller exposing ``parameters()``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``mixer``, ``entity_scheme`` (for
                ``"flex_qmix"`` only), ``lr``, ``optim_alpha``, ``optim_eps``,
                ``weight_decay`` and ``learner_log_interval``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"``, ``"qmix"`` or ``"flex_qmix"``.
            AssertionError: If ``"flex_qmix"`` is requested while
                ``args.entity_scheme`` is falsy.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        self.params = list(mac.parameters())

        self.last_target_update_episode = 0

        # Optional mixer; ``target_mixer`` only exists when one is configured.
        self.mixer = None
        mixer_kind = args.mixer
        if mixer_kind is not None:
            if mixer_kind == "vdn":
                self.mixer = VDNMixer()
            elif mixer_kind == "qmix":
                self.mixer = QMixer(args)
            elif mixer_kind == "flex_qmix":
                assert args.entity_scheme, "FlexQMixer only available with entity scheme"
                self.mixer = FlexQMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.params.extend(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
            weight_decay=args.weight_decay,
        )

        # Deep-copying the controller is a bit wasteful (it also duplicates
        # e.g. the action selector) but works for any MAC.
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 11
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, Adam optimiser and target networks.

        Args:
            mac: Controller exposing ``parameters()``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``mixer``, ``lr`` and
                ``learner_log_interval``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"``, ``"qmix"`` or ``"graphmix"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        self.params = list(mac.parameters())

        self.last_target_update_episode = 0

        # Optional mixer; ``target_mixer`` only exists when one is configured.
        self.mixer = None
        mixer_kind = args.mixer
        if mixer_kind is not None:
            if mixer_kind == "vdn":
                self.mixer = VDNMixer()
            elif mixer_kind == "qmix":
                self.mixer = QMixer(args)
            elif mixer_kind == "graphmix":
                self.mixer = GraphMixer(args)
            else:
                raise ValueError(f"Mixer {args.mixer} not recognized.")
            self.params.extend(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # Adam is used here; note the attribute is spelled ``optimizer``.
        self.optimizer = Adam(params=self.params, lr=args.lr)

        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 12
0
    def __init__(self, mac, scheme, logger, args):
        """Build the action/frequency mixers, optimisers and target networks.

        Args:
            mac: Controller whose ``parameters()`` returns three iterables:
                action, communication and frequency parameters.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``comm_type``, ``mixer``,
                ``freq_mixer``, ``lr``, ``optim_alpha``, ``optim_eps`` and
                ``learner_log_interval``.

        Raises:
            ValueError: If ``args.mixer`` (or ``args.freq_mixer`` when it is
                used) is set to a value other than ``"vdn"`` or ``"qmix"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        act_params, comm_params, freq_params = mac.parameters()
        # Take private copies: the lists are extended in place below, and that
        # must not leak into collections still owned by the controller.
        self.act_params = list(act_params)
        self.comm_params = list(comm_params)
        self.freq_params = list(freq_params)

        # Communication parameters are trained together with the action ones.
        if args.comm_type != "no":
            self.act_params += self.comm_params

        self.last_target_update_episode = 0

        self.mixer = None
        self.freq_mixer = None
        if args.mixer is not None:
            if args.mixer == "vdn":
                self.mixer = VDNMixer()
            elif args.mixer == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.act_params += list(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # The frequency mixer and its optimiser only exist for "normal" comms.
        if args.freq_mixer is not None and args.comm_type == "normal":
            if args.freq_mixer == "vdn":
                self.freq_mixer = VDNMixer()
            elif args.freq_mixer == "qmix":
                self.freq_mixer = QMixer(args)
            else:
                raise ValueError("Freq Mixer {} not recognised.".format(args.freq_mixer))
            self.freq_params += list(self.freq_mixer.parameters())
            self.target_freq_mixer = copy.deepcopy(self.freq_mixer)
            self.freq_optimiser = RMSprop(params=self.freq_params,
                                          lr=args.lr,
                                          alpha=args.optim_alpha,
                                          eps=args.optim_eps)

        self.ac_optimiser = RMSprop(params=self.act_params,
                                    lr=args.lr,
                                    alpha=args.optim_alpha,
                                    eps=args.optim_eps)

        # a little wasteful to deepcopy (e.g. duplicates action selector), but should work for any MAC
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 13
0
    def __init__(self, mac, scheme, logger, args):
        """Build the critic, optional mixer, targets and both optimisers.

        Args:
            mac: Controller exposing ``parameters()``.
            scheme: Forwarded to ``FMACCritic``.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``n_agents``, ``n_actions``,
                ``mixer``, ``lr``, ``critic_lr``, ``optim_alpha``,
                ``optim_eps``, ``learner_log_interval`` and, optionally,
                ``optimizer`` and ``optimizer_epsilon``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"`` or ``"qmix"`` (only checked with more than one agent).
            Exception: If ``args.optimizer`` is neither ``"rmsprop"`` nor
                ``"adam"``.
        """
        self.args = args
        self.n_agents = args.n_agents
        self.n_actions = args.n_actions
        self.logger = logger

        self.mac = mac
        self.target_mac = copy.deepcopy(self.mac)
        self.agent_params = list(mac.parameters())

        self.critic = FMACCritic(scheme, args)
        self.target_critic = copy.deepcopy(self.critic)
        self.critic_params = list(self.critic.parameters())

        # With a single agent there is nothing to mix, so no mixer is built.
        self.mixer = None
        if args.mixer is not None and self.args.n_agents > 1:
            mixer_kind = args.mixer
            if mixer_kind == "vdn":
                self.mixer = VDNMixer()
            elif mixer_kind == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.critic_params.extend(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # The count covers the critic parameters plus any mixer parameters.
        print('Mixer Size: ')
        print(get_parameters_num(self.critic_params))

        # The agent and the critic always use the same optimiser family;
        # "rmsprop" is the default when ``args.optimizer`` is absent.
        optimizer_name = getattr(self.args, "optimizer", "rmsprop")
        if optimizer_name == "rmsprop":
            self.agent_optimiser = RMSprop(params=self.agent_params,
                                           lr=args.lr,
                                           alpha=args.optim_alpha,
                                           eps=args.optim_eps)
            self.critic_optimiser = RMSprop(params=self.critic_params,
                                            lr=args.critic_lr,
                                            alpha=args.optim_alpha,
                                            eps=args.optim_eps)
        elif optimizer_name == "adam":
            self.agent_optimiser = Adam(params=self.agent_params,
                                        lr=args.lr,
                                        eps=getattr(args, "optimizer_epsilon",
                                                    10E-8))
            self.critic_optimiser = Adam(params=self.critic_params,
                                         lr=args.critic_lr,
                                         eps=getattr(args, "optimizer_epsilon",
                                                     10E-8))
        else:
            raise Exception("unknown optimizer {}".format(optimizer_name))

        self.log_stats_t = -self.args.learner_log_interval - 1
        self.last_target_update_episode = 0
        self.last_target_update_episode = 0
Ejemplo n.º 14
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, the main and meta optimisers and target networks.

        Args:
            mac: Controller exposing ``parameters()``.
            scheme: Stored on ``self.scheme``.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``mixer``, ``ensemble_num``,
                ``optimizer``, ``lr``, ``meta_lr``, ``optim_alpha``,
                ``optim_eps`` and ``learner_log_interval``.

        Raises:
            ValueError: If ``args.mixer`` is set to an unrecognised value.
            NotImplementedError: If ``args.ensemble_num != 1`` while a mixer
                is configured, or if ``args.optimizer`` is neither
                ``"RMSprop"`` nor ``"Adam"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger
        self.scheme = scheme
        self.params = list(mac.parameters())
        # The meta optimiser below is always created, so its parameter list
        # must exist even when no mixer is configured.
        self.meta_params = list(self.mac.parameters())

        self.last_target_update_episode = 0

        self.mixer = None
        if args.mixer is not None:
            self.mixer_list = []
            self.target_mixer_list = []
            if args.mixer == "vdn":
                self.mixer_list.append(VDNMixer())
            elif args.mixer == "qmix":
                self.mixer_list.append(QMixer(args))
            elif args.mixer == "hqmix":
                self.mixer_list.append(HQMixer(args))
            elif args.mixer == "hqmix_noabs":
                self.mixer_list.append(HQMixerFF(args))
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            # Only a single mixer (no ensemble) is supported.
            if args.ensemble_num == 1:
                self.mixer = self.mixer_list[0]
                self.params += list(self.mixer.parameters())
                self.target_mixer_list.append(copy.deepcopy(self.mixer))
                self.target_mixer = self.target_mixer_list[0]
            else:
                raise NotImplementedError
        if args.optimizer == "RMSprop":
            self.optimiser = RMSprop(params=self.params,
                                     lr=args.lr,
                                     alpha=args.optim_alpha,
                                     eps=args.optim_eps)
            self.meta_optimiser = RMSprop(params=self.meta_params,
                                          lr=args.meta_lr,
                                          alpha=args.optim_alpha,
                                          eps=args.optim_eps)
        elif args.optimizer == "Adam":
            self.optimiser = Adam(params=self.params, lr=args.lr)
            self.meta_optimiser = Adam(params=self.meta_params,
                                       lr=args.meta_lr)
        else:
            raise NotImplementedError

        # a little wasteful to deepcopy (e.g. duplicates action selector), but should work for any MAC
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 15
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, optimiser and single or multiple target copies.

        Args:
            mac: Controller exposing ``parameters()``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``n_actions``, ``mixer``,
                ``SubAVG_Mixer_flag``, ``SubAVG_Mixer_K``,
                ``SubAVG_Agent_flag``, ``SubAVG_Agent_K``, ``lr``,
                ``optim_alpha``, ``optim_eps`` and ``learner_log_interval``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"`` or ``"qmix"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        self.n_actions_levin = args.n_actions

        self.params = list(mac.parameters())

        self.last_target_update_episode = 0

        self.mixer = None
        mixer_kind = args.mixer
        if mixer_kind is not None:
            if mixer_kind == "vdn":
                self.mixer = VDNMixer()
            elif mixer_kind == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.params.extend(self.mixer.parameters())

            # Either a single target mixer, or (for qmix with the SubAVG flag
            # set) a list of K target copies. Other combinations create none.
            if not args.SubAVG_Mixer_flag:
                self.target_mixer = copy.deepcopy(self.mixer)
            elif mixer_kind == "qmix":
                self.target_mixer_list = [
                    copy.deepcopy(self.mixer)
                    for _ in range(self.args.SubAVG_Mixer_K)
                ]
                self.levin_iter_target_mixer_update = 0

        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )

        # Deep-copying the controller is a bit wasteful (it also duplicates
        # e.g. the action selector) but works for any MAC.
        if self.args.SubAVG_Agent_flag:
            self.target_mac_list = [
                copy.deepcopy(mac) for _ in range(self.args.SubAVG_Agent_K)
            ]
            self.levin_iter_target_update = 0
        else:
            self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1

        # ====== levin =====
        self.number = 0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, the central controller/mixer and the optimiser.

        Args:
            mac: Controller exposing ``parameters()``.
            scheme: Forwarded to the central controller constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``mixer``, ``central_mixer``,
                ``central_mac``, ``lr``, ``optim_alpha``, ``optim_eps`` and
                ``learner_log_interval``.

        Raises:
            AssertionError: If ``args.mixer`` is None, ``args.central_mixer``
                is not ``"ff"`` or ``args.central_mac`` is not
                ``"basic_central_mac"``.
            ValueError: If ``args.mixer`` is neither ``"vdn"`` nor ``"qmix"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        # Two independent lists: one for the controller only, one for
        # everything the optimiser trains.
        self.mac_params = list(mac.parameters())
        self.params = list(self.mac.parameters())

        self.last_target_update_episode = 0

        self.mixer = None
        assert args.mixer is not None
        mixer_kind = args.mixer
        if mixer_kind is not None:
            if mixer_kind == "vdn":
                self.mixer = VDNMixer()
            elif mixer_kind == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.mixer_params = list(self.mixer.parameters())
            self.params.extend(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # Deep-copying the controller is a bit wasteful (it also duplicates
        # e.g. the action selector) but works for any MAC.
        self.target_mac = copy.deepcopy(mac)

        # Central Q
        # TODO: Clean this mess up!
        self.central_mac = None
        assert self.args.central_mixer == "ff"
        self.central_mixer = QMixerCentralFF(args)
        assert args.central_mac == "basic_central_mac"
        # Groups aren't used in the CentralBasicController. Little hacky.
        central_mac_cls = mac_REGISTRY[args.central_mac]
        self.central_mac = central_mac_cls(scheme, args)
        self.target_central_mac = copy.deepcopy(self.central_mac)
        self.params.extend(self.central_mac.parameters())
        self.params.extend(self.central_mixer.parameters())
        self.target_central_mixer = copy.deepcopy(self.central_mixer)

        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 17
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, optimiser, target networks and PER bookkeeping.

        Args:
            mac: Controller exposing ``parameters()``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``use_cuda``, ``mixer``,
                ``optimizer``, ``lr``, ``optim_alpha``, ``optim_eps``,
                ``learner_log_interval`` and, optionally, ``weight_decay``,
                ``use_per`` and ``return_priority``.

        Raises:
            ValueError: If ``args.mixer`` is not ``"qatten"``, ``"vdn"`` or
                ``"qmix"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        self.last_target_update_episode = 0
        self.device = th.device('cuda' if args.use_cuda else 'cpu')
        self.params = list(mac.parameters())

        if args.mixer == "qatten":
            self.mixer = QattenMixer(args)
        elif args.mixer == "vdn":
            self.mixer = VDNMixer()
        elif args.mixer == "qmix":
            self.mixer = Mixer(args)
        else:
            # Raising a plain string is itself a TypeError in Python 3, so
            # raise a real exception that names the offending value.
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.target_mixer = copy.deepcopy(self.mixer)
        self.params += list(self.mixer.parameters())

        print('Mixer Size: ')
        print(get_parameters_num(self.mixer.parameters()))

        # Any value other than 'adam' falls back to RMSprop.
        if self.args.optimizer == 'adam':
            self.optimiser = Adam(params=self.params,
                                  lr=args.lr,
                                  weight_decay=getattr(args, "weight_decay",
                                                       0))
        else:
            self.optimiser = RMSprop(params=self.params,
                                     lr=args.lr,
                                     alpha=args.optim_alpha,
                                     eps=args.optim_eps)

        # a little wasteful to deepcopy (e.g. duplicates action selector), but should work for any MAC
        self.target_mac = copy.deepcopy(mac)
        self.log_stats_t = -self.args.learner_log_interval - 1
        self.train_t = 0

        # priority replay: the running min/max start at the opposite
        # infinities so the first observed priority replaces both.
        self.use_per = getattr(self.args, 'use_per', False)
        self.return_priority = getattr(self.args, "return_priority", False)
        if self.use_per:
            self.priority_max = float('-inf')
            self.priority_min = float('inf')
Ejemplo n.º 18
0
    def __init__(self, mac, scheme, logger, args):
        """Build the critic, mixer, discriminator, optimiser and target MAC.

        Args:
            mac: Controller exposing ``parameters()``.
            scheme: Forwarded to ``LIIRCritic``.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``device``, ``mixer``,
                ``state_shape``, ``n_agents``, ``n_actions``, ``rnn_discrim``,
                ``rnn_agg_size`` (RNN variant only), ``noise_dim``, ``lr``,
                ``optim_alpha``, ``optim_eps`` and ``learner_log_interval``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"`` or ``"qmix"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        self.params = list(mac.parameters())

        # added by keegan
        # The original referenced a bare ``device`` that this method never
        # defines; take the device from the configuration instead.
        # NOTE(review): the critic's parameters are not added to
        # ``self.params`` here — confirm they are optimised elsewhere.
        self.critic = LIIRCritic(scheme, args).to(args.device)

        self.last_target_update_episode = 0

        self.mixer = None
        if args.mixer is not None:
            if args.mixer == "vdn":
                self.mixer = VDNMixer()
            elif args.mixer == "qmix":
                self.mixer = NoiseQMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.params += list(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # Discriminator input size: flattened state plus one slot per
        # (agent, action) pair.
        discrim_input = np.prod(self.args.state_shape) + self.args.n_agents * self.args.n_actions
        self.n_agents = self.args.n_agents

        if self.args.rnn_discrim:
            # The RNN variant aggregates first, so the discriminator consumes
            # the aggregator's output size rather than the raw input size.
            self.rnn_agg = RNNAggregator(discrim_input, args)
            self.discrim = Discrim(args.rnn_agg_size, self.args.noise_dim, args)
            self.params += list(self.discrim.parameters())
            self.params += list(self.rnn_agg.parameters())
        else:
            self.discrim = Discrim(discrim_input, self.args.noise_dim, args)
            self.params += list(self.discrim.parameters())
        # Unreduced loss: one value per element.
        self.discrim_loss = th.nn.CrossEntropyLoss(reduction="none")

        self.optimiser = RMSprop(params=self.params, lr=args.lr, alpha=args.optim_alpha, eps=args.optim_eps)

        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 19
0
    def __init__(self, mac, scheme, logger, args):
        """Build per-group parameter lists, mixers, optimisers and targets.

        Args:
            mac: Controller whose ``parameters()`` yields one iterable of
                parameters per group.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``n_agents_team1``,
                ``n_agents_team2``, ``mixer``, ``lr``, ``optim_alpha``,
                ``optim_eps`` and ``learner_log_interval``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"``, ``"qmix"`` or ``"qmix_multi"``.
        """
        self.args = args
        self.n_agents_team1 = args.n_agents_team1
        self.n_agents_team2 = args.n_agents_team2
        self.mac = mac
        self.logger = logger
        # A list of lists: one parameter list per entry of mac.parameters().
        self.params = [list(group) for group in mac.parameters()]

        self.last_target_update_episode = 0

        self.mixer = None
        mixer_kind = args.mixer
        if mixer_kind is not None:
            if mixer_kind == "vdn" or mixer_kind == "qmix":
                # NOTE(review): this appends the mixer's parameters as
                # individual top-level entries next to the per-group lists, so
                # the optimiser loop below receives each one on its own —
                # verify that this is intended.
                if mixer_kind == "vdn":
                    self.mixer = VDNMixer()
                else:
                    self.mixer = QMixer(args)
                self.params += list(self.mixer.parameters())
                self.target_mixer = copy.deepcopy(self.mixer)
            elif mixer_kind == "qmix_multi":
                # One mixer per team; each one's parameters join the
                # parameter list at the same index.
                self.mixer = [
                    QMixerMulti(args, self.n_agents_team1),
                    QMixerMulti(args, self.n_agents_team2)
                ]
                for idx, team_mixer in enumerate(self.mixer):
                    self.params[idx].extend(team_mixer.parameters())
                self.target_mixer = copy.deepcopy(self.mixer)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))

        # One optimiser per top-level entry of ``self.params``.
        optimisers = []
        for group_params in self.params:
            optimisers.append(
                RMSprop(params=group_params,
                        lr=args.lr,
                        alpha=args.optim_alpha,
                        eps=args.optim_eps))
        self.optimiser = optimisers

        # Deep-copying the controller is a bit wasteful (it also duplicates
        # e.g. the action selector) but works for any MAC.
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 20
0
    def __init__(self, mac, scheme, logger, args):
        """Build the mixer, optimiser and target networks for this learner.

        Args:
            mac: Controller exposing ``parameters()`` and
                ``named_parameters()``.
            scheme: Not used by this constructor.
            logger: Stored on ``self.logger``.
            args: Config namespace; reads ``mixer``, ``n_agents``, ``lr``,
                ``optim_alpha``, ``optim_eps``, ``learner_log_interval`` and,
                optionally, ``optimizer`` and ``optimizer_epsilon``.

        Raises:
            ValueError: If ``args.mixer`` is set to a value other than
                ``"vdn"``, ``"vdn-s"`` or ``"qmix"`` (only checked with more
                than one agent).
            Exception: If ``args.optimizer`` is neither ``"rmsprop"`` nor
                ``"adam"``.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        # Positional and name-keyed views of the trainable parameters.
        self.params = list(mac.parameters())
        self.named_params = dict(mac.named_parameters())

        self.last_target_update_episode = 0

        # With a single agent there is nothing to mix, so no mixer is built.
        self.mixer = None
        if args.mixer is not None and self.args.n_agents > 1:
            mixer_kind = args.mixer
            if mixer_kind == "vdn":
                self.mixer = VDNMixer()
            elif mixer_kind == 'vdn-s':
                self.mixer = VDNState(args)
            elif mixer_kind == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.params.extend(self.mixer.parameters())
            self.named_params.update(dict(self.mixer.named_parameters()))
            self.target_mixer = copy.deepcopy(self.mixer)

        # "rmsprop" is the default when ``args.optimizer`` is absent.
        optimizer_name = getattr(self.args, "optimizer", "rmsprop")
        if optimizer_name == "rmsprop":
            self.optimiser = RMSprop(params=self.params,
                                     lr=args.lr,
                                     alpha=args.optim_alpha,
                                     eps=args.optim_eps)
        elif optimizer_name == "adam":
            self.optimiser = Adam(params=self.params,
                                  lr=args.lr,
                                  eps=getattr(args, "optimizer_epsilon",
                                              10E-8))
        else:
            raise Exception("unknown optimizer {}".format(optimizer_name))

        # Deep-copying the controller is a bit wasteful (it also duplicates
        # e.g. the action selector) but works for any MAC.
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 21
0
    def __init__(self, mac, scheme, logger, args):
        """Set up parameters, mixer, RMSprop optimiser, targets and the
        intrinsic module selected by ``args.e_type``.

        Args:
            mac: controller object; must provide ``parameters()``.
            scheme: not used by this constructor.
            logger: stored on the instance as ``self.logger``.
            args: configuration object. Reads ``mixer``, ``lr``,
                ``optim_alpha``, ``optim_eps``, ``learner_log_interval``,
                ``e_type`` and, when ``e_type == "count"``, ``bin_coef``.

        Raises:
            ValueError: if ``args.mixer`` is neither None, "vdn" nor "qmix".
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        self.params = list(mac.parameters())

        self.last_target_update_episode = 0

        self.mixer = None
        mixer_name = args.mixer
        if mixer_name is not None:
            if mixer_name == "qmix":
                self.mixer = QMixer(args)
            elif mixer_name == "vdn":
                self.mixer = VDNMixer()
            else:
                raise ValueError("Mixer {} not recognised.".format(mixer_name))
            # Mixer weights are optimised together with the controller's.
            self.params += list(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )

        # Copying the full MAC is somewhat wasteful (the action selector is
        # duplicated too) but is independent of the controller implementation.
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1

        # Intrinsic module: "count" keeps a counter dict plus a bin
        # coefficient, "rnd" builds an RNDModel; any other value sets up
        # nothing beyond e_type.
        e_type = args.e_type
        self.e_type = e_type
        if e_type == "count":
            self.count_dict = defaultdict(int)
            self.bin_coef = args.bin_coef
        elif e_type == "rnd":
            self.rnd_model = RNDModel(args)
Ejemplo n.º 22
0
    def __init__(self, mac, scheme, logger, args):
        """Set up parameters, mixer, optimiser, optional LR scheduler and
        target networks.

        Args:
            mac: controller object; must provide ``parameters()``.
            scheme: not used by this constructor.
            logger: stored on the instance as ``self.logger``.
            args: configuration object. Reads ``mixer``, ``use_decay``,
                ``initial_lr`` (when decaying) or ``lr`` (otherwise),
                ``optimizer``, ``optim_alpha`` (RMSprop only),
                ``optim_eps``, ``regularization``, ``lr_decay_gamma``
                (when decaying) and ``learner_log_interval``.

        Raises:
            ValueError: if ``args.mixer`` or ``args.optimizer`` is not one of
                the supported names.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        self.params = list(mac.parameters())

        self.last_target_update_episode = 0

        self.mixer = None
        mixer_name = args.mixer
        if mixer_name is not None:
            if mixer_name == "vdn":
                self.mixer = VDNMixer()
            elif mixer_name == "qmix_plus":
                self.mixer = QMixerPlus(args)
            elif mixer_name == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(mixer_name))
            self.params += list(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # With decay enabled, training starts from the dedicated initial rate.
        if args.use_decay:
            lr = args.initial_lr
        else:
            lr = args.lr

        optimizer_name = args.optimizer
        if optimizer_name == 'rmsprop':
            self.optimiser = RMSprop(
                params=self.params,
                lr=lr,
                alpha=args.optim_alpha,
                eps=args.optim_eps,
                weight_decay=args.regularization,
            )
        elif optimizer_name == 'adam':
            self.optimiser = Adam(
                params=self.params,
                lr=lr,
                eps=args.optim_eps,
                weight_decay=args.regularization,
            )
        else:
            raise ValueError(
                "Optimizer {} not recognized".format(optimizer_name))

        # self.scheduler only exists when decay is enabled.
        if args.use_decay:
            # Milestones are episode numbers (author's note: 1 episode ~ 50
            # timesteps); the rate is multiplied by lr_decay_gamma at each.
            decay_milestones = [4000, 10000, 180000]
            self.scheduler = lr_scheduler.MultiStepLR(
                self.optimiser,
                milestones=decay_milestones,
                gamma=args.lr_decay_gamma,
            )

        # Deep-copying the MAC is a bit wasteful (e.g. the action selector is
        # copied as well) but works regardless of the controller type.
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 23
0
    def __init__(self, mac, scheme, logger, args):
        """Set up parameters, (optionally attention-based) mixer, RMSprop
        optimiser and target networks.

        Args:
            mac: controller object; must provide ``parameters()`` and, for
                the attention mixers, ``input_shape`` and
                ``input_alone_shape``.
            scheme: not used by this constructor.
            logger: stored on the instance as ``self.logger``.
            args: configuration object. Reads ``mixer``, ``lr``,
                ``optim_alpha``, ``optim_eps`` and ``learner_log_interval``.

        Raises:
            ValueError: if ``args.mixer`` is not a recognised mixer name.
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        self.params = list(mac.parameters())

        # Two independent counters, both starting at zero.
        self.last_target_update_episode = 0
        self.self_last_target_update_episode = 0

        self.mixer = None
        mixer_name = args.mixer
        if mixer_name is not None:
            if mixer_name == "vdn":
                self.mixer = VDNMixer()
            elif mixer_name == "qmix":
                self.mixer = QMixer(args)
            elif mixer_name in ("qmix_attnv1", "qmix_attnv2"):
                # Both attention variants take the same constructor arguments.
                if mixer_name == "qmix_attnv1":
                    attn_mixer_cls = QAttnMixerV1
                else:
                    attn_mixer_cls = QAttnMixerV2
                self.mixer = attn_mixer_cls(args, self.mac.input_shape,
                                            self.mac.input_alone_shape)
            else:
                raise ValueError("Mixer {} not recognised.".format(mixer_name))
            self.params += list(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )

        # A full deep copy also clones e.g. the action selector; wasteful,
        # but it works for every MAC implementation.
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 24
0
    def __init__(self, mac, scheme, logger, args):
        """Set up an (optionally ensembled) learner.

        Args:
            mac: either a single controller providing ``parameters()`` or a
                list of such controllers.
            scheme: stored on the instance as ``self.scheme``.
            logger: stored on the instance as ``self.logger``.
            args: configuration object. Reads ``mixer``, ``ensemble_num``,
                ``q_net_ensemble``, ``lr``, ``optim_alpha``, ``optim_eps``
                and ``learner_log_interval``.

        Raises:
            ValueError: if ``args.mixer`` is not a recognised mixer name.

        NOTE(review): ``self.params`` is only defined when ``mac`` is a single
        controller. With a list of controllers, every path below that touches
        ``self.params`` (``ensemble_num == 1``, or ``q_net_ensemble`` being
        false) raises AttributeError. Likewise ``self.optimiser`` is only set
        when ``q_net_ensemble`` is false and ``self.target_mixer`` only when
        ``ensemble_num == 1``. Confirm which configurations are supported.
        """
        self.args = args
        self.mac = mac
        self.scheme = scheme
        self.logger = logger

        # One parameter list per controller.
        self.params_list = []
        # isinstance (instead of an exact type comparison) also accepts
        # subclasses of list.
        if isinstance(mac, list):
            for mac_single in mac:
                self.params_list.append(list(mac_single.parameters()))
        else:
            self.params_list.append(list(mac.parameters()))
            # Alias, not a copy: the in-place `+=` on self.params further
            # down also extends params_list[0].
            self.params = self.params_list[0]

        self.last_target_update_episode = 0

        self.mixer = None
        if args.mixer is not None:
            self.mixer_list = []
            self.target_mixer_list = []
            # Build one mixer of the requested kind per ensemble member.
            for _ in range(args.ensemble_num):
                if args.mixer == "vdn":
                    self.mixer_list.append(VDNMixer())
                elif args.mixer == "qmix":
                    self.mixer_list.append(QMixer(args))
                elif args.mixer == "aqmix":
                    self.mixer_list.append(AQMixer(args))
                elif args.mixer == "qmix_noabs":
                    self.mixer_list.append(QMixerCentralFF(args))
                else:
                    raise ValueError("Mixer {} not recognised.".format(
                        args.mixer))
            if args.ensemble_num == 1:
                # Single mixer: expose it directly instead of as a list.
                self.mixer = self.mixer_list[0]
                self.params += list(self.mixer.parameters())
                self.target_mixer_list.append(copy.deepcopy(self.mixer))
                self.target_mixer = self.target_mixer_list[0]
            else:
                self.mixer = self.mixer_list
                for i, member_mixer in enumerate(self.mixer_list):
                    if args.q_net_ensemble:
                        # Each member's mixer trains with its own controller.
                        self.params_list[i] += list(
                            member_mixer.parameters())
                    else:
                        # All mixers share the single controller's optimiser.
                        self.params += list(member_mixer.parameters())
                    self.target_mixer_list.append(
                        copy.deepcopy(member_mixer))

        self.optimiser_list = []
        if args.q_net_ensemble:
            # One optimiser per ensemble member, each over its own parameters.
            for i in range(args.ensemble_num):
                self.optimiser_list.append(
                    RMSprop(params=self.params_list[i],
                            lr=args.lr,
                            alpha=args.optim_alpha,
                            eps=args.optim_eps))
        else:
            self.optimiser = RMSprop(params=self.params,
                                     lr=args.lr,
                                     alpha=args.optim_alpha,
                                     eps=args.optim_eps)

        # a little wasteful to deepcopy (e.g. duplicates action selector), but should work for any MAC
        self.target_mac = copy.deepcopy(mac)

        self.log_stats_t = -self.args.learner_log_interval - 1
Ejemplo n.º 25
0
    def __init__(self, mac, scheme, logger, args):
        """Set up a learner with a mixer plus a separate "central" mixer/MAC.

        Args:
            mac: controller object; must provide ``parameters()``.
            scheme: passed to the central MAC constructor when a separate
                central MAC is built (``central_loss != 0``).
            logger: stored on the instance as ``self.logger``.
            args: configuration object. Reads ``mixer``, ``central_mixer``,
                ``central_loss``, ``central_mac``, ``lr`` and
                ``learner_log_interval``.

        Raises:
            AssertionError: if ``args.mixer`` is None, or if a separate
                central MAC is requested and ``args.central_mac`` is not
                "basic_central_mac".
            ValueError: if ``args.mixer`` is neither "vdn" nor "qmix".
            Exception: if ``args.central_mixer`` is not "ff" or "atten", or
                if it is "atten" while ``central_loss != 0`` (only "ff" is
                implemented on that path).
        """
        self.args = args
        self.mac = mac
        self.logger = logger

        # mac_params holds only the controller's parameters; params is the
        # full list handed to the optimiser and grows further below.
        self.mac_params = list(mac.parameters())
        self.params = list(self.mac.parameters())

        self.last_target_update_episode = 0

        self.mixer = None
        assert args.mixer is not None
        # Given the assert above, this condition is always true when
        # assertions are enabled.
        if args.mixer is not None:
            if args.mixer == "vdn":
                self.mixer = VDNMixer()
            elif args.mixer == "qmix":
                self.mixer = QMixer(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(args.mixer))
            self.mixer_params = list(self.mixer.parameters())
            self.params += list(self.mixer.parameters())
            self.target_mixer = copy.deepcopy(self.mixer)

        # a little wasteful to deepcopy (e.g. duplicates action selector), but should work for any MAC
        self.target_mac = copy.deepcopy(mac)

        # Central Q
        # TODO: Clean this mess up!
        self.central_mac = None
        if self.args.central_mixer in ["ff", "atten"]:
            if self.args.central_loss == 0:
                # No separate central network: reuse the regular mixer, MAC
                # and target MAC.
                self.central_mixer = self.mixer
                self.central_mac = self.mac
                self.target_central_mac = self.target_mac
            else:
                if self.args.central_mixer == "ff":
                    self.central_mixer = QMixerCentralFF(
                        args
                    )  # Feedforward network that takes state and agent utils as input
                # elif self.args.central_mixer == "atten":
                # self.central_mixer = QMixerCentralAtten(args)
                else:
                    raise Exception("Error with central_mixer")

                assert args.central_mac == "basic_central_mac"
                self.central_mac = mac_REGISTRY[args.central_mac](
                    scheme, args
                )  # Groups aren't used in the CentralBasicController. Little hacky
                self.target_central_mac = copy.deepcopy(self.central_mac)
                self.params += list(self.central_mac.parameters())
        else:
            raise Exception("Error with qCentral")
        # NOTE(review): when central_loss == 0, central_mixer is self.mixer,
        # whose parameters were already appended above, so self.params then
        # contains each mixer parameter twice. Confirm this is intended
        # before changing it; anything that consumes self.params sees the
        # duplicates.
        self.params += list(self.central_mixer.parameters())
        self.target_central_mixer = copy.deepcopy(self.central_mixer)

        # Report the combined size of the mixer and the central mixer.
        print('Mixer Size: ')
        print(
            get_parameters_num(
                list(self.mixer.parameters()) +
                list(self.central_mixer.parameters())))

        self.optimiser = Adam(params=self.params, lr=args.lr)

        self.log_stats_t = -self.args.learner_log_interval - 1

        # Norm bookkeeping, initialised to 1; mixer_norms keeps at most the
        # 100 most recent entries.
        self.grad_norm = 1
        self.mixer_norm = 1
        self.mixer_norms = deque([1], maxlen=100)