def __init__(self, mac, scheme, logger, args):
    """Build mixers, target copies and the two RMSprop optimisers.

    Args:
        mac: Controller exposing ``parameters()`` and
            ``action_encoder_params()``; deep-copied as ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``n_agents``, ``mixer``,
            ``role_mixer``, ``lr``, ``optim_alpha``, ``optim_eps``,
            ``learner_log_interval``, ``role_interval`` and ``device``.

    Raises:
        ValueError: If ``args.mixer`` or ``args.role_mixer`` is set to a
            name other than ``"vdn"`` or ``"qmix"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger
    self.n_agents = args.n_agents

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    # Optional mixer; its parameters join the main optimiser.
    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # Optional role mixer, handled the same way.
    self.role_mixer = None
    role_mixer_name = args.role_mixer
    if role_mixer_name is not None:
        if role_mixer_name == "vdn":
            self.role_mixer = VDNMixer()
        elif role_mixer_name == "qmix":
            self.role_mixer = QMixer(args)
        else:
            raise ValueError("Role Mixer {} not recognised.".format(
                args.role_mixer))
        self.params.extend(self.role_mixer.parameters())
        self.target_role_mixer = copy.deepcopy(self.role_mixer)

    self.optimiser = RMSprop(
        params=self.params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1

    self.role_interval = args.role_interval
    self.device = self.args.device

    self.role_action_spaces_updated = True

    # The action encoder is trained by its own optimiser.
    self.action_encoder_params = list(self.mac.action_encoder_params())
    self.action_encoder_optimiser = RMSprop(
        params=self.action_encoder_params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )
def __init__(self, mac, scheme, logger, args):
    """Create control/execution critics, mixers and four optimisers.

    Args:
        mac: Controller exposing ``agent_dlstm_parameters()``,
            ``latent_state_encoder_parameters()`` and
            ``agent_lstm_parameters()``.
        scheme: Passed to both critic constructors.
        logger: Stored on the instance.
        args: Configuration object; reads ``n_agents``, ``n_actions``,
            ``learner_log_interval``, ``control_mixer``,
            ``execution_mixer``, the four ``*_lr`` values,
            ``optim_alpha`` and ``optim_eps``.

    Raises:
        ValueError: If ``args.control_mixer`` or ``args.execution_mixer``
            is set to a name other than ``"vdn"`` or ``"qmix"``.
    """
    # NOTE(review): this switches autograd anomaly detection on globally.
    torch.autograd.set_detect_anomaly(True)

    self.args = args
    self.n_agents = args.n_agents
    self.n_actions = args.n_actions
    self.mac = mac
    self.logger = logger

    self.last_target_update_step = 0
    self.critic_training_steps = 0
    self.log_stats_t = -self.args.learner_log_interval - 1

    # Critics and their target copies.
    self.control_critic = SCControlCritic(scheme, args)
    self.execution_critic = SCExecutionCritic(scheme, args)
    self.target_control_critic = copy.deepcopy(self.control_critic)
    self.target_execution_critic = copy.deepcopy(self.execution_critic)

    # Parameter groups, one per optimiser.
    dlstm_params = list(self.mac.agent_dlstm_parameters())
    encoder_params = list(self.mac.latent_state_encoder_parameters())
    self.control_actor_params = dlstm_params + encoder_params
    self.execution_actor_params = list(self.mac.agent_lstm_parameters())
    self.control_critic_params = list(self.control_critic.parameters())
    self.execution_critic_params = list(self.execution_critic.parameters())

    # Optional control mixer, optimised together with the control critic.
    self.control_mixer = None
    control_name = args.control_mixer
    if control_name is not None:
        if control_name == "vdn":
            self.control_mixer = VDNMixer()
        elif control_name == "qmix":
            self.control_mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.control_mixer))
        self.target_control_mixer = copy.deepcopy(self.control_mixer)
        self.control_critic_params.extend(self.control_mixer.parameters())

    # Optional execution mixer; note that "qmix" selects DirMixer here.
    self.execution_mixer = None
    execution_name = args.execution_mixer
    if execution_name is not None:
        if execution_name == "vdn":
            self.execution_mixer = VDNMixer()
        elif execution_name == "qmix":
            self.execution_mixer = DirMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.execution_mixer))
        self.target_execution_mixer = copy.deepcopy(self.execution_mixer)
        self.execution_critic_params.extend(self.execution_mixer.parameters())

    self.control_actor_optimiser = RMSprop(
        params=self.control_actor_params,
        lr=args.control_actor_lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )
    self.control_critic_optimiser = RMSprop(
        params=self.control_critic_params,
        lr=args.control_critic_lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )
    self.execution_actor_optimiser = RMSprop(
        params=self.execution_actor_params,
        lr=args.execution_actor_lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )
    self.execution_critic_optimiser = RMSprop(
        params=self.execution_critic_params,
        lr=args.execution_critic_lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )
def __init__(self, mac, scheme, logger, args):
    """Set up the controller copy, optional mixer and the optimiser.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied as
            ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``n_agents``, ``n_actions``,
            ``learner_log_interval``, ``mixer``, ``optim`` and ``lr``.

    Raises:
        ValueError: If ``args.mixer`` is set to a name other than
            ``"vdn"`` or ``"qmix"``.
    """
    self.args = args
    self.n_agents = args.n_agents
    self.n_actions = args.n_actions
    self.mac = mac
    self.logger = logger

    self.last_target_update_step = 0
    self.critic_training_steps = 0
    self.log_stats_t = -self.args.learner_log_interval - 1

    self.target_mac = copy.deepcopy(mac)

    self.params = list(self.mac.parameters())

    # Always define the attribute so that `self.mixer is None` can be
    # tested when no mixer is configured, instead of raising
    # AttributeError on first access.
    self.mixer = None
    if args.mixer is not None:
        if args.mixer == "vdn":
            self.mixer = VDNMixer()
        elif args.mixer == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params += list(self.mixer.parameters())

    # Any value other than 'adam' selects RMSprop with default settings.
    if self.args.optim == 'adam':
        self.optimiser = Adam(params=self.params, lr=args.lr)
    else:
        self.optimiser = RMSprop(params=self.params, lr=args.lr)
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, optional communication parameters and optimiser.

    Args:
        mac: Controller exposing ``parameters()`` and, when
            ``args.communicating`` is truthy, ``communication``;
            deep-copied as ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``communicating``,
            ``lr``, ``optim_alpha``, ``optim_eps`` and
            ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to an unsupported name.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        elif mixer_name == "point_like":
            self.mixer = PointLikeMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # The communication module's parameters are added explicitly.
    if self.args.communicating:
        self.params.extend(self.mac.communication.parameters())

    self.optimiser = RMSprop(
        params=self.params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1

    # NOTE(review): this switches autograd anomaly detection on globally.
    th.autograd.set_detect_anomaly(True)
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, message-RNN parameters, optimiser and loss weights.

    Args:
        mac: Controller exposing ``parameters()`` and ``msg_rnn``;
            deep-copied as ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``lr``,
            ``optim_alpha``, ``optim_eps`` and ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to a name other than
            ``"vdn"`` or ``"qmix"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # The message RNN's parameters are added explicitly.
    self.params.extend(self.mac.msg_rnn.parameters())

    self.optimiser = RMSprop(
        params=self.params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )

    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1

    # Loss weights; the original comment calls these "the beta in the
    # Algorithm 1".
    self.loss_weight = [0.5, 1, 1.5]
def __init__(self, mac, scheme, logger, args):
    """Set up the mixer, its target copy and the optimiser.

    Args:
        mac: Controller exposing ``parameters()``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``use_cuda``, ``mixer``,
            ``optimizer``, ``lr``, ``learner_log_interval`` and, for
            RMSprop, ``optim_alpha`` and ``optim_eps``.

    Raises:
        Exception: If ``args.mixer`` is not ``"qatten"``, ``"vdn"`` or
            ``"qmix"``.
    """
    self.args = args
    self.mac = mac  # the controller
    self.logger = logger
    self.last_target_update_episode = 0

    if args.use_cuda:
        self.device = th.device('cuda')
    else:
        self.device = th.device('cpu')

    self.params = list(mac.parameters())

    mixer_name = args.mixer
    if mixer_name == "qatten":
        self.mixer = QattenMixer(args)
    elif mixer_name == "vdn":
        self.mixer = VDNMixer()
    elif mixer_name == "qmix":
        self.mixer = Mixer(args)
    else:
        raise Exception("mixer error")

    self.target_mixer = copy.deepcopy(self.mixer)
    self.params.extend(self.mixer.parameters())

    # Report the mixer's parameter count on a single output line.
    print('Mixer的参数量为: ', end='')
    print(get_parameters_num(self.mixer.parameters()))

    if self.args.optimizer == 'adam':
        self.optimiser = Adam(params=self.params, lr=args.lr)
    else:
        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )

    # NOTE(review): the deep copy of the controller into
    # `self.target_mac` is commented out in the original, so that
    # attribute is not created here; confirm nothing relies on it.

    # Logging interval bookkeeping.
    self.log_stats_t = -self.args.learner_log_interval - 1
    self.train_t = 0
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, representation parameters and the optimiser.

    Args:
        mac: Controller exposing ``parameters()`` and
            ``rep_parameters()``; deep-copied as ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``lr``,
            ``optim_alpha``, ``optim_eps`` and ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to a name other than
            ``"vdn"`` or ``"qmix"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # Kept for later use; the original has a disabled loop that would
    # freeze these parameters and append them to `self.params`.
    self.rep_paras = mac.rep_parameters()

    self.optimiser = RMSprop(
        params=self.params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up device, mixer, optimiser and the target controller.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied as
            ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``use_cuda``, ``mixer``,
            ``optimizer``, ``lr``, ``learner_log_interval`` and, for
            RMSprop, ``optim_alpha`` and ``optim_eps``.

    Raises:
        ValueError: If ``args.mixer`` is set to a name other than
            ``"vdn"`` or ``"qmix"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    if args.use_cuda:
        self.device = th.device('cuda')
    else:
        self.device = th.device('cpu')

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # Any value other than 'adam' selects RMSprop.
    if self.args.optimizer == 'adam':
        self.optimiser = Adam(params=self.params, lr=args.lr)
    else:
        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
    self.train_t = 0
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, env-blender parameters and the optimiser.

    Args:
        mac: Controller exposing ``parameters()`` and ``env_blender``;
            deep-copied as ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``normalization_const``,
            ``mixer``, ``lr``, ``optim_alpha``, ``optim_eps`` and
            ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to a name other than
            ``"vdn"`` or ``"qmix"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    # Note the name change: read from `normalization_const`.
    self.regularization_const = self.args.normalization_const

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # The env blender's parameters are added explicitly.
    self.params.extend(self.mac.env_blender.parameters())

    self.optimiser = RMSprop(
        params=self.params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )

    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, weight-decayed RMSprop and the target controller.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied as
            ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``entity_scheme``
            (for ``"flex_qmix"``), ``lr``, ``optim_alpha``,
            ``optim_eps``, ``weight_decay`` and ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to an unsupported name.
        AssertionError: If ``"flex_qmix"`` is requested while
            ``args.entity_scheme`` is falsy.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        elif mixer_name == "flex_qmix":
            assert args.entity_scheme, "FlexQMixer only available with entity scheme"
            self.mixer = FlexQMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    self.optimiser = RMSprop(
        params=self.params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
        weight_decay=args.weight_decay,
    )

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, Adam optimiser and the target controller.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied as
            ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``lr`` and
            ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to an unsupported name.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        elif mixer_name == "graphmix":
            self.mixer = GraphMixer(args)
        else:
            raise ValueError(f"Mixer {args.mixer} not recognized.")
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # Adam replaces an earlier RMSprop setup that is disabled in the
    # original.
    # NOTE(review): the attribute is spelled `optimizer` here; confirm
    # the rest of the class does not expect `optimiser`.
    self.optimizer = Adam(params=self.params, lr=args.lr)

    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up action/frequency parameter groups, mixers and optimisers.

    Args:
        mac: Controller whose ``parameters()`` returns three parameter
            collections (action, communication, frequency); deep-copied
            as ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``comm_type``, ``mixer``,
            ``freq_mixer``, ``lr``, ``optim_alpha``, ``optim_eps`` and
            ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` or ``args.freq_mixer`` is set to a
            name other than ``"vdn"`` or ``"qmix"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.act_params, self.comm_params, self.freq_params = mac.parameters()
    # Communication parameters share the action optimiser unless
    # communication is switched off.
    if args.comm_type != "no":
        self.act_params += self.comm_params

    self.last_target_update_episode = 0

    self.mixer = None
    self.freq_mixer = None

    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.act_params += list(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # The frequency mixer is only built for "normal" communication.
    freq_name = args.freq_mixer
    if freq_name is not None and args.comm_type == "normal":
        if freq_name == "vdn":
            self.freq_mixer = VDNMixer()
        elif freq_name == "qmix":
            self.freq_mixer = QMixer(args)
        else:
            raise ValueError("Freq Mixer {} not recognised.".format(args.freq_mixer))
        self.freq_params += list(self.freq_mixer.parameters())
        self.target_freq_mixer = copy.deepcopy(self.freq_mixer)

    # NOTE(review): the flattened original does not show whether the
    # frequency optimiser sits inside the guard above; it is built
    # unconditionally here, next to the action optimiser. Confirm.
    self.freq_optimiser = RMSprop(
        params=self.freq_params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )
    self.ac_optimiser = RMSprop(
        params=self.act_params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up actor, critic, optional mixer and their optimisers.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied as
            ``target_mac``.
        scheme: Passed to the critic constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``n_agents``, ``n_actions``,
            ``mixer``, ``lr``, ``critic_lr``, ``learner_log_interval``,
            optionally ``optimizer`` and ``optimizer_epsilon``, and for
            RMSprop ``optim_alpha`` and ``optim_eps``.

    Raises:
        ValueError: If ``args.mixer`` is set to a name other than
            ``"vdn"`` or ``"qmix"`` (with more than one agent).
        Exception: If the optimizer name is neither ``"rmsprop"`` nor
            ``"adam"``.
    """
    self.args = args
    self.n_agents = args.n_agents
    self.n_actions = args.n_actions
    self.logger = logger

    self.mac = mac
    self.target_mac = copy.deepcopy(self.mac)
    self.agent_params = list(mac.parameters())

    self.critic = FMACCritic(scheme, args)
    self.target_critic = copy.deepcopy(self.critic)
    self.critic_params = list(self.critic.parameters())

    # With a single agent there is nothing to mix.
    self.mixer = None
    if args.mixer is not None and self.args.n_agents > 1:
        mixer_name = args.mixer
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.critic_params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # The printed count covers all critic-side parameters.
    print('Mixer Size: ')
    print(get_parameters_num(self.critic_params))

    # Actor first, then critic, both with the same optimiser family.
    optimizer_name = getattr(self.args, "optimizer", "rmsprop")
    if optimizer_name == "rmsprop":
        self.agent_optimiser = RMSprop(
            params=self.agent_params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )
        self.critic_optimiser = RMSprop(
            params=self.critic_params,
            lr=args.critic_lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )
    elif optimizer_name == "adam":
        self.agent_optimiser = Adam(
            params=self.agent_params,
            lr=args.lr,
            eps=getattr(args, "optimizer_epsilon", 10E-8),
        )
        self.critic_optimiser = Adam(
            params=self.critic_params,
            lr=args.critic_lr,
            eps=getattr(args, "optimizer_epsilon", 10E-8),
        )
    else:
        raise Exception("unknown optimizer {}".format(
            getattr(self.args, "optimizer", "rmsprop")))

    self.log_stats_t = -self.args.learner_log_interval - 1
    self.last_target_update_episode = 0
def __init__(self, mac, scheme, logger, args):
    """Set up a single-member mixer ensemble and two optimisers.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied as
            ``target_mac``.
        scheme: Stored on the instance.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``ensemble_num``,
            ``optimizer``, ``lr``, ``meta_lr``, ``learner_log_interval``
            and, for RMSprop, ``optim_alpha`` and ``optim_eps``.

    Raises:
        ValueError: If ``args.mixer`` is set to an unsupported name.
        NotImplementedError: If ``args.ensemble_num`` is not 1 (with a
            mixer configured) or ``args.optimizer`` is neither
            ``"RMSprop"`` nor ``"Adam"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger
    self.scheme = scheme

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        self.mixer_list = []
        self.target_mixer_list = []

        if mixer_name == "vdn":
            self.mixer_list.append(VDNMixer())
        elif mixer_name == "qmix":
            self.mixer_list.append(QMixer(args))
        elif mixer_name == "hqmix":
            self.mixer_list.append(HQMixer(args))
        elif mixer_name == "hqmix_noabs":
            self.mixer_list.append(HQMixerFF(args))
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))

        # Only a single ensemble member is supported.
        if args.ensemble_num != 1:
            raise NotImplementedError
        self.mixer = self.mixer_list[0]
        self.params.extend(self.mixer.parameters())
        self.target_mixer_list.append(copy.deepcopy(self.mixer))
        self.target_mixer = self.target_mixer_list[0]
        # The meta optimiser covers the controller's parameters only.
        self.meta_params = list(self.mac.parameters())

    optimizer_name = args.optimizer
    if optimizer_name == "RMSprop":
        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )
        self.meta_optimiser = RMSprop(
            params=self.meta_params,
            lr=args.meta_lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )
    elif optimizer_name == "Adam":
        self.optimiser = Adam(params=self.params, lr=args.lr)
        self.meta_optimiser = Adam(params=self.meta_params, lr=args.meta_lr)
    else:
        raise NotImplementedError

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, optimiser and single or multiple target copies.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied once or
            ``args.SubAVG_Agent_K`` times.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``n_actions``, ``mixer``,
            ``SubAVG_Mixer_flag``, ``SubAVG_Mixer_K``,
            ``SubAVG_Agent_flag``, ``SubAVG_Agent_K``, ``lr``,
            ``optim_alpha``, ``optim_eps`` and ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to a name other than
            ``"vdn"`` or ``"qmix"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger
    self.n_actions_levin = args.n_actions

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())

        if not args.SubAVG_Mixer_flag:
            self.target_mixer = copy.deepcopy(self.mixer)
        elif args.mixer == "qmix":
            # Several target mixers are kept only for "qmix"; with the
            # flag set and "vdn" selected no target mixer is created.
            self.target_mixer_list = [
                copy.deepcopy(self.mixer)
                for _ in range(self.args.SubAVG_Mixer_K)
            ]
            self.levin_iter_target_mixer_update = 0

    self.optimiser = RMSprop(
        params=self.params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    if self.args.SubAVG_Agent_flag:
        self.target_mac_list = [
            copy.deepcopy(mac) for _ in range(self.args.SubAVG_Agent_K)
        ]
        self.levin_iter_target_update = 0
    else:
        self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1

    self.number = 0
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, central controller/mixer and a shared optimiser.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied as
            ``target_mac``.
        scheme: Passed to the central controller constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``central_mixer``,
            ``central_mac``, ``lr``, ``optim_alpha``, ``optim_eps`` and
            ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to a name other than
            ``"vdn"`` or ``"qmix"``.
        AssertionError: If ``args.mixer`` is ``None``,
            ``args.central_mixer`` is not ``"ff"`` or ``args.central_mac``
            is not ``"basic_central_mac"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.mac_params = list(mac.parameters())
    # Separate list object: it grows below while `mac_params` does not.
    self.params = list(self.mac.parameters())

    self.last_target_update_episode = 0

    self.mixer = None
    assert args.mixer is not None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.mixer_params = list(self.mixer.parameters())
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    # Central Q
    # TODO: Clean this mess up!
    self.central_mac = None
    assert self.args.central_mixer == "ff"
    self.central_mixer = QMixerCentralFF(args)
    assert args.central_mac == "basic_central_mac"
    # Groups aren't used in the CentralBasicController, so only the
    # scheme and args are passed.
    central_mac_cls = mac_REGISTRY[args.central_mac]
    self.central_mac = central_mac_cls(scheme, args)
    self.target_central_mac = copy.deepcopy(self.central_mac)
    self.params.extend(self.central_mac.parameters())

    self.params.extend(self.central_mixer.parameters())
    self.target_central_mixer = copy.deepcopy(self.central_mixer)

    self.optimiser = RMSprop(
        params=self.params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, optimiser, target networks and replay-priority state.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied as
            ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``use_cuda``, ``mixer``,
            ``optimizer``, ``lr``, ``learner_log_interval``, optionally
            ``weight_decay``, ``use_per`` and ``return_priority``, and
            for RMSprop ``optim_alpha`` and ``optim_eps``.

    Raises:
        ValueError: If ``args.mixer`` is not ``"qatten"``, ``"vdn"`` or
            ``"qmix"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.last_target_update_episode = 0
    self.device = th.device('cuda' if args.use_cuda else 'cpu')
    self.params = list(mac.parameters())

    if args.mixer == "qatten":
        self.mixer = QattenMixer(args)
    elif args.mixer == "vdn":
        self.mixer = VDNMixer()
    elif args.mixer == "qmix":
        self.mixer = Mixer(args)
    else:
        # Raising a bare string is itself a TypeError in Python 3, which
        # hid the real cause; raise a proper exception instead.
        raise ValueError(
            "mixer error: {} not recognised".format(args.mixer))

    self.target_mixer = copy.deepcopy(self.mixer)
    self.params += list(self.mixer.parameters())

    print('Mixer Size: ')
    print(get_parameters_num(self.mixer.parameters()))

    # Any value other than 'adam' selects RMSprop.
    if self.args.optimizer == 'adam':
        self.optimiser = Adam(params=self.params, lr=args.lr,
                              weight_decay=getattr(args, "weight_decay", 0))
    else:
        self.optimiser = RMSprop(params=self.params, lr=args.lr,
                                 alpha=args.optim_alpha, eps=args.optim_eps)

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
    self.train_t = 0

    # Priority replay: running extremes start at the opposite infinities
    # so the first observed value replaces them.
    self.use_per = getattr(self.args, 'use_per', False)
    self.return_priority = getattr(self.args, "return_priority", False)
    if self.use_per:
        self.priority_max = float('-inf')
        self.priority_min = float('inf')
def __init__(self, mac, scheme, logger, args):
    """Set up critic, mixer, discriminator and the optimiser.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied as
            ``target_mac``.
        scheme: Passed to the critic constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``state_shape``,
            ``n_agents``, ``n_actions``, ``rnn_discrim``,
            ``rnn_agg_size`` (when ``rnn_discrim`` is truthy),
            ``noise_dim``, ``lr``, ``optim_alpha``, ``optim_eps`` and
            ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to a name other than
            ``"vdn"`` or ``"qmix"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.params = list(mac.parameters())

    # NOTE(review): `device` is not defined in this constructor and must
    # come from the enclosing module; the critic's parameters are also
    # not added to `self.params`. Confirm both are intended.
    self.critic = LIIRCritic(scheme, args).to(device)

    self.last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = NoiseQMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # Discriminator input size: flattened state size plus one slot per
    # (agent, action) pair.
    state_size = np.prod(self.args.state_shape)
    discrim_input = state_size + self.args.n_agents * self.args.n_actions

    self.n_agents = self.args.n_agents

    if self.args.rnn_discrim:
        # An RNN aggregator sits in front of the discriminator.
        self.rnn_agg = RNNAggregator(discrim_input, args)
        self.discrim = Discrim(args.rnn_agg_size, self.args.noise_dim, args)
        self.params.extend(self.discrim.parameters())
        self.params.extend(self.rnn_agg.parameters())
    else:
        self.discrim = Discrim(discrim_input, self.args.noise_dim, args)
        self.params.extend(self.discrim.parameters())

    self.discrim_loss = th.nn.CrossEntropyLoss(reduction="none")

    self.optimiser = RMSprop(
        params=self.params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )

    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up per-group parameters, mixers and one optimiser per entry.

    Args:
        mac: Controller whose ``parameters()`` yields one iterable of
            parameters per group; deep-copied as ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``n_agents_team1``,
            ``n_agents_team2``, ``mixer``, ``lr``, ``optim_alpha``,
            ``optim_eps`` and ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to an unsupported name.
    """
    self.args = args
    self.n_agents_team1 = args.n_agents_team1
    self.n_agents_team2 = args.n_agents_team2
    self.mac = mac
    self.logger = logger

    # One parameter list per group returned by the controller.
    self.params = [list(group) for group in mac.parameters()]

    self.last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
            self.params += list(self.mixer.parameters())
        elif mixer_name == "qmix":
            # NOTE(review): the mixer's parameters are appended as
            # individual entries beside the per-group lists, and every
            # entry of `self.params` gets its own RMSprop below.
            # Confirm this is what "qmix" is meant to do.
            self.mixer = QMixer(args)
            self.params += list(self.mixer.parameters())
        elif mixer_name == "qmix_multi":
            # One mixer per team, each trained with its team's group.
            self.mixer = [
                QMixerMulti(args, self.n_agents_team1),
                QMixerMulti(args, self.n_agents_team2),
            ]
            for team_idx, team_mixer in enumerate(self.mixer):
                self.params[team_idx] += list(team_mixer.parameters())
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.target_mixer = copy.deepcopy(self.mixer)

    self.optimiser = []
    for group in self.params:
        self.optimiser.append(
            RMSprop(
                params=group,
                lr=args.lr,
                alpha=args.optim_alpha,
                eps=args.optim_eps,
            )
        )

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, named-parameter map, optimiser and target controller.

    Args:
        mac: Controller exposing ``parameters()`` and
            ``named_parameters()``; deep-copied as ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``n_agents``,
            ``lr``, ``learner_log_interval``, optionally ``optimizer``
            and ``optimizer_epsilon``, and for RMSprop ``optim_alpha``
            and ``optim_eps``.

    Raises:
        ValueError: If ``args.mixer`` is set to an unsupported name
            (with more than one agent).
        Exception: If the optimizer name is neither ``"rmsprop"`` nor
            ``"adam"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.params = list(mac.parameters())
    self.named_params = dict(mac.named_parameters())

    self.last_target_update_episode = 0

    # With a single agent there is nothing to mix.
    self.mixer = None
    if args.mixer is not None and self.args.n_agents > 1:
        mixer_name = args.mixer
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == 'vdn-s':
            self.mixer = VDNState(args)
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())
        self.named_params.update(dict(self.mixer.named_parameters()))
        self.target_mixer = copy.deepcopy(self.mixer)

    optimizer_name = getattr(self.args, "optimizer", "rmsprop")
    if optimizer_name == "rmsprop":
        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )
    elif optimizer_name == "adam":
        self.optimiser = Adam(
            params=self.params,
            lr=args.lr,
            eps=getattr(args, "optimizer_epsilon", 10E-8),
        )
    else:
        raise Exception("unknown optimizer {}".format(
            getattr(self.args, "optimizer", "rmsprop")))

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, optimiser, target controller and intrinsic module.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied as
            ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``lr``,
            ``optim_alpha``, ``optim_eps``, ``learner_log_interval``,
            ``e_type`` and, for ``"count"``, ``bin_coef``.

    Raises:
        ValueError: If ``args.mixer`` is set to a name other than
            ``"vdn"`` or ``"qmix"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    self.optimiser = RMSprop(
        params=self.params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1

    # Intrinsic module: a count table or an RND model; any other
    # `e_type` value sets up neither.
    intrinsic_kind = args.e_type
    self.e_type = intrinsic_kind
    if intrinsic_kind == "count":
        self.count_dict = defaultdict(int)
        self.bin_coef = args.bin_coef
    elif intrinsic_kind == "rnd":
        self.rnd_model = RNDModel(args)
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, optimiser, optional LR scheduler and target controller.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied as
            ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``use_decay``,
            ``initial_lr`` or ``lr``, ``optimizer``, ``optim_eps``,
            ``regularization``, ``learner_log_interval``, ``optim_alpha``
            (RMSprop) and ``lr_decay_gamma`` (when decaying).

    Raises:
        ValueError: If ``args.mixer`` or ``args.optimizer`` is set to an
            unsupported name.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        elif mixer_name == "qmix_plus":
            self.mixer = QMixerPlus(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # With decay enabled the optimiser starts from `initial_lr`.
    if args.use_decay:
        lr = args.initial_lr
    else:
        lr = args.lr

    optimizer_name = args.optimizer
    if optimizer_name == 'rmsprop':
        self.optimiser = RMSprop(
            params=self.params,
            lr=lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
            weight_decay=args.regularization,
        )
    elif optimizer_name == 'adam':
        self.optimiser = Adam(
            params=self.params,
            lr=lr,
            eps=args.optim_eps,
            weight_decay=args.regularization,
        )
    else:
        raise ValueError("Optimizer {} not recognized".format(args.optimizer))

    if args.use_decay:
        # Per the original note, milestones count episodes
        # (1 episode ~ 50 timesteps).
        self.scheduler = lr_scheduler.MultiStepLR(
            self.optimiser,
            milestones=[4000, 10000, 180000],
            gamma=args.lr_decay_gamma,
        )

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up mixer (incl. attention variants), optimiser and target MAC.

    Args:
        mac: Controller exposing ``parameters()`` and, for the attention
            mixers, ``input_shape`` and ``input_alone_shape``;
            deep-copied as ``target_mac``.
        scheme: Not used by this constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``lr``,
            ``optim_alpha``, ``optim_eps`` and ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to an unsupported name.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.params = list(mac.parameters())

    self.last_target_update_episode = 0
    self.self_last_target_update_episode = 0

    self.mixer = None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        elif mixer_name == "qmix_attnv1":
            self.mixer = QAttnMixerV1(
                args, self.mac.input_shape, self.mac.input_alone_shape)
        elif mixer_name == "qmix_attnv2":
            self.mixer = QAttnMixerV2(
                args, self.mac.input_shape, self.mac.input_alone_shape)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    self.optimiser = RMSprop(
        params=self.params,
        lr=args.lr,
        alpha=args.optim_alpha,
        eps=args.optim_eps,
    )

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up a mixer ensemble with shared or per-member optimisers.

    Args:
        mac: A controller exposing ``parameters()`` or a ``list`` of such
            controllers; deep-copied as ``target_mac``.
        scheme: Stored on the instance.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``ensemble_num``,
            ``q_net_ensemble``, ``lr``, ``optim_alpha``, ``optim_eps``
            and ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to an unsupported name.
    """
    self.args = args
    self.mac = mac
    self.scheme = scheme
    self.logger = logger

    # One parameter list per controller.
    if type(mac) is list:
        self.params_list = [list(member.parameters()) for member in mac]
    else:
        self.params_list = [list(mac.parameters())]
    # Same list object as `params_list[0]`, so in-place growth below is
    # visible through both names.
    self.params = self.params_list[0]

    self.last_target_update_episode = 0

    self.mixer = None
    if args.mixer is not None:
        self.mixer_list = []
        self.target_mixer_list = []

        for _ in range(args.ensemble_num):
            if args.mixer == "vdn":
                member_mixer = VDNMixer()
            elif args.mixer == "qmix":
                member_mixer = QMixer(args)
            elif args.mixer == "aqmix":
                member_mixer = AQMixer(args)
            elif args.mixer == "qmix_noabs":
                member_mixer = QMixerCentralFF(args)
            else:
                raise ValueError("Mixer {} not recognised.".format(
                    args.mixer))
            self.mixer_list.append(member_mixer)

        if args.ensemble_num == 1:
            self.mixer = self.mixer_list[0]
            self.params += list(self.mixer.parameters())
            self.target_mixer_list.append(copy.deepcopy(self.mixer))
            self.target_mixer = self.target_mixer_list[0]
        else:
            self.mixer = self.mixer_list
            for idx in range(args.ensemble_num):
                member_params = list(self.mixer_list[idx].parameters())
                if args.q_net_ensemble:
                    # Each member's mixer trains with its own controller.
                    self.params_list[idx] += member_params
                else:
                    self.params += member_params
                self.target_mixer_list.append(
                    copy.deepcopy(self.mixer_list[idx]))

    self.optimiser_list = []
    if args.q_net_ensemble:
        for idx in range(args.ensemble_num):
            self.optimiser_list.append(
                RMSprop(
                    params=self.params_list[idx],
                    lr=args.lr,
                    alpha=args.optim_alpha,
                    eps=args.optim_eps,
                )
            )
    else:
        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
def __init__(self, mac, scheme, logger, args):
    """Set up mixer, central controller/mixer, Adam and norm trackers.

    Args:
        mac: Controller exposing ``parameters()``; deep-copied as
            ``target_mac``.
        scheme: Passed to the central controller constructor.
        logger: Stored on the instance.
        args: Configuration object; reads ``mixer``, ``central_mixer``,
            ``central_loss``, ``central_mac``, ``lr`` and
            ``learner_log_interval``.

    Raises:
        ValueError: If ``args.mixer`` is set to a name other than
            ``"vdn"`` or ``"qmix"``.
        Exception: If ``args.central_mixer`` is not ``"ff"`` or
            ``"atten"``, or is ``"atten"`` with a non-zero
            ``args.central_loss``.
        AssertionError: If ``args.mixer`` is ``None`` or a separate
            central controller is requested with ``args.central_mac``
            other than ``"basic_central_mac"``.
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.mac_params = list(mac.parameters())
    # Separate list object: it grows below while `mac_params` does not.
    self.params = list(self.mac.parameters())

    self.last_target_update_episode = 0

    self.mixer = None
    assert args.mixer is not None
    mixer_name = args.mixer
    if mixer_name is not None:
        if mixer_name == "vdn":
            self.mixer = VDNMixer()
        elif mixer_name == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.mixer_params = list(self.mixer.parameters())
        self.params.extend(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # Deep-copying the whole controller duplicates more than strictly
    # needed (e.g. the action selector) but works for any MAC.
    self.target_mac = copy.deepcopy(mac)

    # Central Q
    # TODO: Clean this mess up!
    self.central_mac = None
    if self.args.central_mixer not in ["ff", "atten"]:
        raise Exception("Error with qCentral")

    if self.args.central_loss == 0:
        # No central loss: reuse the regular mixer and controllers.
        self.central_mixer = self.mixer
        self.central_mac = self.mac
        self.target_central_mac = self.target_mac
    else:
        if self.args.central_mixer == "ff":
            # Feedforward network that takes state and agent utils as
            # input.
            self.central_mixer = QMixerCentralFF(args)
        else:
            # The "atten" central mixer is disabled in the original.
            raise Exception("Error with central_mixer")

        assert args.central_mac == "basic_central_mac"
        # Groups aren't used in the CentralBasicController, so only the
        # scheme and args are passed.
        central_mac_cls = mac_REGISTRY[args.central_mac]
        self.central_mac = central_mac_cls(scheme, args)
        self.target_central_mac = copy.deepcopy(self.central_mac)
        self.params.extend(self.central_mac.parameters())

    self.params.extend(self.central_mixer.parameters())
    self.target_central_mixer = copy.deepcopy(self.central_mixer)

    print('Mixer Size: ')
    combined_mixer_params = (
        list(self.mixer.parameters())
        + list(self.central_mixer.parameters())
    )
    print(get_parameters_num(combined_mixer_params))

    self.optimiser = Adam(params=self.params, lr=args.lr)

    self.log_stats_t = -self.args.learner_log_interval - 1

    # Norm trackers, including a 100-entry window of mixer norms.
    self.grad_norm = 1
    self.mixer_norm = 1
    self.mixer_norms = deque([1], maxlen=100)