def __init__(self, mac, scheme, logger, args):
    """Build the critic, its target copy, and one optimiser per network.

    Args:
        mac: Agent controller; only ``parameters()`` is called on it here.
        scheme: Passed through unchanged to ``LICACritic``.
        logger: Stored on the instance; not used during construction.
        args: Configuration object. Reads ``n_agents``, ``n_actions``,
            ``learner_log_interval``, ``lr``, ``critic_lr`` and
            ``entropy_coef``.
    """
    self.args = args
    self.n_agents = args.n_agents
    self.n_actions = args.n_actions
    self.mac = mac
    self.logger = logger

    # Progress counters start from zero.
    self.last_target_update_episode = 0
    self.critic_training_steps = 0

    # Both log timestamps start one step further back than the log interval
    # (presumably so the very first logging check passes -- TODO confirm
    # against the training loop).
    initial_log_t = -args.learner_log_interval - 1
    self.log_stats_t = initial_log_t
    self.log_stats_t_agent = initial_log_t

    # Critic plus an independent deep copy used as the target network.
    critic = LICACritic(scheme, args)
    self.critic = critic
    self.target_critic = copy.deepcopy(critic)

    # Parameter lists are materialised once and shared with the optimisers.
    agent_params = list(mac.parameters())
    critic_params = list(critic.parameters())
    self.agent_params = agent_params
    self.critic_params = critic_params
    self.params = agent_params + critic_params

    # Agents and critic are optimised separately with their own rates.
    self.agent_optimiser = Adam(params=agent_params, lr=args.lr)
    self.critic_optimiser = Adam(params=critic_params, lr=args.critic_lr)

    self.entropy_coef = args.entropy_coef

    # Report the critic's parameter count.
    print('Mixer Size: ')
    print(get_parameters_num(critic.parameters()))
def __init__(self, mac, scheme, logger, args):
    """Create critic, mixer, their target copies, and three optimisers.

    Args:
        mac: Agent controller; only ``parameters()`` is called on it here.
        scheme: Passed through unchanged to ``OffPGCritic``.
        logger: Stored on the instance; not used during construction.
        args: Configuration object. Reads ``n_agents``, ``n_actions``,
            ``learner_log_interval`` and ``lr``; also handed to
            ``OffPGCritic`` and ``QMixer``.
    """
    self.args = args
    self.n_agents = args.n_agents
    self.n_actions = args.n_actions
    self.mac = mac
    self.logger = logger

    # Progress counters start from zero.
    self.last_target_update_step = 0
    self.critic_training_steps = 0
    self.log_stats_t = -args.learner_log_interval - 1

    # Online networks first, then deep-copied targets (critic before mixer).
    critic = OffPGCritic(scheme, args)
    mixer = QMixer(args)
    self.critic = critic
    self.mixer = mixer
    self.target_critic = copy.deepcopy(critic)
    self.target_mixer = copy.deepcopy(mixer)

    # Materialise each parameter list once.
    agent_params = list(mac.parameters())
    critic_params = list(critic.parameters())
    mixer_params = list(mixer.parameters())
    self.agent_params = agent_params
    self.critic_params = critic_params
    self.mixer_params = mixer_params

    # ``params`` joins agent + critic; ``c_params`` joins critic + mixer.
    self.params = agent_params + critic_params
    self.c_params = critic_params + mixer_params

    # One Adam optimiser per network, all using the same learning rate.
    self.agent_optimiser = Adam(params=agent_params, lr=args.lr)
    self.critic_optimiser = Adam(params=critic_params, lr=args.lr)
    self.mixer_optimiser = Adam(params=mixer_params, lr=args.lr)

    # Report the combined critic + mixer parameter count.
    print('Mixer Size: ')
    print(get_parameters_num(list(self.c_params)))
def __init__(self, mac, scheme, logger, args):
    """Select the mixer, clone it as a target, and build a joint optimiser.

    Args:
        mac: Agent controller; only ``parameters()`` is called on it here.
        scheme: Accepted for interface compatibility; unused in this body.
        logger: Stored on the instance; not used during construction.
        args: Configuration object. Reads ``use_cuda``, ``mixer``,
            ``optimizer``, ``lr`` and ``learner_log_interval``; the RMSprop
            branch additionally reads ``optim_alpha`` and ``optim_eps``.

    Raises:
        Exception: If ``args.mixer`` is not "qatten", "vdn" or "qmix".
    """
    self.args = args
    self.mac = mac  # controller
    self.logger = logger
    self.last_target_update_episode = 0

    self.device = th.device('cuda' if args.use_cuda else 'cpu')

    # Agent parameters come first; the mixer's are appended below.
    self.params = list(mac.parameters())

    mixer_name = args.mixer
    if mixer_name == "qatten":
        self.mixer = QattenMixer(args)
    elif mixer_name == "vdn":
        self.mixer = VDNMixer()
    elif mixer_name == "qmix":
        self.mixer = Mixer(args)
    else:
        raise Exception("mixer error")

    self.target_mixer = copy.deepcopy(self.mixer)
    self.params += list(self.mixer.parameters())

    # Report the mixer's parameter count on a single output line.
    print('Mixer的参数量为: ', end='')
    print(get_parameters_num(self.mixer.parameters()))

    # Anything other than 'adam' falls back to RMSprop.
    if self.args.optimizer != 'adam':
        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )
    else:
        self.optimiser = Adam(params=self.params, lr=args.lr)

    # A little wasteful to deepcopy (e.g. duplicates the action selector),
    # but should work for any MAC.
    # NOTE(review): the target MAC copy is disabled in this learner, so
    # ``self.target_mac`` is never set here -- confirm nothing relies on it.
    # self.target_mac = copy.deepcopy(mac)

    # Initial value for the log-interval bookkeeping.
    self.log_stats_t = -self.args.learner_log_interval - 1
    self.train_t = 0
def __init__(self, mac, scheme, logger, args):
    """Set up the optional DMAQ mixer, the optimiser and the target MAC.

    Args:
        mac: Agent controller; ``parameters()`` is called on it and it is
            deep-copied to create ``target_mac``.
        scheme: Accepted for interface compatibility; unused in this body.
        logger: Stored on the instance; not used during construction.
        args: Configuration object. Reads ``mixer`` (``None`` or "dmaq"),
            ``lr``, ``learner_log_interval`` and ``n_actions``.

    Raises:
        ValueError: If ``args.mixer`` is set to anything other than "dmaq".
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    self.params = list(mac.parameters())
    self.last_target_update_episode = 0

    # The mixer is optional: it stays None when ``args.mixer`` is None.
    self.mixer = None
    if args.mixer is not None:
        if args.mixer == "dmaq":
            self.mixer = DMAQer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.params += list(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    self.optimiser = Adam(params=self.params, lr=args.lr)

    # Only report the mixer size when a mixer exists; calling
    # ``self.mixer.parameters()`` on None would raise AttributeError and
    # make the mixer-less configuration unusable.
    if self.mixer is not None:
        print('Mixer Size: ')
        print(get_parameters_num(self.mixer.parameters()))

    # a little wasteful to deepcopy (e.g. duplicates action selector),
    # but should work for any MAC
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
    self.n_actions = self.args.n_actions
def __init__(self, mac, scheme, logger, args):
    """Create target networks, an optional mixer, and both optimisers.

    Args:
        mac: Agent controller; deep-copied into ``target_mac`` and queried
            for ``parameters()``.
        scheme: Passed through unchanged to ``FMACCritic``.
        logger: Stored on the instance; not used during construction.
        args: Configuration object. Reads ``n_agents``, ``n_actions``,
            ``mixer``, ``lr``, ``critic_lr`` and ``learner_log_interval``.
            ``optimizer`` is optional (defaults to "rmsprop"); RMSprop
            reads ``optim_alpha`` / ``optim_eps``, Adam reads the optional
            ``optimizer_epsilon``.

    Raises:
        ValueError: If a mixer is requested but is neither "vdn" nor "qmix".
        Exception: If the optimiser name is neither "rmsprop" nor "adam".
    """
    self.args = args
    self.n_agents = args.n_agents
    self.n_actions = args.n_actions
    self.logger = logger

    # Agent controller and its target copy.
    self.mac = mac
    self.target_mac = copy.deepcopy(self.mac)
    self.agent_params = list(mac.parameters())

    # Critic and its target copy.
    self.critic = FMACCritic(scheme, args)
    self.target_critic = copy.deepcopy(self.critic)
    self.critic_params = list(self.critic.parameters())

    # if just 1 agent do not mix anything
    self.mixer = None
    wants_mixer = args.mixer is not None and self.args.n_agents > 1
    if wants_mixer:
        if args.mixer == "vdn":
            self.mixer = VDNMixer()
        elif args.mixer == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        # Mixer weights are trained together with the critic.
        self.critic_params += list(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # The reported size covers the critic plus the mixer, when one exists.
    print('Mixer Size: ')
    print(get_parameters_num(self.critic_params))

    # Agent and critic use the same optimiser family, with separate rates.
    optimizer_name = getattr(self.args, "optimizer", "rmsprop")
    if optimizer_name == "rmsprop":
        self.agent_optimiser = RMSprop(
            params=self.agent_params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )
        self.critic_optimiser = RMSprop(
            params=self.critic_params,
            lr=args.critic_lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )
    elif optimizer_name == "adam":
        self.agent_optimiser = Adam(
            params=self.agent_params,
            lr=args.lr,
            eps=getattr(args, "optimizer_epsilon", 10E-8),
        )
        self.critic_optimiser = Adam(
            params=self.critic_params,
            lr=args.critic_lr,
            eps=getattr(args, "optimizer_epsilon", 10E-8),
        )
    else:
        raise Exception("unknown optimizer {}".format(optimizer_name))

    self.log_stats_t = -self.args.learner_log_interval - 1
    self.last_target_update_episode = 0
def __init__(self, mac, scheme, logger, args):
    """Select the mixer, build the optimiser, and set up replay priorities.

    Args:
        mac: Agent controller; ``parameters()`` is called on it and it is
            deep-copied to create ``target_mac``.
        scheme: Accepted for interface compatibility; unused in this body.
        logger: Stored on the instance; not used during construction.
        args: Configuration object. Reads ``use_cuda``, ``mixer``,
            ``optimizer``, ``lr`` and ``learner_log_interval``. The Adam
            branch reads the optional ``weight_decay`` (default 0); the
            RMSprop branch reads ``optim_alpha`` and ``optim_eps``.
            ``use_per`` and ``return_priority`` are optional and default
            to False.

    Raises:
        ValueError: If ``args.mixer`` is not "qatten", "vdn" or "qmix".
    """
    self.args = args
    self.mac = mac
    self.logger = logger
    self.last_target_update_episode = 0

    self.device = th.device('cuda' if args.use_cuda else 'cpu')

    # Agent parameters come first; the mixer's are appended below.
    self.params = list(mac.parameters())

    if args.mixer == "qatten":
        self.mixer = QattenMixer(args)
    elif args.mixer == "vdn":
        self.mixer = VDNMixer()
    elif args.mixer == "qmix":
        self.mixer = Mixer(args)
    else:
        # Raising a bare string is itself a TypeError in Python 3, which
        # hid the real problem; raise a proper exception naming the mixer.
        raise ValueError("Mixer {} not recognised.".format(args.mixer))

    self.target_mixer = copy.deepcopy(self.mixer)
    self.params += list(self.mixer.parameters())

    print('Mixer Size: ')
    print(get_parameters_num(self.mixer.parameters()))

    # Anything other than 'adam' falls back to RMSprop.
    if self.args.optimizer == 'adam':
        self.optimiser = Adam(
            params=self.params,
            lr=args.lr,
            weight_decay=getattr(args, "weight_decay", 0),
        )
    else:
        self.optimiser = RMSprop(
            params=self.params,
            lr=args.lr,
            alpha=args.optim_alpha,
            eps=args.optim_eps,
        )

    # a little wasteful to deepcopy (e.g. duplicates action selector),
    # but should work for any MAC
    self.target_mac = copy.deepcopy(mac)

    self.log_stats_t = -self.args.learner_log_interval - 1
    self.train_t = 0

    # priority replay
    self.use_per = getattr(self.args, 'use_per', False)
    self.return_priority = getattr(self.args, "return_priority", False)
    if self.use_per:
        # Running extremes start at the opposite infinities so that the
        # first observed priority replaces both.
        self.priority_max = float('-inf')
        self.priority_min = float('inf')
def __init__(self, mac, scheme, logger, args):
    """Build the mixer, the central mixer/controller, targets and optimiser.

    Args:
        mac: Agent controller; ``parameters()`` is called on it and it is
            deep-copied to create ``target_mac``.
        scheme: Forwarded to the central controller constructor when a
            separate central controller is built.
        logger: Stored on the instance; not used during construction.
        args: Configuration object. Reads ``mixer``, ``central_mixer``,
            ``central_loss``, ``central_mac``, ``lr`` and
            ``learner_log_interval``.

    Raises:
        AssertionError: If ``args.mixer`` is None, or if a separate central
            controller is requested and ``args.central_mac`` is not
            "basic_central_mac".
        ValueError: If ``args.mixer`` is neither "vdn" nor "qmix".
        Exception: If ``args.central_mixer`` is not "ff"/"atten", or if a
            separate central mixer is requested with anything but "ff".
    """
    self.args = args
    self.mac = mac
    self.logger = logger

    # Two independent lists over the same agent parameters; only
    # ``params`` is extended further down.
    self.mac_params = list(mac.parameters())
    self.params = list(self.mac.parameters())

    self.last_target_update_episode = 0

    self.mixer = None
    assert args.mixer is not None
    if args.mixer is not None:  # still guards when asserts are stripped
        if args.mixer == "vdn":
            self.mixer = VDNMixer()
        elif args.mixer == "qmix":
            self.mixer = QMixer(args)
        else:
            raise ValueError("Mixer {} not recognised.".format(args.mixer))
        self.mixer_params = list(self.mixer.parameters())
        self.params += list(self.mixer.parameters())
        self.target_mixer = copy.deepcopy(self.mixer)

    # a little wasteful to deepcopy (e.g. duplicates action selector),
    # but should work for any MAC
    self.target_mac = copy.deepcopy(mac)

    # Central Q
    # TODO: Clean this mess up!
    self.central_mac = None
    central_kind = self.args.central_mixer
    if central_kind not in ["ff", "atten"]:
        raise Exception("Error with qCentral")

    if self.args.central_loss == 0:
        # No central loss: reuse the regular mixer and controllers.
        self.central_mixer = self.mixer
        self.central_mac = self.mac
        self.target_central_mac = self.target_mac
    else:
        # Only the feedforward central mixer is available; the "atten"
        # variant (QMixerCentralAtten) is disabled in this learner.
        if central_kind != "ff":
            raise Exception("Error with central_mixer")
        # Feedforward network that takes state and agent utils as input
        self.central_mixer = QMixerCentralFF(args)

        assert args.central_mac == "basic_central_mac"
        # Groups aren't used in the CentralBasicController. Little hacky
        self.central_mac = mac_REGISTRY[args.central_mac](scheme, args)
        self.target_central_mac = copy.deepcopy(self.central_mac)
        self.params += list(self.central_mac.parameters())

    # NOTE(review): when central_loss == 0 the central mixer *is*
    # ``self.mixer``, so its parameters are appended to ``params`` a second
    # time and counted twice in the printed size -- confirm this is intended.
    self.params += list(self.central_mixer.parameters())
    self.target_central_mixer = copy.deepcopy(self.central_mixer)

    print('Mixer Size: ')
    mixer_and_central = (
        list(self.mixer.parameters()) + list(self.central_mixer.parameters())
    )
    print(get_parameters_num(mixer_and_central))

    self.optimiser = Adam(params=self.params, lr=args.lr)

    self.log_stats_t = -self.args.learner_log_interval - 1

    # Norm statistics start at 1; the history keeps at most 100 entries.
    self.grad_norm = 1
    self.mixer_norm = 1
    self.mixer_norms = deque([1], maxlen=100)