def __init__(self, agent_init_params, alg_types, gamma=0.95, tau=0.01, lr=0.01,
             hidden_dim=64, discrete_action=False, shared_params=False):
    """
    Build the multi-agent wrapper, one DDPGAgent per entry of alg_types.

    Inputs:
        agent_init_params (list of dict): List of dicts with parameters to
            initialize each agent
            num_in_pol (int): Input dimensions to policy
            num_out_pol (int): Output dimensions to policy
            num_in_critic (int): Input dimensions to critic
        alg_types (list of str): Learning algorithm for each agent
            (DDPG or MADDPG)
        gamma (float): Discount factor
        tau (float): Target update rate
        lr (float): Learning rate for policy and critic
        hidden_dim (int): Number of hidden dimensions for networks
        discrete_action (bool): Whether or not to use discrete action space
        shared_params (bool): If True, every slot of self.agents holds the
            same single DDPGAgent instance (full parameter sharing)
    """
    self.nagents = len(alg_types)
    self.alg_types = alg_types
    self.shared_params = shared_params
    if shared_params:
        # One agent object reused for every position: all "agents" share
        # the same networks and optimizers.
        shared_agent = DDPGAgent(lr=lr, discrete_action=discrete_action,
                                 hidden_dim=hidden_dim,
                                 **agent_init_params[0])
        self.agents = [shared_agent] * len(agent_init_params)
    else:
        self.agents = []
        for params in agent_init_params:
            self.agents.append(
                DDPGAgent(lr=lr, discrete_action=discrete_action,
                          hidden_dim=hidden_dim, **params))
    self.agent_init_params = agent_init_params
    self.gamma = gamma
    self.tau = tau
    self.lr = lr
    self.discrete_action = discrete_action
    self.pol_dev = 'cpu'  # device for policies
    self.critic_dev = 'cpu'  # device for critics
    self.trgt_pol_dev = 'cpu'  # device for target policies
    self.trgt_critic_dev = 'cpu'  # device for target critics
    self.niter = 0
def __init__(self, agent_init_params, alg_types, gamma, tau, lr, lr_fe_coef,
             lr_critic_coef, grad_clip_value, hidden_dim, use_discrete_action,
             weight_decay, discrete_exploration_scheme, boltzmann_temperature,
             feature_extractor, critic_concat_all_obs, logger=None):
    """
    Initialize the base algorithm, then instantiate one DDPGAgent per
    entry of agent_init_params.

    Inputs:
        agent_init_params (list of dict): List of dicts with parameters to
            initialize each agent
            num_in_pol (int): Input dimensions to policy
            num_out_pol (int): Output dimensions to policy
            num_in_critic (int): Input dimensions to critic
        alg_types (list of str): Learning algorithm for each agent
            (DDPG or MADDPG)
        gamma (float): Discount factor
        tau (float): Target update rate
        lr (float): Learning rate for policy and critic
        hidden_dim (int): Number of hidden dimensions for networks
        use_discrete_action (bool): Whether or not to use discrete action
            space
    """
    super().__init__(agent_init_params=agent_init_params,
                     alg_types=alg_types,
                     gamma=gamma,
                     tau=tau,
                     lr=lr,
                     lr_critic_coef=lr_critic_coef,
                     grad_clip_value=grad_clip_value,
                     use_discrete_action=use_discrete_action)
    # Keyword arguments common to every agent; the per-agent network
    # dimensions come from the individual agent_init_params entries.
    shared_kwargs = dict(
        lr=lr,
        lr_fe_coef=lr_fe_coef,
        lr_critic_coef=lr_critic_coef,
        use_discrete_action=use_discrete_action,
        weight_decay=weight_decay,
        hidden_dim=hidden_dim,
        discrete_exploration_scheme=discrete_exploration_scheme,
        boltzmann_temperature=boltzmann_temperature,
        feature_extractor=feature_extractor,
        logger=logger,
    )
    self.agents = [DDPGAgent(**shared_kwargs, **params)
                   for params in agent_init_params]
    self.feature_extractor = feature_extractor
    self.critic_concat_all_obs = critic_concat_all_obs
    self.soft = False
def __init__(self, agent_init_params, alg_types, gamma=0.95, tau=0.01, lr=0.01,
             hidden_dim=64, discrete_action=False, noisy_sharing=True,
             noisy_SNR=50, game_id=None, est_ac=False):
    """
    Build one DDPGAgent per entry of alg_types, with noisy-sharing and
    differential-observation settings stored for later use.

    Inputs:
        agent_init_params (list of dict): List of dicts with parameters to
            initialize each agent
            num_in_pol (int): Input dimensions to policy
            num_out_pol (int): Output dimensions to policy
            num_in_critic (int): Input dimensions to critic
        alg_types (list of str): Learning algorithm for each agent
            (DDPG or MADDPG)
        gamma (float): Discount factor
        tau (float): Target update rate
        lr (float): Learning rate for policy and critic
        hidden_dim (int): Number of hidden dimensions for networks
        discrete_action (bool): Whether or not to use discrete action space
    """
    self.nagents = len(alg_types)
    self.alg_types = alg_types
    agents = []
    for params in agent_init_params:
        agents.append(DDPGAgent(lr=lr, discrete_action=discrete_action,
                                hidden_dim=hidden_dim, **params))
    self.agents = agents
    self.agent_init_params = agent_init_params
    self.gamma = gamma
    self.tau = tau
    self.lr = lr
    self.discrete_action = discrete_action
    self.pol_dev = 'cpu'  # device for policies
    self.critic_dev = 'cpu'  # device for critics
    self.trgt_pol_dev = 'cpu'  # device for target policies
    self.trgt_critic_dev = 'cpu'  # device for target critics
    self.niter = 0
    # Noise added to shared information between agents.
    self.noisy_sharing = noisy_sharing
    self.noisy_SNR = noisy_SNR  # signal-to-noise ratio, in dB
    # Differential-observation configuration.
    self.game_id = game_id
    self.est_ac = est_ac
def __init__(self, agent_init_params, alg_types, gamma=0.95, tau=0.01,
             actor_lr=0.01, critic_lr=0.01, discrete_action=False, **kwargs):
    """
    Build one DDPGAgent per entry of alg_types with separate actor and
    critic learning rates.

    Inputs:
        agent_init_params (list of dict): List of dicts with parameters to
            initialize each agent
            num_in_pol (int): Input dimensions to policy
            num_out_pol (int): Output dimensions to policy
            num_in_critic (int): Input dimensions to critic
        alg_types (list of str): Learning algorithm for each agent
            (DDPG or MADDPG)
        gamma (float): Discount factor
        tau (float): Target update rate
        actor_lr (float): Learning rate for policy
        critic_lr (float): Learning rate for critic
        discrete_action (bool): Whether or not to use discrete action space
        **kwargs: Extra options forwarded to every DDPGAgent; may contain
            'use_cuda' (bool) to enable GPU use.
    """
    # CUDA is used only when it is both available and explicitly requested.
    # BUG FIX: kwargs['use_cuda'] raised KeyError whenever the caller did
    # not pass the flag; .get() defaults to CPU in that case.
    self.use_cuda = torch.cuda.is_available() and kwargs.get('use_cuda', False)
    self.nagents = len(alg_types)
    self.alg_types = alg_types
    self.agents = [DDPGAgent(actor_lr=actor_lr, critic_lr=critic_lr,
                             discrete_action=discrete_action,
                             **params, **kwargs)
                   for params in agent_init_params]
    self.agent_init_params = agent_init_params
    self.gamma = gamma
    self.tau = tau
    self.actor_lr = actor_lr
    self.critic_lr = critic_lr
    self.discrete_action = discrete_action
    self.pol_dev = 'cpu'  # device for policies
    self.critic_dev = 'cpu'  # device for critics
    self.trgt_pol_dev = 'cpu'  # device for target policies
    self.trgt_critic_dev = 'cpu'  # device for target critics
    self.niter = 0
def __init__(self, agent_init_params, alg_types, gamma=0.95, tau=0.01, lr=0.01,
             hidden_dim=64, discrete_action=False, stochastic=False,
             commonCritic=False, gasil=False, dlr=0.0003, lambda_disc=0.5,
             batch_size_disc=512, dynamic=False):
    """
    Build one DDPGAgent per entry of alg_types, optionally with a single
    shared critic (commonCritic) and a GASIL discriminator (gasil).

    Inputs:
        agent_init_params (list of dict): List of dicts with parameters to
            initialize each agent
            num_in_pol (int): Input dimensions to policy
            num_out_pol (int): Output dimensions to policy
            num_in_critic (int): Input dimensions to critic
        alg_types (list of str): Learning algorithm for each agent
            (DDPG or MADDPG)
        gamma (float): Discount factor
        tau (float): Target update rate
        lr (float): Learning rate for policy and critic
        hidden_dim (int): Number of hidden dimensions for networks
        discrete_action (bool): Whether or not to use discrete action space
        stochastic (bool): Stochastic-policy flag stored for later use
        commonCritic (bool): If True, all agents share one critic network
        gasil (bool): If True, create a GASIL discriminator and optimizer
        dlr (float): Learning rate for the discriminator
        lambda_disc (float): Discriminator-reward mixing coefficient
        batch_size_disc (int): Batch size for discriminator updates
        dynamic (bool): Dynamic-environment flag stored for later use
    """
    self.nagents = len(alg_types)
    self.alg_types = alg_types
    self.agents = [DDPGAgent(lr=lr, discrete_action=discrete_action,
                             hidden_dim=hidden_dim, **params)
                   for params in agent_init_params]
    # Freeze target policies: target networks are only refreshed by
    # parameter copies (hard/soft updates), never by gradient steps.
    # BUG FIX: the original did `target_policy.requires_grad = False`,
    # which merely attaches an inert attribute to the nn.Module; the flag
    # must be set on each parameter tensor to stop gradient tracking.
    for agent in self.agents:
        for p in agent.target_policy.parameters():
            p.requires_grad = False
    self.agent_init_params = agent_init_params
    self.gamma = gamma
    self.tau = tau
    self.lr = lr
    self.dlr = dlr
    self.discrete_action = discrete_action
    self.pol_dev = 'cpu'  # device for policies
    self.critic_dev = 'cpu'  # device for critics
    self.trgt_pol_dev = 'cpu'  # device for target policies
    self.trgt_critic_dev = 'cpu'  # device for target critics
    self.disc_dev = 'cpu'  # device for discriminator
    self.niter = 0
    self.stochastic = stochastic
    self.commonCritic = commonCritic
    self.gasil = gasil
    self.lambda_disc = lambda_disc
    self.batch_size_disc = batch_size_disc
    self.dynamic = dynamic
    num_in_critic = self.agent_init_params[0]['num_in_critic']
    # is_available() already returns a bool; the original
    # `True if ... else False` was redundant.
    self.cuda = torch.cuda.is_available()
    if self.commonCritic:
        # One critic (plus target) shared by all agents.
        self.critic = MLPNetwork(num_in_critic, 1, hidden_dim=hidden_dim,
                                 constrain_out=False)
        self.target_critic = MLPNetwork(num_in_critic, 1,
                                        hidden_dim=hidden_dim,
                                        constrain_out=False)
        hard_update(self.target_critic, self.critic)
        self.critic_optimizer = Adam(self.critic.parameters(), lr=lr)
    if self.gasil:
        # NOTE(review): the collapsed source is ambiguous about whether this
        # branch was nested under `if self.commonCritic:`. The discriminator
        # does not depend on the shared critic, so it is kept at top level —
        # confirm against the original repository.
        self.discriminator = MLPNetwork_Disc(num_in_critic, 1,
                                             hidden_dim=hidden_dim,
                                             norm_in=False,
                                             constrain_out=False,
                                             discrete_action=False)
        self.discriminator_optimizer = Adam(self.discriminator.parameters(),
                                            lr=dlr)
def __init__(self, agent_init_params, alg_types, gamma, tau, lr, lr_fe_coef,
             lr_critic_coef, grad_clip_value, hidden_dim, use_discrete_action,
             weight_decay, discrete_exploration_scheme, boltzmann_temperature,
             action_spaces, lambdat_1, lambdat_2, feature_extractor,
             critic_concat_all_obs, logger=None):
    """
    Initialize the base algorithm, validate the Team configuration, then
    instantiate one DDPGAgent (with an integer id) per entry of
    agent_init_params.

    Inputs:
        agent_init_params (list of dict): List of dicts with parameters to
            initialize each agent
            num_in_pol (int): Input dimensions to policy
            num_out_pol (int): Output dimensions to policy
            num_in_critic (int): Input dimensions to critic
        alg_types (list of str): Learning algorithm for each agent
            (DDPG or MADDPG)
        gamma (float): Discount factor
        tau (float): Target update rate
        lr (float): Learning rate for policy and critic
        hidden_dim (int): Number of hidden dimensions for networks
        use_discrete_action (bool): Whether or not to use discrete action
            space
    """
    super().__init__(agent_init_params=agent_init_params,
                     alg_types=alg_types,
                     gamma=gamma,
                     tau=tau,
                     lr=lr,
                     lr_critic_coef=lr_critic_coef,
                     grad_clip_value=grad_clip_value,
                     use_discrete_action=use_discrete_action)
    self.lambdat_1 = lambdat_1
    self.lambdat_2 = lambdat_2
    self.action_spaces = action_spaces
    # Team losses assume identically-sized action spaces across agents.
    assert all(space == self.action_spaces[0] for space in self.action_spaces), \
        "All action spaces must be of equal size for TeamMADDPG."
    if "TeamMADDPG" in self.alg_types:
        # Mixed Team/non-Team populations are not supported.
        assert all(alg == self.alg_types[0] for alg in self.alg_types), \
            "If one agent is of type TeamMADDPG, all agents must be (do not support mix atm)."
    else:
        # Without Team agents the team-loss coefficients must be disabled.
        assert self.lambdat_1 == 0., f"config.lamda1 should be set to 0 for non-Team agents"
        assert self.lambdat_2 == 0., f"config.lamda2 should be set to 0 for non-Team agents"
    # Keyword arguments common to every agent; per-agent dims come from
    # the individual agent_init_params entries.
    shared_kwargs = dict(
        lr=lr,
        lr_fe_coef=lr_fe_coef,
        lr_critic_coef=lr_critic_coef,
        use_discrete_action=use_discrete_action,
        weight_decay=weight_decay,
        hidden_dim=hidden_dim,
        discrete_exploration_scheme=discrete_exploration_scheme,
        boltzmann_temperature=boltzmann_temperature,
        feature_extractor=feature_extractor,
        logger=logger,
    )
    self.agents = [DDPGAgent(id=i, **shared_kwargs, **params)
                   for i, params in enumerate(agent_init_params)]
    self.feature_extractor = feature_extractor
    self.critic_concat_all_obs = critic_concat_all_obs
    self.soft = False
def __init__(self, agent_init_params, alg_types, group_types, gamma=0.95,
             tau=0.01, lr=0.01, hidden_dim=64, device='cuda:0',
             discrete_action=False, predators_comm=False,
             predators_comm_size=0, symbolic_comm=False):
    """
    Build a mixed population: learning DDPGAgents for "MADDPG"/"DDPG"
    entries and scripted Prey_Controller agents for "CONTROLLER" entries.

    Inputs:
        agent_init_params (list of dict): List of dicts with parameters to
            initialize each agent
            num_in_pol (int): Input dimensions to policy
            num_out_pol (int): Output dimensions to policy
            num_in_critic (int): Input dimensions to critic
        alg_types (list of str): Learning algorithm for each agent
            (MADDPG, DDPG, or CONTROLLER)
        group_types (list): Group label passed through to each DDPGAgent
        gamma (float): Discount factor
        tau (float): Target update rate
        lr (float): Learning rate for policy and critic
        hidden_dim (int): Number of hidden dimensions for networks
        device (str): Torch device for all networks (default 'cuda:0')
        discrete_action (bool): Whether or not to use discrete action space
        predators_comm (bool): Enable communication for MADDPG predators
        predators_comm_size (int): Size of the communication vector
        symbolic_comm (bool): Symbolic-communication flag stored for later
    """
    self.predators_comm = predators_comm
    self.predators_comm_size = predators_comm_size
    self.symbolic_comm = symbolic_comm
    self.nagents = len(alg_types)
    self.alg_types = alg_types
    self.agents = []
    for ag_i, params in enumerate(agent_init_params):
        if alg_types[ag_i] in ("MADDPG", "DDPG"):
            # BUG FIX: the original used `alg_types[ag_i] is 'MADDPG'`.
            # Identity comparison with a str literal relies on CPython
            # interning (and emits a SyntaxWarning); it can be False for
            # equal strings, silently disabling comm. Use `==` instead.
            self.agents.append(
                DDPGAgent(ag_id=ag_i, lr=lr,
                          discrete_action=discrete_action,
                          hidden_dim=hidden_dim, device=device,
                          comm=(predators_comm
                                if alg_types[ag_i] == 'MADDPG' else False),
                          comm_size=predators_comm_size,
                          group_type=group_types[ag_i],
                          **params))
        elif alg_types[ag_i] == "CONTROLLER":
            # Scripted (non-learning) prey agent.
            self.agents.append(Prey_Controller(**params))
    self.agent_init_params = agent_init_params
    self.gamma = gamma
    self.tau = tau
    self.lr = lr
    self.discrete_action = discrete_action
    # All networks start on the configured device (not hard-coded 'cpu').
    self.pol_dev = device  # device for policies
    self.critic_dev = device  # device for critics
    self.trgt_pol_dev = device  # device for target policies
    self.trgt_critic_dev = device  # device for target critics
    self.niter = 0