Example #1
    def __init__(self, conf, qnet_conf, mixer_gnn_conf, mixer_ff_conf,
                 brain_conf, buffer_conf):
        super(QmixAgent, self).__init__()
        self.conf = conf

        qnet = MultiStepInputQnet(qnet_conf)
        mixer = QMixer(mixer_gnn_conf, mixer_ff_conf)

        if self.conf.agent_conf['use_target']:
            qnet_target = MultiStepInputQnet(qnet_conf)
            mixer_target = QMixer(mixer_gnn_conf, mixer_ff_conf)
        else:
            qnet_target = None
            mixer_target = None

        if self.conf.agent_conf['use_clipped_q']:
            qnet2 = MultiStepInputQnet(qnet_conf)
            mixer2 = QMixer(mixer_gnn_conf, mixer_ff_conf)
        else:
            qnet2 = None
            mixer2 = None

        self.brain = QMixBrain(conf=brain_conf,
                               qnet=qnet,
                               mixer=mixer,
                               qnet_target=qnet_target,
                               mixer_target=mixer_target,
                               qnet2=qnet2,
                               mixer2=mixer2)

        self.buffer = NstepInputMemory(**buffer_conf.memory_conf)
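
The conditional blocks above implement two common Q-learning stabilizers: a frozen target network ('use_target') and a second online network for clipped double-Q ('use_clipped_q'). Below is a minimal, self-contained sketch of the same construction pattern in plain PyTorch; the QNet class and flags are hypothetical stand-ins, not part of this project:

import copy

import torch


class QNet(torch.nn.Module):
    # hypothetical stand-in for MultiStepInputQnet
    def __init__(self, in_dim=8, num_actions=4):
        super().__init__()
        self.ff = torch.nn.Linear(in_dim, num_actions)

    def forward(self, x):
        return self.ff(x)


def build_qnets(use_target=True, use_clipped_q=False):
    qnet = QNet()
    # target network: a periodically synced copy that stabilizes TD targets
    qnet_target = copy.deepcopy(qnet) if use_target else None
    # second online network; taking the min of both estimates curbs
    # overestimation (clipped double-Q)
    qnet2 = QNet() if use_clipped_q else None
    return qnet, qnet_target, qnet2


qnet, qnet_target, qnet2 = build_qnets(use_target=True, use_clipped_q=True)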
Example #2
    def __init__(self,
                 conf,
                 qnet_conf,
                 mixer_gnn_conf,
                 mixer_ff_conf,
                 sup_mixer_conf,
                 brain_conf,
                 buffer_conf,
                 soft_assignment=False):
        super(HierarchicalQmixAgent, self).__init__()
        self.conf = conf

        qnet = HierarchicalMultiStepInputQnet(qnet_conf,
                                              mixer_gnn_conf,
                                              mixer_ff_conf,
                                              soft_assignment=soft_assignment)
        mixer = SupQmixer(
            input_dim=qnet_conf.qnet_actor_conf['node_input_dim'],
            conf=sup_mixer_conf)

        if self.conf.agent_conf['use_target']:
            qnet_target = HierarchicalMultiStepInputQnet(
                qnet_conf,
                mixer_gnn_conf,
                mixer_ff_conf,
                soft_assignment=soft_assignment)
            mixer_target = SupQmixer(
                input_dim=qnet_conf.qnet_actor_conf['node_input_dim'],
                conf=sup_mixer_conf)
        else:
            qnet_target = None
            mixer_target = None

        if self.conf.agent_conf['use_clipped_q']:
            qnet2 = HierarchicalMultiStepInputQnet(
                qnet_conf,
                mixer_gnn_conf,
                mixer_ff_conf,
                soft_assignment=soft_assignment)
            mixer2 = SupQmixer(
                input_dim=qnet_conf.qnet_actor_conf['node_input_dim'],
                conf=sup_mixer_conf)
        else:
            qnet2 = None
            mixer2 = None

        self.brain = HierarchicalQmixBrain(conf=brain_conf,
                                           qnet=qnet,
                                           mixer=mixer,
                                           qnet_target=qnet_target,
                                           mixer_target=mixer_target,
                                           qnet2=qnet2,
                                           mixer2=mixer2)

        self.buffer = NstepInputMemory(**buffer_conf.memory_conf)
Example #3
    def __init__(self,
                 conf,
                 network_conf,
                 brain_conf,
                 buffer_conf,
                 use_attention=True,
                 use_hierarchical_actor=False):
        super(MultiStepActorCriticAgent,
              self).__init__(brain_conf=brain_conf, buffer_conf=buffer_conf)
        self.conf = conf

        actor = MultiStepInputActor(
            network_conf,
            use_attention=use_attention,
            use_hierarchical_actor=use_hierarchical_actor)
        critic = MultiStepInputActor(
            network_conf,
            use_attention=use_attention,
            use_hierarchical_actor=use_hierarchical_actor)

        if self.conf.module_conf['use_target']:
            critic_target = MultiStepInputActor(
                network_conf,
                use_attention=use_attention,
                use_hierarchical_actor=use_hierarchical_actor)
        else:
            critic_target = None

        if self.conf.module_conf['use_double_q']:
            critic2 = MultiStepInputActor(
                network_conf,
                use_attention=use_attention,
                use_hierarchical_actor=use_hierarchical_actor)
            critic2_target = MultiStepInputActor(
                network_conf,
                use_attention=use_attention,
                use_hierarchical_actor=use_hierarchical_actor)
        else:
            critic2 = None
            critic2_target = None

        self.brain = MultiStepActorCriticBrain(actor=actor,
                                               critic=critic,
                                               conf=brain_conf,
                                               critic_target=critic_target,
                                               critic2=critic2,
                                               critic2_target=critic2_target)

        self.buffer = NstepInputMemory(**buffer_conf.memory_conf)
Example #4
    def __init__(self,
                 conf,
                 network_conf,
                 brain_conf,
                 buffer_conf,
                 use_attention=True,
                 use_hierarchical_actor=False):
        super(MultiStepActorCriticAgent,
              self).__init__(brain_conf=brain_conf, buffer_conf=buffer_conf)
        self.conf = conf

        actor = MultiStepInputActor(
            network_conf,
            use_attention=use_attention,
            use_hierarchical_actor=use_hierarchical_actor)

        self.brain = MultiStepPolicyGradientBrain(actor=actor, conf=brain_conf)

        self.buffer = NstepInputMemory(**buffer_conf.memory_conf)
Example #5
class QmixAgent(torch.nn.Module):
    def __init__(self, conf, qnet_conf, mixer_gnn_conf, mixer_ff_conf,
                 brain_conf, buffer_conf):
        super(QmixAgent, self).__init__()
        self.conf = conf

        qnet = MultiStepInputQnet(qnet_conf)
        mixer = QMixer(mixer_gnn_conf, mixer_ff_conf)

        if self.conf.agent_conf['use_target']:
            qnet_target = MultiStepInputQnet(qnet_conf)
            mixer_target = QMixer(mixer_gnn_conf, mixer_ff_conf)
        else:
            qnet_target = None
            mixer_target = None

        if self.conf.agent_conf['use_clipped_q']:
            qnet2 = MultiStepInputQnet(qnet_conf)
            mixer2 = QMixer(mixer_gnn_conf, mixer_ff_conf)
        else:
            qnet2 = None
            mixer2 = None

        self.brain = QMixBrain(conf=brain_conf,
                               qnet=qnet,
                               mixer=mixer,
                               qnet_target=qnet_target,
                               mixer_target=mixer_target,
                               qnet2=qnet2,
                               mixer2=mixer2)

        self.buffer = NstepInputMemory(**buffer_conf.memory_conf)

    def get_action(self, hist_graph, curr_graph, tag2unit_dict):

        assert isinstance(
            curr_graph,
            dgl.DGLGraph), "get action is designed to work on a single graph!"
        num_time_steps = hist_graph.batch_size
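        # pop node features off the graphs before handing them to the brain;
        # they are restored just before returning so the input graphs stay intact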
        hist_node_feature = hist_graph.ndata.pop('node_feature')
        curr_node_feature = curr_graph.ndata.pop('node_feature')
        maximum_num_enemy = get_largest_number_of_enemy_nodes([curr_graph])

        nn_actions, info_dict = self.brain.get_action(
            num_time_steps, hist_graph, hist_node_feature, curr_graph,
            curr_node_feature, maximum_num_enemy)

        ally_tags = info_dict['ally_tags']
        enemy_tags = info_dict['enemy_tags']

        sc2_actions = nn_action_to_sc2_action(nn_actions=nn_actions,
                                              ally_tags=ally_tags,
                                              enemy_tags=enemy_tags,
                                              tag2unit_dict=tag2unit_dict)

        hist_graph.ndata['node_feature'] = hist_node_feature
        curr_graph.ndata['node_feature'] = curr_node_feature
        return nn_actions, sc2_actions, info_dict

    def sample_noise(self):
        for m in self.modules():
            if isinstance(m, NoisyLinear):
                m.sample_noise()

    def remove_noise(self):
        for m in self.modules():
            if isinstance(m, NoisyLinear):
                m.remove_noise()

    def fit(self, device='cpu'):
        # the prefix 'c' indicates *current* time step inputs
        # the prefix 'n' indicates *next* time step inputs

        # expected specs:
        # bs = batch_size, nt = hist_num_time_steps
        # 'h_graph' = list of graph lists:
        #   [[g_(0,0), g_(0,1), ..., g_(0,nt)],
        #    [g_(1,0), g_(1,1), ..., g_(1,nt)],
        #    ...,
        #    [g_(bs,0), g_(bs,1), ..., g_(bs,nt)]]
        # 'graph' = list of graphs [g_(0), g_(1), ..., g_(bs)]

        fit_conf = self.conf.fit_conf

        batch_size = fit_conf['batch_size']
        hist_num_time_steps = fit_conf['hist_num_time_steps']

        c_h_graph, c_graph, actions, rewards, n_h_graph, n_graph, dones = self.buffer.sample(
            batch_size)

        c_maximum_num_enemy = get_largest_number_of_enemy_nodes(c_graph)
        n_maximum_num_enemy = get_largest_number_of_enemy_nodes(n_graph)

        # batching graphs
        list_c_h_graph = [g for L in c_h_graph for g in L]
        list_n_h_graph = [g for L in n_h_graph for g in L]

        c_hist_graph = dgl.batch(list_c_h_graph)
        n_hist_graph = dgl.batch(list_n_h_graph)

        c_curr_graph = dgl.batch(c_graph)
        n_curr_graph = dgl.batch(n_graph)

        # casting actions to one torch tensor
        actions = torch.cat(actions).long()

        # prepare rewards
        rewards = torch.Tensor(rewards)

        # preparing dones
        dones = torch.Tensor(dones)

        if device != 'cpu':
            # DGLGraph.to returns the transferred graph rather than
            # guaranteeing an in-place move, so reassign the results
            c_hist_graph = c_hist_graph.to(torch.device(device))
            n_hist_graph = n_hist_graph.to(torch.device(device))
            c_curr_graph = c_curr_graph.to(torch.device(device))
            n_curr_graph = n_curr_graph.to(torch.device(device))
            actions = actions.to(torch.device(device))
            rewards = rewards.to(torch.device(device))
            dones = dones.to(torch.device(device))

        c_hist_feature = c_hist_graph.ndata.pop('node_feature')
        c_curr_feature = c_curr_graph.ndata.pop('node_feature')

        n_hist_feature = n_hist_graph.ndata.pop('node_feature')
        n_curr_feature = n_curr_graph.ndata.pop('node_feature')

        fit_return_dict = self.brain.fit(
            num_time_steps=hist_num_time_steps,
            c_hist_graph=c_hist_graph,
            c_hist_feature=c_hist_feature,
            c_curr_graph=c_curr_graph,
            c_curr_feature=c_curr_feature,
            c_maximum_num_enemy=c_maximum_num_enemy,
            n_hist_graph=n_hist_graph,
            n_hist_feature=n_hist_feature,
            n_curr_graph=n_curr_graph,
            n_curr_feature=n_curr_feature,
            n_maximum_num_enemy=n_maximum_num_enemy,
            actions=actions,
            rewards=rewards,
            dones=dones)

        return fit_return_dict
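
In fit(), each replay sample carries a length-nt history of graphs, so the nested lists are flattened before a single dgl.batch call. Here is a standalone sketch of that flattening step, assuming only dgl and torch; the graph sizes are made up:

import dgl
import torch


def toy_graph(n):
    # simple path graph with random node features
    g = dgl.graph((torch.arange(n - 1), torch.arange(1, n)), num_nodes=n)
    g.ndata['node_feature'] = torch.randn(n, 4)
    return g


# two replay samples, each with a 3-step graph history
h_graph = [[toy_graph(3) for _ in range(3)],
           [toy_graph(2) for _ in range(3)]]

flat = [g for sample in h_graph for g in sample]  # [[...], [...]] -> [...]
batched = dgl.batch(flat)                         # one graph, 6 components
feature = batched.ndata.pop('node_feature')       # detach features, as in fit()
print(batched.batch_size, feature.shape)          # 6 torch.Size([15, 4])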
Example #6
    brain_hyper_param = get_hyper_param_dict()
    brain = MultiStepSharedActorCriticBrain(actor_critic=actor_critic,
                                            hist_encoder=hist_encoder,
                                            curr_encoder=curr_encoder,
                                            hyper_params=brain_hyper_param)

    sample_spec = namedtuple(
        'exp_args', ["state", "action", "reward", "next_state", "done"],
        defaults=tuple([list() for _ in range(4)]))

    num_hist_steps = 5

    buffer = NstepInputMemory(N=num_hist_steps,
                              max_n_episodes=100,
                              spec=sample_spec,
                              gamma=1.0,
                              max_traj_len=40)

    agent = MultiStepActorCriticAgent(brain=brain, buffer=buffer)

    init_graph = env.observe()['g']
    history_manager = HistoryManager(n_hist_steps=num_hist_steps,
                                     init_graph=init_graph)

    done_cnt = 0
    iters = 0
    while True:
        # print("Itertation : {} ".format(iters))
        curr_state_dict = env.observe()
        hist_graph = history_manager.get_hist()
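
Example #6's sample_spec leans on namedtuple's 'defaults' argument, which applies right-aligned: with four defaults over five fields, every field except 'state' gets an empty-list default. A quick standalone check (nothing project-specific):

from collections import namedtuple

Exp = namedtuple('exp_args',
                 ["state", "action", "reward", "next_state", "done"],
                 defaults=tuple([list() for _ in range(4)]))

e = Exp(state='s0')
print(e.action, e.done)  # [] []
# caveat: each default list is created once and shared by all instances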
Example #7
class MultiStepActorCriticAgent(AgentBase):
    def __init__(self,
                 conf,
                 network_conf,
                 brain_conf,
                 buffer_conf,
                 use_attention=True,
                 use_hierarchical_actor=False):
        super(MultiStepActorCriticAgent,
              self).__init__(brain_conf=brain_conf, buffer_conf=buffer_conf)
        self.conf = conf

        actor = MultiStepInputActor(
            network_conf,
            use_attention=use_attention,
            use_hierarchical_actor=use_hierarchical_actor)
        critic = MultiStepInputActor(
            network_conf,
            use_attention=use_attention,
            use_hierarchical_actor=use_hierarchical_actor)

        if self.conf.module_conf['use_target']:
            critic_target = MultiStepInputActor(
                network_conf,
                use_attention=use_attention,
                use_hierarchical_actor=use_hierarchical_actor)
        else:
            critic_target = None

        if self.conf.module_conf['use_double_q']:
            critic2 = MultiStepInputActor(
                network_conf,
                use_attention=use_attention,
                use_hierarchical_actor=use_hierarchical_actor)
            critic2_target = MultiStepInputActor(
                network_conf,
                use_attention=use_attention,
                use_hierarchical_actor=use_hierarchical_actor)
        else:
            critic2 = None
            critic2_target = None

        self.brain = MultiStepActorCriticBrain(actor=actor,
                                               critic=critic,
                                               conf=brain_conf,
                                               critic_target=critic_target,
                                               critic2=critic2,
                                               critic2_target=critic2_target)

        self.buffer = NstepInputMemory(**buffer_conf.memory_conf)

    def get_action(self, hist_graph, curr_graph, tag2unit_dict):

        assert isinstance(
            curr_graph,
            dgl.DGLGraph), "get action is designed to work on a single graph!"
        num_time_steps = hist_graph.batch_size
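        # pop node features off the graphs before handing them to the brain;
        # they are restored just before returning so the input graphs stay intact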
        hist_node_feature = hist_graph.ndata.pop('node_feature')
        curr_node_feature = curr_graph.ndata.pop('node_feature')
        maximum_num_enemy = get_largest_number_of_enemy_nodes([curr_graph])

        nn_actions, info_dict = self.brain.get_action(
            num_time_steps, hist_graph, hist_node_feature, curr_graph,
            curr_node_feature, maximum_num_enemy)

        ally_tags = info_dict['ally_tags']
        enemy_tags = info_dict['enemy_tags']

        sc2_actions = nn_action_to_sc2_action(nn_actions=nn_actions,
                                              ally_tags=ally_tags,
                                              enemy_tags=enemy_tags,
                                              tag2unit_dict=tag2unit_dict)

        hist_graph.ndata['node_feature'] = hist_node_feature
        curr_graph.ndata['node_feature'] = curr_node_feature
        return nn_actions, sc2_actions

    def forward(self, *args, **kwargs):
        return None

    def fit(self, device='cpu'):
        # the prefix 'c' indicates *current* time step inputs
        # the prefix 'n' indicates *next* time step inputs

        # expected specs:
        # bs = batch_size, nt = hist_num_time_steps
        # 'h_graph' = list of graph lists:
        #   [[g_(0,0), g_(0,1), ..., g_(0,nt)],
        #    [g_(1,0), g_(1,1), ..., g_(1,nt)],
        #    ...,
        #    [g_(bs,0), g_(bs,1), ..., g_(bs,nt)]]
        # 'graph' = list of graphs [g_(0), g_(1), ..., g_(bs)]

        fit_conf = self.conf.fit_conf

        batch_size = fit_conf['batch_size']
        hist_num_time_steps = fit_conf['hist_num_time_steps']

        c_h_graph, c_graph, actions, rewards, n_h_graph, n_graph, dones = self.buffer.sample(
            batch_size)

        c_maximum_num_enemy = get_largest_number_of_enemy_nodes(c_graph)
        n_maximum_num_enemy = get_largest_number_of_enemy_nodes(n_graph)

        # casting actions to one torch tensor
        actions = torch.cat(actions).long()

        # 'c_graph' is now list of graphs
        c_ally_units = [
            len(get_filtered_node_index_by_type(graph, NODE_ALLY))
            for graph in c_graph
        ]
        c_ally_units = torch.Tensor(c_ally_units).long()

        # prepare rewards
        rewards = torch.Tensor(rewards)
        rewards = rewards.repeat_interleave(c_ally_units, dim=0)

        # preparing dones
        dones = torch.Tensor(dones)
        dones = dones.repeat_interleave(c_ally_units, dim=0)

        # batching graphs
        list_c_h_graph = [g for L in c_h_graph for g in L]
        list_n_h_graph = [g for L in n_h_graph for g in L]

        c_hist_graph = dgl.batch(list_c_h_graph)
        n_hist_graph = dgl.batch(list_n_h_graph)

        c_curr_graph = dgl.batch(c_graph)
        n_curr_graph = dgl.batch(n_graph)

        if device != 'cpu':
            # DGLGraph.to returns the transferred graph rather than
            # guaranteeing an in-place move, so reassign the results
            c_hist_graph = c_hist_graph.to(torch.device(device))
            n_hist_graph = n_hist_graph.to(torch.device(device))
            c_curr_graph = c_curr_graph.to(torch.device(device))
            n_curr_graph = n_curr_graph.to(torch.device(device))
            actions = actions.to(torch.device(device))
            rewards = rewards.to(torch.device(device))
            dones = dones.to(torch.device(device))

        c_hist_feature = c_hist_graph.ndata.pop('node_feature')
        c_curr_feature = c_curr_graph.ndata.pop('node_feature')

        n_hist_feature = n_hist_graph.ndata.pop('node_feature')
        n_curr_feature = n_curr_graph.ndata.pop('node_feature')

        fit_return_dict = self.brain.fit(
            num_time_steps=hist_num_time_steps,
            c_hist_graph=c_hist_graph,
            c_hist_feature=c_hist_feature,
            c_curr_graph=c_curr_graph,
            c_curr_feature=c_curr_feature,
            c_maximum_num_enemy=c_maximum_num_enemy,
            n_hist_graph=n_hist_graph,
            n_hist_feature=n_hist_feature,
            n_curr_graph=n_curr_graph,
            n_curr_feature=n_curr_feature,
            n_maximum_num_enemy=n_maximum_num_enemy,
            actions=actions,
            rewards=rewards,
            dones=dones)

        return fit_return_dict
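
The per-unit broadcast in fit() (rewards and dones expanded from one entry per sample to one entry per ally unit) comes down to a single repeat_interleave with a count tensor. A standalone illustration with made-up counts:

import torch

rewards = torch.tensor([1.0, 2.0, 3.0])  # one reward per replay sample
ally_counts = torch.tensor([2, 1, 3])    # ally units in each sample's graph

# element i is repeated ally_counts[i] times, aligning rewards with
# the per-ally-node Q-values
per_unit = rewards.repeat_interleave(ally_counts, dim=0)
print(per_unit)  # tensor([1., 1., 2., 3., 3., 3.])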