Example #1
0
    def get_action(self, hist_graph, curr_graph, tag2unit_dict):
        """Query the brain for actions and translate them to SC2 actions.

        The 'node_feature' entries are temporarily removed from both graphs'
        ``ndata`` and handed to the brain as separate arguments. They are
        written back before this method returns, including when one of the
        intermediate calls raises, so the caller's graphs are never left
        without their features.

        Args:
            hist_graph: Graph exposing ``batch_size`` (used as the number of
                time steps) and carrying 'node_feature' in ``ndata``.
            curr_graph: A single ``dgl.DGLGraph`` carrying 'node_feature' in
                ``ndata``.
            tag2unit_dict: Forwarded unchanged to ``nn_action_to_sc2_action``.

        Returns:
            Tuple ``(nn_actions, sc2_actions, info_dict)`` where ``nn_actions``
            and ``info_dict`` come from ``self.brain.get_action`` and
            ``sc2_actions`` from ``nn_action_to_sc2_action``.

        Raises:
            AssertionError: If ``curr_graph`` is not a ``dgl.DGLGraph``.
            KeyError: Presumably raised by ``ndata.pop`` when a graph has no
                'node_feature' entry -- TODO confirm against the DGL version
                in use.
        """
        assert isinstance(
            curr_graph,
            dgl.DGLGraph), "get action is designed to work on a single graph!"
        num_time_steps = hist_graph.batch_size

        hist_node_feature = hist_graph.ndata.pop('node_feature')
        # Sentinel: stays None if popping from curr_graph itself fails, so the
        # cleanup below knows there is nothing to restore on that graph.
        curr_node_feature = None
        try:
            curr_node_feature = curr_graph.ndata.pop('node_feature')
            maximum_num_enemy = get_largest_number_of_enemy_nodes([curr_graph])

            nn_actions, info_dict = self.brain.get_action(
                num_time_steps, hist_graph, hist_node_feature, curr_graph,
                curr_node_feature, maximum_num_enemy)

            sc2_actions = nn_action_to_sc2_action(
                nn_actions=nn_actions,
                ally_tags=info_dict['ally_tags'],
                enemy_tags=info_dict['enemy_tags'],
                tag2unit_dict=tag2unit_dict)

            return nn_actions, sc2_actions, info_dict
        finally:
            # Always hand the graphs back in the state they were received.
            hist_graph.ndata['node_feature'] = hist_node_feature
            if curr_node_feature is not None:
                curr_graph.ndata['node_feature'] = curr_node_feature
Example #2
0
        # One iteration of the enclosing loop (header not shown here):
        # observe -> pick actions -> compute loss -> step the environment.
        # Debug output, disabled:
        # print("=========={} th iter ============".format(i))
        cur_state = env.observe()
        # Debug dump of every unit in the observation, disabled:
        # for tag, unit in cur_state['tag2unit_dict'].items():
        #     print("TAG: {} | NAME: {} | Health: {} | POS: {} ".format(tag, unit.name, unit.health, unit.position))

        # The observation carries the graph under 'g'; its node features are
        # removed from ndata and passed around as a separate value.
        # NOTE(review): 'node_feature' is not written back to g.ndata anywhere
        # in the visible lines -- confirm that nothing later relies on it.
        g = cur_state['g']
        node_feature = g.ndata.pop('node_feature')

        num_enemy = get_largest_number_of_enemy_nodes([g])
        nn_actions, info_dict = ac_module.get_action(g, node_feature, num_enemy)

        # Translate the network's actions into environment actions using the
        # ally/enemy tags reported alongside them.
        tag2unit_dict = cur_state['tag2unit_dict']
        ally_tags = info_dict['ally_tags']
        enemy_tags = info_dict['enemy_tags']

        sc2_actions = nn_action_to_sc2_action(nn_actions=nn_actions,
                                              ally_tags=ally_tags,
                                              enemy_tags=enemy_tags,
                                              tag2unit_dict=tag2unit_dict)

        # NOTE(review): `loss` is not used in the visible lines (no backward
        # pass or optimizer step here) -- presumably a smoke test of the
        # module's call path; confirm.
        loss = ac_module(g, node_feature, num_enemy)

        # NOTE(review): `next_state` and `reward` are not read in the visible
        # lines; the next iteration re-observes via env.observe().
        next_state, reward, done = env.step(action=sc2_actions)

        # Count the steps that reported `done` and leave the loop at the tenth.
        if done:
            done_cnt += 1
            if done_cnt >= 10:
                break

        # Iteration counter; not incremented on the iteration that breaks.
        i += 1