Example #1
                    'num_layers': enc_gnn_num_layer,
                    'model_dim': node_input_dim,
                    'num_neurons': num_neurons,
                    'num_relations': num_relations,
                    'num_head': num_attn_head
                })
    else:
        gnn_conf = MultiStepInputGraphNetworkConfig(
            hist_rnn_conf={'input_size': node_input_dim},
            hist_enc_conf={
                'spectral_norm': spectral_norm,
                'num_layers': enc_gnn_num_layer,
                'model_dim': node_input_dim,
                'use_concat': use_concat_input_gnn,
                'num_neurons': num_neurons,
                'num_relations': num_relations
            },
            curr_enc_conf={
                'spectral_norm': spectral_norm,
                'num_layers': enc_gnn_num_layer,
                'model_dim': node_input_dim,
                'use_concat': use_concat_input_gnn,
                'num_neurons': num_neurons,
                'num_relations': num_relations
            })
    qnet_conf.gnn_conf = gnn_conf

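    # N-step replay memory: keep N = num_hist_time_steps steps of history and, with
    # use_return enabled, presumably store discounted returns computed with gamma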
    buffer_conf = NstepInputMemoryConfig(memory_conf={
        'use_return': True,
        'N': num_hist_time_steps,
        'gamma': gamma
    })
Example #2
from sc2rl.utils.reward_funcs import great_victor_with_kill_bonus
from sc2rl.utils.state_process_funcs import process_game_state_to_dgl

from sc2rl.rl.agents.MAAC.MultiStepActorCriticAgent import MultiStepActorCriticAgent, MultiStepActorCriticAgentConfig
from sc2rl.rl.brains.MAAC.MultiStepActorCriticBrain import MultiStepActorCriticBrainConfig
from sc2rl.rl.networks.MultiStepInputGraphNetwork import MultiStepInputGraphNetworkConfig

from sc2rl.memory.n_step_memory import NstepInputMemoryConfig
from sc2rl.runners.RunnerManager import RunnerConfig, RunnerManager

if __name__ == "__main__":

    map_name = "training_scenario_1"

    agent_conf = MultiStepActorCriticAgentConfig()
    network_conf = MultiStepInputGraphNetworkConfig()
    brain_conf = MultiStepActorCriticBrainConfig()
    buffer_conf = NstepInputMemoryConfig()
    use_attention = False
    num_runners = 5
    num_samples = 10

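    # Read the transition spec and the history length N back out of the buffer config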
    sample_spec = buffer_conf.memory_conf['spec']
    num_hist_steps = buffer_conf.memory_conf['N']

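    # Assemble the multi-step actor-critic agent from the agent/network/brain/buffer configs above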
    agent = MultiStepActorCriticAgent(agent_conf,
                                      network_conf,
                                      brain_conf,
                                      buffer_conf,
                                      use_attention=use_attention)
Example #3
    reward_name = 'victory_if_zero_enemy'

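    # Q-network config: clustered-random exploration; spectral norm and the node input
    # dimension are forwarded to the Q-network's actor sub-config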
    qnet_conf = MultiStepInputQnetConfig(
        multi_step_input_qnet_conf={'exploration_method': 'clustered_random'},
        qnet_actor_conf={
            'spectral_norm': spectral_norm,
            'node_input_dim': node_input_dim
        })
    if use_attention:
        gnn_conf = MultiStepInputNetworkConfig()
    else:
        gnn_conf = MultiStepInputGraphNetworkConfig(
            hist_rnn_conf={'input_size': node_input_dim},
            hist_enc_conf={
                'spectral_norm': spectral_norm,
                'model_dim': node_input_dim
            },
            curr_enc_conf={
                'spectral_norm': spectral_norm,
                'model_dim': node_input_dim
            })

    qnet_conf.gnn_conf = gnn_conf

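    # Replay buffer stores returns; the QMIX brain uses double Q-learning, with tau
    # presumably controlling the target-network update in the fit step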
    buffer_conf = NstepInputMemoryConfig(memory_conf={'use_return': True})
    brain_conf = QmixBrainConfig(brain_conf={'use_double_q': True},
                                 fit_conf={'tau': 0.9})

    sample_spec = buffer_conf.memory_conf['spec']
    num_hist_steps = buffer_conf.memory_conf['N']

    run_device = 'cpu'
Example #4
    use_attention = False
    use_hierarchical_actor = True
    num_runners = 1
    num_samples = 10
    eval_episodes = 20
    reward_name = 'victory_if_zero_enemy'
    exp_name = '[S4] scheduler'

    qnet_conf = MultiStepInputQnetConfig(
        qnet_actor_conf={'spectral_norm': spectral_norm})
    actor_conf = MultiStepInputActorConfig()
    if use_attention:
        gnn_conf = MultiStepInputNetworkConfig()
    else:
        gnn_conf = MultiStepInputGraphNetworkConfig(
            hist_enc_conf={'spectral_norm': spectral_norm},
            curr_enc_conf={'spectral_norm': spectral_norm})

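    # Share one GNN encoder config between the Q-network and the actor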
    qnet_conf.gnn_conf = gnn_conf
    actor_conf.gnn_conf = gnn_conf

    buffer_conf = NstepInputMemoryConfig(memory_conf={'use_return': True})
    brain_conf = QmixActorCriticBrainConfig(brain_conf={'use_double_q': True})

    sample_spec = buffer_conf.memory_conf['spec']
    num_hist_steps = buffer_conf.memory_conf['N']

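    # Run on CPU, but fit on GPU when one is available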
    run_device = 'cpu'
    fit_device = 'cuda' if torch.cuda.is_available() else 'cpu'

    if use_attention:
Example #5
            n_hist_feature=n_hist_feature,
            n_curr_graph=n_curr_graph,
            n_curr_feature=n_curr_feature,
            n_maximum_num_enemy=n_maximum_num_enemy,
            actions=actions,
            rewards=rewards,
            dones=dones)

        return fit_return_dict


if __name__ == "__main__":
    from sc2rl.rl.networks.RelationalGraphNetwork import RelationalGraphNetworkConfig
    from sc2rl.rl.networks.FeedForward import FeedForwardConfig
    from sc2rl.memory.n_step_memory import NstepInputMemoryConfig
    from sc2rl.rl.networks.MultiStepInputGraphNetwork import MultiStepInputGraphNetworkConfig

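    # Build default config objects for every component the agent is assembled from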
    conf = QmixAgentConf()
    actor_conf = ActorModuleConfig()

    qnet_conf = MultiStepInputQnetConfig()
    qnet_conf.gnn_conf = MultiStepInputGraphNetworkConfig()

    mixer_gnn_conf = RelationalGraphNetworkConfig()
    mixer_ff_conf = FeedForwardConfig()
    brain_conf = QmixActorCriticBrainConfig()
    buffer_conf = NstepInputMemoryConfig()

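    # Quick sanity check: construct a QmixAgent from the default configs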
    QmixAgent(conf, actor_conf, qnet_conf, mixer_gnn_conf, mixer_ff_conf,
              brain_conf, buffer_conf)