    sample_spec = buffer_conf.memory_conf['spec']
    num_hist_steps = buffer_conf.memory_conf['N']

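    # Build the multi-step actor-critic agent from the agent, network, brain, and buffer configs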
    agent = MultiStepActorCriticAgent(agent_conf,
                                      network_conf,
                                      brain_conf,
                                      buffer_conf,
                                      use_attention=use_attention)

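    # Configure runners: map, reward shaping, DGL state preprocessing, and number of history steps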
    config = RunnerConfig(map_name=map_name, reward_func=great_victor_with_kill_bonus,
                          state_proc_func=process_game_state_to_dgl,
                          agent=agent,
                          n_hist_steps=num_hist_steps)

    runner_manager = RunnerManager(config, num_runners)

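    # Initialize Weights & Biases and record the run hyperparameters and config objects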
    wandb.init(project="sc2rl")
    wandb.config.update({'use_attention': use_attention,
                         'num_runners': num_runners,
                         'num_samples': num_samples})
    wandb.config.update(agent_conf())
    wandb.config.update(network_conf())
    wandb.config.update(brain_conf())
    wandb.config.update(buffer_conf())

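    # Training loop: collect samples across runners and transfer them to the agent's buffer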
    iters = 0
    while iters < 1000000:
        iters += 1
        runner_manager.sample(num_samples)
        runner_manager.transfer_sample()
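        # Fit on the transferred samples and log the returned metrics (same pattern as the loop in Example #3 below)
        fit_return_dict = agent.fit()
        wandb.log(fit_return_dict, step=iters)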
Example #2
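        # Fallback when reward_name does not match any supported reward function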
        raise NotImplementedError(
            "Not supported reward function:{}".format(reward_name))

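    # Pre-bind the state-processing options so runners only need to pass the raw game state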
    game_state_to_dgl = partial(process_game_state_to_dgl,
                                use_absolute_pos=use_absolute_pos,
                                edge_ally_to_enemy=edge_ally_to_enemy)

    config = RunnerConfig(map_name=map_name,
                          reward_func=reward_func,
                          state_proc_func=game_state_to_dgl,
                          agent=agent,
                          n_hist_steps=num_hist_steps,
                          gamma=gamma,
                          realtime=False)

    runner_manager = RunnerManager(config, num_runners)

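    # Initialize the W&B run and watch the agent's parameters/gradients during training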
    wandb.init(project="qmix2", name=exp_name)
    wandb.watch(agent)
    wandb.config.update({
        'use_attention': use_attention,
        'num_runners': num_runners,
        'num_samples': num_samples,
        'use_hierarchical_actor': use_hierarchical_actor,
        'map_name': map_name,
        'reward': reward_name,
        'frame_skip_rate': frame_skip_rate,
        'use_absolute_pos': use_absolute_pos,
        'victory_coeff': victory_coeff
    })
Example #3
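    # Default brain and N-step input memory configurations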
    brain_conf = MultiStepActorCriticBrainConfig()
    buffer_conf = NstepInputMemoryConfig()

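    # The buffer config exposes the sample spec and the history length N used by the runners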
    sample_spec = buffer_conf.memory_conf['spec']
    num_hist_steps = buffer_conf.memory_conf['N']

    agent = MultiStepActorCriticAgent(agent_conf, network_conf, brain_conf,
                                      buffer_conf)

    config = RunnerConfig(map_name=map_name,
                          reward_func=great_victor_with_kill_bonus,
                          state_proc_func=process_game_state_to_dgl,
                          agent=agent,
                          n_hist_steps=num_hist_steps)

    runner_manager = RunnerManager(config, 3)

    wandb.init(project="sc2rl")
    wandb.config.update(agent_conf())
    wandb.config.update(network_conf())
    wandb.config.update(brain_conf())
    wandb.config.update(buffer_conf())

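    # Short debugging run: 10 iterations of sample -> transfer -> fit -> log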
    iters = 0
    while iters < 10:
        iters += 1
        runner_manager.sample(10)
        runner_manager.transfer_sample()
        print("fit at {}".format(iters))
        fit_return_dict = agent.fit()
        wandb.log(fit_return_dict, step=iters)