# Build the agent from its config objects; the buffer config also defines the
# per-sample spec and the number of history steps used by each runner.
sample_spec = buffer_conf.memory_conf['spec']
num_hist_steps = buffer_conf.memory_conf['N']

agent = MultiStepActorCriticAgent(agent_conf,
                                  network_conf,
                                  brain_conf,
                                  buffer_conf,
                                  use_attention=use_attention)

config = RunnerConfig(map_name=map_name,
                      reward_func=great_victor_with_kill_bonus,
                      state_proc_func=process_game_state_to_dgl,
                      agent=agent,
                      n_hist_steps=num_hist_steps)

runner_manager = RunnerManager(config, num_runners)

# Log the run configuration to Weights & Biases.
wandb.init(project="sc2rl")
wandb.config.update({'use_attention': use_attention,
                     'num_runners': num_runners,
                     'num_samples': num_samples})
wandb.config.update(agent_conf())
wandb.config.update(network_conf())
wandb.config.update(brain_conf())
wandb.config.update(buffer_conf())

# Main training loop: collect trajectories with the runners and hand the
# samples over to the agent's replay memory.
iters = 0
while iters < 1000000:
    iters += 1
    runner_manager.sample(num_samples)
    runner_manager.transfer_sample()
# Unrecognised reward names fall through to here.
raise NotImplementedError(
    "Unsupported reward function: {}".format(reward_name))

# Bind the state-processing options before handing the function to the runner.
game_state_to_dgl = partial(process_game_state_to_dgl,
                            use_absolute_pos=use_absolute_pos,
                            edge_ally_to_enemy=edge_ally_to_enemy)

config = RunnerConfig(map_name=map_name,
                      reward_func=reward_func,
                      state_proc_func=game_state_to_dgl,
                      agent=agent,
                      n_hist_steps=num_hist_steps,
                      gamma=gamma,
                      realtime=False)

runner_manager = RunnerManager(config, num_runners)

# Track the experiment with Weights & Biases and watch the agent's parameters.
wandb.init(project="qmix2", name=exp_name)
wandb.watch(agent)
wandb.config.update({
    'use_attention': use_attention,
    'num_runners': num_runners,
    'num_samples': num_samples,
    'use_hierarchical_actor': use_hierarchical_actor,
    'map_name': map_name,
    'reward': reward_name,
    'frame_skip_rate': frame_skip_rate,
    'use_absolute_pos': use_absolute_pos,
    'victory_coeff': victory_coeff
})
brain_conf = MultiStepActorCriticBrainConfig()
buffer_conf = NstepInputMemoryConfig()

sample_spec = buffer_conf.memory_conf['spec']
num_hist_steps = buffer_conf.memory_conf['N']

agent = MultiStepActorCriticAgent(agent_conf,
                                  network_conf,
                                  brain_conf,
                                  buffer_conf)

config = RunnerConfig(map_name=map_name,
                      reward_func=great_victor_with_kill_bonus,
                      state_proc_func=process_game_state_to_dgl,
                      agent=agent,
                      n_hist_steps=num_hist_steps)

runner_manager = RunnerManager(config, 3)

wandb.init(project="sc2rl")
wandb.config.update(agent_conf())
wandb.config.update(network_conf())
wandb.config.update(brain_conf())
wandb.config.update(buffer_conf())

# Short smoke-test loop: sample, transfer the samples to the replay memory,
# fit the agent, and log the fit statistics to W&B.
iters = 0
while iters < 10:
    iters += 1
    runner_manager.sample(10)
    runner_manager.transfer_sample()
    print("fit at {}".format(iters))
    fit_return_dict = agent.fit()
    wandb.log(fit_return_dict, step=iters)