                # kle_rollback: restore the policy weights saved before this
                # update and stop optimizing early
                agent.load_state_dict(target_agent.state_dict())
                break

    ## CRASH AND RESUME LOGIC:
    if args.prod_mode:
        if not os.path.exists(f"models/{experiment_name}"):
            os.makedirs(f"models/{experiment_name}")
        torch.save(agent.state_dict(), f"{wandb.run.dir}/agent.pt")
        wandb.save(f"agent.pt")

    # TRY NOT TO MODIFY: record rewards for plotting purposes
    writer.add_scalar("charts/learning_rate", optimizer.param_groups[0]['lr'], global_step)
    writer.add_scalar("charts/update", update, global_step)
    writer.add_scalar("losses/value_loss", v_loss.item(), global_step)
    writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step)
    writer.add_scalar("losses/entropy", entropy.mean().item(), global_step)
    writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step)
    if args.kle_stop or args.kle_rollback:
        writer.add_scalar("debug/pg_stop_iter", i_epoch_pi, global_step)
    print("SPS:", int(global_step / (time.time() - start_time)))
    print(stopwatch.format_report(sw.get_last_aggregated_report()))

envs.close()
writer.close()
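# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original script): the crash-and-resume block
# above only *saves* agent.pt to the W&B run; something like the helper below
# would be the matching restore step when a run is resumed. The helper name
# `resume_from_wandb`, the use of wandb.Api(), and the map_location argument
# are assumptions for illustration, not the original code.
def resume_from_wandb(agent, device):
    import torch
    import wandb

    # only attempt a restore when W&B reports this run as a resumption
    if wandb.run is None or not wandb.run.resumed:
        return
    api = wandb.Api()
    remote = api.run(f"{wandb.run.entity}/{wandb.run.project}/{wandb.run.id}")
    # download the last checkpoint uploaded by wandb.save(...) above
    remote.file("agent.pt").download(root=wandb.run.dir, replace=True)
    agent.load_state_dict(torch.load(f"{wandb.run.dir}/agent.pt", map_location=device))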
import time
import traceback

import numpy as np

from gym_microrts import microrts_ai
from gym_microrts.envs.vec_env import MicroRTSVecEnv

try:
    env = MicroRTSVecEnv(
        num_envs=1,
        render_theme=2,
        ai2s=[microrts_ai.coacAI],
        map_path="maps/16x16/basesWorkers16x16.xml",
        reward_weight=np.array([10.0, 1.0, 1.0, 0.2, 1.0, 4.0]),
    )
    # env = gym.make('MicrortsDefeatCoacAIShaped-v3').env
    # env = gym.wrappers.RecordEpisodeStatistics(env)
    # env.action_space.seed(0)
    obs = env.reset()
    env.render()
except Exception:
    traceback.print_exc()

env.action_space.seed(0)
env.reset()
for i in range(10000):
    env.render()
    # mask over grid cells: 1 where a unit can currently be issued an action
    action_mask = np.array(env.vec_client.getUnitLocationMasks()).flatten()
    time.sleep(0.001)
    action = env.action_space.sample()
    # optional: selecting only valid units.
    if len(action_mask.nonzero()[0]) != 0:
        action[0] = action_mask.nonzero()[0][0]
    next_obs, reward, done, info = env.step([action])
    if done:
        env.reset()
env.close()
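# ---------------------------------------------------------------------------
# Hedged variant (not in the original script): the loop above always acts with
# the *first* valid unit in the location mask; a small helper like this would
# pick a random valid unit instead, which exercises more of the map. The name
# `sample_valid_unit` is hypothetical.
def sample_valid_unit(action_mask, rng=np.random):
    # indices of grid cells whose mask bit is set, i.e. units that can act
    valid = action_mask.nonzero()[0]
    return int(rng.choice(valid)) if len(valid) else None

# usage inside the loop:
#     unit = sample_valid_unit(action_mask)
#     if unit is not None:
#         action[0] = unit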