# Run setup for a multi-agent A2C training script (fragment).
#
# NOTE(review): this fragment has lost its line breaks and indentation. As it
# stands, everything after the first `#` on the line is read as a comment, and
# which statements belong to each `if` must be restored from the original
# file. The summary below describes the statements in textual order.
#
# * Creates a TrajectoryStore, an ExponentialMovingAverageTracker with
#   alpha=0.025, and zeroes the episode_length / num_episodes / num_steps
#   counters.
# * `if args.save_logs:` looks up the current git commit SHA via
#   git.Repo(search_parent_directories=True) and builds a header comment that
#   holds the SHA, the args as JSON, and the args again through pformat. It
#   then opens a CSVLogger on f'{PATH}/logs/{save_file}.csv' with that header.
# * `if args.save_video:` creates PATH/videos/<save_file> (exist_ok=True) and
#   a VideoRecorder on `env` writing to 0.mp4 in that directory.
# * Builds the A2C helper from gamma, normalise_returns and dtype; GAE is
#   switched on exactly when args.gae_lambda is not None.
# * Records the start time in t0 and creates one zero tensor of shape
#   (args.n_envs, 64) on args.device per agent, keyed 'agent_<i>' for i in
#   range(args.n_agents).
#   (64 is presumably the recurrent hidden size -- TODO confirm against the
#   model definition.)
# * Ends on `if args.warm_start:`; the body of that branch is not visible here.
trajectories = TrajectoryStore() ewm_tracker = ExponentialMovingAverageTracker(alpha=0.025) episode_length = 0 num_episodes = 0 num_steps = 0 if args.save_logs: repo = git.Repo(search_parent_directories=True) sha = repo.head.object.hexsha comment = f'Git commit: {sha}\n' comment += f'Args: {json.dumps(args.__dict__)}\n' comment += 'Prettier args:\n' comment += pformat(args.__dict__) logger = CSVLogger(filename=f'{PATH}/logs/{save_file}.csv', header_comment=comment) if args.save_video: os.makedirs(PATH + f'/videos/{save_file}', exist_ok=True) recorder = VideoRecorder(env, path=PATH + f'/videos/{save_file}/0.mp4') a2c = A2C(gamma=args.gamma, normalise_returns=args.norm_returns, dtype=dtype, use_gae=args.gae_lambda is not None, gae_lambda=args.gae_lambda) ############################ # Run agent in environment # ############################ t0 = time() hidden_states = {f'agent_{i}': torch.zeros((args.n_envs, 64), device=args.device) for i in range(args.n_agents)} if args.warm_start: # Run all agents for warm_start steps before training
# Environment construction, run setup and start of the step loop for an A2C
# training script (fragment).
#
# NOTE(review): this fragment has lost its line breaks and indentation, and it
# starts inside an if/elif chain whose header is not visible (see the dangling
# `else:`). As it stands, everything after the first `#` on the line is read
# as a comment. The summary below describes the statements in textual order.
#
# * Builds a SingleSnakeEnvironments1 from args.num_envs, args.size,
#   args.device, observation_type and render_args. The `else:` branch raises
#   ValueError('Unrecognised environment').
# * Creates a TrajectoryStore, an ExponentialMovingAverageTracker with
#   alpha=0.025, and zeroes the episode_length / num_episodes / num_steps
#   counters.
# * `if args.save_logs:` opens a CSVLogger on f'{PATH}/logs/{save_file}.csv'.
# * `if args.save_video:` creates PATH/videos/<save_file> (exist_ok=True) and
#   a VideoRecorder on `env` writing to 0.mp4 in that directory.
# * Builds the A2C helper from `model` and args.gamma.
# * Records the start time in t0, resets the environment into `state`, and
#   starts a step loop over count(1) (presumably itertools.count, i.e.
#   unbounded and 1-based -- confirm against the imports).
# * `if args.render:` renders the environment; the following sleep of
#   1/FPS seconds presumably sits in the same branch -- TODO confirm
#   against the original layout.
env = SingleSnakeEnvironments1(num_envs=args.num_envs, size=args.size, device=args.device, observation_mode=observation_type, render_args=render_args) else: raise ValueError('Unrecognised environment') trajectories = TrajectoryStore() ewm_tracker = ExponentialMovingAverageTracker(alpha=0.025) episode_length = 0 num_episodes = 0 num_steps = 0 if args.save_logs: logger = CSVLogger(filename=f'{PATH}/logs/{save_file}.csv') if args.save_video: os.makedirs(PATH + f'/videos/{save_file}', exist_ok=True) recorder = VideoRecorder(env, path=PATH + f'/videos/{save_file}/0.mp4') a2c = A2C(model, gamma=args.gamma) ############################ # Run agent in environment # ############################ t0 = time() state = env.reset() for i_step in count(1): if args.render: env.render() sleep(1. / FPS)
# Run setup and the head of the step loop for a policy/value training script.
#
# NOTE(review): the layout below was recovered from a fragment that had lost
# its line breaks. The loop over count(1) has no visible exit, so every
# statement after its header is treated as loop body.

# Adam over every parameter of `model`, at the command-line learning rate.
optimizer = optim.Adam(model.parameters(), lr=args.lr)
# float32 machine epsilon, converted to a plain Python float.
eps = np.finfo(np.float32).eps.item()

# Running statistics start out as None.
running_length = running_self_collisions = running_edge_collisions = None
running_reward_rate = running_entropy = None

saved_transitions = []
episode_length = num_episodes = num_steps = 0

logger = CSVLogger(filename=f'{PATH}/logs/{argstring}.csv')

t0 = time()
state = env.reset()
for i_step in count(1):
    # NOTE(review): rendering and the per-frame sleep are unconditional here.
    env.render()
    sleep(1.0 / FPS)

    # Forward pass: `probs` parameterises a categorical distribution over
    # actions; `state_value` is the model's second output.
    probs, state_value = model(state)
    m = Categorical(probs)
    entropy = m.entropy().mean()

    # Sample an action, then copy it and cast to int64 before stepping.
    action = m.sample()
    action = action.clone().long()

    state, reward, done, info = env.step(action)

    if args.env == 'snake':
        # Run env_consistency on the entries of env.envs selected by the
        # negated `done` mask (last dimension squeezed away).
        env_consistency(env.envs[~done.squeeze(-1)])