Ejemplo n.º 1
0

# Trajectory store plus a moving-average tracker (alpha=0.025).
trajectories = TrajectoryStore()
ewm_tracker = ExponentialMovingAverageTracker(alpha=0.025)

# Every progress counter starts at zero.
episode_length = num_episodes = num_steps = 0

if args.save_logs:
    # The CSV log is prefixed with the current git commit hash and the run
    # arguments, given once as JSON and once pretty-printed.
    repo = git.Repo(search_parent_directories=True)
    sha = repo.head.object.hexsha
    comment = ''.join((
        f'Git commit: {sha}\n',
        f'Args: {json.dumps(args.__dict__)}\n',
        'Prettier args:\n',
        pformat(args.__dict__),
    ))
    logger = CSVLogger(
        filename=f'{PATH}/logs/{save_file}.csv',
        header_comment=comment,
    )

if args.save_video:
    # Create the per-run video directory if it is missing, then hand the
    # recorder the path of 0.mp4 inside it.
    os.makedirs(PATH + f'/videos/{save_file}', exist_ok=True)
    recorder = VideoRecorder(env, path=PATH + f'/videos/{save_file}/0.mp4')

# GAE is switched on exactly when a gae_lambda value was supplied.
a2c = A2C(
    gamma=args.gamma,
    normalise_returns=args.norm_returns,
    dtype=dtype,
    use_gae=args.gae_lambda is not None,
    gae_lambda=args.gae_lambda,
)


############################
# Run agent in environment #
############################
# Timestamp taken immediately before the run begins.
t0 = time()
# One zero-filled (n_envs, 64) tensor per agent, keyed 'agent_<index>' and
# allocated on args.device.
hidden_states = {
    f'agent_{agent_idx}': torch.zeros((args.n_envs, 64), device=args.device)
    for agent_idx in range(args.n_agents)
}
if args.warm_start:
    # Run all agents for warm_start steps before training
Ejemplo n.º 2
0
    env = SingleSnakeEnvironments1(num_envs=args.num_envs,
                                   size=args.size,
                                   device=args.device,
                                   observation_mode=observation_type,
                                   render_args=render_args)
else:
    raise ValueError('Unrecognised environment')

# Trajectory store plus a moving-average tracker (alpha=0.025).
trajectories = TrajectoryStore()
ewm_tracker = ExponentialMovingAverageTracker(alpha=0.025)

# Every progress counter starts at zero.
episode_length = num_episodes = num_steps = 0

# CSV logging is opt-in; the file is named after save_file under PATH/logs.
if args.save_logs:
    logger = CSVLogger(filename=f'{PATH}/logs/{save_file}.csv')

# Video capture is opt-in; the per-run directory is created if missing and
# the recorder is given the path of 0.mp4 inside it.
if args.save_video:
    os.makedirs(PATH + f'/videos/{save_file}', exist_ok=True)
    recorder = VideoRecorder(env, path=PATH + f'/videos/{save_file}/0.mp4')

# The A2C object wraps the model and takes its discount factor from args.gamma.
a2c = A2C(
    model,
    gamma=args.gamma,
)

############################
# Run agent in environment #
############################
# Timestamp taken immediately before the run begins.
t0 = time()
# Reset the environment and keep what it returns as the current state.
state = env.reset()
# Step counter starts at 1; presumably itertools.count, i.e. no upper bound,
# so any exit condition must come from the loop body (not fully visible here).
for i_step in count(1):
    if args.render:
        # Draw the environment, then pause 1/FPS seconds to cap the frame rate.
        env.render()
        sleep(1. / FPS)
Ejemplo n.º 3
0
# Adam over all model parameters, with the learning rate read from args.lr.
optimizer = optim.Adam(model.parameters(), lr=args.lr)
# float32 machine epsilon, converted to a plain Python float.
eps = np.finfo(np.float32).eps.item()

# Running statistics all start unset (None).
running_length = running_self_collisions = running_edge_collisions = None
running_reward_rate = running_entropy = None

# Transitions are accumulated in this list.
saved_transitions = []

# Every progress counter starts at zero.
episode_length = num_episodes = num_steps = 0

# CSV log named after argstring under PATH/logs.
logger = CSVLogger(
    filename=f'{PATH}/logs/{argstring}.csv',
)

# Timestamp taken immediately before the run begins.
t0 = time()
# Reset the environment and keep what it returns as the current state.
state = env.reset()
# Step counter starts at 1; presumably itertools.count, i.e. no upper bound
# (the rest of the loop body is not visible here).
for i_step in count(1):
    # Rendering is unconditional here: draw, then pause 1/FPS seconds.
    env.render()
    sleep(1. / FPS)

    # The model returns action probabilities and a state-value estimate.
    probs, state_value = model(state)
    # Categorical distribution over actions, parameterised by those probabilities.
    m = Categorical(probs)
    # Mean entropy of the distribution.
    entropy = m.entropy().mean()
    # Sample an action, copy it and cast it to an integer (long) tensor.
    action = m.sample().clone().long()

    # Advance the environment with the sampled action.
    state, reward, done, info = env.step(action)
    if args.env == 'snake':
        # Consistency check on the sub-environments selected by ~done
        # (presumably those not yet finished; assumes done is a boolean
        # tensor with a trailing singleton dimension -- TODO confirm).
        env_consistency(env.envs[~done.squeeze(-1)])