Beispiel #1
0
        net=net,
        nb_actions=nb_actions,
        gamma=params.gamma,
        device=device,
    )

    # Evaluation-only mode: when `args.evaluate` is set, it is passed to
    # torch.load and the result is loaded into the agent's network; the agent
    # is then evaluated on a freshly made environment and the process exits
    # before any of the training setup below runs.
    # NOTE(review): torch.load is called without `map_location` — confirm the
    # checkpoint is always loaded on a machine with a compatible device.
    # NOTE(review): bare `exit()` is the helper installed by the `site`
    # module; `sys.exit()` is the usual choice in scripts.
    if args.evaluate:
        agent.net.load_state_dict(torch.load(args.evaluate))
        env = make_env(params.env_name, episodic=False)
        evaluate(agent, env, render=args.render)
        exit()

    # Resume mode: restore the agent from the first element of `args.resume`.
    # presumably `args.resume` is a list of paths from the CLI parser —
    # TODO confirm; only index 0 is used here.
    if args.resume:
        agent.load(args.resume[0])

    # Experience buffer built from the configured capacity, the observation
    # shape and the frame-stack setting.
    memory = ExperienceBuffer(params.memory_capacity, obs_shape,
                              params.frame_stack)

    # Adam optimizer over the parameters of `net`, using the configured
    # learning rate.
    opt = torch.optim.Adam(net.parameters(), lr=params.learning_rate)
    # Epsilon schedule built from the start/end values and the frame count in
    # `params` (class name is spelled `EpisilonAnnealer` where it is defined).
    # presumably anneals the exploration rate over `epsilon_frames` frames —
    # verify against the class definition.
    eps_schedule = EpisilonAnnealer(params.epsilon_start, params.epsilon_end,
                                    params.epsilon_frames)

    # Timestamp (naive local time) used to give this run a unique directory
    # name. strftime already returns a str, so the outer str() is a no-op.
    save_id = str(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    path_str = lambda p: p.absolute().as_posix(
    )  # converts a pathlib Path to an absolute, POSIX-style string

    # Build the run directory layout: runs/<task>-<timestamp>/checkpoints
    # (the f-string prefix on "runs" has no placeholders and has no effect).
    runs_dir = Path(f"runs")
    root_dir = runs_dir / f'{args.task}-{save_id}'
    chk_dir = root_dir / 'checkpoints'

    # Create the top-level runs directory; an already existing one is fine.
    runs_dir.mkdir(exist_ok=True)
Beispiel #2
0
                                unroll_steps=params.unroll_steps,
                                device=device)

    # Evaluation-only mode: when `args.evaluate` is set, it is passed to
    # torch.load and the result is loaded into the agent's network; the agent
    # is then evaluated on a freshly made environment and the process exits
    # before any of the training setup below runs.
    # NOTE(review): torch.load is called without `map_location` — confirm the
    # checkpoint is always loaded on a machine with a compatible device.
    # NOTE(review): bare `exit()` is the helper installed by the `site`
    # module; `sys.exit()` is the usual choice in scripts.
    if args.evaluate:
        agent.net.load_state_dict(torch.load(args.evaluate))
        env = make_env(params.env_name, episodic=False)
        evaluate(agent, env, render=args.render)
        exit()

    # Resume mode: restore the agent from the first element of `args.resume`.
    # presumably `args.resume` is a list of paths from the CLI parser —
    # TODO confirm; only index 0 is used here.
    if args.resume:
        agent.load(args.resume[0])

    # Experience buffer with prioritization switched on. It is given the
    # unroll length and the beta start/end/steps settings from `params`
    # (note that `beta_steps` is fed from `params.beta_frames`).
    # presumably beta is the importance-sampling exponent annealed from
    # `beta_start` to `beta_end` — verify against ExperienceBuffer.
    memory = ExperienceBuffer(params.memory_capacity,
                              obs_shape,
                              params.frame_stack,
                              prioritized=True,
                              unroll_steps=params.unroll_steps,
                              beta_start=params.beta_start,
                              beta_end=params.beta_end,
                              beta_steps=params.beta_frames)

    # Adam optimizer over the parameters of `net`, using the configured
    # learning rate.
    opt = torch.optim.Adam(net.parameters(), lr=params.learning_rate)

    # Timestamp (naive local time) used to give this run a unique directory
    # name. strftime already returns a str, so the outer str() is a no-op.
    save_id = str(datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))
    path_str = lambda p: p.absolute().as_posix(
    )  # converts a pathlib Path to an absolute, POSIX-style string

    # Build the run directory layout: runs/<task>-<timestamp>/checkpoints
    # (the f-string prefix on "runs" has no placeholders and has no effect).
    # Only the paths are computed in these lines; nothing is created on disk.
    runs_dir = Path(f"runs")
    root_dir = runs_dir / f'{args.task}-{save_id}'
    chk_dir = root_dir / 'checkpoints'