Ejemplo n.º 1
0
 def f():
     """Create the environment and return it wrapped by ``common.wrap_env``.

     The environment id is ``ENV_NAME + ENV_SUFFIX``. ``frames_per_state`` is
     not a parameter; it is looked up in the enclosing scope when the
     function is called.
     """
     base_env = gym.make(ENV_NAME + ENV_SUFFIX)
     return common.wrap_env(
         base_env,
         resize=True,
         pytorch_layout=True,
         stack_frames=frames_per_state,
     )
Ejemplo n.º 2
0
    # Install handlers for SIGHUP and SIGINT; handle_sighup / handle_sigint
    # are defined outside this excerpt.
    signal.signal(signal.SIGHUP, handle_sighup)
    signal.signal(signal.SIGINT, handle_sigint)

    args = parse_args()
    print(args)
    # Expose only the GPUs listed in args.gpu to CUDA. This is assigned before
    # the CUDA availability check below.
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(x) for x in args.gpu)

    # Run on the GPU when CUDA is available, otherwise fall back to the CPU.
    USE_CUDA = t.cuda.is_available()
    device = t.device('cuda' if USE_CUDA else 'cpu')
    if USE_CUDA:
        print(f'Using CUDA device: {t.cuda.get_device_name(device)}')

    # The same frame count is passed to the environment wrapper (stack_frames)
    # and to build_model, so the two stay in sync.
    frames_per_state = 4
    env = gym.make(ENV_NAME + ENV_SUFFIX)
    env = common.wrap_env(env,
                          resize=True,
                          pytorch_layout=True,
                          stack_frames=frames_per_state)
    model = build_model(frames_per_state).to(device)
    # With args.double set, a second, separately built model is used as the
    # target; otherwise target_model is the very same object as model.
    target_model = build_model(frames_per_state).to(
        device) if args.double else model
    # Only model's parameters are handed to the optimizer.
    opt = t.optim.Adam(model.parameters(), lr=1e-4)
    loss_fn = t.nn.SmoothL1Loss()

    # NOTE(review): the values below look like epsilon-greedy DQN
    # hyperparameters (start/final epsilon, decay length, discount factor);
    # their use is outside this excerpt -- confirm against the training loop.
    eps, eps_final = 1, 0.01
    eps_steps = 50000
    gamma = 0.99
    replay = common.ReplayCnnBuffer(100000)
    batch_size = 32

    # Bounded deques: once maxlen is reached, the oldest entries are dropped
    # as new ones are appended.
    all_rewards = collections.deque(maxlen=250)
    losses = collections.deque(maxlen=10000)