gym.make(run.get("defaults", "env")))), width=im_width, height=im_height, grayscale=grayscale) if args.monitor: e = gym.wrappers.Monitor(e, args.monitor) return e env = make_env() env_pool = [env] for i in range(run.getint("defaults", "env_pool_size", fallback=1) - 1): env_pool.append(make_env()) params = env_params.EnvParams.from_env(env) params.load_runfile(run) env_params.register(params) model = Net(params.n_actions, input_shape=(1 if grayscale else 3, im_height, im_width)) if params.cuda_enabled: model.cuda() loss_fn = nn.MSELoss(size_average=False) optimizer = optim.Adam(model.parameters(), lr=run.getfloat("learning", "lr")) action_selector = ActionSelectorEpsilonGreedy(epsilon=run.getfloat( "defaults", "epsilon"), params=params) target_net = agent.TargetNet(model) dqn_agent = agent.DQNAgent(dqn_model=model,
def test_register(self):
    """Check that params passed to ``env_params.register`` come back from ``env_params.get``.

    The test first asserts that ``env_params.get()`` returns ``None``, i.e. it
    expects nothing to be registered when it starts.
    """
    # Precondition: nothing registered yet.
    self.assertIsNone(env_params.get())

    # Build params from the fixture environment and register them.
    built = env_params.EnvParams.from_env(self.env)
    env_params.register(built)

    # The registry must now hand back something equal to what was registered.
    fetched = env_params.get()
    self.assertEqual(built, fetched)