# Hand this script's absolute path to wandb.save (presumably so the run keeps
# a copy of the exact code that produced it).
# NOTE(review): the wandb run setup is not visible in this chunk, and this call
# may belong to a conditional block that ends above it -- confirm indentation.
wandb.save(os.path.abspath(__file__))

# TRY NOT TO MODIFY: seeding
# CUDA is selected only when it is both available and requested via args.cuda;
# every other combination falls back to the CPU.
device = torch.device(
    'cpu' if not (torch.cuda.is_available() and args.cuda) else 'cuda')
env = gym.make(args.gym_id)

# The same seed is fed to Python, NumPy, PyTorch, the environment and both of
# its spaces.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.backends.cudnn.deterministic = args.torch_deterministic
env.seed(args.seed)
env.action_space.seed(args.seed)
env.observation_space.seed(args.seed)

input_shape, preprocess_obs_fn = preprocess_obs_space(
    env.observation_space, device)
output_shape = preprocess_ac_space(env.action_space)

# respect the default timelimit
# A non-zero args.episode_length overrides (or installs) the step limit; a
# zero value instead copies the environment's own limit back into args, using
# 200 when the environment is not a TimeLimit instance.
if isinstance(env, TimeLimit):
    if int(args.episode_length):
        env._max_episode_steps = int(args.episode_length)
    else:
        args.episode_length = env._max_episode_steps
elif int(args.episode_length):
    env = TimeLimit(env, int(args.episode_length))
else:
    args.episode_length = 200

# Optionally wrap the environment in Monitor, writing under a per-experiment
# directory.
if args.capture_video:
    env = Monitor(env, f'videos/{experiment_name}')

# The rest of the script only handles Box (continuous) action spaces.
assert isinstance(env.action_space, Box), \
    "only continuous action space is supported"
help="the time it takes from start-e to go end-e") args = parser.parse_args() if not args.seed: args.seed = int(time.time()) # TRY NOT TO MODIFY: setup the environment env = gym.make(args.gym_id) env.seed(args.seed) env.action_space.np_random.seed(args.seed) env.observation_space.np_random.seed(args.seed) random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.backends.cudnn.deterministic = args.torch_deterministic input_shape, preprocess_obs_fn = preprocess_obs_space(env.observation_space) output_shape, preprocess_ac_fn = preprocess_ac_space(env.action_space, stochastic=False) # TODO: initialize agent here: er = ReplayBuffer(args.buffer_size) class QNetwork(nn.Module): def __init__(self): super(QNetwork, self).__init__() self.fc1 = nn.Linear(input_shape, 120) self.fc2 = nn.Linear(120, 84) self.fc3 = nn.Linear(84, output_shape) def forward(self, x): x = preprocess_obs_fn(x) x = F.relu(self.fc1(x))