# NOTE: this method assumes the usual module-level imports (copy, os,
# numpy as np, torch.optim as optim) plus the repo's cnn_net, mlp_net,
# mlp_dpp_net and ZFilter; they are not shown in this excerpt.
def __init__(self, envs, args):
    self.envs = envs
    self.args = args
    # build the policy network
    if self.args.env_type == 'atari':
        self.net = cnn_net(envs.action_space.n)
    elif self.args.env_type == 'mujoco':
        self.net = mlp_net(envs.observation_space.shape[0], envs.action_space.shape[0], self.args.dist)
    # define the dpp network used to generate intrinsic rewards
    self.intrinsic_net = mlp_dpp_net(envs.observation_space.shape[0])
    # keep a copy of the network as the old policy
    self.old_net = copy.deepcopy(self.net)
    # move the networks to the gpu if requested
    if self.args.cuda:
        self.net.cuda()
        self.intrinsic_net.cuda()
        self.old_net.cuda()
    # define the optimizers
    self.optimizer = optim.Adam(self.net.parameters(), self.args.lr, eps=self.args.eps)
    self.intrinsic_optimizer = optim.Adam(self.intrinsic_net.parameters(), self.args.lr_in, eps=self.args.eps)
    # running mean/std filter for mujoco observations
    if self.args.env_type == 'mujoco':
        num_states = self.envs.observation_space.shape[0]
        self.running_state = ZFilter((num_states, ), clip=5)
    # create the saving folder if it doesn't exist
    if not os.path.exists(self.args.save_dir):
        os.mkdir(self.args.save_dir)
    # per-environment subfolder
    self.model_path = os.path.join(self.args.save_dir, self.args.env_name)
    if not os.path.exists(self.model_path):
        os.mkdir(self.model_path)
    # allocate the observation buffers
    self.batch_ob_shape = (self.args.num_workers * self.args.nsteps, ) + self.envs.observation_space.shape
    self.obs = np.zeros((self.args.num_workers, ) + self.envs.observation_space.shape,
                        dtype=self.envs.observation_space.dtype.name)
    if self.args.env_type == 'mujoco':
        self.obs[:] = np.expand_dims(self.running_state(self.envs.reset()), 0)
    else:
        self.obs[:] = self.envs.reset()
    self.dones = [False for _ in range(self.args.num_workers)]
    # init the state optims
    self.state_optims = None
    # create the folders used to save the log data
    if not os.path.exists(self.args.log_data_dir):
        os.mkdir(self.args.log_data_dir)
    self.intrinsic_data_path = '{}/reward_delay_{}'.format(self.args.log_data_dir, self.args.reward_delay_freq)
    if not os.path.exists(self.intrinsic_data_path):
        os.mkdir(self.intrinsic_data_path)
    self.intrinsic_data_path = '{}/seed_{}'.format(self.intrinsic_data_path, self.args.seed)
    if not os.path.exists(self.intrinsic_data_path):
        os.mkdir(self.intrinsic_data_path)
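# A usage sketch for the constructor above. The surrounding class name is not
# shown in this excerpt, so `ppo_agent` and `learn()` below are illustrative
# names, not confirmed by the source:
#
#   args = get_args()
#   envs = gym.make(args.env_name)
#   agent = ppo_agent(envs, args)
#   agent.learn()   # hypothetical training entry point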
# Demo/evaluation script. Assumed module-level imports (not shown in the
# excerpt): os, gym, torch, numpy as np, and the repo's get_args, make_atari,
# wrap_deepmind, cnn_net and mlp_net.
if __name__ == '__main__':
    # get the arguments
    args = get_args()
    # create the environment
    if args.env_type == 'atari':
        env = make_atari(args.env_name)
        env = wrap_deepmind(env, frame_stack=True)
    elif args.env_type == 'mujoco':
        env = gym.make(args.env_name)
    # build the model path (os.path.join avoids relying on a trailing '/')
    model_path = os.path.join(args.save_dir, args.env_name, 'model.pt')
    # create the network and load the saved weights
    if args.env_type == 'atari':
        network = cnn_net(env.action_space.n)
        network.load_state_dict(torch.load(model_path, map_location=lambda storage, loc: storage))
        filters = None
    elif args.env_type == 'mujoco':
        network = mlp_net(env.observation_space.shape[0], env.action_space.shape[0], args.dist)
        # the mujoco checkpoint stores both the weights and the observation filter
        net_models, filters = torch.load(model_path, map_location=lambda storage, loc: storage)
        network.load_state_dict(net_models)
    # start to play the demo
    obs = env.reset()
    reward_total = 0
    # just one episode
    while True:
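        # the original excerpt is truncated at the loop header above; what
        # follows is a minimal sketch of a typical evaluation step. It assumes
        # network(obs) returns (value, pi), that the mujoco gaussian policy
        # exposes its mean as pi[0], and that ZFilter accepts update=False --
        # check the repo's actual interfaces before relying on it. Atari frames
        # may additionally need transposing to channel-first; omitted here.
        env.render()
        # normalize mujoco observations with the saved running filter
        inputs = filters(obs, update=False) if filters is not None else np.asarray(obs)
        with torch.no_grad():
            obs_tensor = torch.tensor(inputs, dtype=torch.float32).unsqueeze(0)
            _, pi = network(obs_tensor)
            if args.env_type == 'atari':
                # discrete actions: take the argmax of the policy logits
                action = torch.argmax(pi, dim=1).item()
            else:
                # continuous actions: act with the mean of the gaussian policy
                mean, _ = pi
                action = mean.numpy().squeeze()
        obs, reward, done, _ = env.step(action)
        reward_total += reward
        if done:
            break
    print('the total reward of this episode is: {}'.format(reward_total))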