def setUp(self):
    """Build the environment fixture stored on ``self.env``.

    A ``GymEnv`` for ``'Pendulum-v0'`` is created and handed to
    ``SkillEnv`` with ``num_skill=4``; the resulting ``SkillEnv`` is what
    ends up in ``self.env``.
    """
    base_env = GymEnv('Pendulum-v0')
    self.env = SkillEnv(base_env, num_skill=4)
# NOTE(review): the opening delimiter of the note below was not visible in the
# reviewed source; it is restored here so the block parses -- confirm against
# the preceding lines of the file.
'''
Feature extractor of the discriminator.
please see the paper 4.4.2 question 7.
e.g.:
def discrim_f(x):
    return x
f_dim = 9 # dimension of feature space
'''


def discrim_f(x):
    """Feature extractor of the discriminator.

    Adds the slice ``x[:, 0:2]`` to the slice ``x[:, 2:4]`` and returns
    the result.
    """
    first_pair = x[:, 0:2]
    second_pair = x[:, 2:4]
    return first_pair + second_pair


# Dimension of the feature space (must agree with what discrim_f returns).
f_dim = 2

# Environment: the gym environment is created and passed to SkillEnv together
# with the number of skills requested on the command line.
env = SkillEnv(gym.make(args.env_name), num_skill=args.num_skill)
obs = env.reset()

# Spaces exposed by the SkillEnv instance.
observation_space = env.real_observation_space
skill_space = env.skill_space
ob_skill_space = env.observation_space
action_space = env.action_space
# presumably the observation size without the skill part -- TODO confirm
ob_dim = ob_skill_space.shape[0] - args.num_skill

# A negative args.cuda selects the CPU; any other value is formatted into a
# "cuda:<index>" device string.
if args.cuda < 0:
    device_name = 'cpu'
else:
    device_name = "cuda:{}".format(args.cuda)
device = torch.device(device_name)
set_device(device)

# policy
pol_net = PolNet(ob_skill_space, action_space)
pol = GaussianPol(
    ob_skill_space,
    action_space,
    pol_net,
    data_parallel=args.data_parallel,
    parallel_dim=0,
)