def test_learning(self):
    # Policy network and Gaussian policy
    pol_net = PolNet(self.env.observation_space,
                     self.env.action_space, h1=32, h2=32)
    pol = GaussianPol(self.env.observation_space,
                      self.env.action_space, pol_net)

    # State value function
    vf_net = VNet(self.env.observation_space)
    vf = DeterministicSVfunc(self.env.observation_space, vf_net)

    # Reward and shaping value functions used by the AIRL discriminator
    rewf_net = VNet(self.env.observation_space, h1=32, h2=32)
    rewf = DeterministicSVfunc(self.env.observation_space, rewf_net)
    shaping_vf_net = VNet(self.env.observation_space, h1=32, h2=32)
    shaping_vf = DeterministicSVfunc(
        self.env.observation_space, shaping_vf_net)

    sampler = EpiSampler(self.env, pol, num_parallel=1)

    optim_vf = torch.optim.Adam(vf_net.parameters(), 3e-4)
    optim_discrim = torch.optim.Adam(
        list(rewf_net.parameters()) + list(shaping_vf_net.parameters()), 3e-4)

    # Load expert episodes and build the expert trajectory
    with open(os.path.join('data/expert_epis', 'Pendulum-v0_2epis.pkl'), 'rb') as f:
        expert_epis = pickle.load(f)
    expert_traj = Traj()
    expert_traj.add_epis(expert_epis)
    expert_traj = ef.add_next_obs(expert_traj)
    expert_traj.register_epis()

    # Sample agent episodes, then compute pseudo-rewards, returns and advantages
    epis = sampler.sample(pol, max_steps=32)

    agent_traj = Traj()
    agent_traj.add_epis(epis)
    agent_traj = ef.add_next_obs(agent_traj)
    agent_traj = ef.compute_pseudo_rews(
        agent_traj, rew_giver=rewf, state_only=True)
    agent_traj = ef.compute_vs(agent_traj, vf)
    agent_traj = ef.compute_rets(agent_traj, 0.99)
    agent_traj = ef.compute_advs(agent_traj, 0.99, 0.95)
    agent_traj = ef.centerize_advs(agent_traj)
    agent_traj = ef.compute_h_masks(agent_traj)
    agent_traj.register_epis()

    # One AIRL training iteration with TRPO as the policy optimizer
    result_dict = airl.train(agent_traj, expert_traj, pol, vf,
                             optim_vf, optim_discrim,
                             rewf=rewf, shaping_vf=shaping_vf,
                             rl_type='trpo',
                             epoch=1,
                             batch_size=32, discrim_batch_size=32,
                             discrim_step=1,
                             pol_ent_beta=1e-3, gamma=0.99)

    del sampler
agent_traj = ef.compute_rets(agent_traj, args.gamma)
agent_traj = ef.compute_advs(agent_traj, args.gamma, args.lam)
agent_traj = ef.centerize_advs(agent_traj)
agent_traj = ef.compute_h_masks(agent_traj)
agent_traj.register_epis()

if args.rl_type == 'trpo':
    result_dict = airl.train(
        agent_traj, expert_traj, pol, vf, optim_vf, optim_discrim,
        rewf=rewf, shaping_vf=shaping_vf, advf=advf,
        rl_type=args.rl_type,
        epoch=args.epoch_per_iter,
        batch_size=args.batch_size,
        discrim_batch_size=args.discrim_batch_size,
        discrim_step=args.discrim_step,
        pol_ent_beta=args.pol_ent_beta,
        gamma=args.gamma)
elif args.rl_type == 'ppo_clip':
    result_dict = airl.train(
        agent_traj, expert_traj, pol, vf, optim_vf,