# Sample on-policy episodes.
print('on-policy')
on_traj = Traj(traj_device='cpu')
on_traj.add_epis(epis)
on_traj = epi_functional.add_next_obs(on_traj)
on_traj.register_epis()
off_traj.add_traj(on_traj)  # merge into the off-policy trajectory

# Count episodes and steps.
total_epi += on_traj.num_epi
step = on_traj.num_step
total_step += step
epoch = step

if args.data_parallel:
    qf.dp_run = True
    lagged_qf.dp_run = True
    targ_qf1.dp_run = True
    targ_qf2.dp_run = True

# Train.
print('train')
result_dict = qtopt.train(off_traj, qf, lagged_qf, targ_qf1, targ_qf2,
                          optim_qf, epoch, args.batch_size, args.tau,
                          args.gamma)  # remaining arguments truncated in the source excerpt
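# Why two target Q-functions: QT-Opt-style training builds its Bellman
# target with clipped double Q-learning, taking the elementwise minimum of
# the two target networks to damp overestimation. A minimal sketch of that
# target under assumed tensor names (`rews`, `next_obs`, `next_acs`,
# `dones`) and an assumed callable signature for the target networks; this
# is not machina's internal implementation:
import torch

def bellman_target(rews, next_obs, next_acs, dones, targ_qf1, targ_qf2, gamma):
    # Clipped double Q: evaluate both target networks, keep the minimum.
    with torch.no_grad():
        next_q = torch.min(targ_qf1(next_obs, next_acs),
                           targ_qf2(next_obs, next_acs))
        return rews + gamma * (1 - dones) * next_q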
epis = sampler.sample(pol, max_steps=args.max_steps_per_iter)

with measure('train'):
    agent_traj = Traj()
    agent_traj.add_epis(epis)
    agent_traj = ef.compute_pseudo_rews(agent_traj, discrim)
    agent_traj = ef.compute_vs(agent_traj, vf)
    agent_traj = ef.compute_rets(agent_traj, args.gamma)
    agent_traj = ef.compute_advs(agent_traj, args.gamma, args.lam)
    agent_traj = ef.centerize_advs(agent_traj)
    agent_traj = ef.compute_h_masks(agent_traj)
    agent_traj.register_epis()

    if args.data_parallel:
        pol.dp_run = True
        vf.dp_run = True
        discrim.dp_run = True

    if args.rl_type == 'trpo':
        result_dict = gail.train(
            agent_traj, expert_traj, pol, vf, discrim,
            optim_vf, optim_discrim,
            rl_type=args.rl_type,
            epoch=args.epoch_per_iter,
            batch_size=args.batch_size if not args.rnn else args.rnn_batch_size,
            discrim_batch_size=args.discrim_batch_size)  # remaining arguments truncated in the source excerpt
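# What compute_pseudo_rews stands for: GAIL discards the environment reward
# and rewards the agent through the discriminator instead. A common form is
# r(s, a) = -log(1 - D(s, a)), which grows as the discriminator mistakes
# agent transitions for expert ones. A minimal sketch; the exact reward
# form and the call signature of `discrim` are assumptions, not machina's
# actual API:
import torch

def pseudo_reward(discrim, obs, acs, eps=1e-8):
    with torch.no_grad():
        d = torch.sigmoid(discrim(obs, acs))  # probability the pair is "expert"
        return -torch.log(1.0 - d + eps)      # eps guards against log(0)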
with measure('train'):
    on_traj = Traj()
    on_traj.add_epis(epis)
    on_traj = ef.add_next_obs(on_traj)
    on_traj = ef.compute_vs(on_traj, vf)
    on_traj = ef.compute_rets(on_traj, args.gamma)
    on_traj = ef.compute_advs(on_traj, args.gamma, args.lam)
    on_traj = ef.centerize_advs(on_traj)
    on_traj = ef.compute_h_masks(on_traj)
    on_traj.register_epis()

    if args.data_parallel:
        pol.dp_run = True
        vf.dp_run = True
        qf.dp_run = True

    result_dict1 = ppo_clip.train(traj=on_traj, pol=pol, vf=vf,
                                  clip_param=args.clip_param,
                                  optim_pol=optim_pol, optim_vf=optim_vf,
                                  epoch=args.epoch_per_iter,
                                  batch_size=args.batch_size,
                                  max_grad_norm=args.max_grad_norm)

total_epi += on_traj.num_epi
step = on_traj.num_step
total_step += step
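# What compute_advs(gamma, lam) corresponds to: generalized advantage
# estimation (GAE). With TD residuals
#   delta_t = r_t + gamma * V(s_{t+1}) - V(s_t),
# the advantage is A_t = sum_k (gamma * lam)^k * delta_{t+k}. A minimal
# NumPy sketch over a single episode (names are illustrative, not
# machina's; `last_v` is the bootstrap value, 0 for a terminal state):
import numpy as np

def gae(rews, vs, last_v, gamma, lam):
    vs = np.append(vs, last_v)
    advs = np.zeros(len(rews))
    running = 0.0
    for t in reversed(range(len(rews))):      # accumulate backwards in time
        delta = rews[t] + gamma * vs[t + 1] - vs[t]
        running = delta + gamma * lam * running
        advs[t] = running
    return advs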
        # (closing argument of a call that begins before this excerpt)
        state_only=(args.rew_type == 'rew'))

agent_traj = ef.compute_vs(agent_traj, vf)
agent_traj = ef.compute_rets(agent_traj, args.gamma)
agent_traj = ef.compute_advs(agent_traj, args.gamma, args.lam)
agent_traj = ef.centerize_advs(agent_traj)
agent_traj = ef.compute_h_masks(agent_traj)
agent_traj.register_epis()

if args.data_parallel:
    pol.dp_run = True
    vf.dp_run = True
    if args.rew_type == 'rew':
        rewf.dp_run = True
        shaping_vf.dp_run = True
    elif args.rew_type == 'adv':
        advf.dp_run = True

if args.rl_type == 'trpo':
    result_dict = airl.train(
        agent_traj, expert_traj, pol, vf,
        optim_vf, optim_discrim,
        rewf=rewf, shaping_vf=shaping_vf, advf=advf,
        rl_type=args.rl_type,
        epoch=args.epoch_per_iter,
        batch_size=args.batch_size)  # remaining arguments truncated in the source excerpt
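# Why airl.train takes rewf and shaping_vf: AIRL structures its
# discriminator logit as f(s, s') = r(s) + gamma * h(s') - h(s), where r is
# the learned reward (rewf) and h a shaping potential (shaping_vf); the
# 'adv' reward type instead collapses f into a single advantage network
# (advf). A minimal sketch of the state-only logit, with call signatures
# assumed rather than taken from machina:
import torch

def airl_logit(rewf, shaping_vf, obs, next_obs, gamma):
    # Disentangled reward plus potential-based shaping term.
    return rewf(obs) + gamma * shaping_vf(next_obs) - shaping_vf(obs)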
on_traj = ef.add_next_obs(on_traj)
max_pri = on_traj.get_max_pri()
on_traj = ef.set_all_pris(on_traj, max_pri)
on_traj.register_epis()
off_traj.add_traj(on_traj)

total_epi += on_traj.num_epi
step = on_traj.num_step
total_step += step

if args.data_parallel:
    pol.dp_run = True
    targ_pol.dp_run = True
    qf.dp_run = True
    targ_qf.dp_run = True

result_dict = prioritized_ddpg.train(
    off_traj, pol, targ_pol, qf, targ_qf,
    optim_pol, optim_qf, step, args.batch_size,
    args.tau, args.gamma
)

if args.data_parallel:
    pol.dp_run = False
    targ_pol.dp_run = False
    qf.dp_run = False
    targ_qf.dp_run = False
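# Why new episodes get set_all_pris(on_traj, max_pri): prioritized replay
# inserts fresh transitions at the current maximum priority so each one is
# sampled at least once before its TD error can demote it. Sampling is then
# proportional to p_i^alpha, with importance-sampling weights correcting
# the resulting bias. A minimal sketch of that sampling rule, not
# machina's implementation:
import numpy as np

def sample_prioritized(pris, batch_size, alpha=0.6, beta=0.4):
    probs = pris ** alpha
    probs /= probs.sum()                       # P(i) = p_i^alpha / sum_j p_j^alpha
    idxs = np.random.choice(len(pris), batch_size, p=probs)
    ws = (len(pris) * probs[idxs]) ** (-beta)  # importance-sampling weights
    return idxs, ws / ws.max()                 # normalize weights for stability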