# Log the expert's performance as a reference score for the learner.
expert_rewards = [np.sum(epi['rews']) for epi in expert_epis]
expert_mean_rew = np.mean(expert_rewards)
logger.log('expert_score={}'.format(expert_mean_rew))
logger.log('expert_num_epi={}'.format(expert_traj.num_epi))

total_epi = 0
total_step = 0
max_rew = -1e6
kl_beta = args.init_kl_beta

# Optionally warm-start the policy with behavior cloning on the expert data.
if args.pretrain:
    with measure('bc pretrain'):
        for _ in range(args.bc_epoch):
            _ = behavior_clone.train(
                expert_traj, pol, optim_pol, args.bc_batch_size)
        torch.save(pol.state_dict(), os.path.join(
            args.log, 'models', 'pol_bc.pkl'))

while args.max_epis > total_epi:
    with measure('sample'):
        epis = sampler.sample(pol, max_steps=args.max_steps_per_iter)
    with measure('train'):
        traj = Traj()
        traj.add_epis(epis)

        # Annotate the sampled trajectory with value estimates, discounted
        # returns, centered GAE advantages, and hidden-state masks before
        # the policy update.
        traj = ef.compute_vs(traj, vf)
        traj = ef.compute_rets(traj, args.gamma)
        traj = ef.compute_advs(traj, args.gamma, args.lam)
        traj = ef.centerize_advs(traj)
        traj = ef.compute_h_masks(traj)
        traj.register_epis()
    total_epi += traj.num_epi
    step = traj.num_step
    total_step += step

    rewards = [np.sum(epi['rews']) for epi in epis]
    mean_rew = np.mean(rewards)
    logger.record_results(args.log, result_dict, score_file,
                          total_epi, step, total_step,
                          rewards,
                          plot_title=args.env_name)

    # Snapshot the models and optimizers whenever the mean episode reward
    # reaches a new maximum.
    if mean_rew > max_rew:
        torch.save(pol.state_dict(), os.path.join(
            args.log, 'models', 'pol_max.pkl'))
        torch.save(vf.state_dict(), os.path.join(
            args.log, 'models', 'vf_max.pkl'))
        torch.save(optim_pol.state_dict(), os.path.join(
            args.log, 'models', 'optim_pol_max.pkl'))
        torch.save(optim_vf.state_dict(), os.path.join(
            args.log, 'models', 'optim_vf_max.pkl'))
        max_rew = mean_rew

    # Always keep the most recent models as well.
    torch.save(pol.state_dict(), os.path.join(
        args.log, 'models', 'pol_last.pkl'))
    torch.save(vf.state_dict(), os.path.join(
        args.log, 'models', 'vf_last.pkl'))
    torch.save(optim_pol.state_dict(), os.path.join(
        args.log, 'models', 'optim_pol_last.pkl'))
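
# A minimal sketch of what the `ef.compute_advs(traj, args.gamma, args.lam)`
# step above computes: Generalized Advantage Estimation (GAE) over a single
# episode. This is an illustrative standalone version, not machina's API;
# `rews` and `vs` are assumed per-step arrays, and `last_v` bootstraps the
# value of the state after the final step (0.0 for a terminated episode).
def gae_advantages(rews, vs, gamma, lam, last_v=0.0):
    advs = np.zeros_like(rews)
    gae = 0.0
    for t in reversed(range(len(rews))):
        next_v = vs[t + 1] if t + 1 < len(vs) else last_v
        delta = rews[t] + gamma * next_v - vs[t]  # one-step TD error
        gae = delta + gamma * lam * gae           # discounted sum of TD errors
        advs[t] = gae
    return advs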
        # (continuation of the recurrent SAC trainer call; its opening is
        # before this excerpt)
            off_traj,
            pol, qfs, targ_qfs, log_alpha,
            optim_pol, optim_qfs, optim_alpha,
            step // 50,
            args.rnn_batch_size, args.seq_length, args.burn_in_length,
            args.tau, args.gamma, args.sampling, not args.no_reparam
        )

    rewards = [np.sum(epi['rews']) for epi in epis]
    mean_rew = np.mean(rewards)
    logger.record_results(args.log, result_dict, score_file,
                          total_epi, step, total_step,
                          rewards,
                          plot_title=args.env_name)

    # Snapshot the best-performing models; SAC keeps twin Q-functions.
    if mean_rew > max_rew:
        torch.save(pol.state_dict(), os.path.join(
            args.log, 'models', 'pol_max.pkl'))
        torch.save(qf1.state_dict(), os.path.join(
            args.log, 'models', 'qf1_max.pkl'))
        torch.save(qf2.state_dict(), os.path.join(
            args.log, 'models', 'qf2_max.pkl'))
        torch.save(optim_pol.state_dict(), os.path.join(
            args.log, 'models', 'optim_pol_max.pkl'))
        torch.save(optim_qf1.state_dict(), os.path.join(
            args.log, 'models', 'optim_qf1_max.pkl'))
        torch.save(optim_qf2.state_dict(), os.path.join(
            args.log, 'models', 'optim_qf2_max.pkl'))
        max_rew = mean_rew

    torch.save(pol.state_dict(), os.path.join(
        args.log, 'models', 'pol_last.pkl'))
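
# Sketch of the Polyak ("soft") target update that the `args.tau` argument
# above controls: each target Q-network tracks its online counterpart with
# an exponential moving average, which stabilizes the bootstrapped targets.
# `soft_update` is an illustrative helper, not part of the script.
def soft_update(targ_net, net, tau):
    for targ_p, p in zip(targ_net.parameters(), net.parameters()):
        targ_p.data.copy_((1.0 - tau) * targ_p.data + tau * p.data)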
    logger.record_results(args.log, result_dict, score_file,
                          total_epi, step, total_step,
                          rewards,
                          plot_title=args.env_name)

    # save models at regular step intervals
    steps_as = str(int(
        int(total_step / args.steps_per_save_models + 1)
        * args.steps_per_save_models))
    if 'prev_as' in locals():
        if prev_as != steps_as:
            torch.save(pol.state_dict(), os.path.join(
                args.log, 'models', 'pol_' + steps_as + '.pkl'))
            torch.save(qf1.state_dict(), os.path.join(
                args.log, 'models', 'qf1_' + steps_as + '.pkl'))
            torch.save(qf2.state_dict(), os.path.join(
                args.log, 'models', 'qf2_' + steps_as + '.pkl'))
            torch.save(discrim.state_dict(), os.path.join(
                args.log, 'models', 'discrim_' + steps_as + '.pkl'))
            torch.save(optim_pol.state_dict(), os.path.join(
                args.log, 'models', 'optim_pol_' + steps_as + '.pkl'))
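
# Hypothetical helper equivalent to the `steps_as` computation above, shown
# for clarity; e.g. next_save_label(123456, 50000) == '150000', the smallest
# multiple of the save interval strictly greater than the current step count,
# so each interval's checkpoint is written once, when the label changes.
def next_save_label(total_step, interval):
    return str(int(int(total_step / interval + 1) * interval))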