Example #1

# Imports assumed for this snippet (Ray RLlib's pre-1.0 with_updates API);
# the remaining names (actor_critic_loss, add_advantages, ValueNetworkMixin,
# TUNED_A2C_CONFIG) come from elsewhere in the source project.
import torch
from ray.rllib.agents.a3c.a3c_torch_policy import A3CTorchPolicy
from ray.rllib.agents.a3c.a2c import A2CTrainer


# The original example is truncated above this point: only the trailing
# gamma=config['cyclic_lr_gamma']) line survives. The optimizer builder below
# is a hedged reconstruction; the Adam/CyclicLR pairing and the other config
# keys are assumptions.
def torch_optimizer(policy, config):
    optimizer = torch.optim.Adam(policy.model.parameters(), lr=config['lr'])
    policy.lr_scheduler = torch.optim.lr_scheduler.CyclicLR(
        optimizer,
        base_lr=config['cyclic_lr_base_lr'],  # assumed config key
        max_lr=config['cyclic_lr_max_lr'],  # assumed config key
        mode='exp_range',  # the mode in which gamma shrinks the cycle
        gamma=config['cyclic_lr_gamma'],
        cycle_momentum=False)  # Adam has no 'momentum' param group
    return optimizer


# Stats function, extended to also report the current learning rate.
def stats(policy, train_batch):
    return {
        'policy_entropy': policy.entropy.item(),
        'policy_loss': policy.pi_err.item(),
        'vf_loss': policy.value_err.item(),
        'cur_lr': policy._optimizers[0].param_groups[0]['lr'],
    }


def get_policy_class(config):
    return TunedA2CPolicy


TunedA2CPolicy = A3CTorchPolicy.with_updates(
    name='TunedA2CPolicy',
    get_default_config=lambda: TUNED_A2C_CONFIG,
    loss_fn=actor_critic_loss,
    stats_fn=stats,
    postprocess_fn=add_advantages,
    mixins=[ValueNetworkMixin],
    optimizer_fn=torch_optimizer)

TunedA2CTrainer = A2CTrainer.with_updates(name='TunedA2C',
                                          default_config=TUNED_A2C_CONFIG,
                                          default_policy=TunedA2CPolicy,
                                          get_policy_class=get_policy_class)
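
A minimal usage sketch (not part of the original example), assuming Ray's
pre-1.0 Trainer(config=..., env=...) API; CartPole and the iteration count
are stand-ins:

import ray

ray.init()
trainer = TunedA2CTrainer(config=TUNED_A2C_CONFIG, env='CartPole-v0')
for _ in range(10):
    result = trainer.train()
    print(result['episode_reward_mean'])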
Example #2

# Imports assumed for this snippet (Ray RLlib's pre-1.0 API); the remaining
# names (actor_critic_loss, model_extra_out, postprocesses_trajectories,
# ValueNetworkMixin, torch_optimizer, FUN_CONFIG) come from elsewhere in the
# source project.
from ray.rllib.agents.a3c.a3c_torch_policy import A3CTorchPolicy
from ray.rllib.agents.a3c.a2c import A2CTrainer


# The original example is truncated above this point; the function header is
# reconstructed from Example #1 so the snippet parses.
def stats(policy, train_batch):
    return {
        'policy_entropy': policy.entropy.item(),
        'policy_loss': policy.pi_err.item(),
        'manager_loss': policy.manager_loss.item(),
        'manager_vf_loss': policy.manager_value_err.item(),
        'worker_vf_loss': policy.worker_value_err.item(),
        'cur_lr': policy._optimizers[0].param_groups[0]['lr'],
        'fun_intrinsic_reward':
        train_batch['fun_intrinsic_reward'].mean().item()
    }


def get_policy_class(config):
    return FuNPolicy


FuNPolicy = A3CTorchPolicy.with_updates(
    name='FuNPolicy',
    get_default_config=lambda: FUN_CONFIG,
    extra_action_out_fn=model_extra_out,
    postprocess_fn=postprocesses_trajectories,
    loss_fn=actor_critic_loss,
    stats_fn=stats,
    mixins=[ValueNetworkMixin],
    optimizer_fn=torch_optimizer)

FuNTrainer = A2CTrainer.with_updates(name='FuN',
                                     default_config=FUN_CONFIG,
                                     default_policy=FuNPolicy,
                                     get_policy_class=get_policy_class)
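
A hedged launch sketch via Ray Tune (the old tune.run API accepted trainer
classes directly); the environment and stopping criterion are assumptions:

import ray
from ray import tune

ray.init()
tune.run(FuNTrainer,
         config={**FUN_CONFIG, 'env': 'CartPole-v0'},
         stop={'timesteps_total': 100000})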
Example #3

# Imports assumed for this snippet (Ray RLlib's pre-1.0 API); the remaining
# names (actor_critic_loss, model_extra_out, postprocesses_trajectories,
# ValueNetworkMixin, torch_optimizer, WHER_CONFIG) come from elsewhere in the
# source project.
from ray.rllib.agents.a3c.a3c_torch_policy import A3CTorchPolicy
from ray.rllib.agents.a3c.a2c import A2CTrainer


# The original example is truncated above this point; the function header and
# the first two stats entries are reconstructed from Examples #1 and #2.
def stats(policy, train_batch):
    return {
        'policy_entropy': policy.entropy.item(),
        'policy_loss': policy.pi_err.item(),
        'manager_loss': policy.manager_loss.item(),
        'manager_vf_loss': policy.manager_value_err.item(),
        'worker_vf_loss': policy.worker_value_err.item(),
        'cur_lr': policy._optimizers[0].param_groups[0]['lr'],
        'fun_intrinsic_reward':
        train_batch['fun_intrinsic_reward'].mean().item(),
        'icm_loss': policy.icm_loss.item(),
        'exploration_rewards':
        train_batch['exploration_rewards'].mean().item(),
    }


def get_policy_class(config):
    return WherPolicy


WherPolicy = A3CTorchPolicy.with_updates(
    name='WherPolicy',
    get_default_config=lambda: WHER_CONFIG,
    extra_action_out_fn=model_extra_out,
    postprocess_fn=postprocesses_trajectories,
    loss_fn=actor_critic_loss,
    stats_fn=stats,
    mixins=[ValueNetworkMixin],
    optimizer_fn=torch_optimizer)

WherTrainer = A2CTrainer.with_updates(name='Wher',
                                      default_config=WHER_CONFIG,
                                      default_policy=WherPolicy,
                                      get_policy_class=get_policy_class)
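
A sketch of reading the custom stats back after a training step; the result
nesting (info -> learner -> default_policy) follows older RLlib versions and
may differ across releases, and the environment is an assumption:

trainer = WherTrainer(config=WHER_CONFIG, env='CartPole-v0')
result = trainer.train()
learner_stats = result['info']['learner']['default_policy']
print(learner_stats['icm_loss'], learner_stats['exploration_rewards'])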