Ejemplo n.º 1
0
def get_args(rest_args):
    """Parse the arguments for the variational information bottleneck setup.

    Registers the bottleneck options on the parser obtained from
    ``get_base_args()``, parses ``rest_args`` with it, and records whether
    CUDA is available.

    Args:
        rest_args: Argument strings forwarded to ``parse_args``.

    Returns:
        The parsed namespace, with an additional ``cuda`` attribute set to
        the result of ``torch.cuda.is_available()``.
    """
    base_parser = get_base_args()

    # --- BOTTLENECK ---
    base_parser.add_argument(
        "--use_bottleneck",
        type=boolean_argument,
        default=True,
        help=('Whether to use the variational information bottleneck '
              '(default: True).'),
    )
    base_parser.add_argument(
        "--vib_coef",
        type=float,
        default=1e-4,
        # Adjacent string literals are used instead of backslash
        # continuations inside one literal, so that the source indentation
        # does not end up embedded in the help text.
        help=('VIB coefficient in front of KL divergence term in loss. '
              'Operates as a trade-off parameter between the complexity '
              'rate of the representation I(s;z) and the amount of '
              'preserved relevant information I(a;z).'),
    )

    args = base_parser.parse_args(rest_args)
    args.cuda = torch.cuda.is_available()

    return args
Ejemplo n.º 2
0
def get_args(rest_args):
    """Parse the arguments for the auxiliary utility preservation (AUP) setup.

    The AUP options are registered, in order, on the parser obtained from
    ``get_base_args()``; ``rest_args`` is then parsed and CUDA availability
    is stored on the result.

    Args:
        rest_args: Argument strings forwarded to ``parse_args``.

    Returns:
        The parsed namespace, with an additional ``cuda`` attribute set to
        the result of ``torch.cuda.is_available()``.
    """
    parser = get_base_args()

    # --- AUP ---
    # Each entry is (flag, converter, default value, help text).
    aup_options = (
        ("--use_aup", boolean_argument, True,
         'Whether to use the auxiliary utility preservation.'),
        ("--num_q_aux", int, 1,
         'The number of Q_aux functions to use.'),
        ("--aup_coef_start", float, 0.001,
         'Starting coefficient for AUP.'),
        ("--aup_coef_end", float, 0.01,
         'AUP coefficient will be linearly increased over time.'),
        ("--q_aux_dir", str, "q_aux_dir/coinrun/",
         'Directory to load the Q_aux model weights from.'),
    )
    for flag, converter, default_value, help_text in aup_options:
        parser.add_argument(flag,
                            type=converter,
                            default=default_value,
                            help=help_text)

    parsed = parser.parse_args(rest_args)
    parsed.cuda = torch.cuda.is_available()
    return parsed
Ejemplo n.º 3
0
def get_args(rest_args):
    """Parse the arguments for the bottleneck + distribution matching setup.

    Registers the bottleneck, distribution matching and train/validation
    split options on the parser obtained from ``get_base_args()``, parses
    ``rest_args`` with it, and records whether CUDA is available.

    Args:
        rest_args: Argument strings forwarded to ``parse_args``.

    Returns:
        The parsed namespace, with an additional ``cuda`` attribute set to
        the result of ``torch.cuda.is_available()``.
    """
    base_parser = get_base_args()

    # --- BOTTLENECK ---
    base_parser.add_argument("--use_bottleneck", type=boolean_argument, default=True,
                             help='Whether to use the variational information bottleneck (default: True).')
    # Adjacent string literals are used instead of backslash continuations
    # inside one literal, so that the source indentation does not end up
    # embedded in the help text.
    base_parser.add_argument("--vib_coef", type=float, default=1e-4,
                             help='DVIB coefficient in front of KL divergence term in loss. '
                                  'Operates as a trade-off parameter between the complexity rate '
                                  'of the representation I(s;z) and the amount of preserved '
                                  'relevant information I(a;z).')

    # --- DISTRIBUTION MATCHING ---
    base_parser.add_argument("--use_dist_matching", type=boolean_argument, default=True,
                             help='Whether to optimise the distribution matching loss.')

    # hyperparameters
    base_parser.add_argument("--dist_matching_loss", type=str, default="kl",
                             help='Which divergence to use for calculating the loss for distribution matching {kl, jsd}.')
    base_parser.add_argument("--dist_matching_coef", type=float, default=1e-3,
                             help='Coefficient in front distribution matching loss term.')

    # splitting train envs and levels
    base_parser.add_argument("--percentage_levels_train", type=float, default=0.8,
                             help='Proportion of the train levels to use for train and the rest is used for validation. Range is [0,1]')
    base_parser.add_argument("--num_val_envs", type=int, default=10,
                             help='Number of environments from --num_processes to use for validation.')

    args = base_parser.parse_args(rest_args)
    args.cuda = torch.cuda.is_available()

    return args
Ejemplo n.º 4
0
def get_args(rest_args):
    """Parse the arguments for training R_aux, the CB-VAE and Q_aux.

    Registers the general training, CB-VAE and Q_aux options on the parser
    obtained from ``get_base_args()``, parses ``rest_args`` with it, and
    records whether CUDA is available.

    Args:
        rest_args: Argument strings forwarded to ``parse_args``.

    Returns:
        The parsed namespace, with an additional ``cuda`` attribute set to
        the result of ``torch.cuda.is_available()``.
    """
    base_parser = get_base_args()

    # --- GENERAL ---
    # train parameters
    # The frame-count defaults are written as integers: argparse applies
    # ``type`` only to string defaults, so a float literal such as 1e5 would
    # be stored as a float despite ``type=int``.
    base_parser.add_argument(
        '--num_frames_r_aux',
        type=int,
        default=100000,
        help=
        'number of frames to train for training the auxiliary reward function R_aux.'
    )
    base_parser.add_argument(
        '--num_frames_q_aux',
        type=int,
        default=1000000,
        help='number of frames to train for training the Q-function Q_aux.')

    # --- CB-VAE ---
    base_parser.add_argument(
        "--cb_vae_latent_dim",
        type=int,
        default=1,
        help=
        'The size of the latent dimension. We have only implemented the case when latent_dim=1.'
    )
    base_parser.add_argument("--cb_vae_epochs",
                             type=int,
                             default=100,
                             help='Number of epochs to train the CB-VAE.')
    base_parser.add_argument("--cb_vae_batch_size",
                             type=int,
                             default=2048,
                             help='The batch size for training the CB-VAE.')
    base_parser.add_argument("--cb_vae_learning_rate",
                             type=float,
                             default=5e-4,
                             help='The learning rate for the ADAM optimiser.')
    base_parser.add_argument(
        "--cb_vae_num_samples",
        type=int,
        default=7,
        help=
        'The number of reconstruction samples to show from the trained CB-VAE.'
    )

    # --- Q_aux ---
    # The default is a ``.pt`` file, so the help text describes a path
    # rather than a directory.
    base_parser.add_argument("--q_aux_path",
                             type=str,
                             default="q_aux_dir/coinrun/0.pt",
                             help='Path to save the Q_aux model weights.')

    args = base_parser.parse_args(rest_args)
    args.cuda = torch.cuda.is_available()

    return args