Example #1
0
def get_parser(allow_policy_list=False):
    parser = argparse.ArgumentParser()

    parser.add_argument('--universe',
                        type=str,
                        choices=UNIVERSES,
                        default=DEFAULT_UNIVERSE)
    parser.add_argument('--domain',
                        type=str,
                        choices=AVAILABLE_DOMAINS,
                        default=DEFAULT_DOMAIN)
    parser.add_argument('--task',
                        type=str,
                        choices=AVAILABLE_TASKS,
                        default=DEFAULT_TASK)
    parser.add_argument('--sampler',
                        type=str,
                        choices=AVAILABLE_SAMPLERS,
                        default=DEFAULT_SAMPLER)
    parser.add_argument('--replay_pool',
                        type=str,
                        choices=AVAILABLE_REPLAY_POOLS,
                        default=DEFAULT_REPLAY_POOL)

    parser.add_argument(
        '--checkpoint-replay-pool',
        type=lambda x: bool(strtobool(x)),
        default=None,
        help=("Whether a checkpoint should also saved the replay"
              " pool. If set, takes precedence over"
              " variant['run_params']['checkpoint_replay_pool']."
              " Note that the replay pool is saved (and "
              " constructed) piece by piece so that each"
              " experience is saved only once."))

    parser.add_argument('--algorithm',
                        type=str,
                        choices=AVAILABLE_ALGORITHMS,
                        default=DEFAULT_ALGORITHM)
    if allow_policy_list:
        parser.add_argument('--policy',
                            type=str,
                            nargs='+',
                            choices=('gaussian', ),
                            default='gaussian')
    else:
        parser.add_argument('--policy',
                            type=str,
                            choices=('gaussian', ),
                            default='gaussian')

    parser.add_argument('--exp-name', type=str, default=datetimestamp())
    parser.add_argument('--mode', type=str, default='local')
    parser.add_argument('--confirm-remote',
                        type=lambda x: bool(strtobool(x)),
                        nargs='?',
                        const=True,
                        default=True,
                        help="Whether or not to query yes/no on remote run.")

    parser.add_argument('--video-save-frequency',
                        type=int,
                        default=None,
                        help="Save frequency for videos.")

    parser = add_ray_init_args(parser)
    parser = add_ray_tune_args(parser)

    return parser
Example #2
0
def unique_cluster_name(args):
    cluster_name_parts = (datetimestamp(''), str(uuid.uuid4())[:6],
                          args.domain, args.task)
    cluster_name = "-".join(cluster_name_parts).lower()
    return cluster_name
Example #3
0
def launch_experiments_ray(variant_specs,
                           args,
                           local_dir,
                           experiment_fn,
                           scheduler=None):
    import ray
    from ray import tune

    tune.register_trainable('mujoco-runner', experiment_fn)

    resources_per_trial = _normalize_trial_resources(args.resources_per_trial,
                                                     args.trial_cpus,
                                                     args.trial_gpus,
                                                     args.trial_extra_cpus,
                                                     args.trial_extra_gpus)

    if 'local' in args.mode or 'debug' in args.mode:
        resources = args.resources or {}

        if 'debug' in args.mode:
            # Require a debug resource for each trial, so that we never run
            # more than one trial at a time. This makes debugging easier, since
            # the debugger stdout behaves more reasonably with single process.
            # TODO(hartikainen): Change this from 'extra_gpu' to
            # 'debug-resource' once tune supports custom resources.
            # See: https://github.com/ray-project/ray/pull/2979.
            resources['extra_gpu'] = 1
            resources_per_trial['extra_gpu'] = 1

        ray.init(resources=resources, num_cpus=args.cpus, num_gpus=args.gpus)
    else:
        ray.init(redis_address=ray.services.get_node_ip_address() + ':6379')

    datetime_prefix = datetimestamp()
    experiment_id = '-'.join((datetime_prefix, args.exp_name))

    tune.run_experiments(
        {
            "{}-{}".format(experiment_id, i): {
                'run':
                'mujoco-runner',
                'resources_per_trial':
                resources_per_trial,
                'config':
                variant_spec,
                'local_dir':
                local_dir,
                'num_samples':
                args.num_samples,
                'upload_dir':
                args.upload_dir,
                'checkpoint_freq':
                (args.checkpoint_frequency if args.checkpoint_frequency
                 is not None else variant_spec['run_params'].get(
                     'checkpoint_frequency', 0)),
                'checkpoint_at_end':
                (args.checkpoint_at_end if args.checkpoint_at_end
                 is not None else variant_spec['run_params'].get(
                     'checkpoint_at_end', True)),
                'restore':
                args.restore,  # Defaults to None
            }
            for i, variant_spec in enumerate(variant_specs)
        },
        scheduler=scheduler,
    )
Example #4
0
def get_parser(allow_policy_list=False):
    parser = argparse.ArgumentParser()

    parser.add_argument('--universe',
                        type=str,
                        choices=UNIVERSES,
                        default=None)
    parser.add_argument('--domain',
                        type=str,
                        choices=AVAILABLE_DOMAINS,
                        default=None)
    parser.add_argument('--task',
                        type=str,
                        choices=AVAILABLE_TASKS,
                        default=DEFAULT_TASK)
    parser.add_argument('--num-samples', type=int, default=1)

    parser.add_argument('--resources',
                        type=json.loads,
                        default=None,
                        help=("Resources to allocate to ray process. Passed"
                              " to `ray.init`."))
    parser.add_argument('--cpus',
                        type=int,
                        default=None,
                        help=("Cpus to allocate to ray process. Passed"
                              " to `ray.init`."))
    parser.add_argument('--gpus',
                        type=int,
                        default=None,
                        help=("Gpus to allocate to ray process. Passed"
                              " to `ray.init`."))

    parser.add_argument('--resources-per-trial',
                        type=json.loads,
                        default={},
                        help=("Resources to allocate for each trial. Passed"
                              " to `tune.run_experiments`."))
    parser.add_argument('--trial-cpus',
                        type=int,
                        default=multiprocessing.cpu_count(),
                        help=("Resources to allocate for each trial. Passed"
                              " to `tune.run_experiments`."))
    parser.add_argument('--trial-gpus',
                        type=float,
                        default=None,
                        help=("Resources to allocate for each trial. Passed"
                              " to `tune.run_experiments`."))
    parser.add_argument(
        '--trial-extra-cpus',
        type=int,
        default=None,
        help=("Extra CPUs to reserve in case the trials need to"
              " launch additional Ray actors that use CPUs."))
    parser.add_argument(
        '--trial-extra-gpus',
        type=float,
        default=None,
        help=("Extra GPUs to reserve in case the trials need to"
              " launch additional Ray actors that use GPUs."))

    parser.add_argument(
        '--checkpoint-frequency',
        type=int,
        default=None,
        help=("Save the training checkpoint every this many"
              " epochs. If set, takes precedence over"
              " variant['run_params']['checkpoint_frequency']."))
    parser.add_argument('--checkpoint-at-end',
                        type=lambda x: bool(strtobool(x)),
                        default=None,
                        help=("Whether a checkpoint should be saved at the end"
                              " of training. If set, takes precedence over"
                              " variant['run_params']['checkpoint_at_end']."))
    parser.add_argument(
        '--checkpoint-replay-pool',
        type=lambda x: bool(strtobool(x)),
        default=None,
        help=("Whether a checkpoint should also saved the replay"
              " pool. If set, takes precedence over"
              " variant['run_params']['checkpoint_replay_pool']."
              " Note that the replay pool is saved (and "
              " constructed) piece by piece so that each"
              " experience is saved only once."))
    parser.add_argument('--restore',
                        type=str,
                        default=None,
                        help=("Path to checkpoint. Only makes sense to set if"
                              " running 1 trial. Defaults to None."))

    if allow_policy_list:
        parser.add_argument('--policy',
                            type=str,
                            nargs='+',
                            choices=('gaussian', ),
                            default='gaussian')
    else:
        parser.add_argument('--policy',
                            type=str,
                            choices=('gaussian', ),
                            default='gaussian')
    parser.add_argument('--env', type=str, default='gym-swimmer-default')
    parser.add_argument('--exp-name', type=str, default=datetimestamp())
    parser.add_argument('--mode', type=str, default='local')
    parser.add_argument('--log-dir', type=str, default=None)
    parser.add_argument('--upload-dir',
                        type=str,
                        default='',
                        help=("Optional URI to sync training results to (e.g."
                              " s3://<bucket> or gs://<bucket>)."))

    parser.add_argument("--confirm-remote",
                        type=lambda x: bool(strtobool(x)),
                        nargs='?',
                        const=True,
                        default=True,
                        help="Whether or not to query yes/no on remote run.")

    return parser
Example #5
0
def get_parser(allow_policy_list=False):
    parser = argparse.ArgumentParser()

    parser.add_argument('--universe',
                        type=str,
                        choices=AVAILABLE_UNIVERSES,
                        default=DEFAULT_UNIVERSE)
    parser.add_argument('--domain', type=str, default=DEFAULT_DOMAIN)
    parser.add_argument('--task', type=str, default=DEFAULT_TASK)
    parser.add_argument('--task-evaluation', type=str, default=DEFAULT_TASK)
    parser.add_argument('--n-epochs', type=int, default=1000)
    parser.add_argument(
        '--checkpoint-replay-pool',
        type=lambda x: bool(strtobool(x)),
        default=None,
        help=("Whether a checkpoint should also saved the replay"
              " pool. If set, takes precedence over"
              " variant['run_params']['checkpoint_replay_pool']."
              " Note that the replay pool is saved (and "
              " constructed) piece by piece so that each"
              " experience is saved only once."))

    parser.add_argument('--algorithm',
                        type=str,
                        choices=AVAILABLE_ALGORITHMS,
                        default=DEFAULT_ALGORITHM)
    if allow_policy_list:
        parser.add_argument('--policy',
                            type=str,
                            nargs='+',
                            choices=('gaussian', ),
                            default='gaussian')
    else:
        parser.add_argument('--policy',
                            type=str,
                            choices=('gaussian', ),
                            default='gaussian')

    parser.add_argument('--exp-name', type=str, default=datetimestamp())
    parser.add_argument('--mode', type=str, default='local')
    parser.add_argument('--confirm-remote',
                        type=lambda x: bool(strtobool(x)),
                        nargs='?',
                        const=True,
                        default=True,
                        help="Whether or not to query yes/no on remote run.")

    parser.add_argument('--video-save-frequency',
                        type=int,
                        default=None,
                        help="Save frequency for videos.")
    parser.add_argument('--save-training-video-frequency',
                        type=int,
                        default=None,
                        help="Save frequency for training videos.")

    from softlearning.preprocessors.utils import PREPROCESSOR_FUNCTIONS
    parser.add_argument('--preprocessor-type',
                        type=str,
                        default=None,
                        help="Preprocessor type for observations.",
                        choices=list(PREPROCESSOR_FUNCTIONS.keys()))
    parser.add_argument('--vision',
                        type=lambda x: bool(strtobool(x)),
                        default=True)

    parser = add_ray_init_args(parser)
    parser = add_ray_tune_args(parser)

    return parser