def run_experiment(argv):

    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument('--n_gpu', type=int, default=0, help='Number of GPUs')
    parser.add_argument('--ctx',
                        type=int,
                        default=4,
                        help='Number of tasks per GPU')

    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------
    vg = VariantGenerator()

    vg.add('seed', [22, 23, 24])

    # env spec
    vg.add('env', ['WalkerEnvRandomParams'])
    vg.add('log_scale_limit', [0.0])
    vg.add('target_velocity', [None])
    vg.add('path_length_env', [200])

    # Model-based MAML algo spec
    vg.add('n_itr', [500])
    vg.add('fast_lr', [0.001])
    vg.add('outer_lr', [1e-3])
    vg.add('meta_batch_size', [20])  # must be a multiple of num_models
    vg.add('discount', [0.99])
    vg.add('entropy_bonus', [0])
    vg.add('clip_eps', [0.5, 0.7])
    vg.add('target_inner_step', [3e-3, 1e-2, 3e-2])
    vg.add('init_kl_penalty', [1e-10])
    vg.add('adaptive_kl_penalty', [True])
    vg.add('max_epochs', [8])
    vg.add('num_batches', [1])

    vg.add('batch_size_env_samples', [1])
    vg.add('batch_size_dynamics_samples', [50])
    vg.add('initial_random_samples', [5000])
    vg.add('num_maml_steps_per_iter', [5, 15, 25])
    vg.add('retrain_model_when_reward_decreases', [False])
    vg.add('reset_from_env_traj', [False])
    vg.add('trainable_step_size', [False])
    vg.add('num_models', [5])

    # neural network configuration
    vg.add('hidden_nonlinearity_policy', ['tanh'])
    vg.add('hidden_nonlinearity_model', ['relu'])
    vg.add('hidden_sizes_policy', [(32, 32)])
    vg.add('hidden_sizes_model', [(512, 512, 512)])
    vg.add('weight_normalization_model', [True])
    vg.add('reset_policy_std', [False])
    vg.add('reinit_model_cycle', [0])
    vg.add('optimizer_model', ['adam'])
    vg.add('policy', ['MAMLImprovedGaussianMLPPolicy'])
    vg.add('bias_transform', [False])
    vg.add('param_noise_std', [0.0])
    vg.add('dynamic_model_max_epochs', [(500, 500)])

    vg.add('valid_split_ratio', [0.2])
    vg.add('rolling_average_persitency', [0.95])

    # other stuff
    vg.add('exp_prefix', [EXP_PREFIX])

    variants = vg.variants()
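
    # Added sanity check: the comment on 'meta_batch_size' above requires it
    # to be a multiple of 'num_models'; verify this for every generated
    # variant before launching anything.
    assert all(v['meta_batch_size'] % v['num_models'] == 0 for v in variants)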

    default_dict = dict(exp_prefix=EXP_PREFIX,
                        snapshot_mode="gap",
                        snapshot_gap=5,
                        periodic_sync=True,
                        sync_s3_pkl=True,
                        sync_s3_log=True,
                        python_command="python3",
                        pre_commands=[
                            "yes | pip install tensorflow=='1.6.0'",
                            "pip list",
                            "yes | pip install --upgrade cloudpickle"
                        ],
                        use_cloudpickle=True,
                        variants=variants)
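    # 'default_dict' bundles the shared run_experiment_lite settings plus all
    # variants; presumably the multi-GPU launcher below distributes them
    # across its workers.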

    if args.mode == 'mgpu':
        current_path = os.path.dirname(os.path.abspath(__file__))
        script_path = os.path.join(current_path, 'gpu-mb-mpo-train.py')
        n_gpu = args.n_gpu
        if n_gpu == 0:
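            # Heuristic: count the device entries under /proc/driver/nvidia/gpus.
            # This is Linux-only and assumes the NVIDIA driver is loaded; pass
            # --n_gpu explicitly on other setups.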
            n_gpu = len(os.listdir('/proc/driver/nvidia/gpus'))
        run_multi_gpu(script_path,
                      default_dict,
                      n_gpu=n_gpu,
                      ctx_per_gpu=args.ctx)

    else:
        # ----------------------- AWS configuration ---------------------------------
        if args.mode == 'ec2':
            info = config.INSTANCE_TYPE_INFO[ec2_instance]
            n_parallel = int(info["vCPU"])
        else:
            n_parallel = 12

        if args.mode == 'ec2':

            config.AWS_INSTANCE_TYPE = ec2_instance
            config.AWS_SPOT_PRICE = str(info["price"])
            subnets = cheapest_subnets(ec2_instance,
                                       num_subnets=NUM_EC2_SUBNETS)
            print(
                "\n" + "**********" * 10 +
                "\nexp_prefix: {}\nvariants: {}".format(EXP_PREFIX,
                                                        len(variants)))
            print(
                'Running on type {}, with price {}, on the subnets: '.format(
                    config.AWS_INSTANCE_TYPE,
                    config.AWS_SPOT_PRICE,
                ), str(subnets))

        # ----------------------- TRAINING ---------------------------------------
        exp_ids = random.sample(range(1, 1000), len(variants))
        for v, exp_id in zip(variants, exp_ids):
            exp_name = "model_ensemble_maml_train_env_%s_%i_%i_%i_%i_id_%i" % (
                v['env'], v['path_length_env'], v['num_models'],
                v['batch_size_env_samples'], v['seed'], exp_id)
            v = instantiate_class_stings(v)

            if args.mode == 'ec2':
                subnet = random.choice(subnets)
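                # 'subnet' is an availability-zone name such as 'us-west-1a';
                # stripping the trailing zone letter yields the region name.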
                config.AWS_REGION_NAME = subnet[:-1]
                config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                    config.AWS_REGION_NAME]
                config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                    config.AWS_REGION_NAME]
                config.AWS_SECURITY_GROUP_IDS = \
                    config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                        config.AWS_REGION_NAME]

            run_experiment_lite(
                run_train_task,
                exp_prefix=EXP_PREFIX,
                exp_name=exp_name,
                # Number of parallel workers for sampling
                n_parallel=n_parallel,
                snapshot_mode="gap",
                snapshot_gap=5,
                periodic_sync=True,
                sync_s3_pkl=True,
                sync_s3_log=True,
                # Specifies the seed for the experiment. If this is not provided, a random seed
                # will be used
                seed=v["seed"],
                python_command="python3",
                pre_commands=[
                    "yes | pip install tensorflow=='1.6.0'", "pip list",
                    "yes | pip install --upgrade cloudpickle"
                ],
                mode=args.mode,
                use_cloudpickle=True,
                variant=v,
            )
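
A hypothetical entry point for a script like this (the guard and argv wiring
are illustrative, not taken from the source):

if __name__ == '__main__':
    run_experiment(sys.argv)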
Example #2
def run_experiment(argv):

    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument('--n_gpu', type=int, default=0, help='Number of GPUs')
    parser.add_argument('--ctx',
                        type=int,
                        default=4,
                        help='Number of tasks per GPU')

    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------

    vg = VariantGenerator()
    vg.add('env', ['HalfCheetahEnvRandParams'])  # TODO: ReacherEnvRandParams
    vg.add('n_itr', [40])
    vg.add('log_scale_limit', [0.0])
    vg.add('step_size', [0.01])
    vg.add('seed', [22, 33, 55])  #TODO set back to [1, 11, 21, 31, 41]
    vg.add('discount', [0.99])
    vg.add('path_length', [100])
    vg.add('batch_size_env_samples', [4000])
    vg.add('batch_size_dynamics_samples', [100000])
    vg.add('initial_random_samples', [None])
    vg.add('dynamic_model_epochs', [(1000, 1000)])  #TODO
    vg.add('num_gradient_steps_per_iter', [30])  #TODO
    vg.add('hidden_nonlinearity_policy', ['tanh'])
    vg.add('hidden_nonlinearity_model', ['relu'])
    vg.add('hidden_sizes_policy', [(32, 32)])
    vg.add('hidden_sizes_model', [(512, 512)])
    vg.add('weight_normalization_model', [False])
    vg.add('retrain_model_when_reward_decreases', [False])
    vg.add('reset_policy_std', [False])
    vg.add('reinit_model_cycle', [0])
    vg.add('num_models', [5])

    vg.add('output_bias_range', [(0, 0.1), (0, 0.5), (0, 1)])
    vg.add('output_noise_std', [0.0, 0.1])
    vg.add('resample_output_bias', [True, False])

    vg.add('exp_prefix', [EXP_PREFIX])

    variants = vg.variants()
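    # Note: vg.variants() enumerates the cartesian product of all value lists;
    # here 3 seeds x 3 output_bias_range x 2 output_noise_std x
    # 2 resample_output_bias = 36 variants.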

    default_dict = dict(exp_prefix=EXP_PREFIX,
                        snapshot_mode="gap",
                        snapshot_gap=10,
                        periodic_sync=True,
                        sync_s3_pkl=True,
                        sync_s3_log=True,
                        python_command="python3",
                        pre_commands=[
                            "yes | pip install tensorflow=='1.6.0'",
                            "pip list",
                            "yes | pip install --upgrade cloudpickle"
                        ],
                        use_cloudpickle=True,
                        variants=variants)

    if args.mode == 'mgpu':
        current_path = os.path.dirname(os.path.abspath(__file__))
        script_path = os.path.join(current_path,
                                   'mgpu_model_ensemble_trpo_train.py')
        n_gpu = args.n_gpu
        if n_gpu == 0:
            n_gpu = len(os.listdir('/proc/driver/nvidia/gpus'))
        run_multi_gpu(script_path,
                      default_dict,
                      n_gpu=n_gpu,
                      ctx_per_gpu=args.ctx)

    else:
        # ----------------------- AWS configuration ---------------------------------
        if args.mode == 'ec2':
            info = config.INSTANCE_TYPE_INFO[ec2_instance]
            n_parallel = int(info["vCPU"] / 2)  # use half the vCPUs on ec2
        else:
            n_parallel = 6

        if args.mode == 'ec2':

            config.AWS_INSTANCE_TYPE = ec2_instance
            config.AWS_SPOT_PRICE = str(info["price"])
            subnets = cheapest_subnets(ec2_instance,
                                       num_subnets=NUM_EC2_SUBNETS)

            print(
                "\n" + "**********" * 10 +
                "\nexp_prefix: {}\nvariants: {}".format(EXP_PREFIX,
                                                        len(variants)))
            print(
                'Running on type {}, with price {}, on the subnets: '.format(
                    config.AWS_INSTANCE_TYPE,
                    config.AWS_SPOT_PRICE,
                ), str(subnets))

        # ----------------------- TRAINING ---------------------------------------
        exp_ids = random.sample(range(1, 1000), len(variants))
        for v, exp_id in zip(variants, exp_ids):
            exp_name = "model_trpo_train_env_%s_%i_%i_%i_%i_id_%i" % (
                v['env'], v['path_length'], v['num_gradient_steps_per_iter'],
                v['batch_size_env_samples'], v['seed'], exp_id)
            v = instantiate_class_stings(v)

            if args.mode == 'ec2':
                subnet = random.choice(subnets)
                config.AWS_REGION_NAME = subnet[:-1]
                config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                    config.AWS_REGION_NAME]
                config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                    config.AWS_REGION_NAME]
                config.AWS_SECURITY_GROUP_IDS = \
                    config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                        config.AWS_REGION_NAME]

            run_experiment_lite(
                run_train_task,
                exp_prefix=EXP_PREFIX,
                exp_name=exp_name,
                # Number of parallel workers for sampling
                n_parallel=n_parallel,
                snapshot_mode="gap",
                snapshot_gap=5,
                periodic_sync=True,
                sync_s3_pkl=True,
                sync_s3_log=True,
                # Specifies the seed for the experiment. If this is not provided, a random seed
                # will be used
                seed=v["seed"],
                python_command='python3',
                pre_commands=[
                    "yes | pip install tensorflow=='1.6.0'", "pip list",
                    "yes | pip install --upgrade cloudpickle"
                ],
                mode=args.mode,
                use_cloudpickle=True,
                variant=v,
            )
Example #3
def run_experiment(argv):

    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')

    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------

    vg = VariantGenerator()
    vg.add('env', ['WalkerEnvRandomParams', 'HopperEnvRandParams'])
    vg.add('n_itr', [301])
    vg.add('fast_lr', [0.001, 0.01, 0.1])
    vg.add('meta_batch_size', [40])
    vg.add('num_grad_updates', [1])
    vg.add('meta_step_size', [0.01])
    vg.add('fast_batch_size', [20])
    vg.add('seed', [1, 11, 21])
    vg.add('discount', [0.99])
    vg.add('path_length', [100])
    vg.add('hidden_nonlinearity', ['tanh'])
    vg.add('hidden_sizes', [(64, 64)])
    vg.add('trainable_step_size', [False])
    vg.add('bias_transform', [False])
    vg.add('policy', ['MAMLGaussianMLPPolicy'])
    vg.add('parallel_sampler', [True])

    variants = vg.variants()

    # ----------------------- AWS configuration ---------------------------------
    if args.mode == 'ec2':
        subnets = cheapest_subnets(ec2_instance, num_subnets=3)
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])

        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format(EXP_PREFIX, len(variants)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    # For MAML, use a small number of parallel workers, since parallelization
    # is also done over the meta batch size.
    n_parallel = 1

    # ----------------------- TRAINING ---------------------------------------
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "%s_%s_%i_%.3f_%i_id_%i" % (
            EXP_PREFIX, v['env'], v['hidden_sizes'][0], v['meta_step_size'],
            v['seed'], exp_id)
        v = instantiate_class_stings(v)

        if args.mode == 'ec2':
            # configure instance

            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last_gap",
            snapshot_gap=50,
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            pre_commands=[
                "yes | pip install tensorflow=='1.6.0'",
                "yes | pip install --upgrade cloudpickle"
            ],
            seed=v["seed"],
            python_command="python3",
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
Example #4
# Note: run_train_task below also uses TfEnv, normalize, and
# GaussianMLPPolicy; these imports are assumed to live at their usual
# rllab/sandbox paths.
from sandbox.rocky.tf.algos.trpo import TRPO
from sandbox.rocky.tf.envs.base import TfEnv
from sandbox.rocky.tf.policies.gaussian_mlp_policy import GaussianMLPPolicy
from rllab.envs.normalized_env import normalize
from rllab.misc.instrument import run_experiment_lite
from sandbox.ours.envs.mujoco import CassieEnv
from rllab.misc.instrument import VariantGenerator
from rllab import config
from experiments.helpers.ec2_helpers import cheapest_subnets

import tensorflow as tf
import sys
import argparse
import random

EXP_PREFIX = 'cassie-trpo-env-params'

ec2_instance = 'c4.2xlarge'
subnets = cheapest_subnets(ec2_instance, num_subnets=3)


def run_train_task(vv):

    env = TfEnv(
        normalize(
            CassieEnv(fixed_gains=vv['fixed_gains'],
                      stability_cost_coef=vv['stability_cost_coef'],
                      ctrl_cost_coef=vv['ctrl_cost_coef'],
                      alive_bonus=vv['alive_bonus'])))

    policy = GaussianMLPPolicy(name="policy",
                               env_spec=env.spec,
                               hidden_sizes=vv['hidden_sizes'],
                               hidden_nonlinearity=vv['hidden_nonlinearity'])
Example #5
def run_experiment(argv):

    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument('--n_gpu', type=int, default=0, help='Number of GPUs')
    parser.add_argument('--ctx',
                        type=int,
                        default=4,
                        help='Number of tasks per GPU')

    args = parser.parse_args(argv[1:])

    # -------------------- Define Variants -----------------------------------

    vg = VariantGenerator()

    vg.add('seed', [22, 33])

    # env spec
    vg.add('env', ['HalfCheetahEnvRandParams'])
    vg.add('log_scale_limit', [0.0])
    vg.add('path_length', [200, 500])

    # Model-based MAML algo spec
    vg.add('n_itr', [100])
    vg.add('step_size', [0.01])
    vg.add('discount', [0.99])

    vg.add('batch_size_env_samples', [4000])
    vg.add('initial_random_samples', [4000])
    vg.add('num_models', [5, 10])
    vg.add('n_candidates', [1000])
    vg.add('horizon', [10])
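    # Presumably random-shooting MPC: 'n_candidates' action sequences of
    # length 'horizon' are sampled and scored under the learned dynamics
    # models, and the best first action is executed.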

    # neural network configuration
    vg.add('hidden_nonlinearity_model', ['relu'])
    vg.add('hidden_sizes_model', [(512, 512)])
    vg.add('dynamic_model_epochs', [(200, 200)])
    vg.add('weight_normalization_model', [True])
    vg.add('reinit_model_cycle', [0])

    vg.add('valid_split_ratio', [0.2])
    vg.add('rolling_average_persitency', [0.99])

    # other stuff
    vg.add('exp_prefix', [EXP_PREFIX])

    variants = vg.variants()

    default_dict = dict(exp_prefix=EXP_PREFIX,
                        snapshot_mode="gap",
                        snapshot_gap=5,
                        periodic_sync=True,
                        sync_s3_pkl=True,
                        sync_s3_log=True,
                        python_command="python3",
                        pre_commands=[
                            "yes | pip install tensorflow=='1.6.0'",
                            "pip list",
                            "yes | pip install --upgrade cloudpickle"
                        ],
                        use_cloudpickle=True,
                        variants=variants)

    # ----------------------- AWS configuration ---------------------------------
    if args.mode == 'ec2':
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        n_parallel = int(info["vCPU"] / 2)  # use half the vCPUs on ec2
    else:
        n_parallel = 6

    if args.mode == 'ec2':

        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])
        subnets = cheapest_subnets(ec2_instance, num_subnets=NUM_EC2_SUBNETS)

        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format(EXP_PREFIX, len(variants)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    # ----------------------- TRAINING ---------------------------------------
    exp_ids = random.sample(range(1, 1000), len(variants))
    for v, exp_id in zip(variants, exp_ids):
        exp_name = "model_based_mpc_train_env_%s_%i_%i_%i_id_%i" % (
            v['env'], v['path_length'], v['batch_size_env_samples'], v['seed'],
            exp_id)
        v = instantiate_class_stings(v)

        if args.mode == 'ec2':
            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_train_task,
            exp_prefix=EXP_PREFIX,
            exp_name=exp_name,
            # Number of parallel workers for sampling
            n_parallel=n_parallel,
            snapshot_mode="gap",
            snapshot_gap=5,
            periodic_sync=True,
            sync_s3_pkl=True,
            sync_s3_log=True,
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=v["seed"],
            python_command='python3',
            pre_commands=[
                "yes | pip install tensorflow=='1.6.0'", "pip list",
                "yes | pip install --upgrade cloudpickle"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            variant=v,
        )
def run_evaluation(argv):

    # -------------------- Parse Arguments -----------------------------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'exp_prefix_dir',
        type=str,
        help='path to the dump dir that contains one folder per training run '
        '(each with params.pkl and variant.json files)')
    parser.add_argument(
        '--mode',
        type=str,
        default='local',
        help='Mode for running the experiments - local: runs on local machine, '
        'ec2: runs on AWS ec2 cluster (requires a proper configuration file)')
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help='Number of parallel workers to perform rollouts. '
        '0 => don\'t start any workers')
    parser.add_argument('--num_sampled_envs',
                        type=int,
                        default=5,
                        help='number of environments with sampled parameters')

    args = parser.parse_args(argv[1:])

    # ----------------------- EVALUATION ---------------------------------------

    exp_prefix = os.path.basename(args.exp_prefix_dir)
    eval_exp_prefix = exp_prefix + '-eval'
    evaluation_runs = eval.prepare_evaluation_runs(
        args.exp_prefix_dir,
        EXP_PREFIX,
        num_sampled_envs=args.num_sampled_envs)
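    # 'evaluation_runs' is assumed to be a list of (exp_name, variant) pairs,
    # one per params.pkl / variant.json pair found under exp_prefix_dir (see
    # the loop below).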

    # ----------------------- AWS configuration ---------------------------------
    if args.mode == 'ec2':
        subnets = cheapest_subnets(ec2_instance, num_subnets=3)
        info = config.INSTANCE_TYPE_INFO[ec2_instance]
        config.AWS_INSTANCE_TYPE = ec2_instance
        config.AWS_SPOT_PRICE = str(info["price"])

        print("\n" + "**********" * 10 +
              "\nexp_prefix: {}\nvariants: {}".format(eval_exp_prefix,
                                                      len(evaluation_runs)))
        print(
            'Running on type {}, with price {}, on the subnets: '.format(
                config.AWS_INSTANCE_TYPE,
                config.AWS_SPOT_PRICE,
            ), str(subnets))

    for eval_exp_name, v in evaluation_runs:

        if args.mode == 'ec2':
            subnet = random.choice(subnets)
            config.AWS_REGION_NAME = subnet[:-1]
            config.AWS_KEY_NAME = config.ALL_REGION_AWS_KEY_NAMES[
                config.AWS_REGION_NAME]
            config.AWS_IMAGE_ID = config.ALL_REGION_AWS_IMAGE_IDS[
                config.AWS_REGION_NAME]
            config.AWS_SECURITY_GROUP_IDS = \
                config.ALL_REGION_AWS_SECURITY_GROUP_IDS[
                    config.AWS_REGION_NAME]

        run_experiment_lite(
            run_eval_task,
            exp_prefix=eval_exp_prefix,
            exp_name=eval_exp_name,
            # Number of parallel workers for sampling
            n_parallel=args.n_parallel,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            # Specifies the seed for the experiment. If this is not provided, a random seed
            # will be used
            seed=v["seed"],
            python_command='python3',
            pre_commands=[
                "yes | pip install --upgrade pip",
                "yes | pip install tensorflow=='1.6.0'",
                "yes | pip install --upgrade cloudpickle"
            ],
            mode=args.mode,
            use_cloudpickle=True,
            periodic_sync=True,
            variant=v,
            # plot=True,
            # terminate_machine=False,
        )
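
As with run_experiment, run_evaluation is presumably dispatched from the
script's entry point; a hypothetical invocation (script name and path are
illustrative):

    python experiment.py data/s3/<exp_prefix_dir> --mode local --n_parallel 4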