Example #1
 def __call__(self, rest_endpoint, experiment_dir, nni_source_dir, **kwargs):
     """Attach to the NNI experiment served at *rest_endpoint* and print its
     job statistics, overall status, and trial list.

     ``experiment_dir``, ``nni_source_dir`` and ``**kwargs`` are accepted for
     interface compatibility but are not used here.
     """
     print(rest_endpoint)
     experiment = Experiment()
     experiment.connect_experiment(rest_endpoint)
     job_statistics = experiment.get_job_statistics()
     print(job_statistics)
     experiment_status = experiment.get_experiment_status()
     print(experiment_status)
     trial_jobs = experiment.list_trial_jobs()
     print(trial_jobs)
Example #2
def view_experiment(args):
    """Open a stopped experiment in read-only (view) mode.

    Falls back to the legacy launcher when the stored config uses the old
    v1 format (identified by the ``trainingServicePlatform`` field).
    """
    experiment_id = args.id
    rest_port = args.port
    experiment_dir = args.experiment_dir

    config_json = get_stopped_experiment_config_json(experiment_id, experiment_dir)
    if config_json.get('trainingServicePlatform'):
        # v1-style config: delegate to the legacy implementation and stop here.
        legacy_launcher.view_experiment(args)
        exit()

    experiment = Experiment._view(experiment_id, experiment_dir)
    experiment.start(rest_port, run_mode=RunMode.Detach)
Example #3
def view_experiment(args):
    """Open a stopped experiment in read-only (view) mode.

    Sets up command-line logging first; legacy (v1-format) experiments are
    handed off to the legacy launcher instead of the v2 ``Experiment`` API.
    """
    experiment_id = args.id
    rest_port = args.port
    experiment_dir = args.experiment_dir

    # Route log records to the console at INFO level for this CLI command.
    init_logger_for_command_line()
    logging.getLogger('nni').setLevel(logging.INFO)

    config_json = get_stopped_experiment_config_json(experiment_id, experiment_dir)
    if config_json.get('trainingServicePlatform'):
        # v1-style config: delegate to the legacy implementation and stop here.
        legacy_launcher.view_experiment(args)
        exit()

    experiment = Experiment._view(experiment_id, experiment_dir)
    experiment.start(rest_port, run_mode=RunMode.Detach)
Example #4
def resume_experiment(args):
    """Resume a previously stopped experiment.

    Legacy (v1-format) experiments are handed off to the legacy launcher;
    otherwise the experiment is restarted via the v2 ``Experiment`` API.
    """
    experiment_id = args.id
    rest_port = args.port
    debug_mode = args.debug
    run_in_foreground = args.foreground
    experiment_dir = args.experiment_dir

    config_json = get_stopped_experiment_config_json(experiment_id, experiment_dir)
    if config_json.get('trainingServicePlatform'):
        # v1-style config: delegate to the legacy implementation and stop here.
        legacy_launcher.resume_experiment(args)
        exit()

    experiment = Experiment._resume(experiment_id, experiment_dir)
    mode = RunMode.Foreground if run_in_foreground else RunMode.Detach
    experiment.start(rest_port, debug_mode, mode)
Example #5
def resume_experiment(args):
    """Resume a previously stopped experiment.

    Sets up command-line logging first; legacy (v1-format) experiments are
    handed off to the legacy launcher instead of the v2 ``Experiment`` API.
    """
    experiment_id = args.id
    rest_port = args.port
    debug_mode = args.debug
    run_in_foreground = args.foreground
    experiment_dir = args.experiment_dir

    # Route log records to the console at INFO level for this CLI command.
    init_logger_for_command_line()
    logging.getLogger('nni').setLevel(logging.INFO)

    config_json = get_stopped_experiment_config_json(experiment_id, experiment_dir)
    if config_json.get('trainingServicePlatform'):
        # v1-style config: delegate to the legacy implementation and stop here.
        legacy_launcher.resume_experiment(args)
        exit()

    experiment = Experiment._resume(experiment_id, experiment_dir)
    mode = RunMode.Foreground if run_in_foreground else RunMode.Detach
    experiment.start(rest_port, debug_mode, mode)
Example #6
# FIXME: demonstration code only -- it does not belong in this file.

from pathlib import Path

from nni.experiment import Experiment
from nni.algorithms.hpo.hyperopt_tuner import HyperoptTuner

# Tree-structured Parzen Estimator tuner from the hyperopt integration.
tuner = HyperoptTuner('tpe')

# Hyper-parameter search space for the MNIST trial script.
search_space = {
    "dropout_rate": {"_type": "uniform", "_value": [0.5, 0.9]},
    "conv_size": {"_type": "choice", "_value": [2, 3, 5, 7]},
    "hidden_size": {"_type": "choice", "_value": [124, 512, 1024]},
    "batch_size": {"_type": "choice", "_value": [16, 32]},
    "learning_rate": {"_type": "choice", "_value": [0.0001, 0.001, 0.01, 0.1]},
}

# Local-mode experiment driven by the tuner instance above.
experiment = Experiment(tuner, 'local')
cfg = experiment.config
cfg.experiment_name = 'test'
cfg.trial_concurrency = 2
cfg.max_trial_number = 5
cfg.search_space = search_space
cfg.trial_command = 'python3 mnist.py'
cfg.trial_code_directory = Path(__file__).parent
cfg.training_service.use_active_gpu = True

# Launch the experiment and block until it completes.
experiment.run(8081)
Example #7
    },
    "hidden_size": {
        "_type": "choice",
        "_value": [124, 512, 1024]
    },
    "batch_size": {
        "_type": "choice",
        "_value": [16, 32]
    },
    "learning_rate": {
        "_type": "choice",
        "_value": [0.0001, 0.001, 0.01, 0.1]
    }
}

# Hybrid experiment: trials run both on the local machine and on a remote one.
experiment = Experiment(['local', 'remote'])
cfg = experiment.config
cfg.experiment_name = 'test'
cfg.trial_concurrency = 3
cfg.max_trial_number = 10
cfg.search_space = search_space
cfg.trial_command = 'python3 mnist.py'
cfg.trial_code_directory = Path(__file__).parent
cfg.tuner.name = 'TPE'
cfg.tuner.class_args['optimize_mode'] = 'maximize'
# Per-platform settings: index 0 is 'local', index 1 is 'remote'.
cfg.training_service[0].use_active_gpu = True
cfg.training_service[1].reuse_mode = True
# Connection details for the remote training machine (credentials redacted).
rm_conf = RemoteMachineConfig()
rm_conf.host = '10.1.1.1'
rm_conf.user = '******'
rm_conf.password = '******'
rm_conf.port = 22
Example #8
# Hyper-parameter search space: layer width, learning rate and momentum.
search_space = {
    'features': {'_type': 'choice', '_value': [128, 256, 512, 1024]},
    'lr': {'_type': 'loguniform', '_value': [0.0001, 0.1]},
    'momentum': {'_type': 'uniform', '_value': [0, 1]},
}

# Local-mode experiment: trials run as subprocesses on this machine.
experiment = Experiment('local')
cfg = experiment.config
cfg.trial_command = 'python model.py'
cfg.trial_code_directory = Path(__file__).parent
cfg.search_space = search_space
cfg.tuner.name = 'Random'
cfg.max_trial_number = 10
cfg.trial_concurrency = 2

# Start without blocking so the script can keep the process alive itself.
experiment.run(port=8080, wait_completion=False)

print('Experiment is running. Press Ctrl-C to quit.')
signal.pause()  # block the main thread until a signal (e.g. SIGINT) arrives
Example #9
def create_experiment(args):
    """Create and launch an experiment described by a YAML config file.

    Handles three config flavors:

    * legacy (v1) configs that cannot be converted ('adl', or
      kubeflow/frameworkcontroller with an explicit ``reuse: false``) are
      delegated to the legacy launcher;
    * convertible legacy configs are translated to the v2 schema, and the
      translated config is echoed so the user can migrate their file;
    * native v2 configs are loaded directly.

    Exits the process with status 1 on an invalid config path or a failed
    legacy-config conversion.
    """
    # Unpack args up front to make it clear what this command consumes.
    config_file = Path(args.config)
    port = args.port
    debug = args.debug
    url_prefix = args.url_prefix
    foreground = args.foreground

    # It should finally be done in the nnictl main function,
    # but for now don't break routines without logging support.
    init_logger_for_command_line()
    logging.getLogger('nni').setLevel(logging.INFO)

    if not config_file.is_file():
        _logger.error(f'"{config_file}" is not a valid file.')
        exit(1)

    with config_file.open() as config:
        config_content = yaml.safe_load(config)

    v1_platform = config_content.get('trainingServicePlatform')
    if v1_platform:
        # Decide whether this legacy config can be converted to v2.
        can_convert = True
        if v1_platform == 'adl':
            can_convert = False
        if v1_platform in ['kubeflow', 'frameworkcontroller']:
            reuse = config_content.get(v1_platform + 'Config', {}).get('reuse')
            # Convert to reuse mode unless the user explicitly wrote
            # ``reuse: false``.  Identity comparison (PEP 8 / E712) so only a
            # genuine boolean False blocks the conversion.
            can_convert = reuse is not False

        if not can_convert:
            legacy_launcher.create_experiment(args)
            exit()

        try:
            v2_config = convert.to_v2(config_content)
        except Exception:
            _logger.error(
                'You are using legacy config format with incorrect fields or values, '
                'to get more accurate error message please update it to the new format.'
            )
            _logger.error(
                'Reference: https://nni.readthedocs.io/en/stable/reference/experiment_config.html'
            )
            exit(1)
        _logger.warning(
            'You are using legacy config file, please update it to latest format:'
        )
        # use `print` here because logging will add timestamp and make it hard to copy paste
        print(Fore.YELLOW + '=' * 80 + Fore.RESET)
        print(yaml.dump(v2_config, sort_keys=False).strip())
        print(Fore.YELLOW + '=' * 80 + Fore.RESET)
        print(
            Fore.YELLOW +
            'Reference: https://nni.readthedocs.io/en/stable/reference/experiment_config.html'
            + Fore.RESET)

        # Relative paths in the converted config must resolve against the
        # config file's directory, not the current working directory.
        utils.set_base_path(config_file.parent)
        config = ExperimentConfig(**v2_config)
        utils.unset_base_path()

    else:
        config = ExperimentConfig.load(config_file)

    if config.use_annotation:
        # Expand NNI annotations into a temporary copy of the trial code and
        # derive the search space from the annotated sources.
        path = Path(tempfile.gettempdir(), getuser(), 'nni', 'annotation')
        path.mkdir(parents=True, exist_ok=True)
        path = tempfile.mkdtemp(dir=path)
        code_dir = expand_annotations(config.trial_code_directory, path)
        config.trial_code_directory = code_dir
        config.search_space = generate_search_space(code_dir)
        assert config.search_space, 'ERROR: Generated search space is empty'
        config.use_annotation = False

    exp = Experiment(config)
    exp.url_prefix = url_prefix
    run_mode = RunMode.Foreground if foreground else RunMode.Detach
    exp.start(port, debug, run_mode)

    _logger.info(
        f'To stop experiment run "nnictl stop {exp.id}" or "nnictl stop --all"'
    )
    _logger.info(
        'Reference: https://nni.readthedocs.io/en/stable/Tutorial/Nnictl.html')
Example #10
        '_type': 'uniform',
        '_value': [0, 1]
    },
}

# %%
# Step 3: Configure the experiment
# --------------------------------
# NNI uses an *experiment* to manage the HPO process.
# The *experiment config* defines how to train the models and how to explore the search space.
#
# In this tutorial we use a *local* mode experiment,
# which means models will be trained on the local machine, without using any special training platform.
from nni.experiment import Experiment

experiment = Experiment('local')

# %%
# Now we start to configure the experiment.
#
# Configure trial code
# ^^^^^^^^^^^^^^^^^^^^
# In NNI, the evaluation of each hyperparameter set is called a *trial*,
# so the model script is called *trial code*.
experiment.config.trial_command = 'python model.py'
experiment.config.trial_code_directory = '.'
# %%
# When ``trial_code_directory`` is a relative path, it relates to the current working directory.
# To run ``main.py`` in a different path, you can set the trial code directory to ``Path(__file__).parent``.
# (`__file__ <https://docs.python.org/3.10/reference/datamodel.html#index-43>`__
# is only available in standard Python, not in Jupyter Notebook.)
Example #11
    },
    "beta": {
        "_type": "choice",
        "_value": [0, 1]
    },
    "weight_decay": {
        "_type": "loguniform",
        "_value": [1e-9, 1]
    },
    "gradient_clip_val": {
        "_type": "choice",
        "_value": [0, 0.25]
    }
}

# AWD-LSTM tuning experiment running on the local machine plus one remote
# machine, driven by the tuner constructed above.
experiment = Experiment(tuner, ['local', 'remote'])
cfg = experiment.config
cfg.experiment_name = 'awd_lstm'
cfg.author_name = 'Igor Quintanilha'
cfg.max_trial_number = 100
cfg.max_experiment_duration = '60d'

# IP address the trial machines use to reach the NNI manager.
cfg.nni_manager_ip = '10.221.90.21'
cfg.search_space = search_space

# Activate the conda environment before each trial starts.
cfg.trial_prepare_command = 'source /home/igor.quintanilha/miniconda3/bin/activate dsc'
cfg.trial_command = 'python main.py --gpus 1 data/brtd --vocab data/brtd/b3922f0904f4f1b7b258a9488132f2e6480cf936493be53f74fd7aaa07e14781.8f9337.vocab --batch-size 64 --max_epochs 10 --terminate_on_nan --num-embedding 400 --num-layers 3 --num-hidden 1150 --model awd --bptt 20 --max_steps 150000 --val_check_interval .25'
cfg.trial_code_directory = Path(__file__).parent.parent
cfg.trial_concurrency = 2
cfg.trial_gpu_number = 1

# Training service 0 is 'local'; allow trials on GPUs already in use.
cfg.training_service[0].use_active_gpu = True