def test_simple_file_passes(self):
     """Check that the simple fixture yields a local, single-master regressor experiment."""
     fixture = os.path.abspath('tests/fixtures/simple_file.yml')
     polyaxonfile = PolyaxonFile(fixture)
     experiment = polyaxonfile.experiment_spec_at(0)

     # File-level values.
     assert polyaxonfile.version == 1
     assert polyaxonfile.project.name == 'project1'
     assert polyaxonfile.project_path == '/tmp/plx_logs/project1'
     assert polyaxonfile.matrix is None
     assert polyaxonfile.settings is None
     assert polyaxonfile.run_type == RunTypes.LOCAL

     # Experiment-level values: no environment, one master, no ps/workers.
     assert experiment.environment is None
     assert experiment.experiment_path == '/tmp/plx_logs/project1/0'
     assert experiment.is_runnable
     assert experiment.cluster_def == ({TaskType.MASTER: 1}, False)
     expected_cluster = {
         TaskType.MASTER: ['127.0.0.1:10000'],
         TaskType.PS: [],
         TaskType.WORKER: [],
     }
     assert_equal_dict(experiment.get_cluster().to_dict(), expected_cluster)

     # Model section.
     model = experiment.model
     assert isinstance(model, RegressorConfig)
     assert isinstance(model.loss, MeanSquaredErrorConfig)
     assert isinstance(model.optimizer, AdamConfig)
     graph = model.graph
     assert isinstance(graph, GraphConfig)
     assert len(graph.layers) == 4
     assert graph.input_layers == [['images', 0, 0]]
     # The graph output is expected to be the last declared layer.
     final_layer_name = graph.layers[-1].name
     assert graph.output_layers == [[final_layer_name, 0, 0]]

     assert isinstance(experiment.train.data_pipeline, TFRecordImagePipelineConfig)
     assert experiment.eval is None
    def test_run_matrix_file_passes(self):
        """Check the run-exec matrix fixture: three `model` values, one runnable spec each."""
        fixture = os.path.abspath('tests/fixtures/run_exec_matrix_file.yml')
        polyaxonfile = PolyaxonFile(fixture)

        assert polyaxonfile.version == 1
        assert polyaxonfile.project.name == 'video_prediction'
        logs_root = get_vol_path(constants.LOGS_VOLUME, RunTypes.MINIKUBE)
        assert polyaxonfile.project_path == logs_root + 'video_prediction'

        model_matrix = polyaxonfile.matrix['model']
        assert isinstance(model_matrix, MatrixConfig)
        assert model_matrix.to_dict() == {'values': ['CDNA', 'DNA', 'STP']}
        assert polyaxonfile.matrix_space == 3

        def by_model(declaration):
            # Sort key so both declaration lists compare independently of order.
            return declaration['model']

        expected_declarations = [{'model': value} for value in model_matrix.to_numpy()]
        actual_sorted = sorted(polyaxonfile.matrix_declarations, key=by_model)
        assert actual_sorted == sorted(expected_declarations, key=by_model)

        assert isinstance(polyaxonfile.settings, SettingsConfig)
        assert polyaxonfile.run_type == RunTypes.MINIKUBE
        assert len(polyaxonfile.experiment_specs) == polyaxonfile.matrix_space

        cmd_template = 'video_prediction_train --model="{model}" --num_masks={num_masks}'
        for index in range(polyaxonfile.matrix_space):
            experiment = polyaxonfile.experiment_spec_at(index)
            assert experiment.is_runnable
            assert experiment.environment is None
            assert experiment.cluster_def == ({TaskType.MASTER: 1}, False)
            assert experiment.model is None
            run_exec = experiment.run_exec
            assert isinstance(run_exec, RunExecConfig)

            # The expected command uses num_masks=1 for DNA and 10 otherwise.
            declaration = polyaxonfile.get_declarations_at(index)
            if declaration['model'] == 'DNA':
                declaration['num_masks'] = 1
            else:
                declaration['num_masks'] = 10
            assert run_exec.cmd == cmd_template.format(**declaration)
 def test_run_simple_file_passes(self):
     """Check the simple run-exec fixture: local run, no model, fixed command line."""
     fixture = os.path.abspath('tests/fixtures/run_exec_simple_file.yml')
     polyaxonfile = PolyaxonFile(fixture)
     experiment = polyaxonfile.experiment_spec_at(0)

     # File-level values.
     assert polyaxonfile.version == 1
     assert polyaxonfile.project.name == 'video_prediction'
     assert polyaxonfile.settings is None
     assert polyaxonfile.run_type == RunTypes.LOCAL
     assert polyaxonfile.project_path == "/tmp/plx_logs/video_prediction"

     # Experiment-level values.
     assert experiment.experiment_path == "/tmp/plx_logs/video_prediction/0"
     assert experiment.is_runnable
     assert experiment.environment is None
     assert experiment.cluster_def == ({TaskType.MASTER: 1}, False)
     assert experiment.model is None

     # The run section carries the command to execute.
     exec_config = experiment.run_exec
     assert isinstance(exec_config, RunExecConfig)
     assert exec_config.cmd == "video_prediction_train --model=DNA --num_masks=1"
Ejemplo n.º 4
0
 def test_run_matrix_file_passes(self):
     """Check the run-exec matrix fixture through the `specification` API."""
     fixture = os.path.abspath('tests/fixtures/run_exec_matrix_file.yml')
     group_spec = PolyaxonFile(fixture).specification

     assert group_spec.version == 1
     assert group_spec.project.name == 'video_prediction'
     model_matrix = group_spec.settings.matrix['model']
     assert isinstance(model_matrix, MatrixConfig)
     assert model_matrix.to_dict() == {'values': ['CDNA', 'DNA', 'STP']}
     assert group_spec.matrix_space == 3
     assert isinstance(group_spec.settings, SettingsConfig)

     # Build one experiment spec from the test declaration of the matrix.
     declaration = group_spec.matrix_declaration_test
     experiment = group_spec.get_experiment_spec(declaration)
     assert experiment.is_runnable
     assert experiment.environment is None
     assert experiment.settings is not None
     assert experiment.settings.logging is not None
     assert experiment.cluster_def == ({TaskType.MASTER: 1}, False)
     assert experiment.model is None

     exec_config = experiment.run_exec
     assert isinstance(exec_config, RunExecConfig)

     # The expected command uses num_masks=1 for DNA and 10 otherwise.
     if declaration['model'] == 'DNA':
         declaration['num_masks'] = 1
     else:
         declaration['num_masks'] = 10
     cmd_template = 'video_prediction_train --model="{model}" --num_masks={num_masks}'
     assert exec_config.cmd == cmd_template.format(**declaration)
Ejemplo n.º 5
0
    def test_one_matrix_file_passes(self):
        """Check the single-parameter matrix fixture (two `loss` values)."""
        fixture = os.path.abspath('tests/fixtures/one_matrix_file.yml')
        group_spec = PolyaxonFile(fixture).specification

        assert group_spec.version == 1
        assert group_spec.project.name == 'project1'
        assert group_spec.settings is not None
        loss_matrix = group_spec.settings.matrix['loss']
        assert isinstance(loss_matrix, MatrixConfig)
        expected_losses = ['MeanSquaredError', 'AbsoluteDifference']
        assert loss_matrix.to_dict() == {'values': expected_losses}
        assert group_spec.matrix_space == 2

        # Build one experiment spec from the test declaration of the matrix.
        experiment = group_spec.get_experiment_spec(
            matrix_declaration=group_spec.matrix_declaration_test)
        assert experiment.is_runnable
        assert experiment.environment is None
        assert experiment.framework is None
        assert experiment.cluster_def == ({TaskType.MASTER: 1}, False)

        regressor = experiment.model
        assert isinstance(regressor, RegressorConfig)
        # Either loss of the matrix may have been selected.
        allowed_losses = (MeanSquaredErrorConfig, AbsoluteDifferenceConfig)
        assert isinstance(regressor.loss, allowed_losses)
        assert isinstance(regressor.optimizer, AdamConfig)
        graph = regressor.graph
        assert isinstance(graph, GraphConfig)
        assert len(graph.layers) == 4
        assert graph.input_layers == [['images', 0, 0]]
        final_layer_name = graph.layers[-1].name
        assert graph.output_layers == [[final_layer_name, 0, 0]]
        assert isinstance(experiment.train.data_pipeline, TFRecordImagePipelineConfig)
Ejemplo n.º 6
0
def start_experiment_run(polyaxonfile, experiment_id, task_type, task_id,
                         schedule):
    """Prepare the experiment for one task and invoke the requested schedule on it.

    `schedule` is the name of a method looked up on the prepared experiment;
    the value returned by that method is returned unchanged.
    """
    parsed_file = PolyaxonFile.read(polyaxonfile)
    prepared = prepare_experiment_run(parsed_file, experiment_id, task_type,
                                      task_id)
    return getattr(prepared, schedule)()
Ejemplo n.º 7
0
def _get_run_configs(polyaxonfile, experiment_id):
    """Build the run configs of every task of an experiment.

    Args:
        polyaxonfile: value handed to `PolyaxonFile.read`.
        experiment_id: index of the experiment inside the polyaxonfile.

    Returns:
        A tuple `(configs, is_distributed)`. `configs` maps a task type to the
        list of run configs for that task type, indexed by task id. When the
        experiment is not distributed only `TaskType.MASTER` is present.
    """
    plx_file = PolyaxonFile.read(polyaxonfile)
    environment = plx_file.get_environment_at(experiment_id)
    cluster_def, is_distributed = plx_file.get_cluster_def_at(experiment_id)

    def get_master_config(config, task_type=None, task_id=None):
        # Derive a RunConfig from the base config, optionally bound to a task.
        config = RunConfig.from_config(config)
        if task_type is None and task_id is None:
            return config
        return config.replace(task_type=task_type, task_id=task_id)

    config = environment.run_config or RunConfig.CONFIG()

    if not is_distributed:
        # Wrap in a list so the shape matches the distributed branch and the
        # `configs[task_type][task_id]` lookups done by the callers.
        return {TaskType.MASTER: [get_master_config(config)]}, False

    config.cluster = plx_file.get_cluster(experiment=experiment_id)

    configs = {
        TaskType.MASTER: [get_master_config(config, TaskType.MASTER, 0)]
    }

    # Only declare the task types that actually have tasks in the cluster.
    if cluster_def.get(TaskType.WORKER, 0) > 0:
        configs[TaskType.WORKER] = []

    if cluster_def.get(TaskType.PS, 0) > 0:
        configs[TaskType.PS] = []

    # Per-task session configs, keyed by the task index they apply to.
    worker_session_configs = {}
    for session_config in environment.worker_configs or []:
        worker_session_configs[session_config.index] = session_config

    ps_session_configs = {}
    for session_config in environment.ps_configs or []:
        ps_session_configs[session_config.index] = session_config

    default_worker_config = environment.default_worker_config
    for i in range(cluster_def.get(TaskType.WORKER, 0)):
        w_config = get_master_config(config,
                                     task_type=TaskType.WORKER,
                                     task_id=i)
        # A task-specific session config wins over the default one.
        session_config = worker_session_configs.get(i, default_worker_config)
        if session_config:
            session_config = RunConfig.get_session_config(session_config)
            w_config = w_config.replace(session_config=session_config)

        configs[TaskType.WORKER].append(w_config)

    default_ps_config = environment.default_ps_config
    for i in range(cluster_def.get(TaskType.PS, 0)):
        ps_config = get_master_config(config, task_type=TaskType.PS, task_id=i)
        session_config = ps_session_configs.get(i, default_ps_config)
        if session_config:
            session_config = RunConfig.get_session_config(session_config)
            ps_config = ps_config.replace(session_config=session_config)

        configs[TaskType.PS].append(ps_config)

    return configs, True
    def test_matrix_file_passes(self):
        """Check the two-parameter matrix fixture (5 `lr` x 2 `loss` = 10 experiments)."""
        fixture = os.path.abspath('tests/fixtures/matrix_file.yml')
        polyaxonfile = PolyaxonFile(fixture)

        assert polyaxonfile.version == 1
        assert polyaxonfile.project.name == 'project1'
        assert polyaxonfile.project_path == '/tmp/plx_logs/project1'
        assert isinstance(polyaxonfile.matrix['lr'], MatrixConfig)
        assert isinstance(polyaxonfile.matrix['loss'], MatrixConfig)
        expected_lr = {'logspace': {'start': 0.01, 'stop': 0.1, 'num': 5}}
        assert polyaxonfile.matrix['lr'].to_dict() == expected_lr
        expected_loss = {'values': ['MeanSquaredError', 'AbsoluteDifference']}
        assert polyaxonfile.matrix['loss'].to_dict() == expected_loss
        assert polyaxonfile.matrix_space == 10

        def by_lr_then_loss(declaration):
            # Sort key so both declaration lists compare independently of order.
            return declaration['lr'], declaration['loss']

        # Every (lr, loss) combination is expected exactly once.
        expected_declarations = [
            {'loss': loss, 'lr': lr}
            for lr in polyaxonfile.matrix['lr'].to_numpy()
            for loss in polyaxonfile.matrix['loss'].to_numpy()
        ]
        actual_sorted = sorted(polyaxonfile.matrix_declarations, key=by_lr_then_loss)
        assert actual_sorted == sorted(expected_declarations, key=by_lr_then_loss)

        assert isinstance(polyaxonfile.settings, SettingsConfig)
        assert polyaxonfile.settings.concurrent_experiments == 2
        assert polyaxonfile.run_type == RunTypes.LOCAL

        expected_cluster = {
            TaskType.MASTER: ['127.0.0.1:10000'],
            TaskType.PS: [],
            TaskType.WORKER: [],
        }
        allowed_losses = (MeanSquaredErrorConfig, AbsoluteDifferenceConfig)
        for index in range(polyaxonfile.matrix_space):
            experiment = polyaxonfile.experiment_spec_at(index)
            assert experiment.is_runnable
            assert experiment.environment is None
            assert experiment.cluster_def == ({TaskType.MASTER: 1}, False)

            assert_equal_dict(experiment.get_cluster().to_dict(), expected_cluster)

            regressor = experiment.model
            assert isinstance(regressor, RegressorConfig)
            assert isinstance(regressor.loss, allowed_losses)
            assert isinstance(regressor.optimizer, AdamConfig)
            graph = regressor.graph
            assert isinstance(graph, GraphConfig)
            assert len(graph.layers) == 4
            assert graph.input_layers == [['images', 0, 0]]
            final_layer_name = graph.layers[-1].name
            assert graph.output_layers == [[final_layer_name, 0, 0]]
            assert isinstance(experiment.train.data_pipeline, TFRecordImagePipelineConfig)
Ejemplo n.º 9
0
def init(project, run, model):
    """Initialize a new polyaxonfile specification.

    Looks up the project, creates the init file for the requested option
    (`model` takes precedence over `run`), then stores the project
    configuration and the ignore configuration.

    Exits the process with status 1 when the project lookup fails, when no
    init option is given, or when no valid Polyaxonfile is available
    afterwards. Exits with status 0 right after a file was created; returns
    normally when a valid file was already present.
    """
    user, project_name = get_project_or_local(project)
    try:
        project_config = PolyaxonClients().project.get_project(user, project_name)
    except (PolyaxonHTTPError, PolyaxonShouldExitError) as e:
        Printer.print_error('Make sure you have a project with this name `{}`'.format(project))
        Printer.print_error('You can a new project with this command: '
                            'polyaxon project create --name={} --description=...'.format(project))
        Printer.print_error('Error message `{}`.'.format(e))
        sys.exit(1)

    # At least one init option is required.
    if not any([model, run]):
        Printer.print_error("You must specify which an init option, "
                            "possible values: `--model` or `--run`.")
        sys.exit(1)

    result = False
    if model:
        result = create_init_file(constants.INIT_FILE_MODEL)

    elif run:
        result = create_init_file(constants.INIT_FILE_RUN)

    if result:
        ProjectManager.set_config(project_config, init=True)
        IgnoreManager.init_config()
        Printer.print_success(
            "Project `{}` was initialized and Polyaxonfile was created successfully `{}`".format(
                project, constants.INIT_FILE))
        # Success must be reported to the shell with a zero exit status.
        sys.exit(0)

    # if we are here the file was not created
    if not os.path.isfile(constants.INIT_FILE):
        Printer.print_error(
            "Something went wrong, init command did not create a file.\n"
            "Possible reasons: you don't have the write to create the file.")
        sys.exit(1)

    # file was already there, let's check if the project passed correspond to this file
    try:
        PolyaxonFile(constants.INIT_FILE).specification
    except (PolyaxonfileError, ValidationError) as e:
        Printer.print_error(
            "Something went wrong, init command did not create a file.\n"
            "Another file already exist with.")
        Printer.print_error('Error message: `{}`.'.format(e))
        sys.exit(1)

    # At this point we check if we need to re init configurations
    ProjectManager.set_config(project_config, init=True)
    IgnoreManager.init_config()
    Printer.print_success(
        "Project `{}` was initialized and Polyaxonfile was created successfully `{}`".format(
            project, constants.INIT_FILE))
Ejemplo n.º 10
0
def run(polyaxonfile):
    """Run every experiment of the polyaxonfile, one after the other.

    After starting an experiment, the master process is checked every 10
    seconds until the shared `current_run` state is marked as finished; the
    state is then reset before the next experiment starts.
    """
    parsed_file = PolyaxonFile.read(polyaxonfile)
    for experiment_id in range(parsed_file.matrix_space):
        experiment_spec = parsed_file.experiment_specs[experiment_id]
        run_experiment(experiment_spec, experiment_id)

        # Block until the current experiment is reported as finished.
        while True:
            if current_run['finished']:
                break
            check_master_process()
            time.sleep(10)

        # Reset the shared state for the next experiment.
        current_run['finished'] = False
        current_run['master'] = None
Ejemplo n.º 11
0
    def test_matrix_early_stopping_file_passes(self):
        """Check the matrix fixture that declares random search and early stopping."""
        fixture = os.path.abspath('tests/fixtures/matrix_file_early_stopping.yml')
        group_spec = PolyaxonFile(fixture).specification

        assert group_spec.version == 1
        assert group_spec.project.name == 'project1'
        assert isinstance(group_spec.settings.matrix['lr'], MatrixConfig)
        assert isinstance(group_spec.settings.matrix['loss'], MatrixConfig)
        expected_lr = {'logspace': {'start': 0.01, 'stop': 0.1, 'num': 5}}
        assert group_spec.settings.matrix['lr'].to_dict() == expected_lr
        expected_loss = {'values': ['MeanSquaredError', 'AbsoluteDifference']}
        assert group_spec.settings.matrix['loss'].to_dict() == expected_loss
        assert group_spec.matrix_space == 10

        settings = group_spec.settings
        assert isinstance(settings, SettingsConfig)
        assert settings.concurrent_experiments == 2
        assert settings.random_search.n_experiments == 5
        assert group_spec.early_stopping == settings.early_stopping
        assert len(settings.early_stopping) == 1
        assert isinstance(settings.early_stopping[0], EarlyStoppingMetricConfig)

        # Disabled check, kept for reference:
        # assert group_spec.experiments_def == (
        #     10,
        #     5,
        #     2,
        #     SearchAlgorithms.RANDOM
        # )

        # Build one experiment spec from the test declaration of the matrix.
        experiment = group_spec.get_experiment_spec(
            matrix_declaration=group_spec.matrix_declaration_test)
        assert experiment.is_runnable
        assert experiment.environment is None
        assert experiment.framework is None
        assert experiment.cluster_def == ({TaskType.MASTER: 1}, False)

        regressor = experiment.model
        assert isinstance(regressor, RegressorConfig)
        allowed_losses = (MeanSquaredErrorConfig, AbsoluteDifferenceConfig)
        assert isinstance(regressor.loss, allowed_losses)
        assert isinstance(regressor.optimizer, AdamConfig)
        graph = regressor.graph
        assert isinstance(graph, GraphConfig)
        assert len(graph.layers) == 4
        assert graph.input_layers == [['images', 0, 0]]
        final_layer_name = graph.layers[-1].name
        assert graph.output_layers == [[final_layer_name, 0, 0]]
        assert isinstance(experiment.train.data_pipeline, TFRecordImagePipelineConfig)
Ejemplo n.º 12
0
def prepare_all_experiment_runs(polyaxonfile, experiment_id):
    """Build one `Experiment` per task of the given experiment.

    The returned list starts with the master experiment; when the run is
    distributed it is followed by the worker experiments and then the
    parameter-server experiments.
    """
    plx_file = PolyaxonFile.read(polyaxonfile)
    is_distributed = False

    if plx_file.get_environment_at(experiment_id):
        tf.logging.set_verbosity(
            LOGGING_LEVEL[plx_file.settings.logging.level])
        environment = plx_file.settings.environment
        configs, is_distributed = _get_run_configs(environment, experiment_id)
        delay_workers_by_global_step = environment.delay_workers_by_global_step
    else:
        # No environment declared: single master with a default run config.
        tf.logging.set_verbosity(tf.logging.INFO)
        configs = {TaskType.MASTER: [RunConfig()]}
        delay_workers_by_global_step = False

    train_spec = plx_file.get_train_at(experiment_id)
    train_input_fn, train_steps, train_hooks = _get_train(train_spec)
    eval_spec = plx_file.get_eval_at(experiment_id)
    (eval_input_fn, eval_steps, eval_hooks, eval_delay_secs,
     continuous_eval_throttle_secs) = _get_eval(eval_spec)

    def get_experiment(config):
        # All tasks share the same inputs; only the run config differs.
        experiment_kwargs = dict(
            estimator=getters.get_estimator(plx_file.model,
                                            config,
                                            output_dir=plx_file.project_path),
            train_input_fn=train_input_fn,
            eval_input_fn=eval_input_fn,
            train_steps=train_steps,
            eval_steps=eval_steps,
            train_hooks=train_hooks,
            eval_hooks=eval_hooks,
            eval_delay_secs=eval_delay_secs,
            continuous_eval_throttle_secs=continuous_eval_throttle_secs,
            delay_workers_by_global_step=delay_workers_by_global_step,
            export_strategies=plx_file.settings.export_strategies)
        return Experiment(**experiment_kwargs)

    experiments = [get_experiment(configs[TaskType.MASTER][0])]
    if is_distributed:
        for task_type in (TaskType.WORKER, TaskType.PS):
            for task_config in configs.get(task_type, []):
                experiments.append(get_experiment(task_config))

    return experiments
Ejemplo n.º 13
0
 def test_run_simple_file_passes(self):
     """Check the simple run-exec fixture through the `specification` API."""
     fixture = os.path.abspath('tests/fixtures/run_exec_simple_file.yml')
     specification = PolyaxonFile(fixture).specification

     assert specification.version == 1
     assert specification.project.name == 'video_prediction'
     assert specification.settings is None
     assert specification.is_runnable
     assert specification.environment is None
     assert specification.framework is None
     assert specification.cluster_def == ({TaskType.MASTER: 1}, False)
     assert specification.model is None

     # The run section carries the command to execute.
     exec_config = specification.run_exec
     assert isinstance(exec_config, RunExecConfig)
     assert exec_config.cmd == "video_prediction_train --model=DNA --num_masks=1"
 def test_simple_generator_file_passes(self):
     """Check that the generator fixture yields a local, single-master generator experiment."""
     fixture = os.path.abspath('tests/fixtures/simple_generator_file.yml')
     polyaxonfile = PolyaxonFile(fixture)
     experiment = polyaxonfile.experiment_spec_at(0)

     # File-level values.
     assert polyaxonfile.matrix is None
     assert polyaxonfile.version == 1
     assert polyaxonfile.project.name == 'project1'
     assert polyaxonfile.project_path == '/tmp/plx_logs/project1'
     assert polyaxonfile.settings is None
     assert polyaxonfile.run_type == RunTypes.LOCAL

     # Experiment-level values: no environment, one master, no ps/workers.
     assert experiment.experiment_path == '/tmp/plx_logs/project1/0'
     assert experiment.environment is None
     assert experiment.is_runnable
     assert experiment.cluster_def == ({TaskType.MASTER: 1}, False)
     expected_cluster = {
         TaskType.MASTER: ['127.0.0.1:10000'],
         TaskType.PS: [],
         TaskType.WORKER: [],
     }
     assert_equal_dict(experiment.get_cluster().to_dict(), expected_cluster)

     # Model section: encoder/decoder graphs joined by a no-op bridge.
     generator = experiment.model
     assert isinstance(generator, GeneratorConfig)
     assert isinstance(generator.loss, MeanSquaredErrorConfig)
     assert isinstance(generator.optimizer, AdamConfig)
     assert isinstance(generator.encoder, GraphConfig)
     assert isinstance(generator.decoder, GraphConfig)
     assert isinstance(generator.bridge, NoOpBridgeConfig)

     assert isinstance(experiment.train.data_pipeline, TFRecordImagePipelineConfig)
     assert experiment.eval is None
Ejemplo n.º 15
0
def run_all(polyaxonfile):
    """Start one process per prepared experiment run and wait for them.

    For each experiment, the first prepared run uses the
    `train_and_evaluate` schedule and every other run uses `train`. Each
    started process is added to the shared `jobs` list, which is then
    joined before moving on to the next experiment.
    """
    parsed_file = PolyaxonFile.read(polyaxonfile)
    for experiment_id in range(parsed_file.matrix_space):
        experiment_runs = prepare_all_experiment_runs(polyaxonfile, experiment_id)
        for position, experiment_run in enumerate(experiment_runs):
            schedule = 'train_and_evaluate' if position == 0 else 'train'
            process = Process(target=getattr(experiment_run, schedule))
            process.start()
            jobs.append(process)

        for process in jobs:
            process.join()
Ejemplo n.º 16
0
def check_polyaxonfile(file):
    """Read and validate the given polyaxonfile(s).

    Args:
        file: a path or a collection of paths; normalized with `to_list`.

    Returns:
        The object returned by `PolyaxonFile.read` when reading succeeds.

    Exits the process with status 1 when none of the paths is an existing
    file, or when reading the file raises; in the latter case the error is
    printed so the user can see why the file was rejected.
    """
    file = to_list(file)
    exists = [os.path.isfile(f) for f in file]

    if not any(exists):
        Printer.print_error('Polyaxonfile is not present, '
                            'please run {}'.format(constants.INIT_COMMAND))
        sys.exit(1)

    try:
        plx_file = PolyaxonFile.read(file)
        Printer.print_success("Polyaxonfile valid")
        return plx_file
    except Exception as e:
        Printer.print_error("Polyaxonfile is not valid")
        # Surface the underlying cause instead of silently dropping it.
        Printer.print_error('Error message `{}`.'.format(e))
        sys.exit(1)
Ejemplo n.º 17
0
def prepare_experiment_run(polyaxonfile,
                           experiment_id,
                           task_type=TaskType.MASTER,
                           task_id=0):
    """Build the `Experiment` for a single task of the given experiment.

    Raises:
        ValueError: when `task_type` is not in the cluster definition, its
            entry is not an int, or `task_id` is not below that count.
    """
    plx_file = PolyaxonFile.read(polyaxonfile)
    cluster, _ = plx_file.get_cluster_def_at(experiment_id)

    # The task must exist in the cluster definition of this experiment.
    task_declared = (task_type in cluster
                     and isinstance(cluster[task_type], int))
    if not task_declared or task_id >= cluster[task_type]:
        message = ('task_type, task_id `{}, {}` is not supported by '
                   'the specification file passed.')
        raise ValueError(message.format(task_type, task_id))

    environment = plx_file.get_environment_at(experiment_id)
    if environment:
        tf.logging.set_verbosity(
            LOGGING_LEVEL[plx_file.settings.logging.level])
        configs, _ = _get_run_configs(plx_file, experiment_id)
        delay_workers_by_global_step = environment.delay_workers_by_global_step
    else:
        # No environment declared: single master with a default run config.
        tf.logging.set_verbosity(tf.logging.INFO)
        configs = {TaskType.MASTER: [RunConfig()]}
        delay_workers_by_global_step = False

    train_spec = plx_file.get_train_at(experiment_id)
    train_input_fn, train_steps, train_hooks = _get_train(train_spec)
    eval_spec = plx_file.get_eval_at(experiment_id)
    (eval_input_fn, eval_steps, eval_hooks, eval_delay_secs,
     continuous_eval_throttle_secs) = _get_eval(eval_spec)

    task_config = configs[task_type][task_id]
    estimator = getters.get_estimator(
        plx_file.get_model_at(experiment_id),
        task_config,
        output_dir=plx_file.get_project_path_at(experiment_id))

    experiment_kwargs = dict(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=train_steps,
        eval_steps=eval_steps,
        train_hooks=train_hooks,
        eval_hooks=eval_hooks,
        eval_delay_secs=eval_delay_secs,
        continuous_eval_throttle_secs=continuous_eval_throttle_secs,
        delay_workers_by_global_step=delay_workers_by_global_step,
        export_strategies=plx_file.settings.export_strategies)
    return Experiment(**experiment_kwargs)
Ejemplo n.º 18
0
def check(file, version, cluster, run_type):
    """Command for checking a polyaxonfile.

    Logs that the file is valid, then logs a single detail chosen by the
    first truthy flag among `version`, `cluster` and `run_type`; when none
    is set, the parsed data of the file is logged instead.
    """
    plx_file = PolyaxonFile(file)
    logger.info("Polyaxonfile valid")

    if version:
        logger.info('The version is: {}'.format(plx_file.version))
        return

    if cluster:
        # Only the definition is reported; the distributed flag is ignored.
        cluster_def, _ = plx_file.cluster_def
        logger.info('The cluster definition is: {}'.format(cluster_def))
        return

    if run_type:
        logger.info('The run_type is: {}'.format(plx_file.run_type))
        return

    logger.info('Validated file:\n{}'.format(plx_file.parsed_data))
Ejemplo n.º 19
0
def check_polyaxonfile(file, log=True):  # pylint:disable=redefined-builtin
    """Parse the given polyaxonfile(s) and return the resulting `PolyaxonFile`.

    Exits the process with status 1 when none of the paths is an existing
    file, or when parsing raises (the error is printed first). The success
    message is only printed when `log` is truthy.
    """
    paths = to_list(file)
    found = [os.path.isfile(path) for path in paths]

    if not any(found):
        missing_message = ('Polyaxonfile is not present, '
                           'please run {}'.format(constants.INIT_COMMAND))
        Printer.print_error(missing_message)
        sys.exit(1)

    try:
        parsed_file = PolyaxonFile(paths)
        if log:
            Printer.print_success("Polyaxonfile valid")
        return parsed_file
    except Exception as error:
        Printer.print_error("Polyaxonfile is not valid ")
        Printer.print_error('Error message `{}`.'.format(error))
        sys.exit(1)
Ejemplo n.º 20
0
 def test_simple_generator_file_passes(self):
     """Check the generator fixture through the `specification` API."""
     fixture = os.path.abspath('tests/fixtures/simple_generator_file.yml')
     specification = PolyaxonFile(fixture).specification

     assert specification.version == 1
     assert specification.project.name == 'project1'
     assert specification.settings is None
     assert specification.environment is None
     assert specification.framework is None
     assert specification.is_runnable
     assert specification.cluster_def == ({TaskType.MASTER: 1}, False)

     # Model section: encoder/decoder graphs joined by a no-op bridge.
     generator = specification.model
     assert isinstance(generator, GeneratorConfig)
     assert isinstance(generator.loss, MeanSquaredErrorConfig)
     assert isinstance(generator.optimizer, AdamConfig)
     assert isinstance(generator.encoder, GraphConfig)
     assert isinstance(generator.decoder, GraphConfig)
     assert isinstance(generator.bridge, NoOpBridgeConfig)

     assert isinstance(specification.train.data_pipeline, TFRecordImagePipelineConfig)
     assert specification.eval is None
Ejemplo n.º 21
0
def run(file):
    """Command for running a polyaxonfile.

    Only the local run type is handled here. When polyaxon is not
    installed, the user is offered to install it; declining prints manual
    instructions and exits with status 0.
    """
    plx_file = PolyaxonFile(file)
    if not plx_file.run_type == RunTypes.LOCAL:
        return

    # check that polyaxon is installed
    installed_version = get_version(PROJECT_NAME)
    if installed_version is None:
        click.echo("In order to run locally, polyaxon must be installed.")
        if not click.confirm("Do you want to install polyaxon now?"):
            click.echo("Your can manually run:\n"
                       "    pip install -U polyaxon\n"
                       "to install to the latest version of polyaxon)")
            sys.exit(0)
        from polyaxon_cli.cli.version import pip_upgrade
        pip_upgrade(PROJECT_NAME)

    logger.info('Running polyaxonfile locally')
    # Imported lazily: polyaxon may only just have been installed above.
    from polyaxon.polyaxonfile.local_runner import run as run_locally
    run_locally(file)
Ejemplo n.º 22
0
 def test_simple_file_passes(self):
     """Check the simple regressor fixture through the `specification` API."""
     fixture = os.path.abspath('tests/fixtures/simple_file.yml')
     specification = PolyaxonFile(fixture).specification

     assert specification.version == 1
     assert specification.project.name == 'project1'
     assert specification.settings is None
     assert specification.environment is None
     assert specification.framework is None
     assert specification.is_runnable
     assert specification.cluster_def == ({TaskType.MASTER: 1}, False)

     # Model section.
     regressor = specification.model
     assert isinstance(regressor, RegressorConfig)
     assert isinstance(regressor.loss, MeanSquaredErrorConfig)
     assert isinstance(regressor.optimizer, AdamConfig)
     graph = regressor.graph
     assert isinstance(graph, GraphConfig)
     assert len(graph.layers) == 4
     assert graph.input_layers == [['images', 0, 0]]
     # The graph output is expected to be the last declared layer.
     final_layer_name = graph.layers[-1].name
     assert graph.output_layers == [[final_layer_name, 0, 0]]

     assert isinstance(specification.train.data_pipeline, TFRecordImagePipelineConfig)
     assert specification.eval is None
Ejemplo n.º 23
0
def run(polyaxonfile):
    """Run every experiment declared by a polyaxonfile, one after another.

    For each index in the file's matrix space the cluster definition is
    looked up. A non-distributed experiment is started directly as the
    master task; a distributed one gets one process for the master and one
    per worker / parameter-server task, which are then joined.

    Args:
        polyaxonfile: value handed to ``PolyaxonFile.read`` and forwarded to
            each spawned process under the ``'polyaxonfile'`` key.
    """
    # (task type, schedule) pairs spawned after the master, in this order.
    secondary_tasks = (
        (TaskType.WORKER, 'train'),
        (TaskType.PS, 'run_std_server'),
    )

    plx_file = PolyaxonFile.read(polyaxonfile)
    # `range` instead of the Python-2-only `xrange`, so this also runs on
    # Python 3.
    for xp in range(plx_file.matrix_space):
        logging.info("running Experiment n: %s", xp)
        cluster, is_distributed = plx_file.get_cluster_def_at(xp)
        if not is_distributed:
            start_experiment_run(plx_file, xp, TaskType.MASTER, 0,
                                 'continuous_train_and_eval')
            current_run['finished'] = True
        else:
            env = {
                'polyaxonfile': polyaxonfile,
                'task_type': TaskType.MASTER,
                'task_id': 0,
                'schedule': 'train_and_evaluate'
            }

            create_process(env)

            # The same `env` dict is updated in place and reused for every
            # task, exactly as before.
            for task_type, schedule in secondary_tasks:
                for task_id in range(cluster.get(task_type, 0)):
                    env['task_id'] = task_id
                    env['task_type'] = task_type
                    env['schedule'] = schedule
                    create_process(env)

            # NOTE(review): `jobs` is not defined in this function;
            # presumably a module-level list filled by `create_process` —
            # confirm.
            for job in jobs:
                job.join()

        # Block until the current experiment is flagged as finished.
        # NOTE(review): in the distributed branch nothing here sets the flag;
        # presumably it is updated elsewhere — confirm.
        while not current_run['finished']:
            time.sleep(30)

        # Reset the flag for the next experiment.
        current_run['finished'] = False
    def test_advanced_file_passes(self):
        # Parse the advanced fixture and check the file-level fields, the
        # environment, the generated cluster and the model/pipeline sections.
        fixture_path = os.path.abspath('tests/fixtures/advanced_file.yml')
        plxfile = PolyaxonFile(fixture_path)

        # File-level fields.
        assert plxfile.version == 1
        assert plxfile.project.name == 'project1'
        assert plxfile.project_path == '/mypath/project1'
        assert plxfile.matrix is None
        assert plxfile.run_type == RunTypes.MINIKUBE
        settings = plxfile.settings
        assert isinstance(settings, SettingsConfig)
        assert isinstance(settings.logging, LoggingConfig)

        # Environment of the first (and only) experiment.
        spec = plxfile.experiment_spec_at(0)
        assert spec.is_runnable
        environment = spec.environment
        assert isinstance(environment, EnvironmentConfig)
        assert environment.n_workers == 5
        assert environment.n_ps == 10
        assert environment.delay_workers_by_global_step is True

        run_config = environment.run_config
        assert isinstance(run_config, RunConfig)
        assert run_config.tf_random_seed == 100
        assert run_config.save_summary_steps == 100
        assert run_config.save_checkpoints_secs == 60

        session = run_config.session
        assert isinstance(session, SessionConfig)
        assert session.allow_soft_placement is True
        assert session.intra_op_parallelism_threads == 2
        assert session.inter_op_parallelism_threads == 2

        # No custom worker/ps configs or resources are declared in the file ...
        assert environment.worker_configs is None
        assert environment.ps_configs is None
        assert environment.resources is None
        assert environment.worker_resources is None
        assert environment.ps_resources is None

        # ... so the spec-level accessors return empty mappings.
        assert spec.worker_configs == {}
        assert spec.ps_configs == {}
        assert spec.worker_resources == {}
        assert spec.ps_resources == {}

        assert spec.cluster_def == ({TaskType.MASTER: 1,
                                     TaskType.WORKER: 5,
                                     TaskType.PS: 10}, True)

        def address(task_type, task_idx):
            # Expected "<task name>:2222" entry for one task of experiment 0.
            name = constants.TASK_NAME.format(project=plxfile.project.name,
                                              experiment=0,
                                              task_type=task_type,
                                              task_idx=task_idx)
            return '{}:2222'.format(name)

        expected_cluster = {
            TaskType.MASTER: [address(TaskType.MASTER, 0)],
            TaskType.WORKER: [address(TaskType.WORKER, idx) for idx in range(5)],
            TaskType.PS: [address(TaskType.PS, idx) for idx in range(10)],
        }
        assert_equal_dict(spec.get_cluster().to_dict(), expected_cluster)

        # Model and graph.
        model = spec.model
        assert isinstance(model, ClassifierConfig)
        assert isinstance(model.loss, MeanSquaredErrorConfig)
        assert isinstance(model.optimizer, AdamConfig)
        assert model.optimizer.learning_rate == 0.21
        graph = model.graph
        assert isinstance(graph, GraphConfig)
        assert len(graph.layers) == 7
        assert graph.input_layers == [['images', 0, 0]]
        assert len(graph.output_layers) == 3
        assert ['super_dense', 0, 0] in graph.output_layers

        # Train and eval pipelines.
        train_pipeline = spec.train.data_pipeline
        assert isinstance(train_pipeline, TFRecordImagePipelineConfig)
        assert len(train_pipeline.feature_processors.feature_processors) == 1
        eval_pipeline = spec.eval.data_pipeline
        assert isinstance(eval_pipeline, TFRecordImagePipelineConfig)
        assert eval_pipeline.feature_processors is None
Ejemplo n.º 25
0
    def test_advanced_file_with_custom_configs_and_resources_passes(self):
        # Load the advanced fixture that declares custom session configs and
        # pod resources, then check the parsed values and the per-task
        # lookups computed from them.
        fixture_path = os.path.abspath(
            'tests/fixtures/advanced_file_with_custom_configs_and_resources.yml')
        spec = PolyaxonFile(fixture_path).specification

        # Top-level sections.
        assert spec.version == 1
        assert spec.project.name == 'project1'
        assert isinstance(spec.settings, SettingsConfig)
        assert isinstance(spec.settings.logging, LoggingConfig)
        assert spec.settings.matrix is None
        assert isinstance(spec.environment, EnvironmentConfig)
        assert spec.is_runnable
        assert spec.framework == Frameworks.TENSORFLOW

        # TensorFlow environment and its run config.
        tf_env = spec.environment.tensorflow
        assert tf_env.n_workers == 5
        assert tf_env.n_ps == 10
        assert tf_env.delay_workers_by_global_step is True
        run_config = tf_env.run_config
        assert isinstance(run_config, RunConfig)
        assert run_config.tf_random_seed == 100
        assert run_config.save_summary_steps == 100
        assert run_config.save_checkpoints_secs == 60

        # Environment-level pod resources.
        env_resources = spec.environment.resources
        assert isinstance(env_resources, PodResourcesConfig)
        assert isinstance(env_resources.cpu, K8SResourcesConfig)
        assert env_resources.cpu.requests == 1
        assert env_resources.cpu.limits == 2

        # Session attached to the run config.
        session = run_config.session
        assert isinstance(session, SessionConfig)
        assert session.allow_soft_placement is True
        assert session.intra_op_parallelism_threads == 2
        assert session.inter_op_parallelism_threads == 2

        # Default worker session config.
        default_worker = tf_env.default_worker_config
        assert isinstance(default_worker, SessionConfig)
        assert default_worker.allow_soft_placement is True
        assert default_worker.intra_op_parallelism_threads == 1
        assert default_worker.inter_op_parallelism_threads == 1

        # The single explicitly declared worker config.
        custom_worker = tf_env.worker_configs[0]
        assert isinstance(custom_worker, SessionConfig)
        assert custom_worker.index == 3
        assert custom_worker.allow_soft_placement is False
        assert custom_worker.intra_op_parallelism_threads == 5
        assert custom_worker.inter_op_parallelism_threads == 5

        assert tf_env.ps_configs is None

        assert tf_env.worker_resources is None

        # Default parameter-server resources.
        default_ps = tf_env.default_ps_resources
        assert isinstance(default_ps, PodResourcesConfig)
        assert isinstance(default_ps.cpu, K8SResourcesConfig)
        assert default_ps.cpu.requests == 2
        assert default_ps.cpu.limits == 4

        # The single explicitly declared parameter-server resources entry.
        custom_ps = tf_env.ps_resources[0]
        assert isinstance(custom_ps, PodResourcesConfig)
        assert isinstance(custom_ps.memory, K8SResourcesConfig)
        assert custom_ps.index == 9
        assert custom_ps.memory.requests == 512
        assert custom_ps.memory.limits == 1024

        # Per-task lookups built from the environment and cluster definition.
        cluster, is_distributed = spec.cluster_def
        lookup_args = dict(environment=spec.environment,
                           cluster=cluster,
                           is_distributed=is_distributed)

        worker_configs = TensorflowSpecification.get_worker_configs(**lookup_args)
        assert len(worker_configs) == tf_env.n_workers
        assert set(worker_configs.values()) == {default_worker, custom_worker}
        assert TensorflowSpecification.get_ps_configs(**lookup_args) == {}

        assert TensorflowSpecification.get_worker_resources(**lookup_args) == {}
        ps_resources = TensorflowSpecification.get_ps_resources(**lookup_args)
        assert len(ps_resources) == tf_env.n_ps
        assert set(ps_resources.values()) == {default_ps, custom_ps}

        # Aggregated resources over the whole cluster.
        expected_total = {
            'cpu': {
                'requests': 1 + 2 * 9,
                'limits': 2 + 4 * 9
            },
            'memory': {
                'requests': 512,
                'limits': 1024
            },
            'gpu': None
        }
        assert spec.total_resources == expected_total

        expected_cluster_def = ({
            TaskType.MASTER: 1,
            TaskType.WORKER: 5,
            TaskType.PS: 10
        }, True)
        assert spec.cluster_def == expected_cluster_def

        # Model and graph.
        model = spec.model
        assert isinstance(model, ClassifierConfig)
        assert isinstance(model.loss, MeanSquaredErrorConfig)
        assert isinstance(model.optimizer, AdamConfig)
        assert model.optimizer.learning_rate == 0.21
        graph = model.graph
        assert isinstance(graph, GraphConfig)
        assert len(graph.layers) == 7
        assert graph.input_layers == [['images', 0, 0]]
        assert len(graph.output_layers) == 3
        assert ['super_dense', 0, 0] in graph.output_layers

        # Train and eval pipelines.
        train_pipeline = spec.train.data_pipeline
        assert isinstance(train_pipeline, TFRecordImagePipelineConfig)
        assert len(train_pipeline.feature_processors.feature_processors) == 1
        eval_pipeline = spec.eval.data_pipeline
        assert isinstance(eval_pipeline, TFRecordImagePipelineConfig)
        assert eval_pipeline.feature_processors is None
 def test_missing_project_raises(self):
     # Building a PolyaxonFile from the fixture without a project section
     # must raise PolyaxonfileError.
     fixture_path = os.path.abspath('tests/fixtures/missing_project.yml')
     with self.assertRaises(PolyaxonfileError):
         PolyaxonFile(fixture_path)
Ejemplo n.º 27
0
    def test_distributed_mxnet_passes(self):
        # Load the distributed MXNet fixture and check the parsed environment,
        # the per-task resource lookups and the aggregated totals.
        fixture_path = os.path.abspath('tests/fixtures/distributed_mxnet_file.yml')
        spec = PolyaxonFile(fixture_path).specification

        # Top-level sections.
        assert spec.version == 1
        assert spec.project.name == 'project1'
        assert isinstance(spec.settings, SettingsConfig)
        assert isinstance(spec.settings.logging, LoggingConfig)
        assert spec.settings.matrix is None
        assert isinstance(spec.environment, EnvironmentConfig)
        assert spec.is_runnable
        assert spec.framework == Frameworks.MXNET

        mxnet_env = spec.environment.mxnet
        assert mxnet_env.n_workers == 5
        assert mxnet_env.n_ps == 10

        # Environment-level pod resources.
        env_resources = spec.environment.resources
        assert isinstance(env_resources, PodResourcesConfig)
        assert isinstance(env_resources.cpu, K8SResourcesConfig)
        assert env_resources.cpu.requests == 1
        assert env_resources.cpu.limits == 2

        # Default worker resources.
        default_worker = mxnet_env.default_worker_resources
        assert isinstance(default_worker, PodResourcesConfig)
        assert isinstance(default_worker.cpu, K8SResourcesConfig)
        assert default_worker.cpu.requests == 3
        assert default_worker.cpu.limits == 3
        assert isinstance(default_worker.memory, K8SResourcesConfig)
        assert default_worker.memory.requests == 256
        assert default_worker.memory.limits == 256

        # The single explicitly declared worker resources entry.
        custom_worker = mxnet_env.worker_resources[0]
        assert isinstance(custom_worker, PodResourcesConfig)
        assert isinstance(custom_worker.memory, K8SResourcesConfig)
        assert custom_worker.index == 3
        assert custom_worker.memory.requests == 300
        assert custom_worker.memory.limits == 300

        # Default parameter-server resources.
        default_ps = mxnet_env.default_ps_resources
        assert isinstance(default_ps, PodResourcesConfig)
        assert isinstance(default_ps.cpu, K8SResourcesConfig)
        assert default_ps.cpu.requests == 2
        assert default_ps.cpu.limits == 4

        # The single explicitly declared parameter-server resources entry.
        custom_ps = mxnet_env.ps_resources[0]
        assert isinstance(custom_ps, PodResourcesConfig)
        assert isinstance(custom_ps.memory, K8SResourcesConfig)
        assert custom_ps.index == 9
        assert custom_ps.memory.requests == 512
        assert custom_ps.memory.limits == 1024

        # Per-task lookups built from the environment and cluster definition.
        cluster, is_distributed = spec.cluster_def
        lookup_args = dict(environment=spec.environment,
                           cluster=cluster,
                           is_distributed=is_distributed)

        worker_resources = MXNetSpecification.get_worker_resources(**lookup_args)
        assert len(worker_resources) == mxnet_env.n_workers
        assert set(worker_resources.values()) == {default_worker, custom_worker}

        ps_resources = MXNetSpecification.get_ps_resources(**lookup_args)
        assert len(ps_resources) == mxnet_env.n_ps
        assert set(ps_resources.values()) == {default_ps, custom_ps}

        # Aggregated resources over the whole cluster.
        expected_total = {
            'cpu': {
                'requests': 1 + 3 * 4 + 2 * 9,
                'limits': 2 + 3 * 4 + 4 * 9
            },
            'memory': {
                'requests': 300 + 256 * 4 + 512,
                'limits': 300 + 256 * 4 + 1024
            },
            'gpu': None
        }
        assert spec.total_resources == expected_total

        expected_cluster_def = ({
            TaskType.MASTER: 1,
            TaskType.WORKER: 5,
            TaskType.SERVER: 10
        }, True)
        assert spec.cluster_def == expected_cluster_def
 def test_wrong_project_name_raises(self):
     # Building a PolyaxonFile from the fixture with an invalid project name
     # must raise PolyaxonfileError.
     fixture_path = os.path.abspath('tests/fixtures/wrong_project_name.yml')
     with self.assertRaises(PolyaxonfileError):
         PolyaxonFile(fixture_path)
Ejemplo n.º 29
0
    def test_advanced_file_passes(self):
        # Load the advanced fixture and check its specification: settings,
        # TensorFlow environment, per-task lookups, cluster and model.
        fixture_path = os.path.abspath('tests/fixtures/advanced_file.yml')
        spec = PolyaxonFile(fixture_path).specification

        # Top-level sections.
        assert spec.version == 1
        assert spec.project.name == 'project1'
        assert isinstance(spec.settings, SettingsConfig)
        assert isinstance(spec.settings.logging, LoggingConfig)
        assert spec.settings.matrix is None
        assert spec.is_runnable
        assert isinstance(spec.environment, EnvironmentConfig)
        assert spec.framework == Frameworks.TENSORFLOW

        # TensorFlow environment and its run config.
        tf_env = spec.environment.tensorflow
        assert tf_env.n_workers == 5
        assert tf_env.n_ps == 10
        assert tf_env.delay_workers_by_global_step is True
        run_config = tf_env.run_config
        assert isinstance(run_config, RunConfig)
        assert run_config.tf_random_seed == 100
        assert run_config.save_summary_steps == 100
        assert run_config.save_checkpoints_secs == 60
        session = run_config.session
        assert isinstance(session, SessionConfig)
        assert session.allow_soft_placement is True
        assert session.intra_op_parallelism_threads == 2
        assert session.inter_op_parallelism_threads == 2

        # No custom worker/ps configs or resources are declared in the file ...
        assert tf_env.worker_configs is None
        assert tf_env.ps_configs is None
        assert tf_env.worker_resources is None
        assert tf_env.ps_resources is None

        cluster, is_distributed = spec.cluster_def
        lookup_args = dict(environment=spec.environment,
                           cluster=cluster,
                           is_distributed=is_distributed)

        # ... so every per-task lookup comes back empty.
        assert TensorflowSpecification.get_worker_configs(**lookup_args) == {}
        assert TensorflowSpecification.get_ps_configs(**lookup_args) == {}
        assert TensorflowSpecification.get_worker_resources(**lookup_args) == {}
        assert TensorflowSpecification.get_ps_resources(**lookup_args) == {}

        expected_cluster_def = ({
            TaskType.MASTER: 1,
            TaskType.WORKER: 5,
            TaskType.PS: 10
        }, True)
        assert spec.cluster_def == expected_cluster_def

        # Model and graph.
        model = spec.model
        assert isinstance(model, ClassifierConfig)
        assert isinstance(model.loss, MeanSquaredErrorConfig)
        assert isinstance(model.optimizer, AdamConfig)
        assert model.optimizer.learning_rate == 0.21
        graph = model.graph
        assert isinstance(graph, GraphConfig)
        assert len(graph.layers) == 7
        assert graph.input_layers == [['images', 0, 0]]
        assert len(graph.output_layers) == 3
        assert ['super_dense', 0, 0] in graph.output_layers

        # Train and eval pipelines.
        train_pipeline = spec.train.data_pipeline
        assert isinstance(train_pipeline, TFRecordImagePipelineConfig)
        assert len(train_pipeline.feature_processors.feature_processors) == 1
        eval_pipeline = spec.eval.data_pipeline
        assert isinstance(eval_pipeline, TFRecordImagePipelineConfig)
        assert eval_pipeline.feature_processors is None
Ejemplo n.º 30
0
 def test_advanced_file_passes(self):
     PolyaxonFile(os.path.abspath('tests/fixtures/advanced_file.yml'))