def test_op_specification(self):
    # Compiling a full operation spec merges the component's and the
    # operation's metadata and preserves init/sidecar container definitions;
    # a runPatch env then overrides the main container resources.
    config_dict = {
        "version": 1.1,
        "kind": "operation",
        "name": "foo",
        "description": "a description",
        "tags": ["value"],
        "params": {"param1": {"value": "foo"}, "param2": {"value": "bar"}},
        "trigger": "all_succeeded",
        "component": {
            "name": "build-template",
            "tags": ["kaniko"],
            "run": {
                "kind": V1RunKind.JOB,
                "container": {"image": "test"},
                "init": [
                    {
                        "container": {
                            "name": "polyaxon-init",
                            "image": "foo",
                            "args": "dev",
                        }
                    }
                ],
                "sidecars": [{"name": "foo", "image": "foo", "args": "dev"}],
            },
        },
    }
    op_config = V1Operation.read(values=config_dict)
    run_config = OperationSpecification.compile_operation(op_config)
    assert run_config.name == "foo"
    assert run_config.description == "a description"
    # Component tags ("kaniko") and operation tags ("value") are merged.
    assert run_config.tags == ["kaniko", "value"]
    assert [
        {
            "container": {
                "name": i.container.name,
                "image": i.container.image,
                "args": i.container.args,
            }
        }
        for i in run_config.run.init
    ] == [{"container": {"name": "polyaxon-init", "image": "foo", "args": "dev"}}]
    env = {
        "runPatch": {
            "container": {
                "resources": {
                    "requests": {"gpu": 1, "tpu": 1},
                    "limits": {"gpu": 1, "tpu": 1},
                }
            }
        }
    }
    # Recompiling with the runPatch env overrides the container resources.
    run_config = OperationSpecification.compile_operation(op_config, env)
    assert (
        run_config.run.container.resources
        == env["runPatch"]["container"]["resources"]
    )
def test_non_yaml_spec(self):
    """Reading a payload that is not valid YAML raises PolyaxonSchemaError."""
    config = ",sdf;ldjks"
    for spec_cls in (OperationSpecification, ComponentSpecification):
        with self.assertRaises(PolyaxonSchemaError):
            spec_cls.read(config)
def test_op_specification_with_override_info(self):
    # Operation-level name/description/tags carry through compilation, init
    # sections (connection + container) are preserved, and a "run" env patch
    # overrides the main container resources.
    config_dict = {
        "version": 1.1,
        "kind": "operation",
        "name": "foo",
        "description": "a description",
        "tags": ["value"],
        "params": {"param1": {"value": "foo"}, "param2": {"value": "bar"}},
        "trigger": "all_succeeded",
        "component": {
            "name": "build-template",
            "tags": ["kaniko"],
            "run": {
                "kind": V1RunKind.JOB,
                "init": [
                    {
                        "connection": "foo",
                        "container": {
                            "name": "polyaxon-init",
                            "args": "--branch=dev",
                        },
                    }
                ],
                "container": {"name": "polyaxon-main", "image": "test"},
            },
        },
    }
    op_config = V1Operation.read(values=config_dict)
    assert op_config.name == "foo"
    assert op_config.description == "a description"
    assert op_config.tags == ["value"]
    run_config = OperationSpecification.compile_operation(op_config)
    assert run_config.name == "foo"
    assert run_config.description == "a description"
    # NOTE(review): unlike test_op_specification, tags are not merged with
    # the component's ("kaniko") here — confirm whether that is intended.
    assert run_config.tags == ["value"]
    assert [i.to_light_dict() for i in run_config.run.init] == [
        {
            "connection": "foo",
            "container": {"name": "polyaxon-init", "args": "--branch=dev"},
        }
    ]
    env = {
        "run": {
            "container": {
                "resources": {
                    "requests": {"gpu": 1, "tpu": 1},
                    "limits": {"gpu": 1, "tpu": 1},
                }
            }
        }
    }
    run_config = OperationSpecification.compile_operation(op_config, env)
    assert (
        run_config.run.container.resources == env["run"]["container"]["resources"]
    )
def create_run():
    """Compile the operation, build its k8s resource, and spawn the run.

    Closure: relies on names from the enclosing scope (op_spec, name, owner,
    project_name, compiler, Spawner, settings, get_resource_name, ...).
    Exits the process with status 1 on compilation/k8s/pod errors.
    """
    click.echo("Creating a run.")
    try:
        compiled_operation = OperationSpecification.compile_operation(op_spec)
        run_name = compiled_operation.name or name
        # NOTE(review): run_uuid/run_path receive the run *name* and
        # project_uuid the project *name* — confirm this mapping is intended.
        resource = compiler.make(
            owner_name=owner,
            project_name=project_name,
            project_uuid=project_name,
            run_uuid=run_name,
            run_name=name,
            run_path=run_name,
            compiled_operation=compiled_operation,
            params=op_spec.params,
            default_sa=settings.AGENT_CONFIG.runs_sa,
        )
        Spawner(namespace=settings.AGENT_CONFIG.namespace).create(
            run_uuid=run_name,
            run_kind=compiled_operation.get_run_kind(),
            resource=resource,
        )
        # cache.cache(config_manager=RunConfigManager, response=response)
        run_job_uid = get_resource_name(run_name)
        Printer.print_success("A new run `{}` was created".format(run_job_uid))
    except (PolyaxonCompilerError, PolyaxonK8SError, PolypodException) as e:
        handle_cli_error(e, message="Could not create a run.")
        sys.exit(1)
def get_op_specification(
    self, params=None, profile=None, queue=None, nocache=None
) -> V1Operation:
    """Build a V1Operation from this file's config plus CLI overrides.

    Args:
        params: optional mapping of param overrides; must be a Mapping.
        profile: optional run profile name.
        queue: optional queue name.
        nocache: when not None, sets cache.disable accordingly.

    Returns:
        V1Operation: the merged specification (params already validated).

    Raises:
        PolyaxonfileError: if params is not a valid mapping.
    """
    job_data = {"version": self.config.version, "kind": kinds.OPERATION}
    if params:
        if not isinstance(params, Mapping):
            raise PolyaxonfileError(
                "Params: `{}` must be a valid mapping".format(params)
            )
        job_data["params"] = params
    if profile:
        job_data["profile"] = profile
    if queue:
        job_data["queue"] = queue
    if nocache is not None:
        job_data["cache"] = {"disable": nocache}
    if self.config.kind == kinds.OPERATION:
        # Operation config: overlay the CLI overrides on top of it.
        config = get_specification(data=[self.config.to_dict(), job_data])
    else:
        # Component config: embed it under the operation's component section.
        job_data["component"] = self.config.to_dict()
        config = get_specification(data=[job_data])
    params = copy.deepcopy(config.params)
    # Sanity check if params were passed
    run_config = OperationSpecification.compile_operation(config)
    run_config.validate_params(params=params, is_template=False)
    if run_config.is_dag_run:
        # Applying the context checks that the DAG resolves correctly.
        CompiledOperationSpecification.apply_context(run_config)
    return config
def test_matrix_file_with_required_inputs_passes(self):
    # A hyperband matrix file with required typed inputs compiles and
    # resolves its context without errors.
    plx_file = check_polyaxonfile(
        polyaxonfile=os.path.abspath(
            "tests/fixtures/typing/matrix_job_required_inputs_file.yml"
        ),
        is_cli=False,
    )
    run_config = OperationSpecification.compile_operation(plx_file)
    run_config = CompiledOperationSpecification.apply_context(run_config)
    assert run_config.version == 1.1
    assert isinstance(run_config.matrix, V1Hyperband)
    assert isinstance(run_config.matrix.params["lr"], V1HpLinSpace)
    assert isinstance(run_config.matrix.params["loss"], V1HpChoice)
    assert run_config.matrix.params["lr"].to_dict() == {
        "kind": "linspace",
        "value": {"start": 0.01, "stop": 0.1, "num": 5},
    }
    assert run_config.matrix.params["loss"].to_dict() == {
        "kind": "choice",
        "value": ["MeanSquaredError", "AbsoluteDifference"],
    }
    assert run_config.matrix.concurrency == 2
    assert isinstance(run_config.matrix, V1Hyperband)
    assert run_config.matrix.kind == V1Hyperband.IDENTIFIER
    assert run_config.matrix.early_stopping is None
def test_mapping_early_stopping_file_passes(self): plx_file = check_polyaxonfile( polyaxonfile=os.path.abspath( "tests/fixtures/plain/mapping_job_file_early_stopping.yml"), is_cli=False, to_op=False, ) # Get compiled_operation data config_run = OperationSpecification.compile_operation(plx_file) config_run = CompiledOperationSpecification.apply_context(config_run) assert config_run.version == 1.05 assert isinstance(config_run.parallel, V1Mapping) assert config_run.parallel.values == [ { "lr": 0.001, "loss": "MeanSquaredError" }, { "lr": 0.1, "loss": "AbsoluteDifference" }, ] assert config_run.parallel.concurrency == 2 assert isinstance(config_run.parallel, V1Mapping) assert config_run.parallel.kind == V1Mapping.IDENTIFIER assert len(config_run.parallel.early_stopping) == 1 assert isinstance(config_run.parallel.early_stopping[0], V1MetricEarlyStopping)
def test_matrix_file_passes_int_float_types(self): plx_file = check_polyaxonfile( polyaxonfile=os.path.abspath( "tests/fixtures/plain/matrix_job_file_with_int_float_types.yml" ), is_cli=False, ) # Get compiled_operation data run_config = OperationSpecification.compile_operation(plx_file) run_config = CompiledOperationSpecification.apply_context(run_config) assert run_config.version == 1.05 assert isinstance(run_config.parallel, V1GridSearch) assert isinstance(run_config.parallel.params["param1"], V1HpChoice) assert isinstance(run_config.parallel.params["param2"], V1HpChoice) assert run_config.parallel.params["param1"].to_dict() == { "kind": "choice", "value": [1, 2], } assert run_config.parallel.params["param2"].to_dict() == { "kind": "choice", "value": [3.3, 4.4], } assert run_config.parallel.concurrency == 2 assert isinstance(run_config.parallel, V1GridSearch) assert run_config.parallel.kind == V1GridSearch.IDENTIFIER assert run_config.parallel.early_stopping is None
def test_matrix_early_stopping_file_passes(self): plx_file = check_polyaxonfile( polyaxonfile=os.path.abspath( "tests/fixtures/plain/matrix_job_file_early_stopping.yml"), is_cli=False, to_op=False, ) # Get compiled_operation data run_config = OperationSpecification.compile_operation(plx_file) run_config = CompiledOperationSpecification.apply_context(run_config) assert run_config.version == 1.05 assert isinstance(run_config.parallel, V1RandomSearch) assert isinstance(run_config.parallel.params["lr"], V1HpLinSpace) assert isinstance(run_config.parallel.params["loss"], V1HpChoice) assert run_config.parallel.params["lr"].to_dict() == { "kind": "linspace", "value": { "start": 0.01, "stop": 0.1, "num": 5 }, } assert run_config.parallel.params["loss"].to_dict() == { "kind": "choice", "value": ["MeanSquaredError", "AbsoluteDifference"], } assert run_config.parallel.concurrency == 2 assert run_config.parallel.num_runs == 300 assert isinstance(run_config.parallel, V1RandomSearch) assert run_config.parallel.kind == V1RandomSearch.IDENTIFIER assert len(run_config.parallel.early_stopping) == 1 assert isinstance(run_config.parallel.early_stopping[0], V1MetricEarlyStopping)
def test_op_specification_with_nocache(self):
    # Cache settings (disable + ttl) on the operation survive compilation
    # unchanged; tags merge and runPatch overrides resources as usual.
    config_dict = {
        "version": 1.1,
        "kind": "operation",
        "name": "foo",
        "description": "a description",
        "tags": ["value"],
        "cache": {"disable": True, "ttl": 12},
        "params": {"param1": {"value": "foo"}, "param2": {"value": "bar"}},
        "trigger": "all_succeeded",
        "component": {
            "name": "build-template",
            "tags": ["kaniko"],
            "run": {
                "kind": V1RunKind.JOB,
                "container": {"name": "polyaxon-main", "image": "test"},
                "init": [{"connection": "some-connection"}],
            },
        },
    }
    op_config = V1Operation.read(values=config_dict)
    run_config = OperationSpecification.compile_operation(op_config)
    assert run_config.name == "foo"
    assert run_config.description == "a description"
    # Component tags and operation tags are merged.
    assert run_config.tags == ["kaniko", "value"]
    assert run_config.cache.to_dict() == {"disable": True, "ttl": 12}
    assert [i.to_light_dict() for i in run_config.run.init] == [
        {"connection": "some-connection"}
    ]
    env = {
        "runPatch": {
            "container": {
                "resources": {
                    "requests": {"gpu": 1, "tpu": 1},
                    "limits": {"gpu": 1, "tpu": 1},
                }
            }
        }
    }
    # runPatch overrides the main container resources.
    run_config = OperationSpecification.compile_operation(op_config, env)
    assert (
        run_config.run.container.resources
        == env["runPatch"]["container"]["resources"]
    )
def test_simple_file_with_run_patch_passes(self):
    # The operation's runPatch (environment annotations) is merged into the
    # component's run section during compilation, while volumes and the
    # container definition pass through unchanged.
    op_config = OperationSpecification.read(
        os.path.abspath("tests/fixtures/plain/simple_job_run_patch.yml"),
    )
    assert op_config.version == 1.1
    assert op_config.tags is None
    assert op_config.run_patch["environment"]["annotations"] == {
        "tf-version.cloud-tpus.google.com": "2.2"
    }
    assert len(op_config.component.run.volumes) == 1
    assert op_config.component.run.to_dict()["volumes"][0] == {
        "name": "foo",
        "secret": {"secretName": "mysecret"},
    }
    assert op_config.component.run.container.image == "python-with-boto3"
    assert op_config.component.run.container.command == "python download-s3-bucket"
    assert op_config.component.run.container.resources == {
        "requests": {"nvidia.com/gpu": 1},
        "limits": {"nvidia.com/gpu": 1},
    }
    assert op_config.component.run.container.volume_mounts == [
        {"name": "foo", "mount_path": "~/.aws/credentials", "readOnly": True}
    ]
    run_config = OperationSpecification.compile_operation(op_config)
    assert run_config.version == 1.1
    assert run_config.tags is None
    assert len(run_config.run.volumes) == 1
    # The runPatch annotations are now part of the compiled run environment.
    assert run_config.run.environment.annotations == {
        "tf-version.cloud-tpus.google.com": "2.2"
    }
    assert run_config.run.to_dict()["volumes"][0] == {
        "name": "foo",
        "secret": {"secretName": "mysecret"},
    }
    assert run_config.run.container.image == "python-with-boto3"
    assert run_config.run.container.command == "python download-s3-bucket"
    assert run_config.run.container.resources == {
        "requests": {"nvidia.com/gpu": 1},
        "limits": {"nvidia.com/gpu": 1},
    }
    assert run_config.run.container.volume_mounts == [
        {"name": "foo", "mount_path": "~/.aws/credentials", "readOnly": True}
    ]
def test_matrix_file_passes(self):
    # A hyperband matrix file using several distribution kinds (linspace,
    # choice, normal, qloguniform, pchoice) compiles and keeps each param's
    # kind/value after applying operation contexts.
    plx_file = check_polyaxonfile(
        polyaxonfile=os.path.abspath("tests/fixtures/plain/matrix_job_file.yml"),
        is_cli=False,
    )
    run_config = OperationSpecification.compile_operation(plx_file)
    run_config = CompiledOperationSpecification.apply_operation_contexts(run_config)
    assert run_config.version == 1.1
    assert isinstance(run_config.matrix, V1Hyperband)
    assert isinstance(run_config.matrix.params["lr"], V1HpLinSpace)
    assert isinstance(run_config.matrix.params["loss"], V1HpChoice)
    assert run_config.matrix.params["lr"].to_dict() == {
        "kind": "linspace",
        "value": {"start": 0.01, "stop": 0.1, "num": 5},
    }
    assert run_config.matrix.params["loss"].to_dict() == {
        "kind": "choice",
        "value": ["MeanSquaredError", "AbsoluteDifference"],
    }
    assert run_config.matrix.params["normal_rate"].to_dict() == {
        "kind": "normal",
        "value": {"loc": 0, "scale": 0.9},
    }
    assert run_config.matrix.params["dropout"].to_dict() == {
        "kind": "qloguniform",
        "value": {"high": 0.8, "low": 0, "q": 0.1},
    }
    assert run_config.matrix.params["activation"].to_dict() == {
        "kind": "pchoice",
        "value": [["relu", 0.1], ["sigmoid", 0.8]],
    }
    assert run_config.matrix.params["model"].to_dict() == {
        "kind": "choice",
        "value": ["CDNA", "DNA", "STP"],
    }
    assert run_config.matrix.concurrency == 2
    assert isinstance(run_config.matrix, V1Hyperband)
    assert run_config.matrix.kind == V1Hyperband.IDENTIFIER
    assert run_config.matrix.early_stopping is None
def get_op_specification(
    config: Union[V1Component, V1Operation] = None,
    hub: str = None,
    params: Dict = None,
    profile: str = None,
    queue: str = None,
    nocache: bool = None,
    path_context: str = None,
    validate_params: bool = True,
) -> V1Operation:
    """Create a V1Operation from a component/operation config or a hub ref.

    Args:
        config: component or operation config to build the operation from.
        hub: hub component reference; used when no config is given, or set
            as the config's hub_ref when that is missing.
        params: mapping of param overrides; must be a Mapping.
        profile: run profile name to set on the operation.
        queue: queue name (validated via get_queue_info) to set.
        nocache: when not None, sets cache.disable accordingly.
        path_context: path applied to DAG runs before resolving contexts.
        validate_params: whether to compile and validate params.

    Returns:
        V1Operation: the merged specification.

    Raises:
        PolyaxonfileError: if params is not a valid mapping.
    """
    job_data = {
        "version": config.version if config else pkg.SCHEMA_VERSION,
        "kind": kinds.OPERATION,
    }
    if params:
        if not isinstance(params, Mapping):
            raise PolyaxonfileError(
                "Params: `{}` must be a valid mapping".format(params)
            )
        job_data["params"] = params
    if profile:
        job_data["profile"] = profile
    if queue:
        # Check only
        get_queue_info(queue)
        job_data["queue"] = queue
    if nocache is not None:
        job_data["cache"] = {"disable": nocache}
    if config and config.kind == kinds.COMPONENT:
        # Component: embed under the operation's component section.
        job_data["component"] = config.to_dict()
        config = get_specification(data=[job_data])
    elif config and config.kind == kinds.OPERATION:
        # Operation: overlay the overrides on top of it.
        config = get_specification(data=[config.to_dict(), job_data])
    elif hub:
        job_data["hubRef"] = hub
        config = get_specification(data=[job_data])
    if hub and config.hub_ref is None:
        config.hub_ref = hub
    hub = config.hub_ref
    public_hub = config.has_public_hub_reference
    params = copy.deepcopy(config.params)
    # Sanity check if params were passed and we are not dealing with a hub component
    if validate_params and not (hub and not public_hub):
        run_config = OperationSpecification.compile_operation(config)
        run_config.validate_params(params=params, is_template=False)
        if run_config.is_dag_run:
            run_config.run.set_path_context(path_context)
            CompiledOperationSpecification.apply_operation_contexts(run_config)
    return config
def test_cron_pipeline(self):
    """A cron pipeline compiles to one operation with a cron schedule."""
    plx_file = PolyaxonFile(
        os.path.abspath("tests/fixtures/pipelines/simple_cron_pipeline.yml")
    )
    # Get compiled_operation data
    compiled = CompiledOperationSpecification.apply_context(
        OperationSpecification.compile_operation(plx_file.config)
    )
    assert compiled.run is not None
    operations = compiled.run.operations
    assert len(operations) == 1
    assert operations[0].name == "cron-task"
    schedule = compiled.schedule
    assert schedule is not None
    assert schedule.kind == "cron"
    assert schedule.cron == "0 0 * * *"
def test_interval_pipeline(self):
    """An interval pipeline compiles to one task with a recurrent schedule."""
    plx_file = PolyaxonFile(
        os.path.abspath("tests/fixtures/pipelines/simple_recurrent_pipeline.yml")
    )
    # Get compiled_operation data
    compiled = CompiledOperationSpecification.apply_context(
        OperationSpecification.compile_operation(plx_file.config)
    )
    assert compiled.run is not None
    operations = compiled.run.operations
    assert len(operations) == 1
    assert operations[0].name == "recurrent-task"
    schedule = compiled.schedule
    assert schedule is not None
    assert schedule.kind == "interval"
    assert schedule.start_at.year == 2019
    assert schedule.frequency.seconds == 120
    assert schedule.depends_on_past is True
    assert schedule is not None
def get_op_specification(
    config: Union[V1Component, V1Operation] = None,
    hub=None,
    params=None,
    profile=None,
    queue=None,
    nocache=None,
) -> V1Operation:
    """Create a V1Operation from a component/operation config or a hub ref.

    Args:
        config: component or operation config to build the operation from.
        hub: hub component reference; takes precedence over `config`.
        params: mapping of param overrides; must be a Mapping.
        profile: run profile name to set on the operation.
        queue: queue name to set on the operation.
        nocache: when not None, sets cache.disable accordingly.

    Returns:
        V1Operation: the merged specification.

    Raises:
        PolyaxonfileError: if params is not a valid mapping, or if neither
            a config nor a hub reference is provided.
    """
    if config is None and hub is None:
        # BUG FIX: previously this fell through to `config.kind` and crashed
        # with an AttributeError; fail early with a clear error instead.
        raise PolyaxonfileError(
            "An operation specification requires a config or a hub reference."
        )
    job_data = {
        "version": config.version if config else pkg.SCHEMA_VERSION,
        "kind": kinds.OPERATION,
    }
    if params:
        if not isinstance(params, Mapping):
            raise PolyaxonfileError(
                "Params: `{}` must be a valid mapping".format(params)
            )
        job_data["params"] = params
    if profile:
        job_data["profile"] = profile
    if queue:
        job_data["queue"] = queue
    if nocache is not None:
        job_data["cache"] = {"disable": nocache}
    if hub:
        job_data["hubRef"] = hub
        config = get_specification(data=[job_data])
    elif config.kind == kinds.OPERATION:
        config = get_specification(data=[config.to_dict(), job_data])
    else:
        job_data["component"] = config.to_dict()
        config = get_specification(data=[job_data])
    params = copy.deepcopy(config.params)
    # Sanity check if params were passed and we are not dealing with a hub component
    if not hub:
        run_config = OperationSpecification.compile_operation(config)
        run_config.validate_params(params=params, is_template=False)
        if run_config.is_dag_run:
            CompiledOperationSpecification.apply_context(run_config)
    return config
def test_build_run_pipeline(self):
    # End-to-end DAG pipeline: two operations (build -> run) over two
    # components. Checks op-spec extraction, param resolution from the
    # upstream op's outputs, and context application on the final run.
    run_config = V1CompiledOperation.read(
        [
            os.path.abspath("tests/fixtures/pipelines/build_run_pipeline.yml"),
            {"kind": "compiled_operation"},
        ]
    )
    run_config = CompiledOperationSpecification.apply_context(run_config)
    assert len(run_config.run.operations) == 2
    assert run_config.run.operations[0].name == "build"
    assert run_config.run.operations[1].name == "run"
    assert run_config.is_dag_run is True
    assert run_config.has_pipeline is True
    assert run_config.schedule is None
    assert len(run_config.run.components) == 2
    assert run_config.run.components[0].name == "experiment-template"
    assert run_config.run.components[0].termination.to_dict() == {"maxRetries": 2}
    # The experiment template still carries unresolved {{ ... }} params.
    assert run_config.run.components[0].run.to_dict() == {
        "kind": V1RunKind.JOB,
        "environment": {
            "nodeSelector": {"polyaxon": "experiments"},
            "serviceAccountName": "service",
            "imagePullSecrets": ["secret1", "secret2"],
        },
        "container": {
            "image": "{{ image }}",
            "command": ["python3", "main.py"],
            "args": "--lr={{ lr }}",
            "name": "polyaxon-main",
            "resources": {"requests": {"cpu": 1}},
        },
    }
    assert run_config.run.components[1].name == "build-template"
    assert run_config.run.components[1].run.container.image == "base"
    assert run_config.run.operations[0].name == "build"
    # Create a an op spec
    run_config.run.set_op_component("run")
    assert run_config.run.operations[1].has_component_reference is True
    job_config = run_config.run.get_op_spec_by_index(1)
    # The "image" param references the upstream build op's outputs.
    assert {p: job_config.params[p].to_dict() for p in job_config.params} == {
        "image": {"value": "outputs.docker-image", "ref": "ops.build"},
        "lr": {"value": 0.001},
    }
    run_config = OperationSpecification.compile_operation(job_config)
    run_config.apply_params({"image": {"value": "foo"}, "lr": {"value": 0.001}})
    run_config = CompiledOperationSpecification.apply_context(run_config)
    run_config = CompiledOperationSpecification.apply_run_contexts(run_config)
    assert run_config.termination.to_dict() == {"maxRetries": 2}
    # After applying params and contexts the templated values are concrete.
    assert run_config.run.to_dict() == {
        "kind": V1RunKind.JOB,
        "environment": {
            "nodeSelector": {"polyaxon": "experiments"},
            "serviceAccountName": "service",
            "imagePullSecrets": ["secret1", "secret2"],
        },
        "container": {
            "image": "foo",
            "command": ["python3", "main.py"],
            "args": "--lr=0.001",
            "name": "polyaxon-main",
            "resources": {"requests": {"cpu": 1}},
        },
    }
def test_job_specification_raises_for_missing_container_section(self):
    """A job spec without a container section must fail to load."""
    fixture_path = os.path.abspath(
        "tests/fixtures/plain/job_missing_container.yml"
    )
    with self.assertRaises(PolyaxonfileError):
        OperationSpecification.read(fixture_path)
def get_op_specification(
    config: Union[V1Component, V1Operation] = None,
    hub: str = None,
    params: Dict = None,
    presets: List[str] = None,
    queue: str = None,
    nocache: bool = None,
    cache: bool = None,
    validate_params: bool = True,
    preset_files: List[str] = None,
    git_init: V1Init = None,
) -> V1Operation:
    """Create a V1Operation from a config or hub ref, applying presets.

    Args:
        config: component or operation config to build the operation from.
        hub: hub component reference; set as hub_ref when missing.
        params: mapping of param overrides; must be a Mapping.
        presets: preset names to set on the operation.
        queue: queue name (validated via get_queue_info) to set.
        nocache: disables caching when truthy (mutually exclusive with cache).
        cache: enables caching when truthy (mutually exclusive with nocache).
        validate_params: whether to compile and validate params.
        preset_files: preset polyaxonfiles patched onto the spec, in order.
        git_init: init connection turned into a pre-merge run patch.

    Returns:
        V1Operation: the merged and patched specification.

    Raises:
        PolyaxonfileError: if both cache and nocache are passed, or params
            is not a valid mapping.
    """
    if cache and nocache:
        raise PolyaxonfileError("Received both cache and nocache")
    job_data = {
        "version": config.version if config else pkg.SCHEMA_VERSION,
        "kind": kinds.OPERATION,
    }
    if params:
        if not isinstance(params, Mapping):
            raise PolyaxonfileError(
                "Params: `{}` must be a valid mapping".format(params)
            )
        job_data["params"] = params
    if presets:
        job_data["presets"] = presets
    if queue:
        # Check only
        get_queue_info(queue)
        job_data["queue"] = queue
    if cache:
        job_data["cache"] = {"disable": False}
    if nocache:
        job_data["cache"] = {"disable": True}
    if config and config.kind == kinds.COMPONENT:
        job_data["component"] = config.to_dict()
        config = get_specification(data=[job_data])
    elif config and config.kind == kinds.OPERATION:
        config = get_specification(data=[config.to_dict(), job_data])
    elif hub:
        job_data["hubRef"] = hub
        config = get_specification(data=[job_data])
    if hub and config.hub_ref is None:
        config.hub_ref = hub
    # Check if there's presets
    # BUG FIX: preset_files defaults to None; iterating it unguarded raised
    # a TypeError whenever no preset files were passed.
    for preset_plx_file in preset_files or []:
        preset_plx_file = OperationSpecification.read(preset_plx_file, is_preset=True)
        config = config.patch(preset_plx_file, strategy=preset_plx_file.patch_strategy)
    # Turn git_init to a pre_merge preset
    if git_init:
        git_preset = V1Operation(
            run_patch={"init": [git_init.to_dict()]}, is_preset=True
        )
        config = config.patch(git_preset, strategy=V1PatchStrategy.PRE_MERGE)
    # Sanity check if params were passed and we are not dealing with a hub component
    params = copy.deepcopy(config.params)
    if validate_params:
        # Avoid in-place patch
        run_config = get_specification(config.to_dict())
        run_config = OperationSpecification.compile_operation(run_config)
        run_config.validate_params(params=params, is_template=False)
        if run_config.is_dag_run:
            CompiledOperationSpecification.apply_operation_contexts(run_config)
    return config