def test_get_registry_context_in_cluster(self):
    """Check the registry context resolved per build backend.

    First verifies the host/credentials returned for each backend when no
    registry access is set, then sets a registry access (without a host)
    and verifies the KANIKO context carries the secret information.
    """
    # (build backend, expected registry host) when no registry access is set.
    expectations = (
        (None, 'registry_localhost'),
        (BuildBackend.NATIVE, 'registry_localhost'),
        (BuildBackend.KANIKO, 'registry_host'),
    )
    for backend, expected_host in expectations:
        context = get_registry_context(build_backend=backend)
        assert context.host == expected_host
        assert context.secret is None
        assert context.secret_items is None
        assert context.insecure is True

    # Creating a registry access and using it with in cluster registry
    k8s_secret = K8SSecret.objects.create(owner=self.owner,
                                          name='my_secret',
                                          k8s_ref='my_secret')
    access = RegistryAccess.objects.create(owner=self.owner,
                                           k8s_secret=k8s_secret,
                                           name='d-registry')
    conf.set(ACCESS_REGISTRY, access.id)

    context = get_registry_context(build_backend=BuildBackend.KANIKO)
    assert context.host == 'registry_host'
    assert context.secret == k8s_secret.k8s_ref
    assert context.secret_items == k8s_secret.items
    assert context.insecure is True
def test_get_registry_context_in_cluster(self):
    """Check the registry context resolved per build backend.

    For every backend the context is expected to carry no secret, no
    secret items, and to be flagged as insecure; only the host differs.
    """
    # (build backend, expected registry host)
    expectations = (
        (None, 'registry_localhost'),
        (BuildBackend.NATIVE, 'registry_localhost'),
        (BuildBackend.KANIKO, 'registry_host'),
    )
    for backend, expected_host in expectations:
        context = get_registry_context(build_backend=backend)
        assert context.host == expected_host
        assert context.secret is None
        assert context.secret_items is None
        assert context.insecure is True
def test_get_registry_context_no_config_not_in_cluster(self):
    """Every build backend must raise ContainerRegistryError here."""
    for backend in (None, BuildBackend.NATIVE, BuildBackend.KANIKO):
        with self.assertRaises(ContainerRegistryError):
            get_registry_context(build_backend=backend)
def test_get_external_registry_context(self):
    """A registry access with an explicit host yields a secured context.

    The context is expected to expose the access host, the secret's
    ``k8s_ref`` and ``items``, and ``insecure`` set to False.
    """
    external_host = 'https://index.docker.io/v1/foo'
    k8s_secret = K8SSecret.objects.create(owner=self.owner,
                                          name='my_secret',
                                          k8s_ref='my_secret')
    access = RegistryAccess.objects.create(owner=self.owner,
                                           host=external_host,
                                           k8s_secret=k8s_secret,
                                           name='d-registry')
    conf.set(ACCESS_REGISTRY, access.id)

    context = get_registry_context(build_backend=None)
    assert context.host == external_host
    assert context.secret == k8s_secret.k8s_ref
    assert context.secret_items == k8s_secret.items
    assert context.insecure is False
def test_get_external_registry_context(self):
    """A registry access with an explicit host yields a secured context.

    The registry access is attached through a ``Config`` row; the context
    is expected to expose the access host, the secret's ``secret_ref`` and
    ``keys``, and ``insecure`` set to False.
    """
    external_host = 'https://index.docker.io/v1/foo'
    k8s_secret = K8SSecret.objects.create(owner=self.owner,
                                          name='my_secret',
                                          secret_ref='my_secret')
    access = RegistryAccess.objects.create(owner=self.owner,
                                           host=external_host,
                                           k8s_secret=k8s_secret,
                                           name='d-registry')
    Config.objects.create(owner=self.owner, registry_access=access)

    context = get_registry_context(build_backend=None)
    assert context.host == external_host
    assert context.secret == k8s_secret.secret_ref
    assert context.secret_keys == k8s_secret.keys
    assert context.insecure is False
def start_notebook(notebook):
    """Schedule a notebook and spawn it with the image of its build job.

    The notebook is first marked SCHEDULED. It is marked FAILED (with an
    explanatory message) when the registry context cannot be resolved,
    when the image info cannot be obtained, or when spawning raises.
    On success the job definition is stored on the notebook.

    Args:
        notebook: the notebook job to start.

    Returns:
        None in every case.
    """
    # Update job status to show that its started
    notebook.set_status(JobLifeCycle.SCHEDULED)

    try:
        registry = get_registry_context(build_backend=None)
    except ContainerRegistryError:
        notebook.set_status(
            JobLifeCycle.FAILED,
            message='Could not start the notebook, please check your registry configuration.')
        return

    try:
        image_name, image_tag = get_image_info(build_job=notebook.build_job,
                                               registry_host=registry.host)
    except (ValueError, AttributeError):
        _logger.error('Could not start the notebook.', exc_info=True)
        notebook.set_status(JobLifeCycle.FAILED,
                            message='Image info was not found.')
        return

    docker_image = '{}:{}'.format(image_name, image_tag)
    _logger.info('Start notebook with built image `%s`', docker_image)

    spawner = NotebookSpawner(
        project_name=notebook.project.unique_name,
        project_uuid=notebook.project.uuid.hex,
        job_name=notebook.unique_name,
        job_uuid=notebook.uuid.hex,
        k8s_config=conf.get(K8S_CONFIG),
        namespace=conf.get(K8S_NAMESPACE),
        job_docker_image=docker_image,
        in_cluster=True)

    # Filled in by whichever handler catches a spawning failure.
    failure = {}
    try:
        mount_code = conf.get(NOTEBOOKS_MOUNT_CODE)
        spawn_results = spawner.start_notebook(
            persistence_outputs=notebook.persistence_outputs,
            persistence_data=notebook.persistence_data,
            outputs_refs_jobs=notebook.outputs_refs_jobs,
            outputs_refs_experiments=notebook.outputs_refs_experiments,
            resources=notebook.resources,
            labels=notebook.labels,
            annotations=notebook.annotations,
            secret_refs=notebook.secret_refs,
            config_map_refs=notebook.config_map_refs,
            node_selector=notebook.node_selector,
            affinity=notebook.affinity,
            tolerations=notebook.tolerations,
            backend=notebook.backend,
            max_restarts=get_max_restart(notebook.max_restarts,
                                         conf.get(MAX_RESTARTS_NOTEBOOKS)),
            reconcile_url=get_notebook_reconcile_url(notebook.unique_name),
            mount_code_in_notebooks=mount_code)
        notebook.definition = get_job_definition(spawn_results)
        notebook.save(update_fields=['definition'])
        return
    except ApiException:
        _logger.error(
            'Could not start notebook, please check your polyaxon spec.',
            exc_info=True)
        failure = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start the job, encountered a Kubernetes ApiException.',
        }
    except StoreNotFoundError as exc:
        _logger.error(
            'Could not start the notebook, please check your volume definitions',
            exc_info=True)
        failure = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start the job, encountered a volume definition problem. %s' % exc,
        }
    except Exception as exc:
        _logger.error(
            'Could not start notebook, please check your polyaxon spec.',
            exc_info=True)
        failure = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start notebook encountered an {} exception.'.format(
                exc.__class__.__name__),
        }

    # Only reached when one of the handlers above ran (success returns early).
    if failure.get('raised'):
        notebook.set_status(JobLifeCycle.FAILED,
                            message=failure.get('message'),
                            traceback=failure.get('traceback'))
def start_dockerizer(build_job):
    """Schedule a build job and spawn its dockerizer.

    The build job is first marked SCHEDULED. It is marked FAILED (with an
    explanatory message) when the registry context cannot be resolved or
    when spawning raises. On success a BUILD_JOB_STARTED event is recorded
    and the job definition is stored on the build job.

    Args:
        build_job: the build job to start.

    Returns:
        True when the dockerizer was started, None otherwise.
    """
    # Update job status to show that its started
    build_job.set_status(JobLifeCycle.SCHEDULED)

    spawner_cls = get_spawner_class(build_job.backend)

    try:
        registry = get_registry_context(build_backend=build_job.backend)
    except ContainerRegistryError:
        build_job.set_status(
            JobLifeCycle.FAILED,
            message='Could not start the dockerizer job, please check your registry configuration.')
        return

    spawner = spawner_cls(
        project_name=build_job.project.unique_name,
        project_uuid=build_job.project.uuid.hex,
        job_name=build_job.unique_name,
        job_uuid=build_job.uuid.hex,
        commit=build_job.commit,
        from_image=build_job.build_image,
        dockerfile_path=build_job.build_dockerfile,
        context_path=build_job.build_context,
        image_tag=build_job.uuid.hex,
        image_name=get_image_name(build_job=build_job,
                                  registry_host=registry.host),
        build_steps=build_job.build_steps,
        env_vars=build_job.build_env_vars,
        nocache=build_job.build_nocache,
        insecure=registry.insecure,
        creds_secret_ref=registry.secret,
        creds_secret_items=registry.secret_items,
        spec=build_job.specification,
        k8s_config=conf.get(K8S_CONFIG),
        namespace=conf.get(K8S_NAMESPACE),
        in_cluster=True,
        use_sidecar=True)

    # Filled in by whichever handler catches a spawning failure.
    failure = {}
    try:
        spawn_results = spawner.start_dockerizer(
            resources=build_job.resources,
            node_selector=build_job.node_selector,
            affinity=build_job.affinity,
            tolerations=build_job.tolerations)
        auditor.record(event_type=BUILD_JOB_STARTED, instance=build_job)
        build_job.definition = get_job_definition(spawn_results)
        build_job.save(update_fields=['definition'])
        return True
    except ApiException:
        _logger.error(
            'Could not start build job, please check your polyaxon spec',
            exc_info=True)
        failure = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start build job, encountered a Kubernetes ApiException.',
        }
    except VolumeNotFoundError as exc:
        _logger.error(
            'Could not start build job, please check your volume definitions.',
            exc_info=True)
        failure = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start build job, encountered a volume definition problem. %s' % exc,
        }
    except Exception as exc:
        _logger.error(
            'Could not start build job, please check your polyaxon spec.',
            exc_info=True)
        failure = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start build job encountered an {} exception.'.format(
                exc.__class__.__name__),
        }

    # Only reached when one of the handlers above ran (success returns early).
    if failure.get('raised'):
        build_job.set_status(JobLifeCycle.FAILED,
                             message=failure.get('message'),
                             traceback=failure.get('traceback'))
def start_job(job):
    """Schedule a job and spawn it with the image of its build job.

    The job is first marked SCHEDULED. It is marked FAILED (with an
    explanatory message) when the registry context cannot be resolved,
    when the image info cannot be obtained, or when spawning raises.
    On success the job definition is stored on the job.

    Args:
        job: the job to start.

    Returns:
        None in every case.
    """
    # Update job status to show that its started
    job.set_status(JobLifeCycle.SCHEDULED)

    try:
        registry = get_registry_context(build_backend=None)
    except ContainerRegistryError:
        job.set_status(
            JobLifeCycle.FAILED,
            message='Could not start the job, please check your registry configuration.')
        return

    try:
        image_name, image_tag = get_image_info(build_job=job.build_job,
                                               registry_host=registry.host)
    except (ValueError, AttributeError):
        _logger.error('Could not start the job.', exc_info=True)
        job.set_status(JobLifeCycle.FAILED,
                       message='Image info was not found.')
        return

    docker_image = '{}:{}'.format(image_name, image_tag)
    _logger.info('Start job with built image `%s`', docker_image)

    spawner = JobSpawner(
        project_name=job.project.unique_name,
        project_uuid=job.project.uuid.hex,
        job_name=job.unique_name,
        job_uuid=job.uuid.hex,
        spec=job.specification,
        k8s_config=conf.get(K8S_CONFIG),
        namespace=conf.get(K8S_NAMESPACE),
        job_docker_image=docker_image,
        in_cluster=True,
        use_sidecar=True)

    # Filled in by whichever handler catches a spawning failure.
    failure = {}
    try:
        spawn_results = spawner.start_job(
            persistence_data=job.persistence_data,
            persistence_outputs=job.persistence_outputs,
            outputs_refs_jobs=job.outputs_refs_jobs,
            outputs_refs_experiments=job.outputs_refs_experiments,
            resources=job.resources,
            node_selector=job.node_selector,
            affinity=job.affinity,
            tolerations=job.tolerations)
        job.definition = get_job_definition(spawn_results)
        job.save(update_fields=['definition'])
        return
    except ApiException:
        _logger.error('Could not start job, please check your polyaxon spec.',
                      exc_info=True)
        failure = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start the job, encountered a Kubernetes ApiException.',
        }
    except VolumeNotFoundError as exc:
        _logger.error(
            'Could not start the job, please check your volume definitions.',
            exc_info=True)
        failure = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start the job, encountered a volume definition problem. %s' % exc,
        }
    except Exception as exc:
        _logger.error('Could not start job, please check your polyaxon spec.',
                      exc_info=True)
        failure = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start job encountered an {} exception.'.format(
                exc.__class__.__name__),
        }

    # Only reached when one of the handlers above ran (success returns early).
    if failure.get('raised'):
        job.set_status(JobLifeCycle.FAILED,
                       message=failure.get('message'),
                       traceback=failure.get('traceback'))
def start_experiment(experiment):
    """Schedule an experiment and spawn its jobs.

    The experiment is first marked SCHEDULED. When its specification has a
    build section, the image of its build job is resolved and used;
    otherwise the spawner receives no image. The experiment is marked
    FAILED (with an explanatory message) when the registry context cannot
    be resolved, when the image info cannot be obtained, or when spawning
    raises. On success it is marked STARTING.

    Args:
        experiment: the experiment to start.

    Returns:
        None in every case.
    """
    # Update experiment status to show that its started
    experiment.set_status(ExperimentLifeCycle.SCHEDULED)

    project = experiment.project
    group = experiment.experiment_group

    # Leaving the image as None will force the spawners to use the default
    # docker image.
    docker_image = None
    if not experiment.specification.build:
        _logger.info('Start experiment with default image.')
    else:
        try:
            registry = get_registry_context(build_backend=None)
        except ContainerRegistryError:
            experiment.set_status(
                ExperimentLifeCycle.FAILED,
                message='Could not start the experiment, please check your registry configuration.')
            return

        try:
            image_name, image_tag = get_image_info(
                build_job=experiment.build_job,
                registry_host=registry.host)
        except (ValueError, AttributeError):
            _logger.error('Could not start the experiment.', exc_info=True)
            experiment.set_status(ExperimentLifeCycle.FAILED,
                                  message='Image info was not found.')
            return

        docker_image = '{}:{}'.format(image_name, image_tag)
        _logger.info('Start experiment with built image `%s`', docker_image)

    spawner_cls = get_spawner_class(specification=experiment.specification)

    # token_scope = RedisEphemeralTokens.get_scope(experiment.user.id,
    #                                              'experiment',
    #                                              experiment.id)

    # Filled in by whichever handler catches a spawning failure.
    failure = {}
    try:
        # Use spawners to start the experiment
        spawner = spawner_cls(
            project_name=project.unique_name,
            experiment_name=experiment.unique_name,
            experiment_group_name=group.unique_name if group else None,
            project_uuid=project.uuid.hex,
            experiment_group_uuid=group.uuid.hex if group else None,
            experiment_uuid=experiment.uuid.hex,
            persistence_config=experiment.persistence_config,
            outputs_refs_experiments=experiment.outputs_refs_experiments,
            outputs_refs_jobs=experiment.outputs_refs_jobs,
            original_name=experiment.original_unique_name,
            cloning_strategy=experiment.cloning_strategy,
            spec=experiment.specification,
            k8s_config=conf.get(K8S_CONFIG),
            namespace=conf.get(K8S_NAMESPACE),
            in_cluster=True,
            job_docker_image=docker_image,
            use_sidecar=True)

        # Create db jobs
        try:
            create_experiment_jobs(experiment=experiment, spawner=spawner)
        except IntegrityError:
            # TODO: Add better handling for this.
            return

        # Create k8s jobs
        response = spawner.start_experiment()
        # handle response
        handle_experiment(experiment=experiment, response=response)
        experiment.set_status(ExperimentLifeCycle.STARTING)
    except ApiException:
        _logger.error(
            'Could not start the experiment, please check your polyaxon spec.',
            exc_info=True)
        failure = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start the experiment, encountered a Kubernetes ApiException.',
        }
    except StoreNotFoundError as exc:
        _logger.error(
            'Could not start the experiment, please check your volume definitions.',
            exc_info=True)
        failure = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': ('Could not start the experiment, '
                        'encountered a volume definition problem, %s.') % exc,
        }
    except Exception as exc:
        _logger.error(
            'Could not start the experiment, please check your polyaxon spec',
            exc_info=True)
        failure = {
            'raised': True,
            'traceback': traceback.format_exc(),
            'message': 'Could not start the experiment encountered an {} exception.'.format(
                exc.__class__.__name__),
        }

    # No-op on the success path, where no handler populated `failure`.
    if failure.get('raised'):
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message=failure.get('message'),
                              traceback=failure.get('traceback'))