def start_experiment(experiment):
    """Schedule an experiment and start it through a framework spawner.

    The experiment is first marked ``SCHEDULED``. If its specification
    declares a build, the image reported by ``get_image_info`` for
    ``experiment.build_job`` is used; otherwise ``job_docker_image`` stays
    ``None``. The spawner class is selected from the specification's
    framework, and the spawner response is handed to ``handle_experiment``.

    Failures are not raised to the caller: the experiment is set to
    ``FAILED`` with an explanatory message and the function returns early.

    Args:
        experiment: The experiment to start.

    Returns:
        None.
    """
    # Update experiment status to show that it has been scheduled
    experiment.set_status(ExperimentLifeCycle.SCHEDULED)

    project = experiment.project
    group = experiment.experiment_group

    job_docker_image = None  # This will force the spawners to use the default docker image
    if experiment.specification.build:
        try:
            image_name, image_tag = get_image_info(build_job=experiment.build_job)
        except ValueError as e:
            _logger.warning('Could not start the experiment, %s', e)
            # Fixed typo in the user-facing status message ("note" -> "not").
            experiment.set_status(ExperimentLifeCycle.FAILED,
                                  message='External git repo was not found.')
            return
        job_docker_image = '{}:{}'.format(image_name, image_tag)
        _logger.info('Start experiment with built image `%s`', job_docker_image)
    else:
        _logger.info('Start experiment with default image.')

    spawner_class = get_spawner_class(experiment.specification.framework)

    # Use spawners to start the experiment
    spawner = spawner_class(project_name=project.unique_name,
                            experiment_name=experiment.unique_name,
                            experiment_group_name=group.unique_name if group else None,
                            project_uuid=project.uuid.hex,
                            experiment_group_uuid=group.uuid.hex if group else None,
                            experiment_uuid=experiment.uuid.hex,
                            original_name=experiment.original_unique_name,
                            cloning_strategy=experiment.cloning_strategy,
                            spec=experiment.specification,
                            k8s_config=settings.K8S_CONFIG,
                            namespace=settings.K8S_NAMESPACE,
                            in_cluster=True,
                            job_docker_image=job_docker_image,
                            use_sidecar=True,
                            sidecar_config=config.get_requested_params(to_str=True))
    try:
        response = spawner.start_experiment()
    except ApiException as e:
        # Kubernetes API errors get their own, more specific status message.
        _logger.warning('Could not start the experiment, please check your polyaxon spec %s', e)
        experiment.set_status(
            ExperimentLifeCycle.FAILED,
            message='Could not start the experiment, encountered a Kubernetes ApiException.')
        return
    except Exception as e:
        # Boundary handler: any other error is recorded on the experiment
        # (by exception class name) instead of propagating to the caller.
        _logger.warning('Could not start the experiment, please check your polyaxon spec %s', e)
        experiment.set_status(
            ExperimentLifeCycle.FAILED,
            message='Could not start the experiment encountered an {} exception.'.format(
                e.__class__.__name__
            ))
        return

    handle_experiment(experiment=experiment, spawner=spawner, response=response)
def start_job(job):
    """Schedule a job and start it through ``JobSpawner``.

    The job is first marked ``SCHEDULED``; the docker image is taken from
    ``get_image_info`` for ``job.build_job``. On success the definition
    derived from the spawner results is stored on the job and saved.

    Failures are not raised to the caller: the job is set to ``FAILED`` with
    an explanatory message and the function returns early.

    Args:
        job: The job to start.

    Returns:
        None.
    """
    # Update job status to show that it has been scheduled
    job.set_status(JobLifeCycle.SCHEDULED)

    try:
        image_name, image_tag = get_image_info(build_job=job.build_job)
    except ValueError as e:
        # Log/status text fixed: this function starts a job, not a notebook,
        # and the status message had a typo ("note" -> "not").
        _logger.warning('Could not start the job, %s', e)
        job.set_status(JobLifeCycle.FAILED,
                       message='External git repo was not found.')
        return
    job_docker_image = '{}:{}'.format(image_name, image_tag)
    _logger.info('Start job with built image `%s`', job_docker_image)

    spawner = JobSpawner(
        project_name=job.project.unique_name,
        project_uuid=job.project.uuid.hex,
        job_name=job.unique_name,
        job_uuid=job.uuid.hex,
        spec=job.specification,
        k8s_config=settings.K8S_CONFIG,
        namespace=settings.K8S_NAMESPACE,
        job_docker_image=job_docker_image,
        in_cluster=True,
        use_sidecar=True,
        sidecar_config=config.get_requested_params(to_str=True))

    try:
        results = spawner.start_job(resources=job.resources,
                                    node_selectors=job.node_selectors)
    except ApiException as e:
        # Kubernetes API errors get their own, more specific status message.
        _logger.warning(
            'Could not start job, please check your polyaxon spec %s', e)
        job.set_status(
            JobLifeCycle.FAILED,
            message='Could not start job, encountered a Kubernetes ApiException.')
        return
    except Exception as e:
        # Boundary handler: any other error is recorded on the job (by
        # exception class name) instead of propagating to the caller.
        _logger.warning(
            'Could not start job, please check your polyaxon spec %s', e)
        job.set_status(
            JobLifeCycle.FAILED,
            message='Could not start job encountered an {} exception.'.format(
                e.__class__.__name__))
        return
    job.definition = get_job_definition(results)
    job.save()
def start_notebook(notebook):
    """Schedule a notebook and start it through ``NotebookSpawner``.

    The notebook is first marked ``SCHEDULED``; the docker image is taken
    from ``get_image_info`` for ``notebook.build_job``. On success the
    definition derived from the spawner results is stored on the notebook
    and saved.

    Failures are not raised to the caller: the notebook is set to ``FAILED``
    with an explanatory message and the function returns early.

    Args:
        notebook: The notebook job to start.

    Returns:
        None.
    """
    # Update job status to show that it has been scheduled
    notebook.set_status(JobLifeCycle.SCHEDULED)

    try:
        image_name, image_tag = get_image_info(build_job=notebook.build_job)
    except ValueError as e:
        _logger.warning('Could not start the notebook, %s', e)
        # Fixed typo in the user-facing status message ("note" -> "not").
        notebook.set_status(JobLifeCycle.FAILED, message='External git repo was not found.')
        return
    job_docker_image = '{}:{}'.format(image_name, image_tag)
    _logger.info('Start notebook with built image `%s`', job_docker_image)

    spawner = NotebookSpawner(
        project_name=notebook.project.unique_name,
        project_uuid=notebook.project.uuid.hex,
        job_name=notebook.unique_name,
        job_uuid=notebook.uuid.hex,
        k8s_config=settings.K8S_CONFIG,
        namespace=settings.K8S_NAMESPACE,
        in_cluster=True)

    try:
        results = spawner.start_notebook(image=job_docker_image,
                                         resources=notebook.resources,
                                         node_selectors=notebook.node_selectors)
    except ApiException as e:
        # Kubernetes API errors get their own, more specific status message.
        _logger.warning('Could not start notebook, please check your polyaxon spec %s', e)
        notebook.set_status(
            JobLifeCycle.FAILED,
            message='Could not start notebook, encountered a Kubernetes ApiException.')
        return
    except Exception as e:
        # Boundary handler: any other error is recorded on the notebook (by
        # exception class name) instead of propagating to the caller.
        _logger.warning('Could not start notebook, please check your polyaxon spec %s', e)
        notebook.set_status(
            JobLifeCycle.FAILED,
            message='Could not start notebook encountered an {} exception.'.format(
                e.__class__.__name__
            ))
        return
    notebook.definition = get_job_definition(results)
    notebook.save()
def start_experiment(experiment):
    """Schedule an experiment and launch it with its framework spawner.

    The experiment is marked ``SCHEDULED``, the docker image is resolved
    (the built image when the specification declares a build, otherwise
    ``None``), and the spawner selected from the specification's framework
    is created and started. The spawner response is passed to
    ``handle_experiment``.

    Any exception raised while creating/starting the spawner or handling
    its response is logged and recorded on the experiment as a ``FAILED``
    status together with the formatted traceback; nothing is re-raised.

    Args:
        experiment: The experiment to start.

    Returns:
        None.
    """
    experiment.set_status(ExperimentLifeCycle.SCHEDULED)

    project = experiment.project
    group = experiment.experiment_group

    # ``None`` makes the spawners fall back to the default docker image.
    job_docker_image = None
    if not experiment.specification.build:
        _logger.info('Start experiment with default image.')
    else:
        try:
            image_name, image_tag = get_image_info(build_job=experiment.build_job)
        except (ValueError, AttributeError):
            _logger.error('Could not start the experiment.', exc_info=True)
            experiment.set_status(ExperimentLifeCycle.FAILED,
                                  message='Image info was not found.')
            return
        job_docker_image = '{}:{}'.format(image_name, image_tag)
        _logger.info('Start experiment with built image `%s`', job_docker_image)

    spawner_class = get_spawner_class(experiment.specification.framework)
    token_scope = RedisEphemeralTokens.get_scope(
        experiment.user.id, 'experiment', experiment.id)

    # Set by the handlers below; reported once after the try block.
    failure_traceback = None
    failure_message = None
    try:
        # Use spawners to start the experiment
        spawner_kwargs = {
            'project_name': project.unique_name,
            'experiment_name': experiment.unique_name,
            'experiment_group_name': group.unique_name if group else None,
            'project_uuid': project.uuid.hex,
            'experiment_group_uuid': group.uuid.hex if group else None,
            'experiment_uuid': experiment.uuid.hex,
            'persistence_config': experiment.persistence_config,
            'outputs_refs_experiments': experiment.outputs_refs_experiments,
            'outputs_refs_jobs': experiment.outputs_refs_jobs,
            'original_name': experiment.original_unique_name,
            'cloning_strategy': experiment.cloning_strategy,
            'spec': experiment.specification,
            'k8s_config': conf.get('K8S_CONFIG'),
            'namespace': conf.get('K8S_NAMESPACE'),
            'in_cluster': True,
            'job_docker_image': job_docker_image,
            'use_sidecar': True,
            'token_scope': token_scope,
        }
        spawner = spawner_class(**spawner_kwargs)
        response = spawner.start_experiment()
        handle_experiment(experiment=experiment, spawner=spawner, response=response)
    except ApiException:
        _logger.error(
            'Could not start the experiment, please check your polyaxon spec.',
            exc_info=True)
        failure_traceback = traceback.format_exc()
        failure_message = (
            'Could not start the experiment, encountered a Kubernetes ApiException.')
    except VolumeNotFoundError as e:
        _logger.error(
            'Could not start the experiment, please check your volume definitions.',
            exc_info=True)
        failure_traceback = traceback.format_exc()
        failure_message = (
            'Could not start the experiment, encountered a volume definition problem, %s.' % e)
    except Exception as e:
        _logger.error(
            'Could not start the experiment, please check your polyaxon spec',
            exc_info=True)
        failure_traceback = traceback.format_exc()
        failure_message = (
            'Could not start the experiment encountered an {} exception.'.format(
                e.__class__.__name__))

    if failure_message is not None:
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message=failure_message,
                              traceback=failure_traceback)
Exemple #5
0
def start_notebook(notebook):
    """Schedule a notebook and launch it with ``NotebookSpawner``.

    The notebook is marked ``SCHEDULED`` and its docker image is resolved
    from ``notebook.build_job``. The spawner is then started with the
    notebook's persistence, references, resources and scheduling settings,
    and the resulting definition is saved on the notebook.

    Any exception raised while starting the spawner or saving the
    definition is logged and recorded on the notebook as a ``FAILED``
    status together with the formatted traceback; nothing is re-raised.

    Args:
        notebook: The notebook job to start.

    Returns:
        None.
    """
    notebook.set_status(JobLifeCycle.SCHEDULED)

    try:
        image_name, image_tag = get_image_info(build_job=notebook.build_job)
    except (ValueError, AttributeError):
        _logger.error('Could not start the notebook.', exc_info=True)
        notebook.set_status(JobLifeCycle.FAILED, message='Image info was not found.')
        return
    job_docker_image = '{}:{}'.format(image_name, image_tag)
    _logger.info('Start notebook with built image `%s`', job_docker_image)

    spawner = NotebookSpawner(
        project_name=notebook.project.unique_name,
        project_uuid=notebook.project.uuid.hex,
        job_name=notebook.unique_name,
        job_uuid=notebook.uuid.hex,
        k8s_config=conf.get('K8S_CONFIG'),
        namespace=conf.get('K8S_NAMESPACE'),
        job_docker_image=job_docker_image,
        in_cluster=True,
    )

    # Set by the handlers below; reported once after the try block.
    failure_traceback = None
    failure_message = None
    try:
        mount_code_in_notebooks = conf.get('MOUNT_CODE_IN_NOTEBOOKS')
        start_kwargs = {
            'persistence_outputs': notebook.persistence_outputs,
            'persistence_data': notebook.persistence_data,
            'outputs_refs_jobs': notebook.outputs_refs_jobs,
            'outputs_refs_experiments': notebook.outputs_refs_experiments,
            'resources': notebook.resources,
            'secret_refs': notebook.secret_refs,
            'configmap_refs': notebook.configmap_refs,
            'node_selector': notebook.node_selector,
            'affinity': notebook.affinity,
            'tolerations': notebook.tolerations,
            'backend': notebook.backend,
            'mount_code_in_notebooks': mount_code_in_notebooks,
        }
        results = spawner.start_notebook(**start_kwargs)
        notebook.definition = get_job_definition(results)
        notebook.save(update_fields=['definition'])
    except ApiException:
        _logger.error(
            'Could not start notebook, please check your polyaxon spec.',
            exc_info=True)
        failure_traceback = traceback.format_exc()
        failure_message = 'Could not start the job, encountered a Kubernetes ApiException.'
    except VolumeNotFoundError as e:
        _logger.error(
            'Could not start the notebook, please check your volume definitions',
            exc_info=True)
        failure_traceback = traceback.format_exc()
        failure_message = (
            'Could not start the job, encountered a volume definition problem. %s' % e)
    except Exception as e:
        _logger.error(
            'Could not start notebook, please check your polyaxon spec.',
            exc_info=True)
        failure_traceback = traceback.format_exc()
        failure_message = 'Could not start notebook encountered an {} exception.'.format(
            e.__class__.__name__)

    if failure_message is not None:
        notebook.set_status(JobLifeCycle.FAILED,
                            message=failure_message,
                            traceback=failure_traceback)
def start_job(job):
    """Schedule a job and launch it with ``JobSpawner``.

    The job is marked ``SCHEDULED`` and its docker image is resolved from
    ``job.build_job``. The spawner is then started with the job's
    persistence, references, resources and scheduling settings, and the
    resulting definition is saved on the job.

    Any exception raised while starting the spawner or saving the
    definition is logged and recorded on the job as a ``FAILED`` status
    together with the formatted traceback; nothing is re-raised.

    Args:
        job: The job to start.

    Returns:
        None.
    """
    job.set_status(JobLifeCycle.SCHEDULED)

    try:
        image_name, image_tag = get_image_info(build_job=job.build_job)
    except (ValueError, AttributeError):
        _logger.error('Could not start the job.', exc_info=True)
        job.set_status(JobLifeCycle.FAILED, message='Image info was not found.')
        return
    job_docker_image = '{}:{}'.format(image_name, image_tag)
    _logger.info('Start job with built image `%s`', job_docker_image)

    spawner = JobSpawner(project_name=job.project.unique_name,
                         project_uuid=job.project.uuid.hex,
                         job_name=job.unique_name,
                         job_uuid=job.uuid.hex,
                         spec=job.specification,
                         k8s_config=settings.K8S_CONFIG,
                         namespace=settings.K8S_NAMESPACE,
                         job_docker_image=job_docker_image,
                         in_cluster=True,
                         use_sidecar=True,
                         sidecar_config=config.get_requested_params(to_str=True))

    # Set by the handlers below; reported once after the try block.
    failure_traceback = None
    failure_message = None
    try:
        start_kwargs = {
            'persistence_data': job.persistence_data,
            'persistence_outputs': job.persistence_outputs,
            'outputs_refs_jobs': job.outputs_refs_jobs,
            'outputs_refs_experiments': job.outputs_refs_experiments,
            'resources': job.resources,
            'node_selector': job.node_selector,
            'affinity': job.affinity,
            'tolerations': job.tolerations,
        }
        results = spawner.start_job(**start_kwargs)
        job.definition = get_job_definition(results)
        job.save()
    except ApiException:
        _logger.error('Could not start job, please check your polyaxon spec.',
                      exc_info=True)
        failure_traceback = traceback.format_exc()
        failure_message = 'Could not start the job, encountered a Kubernetes ApiException.'
    except VolumeNotFoundError as e:
        _logger.error(
            'Could not start the job, please check your volume definitions.',
            exc_info=True)
        failure_traceback = traceback.format_exc()
        failure_message = (
            'Could not start the job, encountered a volume definition problem. %s' % e)
    except Exception as e:
        _logger.error('Could not start job, please check your polyaxon spec.',
                      exc_info=True)
        failure_traceback = traceback.format_exc()
        failure_message = 'Could not start job encountered an {} exception.'.format(
            e.__class__.__name__)

    if failure_message is not None:
        job.set_status(JobLifeCycle.FAILED,
                       message=failure_message,
                       traceback=failure_traceback)
Exemple #7
0
 def test_get_tagged_image(self):
     """The tagged image equals the image info parts joined with ``:``."""
     actual = get_tagged_image(self.build_job)
     info_parts = get_image_info(self.build_job)
     expected = ':'.join(info_parts)
     assert actual == expected
Exemple #8
0
 def test_get_image_image_info(self):
     """Image info holds the image name first and the build job uuid hex second."""
     info = get_image_info(self.build_job)
     name = info[0]
     assert name == get_image_name(self.build_job)
     tag = info[1]
     assert tag == self.build_job.uuid.hex
def start_experiment(experiment):
    """Schedule an experiment and start it through a framework spawner.

    The experiment is first marked ``SCHEDULED``. If its specification
    declares a build, the image reported by ``get_image_info`` for
    ``experiment.build_job`` is used; otherwise ``job_docker_image`` stays
    ``None``. The spawner class is selected from the specification's
    framework, and the spawner response is handed to ``handle_experiment``.

    Failures are not raised to the caller: the experiment is set to
    ``FAILED`` with an explanatory message and the function returns early.

    Args:
        experiment: The experiment to start.

    Returns:
        ``False`` when a ``VolumeNotFoundError`` is hit, ``None`` otherwise.
    """
    # Update experiment status to show that it has been scheduled
    experiment.set_status(ExperimentLifeCycle.SCHEDULED)

    project = experiment.project
    group = experiment.experiment_group

    job_docker_image = None  # This will force the spawners to use the default docker image
    if experiment.specification.build:
        try:
            image_name, image_tag = get_image_info(
                build_job=experiment.build_job)
        except ValueError as e:
            _logger.warning('Could not start the experiment, %s', e)
            # Fixed typo in the user-facing status message ("note" -> "not").
            experiment.set_status(ExperimentLifeCycle.FAILED,
                                  message='External git repo was not found.')
            return
        job_docker_image = '{}:{}'.format(image_name, image_tag)
        _logger.info('Start experiment with built image `%s`',
                     job_docker_image)
    else:
        _logger.info('Start experiment with default image.')

    spawner_class = get_spawner_class(experiment.specification.framework)

    # Use spawners to start the experiment
    spawner = spawner_class(
        project_name=project.unique_name,
        experiment_name=experiment.unique_name,
        experiment_group_name=group.unique_name if group else None,
        project_uuid=project.uuid.hex,
        experiment_group_uuid=group.uuid.hex if group else None,
        experiment_uuid=experiment.uuid.hex,
        persistence_config=experiment.persistence_config,
        outputs_refs_experiments=experiment.outputs_refs_experiments,
        outputs_refs_jobs=experiment.outputs_refs_jobs,
        original_name=experiment.original_unique_name,
        cloning_strategy=experiment.cloning_strategy,
        spec=experiment.specification,
        k8s_config=settings.K8S_CONFIG,
        namespace=settings.K8S_NAMESPACE,
        in_cluster=True,
        job_docker_image=job_docker_image,
        use_sidecar=True,
        sidecar_config=config.get_requested_params(to_str=True))
    try:
        response = spawner.start_experiment()
    except ApiException as e:
        # Kubernetes API errors get their own, more specific status message.
        _logger.warning(
            'Could not start the experiment, please check your polyaxon spec %s',
            e)
        experiment.set_status(
            ExperimentLifeCycle.FAILED,
            message='Could not start the experiment, encountered a Kubernetes ApiException.')
        return
    except VolumeNotFoundError as e:
        _logger.warning(
            'Could not start the experiment, '
            'please check your volume definitions %s', e)
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='Could not start the experiment, '
                              'encountered a volume definition problem. %s' %
                              e)
        # NOTE(review): this is the only failure path returning False; every
        # other path returns None. Kept as-is for callers -- confirm whether
        # the distinction is intentional.
        return False
    except Exception as e:
        # Boundary handler: any other error is recorded on the experiment
        # (by exception class name) instead of propagating to the caller.
        _logger.warning(
            'Could not start the experiment, please check your polyaxon spec %s',
            e)
        experiment.set_status(
            ExperimentLifeCycle.FAILED,
            message='Could not start the experiment encountered an {} exception.'
            .format(e.__class__.__name__))
        return

    handle_experiment(experiment=experiment,
                      spawner=spawner,
                      response=response)
def start_notebook(notebook):
    """Schedule a notebook and launch it with ``NotebookSpawner``.

    The notebook is marked ``SCHEDULED`` and its docker image is resolved
    from ``notebook.build_job``. Commits are allowed when either
    ``settings.REPOS_CLAIM_NAME`` or the notebook's node selector is truthy.
    On success the definition derived from the spawner results is stored on
    the notebook and saved.

    Failures while starting the spawner are logged and recorded on the
    notebook as a ``FAILED`` status; nothing is re-raised.

    Args:
        notebook: The notebook job to start.

    Returns:
        ``False`` when a ``VolumeNotFoundError`` is hit, ``None`` otherwise.
    """
    notebook.set_status(JobLifeCycle.SCHEDULED)

    try:
        image_name, image_tag = get_image_info(build_job=notebook.build_job)
    except (ValueError, AttributeError):
        _logger.error('Could not start the notebook.', exc_info=True)
        notebook.set_status(JobLifeCycle.FAILED, message='Image info was not found.')
        return
    job_docker_image = '{}:{}'.format(image_name, image_tag)
    _logger.info('Start notebook with built image `%s`', job_docker_image)

    spawner = NotebookSpawner(
        project_name=notebook.project.unique_name,
        project_uuid=notebook.project.uuid.hex,
        job_name=notebook.unique_name,
        job_uuid=notebook.uuid.hex,
        k8s_config=settings.K8S_CONFIG,
        namespace=settings.K8S_NAMESPACE,
        in_cluster=True,
    )

    try:
        # Computed inside the try so lookup errors are reported like any
        # other start failure.
        allow_commits = bool(settings.REPOS_CLAIM_NAME or notebook.node_selector)
        start_kwargs = {
            'image': job_docker_image,
            'persistence_outputs': notebook.persistence_outputs,
            'persistence_data': notebook.persistence_data,
            'outputs_refs_jobs': notebook.outputs_refs_jobs,
            'outputs_refs_experiments': notebook.outputs_refs_experiments,
            'resources': notebook.resources,
            'node_selector': notebook.node_selector,
            'affinity': notebook.affinity,
            'tolerations': notebook.tolerations,
            'allow_commits': allow_commits,
        }
        results = spawner.start_notebook(**start_kwargs)
    except ApiException:
        _logger.error(
            'Could not start notebook, please check your polyaxon spec.',
            exc_info=True)
        failure_message = 'Could not start notebook, encountered a Kubernetes ApiException.'
        notebook.set_status(JobLifeCycle.FAILED, message=failure_message)
        return
    except VolumeNotFoundError as e:
        _logger.error(
            'Could not start the notebook, please check your volume definitions',
            exc_info=True)
        failure_message = (
            'Could not start the notebook, encountered a volume definition problem. %s' % e)
        notebook.set_status(JobLifeCycle.FAILED, message=failure_message)
        return False
    except Exception as e:
        _logger.error(
            'Could not start notebook, please check your polyaxon spec.',
            exc_info=True)
        failure_message = 'Could not start notebook encountered an {} exception.'.format(
            e.__class__.__name__)
        notebook.set_status(JobLifeCycle.FAILED, message=failure_message)
        return
    else:
        # Success path: persist the definition returned by the spawner.
        notebook.definition = get_job_definition(results)
        notebook.save()
 def test_get_tagged_image(self):
     """Check the tagged image against the ``:``-joined image info."""
     tagged = get_tagged_image(self.build_job)
     image_info = get_image_info(self.build_job)
     joined = ':'.join(image_info)
     assert tagged == joined
 def test_get_image_image_info(self):
     """Check image info is (image name, build job uuid hex), in that order."""
     result = get_image_info(self.build_job)
     first = result[0]
     assert first == get_image_name(self.build_job)
     second = result[1]
     assert second == self.build_job.uuid.hex