Ejemplo n.º 1
0
    def get_env_vars(self):
        """Assemble the list of environment variables for this service.

        Starts from the variables returned by ``get_service_env_vars`` for
        ``self.namespace`` and then adds, in order:

        * one variable per entry of ``config.get_requested_data(to_str=True)``;
        * one secret-backed variable per config key that starts with
          ``settings.PRIVATE_REGISTRIES_PREFIX``;
        * a secret-backed variable for ``settings.REPOS_ACCESS_TOKEN_KEY``,
          only when ``settings.REPOS_ACCESS_TOKEN`` is truthy.

        Returns:
            The resulting list of environment variable objects.
        """
        variables = get_service_env_vars(namespace=self.namespace)

        # Forward every requested config entry as a name/value variable.
        requested = config.get_requested_data(to_str=True)
        variables.extend(
            get_env_var(name=name, value=value) for name, value in requested.items())

        # Add private registries secrets keys; the config key is passed for
        # both arguments of get_from_secret.
        variables.extend(
            get_from_secret(registry_key, registry_key)
            for registry_key in config.keys_startswith(settings.PRIVATE_REGISTRIES_PREFIX))

        # Add repos access token secret key
        if settings.REPOS_ACCESS_TOKEN:
            variables.append(
                get_from_secret(settings.REPOS_ACCESS_TOKEN_KEY,
                                settings.REPOS_ACCESS_TOKEN_KEY))

        return variables
Ejemplo n.º 2
0
def stop_experiment(project_name,
                    project_uuid,
                    experiment_name,
                    experiment_uuid,
                    specification,
                    experiment_group_name=None,
                    experiment_group_uuid=None):
    """Stop a running experiment through the spawner matching its framework.

    The spawner class is looked up from ``specification.framework``,
    instantiated with the identifiers given here plus the Kubernetes settings
    read from ``conf``, and asked to stop the experiment.

    Args:
        project_name: Name of the project that owns the experiment.
        project_uuid: UUID of the project.
        experiment_name: Name of the experiment to stop.
        experiment_uuid: UUID of the experiment to stop.
        specification: Experiment specification; its ``framework`` attribute
            selects the spawner class and it is passed on as ``spec``.
        experiment_group_name: Optional name of the experiment's group.
        experiment_group_uuid: Optional UUID of the experiment's group.

    Returns:
        Whatever the spawner's ``stop_experiment()`` returns.
    """
    spawner_cls = get_spawner_class(specification.framework)

    # Gathered in the same order the settings lookups are expected to run.
    spawner_options = {
        'project_name': project_name,
        'project_uuid': project_uuid,
        'experiment_name': experiment_name,
        'experiment_group_name': experiment_group_name,
        'experiment_group_uuid': experiment_group_uuid,
        'experiment_uuid': experiment_uuid,
        'spec': specification,
        'k8s_config': conf.get('K8S_CONFIG'),
        'namespace': conf.get('K8S_NAMESPACE'),
        'in_cluster': True,
        'use_sidecar': True,
        'sidecar_config': config.get_requested_data(to_str=True),
    }
    return spawner_cls(**spawner_options).stop_experiment()
Ejemplo n.º 3
0
    def get_env_vars(self):
        """Assemble the list of environment variables for this service.

        Starts from the variables returned by ``get_service_env_vars`` for
        ``self.namespace`` and adds one variable per entry of
        ``config.get_requested_data(to_str=True)``.

        Returns:
            The resulting list of environment variable objects.
        """
        variables = get_service_env_vars(namespace=self.namespace)

        # Forward every requested config entry as a name/value variable.
        requested = config.get_requested_data(to_str=True)
        variables.extend(
            get_env_var(name=name, value=value) for name, value in requested.items())

        return variables
Ejemplo n.º 4
0
def start_experiment(experiment):
    """Schedule an experiment and start it through its framework's spawner.

    Steps:

    1. Set the experiment status to ``SCHEDULED``.
    2. If the specification has a build section, resolve the built image via
       ``get_image_info``; when that raises ``ValueError`` or
       ``AttributeError`` the experiment is set to ``FAILED`` and the
       function returns early. Without a build section no image is passed,
       which makes the spawner fall back to its default image.
    3. Instantiate the spawner, start the experiment and hand the response to
       ``handle_experiment``.
    4. If step 3 raises any ``Exception``, log it and set the experiment to
       ``FAILED`` with a message and the formatted traceback. The exception
       is not re-raised.

    Args:
        experiment: The experiment object to start.

    Returns:
        None.
    """
    # Update experiment status to show that its started
    experiment.set_status(ExperimentLifeCycle.SCHEDULED)

    project = experiment.project
    group = experiment.experiment_group

    if not experiment.specification.build:
        # This will force the spawners to use the default docker image
        job_docker_image = None
        _logger.info('Start experiment with default image.')
    else:
        try:
            # Unpacking stays inside the try: a malformed result raises ValueError.
            image_name, image_tag = get_image_info(build_job=experiment.build_job)
        except (ValueError, AttributeError):
            _logger.error('Could not start the experiment.', exc_info=True)
            experiment.set_status(ExperimentLifeCycle.FAILED,
                                  message='Image info was not found.')
            return
        job_docker_image = '{}:{}'.format(image_name, image_tag)
        _logger.info('Start experiment with built image `%s`', job_docker_image)

    spawner_class = get_spawner_class(experiment.specification.framework)
    token_scope = RedisEphemeralTokens.get_scope(experiment.user.id,
                                                 'experiment',
                                                 experiment.id)

    # Holds (message, formatted traceback) once a handler has recorded a failure.
    failure = None
    try:
        # Use spawners to start the experiment. The options are collected
        # inside the try so attribute errors are reported like any other failure.
        spawner_options = {
            'project_name': project.unique_name,
            'experiment_name': experiment.unique_name,
            'experiment_group_name': group.unique_name if group else None,
            'project_uuid': project.uuid.hex,
            'experiment_group_uuid': group.uuid.hex if group else None,
            'experiment_uuid': experiment.uuid.hex,
            'persistence_config': experiment.persistence_config,
            'outputs_refs_experiments': experiment.outputs_refs_experiments,
            'outputs_refs_jobs': experiment.outputs_refs_jobs,
            'original_name': experiment.original_unique_name,
            'cloning_strategy': experiment.cloning_strategy,
            'spec': experiment.specification,
            'k8s_config': conf.get('K8S_CONFIG'),
            'namespace': conf.get('K8S_NAMESPACE'),
            'in_cluster': True,
            'job_docker_image': job_docker_image,
            'use_sidecar': True,
            'sidecar_config': config.get_requested_data(to_str=True),
            'token_scope': token_scope,
        }
        spawner = spawner_class(**spawner_options)
        response = spawner.start_experiment()
        handle_experiment(experiment=experiment, spawner=spawner, response=response)
    except ApiException:
        _logger.error('Could not start the experiment, please check your polyaxon spec.',
                      exc_info=True)
        trace = traceback.format_exc()
        failure = (
            'Could not start the experiment, encountered a Kubernetes ApiException.',
            trace,
        )
    except VolumeNotFoundError as e:
        _logger.error('Could not start the experiment, please check your volume definitions.',
                      exc_info=True)
        trace = traceback.format_exc()
        failure = (
            'Could not start the experiment, '
            'encountered a volume definition problem, %s.' % e,
            trace,
        )
    except Exception as e:
        _logger.error('Could not start the experiment, please check your polyaxon spec',
                      exc_info=True)
        trace = traceback.format_exc()
        failure = (
            'Could not start the experiment encountered an {} exception.'.format(
                e.__class__.__name__),
            trace,
        )
    finally:
        if failure is not None:
            failure_message, failure_traceback = failure
            experiment.set_status(
                ExperimentLifeCycle.FAILED,
                message=failure_message,
                traceback=failure_traceback)
Ejemplo n.º 5
0
def start_job(job):
    """Schedule a job and start it through ``JobSpawner``.

    Steps:

    1. Set the job status to ``SCHEDULED``.
    2. Resolve the built image via ``get_image_info``; when that raises
       ``ValueError`` or ``AttributeError`` the job is set to ``FAILED`` and
       the function returns early.
    3. Create a ``JobSpawner`` and start the job, then store the definition
       derived from the spawner results on ``job.definition`` and save it.
    4. If step 3 raises any ``Exception``, log it and set the job to
       ``FAILED`` with a message and the formatted traceback. The exception
       is not re-raised.

    Args:
        job: The job object to start.

    Returns:
        None.
    """
    # Update job status to show that its started
    job.set_status(JobLifeCycle.SCHEDULED)

    try:
        # Unpacking stays inside the try: a malformed result raises ValueError.
        image_name, image_tag = get_image_info(build_job=job.build_job)
    except (ValueError, AttributeError):
        _logger.error('Could not start the job.', exc_info=True)
        job.set_status(JobLifeCycle.FAILED, message='Image info was not found.')
        return
    job_docker_image = '{}:{}'.format(image_name, image_tag)
    _logger.info('Start job with built image `%s`', job_docker_image)

    # The spawner is created outside the try below, so constructor errors
    # propagate to the caller instead of marking the job as failed.
    spawner_options = {
        'project_name': job.project.unique_name,
        'project_uuid': job.project.uuid.hex,
        'job_name': job.unique_name,
        'job_uuid': job.uuid.hex,
        'spec': job.specification,
        'k8s_config': conf.get('K8S_CONFIG'),
        'namespace': conf.get('K8S_NAMESPACE'),
        'job_docker_image': job_docker_image,
        'in_cluster': True,
        'use_sidecar': True,
        'sidecar_config': config.get_requested_data(to_str=True),
    }
    spawner = JobSpawner(**spawner_options)

    # Holds (message, formatted traceback) once a handler has recorded a failure.
    failure = None
    try:
        start_options = {
            'persistence_data': job.persistence_data,
            'persistence_outputs': job.persistence_outputs,
            'outputs_refs_jobs': job.outputs_refs_jobs,
            'outputs_refs_experiments': job.outputs_refs_experiments,
            'resources': job.resources,
            'node_selector': job.node_selector,
            'affinity': job.affinity,
            'tolerations': job.tolerations,
        }
        results = spawner.start_job(**start_options)
        # Persisting the definition is part of the guarded section: a failure
        # here also marks the job as failed.
        job.definition = get_job_definition(results)
        job.save(update_fields=['definition'])
    except ApiException:
        _logger.error('Could not start job, please check your polyaxon spec.',
                      exc_info=True)
        trace = traceback.format_exc()
        failure = (
            'Could not start the job, encountered a Kubernetes ApiException.',
            trace,
        )
    except VolumeNotFoundError as e:
        _logger.error(
            'Could not start the job, please check your volume definitions.',
            exc_info=True)
        trace = traceback.format_exc()
        failure = (
            'Could not start the job, encountered a volume definition problem. %s' % e,
            trace,
        )
    except Exception as e:
        _logger.error('Could not start job, please check your polyaxon spec.',
                      exc_info=True)
        trace = traceback.format_exc()
        failure = (
            'Could not start job encountered an {} exception.'.format(
                e.__class__.__name__),
            trace,
        )
    finally:
        if failure is not None:
            failure_message, failure_traceback = failure
            job.set_status(JobLifeCycle.FAILED,
                           message=failure_message,
                           traceback=failure_traceback)