Esempio n. 1
0
def build_jobs_stop(self,
                    project_name,
                    project_uuid,
                    build_job_name,
                    build_job_uuid,
                    update_status=True):
    """Stop the dockerizer of a build job and optionally mark it as stopped.

    ``self`` is the bound task object; this function reads
    ``self.request.retries`` and calls ``self.retry``.

    The stop request is forwarded to ``dockerizer_scheduler.stop_dockerizer``.
    When that call returns a falsy value and fewer than two retries have been
    recorded, the task is re-submitted and nothing else happens.

    When ``update_status`` is truthy, the build job is looked up by
    ``build_job_uuid`` and its status is set to ``JobLifeCycle.STOPPED``.
    """
    stop_succeeded = dockerizer_scheduler.stop_dockerizer(
        project_name=project_name,
        project_uuid=project_uuid,
        build_job_name=build_job_name,
        build_job_uuid=build_job_uuid,
    )

    retry_needed = not stop_succeeded and self.request.retries < 2
    if retry_needed:
        _logger.info('Trying again to delete build `%s`.', build_job_name)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        return

    if update_status:
        job = get_valid_build_job(build_job_uuid=build_job_uuid)
        if job:
            # Record that the build job has been stopped.
            job.set_status(status=JobLifeCycle.STOPPED,
                           message='BuildJob was stopped.')
            return

        _logger.info(
            'Something went wrong, the BuildJob `%s` does not exist anymore.',
            build_job_uuid)
Esempio n. 2
0
def build_jobs_schedule_deletion(build_job_id, immediate=False):
    """Archive a build job and dispatch the follow-up stop/delete tasks.

    The job is looked up with ``include_deleted=True``. If the lookup returns
    nothing, the event is logged and the function returns.

    Otherwise the job is archived. If it reports ``is_stoppable``, a
    ``BUILD_JOBS_STOP`` task is sent for it. If ``immediate`` is truthy, a
    ``DELETE_ARCHIVED_BUILD_JOB`` task is sent as well.
    """
    job = get_valid_build_job(build_job_id=build_job_id, include_deleted=True)
    if not job:
        _logger.info(
            'Something went wrong, the BuildJob `%s` does not exist anymore.',
            build_job_id)
        return

    job.archive()

    if job.is_stoppable:
        parent_project = job.project
        stop_kwargs = {
            'project_name': parent_project.unique_name,
            'project_uuid': parent_project.uuid.hex,
            'build_job_name': job.unique_name,
            'build_job_uuid': job.uuid.hex,
            'update_status': True,
            'collect_logs': False,
            'message': 'Build is scheduled for deletion.',
        }
        celery_app.send_task(SchedulerCeleryTasks.BUILD_JOBS_STOP,
                             kwargs=stop_kwargs)

    if immediate:
        delete_kwargs = {'job_id': build_job_id}
        celery_app.send_task(SchedulerCeleryTasks.DELETE_ARCHIVED_BUILD_JOB,
                             kwargs=delete_kwargs)
Esempio n. 3
0
def build_jobs_stop(build_job_id, update_status=True):
    """Stop the dockerizer for the build job identified by ``build_job_id``.

    ``update_status`` is passed through unchanged to
    ``dockerizer_scheduler.stop_dockerizer``. If the job lookup returns
    nothing, the event is logged and no stop is attempted.
    """
    job = get_valid_build_job(build_job_id=build_job_id)
    if job:
        dockerizer_scheduler.stop_dockerizer(job, update_status=update_status)
        return

    _logger.info(
        'Something went wrong, the BuildJob `%s` does not exist anymore.',
        build_job_id)
Esempio n. 4
0
def build_jobs_start(build_job_id):
    """Start the dockerizer for the build job identified by ``build_job_id``.

    If the job lookup returns nothing, the event is logged and no start is
    attempted.
    """
    job = get_valid_build_job(build_job_id=build_job_id)
    if job:
        dockerizer_scheduler.start_dockerizer(job)
        return

    _logger.info(
        'Something went wrong, the BuildJob `%s` does not exist anymore.',
        build_job_id)
Esempio n. 5
0
def build_jobs_start(build_job_id):
    """Look up a build job by id and hand it to the dockerizer scheduler.

    A missing job (falsy lookup result) is logged and otherwise ignored.
    """
    found_job = get_valid_build_job(build_job_id=build_job_id)
    if found_job:
        dockerizer_scheduler.start_dockerizer(found_job)
    else:
        _logger.info(
            'Something went wrong, the BuildJob `%s` does not exist anymore.',
            build_job_id)
Esempio n. 6
0
def build_jobs_stop(build_job_id, update_status=True):
    """Look up a build job by id and ask the dockerizer scheduler to stop it.

    ``update_status`` is forwarded as-is to ``stop_dockerizer``. A missing job
    (falsy lookup result) is logged and otherwise ignored.
    """
    found_job = get_valid_build_job(build_job_id=build_job_id)
    if found_job:
        dockerizer_scheduler.stop_dockerizer(found_job,
                                             update_status=update_status)
    else:
        _logger.info(
            'Something went wrong, the BuildJob `%s` does not exist anymore.',
            build_job_id)
Esempio n. 7
0
def build_jobs_set_dockerfile(build_job_uuid, dockerfile):
    """Store ``dockerfile`` on the build job identified by ``build_job_uuid``.

    The value is assigned to the job's ``dockerfile`` attribute and the job is
    saved. If the job lookup returns nothing, the event is logged and nothing
    is written.
    """
    job = get_valid_build_job(build_job_uuid=build_job_uuid)
    if job:
        job.dockerfile = dockerfile
        job.save()
        return

    _logger.info(
        'Something went wrong, the BuildJob `%s` does not exist anymore.',
        build_job_uuid)
Esempio n. 8
0
def build_jobs_set_dockerfile(build_job_uuid, dockerfile):
    """Attach a dockerfile to a build job and save the job.

    The job is looked up by ``build_job_uuid``. A missing job (falsy lookup
    result) is logged and otherwise ignored.
    """
    found_job = get_valid_build_job(build_job_uuid=build_job_uuid)
    if found_job:
        found_job.dockerfile = dockerfile
        found_job.save()
    else:
        _logger.info(
            'Something went wrong, the BuildJob `%s` does not exist anymore.',
            build_job_uuid)
Esempio n. 9
0
def build_jobs_check_heartbeat(build_job_id):
    """Mark a build job as failed when no heartbeat is reported for it.

    Nothing happens if ``RedisHeartBeat.build_is_alive`` reports the build as
    alive, or if the job lookup returns nothing. Otherwise the job status is
    set to ``JobLifeCycle.FAILED`` with a zombie-state message.
    """
    heartbeat_present = RedisHeartBeat.build_is_alive(build_id=build_job_id)
    if heartbeat_present:
        return

    job = get_valid_build_job(build_job_id=build_job_id)
    if job:
        # No heartbeat was reported: treat the build job as a zombie.
        job.set_status(
            JobLifeCycle.FAILED,
            message='BuildJob is in zombie state (no heartbeat was reported).')
Esempio n. 10
0
def build_jobs_stop(self,
                    project_name,
                    project_uuid,
                    build_job_name,
                    build_job_uuid,
                    update_status=True,
                    collect_logs=True,
                    is_managed=True,
                    message=None):
    """Stop a build job, optionally collecting logs and updating its status.

    ``self`` is the bound task object; this function reads
    ``self.request.retries`` and calls ``self.retry``.

    When ``is_managed`` is truthy:
      * logs are collected first if ``collect_logs`` is truthy; a failure
        with ``OSError``, ``VolumeNotFoundError`` or
        ``PolyaxonStoresException`` is logged as a warning and ignored;
      * the dockerizer is then asked to stop, and its return value decides
        whether a retry is needed.
    When ``is_managed`` is falsy, neither step runs and the stop is treated as
    successful.

    A falsy stop result with fewer than two recorded retries re-submits the
    task and ends this run. Otherwise, when ``update_status`` is truthy, the
    job is looked up with ``include_deleted=True`` and set to
    ``JobLifeCycle.STOPPED``, using ``message`` or a default text.
    """
    stop_succeeded = True
    if is_managed:
        if collect_logs:
            try:
                logs_collect_build_job(build_uuid=build_job_uuid)
            except (OSError, VolumeNotFoundError, PolyaxonStoresException):
                _logger.warning(
                    'Scheduler could not collect the logs for build `%s`.',
                    build_job_name)

        stop_succeeded = dockerizer_scheduler.stop_dockerizer(
            project_name=project_name,
            project_uuid=project_uuid,
            build_job_name=build_job_name,
            build_job_uuid=build_job_uuid,
        )

    retry_needed = not stop_succeeded and self.request.retries < 2
    if retry_needed:
        _logger.info('Trying again to delete build `%s`.', build_job_name)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        return

    if update_status:
        job = get_valid_build_job(build_job_uuid=build_job_uuid,
                                  include_deleted=True)
        if job:
            # Record that the build job has been stopped.
            job.set_status(status=JobLifeCycle.STOPPED,
                           message=message or 'BuildJob was stopped.')
            return

        _logger.info(
            'Something went wrong, the BuildJob `%s` does not exist anymore.',
            build_job_uuid)
Esempio n. 11
0
def build_jobs_stop(project_name, project_uuid, build_job_name, build_job_uuid, update_status=True):
    """Stop the dockerizer of a build job and optionally mark it as stopped.

    The stop request is always forwarded to
    ``dockerizer_scheduler.stop_dockerizer``; its return value is not used.
    When ``update_status`` is truthy, the build job is looked up by
    ``build_job_uuid`` and set to ``JobLifeCycle.STOPPED``. A missing job is
    logged and otherwise ignored.
    """
    dockerizer_scheduler.stop_dockerizer(
        project_name=project_name,
        project_uuid=project_uuid,
        build_job_name=build_job_name,
        build_job_uuid=build_job_uuid,
    )

    if update_status:
        job = get_valid_build_job(build_job_uuid=build_job_uuid)
        if job:
            # Record that the build job has been stopped.
            job.set_status(status=JobLifeCycle.STOPPED,
                           message='BuildJob was stopped.')
            return

        _logger.info(
            'Something went wrong, the BuildJob `%s` does not exist anymore.',
            build_job_uuid)
Esempio n. 12
0
def build_jobs_notify_done(build_job_id):
    """Dispatch the notification that matches a finished build job's outcome.

    The job's ``failed``, ``stopped`` and ``succeeded`` flags are checked in
    that order, and only the first truthy one triggers its ``notify_*``
    helper. If none is truthy, nothing is sent. A missing job (falsy lookup
    result) is logged and otherwise ignored.
    """
    job = get_valid_build_job(build_job_id=build_job_id)
    if not job:
        _logger.info(
            'Something went wrong, the BuildJob `%s` does not exist anymore.',
            build_job_id)
        return

    # Exactly one notification is sent, chosen by the first matching outcome.
    if job.failed:
        notify_build_job_failed(job)
    elif job.stopped:
        notify_build_job_stopped(job)
    elif job.succeeded:
        notify_build_job_succeeded(job)
Esempio n. 13
0
def build_jobs_notify_done(build_job_id):
    """Notify about a build job's final outcome.

    After looking the job up by ``build_job_id``, the first truthy flag among
    ``failed``, ``stopped`` and ``succeeded`` (checked in that order) selects
    the ``notify_build_job_*`` helper to call. No helper is called when the
    job is missing (this is logged) or when no flag is truthy.
    """
    finished_job = get_valid_build_job(build_job_id=build_job_id)
    if not finished_job:
        _logger.info(
            'Something went wrong, the BuildJob `%s` does not exist anymore.',
            build_job_id)
        return

    if finished_job.failed:
        # Failed build.
        notify_build_job_failed(finished_job)
    elif finished_job.stopped:
        # Stopped build.
        notify_build_job_stopped(finished_job)
    elif finished_job.succeeded:
        # Successful build.
        notify_build_job_succeeded(finished_job)