예제 #1
0
def new_pipeline_run_status(sender, **kwargs):
    """Propagate a newly recorded status onto its pipeline run.

    The status record is taken from ``kwargs['instance']``. It becomes the
    run's current status, the run's start/finish timestamps are refreshed via
    ``set_started_at`` / ``set_finished_at``, and the run is saved. When the
    run then reports itself as stopped or skipped, the matching celery task
    is dispatched with the run id and a message.
    """
    status_record = kwargs['instance']
    run = status_record.pipeline_run

    # Point the run at its latest status record and refresh its timestamps.
    run.status = status_record
    set_started_at(
        instance=run,
        status=status_record.status,
        starting_statuses=[PipelineStatuses.RUNNING])
    set_finished_at(
        instance=run,
        status=status_record.status,
        is_done=PipelineStatuses.is_done)
    run.save(update_fields=['status', 'started_at', 'finished_at'])

    # Operations have to be notified when the dag run is stopped or skipped.
    if run.stopped:
        celery_app.send_task(
            PipelinesCeleryTasks.PIPELINES_STOP_OPERATIONS,
            kwargs={'pipeline_run_id': run.id,
                    'message': 'Pipeline run was stopped'})
    if run.skipped:
        celery_app.send_task(
            PipelinesCeleryTasks.PIPELINES_SKIP_OPERATIONS,
            kwargs={'pipeline_run_id': run.id,
                    'message': 'Pipeline run was skipped'})
예제 #2
0
def handle_new_experiment_status(sender, **kwargs):
    """Schedule an experiment stop once a failed/succeeded status is recorded.

    Nothing happens when the experiment has no specification, when the new
    status is neither ``FAILED`` nor ``SUCCEEDED``, or when the experiment has
    no jobs. Otherwise an ``EXPERIMENTS_STOP`` task is sent with a countdown
    obtained from ``RedisTTL.get_for_experiment``.
    """
    status_record = kwargs['instance']
    experiment = status_record.experiment
    if not experiment.specification:
        return

    final_statuses = (ExperimentLifeCycle.FAILED, ExperimentLifeCycle.SUCCEEDED)
    if status_record.status not in final_statuses:
        return
    if experiment.jobs.count() <= 0:
        return

    _logger.debug(
        'One of the workers failed or Master for experiment `%s` is done, '
        'send signal to other workers to stop.', experiment.unique_name)

    # Other jobs of this experiment may still be running, so schedule a stop.
    group = experiment.experiment_group
    stop_kwargs = {
        'project_name': experiment.project.unique_name,
        'project_uuid': experiment.project.uuid.hex,
        'experiment_name': experiment.unique_name,
        'experiment_uuid': experiment.uuid.hex,
        'experiment_group_name': group.unique_name if group else None,
        'experiment_group_uuid': group.uuid.hex if group else None,
        'specification': experiment.config,
        'update_status': False,
        'collect_logs': True,
    }
    delay = RedisTTL.get_for_experiment(experiment_id=experiment.id)
    celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_STOP,
                         kwargs=stop_kwargs,
                         countdown=delay)
예제 #3
0
def jobs_build(job_id):
    """Create the build job for a job and update the job accordingly.

    Returns ``None`` in every case. The function exits early when the job
    cannot be resolved or cannot transition to ``BUILDING``. After the build
    job is created and stored on the job, one of three things happens:
    the job start task is sent (image already exists), the job is marked
    ``BUILDING`` (build started), or the job is marked ``FAILED``.
    """
    job = get_valid_job(job_id=job_id)
    if not job:
        return None

    allowed = JobLifeCycle.can_transition(status_from=job.last_status,
                                          status_to=JobLifeCycle.BUILDING)
    if not allowed:
        _logger.info('Job id `%s` cannot transition from `%s` to `%s`.',
                     job_id, job.last_status, JobLifeCycle.BUILDING)
        return None

    created_build, image_exists, build_started = dockerizer_scheduler.create_build_job(
        user=job.user,
        project=job.project,
        config=job.specification.build,
        configmap_refs=job.specification.configmap_refs,
        secret_refs=job.specification.secret_refs,
        code_reference=job.code_reference)

    job.build_job = created_build
    job.save(update_fields=['build_job'])

    if image_exists:
        # Nothing to build: the image is there, so the job can be started directly.
        celery_app.send_task(
            SchedulerCeleryTasks.JOBS_START,
            kwargs={'job_id': job_id},
            countdown=conf.get(SCHEDULER_GLOBAL_COUNTDOWN))
    elif build_started:
        # Reflect that the docker image is being built.
        job.set_status(JobLifeCycle.BUILDING, message='Building container')
    else:
        job.set_status(JobLifeCycle.FAILED, message='Could not start build process.')
    return None
예제 #4
0
def start_new_experiment(sender, **kwargs):
    """Send the experiment build task for independent or cloned experiments.

    The experiment is read from ``kwargs['instance']``; any other kind of
    experiment is ignored.
    """
    experiment = kwargs['instance']
    if not (experiment.is_independent or experiment.is_clone):
        return

    # Build first; the experiment is then scheduled to be picked by the spawners.
    celery_app.send_task(
        SchedulerCeleryTasks.EXPERIMENTS_BUILD,
        kwargs={'experiment_id': experiment.id},
        countdown=1)
예제 #5
0
 def post(self, request, *args, **kwargs):
     """Stop the project's notebook and always answer with HTTP 200.

     When the project has a notebook, code handling is attempted (only if
     ``MOUNT_CODE_IN_NOTEBOOKS`` is set and the project has a repo), the
     notebook stop task is sent and the trigger is recorded with the auditor.
     Otherwise, a notebook that exists and is stoppable is marked as stopped.
     """
     project = self.project
     if not project.has_notebook:
         if project.notebook and project.notebook.is_stoppable:
             project.notebook.set_status(
                 status=ExperimentLifeCycle.STOPPED,
                 message='Notebook was stopped')
         return Response(status=status.HTTP_200_OK)

     try:
         mount_code = conf.get('MOUNT_CODE_IN_NOTEBOOKS')
         if mount_code and project.has_repo:
             self.handle_code(request)
     except FileNotFoundError:
         # Git probably was not found
         pass

     celery_app.send_task(
         SchedulerCeleryTasks.PROJECTS_NOTEBOOK_STOP,
         kwargs={
             'project_name': project.unique_name,
             'project_uuid': project.uuid.hex,
             'notebook_job_name': project.notebook.unique_name,
             'notebook_job_uuid': project.notebook.uuid.hex,
             'update_status': True,
         },
         countdown=conf.get('GLOBAL_COUNTDOWN'))
     auditor.record(
         event_type=NOTEBOOK_STOPPED_TRIGGERED,
         instance=project.notebook,
         target='project',
         actor_id=self.request.user.id,
         actor_name=self.request.user.username,
         countdown=1)
     return Response(status=status.HTTP_200_OK)
예제 #6
0
def projects_notebook_build(notebook_job_id):
    """Create the build job for a notebook job and update the notebook job.

    Returns ``None`` in every case. The function exits early when the
    notebook job cannot be resolved or cannot transition to ``BUILDING``.
    After the build job is created and stored, either the notebook start task
    is sent (image already exists), the notebook job is marked ``BUILDING``
    (build started), or it is marked ``FAILED``.
    """
    notebook_job = get_valid_notebook(notebook_job_id=notebook_job_id)
    if not notebook_job:
        return None

    allowed = JobLifeCycle.can_transition(status_from=notebook_job.last_status,
                                          status_to=JobLifeCycle.BUILDING)
    if not allowed:
        _logger.info('Notebook `%s` cannot transition from `%s` to `%s`.',
                     notebook_job, notebook_job.last_status, JobLifeCycle.BUILDING)
        return None

    created_build, image_exists, build_started = dockerizer_scheduler.create_build_job(
        user=notebook_job.user,
        project=notebook_job.project,
        config=notebook_job.specification.build,
        configmap_refs=notebook_job.specification.configmap_refs,
        secret_refs=notebook_job.specification.secret_refs,
        code_reference=notebook_job.code_reference)

    notebook_job.build_job = created_build
    notebook_job.save(update_fields=['build_job'])

    if image_exists:
        # Nothing to build: the image is there, so the notebook can be started directly.
        celery_app.send_task(
            SchedulerCeleryTasks.PROJECTS_NOTEBOOK_START,
            kwargs={'notebook_job_id': notebook_job_id},
            countdown=conf.get('GLOBAL_COUNTDOWN'))
    elif build_started:
        # Reflect that the docker image is being built.
        notebook_job.set_status(JobLifeCycle.BUILDING, message='Building container')
    else:
        notebook_job.set_status(JobLifeCycle.FAILED,
                                message='Could not start build process.')
    return None
예제 #7
0
    def build(self, nocache: bool = False, memory_limit: Any = None) -> bool:
        """Render the Dockerfile, write it to disk and run the docker build.

        Args:
            nocache: Forwarded to ``self.docker.build`` as ``nocache``.
            memory_limit: When truthy, passed to the build as the ``memory``
                container limit.

        Returns:
            Whatever ``self._handle_log_stream`` returns for the build stream.
        """
        _logger.debug('Starting build for `%s`', self.repo_path)
        # Checkout to the correct commit
        # if self.image_tag != self.LATEST_IMAGE_TAG:
        #     git.checkout_commit(repo_path=self.repo_path, commit=self.image_tag)

        limits = {
            # Disable memory swap for building
            'memswap': -1
        }
        if memory_limit:
            limits['memory'] = memory_limit

        # Render and publish the Dockerfile before opening the target file:
        # mode 'w' truncates on open, so a failure in either step would
        # otherwise leave an empty Dockerfile on disk.
        rendered_dockerfile = self.render()
        celery_app.send_task(
            SchedulerCeleryTasks.BUILD_JOBS_SET_DOCKERFILE,
            kwargs={'build_job_uuid': self.job_uuid, 'dockerfile': rendered_dockerfile})

        # Create DockerFile
        with open(self.dockerfile_path, 'w') as dockerfile:
            dockerfile.write(rendered_dockerfile)

        stream = self.docker.build(
            path=self.build_path,
            tag=self.get_tagged_image(),
            forcerm=True,
            rm=True,
            pull=True,
            nocache=nocache,
            container_limits=limits)
        return self._handle_log_stream(stream=stream)
예제 #8
0
def hp_hyperband_iterate(self, experiment_group_id):
    """Advance a hyperband experiment group by one iteration step.

    Does nothing when the group is not resolved as running. While the group
    still has non-done experiments the task is retried. Otherwise the
    iteration is updated and the search manager decides between sending the
    create task, reducing configs and sending the start task, or checking
    whether the group's experiments are finished.
    """
    group = get_running_experiment_group(experiment_group_id=experiment_group_id)
    if not group:
        return

    if group.non_done_experiments.count() > 0:
        # Every experiment has to be done first, so try again later.
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        return

    iteration_config = group.iteration_config
    iteration_manager = group.iteration_manager
    search_manager = group.search_manager

    iteration_manager.update_iteration()

    needs_reschedule = search_manager.should_reschedule(
        iteration=iteration_config.iteration,
        bracket_iteration=iteration_config.bracket_iteration)
    if needs_reschedule:
        celery_app.send_task(HPCeleryTasks.HP_HYPERBAND_CREATE,
                             kwargs={'experiment_group_id': experiment_group_id})
        return

    needs_reduction = search_manager.should_reduce_configs(
        iteration=iteration_config.iteration,
        bracket_iteration=iteration_config.bracket_iteration)
    if needs_reduction:
        iteration_manager.reduce_configs()
        celery_app.send_task(HPCeleryTasks.HP_HYPERBAND_START,
                             kwargs={'experiment_group_id': experiment_group_id})
        return

    base.check_group_experiments_finished(experiment_group_id)
예제 #9
0
 def post(self, request, *args, **kwargs):
     """Record a stop trigger for the experiment and send its stop task.

     Always answers with HTTP 200.
     """
     experiment = self.get_object()
     auditor.record(
         event_type=EXPERIMENT_STOPPED_TRIGGERED,
         instance=experiment,
         actor_id=request.user.id,
         actor_name=request.user.username)

     group = experiment.experiment_group
     stop_kwargs = {
         'project_name': self.project.unique_name,
         'project_uuid': self.project.uuid.hex,
         'experiment_name': experiment.unique_name,
         'experiment_uuid': experiment.uuid.hex,
         # Group fields are None for experiments without a group.
         'experiment_group_name': group.unique_name if group else None,
         'experiment_group_uuid': group.uuid.hex if group else None,
         'specification': experiment.config,
         'update_status': True,
     }
     celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_STOP,
                          kwargs=stop_kwargs)
     return Response(status=status.HTTP_200_OK)
예제 #10
0
def heartbeat_experiments() -> None:
    """Send a heartbeat-check task for each experiment in a heartbeat status."""
    experiment_ids = (
        Experiment.objects
        .filter(status__status__in=ExperimentLifeCycle.HEARTBEAT_STATUS)
        .values_list('id', flat=True)
    )
    for experiment_id in experiment_ids:
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_CHECK_HEARTBEAT,
            kwargs={'experiment_id': experiment_id},
            countdown=conf.get(SCHEDULER_GLOBAL_COUNTDOWN))
예제 #11
0
def heartbeat_builds() -> None:
    """Send a heartbeat-check task for each build job in a heartbeat status."""
    build_job_ids = (
        BuildJob.objects
        .filter(status__status__in=JobLifeCycle.HEARTBEAT_STATUS)
        .values_list('id', flat=True)
    )
    for build_job_id in build_job_ids:
        celery_app.send_task(
            SchedulerCeleryTasks.BUILD_JOBS_CHECK_HEARTBEAT,
            kwargs={'build_job_id': build_job_id},
            countdown=conf.get(SCHEDULER_GLOBAL_COUNTDOWN))
예제 #12
0
    def _handle_experiment_cleaned_triggered(cls, event: 'Event') -> None:
        """Send a stop task for the experiment carried by a cleanup event.

        Nothing is sent when the event has no instance, or when the instance
        is not managed, has no specification, is not stoppable, or has no
        jobs. An ``ExperimentGroup.DoesNotExist`` raised while preparing or
        sending the task is ignored.

        Args:
            event: Event whose ``instance`` attribute is the experiment.
        """
        from db.models.experiment_groups import ExperimentGroup

        instance = event.instance

        # Check for a missing instance before reading any attribute from it;
        # reading `is_managed` first would raise AttributeError on None.
        if not instance or not instance.is_managed:
            return
        if not instance.has_specification or not instance.is_stoppable:
            return
        if instance.jobs.count() == 0:
            return

        try:
            group = instance.experiment_group
            celery_app.send_task(
                SchedulerCeleryTasks.EXPERIMENTS_STOP,
                kwargs={
                    'project_name': instance.project.unique_name,
                    'project_uuid': instance.project.uuid.hex,
                    'experiment_name': instance.unique_name,
                    'experiment_uuid': instance.uuid.hex,
                    'experiment_group_name':
                    group.unique_name if group else None,
                    'experiment_group_uuid': group.uuid.hex if group else None,
                    'specification': instance.content,
                    'update_status': False,
                    'collect_logs': False,
                    'is_managed': instance.is_managed,
                },
                countdown=conf.get(SCHEDULER_GLOBAL_COUNTDOWN))
        except ExperimentGroup.DoesNotExist:
            # The experiment was already stopped when the group was deleted
            pass
예제 #13
0
def jobs_schedule_deletion(job_id, immediate=False):
    """Archive a job, stop it when it is running, and optionally delete it.

    Args:
        job_id: Id used to resolve the job (deleted jobs included).
        immediate: When truthy, the archived-job deletion task is also sent.

    Returns:
        Always ``None``.
    """
    job = get_valid_job(job_id=job_id, include_deleted=True)
    if not job:
        return None

    job.archive()

    if job.is_running:
        project = job.project
        stop_kwargs = {
            'project_name': project.unique_name,
            'project_uuid': project.uuid.hex,
            'job_name': job.unique_name,
            'job_uuid': job.uuid.hex,
            'update_status': True,
            'collect_logs': False,
            'message': 'Job is scheduled for deletion.',
        }
        celery_app.send_task(SchedulerCeleryTasks.JOBS_STOP, kwargs=stop_kwargs)

    if immediate:
        celery_app.send_task(SchedulerCeleryTasks.DELETE_ARCHIVED_JOB,
                             kwargs={'job_id': job_id})
    return None
예제 #14
0
def new_operation_run_status(sender, **kwargs):
    """Propagate a newly recorded status onto its operation run.

    The status record from ``kwargs['instance']`` becomes the operation run's
    current status, timestamps are refreshed and the run is saved. Unless the
    status is ``CREATED``, a pipeline status-check task is sent, and when the
    operation run is done a start task is sent for each downstream run that
    is still in ``CREATED`` status.
    """
    status_record = kwargs['instance']
    op_run = status_record.operation_run
    parent_run = op_run.pipeline_run

    # Point the operation run at its latest status record.
    op_run.status = status_record
    set_started_at(
        instance=op_run,
        status=status_record.status,
        starting_statuses=[PipelineStatuses.RUNNING])
    set_finished_at(
        instance=op_run,
        status=status_record.status,
        is_done=PipelineStatuses.is_done)
    op_run.save(update_fields=['status', 'started_at', 'finished_at'])

    # A freshly created run has nothing to report yet.
    if status_record.status == OperationStatuses.CREATED:
        return

    # Let the pipeline run re-evaluate its own status.
    check_kwargs = {
        'pipeline_run_id': parent_run.id,
        'status': status_record.status,
        'message': status_record.message,
    }
    celery_app.send_task(PipelinesCeleryTasks.PIPELINES_CHECK_STATUSES,
                         kwargs=check_kwargs)

    if not op_run.is_done:
        return

    # This run is done: downstream runs waiting on it may now start.
    waiting_runs = op_run.downstream_runs.filter(
        status__status=OperationStatuses.CREATED)
    for downstream in waiting_runs:
        celery_app.send_task(PipelinesCeleryTasks.PIPELINES_START_OPERATION,
                             kwargs={'operation_run_id': downstream.id})
예제 #15
0
 def send_chunk():
     """Send the grid-search create-experiments task for the current chunk.

     Reads ``experiment_group`` and ``chunk_suggestions`` from the enclosing
     scope.
     """
     payload = {
         'experiment_group_id': experiment_group.id,
         'suggestions': chunk_suggestions,
     }
     celery_app.send_task(
         HPCeleryTasks.HP_GRID_SEARCH_CREATE_EXPERIMENTS,
         kwargs=payload,
         countdown=1)
예제 #16
0
 def post(self, request, *args, **kwargs):
     """Stop every experiment of this project whose id is listed in ``ids``.

     For each matching experiment the stop trigger is recorded with the
     auditor and the stop task is sent. Always answers with HTTP 200.
     """
     requested_ids = request.data.get('ids', [])
     matching = self.queryset.filter(project=self.project, id__in=requested_ids)
     for experiment in matching:
         auditor.record(
             event_type=EXPERIMENT_STOPPED_TRIGGERED,
             instance=experiment,
             actor_id=request.user.id,
             actor_name=request.user.username)

         group = experiment.experiment_group
         stop_kwargs = {
             'project_name': self.project.unique_name,
             'project_uuid': self.project.uuid.hex,
             'experiment_name': experiment.unique_name,
             'experiment_uuid': experiment.uuid.hex,
             # Group fields are None for experiments without a group.
             'experiment_group_name': group.unique_name if group else None,
             'experiment_group_uuid': group.uuid.hex if group else None,
             'specification': experiment.config,
             'update_status': True,
         }
         celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_STOP,
                              kwargs=stop_kwargs)
     return Response(status=status.HTTP_200_OK)
예제 #17
0
    def post(self, request, *args, **kwargs):
        """Send the tensorboard stop task when one exists; always answer 200.

        When ``self.has_tensorboard`` is truthy, the stop task is sent and the
        trigger is recorded with the auditor, using the experiment/group ids
        from the URL kwargs to compute the audit target.
        """
        project = self.project
        tensorboard = self.tensorboard
        has_tensorboard = self.has_tensorboard
        experiment_id = self.kwargs.get('experiment_id')
        group_id = self.kwargs.get('group_id')

        if not has_tensorboard:
            return Response(status=status.HTTP_200_OK)

        stop_kwargs = {
            'project_name': project.unique_name,
            'project_uuid': project.uuid.hex,
            'tensorboard_job_name': tensorboard.unique_name,
            'tensorboard_job_uuid': tensorboard.uuid.hex,
            'update_status': True,
        }
        celery_app.send_task(
            SchedulerCeleryTasks.TENSORBOARDS_STOP,
            kwargs=stop_kwargs,
            countdown=conf.get('GLOBAL_COUNTDOWN'))
        auditor.record(
            event_type=TENSORBOARD_STOPPED_TRIGGERED,
            instance=tensorboard,
            target=get_target(experiment=experiment_id, group=group_id),
            actor_id=self.request.user.id,
            actor_name=self.request.user.username)
        return Response(status=status.HTTP_200_OK)
예제 #18
0
파일: bo.py 프로젝트: xuduofeng/polyaxon
def hp_bo_iterate(self, experiment_group_id, auto_retry=False):
    """Advance a BO experiment group by one iteration step.

    Does nothing when the group is not resolved as running. While the group
    still has non-done experiments, the task is retried only if ``auto_retry``
    is set, and the function returns either way. Otherwise the iteration is
    updated and either the create task is sent or the group's experiments are
    checked for completion.
    """
    group = get_running_experiment_group(experiment_group_id=experiment_group_id)
    if not group:
        return

    still_running = group.non_done_experiments.count() > 0
    if still_running:
        if auto_retry:
            # Every experiment has to be done first, so try again later.
            self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        return

    iteration_config = group.iteration_config
    iteration_manager = group.iteration_manager
    search_manager = group.search_manager

    iteration_manager.update_iteration()

    if search_manager.should_reschedule(iteration=iteration_config.iteration):
        celery_app.send_task(HPCeleryTasks.HP_BO_CREATE,
                             kwargs={'experiment_group_id': experiment_group_id})
        return

    base.check_group_experiments_finished(experiment_group_id, auto_retry=auto_retry)
예제 #19
0
def projects_notebook_schedule_deletion(notebook_job_id, immediate=False):
    """Archive a notebook job, stop it when stoppable, and optionally delete it.

    Args:
        notebook_job_id: Id used to resolve the notebook job (deleted ones
            included).
        immediate: When truthy, the archived-notebook deletion task is also
            sent.

    Returns:
        Always ``None``.
    """
    notebook_job = get_valid_notebook(notebook_job_id=notebook_job_id,
                                      include_deleted=True)
    if not notebook_job:
        return None

    notebook_job.archive()

    if notebook_job.is_stoppable:
        project = notebook_job.project
        stop_kwargs = {
            'project_name': project.unique_name,
            'project_uuid': project.uuid.hex,
            'notebook_job_name': notebook_job.unique_name,
            'notebook_job_uuid': notebook_job.uuid.hex,
            'update_status': True,
            'collect_logs': False,
            'is_managed': notebook_job.is_managed,
            'message': 'Notebook is scheduled for deletion.',
        }
        celery_app.send_task(SchedulerCeleryTasks.PROJECTS_NOTEBOOK_STOP,
                             kwargs=stop_kwargs,
                             countdown=conf.get('GLOBAL_COUNTDOWN'))

    if immediate:
        celery_app.send_task(SchedulerCeleryTasks.DELETE_ARCHIVED_NOTEBOOK_JOB,
                             kwargs={'job_id': notebook_job_id},
                             countdown=conf.get('GLOBAL_COUNTDOWN_DELAYED'))
    return None
예제 #20
0
def start_group_experiments(experiment_group):
    """Send build tasks for the next batch of a group's pending experiments.

    Returns:
        ``None`` when the group should stop early (a stop task for its
        pending experiments is sent instead). Otherwise a truthy value when
        pending experiments remain beyond this batch or when not all
        suggestions have been scheduled, and a falsy value if neither holds.
    """
    # Early stopping takes precedence over starting anything new.
    if experiment_group.should_stop_early():
        stop_kwargs = {
            'experiment_group_id': experiment_group.id,
            'pending': True,
            'message': 'Early stopping',
        }
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_GROUP_STOP_EXPERIMENTS,
            kwargs=stop_kwargs,
            countdown=conf.get('GLOBAL_COUNTDOWN'))
        return

    batch_size = experiment_group.n_experiments_to_start
    if batch_size <= 0:
        # This could happen due to concurrency or not created yet experiments
        has_pending = experiment_group.pending_experiments.exists()
        return has_pending or not experiment_group.scheduled_all_suggestions()

    batch = experiment_group.pending_experiments[:batch_size]
    # Count the pending experiments before the batch is iterated.
    total_pending = experiment_group.pending_experiments.count()

    for pending in batch:
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_BUILD,
            kwargs={'experiment_id': pending.id},
            countdown=conf.get('GLOBAL_COUNTDOWN'))

    more_pending = total_pending > batch_size
    return more_pending or not experiment_group.scheduled_all_suggestions()
예제 #21
0
def experiments_group_schedule_deletion(experiment_group_id, immediate=False):
    """Archive an experiment group, stop it when stoppable, optionally delete it.

    Args:
        experiment_group_id: Id used to resolve the group (deleted ones
            included).
        immediate: When truthy, the archived-group deletion task is also sent.
    """
    group = get_valid_experiment_group(experiment_group_id=experiment_group_id,
                                       include_deleted=True)
    if not group:
        # No need to check this group
        return

    group.archive()

    if group.is_stoppable:
        stop_kwargs = {
            'experiment_group_id': experiment_group_id,
            'collect_logs': False,
            'message': 'Experiment Group is scheduled for deletion.',
        }
        celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_GROUP_STOP,
                             kwargs=stop_kwargs,
                             countdown=conf.get('GLOBAL_COUNTDOWN'))

    if immediate:
        celery_app.send_task(SchedulerCeleryTasks.DELETE_ARCHIVED_EXPERIMENT_GROUP,
                             kwargs={'group_id': experiment_group_id},
                             countdown=conf.get('GLOBAL_COUNTDOWN'))
예제 #22
0
def build_jobs_schedule_deletion(build_job_id, immediate=False):
    """Archive a build job, stop it when stoppable, and optionally delete it.

    Args:
        build_job_id: Id used to resolve the build job (deleted ones included).
        immediate: When truthy, the archived-build deletion task is also sent.
    """
    build_job = get_valid_build_job(build_job_id=build_job_id, include_deleted=True)
    if not build_job:
        _logger.info(
            'Something went wrong, '
            'the BuildJob `%s` does not exist anymore.', build_job_id)
        return

    build_job.archive()

    if build_job.is_stoppable:
        project = build_job.project
        stop_kwargs = {
            'project_name': project.unique_name,
            'project_uuid': project.uuid.hex,
            'build_job_name': build_job.unique_name,
            'build_job_uuid': build_job.uuid.hex,
            'update_status': True,
            'collect_logs': False,
            'message': 'Build is scheduled for deletion.',
        }
        celery_app.send_task(SchedulerCeleryTasks.BUILD_JOBS_STOP,
                             kwargs=stop_kwargs,
                             countdown=conf.get('GLOBAL_COUNTDOWN'))

    if immediate:
        celery_app.send_task(SchedulerCeleryTasks.DELETE_ARCHIVED_BUILD_JOB,
                             kwargs={'job_id': build_job_id},
                             countdown=conf.get('GLOBAL_COUNTDOWN'))
예제 #23
0
def stop_running_experiment(sender, **kwargs):
    """Send a stop task for a running experiment that has jobs.

    The experiment is read from ``kwargs['instance']``. Experiments that are
    not running or have no jobs are ignored. An
    ``ExperimentGroup.DoesNotExist`` raised while preparing or sending the
    task is ignored.
    """
    experiment = kwargs['instance']
    if not experiment.is_running:
        return
    if experiment.jobs.count() == 0:
        return

    try:
        group = experiment.experiment_group
        stop_kwargs = {
            'project_name': experiment.project.unique_name,
            'project_uuid': experiment.project.uuid.hex,
            'experiment_name': experiment.unique_name,
            'experiment_uuid': experiment.uuid.hex,
            'experiment_group_name': group.unique_name if group else None,
            'experiment_group_uuid': group.uuid.hex if group else None,
            'specification': experiment.config,
            'update_status': False,
        }
        celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_STOP,
                             kwargs=stop_kwargs)
    except ExperimentGroup.DoesNotExist:
        # The experiment was already stopped when the group was deleted
        pass
예제 #24
0
 def perform_destroy(self, instance):
     """Archive the build job and send its immediate-deletion scheduling task."""
     instance.archive()
     deletion_kwargs = {'build_job_id': instance.id, 'immediate': True}
     celery_app.send_task(
         SchedulerCeleryTasks.BUILD_JOBS_SCHEDULE_DELETION,
         kwargs=deletion_kwargs)
예제 #25
0
    def post(self, request, *args, **kwargs):
        """Start a tensorboard for an experiment, a group, or the project.

        The target is chosen from the URL kwargs: ``experiment_id`` first,
        then ``group_id``, otherwise the project itself. A falsy result from
        the handler yields HTTP 200 with an "already running" message;
        otherwise the start task is sent unless the tensorboard is already
        running, and HTTP 201 is returned.
        """
        project = self.project
        experiment_id = self.kwargs.get('experiment_id')
        group_id = self.kwargs.get('group_id')

        if experiment_id:
            experiment = get_object_or_404(
                Experiment, project=project, id=experiment_id)
            job = self._handle_experiment_tensorboard(
                project=project, experiment=experiment)
        elif group_id:
            group = get_object_or_404(
                ExperimentGroup, project=project, id=group_id)
            job = self._handle_group_tensorboard(project=project, group=group)
        else:
            job = self._handle_project_tensorboard(project=project)

        if not job:
            return Response(data='Tensorboard is already running',
                            status=status.HTTP_200_OK)

        if not job.is_running:
            celery_app.send_task(
                SchedulerCeleryTasks.TENSORBOARDS_START,
                kwargs={'tensorboard_job_id': job.id},
                countdown=conf.get('GLOBAL_COUNTDOWN'))
        return Response(status=status.HTTP_201_CREATED)
예제 #26
0
def handle_experiment_job_condition(event_object, pod_state, status, labels, container_name):
    """Update the job's containers and forward the pod state to the status task.

    ``update_job_containers`` is called with the event object, status and
    container name; ``pod_state`` is then sent as the ``payload`` of the
    experiment-job statuses task.
    """
    update_job_containers(event_object, status, container_name)
    logger.debug("Sending state to handler %s, %s", status, labels)

    # Hand the pod state over to the experiment job statuses handler.
    payload_kwargs = {'payload': pod_state}
    celery_app.send_task(
        K8SEventsCeleryTasks.K8S_EVENTS_HANDLE_EXPERIMENT_JOB_STATUSES,
        kwargs=payload_kwargs)
예제 #27
0
def experiments_schedule_deletion(experiment_id, immediate=False):
    """Archive an experiment, stop it when running, and optionally delete it.

    Args:
        experiment_id: Id used to resolve the experiment (deleted ones
            included).
        immediate: When truthy, the archived-experiment deletion task is also
            sent.
    """
    experiment = get_valid_experiment(experiment_id=experiment_id, include_deleted=True)
    if not experiment:
        _logger.info(
            'Something went wrong, '
            'the Experiment `%s` does not exist anymore.', experiment_id)
        return

    experiment.archive()

    if experiment.is_running:
        project = experiment.project
        stop_kwargs = {
            'project_name': project.unique_name,
            'project_uuid': project.uuid.hex,
            'experiment_name': experiment.unique_name,
            'experiment_uuid': experiment.uuid.hex,
            # The group fields are always sent as None from this task.
            'experiment_group_name': None,
            'experiment_group_uuid': None,
            'specification': experiment.config,
            'update_status': True,
            'collect_logs': False,
            'message': 'Experiment is scheduled for deletion.',
        }
        celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_STOP,
                             kwargs=stop_kwargs)

    if immediate:
        celery_app.send_task(SchedulerCeleryTasks.DELETE_ARCHIVED_EXPERIMENT,
                             kwargs={'experiment_id': experiment_id})
예제 #28
0
 def _handle_experiment_created(cls, event: 'Event') -> None:
     """Send the build task for a created experiment when it qualifies.

     The event data must report a specification and the experiment must be
     either independent or a clone; otherwise nothing is sent.
     """
     data = event.data
     if not data['has_specification']:
         return
     if not (data['is_independent'] or data['is_clone']):
         return

     # Build first; the experiment is then scheduled to be picked by the spawners.
     celery_app.send_task(
         SchedulerCeleryTasks.EXPERIMENTS_BUILD,
         kwargs={'experiment_id': data['id']},
         countdown=1)
예제 #29
0
def jobs_schedule_deletion(job_id, immediate=False):
    """Archive a job, stop it when stoppable, and optionally delete it.

    Args:
        job_id: Id used to resolve the job (deleted jobs included).
        immediate: When truthy, the archived-job deletion task is also sent,
            using the delayed countdown setting.

    Returns:
        Always ``None``.
    """
    job = get_valid_job(job_id=job_id, include_deleted=True)
    if not job:
        return None

    job.archive()

    if job.is_stoppable:
        project = job.project
        stop_kwargs = {
            'project_name': project.unique_name,
            'project_uuid': project.uuid.hex,
            'job_name': job.unique_name,
            'job_uuid': job.uuid.hex,
            'update_status': True,
            'collect_logs': False,
            'is_managed': job.is_managed,
            'message': 'Job is scheduled for deletion.',
        }
        celery_app.send_task(SchedulerCeleryTasks.JOBS_STOP,
                             kwargs=stop_kwargs,
                             countdown=conf.get(SCHEDULER_GLOBAL_COUNTDOWN))

    if immediate:
        celery_app.send_task(SchedulerCeleryTasks.DELETE_ARCHIVED_JOB,
                             kwargs={'job_id': job_id},
                             countdown=conf.get(SCHEDULER_GLOBAL_COUNTDOWN_DELAYED))
    return None
예제 #30
0
파일: views.py 프로젝트: xuduofeng/polyaxon
 def post(self, request, *args, **kwargs):
     """Stop the project's notebook, committing or undoing repo changes first.

     The ``commit`` field of the request data selects between ``git.commit``
     and ``git.undo``; it defaults to committing when absent. A
     ``FileNotFoundError`` from either git call is ignored. The notebook stop
     task is then sent and the trigger recorded with the auditor. When the
     project has no notebook flag set but a running notebook exists, that
     notebook is marked as stopped. Always answers with HTTP 200.
     """
     project = self.project
     if not project.has_notebook:
         if project.notebook and project.notebook.is_running:
             project.notebook.set_status(status=ExperimentLifeCycle.STOPPED,
                                         message='Notebook was stopped')
         return Response(status=status.HTTP_200_OK)

     raw_commit = request.data.get('commit')
     if raw_commit is None:
         should_commit = True
     else:
         should_commit = to_bool(raw_commit)

     try:
         if should_commit:
             # Commit changes
             git.commit(project.repo.path, request.user.email, request.user.username)
         else:
             # Reset changes
             git.undo(project.repo.path)
     except FileNotFoundError:
         # Git probably was not found
         pass

     stop_kwargs = {
         'project_name': project.unique_name,
         'project_uuid': project.uuid.hex,
         'notebook_job_name': project.notebook.unique_name,
         'notebook_job_uuid': project.notebook.uuid.hex,
         'update_status': True,
     }
     celery_app.send_task(SchedulerCeleryTasks.PROJECTS_NOTEBOOK_STOP,
                          kwargs=stop_kwargs)
     auditor.record(
         event_type=NOTEBOOK_STOPPED_TRIGGERED,
         instance=project.notebook,
         target='project',
         actor_id=self.request.user.id,
         actor_name=self.request.user.username,
         countdown=1)
     return Response(status=status.HTTP_200_OK)