def experiments_group_schedule_deletion(experiment_group_id, immediate=False):
    """Archive an experiment group and queue the tasks that tear it down.

    The group is looked up with ``include_deleted=True``. If the lookup
    yields nothing, the function returns without doing anything.

    Args:
        experiment_group_id: Identifier passed to the group lookup and
            forwarded to the queued tasks.
        immediate: When truthy, a ``DELETE_ARCHIVED_EXPERIMENT_GROUP`` task
            is queued in addition to archiving.
    """
    group = get_valid_experiment_group(
        experiment_group_id=experiment_group_id,
        include_deleted=True,
    )
    if not group:
        # Nothing to archive or delete for this id.
        return

    group.archive()

    if group.is_stoppable:
        # Ask the scheduler to stop the group; logs are not collected.
        stop_kwargs = {
            'experiment_group_id': experiment_group_id,
            'collect_logs': False,
            'message': 'Experiment Group is scheduled for deletion.',
        }
        workers.send(
            SchedulerCeleryTasks.EXPERIMENTS_GROUP_STOP,
            kwargs=stop_kwargs,
            countdown=conf.get(SCHEDULER_GLOBAL_COUNTDOWN),
        )

    if not immediate:
        return

    # Deletion uses the "delayed" countdown setting rather than the regular one.
    workers.send(
        SchedulerCeleryTasks.DELETE_ARCHIVED_EXPERIMENT_GROUP,
        kwargs={'group_id': experiment_group_id},
        countdown=conf.get(SCHEDULER_GLOBAL_COUNTDOWN_DELAYED),
    )
def experiments_group_schedule_deletion(experiment_group_id, immediate=False):
    """Archive an experiment group and queue the tasks that tear it down.

    The group is looked up with ``include_deleted=True``. If the lookup
    yields nothing, the function returns without doing anything.

    Args:
        experiment_group_id: Identifier passed to the group lookup and
            forwarded to the queued tasks.
        immediate: When truthy, a ``DELETE_ARCHIVED_EXPERIMENT_GROUP`` task
            is queued in addition to archiving.
    """
    group = get_valid_experiment_group(
        experiment_group_id=experiment_group_id,
        include_deleted=True,
    )
    if not group:
        # Nothing to archive or delete for this id.
        return

    group.archive()

    if group.is_running:
        # Stop the group's experiments (not just the pending ones) and skip
        # log collection.
        stop_kwargs = {
            'experiment_group_id': experiment_group_id,
            'pending': False,
            'collect_logs': False,
            'message': 'Experiment Group is scheduled for deletion.',
        }
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_GROUP_STOP_EXPERIMENTS,
            kwargs=stop_kwargs,
        )

    if not immediate:
        return

    celery_app.send_task(
        SchedulerCeleryTasks.DELETE_ARCHIVED_EXPERIMENT_GROUP,
        kwargs={'group_id': experiment_group_id},
    )
def experiments_group_check_finished(self, experiment_group_id):
    """Mark an experiment group as succeeded once none of its experiments remain.

    If the group still has non-done experiments, the task asks to be retried
    after ``Intervals.EXPERIMENTS_SCHEDULER`` and leaves the status untouched.

    Args:
        self: The task object; only its ``retry`` method is used here.
        experiment_group_id: Identifier passed to the group lookup.
    """
    experiment_group = get_valid_experiment_group(
        experiment_group_id=experiment_group_id)
    if not experiment_group:
        # The lookup can come back empty (the other tasks in this module
        # guard against it too); without this check the attribute access
        # below would fail with AttributeError.
        return

    if experiment_group.non_done_experiments.exists():
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        # Only reached if retry() returns instead of raising.
        return

    experiment_group.set_status(status=ExperimentGroupLifeCycle.SUCCEEDED)
def experiments_group_check_finished(self, experiment_group_id):
    """Mark an experiment group as succeeded once none of its experiments remain.

    Does nothing when the group lookup is empty or the group is already done.
    While non-done experiments exist, the task asks to be retried after
    ``Intervals.EXPERIMENTS_SCHEDULER`` instead of updating the status.

    Args:
        self: The task object; only its ``retry`` method is used here.
        experiment_group_id: Identifier passed to the group lookup.
    """
    group = get_valid_experiment_group(experiment_group_id=experiment_group_id)

    if group and not group.is_done:
        if group.non_done_experiments.exists():
            # Still work in flight: check again later.
            self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        else:
            group.set_status(status=ExperimentGroupLifeCycle.SUCCEEDED)
def experiments_group_check_done(self, experiment_group_id, auto_retry=False):
    """Set an experiment group's status to DONE when no experiment is pending completion.

    Does nothing when the group lookup is empty or the group is already done.

    Args:
        self: The task object; only its ``retry`` method is used here.
        experiment_group_id: Identifier passed to the group lookup.
        auto_retry: When truthy and non-done experiments still exist, the
            task asks to be retried after ``Intervals.EXPERIMENTS_SCHEDULER``.
            When falsy, the function returns without retrying.
    """
    group = get_valid_experiment_group(experiment_group_id=experiment_group_id)

    if group and not group.is_done:
        has_unfinished = group.non_done_experiments.exists()
        if not has_unfinished:
            group.set_status(status=ExperimentGroupLifeCycle.DONE)
        elif auto_retry:
            # Experiments are still running; come back later.
            self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
def _get_group_or_retry(experiment_group_id, task):
    """Return the experiment group, asking ``task`` to retry while it is missing.

    Args:
        experiment_group_id: Identifier passed to the group lookup.
        task: Task object; ``task.request.retries`` and ``task.retry`` are used.

    Returns:
        The experiment group when the lookup succeeds, otherwise ``None``.
    """
    group = get_valid_experiment_group(experiment_group_id=experiment_group_id)

    if not group:
        # A missing group is retried only for the first two attempts.
        if task.request.retries < 2:
            _logger.info('Trying again for ExperimentGroup `%s`.', experiment_group_id)
            task.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)

        # Reached once retries are exhausted (or if retry() returns).
        _logger.info('Something went wrong, '
                     'the ExperimentGroup `%s` does not exist anymore.',
                     experiment_group_id)
        return None

    return group
def _get_group_or_retry(experiment_group_id, task):
    """Look up an experiment group, requesting a task retry if it is not found.

    Args:
        experiment_group_id: Identifier passed to the group lookup.
        task: Task object; ``task.request.retries`` and ``task.retry`` are used.

    Returns:
        The experiment group when found, otherwise ``None``.
    """
    found = get_valid_experiment_group(experiment_group_id=experiment_group_id)
    if found:
        return found

    may_retry = task.request.retries < 2
    if may_retry:
        # The group is missing; give it another chance to appear.
        _logger.info('Trying again for ExperimentGroup `%s`.', experiment_group_id)
        task.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)

    _logger.info(
        'Something went wrong, '
        'the ExperimentGroup `%s` does not exist anymore.',
        experiment_group_id,
    )
    return None
def experiments_group_stop_experiments(experiment_group_id,
                                       pending,
                                       collect_logs=True,
                                       message=None):
    """Stop the experiments of an experiment group and mark the group as stopped.

    The group is looked up with ``include_deleted=True``; if the lookup
    yields nothing, the function returns without doing anything.

    Args:
        experiment_group_id: Identifier passed to the group lookup.
        pending: When truthy, only the group's ``pending_experiments`` are
            handled, and each is set to STOPPED directly. When falsy, every
            experiment whose status is not in
            ``ExperimentLifeCycle.DONE_STATUS`` is handled.
        collect_logs: Forwarded to the ``EXPERIMENTS_STOP`` task for
            stoppable experiments.
        message: Status message attached to experiments that are set to
            STOPPED directly and to the group status. It is not forwarded to
            the ``EXPERIMENTS_STOP`` task.
    """
    experiment_group = get_valid_experiment_group(experiment_group_id=experiment_group_id,
                                                  include_deleted=True)
    if not experiment_group:
        return

    if pending:  # this won't work for archived groups anyways!
        for experiment in experiment_group.pending_experiments:
            # Update experiment status to show that it's stopped
            experiment.set_status(status=ExperimentLifeCycle.STOPPED, message=message)
    else:
        # Skip experiments that already reached a done status.
        experiments = experiment_group.all_experiments.exclude(
            status__status__in=ExperimentLifeCycle.DONE_STATUS).distinct()
        for experiment in experiments:
            if experiment.is_stoppable:
                # Delegate the actual stop to a dedicated task.
                celery_app.send_task(
                    SchedulerCeleryTasks.EXPERIMENTS_STOP,
                    kwargs={
                        'project_name': experiment.project.unique_name,
                        'project_uuid': experiment.project.uuid.hex,
                        'experiment_name': experiment.unique_name,
                        'experiment_uuid': experiment.uuid.hex,
                        'experiment_group_name': experiment_group.unique_name,
                        'experiment_group_uuid': experiment_group.uuid.hex,
                        'specification': experiment.config,
                        'update_status': True,
                        'collect_logs': collect_logs
                    })
            else:
                # Update experiment status to show that it's stopped
                experiment.set_status(status=ExperimentLifeCycle.STOPPED, message=message)

    # NOTE(review): the source this was recovered from had its indentation
    # flattened, so it is unclear whether this call ran for both branches or
    # only for the non-pending one -- confirm against the original file.
    experiment_group.set_status(ExperimentGroupLifeCycle.STOPPED, message=message)