def experiments_stop(project_name,
                     project_uuid,
                     experiment_name,
                     experiment_group_name,
                     experiment_group_uuid,
                     experiment_uuid,
                     specification,
                     update_status=True):
    """Stop an experiment through the scheduler and optionally mark it stopped.

    The raw ``specification`` is parsed with ``ExperimentSpecification.read``
    and passed, together with the project/group/experiment names and uuids,
    to ``experiment_scheduler.stop_experiment``.

    When ``update_status`` is truthy the experiment is looked up by
    ``experiment_uuid`` and its status is set to
    ``ExperimentLifeCycle.STOPPED``. If the lookup returns nothing, the
    situation is logged and no status is written.
    """
    parsed_specification = ExperimentSpecification.read(specification)
    stop_kwargs = dict(
        project_name=project_name,
        project_uuid=project_uuid,
        experiment_name=experiment_name,
        experiment_group_name=experiment_group_name,
        experiment_group_uuid=experiment_group_uuid,
        experiment_uuid=experiment_uuid,
        specification=parsed_specification,
    )
    experiment_scheduler.stop_experiment(**stop_kwargs)

    if update_status:
        stopped_experiment = get_valid_experiment(experiment_uuid=experiment_uuid)
        if stopped_experiment:
            # Reflect the stop in the stored experiment status.
            stopped_experiment.set_status(ExperimentLifeCycle.STOPPED)
        else:
            _logger.info(
                'Something went wrong, '
                'the Experiment `%s` does not exist anymore.', experiment_uuid)
def experiments_stop(experiment_id, update_status=True):
    """Stop the experiment identified by ``experiment_id``.

    The experiment is resolved with ``get_valid_experiment``. When it is
    found, it is handed to ``experiment_scheduler.stop_experiment`` along
    with ``update_status``. When it is not found, the situation is logged
    and nothing else happens.
    """
    found = get_valid_experiment(experiment_id=experiment_id)
    if found:
        experiment_scheduler.stop_experiment(found, update_status=update_status)
        return

    _logger.info(
        'Something went wrong, '
        'the Experiment `%s` does not exist anymore.', experiment_id)
def stop_running_experiment(sender, **kwargs):
    """Signal handler: stop the experiment passed as ``kwargs['instance']``.

    The experiment's jobs are deleted from the database first, then the
    scheduler is asked to stop the experiment without updating its status.
    An ``ExperimentGroup.DoesNotExist`` raised at any point in that sequence
    is swallowed and the handler returns quietly.
    """
    from scheduler import experiment_scheduler

    experiment = kwargs['instance']
    try:
        # Access the group relation up front so a missing group aborts the
        # handler before any job is touched.
        _ = experiment.experiment_group  # noqa

        # Remove the job rows before the scheduler is called, so nothing is
        # updated on them while the stop is in flight.
        experiment_jobs = experiment.jobs.all()
        experiment_jobs.delete()

        experiment_scheduler.stop_experiment(experiment, update_status=False)
    except ExperimentGroup.DoesNotExist:
        # Per the original author's note: the experiment was already stopped
        # when its group was deleted, so there is nothing left to do.
        pass
def stop_running_experiment(sender, **kwargs):
    """Signal handler: stop the experiment passed as ``kwargs['instance']``.

    The experiment's jobs are deleted from the database first, then the
    scheduler is asked to stop the experiment without updating its status.
    An ``ExperimentGroup.DoesNotExist`` raised at any point in that sequence
    is swallowed and the handler returns quietly.
    """
    from scheduler import experiment_scheduler

    experiment = kwargs['instance']
    try:
        # Access the group relation up front so a missing group aborts the
        # handler before any job is touched.
        _ = experiment.experiment_group  # noqa

        # Remove the job rows before the scheduler is called, so nothing is
        # updated on them while the stop is in flight.
        experiment_jobs = experiment.jobs.all()
        experiment_jobs.delete()

        experiment_scheduler.stop_experiment(experiment, update_status=False)
    except ExperimentGroup.DoesNotExist:
        # Per the original author's note: the experiment was already stopped
        # when its group was deleted, so there is nothing left to do.
        pass
def _clean():
    """Stop every experiment that is still in a running status.

    For each experiment whose status is in
    ``ExperimentLifeCycle.RUNNING_STATUS`` the scheduler is asked to stop it,
    and the experiment's status is then set to
    ``ExperimentLifeCycle.STOPPED`` with the message ``'Cleanup'``.

    Group name and uuid are passed as ``None`` for experiments that do not
    belong to a group.
    """
    running_experiments = Experiment.objects.filter(
        status__status__in=ExperimentLifeCycle.RUNNING_STATUS)
    for experiment in running_experiments:
        group = experiment.experiment_group
        experiment_scheduler.stop_experiment(
            project_name=experiment.project.unique_name,
            project_uuid=experiment.project.uuid.hex,
            experiment_name=experiment.unique_name,
            # The uuid argument must carry the experiment's uuid, like the
            # project and group uuid arguments do; the unique name was being
            # passed here by mistake.
            # NOTE(review): assumes Experiment exposes a `uuid` field the
            # same way Project and ExperimentGroup do -- confirm on the model.
            experiment_uuid=experiment.uuid.hex,
            experiment_group_name=group.unique_name if group else None,
            experiment_group_uuid=group.uuid.hex if group else None,
            specification=experiment.specification)
        experiment.set_status(ExperimentLifeCycle.STOPPED, message='Cleanup')
def experiments_stop(self,
                     project_name,
                     project_uuid,
                     experiment_name,
                     experiment_group_name,
                     experiment_group_uuid,
                     experiment_uuid,
                     specification,
                     update_status=True,
                     collect_logs=True,
                     is_managed=True,
                     message=None):
    """Stop an experiment, retrying the deletion and recording the new status.

    Steps, in order:

    1. When ``collect_logs`` and ``is_managed`` are both truthy, the
       experiment's job logs are collected. Collection failures
       (``OSError``, ``StoreNotFoundError``, ``PolyaxonStoresException``)
       are logged as warnings and otherwise ignored.
    2. When ``specification`` is truthy and ``is_managed`` is truthy, the
       specification is compiled and the scheduler is asked to stop the
       experiment. Otherwise the experiment is treated as already deleted.
    3. If the scheduler reported the deletion as not done and fewer than two
       retries have happened, ``self.retry`` is invoked and the function
       stops there.
    4. When ``update_status`` is truthy, the experiment (deleted ones
       included) is looked up and set to ``ExperimentLifeCycle.STOPPED``
       with ``message``, or ``'Experiment was stopped'`` when ``message`` is
       falsy.

    ``self`` must provide ``request.retries`` and ``retry()``; presumably it
    is a bound task instance -- confirm against the task registration.
    """
    if collect_logs and is_managed:
        try:
            collectors.logs_collect_experiment_jobs(
                experiment_uuid=experiment_uuid)
        except (OSError, StoreNotFoundError, PolyaxonStoresException):
            _logger.warning(
                'Scheduler could not collect '
                'the logs for experiment `%s`.', experiment_name)

    # Without a managed specification the scheduler is not called, so the
    # experiment counts as deleted.
    deleted = True
    if specification and is_managed:
        compiled_specification = compiler.compile(kind=kinds.EXPERIMENT,
                                                  values=specification)
        deleted = experiment_scheduler.stop_experiment(
            project_name=project_name,
            project_uuid=project_uuid,
            experiment_name=experiment_name,
            experiment_group_name=experiment_group_name,
            experiment_group_uuid=experiment_group_uuid,
            experiment_uuid=experiment_uuid,
            specification=compiled_specification,
        )

    needs_retry = not deleted and self.request.retries < 2
    if needs_retry:
        _logger.info('Trying again to delete job `%s` in experiment.',
                     experiment_name)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        return

    if not update_status:
        return

    stopped_experiment = get_valid_experiment(experiment_uuid=experiment_uuid,
                                              include_deleted=True)
    if not stopped_experiment:
        _logger.info(
            'Something went wrong, '
            'the Experiment `%s` does not exist anymore.', experiment_uuid)
        return

    # Persist the stop on the experiment record.
    final_message = message or 'Experiment was stopped'
    stopped_experiment.set_status(ExperimentLifeCycle.STOPPED,
                                  message=final_message)
def experiments_stop(self,
                     project_name,
                     project_uuid,
                     experiment_name,
                     experiment_group_name,
                     experiment_group_uuid,
                     experiment_uuid,
                     specification,
                     update_status=True):
    """Stop an experiment, retrying the deletion and recording the new status.

    When ``specification`` is truthy it is parsed with
    ``ExperimentSpecification.read`` and the scheduler is asked to stop the
    experiment; otherwise the experiment is treated as already deleted.

    If the scheduler reported the deletion as not done and fewer than two
    retries have happened, ``self.retry`` is invoked and the function stops
    there. Otherwise, when ``update_status`` is truthy, the experiment is
    looked up by ``experiment_uuid`` and set to
    ``ExperimentLifeCycle.STOPPED``; a missing experiment is only logged.

    ``self`` must provide ``request.retries`` and ``retry()``; presumably it
    is a bound task instance -- confirm against the task registration.
    """
    # Without a specification the scheduler is not called, so the experiment
    # counts as deleted.
    deleted = True
    if specification:
        parsed_specification = ExperimentSpecification.read(specification)
        deleted = experiment_scheduler.stop_experiment(
            project_name=project_name,
            project_uuid=project_uuid,
            experiment_name=experiment_name,
            experiment_group_name=experiment_group_name,
            experiment_group_uuid=experiment_group_uuid,
            experiment_uuid=experiment_uuid,
            specification=parsed_specification,
        )

    needs_retry = not deleted and self.request.retries < 2
    if needs_retry:
        _logger.info('Trying again to delete job `%s` in experiment.',
                     experiment_name)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        return

    if not update_status:
        return

    stopped_experiment = get_valid_experiment(experiment_uuid=experiment_uuid)
    if not stopped_experiment:
        _logger.info(
            'Something went wrong, '
            'the Experiment `%s` does not exist anymore.', experiment_uuid)
        return

    # Persist the stop on the experiment record.
    stopped_experiment.set_status(ExperimentLifeCycle.STOPPED,
                                  message='Experiment was stopped')
def _clean():
    """Ask the scheduler to stop every experiment still in a running status.

    Experiments are selected by a status in
    ``ExperimentLifeCycle.RUNNING_STATUS``. Each one is passed to
    ``experiment_scheduler.stop_experiment`` with ``update_status=True``.
    """
    still_running = Experiment.objects.filter(
        status__status__in=ExperimentLifeCycle.RUNNING_STATUS)
    for running_experiment in still_running:
        experiment_scheduler.stop_experiment(running_experiment,
                                             update_status=True)