예제 #1
0
def experiments_stop(project_name,
                     project_uuid,
                     experiment_name,
                     experiment_group_name,
                     experiment_group_uuid,
                     experiment_uuid,
                     specification,
                     update_status=True):
    """Stop an experiment via the scheduler and optionally flag it as stopped.

    The raw ``specification`` is first parsed with
    ``ExperimentSpecification.read`` and the parsed result is handed, together
    with the project/experiment/group identifiers, to
    ``experiment_scheduler.stop_experiment``.

    When ``update_status`` is truthy the experiment is then looked up by
    ``experiment_uuid`` and its status is set to
    ``ExperimentLifeCycle.STOPPED``; if the lookup yields nothing, an info
    message is logged instead.

    Always returns ``None``.
    """
    parsed_specification = ExperimentSpecification.read(specification)
    experiment_scheduler.stop_experiment(
        project_name=project_name,
        project_uuid=project_uuid,
        experiment_name=experiment_name,
        experiment_group_name=experiment_group_name,
        experiment_group_uuid=experiment_group_uuid,
        experiment_uuid=experiment_uuid,
        specification=parsed_specification,
    )

    if update_status:
        found = get_valid_experiment(experiment_uuid=experiment_uuid)
        if found:
            # Reflect the stop in the experiment's own status.
            found.set_status(ExperimentLifeCycle.STOPPED)
        else:
            _logger.info(
                'Something went wrong, '
                'the Experiment `%s` does not exist anymore.', experiment_uuid)
예제 #2
0
def experiments_stop(experiment_id, update_status=True):
    """Stop the experiment identified by ``experiment_id``.

    The experiment is resolved with ``get_valid_experiment``. If nothing is
    found an info message is logged and the scheduler is not called;
    otherwise the experiment is passed to
    ``experiment_scheduler.stop_experiment`` along with ``update_status``.

    Always returns ``None``.
    """
    target = get_valid_experiment(experiment_id=experiment_id)
    if target:
        experiment_scheduler.stop_experiment(target, update_status=update_status)
    else:
        _logger.info('Something went wrong, '
                     'the Experiment `%s` does not exist anymore.', experiment_id)
def stop_running_experiment(sender, **kwargs):
    """Delete an experiment's jobs and ask the scheduler to stop it.

    The experiment is taken from ``kwargs['instance']``; ``sender`` is not
    used. (Presumably this is wired up as a model signal receiver -- confirm
    against the registration site.)

    If ``ExperimentGroup.DoesNotExist`` is raised anywhere in the process the
    error is swallowed and nothing else happens.
    """
    # Imported lazily, inside the function, exactly as the original did.
    from scheduler import experiment_scheduler
    experiment = kwargs['instance']
    try:
        # Touch the group relation first: it raises DoesNotExist when the
        # group is already gone, which skips everything below.
        _ = experiment.experiment_group  # noqa
        # Remove the job rows before the scheduler is asked to stop, so
        # that nothing gets updated in between.
        experiment.jobs.all().delete()
        experiment_scheduler.stop_experiment(experiment, update_status=False)
    except ExperimentGroup.DoesNotExist:
        # Group deletion already took care of stopping this experiment.
        pass
예제 #4
0
def stop_running_experiment(sender, **kwargs):
    """Delete an experiment's jobs and ask the scheduler to stop it.

    The experiment is taken from ``kwargs['instance']``; ``sender`` is not
    used. (Presumably this is wired up as a model signal receiver -- confirm
    against the registration site.)

    If ``ExperimentGroup.DoesNotExist`` is raised anywhere in the process the
    error is swallowed and nothing else happens.
    """
    # Imported lazily, inside the function, exactly as the original did.
    from scheduler import experiment_scheduler
    experiment = kwargs['instance']
    try:
        # Touch the group relation first: it raises DoesNotExist when the
        # group is already gone, which skips everything below.
        _ = experiment.experiment_group  # noqa
        # Remove the job rows before the scheduler is asked to stop, so
        # that nothing gets updated in between.
        experiment.jobs.all().delete()
        experiment_scheduler.stop_experiment(experiment, update_status=False)
    except ExperimentGroup.DoesNotExist:
        # Group deletion already took care of stopping this experiment.
        pass
예제 #5
0
 def _clean():
     """Stop every experiment whose status is a running status.

     Iterates over ``Experiment`` rows whose ``status__status`` is in
     ``ExperimentLifeCycle.RUNNING_STATUS``, asks the scheduler to stop each
     one, and then sets its status to ``ExperimentLifeCycle.STOPPED`` with
     the message ``'Cleanup'``.

     The group name/uuid are passed as ``None`` for experiments that do not
     belong to a group.
     """
     running = Experiment.objects.filter(
         status__status__in=ExperimentLifeCycle.RUNNING_STATUS)
     for experiment in running:
         project = experiment.project
         group = experiment.experiment_group
         experiment_scheduler.stop_experiment(
             project_name=project.unique_name,
             project_uuid=project.uuid.hex,
             experiment_name=experiment.unique_name,
             # Fixed: this used to pass `experiment.unique_name`, i.e. the
             # name rather than the uuid, unlike the project/group uuid
             # arguments which pass `.uuid.hex`.
             experiment_uuid=experiment.uuid.hex,
             experiment_group_name=group.unique_name if group else None,
             experiment_group_uuid=group.uuid.hex if group else None,
             specification=experiment.specification)
         experiment.set_status(ExperimentLifeCycle.STOPPED,
                               message='Cleanup')
예제 #6
0
def experiments_stop(self,
                     project_name,
                     project_uuid,
                     experiment_name,
                     experiment_group_name,
                     experiment_group_uuid,
                     experiment_uuid,
                     specification,
                     update_status=True,
                     collect_logs=True,
                     is_managed=True,
                     message=None):
    """Collect logs, stop an experiment, retry if needed, and update status.

    Steps, in order:

    1. If both ``collect_logs`` and ``is_managed`` are truthy, collect the
       experiment's job logs. ``OSError``, ``StoreNotFoundError`` and
       ``PolyaxonStoresException`` are caught and only logged as a warning.
    2. If both ``specification`` and ``is_managed`` are truthy, compile the
       specification and ask the scheduler to stop the experiment; the
       scheduler's return value says whether the stop went through.
       Otherwise the stop is treated as successful.
    3. If the stop did not go through and ``self.request.retries`` is below
       2, log, call ``self.retry`` and return. (``self`` presumably is a
       bound task object -- confirm against the task registration.)
    4. If ``update_status`` is truthy, look the experiment up (deleted ones
       included) and set it to ``ExperimentLifeCycle.STOPPED`` with
       ``message`` or the default ``'Experiment was stopped'``; log an info
       line if it cannot be found.

    Always returns ``None``.
    """
    if collect_logs and is_managed:
        try:
            collectors.logs_collect_experiment_jobs(
                experiment_uuid=experiment_uuid)
        except (OSError, StoreNotFoundError, PolyaxonStoresException):
            # Log collection is best effort; carry on with the stop.
            _logger.warning(
                'Scheduler could not collect '
                'the logs for experiment `%s`.', experiment_name)

    # With nothing managed to stop, the stop counts as already done.
    stopped = True
    if specification and is_managed:
        compiled_specification = compiler.compile(kind=kinds.EXPERIMENT,
                                                  values=specification)
        stopped = experiment_scheduler.stop_experiment(
            project_name=project_name,
            project_uuid=project_uuid,
            experiment_name=experiment_name,
            experiment_group_name=experiment_group_name,
            experiment_group_uuid=experiment_group_uuid,
            experiment_uuid=experiment_uuid,
            specification=compiled_specification,
        )

    if not stopped and self.request.retries < 2:
        _logger.info('Trying again to delete job `%s` in experiment.',
                     experiment_name)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        return

    if update_status:
        found = get_valid_experiment(experiment_uuid=experiment_uuid,
                                     include_deleted=True)
        if found:
            # Reflect the stop in the experiment's own status.
            found.set_status(ExperimentLifeCycle.STOPPED,
                             message=message or 'Experiment was stopped')
        else:
            _logger.info(
                'Something went wrong, '
                'the Experiment `%s` does not exist anymore.', experiment_uuid)
예제 #7
0
def experiments_stop(self,
                     project_name,
                     project_uuid,
                     experiment_name,
                     experiment_group_name,
                     experiment_group_uuid,
                     experiment_uuid,
                     specification,
                     update_status=True):
    """Stop an experiment, retry if needed, and optionally update its status.

    If ``specification`` is truthy it is parsed with
    ``ExperimentSpecification.read`` and the scheduler is asked to stop the
    experiment; the scheduler's return value says whether the stop went
    through. With a falsy ``specification`` the stop is treated as
    successful.

    If the stop did not go through and ``self.request.retries`` is below 2,
    an info line is logged, ``self.retry`` is called and the function
    returns. (``self`` presumably is a bound task object -- confirm against
    the task registration.)

    If ``update_status`` is truthy the experiment is looked up by
    ``experiment_uuid`` and set to ``ExperimentLifeCycle.STOPPED`` with the
    message ``'Experiment was stopped'``; an info line is logged when it
    cannot be found.

    Always returns ``None``.
    """
    # With no specification there is nothing to stop, so it counts as done.
    stopped = True
    if specification:
        parsed_specification = ExperimentSpecification.read(specification)
        stopped = experiment_scheduler.stop_experiment(
            project_name=project_name,
            project_uuid=project_uuid,
            experiment_name=experiment_name,
            experiment_group_name=experiment_group_name,
            experiment_group_uuid=experiment_group_uuid,
            experiment_uuid=experiment_uuid,
            specification=parsed_specification,
        )

    if not stopped and self.request.retries < 2:
        _logger.info('Trying again to delete job `%s` in experiment.',
                     experiment_name)
        self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)
        return

    if update_status:
        found = get_valid_experiment(experiment_uuid=experiment_uuid)
        if found:
            # Reflect the stop in the experiment's own status.
            found.set_status(ExperimentLifeCycle.STOPPED,
                             message='Experiment was stopped')
        else:
            _logger.info(
                'Something went wrong, '
                'the Experiment `%s` does not exist anymore.', experiment_uuid)
예제 #8
0
 def _clean():
     """Ask the scheduler to stop every experiment in a running status.

     Selects ``Experiment`` rows whose ``status__status`` is in
     ``ExperimentLifeCycle.RUNNING_STATUS`` and passes each one to
     ``experiment_scheduler.stop_experiment`` with ``update_status=True``.
     """
     still_running = Experiment.objects.filter(
         status__status__in=ExperimentLifeCycle.RUNNING_STATUS)
     for running_experiment in still_running:
         experiment_scheduler.stop_experiment(running_experiment,
                                              update_status=True)