Example #1
0
def handle_new_experiment_status(sender, **kwargs):
    """Schedule an experiment stop once a FAILED or SUCCEEDED status is recorded.

    The status object is read from ``kwargs['instance']`` and the experiment
    from its ``experiment`` attribute. The handler returns without doing
    anything when:

    * the experiment has no specification,
    * the status is neither ``ExperimentLifeCycle.FAILED`` nor
      ``ExperimentLifeCycle.SUCCEEDED``, or
    * the experiment has no jobs.

    Otherwise it logs the event and sends ``SchedulerCeleryTasks.EXPERIMENTS_STOP``
    through ``celery_app``, delayed by ``RedisTTL.get_for_experiment``.

    Args:
        sender: Not used by this handler.
        **kwargs: Must contain ``instance``, the newly recorded status object.
    """
    status_record = kwargs['instance']
    exp = status_record.experiment
    if not exp.specification:
        return

    # Only FAILED / SUCCEEDED statuses can trigger a stop ...
    if status_record.status not in (ExperimentLifeCycle.FAILED,
                                    ExperimentLifeCycle.SUCCEEDED):
        return
    # ... and only when the experiment actually has jobs.
    if not exp.jobs.count() > 0:
        return

    _logger.info(
        'One of the workers failed or Master for experiment `%s` is done, '
        'send signal to other workers to stop.', exp.unique_name)

    # Other jobs of this experiment may still be running, so a stop is scheduled.
    project = exp.project
    group = exp.experiment_group
    if group:
        group_name, group_uuid = group.unique_name, group.uuid.hex
    else:
        # The experiment has no group: both group fields are sent as None.
        group_name = group_uuid = None

    stop_payload = {
        'project_name': project.unique_name,
        'project_uuid': project.uuid.hex,
        'experiment_name': exp.unique_name,
        'experiment_uuid': exp.uuid.hex,
        'experiment_group_name': group_name,
        'experiment_group_uuid': group_uuid,
        'specification': exp.config,
        'update_status': False,
    }
    delay = RedisTTL.get_for_experiment(experiment_id=exp.id)
    celery_app.send_task(
        SchedulerCeleryTasks.EXPERIMENTS_STOP,
        kwargs=stop_payload,
        countdown=delay)
Example #2
0
    def _handle_experiment_post_run(cls, event: 'Event') -> None:
        instance = event.instance
        if not instance or not instance.has_specification or not instance.jobs.count() > 0:
            return

        # Schedule stop for this experiment because other jobs may be still running
        group = instance.experiment_group
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_STOP,
            kwargs={
                'project_name': instance.project.unique_name,
                'project_uuid': instance.project.uuid.hex,
                'experiment_name': instance.unique_name,
                'experiment_uuid': instance.uuid.hex,
                'experiment_group_name': group.unique_name if group else None,
                'experiment_group_uuid': group.uuid.hex if group else None,
                'specification': instance.config,
                'update_status': False,
                'collect_logs': True,
            },
            countdown=RedisTTL.get_for_experiment(experiment_id=instance.id))
Example #3
0
 def test_set_for_experiment(self):
     """Check TTL lookups after storing a value for a single experiment."""
     stored_id, stored_ttl = 1, 10
     RedisTTL.set_for_experiment(experiment_id=stored_id, value=stored_ttl)

     # The experiment that was just written reads back its own value.
     assert RedisTTL.get_for_experiment(experiment_id=stored_id) == stored_ttl

     # An experiment with nothing stored is expected to yield GLOBAL_COUNTDOWN.
     unset_ttl = RedisTTL.get_for_experiment(experiment_id=2)
     assert unset_ttl == conf.get('GLOBAL_COUNTDOWN')

     # A direct instance lookup for an id that was never set is expected to be None.
     direct_lookup = RedisTTL(experiment=10)
     assert direct_lookup.get_value() is None
Example #4
0
 def test_set_for_experiment(self):
     """Check TTL lookups for a stored experiment, an unset one, and a raw instance."""
     RedisTTL.set_for_experiment(value=10, experiment_id=1)

     # Stored value is returned for the experiment it was written for.
     ttl_for_stored = RedisTTL.get_for_experiment(experiment_id=1)
     assert ttl_for_stored == 10

     # An experiment with nothing stored is expected to return 2.
     ttl_for_unset = RedisTTL.get_for_experiment(experiment_id=2)
     assert ttl_for_unset == 2

     # Reading through an instance for an id that was never set gives None.
     raw_value = RedisTTL(experiment=10).get_value()
     assert raw_value is None