def build_experiment(self, experiment_id):
    """Build the docker image of an experiment and then request its start.

    The experiment is looked up by id. When the lookup yields nothing, a
    retry is requested while fewer than two retries have happened; otherwise
    the situation is logged and the task gives up. Experiments whose
    specification lacks a build or a run section are handed to the start task
    directly. For all others the status is moved to BUILDING, the image is
    built, and the start task is sent only when the builder reports a truthy
    result. Every build failure path records a FAILED status.

    Args:
        experiment_id: Identifier used to look the experiment up and
            forwarded to the start task.

    Returns:
        None on every path.
    """

    def _send_start():
        # Single place that dispatches the start task for this experiment.
        celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_START,
                             kwargs={'experiment_id': experiment_id})

    build_failed_message = 'Failed to build image for experiment.'

    experiment = get_valid_experiment(experiment_id=experiment_id)
    if not experiment:
        if self.request.retries < 2:
            _logger.info('Trying again for Experiment `%s`.', experiment_id)
            # NOTE(review): with Celery's default `throw=True` this call
            # raises, so the log below would only be reached once retries
            # are exhausted -- confirm against the task configuration.
            self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)

        _logger.info(
            'Something went wrong, the Experiment `%s` does not exist anymore.',
            experiment_id)
        return

    # Without both a build and a run section there is nothing to build.
    if not experiment.specification.build or not experiment.specification.run:
        _send_start()
        return

    may_build = ExperimentLifeCycle.can_transition(
        status_from=experiment.last_status,
        status_to=ExperimentLifeCycle.BUILDING)
    if not may_build:
        _logger.info('Experiment id `%s` cannot transition from `%s` to `%s`.',
                     experiment_id,
                     experiment.last_status,
                     ExperimentLifeCycle.BUILDING)
        return None

    # Make the build phase visible before the build itself starts.
    experiment.set_status(ExperimentLifeCycle.BUILDING)

    try:
        built = experiments_builder.build_experiment(experiment)
    except DockerException as e:
        _logger.warning('Failed to build experiment %s', e)
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message=build_failed_message)
        return
    except Repo.DoesNotExist:
        _logger.warning('No code was found for this project')
        experiment.set_status(
            ExperimentLifeCycle.FAILED,
            message='No code was found for to build this experiment.')
        return
    except Exception:
        # Anything unexpected: keep the full traceback in the log.
        _logger.error(
            'Failed to build experiment, unexpected error occurred.\n%s',
            traceback.format_exc())
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message=build_failed_message)
        return

    if built:
        # The image is ready, the experiment can be started.
        _send_start()
    else:
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message=build_failed_message)
def set_status(self, status, message=None, traceback=None, **kwargs):
    """Record a new status for this experiment if the transition is allowed.

    A heartbeat ping is sent for this experiment's id whenever `status` is
    one of `ExperimentLifeCycle.HEARTBEAT_STATUS`, regardless of whether a
    status row ends up being created.

    Args:
        status: The status to move to.
        message: Optional message stored with the status row.
        traceback: Optional traceback stored with the status row.
        **kwargs: Accepted but not used by this implementation.

    Returns:
        None.
    """
    if status in ExperimentLifeCycle.HEARTBEAT_STATUS:
        RedisHeartBeat.experiment_ping(self.id)

    transition_allowed = ExperimentLifeCycle.can_transition(
        status_from=self.last_status,
        status_to=status)
    if not transition_allowed:
        return

    ExperimentStatus.objects.create(
        experiment=self,
        status=status,
        message=message,
        traceback=traceback)
def set_status(self, status, created_at=None, message=None, traceback=None, **kwargs):
    """Record a new status for this experiment if the transition is allowed.

    The transition is checked against the status returned by
    `last_status_before(status_date=created_at)`. A heartbeat ping is sent
    for this experiment's id whenever `status` is one of
    `ExperimentLifeCycle.HEARTBEAT_STATUS`, whether or not a row is created.

    Args:
        status: The status to move to.
        created_at: Optional creation date; only passed on to the new status
            row when it is truthy.
        message: Optional message stored with the status row.
        traceback: Optional traceback stored with the status row.
        **kwargs: Accepted but not used by this implementation.

    Returns:
        None.
    """
    if status in ExperimentLifeCycle.HEARTBEAT_STATUS:
        RedisHeartBeat.experiment_ping(self.id)

    previous_status = self.last_status_before(status_date=created_at)
    if not ExperimentLifeCycle.can_transition(status_from=previous_status,
                                              status_to=status):
        return

    row_fields = {
        'experiment': self,
        'status': status,
        'message': message,
        'traceback': traceback,
    }
    if created_at:
        # Only override the creation date when the caller supplied one.
        row_fields['created_at'] = created_at

    ExperimentStatus.objects.create(**row_fields)
def build_experiment(self, experiment_id):
    """Build the docker image for an experiment, then request its start.

    The experiment is looked up by id. When it cannot be found, a retry is
    requested while fewer than two retries have happened; otherwise the
    situation is logged and the task gives up. Experiments whose
    specification lacks a build or a run section skip the build and are
    handed to the start task directly.

    Every failed build records a FAILED status, including the case where the
    builder returns a falsy result without raising, so the experiment is not
    left in BUILDING.

    Args:
        experiment_id: Identifier used to look the experiment up and
            forwarded to the start task.

    Returns:
        None on every path.
    """
    experiment = get_valid_experiment(experiment_id=experiment_id)
    if not experiment:
        if self.request.retries < 2:
            _logger.info('Trying again for Experiment `%s`.', experiment_id)
            # NOTE(review): with Celery's default `throw=True` this call
            # raises, so the log below would only be reached once retries
            # are exhausted -- confirm against the task configuration.
            self.retry(countdown=Intervals.EXPERIMENTS_SCHEDULER)

        _logger.info('Something went wrong, '
                     'the Experiment `%s` does not exist anymore.', experiment_id)
        return

    # No need to build the image, start the experiment directly
    if not (experiment.specification.build and experiment.specification.run):
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_START,
            kwargs={'experiment_id': experiment_id})
        return

    # Read the status once so the logged value is the one that was checked.
    last_status = experiment.last_status
    if not ExperimentLifeCycle.can_transition(status_from=last_status,
                                              status_to=ExperimentLifeCycle.BUILDING):
        _logger.info('Experiment id `%s` cannot transition from `%s` to `%s`.',
                     experiment_id, last_status, ExperimentLifeCycle.BUILDING)
        return None

    # Update experiment status to show that its building
    experiment.set_status(ExperimentLifeCycle.BUILDING)

    # Building the docker image
    try:
        status = experiments_builder.build_experiment(experiment)
    except DockerException as e:
        _logger.warning('Failed to build experiment %s', e)
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='Failed to build image for experiment.')
        return
    except Repo.DoesNotExist:
        _logger.warning('No code was found for this project')
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='No code was found for to build this experiment.')
        return
    except Exception as e:  # Other exceptions
        _logger.warning('Failed to build experiment %s', e)
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='Failed to build image for experiment.')
        return

    if not status:
        # The builder reported failure without raising: record it instead of
        # silently returning with the experiment still marked as BUILDING.
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='Failed to build image for experiment.')
        return

    # Now we can start the experiment
    celery_app.send_task(
        SchedulerCeleryTasks.EXPERIMENTS_START,
        kwargs={'experiment_id': experiment_id})
def experiments_start(experiment_id):
    """Hand an experiment to the scheduler if it may move to SCHEDULED.

    Nothing is started when the experiment cannot be found or when its last
    status does not allow a transition to SCHEDULED; both cases are logged.

    Args:
        experiment_id: Identifier used to look the experiment up.

    Returns:
        None on every path.
    """
    experiment = get_valid_experiment(experiment_id=experiment_id)
    if not experiment:
        _logger.info(
            'Something went wrong, the Experiment `%s` does not exist anymore.',
            experiment_id)
        return

    schedulable = ExperimentLifeCycle.can_transition(
        status_from=experiment.last_status,
        status_to=ExperimentLifeCycle.SCHEDULED)
    if schedulable:
        experiment_scheduler.start_experiment(experiment)
        return

    _logger.info('Experiment `%s` cannot transition from `%s` to `%s`.',
                 experiment.unique_name,
                 experiment.last_status,
                 ExperimentLifeCycle.SCHEDULED)
    return None
def experiments_build(experiment_id):
    """Create the build job for an experiment, or start it if none is needed.

    Returns silently when the experiment cannot be found. The start task is
    dispatched right away when the specification lacks a build or a run
    section, or when `create_build_job` reports that the image already
    exists. Otherwise the experiment is marked BUILDING, or FAILED when the
    build process could not be started.

    Args:
        experiment_id: Identifier used to look the experiment up and
            forwarded to the start task.

    Returns:
        None on every path.
    """

    def _send_start():
        # Dispatch the start task using the configured global countdown.
        celery_app.send_task(SchedulerCeleryTasks.EXPERIMENTS_START,
                             kwargs={'experiment_id': experiment_id},
                             countdown=conf.get('GLOBAL_COUNTDOWN'))

    experiment = get_valid_experiment(experiment_id=experiment_id)
    if not experiment:
        return

    # Without both a build and a run section there is nothing to build.
    if not experiment.specification.build or not experiment.specification.run:
        _send_start()
        return

    current_status = experiment.last_status
    may_build = ExperimentLifeCycle.can_transition(
        status_from=current_status,
        status_to=ExperimentLifeCycle.BUILDING)
    if not may_build:
        _logger.info('Experiment id `%s` cannot transition from `%s` to `%s`.',
                     experiment_id, current_status, ExperimentLifeCycle.BUILDING)
        return

    job_result = dockerizer_scheduler.create_build_job(
        user=experiment.user,
        project=experiment.project,
        config=experiment.specification.build,
        configmap_refs=experiment.specification.configmap_refs,
        secret_refs=experiment.specification.secret_refs,
        code_reference=experiment.code_reference)
    build_job, image_exists, build_status = job_result

    # Persist only the build job link on the experiment.
    experiment.build_job = build_job
    experiment.save(update_fields=['build_job'])

    if image_exists:
        # The image is already there, the experiment can start right away.
        _send_start()
    elif not build_status:
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='Could not start build process.')
    else:
        # A build was started: reflect that in the experiment status.
        experiment.set_status(ExperimentLifeCycle.BUILDING)
def experiments_start(experiment_id):
    """Prepare an experiment's outputs and hand it to the scheduler.

    Nothing happens when the experiment cannot be found or when its last
    status does not allow a transition to SCHEDULED; both cases are logged.
    Before scheduling, `copy_experiment` is called when `experiment.is_copy`
    is truthy; otherwise an outputs path is created for the experiment's
    unique name.

    Args:
        experiment_id: Identifier used to look the experiment up.

    Returns:
        None on every path.
    """
    experiment = get_valid_experiment(experiment_id=experiment_id)
    if not experiment:
        _logger.info(
            'Something went wrong, the Experiment `%s` does not exist anymore.',
            experiment_id)
        return

    schedulable = ExperimentLifeCycle.can_transition(
        status_from=experiment.last_status,
        status_to=ExperimentLifeCycle.SCHEDULED)
    if not schedulable:
        _logger.info('Experiment `%s` cannot transition from `%s` to `%s`.',
                     experiment.unique_name,
                     experiment.last_status,
                     ExperimentLifeCycle.SCHEDULED)
        return None

    # A fresh experiment only needs its outputs path; a copy goes through
    # the dedicated copy routine instead.
    if not experiment.is_copy:
        create_experiment_outputs_path(experiment.unique_name)
    else:
        copy_experiment(experiment)

    experiment_scheduler.start_experiment(experiment)
def experiments_build(experiment_id):
    """Create the build job for an experiment, or start it if none is needed.

    Returns silently when the experiment cannot be found. The start task is
    dispatched right away when the specification lacks a build or a run
    section, or when `create_build_job` reports that the image already
    exists. Otherwise the experiment is marked BUILDING, or FAILED when the
    build process could not be started.

    Args:
        experiment_id: Identifier used to look the experiment up and
            forwarded to the start task.

    Returns:
        None on every path.
    """
    experiment = get_valid_experiment(experiment_id=experiment_id)
    if not experiment:
        return

    # No need to build the image, start the experiment directly
    if not (experiment.specification.build and experiment.specification.run):
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_START,
            kwargs={'experiment_id': experiment_id})
        return

    # Read the status once so the logged value is the one that was checked.
    last_status = experiment.last_status
    if not ExperimentLifeCycle.can_transition(status_from=last_status,
                                              status_to=ExperimentLifeCycle.BUILDING):
        _logger.info('Experiment id `%s` cannot transition from `%s` to `%s`.',
                     experiment_id, last_status, ExperimentLifeCycle.BUILDING)
        return

    build_job, image_exists, build_status = dockerizer_scheduler.create_build_job(
        user=experiment.user,
        project=experiment.project,
        config=experiment.specification.build,
        code_reference=experiment.code_reference)

    experiment.build_job = build_job
    # Write only the column that changed: a full-row save from this
    # in-memory instance could overwrite fields that were modified
    # elsewhere after the instance was loaded.
    experiment.save(update_fields=['build_job'])

    if image_exists:
        # The image already exists, so we can start the experiment right away
        celery_app.send_task(
            SchedulerCeleryTasks.EXPERIMENTS_START,
            kwargs={'experiment_id': experiment_id})
        return

    if not build_status:
        experiment.set_status(ExperimentLifeCycle.FAILED,
                              message='Could not start build process.')
        return

    # Update experiment status to show that its building
    experiment.set_status(ExperimentLifeCycle.BUILDING)