def copy_experiment(experiment):
    """Copy the outputs of `experiment.original_experiment` into `experiment`.

    The original note on this function reads: "If experiment is a restart,
    we should resume from last check point".

    A job log line announcing the copy is published first, then the copy is
    delegated to `stores.copy_experiment_outputs`. An `OSError` raised by
    either step is not propagated: a failure line is published and a warning
    is logged instead.
    """

    def _publish(template):
        # Both the start and the failure line share the same routing fields;
        # only the message text differs.
        publisher.publish_experiment_job_log(
            log_lines=template.format(
                experiment.original_experiment.unique_name,
                experiment.unique_name,
            ),
            experiment_uuid=experiment.uuid.hex,
            experiment_name=experiment.unique_name,
            job_uuid='all',
        )

    try:
        _publish('Copying outputs from experiment `{}` into experiment `{}`')
        original = experiment.original_experiment
        stores.copy_experiment_outputs(
            persistence_outputs_from=original.persistence_outputs,
            persistence_outputs_to=experiment.persistence_outputs,
            experiment_name_from=original.unique_name,
            experiment_name_to=experiment.unique_name,
        )
    except OSError:
        _publish('Could not copy the outputs of experiment `{}` into experiment `{}`')
        _logger.warning(
            'Could not copy the outputs of experiment `%s` into experiment `%s`',
            experiment.original_experiment.unique_name,
            experiment.unique_name,
        )
def copy_experiment(experiment):
    """Copy the outputs of `experiment.original_experiment` into `experiment`.

    The original note on this function reads: "If experiment is a restart,
    we should resume from last check point".

    A job log line (published with status `ExperimentLifeCycle.BUILDING`)
    announces the copy, then `copy_experiment_outputs` is called with the
    two experiments' unique names. An `OSError` raised by either step is not
    propagated: a failure line is published and a warning is logged instead.
    """

    def _publish(template):
        # Start and failure lines differ only in their message text.
        publisher.publish_experiment_job_log(
            log_lines=template.format(
                experiment.original_experiment.unique_name,
                experiment.unique_name,
            ),
            status=ExperimentLifeCycle.BUILDING,
            experiment_uuid=experiment.uuid.hex,
            experiment_name=experiment.unique_name,
            job_uuid='all',
        )

    try:
        _publish('Copying outputs from experiment `{}` into experiment `{}`')
        copy_experiment_outputs(
            experiment.original_experiment.unique_name,
            experiment.unique_name,
        )
    except OSError:
        _publish('Could not copy the outputs of experiment `{}` into experiment `{}`')
        _logger.warning(
            'Could not copy the outputs of experiment `%s` into experiment `%s`',
            experiment.original_experiment.unique_name,
            experiment.unique_name,
        )
def _handle_logs(self, log_line):
    """Publish `log_line` as an experiment job log entry.

    The entry is routed with this object's `experiment_uuid` and
    `experiment_name`, and with the fixed job identifier `'all'`.
    """
    send = publisher.publish_experiment_job_log
    fields = {
        'log_lines': log_line,
        'experiment_uuid': self.experiment_uuid,
        'experiment_name': self.experiment_name,
        'job_uuid': 'all',
    }
    send(**fields)
def _handle_logs(self, log_line):
    """Publish `log_line` as an experiment job log entry.

    The entry is published with status `ExperimentLifeCycle.BUILDING`, this
    object's `experiment_uuid` and `experiment_name`, and the fixed job
    identifier `'all'`.
    """
    send = publisher.publish_experiment_job_log
    fields = {
        'log_lines': log_line,
        'status': ExperimentLifeCycle.BUILDING,
        'experiment_uuid': self.experiment_uuid,
        'experiment_name': self.experiment_name,
        'job_uuid': 'all',
    }
    send(**fields)
def _handle_logs(self, log_line):
    """Forward `log_line` to the publisher as an experiment job log.

    Uses status `ExperimentLifeCycle.BUILDING` and the experiment identity
    stored on this object (`experiment_uuid`, `experiment_name`); the job
    identifier is always `'all'`.
    """
    send = publisher.publish_experiment_job_log
    send(
        job_uuid='all',
        experiment_name=self.experiment_name,
        experiment_uuid=self.experiment_uuid,
        status=ExperimentLifeCycle.BUILDING,
        log_lines=log_line,
    ) if False else send(
        log_lines=log_line,
        status=ExperimentLifeCycle.BUILDING,
        experiment_uuid=self.experiment_uuid,
        experiment_name=self.experiment_name,
        job_uuid='all',
    )
def publish(log_lines):
    """Publish `log_lines` as an experiment job log with status RUNNING.

    `experiment_uuid`, `experiment_name`, `job_uuid`, `task_type` and
    `task_idx` are not parameters; they are resolved from the surrounding
    scope at call time.
    """
    send = publisher.publish_experiment_job_log
    fields = {
        'log_lines': log_lines,
        'status': ExperimentLifeCycle.RUNNING,
        'experiment_uuid': experiment_uuid,
        'experiment_name': experiment_name,
        'job_uuid': job_uuid,
        'task_type': task_type,
        'task_idx': task_idx,
    }
    send(**fields)
def logs_sidecars_experiments(experiment_name, experiment_uuid, job_uuid, log_lines):
    """Signal handling for sidecars logs.

    The lines are first passed to `handle_experiment_job_log` for the given
    experiment, then published through the publisher for the given job with
    `send_task=False`.
    """
    # Step 1: hand the lines to the experiment-level log handler.
    handle_experiment_job_log(
        experiment_name=experiment_name,
        experiment_uuid=experiment_uuid,
        log_lines=log_lines,
    )

    # Step 2: publish the same lines for the job.
    # NOTE(review): presumably `send_task=False` avoids scheduling a second
    # handling task for lines already handled above; confirm in the publisher.
    publish_kwargs = {
        'log_lines': log_lines,
        'experiment_uuid': experiment_uuid,
        'experiment_name': experiment_name,
        'job_uuid': job_uuid,
        'send_task': False,
    }
    publisher.publish_experiment_job_log(**publish_kwargs)
def logs_sidecars_experiments(experiment_name: str,
                              experiment_uuid: str,
                              job_uuid: str,
                              log_lines: Optional[Union[str, Iterable[str]]]) -> None:
    """Signal handling for sidecars logs.

    The lines are first passed to `handle_experiment_job_log` for the given
    experiment, then published through the publisher for the given job with
    `send_task=False`.
    """
    # Step 1: hand the lines to the experiment-level log handler.
    handle_experiment_job_log(
        experiment_name=experiment_name,
        experiment_uuid=experiment_uuid,
        log_lines=log_lines,
    )

    # Step 2: publish the same lines for the job.
    # NOTE(review): presumably `send_task=False` avoids scheduling a second
    # handling task for lines already handled above; confirm in the publisher.
    publish_kwargs = {
        'log_lines': log_lines,
        'experiment_uuid': experiment_uuid,
        'experiment_name': experiment_name,
        'job_uuid': job_uuid,
        'send_task': False,
    }
    publisher.publish_experiment_job_log(**publish_kwargs)
def run_for_experiment_job(k8s_manager,
                           pod_id,
                           experiment_uuid,
                           experiment_name,
                           job_uuid,
                           task_type,
                           task_idx,
                           container_job_name):
    """Stream a pod container's log and publish every item as a job log.

    The log of `container_job_name` in pod `pod_id` (namespace taken from
    `k8s_manager.namespace`) is requested with `follow=True` and
    `_preload_content=False`, so the call returns a response object whose
    `.stream()` is iterated here. Each yielded item is published with status
    `ExperimentLifeCycle.RUNNING` together with the experiment, job and task
    identifiers given as arguments.
    """
    response = k8s_manager.k8s_api.read_namespaced_pod_log(
        pod_id,
        k8s_manager.namespace,
        container=container_job_name,
        follow=True,
        _preload_content=False,
    )

    # Routing fields are the same for every published item.
    routing = {
        'experiment_uuid': experiment_uuid,
        'experiment_name': experiment_name,
        'job_uuid': job_uuid,
        'task_type': task_type,
        'task_idx': task_idx,
    }

    for chunk in response.stream():
        # NOTE(review): the keyword is `log_line` (singular); confirm that it
        # matches the publisher's current signature.
        publisher.publish_experiment_job_log(
            log_line=chunk,
            status=ExperimentLifeCycle.RUNNING,
            **routing
        )