def validate_job_name_and_folder(self):
    """
    Normalize self.job through assets.validate_job_folder and decide whether
    the existing job folder should be wiped before continuing.

    Deletion policy, in priority order:
        * self.overwrite set  -> never delete
        * self.force set      -> always delete
        * folder exists       -> ask the user via confirm_yn
        * otherwise           -> nothing to delete

    Returns:
        job_folder path (the validated self.job)
    """
    self.job = assets.validate_job_folder(self.job)

    if self.overwrite:
        should_delete = False
    elif self.force:
        should_delete = True
    elif self.job.exists():
        # Build the colored prompt first, then ask; the answer decides deletion.
        prompt = (
            colors.red & colors.bold
            | f"Do you really want to remove ALL contents of "
        ) + (
            colors.yellow
            | f"'{self.job}'?\nIf no, then job may be in an inconsistent state.\n"
        )
        should_delete = confirm_yn(prompt, "y")
    else:
        should_delete = False

    if should_delete:
        tell(f"Deleting all of {self.job}.")
        self.job.delete()

    return self.job
def __init__(self, job_folder, include_manifest=True, is_dev_override=False):
    """
    Collect the run results and the optional job manifest of one job folder.

    Arguments:
        job_folder: Passed to assets.validate_job_folder with
            search_if_not_present=True; the validated value is kept as
            self.job_folder.
        include_manifest: Forwarded to each RunResult.
        is_dev_override: Stored unchanged on the instance.
    """
    self.job_folder = assets.validate_job_folder(
        job_folder, search_if_not_present=True
    )

    # A child counts as a run folder when it is a directory that contains
    # a run_manifest.yaml; results are keyed by the run folder's name.
    results_by_name = {}
    for entry in self.job_folder:
        if entry.is_dir() and "run_manifest.yaml" in entry:
            results_by_name[entry.name] = RunResult(
                entry, include_manifest=include_manifest
            )
    self._run_results = results_by_name

    self.is_dev_override = is_dev_override

    # The job manifest is optional; it stays None when the file is absent.
    self.job_manifest = None
    manifest_path = self.job_folder / "job_manifest.yaml"
    if not manifest_path.exists():
        return
    self.job_manifest = utils.yaml_load_munch(manifest_path)
def __init__(self, job_folders, include_manifest=True):
    """
    Merge the run results of several job folders into one lookup.

    Arguments:
        job_folders: A list of job folders; each one is validated with
            assets.validate_job_folder.
        include_manifest: Forwarded to each RunResult.

    Runs are keyed by run-folder name, so a name that occurs in more than
    one job folder keeps only the entry from the last folder processed.
    """
    check.t(job_folders, list)

    # Placeholder text: there is no single job folder for this object.
    self.job_folder = "MultiJobResult has multiple folders in job_folders"
    self.job_folders = []
    self._run_results = {}

    for candidate in job_folders:
        validated = assets.validate_job_folder(candidate)
        self.job_folders.append(validated)

        # Gather this folder's runs first, then merge them in one step.
        folder_results = {}
        for entry in validated:
            if entry.is_dir() and "run_manifest.yaml" in entry:
                folder_results[entry.name] = RunResult(
                    entry, include_manifest=include_manifest
                )
        self._run_results.update(folder_results)
def main(self, job_folder=None):
    """
    Execute every run found under a job folder, then write job_info.yaml
    and run the report notebooks.

    Arguments:
        job_folder: Job folder to process; when None, self.job is used.

    Returns:
        1 when no job folder was given, the folder does not exist, or no
        plaster_run.yaml is found beneath it; 0 after a reports-only pass;
        otherwise the accumulated failure count of the runs.

    Raises:
        Re-raises the first exception from a run unless
        self.continue_on_error is set.
    """
    switches = utils.plumbum_switches(self)

    if job_folder is None:
        job_folder = self.job

    # Check for a missing job folder BEFORE it is validated or used to build
    # paths; otherwise this friendly error can never be reached.
    if job_folder is None:
        log.error("No job_folder was specified")
        return 1

    job_folder = assets.validate_job_folder(job_folder)
    # At this point job_folder is a plumbum path

    # Likewise, verify the folder exists before trying to create the log
    # file inside it (open() would fail first on a missing folder).
    if not job_folder.exists():
        log.error(f"Unable to find the path {job_folder}")
        return 1

    # Add a new handler so we get PER-RUN log files into the job folder.
    # NOTE: the file object is handed to the handler and is not closed in
    # this method.
    per_run_log_path = job_folder / f"{int(time.time()):06x}.log"
    formatter = zlog.ColorfulFormatter(
        "%(name)s %(asctime)s %(levelname)s %(message)s %(filename)s %(lineno)d"
    )
    handler = logging.StreamHandler(open(per_run_log_path, "w"))
    handler.setFormatter(formatter)
    zlog.add_handler(handler)
    tell(f"Trapping run logs into {per_run_log_path}")

    tell(
        f"Plaster run {job_folder} limit={self.limit} started at {arrow.utcnow().format()}"
    )

    # Load the job_uuid if available, eventually this will be nice for logging
    job_uuid = None
    job_yaml = job_folder / "job_manifest.yaml"
    if job_yaml.exists():
        job_manifest = utils.yaml_load_munch(job_yaml)
        job_uuid = job_manifest.uuid

    # Find all the plaster_run.yaml files. They might be in run subfolders
    found = list(job_folder.walk(filter=lambda p: p.name == "plaster_run.yaml"))
    run_dirs = [p.dirname for p in found]
    if not run_dirs:
        log.error(
            "Plaster: Nothing to do because no run_dirs have plaster_run.yaml files"
        )
        return 1

    def run_reports():
        # Candidate notebooks: the top-level report plus everything in _reports.
        report_paths = [job_folder / "report.ipynb"] + (
            job_folder / "_reports" // "*.ipynb"
        )
        for report_src_path in report_paths:
            report_dst_path = report_src_path.with_suffix(".html")
            if report_src_path.exists() and (
                self.force or out_of_date(report_src_path, report_dst_path)
            ):
                tell(f"Running report {report_src_path}")
                self.run_ipynb(report_src_path)

    if self.reports_only:
        run_reports()
        return 0

    # A normal run where all happens in this process
    failure_count = 0
    for run_dir_i, run_dir in enumerate(sorted(run_dirs)):
        zlog.metrics(
            f"Starting run subdirectory {run_dir}. {run_dir_i + 1} of {len(run_dirs)}",
            log=log,
            _type="plaster_start",
            run_dir=run_dir,
            run_dir_i=run_dir_i,
            run_dir_n=len(run_dirs),
            **switches,
        )

        try:
            with zap.Context(
                cpu_limit=self.cpu_limit,
                mode="debug" if self.debug_mode else None,
                allow_inner_parallelism=True,
            ):
                # allow_inner_parallelism=True needs to be true so that each task such as sigproc_v2
                # can allocate parallel jobs to each field.
                run = RunExecutor(run_dir).load()
                if "_erisyon" in run.config:
                    zlog.metrics(
                        "run metrics",
                        log=log,
                        _type="erisyon_block",
                        **run.config._erisyon,
                    )

                failure_count += run.execute(
                    force=self.force,
                    limit=self.limit.split(",") if self.limit else None,
                    clean=self.clean,
                    n_fields_limit=self.n_fields_limit,
                    no_progress=self.no_progress,
                )
        except Exception:
            failure_count += 1
            if not self.continue_on_error:
                # Bare raise keeps the original traceback intact.
                raise

    if failure_count == 0 and self.limit is None and not self.clean:
        # WRITE job_info.yaml with metadata used by the indexer
        n_runs = len(run_dirs)
        job_info = Munch(n_runs=n_runs, job_uuid=job_uuid)
        if n_runs == 1:
            job = JobResult(job_folder=job_folder)

            # Both lookups are best-effort: a run without this data simply
            # records an empty dict. Only Exception is caught so that
            # KeyboardInterrupt / SystemExit still propagate.
            tsv_data = {}
            try:
                tsv_data = job.runs[0].ims_import.tsv_data
            except Exception:
                pass

            nd2_metadata = {}
            try:
                nd2_metadata = job.runs[0].ims_import._nd2_metadata
            except Exception:
                pass

            job_info.update(tsv_data=tsv_data, nd2_metadata=nd2_metadata)

        utils.yaml_save(job_folder / "job_info.yaml", job_info)

        # RUN reports if not skipped
        # NOTE(review): reports are assumed to be gated by the success
        # condition above -- confirm against the original indentation.
        if not self.skip_reports:
            run_reports()

    return failure_count
def it_raises_on_non_str():
    """An integer argument must be rejected with check.CheckError."""
    not_a_folder = 123
    with zest.raises(check.CheckError):
        assets.validate_job_folder(not_a_folder)
def it_raises_on_file_outside_jobs_folder():
    """The path "/tmp/foo/bar" must be rejected with ValueError."""
    outside_path = "/tmp/foo/bar"
    with zest.raises(ValueError):
        assets.validate_job_folder(outside_path)
def it_raises_on_non_existing_file_in_jobs_folder():
    """A ./jobs_folder path that does not exist must raise FileNotFoundError."""
    missing_path = "./jobs_folder/__does_not_exist"
    with zest.raises(FileNotFoundError):
        assets.validate_job_folder(missing_path)
def it_raises_on_run_folder_if_not_specified():
    """A run-folder path must raise ValueError when allow_run_folders=False."""
    with zest.raises(ValueError):
        run_path = f"./jobs_folder/{job_name}/run1"
        assets.validate_job_folder(
            run_path,
            allow_run_folders=False,
        )
def it_accepts_run_folder_if_specified():
    """With allow_run_folders=True the result compares equal to "<job_name>/run1"."""
    validated = assets.validate_job_folder(
        f"./jobs_folder/{job_name}/run1",
        allow_run_folders=True,
    )
    expected = f"{job_name}/run1"
    assert validated == expected
def it_accepts_named_job_folder_if_it_exists():
    """Passing the bare job name yields a result that compares equal to job_name."""
    validated = assets.validate_job_folder(job_name)
    assert validated == job_name
def it_accepts_folder_in_jobs_folder_by_relative_string_with_trailing_slash():
    """A relative ./jobs_folder path with a trailing slash compares equal to job_name."""
    path_with_slash = f"./jobs_folder/{job_name}/"
    validated = assets.validate_job_folder(path_with_slash)
    assert validated == job_name
def it_accepts_folder_in_jobs_folder_by_relative_string():
    """A relative ./jobs_folder path compares equal to job_name after validation."""
    relative_path = f"./jobs_folder/{job_name}"
    validated = assets.validate_job_folder(relative_path)
    assert validated == job_name
def it_accepts_folder_in_jobs_folder_by_absolute_plumbum():
    """A plumbum path built from HOST_JOBS_FOLDER compares equal to job_name after validation."""
    jobs_root = local.path(local.env["HOST_JOBS_FOLDER"])
    job_path = jobs_root / job_name
    validated = assets.validate_job_folder(job_path)
    assert validated == job_name
def validate_job_folder(self, job_folder, allow_run_folders=False):
    """
    Delegate to assets.validate_job_folder.

    Arguments:
        job_folder: Forwarded unchanged.
        allow_run_folders: Forwarded as the keyword of the same name.

    Returns:
        Whatever assets.validate_job_folder returns.
    """
    validated = assets.validate_job_folder(
        job_folder,
        allow_run_folders=allow_run_folders,
    )
    return validated
def it_raises_if_not_symbol():
    """The name "foo bar" must be rejected with ValueError."""
    bad_name = "foo bar"
    with zest.raises(ValueError):
        assets.validate_job_folder(bad_name)
def it_returns_a_plumbum_path():
    """The value returned for "foo_bar" must be a plumbum LocalPath instance."""
    validated = assets.validate_job_folder("foo_bar")
    assert isinstance(validated, plumbum.path.local.LocalPath)
def it_accepts_slash_slash_jobs_folder():
    """A path starting with //jobs_folder/ compares equal to job_name after validation."""
    double_slash_path = f"//jobs_folder/{job_name}"
    validated = assets.validate_job_folder(double_slash_path)
    assert validated == job_name