Example #1
0
    def __init__(self, src_dir, dst_dir, config, **build_opts):
        """
        Build the task table from config and hand it to the base class.

        Arguments:
            src_dir, dst_dir: Converted with local.path() and forwarded to
                the base class constructor.
            config: Passed to task_list_from_config(), which yields
                (task_name, task_klass, task_info) triples.
            build_opts: Forwarded to the base class, except for two options
                that are consumed here:
                    skip_s3: handed to _translate_s3_references()
                    n_fields_limit: when not None, written into the
                        "parameters" of the tasks named below.
        """
        src_dir = local.path(src_dir)
        dst_dir = local.path(dst_dir)

        skip_s3 = build_opts.pop("skip_s3", False)

        self._tasks = {}
        for task_name, task_klass, task_info in task_list_from_config(config):
            self._translate_s3_references(task_info, skip_s3)
            self._tasks[task_name] = (task_klass, task_info, {})

        n_fields_limit = build_opts.pop("n_fields_limit", None)
        if n_fields_limit is not None:
            important(f"Limiting to only {n_fields_limit} fields")

            # TASK: Convert these from named task to ANY task of that type
            for limited_name in ("ims_import", "sigproc_v1", "sigproc_v2"):
                if limited_name in self._tasks:
                    _, limited_info, _ = self._tasks[limited_name]
                    limited_info["parameters"]["n_fields_limit"] = n_fields_limit

        super().__init__(src_dir, dst_dir, self._tasks, **build_opts)
Example #2
0
    def run_nbstripout(self):
        """Strip all notebooks of output to save space in commits.

        Pipes the *.ipynb files reported by `find` into `xargs nbstripout`,
        running the pipeline in the foreground.

        Raises:
            CommandError: if the pipeline result is falsy.
        """
        important("Stripping Notebooks...")

        # Raw string: the backslash has to reach `find` literally, and a
        # backslash-dot in a normal string literal is an invalid escape.
        find_notebooks = local["find"][
            ".", "-type", "f", "-not", "-path", r"*/\.*",
            "-name", "*.ipynb", "-print",
        ]
        result = (find_notebooks | local["xargs"]["nbstripout"]) & TF(FG=True)

        if not result:
            raise CommandError
Example #3
0
    def print_local_job_folders(self):
        """
        Print every path found by walking ./jobs_folder.

        Each path becomes a Munch with its first component relative to the
        root (folder), its name, and its integer size and mtime; the list is
        handed to _print_job_folders().
        """
        important("Local job folders:")

        root = local.path("./jobs_folder")
        entries = []
        for p in root.walk():
            # One stat() per path: halves the syscalls and guarantees that
            # size and mtime describe the same snapshot of the file.
            st = p.stat()
            entries.append(
                Munch(
                    folder=(p - root)[0],
                    name=p.name,
                    size=int(st.st_size),
                    mtime=int(st.st_mtime),
                )
            )

        self._print_job_folders(entries)
Example #4
0
 def run_ipynb(self, ipynb_path):
     """
     Execute the notebook at ipynb_path with `jupyter nbconvert`,
     converting it to html.
     """
     important(f"Executing report notebook {ipynb_path}")

     # Note: the timeout has been set to 8 hours to facilitate reports for
     # huge jobs (e.g. 100+ runs).
     nbconvert_args = [
         "nbconvert",
         "--to", "html",
         "--execute",
         ipynb_path,
         "--ExecutePreprocessor.timeout=28800",
     ]
     jupyter = local["jupyter"]
     jupyter(*nbconvert_args)
Example #5
0
 def run_docker_build(self, docker_tag, quiet=False):
     """
     Run `docker build` for ./scripts/main_env.docker in the foreground.

     Arguments:
         docker_tag: The image is tagged as erisyon:<docker_tag>.
         quiet: When true, pass --quiet to docker.
     """
     important(f"Building docker tag {docker_tag}")
     with local.env(LANG="en_US.UTF-8"):
         args = [
             "build",
             "-t",
             f"erisyon:{docker_tag}",
             "-f",
             "./scripts/main_env.docker",
         ]
         if quiet:
             args.append("--quiet")

         # The final positional argument. append() rather than `+= "."`:
         # list += str extends with the string's individual characters and
         # only happened to work because "." is one character long.
         args.append(".")
         local["docker"][args] & FG
Example #6
0
def train_rf(train_rf_params, sim_result, progress=None):
    """
    Train a SciKitLearnRandomForestClassifier on the training data of
    sim_result.

    Arguments:
        train_rf_params: Classifier options plus "n_subsample". When
            n_subsample is not None the training data is passed through
            _subsample() first.
        sim_result: Supplies flat_train_radmat() and train_true_pep_iz().
        progress: Forwarded to classifier.train().

    Returns:
        TrainRFResult holding the params and the trained classifier.

    Note:
        "n_subsample" is deleted from train_rf_params IN PLACE, so the
        caller's object (and the params stored in the result) no longer
        carry it afterwards.
    """
    train_X = sim_result.flat_train_radmat()
    train_y = sim_result.train_true_pep_iz()

    if train_rf_params.n_subsample is not None:
        train_X, train_y = _subsample(
            train_rf_params.n_subsample, train_X, train_y
        )
    elif sim_result.params.n_samples_train > 1000:
        important(
            "Warning: RF does not memory-scale well when the n_samples_train is > 1000."
        )

    # Everything left after this is splatted into the classifier constructor
    del train_rf_params["n_subsample"]
    rf = SciKitLearnRandomForestClassifier(**train_rf_params)
    rf.train(train_X, train_y, progress)
    return TrainRFResult(params=train_rf_params, classifier=rf)
Example #7
0
    def validate_job_name_and_folder(self):
        """
        Check self.job and resolve its folder under self.jobs_folder,
        deleting that folder when asked to.

        self.job is lower-cased in place. Deletion rules, in priority order:
            * self.overwrite: never delete
            * self.force: delete without asking
            * folder exists: ask the user for confirmation

        Returns:
             job_folder path

        Raises:
            ValidationError: if self.job is None or is not a symbol.
        """
        if self.job is None:
            raise ValidationError("job not specified.")

        self.job = self.job.lower()
        if not utils.is_symbol(self.job):
            raise ValidationError(
                "job should be a symbol (a-z, 0-9, and _) are allowed."
            )

        job_folder = local.path(self.jobs_folder) / self.job

        should_delete = False
        if not self.overwrite:
            if self.force:
                should_delete = True
            elif job_folder.exists():
                question = (
                    (colors.red & colors.bold)
                    | "Do you really want to remove ALL contents of "
                )
                consequence = (
                    colors.yellow
                    | f"'{job_folder}'?\nIf no, then job may be in an inconsistent state.\n"
                )
                should_delete = confirm_yn(question + consequence, "y")

        if should_delete:
            important(f"Deleting all of {job_folder}.")
            job_folder.delete()

        return job_folder
Example #8
0
    def run_zests(self, **kwargs):
        """
        Run the zests found under ./gen, ./run and ./tools.

        Arguments:
            coverage: Popped from kwargs (default False). Coverage runs are
                not implemented.
            kwargs: Everything else is forwarded to ZestRunner.

        Returns:
            0 when the runner's retcode is 0; 1 otherwise. Any exception
            raised while running (including the non-zero retcode) is
            reported through colorful_exception() and mapped to 1.

        Raises:
            NotImplementedError: if coverage is requested.
        """
        coverage = kwargs.pop("coverage", False)
        important(f"Running zests{' (with coverage)' if coverage else ''}...")
        if coverage:
            # The previous coverage implementation sat below this raise and
            # could never execute; it has been removed.
            raise NotImplementedError("zests with coverage are not implemented")

        from zest.zest_runner import ZestRunner

        try:
            runner = ZestRunner(include_dirs="./gen:./run:./tools", **kwargs)
            if runner.retcode != 0:
                raise CommandError
            return 0
        except Exception as e:
            colorful_exception(e)
            return 1
Example #9
0
    def _request_field_from_user(self, field_name, type_, default):
        """
        Mock point

        Prompt for field_name until the answer can be converted with type_.
        An empty answer means "use default"; a None value (only reachable
        through a None default or a None response) is returned as None
        without conversion.
        """
        headless = ValueError(f"Attempt to request field {field_name} in headless mode")
        prompt = f"Enter {field_name} ({type_.__name__} default={default}): "

        while True:
            answer = input_request(prompt, default_when_headless=headless)
            if answer == "":
                answer = default
            if answer is None:
                return None

            try:
                return type_(answer)
            except Exception:
                important(f"Unable to convert '{answer}' to {type_}. Try again.")
Example #10
0
    def _translate_s3_references(self, task, skip_s3):
        """
        Replace S3 references in the task's "inputs" block with local paths.

        Every input whose name does not start with an underscore and whose
        value starts with "s3:" is synced (aws s3 sync) into a local cache
        folder unless that cache was already found. The input is then
        pointed at the cache folder, and the original S3 path is kept under
        the same name prefixed with an underscore, so that Pipeline is always
        working with local files.

        Nothing is changed when skip_s3 is true.
        """
        # Iterate over a copy because task.inputs is modified inside the loop
        inputs_snapshot = dict(task.inputs or {})

        for input_name, src_path in inputs_snapshot.items():
            if input_name.startswith("_"):
                continue
            if not src_path.startswith("s3:"):
                continue
            if skip_s3:
                continue

            found_cache, dst_path = tmp.cache_path("plaster_s3", src_path)
            if not found_cache:
                important(f"Syncing from {src_path} to {dst_path}")
                local["aws"]["s3", "sync", src_path, dst_path] & FG

            # COPY the old src_path by prefixing with underscore
            task.inputs["_" + input_name] = str(src_path)

            # RESET the input to the new dst_path
            task.inputs[input_name] = str(dst_path)
Example #11
0
 def _save_np(self, arr, name):
     """
     Save arr with np.save under self.save_as suffixed with "_<name>".
     Does nothing when self.save_as is None.
     """
     if self.save_as is None:
         return

     target = local.path(self.save_as) + f"_{name}"
     np.save(target, arr)
     important(f"Wrote synth image to {target}.npy")
Example #12
0
def _convert_message(target_dim, new_dim):
    """Mock-point: announce a conversion from target_dim to new_dim."""
    message = f"Converting from dim {target_dim} to {new_dim}"
    important(message)
Example #13
0
    def main(self, job_folder=None):
        """
        Execute every run found under job_folder, then refresh the report.

        Each folder that contains a plaster_run.yaml is loaded and executed
        in this process, in sorted order. When every run succeeds, and
        neither limit, clean nor skip_reports is in effect, report.ipynb is
        executed if it exists and is either forced or out of date relative to
        report.html.

        Arguments:
            job_folder: Folder to run; validated with allow_run_folders=True.

        Returns:
            1 when job_folder is not given, does not exist, or contains no
            plaster_run.yaml. Otherwise the number of failures (0 on
            success).

        Raises:
            Exception: whatever a run raises, unless continue_on_error is set.
        """
        switches = utils.plumbum_switches(self)

        if job_folder is None:
            error("No job_folder was specified")
            return 1

        important(
            f"Plaster run {job_folder} limit={self.limit} started at {arrow.utcnow().format()}"
        )

        job_folder = assets.validate_job_folder_return_path(
            job_folder, allow_run_folders=True)
        if not job_folder.exists():
            error(f"Unable to find the path {job_folder}")
            return 1

        # Find all the plaster_run.yaml files. They might be in run subfolders
        found = list(
            job_folder.walk(filter=lambda p: p.name == "plaster_run.yaml"))
        run_dirs = [p.dirname for p in found]

        if not run_dirs:
            error(
                "Plaster: Nothing to do because no run_dirs have plaster_run.yaml files"
            )
            return 1

        # A normal run where all happens in this process
        failure_count = 0
        for run_dir_i, run_dir in enumerate(sorted(run_dirs)):

            metrics(
                _type="plaster_start",
                run_dir=run_dir,
                run_dir_i=run_dir_i,
                run_dir_n=len(run_dirs),
                **switches,
            )
            important(
                f"Starting run subdirectory {run_dir}. {run_dir_i + 1} of {len(run_dirs)}"
            )

            try:
                with zap.Context(cpu_limit=self.cpu_limit,
                                 debug_mode=self.debug_mode):
                    run = RunExecutor(run_dir).load()
                    if "_erisyon" in run.config:
                        metrics(_type="erisyon_block", **run.config._erisyon)

                    failure_count += run.execute(
                        force=self.force,
                        limit=self.limit.split(",") if self.limit else None,
                        clean=self.clean,
                        n_fields_limit=self.n_fields_limit,
                        skip_s3=self.skip_s3,
                    )
            except Exception as e:
                failure_count += 1
                if not self.continue_on_error:
                    raise
                # Continuing past the failure: report it rather than
                # dropping the exception silently.
                error(f"Run {run_dir} failed: {e}; continuing with the next run")

        if (failure_count == 0 and self.limit is None and not self.clean
                and not self.skip_reports):
            # RUN reports
            report_src_path = job_folder / "report.ipynb"
            report_dst_path = job_folder / "report.html"

            # The notebook must exist even when forced. The previous
            # `force or exists and out_of_date` parsed as
            # `force or (exists and out_of_date)` and tried to execute a
            # missing notebook under --force.
            if report_src_path.exists() and (
                    self.force
                    or utils.out_of_date(report_src_path, report_dst_path)):
                self.run_ipynb(report_src_path)

        return failure_count