Example #1
    def validate_job_name_and_folder(self):
        """
        Validate the job name and compute job_folder path.
        Optionally delete the job_folder if it exists.

        Returns:
             job_folder path
        """

        self.job = assets.validate_job_folder(self.job)
        delete_job = False
        if self.overwrite:
            delete_job = False
        elif self.force:
            delete_job = True
        elif self.job.exists():
            delete_job = confirm_yn(
                (colors.red & colors.bold | "Do you really want to remove ALL contents of ")
                + (colors.yellow | f"'{self.job}'?\nIf no, then job may be in an inconsistent state.\n"),
                "y",
            )

        if delete_job:
            tell(f"Deleting all of {self.job}.")
            self.job.delete()

        return self.job
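
The confirmation prompt above builds its message with plumbum's `colors` styles, combining them with `&` and applying them to text with `|`. A minimal standalone sketch of that pattern (the prompt text and path are illustrative only):

from plumbum import colors

# Combine styles with & and apply a style to text with |
warning = (colors.red & colors.bold) | "Do you really want to remove ALL contents of "
target = colors.yellow | "'/jobs_folder/my_job'?"  # hypothetical job path
print(warning + target)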
Example #2
    def __init__(self, src_dir, dst_dir, config, **build_opts):
        src_dir = local.path(src_dir)
        dst_dir = local.path(dst_dir)

        self._tasks = {}
        task_list = task_list_from_config(config)
        for task_name, task_klass, task_info in task_list:
            self._tasks[task_name] = (task_klass, task_info, {})

        n_fields_limit = build_opts.pop("n_fields_limit", None)
        if n_fields_limit is not None:
            tell(f"Limiting to only {n_fields_limit} fields")

            # TASK: Convert these from named task to ANY task of that type
            if "ims_import" in self._tasks:
                self._tasks["ims_import"][1]["parameters"]["n_fields_limit"] = n_fields_limit

            if "sigproc_v1" in self._tasks:
                self._tasks["sigproc_v1"][1]["parameters"]["n_fields_limit"] = n_fields_limit

            if "sigproc_v2" in self._tasks:
                self._tasks["sigproc_v2"][1]["parameters"]["n_fields_limit"] = n_fields_limit

        super().__init__(src_dir, dst_dir, self._tasks, **build_opts)
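
The TASK comment above asks for the three named `if` blocks to be generalized to any task that accepts a field limit. A standalone sketch of one way to do that, assuming the same `(task_klass, task_info, {})` tuple layout and using a name set as the membership test (the set itself is an assumption):

# Hypothetical task table mirroring the tuples built in __init__ above
tasks = {
    "ims_import": (object, {"parameters": {}}, {}),
    "sigproc_v2": (object, {"parameters": {}}, {}),
}
n_fields_limit = 4

# One loop instead of one `if` per task name
FIELD_LIMITED_TASKS = {"ims_import", "sigproc_v1", "sigproc_v2"}
for name, (klass, info, extra) in tasks.items():
    if name in FIELD_LIMITED_TASKS:
        info["parameters"]["n_fields_limit"] = n_fields_limit

assert tasks["ims_import"][1]["parameters"]["n_fields_limit"] == 4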
Example #3
        def run_reports():
            report_paths = [job_folder / "report.ipynb"] + (
                job_folder / "_reports" // "*.ipynb"
            )

            for report_src_path in report_paths:
                report_dst_path = report_src_path.with_suffix(".html")
                if report_src_path.exists() and (self.force or out_of_date(
                        report_src_path, report_dst_path)):
                    tell(f"Running report {report_src_path}")
                    self.run_ipynb(report_src_path)
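
`job_folder / "_reports" // "*.ipynb"` uses plumbum's `//` glob operator, which returns a list of paths; that is why it can be concatenated with the single-element list above. A small sketch of the same pattern (the folder is illustrative only):

from plumbum import local

reports_dir = local.path("/tmp")              # hypothetical folder
notebooks = reports_dir // "*.ipynb"          # glob -> list of LocalPath objects
report_paths = [reports_dir / "report.ipynb"] + notebooks
for p in report_paths:
    print(p, "->", p.with_suffix(".html"))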
Example #4
    def _write_runs(self, job_folder, run_descs, props=()):
        """
        Convert the munch run_descs into folders
        """

        if not job_folder.exists():
            job_folder.mkdir()

        found_run_names = {}

        for i, run in enumerate(run_descs):
            # FIND or OVERRIDE run_name
            run_name = run.get("run_name")

            if run_name in found_run_names:
                raise Exception(
                    f"More than one run with name {run_name} found")
            found_run_names[run_name] = True

            # SETUP _erisyon block
            if "_erisyon" not in run:
                run._erisyon = Munch()
            run._erisyon.run_i = i
            run._erisyon.run_i_of = len(run_descs)
            run._erisyon.run_name = run_name

            # OVERRIDE with props given as "key=value=type" strings
            for prop in props:
                k, v, t = prop.split("=")
                if t == "bool":
                    v = v == "true"
                elif t == "int":
                    v = int(v)
                elif t == "float":
                    v = float(v)
                elif t == "int_list":
                    v = [int(x) for x in v.split(",")]
                elif t == "int_dict":
                    v = v.split(",")
                    v = {v[j]: int(v[j + 1]) for j in range(0, len(v), 2)}
                else:
                    raise TypeError(f"Unknown type in prop conversion '{t}'")
                utils.block_update(run, k, v)

            # Keep the run_name out
            run.pop("run_name", None)
            folder = job_folder / run_name
            folder.mkdir()
            RunExecutor(folder, tasks=run).save()

            tell(f"Wrote run to {folder}")
Example #5
def prof_stop(stacklevel=2):
    block = prof_stack.pop()
    name, full_name, start, log, _group_children_by, _tell, kwargs = block

    now = time.time()
    elapsed = now - start
    mem = prof_get_memory_highwater_mark()
    mem_gb = mem / 1024**3

    kwargs_per_sec = {
        f"{key}_per_sec": val / elapsed
        for key, val in kwargs.items()
    }

    kwargs_strs = [
        f"{key}={val:2.2f}" if isinstance(val, (int, float)) else f"{key}={val}"
        for key, val in {**kwargs, **kwargs_per_sec}.items()
    ]

    record = Munch(
        name=full_name,
        elapsed=elapsed,
        mem_gb=mem_gb,
        group_children_by=_group_children_by,
        **kwargs,
        **kwargs_per_sec,
    )

    msg = f"{full_name} stop. secs={elapsed:2.2f} mem={mem_gb:2.1f} {' '.join(kwargs_strs)}"
    log.info(
        msg,
        extra=dict(plaster_profile=json.dumps(record)),
        stacklevel=stacklevel,
    )

    global prof_records
    if prof_records is not None:
        prof_records += [record]

    if _tell:
        tell(msg)
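
The `kwargs_per_sec` dict above derives a rate for every numeric counter handed to the profiler and merges it back into the log line; a standalone sketch of that pattern with made-up numbers:

elapsed = 2.0                              # seconds (hypothetical)
kwargs = dict(n_rows=1000, n_fields=8)     # hypothetical counters

kwargs_per_sec = {f"{key}_per_sec": val / elapsed for key, val in kwargs.items()}

kwargs_strs = [
    f"{key}={val:2.2f}" if isinstance(val, (int, float)) else f"{key}={val}"
    for key, val in {**kwargs, **kwargs_per_sec}.items()
]
print(" ".join(kwargs_strs))  # n_rows=1000.00 n_fields=8.00 n_rows_per_sec=500.00 n_fields_per_sec=4.00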
Example #6
def rf_train(rf_train_v2_params, prep_result, sim_result, progress=None):
    X = sim_result.flat_train_radmat()

    y = poi_vs_all(sim_result.train_true_pep_iz, prep_result)

    if rf_train_v2_params.n_subsample is not None:
        X, y = _subsample(rf_train_v2_params.n_subsample, X, y)

    elif sim_result.params.n_samples_train > 1000:
        tell(
            "Warning: RF does not memory-scale well when n_samples_train is > 1000."
        )

    del rf_train_v2_params["n_subsample"]
    classifier = SKLearnRandomForestClassifier(**rf_train_v2_params)
    classifier.train(X, y, progress)
    return RFTrainV2Result(params=rf_train_v2_params, classifier=classifier)
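
`_subsample` is not shown in this example; a plausible implementation, offered purely as an assumption about its behavior (random row selection without replacement), might look like:

import numpy as np

def _subsample(n_subsample, X, y):
    # Hypothetical: keep at most n_subsample rows, chosen at random without replacement
    n_keep = min(int(n_subsample), X.shape[0])
    idx = np.random.choice(X.shape[0], size=n_keep, replace=False)
    return X[idx], y[idx]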
Example #7
def cache_source(cache_folder, source, copy_to):
    """
    If this is a URL or S3 fetch and cache at cache_folder.  Local files
    can be loaded from //jobs_folder/... only.
    In all cases, the cached file is optionally copied to copy_to so that
    that job folders contain a copy of all gen source data.

    Returns:
        the contents of the file
    """

    def make_cache_path():
        local.path(cache_folder).mkdir()
        return (
            local.path(cache_folder) / hashlib.md5(source.encode("utf-8")).hexdigest()
        )

    file_contents = None
    if source.startswith("http://") or source.startswith("https://"):
        cache_path = make_cache_path()
        if not cache_path.exists():
            important(f"Fetching {source}... (TO {cache_path})")
            file_contents = _url_get(source)
            utils.save(cache_path, file_contents)
        else:
            file_contents = utils.load(cache_path)
    elif source.startswith("s3://"):
        cache_path = make_cache_path()
        if not cache_path.exists():
            tell(f"Syncing from {source} to {cache_path}")
            local["aws"]["s3", "cp", source, cache_path] & FG
            # s3 cp already saves it to disk, fall thru & load
        file_contents = utils.load(cache_path)
    else:
        file_contents = utils.load(source)

    assert file_contents is not None

    if copy_to:
        assert local.path(copy_to).exists()
        filename = local.path(source).basename
        utils.save(copy_to / filename, file_contents)

    return file_contents
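
A hedged usage sketch of `cache_source` as defined above; the cache folder and URL are illustrative only:

# Hypothetical arguments for illustration
contents = cache_source(
    cache_folder="/tmp/plaster_gen_cache",
    source="https://example.com/proteins.csv",
    copy_to=None,  # or an existing folder path to also place a copy in the job
)
print(f"fetched {len(contents)} bytes/chars")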
Example #8
    def run_zests_v2(self, cli_args, debug_mode):
        tell(f"Running zests v2...")

        # Because os.environ is evaluated when the module is first imported,
        # we can't use any of the more graceful ways to set the environment.
        with local.env(RUN_ENV="test", ZAP_DEBUG_MODE=debug_mode):
            zest_version = None
            try:
                from zest.version import __version__ as zest_version
            except ImportError:
                pass

            assert zlog.config_dict is not None
            assert zest_version.startswith("1.1.")
            with tmp.tmp_file() as tmp_path:
                with open(tmp_path, "w") as f:
                    f.write(json.dumps(zlog.config_dict))

                # cli_args += ["--logger_config_json", tmp_path]
                local["python"]["-u", "-m", "zest.zest_cli"].bound_command(
                    *cli_args) & FG(retcode=None)
Example #9
    def _cache_s3_reference(self, source):
        if source.startswith("s3:"):
            found_cache, cache_path = tmp.cache_path("plaster_s3", source)
            if not found_cache:
                tell(f"Syncing from {source} to {cache_path}")
                local["aws"]["s3", "sync", source, cache_path] & zlog.ZlogFG(
                    logger=log, drop_dup_lines=True, convert_to_cr=True)

            # A bit of a hack to apply path parsing to a URL, but it does what we want
            source_folder_name = local.path(source).basename

            if self.symlink_to_cache:
                local["ln"]["-s", cache_path, self.local_sources_tmp_folder / source_folder_name]()
            else:
                local["cp"]["-r", cache_path, self.local_sources_tmp_folder / source_folder_name]()

            # Hardcoding relative path here. It will always be the same 3 levels: run/plaster_output/task
            return "../../../_gen_sources/" + source_folder_name

        return source
Example #10
    def main(self, jobs_folder):
        assert_env()

        jobs_folder = local.path(jobs_folder)

        if not self.skip_hardware:
            tell(colors.cyan | "Profiling file_io")
            self.fileio_test(jobs_folder)

            tell(colors.cyan | "Profiling cpu")
            self.cpu_test()

            tell(colors.cyan | "Profiling mem")
            self.mem_test()

        if not self.skip_sigproc:
            tell(colors.cyan | "Profiling sigproc")
            self.sigproc_test(jobs_folder)
Example #11
def _protein_csv_warning(warning_string):
    """Mock-point"""
    tell(warning_string)
Example #12
    def main(self, job_folder=None):
        switches = utils.plumbum_switches(self)

        if job_folder is None:
            job_folder = self.job

        if job_folder is None:
            log.error("No job_folder was specified")
            return 1

        job_folder = assets.validate_job_folder(job_folder)
        # At this point job_folder is a plumbum path

        # Add a new handler so we get PER-RUN log files into the job folder
        per_run_log_path = job_folder / f"{int(time.time()):06x}.log"
        formatter = zlog.ColorfulFormatter(
            "%(name)s %(asctime)s %(levelname)s %(message)s %(filename)s %(lineno)d"
        )
        handler = logging.StreamHandler(open(per_run_log_path, "w"))
        handler.setFormatter(formatter)
        zlog.add_handler(handler)

        tell(f"Trapping run logs into {per_run_log_path}")

        tell(
            f"Plaster run {job_folder} limit={self.limit} started at {arrow.utcnow().format()}"
        )

        if not job_folder.exists():
            log.error(f"Unable to find the path {job_folder}")
            return 1

        # Load the job_uuid if available; eventually this will be nice for logging
        job_uuid = None
        job_yaml = job_folder / "job_manifest.yaml"
        if job_yaml.exists():
            job_manifest = utils.yaml_load_munch(job_yaml)
            job_uuid = job_manifest.uuid

        # Find all the plaster_run.yaml files. They might be in run subfolders
        found = list(
            job_folder.walk(filter=lambda p: p.name == "plaster_run.yaml"))
        run_dirs = [p.dirname for p in found]

        if len(run_dirs) == 0:
            log.error(
                "Plaster: Nothing to do because no run_dirs have plaster_run.yaml files"
            )
            return 1

        def run_reports():
            report_paths = [job_folder / "report.ipynb"] + (
                job_folder / "_reports" // "*.ipynb"
            )

            for report_src_path in report_paths:
                report_dst_path = report_src_path.with_suffix(".html")
                if report_src_path.exists() and (self.force or out_of_date(
                        report_src_path, report_dst_path)):
                    tell(f"Running report {report_src_path}")
                    self.run_ipynb(report_src_path)

        if self.reports_only:
            run_reports()
            return 0

        # A normal run where all happens in this process
        failure_count = 0
        for run_dir_i, run_dir in enumerate(sorted(run_dirs)):
            zlog.metrics(
                f"Starting run subdirectory {run_dir}. {run_dir_i + 1} of {len(run_dirs)}",
                log=log,
                _type="plaster_start",
                run_dir=run_dir,
                run_dir_i=run_dir_i,
                run_dir_n=len(run_dirs),
                **switches,
            )

            try:
                with zap.Context(
                        cpu_limit=self.cpu_limit,
                        mode="debug" if self.debug_mode else None,
                        allow_inner_parallelism=True,
                ):
                    # allow_inner_parallelism must be True so that each task, such as sigproc_v2,
                    # can allocate parallel jobs to each field.

                    run = RunExecutor(run_dir).load()
                    if "_erisyon" in run.config:
                        zlog.metrics(
                            "run metrics",
                            log=log,
                            _type="erisyon_block",
                            **run.config._erisyon,
                        )

                    failure_count += run.execute(
                        force=self.force,
                        limit=self.limit.split(",") if self.limit else None,
                        clean=self.clean,
                        n_fields_limit=self.n_fields_limit,
                        no_progress=self.no_progress,
                    )
            except Exception:
                failure_count += 1
                if not self.continue_on_error:
                    raise

        if failure_count == 0 and self.limit is None and not self.clean:
            # WRITE job_info.yaml with metadata used by the indexer
            n_runs = len(run_dirs)
            job_info = Munch(n_runs=n_runs, job_uuid=job_uuid)
            if n_runs == 1:
                job = JobResult(job_folder=job_folder)
                tsv_data = {}
                try:
                    tsv_data = job.runs[0].ims_import.tsv_data
                except Exception:
                    pass

                nd2_metadata = {}
                try:
                    nd2_metadata = job.runs[0].ims_import._nd2_metadata
                except Exception:
                    pass

                job_info.update(tsv_data=tsv_data, nd2_metadata=nd2_metadata)

            utils.yaml_save(job_folder / "job_info.yaml", job_info)

            # RUN reports if not skipped
            if not self.skip_reports:
                run_reports()

        return failure_count
Example #13
def sim(
    pcbs,  # pcb = ((p)ep_i, (c)h_i, (b)right_prob) -- like a "flu" with a brightness probability
    n_samples,
    n_channels,
    n_labels,
    cycles,
    p_bleach,
    p_detach,
    p_edman_fail,
    allow_edman_cterm,
    n_threads=1,
    rng_seed=None,
    progress=None,
):
    count_only = 0  # Set to 1 to use the counting mechanisms

    global global_progress_callback
    global_progress_callback = progress

    lib = load_lib()

    # TODO:
    assert lib.sanity_check() == 0
    _assert_array_contiguous(cycles, CycleKindType)
    _assert_array_contiguous(pcbs, PCBType)

    # BUILD a map from pep_i to pcb_i.
    #   Note, this map needs to be one longer than n_peps so that we
    #   can subtract each offset to get the pcb length for each pep_i
    pep_i_to_pcb_i = np.unique(pcbs[:, 0], return_index=True)[1].astype(np.uint64)
    pep_i_to_pcb_i_view = pep_i_to_pcb_i
    n_peps = pep_i_to_pcb_i.shape[0]

    pep_i_to_pcb_i_buf = (c.c_ulonglong * (n_peps + 1))()
    c.memmove(
        pep_i_to_pcb_i_buf,
        pep_i_to_pcb_i_view.ctypes.data,
        n_peps * c.sizeof(c.c_ulonglong),
    )
    pep_i_to_pcb_i_buf[n_peps] = pcbs.shape[0]

    n_cycles = cycles.shape[0]

    n_dyt_row_bytes = lib.dyt_n_bytes(n_channels, n_cycles)

    # How many dyetrack records are needed?
    # Some experiments are needed to find out so that we don't over-allocate.

    if count_only == 1:
        n_max_dyts = 1
        n_max_dyt_hash_recs = 100_000_000
        n_max_dyepeps = 1
        n_max_dyepep_hash_recs = 100_000_000

    else:
        n_max_dyts, n_max_dyepeps = max_counts(n_peps, n_labels, n_channels)

        hash_factor = 1.5
        n_max_dyt_hash_recs = int(hash_factor * n_max_dyts)
        n_max_dyepep_hash_recs = int(hash_factor * n_max_dyepeps)

        dyt_gb = n_max_dyts * n_dyt_row_bytes / 1024**3
        dyepep_gb = n_max_dyepeps * c.sizeof(DyePepRec) / 1024**3
        if dyt_gb + dyepep_gb > 10:
            tell(
                f"Warning: sim_v2 buffers consuming more than 10 GB ({dyt_gb + dyepep_gb:4.1f} GB), "
                f"dyt_gb={dyt_gb}, dyepep_gb={dyepep_gb}, n_max_dyts={n_max_dyts}, n_max_dyepeps={n_max_dyepeps}"
            )

    # It's important that we hold onto a reference to this ndarray before we drop into c so it's not GC'd
    pep_recalls = np.zeros(n_peps, dtype=np.float64)

    ctx = SimV2Context(
        n_peps=n_peps,
        n_cycles=n_cycles,
        n_samples=n_samples,
        n_channels=n_channels,
        pi_bleach=lib.prob_to_p_i(p_bleach),
        pi_detach=lib.prob_to_p_i(p_detach),
        pi_edman_success=lib.prob_to_p_i(1.0 - p_edman_fail),
        allow_edman_cterm=allow_edman_cterm,
        cycles=(c.c_uint8 * 64)(),
        pcbs=Tab.from_mat(pcbs, expected_dtype=np.float64),
        n_max_dyts=int(n_max_dyts),
        n_max_dyt_hash_recs=int(n_max_dyt_hash_recs),
        n_max_dyepeps=int(n_max_dyepeps),
        n_max_dyepep_hash_recs=int(n_max_dyepep_hash_recs),
        n_dyt_row_bytes=n_dyt_row_bytes,
        # TODO: look at F64Arr
        pep_recalls=pep_recalls.ctypes.data_as(c.POINTER(c.c_double)),
        n_threads=n_threads,
        progress_fn=progress_fn,
        check_keyboard_interrupt_fn=check_keyboard_interrupt_fn,
        rng_seed=int(time.time() * 1_000_000) if rng_seed is None else int(rng_seed),
        count_only=count_only,
        pep_i_to_pcb_i_buf=pep_i_to_pcb_i_buf,
    )

    for i in range(ctx.n_cycles):
        ctx.cycles[i] = cycles[i]

    try:
        # TODO: use convention in radiometry.py with context_init in a context manager, so ctx is always freed
        with handle_sigint():
            ret = lib.context_work_orders_start(ctx)
        if ret != 0:
            raise Exception(f"Worker ended prematurely {ret}")

        if count_only:
            print(f"n_dyts={ctx.output_n_dyts}")
            print(f"n_dyepeps={ctx.output_n_dyepeps}")
            return None, None, None

        # The results are in ctx.dyts and ctx.dyepeps
        # So now allocate the numpy arrays that will be returned
        # to the caller and copy into those arrays from the
        # much larger arrays that were used during the context_work_orders_start()
        n_chcy = ctx.n_channels * ctx.n_cycles
        dyetracks = np.zeros((ctx.dyts.n_rows, n_chcy), dtype=DyeType)

        # We need a special record at 0 for nul so we need to add one here
        dyepeps = np.zeros((ctx.dyepeps.n_rows + 1, 3), dtype=Size)
        _assert_array_contiguous(dyetracks, DyeType)
        _assert_array_contiguous(dyepeps, Size)

        dyetracks_view = dyetracks
        dyepeps_view = dyepeps

        for i in range(ctx.dyts.n_rows):
            dyt_count = lib.context_dyt_get_count(ctx, i)
            dyetrack = lib.context_dyt_dyetrack(ctx, i)
            for j in range(n_chcy):
                dyetracks_view[i, j] = dyetrack[j]

        # nul record
        dyepeps_view[0, 0] = 0
        dyepeps_view[0, 1] = 0
        dyepeps_view[0, 2] = 0
        for i in range(ctx.dyepeps.n_rows):
            dyepeprec = lib.context_dyepep(ctx, i).contents
            dyepeps_view[i + 1, 0] = dyepeprec.dyt_i
            dyepeps_view[i + 1, 1] = dyepeprec.pep_i
            dyepeps_view[i + 1, 2] = dyepeprec.n_reads

        return dyetracks, dyepeps, pep_recalls
    finally:
        lib.context_free(ctx)
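
The `pep_i_to_pcb_i` table above is built with `np.unique(..., return_index=True)`, which yields the first row index of each pep_i; appending the total row count as a sentinel lets per-pep lengths be computed by differencing. A small standalone sketch:

import numpy as np

# Hypothetical pcbs matrix: columns are (pep_i, ch_i, bright_prob), sorted by pep_i
pcbs = np.array([
    [0, 0, 1.0],
    [0, 1, 0.9],
    [1, 0, 1.0],
    [2, 0, 0.8],
    [2, 1, 0.7],
    [2, 2, 0.6],
])

pep_i_to_pcb_i = np.unique(pcbs[:, 0], return_index=True)[1]
offsets = np.append(pep_i_to_pcb_i, pcbs.shape[0])  # sentinel, like pep_i_to_pcb_i_buf[n_peps]
lengths = np.diff(offsets)                          # pcb rows per pep_i
print(offsets, lengths)  # [0 2 3 6] [2 1 3]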
Example #14
def protein_csv_df(csv_string):
    """
    Parse protein(s) in csv format.

    Must have a header row with some of these column names:
        'Name', 'Seq', 'UniprotAC', 'Abundance', 'PTM', 'POI'

    If Name is present it will become the name;
    otherwise, if UniprotAC is present, that will become the name.

    If Seq is absent and UniprotAC is present then the
    seqs will be pulled from Uniprot.

    If Abundance is present it will be returned as a parallel array.

    If PTM is present it will be added into the protein df.

    If POI is present it will be added into the protein df.

    Returns:
        DataFrame: (id, seqstr, abundance, ptm_locs)
    """
    src_df = pd.read_csv(StringIO(csv_string))
    for col in src_df.columns:
        src_df = src_df.rename(columns={col: col.strip()})

    if "Seq" not in src_df and "UniprotAC" not in src_df:
        raise ValueError("protein csv missing either a Seq or a UniprotAC column")

    if "Seq" in src_df and "UniprotAC" in src_df:
        raise ValueError("protein csv has both a Seq and a UniprotAC column")
    if "UniprotAC" in src_df.columns:
        # Using the UniprotAC as the Seq column
        _protein_csv_info(f"Requesting ACs from uniprot, this may take a while")
        dst_df = pd.DataFrame(dict(id=src_df.UniprotAC, seqstr=""))
        start_time = time.time()
        for i, ac in enumerate(dst_df.id):
            tell(f"{i+1} of {len(dst_df)}. If it blocks, consider a ^C and restart.")

            seqs = _uniprot_lookup(ac)
            n_seqs = len(seqs)
            seqstr = None
            if n_seqs == 0:
                _protein_csv_warning(f"Uniprot ac {ac} returned no sequences. Ignoring")
            elif n_seqs > 1:
                _protein_csv_warning(
                    f"Uniprot ac {ac} returned > 1 sequence. Using the longest"
                )
                longest = 0
                for seq in seqs:
                    if len(seq["seqstr"]) > longest:
                        seqstr = seq["seqstr"]
                        longest = len(seqstr)
            else:
                seqstr = seqs[0]["seqstr"]

            if seqstr is not None:
                dst_df.loc[i, "seqstr"] = seqstr

        if "Name" in src_df:
            # Overload the UniprotAC with the Name
            dst_df["id"] = src_df.Name

    else:
        # Using the Seq column
        if "Name" not in src_df:
            raise ValueError(
                "protein csv missing a Name column without a UniprotAC column"
            )

        dst_df = pd.DataFrame(dict(id=src_df.Name, seqstr=src_df.Seq))

    # ADD the PTM column if present
    if "PTM" in src_df:
        dst_df["ptm_locs"] = src_df.fillna("").PTM.astype(str)
    else:
        dst_df["ptm_locs"] = ""

    # ADD the abundance column if present
    if "Abundance" in src_df:
        dst_df["abundance"] = src_df.Abundance.astype(float)
    else:
        dst_df["abundance"] = np.nan

    # ADD the POI column if present; note that the gen --protein_of_interest flag may
    # cause this to be overridden.
    if "POI" in src_df:
        dst_df["is_poi"] = src_df.fillna(0).POI.astype(int)

    # STRIP whitespace
    dst_df["id"] = dst_df.id.astype(str).apply(lambda x: x.strip())
    dst_df["seqstr"] = dst_df.seqstr.astype(str).apply(lambda x: x.strip())
    dst_df["ptm_locs"] = dst_df.ptm_locs.astype(str).apply(lambda x: x.strip())

    if "Abundance" in src_df:
        dst_df = dst_df.sort_values(["abundance", "id"], ascending=False)
    else:
        dst_df = dst_df.sort_values(["id"])

    dupes = dst_df.id.duplicated(keep=False)
    if dupes.sum() > 0:
        raise ValueError(f"duplicate names in protein_csv:\n{dst_df[dupes]}")

    dupes = dst_df.seqstr.duplicated(keep=False)
    if dupes.sum() > 0:
        raise ValueError("duplicate seqs in protein_csv")

    return dst_df.reset_index()
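
A hedged usage sketch of `protein_csv_df` using the Name/Seq form described in the docstring (names, sequences, and abundances are made up):

csv_string = (
    "Name,Seq,Abundance,POI\n"
    "pep_a,MKTAYIAKQR,10.0,1\n"
    "pep_b,GASGETKVLT,2.5,0\n"
)

df = protein_csv_df(csv_string)
print(df[["id", "seqstr", "abundance", "is_poi"]])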
Example #15
def _protein_csv_info(info_string):
    """Mock-point"""
    tell(info_string)
Example #16
    def main(self):
        if self.construct_fail:
            return

        job_folder = self.validate_job_name_and_folder()

        schema = self.generator_klass.schema
        defaults = self.generator_klass.defaults

        requirements = schema.requirements()
        # APPLY defaults and then ask user for any elements that are not declared
        generator_args = {}
        switches = self._switches_by_name

        if self.protein_random is not None:
            tell(
                f"Sampling {self.protein_random} random proteins from imported set"
            )
            n = len(self.derived_vals.protein)
            assert n >= self.protein_random
            self.derived_vals.protein = stats.subsample(
                self.derived_vals.protein, self.protein_random)
            assert len(self.derived_vals.protein) == self.protein_random

        for arg_name, arg_type, _, arg_userdata in requirements:
            if (arg_name in self.derived_vals
                    and self.derived_vals.get(arg_name) is not None):
                # Load from a derived switch (eg: protein)
                generator_args[arg_name] = self.derived_vals[arg_name]
            elif arg_name in switches and switches.get(arg_name) is not None:
                # Load from a switch
                generator_args[arg_name] = getattr(self, arg_name)
            else:
                # If the schema allows the user to enter manually
                if arg_userdata.get("allowed_to_be_entered_manually"):
                    generator_args[arg_name] = self._request_field_from_user(
                        arg_name, arg_type, default=defaults.get(arg_name))

        # Download sigproc sources and replace with local path before handing to generator
        if "sigproc_source" in generator_args:
            source = generator_args["sigproc_source"]
            if source is not None and source.startswith("s3:"):
                generator_args["sigproc_source"] = self._cache_s3_reference(
                    source)

        # Intentionally run the generate before the job folder is written
        # so that if generate fails it doesn't leave around a partial job.
        gen_sources_folder = job_folder / "_gen_sources"
        try:
            generator_args["force_run_name"] = self.run_name
            generator_args["local_sources_tmp_folder"] = self.local_sources_tmp_folder
            generator_args["gen_sources_folder"] = gen_sources_folder
            generator = self.generator_klass(**generator_args)
            run_descs = generator.generate()
        except (SchemaValidationFailed, ValidationError):
            # Emit clean failure and exit 1
            log.exception("Gen failed")
            return 1

        # WRITE the job & copy any file sources
        self._write_runs(job_folder, run_descs, props=self.prop)
        gen_sources_folder.delete()
        self.local_sources_tmp_folder.move(gen_sources_folder)

        if not self.skip_report:
            for report_name, report_builder in generator.reports.items():
                report = report_builder.report_assemble()
                if report is not None:
                    utils.json_save(job_folder / f"{report_name}.ipynb", report)

            (job_folder / "reports_archive").mkdir()

            for report_name in generator.static_reports:
                if report_name is not None:
                    report_name = f"{report_name}.ipynb"
                    src = local.path(__file__) / "../../reports" / report_name
                    dst_folder = job_folder / "_reports"
                    dst_folder.mkdir()
                    dst = dst_folder / report_name
                    src.copy(dst)

        utils.yaml_write(
            job_folder / "job_manifest.yaml",
            uuid=self.job_uuid,
            localtime=time.strftime("%Y-%m-%d, %H:%M:%S", time.localtime()),
            # Note: it seems localtime inside our container is UTC
            who=local.env.get("RUN_USER", "Unknown"),
            cmdline_args=sys.argv,
        )
Example #17
    def _save_debug(self):
        path = "/erisyon/internal/_synth_debug.npy"
        np.save(path, self.render_flchcy())
        tell(f"Wrote debugging to {path}")
Example #18
if __name__ == "__main__":
    try:
        assert_env()
        with local.cwd(local.env["PLASTER_ROOT"]):
            with zlog.app_start("./scripts/zlog.yaml"):
                import logging

                log = logging.getLogger()
                log.info(f"Starting run: '{' '.join(sys.argv)}'")

            PlasterCommand.subcommand("gen", "plaster.gen.gen_main.GenApp")
            PlasterCommand.subcommand("run", "plaster.run.run_main.RunApp")
            PlasterCommand.subcommand("run_notebook",
                                      "plaster.main.RunNotebookCommand")
            PlasterCommand.subcommand("jupyter", "plaster.main.JupyterCommand")
            PlasterCommand.subcommand("test", "plaster.main.TestCommand")
            PlasterCommand.subcommand("pluck", "plaster.main.PluckCommand")
            PlasterCommand.subcommand("profile", "plaster.main.ProfileCommand")
            PlasterCommand.subcommand("profile_dump",
                                      "plaster.main.ProfileDumpCommand")
            PlasterCommand.run()

    except KeyboardInterrupt:
        print()  # Add an extra line because various things terminate with \r
        sys.exit(1)

    except Exception as e:
        zlog.tell(zlog.colorful_exception(e))
        sys.exit(1)