Esempio n. 1
0
def _subsample(n_subsample, X, y):
    """Subsample rows of X and y separately for every label in y.

    For each integer label from 0 through max(y), the positions where y
    equals that label are passed to stats.subsample together with
    n_subsample, and the returned positions are used to index both X and y.
    The per-label pieces are stacked and the singleton axis 1 is removed.

    Assumes y is a 1-D integer label array, so np.argwhere yields one
    index column per label -- TODO confirm against callers.

    Returns:
        A tuple (X_subsampled, y_subsampled).
    """
    label_count = np.max(y) + 1
    picked_X = []
    picked_y = []
    for label in range(label_count):
        # argwhere keeps a trailing index axis; it is squeezed out at the end.
        picked_rows = stats.subsample(np.argwhere(y == label), n_subsample)
        picked_X.append(X[picked_rows])
        picked_y.append(y[picked_rows])

    stacked_X = np.vstack(picked_X)
    stacked_y = np.vstack(picked_y)
    return np.squeeze(stacked_X, axis=1), np.squeeze(stacked_y, axis=1)
Esempio n. 2
0
 def it_subsamples_list():
     """Check that subsampling the list fixture yields two string items."""
     # `l` is a fixture from the enclosing scope (not visible here).
     picked = subsample(l, 2)
     assert len(picked) == 2
     assert isinstance(picked[0], str)
Esempio n. 3
0
    def main(self):
        """Gather generator arguments, run the generator, and write the job.

        Steps, in order:
          1. Return immediately if ``self.construct_fail`` is set.
          2. Validate the job name/folder and optionally subsample the
             derived protein list down to ``self.protein_random`` entries.
          3. Fill each schema requirement from a derived value, a switch,
             or (when the schema allows it) a prompt to the user.
          4. Swap an ``s3:`` sigproc source for the result of
             ``self._cache_s3_reference``.
          5. Run the generator, then write the runs, move the gathered
             sources into the job folder, emit reports, and write
             ``job_manifest.yaml``.

        Returns:
            ``1`` when generation raises ``SchemaValidationFailed`` or
            ``ValidationError``; otherwise ``None`` (including the early
            return on ``construct_fail``).
            NOTE(review): presumably the caller treats the return value as a
            process exit code -- confirm.
        """
        # Nothing to do when construct_fail is set.
        if self.construct_fail:
            return

        job_folder = self.validate_job_name_and_folder()

        schema = self.generator_klass.schema
        defaults = self.generator_klass.defaults

        requirements = schema.requirements()
        # APPLY defaults and then ask user for any elements that are not declared
        generator_args = {}
        switches = self._switches_by_name

        # Optionally replace the derived protein list with a subsample of
        # the requested size; the asserts check there are enough proteins
        # beforehand and that the result has exactly the requested length.
        if self.protein_random is not None:
            tell(
                f"Sampling {self.protein_random} random proteins from imported set"
            )
            n = len(self.derived_vals.protein)
            assert n >= self.protein_random
            self.derived_vals.protein = stats.subsample(
                self.derived_vals.protein, self.protein_random)
            assert len(self.derived_vals.protein) == self.protein_random

        # Resolve each requirement in priority order: derived value, then
        # switch, then manual entry. A requirement matching none of these
        # is left out of generator_args.
        for arg_name, arg_type, _, arg_userdata in requirements:
            if (arg_name in self.derived_vals
                    and self.derived_vals.get(arg_name) is not None):
                # Load from a derived switch (eg: protein)
                generator_args[arg_name] = self.derived_vals[arg_name]
            elif arg_name in switches and switches.get(arg_name) is not None:
                # Load from a switch
                generator_args[arg_name] = getattr(self, arg_name)
            else:
                # If the schema allows the user to enter manually
                if arg_userdata.get("allowed_to_be_entered_manually"):
                    generator_args[arg_name] = self._request_field_from_user(
                        arg_name, arg_type, default=defaults.get(arg_name))

        # Download sigproc sources and replace with local path before handing to generator
        if "sigproc_source" in generator_args:
            source = generator_args["sigproc_source"]
            if source is not None and source.startswith("s3:"):
                generator_args["sigproc_source"] = self._cache_s3_reference(
                    source)

        # Intentionally run the generate before the job folder is written
        # so that if generate fails it doesn't leave around a partial job.
        gen_sources_folder = job_folder / "_gen_sources"
        try:
            generator_args["force_run_name"] = self.run_name
            generator_args[
                "local_sources_tmp_folder"] = self.local_sources_tmp_folder
            generator_args["gen_sources_folder"] = gen_sources_folder
            generator = self.generator_klass(**generator_args)
            run_descs = generator.generate()
        except (SchemaValidationFailed, ValidationError) as e:
            # Emit clean failure and exit 1
            log.exception("Gen failed")
            return 1

        # WRITE the job & copy any file sources
        self._write_runs(job_folder, run_descs, props=self.prop)
        # Remove any existing _gen_sources folder, then move the temporary
        # sources folder into its place.
        gen_sources_folder.delete()
        self.local_sources_tmp_folder.move(gen_sources_folder)

        if not self.skip_report:
            # Assembled reports are saved as notebooks directly in the job
            # folder; builders that return None are skipped.
            for report_name, report_builder in generator.reports.items():
                report = report_builder.report_assemble()
                if report is not None:
                    utils.json_save(job_folder / f"{report_name}.ipynb",
                                    report)

            # NOTE(review): "reports_archive" is created here while static
            # reports below are copied into "_reports" -- confirm both
            # folders are intended.
            (job_folder / "reports_archive").mkdir()

            # Static reports are copied from the "reports" folder located
            # relative to this source file.
            for report_name in generator.static_reports:
                if report_name is not None:
                    report_name = f"{report_name}.ipynb"
                    src = local.path(__file__) / "../../reports" / report_name
                    dst_folder = job_folder / "_reports"
                    dst_folder.mkdir()
                    dst = dst_folder / report_name
                    src.copy(dst)

        # Record the job's uuid, creation time, user, and command line.
        utils.yaml_write(
            job_folder / "job_manifest.yaml",
            uuid=self.job_uuid,
            localtime=time.strftime("%Y-%m-%d, %H:%M:%S", time.localtime()),
            # Note: it seems localtime inside our container is UTC
            who=local.env.get("RUN_USER", "Unknown"),
            cmdline_args=sys.argv,
        )
Esempio n. 4
0
 def it_subsamples_array():
     """Check that subsampling the array fixture yields a (2, 3) array."""
     # `a` is a fixture from the enclosing scope (not visible here).
     picked = subsample(a, 2)
     assert picked.shape == (2, 3)