Exemple #1
0
class PrepParams(Params):
    """Declares the default values and the validation schema for PrepParams.

    NOTE(review): the behaviour of the `Params` base class (how `defaults`
    and `schema` are consumed) is not visible here -- confirm against it.
    """

    # Values used for keys the caller does not supply. Note that `proteins`
    # appears in the schema below but has no default here.
    defaults = Munch(
        protease=None,
        decoy_mode=None,
        include_misses=0,
        n_peps_limit=None,
        drop_duplicates=False,
        n_ptms_limit=None,
    )

    # Type constraints for every accepted key.
    # NOTE(review): `is_kws_r` presumably marks every listed key as required
    # -- confirm against the schema module.
    schema = s(
        s.is_kws_r(
            # Optional list of strings
            protease=s.is_list(noneable=True, elems=s.is_str()),
            decoy_mode=s.is_str(noneable=True),
            include_misses=s.is_int(),
            n_peps_limit=s.is_int(noneable=True),
            drop_duplicates=s.is_bool(),
            n_ptms_limit=s.is_int(noneable=True),
            # Each protein entry must carry a name and a sequence; the other
            # per-protein fields may be None.
            proteins=s.is_list(
                s.is_kws(
                    name=s.is_str(required=True),
                    sequence=s.is_str(required=True),
                    ptm_locs=s.is_str(noneable=True),
                    report=s.is_int(noneable=True),
                    abundance=s.is_number(noneable=True),
                )),
        ))
Exemple #2
0
        def it_returns_required_elems():
            """requirements() yields one (name, type, help, userdata) tuple per
            top-level element, in declaration order."""
            extra = {"some_key": 1}

            # Nested dict element; only its top-level entry ("f") is expected
            # to show up in the requirements list.
            nested = s.is_dict(
                all_required=True,
                elems={"d": s.is_int(), "e": s.is_int()},
            )
            elems = {
                "a": s.is_int(),
                "b": s.is_float(help="A float"),
                "c": s.is_number(),
                "d": s.is_str(userdata=extra),
                "e": s.is_list(),
                "f": nested,
            }
            schema = s(s.is_dict(all_required=True, elems=elems))

            expected = [
                ("a", int, None, None),
                ("b", float, "A float", None),
                ("c", float, None, None),  # is_number is reported as float
                ("d", str, None, extra),
                ("e", list, None, None),
                ("f", dict, None, None),
            ]
            actual = schema.requirements()
            assert actual == expected
Exemple #3
0
class SigprocV1Params(Params):
    """Defaults, schema and channel-mapping helpers for SigprocV1Params.

    `radiometry_channels` (when set) is a dict mapping an output channel
    name to an input channel index; output channels are ordered by sorted
    name.
    """

    defaults = dict(
        hat_rad=2,
        iqr_rng=96,
        threshold_abs=1.0,
        channel_indices_for_alignment=None,
        channel_indices_for_peak_finding=None,
        radiometry_channels=None,
        save_debug=False,
        peak_find_n_cycles=4,
        peak_find_start=0,
        radial_filter=None,
        anomaly_iqr_cutoff=95,
        n_fields_limit=None,
        save_full_signal_radmat_npy=False,
    )

    schema = s(
        s.is_kws_r(
            anomaly_iqr_cutoff=s.is_number(noneable=True, bounds=(0, 100)),
            radial_filter=s.is_float(noneable=True, bounds=(0, 1)),
            peak_find_n_cycles=s.is_int(bounds=(1, None), noneable=True),
            peak_find_start=s.is_int(bounds=(0, None), noneable=True),
            save_debug=s.is_bool(),
            hat_rad=s.is_int(bounds=(1, 3)),
            iqr_rng=s.is_number(noneable=True, bounds=(0, 100)),
            # Not sure of a reasonable bound
            threshold_abs=s.is_number(bounds=(0, 100)),
            channel_indices_for_alignment=s.is_list(
                s.is_int(), noneable=True
            ),
            channel_indices_for_peak_finding=s.is_list(
                s.is_int(), noneable=True
            ),
            radiometry_channels=s.is_dict(noneable=True),
            n_fields_limit=s.is_int(noneable=True),
            save_full_signal_radmat_npy=s.is_bool(),
        )
    )

    def validate(self):
        """Apply defaults, run the schema, then check radiometry_channels.

        Deliberately does not call super().validate(): defaults are applied
        here with override_nones=False.
        """
        self.schema.apply_defaults(
            self.defaults, apply_to=self, override_nones=False
        )
        context_name = self.__class__.__name__
        self.schema.validate(self, context=context_name)

        channel_map = self.radiometry_channels
        if channel_map is None:
            return

        # Names must consist solely of digits, lower-case letters, underscores.
        name_pattern = re.compile(r"[0-9a-z_]+")
        for channel_name, channel_i in channel_map.items():
            self._validate(
                name_pattern.fullmatch(channel_name),
                "radiometry_channels name must be lower-case alphanumeric (including underscore)",
            )
            self._validate(
                isinstance(channel_i, int), "channel_i must be an integer"
            )

    def set_radiometry_channels_from_input_channels_if_needed(
        self, n_channels
    ):
        """Fill in an identity mapping "ch_<i>" -> i when none was given."""
        if self.radiometry_channels is not None:
            return
        # Assume channels from nd2 manifest
        self.radiometry_channels = {
            f"ch_{index}": index for index in range(n_channels)
        }

    @property
    def n_output_channels(self):
        """Number of entries in radiometry_channels."""
        names = self.radiometry_channels.keys()
        return len(names)

    @property
    def n_input_channels(self):
        """Number of entries in radiometry_channels.

        NOTE(review): this is computed identically to n_output_channels --
        confirm that is intended.
        """
        names = self.radiometry_channels.keys()
        return len(names)

    @property
    def channels_cycles_dim(self):
        """Return the cached _outchannels_inchannels_cycles_dim value.

        This is a cache set in sigproc_v1, a helper for the repetitive call:
        n_outchannels, n_inchannels, n_cycles, dim =
        """
        cached = self._outchannels_inchannels_cycles_dim
        return cached

    def _input_channels(self):
        """
        Return a list mapping each output channel number to its input channel.

        Output channels are numbered by the sorted order of the names in
        radiometry_channels.
        Example:
            input might have channels ["foo", "bar"]
            the radiometry_channels has: {"bar": 1}
            Thus this function returns [1] because the 0th output channel is
            mapped to the "1" input channel
        """
        mapping = self.radiometry_channels
        ordered_names = sorted(mapping.keys())
        return [mapping[channel_name] for channel_name in ordered_names]

    # def input_names(self):
    #     return sorted(self.radiometry_channels.keys())

    def output_channel_to_input_channel(self, out_ch):
        """Look up the input channel feeding output channel `out_ch`."""
        input_channels = self._input_channels()
        return input_channels[out_ch]

    def input_channel_to_output_channel(self, in_ch):
        """Not every input channel necessarily has an output; can return None"""

        def is_requested_channel(x):
            return x == in_ch

        return utils.filt_first_arg(
            self._input_channels(), is_requested_channel
        )
Exemple #4
0
 def it_validates_number():
     """is_number accepts floats and ints and rejects strings."""
     number_schema = s(s.is_number())

     # Both of these must validate without raising.
     for accepted in (1.0, 1):
         number_schema.validate(accepted)

     with zest.raises(SchemaValidationFailed):
         number_schema.validate("a str")
Exemple #5
0
class PrepParams(Params):
    """Defaults, schema and abundance validation for PrepParams.

    On top of the schema check done by the base class, `validate` checks the
    per-protein `abundance` values and fills them in when none are given.
    """

    PHOTOBLEACHING_PSEUDO_AA = "X"
    # When False, a None or NaN abundance is a validation error as soon as
    # any protein carries a real abundance value.
    ALLOW_NONES_AND_NANS_IN_ABUNDANCE = False
    NORMALIZE_ABUNDANCE = False  # Abundance is normalized in gen

    defaults = Munch(
        protease=None,
        decoy_mode=None,
        include_misses=0,
        n_peps_limit=None,
        drop_duplicates=False,
        n_ptms_limit=None,
        is_photobleaching_run=False,
        photobleaching_n_cycles=None,
        photobleaching_run_n_dye_count=None,
    )

    schema = s(
        s.is_kws_r(
            protease=s.is_list(noneable=True, elems=s.is_str()),
            decoy_mode=s.is_str(noneable=True),
            include_misses=s.is_int(),
            n_peps_limit=s.is_int(noneable=True),
            drop_duplicates=s.is_bool(),
            n_ptms_limit=s.is_int(noneable=True),
            proteins=s.is_list(
                s.is_kws(
                    name=s.is_str(required=True),
                    sequence=s.is_str(required=True),
                    ptm_locs=s.is_str(noneable=True),
                    is_poi=s.is_int(noneable=True),
                    abundance=s.is_number(noneable=True),
                )),
            is_photobleaching_run=s.is_bool(),
            photobleaching_n_cycles=s.is_int(noneable=True),
            photobleaching_run_n_dye_count=s.is_int(noneable=True),
        ))

    def validate(self):
        """Run the base validation, then validate or fill in abundances.

        If at least one protein has a real (non-None, non-NaN) abundance,
        every protein must have an "abundance" key, real values must be
        >= 0, and None / NaN values are rejected unless
        ALLOW_NONES_AND_NANS_IN_ABUNDANCE is set. When NORMALIZE_ABUNDANCE
        is set, abundances are then divided by the smallest positive
        abundance.

        If no protein has a real abundance (e.g. a protein csv without an
        abundance column comes through as all NaNs), every abundance is set
        to 1.

        Raises:
            SchemaValidationFailed: if a protein's abundance is missing,
                negative, or a disallowed None / NaN.
        """
        super().validate()

        # Note that self.proteins is likely a list of Munches, but could be a
        # list of dicts, so items are accessed by key and never as attributes.

        def is_real_number(abundance):
            # None and NaN are the two "no value" placeholders.
            return abundance is not None and not math.isnan(abundance)

        abundance_info_present = any(
            is_real_number(protein.get("abundance"))
            for protein in self.proteins)

        if not abundance_info_present:
            # Abundance information is missing from all proteins
            # Set abundance to 1
            for protein in self.proteins:
                protein["abundance"] = 1
            return

        allow_placeholders = self.ALLOW_NONES_AND_NANS_IN_ABUNDANCE

        # Validate every protein and track the smallest positive abundance.
        min_abundance = None
        for protein in self.proteins:
            msg = None
            if "abundance" not in protein:
                msg = "Abundance missing"
            else:
                abundance = protein["abundance"]
                # None / NaN are classified before the sign test so they are
                # reported accurately and, when allowed, are never compared
                # numerically (None would raise TypeError, NaN would fail
                # the >= 0 test).
                if abundance is None:
                    if not allow_placeholders:
                        msg = "Abundance must not be None"
                elif math.isnan(abundance):
                    if not allow_placeholders:
                        msg = "Abundance must not be NaN"
                elif abundance < 0:
                    msg = "Abundance must be greater than or equal to zero"

            if msg is not None:
                abundance_value = protein.get("abundance")
                raise SchemaValidationFailed(
                    f"Protein {protein.get('name')} has invalid abundance: {abundance_value} - {msg}"
                )

            # Zero is a valid abundance but is excluded from the minimum,
            # since the minimum is used as a divisor below.
            if (is_real_number(abundance) and abundance > 0
                    and (min_abundance is None or abundance < min_abundance)):
                min_abundance = abundance

        # min_abundance stays None when every real abundance is zero; there
        # is nothing to normalize by in that case.
        if (self.NORMALIZE_ABUNDANCE and min_abundance is not None
                and min_abundance != 1):
            log.info("abundance data is not normalized, normalizing.")
            # normalize abundance by min value
            for protein in self.proteins:
                if protein["abundance"] is not None:
                    protein["abundance"] /= min_abundance