Esempio n. 1
0
    def peak_fits(self):
        """
        Load the "peak_fit_df" property via _load_df_prop_from_fields(),
        validate it against peak_fit_df_schema, and return it with peak_i
        set to each row's position in the combined frame.
        """
        fit_df = self._load_df_prop_from_fields("peak_fit_df")
        check.df_t(fit_df, self.peak_fit_df_schema)

        # Rows carry only a field-local peak index when loaded; the
        # pan-field peak_i is simply the row position once the loaded
        # index has been replaced by a fresh 0..n-1 range.
        fit_df = fit_df.reset_index(drop=True)
        fit_df.peak_i = fit_df.index
        return fit_df
Esempio n. 2
0
    def it_radmats():
        """radmats() must pass the radmat schema and hold 4 * 2 * 3 rows."""
        radmat_df = res.radmats()
        check.df_t(radmat_df, SigprocV1Result.radmat_df_schema)
        assert len(radmat_df) == 4 * 2 * 3

        def _first_signal(peak_i, channel_i, cycle_i):
            # First signal value among the rows matching all three indices.
            row_mask = (
                (radmat_df.peak_i == peak_i)
                & (radmat_df.channel_i == channel_i)
                & (radmat_df.cycle_i == cycle_i)
            )
            return radmat_df[row_mask].signal.values[0]

        # Spot-check a couple of known values
        assert _first_signal(1, 1, 1) == 1.0
        assert _first_signal(2, 0, 0) == 5.0
Esempio n. 3
0
    def peaks(self, fields=None, n_peaks_subsample=None):
        """
        Return the peak table for the requested fields as one DataFrame.

        Arguments:
            fields: Field selector, handed unchanged to
                self._fields_to_field_iz().
            n_peaks_subsample: When not None, the number of rows to draw
                at random, with replacement, from the result.

        Returns:
            The "peak_df" rows validated against peak_df_schema. When a
            "peak_fit_df" property exists, its columns are joined on
            (field_i, field_peak_i). peak_i is set to the row position
            before any subsampling takes place.
        """
        df = self._load_df_prop_from_fields(
            "peak_df", field_iz=self._fields_to_field_iz(fields))
        check.df_t(df, self.peak_df_schema)

        if self._has_prop("peak_fit_df"):
            fit_df = self._load_df_prop_from_fields(
                "peak_fit_df", field_iz=self._fields_to_field_iz(fields))
            # Validate the fit table (not the peak table) against the
            # fit schema.
            check.df_t(fit_df, self.peak_fit_df_schema)

            join_keys = ["field_i", "field_peak_i"]
            # reset_index() turns the join keys back into ordinary columns
            # so that the positional re-index below does not throw them
            # away; the result then has field_i / field_peak_i whether or
            # not a fit table was present.
            df = (
                df.set_index(join_keys)
                .join(fit_df.set_index(join_keys))
                .reset_index()
            )

        # The peaks have a local frame_peak_i but they
        # don't have their pan-field peak_i set yet.
        df = df.reset_index(drop=True)
        df.peak_i = df.index

        if n_peaks_subsample is not None:
            # Sampling is with replacement, so rows may repeat.
            df = df.sample(n_peaks_subsample, replace=True)

        return df
Esempio n. 4
0
 def it_mask_rects():
     """mask_rects() must pass the mask-rects schema and hold 2 * 2 * 3 rows."""
     mask_df = res.mask_rects()
     check.df_t(mask_df, SigprocV1Result.mask_rects_df_schema)
     n_expected_rows = 2 * 2 * 3
     assert len(mask_df) == n_expected_rows
Esempio n. 5
0
def _do_ptm_permutations(df, n_ptms_limit):
    """
    Build one modified copy of a peptide for every non-empty subset of the
    PTM locations that fall inside it.

    Arguments:
        df: DataFrame for a single pep_i with (at least) the columns
            pep_i, pro_ptm_locs, pep_offset_in_pro and aa, one row per
            location in the peptide. pro_ptm_locs is identical on every
            row: a ";"-separated string of 1-based protein positions, or
            an empty value when there are none.
        n_ptms_limit: Largest number of in-peptide PTM locations for which
            permutations are generated. None means no limit.

    Returns:
        A list of DataFrames, one per non-empty combination of PTM
        locations. Each is a copy of df with pep_i set to NaN and "[p]"
        appended to aa at the chosen locations. The list is empty when the
        peptide has no PTM locations or exceeds n_ptms_limit.
    """

    check.df_t(
        df,
        dict(pep_i=int, pro_ptm_locs=object, pep_offset_in_pro=int, aa=object),
        allow_extra_columns=True,
    )

    # pro_ptm_locs is identical for all rows
    pro_ptm_locs = df.pro_ptm_locs.values[0]
    if not pro_ptm_locs:
        return []

    # get 0-based indices from string representation; these are for the
    # entire protein.
    protein_locs = [int(x) - 1 for x in pro_ptm_locs.split(";")]

    # keep only the ptms that coincide with the range spanned by this
    # peptide.
    min_pos = df.pep_offset_in_pro.min()
    max_pos = df.pep_offset_in_pro.max()
    ptm_locs_zero_based = [
        loc for loc in protein_locs if min_pos <= loc <= max_pos
    ]

    n_ptms = len(ptm_locs_zero_based)

    # A limit of None means "unlimited"; guard it once so the comparison
    # is never made against None.
    if n_ptms_limit is not None and n_ptms > n_ptms_limit:
        _info(
            f"Skipping ptm for peptide {df.pep_i.iloc[0]} with {n_ptms} PTMs")
        return []

    # Every non-empty subset of the locations.  So if have [2,4,10] in ptms
    # you get
    # powerset = [ (2), (4), (10), (2,4), (2,10), (4,10), (2,4,10) ]
    # (each stored as a numpy array).
    powerset = [
        np.array(combo)
        for length in range(1, n_ptms + 1)
        for combo in itertools.combinations(ptm_locs_zero_based, length)
    ]

    # The goal is to make a new peptide+seq for each of those subsets by
    # adding the modification '[p]' to the aa at that seq index location
    mod = "[p]"

    new_pep_seqs = []
    for ptm_locs in powerset:
        new_pep_seq = df.copy()

        # The modified peptide has no pep_i of its own yet.
        new_pep_seq["pep_i"] = np.nan

        # Index by protein offset so the PTM locations can be used as
        # labels.  .loc is the accessor for a collection of labels; .at
        # only addresses a single scalar cell.
        new_pep_seq = new_pep_seq.set_index("pep_offset_in_pro")
        new_pep_seq.loc[ptm_locs, "aa"] = new_pep_seq.loc[ptm_locs, "aa"] + mod
        new_pep_seq = new_pep_seq.reset_index()

        new_pep_seqs.append(new_pep_seq)

    return new_pep_seqs
Esempio n. 6
0
 def mask_rects(self):
     """
     Load the "mask_rects_df" property via _load_df_prop_from_fields(),
     validate it against mask_rects_df_schema, and return it.
     """
     rects_df = self._load_df_prop_from_fields("mask_rects_df")
     check.df_t(rects_df, self.mask_rects_df_schema)
     return rects_df
Esempio n. 7
0
 def fields(self, fields=None):
     """
     Load the "field_df" property for the field indices produced by
     _fields_to_field_iz(fields) and return it after validating against
     field_df_schema (extra columns are allowed).
     """
     field_iz = self._fields_to_field_iz(fields)
     field_df = self._load_df_prop_from_fields("field_df", field_iz=field_iz)
     check.df_t(field_df, self.field_df_schema, allow_extra_columns=True)
     return field_df
Esempio n. 8
0
 def fields(self):
     """
     Load the "field_df" property via _load_df_prop_from_all_fields() and
     return it after validating against field_df_schema (extra columns
     are allowed).
     """
     field_df = self._load_df_prop_from_all_fields("field_df")
     check.df_t(field_df, self.field_df_schema, allow_extra_columns=True)
     return field_df