コード例 #1
0
ファイル: zest_zap.py プロジェクト: manastech/plaster
 def it_groups():
     """
     Run zap.df_groups over a five-row frame grouped by column "a" and check
     the collected results.

     listi(results, i) presumably gathers element i of every per-group
     result (verify against its definition); element 0 is expected to be
     [1, 2] and element 1 is expected to be [2, 3].
     """
     frame = pd.DataFrame({"a": [1, 1, 2, 2, 2], "b": [1, 2, 3, 4, 5]})
     results = zap.df_groups(test9, frame.groupby("a"))
     assert listi(results, 0) == [1, 2]
     assert listi(results, 1) == [2, 3]
コード例 #2
0
ファイル: call_bag.py プロジェクト: erisyon/plaster
 def peps_above_thresholds(self, precision=0.0, recall=0.0):
     """
     Return the positions of the peptides that pass the given thresholds.

     _do_peps_above_thresholds is applied to every "pep_i" group of
     self.pr_curve_by_pep(), receiving precision and recall as keyword
     arguments.  The call runs inside a zap.Context(mode="thread") block.

     Returns:
         A flat numpy array holding the row positions (after reset_index)
         whose "passes" value is non-zero.  Presumably these positions
         coincide with pep_i -- verify against the caller.
     """
     with zap.Context(mode="thread"):
         # The grouping is built inside the context, exactly where the
         # original evaluated it.
         per_pep = zap.df_groups(
             _do_peps_above_thresholds,
             self.pr_curve_by_pep().groupby("pep_i"),
             precision=precision,
             recall=recall,
         )

     per_pep = per_pep.reset_index()
     per_pep = per_pep.sort_index()
     # The per-group result arrives in a column labelled 0; give it a name.
     per_pep = per_pep.rename(columns={0: "passes"})

     passing = np.argwhere(per_pep["passes"].values)
     return passing.flatten()
コード例 #3
0
ファイル: sim_worker.py プロジェクト: manastech/plaster
def _run_sim(sim_params, pep_seqs_df, name, n_peps, n_samples, progress):
    """
    Simulate every peptide in pep_seqs_df and collect the outputs.

    Three ArrayResult buffers named "{name}_dyemat", "{name}_radmat" and
    "{name}_recall" are created in "w+" mode and handed to _do_pep_sim,
    which zap.df_groups invokes once per "pep_i" group.

    Side effects:
        * sim_params.random_seed is incremented in place when it is set.
        * numpy's global random generator is re-seeded.

    Returns:
        (dyemat, radmat, recall, flus, flu_remainders) where flus and
        flu_remainders are numpy arrays built from elements 0 and 1 of the
        zap.df_groups results (as extracted by utils.listi).
    """
    if sim_params.get("random_seed") is not None:
        # Bump the seed so that the train and test runs differ.
        sim_params.random_seed += 1

    # NOTE(review): this reads sim_params.random_seed even when the guard
    # above saw no seed -- confirm the attribute always exists.
    np.random.seed(sim_params.random_seed)

    # dyemat and radmat share one shape; only the dtype differs.
    mat_shape = (n_peps, n_samples, sim_params.n_channels, sim_params.n_cycles)

    dyemat = ArrayResult(f"{name}_dyemat", shape=mat_shape, dtype=DyeType, mode="w+")
    radmat = ArrayResult(f"{name}_radmat", shape=mat_shape, dtype=RadType, mode="w+")
    recall = ArrayResult(f"{name}_recall", shape=(n_peps,), dtype=RecallType, mode="w+")

    per_pep_results = zap.df_groups(
        _do_pep_sim,
        pep_seqs_df.groupby("pep_i"),
        sim_params=sim_params,
        n_samples=n_samples,
        output_dyemat=dyemat,
        output_radmat=radmat,
        output_recall=recall,
        _progress=progress,
        _trap_exceptions=False,
        _process_mode=True,
    )

    flus, flu_remainders = (
        np.array(utils.listi(per_pep_results, position)) for position in (0, 1)
    )

    return dyemat, radmat, recall, flus, flu_remainders
コード例 #4
0
ファイル: prep_worker.py プロジェクト: manastech/plaster
def _step_5_create_ptm_peptides(peps_df, pep_seqs_df, pros_df, n_ptms_limit):
    """
    Create new peps and pep_seqs by applying PTMs based on the pro_ptm_locs information
    in pros_df.
    """

    # 1. Get subset of proteins+peps with ptms by filtering proteins with ptms and joining
    # to peps and pep_seqs
    #

    # This None vs "" is messy.

    pros_with_ptms = pros_df[pros_df.pro_ptm_locs != ""]
    df = (pros_with_ptms.set_index("pro_i").join(
        peps_df.set_index("pro_i")).reset_index())
    df = df.set_index("pep_i").join(
        pep_seqs_df.set_index("pep_i")).reset_index()

    if len(df) == 0:
        return None, None

    # 2. for each peptide apply _do_ptm_permutations which will result in
    # a list of new dataframes of the form joined above; new_pep_infos is a
    # list of these lists.
    #
    # new_pep_infos = parallel_groupby_apply(
    #     df.groupby("pep_i"),
    #     _do_ptm_permutations,
    #     n_ptms_limit=n_ptms_limit,
    #     _trap_exceptions=False,
    #     _process_mode=True,
    # )
    new_pep_infos = zap.df_groups(
        _do_ptm_permutations,
        df.groupby("pep_i"),
        n_ptms_limit=n_ptms_limit,
        _trap_exceptions=False,
        _process_mode=True,
    )

    # 3. create new peps, pep_seqs, from list of dfs returned in (2)
    #
    #    peps_columns = ["pep_i", "pep_start", "pep_stop", "pro_i"]
    #    pep_seqs_columns = ["pep_i", "aa", "pep_offset_in_pro"]
    #
    new_peps = []
    new_pep_seqs = []
    pep_iz = peps_df.pep_i.unique()
    next_pep_i = peps_df.pep_i.max() + 1
    for new_peps_info in new_pep_infos:
        for pep_info in new_peps_info:
            # Note we only want one pep entry and pep_info contains enough rows to hold
            # the whole sequence for the peptide in the aa column.  So drop_duplicates()
            pep = pep_info[PrepResult.peps_columns].drop_duplicates()
            pep_seq = pep_info[PrepResult.pep_seqs_columns].copy(
            )  # avoid SettingWithCopyWarning with copy()

            pep.pep_i = next_pep_i
            pep_seq.pep_i = next_pep_i
            next_pep_i += 1

            new_peps += [pep]
            new_pep_seqs += [pep_seq]

    new_peps_df = pd.concat(new_peps)
    new_pep_seqs_df = pd.concat(new_pep_seqs)

    return new_peps_df, new_pep_seqs_df