コード例 #1
0
    def it_reverses_ptm_locs():
        """Check the PTM locations reported for decoys built in "reverse" mode.

        The fixture is local to this test: a "nul" placeholder at pro_i 0,
        "id1" (two residues, no PTM locations) and "id2" (four residues, PTM
        locations "1;3").
        """
        pros_df = pd.DataFrame(
            dict(
                pro_id=["nul", "id1", "id2"],
                pro_is_decoy=[False, False, False],
                pro_i=[0, 1, 2],
                pro_ptm_locs=["", "", "1;3"],
            )
        )

        pro_seqs_df = pd.DataFrame(
            dict(
                pro_i=[0, 1, 1, 2, 2, 2, 2],
                aa=[".", "B", "C", "D", "E", "F", "G"],
            )
        )

        # Only the protein-level frame is inspected; the sequence frame is
        # unpacked purely to keep the two-value return contract explicit.
        decoys_df, _decoy_seqs_df = _step_3_generate_decoys(
            pros_df, pro_seqs_df, decoy_mode="reverse"
        )

        # The original frame has the "nul" entry at iloc 0 and no decoy is
        # expected for it, so decoy row 0 corresponds to id1 and row 1 to id2.
        ptm_locs = decoys_df.pro_ptm_locs
        assert ptm_locs.iloc[0] == ""
        assert ptm_locs.iloc[1] == "2;4"
コード例 #2
0
 def it_handles_no_decoy_mode():
     """With decoy_mode=None both returned objects must be empty DataFrames."""
     results = _step_3_generate_decoys(pros_df, pro_seqs_df, decoy_mode=None)
     decoys_df, decoy_seqs_df = results

     # Same checks, same order as before: type first, then emptiness,
     # for the protein frame and then the sequence frame.
     for frame in (decoys_df, decoy_seqs_df):
         assert isinstance(frame, pd.DataFrame)
         assert len(frame) == 0
コード例 #3
0
    def positive_reverse():
        """Zest group covering decoys generated with decoy_mode="reverse".

        The decoys are generated once from the enclosing scope's pros_df /
        pro_seqs_df and shared by the nested checks below.
        """
        decoys_df, decoy_seqs_df = _step_3_generate_decoys(
            pros_df, pro_seqs_df, decoy_mode="reverse"
        )

        def it_sets_is_decoy_true():
            """Every generated row is flagged as a decoy."""
            decoy_flags = decoys_df.pro_is_decoy.tolist()
            assert decoy_flags == [True, True]

        def it_skips_nul():
            """Two decoys are expected, numbered 3 and 4."""
            decoy_indices = decoys_df.pro_i.tolist()
            assert decoy_indices == [3, 4]

        def it_names_the_decoy():
            """Decoy ids carry the "rev-" prefix on the source id."""
            decoy_names = decoys_df.pro_id.tolist()
            assert decoy_names == ["rev-id1", "rev-id2"]

        def it_reverses():
            """Each decoy's residues appear in reversed order."""
            seq_owner = decoy_seqs_df.pro_i.tolist()
            seq_residues = decoy_seqs_df.aa.tolist()
            assert seq_owner == [3, 3, 4, 4]
            assert seq_residues == ["C", "B", "E", "D"]

        zest()
コード例 #4
0
    def positive_shuffle():
        # add a new longer protein entry we can really test the shuffle with
        nonlocal pros_df, pro_seqs_df
        new_pro = {
            "pro_id": "id3",
            "pro_is_decoy": False,
            "pro_i": 3,
            "pro_ptm_locs": "",
        }
        pros_df = pros_df.append(new_pro, ignore_index=True)

        aas = sorted(list(aaseq.aa_code_df.aa))  # 27 chars long
        tups = list(itertools.zip_longest([], aas, fillvalue=3))
        new_pro_seqs = pd.DataFrame(tups, columns=["pro_i", "aa"])
        pro_seqs_df = pro_seqs_df.append(new_pro_seqs).reset_index(drop=True)

        decoys_df, decoy_seqs_df = _step_3_generate_decoys(
            pros_df, pro_seqs_df, decoy_mode="shuffle")

        def it_sets_is_decoy_true():
            assert decoys_df.pro_is_decoy.tolist() == [True, True, True]

        def it_skips_nul():
            assert decoys_df.pro_i.tolist() == [4, 5, 6]

        def it_names_the_decoy():
            assert decoys_df.pro_id.tolist() == [
                "shu-id1", "shu-id2", "shu-id3"
            ]

        def it_shuffles():
            assert decoy_seqs_df.pro_i.tolist() == [4, 4, 5, 5
                                                    ] + [6] * len(aas)
            assert decoy_seqs_df.aa.tolist()[4:] != aas
            assert decoy_seqs_df.aa.tolist()[4:] != aas[::-1]
            assert sorted(
                decoy_seqs_df.aa.tolist()[0:4]) == ["B", "C", "D", "E"]
            assert sorted(decoy_seqs_df.aa.tolist()[4:]) == aas

        zest()