def it_reverses_ptm_locs():
    """Check the PTM locations reported on decoys generated in reverse mode.

    The test builds its own fixtures: a "nul" row, a two-residue protein with
    no PTM locations, and a four-residue protein whose ``pro_ptm_locs`` is
    "1;3". The decoy of the latter is expected to carry "2;4".
    """
    pro_columns = ["pro_id", "pro_is_decoy", "pro_i", "pro_ptm_locs"]
    pro_rows = [
        ("nul", False, 0, ""),
        ("id1", False, 1, ""),
        ("id2", False, 2, "1;3"),
    ]
    ptm_pros_df = pd.DataFrame(data=pro_rows, columns=pro_columns)

    seq_columns = ["pro_i", "aa"]
    seq_rows = [
        (0, "."),
        (1, "B"),
        (1, "C"),
        (2, "D"),
        (2, "E"),
        (2, "F"),
        (2, "G"),
    ]
    ptm_seqs_df = pd.DataFrame(data=seq_rows, columns=seq_columns)

    # Only the protein-level frame is inspected; the sequence frame is ignored.
    decoys_df, _ = _step_3_generate_decoys(
        ptm_pros_df, ptm_seqs_df, decoy_mode="reverse"
    )

    # Remember that the original df has the "null" entry at iloc 0, and no
    # decoy is created for that, so decoy row 0 corresponds to "id1" and
    # decoy row 1 corresponds to "id2".
    id1_decoy = decoys_df.iloc[0]
    assert id1_decoy.pro_ptm_locs == ""

    id2_decoy = decoys_df.iloc[1]
    assert id2_decoy.pro_ptm_locs == "2;4"
def it_handles_no_decoy_mode():
    """With ``decoy_mode=None`` both returned objects must be empty DataFrames."""
    generated = _step_3_generate_decoys(pros_df, pro_seqs_df, decoy_mode=None)
    decoys_df, decoy_seqs_df = generated

    # Check the protein frame first, then the sequence frame: type, then size.
    for frame in (decoys_df, decoy_seqs_df):
        assert isinstance(frame, pd.DataFrame)
        assert len(frame) == 0
def positive_reverse():
    """Group of checks on decoys generated with ``decoy_mode="reverse"``.

    Uses the ``pros_df`` / ``pro_seqs_df`` fixtures from the enclosing scope.
    """
    decoys_df, decoy_seqs_df = _step_3_generate_decoys(
        pros_df, pro_seqs_df, decoy_mode="reverse"
    )

    def it_sets_is_decoy_true():
        """Every generated row is flagged as a decoy."""
        decoy_flags = decoys_df["pro_is_decoy"].tolist()
        assert decoy_flags == [True, True]

    def it_skips_nul():
        """Two decoys are produced, with pro_i values 3 and 4."""
        decoy_indices = decoys_df["pro_i"].tolist()
        assert decoy_indices == [3, 4]

    def it_names_the_decoy():
        """Decoy ids are the source ids prefixed with "rev-"."""
        decoy_names = decoys_df["pro_id"].tolist()
        assert decoy_names == ["rev-id1", "rev-id2"]

    def it_reverses():
        """Residues of each decoy appear in reverse order."""
        expected_pro_i = [3, 3, 4, 4]
        expected_aas = ["C", "B", "E", "D"]
        assert decoy_seqs_df["pro_i"].tolist() == expected_pro_i
        assert decoy_seqs_df["aa"].tolist() == expected_aas

    zest()
def positive_shuffle(): # add a new longer protein entry we can really test the shuffle with nonlocal pros_df, pro_seqs_df new_pro = { "pro_id": "id3", "pro_is_decoy": False, "pro_i": 3, "pro_ptm_locs": "", } pros_df = pros_df.append(new_pro, ignore_index=True) aas = sorted(list(aaseq.aa_code_df.aa)) # 27 chars long tups = list(itertools.zip_longest([], aas, fillvalue=3)) new_pro_seqs = pd.DataFrame(tups, columns=["pro_i", "aa"]) pro_seqs_df = pro_seqs_df.append(new_pro_seqs).reset_index(drop=True) decoys_df, decoy_seqs_df = _step_3_generate_decoys( pros_df, pro_seqs_df, decoy_mode="shuffle") def it_sets_is_decoy_true(): assert decoys_df.pro_is_decoy.tolist() == [True, True, True] def it_skips_nul(): assert decoys_df.pro_i.tolist() == [4, 5, 6] def it_names_the_decoy(): assert decoys_df.pro_id.tolist() == [ "shu-id1", "shu-id2", "shu-id3" ] def it_shuffles(): assert decoy_seqs_df.pro_i.tolist() == [4, 4, 5, 5 ] + [6] * len(aas) assert decoy_seqs_df.aa.tolist()[4:] != aas assert decoy_seqs_df.aa.tolist()[4:] != aas[::-1] assert sorted( decoy_seqs_df.aa.tolist()[0:4]) == ["B", "C", "D", "E"] assert sorted(decoy_seqs_df.aa.tolist()[4:]) == aas zest()