def get_ui_copy_and_paste(pqo, fn, args_dict, with_abundance=False, num_bins=NUM_BINS):
    """Build a Userinput from a tab-separated file as if it were pasted text.

    The file ``fn`` is read with pandas (tab separator). The non-null values
    of its "foreground" column are joined with newlines to form the
    foreground string.

    Without abundance data the non-null "background" values are joined the
    same way. With ``with_abundance=True`` every background entry is paired
    with a stringified "intensity" value and emitted as
    ``<background>\\t<intensity>\\n``.

    Returns the ``userinput.Userinput`` built from those two strings together
    with ``pqo``, ``num_bins`` and ``args_dict``.
    """
    table = pd.read_csv(fn, sep='\t')
    has_fg = table["foreground"].notnull()
    fg = "\n".join(table.loc[has_fg, "foreground"].tolist())

    has_bg = table["background"].notnull()
    bg_values = table.loc[has_bg, "background"].tolist()

    if with_abundance:
        has_intensity = table["intensity"].notnull()
        intensity_values = table.loc[has_intensity, "intensity"].tolist()
        # NOTE(review): both columns are null-filtered independently and then
        # paired by position; zip() stops at the shorter list. A row with a
        # background entry but no intensity would shift the pairing.
        background_string = "".join(
            an + "\t" + str(intensity) + "\n"
            for an, intensity in zip(bg_values, intensity_values)
        )
    else:
        background_string = "\n".join(bg_values)

    return userinput.Userinput(
        pqo=pqo,
        foreground_string=fg,
        background_string=background_string,
        num_bins=num_bins,
        args_dict=args_dict,
    )
def test_cleanupforanalysis_genome_Userinput_File(pqo_STRING, STRING_examples, args_dict):
    """Check foreground cleanup for the "genome" method with file-like input.

    The identifiers from ``STRING_examples`` are written below a
    "foreground" header into an in-memory stream, wrapped in a werkzeug
    ``FileStorage`` and handed to ``Userinput`` via ``fn``.

    NOTE(review): the previous docstring referenced
    ``python/test_userinput.py::test_cleanupforanalysis_characterize_foreground_REST_API[edge case, empty DFs with NaNs] XPASS``,
    which names a different test; confirm whether that note is still relevant.
    """
    ENSPs, taxid = STRING_examples

    args_dict_temp = args_dict.copy()
    args_dict_temp["enrichment_method"] = "genome"
    args_dict_temp["taxid"] = taxid

    file_content = "foreground\n" + "\n".join(ENSPs)
    werkzeug_fn = werkzeug.datastructures.FileStorage(stream=StringIO(file_content))
    werkzeug_fn.seek(0)  # rewind before Userinput reads from it

    ui = userinput.Userinput(pqo_STRING, fn=werkzeug_fn, args_dict=args_dict_temp)
    fg_column = ui.foreground[ui.col_foreground]

    # no NaNs where ANs are expected, and at least one entry present
    assert fg_column.isnull().sum() == 0
    assert fg_column.notnull().sum() > 0

    # foreground entries are strings
    assert isinstance(fg_column.iloc[0], str)

    # no duplicates
    assert not fg_column.duplicated().any()

    foreground_n = ui.get_foreground_n()
    background_n = ui.get_background_n()
    assert foreground_n <= background_n
def test_cleanupforanalysis_genome_Userinput_copyNpaste(pqo_STRING, STRING_examples, args_dict):
    """Check foreground cleanup for the "genome" method with pasted input.

    The identifiers from ``STRING_examples`` are formatted with
    ``format_for_REST_API`` and passed to ``Userinput`` as
    ``foreground_string``.

    NOTE(review): the previous docstring referenced
    ``python/test_userinput.py::test_cleanupforanalysis_characterize_foreground_REST_API[edge case, empty DFs with NaNs] XPASS``,
    which names a different test; confirm whether that note is still relevant.
    """
    ENSPs, taxid = STRING_examples
    fg = format_for_REST_API(ENSPs)

    args_dict_temp = args_dict.copy()
    args_dict_temp["enrichment_method"] = "genome"
    args_dict_temp["taxid"] = taxid

    ui = userinput.Userinput(pqo_STRING, foreground_string=fg, args_dict=args_dict_temp)
    fg_column = ui.foreground[ui.col_foreground]

    # no NaNs where ANs are expected, and at least one entry present
    assert fg_column.isnull().sum() == 0
    assert fg_column.notnull().sum() > 0

    # foreground entries are strings
    assert isinstance(fg_column.iloc[0], str)

    # no duplicates
    assert not fg_column.duplicated().any()

    foreground_n = ui.get_foreground_n()
    background_n = ui.get_background_n()
    assert foreground_n <= background_n
def test_cleanupforanalysis_compare_groups_Userinput(pqo_STRING, fixture_fg_bg_meth_expected_cases, args_dict):
    """Check cleanup for the "compare_groups" method with pasted input.

    The foreground and background from the fixture are joined into
    newline-separated strings (nulls dropped). After ``Userinput`` has
    processed them, both columns must be free of NaNs, non-empty and contain
    strings. The cleaned columns must hold at least as many duplicates as
    the non-null part of the original input.
    """
    fg_input, bg_input, _ = fixture_fg_bg_meth_expected_cases

    fg = "\n".join(fg_input[fg_input.notnull()].tolist())
    bg_present = bg_input["background"].notnull()
    bg = "\n".join(bg_input.loc[bg_present, "background"].tolist())

    args_dict_temp = args_dict.copy()
    args_dict_temp["enrichment_method"] = "compare_groups"

    ui = userinput.Userinput(
        pqo_STRING,
        foreground_string=fg,
        background_string=bg,
        args_dict=args_dict_temp,
    )

    # no NaNs where ANs are expected
    fg_clean = ui.foreground[ui.col_foreground]
    assert fg_clean.isnull().sum() == 0
    assert fg_clean.notnull().sum() > 0
    bg_clean = ui.background[ui.col_background]
    assert bg_clean.isnull().sum() == 0
    assert bg_clean.notnull().sum() > 0

    # foreground and background are strings
    assert isinstance(fg_clean.iloc[0], str)
    assert isinstance(bg_clean.iloc[0], str)

    # Duplicates present in the original input must survive the cleanup.
    # The counts are not necessarily equal because of splice variants, hence
    # ">=". NaNs are removed from df_orig before counting.
    fg_orig = ui.df_orig[ui.col_foreground]
    bg_orig = ui.df_orig[ui.col_background]
    fg_orig_dups = fg_orig[fg_orig.notnull()].duplicated().sum()
    bg_orig_dups = bg_orig[bg_orig.notnull()].duplicated().sum()
    assert fg_clean.duplicated().sum() >= fg_orig_dups
    assert bg_clean.duplicated().sum() >= bg_orig_dups
def test_cleanupforanalysis_compare_samples_Userinput(pqo_STRING, fixture_fg_bg_meth_expected_cases, args_dict):
    """Check cleanup for the "compare_samples" method with pasted input.

    The foreground and background from the fixture are joined into
    newline-separated strings (nulls dropped). After ``Userinput`` has
    processed them, both columns must be free of NaNs, non-empty, contain
    strings and hold no duplicates.
    """
    fg_input, bg_input, _ = fixture_fg_bg_meth_expected_cases

    fg = "\n".join(fg_input[fg_input.notnull()].tolist())
    bg_present = bg_input["background"].notnull()
    bg = "\n".join(bg_input.loc[bg_present, "background"].tolist())

    args_dict_temp = args_dict.copy()
    args_dict_temp["enrichment_method"] = "compare_samples"

    ui = userinput.Userinput(
        pqo_STRING,
        foreground_string=fg,
        background_string=bg,
        args_dict=args_dict_temp,
    )

    # no NaNs where ANs are expected
    fg_clean = ui.foreground[ui.col_foreground]
    assert fg_clean.isnull().sum() == 0
    assert fg_clean.notnull().sum() > 0
    bg_clean = ui.background[ui.col_background]
    assert bg_clean.isnull().sum() == 0
    assert bg_clean.notnull().sum() > 0

    # foreground and background are strings
    assert isinstance(fg_clean.iloc[0], str)
    assert isinstance(bg_clean.iloc[0], str)

    # no duplicates
    assert not fg_clean.duplicated().any()
    assert not bg_clean.duplicated().any()
def test_check_parse_and_fail_cleanup_0(foreground, background, enrichment_method, pqo):
    """Expect parsing, cleanup and the overall check to fail for this input.

    Foreground and background are turned into newline-separated strings
    (nulls dropped) and passed to ``Userinput`` together with
    ``enrichment_method`` and ``NUM_BINS``. All three check flags on the
    resulting object must compare equal to ``False``.
    """
    fg_values = foreground[foreground.notnull()].tolist()
    bg_present = background["background"].notnull()
    bg_values = background.loc[bg_present, "background"].tolist()

    ui = userinput.Userinput(
        pqo=pqo,
        foreground_string="\n".join(fg_values),
        background_string="\n".join(bg_values),
        num_bins=NUM_BINS,
        enrichment_method=enrichment_method,
    )

    # Kept as explicit comparisons: the type of these flags is not visible here.
    assert ui.check_parse == False
    assert ui.check_cleanup == False
    assert ui.check == False
def test_cleanupforanalysis_abundance_correction_Userinput(pqo_STRING, fixture_fg_bg_meth_expected_cases, args_dict):
    """Check cleanup for the "abundance_correction" method with pasted input.

    Fixture cases with any other enrichment method are ignored: the test
    returns early and therefore passes. For abundance correction, the
    background is pasted as ``<AN>\\t<intensity>`` lines. After ``Userinput``
    has processed the input, accessions and intensities must be free of
    NaNs, non-empty, correctly typed, free of duplicates, and the intensity
    columns must satisfy ``non_decreasing``.
    """
    fg_input, bg_input, enrichment_method = fixture_fg_bg_meth_expected_cases
    if enrichment_method != "abundance_correction":
        return None

    fg = "\n".join(fg_input[fg_input.notnull()].tolist())

    bg_present = bg_input["background"].notnull()
    bg_values = bg_input.loc[bg_present, "background"].tolist()
    intensity_present = bg_input["intensity"].notnull()
    intensity_values = bg_input.loc[intensity_present, "intensity"].tolist()
    # Accessions and intensities are paired by position; zip() stops at the
    # shorter of the two lists.
    background_string = "".join(
        an + "\t" + str(intensity) + "\n"
        for an, intensity in zip(bg_values, intensity_values)
    )

    args_dict_temp = args_dict.copy()
    args_dict_temp["enrichment_method"] = enrichment_method

    ui = userinput.Userinput(
        pqo_STRING,
        foreground_string=fg,
        background_string=background_string,
        args_dict=args_dict_temp,
    )

    # no NaNs where ANs are expected
    fg_clean = ui.foreground[ui.col_foreground]
    assert fg_clean.isnull().sum() == 0
    assert fg_clean.notnull().sum() > 0
    bg_clean = ui.background[ui.col_background]
    assert bg_clean.isnull().sum() == 0
    assert bg_clean.notnull().sum() > 0

    # every AN has an abundance value
    fg_intensity = ui.foreground[ui.col_intensity]
    assert fg_intensity.isnull().sum() == 0
    assert fg_intensity.notnull().sum() > 0
    bg_intensity = ui.background[ui.col_intensity]
    assert bg_intensity.isnull().sum() == 0
    assert bg_intensity.notnull().sum() > 0

    # ANs are strings, abundance values are floats
    assert isinstance(fg_clean.iloc[0], str)
    assert isinstance(bg_clean.iloc[0], str)
    assert isinstance(fg_intensity.iloc[0], float)
    assert isinstance(bg_intensity.iloc[0], float)

    # no duplicates
    assert not fg_clean.duplicated().any()
    assert not bg_clean.duplicated().any()

    # sorted abundance values
    assert non_decreasing(fg_intensity.tolist()) == True
    assert non_decreasing(bg_intensity.tolist()) == True
def test_check_parse_and_cleanup_copy_and_paste_2(foreground, background, enrichment_method, pqo_STRING, args_dict):
    """Pasted foreground/background input must parse successfully.

    Foreground and background are turned into newline-separated strings
    (nulls dropped) and passed to ``Userinput``; ``ui.check_parse`` must
    compare equal to ``True``.
    """
    fg = "\n".join(foreground[foreground.notnull()].tolist())
    bg = "\n".join(background.loc[background.background.notnull(), "background"].tolist())

    # Work on a copy so the caller's dict is not mutated. Writing
    # "enrichment_method" into the shared object could leak into other
    # tests; the cleanup tests in this module copy it the same way.
    args_dict_temp = args_dict.copy()
    args_dict_temp["enrichment_method"] = enrichment_method

    ui = userinput.Userinput(
        pqo=pqo_STRING,
        foreground_string=fg,
        background_string=bg,
        num_bins=NUM_BINS,
        args_dict=args_dict_temp,
    )
    assert ui.check_parse == True
def test_check_parse_with_copy_and_paste_1(foreground, background, enrichment_method, pqo_STRING, args_dict):
    """Pasted input with an abundance column must parse successfully.

    The background is pasted as ``<AN>\\t<intensity>`` lines. Accessions and
    intensities are paired with ``zip_longest`` so that a missing partner is
    filled with NaN rather than dropped. After construction,
    ``ui.check_parse`` must compare equal to ``True`` and ``ui.foreground``
    must have shape ``(8, 2)``.
    """
    fg = "\n".join(foreground[foreground.notnull()].tolist())
    bg_values = background.loc[background.background.notnull(), "background"].tolist()
    intensity_values = [
        str(ele)
        for ele in background.loc[background.intensity.notnull(), "intensity"].tolist()
    ]
    # Distinct loop names: the original rebound ``in_`` (the list being
    # zipped) inside the loop, which only worked because zip_longest had
    # already captured the list.
    background_string = "".join(
        an + "\t" + str(intensity) + "\n"
        for an, intensity in zip_longest(bg_values, intensity_values, fillvalue=np.nan)
    )

    # Work on a copy so the caller's dict is not mutated. Writing
    # "enrichment_method" into the shared object could leak into other
    # tests; the cleanup tests in this module copy it the same way.
    args_dict_temp = args_dict.copy()
    args_dict_temp["enrichment_method"] = enrichment_method

    ui = userinput.Userinput(
        pqo=pqo_STRING,
        foreground_string=fg,
        background_string=background_string,
        num_bins=NUM_BINS,
        args_dict=args_dict_temp,
    )
    assert ui.check_parse == True
    assert ui.foreground.shape == (8, 2)
def get_ui_fn(pqo, fn, args_dict, num_bins=NUM_BINS):
    """Return a ``userinput.Userinput`` built from the file ``fn``.

    ``pqo``, ``fn``, ``args_dict`` and ``num_bins`` are forwarded unchanged
    as keyword arguments.

    NOTE(review): a second ``get_ui_fn`` taking ``enrichment_method`` instead
    of ``args_dict`` is defined directly after this one; if both live in the
    same module, the later definition shadows this one. Confirm which
    signature is intended.
    """
    ui = userinput.Userinput(
        pqo=pqo,
        fn=fn,
        args_dict=args_dict,
        num_bins=num_bins,
    )
    return ui
def get_ui_fn(pqo, fn, enrichment_method, num_bins=NUM_BINS):
    """Return a ``userinput.Userinput`` built from the file ``fn``.

    ``pqo``, ``fn``, ``enrichment_method`` and ``num_bins`` are forwarded
    unchanged as keyword arguments.

    NOTE(review): this redefines ``get_ui_fn``; a definition taking
    ``args_dict`` instead of ``enrichment_method`` directly precedes it and
    is shadowed if both live in the same module. Confirm which signature is
    intended.
    """
    ui = userinput.Userinput(
        pqo=pqo,
        fn=fn,
        enrichment_method=enrichment_method,
        num_bins=num_bins,
    )
    return ui
def test_check_parse_and_cleanup_FN_missing(pqo):
    """A path to a non-existent file must fail parsing, cleanup and check.

    The path is built inside ``variables.EXAMPLE_FOLDER`` from a file name
    that is not expected to exist. All three check flags on the resulting
    ``Userinput`` must compare equal to ``False``.
    """
    missing_name = "This_does_not_exist_and_therefore_can_not_be_parsed.txt"
    fn_example_data = os.path.join(variables.EXAMPLE_FOLDER, missing_name)

    ui = userinput.Userinput(pqo=pqo, fn=fn_example_data, num_bins=NUM_BINS)

    # Kept as explicit comparisons: the type of these flags is not visible here.
    assert ui.check_parse == False
    assert ui.check_cleanup == False
    assert ui.check == False
def test_check_parse_and_cleanup_FN_comma(pqo):
    """The bundled HeLa example file must pass parsing, cleanup and check.

    ``HeLa_Ubi_exampledata.txt`` is looked up in ``variables.EXAMPLE_FOLDER``
    and passed to ``Userinput`` via ``fn``. All three check flags must
    compare equal to ``True``.

    NOTE(review): the test name mentions "comma", but nothing in this test
    shows how the file is delimited; confirm against the example data.
    """
    example_name = "HeLa_Ubi_exampledata.txt"
    fn_example_data = os.path.join(variables.EXAMPLE_FOLDER, example_name)

    ui = userinput.Userinput(pqo=pqo, fn=fn_example_data, num_bins=NUM_BINS)

    # Kept as explicit comparisons: the type of these flags is not visible here.
    assert ui.check_parse == True
    assert ui.check_cleanup == True
    assert ui.check == True