예제 #1
0
def test_count_terms_v3(random_foreground_background, pqo_STRING):
    """
    Cross-check ratio.count_terms_v3 against ratio.count_terms_v2.

    For every entity type two things are verified:
    1. each association of a foreground protein resolves in the DAG to a term
       whose id equals the association itself (this probes for secondary IDs,
       e.g. goterm 'GO:0007610' has secondary id 'GO:0044708')
    2. both counting functions yield the same association-to-count and
       association-to-ANs mappings, and the AN counter of v2 does not exceed
       the one of v3

    :param random_foreground_background: fixture unpacked as (foreground, background, taxid)
    :param pqo_STRING: fixture offering get_association_dict_split_by_category
    :return: None
    """
    foreground, background, _taxid = random_foreground_background
    etype_2_assoc_dict = pqo_STRING.get_association_dict_split_by_category(foreground)
    basic_or_slim = "basic"
    for entity_type in variables.entity_types_with_data_in_functions_table:
        dag = run.pick_dag_from_entity_type_and_basic_or_slim(entity_type, basic_or_slim, pqo_STRING)
        assoc_dict = etype_2_assoc_dict[entity_type]

        # every term attached to a foreground protein must be a primary id in the DAG
        for an in set(foreground):
            if an not in assoc_dict:
                continue
            for term in assoc_dict[an]:
                assert dag[term].id == term

        # NOTE(review): the association dict is built from the foreground but
        # counted over the background proteins -- confirm this is intended
        term_2_count_v2, term_2_ans_v2, n_v2 = ratio.count_terms_v2(set(background), assoc_dict, dag)
        term_2_count_v3, term_2_ans_v3, n_v3 = ratio.count_terms_v3(set(background), assoc_dict)
        assert term_2_count_v2 == term_2_count_v3
        assert term_2_ans_v2 == term_2_ans_v3
        assert n_v2 <= n_v3
예제 #2
0
def test_precomputed_associations_counts(pqo_STRING, TaxIDs):
    """
    Precomputed background counts must equal counts computed on the fly.

    A: fetch the precomputed per-entity-type background mappings for the taxid
    B: recompute the same mappings with ratio.count_terms_v3 from the proteome
       of that taxid and compare them entity type by entity type

    :param pqo_STRING: fixture offering get_association_dict_split_by_category
    :param TaxIDs: fixture supplying the taxid under test
    :return: None
    """
    taxid = TaxIDs
    proteome = query.get_proteins_of_taxid(taxid)
    # A
    precomputed = query.get_association_2_count_ANs_background_split_by_entity(taxid)
    etype_2_count_dict_precomputed, etype_2_ans_dict_precomputed, _etype_2_background_n = precomputed
    etype_2_assoc_dict = pqo_STRING.get_association_dict_split_by_category(set(proteome))
    for etype in variables.entity_types_with_data_in_functions_table:
        # B
        count_dict, ans_dict, _ans_counter = ratio.count_terms_v3(set(proteome), etype_2_assoc_dict[etype])
        assert count_dict == etype_2_count_dict_precomputed[etype]
        assert ans_dict == etype_2_ans_dict_precomputed[etype]
예제 #3
0
    def __init__(self,
                 pqo,
                 args_dict,
                 ui,
                 assoc_dict,
                 enrichment_method="genome",
                 entity_type="-51",
                 o_or_u_or_both="overrepresented",
                 multitest_method="benjamini_hochberg",
                 alpha=0.05,
                 association_2_count_dict_background=None,
                 background_n=None,
                 indent=False):
        """
        Store the configuration and immediately run the selected enrichment method.

        :param pqo: stored as self.pqo (not used further in this constructor)
        :param args_dict: stored as self.args_dict
        :param ui: user-input object; must offer get_foreground_an_set()
        :param assoc_dict: association dict handed to ratio.count_terms_v3
        :param enrichment_method: one of "genome", "rank_enrichment",
            "abundance_correction", "compare_samples", "compare_groups",
            "characterize_foreground"
        :param entity_type: stored as self.entity_type
        :param o_or_u_or_both: stored as self.o_or_u_or_both
        :param multitest_method: stored as self.multitest_method
        :param alpha: stored as self.alpha
        :param association_2_count_dict_background: forwarded to run_genome ("genome" only)
        :param background_n: forwarded to run_genome ("genome" only)
        :param indent: prepend GO-terms with a "." for each level
        :raises NotImplementedError: if enrichment_method is not one of the names above
        """
        self.results = []
        self.pqo = pqo
        self.ui = ui
        self.args_dict = args_dict
        self.assoc_dict = assoc_dict
        self.entity_type = entity_type
        self.method = enrichment_method
        self.o_or_u_or_both = o_or_u_or_both
        self.multitest_method = multitest_method
        self.alpha = alpha
        self.indent = indent

        # "rank_enrichment" is the only method that skips the foreground counting
        if self.method == "rank_enrichment":
            self.df = self.run_rank_enrichment()
            return

        # shared preparation: count the associations of the foreground proteins
        self.an_set_foreground = self.ui.get_foreground_an_set()
        foreground_counts = ratio.count_terms_v3(self.an_set_foreground, self.assoc_dict)
        (self.association_2_count_dict_foreground,
         self.association_2_ANs_dict_foreground,
         self.foreground_n) = foreground_counts

        if self.method == "genome":
            self.run_genome(association_2_count_dict_background, background_n)
        elif self.method == "abundance_correction":
            self.run_abundance_correction()
        elif self.method == "compare_samples":
            self.run_compare_samples()
        elif self.method == "compare_groups":
            self.run_compare_groups()
        elif self.method == "characterize_foreground":
            self.run_characterize_foreground()
        else:
            raise NotImplementedError
예제 #4
0
 def run_characterize_foreground(self):
     """
     Count the associations of the redundant foreground and characterize them.

     The AN count returned by ratio.count_terms_v3 is discarded; the
     self.foreground_n that is already set on the instance is passed on
     instead. The result is stored in self.df.
     """
     self.an_redundant_foreground = self.ui.get_an_redundant_foreground()
     count_dict, ans_dict, _ = ratio.count_terms_v3(self.an_redundant_foreground, self.assoc_dict)
     self.association_2_count_dict_foreground = count_dict
     self.association_2_ANs_dict_foreground = ans_dict
     self.df = self.characterize_foreground(self.association_2_count_dict_foreground, self.foreground_n)
예제 #5
0
 def run_compare_groups(self):
     """
     Compare the redundant foreground with the redundant background.

     Group sizes are taken from the user input rather than from the AN counts
     returned by ratio.count_terms_v3, which are discarded. The result of
     run_study is stored in self.df.
     """
     user_input = self.ui
     self.foreground_n = user_input.get_foreground_n()
     self.background_n = user_input.get_background_n()
     self.an_redundant_foreground = user_input.get_an_redundant_foreground()
     self.an_redundant_background = user_input.get_an_redundant_background()

     fg_count_dict, fg_ans_dict, _ = ratio.count_terms_v3(self.an_redundant_foreground, self.assoc_dict)
     self.association_2_count_dict_foreground = fg_count_dict
     self.association_2_ANs_dict_foreground = fg_ans_dict

     bg_count_dict, bg_ans_dict, _ = ratio.count_terms_v3(self.an_redundant_background, self.assoc_dict)
     self.association_2_count_dict_background = bg_count_dict
     self.association_2_ANs_dict_background = bg_ans_dict

     self.df = self.run_study(
         self.association_2_count_dict_foreground,
         self.association_2_count_dict_background,
         self.foreground_n,
         self.background_n,
     )
예제 #6
0
 def run_compare_samples(self):
     """
     Compare the foreground with a user-supplied background set.

     Counts the associations of the background AN set with
     ratio.count_terms_v3 (which also yields self.background_n) and runs the
     study against the foreground counts already stored on the instance.
     The result is stored in self.df.
     """
     self.an_set_background = self.ui.get_background_an_set()
     bg_count_dict, bg_ans_dict, bg_n = ratio.count_terms_v3(self.an_set_background, self.assoc_dict)
     self.association_2_count_dict_background = bg_count_dict
     self.association_2_ANs_dict_background = bg_ans_dict
     self.background_n = bg_n
     self.df = self.run_study(
         self.association_2_count_dict_foreground,
         self.association_2_count_dict_background,
         self.foreground_n,
         self.background_n,
     )
예제 #7
0
def test_association_2_count_dict(pqo_STRING, random_foreground_background):
    """
    Every association counted for the foreground must also be known elsewhere.

    For each entity type the foreground and background are counted with both
    ratio.count_terms_v3 and ratio.count_terms_v2. Each association found by
    v3 in the foreground has to be a key of
    - the v3 background mappings (count and ANs)
    - the v2 foreground mappings (count and ANs)
    - the v2 background mappings (count and ANs)

    :param pqo_STRING: fixture offering get_association_dict_split_by_category
    :param random_foreground_background: fixture unpacked as (foreground, background, taxid)
    :return: None
    """
    foreground, background, _taxid = random_foreground_background
    etype_2_assoc_background = pqo_STRING.get_association_dict_split_by_category(background)
    etype_2_assoc_foreground = pqo_STRING.get_association_dict_split_by_category(foreground)
    for etype in variables.entity_types_with_data_in_functions_table:
        # count_terms_v3(ans_set, assoc_dict)
        count_bg_v3, ans_bg_v3, _n_bg_v3 = ratio.count_terms_v3(
            set(background), etype_2_assoc_background[etype])
        count_fg_v3, _ans_fg_v3, _n_fg_v3 = ratio.count_terms_v3(
            set(foreground), etype_2_assoc_foreground[etype])

        # count_terms_v2(ans_set, assoc_dict, obo_dag)
        dag = run.pick_dag_from_entity_type_and_basic_or_slim(etype, "basic", pqo_STRING)
        assoc_dict_foreground = etype_2_assoc_foreground[etype]
        count_fg_v2, ans_fg_v2, _n_fg_v2 = ratio.count_terms_v2(
            set(foreground), assoc_dict_foreground, dag)
        assoc_dict_background = etype_2_assoc_background[etype]
        count_bg_v2, ans_bg_v2, _n_bg_v2 = ratio.count_terms_v2(
            set(background), assoc_dict_background, dag)

        # only the keys matter here, the foreground counts themselves are not compared
        for association in count_fg_v3:
            assert association in count_bg_v3
            assert association in ans_bg_v3

            assert association in count_fg_v2
            assert association in ans_fg_v2

            assert association in count_bg_v2
            assert association in ans_bg_v2
예제 #8
0
def test_functional_association_consistency_of_DB(pqo_STRING):
    """
    Check that precomputed background counts agree with the protein_2_function table.

    All functional associations of a given taxid and ENSP from
    protein_2_function need to be present in the precomputed background counts.
    Since the ENSPs of the background don't matter for the p-value calculation,
    but only the lookup of association to number of ENSPs (counts), the
    precalculated counts are compared to foreground counts of the proteome.
    The foreground is generated from the protein_2_function table rather than
    from the taxid_2_protein table, which also checks the consistency between
    the ENSPs of those two tables.

    :param pqo_STRING: fixture offering get_association_dict_split_by_category
        and taxid_2_etype_2_association_2_count_dict_background
    :return: None
    """
    taxid_2_etype_2_association_2_count_dict_background = pqo_STRING.taxid_2_etype_2_association_2_count_dict_background
    # raw string: "\." is a regex escape for the database, not a Python escape
    # (in a normal string literal it is an invalid escape sequence)
    statement_template = (
        r"SELECT protein_2_function.an FROM protein_2_function WHERE protein_2_function.an ~ '^{}\.'"
    )
    for taxid in query.get_taxids():
        # grep ENSPs from protein_2_function table (instead of taxid_2_protein_table) --> use as foreground
        ensp_protein_2_function = {
            row[0]
            for row in query.get_results_of_statement(statement_template.format(taxid))
        }
        # etype_2_association_dict(key=entity_type(String), val=Dict(key=AN(String), val=SetOfFunctions(String)))
        etype_2_association_dict = pqo_STRING.get_association_dict_split_by_category(
            ensp_protein_2_function)
        for etype in variables.entity_types_with_data_in_functions_table:
            association_2_count_dict_background = taxid_2_etype_2_association_2_count_dict_background[
                taxid][etype]
            # only the association-to-ANs mapping is needed for the comparison
            _count_dict_foreground, association_2_ANs_dict_foreground, _foreground_n = ratio.count_terms_v3(
                ans_set=ensp_protein_2_function,
                assoc_dict=etype_2_association_dict[etype])
            for goterm, ans_set in association_2_ANs_dict_foreground.items():
                assert association_2_count_dict_background[goterm] == len(ans_set)