Exemple #1
0
def test_ENSP_consistency_of_DB():
    """
    Check that ENSPs with functional annotations are known for their taxid.

    For every taxid returned by ``query.get_taxids()``, the ENSPs selected
    from the protein_2_function table (accessions starting with
    "<taxid>.") must be a subset of the ENSPs returned by
    ``query.get_proteins_of_taxid(taxid)``.

    Rationale (from the original notes): a foreground with functional
    associations also has to be in the precomputed background, i.e.
    TaxID_2_Protein_table_STRING is expected to be the superset of
    Protein_2_Function_table_STRING. Function_2_ENSP_table_STRING is
    mentioned in those notes but is not checked here.
    """
    for taxid in query.get_taxids():
        ensp_taxid_2_protein = set(query.get_proteins_of_taxid(taxid))
        # Raw string: "\." is a regex escape, not a Python string escape.
        ensp_protein_2_function = {
            ele[0]
            for ele in query.get_results_of_statement(
                r"SELECT protein_2_function.an FROM protein_2_function WHERE protein_2_function.an ~ '^{}\.'"
                .format(taxid))
        }
        # A single subset check replaces the three length comparisons
        # (len(A) >= len(B), len(A & B) == len(B), len(A | B) == len(A)),
        # which all hold exactly when B is a subset of A.
        missing = ensp_protein_2_function - ensp_taxid_2_protein
        assert not missing, (
            "taxid {}: {} ENSP(s) of protein_2_function are missing from "
            "taxid_2_protein, e.g. {}".format(
                taxid, len(missing), list(missing)[:5]))
Exemple #2
0
def random_foreground_background():
    """
    Pick a random taxid and draw a random foreground from its proteins.

    A TaxIDs fixture was used previously; now the taxid itself is chosen at
    random as well.

    Returns:
        tuple: ``(foreground, background, taxid)`` where ``background`` is
        the result of ``query.get_proteins_of_taxid(taxid)`` and
        ``foreground`` is a random sample of at most 200 of its entries.
    """
    # The former ``for _ in range(10)`` wrapper returned on its first
    # iteration, so it never looped; it has been removed.
    taxid = random.choice(query.get_taxids())  # read_from_flat_files=True
    background = query.get_proteins_of_taxid(taxid)
    # random.sample raises ValueError when asked for more items than the
    # population holds, so clamp the sample size for small proteomes.
    foreground = random.sample(background, min(200, len(background)))
    return foreground, background, taxid
Exemple #3
0
def test_functional_association_consistency_of_DB(pqo_STRING):
    """
    Compare precalculated background counts with counts derived from the DB.

    All functional associations of a given taxid and ENSP in
    protein_2_function need to be present in the precomputed background
    counts. The ENSPs of the background do not matter for the p-value
    calculation, only the lookup of association to number of ENSPs
    (counts) does. Therefore the ENSPs of each taxid are taken from the
    protein_2_function table (rather than from taxid_2_protein), used as
    the foreground, and the size of each term's ENSP set is compared with
    the precalculated background count. This also checks the consistency
    between the ENSPs of protein_2_function and taxid_2_protein.

    Args:
        pqo_STRING: object providing
            ``taxid_2_etype_2_association_2_count_dict_background`` and
            ``get_association_dict_split_by_category``.
    """
    background_counts = pqo_STRING.taxid_2_etype_2_association_2_count_dict_background
    for taxid in query.get_taxids():
        # grep ENSPs from protein_2_function table (instead of
        # taxid_2_protein_table) --> use as foreground.
        # Raw string: "\." is a regex escape, not a Python string escape.
        ensp_protein_2_function = {
            ele[0]
            for ele in query.get_results_of_statement(
                r"SELECT protein_2_function.an FROM protein_2_function WHERE protein_2_function.an ~ '^{}\.'"
                .format(taxid))
        }
        # Per the original note: etype_2_association_dict(key=entity_type(String),
        # val=Dict(key=AN(String), val=SetOfFunctions(String)))
        etype_2_association_dict = pqo_STRING.get_association_dict_split_by_category(
            ensp_protein_2_function)
        for etype in variables.entity_types_with_data_in_functions_table:
            association_2_count_dict_background = background_counts[taxid][etype]
            # Only the term -> ENSP-set mapping is needed; the other two
            # return values of count_terms_v3 are not used here.
            _, association_2_ANs_dict_foreground, _ = ratio.count_terms_v3(
                ans_set=ensp_protein_2_function,
                assoc_dict=etype_2_association_dict[etype])
            for goterm, ans_set in association_2_ANs_dict_foreground.items():
                assert association_2_count_dict_background[goterm] == len(ans_set), (
                    "taxid {}, etype {}, term {}: foreground count {} differs "
                    "from the precalculated background count".format(
                        taxid, etype, goterm, len(ans_set)))
Exemple #4
0
def random_abundance_correction_foreground_background():
    """
    Build random abundance-correction input for one random taxid.

    Returns:
        tuple: ``(foreground, background, intensity, taxid)`` where
        ``background`` is the result of ``query.get_proteins_of_taxid(taxid)``,
        ``foreground`` is a random sample of at most 200 of its entries, and
        ``intensity`` is a list of stringified draws from
        ``np.random.normal``, one per background entry.
    """
    # The former ``for _ in range(10)`` wrapper returned on its first
    # iteration, so it never looped; it has been removed.
    taxid = random.choice(query.get_taxids())  # read_from_flat_files=True
    background = query.get_proteins_of_taxid(taxid)
    # random.sample raises ValueError when asked for more items than the
    # population holds, so clamp the sample size for small proteomes.
    foreground = random.sample(background, min(200, len(background)))
    intensity = [
        str(ele) for ele in np.random.normal(size=len(background))
    ]
    return foreground, background, intensity, taxid