Example #1
0
def returnDirectories(sub_dir):
    """Return the result of unique.returnDirectories for ``sub_dir``.

    Pure pass-through: the argument is handed to the helper unchanged and
    whatever the helper returns is handed back to the caller.
    """
    return unique.returnDirectories(sub_dir)
Example #2
0
def _writeTabRow(out_file, fields):
    """Write ``fields`` to ``out_file`` as one tab-delimited line ending in a newline."""
    out_file.write('\t'.join(fields) + '\n')


def _openExportFile(path, opened):
    """Open ``path`` with export.ExportFile and record the handle in ``opened``."""
    handle = export.ExportFile(path)
    opened.append(handle)
    return handle


def _importSpeciesAnnotations(species_code):
    """Load the gene symbol, GO and MAPP lookups for one species.

    The 'Ensembl' gene system is tried first, then 'EntrezGene'. Any import
    that raises leaves the corresponding lookup as an empty dict (best effort).

    Returns:
        A tuple ``(symbol_associations, gene_to_go, gene_to_mapp)`` where
        ``symbol_associations`` maps each annotation's ``SymbolLower()`` to
        its gene ID.
    """
    ### Attempt to add GO and pathway data from database based on associated protein IDs (simple translation from human)
    mod = 'Ensembl'
    try:
        gene_annotations = gene_associations.importGeneData(species_code, mod)
    except Exception:
        mod = 'EntrezGene'
        try:
            gene_annotations = gene_associations.importGeneData(
                species_code, mod)
        except Exception:
            gene_annotations = {}

    symbol_associations = {}
    for geneid in gene_annotations:
        symbol_associations[gene_annotations[geneid].SymbolLower()] = geneid

    gotype = 'null'
    try:
        gene_to_go = gene_associations.importGeneGOData(
            species_code, mod, gotype)
    except Exception:
        gene_to_go = {}
    try:
        gene_to_mapp = gene_associations.importGeneMAPPData(species_code, mod)
    except Exception:
        gene_to_mapp = {}
    return symbol_associations, gene_to_go, gene_to_mapp


def exportTables(metabolite_list):
    """Write the HMDB metabolite tables into every species directory.

    For each directory returned by ``unique.returnDirectories('/Databases')``
    the following tab-delimited files are written under
    ``Databases/<species_code>/``:

    * ``gene/HMDB.txt`` - header row plus one annotation row per metabolite
    * ``uid-gene/HMDB-CAS.txt``, ``HMDB-ChEBI.txt``, ``HMDB-PubChem.txt``,
      ``HMDB-KeggCompound.txt`` - HMDB ID to external ID, only when the
      external ID is non-empty
    * ``gene-mapp/HMDB-MAPP.txt`` - HMDB ID, 'Ch', pathway
    * ``gene-mapp/denominator/CAS.txt`` - CAS number, 'Ca'
    * ``gene-go/HMDB-GeneOntology.txt`` - only when enzyme-based inference
      is switched on (it is hard-coded to 'no' below)

    Args:
        metabolite_list: iterable of metabolite records exposing the accessor
            methods called below (HMDB(), Name(), Description(), ...).

    Every file opened for a species is closed before moving on to the next
    species, including when an error interrupts the export; the error itself
    still propagates to the caller.
    """
    infer_enzyme_to_metabolite_pathway_data = 'no'
    infer_from_enzymes = infer_enzyme_to_metabolite_pathway_data == 'yes'

    current_species_dirs = unique.returnDirectories('/Databases')
    ### Save results to all species directories
    for species_code in current_species_dirs:
        print('Exporting metabolite data for: %s' % species_code)
        species_root = 'Databases/' + species_code
        gene_dir = species_root + '/gene/HMDB.txt'

        ### Track every handle so none is left open (or unflushed) on exit
        opened = []
        try:
            gene_data = _openExportFile(gene_dir, opened)
            hmdb_cas_data = _openExportFile(
                species_root + '/uid-gene/HMDB-CAS.txt', opened)
            hmdb_chebi_data = _openExportFile(
                species_root + '/uid-gene/HMDB-ChEBI.txt', opened)
            hmdb_pubchem_data = _openExportFile(
                species_root + '/uid-gene/HMDB-PubChem.txt', opened)
            hmdb_keggcomp_data = _openExportFile(
                species_root + '/uid-gene/HMDB-KeggCompound.txt', opened)
            hmdb_mapp_data = _openExportFile(
                species_root + '/gene-mapp/HMDB-MAPP.txt', opened)
            cas_denom_data = _openExportFile(
                species_root + '/gene-mapp/denominator/CAS.txt', opened)
            hmdb_go_data = None
            if infer_from_enzymes:
                hmdb_go_data = _openExportFile(
                    species_root + '/gene-go/HMDB-GeneOntology.txt', opened)

            _writeTabRow(gene_data, [
                'hmdb_id', 'name', 'description', 'secondary_id', 'iupac',
                'cas_number', 'chebi_id', 'pubchem_compound_id', 'Pathways',
                'ProteinNames'
            ])

            symbol_associations, gene_to_go, gene_to_mapp = \
                _importSpeciesAnnotations(species_code)

            for ed in metabolite_list:
                hmdb_id = ed.HMDB()
                cas_id = ed.CAS()
                chebi_id = ed.CheBI()
                pubchem_id = ed.PubChem()
                kegg_id = ed.KEGGCompoundID()

                _writeTabRow(gene_data, [
                    hmdb_id,
                    ed.Name(),
                    ed.Description(),
                    ed.SecondaryIDs(),
                    ed.IUPAC(),
                    cas_id,
                    chebi_id,
                    pubchem_id,
                    ed.PathwaysStr(),
                    ed.ProteinNamesStr()
                ])

                ### NOTE(review): metabolites with exactly one pathway are skipped
                ### by this "> 1" test - confirm that is intended (vs. "> 0")
                pathways = ed.Pathways()
                if len(pathways) > 1:
                    for pathway in pathways:
                        _writeTabRow(hmdb_mapp_data, [hmdb_id, 'Ch', pathway])

                if len(cas_id) > 0:
                    _writeTabRow(hmdb_cas_data, [hmdb_id, cas_id])
                    _writeTabRow(cas_denom_data, [cas_id, 'Ca'])
                if len(chebi_id) > 0:
                    _writeTabRow(hmdb_chebi_data, [hmdb_id, chebi_id])
                if len(pubchem_id) > 0:
                    _writeTabRow(hmdb_pubchem_data, [hmdb_id, pubchem_id])
                if len(kegg_id) > 0:
                    _writeTabRow(hmdb_keggcomp_data, [hmdb_id, kegg_id])

                if infer_from_enzymes:
                    ### If associated enzyme annotated, use the gene symbol to find GO terms associated with the gene symbol for the metabolite
                    ### Not sure if this is a bad idea or not
                    ### Dicts are used as de-duplicating collections of IDs
                    temp_go = {}
                    temp_mapp = {}
                    for protein_name in ed.ProteinNames():
                        protein_name = protein_name.lower()
                        if protein_name in symbol_associations:
                            geneid = symbol_associations[protein_name]
                            if geneid in gene_to_go:
                                for goid in gene_to_go[geneid]:
                                    temp_go[goid] = []
                            if geneid in gene_to_mapp:
                                for mapp in gene_to_mapp[geneid]:
                                    temp_mapp[mapp] = []
                    for goid in temp_go:
                        _writeTabRow(hmdb_go_data, [hmdb_id, 'GO:' + goid])
                    for mapp in temp_mapp:
                        _writeTabRow(hmdb_mapp_data, [hmdb_id, 'Ch', mapp])
        finally:
            for handle in opened:
                handle.close()
        print('File: %s exported.' % gene_dir)
def returnDirectories(sub_dir):
    """Return the result of unique.returnDirectories for ``sub_dir``.

    Pure pass-through: the argument is handed to the helper unchanged and
    whatever the helper returns is handed back to the caller.
    """
    return unique.returnDirectories(sub_dir)