コード例 #1
0
    def testMain(self):
        """Load gene data from all four sources and report gene overlaps.

        Creates the writer, ID converter and statistics helpers plus one
        loader per source (HMDB, WikiPathways, Reactome, KEGG), runs each
        loader's steps, generates RaMP gene IDs source by source, and passes
        the writer's gene ID tables to ``analyteOverlaps`` for the 'Gene'
        analyte type.
        """
        ramp_writer = writeToSQL()
        hmdb_source = hmdbData()
        # HMDB is the only source whose database files are fetched here.
        hmdb_source.getDatabaseFiles()
        id_converter = IDconversion()  # built, but no conversion is run below
        statistics = getStatistics()
        wiki_source = wikipathwaysData()
        reactome_source = reactomeData()
        kegg_source = KeggData()
        print('Running overlap plot test case...')

        # --- per-source loading ---
        hmdb_source.getGenes()
        hmdb_source.getPathwaysLinkedToGene()

        wiki_source.getEverything()
        wiki_source.getCommonNameForChebi()

        reactome_source.getGenes()
        reactome_source.getCommonNameFromUniprot()

        kegg_source.getPathways()
        kegg_source.getMetabolites()
        kegg_source.getGenes()
        kegg_source.getGeneInfo()
        kegg_source.getPathwayLinkedToGene()

        # --- RaMP gene ID generation ---
        # Each call is seeded with the number returned by the previous one
        # (presumably a running ID counter -- confirm in createRampGeneID).
        generation_order = (
            (hmdb_source, "hmdb"),
            (kegg_source, 'kegg'),
            (wiki_source, 'wiki'),
            (reactome_source, 'reactome'),
        )
        running_number = 0
        for source, label in generation_order:
            running_number = ramp_writer.createRampGeneID(
                source.geneInfoDictionary, label, running_number)

        statistics.analyteOverlaps(ramp_writer.rampGeneIdInWhichDatabases,
                                   ramp_writer.rampGeneIDdictionary, 'Gene')
コード例 #2
0
    def testMain(self):
        """Run the KEGG-only import pipeline and print overlap statistics.

        Fetches the KEGG files, parses pathways, metabolites, synonyms and
        genes (printing table sizes along the way), dumps the loader state
        via ``write_myself_files`` before and after gene ID conversion,
        generates RaMP compound/gene IDs, writes everything through
        ``writeToSQL.write`` and prints the compound and gene overlaps.
        """
        kegg_source = KeggData()

        ramp_writer = writeToSQL()
        id_converter = IDconversion()
        statistics = getStatistics()

        # The database files must be in place before any parsing step.
        kegg_source.getDatabaseFiles()

        print('get pathways')
        kegg_source.getPathways()
        kegg_source.getPathways_with_genes()
        print(len(kegg_source.pathwayDictionary))

        print('get metabolites')
        kegg_source.getMetabolites()

        print('get synonyms and chebi')
        kegg_source.getSynonymsAndCHEBI()
        print(len(kegg_source.metaboliteIDDictionary))
        # Spot-check two fixed compound entries; a missing key raises KeyError.
        for compound_key in ("C00002", "C00001"):
            print(kegg_source.metaboliteIDDictionary[compound_key])

        print('get genes')
        kegg_source.getGenes()
        print(len(kegg_source.geneInfoDictionary))
        kegg_source.getGeneInfo()
        kegg_source.getPathwayLinkedToGene()

        # First dump, taken before gene ID conversion.
        kegg_source.write_myself_files(database='kegg')
        id_converter.GeneConvert(kegg_source.geneInfoDictionary, "kegg")

        # Second dump, taken after gene ID conversion has run.
        kegg_source.write_myself_files('kegg')

        # KEGG is the only source here, so both ID generators start at 0.
        ramp_writer.createRampCompoundID(kegg_source.metaboliteIDDictionary,
                                         "kegg", 0)
        ramp_writer.createRampGeneID(kegg_source.geneInfoDictionary,
                                     "kegg", 0)

        # Positional arguments for the writer, in the order it is called with
        # everywhere else in this file.
        write_arguments = (
            kegg_source.metaboliteCommonName,
            kegg_source.pathwayDictionary,
            kegg_source.pathwayCategory,
            kegg_source.metabolitesWithPathwaysDictionary,
            kegg_source.metabolitesWithSynonymsDictionary,
            kegg_source.metaboliteIDDictionary,
            kegg_source.pathwaysWithGenesDictionary,
            kegg_source.metabolitesLinkedToGenes,
            kegg_source.geneInfoDictionary,
            kegg_source.biofluidLocation,
            kegg_source.biofluid,
            kegg_source.cellularLocation,
            kegg_source.cellular,
            kegg_source.pathwayOntology,
            kegg_source.exoEndoDictionary,
            kegg_source.exoEndo,
            kegg_source.tissueLocation,
            kegg_source.tissue,
            "kegg",
            0,
            0,
        )
        ramp_writer.write(*write_arguments)

        # Size summary of the main KEGG tables.
        size_report = (
            ('metaboliteIDdict number is ',
             kegg_source.metaboliteIDDictionary),
            ('GeneInfo number is ', kegg_source.geneInfoDictionary),
            ('PathwayDict number is ', kegg_source.pathwayDictionary),
            ('MetabolitesWithPath is ',
             kegg_source.metabolitesWithPathwaysDictionary),
        )
        for caption, table in size_report:
            print(caption + str(len(table)))

        print("Compound:")
        statistics.analyteOverlaps(
            ramp_writer.rampCompoundIdInWhichDatabases,
            ramp_writer.rampCompoundIDdictionary,
            "Compound")
        print("\n")
        print("Gene:")
        statistics.analyteOverlaps(
            ramp_writer.rampGeneIdInWhichDatabases,
            ramp_writer.rampGeneIDdictionary,
            "Gene")
コード例 #3
0
    def testMain(self):
        """Run the WikiPathways-only import and print overlap statistics.

        Fetches and parses the WikiPathways data, converts gene IDs, runs the
        within-database duplicate checks, generates RaMP compound/gene IDs,
        writes the tables through ``writeToSQL.write``, prints table sizes and
        finally prints the compound and gene overlap reports.

        The overlap report is emitted once; an identical copy-pasted second
        pass over the same, unchanged tables was removed.
        """
        sql = writeToSQL()
        idconvert = IDconversion()
        stat = getStatistics()

        wikipathways = wikipathwaysData()
        wikipathways.getDatabaseFiles()

        wikipathways.getEverything()
        print(wikipathways.setOfType)
        wikipathways.getCommonNameForChebi()

        idconvert.GeneConvert(wikipathways.geneInfoDictionary, "wiki")

        # Duplicate checks run before RaMP IDs are generated.
        sql.checkForWithinDatabaseDuplicatesCompound(
            wikipathways.metaboliteIDDictionary, "wiki")
        sql.checkForWithinDatabaseDuplicatesGene(
            wikipathways.geneInfoDictionary, "wiki")

        # WikiPathways is the only source here, so both generators start at 0.
        # The returned numbers are not needed afterwards.
        sql.createRampCompoundID(wikipathways.metaboliteIDDictionary,
                                 "wiki", 0)
        sql.createRampGeneID(wikipathways.geneInfoDictionary, "wiki", 0)

        print("Write to file...")
        sql.write(
            wikipathways.metaboliteCommonName, wikipathways.pathwayDictionary,
            wikipathways.pathwayCategory,
            wikipathways.metabolitesWithPathwaysDictionary,
            wikipathways.metabolitesWithSynonymsDictionary,
            wikipathways.metaboliteIDDictionary,
            wikipathways.pathwaysWithGenesDictionary,
            wikipathways.metabolitesLinkedToGenes,
            wikipathways.geneInfoDictionary, wikipathways.biofluidLocation,
            wikipathways.biofluid, wikipathways.cellularLocation,
            wikipathways.cellular, wikipathways.pathwayOntology,
            wikipathways.exoEndoDictionary, wikipathways.exoEndo,
            wikipathways.tissueLocation, wikipathways.tissue, "wiki", 0, 0)

        print("Pathways number is " + str(len(wikipathways.pathwayDictionary)))
        print("metabolites number is " +
              str(len(wikipathways.metaboliteIDDictionary)))
        print('genes number is ' + str(len(wikipathways.geneInfoDictionary)))

        print("Compound:")
        stat.analyteOverlaps(sql.rampCompoundIdInWhichDatabases,
                             sql.rampCompoundIDdictionary, "Compound")
        print("\n")
        print("Gene:")
        stat.analyteOverlaps(sql.rampGeneIdInWhichDatabases,
                             sql.rampGeneIDdictionary, "Gene")
コード例 #4
0
    def testMain(self):
        """Spot-check WikiPathways gene records, then write and report.

        Parses the WikiPathways data, prints three randomly chosen gene
        records plus one fixed record, pauses for three seconds, dumps the
        loader state, generates RaMP compound/gene IDs, writes the tables
        through ``writeToSQL.write`` and prints the overlap reports.
        Gene ID conversion is not run in this test.
        """
        ramp_writer = writeToSQL()
        id_converter = IDconversion()
        statistics = getStatistics()

        wiki_source = wikipathwaysData()
        wiki_source.getEverything()

        # Draw three keys (repeats are possible), then print their records.
        gene_table = wiki_source.geneInfoDictionary
        candidate_keys = list(gene_table.keys())
        sampled_keys = [random.choice(candidate_keys) for _ in range(3)]
        for gene_key in sampled_keys:
            print(gene_table[gene_key])
        # Fixed record; raises KeyError when this Ensembl ID was not parsed.
        print(gene_table['ENSG00000139977'])
        time.sleep(3)

        wiki_source.write_myself_files('wiki')

        # Single-source run: both ID generators start at 0.
        ramp_writer.createRampCompoundID(wiki_source.metaboliteIDDictionary,
                                         "wiki", 0)
        ramp_writer.createRampGeneID(wiki_source.geneInfoDictionary,
                                     "wiki", 0)

        print("Write to file...")
        write_arguments = (
            wiki_source.metaboliteCommonName,
            wiki_source.pathwayDictionary,
            wiki_source.pathwayCategory,
            wiki_source.metabolitesWithPathwaysDictionary,
            wiki_source.metabolitesWithSynonymsDictionary,
            wiki_source.metaboliteIDDictionary,
            wiki_source.pathwaysWithGenesDictionary,
            wiki_source.metabolitesLinkedToGenes,
            wiki_source.geneInfoDictionary,
            wiki_source.biofluidLocation,
            wiki_source.biofluid,
            wiki_source.cellularLocation,
            wiki_source.cellular,
            wiki_source.pathwayOntology,
            wiki_source.exoEndoDictionary,
            wiki_source.exoEndo,
            wiki_source.tissueLocation,
            wiki_source.tissue,
            "wiki",
            0,
            0,
        )
        ramp_writer.write(*write_arguments)

        print("Compound:")
        statistics.analyteOverlaps(
            ramp_writer.rampCompoundIdInWhichDatabases,
            ramp_writer.rampCompoundIDdictionary,
            "Compound")
        print("\n")
        print("Gene:")
        statistics.analyteOverlaps(
            ramp_writer.rampGeneIdInWhichDatabases,
            ramp_writer.rampGeneIDdictionary,
            "Gene")
コード例 #5
0
 def testMain(self):
     """Exercise ``getStatistics.Apoptosis`` with hand-made fixtures.

     Seeds one pathway per source (WikiPathways, Reactome, KEGG) with two
     gene names each and maps the four gene names to RaMP IDs. ``geneA``
     and ``geneD`` are given the same RaMP ID. No source files are read.
     """
     wiki_source = wikipathwaysData()
     reactome_source = reactomeData()
     kegg_source = KeggData()
     ramp_writer = writeToSQL()
     statistics = getStatistics()

     # (loader, pathway id, genes in that pathway)
     pathway_fixtures = (
         (wiki_source, "WP254", ["geneA", "geneB"]),
         (reactome_source, "R-HSA-109581", ["geneA", "geneD"]),
         (kegg_source, "04210", ["geneC", "geneB"]),
     )
     for source, pathway_id, member_genes in pathway_fixtures:
         source.pathwaysWithGenesDictionary[pathway_id] = member_genes

     # (gene name, RaMP id)
     ramp_id_fixtures = (
         ("geneA", "RAMP00001"),
         ("geneB", "RAMP00002"),
         ("geneC", "RAMP00003"),
         ("geneD", "RAMP00001"),
     )
     for gene_name, ramp_id in ramp_id_fixtures:
         ramp_writer.rampGeneIDdictionary[gene_name] = ramp_id

     statistics.Apoptosis(
         ramp_writer.rampGeneIDdictionary,
         wiki_source.pathwaysWithGenesDictionary,
         kegg_source.pathwaysWithGenesDictionary,
         reactome_source.pathwaysWithGenesDictionary)
     
     
     
コード例 #6
0
    def testMain(self):
        """Run the KEGG-only import pipeline, dump gene IDs, and print stats.

        Fetches the KEGG files, parses pathways, metabolites, synonyms and
        genes (printing table sizes along the way), writes every KEGG gene ID
        to ``../misc/output/keggGenesID.txt``, converts gene IDs, runs the
        within-database duplicate checks, generates RaMP compound/gene IDs,
        writes the tables through ``writeToSQL.write`` and prints the
        compound and gene overlap reports.

        Raises:
            KeyError: if compound ``C00002`` or ``C00001`` was not parsed.
            OSError: if the gene ID output file cannot be opened or written.
        """
        kegg = KeggData()

        sql = writeToSQL()
        idconvert = IDconversion()
        stat = getStatistics()
        # The database files must be in place before any parsing step.
        kegg.getDatabaseFiles()
        print('get pathways')
        kegg.getPathways()
        kegg.getPathways_with_genes()
        print(len(kegg.pathwayDictionary))
        print('get metabolites')
        kegg.getMetabolites()

        print('get synonyms and chebi')
        kegg.getSynonymsAndCHEBI()
        print(len(kegg.metaboliteIDDictionary))
        # Spot-check two fixed compound entries.
        print(kegg.metaboliteIDDictionary["C00002"])
        print(kegg.metaboliteIDDictionary["C00001"])

        print('get genes')
        kegg.getGenes()
        print(len(kegg.geneInfoDictionary))
        kegg.getGeneInfo()
        kegg.getPathwayLinkedToGene()

        # One gene ID per line, UTF-8 encoded. The context manager closes the
        # handle even when a write fails part-way through.
        with open("../misc/output/keggGenesID.txt", "wb") as gene_id_file:
            for key in kegg.geneInfoDictionary:
                gene_id_file.write(key.encode("utf-8") + b"\n")

        idconvert.GeneConvert(kegg.geneInfoDictionary, "kegg")
        # NOTE(review): RaMP IDs are generated here and again after the
        # duplicate checks below. The first pair looks like a copy-paste
        # leftover; it is kept because the idempotence of these calls cannot
        # be confirmed from this method alone.
        sql.createRampCompoundID(kegg.metaboliteIDDictionary, "kegg", 0)
        sql.createRampGeneID(kegg.geneInfoDictionary, "kegg", 0)

        # Check duplicates
        sql.checkForWithinDatabaseDuplicatesCompound(
            kegg.metaboliteIDDictionary, "kegg")
        sql.checkForWithinDatabaseDuplicatesGene(kegg.geneInfoDictionary,
                                                 "kegg")
        # Create RaMP IDs (single-source run, so both start at 0).
        sql.createRampCompoundID(kegg.metaboliteIDDictionary, "kegg", 0)
        sql.createRampGeneID(kegg.geneInfoDictionary, "kegg", 0)

        sql.write(
            kegg.metaboliteCommonName, kegg.pathwayDictionary,
            kegg.pathwayCategory, kegg.metabolitesWithPathwaysDictionary,
            kegg.metabolitesWithSynonymsDictionary,
            kegg.metaboliteIDDictionary, kegg.pathwaysWithGenesDictionary,
            kegg.metabolitesLinkedToGenes, kegg.geneInfoDictionary,
            kegg.biofluidLocation, kegg.biofluid, kegg.cellularLocation,
            kegg.cellular, kegg.pathwayOntology, kegg.exoEndoDictionary,
            kegg.exoEndo, kegg.tissueLocation, kegg.tissue, "kegg", 0, 0)
        print('metaboliteIDdict number is ' +
              str(len(kegg.metaboliteIDDictionary)))
        print('GeneInfo number is ' + str(len(kegg.geneInfoDictionary)))
        print('PathwayDict number is ' + str(len(kegg.pathwayDictionary)))
        print('MetabolitesWithPath is ' +
              str(len(kegg.metabolitesWithPathwaysDictionary)))

        print("Compound:")
        stat.analyteOverlaps(sql.rampCompoundIdInWhichDatabases,
                             sql.rampCompoundIDdictionary, "Compound")
        print("\n")
        print("Gene:")
        stat.analyteOverlaps(sql.rampGeneIdInWhichDatabases,
                             sql.rampGeneIDdictionary, "Gene")
コード例 #7
0
ファイル: main.py プロジェクト: Mathelab/RaMP-BackEnd2
    def runEverything(self, getDatabaseFiles=False):
        """Run the full import across HMDB, WikiPathways, Reactome and KEGG.

        Stages, in order: optional fetch of the source files, per-source
        parsing, gene and metabolite ID conversion, within-database duplicate
        checks, RaMP compound and gene ID generation, writing every source
        through ``writeToSQL.write``, and the statistics reports (analyte
        overlaps, per-source content, apoptosis).

        Args:
            getDatabaseFiles: When true, ``getDatabaseFiles()`` is called on
                every source first; otherwise the files are assumed to be
                present already. Defaults to False.
        """
        ramp_writer = writeToSQL()
        id_converter = IDconversion()
        statistics = getStatistics()
        hmdb_source = hmdbData()
        wiki_source = wikipathwaysData()
        reactome_source = reactomeData()
        kegg_source = KeggData()
        print(os.getcwd())

        # ---- optional download ----
        if getDatabaseFiles:
            for source in (kegg_source, wiki_source, reactome_source,
                           hmdb_source):
                source.getDatabaseFiles()

        # ---- parsing: HMDB ----
        print("Getting HMDB Metabolites...")
        hmdb_source.getMetaboliteOtherIDs()
        print("Getting HMDB pathways and synonyms...")
        hmdb_source.getPathwaysandSynonyms()
        print("Getting HMDB genes...")
        hmdb_source.getGenes()
        print("Getting HMDB biofluid and cellular locations...")
        hmdb_source.getBiofluidCellularLocationDisease()
        print("Getting HMDB pathways links to genes ...")
        hmdb_source.getPathwaysLinkedToGene()

        # ---- parsing: WikiPathways ----
        print("Getting wikipathways...")
        wiki_source.getEverything()
        wiki_source.getCommonNameForChebi()

        # ---- parsing: Reactome ----
        print("Getting reactome genes...")
        reactome_source.getGenes()
        print("Getting reactome metabolites...")
        reactome_source.getMetabolites()
        reactome_source.getCommonNameForChebi()
        reactome_source.getCommonNameForGenes()

        # ---- parsing: KEGG ----
        print("Getting kegg pathways...")
        kegg_source.getPathways()
        print("Getting kegg genes and metabolites...")
        kegg_source.getMetabolites()
        kegg_source.getSynonymsAndCHEBI()
        kegg_source.getGenes()
        kegg_source.getGeneInfo()

        # ---- gene ID conversion ----
        # Identifiers available per gene, by source:
        #   kegg:         KEGG id (main id), Ensembl, HGNC, HPRD, NCBI-GeneID,
        #                 NCBI-ProteinID, OMIM, UniProt, Vega, miRBase
        #   wikipathways: no main id; Entrez, Enzyme Nomenclature,
        #                 Uniprot (Uniprot-TrEMBL)
        #   hmdb:         HMDB protein accession (main id), Uniprot
        #   reactome:     Uniprot (main id)
        print("Converting gene ids...")
        gene_conversion_order = (
            (wiki_source, "wikipathways"),
            (hmdb_source, "hmdb"),
            (reactome_source, "reactome"),
            (kegg_source, "kegg"),
        )
        for source, label in gene_conversion_order:
            id_converter.GeneConvert(source.geneInfoDictionary, label)

        # The non-HMDB sources are each paired with the HMDB gene table.
        non_hmdb_sources = (
            (wiki_source, "wikipathways"),
            (reactome_source, "reactome"),
            (kegg_source, "kegg"),
        )
        for source, label in non_hmdb_sources:
            id_converter.GeneUniprotToHMDBP(source.geneInfoDictionary,
                                            hmdb_source.geneInfoDictionary,
                                            label)

        # ---- metabolite ID conversion ----
        print("Converting metabolite ids...")
        id_converter.MetaboliteKeggIDToChebi(
            kegg_source.metaboliteIDDictionary,
            hmdb_source.metaboliteIDDictionary,
            "hmdb")
        for source, label in non_hmdb_sources:
            id_converter.MetaboliteChebiToHMDB(
                source.metaboliteIDDictionary,
                hmdb_source.metaboliteIDDictionary,
                label)

        # ---- within-database duplicate checks ----
        # (compound banner, gene banner, loader, label)
        duplicate_checks = (
            ("Wikipathways compounds...", "Wikipathways genes...",
             wiki_source, "wikipathways"),
            ("Kegg compounds...", "kegg genes...", kegg_source, "kegg"),
            ("reactome compounds...", "reactome genes...",
             reactome_source, "reactome"),
            ("hmdb compounds...", "hmdb genes...", hmdb_source, "hmdb"),
        )
        for compound_banner, gene_banner, source, label in duplicate_checks:
            print(compound_banner)
            ramp_writer.checkForWithinDatabaseDuplicatesCompound(
                source.metaboliteIDDictionary, label)
            print(gene_banner)
            ramp_writer.checkForWithinDatabaseDuplicatesGene(
                source.geneInfoDictionary, label)

        # ---- RaMP ID generation ----
        # Sources are processed in a fixed order; each call is seeded with
        # the number returned by the previous one.
        import_order = (
            (hmdb_source, "hmdb"),
            (wiki_source, "wiki"),
            (reactome_source, "reactome"),
            (kegg_source, "kegg"),
        )
        print('Generate compound id')
        compound_number = 0
        for source, label in import_order:
            compound_number = ramp_writer.createRampCompoundID(
                source.metaboliteIDDictionary, label, compound_number)

        print('Generate gene id ...')
        gene_number = 0
        for source, label in import_order:
            gene_number = ramp_writer.createRampGeneID(
                source.geneInfoDictionary, label, gene_number)

        # ---- writing ----
        # The first two items of each write() result are passed as the last
        # two arguments of the next write(); the first write starts from 0, 0.
        print('Write to sql file...')
        written_numbers = (0, 0)
        for source, label in import_order:
            written_numbers = ramp_writer.write(
                source.metaboliteCommonName,
                source.pathwayDictionary,
                source.pathwayCategory,
                source.metabolitesWithPathwaysDictionary,
                source.metabolitesWithSynonymsDictionary,
                source.metaboliteIDDictionary,
                source.pathwaysWithGenesDictionary,
                source.metabolitesLinkedToGenes,
                source.geneInfoDictionary,
                source.biofluidLocation,
                source.biofluid,
                source.cellularLocation,
                source.cellular,
                source.pathwayOntology,
                source.exoEndoDictionary,
                source.exoEndo,
                source.tissueLocation,
                source.tissue,
                label,
                written_numbers[0],
                written_numbers[1])
        print("Done ... for importing database")

        # ---- statistics ----
        print("Compound:")
        statistics.analyteOverlaps(
            ramp_writer.rampCompoundIdInWhichDatabases,
            ramp_writer.rampCompoundIDdictionary,
            "Compound")
        print("\n")
        print("Gene:")
        statistics.analyteOverlaps(
            ramp_writer.rampGeneIdInWhichDatabases,
            ramp_writer.rampGeneIDdictionary,
            "Gene")

        content_report_order = (
            (hmdb_source, "hmdb"),
            (kegg_source, "kegg"),
            (reactome_source, "reactome"),
            (wiki_source, "wiki"),
        )
        for source, label in content_report_order:
            statistics.databaseContent(
                source.pathwayDictionary,
                source.pathwayCategory,
                source.metabolitesWithPathwaysDictionary,
                source.metabolitesWithSynonymsDictionary,
                source.metaboliteIDDictionary,
                source.pathwaysWithGenesDictionary,
                source.geneInfoDictionary,
                source.biofluidLocation,
                source.biofluid,
                source.cellularLocation,
                source.cellular,
                source.pathwayOntology,
                source.exoEndoDictionary,
                label)

        statistics.Apoptosis(ramp_writer.rampGeneIDdictionary,
                             wiki_source.pathwaysWithGenesDictionary,
                             kegg_source.pathwaysWithGenesDictionary,
                             reactome_source.pathwaysWithGenesDictionary)
コード例 #8
0
    def testKeggToHMDB(self):

        ###############################################################################################
        #IMPORTANT PART START

        hmdb = hmdbData()
        kegg = KeggData()
        sql = writeToSQL()
        idconvert = IDconversion()
        stat = getStatistics()

        #metabolite mapping for hmdb
        hmdb.metaboliteIDDictionary["HMDB00001"] = {
            "chebi_id": "NA",
            "drugbank_id": "NA",
            "drugbank_metabolite_id": "NA",
            "phenol_explorer_compound_id": "NA",
            "phenol_explorer_metabolite_id": "NA",
            "foodb_id": "FDB012119",
            "knapsack_id": "NA",
            "chemspider_id": "83153",
            "kegg_id": "C14814",
            "biocyc_id": "CPD-1823",
            "bigg_id": "NA",
            "wikipidia": "NA",
            "nugowiki": "NA",
            "metagene": "NA",
            "metlin_id": "3741",
            "pubchem_compound_id": "92105",
            "het_id": "HIC",
            "hmdb_id": ["HMDB00001"],
            "CAS": "NA"
        }

        #metabolite mapping for kegg
        kegg.metaboliteIDDictionary["C14814"] = {
            "chebi_id": ["34131"],
            "drugbank_id": "NA",
            "drugbank_metabolite_id": "NA",
            "phenol_explorer_compound_id": "NA",
            "phenol_explorer_metabolite_id": "NA",
            "foodb_id": "NA",
            "knapsack_id": "NA",
            "chemspider_id": "NA",
            "kegg_id": "C14814",
            "biocyc_id": "NA",
            "bigg_id": "NA",
            "wikipidia": "NA",
            "nugowiki": "NA",
            "metagene": "NA",
            "metlin_id": "NA",
            "pubchem_compound_id": "NA",
            "het_id": "NA",
            "hmdb_id": "NA",
            "CAS": "NA"
        }

        idconvert.MetaboliteKeggIDToChebi(kegg.metaboliteIDDictionary,
                                          hmdb.metaboliteIDDictionary, "hmdb")
        idconvert.MetaboliteChebiToHMDB(kegg.metaboliteIDDictionary,
                                        hmdb.metaboliteIDDictionary, "kegg")

        #IMPORTANT PART END
        ######################################################################################################

        #Pathway names
        kegg.pathwayDictionary["00010"] = "Glycolysis / Gluconeogenesis"
        kegg.pathwayDictionary["00020"] = "Citrate cycle (TCA cycle)"
        kegg.pathwayDictionary["00520"] = "Fake Pathway Name One"
        kegg.pathwayDictionary["00524"] = "Fake Pathway Name Two"
        kegg.pathwayDictionary["00540"] = "Fake Pathway Name Three"
        kegg.pathwayDictionary["00550"] = "Fake Pathway Name Four"
        kegg.pathwayDictionary["00030"] = "Fake Pathway Name Five"
        kegg.pathwayDictionary["00040"] = "Fake Pathway Name Six"
        kegg.pathwayDictionary["00053"] = "Fake Pathway Name Seven"
        kegg.pathwayDictionary["00250"] = "Fake Pathway Name Eight"
        kegg.pathwayDictionary["00260"] = "Fake Pathway Name Nine"

        #Pathway categories
        kegg.pathwayCategory["00010"] = "Metabolism"
        kegg.pathwayCategory["00020"] = "Human Diseases"
        kegg.pathwayCategory["00520"] = "Cellular Processes"
        kegg.pathwayCategory["00524"] = "Human Diseases"
        kegg.pathwayCategory["00540"] = "Human Diseases"
        kegg.pathwayCategory["00550"] = "Metabolism"
        kegg.pathwayCategory["00030"] = "Human Diseases"
        kegg.pathwayCategory["00040"] = "Cellular Processes"
        kegg.pathwayCategory["00053"] = "Metabolism"
        kegg.pathwayCategory["00250"] = "Human Diseases"
        kegg.pathwayCategory["00260"] = "Cellular Processes"

        #metabolites linked with pathways
        kegg.metabolitesWithPathwaysDictionary["C14814"] = ["00020"]

        #metabolites linkes with synonyms
        kegg.metabolitesWithSynonymsDictionary["C14814"] = [
            "MetaboliteSynonym1"
        ]

        #pathway to gene id
        kegg.pathwaysWithGenesDictionary["00010"] = ["geneA", "geneB"]
        kegg.pathwaysWithGenesDictionary["00020"] = ["geneA", "geneB"]
        kegg.pathwaysWithGenesDictionary["00520"] = ["geneA", "geneB"]
        kegg.pathwaysWithGenesDictionary["00524"] = ["geneA", "geneB"]
        kegg.pathwaysWithGenesDictionary["00540"] = ["geneA", "geneB"]
        kegg.pathwaysWithGenesDictionary["00550"] = ["geneA", "geneB"]
        kegg.pathwaysWithGenesDictionary["00030"] = ["geneA", "geneB"]
        kegg.pathwaysWithGenesDictionary["00040"] = ["geneA", "geneB"]
        kegg.pathwaysWithGenesDictionary["00053"] = ["geneA", "geneB"]
        kegg.pathwaysWithGenesDictionary["00250"] = ["geneA", "geneB"]
        kegg.pathwaysWithGenesDictionary["00260"] = ["geneA", "geneB"]

        #gene to geneinfo
        kegg.geneInfoDictionary["geneA"] = {
            'common_name': 'Apple',
            'kegg': 'NA',
            'Ensembl': 'ENSG00000127481',
            'HGNC': '30313',
            'HPRD': 'NA',
            'NCBI-GeneID': '23352',
            'NCBI-ProteinID': 'NP_065816',
            'OMIM': '609890',
            'UniProt': 'Q5T4S7',
            'Vega': 'OTTHUMG00000002498',
            'miRBase': 'NA',
            'HMDB_protien_accession': 'NA',
            'Entrez': 'NA',
            'Enzyme Nomenclature': 'NA'
        }
        kegg.geneInfoDictionary["geneB"] = {
            'common_name': 'Banana',
            'kegg': 'NA',
            'Ensembl': 'ENSG00000100320',
            'HGNC': '9906',
            'HPRD': 'NA',
            'NCBI-GeneID': '23543',
            'NCBI-ProteinID': 'NP_065816',
            'OMIM': '612149',
            'UniProt': 'O43251',
            'Vega': 'OTTHUMG00000150585',
            'miRBase': 'NA',
            'HMDB_protien_accession': 'NA',
            'Entrez': 'NA',
            'Enzyme Nomenclature': 'NA'
        }

        hmdb.metabolitesWithSynonymsDictionary["HMDB00001"] = [
            "1 Methylhistidine", "1-Methyl-L-histidine", "Pi-methylhistidine"
        ]

        hmdb.metabolitesWithPathwaysDictionary["HMDB00001"] = [
            "SMP00716", "SMP00006"
        ]

        hmdb.pathwayDictionary["SMP00716"] = "Thyroid hormone synthesis"
        hmdb.pathwayDictionary["SMP00006"] = "Tyrosine Metabolism"
        hmdb.pathwayDictionary["SMP00001"] = "Pathway1"
        hmdb.pathwayDictionary["SMP00002"] = "Pathway2"
        hmdb.pathwayDictionary["SMP00816"] = "Pathway3"

        hmdb.pathwayCategory["SMP00716"] = "NA"
        hmdb.pathwayCategory["SMP00006"] = "NA"
        hmdb.pathwayCategory["SMP00001"] = "NA"
        hmdb.pathwayCategory["SMP00002"] = "NA"
        hmdb.pathwayCategory["SMP00816"] = "NA"

        hmdb.pathwaysWithGenesDictionary["SMP00716"] = ["Q96KN2", "uniprot1"]
        hmdb.pathwaysWithGenesDictionary["SMP00006"] = ["Q96KN2", "uniprot1"]
        hmdb.pathwaysWithGenesDictionary["SMP00001"] = ["Q96KN2", "uniprot1"]
        hmdb.pathwaysWithGenesDictionary["SMP00002"] = ["Q96KN2", "uniprot1"]
        hmdb.pathwaysWithGenesDictionary["SMP00816"] = ["Q96KN2", "uniprot1"]

        hmdb.geneInfoDictionary["Q96KN2"] = {
            'common_name': 'CNDP1',
            'kegg': 'NA',
            'Ensembl': 'NA',
            'HGNC': 'NA',
            'HPRD': 'NA',
            'NCBI-GeneID': 'NA',
            'NCBI-ProteinID': 'NA',
            'OMIM': 'NA',
            'UniProt': 'Q96KN2',
            'Vega': 'NA',
            'miRBase': 'NA',
            'HMDB_protien_accession': 'HMDBP00473',
            'Entrez': 'NA',
            'Enzyme Nomenclature': 'NA'
        }

        hmdb.geneInfoDictionary["uniprot1"] = {
            'common_name': 'genename1',
            'kegg': 'NA',
            'Ensembl': 'NA',
            'HGNC': 'NA',
            'HPRD': 'NA',
            'NCBI-GeneID': 'NA',
            'NCBI-ProteinID': 'NA',
            'OMIM': 'NA',
            'UniProt': 'uniprot1',
            'Vega': 'NA',
            'miRBase': 'NA',
            'HMDB_protien_accession': 'HMDBP00321',
            'Entrez': 'NA',
            'Enzyme Nomenclature': 'NA'
        }

        hmdb.biofluidLocation["HMDB00001"] = [
            "Blood", "Cerebrospinal Fluid (CSF)", "Feces", "Saliva", "Urine"
        ]

        hmdb.exoEndoDictionary["HMDB00001"] = ["Food"]

        hmdb.biofluid["Blood"] = "placeholder"
        hmdb.biofluid["Cerebrospinal Fluid (CSF)"] = "placeholder"
        hmdb.biofluid["Feces"] = "placeholder"
        hmdb.biofluid["Saliva"] = "placeholder"
        hmdb.biofluid["Urine"] = "placeholder"

        hmdb.cellularLocation["HMDB00001"] = ["Cytoplasm", "Location1"]

        hmdb.cellular["Cytoplasm"] = "placeholder"
        hmdb.cellular["Location1"] = "placeholder"
        hmdb.cellular["Location2"] = "placeholder"
        hmdb.cellular["Location3"] = "placeholder"

        sql.write(kegg.pathwayDictionary, kegg.pathwayCategory,
                  kegg.metabolitesWithPathwaysDictionary,
                  kegg.metabolitesWithSynonymsDictionary,
                  kegg.metaboliteIDDictionary,
                  kegg.pathwaysWithGenesDictionary, kegg.geneInfoDictionary,
                  kegg.biofluidLocation, kegg.biofluid, kegg.cellularLocation,
                  kegg.cellular, kegg.pathwayOntology, kegg.exoEndoDictionary,
                  "kegg", 0, 0)

        sql.write(hmdb.pathwayDictionary, hmdb.pathwayCategory,
                  hmdb.metabolitesWithPathwaysDictionary,
                  hmdb.metabolitesWithSynonymsDictionary,
                  hmdb.metaboliteIDDictionary,
                  hmdb.pathwaysWithGenesDictionary, hmdb.geneInfoDictionary,
                  hmdb.biofluidLocation, hmdb.biofluid, hmdb.cellularLocation,
                  hmdb.cellular, hmdb.pathwayOntology, hmdb.exoEndoDictionary,
                  "hmdb", 0, 0)

        stat.analyteOverlaps(sql.rampCompoundIdInWhichDatabases)

        stat.analyteOverlaps(sql.rampGeneIdInWhichDatabases)
コード例 #9
0
    def testMain(self):
        """Run the HMDB import steps in order and write the result via writeToSQL.

        The method loads HMDB metabolites, pathways/synonyms, genes and the
        pathway-to-gene links, checks for within-database duplicates, creates
        RaMP compound and gene ids starting at 0, calls ``write`` for the
        "hmdb" source and finally prints the sizes of four HMDB dictionaries.
        It contains no assertions, so it only fails when a step raises.
        """
        writer = writeToSQL()
        source = hmdbData()
        print(source.day)
        # presumably fetches the HMDB files when they are missing -- confirm
        # against hmdbData.getDatabaseFiles
        source.getDatabaseFiles()
        # Instantiated but not used by any later statement in this method.
        id_converter = IDconversion()
        stats = getStatistics()

        print("Getting HMDB Metabolites...")
        source.getMetaboliteOtherIDs()
        print(len(source.metaboliteIDDictionary))
        # Spot-check two ids; a missing id raises KeyError here.
        for sample_id in ("HMDB0000538", "HMDB0000122"):
            print(source.metaboliteIDDictionary[sample_id])

        print("Getting HMDB pathways and synonyms...")
        source.getPathwaysandSynonyms()
        # These progress lines are printed verbatim; nothing is computed
        # for the first two of them.
        for progress in ('Has pathways ...',
                         'How many pathways relationship ...',
                         "Getting HMDB genes..."):
            print(progress)

        source.getGenes()

        # The biofluid/cellular message is still printed although only the
        # pathway-to-gene step runs after it.
        print("Getting HMDB biofluid and cellular locations...")
        source.getPathwaysLinkedToGene()
        print("Writing to files...")
        print("output each metabolites has how many pathways ...")

        print("hmdb compounds...")
        writer.checkForWithinDatabaseDuplicatesCompound(
            source.metaboliteIDDictionary, "hmdb")
        print("hmdb genes...")
        writer.checkForWithinDatabaseDuplicatesGene(
            source.geneInfoDictionary, "hmdb")

        # The returned counts are not needed below; the calls are kept for
        # whatever state they set up on the writer.
        writer.createRampCompoundID(source.metaboliteIDDictionary, "hmdb", 0)
        writer.createRampGeneID(source.geneInfoDictionary, "hmdb", 0)

        # Positional arguments for write(), in the order the call expects.
        write_arguments = (
            source.metaboliteCommonName,
            source.pathwayDictionary,
            source.pathwayCategory,
            source.metabolitesWithPathwaysDictionary,
            source.metabolitesWithSynonymsDictionary,
            source.metaboliteIDDictionary,
            source.pathwaysWithGenesDictionary,
            source.metabolitesLinkedToGenes,
            source.geneInfoDictionary,
            source.biofluidLocation,
            source.biofluid,
            source.cellularLocation,
            source.cellular,
            source.pathwayOntology,
            source.exoEndoDictionary,
            source.exoEndo,
            source.tissueLocation,
            source.tissue,
            "hmdb",
            0,
            0,
        )
        writer.write(*write_arguments)

        # Summary: one line per dictionary, caption followed by its size.
        summary = (
            ('Compound number is ', source.metaboliteIDDictionary),
            ('Gene number is ', source.geneInfoDictionary),
            ('Pathway number is ', source.pathwayDictionary),
            ('Pathway that have Gene with it ',
             source.pathwaysWithGenesDictionary),
        )
        for caption, collection in summary:
            print(caption + str(len(collection)))
コード例 #10
0
    def testreactomeToHMDB(self):
        """Exercise the Reactome/HMDB id conversion on a tiny hand-built fixture.

        Populates an ``hmdbData`` and a ``reactomeData`` instance with one
        metabolite each (``HMDB00001`` and ``C14814``) and one shared gene
        (``Q96KN2``), runs ``MetaboliteChebiToHMDB`` and
        ``GeneUniprotToHMDBP``, creates RaMP compound and gene ids, writes
        both sources through ``writeToSQL.write`` and prints the overlap and
        content statistics. The method makes no assertions, so it only fails
        when one of the steps raises.
        """
        hmdb = hmdbData()
        reactome = reactomeData()
        sql = writeToSQL()
        idconvert = IDconversion()
        stat = getStatistics()

        # Metabolite id mapping for HMDB.
        hmdb.metaboliteIDDictionary["HMDB00001"] = {
            "chebi_id": ["C14814"],
            "drugbank_id": "NA",
            "drugbank_metabolite_id": "NA",
            "phenol_explorer_compound_id": "NA",
            "phenol_explorer_metabolite_id": "NA",
            "foodb_id": "FDB012119",
            "knapsack_id": "NA",
            "chemspider_id": "83153",
            "kegg_id": "C14814",
            "biocyc_id": "CPD-1823",
            "bigg_id": "NA",
            "wikipidia": "NA",
            "nugowiki": "NA",
            "metagene": "NA",
            "metlin_id": "3741",
            "pubchem_compound_id": "92105",
            "het_id": "HIC",
            "hmdb_id": ["HMDB00001"],
            "CAS": "NA"
        }

        # Metabolite id mapping for Reactome; only the chebi id is filled in.
        reactome.metaboliteIDDictionary["C14814"] = {
            "chebi_id": ["C14814"],
            "drugbank_id": "NA",
            "drugbank_metabolite_id": "NA",
            "phenol_explorer_compound_id": "NA",
            "phenol_explorer_metabolite_id": "NA",
            "foodb_id": "NA",
            "knapsack_id": "NA",
            "chemspider_id": "NA",
            "kegg_id": "NA",
            "biocyc_id": "NA",
            "bigg_id": "NA",
            "wikipidia": "NA",
            "nugowiki": "NA",
            "metagene": "NA",
            "metlin_id": "NA",
            "pubchem_compound_id": "NA",
            "het_id": "NA",
            "hmdb_id": "NA",
            "CAS": "NA"
        }

        # Gene fixture for HMDB. This key used to be assigned twice; only the
        # later value was ever read, so that value is the one kept here.
        hmdb.geneInfoDictionary["Q96KN2"] = {
            'common_name': 'CNDP1',
            'kegg': 'NA',
            'Ensembl': 'NA',
            'HGNC': 'NA',
            'HPRD': 'NA',
            'NCBI-GeneID': 'NA',
            'NCBI-ProteinID': 'NA',
            'OMIM': 'NA',
            'UniProt': ['Q96KN2'],
            'Vega': 'NA',
            'miRBase': 'NA',
            'HMDB_protien_accession': 'HMDBP00473',
            'Entrez': 'NA',
            'Enzyme Nomenclature': 'NA'
        }
        # Gene fixture for Reactome: same UniProt id, everything else 'NA'.
        reactome.geneInfoDictionary["Q96KN2"] = {
            'common_name': 'NA',
            'kegg': 'NA',
            'Ensembl': 'NA',
            'HGNC': 'NA',
            'HPRD': 'NA',
            'NCBI-GeneID': 'NA',
            'NCBI-ProteinID': 'NA',
            'OMIM': 'NA',
            'UniProt': ['Q96KN2'],
            'Vega': 'NA',
            'miRBase': 'NA',
            'HMDB_protien_accession': 'NA',
            'Entrez': 'NA',
            'Enzyme Nomenclature': 'NA'
        }

        hmdb.metabolitesWithSynonymsDictionary["HMDB00001"] = [
            "1 Methylhistidine", "1-Methyl-L-histidine", "Pi-methylhistidine"
        ]

        hmdb.metabolitesWithPathwaysDictionary["HMDB00001"] = [
            "SMP00716", "SMP00006"
        ]

        # HMDB pathways: name, category "NA" and the single linked gene.
        # The tuple order fixes the insertion order of all three dictionaries.
        hmdb_pathways = (
            ("SMP00716", "Thyroid hormone synthesis"),
            ("SMP00006", "Tyrosine Metabolism"),
            ("SMP00001", "Pathway1"),
            ("SMP00002", "Pathway2"),
            ("SMP00816", "Pathway3"),
        )
        for pathway_id, pathway_name in hmdb_pathways:
            hmdb.pathwayDictionary[pathway_id] = pathway_name
            hmdb.pathwayCategory[pathway_id] = "NA"
            # A fresh list per pathway so the entries never share state.
            hmdb.pathwaysWithGenesDictionary[pathway_id] = ["Q96KN2"]

        # Biofluid locations of the metabolite and the biofluid lookup table.
        biofluids = ("Blood", "Cerebrospinal Fluid (CSF)", "Feces", "Saliva",
                     "Urine")
        hmdb.biofluidLocation["HMDB00001"] = list(biofluids)
        for fluid in biofluids:
            hmdb.biofluid[fluid] = "placeholder"

        # Cellular locations; the lookup table holds two extra, unlinked ones.
        hmdb.cellularLocation["HMDB00001"] = ["Cytoplasm", "Location1"]
        for location in ("Cytoplasm", "Location1", "Location2", "Location3"):
            hmdb.cellular[location] = "placeholder"

        # NOTE(review): a sibling fixture in this file stores this value as a
        # list (["Food"]); confirm which shape sql.write expects.
        hmdb.exoEndoDictionary["HMDB00001"] = "Food"

        reactome.pathwayDictionary[
            "R-HSA-210745"] = "Citrate cycle (TCA cycle)"

        # Pathway categories
        reactome.pathwayCategory["R-HSA-210745"] = "Human Diseases"

        # Metabolites linked with pathways
        reactome.metabolitesWithPathwaysDictionary["C14814"] = ["R-HSA-210745"]
        # Metabolites linked with synonyms
        reactome.metabolitesWithSynonymsDictionary["C14814"] = [
            "MetaboliteSynonym1"
        ]

        # Pathway to gene id
        reactome.pathwaysWithGenesDictionary["R-HSA-210745"] = ["Q96KN2"]

        idconvert.MetaboliteChebiToHMDB(reactome.metaboliteIDDictionary,
                                        hmdb.metaboliteIDDictionary,
                                        "reactome")
        # NOTE(review): the label here is "hmdb" while the metabolite call
        # above passes "reactome" -- confirm this is intended.
        idconvert.GeneUniprotToHMDBP(reactome.geneInfoDictionary,
                                     hmdb.geneInfoDictionary, "hmdb")

        # RaMP ids: each Reactome call receives the value returned by the
        # matching HMDB call as its third argument.
        hmdbcompoundnum = sql.createRampCompoundID(hmdb.metaboliteIDDictionary,
                                                   "hmdb", 0)
        sql.createRampCompoundID(reactome.metaboliteIDDictionary, "reactome",
                                 hmdbcompoundnum)

        hmdbgenenum = sql.createRampGeneID(hmdb.geneInfoDictionary, "hmdb", 0)
        sql.createRampGeneID(reactome.geneInfoDictionary, "reactome",
                             hmdbgenenum)

        sql.write(reactome.pathwayDictionary, reactome.pathwayCategory,
                  reactome.metabolitesWithPathwaysDictionary,
                  reactome.metabolitesWithSynonymsDictionary,
                  reactome.metaboliteIDDictionary,
                  reactome.pathwaysWithGenesDictionary,
                  reactome.geneInfoDictionary, reactome.biofluidLocation,
                  reactome.biofluid, reactome.cellularLocation,
                  reactome.cellular, reactome.pathwayOntology,
                  reactome.exoEndoDictionary, "reactome", 0, 0)

        sql.write(hmdb.pathwayDictionary, hmdb.pathwayCategory,
                  hmdb.metabolitesWithPathwaysDictionary,
                  hmdb.metabolitesWithSynonymsDictionary,
                  hmdb.metaboliteIDDictionary,
                  hmdb.pathwaysWithGenesDictionary, hmdb.geneInfoDictionary,
                  hmdb.biofluidLocation, hmdb.biofluid, hmdb.cellularLocation,
                  hmdb.cellular, hmdb.pathwayOntology, hmdb.exoEndoDictionary,
                  "hmdb", 0, 0)

        print("Compound:")
        stat.analyteOverlaps(sql.rampCompoundIdInWhichDatabases,
                             sql.rampCompoundIDdictionary, "Compound")
        print("\n")
        print("Gene:")
        stat.analyteOverlaps(sql.rampGeneIdInWhichDatabases,
                             sql.rampGeneIDdictionary, "Gene")

        stat.databaseContent(hmdb.pathwayDictionary, hmdb.pathwayCategory,
                             hmdb.metabolitesWithPathwaysDictionary,
                             hmdb.metabolitesWithSynonymsDictionary,
                             hmdb.metaboliteIDDictionary,
                             hmdb.pathwaysWithGenesDictionary,
                             hmdb.geneInfoDictionary, hmdb.biofluidLocation,
                             hmdb.biofluid, hmdb.cellularLocation,
                             hmdb.cellular, hmdb.pathwayOntology,
                             hmdb.exoEndoDictionary, "hmdb")
コード例 #11
0
ファイル: main.py プロジェクト: ncats/RaMP-BackEnd
    def runEverything(self, getDatabaseFiles=True):
        """Load the sources, assign RaMP ids, write them out and print statistics.

        Steps, in order: change the working directory to ``../main/``; call
        ``getEverything(True)`` on the HMDB, WikiPathways and Reactome
        sources; create RaMP compound and gene ids across hmdb, kegg, wiki
        and reactome, each call seeded with the previous call's result; call
        ``writeToSQL.write`` for hmdb, wiki, reactome and kegg, chaining the
        first two elements of each result into the next call; print the
        compound and gene overlaps and the per-source content statistics.

        ``getDatabaseFiles`` is accepted but not read anywhere in this
        method. ``kegg.getEverything`` is not called here, so the kegg
        dictionaries are used exactly as ``KeggData()`` created them.
        """
        writer = writeToSQL()

        stats = getStatistics()
        hmdb = hmdbData()
        wikipathways = WikipathwaysRDF()
        reactome = reactomeData()
        kegg = KeggData()
        # Machine dependent: the process working directory becomes ../main/.
        os.chdir('../main/')

        hmdb.getEverything(True)
        print("Getting wikipathways...")
        wikipathways.getEverything(True)
        print("Getting reactome...")
        reactome.getEverything(True)
        print("Getting kegg...")

        # Identifiers present for each gene, per source:
        #   kegg: keggid (mainID), 'Ensembl', 'HGNC', 'HPRD', 'NCBI-GeneID',
        #         'NCBI-ProteinID', 'OMIM', 'UniProt', 'Vega', 'miRBase'
        #   wikipathways: (no mainID), 'Entrez', 'Enzyme Nomenclature',
        #         'Uniprot (Uniprot-TrEMBL)'
        #   hmdb: HMDB-protien-accession (mainID), 'Uniprot'
        #   reactome: Uniprot (mainID)

        print('Generate compound id')
        # Each call is seeded with the value returned by the previous one.
        compound_total = writer.createRampCompoundID(
            hmdb.metaboliteIDDictionary, "hmdb", 0)
        print("hmdbcompoundnum:   ", compound_total)
        compound_total = writer.createRampCompoundID(
            kegg.metaboliteIDDictionary, "kegg", compound_total)
        compound_total = writer.createRampCompoundID(
            wikipathways.metaboliteIDDictionary, "wiki", compound_total)
        print("wikicompoundnum:   ", compound_total)
        writer.createRampCompoundID(
            reactome.metaboliteIDDictionary, "reactome", compound_total)

        print('Generate gene id ...')
        gene_totals = []
        running_total = 0
        for source, label in ((hmdb, "hmdb"), (kegg, "kegg"),
                              (wikipathways, "wiki"), (reactome, "reactome")):
            running_total = writer.createRampGeneID(
                source.geneInfoDictionary, label, running_total)
            gene_totals.append(running_total)
        hmdb_genes, kegg_genes, wiki_genes, reactome_genes = gene_totals
        print(" hmdbgenenum ", hmdb_genes, " kegggenenum ", kegg_genes,
              " wikigenenum ", wiki_genes, " reactomegenenum ",
              reactome_genes)

        # Attributes passed positionally to write(), in call order; the
        # InChI mapping, the source label and two numbers follow them.
        write_fields = (
            'metaboliteCommonName', 'pathwayDictionary', 'pathwayCategory',
            'metabolitesWithPathwaysDictionary',
            'metabolitesWithSynonymsDictionary', 'metaboliteIDDictionary',
            'pathwaysWithGenesDictionary', 'metabolitesLinkedToGenes',
            'geneInfoDictionary', 'biofluidLocation', 'biofluid',
            'cellularLocation', 'cellular', 'pathwayOntology',
            'exoEndoDictionary', 'exoEndo', 'tissueLocation', 'tissue')

        def write_source(source, inchi, label, first, second):
            # Forward one source to writer.write and hand back its result.
            arguments = [getattr(source, field) for field in write_fields]
            arguments += [inchi, label, first, second]
            return writer.write(*arguments)

        print('Write to sql file...')
        # Only hmdb supplies its own InChI mapping; the others pass an
        # empty dict.
        hmdb_written = write_source(hmdb, hmdb.metaInchi, "hmdb", 0, 0)
        wiki_written = write_source(wikipathways, dict(), "wiki",
                                    hmdb_written[0], hmdb_written[1])
        reactome_written = write_source(reactome, dict(), "reactome",
                                        wiki_written[0], wiki_written[1])
        write_source(kegg, dict(), "kegg",
                     reactome_written[0], reactome_written[1])

        print("Done ... for importing database")

        print("Compound:")
        stats.analyteOverlaps(writer.rampCompoundIdInWhichDatabases,
                              writer.rampCompoundIDdictionary, "Compound")
        print("\n")
        print("Gene:")
        stats.analyteOverlaps(writer.rampGeneIdInWhichDatabases,
                              writer.rampGeneIDdictionary, "Gene")

        # Attributes passed positionally to databaseContent(), followed by
        # the source label.
        content_fields = (
            'pathwayDictionary', 'pathwayCategory',
            'metabolitesWithPathwaysDictionary',
            'metabolitesWithSynonymsDictionary', 'metaboliteIDDictionary',
            'pathwaysWithGenesDictionary', 'geneInfoDictionary',
            'biofluidLocation', 'biofluid', 'cellularLocation', 'cellular',
            'pathwayOntology', 'exoEndoDictionary')

        for source, label in ((hmdb, "hmdb"), (kegg, "kegg"),
                              (reactome, "reactome"),
                              (wikipathways, "wiki")):
            content = [getattr(source, field) for field in content_fields]
            content.append(label)
            stats.databaseContent(*content)
コード例 #12
0
from hmdbData import hmdbData
from writeToSQL import writeToSQL
from reactomeData import reactomeData
from getStatistics import getStatistics
stat = getStatistics()

import unittest


class TestHMDBMain(unittest.TestCase):
    def testMain(self):

        sql = writeToSQL()
        hmdb = hmdbData()
        reactome = reactomeData()

        hmdb.metaboliteIDDictionary["HMDB00001"] = {
            "chebi_id": "NA",
            "drugbank_id": "NA",
            "drugbank_metabolite_id": "NA",
            "phenol_explorer_compound_id": "NA",
            "phenol_explorer_metabolite_id": "NA",
            "foodb_id": "FDB012119",
            "knapsack_id": "NA",
            "chemspider_id": "83153",
            "kegg_id": "C14814",
            "biocyc_id": "CPD-1823",
            "bigg_id": "NA",
            "wikipidia": "NA",
            "nugowiki": "NA",
            "metagene": "NA",