def testMain(self):
    """Run the KEGG import on its own and print summary counts.

    Loads pathways, metabolites, synonyms and genes from KEGG, converts the
    gene identifiers, allocates RaMP compound/gene IDs starting at 0, writes
    everything through ``writeToSQL.write`` and finally prints the compound
    and gene overlap statistics.
    """
    kegg_db = KeggData()
    writer = writeToSQL()
    id_converter = IDconversion()
    stats = getStatistics()

    # Fetch the raw database files before parsing anything.
    kegg_db.getDatabaseFiles()

    print('get pathways')
    kegg_db.getPathways()
    kegg_db.getPathways_with_genes()
    print(len(kegg_db.pathwayDictionary))

    print('get metabolites')
    kegg_db.getMetabolites()
    print('get synonyms and chebi')
    kegg_db.getSynonymsAndCHEBI()
    print(len(kegg_db.metaboliteIDDictionary))
    # Spot-check two hard-coded compound IDs (KeyError if either is absent).
    for compound_id in ("C00002", "C00001"):
        print(kegg_db.metaboliteIDDictionary[compound_id])

    print('get genes')
    kegg_db.getGenes()
    print(len(kegg_db.geneInfoDictionary))
    kegg_db.getGeneInfo()
    kegg_db.getPathwayLinkedToGene()

    # Persist once before and once after the gene-ID conversion.
    kegg_db.write_myself_files(database='kegg')
    id_converter.GeneConvert(kegg_db.geneInfoDictionary, "kegg")
    kegg_db.write_myself_files('kegg')

    writer.createRampCompoundID(kegg_db.metaboliteIDDictionary, "kegg", 0)
    writer.createRampGeneID(kegg_db.geneInfoDictionary, "kegg", 0)
    writer.write(
        kegg_db.metaboliteCommonName,
        kegg_db.pathwayDictionary,
        kegg_db.pathwayCategory,
        kegg_db.metabolitesWithPathwaysDictionary,
        kegg_db.metabolitesWithSynonymsDictionary,
        kegg_db.metaboliteIDDictionary,
        kegg_db.pathwaysWithGenesDictionary,
        kegg_db.metabolitesLinkedToGenes,
        kegg_db.geneInfoDictionary,
        kegg_db.biofluidLocation,
        kegg_db.biofluid,
        kegg_db.cellularLocation,
        kegg_db.cellular,
        kegg_db.pathwayOntology,
        kegg_db.exoEndoDictionary,
        kegg_db.exoEndo,
        kegg_db.tissueLocation,
        kegg_db.tissue,
        "kegg",
        0,
        0,
    )

    print(f'metaboliteIDdict number is {len(kegg_db.metaboliteIDDictionary)}')
    print(f'GeneInfo number is {len(kegg_db.geneInfoDictionary)}')
    print(f'PathwayDict number is {len(kegg_db.pathwayDictionary)}')
    print(f'MetabolitesWithPath is {len(kegg_db.metabolitesWithPathwaysDictionary)}')

    print("Compound:")
    stats.analyteOverlaps(
        writer.rampCompoundIdInWhichDatabases,
        writer.rampCompoundIDdictionary,
        "Compound",
    )
    print("\n")
    print("Gene:")
    stats.analyteOverlaps(
        writer.rampGeneIdInWhichDatabases,
        writer.rampGeneIDdictionary,
        "Gene",
    )
def testMain(self):
    """Compute the gene overlap across HMDB, KEGG, WikiPathways and Reactome.

    Only the gene-related loaders of each source are run. RaMP gene IDs are
    allocated source by source, each call continuing from the number returned
    by the previous one, and the overlap statistics are reported at the end.
    """
    writer = writeToSQL()
    hmdb_db = hmdbData()
    hmdb_db.getDatabaseFiles()
    id_converter = IDconversion()  # unused: the HMDB gene conversion is disabled in this test
    stats = getStatistics()
    wiki_db = wikipathwaysData()
    reactome_db = reactomeData()
    kegg_db = KeggData()

    print('Running overlap plot test case...')

    hmdb_db.getGenes()
    hmdb_db.getPathwaysLinkedToGene()

    wiki_db.getEverything()
    wiki_db.getCommonNameForChebi()

    reactome_db.getGenes()
    reactome_db.getCommonNameFromUniprot()

    kegg_db.getPathways()
    kegg_db.getMetabolites()
    kegg_db.getGenes()
    kegg_db.getGeneInfo()
    kegg_db.getPathwayLinkedToGene()

    # Thread the running gene counter through the sources in a fixed order.
    gene_counter = 0
    for source, label in (
        (hmdb_db, "hmdb"),
        (kegg_db, 'kegg'),
        (wiki_db, 'wiki'),
        (reactome_db, 'reactome'),
    ):
        gene_counter = writer.createRampGeneID(
            source.geneInfoDictionary, label, gene_counter)

    stats.analyteOverlaps(
        writer.rampGeneIdInWhichDatabases, writer.rampGeneIDdictionary, 'Gene')
def testMain(self):
    """Run the WikiPathways import on its own and print summary counts.

    Downloads and parses WikiPathways, converts gene IDs, checks for
    within-database duplicates, allocates RaMP IDs starting at 0, writes the
    result through ``writeToSQL.write`` and reports overlap statistics.
    """
    writer = writeToSQL()
    id_converter = IDconversion()
    stats = getStatistics()
    wiki_db = wikipathwaysData()

    wiki_db.getDatabaseFiles()
    wiki_db.getEverything()
    print(wiki_db.setOfType)
    wiki_db.getCommonNameForChebi()

    id_converter.GeneConvert(wiki_db.geneInfoDictionary, "wiki")

    writer.checkForWithinDatabaseDuplicatesCompound(
        wiki_db.metaboliteIDDictionary, "wiki")
    writer.checkForWithinDatabaseDuplicatesGene(
        wiki_db.geneInfoDictionary, "wiki")

    writer.createRampCompoundID(wiki_db.metaboliteIDDictionary, "wiki", 0)
    writer.createRampGeneID(wiki_db.geneInfoDictionary, "wiki", 0)

    print("Write to file...")
    writer.write(
        wiki_db.metaboliteCommonName,
        wiki_db.pathwayDictionary,
        wiki_db.pathwayCategory,
        wiki_db.metabolitesWithPathwaysDictionary,
        wiki_db.metabolitesWithSynonymsDictionary,
        wiki_db.metaboliteIDDictionary,
        wiki_db.pathwaysWithGenesDictionary,
        wiki_db.metabolitesLinkedToGenes,
        wiki_db.geneInfoDictionary,
        wiki_db.biofluidLocation,
        wiki_db.biofluid,
        wiki_db.cellularLocation,
        wiki_db.cellular,
        wiki_db.pathwayOntology,
        wiki_db.exoEndoDictionary,
        wiki_db.exoEndo,
        wiki_db.tissueLocation,
        wiki_db.tissue,
        "wiki",
        0,
        0,
    )

    print(f"Pathways number is {len(wiki_db.pathwayDictionary)}")
    print(f"metabolites number is {len(wiki_db.metaboliteIDDictionary)}")
    print(f'genes number is {len(wiki_db.geneInfoDictionary)}')

    # The overlap report is emitted twice in a row.
    # NOTE(review): the second pass looks like a copy-paste duplicate —
    # confirm before removing it.
    for _ in range(2):
        print("Compound:")
        stats.analyteOverlaps(
            writer.rampCompoundIdInWhichDatabases,
            writer.rampCompoundIDdictionary,
            "Compound",
        )
        print("\n")
        print("Gene:")
        stats.analyteOverlaps(
            writer.rampGeneIdInWhichDatabases,
            writer.rampGeneIDdictionary,
            "Gene",
        )
def testMain2(self):
    """Dump the RaMP compound-ID dictionary built from HMDB to a text file.

    Each entry of ``sql.rampCompoundIDdictionary`` becomes one UTF-8 encoded
    ``key<TAB>value`` line in ``../misc/test/hmdb/rampIDToHMDBIDs.txt``.
    """
    hmdb_db = hmdbData()
    writer = writeToSQL()

    hmdb_db.getMetaboliteOtherIDs()
    writer.createRampCompoundID(hmdb_db.metaboliteIDDictionary, 'hmdb', 0)

    hmdb_db.check_path('../misc/test/hmdb/')
    with open('../misc/test/hmdb/rampIDToHMDBIDs.txt', 'wb') as out_file:
        for entry_key, entry_value in writer.rampCompoundIDdictionary.items():
            fields = (entry_key.encode('utf-8'), entry_value.encode('utf-8'))
            out_file.write(b'\t'.join(fields) + b'\n')
def testMain(self):
    """Spot-check WikiPathways gene records, then write the import output.

    Prints three randomly chosen gene records plus one hard-coded Ensembl
    record, pauses for three seconds, and then runs the usual RaMP ID
    allocation, write and overlap-report steps for WikiPathways alone.
    """
    writer = writeToSQL()
    id_converter = IDconversion()  # unused: gene conversion is disabled in this test
    stats = getStatistics()
    wiki_db = wikipathwaysData()
    wiki_db.getEverything()

    # Draw all three random keys first, then print the matching records.
    sampled_keys = [
        random.choice(list(wiki_db.geneInfoDictionary.keys()))
        for _ in range(3)
    ]
    for gene_key in sampled_keys:
        print(wiki_db.geneInfoDictionary[gene_key])
    # Hard-coded gene expected to be present (KeyError otherwise).
    print(wiki_db.geneInfoDictionary['ENSG00000139977'])

    time.sleep(3)

    wiki_db.write_myself_files('wiki')
    writer.createRampCompoundID(wiki_db.metaboliteIDDictionary, "wiki", 0)
    writer.createRampGeneID(wiki_db.geneInfoDictionary, "wiki", 0)

    print("Write to file...")
    writer.write(
        wiki_db.metaboliteCommonName,
        wiki_db.pathwayDictionary,
        wiki_db.pathwayCategory,
        wiki_db.metabolitesWithPathwaysDictionary,
        wiki_db.metabolitesWithSynonymsDictionary,
        wiki_db.metaboliteIDDictionary,
        wiki_db.pathwaysWithGenesDictionary,
        wiki_db.metabolitesLinkedToGenes,
        wiki_db.geneInfoDictionary,
        wiki_db.biofluidLocation,
        wiki_db.biofluid,
        wiki_db.cellularLocation,
        wiki_db.cellular,
        wiki_db.pathwayOntology,
        wiki_db.exoEndoDictionary,
        wiki_db.exoEndo,
        wiki_db.tissueLocation,
        wiki_db.tissue,
        "wiki",
        0,
        0,
    )

    print("Compound:")
    stats.analyteOverlaps(
        writer.rampCompoundIdInWhichDatabases,
        writer.rampCompoundIDdictionary,
        "Compound",
    )
    print("\n")
    print("Gene:")
    stats.analyteOverlaps(
        writer.rampGeneIdInWhichDatabases,
        writer.rampGeneIDdictionary,
        "Gene",
    )
def testMain(self):
    """Run the Reactome import on its own and print summary counts.

    Downloads the Reactome files, loads genes, metabolites and common names,
    allocates RaMP IDs starting at 0, writes the result through
    ``writeToSQL.write`` and prints pathway/metabolite/gene totals.
    """
    writer = writeToSQL()
    reactome_db = reactomeData()
    reactome_db.getDatabaseFiles()

    print("Getting genes...")
    reactome_db.getGenes()
    print("Getting metabolites...")
    reactome_db.getMetabolites()
    print("Getting common names...")
    reactome_db.getCommonNameForChebi()
    print("Getting common names for genes ...")
    # Genes are loaded a second time here before the UniProt name lookup.
    reactome_db.getGenes()
    reactome_db.downloadCommonNameFromUniprot()
    reactome_db.getCommonNameFromUniprot()

    writer.createRampCompoundID(
        reactome_db.metaboliteIDDictionary, "reactome", 0)
    writer.createRampGeneID(reactome_db.geneInfoDictionary, "reactome", 0)
    writer.write(
        reactome_db.metaboliteCommonName,
        reactome_db.pathwayDictionary,
        reactome_db.pathwayCategory,
        reactome_db.metabolitesWithPathwaysDictionary,
        reactome_db.metabolitesWithSynonymsDictionary,
        reactome_db.metaboliteIDDictionary,
        reactome_db.pathwaysWithGenesDictionary,
        reactome_db.metabolitesLinkedToGenes,
        reactome_db.geneInfoDictionary,
        reactome_db.biofluidLocation,
        reactome_db.biofluid,
        reactome_db.cellularLocation,
        reactome_db.cellular,
        reactome_db.pathwayOntology,
        reactome_db.exoEndoDictionary,
        reactome_db.exoEndo,
        reactome_db.tissueLocation,
        reactome_db.tissue,
        "reactome",
        0,
        0,
    )

    print(f"Pathways number is {len(reactome_db.pathwayDictionary)}")
    print(f"metabolites number is {len(reactome_db.metaboliteIDDictionary)}")
    print(f'genes number is {len(reactome_db.geneInfoDictionary)}')
def testMain(self):
    """Exercise ``getStatistics.Apoptosis`` with a small hand-built fixture.

    One pathway per source (WikiPathways, Reactome, KEGG) is seeded with two
    genes each; ``geneA`` and ``geneD`` deliberately share the same RaMP ID.
    """
    wiki_db = wikipathwaysData()
    reactome_db = reactomeData()
    kegg_db = KeggData()
    writer = writeToSQL()
    stats = getStatistics()

    wiki_db.pathwaysWithGenesDictionary["WP254"] = ["geneA", "geneB"]
    reactome_db.pathwaysWithGenesDictionary["R-HSA-109581"] = ["geneA", "geneD"]
    kegg_db.pathwaysWithGenesDictionary["04210"] = ["geneC", "geneB"]

    gene_to_ramp_id = (
        ("geneA", "RAMP00001"),
        ("geneB", "RAMP00002"),
        ("geneC", "RAMP00003"),
        ("geneD", "RAMP00001"),
    )
    for gene_name, ramp_id in gene_to_ramp_id:
        writer.rampGeneIDdictionary[gene_name] = ramp_id

    stats.Apoptosis(
        writer.rampGeneIDdictionary,
        wiki_db.pathwaysWithGenesDictionary,
        kegg_db.pathwaysWithGenesDictionary,
        reactome_db.pathwaysWithGenesDictionary,
    )
def testKeggToHMDB(self):
    """Map a fixture KEGG compound onto HMDB via ChEBI and write KEGG data.

    Builds one HMDB and one KEGG metabolite record that share ChEBI ID
    ``34131``, runs ``IDconversion.MetaboliteChebiToHMDB`` on them, then
    fills the KEGG pathway, synonym and gene fixtures and hands them to
    ``writeToSQL.write``.
    """
    hmdb_db = hmdbData()
    kegg_db = KeggData()
    writer = writeToSQL()

    # Every metabolite record carries the same keys, defaulting to "NA".
    id_fields = (
        "chebi_id", "drugbank_id", "drugbank_metabolite_id",
        "phenol_explorer_compound_id", "phenol_explorer_metabolite_id",
        "foodb_id", "knapsack_id", "chemspider_id", "kegg_id", "biocyc_id",
        "bigg_id", "wikipidia", "nugowiki", "metagene", "metlin_id",
        "pubchem_compound_id", "het_id", "hmdb_id", "CAS",
    )

    # Metabolite mapping for HMDB (ChEBI ID stored as a plain string here).
    hmdb_record = dict.fromkeys(id_fields, "NA")
    hmdb_record.update({
        "chebi_id": "34131",
        "foodb_id": "FDB012119",
        "chemspider_id": "83153",
        "biocyc_id": "CPD-1823",
        "metlin_id": "3741",
        "pubchem_compound_id": "92105",
        "het_id": "HIC",
        "hmdb_id": ["HMDB00001"],
    })
    hmdb_db.metaboliteIDDictionary["HMDB00001"] = hmdb_record

    # Metabolite mapping for KEGG (ChEBI ID stored as a list here).
    kegg_record = dict.fromkeys(id_fields, "NA")
    kegg_record["chebi_id"] = ["34131"]
    kegg_db.metaboliteIDDictionary["C14814"] = kegg_record

    # NOTE(review): the converter is invoked through the class with this
    # test's own ``self`` rather than an IDconversion instance — confirm
    # that is intended.
    IDconversion.MetaboliteChebiToHMDB(
        self,
        kegg_db.metaboliteIDDictionary,
        hmdb_db.metaboliteIDDictionary,
        "kegg",
    )

    # (pathway id, pathway name, pathway category)
    pathway_rows = (
        ("00010", "Glycolysis / Gluconeogenesis", "Metabolism"),
        ("00020", "Citrate cycle (TCA cycle)", "Human Diseases"),
        ("00520", "Fake Pathway Name One", "Cellular Processes"),
        ("00524", "Fake Pathway Name Two", "Human Diseases"),
        ("00540", "Fake Pathway Name Three", "Human Diseases"),
        ("00550", "Fake Pathway Name Four", "Metabolism"),
        ("00030", "Fake Pathway Name Five", "Human Diseases"),
        ("00040", "Fake Pathway Name Six", "Cellular Processes"),
        ("00053", "Fake Pathway Name Seven", "Metabolism"),
        ("00250", "Fake Pathway Name Eight", "Human Diseases"),
        ("00260", "Fake Pathway Name Nine", "Cellular Processes"),
    )

    # Pathway names
    for pathway_id, pathway_name, _category in pathway_rows:
        kegg_db.pathwayDictionary[pathway_id] = pathway_name

    # Pathway categories
    for pathway_id, _name, category in pathway_rows:
        kegg_db.pathwayCategory[pathway_id] = category

    # Metabolites linked with pathways
    kegg_db.metabolitesWithPathwaysDictionary["C00043"] = [
        "00520", "00524", "00540", "00550"
    ]
    kegg_db.metabolitesWithPathwaysDictionary["C00022"] = [
        "00010", "00020", "00030", "00040", "00053", "00250", "00260"
    ]
    kegg_db.metabolitesWithPathwaysDictionary["C14814"] = ["00020"]
    kegg_db.metabolitesWithPathwaysDictionary["C14865"] = ["00540"]

    # Metabolites linked with synonyms
    kegg_db.metabolitesWithSynonymsDictionary["C00043"] = [
        "UDP-N-acetyl-alpha-D-glucosamine",
        "UDP-N-acetyl-D-glucosamine",
        "UDP-N-acetylglucosamine",
    ]
    kegg_db.metabolitesWithSynonymsDictionary["C00022"] = [
        "Pyruvate",
        "Pyruvic acid",
        "2-Oxopropanoate",
        "2-Oxopropanoic acid",
        "Pyroracemic acid",
    ]
    kegg_db.metabolitesWithSynonymsDictionary["C14814"] = ["MetaboliteSynonym1"]
    kegg_db.metabolitesWithSynonymsDictionary["C14865"] = ["MetaboliteSynonym2"]

    # Pathway to gene ids: every pathway gets its own two-gene list.
    for pathway_id, _name, _category in pathway_rows:
        kegg_db.pathwaysWithGenesDictionary[pathway_id] = ["geneA", "geneB"]

    # Gene to gene info
    kegg_db.geneInfoDictionary["geneA"] = {
        'common_name': 'Apple',
        'kegg': 'NA',
        'Ensembl': 'ENSG00000127481',
        'HGNC': '30313',
        'HPRD': 'NA',
        'NCBI-GeneID': '23352',
        'NCBI-ProteinID': 'NP_065816',
        'OMIM': '609890',
        'UniProt': 'Q5T4S7',
        'Vega': 'OTTHUMG00000002498',
        'miRBase': 'NA',
        'HMDB_protien_accession': 'NA',
        'Entrez': 'NA',
        'Enzyme Nomenclature': 'NA',
    }
    kegg_db.geneInfoDictionary["geneB"] = {
        'common_name': 'Banana',
        'kegg': 'NA',
        'Ensembl': 'ENSG00000100320',
        'HGNC': '9906',
        'HPRD': 'NA',
        'NCBI-GeneID': '23543',
        'NCBI-ProteinID': 'NP_065816',
        'OMIM': '612149',
        'UniProt': 'O43251',
        'Vega': 'OTTHUMG00000150585',
        'miRBase': 'NA',
        'HMDB_protien_accession': 'NA',
        'Entrez': 'NA',
        'Enzyme Nomenclature': 'NA',
    }

    writer.write(
        kegg_db.pathwayDictionary,
        kegg_db.pathwayCategory,
        kegg_db.metabolitesWithPathwaysDictionary,
        kegg_db.metabolitesWithSynonymsDictionary,
        kegg_db.metaboliteIDDictionary,
        kegg_db.pathwaysWithGenesDictionary,
        kegg_db.geneInfoDictionary,
        kegg_db.biofluidLocation,
        kegg_db.biofluid,
        kegg_db.cellularLocation,
        kegg_db.cellular,
        kegg_db.pathwayOntology,
        "kegg",
        0,
        0,
        0,
        0,
    )
def runEverything(self, getDatabaseFiles=False):
    """Run the full import over HMDB, WikiPathways, Reactome and KEGG.

    The pipeline loads every source, harmonises gene and metabolite
    identifiers, checks each source for internal duplicates, allocates RaMP
    compound/gene IDs, writes each source through ``writeToSQL.write`` and
    finally prints overlap and content statistics.

    Args:
        getDatabaseFiles: When true, call ``getDatabaseFiles()`` on every
            source before parsing; otherwise the files are assumed to be
            present already. Defaults to False.
    """
    writer = writeToSQL()
    id_converter = IDconversion()
    stats = getStatistics()
    hmdb_db = hmdbData()
    wiki_db = wikipathwaysData()
    reactome_db = reactomeData()
    kegg_db = KeggData()
    print(os.getcwd())

    if getDatabaseFiles:
        for source in (kegg_db, wiki_db, reactome_db, hmdb_db):
            source.getDatabaseFiles()

    # ---- Load every source -------------------------------------------
    print("Getting HMDB Metabolites...")
    hmdb_db.getMetaboliteOtherIDs()
    print("Getting HMDB pathways and synonyms...")
    hmdb_db.getPathwaysandSynonyms()
    print("Getting HMDB genes...")
    hmdb_db.getGenes()
    print("Getting HMDB biofluid and cellular locations...")
    hmdb_db.getBiofluidCellularLocationDisease()
    print("Getting HMDB pathways links to genes ...")
    hmdb_db.getPathwaysLinkedToGene()

    print("Getting wikipathways...")
    wiki_db.getEverything()
    wiki_db.getCommonNameForChebi()

    print("Getting reactome genes...")
    reactome_db.getGenes()
    print("Getting reactome metabolites...")
    reactome_db.getMetabolites()
    reactome_db.getCommonNameForChebi()
    reactome_db.getCommonNameForGenes()

    print("Getting kegg pathways...")
    kegg_db.getPathways()
    print("Getting kegg genes and metabolites...")
    kegg_db.getMetabolites()
    kegg_db.getSynonymsAndCHEBI()
    kegg_db.getGenes()
    kegg_db.getGeneInfo()

    # ---- Harmonise gene identifiers ----------------------------------
    # Identifiers present for each gene, per source:
    #   kegg: keggid (mainID), 'Ensembl', 'HGNC', 'HPRD', 'NCBI-GeneID',
    #         'NCBI-ProteinID', 'OMIM', 'UniProt', 'Vega', 'miRBase'
    #   wikipathways: (no mainID), 'Entrez', 'Enzyme Nomenclature',
    #         'Uniprot (Uniprot-TrEMBL)
    #   hmdb: HMDB-protien-accession (mainID), 'Uniprot'
    #   reactome: Uniprot (mainID)
    print("Converting gene ids...")
    for source, label in (
        (wiki_db, "wikipathways"),
        (hmdb_db, "hmdb"),
        (reactome_db, "reactome"),
        (kegg_db, "kegg"),
    ):
        id_converter.GeneConvert(source.geneInfoDictionary, label)
    for source, label in (
        (wiki_db, "wikipathways"),
        (reactome_db, "reactome"),
        (kegg_db, "kegg"),
    ):
        id_converter.GeneUniprotToHMDBP(
            source.geneInfoDictionary, hmdb_db.geneInfoDictionary, label)

    # ---- Harmonise metabolite identifiers ----------------------------
    print("Converting metabolite ids...")
    id_converter.MetaboliteKeggIDToChebi(
        kegg_db.metaboliteIDDictionary, hmdb_db.metaboliteIDDictionary, "hmdb")
    for source, label in (
        (wiki_db, "wikipathways"),
        (reactome_db, "reactome"),
        (kegg_db, "kegg"),
    ):
        id_converter.MetaboliteChebiToHMDB(
            source.metaboliteIDDictionary,
            hmdb_db.metaboliteIDDictionary,
            label,
        )

    # ---- Check each source for internal duplicates -------------------
    for compound_banner, gene_banner, source, label in (
        ("Wikipathways compounds...", "Wikipathways genes...",
         wiki_db, "wikipathways"),
        ("Kegg compounds...", "kegg genes...", kegg_db, "kegg"),
        ("reactome compounds...", "reactome genes...",
         reactome_db, "reactome"),
        ("hmdb compounds...", "hmdb genes...", hmdb_db, "hmdb"),
    ):
        print(compound_banner)
        writer.checkForWithinDatabaseDuplicatesCompound(
            source.metaboliteIDDictionary, label)
        print(gene_banner)
        writer.checkForWithinDatabaseDuplicatesGene(
            source.geneInfoDictionary, label)

    # ---- Allocate RaMP IDs -------------------------------------------
    # Each call continues numbering from the value the previous one returned.
    id_order = (
        (hmdb_db, "hmdb"),
        (wiki_db, "wiki"),
        (reactome_db, "reactome"),
        (kegg_db, "kegg"),
    )
    print('Generate compound id')
    compound_counter = 0
    for source, label in id_order:
        compound_counter = writer.createRampCompoundID(
            source.metaboliteIDDictionary, label, compound_counter)

    print('Generate gene id ...')
    gene_counter = 0
    for source, label in id_order:
        gene_counter = writer.createRampGeneID(
            source.geneInfoDictionary, label, gene_counter)

    # ---- Write every source ------------------------------------------
    # Attributes passed positionally to writeToSQL.write, in this order.
    write_fields = (
        "metaboliteCommonName",
        "pathwayDictionary",
        "pathwayCategory",
        "metabolitesWithPathwaysDictionary",
        "metabolitesWithSynonymsDictionary",
        "metaboliteIDDictionary",
        "pathwaysWithGenesDictionary",
        "metabolitesLinkedToGenes",
        "geneInfoDictionary",
        "biofluidLocation",
        "biofluid",
        "cellularLocation",
        "cellular",
        "pathwayOntology",
        "exoEndoDictionary",
        "exoEndo",
        "tissueLocation",
        "tissue",
    )
    print('Write to sql file...')
    # The first two items returned by one write seed the next write.
    running_numbers = (0, 0)
    for source, label in id_order:
        running_numbers = writer.write(
            *[getattr(source, field) for field in write_fields],
            label,
            running_numbers[0],
            running_numbers[1],
        )
    print("Done ... for importing database")

    # ---- Statistics ---------------------------------------------------
    print("Compound:")
    stats.analyteOverlaps(
        writer.rampCompoundIdInWhichDatabases,
        writer.rampCompoundIDdictionary,
        "Compound",
    )
    print("\n")
    print("Gene:")
    stats.analyteOverlaps(
        writer.rampGeneIdInWhichDatabases,
        writer.rampGeneIDdictionary,
        "Gene",
    )

    # Attributes passed positionally to getStatistics.databaseContent.
    content_fields = (
        "pathwayDictionary",
        "pathwayCategory",
        "metabolitesWithPathwaysDictionary",
        "metabolitesWithSynonymsDictionary",
        "metaboliteIDDictionary",
        "pathwaysWithGenesDictionary",
        "geneInfoDictionary",
        "biofluidLocation",
        "biofluid",
        "cellularLocation",
        "cellular",
        "pathwayOntology",
        "exoEndoDictionary",
    )
    for source, label in (
        (hmdb_db, "hmdb"),
        (kegg_db, "kegg"),
        (reactome_db, "reactome"),
        (wiki_db, "wiki"),
    ):
        stats.databaseContent(
            *[getattr(source, field) for field in content_fields], label)

    stats.Apoptosis(
        writer.rampGeneIDdictionary,
        wiki_db.pathwaysWithGenesDictionary,
        kegg_db.pathwaysWithGenesDictionary,
        reactome_db.pathwaysWithGenesDictionary,
    )
def testMain(self):
    """Allocate RaMP compound IDs for a fixture with duplicated HMDB entries.

    Two HMDB records that differ only in their ``hmdb_id`` both point at KEGG
    ID ``C14814``; a Reactome record keyed ``C14814`` lists the same value as
    its ChEBI ID. The RaMP IDs assigned across both sources are printed.
    """
    writer = writeToSQL()
    hmdb_db = hmdbData()
    reactome_db = reactomeData()

    # Every metabolite record carries the same keys, defaulting to "NA".
    id_fields = (
        "chebi_id", "drugbank_id", "drugbank_metabolite_id",
        "phenol_explorer_compound_id", "phenol_explorer_metabolite_id",
        "foodb_id", "knapsack_id", "chemspider_id", "kegg_id", "biocyc_id",
        "bigg_id", "wikipidia", "nugowiki", "metagene", "metlin_id",
        "pubchem_compound_id", "het_id", "hmdb_id", "CAS",
    )

    for hmdb_id in ("HMDB00001", "HMDB00002"):
        record = dict.fromkeys(id_fields, "NA")
        record.update({
            "foodb_id": "FDB012119",
            "chemspider_id": "83153",
            "kegg_id": "C14814",
            "biocyc_id": "CPD-1823",
            "metlin_id": "3741",
            "pubchem_compound_id": "92105",
            "het_id": "HIC",
            "hmdb_id": [hmdb_id],
        })
        hmdb_db.metaboliteIDDictionary[hmdb_id] = record

    reactome_record = dict.fromkeys(id_fields, "NA")
    reactome_record["chebi_id"] = ["C14814"]
    reactome_db.metaboliteIDDictionary["C14814"] = reactome_record

    # Reactome numbering continues from where HMDB stopped.
    next_number = writer.createRampCompoundID(
        hmdb_db.metaboliteIDDictionary, "hmdb", 0)
    writer.createRampCompoundID(
        reactome_db.metaboliteIDDictionary, "reactome", next_number)

    print(list(writer.rampCompoundIDdictionary.values()))
def testMain(self):
    """Run the HMDB import on its own and print summary counts.

    Downloads the HMDB files, loads metabolites, pathways, synonyms and
    genes, checks for within-database duplicates, allocates RaMP IDs starting
    at 0, writes the result through ``writeToSQL.write`` and prints totals.
    The biofluid/cellular-location loader and the gene-ID conversion are
    currently disabled in this test.
    """
    writer = writeToSQL()
    hmdb_db = hmdbData()
    print(hmdb_db.day)

    # Fetch the database files in case they are not present yet.
    hmdb_db.getDatabaseFiles()
    id_converter = IDconversion()  # unused while gene conversion is disabled
    stats = getStatistics()  # unused in this test

    print("Getting HMDB Metabolites...")
    hmdb_db.getMetaboliteOtherIDs()
    print(len(hmdb_db.metaboliteIDDictionary))
    # Spot-check two hard-coded HMDB IDs (KeyError if either is absent).
    for metabolite_id in ("HMDB0000538", "HMDB0000122"):
        print(hmdb_db.metaboliteIDDictionary[metabolite_id])

    print("Getting HMDB pathways and synonyms...")
    hmdb_db.getPathwaysandSynonyms()
    print('Has pathways ...')
    print('How many pathways relationship ...')

    print("Getting HMDB genes...")
    hmdb_db.getGenes()
    # The banner below is still printed although the matching loader
    # (getBiofluidCellularLocationDisease) is not called.
    print("Getting HMDB biofluid and cellular locations...")
    hmdb_db.getPathwaysLinkedToGene()

    print("Writing to files...")
    print("output each metabolites has how many pathways ...")

    print("hmdb compounds...")
    writer.checkForWithinDatabaseDuplicatesCompound(
        hmdb_db.metaboliteIDDictionary, "hmdb")
    print("hmdb genes...")
    writer.checkForWithinDatabaseDuplicatesGene(
        hmdb_db.geneInfoDictionary, "hmdb")

    writer.createRampCompoundID(hmdb_db.metaboliteIDDictionary, "hmdb", 0)
    writer.createRampGeneID(hmdb_db.geneInfoDictionary, "hmdb", 0)
    writer.write(
        hmdb_db.metaboliteCommonName,
        hmdb_db.pathwayDictionary,
        hmdb_db.pathwayCategory,
        hmdb_db.metabolitesWithPathwaysDictionary,
        hmdb_db.metabolitesWithSynonymsDictionary,
        hmdb_db.metaboliteIDDictionary,
        hmdb_db.pathwaysWithGenesDictionary,
        hmdb_db.metabolitesLinkedToGenes,
        hmdb_db.geneInfoDictionary,
        hmdb_db.biofluidLocation,
        hmdb_db.biofluid,
        hmdb_db.cellularLocation,
        hmdb_db.cellular,
        hmdb_db.pathwayOntology,
        hmdb_db.exoEndoDictionary,
        hmdb_db.exoEndo,
        hmdb_db.tissueLocation,
        hmdb_db.tissue,
        "hmdb",
        0,
        0,
    )

    print(f'Compound number is {len(hmdb_db.metaboliteIDDictionary)}')
    print(f'Gene number is {len(hmdb_db.geneInfoDictionary)}')
    print(f'Pathway number is {len(hmdb_db.pathwayDictionary)}')
    print(f'Pathway that have Gene with it {len(hmdb_db.pathwaysWithGenesDictionary)}')
def testreactomeToHMDB(self):
    """Map fixture Reactome compounds and genes onto HMDB and write both.

    Builds one metabolite and one gene (UniProt ``Q96KN2``) in each of HMDB
    and Reactome, fills the surrounding HMDB/Reactome fixtures, runs the
    ChEBI->HMDB and UniProt->HMDBP conversions, allocates RaMP IDs, writes
    both sources and prints the overlap and HMDB content statistics.
    """
    hmdb_db = hmdbData()
    reactome_db = reactomeData()
    writer = writeToSQL()
    id_converter = IDconversion()
    stats = getStatistics()

    # Every metabolite record carries the same keys, defaulting to "NA".
    id_fields = (
        "chebi_id", "drugbank_id", "drugbank_metabolite_id",
        "phenol_explorer_compound_id", "phenol_explorer_metabolite_id",
        "foodb_id", "knapsack_id", "chemspider_id", "kegg_id", "biocyc_id",
        "bigg_id", "wikipidia", "nugowiki", "metagene", "metlin_id",
        "pubchem_compound_id", "het_id", "hmdb_id", "CAS",
    )

    def _gene_record(common_name, hmdb_accession):
        # Fresh gene-info dict for UniProt Q96KN2; all other IDs are 'NA'.
        return {
            'common_name': common_name,
            'kegg': 'NA',
            'Ensembl': 'NA',
            'HGNC': 'NA',
            'HPRD': 'NA',
            'NCBI-GeneID': 'NA',
            'NCBI-ProteinID': 'NA',
            'OMIM': 'NA',
            'UniProt': ['Q96KN2'],
            'Vega': 'NA',
            'miRBase': 'NA',
            'HMDB_protien_accession': hmdb_accession,
            'Entrez': 'NA',
            'Enzyme Nomenclature': 'NA',
        }

    # Metabolite mapping for HMDB
    hmdb_record = dict.fromkeys(id_fields, "NA")
    hmdb_record.update({
        "chebi_id": ["C14814"],
        "foodb_id": "FDB012119",
        "chemspider_id": "83153",
        "kegg_id": "C14814",
        "biocyc_id": "CPD-1823",
        "metlin_id": "3741",
        "pubchem_compound_id": "92105",
        "het_id": "HIC",
        "hmdb_id": ["HMDB00001"],
    })
    hmdb_db.metaboliteIDDictionary["HMDB00001"] = hmdb_record

    # Metabolite mapping for Reactome
    reactome_record = dict.fromkeys(id_fields, "NA")
    reactome_record["chebi_id"] = ["C14814"]
    reactome_db.metaboliteIDDictionary["C14814"] = reactome_record

    # NOTE(review): this HMDB gene record is replaced further down before
    # anything reads it — confirm which of the two fixtures is intended.
    hmdb_db.geneInfoDictionary["Q96KN2"] = _gene_record('genename1', 'HMDBP00321')
    reactome_db.geneInfoDictionary["Q96KN2"] = _gene_record('NA', 'NA')

    hmdb_db.metabolitesWithSynonymsDictionary["HMDB00001"] = [
        "1 Methylhistidine", "1-Methyl-L-histidine", "Pi-methylhistidine"
    ]
    hmdb_db.metabolitesWithPathwaysDictionary["HMDB00001"] = [
        "SMP00716", "SMP00006"
    ]

    # (pathway id, pathway name)
    hmdb_pathways = (
        ("SMP00716", "Thyroid hormone synthesis"),
        ("SMP00006", "Tyrosine Metabolism"),
        ("SMP00001", "Pathway1"),
        ("SMP00002", "Pathway2"),
        ("SMP00816", "Pathway3"),
    )
    for pathway_id, pathway_name in hmdb_pathways:
        hmdb_db.pathwayDictionary[pathway_id] = pathway_name
    for pathway_id, _name in hmdb_pathways:
        hmdb_db.pathwayCategory[pathway_id] = "NA"
    for pathway_id, _name in hmdb_pathways:
        # A separate single-gene list per pathway.
        hmdb_db.pathwaysWithGenesDictionary[pathway_id] = ["Q96KN2"]

    hmdb_db.geneInfoDictionary["Q96KN2"] = _gene_record('CNDP1', 'HMDBP00473')

    hmdb_db.biofluidLocation["HMDB00001"] = [
        "Blood", "Cerebrospinal Fluid (CSF)", "Feces", "Saliva", "Urine"
    ]
    for fluid in ("Blood", "Cerebrospinal Fluid (CSF)", "Feces", "Saliva",
                  "Urine"):
        hmdb_db.biofluid[fluid] = "placeholder"

    hmdb_db.cellularLocation["HMDB00001"] = ["Cytoplasm", "Location1"]
    for location in ("Cytoplasm", "Location1", "Location2", "Location3"):
        hmdb_db.cellular[location] = "placeholder"

    hmdb_db.exoEndoDictionary["HMDB00001"] = "Food"

    # Reactome pathway name and category
    reactome_db.pathwayDictionary["R-HSA-210745"] = "Citrate cycle (TCA cycle)"
    reactome_db.pathwayCategory["R-HSA-210745"] = "Human Diseases"
    # Metabolites linked with pathways
    reactome_db.metabolitesWithPathwaysDictionary["C14814"] = ["R-HSA-210745"]
    # Metabolites linked with synonyms
    reactome_db.metabolitesWithSynonymsDictionary["C14814"] = [
        "MetaboliteSynonym1"
    ]
    # Pathway to gene id
    reactome_db.pathwaysWithGenesDictionary["R-HSA-210745"] = ["Q96KN2"]

    id_converter.MetaboliteChebiToHMDB(
        reactome_db.metaboliteIDDictionary,
        hmdb_db.metaboliteIDDictionary,
        "reactome",
    )
    # NOTE(review): the label here is "hmdb" although the first argument is
    # the Reactome gene dictionary — confirm that is intended.
    id_converter.GeneUniprotToHMDBP(
        reactome_db.geneInfoDictionary, hmdb_db.geneInfoDictionary, "hmdb")

    # Reactome numbering continues from where HMDB stopped.
    compound_counter = writer.createRampCompoundID(
        hmdb_db.metaboliteIDDictionary, "hmdb", 0)
    writer.createRampCompoundID(
        reactome_db.metaboliteIDDictionary, "reactome", compound_counter)
    gene_counter = writer.createRampGeneID(
        hmdb_db.geneInfoDictionary, "hmdb", 0)
    writer.createRampGeneID(
        reactome_db.geneInfoDictionary, "reactome", gene_counter)

    # Attributes passed positionally to both write() and databaseContent().
    shared_fields = (
        "pathwayDictionary",
        "pathwayCategory",
        "metabolitesWithPathwaysDictionary",
        "metabolitesWithSynonymsDictionary",
        "metaboliteIDDictionary",
        "pathwaysWithGenesDictionary",
        "geneInfoDictionary",
        "biofluidLocation",
        "biofluid",
        "cellularLocation",
        "cellular",
        "pathwayOntology",
        "exoEndoDictionary",
    )
    for source, label in ((reactome_db, "reactome"), (hmdb_db, "hmdb")):
        writer.write(
            *[getattr(source, field) for field in shared_fields],
            label,
            0,
            0,
        )

    print("Compound:")
    stats.analyteOverlaps(
        writer.rampCompoundIdInWhichDatabases,
        writer.rampCompoundIDdictionary,
        "Compound",
    )
    print("\n")
    print("Gene:")
    stats.analyteOverlaps(
        writer.rampGeneIdInWhichDatabases,
        writer.rampGeneIDdictionary,
        "Gene",
    )

    stats.databaseContent(
        *[getattr(hmdb_db, field) for field in shared_fields], "hmdb")
def runEverything(self, getDatabaseFiles=True):
    """Run the import pipeline and write every source database to SQL.

    Steps, in order:
      1. change the working directory to '../main/';
      2. call getEverything(True) on the HMDB, WikiPathways and Reactome
         loaders;
      3. assign RaMP compound IDs, then RaMP gene IDs, chaining the running
         counter through hmdb -> kegg -> wiki -> reactome;
      4. call sql.write once per database (hmdb, wiki, reactome, kegg),
         feeding the first two elements of each result into the next call;
      5. print compound/gene overlaps and per-database content statistics.

    NOTE(review): ``getDatabaseFiles`` is accepted but never read by this
    method; every loader is called with a literal ``True``.
    NOTE(review): the KEGG loader call is commented out, so ``kegg`` is used
    below exactly as ``KeggData()`` constructs it.
    """
    writer = writeToSQL()
    statistics = getStatistics()
    hmdb_source = hmdbData()
    wiki_source = WikipathwaysRDF()
    reactome_source = reactomeData()
    kegg_source = KeggData()

    def export(source, label, previous, with_inchi=False):
        # One sql.write call; the argument order is fixed by writeToSQL.write.
        # Only HMDB supplies metaInchi, the others pass a fresh empty dict.
        return writer.write(
            source.metaboliteCommonName,
            source.pathwayDictionary,
            source.pathwayCategory,
            source.metabolitesWithPathwaysDictionary,
            source.metabolitesWithSynonymsDictionary,
            source.metaboliteIDDictionary,
            source.pathwaysWithGenesDictionary,
            source.metabolitesLinkedToGenes,
            source.geneInfoDictionary,
            source.biofluidLocation,
            source.biofluid,
            source.cellularLocation,
            source.cellular,
            source.pathwayOntology,
            source.exoEndoDictionary,
            source.exoEndo,
            source.tissueLocation,
            source.tissue,
            source.metaInchi if with_inchi else dict(),
            label,
            previous[0],
            previous[1],
        )

    def summarize(source, label):
        # One stat.databaseContent call for a single source database.
        statistics.databaseContent(
            source.pathwayDictionary,
            source.pathwayCategory,
            source.metabolitesWithPathwaysDictionary,
            source.metabolitesWithSynonymsDictionary,
            source.metaboliteIDDictionary,
            source.pathwaysWithGenesDictionary,
            source.geneInfoDictionary,
            source.biofluidLocation,
            source.biofluid,
            source.cellularLocation,
            source.cellular,
            source.pathwayOntology,
            source.exoEndoDictionary,
            label,
        )

    # Relative path: only resolves when the process starts in a sibling
    # directory of 'main'.
    os.chdir('../main/')

    # kegg_source.getEverything(False) is intentionally left disabled here.
    hmdb_source.getEverything(True)
    print("Getting wikipathways...")
    wiki_source.getEverything(True)
    print("Getting reactome...")
    reactome_source.getEverything(True)
    print("Getting kegg...")

    # Identifiers present for each gene, per source:
    #   kegg:         keggid (mainID), 'Ensembl', 'HGNC', 'HPRD',
    #                 'NCBI-GeneID', 'NCBI-ProteinID', 'OMIM', 'UniProt',
    #                 'Vega', 'miRBase'
    #   wikipathways: (no mainID), 'Entrez', 'Enzyme Nomenclature',
    #                 'Uniprot' (Uniprot-TrEMBL)
    #   hmdb:         HMDB-protien-accession (mainID), 'Uniprot'
    #   reactome:     Uniprot (mainID)
    print('Generate compound id')
    hmdb_compounds = writer.createRampCompoundID(
        hmdb_source.metaboliteIDDictionary, "hmdb", 0)
    print("hmdbcompoundnum: ", hmdb_compounds)
    kegg_compounds = writer.createRampCompoundID(
        kegg_source.metaboliteIDDictionary, "kegg", hmdb_compounds)
    wiki_compounds = writer.createRampCompoundID(
        wiki_source.metaboliteIDDictionary, "wiki", kegg_compounds)
    print("wikicompoundnum: ", wiki_compounds)
    writer.createRampCompoundID(
        reactome_source.metaboliteIDDictionary, "reactome", wiki_compounds)

    print('Generate gene id ...')
    hmdb_genes = writer.createRampGeneID(
        hmdb_source.geneInfoDictionary, "hmdb", 0)
    kegg_genes = writer.createRampGeneID(
        kegg_source.geneInfoDictionary, "kegg", hmdb_genes)
    wiki_genes = writer.createRampGeneID(
        wiki_source.geneInfoDictionary, "wiki", kegg_genes)
    reactome_genes = writer.createRampGeneID(
        reactome_source.geneInfoDictionary, "reactome", wiki_genes)
    print(" hmdbgenenum ", hmdb_genes, " kegggenenum ", kegg_genes,
          " wikigenenum ", wiki_genes, " reactomegenenum ", reactome_genes)

    print('Write to sql file...')
    # Each write starts from the counters returned by the previous one.
    counters = (0, 0)
    write_plan = (
        (hmdb_source, "hmdb", True),
        (wiki_source, "wiki", False),
        (reactome_source, "reactome", False),
        (kegg_source, "kegg", False),
    )
    for source, label, with_inchi in write_plan:
        counters = export(source, label, counters, with_inchi)
    print("Done ... for importing database")

    print("Compound:")
    statistics.analyteOverlaps(writer.rampCompoundIdInWhichDatabases,
                               writer.rampCompoundIDdictionary, "Compound")
    print("\n")
    print("Gene:")
    statistics.analyteOverlaps(writer.rampGeneIdInWhichDatabases,
                               writer.rampGeneIDdictionary, "Gene")

    report_plan = (
        (hmdb_source, "hmdb"),
        (kegg_source, "kegg"),
        (reactome_source, "reactome"),
        (wiki_source, "wiki"),
    )
    for source, label in report_plan:
        summarize(source, label)
def testMain(self):
    """Check RaMP compound-ID assignment on hand-built HMDB/Reactome records.

    Two HMDB metabolites that differ only in their ``hmdb_id`` and one
    Reactome metabolite keyed ``C14814`` are registered through
    ``createRampCompoundID`` (HMDB first, Reactome continuing from the HMDB
    counter). The compound and gene overlaps are then printed.
    """
    sql = writeToSQL()
    # analyteOverlaps is called below, so the statistics helper has to be
    # created here; it was previously referenced without being defined.
    stat = getStatistics()
    hmdb = hmdbData()
    reactome = reactomeData()

    hmdb.metaboliteIDDictionary["HMDB00001"] = {
        "chebi_id": "NA",
        "drugbank_id": "NA",
        "drugbank_metabolite_id": "NA",
        "phenol_explorer_compound_id": "NA",
        "phenol_explorer_metabolite_id": "NA",
        "foodb_id": "FDB012119",
        "knapsack_id": "NA",
        "chemspider_id": "83153",
        "kegg_id": "C14814",
        "biocyc_id": "CPD-1823",
        "bigg_id": "NA",
        "wikipidia": "NA",
        "nugowiki": "NA",
        "metagene": "NA",
        "metlin_id": "3741",
        "pubchem_compound_id": "92105",
        "het_id": "HIC",
        "hmdb_id": ["HMDB00001"],
        "CAS": "NA",
    }
    # Same cross-references as HMDB00001; only the hmdb_id differs.
    hmdb.metaboliteIDDictionary["HMDB00002"] = {
        "chebi_id": "NA",
        "drugbank_id": "NA",
        "drugbank_metabolite_id": "NA",
        "phenol_explorer_compound_id": "NA",
        "phenol_explorer_metabolite_id": "NA",
        "foodb_id": "FDB012119",
        "knapsack_id": "NA",
        "chemspider_id": "83153",
        "kegg_id": "C14814",
        "biocyc_id": "CPD-1823",
        "bigg_id": "NA",
        "wikipidia": "NA",
        "nugowiki": "NA",
        "metagene": "NA",
        "metlin_id": "3741",
        "pubchem_compound_id": "92105",
        "het_id": "HIC",
        "hmdb_id": ["HMDB00002"],
        "CAS": "NA",
    }
    reactome.metaboliteIDDictionary["C14814"] = {
        "chebi_id": ["C14814"],
        "drugbank_id": "NA",
        "drugbank_metabolite_id": "NA",
        "phenol_explorer_compound_id": "NA",
        "phenol_explorer_metabolite_id": "NA",
        "foodb_id": "NA",
        "knapsack_id": "NA",
        "chemspider_id": "NA",
        "kegg_id": "NA",
        "biocyc_id": "NA",
        "bigg_id": "NA",
        "wikipidia": "NA",
        "nugowiki": "NA",
        "metagene": "NA",
        "metlin_id": "NA",
        "pubchem_compound_id": "NA",
        "het_id": "NA",
        "hmdb_id": "NA",
        "CAS": "NA",
    }

    hmdbcompoundnum = sql.createRampCompoundID(hmdb.metaboliteIDDictionary,
                                               "hmdb", 0)
    sql.createRampCompoundID(reactome.metaboliteIDDictionary, "reactome",
                             hmdbcompoundnum)

    # Nothing is written to this file in this test. Opening it in 'w' mode
    # still creates/truncates it, so keep that effect but release the handle
    # straight away instead of leaving it open.
    with open("../misc/output/TestingHMDB.txt", 'w'):
        pass

    print("Compound:")
    stat.analyteOverlaps(sql.rampCompoundIdInWhichDatabases,
                         sql.rampCompoundIDdictionary, "Compound")
    print("\n")
    print("Gene:")
    stat.analyteOverlaps(sql.rampGeneIdInWhichDatabases,
                         sql.rampGeneIDdictionary, "Gene")
def testKeggToHMDB(self):
    """Exercise KEGG <-> HMDB metabolite ID conversion on hand-built records.

    One HMDB metabolite (HMDB00001, cross-referenced to KEGG C14814) and one
    KEGG metabolite (C14814) are created, the two ID-conversion routines are
    run over them, and then fake pathway, gene and location data is attached
    so both databases can be passed to ``sql.write``. The compound and gene
    overlaps are reported at the end.
    """
    ###########################################################################
    # IMPORTANT PART START
    hmdb = hmdbData()
    kegg = KeggData()
    sql = writeToSQL()
    idconvert = IDconversion()
    stat = getStatistics()

    # metabolite mapping for hmdb
    hmdb.metaboliteIDDictionary["HMDB00001"] = {
        "chebi_id": "NA",
        "drugbank_id": "NA",
        "drugbank_metabolite_id": "NA",
        "phenol_explorer_compound_id": "NA",
        "phenol_explorer_metabolite_id": "NA",
        "foodb_id": "FDB012119",
        "knapsack_id": "NA",
        "chemspider_id": "83153",
        "kegg_id": "C14814",
        "biocyc_id": "CPD-1823",
        "bigg_id": "NA",
        "wikipidia": "NA",
        "nugowiki": "NA",
        "metagene": "NA",
        "metlin_id": "3741",
        "pubchem_compound_id": "92105",
        "het_id": "HIC",
        "hmdb_id": ["HMDB00001"],
        "CAS": "NA",
    }
    # metabolite mapping for kegg
    kegg.metaboliteIDDictionary["C14814"] = {
        "chebi_id": ["34131"],
        "drugbank_id": "NA",
        "drugbank_metabolite_id": "NA",
        "phenol_explorer_compound_id": "NA",
        "phenol_explorer_metabolite_id": "NA",
        "foodb_id": "NA",
        "knapsack_id": "NA",
        "chemspider_id": "NA",
        "kegg_id": "C14814",
        "biocyc_id": "NA",
        "bigg_id": "NA",
        "wikipidia": "NA",
        "nugowiki": "NA",
        "metagene": "NA",
        "metlin_id": "NA",
        "pubchem_compound_id": "NA",
        "het_id": "NA",
        "hmdb_id": "NA",
        "CAS": "NA",
    }
    idconvert.MetaboliteKeggIDToChebi(kegg.metaboliteIDDictionary,
                                      hmdb.metaboliteIDDictionary, "hmdb")
    idconvert.MetaboliteChebiToHMDB(kegg.metaboliteIDDictionary,
                                    hmdb.metaboliteIDDictionary, "kegg")
    # IMPORTANT PART END
    ###########################################################################

    # (pathway id, pathway name, pathway category) for the KEGG fixture.
    kegg_pathways = (
        ("00010", "Glycolysis / Gluconeogenesis", "Metabolism"),
        ("00020", "Citrate cycle (TCA cycle)", "Human Diseases"),
        ("00520", "Fake Pathway Name One", "Cellular Processes"),
        ("00524", "Fake Pathway Name Two", "Human Diseases"),
        ("00540", "Fake Pathway Name Three", "Human Diseases"),
        ("00550", "Fake Pathway Name Four", "Metabolism"),
        ("00030", "Fake Pathway Name Five", "Human Diseases"),
        ("00040", "Fake Pathway Name Six", "Cellular Processes"),
        ("00053", "Fake Pathway Name Seven", "Metabolism"),
        ("00250", "Fake Pathway Name Eight", "Human Diseases"),
        ("00260", "Fake Pathway Name Nine", "Cellular Processes"),
    )
    # Pathway names
    for pathway_id, pathway_name, _ in kegg_pathways:
        kegg.pathwayDictionary[pathway_id] = pathway_name
    # Pathway categories
    for pathway_id, _, category in kegg_pathways:
        kegg.pathwayCategory[pathway_id] = category

    # metabolites linked with pathways
    kegg.metabolitesWithPathwaysDictionary["C14814"] = ["00020"]
    # metabolites linked with synonyms
    kegg.metabolitesWithSynonymsDictionary["C14814"] = ["MetaboliteSynonym1"]

    # pathway to gene id; every pathway gets its own list object
    for pathway_id, _, _ in kegg_pathways:
        kegg.pathwaysWithGenesDictionary[pathway_id] = ["geneA", "geneB"]

    # gene to geneinfo
    kegg.geneInfoDictionary["geneA"] = {
        'common_name': 'Apple',
        'kegg': 'NA',
        'Ensembl': 'ENSG00000127481',
        'HGNC': '30313',
        'HPRD': 'NA',
        'NCBI-GeneID': '23352',
        'NCBI-ProteinID': 'NP_065816',
        'OMIM': '609890',
        'UniProt': 'Q5T4S7',
        'Vega': 'OTTHUMG00000002498',
        'miRBase': 'NA',
        'HMDB_protien_accession': 'NA',
        'Entrez': 'NA',
        'Enzyme Nomenclature': 'NA',
    }
    kegg.geneInfoDictionary["geneB"] = {
        'common_name': 'Banana',
        'kegg': 'NA',
        'Ensembl': 'ENSG00000100320',
        'HGNC': '9906',
        'HPRD': 'NA',
        'NCBI-GeneID': '23543',
        'NCBI-ProteinID': 'NP_065816',
        'OMIM': '612149',
        'UniProt': 'O43251',
        'Vega': 'OTTHUMG00000150585',
        'miRBase': 'NA',
        'HMDB_protien_accession': 'NA',
        'Entrez': 'NA',
        'Enzyme Nomenclature': 'NA',
    }

    hmdb.metabolitesWithSynonymsDictionary["HMDB00001"] = [
        "1 Methylhistidine", "1-Methyl-L-histidine", "Pi-methylhistidine"
    ]
    hmdb.metabolitesWithPathwaysDictionary["HMDB00001"] = [
        "SMP00716", "SMP00006"
    ]

    # (pathway id, pathway name) for the HMDB fixture.
    hmdb_pathways = (
        ("SMP00716", "Thyroid hormone synthesis"),
        ("SMP00006", "Tyrosine Metabolism"),
        ("SMP00001", "Pathway1"),
        ("SMP00002", "Pathway2"),
        ("SMP00816", "Pathway3"),
    )
    for pathway_id, pathway_name in hmdb_pathways:
        hmdb.pathwayDictionary[pathway_id] = pathway_name
    for pathway_id, _ in hmdb_pathways:
        hmdb.pathwayCategory[pathway_id] = "NA"
    for pathway_id, _ in hmdb_pathways:
        hmdb.pathwaysWithGenesDictionary[pathway_id] = ["Q96KN2", "uniprot1"]

    hmdb.geneInfoDictionary["Q96KN2"] = {
        'common_name': 'CNDP1',
        'kegg': 'NA',
        'Ensembl': 'NA',
        'HGNC': 'NA',
        'HPRD': 'NA',
        'NCBI-GeneID': 'NA',
        'NCBI-ProteinID': 'NA',
        'OMIM': 'NA',
        'UniProt': 'Q96KN2',
        'Vega': 'NA',
        'miRBase': 'NA',
        'HMDB_protien_accession': 'HMDBP00473',
        'Entrez': 'NA',
        'Enzyme Nomenclature': 'NA',
    }
    hmdb.geneInfoDictionary["uniprot1"] = {
        'common_name': 'genename1',
        'kegg': 'NA',
        'Ensembl': 'NA',
        'HGNC': 'NA',
        'HPRD': 'NA',
        'NCBI-GeneID': 'NA',
        'NCBI-ProteinID': 'NA',
        'OMIM': 'NA',
        'UniProt': 'uniprot1',
        'Vega': 'NA',
        'miRBase': 'NA',
        'HMDB_protien_accession': 'HMDBP00321',
        'Entrez': 'NA',
        'Enzyme Nomenclature': 'NA',
    }

    hmdb.biofluidLocation["HMDB00001"] = [
        "Blood", "Cerebrospinal Fluid (CSF)", "Feces", "Saliva", "Urine"
    ]
    hmdb.exoEndoDictionary["HMDB00001"] = ["Food"]
    for fluid in ("Blood", "Cerebrospinal Fluid (CSF)", "Feces", "Saliva",
                  "Urine"):
        hmdb.biofluid[fluid] = "placeholder"
    hmdb.cellularLocation["HMDB00001"] = ["Cytoplasm", "Location1"]
    for location in ("Cytoplasm", "Location1", "Location2", "Location3"):
        hmdb.cellular[location] = "placeholder"

    # NOTE(review): other callers of sql.write in this file pass additional
    # leading/trailing dictionaries (metaboliteCommonName, exoEndo, tissue,
    # ...). Confirm these two calls still match writeToSQL.write.
    sql.write(kegg.pathwayDictionary, kegg.pathwayCategory,
              kegg.metabolitesWithPathwaysDictionary,
              kegg.metabolitesWithSynonymsDictionary,
              kegg.metaboliteIDDictionary, kegg.pathwaysWithGenesDictionary,
              kegg.geneInfoDictionary, kegg.biofluidLocation, kegg.biofluid,
              kegg.cellularLocation, kegg.cellular, kegg.pathwayOntology,
              kegg.exoEndoDictionary, "kegg", 0, 0)
    sql.write(hmdb.pathwayDictionary, hmdb.pathwayCategory,
              hmdb.metabolitesWithPathwaysDictionary,
              hmdb.metabolitesWithSynonymsDictionary,
              hmdb.metaboliteIDDictionary, hmdb.pathwaysWithGenesDictionary,
              hmdb.geneInfoDictionary, hmdb.biofluidLocation, hmdb.biofluid,
              hmdb.cellularLocation, hmdb.cellular, hmdb.pathwayOntology,
              hmdb.exoEndoDictionary, "hmdb", 0, 0)

    # analyteOverlaps takes the ID-to-databases map, the matching RaMP ID
    # dictionary and an analyte label, as at every other call site.
    stat.analyteOverlaps(sql.rampCompoundIdInWhichDatabases,
                         sql.rampCompoundIDdictionary, "Compound")
    stat.analyteOverlaps(sql.rampGeneIdInWhichDatabases,
                         sql.rampGeneIDdictionary, "Gene")
def testMain(self):
    """Run the KEGG loader end to end and write its content through sql.write.

    Fetches the KEGG database files, fills the pathway, metabolite and gene
    dictionaries, dumps the gene IDs to ../misc/output/keggGenesID.txt,
    converts gene IDs, checks for within-database duplicates, assigns RaMP
    IDs, writes the SQL output and prints size and overlap statistics.

    Raises:
        KeyError: if "C00002" or "C00001" is missing from
            ``kegg.metaboliteIDDictionary`` after loading.
    """
    kegg = KeggData()
    sql = writeToSQL()
    idconvert = IDconversion()
    stat = getStatistics()

    # get database file
    kegg.getDatabaseFiles()
    # kegg.getDatabaseFiles2()
    print('get pathways')
    kegg.getPathways()
    kegg.getPathways_with_genes()
    print(len(kegg.pathwayDictionary))
    print('get metabolites')
    kegg.getMetabolites()
    print('get synonyms and chebi')
    kegg.getSynonymsAndCHEBI()
    print(len(kegg.metaboliteIDDictionary))
    print(kegg.metaboliteIDDictionary["C00002"])
    print(kegg.metaboliteIDDictionary["C00001"])

    # Disabled: dump of the metabolite IDs.
    # file = open("../misc/output/keggMetabolitesID.txt","wb")
    # for key in kegg.metaboliteIDDictionary:
    #     file.write(key.encode("utf-8") +b"\n")
    # file.close()

    print('get genes')
    kegg.getGenes()
    print(len(kegg.geneInfoDictionary))
    kegg.getGeneInfo()
    kegg.getPathwayLinkedToGene()

    # One gene ID per line, UTF-8 encoded. The context manager closes the
    # file even when a write fails part-way through.
    with open("../misc/output/keggGenesID.txt", "wb") as gene_id_file:
        for key in kegg.geneInfoDictionary:
            gene_id_file.write(key.encode("utf-8") + b"\n")

    idconvert.GeneConvert(kegg.geneInfoDictionary, "kegg")

    # NOTE(review): the RaMP IDs are created here and again after the
    # duplicate check; the results of this first pair are never read.
    # Confirm whether the first pair is still needed.
    sql.createRampCompoundID(kegg.metaboliteIDDictionary, "kegg", 0)
    sql.createRampGeneID(kegg.geneInfoDictionary, "kegg", 0)

    # Check duplicates
    sql.checkForWithinDatabaseDuplicatesCompound(kegg.metaboliteIDDictionary,
                                                 "kegg")
    sql.checkForWithinDatabaseDuplicatesGene(kegg.geneInfoDictionary, "kegg")

    # create ramp id
    keggcompoundnum = sql.createRampCompoundID(kegg.metaboliteIDDictionary,
                                               "kegg", 0)
    kegggenenum = sql.createRampGeneID(kegg.geneInfoDictionary, "kegg", 0)

    # NOTE(review): the pipeline's sql.write call for KEGG passes one extra
    # dict() argument immediately before the "kegg" label. Confirm this call
    # matches writeToSQL.write.
    keggnumbers = sql.write(
        kegg.metaboliteCommonName, kegg.pathwayDictionary,
        kegg.pathwayCategory, kegg.metabolitesWithPathwaysDictionary,
        kegg.metabolitesWithSynonymsDictionary, kegg.metaboliteIDDictionary,
        kegg.pathwaysWithGenesDictionary, kegg.metabolitesLinkedToGenes,
        kegg.geneInfoDictionary, kegg.biofluidLocation, kegg.biofluid,
        kegg.cellularLocation, kegg.cellular, kegg.pathwayOntology,
        kegg.exoEndoDictionary, kegg.exoEndo, kegg.tissueLocation,
        kegg.tissue, "kegg", 0, 0)

    print('metaboliteIDdict number is ' +
          str(len(kegg.metaboliteIDDictionary)))
    print('GeneInfo number is ' + str(len(kegg.geneInfoDictionary)))
    print('PathwayDict number is ' + str(len(kegg.pathwayDictionary)))
    print('MetabolitesWithPath is ' +
          str(len(kegg.metabolitesWithPathwaysDictionary)))

    print("Compound:")
    stat.analyteOverlaps(sql.rampCompoundIdInWhichDatabases,
                         sql.rampCompoundIDdictionary, "Compound")
    print("\n")
    print("Gene:")
    stat.analyteOverlaps(sql.rampGeneIdInWhichDatabases,
                         sql.rampGeneIDdictionary, "Gene")
def testMain(self): wp = WikipathwaysRDF() sql = writeToSQL() wp.getEverything(True) '''