def print_alignment_kegg(model):
    """Rewrite ``cor.txt`` as ``cor_readable.txt`` with readable names.

    Every line of ``cor.txt`` containing the ``:***:`` separator is treated as
    a ``<left>:***:<right>`` pair. Unless it is the literal ``MULTIR``, the
    left part is looked up in KEGG and replaced by the text following the
    first ``NAME`` marker of the returned entry (up to the end of that line);
    the right part is replaced by ``model.reactions[<right>]``. The pair is
    printed and appended to ``cor_readable.txt``.

    Args:
        model: object whose ``reactions`` attribute can be indexed with the
            right-hand key of each pair.
    """
    separator = ":***:"
    kegg = KEGG()
    # Context managers make sure both files are closed even when a KEGG or
    # model lookup raises half-way through the input.
    with open("cor.txt") as f, open("cor_readable.txt", "w") as f_o:
        for line in f:
            if separator not in line:
                continue
            k, b = line.split(separator)
            b = b.strip()
            if k != "MULTIR":
                entry = kegg.get(k)
                # Keep only what follows "NAME" on that line of the entry.
                i1 = entry.find("NAME") + 4
                i2 = entry[i1:].find("\n")
                k = entry[i1:i1 + i2].strip()
            if b != "MULTIR":
                b = model.reactions[b]
            print(k, ":***:", b)
            f_o.write(k + separator + b + "\n")
def pathwayInfo(code):
    """Return ``[code, name, class]`` for a KEGG pathway.

    Args:
        code: KEGG pathway identifier passed to ``KEGG.get``.

    Returns:
        A three-item list: the code itself, the first ``NAME`` entry and the
        ``CLASS`` entry of the parsed record. Commas are replaced by
        semicolons so the values do not break comma-separated output written
        later. A missing field, or a failed lookup, yields ``'NA'``.
    """
    # Initialize searcher
    kSearcher = KEGG()
    result = kSearcher.get(code)

    # A failed lookup comes back as an integer status instead of text (the
    # same condition enzymeInfo checks for); report it as all-'NA' rather
    # than crashing on the parsed value.
    if isinstance(result, int):
        return [code, 'NA', 'NA']

    # Parse the raw entry into a dictionary
    dictResult = kSearcher.parse(result)

    # The code always comes first in the list
    pathwayList = [code]

    # Pathway name, else 'NA'
    if 'NAME' in dictResult:
        pathwayList.append(str(dictResult['NAME'][0].replace(',', ';')))
    else:
        pathwayList.append('NA')

    # Pathway class, else 'NA'
    if 'CLASS' in dictResult:
        pathwayList.append(str(dictResult['CLASS']).replace(',', ';'))
    else:
        pathwayList.append('NA')

    return pathwayList
def extract_sequences(dict, flist):
    '''
    Fetch ortholog nucleotide sequences from KEGG and write one fasta file
    per KEGG id into ``orthologs_fastas/``.

    arg:
        dict:  dictionary with a KEGG id as key and, as value, a list of
               lists of gene identifiers (each identifier is passed to
               ``KEGG.get`` with ``option="ntseq"``)
        flist: collection of file names that already exist; a key whose
               ``<key>.fas`` is in it is skipped
    return: None
    '''
    k = KEGG()

    # loop through the orthologs dictionary to get sequences from kegg
    for key, ortholog_groups in dict.items():
        if (key + ".fas") in flist:
            print(key + " is already created !!!")
            continue

        # collect the pieces and join once instead of growing a string
        chunks = []
        for group in ortholog_groups:
            for gene_id in group:
                data_seq = k.get(gene_id, option="ntseq", parse=True)
                chunks.append(data_seq + "\n")

        print("writing : " + key + ".fas")
        # write file
        with open(os.path.join('orthologs_fastas/', key + '.fas'), 'w') as f:
            f.write("".join(chunks))
def get_genes_from_kegg_pathway(pathway):
    """Return the second field of every ``GENE`` record of a KEGG pathway.

    The pathway entry is fetched with the organism set to ``'hsa'``; each
    item of its parsed ``GENE`` field is split on a single space and the
    resulting pairs are transposed into (first fields, second fields).

    Args:
        pathway: KEGG pathway identifier.

    Returns:
        Tuple holding the second field of each record.
    """
    from bioservices.kegg import KEGG

    client = KEGG()
    client.organism = 'hsa'
    raw_entry = client.get(pathway)
    gene_records = client.parse(raw_entry)['GENE']
    split_records = [record.split(' ') for record in gene_records]
    # Transposing requires every record to split into exactly two fields.
    _entrez, symbol = zip(*split_records)
    return symbol
def get_kegg_info(stId):
    """
    Fetch the KEGG entry for ``stId`` and return it parsed.
    """
    client = KEGG()
    raw_entry = client.get(stId)
    return client.parse(raw_entry)
def retrieve_kegg_formula(reactome_compound_name):
    """Return the chemical formula KEGG lists for a compound.

    Args:
        reactome_compound_name: compound reference; every occurrence of
            ``COMPOUND`` in it is replaced by ``cpd`` before querying KEGG.

    Returns:
        The second whitespace-separated token of the first line starting
        with ``FORMULA``, or ``None`` when the lookup fails, no such line
        exists, or the line carries no value.
    """
    k = KEGG()
    compound_name = reactome_compound_name.replace('COMPOUND', 'cpd')
    res = k.get(compound_name)
    # A failed lookup yields an integer status rather than the entry text.
    if isinstance(res, int):
        return None
    for line in res.split('\n'):
        if line.startswith('FORMULA'):
            tokens = line.split()
            # Guard against a bare "FORMULA" line with nothing after it.
            return tokens[1] if len(tokens) > 1 else None
    return None
def get_single_compound_metadata_online(compound_id):
    """Fetch metadata for one compound from KEGG or ChEBI.

    Identifiers starting with ``C`` (case-insensitive) are looked up in KEGG
    and returned parsed; anything else is prefixed with ``CHEBI:`` and
    resolved through ``ChEBI.getCompleteEntity``.
    """
    if not compound_id.upper().startswith('C'):
        chebi_client = ChEBI()
        return chebi_client.getCompleteEntity('CHEBI:' + compound_id)

    kegg_client = KEGG()
    raw_entry = kegg_client.get(compound_id)
    return kegg_client.parse(raw_entry)
def extract_orthologs(filename):
    '''
    Create a dictionary with a KEGG id as key and a list of orthologs as
    value.

    arg:
        filename: tab-separated file with a ``kegg_id`` column. The list of
                  accepted organism codes is read from the ``KEGG`` column
                  of ``gammaproteo.csv`` in the working directory.
    return: dict mapping each KEGG id to a list of lists of
            ``"<organism>:<gene>"`` strings, one inner list per accepted
            organism
    '''
    orthos_dict = {}
    k = KEGG()

    # ``tupleize_cols`` was dropped here: it only affected multi-row headers
    # and no longer exists in pandas >= 1.0, where passing it raises TypeError.
    df = pd.read_csv(filename, sep="\t")
    df_gamma = pd.read_csv('gammaproteo.csv', sep="\t")
    # A set gives O(1) membership tests inside the loop below.
    gamma_codes = set(df_gamma['KEGG'].tolist())

    # loop through keggid to get orthologs
    for keggid in df['kegg_id']:
        if keggid == "no":
            continue
        print(str(keggid))

        # get orthologs on kegg
        data = k.get(keggid)
        dict_data = k.parse(data)
        # an integer means the lookup failed
        if isinstance(dict_data, int):
            continue

        ortho_list = []
        # keep only the organisms listed in gammaproteo.csv
        for key, value in dict_data['GENES'].items():
            organism = key.lower()
            if organism not in gamma_codes:
                continue
            # gene identifiers are the tokens before the first '('
            gene_ids = value.split('(')[0].split()
            ortho_list.append([organism + ":" + gene for gene in gene_ids])
        orthos_dict[keggid] = ortho_list
    return orthos_dict
def id2seq(self, hsa):
    """Write the amino-acid sequence of a KEGG entry to ``dummy.txt``.

    The entry ``hsa`` is fetched and parsed; all whitespace is removed from
    its ``AASEQ`` field and the result is written in fasta form
    (``>`` + id on the first line, sequence on the second). When the field
    is unavailable an empty sequence is written.

    Args:
        hsa: KEGG identifier of the entry.

    Returns:
        None.
    """
    s = KEGG()
    dict_d = s.parse(s.get(hsa))
    try:
        seq = re.sub(r'\s+', '', dict_d['AASEQ'])
    except (KeyError, TypeError):
        # KeyError: the entry has no AASEQ field; TypeError: the parsed
        # value is not subscriptable or the field is not text. Anything
        # else (e.g. KeyboardInterrupt) is no longer swallowed.
        seq = ''
    with open("dummy.txt", "w") as text_file:
        text_file.write('>' + str(hsa) + '\n' + seq)
    return None
def get_metabs(KEGG, reac_id):
    """Split a KEGG reaction equation into substrates and products.

    Args:
        KEGG: client object providing ``get`` and ``parse``.
        reac_id: KEGG reaction ID handed to ``KEGG.get``.

    Returns:
        ``[substrates, products]``: two lists holding the stripped,
        ``+``-separated terms on either side of ``<=>`` in the parsed
        ``EQUATION`` field.
    """
    # Retrieve the reaction record and parse it with the same client.
    parsed_reaction = KEGG.parse(KEGG.get(reac_id))

    # Left of '<=>' are the substrates, right of it the products.
    halves = parsed_reaction['EQUATION'].split('<=>')

    def _terms(side):
        # Drop the plus signs between the metabolites.
        return [term.strip() for term in side.split('+')]

    substrates = _terms(halves[0])
    products = _terms(halves[1])
    return [substrates, products]
def get_compound_metadata_online(kegg_ids):
    """Fetch the display name of each KEGG record.

    Args:
        kegg_ids: sequence of KEGG identifiers.

    Returns:
        Dict mapping each identifier to ``{'display_name': <name>}``, where
        the name is the first ``NAME`` entry with every ``;`` removed.
        Records whose processing raises ``TypeError`` (e.g. the parsed value
        is an integer status and cannot be indexed) are reported on stdout
        and left out.
    """
    s = KEGG()
    metadata_map = {}
    total = len(kegg_ids)
    for i, kegg_id in enumerate(kegg_ids):
        # Reset per record so the error message below never shows the
        # previous record's data (or fails on an unbound name).
        d = None
        try:
            if i % 10 == 0:
                print("Retrieving %d/%d KEGG records" % (i, total))
            res = s.get(kegg_id)
            d = s.parse(res)
            first_name = d['NAME'][0]
            first_name = first_name.replace(';', '')  # drop the ';' separator
            metadata_map[kegg_id] = {'display_name': first_name}
        except TypeError:
            print('kegg_id=%s parsed_data=%s' % (kegg_id, d))
    return metadata_map
def get_seq(filename):
    '''
    Create a dictionary with species as keys and sequences as values for an
    alignment.

    arg:
        filename: name of a fasta file inside ``alignments_nogaps/``; every
                  record id is expected to look like ``<org>_<gene>``
    return: dict mapping an organism name (second and third word of the
            KEGG ``ORGANISM`` field) to a list of sequences, each sequence
            being a list of single characters
    '''
    k = KEGG()
    records = list(SeqIO.parse(os.path.join('alignments_nogaps/', filename), "fasta"))
    orgdict = {}

    # go through sequences and search for the organism name on kegg
    for record in records:
        idsplit = (record.id).split('_', 1)
        gene_id = idsplit[0] + ':' + idsplit[1]
        handle = k.get(gene_id)
        # an integer means the lookup failed: report the id and move on
        if isinstance(handle, int):
            print(gene_id)
            continue
        org_words = k.parse(handle)['ORGANISM'].split()
        org = org_words[1] + " " + org_words[2]
        # group in a single pass instead of re-scanning the organism list
        # once per distinct organism
        orgdict.setdefault(org, []).append(list(str(record.seq)))
    return orgdict
def queryKegg(theIDs):
    """Look up each identifier in KEGG organism ``acb`` and parse the hit.

    The first three characters of every identifier are dropped; the rest is
    searched with ``KEGG.find("acb", ...)`` and the text before the first
    tab of the answer is fetched and parsed.

    Returns:
        ``(parsed_entries, trimmed_ids)``: two lists in input order.
    """
    print("Currently querying KEGG...")
    client = KEGG()
    parsed_entries = []
    trimmed_ids = []
    for raw_id in theIDs:
        trimmed = raw_id[3:]
        first_hit = client.find("acb", trimmed).split('\t')[0]
        entry = client.get(first_hit)
        parsed_entries.append(client.parse(entry))
        trimmed_ids.append(trimmed)
    return parsed_entries, trimmed_ids
def main():
    """Download the KEGG record of every gene in ``hsa_gene_list.json``.

    The keys of the JSON object are used as KEGG gene identifiers; each one
    is fetched and parsed, and the mapping ``gene -> parsed record`` is
    written to ``ginfo.json``.
    """
    # Start KEGG interface
    k = KEGG()
    # Create a dict to store final result
    data = dict()
    # Read in KEGG gene ID & gene symbol pairs
    with open("hsa_gene_list.json", "r") as g:
        gene_data = json.load(g)
    for gene in gene_data:
        # print() with one argument behaves the same on Python 2 and 3,
        # unlike the former print statement.
        print(gene)
        g_data = k.get(gene)
        g_prsd = k.parse(g_data)
        data[gene] = g_prsd
    with open('ginfo.json', 'w') as fw:
        json.dump(data, fw)
def get_reaction_ECs_from_kegg(self):
    """Fill ``self.reaction_ECs`` with the KEGG ``ENZYME`` entries of the
    model's reactions.

    Each item of ``self.model.reactions`` is split on single spaces into
    KEGG reaction ids. Ids not seen yet are fetched and their ``ENZYME``
    values added to the set stored under that id. Any exception raised
    while handling one item is printed and the remaining ids of that item
    are skipped.
    """
    self.reaction_ECs = defaultdict(set)
    client = KEGG()
    for reaction_entry in self.model.reactions:
        # NOTE(review): this list is shared by all ids of one entry, so a
        # later id also receives the ECs of the ids before it - confirm
        # that this is intended.
        collected = []
        try:
            for reaction_id in reaction_entry.split(" "):
                if reaction_id in self.reaction_ECs:
                    continue
                print("KEGG reaction", reaction_id)
                collected.extend(client.parse(client.get(reaction_id))['ENZYME'])
                for ec_number in collected:
                    self.reaction_ECs[reaction_id].add(ec_number)
        except Exception as inst:
            print(inst)
    print("EC data loaded from KEGG")
def enzymeInfo(code, ignored, stats, verbosity):
    """Collect information about an enzyme and its pathways from KEGG.

    Args:
        code: enzyme code to look up.
        ignored: collection of pathway ids that must not be expanded.
        stats: dict of counters and lists, updated in place
            (``NB_PATHWAY``, ``ENZYME_ONLY_IGNORED_PATHWAY``,
            ``LIST_ENZYME_ONLY_IGNORED_PATHWAY``, ``MISSING_PATHWAY_IN_KEGG``,
            ``LIST_MISSING_PATHWAY_IN_KEGG``).
        verbosity: forwarded to ``KEGG(verbose=...)``.

    Returns:
        ``False`` when KEGG answers with an integer (unknown code).
        Otherwise a list of lists: first the enzyme prefix
        ``[code, names, definition]``, then one entry per retained pathway
        (as returned by ``pathwayInfo``), or a single ``['NA']`` placeholder
        when the enzyme has no pathway or only one, ignored, pathway.
    """
    kSearch = KEGG(verbose=verbosity)
    print(f"[+] Get info about enzyme {code}")
    result = kSearch.get(code)

    # An integer answer means the code matched nothing in the databases.
    if type(result) is int:
        return False

    dictResult = kSearch.parse(result)

    # Prefix: information about the enzyme itself, code first.
    prefixList = [code]

    # Names: stringified list without its brackets/quotes at both ends and
    # with ',' turned into ';'.
    if 'NAME' in dictResult.keys():
        prefixList.append(
            str(dictResult['NAME']).strip("'[]'").replace(',', ';'))
    else:
        prefixList.append('NA')

    # Definition: commas replaced so later column formatting is not broken.
    if 'DEFINITION' in dictResult.keys():
        prefixList.append(str(dictResult['DEFINITION']).replace(',', ';'))
    else:
        prefixList.append('NA')

    # The result always starts with the prefix.
    finalList = [prefixList]

    if 'PATHWAY' not in dictResult.keys():
        # No pathway at all: warn, record it, add an empty placeholder.
        print(f"[!] No pathway detected for enzyme {code}\n")
        stats['MISSING_PATHWAY_IN_KEGG'] += 1
        stats['LIST_MISSING_PATHWAY_IN_KEGG'].append(code)
        finalList.append(['NA'])
        return finalList

    pathwayList = list(dictResult['PATHWAY'].keys())
    singlePathway = len(pathwayList) == 1
    for pathway in pathwayList:
        if pathway not in ignored:
            print(f" [-] Get info about {pathway} pathway")
            suffixList = pathwayInfo(pathway)
            # Count the pathway for the statistics.
            stats['NB_PATHWAY'] += 1
            finalList.append(suffixList)
            continue

        # From here on the pathway is in the ignored list.
        if singlePathway:
            # The enzyme's only pathway is ignored: record it and add an
            # empty placeholder.
            print(f" [!] Enzyme {code} have only 1 pathway : {pathway}")
            print(f" [!] and this pathway is ignored")
            stats['ENZYME_ONLY_IGNORED_PATHWAY'] += 1
            stats['LIST_ENZYME_ONLY_IGNORED_PATHWAY'].append(code)
            finalList.append(['NA'])
        else:
            print(f" [!] Ignored pathway : {pathway}")

    return finalList
enzymes = ReadFile(args.include) else: log("Fetch enzymes from kegg") enzymes = p.enzymeIds log("%s enzymes fetched" % len(enzymes)) ecs = {} if args.outfile: hout = open(args.outfile, 'w') else: hout = sys.stdout houtcsv = csv.writer(hout, delimiter='\t') log("Fetch enzymes from kegg") a = k.get(' '.join(enzymes)) log("Fetch enzymes from kegg") for ec in enzymes: ec = ec.replace("ec:", "") if ec in exclude: continue l = [] try: log("Fecthing %s from kegg" % ec) result = k.get(ec) except urllib2.HTTPError: continue parsed = p.parse(result) # # Check if the enzyme is obsolete if "Obsolete" in parsed["entry"]: continue
# Project the transcriptomics samples on the fitted factors and list them by
# their score on the second factor.
t_feat = DataFrame(t_fa.transform(trans_n.T), index=trans_n.columns, columns=['Factor %d' % (i + 1) for i in range(3)])
print(t_feat['Factor 2'].sort_values())

# Pair plot of the factors, coloured by sample type.
sns.set(style='ticks', context='paper', rc={'axes.linewidth': .3, 'xtick.major.width': .3, 'ytick.major.width': .3})
g = sns.pairplot(t_hfac, hue='type', palette=pal)
plt.savefig('%s/reports/transcriptomics_pairplot.pdf' % wd, bbox_inches='tight')
plt.close('all')
print('[INFO] Corr plotted!')

# -- Bioservices KEGG information
bioser = KEGG(cache=True)
bioser.organism = 'hsa'

# Get pathways: raw KEGG flat-file text per pathway id
keggp = {p: bioser.get(p) for p in bioser.pathwayIds}
print('[INFO] Pathways fetched')

# Pathway name: text after NAME, up to the first ' - '
keggp_name = {p: re.findall(r'NAME\s+(.*)?\n', keggp[p])[0].split(' - ')[0] for p in keggp}

# Compound ids (C followed by digits) found in the COMPOUND section
keggp_comp = {p: {c for keggc in re.findall(r'(COMPOUND.*?)\n[A-Z]', keggp[p], re.S)[0].split('\n') for c in re.findall(r'\s+(C[0-9]+)\s+', keggc)} for p in keggp if 'COMPOUND' in keggp[p]}

# Gene entries (text up to ';') found in the GENE section
keggp_gene = {p: {g for keggg in re.findall(r'(GENE.*?)\n[A-Z]', keggp[p], re.S)[0].split('\n') for g in re.findall(r'\s+([A-Z]+.+);', keggg)} for p in keggp if 'GENE' in keggp[p]}

# Transcription factors with at least one target among the pathway genes
keggp_tf = {p: {tf for tf in tf_targets_dict if len(tf_targets_dict[tf].intersection(keggp_gene[p])) > 0} for p in keggp_gene}
print('[INFO] Pathways genes fetched')

# Binary pathway x metabolite matrix
keggp_comp_m = DataFrame([(p, m, 1) for p in keggp_comp for m in keggp_comp[p]], columns=['pathway', 'metabolite', 'value'])
keggp_comp_m = pivot_table(keggp_comp_m, index='pathway', columns='metabolite', values='value', fill_value=0)
# ``head`` is now actually called; previously the bound method was printed.
print(keggp_comp_m.head())

# Binary pathway x gene matrix.
# NOTE(review): this iterates the pathways of keggp_comp while indexing
# keggp_gene, so pathways without compounds are left out and a pathway with
# compounds but no genes raises KeyError - confirm whether keggp_gene was
# meant here.
keggp_gene_m = DataFrame([(p, m, 1) for p in keggp_comp for m in keggp_gene[p]], columns=['pathway', 'gene', 'value'])
keggp_gene_m = pivot_table(keggp_gene_m, index='pathway', columns='gene', values='value', fill_value=0)
print(keggp_gene_m.head())
from bioservices.kegg import KEGG

# Fetch and parse the KEGG pathway entry 'ath00900'.
kegg = KEGG()
pathway = kegg.get('ath00900')
dict_data = kegg.parse(pathway)

# Write the EC numbers annotated on the pathway's genes to eclist.txt, one
# per line. The file is opened only once the entry has been fetched, and the
# context manager guarantees it is flushed and closed.
with open('eclist.txt', 'w') as output:
    for value in dict_data['GENE'].values():
        # Genes without an "[EC:...]" annotation have nothing to report;
        # indexing the split result for them would raise IndexError.
        if '[EC:' not in value:
            continue
        EC = value.split('[EC:')[1]
        EC = EC.split(']')[0]
        # Several EC numbers are space-separated inside the brackets.
        EC = EC.replace(' ', '\n')
        output.write(EC + '\n')
from bioservices.kegg import KEGG

# Fetch and parse the KEGG entry 'ko01230', then show it.
kegg = KEGG()
pathway = kegg.get("ko01230")
dict_data = kegg.parse(pathway)
print(dict_data)

# Maps an ortholog id to the module it was found in.
modules_dict = {}

# One tab-separated line per module: id, NAME, DEFINITION and the module's
# orthologs as "<id>_<description>" joined by "//". The context manager
# guarantees modules.txt is flushed and closed.
with open("modules.txt", "w") as output:
    for key in dict_data['MODULE'].keys():
        pathway = kegg.get(key)
        module_data = kegg.parse(pathway)
        orthologs = []
        for ortholog in module_data['ORTHOLOGY'].keys():
            data = [ortholog, module_data['ORTHOLOGY'][ortholog]]
            orthologs.append("_".join(data))
            modules_dict[ortholog] = key
        output.write('{}\t{}\t{}\t{}\n'.format(key, module_data['NAME'], module_data['DEFINITION'], "//".join(orthologs)))
def mapSpecies(mousepeptrackfilename):
    """Annotate a mouse peptide table with human homolog information.

    Written for Python 2 (print statements, ``urllib.urlretrieve``,
    ``urllib2``, ``cPickle``). Steps, in order:

    1. Download the MGI mouse/human homology report to ``MouseToHuman.tsv``
       in the current directory and build a mouse -> human accession map
       from its ``SWISS_PROT IDs`` column.
    2. Re-write ``mousepeptrackfilename`` IN PLACE with an extra
       ``Human UniProtKB Accession`` column.
    3. For every distinct human accession, download the UniProt XML record
       and extract protein/gene/organism names, DrugBank links, GO terms
       and diseases, merged with the data loaded from the pickle named by
       ``disGenData()``; the result is pickled to
       ``humanUniprotfuncinfodic.obj``.
    4. Map the accessions to KEGG ids through the UniProt upload-lists
       service, fetch the KEGG pathways of each id and pickle them to
       ``humankeggdic.obj``.

    Args:
        mousepeptrackfilename: path of the tab-separated peptide table; it
            is read and then overwritten.

    Returns:
        None.
    """
    # Seconds to wait before retrying the UniProt id-mapping request.
    RETRY_TIME = 20.0
    mouseTohumanfilepath = os.path.join(os.getcwd(), 'MouseToHuman.tsv')
    print("Extracting Mouse to Human Map data, job starts",
          str(datetime.datetime.now()))
    # Raise the csv field size limit to half the largest C unsigned long.
    csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))
    try:
        urllib.urlretrieve(
            'http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt',
            mouseTohumanfilepath)
        urllib.urlcleanup()
    except:
        # Any download failure is only reported; the file is still opened
        # below, so a missing file fails there.
        print("Can't able to download MouseToHuman.tsv file!!")
    # Columns read from the homology report; the last one holds the
    # accession used for the mapping.
    colnameMousHu = [
        'HomoloGene ID', 'Common Organism Name', 'NCBI Taxon ID', 'Symbol',
        'EntrezGene ID', 'Mouse MGI ID', 'HGNC ID', 'OMIM Gene ID',
        'Genetic Location', 'Genomic Coordinates (mouse: , human: )',
        'Nucleotide RefSeq IDs', 'Protein RefSeq IDs', 'SWISS_PROT IDs'
    ]
    mouseHumandata = []
    homologID = []
    with open(mouseTohumanfilepath) as mhtsvfile:
        mhreader = csv.DictReader(mhtsvfile, delimiter='\t')
        for mhrow in mhreader:
            mhtemplist = []
            for i in colnameMousHu:
                mhtempdata = str(mhrow[i]).strip()
                mhtemplist.append(mhtempdata)
            # Keep only rows that carry a SWISS_PROT accession.
            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # source - both appends are assumed to be inside this `if`.
            if len(mhtemplist[-1].strip()) > 0:
                homologID.append(mhtemplist[0])
                mouseHumandata.append(mhtemplist)
    homologID = list(set(homologID))
    homologID.sort()
    # mouse accession -> human accession, paired through the HomoloGene id
    mousehumandic = {}
    for homologidItem in homologID:
        tempHumanHomoUniID = ''
        tempMouseHomoUniID = ''
        for item in mouseHumandata:
            if homologidItem == item[0]:
                # Rows whose organism name contains 'mouse' give the mouse
                # accession; every other row is taken as the human one.
                if 'mouse' in item[1].strip().lower():
                    tempMouseHomoUniID = item[-1].strip()
                else:
                    tempHumanHomoUniID = item[-1].strip()
        if len(tempMouseHomoUniID.strip()) > 0 and len(
                tempHumanHomoUniID.strip()) > 0 and tempHumanHomoUniID.strip(
                ).upper() != 'NA':
            mousehumandic[tempMouseHomoUniID] = tempHumanHomoUniID
    # Output header; all but the last column are read from the input table.
    colname=['UniProtKB Accession','Protein','Gene','Organism','Peptide Sequence','Summary Concentration Range Data','All Concentration Range Data','All Concentration Range Data-Sample LLOQ Based','Peptide ID',\
    'Special Residues','Molecular Weight','GRAVY Score','Transitions','Retention Time','Analytical inofrmation',\
    'Gradients','AAA Concentration','CZE Purity','Panel','Knockout','LLOQ','ULOQ','Sample LLOQ','Protocol','Trypsin','QC. Conc. Data','Human UniProtKB Accession']
    finalresult = []
    finalresult.append(colname)
    humanUniprotID = []
    with open(mousepeptrackfilename) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            templist = []
            for i in colname[:-1]:
                tempdata = str(row[i]).strip()
                templist.append(tempdata)
            # The accession is looked up without its '-<suffix>' part.
            # NOTE(review): the `else` is assumed to belong to the inner
            # `if`; rows with an empty accession then get no extra column.
            if len(str(templist[0]).strip()) > 0:
                if templist[0].split('-')[0] in mousehumandic:
                    humanUniprotID.append(
                        mousehumandic[templist[0].split('-')[0]])
                    templist.append(mousehumandic[templist[0].split('-')[0]])
                else:
                    templist.append('NA')
            finalresult.append(templist)
    # Overwrite the input file with the extended table.
    with open(mousepeptrackfilename, 'wb') as pf:
        pwriter = csv.writer(pf, delimiter='\t')
        pwriter.writerows(finalresult)
    # disGenData() is defined elsewhere; its return value is used as the
    # name of a pickle file. NOTE(review): unpickling is only safe for
    # trusted, locally produced files.
    disGenDataDicName = disGenData()
    #disGenDataDicName='disGen.obj'
    disGenDataDic = cPickle.load(open(disGenDataDicName, 'rb'))
    unqhumanUniprotID = list(set(humanUniprotID))
    humanUniprotfuncinfodic = {}
    countProt = 0
    for subcode in unqhumanUniprotID:
        # Pause two seconds before each UniProt request.
        time.sleep(2)
        drugbanklist = []
        PN = 'NA'
        GN = 'NA'
        OG = 'NA'
        OGID = 'NA'
        dislist = []
        unidislist = []
        unidisURLlist = []
        disgendislist = []
        disgendisURLlist = []
        GoIDList = []
        GoNamList = []
        GoTermList = []
        GOinfo = []
        try:
            countProt += 1
            if countProt % 1000 == 0:
                print str(
                    countProt
                ), "th protein Protein Name, Gene, Organism Name,drug bank data,disease data job starts", str(
                    datetime.datetime.now())
            SGrequestURL = "https://www.uniprot.org/uniprot/" + str(
                subcode) + ".xml"
            SGunifile = urllib.urlopen(SGrequestURL)
            SGunidata = SGunifile.read()
            SGunifile.close()
            try:
                SGunidata = minidom.parseString(SGunidata)
                # --- DrugBank cross-references and taxonomy id ---
                try:
                    drugdata = (SGunidata.getElementsByTagName('dbReference'))
                    for duItem in drugdata:
                        if (duItem.attributes['type'].value
                            ).upper() == 'DRUGBANK':
                            try:
                                drugname = (str(
                                    duItem.getElementsByTagName('property')
                                    [0].attributes['value'].value).strip())
                                drugid = str(
                                    duItem.attributes['id'].value).strip()
                                durl = '<a target="_blank" href="https://www.drugbank.ca/drugs/' + drugid + '">' + drugname + '</a>'
                                drugbanklist.append(durl)
                            except:
                                pass
                        if (duItem.attributes['type'].value
                            ).strip() == 'NCBI Taxonomy':
                            try:
                                OGID = str(
                                    duItem.attributes['id'].value).strip()
                            except:
                                pass
                except IndexError:
                    pass
                # --- GO cross-references ---
                try:
                    godata = (SGunidata.getElementsByTagName('dbReference'))
                    for gItem in godata:
                        if (gItem.attributes['type'].value).upper() == 'GO':
                            try:
                                # The first property value is split on ':'
                                # into an aspect letter and the term name.
                                gonamedetails = (str(
                                    gItem.getElementsByTagName('property')
                                    [0].attributes['value'].value).strip()
                                                 ).split(':')[1]
                                gotermdetails = (str(
                                    gItem.getElementsByTagName('property')
                                    [0].attributes['value'].value).strip()
                                                 ).split(':')[0]
                                GoNamList.append(gonamedetails)
                                goid = str(
                                    gItem.attributes['id'].value).strip()
                                GoIDList.append(goid)
                                tempGoTerm = None
                                if gotermdetails.lower() == 'p':
                                    tempGoTerm = 'Biological Process'
                                if gotermdetails.lower() == 'f':
                                    tempGoTerm = 'Molecular Function'
                                if gotermdetails.lower() == 'c':
                                    tempGoTerm = 'Cellular Component'
                                GoTermList.append(tempGoTerm)
                                # With an unknown aspect tempGoTerm is None:
                                # the concatenation raises TypeError, which
                                # the bare except swallows after the lists
                                # above were already extended.
                                tempGOData = gonamedetails + ';' + goid + ';' + tempGoTerm
                                GOinfo.append(tempGOData)
                            except:
                                pass
                        if (gItem.attributes['type'].value
                            ).strip() == 'NCBI Taxonomy':
                            try:
                                OGID = str(
                                    gItem.attributes['id'].value).strip()
                            except:
                                pass
                except IndexError:
                    pass
                # --- protein name: recommended name, else submitted name ---
                try:
                    try:
                        PN = (((SGunidata.getElementsByTagName('protein')[0]
                                ).getElementsByTagName('recommendedName')[0]
                               ).getElementsByTagName('fullName')[0]
                              ).firstChild.nodeValue
                    except:
                        PN = (((SGunidata.getElementsByTagName('protein')[0]
                                ).getElementsByTagName('submittedName')[0]
                               ).getElementsByTagName('fullName')[0]
                              ).firstChild.nodeValue
                except IndexError:
                    pass
                # --- gene name ---
                try:
                    try:
                        GN = ((
                            SGunidata.getElementsByTagName('gene')[0]
                        ).getElementsByTagName('name')[0]).firstChild.nodeValue
                    except:
                        GN = 'NA'
                except IndexError:
                    pass
                # --- organism name ---
                try:
                    try:
                        OG = ((
                            SGunidata.getElementsByTagName('organism')[0]
                        ).getElementsByTagName('name')[0]).firstChild.nodeValue
                    except:
                        OG = 'NA'
                except IndexError:
                    pass
                # --- diseases ---
                try:
                    disdata = SGunidata.getElementsByTagName('disease')
                    for dItem in disdata:
                        disname = ''
                        disshort = ''
                        disURL = ''
                        disID = ''
                        try:
                            disname = (dItem.getElementsByTagName('name')[0]
                                       ).firstChild.nodeValue
                            disID = (dItem.attributes['id'].value).upper()
                        except:
                            pass
                        try:
                            disshort = (dItem.getElementsByTagName('acronym')
                                        [0]).firstChild.nodeValue
                        except:
                            pass
                        if len(disname.strip()) > 0:
                            disURL = '<a target="_blank" href="https://www.uniprot.org/diseases/' + disID + '">' + str(
                                disname.strip()) + '(' + str(
                                    disshort) + ')' + '</a>'
                            dislist.append(
                                str(disname.strip()) + '(' + str(disshort) +
                                ')')
                            unidislist.append(
                                str(disname.strip()) + '(' + str(disshort) +
                                ')')
                            unidisURLlist.append(disURL)
                except IndexError:
                    pass
            except ExpatError:
                # Unparseable XML: keep the 'NA' defaults.
                pass
        except IOError:
            # Download failure: keep the 'NA' defaults.
            pass
        drugbankdata = 'NA'
        disdata = 'NA'
        uniDisData = 'NA'
        uniDisURLData = 'NA'
        disgenDisData = 'NA'
        disgenDisURLData = 'NA'
        goiddata = 'NA'
        gonamedata = 'NA'
        gotermdata = 'NA'
        goData = 'NA'
        # Merge the pickled per-gene disease data with the UniProt diseases.
        if GN != 'NA' and GN in disGenDataDic:
            disgendislist = disGenDataDic[GN][0]
            disgendisURLlist = disGenDataDic[GN][1]
            if len(dislist) > 0:
                dislist = dislist + disGenDataDic[GN][0]
            else:
                dislist = disGenDataDic[GN][0]
        # Collapse each list into a '|'-joined string of distinct values.
        if len(GoIDList) > 0:
            goiddata = '|'.join(list(set(GoIDList)))
        if len(GoNamList) > 0:
            gonamedata = '|'.join(list(set(GoNamList)))
        if len(GoTermList) > 0:
            gotermdata = '|'.join(list(set(GoTermList)))
        if len(GOinfo) > 0:
            goData = '|'.join(list(set(GOinfo)))
        if len(drugbanklist) > 0:
            drugbankdata = '|'.join(list(set(drugbanklist)))
        if len(dislist) > 0:
            disdata = '|'.join(list(set(dislist)))
        if len(unidislist) > 0:
            uniDisData = '|'.join(list(set(unidislist)))
        if len(unidisURLlist) > 0:
            uniDisURLData = '|'.join(list(set(unidisURLlist)))
        if len(disgendislist) > 0:
            disgenDisData = '|'.join(list(set(disgendislist)))
        if len(disgendisURLlist) > 0:
            disgenDisURLData = '|'.join(list(set(disgendisURLlist)))
        humanUniprotfuncinfodic[subcode] = [
            PN, GN, OG, OGID, disdata, uniDisData, uniDisURLData,
            disgenDisData, disgenDisURLData, drugbankdata, goiddata,
            gonamedata, gotermdata, goData
        ]
    # Persist the per-protein annotation.
    hudicfile = 'humanUniprotfuncinfodic.obj'
    hudicf = open(hudicfile, 'wb')
    pickle.dump(humanUniprotfuncinfodic, hudicf, pickle.HIGHEST_PROTOCOL)
    hudicf.close()
    print("Extracting KEGG pathway name, job starts",
          str(datetime.datetime.now()))
    # human accession -> [pathway names, 'id;name|id;name|...']
    hkeggdictfile = {}
    huniproturl = 'https://www.uniprot.org/uploadlists/'
    hk = KEGG()
    # Map accessions to KEGG ids in batches of 2000.
    for hkx in range(0, len(unqhumanUniprotID), 2000):
        # NOTE(review): countProt still holds the count from the loop above,
        # so this message only appears when the running total happens to be
        # a multiple of 2000 - confirm intended.
        countProt += hkx + 2000
        if countProt % 2000 == 0:
            print(str(countProt), "th protein kegg job starts",
                  str(datetime.datetime.now()))
        huniprotcodes = ' '.join(unqhumanUniprotID[hkx:hkx + 2000])
        huniprotparams = {
            'from': 'ACC',
            'to': 'KEGG_ID',
            'format': 'tab',
            'query': huniprotcodes
        }
        # Repeat the batch until it completes without an HTTP error.
        while True:
            try:
                hkuniprotdata = urllib.urlencode(huniprotparams)
                hkuniprotrequest = urllib2.Request(huniproturl, hkuniprotdata)
                hkuniprotresponse = urllib2.urlopen(hkuniprotrequest)
                for hkuniprotline in hkuniprotresponse:
                    hkudata = hkuniprotline.strip()
                    # Skip the header line of the mapping table.
                    if not hkudata.startswith("From"):
                        hkuinfo = hkudata.split("\t")
                        if len(hkuinfo[1].strip()):
                            hkegg = hk.get(hkuinfo[1].strip())
                            hkudict_data = hk.parse(hkegg)
                            # TypeError/KeyError (parsed value not a mapping,
                            # or no PATHWAY field) skip the entry.
                            try:
                                try:
                                    if len(str(hkuinfo[0]).strip()) > 5:
                                        tempkeggData = '|'.join(
                                            '{};{}'.format(key, value)
                                            for key, value in
                                            hkudict_data['PATHWAY'].items())
                                        hkeggdictfile[hkuinfo[0].strip()] = [
                                            hkudict_data['PATHWAY'].values(),
                                            tempkeggData
                                        ]
                                except TypeError:
                                    pass
                            except KeyError:
                                pass
                break
            except urllib2.HTTPError:
                time.sleep(RETRY_TIME)
                print(
                    'Hey, I am trying again until succeeds to get data from KEGG!',
                    str(datetime.datetime.now()))
                pass
    # Persist the per-protein KEGG pathways.
    hkdicfile = 'humankeggdic.obj'
    hkdicf = open(hkdicfile, 'wb')
    pickle.dump(hkeggdictfile, hkdicf, pickle.HIGHEST_PROTOCOL)
    hkdicf.close()
from bioservices.kegg import KEGG

# Fetch the KEGG entry "K00855" and parse it.
k = KEGG()
path = k.get("K00855")
kdict = k.parse(path)
print(kdict)
# Show the built-in help for the parsed object.
help(kdict)
# Write the keys of the parsed entry to play.out, one per line.
with open("play.out", "wt") as result:
    result.write("\n".join(kdict.keys()))
def get_kegg_info(stId):
    """Return the parsed KEGG entry for ``stId``."""
    service = KEGG()
    return service.parse(service.get(stId))
import re

from bioservices.kegg import KEGG

# -- KEGG bioservice
# NOTE: everything below runs at import time and downloads every human
# pathway, reaction and enzyme entry.
bioser = KEGG(cache=True)
bioser.organism = 'hsa'

# Get pathways: raw KEGG flat-file text per pathway id
keggp = {p: bioser.get(p) for p in bioser.pathwayIds}
print('[INFO] Pathways fetched')

# Get reactions
keggr = {r: bioser.get(r) for r in bioser.reactionIds}
print('[INFO] Reactions fetched')

# Get enzymes
kegge = {e: bioser.get(e) for e in bioser.enzymeIds}
print('[INFO] Enzymes fetched')

# keggc = {c: bioser.get(c) for c in bioser.compoundIds}
# print '[INFO] Compounds fetched'
#
# # Get modules
# keggm = {m: bioser.get(m) for m in bioser.moduleIds}
# print '[INFO] Modules fetched'


# -- KEGG methods
def get_pathway_names(pathways=None):
    """Return ``{pathway id: name}`` for the given KEGG pathways.

    Args:
        pathways: iterable of ids present in ``keggp``; when empty or
            ``None`` every fetched pathway is used.

    Returns:
        Dict whose values are the text after ``NAME`` in the pathway entry,
        cut at the first ``' - '``.
    """
    pathways_ = pathways if pathways else set(keggp)
    # Raw string: same pattern, without the invalid '\s' escape in a normal
    # string literal.
    return {p: re.findall(r'NAME\s+(.*)?\n', keggp[p])[0].split(' - ')[0] for p in pathways_}
def get_pathways_from_KEGG(model, update_existing=False):
    """
    Annotate the reactions of ``model`` with KEGG pathways and subsystems.

    This function extracts pathway and subsystem information from KEGG by
    using the KEGG annotation of each reaction. The pathways we use are the
    ones given here: https://www.genome.jp/kegg/pathway.html, under heading
    1.: Metabolism. However we don't use the *1.0 Global and overview maps*
    or *1.12 Chemical structure and transformation maps*, because they don't
    represent metabolic subsystems.

    What we here refer to as *subsystems* are the subheadings under
    Metabolism, i.e.:
    - Carbohydrate metabolism
    - Energy metabolism
    - Lipid metabolism
    - Nucleotide metabolism
    - Amino acid metabolism
    - Metabolism of other amino acids
    - Glycan biosynthesis and metabolism
    - Metabolism of cofactors and vitamins
    - Metabolism of terpenoids and polyketides
    - Biosynthesis of other secondary metabolites
    - Xenobiotics biodegradation and metabolism

    Args:
        model: object whose ``reactions`` are iterated; each reaction needs
            an ``annotation`` mapping and an ``id``.
        update_existing: when False (default), reactions that already carry
            a ``kegg.pathway`` annotation are left untouched.

    Returns:
        The same ``model``; its reactions gain ``kegg.pathway`` and
        ``kegg.subsystem`` annotations where KEGG provides them.
    """
    from bioservices.kegg import KEGG

    kegg = KEGG()
    kegg_dict, kegg_overview_maps = _get_KEGG_pathways()
    inverse_pathway_dict = _get_inverse_pathway_dict(kegg_dict)

    for reaction in model.reactions:
        # Skip reactions which already have a kegg.pathway annotation
        # if update_existing = False
        if not update_existing and "kegg.pathway" in reaction.annotation:
            continue

        try:
            kegg_id = reaction.annotation["kegg.reaction"]
        except KeyError:
            continue

        kegg_info = kegg.get(kegg_id, parse=True)
        try:
            full_kegg_pathways = kegg_info["PATHWAY"].values()
        except (KeyError, TypeError, AttributeError):
            # No PATHWAY field, or the lookup did not return a mapping.
            # Narrower than the former bare ``except`` so that Ctrl-C and
            # SystemExit are no longer swallowed.
            continue

        kegg_pathways = [
            x for x in full_kegg_pathways if x not in kegg_overview_maps
        ]
        try:
            subsystem = list({inverse_pathway_dict[x] for x in kegg_pathways})
        except (KeyError, TypeError):
            # A pathway unknown to the inverse map: report it and move on.
            print("Error!: ", reaction.id, kegg_pathways)
            continue

        print("KEGG Subsystem ", reaction.id, subsystem)
        reaction.annotation["kegg.pathway"] = kegg_pathways
        reaction.annotation["kegg.subsystem"] = subsystem
    return model
def mapSpecies(mousepeptrackfilename):
    """Annotate a mouse peptide table with human protein information.

    Written for Python 2 (print statements, ``urllib``/``urllib2``,
    ``httplib``). Steps, in order:

    1. Map the distinct values of the table's ``Gene`` column to human
       (taxon 9606), reviewed UniProt accessions through the UniProt
       upload-lists service.
    2. Re-write ``mousepeptrackfilename`` IN PLACE with an extra
       ``Human UniProtKB Accession`` column.
    3. For every distinct human accession, download the UniProt XML record
       and extract protein/gene/organism names, DrugBank links, GO terms
       and diseases; the result is pickled to
       ``humanUniprotfuncinfodic.obj``.
    4. Map the accessions to KEGG ids, fetch the KEGG pathways of each id
       and pickle them to ``humankeggdic.obj``.

    Args:
        mousepeptrackfilename: path of the tab-separated peptide table; it
            is read and then overwritten.

    Returns:
        None.
    """
    # Raise the csv field size limit to half the largest C unsigned long.
    csv.field_size_limit(int(ctypes.c_ulong(-1).value // 2))
    uniproturl = 'https://www.uniprot.org/uploadlists/'
    # Seconds to wait before retrying a failed UniProt request.
    RETRY_TIME = 20.0
    mdf= pd.read_csv(mousepeptrackfilename, delimiter='\t')
    mouseGenes=list(mdf['Gene'].unique())
    # Drop missing values (they stringify as 'nan').
    mouseGenes=[g for g in mouseGenes if str(g) !='nan']
    # gene name -> human UniProt accession
    mousehumandic={}
    # Query the mapping service in batches of 1000 gene names.
    for gx in range(0,len(mouseGenes),1000):
        genecodes=' '.join(mouseGenes[gx:gx+1000])
        geneuniprotparams = {
        'from':'GENENAME',
        'to':'ACC',
        'format':'tab',
        'query':genecodes,
        'columns':'id,genes(PREFERRED),organism-id,reviewed'
        }
        # Repeat the batch until it completes without an error.
        while True:
            try:
                geneuniprotdata = urllib.urlencode(geneuniprotparams)
                geneuniprotrequest = urllib2.Request(uniproturl, geneuniprotdata)
                geneuniprotresponse = urllib2.urlopen(geneuniprotrequest)
                for guniprotline in geneuniprotresponse:
                    gudata=guniprotline.strip()
                    # Skip the header line of the result table.
                    if not gudata.startswith("Entry"):
                        guinfo=gudata.split("\t")
                        # Keep human (9606), reviewed entries whose last
                        # field equals the preferred gene name, ignoring case.
                        if '9606' == guinfo[2].lower() and 'reviewed' == guinfo[3].lower() and guinfo[-1].lower() ==guinfo[1].lower() and len(guinfo[0].strip())>1:
                            mousehumandic[guinfo[-1].strip()]=guinfo[0].strip()
                break
            except urllib2.HTTPError:
                time.sleep(RETRY_TIME)
                print ('Hey, I am trying again until succeeds to get data from uniprot data!',str(datetime.datetime.now()))
            except httplib.BadStatusLine:
                time.sleep(RETRY_TIME)
                print ('Hey, I am trying again until succeeds to get data from uniprot data!',str(datetime.datetime.now()))

    # Output header; all but the last column are read from the input table.
    colname=['UniProtKB Accession','Protein','Gene','Organism','Peptide Sequence','Summary Concentration Range Data','All Concentration Range Data','All Concentration Range Data-Sample LLOQ Based','Peptide ID',\
    'Special Residues','Molecular Weight','GRAVY Score','Transitions','Retention Time','Analytical inofrmation',\
    'Gradients','AAA Concentration','CZE Purity','Panel','Knockout','LLOQ','ULOQ','Sample LLOQ','Protocol','Trypsin','QC. Conc. Data','Human UniProtKB Accession']

    finalresult=[]
    finalresult.append(colname)
    humanUniprotID=[]
    with open(mousepeptrackfilename) as csvfile:
        reader = csv.DictReader(csvfile, delimiter='\t')
        for row in reader:
            templist=[]
            for i in colname[:-1]:
                tempdata=str(row[i]).strip()
                templist.append(tempdata)
            # templist[2] is the 'Gene' column.
            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # source - the `else` is assumed to belong to the inner `if`;
            # rows with an empty gene then get no extra column.
            if len(str(templist[2]).strip())>0:
                if templist[2] in mousehumandic:
                    huUniId=mousehumandic[templist[2]]
                    humanUniprotID.append(huUniId)
                    templist.append(huUniId)
                else:
                    templist.append('NA')
            finalresult.append(templist)

    # Overwrite the input file with the extended table.
    with open(mousepeptrackfilename,'wb') as pf:
        pwriter =csv.writer(pf,delimiter='\t')
        pwriter.writerows(finalresult)

    unqhumanUniprotID=list(set(humanUniprotID))
    humanUniprotfuncinfodic={}
    countProt=0
    for subcode in unqhumanUniprotID:
        # Pause two seconds before each UniProt request.
        time.sleep(2)
        drugbanklist=[]
        PN='NA'
        GN='NA'
        OG='NA'
        OGID='NA'
        dislist=[]
        GoIDList=[]
        GoNamList=[]
        GoTermList=[]
        try:
            countProt+=1
            if countProt%1000 ==0:
                print str(countProt), "th protein Protein Name, Gene, Organism Name,drug bank data,disease data job starts",str(datetime.datetime.now())

            SGrequestURL="https://www.uniprot.org/uniprot/"+str(subcode)+".xml"
            SGunifile=urllib.urlopen(SGrequestURL)
            SGunidata= SGunifile.read()
            SGunifile.close()

            try:
                SGunidata=minidom.parseString(SGunidata)
                # --- DrugBank cross-references and taxonomy id ---
                try:
                    drugdata=(SGunidata.getElementsByTagName('dbReference'))
                    for duItem in drugdata:
                        if (duItem.attributes['type'].value).upper() == 'DRUGBANK':
                            try:
                                drugname=(str(duItem.getElementsByTagName('property')[0].attributes['value'].value).strip())
                                drugid=str(duItem.attributes['id'].value).strip()
                                durl='<a target="_blank" href="https://www.drugbank.ca/drugs/'+drugid+'">'+drugname+'</a>'
                                drugbanklist.append(durl)
                            except:
                                pass
                        if (duItem.attributes['type'].value).strip() == 'NCBI Taxonomy':
                            try:
                                OGID=str(duItem.attributes['id'].value).strip()
                            except:
                                pass
                except IndexError:
                    pass

                # --- GO cross-references ---
                try:
                    godata=(SGunidata.getElementsByTagName('dbReference'))
                    for gItem in godata:
                        if (gItem.attributes['type'].value).upper() == 'GO':
                            try:
                                # The first property value is split on ':'
                                # into an aspect letter and the term name.
                                gonamedetails=(str(gItem.getElementsByTagName('property')[0].attributes['value'].value).strip()).split(':')[1]
                                gotermdetails=(str(gItem.getElementsByTagName('property')[0].attributes['value'].value).strip()).split(':')[0]
                                GoNamList.append(gonamedetails)
                                goid=str(gItem.attributes['id'].value).strip()
                                GoIDList.append(goid)
                                if gotermdetails.lower()=='p':
                                    GoTermList.append('Biological Process')
                                if gotermdetails.lower()=='f':
                                    GoTermList.append('Molecular Function')
                                if gotermdetails.lower()=='c':
                                    GoTermList.append('Cellular Component')
                            except:
                                pass
                        if (gItem.attributes['type'].value).strip() == 'NCBI Taxonomy':
                            try:
                                OGID=str(gItem.attributes['id'].value).strip()
                            except:
                                pass
                except IndexError:
                    pass

                # --- protein name: recommended name, else submitted name ---
                try:
                    try:
                        PN=(((SGunidata.getElementsByTagName('protein')[0]).getElementsByTagName('recommendedName')[0]).getElementsByTagName('fullName')[0]).firstChild.nodeValue
                    except:
                        PN=(((SGunidata.getElementsByTagName('protein')[0]).getElementsByTagName('submittedName')[0]).getElementsByTagName('fullName')[0]).firstChild.nodeValue
                except IndexError:
                    pass

                # --- gene name ---
                try:
                    try:
                        GN=((SGunidata.getElementsByTagName('gene')[0]).getElementsByTagName('name')[0]).firstChild.nodeValue
                    except:
                        GN='NA'
                except IndexError:
                    pass

                # --- organism name ---
                try:
                    try:
                        OG=((SGunidata.getElementsByTagName('organism')[0]).getElementsByTagName('name')[0]).firstChild.nodeValue
                    except:
                        OG='NA'
                except IndexError:
                    pass

                # --- diseases ---
                try:
                    disdata=SGunidata.getElementsByTagName('disease')
                    for dItem in disdata:
                        disname=''
                        disshort=''
                        try:
                            disname=(dItem.getElementsByTagName('name')[0]).firstChild.nodeValue
                        except:
                            pass
                        try:
                            disshort=(dItem.getElementsByTagName('acronym')[0]).firstChild.nodeValue
                        except:
                            pass
                        if len(disname.strip())>0:
                            dislist.append(str(disname.strip())+'('+str(disshort)+')')
                except IndexError:
                    pass

            except ExpatError:
                # Unparseable XML: keep the 'NA' defaults.
                pass
        except IOError:
            # Download failure: keep the 'NA' defaults.
            pass
        drugbankdata='NA'
        disdata='NA'
        goiddata='NA'
        gonamedata='NA'
        gotermdata='NA'
        # Collapse each list into a '|'-joined string of distinct values.
        if len(GoIDList)>0:
            goiddata='|'.join(list(set(GoIDList)))
        if len(GoNamList)>0:
            gonamedata='|'.join(list(set(GoNamList)))
        if len(GoTermList)>0:
            gotermdata='|'.join(list(set(GoTermList)))
        if len(drugbanklist)>0:
            drugbankdata='|'.join(list(set(drugbanklist)))
        if len(dislist)>0:
            disdata='|'.join(list(set(dislist)))
        humanUniprotfuncinfodic[subcode]=[PN,GN,OG,OGID,disdata,drugbankdata,goiddata,gonamedata,gotermdata]
    # Persist the per-protein annotation.
    hudicfile='humanUniprotfuncinfodic.obj'
    hudicf = open(hudicfile, 'wb')
    pickle.dump(humanUniprotfuncinfodic, hudicf , pickle.HIGHEST_PROTOCOL)
    hudicf.close()

    print ("Extracting KEGG pathway name, job starts",str(datetime.datetime.now()))
    # human accession -> KEGG pathway names
    hkeggdictfile={}
    hk = KEGG()
    # Map accessions to KEGG ids in batches of 2000.
    for hkx in range(0,len(unqhumanUniprotID),2000):
        # NOTE(review): countProt still holds the count from the loop above,
        # so this message only appears when the running total happens to be
        # a multiple of 2000 - confirm intended.
        countProt+=hkx+2000
        if countProt%2000 ==0:
            print (str(countProt), "th protein kegg job starts",str(datetime.datetime.now()))

        huniprotcodes=' '.join(unqhumanUniprotID[hkx:hkx+2000])
        huniprotparams = {
        'from':'ACC',
        'to':'KEGG_ID',
        'format':'tab',
        'query':huniprotcodes
        }

        # Repeat the batch until it completes without an HTTP error.
        while True:
            try:
                hkuniprotdata = urllib.urlencode(huniprotparams)
                hkuniprotrequest = urllib2.Request(uniproturl, hkuniprotdata)
                hkuniprotresponse = urllib2.urlopen(hkuniprotrequest)
                for hkuniprotline in hkuniprotresponse:
                    hkudata=hkuniprotline.strip()
                    # Skip the header line of the mapping table.
                    if not hkudata.startswith("From"):
                        hkuinfo=hkudata.split("\t")
                        if len(hkuinfo[1].strip()):
                            hkegg=hk.get(hkuinfo[1].strip())
                            hkudict_data = hk.parse(hkegg)
                            # TypeError/KeyError (parsed value not a mapping,
                            # or no PATHWAY field) skip the entry.
                            try:
                                try:
                                    if len(str(hkuinfo[0]).strip()) >5:
                                        hkeggdictfile[hkuinfo[0].strip()]=hkudict_data['PATHWAY'].values()
                                except TypeError:
                                    pass
                            except KeyError:
                                pass
                break
            except urllib2.HTTPError:
                time.sleep(RETRY_TIME)
                print ('Hey, I am trying again until succeeds to get data from KEGG!',str(datetime.datetime.now()))
                pass

    # Persist the per-protein KEGG pathways.
    hkdicfile='humankeggdic.obj'
    hkdicf = open(hkdicfile, 'wb')
    pickle.dump(hkeggdictfile, hkdicf , pickle.HIGHEST_PROTOCOL)
    hkdicf.close()
EC2KO_dic[item].append( ko ) #here append may be used, bc only one item is added at a time else: EC2KO_dic[item] = [ko] else: KO_dic[ko] = [definition] #here a list of KOs is made to be searched for - if pathway is defined kostoget = {} if args.pathway != 'none': output = open('eclist.txt', 'w') kegg = KEGG() pathway = kegg.get(args.pathway) dict_data = kegg.parse(pathway) if dict_data == 404: print("WARNING: BAD PATHWAY SUBMITTED TO KEGG!") elif dict_data == None: print("WARNING: ERROR CONNECTING TO KEGG SERVER!") #print(dict_data) print("ECQUERY: PROCESSING PATHWAY/MODULE") try: for key in dict_data['ORTHOLOGY'].keys(): print("adding ortholog {} to eclist".format(key)) value = dict_data['ORTHOLOGY'][key] print(value) if "," in key: for item in key.split(","): kostoget[item] = value
def search(query, source="wikipathways", result_format="xml", species=None, genes=None, user=None):
    """Search WikiPathways and/or KEGG for pathways matching a text query.

    Args:
        query: Search text. Converted with ``str()`` for WikiPathways and
            passed unchanged to the KEGG ``find`` call.
        source: ``"wikipathways"``, ``"kegg"`` or ``"all"`` (case-insensitive).
            Any other value yields an empty list.
        result_format: Not used by this function; kept for compatibility.
        species: Optional species filter, forwarded to WikiPathways only.
        genes: When not ``None`` the KEGG text search is skipped (no
            gene-based KEGG lookup is implemented here).
        user: Not used by this function; kept for compatibility.

    Returns:
        A list of ``gnomics.objects.pathway.Pathway`` objects, WikiPathways
        hits first, then KEGG hits.

    Raises:
        requests.HTTPError: If the WikiPathways request returns an error
            status.
    """
    wanted = source.lower()
    path_array = []

    if wanted in ("wikipathways", "all"):
        # Let requests build the query string so that spaces and reserved
        # characters in the query/species are percent-encoded.
        params = {"query": str(query)}
        if species is not None:
            params["species"] = str(species)
        r = requests.get(
            "http://webservice.wikipathways.org/findPathwaysByText",
            params=params,
            headers={"Content-Type": "application/json"},
        )
        r.raise_for_status()

        namespace = "{http://www.wikipathways.org/webservice}"
        field_for_tag = {
            namespace + "id": "identifier",
            namespace + "score": "score",
            namespace + "url": "url",
            namespace + "name": "name",
            namespace + "species": "species",
            namespace + "revision": "revision",
        }

        # Identifiers already emitted. The result list holds Pathway objects,
        # so testing the identifier string against it never finds a duplicate.
        seen_identifiers = set()
        for child in ET.fromstring(r.text):
            temp_path_dict = {}
            for subchild in child:
                field = field_for_tag.get(subchild.tag)
                if field is not None:
                    temp_path_dict[field] = subchild.text

            identifier = temp_path_dict["identifier"]
            if identifier in seen_identifiers:
                continue
            seen_identifiers.add(identifier)
            path_array.append(gnomics.objects.pathway.Pathway(
                identifier=identifier,
                identifier_type="WikiPathways ID",
                name=temp_path_dict["name"],
                taxon=temp_path_dict["species"],
                source="WikiPathways",
            ))

    if wanted in ("kegg", "all") and genes is None:
        k = KEGG()
        list_of_pathways = k.find("pathway", query)
        if isinstance(list_of_pathways, int):
            # NOTE(review): presumably an HTTP status code returned in place
            # of the result text when the request fails - confirm against the
            # KEGG client in use.
            print("KEGG pathway search failed with status " + str(list_of_pathways))
            return path_array

        # Checked in this order; the first marker found in the id wins.
        kegg_id_types = (
            ("map", "KEGG MAP PATHWAY ID"),
            ("ko", "KEGG KO PATHWAY ID"),
            ("ec", "KEGG EC PATHWAY ID"),
            ("rn", "KEGG RN PATHWAY ID"),
        )
        for row in list_of_pathways.split("\n"):
            columns = row.split("\t")
            if len(columns) == 1:
                # Blank line or a line without an id/name pair.
                continue
            id_parts = columns[0].strip().split(":")
            # Accept both "path:map00010" and a bare "map00010".
            path_id = id_parts[1] if len(id_parts) > 1 else id_parts[0]
            path_name = columns[1].strip()
            for marker, identifier_type in kegg_id_types:
                if marker in path_id:
                    path_array.append(gnomics.objects.pathway.Pathway(
                        identifier=path_id,
                        identifier_type=identifier_type,
                        source="KEGG",
                        name=path_name,
                    ))
                    break
            else:
                # Unrecognised id family: dump the raw KEGG entry instead.
                print(k.get(path_id))

    return path_array
# Scratch script: set up a KEGG client for organism "lpl" and print the
# (currently empty) accumulated result. The Entrez/PubMed variant is kept
# below as commented-out code.
#from Bio import Entrez
from bioservices.kegg import KEGG
import sys

k = KEGG()
#Entrez.email = "*****@*****.**"
#file = open(sys.argv[1], "r")

result = ""
k.organism = "lpl"

# The context manager closes the ID file even if the KEGG call below raises;
# the handle was previously opened and never closed.
with open("../data/ids5.txt", "r") as file:
    # FIXME(review): get() is called without an entry id and the file is never
    # read - confirm what was meant to be fetched for each line of the file.
    k.get()
    #for line in file.readlines():
    #

print(result)

# for line in file.readlines():
#     handle = Entrez.esearch(db="pubmed", term=line)
#     record = Entrez.read(handle)
#     ids = record["IdList"]
#     print(ids)
def search(query, source="wikipathways", result_format="xml", species=None, genes=None, user=None):
    """Search WikiPathways and/or KEGG for pathways matching a text query.

    Args:
        query: Search text. Converted with ``str()`` for WikiPathways and
            passed unchanged to the KEGG ``find`` call.
        source: ``"wikipathways"``, ``"kegg"`` or ``"all"`` (case-insensitive).
            Any other value yields an empty list.
        result_format: Not used by this function; kept for compatibility.
        species: Optional species filter, forwarded to WikiPathways only.
        genes: When not ``None`` the KEGG text search is skipped (no
            gene-based KEGG lookup is implemented here).
        user: Not used by this function; kept for compatibility.

    Returns:
        A list of ``gnomics.objects.pathway.Pathway`` objects, WikiPathways
        hits first, then KEGG hits.

    Raises:
        requests.HTTPError: If the WikiPathways request returns an error
            status.
    """
    wanted = source.lower()
    path_array = []

    if wanted in ("wikipathways", "all"):
        # Let requests build the query string so that spaces and reserved
        # characters in the query/species are percent-encoded.
        params = {"query": str(query)}
        if species is not None:
            params["species"] = str(species)
        r = requests.get(
            "http://webservice.wikipathways.org/findPathwaysByText",
            params=params,
            headers={"Content-Type": "application/json"},
        )
        r.raise_for_status()

        namespace = "{http://www.wikipathways.org/webservice}"
        field_for_tag = {
            namespace + "id": "identifier",
            namespace + "score": "score",
            namespace + "url": "url",
            namespace + "name": "name",
            namespace + "species": "species",
            namespace + "revision": "revision",
        }

        # Identifiers already emitted. The result list holds Pathway objects,
        # so testing the identifier string against it never finds a duplicate.
        seen_identifiers = set()
        for child in ET.fromstring(r.text):
            temp_path_dict = {}
            for subchild in child:
                field = field_for_tag.get(subchild.tag)
                if field is not None:
                    temp_path_dict[field] = subchild.text

            identifier = temp_path_dict["identifier"]
            if identifier in seen_identifiers:
                continue
            seen_identifiers.add(identifier)
            path_array.append(gnomics.objects.pathway.Pathway(
                identifier=identifier,
                identifier_type="WikiPathways ID",
                name=temp_path_dict["name"],
                taxon=temp_path_dict["species"],
                source="WikiPathways",
            ))

    if wanted in ("kegg", "all") and genes is None:
        k = KEGG()
        list_of_pathways = k.find("pathway", query)
        if isinstance(list_of_pathways, int):
            # NOTE(review): presumably an HTTP status code returned in place
            # of the result text when the request fails - confirm against the
            # KEGG client in use.
            print("KEGG pathway search failed with status " + str(list_of_pathways))
            return path_array

        # Checked in this order; the first marker found in the id wins.
        kegg_id_types = (
            ("map", "KEGG MAP PATHWAY ID"),
            ("ko", "KEGG KO PATHWAY ID"),
            ("ec", "KEGG EC PATHWAY ID"),
            ("rn", "KEGG RN PATHWAY ID"),
        )
        for row in list_of_pathways.split("\n"):
            columns = row.split("\t")
            if len(columns) == 1:
                # Blank line or a line without an id/name pair.
                continue
            id_parts = columns[0].strip().split(":")
            # Accept both "path:map00010" and a bare "map00010".
            path_id = id_parts[1] if len(id_parts) > 1 else id_parts[0]
            path_name = columns[1].strip()
            for marker, identifier_type in kegg_id_types:
                if marker in path_id:
                    path_array.append(gnomics.objects.pathway.Pathway(
                        identifier=path_id,
                        identifier_type=identifier_type,
                        source="KEGG",
                        name=path_name,
                    ))
                    break
            else:
                # Unrecognised id family: dump the raw KEGG entry instead.
                print(k.get(path_id))

    return path_array
def _edge_label(attrs):
    """Return the display label for one edge, given its attribute dict."""
    name = attrs['name']
    if name in ('phosphorylation', 'dephosphorylation'):
        label = attrs['value']
    elif 'dephosphorylation' in name:
        # Tested before 'phosphorylation', which is a substring of it.
        label = name.replace('dephosphorylation', '-p')
    elif 'phosphorylation' in name:
        label = name.replace('phosphorylation', '+p')
    else:
        label = name
    # Activation/inhibition is shown by edge colour, so drop it from the text.
    for redundant in ('activation, ', 'inhibition, ', 'activation', 'inhibition'):
        label = label.replace(redundant, '')
    return label


def _edge_color(name):
    """Return the semi-transparent rgba colour for an edge with this name."""
    if 'activation' in name:
        return 'rgba(26, 148, 49, 0.3)'  # green
    if 'inhibition' in name:
        return 'rgba(255, 0, 0, 0.3)'  # red
    return 'rgba(0, 0, 255, 0.3)'  # blue


def _node_symbol(attrs):
    """Return the text shown on a node, given its attribute dict."""
    if attrs['type'] == 'map':
        return attrs['gene_names']
    for key in ('symbol', 'gene_names'):
        if key in attrs:
            return attrs[key]
    return attrs['name']


def pathwayVisualization(KEGG_id, path_to_csv, redirect=True, compound=False):
    """
    The pathwayVisualization function returns a graph visualization based on user input

    Args:
        KEGG_id (str): string specifying KEGG pathway ID to visualize
        path_to_csv (str): string specifying data to overlay on graph
        redirect (bool): True to split nodes into their components. Defaults to True
        compound (bool): True to display compounds (such as Ca2+). Defaults to False

    Returns:
        A graph visualization using the visjs_network function from visjs_2_jupyter,
        or None (after printing a message) when the KGML request for KEGG_id
        comes back as 404 or 400.
    """
    s = KEGG()
    res = s.get(KEGG_id, "kgml")
    if res in (404, 400):
        # Call form of print: same output on Python 2, and valid on Python 3.
        print(KEGG_id + ' is not a valid KEGG ID')
        return

    result = s.parse_kgml_pathway(KEGG_id)
    ETroot = parsingXML(KEGG_id, s)

    G = nx.DiGraph()
    max_id, compound_array = addNodes(G, result)
    setCoord(G, ETroot)

    if redirect is False:
        getNodeSymbols(G, s, compound)
    else:
        parent_list, parent_dict = splitNodes(G, s, max_id)

    complex_array, component_array, node_dict, comp_dict = undefNodes(G, ETroot)

    if redirect is False:
        addEdges(G, result, component_array, node_dict)
    else:
        addAndRedirectEdges(G, result, complex_array, component_array,
                            parent_list, parent_dict, node_dict, comp_dict)

    # add reactions to graph
    addReaction(G, ETroot)

    # Labels and colours are collected before any node is removed, keyed by
    # (source, target). edges(data=True) yields (u, v, attrs) on networkx 1.x
    # and 2.x alike, unlike the G.edge mapping.
    edge_to_name = {}
    edge_to_color = {}
    for u, v, attrs in G.edges(data=True):
        edge_to_name[(u, v)] = _edge_label(attrs)
        edge_to_color[(u, v)] = _edge_color(attrs['name'])

    # for graph with split nodes
    if redirect is True:
        # remove undefined nodes from graph
        G.remove_nodes_from(complex_array)
        # remove nodes with more than one gene
        G.remove_nodes_from(parent_list)

    if compound is False:
        # remove compound nodes
        G.remove_nodes_from(compound_array)

    # Per-node display attributes for the nodes that remain.
    node_to_symbol = {}
    node_to_gene = {}
    node_to_x = {}
    node_to_y = {}
    for node, attrs in G.nodes(data=True):
        node_to_symbol[node] = _node_symbol(attrs)
        node_to_gene[node] = attrs['gene_names']
        node_to_x[node] = attrs['x']
        node_to_y[node] = attrs['y']

    id_to_log2fold = log2FoldChange(G, path_to_csv)

    # Create color scale with negative as green and positive as red
    my_scale = spectra.scale(["green", "#CCC", "red"]).domain([-4, 0, 4])

    # color nodes based on log2fold data; nodes without data get light grey
    node_to_color = {}
    for node in G.nodes():
        if node in id_to_log2fold:
            node_to_color[node] = my_scale(id_to_log2fold[node][0]).hexcode
        else:
            node_to_color[node] = '#f1f1f1'

    # Materialise as lists so positions are stable whichever container type
    # G.nodes() / G.edges() return.
    nodes = list(G.nodes())
    edges = list(G.edges())
    # map to indices for source/target in edges
    node_map = {node: index for index, node in enumerate(nodes)}

    # lists of dictionaries that hold per node and per edge attributes
    nodes_dict = [{"id": node_to_gene[n],
                   "degree": G.degree(n),
                   "color": node_to_color[n],
                   "node_shape": "box",
                   "node_size": 10,
                   'border_width': 1,
                   "id_num": node_to_symbol[n],
                   "x": node_to_x[n],
                   "y": node_to_y[n]}
                  for n in nodes]

    edges_dict = [{"source": node_map[edge[0]],
                   "target": node_map[edge[1]],
                   "color": edge_to_color[edge],
                   "id": edge_to_name[edge],
                   "edge_label": '',
                   "hidden": 'false',
                   "physics": 'true'}
                  for edge in edges]

    # html file label for first graph (must manually increment later)
    time = 1700

    # create graph here
    return visJS_module.visjs_network(nodes_dict, edges_dict,
                                      time_stamp=time,
                                      node_label_field="id_num",
                                      edge_width=3,
                                      border_color="black",
                                      edge_arrow_to=True,
                                      edge_font_size=15,
                                      edge_font_align="top",
                                      physics_enabled=False,
                                      graph_width=1000,
                                      graph_height=1000)