def test_render_KGML_modify(self):
    """Render two pathways to PDF after editing their graphics.

    The first dataset gets wider lines on every ortholog whose name
    shares at least one identifier with ``self.ko_ids``; the second
    dataset gets one ColorSpiral background colour per ortholog.
    """
    datasets = self.data

    # First dataset: widen the graphics of the selected orthologs and
    # write the result to "<output_stem>_widths.pdf".
    with open(datasets[0].infilename) as handle:
        pathway = read(handle)
        for ortholog in pathway.orthologs:
            shared_ids = set(ortholog.name.split()).intersection(self.ko_ids)
            if not shared_ids:
                continue
            for graphic in ortholog.graphics:
                graphic.width = 10
        canvas = KGMLCanvas(pathway)
        canvas.draw(datasets[0].output_stem + '_widths.pdf')

    # Second dataset: give every ortholog its own background colour and
    # write the result to "<output_stem>_colors.pdf".
    with open(datasets[1].infilename) as handle:
        pathway = read(handle)
        ortholog_entries = list(pathway.orthologs)
        # Biopython's ColorSpiral supplies one colour per ortholog
        spiral = ColorSpiral(a=2, b=0.2, v_init=0.85, v_final=0.5,
                             jitter=0.03)
        palette = spiral.get_colors(len(ortholog_entries))
        for ortholog, colour in zip(ortholog_entries, palette):
            for graphic in ortholog.graphics:
                graphic.bgcolor = colour
        canvas = KGMLCanvas(pathway)
        pathway.image = datasets[1].pathway_image
        canvas.import_imagemap = datasets[1].show_pathway_image
        canvas.draw(datasets[1].output_stem + '_colors.pdf')
def test_render_KGML_transparency(self):
    """Render two pathways to PDF using colours with an alpha channel.

    The first dataset appends an alpha value to the hex foreground
    colour of the orthologs matching ``self.ko_ids``; the second one
    extends each ColorSpiral colour tuple with an alpha component.
    """
    datasets = self.data

    # First dataset: semi-transparent, wider lines on selected orthologs.
    with open(datasets[0].infilename) as handle:
        pathway = read(handle)
        for ortholog in pathway.orthologs:
            shared_ids = set(ortholog.name.split()).intersection(self.ko_ids)
            if not shared_ids:
                continue
            for graphic in ortholog.graphics:
                # The alpha channel is appended straight onto the hex
                # colour string.
                graphic.fgcolor += "77"
                graphic.width = 20
        canvas = KGMLCanvas(pathway)
        canvas.draw(datasets[0].output_stem + "_transparency.pdf")

    # Second dataset: semi-transparent background for every ortholog.
    with open(datasets[1].infilename) as handle:
        pathway = read(handle)
        ortholog_entries = list(pathway.orthologs)
        # Biopython's ColorSpiral supplies one colour per ortholog
        spiral = ColorSpiral(a=2, b=0.2, v_init=0.85, v_final=0.5,
                             jitter=0.03)
        palette = spiral.get_colors(len(ortholog_entries))
        for ortholog, colour in zip(ortholog_entries, palette):
            # Extend the colour tuple with an alpha component
            translucent = colour + (0.5,)
            for graphic in ortholog.graphics:
                graphic.bgcolor = translucent
        canvas = KGMLCanvas(pathway)
        pathway.image = datasets[1].pathway_image
        canvas.import_imagemap = datasets[1].show_pathway_image
        canvas.draw(datasets[1].output_stem + "_transparency.pdf")
def plot_pathway(self, enriched_genes, pathway_id='hsa05322', figurename=None):
    """Draw a KEGG pathway as a PDF with enriched genes recoloured.

    The first graphics element of every pathway entry is recoloured and
    relabelled depending on whether ``gene_is_enriched`` reports a match
    for its name.

    :param enriched_genes: passed through to ``gene_is_enriched``
    :param pathway_id: KEGG pathway identifier to download as KGML
    :param figurename: output PDF name; defaults to ``<pathway_id>.pdf``
    :return: the parsed (and modified) pathway object
    """
    # Fall back to a file name derived from the pathway id
    if not figurename:
        figurename = '%s.pdf' % pathway_id
    assert (figurename.endswith('.pdf'))

    # Download and parse the pathway description
    pathway = KGML_parser.read(kegg_get(pathway_id, "kgml"))

    # Recolour and relabel the first graphics element of each entry
    for entry in pathway.entries.values():
        box = entry.graphics[0]
        candidate_names = box.name
        hit = gene_is_enriched(enriched_genes, candidate_names)
        if not hit:
            box.bgcolor = self.non_enriched_box_color
            box.fgcolor = self.non_enriched_text_color
            # Keep only the first of the comma-separated names
            box.name = candidate_names.split(',')[0]
            continue
        box.bgcolor = self.enriched_box_color  # box colour
        box.fgcolor = self.enriched_text_color  # text colour
        box.name = hit

    canvas = KGMLCanvas(pathway, import_imagemap=True,
                        fontsize=self.fontsize)
    canvas.draw(figurename)
    print('Drawn: ', figurename)
    return pathway
def draw_kegg_map(map_id):
    """Write ``<map_id>.pdf``, a rendering of the KEGG map ``map_id``.

    The KGML description is fetched with ``kegg_get`` and drawn with the
    image map imported as background.
    """
    kgml_handle = kegg_get(map_id, "kgml")
    parsed_map = KGML_parser.read(kgml_handle)
    renderer = KGMLCanvas(parsed_map, import_imagemap=True)
    pdf_name = "%s.pdf" % map_id
    renderer.draw(pdf_name)
def colorCompounds(pathname, cpdlist, size=20):
    """Draw a KEGG pathway PDF with the listed compounds highlighted.

    Every compound graphic whose name is in ``cpdlist`` gets a red
    background and is resized to ``size`` x ``size``. The result is
    written to ``<pathname>.pdf``.
    """
    kgml = KGML_parser.read(kegg_get(pathname, "kgml"))
    for compound in kgml.compounds:
        for glyph in compound.graphics:
            if glyph.name not in cpdlist:
                continue
            glyph.bgcolor = '#ff0000'
            glyph.width = size
            glyph.height = size
    KGMLCanvas(kgml, import_imagemap=True).draw("%s.pdf" % pathname)
def test_render_KGML_basic(self):
    """Basic rendering of KGML: write to PDF without modification."""
    # We test rendering of the original KEGG KGML using only local
    # files.
    for p in self.data:
        # Open in default text mode: the "U" flag is deprecated in
        # Python 3 and rejected with ValueError from Python 3.11 on,
        # and universal newlines are already the text-mode default.
        with open(p.infilename) as f:
            pathway = read(f)
            pathway.image = p.pathway_image
            kgml_map = KGMLCanvas(pathway)
            kgml_map.import_imagemap = p.show_pathway_image
            kgml_map.draw(p.output_stem + '_original.pdf')
def test_render_KGML_import_map(self):
    """Basic rendering of KGML with the image map imported.

    The image map is taken from the URL given in the .xml file, so the
    test may fail when that resource cannot be reached (for example
    without a web connection), and the output may look odd if the
    remote image map changed after the local KGML file was downloaded.
    """
    # Render every dataset on top of its imported image map
    for dataset in self.data:
        with open(dataset.infilename) as handle:
            parsed = read(handle)
            canvas = KGMLCanvas(parsed, import_imagemap=True)
            canvas.draw(dataset.output_stem + "_importmap.pdf")
def getPic(self, pidpath, pathwaydir):
    """Draw one highlighted pathway PDF per record of ``pidpath``.

    Each non-blank line of ``pidpath`` is tab-separated: the first field
    is a pathway id, the second a ';'-separated list of KO patterns.
    For every record the KGML and PNG stored in GridFS for that pathway
    are written to ``pathway.kgml`` / ``pathway.png`` in the current
    working directory, every entry whose name matches one of the KO
    patterns gets a red foreground colour, and the result is drawn to
    ``<pathwaydir>/<pathway id>.pdf``.

    :param pidpath: path of the input table (e.g. pid.txt)
    :param pathwaydir: output directory; created when missing
    """
    fs = gridfs.GridFS(self.mongodb)
    if not os.path.exists(pathwaydir):
        os.makedirs(pathwaydir)

    # Scratch files, overwritten for every pathway.
    kgml_path = os.path.join(os.getcwd(), "pathway.kgml")
    png_path = os.path.join(os.getcwd(), "pathway.png")

    with open(pidpath) as records:
        for line in records:
            line = line.strip('\n')
            if not line.strip():
                # Blank lines carry no record.
                continue
            fields = line.split('\t')
            pid = fields[0]
            # Drop empty ids: an empty pattern would match every entry.
            ko_ids = [ko for ko in fields[1].split(';') if ko]

            result = self.png_coll.find_one({"pathway_id": pid})
            if not result:
                # Without stored KGML/PNG there is nothing to draw.
                print("getPic: no stored KGML/PNG for pathway %s, skipped" % pid)
                continue

            # GridFS hands back raw bytes, so write in binary mode; "wb"
            # truncates, which also discards the previous pathway's data.
            with open(kgml_path, "wb") as kgml_out, \
                    open(png_path, "wb") as png_out:
                kgml_out.write(fs.get(result['pathway_ko_kgml']).read())
                png_out.write(fs.get(result['pathway_ko_png']).read())

            # Parse only after the write handles are closed (flushed).
            with open(kgml_path) as kgml_in:
                p_kgml = KGML_parser.read(kgml_in)
            p_kgml.image = png_path

            # Collect the ids of all entries matching any KO pattern.
            matched_ids = []
            for ko in ko_ids:
                for entry in p_kgml.entries.values():
                    if re.search(ko, entry.name):
                        matched_ids.append(entry.id)
            for entry_id in matched_ids:
                for graphic in p_kgml.entries[entry_id].graphics:
                    graphic.fgcolor = '#CC0000'

            canvas = KGMLCanvas(p_kgml, import_imagemap=True)
            canvas.draw(os.path.join(pathwaydir, pid + '.pdf'))
    print("getPic finished!!!")
def pathway_pdf(self, filename="", imagemap=True, orthologs=True,
                compounds=True, maps=True, reactions=True):
    '''
    Write the current pathway to a PDF file.

    :param filename: (str) - output name without the ".pdf" suffix;
        when empty, ``self.pathway_ID`` is used instead
    :param imagemap: (bool) - value for the canvas ``import_imagemap``
    :param orthologs: (bool) - value for the canvas ``label_orthologs``
    :param compounds: (bool) - value for the canvas ``label_compounds``
    :param maps: (bool) - value for the canvas ``label_maps``
    :param reactions: (bool) - value for the canvas
        ``label_reaction_entries``
    :return: None; the PDF is written to disk
    '''
    # TODO: check the ``reactions`` parameter
    canvas = KGMLCanvas(self.pathway)
    canvas_settings = (
        ("import_imagemap", imagemap),
        ("label_orthologs", orthologs),
        ("label_compounds", compounds),
        ("label_maps", maps),
        ("label_reaction_entries", reactions),
    )
    for attribute, value in canvas_settings:
        setattr(canvas, attribute, value)

    stem = self.pathway_ID if filename == "" else filename
    canvas.draw(str(stem + ".pdf"))
def gatherDetails(enterPathway,folderName,useCO,CO_values):
    """Draw a KEGG pathway map with measured compounds coloured by value.

    Steps, as visible in the body:

    1. Create ``folderName`` when it does not exist.
    2. Probe KEGG for ``enterPathway``; when that raises, fall back to
       the pathway id ``'ko' + enterPathway[3:8]``.
    3. Collect the pathway's genes/compounds (``getKfrom_ko`` /
       ``getCfrom_ko``) and intersect the compounds with ``useCO``.
    4. Take ``CO_values.loc[...]`` for that intersection and pick a
       colour index per compound:
         * values summing to 0 -> nothing is drawn;
         * a single distinct value -> two classes (NaN -> 0, else 1);
         * otherwise -> histogram bins over the value range.
    5. Recolour/resize the matching compound graphics and write
       ``mapWithColors_<pathway>.pdf`` into ``folderName``.

    Parameters
    ----------
    enterPathway : pathway identifier handed to ``kegg_get``; characters
        [3:8] are reused to build the 'ko' fallback id.
    folderName : output directory (created if missing).
    useCO : iterable of compound ids; converted to a set.
    CO_values : indexed with ``.loc`` by compound id -- presumably a
        pandas Series of per-compound values; TODO confirm.

    No return statement appears in the visible body.
    """
    #check if the directories exist, one for pathway files
    if not os.path.exists(folderName):
        os.makedirs(folderName)
    #else:
        #raise ValueError('Be careful, this folder already exists')

    #only one pathway at a time
    # setKeep stays 1 while enterPathway itself can be fetched from KEGG
    setKeep = 1
    try:
        kegg_get(enterPathway).read()
    except:
        # NOTE(review): bare except -- also catches KeyboardInterrupt/SystemExit
        #use the ko map if there is nothing species specific...this can also fail...
        usePathway = 'ko' + enterPathway[3:8]
        setKeep = 0
        try:
            kegg_get(usePathway).read()
        except:
            # a failing fallback is ignored here; later calls still use usePathway
            pass

    if setKeep:
        usePathway = enterPathway

    #get the compounds and genes for this pathway
    genes = getKfrom_ko(usePathway)
    compounds = getCfrom_ko(usePathway)

    #figure out which ones I have data for...
    setG = set(genes) # not read again in this function
    setC = set(compounds)
    setT = set(useCO)
    intCompounds = setC.intersection(setT)

    ## plot the pathway map for this pathway, get details from KEGG for plotting (%must be at least 4 colors)
    useColors = pal.colorbrewer.diverging.PuOr_4.hex_colors
    #useColors = pal.colorbrewer.diverging.RdYlBu_11.hex_colors

    #set the color of the mtab based on its value, only scale the values from this particular pathway
    # NOTE(review): intCompounds is a set; confirm the installed pandas
    # version accepts a set as a .loc indexer
    useCOsubset = CO_values.loc[intCompounds]
    cmin = useCOsubset.min() #find min and max...ignore NaN and inf for the moment
    cmax = useCOsubset.replace([np.inf],np.nan).dropna(how = 'all').max()
    size = 20 #increase the size of the compounds in the plots

    #can have all zeros...
    if sum(useCOsubset.dropna())==0:
        # nothing is drawn in this case
        pass
        #print('No measured metabolites in pathway ' + usePathway)
    elif len(useCOsubset.value_counts())==1:
        #only two color options: yes/no
        # dummy is created and cleaned of +inf but not read afterwards
        dummy = useCOsubset.copy(deep = True)
        dummy.replace([np.inf],np.nan,inplace = True)
        # collapse the values in place to 0 (NaN) or 1 (anything else)
        for idx,item in enumerate(useCOsubset):
            if np.isnan(item):
                useCOsubset.iloc[idx] = int(0)
            else:
                useCOsubset.iloc[idx] = int(1)

        #go get the pathway information and customize the plot
        pathway = KGML_parser.read(kegg_get(usePathway, "kgml")) #no choice in gene color: green

        # Change the colors of compounds
        for element in pathway.compounds:
            for graphic in element.graphics:
                tc = element.name[4:10] #skip over the 'cpd:'
                if (tc in intCompounds):
                    #in the pathway, set the color
                    # the 0/1 value is used directly as index into useColors
                    tempColor = useCOsubset.loc[tc]
                    graphic.bgcolor = useColors[int(tempColor)]
                    graphic.width = size
                    graphic.height = size

        canvas = KGMLCanvas(pathway, import_imagemap=True)
        pdfName = 'mapWithColors_' + str(usePathway) + '.pdf'
        canvas.draw(folderName + '/' + pdfName)
        pdfName = None #empty it in case that is where I am having issues
    else:
        # dummy is created and cleaned of +inf but not read afterwards
        dummy = useCOsubset.copy(deep = True)
        dummy.replace([np.inf],np.nan,inplace = True)
        # NOTE(review): np.isinf is also true for -inf, while the cmax
        # computation above only masks +inf -- confirm -inf cannot occur
        for idx,item in enumerate(useCOsubset):
            if np.isnan(item):
                useCOsubset.iloc[idx] = 0
            elif np.isinf(item):
                useCOsubset.iloc[idx] = 10*cmax #make inf 10x the biggest value

        #now, find cmax again...use that downstream
        cmax = useCOsubset.replace([np.inf],np.nan).dropna(how = 'all').max()

        #use histogram to make the bins (complete hack)
        # cmin was computed before NaN entries were replaced by 0
        a,bin_edges = np.histogram(useCOsubset,bins = len(useColors)-3,range = (cmin,cmax))

        #now...put zero at beginning and inf at end
        #BUT - can actually have cases with values for all metabolites (novel concept)
        # the counts in `a` are updated below but not used afterwards;
        # only bin_edges feeds np.digitize
        try:
            nz = useCOsubset.value_counts()[0] #count the number of zeros
            a = np.insert(a,0,nz)
            bin_edges = np.insert(bin_edges,0,0)
        except:
            # no zero-valued entry (lookup failed): leave the bins as they are
            pass

        try:
            nm = useCOsubset.value_counts()[cmax]
            a = np.append(a,nm)
            bin_edges = np.append(bin_edges,cmax)
        except:
            pass

        #then find the index for each number...this will be the index into useColors
        useIdx = np.digitize(useCOsubset,bin_edges)
        color_df = pd.DataFrame({'mtab': useCOsubset,'idx':useIdx})

        #go get the pathway information and customize the plot
        pathway = KGML_parser.read(kegg_get(usePathway, "kgml")) #no choice in gene color: green

        # Change the colors of compounds
        for element in pathway.compounds:
            for graphic in element.graphics:
                tc = element.name[4:10] #skip over the 'cpd:'
                if (tc in intCompounds):
                    #in the pathway, set the color
                    # the stored bin index is shifted by one before
                    # indexing useColors
                    tempColor = color_df.loc[tc,'idx']
                    graphic.bgcolor = useColors[int(tempColor)-1]
                    graphic.width = size
                    graphic.height = size

        canvas = KGMLCanvas(pathway, import_imagemap=True)
        pdfName = 'mapWithColors_' + str(usePathway) + '.pdf'
        #Tracer()()
        canvas.draw(folderName + '/' + pdfName)
        pdfName = None #empty it in case that is where I am having issues
# process all found kegg pathways for k in kegg: print("Processing: {}".format(k)) stats[k] = defaultdict(int) processedIDs = set() # load current pathway isRef = True try: pathway = KGML_parser.read(kegg_get("{}{}".format(keggmap,k), "kgml")) except: print("{} not a reference pathway.".format(k)) pathway = KGML_parser.read(kegg_get("ko{}".format(k), "kgml")) isRef = False canvas = KGMLCanvas(pathway, import_imagemap=True) if isRef: for element in list(pathway.genes): f = kegg_find(keggmap, element.name) results = f.readline() g = results.split("\t")[0]#[len(keggmap)+1:] for l in kegg_get(g).readlines(): if "ORTHOLOGY" in l: match = pattern.findall(l) # EC number ortho= l[12:18] #print(match) if len(match) == 0: print("{} has no EC".format(ortho)) eName = set([x.split(":")[1] for x in element.name.split(" ")]) if ortho not in processedIDs:
def map2highlighted_map(map_id, ko_list, ko2freq, biodb, outpath='test.pdf', taxon_id=False, n_species=60):
    """Draw a KEGG map with orthologs coloured by frequency, as PDF and SVG.

    The KGML of the reference ('ko') version of ``map_id`` is downloaded
    from the KEGG REST service. Every ortholog entry that contains at
    least one KO present in ``ko2freq`` is recoloured according to the
    summed frequency of those KOs, then the map is drawn to ``outpath``,
    converted to SVG with inkscape and post-processed by
    ``edit_svg_map``.

    :param map_id: KEGG map identifier; 'map' is replaced by 'ko' to
        build the download URL
    :param ko_list: KOs whose entries use the green colour map; all
        other coloured entries use the orange/red one
    :param ko2freq: mapping KO -> frequency (converted with ``int``)
    :param biodb: passed through to ``edit_svg_map``
    :param outpath: path of the PDF to write; the SVG is written next to
        it with the extension replaced by ``.svg``
    :param taxon_id: passed through to ``edit_svg_map``
    :param n_species: upper bound of the colour normalisation
    """
    import os
    import re
    import urllib.request

    import matplotlib as mpl
    import matplotlib.cm as cm
    from matplotlib.colors import rgb2hex

    from Bio.Graphics.KGML_vis import KGMLCanvas
    from Bio.KEGG.KGML import KGML_parser
    from chlamdb.biosqldb import shell_command

    # Both colour maps share one normalisation over [0, n_species].
    norm = mpl.colors.Normalize(vmin=0, vmax=n_species)
    m = cm.ScalarMappable(norm=norm, cmap=cm.OrRd)
    m2 = cm.ScalarMappable(norm=norm, cmap=cm.Greens)

    url_template = 'http://rest.kegg.jp/get/%s/kgml' % re.sub(
        'map', 'ko', map_id)
    print(url_template)
    # Close the HTTP response as soon as the KGML text has been read.
    with urllib.request.urlopen(url_template) as response:
        kgml_text = response.read().decode('UTF-8')
    pathway = KGML_parser.read(kgml_text)
    kgml_map = KGMLCanvas(pathway, show_maps=True)

    # Loop over the orthologs in the pathway, and change the
    # background colour
    known_kos = set(ko2freq.keys())
    for o in list(pathway.orthologs):
        match = False
        if 'K00163' in o.name:
            print('##################################')
        ko_temp_list = set([i.rstrip() for i in o.name.split('ko:')])
        measured = ko_temp_list.intersection(known_kos)
        if len(measured) > 0:
            ko_keep = []
            for ko in ko_temp_list:
                if ko in ko2freq:
                    ko_keep.append(ko)
                if ko in ko_list:
                    match = True
            o.name = 'ko:' + ' ko:'.join(ko_keep)
            total = sum(int(ko2freq[i]) for i in measured)

            # Green scale for entries containing a KO of ko_list,
            # orange/red scale for the others.
            colour_scale = m2 if match else m
            for g in o.graphics:
                g.bgcolor = rgb2hex(colour_scale.to_rgba(float(total)))
            # NOTE(review): the name was just rebuilt to start with
            # 'ko:', so the first split element looks empty and the
            # label reduces to the total -- confirm this is intended.
            o.name = "%s (%s)" % (o.name.split('ko:')[0], total)
        # Orthologs without any KO in ko2freq keep their default colour.

    # Default settings are for the KGML elements only
    # We need to use the image map, and turn off the KGML elements, to see
    # only the .png base map. We could have set these values on canvas
    # instantiation
    kgml_map.import_imagemap = True
    kgml_map.show_maps = True
    kgml_map.show_orthologs = True
    kgml_map.draw_relations = False
    kgml_map.show_compounds = False
    kgml_map.show_genes = False
    kgml_map.draw(outpath)

    # Derive the SVG path from the extension only: substituting every
    # "pdf" substring would also rewrite directory or file names that
    # merely contain "pdf".
    svg_path = os.path.splitext(outpath)[0] + '.svg'

    # NOTE(review): the paths are interpolated unquoted into a command
    # string; paths containing spaces or shell metacharacters will break
    # (or be interpreted by the shell) depending on how shell_command
    # runs it.
    shell_command.shell_command(
        'inkscape %s --export-plain-svg=%s' % (outpath, svg_path))

    t = edit_svg_map(svg_path,
                     ko2freq.keys(),
                     biodb,
                     map_id,
                     taxon_id=taxon_id)
    t.write(svg_path)