def addFeat(self, feat, colorMax, percent):
    """Add ``feat`` to this track as a labelled arrow plus a coloured arrow.

    Two arrows are registered on ``self.gdFeature`` for the same feature:
    one filled with ``Track.backgroundColor`` that carries the visible
    label, and one whose fill is interpolated between ``white`` and
    ``colorMax`` according to ``percent``.

    Side effects: ``feat.location`` is replaced by a location shifted left
    by ``self.diff``, ``Track.maxSize`` is raised when the shifted end
    exceeds it, ``self.nbFeats`` is incremented and ``self.gdFeature`` is
    added to ``self.gdTrack``.

    Parameters:
        feat: feature exposing ``qualifiers``, ``location`` and ``strand``.
        colorMax: border colour and upper end of the fill gradient.
        percent: value interpolated on the scale
            ``minSimilarityScore`` .. 100.
    """
    # The fallback used to assign into an empty list (IndexError) and the
    # strand-1 branch re-read the missing qualifier (KeyError); resolve the
    # product name once and reuse it.
    try:
        product = feat.qualifiers['product'][0]
    except KeyError:
        print(" NO PRODUCT FOUND FOR ")
        print(feat)
        product = "No name"

    # Keep the whole name when it is short enough, else its first 3 words.
    labelTab = product.split(" ")
    if len(labelTab) > Track.maxLabelWord:
        labelTab = labelTab[:3]
    # The label shows the original (unshifted) coordinates.
    labelName = (" ".join(labelTab) + " \n " + str(feat.location.start) +
                 " - " + str(feat.location.end))

    # change location
    newStart = feat.location.start - self.diff
    newEnd = feat.location.end - self.diff
    if newEnd > Track.maxSize:
        Track.maxSize = newEnd
    feat.location = FeatureLocation(newStart, newEnd, feat.strand)

    # Only the first feature of the track anchors its label at the start;
    # angle and strand alternate with the parity of the feature index.
    parity = self.nbFeats % 2
    self.gdFeature.add_feature(
        feat,
        color=Track.backgroundColor,
        sigil="ARROW",
        name=labelName,
        label_position="start" if self.nbFeats == 0 else "middle",
        label_angle=Track.angle[parity],
        label=True,
        strand=Track.strand[parity])

    fill = colors.linearlyInterpolatedColor(white, colorMax,
                                            minSimilarityScore, 100, percent)
    if feat.strand == 1:
        # This name uses the shifted coordinates.
        self.gdFeature.add_feature(
            feat,
            border=colorMax,
            color=fill,
            sigil="ARROW",
            name=product[0:11].replace(" ", "_") + " \n " +
            str(feat.location.start) + " - " + str(feat.location.end),
            label_position="middle",
            label_angle=0,
            label=False)
    else:
        self.gdFeature.add_feature(
            feat,
            border=colorMax,
            color=fill,
            sigil="ARROW",
            label_position="middle",
            label_angle=180,
            label=False)
    self.nbFeats += 1
    self.gdTrack.add_set(self.gdFeature)
def _blast_feature(self, f, c1, c2):
    """Blastp the translation of feature ``f`` against ``c2`` in six frames.

    ``f`` is extracted from ``c1`` and translated with table 11, then
    compared with every six-frame translation of ``c2``.  HSPs shorter than
    ``len(cds) * self.min_length`` or below ``self.min_identity`` are
    dropped.  For every remaining HSP one line with the identity
    percentages is printed.

    Returns ``[(None, None, None)]`` when there are no frames or no HSPs,
    otherwise ``zip`` of (feature on c1, feature on c2, colour).
    """
    translator = Translator(self._abort_event)
    cds = translator.translate(f.extract(c1), 11)
    frames = translator.translate_six_frames_single(c2, 11)
    if not frames:
        return [(None, None, None)]

    results = []
    for frame in frames:
        hits = BlastCLI.s2s_blast(cds, frame, self.evalue,
                                  command='blastp', task='blastp')
        if hits:
            results.extend(hits)
    hsps = BlastCLI.all_hsps(results)
    if not hsps:
        return [(None, None, None)]

    query_feats, subject_feats, shades = [], [], []
    fname = self._feature_name(f, default='CDS')
    cds_len = len(cds)
    min_len = len(cds) * self.min_length
    for hsp in hsps:
        too_short = hsp.align_length < min_len
        if too_short:
            continue
        if hsp.identities / float(hsp.align_length) < self.min_identity:
            continue
        identity = float(hsp.identities) / hsp.align_length
        print('%s %s: %5.1f%% (%5.1f%%)' %
              (c1.description, fname, identity * 100,
               float(hsp.identities) / cds_len * 100))
        shades.append(
            colors.linearlyInterpolatedColor(colors.Color(0, 0, 1, 0.2),
                                             colors.Color(0, 1, 0, 0.2),
                                             0.2, 1, identity))
        # Protein alignment coordinates (1-based) -> nucleotide offsets.
        nt_len = hsp.align_length * 3
        qstart = (hsp.query_start - 1) * 3
        sstart = (hsp.sbjct_start - 1) * 3
        query_feats.append(
            SeqFeature(FeatureLocation(f.location.start + qstart,
                                       f.location.start + (qstart + nt_len),
                                       strand=hsp.strand[0])))
        subject_feats.append(
            SeqFeature(FeatureLocation(sstart, sstart + nt_len,
                                       strand=hsp.strand[1])))
    return zip(query_feats, subject_feats, shades)
def Add_Anno(self, aList):
    """Parse one whitespace-separated annotation record and add its exons.

    Columns read (0-based): 4 and 5 are the transcript start/end, 8 is the
    exon count, 9 and 10 are comma-separated exon starts/ends whose last
    piece is discarded (NOTE(review): presumably the lists end with a
    trailing comma - confirm against the input format).

    Sets ``self.txStart``, ``self.txEnd``, ``self.frag`` and ``self.end``,
    then adds one ``+1``-strand feature per exon to ``self.gds_features``,
    positioned relative to the transcript start, numbered from 1 and shaded
    from white towards green.

    Raises:
        IndexError / ValueError: on records with too few columns or
            non-numeric coordinates.
    """
    fields = aList.split()
    txStart = int(fields[4])
    txEnd = int(fields[5])
    span = txEnd - txStart
    self.txStart = txStart
    self.txEnd = txEnd
    # Explicit floor division keeps the Python 2 integer result on Python 3.
    # NOTE(review): assumes the module does not rely on true division here.
    self.frag = span // 10000
    self.end = span

    exonCount = int(fields[8])
    exonStarts = [int(v) for v in fields[9].split(',')[:-1]]
    exonEnds = [int(v) for v in fields[10].split(',')[:-1]]

    for i in range(exonCount):
        color = colors.linearlyInterpolatedColor(colors.white, colors.green,
                                                 0, exonCount, i + 1)
        feature = SeqFeature(FeatureLocation(exonStarts[i] - txStart,
                                             exonEnds[i] - txStart),
                             strand=+1)
        self.gds_features.add_feature(feature, name=str(i + 1), label=True,
                                      color=color)
def Add_Blast(self, aList):
    """Parse one tab-separated BLAST hit and add it as a ``-1``-strand feature.

    Only columns 9 and 10 (0-based) are used: the subject start and end.
    They are put in ascending order and shifted by ``self.txStart``.
    ``self.blast_num`` is incremented first and serves both as the feature
    name and as the position on the blue -> firebrick colour scale
    ``0 .. self.blast_rownum``.

    Raises:
        IndexError / ValueError: on lines with fewer than 11 columns or
            non-numeric coordinates.
    """
    self.blast_num += 1
    fields = aList.split('\t')
    s_start = int(fields[9])
    s_end = int(fields[10])
    color = colors.linearlyInterpolatedColor(colors.blue, colors.firebrick,
                                             0, self.blast_rownum,
                                             self.blast_num)
    # Hits reported in reverse orientation have start > end; normalise.
    low, high = sorted((s_start, s_end))
    feature = SeqFeature(FeatureLocation(low - self.txStart,
                                         high - self.txStart),
                         strand=-1)
    self.gds_features.add_feature(feature, name=str(self.blast_num),
                                  label=True, color=color)
def _generate_gene_colors(self): if not self.clusters: return full_gene_set = set() for c in self.clusters: full_gene_set.update(c.genes) if 'NONE' in full_gene_set: full_gene_set.remove('NONE') ngenes = float(len(full_gene_set))-1 middle = ngenes/2.0 self.colors = {} for i, gene in enumerate(sorted(full_gene_set)): t = i/ngenes if i < middle: c = colors.linearlyInterpolatedColor(colors.Color(1, 0, 0, 1), colors.Color(0, 1, 0, 1), 0, 1, t*2) else: c = colors.linearlyInterpolatedColor(colors.Color(0, 0.9, 0.1, 1), colors.Color(0, 0, 1, 1), 0, 1, t*2-1) self.colors[gene] = c
def draw(self):
    """Return a ``Group`` of colour-graded stripes filling the rectangle.

    The rectangle from ``self._flipRectCorners()`` is cut into
    ``numShades`` stripes along ``self.orientation``.  Each stripe is
    filled and stroked with a colour interpolated between the two corner
    colours.  In cylinder mode the gradient runs c0 -> c1 over the first
    half of the stripes and c1 -> c0 over the rest.  An outline rectangle
    is added when ``self.strokeColor`` is set and ``self.strokeWidth`` is
    non-negative.
    """
    # general widget bits
    group = Group()
    x, y, w, h, c0, c1 = self._flipRectCorners()
    numShades = self.numShades
    if self.cylinderMode:
        # Cylinder mode needs an odd number of stripes.
        if not numShades % 2:
            numShades = numShades + 1
        # Floor division: this value indexes V below, so it must stay an
        # int even where ``/`` is true division.
        halfNumShades = (numShades - 1) // 2 + 1
    num = float(numShades)  # must make it float!
    vertical = self.orientation == 'vertical'
    if numShades == 1:
        V = [x] if vertical else [y]
    elif vertical:
        V = frange(x, x + w, w / num)
    else:
        V = frange(y, y + h, h / num)

    for idx, v in enumerate(V):
        if vertical:
            stripe = Rect(v, y, w / num, h)
        else:
            stripe = Rect(x, v, w, h / num)
        if self.cylinderMode:
            if idx >= halfNumShades:
                col = colors.linearlyInterpolatedColor(
                    c1, c0, V[halfNumShades], V[-1], v)
            else:
                col = colors.linearlyInterpolatedColor(
                    c0, c1, V[0], V[halfNumShades], v)
        else:
            col = colors.linearlyInterpolatedColor(c0, c1, V[0], V[-1], v)
        stripe.fillColor = col
        stripe.strokeColor = col
        stripe.strokeWidth = 1
        group.add(stripe)

    if self.strokeColor and self.strokeWidth >= 0:
        rect = Rect(x, y, w, h)
        rect.strokeColor = self.strokeColor
        rect.strokeWidth = self.strokeWidth
        rect.fillColor = None
        group.add(rect)
    return group
def colorRange(c0, c1, n):
    "Return a range of intermediate colors between c0 and c1"
    # A single colour is c0 itself; anything not above 1 yields no colours.
    if n == 1:
        return [c0]
    if not n > 1:
        return []
    last = n - 1
    return [colors.linearlyInterpolatedColor(c0, c1, 0, last, step)
            for step in range(n)]
def colorRange(c0, c1, n):
    "Return a range of intermediate colors between c0 and c1"
    if n == 1:
        # Nothing to interpolate: the range is just the start colour.
        return [c0]
    shades = []
    if n > 1:
        upper = n - 1
        shades.extend(
            colors.linearlyInterpolatedColor(c0, c1, 0, upper, position)
            for position in range(n))
    return shades
def _getColors(self): # for calculating intermediate colors... numShades = self.numberOfBoxes + 1 fillColorStart = self.startColor fillColorEnd = self.endColor colorsList = [] for i in range(0, numShades): colorsList.append( colors.linearlyInterpolatedColor(fillColorStart, fillColorEnd, 0, numShades - 1, i)) return colorsList
def _blast_feature(self, f, c1, c2, features1, features2, evalue, max_rlen):
    """Blastn feature ``f`` of ``c1`` against ``c2`` and describe the HSPs.

    ``features1`` and ``features2`` are accepted but not used here.

    Returns ``[(None, None, None)]`` when no HSP is found, otherwise
    ``zip`` of (strandless feature on c1, strandless feature on c2,
    colour), where the colour goes from translucent white to translucent
    black with the HSP identity fraction.
    """
    results = BlastCLI.s2s_blast(f.extract(c1), c2, evalue,
                                 command='blastn', task='blastn')
    hsps = BlastCLI.all_hsps(results, max_rlen)
    if not hsps:
        return [(None, None, None)]

    query_feats, subject_feats, shades = [], [], []
    for hsp in hsps:
        shades.append(
            colors.linearlyInterpolatedColor(
                colors.Color(1, 1, 1, 0.2), colors.Color(0, 0, 0, 0.2),
                0, 1, float(hsp.identities) / hsp.align_length))
        query_location = FeatureLocation(
            f.location.start + hsp.query_start,
            f.location.start + hsp.query_start + hsp.align_length,
            strand=0)
        query_feats.append(SeqFeature(query_location))
        subject_location = FeatureLocation(
            hsp.sbjct_start, hsp.sbjct_start + hsp.align_length, strand=0)
        subject_feats.append(SeqFeature(subject_location))
    return zip(query_feats, subject_feats, shades)
def _blast_feature(self, f, c1, c2, features1, features2, evalue, max_rlen):
    """Blastp the translation of feature ``f`` against ``c2`` in six frames.

    ``f`` is extracted from ``c1`` and translated with table 11, then
    compared with each six-frame translation of ``c2``.  One line with the
    identity percentages is printed per HSP.  ``features1`` and
    ``features2`` are accepted but not used here.

    Returns ``[(None, None, None)]`` when there are no frames or no HSPs,
    otherwise ``zip`` of (feature on c1, feature on c2, colour).
    """
    translator = Translator(self._abort_event)
    cds = translator.translate(f.extract(c1), 11)
    frames = translator.translate_six_frames_single(c2, 11)
    if not frames:
        return [(None, None, None)]

    results = []
    for frame in frames:
        hits = BlastCLI.s2s_blast(cds, frame, evalue,
                                  command='blastp', task='blastp')
        if hits:
            results.extend(hits)
    hsps = BlastCLI.all_hsps(results, max_rlen)
    if not hsps:
        return [(None, None, None)]

    query_feats, subject_feats, shades = [], [], []
    c1_name = pretty_rec_name(c1)
    # Prefer the locus tag as the printed feature name.
    fname = 'CDS'
    if 'locus_tag' in f.qualifiers:
        fname = f.qualifiers['locus_tag'][0]
    cds_len = len(cds)
    for hsp in hsps:
        identity = float(hsp.identities) / hsp.align_length
        print('%s %s: %5.1f%% (%5.1f%%)' %
              (c1_name, fname, identity * 100,
               float(hsp.identities) / cds_len * 100))
        shades.append(
            colors.linearlyInterpolatedColor(colors.Color(0, 0, 1, 0.2),
                                             colors.Color(0, 1, 0, 0.2),
                                             0.2, 1, identity))
        # Protein alignment coordinates (1-based) -> nucleotide offsets.
        nt_len = hsp.align_length * 3
        qstart = (hsp.query_start - 1) * 3
        sstart = (hsp.sbjct_start - 1) * 3
        query_feats.append(
            SeqFeature(FeatureLocation(f.location.start + qstart,
                                       f.location.start + (qstart + nt_len),
                                       strand=hsp.strand[0])))
        subject_feats.append(
            SeqFeature(FeatureLocation(sstart, sstart + nt_len,
                                       strand=hsp.strand[1])))
    return zip(query_feats, subject_feats, shades)
def draw_heat_graph(self, graph):
    """ draw_heat_graph(self, graph) -> [element, element,...]

        o graph     Graph object

        Returns a list of drawable elements for the heat graph
    """
    # At each point contained in the graph data, we draw a box that is the
    # full height of the track, extending from the midpoint between the
    # previous and current data points to the midpoint between the current
    # and next data points
    heat_elements = []  # holds drawable elements

    # Get graph data; the colour scale spans the first and last of the
    # values returned by graph.quartiles().
    data_quartiles = graph.quartiles()
    minval, maxval = data_quartiles[0], data_quartiles[4]
    btm, _ctr, top = self.track_radii[self.current_track_level]
    newdata = intermediate_points(self.start, self.end,
                                  graph[self.start:self.end])

    # Create elements on the graph, indicating a large positive value by
    # the graph's poscolor, and a large negative value by the graph's
    # negcolor attributes
    for pos0, pos1, val in newdata:
        # Only the angle is needed; the cos/sin values are ignored.
        pos0angle, _cos0, _sin0 = self.canvas_angle(pos0)
        pos1angle, _cos1, _sin1 = self.canvas_angle(pos1)

        # Calculate the heat color: maxval maps to poscolor and minval to
        # negcolor.
        heat = colors.linearlyInterpolatedColor(graph.poscolor,
                                                graph.negcolor,
                                                maxval, minval, val)

        # Draw heat box
        heat_elements.append(
            self._draw_arc(btm, top, pos0angle, pos1angle, heat,
                           border=heat))
    return heat_elements
def Add_Blast(self, aList):
    """Parse one tab-separated BLAST hit and add it as a ``-1``-strand feature.

    Only columns 9 and 10 (0-based) are used: the subject start and end.
    They are put in ascending order and shifted by ``self.txStart``.
    ``self.blast_num`` is incremented first and serves both as the feature
    name and as the position on the blue -> firebrick colour scale
    ``0 .. self.blast_rownum``.

    Raises:
        IndexError / ValueError: on lines with fewer than 11 columns or
            non-numeric coordinates.
    """
    self.blast_num += 1
    fields = aList.split('\t')
    s_start = int(fields[9])
    s_end = int(fields[10])
    color = colors.linearlyInterpolatedColor(colors.blue, colors.firebrick,
                                             0, self.blast_rownum,
                                             self.blast_num)
    # Hits reported in reverse orientation have start > end; normalise.
    left = min(s_start, s_end) - self.txStart
    right = max(s_start, s_end) - self.txStart
    feature = SeqFeature(FeatureLocation(left, right), strand=-1)
    self.gds_features.add_feature(feature, name=str(self.blast_num),
                                  label=True, color=color)
def draw_heat_graph(self, graph):
    """ draw_heat_graph(self, graph) -> [element, element,...]

        o graph     Graph object

        Returns a list of drawable elements for the heat graph
    """
    # At each point contained in the graph data, we draw a box that is the
    # full height of the track, extending from the midpoint between the
    # previous and current data points to the midpoint between the current
    # and next data points
    heat_elements = []  # holds drawable elements

    # Get graph data; the colour scale spans the first and last of the
    # values returned by graph.quartiles().
    data_quartiles = graph.quartiles()
    minval, maxval = data_quartiles[0], data_quartiles[4]
    btm, _ctr, top = self.track_radii[self.current_track_level]
    newdata = intermediate_points(self.start, self.end,
                                  graph[self.start:self.end])

    # Create elements on the graph, indicating a large positive value by
    # the graph's poscolor, and a large negative value by the graph's
    # negcolor attributes
    for pos0, pos1, val in newdata:
        # Only the angle is needed; the cos/sin values are ignored.
        pos0angle, _cos0, _sin0 = self.canvas_angle(pos0)
        pos1angle, _cos1, _sin1 = self.canvas_angle(pos1)

        # Calculate the heat color: maxval maps to poscolor and minval to
        # negcolor.
        heat = colors.linearlyInterpolatedColor(graph.poscolor,
                                                graph.negcolor,
                                                maxval, minval, val)

        # Draw heat box
        heat_elements.append(
            self._draw_arc(btm, top, pos0angle, pos1angle, heat,
                           border=heat))
    return heat_elements
def Add_Anno(self, aList):
    """Parse one whitespace-separated annotation record and add its exons.

    Columns read (0-based): 4 and 5 are the transcript start/end, 8 is the
    exon count, 9 and 10 are comma-separated exon starts/ends whose last
    piece is discarded (NOTE(review): presumably the lists end with a
    trailing comma - confirm against the input format).

    Sets ``self.txStart``, ``self.txEnd``, ``self.frag`` and ``self.end``,
    then adds one ``+1``-strand feature per exon to ``self.gds_features``,
    positioned relative to the transcript start, numbered from 1 and shaded
    from white towards green.

    Raises:
        IndexError / ValueError: on records with too few columns or
            non-numeric coordinates.
    """
    fields = aList.split()
    txStart = int(fields[4])
    txEnd = int(fields[5])
    span = txEnd - txStart
    self.txStart = txStart
    self.txEnd = txEnd
    # Explicit floor division keeps the Python 2 integer result on Python 3.
    # NOTE(review): assumes the module does not rely on true division here.
    self.frag = span // 10000
    self.end = span

    exonCount = int(fields[8])
    exonStarts = [int(v) for v in fields[9].split(',')[:-1]]
    exonEnds = [int(v) for v in fields[10].split(',')[:-1]]

    for i in range(exonCount):
        color = colors.linearlyInterpolatedColor(colors.white, colors.green,
                                                 0, exonCount, i + 1)
        feature = SeqFeature(FeatureLocation(exonStarts[i] - txStart,
                                             exonEnds[i] - txStart),
                             strand=+1)
        self.gds_features.add_feature(feature, name=str(i + 1), label=True,
                                      color=color)
# Script: create a diagram with two feature tracks, then scan a tab-separated
# BLAST result file for hits whose second column is 'NC_003977.1', collecting
# their coordinates in `startend`.
gdd = GenomeDiagram.Diagram('Diagram')
gdt1_features = gdd.new_track(1, greytrack=False)
gds1_features = gdt1_features.new_set()
gdt2_features = gdd.new_track(1, greytrack=False)
gds2_features = gdt2_features.new_set()
# NOTE(review): the file handle is not closed in the visible lines.
inFile=open('CHC10A.unmapped.sam.mapped.fa.fa.blasted')
NC_len=5894
num=0  # number of matching hits seen so far
startend=[]  # flat list: q_start, q_end, s_start, s_end for every hit
for line in inFile :
    fields=line.split('\t')
    if fields[1]=='NC_003977.1':
        num+=1
        # NOTE(review): the colour scale is fixed at 0..200, so `num` is
        # expected to stay within that range - confirm.
        color = colors.linearlyInterpolatedColor(colors.white, colors.firebrick, 0, 200, num)
        # Has no effect on `fields`, which was split before stripping.
        line=line.strip()
        q_start=int(fields[6])
        q_end=int(fields[7])
        # NOTE(review): s_start and s_end both read column 9, so they are
        # always equal; this looks like a copy-paste slip (perhaps columns
        # 8 and 9 were intended) - confirm against the file's column layout.
        s_start=int(fields[9])
        s_end=int(fields[9])
        startend.append(q_start)
        startend.append(q_end)
        startend.append(s_start)
        startend.append(s_end)
        feature = SeqFeature(FeatureLocation(q_start,q_end),strand=+1)
        #gds1_features.add_feature(feature,name=str(num),label=True,color=color)
        # Rebinds `feature`; the query feature above is not added to any set.
        feature = SeqFeature(FeatureLocation(s_start,s_end),strand=+1)
name=record.name, greytrack=True, height=0.5, start=0, end=len(record)) assert record.name not in feature_sets feature_sets[record.name] = gd_track_for_features.new_set() # We add dummy features to the tracks for each cross-link BEFORE we add the # arrow features for the genes. This ensures the genes appear on top: for X, Y, X_vs_Y in [("NC_002703", "AF323668", A_vs_B), ("AF323668", "NC_003212", B_vs_C)]: features_X = records[X].features features_Y = records[Y].features set_X = feature_sets[X] set_Y = feature_sets[Y] for score, x, y in X_vs_Y: color = colors.linearlyInterpolatedColor(colors.white, colors.firebrick, 0, 100, score) border = colors.lightgrey f_x = get_feature(features_X, x) F_x = set_X.add_feature(SeqFeature(FeatureLocation(f_x.location.start, f_x.location.end, strand=0)), color=color, border=border) f_y = get_feature(features_Y, y) F_y = set_Y.add_feature(SeqFeature(FeatureLocation(f_y.location.start, f_y.location.end, strand=0)), color=color, border=border) gd_diagram.cross_track_links.append(CrossLink(F_x, F_y, color, border)) for record, gene_colors in zip([A_rec, B_rec, C_rec],
def addFeat(self, feat, colorMax, percent):
    """Add ``feat`` to this track as a labelled arrow plus a coloured arrow.

    Two arrows are registered on ``self.gdFeature`` for the same feature:
    one filled with ``Track.backgroundColor`` that carries the visible
    label, and one whose fill is interpolated between ``white`` and
    ``colorMax`` according to ``percent``.

    Side effects: ``feat.location`` is replaced by a location shifted left
    by ``self.diff``, ``Track.maxSize`` is raised when the shifted end
    exceeds it, ``self.nbFeats`` is incremented and ``self.gdFeature`` is
    added to ``self.gdTrack``.

    Parameters:
        feat: feature exposing ``qualifiers``, ``location`` and ``strand``.
        colorMax: border colour and upper end of the fill gradient.
        percent: value interpolated on the scale
            ``minSimilarityScore`` .. 100.
    """
    # The fallback used to assign into an empty list (IndexError) and the
    # strand-1 branch re-read the missing qualifier (KeyError); resolve the
    # product name once and reuse it.
    try:
        product = feat.qualifiers['product'][0]
    except KeyError:
        print(" NO PRODUCT FOUND FOR ")
        print(feat)
        product = "No name"

    # Keep the whole name when it is short enough, else its first 3 words.
    labelTab = product.split(" ")
    if len(labelTab) > Track.maxLabelWord:
        labelTab = labelTab[:3]
    # The label shows the original (unshifted) coordinates.
    labelName = (" ".join(labelTab) + " \n " + str(feat.location.start) +
                 " - " + str(feat.location.end))

    # change location
    newStart = feat.location.start - self.diff
    newEnd = feat.location.end - self.diff
    if newEnd > Track.maxSize:
        Track.maxSize = newEnd
    feat.location = FeatureLocation(newStart, newEnd, feat.strand)

    # Only the first feature of the track anchors its label at the start;
    # angle and strand alternate with the parity of the feature index.
    parity = self.nbFeats % 2
    self.gdFeature.add_feature(
        feat,
        color=Track.backgroundColor,
        sigil="ARROW",
        name=labelName,
        label_position="start" if self.nbFeats == 0 else "middle",
        label_angle=Track.angle[parity],
        label=True,
        strand=Track.strand[parity])

    fill = colors.linearlyInterpolatedColor(white, colorMax,
                                            minSimilarityScore, 100, percent)
    if feat.strand == 1:
        # This name uses the shifted coordinates.
        self.gdFeature.add_feature(
            feat,
            border=colorMax,
            color=fill,
            sigil="ARROW",
            name=product[0:11].replace(" ", "_") + " \n " +
            str(feat.location.start) + " - " + str(feat.location.end),
            label_position="middle",
            label_angle=0,
            label=False)
    else:
        self.gdFeature.add_feature(
            feat,
            border=colorMax,
            color=fill,
            sigil="ARROW",
            label_position="middle",
            label_angle=180,
            label=False)
    self.nbFeats += 1
    self.gdTrack.add_set(self.gdFeature)
def generateClusterCompGraphic(cluster1, cluster2, pairs, outname):
    """Draw two gene clusters with cross-links and write ``outname + ".svg"``.

    Each cluster gets its own linear track (levels 3 and 1) with
    coordinates made relative to the cluster start.  Every
    ``(dist, prot1, prot2)`` entry of ``pairs`` gives the two proteins a
    shared palette colour and produces a cross-link whose colour goes from
    firebrick (``dist`` 0) to white (``dist`` 1).  Proteins without a
    partner are drawn light grey and unlabelled.

    Parameters:
        cluster1, cluster2: iterable clusters exposing ``location``
            (start, end) and whose first member has ``species``.
        pairs: sequence of ``(dist, prot1, prot2)``; processed in reverse
            order.  The caller's sequence is left unmodified.
        outname: diagram name and output path without extension.
    """
    noPair = tuple(x / 255. for x in (211, 211, 211))
    colorPalette = [tuple(float(value) for value in values)
                    for values in _get_colors_Janus(len(pairs))]
    # Work on a reversed copy: reversing in place silently changed the
    # caller's list.
    pairs = list(reversed(pairs))

    clusters = [cluster1, cluster2]
    clusterNames = ["%s:%i-%i" % (cluster[0].species, cluster.location[0],
                                  cluster.location[1])
                    for cluster in clusters]
    cluster1Name, cluster2Name = clusterNames

    # Generate color dictionary
    colorDict = {}
    proteinHits = set()
    for idx, (dist, prot1, prot2) in enumerate(pairs):
        colorDict[prot1.hitName] = colorPalette[idx]
        colorDict[prot2.hitName] = colorPalette[idx]
        proteinHits.update([prot1, prot2])

    # Draw the cluster
    gd_diagram = GenomeDiagram.Diagram(outname)
    featureHandles = {}
    maxLen = 0
    for idx, (clusterName, cluster) in enumerate(zip(clusterNames, clusters)):
        offset = cluster.location[0]
        maxLen = max(maxLen, cluster.location[1] - offset)
        gd_track_for_features = gd_diagram.new_track(
            3 - 2 * idx, name=clusterName, start=0,
            end=cluster.location[1] - offset, scale_ticks=0, scale=0)
        assert clusterName not in featureHandles
        featureHandles[clusterName] = gd_track_for_features.new_set()

    # Cross-link boxes are added before the gene arrows.
    offset1 = cluster1.location[0]
    offset2 = cluster2.location[0]
    border = colors.lightgrey
    for dist, prot1, prot2 in pairs:
        color = colors.linearlyInterpolatedColor(colors.firebrick,
                                                 colors.white, 0, 1, dist)
        coord1, _direction1 = prot1.location
        coord2, _direction2 = prot2.location
        coord1 = (x - offset1 for x in coord1)
        coord2 = (x - offset2 for x in coord2)
        F_x = featureHandles[cluster1Name].add_feature(
            SeqFeature(FeatureLocation(*coord1), strand=0),
            color=color, border=border)
        F_y = featureHandles[cluster2Name].add_feature(
            SeqFeature(FeatureLocation(*coord2), strand=0),
            color=color, border=border)
        gd_diagram.cross_track_links.append(
            CrossLink(F_x, F_y, color, border))

    for name, cluster in zip(clusterNames, clusters):
        offset = cluster.location[0]
        for protein in cluster:
            coord, direction = protein.location
            coord = (x - offset for x in coord)
            strand = +1 if direction == '+' else -1
            feature = SeqFeature(FeatureLocation(*coord), strand=strand)
            featureHandles[name].add_feature(
                feature, sigil="BIGARROW",
                color=colorDict.get(protein.hitName, noPair),
                name=protein.name, label_position="middle",
                label=protein in proteinHits, arrowshaft_height=1,
                arrowshaft_length=0.1, label_strand=1,
                label_size=8, label_angle=45)

    for track in gd_diagram.get_tracks():
        track.height = 1
        track.greytrack_fontcolor = colors.black
        track.greytrack_labels = 1
        track.greytrack = 1
        track.greytrack_fontsize = 16
        track.greytrack_font_rotation = 0
        track.axis_labels = 0
    gd_diagram.draw(format="linear", pagesize='A4', fragments=1,
                    start=0, end=maxLen)
    gd_diagram.write(outname + ".svg", "SVG")
c = 0 m = 0 for feature in record.features: if feature.type != "CDS": #Exclude feature that is no CDS continue name = feature.qualifiers["protein_id"][ 0] #get protein id of feature if l == 0: #main gene cluster if name in main_blast: #feature in main that match with other color_value = m / n_main_blast #rank of feature in main blast hit #color range [red,orange,yellow,green,cyan,blue,purple,magenta,pink] if color_value < 0.125: color = colors.linearlyInterpolatedColor( colors.red, colors.orange, 0, 0.125, color_value) elif color_value < 0.25: color = colors.linearlyInterpolatedColor( colors.orange, colors.yellow, 0.125, 0.25, color_value) elif color_value < 0.375: color = colors.linearlyInterpolatedColor( colors.yellow, colors.green, 0.25, 0.375, color_value) elif color_value < 0.5: color = colors.linearlyInterpolatedColor( colors.green, colors.cyan, 0.375, 0.5, color_value) elif color_value < 0.625: color = colors.linearlyInterpolatedColor( colors.cyan, colors.blue, 0.5, 0.625, color_value) elif color_value < 0.75:
def crosslinks(fileName, GenBank_1, GenBank_2):
    """Draw two GenBank records with cross-links and write ``fileName + ".pdf"``.

    Both records are exported to FASTA and compared via ``ublastfeatures``,
    and the hits from ``getCrossLinks`` are printed.  The user is asked
    interactively whether one gene name should be highlighted.  CDS features
    are drawn as arrows and each hit becomes a cross-link shaded by score.

    Reads 'Backbones_2_Clean.csv' and 'AntibioticResistanceGenesClean.csv'
    from the current directory.
    """
    gd_diagram = GenomeDiagram.Diagram(fileName)
    max_len = 0
    # Open files and create the FASTA files to be compared by Ublast.
    # The FASTA name is the GenBank name minus its last 3 characters.
    A_rec = SeqIO.read(GenBank_1, 'gb')
    GB_file_name = GenBank_1
    fasta_file_name_A = GB_file_name[:-3] + ".fasta"
    writeFasta(GB_file_name, fasta_file_name_A)
    B_rec = SeqIO.read(GenBank_2, 'gb')
    GB_file_name = GenBank_2
    fasta_file_name_B = GB_file_name[:-3] + ".fasta"
    writeFasta(GB_file_name, fasta_file_name_B)
    # Create the tab file with the Ublast output
    blastfile = ublastfeatures(fasta_file_name_A, fasta_file_name_B)
    Gname = 'nn'  # name of gene to add
    # First section gets the crosslinks from the blast files
    A_vs_B = getCrossLinks(blastfile)
    # Print the list of blast results for reference
    for item in A_vs_B:
        print item
    # Ask the user for a gene name to highlight
    gene_search = raw_input(
        "would you like to highlight a specific gene name?\n \t1) Yes\n \t2) No\n"
    )
    if gene_search == "1" or gene_search.lower() == "yes":
        gene_highlight = raw_input(
            "What is the name of the gene you would like to highlight?\n")
        print gene_highlight + " will be printed in red on the genome diagram, all other genes will be grey"
        C_colors = [yellow] * 1 + [orange] * 1 + [brown] * 1 + [
            lightblue
        ] * 1 + [purple] * 1 + [green] * 1 + [grey] * 1
    else:
        # "NONE" is the sentinel meaning that no gene is highlighted.
        gene_highlight = "NONE"
        # Same palette as above: the colours cycled through for the arrows
        C_colors = [yellow] * 1 + [
            orange
        ] * 1 + [brown] * 1 + [lightblue] * 1 + [purple] * 1 + [green] * 1 + [
            grey
        ] * 1
    i = 0  # running feature counter used to pick a palette colour
    geneColor = grey  # color of gene. Grey = no name
    # Create new features for concatenations: every match of the fixed
    # 35-character junction sequence is added to its record as a CDS.
    recs = ("A", "B")
    for rec in recs:
        if rec == "A":
            for loc_a in re.finditer('NNNNNCACACACTTAATTAATTAAGTGTGTGNNNNN', str(A_rec.seq)):
                concat_feature = SeqFeature(FeatureLocation(loc_a.start(), loc_a.start() + 35, strand=-1), id="Concat", type="CDS", qualifiers={'product': 'Concat'})
                A_rec.features.append(concat_feature)
        else:
            for loc_b in re.finditer('NNNNNCACACACTTAATTAATTAAGTGTGTGNNNNN', str(B_rec.seq)):
                concat_feature = SeqFeature(FeatureLocation(loc_b.start(), loc_b.start() + 35, strand=-1), id="Concat", type="CDS", qualifiers={'product': 'Concat'})
                B_rec.features.append(concat_feature)
    # Read in lists of gene names and types
    with open('Backbones_2_Clean.csv', 'r') as hand1:
        back_b = csv.reader(hand1)
        backbone = list(back_b)
    with open('AntibioticResistanceGenesClean.csv', 'r') as hand2:
        AnRe = csv.reader(hand2)
        An_Re = list(AnRe)
    # This loop adds each gene feature to the record with a color and name
    for record, gene_colors in zip([A_rec, B_rec], [C_colors, C_colors]):
        max_len = max(max_len, len(record))
        gd_track_for_features = gd_diagram.new_track(1, name=record.name, greytrack=True, start=0, end=len(record))
        gd_feature_set = gd_track_for_features.new_set()
        for feature in record.features:
            if feature.type != "CDS":
                # Exclude this feature
                continue
            # Choose the colour of the annotation based on the gene name
            try:
                Gname = feature.qualifiers['product'][0]
                if Gname == gene_highlight:
                    geneColor = red
                elif gene_highlight == "NONE":
                    # NOTE(review): the palette has 7 entries but the index
                    # is taken modulo 6, so the last colour is never used.
                    geneColor = gene_colors[i % 6]
                # Backbone genes
                # NOTE(review): only the first CSV row is consulted.
                elif Gname in backbone[0]:
                    geneColor = blue
                # Antibiotic resistance genes (first CSV row only)
                elif Gname in An_Re[0]:
                    geneColor = green
                # Transposase & integrase
                elif Gname == 'Tnp' or Gname == 'Int':
                    geneColor = orange
                else:
                    geneColor = grey
            except KeyError:
                # If there is no gene name, make it grey
                Gname = 'No Name'
                geneColor = grey
            gd_feature_set.add_feature(
                feature,
                sigil="BIGARROW",  # this adds gene features to gd_feature_set
                arrowhead_length=.25,
                color=geneColor,
                label=True,
                name=Gname,
                label_position="start",
                label_size=6,
                label_angle=45)
            i += 1  # advance so the next arrow gets the next palette colour
    # A_rec's track is looked up at level 2 and B_rec's at level 1.
    track_X = gd_diagram.tracks[2]
    track_Y = gd_diagram.tracks[1]
    # This loop adds the cross links so they point to their feature in the diagram
    for score, id_X, id_Y in A_vs_B:
        try:
            feature_X = get_feature(A_rec.features, id_X)
            feature_Y = get_feature(B_rec.features, id_Y)
            # Score 0 maps to white and 100 to firebrick.
            color = colors.linearlyInterpolatedColor(colors.white, colors.firebrick, 0, 100, score)
            link_xy = CrossLink(
                (track_X, feature_X.location.start, feature_X.location.end),
                (track_Y, feature_Y.location.start, feature_Y.location.end),
                color, colors.lightgrey)
            print "Link made "
            gd_diagram.cross_track_links.append(link_xy)
        except KeyError:
            print "Feature qualifier for crosslink not found"  # for those pesky nameless genes
    gd_diagram.draw(format="linear", pagesize=(1200, 2400), fragments=1, start=0, end=max_len)
    print max_len
    gd_diagram.write(fileName + ".pdf", "PDF")
def draw_heat_graph(self, graph):
    """ draw_heat_graph(self, graph) -> [element, element,...]

        o graph     Graph object

        Returns a list of drawable elements for the heat graph
    """
    # At each point contained in the graph data, we draw a box that is the
    # full height of the track, extending from the midpoint between the
    # previous and current data points to the midpoint between the current
    # and next data points
    heat_elements = []  # Holds drawable elements for the graph

    # Get graph data and information
    data_quartiles = graph.quartiles()
    minval, maxval = data_quartiles[0], data_quartiles[4]
    midval = (maxval + minval) / 2.  # mid is the value at the X-axis (unused below)
    btm, ctr, top = self.track_offsets[self.current_track_level]
    trackheight = (top - btm)  # unused below
    newdata = intermediate_points(self.start, self.end, graph[self.start:self.end])

    # Create elements on the graph, indicating a large positive value by
    # the graph's poscolor, and a large negative value by the graph's
    # negcolor attributes
    for pos0, pos1, val in newdata:
        fragment0, x0 = self.canvas_location(pos0)
        fragment1, x1 = self.canvas_location(pos1)
        x0, x1 = self.x0 + x0, self.x0 + x1  # account for margin

        # Calculate the heat color: maxval maps to poscolor and minval to
        # negcolor
        heat = colors.linearlyInterpolatedColor(graph.poscolor, graph.negcolor, maxval, minval, val)

        # Draw heat box
        if fragment0 == fragment1:  # Box is contiguous on one fragment
            # A box reaching the fragment's upper limit is extended to the
            # right-hand drawing limit.
            if pos1 >= self.fragment_limits[fragment0][1]:
                x1 = self.xlim
            ttop = top + self.fragment_lines[fragment0][0]
            tbtm = btm + self.fragment_lines[fragment0][0]
            heat_elements.append(
                draw_box((x0, tbtm), (x1, ttop), color=heat, border=None))
        else:  # box is split over two or more fragments
            fragment = fragment0
            start = x0
            # Fill every fragment that ends at or before pos1 up to the
            # right-hand limit; later fragments start at the left margin.
            while self.fragment_limits[fragment][1] <= pos1:
                ttop = top + self.fragment_lines[fragment][0]
                tbtm = btm + self.fragment_lines[fragment][0]
                heat_elements.append(
                    draw_box((start, tbtm), (self.xlim, ttop), color=heat, border=None))
                fragment += 1
                start = self.x0
                # NOTE(review): the nesting of these two lines is
                # reconstructed; they set the vertical range of the next
                # fragment for the final box below - confirm against the
                # original file.
                ttop = top + self.fragment_lines[fragment][0]
                tbtm = btm + self.fragment_lines[fragment][0]
            # Add the last part of the bar
            heat_elements.append(
                draw_box((self.x0, tbtm), (x1, ttop), color=heat, border=None))
    return heat_elements
# For every tab-separated link record (phage1, gene1, phage2, gene2, score):
# build a cross-link between the two genes' tracks and add a box feature for
# the first gene on its track.
for index, connection in enumerate(links_handle):
    connection = connection.strip()
    values = connection.split("\t")
    phage1 = values[0]
    phage1Full = phageFullNameConvert[phage1]
    phage1_gene = int(values[1])
    phage2 = values[2]
    phage2Full = phageFullNameConvert[phage2]
    phage2_gene = int(values[3])
    score = float(values[4])
    track1 = genomeDiag.tracks[phageTrack[phage1]]
    track2 = genomeDiag.tracks[phageTrack[phage2]]
    # Score 0 maps to white and 100 to lightcoral.
    color = colors.linearlyInterpolatedColor(colors.white, colors.lightcoral, 0, 100, score)
    borderColor = None
    # The two branches differ only in `flip`: links starting from these three
    # files are flipped.
    # NOTE(review): `link_xy` is not used within the visible lines;
    # presumably it is appended to the diagram further down - confirm.
    if phage1Full == "OCN008_K139_region.fa" or phage1Full == "RE98_web_2_ep3.fa" or phage1Full == "RE98_web_1_kappa.fa":
        link_xy = CrossLink((track1, phageDict[phage1Full][phage1_gene]["start"], phageDict[phage1Full][phage1_gene]["stop"]), (track2, phageDict[phage2Full][phage2_gene]["start"], phageDict[phage2Full][phage2_gene]["stop"]), color=color, flip=True, border=borderColor)
    else:
        link_xy = CrossLink((track1, phageDict[phage1Full][phage1_gene]["start"], phageDict[phage1Full][phage1_gene]["stop"]), (track2, phageDict[phage2Full][phage2_gene]["start"], phageDict[phage2Full][phage2_gene]["stop"]), color=color, flip=False, border=borderColor)
    # add link features to first track
    BoxFeatureTrack1 = SeqFeature(FeatureLocation(phageDict[phage1Full][phage1_gene]["start"], phageDict[phage1Full][phage1_gene]["stop"], strand=0))
    borderBoxColor = colors.white
    # A new feature set is created on track1 for every link.
    genomeSet = track1.new_set()
    genomeSet.add_feature(BoxFeatureTrack1, label=False, label_position="start", sigil="BOX", color=color, border=borderBoxColor)
# add crosslink from blast results blast_results = open(path_to_blast_result) # parse blast results for each_line in blast_results: each_line_split = each_line.split('\t') query = each_line_split[0] target = each_line_split[1] identity = float(each_line_split[2]) query_start = int(each_line_split[6]) query_end = int(each_line_split[7]) target_start = int(each_line_split[8]) target_end = int(each_line_split[9]) # use color to reflect identity color = colors.linearlyInterpolatedColor(colors.white, colors.red, 50, 100, identity) # determine which is which (query/target to contig_1/contig_2) # if query is contig_1 if query == gene1_contig.name: link = CrossLink( (contig_1_gene_content_track, query_start, query_end), (contig_2_gene_content_track, target_start, target_end), color=color, border=color, flip=False) diagram.cross_track_links.append(link) # if query is contig_2 elif query == gene2_contig.name: link = CrossLink(
def GenomeMap(file, GenomeId, grid=10000, cross=True):
    """Draw a linear genome diagram for several genomes and write it to PDF.

    Each genome is loaded from ``<Id>.gb`` in the working directory; when that
    file is missing or cannot be parsed as a single GenBank record, the record
    is fetched from Entrez (nucleotide database) and cached under that name.
    One track is created per genome.  When ``cross`` is true, every row of the
    CSV ``file`` adds a coloured box on two tracks plus a cross-link between
    them.

    Args:
        file: Path of a CSV file describing the cross-links.  Columns 0 and 1
            hold the two genome ids (only the text before the first space is
            used), columns 2-3 and 4-5 hold the integer start/end of the
            linked region on each genome.  The file is read even when
            ``cross`` is false.
        GenomeId: Sequence of genome ids.  Cross-link ids are looked up in it
            with ``GenomeId.index`` to choose the link colour.
        grid: Interval between large scale ticks; small ticks are drawn every
            ``grid / 20``.
        cross: When false, no cross-links are drawn and the diagram is written
            to ``"T7.pdf"`` on A4 in five fragments.  Otherwise a single
            fragment is drawn and written to ``output``.

    Raises:
        AssertionError: if two loaded records share the same ``id``.
        KeyError: if a cross-link names a genome that has no track.
    """
    gd_diagram = GenomeDiagram.Diagram('phages')

    with open(file, 'r') as f:
        data = list(csv.reader(f))

    records = []
    ref = {}
    for Id in GenomeId:
        gb_path = Id + ".gb"
        try:
            record = SeqIO.read(gb_path, "genbank")
        # A tuple is required here: ``A or B or C`` evaluates to just ``A``,
        # so parse errors (ValueError) on a cached file were never handled.
        except (FileNotFoundError, IOError, ValueError):
            hd = Entrez.efetch(db="nucleotide", id=Id, rettype='gb',
                               retmode="text")
            try:
                record = SeqIO.read(hd, 'genbank')
            finally:
                hd.close()
            with open(gb_path, 'w') as fw:
                SeqIO.write(record, fw, 'genbank')
        # The keywords decide below whether "gene" or "CDS" features are
        # drawn.  A record without keywords is treated like one whose keyword
        # list is [''].
        ref[Id] = record.annotations.get('keywords', [''])
        records.append(record)

    feature_sets = {}
    max_len = 0
    for i, record in enumerate(records):
        max_len = max(max_len, len(record))
        gd_track_for_features = gd_diagram.new_track(
            5 - 2 * i,
            name=record.description,
            greytrack=True,
            greytrack_fontsize=16,
            greytrack_labels=1,
            largetick=True,
            smalltick=True,
            scale_ticks=True,
            scale_largeticks=0.5,
            scale_smallticks=0.1,
            scale_largetick_interval=grid,
            scale_smalltick_interval=grid / 20,
            scale_largetick_labels=True,
            start=0,
            end=len(record),
        )
        # The sets are keyed by record.id, so that is the key to check.
        assert record.id not in feature_sets, \
            'duplicate record id: ' + str(record.id)
        feature_sets[record.id] = gd_track_for_features.new_set()

    if cross:
        for crosslink in data:
            if not crosslink:
                # csv.reader yields [] for blank lines; nothing to link.
                continue
            id_x = crosslink[0].split(' ')[0]
            id_y = crosslink[1].split(' ')[0]
            set_X = feature_sets[id_x]
            set_Y = feature_sets[id_y]

            # The colour ramp depends on whether both genomes belong to
            # CLASS1, both to CLASS2, or neither pairing applies; the position
            # along the ramp is the index of the second genome in GenomeId.
            if id_x in CLASS1 and id_y in CLASS1:
                ramp = (colors.green, colors.yellow)
            elif id_x in CLASS2 and id_y in CLASS2:
                ramp = (colors.purple, colors.red)
            else:
                ramp = (colors.blue, colors.cyan)
            color = colors.linearlyInterpolatedColor(
                ramp[0], ramp[1], 0, len(GenomeId), GenomeId.index(id_y))

            F_x = set_X.add_feature(
                SeqFeature(FeatureLocation(int(crosslink[2]),
                                           int(crosslink[3]), strand=0)),
                color=color,
                border=color,
            )
            F_y = set_Y.add_feature(
                SeqFeature(FeatureLocation(int(crosslink[4]),
                                           int(crosslink[5]), strand=0)),
                color=color,
                border=color,
            )
            gd_diagram.cross_track_links.append(
                CrossLink(F_x, F_y, color, color))

    # Built once instead of once per feature.  The original indexed this list
    # modulo a hard-coded 163; the modulus is capped by the real list length
    # so a shorter colour table cannot raise IndexError.
    named_colors = list(colors.getAllNamedColors().keys())
    palette_size = min(163, len(named_colors))

    for record in records:
        gd_feature_set = feature_sets[record.id]

        # Correct the origin: rotate the record so that it starts at the
        # first 'rep_origin' feature.
        for feature in record.features:
            if feature.type == 'rep_origin':
                origin = feature.location.start
                print(record.description + ' 的起始位点在:' + str(origin))
                record = record[origin:] + record[:origin]
                # If the first feature of the rotated record lies on the
                # reverse strand, flip the whole record.
                if record.features and \
                        record.features[0].location.strand == -1:
                    print('daole')
                    record = record.reverse_complement(
                        id=True, name=True, description=True, features=True,
                        annotations=True, letter_annotations=True)
                break
        # To draw the reverse-complement sequence, enable this by hand:
        # record = record.reverse_complement(
        #     id=True, name=True, description=True, features=True,
        #     annotations=True, letter_annotations=True)
        print(record.description + ' 的起始位点已校正')

        # Draw the features: "gene" features when the record has keywords,
        # "CDS" features (with a longer arrow head) when the list is [''].
        if ref[record.id] != ['']:
            wanted_type, head_length = "gene", 0.1
        else:
            wanted_type, head_length = "CDS", 0.2
        for feature in record.features:
            if feature.type != wanted_type:
                continue
            # The colour is chosen from the feature length.
            color = named_colors[len(feature) % palette_size]
            gd_feature_set.add_feature(feature,
                                       color=color,
                                       label=True,
                                       label_size=10,
                                       label_angle=90,
                                       sigil="ARROW",
                                       arrowshaft_height=1.0,
                                       arrowhead_length=head_length)

    if not cross:
        # Single-sequence layout.
        gd_diagram.draw(format="linear",
                        pagesize='A4',
                        fragments=5,
                        start=0,
                        end=max_len,
                        fragment_size=1)
        gd_diagram.write("T7.pdf", "PDF")
    else:
        # Alignment layout: page width grows with the number of genomes.
        gd_diagram.draw(format="linear",
                        pagesize=(10 * len(GenomeId) * cm, 120 * cm),
                        fragments=1,
                        start=0,
                        end=max_len,
                        fragment_size=1)
        # NOTE(review): `output` is not defined in this function; presumably
        # a module-level name holding the target path -- confirm.
        gd_diagram.write(output, "PDF")
    print("已输出为PDF")
def draw_alignment(self, _gdd, query_id, query_length, blast_hits):
    """Draw the reference annotation, the query hits and their cross-links.

    Args:
        _gdd: GenomeDiagram diagram that receives the tracks and is drawn.
        query_id: Name shown on the query track.
        query_length: End coordinate of the query track.
        blast_hits: Iterable of hit rows.  As used here, ``r[0]``/``r[1]``
            are coordinates on the query track, ``r[2]``/``r[3]`` coordinates
            on the reference track, ``r[4]`` is passed to
            ``self.scale_color`` and ``r[-1]`` is the strand.

    Reads ``self.ref_length``, ``self.annotation`` (rows of
    ``(start, end, name, ..., strand_flag)`` with the flag given as the
    string ``'1'`` or ``'-1'``) and ``self.start``.
    """
    # --- reference track -------------------------------------------------
    gdt_features = _gdd.new_track(1, greytrack=False, start=1,
                                  end=self.ref_length)
    gds_features = gdt_features.new_set()
    strand_of_flag = {'1': 1, '-1': -1}  # any other flag -> no strand
    max_length = 0
    for annot in self.annotation:
        strand = strand_of_flag.get(annot[-1])
        start, end = int(annot[0]), int(annot[1])
        if end > max_length:
            max_length = end
        # The strand is set on the location rather than passed to
        # SeqFeature(), whose ``strand`` argument is deprecated in newer
        # Biopython releases.
        feature = SeqFeature(FeatureLocation(start, end, strand=strand),
                             id=annot[2])
        gds_features.add_feature(feature, label=True, name=annot[2],
                                 sigil='BOX', label_size=14, label_angle=0,
                                 arrowhead_length=0.1, arrowshaft_height=1)
    # One-base marker at the PCR start position.
    gds_features.add_feature(
        SeqFeature(FeatureLocation(self.start, self.start + 1)),
        label=True, name='PCR start', label_size=14, color='black')
    _gdd.draw(format='linear', fragments=1, start=1, end=max_length)

    # --- query track -----------------------------------------------------
    # NOTE(review): this second track is also requested at level 1; per the
    # GenomeDiagram documentation an occupied level pushes existing tracks
    # up, which would leave the query at tracks[1] and the reference at
    # tracks[2], matching the cross-links below -- confirm.
    gdt_features = _gdd.new_track(1, greytrack=True, start=0,
                                  end=query_length, name=query_id)
    gds_features = gdt_features.new_set()
    for r in blast_hits:
        feature = SeqFeature(FeatureLocation(r[0], r[1], strand=r[-1]))
        gds_features.add_feature(feature, label=False, sigil='BOX')
    gdt_features.greytrack_fontcolor = colors.black
    gdt_features.greytrack_fontsize = 12

    # --- cross-links -----------------------------------------------------
    # The hit boxes were already added above; the original added each of
    # them a second time in this loop, duplicating every box.
    for r in blast_hits:
        alpha = self.scale_color(r[4])
        color = colors.linearlyInterpolatedColor(colors.white,
                                                 colors.firebrick,
                                                 0, 100, alpha)
        # Hits lying entirely before self.start get no link.
        if max(r[2], r[3]) < self.start:
            continue
        link_xy = CrossLink((_gdd.tracks[1], r[0], r[1]),
                            (_gdd.tracks[2], r[2], r[3]), color)
        _gdd.cross_track_links.append(link_xy)
    _gdd.draw(format='linear', fragments=1)
def draw_heat_graph(self, graph):
    """Return the list of drawable elements for a heat-style graph.

    o graph     Graph object

    For every (pos0, pos1, value) triple produced by ``intermediate_points``
    a box spanning the full track height is drawn between the two positions.
    Its colour is interpolated between ``graph.poscolor`` (at the highest
    quartile value) and ``graph.negcolor`` (at the lowest).  A box whose end
    points fall on different fragments is split into one box per fragment.
    """
    boxes = []  # drawable elements, in drawing order

    # quartiles()[0] and [4] are used as the extremes of the colour scale
    quartiles = graph.quartiles()
    minval, maxval = quartiles[0], quartiles[4]
    btm, _ctr, top = self.track_offsets[self.current_track_level]

    def fragment_box(fragment, left, right, fill):
        # Full-height box on the given fragment; the track's vertical
        # offsets are shifted by that fragment's line position.
        shift = self.fragment_lines[fragment][0]
        return draw_box((left, btm + shift), (right, top + shift),
                        color=fill, border=None)

    points = intermediate_points(self.start, self.end,
                                 graph[self.start:self.end])
    for pos0, pos1, val in points:
        fragment0, x0 = self.canvas_location(pos0)
        fragment1, x1 = self.canvas_location(pos1)
        x0, x1 = self.x0 + x0, self.x0 + x1  # account for margin

        heat = colors.linearlyInterpolatedColor(graph.poscolor,
                                                graph.negcolor,
                                                maxval, minval, val)

        if fragment0 != fragment1:
            # The box straddles two or more fragments: fill each fragment
            # that ends at or before pos1 up to the right limit, restarting
            # at the left margin on the next one.
            fragment, left = fragment0, x0
            while self.fragment_limits[fragment][1] <= pos1:
                boxes.append(fragment_box(fragment, left, self.xlim, heat))
                fragment += 1
                left = self.x0
            # Remaining part of the bar on the last fragment
            boxes.append(fragment_box(fragment, self.x0, x1, heat))
        else:
            # Box sits on a single fragment; clamp it to the right limit
            # when pos1 reaches the end of the fragment.
            if pos1 >= self.fragment_limits[fragment0][1]:
                x1 = self.xlim
            boxes.append(fragment_box(fragment0, x0, x1, heat))

    return boxes