def __init__(self, seq1, seq2, **kw):
    """Pair up two sequences, wrapping each one in a Display when needed.

    Arguments:
        - seq1, seq2: either Display instances, or objects that are
          handed to Display(seq, **kw) to build one.
        - kw: keyword arguments forwarded to Display; only used for an
          argument that is not already a Display.

    The Display objects are kept as seq1d / seq2d, their ``base``
    attributes as seq1 / seq2, and an empty ``_cache`` dict is created.
    """
    wrapped = [
        seq if isinstance(seq, Display) else Display(seq, **kw)
        for seq in (seq1, seq2)
    ]
    (self.seq1d, self.seq2d) = wrapped
    self.seq1 = self.seq1d.base
    self.seq2 = self.seq2d.base
    self._cache = {}
    # Check inputs are sufficiently sequence-like: the string form must
    # have exactly one character per position.
    for base in (self.seq1, self.seq2):
        assert len(base) == len(str(base))
def populateAxes(self, ax, columns=3):
    """Draw the legend onto the matplotlib axes ``ax``.

    Each track definition of the policy that has a tag (other than
    "Graphs") gets a heading line, then one sample drawing plus text
    label per feature laid out ``columns`` to a row, then a horizontal
    rule.  Nothing is returned; ``ax`` is modified in place.

    Arguments:
        - ax: the axes to draw on.  Its x limits are set to (0, 600)
          and its y limits to (-800, 50).
        - columns: the number of columns of feature / representation
          pairs
    """
    row_height = 30
    ax.set_xlim(0, 600)
    ax.set_ylim(-800, 50)
    # x counts columns across, y counts rows downwards (so it goes negative)
    x = y = 0
    for track in self.policy()._makeTrackDefns():
        if track.tag is None or track.tag == "Graphs":
            continue
        ax.text(10, y * row_height, track.tag)
        y -= 1
        for feature in track:
            seq = self._makeSampleSequence(feature)
            display = Display(
                seq,
                policy=self.policy,
                min_feature_height=10,
                show_code=False,
                pad=0,
            )
            sample = display.makeArtist()
            # Left edge of this column; shared by the sample and its label.
            left = x * 600 / columns
            sample.translate(left + 10, y * row_height)
            ax.add_artist(sample)
            ax.text(left + 90, y * row_height, feature)
            x += 1
            if x % columns == 0:
                x = 0
                y -= 1
        if x:
            # The last row of this track was only partly filled; close it.
            x = 0
            y -= 1
        ax.axhline((y + .7) * row_height)
def populateAxes(self, ax, columns=3):
    """Draw the legend onto the matplotlib axes ``ax``.

    Each track definition of the policy that has a tag (other than
    "Graphs") gets a heading line, then one sample drawing plus text
    label per feature laid out ``columns`` to a row, then a horizontal
    rule.  Nothing is returned; ``ax`` is modified in place.

    Arguments:
        - ax: the axes to draw on.  Its x limits are set to (0, 600)
          and its y limits to (-800, 50).
        - columns: the number of columns of feature / representation
          pairs
    """
    row_height = 30
    ax.set_xlim(0, 600)
    ax.set_ylim(-800, 50)
    # x counts columns across, y counts rows downwards (so it goes negative)
    x = y = 0
    for track in self.policy()._makeTrackDefns():
        if track.tag is None or track.tag == "Graphs":
            continue
        ax.text(10, y * row_height, track.tag)
        y -= 1
        for feature in track:
            seq = self._makeSampleSequence(feature)
            display = Display(
                seq,
                policy=self.policy,
                min_feature_height=10,
                show_code=False,
                pad=0,
            )
            sample = display.makeArtist()
            # Left edge of this column; shared by the sample and its label.
            left = x * 600 / columns
            sample.translate(left + 10, y * row_height)
            ax.add_artist(sample)
            ax.text(left + 90, y * row_height, feature)
            x += 1
            if x % columns == 0:
                x = 0
                y -= 1
        if x:
            # The last row of this track was only partly filled; close it.
            x = 0
            y -= 1
        ax.axhline((y + .7) * row_height)
def partimatrix(alignment, display=False, samples=0, s_limit=0, title="",
                include_incomplete=False, print_stats=True,
                max_site_labels=50, order_by_conflict=True):
    """Compute site/partition compatibility for an alignment and plot it.

    Arguments:
        - alignment: must provide getNumSeqs(), getSeqNames() and len().
        - display: when true a matplotlib figure is built and returned;
          otherwise only the statistics are computed (and optionally
          printed) and None is returned.
        - samples: when non-zero, nss_significance(..., samples=samples)
          is reported instead of the plain neighbour similarity score.
        - s_limit: the per-sequence panel is only drawn when the number
          of sequences does not exceed this limit.
        - title: text placed at the top of the figure.
        - include_incomplete: keep partitions whose second element is
          not the all-sequences bit mask (gaps or other ambiguities).
        - print_stats: print summary statistics to stdout.
        - max_site_labels: site tick labels are hidden when there are
          more informative sites than this.
        - order_by_conflict: when true (the default, and what this
          function has always done) partitions are ordered with
          order_tied_to_cluster_similar(S, conflict); when false they
          are ordered with order_to_cluster_similar(S) and flipped if
          needed so that the lower-conflict half comes first.

    Returns the matplotlib figure when ``display`` is true, else None.
    """
    num_seqs = alignment.getNumSeqs()
    if print_stats:
        print("%s sequences in %s bp alignment" % (num_seqs, len(alignment)))
    (sites, columns, partitions) = binary_partitions(alignment)
    if print_stats:
        print("%s unique binary partitions from %s informative sites" % (
            len(partitions), len(sites)))

    partpart = min_edges(partitions)      # [partition, partition]
    pmatrix = partpart[columns, :]        # [site, partition]
    sitematrix = pmatrix[:, columns]      # [site, site]

    # RETICULATE, JE 1996
    compatibility = sitematrix <= 2
    if print_stats:
        print("Overall compatibility %.6f"
              % intra_region_average(compatibility))
        if samples == 0:
            print("Neighbour similarity score = %.6f"
                  % neighbour_similarity_score(compatibility))
        else:
            print("Neighbour similarity = %.6f, avg random = %.6f, p < %s"
                  % nss_significance(compatibility, samples=samples))

    # PARTIMATRIX, JWE 1997

    # Remove the incomplete partitions with gaps or other ambiguities
    mask = 2 ** num_seqs - 1
    complete = [i for (i, (x, xz)) in enumerate(partitions) if xz == mask]
    if not include_incomplete:
        pmatrix = pmatrix[:, complete]
        partitions = [partitions[i] for i in complete]
    # For scoring/ordering purposes, also remove the incomplete sequences
    complete_lookup = set(complete)
    complete_columns = [
        i for (i, c) in enumerate(columns) if c in complete_lookup]
    scoreable = pmatrix[complete_columns, :]

    # Order partitions by increasing conflict score
    conflict = (scoreable > 2).sum(axis=0)
    conflict_order = numpy.argsort(conflict)
    pmatrix = pmatrix[:, conflict_order]
    partitions = [partitions[i] for i in conflict_order]
    scoreable = pmatrix[complete_columns, :]
    support = (scoreable == 0).sum(axis=0)
    consist = (scoreable <= 2).sum(axis=0)
    conflict = (scoreable > 2).sum(axis=0)

    # Similarity measure between partitions: observed agreement compared
    # with the agreement expected from each partition's own
    # consistent/conflicting proportions.
    s = 1.0 * len(complete_columns)
    observed = boolean_similarity(scoreable <= 2).astype(float) / s
    (p, q) = (consist / s, conflict / s)
    expected = numpy.outer(p, p) + numpy.outer(q, q)
    S = (observed - expected) / numpy.sqrt(expected * (1 - expected) / s)

    # Order partitions for better visual grouping
    if order_by_conflict:
        order = order_tied_to_cluster_similar(S, conflict)
    else:
        order = order_to_cluster_similar(S)
        # NOTE(review): the flip is assumed to belong to this branch only.
        half = len(order) // 2
        if sum(conflict[order[:half]]) > sum(conflict[order[half:]]):
            order.reverse()

    pmatrix = pmatrix[:, order]
    conflict = conflict[order]
    support = support[order]
    partitions = [partitions[i] for i in order]

    if not display:
        return None

    figwidth = 8.0

    (c_size, p_size) = pmatrix.shape
    s_size = num_seqs

    # Layout (including figure height) chosen to get aspect ratio of
    # 1.0 for the compatibility matrix, and if possible the other
    # matrices.
    if s_size > s_limit:
        # too many species to show
        s_size = 0
    else:
        # distort squares to give enough space for species names
        # (explicit float so the ratio never depends on the division mode)
        extra = max(1.0, (12.0 / 80) / (figwidth / (c_size + p_size)))
        p_size *= numpy.sqrt(extra)
        s_size *= extra

    genemap = Display(alignment, recursive=s_size > 0,
                      colour_sequences=False, draw_bases=False)
    annot_width = max(genemap.height / 80.0, 0.1)
    figwidth = max(figwidth, figwidth / 2 + annot_width)

    # Sizes below start out in inches ...
    bar_height = 0.5
    link_width = 0.3
    x_margin = 0.60
    y_margin = 0.35
    xpad = 0.05
    ypad = 0.2
    (x, y) = (c_size + p_size, c_size + s_size)
    x_scale = y_scale = (
        figwidth - 2 * x_margin - xpad - link_width - annot_width) / x
    figheight = y_scale * y + 2 * y_margin + 2 * ypad + bar_height
    # ... and are then converted to fractions of the figure size, which
    # is what fig.add_axes() expects.
    x_scale /= figwidth
    y_scale /= figheight
    x_margin /= figwidth
    y_margin /= figheight
    xpad /= figwidth
    ypad /= figheight
    bar_height /= figheight
    link_width /= figwidth
    annot_width /= figwidth
    (c_width, c_height) = (c_size * x_scale, c_size * y_scale)
    (p_width, s_height) = (p_size * x_scale, s_size * y_scale)
    vert = (x_margin + xpad + c_width)
    top = (y_margin + c_height + ypad)

    fig = plt.figure(figsize=(figwidth, figheight))
    # NOTE(review): 'axisbg' is the spelling used by older matplotlib
    # releases; newer ones call this 'facecolor' -- confirm the target
    # version before changing it.
    kw = dict(axisbg=fig.get_facecolor())
    # axC: site x site compatibility, axP: site x partition matrix,
    # axS: partition x sequence membership, axB: support/conflict bars,
    # axZ: zoom lines, axA: alignment annotation.
    axC = fig.add_axes([x_margin, y_margin, c_width, c_height], **kw)
    axP = fig.add_axes([vert, y_margin, p_width, c_height],
                       sharey=axC, **kw)
    axS = fig.add_axes([vert, top, p_width, s_height or .001],
                       sharex=axP, **kw)
    axB = fig.add_axes([vert, top + ypad + s_height, p_width, bar_height],
                       sharex=axP, **kw)
    axZ = fig.add_axes([vert + p_width, y_margin, link_width, c_height],
                       frameon=False)
    axA = genemap.asAxes(
        fig, [vert + p_width + link_width, y_margin, annot_width, c_height],
        vertical=True, labeled=True)

    axP.yaxis.set_visible(False)

    fig.text(x_margin + c_width / 2, .995, title, ha='center', va='top')

    if not s_size:
        axS.set_visible(False)

    # No ticks for these non-float dimensions
    for axes in [axB, axC, axS, axP]:
        for axis in [axes.xaxis, axes.yaxis]:
            for tick in axis.get_major_ticks():
                tick.gridOn = False
                tick.tick1On = False
                tick.tick2On = False
                tick.label1.set_size(8)
                tick.label2.set_size(8)
                if axis is axes.xaxis:
                    tick.label1.set_rotation('vertical')

    # Partition dimension
    for axis in [axS.xaxis, axP.xaxis, axB.xaxis, axB.yaxis]:
        axis.set_major_formatter(matplotlib.ticker.NullFormatter())
        axis.set_minor_formatter(matplotlib.ticker.NullFormatter())

    # Site dimension
    if c_size > max_site_labels:
        for axis in [axC.yaxis, axC.xaxis]:
            axis.set_visible(False)
    else:
        isl = integer_tick_label(sites)
        for axis in [axC.yaxis, axC.xaxis]:
            axis.set_minor_locator(matplotlib.ticker.IndexLocator(1, 0))
            axis.set_minor_formatter(matplotlib.ticker.NullFormatter())
            axis.set_major_locator(matplotlib.ticker.IndexLocator(1, 0.5))
            axis.set_major_formatter(matplotlib.ticker.FuncFormatter(isl))

    # Species dimension.  The major (name) formatter is installed further
    # down, once the display order of the sequences is known.
    if s_size:
        seq_names = [name.split(' ')[0]
                     for name in alignment.getSeqNames()]
        axS.yaxis.set_minor_locator(matplotlib.ticker.IndexLocator(1, 0))
        axS.yaxis.set_minor_formatter(matplotlib.ticker.NullFormatter())
        axS.yaxis.set_major_locator(matplotlib.ticker.IndexLocator(1, 0.5))

    # Display the main matrices: compatibility and partimatrix
    axC.pcolorfast(compatibility, cmap=plt.cm.gray)
    partishow = pmatrix <= 2
    axP.pcolorfast(partishow, cmap=plt.cm.gray)
    axP.set_autoscale_on(False)
    axC.plot([0, c_size], [0, c_size], color='lightgreen')
    (sx, sy) = numpy.nonzero(pmatrix.T == 0)
    axP.scatter(sx + 0.5, sy + 0.5, color='lightgreen', marker='^', s=15)

    # Make [partition, sequence] matrix
    # Not a good idea with too many sequences
    if s_size:
        partseq1 = numpy.empty([len(partitions), num_seqs], bool)
        partseq2 = numpy.empty([len(partitions), num_seqs], bool)
        for (i, (x, xz)) in enumerate(partitions):
            partseq1[i] = bit_decode(x, num_seqs)
            partseq2[i] = bit_decode(xz ^ x, num_seqs)

        # Order sequences so as to place similar sequences adjacent
        seq_order = order_to_cluster_similar(boolean_similarity(partseq1))
        partseq1 = partseq1[:, seq_order]
        partseq2 = partseq2[:, seq_order]
        seq_names = [seq_names[i] for i in seq_order]
        # Label the rows with the names in their *re-ordered* positions;
        # installing the formatter before re-ordering left the labels in
        # alignment order while the rows were drawn in clustered order.
        axS.yaxis.set_major_formatter(
            matplotlib.ticker.FixedFormatter(seq_names))
        axS.set_ylim(0, len(seq_names))
        axS.set_autoscale_on(False)

        for (halfpart, color) in [(partseq1, 'red'), (partseq2, 'blue')]:
            (sx, sy) = numpy.nonzero(halfpart)
            axS.scatter(sx + 0.5, sy + 0.5, color=color, marker='o')
        axS.grid(False)

    # Bar chart of partition support and conflict scores, each scaled by
    # its own total; conflict is drawn downwards, support upwards.
    bar_left = numpy.arange(len(partitions))
    if conflict.sum():
        axB.bar(bar_left, -conflict / float(conflict.sum()),
                1.0, color='black', align='edge')
    if support.sum():
        axB.bar(bar_left, +support / float(support.sum()),
                1.0, color='lightgreen', align='edge')
    axB.set_xlim(0.0, len(partitions))

    # Alignment features
    axA.set_ylim(0, len(alignment))
    axA.set_autoscale_on(False)
    axA.yaxis.set_major_formatter(
        matplotlib.ticker.FuncFormatter(lambda y, pos: str(int(y))))
    axA.yaxis.tick_right()
    axA.yaxis.set_label_position('right')
    axA.xaxis.tick_top()
    axA.xaxis.set_label_position('top')

    # "Zoom lines" linking informative-site coords to alignment coords
    from matplotlib.patches import PathPatch
    from matplotlib.path import Path
    axZ.set_xlim(0.0, 1.0)
    axZ.set_xticks([])
    axZ.set_ylim(0, len(alignment))
    axZ.set_yticks([])
    # Alignment positions per informative site (fractional on purpose).
    zoom = float(len(alignment)) / len(sites)
    vertices = []
    for (i, site) in enumerate(sites):
        vertices.extend([(.1, (i + 0.5) * zoom), (.9, site + 0.5)])
        axA.axhspan(site, site + 1, facecolor='green', edgecolor='green',
                    alpha=0.3)
    ops = [Path.MOVETO, Path.LINETO] * (len(vertices) // 2)
    path = Path(vertices, ops)
    axZ.add_patch(PathPatch(path, fill=False, linewidth=0.25))

    # interactive navigation messes up axZ.  Could use callbacks but
    # probably not worth the extra complexity.
    for ax in [axC, axP, axS, axB, axZ, axA]:
        ax.set_navigate(False)

    return fig