def plot_fingerprint(data, alpha=0.7, show_legend=True,
                     graph_name='fingerprint.png', has_mean=True,
                     which_blocks='quartets', multiple=False, graph_grid='t',
                     prob_axes=True, edge_colors='k', **kwargs):
    """Draw a codon-usage "fingerprint" bubble plot, one bubble per block.

    Bubbles are colored per codon block using the Sueoka 2002 color scheme
    (looked up in doublets_to_colors).

    Parameters:
        data: array whose columns are x, y, r for each codon block, one
            block per row. Row order is ALA, ARG4, GLY, LEU4, PRO, SER,
            THR, VAL for the traditional fingerprint, or UU -> GG for the
            16-block fingerprint. When has_mean is True, the row after the
            last block holds the mean. If multiple is True, a list or
            array of such blocks of data.
        alpha: transparency of the bubbles, from 0 (transparent) to
            1 (opaque); default 0.7. Broadcast to one value per series.
        show_legend: whether to draw the legend (default True).
        graph_name: name of the file to write (default 'fingerprint.png').
            Nothing is saved when this is None.
        has_mean: whether the data contain the mean row (default True).
        which_blocks: which codon blocks to plot. 'quartets' (default)
            gives the 4-codon amino acid blocks, 'split' gives just the
            split quartets, and any other value (e.g. 'all') gives all
            quartets.
        multiple: if False (default), data is a single block of data;
            otherwise data holds several blocks, one per series.
        graph_grid: passed on to init_graph_display (default 't').
        prob_axes: passed on to init_graph_display (default True).
        edge_colors: sequence of edge colors handed out to successive
            series; may be a string of 1-letter color codes or a list of
            color names. Broadcast to one value per series.
        **kwargs: passed on to init_graph_display. According to the
            original documentation these include graph_shape, graph_grid,
            x_label, y_label, dark, with_parens, title (default
            'Unknown Species'), num_genes (not printed if None) and
            size in inches (default 8.0).

    Note: the data are always expected to be in the range (0,1) since
    frequencies are being plotted; axes, grid, etc. are hard-coded to
    these values.
    """
    # Choose the set of codon blocks for this kind of fingerprint.
    if which_blocks == 'quartets':
        block_attr = 'SingleAABlocks'
    elif which_blocks == 'split':
        block_attr = 'SplitBlocks'
    else:
        block_attr = 'Blocks'
    blocks = getattr(CodonUsage, block_attr)
    bubble_colors = [doublets_to_colors[block] for block in blocks]

    # Set up the square display; the axis labels are LaTeX strings.
    _font, label_font_size = init_graph_display(
        graph_shape='sqr',
        graph_grid=graph_grid,
        x_label="$G_3/(G_3+C_3)$",
        y_label="$A_3/(A_3+T_3)$",
        prob_axes=prob_axes,
        **kwargs)

    # Normalise to a list of series, with one alpha and edge color each.
    series_list = data if multiple else [data]
    series_count = len(series_list)
    series_alphas = broadcast(alpha, series_count)
    series_edges = broadcast(edge_colors, series_count)

    for series_alpha, series, series_edge in zip(series_alphas, series_list,
                                                 series_edges):
        # A missing or all-zero series contributes nothing to the plot.
        if series is None or not series.any():
            continue

        for row_index, bubble_color in enumerate(bubble_colors):
            # One-row slice rather than a plain index: scatter_classic
            # needs the extra level of nesting.
            row = slice(row_index, row_index + 1)
            bubbles = scatter_classic(series[row, 0], series[row, 1],
                                      s=(series[row, 2] / 2), c=bubble_color)
            # Alpha and edge color are applied to each patch by hand.
            for bubble in bubbles:
                bubble.set_alpha(series_alpha)
                bubble.set_edgecolor(series_edge)

        # The mean is its own '+' marker -- scatter can't draw a cross.
        if has_mean:
            mean_row = len(blocks)  # the row right after the last block
            plot([series[mean_row, 0]], [series[mean_row, 1]],
                 '-k+', markersize=label_font_size, alpha=series_alpha)

    abbreviations = CodonUsage.BlockAbbreviations
    axes = gca()

    # Legend goes in the right-center area of the figure.
    if show_legend:
        legend_labels = [abbreviations[block] for block in blocks]
        # Start from a copy of the x axis label's font properties and
        # shrink it for the legend text.
        legend_font = axes.xaxis.get_label().get_fontproperties().copy()
        shrink_factor = 0.7
        legend_font.set_size(legend_font.get_size() * shrink_factor)
        legend = figlegend(axes.patches[:len(blocks)],
                           legend_labels,
                           prop=legend_font,
                           loc='center right', borderpad=0.1,
                           labelspacing=0.5, handlelength=1.0,
                           handletextpad=0.5, borderaxespad=0.0)
        # Legend swatches are drawn fully opaque.
        for swatch in legend.get_patches():
            swatch.set_alpha(1)

    # Probability axes with ticks at 0, 0.5 and 1 only.
    set_axis_to_probs()
    init_ticks(axes, label_font_size)
    axes.set_xticks([0, 0.5, 1])
    axes.set_yticks([0, 0.5, 1])

    # Write the figure out unless saving was explicitly disabled.
    if graph_name is not None:
        savefig(graph_name)
def plot_pr2_bias(data, title='ALANINE', graph_name='pr2_bias.png',
                  num_genes='ignored', **kwargs):
    """Draw a PR2-bias plot for one four-codon amino acid.

    Six series are plotted against the first column of data:
        - isotypic transversions (base swapping):
          G3/(G3+C3) and A3/(A3+T3)
        - transitions (deaminations):
          G3/(G3+A3) and C3/(C3+T3)
        - allotypic transversions (G-oxidations):
          G3/(G3+T3) and C3/(C3+A3)

    Parameters:
        data: array with columns in the order x, G3/(G3+C3), A3/(A3+T3),
            G3/(G3+A3), C3/(C3+T3), G3/(G3+T3), C3/(C3+A3).
        title: one amino acid written out in caps (default 'ALANINE'):
            ALANINE, ARGININE4, GLYCINE, LEUCINE4, PROLINE, SERINE4,
            THREONINE or VALINE. It is used as the graph title and as the
            key into aa_labels for the acronym printed on the graph:
                C2 type: ala(GCN), pro(CCN), ser4(TCN), thr(ACN)
                G2 type: arg4(CGN), gly(GGN)
                T2 type: leu4(CTN), val(GTN)
        graph_name: name of the file to write (default 'pr2_bias.png').
            Nothing is saved when this is None.
        num_genes: number of genes contributing to the graph; currently
            ignored.
        **kwargs: passed on to init_graph_display (per the original
            documentation this includes the graph size in inches,
            default 8.0).
    """
    # Set up the square display and get the fonts to use.
    font, label_font_size = init_graph_display(
        graph_shape='sqr', graph_grid='/', x_label="$P_3$",
        y_label="Y axis", prob_axes=True, title=title, **kwargs)

    # Marker size follows the label font, and thus the graph size.
    marker_size = (label_font_size - 1)

    # (data column, format string, line/marker colors) for each series,
    # in the column order listed in the docstring. Colors and symbols
    # are coded from Sueoka 2002.
    series_styles = (
        (1, '-ko', {'c': 'k'}),
        (2, '-kv', {'c': 'k'}),
        (3, '-ro', {'c': 'r'}),
        (4, '-rv', {'c': 'r'}),
        (5, '-wo', {'c': 'k', 'mfc': 'w'}),
        (6, '-wv', {'c': 'k', 'mfc': 'w'}),
    )
    for column, fmt, colors in series_styles:
        plot(data[:, 0], data[:, column], fmt,
             **dict(colors, markersize=marker_size))

    # Acronym for the graphed amino acid, in the bottom-right corner.
    text(.95, .05, aa_labels[title], font, verticalalignment='bottom',
         horizontalalignment='right')

    # Write the figure out unless saving was explicitly disabled.
    set_axis_to_probs()
    if graph_name is not None:
        savefig(graph_name)