Example #1
0
def plot_fingerprint(data, alpha=0.7,
    show_legend=True, graph_name='fingerprint.png', has_mean=True,
    which_blocks='quartets', multiple=False, graph_grid='t', prob_axes=True,
    edge_colors='k', **kwargs):
    """Draws a fingerprint plot of codon blocks and optionally saves it.

    Each codon block is drawn through scatter_classic, colored via
    doublets_to_colors (the original documentation attributes the color
    scheme to Sueoka 2002). The x axis is labeled G3/(G3+C3) and the
    y axis A3/(A3+T3).

    data: one series (or, if multiple is True, a sequence of series).
        Each series is indexed as a 2D array whose columns are x, y, r
        and whose rows follow the order of the selected block list
        (per the original documentation: ALA, ARG4, GLY, LEU4, PRO, SER,
        THR, VAL for the traditional fingerprint, or UU -> GG for the
        16-block fingerprint). If has_mean is True, the row immediately
        after the blocks is read as the mean. A series that is None, or
        for which .any() is false, is skipped.

    alpha: transparency applied to the patches and the mean marker
        (0 transparent, 1 opaque; default 0.7). Passed through
        broadcast() to obtain one value per series.

    show_legend: if True (default), adds a figure legend at
        'center right' built from the first len(blocks) patches of the
        current axes, labeled via CodonUsage.BlockAbbreviations. The
        legend font is copied from the x axis label and scaled by 0.7;
        legend patches are forced to alpha 1.

    graph_name: file name handed to savefig (default 'fingerprint.png').
        Nothing is saved if None.

    has_mean: whether each series carries a mean row (default True).
        The mean is drawn with plot() using the '-k+' format.

    which_blocks: 'quartets' (default) selects
        CodonUsage.SingleAABlocks, 'split' selects
        CodonUsage.SplitBlocks, anything else selects CodonUsage.Blocks.

    multiple: if False (default), data is treated as a single series;
        otherwise as a sequence of series.

    graph_grid, prob_axes: forwarded to init_graph_display.

    edge_colors: edge color(s) set on every patch. Passed through
        broadcast() to obtain one value per series, and applied whether
        or not multiple is set.

    **kwargs: forwarded to init_graph_display. NOTE(review): the
        original documentation lists title, num_genes, size, dark and
        with_parens here -- confirm against init_graph_display.
        graph_shape, x_label and y_label are already supplied by this
        function, so passing them in kwargs raises a duplicate-keyword
        TypeError.

    note: the data are expected to lie in the range (0,1) since
    frequencies are plotted; ticks are fixed at 0, 0.5 and 1.
    """
    #pick the attribute of CodonUsage holding the requested block list
    block_attr = 'Blocks'
    if which_blocks == 'quartets':
        block_attr = 'SingleAABlocks'
    elif which_blocks == 'split':
        block_attr = 'SplitBlocks'
    blocks = getattr(CodonUsage, block_attr)
    n_blocks = len(blocks)

    colors = [doublets_to_colors[block] for block in blocks]

    #set up the display; axis labels are in latex
    _font, label_font_size = init_graph_display(
        graph_shape='sqr',
        graph_grid=graph_grid,
        x_label="$G_3/(G_3+C_3)$",
        y_label="$A_3/(A_3+T_3)$",
        prob_axes=prob_axes,
        **kwargs)

    #always work on a sequence of series
    data = data if multiple else [data]
    n_series = len(data)
    alpha = broadcast(alpha, n_series)
    edge_colors = broadcast(edge_colors, n_series)

    for series_alpha, series, series_edge in zip(alpha, data, edge_colors):
        #nothing to draw for a missing or all-zero series
        if series is None:
            continue
        if not series.any():
            continue

        for row, color in enumerate(colors):
            #one-row slice rather than a scalar index: scatter_classic
            #needs the extra level of nesting
            rows = slice(row, row + 1)
            patches = scatter_classic(series[rows, 0], series[rows, 1],
                        s=(series[rows, 2]/2), c=color)
            #alpha and edge color have to be set on each patch by hand
            for patch in patches:
                patch.set_alpha(series_alpha)
                patch.set_edgecolor(series_edge)

        #the mean is its own point -- scatter can't draw a cross
        if has_mean:
            #the mean sits in the row right after the blocks
            plot([series[n_blocks, 0]], [series[n_blocks, 1]],
                 '-k+', markersize=label_font_size, alpha=series_alpha)

    abbrev = CodonUsage.BlockAbbreviations

    axes = gca()
    if show_legend:
        labels = [abbrev[block] for block in blocks]
        #start from the x axis label's font, then shrink it
        legend_font = axes.xaxis.get_label().get_fontproperties().copy()
        shrink = 0.7
        legend_font.set_size(legend_font.get_size()*shrink)
        legend = figlegend(axes.patches[:n_blocks],
                  labels,
                  prop=legend_font,
                  loc='center right', borderpad=0.1, labelspacing=0.5,
                  handlelength=1.0, handletextpad=0.5, borderaxespad=0.0)
        #legend entries should be opaque regardless of the bubble alpha
        for patch in legend.get_patches():
            patch.set_alpha(1)

    #fix up axes and ticks
    set_axis_to_probs()
    init_ticks(axes, label_font_size)
    for set_ticks in (axes.set_xticks, axes.set_yticks):
        set_ticks([0, 0.5, 1])

    #write the figure unless told not to
    if graph_name is None:
        return
    savefig(graph_name)
Example #2
0
def plot_fingerprint(data, alpha=0.7,
    show_legend=True, graph_name='fingerprint.png', has_mean=True,
    which_blocks='quartets', multiple=False, graph_grid='t', prob_axes=True,
    edge_colors='k', **kwargs):
    """Draws a fingerprint plot of codon blocks and optionally saves it.

    Each codon block is drawn through scatter_classic, colored via
    doublets_to_colors (the original documentation attributes the color
    scheme to Sueoka 2002). The x axis is labeled G3/(G3+C3) and the
    y axis A3/(A3+T3).

    data: one series (or, if multiple is True, a sequence of series).
        Each series is indexed as a 2D array whose columns are x, y, r
        and whose rows follow the order of the selected block list
        (per the original documentation: ALA, ARG4, GLY, LEU4, PRO, SER,
        THR, VAL for the traditional fingerprint, or UU -> GG for the
        16-block fingerprint). If has_mean is True, the row immediately
        after the blocks is read as the mean. A series that is None, or
        for which .any() is false, is skipped.

    alpha: transparency applied to the patches and the mean marker
        (0 transparent, 1 opaque; default 0.7). Passed through
        broadcast() to obtain one value per series.

    show_legend: if True (default), adds a figure legend at
        'center right' built from the first len(blocks) patches of the
        current axes, labeled via CodonUsage.BlockAbbreviations. The
        legend font is copied from the x axis label and scaled by 0.7;
        legend patches are forced to alpha 1.

    graph_name: file name handed to savefig (default 'fingerprint.png').
        Nothing is saved if None.

    has_mean: whether each series carries a mean row (default True).
        The mean is drawn with plot() using the '-k+' format.

    which_blocks: 'quartets' (default) selects
        CodonUsage.SingleAABlocks, 'split' selects
        CodonUsage.SplitBlocks, anything else selects CodonUsage.Blocks.

    multiple: if False (default), data is treated as a single series;
        otherwise as a sequence of series.

    graph_grid, prob_axes: forwarded to init_graph_display.

    edge_colors: edge color(s) set on every patch. Passed through
        broadcast() to obtain one value per series, and applied whether
        or not multiple is set.

    **kwargs: forwarded to init_graph_display. NOTE(review): the
        original documentation lists title, num_genes, size, dark and
        with_parens here -- confirm against init_graph_display.
        graph_shape, x_label and y_label are already supplied by this
        function, so passing them in kwargs raises a duplicate-keyword
        TypeError.

    note: the data are expected to lie in the range (0,1) since
    frequencies are plotted; ticks are fixed at 0, 0.5 and 1.
    """
    #pick the attribute of CodonUsage holding the requested block list
    block_attr = 'Blocks'
    if which_blocks == 'quartets':
        block_attr = 'SingleAABlocks'
    elif which_blocks == 'split':
        block_attr = 'SplitBlocks'
    blocks = getattr(CodonUsage, block_attr)
    n_blocks = len(blocks)

    colors = [doublets_to_colors[block] for block in blocks]

    #set up the display; axis labels are in latex
    _font, label_font_size = init_graph_display(
        graph_shape='sqr',
        graph_grid=graph_grid,
        x_label="$G_3/(G_3+C_3)$",
        y_label="$A_3/(A_3+T_3)$",
        prob_axes=prob_axes,
        **kwargs)

    #always work on a sequence of series
    data = data if multiple else [data]
    n_series = len(data)
    alpha = broadcast(alpha, n_series)
    edge_colors = broadcast(edge_colors, n_series)

    for series_alpha, series, series_edge in zip(alpha, data, edge_colors):
        #nothing to draw for a missing or all-zero series
        if series is None:
            continue
        if not series.any():
            continue

        for row, color in enumerate(colors):
            #one-row slice rather than a scalar index: scatter_classic
            #needs the extra level of nesting
            rows = slice(row, row + 1)
            patches = scatter_classic(series[rows, 0], series[rows, 1],
                        s=(series[rows, 2]/2), c=color)
            #alpha and edge color have to be set on each patch by hand
            for patch in patches:
                patch.set_alpha(series_alpha)
                patch.set_edgecolor(series_edge)

        #the mean is its own point -- scatter can't draw a cross
        if has_mean:
            #the mean sits in the row right after the blocks
            plot([series[n_blocks, 0]], [series[n_blocks, 1]],
                 '-k+', markersize=label_font_size, alpha=series_alpha)

    abbrev = CodonUsage.BlockAbbreviations

    axes = gca()
    if show_legend:
        labels = [abbrev[block] for block in blocks]
        #start from the x axis label's font, then shrink it
        legend_font = axes.xaxis.get_label().get_fontproperties().copy()
        shrink = 0.7
        legend_font.set_size(legend_font.get_size()*shrink)
        legend = figlegend(axes.patches[:n_blocks],
                  labels,
                  prop=legend_font,
                  loc='center right', borderpad=0.1, labelspacing=0.5,
                  handlelength=1.0, handletextpad=0.5, borderaxespad=0.0)
        #legend entries should be opaque regardless of the bubble alpha
        for patch in legend.get_patches():
            patch.set_alpha(1)

    #fix up axes and ticks
    set_axis_to_probs()
    init_ticks(axes, label_font_size)
    for set_ticks in (axes.set_xticks, axes.set_yticks):
        set_ticks([0, 0.5, 1])

    #write the figure unless told not to
    if graph_name is None:
        return
    savefig(graph_name)
Example #3
0
def plot_pr2_bias(data, title='ALANINE', graph_name='pr2_bias.png',
    num_genes='ignored', **kwargs):
    """Draws a PR2-bias plot with six series and optionally saves it.

    The six series, all plotted against column 0 of data, are:
    -isotypic transversions (base swapping):
     G3/(G3+C3) and A3/(A3+T3), black line, black markers
    -transitions (deaminations):
     G3/(G3+A3) and C3/(C3+T3), red line, red markers
    -allotypic transversions (G- oxidations):
     G3/(G3+T3) and C3/(C3+A3), black line, white-faced markers
    Within each pair the first series uses the 'o' marker and the
    second the 'v' marker (colors and symbols follow Sueoka 2002 per
    the original comments).

    data: indexed as a 2D array with columns in the order
        x, G3/(G3+C3), A3/(A3+T3), G3/(G3+A3), C3/(C3+T3),
        G3/(G3+T3), C3/(C3+A3).

    title: graph title (default 'ALANINE'), forwarded to
        init_graph_display and also used as the key into aa_labels for
        the text printed at (.95, .05), so it must be a key of
        aa_labels. The original documentation lists: ALANINE,
        ARGININE4, GLYCINE, LEUCINE4, PROLINE, SERINE4, THREONINE,
        VALINE, giving labels of the form
           C2 type: ala(GCN), pro(CCN), ser4(TCN), thr(ACN)
           G2 type: arg4 (CGN), an gly(GGN)
           T2 type: leu4(CTN), val (GTN)
        NOTE(review): confirm against the aa_labels definition.

    graph_name: file name handed to savefig (default 'pr2_bias.png').
        Nothing is saved if None.

    num_genes: accepted but not used by this function.

    **kwargs: forwarded to init_graph_display. NOTE(review): the
        original documentation mentions size (graph size in inches,
        default 8.0) -- confirm against init_graph_display.
    """
    #set up the display and fetch the font settings
    font, label_font_size = init_graph_display(
        graph_shape='sqr',
        graph_grid='/',
        x_label="$P_3$",
        y_label="Y axis",
        prob_axes=True,
        title=title,
        **kwargs)

    #markers scale with the label font, and so with the graph size
    marker_size = (label_font_size-1)

    #(data column, format string, color overrides) for each series, in
    #plotting order; the explicit c/mfc keywords are kept alongside the
    #format strings exactly as in the original calls
    series_specs = (
        (1, '-ko', (('c', 'k'),)),
        (2, '-kv', (('c', 'k'),)),
        (3, '-ro', (('c', 'r'),)),
        (4, '-rv', (('c', 'r'),)),
        (5, '-wo', (('c', 'k'), ('mfc', 'w'))),
        (6, '-wv', (('c', 'k'), ('mfc', 'w'))),
    )
    for column, fmt, color_items in series_specs:
        line_kwargs = dict(color_items)
        line_kwargs['markersize'] = marker_size
        plot(data[:,0], data[:,column], fmt, **line_kwargs)

    #amino acid label in the bottom right corner, looked up by title
    text(.95, .05, aa_labels[title], font, verticalalignment='bottom',
         horizontalalignment='right')

    #finish the axes, then write the figure unless told not to
    set_axis_to_probs()
    if graph_name is None:
        return
    savefig(graph_name)
Example #4
0
def plot_pr2_bias(data, title='ALANINE', graph_name='pr2_bias.png',
    num_genes='ignored', **kwargs):
    """Draws a PR2-bias plot with six series and optionally saves it.

    The six series, all plotted against column 0 of data, are:
    -isotypic transversions (base swapping):
     G3/(G3+C3) and A3/(A3+T3), black line, black markers
    -transitions (deaminations):
     G3/(G3+A3) and C3/(C3+T3), red line, red markers
    -allotypic transversions (G- oxidations):
     G3/(G3+T3) and C3/(C3+A3), black line, white-faced markers
    Within each pair the first series uses the 'o' marker and the
    second the 'v' marker (colors and symbols follow Sueoka 2002 per
    the original comments).

    data: indexed as a 2D array with columns in the order
        x, G3/(G3+C3), A3/(A3+T3), G3/(G3+A3), C3/(C3+T3),
        G3/(G3+T3), C3/(C3+A3).

    title: graph title (default 'ALANINE'), forwarded to
        init_graph_display and also used as the key into aa_labels for
        the text printed at (.95, .05), so it must be a key of
        aa_labels. The original documentation lists: ALANINE,
        ARGININE4, GLYCINE, LEUCINE4, PROLINE, SERINE4, THREONINE,
        VALINE, giving labels of the form
           C2 type: ala(GCN), pro(CCN), ser4(TCN), thr(ACN)
           G2 type: arg4 (CGN), an gly(GGN)
           T2 type: leu4(CTN), val (GTN)
        NOTE(review): confirm against the aa_labels definition.

    graph_name: file name handed to savefig (default 'pr2_bias.png').
        Nothing is saved if None.

    num_genes: accepted but not used by this function.

    **kwargs: forwarded to init_graph_display. NOTE(review): the
        original documentation mentions size (graph size in inches,
        default 8.0) -- confirm against init_graph_display.
    """
    #set up the display and fetch the font settings
    font, label_font_size = init_graph_display(
        graph_shape='sqr',
        graph_grid='/',
        x_label="$P_3$",
        y_label="Y axis",
        prob_axes=True,
        title=title,
        **kwargs)

    #markers scale with the label font, and so with the graph size
    marker_size = (label_font_size-1)

    #(data column, format string, color overrides) for each series, in
    #plotting order; the explicit c/mfc keywords are kept alongside the
    #format strings exactly as in the original calls
    series_specs = (
        (1, '-ko', (('c', 'k'),)),
        (2, '-kv', (('c', 'k'),)),
        (3, '-ro', (('c', 'r'),)),
        (4, '-rv', (('c', 'r'),)),
        (5, '-wo', (('c', 'k'), ('mfc', 'w'))),
        (6, '-wv', (('c', 'k'), ('mfc', 'w'))),
    )
    for column, fmt, color_items in series_specs:
        line_kwargs = dict(color_items)
        line_kwargs['markersize'] = marker_size
        plot(data[:,0], data[:,column], fmt, **line_kwargs)

    #amino acid label in the bottom right corner, looked up by title
    text(.95, .05, aa_labels[title], font, verticalalignment='bottom',
         horizontalalignment='right')

    #finish the axes, then write the figure unless told not to
    set_axis_to_probs()
    if graph_name is None:
        return
    savefig(graph_name)