def weblogoPOIM(logofile, poim, max_len):
    """Create a weblogo from the 1st-degree POIM instead of plotting a heatmap.

    The values in ``poim[0]`` are split by sign into a "positive" and a
    "negative" count matrix. Each matrix is rendered to a temporary PNG
    (``logofile + "p.png"`` / ``logofile + "n.png"``), the two images are
    combined by ``concatPNG(logofile, ...)``, and the temporaries are removed.

    Parameters
    ----------
    logofile : str
        Output path. Its base name (extension stripped, and only the part
        after the last underscore, if any) is used as the logo title.
    poim : sequence
        ``poim[0]`` is iterated as rows of numeric values; the transposed
        matrix is handed to ``LogoData.from_counts`` with the alphabet
        ``'ACGT'`` (presumably 4 rows x positions -- confirm with caller).
    max_len : object
        Currently unused; kept for interface compatibility.
    """
    warnings.filterwarnings('ignore', ' This call to matplotlib.use()*')
    from weblogolib import (LogoData, LogoOptions, LogoFormat, classic,
                            png_print_formatter)

    positive_logo = []
    negative_logo = []
    for row in poim[0]:
        pos_row = []
        neg_row = []
        for value in row:
            if value < 0:
                pos_row.append(0)
                # NOTE(review): negatives are scaled by 10000 while positives
                # are scaled by 1000 -- kept as in the original; confirm the
                # asymmetry is intentional.
                neg_row.append(value * -10000)
            else:
                neg_row.append(0)
                pos_row.append(value * 1000)
        positive_logo.append(pos_row)
        negative_logo.append(neg_row)

    pos_data = LogoData.from_counts('ACGT', numpy.array(positive_logo).T, None)
    neg_data = LogoData.from_counts("ACGT", numpy.array(negative_logo).T, None)

    # Derive the title from the file name: strip the extension only when one
    # exists (a bare rfind() slice would chop the last character otherwise).
    title = os.path.split(logofile)[1]
    dot = title.rfind(".")
    if dot != -1:
        title = title[:dot]
    if "_" in title:
        title = title[title.rfind("_") + 1:]

    neg_opt = LogoOptions()
    neg_opt.fineprint += " from KIRMES POIM data"
    neg_opt.small_fontsize = 4
    neg_opt.title_fontsize = 8
    neg_opt.scale_width = False
    neg_opt.logo_title = title + " Negative Logo"
    neg_format = LogoFormat(neg_data, neg_opt)

    pos_opt = LogoOptions()
    pos_opt.scale_width = False
    pos_opt.logo_title = title + " Positive Sequence Logo"
    pos_opt.show_fineprint = False
    pos_opt.color_scheme = classic
    pos_format = LogoFormat(pos_data, pos_opt)

    neg_path = logofile + "n.png"
    pos_path = logofile + "p.png"

    # PNG is binary data: open in 'wb' and let the context manager close the
    # handle even if the formatter raises.
    with open(neg_path, 'wb') as neg_logo:
        png_print_formatter(neg_data, neg_format, neg_logo)
    with open(pos_path, 'wb') as pos_logo:
        png_print_formatter(pos_data, pos_format, pos_logo)

    concatPNG(logofile, (pos_path, neg_path))
    os.remove(neg_path)
    os.remove(pos_path)
def write_weblogo(self, filepath):
    """Render this matrix as a print-quality PNG sequence logo titled "PWM".

    Reads ``self.values`` (an iterable of per-position distributions) and
    ``self.alphabet`` (an iterable of symbol strings, joined to build the
    logo alphabet).

    Parameters
    ----------
    filepath : str
        Destination path of the PNG file.
    """
    counts = np.array([tuple(distribution) for distribution in self.values])
    alph = Alphabet(''.join(self.alphabet))
    weblogoData = LogoData.from_counts(alph, counts)

    weblogoOptions = LogoOptions(color_scheme=classic)
    # The title attribute read by the formatter is ``logo_title``; assigning
    # to ``title`` only creates an unused attribute and the title is lost.
    weblogoOptions.logo_title = "PWM"
    weblogoFormat = LogoFormat(weblogoData, weblogoOptions)

    # Format first so that a formatter failure does not leave an empty file,
    # then write the PNG bytes in binary mode.
    png_bytes = png_print_formatter(weblogoData, weblogoFormat)
    with open(filepath, 'wb') as weblogo_file:
        weblogo_file.write(png_bytes)
def outputMotif(theta_motif, theta_background_matrix, lambda_motif, logev, k, outstr):
    """Report a motif's fraction and E-value and save its logo as PNG and EPS.

    Two files are written: ``outstr + "Motif_<k>.png"`` and
    ``outstr + "Motif_<k>.eps"``.

    Parameters
    ----------
    theta_motif : array-like
        Count matrix passed to ``LogoData.from_counts`` with the unambiguous
        DNA alphabet.
    theta_background_matrix : object
        Currently unused (the prior is deliberately not passed to WebLogo).
    lambda_motif : float
        Motif fraction; its logarithm is formatted for display.
    logev : float
        Value formatted and printed as the E-value (presumably already a
        logarithm, given it is passed to ``sprint_logx`` directly -- confirm).
    k : object
        Motif identifier used in the messages and the file names.
    outstr : str
        Prefix (directory and/or file stem) for the output files.
    """
    from weblogolib import (LogoData, LogoOptions, LogoFormat, png_formatter,
                            eps_formatter, unambiguous_dna_alphabet)

    _pv_format = "%3.1fe%+04.0f"
    f_string = sprint_logx(log(lambda_motif), 1, _pv_format)
    g_string = sprint_logx(logev, 1, _pv_format)
    print(("Motif {0:s} had a fraction of {1:s}").format(str(k), f_string))
    print(("Motif {0:s} had an E-value of {1:s}").format(str(k), g_string))

    print('Saving motif as a png...')
    data = LogoData.from_counts(counts=theta_motif,
                                alphabet=unambiguous_dna_alphabet)
    options = LogoOptions()
    # ``logo_title`` is the attribute the formatter reads; ``title`` would be
    # silently ignored.
    options.logo_title = 'Motif'
    forma = LogoFormat(data, options)

    basename = outstr + "Motif_" + str(k)
    # PNG is binary: write it through a binary-mode handle.
    with open(basename + '.png', 'wb') as fout:
        png_formatter(data, forma, fout)

    print('Saving motif as an eps...')
    with open(basename + '.eps', 'w') as fout:
        eps_formatter(data, forma, fout)
def createSeqLogo(pfm, filename, fformat='eps'):
    """Create a sequence logo for an individual cRBM motif.

    Parameters
    -----------
    pfm : numpy-array
        2D numpy array representing a PFM. See :meth:`CRBM.getPFMs`.
        Its transpose is passed to WebLogo as the count matrix for the
        alphabet ``'ACGT'``.

    filename : str
        Path of the output file to write.

    fformat : str
        File format for storing the sequence logo; must be a key of the
        module-level ``formatters`` mapping. Default: 'eps'.

    Raises
    ------
    KeyError
        If ``fformat`` is not a key of ``formatters``.
    """
    alph = Alphabet('ACGT')
    weblogoData = LogoData.from_counts(alph, pfm.T)
    weblogoOptions = LogoOptions(color_scheme=classic)
    weblogoFormat = LogoFormat(weblogoData, weblogoOptions)
    content = formatters[fformat](weblogoData, weblogoFormat)

    # The context manager closes the handle even if the write fails.
    with open(filename, "wb") as f:
        f.write(content)
def _build_option_parser():
    """Build the command-line option parser for the WebLogo CLI.

    Options are organised into seven groups (I/O, data, transformations,
    format, color, advanced format, server). Most defaults are read from a
    freshly constructed ``LogoOptions`` so that the command line and the
    library agree.

    Returns
    -------
    DeOptionParser
        The fully populated parser.
    """
    defaults = LogoOptions()
    parser = DeOptionParser(
        usage="%prog [options] < sequence_data.fa > sequence_logo.eps",
        description=description,
        version=release_description,
        add_verbose_options=False)

    io_grp = OptionGroup(parser, "Input/Output Options")
    data_grp = OptionGroup(parser, "Logo Data Options")
    trans_grp = OptionGroup(
        parser, "Transformations",
        "Optional transformations of the sequence data.")
    format_grp = OptionGroup(
        parser, "Logo Format Options",
        "These options control the format and display of the logo.")
    color_grp = OptionGroup(
        parser, "Color Options",
        "Colors can be specified using CSS2 syntax. e.g. 'red', '#FF0000', etc.")
    advanced_grp = OptionGroup(
        parser, "Advanced Format Options",
        "These options provide fine control over the display of the logo. ")
    server_grp = OptionGroup(
        parser, "WebLogo Server",
        "Run a standalone webserver on a local port.")

    # Registration order determines the order of sections in --help.
    for group in (io_grp, data_grp, trans_grp, format_grp, color_grp,
                  advanced_grp, server_grp):
        parser.add_option_group(group)

    # ========================== IO OPTIONS ==========================
    io_grp.add_option(
        "-f", "--fin",
        dest="fin",
        action="store",
        type="file_in",
        default=None,
        help="Sequence input file (default: stdin)",
        metavar="FILENAME")

    # Add position weight matrix formats to input parsers by hand
    fin_choices = dict(seq_io.format_names())
    fin_choices['transfac'] = 'transfac'

    io_grp.add_option(
        "-D", "--datatype",
        dest="input_parser",
        action="store",
        type="dict",
        default=seq_io,
        choices=fin_choices,
        help="Type of multiple sequence alignment or position weight matrix file: (%s, transfac)"
             % ', '.join([f.names[0] for f in seq_io.formats]),
        metavar="FORMAT")

    io_grp.add_option(
        "-o", "--fout",
        dest="fout",
        type="file_out",
        default=sys.stdout,
        help="Output file (default: stdout)",
        metavar="FILENAME")

    io_grp.add_option(
        "-F", "--format",
        dest="formatter",
        action="store",
        type="dict",
        choices=formatters,
        metavar="FORMAT",
        help="Format of output: eps (default), png, png_print, pdf, jpeg, svg, logodata",
        default=default_formatter)

    # ========================== Data OPTIONS ==========================
    # NOTE: --sequence-type and --alphabet intentionally share dest="alphabet".
    data_grp.add_option(
        "-A", "--sequence-type",
        dest="alphabet",
        action="store",
        type="dict",
        choices=std_alphabets,
        help="The type of sequence data: 'protein', 'rna' or 'dna'.",
        metavar="TYPE")

    data_grp.add_option(
        "-a", "--alphabet",
        dest="alphabet",
        action="store",
        help="The set of symbols to count, e.g. 'AGTC'. "
             "All characters not in the alphabet are ignored. "
             "If neither the alphabet nor sequence-type are specified then weblogo will examine the input data and make an educated guess. "
             "See also --sequence-type, --ignore-lower-case")

    data_grp.add_option(
        "-U", "--units",
        dest="unit_name",
        action="store",
        # optparse requires a real list/tuple here; a dict view is rejected.
        choices=list(std_units.keys()),
        type="choice",
        default=defaults.unit_name,
        help="A unit of entropy ('bits' (default), 'nats', 'digits'), or a unit of free energy ('kT', 'kJ/mol', 'kcal/mol'), or 'probability' for probabilities",
        metavar="NUMBER")

    data_grp.add_option(
        "", "--composition",
        dest="composition",
        action="store",
        type="string",
        default="auto",
        help="The expected composition of the sequences: 'auto' (default), 'equiprobable', 'none' (do not perform any compositional adjustment), a CG percentage, a species name (e.g. 'E. coli', 'H. sapiens'), or an explicit distribution (e.g. \"{'A':10, 'C':40, 'G':40, 'T':10}\"). The automatic option uses a typical distribution for proteins and equiprobable distribution for everything else. ",
        metavar="COMP.")

    data_grp.add_option(
        "", "--weight",
        dest="weight",
        action="store",
        type="float",
        default=None,
        help="The weight of prior data. Default depends on alphabet length",
        metavar="NUMBER")

    data_grp.add_option(
        "-i", "--first-index",
        dest="first_index",
        action="store",
        type="int",
        default=1,
        help="Index of first position in sequence data (default: 1)",
        metavar="INDEX")

    data_grp.add_option(
        "-l", "--lower",
        dest="logo_start",
        action="store",
        type="int",
        help="Lower bound of sequence to display",
        metavar="INDEX")

    data_grp.add_option(
        "-u", "--upper",
        dest="logo_end",
        action="store",
        type="int",
        help="Upper bound of sequence to display",
        metavar="INDEX")

    # ========================== Transformation OPTIONS ==========================
    # FIXME Add test?
    trans_grp.add_option(
        "", "--ignore-lower-case",
        dest="ignore_lower_case",
        action="store_true",
        default=False,
        help="Disregard lower case letters and only count upper case letters in sequences.")

    trans_grp.add_option(
        "", "--reverse",
        dest="reverse",
        action="store_true",
        default=False,
        help="reverse sequences")

    trans_grp.add_option(
        "", "--complement",
        dest="complement",
        action="store_true",
        default=False,
        help="complement DNA sequences")

    # ========================== FORMAT OPTIONS ==========================
    # NOTE: --size and --stack-width (advanced group) share dest="stack_width".
    format_grp.add_option(
        "-s", "--size",
        dest="stack_width",
        action="store",
        type="dict",
        choices=std_sizes,
        metavar="LOGOSIZE",
        default=defaults.stack_width,
        help="Specify a standard logo size (small, medium (default), large)")

    format_grp.add_option(
        "-n", "--stacks-per-line",
        dest="stacks_per_line",
        action="store",
        type="int",
        help="Maximum number of logo stacks per logo line. (default: %default)",
        default=defaults.stacks_per_line,
        metavar="COUNT")

    format_grp.add_option(
        "-t", "--title",
        dest="logo_title",
        action="store",
        type="string",
        help="Logo title text.",
        default=defaults.logo_title,
        metavar="TEXT")

    format_grp.add_option(
        "", "--label",
        dest="logo_label",
        action="store",
        type="string",
        help="A figure label, e.g. '2a'",
        default=defaults.logo_label,
        metavar="TEXT")

    format_grp.add_option(
        "-X", "--show-xaxis",
        action="store",
        type="boolean",
        default=defaults.show_xaxis,
        metavar="YES/NO",
        help="Display sequence numbers along x-axis? (default: %default)")

    format_grp.add_option(
        "-x", "--xlabel",
        dest="xaxis_label",
        action="store",
        type="string",
        default=defaults.xaxis_label,
        help="X-axis label",
        metavar="TEXT")

    format_grp.add_option(
        "", "--annotate",
        dest="annotate",
        action="store",
        type="string",
        default=None,
        help="A comma separated list of custom stack annotations, e.g. '1,3,4,5,6,7'. Annotation list must be same length as sequences.",
        metavar="TEXT")

    format_grp.add_option(
        "-S", "--yaxis",
        dest="yaxis_scale",
        action="store",
        type="float",
        help="Height of yaxis in units. (Default: Maximum value with uninformative prior.)",
        metavar="UNIT")

    format_grp.add_option(
        "-Y", "--show-yaxis",
        action="store",
        type="boolean",
        dest="show_yaxis",
        default=defaults.show_yaxis,
        metavar="YES/NO",
        help="Display entropy scale along y-axis? (default: %default)")

    format_grp.add_option(
        "-y", "--ylabel",
        dest="yaxis_label",
        action="store",
        type="string",
        help="Y-axis label (default depends on plot type and units)",
        metavar="TEXT")

    format_grp.add_option(
        "-E", "--show-ends",
        action="store",
        type="boolean",
        default=defaults.show_ends,
        metavar="YES/NO",
        help="Label the ends of the sequence? (default: %default)")

    format_grp.add_option(
        "-P", "--fineprint",
        dest="fineprint",
        action="store",
        type="string",
        default=defaults.fineprint,
        help="The fine print (default: weblogo version)",
        metavar="TEXT")

    format_grp.add_option(
        "", "--ticmarks",
        dest="yaxis_tic_interval",
        action="store",
        type="float",
        default=defaults.yaxis_tic_interval,
        help="Distance between ticmarks (default: %default)",
        metavar="NUMBER")

    format_grp.add_option(
        "", "--errorbars",
        dest="show_errorbars",
        action="store",
        type="boolean",
        default=defaults.show_errorbars,
        metavar="YES/NO",
        help="Display error bars? (default: %default)")

    format_grp.add_option(
        "", "--reverse-stacks",
        dest="reverse_stacks",
        action="store",
        type="boolean",
        # Use the option's own default rather than the error-bar default,
        # which had been copied from the --errorbars option above.
        default=defaults.reverse_stacks,
        metavar="YES/NO",
        help="Draw stacks with largest letters on top? (default: %default)")

    # ========================== Color OPTIONS ==========================
    # TODO: Future Feature -- a "-K/--color-key" boolean option
    # (dest="show_color_key", default=defaults.show_color_key) that displays
    # a color key.
    color_scheme_choices = sorted(std_color_schemes.keys())
    color_grp.add_option(
        "-c", "--color-scheme",
        dest="color_scheme",
        action="store",
        type="dict",
        choices=std_color_schemes,
        metavar="SCHEME",
        default=None,  # Auto
        help="Specify a standard color scheme (%s)"
             % ", ".join(color_scheme_choices))

    color_grp.add_option(
        "-C", "--color",
        dest="colors",
        action="append",
        metavar="COLOR SYMBOLS DESCRIPTION ",
        nargs=3,
        default=[],
        help="Specify symbol colors, e.g. --color black AG 'Purine' --color red TC 'Pyrimidine' ")

    color_grp.add_option(
        "", "--default-color",
        dest="default_color",
        action="store",
        metavar="COLOR",
        default=defaults.default_color,
        help="Symbol color if not otherwise specified.")

    # ========================== Advanced options =========================
    advanced_grp.add_option(
        "-W", "--stack-width",
        dest="stack_width",
        action="store",
        type="float",
        default=defaults.stack_width,
        help="Width of a logo stack (default: %s)" % defaults.stack_width,
        metavar="POINTS")

    advanced_grp.add_option(
        "", "--aspect-ratio",
        dest="stack_aspect_ratio",
        action="store",
        type="float",
        default=defaults.stack_aspect_ratio,
        help="Ratio of stack height to width (default: %s)"
             % defaults.stack_aspect_ratio,
        metavar="POINTS")

    advanced_grp.add_option(
        "", "--box",
        dest="show_boxes",
        action="store",
        type="boolean",
        default=False,
        metavar="YES/NO",
        help="Draw boxes around symbols? (default: no)")

    advanced_grp.add_option(
        "", "--resolution",
        dest="resolution",
        action="store",
        type="float",
        default=96,
        help="Bitmap resolution in dots per inch (DPI). (Default: 96 DPI, except png_print, 600 DPI) Low resolution bitmaps (DPI<300) are antialiased.",
        metavar="DPI")

    advanced_grp.add_option(
        "", "--scale-width",
        dest="scale_width",
        action="store",
        type="boolean",
        default=True,
        metavar="YES/NO",
        help="Scale the visible stack width by the fraction of symbols in the column? (I.e. columns with many gaps of unknowns are narrow.) (Default: yes)")

    advanced_grp.add_option(
        "", "--debug",
        action="store",
        type="boolean",
        default=defaults.debug,
        metavar="YES/NO",
        help="Output additional diagnostic information. (Default: %default)")

    # ========================== Server options =========================
    server_grp.add_option(
        "", "--serve",
        dest="serve",
        action="store_true",
        default=False,
        help="Start a standalone WebLogo server for creating sequence logos.")

    server_grp.add_option(
        "", "--port",
        dest="port",
        action="store",
        type="int",
        default=8080,
        help="Listen to this local port. (Default: %default)",
        metavar="PORT")

    return parser
def _build_logoformat(logodata, opts):
    """Extract and process relevant option values and return a LogoFormat.

    Parameters
    ----------
    logodata : LogoData
        The data the logo is built from; passed through to ``LogoFormat``.
    opts : object
        Parsed command-line options. Every name in ``direct_from_opts`` must
        be present in ``opts.__dict__``; ``opts.colors`` and ``opts.annotate``
        are read as attributes.

    Returns
    -------
    LogoFormat

    Raises
    ------
    KeyError
        If one of the expected option names is missing from ``opts``.
    ValueError
        If building a ``ColorGroup`` from a ``--color`` triple raises
        ``ValueError``; it is re-raised with an option-specific message.
    """
    # Options copied verbatim onto the LogoOptions instance.
    # ("show_color_key" is reserved for a future feature.)
    direct_from_opts = (
        "stacks_per_line",
        "logo_title",
        "yaxis_label",
        "show_xaxis",
        "show_yaxis",
        "xaxis_label",
        "show_ends",
        "fineprint",
        "show_errorbars",
        "show_boxes",
        "yaxis_tic_interval",
        "resolution",
        "alphabet",
        "debug",
        "default_color",
        "color_scheme",
        "unit_name",
        "logo_label",
        "yaxis_scale",
        "first_index",
        "logo_start",
        "logo_end",
        "scale_width",
        "annotate",
        "stack_width",
        "stack_aspect_ratio",
        "reverse_stacks",
    )
    opt_values = vars(opts)
    args = dict((name, opt_values[name]) for name in direct_from_opts)

    # Explicit --color triples replace any named color scheme.
    if opts.colors:
        color_scheme = ColorScheme()
        for color, symbols, desc in opts.colors:
            try:
                color_scheme.groups.append(ColorGroup(symbols, color, desc))
            except ValueError:
                raise ValueError(
                    "error: option --color: invalid value: '%s'" % color)
        args["color_scheme"] = color_scheme

    # The annotation string is a comma separated list, one entry per stack.
    if opts.annotate:
        args["annotate"] = opts.annotate.split(',')

    logooptions = LogoOptions()
    for name, value in args.items():
        setattr(logooptions, name, value)

    return LogoFormat(logodata, logooptions)