def test_parse_table_fail(self):
    """The Clustal reader must raise ValueError on the table_io example."""
    stream = StringIO(table_io.example)
    with self.assertRaises(ValueError):
        clustal_io.read(stream)
    stream.close()
def __str__(self):
    """Return the logo data as a tab-separated, '#'-commented text table.

    The table has one row per position (``self.length`` rows). Each row
    holds the 1-based position number, the raw counts from
    ``self.counts[i]``, the entropy, the entropy interval bounds (left
    blank when ``self.entropy_interval`` is None) and the weight (omitted
    when ``self.weight`` is None).
    """
    out = StringIO()

    # Explanatory header. Rows are numbered ``i + 1`` below, so the
    # header states that positions count from one.
    header_lines = (
        '## LogoData',
        '# First column is position number, counting from one',
        '# Subsequent columns are raw symbol counts',
        '# Entropy is mean entropy measured in nats.',
        '# Low and High are the 95% confidence limits.',
        '# Weight is the fraction of non-gap symbols in the column.',
        '#\t',
    )
    for line in header_lines:
        print(line, file=out)

    # Show column names
    print('#', end='\t', file=out)
    for symbol in self.alphabet:
        print(symbol, end=' \t', file=out)
    print('Entropy\tLow\tHigh\tWeight', file=out)

    # Write the data table
    for i in range(self.length):
        print(i + 1, end=' \t', file=out)
        for count in self.counts[i]:
            print(count, end=' \t', file=out)
        print("%6.4f" % self.entropy[i], end=' \t', file=out)
        if self.entropy_interval is not None:
            low, high = self.entropy_interval[i][0], self.entropy_interval[i][1]
            print("%6.4f" % low, end=' \t', file=out)
            print("%6.4f" % high, end=' \t', file=out)
        else:
            # Keep the Low/High columns present but empty.
            print('\t', '\t', end='', file=out)
        if self.weight is not None:
            print("%6.4f" % self.weight[i], end='', file=out)
        print('', file=out)

    print('# End LogoData', file=out)
    return out.getvalue()
def test_isaligned(self):
    """Check isaligned() on empty input, the FASTA example, and example4.

    Uses unittest assertions rather than bare ``assert`` so the checks
    still run when Python is started with -O.
    """
    empty = fasta_io.read(StringIO())
    self.assertTrue(empty.isaligned())

    aligned = fasta_io.read(StringIO(fasta_io.example))
    self.assertTrue(aligned.isaligned())

    ragged = fasta_io.read(StringIO(example4))
    self.assertFalse(ragged.isaligned())
def test_write_seq(self):
    """Sequences written by array_io must read back equal to the originals."""
    original = array_io.read(StringIO(array_io.example))

    sink = StringIO()
    array_io.write(sink, original)
    sink.seek(0)

    reloaded = array_io.read(sink)
    self.assertEqual(original, reloaded)
def test_write(self):
    """null_io.write must leave the output stream empty."""
    sequences = plain_io.read(StringIO(plain_io.example))

    sink = StringIO()
    null_io.write(sink, sequences)
    sink.seek(0)

    written = sink.read()
    self.assertEqual(written, '')
def test_read_headerless(self):
    """Read example3 (blank headers), expect four sequences, then write them."""
    # This example has blank headers.
    parsed = fasta_io.read(StringIO(example3))
    self.assertEqual(len(parsed), 4)

    # Writing is exercised only for the absence of errors; the output is
    # not inspected.
    sink = StringIO()
    fasta_io.write(sink, parsed)
def test_write_comments(self):
    """A FASTA write/read round trip must keep sequences and descriptions."""
    original = fasta_io.read(StringIO(example_with_optional_comments))

    sink = StringIO()
    fasta_io.write(sink, original)
    sink.seek(0)
    reloaded = fasta_io.read(sink)

    self.assertEqual(original, reloaded)
    self.assertEqual(original[1].description, reloaded[1].description)
def test_write_comments(self):
    """Write the commented FASTA example and check it reads back unchanged."""
    source = StringIO(example_with_optional_comments)
    before = fasta_io.read(source)

    buffer = StringIO()
    fasta_io.write(buffer, before)
    buffer.seek(0)
    after = fasta_io.read(buffer)

    self.assertEqual(before, after)
    self.assertEqual(before[1].description, after[1].description)
def test_read_alphabets(self):
    """SubMatrix.read must raise ValueError when the alphabet is incompatible."""
    # incompatible alphabets
    with self.assertRaises(ValueError):
        SubMatrix.read(StringIO(test_matrix3))

    # The same matrix reads without error once an explicit alphabet is given.
    SubMatrix.read(StringIO(test_matrix3),
                   alphabet=Alphabet('ARNDCQEGHILKMFPSTWYV'))

    with self.assertRaises(ValueError):
        SubMatrix.read(StringIO(test_matrix1), unambiguous_protein_alphabet)
def test_write_seq(self):
    """A plain_io write/read round trip must keep the alphabet and sequences."""
    original = plain_io.read(StringIO(plain_io.example))

    sink = StringIO()
    plain_io.write(sink, original)
    sink.seek(0)
    reloaded = plain_io.read(sink)

    # Compare from the most specific to the most general so a failure
    # points at the smallest difference first.
    self.assertEqual(original[0].alphabet, reloaded[0].alphabet)
    self.assertEqual(original[0], reloaded[0])
    self.assertEqual(original, reloaded)
def test_write_seq(self):
    """Write the plain example and check it reads back equal to the original."""
    source = StringIO(plain_io.example)
    before = plain_io.read(source)

    buffer = StringIO()
    plain_io.write(buffer, before)
    buffer.seek(0)
    after = plain_io.read(buffer)

    # Alphabet first, then the first sequence, then the whole list.
    self.assertEqual(before[0].alphabet, after[0].alphabet)
    self.assertEqual(before[0], after[0])
    self.assertEqual(before, after)
def _build_logodata(options):
    """Build a LogoData object from the input selected by *options*.

    Input comes from ``options.fin``, from ``options.upload`` (opened with
    ``_from_URL_fileopen``) or, when neither is set, from standard input.
    The data is first parsed as a transfac motif. If that raises
    ValueError it is parsed as sequence data with
    ``options.input_parser.read``. The reverse / complement / revcomp
    options are applied before the prior is computed.

    Raises:
        ValueError: if both ``fin`` and ``upload`` are given, if
            ``--ignore-lower-case`` is combined with matrix input, if a
            non-nucleic alphabet is complemented, or when the transfac
            parser fails and ``options.input_parser == "transfac"``.
    """
    source = options.fin
    if options.upload is not None:
        if source is not None:
            raise ValueError(
                "error: options --fin and --upload are incompatible")
        from . import _from_URL_fileopen
        source = _from_URL_fileopen(options.upload)
    elif source is None:
        source = StringIO(sys.stdin.read())

    try:
        # Try reading data in transfac format first.
        from corebio.matrix import Motif
        motif = Motif.read_transfac(source, alphabet=options.alphabet)
    except ValueError as motif_err:
        # Failed reading Motif, try reading as multiple sequence data.
        if options.input_parser == "transfac":
            # Adding transfac as str instead of parser is a bit of an ugly
            # kludge.
            raise motif_err
        is_motif = False
        seqs = read_seq_data(source,
                             options.input_parser.read,
                             alphabet=options.alphabet,
                             ignore_lower_case=options.ignore_lower_case)
    else:
        is_motif = True

    want_reverse = options.reverse or options.revcomp
    want_complement = options.complement or options.revcomp

    if is_motif:
        if options.ignore_lower_case:
            raise ValueError(
                "error: option --ignore-lower-case incompatible with matrix input"
            )
        if want_reverse:
            motif.reverse()
        if want_complement:
            motif.complement()
        prior = parse_prior(options.composition, motif.alphabet,
                            options.weight)
        return LogoData.from_counts(motif.alphabet, motif, prior)

    if want_reverse:
        seqs = SeqList([s.reverse() for s in seqs], seqs.alphabet)

    if want_complement:
        if not nucleic_alphabet.alphabetic(seqs.alphabet):
            raise ValueError('non-nucleic sequence cannot be complemented')
        # Complement under the nucleic alphabet, then put the original
        # alphabet back on the resulting list.
        saved_alphabet = seqs.alphabet
        seqs.alphabet = nucleic_alphabet
        complemented = [Seq(s, seqs.alphabet).complement() for s in seqs]
        seqs = SeqList(complemented, seqs.alphabet)
        seqs.alphabet = saved_alphabet

    prior = parse_prior(options.composition, seqs.alphabet, options.weight)
    return LogoData.from_seqs(seqs, prior)
def test_read_fasta(self):
    """seq_io.read must parse the FASTA example into three sequences."""
    parsed = seq_io.read(StringIO(fasta_io.example))

    self.assertEqual(len(parsed), 3)
    first, second = parsed[0], parsed[1]
    self.assertEqual(first.description, "Lamprey GLOBIN V - SEA LAMPREY")
    self.assertEqual(len(second), 231)
def test_read_example_array(self):
    """seq_io.read must parse the array example into eight unnamed sequences."""
    parsed = seq_io.read(StringIO(array_io.example))

    self.assertEqual(len(parsed), 8)
    first, second = parsed[0], parsed[1]
    self.assertEqual(first.name, None)
    self.assertEqual(len(second), 60)
def test_parse_fasta_fail(self):
    """The Clustal reader must reject the FASTA example with ValueError.

    Each assertion gets its own fresh stream. Reusing one stream would
    make the second call start from wherever the first parse stopped,
    rather than from the beginning of the example.
    """
    # should fail with parse error
    with self.assertRaises(ValueError):
        clustal_io.read(StringIO(fasta_io.example), protein_alphabet)

    with self.assertRaises(ValueError):
        clustal_io.read(StringIO(fasta_io.example))
def test_read_comments(self):
    """Comment lines in the FASTA example must end up in the description."""
    parsed = fasta_io.read(StringIO(example_with_optional_comments))
    self.assertEqual(len(parsed), 2)

    second = parsed[1]
    self.assertEqual(second.startswith("SATVSEI"), True)

    description_lines = second.description.splitlines()
    self.assertEqual(description_lines[1], "comment line 1 (optional)")
def test_read_example(self):
    """plain_io.read must return one unnamed sequence of length 450."""
    parsed = plain_io.read(StringIO(plain_io.example))

    self.assertEqual(len(parsed), 1)
    only = parsed[0]
    self.assertEqual(only.name, None)
    self.assertEqual(len(only), 450)
def test_fail(self):
    """array_io.read must raise ValueError when rows differ in length."""
    # Lengths differ
    ragged = """
-SPC-MLETETLNKYVVIIAYALVFLLSLLGNSLVMLVILYSRVGRSVTDVYLLNLALAD
-EPC-RDEN
"""
    with self.assertRaises(ValueError):
        array_io.read(StringIO(ragged))
def test_read(self):
    """ig_io.read must parse the example into two described sequences."""
    parsed = ig_io.read(StringIO(ig_io.example))
    self.assertEqual(len(parsed), 2)

    first, second = parsed[0], parsed[1]
    self.assertEqual(first.description, "H.sapiens fau mRNA, 518 bases")
    self.assertEqual(second.name, "HSFAU1")
    self.assertEqual(len(second), 299)
def test_index(self):
    """The FASTA index must support lookup by position and by name."""
    index = fasta_io.index(StringIO(fasta_io.example))
    self.assertEqual(len(index), 3)

    # Lookup by position
    self.assertEqual(index[0].description, "Lamprey GLOBIN V - SEA LAMPREY")
    self.assertEqual(index[0].name, "Lamprey")

    # Lookup by name
    self.assertEqual(index['Lamprey'].name, "Lamprey")
    self.assertEqual(len(index['Hagfish']), 231)
def test_parse_clustal2_newline(self):
    """The Clustal reader must accept input with Windows line endings."""
    # Bug regression test. Clustal barfed on windows line endings, sometimes
    handle = data_stream("clustalw2.aln")
    unix_text = handle.read()

    # Change to windows line endings
    windows_text = unix_text.replace("\n", "\r\n")

    clustal_io.read(StringIO(windows_text))
    handle.close()
def test_write(self):
    """A clustal_io write/read round trip must reproduce the sequences."""
    source = StringIO(clustal_io.example)
    original = clustal_io.read(source)

    sink = StringIO()
    clustal_io.write(sink, original)
    sink.seek(0)
    reloaded = clustal_io.read(sink)

    self.assertEqual(original, reloaded)
    source.close()
def read_seq_data(fin,
                  input_parser=seq_io.read,
                  alphabet=None,
                  ignore_lower_case=False,
                  max_file_size=0):
    """Parse sequence data from *fin* and return the resulting seqs object.

    The environment variable WEBLOGO_MAX_FILE_SIZE overrides the
    max_file_size argument. It is used to limit the load on the WebLogo
    webserver.

    Args:
        fin: input stream to read from.
        input_parser: callable that takes the stream and returns the
            sequences.
        alphabet: if truthy, passed to ``Alphabet`` and attached to the
            result; otherwise ``Alphabet.which`` chooses one.
        ignore_lower_case: if true, each sequence is replaced by its
            ``mask()`` so lower case letters are not counted.
        max_file_size: maximum amount of data to accept; 0 means no limit.

    Raises:
        IOError: if the input is larger than the size limit.
        ValueError: if the parser returns None or no sequences.
    """
    size_limit = int(os.environ.get("WEBLOGO_MAX_FILE_SIZE", max_file_size))

    # If a limit is set, or if fin is stdin (which is non-seekable), read
    # the data and replace fin with a StringIO object.
    if size_limit > 0:
        head = fin.read(size_limit)
        overflow = fin.read(2)
        if overflow != "":
            raise IOError("File exceeds maximum allowed size: %d bytes" %
                          size_limit)
        fin = StringIO(head)
    elif fin == sys.stdin:
        fin = StringIO(fin.read())

    fin.seek(0)
    seqs = input_parser(fin)

    if seqs is None or len(seqs) == 0:
        raise ValueError("Please provide a multiple sequence alignment")

    if ignore_lower_case:
        # Case is significant. Do not count lower case letters.
        for position, sequence in enumerate(seqs):
            seqs[position] = sequence.mask()

    # Add alphabet to seqs.
    seqs.alphabet = Alphabet(alphabet) if alphabet else Alphabet.which(seqs)
    return seqs
def test_write(self):
    """Writing through null_io must leave the output stream empty."""
    source = StringIO(plain_io.example)
    parsed = plain_io.read(source)

    buffer = StringIO()
    null_io.write(buffer, parsed)
    buffer.seek(0)

    self.assertEqual(buffer.read(), '')
def testParse(self):
    """Round-trip the SCOP cla/des/hie test files and spot-check lookups.

    The three files are parsed with ``Scop.parse_files``, written back
    out, and compared with the original text. unittest assertions are
    used instead of bare ``assert`` so the checks still run under -O and
    failures are reported with a diff.
    """

    def slurp(name):
        # Read one test data file fully, closing it even if the read fails.
        stream = testdata_stream(name)
        try:
            return stream.read()
        finally:
            stream.close()

    cla = slurp('scop/dir.cla.scop.txt_test')
    des = slurp('scop/dir.des.scop.txt_test')
    hie = slurp('scop/dir.hie.scop.txt_test')

    scop = Scop.parse_files(StringIO(cla), StringIO(des), StringIO(hie))

    # Each writer must reproduce its input file exactly.
    cla_out = StringIO()
    scop.write_cla(cla_out)
    self.assertEqual(cla_out.getvalue(), cla)

    des_out = StringIO()
    scop.write_des(des_out)
    self.assertEqual(des_out.getvalue(), des)

    hie_out = StringIO()
    scop.write_hie(hie_out)
    self.assertEqual(hie_out.getvalue(), hie)

    domain = scop.domains_by_sid["d1hbia_"]
    self.assertEqual(domain.sunid, 14996)

    domains = scop.domains
    self.assertEqual(len(domains), 14)
    self.assertEqual(domains[4].sunid, 14988)

    # Unknown keys must not appear in the lookup tables.
    self.assertFalse(-111 in scop.nodes_by_sunid)
    self.assertFalse("no such domain" in scop.domains_by_sid)
def test_file_index(self):
    """Exercise FileIndex with default, parser-only and keyed configurations."""
    stream = StringIO(tfile)

    # Default index: items are the raw lines.
    raw_index = FileIndex(stream)
    self.assertTrue(raw_index[0].startswith('line 0'))
    self.assertTrue(raw_index[4].startswith('line 4'))

    def parser(f):
        # Second whitespace-separated token of the next line, as an int.
        fields = f.readline().split()
        return int(fields[1])

    parsed_index = FileIndex(stream, parser=parser)
    self.assertEqual(len(parsed_index), 5)
    self.assertEqual(parsed_index[0], 0)
    self.assertEqual(parsed_index[4], 4)

    key = re.compile(r"(line \d)")

    def linekey(line):
        # Key is the "line N" text if present, otherwise None.
        found = key.search(line)
        return found.group(1) if found is not None else None

    idx = FileIndex(stream, linekey=linekey, parser=parser)
    self.assertEqual(len(idx), 4)
    self.assertEqual(idx[0], 0)
    self.assertEqual(idx[3], 4)
    with self.assertRaises(IndexError):
        idx.__getitem__(5)

    # Lookup and membership by key
    self.assertEqual(idx['line 1'], 1)
    self.assertEqual(idx['line 4'], 4)
    self.assertTrue('line 1' in idx)
    self.assertFalse('Blah' in idx)
    self.assertFalse(20 in idx)

    # Test iteration over values
    total = sum(idx)
    self.assertEqual(total, 8)
def read_seq_data(fin,
                  input_parser=seq_io.read,
                  alphabet=None,
                  ignore_lower_case=False,
                  max_file_size=0):
    """Read sequence data from the input stream and return a seqs object.

    The environment variable WEBLOGO_MAX_FILE_SIZE overrides the
    max_file_size argument. Used to limit the load on the WebLogo
    webserver.

    Raises IOError when the input exceeds the size limit, and ValueError
    when the parser yields None or an empty result.
    """
    limit = int(os.environ.get("WEBLOGO_MAX_FILE_SIZE", max_file_size))

    # With a size limit, or when reading from stdin (which is
    # non-seekable), buffer the data in a StringIO so it can be rewound.
    if limit > 0:
        data = fin.read(limit)
        # Anything still readable past the limit means the input is too big.
        if fin.read(2) != "":
            raise IOError(
                "File exceeds maximum allowed size: %d bytes" % limit)
        fin = StringIO(data)
    elif fin == sys.stdin:
        fin = StringIO(fin.read())
    fin.seek(0)

    seqs = input_parser(fin)
    if seqs is None or len(seqs) == 0:
        raise ValueError("Please provide a multiple sequence alignment")

    if ignore_lower_case:
        # Case is significant. Do not count lower case letters.
        for i, s in enumerate(seqs):
            seqs[i] = s.mask()

    # Add alphabet to seqs.
    if not alphabet:
        seqs.alphabet = Alphabet.which(seqs)
    else:
        seqs.alphabet = Alphabet(alphabet)
    return seqs
def main(htdocs_directory=None):
    """Handle one CGI request: show the form or send a sequence logo.

    The submitted form is read with ``cgilib.FieldStorage``. If there is
    no form data, or a reset was requested, the default form is sent.
    Otherwise the fields are validated and copied onto a ``LogoOptions``
    object. If anything is invalid, or the request is an edit or validate
    command, the form is sent back with the collected errors. If not, the
    logo is built and written to standard output after the HTTP headers.

    Args:
        htdocs_directory: passed through unchanged to ``send_form``.
    """
    logooptions = weblogolib.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field('sequences', ''),
        Field('format', 'png', weblogolib.formatters.get,
              options=['png_print', 'png', 'jpeg', 'eps', 'pdf', 'svg',
                       'logodata'],
              # TODO: Should copy list from __init__.formatters
              errmsg="Unknown format option."),
        Field('stacks_per_line', logooptions.stacks_per_line, int,
              errmsg='Invalid number of stacks per line.'),
        Field('stack_width', 'medium', weblogolib.std_sizes.get,
              options=['small', 'medium', 'large'],
              errmsg='Invalid logo size.'),
        Field('alphabet', 'alphabet_auto', alphabets.get,
              options=['alphabet_auto', 'alphabet_protein', 'alphabet_dna',
                       'alphabet_rna'],
              errmsg="Unknown sequence type."),
        Field('unit_name', 'bits',
              options=['probability', 'bits', 'nats', 'kT', 'kJ/mol',
                       'kcal/mol']),
        Field('first_index', 1, int_or_none),
        Field('logo_start', '', int_or_none),
        Field('logo_end', '', int_or_none),
        Field('composition', 'comp_auto', composition.get,
              options=['comp_none', 'comp_auto', 'comp_equiprobable',
                       'comp_CG', 'comp_Celegans', 'comp_Dmelanogaster',
                       'comp_Ecoli', 'comp_Hsapiens', 'comp_Mmusculus',
                       'comp_Scerevisiae'],
              errmsg="Illegal sequence composition."),
        Field('percentCG', '', float_or_none,
              errmsg="Invalid CG percentage."),
        Field('show_errorbars', False, truth),
        Field('logo_title', logooptions.logo_title),
        Field('logo_label', logooptions.logo_label),
        Field('show_xaxis', False, truth),
        Field('xaxis_label', logooptions.xaxis_label),
        Field('show_yaxis', False, truth),
        Field('yaxis_label', logooptions.yaxis_label, string_or_none),
        Field('yaxis_scale', logooptions.yaxis_scale, float_or_none,
              errmsg="The yaxis scale must be a positive number."),
        Field('yaxis_tic_interval', logooptions.yaxis_tic_interval,
              float_or_none),
        Field('show_ends', False, truth),
        Field('show_fineprint', False, truth),
        Field('color_scheme', 'color_auto', color_schemes.get,
              options=color_schemes.keys(),
              errmsg='Unknown color scheme'),
        Field('color0', ''),
        Field('symbols0', ''),
        Field('desc0', ''),
        Field('color1', ''),
        Field('symbols1', ''),
        Field('desc1', ''),
        Field('color2', ''),
        Field('symbols2', ''),
        Field('desc2', ''),
        Field('color3', ''),
        Field('symbols3', ''),
        Field('desc3', ''),
        Field('color4', ''),
        Field('symbols4', ''),
        Field('desc4', ''),
        Field('ignore_lower_case', False, truth),
        Field('scale_width', False, truth),
    ]

    form = {}
    for c in controls:
        form[c.name] = c

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        form['show_errorbars'].value = logooptions.show_errorbars
        form['show_xaxis'].value = logooptions.show_xaxis
        form['show_yaxis'].value = logooptions.show_yaxis
        form['show_ends'].value = logooptions.show_ends
        form['show_fineprint'].value = logooptions.show_fineprint
        form['scale_width'].value = logooptions.scale_width

        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    options_from_form = [
        'format', 'stacks_per_line', 'stack_width', 'alphabet', 'unit_name',
        'first_index', 'logo_start', 'logo_end', 'composition',
        'show_errorbars', 'logo_title', 'logo_label', 'show_xaxis',
        'xaxis_label', 'show_yaxis', 'yaxis_label', 'yaxis_scale',
        'yaxis_tic_interval', 'show_ends', 'show_fineprint', 'scale_width'
    ]

    # Entries are (field name, message) tuples, or the args of a caught
    # exception.
    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value is not None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0, 5):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        if color:
            try:
                custom.groups.append(
                    weblogolib.ColorGroup(symbols, color, desc))
            except ValueError:
                errors.append(('color%d' % i, "Invalid color: %s" % color))

    if form["color_scheme"].value == 'color_custom':
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")

    # Fall back to the text box when no file content was uploaded.
    if not sequences:
        sequences = form["sequences"].get_value()

    if not sequences:
        errors.append((
            "sequences",
            "Please enter a multiple-sequence alignment in the box above, or select a file to upload."
        ))

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = ("ignore_lower_case" in form_values)
        if comp == 'percentCG':
            # A blank percentage field yields None. Report it as a form
            # error instead of failing on None / 100 with a TypeError that
            # nothing below would catch.
            if percentCG is None:
                raise ValueError("percentCG", "Invalid CG percentage.")
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences),
                                        alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif,
                                                   prior)
        except ValueError:
            # Not a transfac matrix; treat the input as sequence data.
            seqs = weblogolib.read_seq_data(
                StringIO(sequences),
                alphabet=logooptions.alphabet,
                ignore_lower_case=ignore_lower_case)
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        format_name = form["format"].value
        logo = weblogolib.formatters[format_name](data, logoformat)
    except (ValueError, IOError, RuntimeError) as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    #  RETURN LOGO OVER HTTP
    #
    print("Content-Type:", mime_type[format_name])

    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    if "download" in form_values:
        disposition = 'attachment'
    else:
        disposition = 'inline'
    print('Content-Disposition: %s; ' % disposition +
          'filename="logo.%s"' % extension[format_name])

    # Separate header from data
    print()

    # The headers went through the text layer of stdout; flush them so
    # they cannot end up after the bytes written straight to the
    # underlying binary buffer below.
    sys.stdout.flush()

    # Finally, and at last, send the logo.
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
def test_read_fail(self):
    """fasta_io.read must raise ValueError when given the nucleic alphabet."""
    stream = StringIO(fasta_io.example)
    # Wrong alphabet
    with self.assertRaises(ValueError):
        fasta_io.read(stream, nucleic_alphabet)
def test_read_with_blank_line(self):
    """example4 must parse into three sequences that are not aligned.

    Uses assertFalse rather than a bare ``assert`` so the check is not
    stripped when Python runs with -O.
    """
    parsed = fasta_io.read(StringIO(example4))
    self.assertFalse(parsed.isaligned())
    self.assertEqual(len(parsed), 3)
def test_parse_clustal_example(self):
    """The Clustal reader must parse its own example without raising."""
    stream = StringIO(clustal_io.example)
    # The parsed result is not inspected; only the absence of an
    # exception is checked.
    clustal_io.read(stream)
    stream.close()
def test_read_empty(self):
    """Reading an empty stream with fasta_io must give zero sequences.

    Uses assertEqual rather than a bare ``assert`` so the check is not
    stripped under -O and a failure reports the actual length.
    """
    parsed = fasta_io.read(StringIO())
    self.assertEqual(len(parsed), 0)
def test_parse_fail(self):
    """The Stockholm reader must raise ValueError on non-Stockholm input."""
    # should fail with parse error
    bad_inputs = (StringIO(clustal_io.example), )
    for stream in bad_inputs:
        with self.assertRaises(ValueError):
            stockholm_io.read(stream)
def test_write_with_header(self):
    """Writing sequences with a multi-line description must not raise."""
    parsed = fasta_io.read(StringIO(fasta_io.example))
    parsed.description = 'A description\nMore description'

    # The output is not inspected; the write only has to succeed.
    sink = StringIO()
    fasta_io.write(sink, parsed)
def test_parse_plain_fail(self):
    """The FASTA reader must raise ValueError on the plain_io example."""
    # should fail with parse error
    stream = StringIO(plain_io.example)
    with self.assertRaises(ValueError):
        fasta_io.read(stream)
def test_parse_plain_fail(self):
    """The MSF reader must raise ValueError on the plain_io example."""
    # should fail with parse error
    stream = StringIO(plain_io.example)
    with self.assertRaises(ValueError):
        msf_io.read(stream)
    stream.close()
def test_read_empty(self):
    """blastxml.read must raise ValueError on an empty stream."""
    empty = StringIO()
    with self.assertRaises(ValueError):
        blastxml.read(empty)