def test_make_iterparsers2():
    """Check the iterator built from a HeaderFooter expression.

    The expression uses RecordReader.Until("b") for the header,
    RecordReader.Until("c") for the records and RecordReader.Everything
    for the footer.  ``make_iterator("record")`` must return an
    IterParser.IterHeaderFooter and, for the input below, iterating must
    yield exactly one record.
    """
    header = Martel.Group("header", Martel.Re(r"(a*\R)*"))
    record = Martel.Group("record", Martel.Re(r"(b*\R)*"))
    footer = Martel.Group("footer", Martel.Re(r"(c*\R)*"))
    exp = Martel.HeaderFooter("dataset", {},
                              header, RecordReader.Until, ("b",),
                              record, RecordReader.Until, ("c",),
                              footer, RecordReader.Everything, ())

    iterator = exp.make_iterator("record")
    assert isinstance(iterator, IterParser.IterHeaderFooter), iterator

    # The first two entries used to be written as `"a" "aa"` (no comma),
    # which Python silently concatenates into the single line "aaa".
    lines = ["a",
             "aa",
             "aaaaaaa",
             "b",
             "bb",
             "bbbb",
             "bbbbbbbb",
             "bbbbbbbbbbbbbbbb",
             "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
             "cccc",
             "cc",
             "c",
             ]
    text = "\n".join(lines) + "\n"

    # Count the records; the check happens after the loop so that both
    # "no records" and "too many records" are detected.
    i = 0
    for rec in iterator.iterateString(text):
        i = i + 1
    assert i == 1, i
def test_header_footer7():
    """Parse input that has a header and a footer but no record data.

    Expects zero errors, zero fatal errors and zero "record" elements.
    """
    # NOTE(review): the line breaks inside this literal were reconstructed
    # (two header lines for CountLines(2), then the footer line) -- confirm
    # against the original fixture.
    s = """\
This is some misc. header text
that goes on until the end.
FOOTER
"""
    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    footer_exp = Martel.Group("footer", Martel.Re("FOOTER(.|\n)*"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()
    footer_parser = footer_exp.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.CountLines, (2, ), header_parser.tagtable,
        RecordReader.EndsWith, ("//", ), record_parser.tagtable,
        RecordReader.StartsWith, ("FOOTER", ), footer_parser.tagtable,
        (0, 1, {}))

    # Collect record and error counts through the SAX-style handlers.
    record_counter = CountRecords("record")
    hf.setContentHandler(record_counter)
    error_counter = CountErrors()
    hf.setErrorHandler(error_counter)

    hf.parseFile(StringIO(s))

    assert error_counter.error_count == 0, error_counter.error_count
    assert error_counter.fatal_error_count == 0, \
           error_counter.fatal_error_count
    assert record_counter.count == 0, record_counter.count
def delimiter(delim):
    """Build a Martel format for lines of fields separated by `delim`.

    Each line becomes a "record" group made of one or more "field" groups
    separated by the literal delimiter and terminated by Martel.AnyEol().
    A field is any run (possibly empty) of characters other than the
    delimiter, "\\r" and "\\n".  Records are read one line at a time
    (RecordReader.CountLines with a count of 1).

    Parameters:
        delim -- the separator; must be exactly one character and must not
                 be a newline or carriage return.

    Returns the Martel.ParseRecords expression named "delimited".

    Raises AssertionError for an invalid delimiter.  The exception type is
    kept for backward compatibility, but it is raised explicitly so the
    validation still runs when Python is started with -O.
    """
    if len(delim) != 1:
        raise AssertionError(
            "delimiter can only be a single character long, not %s"
            % repr(delim))
    if delim in "\n\r":
        raise AssertionError("Cannot use %s as a delimiter" % repr(delim))

    field = Martel.Group("field",
                         Martel.Rep(Martel.AnyBut(delim + "\r\n")))
    line = field + Martel.Rep(Martel.Str(delim) + field) + Martel.AnyEol()
    record = Martel.Group("record", line)

    return Martel.ParseRecords("delimited", {}, record,
                               RecordReader.CountLines, (1, ))
def test_header_footer6():
    """Make sure bad records can be skipped when a footer is present.

    Of the seven "//"-terminated entries below, four have a numeric ID and
    three do not; the test expects 4 records, 3 errors and no fatal errors.
    """
    # NOTE(review): the line breaks inside this literal were reconstructed
    # from a whitespace-collapsed copy -- confirm against the original.
    s = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
//
ID A
This is some more data
//
ID 3
This is again some more data
//
ID Q
This blah
//
ID W
QWE
//
ID 987
To be
//
ID 897
Or not to be
//
FOOTER
"""
    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    footer_exp = Martel.Group("footer", Martel.Re("FOOTER(.|\n)*"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()
    footer_parser = footer_exp.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.EndsWith, ("//", ), record_parser.tagtable,
        RecordReader.StartsWith, ("FOOTER", ), footer_parser.tagtable,
        (0, 1, {}))

    record_counter = CountRecords("record")
    hf.setContentHandler(record_counter)
    error_counter = CountErrors()
    hf.setErrorHandler(error_counter)

    hf.parseFile(StringIO(s))

    assert error_counter.error_count == 3, error_counter.error_count
    assert error_counter.fatal_error_count == 0, \
           error_counter.fatal_error_count
    assert record_counter.count == 4, record_counter.count
def test_header_footer1():
    """Round-trip a header / three records / footer document to XML.

    The parsed events are fed to an XMLGenerator and the generated text is
    compared with the expected ("gold") document.
    """
    # NOTE(review): the line breaks inside both literals were reconstructed
    # from a whitespace-collapsed copy -- confirm against the original.
    s = """\
header
XX
record 1
//
record 2
//
record 3
//
footer
"""
    gold = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>header
XX
</header><record>record 1
//
</record><record>record 2
//
</record><record>record 3
//
</record><footer>footer
</footer></hf>"""

    debug_level = 1

    # Don't use regexps like these in your code - for testing only!
    header_exp = Martel.Group("header", Martel.Re(r"header(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"rec(.|\n)*"))
    footer_exp = Martel.Group("footer", Martel.Re(r"footer(.|\n)*"))

    header_parser = header_exp.make_parser(debug_level=debug_level)
    record_parser = record_exp.make_parser(debug_level=debug_level)
    footer_parser = footer_exp.make_parser(debug_level=debug_level)

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.EndsWith, ("XX\n", ), header_parser.tagtable,
        RecordReader.EndsWith, ("//\n", ), record_parser.tagtable,
        RecordReader.StartsWith, ("f", ), footer_parser.tagtable,
        (0, debug_level, {}))

    # Serialise the parse events as XML into an in-memory buffer.
    outfile = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(outfile))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(s))

    produced = outfile.getvalue()
    assert produced == gold, (produced, gold)
def test_missing_end2():
    """Same as test_missing_end1 but using HeaderFooter.

    Records are read with RecordReader.EndsWith("a") and there is neither
    a header nor a footer.  A well-formed input must iterate cleanly; an
    input with a trailing "c" line must raise
    Parser.ParserPositionException at position 15 (the offset of that
    extra line in the text).
    """
    exp = Martel.HeaderFooter("dataset", {},
                              None, None, None,

                              Martel.Group("record",
                                           Martel.Re(r"(b*\R)*a\R")),
                              RecordReader.EndsWith, ("a",),

                              None, None, None
                              )
    lines = [
        "bbb",
        "bb",
        "a",
        "b",
        "a",
        "a",
        ]
    text = "\n".join(lines) + "\n"

    iterator = exp.make_iterator("record")

    # This should work
    for x in iterator.iterateString(text):
        pass

    # This should not
    lines.append("c")
    text = "\n".join(lines) + "\n"
    try:
        for x in iterator.iterateString(text):
            pass
    except Parser.ParserPositionException as exc:
        # "except ... as" replaces the Python-2-only "except X, exc" form.
        assert exc.pos == 15, exc.pos
    else:
        # No exception at all means the bad input was wrongly accepted.
        raise AssertionError
def test_hf4():
    """Iterate "spam" records after a one-line header, with no footer.

    The n-th record (counting from 1) must consist of n "b" characters on
    one line followed by n "c" characters on the next.
    """
    header_parser = Martel.Re(r"a*\R").make_parser()
    record_parser = Martel.Group("spam",
                                 Martel.Re(r"b*\Rc*\R")).make_parser()
    ip = IterParser.IterHeaderFooter(
        header_parser, RecordReader.CountLines, (1,),
        record_parser, RecordReader.CountLines, (2,),
        None, None, None,
        "spam")

    rows = ["aaaaaaaaa",
            "b",
            "c",
            "bb",
            "cc",
            "bbb",
            "ccc",
            ]
    text = "\n".join(rows) + "\n"

    width = 1
    for rec in ip.iterateString(text):
        expected = "b" * width + "\n" + "c" * width + "\n"
        assert rec["spam"][0] == expected
        width += 1
def test_header_footer3():
    """Round-trip a document that has a footer but no header to XML."""
    # NOTE(review): the line breaks inside both literals were reconstructed
    # from a whitespace-collapsed copy -- confirm against the original.
    s = """\
ID 1
This is some data
//
ID 2
This is some more data
//
Okay, that was all of the data.
"""
    gold = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><record>ID 1
This is some data
//
</record><record>ID 2
This is some more data
//
</record><footer>Okay, that was all of the data.
</footer></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    # Require at least 5 characters (just to be safe)
    footer_exp = Martel.Group("footer", Martel.Re(r".....(.|\n)*"))

    record_parser = record_exp.make_parser()
    footer_parser = footer_exp.make_parser()

    # The header slot uses the Nothing reader with an empty tagtable.
    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Nothing, (), (),
        RecordReader.EndsWith, ("//\n", ), record_parser.tagtable,
        RecordReader.Everything, (), footer_parser.tagtable,
        (0, 1, {}))

    outfile = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(outfile))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(s))

    produced = outfile.getvalue()
    assert produced == gold, (produced, gold)
def test_DelimitedFields(): exp = Martel.Group("test", Martel.DelimitedFields("Field", "/")) parser = exp.make_parser() file = StringIO.StringIO() parser.setContentHandler(saxutils.XMLGenerator(file)) parser.parseString("a/b/cde/f//\n") s = file.getvalue() expect = "<test><Field>a</Field>/<Field>b</Field>/<Field>cde</Field>/" \ "<Field>f</Field>/<Field></Field>/<Field></Field>\n</test>" assert string.find(s, expect) != -1, ("Got: %s" % (repr(s), ))
def test_header_footer2():
    """Round-trip a document that has a header but no footer to XML."""
    # NOTE(review): the line breaks inside both literals were reconstructed
    # from a whitespace-collapsed copy -- confirm against the original.
    s = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
ID 2
This is some more data
"""
    gold = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>
This is some misc. header text
that goes on until the end.
</header><record>ID 1
This is some data
</record><record>ID 2
This is some more data
</record></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()

    # The footer slot uses the Nothing reader with an empty tagtable.
    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.StartsWith, ("ID", ), record_parser.tagtable,
        RecordReader.Nothing, (), (),
        (0, 1, {}))

    outfile = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(outfile))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(s))

    produced = outfile.getvalue()
    assert produced == gold, (produced, gold)
def test_ToSep(): exp = Martel.Group("test", Martel.ToSep("colon", ":") + \ Martel.ToSep("space", " ") + \ Martel.ToSep("empty", "!")) parser = exp.make_parser() file = StringIO.StringIO() parser.setContentHandler(saxutils.XMLGenerator(file)) parser.parseString("q:wxy !") s = file.getvalue() expect = "<test><colon>q</colon>:<space>wxy</space> <empty></empty>!</test>" assert string.find(s, expect) != -1, ("Got: %s" % (repr(s), ))
def test_reader_parser():
    """Check that Parser.Parser reports malformed input with a position error.

    The grammar is any number of "abc" (group "start") followed by any
    number of "xyz" (group "end").  A well-formed string must parse, while
    strings with trailing junk or a truncated "xyz" must raise
    Parser.ParserPositionException.
    """
    record = Martel.Group("start", Martel.Rep(Martel.Str("abc"))) + \
             Martel.Group("end", Martel.Rep(Martel.Str("xyz")))
    parser = record.make_parser()

    # Re-wrap the tagtable in a plain Parser.Parser, the class under test.
    parser = Parser.Parser(parser.tagtable)
    parser.setErrorHandler(handler.ErrorHandler())

    parser.parseString("abc" * 10 + "xyz")

    for bad_text in ("abc" * 10 + "xyzQ", "abc" * 10 + "x"):
        try:
            parser.parseString(bad_text)
        except Parser.ParserPositionException:
            pass
        else:
            # Call form of raise: valid on every Python version, unlike
            # the Python-2-only `raise AssertionError, "..."` statement.
            raise AssertionError("didn't get a position exception")
def gen_iterator():
    """Return an IterHeaderFooter over "spam" records.

    The header and footer are each read as one line (CountLines 1) and
    each record as two lines (CountLines 2); the record parser is built
    with debug_level = 1.
    """
    header_parser = Martel.Re(r"a*\R").make_parser()
    record_parser = Martel.Group(
        "spam", Martel.Re(r"b*\Rc*\R")).make_parser(debug_level = 1)
    footer_parser = Martel.Re(r"d*\R").make_parser()

    return IterParser.IterHeaderFooter(
        header_parser, RecordReader.CountLines, (1,),
        record_parser, RecordReader.CountLines, (2,),
        footer_parser, RecordReader.CountLines, (1,),
        "spam")
def test_missing_end3():
    """Check the error raised when the footer is missing.

    Records are read with RecordReader.EndsWith("a") and the footer is a
    single "c" line (CountLines 1).  The complete input must iterate
    cleanly; with the footer line removed, iteration must raise
    Parser.ParserPositionException at position 15 (the end of the
    shortened text).
    """
    exp = Martel.HeaderFooter("dataset", {},
                              None, None, None,

                              Martel.Group("record",
                                           Martel.Re(r"(b*\R)*a\R")),
                              RecordReader.EndsWith, ("a",),

                              Martel.Group("footer", Martel.Re(r"c\R")),
                              RecordReader.CountLines, (1,)
                              )
    lines = [
        "bbb",
        "bb",
        "a",
        "b",
        "a",
        "a",
        "c",  # This will be removed for the test
        ]
    text = "\n".join(lines) + "\n"

    iterator = exp.make_iterator("record")

    # This should work
    for x in iterator.iterateString(text):
        pass

    # This should not
    lines.pop()
    text = "\n".join(lines) + "\n"
    try:
        for x in iterator.iterateString(text):
            pass
    except Parser.ParserPositionException as exc:
        # "except ... as" replaces the Python-2-only "except X, exc" form.
        assert exc.pos == 15, exc.pos
    else:
        # No exception at all means the missing footer went unnoticed.
        raise AssertionError
def test_make_iterparsers1():
    """Check the iterator built from a ParseRecords expression.

    ``make_iterator("spam")`` must return an IterParser.IterRecords, and
    iterating over ten lines of 0..9 "a" characters must yield ten records
    whose "spam" text (minus its final character) has length 0..9 in order.
    """
    exp = Martel.ParseRecords("dataset", {},
                              Martel.Group("spam", Martel.Re(r"a*\R")),
                              RecordReader.CountLines, (1,))
    iterator = exp.make_iterator("spam")
    assert isinstance(iterator, IterParser.IterRecords)

    text = "".join(["a" * width + "\n" for width in range(0, 10)])

    seen = 0
    for rec in iterator.iterateString(text):
        assert len(rec["spam"][0][:-1]) == seen, (seen, rec["spam"][0])
        seen += 1
    assert seen == 10
def test_record_parser():
    """Check record and error counts reported by Parser.RecordParser.

    Six "X"-led records are parsed; the test expects four to be counted
    as "A" records, two (non-fatal) errors and no fatal errors.
    """
    record = Martel.Group("A", Martel.Str("X\n") + Martel.Re("a*\n"))
    tagtable = record.make_parser().tagtable

    parser = Parser.RecordParser("blah", {}, tagtable, (0, 1, {}),
                                 RecordReader.StartsWith, ("X", ))

    error_counter = CountErrors()
    parser.setErrorHandler(error_counter)
    record_counter = CountRecords("A")
    parser.setContentHandler(record_counter)

    parser.parseString("X\na\nX\nb\nX\naaa\nX\naaaa\nX\nq\nX\na\n")

    assert error_counter.fatal_error_count == 0, \
           error_counter.fatal_error_count
    assert error_counter.error_count == 2, error_counter.error_count
    assert record_counter.count == 4, record_counter.count
def test_ri3():
    """An error in the second record must be reported at position 5.

    Records are two lines each (CountLines 2).  The second record starts
    with "b-", which does not match the record pattern, so iteration must
    raise Parser.ParserPositionException with ``pos == 5`` (the offset of
    the "-" in the text).
    """
    ip = IterParser.IterRecords(
        Martel.Group("spam", Martel.Re(r"b*\Rc*\R")).make_parser(
            debug_level = 1),
        RecordReader.CountLines, (2,),
        "spam")

    lines = ["b",
             "c",
             "b-",
             "cc",
             "bbb",
             "ccc",
             ]
    text = "\n".join(lines) + "\n"

    try:
        for x in ip.iterateString(text):
            pass
    except Parser.ParserPositionException as exc:
        # "except ... as" replaces the Python-2-only "except X, exc" form.
        assert exc.pos == 5, exc.pos
    else:
        # Without this branch the test passed silently when the bad
        # record was not detected at all.
        raise AssertionError("didn't get a position exception")
def test1():
    """Iterate over "line" records with the LAX and LAXAttrs handlers.

    Each input line is "<first name> <last name> <age>".  With LAX.LAX()
    the record maps a tag to the list of its text values; with
    LAX.LAXAttrs() each entry is indexed as (text, attributes), which lets
    the test check the "type" attribute of both "name" fields and that
    "age" carries no attributes.
    """
    fields = (
        ["Andrew", "Dalke", "12"],
        ["Liz", "Nelson", "22"],
        ["Mandrake", "Moose", "23"],
        ["Lisa", "Marie", "91"],
        )

    # One space-separated line per row; str.join replaces the deprecated
    # string.join() and the repeated string concatenation.
    text = "".join([" ".join(row) + "\n" for row in fields])

    # Named `line_format` rather than `format` to avoid shadowing the builtin.
    line_format = Martel.Rep1(
        Martel.Group("line",
                     Martel.Word("name", {"type": "first"}) +
                     Martel.Spaces() +
                     Martel.Word("name", {"type": "last"}) +
                     Martel.Spaces() +
                     Martel.Integer("age") +
                     Martel.AnyEol()
                     ))
    iterator = line_format.make_iterator("line")

    i = 0
    for record in iterator.iterateString(text, LAX.LAX()):
        assert record["name"] == fields[i][:2], \
               (record["name"], fields[i][:2])
        assert record["age"] == fields[i][2:3], \
               (record["age"], fields[i][2:3])
        i = i + 1

    i = 0
    for record in iterator.iterateString(text, LAX.LAXAttrs()):
        names = [x[0] for x in record["name"]]
        ages = [x[0] for x in record["age"]]
        assert names == fields[i][:2], (names, fields[i][:2])
        assert ages == fields[i][2:3], (ages, fields[i][2:3])
        assert record["name"][0][1]["type"] == "first"
        assert record["name"][1][1]["type"] == "last"
        # list(...) keeps the comparison valid whether keys() returns a
        # list or a view object.
        assert list(record["age"][0][1].keys()) == []
        i = i + 1
def test_single():
    """Build a "spam" group with one attribute and verify its element.

    The element obtained through get_element() is checked against the
    pair ("spam", {"format": "swissprot"}) with check_element().
    """
    group = Martel.Group("spam", Martel.Str("X"), {"format": "swissprot"})
    ele = get_element(group)
    expected = ("spam", {"format": "swissprot"})
    check_element(ele, expected)
# Copyright 2001 by Katharine Lindner. All rights reserved. # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Martel regular expression for Intelligenetic format (DEPRECATED). This is a huge regular regular expression for the IntelliGenetics/MASE format, built using the 'regular expressions on steroids' capabilities of Martel. """ #http://immuno.bme.nwu.edu/seqhunt.html # Martel import Martel # --- first set up some helper constants and functions comment_line = Martel.Group( "comment_line", \ Martel.Str( ';' ) + Martel.ToEol( "comment" ) ) comment_lines = Martel.Group("comment_lines", Martel.Rep(comment_line)) title_line = Martel.Group( "title_line", \ Martel.Expression.Assert( Martel.Str( ';' ), 1 ) + Martel.ToEol() ) residue_line = Martel.Group( "residue_line", \ Martel.Expression.Assert( Martel.Str( ';' ), 1 ) + Martel.ToEol( "sequence" ) ) residue_lines = Martel.Group("residue_lines", Martel.Rep1(residue_line)) intelligenetics_record = comment_lines + title_line + residue_lines
def test_header_footer8():
    """Header, records and footer, some inputs carrying bad or extra data.

    Every input is parsed with the same HeaderFooterParser (two header
    lines, one line per record, one footer line) and the record, error
    and fatal-error counts are compared with the expected values.
    """
    # NOTE(review): the line breaks inside these literals were reconstructed
    # from a whitespace-collapsed copy (in particular where the two-line
    # header is split) -- confirm against the original fixtures.
    s1 = """Two lines in
the header.
Data 1
Data 2
Data Q
Data 4
FOOTER Abc
FOOTER B
"""
    s2 = """Two lines in
the header.
Data 1
Data 2
Data Q
Data 4
FOOTER Abc
"""
    s3 = """Two lines in
the header.
Data 1
Data 4
FOOTER Abc
"""
    s4 = """Two lines in
the header.
Data Q
FOOTER Abc
"""
    s5 = """Two lines in
the header.
FOOTER Abc
"""
    # (input text, expected records, expected errors, expected fatal errors)
    dataset = (
        (s1, 3, 1, 1),
        (s2, 3, 1, 0),
        (s3, 2, 0, 0),
        (s4, 0, 1, 0),
        (s5, 0, 0, 0),
        )

    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"Data \d+\n"))
    footer_exp = Martel.Group("footer", Martel.Re("FOOTER \w+\n"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()
    footer_parser = footer_exp.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.CountLines, (2, ), header_parser.tagtable,
        RecordReader.CountLines, (1, ), record_parser.tagtable,
        RecordReader.CountLines, (1, ), footer_parser.tagtable,
        (0, 1, {}))

    for s, rec_count, err_count, fatal_count in dataset:
        # Fresh counters for every input; the parser itself is reused.
        record_counter = CountRecords("record")
        hf.setContentHandler(record_counter)
        error_counter = CountErrors()
        hf.setErrorHandler(error_counter)

        hf.parseFile(StringIO(s))

        assert error_counter.error_count == err_count, \
               (s, error_counter.error_count, err_count)
        assert error_counter.fatal_error_count == fatal_count, \
               (s, error_counter.fatal_error_count, fatal_count)
        assert record_counter.count == rec_count, \
               (s, record_counter.count, rec_count)
from Martel import Str from Martel import AnyEol, UntilEol from Martel import Group from Martel import Alt from Martel import Rep from Martel import Rep1 from Martel import AnyBut from Martel import UntilSep from Bio.NBRF.ValSeq import valid_sequence_dict sequence_types = map(Str, valid_sequence_dict.keys()) sequence_type = Group("sequence_type", Alt(*sequence_types)) name_line = Martel.Group( "name_line", \ Str( ">" ) + sequence_type + Str( ";" ) + UntilEol("sequence_name") + AnyEol() ) comment_line = UntilEol("comment") + AnyEol() # 0x2a -- '*' # 10 -- '\n', 13 -- '\r' newline endings excluded_chars = chr(0x2a) + chr(10) + chr(13) # sequence lines with only sequence sequence_text = Group( "sequence_text", \ Martel.Rep1( AnyBut( excluded_chars ) ) ) sequence_line = Group("sequence_line", sequence_text + AnyEol()) # the final line, has a '*' and potentially some sequence sequence_final_line = Group( "sequence_final_line",
""" import Martel from Martel import RecordReader def Simple(tag, tag_data): return Martel.Group( tag, Martel.Str(tag + " ") + Martel.Group(tag_data, Martel.Re("[^\R]*")) + Martel.AnyEol()) #--- ID ID = Martel.Group("ID", Martel.Re( r"ID (?P<entry_name>\w+) +(?P<data_class_table>\w+); +" \ r"(?P<molecule_type>\w+); +(?P<sequence_length>\d+) AA\.\R" )) #--- AC AC = Martel.Group( "AC", Martel.Re(r"AC (?P<ac_number>\w+);( (?P<ac_number>\w+);)*\R")) AC_block = Martel.Group("AC_block", Martel.Rep1(AC)) #--- DT DT_created = Martel.Group("DT_created", Martel.Re( r"DT (?P<day>\d\d)-(?P<month>...)-(?P<year>\d{4}) \(Rel. "\ r"(?P<release>\d\d), Created\)\R" )) DT_seq_update = Martel.Group("DT_seq_update", Martel.Re(
def test_none():
    """Verify the element of a "spam" group that has no attributes.

    Two spellings are checked: the attribute argument left out, and an
    explicit empty dictionary.  Both must pass check_element() against
    ("spam", {}).
    """
    without_attrs = Martel.Group("spam", Martel.Str("X"))
    ele = get_element(without_attrs)
    check_element(ele, ("spam", {}))

    empty_attrs = Martel.Group("spam", Martel.Str("X"), {})
    ele = get_element(empty_attrs)
    check_element(ele, ("spam", {}))
def Simple(tag, tag_data):
    """Return a Martel group for a simple one-line "TAG data" entry.

    The group is named `tag` and matches the literal `tag` followed by a
    single space, then a nested group named `tag_data` holding the line's
    remaining text, then an end of line (Martel.AnyEol()).
    """
    prefix = Martel.Str(tag + " ")
    payload = Martel.Group(tag_data, Martel.Re("[^\R]*"))
    return Martel.Group(tag, prefix + payload + Martel.AnyEol())
"""Martel definitions for the output files produced by primer3.
"""
import Martel

# NOTE: every regular expression below is written as a raw string.  The
# pattern text is unchanged; raw strings merely avoid Python's
# invalid-escape handling for sequences such as "\d".

# one or more spaces
any_space = Martel.Re(r"[ ]+")
blank_line = Martel.AnyEol()
# a line starting with '#', up to and including its end of line
comment_line = Martel.Str("#") + Martel.ToEol()

# comments and blank lines in the file
comments = Martel.Group("comments",
                        blank_line +
                        comment_line +
                        blank_line +
                        comment_line)

# 1 PRODUCT SIZE: 289
product_size = Martel.Group("product_size", Martel.Re(r"[\d]+"))
start_primer = Martel.Group("start_primer",
                            any_space + Martel.Re(r"[\d]+") +
                            Martel.Str(" PRODUCT SIZE: "))
primer_start_line = Martel.Group("primer_start_line",
                                 start_primer + product_size +
                                 Martel.AnyEol())

# a blank line that signifies a new primer is coming up
single_primer_line = Martel.Group("single_primer_line",
                                  blank_line)

# FORWARD PRIMER 1725 20 59.96 55.00 AGGGAAGGGATGCTAGGTGT
primer_space = Martel.Str(" " * 5)
any_integer = Martel.Re(r"[\d]+")
any_float = Martel.Re(r"[\d\.]+")
sequence = Martel.Re(r"[GATCN]+")
"""Martel format for primersearch output files.
"""
import Martel

blank_line = Martel.AnyEol()

# Primer name D1S2660
primer_name = Martel.Group("primer_name",
                           Martel.ToEol())
primer_name_line = Martel.Str("Primer name ") + primer_name

# Amplimer 1
# (raw string: same pattern text, without relying on Python passing the
# unknown "\d" escape through unchanged)
amplifier = Martel.Group("amplifier",
                         Martel.Re(r"[\d]+"))
amplimer_line = Martel.Str("Amplimer ") + amplifier + Martel.AnyEol()

# Sequence: AC074298 AC074298
# Telomere associated sequence for Arabidopsis thaliana TEL1N
# CCGGTTTCTCTGGTTGAAAA hits forward strand at 114 with 0 mismatches
# TCACATTCCCAAATGTAGATCG hits reverse strand at [114] with 0 mismatches
seq_indent = Martel.Str("\t")

# Each of the pieces below is one "rest of the line" group.
sequence_id = Martel.Group("sequence_id",
                           Martel.ToEol())
sequence_descr = Martel.Group("sequence_descr",
                              Martel.ToEol())
sequence_info = sequence_id + sequence_descr
forward_strand_info = Martel.Group("forward_strand_info",
                                   Martel.ToEol())
reverse_strand_info = Martel.Group("reverse_strand_info",
                                   Martel.ToEol())
amplifier_sequence = Martel.Group("amplifier_sequence",
                                  sequence_info +
                                  forward_strand_info +
                                  reverse_strand_info)
amplifier_sequence_lines = seq_indent + Martel.Str("Sequence: ") + \
                           amplifier_sequence