def test_missing_end2():
    """Check that a record with no end marker is reported as an error.

    Same scenario as test_missing_end1, but the format is built with
    Martel.HeaderFooter (with no header and no footer).
    """
    exp = Martel.HeaderFooter(
        "dataset", {},
        None, None, None,
        Martel.Group("record", Martel.Re(r"(b*\R)*a\R")),
        RecordReader.EndsWith, ("a",),
        None, None, None
    )
    lines = ["bbb", "bb", "a", "b", "a", "a"]
    text = "\n".join(lines) + "\n"

    iterator = exp.make_iterator("record")

    # Every record here ends with an "a" line, so this must iterate cleanly.
    for x in iterator.iterateString(text):
        pass

    # A trailing "c" line is never followed by an "a" terminator, so
    # iteration must fail.
    lines.append("c")
    text = "\n".join(lines) + "\n"
    try:
        for x in iterator.iterateString(text):
            pass
    except Parser.ParserPositionException as exc:
        # Offset 15 is where the dangling "c" line starts.
        assert exc.pos == 15, exc.pos
    else:
        raise AssertionError("expected a ParserPositionException at position 15")
def test_header_footer_parser():
    """Check that header, record and footer may reuse one tag name.

    The same "term" tag is used in all three sections, distinguished only
    by its "pos" attribute; the attributes must not collide.
    """
    header_format = Martel.Re(r"(?P<term?pos=header>a+)\R")
    record_format = Martel.Re(r"(?P<term?pos=body>b+)\R")
    footer_format = Martel.Re(r"(?P<term?pos=footer>c+)\R")

    hf_format = Martel.HeaderFooter(
        "all", {"state": "New Mexico"},
        header_format, RecordReader.CountLines, (1, ),
        record_format, RecordReader.CountLines, (1, ),
        footer_format, RecordReader.CountLines, (1, ),
    )

    grab = GrabElements()
    parser = hf_format.make_parser()
    parser.setContentHandler(grab)
    parser.parseString("a\nbb\nbb\nccc\n")

    # One enclosing element, one header term, two body terms, one footer term.
    expected = [
        ("all", {"state": "New Mexico"}),
        ("term", {"pos": "header"}),
        ("term", {"pos": "body"}),
        ("term", {"pos": "body"}),
        ("term", {"pos": "footer"}),
    ]
    elements = grab.elements
    assert len(elements) == 5, len(elements)
    for position in range(5):
        check_element(elements[position], expected[position])
def test_hf4():
    """Iterate "spam" records from an IterHeaderFooter that has no footer."""
    header_parser = Martel.Re(r"a*\R").make_parser()
    record_parser = Martel.Group("spam", Martel.Re(r"b*\Rc*\R")).make_parser()
    ip = IterParser.IterHeaderFooter(
        header_parser, RecordReader.CountLines, (1,),
        record_parser, RecordReader.CountLines, (2,),
        None, None, None,
        "spam")

    # One header line followed by three two-line records of growing width.
    text = "\n".join([
        "aaaaaaaaa",
        "b",
        "c",
        "bb",
        "cc",
        "bbb",
        "ccc",
    ]) + "\n"

    width = 1
    for record in ip.iterateString(text):
        assert record["spam"][0] == "b" * width + "\n" + "c" * width + "\n"
        width += 1
def test_Spaces():
    """Exercise Martel.Spaces on accepted and rejected inputs and on naming."""
    parseString = Martel.Spaces().make_parser().parseString

    accepted = (" ", "\t", " ", " \t \t\t ")
    rejected = ("\n", " \n", " X ", "")

    for text in accepted:
        must_parse("Spaces", parseString, text)
    for text in rejected:
        must_not_parse("not Spaces", parseString, text)

    # A named Spaces must produce a group; an unnamed one must not.
    has_group(Martel.Spaces("spam", {"x": "pick"}), " " * 100,
              "spam", "pick")
    has_group(Martel.Spaces("eggs", {"x": "name"}), "\t" * 200,
              "eggs", "name")
    has_no_group(Martel.Spaces(), " ")
def test_Word():
    """Exercise Martel.Word on accepted and rejected inputs and on naming."""
    parseString = Martel.Word().make_parser().parseString

    accepted = ("Andrew", "Dalke", "was_here", "test12", "12df")
    rejected = ("*", "", "this-that")

    for text in accepted:
        must_parse("Word", parseString, text)
    for text in rejected:
        must_not_parse("not Word", parseString, text)

    # A named Word must produce a group; an unnamed one must not.
    has_group(Martel.Word("spam", {"x": "fly"}), "_", "spam", "fly")
    has_group(Martel.Word("eggs", {"x": "boy"}), "9", "eggs", "boy")
    has_no_group(Martel.Word(), "__")
def test_DelimitedFields(): exp = Martel.Group("test", Martel.DelimitedFields("Field", "/")) parser = exp.make_parser() file = StringIO.StringIO() parser.setContentHandler(saxutils.XMLGenerator(file)) parser.parseString("a/b/cde/f//\n") s = file.getvalue() expect = "<test><Field>a</Field>/<Field>b</Field>/<Field>cde</Field>/" \ "<Field>f</Field>/<Field></Field>/<Field></Field>\n</test>" assert string.find(s, expect) != -1, ("Got: %s" % (repr(s), ))
def test_Digits():
    """Exercise Martel.Digits on accepted and rejected inputs and on naming."""
    parseString = Martel.Digits().make_parser().parseString

    accepted = ("0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                "10", "20", "99", "453", "34653", "34359739467623")
    rejected = ("A", "1A", "123123123T", "-1")

    for text in accepted:
        must_parse("Digits", parseString, text)
    for text in rejected:
        must_not_parse("not Digits", parseString, text)

    # A named Digits must produce a group; an unnamed one must not.
    has_group(Martel.Digits("spam", {"x": "this"}), "5", "spam", "this")
    has_group(Martel.Digits("eggs", {"x": "that"}), "9", "eggs", "that")
    has_no_group(Martel.Digits(), "00")
def test_ToSep(): exp = Martel.Group("test", Martel.ToSep("colon", ":") + \ Martel.ToSep("space", " ") + \ Martel.ToSep("empty", "!")) parser = exp.make_parser() file = StringIO.StringIO() parser.setContentHandler(saxutils.XMLGenerator(file)) parser.parseString("q:wxy !") s = file.getvalue() expect = "<test><colon>q</colon>:<space>wxy</space> <empty></empty>!</test>" assert string.find(s, expect) != -1, ("Got: %s" % (repr(s), ))
def test_Punctuation():
    """Check Martel.Punctuation against every 8-bit character.

    A character must parse exactly when it is in string.punctuation.
    """
    parseString = Martel.Punctuation().make_parser().parseString

    for code in range(0, 256):
        ch = chr(code)
        if ch not in string.punctuation:
            must_not_parse("not Punctuation", parseString, ch)
        else:
            must_parse("Punctuation", parseString, ch)

    # A named Punctuation must produce a group; an unnamed one must not.
    first = string.punctuation[0]
    last = string.punctuation[-1]
    has_group(Martel.Punctuation("spam", {"x": "Iran"}), first,
              "spam", "Iran")
    has_group(Martel.Punctuation("eggs", {"x": "Iraq"}), last,
              "eggs", "Iraq")
    has_no_group(Martel.Punctuation(), string.punctuation[1])
def gen_iterator():
    """Return an IterHeaderFooter that iterates over "spam" records.

    The header and footer are one line each ("a"s and "d"s); each record
    is two lines ("b"s then "c"s).  Only the record parser is built with
    debug_level 1.
    """
    header_parser = Martel.Re(r"a*\R").make_parser()
    record_exp = Martel.Group("spam", Martel.Re(r"b*\Rc*\R"))
    record_parser = record_exp.make_parser(debug_level=1)
    footer_parser = Martel.Re(r"d*\R").make_parser()

    return IterParser.IterHeaderFooter(
        header_parser, RecordReader.CountLines, (1,),
        record_parser, RecordReader.CountLines, (2,),
        footer_parser, RecordReader.CountLines, (1,),
        "spam")
def test_header_footer7():
    """Check the error position reported for a malformed second record.

    The second line contains an upper-case "B", which the record pattern
    does not accept, so iteration must raise ParserPositionException.
    """
    exp = Martel.HeaderFooter(
        "dataset", {},
        None, None, None,
        Martel.Re(r"a(?P<b>b*)a\R"), RecordReader.CountLines, (1, ),
        None, None, None)

    lines = ["aa", "aBbbba", "abba"]
    text = "\n".join(lines) + "\n"
    try:
        for info in exp.make_iterator("b").iterateString(text):
            pass
    except Parser.ParserPositionException as exc:
        # "aa\n" occupies offsets 0-2, so the "B" sits at offset 4.
        assert exc.pos == 4, exc.pos
    else:
        # Without this the test would pass silently if no error were raised.
        raise AssertionError("expected a ParserPositionException at position 4")
def __init__(self, debug=0):
    """Build and cache the Martel parser used by this scanner.

    The parser is created once here and stored on the instance, because
    creating it takes a long time.

    Arguments:
    o debug - Debugging level handed to the Martel parser.  0 means no
    debugging; 2 gives the most output (and is much slower).  See the
    Martel documentation for details.
    """
    # Tags the scanner wants to receive from the Martel parser.
    self.interest_tags = [
        "input_file_name",
        "num_int_metabolites",
        "num_reactions",
        "metabolite_line",
        "unbalanced_metabolite",
        "num_rows",
        "num_cols",
        "irreversible_vector",
        "branch_metabolite",
        "non_branch_metabolite",
        "stoichiometric_tag",
        "kernel_tag",
        "subsets_tag",
        "reduced_system_tag",
        "convex_basis_tag",
        "conservation_relations_tag",
        "elementary_modes_tag",
        "reaction",
        "enzyme",
        "matrix_row",
        "sum_is_constant_line",
        "end_stochiometric",
        "end_kernel",
        "end_subsets",
        "end_reduced_system",
        "end_convex_basis",
        "end_conservation_relations",
        "end_elementary_modes",
    ]

    # Restrict the record expression to those tags before building the parser.
    record = metatool_format.metatool_record
    expression = Martel.select_names(record, self.interest_tags)
    self._parser = expression.make_parser(debug_level=debug)
def make_expression(format, tag_format="%s"):
    """format, tag_format = "%s" -> Martel Expression

    Turn the given time format string into the corresponding Martel
    Expression.  A format term may contain a Group name and attribute
    information.  If present, the group name is %'ed with the
    tag_format to produce the tag name to use.  Use None to specify
    that named groups should not be used.

    >>> from Martel import Time
    >>> from xml.sax import saxutils
    >>> exp = Time.make_expression("%m-%Y\\n", "created-%s")
    >>> parser = exp.make_parser()
    >>> parser.setContentHandler(saxutils.XMLGenerator())
    >>> parser.parseString("05-1921\\n")
    <?xml version="1.0" encoding="iso-8859-1"?>
    <created-month type="numeric">05</created-month>-<created-year type="long">1921</created-year>
    >>>

    See the Time module docstring for more information.

    """
    # Backslashes in the examples above are doubled because this docstring
    # is not a raw string; a single "\n" would be a real line break inside
    # the example and the doctest could not be parsed.
    return _parse_time(format, tag_format,
                       text_to_result=Martel.Str,
                       group_to_result=Martel.Group,
                       re_to_result=Martel.Re,
                       t=Martel.NullOp())
def test_Float():
    """Exercise Martel.Float on generated valid floats and on bad input.

    Valid inputs are every combination of a sign/integer prefix, a
    mantissa and an optional exponent.
    """
    parseString = Martel.Float().make_parser().parseString

    heads = ("", "-", "+", "-1", "+2", "3")
    tails = ("", "E0", "E+0", "E-0", "E4", "e+5", "e-6", "E10", "E-19",
             "e+28")
    middles = (".1", "5.", "7.6", "989", ".0001")
    for head in heads:
        for tail in tails:
            for middle in middles:
                must_parse("Float", parseString, head + middle + tail)

    rejected = ("1E", ".E", "1.E", "1/", "E0", "1.2E0K",
                "=1", "+-1", ".", "e", "-e", "-e0")
    for text in rejected:
        must_not_parse("not Float", parseString, text)

    # A named Float must produce a group; an unnamed one must not.
    has_group(Martel.Float("spam", {"x": "spot"}), "1.0", "spam", "spot")
    has_group(Martel.Float("eggs", {"x": "SPOT"}), "0.8", "eggs", "SPOT")
    has_no_group(Martel.Float(), "+1")
def test_Unprintable():
    """Check Martel.Unprintable against every 8-bit character.

    A character must parse exactly when it is not in string.printable.
    """
    parseString = Martel.Unprintable().make_parser().parseString

    # Remember the accepted characters for the group checks below.
    unprintables = []
    for code in range(0, 256):
        ch = chr(code)
        if ch not in string.printable:
            must_parse("Unprintable", parseString, ch)
            unprintables.append(ch)
        else:
            must_not_parse("not Unprintable", parseString, ch)

    # A named Unprintable must produce a group; an unnamed one must not.
    has_group(Martel.Unprintable("spam", {"x": "import"}),
              unprintables[0], "spam", "import")
    has_group(Martel.Unprintable("eggs", {"x": "export"}),
              unprintables[-1], "eggs", "export")
    has_no_group(Martel.Unprintable(), unprintables[1])
def test_make_iterparsers1():
    """ParseRecords.make_iterator must give an IterRecords that works."""
    exp = Martel.ParseRecords(
        "dataset", {},
        Martel.Group("spam", Martel.Re(r"a*\R")),
        RecordReader.CountLines, (1,))
    iterator = exp.make_iterator("spam")
    assert isinstance(iterator, IterParser.IterRecords)

    # Ten one-line records holding 0, 1, ..., 9 "a" characters.
    text = "".join(["a" * width + "\n" for width in range(0, 10)])

    seen = 0
    for rec in iterator.iterateString(text):
        # Drop the trailing newline before comparing the length.
        assert len(rec["spam"][0][:-1]) == seen, (seen, rec["spam"][0])
        seen += 1
    assert seen == 10
def index(handle, index_fn=None):
    """index(handle[, index_fn]) -> list of (PMID, MedlineID, start, end)

    Index a Medline XML file.  The result says where each record is,
    as offsets from the beginning of the handle.  index_fn, if given,
    is a callback taking (PMID, MedlineID, start, end); it is called as
    soon as each record has been indexed.

    """
    # Read the first 1000 characters to choose the format, then wrap the
    # handle together with that data before parsing.
    # NOTE(review): presumably _SavedDataHandle replays the data already
    # read - confirm against its definition.
    data = handle.read(1000)
    format_module = choose_format(data)
    handle = _SavedDataHandle(handle, data)

    # Only these elements are needed for indexing.
    wanted = ["MedlineCitation", "PMID", "MedlineID"]
    format = Martel.select_names(format_module.format, wanted)

    # Collect every record's index information, notifying index_fn first
    # when one was supplied.
    indexes = []

    def record_citation(pmid, medline_id, start, end,
                        indexes=indexes, index_fn=index_fn):
        if index_fn is not None:
            index_fn(pmid, medline_id, start, end)
        indexes.append((pmid, medline_id, start, end))

    indexer = _IndexerHandler(record_citation)

    # Build the parser and run it over the whole handle.
    parser = format.make_parser(debug_level=0)
    parser.setContentHandler(indexer)
    parser.setErrorHandler(handler.ErrorHandler())
    parser.parseFile(handle)
    return indexes
def test_header_footer3():
    """Iterate the "b" group of a HeaderFooter with no header or footer."""
    record_format = Martel.Re(r"a(?P<b>b*)a\R")
    exp = Martel.HeaderFooter(
        "dataset", {},
        None, None, None,
        record_format, RecordReader.CountLines, (1, ),
        None, None, None)

    # Records holding zero, one and two "b" characters, in that order.
    text = "\n".join(["aa", "aba", "abba"]) + "\n"

    count = 0
    for info in exp.make_iterator("b").iterateString(text):
        matches = info["b"]
        assert len(matches) == 1
        assert len(matches[0]) == count, (matches[0], count)
        count += 1
    assert count == 3, count
def test_multi():
    """A group name may carry several attributes, including an escaped one.

    "%7E" in the pattern must come back as "~" in the attribute value.
    """
    pattern = Martel.Re("(?P<qwe?def=%7Edalke&a=1&b=2&cc=33>X)")
    expected_attrs = {
        "def": "~dalke",
        "a": "1",
        "b": "2",
        "cc": "33",
    }
    check_element(get_element(pattern), ("qwe", expected_attrs))
def __init__(self, debug=0):
    """Build the Martel parser for primersearch records.

    Arguments:
    o debug - Debugging level passed to the Martel parser as debug_level.
    """
    # Tags the scanner wants to receive from the Martel parser.
    self.interest_tags = [
        "primer_name",
        "amplifier",
        "amplifier_sequence",
        "amplifier_length",
        "end_record",
    ]

    # Restrict the record expression to those tags before building the parser.
    record = primersearch_format.record
    expression = Martel.select_names(record, self.interest_tags)
    self._parser = expression.make_parser(debug_level=debug)
def test_record_parser():
    """RecordParser must count bad records as errors and keep going.

    Of the six "X"-led records in the input, two ("b" and "q") do not
    match the record pattern.
    """
    record = Martel.Group("A", Martel.Str("X\n") + Martel.Re("a*\n"))
    record_parser = record.make_parser()

    parser = Parser.RecordParser(
        "blah", {},
        record_parser.tagtable,
        (0, 1, {}),
        RecordReader.StartsWith, ("X", ))

    err = CountErrors()
    count = CountRecords("A")
    parser.setErrorHandler(err)
    parser.setContentHandler(count)

    parser.parseString("X\na\nX\nb\nX\naaa\nX\naaaa\nX\nq\nX\na\n")

    assert err.fatal_error_count == 0, err.fatal_error_count
    assert err.error_count == 2, err.error_count
    assert count.count == 4, count.count
def test_make_iterparsers2():
    """HeaderFooter.make_iterator must give an IterHeaderFooter that works.

    All the "b" lines form a single record, because the record reader
    reads until the first "c" line.
    """
    exp = Martel.HeaderFooter(
        "dataset", {},
        Martel.Group("header", Martel.Re(r"(a*\R)*")),
        RecordReader.Until, ("b",),
        Martel.Group("record", Martel.Re(r"(b*\R)*")),
        RecordReader.Until, ("c",),
        Martel.Group("footer", Martel.Re(r"(c*\R)*")),
        RecordReader.Everything, (),)

    iterator = exp.make_iterator("record")
    assert isinstance(iterator, IterParser.IterHeaderFooter), iterator

    # The original had no comma after "a", so "a" "aa" was silently joined
    # into one "aaa" line; each entry is now its own line.
    lines = ["a",
             "aa",
             "aaaaaaa",
             "b",
             "bb",
             "bbbb",
             "bbbbbbbb",
             "bbbbbbbbbbbbbbbb",
             "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
             "cccc",
             "cc",
             "c",
             ]
    text = "\n".join(lines) + "\n"

    i = 0
    for rec in iterator.iterateString(text):
        i += 1
    assert i == 1, i
def __init__(self, debug=0):
    """Create the scanner's parser for primersearch output.

    Arguments:
    o debug - Value forwarded to make_parser as its debug_level.
    """
    # Only these tags are passed on by the parser.
    wanted_tags = ["primer_name",
                   "amplifier",
                   "amplifier_sequence",
                   "amplifier_length",
                   "end_record"]
    self.interest_tags = wanted_tags

    expression = Martel.select_names(primersearch_format.record,
                                     self.interest_tags)
    self._parser = expression.make_parser(debug_level=debug)
def test_record_parser():
    """ParseRecords must report element attributes for every record.

    Each one-line record yields two "term" elements (told apart by their
    "field" attribute) and one attribute-less "last" element.
    """
    line_format = Martel.Re("(?P<term?field=first>...)"
                            "(?P<term?field=second>...)"
                            r"(?P<last>.)\R")
    records_format = Martel.ParseRecords(
        "all", {"author": "guido"},
        line_format,
        RecordReader.CountLines, (1, ))

    grab = GrabElements()
    parser = records_format.make_parser()
    parser.setContentHandler(grab)
    parser.parseString("aaabbbZ\ncccdddZ\n")

    # The enclosing element, then the same three elements for each record.
    per_record = [
        ("term", {"field": "first"}),
        ("term", {"field": "second"}),
        ("last", {}),
    ]
    expected = [("all", {"author": "guido"})] + per_record + per_record

    elements = grab.elements
    assert len(elements) == 7
    for position in range(7):
        check_element(elements[position], expected[position])
def test_same_tag():
    """Two groups with the same tag must keep their own attributes."""
    pattern = Martel.Re("(?P<char?type=a>a+)(?P<char?type=b>b+)")

    grab = GrabElements()
    parser = pattern.make_parser()
    parser.setContentHandler(grab)
    parser.parseString("aaabb")

    found = grab.elements
    assert len(found) == 2, len(found)
    check_element(found[0], ("char", {"type": "a"}))
    check_element(found[1], ("char", {"type": "b"}))
def test_header_footer3():
    """HeaderFooterParser with a footer but no header."""
    s = """\
ID 1
This is some data
//
ID 2
This is some more data
//
Okay, that was all of the data.
"""
    gold = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><record>ID 1
This is some data
//
</record><record>ID 2
This is some more data
//
</record><footer>Okay, that was all of the data.
</footer></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    # Require at least 5 characters (just to be safe)
    footer_exp = Martel.Group("footer", Martel.Re(r".....(.|\n)*"))

    record_parser = record_exp.make_parser()
    footer_parser = footer_exp.make_parser()

    # Reader, reader arguments and tag table for header, record and footer.
    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Nothing, (), (),
        RecordReader.EndsWith, ("//\n", ), record_parser.tagtable,
        RecordReader.Everything, (), footer_parser.tagtable,
        (0, 1, {}))

    outfile = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(outfile))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(s))

    text = outfile.getvalue()
    assert text == gold, (text, gold)
def delimiter(delim):
    """Return a Martel format for one-line records of delimited fields.

    delim must be a single character other than "\\n" or "\\r".  Each
    record is one line; its fields are the runs of characters between
    delimiters and may be empty.
    """
    assert len(delim) == 1, \
        "delimiter can only be a single character long, not %s" % repr(delim)
    assert delim not in "\n\r", "Cannot use %s as a delimiter" % repr(delim)

    # A field is any run of characters holding neither the delimiter nor
    # an end-of-line character.
    field_chars = Martel.AnyBut(delim + "\r\n")
    field = Martel.Group("field", Martel.Rep(field_chars))

    # A line is one field, then any number of (delimiter, field) pairs.
    more_fields = Martel.Rep(Martel.Str(delim) + field)
    line = field + more_fields + Martel.AnyEol()

    return Martel.ParseRecords(
        "delimited", {},
        Martel.Group("record", line),
        RecordReader.CountLines, (1, ))
def __init__(self, debug=0):
    """Build the Martel parser for primer3 records.

    Arguments:
    o debug - Debugging level passed to the Martel parser as debug_level.
    """
    # Tags the scanner wants to receive from the Martel parser.
    self.interest_tags = [
        "comments",
        "single_primer_line",
        "start_primer",
        "product_size",
        "forward_start",
        "forward_length",
        "forward_tm",
        "forward_gc",
        "forward_seq",
        "reverse_start",
        "reverse_length",
        "reverse_tm",
        "reverse_gc",
        "reverse_seq",
        "internal_start",
        "internal_length",
        "internal_tm",
        "internal_gc",
        "internal_seq",
        "end_record",
    ]

    # Restrict the record expression to those tags before building the parser.
    record = primer3_format.record
    expression = Martel.select_names(record, self.interest_tags)
    self._parser = expression.make_parser(debug_level=debug)
def test_header_footer2():
    """HeaderFooterParser with a header but no footer."""
    s = """
This is some misc. header
text that goes on until the end.
ID 1
This is some data
ID 2
This is some more data
"""
    gold = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>
This is some misc. header
text that goes on until the end.
</header><record>ID 1
This is some data
</record><record>ID 2
This is some more data
</record></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()

    # Reader, reader arguments and tag table for header, record and footer.
    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.StartsWith, ("ID", ), record_parser.tagtable,
        RecordReader.Nothing, (), (),
        (0, 1, {}))

    outfile = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(outfile))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(s))

    text = outfile.getvalue()
    assert text == gold, (text, gold)
def test_header_footer7():
    """HeaderFooterParser with a header and footer but no records."""
    s = """\
This is some misc. header
text that goes on until the end.
FOOTER
"""
    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    footer_exp = Martel.Group("footer", Martel.Re("FOOTER(.|\n)*"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()
    footer_parser = footer_exp.make_parser()

    # Reader, reader arguments and tag table for header, record and footer.
    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.CountLines, (2, ), header_parser.tagtable,
        RecordReader.EndsWith, ("//", ), record_parser.tagtable,
        RecordReader.StartsWith, ("FOOTER", ), footer_parser.tagtable,
        (0, 1, {}))

    count = CountRecords("record")
    err = CountErrors()
    hf.setContentHandler(count)
    hf.setErrorHandler(err)
    hf.parseFile(StringIO(s))

    # No records in the input: nothing to count and nothing to report.
    assert err.error_count == 0, err.error_count
    assert err.fatal_error_count == 0, err.fatal_error_count
    assert count.count == 0, count.count
def test_header_footer5():
    """Bad records must be skipped when there is no footer reader.

    Three of the seven records have a non-numeric ID ("A", "Q", "W") and
    so do not match the record pattern.
    """
    s = """
This is some misc. header
text that goes on until the end.
ID 1
This is some data
ID A
This is some more data
ID 3
This is again some more data
ID Q
This blah
ID W
QWE
ID 987
To be
ID 897
Or not to be
"""
    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()

    # Reader, reader arguments and tag table for header, record and footer.
    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.StartsWith, ("ID", ), record_parser.tagtable,
        None, (), (),
        (0, 1, {}))

    count = CountRecords("record")
    err = CountErrors()
    hf.setContentHandler(count)
    hf.setErrorHandler(err)
    hf.parseFile(StringIO(s))

    assert err.error_count == 3, err.error_count
    assert err.fatal_error_count == 0, err.fatal_error_count
    assert count.count == 4, count.count
def test_ri3():
    """Check the error position reported for a bad second record.

    The second record starts with "b-", which the record pattern does not
    accept, so iteration must raise ParserPositionException.
    """
    ip = IterParser.IterRecords(
        Martel.Group("spam", Martel.Re(r"b*\Rc*\R")).make_parser(debug_level=1),
        RecordReader.CountLines, (2,),
        "spam")

    lines = ["b", "c", "b-", "cc", "bbb", "ccc"]
    text = "\n".join(lines) + "\n"

    try:
        for x in ip.iterateString(text):
            pass
    except Parser.ParserPositionException as exc:
        # "b\nc\n" occupies offsets 0-3, so the "-" sits at offset 5.
        assert exc.pos == 5, exc.pos
    else:
        # Without this the test would pass silently if no error were raised.
        raise AssertionError("expected a ParserPositionException at position 5")
def __init__(self, debug=0):
    """Create the scanner's parser for primer3 output.

    Arguments:
    o debug - Value forwarded to make_parser as its debug_level.
    """
    # Only these tags are passed on by the parser.
    wanted_tags = [
        "comments", "single_primer_line", "start_primer", "product_size",
        "forward_start", "forward_length", "forward_tm",
        "forward_gc", "forward_seq",
        "reverse_start", "reverse_length", "reverse_tm",
        "reverse_gc", "reverse_seq",
        "internal_start", "internal_length", "internal_tm",
        "internal_gc", "internal_seq",
        "end_record",
    ]
    self.interest_tags = wanted_tags

    expression = Martel.select_names(primer3_format.record,
                                     self.interest_tags)
    self._parser = expression.make_parser(debug_level=debug)
def make_iterator(self, tag="record", select_names=None, debug_level=0):
    """S.make_iterator([tag][, select_names][, debug_level]) -> iterator

    Return a copy of an iterator built from self.expression.  Iterators
    are cached in self._iterator_cache, one per combination of tag,
    selected names and debug level; a copy of the cached one is returned.

    tag - passed to the expression's make_iterator.
    select_names - optional iterable of names; if given, the expression
        is first narrowed with Martel.select_names.  Order is ignored.
    debug_level - passed to the expression's make_iterator.
    """
    if select_names is not None:
        # Sort so the same names in a different order share a cache entry.
        select_names = sorted(select_names)
        names_key = tuple(select_names)
    else:
        names_key = None

    # The tag is part of the key: an iterator built for one tag must not
    # be handed back for a request that names a different tag.
    key = tag, names_key, debug_level

    if key not in self._iterator_cache:
        exp = self.expression
        if select_names is not None:
            # Martel is only needed when the expression has to be narrowed.
            import Martel
            exp = Martel.select_names(exp, select_names)
        self._iterator_cache[key] = exp.make_iterator(
            tag, debug_level=debug_level)
    return self._iterator_cache[key].copy()
def make_parser(self, select_names=None, debug_level=0):
    """S.make_parser([select_names][, debug_level]) -> parser

    Return a copy of a parser built from self.expression.  Parsers are
    cached in self._parser_cache, one per combination of selected names
    and debug level; a copy of the cached one is returned.

    select_names - optional iterable of names; if given, the expression
        is first narrowed with Martel.select_names.  Order is ignored.
    debug_level - passed to the expression's make_parser.
    """
    if select_names is not None:
        # Sort so the same names in a different order share a cache entry.
        select_names = sorted(select_names)
        key = tuple(select_names), debug_level
    else:
        key = None, debug_level

    if key not in self._parser_cache:
        exp = self.expression
        if select_names is not None:
            # Martel is only needed when the expression has to be narrowed.
            import Martel
            exp = Martel.select_names(exp, select_names)
        self._parser_cache[key] = exp.make_parser(debug_level=debug_level)
    return self._parser_cache[key].copy()
def __init__(self, debug=0):
    """Build and cache the Martel parser used by this scanner.

    The parser is created once here and stored on the instance, because
    creating it takes a long time.

    Arguments:
    o debug - Debugging level handed to the Martel parser.  0 means no
    debugging; 2 gives the most output (and is much slower).  See the
    Martel documentation for details.
    """
    # Tags the scanner wants to receive from the Martel parser.
    self.interest_tags = [
        "comment",
        "title_line",
        "sequence",
    ]

    # Restrict the record expression to those tags before building the parser.
    record = intelligenetics_format.intelligenetics_record
    expression = Martel.select_names(record, self.interest_tags)
    self._parser = expression.make_parser(debug_level=debug)
def __init__(self, debug=0):
    """Build and cache the Martel parser used by this scanner.

    The parser is created once here and stored on the instance, because
    creating it takes a long time.

    Arguments:
    o debug - Debugging level handed to the Martel parser.  0 means no
    debugging; 2 gives the most output (and is much slower).  See the
    Martel documentation for details.
    """
    # Tags the scanner wants to receive from the Martel parser.
    self.interest_tags = [
        'header_line',
        'system_line',
        'substance_multiline',
        'reactor_multiline',
        'include_line',
    ]

    # Restrict the record expression to those tags before building the parser.
    record = ecell_format.ecell_record
    expression = Martel.select_names(record, self.interest_tags)
    self._parser = expression.make_parser(debug_level=debug)
def __init__(self, debug_level=0):
    """Initialize the scanner by setting up our caches.

    Creating the parser takes a long time, so we want to cache it
    to reduce parsing time.

    Arguments:
    o debug_level - The level of debugging that the parser should
    display. Level 0 is no debugging, Level 2 displays the most
    debugging info (but is much slower). See Martel documentation
    for more info on this.
    """
    # a listing of all tags we are interested in scanning for
    # in the MartelParser
    self.interest_tags = [
        "cd_tag",
        "description_tag",
        "status_tag",
        "source_tag",
        "date_tag",
        "taxonomy_tag",
        "aligned_tag",
        "representative_tag",
        "range_tag",
        "sequence_tag",
        "description_contents_multiline",
        "status_contents_multiline",
        "source_contents_multiline",
        "date_contents_multiline",
        "reference_contents_multiline",
        "taxonomy_contents_multiline",
        "aligned_contents_multiline",
        "representative_contents_multiline",
        "range_contents_multiline",
        "cd_contents_multiline",
        "sequence_contents_multiline",
        "table_entry",
    ]

    # make a parser that returns only the tags we are interested in
    expression = Martel.select_names(cdd_format.cdd_record,
                                     self.interest_tags)
    # Pass the level by keyword so the call does not depend on the
    # position of make_parser's parameters.
    self._parser = expression.make_parser(debug_level=debug_level)
def _fixup_sp_pattern(exp):
    """Turn a Martel dbxref expression into a compiled regular expression.

    The expression is narrowed to the dbxref name and id groups.  The
    single name group is renamed "dbname" and the two id groups are
    renamed "primary_dbid" and "secondary_dbid"; their attributes are
    removed once the wanted values have been read.

    Returns (pattern, dbstyle, primary_type, secondary_type): the compiled
    pattern (anchored with "$" at the end), the "style" attribute of the
    name group, and the "type" attributes of the two id groups.
    """
    import re
    import Martel

    exp = Martel.select_names(exp, (Std.dbxref_dbname.tag,
                                    Std.dbxref_dbid.tag))

    # Exactly one database-name group is expected.
    name_groups = exp._find_groups(Std.dbxref_dbname.tag)
    assert len(name_groups) == 1
    name_group = name_groups[0]
    name_group.name = "dbname"
    dbstyle = name_group.attrs["style"]
    name_group.attrs = {}

    # Exactly two database-id groups are expected: primary, then secondary.
    id_groups = exp._find_groups(Std.dbxref_dbid.tag)
    assert len(id_groups) == 2
    primary = id_groups[0]
    primary.name = "primary_dbid"
    primary_type = primary.attrs["type"]
    primary.attrs = {}
    secondary = id_groups[1]
    secondary.name = "secondary_dbid"
    secondary_type = secondary.attrs["type"]
    secondary.attrs = {}

    # str() of the expression gives its pattern text.
    pat = re.compile(str(exp) + "$")
    return pat, dbstyle, primary_type, secondary_type
print "acc Start" self.m_accession=1 if name=="pname": self.m_pname=1 if name=="bioformat:sequence": self.m_seq=1 if name=="bioformat:sequence_block": self.seq='' self.m_seq_block=1 def endElement(self, name): if name == "accession": print "acc End" self.m_accession=0 if name == "pname": self.m_pname=0 if name=="bioformat:sequence": self.m_seq=0 if name=="bioformat:sequence_block": #self.outf.write(self.seq+'\n') self.m_seq_block=0 if __name__=="__main__": exp = Martel.select_names(embl.format, ("record", "accession", "pname", "bioformat:sequence",)) parser=embl.format_expression.make_parser() #outf=open(sys.argv[2],'w') parser.setContentHandler(my_handler(sys.argv[2])) parser.parse(sys.argv[1])
Martel.Opt(Martel.Str(" ") + bib) + Martel.AnyEol()) # Here's the neq SQ line format -- uses a CRC64 # SQ SEQUENCE 889 AA; 100368 MW; ABD7E3CD53961B78 CRC64; SQ_exp = Martel.Re("SQ SEQUENCE +(?P<sequence_length>\d+) AA;" \ " +(?P<molecular_weight>\d+) MW;" \ " +(?P<crc?type=64>\w+) CRC64;\R") replacements = [ ("DT_created", DT_created_exp), ("OX_block", OX_exp), ("RX", RX_exp), ("SQ", SQ_exp), ] record = Martel.replace_groups(sprot38.record, replacements) format_expression = Martel.replace_groups( sprot38.format_expression, replacements) format = Martel.replace_groups(sprot38.format, replacements) if __name__ == "__main__": parser = format.make_parser() filename = "/home/dalke/ftps/databases/swiss-prot/release_compressed/sprot40.dat" ## import os ## infile = os.popen("zcat " + filename) infile = open(filename) infile.seek(107976062)
AC_block + DT_created + DT_seq_update + DT_ann_update + Martel.Opt(DE_block) + Martel.Opt(GN_block) + Martel.Opt(OS_block) + Martel.Opt(OG_block) + Martel.Opt(OC_block) + Martel.Group("OX_block", Martel.NullOp()) + Martel.Group("reference_block", Martel.Rep(reference)) + comment + Martel.Opt(DR_block) + Martel.Opt(KW_block) + Martel.Opt(feature_block) + sequence + end, {"format": "swissprot/38"}) format_expression = Martel.Group("dataset", Martel.Rep1(record), {"format": "swissprot/38"}) format = Martel.ParseRecords("dataset", {"format": "swissprot/38"}, record, RecordReader.EndsWith, ("//\n",) ) if __name__ == "__main__": exp = Martel.select_names(format, ("entry_name", "sequence")) parser = exp.make_parser() parser.parseFile(open("/home/dalke/ftps/swissprot/sprot38.dat"))