def __init__(self, debug=0):
    """Set up the scanner and build its parser once, up front.

    Building the parser is slow, so it is created here and kept on the
    instance instead of being rebuilt for every parse.

    Arguments:
    o debug - Debugging level handed to the parser.  0 means no
    debugging output, 2 is the most verbose (and much slower).  See the
    Martel documentation for details.
    """
    # Every tag this scanner wants to receive from the Martel parser.
    self.interest_tags = [
        # header information
        "input_file_name", "num_int_metabolites", "num_reactions",
        "metabolite_line", "unbalanced_metabolite",
        "num_rows", "num_cols", "irreversible_vector",
        "branch_metabolite", "non_branch_metabolite",
        # section start markers
        "stoichiometric_tag", "kernel_tag", "subsets_tag",
        "reduced_system_tag", "convex_basis_tag",
        "conservation_relations_tag", "elementary_modes_tag",
        # section contents
        "reaction", "enzyme", "matrix_row", "sum_is_constant_line",
        # section end markers
        "end_stochiometric", "end_kernel", "end_subsets",
        "end_reduced_system", "end_convex_basis",
        "end_conservation_relations", "end_elementary_modes",
    ]

    # Restrict the record expression to those tags, then build the parser.
    selected = Martel.select_names(metatool_format.metatool_record,
                                   self.interest_tags)
    self._parser = selected.make_parser(debug_level=debug)
def index(handle, index_fn=None):
    """index(handle[, index_fn]) -> list of (PMID, MedlineID, start, end)

    Index a Medline XML file.  Returns where the records are, as
    offsets from the beginning of the handle.  index_fn is an optional
    callback taking (PMID, MedlineID, start, end); when given, it is
    called as soon as each record is indexed.

    """
    # Read the first 1000 characters to pick the format module, then wrap
    # the handle together with what was read (presumably so the parser
    # still sees the consumed prefix -- confirm in _SavedDataHandle).
    head = handle.read(1000)
    format_module = choose_format(head)
    wrapped_handle = _SavedDataHandle(handle, head)

    # Only the citation and its two identifiers are needed for indexing.
    record_format = Martel.select_names(
        format_module.format, ["MedlineCitation", "PMID", "MedlineID"])

    # Collect every record location, notifying the caller first if a
    # callback was supplied.
    found = []

    def record_located(pmid, medline_id, start, end):
        if index_fn is not None:
            index_fn(pmid, medline_id, start, end)
        found.append((pmid, medline_id, start, end))

    content_handler = _IndexerHandler(record_located)

    # Build the parser, attach the handlers and run it over the input.
    sax_parser = record_format.make_parser(debug_level=0)
    sax_parser.setContentHandler(content_handler)
    sax_parser.setErrorHandler(handler.ErrorHandler())
    sax_parser.parseFile(wrapped_handle)
    return found
def __init__(self, debug=0):
    """Record the tags of interest and build the parser for them.

    Arguments:
    o debug - Passed to the parser as its debug_level.
    """
    # Tags this object wants reported by the Martel parser.
    self.interest_tags = [
        "primer_name",
        "amplifier",
        "amplifier_sequence",
        "amplifier_length",
        "end_record",
    ]

    # Narrow the record expression to those tags before building the parser.
    selected = Martel.select_names(primersearch_format.record,
                                   self.interest_tags)
    self._parser = selected.make_parser(debug_level=debug)
def __init__(self, debug=0):
    """Store the tag names to report and create the matching parser.

    Arguments:
    o debug - Forwarded to make_parser() as debug_level.
    """
    # Names of the elements the parser should report.
    self.interest_tags = ["primer_name",
                          "amplifier", "amplifier_sequence",
                          "amplifier_length",
                          "end_record"]

    # Select only those names from the primersearch record expression.
    restricted = Martel.select_names(primersearch_format.record,
                                     self.interest_tags)
    self._parser = restricted.make_parser(debug_level=debug)
def __init__(self, debug=0):
    """Record the tags of interest and build the parser for them.

    Arguments:
    o debug - Passed to the parser as its debug_level.
    """
    # The tag list is assembled from groups purely for readability; the
    # resulting order is what is stored on the instance.
    general_tags = ["comments", "single_primer_line", "start_primer",
                    "product_size"]
    forward_tags = ["forward_start", "forward_length", "forward_tm",
                    "forward_gc", "forward_seq"]
    reverse_tags = ["reverse_start", "reverse_length", "reverse_tm",
                    "reverse_gc", "reverse_seq"]
    internal_tags = ["internal_start", "internal_length", "internal_tm",
                     "internal_gc", "internal_seq"]
    self.interest_tags = (general_tags + forward_tags + reverse_tags
                          + internal_tags + ["end_record"])

    # Narrow the record expression to those tags before building the parser.
    selected = Martel.select_names(primer3_format.record,
                                   self.interest_tags)
    self._parser = selected.make_parser(debug_level=debug)
def __init__(self, debug=0):
    """Store the tag names to report and create the matching parser.

    Arguments:
    o debug - Forwarded to make_parser() as debug_level.
    """
    # Names of the elements the parser should report, one group per row.
    self.interest_tags = [
        "comments", "single_primer_line", "start_primer", "product_size",
        "forward_start", "forward_length", "forward_tm", "forward_gc", "forward_seq",
        "reverse_start", "reverse_length", "reverse_tm", "reverse_gc", "reverse_seq",
        "internal_start", "internal_length", "internal_tm", "internal_gc", "internal_seq",
        "end_record",
    ]

    # Select only those names from the primer3 record expression.
    restricted = Martel.select_names(primer3_format.record,
                                     self.interest_tags)
    self._parser = restricted.make_parser(debug_level=debug)
def make_iterator(self, tag="record", select_names=None, debug_level=0):
    """S.make_iterator([tag][, select_names][, debug_level]) -> iterator

    Return a copy of a cached iterator, building and caching it on the
    first request.

    Arguments:
    o tag - Passed to the expression's make_iterator().
    o select_names - Optional collection of tag names; when given, the
    expression is restricted to them with Martel.select_names.  Order
    does not matter: the names are sorted before use.
    o debug_level - Passed to the expression's make_iterator().
    """
    if select_names is not None:
        select_names = list(select_names)
        select_names.sort()
        names_key = tuple(select_names)
    else:
        names_key = None

    # The tag is part of the key: an iterator built for one tag must not
    # be handed back when a different tag is requested.
    key = tag, names_key, debug_level

    if key not in self._iterator_cache:
        # Imported lazily, only when something actually has to be built.
        import Martel
        exp = self.expression
        if select_names is not None:
            exp = Martel.select_names(exp, select_names)
        self._iterator_cache[key] = exp.make_iterator(
            tag, debug_level=debug_level)

    # Return a copy rather than the cached object itself.
    return self._iterator_cache[key].copy()
def make_parser(self, select_names=None, debug_level=0):
    """S.make_parser([select_names][, debug_level]) -> parser

    Return a copy of a cached parser, building and caching it on the
    first request for a given (select_names, debug_level) combination.

    Arguments:
    o select_names - Optional collection of tag names; when given, the
    expression is restricted to them with Martel.select_names.  Order
    does not matter: the names are sorted before use.
    o debug_level - Passed to the expression's make_parser().
    """
    if select_names is not None:
        select_names = list(select_names)
        select_names.sort()
        key = tuple(select_names), debug_level
    else:
        key = None, debug_level

    # "in" replaces dict.has_key(), which no longer exists in Python 3.
    if key not in self._parser_cache:
        # Imported lazily, only when something actually has to be built.
        import Martel
        exp = self.expression
        if select_names is not None:
            exp = Martel.select_names(exp, select_names)
        self._parser_cache[key] = exp.make_parser(debug_level=debug_level)

    # Return a copy rather than the cached object itself.
    return self._parser_cache[key].copy()
def __init__(self, debug=0):
    """Set up the scanner and build its parser once, up front.

    Building the parser is slow, so it is created here and kept on the
    instance instead of being rebuilt for every parse.

    Arguments:
    o debug - Debugging level handed to the parser.  0 means no
    debugging output, 2 is the most verbose (and much slower).  See the
    Martel documentation for details.
    """
    # Every tag this scanner wants to receive from the Martel parser.
    self.interest_tags = [
        "comment",
        "title_line",
        "sequence",
    ]

    # Restrict the record expression to those tags, then build the parser.
    selected = Martel.select_names(
        intelligenetics_format.intelligenetics_record, self.interest_tags)
    self._parser = selected.make_parser(debug_level=debug)
def __init__(self, debug=0):
    """Prepare the scanner, creating the (expensive) parser only once.

    The parser takes a long time to create, so it is built here and
    stored on the instance for reuse.

    Arguments:
    o debug - How much debugging output the parser should produce:
    0 for none, up to 2 for the most (which is much slower).  The
    Martel documentation has more on this.
    """
    # Tags to scan for in the Martel parser output.
    tags = ["comment", "title_line", "sequence"]
    self.interest_tags = tags

    # Build a parser that reports only the tags listed above.
    record_expression = intelligenetics_format.intelligenetics_record
    restricted = Martel.select_names(record_expression, self.interest_tags)
    self._parser = restricted.make_parser(debug_level=debug)
def __init__(self, debug=0):
    """Set up the scanner and build its parser once, up front.

    Building the parser is slow, so it is created here and kept on the
    instance instead of being rebuilt for every parse.

    Arguments:
    o debug - Debugging level handed to the parser.  0 means no
    debugging output, 2 is the most verbose (and much slower).  See the
    Martel documentation for details.
    """
    # Every tag this scanner wants to receive from the Martel parser.
    self.interest_tags = [
        'header_line',
        'system_line',
        'substance_multiline',
        'reactor_multiline',
        'include_line',
    ]

    # Restrict the record expression to those tags, then build the parser.
    selected = Martel.select_names(ecell_format.ecell_record,
                                   self.interest_tags)
    self._parser = selected.make_parser(debug_level=debug)
def __init__(self, debug_level=0):
    """Initialize the scanner by setting up our caches.

    Creating the parser takes a long time, so we want to cache it
    to reduce parsing time.

    Arguments:
    o debug_level - The level of debugging that the parser should
    display. Level 0 is no debugging, Level 2 displays the most
    debugging info (but is much slower). See Martel documentation
    for more info on this.
    """
    # a listing of all tags we are interested in scanning for
    # in the MartelParser
    self.interest_tags = [
        # field markers
        "cd_tag",
        "description_tag",
        "status_tag",
        "source_tag",
        "date_tag",
        "taxonomy_tag",
        "aligned_tag",
        "representative_tag",
        "range_tag",
        "sequence_tag",
        # field contents
        "description_contents_multiline",
        "status_contents_multiline",
        "source_contents_multiline",
        "date_contents_multiline",
        "reference_contents_multiline",
        "taxonomy_contents_multiline",
        "aligned_contents_multiline",
        "representative_contents_multiline",
        "range_contents_multiline",
        "cd_contents_multiline",
        "sequence_contents_multiline",
        "table_entry",
    ]

    # make a parser that returns only the tags we are interested in
    expression = Martel.select_names(cdd_format.cdd_record,
                                     self.interest_tags)
    # Pass the level by keyword so the call does not depend on
    # make_parser's positional parameter order.
    self._parser = expression.make_parser(debug_level=debug_level)
def _fixup_sp_pattern(exp):
    """Turn a dbxref expression into a compiled regular expression.

    The expression is restricted to the dbxref database-name and
    database-id groups.  Those groups are renamed to "dbname",
    "primary_dbid" and "secondary_dbid" and their attributes are
    cleared, after the "style" and "type" attribute values have been
    read off.

    Returns a 4-tuple: (compiled pattern, dbname style, primary id type,
    secondary id type).  Exactly one dbname group and exactly two dbid
    groups are expected; this is checked with assertions.
    """
    import re
    import Martel

    name_tag = Std.dbxref_dbname.tag
    id_tag = Std.dbxref_dbid.tag
    exp = Martel.select_names(exp, (name_tag, id_tag))

    # The single database-name group.
    name_groups = exp._find_groups(Std.dbxref_dbname.tag)
    assert len(name_groups) == 1
    name_group = name_groups[0]
    name_group.name = "dbname"
    dbstyle = name_group.attrs["style"]
    name_group.attrs = {}

    # The two database-id groups: primary first, then secondary.
    id_groups = exp._find_groups(Std.dbxref_dbid.tag)
    assert len(id_groups) == 2

    primary_group = id_groups[0]
    primary_group.name = "primary_dbid"
    primary_type = primary_group.attrs["type"]
    primary_group.attrs = {}

    secondary_group = id_groups[1]
    secondary_group.name = "secondary_dbid"
    secondary_type = secondary_group.attrs["type"]
    secondary_group.attrs = {}

    # Anchor the pattern text at the end before compiling it.
    compiled = re.compile(str(exp) + "$")
    return compiled, dbstyle, primary_type, secondary_type
print "acc Start" self.m_accession=1 if name=="pname": self.m_pname=1 if name=="bioformat:sequence": self.m_seq=1 if name=="bioformat:sequence_block": self.seq='' self.m_seq_block=1 def endElement(self, name): if name == "accession": print "acc End" self.m_accession=0 if name == "pname": self.m_pname=0 if name=="bioformat:sequence": self.m_seq=0 if name=="bioformat:sequence_block": #self.outf.write(self.seq+'\n') self.m_seq_block=0 if __name__=="__main__": exp = Martel.select_names(embl.format, ("record", "accession", "pname", "bioformat:sequence",)) parser=embl.format_expression.make_parser() #outf=open(sys.argv[2],'w') parser.setContentHandler(my_handler(sys.argv[2])) parser.parse(sys.argv[1])
AC_block + DT_created + DT_seq_update + DT_ann_update + Martel.Opt(DE_block) + Martel.Opt(GN_block) + Martel.Opt(OS_block) + Martel.Opt(OG_block) + Martel.Opt(OC_block) + Martel.Group("OX_block", Martel.NullOp()) + Martel.Group("reference_block", Martel.Rep(reference)) + comment + Martel.Opt(DR_block) + Martel.Opt(KW_block) + Martel.Opt(feature_block) + sequence + end, {"format": "swissprot/38"}) format_expression = Martel.Group("dataset", Martel.Rep1(record), {"format": "swissprot/38"}) format = Martel.ParseRecords("dataset", {"format": "swissprot/38"}, record, RecordReader.EndsWith, ("//\n",) ) if __name__ == "__main__": exp = Martel.select_names(format, ("entry_name", "sequence")) parser = exp.make_parser() parser.parseFile(open("/home/dalke/ftps/swissprot/sprot38.dat"))