def test_comments(self):
    """Verify that bracketed comments are captured and associated with the correct tokens."""
    input_str = "([the quick]apple[brown],([fox]banjo,([jumps]cucumber[over the],[really]dogwood)[lazy]eggplant)) rhubarb[dog];"
    expected_comments = {
        "apple": ["the quick", "brown"],
        "banjo": ["fox"],
        "cucumber": ["jumps", "over the"],
        "dogwood": ["really"],
        "eggplant": ["lazy"],
        "rhubarb": ["dog"],
    }
    expected_tokens = [
        "(", "apple", ",",
        "(", "banjo", ",",
        "(", "cucumber", ",", "dogwood", ")",
        "eggplant", ")", ")",
        "rhubarb", ";",
    ]
    tokenizer = nexusprocessing.NexusTokenizer(src=StringIO(input_str))
    observed_tokens = []
    for token in tokenizer:
        if token in expected_comments:
            # Comments must be retrievable as soon as their associated
            # token is produced; pop so each expectation is consumed once.
            self.assertEqual(
                    expected_comments.pop(token),
                    tokenizer.pull_captured_comments())
        observed_tokens.append(token)
    # All expected comments were seen, and the token stream matched.
    self.assertEqual(expected_comments, {})
    self.assertEqual(observed_tokens, expected_tokens)
def tree_iter(self, stream, taxon_symbol_mapper, tree_factory):
    """
    Iterator that yields trees in NEWICK-formatted source.

    Parameters
    ----------
    stream : file or file-like object
        A file or file-like object opened for reading.
    taxon_symbol_mapper : object
        Object providing a ``require_taxon_for_symbol`` method, used to
        resolve taxon symbols encountered in the source to taxon objects.
    tree_factory : function object
        A function that returns a new |Tree| object when called
        without arguments.

    Returns
    -------
    iter : :py`collections.Iterator` [|Tree|]
        An iterator yielding |Tree| objects constructed based on
        data in ``stream``. A final ``None`` is yielded when the
        source is exhausted, then iteration stops.
    """
    nexus_tokenizer = nexusprocessing.NexusTokenizer(stream,
            preserve_unquoted_underscores=self.preserve_unquoted_underscores)
    while True:
        tree = self._parse_tree_statement(
                nexus_tokenizer=nexus_tokenizer,
                tree_factory=tree_factory,
                taxon_symbol_map_fn=taxon_symbol_mapper.require_taxon_for_symbol)
        # Preserve the historical contract of yielding the terminating
        # None sentinel before stopping.
        yield tree
        if tree is None:
            # PEP 479 (Python 3.7+): raising StopIteration inside a
            # generator is converted to RuntimeError; a plain return is
            # the correct way to end the generator.
            return
def f():
    """Tokenize each source file in ``src_paths``, discarding the tokens.

    Useful for exercising/timing the tokenizer over a set of files.
    Progress is reported to stderr when ``verbose`` is true.
    """
    for src_path in src_paths:
        if verbose:
            sys.stderr.write(" .. {}\n".format(src_path))
        # "rU" mode was deprecated and removed in Python 3.11; default
        # text mode already performs universal-newline handling. A
        # context manager guarantees the handle is closed (the original
        # leaked one file descriptor per source file).
        with open(src_path) as src:
            nt = nexusprocessing.NexusTokenizer(src)
            for token in nt:
                pass
def f():
    """Parse all tree statements in each source file in ``src_paths``.

    Results are discarded; this exercises the NEWICK parser over a set
    of files. Progress is reported to stderr when ``verbose`` is true.
    """
    for src_path in src_paths:
        if verbose:
            sys.stderr.write(" .. {}\n".format(src_path))
        # "rU" mode was deprecated and removed in Python 3.11; default
        # text mode already performs universal-newline handling. A
        # context manager guarantees the handle is closed (the original
        # leaked one file descriptor per source file).
        with open(src_path) as src:
            nt = nexusprocessing.NexusTokenizer(src)
            np = newickreader.NewickTreeParser()
            while True:
                # parse_tree_statement returns None when the source is
                # exhausted.
                t = np.parse_tree_statement(nt, tree_factory=Tree)
                if t is None:
                    break
def _yield_items_from_stream(self, stream):
    """Generate trees parsed one at a time from ``stream``.

    Builds a tokenizer over ``stream`` and a taxon-symbol mapper bound
    to the attached taxon namespace, then repeatedly parses tree
    statements until the source is exhausted.
    """
    reader = self.newick_reader
    tokenizer = nexusprocessing.NexusTokenizer(
            stream,
            preserve_unquoted_underscores=reader.preserve_unquoted_underscores)
    symbol_mapper = nexusprocessing.NexusTaxonSymbolMapper(
            taxon_namespace=self.attached_taxon_namespace,
            enable_lookup_by_taxon_number=False,
            case_sensitive=reader.case_sensitive_taxon_labels)
    map_fn = symbol_mapper.require_taxon_for_symbol
    while True:
        tree = reader._parse_tree_statement(
                nexus_tokenizer=tokenizer,
                tree_factory=self.tree_factory,
                taxon_symbol_map_fn=map_fn)
        # A None result signals end-of-source; finish the generator.
        if tree is None:
            return
        yield tree
def check_tokenization(self, input_str, expected_tokens):
    """Assert that tokenizing ``input_str`` yields exactly ``expected_tokens``."""
    tokenizer = nexusprocessing.NexusTokenizer(src=StringIO(input_str))
    observed = list(tokenizer)
    self.assertEqual(observed, expected_tokens)