def _test_trees(self):
    """
    Return the dictionary of test trees, building it on first use.

    If `self._trees` is already non-empty it is returned untouched.
    Otherwise it is replaced by a fresh dictionary holding:

    * the trees parsed from the `TSTR0` and `TSTR1` strings, under the
      keys `tstr0` and `tstr1`;
    * one entry per file matching `tests/*.dis`, keyed by the file's
      base name, read together with the path obtained by dropping the
      file's extension.
    """
    if self._trees:
        return self._trees

    # Bind the new dictionary to the instance before filling it, so the
    # attribute and the local alias always refer to the same object.
    self._trees = {}
    trees = self._trees

    for idx, tree_str in enumerate((TSTR0, TSTR1)):
        trees["tstr%d" % idx] = parse.parse_rst_dt_tree(tree_str)

    for dis_path in glob.glob('tests/*.dis'):
        text_path, _ = os.path.splitext(dis_path)
        trees[os.path.basename(dis_path)] = parse.read_annotation_file(
            dis_path, text_path)

    return trees
def slurp_subcorpus(self, cfiles, verbose=False):
    """
    Read every annotation file listed in `cfiles`.

    `cfiles` maps keys to the argument handed to
    `parse.read_annotation_file`. Each object read has `set_origin`
    called on it with its key, and is stored in the returned dictionary
    under that same key. When `verbose` is true, progress is written to
    standard error.

    See `educe.rst_dt.parse` for a description of `RSTTree`
    """
    def report(template, done):
        # Fill in "files done so far / total files" and emit on stderr.
        sys.stderr.write(template % (done, len(cfiles)))

    corpus = {}
    processed = 0
    for processed, (key, path) in enumerate(cfiles.items(), start=1):
        if verbose:
            # Reported before reading, so the count excludes this file.
            report("\rSlurping corpus dir [%d/%d]", processed - 1)
        document = parse.read_annotation_file(path)
        document.set_origin(key)
        corpus[key] = document

    if verbose:
        report("\rSlurping corpus dir [%d/%d done]\n", processed)
    return corpus
def slurp_subcorpus(self, cfiles, verbose=False):
    """
    Read every annotation file listed in `cfiles`.

    `cfiles` maps keys to a sequence of positional arguments, which is
    unpacked into the call to `parse.read_annotation_file`. Each object
    read has `set_origin` called on it with its key, and is stored in
    the returned dictionary under that same key. When `verbose` is
    true, progress is written to standard error.

    See `educe.rst_dt.parse` for a description of `RSTTree`
    """
    corpus = {}
    for key, reader_args in cfiles.items():
        if verbose:
            # One entry is added per key, so the size of `corpus` is the
            # number of files already read.
            sys.stderr.write("\rSlurping corpus dir [%d/%d]"
                             % (len(corpus), len(cfiles)))
        document = parse.read_annotation_file(*reader_args)
        document.set_origin(key)
        corpus[key] = document

    if verbose:
        sys.stderr.write("\rSlurping corpus dir [%d/%d done]\n"
                         % (len(corpus), len(cfiles)))
    return corpus
def test_from_files(self):
    """
    For every file matching `tests/*.dis`, check that the length of the
    tree's text equals the `char_end` of the span of its `treenode`.

    Each `.dis` file is read together with the path obtained by
    dropping its extension. If no file matches the pattern, the loop
    body never runs and nothing is asserted.
    """
    for dis_path in glob.glob('tests/*.dis'):
        text_path, _ = os.path.splitext(dis_path)
        tree = read_annotation_file(dis_path, text_path)
        text_length = len(tree.text())
        node_span = treenode(tree).span
        self.assertEqual(text_length, node_span.char_end)