def _test_round_trip_source(self, file_obj, parser, leave_file=False, *args, **kwds):
    r"""Assert that parsing then regenerating ``file_obj`` reproduces its text.

    The lines of ``file_obj`` are parsed with ``parser`` (``args`` and
    ``kwds`` are forwarded to it) and converted back to text with
    ``sfm.generate``.  The two line lists are compared at three levels,
    returning at the first level that matches:

    1. the lines exactly as read;
    2. the lines with trailing whitespace stripped;
    3. the stripped lines with every ``\ft `` rewritten to ``\fr*``.

    If level 3 is reached, ``leave_file`` is true and ``file_obj`` has a
    ``name`` attribute, both normalised line lists are written out (one
    line per entry) to ``<basename>.normalised`` and
    ``<basename>.roundtrip``, where the path is built from the final
    component of ``file_obj.name`` only.  The files are written before the
    final assertion, whether or not it goes on to fail.
    """
    expected = list(file_obj)
    actual = sfm.generate(parser(expected, *args, **kwds)).splitlines(True)

    # Level 1: try for a perfect match first.
    if expected == actual:
        self.assertTrue(True)
        return

    # Level 2: normalise line endings (and any other trailing whitespace).
    expected = [line.rstrip() for line in expected]
    actual = [line.rstrip() for line in actual]
    if expected == actual:
        self.assertTrue(True)
        return

    # Level 3: normalise the \f ..\f* marker forms on both sides.
    expected = [line.replace(r'\ft ', r'\fr*') for line in expected]
    actual = [line.replace(r'\ft ', r'\fr*') for line in actual]

    if leave_file and hasattr(file_obj, 'name'):
        # Only the file's base name is used, so the dumps are created
        # relative to the current directory rather than beside the input.
        base = Path(Path(file_obj.name).name)
        encoding = getattr(file_obj, 'encoding', None)
        dumps = (('.normalised', expected), ('.roundtrip', actual))
        for suffix, lines in dumps:
            with base.with_suffix(suffix).open('w', encoding=encoding) as out:
                out.writelines(line + '\n' for line in lines)

    self.assertEqual(expected, actual, 'roundtriped source not equal')
def test_format(self):
    """Check that a sample document survives generate-then-reparse.

    A fixed list of source lines is parsed, regenerated with
    ``sfm.generate`` and parsed again.  The test then checks that both
    parses are equal, that the first ten regenerated lines equal the first
    ten source lines, that the recorded warnings carry the same message
    arguments, and that every flattened node agrees on line number, column
    (within 1), ``meta`` and ``annotations``.
    """
    src = [
        '\\test\n',
        '\\test text\n',
        '\\sfm text\n',
        'bare text\n',
        '\\more-sfm more text\n',
        # NOTE: the next two adjacent literals concatenate into a single
        # list entry; the "[:10] skips the last 2" check below depends on
        # the list having twelve entries.
        'over a line break\\marker'
        '\\le unix\n',
        '\\le windows\r\n',
        '\\le missing\n',
        '\\test\\i1\\i2 deep text\\i1*\n',
        '\\test\\i1\\i2 deep text\n',
        # These forms do not transduce identically due to whitespace
        # differences
        '\\test \\inline text\\inline*\n',
        '\\test \\i1\\i2 deep\\i2*\\i1*\n',
    ]

    def parse_recording_warnings(lines):
        # Parse `lines` with every SyntaxWarning recorded, none suppressed.
        with warnings.catch_warnings(record=True) as caught:
            warnings.resetwarnings()
            warnings.simplefilter("always", SyntaxWarning)
            parsed = list(sfm.parser(lines))
        return parsed, caught

    ref_parse, ref_parse_errors = parse_recording_warnings(src)
    trans_src = sfm.generate(ref_parse).splitlines(True)
    trans_parse, trans_parse_errors = parse_recording_warnings(trans_src)

    # Check the parsed pretty printed doc matches the reference
    self.assertEqual(trans_parse, ref_parse)
    # Check pretty printer output matches input, skip the last 2
    self.assertEqual(trans_src[:10], src[:10])

    # Check the errors match
    for actual, expected in zip(trans_parse_errors[:31], ref_parse_errors):
        with self.subTest(warning=str(expected)):
            self.assertEqual(actual.message.args, expected.message.args)

    # Check all the line positions, meta data and annotations line up
    for actual, expected in zip(flatten(trans_parse), flatten(ref_parse)):
        with self.subTest():
            self.assertEqual(actual.pos.line, expected.pos.line)
            # Columns may differ by one.
            self.assertAlmostEqual(actual.pos.col, expected.pos.col, delta=1)
            self.assertEqual(getattr(actual, 'meta', None),
                             getattr(expected, 'meta', None))
            self.assertEqual(getattr(actual, 'annotations', None),
                             getattr(expected, 'annotations', None))
def _test_round_trip_parse(self, file, parser, leave_file=False, *args, **kwds):
    """Assert that regenerated text parses to the same flattened document.

    ``file`` is parsed with ``parser`` (``args`` and ``kwds`` are
    forwarded), the result is turned back into text with ``sfm.generate``
    and that text is parsed again.  The two parses are compared after
    ``flatten``.

    On a ``SyntaxError`` or ``AssertionError``, when ``leave_file`` is true
    and ``file`` has a ``name`` attribute, the regenerated text is written
    to ``<basename>.regenerated`` (path built from the final component of
    ``file.name``) and the exception's ``filename`` is pointed at it.
    ``AssertionError`` is then re-raised; ``SyntaxError`` is only printed.
    """
    parsed = list(parser(file, *args, **kwds))
    regenerated = sfm.generate(parsed)
    try:
        flat_original = list(flatten(parsed))
        reparsed = parser(regenerated.splitlines(True), *args, **kwds)
        flat_round_trip = list(flatten(reparsed))

        # Check for an equivalent parse.
        self.assertEqual(flat_original, flat_round_trip, 'roundtrip parse unequal')
    except (SyntaxError, AssertionError) as err:
        if leave_file and hasattr(file, 'name'):
            dump_path = Path(Path(file.name).name).with_suffix('.regenerated')
            encoding = getattr(file, 'encoding', None)
            with dump_path.open('w', encoding=encoding) as dump:
                dump.write(regenerated)
            # Point the error at the dump so the offending text can be found.
            err.filename = dump.name
        if isinstance(err, AssertionError):
            raise
        print(err)
else: opts.tags = [tag.strip() for tag in opts.tags.split(', ')] output = codecs.open(os.path.expanduser(args[0]), mode='w', encoding='utf_8_sig') paths = chain.from_iterable(map(glob.iglob, args[1:])) files = (codecs.open(p, mode='r', encoding='utf_8_sig') for p in paths) try: with warnings.catch_warnings(): warnings.simplefilter("always" if opts.warnings else "ignore", SyntaxWarning) docs = [ list( usfm.parser(stylesheet=opts.stylesheet, error_level=opts.error_level)) for f in files ] validate_structure(*docs) output.write(sfm.generate(merge(opts.tags, *docs))) except IOError as err: sys.stderr.write( parser.expand_prog_name(f'%prog: IO error: {err!s}\n')) sys.exit(2) except StructureError: sys.stderr.write( parser.expand_prog_name( '%prog: Structure error: A USFM file does not match the others\n' )) sys.exit(3)
# Build the list of (input path, output path) jobs.
#   * no opts.output            -> every input is written to "<input>_u"
#   * opts.output is not a dir  -> first input goes to opts.output, the
#                                  remaining inputs go to "<input>_u"
#   * opts.output is a dir      -> each input keeps its file name inside it
work = []
first_def = -1  # index of the first input that gets a default "_u" name
if not opts.output:
    first_def = 0
elif not os.path.isdir(opts.output):
    work.append((sfms[0], opts.output))
    first_def = 1
else:
    work.extend(
        zip(sfms,
            (os.path.join(opts.output, os.path.split(x)[1]) for x in sfms)))
if first_def > -1:
    work.extend(zip(sfms[first_def:], (f"{x}_u" for x in sfms[first_def:])))

try:
    with warnings.catch_warnings():
        warnings.simplefilter("always" if opts.warnings else "ignore",
                              SyntaxWarning)
        for src_path, dst_path in work:
            res = generate(transduce(src_path, opts))
            # The context manager closes the output even if write() raises.
            with codecs.open(dst_path, "w", "utf-8") as ofh:
                ofh.write(res)
except IOError as err:
    sys.stderr.write(
        parser.expand_prog_name(f'%prog: IO error: {err!s}\n'))
    sys.exit(2)