Example #1
    def _test_round_trip_source(self, file_obj, parser, leave_file=False,
                                *args, **kwds):
        source = list(file_obj)
        rt_src = sfm.generate(parser(source, *args, **kwds)).splitlines(True)

        # Try for perfect match first
        if source == rt_src:
            self.assertTrue(True)
            return

        # Normalise line endings
        source = [x.rstrip() for x in source]
        rt_src = [x.rstrip() for x in rt_src]
        if source == rt_src:
            self.assertTrue(True)
            return

        # Normalise the \f ...\f* marker forms in both the source and the
        # round-tripped output
        source = [x.replace(r'\ft ', r'\fr*') for x in source]
        rt_src = [x.replace(r'\ft ', r'\fr*') for x in rt_src]

        if leave_file and hasattr(file_obj, 'name'):
            path = Path(Path(file_obj.name).name)
            enc = getattr(file_obj, 'encoding', None)

            with path.with_suffix('.normalised').open('w', encoding=enc) as f:
                f.writelines(x+'\n' for x in source)
            with path.with_suffix('.roundtrip').open('w', encoding=enc) as f:
                f.writelines(x+'\n' for x in rt_src)

        self.assertEqual(source, rt_src, 'roundtripped source not equal')
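
For context, the round trip this helper normalises and compares boils down to parsing a list of source lines and pretty-printing them back out. A minimal sketch, assuming the sfm module used here is palaso.sfm (the examples only show it imported as sfm) and an illustrative file name:

import codecs
import palaso.sfm as sfm   # assumed import path; the examples only show `sfm`

# File name is illustrative only.
with codecs.open('example.sfm', 'r', encoding='utf_8_sig') as f:
    source = list(f)

doc = list(sfm.parser(source))                # parse into an element tree
rt_src = sfm.generate(doc).splitlines(True)   # pretty-print back into lines

# A byte-for-byte match is the ideal; the helper above falls back to
# comparing whitespace- and footnote-marker-normalised forms when it is not.
print(source == rt_src)
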
Example #2
    def test_format(self):
        src = ['\\test\n',
               '\\test text\n',
               '\\sfm text\n',
               'bare text\n',
               '\\more-sfm more text\n',
               'over a line break\\marker'
               '\\le unix\n',
               '\\le windows\r\n',
               '\\le missing\n',
               '\\test\\i1\\i2 deep text\\i1*\n',
               '\\test\\i1\\i2 deep text\n',
               # These forms do not transduce identically due to whitespace
               # differences
               '\\test \\inline text\\inline*\n',
               '\\test \\i1\\i2 deep\\i2*\\i1*\n']

        with warnings.catch_warnings(record=True) as ref_parse_errors:
            warnings.resetwarnings()
            warnings.simplefilter("always", SyntaxWarning)
            ref_parse = list(sfm.parser(src))
        trans_src = sfm.generate(ref_parse).splitlines(True)

        with warnings.catch_warnings(record=True) as trans_parse_errors:
            warnings.resetwarnings()
            warnings.simplefilter("always", SyntaxWarning)
            trans_parse = list(sfm.parser(trans_src))

        # Check the parsed pretty printed doc matches the reference
        self.assertEqual(trans_parse, ref_parse)
        # Check the pretty printer output matches the input, skipping the last 2
        self.assertEqual(trans_src[:10], src[:10])
        # Check the errors match
        for a, e in zip(trans_parse_errors[:31], ref_parse_errors):
            with self.subTest(warning=str(e)):
                self.assertEqual(a.message.args, e.message.args)

        # Check all the line positions, metadata and annotations line up
        for a, e in zip(flatten(trans_parse), flatten(ref_parse)):
            with self.subTest():
                self.assertEqual(a.pos.line, e.pos.line)
                self.assertAlmostEqual(a.pos.col, e.pos.col, delta=1)
                self.assertEqual(getattr(a, 'meta', None),
                                 getattr(e, 'meta', None))
                self.assertEqual(getattr(a, 'annotations', None),
                                 getattr(e, 'annotations', None))
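
Note that flatten is not defined in these snippets. A plausible sketch, assuming the parser yields nested, list-like element nodes whose leaves are string-like text nodes (both carrying pos and optional meta/annotations attributes):

def flatten(doc):
    # Depth-first walk: yield each node, then recurse into list-like
    # element nodes to reach their children.
    for node in doc:
        yield node
        if isinstance(node, list):
            yield from flatten(node)
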
Example #3
    def _test_round_trip_parse(self, file, parser,
                               leave_file=False,
                               *args, **kwds):
        doc = list(parser(file, *args, **kwds))
        regenerated = sfm.generate(doc)
        try:
            doc = list(flatten(doc))
            rt_doc = list(flatten(parser(regenerated.splitlines(True),
                                         *args, **kwds)))

            # Check for equivalent parse.
            self.assertEqual(doc,
                             rt_doc,
                             'roundtrip parse unequal')
        except (SyntaxError, AssertionError) as se:
            if leave_file and hasattr(file, 'name'):
                path = Path(Path(file.name).name).with_suffix('.regenerated')
                enc = getattr(file, 'encoding', None)
                with path.open('w', encoding=enc) as f:
                    f.write(regenerated)
                    se.filename = f.name
            if isinstance(se, AssertionError):
                raise
            print(str(se))
Example #4
    else:
        opts.tags = [tag.strip() for tag in opts.tags.split(',')]
    output = codecs.open(os.path.expanduser(args[0]),
                         mode='w',
                         encoding='utf_8_sig')
    paths = chain.from_iterable(map(glob.iglob, args[1:]))
    files = (codecs.open(p, mode='r', encoding='utf_8_sig') for p in paths)
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("always" if opts.warnings else "ignore",
                                  SyntaxWarning)
            docs = [
                list(
                    usfm.parser(f,
                                stylesheet=opts.stylesheet,
                                error_level=opts.error_level)) for f in files
            ]

            validate_structure(*docs)

            output.write(sfm.generate(merge(opts.tags, *docs)))
    except IOError as err:
        sys.stderr.write(
            parser.expand_prog_name(f'%prog: IO error: {err!s}\n'))
        sys.exit(2)
    except StructureError:
        sys.stderr.write(
            parser.expand_prog_name(
                '%prog: Structure error: A USFM file does not match the others\n'
            ))
        sys.exit(3)
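
One detail worth noting: the generator expression above opens every input file lazily, and neither those handles nor output are ever closed explicitly. A minimal sketch of the same setup with explicit cleanup, using only the standard library (args, paths and the parse/validate/merge steps keep the names from the snippet):

import codecs
import os
from contextlib import ExitStack

with ExitStack() as stack:
    output = stack.enter_context(
        codecs.open(os.path.expanduser(args[0]), mode='w', encoding='utf_8_sig'))
    files = [stack.enter_context(codecs.open(p, mode='r', encoding='utf_8_sig'))
             for p in paths]
    # ... parse each file, validate_structure(), merge() and write as above ...
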
Example #5
    work = []
    first_def = -1
    if not opts.output:
        first_def = 0
    elif not os.path.isdir(opts.output):
        work.append((sfms[0], opts.output))
        first_def = 1
    else:
        work.extend(
            zip(sfms, (os.path.join(opts.output,
                                    os.path.split(x)[1]) for x in sfms)))
    if first_def > -1:
        work.extend(zip(sfms[first_def:],
                        (f"{x}_u" for x in sfms[first_def:])))

    try:
        with warnings.catch_warnings():
            warnings.simplefilter("always" if opts.warnings else "ignore",
                                  SyntaxWarning)
            for job in work:
                res = generate(transduce(job[0], opts))
                ofh = codecs.open(job[1], "w", "utf-8")
                ofh.write(res)
                ofh.close()

    except IOError as err:
        sys.stderr.write(
            parser.expand_prog_name(f'%prog: IO error: {err!s}\n'))
        sys.exit(2)
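
A small variation on the write loop above: unpacking each job and using a context manager ensures the output handle is closed even if write() raises (generate, transduce, work and opts keep the names from the snippet):

for src_path, out_path in work:
    res = generate(transduce(src_path, opts))
    with codecs.open(out_path, "w", "utf-8") as ofh:
        ofh.write(res)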