def clean_dte_xml_file(input_file_path: str,
                       output_file_path: str) -> Iterable[bytes]:
    """
    Clean and validate a DTE XML file, write the result and diff it.

    The input file is read as bytes, parsed with
    ``xml_utils.parse_untrusted_xml``, cleaned with
    ``cl_sii.dte.parse.clean_dte_xml`` (``set_missing_xmlns`` and
    ``remove_doc_personalizado`` enabled), validated with
    ``cl_sii.dte.parse.validate_dte_xml`` and finally written to
    ``output_file_path``.

    :param input_file_path: path of the XML file to clean
    :param output_file_path: path the cleaned XML document is written to
    :return: lazy iterable of unified-diff lines (bytes) between the
        original file content and the rewritten file content
    :raises: whatever the parse / clean / validate / write helpers raise;
        nothing is caught here

    """
    with open(input_file_path, mode='rb') as input_file:
        original_bytes = input_file.read()

    # The "was it modified" flag returned by the cleaner is not used here.
    cleaned_doc, _was_modified = cl_sii.dte.parse.clean_dte_xml(
        xml_utils.parse_untrusted_xml(original_bytes),
        set_missing_xmlns=True,
        remove_doc_personalizado=True,
    )

    # TODO: add exception with a nice message for the caller.
    cl_sii.dte.parse.validate_dte_xml(cleaned_doc)

    with open(output_file_path, 'w+b') as output_file:
        xml_utils.write_xml_doc(cleaned_doc, output_file)

    # Read the result back from disk so that the diff is computed against
    # exactly what was written.
    with open(output_file_path, mode='rb') as output_file:
        rewritten_bytes = output_file.read()

    # note: another way to compute the difference in a similar format is
    #   `diff -Naur $input_file_path $output_file_path`
    return difflib.diff_bytes(
        dfunc=difflib.unified_diff,
        a=original_bytes.splitlines(),
        b=rewritten_bytes.splitlines())
Ejemplo n.º 2
0
    def get_file_diff(self, test_name, file1, file2, is_bytes=False):
        """Compare two files and build a failure message when they differ.

        Args:
            test_name: Name of the test, used in the failure message.
            file1: Path of the file holding the expected output.
            file2: Path of the file holding the actual output.
            is_bytes: When true, the files are read and compared as bytes
                instead of text.

        Returns:
            An empty string when the unified diff of the two files is
            empty; otherwise a 'FAIL' line followed by the content of both
            files, each formatted by ``wrap_message``.
        """
        # Read both files through context managers so the handles are
        # always closed, then build the (lazy) diff for the chosen mode.
        if is_bytes:
            with open(file1, "rb") as expected_file:
                expectedLines = expected_file.readlines()
            with open(file2, "rb") as output_file:
                outputLines = output_file.readlines()
            diff = difflib.diff_bytes(difflib.unified_diff,
                                      expectedLines, outputLines)
        else:
            with open(file1) as expected_file:
                expectedLines = expected_file.readlines()
            with open(file2) as output_file:
                outputLines = output_file.readlines()
            diff = difflib.unified_diff(expectedLines, outputLines)

        # Only the existence of a difference matters, so there is no need
        # to accumulate the diff text (which also mixed str and bytes in
        # bytes mode).
        if next(diff, None) is None:
            return ''

        if is_bytes:
            # The message is text; decode in a way that cannot fail on
            # arbitrary binary content.
            expected_text = b''.join(expectedLines).decode(
                errors='backslashreplace')
            output_text = b''.join(outputLines).decode(
                errors='backslashreplace')
        else:
            expected_text = ''.join(expectedLines)
            output_text = ''.join(outputLines)

        msg = ('FAIL -- Test %s has different ouput than expected' %
               (test_name))
        msg += wrap_message(expected_text, test_name + ' Expected Lines')
        msg += wrap_message(output_text, test_name + ' Output Lines')
        return msg
Ejemplo n.º 3
0
def compareTwoBinaryFiles(flags, filepaths, filelines):
    """Write a diff of two files, given as lists of byte lines, to stdout.

    Args:
        flags: Options object. ``num_context_lines`` sets the number of
            context lines; ``unified_diff`` selects a unified diff when
            true and a context diff when false (a unified diff is used
            when the attribute is absent).
        filepaths: The two file names used in the diff header.
        filelines: The two sequences of lines to compare.

    Returns:
        0 when the diff is empty, 1 when at least one diff line was
        written.
    """
    exitCode = 0

    # Honor the requested diff style on every Python version; previously
    # only the Python 2 branch looked at the flag.
    if getattr(flags, 'unified_diff', True):
        func = difflib.unified_diff
    else:
        func = difflib.context_diff

    if hasattr(difflib, 'diff_bytes'):
        # python 3.5 or newer
        diffs = difflib.diff_bytes(func,
                                   filelines[0],
                                   filelines[1],
                                   filepaths[0].encode(),
                                   filepaths[1].encode(),
                                   n=flags.num_context_lines)
        # Diff lines may hold arbitrary bytes: decode without failing.
        diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
    else:
        # python 2.7
        diffs = func(filelines[0],
                     filelines[1],
                     filepaths[0],
                     filepaths[1],
                     n=flags.num_context_lines)

    for diff in diffs:
        sys.stdout.write(to_string(diff))
        exitCode = 1
    return exitCode
Ejemplo n.º 4
0
def diffBinaryFiles(file1, file2):
    """Return True when both files have byte-identical content.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        True if the files hold exactly the same bytes, False otherwise.
    """
    with open(file1, "rb") as f:
        content1 = f.read()
    with open(file2, "rb") as f:
        content2 = f.read()
    # Diffing each file as one single "line" and checking for an empty diff
    # is just an equality test, so compare the bytes directly and skip the
    # decoding and matching work difflib would do.
    return content1 == content2
Ejemplo n.º 5
0
def diffBinaryFiles(file1, file2):
    """Tell whether two files contain exactly the same bytes.

    Args:
        file1: Path of the first file.
        file2: Path of the second file.

    Returns:
        True when the contents are identical, False when they differ.
    """
    with open(file1, "rb") as first:
        content1 = first.read()
    with open(file2, "rb") as second:
        content2 = second.read()
    # An empty diff of two one-element sequences means the elements are
    # equal; a plain comparison gives the same answer without running
    # difflib over the whole content.
    return content1 == content2
Ejemplo n.º 6
0
def diffsize(lA, lB):
    """Return a size measure for the difference between ``lA`` and ``lB``.

    When either input is empty, the result is the length of the other
    input after ``strip_to_diff_parts``. Otherwise both inputs are passed
    through ``strip_to_diff_parts`` and the result is the number of lines
    in their unified diff.
    """
    if not lA or not lB:
        # An empty lA takes precedence: measure lB (even if it is empty too).
        remaining = lB if not lA else lA
        return len(strip_to_diff_parts(remaining))

    stripped_a = strip_to_diff_parts(lA)
    stripped_b = strip_to_diff_parts(lB)
    delta = difflib.diff_bytes(difflib.unified_diff, stripped_a, stripped_b)
    return sum(1 for _ in delta)
Ejemplo n.º 7
0
def diff_bytes(file1, file2, return_str=False):
    """
    Diff the bytes of two files in a format resembling GNU diff output.

    Both files are read in binary mode, Windows line breaks are normalized
    to ``\\n`` and a unified diff without context lines is computed. The
    diff is then rewritten: the two header lines start with ``<<<`` /
    ``>>>``, hunk headers become ``<from>c<to>``, removed lines start with
    ``< ``, added lines with ``> ``, and a ``---`` line is placed before
    the first added line of each hunk.

    Returns the rewritten lines joined by newlines: as bytes by default,
    or as text built from the ``repr`` of each line when ``return_str``
    is true. The result is empty when there are no differences.
    """
    def _load_lines(filename):
        with open(filename, 'rb') as handle:
            raw = handle.read()
        # Ignore line breaks for Windows
        return raw.replace(b'\r\n', b'\n').split(b'\n')

    lines_a = _load_lines(file1)
    lines_b = _load_lines(file2)

    raw_diff = difflib.diff_bytes(difflib.unified_diff,
                                  lines_a,
                                  lines_b,
                                  fromfile=file1.encode(),
                                  tofile=file2.encode(),
                                  n=0,
                                  lineterm=b'')

    # Prefix replacements for the ordinary content lines.
    prefixes = {
        b'-': b'< ',  # line unique to sequence 1
        b'+': b'> ',  # line unique to sequence 2
        b'?': b'? ',  # line not present in either input sequence
    }

    rendered = []
    separator_pending = True
    for position, entry in enumerate(raw_diff, start=1):
        if position <= 2:
            # The first two lines are the '---' / '+++' file headers.
            header = entry.decode()
            header = header.replace('--- ', '<<< ').replace('+++ ', '>>> ')
            rendered.append(header.encode())
            continue

        marker = entry[0:1]
        if marker == b' ':
            # line common to both sequences
            continue
        if marker == b'@':
            # Hunk header: a new hunk needs its own '---' separator.
            rendered.append(
                re.sub(rb'@@ -([^ ]+) \+([^ ]+) @@', rb'\1c\2', entry))
            separator_pending = True
            continue
        if marker == b'+' and separator_pending:
            separator_pending = False
            rendered.append(b'---')
        rendered.append(prefixes.get(marker, marker) + entry[1:])

    if return_str:
        return '\n'.join([repr(entry)[2:-1] for entry in rendered])
    return b'\n'.join(rendered)
Ejemplo n.º 8
0
def diff_score(a, b):
    """Return a similarity score for two byte strings based on a line diff.

    Both inputs are split into lines (line endings kept) and compared with
    a context diff. Every changed, added or removed line in the diff counts
    against the score.

    Args:
        a: First byte string.
        b: Second byte string.

    Returns:
        ``1 - changed / (len(lines_a) + len(lines_b))`` as a float: 1.0 when
        no line differs. Two empty inputs are identical and score 1.0.
    """
    la = a.splitlines(keepends=True)
    lb = b.splitlines(keepends=True)

    total = len(la) + len(lb)
    if total == 0:
        # Nothing to compare on either side: avoid dividing by zero.
        return 1.0

    # Content lines of a context diff start with a two-character marker:
    # '! ' (changed), '+ ' (added) or '- ' (removed).
    changed = sum(
        1
        for ln in difflib.diff_bytes(difflib.context_diff, la, lb)
        if ln.startswith((b'! ', b'+ ', b'- '))
    )
    return 1 - (changed / total)
Ejemplo n.º 9
0
def unidiff(a, b, filename_a=b'original', timestamp_a=b'',
            filename_b=b'modified', timestamp_b=b'', ignore_blanks=False):
    r"""Build a unified diff between two byte inputs.

    ``a`` and ``b`` may each be a single byte string, which is split into
    lines with ``splitlines()``, or an already split sequence of lines,
    which is used as given.

    ``filename_a`` and ``filename_b`` are passed to difflib as the header
    file names; text values are encoded as UTF-8 first.

    ``timestamp_a`` and ``timestamp_b`` are passed to difflib as the
    header file dates; any value that is not already a byte string is
    converted to text and encoded as UTF-8.

    When ``ignore_blanks`` is true, lines matching ``BLANKS_REGEX`` are
    removed from both inputs before the diff is computed.

    The diff is requested with an empty line terminator, so difflib does
    not append a newline to the header and hunk lines it generates. The
    lazy iterator produced by difflib is returned.

    """
    def _to_lines(value):
        if isinstance(value, six.binary_type):
            return value.splitlines()
        return value

    def _encode_name(value):
        if isinstance(value, six.text_type):
            return value.encode('utf-8')
        return value

    def _encode_stamp(value):
        if isinstance(value, six.binary_type):
            return value
        return six.text_type(value).encode('utf-8')

    a = _to_lines(a)
    b = _to_lines(b)
    filename_a = _encode_name(filename_a)
    filename_b = _encode_name(filename_b)
    timestamp_a = _encode_stamp(timestamp_a)
    timestamp_b = _encode_stamp(timestamp_b)

    if ignore_blanks:
        a = [line for line in a if not BLANKS_REGEX.match(line)]
        b = [line for line in b if not BLANKS_REGEX.match(line)]

    header_args = (filename_a, filename_b, timestamp_a, timestamp_b)
    if not six.PY2:
        # On Python 3, difflib needs the bytes-aware wrapper.
        return difflib.diff_bytes(difflib.unified_diff, a, b, *header_args,
                                  lineterm=b"")
    return difflib.unified_diff(a, b, *header_args, lineterm=b"")
Ejemplo n.º 10
0
    def test_byte_filenames(self):
        """diff_bytes works with byte file names and mixed-encoding content."""
        # somebody renamed a file from ISO-8859-2 to UTF-8 ("łodz.txt")
        name_latin2 = b'\xb3odz.txt'
        name_utf8 = b'\xc5\x82odz.txt'

        # they transcoded the content at the same time
        lines_latin2 = [b'\xa3odz is a city in Poland.']
        lines_utf8 = [b'\xc5\x81odz is a city in Poland.']

        def run_diff(differ, *dates, **options):
            return difflib.diff_bytes(differ, lines_latin2, lines_utf8,
                                      name_latin2, name_utf8,
                                      *dates, **options)

        def assert_lines_equal(expected, produced):
            # Compare element by element instead of as whole lists, because
            # unittest itself uses difflib to report differences between
            # lists.
            produced = list(produced)
            self.assertEqual(len(expected), len(produced))
            for want, got in zip(expected, produced):
                self.assertEqual(want, got)

        # The hunk is the same with and without dates in the header.
        hunk = [
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]

        self.check(run_diff(difflib.unified_diff))
        self.check(run_diff(difflib.context_diff))
        assert_lines_equal(
            [b'--- \xb3odz.txt', b'+++ \xc5\x82odz.txt'] + hunk,
            run_diff(difflib.unified_diff, lineterm=b''))

        # with dates (plain ASCII)
        dates = (b'2005-03-18', b'2005-03-19')
        self.check(run_diff(difflib.unified_diff, *dates))
        self.check(run_diff(difflib.context_diff, *dates))

        # note the mixed encodings here: this is deeply wrong by every
        # tenet of Unicode, but it doesn't crash, it's parseable by
        # patch, and it's how UNIX(tm) diff behaves
        assert_lines_equal(
            [b'--- \xb3odz.txt\t2005-03-18',
             b'+++ \xc5\x82odz.txt\t2005-03-19'] + hunk,
            run_diff(difflib.unified_diff, *dates, lineterm=b''))
Ejemplo n.º 11
0
    def test_clean_dte_xml_ok_3(self) -> None:
        """Clean 'bad XML 3', validate it and check the byte-level diff."""
        original_bytes = self.dte_bad_xml_3_xml_bytes
        parsed_doc = xml_utils.parse_untrusted_xml(original_bytes)

        root = parsed_doc.getroottree().getroot()
        self.assertEqual(root.tag, 'DTE')

        # Validating the document as parsed is expected to fail.
        with self.assertRaises(xml_utils.XmlSchemaDocValidationError) as raised:
            validate_dte_xml(parsed_doc)
        expected_error_args = (
            "Element 'DTE': No matching global declaration available for the validation root., "
            "line 2",
        )
        self.assertSequenceEqual(raised.exception.args, expected_error_args)

        cleaned_doc, was_modified = clean_dte_xml(
            parsed_doc,
            set_missing_xmlns=True,
            remove_doc_personalizado=True,
        )
        self.assertTrue(was_modified)

        # This will not raise.
        validate_dte_xml(cleaned_doc)

        # Serialize the cleaned document in memory.
        buffer = io.BytesIO()
        xml_utils.write_xml_doc(cleaned_doc, buffer)
        rewritten_bytes = buffer.getvalue()
        del buffer

        # The serialized bytes must parse and validate as well.
        validate_dte_xml(xml_utils.parse_untrusted_xml(rewritten_bytes))

        expected_diff_lines = (
            b'--- \n',
            b'+++ \n',
            b'@@ -1,5 +1,5 @@\n',
            b'-<?xml version="1.0" encoding="windows-1252"?>',
            b'-<DTE version="1.0">',
            b"+<?xml version='1.0' encoding='WINDOWS-1252'?>",
            b'+<DTE xmlns="http://www.sii.cl/SiiDte" version="1.0">',
            b' <Documento ID="DTE-33-2336600">',
            b' <Encabezado>',
            b' <IdDoc>',
        )

        actual_diff_lines = list(difflib.diff_bytes(
            dfunc=difflib.unified_diff,
            a=original_bytes.splitlines(),
            b=rewritten_bytes.splitlines()))
        self.assertSequenceEqual(actual_diff_lines, expected_diff_lines)
Ejemplo n.º 12
0
def compute_assignment(sA, dA, sB, dB):
    """Pair the entries of ``sA`` with those of ``sB`` and order the result.

    NOTE(review): presumably ``sA`` / ``sB`` are sequences of commit ids for
    the old (LHS) and new (RHS) series, and ``dA`` / ``dB`` map each id to
    its patch as a list of bytes lines -- confirm against the callers. What
    the code shows is that ``dA[sA[i]]`` and ``dB[u]`` are handed to
    ``difflib.diff_bytes``.

    Returns:
        A list of 3-tuples, in output order:
        ``(i, j, idiff)`` for LHS index ``i`` matched with RHS index ``j``,
        where ``idiff`` is the list of unified-diff lines between the two;
        ``(i, None, None)`` for an LHS entry flagged as having no RHS match;
        ``(None, j, None)`` for an RHS entry whose match index is not a
        valid LHS index.
    """
    pmap = []
    la = len(sA)
    lb = len(sB)

    # Attempt to greedily assign an exact match with 0 weight (and
    # give the other choices for this commit a very large weight).
    # This speeds up the case where the patches are the same.
    eqA, eqB = split_away_same_patches(sA, dA, sB, dB)
    # Only the entries without an exact match (None in eqA / eqB) are
    # passed on to the general matching step.
    lhs1, rhs1 = compute_matching_assignment(
        [u for u, e in zip(sA, eqA) if e is None], dA,
        [v for v, e in zip(sB, eqB) if e is None], dB)
    imap = make_index_map(eqA, eqB)
    jmap = make_index_map(eqB, eqA)
    # Combine the exact matches and the computed matches into one array per
    # side (details are in the helpers, which are not shown here).
    lhs = np.array(rebuild_match_list(eqA, lhs1, jmap))
    rhs = np.array(rebuild_match_list(eqB, rhs1, imap))

    # We assume the user is really more interested in the second
    # argument ("newer" version).  To that end, we print the output in
    # the order of the RHS.  To put the LHS commits that are no longer
    # in the RHS into a good place, we place them once we have seen
    # all of their predecessors in the LHS.
    # Mask over the first `la` entries: True where the match index is not
    # a valid RHS index (>= lb).
    new_on_lhs = (lhs >= lb)[:la]
    # Entry k starts at k, the number of LHS entries before it; it is
    # decremented whenever an earlier LHS entry has been emitted.
    lhs_prior_counter = np.arange(la)

    def process_lhs_orphans():
        # Emit, one at a time, every flagged LHS entry whose counter has
        # reached zero. Mutates pmap, new_on_lhs and lhs_prior_counter.
        while True:
            assert (lhs_prior_counter >= 0).all()
            w = (lhs_prior_counter == 0) & new_on_lhs
            idx = w.nonzero()[0]
            if len(idx) == 0:
                break
            pmap.append((idx[0], None, None))
            # Clear the flag so the entry is emitted only once.
            new_on_lhs[idx[0]] = False
            lhs_prior_counter[idx[0] + 1:] -= 1

    for j, (u, i) in enumerate(zip(sB, rhs)):
        # now show an RHS commit
        process_lhs_orphans()
        if i < la:
            # Matched pair: record the diff between the two patches.
            idiff = list(
                difflib.diff_bytes(difflib.unified_diff, dA[sA[i]], dB[u]))
            pmap.append((i, j, idiff))
            lhs_prior_counter[i + 1:] -= 1
        else:
            # The match index is not a valid LHS index: RHS-only entry.
            pmap.append((None, j, None))
    # Flush flagged LHS entries that became ready after the last RHS entry.
    process_lhs_orphans()

    return pmap
Ejemplo n.º 13
0
def check_prune_result(expected):
    """Check the ``%at`` values printed by ``git log`` against ``expected``.

    The values are read with ``git log --pretty=format:%at``, converted to
    integers and sorted. On a mismatch, every expected value is printed to
    stderr together with its formatted local time, followed by a unified
    diff of the expected and actual values as rendered by
    ``result_diffline``. The comparison result is then passed to ``wvpass``.
    """
    log_output = exo([b'git', b'log', b'--pretty=format:%at']).out
    actual = sorted(int(stamp) for stamp in log_output.splitlines())

    if expected != actual:
        for stamp in expected:
            print('ex:', stamp,
                  strftime('%Y-%m-%d-%H%M%S', localtime(stamp)),
                  file=stderr)

        expected_lines = [result_diffline(stamp) for stamp in expected]
        actual_lines = [result_diffline(stamp) for stamp in actual]
        delta = diff_bytes(unified_diff, expected_lines, actual_lines,
                           fromfile=b'expected', tofile=b'actual')
        for entry in delta:
            # Flush the text stream before each write to the byte stream.
            sys.stderr.flush()
            byte_stream(sys.stderr).write(entry)

    wvpass(expected == actual)
Ejemplo n.º 14
0
def is_content_equal(c1: Dict[str, str], c2: Dict[str, str]) -> bool:
    """Tell whether two name -> content mappings are identical.

    Prints a short explanation (and a unified diff of the first differing
    entry) when they are not.

    Args:
        c1: First mapping of file name to file content.
        c2: Second mapping of file name to file content.

    Returns:
        True when both mappings have the same keys and equal content for
        every key, False otherwise.
    """
    # Compare the sizes of BOTH mappings; combined with the key check
    # below this guarantees the key sets are equal.
    if len(c1) != len(c2):
        print("number of contents is not same")
        print(c1.keys())
        print(c2.keys())
        return False
    for key, left in c1.items():
        if key not in c2:
            print(f"file does not exist: {key}")
            return False
        right = c2[key]
        if left != right:
            print(f"file is not equal: {key}")
            # difflib.diff_bytes only accepts bytes, so pick the diff
            # routine that matches the content type.
            if isinstance(left, bytes) and isinstance(right, bytes):
                diff_lines = difflib.diff_bytes(difflib.unified_diff,
                                                left.splitlines(),
                                                right.splitlines())
            elif isinstance(left, str) and isinstance(right, str):
                diff_lines = difflib.unified_diff(left.splitlines(),
                                                  right.splitlines())
            else:
                # Mixed or unexpected types cannot be diffed line by line.
                diff_lines = [
                    f"content types differ: {type(left).__name__} "
                    f"vs {type(right).__name__}"
                ]
            for diff in diff_lines:
                print(diff)
            return False
    return True
Ejemplo n.º 15
0
    def test_byte_filenames(self):
        """Check diff_bytes with byte file names in different encodings."""
        # somebody renamed a file from ISO-8859-2 to UTF-8 ("łodz.txt")
        src_name, dst_name = b'\xb3odz.txt', b'\xc5\x82odz.txt'

        # they transcoded the content at the same time
        src_lines = [b'\xa3odz is a city in Poland.']
        dst_lines = [b'\xc5\x81odz is a city in Poland.']

        differs = (difflib.unified_diff, difflib.context_diff)

        def compare_linewise(expected, produced):
            # do not compare the two as lists, because unittest uses
            # difflib to report difference between lists
            produced = list(produced)
            self.assertEqual(len(expected), len(produced))
            for want, got in zip(expected, produced):
                self.assertEqual(want, got)

        for differ in differs:
            self.check(difflib.diff_bytes(differ, src_lines, dst_lines,
                                          src_name, dst_name))

        compare_linewise(
            [
                b'--- \xb3odz.txt',
                b'+++ \xc5\x82odz.txt',
                b'@@ -1 +1 @@',
                b'-\xa3odz is a city in Poland.',
                b'+\xc5\x81odz is a city in Poland.',
            ],
            difflib.diff_bytes(differs[0], src_lines, dst_lines,
                               src_name, dst_name, lineterm=b''))

        # with dates (plain ASCII)
        src_date, dst_date = b'2005-03-18', b'2005-03-19'
        for differ in differs:
            self.check(difflib.diff_bytes(differ, src_lines, dst_lines,
                                          src_name, dst_name,
                                          src_date, dst_date))

        compare_linewise(
            [
                # note the mixed encodings here: this is deeply wrong by
                # every tenet of Unicode, but it doesn't crash, it's
                # parseable by patch, and it's how UNIX(tm) diff behaves
                b'--- \xb3odz.txt\t2005-03-18',
                b'+++ \xc5\x82odz.txt\t2005-03-19',
                b'@@ -1 +1 @@',
                b'-\xa3odz is a city in Poland.',
                b'+\xc5\x81odz is a city in Poland.',
            ],
            difflib.diff_bytes(differs[0], src_lines, dst_lines,
                               src_name, dst_name, src_date, dst_date,
                               lineterm=b''))
Ejemplo n.º 16
0
 def compare_to_formatted(self, filename_str: str) -> None:
     """Diff a file against its formatted version and record any mismatch.

     When the two differ, a header (the file name and a line of ``=``)
     followed by the unified diff is appended to ``self.stderr`` and
     ``self.returncode`` is set to 1. Nothing is changed otherwise.
     """
     # This string encode is from argparse, so we should be able to trust it.
     encoded_name = filename_str.encode()
     before = self.get_filelines(encoded_name)
     after = self.get_formatted_lines(encoded_name)
     if self.edit_in_place:
         # If edit in place is used, the formatter will fix in place with
         # no stdout. So compare the before/after file for hook pass/fail
         after = self.get_filelines(encoded_name)

     delta = list(
         difflib.diff_bytes(difflib.unified_diff,
                            before,
                            after,
                            fromfile=b'original',
                            tofile=b'formatted'))
     if not delta:
         return

     banner = encoded_name + b"\n" + 20 * b"=" + b"\n"
     self.stderr += banner + b"\n".join(delta) + b"\n"
     self.returncode = 1
Ejemplo n.º 17
0
 def __call__(
     self, id: str, from_version_at: str, to_version_at: str = None
 ) -> bytes:
     """Return a unified diff between two versions of a document.

     The document is fetched by ``id`` from a new session. Its data at
     ``from_version_at`` is diffed against its data at ``to_version_at``;
     when ``to_version_at`` is not given, ``data()`` is called without
     arguments and the target is labelled ``latest`` in the diff header.

     The diff lines are returned joined by newlines, as bytes.
     """
     document = self.Session().documents.fetch(id)
     old_lines = document.data(version_at=from_version_at).splitlines()

     # Ask for a specific target version only when one was requested.
     target_kwargs = {"version_at": to_version_at} if to_version_at else {}
     new_lines = document.data(**target_kwargs).splitlines()

     delta = difflib.diff_bytes(
         difflib.unified_diff,
         old_lines,
         new_lines,
         fromfile=from_version_at.encode("utf-8"),
         tofile=to_version_at.encode("utf-8") if to_version_at else b"latest",
         lineterm=b"",
     )
     return b"\n".join(delta)
Ejemplo n.º 18
0
def _diff(logger, ln, l, rn, r):
    """Return the unified diff of two byte strings as a list of lines.

    ``ln`` and ``rn`` are the names used in the diff header for ``l`` and
    ``r``. The result is always a list; it is empty when the inputs are
    equal or have the same lines.
    """
    if l == r:
        # slightly faster path
        logger.debug("_diff '%s' and '%s' fast match", ln, rn)
        return []

    # compare
    delta = difflib.diff_bytes(difflib.unified_diff,
                               l.splitlines(),
                               r.splitlines(),
                               fromfile=ln.encode(),
                               tofile=rn.encode(),
                               lineterm=rb"")
    lines = list(delta)
    logger.debug("_diff: %s", lines)
    # Always return a list.
    return lines or []
Ejemplo n.º 19
0
    def _ndiff_matches(self, olines, dlines):
        """ Uses difflib's ndiff to find the lines shared by the ancestor
            and one of alice or bob
            Args:
               olines - list of bytestrings of ancestor
               dlines - list of bytestrings of either alice or bob
            Returns:
               dictionary mapping the 1-based line number of each matching
               line in ancestor to its 1-based line number in the other
        """
        # See difflib.diff_bytes documentation
        # https://docs.python.org/3/library/difflib.html
        # diff_bytes calls its dfunc with the unified_diff style arguments;
        # this adapter drops the ones ndiff does not take, so that ndiff
        # can work on mixed or unknown encoded byte strings
        def ndiff_adapter(alines, blines, fromfile, tofile, fromfiledate,
                          tofiledate, n, lineterm):
            return ndiff(alines, blines, linejunk=None, charjunk=None)

        delta = diff_bytes(ndiff_adapter,
                           olines,
                           dlines,
                           b'ancestor',
                           b'other',
                           b' ',
                           b' ',
                           n=-1,
                           lineterm=b'\n')

        ancestor_no = 0
        other_no = 0
        matches = {}
        for entry in delta:
            tag = entry[0:2]
            in_ancestor = tag in (b'  ', b'- ')
            in_other = tag in (b'  ', b'+ ')
            if in_ancestor:
                ancestor_no += 1
            if in_other:
                other_no += 1
            if in_ancestor and in_other:
                # Line common to both inputs.
                matches[ancestor_no] = other_no
        return matches
Ejemplo n.º 20
0
    def test_byte_filenames(self):
        """Run diff_bytes with byte file names and transcoded content."""
        # File name in ISO-8859-2 and in UTF-8.
        fna = b'\xb3odz.txt'
        fnb = b'\xc5\x82odz.txt'
        # Content in the two matching encodings.
        old = [b'\xa3odz is a city in Poland.']
        new = [b'\xc5\x81odz is a city in Poland.']

        def unified(*extra, **options):
            return difflib.diff_bytes(difflib.unified_diff, old, new,
                                      fna, fnb, *extra, **options)

        def context(*extra, **options):
            return difflib.diff_bytes(difflib.context_diff, old, new,
                                      fna, fnb, *extra, **options)

        def assert_same_sequence(expect, actual):
            # Compare the length first, then item by item.
            actual = list(actual)
            self.assertEqual(len(expect), len(actual))
            for wanted, found in zip(expect, actual):
                self.assertEqual(wanted, found)

        # Without dates in the header.
        self.check(unified())
        self.check(context())
        expect_plain = [
            b'--- \xb3odz.txt',
            b'+++ \xc5\x82odz.txt',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        assert_same_sequence(expect_plain, unified(lineterm=b''))

        # With dates in the header.
        datea = b'2005-03-18'
        dateb = b'2005-03-19'
        self.check(unified(datea, dateb))
        self.check(context(datea, dateb))
        expect_dated = [
            b'--- \xb3odz.txt\t2005-03-18',
            b'+++ \xc5\x82odz.txt\t2005-03-19',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        assert_same_sequence(expect_dated,
                             unified(datea, dateb, lineterm=b''))
Ejemplo n.º 21
0
    def test_byte_content(self):
        """Pass byte content through diff_bytes for both diff styles."""
        # if we receive byte strings, we return byte strings
        a = [b'hello', b'andr\xe9']  # iso-8859-1 bytes
        b = [b'hello', b'andr\xc3\xa9']  # utf-8 bytes

        # Header arguments for the "a vs a" and the "a vs b" comparison:
        # none, then filenames, then filenames and dates
        # (content and filenames are all bytes!)
        header_variants = (
            ((), ()),
            ((b'a', b'a'), (b'a', b'b')),
            ((b'a', b'a', b'2005', b'2013'), (b'a', b'b', b'2005', b'2013')),
        )

        # unified diff first, then the same all over again with context diff
        for differ in (difflib.unified_diff, difflib.context_diff):
            for same_header, changed_header in header_variants:
                self.check(difflib.diff_bytes(differ, a, a, *same_header))
                self.check(difflib.diff_bytes(differ, a, b, *changed_header))
Ejemplo n.º 22
0
    def run(self):
        """Run the test command and compare its output with the expected files.

        The command is fed the content of ``self.input_file``. Its STDOUT
        and STDERR are each compared with the content of ``self.stdout_file``
        / ``self.stderr_file`` when that file exists; when the file does not
        exist, the stream has to be empty. Mismatches are reported either as
        a unified diff (``diff_mode`` set in ``self.kwargs``) or as the
        received and expected text. ``self.success`` is set to True only
        when both streams match.
        """
        self.detail(color(self.name, Color.UNDERLINE))
        test_input = self.read_file(self.input_file)
        self.detail(color('Command:', Color.BOLD) + ' ' + ' '.join(self.cmd))
        raw_stdout, raw_stderr, _process = self.run_cmd(test_input)
        stdout = self.convert_output(raw_stdout)
        stderr = self.convert_output(raw_stderr)

        def stream_matches(received, expected_file, text):
            """Report on one output stream; return True when it matches."""
            if not path.isfile(expected_file):
                # Without an expectation file only empty output is accepted.
                if len(received) > 0:
                    self.detail(color(text['received'], Color.YELLOW))
                    self.detail(received.decode())
                    self.detail(
                        color(text['missing'] % expected_file, Color.YELLOW))
                    return False
                return True

            expected = self.convert_output(self.read_file(expected_file))
            if expected == received:
                return True

            if self.kwargs.get("diff_mode", False):
                self.detail(color(text['title'], Color.YELLOW))
                for line in diff_bytes(unified_diff,
                                       expected.split(b'\n'),
                                       received.split(b'\n'),
                                       fromfile=text['fromfile'],
                                       tofile=text['tofile']):
                    print(line)
            else:
                self.detail(color(text['received'], Color.YELLOW))
                self.detail(received.decode())
                self.detail(color(text['expected'], Color.YELLOW))
                self.detail(expected.decode())
            return False

        stdout_match = stream_matches(stdout, self.stdout_file, {
            'title': 'STDOUT:',
            'fromfile': b'Expected STDOUT',
            'tofile': b'Received STDOUT',
            'received': 'Received STDOUT:',
            'expected': 'Expected STDOUT:',
            'missing': 'Missing STDOUT file: %s',
        })
        stderr_match = stream_matches(stderr, self.stderr_file, {
            'title': 'STDERR:',
            'fromfile': b'Expected STDERR',
            'tofile': b'Received STDERR',
            'received': 'Received STDERR:',
            'expected': 'Expected STDERR:',
            'missing': 'Missing STDERR file: %s',
        })

        self.success = stdout_match and stderr_match

        if self.success:
            verdict = color('Success', Color.GREEN)
        else:
            verdict = color('Failure', Color.RED)
        self.detail(verdict)
Ejemplo n.º 23
0
 def test_byte_content(self):
     """Feed byte content to diff_bytes with every header combination."""
     latin1_lines = [b'hello', b'andr\xe9']
     utf8_lines = [b'hello', b'andr\xc3\xa9']

     def check_pair(differ, same_header=(), changed_header=()):
         # First the content against itself, then against the other one.
         self.check(difflib.diff_bytes(
             differ, latin1_lines, latin1_lines, *same_header))
         self.check(difflib.diff_bytes(
             differ, latin1_lines, utf8_lines, *changed_header))

     for differ in (difflib.unified_diff, difflib.context_diff):
         # No header arguments, then file names, then names and dates.
         check_pair(differ)
         check_pair(differ, (b'a', b'a'), (b'a', b'b'))
         check_pair(differ,
                    (b'a', b'a', b'2005', b'2013'),
                    (b'a', b'b', b'2005', b'2013'))
Ejemplo n.º 24
0
    def test_byte_content(self):
        """diff_bytes accepts byte content, with and without header data."""
        # if we receive byte strings, we return byte strings
        a = [b'hello', b'andr\xe9']      # iso-8859-1 bytes
        b = [b'hello', b'andr\xc3\xa9']  # utf-8 bytes

        # One (first, second, header arguments) entry per comparison:
        cases = [
            (a, a, ()),
            (a, b, ()),
            # now with filenames (content and filenames are all bytes!)
            (a, a, (b'a', b'a')),
            (a, b, (b'a', b'b')),
            # and with filenames and dates
            (a, a, (b'a', b'a', b'2005', b'2013')),
            (a, b, (b'a', b'b', b'2005', b'2013')),
        ]

        # Unified diff first; then the same all over again, with context
        # diff.
        for differ in (difflib.unified_diff, difflib.context_diff):
            for first, second, header in cases:
                self.check(difflib.diff_bytes(differ, first, second, *header))
Ejemplo n.º 25
0
    def test_clean_dte_xml_ok_2(self) -> None:
        """Clean 'bad XML 2', validate it and check the byte-level diff."""
        original_bytes = self.dte_bad_xml_2_xml_bytes
        parsed_doc = xml_utils.parse_untrusted_xml(original_bytes)

        root = parsed_doc.getroottree().getroot()
        self.assertEqual(root.tag, 'DTE')

        # Validating the document as parsed is expected to fail.
        with self.assertRaises(xml_utils.XmlSchemaDocValidationError) as raised:
            validate_dte_xml(parsed_doc)
        expected_error_args = (
            "Element 'DTE': No matching global declaration available for the validation root., "
            "line 2",
        )
        self.assertSequenceEqual(raised.exception.args, expected_error_args)

        cleaned_doc, was_modified = clean_dte_xml(
            parsed_doc,
            set_missing_xmlns=True,
            remove_doc_personalizado=True,
        )
        self.assertTrue(was_modified)

        # This will not raise.
        validate_dte_xml(cleaned_doc)

        # Serialize the cleaned document in memory.
        buffer = io.BytesIO()
        xml_utils.write_xml_doc(cleaned_doc, buffer)
        rewritten_bytes = buffer.getvalue()
        del buffer

        # The serialized bytes must parse and validate as well.
        validate_dte_xml(xml_utils.parse_untrusted_xml(rewritten_bytes))

        expected_diff_lines = (
            b'--- \n',
            b'+++ \n',
            b'@@ -1,5 +1,5 @@\n',
            b'-<?xml version="1.0" encoding="ISO-8859-1"?>',
            b'-<DTE version="1.0">',
            b"+<?xml version='1.0' encoding='ISO-8859-1'?>",
            b'+<DTE xmlns="http://www.sii.cl/SiiDte" version="1.0">',
            b'   <!-- O Win32 Chrome 73 central VERSION: v20190227 -->',
            b' <Documento ID="MiPE76399752-6048">',
            b'     <Encabezado>',
            b'@@ -64,13 +64,13 @@\n',
            b'   </Documento>',
            b' <Signature xmlns="http://www.w3.org/2000/09/xmldsig#">',
            b' <SignedInfo>',
            b'-<CanonicalizationMethod Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315" />',  # noqa: E501
            b'-<SignatureMethod Algorithm="http://www.w3.org/2000/09/xmldsig#rsa-sha1" />',
            b'+<CanonicalizationMethod Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/>',  # noqa: E501
            b'+<SignatureMethod Algorithm="http://www.w3.org/2000/09/xmldsig#rsa-sha1"/>',
            b' <Reference URI="#MiPE76399752-6048">',
            b' <Transforms>',
            b'-<Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315" />',
            b'+<Transform Algorithm="http://www.w3.org/TR/2001/REC-xml-c14n-20010315"/>',
            b' </Transforms>',
            b'-<DigestMethod Algorithm="http://www.w3.org/2000/09/xmldsig#sha1" />',
            b'+<DigestMethod Algorithm="http://www.w3.org/2000/09/xmldsig#sha1"/>',
            b' <DigestValue>tk/D3mfO/KtdWyFXYZHe7dtYijg=</DigestValue>',
            b' </Reference>',
            b' </SignedInfo>',
        )

        actual_diff_lines = list(difflib.diff_bytes(
            dfunc=difflib.unified_diff,
            a=original_bytes.splitlines(),
            b=rewritten_bytes.splitlines()))
        self.assertSequenceEqual(actual_diff_lines, expected_diff_lines)
Ejemplo n.º 26
0
    # Get the program input. If NAME.input exists, read it.
    # Otherwise, there is no input.
    if os.path.exists(input_path):
        with open(input_path, 'rb') as f:
            input_text = f.read()
    else:
        input_text = b''

    # Invoke the test program and collect the merged stdout and stderr.
    from subprocess import check_call, Popen, PIPE, STDOUT
    p = Popen([exe_path], stdin=PIPE, stdout=PIPE, stderr=STDOUT)
    stdout, _ = p.communicate(input_text)
    actual = stdout.splitlines(1)

    # Read the name.TEST file to get the expected output.
    with open(test_path, 'rb') as f:
        expected = f.readlines()

    if actual != expected:
        # Actual output differs from the expected output.
        # Format a context diff. The program output is a list of byte strings,
        # so the expected output is also read as byte strings.
        # The diff is then performed on byte strings, and
        # the result is converted to Unicode using what I hope
        # is a lossless conversion.
        import difflib
        for line in difflib.diff_bytes(difflib.context_diff, expected, actual,
                                       b'Expected', b'Actual'):
            sys.stdout.write(line.decode('latin1'))
        sys.exit(1)
Ejemplo n.º 27
0
def check_reliability(service, output_dir, timeout):
    """
    Checks the reliability of a URL and records the result in the
    `ReliabilityDB` located at `output_dir`.

    The response body is downloaded, unwrapped if it is a single-file ZIP,
    pretty-printed if it parses as XML, and hashed. When the hash differs from
    the last recorded one, the content is stored in a new timestamped
    directory, together with a `diff` file against the previously stored
    content (or a note explaining why no diff could be produced).

    Parameters:
          service(Service) : The `Service` instance to check.
          output_dir(Path): The directory holding the database and the
            per-check download directories. It is combined with `/`, so it
            must be a path object rather than a plain string.
          timeout(float): The timeout in seconds for the GET request. It is
            passed unchanged to `requests.get`; this function applies no
            default of its own.
    """

    info(f"Checking {service.url}")
    ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
    db = ReliabilityDB(output_dir)
    checksum = None
    note = None
    diff_impossible = False
    try:

        with requests.get(service.url,
                          timeout=timeout,
                          stream=True,
                          allow_redirects=True) as r:
            # Best effort file name: prefer the Content-Disposition header,
            # then a plain ".gml"/".zip" last URL segment, then a fixed name.
            header_file_name = get_filename(
                r.headers.get("content-disposition"))
            last_segment = service.url.split("/")[-1]
            if header_file_name is not None:
                file_name = header_file_name
            elif ("&" not in last_segment and "?" not in last_segment
                  and (last_segment.lower().endswith(".gml")
                       or last_segment.lower().endswith(".zip"))):
                file_name = last_segment
            else:
                file_name = "download"

            content = r.content

            # Accept single file ZIP responses
            if last_segment.endswith("zip"):
                with zipfile.ZipFile(io.BytesIO(content)) as z:
                    files_info = z.infolist()
                    if len(files_info) == 1:
                        # Replace the archive by its only member so that the
                        # member itself is hashed, stored and diffed.
                        first_info = files_info[0]
                        with z.open(first_info) as f:
                            file_name = first_info.filename
                            content = f.read()
                    else:
                        diff_impossible = True
                        note = (
                            "Could not perform diff: response is multi-file ZIP."
                        )
                        if not file_name.lower().endswith("zip"):
                            file_name += ".zip"

            if not diff_impossible:
                # Attempt pretty-formatting to reduce diffs for compacted XML
                try:
                    doc = etree.parse(io.BytesIO(content))
                    content = etree.tostring(doc,
                                             encoding="utf8",
                                             pretty_print=True)
                except etree.ParseError:
                    diff_impossible = True
                    note = "Could not perform diff: invalid XML."

            checksum = hashlib.md5(content).hexdigest()
            db.add_check(ts,
                         checksum=checksum,
                         status=r.status_code,
                         note=note)

            if checksum != db.latest_checksum:
                # Content changed: store it under a directory named after
                # the timestamp of this check.
                download_dir = output_dir / ts
                download_dir.mkdir()
                with open(download_dir / file_name, "wb") as f:
                    f.write(content)
                if db.latest_changed_ts is not None:
                    if diff_impossible:
                        # The diff file is written as bytes lines, so the
                        # textual note must be a single encoded line.
                        # Passing the str itself would iterate it character
                        # by character and fail in the bytes "%b" formatting.
                        diff_lines = [note.encode("utf-8")]
                    else:
                        new_lines = content.splitlines()
                        with open(
                                output_dir / db.latest_changed_ts /
                                db.latest_changed_file_name, "rb") as f:
                            previous_lines = f.read().splitlines()
                        diff_lines = list(
                            difflib.diff_bytes(
                                difflib.unified_diff,
                                previous_lines,
                                new_lines,
                                db.latest_changed_ts.encode("utf-8"),
                                ts.encode("utf-8"),
                            ))

                    with open(download_dir / "diff", "wb") as f:
                        f.writelines(b"%b\n" % line for line in diff_lines)

                db.latest_changed_ts = ts
                db.latest_changed_file_name = file_name

            db.latest_checksum = checksum

    except requests.exceptions.Timeout:
        db.add_check(ts, timeout=True, note=note)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.ChunkedEncodingError):
        db.add_check(ts, conn_error=True, note=note)
    except zipfile.BadZipFile:
        db.add_check(ts, content_error=True, note="Bad Zip file")
Ejemplo n.º 28
0
 def update_event(self, inp=-1):
     """Publish on output 0 the result of `difflib.diff_bytes` called with
     inputs 0 through 8, passed positionally in that order."""
     diff_args = [self.input(slot) for slot in range(9)]
     self.set_output_val(0, difflib.diff_bytes(*diff_args))
difflib.unified_diff(a, b, fromfile="", tofile="",
    fromfiledate="", tofiledate="", n=3, lineterm="\n")
    Compare a and b (list of strings); return a delta (a generator
    generating the delta lines) in unified diff format.
    
    Unified diffs are a compact way of showing just the lines that have
    changed plus a few lines of context. The changes are shown in an
    inline style. The number of context lines is set by n, which defaults
    to three.
"""
# Two small sample "files", each a list of newline-terminated lines.
s1 = [
    "bacon\n",
    "eggs\n",
    "ham\n",
    "guido\n",
]
s2 = [
    "python\n",
    "eggy\n",
    "hamster\n",
    "guido\n",
]

# Print the unified diff of s1 against s2 on standard output.
_delta = difflib.unified_diff(s1, s2, fromfile="before.py", tofile="after.py")
sys.stdout.writelines(_delta)
"""
difflib.diff_bytes(dfunc, a, b, fromfile=b"", tofile=b"",
fromfiledate=b"", tofiledate=b"", n=3, lineterm=b"\n")
    Compare a and b (list of bytes objects) using dfunc; yield a
    sequence of delta lines (also bytes) in the format returned by
    dfunc. dfunc must be a callable, typically either unified_diff() or
    context_diff().
    
    Allows you to compare data with unknown or inconsistent encoding.
    All inputs except n must be bytes objects, not str.

difflib.IS_LINE_JUNK(line)
    Returns True for ignorable lines. A line is ignorable if it
    is blank or contains a single "#"; otherwise it is not ignorable.
    Used as the default for the linejunk parameter of ndiff() in older versions.
Ejemplo n.º 30
0
 def do_diff(self, ref_lines, test_lines, ref_file, test_file):
     """Return a unified diff, as a generator of bytes lines, between
     `ref_lines` and `test_lines`.

     `ref_file` and `test_file` are text labels; they are UTF-8 encoded and
     used as the "from" and "to" file names in the diff header.
     """
     from_label = ref_file.encode('utf-8')
     to_label = test_file.encode('utf-8')
     return difflib.diff_bytes(
         difflib.unified_diff,
         ref_lines,
         test_lines,
         fromfile=from_label,
         tofile=to_label,
     )
Ejemplo n.º 31
0
def unidiff(a,
            b,
            filename_a=b'original',
            timestamp_a=b'',
            filename_b=b'modified',
            timestamp_b=b'',
            ignore_blanks=False):
    r"""Generate a unified diff between two sequences of byte lines.

    Args:
        a, b: The two sides of the comparison. A byte string is split into
            lines with ``splitlines()``; anything else is used as given and
            is expected to already be a sequence of lines.
        filename_a, filename_b: Labels for the diff header. Text values are
            encoded to UTF-8; other values are passed through untouched.
        timestamp_a, timestamp_b: Timestamps for the diff header. Any value
            that is not already a byte string is converted to text and then
            encoded to UTF-8.
        ignore_blanks: When true, lines matching ``BLANKS_REGEX`` are removed
            from both sides before diffing.

    Returns:
        The generator produced by ``difflib.unified_diff`` with an empty
        line terminator: called directly on Python 2 and through
        ``difflib.diff_bytes`` otherwise.
    """
    bytes_type = six.binary_type
    text_type = six.text_type

    # Whole-blob inputs are broken into lines; pre-split input passes through.
    a = a.splitlines() if isinstance(a, bytes_type) else a
    b = b.splitlines() if isinstance(b, bytes_type) else b

    # Header labels: only text needs encoding.
    if isinstance(filename_a, text_type):
        filename_a = filename_a.encode('utf-8')
    if isinstance(filename_b, text_type):
        filename_b = filename_b.encode('utf-8')

    # Header timestamps: everything except bytes goes through text first.
    if not isinstance(timestamp_a, bytes_type):
        timestamp_a = text_type(timestamp_a).encode('utf-8')
    if not isinstance(timestamp_b, bytes_type):
        timestamp_b = text_type(timestamp_b).encode('utf-8')

    if ignore_blanks:
        a = [line for line in a if not BLANKS_REGEX.match(line)]
        b = [line for line in b if not BLANKS_REGEX.match(line)]

    diff_args = (a, b, filename_a, filename_b, timestamp_a, timestamp_b)

    if six.PY2:
        return difflib.unified_diff(*diff_args, lineterm=b"")

    return difflib.diff_bytes(difflib.unified_diff, *diff_args, lineterm=b"")