예제 #1
0
    def __eq__(self, o: object) -> bool:
        """Return whether ``o`` is an HOCRNode with functionally equal markup.

        Anything that is not an HOCRNode compares unequal. Two nodes are
        serialized with ``lxml.etree.tostring`` and the results are handed
        to ``lxml.doctestcompare.LHTMLOutputChecker`` with the PARSE_HTML
        option flag, the same machinery lxml offers for HTML doctests:
        https://lxml.de/lxmlhtml.html#running-html-doctests

        A literal comparison of the serialized text would be too strict,
        because markup that differs in small ways can still describe the
        same tree. The output checker treats the following as equivalent:
        - Different order of attributes
        - Repeated spaces inside a tag
        - Whitespace between tags
        """
        if isinstance(o, HOCRNode):
            own_markup = lxml.etree.tostring(self)
            other_markup = lxml.etree.tostring(o)
            return LHTMLOutputChecker().check_output(
                want=own_markup,
                got=other_markup,
                optionflags=PARSE_HTML,
            )

        return False
예제 #2
0
    def output_difference(self, example, got, optionflags):
        """Describe the difference between ``example.want`` and ``got``.

        When the expected text is blank the call is delegated unchanged to
        ``LHTMLOutputChecker.output_difference``. Otherwise both the
        expected and the actual text are passed through every callable in
        ``self.transformers`` first, and the report is built from the
        transformed texts. Report lines containing ``(got:`` are repeated
        at the end under a "Lines with differences:" heading so they are
        easy to find.

        ``example`` is modified only for the duration of the delegated
        call; its ``want`` attribute is restored afterwards, even when the
        delegated call raises.

        Returns the report text.
        """
        want = example.want
        if not want.strip():
            return LHTMLOutputChecker.output_difference(
                self, example, got, optionflags)

        # Dang, this isn't as easy to override as we might wish
        original = want

        for transformer in self.transformers:
            want = transformer(want)
            got = transformer(got)

        # The base implementation is handed the example object rather than
        # the expected text, so temporarily swap in the normalized text.
        # try/finally guarantees the caller's example is never left holding
        # the normalized text if the base call fails.
        example.want = want
        try:
            result = LHTMLOutputChecker.output_difference(
                self, example, got, optionflags)
        finally:
            example.want = original

        # repeat lines with a diff, otherwise it's wading through mud
        difflines = [line for line in result.splitlines() if '(got:' in line]

        if difflines:
            result += '\nLines with differences:\n' + '\n'.join(difflines)

        return result
예제 #3
0
class HTML(object):
    """
    A class wrapping HTML for better comparison and nicer
    error reporting.

    Comparisons are delegated to an ``LHTMLOutputChecker`` using the
    doctest NORMALIZE_WHITESPACE and ELLIPSIS flags. The first comparison
    whose outcome differs from what the operator expected prints the
    checker's difference report; later comparisons on the same object stay
    silent.
    """
    def __init__(self, text):
        self.text = text
        self.example = doctest.Example('', self.text)
        self.checker = LHTMLOutputChecker()
        self.flags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS
        self.print_diff = True

    def compare(self, other, expect_eq):
        """Return whether ``other`` matches this HTML according to the checker.

        ``other`` may be another HTML wrapper or the raw text itself.
        ``expect_eq`` is the outcome the caller hopes for; it only controls
        whether a difference report is printed, never the return value.
        """
        if isinstance(other, HTML):
            text = other.text
        else:
            text = other
        eq = self.checker.check_output(self.text, text, self.flags)
        if self.print_diff and eq != expect_eq:
            # Single parenthesized argument: prints the same thing whether
            # ``print`` is a statement or a function.
            print(self.checker.output_difference(
                self.example, text, self.flags))
        # Only output diff once per HTML object.
        self.print_diff = False
        return eq

    def __eq__(self, other):
        return self.compare(other, True)

    def __ne__(self, other):
        # compare() reports equality, so the result must be negated here;
        # returning it as-is made ``a != b`` true for equal documents.
        return not self.compare(other, False)

    def __str__(self):
        return str(self.text)

    def __unicode__(self):
        return unicode(self.text)
예제 #4
0
 def assertHTML(self, want, got):
     """Assert the want and the got are equal HTML strings.

     Uses lxml's LHTMLOutputChecker class, which handles minor differences
     in HTML documents, like differences in whitespace that don't affect
     the equality of the HTML.

     ``got`` is converted to text first when it is not already a string.
     On a mismatch both documents are printed and the assertion error
     raised by ``assertTrue`` propagates to the caller.
     """
     if not isinstance(got, basestring):
         got = unicode(got)
     checker = LHTMLOutputChecker()
     try:
         self.assertTrue(checker.check_output(want, got, PARSE_HTML))
     except AssertionError:
         print("Wanted: %s" % want)
         print("Got: %s" % got)
         # Re-raise the caught error instead of a fresh, empty
         # AssertionError so the message and traceback are preserved.
         raise
예제 #5
0
    def get(self, url):
        """Fetch ``url`` through every session and report response differences.

        The request URL is built as ``<base_url>/a/<domain>/<url>`` and is
        requested once per entry in ``self.sessions``. The URL and the list
        of status codes are always written via ``self.stdout``; differing
        status codes are additionally reported through ``self.print_diff``.

        When the first two response bodies differ, they are compared as
        JSON if the last response's content type is ``application/json``
        (parameters such as ``; charset=utf-8`` are ignored), otherwise as
        HTML. An AssertionError raised by the HTML comparison is written
        via ``self.stderr`` instead of propagating.

        NOTE(review): the body comparison indexes the first two responses,
        so this presumably expects exactly two sessions - confirm with the
        caller.
        """
        full_url = '{}/a/{}/{}'.format(self.base_url, self.domain, url)
        status_codes = []
        content = []
        content_type = None
        for session in self.sessions:
            resp = session.get(full_url)
            status_codes.append(resp.status_code)
            content.append(resp.text)
            content_type = resp.headers.get('content-type')

        self.stdout('\n{}\n{}'.format(full_url, status_codes))
        if len(set(status_codes)) != 1:
            self.print_diff(url, 'status_code', status_codes)

        if content[0] != content[1]:
            # Compare only the media type: the header may be missing or
            # carry parameters after a ';'.
            media_type = (content_type or '').split(';', 1)[0].strip().lower()
            if media_type == 'application/json':
                diff = json_delta.diff(json.loads(content[0]),
                                       json.loads(content[1]),
                                       verbose=False)
                pprint(diff, indent=8)
            else:
                try:
                    _check_shared(content[0], content[1], LHTMLOutputChecker(),
                                  "html")
                except AssertionError as e:
                    self.stderr(str(e))
예제 #6
0
    def check_output(self, want, got, optionflags):
        """Return whether ``got`` matches ``want`` once both are normalized.

        Texts that already compare equal match immediately. Otherwise each
        callable in ``self.transformers`` is applied, in order, to both
        texts and the verdict is left to
        ``LHTMLOutputChecker.check_output``.
        """
        if not got == want:
            for normalize in self.transformers:
                want, got = normalize(want), normalize(got)
            return LHTMLOutputChecker.check_output(
                self, want, got, optionflags)

        return True
예제 #7
0
    def check_output(self, want, got, optionflags):
        """Compare expected and actual output after running the transformers.

        Identical inputs short-circuit to True. Everything else is passed
        through ``self.transformers`` (expected text first, then actual
        text, for each transformer) before the base
        ``LHTMLOutputChecker`` comparison decides the result.
        """
        if got == want:
            return True

        normalized_want, normalized_got = want, got
        for normalize in self.transformers:
            normalized_want = normalize(normalized_want)
            normalized_got = normalize(normalized_got)

        base_check = LHTMLOutputChecker.check_output
        return base_check(self, normalized_want, normalized_got, optionflags)
예제 #8
0
    def output_difference(self, example, got, optionflags):
        """Describe the difference between ``example.want`` and ``got``.

        A blank expected text is delegated unchanged to
        ``LHTMLOutputChecker.output_difference``. Otherwise both texts are
        passed through every callable in ``self.transformers`` and the
        report is produced from the transformed texts.

        ``example.want`` is replaced only while the delegated call runs
        and is restored afterwards, even when that call raises.

        Returns the report text produced by the base class.
        """
        want = example.want
        if not want.strip():
            return LHTMLOutputChecker.output_difference(
                self, example, got, optionflags)

        # Dang, this isn't as easy to override as we might wish
        original = want

        for transformer in self.transformers:
            want = transformer(want)
            got = transformer(got)

        # The base implementation is handed the example object, so swap in
        # the normalized text temporarily. try/finally keeps the caller's
        # example intact if the base call fails.
        example.want = want
        try:
            result = LHTMLOutputChecker.output_difference(
                self, example, got, optionflags)
        finally:
            example.want = original

        return result
예제 #9
0
    def output_difference(self, example, got, optionflags):
        """Build the difference report for normalized expected/actual text.

        If ``example.want`` is blank, the base
        ``LHTMLOutputChecker.output_difference`` is called with the
        arguments untouched. Otherwise every callable in
        ``self.transformers`` is applied to both the expected and the
        actual text before the base class builds the report.

        The example's ``want`` attribute is overwritten only for the
        duration of the base call and is always put back, including when
        the base call raises.

        Returns the report text produced by the base class.
        """
        want = example.want
        if not want.strip():
            return LHTMLOutputChecker.output_difference(
                self, example, got, optionflags)

        # Dang, this isn't as easy to override as we might wish
        original = want

        for transformer in self.transformers:
            want = transformer(want)
            got = transformer(got)

        # Temporarily hand the base class an example carrying the
        # normalized text; the finally clause restores the original text
        # on every exit path.
        example.want = want
        try:
            result = LHTMLOutputChecker.output_difference(
                self, example, got, optionflags)
        finally:
            example.want = original

        return result
예제 #10
0
class HTML(object):
    """
    A class wrapping HTML for better comparison and nicer
    error reporting.

    Comparisons are delegated to an ``LHTMLOutputChecker`` using the
    doctest NORMALIZE_WHITESPACE and ELLIPSIS flags. The first comparison
    whose outcome differs from what the operator expected prints the
    checker's difference report; later comparisons on the same object stay
    silent.
    """
    def __init__(self, text):
        self.text = text
        self.example = doctest.Example('', self.text)
        self.checker = LHTMLOutputChecker()
        self.flags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS
        self.print_diff = True

    def compare(self, other, expect_eq):
        """Return whether ``other`` matches this HTML according to the checker.

        ``other`` may be another HTML wrapper or the raw text itself.
        ``expect_eq`` is the outcome the caller hopes for; it only controls
        whether a difference report is printed, never the return value.
        """
        if isinstance(other, HTML):
            text = other.text
        else:
            text = other
        eq = self.checker.check_output(self.text, text, self.flags)
        if self.print_diff and eq != expect_eq:
            print(
                self.checker.output_difference(self.example, text, self.flags))
        # Only output diff once per HTML object.
        self.print_diff = False
        return eq

    def __eq__(self, other):
        return self.compare(other, True)

    def __ne__(self, other):
        # compare() reports equality, so the result must be negated here;
        # returning it as-is made ``a != b`` true for equal documents.
        return not self.compare(other, False)

    def __str__(self):
        return str(self.text)

    def __unicode__(self):
        return str(self.text)
예제 #11
0
 def runTest(self):
     """Clean the parsed input and fail if it differs from the expectation.

     Each entry in ``self.options`` becomes a keyword argument for
     ``Cleaner``: a leading ``-`` sets the option (without the dash) to
     False, anything else sets it to True. The input is cleaned unless
     the ``clean`` option was switched off. A mismatch between expected
     and cleaned markup raises an Exception carrying the checker's
     difference report.
     """
     self.parse()
     if self.ignore:
         # This test has been marked as one to skip.
         return

     kw = {}
     for name in self.options:
         negated = name.startswith('-')
         kw[name[1:] if negated else name] = not negated

     transformed = (
         Cleaner(**kw).clean_html(self.input)
         if kw.get('clean', True)
         else self.input
     )

     assert self.expect is not None, ("No expected output in %s" %
                                      self.filename)

     checker = LHTMLOutputChecker()
     if checker.check_output(self.expect, transformed, 0):
         return

     result = checker.output_difference(DummyInput(want=self.expect),
                                        transformed, 0)
     raise Exception("\n" + result)
예제 #12
0
 def runTest(self):
     """Run one cleaner test case: parse, clean, compare with the expectation.

     Options prefixed with ``-`` are passed to ``Cleaner`` as False under
     the un-prefixed name; all other options are passed as True. Cleaning
     is skipped when the ``clean`` option is False. When the cleaned
     markup does not match ``self.expect`` an Exception containing the
     checker's difference report is raised.
     """
     self.parse()
     if self.ignore:
         # Test cases flagged as ignored do nothing.
         return

     kw = {
         (name[1:] if name.startswith('-') else name):
             not name.startswith('-')
         for name in self.options
     }

     if kw.get('clean', True):
         cleaner = Cleaner(**kw)
         transformed = cleaner.clean_html(self.input)
     else:
         transformed = self.input

     assert self.expect is not None, (
         "No expected output in %s" % self.filename)

     checker = LHTMLOutputChecker()
     matches = checker.check_output(self.expect, transformed, 0)
     if not matches:
         expected = DummyInput(want=self.expect)
         result = checker.output_difference(expected, transformed, 0)
         raise Exception("\n"+result)
예제 #13
0
 def __init__(self, text):
     """Keep ``text`` and set up the checker, flags and example around it.

     The diff-printing switch starts out enabled.
     """
     self.text = text
     self.print_diff = True
     self.flags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
     self.checker = LHTMLOutputChecker()
     # The example has empty source and uses the text as its expected output.
     self.example = doctest.Example('', self.text)
예제 #14
0
파일: util.py 프로젝트: tstalka/commcare-hq
 def assertHtmlEqual(self, expected, actual, normalize=True):
     """Check ``expected`` against ``actual`` as HTML via ``_check_shared``.

     With ``normalize`` left on (the default) both documents are first
     passed through ``parse_normalize`` with ``is_html=True``. The
     comparison itself uses a fresh ``LHTMLOutputChecker``.
     """
     if normalize:
         expected, actual = (
             parse_normalize(markup, is_html=True)
             for markup in (expected, actual)
         )
     checker = LHTMLOutputChecker()
     _check_shared(expected, actual, checker, "html")
예제 #15
0
 def __init__(self, text):
     """Wrap ``text`` together with the helpers needed to compare it.

     Creates the output checker, the doctest flags and a doctest example
     whose expected output is the text; diff printing starts enabled.
     """
     self.text = text
     self.checker = LHTMLOutputChecker()
     whitespace_and_ellipsis = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS
     self.flags = whitespace_and_ellipsis
     self.example = doctest.Example('', self.text)
     self.print_diff = True