Esempio n. 1
0
    def get(self, url):
        """Fetch ``url`` through every session and report how the responses differ.

        The request URL is built as ``{base_url}/a/{domain}/{url}``. The list
        of status codes is always written via ``self.stdout``; if the codes
        are not all identical, ``self.print_diff`` is called as well. The
        bodies of the first two responses are then compared: JSON bodies are
        diffed with ``json_delta`` and pretty-printed, anything else is
        compared as HTML and a reported difference is written via
        ``self.stderr``.

        Args:
            url: Path appended after the domain segment of the request URL.

        Returns:
            None. All results are reported through the output helpers.
        """
        full_url = '{}/a/{}/{}'.format(self.base_url, self.domain, url)
        status_codes = []
        content = []
        content_type = None
        for session in self.sessions:
            resp = session.get(full_url)
            status_codes.append(resp.status_code)
            content.append(resp.text)
            # Only the content type of the last response is kept; it decides
            # how the bodies are compared below.
            content_type = resp.headers.get('content-type')

        self.stdout('\n{}\n{}'.format(full_url, status_codes))
        if len(set(status_codes)) != 1:
            self.print_diff(url, 'status_code', status_codes)

        # A body comparison needs two responses; with fewer sessions there is
        # nothing to diff, so stop instead of failing on content[1].
        if len(content) < 2 or content[0] == content[1]:
            return

        # The header may carry parameters ("application/json; charset=utf-8")
        # or be missing entirely, so compare the bare media type only.
        media_type = (content_type or '').split(';', 1)[0].strip().lower()
        if media_type == 'application/json':
            diff = json_delta.diff(json.loads(content[0]),
                                   json.loads(content[1]),
                                   verbose=False)
            pprint(diff, indent=8)
        else:
            try:
                _check_shared(content[0], content[1], LHTMLOutputChecker(),
                              "html")
            except AssertionError as e:
                self.stderr(str(e))
Esempio n. 2
0
    def __eq__(self, o: object) -> bool:
        """Report whether ``o`` is an HOCRNode with equivalent markup.

        Two HTML fragments can differ textually while describing the same
        tree, so a plain string comparison of the serialized nodes would be
        too strict. Instead, both nodes are serialized and handed to
        ``lxml.doctestcompare.LHTMLOutputChecker`` with the ``PARSE_HTML``
        option flag. That checker was written to make HTML doctests tolerant
        of insignificant markup differences; see
        https://lxml.de/lxmlhtml.html#running-html-doctests

        This is not a doctest, but the checker's notion of equivalence is
        what node comparison needs. Differences it treats as insignificant
        include:
        - attribute order
        - repeated spaces inside a tag
        - whitespace between tags

        Any object that is not an HOCRNode compares unequal.
        """
        if isinstance(o, HOCRNode):
            comparator = LHTMLOutputChecker()
            own_markup = lxml.etree.tostring(self)
            other_markup = lxml.etree.tostring(o)
            return comparator.check_output(
                want=own_markup,
                got=other_markup,
                optionflags=PARSE_HTML,
            )

        return False
Esempio n. 3
0
 def runTest(self):
     """Run one data-driven test: transform the input and check the result.

     After ``self.parse()``, tests flagged with ``self.ignore`` are skipped.
     Each entry in ``self.options`` becomes a boolean keyword argument: a
     leading ``-`` strips the dash and sets the option to False, anything
     else is set to True. Unless the ``clean`` option is switched off, the
     input is run through ``Cleaner(**kw).clean_html``; otherwise it is used
     unchanged. The outcome is compared with ``self.expect`` by an
     ``LHTMLOutputChecker``.

     Raises:
         AssertionError: if no expected output is available.
         Exception: if the checker reports a mismatch; the message holds
             the checker's difference report.
     """
     self.parse()
     if self.ignore:
         # This test has been marked as one to skip.
         return

     # Later duplicates win, exactly as with sequential assignment.
     kw = {
         (name[1:] if name.startswith('-') else name): not name.startswith('-')
         for name in self.options
     }

     transformed = (Cleaner(**kw).clean_html(self.input)
                    if kw.get('clean', True) else self.input)

     assert self.expect is not None, ("No expected output in %s" %
                                      self.filename)

     checker = LHTMLOutputChecker()
     if checker.check_output(self.expect, transformed, 0):
         return

     result = checker.output_difference(DummyInput(want=self.expect),
                                        transformed, 0)
     raise Exception("\n" + result)
Esempio n. 4
0
 def __init__(self, text):
     """Store ``text`` and prepare the state used to compare against it.

     ``text`` is kept as-is and also wrapped in a ``doctest.Example`` with
     an empty source and ``text`` as the expected output. The comparison
     uses an ``LHTMLOutputChecker`` with whitespace normalization and
     ellipsis matching enabled; ``print_diff`` starts out as True.
     """
     self.text = text
     self.print_diff = True
     self.flags = doctest.NORMALIZE_WHITESPACE | doctest.ELLIPSIS
     self.checker = LHTMLOutputChecker()
     # Built from the stored attribute, after it has been assigned.
     self.example = doctest.Example('', self.text)
Esempio n. 5
0
 def assertHtmlEqual(self, expected, actual, normalize=True):
     """Check ``expected`` against ``actual`` as HTML.

     With ``normalize`` true (the default), both documents are first passed
     through ``parse_normalize(..., is_html=True)``. The comparison itself
     is delegated to ``_check_shared`` with an ``LHTMLOutputChecker`` in
     "html" mode.
     """
     if normalize:
         expected, actual = (
             parse_normalize(document, is_html=True)
             for document in (expected, actual)
         )
     html_checker = LHTMLOutputChecker()
     _check_shared(expected, actual, html_checker, "html")