def get(self, url):
    """Fetch ``url`` through every session and report how the responses differ.

    The request goes to ``{base_url}/a/{domain}/{url}``. The full URL and the
    collected status codes are written via ``self.stdout``; a status-code
    mismatch is reported via ``self.print_diff``. When the bodies of the first
    two sessions differ, a JSON delta is pretty-printed for JSON responses;
    otherwise the bodies are compared as HTML and any mismatch is written via
    ``self.stderr``.

    Args:
        url: Path appended after the domain segment of the base URL.

    Returns:
        None. All results are reported through the output callables.
    """
    full_url = '{}/a/{}/{}'.format(self.base_url, self.domain, url)
    status_codes = []
    content = []
    content_type = None
    for session in self.sessions:
        resp = session.get(full_url)
        status_codes.append(resp.status_code)
        content.append(resp.text)
        # The content type of the last response decides how bodies are compared.
        content_type = resp.headers.get('content-type')

    self.stdout('\n{}\n{}'.format(full_url, status_codes))
    if len(set(status_codes)) > 1:
        self.print_diff(url, 'status_code', status_codes)

    # A body comparison needs two responses; with fewer sessions there is
    # nothing to diff (indexing content[1] would raise IndexError).
    if len(content) < 2 or content[0] == content[1]:
        return

    # The header may carry parameters ("application/json; charset=utf-8") and
    # media types are case-insensitive, so compare only the bare media type.
    media_type = (content_type or '').split(';', 1)[0].strip().lower()
    if media_type == 'application/json':
        diff = json_delta.diff(
            json.loads(content[0]), json.loads(content[1]), verbose=False
        )
        # NOTE(review): indent is passed as the string '8'; kept unchanged
        # because the pprint implementation is imported outside this block.
        pprint(diff, indent='8')
    else:
        try:
            _check_shared(content[0], content[1], LHTMLOutputChecker(), "html")
        except AssertionError as e:
            self.stderr(str(e))
def __eq__(self, o: object) -> bool:
    """Report whether this HOCRNode is functionally equivalent to ``o``.

    Comparing HTML text directly is too strict: small markup differences can
    still describe the same tree with the same elements. lxml provides an
    output checker, intended for making doctest comparisons readable, that
    compares markup by functional equivalence instead
    (see https://lxml.de/lxmlhtml.html#running-html-doctests). This is not a
    doctest, but that checker, ``lxml.doctestcompare.LHTMLOutputChecker``
    used with the ``PARSE_HTML`` option flag, does the comparison needed here.

    Differences the checker treats as equivalent, so that the nodes still
    compare equal:

    - attributes given in a different order
    - repeated spaces inside a tag
    - whitespace between tags

    Any object that is not an HOCRNode compares unequal.
    """
    if isinstance(o, HOCRNode):
        comparator = LHTMLOutputChecker()
        own_markup = lxml.etree.tostring(self)
        other_markup = lxml.etree.tostring(o)
        return comparator.check_output(
            want=own_markup,
            got=other_markup,
            optionflags=PARSE_HTML,
        )
    return False
def runTest(self):
    """Transform the parsed input per the options and compare it with the expected output.

    After ``self.parse()``, a case flagged ``self.ignore`` is skipped. Each
    entry of ``self.options`` becomes a boolean keyword: a leading ``-``
    turns the named option off, a bare name turns it on. Unless ``clean`` was
    turned off, the input is run through ``Cleaner`` built with those
    keywords; otherwise the input is used untouched.

    Raises:
        AssertionError: If no expected output is present.
        Exception: If the checker finds the transformed markup differs from
            the expected markup; the message holds the checker's difference
            report.
    """
    self.parse()
    if self.ignore:
        # This case has been flagged to be skipped.
        return

    settings = {}
    for option in self.options:
        disabled = option.startswith('-')
        settings[option[1:] if disabled else option] = not disabled

    transformed = (
        Cleaner(**settings).clean_html(self.input)
        if settings.get('clean', True)
        else self.input
    )

    assert self.expect is not None, ("No expected output in %s" % self.filename)

    checker = LHTMLOutputChecker()
    if checker.check_output(self.expect, transformed, 0):
        return
    report = checker.output_difference(
        DummyInput(want=self.expect), transformed, 0
    )
    raise Exception("\n" + report)
def __init__(self, text):
    """Store the expected text along with the doctest objects used to check against it.

    Args:
        text: Expected output. Kept on ``self.text`` and used as the ``want``
            of a doctest example whose source is empty.
    """
    self.text = text
    self.example = doctest.Example(source='', want=self.text)
    self.checker = LHTMLOutputChecker()
    # Comparison flags: "..." wildcards allowed, whitespace runs normalized.
    self.flags = doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
    self.print_diff = True
def assertHtmlEqual(self, expected, actual, normalize=True):
    """Check ``actual`` against ``expected`` as HTML via ``_check_shared``.

    Args:
        expected: The reference HTML.
        actual: The HTML under test.
        normalize: When true (the default), both documents are first passed
            through ``parse_normalize(..., is_html=True)``.
    """
    want, got = expected, actual
    if normalize:
        want = parse_normalize(expected, is_html=True)
        got = parse_normalize(actual, is_html=True)
    html_checker = LHTMLOutputChecker()
    _check_shared(want, got, html_checker, "html")