def test_special_cases(self):
    """Check the attributes parsed from a few unusual-looking tags.

    Each case pairs a single-tag markup string with the attribute
    dictionary expected on the first element that parse_html() yields.
    """
    cases = (
        # Hyphenated attribute name on a self-closing tag.
        (
            "<meta http-equiv='Pragma' content='no-cache' />",
            {'content': 'no-cache', 'http-equiv': 'Pragma'},
        ),
        # Attribute names containing a colon.
        (
            "<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>",
            {'xmlns': 'http://www.w3.org/1999/xhtml', 'xml:lang': 'en', 'lang': 'en'},
        ),
        # Upper-case names, an unquoted value and a stray "/" in the
        # middle of the tag; the "/" is expected as a key mapped to None.
        (
            "<IMG SRC='http://images.play.com/banners/SAM550a.jpg' align='left' / hspace=5>",
            {'src': 'http://images.play.com/banners/SAM550a.jpg',
             'align': 'left', 'hspace': '5', '/': None},
        ),
    )
    for markup, expected_attributes in cases:
        first_element = list(parse_html(markup))[0]
        self.assertEqual(first_element.attributes, expected_attributes)
def _test_sample(self, source, expected_parsed, samplecount=None):
    """Check that parse_html(source) yields exactly ``expected_parsed``.

    Elements are compared pairwise, in order: first their ``start``/``end``
    offsets, then their types, and, for HtmlTag elements, their ``tag``,
    ``attributes`` and ``tag_type``.

    :param source: the HTML text to parse.
    :param expected_parsed: iterable of the expected elements, in order.
        It is only read, never modified.
    :param samplecount: optional sample number, appended to failure
        messages so the offending sample can be identified.
    :raises AssertionError: (via ``self.fail``/``self.assertEqual``) on the
        first mismatch, or when either sequence is longer than the other.
    """
    def fail(message):
        # Single place that tags a failure message with the sample number.
        if samplecount is not None:
            message += " (sample %d)" % samplecount
        self.fail(message)

    # Walk the expectation through an iterator so the caller's list is left
    # untouched and running out of expected elements can be reported
    # as a test failure instead of an IndexError.
    missing = object()
    expected_iter = iter(expected_parsed)
    count_element = 0
    count_expected = 0
    for element in parse_html(source):
        if isinstance(element, HtmlTag):
            count_element += 1
        element_text = source[element.start:element.end]

        expected = next(expected_iter, missing)
        if expected is missing:
            fail("Unexpected extra element [%s,%s] %s" % (
                element.start, element.end, element_text))
        if isinstance(expected, HtmlTag):
            count_expected += 1
        expected_text = source[expected.start:expected.end]

        if element.start != expected.start or element.end != expected.end:
            fail("[%s,%s] %s != [%s,%s] %s" % (
                element.start, element.end, element_text,
                expected.start, expected.end, expected_text))
        if type(element) != type(expected):
            fail("(%s) %s != (%s) %s for text\n%s" % (
                count_element, repr(type(element)),
                count_expected, repr(type(expected)), element_text))
        if isinstance(element, HtmlTag):
            self.assertEqual(element.tag, expected.tag)
            self.assertEqual(element.attributes, expected.attributes)
            self.assertEqual(element.tag_type, expected.tag_type)

    # Anything still unconsumed was expected but never produced by the parser.
    leftover = list(expected_iter)
    if leftover:
        fail("Expected %s" % repr(leftover))
def add_sample(source):
    """
    Method for adding samples to test samples file
    (use from console)

    Picks the first index N for which "<SAMPLES_FILE_PREFIX>_N.json" does
    not exist yet, then writes the source to "<SAMPLES_FILE_PREFIX>_N.html"
    and its parsed form, serialized as JSON, to the matching ".json" file.
    """
    count = 0
    while os.path.exists("%s_%d.json" % (SAMPLES_FILE_PREFIX, count)):
        count += 1

    # Context managers guarantee both files are flushed and closed, even
    # if parsing or serialization raises part-way through.
    with open("%s_%d.html" % (SAMPLES_FILE_PREFIX, count), "wb") as html_file:
        html_file.write(unicode_to_str(source))

    parsed = list(parse_html(source))
    with open("%s_%d.json" % (SAMPLES_FILE_PREFIX, count), "wb") as json_file:
        json_file.write(json.dumps(parsed, default=_encode_element, indent=8))
def add_sample(source):
    """
    Method for adding samples to test samples file
    (use from console)

    The samples file is gzip-compressed and holds one JSON document per
    line. Existing samples are loaded, a new {"source", "parsed"} entry
    for ``source`` is appended, and the whole file is written back.
    """
    # Resolve the location once so that the existence check, the read and
    # the write all refer to the same file regardless of the current
    # working directory.
    samples_path = os.path.join(path, SAMPLES_FILE)

    samples = []
    if os.path.exists(samples_path):
        with GzipFile(samples_path, "r") as existing:
            samples = [json.loads(line) for line in existing.readlines()]

    new_sample = {"source": source}
    new_sample["parsed"] = list(parse_html(source))
    samples.append(new_sample)

    # The context manager closes the archive even if serialization fails.
    with GzipFile(samples_path, "wb") as samples_file:
        for sample in samples:
            samples_file.write(json.dumps(sample, default=_encode_element) + "\n")
def _test_sample(self, sample):
    """Check that parsing ``sample["source"]`` yields ``sample["parsed"]``.

    Elements are compared pairwise, in order: first their ``start``/``end``
    offsets, then their types, and, for HtmlTag elements, their ``tag``
    and ``attributes``.

    :param sample: mapping with a "source" entry (the HTML text) and a
        "parsed" entry (the expected elements, in order). It is only
        read, never modified.
    :raises AssertionError: (via ``self.fail``/``self.assertEqual``) on the
        first mismatch, or when either sequence is longer than the other.
    """
    source = sample["source"]

    # Walk the expectation through an iterator so the sample is left
    # untouched and running out of expected elements can be reported
    # as a test failure instead of an IndexError.
    missing = object()
    expected_iter = iter(sample["parsed"])
    count_element = 0
    count_expected = 0
    for element in parse_html(source):
        if isinstance(element, HtmlTag):
            count_element += 1
        element_text = source[element.start:element.end]

        expected = next(expected_iter, missing)
        if expected is missing:
            self.fail("Unexpected extra element [%s,%s] %s" % (
                element.start, element.end, element_text))
        if isinstance(expected, HtmlTag):
            count_expected += 1
        expected_text = source[expected.start:expected.end]

        if element.start != expected.start or element.end != expected.end:
            self.fail("[%s,%s] %s != [%s,%s] %s" % (
                element.start, element.end, element_text,
                expected.start, expected.end, expected_text))
        if type(element) != type(expected):
            self.fail("(%s) %s != (%s) %s for text\n%s" % (
                count_element, repr(type(element)),
                count_expected, repr(type(expected)), element_text))
        if isinstance(element, HtmlTag):
            self.assertEqual(element.tag, expected.tag)
            self.assertEqual(element.attributes, expected.attributes)

    # Anything still unconsumed was expected but never produced by the parser.
    leftover = list(expected_iter)
    if leftover:
        self.fail("Expected %s" % repr(leftover))