コード例 #1
0
 def test_special_cases(self):
     """Attribute parsing for a handful of unusual opening tags"""
     # Each case pairs a markup snippet with the attributes expected on the
     # first element that parse_html yields for it.
     cases = (
         ("<meta http-equiv='Pragma' content='no-cache' />",
          {'content': 'no-cache', 'http-equiv': 'Pragma'}),
         ("<html xmlns='http://www.w3.org/1999/xhtml' xml:lang='en' lang='en'>",
          {'xmlns': 'http://www.w3.org/1999/xhtml', 'xml:lang': 'en', 'lang': 'en'}),
         ("<IMG SRC='http://images.play.com/banners/SAM550a.jpg' align='left' / hspace=5>",
          {'src': 'http://images.play.com/banners/SAM550a.jpg',
           'align': 'left', 'hspace': '5', '/': None}),
     )
     for markup, wanted_attributes in cases:
         first_element = list(parse_html(markup))[0]
         self.assertEqual(first_element.attributes, wanted_attributes)
コード例 #2
0
 def _test_sample(self, source, expected_parsed, samplecount=None):
     """Parse ``source`` and check the result against ``expected_parsed``.

     Every element yielded by ``parse_html(source)`` is compared, in order,
     with the entry at the same position in ``expected_parsed``: the
     ``start``/``end`` offsets and the exact type must match, and for
     ``HtmlTag`` elements the ``tag``, ``attributes`` and ``tag_type``
     must match as well.  The test also fails when either side has
     elements left over.

     ``expected_parsed`` is not modified.  When ``samplecount`` is given,
     it is appended to the failure message so the offending sample can be
     identified.

     Mismatches are reported with ``self.fail`` instead of a bare
     ``assert`` so that they are still raised when Python runs with ``-O``.
     """
     def report(message):
         # Tag the message with the sample number, when one was supplied.
         if samplecount is not None:
             message += " (sample %d)" % samplecount
         self.fail(message)

     # Work on a copy so the caller's list is left untouched.
     expected_items = list(expected_parsed)
     consumed = 0
     count_element = 0
     count_expected = 0
     for element in parse_html(source):
         element_text = source[element.start:element.end]
         if consumed == len(expected_items):
             # The parser produced more elements than were expected: report
             # a proper test failure rather than an IndexError.
             report("Unexpected [%s,%s] %s" % (element.start, element.end,
                                               element_text))
         expected = expected_items[consumed]
         consumed += 1
         # Exact type comparisons (not isinstance) are kept on purpose, to
         # match the exact-type equality check further down.
         if type(element) is HtmlTag:
             count_element += 1
         if type(expected) is HtmlTag:
             count_expected += 1
         expected_text = source[expected.start:expected.end]
         if element.start != expected.start or element.end != expected.end:
             report("[%s,%s] %s != [%s,%s] %s" % (
                 element.start, element.end, element_text,
                 expected.start, expected.end, expected_text))
         if type(element) is not type(expected):
             report("(%s) %s != (%s) %s for text\n%s" % (
                 count_element, repr(type(element)),
                 count_expected, repr(type(expected)), element_text))
         if type(element) is HtmlTag:
             self.assertEqual(element.tag, expected.tag)
             self.assertEqual(element.attributes, expected.attributes)
             self.assertEqual(element.tag_type, expected.tag_type)
     if consumed < len(expected_items):
         # The parser stopped early: list the expectations never matched.
         report("Expected %s" % repr(expected_items[consumed:]))
コード例 #3
0
def add_sample(source):
    """
    Method for adding samples to test samples file
    (use from console)

    The sample is stored under the first index N for which
    "<SAMPLES_FILE_PREFIX>_N.json" does not exist yet:
    "<SAMPLES_FILE_PREFIX>_N.html" receives ``unicode_to_str(source)`` and
    "<SAMPLES_FILE_PREFIX>_N.json" receives the JSON dump of
    ``list(parse_html(source))`` (elements encoded by ``_encode_element``).
    """
    def write_file(filename, data):
        # Close the handle explicitly instead of relying on garbage
        # collection, so the data is flushed even if an error occurs later.
        output = open(filename, "wb")
        try:
            output.write(data)
        finally:
            output.close()

    count = 0
    while os.path.exists("%s_%d.json" % (SAMPLES_FILE_PREFIX, count)):
        count += 1

    # Build both payloads before touching the disk, so a parsing or
    # serialisation error does not leave a lone .html file behind.
    html_data = unicode_to_str(source)
    parsed = list(parse_html(source))
    json_data = json.dumps(parsed, default=_encode_element, indent=8)

    write_file("%s_%d.html" % (SAMPLES_FILE_PREFIX, count), html_data)
    write_file("%s_%d.json" % (SAMPLES_FILE_PREFIX, count), json_data)
コード例 #4
0
ファイル: test_htmlpage.py プロジェクト: kenzouyeh/scrapy
def add_sample(source):
    """
    Method for adding samples to test samples file
    (use from console)

    The samples file is a gzip file holding one JSON document per line,
    each with a "source" entry and a "parsed" entry.  Existing samples are
    read back, the new one (``source`` plus ``list(parse_html(source))``)
    is appended, and the whole file is rewritten.

    ``path`` and ``SAMPLES_FILE`` are defined outside this function.
    """
    # Use one location for the existence check, the read and the write.
    # Checking the bare SAMPLES_FILE name while reading/writing the joined
    # path could skip loading existing samples and then overwrite them.
    samples_path = os.path.join(path, SAMPLES_FILE)

    samples = []
    if os.path.exists(samples_path):
        samples_file = GzipFile(samples_path, "r")
        try:
            for line in samples_file.readlines():
                samples.append(json.loads(line))
        finally:
            samples_file.close()

    new_sample = {"source": source, "parsed": list(parse_html(source))}
    samples.append(new_sample)

    samples_file = GzipFile(samples_path, "wb")
    try:
        for sample in samples:
            samples_file.write(json.dumps(sample, default=_encode_element) + "\n")
    finally:
        # Always close so the gzip trailer is written even on error.
        samples_file.close()
コード例 #5
0
ファイル: test_htmlpage.py プロジェクト: kenzouyeh/scrapy
 def _test_sample(self, sample):
     """Parse ``sample["source"]`` and compare it with ``sample["parsed"]``.

     ``sample`` is indexed with the keys "source" and "parsed".  Each
     element yielded by ``parse_html`` is checked against the next entry
     popped from ``sample["parsed"]``, so that list is consumed (mutated)
     while the test runs.

     Offsets and types are checked with plain ``assert`` statements; tag
     name and attributes of ``HtmlTag`` elements with ``assertEqual``.
     """
     source = sample["source"]
     expected_parsed = sample["parsed"]
     parsed = parse_html(source)
     # Running counts of HtmlTag items seen on each side; they are only
     # used in the type-mismatch message below.
     count_element = 0
     count_expected = 0
     for element in parsed:
         if type(element) == HtmlTag:
             count_element += 1
         # Takes the next expectation off the front of the list; raises
         # IndexError if the parser yields more elements than expected.
         expected = expected_parsed.pop(0)
         if type(expected) == HtmlTag:
             count_expected += 1
         # Source slices covered by each side, used in failure messages.
         element_text = source[element.start:element.end]
         expected_text = source[expected.start:expected.end]
         # Both elements must span exactly the same region of the source.
         if element.start != expected.start or element.end != expected.end:
             assert False, "[%s,%s] %s != [%s,%s] %s" % (element.start, \
                 element.end, element_text, expected.start, \
                 expected.end, expected_text)
         # Exact type equality is required (no isinstance relaxation).
         if type(element) != type(expected):
             assert False, "(%s) %s != (%s) %s for text\n%s" % (count_element, \
                 repr(type(element)), count_expected, repr(type(expected)), element_text)
         # Only tags carry a name and attributes to compare.
         if type(element) == HtmlTag:
             self.assertEqual(element.tag, expected.tag)
             self.assertEqual(element.attributes, expected.attributes)