def test_all_properties_exists(self):
    # The page carries a span for every goods property exercised here, so
    # the extracted dictionary is expected to hold one entry per span.
    page = (
        r"<html>"
        r"<body>"
        r"<span id='ctl00_ContentPH_GoodsName'>ice cream</span>"
        r"<span id='ctl00_ContentPH_BarCodeL'>2220066000747</span>"
        r"<span id='ctl00_ContentPH_KeepingTime'>13</span>"
        r"<span id='ctl00_ContentPH_Comment'>comment</span>"
        r"<span id='ctl00_ContentPH_Composition'>one, two, three</span>"
        r"<span id='ctl00_ContentPH_Net'>1000,00 g</span>"
        r"<span id='ctl00_ContentPH_Gost'>TU 919191291</span>"
        r"<span id='ctl00_ContentPH_StoreCond'>+25</span>"
        r"<span id='ctl00_ContentPH_ESL'>Proteins: 10,00 g</span>"
        r"<span id='ctl00_ContentPH_PackingType'>test</span>"
        r"</body>"
        r"</html>"
    )
    response = HtmlResponse(url="", body=page)
    extracted = xpath_extractor.extract_goods_properties_dict(response)
    # Expected mapping from span content to the extractor's property keys.
    expected = {
        'name': 'ice cream',
        'barcode': '2220066000747',
        'best_before': '13',
        'comment': 'comment',
        'ingredients': 'one, two, three',
        'netto_weight': '1000,00 g',
        'standart': 'TU 919191291',
        'store_conditions': '+25',
        'esl_as_string': 'Proteins: 10,00 g',
        'pack_type': 'test',
    }
    self.assertEqual(extracted, expected)
Beispiel #2
0
 def parse_good(self, response):
     """Build a ``GoodItem`` from a single goods page response.

     The response URL is logged at DEBUG level first. If ``self.close_down``
     is set, the crawl is aborted by raising ``CloseSpider`` carrying
     ``str(self.close_exception)``.

     Args:
         response: the downloaded goods page; only ``response.url`` is read
             here, the rest is handed to
             ``xpath_extractor.extract_goods_properties_dict``.

     Returns:
         The populated item with ``goodsmatrix_url`` set to the response
         URL, or ``None`` when the resulting item is falsy.

     Raises:
         CloseSpider: when ``self.close_down`` is truthy.
     """
     log.msg("PARSE GOOD: {0}".format(response.url), level=log.DEBUG)
     if self.close_down:
         raise CloseSpider(str(self.close_exception))
     good = GoodItem(xpath_extractor.extract_goods_properties_dict(response))
     if not good:
         # ``level`` is an argument of log.msg, not of str.format; passing it
         # to format() is silently ignored and the message would not be
         # logged at ERROR level.
         log.msg("can't parse {0}".format(response.url), level=log.ERROR)
         return None
     good['goodsmatrix_url'] = response.url
     return good
Beispiel #3
0
 def parse_good(self, response):
     """Build a ``GoodItem`` from a single goods page response.

     The response URL is logged at DEBUG level first. If ``self.close_down``
     is set, the crawl is aborted by raising ``CloseSpider`` carrying
     ``str(self.close_exception)``.

     Args:
         response: the downloaded goods page; only ``response.url`` is read
             here, the rest is handed to
             ``xpath_extractor.extract_goods_properties_dict``.

     Returns:
         The populated item with ``goodsmatrix_url`` set to the response
         URL, or ``None`` when the resulting item is falsy.

     Raises:
         CloseSpider: when ``self.close_down`` is truthy.
     """
     log.msg("PARSE GOOD: {0}".format(response.url), level=log.DEBUG)
     if self.close_down:
         raise CloseSpider(str(self.close_exception))
     good = GoodItem(
         xpath_extractor.extract_goods_properties_dict(response))
     if not good:
         # ``level`` is an argument of log.msg, not of str.format; passing it
         # to format() is silently ignored and the message would not be
         # logged at ERROR level.
         log.msg("can't parse {0}".format(response.url), level=log.ERROR)
         return None
     good['goodsmatrix_url'] = response.url
     return good
 def test_no_goods_properties_in_response(self):
     # A page with an empty body has no goods spans at all, so the
     # extractor is expected to return an empty dictionary.
     page = (
         r"<html>"
         r"<body>"
         r"</body>"
         r"</html>"
     )
     response = HtmlResponse(url="", body=page)
     extracted = xpath_extractor.extract_goods_properties_dict(response)
     self.assertEqual(extracted, {})
 def test_extract_value_with_html_tag_ontly(self):
     # The name span contains nothing but a <br> tag; the expectation is
     # that no 'name' entry is produced for it.
     page = (
         r"<html>"
         r"<body>"
         r"<span id='ctl00_ContentPH_GoodsName'><br></span>"
         r"</body>"
         r"</html>"
     )
     response = HtmlResponse(url="", body=page)
     extracted = xpath_extractor.extract_goods_properties_dict(response)
     self.assertEqual(extracted, {})
 def test_extract_esl_as_string_concatinated_with_semicolon(self):
     # The ESL span holds several lines separated by <br /> tags; the
     # expected value joins those lines with "; " and has no trailing
     # separator.
     page = (
         r"<html>"
         r"<body>"
         r"<span id='ctl00_ContentPH_ESL'>Proteins:  11,20 g<br />Fats:  22,90 g<br />Carbohydrates:  25,90 g<br />Calories:  354,50 kkal<br /></span>"
         r"</body>"
         r"</html>"
     )
     response = HtmlResponse(url="", body=page)
     extracted = xpath_extractor.extract_goods_properties_dict(response)
     expected = {
         'esl_as_string': 'Proteins:  11,20 g; Fats:  22,90 g; Carbohydrates:  25,90 g; Calories:  354,50 kkal'
     }
     self.assertEqual(extracted, expected)
 def test_extract_paired_html_dropped_but_content_saved(self):
     # The name is split by a paired <b>...</b> tag; the expected value
     # keeps the text on both sides and inside the tag, without the markup.
     page = (
         r"<html>"
         r"<body>"
         r"<span id='ctl00_ContentPH_GoodsName'>t<b>es</b>t</span>"
         r"</body>"
         r"</html>"
     )
     response = HtmlResponse(url="", body=page)
     extracted = xpath_extractor.extract_goods_properties_dict(response)
     expected = {
         'name': 'test',
     }
     self.assertEqual(extracted, expected)
 def test_extract_special_charcter_decoded(self):
     # The name span contains the HTML entity &lt;; the expected value is
     # the decoded '<' character.
     page = (
         r"<html>"
         r"<body>"
         r"<span id='ctl00_ContentPH_GoodsName'>&lt;</span>"
         r"</body>"
         r"</html>"
     )
     response = HtmlResponse(url="", body=page)
     extracted = xpath_extractor.extract_goods_properties_dict(response)
     expected = {
         'name': '<',
     }
     self.assertEqual(extracted, expected)
 def test_if_property_does_not_exist_then_it_is_not_added_to_dict(self):
     # Only the name and barcode spans are present; the expected result
     # holds exactly those two keys and nothing for the absent properties.
     page = (
         r"<html>"
         r"<body>"
         r"<span id='ctl00_ContentPH_GoodsName'>ice cream</span>"
         r"<span id='ctl00_ContentPH_BarCodeL'>2220066000747</span>"
         r"</body>"
         r"</html>"
     )
     response = HtmlResponse(url="", body=page)
     extracted = xpath_extractor.extract_goods_properties_dict(response)
     expected = {
         'name': 'ice cream',
         'barcode': '2220066000747',
     }
     self.assertEqual(extracted, expected)
 def test_extract_only_first_occurance(self):
     # Two spans share the same name id; the expected value comes from the
     # first one in document order.
     page = (
         r"<html>"
         r"<body>"
         r"<span id='ctl00_ContentPH_GoodsName'>test1</span>"
         r"<span id='ctl00_ContentPH_GoodsName'>test2</span>"
         r"</body>"
         r"</html>"
     )
     response = HtmlResponse(url="", body=page)
     extracted = xpath_extractor.extract_goods_properties_dict(response)
     expected = {
         'name': 'test1',
     }
     self.assertEqual(extracted, expected)