Exemple #1
0
 def parse(self, response):
     """Yield one item dict per ``url`` element of the XML in ``response.body``.

     Each item starts with the fixed keys ``site``, ``shops``, ``url`` and
     ``apiType``.  Every element child of the ``display`` node that has
     children is copied into the item under its tag name (value = the
     ``nodeValue`` of its first child); an ``identifier`` child is also
     mirrored to ``item["id"]``.  A ``shops`` child contributes one dict
     per ``shop`` element to ``item["shops"]``.

     NOTE(review): ``get_xmlnode(node, tag)`` is defined elsewhere; it is
     presumably a tag-name lookup returning a list of elements -- confirm.

     Raises:
         IndexError: if a ``url`` element lacks ``loc`` or ``display``, or
             ``loc`` is empty.
     """
     dom = minidom.parseString(response.body)
     for url_node in get_xmlnode(dom.documentElement, 'url'):
         url = get_xmlnode(url_node, 'loc')[0].childNodes[0].nodeValue
         item = {"site": "nuomi", "shops": [], "url": url, "apiType": "hao123"}
         for display in get_xmlnode(url_node, 'display')[0].childNodes:
             name = display.nodeName
             if name == "#text":
                 # Whitespace between elements; carries no field.
                 continue
             if name == "shops":
                 # Build a fresh dict for every <shop>.  Sharing one dict
                 # across shops would merge them and keep only the last
                 # value seen for each field.
                 for shop_node in get_xmlnode(display, 'shop'):
                     item["shops"].append({
                         node.nodeName: node.childNodes[0].nodeValue
                         for node in shop_node.childNodes
                         if node.nodeName != "#text" and node.childNodes
                     })
             elif display.childNodes:
                 item[name] = display.childNodes[0].nodeValue  # wholeText
                 if name == "identifier":
                     item["id"] = item[name]
         yield item
 def parse(self, response):
     """Yield one item dict per ``url`` element of the XML in ``response.body``.

     The item carries the ``loc`` text as ``url`` and the last
     ``/``-separated segment of that URL (stripped) as ``id``.  Element
     children of ``display`` that have children are copied in under their
     tag name, and every ``shop`` element found under the ``url`` node
     becomes its own dict in ``item["shops"]``.

     NOTE(review): ``get_xmlnode(node, tag)`` is defined elsewhere; it is
     presumably a tag-name lookup returning a list of elements -- confirm.
     """
     document = minidom.parseString(response.body)
     for url_node in get_xmlnode(document.documentElement, 'url'):
         loc_node = get_xmlnode(url_node, 'loc')[0]
         url = loc_node.childNodes[0].nodeValue
         deal_id = url.split("/")[-1].strip()
         item = {
             "site": "dianping",
             "shops": [],
             "url": url,
             "id": deal_id,
             "apiType": "tuan800"
         }

         # Flat deal fields: first child's value of each non-text element.
         display_node = get_xmlnode(url_node, 'display')[0]
         for field in display_node.childNodes:
             if field.nodeName != "#text" and field.childNodes:
                 item[field.nodeName] = field.childNodes[0].nodeValue

         # One dict per <shop>, built from its non-text, non-empty children.
         for shop_node in get_xmlnode(url_node, 'shop'):
             item["shops"].append({
                 attr.nodeName: attr.childNodes[0].nodeValue
                 for attr in shop_node.childNodes
                 if attr.nodeName != "#text" and attr.childNodes
             })
         yield item
Exemple #3
0
 def parse(self, response):
     """Yield one item dict per ``url`` element of the XML in ``response.body``.

     Items start with ``site``, ``shops``, ``url`` and ``apiType``.  Each
     element child of ``display`` that has children is stored under its
     tag name using the ``nodeValue`` of its first child; ``identifier``
     is additionally copied to ``item["id"]``.  For a ``shops`` child,
     each ``shop`` element is appended to ``item["shops"]`` as a separate
     dict of its fields.

     NOTE(review): ``get_xmlnode(node, tag)`` is defined elsewhere; it is
     presumably a tag-name lookup returning a list of elements -- confirm.

     Raises:
         IndexError: if ``loc`` or ``display`` is missing from a ``url``
             element, or ``loc`` has no children.
     """
     dom = minidom.parseString(response.body)
     for url_node in get_xmlnode(dom.documentElement, 'url'):
         url = get_xmlnode(url_node, 'loc')[0].childNodes[0].nodeValue
         item = {
             "site": "nuomi",
             "shops": [],
             "url": url,
             "apiType": "hao123"
         }
         display_nodes = get_xmlnode(url_node, 'display')[0].childNodes
         for display in display_nodes:
             name = display.nodeName
             if name == "#text":
                 continue
             if name == "shops":
                 for shop_node in get_xmlnode(display, 'shop'):
                     # A new dict per shop: reusing a single dict would
                     # collapse all shops into one merged record.
                     shop = {}
                     for node in shop_node.childNodes:
                         if node.nodeName != "#text" and node.childNodes:
                             shop[node.nodeName] = node.childNodes[
                                 0].nodeValue  # wholeText
                     item["shops"].append(shop)
             elif display.childNodes:
                 item[name] = display.childNodes[0].nodeValue  # wholeText
                 if name == "identifier":
                     item["id"] = item[name]
         yield item
 def parse_hao123(self, response):
     """Yield one item dict per ``url`` element of the XML in ``response.body``.

     The ``loc`` text is expected to contain ``&url=``; the part after the
     first occurrence becomes the item's ``url``, and its last path segment
     with ``.html`` removed becomes ``id``.  Element children of
     ``display`` that have children are copied in under their tag name;
     a ``shops`` child adds one dict per ``shop`` element to
     ``item["shops"]``.

     NOTE(review): ``get_xmlnode(node, tag)`` is defined elsewhere; it is
     presumably a tag-name lookup returning a list of elements -- confirm.

     Raises:
         IndexError: if ``loc``/``display`` is missing or ``loc`` does not
             contain ``&url=``.
     """
     dom = minidom.parseString(response.body)
     for url_node in get_xmlnode(dom.documentElement, "url"):
         url = get_xmlnode(url_node, "loc")[0].childNodes[0].nodeValue
         url = url.split("&url=")[1]
         deal_id = url.split("/")[-1].replace(".html", "")
         item = {"site": "meituan", "shops": [], "url": url, "id": deal_id, "apiType": "hao123"}
         for display in get_xmlnode(url_node, "display")[0].childNodes:
             if display.nodeName == "#text":
                 continue
             if display.nodeName == "shops":
                 # Each <shop> gets its own dict so multiple shops are not
                 # merged into a single record with overwritten fields.
                 for shop_node in get_xmlnode(display, "shop"):
                     item["shops"].append({
                         node.nodeName: node.childNodes[0].nodeValue
                         for node in shop_node.childNodes
                         if node.nodeName != "#text" and node.childNodes
                     })
             elif display.childNodes:
                 item[display.nodeName] = display.childNodes[0].nodeValue  # wholeText
         yield item
 def parse_hao123(self, response):
     """Yield one item dict per ``url`` element of the XML in ``response.body``.

     The text of ``loc`` is split on ``&url=`` and the second piece is used
     as the item's ``url``; ``id`` is that URL's last ``/`` segment with
     ``.html`` removed.  Non-text children of ``display`` that have
     children are stored under their tag name, except ``shops``, whose
     ``shop`` elements are each appended to ``item["shops"]`` as a dict.

     NOTE(review): ``get_xmlnode(node, tag)`` is defined elsewhere; it is
     presumably a tag-name lookup returning a list of elements -- confirm.

     Raises:
         IndexError: if ``loc``/``display`` is missing or ``loc`` does not
             contain ``&url=``.
     """
     dom = minidom.parseString(response.body)
     for url_node in get_xmlnode(dom.documentElement, 'url'):
         url = get_xmlnode(url_node, 'loc')[0].childNodes[0].nodeValue
         url = url.split('&url=')[1]
         deal_id = url.split("/")[-1].replace(".html", "")
         item = {
             "site": "meituan",
             "shops": [],
             "url": url,
             "id": deal_id,
             "apiType": "hao123"
         }
         display_nodes = get_xmlnode(url_node, 'display')[0].childNodes
         for display in display_nodes:
             if display.nodeName == "#text":
                 continue
             if display.nodeName == "shops":
                 for shop_node in get_xmlnode(display, 'shop'):
                     # Start from an empty dict for every shop; a dict
                     # shared across the loop would merge all shops.
                     shop = {}
                     for node in shop_node.childNodes:
                         if node.nodeName != "#text" and node.childNodes:
                             shop[node.nodeName] = node.childNodes[
                                 0].nodeValue  # wholeText
                     item["shops"].append(shop)
             elif display.childNodes:
                 item[display.nodeName] = display.childNodes[
                     0].nodeValue  # wholeText
         yield item
 def parse(self, response):
     """Yield one item dict per ``url`` element of the XML in ``response.body``.

     ``url`` is the text of ``loc`` and ``id`` is its last ``/``-separated
     segment, stripped.  Non-text children of ``display`` that have
     children are copied into the item by tag name; each ``shop`` element
     under the ``url`` node is appended to ``item["shops"]`` as a dict.

     NOTE(review): ``get_xmlnode(node, tag)`` is defined elsewhere; it is
     presumably a tag-name lookup returning a list of elements -- confirm.
     """
     tree = minidom.parseString(response.body)
     entries = get_xmlnode(tree.documentElement, 'url')
     for entry in entries:
         url = get_xmlnode(entry, 'loc')[0].childNodes[0].nodeValue
         segments = url.split("/")
         item = {"site": "dianping", "shops": [], "url": url, "id": segments[-1].strip(), "apiType": "tuan800"}

         for child in get_xmlnode(entry, 'display')[0].childNodes:
             # Skip whitespace text nodes and empty elements.
             if child.nodeName == "#text" or not child.childNodes:
                 continue
             item[child.nodeName] = child.childNodes[0].nodeValue

         for shop_elem in get_xmlnode(entry, 'shop'):
             details = {}
             for prop in shop_elem.childNodes:
                 if prop.nodeName == "#text" or not prop.childNodes:
                     continue
                 details[prop.nodeName] = prop.childNodes[0].nodeValue
             item["shops"].append(details)
         yield item