def parse(self, response):
    """Yield one deal dict for every ``<url>`` element of a nuomi XML feed.

    ``response.body`` is parsed with ``minidom``. For each ``<url>`` element
    the text of its first ``<loc>`` becomes ``item["url"]``, and every
    non-text child of its first ``<display>`` element is copied into the item
    under the child's tag name. A ``<shops>`` child is expanded into
    ``item["shops"]``, one dict per ``<shop>`` element. An ``<identifier>``
    child is additionally mirrored under ``item["id"]``.

    ``get_xmlnode`` is a helper defined outside this block; it is assumed to
    return a sequence of the matching elements -- TODO confirm.

    Missing ``<loc>``/``<display>`` elements or an empty ``<loc>`` are not
    guarded against; the ``[0]`` lookups will fail for such entries.

    :param response: object whose ``body`` attribute holds the XML document.
    :return: generator of item dicts.
    """
    document = minidom.parseString(response.body)
    for url_node in get_xmlnode(document.documentElement, 'url'):
        url = get_xmlnode(url_node, 'loc')[0].childNodes[0].nodeValue
        item = {"site": "nuomi", "shops": [], "url": url, "apiType": "hao123"}
        for display in get_xmlnode(url_node, 'display')[0].childNodes:
            if display.nodeName == "#text":
                # Whitespace between elements shows up as text nodes.
                continue
            if display.nodeName == "shops":
                for shop_node in get_xmlnode(display, 'shop'):
                    # A fresh dict per <shop>: sharing one dict across the
                    # loop would make every list entry alias the same object
                    # and let later shops overwrite earlier ones.
                    shop = {}
                    for node in shop_node.childNodes:
                        if node.nodeName != "#text" and node.childNodes:
                            # Only the first child node's value is taken.
                            shop[node.nodeName] = node.childNodes[0].nodeValue
                    item["shops"].append(shop)
            elif display.childNodes:
                name = display.nodeName
                item[name] = display.childNodes[0].nodeValue
                if name == "identifier":
                    item["id"] = item[name]
        yield item
def parse(self, response):
    """Yield one deal dict per ``<url>`` element of a dianping XML feed.

    The text of the first ``<loc>`` becomes ``item["url"]`` and its last
    ``/``-separated segment (stripped) becomes ``item["id"]``. Every non-text
    child of the first ``<display>`` element that has children is copied into
    the item under its tag name. Each ``<shop>`` element found under the
    ``<url>`` element is turned into a dict and appended to ``item["shops"]``.

    ``get_xmlnode`` is a helper defined outside this block; it is assumed to
    return a sequence of the matching elements -- TODO confirm.

    :param response: object whose ``body`` attribute holds the XML document.
    :return: generator of item dicts.
    """
    document = minidom.parseString(response.body)
    url_nodes = get_xmlnode(document.documentElement, 'url')
    for url_node in url_nodes:
        loc_node = get_xmlnode(url_node, 'loc')[0]
        url = loc_node.childNodes[0].nodeValue
        deal_id = url.rsplit("/", 1)[-1].strip()
        item = {
            "site": "dianping",
            "shops": [],
            "url": url,
            "id": deal_id,
            "apiType": "tuan800",
        }

        display_node = get_xmlnode(url_node, 'display')[0]
        for field in display_node.childNodes:
            # Skip whitespace text nodes and elements without content.
            if field.nodeName != "#text" and field.childNodes:
                item[field.nodeName] = field.childNodes[0].nodeValue

        for shop_node in get_xmlnode(url_node, 'shop'):
            shop = {
                attr.nodeName: attr.childNodes[0].nodeValue
                for attr in shop_node.childNodes
                if attr.nodeName != "#text" and attr.childNodes
            }
            item["shops"].append(shop)

        yield item
def parse(self, response):
    """Yield one deal dict for every ``<url>`` element of a nuomi XML feed.

    ``response.body`` is parsed with ``minidom``. For each ``<url>`` element
    the text of its first ``<loc>`` becomes ``item["url"]``; every non-text
    child of its first ``<display>`` element is stored in the item under the
    child's tag name. A ``<shops>`` child is expanded into ``item["shops"]``
    (one dict per ``<shop>``), and an ``<identifier>`` child is also exposed
    as ``item["id"]``.

    ``get_xmlnode`` is a helper defined outside this block; it is assumed to
    return a sequence of the matching elements -- TODO confirm.

    Entries lacking ``<loc>``/``<display>`` (or with an empty ``<loc>``) are
    not guarded against; the ``[0]`` lookups will fail for them.

    :param response: object whose ``body`` attribute holds the XML document.
    :return: generator of item dicts.
    """
    document = minidom.parseString(response.body)
    url_nodes = get_xmlnode(document.documentElement, 'url')
    for url_node in url_nodes:
        url = get_xmlnode(url_node, 'loc')[0].childNodes[0].nodeValue
        item = {
            "site": "nuomi",
            "shops": [],
            "url": url,
            "apiType": "hao123",
        }
        display_nodes = get_xmlnode(url_node, 'display')[0].childNodes
        for display in display_nodes:
            name = display.nodeName
            if name == "#text":
                # Inter-element whitespace; nothing to record.
                continue
            if name == "shops":
                for shop_node in get_xmlnode(display, 'shop'):
                    # Build a new dict for each <shop>; reusing a single dict
                    # would append the same object repeatedly, so all shops
                    # would end up with the last shop's values.
                    shop = {}
                    for node in shop_node.childNodes:
                        if node.nodeName == "#text" or not node.childNodes:
                            continue
                        # Only the first child node's value is taken.
                        shop[node.nodeName] = node.childNodes[0].nodeValue
                    item["shops"].append(shop)
            elif display.childNodes:
                item[name] = display.childNodes[0].nodeValue
                if name == "identifier":
                    item["id"] = item[name]
        yield item
def parse_hao123(self, response):
    """Yield one deal dict for every ``<url>`` element of a meituan XML feed.

    The ``<loc>`` text is expected to embed the real deal link after an
    ``&url=`` marker: the segment following the first marker becomes
    ``item["url"]``, and its last ``/``-separated segment with ``.html``
    removed becomes ``item["id"]``. Every non-text child of the first
    ``<display>`` element is copied into the item under its tag name, except
    ``<shops>``, which is expanded into ``item["shops"]`` with one dict per
    ``<shop>`` element.

    ``get_xmlnode`` is a helper defined outside this block; it is assumed to
    return a sequence of the matching elements -- TODO confirm.

    :param response: object whose ``body`` attribute holds the XML document.
    :return: generator of item dicts.
    :raises IndexError: if a ``<loc>`` value contains no ``&url=`` marker.
    """
    document = minidom.parseString(response.body)
    for url_node in get_xmlnode(document.documentElement, "url"):
        raw_url = get_xmlnode(url_node, "loc")[0].childNodes[0].nodeValue
        url = raw_url.split("&url=")[1]
        deal_id = url.split("/")[-1].replace(".html", "")
        item = {"site": "meituan", "shops": [], "url": url, "id": deal_id, "apiType": "hao123"}
        for display in get_xmlnode(url_node, "display")[0].childNodes:
            if display.nodeName == "#text":
                # Whitespace between elements shows up as text nodes.
                continue
            if display.nodeName == "shops":
                for shop_node in get_xmlnode(display, "shop"):
                    # A fresh dict per <shop>: sharing one dict across the
                    # loop would make every list entry alias the same object
                    # and let later shops overwrite earlier ones.
                    shop = {}
                    for node in shop_node.childNodes:
                        if node.nodeName != "#text" and node.childNodes:
                            # Only the first child node's value is taken.
                            shop[node.nodeName] = node.childNodes[0].nodeValue
                    item["shops"].append(shop)
            elif display.childNodes:
                item[display.nodeName] = display.childNodes[0].nodeValue
        yield item
def parse_hao123(self, response):
    """Yield one deal dict for every ``<url>`` element of a meituan XML feed.

    The ``<loc>`` text is expected to carry the real deal link after an
    ``&url=`` marker. The segment following the first marker is stored as
    ``item["url"]``; its last ``/``-separated segment, with ``.html``
    removed, is stored as ``item["id"]``. Non-text children of the first
    ``<display>`` element are copied into the item by tag name, while a
    ``<shops>`` child is expanded into ``item["shops"]`` (one dict per
    ``<shop>`` element).

    ``get_xmlnode`` is a helper defined outside this block; it is assumed to
    return a sequence of the matching elements -- TODO confirm.

    :param response: object whose ``body`` attribute holds the XML document.
    :return: generator of item dicts.
    :raises IndexError: if a ``<loc>`` value contains no ``&url=`` marker.
    """
    document = minidom.parseString(response.body)
    url_nodes = get_xmlnode(document.documentElement, 'url')
    for url_node in url_nodes:
        raw_url = get_xmlnode(url_node, 'loc')[0].childNodes[0].nodeValue
        url = raw_url.split('&url=')[1]
        deal_id = url.split("/")[-1].replace(".html", "")
        item = {
            "site": "meituan",
            "shops": [],
            "url": url,
            "id": deal_id,
            "apiType": "hao123",
        }
        display_nodes = get_xmlnode(url_node, 'display')[0].childNodes
        for display in display_nodes:
            if display.nodeName == "#text":
                # Inter-element whitespace; nothing to record.
                continue
            if display.nodeName == "shops":
                for shop_node in get_xmlnode(display, 'shop'):
                    # Build a new dict for each <shop>; reusing a single dict
                    # would append the same object repeatedly, so all shops
                    # would end up with the last shop's values.
                    shop = {}
                    for node in shop_node.childNodes:
                        if node.nodeName == "#text" or not node.childNodes:
                            continue
                        # Only the first child node's value is taken.
                        shop[node.nodeName] = node.childNodes[0].nodeValue
                    item["shops"].append(shop)
            elif display.childNodes:
                item[display.nodeName] = display.childNodes[0].nodeValue
        yield item
def parse(self, response):
    """Yield one deal dict per ``<url>`` element of a dianping XML feed.

    ``item["url"]`` is the text of the first ``<loc>``; ``item["id"]`` is the
    stripped last ``/``-separated segment of that URL. Non-text children of
    the first ``<display>`` element that have content are copied into the
    item by tag name, and every ``<shop>`` element below the ``<url>`` element
    contributes one dict to ``item["shops"]``.

    ``get_xmlnode`` is a helper defined outside this block; it is assumed to
    return a sequence of the matching elements -- TODO confirm.

    :param response: object whose ``body`` attribute holds the XML document.
    :return: generator of item dicts.
    """

    def first_value(element):
        # Value of the element's first child node (its leading text).
        return element.childNodes[0].nodeValue

    def has_content(element):
        # True for anything that is not a text node and has children.
        return element.nodeName != "#text" and bool(element.childNodes)

    document = minidom.parseString(response.body)
    for entry in get_xmlnode(document.documentElement, 'url'):
        url = first_value(get_xmlnode(entry, 'loc')[0])
        tail = url.split("/")[-1]
        item = {"site": "dianping", "shops": [], "url": url, "id": tail.strip(), "apiType": "tuan800"}

        for field in get_xmlnode(entry, 'display')[0].childNodes:
            if has_content(field):
                item[field.nodeName] = first_value(field)

        for shop_node in get_xmlnode(entry, 'shop'):
            shop = {}
            for attr in shop_node.childNodes:
                if has_content(attr):
                    shop[attr.nodeName] = first_value(attr)
            item["shops"].append(shop)

        yield item