def parse_showdesk_membercards(self, resp):
    """Turn each row of the ``cardTypeForm`` table into a member-card item.

    Column titles are the direct text nodes of the table's ``thead`` cells.
    Every ``tbody`` row yields one ``SentreeMemberCardItem`` whose ``info``
    field is an ``OrderedDict`` mapping column title to cleaned cell text.
    The first and the second-to-last cell of each row are skipped.

    Args:
        resp: the response handed to ``Selector``.

    Yields:
        A single ``None`` (after logging an error) when the header cells or
        the body rows cannot be found; otherwise one item per body row.
    """
    selector = Selector(resp)

    headers = selector.xpath(
        '//form[@id="cardTypeForm"]//table/thead/tr/th/child::text()'
    ).extract()
    if not headers:
        self.log('%s can not find table headers.' % self.name, level=log.ERROR)
        yield None
        return

    card_rows = selector.xpath('//form[@id="cardTypeForm"]//table/tbody/tr')
    if not card_rows:
        self.log('%s can not find member card info' % self.name, level=log.ERROR)
        yield None
        return

    for card_row in card_rows:
        cells = card_row.xpath('td')
        last = len(cells) - 1
        info = OrderedDict()

        for col, cell in enumerate(cells):
            # First and second-to-last cells carry nothing that is exported.
            if col in (0, last - 1):
                continue

            if col == last:
                # Last cell: only its direct text nodes, without the extra
                # str_list_strip pass applied to the other cells.
                fragments = cell.xpath('./child::text()').extract()
                joiner = ' | '
            else:
                fragments = str_list_strip(
                    cell.xpath('descendant::text()').extract()
                )
                # Column 3 fragments are glued together with no separator.
                joiner = '' if col == 3 else ' | '

            cleaned = str_list_strip_replace(fragments, [' ', '\t', '\n', ' '])
            info[headers[col]] = joiner.join(cleaned)

        card = SentreeMemberCardItem()
        card['info'] = info
        yield card
def parse_showdesk_services(self, resp):
    """Turn each row of the ``itemset`` table into a service item.

    Column titles are the direct text nodes of the table's ``thead`` cells.
    Every ``tbody`` row yields one ``SentreeServiceItem`` whose ``info``
    field is an ``OrderedDict`` mapping column title to the cell content.
    The first and the last cell of each row are skipped.

    Special columns:
        * column 1 - its joined text is remembered and later used to build
          the id of the price span (``pricespan<value>``).
        * column 8 - the value is taken from that price span, looked up
          across the whole page, and is stored unjoined (whatever
          ``str_list_strip_replace`` returns).
        * column 9 - one entry per ``div`` whose id starts with ``icddiv``;
          text inside a div is joined with `` | `` and the divs with
          `` ||| ``.

    Args:
        resp: the response handed to ``Selector``.

    Yields:
        A single ``None`` (after logging an error) when the header cells or
        the body rows cannot be found; otherwise one item per body row.
    """
    selector = Selector(resp)

    headers = selector.xpath(
        '//table[@id="itemset"]/thead/tr/th/child::text()'
    ).extract()
    if not headers:
        self.log('%s can not find table headers.' % self.name, level=log.ERROR)
        yield None
        return

    service_rows = selector.xpath('//table[@id="itemset"]/tbody/tr')
    if not service_rows:
        self.log('%s can not find services info' % self.name, level=log.ERROR)
        yield None
        return

    for service_row in service_rows:
        cells = service_row.xpath('td')
        last = len(cells) - 1
        info = OrderedDict()
        service_no = None  # filled from column 1, consumed by column 8

        for col, cell in enumerate(cells):
            if col in (0, last):
                continue

            if col == 8:
                # The price lives in a span addressed by the service number,
                # not in this cell itself.
                price_span = selector.xpath(
                    '//span[@id="pricespan%s"]' % service_no
                )
                value = str_list_strip_replace(
                    str_list_strip(price_span.xpath('child::text()').extract()),
                    [' ', '\t', '\n'],
                )
            elif col == 9:
                value = ' ||| '.join(
                    ' | '.join(
                        str_list_strip_replace(
                            str_list_strip(
                                discount_div.xpath('./child::text()').extract()
                            ),
                            [' ', '\t', '\n'],
                        )
                    )
                    for discount_div in cell.xpath(
                        './/div[starts-with(@id, "icddiv")]'
                    )
                )
            else:
                value = ' | '.join(
                    str_list_strip_replace(
                        str_list_strip(
                            cell.xpath('descendant::text()').extract()
                        ),
                        [' ', '\t', '\n'],
                    )
                )

            info[headers[col]] = value
            if col == 1:
                service_no = value

        service = SentreeServiceItem()
        service['info'] = info
        yield service
def parse_consumer_bill_stream_validate(self, resp):
    """Parse the table around ``tbody#billBody`` into one item per body row.

    Header titles are read from the ``thead`` of the table that contains
    ``tbody#billBody``; the last ``th`` is ignored, and when a ``th`` has
    several direct text nodes they are joined with ``/``.

    Each ``tr`` of the body yields one ``SentreeShuiDanShenChaItem`` with:

    * ``menu``    - the fixed three-entry list of labels defined below.
    * ``headers`` - the header titles in the order they were recorded while
      walking the row.
    * ``data``    - an ``OrderedDict`` of ``title -> [value, flag]``. In this
      method the flag is ``True`` whenever the value is extracted text and
      ``False`` whenever the value is a nested container; what consumers do
      with the flag is not visible here.

    Args:
        resp: the response handed to ``Selector``.

    Yields:
        A single ``None`` (after logging an error) when no header cells are
        found; nothing at all when the body has no rows; otherwise one item
        per body row.
    """
    # NOTE(review): this block was reconstructed from whitespace-collapsed
    # source. The nesting level of the two ``recoded_headers = True``
    # statements and of the final ``item['headers']`` / detail assignments
    # is inferred - confirm against the original file.
    hxs = Selector(resp)
    menu = [u'营业记录', u'水单记录', u'水单审查']
    bill_headers = []
    head_nodes = hxs.xpath('//tbody[@id="billBody"]/parent::table/thead/tr/th')
    if not head_nodes:
        self.log('in %s.parse_consumer_bill_stream_validate, can not get table headers.' % self.name, level=log.ERROR)
        yield None
        return
    for idx, hd in enumerate(head_nodes):
        # The last header cell is deliberately left out.
        if idx == len(head_nodes) - 1:
            break
        txts = hd.xpath('child::text()').extract()
        bill_headers.append('/'.join(txts))
    bill_nodes = hxs.xpath('//tbody[@id="billBody"]/tr')
    if bill_nodes:
        for bn in bill_nodes:
            item = SentreeShuiDanShenChaItem()
            item['menu'] = menu
            headers = []
            item['data'] = OrderedDict({})
            data_nodes = bn.xpath('td')
            for idx, dn in enumerate(data_nodes):
                # Only the first six cells (indexes 0-5) are parsed.
                if idx == 6:
                    break
                h = bill_headers[idx]
                # Columns 0 and 4: keep only the first stripped text fragment.
                if idx == 0 or idx == 4:
                    headers.append(h)
                    item['data'][h] = [str_list_strip(dn.xpath('descendant::text()').extract())[0], True]
                    continue
                # Columns 1-3: keep the whole list of stripped text fragments.
                if idx == 1 or idx == 2 or idx == 3:
                    headers.append(h)
                    item['data'][h] = [str_list_strip(dn.xpath('descendant::text()').extract()), True]
                    continue
                # Column 5 holds a nested table; its cells are labelled with
                # the header titles at positions idx, idx + 1, ... idx + 4.
                if idx == 5:
                    detail = []
                    subtrs = dn.xpath('table/tr')
                    # Guards ``headers`` so that nested column titles are
                    # appended only once per item rather than once per
                    # nested row.
                    recoded_headers = False
                    for tr in subtrs:
                        empperfors = []
                        subdetail = OrderedDict({})
                        subtds = tr.xpath('td')
                        # First nested cell: full list of text fragments.
                        h = bill_headers[idx + 0]
                        if not recoded_headers:
                            headers.append(h)
                        subdetail[h] = [str_list_strip(subtds[0].xpath('descendant::text()').extract()), True]
                        # Second nested cell: first text fragment only.
                        h = bill_headers[idx + 1]
                        if not recoded_headers:
                            headers.append(h)
                        subdetail[h] = [str_list_strip(subtds[1].xpath('descendant::text()').extract())[0], True]
                        # Third nested cell contains yet another table.
                        subtrs2 = subtds[2].xpath('table/tr')
                        for kdx, tr2 in enumerate(subtrs2):
                            # The last row of the innermost table is not parsed.
                            if kdx == len(subtrs2) - 1:
                                break
                            # ``empperfor`` is created fresh for every inner
                            # row, so each ``h not in empperfor`` test below
                            # is true on its first use and every key ends up
                            # with a one-element list.
                            empperfor = OrderedDict({})
                            subtds2 = tr2.xpath('td')
                            h = bill_headers[idx + 2 + 0]
                            if not recoded_headers:
                                headers.append(h)
                            if h not in empperfor:
                                empperfor[h] = []
                            empperfor[h].append([str_list_strip(subtds2[0].xpath('descendant::text()').extract()), True])
                            h = bill_headers[idx + 2 + 1]
                            if not recoded_headers:
                                headers.append(h)
                            if h not in empperfor:
                                empperfor[h] = []
                            empperfor[h].append([str_list_strip(subtds2[1].xpath('descendant::text()').extract())[0], True])
                            h = bill_headers[idx + 2 + 2]
                            # The third inner title is prefixed with the
                            # literal below (Chinese for "employee") before
                            # it is used as a key.
                            h = u'员工' + h
                            if not recoded_headers:
                                headers.append(h)
                            if h not in empperfor:
                                empperfor[h] = []
                            empperfor[h].append([str_list_strip(subtds2[2].xpath('descendant::text()').extract())[0], True])
                            empperfors.append(empperfor)
                            # After the first parsed inner row all nested
                            # titles have been recorded.
                            recoded_headers = True
                        # The collected inner rows are stored under a fixed
                        # key (Chinese for "employee performance").
                        subdetail[u'员工业绩'] = [empperfors, False]
                        detail.append([subdetail, False])
                        recoded_headers = True
            # NOTE(review): ``detail`` is bound only inside the ``idx == 5``
            # branch above; presumably every row has at least six cells -
            # verify, or confirm these two assignments belong inside that
            # branch.
            item['headers'] = headers
            # Nested rows are stored under a fixed key (Chinese for "details").
            item['data'][u'详情'] = [detail, False]
            # items.append(item)
            yield item