Example #1
0
    def parse_showdesk_membercards(self, resp):
        """Turn the member-card table of a response into items.

        Column titles come from the text children of the ``th`` cells of the
        table inside ``form#cardTypeForm``; each ``tbody`` row becomes one
        ``SentreeMemberCardItem`` whose ``info`` field is an ordered mapping
        of column title to cell text.

        If either the header cells or the body rows cannot be found, an
        error is logged and a single ``None`` is yielded.
        """
        page = Selector(resp)
        headers = page.xpath('//form[@id="cardTypeForm"]//table/thead/tr/th/child::text()').extract()
        if not headers:
            self.log('%s can not find table headers.' % self.name, level=log.ERROR)
            yield None
            return

        rows = page.xpath('//form[@id="cardTypeForm"]//table/tbody/tr')
        if not rows:
            self.log('%s can not find member card info' % self.name, level=log.ERROR)
            yield None
            return

        for row in rows:
            cells = row.xpath('td')
            last = len(cells) - 1
            info = OrderedDict()
            for col, cell in enumerate(cells):
                # The first and the second-to-last cells are never recorded.
                if col in (0, last - 1):
                    continue
                if col == last:
                    # Last cell: only its direct text children, not stripped
                    # with str_list_strip beforehand.
                    texts = cell.xpath('./child::text()').extract()
                    joiner = ' | '
                else:
                    texts = str_list_strip(cell.xpath('descendant::text()').extract())
                    # Column 3 is concatenated without any separator.
                    joiner = '' if col == 3 else ' | '
                value = joiner.join(str_list_strip_replace(texts, [' ', '\t', '\n', ' ']))
                info[headers[col]] = value

            item = SentreeMemberCardItem()
            item['info'] = info
            yield item
Example #2
0
    def parse_showdesk_services(self, resp):
        """Turn the ``table#itemset`` services table of a response into items.

        Column titles come from the text children of the ``th`` cells; each
        ``tbody`` row becomes one ``SentreeServiceItem`` whose ``info`` field
        is an ordered mapping of column title to cell content.

        If either the header cells or the body rows cannot be found, an
        error is logged and a single ``None`` is yielded.
        """
        page = Selector(resp)
        headers = page.xpath('//table[@id="itemset"]/thead/tr/th/child::text()').extract()
        if not headers:
            self.log('%s can not find table headers.' % self.name, level=log.ERROR)
            yield None
            return

        rows = page.xpath('//table[@id="itemset"]/tbody/tr')
        if not rows:
            self.log('%s can not find services info' % self.name, level=log.ERROR)
            yield None
            return

        for row in rows:
            cells = row.xpath('td')
            last = len(cells) - 1
            info = OrderedDict()
            # Text of column 1; used below to locate the row's price span.
            service_no = None
            for col, cell in enumerate(cells):
                # The first and the last cells are never recorded.
                if col in (0, last):
                    continue
                if col == 8:
                    # The price is read from a page-level span whose id is
                    # built from the column-1 text; stored as a list, unjoined.
                    price_texts = page.xpath('//span[@id="pricespan%s"]' % service_no).xpath('child::text()').extract()
                    info[headers[col]] = str_list_strip_replace(str_list_strip(price_texts), [' ', '\t', '\n'])
                elif col == 9:
                    # One ' | '-joined string per "icddiv*" div, then all of
                    # them joined with ' ||| '.
                    discounts = [
                        ' | '.join(str_list_strip_replace(str_list_strip(div.xpath('./child::text()').extract()), [' ', '\t', '\n']))
                        for div in cell.xpath('.//div[starts-with(@id, "icddiv")]')
                    ]
                    info[headers[col]] = ' ||| '.join(discounts)
                else:
                    cell_texts = str_list_strip(cell.xpath('descendant::text()').extract())
                    info[headers[col]] = ' | '.join(str_list_strip_replace(cell_texts, [' ', '\t', '\n']))
                    if col == 1:
                        service_no = info[headers[col]]

            item = SentreeServiceItem()
            item['info'] = info
            yield item
Example #3
0
    def parse_consumer_bill_stream_validate(self, resp):
        """Turn the rows of ``tbody#billBody`` into ``SentreeShuiDanShenChaItem``s.

        Column titles are taken from every ``th`` of the owning table except
        the last one. For each body row an item is yielded with:

        * ``menu``    -- the same fixed three-entry list for every item;
        * ``data``    -- ordered mapping of title to ``[value, flag]`` pairs.
          Columns 0-4 are stored directly (flag ``True``); column 5 holds a
          nested table that is stored under ``u'详情'`` as a list of
          ``[sub-record, False]`` entries, each sub-record carrying its own
          list of per-employee records under ``u'员工业绩'``;
        * ``headers`` -- the titles in the order they were recorded. It is
          only assigned when the row actually reaches column 5.

        If no header cells are found, an error is logged and a single
        ``None`` is yielded.
        """
        page = Selector(resp)
        menu = [u'营业记录', u'水单记录', u'水单审查']
        head_nodes = page.xpath('//tbody[@id="billBody"]/parent::table/thead/tr/th')
        if not head_nodes:
            self.log('in %s.parse_consumer_bill_stream_validate, can not get table headers.' % self.name, level=log.ERROR)
            yield None
            return

        # The last header cell is deliberately left out.
        bill_headers = ['/'.join(th.xpath('child::text()').extract()) for th in head_nodes[:-1]]

        for row in page.xpath('//tbody[@id="billBody"]/tr'):
            item = SentreeShuiDanShenChaItem()
            item['menu'] = menu
            headers = []
            item['data'] = OrderedDict()
            for col, cell in enumerate(row.xpath('td')):
                # Only the first six cells of a row are processed.
                if col == 6:
                    break
                title = bill_headers[col]

                if col < 5:
                    headers.append(title)
                    texts = str_list_strip(cell.xpath('descendant::text()').extract())
                    # Columns 0 and 4 keep only the first text fragment;
                    # columns 1-3 keep the whole list.
                    item['data'][title] = [texts[0] if col in (0, 4) else texts, True]
                    continue

                # col == 5: nested detail table, one sub-record per inner row.
                detail = []
                headers_recorded = False
                for sub_row in cell.xpath('table/tr'):
                    performances = []
                    sub_record = OrderedDict()
                    sub_cells = sub_row.xpath('td')

                    title = bill_headers[col]
                    if not headers_recorded:
                        headers.append(title)
                    sub_record[title] = [str_list_strip(sub_cells[0].xpath('descendant::text()').extract()), True]

                    title = bill_headers[col + 1]
                    if not headers_recorded:
                        headers.append(title)
                    sub_record[title] = [str_list_strip(sub_cells[1].xpath('descendant::text()').extract())[0], True]

                    # Third cell: per-employee table; its last row is skipped.
                    perf_rows = sub_cells[2].xpath('table/tr')
                    for perf_row in perf_rows[:-1]:
                        performance = OrderedDict()
                        perf_cells = perf_row.xpath('td')
                        # (title, cell position, keep only first fragment?)
                        fields = (
                            (bill_headers[col + 2], 0, False),
                            (bill_headers[col + 3], 1, True),
                            (u'员工' + bill_headers[col + 4], 2, True),
                        )
                        for label, pos, first_only in fields:
                            if not headers_recorded:
                                headers.append(label)
                            texts = str_list_strip(perf_cells[pos].xpath('descendant::text()').extract())
                            performance.setdefault(label, []).append([texts[0] if first_only else texts, True])
                        performances.append(performance)
                        # Titles are collected from the first employee row only.
                        headers_recorded = True

                    sub_record[u'员工业绩'] = [performances, False]
                    detail.append([sub_record, False])
                    # ...and from the first inner row only, even if that row
                    # had no employee rows to contribute titles.
                    headers_recorded = True

                item['headers'] = headers
                item['data'][u'详情'] = [detail, False]
            yield item