Example #1
0
    def parse_showdesk_members_treat(self, resp):
        """Parse one page of the member treatment table.

        On the first page (``meta['page'] == 1``), when a "next page" link is
        present, one ``FormRequest`` is yielded for every page from 2 up to
        the page count, each handled by this same callback.  Afterwards one
        ``SentreeMemberTreatItem`` is yielded per table body row, with the
        cleaned header texts in ``hs`` and the row's cell texts in ``vals``.
        If the table cannot be found a single ``None`` is yielded instead.
        """
        def _clean(texts):
            # NOTE(review): the two ' ' entries look identical here; one may
            # originally have been a non-breaking space -- confirm.
            return str_list_strip_replace(texts, [' ', '\t', '\n', ' '])

        selector = Selector(resp)
        next_links = selector.xpath('//a[@class="next_page"]')
        meta = resp.meta
        if next_links and meta['page'] == 1:
            # The page count is read from the link text of the last <li>
            # preceding the <li> that wraps the "next page" link.
            last_page_li = next_links[0].xpath('./parent::li/preceding-sibling::li')[-1]
            page_count = int(last_page_li.xpath('a/child::text()').extract()[0].strip())
            for page_no in xrange(2, page_count + 1):
                page_meta = dict(meta)
                page_meta['page'] = page_no
                self.log('%s yield member list page %d' % (self.name, page_no))
                form = {
                    'page.currNum': str(page_no),
                    'page.rpp': '30',
                    'r': str(meta['r']),
                    'set': 'manage',
                }
                yield FormRequest(
                    url="http://vip6.sentree.com.cn/shair/timesItem!initTreat.action",
                    formdata=form,
                    callback=self.parse_showdesk_members_treat,
                    meta=page_meta)

        tables = selector.xpath('//div[@class="page_main"]//div[@class="table-responsive"]/table')
        if not tables:
            yield None
            return

        table = tables[0]
        header_texts = _clean(table.xpath('./thead/tr/th/child::text()').extract())
        for row in table.xpath('./tbody/tr'):
            # Each cell contributes the concatenation of all its text nodes.
            cell_texts = [
                ''.join(_clean(cell.xpath('.//child::text()').extract()))
                for cell in row.xpath('./td')
            ]
            item = SentreeMemberTreatItem()
            item['hs'] = header_texts
            item['vals'] = cell_texts
            yield item
Example #2
0
    def parse_showdesk_membercards(self, resp):
        """Parse the member card type table into ``SentreeMemberCardItem``s.

        Header texts and body rows are read from the table inside the
        ``cardTypeForm`` form.  If either is missing an error is logged and a
        single ``None`` is yielded.  Otherwise one item is yielded per row;
        its ``info`` field is an ordered mapping from header text to the
        cell's joined text.  The first and the second-to-last cells of every
        row are skipped.
        """
        selector = Selector(resp)
        headers = selector.xpath('//form[@id="cardTypeForm"]//table/thead/tr/th/child::text()').extract()
        if not headers:
            self.log('%s can not find table headers.' % self.name, level=log.ERROR)
            yield None
            return

        card_rows = selector.xpath('//form[@id="cardTypeForm"]//table/tbody/tr')
        if not card_rows:
            self.log('%s can not find member card info' % self.name, level=log.ERROR)
            yield None
            return

        for row in card_rows:
            cells = row.xpath('td')
            last_idx = len(cells) - 1
            info = OrderedDict()
            for idx, cell in enumerate(cells):
                if idx in (0, last_idx - 1):
                    continue
                if idx == last_idx:
                    # Last cell: direct child text nodes only, always ' | ' joined.
                    texts = cell.xpath('./child::text()').extract()
                    joiner = ' | '
                else:
                    # Other cells: all descendant text; column 3 is joined
                    # without a separator.
                    texts = str_list_strip(cell.xpath('descendant::text()').extract())
                    joiner = '' if idx == 3 else ' | '
                # NOTE(review): the two ' ' entries look identical here; one
                # may originally have been a non-breaking space -- confirm.
                info[headers[idx]] = joiner.join(str_list_strip_replace(texts, [' ', '\t', '\n', ' ']))

            item = SentreeMemberCardItem()
            item['info'] = info
            yield item
Example #3
0
    def parse_showdesk_members2(self, resp):
        """Parse one member-list page, paginate, and emit per-member output.

        When called for the first page (``meta['page'] == 1``) and a "next
        page" link exists, a ``FormRequest`` for every page from 2 up to the
        page count is yielded with this method as callback.

        For each row of the table inside the ``delForm`` form a
        ``SentreeMembersSimpleItem`` is filled from the row's cells.  Rows
        whose cells cannot be read are logged with their traceback and
        skipped.  If the card link's ``onclick`` attribute carries a query
        string, a ``Request`` for the member-archive page is yielded with the
        item stored in ``meta['item']`` and ``self.parse_member_overdraft`` as
        callback; otherwise the item itself is yielded with ``overdraft`` set
        to ``'0.0'``.
        """
        hxs = Selector(resp)
        next_page_nodes = hxs.xpath('//a[@class="next_page"]')
        meta = resp.meta
        if next_page_nodes and meta['page'] == 1:
            next_page_node = next_page_nodes[0]
            # The page count is the link text of the last <li> preceding the
            # <li> that wraps the "next page" link.
            total_page = next_page_node.xpath('./parent::li/preceding-sibling::li')[-1].xpath('a/child::text()').extract()[0].strip()
            for i in xrange(2, int(total_page) + 1):
                new_meta = dict(meta)
                new_meta['page'] = i
                self.log('%s yield member list page %d' % (self.name, i))
                yield FormRequest(url="http://vip6.sentree.com.cn/shair/memberInfo!memberlist.action", formdata={
                             'page.currNum' : str(i),
                             'page.rpp' : '30',
                             'r' : str(meta['r']),
                             'set' : 'manage'
                             }, callback=self.parse_showdesk_members2, meta=new_meta)

        # Iterating an empty selector list is a no-op, so no emptiness guard
        # is needed around the loop.
        for m_n in hxs.xpath('//form[@id="delForm"]//table/tbody/tr'):
            member_tds = m_n.xpath('td')
            try:
                phone = member_tds[1].xpath('a/child::text()').extract()[0].replace(' ', '').strip()
                name = member_tds[2].xpath('span/child::text()').extract()[0].replace(' ', '').strip()
                # All card details live in a nested table inside the 7th cell.
                card_cell = member_tds[6]
                card_no = card_cell.xpath('table/tr/td[1]/a/child::text()').extract()[0].replace(' ', '').strip()
                # Keep only the part of the onclick handler between the first
                # '?' and the next single quote, i.e. the query string.
                info_query_str = card_cell.xpath('table/tr/td[1]/a/@onclick').extract()[0]
                info_query_str = info_query_str[info_query_str.find('?') + 1:]
                info_query_str = info_query_str[:info_query_str.find("'")]
                card_name = card_cell.xpath('table/tr/td[2]/child::text()').extract()[0].replace(' ', '').strip()
                # NOTE(review): the doubled .replace(' ', '') calls and the
                # doubled ' ' list entries look identical here; one of each
                # may originally have targeted a non-breaking space -- confirm.
                card_type = card_cell.xpath('table/tr/td[3]//child::text()').extract()[0].replace(' ', '').replace(' ', '').strip()
                discont = card_cell.xpath('table/tr/td[4]/child::text()').extract()[0].replace(' ', '').replace(' ', '').strip()
                timeout = card_cell.xpath('table/tr/td[9]/child::text()').extract()[0].replace(' ', '').replace(' ', '').strip()
                overage = str_list_strip_replace(card_cell.xpath('table/tr/td[7]//child::text()').extract(), [' ', ' ', '\t', '\n'])
            except Exception:
                # Malformed rows are logged and skipped.  Catching Exception
                # rather than using a bare except lets KeyboardInterrupt and
                # SystemExit propagate instead of being swallowed here.
                self.log(traceback.format_exc())
                continue

            mem_item = SentreeMembersSimpleItem()
            mem_item[u'phone'] = phone
            mem_item[u'name'] = name
            mem_item[u'card_no'] = card_no
            mem_item[u'card_name'] = card_name
            mem_item[u'card_type'] = card_type
            mem_item[u'discont'] = discont
            mem_item[u'timeout'] = timeout
            mem_item[u'overage'] = overage
            if info_query_str:
                new_meta = dict(meta)
                new_meta['item'] = mem_item
                # The integer timestamp is appended directly after the query
                # string (presumably as a cache-busting value -- confirm).
                yield Request(url='http://vip6.sentree.com.cn/shair/memberArchives!editMember.action?%s%d' % (info_query_str, time.time()), callback=self.parse_member_overdraft, meta=new_meta)
            else:
                mem_item['overdraft'] = '0.0'
                yield mem_item
Example #4
0
 def parse_member_overdraft2(self, resp):
     """Compute a member's outstanding overdraft and finish the item.

     The item is taken from ``resp.meta['item']``.  Amounts are read from
     the third column and statuses from the ``font`` element in the fifth
     column of the ``table-responsive`` table.  Amounts whose status marks
     them as paid off are subtracted, amounts marked as not paid off are
     added, and a negative total is reset to zero.  The result is stored in
     the item's ``overdraft`` field formatted with one decimal place
     (``'0.0'`` when the page has no amount cells), and the item is yielded.
     """
     mem_item = resp.meta['item']
     selector = Selector(resp)
     amount_nodes = selector.xpath('//div[@class="table-responsive"]/table/tbody/tr/td[3]/child::text()')
     if amount_nodes:
         # NOTE(review): the two ' ' entries look identical here; one may
         # originally have been a non-breaking space -- confirm.
         amount_texts = str_list_strip_replace(amount_nodes.extract(), [' ', ' ', '\t', '\n'])
         status_texts = str_list_strip_replace(selector.xpath('//div[@class="table-responsive"]/table/tbody/tr/td[5]/font/child::text()').extract(), [' ', ' ', '\t', '\n'])
         balance = 0.0
         for idx, amount_text in enumerate(amount_texts):
             amount = float(amount_text)
             # Statuses are matched to amounts purely by position.
             status = status_texts[idx]
             if u'已还清' in status:
                 balance -= amount
             elif u'未还清' in status:
                 balance += amount
         # A negative total is reported as zero.
         overdraft = '%.1f' % max(balance, float(0))
     else:
         overdraft = '0.0'
     mem_item['overdraft'] = overdraft
     yield mem_item
Example #5
0
    def parse_showdesk_services(self, resp):
        """Parse the service table (``itemset``) into ``SentreeServiceItem``s.

        If the header row or the body rows are missing, an error is logged
        and a single ``None`` is yielded.  Otherwise one item is yielded per
        row; its ``info`` field is an ordered mapping from header text to
        cell content.  The first and last cells of every row are skipped.
        """
        selector = Selector(resp)
        headers = selector.xpath('//table[@id="itemset"]/thead/tr/th/child::text()').extract()
        if not headers:
            self.log('%s can not find table headers.' % self.name, level=log.ERROR)
            yield None
            return

        service_rows = selector.xpath('//table[@id="itemset"]/tbody/tr')
        if not service_rows:
            self.log('%s can not find services info' % self.name, level=log.ERROR)
            yield None
            return

        for row in service_rows:
            cells = row.xpath('td')
            last_idx = len(cells) - 1
            info = OrderedDict()
            # Text of column 1; used to locate this row's price <span> by id.
            service_no = None
            for idx, cell in enumerate(cells):
                if idx in (0, last_idx):
                    continue
                if idx == 8:
                    # The price is looked up document-wide by span id rather
                    # than inside the cell; the cleaned texts are stored as
                    # returned by the helper, without joining.
                    price_texts = selector.xpath('//span[@id="pricespan%s"]' % service_no).xpath('child::text()').extract()
                    info[headers[idx]] = str_list_strip_replace(str_list_strip(price_texts), [' ', '\t', '\n'])
                elif idx == 9:
                    # One ' | '-joined entry per discount <div>, entries
                    # separated by ' ||| '.
                    discounts = [
                        ' | '.join(str_list_strip_replace(str_list_strip(div.xpath('./child::text()').extract()), [' ', '\t', '\n']))
                        for div in cell.xpath('.//div[starts-with(@id, "icddiv")]')
                    ]
                    info[headers[idx]] = ' ||| '.join(discounts)
                else:
                    cell_text = ' | '.join(str_list_strip_replace(str_list_strip(cell.xpath('descendant::text()').extract()), [' ', '\t', '\n']))
                    info[headers[idx]] = cell_text
                    if idx == 1:
                        service_no = cell_text

            item = SentreeServiceItem()
            item['info'] = info
            yield item
Example #6
0
    # Ad-hoc offline check: read a saved HTML page from a hard-coded local
    # path and run the overdraft extraction on it without crawling.
    f = open('e:\\1.html')

    # Accumulate the whole file content line by line.
    html = ""
    for l in f:
        html += l
    f.close()

    # Wrap the saved markup in a response object so Selector can consume it.
    resp = TextResponse(url="", body=html)
    # Always-true guard around the experiment below.
    if 1:
        hxs = Selector(resp)
        # NOTE(review): the result of this first query is overwritten by the
        # next line before it is used.
        total_overdraft_nodes = hxs.xpath('//div[@class="table-responsive"]/table/tbody/tr/td[3]')
        total_overdraft_nodes = hxs.xpath('//div[@class="table-responsive"]/table/tbody/tr/td[3]/child::text()')
        if not total_overdraft_nodes:
            overdraft = '0'
        else:
            # Only the first cleaned text value is kept.
            overdraft = str_list_strip_replace(total_overdraft_nodes.extract(), ['&nbsp;', ' ', '\t', '\n'])[0]
        print overdraft
    # Exits here; the statements below are not reached while this call
    # stays in place.
    sys.exit(0)

    s = SentreeSpider()
    try:
        # NOTE(review): if parse_showdesk_services is a generator function,
        # calling it without iterating the result does not run its body -- confirm.
        s.parse_showdesk_services(resp)
    except:
        print traceback.format_exc()

    # NOTE(review): `items` is not defined anywhere in this fragment;
    # presumably a module-level list -- verify.
    print json.dumps(obj=items, ensure_ascii=False, indent=4)
#     sys.exit(0)
#
#     SentreeSpider().parse_consumer_bill_stream_validate(resp)
#
#     datas = []