Ejemplo n.º 1
0
    def parse(self, response):
        """Yield a FundsInfoItem for every promoted-product container.

        ``pname`` is the text of the link in the container head.  ``pid`` is
        cut out of that link's ``href``: the text after the last ``(`` up to
        the first ``,``, with surrounding single quotes stripped.

        Rate, minimum amount and period are read from three consecutive
        table cells, starting at cell 0 when the row has five cells and at
        cell 1 when it has six.  For any other cell count the item is still
        yielded with only ``pname`` and ``pid`` set.

        Raises IndexError when an expected node is missing.
        """
        head_link = './div[@class="ebdp-pc4promote-circularcontainer-head"]/span/span/a'
        indatas = response.xpath('//div[@class="ebdp-pc4promote-circularcontainer"]')
        # Parenthesised form prints the same value on Python 2 and Python 3.
        print(len(indatas))
        for data in indatas:
            item = FundsInfoItem()
            item["pname"] = data.xpath(head_link + '/text()').extract()[0]
            href = data.xpath(head_link + '/@href').extract()[0]
            item["pid"] = href.split('(')[-1].split(',')[0].strip('\'')

            temp = data.xpath('./div[@class="ebdp-pc4promote-circularcontainer-content"]/table/tbody/tr/td')
            if len(temp) in (5, 6):
                # Six-cell rows carry one extra leading cell, so every column
                # of interest is shifted right by one.
                first = len(temp) - 5
                rate_div = temp[first].xpath('./div/div')[1]
                floor_div = temp[first + 1].xpath('./div/div')[1]
                period_div = temp[first + 2].xpath('./div/div')[1]

                item["prate"] = rate_div.xpath('./text()').extract()[0]
                # Amount (<b>) and its trailing text are both taken from the
                # same cell, for either row layout.
                item["pfloor"] = (floor_div.xpath('./b/text()').extract()[0]
                                  + floor_div.xpath('./text()').extract()[0])
                item["pperiod"] = period_div.xpath('string(.)').extract()[0]
            yield item
Ejemplo n.º 2
0
 def parsexk(self, response):
     """Yield a FundsInfoItem for every ``div.xklc_con`` block in the response.

     ``pid`` and ``pname`` are required: a missing node raises IndexError.
     ``prate``, ``pperiod`` and ``pfloor`` are best-effort and are simply
     left unset when the expected node is absent.
     """
     print("新客理财爬取")
     xklc = response.xpath('//div[@class="xklc_con"]')
     print(len(xklc))
     text_of = 'normalize-space(string(.))'
     tab_lists = './div/div/div[@class="xklc_cptab"]/ul[@class="tb2 fl"]'
     for product in xklc:
         item = FundsInfoItem()
         item["pid"] = product.xpath(tab_lists + '/li')[0].xpath(
             text_of).extract()[0]
         item["pname"] = product.xpath(
             './div/div/div[@class="xklc_title"]/text()').extract()[0]
         # Only a missing node (IndexError) is tolerated below; a bare
         # ``except`` would also swallow GeneratorExit and KeyboardInterrupt.
         try:
             item["prate"] = product.xpath(
                 './div/div/div[@class="xklc_sz"]/div')[0].xpath(
                     text_of).extract()[0]
         except IndexError:
             pass
         try:
             item["pperiod"] = product.xpath(tab_lists)[1].xpath(
                 './li')[1].xpath(text_of).extract()[0]
         except IndexError:
             pass
         try:
             item["pfloor"] = product.xpath(tab_lists)[1].xpath(
                 './li')[0].xpath(text_of).extract()[0]
         except IndexError:
             pass
         yield item
Ejemplo n.º 3
0
Archivo: cib.py Proyecto: iris93/crawl
    def parse(self, response):
        """Yield a FundsInfoItem for every ``<tr>`` found under a ``<tbody>``.

        Cell 0 holds the name (link text, falling back to the cell's own
        text), cells 4/5/6 hold period/minimum amount/rate, and ``pid`` is
        cut from the ``src`` of the image in the last cell: the part after
        the last ``lccp`` and before the first following ``.``.

        Raises IndexError when a row lacks an expected cell or text node.
        """
        for data in response.xpath('//tbody/tr'):
            # Query the cells once per row instead of once per field.
            cells = data.xpath('./td')
            item = FundsInfoItem()
            try:
                item["pname"] = cells[0].xpath('./a/text()').extract()[0]
            except IndexError:
                # No link text in the first cell: use the cell's own text.
                item["pname"] = cells[0].xpath('./text()').extract()[0]
            image_src = cells[-1].xpath('./img/@src').extract()[0]
            item["pid"] = image_src.split('lccp')[-1].split('.')[0]
            item["prate"] = cells[6].xpath('./text()').extract()[0]
            item["pfloor"] = cells[5].xpath('./text()').extract()[0]
            item["pperiod"] = cells[4].xpath('./text()').extract()[0]
            yield item
Ejemplo n.º 4
0
 def parse(self, response):
     """Yield one follow-up Request per product in the JSON response body.

     Products are read from ``content.resultList``.  Each item is filled
     from the product record, ``pscale`` is provisionally set to the
     document URL derived from ``pid``, and a GET request for that URL is
     yielded with the item in ``meta`` (callback ``self.get_scale``,
     errback ``self.errors``).  If the request cannot be constructed the
     bare item is yielded instead.
     """
     datas = json.loads(response.body)['content']['resultList']
     for data in datas:
         item = FundsInfoItem()
         item["pid"] = data['prdNo']
         item["pname"] = data['prdName']
         item["prate"] = data['incomerate']
         item["pperiod"] = data['dayDeadLine']
         item["pfloor"] = data['firstAmt']
         pdfUrl = ('https://etrade.citicbank.com/portalweb/findoc/'
                   + str(item["pid"]) + '00.html')
         item["pscale"] = pdfUrl
         # Guard only the construction.  Keeping the ``yield`` out of the
         # ``try`` means closing the generator (GeneratorExit) can never be
         # intercepted and answered with another yield.
         try:
             outcome = Request(url=pdfUrl,
                               method='GET',
                               meta={"item": item},
                               callback=self.get_scale,
                               errback=self.errors)
         except (TypeError, ValueError):
             outcome = item
         yield outcome
Ejemplo n.º 5
0
 def parsein(self, response):
     """Build a single FundsInfoItem from the label/value rows of the page.

     Every ``<tr>`` is inspected: when the text of its ``<th>`` contains one
     of the known labels, the whitespace-normalized text of its ``<td>`` is
     stored in the matching item field.  Rows without any ``<th>`` text are
     skipped.  Exactly one item is yielded, containing whichever fields
     were found.
     """
     # (label substring, item field); every pair is tested for every row,
     # so one header may fill more than one field.
     labels = (
         (u'产品名称', "pname"),
         (u'产品代码', "pid"),
         (u'预计年化收益率', "prate"),
         (u'起点金额', "pfloor"),
         (u'投资期限', "pperiod"),
     )
     item = FundsInfoItem()
     for data in response.xpath('//tr'):
         # Query the header once per row instead of once per label.
         headers = data.xpath('./th/text()').extract()
         if not headers:
             # Nothing to match against; indexing [0] here would raise.
             continue
         header = headers[0]
         for keyword, field in labels:
             if keyword in header:
                 item[field] = data.xpath(
                     'normalize-space(./td/text())').extract()[0]
     yield item
Ejemplo n.º 6
0
 def parse(self, response):
     """Yield a FundsInfoItem for every entry of ``rows`` in the JSON body.

     Each item field is copied verbatim from the record key paired with it
     below; a missing key raises KeyError.
     """
     key_for_field = (
         ("pid", 'finance_no'),
         ("pname", 'finance_allname'),
         ("prate", 'finance_anticipate_rate'),
         ("pperiod", 'finance_lmttime_info'),
         ("pfloor", 'finance_indi_ipominamnt'),
     )
     for record in json.loads(response.text)['rows']:
         item = FundsInfoItem()
         for field, key in key_for_field:
             item[field] = record[key]
         yield item
Ejemplo n.º 7
0
 def parsedq(self,response):
     """Yield a FundsInfoItem per entry of ``data.compFinancialProducts``.

     The response body is parsed as JSON and each item field is copied
     verbatim from the record key paired with it below.
     """
     key_for_field = (
         ("pid", "prdCode"),
         ("pname", "prdName"),
         ("prate", "indexContent"),
         ("pperiod", "investTerm"),
         ("pfloor", "minInvestAmount"),
     )
     products = json.loads(response.body)["data"]["compFinancialProducts"]
     for product in products:
         item = FundsInfoItem()
         for field, key in key_for_field:
             item[field] = product[key]
         yield item
Ejemplo n.º 8
0
 def parse(self, response):
     """Yield a FundsInfoItem for every ``Table`` element in the response.

     Each field is the first text node of the child element named in the
     query paired with it; a missing child raises IndexError.
     """
     query_for_field = (
         ("pid", './ProductNo/text()'),
         ("pname", './ProdName/text()'),
         ("prate", './ProdProfit/text()'),
         ("pperiod", './ProdLimit/text()'),
         ("pfloor", './PurStarAmo/text()'),
     )
     for node in response.xpath('//Table'):
         item = FundsInfoItem()
         for field, query in query_for_field:
             item[field] = node.xpath(query).extract()[0]
         yield item
Ejemplo n.º 9
0
 def parse(self, response):
     """Print the raw response, then yield one item per entry of ``List``.

     ``pperiod`` is ``LiveTime`` rendered as a string with ``天`` appended;
     every other field is copied verbatim from the record.
     """
     print('响应开始:')
     body = response.text
     print(body)
     for record in json.loads(body)['List']:
         item = FundsInfoItem()
         item["pid"] = record['PrdCode']
         item["pname"] = record['PrdName']
         item["prate"] = record['IncomeRateExt']
         live_time = str(record['LiveTime'])
         item["pperiod"] = live_time + '天'
         item["pfloor"] = record['PfirstAmt']
         yield item
Ejemplo n.º 10
0
 def parse(self, response):
     """Yield a FundsInfoItem for every regex match in the response text.

     The pattern captures, in order: product name, product code, rate and
     minimum amount.  The page offers no period to capture, so ``pperiod``
     is set to a fixed placeholder string.
     """
     # Unicode literal with escaped backslashes: the pattern text is the
     # same on Python 2 and Python 3, with no ``str.decode`` call (which
     # does not exist on Python 3 strings).
     reg = re.compile(
         u'(永乐\\d号[^<]*).*(\\w\\w\\d\\d\\d\\d).*(\\d\\.\\d\\d%).*起点金额(.*)[\\S]上限')
     for data in reg.findall(response.text):
         item = FundsInfoItem()
         item["pid"] = data[1]
         item["pname"] = data[0]
         item["prate"] = data[2]
         item["pperiod"] = u'未找到投资期限'
         item["pfloor"] = data[3]
         yield item
Ejemplo n.º 11
0
 def parse(self, response):
     """Yield a FundsInfoItem per ``ProdList`` entry of a JSONP response.

     The JSON payload is the text between the character following
     ``jsonpCallback`` and the final character of the response.  When a
     product's ``yieldRate`` equals ``0.0`` the rate is replaced by the
     result of ``self.get_ccb_detail_rate`` for that product code.
     """
     begin = re.search('jsonpCallback', response.text).end()
     payload = response.text[begin + 1:-1]
     for product in json.loads(payload)['ProdList']:
         rate = product["yieldRate"]
         if rate == 0.0:
             # Try to fetch the latest rate from the product detail page.
             rate = self.get_ccb_detail_rate(product['code'])
         item = FundsInfoItem()
         item["pid"] = product['code']
         item["pname"] = product['name']
         item["prate"] = rate
         item["pperiod"] = product['investPeriod']
         item["pfloor"] = product['purFloorAmt']
         yield item
Ejemplo n.º 12
0
 def parse(self, response):
     """Yield a FundsInfoItem for every regex match in the response text.

     Each match has six groups, used in order as: name, id, rate, period,
     and the two parts that are concatenated into ``pfloor``.  The match
     list and its length are printed before any item is yielded.
     """
     pattern = re.compile(
         r'ft">(.*)<span>(.*)</span>[\s\S]{1,1000}value="(.*)"\sna[\s\S]{1,2900}font"[^<>]*>([^<>]*)</span>[\s\S]'
         r'{1,2900}<td class="bot"><span class="font" >(.*)</span><span class="grey">(.*)</span></td>')
     funds_info = pattern.findall(response.text)
     print(funds_info)
     print(len(funds_info))
     for name, code, rate, period, amount, unit in funds_info:
         item = FundsInfoItem()
         item["pid"] = code
         item["pname"] = name
         item["prate"] = rate
         item["pperiod"] = period
         item["pfloor"] = amount + unit
         yield item
Ejemplo n.º 13
0
 def parse(self, response):
     """Yield a FundsInfoItem for every entry of ``rows`` in the JSON body.

     The rate is normalized: every ``%``, carriage return, newline and tab
     is removed and a single ``%`` is appended.  A rate that is empty after
     cleaning is stored as an empty string rather than raising.
     """
     for data in json.loads(response.text)['rows']:
         item = FundsInfoItem()
         item["pid"] = data['finance_no']
         item["pname"] = data['finance_allname']
         # Normalize the rate format.
         rate = data['finance_anticipate_rate']
         for unwanted in ('%', '\r', '\n', '\t'):
             rate = rate.replace(unwanted, '')
         if rate:
             # After stripping, the text can no longer end in '%', so the
             # suffix is always needed; empty text is left untouched because
             # indexing its last character would raise IndexError.
             rate += '%'
         item["prate"] = rate
         item["pperiod"] = data['finance_lmttime_info']
         item["pfloor"] = data['finance_indi_ipominamnt']
         yield item
Ejemplo n.º 14
0
 def parsehq(self, response):
     """Yield a FundsInfoItem for every recommended product in the JSON body.

     Products are read from ``recommendProducts`` of every entry in
     ``data.recommendAreas``.  ``pid`` is the product code with a trailing
     comma appended.  ``pfloor`` comes from ``finaSaleStatusInfo.minAmount``
     when present, otherwise from ``fundSaleStatusInfo.pfirstAmt``; if the
     product has neither, ``pfloor`` is left unset.
     """
     datas = json.loads(response.body)["data"]["recommendAreas"]
     for data_list in datas:
         for data in data_list["recommendProducts"]:
             item = FundsInfoItem()
             item["pid"] = data["prdCode"]+","
             item["pname"] = data["recommendName"]
             item["prate"] = data["newIndexContent"]
             item["pperiod"] = data["recommendType"]
             item["pscale"] = data["redirectUrl"]
             product = data["product"]
             # ``in`` works on Python 2 and 3; ``dict.has_key`` exists only
             # on Python 2.
             if "finaSaleStatusInfo" in product:
                 item["pfloor"] = product["finaSaleStatusInfo"]["minAmount"]
             elif "fundSaleStatusInfo" in product:
                 item["pfloor"] = product["fundSaleStatusInfo"]["pfirstAmt"]
             yield item
Ejemplo n.º 15
0
 def parse(self, response):
     """Yield a FundsInfoItem for every entry of ``returnData.list``.

     ``pperiod`` is ``LIVE_TIME`` rendered as a string with ``天`` appended;
     every other field is copied verbatim from the record.
     """
     payload = json.loads(response.text)
     for record in payload["returnData"]["list"]:
         item = FundsInfoItem()
         item["pid"] = record['PRD_CODE']
         item["pname"] = record['PRD_NAME']
         item["prate"] = record['NEXT_INCOME_RATE']
         live_time = str(record['LIVE_TIME'])
         item["pperiod"] = live_time+u'天'
         item["pfloor"] = record['PFIRST_AMT']
         yield item
Ejemplo n.º 16
0
 def parse(self, response):
     """Yield a follow-up request for every ``Table`` element in the response.

     The item is filled from the element's children and handed to
     ``self.parse_pdf`` through ``meta``.  The request URL is the second
     start URL followed by ``/<pid>.htm``.
     """
     query_for_field = (
         ("pid", './ProductNo/text()'),
         ("pname", './ProdName/text()'),
         ("prate", './ProdProfit/text()'),
         ("pperiod", './ProdLimit/text()'),
         ("pfloor", './PurStarAmo/text()'),
     )
     for node in response.xpath('//Table'):
         item = FundsInfoItem()
         for field, query in query_for_field:
             item[field] = node.xpath(query).extract()[0]
         page_name = str(item["pid"]) + '.htm'
         productUrl = self.start_urls[1] + '/' + page_name
         yield scrapy.FormRequest(
             url=productUrl,
             method='GET',
             meta={"item": item},
             callback=self.parse_pdf,
         )
Ejemplo n.º 17
0
    def parse(self, response):
        """Yield FundsInfoItems assembled from five independent regex scans.

        Spaces, CR, LF and tabs are removed from the response text before
        matching.  The i-th match of every pattern is combined into the i-th
        product, so all rows are assembled before the first item is yielded.
        For patterns with alternatives the first non-empty group wins; the
        period falls back to ``无`` when no group matched.

        The rate has ``%`` and control characters stripped and a single
        ``%`` appended; when the rate is empty the resulting error and the
        rate are printed and the rate stays empty.
        """
        pid_re = re.compile(r'(2301\d*)</font>')
        pname_re = re.compile(r'<fontclass="autosho[^>]*>([^<]*)|<fontclass="xianjin[^>]*>([^<]*)')
        prate_re = re.compile(r'1-2">([^<]*)<|准-->([^<]*)<')
        pperiod_re = re.compile(r'日-->([^<]*)<|td>([0123456789天月年\-]+)<')
        pfloor_re = re.compile(r'aid[^>]*>([^<]*)|位-->([^<]*)|td>([^<]*万元)')

        compact = response.text
        for blank in (' ', '\r', '\n', '\t'):
            compact = compact.replace(blank, '')

        pids = pid_re.findall(compact)
        pnames = pname_re.findall(compact)
        prates = prate_re.findall(compact)
        pperiods = pperiod_re.findall(compact)
        pfloors = pfloor_re.findall(compact)

        # Phase 1: align the match lists by position.
        rows = []
        for idx, pid in enumerate(pids):
            name_groups = pnames[idx]
            rate_groups = prates[idx]
            period_groups = pperiods[idx]
            floor_groups = pfloors[idx]
            rows.append((
                pid,
                name_groups[0] or name_groups[1],
                rate_groups[0] or rate_groups[1],
                period_groups[0] or period_groups[1] or '无',
                floor_groups[0] or floor_groups[1] or floor_groups[2],
            ))

        # Phase 2: turn every aligned row into an item.
        for pid, pname, prate, pperiod, pfloor in rows:
            item = FundsInfoItem()
            item["pid"] = pid
            item["pname"] = pname
            # Normalize the rate format.
            cleaned = prate
            for unwanted in ('%', '\r', '\n', '\t'):
                cleaned = cleaned.replace(unwanted, '')
            item["prate"] = cleaned
            try:
                if cleaned[-1] != '%':
                    item["prate"] = cleaned + '%'
            except Exception as e:
                print('Error:{}'.format(e))
                print(item["prate"])
            item["pperiod"] = pperiod
            item["pfloor"] = pfloor
            yield item
Ejemplo n.º 18
0
 def parseacb(self, response):
     """Yield a FundsInfoItem for every ``tr.acb_table`` row not yet expired.

     A row's end time is the text of the second ``<span>`` in its third
     cell; the row is kept only when ``self.now()`` compares lower than it.
     ``pfloor`` is the text of cell 3 followed by the text of cell 1.
     The end time of every row is printed with its status.
     """
     print("抓取安存宝")
     products = response.xpath('//tr[@class="acb_table"]')
     print(len(products))
     for row in products:
         cells = row.xpath('./td')
         end_spans = cells[2].xpath('./span')
         endtime = end_spans[1].xpath('string(.)').extract()[0]
         if not self.now() < endtime:
             print(endtime, "已过期")
             continue
         print(endtime, "未过期")
         item = FundsInfoItem()
         item["pname"] = cells[0].xpath('./a/@title').extract()[0]
         item["prate"] = cells[5].xpath('string(.)').extract()[0]
         item["pperiod"] = cells[4].xpath('string(.)').extract()[0]
         amount = cells[3].xpath('string(.)').extract()[0]
         suffix = cells[1].xpath('string(.)').extract()[0]
         item["pfloor"] = amount + suffix
         yield item
Ejemplo n.º 19
0
    def parse_pdf(self, response):
        """Save the response body as a PDF and yield one item from its table.

        The file is written to ``pdf/<last URL path segment>``, reopened
        with pdfplumber, and the table extracted from the first page is read
        at fixed row positions (column 1 of rows 1, 2, 4, 5, 10 and 11).
        Whitespace is removed from ``pid``, ``pfloor`` and ``pscale``.
        """
        print(response.url)
        filename = 'pdf/' + response.url.split('/')[-1]
        # Context managers release both handles even if a step raises.
        with open(filename, 'wb') as f:
            f.write(response.body)

        with pdfplumber.open(filename) as pdf:
            # ``pages`` is a list indexed from 0; only the first is used.
            table = pdf.pages[0].extract_table()

        item = FundsInfoItem()
        item["pid"] = "".join(table[2][1].split())
        item["pname"] = table[1][1]
        item["prate"] = table[11][1]
        item["pperiod"] = table[10][1]
        item["pfloor"] = "".join(table[5][1].split())
        item['pscale'] = "".join(table[4][1].split())
        yield item
Ejemplo n.º 20
0
Archivo: hxb.py Proyecto: iris93/crawl
    def parse(self, response):
        """Yield a FundsInfoItem for every ``<li name="pageli">`` under an ``<ol>``.

        ``pid`` is not set.  ``pfloor`` is the ``span.amt`` text with the
        literal ``0000`` appended.
        """
        rate_query = 'normalize-space(./div/div[@class="box_lf"]/p[@class="box_num"]/text())'
        period_query = 'normalize-space(./div/ul/li/span[@class="highlight"]/text())'
        for entry in response.xpath('//ol/li[@name="pageli"]'):
            item = FundsInfoItem()
            item["pname"] = entry.xpath('./div/p/a/text()').extract()[0]
            item["prate"] = entry.xpath(rate_query).extract()[0]
            item["pperiod"] = entry.xpath(period_query).extract()[0]
            amount = entry.xpath(
                './div/ul/li/span[@class="amt"]/text()').extract()[0]
            item["pfloor"] = amount + '0000'
            yield item
Ejemplo n.º 21
0
 def parse5(self, response):
     """Yield a FundsInfoItem per row of the ``lccp_main_content_lb`` table.

     The first row is skipped.  ``pid`` is the part after the last ``-`` in
     the ``data-analytics-click`` attribute of the link in cell 0; the
     remaining fields are whitespace-normalized text of cells 0, 5, 4 and 3.
     """
     rows = response.xpath(
         '//div[@class="lccp_main_content_lb"]/table/tbody/tr')
     for row in rows[1:]:
         cells = row.xpath('./td')
         click_tag = cells[0].xpath('./a/@data-analytics-click').extract()[0]
         item = FundsInfoItem()
         item["pid"] = click_tag.split('-')[-1]
         item["pname"] = cells[0].xpath(
             'normalize-space(./a/text())').extract()[0]
         item["prate"] = cells[5].xpath(
             'normalize-space(./div/span/text())').extract()[0]
         item["pperiod"] = cells[4].xpath(
             'normalize-space(./text())').extract()[0]
         item["pfloor"] = cells[3].xpath(
             'normalize-space(./text())').extract()[0]
         yield item
Ejemplo n.º 22
0
 def parsein(self, response):
     """Build one FundsInfoItem from the page and follow its document link.

     Every ``<tr>`` is inspected: when the text of its ``<th>`` contains one
     of the known labels, the whitespace-normalized text of its ``<td>`` is
     stored in the matching item field.  Rows without any ``<th>`` text are
     skipped.

     If the page has a link under ``ul.title-ul``, a GET request for the
     first such link is yielded with the item in ``meta`` (callback
     ``self.get_scale``).  Otherwise ``pscale`` is set to ``"not found"``
     and the item itself is yielded.
     """
     # (label substring, item field); every pair is tested for every row,
     # so one header may fill more than one field.
     labels = (
         (u'产品名称', "pname"),
         (u'产品代码', "pid"),
         (u'预计年化收益率', "prate"),
         (u'起点金额', "pfloor"),
         (u'投资期限', "pperiod"),
     )
     item = FundsInfoItem()
     for data in response.xpath('//tr'):
         # Query the header once per row instead of once per label.
         headers = data.xpath('./th/text()').extract()
         if not headers:
             # Nothing to match against; indexing [0] here would raise.
             continue
         header = headers[0]
         for keyword, field in labels:
             if keyword in header:
                 item[field] = data.xpath(
                     'normalize-space(./td/text())').extract()[0]
     url = response.xpath('//ul[@class="title-ul"]/li/a')
     if len(url) < 1:
         item["pscale"] = "not found"
         yield item
     else:
         pdfUrl = url.xpath('@href').extract()[0]
         yield scrapy.FormRequest(url=pdfUrl,
                                  method='GET',
                                  meta={"item": item},
                                  callback=self.get_scale)
Ejemplo n.º 23
0
    def parse(self, response):
        """Yield a FundsInfoItem for every ``<tr>`` found under a ``<tbody>``.

        Fields are the first text node of fixed cells: 0 = id, 1 = name,
        2 = period, 3 = rate, 4 = minimum amount.  A row with fewer cells or
        an empty cell raises IndexError.
        """
        # Assignment order mirrors the order in which cells are read.
        cell_for_field = (
            ("pname", 1),
            ("pid", 0),
            ("prate", 3),
            ("pfloor", 4),
            ("pperiod", 2),
        )
        for row in response.xpath('//tbody/tr'):
            item = FundsInfoItem()
            for field, position in cell_for_field:
                cell = row.xpath('./td')[position]
                item[field] = cell.xpath('./text()').extract()[0]
            yield item
Ejemplo n.º 24
0
 def parse(self, response):
     """Yield an item, or a detail request, per ``ProdList`` entry.

     The JSON payload is the text between the character following
     ``jsonpCallback`` and the final character of the response.  A rate
     equal to ``0.0`` is replaced by ``self.get_ccb_detail_rate(pid)``.
     If the rate is then the "not available in html" marker string, a POST
     form request for the product detail is yielded (callback
     ``self.find_pdf``, item in ``meta``); otherwise the item is yielded.
     """
     begin = re.search('jsonpCallback', response.text).end()
     payload = response.text[begin + 1:-1]
     key_for_field = (
         ("pid", 'code'),
         ("pname", 'name'),
         ("prate", 'yieldRate'),
         ("pperiod", 'investPeriod'),
         ("pfloor", 'purFloorAmt'),
     )
     for product in json.loads(payload)['ProdList']:
         item = FundsInfoItem()
         for field, key in key_for_field:
             item[field] = product[key]
         if item["prate"] == 0.0:
             item["prate"] = self.get_ccb_detail_rate(item['pid'])
         if item["prate"] != 'html中无法获取到收益率':
             yield item
             continue
         # Try to fetch the latest rate from the product detail endpoint.
         detail_url = r'http://finance.ccb.com/cc_webtran/queryFinanceProdDetail.gsp?'
         request_headers = {
             'User-Agent':
             r'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
             'Referer':
             r'http://finance.ccb.com/cn/finance/product.html',
             'Connection': 'keep-alive'
         }
         form_fields = {
             'jsoncallback':
             'jQuery191036942510719116894_1533864732025',
             'params.code': item["pid"]
         }
         yield scrapy.FormRequest(url=detail_url,
                                  method='POST',
                                  headers=request_headers,
                                  formdata=form_fields,
                                  meta={"item": item},
                                  callback=self.find_pdf)
Ejemplo n.º 25
0
    def parse(self, response):
        """Yield a FundsInfoItem for every ``<tr class="bg2">`` in the response.

        Name and id come from the link inside ``td.name`` (the id is the
        part of its ``href`` after the last ``productno=``); period, minimum
        amount and rate come from cells 2, 3 and 4 respectively.
        """
        for row in response.xpath('//tr[@class="bg2"]'):
            item = FundsInfoItem()
            item["pname"] = row.xpath(
                'normalize-space(./td[@class="name"]/a/text())').extract()[0]

            link = row.xpath('./td[@class="name"]/a/@href').extract()[0]
            item["pid"] = link.split('productno=')[-1]

            rate_cell = row.xpath('./td')[4]
            item["prate"] = rate_cell.xpath('./b/text()').extract()[0]

            floor_cell = row.xpath('./td')[3]
            item["pfloor"] = floor_cell.xpath('./text()').extract()[0]

            period_cell = row.xpath('./td')[2]
            item["pperiod"] = period_cell.xpath(
                'normalize-space(./text())').extract()[0]
            yield item
Ejemplo n.º 26
0
    def parse(self, response):
        """Yield a FundsInfoItem for every product row of every table.

        Tables with a ``<thead>``: every body row is a product and the
        columns are fixed (period 4, minimum amount 5, rate 6).

        Tables without a ``<thead>``: the first body row is treated as the
        header and the floor/rate/period columns are located by keywords in
        its cell text, keeping the fixed defaults for any column not found.
        A table with no body rows at all is skipped.  For these tables, when
        the rate cell holds the "see the published notice" text, the rate is
        looked up through two HTTP fetches derived from the product id; on
        any failure the error is printed and a placeholder rate is stored.
        Finally every field is stringified, stripped of CR/LF/tab and
        surrounding whitespace, and printed.

        In both layouts ``pname`` is the text of cell 0 and ``pid`` is cut
        from the ``src`` of the image in the last cell.
        """
        for table in response.xpath('//table'):
            # Default column positions; used unchanged for <thead> tables.
            floor = 5
            rate = 6
            period = 4
            rows = table.xpath('./tbody/tr')

            if len(table.xpath('./thead')) > 0:
                for data in rows:
                    cells = data.xpath('./td')
                    item = FundsInfoItem()
                    item["pname"] = cells[0].xpath('string(.)').extract()[0]
                    item["pid"] = cells[-1].xpath('./img/@src').extract()[0].split('lccp')[-1].split('.')[0]
                    item["prate"] = cells[rate].xpath('./text()').extract()[0]
                    item["pfloor"] = cells[floor].xpath('./text()').extract()[0]
                    item["pperiod"] = cells[period].xpath('./text()').extract()[0]
                    yield item
                continue

            if not rows:
                # No header row to read titles from; indexing [0] would raise.
                continue

            for i, title_cell in enumerate(rows[0].xpath('./td')):
                title = title_cell.xpath('string(.)').extract()[0]
                if u'起购' in title:
                    floor = i
                elif u'客户年化' in title or u'比较基准' in title or u'客户参考浮动年化净收益率' in title:
                    rate = i
                elif u'天' in title:
                    period = i

            for data in rows[1:]:
                cells = data.xpath('./td')
                item = FundsInfoItem()
                item["pname"] = cells[0].xpath('string(.)').extract()[0]
                item["pid"] = cells[-1].xpath('./img/@src').extract()[0].split('lccp')[-1].split('.')[0]
                item["prate"] = cells[rate].xpath('./text()').extract()[0]
                if item['prate'] == '以我行网站刊登的参考收益率公告为准':
                    # NOTE(review): these urlopen calls are synchronous and
                    # run inside the callback - consider moving them to
                    # follow-up requests.
                    try:
                        html_id = '201' + str(item["pid"][-4:-1])
                        if html_id == '201201':
                            html_id = '201203'
                        inner_url_1 = ('http://wealth.cib.com.cn/retail/duration/cash/referNetValue/'
                                       + html_id + '/' + html_id + '.html')
                        # ``with`` closes each HTTP response once it is read.
                        with urllib.request.urlopen(inner_url_1) as res:
                            inner_html_1 = res.read().decode('utf-8')
                        # Raw strings keep the regex escapes valid without
                        # changing the pattern text.
                        re_1 = ('(/retail/duration/cash/referNetValue/' + html_id + '/'
                                + html_id + r'_[\d]*.html)')
                        inner_url_2 = re.search(re_1, inner_html_1).group()
                        inner_url_2 = 'http://wealth.cib.com.cn' + inner_url_2
                        with urllib.request.urlopen(inner_url_2) as res_2:
                            inner_html_2 = res_2.read().decode('utf-8')
                        re_2 = r'<td>([\d\.]*%)</td>'
                        final = re.search(re_2, inner_html_2).group(1)
                        item['prate'] = final
                    except Exception as e:
                        # Best effort: any lookup failure degrades to a
                        # placeholder instead of dropping the product.
                        print('errorinfo:{}'.format(e))
                        item['prate'] = '未能在子页面获取到收益'
                item["pfloor"] = cells[floor].xpath('./text()').extract()[0]
                item["pperiod"] = cells[period].xpath('./text()').extract()[0]
                for sub_item_key in item.keys():
                    item[sub_item_key] = str(item[sub_item_key]).replace('\r', '').replace('\n', '')\
                        .replace('\t', '').strip()
                    print(item[sub_item_key])
                yield item