Exemple #1
0
    def parse_item(self, response):
        """Yield a ``GGFundNavItem`` for every ``table#table1`` row after the first.

        ``response.meta['ext']`` supplies the fund name.  Each row is reduced
        to its stripped, non-blank text nodes; the first three are read as
        the date (``%Y-%m-%d``), ``nav`` and ``added_nav``.
        """
        fund_name = response.meta['ext']
        for tr in response.css('table#table1 tr')[1:]:
            stripped = [text.strip() for text in tr.css('::text').extract()]
            cells = [text for text in stripped if text]
            date_text, nav_text, added_nav_text = cells[0], cells[1], cells[2]

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['fund_name'] = fund_name
            item['channel'] = self.channel
            item['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')
            item['nav'] = None if not nav_text else float(nav_text)
            item['added_nav'] = (
                None if not added_nav_text else float(added_nav_text))

            yield item
    def parse_item(self, response):
        """Yield NAV items from the response table and queue the next page.

        The text of the second header cell decides which fields are filled:
        ``nav``/``added_nav`` when it is '份额净值', or
        ``income_value_per_ten_thousand``/``annualized_return`` when it is
        '万份收益'.  When the table holds more rows than
        ``response.meta['form']['numPerPage']``, a request description for the
        following page is appended to ``self.ips``.
        """
        fund_name = response.meta['ext']['fund_name']
        table_rows = response.xpath('//table/tr')
        # A list of text nodes: the ``in`` tests below are exact list-membership checks.
        header_texts = response.xpath('//table/tr[1]/th[2]/text()').extract()
        for tr in table_rows[1:]:
            date_text = tr.xpath('./td[1]/text()').extract_first()
            if not date_text:
                continue
            nav = tr.xpath('./td[2]/text()').extract_first()
            added_nav = tr.xpath('./td[3]/text()').extract_first()

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(date_text, '%Y%m%d')
            if '份额净值' in header_texts:
                item['nav'] = None if nav is None else float(nav)
                item['added_nav'] = (
                    None if added_nav is None else float(added_nav))
            elif '万份收益' in header_texts:
                # '--' is the placeholder for a value that is not available.
                item['income_value_per_ten_thousand'] = (
                    None if nav in (None, '--') else float(nav))
                item['annualized_return'] = (
                    None if added_nav in (None, '--')
                    else float(added_nav.strip('%'))/100.0)
            yield item

        form = response.meta['form']
        if len(table_rows) <= int(form['numPerPage']):
            return

        code = response.meta['ext']['fund_code']
        next_page = int(form['curPage']) + 1
        self.ips.append({
            'url': 'https://ziguan.xcsc.com/servlet/asset/AssetManage',
            'ref': response.url,
            'form': {
                'function': 'loadFundJz',
                'fundcode': code,
                'curPage': str(next_page),
                'numPerPage': '10',
                'reqUrl': '/servlet/asset/AssetManage?function=loadFundJz&fundcode=' + code,
                '_': '1526452208722'
            },
            'ext': {'fund_name': fund_name, 'fund_code': code}
        })
Exemple #3
0
 def parse_item(self, response):
     """Yield a ``GGFundNavItem`` for each dated row under ``div#value``.

     The fund name comes from the ``<h4>`` inside ``div.col-md-9``.  In every
     candidate ``<div>`` the three ``col-lg-4`` children are read as the date
     (``%Y-%m-%d``), ``nav`` and ``added_nav``.  Rows without a date are
     skipped; a missing or blank ``nav``/``added_nav`` is stored as ``None``.
     """
     fund_name = response.xpath("//div[@class='col-md-9']/h4//text()").extract_first()
     for row in response.xpath("//div[@id='value']//div"):
         # ``extract_first()`` returns None when the cell is absent, so
         # normalise to '' before stripping instead of calling .strip() on None.
         statistic_date = (
             row.xpath("./div[@class='col-lg-4'][1]//text()").extract_first() or ''
         ).strip()
         if not statistic_date:
             continue
         nav = (
             row.xpath("./div[@class='col-lg-4'][2]//text()").extract_first() or ''
         ).strip()
         added_nav = (
             row.xpath("./div[@class='col-lg-4'][3]//text()").extract_first() or ''
         ).strip()

         item = GGFundNavItem()
         item['sitename'] = self.sitename
         item['channel'] = self.channel
         item['url'] = response.url
         item['fund_name'] = fund_name
         item['nav'] = float(nav) if nav else None
         item['added_nav'] = float(added_nav) if added_nav else None
         item['statistic_date'] = datetime.strptime(statistic_date, '%Y-%m-%d')
         yield item
Exemple #4
0
    def parse_item(self, response):
        """Yield one ``GGFundNavItem`` per table row after the first.

        Rows come from ``div.product_con:nth-child(2) tr``.  The text nodes of
        column 1 are concatenated into the date (``%Y%m%d``); columns 2 and 3
        give ``nav`` and ``added_nav``.  Empty or missing values become
        ``None``.  The fund name is ``response.meta['ext']``.
        """
        for tr in response.css('div.product_con:nth-child(2) tr')[1:]:
            date_parts = tr.css('td:nth-child(1) ::text').extract()
            nav_text = tr.css('td:nth-child(2) ::text').extract_first()
            added_nav_text = tr.css('td:nth-child(3) ::text').extract_first()
            date_text = ''.join(date_parts)

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['fund_name'] = response.meta['ext']
            item['channel'] = self.channel
            item['url'] = response.url
            item['nav'] = None if not nav_text else float(nav_text)
            item['added_nav'] = (
                None if not added_nav_text else float(added_nav_text))
            if date_text:
                item['statistic_date'] = datetime.strptime(date_text, '%Y%m%d')
            else:
                item['statistic_date'] = None

            yield item
 def parse_item(self, response):
     """Yield NAV items parsed from an HTML table embedded in a JSON payload.

     The HTML fragment is read from ``json.loads(response.text)[2]['data']``.
     Every table row after the first supplies, as its first four text nodes,
     the fund name, ``nav``, ``added_nav`` and the date (``%Y-%m-%d``).
     """
     html = json.loads(response.text)[2]['data']
     table_rows = scrapy.Selector(text=html).xpath('//table//tr')
     for tr in table_rows[1:]:
         texts = tr.xpath('td//text()').extract()
         fund_name, nav, added_nav, date_text = (
             texts[0], texts[1], texts[2], texts[3])

         item = GGFundNavItem()
         item['sitename'] = self.sitename
         item['channel'] = self.channel
         item['url'] = response.url
         item['fund_name'] = fund_name
         item['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')
         item['nav'] = None if nav is None else float(nav)
         item['added_nav'] = None if added_nav is None else float(added_nav)
         yield item
    def parse_item(self, response):
        """Yield ``GGFundNavItem`` objects from the third ``product_tab_sub`` table.

        Nothing is yielded unless ``response.status`` is 200.  The fund name
        is ``response.meta['ext']`` with ':' removed.  For every row after the
        first, column 1 is the date (``%Y/%m/%d``) and column 2 is stored as
        ``added_nav``; a missing value is stored as ``None``.
        """
        if response.status != 200:
            return

        rows = response.xpath(
            '//div[contains(@class,"product_tab_sub")][3]//tr')
        fund_name = response.meta['ext'].replace(':', '')
        for row in rows[1:]:
            # Raw strings keep ``\S`` a regex escape rather than an invalid
            # string escape.  The match contains no whitespace, so no further
            # space removal is needed before float().
            date = row.css('td:nth-child(1)::text').re_first(r'\S+')
            add_nav = row.css('td:nth-child(2)::text').re_first(r'\S+')

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['fund_name'] = fund_name
            item['channel'] = self.channel
            item['url'] = response.url
            item['added_nav'] = float(add_nav) if add_nav else None
            item['statistic_date'] = datetime.strptime(
                date, '%Y/%m/%d') if date else None

            yield item
    def parse_item(self, response):
        """Yield ``GGFundNavItem`` objects from the ``tagContent`` table rows.

        The fund name is the first text node found under row 2 of the
        ``tagContent2`` table, stripped and with '净值披露表' removed.  The first
        six table rows are skipped.  A remaining row is used only when it has
        more than three non-blank ``<font>`` text nodes: the first one is the
        ``nav`` (commas and spaces removed) and the fourth one is the date
        (commas removed, ``%Y-%m-%d``).
        """
        title = response.xpath(
            '//div[@id="tagContent2"]/table/tbody//tr[2]//p//font//text()'
        ).extract_first().strip()
        fund_name = title.replace('净值披露表', '')
        table_rows = response.xpath(
            '//div[@class="tagContent"]//table//tbody//tr')
        for tr in table_rows[6:]:
            texts = tr.xpath("td//font//text()").extract()
            cells = [text for text in texts if text.strip()]
            if len(cells) <= 3:
                continue

            nav_text = cells[0].replace(',', '').replace(' ', '')
            date_text = cells[3].replace(',', '')

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')
            item['nav'] = None if nav_text is None else float(nav_text)

            yield item
Exemple #8
0
    def parse_item(self, response):
        """Yield one ``GGFundNavItem`` per row of the first ``nr_con`` table.

        ``response.meta['ext']`` supplies the fund name.  In each row the
        first text node is the date (``%Y-%m-%d``) and the second is the
        ``nav``; all whitespace is removed from both before parsing.
        """
        fund_name = response.meta['ext']
        table_rows = response.xpath(
            '//div[@name ="data"]/div[@class = "nr_con"][1]//table[@class = "tab tab_01"]//tr'
        )
        for tr in table_rows:
            texts = tr.xpath('td//text()').extract()
            # split() + join drops every whitespace character, not just the ends.
            date_text = ''.join(texts[0].split())
            nav_text = ''.join(texts[1].split())

            # The site's adjusted cumulative NAV is deliberately not scraped.
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')
            item['nav'] = float(nav_text)
            yield item
Exemple #9
0
    def parse_item(self, response):
        """Yield NAV items from a JSON object embedded in the response text.

        The text from the first '{"result' up to, but excluding, the final
        character is decoded as JSON.  ``result.name`` is the fund name;
        ``result.date`` and ``result.value`` are read index by index as the
        date ('/' replaced by '-', ``%Y-%m-%d``) and the ``nav``.
        """
        body = response.text
        start = body.find('{"result')
        payload = json.loads(body[start:-1])
        result = payload['result']
        fund_name = result['name']
        for index, raw_date in enumerate(result['date']):
            nav = result['value'][index]
            date_text = raw_date.replace('/', '-')

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')
            item['nav'] = None if not nav else float(nav)
            yield item
Exemple #10
0
    def parse_item(self, response):
        """Yield NAV items extracted with regular expressions from raw HTML.

        All whitespace and commas are removed from ``response.text`` first.
        The section between '单位净值报告' and '深圳市金中和投资管理有限公司' is split
        on '</TR>'; every chunk containing a ``YYYY.MM.DD`` date yields one
        item.  ``nav`` is the first ``NN.NN``/``NNN.NN`` number in the chunk
        that is not followed by '%', or ``None`` when there is none.  The
        fund name is ``response.meta['ext']``.
        """
        compact = re.sub(r'[\s,]', '', response.text)
        sections = re.findall('单位净值报告(.*)深圳市金中和投资管理有限公司', compact,
                              re.DOTALL)
        if not sections:
            return

        for chunk in sections[0].split('</TR>'):
            dates = re.findall(r'\d{4}\.\d{2}\.\d{2}', chunk, re.DOTALL)
            if not dates:
                continue
            navs = re.findall(r'[^\d\.](\d{2,3}\.\d{2})[^%]', chunk, re.DOTALL)

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['fund_name'] = response.meta['ext']
            item['channel'] = self.channel
            item['url'] = response.url
            # Matches are digits and dots only, so no stripping is required.
            item['nav'] = float(navs[0]) if navs else None
            item['statistic_date'] = datetime.strptime(dates[0], '%Y.%m.%d')

            yield item
Exemple #11
0
    def parse_item(self, response):
        """Yield one ``GGFundNavItem`` per body row of ``table#dateTR``.

        Columns 1-3 hold the date (``%Y-%m-%d``), ``nav`` and ``added_nav``.
        A missing ``nav``/``added_nav`` cell is stored as ``None``.  The fund
        name is ``response.meta['ext']['fund_name']``.
        """
        fund_name = response.meta['ext']['fund_name']
        for tr in response.xpath('//table[@id="dateTR"]/tbody/tr'):
            nav_text = tr.xpath('./td[2]/text()').extract_first()
            added_nav_text = tr.xpath('./td[3]/text()').extract_first()
            date_text = tr.xpath('./td[1]/text()').extract_first()

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['nav'] = None if nav_text is None else float(nav_text)
            item['added_nav'] = (
                None if added_nav_text is None else float(added_nav_text))
            item['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')

            yield item
Exemple #12
0
    def parse_item(self, response):
        """Yield one ``GGFundNavItem`` per ``pj_table`` row after the first.

        The fund name is the ``<h1>`` text of ``div.pro_title`` with the
        markers '(已结束)' and '(已成立)' removed.  In each row the first
        ``<td>`` text is the date ('/' replaced by '-', ``%Y-%m-%d``) and the
        second is the ``nav``.
        """
        title = response.xpath(
            '//div[@class="pro_title"]/h1/text()').extract_first()
        table_rows = response.xpath('//div[@class="pj_table"]//tr')
        for tr in table_rows[1:]:
            cells = tr.xpath('td/text()').extract()
            date_text = cells[0].replace('/', '-')
            nav_text = cells[1]

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            cleaned_title = title.replace('(已结束)', '')
            item['fund_name'] = cleaned_title.replace('(已成立)', '')
            item['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')
            item['nav'] = None if nav_text is None else float(nav_text)

            yield item
Exemple #13
0
    def parse_item(self, response):
        """Yield NAV items from one of two possible table layouts.

        When the first cell of the first ``tbody`` row reads '产品名称', rows
        are taken from ``//tbody/tr``; otherwise from the nested table at
        ``//table/tbody/tr/td[1]/table/tbody/tr``.  The first row of the
        chosen set is skipped.  Columns 1-3 hold the fund name, the date
        (``%Y/%m/%d``) and the ``nav``.  Rows lacking a fund name or a date
        are skipped; a missing ``nav`` is stored as ``None``.
        """
        first_cell = response.xpath(
            '//tbody/tr[1]/td[1]/text()').extract_first()
        # ``extract_first()`` may return None; treat that as "no header text".
        if (first_cell or '').strip() == '产品名称':
            rows = response.xpath('//tbody/tr')[1:]
        else:
            rows = response.xpath(
                '//table/tbody/tr/td[1]/table/tbody/tr')[1:]

        for row in rows:
            fund_name = row.xpath('./td[1]/text()').extract_first()
            statistic_date = row.xpath('./td[2]/text()').re_first(
                r'\d+/\d+/\d+')
            if fund_name is None or statistic_date is None:
                continue
            nav = row.xpath('./td[3]/text()').re_first('[0-9.]+')

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name.strip()
            item['nav'] = float(nav) if nav is not None else None
            item['statistic_date'] = datetime.strptime(statistic_date,
                                                       '%Y/%m/%d')
            yield item
Exemple #14
0
    def parse_item(self, response):
        """Yield NAV items from a JSON array, then the next request.

        Each entry is indexed as ``[timestamp, value]``.  The timestamp is
        divided by 1000 and converted through ``time.localtime`` to a local
        calendar date; the value is divided by 100 to give ``nav``.
        ``added_nav`` is always ``None``.  After the rows,
        ``self.request_next()`` is yielded.
        """
        entries = json.loads(response.text)
        fund_name = response.meta['ext']['fund_name']

        for entry in entries:
            stamp, raw_nav = entry[0], entry[1]
            local_time = time.localtime(int(stamp) / 1000)
            date_text = time.strftime("%Y-%m-%d", local_time)

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['nav'] = float(raw_nav) / 100
            item['added_nav'] = None
            item['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')
            yield item

        yield self.request_next()
Exemple #15
0
    def parse_item(self, response):
        """Yield one ``GGFundNavItem`` per entry of the JSON ``results`` list.

        ``create_time`` is parsed as ``%Y%m%d``; ``relate_price`` and
        ``cumulative_net`` become ``nav`` and ``added_nav`` (``None`` stays
        ``None``).  The fund name is ``response.meta['ext']['fund_name']``.
        """
        fund_name = response.meta['ext']['fund_name']
        for record in json.loads(response.text)['results']:
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(
                record['create_time'], '%Y%m%d')

            nav = record['relate_price']
            item['nav'] = None if nav is None else float(nav)

            added_nav = record['cumulative_net']
            item['added_nav'] = None if added_nav is None else float(added_nav)
            yield item
    def parse_item(self, response):
        """Yield NAV items from a paged table and queue the following page.

        ``response.meta['ext']`` must provide ``fund_name``, ``page`` and
        ``url`` (the base URL used to build the next-page request).  The data
        table is looked up inside the second ``<td bgcolor="#ffffff">``; its
        first row holds the column captions, which decide the fields filled
        for each data row:

        * '单位净值'       -> ``nav`` from column 2
        * '累计净值'       -> ``added_nav`` from column 3
        * '每万份收益'     -> ``income_value_per_ten_thousand`` from column 2
        * '七日年化收益率' -> ``d7_annualized_return`` from column 3

        Nothing is yielded, and no next page is queued, when the table is
        missing or has no data rows.
        """
        ext = response.meta['ext']
        fund_name = ext['fund_name']
        page = int(ext['page'])
        base_url = ext['url']

        cells = response.xpath('//td[@bgcolor="#ffffff"]')
        if len(cells) < 2:
            return
        rows = cells[1].xpath('.//tr')
        # The row count must be checked before rows[0] is read; a cell with
        # no <tr> would otherwise raise IndexError.
        if len(rows) < 2:
            return

        headers = rows[0].xpath('./td/text()').re(r'\S+')
        has_nav = '单位净值' in headers
        has_added_nav = '累计净值' in headers
        has_income = '每万份收益' in headers
        has_d7_return = '七日年化收益率' in headers

        for row in rows[1:]:
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['fund_name'] = fund_name
            item['channel'] = self.channel
            item['url'] = response.url

            statistic_date = row.xpath('./td[1]/text()').extract_first()
            item['statistic_date'] = datetime.strptime(
                statistic_date, '%Y-%m-%d') if statistic_date else None

            if has_nav:
                nav = row.xpath('./td[2]/text()').extract_first()
                item['nav'] = float(nav) if nav else None
            if has_added_nav:
                added_nav = row.xpath('./td[3]/text()').extract_first()
                item['added_nav'] = float(added_nav) if added_nav else None
            if has_income:
                income = row.xpath('./td[2]/text()').extract_first()
                item['income_value_per_ten_thousand'] = (
                    float(income) if income else None)
            if has_d7_return:
                d7_return = row.xpath('./td[3]/text()').re_first(
                    r'(\d+\.?\d*)')
                item['d7_annualized_return'] = (
                    float(d7_return) if d7_return else None)
            yield item

        next_page = str(page + 1)
        self.ips.append({
            'url': base_url + '&pagesize=10&currentPage=' + next_page,
            'ref': response.url,
            'ext': {'page': next_page, 'url': base_url, 'fund_name': fund_name}
        })
    def parse_item(self, response):
        """Yield NAV items from a JSON page and queue the next page if any.

        Rows are read from ``results[0]['data']``; each provides ``pro_name``,
        ``nav``, ``accumulativenav`` and ``tradedate`` (``%Y-%m-%d``).  When
        ``currentPage`` is lower than ``totalPages``, a form request for the
        next page is appended to ``self.ips``, carrying over
        ``response.meta['ext']['pro_code']``.
        """
        info_json = json.loads(response.text)
        first_result = info_json['results'][0]
        for data in first_result['data']:
            fund_name = data['pro_name']
            nav = data['nav']
            add_nav = data['accumulativenav']
            statistic_date = datetime.strptime(data['tradedate'], '%Y-%m-%d')

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = statistic_date
            item['nav'] = float(nav) if nav is not None else None
            item['added_nav'] = float(add_nav) if add_nav is not None else None

            yield item

        # Normalise both counters to int once, so the increment below also
        # works when the service sends them as strings.
        total_pages = int(first_result['totalPages'])
        current_page = int(first_result['currentPage'])
        pro_code = response.meta['ext']['pro_code']
        if current_page < total_pages:
            self.ips.append({
                'url': 'http://www.iztzq.com/servlet/json',
                'ref': response.url,
                'form': {
                    'funcNo': '834013',
                    'code': pro_code,
                    'page': str(current_page + 1),
                    'numPerPage': '10'
                },
                'ext': {
                    'pro_code': pro_code
                }
            })
    def parse_item(self, response):
        """Yield NAV items from the table inside ``div#cat_2``.

        The first two rows are skipped.  A row is used only when it has at
        least three text nodes: the first is the date (``%Y%m%d``) and the
        third is the ``nav``.  The fund name is read from the page and the
        markers '【期限已到】' and '【分红】' are removed from it.
        """
        title = response.xpath(
            '//div[@class="main"]/div[2]/div[1]/p/text()').extract_first()
        table_rows = response.xpath('//div[@id="cat_2"]//table//tr')
        for tr in table_rows[2:]:
            cells = tr.xpath('td//text()').extract()
            if len(cells) < 3:
                continue
            date_text, nav_text = cells[0], cells[2]

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            cleaned_title = title.replace('【期限已到】', '')
            item['fund_name'] = cleaned_title.replace('【分红】', '')
            item['statistic_date'] = datetime.strptime(date_text, '%Y%m%d')
            item['nav'] = None if nav_text is None else float(nav_text)

            yield item
Exemple #19
0
    def parse_item(self, response):
        """Yield one ``GGFundNavItem`` per data row, then the next request.

        For every ``table tr`` the first ``<td>`` text is the date
        (``%Y-%m-%d``) and the first ``<td><span>`` text is the ``nav``.
        Rows with no ``<td>`` text, and rows whose first cell contains '日期'
        ("date"), are skipped.  ``added_nav`` is always ``None``.  After the
        rows, ``self.request_next()`` is yielded.
        """
        fund_name = response.meta['ext']['fund_name']
        for row in response.css('table tr'):
            statistic_date = row.css('td::text').extract_first()
            # ``extract_first()`` returns None for rows without <td> text;
            # testing ``in`` on None would raise TypeError.
            if statistic_date is None or '日期' in statistic_date:
                continue
            nav = row.css('td span::text').extract_first()

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(statistic_date, '%Y-%m-%d')
            item['nav'] = float(nav)
            item['added_nav'] = None
            yield item

        yield self.request_next()
Exemple #20
0
    def parse_item(self, response):
        """Yield NAV items from the JSON ``data`` list, then the next request.

        Each entry provides ``cdate`` (``%Y-%m-%d``), ``per_netvalue`` (stored
        as ``nav``) and ``total_netvalue`` (stored as ``added_nav``); ``None``
        values stay ``None``.  The fund name is
        ``response.meta['ext']['fund_name']``.  After the rows,
        ``self.request_next()`` is yielded.
        """
        for record in json.loads(response.text)['data']:
            item = GGFundNavItem()

            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url

            item['fund_name'] = response.meta['ext']['fund_name']
            item['statistic_date'] = datetime.strptime(
                record['cdate'], '%Y-%m-%d')
            nav = record['per_netvalue']
            item['nav'] = None if nav is None else float(nav)
            added_nav = record['total_netvalue']
            item['added_nav'] = None if added_nav is None else float(added_nav)
            yield item

        yield self.request_next()
Exemple #21
0
    def parse_item(self, response):
        """Yield NAV items from the table under ``#show2``.

        The fund name is the second header cell of the first row of the
        ``#show1`` table.  For each ``#show2`` row after the first, columns
        2-4 hold the date (``%Y年%m月%d日``), ``nav`` and ``added_nav``.  Rows
        without a date are skipped and a blank ``nav`` is stored as ``None``.
        ``added_nav`` is left unset when the cell contains '%' or equals '-',
        and is stored as ``None`` when the cell is blank.
        """
        fund_name = response.xpath(
            '//*[@id="show1"]/table//tr[1]/th[2]/text()').extract_first()
        rows = response.xpath('//*[@id="show2"]/div[2]/table//tr')
        for row in rows[1:]:
            # ``extract_first()`` may return None, so normalise to '' before
            # stripping instead of calling .strip() on None.
            fund_date = (
                row.xpath('./td[2]/text()').extract_first() or '').strip()
            if not fund_date:
                continue
            nav = (row.xpath('./td[3]/text()').extract_first() or '').strip()
            added_nav = (
                row.xpath('./td[4]/text()').extract_first() or '').strip()

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(fund_date, '%Y年%m月%d日')
            item['nav'] = float(nav) if nav else None
            if '%' not in added_nav and added_nav != '-':
                # Guard on added_nav itself (the original tested ``nav``).
                item['added_nav'] = float(added_nav) if added_nav else None
            yield item
 def parse_item(self, response):
     """Yield NAV items from key/value pairs found in the response text.

     Four lists are collected with regular expressions ("name", "unit",
     "accumulation" and "date") and combined position by position; the
     shortest list determines how many items are produced.  Dates use the
     ``%Y/%m/%d`` format.
     """
     body = response.text
     fund_names = re.findall('"name" : "(.*?)",', body)
     navs = re.findall('"unit" : "(.*?)",', body)
     added_navs = re.findall('"accumulation" : "(.*?)",', body)
     dates = re.findall('"date" : "(.*?)"', body)
     for fund_name, date_text, nav, added_nav in zip(
             fund_names, dates, navs, added_navs):
         item = GGFundNavItem()
         item['sitename'] = self.sitename
         item['channel'] = self.channel
         item['url'] = response.url
         item['fund_name'] = fund_name
         item['nav'] = None if nav is None else float(nav)
         item['added_nav'] = None if added_nav is None else float(added_nav)
         item['statistic_date'] = datetime.strptime(date_text, '%Y/%m/%d')
         yield item
Exemple #23
0
    def parse_item(self, response):
        """Yield NAV items from ``div#history``, then the next request.

        The first two table rows are skipped.  Columns 1-4 of each remaining
        row hold the fund name, ``nav``, ``added_nav`` and the date
        (``%Y-%m-%d``).  Finally ``self.request_next`` is called with the
        ``fps`` and ``ips`` values from ``response.meta`` and its result is
        yielded.
        """
        fps = response.meta['fps']
        ips = response.meta['ips']
        for tr in response.xpath('//div[@id="history"]/table/tr')[2:]:
            fund_name = tr.xpath(r'./td[1]/text()').extract_first()
            nav_text = tr.xpath(r'./td[2]/text()').extract_first()
            added_nav_text = tr.xpath(r'./td[3]/text()').extract_first()
            date_text = tr.xpath("./td[4]/text()").extract_first()

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['nav'] = float(nav_text)
            item['added_nav'] = float(added_nav_text)
            item['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')
            yield item
        yield self.request_next(fps, ips)
Exemple #24
0
    def parse_item(self, response):
        """Yield one ``GGFundNavItem`` per dated row of ``table.table2``.

        ``sitename`` and ``fund_name`` both come from ``response.meta['ext']``.
        The first table row is skipped, as is any row whose first cell holds
        no ``N-N-N`` date.  ``nav`` is the number found between '>' and '<'
        in the second cell's markup, or ``None`` when there is none.
        """
        sitename = response.meta['ext']['sitename']
        fund_name = response.meta['ext']['fund_name']
        for row in response.xpath('//table[@class="table2"]/tr')[1:]:
            # Raw strings keep ``\d`` / ``\s`` regex escapes rather than
            # invalid string escapes.
            statistic_date = row.xpath('./td[1]').re_first(r'\d+-\d+-\d+')
            if statistic_date is None:
                continue
            nav = row.xpath('./td[2]').re_first(r'>\s*([0-9.]+)\s*<')

            # Build the item only once the row is known to be usable.
            item = GGFundNavItem()
            item['sitename'] = sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(statistic_date,
                                                       '%Y-%m-%d')
            item['nav'] = float(nav) if nav is not None else None

            yield item
Exemple #25
0
    def parse_item(self, response):
        """Yield NAV items from the table rows inside ``div.ff``.

        The text nodes of column 1 are joined into the date, with '/'
        replaced by '-' (``%Y-%m-%d``).  Only rows whose date text contains
        '20' are used.  Columns 2 and 3 give ``nav`` and ``added_nav``; a
        missing cell is stored as ``None``.  The fund name is
        ``response.meta['ext']['fund_name']``.
        """
        rows = response.xpath("//div[@class='ff']//tr")
        fund_name = response.meta['ext']['fund_name']

        for row in rows:
            date_parts = row.xpath("./td[1]//text()").extract()
            statistic_date = ''.join(date_parts).replace('/', '-')
            # Rows without '20' in the date text are not treated as data rows.
            if '20' not in statistic_date:
                continue
            nav = row.xpath("./td[2]//text()").extract_first()
            added_nav = row.xpath("./td[3]//text()").extract_first()

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['nav'] = float(nav) if nav is not None else None
            # Guard on added_nav itself; the original tested ``nav`` here and
            # could pass None to float().
            item['added_nav'] = float(
                added_nav) if added_nav is not None else None
            item['statistic_date'] = datetime.strptime(
                statistic_date, '%Y-%m-%d')
            yield item
Exemple #26
0
    def parse_item(self, response):
        """Yield one ``GGFundNavItem`` per entry of ``result.value``.

        The response body is decoded as JSON once.  ``fundName`` is the fund
        name; every entry supplies ``tradingDate`` (``%Y-%m-%d``) and ``nav``.
        The item URL is the fixed product page rather than ``response.url``.
        """
        payload = json.loads(response.text)
        rows = payload['result']['value']
        fund_name = payload['fundName']
        for row in rows:
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = 'http://www.reganfund.com/product.html'
            item['fund_name'] = fund_name

            statistic_date = row['tradingDate']
            item['statistic_date'] = datetime.strptime(statistic_date,
                                                       '%Y-%m-%d')

            nav = row['nav']
            item['nav'] = float(nav) if nav is not None else None

            # NOTE(review): added_nav is filled from row['nav'], exactly as
            # the original did - confirm whether the feed exposes a separate
            # cumulative value that should be used instead.
            added_nav = row['nav']
            item['added_nav'] = float(
                added_nav) if added_nav is not None else None
            yield item
Exemple #27
0
 def parse_item(self, response):
     """Yield NAV items from three JavaScript arrays embedded in the page.

     ``data1``, ``data2`` and ``data3`` are located with regular expressions
     and read as ``nav``, ``added_nav`` and the date (``%Y-%m-%d``).  The
     arrays are combined position by position.  Entries with a blank date are
     skipped, and a blank ``nav``/``added_nav`` is stored as ``None``.  The
     fund name is the first text node found under an ``<h2>``.
     """
     page_text = response.text

     def js_array(pattern):
         # First match of *pattern*, double quotes removed, split on commas.
         return re.findall(pattern, page_text)[0].replace('"', '').split(',')

     fund_name = response.xpath('//h2//text()').extract_first()
     navs = js_array(r'var data1= \[(.*?)\];')
     added_navs = js_array(r'var data2= \[(.*?)\];')
     statistic_dates = js_array(r'var data3= \[(.*?)\];')

     for nav, added_nav, statistic_date in zip(navs, added_navs,
                                               statistic_dates):
         # An empty JS array splits into [''], so blank values must be
         # tolerated here rather than passed to float()/strptime().
         statistic_date = statistic_date.strip()
         if not statistic_date:
             continue
         nav = nav.strip()
         added_nav = added_nav.strip()

         item = GGFundNavItem()
         item['sitename'] = self.sitename
         item['channel'] = self.channel
         item['url'] = response.url
         item['fund_name'] = fund_name
         item['nav'] = float(nav) if nav else None
         item['added_nav'] = float(added_nav) if added_nav else None
         item['statistic_date'] = datetime.strptime(statistic_date,
                                                    '%Y-%m-%d')
         yield item
Exemple #28
0
    def parse_item(self, response):
        """Yield one ``GGFundNavItem`` per ``<tr class="dd">`` row.

        The second, third and fourth ``<td>`` text nodes are the date,
        ``nav`` and ``added_nav``.  Two literal fix-ups are applied before
        parsing: the date '25016-6-3' is rewritten to '2016-6-3', and '..'
        inside ``added_nav`` is collapsed to '.'.  Dots in the date are
        replaced by '-' (``%Y-%m-%d``).  Blank values become ``None``.  The
        fund name is ``response.meta['ext']``.
        """
        fund_name = response.meta['ext']

        for tr in response.xpath('//tr[@class="dd"]'):
            cells = tr.xpath('td/text()').extract()
            date_text = cells[1].replace('25016-6-3', '2016-6-3')
            date_text = date_text.replace('.', '-').strip()
            nav_text = cells[2].strip()
            added_nav_text = cells[3].replace('..', '.').strip()

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            if date_text:
                item['statistic_date'] = datetime.strptime(date_text, '%Y-%m-%d')
            else:
                item['statistic_date'] = None
            item['nav'] = None if not nav_text else float(nav_text)
            item['added_nav'] = (
                None if not added_nav_text else float(added_nav_text))
            yield item
Exemple #29
0
    def parse_item(self, response):
        """Yield NAV items from ``table.tab_01`` and queue the next page.

        The first table row is skipped.  Columns 2-5 of each remaining row
        hold the fund name, the date (``%Y-%m-%d``), ``nav`` and
        ``added_nav``.  Rows without a date are skipped; missing numbers are
        stored as ``None``.

        The total record count is read from the pager text
        ('共有N条记录').  With 10 records per page, a form request for page
        ``response.meta['pg'] + 1`` is appended to ``self.ips`` while that
        page is within range.  No further page is queued when the pager text
        is absent.
        """
        page_size = 10

        for row in response.xpath('//table[@class="tab_01"]/tr')[1:]:
            fund_name = row.xpath('./td[2]/text()').extract_first()
            statistic_date = row.xpath('./td[3]/text()').re_first(
                r'\d+-\d+-\d+')
            if statistic_date is None:
                continue
            nav = row.xpath('./td[4]/text()').re_first('[0-9.]+')
            added_nav = row.xpath('./td[5]/text()').re_first('[0-9.]+')

            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['nav'] = float(nav) if nav is not None else None
            item['added_nav'] = float(
                added_nav) if added_nav is not None else None
            item['statistic_date'] = datetime.strptime(statistic_date,
                                                       '%Y-%m-%d')
            yield item

        t_count = response.xpath(
            '//div[@class="pages_ul_page33"]/ul/li[1]/text()').re_first(
                r'共有([\d]+)条记录')
        if t_count is None:
            # No pager text on the page: int(None) would raise.
            return

        # Integer ceiling division: number of pages needed for all records.
        total_pages = (int(t_count) + page_size - 1) // page_size
        pg = response.meta['pg'] + 1
        if pg <= total_pages:
            class_id = response.meta['form']['classid']
            self.ips.append({
                'url': 'http://www.ydsc.com.cn/ydzq/zcgl/cpjhjzList.jsp',
                'form': {
                    'classid': class_id,
                    'pageIndex': str(pg),
                    'pageSize': str(page_size),
                    'hrefURL': '',
                    'filter': ''
                },
                'pg': pg
            })
 def parse_item(self, response):
     """Yield NAV items from ``#dataTable`` and queue the next page.

     ``response.meta['ext']`` must provide ``fund_code``, ``fund_name``,
     ``page`` and ``is_currency_type``.  The first table row is skipped and
     column 2 of every other row is the date (``%Y-%m-%d``); rows without a
     date are skipped.  Columns 3 and 4 are stored according to
     ``is_currency_type``:

     * ``'0'`` (regular product): ``nav`` and ``added_nav``
     * ``'1'`` (money-market product): ``income_value_per_ten_thousand``
       and ``d7_annualized_return`` (any '%' removed)

     Blank cells are stored as ``None``.  The total page count is read from
     the '共N页' text; while the current page is below it, a request for the
     next page is appended to ``self.ips``.
     """
     ext = response.meta['ext']
     fund_code = ext['fund_code']
     fund_name = ext['fund_name']
     is_currency_type = ext['is_currency_type']
     tpg = response.xpath(
         '//*[@class="dtitle_t"]/table/tr/td/text()[1]').re_first(
             r'共\s*(\d+)\s*页')
     page = ext['page']

     def cell_text(row, column):
         # Stripped text of the given column, or '' when the cell is missing.
         text = row.xpath('./td[%d]/text()' % column).extract_first()
         return (text or '').strip()

     rows = response.xpath('//*[@id="dataTable"]').xpath(".//tr")
     for row in rows[1:]:
         statistic_date = cell_text(row, 2)
         if not statistic_date:
             continue
         third = cell_text(row, 3)
         fourth = cell_text(row, 4)

         item = GGFundNavItem()
         item['sitename'] = self.sitename
         item['channel'] = self.channel
         item['url'] = response.url
         item['fund_name'] = fund_name
         item['statistic_date'] = datetime.strptime(statistic_date, '%Y-%m-%d')
         if is_currency_type == '0':  # regular product
             item['nav'] = float(third) if third else None
             item['added_nav'] = float(fourth) if fourth else None
         elif is_currency_type == '1':  # money-market product
             d7_annualized_return = fourth.replace('%', '')
             item['income_value_per_ten_thousand'] = (
                 float(third) if third else None)
             item['d7_annualized_return'] = (
                 float(d7_annualized_return) if d7_annualized_return else None)
         yield item

     if tpg is None:
         return
     page = int(page)
     if page < int(tpg):
         next_page = str(page + 1)
         self.ips.append({
             'url': 'http://www.gtsfund.com.cn/chart-web/chart/fundnettable?pages='
                    + next_page + '-15&fundcode=' + fund_code + '&from=&to=',
             'ref': response.url,
             'ext': {'fund_code': fund_code, 'fund_name': fund_name, 'page': next_page,
                     'is_currency_type': is_currency_type}
         })