def parse_item(self, response):
    """Parse fund NAV rows from the detail table.

    One special page (URL containing a fixed GUID) lists two share classes
    per row: the base class in columns 2/4 and the '次级' (subordinate)
    class in columns 3/5. All other pages carry one fund per row.
    """
    rows = response.css('div.detail_cn tr')[1:]
    for r in rows:
        row_str = ''.join(r.css('td ::text').re(r'\S+'))
        # Skip header-like rows that mention product/NAV column titles.
        if '产品' in row_str or '净值' in row_str:
            continue
        if 'a5905a55-18b9-4676-8e47-c399e357ef45' in response.url:
            fund_name = response.css('span#msgTitle::text').re_first('(.*)产品净值')
            date = ''.join(r.css('td:nth-child(1) ::text').re(r'\S+'))
            # Base share class: NAV in column 2, accumulated NAV in column 4.
            nav_a = ''.join(r.css('td:nth-child(2) ::text').re(r'\S+'))
            add_nav_a = ''.join(r.css('td:nth-child(4) ::text').re(r'\S+'))
            yield self._build_nav_item(response, fund_name, date, nav_a, add_nav_a)
            # Subordinate ('次级') class: NAV in column 3, accumulated in 5.
            nav_b = ''.join(r.css('td:nth-child(3) ::text').re(r'\S+'))
            add_nav_b = ''.join(r.css('td:nth-child(5) ::text').re(r'\S+'))
            yield self._build_nav_item(response, fund_name + '次级', date, nav_b, add_nav_b)
        else:
            date = ''.join(r.css('td:nth-child(1) ::text').re(r'\S+'))
            fund_name = ''.join(r.css('td:nth-child(3) ::text').re(r'\S+'))
            nav = ''.join(r.css('td:nth-child(4) ::text').re(r'\S+'))
            add_nav = ''.join(r.css('td:nth-child(5) ::text').re(r'\S+'))
            if '和而泰员工持股计划' == fund_name:
                # Disambiguate this employee-plan fund by appending its code.
                fund_code = ''.join(r.css('td:nth-child(2) ::text').re(r'\S+'))
                fund_name = fund_name + fund_code
            yield self._build_nav_item(response, fund_name, date, nav, add_nav)

def _build_nav_item(self, response, fund_name, date, nav, add_nav):
    """Assemble a GGFundNavItem from already-extracted cell strings.

    The cell strings are joined from ``\\S+`` matches, so they contain no
    whitespace; the original code's no-op ``replace('.', '.')`` and
    ``replace(' ', '')`` calls are therefore dropped here.
    """
    item = GGFundNavItem()
    item['sitename'] = self.sitename
    item['fund_name'] = fund_name
    item['channel'] = self.channel
    item['url'] = response.url
    item['nav'] = float(nav) if nav else None
    item['added_nav'] = float(add_nav) if add_nav else None
    item['statistic_date'] = datetime.strptime(date, '%Y-%m-%d') if date else None
    return item
def parse_item(self, response):
    """Yield unit NAVs from the HTML table, then accumulated NAVs parsed
    out of the page's embedded chart configuration (categories/data arrays).
    """
    rows = response.xpath('//div[@class="w100"]/table/tbody/tr')
    fund_name = response.meta['ext']['fund_name']
    for row in rows[1:]:
        statistic_date = row.xpath('normalize-space(./td[1]/text())').extract_first()
        statistic_date = datetime.strptime(statistic_date, '%Y-%m-%d')
        nav = row.xpath('normalize-space(./td[2]/text())').extract_first()
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        item['statistic_date'] = statistic_date
        # normalize-space yields '' (not None) for an empty cell, which would
        # crash float(); guard on truthiness instead of `is not None`.
        item['nav'] = float(nav) if nav else None
        yield item
    # Chart series: x-axis dates plus the '累计净值' (accumulated NAV) data.
    dates = re.search(r'categories:\s*\[([^\]]+)\]\s*\},', response.text).group(1)
    dates = re.findall(r'\d+-\d+-\d+', dates)
    added_navs = re.search(r"name:\s*'累计净值',\s*data:\s*\[([^\]]+)\]",
                           response.text).group(1)
    added_navs = re.findall(r'[0-9.]+', added_navs)
    for date, added_nav in zip(dates, added_navs):
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        item['statistic_date'] = datetime.strptime(date, '%Y-%m-%d')
        item['added_nav'] = float(added_nav) if added_nav else None
        yield item
def parse_item(self, response):
    # Parse one page of NAV rows. The accumulated NAV from meta applies only
    # to the first (newest) row: it is cleared after the first item so later
    # rows -- and the next page, via ext below -- carry None.
    fund_name = response.meta['ext']['fund_name']
    added_nav = response.meta['ext']['added_nav']
    rows = response.xpath('//div[@class="jzgbShow"]/table//tr')
    for row in rows[1:]:
        statistic_date = row.xpath('.//td[1]//text()').extract_first()
        nav = row.xpath('.//td[2]//text()').extract_first()
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        item['nav'] = float(nav)
        item['added_nav'] = float(added_nav) if added_nav is not None else None
        # Dates on this site are undelimited, e.g. '20180102'.
        item['statistic_date'] = datetime.strptime(statistic_date, '%Y%m%d')
        added_nav = None  # only the first row gets the accumulated NAV
        yield item
    # Paginate while the JSON-embedded last_page has not been reached.
    end_page = re.findall('"last_page":(.*?),', response.text)[0]
    pg = response.meta['pg']
    old_str = '/page/' + str(pg)
    if pg < int(end_page):
        new_str = '/page/' + str(pg + 1)
        next_url = response.url.replace(old_str, new_str)
        self.ips.append({
            'url': next_url,
            'ref': response.url,
            'pg': pg + 1,
            'ext': {'fund_name': fund_name, 'added_nav': added_nav}
        })
def parse_item(self, response):
    # Scrape NAV history from the product-page table. One specific date
    # (2017/12/01) has its NAV hard-coded -- presumably a correction for a
    # bad cell published by the site; TODO confirm against the source page.
    fund_name = response.xpath(
        '//table[@class="product_table ke-zeroborder"]//tr[1]/td[2]/text()'
    ).extract_first().strip()
    nav_rows = response.xpath(
        '//div[@class="shop_div"]/table[@class="ke-zeroborder"]//tr')
    for nav_row in nav_rows[1:]:
        nav_td = nav_row.xpath('td//text()').extract()
        if nav_td:
            # Skip the header row (second cell is the literal '日期' / "date").
            if nav_td[1].strip() != '日期':
                if nav_td[1].strip() == '2017/12/01':
                    statistic_date = nav_td[1].strip()
                    nav = 1.0544  # hard-coded override for this one date
                else:
                    statistic_date = nav_td[1].strip()
                    nav = nav_td[2].strip()
                item = GGFundNavItem()
                item['sitename'] = self.sitename
                item['channel'] = self.channel
                item['url'] = response.url
                item['fund_name'] = fund_name
                item['statistic_date'] = datetime.strptime(statistic_date, '%Y/%m/%d')
                item['nav'] = float(nav) if nav else None
                yield item
def parse_item(self, response):
    """Parse NAVs embedded in the page's JS ``netList`` array.

    Note: the original code branched on ``ext['type']``, but both arms
    stored identical values into identical fields (only a local alias name
    differed), so the branch is collapsed here. For type '0' pages the
    'valueStr1' series is an annualized return published in the added-NAV
    slot -- same storage either way.
    """
    fund_name = response.meta['ext']['fund_name']
    rows = re.search(r'netList:\[(.*?)\]', response.text).group(1)
    rows = rows.replace('\\', '')  # strip JS escaping before regex scans
    navs = re.findall(r"valueStr2:'([0-9.]+)'", rows)
    added_navs = re.findall(r"valueStr1:'([0-9.]+)'", rows)
    dates = re.findall(r'\d+-\d+-\d+', rows)
    for nav, added_nav, date in zip(navs, added_navs, dates):
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        item['statistic_date'] = datetime.strptime(date, '%Y-%m-%d')
        item['nav'] = float(nav) if nav is not None else None
        item['added_nav'] = float(added_nav) if added_nav is not None else None
        yield item
def parse_item(self, response):
    """Parse a NAV table; money-market pages (detected via the literal
    '七日年化收益率' in the body) publish per-10k income and 7-day annualized
    return instead of unit/accumulated NAV."""
    fund_name = response.meta['ext']['fund_name']
    rows = response.xpath('//table/tr')[1:]
    # Hoisted: the page type is constant for all rows of one response.
    is_money_market = '七日年化收益率' in response.text
    for row in rows:
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        statistic_date = row.xpath('./td[3]//text()').re_first(r'\d+-\d+-\d+')
        if statistic_date is None:
            # Not a data row (header/footer) -- skip it.
            continue
        item['statistic_date'] = datetime.strptime(statistic_date, '%Y-%m-%d')
        if is_money_market:
            income = row.xpath('./td[1]').re_first(r'>\s*([0-9.]+)\s*<')
            item['income_value_per_ten_thousand'] = float(income) if income else None
            d7 = row.xpath('./td[2]').re_first(r'>\s*([0-9.]+)\s*<')
            item['d7_annualized_return'] = float(d7) if d7 else None
        else:
            nav = row.xpath('./td[1]').re_first(r'>\s*([0-9.]+)\s*<')
            item['nav'] = float(nav) if nav is not None else None
            added_nav = row.xpath('./td[2]').re_first(r'>\s*([0-9.]+)\s*<')
            item['added_nav'] = float(added_nav) if added_nav is not None else None
        yield item
def parse_item(self, response):
    # Parse one page of the NAV table and queue the next page, unless the
    # site reports '暂无信息' ("no data yet") -- which also stops pagination.
    fund_name = response.meta['ext']['fund_name']
    rows = response.xpath(
        "//div[@class='col-lg-12 col-md-12 col-sm-12']/table[@class='table table-striped']/tbody/tr"
    )
    if '暂无信息' not in response.text:
        for row in rows:
            statistic_date = row.xpath(
                "./td[1]//text()").extract_first().strip()
            nav = row.xpath("./td[2]//text()").extract_first().strip()
            added_nav = row.xpath(
                "./td[3]//text()").extract_first().strip()
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['nav'] = float(nav)
            item['added_nav'] = float(added_nav)
            item['statistic_date'] = datetime.strptime(
                statistic_date, '%Y-%m-%d')
            yield item
        # Pagination: bump the 'type_unit_p_<n>' segment of the URL.
        pg = response.meta['pg']
        next_pg = pg + 1
        next_url = response.url.replace('type_unit_p_' + str(pg),
                                        'type_unit_p_' + str(next_pg))
        self.ips.append({
            'url': next_url,
            'ref': response.url,
            'pg': next_pg,
            'ext': {
                'fund_name': fund_name
            }
        })
    yield self.request_next()
def parse_item(self, response):
    """Parse the NAV table.

    Full-width parentheses in NAV cells are normalized and any parenthesized
    suffix is dropped before numeric parsing. The fund '沣谊一号' stores its
    accumulated NAV in ``added_nav_2`` instead of ``added_nav``.
    """
    rows = response.xpath("//div[@class='right']//tr")
    for row in rows[1:]:
        fund_name = row.xpath("./td[1]//text()").extract_first()
        statistic_date = row.xpath("./td[4]//text()").extract_first()
        nav = row.xpath("./td[2]//text()").extract_first().replace(
            '(', '(').split('(')[0]
        added_nav = row.xpath("./td[3]//text()").extract_first().replace(
            '(', '(').split('(')[0]
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        item['nav'] = float(nav) if nav is not None else None
        # Bug fix: the added_nav guard previously tested `nav` instead of
        # `added_nav` in the default branch.
        if fund_name == '沣谊一号':
            item['added_nav_2'] = float(
                added_nav) if added_nav is not None else None
        else:
            item['added_nav'] = float(
                added_nav) if added_nav is not None else None
        item['statistic_date'] = datetime.strptime(statistic_date, '%Y-%m-%d')
        yield item
def parse_item(self, response):
    """Parse the single summary row (name / NAV / date) from the info table,
    then hand off to the next queued request."""
    fund_info = response.xpath('//div[@id="infoContent"]/table/tbody/tr[2]')
    for fund in fund_info:
        try:
            fund_name = fund.xpath('td[1]/text()').extract()[0]
            fund_nav = fund.xpath('td[2]/text()').extract()[0]
            nav_date = fund.xpath('td[3]/text()').extract()[0]
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name.strip('\n').strip('\t')
            init_date = nav_date.strip('\n').strip('\t')
            item['statistic_date'] = datetime.strptime(init_date, '%Y-%m-%d')
            item['nav'] = float(fund_nav) if fund_nav is not None else None
            # Accumulated NAV is not published on this page.
            item['added_nav'] = None
            yield item
        except (IndexError, ValueError):
            # Row missing cells or unparsable date/NAV -- best-effort, skip.
            # (Was a bare `except:` that would also have hidden real bugs.)
            continue
    yield self.request_next()
def parse_item(self, response):
    # Parse the overview table (date / NAV / accumulated NAV per row) and,
    # while rows keep coming, queue the next page by bumping the trailing
    # page number in the URL.
    rows = response.css('table.tableStyle.overvieTOP tr')[1:]
    if rows:
        for r in rows:
            row = r.xpath('td//text()').extract()
            date = row[1]
            nav = row[2]
            add_nav = row[3]
            fund_name = response.meta['ext']  # ext holds the name directly
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(date, '%Y-%m-%d')
            item['nav'] = float(nav) if nav is not None else None
            item['added_nav'] = float(
                add_nav) if add_nav is not None else None
            yield item
        next_pg = response.meta['pg'] + 1
        self.ips.append({
            'url': re.sub('\d+$', str(next_pg), response.url),
            'ref': response.url,
            'pg': next_pg,
            'ext': response.meta['ext']
        })
def parse_item(self, response):
    # Parse NAV rows whose dates use Chinese 年/月/日 separators and whose
    # NAV cell may carry a parenthesized suffix (dropped before parsing).
    fund_name = response.meta['ext']['fund_name']
    rows = response.xpath("//tr")
    if len(rows) > 1:
        for row in rows[1:]:
            # Normalize full-width parentheses, then cut any '(...)' suffix.
            nav = row.xpath('./td[2]//text()').extract_first().replace(
                '( ', '(').replace('(', '(')
            if '(' in nav:
                nav = nav.split('(')[0]
            statistic_date = row.xpath(
                './td[1]//text()').extract_first().replace(
                    '年', '-').replace('月', '-').replace('日', '')
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['nav'] = float(nav) if nav is not None else None
            item['statistic_date'] = datetime.strptime(
                statistic_date, '%Y-%m-%d')
            yield item
        # Queue the next page; ext['url'] is the site-relative query path.
        pg = response.meta['pg']
        next_pg = int(pg) + 1
        url = response.meta['ext']['url']
        next_url = 'http://www.longwininvestment.com' + url + '&page=' + str(
            next_pg)
        self.ips.append({
            'url': next_url,
            'ref': response.url,
            'pg': next_pg,
            'ext': {
                'fund_name': fund_name,
                'url': url
            },
        })
def parse_item(self, response):
    # Parse ul-based NAV rows. Pagination stops when the last date on this
    # page equals the last date seen on the previous page (i.e. the site
    # keeps serving the same final page), using `statistic_date` leaked from
    # the loop as this page's newest-seen value.
    fund_name = response.meta['ext']['fund_name']
    rows = response.xpath("//div[@class='right_colume']//ul")
    if len(rows) > 1:
        for row in rows[1:]:
            statistic_date = row.xpath("./li[1]//text()").extract_first()
            nav = row.xpath("./li[2]//text()").extract_first()
            added_nav = row.xpath("./li[3]//text()").extract_first()
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['nav'] = float(nav)
            item['added_nav'] = float(added_nav)
            item['statistic_date'] = datetime.strptime(
                statistic_date, '%Y-%m-%d')
            yield item
        pg = response.meta['pg']
        # last date carried over from the previous page via ext
        last_two_date = response.meta['ext']['last_one_date']
        last_one_date = statistic_date
        if last_one_date != last_two_date:
            next_pg = pg + 1
            next_url = response.url.replace('page=' + str(pg),
                                            'page=' + str(next_pg))
            self.ips.append({
                'url': next_url,
                'ref': response.url,
                'pg': next_pg,
                'ext': {
                    'fund_name': fund_name,
                    'last_one_date': last_one_date
                }
            })
def parse_item(self, response):
    """Extract NAV history from the article-style table, then hand off to
    the next queued request."""
    fps = response.meta['fps']
    ips = response.meta['ips']
    fund_name = response.xpath(
        '//*[@id="view_text_24_10_txt"]/div/p/span/strong/text()'
    ).extract_first()
    rows = response.xpath(
        '//*[@id="view_text_17_10_txt"]/div/table/tbody/tr')
    for row in rows[1:]:  # first row is the header
        # The date is split across two spans; concatenate before parsing.
        date_text = row.xpath("./td[1]/span[1]/text()").extract_first()
        date_text += row.xpath("./td[1]/span[2]/text()").extract_first()
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        item['statistic_date'] = datetime.strptime(date_text, '%Y年%m月%d日')
        item['nav'] = float(
            row.xpath("./td[2]/span[1]/text()").extract_first())
        item['added_nav'] = float(
            row.xpath("./td[3]/span[1]/text()").extract_first())
        yield item
    yield self.request_next(fps, ips)
def parse_item(self, response):
    """Parse a NAV page whose column layout (unit NAV vs. 7-day annualized)
    is detected from the table header row, then queue the next page through
    the shared POST-form meta."""
    fund_name = response.meta['ext']
    data_rows = response.css('tr')[2:]
    header_cells = response.css('tr th::text').extract()
    if data_rows:
        has_nav_columns = '单位净值' in header_cells
        has_d7_columns = '7日年化' in header_cells
        for selector in data_rows:
            cells = selector.css('td::text').extract()
            day = cells[0]
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['statistic_date'] = datetime.strptime(day, '%Y-%m-%d') if day else None
            if has_nav_columns:
                item['nav'] = float(cells[1]) if cells[1] else None
                item['added_nav'] = float(cells[2]) if cells[2] else None
            elif has_d7_columns:
                item['d7_annualized_return'] = float(cells[1]) if cells[1] else None
                item['income_value_per_ten_thousand'] = float(cells[2]) if cells[2] else None
            yield item
        # Reuse the response meta as the next request spec, bumping the page.
        next_pg = response.meta['pg'] + 1
        meta = response.meta
        meta['pg'] = next_pg
        meta['form']['pageIndex'] = str(next_pg)
        self.ips.append(meta)
def parse_item(self, response):
    # Hard-coded scrape for one fund whose table lists dates without a year.
    fund_name = '道通润丰一号'
    tab = response.css('table.MsoNormalTable tr')
    year = 2016  # year of the newest (topmost) rows
    m2 = '0'     # month of the previously seen (newer) row; '0' before first
    for row in tab[1:]:
        date = ''.join(row.xpath('td//text()').extract()[:-1]).strip()
        m1 = date.split('月')[0]
        # (translated) The scraped rows carry no year, so a year boundary is
        # detected from the month sequence: walking down the newest-first
        # table, a December row ('12') directly after a January row ('1')
        # concatenates to '121', meaning we crossed into the previous year.
        if m1 + m2 == '121':
            year = year - 1
        statistic_date = str(year) + '年' + date
        nav = row.xpath('td//text()').extract()[-1]  # last cell is the NAV
        m2 = m1
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        item['statistic_date'] = datetime.strptime(statistic_date,
                                                   '%Y年%m月%d日')
        item['nav'] = float(nav)
        yield item
    yield self.request_next()
def parse_item(self, response):
    """Emit one NAV item per data row, resolving the fund name from the
    product code in the row's first cell (two known products)."""
    code_to_name = {'S81154': '泰和汇1期择时策略混合基金',
                    'SN5055': '泰和汇2期主题精选私募投资基金'}
    for row in response.css('table tr')[1:]:
        cells = row.css('th ::text').re('\S+')
        if not cells:
            continue
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = code_to_name[cells[0]]
        item['statistic_date'] = datetime.strptime(cells[1], '%Y-%m-%d')
        unit_nav = cells[2]
        item['nav'] = float(unit_nav) if unit_nav is not None else None
        accum_nav = cells[3]
        item['added_nav'] = float(accum_nav) if accum_nav is not None else None
        yield item
    yield self.request_next()
def parse_item(self, response):
    """Parse one page of the equity table and paginate up to the page count
    the site reports in its '当前为X/Y页' footer."""
    fund_name = response.meta['ext']['fund_name']
    rows = response.xpath(
        "//div[@class='bottombox fl']/table[@class='equityTable']//tr")
    # Second capture group is the total page count.
    end_pg = re.findall(r'当前为<font color="#FF0000">(.*?)</font>/(\d+)页 ',
                        response.text)[0][1]
    if len(rows) > 1:
        for row in rows[1:]:
            statistic_date = row.xpath("./td[1]//text()").extract_first()
            nav = row.xpath("./td[2]//text()").extract_first()
            added_nav = row.xpath("./td[3]//text()").extract_first()
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            # Guard empty/missing cells instead of crashing in float().
            item['nav'] = float(nav) if nav else None
            item['added_nav'] = float(added_nav) if added_nav else None
            item['statistic_date'] = datetime.strptime(
                statistic_date, '%Y/%m/%d')
            yield item
        pg = response.meta['pg']
        if pg < int(end_pg):
            next_pg = pg + 1
            next_url = response.url.replace('?page=' + str(pg),
                                            '?page=' + str(next_pg))
            self.ips.append({
                'url': next_url,
                'ref': response.url,
                'pg': next_pg,
                'ext': {
                    'fund_name': fund_name
                }
            })
def parse_item(self, response):
    """Parse a generic three-column NAV table (date / NAV / accumulated NAV)
    and keep paginating for as long as the page has any data rows."""
    fund_name = response.meta['ext']['fund_name']
    rows = response.xpath("//tr")
    for row in rows[1:]:
        statistic_date = row.xpath("./td[1]//text()").extract_first()
        nav = row.xpath("./td[2]//text()").extract_first()
        added_nav = row.xpath("./td[3]//text()").extract_first()
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        # Truthiness guard also covers empty-string cells, which `is not
        # None` would pass through to float() and crash on.
        item['nav'] = float(nav) if nav else None
        item['added_nav'] = float(added_nav) if added_nav else None
        item['statistic_date'] = datetime.strptime(statistic_date,
                                                   '%Y-%m-%d')
        yield item
    if len(rows) > 1:
        pg = response.meta['pg']
        next_pg = int(pg) + 1
        next_url = response.url.replace('&page=' + str(pg),
                                        '&page=' + str(next_pg))
        self.ips.append({
            'url': next_url,
            'ref': response.url,
            'pg': next_pg,
            'ext': {
                'fund_name': fund_name
            },
        })
def parse_item(self, response):
    """Parse NAV records from a JSON API.

    One host (125.93.53.23) returns 'Y-m-d' date strings under 'result';
    the other returns epoch-millisecond timestamps nested under
    'netAnnouncements' and is tagged with a different channel.
    """
    fund_name = response.meta['ext']['fund_name']
    row_info = json.loads(response.text)
    if '125.93.53.23' in response.url:
        rows = row_info['result']
    else:
        rows = row_info['get_response']['netAnnouncements']['items']
    for row in rows:
        statistic_date = row['netValueDate']
        nav = row['netValue']
        added_nav = row['totalNetValue']
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['url'] = response.url
        item['fund_name'] = fund_name
        item['nav'] = float(nav) if nav is not None else None
        # Bug fix: the guard previously tested `nav` instead of `added_nav`.
        item['added_nav'] = float(added_nav) if added_nav is not None else None
        if '125.93.53.23' in response.url:
            item['channel'] = self.channel
            item['statistic_date'] = datetime.strptime(
                statistic_date, '%Y-%m-%d')
        else:
            item['channel'] = '券商资管净值'
            # Epoch milliseconds -> local date; time of day is discarded.
            item['statistic_date'] = datetime.strptime(
                time.strftime("%Y-%m-%d",
                              time.localtime(int(statistic_date) / 1000)),
                '%Y-%m-%d')
        yield item
def parse_item(self, response):
    # Parse the 'jingzhi' (NAV) table and queue the next '_<n>.html' page.
    # NOTE(review): rows are iterated from the first <tr>, which assumes the
    # table has no header row -- confirm against the live page.
    fund_name = response.meta['ext']['fund_name']
    rows = response.xpath("//table[@class='jingzhi']//tr")
    if rows:
        for row in rows:
            statistic_date = row.xpath("./td[1]//text()").extract_first()
            nav = row.xpath('./td[2]//text()').extract_first()
            added_nav = row.xpath('./td[3]//text()').extract_first()
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['nav'] = float(nav)
            # The accumulated-NAV column may be empty; set the field only
            # when a value is present.
            if added_nav:
                item['added_nav'] = float(added_nav)
            item['statistic_date'] = datetime.strptime(
                statistic_date, '%Y/%m/%d')
            yield item
        pg = response.meta['pg']
        next_pg = pg + 1
        next_url = response.url.replace('_' + str(pg) + '.html',
                                        '_' + str(next_pg) + '.html')
        self.ips.append({
            'url': next_url,
            'ref': response.url,
            'pg': next_pg,
            'ext': {
                'fund_name': fund_name
            }
        })
def parse_item(self, response):
    """Handle two table layouts: wide rows (more than 4 cells; ISO date in
    column 2, NAV in column 3) and narrow rows (more than 2 cells; Chinese
    年/月/日 date in column 1, NAV in column 2)."""
    fund_name = response.meta['ext']['fund_name']
    for row in response.xpath('//tr')[1:]:
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['fund_name'] = fund_name
        item['channel'] = self.channel
        item['url'] = response.url
        cell_count = len(row.xpath('./td'))
        if cell_count > 4:
            day = row.xpath('./td[2]//text()').re_first(r'\d+-\d+-\d+')
            if not day:
                continue  # not a data row
            value = row.xpath('./td[3]//text()').re_first(r'(\d+\.?\d*)')
            item['nav'] = float(value) if value else None
            item['statistic_date'] = datetime.strptime(day, '%Y-%m-%d')
            yield item
        elif cell_count > 2:
            day = row.xpath('./td[1]//text()').re_first(r'\d+年\d+月\d+日')
            if not day:
                continue  # not a data row
            value = row.xpath('./td[2]//text()').re_first(r'(\d+\.?\d*)')
            item['nav'] = float(value) if value else None
            item['statistic_date'] = datetime.strptime(day, '%Y年%m月%d日')
            yield item
def parse_item(self, response):
    """Parse one JSON page of NAV records and paginate while more records
    remain (500 records per page)."""
    # Removed leftover debug print(response.text).
    data = json.loads(response.text)['data']
    fund_name = response.meta['ext']['fund_name']
    for record in data['records']:
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        # 'netDate' may carry a time suffix; keep only the YYYY-MM-DD part.
        item['statistic_date'] = datetime.strptime(record['netDate'][0:10],
                                                   '%Y-%m-%d')
        item['nav'] = record['netValue']
        item['added_nav'] = record['netValueAccu']
        yield item
    pg = response.meta['pg']
    if pg['page'] * 500 < int(data['totalRecordCount']):
        pg['page'] = pg['page'] + 1
        self.ips.append({
            'pg': pg,
            'url': response.url,
            'headers': response.meta['headers'],
            'body': response.meta['body'],
            'ref': response.meta['ref'],
            'ext': response.meta['ext']
        })
    yield self.request_next()
def parse_item(self, response):
    """Parse NAV rows (columns: NAV, accumulated NAV, ..., date in column 4)
    and follow the '下一页' (next page) link when one exists."""
    rows = response.xpath('//tr')
    ext = response.meta['ext']
    fund_name = ext['fund_name']
    url = ext['url']
    next_page = response.xpath(
        '/html/body/div/a[text()="下一页"]/@href').re_first(r'&page=(\d+)')
    for row in rows[1:]:
        fund_date = row.xpath('./td[4]/text()').extract_first()
        nav = row.xpath('./td[1]/text()').extract_first()
        added_nav = row.xpath('./td[2]/text()').extract_first()
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        try:
            item['statistic_date'] = datetime.strptime(
                fund_date, '%Y/%m/%d')
        except ValueError:
            # Non-data row (header text etc.) -- skip it.
            continue
        item['nav'] = float(nav) if nav is not None else None
        # Bug fix: the guard previously tested `nav` instead of `added_nav`.
        item['added_nav'] = float(added_nav) if added_nav is not None else None
        yield item
    if next_page:
        self.ips.append({
            'url': url + '&page=' + str(next_page),
            'ref': response.url,
            'ext': {
                'fund_name': fund_name,
                'url': url
            }
        })
def parse_item(self, response):
    # Parse the NAV table. Dates are normalized toward 'YYYY/MM/DD': dashes
    # become slashes, and when the 5th character is not already '/' a slash
    # is inserted after the 4-digit year. NOTE(review): the exact un-slashed
    # source format this repairs is not visible here -- confirm against the
    # live site before changing this logic.
    rows = response.xpath(
        "//div[@class='Details_right']/div[@class='jz_table']//tr")
    fund_name = response.xpath("//div[@class='left']//text()").extract_first()
    if len(rows) > 1:
        for row in rows[1:]:
            statistic_date = row.xpath("./td[1]//text()").extract_first()
            if '-' in statistic_date:
                statistic_date = statistic_date.replace('-', '/')
            if statistic_date[4] != '/':
                statistic_date = statistic_date.replace(
                    statistic_date[0:4], statistic_date[0:4] + '/')
            nav = row.xpath('./td[2]//text()').extract_first()
            added_nav = row.xpath('./td[3]//text()').extract_first()
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = fund_name
            item['nav'] = float(nav) if nav is not None else None
            item['added_nav'] = float(added_nav) if added_nav is not None else None
            item['statistic_date'] = datetime.strptime(statistic_date, '%Y/%m/%d')
            yield item
        # Queue the next page (no ext carried; fund name is re-read per page).
        pg = response.meta['pg']
        next_pg = int(pg) + 1
        next_url = response.url.replace('&page=' + str(pg),
                                        '&page=' + str(next_pg))
        self.ips.append({
            'url': next_url,
            'ref': response.url,
            'pg': next_pg,
        })
def parse_item(self, response):
    """Parse the modal NAV table, skipping rows whose NAV cell reads
    '(已清盘)' (fund liquidated), then hand off to the next request."""
    fps = response.meta['fps']
    ips = response.meta['ips']
    fund_name = response.xpath(
        '//*[@id="fund-gy"]/div[1]/div[1]/p[1]/span/text()').extract_first()
    rows = response.xpath(
        '//*[@id="divmodal"]/div/div/div[2]/div/div/table/tbody/tr')
    for row in rows[1:]:  # skip the header row
        day = row.xpath("./td[1]/text()").extract_first()
        unit_nav = row.xpath("./td[2]/text()").extract_first()
        if '(已清盘)' == unit_nav:
            # Liquidated -- no numeric NAV on this row.
            continue
        accum_nav = row.xpath("./td[3]/text()").extract_first()
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        item['statistic_date'] = datetime.strptime(day, '%Y-%m-%d')
        item['nav'] = float(unit_nav)
        item['added_nav'] = float(accum_nav)
        yield item
    yield self.request_next(fps, ips)
def parse_item(self, response):
    """Parse a JSON 'result' list of NAV records ('init_date' is YYYYMMDD).

    Fix: the original created ONE item before the loop and mutated/yielded
    it repeatedly, handing downstream consumers aliased objects; a fresh
    item is now built per record.
    """
    nv_list = json.loads(response.text)['result']
    for each_nv in nv_list:
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = each_nv['pd_name']
        init_date = each_nv['init_date']
        # 'YYYYMMDD' (possibly numeric) -> 'YYYY-MM-DD'
        statistic_date = str(init_date)[:4] + '-' + str(
            init_date)[4:6] + '-' + str(init_date)[6:8]
        item['statistic_date'] = datetime.strptime(statistic_date,
                                                   '%Y-%m-%d')
        nav = each_nv['share_net']
        item['nav'] = float(nav) if nav is not None else None
        added_nav = each_nv['share_net_total']
        item['added_nav'] = float(
            added_nav) if added_nav is not None else None
        yield item
def parse_item(self, response):
    """Parse 'tr.tts' NAV rows (this site publishes no accumulated NAV) and
    queue the next numbered page while data rows keep appearing."""
    ips = response.meta['ips']
    data_rows = response.css('tr.tts')
    if data_rows:
        for data_row in data_rows:
            name = data_row.xpath(
                './td[@width="126"]/text()').extract_first()
            day = data_row.xpath(
                './td[@width="89"]/text()').extract_first()
            unit_nav = data_row.xpath(
                './td[@width="73"]/text()').extract_first()
            item = GGFundNavItem()
            item['sitename'] = self.sitename
            item['channel'] = self.channel
            item['url'] = response.url
            item['fund_name'] = name
            if day is not None:
                item['statistic_date'] = datetime.strptime(day, '%Y-%m-%d')
            else:
                item['statistic_date'] = None
            item['nav'] = float(unit_nav) if unit_nav is not None else None
            item['added_nav'] = None
            yield item
        # Bump the '-<n>.html' suffix of the URL to reach the next page.
        current_url = response.url
        page_no = re.compile('.*-(\d+)\.html').findall(current_url)[0]
        replacement = str(int(page_no) + 1) + '.html'
        ips.append({'url': re.sub('\d+\.html', replacement, current_url),
                    'ref': current_url})
    yield self.request_next()
def parse_item(self, response):
    """Parse NAV rows whose first cell is '<fund>净值'; rows carry either
    (nav, added_nav, date) or just (nav, date). Follows the '下一页' link.
    """
    f_list = response.xpath('//tbody//tr')
    for i in f_list:
        item = GGFundNavItem()
        t = i.xpath('td//text()').extract()
        # Fund name = first cell with its trailing '净值' ("NAV") removed.
        fund_name = re.findall('.*净值', t[0])[0].replace('净值', '')
        if i.xpath('td[3]//text()'):
            nav = t[1]
            added_nav = t[2]
            statistic_date = t[3]
            item['nav'] = float(nav) if nav is not None else None
            # Bug fix: guard previously tested `nav` instead of `added_nav`.
            item['added_nav'] = float(
                added_nav) if added_nav is not None else None
        else:
            nav = t[1]
            statistic_date = t[2]
            item['nav'] = float(nav) if nav is not None else None
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = fund_name
        item['statistic_date'] = datetime.strptime(statistic_date,
                                                   '%Y-%m-%d')
        yield item
    # Bug fix: '下一页' was unquoted inside contains(), so XPath treated it
    # as a (nonexistent) child-element path whose string value is '' --
    # making the predicate match every anchor. Quote it to match the real
    # "next page" link only.
    next_href = response.xpath(
        '//li[@class ="paging_next"]//a[contains(text(),"下一页")]//@href'
    ).extract_first()
    if next_href:
        ips_url = 'http://www.qk-capital.com' + next_href
        self.ips.append({'url': ips_url, 'ref': response.url})
def parse_item(self, response):
    """Parse the last 'tbody.list' table on the page; values for the product
    '中信兴聚一期' are published x100 and rescaled here."""
    for tr in response.css('tbody.list')[-1].css('tr'):
        cells = tr.xpath('td//text()').extract()
        name = cells[0]
        day = cells[2]
        unit_nav = float(cells[4])
        accum_nav = float(cells[5])
        if '中信兴聚一期' in name:
            # (translated) All NAVs for this product must be divided by 100.
            unit_nav = unit_nav / 100
            accum_nav = accum_nav / 100
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['channel'] = self.channel
        item['url'] = response.url
        item['fund_name'] = name
        item['nav'] = unit_nav
        item['added_nav'] = accum_nav
        item['statistic_date'] = datetime.strptime(day, '%Y-%m-%d')
        yield item
    yield self.request_next()
def parse_item(self, response):
    """Parse the product NAV table (NAV, accumulated NAV, YYYYMMDD date) and
    paginate up to the last page listed in the page <select> element."""
    ext = response.meta['ext']
    url = ext['url']
    page = int(ext['page'])
    next_page = response.xpath(
        '//*[@id="productcontent"]/div[3]/div[2]/select[@name="page"]/option[last()]/text()'
    ).re_first(r'(\d+)')
    fund_name = response.xpath(
        '//*[@id="prodtitle"]/text()').extract_first()
    rows = response.xpath('//*[@id="productcontent"]/div[3]/table//tr')
    for row in rows[1:]:
        item = GGFundNavItem()
        item['sitename'] = self.sitename
        item['fund_name'] = fund_name
        item['channel'] = self.channel
        item['url'] = response.url
        nav = row.xpath('./td[1]/text()').extract_first()
        # Guard empty/missing cells instead of crashing in float().
        item['nav'] = float(nav) if nav else None
        added_nav = row.xpath('./td[2]/text()').extract_first()
        item['added_nav'] = float(added_nav) if added_nav else None
        statistic_date = row.xpath('./td[3]/text()').extract_first()
        item['statistic_date'] = datetime.strptime(statistic_date, '%Y%m%d')
        yield item
    # Bug fix: the page <select> may be absent (single-page product), in
    # which case re_first returns None and int(None) would crash.
    if next_page is not None and page < int(next_page):
        self.ips.append({
            'url': url + '&page=' + str(page + 1),
            'ref': response.url,
            'ext': {
                'page': str(page + 1),
                'url': url
            }
        })