def start_requests(self):
    """Yield one request per date for the chinaclear pledge-proportion query.

    Only the entry of ``self.start_urls`` that equals the pledge-proportion
    URL template is handled; every other entry is ignored. For that entry,
    ``S.datelist('20100101', <today>, "%Y.%m.%d")`` supplies the date values
    (presumably every day from 2010-01-01 up to today -- confirm against
    ``S.datelist``), which are walked in reversed order.

    Each request carries ``page`` (always 1) and ``date`` in its meta and no
    explicit callback.
    """
    pledge_template = 'http://www.chinaclear.cn/cms-rank/queryPledgeProportion?queryDate={date}&secCde='
    first_page = 1

    for template in self.start_urls:
        if template != pledge_template:
            continue

        end_day = datetime.datetime.now().strftime("%Y%m%d")
        days = S.datelist('20100101', end_day, "%Y.%m.%d")
        # Reverse in place so the last date returned by S.datelist is requested first.
        days.reverse()

        for day in days:
            yield scrapy.Request(
                template.format(date=day),
                meta={'page': first_page, 'date': day},
                headers=hdr(),
                priority=0,
            )
def start_requests(self):
    """Yield the first-page request for every recognised entry of ``self.start_urls``.

    Side effect: sets ``self.Start`` to a mapping of CATALOGID -> callback
    used for the POST listing requests.

    Three kinds of start URL are recognised; anything else is skipped:

    * the bare FrontController URL -- one POST request per ``self.Start``
      entry, with the body built by ``szse_data(page, CATALOGID)``;
    * the ``1837_xxpl`` template -- one GET request per date obtained from
      ``S.datelist('20100101', <today>, "%Y-%m-%d")``, walked in reversed
      order, handled by ``self.rzrqparse``;
    * the remaining paged templates -- a single GET request with
      ``page=1`` substituted, handled by the callback listed in the route
      table below (the ``1798`` URL additionally embeds today's date).
    """
    self.Start = {
        '1110': self.colistparse,
        '1105': self.fundlistparse,
        '1273': self.zqparse,
    }

    front_controller = 'http://www.szse.cn/szseWeb/FrontController.szse'
    per_date_template = 'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1837_xxpl&TABKEY=tab1&txtDate={date}&tab2PAGENO={page}&tab2PAGECOUNT=&tab2RECORDCOUNT=&REPORT_ACTION=navigate'
    # The %s slot is filled with today's date before comparing with a start URL.
    dated_template = 'http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1798&TABKEY=tab1&txtKsrq=2000-01-01&txtZzrq=%s&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate'

    # (URL template, name of the callback method on this spider).
    # Names are resolved with getattr only when a URL actually matches.
    paged_routes = (
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1900&TABKEY=tab2&tab2PAGENO={page}&tab2PAGECOUNT=&tab2RECORDCOUNT=&REPORT_ACTION=navigate', 'zrdsparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1901&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate', 'zrdmparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1901&TABKEY=tab2&tab2PAGENO={page}&tab2PAGECOUNT=&tab2RECORDCOUNT=&REPORT_ACTION=navigate', 'dmzgpxdaparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1900&TABKEY=tab3&tab3PAGENO={page}&tab3PAGECOUNT=&tab3RECORDCOUNT=&REPORT_ACTION=navigate', 'dsrckparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1759_cxda&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate', 'chufaparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1903_detail&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate', 'zhongjiechufaparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1902&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate', 'jiechuxianshoufaparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1902&TABKEY=tab2&tab2PAGENO={page}&tab2PAGECOUNT=&tab2RECORDCOUNT=&REPORT_ACTION=navigate', 'jiechuxianshou1perfaparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1902&TABKEY=tab3&tab3PAGENO={page}&tab3PAGECOUNT=&tab3RECORDCOUNT=&REPORT_ACTION=navigate', 'jiechuxianshou5perfaparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=sgshqd&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate', 'sghgqdparse'),
        # NOTE: this URL spells "report_action" in lowercase, unlike the others.
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1931_zcjhcjxx&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&report_action=navigate', 'zcjhcjxxparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1839_zcjhcpxx&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate', 'zcjhcpxxparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=SSGSGMXX&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate', 'fullnamechangeparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=SSGSGMXX&TABKEY=tab2&tab2PAGENO={page}&tab2PAGECOUNT=&tab2RECORDCOUNT=&REPORT_ACTION=navigate', 'shortnamechangeparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1793_ssgs&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate', 'suspendListingparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=1793_ssgs&TABKEY=tab2&tab2PAGENO={page}&tab2PAGECOUNT=&tab2RECORDCOUNT=&REPORT_ACTION=navigate', 'StopListingparse'),
        ('http://www.szse.cn/szseWeb/FrontController.szse?ACTIONID=7&AJAX=AJAX-TRUE&CATALOGID=xmjdxx&TABKEY=tab1&tab1PAGENO={page}&tab1PAGECOUNT=&tab1RECORDCOUNT=&REPORT_ACTION=navigate', 'projparse'),
    )

    first_page = 1

    for target in self.start_urls:
        # Case 1: bare endpoint -> POST one listing request per catalogue id.
        if target == front_controller:
            for catalog_id, handler in self.Start.items():
                payload = szse_data(first_page, catalog_id)
                request_meta = {
                    'CATALOGID': catalog_id,
                    'page': first_page,
                    '_url': target,
                }
                yield scrapy.Request(
                    target,
                    callback=handler,
                    method='POST',
                    headers=hdr(),
                    meta=request_meta,
                    body=payload,
                )
            continue

        # Case 2: per-date query -> one request per date, reversed order.
        if target == per_date_template:
            end_day = datetime.datetime.now().strftime("%Y%m%d")
            days = S.datelist('20100101', end_day, "%Y-%m-%d")
            days.reverse()
            for day in days:
                yield scrapy.Request(
                    target.format(page=first_page, date=day),
                    headers=hdr(),
                    meta={'page': first_page, 'date': day},
                    callback=self.rzrqparse,
                )
            continue

        # Case 3: plain paged templates -> a single first-page request.
        handler_name = next(
            (name for template, name in paged_routes if target == template),
            None,
        )
        if handler_name is None:
            todays_url = dated_template % datetime.datetime.now().strftime("%Y-%m-%d")
            if target == todays_url:
                handler_name = 'tfpxxparse'
        if handler_name is None:
            # Unrecognised start URL: nothing is requested for it.
            continue

        yield scrapy.Request(
            target.format(page=first_page),
            headers=hdr(),
            meta={'page': first_page},
            callback=getattr(self, handler_name),
        )