def start_requests(self):
    """Yield one POST request per (stock row, report type, form payload).

    Stock rows come from ``stock_code.StockCode().stock_code_fetchall()``
    and are indexed as ``row[0]`` (stock code) and ``row[2]`` (abbreviated
    name).  Form payloads come from ``stock_code.StockCode().post_data()``
    and are used as dicts.

    Each request carries its own copy of the payload with ``cwzb`` set to
    the report type, so the ``meta['data']`` seen by a callback is never
    altered by later iterations of this generator.

    Yields:
        FormRequest: routed to the ``parse_<report type>`` callback.
    """
    code_list = stock_code.StockCode().stock_code_fetchall()  # stock rows from the database
    post_data = stock_code.StockCode().post_data()  # list of form-data dicts

    # Report type -> callback, in the order the report types are requested.
    callbacks = {
        'financialreport': self.parse_financialreport,
        'incomestatements': self.parse_incomestatements,
        'balancesheet': self.parse_balancesheet,
        'cashflow': self.parse_cashflow,
    }

    for code in code_list:
        for tag, callback in callbacks.items():
            post_base_url = f'http://www.cninfo.com.cn/information/stock/{tag}_.jsp?stockCode={code[0]}'
            for data in post_data:
                # Copy instead of mutating the shared payload: the dict is
                # passed by reference in ``meta`` and would otherwise show
                # whichever tag was written last.
                form = dict(data, cwzb=tag)
                yield FormRequest(
                    url=post_base_url,
                    formdata=form,
                    callback=callback,
                    meta={
                        'data': form,
                        'abb_name': code[2],
                        'stock_code': code[0],
                    },
                )
def start_requests(self):
    """Yield paginated announcement queries for every stock on a known board.

    Stock rows come from ``stock_code.StockCode().stock_code_fetchall()``
    and are indexed as ``row[0]`` (stock code) and ``row[1]`` (board name).
    Rows whose board is not one of the four mapped boards are skipped.
    For each payload returned by ``full_annoucement`` pages 1 through 49
    are requested.

    Each request gets its own copy of the payload so that
    ``meta['data']['pageNum']`` matches the page that was actually
    requested.

    Yields:
        FormRequest: routed to ``self.parse`` with ``data`` and ``part``
        in ``meta``.
    """
    base_url = 'http://www.cninfo.com.cn/cninfo-new/announcement/query'
    # Board name as stored in the database -> query column identifier.
    board_columns = {
        '深市主板': 'szse_main',
        '中小企业板': 'szse_sme',
        '创业板': 'szse_gem',
        '沪市主板': 'sse',
    }

    code_list = stock_code.StockCode().stock_code_fetchall()  # stock rows from the database
    for code in code_list:
        stock_code1 = code[0]
        part = code[1]
        column = board_columns.get(part)
        if column is None:
            continue

        # NOTE(review): list(column) splits the identifier into single
        # characters (list('sse') == ['s', 's', 'e']).  Kept exactly as
        # before because full_annoucement is not visible here -- confirm
        # whether [column] was intended.
        announcement_post_data = stock_code.StockCode().full_annoucement(
            stock_code1, list(column))  # list of form-data dicts

        for data in announcement_post_data:
            for page in range(1, 50):
                page_data = dict(data, pageNum=f'{page}')
                try:
                    request = FormRequest(
                        url=base_url,
                        formdata=page_data,
                        callback=self.parse,
                        meta={'data': page_data, 'part': part},
                    )
                except Exception:
                    # Best effort as before: pause briefly and move on to
                    # the next page.  Only request construction is guarded,
                    # so GeneratorExit raised at the yield is no longer
                    # swallowed.
                    time.sleep(0.5)
                    continue
                yield request
def start_requests(self):
    """Yield one company-page request per stock row, chosen by board.

    Rows are read from ``stock_code.StockCode().stock_code_fetchall()``;
    ``row[0]`` is used as the stock code and ``row[2]`` as the abbreviated
    name.  Mainland boards go through ``self.request`` with a URL built
    from ``self.crawl_mode``; Hong Kong boards go straight to
    ``self.parse_cn_hk``.  Hong Kong GEM rows are only requested when the
    numeric stock code is greater than 8000.  Rows matching no board are
    skipped.
    """
    mode = self.crawl_mode  # page tag
    rows = stock_code.StockCode().stock_code_fetchall()  # stock rows from the database

    mainland_root = f'http://www.cninfo.com.cn/information/{mode}/'
    # e.g. http://www.cninfo.com.cn/information/hk/mb/brief00034.html
    hk_main_root = 'http://www.cninfo.com.cn/information/hk/mb/brief'  # Hong Kong main board
    hk_gem_root = 'http://www.cninfo.com.cn/information/hk/gem/brief'  # Hong Kong GEM

    # Checked in this order; the first board found in the row wins.
    board_prefixes = (
        ('沪市主板', 'shmb'),
        ('深市主板', 'szmb'),
        ('中小企业板', 'szsme'),
        ('创业板', 'szcn'),
    )

    for row in rows:
        prefix = next(
            (url_part for board, url_part in board_prefixes if board in row),
            None,
        )
        if prefix is not None:
            yield self.request(
                url=mainland_root + prefix + row[0] + '.html',
                tag=mode,
                abb_name=row[2],
                stock_code=row[0],
            )
            continue

        if '香港主板' in row:
            hk_root = hk_main_root
        elif '香港创业板' in row and int(row[0]) > 8000:
            hk_root = hk_gem_root
        else:
            continue

        yield Request(
            hk_root + row[0] + '.html',
            callback=self.parse_cn_hk,
            meta={'stock_code': row[0], 'abb_name': row[2]},
        )
def start_requests(self):
    """Yield a GBK-encoded request for each periodic-report feed of each stock.

    Rows are read from ``stock_code.StockCode().stock_code_fetchall()``;
    ``row[0]`` is the stock code, ``row[1]`` is passed on as ``part`` and
    ``row[2]`` as ``abb_name``.  Four feeds are requested per row, all
    handled by ``self.parse``.
    """
    # (URL directory, URL sub-directory) pairs, requested in this order.
    report_feeds = (
        ('annualreport', 'ar1y'),
        ('seannualreport', 'sar1y'),
        ('1qreport', '1qr1y'),
        ('3qreport', '3qr1y'),
    )
    rows = stock_code.StockCode().stock_code_fetchall()  # stock rows from the database
    for row in rows:
        for tag_key, tag_value in report_feeds:
            feed_url = f'http://www.cninfo.com.cn//disclosure/{tag_key}/stocks/{tag_value}/cninfo/{row[0]}.js?ver=201809111413'
            request_meta = {
                'abb_name': row[2],
                'stock_code': row[0],
                'part': row[1],
            }
            yield Request(
                feed_url,
                callback=self.parse,
                meta=request_meta,
                encoding='gbk',
            )
def start_requests(self):
    """Yield the latest-disclosure feed request for every Hong Kong stock.

    Rows are read from ``stock_code.StockCode().stock_code_fetchall()``.
    Only rows whose ``row[1]`` is one of the two Hong Kong board names are
    requested; all other rows are skipped.  Responses go to ``self.parse``.
    """
    # e.g. http://www.cninfo.com.cn//disclosure/fulltext/stocks/hkmblatest/00003.js?ver=201809111755
    hk_boards = ('香港主板', '香港创业板')
    feeds = ('hkmblatest',)

    rows = stock_code.StockCode().stock_code_fetchall()  # stock rows from the database
    for row in rows:
        if row[1] not in hk_boards:
            continue
        for tag in feeds:
            feed_url = f'http://www.cninfo.com.cn//disclosure/fulltext/stocks/{tag}/{row[0]}.js?ver=201809111755'
            yield Request(
                feed_url,
                callback=self.parse,
                meta={
                    'abb_name': row[2],
                    'stock_code': row[0],
                    'part': row[1],
                },
            )
def start_requests(self):
    """Yield the two shareholder pages for every stock row.

    Rows are read from ``stock_code.StockCode().stock_code_fetchall()``;
    ``row[0]`` is the stock code and ``row[2]`` is passed on as
    ``abb_name``.  Both pages are handled by ``self.parse``.
    """
    pages = ('shareholders', 'circulateshareholders')
    rows = stock_code.StockCode().stock_code_fetchall()  # stock rows from the database
    yield from (
        Request(
            f'http://www.cninfo.com.cn/information/{tag}/{row[0]}.html',
            callback=self.parse,
            meta={'abb_name': row[2], 'stock_code': row[0]},
        )
        for row in rows
        for tag in pages
    )
def start_requests(self):
    """Yield one GBK-encoded summary-feed request per stock row.

    Rows are read from ``stock_code.StockCode().stock_code_fetchall()``;
    ``row[0]`` is the stock code, ``row[1]`` is passed on as ``part`` and
    ``row[2]`` as ``abb_name``.  Responses go to ``self.parse``.
    """
    for row in stock_code.StockCode().stock_code_fetchall():  # stock rows from the database
        summary_url = f'http://www.cninfo.com.cn//disclosure/summary/stocks/summary1y/cninfo/{row[0]}.js?ver=201809291501'
        request_meta = {
            'abb_name': row[2],
            'stock_code': row[0],
            'part': row[1],
        }
        yield Request(
            summary_url,
            callback=self.parse,
            meta=request_meta,
            encoding='gbk',
        )
def start_requests(self):
    """Yield the ``gzzd`` and ``gszc`` feed requests for non-Hong-Kong stocks.

    Rows are read from ``stock_code.StockCode().stock_code_fetchall()``;
    ``row[0]`` is the stock code, ``row[1]`` the board name (passed on as
    ``part``) and ``row[2]`` the abbreviated name.  Rows on either Hong
    Kong board are skipped.

    The previous condition ``code[1] != '香港主板' or '香港创业板'`` was
    always true because the second operand is a non-empty string, so Hong
    Kong rows were requested as well; a membership test is used instead.

    Yields:
        Request: GBK-encoded, routed to ``self.parse``.
    """
    # e.g. http://www.cninfo.com.cn//disclosure/gzzd/stocks/gzzd1y/000004.js?ver=201809111727
    # e.g. http://www.cninfo.com.cn//disclosure/gszc/stocks/gszc1y/cninfo/000004.js?ver=201809111731
    hk_boards = ('香港主板', '香港创业板')
    tag_dict = {'gzzd': 'gzzd1y', 'gszc': 'gszc1y'}

    code_list = stock_code.StockCode().stock_code_fetchall()  # stock rows from the database
    for code in code_list:
        if code[1] in hk_boards:
            continue
        for tag_key, tag_value in tag_dict.items():
            if tag_key == 'gszc':
                # Only the gszc feed lives under an extra "cninfo" directory.
                base_url = f'http://www.cninfo.com.cn//disclosure/{tag_key}/stocks/{tag_value}/cninfo/{code[0]}.js?ver=201809111727'
            else:
                base_url = f'http://www.cninfo.com.cn//disclosure/{tag_key}/stocks/{tag_value}/{code[0]}.js?ver=201809111727'
            yield Request(
                base_url,
                callback=self.parse,
                meta={
                    'abb_name': code[2],
                    'stock_code': code[0],
                    'part': code[1],
                },
                encoding='gbk',
            )
def start_requests(self):
    """Yield the ``cwgw`` and ``bjjg`` feed requests for every stock row.

    Rows are read from ``stock_code.StockCode().stock_code_fetchall()``;
    ``row[0]`` is the stock code, ``row[1]`` is passed on as ``part`` and
    ``row[2]`` as ``abb_name``.  Requests are GBK-encoded and handled by
    ``self.parse``.
    """
    # e.g. http://www.cninfo.com.cn//disclosure/cxdd/stocks/cwgw/002002.js?ver=201809111612
    # e.g. http://www.cninfo.com.cn//disclosure/cxdd/stocks/bjjg/002002.js?ver=201809111612
    feeds = ('cwgw', 'bjjg')
    rows = stock_code.StockCode().stock_code_fetchall()  # stock rows from the database
    for row in rows:
        shared_fields = (
            ('abb_name', row[2]),
            ('stock_code', row[0]),
            ('part', row[1]),
        )
        for tag in feeds:
            feed_url = f'http://www.cninfo.com.cn//disclosure/cxdd/stocks/{tag}/{row[0]}.js?ver=201809111612'
            # A fresh meta dict is built for every request.
            yield Request(
                feed_url,
                callback=self.parse,
                meta=dict(shared_fields),
                encoding='gbk',
            )
def start_requests(self):
    """Yield the five ``tzzgxxx`` feed requests for every stock row.

    Rows are read from ``stock_code.StockCode().stock_code_fetchall()``;
    ``row[0]`` is the stock code, ``row[1]`` is passed on as ``part`` and
    ``row[2]`` as ``abb_name``.  Requests are GBK-encoded and handled by
    ``self.parse``.
    """
    # e.g. http://www.cninfo.com.cn//disclosure/tzzgxxx/stocks/zxxx1y/cninfo/000001.js?ver=201809111559
    # e.g. http://www.cninfo.com.cn//disclosure/tzzgxxx/stocks/dyhd1y/cninfo/000001.js?ver=201809111600
    # e.g. http://www.cninfo.com.cn//disclosure/tzzgxxx/stocks/mtcf1y/cninfo/000001.js?ver=201809111600
    # e.g. http://www.cninfo.com.cn//disclosure/tzzgxxx/stocks/lyhd1y/cninfo/000001.js?ver=201809111600
    # e.g. http://www.cninfo.com.cn//disclosure/tzzgxxx/stocks/glzd1y/cninfo/000001.js?ver=201809111600
    feeds = ('zxxx1y', 'dyhd1y', 'mtcf1y', 'lyhd1y', 'glzd1y')
    rows = stock_code.StockCode().stock_code_fetchall()  # stock rows from the database
    yield from (
        Request(
            f'http://www.cninfo.com.cn//disclosure/tzzgxxx/stocks/{tag}/cninfo/{row[0]}.js?ver=201809111600',
            callback=self.parse,
            meta={
                'abb_name': row[2],
                'stock_code': row[0],
                'part': row[1],
            },
            encoding='gbk',
        )
        for row in rows
        for tag in feeds
    )
def start_requests(self):
    """Yield pages 1-50 of the announcement query for every category payload.

    Payloads come from
    ``stock_code.StockCode().announcement_post_data(category)`` and are
    used as dicts.  Each request is sent with ``column`` set to
    ``'staq_net_delisted'`` and ``pageNum`` set to the page number.

    Pagination restarts at 1 for every payload.  Previously the page
    counter was initialised once outside the loop, so only the first
    payload got pages 1-50 and each later payload produced a single
    request for page 51, 52, and so on.  Each request also gets its own
    copy of the payload so ``meta['data']`` matches the page requested.

    Yields:
        FormRequest: routed to ``self.parse`` with ``data`` in ``meta``.
    """
    category = [
        'category_lsgg_lwts;',
        'category_dqgg_lwts;',
        'category_zjjg_lwts;',
        'category_cxpl_lwts;',
        'category_scpl_lwts;',
    ]
    announcement_post_data = stock_code.StockCode().announcement_post_data(
        category)  # list of form-data dicts
    tag = 'staq_net_delisted'  # two-network companies and delisted companies
    base_url = 'http://www.cninfo.com.cn/cninfo-new/announcement/query'

    for data in announcement_post_data:
        for page in range(1, 51):
            page_data = dict(data, column=tag, pageNum=f'{page}')
            yield FormRequest(
                url=base_url,
                formdata=page_data,
                callback=self.parse,
                meta={'data': page_data},
            )
def start_requests(self):
    """Yield announcement-query pages per category while ``self.active`` is set.

    Payloads come from
    ``stock_code.StockCode().announcement_post_data(category)`` and are
    used as dicts.  Each request is sent with ``column`` set to
    ``'neeq_company'`` and ``pageNum`` counting up from 1.

    Paging for a payload continues for as long as ``self.active`` is
    truthy.  NOTE(review): nothing in this method clears the flag, so it
    is presumably cleared by a callback once a category runs out of
    results -- confirm; if it is never cleared the loop does not end.

    When the flag is found cleared at the start of a payload it is
    re-armed and that payload is crawled.  Previously the re-arm branch
    also executed ``continue``, which skipped the payload entirely and so
    dropped every category that followed an exhausted one.  Each request
    also gets its own copy of the payload so ``meta['data']`` matches the
    page requested.

    Yields:
        FormRequest: routed to ``self.parse`` with ``data`` in ``meta``.
    """
    category = [
        'category_lsgg_gfzr;',
        'category_dqgg_gfzr;',
        'category_zjjg_gfzr;',
        'category_cxpl_gfzr;',
        'category_scpl_gfzr;',
    ]  # category values placed in the form data
    announcement_post_data = stock_code.StockCode().announcement_post_data(
        category)  # list of form-data dicts
    tag = 'neeq_company'  # companies listed on the share-transfer system
    base_url = 'http://www.cninfo.com.cn/cninfo-new/announcement/query'

    for data in announcement_post_data:
        if not self.active:
            # The previous category finished; re-arm for this one.
            self.active = True
        page = 1
        while self.active:
            page_data = dict(data, column=tag, pageNum=f'{page}')
            yield FormRequest(
                url=base_url,
                formdata=page_data,
                callback=self.parse,
                meta={'data': page_data},
            )
            page += 1