def start_requests(self):
     """Yield one form POST per stock code, statement type and payload.

     For every row returned by ``stock_code_fetchall()`` and each of the four
     statement tags, every form-data dict from ``post_data()`` is posted to
     ``.../information/stock/{tag}_.jsp?stockCode=<code[0]>`` with its
     ``cwzb`` field set to the tag.  The response is routed to the matching
     ``parse_<tag>`` callback.

     Each request gets its own copy of the form data.  Previously a single
     dict was mutated in place and also stored in ``meta['data']``, so by
     the time a callback ran the dict could already carry the ``cwzb`` value
     of a later iteration.

     Yields:
         FormRequest: with ``meta`` keys ``data`` (the posted form data),
         ``abb_name`` (``code[2]``) and ``stock_code`` (``code[0]``).
     """
     # Rows from the database; code[0] is used as the stock code and code[2]
     # as the abbreviated name below.
     code_list = stock_code.StockCode().stock_code_fetchall()
     # POST parameters: a list whose elements are form-data dicts.
     post_data = stock_code.StockCode().post_data()
     # (tag, callback) pairs, in the order the tags were originally requested.
     tag_callbacks = (
         ('financialreport', self.parse_financialreport),
         ('incomestatements', self.parse_incomestatements),
         ('balancesheet', self.parse_balancesheet),
         ('cashflow', self.parse_cashflow),
     )
     for code in code_list:  # e.g. 000001
         for tag, callback in tag_callbacks:  # e.g. balancesheet
             post_base_url = f'http://www.cninfo.com.cn/information/stock/{tag}_.jsp?stockCode={code[0]}'
             for data in post_data:
                 # Copy so that every request (and its meta) owns its payload.
                 payload = dict(data, cwzb=tag)
                 yield FormRequest(url=post_base_url,
                                   formdata=payload,
                                   callback=callback,
                                   meta={
                                       'data': payload,
                                       'abb_name': code[2],
                                       'stock_code': code[0]
                                   })
 def start_requests(self):
     """Yield paginated announcement-query POSTs for every known stock.

     Each row from ``stock_code_fetchall()`` supplies the stock code
     (``code[0]``) and its market segment (``code[1]``).  Rows whose segment
     is not one of the four mapped below produce no requests.  For the
     others, every form-data dict returned by ``full_annoucement()`` is
     posted for pages 1 through 49.

     Changes from the previous version:

     * The four copy-pasted branches are replaced by one segment-to-column
       mapping.
     * Each request gets its own copy of the form data, so ``meta['data']``
       keeps the ``pageNum`` that was actually posted instead of aliasing a
       dict that later iterations overwrite.
     * The bare ``except:`` that wrapped the ``yield`` is narrowed to
       ``except Exception`` around request construction only, so
       ``GeneratorExit`` and ``KeyboardInterrupt`` are no longer swallowed.

     Yields:
         FormRequest: with ``meta`` keys ``data`` (the posted form data) and
         ``part`` (the market segment).
     """
     base_url = 'http://www.cninfo.com.cn/cninfo-new/announcement/query'
     # Market segment (as stored in the database) -> query column name.
     column_by_part = {
         '深市主板': 'szse_main',
         '中小企业板': 'szse_sme',
         '创业板': 'szse_gem',
         '沪市主板': 'sse',
     }
     # Database entry point: rows of stock-code tuples.
     code_list = stock_code.StockCode().stock_code_fetchall()
     for code in code_list:
         stock_code1 = code[0]
         part = code[1]
         column = column_by_part.get(part)
         if column is None:
             continue
         # NOTE(review): list(column) splits the name into single characters
         # (e.g. ['s', 's', 'e']), which is what the original code passed.
         # [column] may have been intended - confirm against full_annoucement().
         announcement_post_data = stock_code.StockCode().full_annoucement(
             stock_code1, list(column))  # POST parameters: list of form-data dicts
         for data in announcement_post_data:
             for i in range(1, 50):
                 payload = dict(data, pageNum=f'{i}')
                 try:
                     request = FormRequest(url=base_url,
                                           formdata=payload,
                                           callback=self.parse,
                                           meta={'data': payload, 'part': part})
                 except Exception:
                     # Best-effort, as before: pause briefly and move on to
                     # the next page instead of aborting the whole crawl.
                     time.sleep(0.5)
                     continue
                 yield request
Esempio n. 3
0
    def start_requests(self):
        """Yield one company-page request per stock row, chosen by market segment.

        Mainland segments go through ``self.request`` with a URL of the form
        ``.../information/<crawl_mode>/<prefix><code>.html``.  Hong Kong main
        board rows, and Hong Kong GEM rows whose numeric code is above 8000,
        are fetched from the ``brief`` pages and handled by
        ``self.parse_cn_hk``.  Rows matching none of the segments are skipped.
        """
        tag = self.crawl_mode  # page tag
        base_url = f'http://www.cninfo.com.cn/information/{tag}/'
        # e.g. http://www.cninfo.com.cn/information/hk/mb/brief00034.html
        hk_detail_base_url = 'http://www.cninfo.com.cn/information/hk/mb/brief'  # Hong Kong main board
        hk_gem_url = 'http://www.cninfo.com.cn/information/hk/gem/brief'  # Hong Kong GEM pages
        # (segment label, URL prefix), tested in this order against each row.
        mainland_prefixes = (
            ('沪市主板', 'shmb'),
            ('深市主板', 'szmb'),
            ('中小企业板', 'szsme'),
            ('创业板', 'szcn'),
        )
        # Database entry point.
        for code in stock_code.StockCode().stock_code_fetchall():
            prefix = next(
                (url_prefix for label, url_prefix in mainland_prefixes if label in code),
                None,
            )
            if prefix is not None:
                page_url = base_url + prefix + code[0] + '.html'
                yield self.request(url=page_url, tag=tag, abb_name=code[2], stock_code=code[0])
                continue

            if '香港主板' in code:
                hk_url = hk_detail_base_url + code[0] + '.html'
            elif '香港创业板' in code and int(code[0]) > 8000:
                hk_url = hk_gem_url + code[0] + '.html'
            else:
                continue
            yield Request(hk_url, callback=self.parse_cn_hk,
                          meta={'stock_code': code[0], 'abb_name': code[2]})
 def start_requests(self):
     """Yield a request for each periodic-report feed of every stock.

     Four feeds are fetched per row of ``stock_code_fetchall()``; each
     response is decoded as GBK and handed to ``self.parse`` together with
     the row's abbreviated name, stock code and market segment in ``meta``.
     """
     # (report path, feed name) pairs, requested in this order.
     report_feeds = (
         ('annualreport', 'ar1y'),
         ('seannualreport', 'sar1y'),
         ('1qreport', '1qr1y'),
         ('3qreport', '3qr1y'),
     )
     # Database entry point: rows of stock-code tuples.
     for row in stock_code.StockCode().stock_code_fetchall():
         for report, feed in report_feeds:
             feed_url = f'http://www.cninfo.com.cn//disclosure/{report}/stocks/{feed}/cninfo/{row[0]}.js?ver=201809111413'
             row_meta = {
                 'abb_name': row[2],
                 'stock_code': row[0],
                 'part': row[1],
             }
             yield Request(feed_url, callback=self.parse, meta=row_meta, encoding='gbk')
Esempio n. 5
0
 def start_requests(self):
     """Yield the latest-fulltext feed request for every Hong Kong stock.

     Only rows whose segment (``code[1]``) is the Hong Kong main board or
     the Hong Kong GEM are requested; all other rows are skipped.

     Example URL:
     http://www.cninfo.com.cn//disclosure/fulltext/stocks/hkmblatest/00003.js?ver=201809111755
     """
     hk_segments = ('香港主板', '香港创业板')
     feeds = ['hkmblatest']
     # Database entry point: rows of stock-code tuples.
     for row in stock_code.StockCode().stock_code_fetchall():
         if row[1] not in hk_segments:
             continue
         for feed in feeds:
             feed_url = f'http://www.cninfo.com.cn//disclosure/fulltext/stocks/{feed}/{row[0]}.js?ver=201809111755'
             row_meta = {'abb_name': row[2], 'stock_code': row[0], 'part': row[1]}
             yield Request(feed_url, callback=self.parse, meta=row_meta)
 def start_requests(self):
     """Yield the two shareholder pages for every stock row.

     For each row of ``stock_code_fetchall()`` the ``shareholders`` page is
     requested first, then ``circulateshareholders``; both responses go to
     ``self.parse`` with the abbreviated name and stock code in ``meta``.
     """
     pages = ('shareholders', 'circulateshareholders')
     # Database entry point: rows of stock-code tuples.
     rows = stock_code.StockCode().stock_code_fetchall()
     yield from (
         Request(
             f'http://www.cninfo.com.cn/information/{page}/{row[0]}.html',
             callback=self.parse,
             meta={'abb_name': row[2], 'stock_code': row[0]},
         )
         for row in rows
         for page in pages
     )
Esempio n. 7
0
    def start_requests(self):
        """Yield the one-year summary feed request for every stock row.

        Responses are decoded as GBK and passed to ``self.parse`` with the
        row's abbreviated name, stock code and market segment in ``meta``.
        """
        # Database entry point: rows of stock-code tuples.
        rows = stock_code.StockCode().stock_code_fetchall()

        for row in rows:
            summary_url = f'http://www.cninfo.com.cn//disclosure/summary/stocks/summary1y/cninfo/{row[0]}.js?ver=201809291501'
            row_meta = {
                'abb_name': row[2],
                'stock_code': row[0],
                'part': row[1],
            }
            yield Request(summary_url, callback=self.parse, meta=row_meta, encoding='gbk')
Esempio n. 8
0
 def start_requests(self):
     """Yield the ``gzzd`` and ``gszc`` feed requests for non-Hong-Kong stocks.

     Rows whose segment (``code[1]``) is the Hong Kong main board or the
     Hong Kong GEM are skipped.  The previous condition,
     ``code[1] != '香港主板' or '香港创业板'``, was always true because a
     non-empty string literal is truthy, so Hong Kong rows were never
     filtered out.

     Example URLs:
     http://www.cninfo.com.cn//disclosure/gzzd/stocks/gzzd1y/000004.js?ver=201809111727
     http://www.cninfo.com.cn//disclosure/gszc/stocks/gszc1y/cninfo/000004.js?ver=201809111731

     Yields:
         Request: GBK-decoded, handled by ``self.parse``, with ``meta`` keys
         ``abb_name``, ``stock_code`` and ``part``.
     """
     # Database entry point: rows of stock-code tuples.
     code_list = stock_code.StockCode().stock_code_fetchall()
     hk_parts = ('香港主板', '香港创业板')
     tag_dict = {'gzzd': 'gzzd1y', 'gszc': 'gszc1y'}
     for code in code_list:  # iterate over the stock rows
         if code[1] in hk_parts:
             continue
         for tag_key, tag_value in tag_dict.items():
             # Only the gszc feed lives under an extra "cninfo/" path segment.
             subdir = 'cninfo/' if tag_key == 'gszc' else ''
             base_url = f'http://www.cninfo.com.cn//disclosure/{tag_key}/stocks/{tag_value}/{subdir}{code[0]}.js?ver=201809111727'
             yield Request(base_url,
                           callback=self.parse,
                           meta={
                               'abb_name': code[2],
                               'stock_code': code[0],
                               'part': code[1]
                           },
                           encoding='gbk')
Esempio n. 9
0
 def start_requests(self):
     """Yield the ``cwgw`` and ``bjjg`` feed requests for every stock row.

     Responses are decoded as GBK and passed to ``self.parse`` with the
     row's abbreviated name, stock code and market segment in ``meta``.

     Example URLs:
     http://www.cninfo.com.cn//disclosure/cxdd/stocks/cwgw/002002.js?ver=201809111612
     http://www.cninfo.com.cn//disclosure/cxdd/stocks/bjjg/002002.js?ver=201809111612
     """
     feeds = ('cwgw', 'bjjg')
     # Database entry point: rows of stock-code tuples.
     rows = stock_code.StockCode().stock_code_fetchall()
     for row in rows:
         for feed in feeds:
             feed_url = f'http://www.cninfo.com.cn//disclosure/cxdd/stocks/{feed}/{row[0]}.js?ver=201809111612'
             row_meta = {
                 'abb_name': row[2],
                 'stock_code': row[0],
                 'part': row[1],
             }
             yield Request(feed_url, callback=self.parse, meta=row_meta, encoding='gbk')
 def start_requests(self):
     """Yield the five ``tzzgxxx`` feed requests for every stock row.

     Responses are decoded as GBK and passed to ``self.parse`` with the
     row's abbreviated name, stock code and market segment in ``meta``.

     Example URLs:
     http://www.cninfo.com.cn//disclosure/tzzgxxx/stocks/zxxx1y/cninfo/000001.js?ver=201809111559
     http://www.cninfo.com.cn//disclosure/tzzgxxx/stocks/dyhd1y/cninfo/000001.js?ver=201809111600
     http://www.cninfo.com.cn//disclosure/tzzgxxx/stocks/mtcf1y/cninfo/000001.js?ver=201809111600
     http://www.cninfo.com.cn//disclosure/tzzgxxx/stocks/lyhd1y/cninfo/000001.js?ver=201809111600
     http://www.cninfo.com.cn//disclosure/tzzgxxx/stocks/glzd1y/cninfo/000001.js?ver=201809111600
     """
     feeds = ('zxxx1y', 'dyhd1y', 'mtcf1y', 'lyhd1y', 'glzd1y')
     # Database entry point: rows of stock-code tuples.
     rows = stock_code.StockCode().stock_code_fetchall()
     yield from (
         Request(
             f'http://www.cninfo.com.cn//disclosure/tzzgxxx/stocks/{feed}/cninfo/{row[0]}.js?ver=201809111600',
             callback=self.parse,
             meta={'abb_name': row[2], 'stock_code': row[0], 'part': row[1]},
             encoding='gbk',
         )
         for row in rows
         for feed in feeds
     )
Esempio n. 11
0
 def start_requests(self):
     """Yield pages 1-50 of the announcement query for each category payload.

     The form-data dicts come from ``announcement_post_data(category)``;
     each is posted with ``column`` set to ``'staq_net_delisted'`` and
     ``pageNum`` running from 1 to 50.

     Changes from the previous version:

     * The page counter restarts at 1 for every payload.  It used to be
       initialised once before the loop, so every payload after the first
       produced a single request for page 51, 52, ...
     * Each request gets its own copy of the form data, so ``meta['data']``
       reflects the page that was actually posted.

     Yields:
         FormRequest: handled by ``self.parse``, with ``meta['data']`` set
         to the posted form data.
     """
     category = [
         'category_lsgg_lwts;', 'category_dqgg_lwts;',
         'category_zjjg_lwts;', 'category_cxpl_lwts;', 'category_scpl_lwts;'
     ]
     # POST parameters: a list whose elements are form-data dicts.
     announcement_post_data = stock_code.StockCode().announcement_post_data(
         category)
     tag = 'staq_net_delisted'  # two-network companies and delisted companies
     base_url = 'http://www.cninfo.com.cn/cninfo-new/announcement/query'
     last_page = 50
     for data in announcement_post_data:
         for page in range(1, last_page + 1):
             payload = dict(data, column=tag, pageNum=f'{page}')
             yield FormRequest(url=base_url,
                               formdata=payload,
                               callback=self.parse,
                               meta={'data': payload})
Esempio n. 12
0
 def start_requests(self):
     """Yield announcement-query POSTs for the 'neeq_company' column.

     For each form-data dict returned by ``announcement_post_data(category)``
     the method posts consecutive pages, starting at 1, for as long as
     ``self.active`` is truthy.  Nothing in this method clears the flag.

     NOTE(review): presumably ``self.parse`` sets ``self.active`` to a falsy
     value when a category runs out of results - confirm.
     """
     category = [
         'category_lsgg_gfzr;', 'category_dqgg_gfzr;',
         'category_zjjg_gfzr;', 'category_cxpl_gfzr;', 'category_scpl_gfzr;'
     ]  # parameters that go into the form data
     announcement_post_data = stock_code.StockCode().announcement_post_data(
         category)  # POST parameters: a list whose elements are form-data dicts
     tag = 'neeq_company'  # companies listed on the share-transfer system
     base_url = 'http://www.cninfo.com.cn/cninfo-new/announcement/query'
     for data in announcement_post_data:
         i = 1  # page counter, restarted for every payload
         data['column'] = tag
         if self.active:
             # No upper bound on the page number here: the loop ends only
             # when self.active becomes falsy.
             while self.active:
                 data['pageNum'] = f'{i}'
                 # The same dict object is mutated on every iteration and is
                 # also stored in meta['data'].
                 yield FormRequest(url=base_url,
                                   formdata=data,
                                   callback=self.parse,
                                   meta={'data': data})
                 i += 1
         else:
             # Re-arm the flag and move on WITHOUT requesting this payload.
             # NOTE(review): once the while loop above ends, the next payload
             # lands here and is skipped - confirm that is intended.
             self.active = True
             continue