def get_json_str(self, id=''):
    """Fetch one page of Sina CN_Bill tick-by-tick data and return it decoded.

    Args:
        id: stock symbol spliced into the query string.
            NOTE(review): the original body concatenated the *builtin*
            ``id`` function here, which always raised TypeError; it is now
            an explicit parameter (default ``''`` keeps the old zero-arg
            call signature valid).

    Returns:
        The decoded response body as str, or None on any request/HTTP error.
    """
    wx = lg.get_handle()
    # self.items_page / self.date_str are assumed to already be str — TODO confirm
    url = "http://vip.stock.finance.sina.com.cn/quotes_service/api/json_v2.php/CN_Bill.GetBillList?" \
          "symbol="+id+"&num="+self.items_page+"&page=1&sort=ticktime&asc=0&volume=0&amount=0&type=0&" \
          "day="+self.date_str
    header = {
        'Cookie': 'yfx_c_g_u_id_10000042=_ck18112210334212135454572121490; yfx_mr_10000042=%3A%3Amarket_type_free_search%3A%3A%3A%3Abaidu%3A%3A%3A%3A%3A%3A%3A%3Awww.baidu.com%3A%3A%3A%3Apmf_from_free_search; yfx_key_10000042=; VISITED_COMPANY_CODE=%5B%22603017%22%2C%22600354%22%2C%22601975%22%2C%22600000%22%5D; VISITED_STOCK_CODE=%5B%22603017%22%2C%22600354%22%2C%22601975%22%2C%22600000%22%5D; seecookie=%5B601975%5D%3AST%u957F%u6CB9%2C%5B600000%5D%3A%u6D66%u53D1%u94F6%u884C; JSESSIONID=CA764F4C8465140437D5F6B868137460; yfx_f_l_v_t_10000042=f_t_1542854022203__r_t_1553650507322__v_t_1553651393256__r_c_23; VISITED_MENU=%5B%229055%22%2C%228536%22%2C%228451%22%2C%228453%22%2C%228454%22%2C%229057%22%2C%229062%22%2C%229056%22%2C%228466%22%2C%228523%22%2C%228528%22%5D',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36',
        'Referer': 'http://www.sse.com.cn/assortment/stock/list/share/'
    }
    # requests.packages.urllib3.disable_warnings()
    http = urllib3.PoolManager()
    try:
        raw_data = http.request('GET', url, headers=header)
    except Exception:
        # BUGFIX: the status check used to sit in a ``finally`` block, where
        # ``raw_data`` is unbound whenever the request itself raised
        # (NameError masking the handled exception). Checked after instead.
        return None
    if raw_data.status >= 300:
        wx.info("Web response failed : {}".format(url))
        return None
    # Detect the payload encoding, then decode via the project helper.
    str_type = chardet.detect(raw_data.data)
    # unicode = raw_data.data.decode(str_type['encoding'])
    unicode = lg.str_decode(raw_data.data, str_type['encoding'])
    return unicode
def get_json_str(self, id, time_str=''):
    """Fetch one page of 163 (money.163.com) tick-by-tick data, decoded.

    Args:
        id: stock symbol spliced into the query string and Referer.
        time_str: query cut-off time (URL-encoded, e.g. ``10%3A00%3A00``);
            empty string aborts the request.

    Returns:
        The decoded response body as str, or None on bad input or any
        request/HTTP error.
    """
    wx = lg.get_handle()
    if time_str == '':
        wx.info("[rt_163][get_json_str] 查询时间为空,退出!")
        return None
    url = "http://quotes.money.163.com/service/zhubi_ajax.html?symbol="+id+"&end="+time_str  # 10%3A00%3A00"
    header = {
        'Cookie': 'UM_distinctid=16bf36d52242f3-0693469a5596d3-e323069-1fa400-16bf36d5225362; _ntes_nnid=16b2182ff532e10833492eedde0996df,1563157161323; _ntes_nuid=16b2182ff532e10833492eedde0996df; vjuids=e0fb8aa0.16d4ee83324.0.e074eccb150e; [email protected]|1570190476|0|mail163|00&99|hen&1570190062&mail163#CN&null#10#0#0|&0|mail163|[email protected]; [email protected]:-1:1; mail_psc_fingerprint=8da65e9cc5769a658a69962d94f7c46f; _ntes_usstock_recent_=NTES%7C; _ntes_usstock_recent_=NTES%7C; vjlast=1568986903.1571018378.11; s_n_f_l_n3=e119c348b08890ac1571018378289; NNSSPID=0e35f22546f44023b00d65e2a3ca1f26; ne_analysis_trace_id=1571018721010; _ntes_stock_recent_=1002699%7C0600000%7C1000573; _ntes_stock_recent_=1002699%7C0600000%7C1000573; _ntes_stock_recent_=1002699%7C0600000%7C1000573; pgr_n_f_l_n3=e119c348b08890ac1571018815386610; vinfo_n_f_l_n3=e119c348b08890ac.1.5.1563157161368.1570632456351.1571018833379',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36',
        'Referer': 'http://quotes.money.163.com/trade/cjmx_'+id+'.html'
    }
    # requests.packages.urllib3.disable_warnings()
    http = urllib3.PoolManager()
    try:
        raw_data = http.request('GET', url, headers=header)
    except Exception:
        # BUGFIX: the status check used to sit in a ``finally`` block, where
        # ``raw_data`` is unbound whenever the request itself raised
        # (NameError masking the handled exception). Checked after instead.
        return None
    if raw_data.status >= 300:
        wx.info("Web response failed : {}".format(url))
        return None
    # Detect the payload encoding, then decode via the project helper.
    str_type = chardet.detect(raw_data.data)
    # unicode = raw_data.data.decode(str_type['encoding'])
    unicode = lg.str_decode(raw_data.data, str_type['encoding'])
    return unicode
def get_json_str(self, url, web_flag=None):
    """GET ``url`` with a site-specific header set and return the decoded body.

    Args:
        url: full URL to fetch.
        web_flag: selects the header profile — ``'sz_basic'``, ``'sh_basic'``
            or ``'eastmoney'``. Any other value (including the default
            ``None``) is rejected.

    Returns:
        The decoded response body as str, or None on unknown ``web_flag``
        or any request/HTTP error.
    """
    wx = lg.get_handle()
    if web_flag == 'sz_basic':
        header = {
            'User-Agent': r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36',
            'Connection': 'keep-alive'
        }
    elif web_flag == 'sh_basic':
        header = {
            'Cookie': 'yfx_c_g_u_id_10000042=_ck18112210334212135454572121490; yfx_mr_10000042=%3A%3Amarket_type_free_search%3A%3A%3A%3Abaidu%3A%3A%3A%3A%3A%3A%3A%3Awww.baidu.com%3A%3A%3A%3Apmf_from_free_search; yfx_key_10000042=; VISITED_COMPANY_CODE=%5B%22603017%22%2C%22600354%22%2C%22601975%22%2C%22600000%22%5D; VISITED_STOCK_CODE=%5B%22603017%22%2C%22600354%22%2C%22601975%22%2C%22600000%22%5D; seecookie=%5B601975%5D%3AST%u957F%u6CB9%2C%5B600000%5D%3A%u6D66%u53D1%u94F6%u884C; JSESSIONID=CA764F4C8465140437D5F6B868137460; yfx_f_l_v_t_10000042=f_t_1542854022203__r_t_1553650507322__v_t_1553651393256__r_c_23; VISITED_MENU=%5B%229055%22%2C%228536%22%2C%228451%22%2C%228453%22%2C%228454%22%2C%229057%22%2C%229062%22%2C%229056%22%2C%228466%22%2C%228523%22%2C%228528%22%5D',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36',
            'Referer': 'http://www.sse.com.cn/assortment/stock/list/share/'
        }
    elif web_flag == 'eastmoney':
        header = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Cache-Control': 'max-age=0',
            'Connection': 'keep-alive',
            'Cookie': 'st_pvi=71738581877645; st_sp=2018-11-22%2011%3A40%3A40; qgqp_b_id=8db9365e6c143170016c773cee144103; em_hq_fls=js; HAList=a-sz-000333-%u7F8E%u7684%u96C6%u56E2%2Ca-sz-300059-%u4E1C%u65B9%u8D22%u5BCC; st_si=74062085443937; st_asi=delete; st_sn=27; st_psi=20190113183705692-113300301007-4079839165',
            # 'Host': 'dcfm.eastmoney.com',
            'Upgrade-Insecure-Requests': 1,
            'User-Agent': r'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36'
        }
    else:
        # BUGFIX: any other web_flag (including the default None) used to
        # fall through and raise NameError on the unbound ``header`` below.
        wx.info("[get_json_str] unknown web_flag [{}] : {}".format(web_flag, url))
        return None
    # requests.packages.urllib3.disable_warnings()
    http = urllib3.PoolManager()
    try:
        # wx.info(" debug start GET URL ...")
        raw_data = http.request('GET', url, headers=header)
        # wx.info(" debug get Response URL ...")
    except Exception:
        # BUGFIX: the status check used to sit in a ``finally`` block, where
        # ``raw_data`` is unbound whenever the request itself raised
        # (NameError masking the handled exception). Checked after instead.
        return None
    if raw_data.status >= 300:
        wx.info("Web response failed : {}".format(url))
        return None
    # Detect the payload encoding, then decode via the project helper.
    str_type = chardet.detect(raw_data.data)
    # unicode = raw_data.data.decode(str_type['encoding'])
    unicode = lg.str_decode(raw_data.data, str_type['encoding'])
    return unicode
def get_sh_notice(self, date_arr=None):
    """Page through SSE company bulletins for a date range into a DataFrame.

    Args:
        date_arr: ``[start_date, end_date]`` as ``'%Y-%m-%d'`` strings;
            None/empty means "today only".
            BUGFIX: the default used to be a *mutable* ``[]`` argument.

    Returns:
        A pandas DataFrame of all pages of notices (possibly empty), or
        None on any request/HTTP error.
    """
    wx = lg.get_handle()
    if date_arr is None or len(date_arr) == 0:
        start_date = (date.today()).strftime('%Y-%m-%d')
        end_date = start_date
    else:
        start_date = date_arr[0]
        end_date = date_arr[1]
    page_num = 1
    header = {
        'Cookie': 'yfx_c_g_u_id_10000042=_ck18112210334212135454572121490; yfx_mr_10000042=%3A%3Amarket_type_free_search%3A%3A%3A%3Abaidu%3A%3A%3A%3A%3A%3A%3A%3Awww.baidu.com%3A%3A%3A%3Apmf_from_free_search; yfx_key_10000042=; VISITED_COMPANY_CODE=%5B%22603017%22%2C%22600354%22%2C%22601975%22%2C%22600000%22%5D; VISITED_STOCK_CODE=%5B%22603017%22%2C%22600354%22%2C%22601975%22%2C%22600000%22%5D; seecookie=%5B601975%5D%3AST%u957F%u6CB9%2C%5B600000%5D%3A%u6D66%u53D1%u94F6%u884C; JSESSIONID=CA764F4C8465140437D5F6B868137460; yfx_f_l_v_t_10000042=f_t_1542854022203__r_t_1553650507322__v_t_1553651393256__r_c_23; VISITED_MENU=%5B%229055%22%2C%228536%22%2C%228451%22%2C%228453%22%2C%228454%22%2C%229057%22%2C%229062%22%2C%229056%22%2C%228466%22%2C%228523%22%2C%228528%22%5D',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36',
        'Referer': 'http://www.sse.com.cn/assortment/stock/list/share/'
    }
    http = urllib3.PoolManager()
    notice_df = pd.DataFrame()
    while True:
        # NOTE(review): the trailing '"1"' glues an extra digit onto endPage
        # (e.g. page 1 -> endPage=11). Looks like a typo but is kept as-is to
        # preserve the original request — confirm against the SSE API.
        url = "http://query.sse.com.cn/security/stock/queryCompanyBulletin.do?" \
              "jsonCallBack=jsonpCallback43958&isPagination=true&productId=&keyWord=&" \
              "securityType=0101%2C120100%2C020100%2C020200%2C120200&reportType2=&" \
              "reportType=ALL&beginDate=" + start_date + "&endDate=" + end_date + "&" \
              "pageHelp.pageSize=100&pageHelp.pageCount=50&pageHelp.pageNo=" + str(page_num) + \
              "&pageHelp.beginPage=" + str(page_num) + "&pageHelp.cacheSize=1&" \
              "pageHelp.endPage=" + str(page_num) + "1&_=1581167068639"
        try:
            raw_data = http.request('GET', url, headers=header)
        except Exception:
            # BUGFIX: the status check used to sit in a ``finally`` block,
            # where ``raw_data`` is unbound whenever the request itself
            # raised (NameError). Checked after the try/except instead.
            return None
        if raw_data.status >= 300:
            return None
        # Detect the payload encoding, then decode via the project helper.
        str_type = chardet.detect(raw_data.data)
        unicode = lg.str_decode(raw_data.data, str_type['encoding'])
        # Strip the JSONP wrapper: "jsonpCallbackNNN(" prefix and ")" suffix.
        json_str = re.sub(r'jsonpCallback\w+\(', r'', unicode)[:-1]
        # notice_data: (total_count, page_dataframe)
        notice_data = self._sh_json_parse(json_str=json_str)
        if notice_data[0] == 0:
            wx.info("[上证公告获取][{}-{}]公告数量为[0],第{}页,退出 ".format(start_date, end_date, page_num))
            break
        elif notice_df is None or len(notice_df) == 0:
            notice_df = notice_data[1]
        else:
            # NOTE(review): DataFrame.append is removed in pandas >= 2.0;
            # kept for compatibility with the rest of this file — consider
            # pd.concat([notice_df, notice_data[1]]) when upgrading pandas.
            notice_df = notice_df.append(notice_data[1])
        wx.info("[上证公告获取][{}-{}]公告总数[{}],已获得第[{}]页,获取下一页".
                format(start_date, end_date, notice_data[0], page_num))
        page_num += 1
    # return unicode
    return notice_df
def get_json_str(self, id, time_str=None, page_num=0):
    """Page through eastmoney tick-by-tick ("fenshi") data for one stock.

    Fetches pages starting at ``self.rt_page_dict[id]`` until a page whose
    last tick reaches the end of the requested window, storing parsed rows
    via ``self.json_parse`` and remembering the restart page in
    ``self.record_page_dict``.

    Args:
        id: 6-digit stock code; the first two digits select market codes.
        time_str: window as ``'HH:MM-HH:MM'`` (must be at least 11 chars).
        page_num: non-zero overrides the starting page index for ``id``.

    Returns:
        ``self.record_page_dict[id]`` (the page to resume from next call),
        or None on bad input, already-cached data, or request failure.
    """
    wx = lg.get_handle()
    if time_str is None or len(time_str) < 11:
        wx.info("[RT_East][get_json_str] 时间段 不正确,退出")
        return None
    else:
        [begin_time_str, end_time_str] = time_str.split("-")
        begin_time_stamp = int(
            time.mktime(
                time.strptime(self.date_str + begin_time_str, '%Y%m%d%H:%M')))
        end_time_stamp = int(
            time.mktime(
                time.strptime(self.date_str + end_time_str, '%Y%m%d%H:%M')))
    my_timer = wx_timer(date_str='')
    ret_zone = my_timer.tell_time_zone(t_stamp=end_time_stamp)
    # Time point matching end_time_stamp, used as the start of the next
    # get_rt_data call (fed to the rebase function).
    record_stamp = ret_zone[2]
    # If the ID has never been queried, rt_page_dict[id] would start at 0 and
    # be incremented per page below.
    # if id not in self.rt_page_dict.keys():
    #     self.rt_page_dict[id] = 0
    # For get_rt_data: starting page index read from file.
    if page_num != 0:
        self.rt_page_dict[id] = page_num
    # NOTE(review): if page_num == 0 and id was never seeded,
    # self.rt_page_dict[id] below raises KeyError (the seeding code above is
    # commented out) — confirm callers always pass page_num or pre-seed.
    # Skip the fetch when the RT object already holds data up to end_time_stamp.
    if id in self.rt_dict_df.keys(
    ) and self.rt_dict_df[id].time_stamp.max() >= end_time_stamp:
        wx.info(
            "[RT_East][{}] RT 对象已保存 [{}--{}]逐笔交易数据,目标时间段[{}--{}]不需要重新获取".
            format(
                id,
                time.strftime(
                    "%H:%M:%S",
                    time.localtime(self.rt_dict_df[id].time_stamp.min())),
                time.strftime(
                    "%H:%M:%S",
                    time.localtime(self.rt_dict_df[id].time_stamp.max())),
                begin_time_str, end_time_str))
        return None
    # code-prefix -> [id-suffix market digit, market query param]
    market_code_dict = {
        '60': ['1', '1'],
        '00': ['2', '0'],
        '30': ['2', '0'],
        '68': ['1', '0']
    }
    while True:
        url = "http://push2ex.eastmoney.com/getStockFenShi?pagesize="+self.item_page+\
              "&ut=7eea3edcaed734bea9cbfc24409ed989&dpt=wzfscj&" \
              "cb=jQuery1123021130998143685753_1580471904475&pageindex="+str(self.rt_page_dict[id])+\
              "&id="+id+ market_code_dict[id[0:2]][0]+"&" \
              "sort=1&ft=1&code="+id+"&market="+market_code_dict[id[0:2]][1]+"&_=1580471904476"
        # sort=1 ascending; 2 descending.
        header = {
            'Cookie': 'UM_distinctid=16bf36d52242f3-0693469a5596d3-e323069-1fa400-16bf36d5225362; _ntes_nnid=16b2182ff532e10833492eedde0996df,1563157161323; _ntes_nuid=16b2182ff532e10833492eedde0996df; vjuids=e0fb8aa0.16d4ee83324.0.e074eccb150e; [email protected]|1570190476|0|mail163|00&99|hen&1570190062&mail163#CN&null#10#0#0|&0|mail163|[email protected]; [email protected]:-1:1; mail_psc_fingerprint=8da65e9cc5769a658a69962d94f7c46f; _ntes_usstock_recent_=NTES%7C; _ntes_usstock_recent_=NTES%7C; vjlast=1568986903.1571018378.11; s_n_f_l_n3=e119c348b08890ac1571018378289; NNSSPID=0e35f22546f44023b00d65e2a3ca1f26; ne_analysis_trace_id=1571018721010; _ntes_stock_recent_=1002699%7C0600000%7C1000573; _ntes_stock_recent_=1002699%7C0600000%7C1000573; _ntes_stock_recent_=1002699%7C0600000%7C1000573; pgr_n_f_l_n3=e119c348b08890ac1571018815386610; vinfo_n_f_l_n3=e119c348b08890ac.1.5.1563157161368.1570632456351.1571018833379',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36',
            'Referer': 'http://quote.eastmoney.com/f1.html?code=' + id + '&market=2'
        }
        # requests.packages.urllib3.disable_warnings()
        http = urllib3.PoolManager()
        try:
            raw_data = http.request('GET', url, headers=header)
        except Exception:
            # BUGFIX: the status check used to sit in a ``finally`` block,
            # where ``raw_data`` is unbound whenever the request itself
            # raised (NameError). Checked after the try/except instead.
            return None
        if raw_data.status >= 300:
            wx.info("Web response failed : {}".format(url))
            return None
        # Detect the payload encoding, then decode via the project helper.
        str_type = chardet.detect(raw_data.data)
        # unicode = raw_data.data.decode(str_type['encoding'])
        unicode = lg.str_decode(raw_data.data, str_type['encoding'])
        # Parse the JSON string; ticks are stored into self.rt_dict_df.
        [page_start_time_str, page_end_time_str] = self.json_parse(id=id, json_str=unicode)
        if page_start_time_str is not None:
            page_end_time_stamp = int(
                time.mktime(
                    time.strptime(self.date_str + page_end_time_str,
                                  '%Y%m%d%H:%M:%S')))
            page_start_time_stamp = int(
                time.mktime(
                    time.strptime(self.date_str + page_start_time_str,
                                  '%Y%m%d%H:%M:%S')))
            # Record the page containing record_stamp into
            # self.record_page_dict[id]; get_rt_data returns it and writes it
            # to file as the first page index for the next run.
            if time.strftime("%H:%M", time.localtime(record_stamp)) != '13:00':
                if page_end_time_stamp >= record_stamp and page_start_time_stamp <= record_stamp:
                    self.record_page_dict[id] = self.rt_page_dict[id]
            else:
                # Special-case 13:00: no page before the 11:30 close contains
                # it, so just keep recording the latest page number.
                self.record_page_dict[id] = self.rt_page_dict[id]
            if page_end_time_stamp >= end_time_stamp:
                wx.info(
                    "[RT_East][{}] 第{}页 [{}--{}]逐笔交易数据,已获得目标时间段数据".format(
                        id, self.rt_page_dict[id], page_start_time_str,
                        page_end_time_str))
                # Advance the page counter so a rebase re-query starts here.
                self.rt_page_dict[id] += 1
                break
            else:
                wx.info("[RT_East][{}] 第{}页 [{}--{}]逐笔交易数据, 未完成,继续获取下一页数据".
                        format(id, self.rt_page_dict[id], page_start_time_str,
                               page_end_time_str))
                # Advance the page counter so a rebase re-query starts here.
                self.rt_page_dict[id] += 1
                time.sleep(0.5)
        else:
            wx.info("[RT_East] [{}] 第{}页 没有数据,退出".format(
                id, self.rt_page_dict[id]))
            break
    # NOTE(review): raises KeyError if no page containing record_stamp was
    # ever recorded for this id — confirm whether .get(id) is intended.
    return self.record_page_dict[id]