def _get_sina_json_dd_url(vol='0', type='0', num='10000', count=None):
    """Build the list of paged ``ct.JSON_DD_Data_URL_Page`` URLs to download.

    The current record count is read from ``ct.JSON_DD_CountURL`` (the first
    run of digits found in the response).

    * ``count is None``: URLs covering every record are returned, ``num``
      records per page, or a single URL sized to the record count when there
      are fewer than ``num`` records.
    * ``count`` given: it is treated as the number of records already known.
      Only the 10000-record pages from the one holding record ``count`` up to
      the last page are returned; nothing is returned when the remote count
      has not grown.

    Parameters
    ----------
    vol : key into ``ct.DD_VOL_List`` (converted with ``str``).
    type : value substituted into the URL templates (converted with ``str``).
    num : page size used when ``count`` is None.
    count : previously seen record count, or None for a full download.

    Returns
    -------
    list
        Formatted URL strings; empty when the count response holds no digits
        or when there is nothing new to fetch.
    """
    vol = str(vol)
    type = str(type)
    num = str(num)
    urllist = []

    count_url = ct.JSON_DD_CountURL % (ct.DD_VOL_List[vol], type)

    if count is None:
        log.info("_json_dd_url:%s" % count_url)
        data = cct.get_url_data(count_url)
        found = re.findall(r'(\d+)', data, re.S)
        log.debug("_json_dd_url_count:%s" % found)
        if len(found) == 0:
            log.error("url Count error:%s count:%s" % (count_url, found))
            return []

        count = found[0]
        bigcount = getconfigBigCount(count, write=False)
        # Trailing comma kept from the original (Python 2 "no newline" idiom).
        print("Big:%s V:%s " % (bigcount[0], bigcount[1])),

        total = int(count)
        per_page = int(num)
        if total >= per_page:
            # Integer ceiling: int/int floors under Python 2, which made the
            # original math.ceil() a no-op and dropped the last partial page.
            page_count = -(-total // per_page)
            for page in range(1, page_count + 1):
                urllist.append(ct.JSON_DD_Data_URL_Page % (
                    per_page, page, ct.DD_VOL_List[vol], type))
        else:
            urllist.append(ct.JSON_DD_Data_URL_Page % (
                count, '1', ct.DD_VOL_List[vol], type))
        return urllist

    # Incremental mode: compare the caller's count with the current one.
    data = cct.get_url_data(count_url)
    found = re.findall(r'(\d+)', data, re.S)
    if len(found) == 0:
        log.error("url Count error:%s count:%s" % (count_url, found))
        return []

    # re.findall returns a list; the original compared and int()-converted
    # the list itself, so this branch could never succeed.
    count_now = int(found[0])
    known = int(count)
    if known < count_now:
        page_size = 10000
        # Pages are 1-based, so never start below page 1 (known == 0).
        first_page = max(1, -(-known // page_size))
        last_page = -(-count_now // page_size)
        for page in range(first_page, last_page + 1):
            urllist.append(ct.JSON_DD_Data_URL_Page % (
                '10000', page, ct.DD_VOL_List[vol], type))
    return urllist
def sina_json_Big_Count(vol='1', type='0', num='10000'):
    """Return the record count reported by ``ct.JSON_DD_CountURL``.

    Parameters
    ----------
    vol : str, optional
        Key into ``ct.DD_VOL_List`` (default '1').
    type : str, optional
        Value substituted into the count URL (default '0').
    num : str, optional
        Accepted for signature compatibility; not used by this function.

    Returns
    -------
    str or int
        The first run of digits in the response, as a string, or the
        integer 0 when the response contains no digits.
    """
    count_url = ct.JSON_DD_CountURL % (ct.DD_VOL_List[vol], type)
    log.info("Big_Count_url:%s"%count_url)
    payload = cct.get_url_data(count_url)
    digit_runs = re.findall('(\d+)', payload, re.S)
    log.debug("Big_Count_count:%s"%digit_runs)
    if len(digit_runs) == 0:
        return 0
    return digit_runs[0]
def _get_sina_Market_url(market='sh_a', count=None, num='1000'):
    """Build the paged ``ct.JSON_Market_Center_RealURL`` URLs for a market.

    Parameters
    ----------
    market : value substituted into the URL templates (default 'sh_a').
    count : total number of records. When None it is read from
        ``ct.JSON_Market_Center_CountURL`` (first run of digits in the
        response); when given, it is used as-is and no count request is made.
    num : page size (converted with ``str``).

    Returns
    -------
    list
        One URL per page of ``num`` records, or a single URL sized to
        ``count`` when there are fewer than ``num`` records. Empty when the
        count response contains no digits.
    """
    num = str(num)
    # Initialised up front: the original only assigned it inside the
    # ``count is None`` branch, so passing ``count`` raised UnboundLocalError.
    urllist = []

    if count is None:
        url = ct.JSON_Market_Center_CountURL % (market)
        data = cct.get_url_data(url, timeout=10)
        found = re.findall(r'(\d+)', data, re.S)
        if len(found) == 0:
            return urllist
        count = found[0]
    else:
        count = str(count)

    total = int(count)
    per_page = int(num)
    if total >= per_page:
        # Integer ceiling; int/int would floor under Python 2 and drop the
        # last partial page.
        page_count = -(-total // per_page)
        for page in range(1, page_count + 1):
            urllist.append(ct.JSON_Market_Center_RealURL % (page, num, market))
    else:
        urllist.append(ct.JSON_Market_Center_RealURL % ('1', count, market))
    return urllist
def get_tzrq(url, today):
    """Fetch ``url % today`` and extract three figures from the response.

    The response is searched for double-quoted segments. When exactly three
    are found, each is split on commas and its field at index 5 is divided
    by 100000000 and rounded to one decimal (0 when that field is empty).
    The three results are stored under 'sh', 'sz' and 'all', in that order
    of appearance.

    The module-level counter ``rzrqCount`` is incremented whenever the
    response is empty; once it reaches 3 no further request is made.

    Returns
    -------
    dict
        ``{'sh': ..., 'sz': ..., 'all': ...}`` or an empty dict when the
        response does not contain exactly three quoted segments.
    """
    global rzrqCount
    url = url % today

    quoted = []
    if rzrqCount < 3:
        body = cct.get_url_data(url)
        if len(body) >= 1:
            quoted = re.findall('\"([\d\D]+?)\"', body)
        else:
            rzrqCount += 1

    result = {}
    if len(quoted) != 3:
        return result

    # Split everything first, then convert, mirroring the original order.
    rows = [segment.split(',') for segment in quoted]
    for key, fields in zip(('sh', 'sz', 'all'), rows):
        raw = fields[5]
        if len(raw) > 0:
            result[key] = round(float(raw) / 100000000, 1)
        else:
            result[key] = 0
    return result
def _parsing_sina_dd_price_json(url):
    """Download one page of JSON data and return it as a DataFrame.

    Parameters
    ----------
    url : str
        Page URL, fetched with ``cct.get_url_data`` using the ``sinaheader``
        request headers.
        NOTE(review): ``sinaheader`` is not defined in this function; it is
        presumably a module-level name - confirm.

    Returns
    -------
    DataFrame or str
        Rows with ``volume > 0``, with columns ``ct.DAY_REAL_DD_COLUMNS``
        minus 'symbol'. The empty string ``''`` is returned when the
        response is shorter than 10 characters or contains the
        '*****@*****.**' marker.
    """
    ct._write_console()
    text = cct.get_url_data(url, headers=sinaheader)
    if len(text) < 10 or text.find('*****@*****.**') > 0:
        return ''

    # Every occurrence of 'symbol' in the payload is renamed to 'code'
    # before parsing (behaviour kept from the original).
    text = text.replace('symbol', 'code')

    # json.loads() no longer accepts ``encoding=`` (removed in Python 3.9),
    # so GBK byte strings are decoded explicitly instead.
    if isinstance(text, bytes):
        text = text.decode('GBK')
    js = json.loads(text)

    # Guarded so an empty payload does not raise just for a debug message.
    if isinstance(js, list) and len(js) > 0:
        log.debug("parsing_sina_dd:%s" % js[0])

    df = pd.DataFrame(js, columns=ct.DAY_REAL_DD_COLUMNS)
    df = df.drop('symbol', axis=1)
    # ``.ix`` was removed in pandas 1.0; ``.loc`` with a boolean mask selects
    # the same rows.
    df = df.loc[df.volume > 0]
    return df
def get_sina_tick_js_LastPrice(symbols):
    """Fetch quotes from hq.sinajs.cn and map each code to quote field 2.

    Parameters
    ----------
    symbols : str, list, tuple, set or pandas.Series
        One code or a collection of codes. Each is converted with
        ``cct.code_to_symbol`` before being placed in the request URL.

    Returns
    -------
    dict or str
        ``{code: float}`` keyed by the codes as passed in, where the value
        is the comma-separated field at index 2 of that code's quoted
        payload (labelled as the previous close by the original comments).
        Codes whose payload is empty or too short are omitted. The empty
        string ``''`` is returned when ``symbols`` is empty.
    """
    if len(symbols) == 0:
        return ''

    # Normalise to an ordered list so the same sequence drives both the
    # request and the result keys. The original indexed ``symbols``
    # directly, which fails for sets, is label-based for Series, and used
    # the whole list as a dict key for one-element lists.
    if isinstance(symbols, (list, set, tuple, pd.Series)):
        codes = list(symbols)
    else:
        codes = [symbols]

    symbols_list = ','.join(cct.code_to_symbol(code) for code in codes)
    url = "http://hq.sinajs.cn/list=%s" % (symbols_list)
    data = cct.get_url_data(url)

    price_dict = {}
    # The response is split on ';', one statement per requested code;
    # zip() ignores any trailing remainder after the last ';'.
    for code, entry in zip(codes, data.split(";")):
        matched = re.search(r'"(.+)"', entry)
        if matched is None:
            # Nothing between the quotes for this code.
            continue
        stockInfo = matched.group(1).split(",")
        if len(stockInfo) < 3:
            continue
        price_dict[code] = float(stockInfo[2])
    return price_dict
def get_dfcfw_rzrq_SHSZ(url=ct.DFCFW_RZYE):
    """Fetch the ``"data"`` array from ``url`` and summarise the latest rows.

    NOTE: a later definition with the same name exists in this file and
    replaces this one when the module is imported.

    The JSON array following ``"data":`` is loaded into a DataFrame with
    columns ``ct.dfcfw_rzye_col2022``. 'RZYE', 'H_RZYE' and 'H_RQYL' are
    renamed to 'all', 'sh' and 'sz', and the frame is indexed by the first
    10 characters of 'DIM_DATE'.
    NOTE(review): 'sz' is taken from 'H_RQYL' and scaled by 1e6 while the
    other two columns are scaled by 1e8 - kept as in the original; confirm.

    Returns
    -------
    dict
        Keys 'all', 'sh', 'sz' (most recent available trading day) and
        'dff', 'shrz', 'szrz' (difference to the available day before it).
        When no usable rows are found, 'dff' is the string 'error' and the
        other keys are 0.
    """
    data = {}
    log.info("rzrq:%s" % (ct.DFCFW_RZYE))
    rzdata = cct.get_url_data(url, timeout=10)

    # Work on text: searching bytes with a str pattern raises TypeError on
    # Python 3 (the original passed ``rzdata.encode('utf8')``).
    if isinstance(rzdata, bytes):
        rzdata = rzdata.decode('utf8')
    matches = re.findall(r'"data":([\D\d]+.}])', rzdata)
    # No match -> empty frame, handled by the fallback branch below
    # (the original raised IndexError here).
    records = json.loads(matches[0]) if matches else []

    df = pd.DataFrame(records, columns=ct.dfcfw_rzye_col2022)
    df.rename(columns={'RZYE': 'all', 'H_RZYE': 'sh', 'H_RQYL': 'sz'},
              inplace=True)
    df['DIM_DATE'] = df['DIM_DATE'].apply(lambda x: x[:10])
    df = df.set_index('DIM_DATE')
    df['all'] = df['all'].apply(lambda x: round((x / 1000 / 1000 / 100), 2))
    df['sh'] = df['sh'].apply(lambda x: round((x / 1000 / 1000 / 100), 2))
    df['sz'] = df['sz'].apply(lambda x: round((x / 1000 / 1000 / 1), 2))

    def get_days_data(days=1, df=None):
        """Return the row of the ``days``-th most recent date present in df.

        Walks ``cct.last_tddate(1)`` .. ``cct.last_tddate(19)``. If fewer
        than ``days`` dates are present the last one found is returned, or
        '' when none is present.
        """
        row = ''
        for x in range(1, 20):
            yestoday = cct.last_tddate(x)
            if yestoday in df.index:
                row = df.loc[yestoday]
                days -= 1
                if days == 0:
                    break
            else:
                log.error("%s:None" % (yestoday))
        return row

    data1 = ''
    data2 = ''
    if len(df) > 0:
        data1 = get_days_data(1, df)
        data2 = get_days_data(2, df)

    # Both lookups fall back to '' when no date matched; guard before
    # using .loc on them.
    if len(data1) > 0 and len(data2) > 0:
        data['all'] = round(data1.loc['all'], 2)
        data['sh'] = round(data1.loc['sh'], 2)
        data['sz'] = round(data1.loc['sz'], 2)
        data['dff'] = round(data1.loc['all'] - data2.loc['all'], 2)
        data['shrz'] = round(data1.loc['sh'] - data2.loc['sh'], 2)
        data['szrz'] = round(data1.loc['sz'] - data2.loc['sz'], 2)
    else:
        log.debug("df is None:%s" % (url))
        data['dff'] = 'error'
        data['all'] = 0
        data['sh'] = 0
        data['sz'] = 0
        data['shrz'] = 0
        data['szrz'] = 0
    return data
def get_dfcfw_rzrq_SHSZ(url=ct.DFCFW_RZYE):
    """Fetch ``{"tdate"...}`` records from ``url`` and summarise the latest.

    NOTE: an earlier definition with the same name exists in this file;
    being defined later, this is the one left bound after import.

    Every ``:"-"`` in the response is replaced by ``:0.1`` and each
    ``{"tdate" ... }`` object is parsed into a DataFrame with columns
    ``ct.dfcfw_rzye_columns``. 'rzye_hs', 'rzye_h' and 'rzye_s' are renamed
    to 'all', 'sh' and 'sz', divided by 1e8 and rounded to 2 decimals; the
    frame is indexed by the first 10 characters of 'tdate'.

    Returns
    -------
    dict
        Keys 'all', 'sh', 'sz' (most recent available trading day) and
        'dff', 'shrz', 'szrz' (difference to the available day before it).
        When two distinct days cannot be found, 'dff' is the string 'error'
        and the other keys are 0.
    """
    import json  # local import so this block does not rely on file-level imports

    data = {}
    log.info("rzrq:%s" % (ct.DFCFW_RZYE))
    rzdata = cct.get_url_data(url, timeout=10)
    rzdata = rzdata.replace(':"-"', ':0.1')
    rz_dic = re.findall(r'{"tdate"[\D\d]+?}', rzdata)
    # SECURITY: the original ran eval() on each fragment of the remote
    # response. The fragments are JSON objects, so they are parsed with
    # json.loads, which cannot execute code.
    rzdict = [json.loads(x) for x in rz_dic]

    df = pd.DataFrame(rzdict, columns=ct.dfcfw_rzye_columns)
    df['tdate'] = df['tdate'].apply(lambda x: x[:10])
    df = df.set_index('tdate')
    df.rename(columns={'rzye_hs': 'all', 'rzye_h': 'sh', 'rzye_s': 'sz'},
              inplace=True)
    for column in ('all', 'sh', 'sz'):
        df[column] = df[column].apply(
            lambda x: round((x / 1000 / 1000 / 100), 2))

    def get_days_data(days=1, df=None):
        """Return the row of the ``days``-th most recent date present in df.

        Walks ``cct.last_tddate(1)`` .. ``cct.last_tddate(19)`` and counts
        the dates found in the index. Returns '' when fewer than ``days``
        are present. The original started scanning at offset ``days``, so
        when the latest day was missing both lookups returned the same row
        and every difference came out as 0.
        """
        remaining = days
        for x in range(1, 20):
            yestoday = cct.last_tddate(x)
            if yestoday in df.index:
                remaining -= 1
                if remaining == 0:
                    return df.loc[yestoday]
            else:
                log.error("%s:None" % (yestoday))
        return ''

    data1 = get_days_data(1, df)
    data2 = get_days_data(2, df)

    if len(data1) > 0 and len(data2) > 0:
        data['all'] = round(data1.loc['all'], 2)
        data['sh'] = round(data1.loc['sh'], 2)
        data['sz'] = round(data1.loc['sz'], 2)
        data['dff'] = round(data1.loc['all'] - data2.loc['all'], 2)
        data['shrz'] = round(data1.loc['sh'] - data2.loc['sh'], 2)
        data['szrz'] = round(data1.loc['sz'] - data2.loc['sz'], 2)
    else:
        log.error("data2:%s" % (data2))
        data['dff'] = 'error'
        data['all'] = 0
        data['sh'] = 0
        data['sz'] = 0
        data['shrz'] = 0
        data['szrz'] = 0
    return data