def get_last_n_dailybars(symbol, n):
    """Fetch the last ``n`` daily bars of ``symbol`` up to now, in reversed order.

    The bars returned by ``md.get_last_n_dailybars`` are reversed and handed
    to ``bar_topd`` together with the key ``'date'``.

    Args:
        symbol: Symbol string forwarded to ``md.get_last_n_dailybars``.
        n: Number of daily bars to request.

    Returns:
        Whatever ``bar_topd`` produces for the reversed bar list.
    """
    end_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    bars = md.get_last_n_dailybars(symbol, n, end_time)
    # NOTE(review): the SDK presumably returns bars newest-first, so the
    # reversal yields chronological order -- confirm against the SDK docs.
    return bar_topd(list(reversed(bars)), 'date')
def get_last_n_dailybars(symbol, n):
    """Fetch the last ``n`` daily bars of ``symbol`` up to now, in reversed order.

    The bars returned by ``md.get_last_n_dailybars`` are reversed and handed
    to ``bar_topd`` together with the key ``'date'``.

    Args:
        symbol: Symbol string forwarded to ``md.get_last_n_dailybars``.
        n: Number of daily bars to request.

    Returns:
        Whatever ``bar_topd`` produces for the reversed bar list.
    """
    end_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    bars = md.get_last_n_dailybars(symbol, n, end_time)
    # NOTE(review): the SDK presumably returns bars newest-first, so the
    # reversal yields chronological order -- confirm against the SDK docs.
    return bar_topd(list(reversed(bars)), 'date')
def read_last_n_kline(symbol_list, weeks_in_seconds, count, end_time):
    """Read the last ``count`` k-line bars for every symbol in ``symbol_list``.

    Args:
        symbol_list: Iterable of symbol strings.
        weeks_in_seconds: Bar period in seconds. ``240 * 60`` selects daily
            bars (``md.get_last_n_dailybars``); any other value is passed to
            ``md.get_last_n_bars`` as the bar type.
        count: Number of bars requested per symbol.
        end_time: End time forwarded to the ``md`` query.

    Returns:
        A list of ``{'code': symbol, 'kdata': DataFrame}`` dicts, where the
        DataFrame has columns endtime/open/high/low/close/volume/amount and is
        sorted by ``endtime`` descending. Symbols with no bars are omitted.
        ``None`` when ``td.init`` does not return 0.
    """
    # When connecting to a local terminal, td_addr is localhost:8001.
    # NOTE(review): credentials are hard-coded here; consider moving them to
    # configuration.
    if td.init('*****@*****.**', 'zyj2590@1109', 'strategy_1') != 0:
        return None

    columns = ['endtime', 'open', 'high', 'low', 'close', 'volume', 'amount']
    is_daily = weeks_in_seconds == 240 * 60
    # Daily bars are stamped via ``utc_time``, other bars via ``utc_endtime``;
    # decide once instead of once per bar.
    time_attr = 'utc_time' if is_daily else 'utc_endtime'

    # Bars are fetched symbol by symbol: per the original TODO, reading the
    # whole batch in a single md.get_bars call returned incomplete data.
    data_list = []
    for stock in symbol_list:
        # The query result is an array of bar objects.
        if is_daily:
            bars = md.get_last_n_dailybars(stock, count, end_time)
        else:
            bars = md.get_last_n_bars(stock, weeks_in_seconds, count,
                                      end_time)
        if len(bars) == 0:
            continue

        # Convert the bar objects into DataFrame rows.
        rows = [[
            int(getattr(bar, time_attr)), bar.open, bar.high, bar.low,
            bar.close, bar.volume, bar.amount
        ] for bar in bars]
        kdata = pd.DataFrame(rows, columns=columns)
        kdata = kdata.sort_values(by='endtime', ascending=False)
        data_list.append({'code': stock, 'kdata': kdata})
    return data_list
def holdlist_format(self, date=None, prctype='close', outdir=None, source='wind'):
    """Build the standard-format holding list of futures positions.

    Runs parallel to ``get_totval``; the two never call each other.

    Args:
        date: Valuation date (a ``datetime``); defaults to today.
        prctype: Price field. With ``source='wind'`` it is passed to
            ``w.wsd`` as the field name; with ``source='gm'`` it is read as an
            attribute of the latest daily bar (``'settle'`` is mapped to
            ``'settle_price'``).
        outdir: When truthy, the table is written to this CSV path and nothing
            is returned.
        source: Price source, ``'wind'`` or ``'gm'``.

    Returns:
        A DataFrame with columns code/name/num/multi/prc restricted to rows
        whose ``num`` is non-zero, or ``None`` when ``outdir`` is given.

    Raises:
        ValueError: If ``source`` is neither ``'wind'`` nor ``'gm'``.
    """
    # Fail early: an unknown source would otherwise leave ``prc`` unbound
    # inside the loop below.
    if source not in ('wind', 'gm'):
        raise ValueError("source must be 'wind' or 'gm', got {!r}".format(source))
    if date is None:
        date = dt.datetime.today()

    if source == 'wind':
        # Wind data source
        w.start()
    else:
        # gm data source
        # NOTE(review): credentials are hard-coded here; consider moving them
        # to configuration.
        md.init('18201141877', 'Wqxl7309')
        if prctype == 'settle':
            prctype = 'settle_price'  # translate to the gm field name

    holdnum = self.get_holdnum(date=date)
    columns = ['code', 'name', 'num', 'multi', 'prc']
    rows = []
    for strat in self._logdir:
        stratinfo = strat.split('_')
        cttype = stratinfo[1].upper()
        montype = stratinfo[0]
        name = RawHoldingFutures.get_contracts_ours(date=date, cttype=cttype)[montype]
        code = RawHoldingStocks.addfix(name)
        num = holdnum[strat]
        multi = self._multiplier[cttype]
        if source == 'wind':
            prc = w.wsd('.'.join([name, 'CFE']), prctype, date, date).Data[0][0]
        else:
            lastbar = md.get_last_n_dailybars(
                symbol='.'.join(['CFFEX', name]),
                n=1,
                end_time=date.strftime('%Y-%m-%d'))[0]
            # Plain attribute lookup instead of eval() on a built string.
            prc = getattr(lastbar, prctype)
        rows.append([code, name, num, multi, prc])

    # Build the frame once; passing ``columns`` keeps the 'num' filter valid
    # even when there are no strategies at all.
    holding = pd.DataFrame(rows, columns=columns)
    holding = holding[holding['num'] != 0]
    if outdir:
        holding.to_csv(outdir, header=True, index=False)
        return None
    return holding
def _show_count(label, result):
    """Print ``label`` followed by the number of items in ``result``; return ``result``."""
    print(label, len(result))
    return result


# Latest 10 bars with bar type 60
r = _show_count('get_last_n_bars(10): ',
                md.get_last_n_bars('SHSE.600000', 60, 10))
# Daily-frequency bars for a date range
r = _show_count('get_dailybars: ',
                md.get_dailybars('SHSE.600000',
                                 '2015-05-01 00:00:00',
                                 '2015-05-20 23:59:59'))
# Latest daily-bar snapshot
r = _show_count('get_last_dailybars: ',
                md.get_last_dailybars('SHSE.600000,'))
# Latest N daily bars
r = _show_count('get_last_n_dailybars(10): ',
                md.get_last_n_dailybars('SHSE.600000', 10))
# Instrument codes
r = _show_count('get_instruments: ', md.get_instruments('SHSE', 1, 1))
# Instrument codes looked up by futures product name
r = _show_count('get_instruments_by_name', md.get_instruments_by_name('ag'))
# Constituent codes of an index
r = _show_count('get_constituents', md.get_constituents('SHSE.000001'))
# Fetch FinancialIndex by time period
def check_update(self, checkDate, outputPath=r'E:\stocks_data_min\StocksMinDB\check_reports'):
    """Cross-check the by-day and by-stock minute-bar databases for ``checkDate``.

    The two databases are expected to agree on:
      * the stocks present on that day,
      * the number of K-bars of every stock on that day,
      * the set of trade dates.

    A report is printed covering the stocks that are new compared with the
    previous trade day, stocks missing from the by-stock database, and
    per-stock bar-count mismatches. Mismatches are also written to
    ``check_report_<checkDate>.csv`` under ``outputPath``.

    Args:
        checkDate: Date to check, a ``'YYYYMMDD'`` string or a ``datetime``.
            Must lie between ``'19990726'`` and today (inclusive).
        outputPath: Directory receiving the mismatch report; created when it
            does not exist.

    Raises:
        AssertionError: If ``checkDate`` is outside the accepted range.
    """
    start = time.time()
    if isinstance(checkDate, dt.datetime):
        checkDate = checkDate.strftime('%Y%m%d')
    today = dt.datetime.today().strftime('%Y%m%d')
    # Raised explicitly (same exception type as the former ``assert``) so the
    # range check is not stripped when Python runs with -O.
    if not ('19990726' <= checkDate <= today):
        raise AssertionError(
            'checkDate must be within [19990726, {0}], got {1}'.format(today, checkDate))
    print('***************************')
    print('Checking data base on date {}'.format(checkDate))
    print('***************************')
    bars = md.get_last_n_dailybars('SHSE.000001', 2, end_time=checkDate)
    # NOTE(review): bars[1] is presumably the trade day before checkDate
    # (SDK returning newest-first) -- confirm.
    preDate = ''.join(bars[1].strtime.split('T')[0].split('-'))

    ###### check by_day ######
    connByDay = self.byDayDb._getConn_()
    cursorByDay = connByDay.cursor()
    ### trade dates ###
    cursorByDay.execute('SELECT date FROM trddates')
    byDayTrddates = {trd[0] for trd in cursorByDay.fetchall()}
    ### stocks ###
    cursorByDay.execute('SELECT stkcd,count(*) FROM stkmin_{} GROUP BY stkcd'.format(checkDate))
    byDayBars = pd.DataFrame(cursorByDay.fetchall(), columns=['stkcd', 'barnum'])
    checkDateStocks = set(byDayBars['stkcd'].values)
    if checkDate > '19990726':
        cursorByDay.execute('SELECT DISTINCT stkcd FROM stkmin_{}'.format(preDate))
        preDateStocks = {stk[0] for stk in cursorByDay.fetchall()}
    else:
        preDateStocks = set()
    newStocks = checkDateStocks - preDateStocks
    print('\n[+]{0} new stocks listed on date {1}'.format(len(newStocks), checkDate))
    print(newStocks)

    ###### check by_stk ######
    connByStk = self.byStkDb._getConn_()
    cursorByStk = connByStk.cursor()
    ### trade dates ###
    cursorByStk.execute('SELECT date FROM trddates')
    byStkTrddates = {trd[0] for trd in cursorByStk.fetchall()}
    moreTrdByDay = byDayTrddates - byStkTrddates
    moreTrdByStk = byStkTrddates - byDayTrddates
    if (moreTrdByDay | moreTrdByStk):
        print('\n[-]Different trdate dates')
        print('more in by day:', moreTrdByDay)
        print('more in by stk:', moreTrdByStk)
    else:
        print('\n[+]Trade dates matched between two databases')
    ### stocks ###
    cursorByStk.execute('SHOW TABLES')
    # Every table except 'trddates' is named so that the part after the first
    # underscore is a two-character prefix followed by the numeric stock code.
    allStocks = {
        int(tb[0].split('_')[1][2:])
        for tb in cursorByStk.fetchall() if tb[0] != 'trddates'
    }
    lostNewStocks = newStocks - allStocks
    if lostNewStocks:
        print('\n[-]Following new stocks NOT updated in stocks_data_min_by_stock')
        print(lostNewStocks)
    else:
        print('\n[+]New stocks matched between two databases on date {}'.format(checkDate))
    lostCheckDateStocks = checkDateStocks - allStocks
    if lostCheckDateStocks:
        print('\n[-]Following stocks NOT updated in stocks_data_min_by_stock on date {}'.format(checkDate))
        print(lostCheckDateStocks)
    else:
        print('\n[+]All stocks matched between two databases on date {}'.format(checkDate))

    ### check each stock ###
    # One lookup table instead of a boolean-mask scan of byDayBars per stock
    # (stkcd is unique here because of the GROUP BY above).
    barnumByDayMap = dict(zip(byDayBars['stkcd'].values, byDayBars['barnum'].values))
    missBarStocks = []
    for stk in sorted(checkDateStocks):
        stkstr = self.stkcd_int_trans(stkint=stk)
        cursorByStk.execute('SELECT count(*) FROM stkmin_{0} WHERE date={1}'.format(stkstr, checkDate))
        barnumByStk = cursorByStk.fetchall()[0][0]
        barnumByDay = barnumByDayMap[stk]
        if barnumByDay == barnumByStk:
            print('[+]Stock {0} bar num matched on date {1}'.format(stkstr, checkDate))
        else:
            missBarStocks.append([stk, barnumByStk, barnumByDay])

    if missBarStocks:
        print('\n[-] Missing bars between two databases on date {}'.format(checkDate))
        missTable = pd.DataFrame(missBarStocks, columns=['stkcd', 'barnumByStk', 'barnumByDay'])
        print(missTable)
        # Do not lose the report just because the target directory is missing.
        if not os.path.exists(outputPath):
            os.makedirs(outputPath)
        missTable.to_csv(os.path.join(outputPath, 'check_report_{}.csv'.format(checkDate)), index=False)
    else:
        print('\n[+] All bars of {0} stocks matched between two databases on date {1}'.format(len(checkDateStocks), checkDate))
    print('\nCheck finished on date {0} with {1} seconds'.format(checkDate, time.time() - start))
def _show_count(label, result):
    """Print ``label`` followed by the number of items in ``result``; return ``result``."""
    print(label, len(result))
    return result


# Bars with bar type 60 inside a time window
r = _show_count('get_bars: ',
                md.get_bars('CFFEX.IF1512', 60,
                            '2015-05-01 09:30:00',
                            '2015-05-10 09:31:00'))
# Latest bar snapshot
r = _show_count('get_last_bars: ', md.get_last_bars('CFFEX.IF1512,', 60))
# Latest 10 bars
r = _show_count('get_last_n_bars(10): ',
                md.get_last_n_bars('CFFEX.IF1512', 60, 10))
# Daily bars for a date range
r = _show_count('get_dailybars: ',
                md.get_dailybars('CFFEX.IF1512',
                                 '2015-05-01 00:00:00',
                                 '2015-05-20 23:59:59'))
# Latest daily-bar snapshot
r = _show_count('get_last_dailybars: ',
                md.get_last_dailybars('CFFEX.IF1512,'))
# Latest 10 daily bars
r = _show_count('get_last_n_dailybars(10): ',
                md.get_last_n_dailybars('CFFEX.IF1512', 10))
# Wait for a line on stdin before continuing.
input()
def check_update(
        self,
        checkDate,
        outputPath=r'E:\stocks_data_min\StocksMinDB\check_reports'):
    """Cross-check the by-day and by-stock minute-bar databases for ``checkDate``.

    The two databases are expected to agree on:
      * the stocks present on that day,
      * the number of K-bars of every stock on that day,
      * the set of trade dates.

    A report is printed covering the stocks that are new compared with the
    previous trade day, stocks missing from the by-stock database, and
    per-stock bar-count mismatches. Mismatches are also written to
    ``check_report_<checkDate>.csv`` under ``outputPath``.

    Args:
        checkDate: Date to check, a ``'YYYYMMDD'`` string or a ``datetime``.
            Must lie between ``'19990726'`` and today (inclusive).
        outputPath: Directory receiving the mismatch report; created when it
            does not exist.

    Raises:
        AssertionError: If ``checkDate`` is outside the accepted range.
    """
    start = time.time()
    if isinstance(checkDate, dt.datetime):
        checkDate = checkDate.strftime('%Y%m%d')
    today = dt.datetime.today().strftime('%Y%m%d')
    # Raised explicitly (same exception type as the former ``assert``) so the
    # range check is not stripped when Python runs with -O.
    if not ('19990726' <= checkDate <= today):
        raise AssertionError(
            'checkDate must be within [19990726, {0}], got {1}'.format(
                today, checkDate))
    print('***************************')
    print('Checking data base on date {}'.format(checkDate))
    print('***************************')
    bars = md.get_last_n_dailybars('SHSE.000001', 2, end_time=checkDate)
    # NOTE(review): bars[1] is presumably the trade day before checkDate
    # (SDK returning newest-first) -- confirm.
    preDate = ''.join(bars[1].strtime.split('T')[0].split('-'))

    ###### check by_day ######
    connByDay = self.byDayDb._getConn_()
    cursorByDay = connByDay.cursor()
    ### trade dates ###
    cursorByDay.execute('SELECT date FROM trddates')
    byDayTrddates = {trd[0] for trd in cursorByDay.fetchall()}
    ### stocks ###
    cursorByDay.execute(
        'SELECT stkcd,count(*) FROM stkmin_{} GROUP BY stkcd'.format(
            checkDate))
    byDayBars = pd.DataFrame(cursorByDay.fetchall(),
                             columns=['stkcd', 'barnum'])
    checkDateStocks = set(byDayBars['stkcd'].values)
    if checkDate > '19990726':
        cursorByDay.execute(
            'SELECT DISTINCT stkcd FROM stkmin_{}'.format(preDate))
        preDateStocks = {stk[0] for stk in cursorByDay.fetchall()}
    else:
        preDateStocks = set()
    newStocks = checkDateStocks - preDateStocks
    print('\n[+]{0} new stocks listed on date {1}'.format(
        len(newStocks), checkDate))
    print(newStocks)

    ###### check by_stk ######
    connByStk = self.byStkDb._getConn_()
    cursorByStk = connByStk.cursor()
    ### trade dates ###
    cursorByStk.execute('SELECT date FROM trddates')
    byStkTrddates = {trd[0] for trd in cursorByStk.fetchall()}
    moreTrdByDay = byDayTrddates - byStkTrddates
    moreTrdByStk = byStkTrddates - byDayTrddates
    if (moreTrdByDay | moreTrdByStk):
        print('\n[-]Different trdate dates')
        print('more in by day:', moreTrdByDay)
        print('more in by stk:', moreTrdByStk)
    else:
        print('\n[+]Trade dates matched between two databases')
    ### stocks ###
    cursorByStk.execute('SHOW TABLES')
    # Every table except 'trddates' is named so that the part after the first
    # underscore is a two-character prefix followed by the numeric stock code.
    allStocks = {
        int(tb[0].split('_')[1][2:])
        for tb in cursorByStk.fetchall() if tb[0] != 'trddates'
    }
    lostNewStocks = newStocks - allStocks
    if lostNewStocks:
        print(
            '\n[-]Following new stocks NOT updated in stocks_data_min_by_stock'
        )
        print(lostNewStocks)
    else:
        print('\n[+]New stocks matched between two databases on date {}'.
              format(checkDate))
    lostCheckDateStocks = checkDateStocks - allStocks
    if lostCheckDateStocks:
        print(
            '\n[-]Following stocks NOT updated in stocks_data_min_by_stock on date {}'
            .format(checkDate))
        print(lostCheckDateStocks)
    else:
        print('\n[+]All stocks matched between two databases on date {}'.
              format(checkDate))

    ### check each stock ###
    # One lookup table instead of a boolean-mask scan of byDayBars per stock
    # (stkcd is unique here because of the GROUP BY above).
    barnumByDayMap = dict(
        zip(byDayBars['stkcd'].values, byDayBars['barnum'].values))
    missBarStocks = []
    for stk in sorted(checkDateStocks):
        stkstr = self.stkcd_int_trans(stkint=stk)
        cursorByStk.execute(
            'SELECT count(*) FROM stkmin_{0} WHERE date={1}'.format(
                stkstr, checkDate))
        barnumByStk = cursorByStk.fetchall()[0][0]
        barnumByDay = barnumByDayMap[stk]
        if barnumByDay == barnumByStk:
            print('[+]Stock {0} bar num matched on date {1}'.format(
                stkstr, checkDate))
        else:
            missBarStocks.append([stk, barnumByStk, barnumByDay])

    if missBarStocks:
        print('\n[-] Missing bars between two databases on date {}'.format(
            checkDate))
        missTable = pd.DataFrame(
            missBarStocks, columns=['stkcd', 'barnumByStk', 'barnumByDay'])
        print(missTable)
        # Do not lose the report just because the target directory is missing.
        if not os.path.exists(outputPath):
            os.makedirs(outputPath)
        missTable.to_csv(os.path.join(
            outputPath, 'check_report_{}.csv'.format(checkDate)),
                         index=False)
    else:
        print(
            '\n[+] All bars of {0} stocks matched between two databases on date {1}'
            .format(len(checkDateStocks), checkDate))
    print('\nCheck finished on date {0} with {1} seconds'.format(
        checkDate, time.time() - start))
def updatePal(palPath=None):
    """Write temp CSV files holding the Pal update for the not-yet-saved days.

    Reads the saved ``data_20150701_now.mat`` file, determines the range from
    its stored next trade date to the latest available daily-bar date, backs
    the .mat file up, and then writes index, stock-list and per-page trade
    data CSVs into ``<palPath>/temp_files``.

    Args:
        palPath: Directory containing the .mat file; a hard-coded Windows
            path is used when ``None``.

    Returns:
        None. Returns early (after printing a message) when it is still
        before 15:30 on the stored next trade date, or when the latest
        available bar date is not later than that date.

    Raises:
        AssertionError: If the backup copy command returns a non-zero status.
    """
    start = time.time()
    # NOTE(review): credentials are hard-coded here.
    md.init('18201141877', 'Wqxl7309')
    if not w.isconnected():
        w.start()
    palPath = r'E:\bqfcts\bqfcts\data\Paltest' if palPath is None else palPath
    tempFilePath = os.path.join(palPath,'temp_files')
    if not os.path.exists(tempFilePath):
        os.mkdir(tempFilePath)
    matName = 'data_20150701_now.mat'
    # NOTE(review): the file is opened without an explicit mode and is never
    # closed inside this function.
    savedPal = h5py.File(os.path.join(palPath,matName))
    # print(read_cell(savedPal,'sec_names'))
    nextTrd = dt.datetime.strptime(str(int(savedPal['nexttrd'][0][0])),'%Y%m%d')
    nextTrdStr = nextTrd.strftime('%Y-%m-%d')
    # Updates are only attempted from 15:30 on the next trade date.
    updateTime = dt.datetime(nextTrd.year,nextTrd.month,nextTrd.day,15,30,0)
    if updateTime > dt.datetime.now():
        print('not update time yet')
        return
    else:
        availableDateStr = md.get_last_dailybars('SHSE.000001')[0].strtime[:10]
        # NOTE(review): `<=` also returns when the available date EQUALS the
        # next trade date, which makes the `!=` test further down always
        # true -- confirm that `<=` (rather than `<`) is intended.
        if int(availableDateStr.replace('-','')) <= int(nextTrdStr.replace('-','')):
            print('new data not avaliable yet')
            return
        else:
            print('will update from {0} to {1}'.format(nextTrdStr,availableDateStr))
    betweenDays = [tdt.strtime[:10] for tdt in md.get_calendar('SHSE',nextTrdStr,availableDateStr)]
    if nextTrdStr!=availableDateStr:  # avoid listing the same date twice
        betweenDays.append(availableDateStr)
    betweenDaysNumber = [int(tdt.replace('-','')) for tdt in betweenDays]
    newDateNum = len(betweenDaysNumber)
    # Back up the data before updating
    backupPath = os.path.join(palPath,'backup')
    # NOTE(review): relies on the Windows shell `COPY` command; the backup
    # directory is not created here.
    cpResult = os.system(r'COPY {0} {1} /Y'.format(os.path.join(palPath,matName),os.path.join(backupPath,matName)))
    assert cpResult==0,'backup failed'
    gmDateFmt = 'yyyy-mm-dd'  # not referenced again in this function
    # update indice
    indiceNames = ['sh','hs300','zz500','sz50']
    indiceCodes = ['000001','000300','000905','000016']
    symbols = ','.join(['SHSE.{}'.format(sbl) for sbl in indiceCodes])
    indiceBars = md.get_dailybars(symbols,nextTrdStr,availableDateStr)
    for dumi,idx in enumerate(indiceNames):
        # NOTE(review): the stride-4 slice assumes the bars come back
        # interleaved by symbol in the requested order -- confirm.
        bars = indiceBars[dumi::4]
        idxret = np.array([bar.close for bar in bars])/np.array([bar.pre_close for bar in bars]) - 1
        idxArray = np.array([betweenDaysNumber,
                             [bar.open for bar in bars],
                             [bar.high for bar in bars],
                             [bar.low for bar in bars],
                             [bar.close for bar in bars],
                             [bar.volume for bar in bars],
                             [bar.amount for bar in bars],
                             idxret ])
        # newIndex = np.column_stack([savedPal['index_{}'.format(idx)][:], idxArray])
        # One row per day: date, open, high, low, close, volume, amount, return.
        pd.DataFrame(np.transpose(idxArray)).to_csv(os.path.join(tempFilePath,'index_{}.csv'.format(idx)),index=False,header=False)
    # update stock info
    nCut = savedPal['N_cut'][0][0]  # 6000 (not referenced again in this function)
    nEnd = savedPal['N_end'][0][0]  # last end date id ex.6732
    stockNames = read_cell(savedPal, 'stockname')
    # Convert saved codes ('<6 digits>...<2-letter exchange>') to gm symbols
    # ('<exchange>SE.<6 digits>').
    savedStkcdsGM = ['.'.join([stk[-2:]+'SE',stk[:6]]) for stk in stockNames]
    savedStkNum = len(stockNames)
    listedStkcdsWind = w.wset('sectorconstituent','date={};sectorid=a001010100000000'.format(availableDateStr)).Data[1]
    # Stocks listed now but not yet present in the saved file.
    newStkcdsWind = sorted(list(set(listedStkcdsWind) - set(stockNames)))
    if newStkcdsWind:
        stockNames.extend( newStkcdsWind )
        newStkIpos = [int(tdt.strftime('%Y%m%d')) for tdt in w.wss(newStkcdsWind, 'ipo_date').Data[0]]
        newIpoIds = [(w.tdayscount(nextTrd,str(ipo)).Data[0][0]+nEnd) for ipo in newStkIpos]
        # Row layout: numeric code, IPO date, IPO date id, then five zeros.
        newStockip = pd.DataFrame([[int(newStkcdsWind[dumi][:6]), newStkIpos[dumi], newIpoIds[dumi],0,0,0,0,0] for dumi in range(len(newStkcdsWind))])
        newStockip.to_csv( os.path.join(tempFilePath,'stockip.csv'),index=False,header=False )
    else:
        # No new stocks: still write an (empty) stockip.csv.
        pd.DataFrame([]).to_csv(os.path.join(tempFilePath, 'stockip.csv'), index=False, header=False)
    newStkcdsGm = ['.'.join([stk[-2:]+'SE',stk[:6]]) for stk in newStkcdsWind]
    allStkcdsGM = savedStkcdsGM + newStkcdsGm  # all stocks, delisted included; same count as the Pal rows
    # allSecNames = pd.DataFrame(w.wss(stockNames,'sec_name').Data[0])
    allInstruments = md.get_instruments('SZSE', 1, 0) + md.get_instruments('SHSE', 1, 0)
    allInstrumentsDF = pd.DataFrame([[inds.symbol, inds.sec_name] for inds in allInstruments],columns=['symbol','sec_name']).set_index('symbol')
    allSecNames = allInstrumentsDF.loc[allStkcdsGM,'sec_name']
    allSecNames.to_csv( os.path.join(tempFilePath, 'sec_names.csv'), index=False, header=False )
    pd.DataFrame(newStkcdsWind).to_csv( os.path.join(tempFilePath, 'stockname.csv'), index=False, header=False )
    # update trade info
    pages = ['date','open','high','low','close','volume','amount','pctchg','flow_a_share','total_share','adjfct','adjprc','isst']
    newPal = {}
    # One zero-filled (stock x day) frame per page; zeros mark "no data".
    for page in pages:
        newPal[page] = pd.DataFrame(np.zeros([len(allStkcdsGM), newDateNum]),index=allStkcdsGM,columns=betweenDays)
    # Last saved slice of Pal, with one column per saved stock.
    # NOTE(review): rows 0, 4 and 15 are read below as date, close and
    # adjustment factor respectively -- confirm against the .mat layout.
    lastPal = pd.DataFrame(savedPal['Pal'][:,-1,:],columns=savedStkcdsGM)
    barsDaily = md.get_dailybars(','.join(allStkcdsGM), nextTrdStr, availableDateStr)
    for bar in barsDaily:
        tdt = bar.strtime[:10]
        stk = '.'.join([bar.exchange,bar.sec_id])
        newPal['date'].loc[stk, tdt] = int(tdt.replace('-',''))
        newPal['open'].loc[stk, tdt] = bar.open
        newPal['high'].loc[stk, tdt] = bar.high
        newPal['low'].loc[stk, tdt] = bar.low
        newPal['close'].loc[stk, tdt] = bar.close
        newPal['volume'].loc[stk, tdt] = bar.volume
        newPal['amount'].loc[stk, tdt] = bar.amount
        newPal['pctchg'].loc[stk, tdt] = bar.close/bar.pre_close - 1
        # Self-computed adjustment factor:
        #   prev close * (1 + today's return) / today's close
        #   s.t. (today's close * today's factor) / prev close = 1 + ret
        # No trade today: carry the previous day's factor forward (handled
        #   after this loop).
        # No trade on the previous day -- the previous close needs care:
        #   traded today: use the close of the last trading day before the
        #     suspension
        #   not traded today, not delisted: carry the previous factor forward
        #     (handled after this loop)
        #   not traded today, delisted: carry the previous factor forward
        #     (handled after this loop)
        # First listing day of a new stock: the factor is 1.
        # NOTE(review): the branch below sets 1 for every bar of a newly
        # added stock in the window, not only its first day -- confirm.
        if stk in newStkcdsGm:
            newPal['adjfct'].loc[stk, tdt] = 1
        else:
            noTrdLast = (lastPal.loc[0, stk] == 0) if tdt == nextTrdStr else (newPal['date'].loc[stk, betweenDays[betweenDays.index(tdt) - 1]] == 0)
            if noTrdLast:  # no trade on the previous day but traded today (otherwise it would not be in bars)
                lastBar = md.get_last_n_dailybars(stk, 2, end_time=tdt)[-1]
                newPal['adjfct'].loc[stk, tdt] = lastPal.loc[15, stk] * lastBar.close * (1 + newPal['pctchg'].loc[stk, tdt]) / bar.close
            else:
                preClose = lastPal.loc[4,stk] if tdt==nextTrdStr else newPal['close'].loc[stk,betweenDays[betweenDays.index(tdt)-1]]
                newPal['adjfct'].loc[stk, tdt] = lastPal.loc[15, stk] * preClose * (1 + newPal['pctchg'].loc[stk, tdt]) / bar.close
    # Fill factors still at zero (no bar that day) from the previous day:
    # from the saved Pal on the first day, from the prior column afterwards.
    for dumi,tdt in enumerate(betweenDays):
        idx = newPal['adjfct'].loc[:,tdt]==0
        idx = idx.values
        if tdt==nextTrdStr:
            newPal['adjfct'].loc[idx[:savedStkNum], tdt] = lastPal.loc[15,:].values[idx[:savedStkNum]]
        else:
            newPal['adjfct'].loc[idx, tdt] = newPal['adjfct'].loc[idx, betweenDays[dumi-1]]
    newPal['adjprc'] = newPal['adjfct']*newPal['close']
    shareBar = md.get_share_index(','.join(allStkcdsGM), nextTrdStr, availableDateStr)
    for bar in shareBar:
        # NOTE(review): assumes pub_date is formatted like the betweenDays
        # labels ('YYYY-MM-DD') -- confirm.
        tdt = bar.pub_date
        stk = bar.symbol
        newPal['flow_a_share'].loc[stk, tdt] = bar.flow_a_share
        newPal['total_share'].loc[stk, tdt] = bar.total_share
    # ST flag per stock (1 when 'ST' occurs in the security name), repeated
    # across all days.
    isST = np.array([int('ST' in sn) for sn in allSecNames.values])
    newPal['isst'] = pd.DataFrame(np.repeat(np.reshape(isST,(isST.shape[0],1)),len(betweenDays),axis=1), index=allStkcdsGM, columns=betweenDays)
    for page in newPal:
        newPal[page].to_csv(os.path.join(tempFilePath,'{}.csv'.format(page)),index=False,header=False )
    print('Pal temp files update finished with {0} stocks and {1} days in {2} seconds '.format(len(newStkcdsWind),len(betweenDays),time.time() - start))