def recordResampleTickTransactionDataFromSqlServer(self, code, startDate, endDate):
    """Cache resampled tick-transaction data of one stock, one HDF5 file per day.

    Daily K-line rows of ``code`` between ``startDate`` and ``endDate`` are
    loaded and rows whose ``status`` equals '停牌' (suspended) are dropped.
    For every remaining date the file
    ``<self.filePath>/<code with '.' replaced by '_'>/<date>.h5`` is written
    under key ``'data'`` unless it already exists.

    Returns:
        None in every case.
    """
    # Load the daily K-lines and keep only the dates that are not suspended.
    klineFrame = KLineDataProcess('daily').getDataByDate(code, startDate, endDate)
    if klineFrame.empty:
        logger.warning(f'There no daily data of {code} from {startDate} to {endDate}!')
        return
    tradableRows = klineFrame[klineFrame['status'] != '停牌']
    for date in list(tradableRows['date']):
        folder = os.path.join(self.filePath, code.replace('.', '_'))
        # `date` is concatenated with a str here, so it must itself be a str.
        target = os.path.join(folder, date + '.h5')
        HDF5Utility.pathCreate(folder)
        if os.path.isfile(target):
            logger.info(f'TickTransaction data of {code} in {date} is exists!')
            continue
        logger.info(f'get TickTransaction data of {code} in {date} from source!')
        resampled = self.__getResampleTickTransactionDataFromSqlServer(code, date)
        if resampled.empty:
            logger.warning(f'there is no data of {code} in {date} from source!')
            continue
        with pd.HDFStore(target, 'a', complib='blosc:zstd', append=True, complevel=9) as store:
            # append=False: the key is written fresh rather than extended.
            store.append('data', resampled, append=False, format="table",
                         data_columns=resampled.columns)
def getTickDataAndFactorsByDateFromLocalFile(self, code, date, factors=TICKFACTORSUSED):
    """Assemble tick factors, tick quotes and daily factors of one stock-day.

    Args:
        code: stock code passed through to the data repositories.
        date: trading date passed through to the data repositories.
        factors: iterable of dicts; only the ``'factor'`` entry of each is read.

    Returns:
        A DataFrame of the left-joined factor and tick data. When the join is
        empty it is returned as is. Otherwise every daily-factor column other
        than 'date', 'code' and 'return' is broadcast from the first daily
        row, 'preClose' and 'increaseToday' are added, and rows from the first
        tick where 'B1' or 'S1' equals 0 onwards are removed.
    """
    factorReader = factorBase()
    combined = pd.DataFrame()
    for spec in factors:
        factorFrame = factorReader.getDataFromLocalFile(code, date, spec['factor'])
        if len(combined) == 0:
            # Nothing collected yet: start from a copy of this factor's frame.
            combined = factorFrame.copy()
        elif len(factorFrame) != 0:
            combined = combined.merge(factorFrame, how='left',
                                      left_index=True, right_index=True)

    tickFrame = TickDataProcess().getDataByDateFromLocalFile(code, date)
    combined = combined.merge(tickFrame, how='left', left_index=True, right_index=True)
    if len(combined) == 0:
        return combined

    dailyFactors = dailyFactorsProcess().getSingleStockDailyFactors(code, date, date)
    skipped = ('date', 'code', 'return')
    for column in dailyFactors.columns:
        if column in skipped:
            continue
        combined[column] = dailyFactors[column].iloc[0]

    dailyKLine = KLineDataProcess('daily').getDataByDate(code, date, date)
    combined['preClose'] = dailyKLine['preClose'].iloc[0]
    combined['increaseToday'] = combined['midPrice'] / combined['preClose'] - 1

    # B1 == 0 or S1 == 0 is treated as hitting a price limit; keep only the
    # ticks strictly before the first such tick.
    limitRows = combined[(combined['B1'] == 0) | (combined['S1'] == 0)]
    if len(limitRows) > 0:
        firstLimitTick = limitRows['tick'].iloc[0]
        combined = combined[combined['tick'] < firstLimitTick]
    return combined
def recordResampleTickShotDataToInfluxdbFromSqlServer(self, code, startDate, endDate):
    """Save resampled tick-snapshot data of one stock to InfluxDB, day by day.

    Dates are taken from the daily K-lines of ``code`` between ``startDate``
    and ``endDate``, skipping rows whose ``status`` equals '停牌' (suspended).
    For each date the resampled frame is fetched; non-empty frames have their
    missing values replaced by 0 and are handed to
    ``InfluxdbUtility.saveDataFrameDataToInfluxdb``.

    Returns:
        None.
    """
    # Load the daily K-lines and keep only the dates that are not suspended.
    klineFrame = KLineDataProcess('daily').getDataByDate(code, startDate, endDate)
    if klineFrame.empty:
        return
    tradableRows = klineFrame[klineFrame['status'] != '停牌']
    for date in list(tradableRows['date']):
        snapshot = self.__getResampleTickShotDataFromSqlServer(code, date)
        if len(snapshot) == 0:
            continue
        InfluxdbUtility.saveDataFrameDataToInfluxdb(
            snapshot.fillna(0), INFLUXDBTICKTICKDATABASE, code, {})
def getStockData(self):
    """Request minute and daily K-lines for every CSI500/HS300/SSE50 member.

    The member list is the de-duplicated union of the three index component
    tables as of 20190404. For each code the minute and daily readers are
    asked for 20070101-20190327 and the results are discarded; progress is
    printed to stdout.
    """
    components = IndexComponentDataProcess(True)
    csi500 = components.getCSI500DataByDate(20190404, 20190404)
    hs300 = components.getHS300DataByDate(20190404, 20190404)
    sse50 = components.getSSE50DataByDate(20190404, 20190404)
    union = pd.concat([csi500, hs300, sse50], ignore_index=True)
    stockCodes = list(union['code'].drop_duplicates())

    minuteReader = KLineDataProcess('minute', True)
    dailyReader = KLineDataProcess('daily', True)
    for position, code in enumerate(stockCodes, start=1):
        print(datetime.datetime.now())
        minuteReader.getDataByDate(code, 20070101, 20190327)
        dailyReader.getDataByDate(code, 20070101, 20190327)
        # NOTE(review): the total of 800 is hard-coded in the message and is
        # not derived from len(stockCodes).
        print("{0}({1} of 800) complete!".format(code, position))
########################################################################
def reverse_singleCode(self, code, startDate, endDate):
    """Print the mean next-5-minute move after sharp intraday moves of one stock.

    For every trading day between ``startDate`` and ``endDate`` on which the
    stock has daily factors, a daily K-line and is not suspended ('停牌'),
    tick snapshots are left-joined with the tick factors and enriched with
    daily values. Ticks whose 60-tick mid-price change exceeds
    ``0.5 * closeStd20`` (up) or is below ``-0.5 * closeStd20`` (down) are
    collected. The mean of ``midIncreaseNext5m`` is then printed for two
    filtered subsets (one for up moves, one for down moves).

    Returns:
        An empty ``pd.DataFrame`` when there is no daily K-line data at all;
        otherwise an empty list (the function only prints its findings).
        NOTE(review): the two return types differ; kept for compatibility.
    """
    days = list(TradedayDataProcess().getTradedays(startDate, endDate))
    dailyRepo = dailyFactorsProcess()
    dailyFactor = dailyRepo.getSingleStockDailyFactors(code, startDate, endDate)
    dailyKLine = KLineDataProcess('daily')
    dailyData = dailyKLine.getDataByDate(code, startDate, endDate)
    if dailyData.empty:
        logger.error(
            f'there is no data of {code} from {startDate} to {endDate}')
        return pd.DataFrame()

    tick = TickDataProcess()
    tickfactor = tickFactorsProcess()
    mydata = []
    upMoves = []
    downMoves = []
    for today in days:
        todayInfo = dailyFactor[dailyFactor['date'] == today]
        if todayInfo.empty:
            logger.error(
                f'there is no factor data of {code} in date {today}')
            continue
        todayKLine = dailyData[dailyData['date'] == today]
        if todayKLine.empty:
            logger.error(
                f'there is no KLine data of {code} in date {today}')
            continue
        if todayKLine['status'].iloc[0] == '停牌':
            logger.warning(f'There is no data of {code} in {today}')
            continue

        tickData = tick.getTickShotDataFromInfluxdbServer(code, today)
        factors = tickfactor.getFactorsUsedByDateFromLocalFile(code, today)
        # Named `merged` rather than `all` so the builtin is not shadowed.
        merged = pd.merge(tickData, factors, how='left',
                          left_index=True, right_index=True)
        merged['closeStd20'] = todayInfo['closeStd20'].iloc[0]
        merged['ts_closeStd20'] = todayInfo['ts_closeStd20'].iloc[0]
        merged['adjFactor'] = todayKLine['adjFactor'].iloc[0]
        merged['preClose'] = todayKLine['preClose'].iloc[0]
        merged['increaseToday'] = merged['midPrice'] / merged['preClose'] - 1
        # Change of the mid price relative to 60 rows earlier.
        merged['midIncreasePrevious5m'] = (
            merged['midPrice'] / merged['midPrice'].shift(60) - 1)

        threshold = 0.5 * merged['closeStd20']
        upMoves.append(merged[merged['midIncreasePrevious5m'] > threshold])
        downMoves.append(merged[merged['midIncreasePrevious5m'] < -threshold])

    if not upMoves:
        # pd.concat raises ValueError on an empty list, so stop here when no
        # day produced any tick data.
        logger.warning(
            f'no tradable day of {code} from {startDate} to {endDate}')
        return mydata

    select1 = pd.concat(upMoves)
    select2 = pd.concat(downMoves)
    s = select1[(select1['ts_sellForceIncrease'] > 0.6)
                & (select1['ts_buySellVolumeRatio5'] > 0.6)
                & (select1['increaseToday'] > select1['closeStd20'])]
    print(s['midIncreaseNext5m'].mean())
    ss = select2[(select2['ts_buySellVolumeRatio5'] > 0.6)
                 & (select2['increaseToday'] < -select2['closeStd20'])]
    print(ss['midIncreaseNext5m'].mean())
    return mydata
########################################################################
class stockReverseMovement(object):
    """Stock abnormal-movement study, focused on the pullback after a sharp rise."""

    #----------------------------------------------------------------------
    def __init__(self):
        """Create the minute/daily K-line readers and the empty result frame."""
        self.__myMinute = KLineDataProcess('minute', True)
        self.__myDaily = KLineDataProcess('daily', True)
        self.__localFileStr = LocalFileAddress + "\\intermediateResult\\ceilingFeature.h5"
        self.__allMinute = pd.DataFrame()

    #----------------------------------------------------------------------
    def __getStockList(self):
        """Return the de-duplicated CSI500 + HS300 + SSE50 codes as of 20190404."""
        myindex = IndexComponentDataProcess()
        index500 = myindex.getCSI500DataByDate(20190404, 20190404)
        index300 = myindex.getHS300DataByDate(20190404, 20190404)
        index50 = myindex.getSSE50DataByDate(20190404, 20190404)
        merged = pd.concat([index500, index300, index50], ignore_index=True)
        return list(merged['code'].drop_duplicates())

    #----------------------------------------------------------------------
    def __dataPrepared(self):
        """Build per-minute features for every stock and collect selected rows.

        Rows whose ``increaseInDay`` lies strictly between 0.07 and 0.08 and
        which contain no missing value are added to ``self.__allMinute``.
        """
        mylist = self.__getStockList()
        self.__minuteData = {}
        self.__dailyData = {}
        total = len(mylist)
        selected = []
        # Columns that look ahead in time; blanked near the end of the session.
        futureColumns = [
            'ceilingInNext5m', 'maxLossInNext5m', 'ceilingInNext10m',
            'maxLossInNext10m'
        ]
        for num, code in enumerate(mylist, start=1):
            # The total is taken from the list instead of a hard-coded 50.
            print("{0}({1} of {2}) start!".format(code, num, total))
            print(datetime.datetime.now())
            self.__dataSelectOneByOne(code)
            d = self.__dailyData[code]
            m = self.__minuteData[code]

            # Daily flags: close equal to preClose * 1.1 rounded to 2 decimals
            # is marked as a limit-up ("ceiling") day.
            d['ceiling'] = 0
            d['ceilingYesterday'] = 0
            d['ceilingYesterday2'] = 0
            d['ceilingIn5Days'] = 0
            d.loc[(d['close'] == round(d['preClose'] * 1.1, 2)), 'ceiling'] = 1
            d.loc[(d['ceiling'].shift(1) == 1), 'ceilingYesterday'] = 1
            d.loc[((d['ceiling'].shift(1) == 1) &
                   (d['ceiling'].shift(2) == 1)), 'ceilingYesterday2'] = 1
            d['ceilingIn5Days'] = d['ceilingYesterday'].rolling(5).sum()

            # Minute returns are only set when the earlier row is on the same date.
            m.loc[(m['date'] == m['date'].shift(5)),
                  'increase5m'] = (m['open'] / m['open'].shift(5) - 1)
            m.loc[(m['date'] == m['date'].shift(1)),
                  'increase1m'] = (m['open'] / m['open'].shift(1) - 1)

            # Broadcast the daily values onto each minute row.
            d = d.set_index('date')
            dailyInfo = d.loc[m['date'], [
                'preClose', 'ceilingYesterday', 'ceilingYesterday2',
                'ceilingIn5Days'
            ]]
            dailyInfo.index = m.index
            m[[
                'yesterdayClose', 'ceilingYesterday', 'ceilingYesterday2',
                'ceilingIn5Days'
            ]] = dailyInfo

            m['increaseInDay'] = (m['open'] / m['yesterdayClose'] - 1)
            m['ceiling'] = 0
            m['return5m'] = 0
            m['return10m'] = 0
            m.loc[(m['low'] == round(m['yesterdayClose'] * 1.1, 2)),
                  'ceiling'] = 1
            m.loc[(m['open'].shift(-5) / m['open'] - 1) > 0.01, 'return5m'] = 1
            m.loc[(m['open'].shift(-5) / m['open'] - 1) < -0.01, 'return5m'] = -1
            m.loc[(m['open'].shift(-10) / m['open'] - 1) > 0.01, 'return10m'] = 1
            m.loc[(m['open'].shift(-10) / m['open'] - 1) < -0.01, 'return10m'] = -1
            m['ceilingInNext5m'] = m['ceiling'].shift(-5).rolling(5).max()
            # NOTE(review): the expression subtracts `open` and then another 1;
            # confirm this double offset is intended.
            m['maxLossInNext5m'] = round(
                (m['low'].shift(-5).rolling(5).min() - m['open']) / m['open'] - 1,
                2)
            m['ceilingInNext10m'] = m['ceiling'].shift(-10).rolling(10).max()
            m['maxLossInNext10m'] = round(
                (m['low'].shift(-10).rolling(10).min() - m['open']) / m['open'] - 1,
                2)
            # Blank the look-ahead columns after 14:50. This must be one .loc
            # assignment: the former chained form m[mask][cols] = None wrote
            # to a temporary copy and left `m` untouched.
            m.loc[m['minute'] > '1450', futureColumns] = float('nan')

            mselect = m[(m['increaseInDay'] > 0.07) & (m['increaseInDay'] < 0.08)]
            mselect = mselect.dropna(axis=0, how='any')
            selected.append(mselect)

        if selected:
            # DataFrame.append no longer exists in pandas 2.x; concatenate once.
            self.__allMinute = pd.concat([self.__allMinute] + selected)

    #----------------------------------------------------------------------
    def __dataSelectOneByOne(self, code):
        """Load one stock and keep minute rows of days with high > preClose * 1.05."""
        m = self.__myMinute.getDataByDate(code, self.startDate, self.endDate)
        d = self.__myDaily.getDataByDate(code, self.startDate, self.endDate)
        days = list(d[d['high'] > d['preClose'] * 1.05]['date'])
        # An explicit copy, because columns are added to this frame later.
        m = m.loc[m['date'].isin(days), :].copy()
        self.__dailyData[code] = d
        self.__minuteData[code] = m

    #----------------------------------------------------------------------
    def reverse(self, startDate, endDate):
        """Compute the features for the date range and store them under key 'ceiling'.

        The table in ``self.__localFileStr`` is overwritten (``append=False``).
        """
        self.startDate = startDate
        self.endDate = endDate
        self.tradeDays = TradedayDataProcess.getTradedays(startDate, endDate)
        self.__dataPrepared()
        # The context manager closes the store even if append raises.
        with pd.HDFStore(self.__localFileStr, 'a') as store:
            store.append('ceiling',
                         self.__allMinute,
                         append=False,
                         format="table",
                         data_columns=[
                             'code', 'date', 'minute', 'open', 'high', 'low',
                             'close', 'volume', 'amount', 'increase5m',
                             'increase1m', 'yesterdayClose',
                             'ceilingYesterday', 'ceilingYesterday2',
                             'ceilingIn5Days', 'increaseInDay', 'ceiling',
                             'ceilingInNext5m', 'ceilingInNext10m',
                             'maxLossInNext5m', 'maxLossInNext10m'
                         ])
def __init__(self):
    """Set up the minute and daily K-line readers, the HDF5 path and the result frame."""
    minuteReader = KLineDataProcess('minute', True)
    dailyReader = KLineDataProcess('daily', True)
    featureFile = LocalFileAddress + "\\intermediateResult\\ceilingFeature.h5"
    self.__myMinute = minuteReader
    self.__myDaily = dailyReader
    self.__localFileStr = featureFile
    # Starts empty; filled by later processing.
    self.__allMinute = pd.DataFrame()
def reverse_singleCode(self,
                       code,
                       startDate,
                       endDate,
                       parameters=(300, 100000000, 1.5)):
    """Back-test an intraday reversal strategy on the tick data of one stock.

    A short is opened when today's move exceeds ``std1 * closeStd20`` and a
    long when it is below ``-std1 * closeStd20``. A position is closed when it
    is losing 400 ticks after opening, 1200 ticks after opening, at tick
    index 4700, or on the first possible tick when it was carried over from
    the previous day.

    Args:
        code: stock code.
        startDate: first date of the back-test.
        endDate: last date of the back-test.
        parameters: sequence ``(index, totalCash, std1)``. ``index`` selects
            the weight column (300, 500 or 50) used to size the position; any
            other value sizes it as ``totalCash * 0.001 / preClose``.

    Returns:
        A DataFrame with one row per closed trade (open/close time and price,
        position, profit, fee, netProfit, code) left-joined with the daily
        factor columns on 'date'. An empty DataFrame is returned when there is
        no daily K-line data.

    Tick rows are accessed by position. This assumes the column order
    code, date, tick, lastPrice, S1..S10, B1..B10, SV1..SV10, BV1..BV10, ...
    (taken from the column list in the original comment) — TODO confirm
    against TickDataProcess.
    """
    myindex = parameters[0]
    totalCash = parameters[1]
    std1 = parameters[2]
    days = list(TradedayDataProcess().getTradedays(startDate, endDate))
    dailyRepo = dailyFactorsProcess()
    dailyFactor = dailyRepo.getSingleStockDailyFactors(code, startDate, endDate)
    dailyKLine = KLineDataProcess('daily')
    dailyData = dailyKLine.getDataByDate(code, startDate, endDate)
    if dailyData.empty:
        logger.error(
            f'there is no data of {code} from {startDate} to {endDate}')
        return pd.DataFrame()

    tick = TickDataProcess()
    myTradeList = []
    myStatus = {}
    myTrade = {}
    positionYesterday = 0
    openIndex = 0
    for today in days:
        todayInfo = dailyFactor[dailyFactor['date'] == today]
        if todayInfo.empty:
            logger.error(
                f'there is no factor data of {code} in date {today}')
            continue
        todayKLine = dailyData[dailyData['date'] == today]
        if todayKLine.empty:
            logger.error(
                f'there is no KLine data of {code} in date {today}')
            continue

        myStatus['date'] = today
        myStatus['closeStd20'] = todayInfo['closeStd20'].iloc[0]
        myStatus['weight50'] = todayInfo['weight50'].iloc[0]
        myStatus['weight300'] = todayInfo['weight300'].iloc[0]
        myStatus['weight500'] = todayInfo['weight500'].iloc[0]
        # Read from its own column (previously a copy of closeStd20).
        myStatus['ts_closeStd20'] = todayInfo['ts_closeStd20'].iloc[0]
        myStatus['adjFactor'] = todayKLine['adjFactor'].iloc[0]
        myStatus['preClose'] = todayKLine['preClose'].iloc[0]
        positionNow = positionYesterday

        if todayKLine['status'].iloc[0] == '停牌':
            logger.warning(f'There is no data of {code} in {today}')
            continue

        # Position size: index weight (in percent) of the total cash,
        # converted to shares at yesterday's close.
        if myindex == 300:
            maxPosition = myStatus['weight300'] * totalCash * 0.01 / myStatus['preClose']
        elif myindex == 500:
            # Uses the CSI500 weight (previously weight300 by mistake).
            maxPosition = myStatus['weight500'] * totalCash * 0.01 / myStatus['preClose']
        elif myindex == 50:
            maxPosition = myStatus['weight50'] * totalCash * 0.01 / myStatus['preClose']
        else:
            # totalCash is a local value, not a key of myStatus.
            maxPosition = totalCash * 0.001 / myStatus['preClose']
        maxPosition = round(maxPosition, -2)  # round to whole lots of 100
        if maxPosition == 0:
            continue

        tickData = tick.getDataByDateFromLocalFile(code, today)
        # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
        # equivalent that works on old and new versions.
        tickList = tickData.values
        for i in range(0, len(tickList) - 60):
            now = tickList[i]
            lastPrice = now[3]
            # NOTE(review): the slice stops before index 43 (BV10 in the
            # assumed layout) — confirm this is what TradeUtility expects.
            tickShot = now[4:43]
            upCeiling = now[24] == 0  # SV1 == 0 in the assumed layout
            downCeiling = now[34] == 0  # BV1 == 0 in the assumed layout
            mytime = datetime.datetime.strptime(now[1] + now[2],
                                                '%Y%m%d%H%M%S%f')
            increaseToday = lastPrice / myStatus['preClose'] - 1
            if i >= 100 and tickList[i - 100][3] > 0:
                increase5m = lastPrice / tickList[i - 100][3] - 1
            else:
                increase5m = np.nan

            canOpen = (positionNow == 0 and positionYesterday == 0
                       and 100 <= i <= 4500 and maxPosition > 0)
            if (canOpen and increaseToday > std1 * myStatus['closeStd20']
                    and not downCeiling):
                # Open a short position.
                price, deltaPosition, _ = TradeUtility.sellByTickShotData(
                    tickShot, maxPosition, 0.001)
                positionNow = -deltaPosition
                myTrade['date'] = today
                myTrade['opentime'] = mytime
                myTrade['position'] = positionNow
                myTrade['open'] = price
                myTrade['openAdj'] = myStatus['adjFactor']
                myTrade['increase5m'] = increase5m
                myTrade['increaseToday'] = increaseToday
                openIndex = i
                maxPosition = maxPosition - deltaPosition
            elif (canOpen and increaseToday < -std1 * myStatus['closeStd20']
                  and not upCeiling):
                # Open a long position.
                price, deltaPosition, _ = TradeUtility.buyByTickShotData(
                    tickShot, maxPosition, 0.001)
                positionNow = deltaPosition
                myTrade['date'] = today
                myTrade['opentime'] = mytime
                myTrade['position'] = positionNow
                myTrade['open'] = price
                myTrade['openAdj'] = myStatus['adjFactor']
                myTrade['increase5m'] = increase5m
                myTrade['increaseToday'] = increaseToday
                maxPosition = maxPosition - deltaPosition
                openIndex = i
            elif positionNow > 0:
                openValue = myTrade['open'] * myTrade['openAdj']
                profit = (lastPrice * myStatus['adjFactor'] - openValue) / openValue
                shouldExit = ((profit < 0 and i >= openIndex + 400)
                              or i >= 4700 or i >= openIndex + 1200
                              or positionYesterday != 0)
                if shouldExit and not downCeiling:
                    # Not at the lower limit: close the long and record the trade.
                    price, deltaPosition, _ = TradeUtility.sellByTickShotData(
                        tickShot, positionNow, 0.1)
                    myTrade['closetime'] = mytime
                    myTrade['close'] = price
                    myTrade['closeAdj'] = myStatus['adjFactor']
                    positionNow = 0
                    positionYesterday = 0
                    openIndex = 0
                    myTradeList.append(copy.deepcopy(myTrade))
                    myTrade = {}
            elif positionNow < 0:
                openValue = myTrade['open'] * myTrade['openAdj']
                profit = (-lastPrice * myStatus['adjFactor'] + openValue) / openValue
                shouldExit = ((profit < 0 and i >= openIndex + 400)
                              or i >= 4700 or i >= openIndex + 1200
                              or positionYesterday != 0)
                if shouldExit and not upCeiling:
                    # Not at the upper limit: close the short and record the trade.
                    price, deltaPosition, _ = TradeUtility.buyByTickShotData(
                        tickShot, -positionNow, 0.1)
                    myTrade['closetime'] = mytime
                    myTrade['close'] = price
                    myTrade['closeAdj'] = myStatus['adjFactor']
                    positionNow = 0
                    positionYesterday = 0
                    openIndex = 0
                    myTradeList.append(copy.deepcopy(myTrade))
                    myTrade = {}

        if positionNow != 0:
            logger.info(f'{code} of {today} can not close the position!')
        # Carry any open position over to the next trading day.
        positionYesterday = positionNow

    m = pd.DataFrame(data=myTradeList,
                     columns=[
                         'date', 'opentime', 'position', 'open', 'openAdj',
                         'increase5m', 'increaseToday', 'closetime', 'close',
                         'closeAdj'
                     ])
    m['profit'] = m['position'] * (m['close'] * m['closeAdj'] -
                                   m['open'] * m['openAdj']) / m['openAdj']
    m['fee'] = m['position'].abs() * m[['open', 'close']].max(axis=1) * 0.0012
    m['netProfit'] = m['profit'] - m['fee']
    m['code'] = code
    mycolumns = [
        'date', 'closeStd20', 'ts_closeStd20', 'industry', 'industryName'
    ]
    dailyFactor = dailyFactor[mycolumns]
    mydata = pd.merge(m, dailyFactor, how='left', left_on='date', right_on='date')
    return mydata
########################################################################
def dataPrepared(self, stockCodes, startDate, endDate, recordFilePath):
    """Build per-minute feature tables and store one HDF5 key per stock.

    Args:
        stockCodes: codes to process; a code whose key already exists in
            ``recordFilePath`` is skipped.
        startDate: first date of the range.
        endDate: last date of the range.
        recordFilePath: HDF5 file that receives one table per code, indexed
            by ``['timeStamp', 'code']``.

    Side effects:
        Calls ``warnings.filterwarnings('ignore')``, which changes the
        process-wide warning filter. Prints progress to stdout.
    """
    warnings.filterwarnings('ignore')
    mytradedays = TradedayDataProcess.getTradedays(startDate, endDate)
    mylist = stockCodes
    myMinute = KLineDataProcess('minute', False)

    def lastRankShare(window):
        """Rank of the window's last value divided by the window length."""
        return pd.Series(window).rank().iloc[-1] / len(window)

    # Context managers close the stores even when a step below raises.
    with pd.HDFStore(self.__factorsAddress, 'a') as store:
        allDailyData = store.select(
            self.__key,
            where=['date>="%s" and date<="%s"' % (startDate, endDate)])

    deleteColumns = [
        'code', 'date', 'open', 'high', 'low', 'close', 'volume', 'amount',
        'change', 'pctChange', 'vwap'
    ]
    with pd.HDFStore(recordFilePath, 'a') as store:
        oldKeys = set(store.keys())
        for num, code in enumerate(mylist, start=1):
            print("{0}({1} of {2}) start!".format(str(code), num, len(mylist)))
            if ('/' + code) in oldKeys:
                continue
            m = myMinute.getDataByDate(code, startDate, endDate)
            if len(m) == 0:
                continue
            m = m[m['date'].isin(mytradedays)]
            m['vwap'] = m['amount'] / m['volume']
            # Returns are only set when the earlier row is on the same date.
            m.loc[(m['date'] == m['date'].shift(5)),
                  'increase5m'] = (m['open'] / m['open'].shift(5) - 1)
            m.loc[(m['date'] == m['date'].shift(1)),
                  'increase1m'] = (m['open'] / m['open'].shift(1) - 1)

            # Broadcast the daily factor row of each date onto its minute rows.
            d = allDailyData.xs(code, level='code')
            dailyInfo = d.loc[m['date']]
            mycolumns = [
                col for col in dailyInfo.columns if col not in deleteColumns
            ]
            dailyInfo = dailyInfo[mycolumns].rename(
                columns={'preClose': 'yesterdayClose'})
            dailyInfo.index = m.index
            m = pd.concat([m, dailyInfo], axis=1)

            m['increaseInDay'] = (m['open'] / m['yesterdayClose'] - 1)
            m = m[m['status'] != 'N']
            m['canBuy'] = 0
            m['canSell'] = 0
            m['canBuyPrice'] = None
            m['canSellPrice'] = None
            # Buyable below 1.097x and sellable above 0.903x of yesterday's
            # close, both requiring a positive open.
            m.loc[((m['open'] < round(1.097 * m['yesterdayClose'], 2)) &
                   (m['open'] > 0)), 'canBuy'] = 1
            m.loc[((m['open'] > round(0.903 * m['yesterdayClose'], 2)) &
                   (m['open'] > 0)), 'canSell'] = 1
            m.loc[m['canBuy'] == 1, 'canBuyPrice'] = m.loc[m['canBuy'] == 1, 'open']
            m.loc[m['canBuy'] == 1, 'canBuyPriceAdj'] = m.loc[m['canBuy'] == 1,
                                                              'adjFactor']
            # Rows that cannot trade take the next tradable row's price.
            # .bfill() replaces the deprecated fillna(method='bfill').
            m['canBuyPrice'] = m['canBuyPrice'].bfill()
            m['canBuyPriceAdj'] = m['canBuyPriceAdj'].bfill()
            m.loc[m['canSell'] == 1, 'canSellPrice'] = m.loc[m['canSell'] == 1,
                                                             'open']
            m.loc[m['canSell'] == 1, 'canSellPriceAdj'] = m.loc[m['canSell'] == 1,
                                                                'adjFactor']
            m['canSellPrice'] = m['canSellPrice'].bfill()
            m['canSellPriceAdj'] = m['canSellPriceAdj'].bfill()
            m['timeStamp'] = m['date'] + m['tick']

            # Intraday minute information.
            # Rank share of the previous minute's volume within a 20-row window.
            m['ts_rank_volume'] = m['volume'].shift(1).rolling(
                20, min_periods=15).apply(lastRankShare, raw=True)
            # Per-minute return.
            m['minuteReturn'] = (m['close'] - m['close'].shift(1)) / m['close'].shift(1)
            # Standard deviation of the previous 20 minute returns.
            m['minuteStd20'] = m['minuteReturn'].shift(1).rolling(
                20, min_periods=17).std()
            # Rank share of that standard deviation within a 20-row window.
            m['ts_rank_minuteStd20'] = m['minuteStd20'].rolling(
                20, min_periods=15).apply(lastRankShare, raw=True)

            mselect = m.set_index(['timeStamp', 'code'])
            store.append(code, mselect, append=False, format="table")
def updateSingleStockMinuteKLines(self, code, startDate, endDate):
    """Request minute K-lines of one stock for the date range; the result is discarded.

    Delegates to ``KLineDataProcess('minute', True).getDataByDate``.
    Presumably the ``True`` flag makes the reader refresh its stored data —
    confirm against KLineDataProcess.
    """
    KLineDataProcess('minute', True).getDataByDate(code, startDate, endDate)
def getStockDailyFeatureData(self, stockCodes, startDate, endDate):
    """Extend the stored daily feature table and rebuild the ranked copy.

    For every code, daily K-lines from the day after its latest stored date
    (or from ``startDate`` when the code is not stored yet) up to ``endDate``
    are enriched with industry, index membership, free-float values, the
    daily return, ``closeStd20`` and its rolling rank. Only the new rows are
    appended to ``self.key`` in ``self.localFileStr``. The complete table
    plus ``rankMarketValue`` is then written to ``self.keyWithRank`` in
    ``self.localFileStrWithRank``, replacing what was there.

    Args:
        stockCodes: codes to update.
        startDate: earliest date to fetch.
        endDate: latest date to fetch.
    """
    myDaily = KLineDataProcess('daily')
    myDailyDerivative = KLineDataProcess('dailyDerivative')
    myindex = IndexComponentDataProcess()
    if os.path.isfile(self.localFileStr):
        with pd.HDFStore(self.localFileStr, 'a') as store:
            allData = store.select(self.key)
        allData.reset_index(inplace=True, drop=False)
        existsCodes = set(allData['code'])
    else:
        allData = pd.DataFrame()
        existsCodes = set()

    newFrames = []
    total = len(stockCodes)
    for num, code in enumerate(stockCodes, start=1):
        print("{0}({1} of {2}) start!".format(code, num, total))
        # Default for codes not stored yet; without it startNow was unbound
        # (or left over from the previous code).
        startNow = startDate
        if code in existsCodes:
            stockNow = allData[allData['code'] == code]
            latestDate = stockNow['date'].max()
            if endDate <= latestDate:
                continue
            startNow = max(startDate,
                           TradedayDataProcess.getNextTradeday(latestDate))
            if startNow > endDate:
                continue

        mydata = myDaily.getDataByDate(code, startNow, endDate)
        mydata.set_index('date', drop=True, inplace=True)
        myindustry = IndustryClassification.getIndustryByCode(
            code, startNow, endDate)
        mydata['industry'] = myindustry['industry']
        mydata['industryName'] = myindustry['name']
        myIndexBelongs50 = myindex.getStockBelongs(code, SSE50, startNow, endDate)
        myIndexBelongs300 = myindex.getStockBelongs(code, HS300, startNow, endDate)
        myIndexBelongs500 = myindex.getStockBelongs(code, CSI500, startNow, endDate)
        mydata['is50'] = myIndexBelongs50['exists']
        mydata['is300'] = myIndexBelongs300['exists']
        mydata['is500'] = myIndexBelongs500['exists']
        mydataDerivative = myDailyDerivative.getDataByDate(code, startNow, endDate)
        mydataDerivative.set_index('date', inplace=True)
        mydata['freeShares'] = mydataDerivative['freeShares']
        mydata['freeMarketValue'] = mydataDerivative['freeMarketValue']
        mydata['return'] = (mydata['close'] - mydata['preClose']) / mydata['preClose']
        # Yesterday's standard deviation is used for today, so that no future
        # data leaks into the feature.
        mydata['closeStd20'] = mydata['return'].shift(1).rolling(
            20, min_periods=17).std()
        mydata['ts_rank_closeStd20'] = mydata['closeStd20'].rolling(
            50, min_periods=20).apply(
                (lambda x: pd.Series(x).rank().iloc[-1] / len(x)), raw=True)
        mydata.reset_index(inplace=True)
        newFrames.append(mydata)

    if newFrames:
        # Append only the new rows: re-appending the rows that were just read
        # from the store would duplicate them on every run.
        newData = pd.concat(newFrames).set_index(['date', 'code'])
        with pd.HDFStore(self.localFileStr, 'a') as store:
            store.append(self.key, newData, append=True, format="table")
        # DataFrame.append no longer exists in pandas 2.x.
        allData = pd.concat([allData] + newFrames)

    allData = allData.set_index(['date', 'code'])
    # Rank of the free-float market value across codes per date, scaled by
    # the largest rank of that date.
    unstack = allData.unstack()
    rankMv = unstack['freeMarketValue'].rank(axis=1)
    mvMax = rankMv.max(axis=1)
    rankMv = rankMv.div(mvMax, axis=0)
    allData['rankMarketValue'] = rankMv.stack()
    with pd.HDFStore(self.localFileStrWithRank, 'a') as store:
        store.append(self.keyWithRank, allData, append=False, format="table")
def updateStockDailyFactors(self, codeList, factorList):
    """Incrementally update per-stock daily factor files.

    For every ``code`` and ``factor`` the file
    ``<self.localFileFolder>/<factor>/<code with '.' -> '_'>.h5`` holds the
    keys ``'date'`` and ``'factors'``. A new file is computed from the listing
    date; an existing file is extended from its latest stored date, after
    stepping the start back with ``getPreviousTradeday(startDate, 100)`` so
    the rolling factors have history. Only rows after the latest stored date
    are appended.

    Args:
        codeList: stock codes to update.
        factorList: factor names; 'closeStd', 'industry', 'index' and
            'marketValue' have dedicated column sets, any other name stores
            the daily K-line frame unchanged.
    """
    for code in codeList:
        logger.info(f'{code} factor update start!')
        IPOInfo = StockIPOInfoProcess.getStockIPOInfoByCode(code)
        listDate = IPOInfo['listDate'].iloc[0]
        delistDate = IPOInfo['delistDate'].iloc[0]
        for factor in factorList:
            fileName = code.replace('.', '_') + ".h5"
            factorFilePath = os.path.join(self.localFileFolder, factor, fileName)
            path = os.path.join(self.localFileFolder, factor)
            HDF5Utility.pathCreate(path)
            if not os.path.isfile(factorFilePath):
                startDate = listDate
                endDate = self.endDate
                lastDate = EMPTY_STRING
            else:
                with pd.HDFStore(path=factorFilePath,
                                 mode='r',
                                 complib='blosc:zstd',
                                 append=True,
                                 complevel=9) as store:
                    existsDate = store['date']
                lastDate = existsDate.max()
                startDate = lastDate
                endDate = self.endDate
                if startDate >= endDate:
                    continue
                # Step back so the rolling windows below have enough history.
                startDate = TradedayDataProcess.getPreviousTradeday(
                    startDate, 100)
            if startDate < listDate:
                startDate = listDate
            if endDate > delistDate:
                endDate = delistDate

            dailyData = KLineDataProcess('daily')
            marketData = dailyData.getDataByDate(code, startDate, endDate)
            if marketData.empty:
                # Nothing could be stored anyway; skip before max() turns
                # endDate into NaN.
                logger.warning(
                    f'there is no daily data of {code} from {startDate} to {endDate}')
                continue
            # Factors are computed up to the last day of the daily K-lines.
            endDate = marketData['date'].max()
            mydata = marketData

            if factor == 'closeStd':
                myReturnFun = ComputeUtility.computeReturn
                myStdFun = ComputeUtility.computeStandardDeviation
                myTSRank = ComputeUtility.computeTimeSeriesRank
                mydata = self.__computeStockDailyFactor(
                    mydata, myReturnFun, ['close', 'preClose', 'return'])
                # Returns on suspended days and on the listing day are blanked.
                mydata.loc[mydata['status'] == '停牌', 'return'] = np.nan
                mydata.loc[mydata['date'] == listDate, 'return'] = np.nan
                mydata = self.__computeStockDailyFactor(
                    mydata, myStdFun, ['return', 'closeStd20', 20, 0.8])
                mydata = self.__computeStockDailyFactor(
                    mydata, myTSRank, ['closeStd20', 'ts_closeStd20', 50, 0.4])
                mycolumns = ['date', 'return', 'closeStd20', 'ts_closeStd20']
                mydata = mydata[mycolumns]
            elif factor == 'industry':
                mydata.set_index('date', drop=True, inplace=True)
                myindustry = IndustryClassification.getIndustryByCode(
                    code, startDate, endDate)
                mydata['industry'] = myindustry['industry']
                mydata['industryName'] = myindustry['name']
                mydata.reset_index(drop=False, inplace=True)
                mycolumns = ['date', 'industry', 'industryName']
                mydata = mydata[mycolumns]
            elif factor == 'index':
                myindex = IndexComponentDataProcess()
                myIndexBelongs50 = myindex.getStockBelongs(
                    code, SSE50, startDate, endDate)
                myIndexBelongs300 = myindex.getStockBelongs(
                    code, HS300, startDate, endDate)
                myIndexBelongs500 = myindex.getStockBelongs(
                    code, CSI500, startDate, endDate)
                myStockWeightOf50 = IndexComponentDataProcess.getStockPropertyInIndex(
                    code, SSE50, startDate, endDate)
                myStockWeightOf300 = IndexComponentDataProcess.getStockPropertyInIndex(
                    code, HS300, startDate, endDate)
                myStockWeightOf500 = IndexComponentDataProcess.getStockPropertyInIndex(
                    code, CSI500, startDate, endDate)
                mydata.set_index('date', drop=True, inplace=True)
                mydata['is50'] = myIndexBelongs50['exists']
                mydata['is300'] = myIndexBelongs300['exists']
                mydata['is500'] = myIndexBelongs500['exists']
                mydata['weight50'] = myStockWeightOf50['weight']
                mydata['weight300'] = myStockWeightOf300['weight']
                mydata['weight500'] = myStockWeightOf500['weight']
                mydata.reset_index(drop=False, inplace=True)
                mycolumns = [
                    'date', 'is50', 'is300', 'is500', 'weight50', 'weight300',
                    'weight500'
                ]
                mydata = mydata[mycolumns]
            elif factor == 'marketValue':
                mydata.set_index('date', drop=True, inplace=True)
                myDailyDerivative = KLineDataProcess('dailyDerivative')
                mydataDerivative = myDailyDerivative.getDataByDate(
                    code, startDate, endDate)
                mydataDerivative.set_index('date', inplace=True)
                mydata['freeShares'] = mydataDerivative['freeShares']
                mydata['freeMarketValue'] = mydataDerivative['freeMarketValue']
                mydata.reset_index(drop=False, inplace=True)
                mycolumns = ['date', 'freeShares', 'freeMarketValue']
                mydata = mydata[mycolumns]

            try:
                with pd.HDFStore(path=factorFilePath,
                                 mode='a',
                                 complib='blosc:zstd',
                                 append=True,
                                 complevel=9) as store:
                    if lastDate != EMPTY_STRING:
                        # Keep only the rows that are not stored yet.
                        mydata = mydata[(mydata['date'] > lastDate)]
                    mydate = mydata['date']
                    if not mydata.empty:
                        store.append('date',
                                     mydate,
                                     append=True,
                                     format="table",
                                     data_columns=['date'],
                                     complevel=9)
                        store.append('factors',
                                     mydata,
                                     append=True,
                                     format="table",
                                     data_columns=mydata.columns,
                                     complevel=9)
            except Exception as excp:
                # Best effort: log and carry on with the next factor. The
                # message names the factor; it used to reference an undefined
                # `date`, which raised NameError inside this handler.
                logger.error(f'{code} in {factor} error! {excp}')
                logger.error(traceback.format_exc())
def updateMinuteIndexKLines(self, code, startDate, endDate):
    """Request minute index K-lines of one code for the date range; the result is discarded.

    Delegates to ``KLineDataProcess('minuteIndex', True).getDataByDate``.
    """
    KLineDataProcess('minuteIndex', True).getDataByDate(code, startDate, endDate)
def updateMultipleStocksDailyDerivatives(self, codeList, startDate, endDate):
    """Run the 'dailyDerivative' reader's parallel update over ``codeList``.

    Delegates to
    ``KLineDataProcess('dailyDerivative', True).parallelizationUpdateDataByDate``
    and returns nothing.
    """
    derivativeReader = KLineDataProcess('dailyDerivative', True)
    derivativeReader.parallelizationUpdateDataByDate(codeList, startDate, endDate)
def updateSingleStockDailyDerivatives(self, code, startDate, endDate):
    """Request daily derivative data of one stock for the date range; the result is discarded.

    Delegates to ``KLineDataProcess('dailyDerivative', True).getDataByDate``.
    """
    KLineDataProcess('dailyDerivative', True).getDataByDate(code, startDate, endDate)
def updateMultipleStocksMinuteKLines(self, codeList, startDate, endDate):
    """Run the 'minute' reader's parallel update over ``codeList``.

    Delegates to
    ``KLineDataProcess('minute', True).parallelizationUpdateDataByDate`` and
    returns nothing.
    """
    minuteReader = KLineDataProcess('minute', True)
    minuteReader.parallelizationUpdateDataByDate(codeList, startDate, endDate)
def saveAllFactorsByCodeAndDate(self, code, date):
    """Write the factor columns of one stock-day to its own HDF5 file.

    The target is ``<self.path>/<code with '.' -> '_'>/<date>.h5``, key
    ``'data'``. Nothing is written when the file already exists, when the
    joined factor/tick data is empty, or when no row survives the filters.
    Only columns that are neither tick-quote columns nor listed in the
    exclusion list are stored, in sorted order.

    Returns:
        None.
    """
    folder = os.path.join(self.path, str(code).replace('.', '_'))
    HDF5Utility.pathCreate(folder)
    fileName = os.path.join(folder, str(date) + '.h5')
    alreadySaved = HDF5Utility.fileCheck(fileName)
    if alreadySaved == True:
        # The file already exists: nothing to do.
        return

    # The instance is not used afterwards; the call is kept as in the
    # original sequence.
    factorBase()

    # Tick factor data.
    frame = self.getFactorsUsedByDateFromLocalFile(code, date, self.factorsUsed)
    # Tick quote data.
    tickData = TickDataProcess().getDataByDateFromLocalFile(code, date)
    frame = frame.merge(tickData, how='left', left_index=True, right_index=True)
    if len(frame) == 0:
        return

    # Daily data. The factor result is not used; the call is kept as is.
    dailyFactorsProcess().getSingleStockDailyFactors(code, date, date)
    dailyKLine = KLineDataProcess('daily').getDataByDate(code, date, date)
    frame['preClose'] = dailyKLine['preClose'].iloc[0]
    frame['increaseToday'] = frame['midPrice'] / frame['preClose'] - 1
    frame = frame[frame['tick'] < '145700000']

    # Drop everything from the first limit-up/limit-down tick onwards.
    limitRows = frame[(frame['B1'] == 0) | (frame['S1'] == 0)]
    if len(limitRows) > 0:
        firstLimitTick = limitRows['tick'].iloc[0]
        frame = frame[frame['tick'] < firstLimitTick]

    excludedColumns = [
        'preClose', 'buyVolume2', 'buyVolume5', 'buyVolume10', 'sellVolume2',
        'sellVolume5', 'sellVolume10'
    ]
    keptColumns = sorted(
        set(frame.columns) - set(tickData.columns) - set(excludedColumns))
    frame = frame[keptColumns]
    if len(frame) == 0:
        return

    try:
        logger.info(f'Recording factors of {code} in {date}!')
        with pd.HDFStore(fileName, 'a', complib='blosc:zstd', append=True,
                         complevel=9) as store:
            store.append('data', frame, append=True, format="table",
                         data_columns=frame.columns)
    except Exception as excp:
        logger.error(f'{fileName} error! {excp}')
def __updateStockDailyBasicData(self, code, startDate, endDate):
    """Return the daily K-line data of ``code`` between ``startDate`` and ``endDate``.

    The value is whatever ``KLineDataProcess('daily', True).getDataByDate``
    returns.
    """
    return KLineDataProcess('daily', True).getDataByDate(code, startDate, endDate)
def updateSingleStockDailyKLines(self, code, startDate, endDate):
    """Request daily K-lines of one stock for the date range; the result is discarded.

    Delegates to ``KLineDataProcess('daily', True).getDataByDate``.
    """
    KLineDataProcess('daily', True).getDataByDate(code, startDate, endDate)