Beispiel #1
0
 def getNeedToUpdateDaysOfFactor(self, code, factor):
     """Return the ``[startDate, endDate]`` range still missing for a factor.

     ``self.setIPODate(code)`` is called first and ``self.listDate`` /
     ``self.delistDate`` are read afterwards (presumably it refreshes them
     for ``code`` -- confirm against its definition).

     Args:
         code: Stock code forwarded to ``setIPODate`` and ``getLastDate``.
         factor: Factor name forwarded to ``getLastDate``.

     Returns:
         A two-element list. ``endDate`` is the smaller of the delist date
         and the trade day preceding today (``%Y%m%d`` string). ``startDate``
         is the list date when ``getLastDate`` returns ``EMPTY_STRING``,
         otherwise the larger of the list date and the trade day following
         the last stored date.
     """
     self.setIPODate(code)
     firstListed, delisted = self.listDate, self.delistDate
     todayStr = datetime.datetime.now().strftime("%Y%m%d")
     endDate = min(delisted,
                   TradedayDataProcess.getPreviousTradeday(todayStr))
     lastStored = self.getLastDate(code, factor)
     if lastStored == EMPTY_STRING:
         # Nothing stored yet: start from the listing date.
         return [firstListed, endDate]
     resumeDay = TradedayDataProcess.getNextTradeday(lastStored)
     return [max(firstListed, resumeDay), endDate]
Beispiel #2
0
    def __computerFactor(self, code, startDate, endDate):
        """Assemble index membership flags and weights per trade day.

        For each of the three indexes (``SSE50``, ``HS300``, ``CSI500``) the
        ``'exists'`` column of ``getStockBelongs`` and the ``'weight'``
        column of ``getStockPropertyInIndex`` are assigned onto a frame
        indexed by the trade days of ``[startDate, endDate]``; pandas aligns
        them on that index.

        Args:
            code: Stock code to look up.
            startDate: First day of the range.
            endDate: Last day of the range.

        Returns:
            DataFrame with columns ``date, is50, is300, is500, weight50,
            weight300, weight500``.
        """
        lookup = IndexComponentDataProcess()
        # (flag column, weight column, index identifier)
        layout = (
            ('is50', 'weight50', SSE50),
            ('is300', 'weight300', HS300),
            ('is500', 'weight500', CSI500),
        )
        # All membership lookups run before any weight lookup.
        membership = [
            lookup.getStockBelongs(code, index, startDate, endDate)
            for _, _, index in layout
        ]
        weights = [
            IndexComponentDataProcess.getStockPropertyInIndex(
                code, index, startDate, endDate)
            for _, _, index in layout
        ]
        calendar = TradedayDataProcess.getTradedays(startDate, endDate)
        frame = pd.DataFrame(data=calendar).set_index('date', drop=True)
        for (flagName, _, _), belongs in zip(layout, membership):
            frame[flagName] = belongs['exists']
        for (_, weightName, _), weightFrame in zip(layout, weights):
            frame[weightName] = weightFrame['weight']
        frame = frame.reset_index(drop=False)
        ordered = ['date'] + [row[0] for row in layout] + \
            [row[1] for row in layout]
        return frame[ordered]


########################################################################
Beispiel #3
0
 def analysisPerCode(self, code, startDate, endDate, feature, target):
     """Correlate one feature column with one target column for a stock.

     Loads the factor data through
     ``self.useJobLibToGetFactorDataCodeByCode(tradedays, 100, code)`` and
     prints the shape of the loaded frame as a side effect.

     Args:
         code: Stock code to load.
         startDate: First day of the trade-day range.
         endDate: Last day of the trade-day range.
         feature: Name of the feature column.
         target: Name of the target column.

     Returns:
         The 2x2 correlation matrix of the ``feature`` and ``target`` columns.
     """
     dayList = list(TradedayDataProcess.getTradedays(startDate, endDate))
     factorFrame = self.useJobLibToGetFactorDataCodeByCode(dayList, 100, code)
     pairCorr = factorFrame[[feature, target]].corr()
     print(factorFrame.shape)
     return pairCorr
Beispiel #4
0
 def prepareData(self,codeList,startDate,endDate):
     """Write one HDF5 file of factor columns per trade day.

     For every trade day in ``[startDate, endDate]`` the tick/factor data of
     ``codeList`` is loaded, restricted to ticks in
     ``['093000000', '145700000')``, stripped of the raw tick columns and
     stored under key ``'data'`` in ``<self.path>/<day>.h5``. An existing
     file for that day is deleted first.

     The kept columns are the non-raw columns in their original frame order
     followed by ``code, date, tick``. The previous implementation built this
     list from a ``set``, so the stored column order was arbitrary.

     Args:
         codeList: Codes forwarded to ``parallelizationGetDataByDate``.
         startDate: First day of the range.
         endDate: Last day of the range.

     Returns:
         None. A NaN anywhere in a day's data is logged as a warning; a
         failure while writing a day's file is logged as an error and the
         loop moves on to the next day.
     """
     tradedays=TradedayDataProcess.getTradedays(startDate,endDate)
     myfactor=tickFactorsProcess()
     # Raw tick columns that are dropped before storing; invariant across days.
     tickColumns=frozenset([
         'code','date','tick','lastPrice',
         'S1','S2','S3','S4','S5','S6','S7','S8','S9','S10',
         'B1','B2','B3','B4','B5','B6','B7','B8','B9','B10',
         'SV1','SV2','SV3','SV4','SV5','SV6','SV7','SV8','SV9','SV10',
         'BV1','BV2','BV3','BV4','BV5','BV6','BV7','BV8','BV9','BV10',
         'volume','amount','volumeIncrease','amountIncrease','midPrice'])
     # Identifier columns are part of tickColumns, so they are re-added last.
     keyColumns=['code','date','tick']
     for day in tradedays:
         data=myfactor.parallelizationGetDataByDate(codeList,day)
         data=data[(data['tick']>='093000000') & (data['tick']<'145700000')]
         # dict.fromkeys de-duplicates while keeping first-seen order.
         factorColumns=list(dict.fromkeys(
             col for col in data.columns if col not in tickColumns))
         data=data[factorColumns+keyColumns]
         if data.isna().values.any():
             logger.warning(f'factorData of date {day} has Nan!!!')
         # Outlier clipping of some factors (self.dataSelect) is disabled.
         # Store the data day by day, replacing any previous file.
         fileName=os.path.join(self.path,str(day)+'.h5')
         if os.path.exists(fileName):
             os.remove(fileName)
         try:
             with pd.HDFStore(fileName,'a',complib='blosc:zstd',append=True,complevel=9) as store:
                 store.append('data',data,append=True,format="table",data_columns=data.columns)
         except Exception as excp:
             # Best effort: one failing day must not abort the whole range.
             logger.error(f'{fileName} error! {excp}')
Beispiel #5
0
 def getLastTradedayTickData(self, code, date):
     """Fetch the latest non-empty tick snapshot strictly before ``date``.

     Trade days from ``getPreviousTradeday(date, 250)`` up to ``date`` are
     scanned from newest to oldest; days that are not ``< date`` are skipped.

     Args:
         code: Stock code passed to the tick-data loader.
         date: Reference day; only earlier days are queried.

     Returns:
         The first fetched frame with at least one row. If none has rows,
         the last fetched (empty) frame, or a fresh empty ``DataFrame`` when
         no day was queried at all.
     """
     windowStart = TradedayDataProcess.getPreviousTradeday(date, 250)
     candidates = list(TradedayDataProcess.getTradedays(windowStart, date))
     snapshot = pd.DataFrame()
     for day in reversed(candidates):
         if not day < date:
             continue
         snapshot = TickDataProcess().getTickShotDataFromInfluxdbServer(
             code, day)
         if snapshot.shape[0] > 0:
             break
     return snapshot
Beispiel #6
0
    def getDataFromLocal(self, path, startDate, endDate):
        """Load the daily frames of a date range in parallel and stack them.

        ``self.getDataFromLocalDaily(path, day)`` is dispatched once per
        trade day through joblib's multiprocessing backend with ``MYJOBS``
        workers.

        Args:
            path: Location forwarded to ``getDataFromLocalDaily``.
            startDate: First day of the range.
            endDate: Last day of the range.

        Returns:
            The concatenation of all daily frames. An empty ``DataFrame`` is
            returned when the range has no trade days or every daily result
            is ``None``; ``pd.concat`` would raise ``ValueError`` there.
        """
        tradedays = list(TradedayDataProcess.getTradedays(startDate, endDate))
        with parallel_backend("multiprocessing", n_jobs=MYJOBS):
            dailyFrames = Parallel()(
                delayed(self.getDataFromLocalDaily)(path, day)
                for day in tradedays)
        # pd.concat drops None entries itself but raises if nothing is left.
        dailyFrames = [frame for frame in dailyFrames if frame is not None]
        if not dailyFrames:
            return pd.DataFrame()
        return pd.concat(dailyFrames)


########################################################################
Beispiel #7
0
 def __computerFactor(self, code, startDate, endDate):
     """Attach the industry code and name of a stock to each trade day.

     The ``'industry'`` and ``'name'`` columns returned by
     ``IndustryClassification.getIndustryByCode`` are assigned onto a frame
     indexed by the trade days of the range, so pandas aligns them on that
     index.

     Args:
         code: Stock code to classify.
         startDate: First day of the range.
         endDate: Last day of the range.

     Returns:
         DataFrame with columns ``date, industry, industryName``, or an empty
         ``DataFrame`` when the classification lookup returns no rows.
     """
     calendar = TradedayDataProcess.getTradedays(startDate, endDate)
     frame = pd.DataFrame(data=calendar).set_index('date', drop=True)
     classification = IndustryClassification.getIndustryByCode(
         code, startDate, endDate)
     if classification.shape[0] == 0:
         return pd.DataFrame()
     frame['industry'] = classification['industry']
     frame['industryName'] = classification['name']
     frame = frame.reset_index(drop=False)
     return frame[['date', 'industry', 'industryName']]
Beispiel #8
0
 def __computerFactor(self, code, startDate, endDate):
     """Attach free-float shares and market value to each trade day.

     Reads the ``'dailyDerivative'`` K-line data for ``code`` and copies its
     ``freeShares`` and ``freeMarketValue`` columns, aligned on ``date``,
     onto the trade-day calendar of the range.

     Args:
         code: Stock code to load.
         startDate: First day of the range.
         endDate: Last day of the range.

     Returns:
         DataFrame with columns ``date, freeShares, freeMarketValue``, or an
         empty ``DataFrame`` when no derivative data is returned.
     """
     calendar = TradedayDataProcess.getTradedays(startDate, endDate)
     frame = pd.DataFrame(data=calendar).set_index('date', drop=True)
     derivative = KLineDataProcess('dailyDerivative').getDataByDate(
         code, startDate, endDate)
     if derivative.shape[0] == 0:
         return pd.DataFrame()
     # Re-index the loaded frame in place so assignment aligns by date.
     derivative.set_index('date', inplace=True)
     for column in ('freeShares', 'freeMarketValue'):
         frame[column] = derivative[column]
     frame = frame.reset_index(drop=False)
     return frame[['date', 'freeShares', 'freeMarketValue']]
Beispiel #9
0
 def getDataFromLocal(self,startDate,endDate):
     """Load the daily frames of a date range in parallel and stack them.

     ``self.getDataFromLocalDaily(day)`` is dispatched once per trade day
     through joblib's multiprocessing backend with ``JobLibUtility.myjobs``
     workers.

     Args:
         startDate: First day of the range.
         endDate: Last day of the range.

     Returns:
         The concatenation of all daily frames. An empty ``DataFrame`` is
         returned when the range has no trade days or every daily result is
         ``None``; ``pd.concat`` would raise ``ValueError`` there.
     """
     tradedays=list(TradedayDataProcess.getTradedays(startDate,endDate))
     with parallel_backend("multiprocessing", n_jobs=JobLibUtility.myjobs):
         dailyFrames=Parallel()(
             delayed(self.getDataFromLocalDaily)(day) for day in tradedays)
     # pd.concat drops None entries itself but raises if nothing is left.
     dailyFrames=[frame for frame in dailyFrames if frame is not None]
     if not dailyFrames:
         return pd.DataFrame()
     return pd.concat(dailyFrames)
Beispiel #10
0
 def __computerFactor(self, code, startDate, endDate):
     """Compute lagged return, its 20-day std and that std's rolling rank.

     Daily bars are loaded from 100 trade days before ``startDate`` (but not
     before the listing date) so the rolling windows have history. The
     returned frame covers that extended range, not just
     ``[startDate, endDate]``.

     Args:
         code: Stock code to load.
         startDate: First day requested by the caller.
         endDate: Last day of the range.

     Returns:
         DataFrame with columns ``date, yesterdayReturn, closeStd20,
         ts_closeStd20``, or an empty ``DataFrame`` when no bars are found.
     """
     # Go back 100 trade days to have enough data for the calculation.
     warmupStart = TradedayDataProcess.getPreviousTradeday(startDate, 100)
     listDate, delistDate = super().setIPODate(code)
     firstDay = max(warmupStart, listDate)
     bars = KLineDataProcess('daily').getDataByDate(code, firstDay, endDate)
     if bars.shape[0] == 0:
         return pd.DataFrame()

     def rankShareOfLast(window):
         # Rank of the newest value within the window, scaled by window size.
         return pd.Series(window).rank().iloc[-1] / len(window)

     bars['return'] = bars['close'] / bars['preClose'] - 1
     # Returns are blanked on suspended days ('停牌') and on the listing day.
     for mask in (bars['status'] == '停牌', bars['date'] == listDate):
         bars.loc[mask, 'return'] = np.nan
     bars['yesterdayReturn'] = bars['return'].shift(1)
     stdWindow = bars['yesterdayReturn'].rolling(20, min_periods=16)
     bars['closeStd20'] = stdWindow.std()
     rankWindow = bars['closeStd20'].rolling(50, min_periods=20)
     bars['ts_closeStd20'] = rankWindow.apply(rankShareOfLast, raw=True)
     return bars[['date', 'yesterdayReturn', 'closeStd20', 'ts_closeStd20']]
Beispiel #11
0
 def singleCode(self, code, startDate, endDate, parameters):
     """Run the private strategy on one stock for every day with local data.

     Args:
         code: Stock code to process.
         startDate: First day of the range.
         endDate: Last day of the range.
         parameters: Accepted but not read by this method.

     Returns:
         The concatenated results of ``self.__strategy`` for all days whose
         tick/factor frame has rows, or an empty ``DataFrame`` when no day
         produced a result.
     """
     dayList = list(TradedayDataProcess().getTradedays(startDate, endDate))
     loader = tickFactorsProcess()
     # Column selection handed to the strategy together with the day's data.
     select = [
         'code', 'date', 'tick', 'B1', 'S1', 'midIncreasePrevious3m',
         'closeStd20', 'ts_closeStd20', 'ts_buySellForceChange',
         'ts_buySellVolumeRatio5', 'ts_buySellVolumeRatio2', 'preClose',
         'increaseToday', 'weight300', 'weight500'
     ]
     dailyTrades = []
     for day in dayList:
         snapshot = loader.getTickDataAndFactorsByDateFromLocalFile(code, day)
         if snapshot.shape[0] == 0:
             continue
         dailyTrades.append(self.__strategy(snapshot, select))
     if not dailyTrades:
         return pd.DataFrame()
     return pd.concat(dailyTrades)
Beispiel #12
0
 def myRNN(self,startDate,endDate,testStart,document):
     """Print the factors most correlated with each forward-return target.

     Daily ``<day>.h5`` files under ``LocalFileAddress/document`` are loaded
     via ``super().getData`` and split at ``testStart``: days that compare
     ``< str(testStart)`` go to the training set, the rest to the test set.
     The correlation matrix of the training set is rounded to 3 decimals
     and, for each target column, the entries with absolute correlation
     ``>= 0.03`` are printed in ascending order.

     Args:
         startDate: First day of the range.
         endDate: Last day of the range.
         testStart: First day of the test window; compared as a string.
         document: Sub-directory of ``LocalFileAddress`` holding the files.

     Returns:
         None.

     Raises:
         ValueError: From ``pd.concat`` when no day falls into the training
             window. An empty test window no longer raises.
     """
     tradedays=TradedayDataProcess.getTradedays(startDate,endDate)
     filePath=os.path.join(LocalFileAddress,document)
     splitDay=str(testStart)
     trainSet=[]
     testSet=[]
     for day in tradedays:
         fileName=os.path.join(filePath,str(day)+'.h5')
         data=super().getData(fileName)
         if day<splitDay:
             trainSet.append(data)
         else:
             testSet.append(data)
     trainSet=pd.concat(trainSet)
     # The test split is not consumed below, so an empty test window must
     # not abort the correlation report.
     testSet=pd.concat(testSet) if testSet else pd.DataFrame()
     m=trainSet.corr().round(3)
     targets=(
         'midIncreaseNext1m',
         'midIncreaseMinNext1m',
         'midIncreaseMaxNext1m',
         'midIncreaseMinNext2m',
         'midIncreaseMaxNext2m',
         'midIncreaseMinNext5m',
         'midIncreaseMaxNext5m',
     )
     for target in targets:
         print(m.loc[(m[target].abs()>=0.03),target].sort_values())
Beispiel #13
0
 def singleCode(self,
                code: str,
                startDate: str,
                endDate: str,
                parameters=[]):
     """Score each day's ticks with two fixed linear models and print diagnostics.

     Per trade day the tick snapshot, tick factors and single-day daily
     factors are merged. Two linear predictors (hard-coded weights and
     intercepts) are applied to the 19 feature columns after
     ``self.dataSelect(..., 0.2)``, and the predictions are smoothed with an
     exponentially weighted mean (``span=2``). The correlation of the
     predictions with the 5-minute forward min/max columns is printed.

     Days with no tick snapshot, or whose first ``weight300`` value is NaN,
     are skipped.

     NOTE: the strategy call that would append to ``trade`` is disabled, so
     ``trade`` stays empty and an empty ``DataFrame`` is always returned.

     Args:
         code: Stock code to process.
         startDate: First day of the range.
         endDate: Last day of the range.
         parameters: Never read; it is rebound to a fresh dict for every
             processed day.

     Returns:
         An empty ``DataFrame`` when no trades were collected. Otherwise the
         concatenated trades with ``code``, ``fee``, ``cashChange`` and
         ``amount`` columns: the fee is 0.0001 x price, plus 0.001 x price
         on sells.
     """
     dayList = list(TradedayDataProcess().getTradedays(startDate, endDate))
     factorLoader = tickFactorsProcess()
     tickLoader = TickDataProcess()
     dailyLoader = dailyFactorsProcess()
     klineLoader = KLineDataProcess('daily')
     # Feature order must match the order of the weight vectors below.
     features = [
         'buyForce', 'sellForce', 'buySellForceChange', 'buySellSpread',
         'differenceHighLow3m', 'midToVwap', 'midToVwap3m',
         'midPrice3mIncrease', 'midPriceBV3m', 'midInPrevious3m',
         'midStd60', 'increaseToday', 'closeStd20',
         'buySellVolumeRatio2', 'buySellWeightedVolumeRatio2',
         'buySellVolumeRatio5', 'buySellWeightedVolumeRatio5',
         'buySellVolumeRatio10', 'buySellWeightedVolumeRatio10'
     ]
     maxWeight = np.array([
         0.03218688, -0.0121024, -0.00970715, 0.48172206, 0.42610642,
         0.10048948, -0.05574053, 0.08212702, -0.12357012, -0.00123216,
         0.09529259, 0.00509518, 0.14970625, -0.00291313, 0.00402094,
         -0.00452788, 0.00286216, 0.0020172, -0.00235546
     ])
     minWeight = np.array([
         -0.00385887, -0.01163938, 0.0043455, -0.01114819, -0.34286923,
         0.08314041, 0.00154458, 0.12249813, -0.02194375, -0.00038749,
         -0.02217015, 0.00610296, -0.09264385, -0.0020065, 0.00249547,
         -0.00324293, 0.00501176, 0.00389697, -0.00294958
     ])
     maxIntercept, minIntercept = 0.00079871, -0.00155935
     trade = []
     for day in dayList:
         tickData = tickLoader.getTickShotDataFromInfluxdbServer(code, day)
         if tickData.shape[0] == 0:
             continue
         factorData = factorLoader.getTickFactorsOnlyByDateFromLocalFile(
             code, day)
         data = pd.merge(factorData, tickData, how='left',
                         left_index=True, right_index=True)
         # Broadcast the single-day daily factors onto every tick row.
         dailyData = dailyLoader.getSingleStockDailyFactors(code, day, day)
         for col in dailyData.columns:
             if col in ('date', 'code'):
                 continue
             data[col] = dailyData[col].iloc[0]
         dailyKLineData = klineLoader.getDataByDate(code, day, day)
         data['preClose'] = dailyKLineData['preClose'].iloc[0]
         data['increaseToday'] = data['midPrice'] / data['preClose'] - 1
         firstWeight = data['weight300'].iloc[0]
         if np.isnan(firstWeight):
             continue
         maxPosition = round(
             data['weight300'].iloc[0] * 100000000 /
             data['preClose'].iloc[0] / 100, -2)
         A = self.dataSelect(data[features], 0.2).values
         mymax = A.dot(maxWeight) + maxIntercept
         mymin = A.dot(minWeight) + minIntercept
         data['maxPredict'] = mymax
         data['minPredict'] = mymin
         for predictColumn in ('maxPredict', 'minPredict'):
             data[predictColumn] = data[predictColumn].ewm(
                 span=2, ignore_na=True, adjust=True).mean()
         data['midPredict'] = (data['maxPredict'] + data['minPredict']) / 2
         diagnostics = data[[
             'midIncreaseMinNext5m', 'midIncreaseMaxNext5m', 'maxPredict',
             'minPredict', 'midPredict'
         ]]
         print(diagnostics.corr())
         data = data[list(tickData.columns) + ['maxPredict', 'minPredict']]
         # Built for the strategy call, which is currently disabled.
         parameters = {
             'maxPosition': maxPosition,
             'longOpen': 0.015,
             'shortOpen': -0.015,
             'longClose': 0.01,
             'shortClose': -0.01,
             'transactionRatio': 0.2
         }
     if len(trade) == 0:
         return pd.DataFrame()
     trade = pd.concat(trade)
     trade['code'] = code
     trade['fee'] = trade['price'] * 0.0001
     selectBuy = trade['direction'] == 'buy'
     selectSell = trade['direction'] == 'sell'
     sellFee = trade['fee'] + trade['price'] * 0.001
     trade.loc[selectSell, 'fee'] = sellFee[selectSell]
     # Buys pay price plus fee; sells receive price minus fee.
     buyCash = (-trade['price'] - trade['fee']) * trade['volume']
     trade.loc[selectBuy, 'cashChange'] = buyCash[selectBuy]
     sellCash = (trade['price'] - trade['fee']) * trade['volume']
     trade.loc[selectSell, 'cashChange'] = sellCash[selectSell]
     trade['amount'] = trade['price'] * trade['volume']
     return trade
Beispiel #14
0
 def singleCode(self,
                code: str,
                startDate: str,
                endDate: str,
                parameters=[]):
     """Back-test one stock with the stored keras model and return its trades.

     Per trade day the local tick data, tick factors and single-day daily
     factors are merged. The keras model stored at
     ``LocalFileAddress/tmp/dnn001_midIncreaseNext5m.h5`` predicts on the 19
     feature columns after ``self.dataSelect(..., 0.2)``. Output column 0 is
     used as the min prediction and column 1 as the max prediction; both are
     smoothed with an exponentially weighted mean (``span=2``). The day's
     data is then passed to ``self.strategy``.

     Days with no tick data, or whose first ``weight300`` value is NaN, are
     skipped.

     The model is loaded once, on the first day that needs it, instead of
     being re-read from disk for every day. The file is still not touched
     when no day needs a prediction.

     Args:
         code: Stock code to process.
         startDate: First day of the range.
         endDate: Last day of the range.
         parameters: Never read; the strategy parameters are rebuilt for
             every processed day. Kept for interface compatibility.

     Returns:
         An empty ``DataFrame`` when no trades were produced. Otherwise the
         concatenated trades with ``code``, ``fee``, ``cashChange`` and
         ``amount`` columns: the fee is 0.0001 x price, plus 0.001 x price
         on sells.
     """
     days = list(TradedayDataProcess().getTradedays(startDate, endDate))
     tickFactors = tickFactorsProcess()
     tick = TickDataProcess()
     daily = dailyFactorsProcess()
     dailyKLine = KLineDataProcess('daily')
     modelFile = os.path.join(LocalFileAddress, 'tmp',
                              'dnn001_midIncreaseNext5m.h5')
     # Model input columns, in the order the model is fed.
     features = [
         'buyForce', 'sellForce', 'buySellForceChange', 'buySellSpread',
         'differenceHighLow3m', 'midToVwap', 'midToVwap3m',
         'midPrice3mIncrease', 'midPriceBV3m', 'midInPrevious3m',
         'midStd60', 'increaseToday', 'closeStd20',
         'buySellVolumeRatio2', 'buySellWeightedVolumeRatio2',
         'buySellVolumeRatio5', 'buySellWeightedVolumeRatio5',
         'buySellVolumeRatio10', 'buySellWeightedVolumeRatio10'
     ]
     model = None  # loaded lazily on the first day that reaches prediction
     trade = []
     for day in days:
         tickData = tick.getDataByDateFromLocalFile(code, day)
         if tickData.shape[0] == 0:
             continue
         data = tickFactors.getTickFactorsOnlyByDateFromLocalFile(code, day)
         data = pd.merge(data,
                         tickData,
                         how='left',
                         left_index=True,
                         right_index=True)
         # Broadcast the single-day daily factors onto every tick row.
         dailyData = daily.getSingleStockDailyFactors(code, day, day)
         for col in dailyData.columns:
             if col not in ['date', 'code']:
                 data[col] = dailyData[col].iloc[0]
         dailyKLineData = dailyKLine.getDataByDate(code, day, day)
         data['preClose'] = dailyKLineData['preClose'].iloc[0]
         data['increaseToday'] = data['midPrice'] / data['preClose'] - 1
         if np.isnan(data['weight300'].iloc[0]):
             continue
         maxPosition = round(
             data['weight300'].iloc[0] * 100000000 /
             data['preClose'].iloc[0] / 100, -2)
         A = self.dataSelect(data[features], 0.2).values
         if model is None:
             # Process-wide settings and the model load are invariant
             # across days, so they are applied a single time.
             warnings.filterwarnings('ignore')
             pd.set_option('display.max_rows', None)
             model = keras.models.load_model(modelFile)
         predictArray = model.predict(A, verbose=0)
         data['maxPredict'] = predictArray[:, 1]
         data['minPredict'] = predictArray[:, 0]
         for predictColumn in ('maxPredict', 'minPredict'):
             data[predictColumn] = data[predictColumn].ewm(
                 span=2, ignore_na=True, adjust=True).mean()
         data['midPredict'] = (data['maxPredict'] + data['minPredict']) / 2
         mycolumns = list(tickData.columns) + [
             'maxPredict', 'minPredict', 'midPredict', 'increaseToday',
             'midInPrevious3m'
         ]
         data = data[mycolumns]
         strategyParameters = {
             'maxPosition': maxPosition,
             'longOpen': 0.015,
             'shortOpen': -0.015,
             'longClose': 0.01,
             'shortClose': -0.01,
             'transactionRatio': 0.1
         }
         trade.append(self.strategy(data, strategyParameters))
     if len(trade) == 0:
         return pd.DataFrame()
     trade = pd.concat(trade)
     if trade.shape[0] == 0:
         return pd.DataFrame()
     trade['code'] = code
     trade['fee'] = trade['price'] * 0.0001
     selectBuy = trade['direction'] == 'buy'
     selectSell = trade['direction'] == 'sell'
     trade.loc[selectSell, 'fee'] = (trade['fee'] +
                                     trade['price'] * 0.001)[selectSell]
     # Buys pay price plus fee; sells receive price minus fee.
     trade.loc[selectBuy,
               'cashChange'] = ((-trade['price'] - trade['fee']) *
                                trade['volume'])[selectBuy]
     trade.loc[selectSell,
               'cashChange'] = ((trade['price'] - trade['fee']) *
                                trade['volume'])[selectSell]
     trade['amount'] = trade['price'] * trade['volume']
     return trade