Beispiel #1
0
    def __computerFactor(self, code, startDate, endDate):
        """Build a per-trade-day table of index membership and weights.

        For each of the SSE50, HS300 and CSI500 indexes, the 'exists' column
        returned by ``getStockBelongs`` and the 'weight' column returned by
        ``getStockPropertyInIndex`` are attached to a frame built from the
        trade days between ``startDate`` and ``endDate``.

        Args:
            code: Stock code forwarded to the index-component queries.
            startDate: Start of the queried date range.
            endDate: End of the queried date range.

        Returns:
            DataFrame with the columns 'date', 'is50', 'is300', 'is500',
            'weight50', 'weight300' and 'weight500', in that order.
        """
        componentReader = IndexComponentDataProcess()
        indexes = (('50', SSE50), ('300', HS300), ('500', CSI500))
        # All membership queries are issued before the weight queries.
        membership = {
            tag: componentReader.getStockBelongs(
                code, index, startDate, endDate)
            for tag, index in indexes
        }
        weights = {
            tag: IndexComponentDataProcess.getStockPropertyInIndex(
                code, index, startDate, endDate)
            for tag, index in indexes
        }
        factor = pd.DataFrame(
            data=TradedayDataProcess.getTradedays(startDate, endDate))
        factor.set_index('date', drop=True, inplace=True)
        # With 'date' as the index, pandas aligns each assigned Series on it.
        for tag, _ in indexes:
            factor['is' + tag] = membership[tag]['exists']
        for tag, _ in indexes:
            factor['weight' + tag] = weights[tag]['weight']
        factor.reset_index(drop=False, inplace=True)
        orderedColumns = (['date']
                          + ['is' + tag for tag, _ in indexes]
                          + ['weight' + tag for tag, _ in indexes])
        return factor[orderedColumns]


########################################################################
Beispiel #2
0
 def analysisPerCode(self, code, startDate, endDate, feature, target):
     """Return the correlation matrix of two factor columns for one stock.

     The factor data is fetched through
     ``self.useJobLibToGetFactorDataCodeByCode`` for all trade days in the
     range; the literal 100 is passed as its second argument (its meaning
     is defined by that helper). The shape of the fetched frame is printed
     as a side effect.

     Args:
         code: Stock code whose factor data is analysed.
         startDate: Start of the trade-day range.
         endDate: End of the trade-day range.
         feature: Name of the first column to correlate.
         target: Name of the second column to correlate.

     Returns:
         The result of ``DataFrame.corr()`` on the two selected columns.
     """
     dayList = list(TradedayDataProcess.getTradedays(startDate, endDate))
     factorData = self.useJobLibToGetFactorDataCodeByCode(dayList, 100, code)
     selected = factorData[[feature, target]]
     correlation = selected.corr()
     print(factorData.shape)
     return correlation
Beispiel #3
0
 def prepareData(self,codeList,startDate,endDate):
     """Store each trade day's non-tick factor columns in a per-day HDF5 file.

     For every trade day between ``startDate`` and ``endDate`` the factor
     data of ``codeList`` is loaded, restricted to rows whose 'tick' value
     lies in ['093000000', '145700000'), stripped of the raw tick columns
     (the 'code', 'date' and 'tick' keys are kept) and written to
     ``<self.path>/<day>.h5`` under the key 'data'. An existing file for
     the day is removed first.

     Rows with missing values only trigger a warning; a failure while
     writing a file is logged and the loop continues with the next day.

     Args:
         codeList: Stock codes passed to ``parallelizationGetDataByDate``.
         startDate: Start of the trade-day range.
         endDate: End of the trade-day range.
     """
     tradedays=TradedayDataProcess.getTradedays(startDate,endDate)
     myfactor=tickFactorsProcess()
     # Raw tick columns to drop; loop-invariant, so built once up front.
     tickColumns=frozenset([
         'code', 'date', 'tick', 'lastPrice',
         'S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9', 'S10',
         'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B9', 'B10',
         'SV1', 'SV2', 'SV3', 'SV4', 'SV5',
         'SV6', 'SV7', 'SV8', 'SV9', 'SV10',
         'BV1', 'BV2', 'BV3', 'BV4', 'BV5',
         'BV6', 'BV7', 'BV8', 'BV9', 'BV10',
         'volume', 'amount', 'volumeIncrease', 'amountIncrease', 'midPrice',
     ])
     keyColumns=['code', 'date', 'tick']
     for day in tradedays:
         data=myfactor.parallelizationGetDataByDate(codeList,day)
         data=data[(data['tick']>='093000000') & (data['tick']<'145700000')]
         # Keep the frame's own column order. Iterating a set difference
         # would make the stored column order depend on string hashing,
         # which can differ from one interpreter run to the next.
         factorColumns=[column for column in dict.fromkeys(data.columns)
                        if column not in tickColumns]
         data=data[factorColumns+keyColumns]
         if data.isna().any().any():
             logger.warning(f'factorData of date {day} has Nan!!!')
         # Extreme-value trimming of selected factors (self.dataSelect) is
         # currently not applied.
         # Store the data day by day, replacing any previous file.
         fileName=os.path.join(self.path,str(day)+'.h5')
         if os.path.exists(fileName):
             os.remove(fileName)
         try:
             with pd.HDFStore(fileName,'a',complib='blosc:zstd',append=True,complevel=9) as store:
                 store.append('data',data,append=True,format="table",data_columns=data.columns)
         except Exception as excp:
             # Best effort: one failed day must not abort the whole range.
             logger.error(f'{fileName} error! {excp}')
Beispiel #4
0
    def getDataFromLocal(self, path, startDate, endDate):
        """Load the locally stored daily data of a date range into one frame.

        Each trade day between ``startDate`` and ``endDate`` is read with
        ``self.getDataFromLocalDaily(path, day)`` through joblib's
        "multiprocessing" backend using ``MYJOBS`` jobs, and the per-day
        results are concatenated.

        Args:
            path: Location forwarded unchanged to ``getDataFromLocalDaily``.
            startDate: Start of the trade-day range.
            endDate: End of the trade-day range.

        Returns:
            The concatenation of all per-day results, or an empty DataFrame
            when the range contains no trade days.
        """
        tradedays = list(TradedayDataProcess.getTradedays(startDate, endDate))
        if not tradedays:
            # pd.concat raises ValueError on an empty sequence; report
            # "no data" as an empty frame instead.
            return pd.DataFrame()
        with parallel_backend("multiprocessing", n_jobs=MYJOBS):
            dailyFrames = Parallel()(
                delayed(self.getDataFromLocalDaily)(path, day)
                for day in tradedays)
        return pd.concat(dailyFrames)


########################################################################
Beispiel #5
0
 def __computerFactor(self, code, startDate, endDate):
     """Build a per-trade-day table of the stock's industry classification.

     Args:
         code: Stock code passed to
             ``IndustryClassification.getIndustryByCode``.
         startDate: Start of the queried date range.
         endDate: End of the queried date range.

     Returns:
         DataFrame with the columns 'date', 'industry' and 'industryName',
         or an empty DataFrame when the industry query returns no rows.
     """
     calendar = pd.DataFrame(
         data=TradedayDataProcess.getTradedays(startDate, endDate))
     calendar.set_index('date', drop=True, inplace=True)
     industryInfo = IndustryClassification.getIndustryByCode(
         code, startDate, endDate)
     if not industryInfo.shape[0]:
         return pd.DataFrame()
     # (output column, source column) pairs; pandas aligns each assigned
     # Series on the calendar's 'date' index.
     for outputName, sourceName in (('industry', 'industry'),
                                    ('industryName', 'name')):
         calendar[outputName] = industryInfo[sourceName]
     calendar.reset_index(drop=False, inplace=True)
     return calendar[['date', 'industry', 'industryName']]
Beispiel #6
0
 def __computerFactor(self, code, startDate, endDate):
     """Build a per-trade-day table of free shares and free market value.

     The values come from the 'dailyDerivative' ``KLineDataProcess`` source.

     Args:
         code: Stock code passed to ``getDataByDate``.
         startDate: Start of the queried date range.
         endDate: End of the queried date range.

     Returns:
         DataFrame with the columns 'date', 'freeShares' and
         'freeMarketValue', or an empty DataFrame when the derivative query
         returns no rows.
     """
     calendar = pd.DataFrame(
         data=TradedayDataProcess.getTradedays(startDate, endDate))
     calendar.set_index('date', drop=True, inplace=True)
     derivativeSource = KLineDataProcess('dailyDerivative')
     derivative = derivativeSource.getDataByDate(code, startDate, endDate)
     if not derivative.shape[0]:
         return pd.DataFrame()
     # Index the fetched frame by date (in place) so the column assignments
     # below align on the calendar's 'date' index.
     derivative.set_index('date', inplace=True)
     wantedColumns = ['freeShares', 'freeMarketValue']
     for column in wantedColumns:
         calendar[column] = derivative[column]
     calendar.reset_index(drop=False, inplace=True)
     return calendar[['date'] + wantedColumns]
Beispiel #7
0
 def getLastTradedayTickData(self, code, date):
     """Return tick snapshot data from the latest earlier trade day with data.

     Trade days between ``getPreviousTradeday(date, 250)`` and ``date`` are
     visited from newest to oldest. Days not strictly before ``date`` are
     skipped; the first day whose snapshot query returns at least one row
     ends the search.

     Args:
         code: Stock code passed to the snapshot query.
         date: Reference day; only days before it are queried.

     Returns:
         The first non-empty query result. If no day yields rows, the last
         (empty) query result, or an empty DataFrame when nothing was
         queried at all.
     """
     windowStart = TradedayDataProcess.getPreviousTradeday(date, 250)
     candidates = list(TradedayDataProcess.getTradedays(windowStart, date))
     snapshot = pd.DataFrame()
     # Walk backwards so the most recent day with data wins.
     for day in reversed(candidates):
         if not day < date:
             continue
         snapshot = TickDataProcess().getTickShotDataFromInfluxdbServer(
             code, day)
         if snapshot.shape[0] > 0:
             break
     return snapshot
Beispiel #8
0
 def getDataFromLocal(self,startDate,endDate):
     """Load the locally stored daily data of a date range into one frame.

     Each trade day between ``startDate`` and ``endDate`` is read with
     ``self.getDataFromLocalDaily(day)`` through joblib's "multiprocessing"
     backend using ``JobLibUtility.myjobs`` jobs, and the per-day results
     are concatenated.

     Args:
         startDate: Start of the trade-day range.
         endDate: End of the trade-day range.

     Returns:
         The concatenation of all per-day results, or an empty DataFrame
         when the range contains no trade days.
     """
     tradedays=list(TradedayDataProcess.getTradedays(startDate,endDate))
     if not tradedays:
         # pd.concat raises ValueError on an empty sequence; report
         # "no data" as an empty frame instead.
         return pd.DataFrame()
     with parallel_backend("multiprocessing", n_jobs=JobLibUtility.myjobs):
         dailyFrames=Parallel()(delayed(self.getDataFromLocalDaily)(day) for day in tradedays)
     return pd.concat(dailyFrames)
Beispiel #9
0
 def myRNN(self,startDate,endDate,testStart,document):
     """Load per-day factor files, split them into train/test and print correlations.

     NOTE(review): this description covers only the statements visible
     here; the method may continue beyond them.

     Args:
         startDate: Start of the trade-day range to load.
         endDate: End of the trade-day range to load.
         testStart: Days that compare less than ``str(testStart)`` go to the
             training set, all others to the test set.
         document: Sub-directory of ``LocalFileAddress`` holding the
             ``<day>.h5`` files.
     """
     tradedays=TradedayDataProcess.getTradedays(startDate,endDate)
     filePath=os.path.join(LocalFileAddress,document)
     trainSet=[]
     testSet=[]
     for day in tradedays:
         fileName=os.path.join(filePath,str(day)+'.h5')
         # The parent class's getData reads the day's file.
         data=super().getData(fileName)
         # The split compares day with the string form of testStart
         # (presumably both are date strings in the same format - confirm).
         if day<str(testStart):
             trainSet.append(data)
         else:
             testSet.append(data)
         pass
     pass
     # From here on trainSet/testSet are concatenated frames, not lists.
     trainSet=pd.concat(trainSet)
     testSet=pd.concat(testSet)
     # Correlation matrix of the training data, rounded to 3 decimals.
     m=round(trainSet.corr(),3)
     # For each target column, print the sorted correlations whose absolute
     # value is at least 0.03.
     print(m.loc[(m['midIncreaseNext1m'].abs()>=0.03),'midIncreaseNext1m'].sort_values())
     print(m.loc[(m['midIncreaseMinNext1m'].abs()>=0.03),'midIncreaseMinNext1m'].sort_values())
     print(m.loc[(m['midIncreaseMaxNext1m'].abs()>=0.03),'midIncreaseMaxNext1m'].sort_values())
     print(m.loc[(m['midIncreaseMinNext2m'].abs()>=0.03),'midIncreaseMinNext2m'].sort_values())
     print(m.loc[(m['midIncreaseMaxNext2m'].abs()>=0.03),'midIncreaseMaxNext2m'].sort_values())
     print(m.loc[(m['midIncreaseMinNext5m'].abs()>=0.03),'midIncreaseMinNext5m'].sort_values())
     print(m.loc[(m['midIncreaseMaxNext5m'].abs()>=0.03),'midIncreaseMaxNext5m'].sort_values())