Example #1
0
 def recordResampleTickTransactionDataFromSqlServer(self,code,startDate,endDate):
     """Store resampled tick-transaction data in one HDF5 file per day.

     Daily K-line rows of ``code`` between ``startDate`` and ``endDate`` are
     loaded and rows whose ``status`` equals '停牌' (suspended, per the
     original comment) are dropped. For each remaining date the file
     ``<self.filePath>/<code with '.' replaced by '_'>/<date>.h5`` is written
     under key ``'data'`` unless the file already exists.

     Args:
         code: Stock code; must be a ``str`` (``replace`` is called on it).
         startDate: First date passed to the daily K-line query.
         endDate: Last date passed to the daily K-line query.

     Returns:
         None.
     """
     klineFrame = KLineDataProcess('daily').getDataByDate(code, startDate, endDate)
     if klineFrame.empty:
         logger.warning(f'There no daily data of {code} from {startDate} to {endDate}!')
         return
     # Only days on which the stock was not suspended are processed.
     activeRows = klineFrame[klineFrame['status'] != '停牌']
     for date in list(activeRows['date']):
         folder = os.path.join(self.filePath, code.replace('.', '_'))
         target = os.path.join(folder, date + '.h5')
         HDF5Utility.pathCreate(folder)
         if os.path.isfile(target):
             # An existing file is never refreshed.
             logger.info(f'TickTransaction data of {code} in {date} is exists!')
             continue
         logger.info(f'get TickTransaction data of {code} in {date} from source!')
         resampled = self.__getResampleTickTransactionDataFromSqlServer(code, date)
         if resampled.empty:
             logger.warning(f'there is no data of {code} in {date} from source!')
             continue
         with pd.HDFStore(target, 'a', complib='blosc:zstd', append=True, complevel=9) as store:
             store.append('data', resampled, append=False, format="table", data_columns=resampled.columns)
Example #2
0
    def getStockData(self):
        """Request minute and daily K-line data for every index constituent.

        The constituent lists of the CSI500, HS300 and SSE50 indices on
        20190404 are merged and de-duplicated by ``code``. For each code the
        minute and daily K-line repositories are queried for the range
        20070101-20190327; the returned frames are discarded, so the method
        is only useful for whatever side effect ``getDataByDate`` has
        (presumably filling a local cache -- TODO confirm).

        Progress is printed to stdout with the real number of codes.

        Returns:
            None.
        """
        myindex = IndexComponentDataProcess(True)
        index500 = myindex.getCSI500DataByDate(20190404, 20190404)
        index300 = myindex.getHS300DataByDate(20190404, 20190404)
        index50 = myindex.getSSE50DataByDate(20190404, 20190404)
        constituents = pd.concat([index500, index300, index50],
                                 ignore_index=True)
        stockCodes = list(constituents['code'].drop_duplicates())
        myMinute = KLineDataProcess('minute', True)
        myDaily = KLineDataProcess('daily', True)
        # Report against the actual list length instead of a fixed "800".
        total = len(stockCodes)

        for num, code in enumerate(stockCodes, 1):
            print(datetime.datetime.now())
            myMinute.getDataByDate(code, 20070101, 20190327)
            myDaily.getDataByDate(code, 20070101, 20190327)
            print("{0}({1} of {2}) complete!".format(code, num, total))
########################################################################
Example #3
0
    def getTickDataAndFactorsByDateFromLocalFile(self,
                                                 code,
                                                 date,
                                                 factors=TICKFACTORSUSED):
        """Join local tick factors, tick data and daily values for one day.

        Args:
            code: Stock code forwarded to every repository call.
            date: Trading date forwarded to every repository call.
            factors: Iterable of dicts; only the ``'factor'`` entry of each
                item is read.

        Returns:
            DataFrame indexed like the first non-empty factor frame. It is
            returned as-is when the merged frame has no rows. Otherwise it
            additionally carries the daily-factor columns (except ``date``,
            ``code`` and ``return``), ``preClose`` and ``increaseToday``, and
            it is cut off before the first row where ``B1`` or ``S1`` is 0.
        """
        factorRepo = factorBase()
        combined = pd.DataFrame()
        for spec in factors:
            factorFrame = factorRepo.getDataFromLocalFile(
                code, date, spec['factor'])
            if combined.shape[0] == 0:
                # Nothing collected yet: start from this factor frame.
                combined = factorFrame.copy()
                continue
            if factorFrame.shape[0] == 0:
                continue
            combined = pd.merge(combined, factorFrame, how='left',
                                left_index=True, right_index=True)

        tickRepo = TickDataProcess()
        tickFrame = tickRepo.getDataByDateFromLocalFile(code, date)
        combined = pd.merge(combined, tickFrame, how='left',
                            left_index=True, right_index=True)
        if combined.shape[0] == 0:
            return combined

        # Broadcast the single-day daily factor values onto every tick row.
        dailyFactors = dailyFactorsProcess().getSingleStockDailyFactors(
            code, date, date)
        skipped = ('date', 'code', 'return')
        for col in dailyFactors.columns:
            if col in skipped:
                continue
            combined[col] = dailyFactors[col].iloc[0]

        klineFrame = KLineDataProcess('daily').getDataByDate(code, date, date)
        combined['preClose'] = klineFrame['preClose'].iloc[0]
        combined['increaseToday'] = (
            combined['midPrice'] / combined['preClose'] - 1)

        # Rows with B1 == 0 or S1 == 0 mark the cut-off (the original names
        # them "ceiling"; presumably limit-up/limit-down -- TODO confirm).
        limitMask = (combined['B1'] == 0) | (combined['S1'] == 0)
        limitRows = combined[limitMask]
        if limitRows.shape[0] > 0:
            firstLimitTick = limitRows['tick'].iloc[0]
            combined = combined[combined['tick'] < firstLimitTick]

        return combined
Example #4
0
 def recordResampleTickShotDataToInfluxdbFromSqlServer(
         self, code, startDate, endDate):
     """Copy resampled tick-shot data of one stock into InfluxDB.

     Daily K-line rows of ``code`` between ``startDate`` and ``endDate`` are
     loaded and rows whose ``status`` equals '停牌' (suspended, per the
     original comment) are dropped. For every remaining date the resampled
     tick-shot frame is fetched; non-empty frames have their NaN values
     replaced by 0 and are written to ``INFLUXDBTICKTICKDATABASE`` under
     measurement ``code`` with an empty tag dict.

     Returns:
         None. Nothing is logged when no daily data exists.
     """
     klineFrame = KLineDataProcess('daily').getDataByDate(
         code, startDate, endDate)
     if klineFrame.empty:
         return
     tradingRows = klineFrame[klineFrame['status'] != '停牌']
     for date in list(tradingRows['date']):
         snapshot = self.__getResampleTickShotDataFromSqlServer(code, date)
         if snapshot.shape[0] <= 0:
             continue
         InfluxdbUtility.saveDataFrameDataToInfluxdb(
             snapshot.fillna(0), INFLUXDBTICKTICKDATABASE, code, {})
Example #5
0
    def saveAllFactorsByCodeAndDate(self, code, date):
        """Write the combined tick factors of one stock-day to an HDF5 file.

        The target is ``<self.path>/<code with '.' replaced by '_'>/<date>.h5``.
        Nothing is done when that file already exists, when the merged
        factor/tick frame is empty, when no daily K-line row is available
        for ``date``, or when no row survives the filters below.

        Steps:
            1. Load the factors in ``self.factorsUsed`` and left-join the
               local tick data on the index.
            2. Add ``preClose`` (from the daily K-line) and
               ``increaseToday`` (``midPrice / preClose - 1``).
            3. Keep rows with ``tick < '145700000'`` and rows before the
               first one where ``B1`` or ``S1`` equals 0 (data after a
               price limit is removed, per the original comment).
            4. Drop every raw tick column plus a fixed list of helper
               columns, sort the remaining column names and store the
               frame under key ``'data'``.

        Args:
            code: Stock code.
            date: Trading date; converted with ``str`` for the file name.

        Returns:
            None. Errors raised while writing are logged, not propagated.
        """
        mypath = os.path.join(self.path, str(code).replace('.', '_'))
        HDF5Utility.pathCreate(mypath)
        fileName = os.path.join(mypath, str(date) + '.h5')
        if HDF5Utility.fileCheck(fileName):
            # The file already exists: never recompute.
            return

        # Tick-level factor data.
        mydata = self.getFactorsUsedByDateFromLocalFile(
            code, date, self.factorsUsed)

        # Tick-level market data.
        tickData = TickDataProcess().getDataByDateFromLocalFile(code, date)
        mydata = pd.merge(mydata,
                          tickData,
                          how='left',
                          left_index=True,
                          right_index=True)
        if mydata.shape[0] == 0:
            return

        # Daily K-line data supplies the previous close.
        dailyKLineData = KLineDataProcess('daily').getDataByDate(
            code, date, date)
        if dailyKLineData.empty:
            # Without this guard ``.iloc[0]`` raises IndexError.
            logger.warning(f'there is no daily KLine data of {code} in {date}')
            return
        mydata['preClose'] = dailyKLineData['preClose'].iloc[0]
        mydata['increaseToday'] = mydata['midPrice'] / mydata['preClose'] - 1
        mydata = mydata[mydata['tick'] < '145700000']

        # Remove everything from the first price-limit row onwards.
        ceiling = mydata[(mydata['B1'] == 0) | (mydata['S1'] == 0)]
        if ceiling.shape[0] > 0:
            ceilingTime = ceiling['tick'].iloc[0]
            mydata = mydata[mydata['tick'] < ceilingTime]

        excludedColumns = [
            'preClose', 'buyVolume2', 'buyVolume5', 'buyVolume10',
            'sellVolume2', 'sellVolume5', 'sellVolume10'
        ]
        mycolumns = sorted(
            set(mydata.columns) - set(tickData.columns) - set(excludedColumns))
        mydata = mydata[mycolumns]
        if mydata.shape[0] == 0:
            return

        try:
            logger.info(f'Recording factors of {code} in {date}!')
            with pd.HDFStore(fileName,
                             'a',
                             complib='blosc:zstd',
                             append=True,
                             complevel=9) as store:
                store.append('data',
                             mydata,
                             append=True,
                             format="table",
                             data_columns=mydata.columns)
        except Exception as excp:
            # Best effort: one failing stock-day must not abort a batch run.
            logger.error(f'{fileName} error! {excp}')
Example #6
0
    def reverse_singleCode(self, code, startDate, endDate):
        """Print the mean forward return after sharp 5-minute moves.

        For every trading day between ``startDate`` and ``endDate`` that has
        daily factors, a daily K-line row and a ``status`` other than '停牌',
        tick-shot data and tick factors are joined and two row sets are
        collected:

        * ``midIncreasePrevious5m > 0.5 * closeStd20`` (sharp rise),
        * ``midIncreasePrevious5m < -0.5 * closeStd20`` (sharp fall),

        where ``midIncreasePrevious5m`` compares ``midPrice`` with its value
        60 rows earlier. After extra filters on ``ts_sellForceIncrease``,
        ``ts_buySellVolumeRatio5`` and ``increaseToday`` the mean of
        ``midIncreaseNext5m`` of each set is printed.

        Args:
            code: Stock code.
            startDate: First date of the study range.
            endDate: Last date of the study range.

        Returns:
            An empty ``pd.DataFrame`` when no daily K-line data exists,
            otherwise an empty list. NOTE(review): the result of the study
            is only printed; the two return types are kept as they were for
            backward compatibility.
        """
        days = list(TradedayDataProcess().getTradedays(startDate, endDate))
        dailyRepo = dailyFactorsProcess()
        dailyFactor = dailyRepo.getSingleStockDailyFactors(
            code, startDate, endDate)
        dailyKLine = KLineDataProcess('daily')
        dailyData = dailyKLine.getDataByDate(code, startDate, endDate)
        if dailyData.empty:
            logger.error(
                f'there is no data of {code} from {startDate} to {endDate}')
            return pd.DataFrame()
        tick = TickDataProcess()
        tickfactor = tickFactorsProcess()
        risingRows = []
        fallingRows = []
        for today in days:
            todayInfo = dailyFactor[dailyFactor['date'] == today]
            if todayInfo.empty:
                logger.error(
                    f'there is no factor data of {code} in date {today}')
                continue
            todayKLine = dailyData[dailyData['date'] == today]
            if todayKLine.empty:
                logger.error(
                    f'there is no KLine data of {code} in date {today}')
                continue
            if todayKLine['status'].iloc[0] == '停牌':
                logger.warning(f'There is no data of {code} in {today}')
                continue

            tickData = tick.getTickShotDataFromInfluxdbServer(code, today)
            factors = tickfactor.getFactorsUsedByDateFromLocalFile(
                code, today)
            merged = pd.merge(tickData,
                              factors,
                              how='left',
                              left_index=True,
                              right_index=True)
            # Broadcast the day's scalar values onto every tick row.
            merged['closeStd20'] = todayInfo['closeStd20'].iloc[0]
            merged['ts_closeStd20'] = todayInfo['ts_closeStd20'].iloc[0]
            merged['adjFactor'] = todayKLine['adjFactor'].iloc[0]
            merged['preClose'] = todayKLine['preClose'].iloc[0]
            merged['increaseToday'] = (
                merged['midPrice'] / merged['preClose'] - 1)
            # 60 rows back; the column name implies this spans 5 minutes.
            merged['midIncreasePrevious5m'] = (
                merged['midPrice'] / merged['midPrice'].shift(60) - 1)
            threshold = 0.5 * merged['closeStd20']
            risingRows.append(
                merged[merged['midIncreasePrevious5m'] > threshold])
            fallingRows.append(
                merged[merged['midIncreasePrevious5m'] < -threshold])

        if not risingRows:
            # pd.concat raises ValueError on an empty list.
            logger.warning(
                f'there is no usable tick data of {code} from {startDate} to {endDate}')
            return []

        select1 = pd.concat(risingRows)
        select2 = pd.concat(fallingRows)
        s = select1[(select1['ts_sellForceIncrease'] > 0.6)
                    & (select1['ts_buySellVolumeRatio5'] > 0.6)
                    & (select1['increaseToday'] > select1['closeStd20'])]
        print(s['midIncreaseNext5m'].mean())
        ss = select2[(select2['ts_buySellVolumeRatio5'] > 0.6)
                     & (select2['increaseToday'] < -select2['closeStd20'])]
        print(ss['midIncreaseNext5m'].mean())
        return []


########################################################################
Example #7
0
class stockReverseMovement(object):
    """Stock abnormal moves, focused on the pullback after a sharp rise.

    ``reverse`` builds per-minute features for index constituents on days
    whose daily high exceeded the previous close by more than 5%, keeps the
    minutes whose open is 7%-8% above the previous close and stores them in
    an HDF5 file under key ``'ceiling'``.
    """

    #----------------------------------------------------------------------
    def __init__(self):
        """Create the K-line repositories and the empty result frame."""
        self.__myMinute = KLineDataProcess('minute', True)
        self.__myDaily = KLineDataProcess('daily', True)
        self.__localFileStr = LocalFileAddress + "\\intermediateResult\\ceilingFeature.h5"
        # Accumulates the selected minute rows of every processed stock.
        self.__allMinute = pd.DataFrame()

    #----------------------------------------------------------------------
    def __getStockList(self):
        """Return the de-duplicated CSI500/HS300/SSE50 codes of 20190404."""
        myindex = IndexComponentDataProcess()
        index500 = myindex.getCSI500DataByDate(20190404, 20190404)
        index300 = myindex.getHS300DataByDate(20190404, 20190404)
        index50 = myindex.getSSE50DataByDate(20190404, 20190404)
        stockCodes = list(
            pd.concat([index500, index300, index50],
                      ignore_index=True)['code'].drop_duplicates())
        return stockCodes

    #----------------------------------------------------------------------
    def __dataPrepared(self):
        """Compute the daily/minute features and collect the selected rows.

        The selected rows of all stocks are appended to the frame held by
        the instance. Progress is printed to stdout.
        """
        mylist = self.__getStockList()
        self.__minuteData = {}
        self.__dailyData = {}
        total = len(mylist)
        lookaheadColumns = [
            'ceilingInNext5m', 'maxLossInNext5m', 'ceilingInNext10m',
            'maxLossInNext10m'
        ]
        selected = []
        for num, code in enumerate(mylist, 1):
            print("{0}({1} of {2}) start!".format(code, num, total))
            print(datetime.datetime.now())
            self.__dataSelectOneByOne(code)
            d = self.__dailyData[code]
            m = self.__minuteData[code]

            # Daily flags: close at +10% of the previous close.
            d['ceiling'] = 0
            d['ceilingYesterday'] = 0
            d['ceilingYesterday2'] = 0
            d['ceilingIn5Days'] = 0
            d.loc[(d['close'] == round(d['preClose'] * 1.1, 2)), 'ceiling'] = 1
            d.loc[(d['ceiling'].shift(1) == 1), 'ceilingYesterday'] = 1
            d.loc[((d['ceiling'].shift(1) == 1) &
                   (d['ceiling'].shift(2) == 1)), 'ceilingYesterday2'] = 1
            d['ceilingIn5Days'] = d['ceilingYesterday'].rolling(5).sum()

            # Backward-looking minute returns, only within the same day.
            m.loc[(m['date'] == m['date'].shift(5)),
                  'increase5m'] = (m['open'] / m['open'].shift(5) - 1)
            m.loc[(m['date'] == m['date'].shift(1)),
                  'increase1m'] = (m['open'] / m['open'].shift(1) - 1)

            # Attach the daily values of each minute's date to the minute.
            d = d.set_index('date')
            dailyInfo = d.loc[m['date'], [
                'preClose', 'ceilingYesterday', 'ceilingYesterday2',
                'ceilingIn5Days'
            ]]
            dailyInfo.index = m.index
            m[[
                'yesterdayClose', 'ceilingYesterday', 'ceilingYesterday2',
                'ceilingIn5Days'
            ]] = dailyInfo
            m['increaseInDay'] = (m['open'] / m['yesterdayClose'] - 1)

            # Forward-looking labels.
            m['ceiling'] = 0
            m['return5m'] = 0
            m['return10m'] = 0
            m.loc[(m['low'] == round(m['yesterdayClose'] * 1.1, 2)),
                  'ceiling'] = 1
            m.loc[(m['open'].shift(-5) / m['open'] - 1) > 0.01, 'return5m'] = 1
            m.loc[(m['open'].shift(-5) / m['open'] - 1) < -0.01,
                  'return5m'] = -1
            m.loc[(m['open'].shift(-10) / m['open'] - 1) > 0.01,
                  'return10m'] = 1
            m.loc[(m['open'].shift(-10) / m['open'] - 1) < -0.01,
                  'return10m'] = -1
            m['ceilingInNext5m'] = m['ceiling'].shift(-5).rolling(5).max()
            # NOTE(review): "(min - open) / open - 1" subtracts 1 twice; it
            # looks like "min / open - 1" was intended. Left unchanged
            # because stored data may depend on it -- confirm.
            m['maxLossInNext5m'] = round(
                (m['low'].shift(-5).rolling(5).min() - m['open']) / m['open'] -
                1, 2)
            m['ceilingInNext10m'] = m['ceiling'].shift(-10).rolling(10).max()
            m['maxLossInNext10m'] = round(
                (m['low'].shift(-10).rolling(10).min() - m['open']) / m['open']
                - 1, 2)
            # Blank the lookahead values after 14:50 so that the dropna
            # below removes those rows. A chained "m[mask][cols] = None"
            # would only modify a temporary copy.
            m.loc[m['minute'] > '1450', lookaheadColumns] = float('nan')

            mselect = m[(m['increaseInDay'] > 0.07)
                        & (m['increaseInDay'] < 0.08)]
            selected.append(mselect.dropna(axis=0, how='any'))

        # One concat instead of a per-stock DataFrame.append.
        self.__allMinute = pd.concat([self.__allMinute] + selected)

    #----------------------------------------------------------------------
    def __dataSelectOneByOne(self, code):
        """Load one stock and keep the minutes of its >5% intraday-high days.

        Reads ``self.startDate`` and ``self.endDate``; stores the daily frame
        and the filtered minute frame in the per-code dictionaries.
        """
        m = self.__myMinute.getDataByDate(code, self.startDate, self.endDate)
        d = self.__myDaily.getDataByDate(code, self.startDate, self.endDate)
        days = list(d[d['high'] > d['preClose'] * 1.05]['date'])
        # Copy: the slice gets new columns assigned later on.
        m = m.loc[m['date'].isin(days), :].copy()
        self.__dailyData[code] = d
        self.__minuteData[code] = m

    #----------------------------------------------------------------------
    def reverse(self, startDate, endDate):
        """Build the features for the date range and write them to HDF5.

        Args:
            startDate: First date of the range.
            endDate: Last date of the range.

        The collected rows replace the ``'ceiling'`` table of the result
        file (``append=False``).
        """
        self.startDate = startDate
        self.endDate = endDate
        self.tradeDays = TradedayDataProcess.getTradedays(startDate, endDate)
        self.__dataPrepared()
        # The context manager closes the store even if the write fails.
        with pd.HDFStore(self.__localFileStr, 'a') as store:
            store.append('ceiling',
                         self.__allMinute,
                         append=False,
                         format="table",
                         data_columns=[
                             'code', 'date', 'minute', 'open', 'high', 'low',
                             'close', 'volume', 'amount', 'increase5m',
                             'increase1m', 'yesterdayClose',
                             'ceilingYesterday', 'ceilingYesterday2',
                             'ceilingIn5Days', 'increaseInDay', 'ceiling',
                             'ceilingInNext5m', 'ceilingInNext10m',
                             'maxLossInNext5m', 'maxLossInNext10m'
                         ])
Example #8
0
    def reverse_singleCode(self,
                           code,
                           startDate,
                           endDate,
                           parameters=(300, 100000000, 1.5)):
        """Back-test an intraday reversal rule on the ticks of one stock.

        On every non-suspended trading day with factor and K-line data, the
        tick rows are walked in order (the last 60 rows are not visited):

        * With no position, for ``100 <= i <= 4500``: open a short when
          ``lastPrice / preClose - 1`` exceeds ``std * closeStd20`` and open
          a long when it is below ``-std * closeStd20``.
        * With a position: close it when it is losing at least 400 rows
          after the open, at row 4700 or later, at least 1200 rows after the
          open, or when it was carried over from the previous day. The close
          is skipped while the corresponding level-1 volume is 0.

        Column positions follow the layout listed in the original source:
        ``code, date, tick, lastPrice, S1..S10, B1..B10, SV1..SV10,
        BV1..BV10, volume, ...``.

        Args:
            code: Stock code.
            startDate: First date of the back-test.
            endDate: Last date of the back-test.
            parameters: Sequence ``(index, totalCash, std)``. ``index``
                (50, 300 or 500) selects the weight column that sizes the
                position; any other value sizes it as 0.1% of ``totalCash``.

        Returns:
            An empty DataFrame when no daily K-line data exists; otherwise
            one row per closed trade with profit, fee (0.12% of the larger
            of open/close price times position size), net profit, ``code``
            and the day's ``closeStd20``, ``ts_closeStd20``, ``industry``
            and ``industryName``.
        """
        myindex = parameters[0]
        totalCash = parameters[1]
        std1 = parameters[2]
        days = list(TradedayDataProcess().getTradedays(startDate, endDate))
        dailyRepo = dailyFactorsProcess()
        dailyFactor = dailyRepo.getSingleStockDailyFactors(
            code, startDate, endDate)
        dailyKLine = KLineDataProcess('daily')
        dailyData = dailyKLine.getDataByDate(code, startDate, endDate)
        if dailyData.empty:
            logger.error(
                f'there is no data of {code} from {startDate} to {endDate}')
            return pd.DataFrame()
        tick = TickDataProcess()
        myTradeList = []
        myStatus = {}
        myTrade = {}
        positionYesterday = 0
        openIndex = 0
        for today in days:
            todayInfo = dailyFactor[dailyFactor['date'] == today]
            if todayInfo.empty:
                logger.error(
                    f'there is no factor data of {code} in date {today}')
                continue

            todayKLine = dailyData[dailyData['date'] == today]
            if todayKLine.empty:
                logger.error(
                    f'there is no KLine data of {code} in date {today}')
                continue
            myStatus['date'] = today
            myStatus['closeStd20'] = todayInfo['closeStd20'].iloc[0]
            myStatus['weight50'] = todayInfo['weight50'].iloc[0]
            myStatus['weight300'] = todayInfo['weight300'].iloc[0]
            myStatus['weight500'] = todayInfo['weight500'].iloc[0]
            myStatus['ts_closeStd20'] = todayInfo['ts_closeStd20'].iloc[0]
            myStatus['adjFactor'] = todayKLine['adjFactor'].iloc[0]
            myStatus['preClose'] = todayKLine['preClose'].iloc[0]
            positionNow = positionYesterday
            if todayKLine['status'].iloc[0] == '停牌':
                logger.warning(f'There is no data of {code} in {today}')
                continue

            # Position size: index weight (in percent) of the total cash.
            if myindex == 300:
                maxPosition = myStatus[
                    'weight300'] * totalCash * 0.01 / myStatus['preClose']
            elif myindex == 500:
                maxPosition = myStatus[
                    'weight500'] * totalCash * 0.01 / myStatus['preClose']
            elif myindex == 50:
                maxPosition = myStatus[
                    'weight50'] * totalCash * 0.01 / myStatus['preClose']
            else:
                maxPosition = totalCash * 0.001 / myStatus['preClose']
            maxPosition = round(maxPosition, -2)
            if maxPosition == 0:
                continue
            tickData = tick.getDataByDateFromLocalFile(code, today)
            # ``.values`` replaces DataFrame.as_matrix(), removed in pandas 1.0.
            tickList = tickData.values
            for i in range(0, len(tickList) - 60):
                now = tickList[i]
                lastPrice = now[3]
                # NOTE(review): columns 4..42 stop at BV9; BV10 (column 43)
                # is not passed on -- confirm this is what TradeUtility
                # expects.
                tickShot = now[4:43]
                # SV1 == 0 / BV1 == 0: nothing offered / nothing bid at
                # level 1.
                upCeiling = bool(now[24] == 0)
                downCeiling = bool(now[34] == 0)
                mytime = datetime.datetime.strptime(
                    now[1] + now[2], '%Y%m%d%H%M%S%f')
                increaseToday = lastPrice / myStatus['preClose'] - 1
                if i >= 100 and tickList[i - 100][3] > 0:
                    increase5m = lastPrice / tickList[i - 100][3] - 1
                else:
                    increase5m = np.nan
                canOpen = (positionNow == 0 and positionYesterday == 0
                           and 100 <= i <= 4500 and maxPosition > 0)
                threshold = std1 * myStatus['closeStd20']
                if canOpen and increaseToday > threshold and not downCeiling:
                    # Open a short position.
                    [price, deltaPosition,
                     amount] = TradeUtility.sellByTickShotData(
                         tickShot, maxPosition, 0.001)
                    positionNow = -deltaPosition
                    myTrade['date'] = today
                    myTrade['opentime'] = mytime
                    myTrade['position'] = positionNow
                    myTrade['open'] = price
                    myTrade['openAdj'] = myStatus['adjFactor']
                    myTrade['increase5m'] = increase5m
                    myTrade['increaseToday'] = increaseToday
                    openIndex = i
                    maxPosition = maxPosition - deltaPosition
                elif canOpen and increaseToday < -threshold and not upCeiling:
                    # Open a long position.
                    [price, deltaPosition,
                     amount] = TradeUtility.buyByTickShotData(
                         tickShot, maxPosition, 0.001)
                    positionNow = deltaPosition
                    myTrade['date'] = today
                    myTrade['opentime'] = mytime
                    myTrade['position'] = positionNow
                    myTrade['open'] = price
                    myTrade['openAdj'] = myStatus['adjFactor']
                    myTrade['increase5m'] = increase5m
                    myTrade['increaseToday'] = increaseToday
                    maxPosition = maxPosition - deltaPosition
                    openIndex = i
                elif positionNow > 0:
                    openValue = myTrade['open'] * myTrade['openAdj']
                    profit = (lastPrice * myStatus['adjFactor'] -
                              openValue) / openValue
                    mustClose = ((profit < 0 and i >= openIndex + 400)
                                 or i >= 4700 or i >= openIndex + 1200
                                 or positionYesterday != 0)
                    if mustClose and not downCeiling:
                        # Close the long position and record the trade.
                        [price, deltaPosition,
                         amount] = TradeUtility.sellByTickShotData(
                             tickShot, positionNow, 0.1)
                        myTrade['closetime'] = mytime
                        myTrade['close'] = price
                        myTrade['closeAdj'] = myStatus['adjFactor']
                        positionNow = 0
                        positionYesterday = 0
                        openIndex = 0
                        myTradeList.append(copy.deepcopy(myTrade))
                        myTrade = {}
                elif positionNow < 0:
                    openValue = myTrade['open'] * myTrade['openAdj']
                    profit = (-lastPrice * myStatus['adjFactor'] +
                              openValue) / openValue
                    mustClose = ((profit < 0 and i >= openIndex + 400)
                                 or i >= 4700 or i >= openIndex + 1200
                                 or positionYesterday != 0)
                    if mustClose and not upCeiling:
                        # Close the short position and record the trade.
                        [price, deltaPosition,
                         amount] = TradeUtility.buyByTickShotData(
                             tickShot, -positionNow, 0.1)
                        myTrade['closetime'] = mytime
                        myTrade['close'] = price
                        myTrade['closeAdj'] = myStatus['adjFactor']
                        positionNow = 0
                        positionYesterday = 0
                        openIndex = 0
                        myTradeList.append(copy.deepcopy(myTrade))
                        myTrade = {}
            if positionNow != 0:
                logger.info(
                    f'{code} of {today} can not close the position!')
            # An unclosed position is carried into the next trading day.
            positionYesterday = positionNow
        m = pd.DataFrame(data=myTradeList,
                         columns=[
                             'date', 'opentime', 'position', 'open', 'openAdj',
                             'increase5m', 'increaseToday', 'closetime',
                             'close', 'closeAdj'
                         ])

        m['profit'] = m['position'] * (m['close'] * m['closeAdj'] -
                                       m['open'] * m['openAdj']) / m['openAdj']
        m['fee'] = m['position'].abs() * m[['open', 'close'
                                            ]].max(axis=1) * 0.0012
        m['netProfit'] = m['profit'] - m['fee']
        m['code'] = code
        mycolumns = [
            'date', 'closeStd20', 'ts_closeStd20', 'industry', 'industryName'
        ]
        dailyFactor = dailyFactor[mycolumns]
        mydata = pd.merge(m,
                          dailyFactor,
                          how='left',
                          left_on='date',
                          right_on='date')
        return mydata


########################################################################
Example #9
0
    def dataPrepared(self, stockCodes, startDate, endDate, recordFilePath):
        """Build per-minute feature tables and store one table per stock.

        For every code that is not yet a key of the HDF5 file at
        ``recordFilePath``, the minute K-line data is joined with the daily
        factor rows of the same date and extended with:

        * ``vwap``, ``increase5m``, ``increase1m``, ``increaseInDay``;
        * ``canBuy`` / ``canSell`` flags (open below 1.097x / above 0.903x
          of ``yesterdayClose`` and positive) and the back-filled
          ``canBuyPrice`` / ``canSellPrice`` with their adjust factors;
        * ``timeStamp`` (``date + tick``);
        * ``ts_rank_volume``, ``minuteReturn``, ``minuteStd20`` and
          ``ts_rank_minuteStd20``.

        Rows whose ``status`` equals ``'N'`` are dropped. The table is
        indexed by ``(timeStamp, code)`` and written under key ``code``.

        Note:
            All warnings are silenced process-wide by this call.

        Args:
            stockCodes: Iterable of stock codes (``str``).
            startDate: First date of the range.
            endDate: Last date of the range.
            recordFilePath: Path of the HDF5 file that receives the tables.

        Returns:
            None.
        """
        warnings.filterwarnings('ignore')
        mytradedays = TradedayDataProcess.getTradedays(startDate, endDate)
        mylist = stockCodes
        myMinute = KLineDataProcess('minute', False)
        # Context managers close both stores even when a stock fails.
        with pd.HDFStore(self.__factorsAddress, 'a') as store:
            allDailyData = store.select(
                self.__key,
                where=['date>="%s" and date<="%s"' % (startDate, endDate)])

        def lastRankRatio(window):
            # Rank of the newest value inside its window, scaled to (0, 1].
            return pd.Series(window).rank().iloc[-1] / len(window)

        deleteColumns = [
            'code', 'date', 'open', 'high', 'low', 'close', 'volume',
            'amount', 'change', 'pctChange', 'vwap'
        ]
        total = len(mylist)
        with pd.HDFStore(recordFilePath, 'a') as store:
            oldKeys = set(store.keys())
            for num, code in enumerate(mylist, 1):
                print("{0}({1} of {2}) start!".format(str(code), num, total))
                if ('/' + code) in oldKeys:
                    # Already recorded in an earlier run.
                    continue
                m = myMinute.getDataByDate(code, startDate, endDate)
                if len(m) == 0:
                    continue
                # Copy: new columns are assigned to the filtered frame.
                m = m[m['date'].isin(mytradedays)].copy()
                m['vwap'] = m['amount'] / m['volume']
                m.loc[(m['date'] == m['date'].shift(5)),
                      'increase5m'] = (m['open'] / m['open'].shift(5) - 1)
                m.loc[(m['date'] == m['date'].shift(1)),
                      'increase1m'] = (m['open'] / m['open'].shift(1) - 1)

                # Attach the daily factors of each minute's date.
                d = allDailyData.xs(code, level='code')
                dailyInfo = d.loc[m['date']]
                mycolumns = [
                    col for col in dailyInfo.columns
                    if col not in deleteColumns
                ]
                dailyInfo = dailyInfo[mycolumns].rename(
                    columns={'preClose': 'yesterdayClose'})
                dailyInfo.index = m.index
                m = pd.concat([m, dailyInfo], axis=1)
                m['increaseInDay'] = (m['open'] / m['yesterdayClose'] - 1)
                m = m[m['status'] != 'N'].copy()

                # Tradability flags and the next tradable price.
                m['canBuy'] = 0
                m['canSell'] = 0
                m['canBuyPrice'] = None
                m['canSellPrice'] = None
                m.loc[((m['open'] < round(1.097 * m['yesterdayClose'], 2)) &
                       (m['open'] > 0)), 'canBuy'] = 1
                m.loc[((m['open'] > round(0.903 * m['yesterdayClose'], 2)) &
                       (m['open'] > 0)), 'canSell'] = 1
                buyable = m['canBuy'] == 1
                sellable = m['canSell'] == 1
                m.loc[buyable, 'canBuyPrice'] = m.loc[buyable, 'open']
                m.loc[buyable, 'canBuyPriceAdj'] = m.loc[buyable, 'adjFactor']
                # ``.bfill()`` replaces the deprecated fillna(method='bfill').
                m['canBuyPrice'] = m['canBuyPrice'].bfill()
                m['canBuyPriceAdj'] = m['canBuyPriceAdj'].bfill()
                m.loc[sellable, 'canSellPrice'] = m.loc[sellable, 'open']
                m.loc[sellable,
                      'canSellPriceAdj'] = m.loc[sellable, 'adjFactor']
                m['canSellPrice'] = m['canSellPrice'].bfill()
                m['canSellPriceAdj'] = m['canSellPriceAdj'].bfill()
                m['timeStamp'] = m['date'] + m['tick']

                # Intraday minute statistics.
                # Rank ratio of the volume within the 20 preceding minutes.
                m['ts_rank_volume'] = m['volume'].shift(1).rolling(
                    20, min_periods=15).apply(lastRankRatio, raw=True)
                # Return of each minute.
                m['minuteReturn'] = (m['close'] -
                                     m['close'].shift(1)) / m['close'].shift(1)
                # Standard deviation of the minute returns.
                m['minuteStd20'] = m['minuteReturn'].shift(1).rolling(
                    20, min_periods=17).std()
                # Rank ratio of that standard deviation.
                m['ts_rank_minuteStd20'] = m['minuteStd20'].rolling(
                    20, min_periods=15).apply(lastRankRatio, raw=True)

                mselect = m.set_index(['timeStamp', 'code'])
                store.append(code, mselect, append=False, format="table")
Example #10
0
 def __updateStockDailyBasicData(self, code, startDate, endDate):
     """Return the daily K-line data of ``code`` for the given date range.

     Builds a ``KLineDataProcess('daily', True)`` object and returns whatever
     its ``getDataByDate(code, startDate, endDate)`` call yields.

     NOTE(review): the meaning of the second constructor argument (``True``)
     is not visible from here -- confirm against ``KLineDataProcess``.
     """
     return KLineDataProcess('daily', True).getDataByDate(
         code, startDate, endDate)
Example #11
0
 def updateStockDailyFactors(self, codeList, factorList):
     """Incrementally compute daily factors and append them to HDF5 files.

     For every ``code`` in ``codeList`` and every ``factor`` in
     ``factorList`` the result is stored in
     ``<self.localFileFolder>/<factor>/<code with '.' replaced by '_'>.h5``
     under two keys: ``'date'`` (the dates already written) and
     ``'factors'`` (the factor columns).  When the file already exists, only
     rows dated after the last stored date are appended.

     Recognised factor names are ``'closeStd'``, ``'industry'``, ``'index'``
     and ``'marketValue'``; for any other name the unmodified daily data is
     stored.

     Args:
         codeList: iterable of stock codes (strings).
         factorList: iterable of factor names (used as sub-folder names).

     Returns:
         None.  An exception raised while filtering or writing one
         code/factor pair is logged and the loop moves on to the next pair.
     """
     for code in codeList:
         logger.info(f'{code} factor update start!')
         # Listing / delisting dates clamp the date range requested below.
         IPOInfo = StockIPOInfoProcess.getStockIPOInfoByCode(code)
         listDate = IPOInfo['listDate'].iloc[0]
         delistDate = IPOInfo['delistDate'].iloc[0]
         for factor in factorList:
             fileName = code.replace('.', '_') + ".h5"
             path = os.path.join(self.localFileFolder, factor)
             factorFilePath = os.path.join(path, fileName)
             HDF5Utility.pathCreate(path)
             endDate = self.endDate
             if not os.path.isfile(factorFilePath):
                 # Nothing stored yet: compute from the listing date.
                 startDate = listDate
                 lastDate = EMPTY_STRING
             else:
                 with pd.HDFStore(path=factorFilePath,
                                  mode='r',
                                  complib='blosc:zstd',
                                  append=True,
                                  complevel=9) as store:
                     existsDate = store['date']
                 lastDate = existsDate.max()
                 startDate = lastDate
                 if startDate >= endDate:
                     # Stored data already reaches the requested end date.
                     continue
             # Step back 100 trade days -- presumably so the rolling windows
             # used by the factors have enough history (TODO confirm). Rows
             # up to lastDate are dropped again before writing.
             startDate = TradedayDataProcess.getPreviousTradeday(
                 startDate, 100)
             if startDate < listDate:
                 startDate = listDate
             if endDate > delistDate:
                 endDate = delistDate
             dailyData = KLineDataProcess('daily')
             marketData = dailyData.getDataByDate(code, startDate, endDate)
             if marketData.empty:
                 # No daily rows means there is nothing to compute or store.
                 logger.warning(
                     f'There no daily data of {code} from {startDate} to {endDate}!'
                 )
                 continue
             # Factor dates are computed up to the last day of the daily data.
             endDate = marketData['date'].max()
             mydata = marketData
             if factor == 'closeStd':
                 myReturnFun = ComputeUtility.computeReturn
                 myStdFun = ComputeUtility.computeStandardDeviation
                 myTSRank = ComputeUtility.computeTimeSeriesRank
                 mydata = self.__computeStockDailyFactor(
                     mydata, myReturnFun, ['close', 'preClose', 'return'])
                 # Returns on suspended days and on the listing day are
                 # blanked out before the standard deviation is computed.
                 mydata.loc[mydata['status'] == '停牌', 'return'] = np.nan
                 mydata.loc[mydata['date'] == listDate, 'return'] = np.nan
                 mydata = self.__computeStockDailyFactor(
                     mydata, myStdFun, ['return', 'closeStd20', 20, 0.8])
                 mydata = self.__computeStockDailyFactor(
                     mydata, myTSRank,
                     ['closeStd20', 'ts_closeStd20', 50, 0.4])
                 mycolumns = [
                     'date', 'return', 'closeStd20', 'ts_closeStd20'
                 ]
                 mydata = mydata[mycolumns]
             elif factor == 'industry':
                 # Index by date so the assignments below align on date
                 # (assumes the helper's result is date-indexed -- confirm).
                 mydata.set_index('date', drop=True, inplace=True)
                 myindustry = IndustryClassification.getIndustryByCode(
                     code, startDate, endDate)
                 mydata['industry'] = myindustry['industry']
                 mydata['industryName'] = myindustry['name']
                 mydata.reset_index(drop=False, inplace=True)
                 mycolumns = ['date', 'industry', 'industryName']
                 mydata = mydata[mycolumns]
             elif factor == 'index':
                 myindex = IndexComponentDataProcess()
                 myIndexBelongs50 = myindex.getStockBelongs(
                     code, SSE50, startDate, endDate)
                 myIndexBelongs300 = myindex.getStockBelongs(
                     code, HS300, startDate, endDate)
                 myIndexBelongs500 = myindex.getStockBelongs(
                     code, CSI500, startDate, endDate)
                 myStockWeightOf50 = IndexComponentDataProcess.getStockPropertyInIndex(
                     code, SSE50, startDate, endDate)
                 myStockWeightOf300 = IndexComponentDataProcess.getStockPropertyInIndex(
                     code, HS300, startDate, endDate)
                 myStockWeightOf500 = IndexComponentDataProcess.getStockPropertyInIndex(
                     code, CSI500, startDate, endDate)
                 mydata.set_index('date', drop=True, inplace=True)
                 mydata['is50'] = myIndexBelongs50['exists']
                 mydata['is300'] = myIndexBelongs300['exists']
                 mydata['is500'] = myIndexBelongs500['exists']
                 mydata['weight50'] = myStockWeightOf50['weight']
                 mydata['weight300'] = myStockWeightOf300['weight']
                 mydata['weight500'] = myStockWeightOf500['weight']
                 mydata.reset_index(drop=False, inplace=True)
                 mycolumns = [
                     'date', 'is50', 'is300', 'is500', 'weight50',
                     'weight300', 'weight500'
                 ]
                 mydata = mydata[mycolumns]
             elif factor == 'marketValue':
                 mydata.set_index('date', drop=True, inplace=True)
                 myDailyDerivative = KLineDataProcess('dailyDerivative')
                 mydataDerivative = myDailyDerivative.getDataByDate(
                     code, startDate, endDate)
                 mydataDerivative.set_index('date', inplace=True)
                 mydata['freeShares'] = mydataDerivative['freeShares']
                 mydata['freeMarketValue'] = mydataDerivative[
                     'freeMarketValue']
                 mydata.reset_index(drop=False, inplace=True)
                 mycolumns = ['date', 'freeShares', 'freeMarketValue']
                 mydata = mydata[mycolumns]
             try:
                 if lastDate != EMPTY_STRING:
                     # Keep only rows newer than what is already stored.
                     mydata = mydata[mydata['date'] > lastDate]
                 if mydata.empty:
                     # Skip before opening the store: mode 'a' would create
                     # an empty file without a 'date' key, and the next run
                     # would then fail when reading store['date'].
                     continue
                 mydate = mydata['date']
                 with pd.HDFStore(path=factorFilePath,
                                  mode='a',
                                  complib='blosc:zstd',
                                  append=True,
                                  complevel=9) as store:
                     store.append('date',
                                  mydate,
                                  append=True,
                                  format="table",
                                  data_columns=['date'],
                                  complevel=9)
                     store.append('factors',
                                  mydata,
                                  append=True,
                                  format="table",
                                  data_columns=mydata.columns,
                                  complevel=9)
             except Exception as excp:
                 # Best effort: log and carry on with the next factor/code.
                 logger.error(f'{code} factor {factor} error!  {excp}')
                 logger.error(traceback.format_exc())
Example #12
0
 def updateMinuteIndexKLines(self, code, startDate, endDate):
     """Request minute index K-lines of ``code`` for the date range.

     Calls ``KLineDataProcess('minuteIndex', True).getDataByDate`` and
     discards the result; returns ``None``.

     NOTE(review): presumably the call is made for its side effect of
     refreshing locally stored data -- confirm against ``KLineDataProcess``.
     """
     KLineDataProcess('minuteIndex', True).getDataByDate(
         code, startDate, endDate)
Example #13
0
 def updateSingleStockDailyDerivatives(self, code, startDate, endDate):
     """Request daily derivative data of ``code`` for the date range.

     Calls ``KLineDataProcess('dailyDerivative', True).getDataByDate`` and
     discards the result; returns ``None``.

     NOTE(review): presumably the call is made for its side effect of
     refreshing locally stored data -- confirm against ``KLineDataProcess``.
     """
     KLineDataProcess('dailyDerivative', True).getDataByDate(
         code, startDate, endDate)
Example #14
0
 def updateSingleStockMinuteKLines(self, code, startDate, endDate):
     """Request minute K-lines of a single stock for the date range.

     Calls ``KLineDataProcess('minute', True).getDataByDate`` and discards
     the result; returns ``None``.

     NOTE(review): presumably the call is made for its side effect of
     refreshing locally stored data -- confirm against ``KLineDataProcess``.
     """
     KLineDataProcess('minute', True).getDataByDate(
         code, startDate, endDate)
Example #15
0
 def updateSingleStockDailyKLines(self, code, startDate, endDate):
     """Request daily K-lines of a single stock for the date range.

     Calls ``KLineDataProcess('daily', True).getDataByDate`` and discards
     the result; returns ``None``.

     NOTE(review): presumably the call is made for its side effect of
     refreshing locally stored data -- confirm against ``KLineDataProcess``.
     """
     KLineDataProcess('daily', True).getDataByDate(
         code, startDate, endDate)
Example #16
0
 def getStockDailyFeatureData(self, stockCodes, startDate, endDate):
     """Build or refresh the daily feature table for ``stockCodes``.

     Rows already stored under ``self.key`` in ``self.localFileStr`` are
     loaded first.  For each code, daily data is then fetched from
     ``max(startDate, next trade day after the latest stored date)`` (or
     from ``startDate`` when the code has no stored rows) up to ``endDate``
     and enriched with industry, index membership, free shares / free
     market value, the daily return, ``closeStd20`` and
     ``ts_rank_closeStd20``.

     The combined table (indexed by ``date`` and ``code``) is written to
     ``self.localFileStr`` under ``self.key``.  A copy with an extra
     ``rankMarketValue`` column (per-date rank of ``freeMarketValue``
     divided by that date's largest rank) is written to
     ``self.localFileStrWithRank`` under ``self.keyWithRank``.

     Args:
         stockCodes: sequence of stock codes to process.
         startDate: earliest date to fetch.
         endDate: latest date to fetch.

     Returns:
         None.
     """
     myDaily = KLineDataProcess('daily')
     myDailyDerivative = KLineDataProcess('dailyDerivative')
     myindex = IndexComponentDataProcess()
     if os.path.isfile(self.localFileStr):
         with pd.HDFStore(self.localFileStr, 'a') as store:
             allData = store.select(self.key)
         allData.reset_index(inplace=True, drop=False)
         existsCodes = set(allData['code'])
     else:
         allData = pd.DataFrame()
         existsCodes = set()
     for num, code in enumerate(stockCodes, start=1):
         print("{0}({1} of {2}) start!".format(code, num, len(stockCodes)))
         # Codes without stored rows start at startDate; previously
         # latestDate was unbound (or left over from another code) here.
         startNow = startDate
         if code in existsCodes:
             latestDate = allData.loc[allData['code'] == code, 'date'].max()
             if endDate <= latestDate:
                 continue
             startNow = max(startDate,
                            TradedayDataProcess.getNextTradeday(latestDate))
         if startNow > endDate:
             continue
         mydata = myDaily.getDataByDate(code, startNow, endDate)
         # Index by date so the column assignments below align on date
         # (assumes the helper results are date-indexed -- confirm).
         mydata.set_index('date', drop=True, inplace=True)
         myindustry = IndustryClassification.getIndustryByCode(
             code, startNow, endDate)
         mydata['industry'] = myindustry['industry']
         mydata['industryName'] = myindustry['name']
         myIndexBelongs50 = myindex.getStockBelongs(code, SSE50, startNow,
                                                    endDate)
         myIndexBelongs300 = myindex.getStockBelongs(
             code, HS300, startNow, endDate)
         myIndexBelongs500 = myindex.getStockBelongs(
             code, CSI500, startNow, endDate)
         mydata['is50'] = myIndexBelongs50['exists']
         mydata['is300'] = myIndexBelongs300['exists']
         mydata['is500'] = myIndexBelongs500['exists']
         mydataDerivative = myDailyDerivative.getDataByDate(
             code, startNow, endDate)
         mydataDerivative.set_index('date', inplace=True)
         mydata['freeShares'] = mydataDerivative['freeShares']
         mydata['freeMarketValue'] = mydataDerivative['freeMarketValue']
         mydata['return'] = (mydata['close'] -
                             mydata['preClose']) / mydata['preClose']
         # The std computed through yesterday is used for today, so no
         # future data leaks into the feature.
         mydata['closeStd20'] = mydata['return'].shift(1).rolling(
             20, min_periods=17).std()
         mydata['ts_rank_closeStd20'] = mydata['closeStd20'].rolling(
             50, min_periods=20).apply(
                 (lambda x: pd.Series(x).rank().iloc[-1] / len(x)),
                 raw=True)
         mydata.reset_index(inplace=True)
         # DataFrame.append was removed in pandas 2.0; concat is equivalent.
         allData = pd.concat([allData, mydata])
     if allData.empty:
         # Nothing stored and nothing fetched: set_index below would fail.
         return
     allData = allData.set_index(['date', 'code'])
     with pd.HDFStore(self.localFileStr, 'a') as store:
         # allData already contains the rows read from the store, so the
         # table is rewritten; appending would duplicate every stored row.
         store.append(self.key, allData, append=False, format="table")
     # Cross-sectional rank of free market value per date, scaled by the
     # largest rank of that date.
     rankMv = allData['freeMarketValue'].unstack().rank(axis=1)
     mvMax = rankMv.max(axis=1)
     rankMv = rankMv.div(mvMax, axis=0)
     allData['rankMarketValue'] = rankMv.stack()
     with pd.HDFStore(self.localFileStrWithRank, 'a') as store:
         store.append(self.keyWithRank,
                      allData,
                      append=False,
                      format="table")