예제 #1
0
 def delItemsFromRemove(self):
     """Drop from ``self.df`` every row whose label is queued in ``self.removeList``.

     Repeated labels in the queue are collapsed before the drop.  Any
     exception is logged through ``gLogger`` and swallowed.
     """
     try:
         gLogger.info("start delItemsFromRemove")
         uniqueLabels = list(set(self.removeList))
         self.df = self.df.drop(index=uniqueLabels)
     except Exception as e:
         gLogger.exception("Exception : %s" % e)
예제 #2
0
    def gen1minKData(self, vtSymbol, df_data):
        """Build 1-minute bars for ``vtSymbol`` and insert them into ``WIND_1_MIN_DB``.

        Consecutive entries of ``self.splitDict[vtSymbol][1]`` are treated as
        ``[start, end)`` windows.  Rows of ``df_data`` whose ``structTime``
        falls inside a window are aggregated with ``self.aggMethod`` when the
        window contains more than two rows.  Any exception is logged through
        ``gLogger`` and swallowed.

        NOTE(review): ``structTime`` is written to ``self.df`` but read from
        ``df_data``; this only lines up if the caller passes the same frame
        (or one that already carries the column) -- confirm against the caller.
        """
        try:
            gLogger.info("start gen1minKData , vtSymbol is %s" % vtSymbol)
            c = 1
            self.barDict[vtSymbol] = {c: []}
            self.df["structTime"] = self.df["time"].map(
                lambda raw: datetime.datetime.strptime(raw, "%H%M%S%f"))
            oneMinute = datetime.timedelta(minutes=1)
            boundaries = self.splitDict[vtSymbol][c]
            for left, right in zip(boundaries, boundaries[1:]):
                start = datetime.datetime.strptime(
                    str(left).strip(), '%H:%M:%S')
                end = datetime.datetime.strptime(
                    str(right).strip(), '%H:%M:%S')
                # Widen the window by one minute when the preceding minute
                # is listed in self.AucTime.
                shifted = start - oneMinute
                if shifted.strftime('%H:%M') in self.AucTime:
                    start = shifted
                inWindow = ((df_data["structTime"] >= start)
                            & (df_data["structTime"] < end))
                dfTemp = df_data.loc[inWindow]
                if len(dfTemp) > 2:
                    self.barDict[vtSymbol][c].append(self.aggMethod(dfTemp))

            dbNew = self.db.get_db("localhost", 27017, 'WIND_1_MIN_DB')
            self.db.insert2db(dbNew, vtSymbol, self.barDict[vtSymbol][c])
        except Exception as e:
            gLogger.exception("Exception : %s" % e)
예제 #3
0
 def recordExceptionalPrice(self):
     """Run ``self.estimateExceptional`` on each of the five price fields, in order."""
     gLogger.info("start recordExceptionalPrice")
     priceFields = ("lastPrice", "highPrice", "lowPrice", "bidPrice1",
                    "askPrice1")
     for priceField in priceFields:
         self.estimateExceptional(priceField)
예제 #4
0
 def cleanSameTimestamp(self):
     """Queue rows with a repeated ``datetime`` value for removal.

     The frame is sorted by ``datetime`` in descending order; every row that
     ``duplicated()`` marks in that order is appended to ``self.removeList``
     and logged.  Any exception is logged through ``gLogger`` and swallowed.
     """
     try:
         gLogger.info("start cleanSameTimestamp")
         newestFirst = self.df.sort_values(by=['datetime'], ascending=False)
         repeated = newestFirst["datetime"].duplicated()
         for label in newestFirst[repeated].index.values:
             self.removeList.append(label)
             gLogger.debug('remove index = %d' % label)
     except Exception as e:
         gLogger.exception("Exception : %s" % e)
예제 #5
0
 def parse2CycleData(self):
     """Run ``AggregateTickData`` once for each distinct date in ``self.dateList``.

     For every date, ``self.date`` is set, ``self.loadInformation()`` is
     called, and its result is handed to ``AggregateTickData`` together with
     the date and ``self.AucTime``.

     NOTE(review): ``self.dateList`` is overwritten here with three fixed
     dates (2017-05-31, 2017-06-01, 2017-06-02); this looks like a debugging
     leftover -- confirm before relying on it.
     """
     self.dateList = [
         datetime.datetime(2017, 5, 31),
         datetime.datetime(2017, 6, 1),
         datetime.datetime(2017, 6, 2),
     ]
     for tradeDate in set(self.dateList):
         gLogger.info("start parse cycle data —— %s" % tradeDate)
         self.date = tradeDate
         info = self.loadInformation()
         AggregateTickData(info, tradeDate, self.AucTime)
예제 #6
0
 def cleanIllegalTradingTime(self):
     """Queue rows that fall outside trading hours for removal.

     ``self.StandardizeTimePeriod`` is mapped over the ``time`` column into a
     temporary ``illegalTime`` column, with missing results filled as False.
     Every row whose flag equals False is appended to ``self.removeList``;
     the temporary column is deleted afterwards.  Any exception is logged
     through ``gLogger`` and swallowed.
     """
     try:
         gLogger.info("start cleanIllegalTradingTime ")
         self.df['illegalTime'] = self.df["time"].map(
             self.StandardizeTimePeriod).fillna(False)
         # Keep the explicit comparison with False: the column may hold
         # non-boolean objects.
         rejected = self.df[self.df['illegalTime'] == False]
         for label in rejected.index:
             self.removeList.append(label)
             gLogger.debug('remove index = %d' % (label))
         del self.df["illegalTime"]
     except Exception as e:
         gLogger.exception("Exception: %s" % e)
예제 #7
0
 def gen1DayKData(self, vtSymbol):
     """Aggregate the stored 1-minute bars of ``vtSymbol`` into one ``'1Day'`` bar.

     The bars in ``self.barDict[vtSymbol][1]`` are loaded into a DataFrame.
     When it is non-empty, the ``self.aggMethod`` result is stored under the
     ``'1Day'`` key and inserted into the database named
     ``'WIND_' + '1Day' + '_MIN_DB'``.  Any exception is logged through
     ``gLogger`` and swallowed.
     """
     try:
         gLogger.info("start gen1DayKData , vtSymbol = %s" % vtSymbol)
         c = '1Day'
         self.barDict[vtSymbol][c] = []
         minuteBars = pd.DataFrame(self.barDict[vtSymbol][1])
         if minuteBars.empty:
             return
         self.barDict[vtSymbol][c].append(self.aggMethod(minuteBars))
         dbName = 'WIND_' + str(c) + '_MIN_DB'
         dbNew = self.db.get_db("localhost", 27017, dbName)
         self.db.insert2db(dbNew, vtSymbol, self.barDict[vtSymbol][c])
     except Exception as e:
         gLogger.exception("Exception : %s" % e)
예제 #8
0
 def estimateExceptional(self, field):
     """Log rows where ``field`` changed by 12% or more versus the previous row.

     The absolute difference to the previous row's value is compared with
     0.12 times that previous value.  Matching index labels that are not
     already in ``self.removeList`` are appended to ``self.logList``.  Any
     exception is logged through ``gLogger`` and swallowed.
     """
     try:
         gLogger.info("start estimateExceptional, field = %s" % field)
         current = self.df[field]
         frame = pd.DataFrame(current)
         frame["shift"] = current.shift(1)
         frame["delta"] = (frame[field] - frame["shift"]).abs()
         # Rows without a previous value (or with missing data) are skipped.
         frame = frame.dropna()
         frame["IsExcept"] = frame["delta"] >= frame["shift"] * 0.12
         flagged = frame.loc[frame["IsExcept"]]
         for label in flagged.index:
             if label in self.removeList:
                 continue
             self.logList.append(label)
             gLogger.debug('Field = %s, log index = %d' % (field, label))
     except Exception as e:
         gLogger.exception("Exception : %s" % e)
예제 #9
0
 def paddingWithPrevious(self, field):
     """Replace 0.0 values of ``field`` with the value at the preceding index label.

     For every row whose ``field`` equals 0.0 and whose label ``i`` is not in
     ``self.removeList``, the value stored at label ``i - 1`` is copied in
     (only when ``i - 1 >= 0``) and ``i`` is appended to ``self.updateList``.
     Any exception is logged through ``gLogger`` and swallowed.
     """
     try:
         gLogger.info("start paddingWithPrevious, field = %s" % field)
         zeroRows = self.df.loc[self.df[field] == 0.0]
         for label in zeroRows.index:
             if label in self.removeList:
                 continue
             preIndex = label - 1
             if preIndex < 0:
                 continue
             # Read from self.df (not the snapshot) so an earlier fill in
             # this same pass is visible to the next row.
             self.df.loc[label, field] = self.df.loc[preIndex, field]
             self.updateList.append(label)
             gLogger.debug('Field = %s, update index = %d' % (field, label))
     except Exception as e:
         gLogger.exception("Exception : %s" % e)
예제 #10
0
 def processTickData(self):
     """Load and clean every tick ``.mat`` file listed in ``self.fileList``.

     The symbol is taken from the parent directory name and the date from the
     file name (``<name>_<YYYYMMDD>.mat``).  Files whose symbol contains
     ``SP-``, ``SPC-`` or ``IMCI`` are skipped.  For the rest, the date is
     recorded in ``self.date`` / ``self.dateList`` and the data is passed to
     ``CleanData``.

     NOTE(review): the result of ``self.parseMatFile()`` is immediately
     replaced by a single hard-coded Windows path; this looks like a
     debugging leftover -- confirm before relying on it.
     """
     self.fileList = self.parseMatFile()
     self.fileList = [
         "E:\\windDataOriginal\\commodity\\20170531\\bb1805\\bb1805_20170531.mat"
     ]
     skippedTags = ("SP-", "SPC-", "IMCI")
     for matPath in self.fileList:
         pathParts = matPath.split('\\')
         sym = pathParts[-2]
         if any(tag in sym for tag in skippedTags):
             continue
         gLogger.info("start process tick data —— %s" % matPath)
         # Strip the 4-character extension from the trailing date token.
         dateText = pathParts[-1].split('_')[-1][:-4]
         self.date = datetime.datetime.strptime(dateText, '%Y%m%d')
         self.dateList.append(self.date)
         dfInfo = self.loadInformation()
         dfData = LoadMatFile(matPath).dfData
         CleanData(dfData, dfInfo, self.AucTime)
0
 def getTimeList(self, cycle):
     """Generate the time-split lists for ``self.Symbol`` and save them.

     Creates the ``self.timeFilePath`` directory when it does not exist, then
     calls ``self.genTimeList(self.Symbol, cycle)`` followed by
     ``self.saveTimeList()``.  Any exception is logged through ``gLogger``
     and swallowed.
     """
     try:
         gLogger.info("start getTimeList")
         timeDir = self.timeFilePath
         if not os.path.exists(timeDir):
             os.makedirs(timeDir)
         # NOTE: a same-day pickle cache of splitDict used to be consulted
         # here; it is disabled, so the lists are regenerated on every call.
         self.genTimeList(self.Symbol, cycle)
         self.saveTimeList()
     except Exception as e:
         gLogger.exception("Exception : %s" % e)
예제 #12
0
 def normalizeData(self, df):
     """Normalise the raw tick frame in place and return it.

     ``date`` and ``time`` are converted to integer strings, with ``time``
     zero-padded to 9 characters.  ``vtSymbol`` comes from the part of the
     ``self.matFile`` file name before the first underscore, and ``symbol``
     is its alphabetic characters in lower case.  ``datetime`` is built by
     mapping ``self.convert2datetime`` over ``"<date> <time>"``; finally
     ``date`` is overwritten with the date token from the file name.

     Returns ``df``, or ``None`` when an exception was logged.
     """
     try:
         gLogger.info("start normalize df Data")

         def asIntText(value):
             return str(int(value))

         df["date"] = df["date"].map(asIntText)
         df["time"] = df["time"].map(asIntText).map(
             lambda text: text.zfill(9))

         nameParts = self.matFile.split('\\')[-1].split('_')
         vtSymbol = nameParts[0]
         # Trailing token is "<date>.mat"; drop the 4-character extension.
         date = nameParts[-1][:-4]

         df["vtSymbol"] = vtSymbol
         df["symbol"] = "".join(ch for ch in vtSymbol if ch.isalpha()).lower()
         # "DT" is a scratch column used only to build "datetime".
         df["DT"] = df["date"] + ' ' + df["time"]
         df["datetime"] = df["DT"].map(self.convert2datetime)
         del df["DT"]
         df["date"] = date
         return df
     except Exception as e:
         gLogger.exception("Exception when convert to df: %s" % e)
예제 #13
0
 def reserveLastTickInAuc(self):
     """Keep only the last tick inside each call-auction minute.

     For every ``HH:MM`` entry of ``self.AucTime``, the ticks inside that
     one-minute window are ordered newest first and all but the first are
     appended to ``self.removeList``.  The temporary ``structTime`` column is
     deleted afterwards.  Any exception is logged through ``gLogger`` and
     swallowed.
     """
     try:
         gLogger.info("start reserveLastTickInAuc")
         self.df["structTime"] = self.df["time"].map(
             lambda raw: datetime.datetime.strptime(raw, "%H%M%S%f"))
         oneMinute = datetime.timedelta(minutes=1)
         for st in self.AucTime:
             start = datetime.datetime.strptime(st, '%H:%M')
             end = start + oneMinute
             inWindow = ((self.df["structTime"] >= start)
                         & (self.df["structTime"] < end))
             newestFirst = self.df.loc[inWindow].sort_values(
                 by=["structTime"], ascending=False)
             # Position 0 is the latest tick of the window; queue the rest.
             for label in newestFirst.index.values[1:]:
                 self.removeList.append(label)
                 gLogger.debug('remove index = %d' % label)
         del self.df["structTime"]
     except Exception as e:
         gLogger.exception("Exception : %s" % e)
예제 #14
0
    def cleanNullPriceIndicator(self):
        """Handle ticks whose price indicators are zero.

        Rows in which ``lastPrice``, ``highPrice``, ``lowPrice``,
        ``bidPrice1`` and ``askPrice1`` are all 0.0 are appended to
        ``self.removeList`` (unless already present).  Afterwards each of
        those five fields is passed to ``self.paddingWithPrevious`` so that
        the remaining zero values are filled.
        """
        gLogger.info("start cleanNullPriceIndicator")
        priceFields = ("lastPrice", "highPrice", "lowPrice", "bidPrice1",
                       "askPrice1")

        # Row mask: True where every price field equals 0.0.
        allNull = self.df[priceFields[0]] == 0.0
        for name in priceFields[1:]:
            allNull = allNull & (self.df[name] == 0.0)

        # All five are zero: remove the row.  Test the number of matching
        # rows directly rather than the truthiness of their cell contents.
        nullLabels = self.df.loc[allNull].index.values
        if len(nullLabels) > 0:
            gLogger.debug("process data that all price indicators are null")
            for i in nullLabels:
                if i not in self.removeList:
                    self.removeList.append(i)
                    gLogger.debug('All Price is Null, remove index = %d' % i)

        # Only some are zero: fill from the previous row.
        for name in priceFields:
            self.paddingWithPrevious(name)
예제 #15
0
    def initCleanRegulation(self):
        """Run the cleaning pipeline on ``self.df`` and store it in ``WIND_TICK_DB``.

        The symbol is read from the first row of the ``vtSymbol`` column.
        Values containing ``IFC``, ``IHC``, ``ICC`` or ``TFC`` are shortened
        to their first two characters, and ``self.Symbol`` is set to the
        lower-cased alphabetic characters of the result.  The cleaning steps
        then run in a fixed order, queued rows are dropped, and the frame is
        inserted into the database.

        An empty ``self.df`` is logged and skipped; previously the first-row
        lookup raised before the emptiness check was reached.  Exceptions
        raised by the pipeline itself are logged through ``gLogger`` and
        swallowed.
        """
        gLogger.info("start initCleanRegulation")
        # Guard before touching the first row: an empty frame has none.
        if self.df.empty:
            gLogger.info("initCleanRegulation: empty DataFrame, nothing to clean")
            return
        dbNew = self.db.get_db("localhost", 27017, 'WIND_TICK_DB')
        # Positional access so the lookup does not depend on a label 0.
        i = self.df["vtSymbol"].iloc[0]
        try:
            if "IFC" in i or "IHC" in i or "ICC" in i or "TFC" in i:
                i = i[:2]
            self.Symbol = "".join([a for a in i if a.isalpha()]).lower()
            self.initList()
            # Kept from the original: self.initList() runs in between.
            if not self.df.empty:
                self.cleanIllegalTradingTime()
                self.cleanSameTimestamp()
                self.reserveLastTickInAuc()
                self.cleanNullVolTurn()
                self.cleanNullPriceIndicator()
                self.cleanNullOpenInter()
                self.recordExceptionalPrice()

                self.delItemsFromRemove()
                self.db.insert2db(dbNew, i, self.df)
        except Exception as e:
            gLogger.exception("Exception: %s" % e)
예제 #16
0
    def genOtherKData(self, vtSymbol, cycle):
        """Build bars for each entry of ``cycle`` from the stored 1-minute bars.

        For every cycle ``c``, consecutive entries of
        ``self.splitDict[vtSymbol][c]`` form a window.  The window bounds are
        stored in ``self.start1`` / ``self.end1`` and ``self.selectItems`` is
        mapped over ``self.barDict[vtSymbol][1]``.  Non-``None`` results are
        aggregated with ``self.aggMethod`` when there are more than two of
        them.  The bars are inserted into ``'WIND_<c>_MIN_DB'``.  Exceptions
        are logged per cycle and do not stop the remaining cycles.
        """
        for c in cycle:
            try:
                gLogger.info("start genOtherKData cycle = %d" % c)
                self.barDict[vtSymbol][c] = []
                boundaries = self.splitDict[vtSymbol][c]
                for left, right in zip(boundaries, boundaries[1:]):
                    # presumably self.selectItems reads these two attributes
                    # to decide membership -- verify against its definition
                    self.start1 = time.strptime(str(left).strip(), '%H:%M:%S')
                    self.end1 = time.strptime(str(right).strip(), '%H:%M:%S')
                    picked = [
                        item
                        for item in map(self.selectItems,
                                        self.barDict[vtSymbol][1])
                        if item is not None
                    ]
                    dfTemp = pd.DataFrame(picked)
                    if len(dfTemp) > 2:
                        self.barDict[vtSymbol][c].append(
                            self.aggMethod(dfTemp))

                dbName = 'WIND_' + str(c) + '_MIN_DB'
                dbNew = self.db.get_db("localhost", 27017, dbName)
                self.db.insert2db(dbNew, vtSymbol, self.barDict[vtSymbol][c])
            except Exception as e:
                gLogger.exception("Exception : %s" % e)
예제 #17
0
 def genTimeList(self, symbol, cycle):
     """Fill ``self.splitDict[symbol]`` with sorted bar-boundary times per cycle.

     ``self.dfInfo.loc[symbol]["CurrPeriod"]`` is split on ``,`` and ``-``
     into start/end pairs.  A start listed in ``self.AucTime`` is moved
     forward by one minute.  Any other start is advanced in 10-minute steps
     until its minute (with 0 counted as 60) is a multiple of the cycle.
     The ``pd.date_range`` times between start and end are collected,
     de-duplicated and sorted.  Any exception is logged through ``gLogger``
     and swallowed.

     NOTE(review): the 10-minute alignment loop has no exit other than
     reaching a multiple of the cycle; confirm that the start times and
     cycles used in practice always reach one.
     """

     def effectiveMinute(hhmm):
         # Minute component of an "HH:MM" string, with :00 counted as 60.
         minute = datetime.datetime.strptime(hhmm, "%H:%M").minute
         return 60 if minute == 0 else minute

     try:
         self.splitDict[symbol] = {}
         for c in cycle:
             gLogger.info("start genTimeList, cycle = %d" % c)
             collected = []
             self.splitDict[symbol][c] = []
             tp = self.dfInfo.loc[symbol]["CurrPeriod"]
             time1 = [t for span in tp.split(',') for t in span.split('-')]
             for rawStart, rawEnd in zip(time1[0::2], time1[1::2]):
                 start = str(rawStart).strip()
                 end = str(rawEnd).strip()
                 if start in self.AucTime:
                     moved = datetime.datetime.strptime(
                         start, "%H:%M") + datetime.timedelta(minutes=1)
                     start = moved.strftime("%H:%M")
                 else:
                     while effectiveMinute(start) % int(c) != 0:
                         moved = datetime.datetime.strptime(
                             start, "%H:%M") + datetime.timedelta(minutes=10)
                         start = moved.strftime("%H:%M")
                 stamps = pd.date_range(start, end, freq=(str(c) + 'min'))
                 collected.extend(stamps.time.tolist())
             self.splitDict[symbol][c] = sorted(set(collected))
     except Exception as e:
         gLogger.exception("Exception : %s" % e)
예제 #18
0
 def __init__(self, matFile):
     """Load ``matFile`` with ``sio.loadmat`` and convert it via ``self.convert2df()``.

     The path is kept in ``self.matFile``, the loaded content in
     ``self.data`` and the converted result in ``self.dfData``.
     """
     self.matFile = matFile
     self.data = sio.loadmat(matFile)
     gLogger.info("load mat file completed")
     self.dfData = self.convert2df()
예제 #19
0
    def cleanNullVolTurn(self):
        """Repair ticks that traded but carry zero volume / turnover fields.

        ``lastVolume``, ``lastTurnover``, ``volume``, ``turnover``,
        ``openInterest`` and ``lastPrice`` are converted to float first.
        The zero/non-zero masks are computed once, before any repair.

        * ``lastTurnover`` zero, ``lastVolume`` and ``lastPrice`` non-zero:
          ``lastTurnover = lastVolume * lastPrice * TradingUnits``.
        * ``lastVolume`` zero, ``lastTurnover`` and ``lastPrice`` non-zero:
          ``lastVolume = round(lastTurnover / (lastPrice * TradingUnits))``.
        * ``lastPrice`` zero, ``lastVolume`` and ``lastTurnover`` non-zero:
          ``lastPrice = lastTurnover / (lastVolume * TradingUnits)``.
        * ``lastVolume`` and ``lastTurnover`` non-zero while ``volume``,
          ``turnover`` or ``openInterest`` is zero: rows where all three are
          zero are queued for removal and logged; otherwise a zero
          ``turnover`` / ``volume`` is rebuilt from the row at label
          ``i - 1`` plus the tick's own last value.

        Repaired labels are appended to ``self.updateList``.  Rows already in
        ``self.removeList`` are left untouched.  Exceptions are not caught
        here.
        """
        gLogger.info("start cleanNullVolTurn")
        for column in ("lastVolume", "lastTurnover", "volume", "turnover",
                       "openInterest", "lastPrice"):
            self.df[column] = self.df[column].map(float)

        lastVol = self.df["lastVolume"] != 0.0
        lastTurn = self.df["lastTurnover"] != 0.0
        Vol = self.df["volume"] == 0.0
        Turn = self.df["turnover"] == 0.0
        openIn = self.df["openInterest"] == 0.0
        lastP = self.df["lastPrice"] != 0.0

        tu = self.dfInfo.loc[self.Symbol]["TradingUnits"]

        # lastTurnover is 0; lastVolume and lastPrice are not.
        # .copy() so the derived column is written to an independent frame.
        dfTemp = self.df.loc[~lastTurn & lastVol & lastP].copy()
        if not dfTemp.empty:
            gLogger.debug(
                "process data that lastTurn is null but lastVol and lastP are not"
            )
            dfTemp["lastTurnover"] = dfTemp["lastVolume"] * dfTemp[
                "lastPrice"] * float(tu)
            for i, row in dfTemp.iterrows():
                if i not in self.removeList:
                    self.df.loc[i, "lastTurnover"] = row["lastTurnover"]
                    self.updateList.append(i)
                    gLogger.debug('lastTurn = 0, update index = %d' % (i))

        # lastVolume is 0; lastTurnover and lastPrice are not.
        dfTemp = self.df.loc[lastTurn & ~lastVol & lastP].copy()
        if not dfTemp.empty:
            estimated = dfTemp["lastTurnover"] / (
                dfTemp["lastPrice"] * float(tu))
            # Store the rounded result; the original computed the rounding
            # but discarded it, so fractional volumes were written back.
            dfTemp["lastVolume"] = estimated.map(lambda x: int(round(x)))
            for i, row in dfTemp.iterrows():
                if i not in self.removeList:
                    self.df.loc[i, "lastVolume"] = row["lastVolume"]
                    self.updateList.append(i)
                    gLogger.debug('lastVol = 0, update index = %d' % (i))

        # lastPrice is 0; lastVolume and lastTurnover are not.
        dfTemp = self.df.loc[lastTurn & lastVol & ~lastP].copy()
        if not dfTemp.empty:
            dfTemp["lastPrice"] = dfTemp["lastTurnover"] / (
                dfTemp["lastVolume"] * float(tu))
            for i, row in dfTemp.iterrows():
                if i not in self.removeList:
                    self.df.loc[i, "lastPrice"] = row["lastPrice"]
                    self.updateList.append(i)
                    gLogger.debug('lastPrice = 0, update index = %d' % (i))

        # lastVolume and lastTurnover are both non-zero.
        dfTemp = self.df.loc[lastVol & lastTurn & (Vol | Turn | openIn)]
        if not dfTemp.empty:
            # volume, openInterest and turnover all 0: remove and log.
            # Looping over the (possibly empty) selection replaces the
            # private `_values.any()` emptiness test.
            for i in dfTemp.loc[Vol & Turn & openIn].index.values:
                if i not in self.removeList:
                    self.removeList.append(i)
                    self.logList.append(i)
                    gLogger.debug(
                        'Vol & openInterest & turn = 0, remove index = %d'
                        % i)

            # turnover is 0 while lastVolume is not: previous cumulative
            # turnover plus this tick's lastTurnover.
            for i, row in self.df[Turn & lastVol].iterrows():
                preIndex = i - 1
                if preIndex >= 0 and i not in self.removeList:
                    rebuilt = self.df.loc[
                        preIndex, "turnover"] + row["lastTurnover"]
                    self.df.loc[i, "turnover"] = rebuilt
                    self.updateList.append(i)
                    gLogger.debug(
                        'Turn = 0 & lastTurn != 0, update index = %d' % (i))

            # volume is 0 while lastVolume is not: previous cumulative
            # volume plus this tick's lastVolume.
            for i, row in self.df[Vol & lastVol].iterrows():
                preIndex = i - 1
                if preIndex >= 0 and i not in self.removeList:
                    rebuilt = self.df.loc[preIndex,
                                          "volume"] + row["lastVolume"]
                    self.df.loc[i, "volume"] = rebuilt
                    self.updateList.append(i)
                    gLogger.debug('Vol = 0 & lastVol != 0, update index = %d' %
                                  (i))
예제 #20
0
 def cleanNullOpenInter(self):
     """Fill zero open interest with the previous value.

     Delegates to ``self.paddingWithPrevious("openInterest")``.
     """
     gLogger.info("start cleanNullOpenInter")
     self.paddingWithPrevious("openInterest")