Beispiel #1
0
    def gen1minKData(self, vtSymbol, df_data):
        """Aggregate the ticks in ``df_data`` into 1-minute bars and store them.

        Consecutive entries of ``self.splitDict[vtSymbol][1]`` are used as
        ``[start, end)`` windows.  When the minute just before a window start
        is listed in ``self.AucTime`` the window is widened by that minute.
        Windows holding two ticks or fewer produce no bar.  The resulting
        bars are kept in ``self.barDict[vtSymbol][1]`` and written to the
        ``WIND_1_MIN_DB`` database.

        Args:
            vtSymbol: key used for ``self.barDict`` / ``self.splitDict`` and
                passed to ``self.db.insert2db``.
            df_data: tick DataFrame with a ``time`` column of strings in
                ``%H%M%S%f`` form.  A ``structTime`` helper column is added
                to it.

        Any exception is logged through ``gLogger`` and swallowed.
        """
        try:
            gLogger.info("start gen1minKData , vtSymbol is %s" % vtSymbol)
            c = 1
            self.barDict[vtSymbol] = {}
            self.barDict[vtSymbol][c] = []
            # The window masks below read df_data["structTime"], so the helper
            # column must be built on df_data itself.  It used to be written
            # to self.df, which only worked when both names referred to the
            # same frame.
            df_data["structTime"] = df_data["time"].map(
                lambda x: datetime.datetime.strptime(x, "%H%M%S%f"))
            oneMinute = datetime.timedelta(minutes=1)
            boundaries = self.splitDict[vtSymbol][c]
            # Sliding pairs: (b0, b1), (b1, b2), ...
            for left, right in zip(boundaries, boundaries[1:]):
                start = datetime.datetime.strptime(
                    str(left).strip(), '%H:%M:%S')
                end = datetime.datetime.strptime(
                    str(right).strip(), '%H:%M:%S')
                previousMinute = start - oneMinute
                if previousMinute.strftime('%H:%M') in self.AucTime:
                    # Pull the auction minute into this window.
                    start = previousMinute
                p1 = df_data["structTime"] >= start
                p2 = df_data["structTime"] < end
                dfTemp = df_data.loc[p1 & p2]
                if len(dfTemp) > 2:
                    self.barDict[vtSymbol][c].append(self.aggMethod(dfTemp))

            dbNew = self.db.get_db("localhost", 27017, 'WIND_1_MIN_DB')
            self.db.insert2db(dbNew, vtSymbol, self.barDict[vtSymbol][c])
        except Exception as e:
            gLogger.exception("Exception : %s" % e)
Beispiel #2
0
 def delItemsFromRemove(self):
     """Drop from ``self.df`` every row whose label is in ``self.removeList``.

     Labels are de-duplicated before the drop.  Any exception is logged
     through ``gLogger`` and swallowed, leaving ``self.df`` as it was.
     """
     try:
         gLogger.info("start delItemsFromRemove")
         # The same label can be queued more than once; collapse repeats.
         uniqueLabels = set(self.removeList)
         remaining = self.df.drop(list(uniqueLabels), axis=0)
         self.df = remaining
     except Exception as err:
         gLogger.exception("Exception : %s" % err)
Beispiel #3
0
    def convert2df(self):
        """Build a normalized tick DataFrame from ``self.data['temp']``.

        The fields of ``self.data['temp'][0][0]`` are paired positionally
        with the column names below (presumably a MATLAB struct loaded from
        the .mat file -- TODO confirm against the loader).  For the level-1
        quote columns only the first column of the array is kept; any other
        multi-dimensional field is flattened.  Rows containing NaN are
        dropped before the frame is handed to ``self.normalizeData``.

        Returns:
            The DataFrame returned by ``self.normalizeData``, or ``None``
            when an exception was raised (it is logged and swallowed).
        """
        try:
            colNames = [
                'vtSymbol', 'symbol', 'date', 'time', 'lastPrice',
                'lastVolume', 'lastTurnover', 'matchItems', 'openInterest',
                'tradeFlag', 'bsFlag', 'volume', 'turnover', 'highPrice',
                'lowPrice', 'openPrice', 'preClosePrice', 'settlementPrice',
                'position', 'curDelta', 'preSettlementPrice', 'prePosition',
                'askPrice1', 'askVolume1', 'bidPrice1', 'bidVolume1',
                'askAvPrice', 'bidAvPrice', 'totalAskVolume', 'totalBidVolume',
                'index', 'stocks', 'ups', 'downs', 'holdLines'
            ]
            firstLevelCols = (
                'askPrice1', 'askVolume1', 'bidPrice1', 'bidVolume1')

            # Collect the columns in a plain list instead of writing into the
            # dict returned by locals(), which is not meant to be modified.
            seriesList = []
            for k, v in enumerate(self.data['temp'][0][0].tolist()):
                name = colNames[k]
                if name in firstLevelCols:
                    v = v[:, 0]
                if v.ndim > 1:
                    v = v.ravel()
                if v.ndim == 1:
                    seriesList.append(pd.Series(v, name=name))
                else:
                    # Zero-dimensional field: reported and skipped.
                    print("index = %d, dim = %d" % (k, v.ndim))

            df_data = pd.concat(seriesList, axis=1)
            df_data = df_data.dropna(axis=0, how='any')
            dfData = self.normalizeData(df_data)
            return dfData
        except Exception as e:
            gLogger.exception("Exception when convert to df: %s" % e)
Beispiel #4
0
 def genKData(self, vtSymbol, df_data):
     """Generate the 1-minute, multi-minute and daily bars for one symbol.

     Args:
         vtSymbol: symbol key forwarded to the individual generators.
         df_data: tick DataFrame; nothing is generated when it is empty.

     ``self.cycle[1:]`` is forwarded to ``genOtherKData`` (the first entry
     is presumably the 1-minute cycle handled by ``gen1minKData`` -- TODO
     confirm).
     """
     if df_data.empty:
         # Not inside an exception handler, so there is no traceback to
         # attach: log a plain error instead of calling gLogger.exception.
         gLogger.error("df data is empty!")
         return
     cycle = self.cycle[1:]
     self.gen1minKData(vtSymbol, df_data)
     self.genOtherKData(vtSymbol, cycle)
     self.gen1DayKData(vtSymbol)
Beispiel #5
0
 def cleanSameTimestamp(self):
     """Queue rows with a repeated ``datetime`` value for removal.

     ``self.df`` is sorted by ``datetime`` in descending order; for every
     value that occurs more than once, all occurrences after the first one
     in that order are appended to ``self.removeList`` and logged.
     ``self.df`` itself is not modified.  Any exception is logged through
     ``gLogger`` and swallowed.
     """
     try:
         gLogger.info("start cleanSameTimestamp")
         ordered = self.df.sort_values(by=['datetime'], ascending=False)
         isRepeat = ordered["datetime"].duplicated()
         repeatedLabels = ordered[isRepeat].index
         for label in repeatedLabels.values:
             self.removeList.append(label)
             gLogger.debug('remove index = %d' % label)
     except Exception as err:
         gLogger.exception("Exception : %s" % err)
Beispiel #6
0
 def cleanIllegalTradingTime(self):
     """Queue ticks that fall outside the trading periods for removal.

     Each ``time`` value is passed to ``self.StandardizeTimePeriod``; a
     missing result is treated as ``False``.  The index label of every row
     whose result equals ``False`` is appended to ``self.removeList`` and
     logged.  Any exception is logged through ``gLogger`` and swallowed.
     """
     try:
         gLogger.info("start cleanIllegalTradingTime ")
         # Keep the flags in a local Series rather than a temporary column
         # on self.df, so a failure below cannot leave a stray column behind.
         inPeriod = self.df["time"].map(self.StandardizeTimePeriod)
         inPeriod = inPeriod.fillna(False)
         outside = inPeriod.eq(False)
         # Only the labels are needed, so iterate the index directly instead
         # of materialising every row with iterrows().
         for i in self.df.index[outside.values]:
             self.removeList.append(i)
             gLogger.debug('remove index = %d' % (i))
     except Exception as e:
         gLogger.exception("Exception: %s" % e)
Beispiel #7
0
 def compare_time(self, s1, s2, st, ms):
     """Tell whether ``st`` (plus its millisecond part) lies within [s1, s2].

     ``struct_time`` carries no milliseconds, so the millisecond part is
     passed separately as ``ms`` and only consulted on the two boundaries:
     a time equal to ``s1`` is accepted when ``int(ms) >= 0``, a time equal
     to ``s2`` only when ``int(ms) == 0``.  An end of ``00:00`` is replaced
     by ``23:59:61`` before comparing.

     Returns:
         True or False; ``None`` when an exception was raised (it is logged
         and swallowed).
     """
     try:
         midnight = time.strptime('00:00', '%H:%M')
         if s2 == midnight:
             s2 = time.strptime('23:59:61', '%H:%M:%S')
         # Strictly inside the period.
         if s1 < st < s2:
             return True
         # Exactly on the opening boundary.
         if st == s1 and int(ms) >= 0:
             return True
         # Exactly on the closing boundary: only the .000 tick counts.
         return st == s2 and int(ms) == 0
     except Exception as err:
         gLogger.exception("Exception when compare_time e = %s" % err)
Beispiel #8
0
 def gen1DayKData(self, vtSymbol):
     """Build one daily bar from the stored 1-minute bars and persist it.

     The bars in ``self.barDict[vtSymbol][1]`` are loaded into a DataFrame
     and passed to ``self.aggMethod``.  The result is stored under the
     ``'1Day'`` key and written to the ``WIND_1Day_MIN_DB`` database.
     When there are no 1-minute bars, only the empty ``'1Day'`` list is
     created.  Any exception is logged through ``gLogger`` and swallowed.
     """
     try:
         gLogger.info("start gen1DayKData , vtSymbol = %s" % vtSymbol)
         c = '1Day'
         symbolBars = self.barDict[vtSymbol]
         symbolBars[c] = []
         minuteFrame = pd.DataFrame(symbolBars[1])
         if minuteFrame.empty:
             return
         symbolBars[c].append(self.aggMethod(minuteFrame))
         dbName = 'WIND_' + str(c) + '_MIN_DB'
         dbNew = self.db.get_db("localhost", 27017, dbName)
         self.db.insert2db(dbNew, vtSymbol, symbolBars[c])
     except Exception as err:
         gLogger.exception("Exception : %s" % err)
Beispiel #9
0
 def estimateExceptional(self, field):
     """Record rows whose ``field`` value jumps sharply from the previous row.

     A row is flagged when the absolute difference to the previous row's
     value is at least 12% of that previous value.  Flagged labels that are
     not already in ``self.removeList`` are appended to ``self.logList``
     and logged.  The first row has no predecessor and is never flagged.
     Any exception is logged through ``gLogger`` and swallowed.
     """
     try:
         gLogger.info("start estimateExceptional, field = %s" % field)
         work = pd.DataFrame(self.df[field])
         work["shift"] = self.df[field].shift(1)
         work["delta"] = (work[field] - work["shift"]).abs()
         # Rows without a predecessor (NaN shift) drop out here.
         work = work.dropna(axis=0, how='any')
         work["IsExcept"] = work["delta"] >= work["shift"] * 0.12
         flagged = work.loc[work["IsExcept"]]
         for label in flagged.index:
             if label in self.removeList:
                 continue
             self.logList.append(label)
             gLogger.debug('Field = %s, log index = %d' % (field, label))
     except Exception as err:
         gLogger.exception("Exception : %s" % err)
Beispiel #10
0
 def paddingWithPrevious(self, field):
     """Overwrite zero values of ``field`` with the value at label ``i - 1``.

     Rows already queued in ``self.removeList`` and rows whose predecessor
     label would be negative are skipped.  The value is read from
     ``self.df`` at the time of the update, so a run of zeros is filled
     with the last non-zero value before it.  Every updated label is
     appended to ``self.updateList`` and logged.  Any exception is logged
     through ``gLogger`` and swallowed, which ends the pass.
     """
     try:
         gLogger.info("start paddingWithPrevious, field = %s" % field)
         zeroRows = self.df.loc[self.df[field] == 0.0]
         for label, row in zeroRows.iterrows():
             if label in self.removeList:
                 continue
             prevLabel = label - 1
             if prevLabel < 0:
                 continue
             # The value goes through the row copy before being written
             # back, exactly as it always has.
             row[field] = self.df.loc[prevLabel, field]
             self.df.loc[label, field] = row[field]
             self.updateList.append(label)
             gLogger.debug('Field = %s, update index = %d' %
                           (field, label))
     except Exception as err:
         gLogger.exception("Exception : %s" % err)
Beispiel #11
0
 def getTimeList(self, cycle):
     """Regenerate and save the time split list for ``self.Symbol``.

     Creates the ``self.timeFilePath`` directory when it does not exist,
     then calls ``self.genTimeList(self.Symbol, cycle)`` followed by
     ``self.saveTimeList()``.  Any exception is logged through ``gLogger``
     and swallowed.
     """
     try:
         gLogger.info("start getTimeList")
         timeDir = self.timeFilePath
         if not os.path.exists(timeDir):
             os.makedirs(timeDir)
         # NOTE: a same-day pickle cache
         # ('timeSeries_<Symbol>.pickle' under timeFilePath) used to be
         # consulted here; it is disabled, so the list is always rebuilt.
         self.genTimeList(self.Symbol, cycle)
         self.saveTimeList()
     except Exception as err:
         gLogger.exception("Exception : %s" % err)
Beispiel #12
0
 def aggMethod(self, dfTemp):
     """Collapse a frame of ticks, or of finer bars, into a single bar dict.

     Two input layouts are accepted:

     * tick frames, recognised by a ``lastPrice`` column: open/close are
       the first/last ``lastPrice``, high/low its extremes, volume and
       turnover the sums of ``lastVolume`` / ``lastTurnover``;
     * bar frames as produced by this method (``open``, ``high``, ``low``,
       ``close``, ``volume``, ``turnover``): open is the first ``open``,
       close the last ``close``, high/low the extremes of ``high`` /
       ``low``, volume and turnover the sums.  This is the layout the
       multi-minute and daily generators pass in.

     Identification fields (``vtSymbol``, ``symbol``, ``date``, ``time``,
     ``datetime``) come from the first row, ``openInterest`` from the last.

     Returns:
         The bar as a dict, or ``None`` when an exception was raised (for
         example on an empty frame); the exception is logged and swallowed.
     """
     try:
         # Fetch the boundary rows once instead of once per field.
         first = dfTemp.iloc[0]
         last = dfTemp.iloc[-1]
         if "lastPrice" in dfTemp.columns:
             prices = dfTemp["lastPrice"]
             volume = dfTemp["lastVolume"].sum()
             turnover = dfTemp["lastTurnover"].sum()
             high = prices.max()
             low = prices.min()
             openPrice = first["lastPrice"]
             closePrice = last["lastPrice"]
         else:
             volume = dfTemp["volume"].sum()
             turnover = dfTemp["turnover"].sum()
             high = dfTemp["high"].max()
             low = dfTemp["low"].min()
             openPrice = first["open"]
             closePrice = last["close"]

         tempBar = {}
         tempBar["vtSymbol"] = first["vtSymbol"]
         tempBar["symbol"] = first["symbol"]
         tempBar["date"] = first["date"]
         tempBar["time"] = first["time"]
         tempBar["openInterest"] = float(last["openInterest"])
         tempBar["volume"] = float(volume)
         tempBar["turnover"] = float(turnover)
         tempBar["high"] = float(high)
         tempBar["low"] = float(low)
         tempBar["open"] = float(openPrice)
         tempBar["close"] = float(closePrice)
         tempBar["datetime"] = first["datetime"]
         return tempBar
     except Exception as e:
         gLogger.exception("Exception when exec aggMethod e:%s" % e)
Beispiel #13
0
    def StandardizeTimePeriod(self, target):
        """Tell whether the tick time ``target`` lies inside a trading period.

        The periods are read from the ``CurrPeriod`` entry of
        ``self.dfInfo`` for ``self.Symbol``: a comma-separated list of
        ``HH:MM-HH:MM`` ranges.  The last three characters of ``target``
        are split off as milliseconds and the rest is parsed as ``%H%M%S``;
        both are handed to ``self.compare_time`` for each period.

        Returns:
            True for the first matching period.  When nothing matches, or
            when an exception was raised (it is logged and swallowed), the
            method falls through and returns ``None``.
        """
        stamp = target
        try:
            periods = self.dfInfo.loc[self.Symbol]["CurrPeriod"]
            bounds = []
            for span in periods.split(','):
                bounds.extend(span.split('-'))
            ms = stamp[-3:]
            stamp = stamp[:-3]

            stamp = time.strptime(stamp, '%H%M%S')
            # Even positions are period starts, odd positions period ends.
            for opening, closing in zip(bounds[0::2], bounds[1::2]):
                start = time.strptime(str(opening).strip(), '%H:%M')
                end = time.strptime(str(closing).strip(), '%H:%M')
                if self.compare_time(start, end, stamp, ms):
                    return True
        except Exception as err:
            gLogger.exception(
                "Exception when StandardizeTimePeriod e = %s time = %s" %
                (err, str(stamp)))
Beispiel #14
0
 def normalizeData(self, df):
     """Normalize the raw tick frame in place and return it.

     * ``date`` and ``time`` are converted to integer strings, ``time``
       left-padded with zeros to 9 characters;
     * ``datetime`` is built by passing ``"<date> <time>"`` to
       ``self.convert2datetime``;
     * ``vtSymbol`` is the part of the ``self.matFile`` file name before
       the first underscore, ``symbol`` its letters in lower case;
     * ``date`` is finally overwritten with the part of the file name
       after the last underscore, minus the 4-character extension.

     Returns:
         ``df`` itself, or ``None`` when an exception was raised (it is
         logged and swallowed).
     """
     # ntpath splits on both "\\" and "/", so the file name is extracted
     # correctly whichever separator the path was written with.
     import ntpath
     try:
         gLogger.info("start normalize df Data")
         df["date"] = df["date"].map(lambda x: str(int(x)))
         df["time"] = df["time"].map(lambda x: str(int(x)).zfill(9))
         fileName = ntpath.basename(self.matFile)
         nameParts = fileName.split('_')
         vtSymbol = nameParts[0]
         symbol = "".join([a for a in vtSymbol if a.isalpha()]).lower()
         date = nameParts[-1][:-4]
         df["vtSymbol"] = vtSymbol
         df["symbol"] = symbol
         # Temporary helper column used only to derive "datetime".
         df["DT"] = df["date"] + ' ' + df["time"]
         df["datetime"] = df["DT"].map(self.convert2datetime)
         del df["DT"]
         df["date"] = date
         return df
     except Exception as e:
         gLogger.exception("Exception when normalizeData: %s" % e)
Beispiel #15
0
 def reserveLastTickInAuc(self):
     """Keep only the last tick of each call-auction minute.

     For every ``HH:MM`` entry in ``self.AucTime`` the ticks within that
     minute are sorted newest first; all but the first are appended to
     ``self.removeList`` and logged.  A temporary ``structTime`` column is
     added to ``self.df`` and deleted again at the end.  Any exception is
     logged through ``gLogger`` and swallowed.
     """
     try:
         gLogger.info("start reserveLastTickInAuc")
         parse = datetime.datetime.strptime
         self.df["structTime"] = self.df["time"].map(
             lambda raw: parse(raw, "%H%M%S%f"))
         oneMinute = datetime.timedelta(minutes=1)
         for aucStart in self.AucTime:
             windowStart = parse(aucStart, '%H:%M')
             windowEnd = windowStart + oneMinute
             notBefore = self.df["structTime"] >= windowStart
             notAfter = self.df["structTime"] < windowEnd
             newestFirst = self.df.loc[notBefore & notAfter].sort_values(
                 by=["structTime"], ascending=False)
             # Position 0 is the tick to keep; everything after it goes.
             for label in newestFirst.index.values[1:]:
                 self.removeList.append(label)
                 gLogger.debug('remove index = %d' % label)
         del self.df["structTime"]
     except Exception as err:
         gLogger.exception("Exception : %s" % err)
Beispiel #16
0
    def initCleanRegulation(self):
        """Run the tick-cleaning pipeline on ``self.df`` and store the result.

        The symbol is taken from the first row's ``vtSymbol``; values
        containing ``IFC``, ``IHC``, ``ICC`` or ``TFC`` are cut to their
        first two characters.  ``self.Symbol`` is set to the letters of
        that value in lower case.  The cleaning steps then queue rows in
        ``self.removeList``, the queued rows are dropped, and the cleaned
        frame is written to the ``WIND_TICK_DB`` database under that
        symbol.

        An empty ``self.df`` is logged and skipped.  Exceptions raised by
        the pipeline are logged through ``gLogger`` and swallowed.
        """
        gLogger.info("start initCleanRegulation")
        dbNew = self.db.get_db("localhost", 27017, 'WIND_TICK_DB')
        # Check for emptiness before touching the first row; the lookup
        # below would otherwise raise on an empty frame.
        if self.df.empty:
            gLogger.info("df data is empty, skip initCleanRegulation")
            return
        # Positional access: label 0 is not guaranteed to exist once rows
        # have been dropped from the frame.
        i = self.df["vtSymbol"].iloc[0]
        try:
            if any(tag in i for tag in ("IFC", "IHC", "ICC", "TFC")):
                i = i[:2]
            self.Symbol = "".join([a for a in i if a.isalpha()]).lower()
            self.initList()
            self.cleanIllegalTradingTime()
            self.cleanSameTimestamp()
            self.reserveLastTickInAuc()
            self.cleanNullVolTurn()
            self.cleanNullPriceIndicator()
            self.cleanNullOpenInter()
            self.recordExceptionalPrice()

            self.delItemsFromRemove()
            self.db.insert2db(dbNew, i, self.df)
        except Exception as e:
            gLogger.exception("Exception: %s" % e)
Beispiel #17
0
    def genOtherKData(self, vtSymbol, cycle):
        """Build and persist bars for every cycle in ``cycle``.

        For each cycle ``c``, consecutive entries of
        ``self.splitDict[vtSymbol][c]`` are used as window bounds.  The
        bounds are published as ``self.start1`` / ``self.end1`` and
        ``self.selectItems`` is applied to every 1-minute bar in
        ``self.barDict[vtSymbol][1]`` (presumably it returns the bar when
        it lies in the window and ``None`` otherwise -- TODO confirm).
        Windows with more than two selected bars are aggregated with
        ``self.aggMethod``.  The bars of each cycle are written to the
        ``WIND_<c>_MIN_DB`` database.

        Exceptions are logged and swallowed per cycle, so one failing cycle
        does not stop the others.
        """
        for c in cycle:
            try:
                gLogger.info("start genOtherKData cycle = %d" % c)
                self.barDict[vtSymbol][c] = []
                marks = self.splitDict[vtSymbol][c]
                # Sliding pairs: (m0, m1), (m1, m2), ...
                for lower, upper in zip(marks, marks[1:]):
                    self.start1 = time.strptime(
                        str(lower).strip(), '%H:%M:%S')
                    self.end1 = time.strptime(
                        str(upper).strip(), '%H:%M:%S')
                    minuteBars = self.barDict[vtSymbol][1]
                    picked = [
                        bar for bar in map(self.selectItems, minuteBars)
                        if bar is not None
                    ]
                    dfTemp = pd.DataFrame(picked)
                    if len(dfTemp) > 2:
                        self.barDict[vtSymbol][c].append(
                            self.aggMethod(dfTemp))

                dbName = 'WIND_' + str(c) + '_MIN_DB'
                dbNew = self.db.get_db("localhost", 27017, dbName)
                self.db.insert2db(dbNew, vtSymbol, self.barDict[vtSymbol][c])
            except Exception as err:
                gLogger.exception("Exception : %s" % err)
Beispiel #18
0
 def genTimeList(self, symbol, cycle):
     """Build the sorted bar-boundary times of ``symbol`` for every cycle.

     The trading periods come from the ``CurrPeriod`` entry of
     ``self.dfInfo`` for ``symbol``: a comma-separated list of
     ``HH:MM-HH:MM`` ranges.  For each period and each cycle ``c``:

     * a start listed in ``self.AucTime`` is moved one minute later;
     * any other start is moved forward in 10-minute steps until its
       minute (with 0 counted as 60) is a multiple of ``c``;
     * ``pd.date_range(start, end, freq='<c>min')`` yields the boundaries.

     The de-duplicated, sorted boundaries are stored in
     ``self.splitDict[symbol][c]``.

     Any exception is logged through ``gLogger`` and swallowed.  That
     includes the ``ValueError`` raised when no start reachable in
     10-minute steps lines up with the cycle; earlier this case looped
     forever.
     """
     try:
         tempDict = {}
         self.splitDict[symbol] = {}
         tenMinutes = datetime.timedelta(minutes=10)
         for c in cycle:
             gLogger.info("start genTimeList, cycle = %d" % c)
             tempDict[c] = []
             self.splitDict[symbol][c] = []
             tp = self.dfInfo.loc[symbol]["CurrPeriod"]
             time1 = [t for i in tp.split(',') for t in i.split('-')]
             # Non-overlapping pairs: (start, end), (start, end), ...
             for i in zip(*([iter(time1)] * 2)):
                 start = str(i[0]).strip()
                 end = str(i[1]).strip()
                 if start in self.AucTime:
                     start1 = datetime.datetime.strptime(
                         start, "%H:%M") + datetime.timedelta(minutes=1)
                     start = start1.strftime("%H:%M")
                 else:
                     aligned = datetime.datetime.strptime(start, "%H:%M")
                     steps = 0
                     while (aligned.minute or 60) % int(c) != 0:
                         # After six 10-minute steps the minute value
                         # repeats, so no further step can ever align.
                         if steps >= 6:
                             raise ValueError(
                                 "start %s can not be aligned to cycle %s"
                                 % (start, c))
                         aligned += tenMinutes
                         steps += 1
                     if steps:
                         start = aligned.strftime("%H:%M")
                 tempList = pd.date_range(
                     start, end, freq=(str(c) + 'min')).time.tolist()
                 tempDict[c].extend(tempList)
             self.splitDict[symbol][c] = sorted(set(tempDict[c]))
     except Exception as e:
         gLogger.exception("Exception : %s" % e)