Beispiel #1
0
    def extract(self):
        """Build index gradient-rate features aligned with the equity table.

        Reads ``closeIndex`` of index ``000001.ZICN`` from ``MktIdxdGet``,
        feeds the values and ``ndays_`` to
        ``utils_common.computeGradRateIntervals`` and joins the result (keyed
        by ``tradeDate``) onto the ``MktEqudGet`` frame.

        Returns:
            DataFrame carrying the ``MktEqudGet`` index with one
            ``<getName()>_<nday>`` column per entry of ``ndays_``.
        """
        # NOTE(review): ``ndays_`` is referenced as a bare name (not
        # ``self.ndays_``), so it has to exist at module level -- confirm.
        index_close = dataio.getdf('MktIdxdGet')[['closeIndex']]
        is_target = (
            index_close.index.get_level_values('indexID') == '000001.ZICN')
        index_close = index_close[is_target]

        closes = np.squeeze(index_close[['closeIndex']].values)
        # Transposed so that each interval ends up as one column.
        rates = np.array(
            utils_common.computeGradRateIntervals(closes, ndays_)).T

        prefix = self.getName()
        columnNames = [prefix + '_' + str(nday) for nday in ndays_]

        trade_dates = pd.Index(
            index_close.index.get_level_values('tradeDate'), name='tradeDate')
        index_fea = pd.DataFrame(rates, index=trade_dates, columns=columnNames)

        equities = dataio.getdf('MktEqudGet')[['closePrice']]
        # The join only serves to broadcast the per-date values over the
        # equity index; the helper price column is dropped again.
        return equities.join(index_fea)[columnNames]
Beispiel #2
0
def testIntegrity():
    """Print index entries of ``MktEqudAdjAfGet`` missing from ``MktEqudGet``.

    Every missing entry is printed as ``sec,dt``; nothing is returned.
    """
    adjusted = dataio.getdf('MktEqudAdjAfGet')
    raw = dataio.getdf('MktEqudGet')

    # A set makes the membership test in the loop cheap.
    known = set(raw.index.values.tolist())

    for sec, dt in adjusted.index.values:
        if (sec, dt) in known:
            continue
        print('{0},{1}'.format(sec, dt))
Beispiel #3
0
def testIntegrity():
    """Report ``MktEqudAdjAfGet`` index entries absent from ``MktEqudGet``.

    Prints one ``sec,dt`` line per missing entry and returns nothing.
    """
    adjusted_index = dataio.getdf('MktEqudAdjAfGet').index.values
    present = set(dataio.getdf('MktEqudGet').index.values.tolist())

    for entry in adjusted_index:
        sec, dt = entry
        if (sec, dt) not in present:
            print('{0},{1}'.format(sec, dt))
    def extract(self):
        """Compute a forward-return label per ``(secID, tradeDate)``.

        For record ``i`` of a security the label is ``(sell - buy) / buy``,
        where ``buy`` is ``self.buyPrice_`` of record ``i + 1`` and ``sell``
        is ``self.sellPrice_`` of record ``i + 1 + self.nday_``.  Before the
        records are walked, rows with a non-null ``STflg``, rows with
        ``isOpen != 1`` and rows with a non-positive buy or sell price are
        removed.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with a single column
            named ``self.getName()``.  ``-999`` marks records that are too
            close to the end of the series to have a label.
        """
        price_cols = list({self.buyPrice_, self.sellPrice_})
        market = dataio.getdf('MktEqudAdjAfGet')[price_cols + ['isOpen']]
        flow = dataio.getdf('MktEquFlowGet')[['moneyInflow', 'moneyOutflow']]
        st_flags = dataio.getdf('SecSTGet')[['STflg']]

        joined = market.join(flow).join(st_flags, how='left')
        keep = (
            joined['STflg'].isnull()
            & (joined['isOpen'] == 1)
            & (joined[self.buyPrice_] > 0)
            & (joined[self.sellPrice_] > 0)
        )
        joined = joined[keep.values]

        def process(sec, recs):
            """Label the records of one security."""
            indices = []
            values = []
            first_unlabelled = len(recs) - self.nday_ - 1

            for i, rec in enumerate(recs):
                # Skip the first 301 records of the security.
                # NOTE(review): the original comment said "listed for 100
                # days" while the threshold is 300 records -- confirm.
                if i <= 300:
                    continue

                if i >= first_unlabelled:
                    # Not enough future records left: emit the placeholder.
                    indices.append((rec['secID'], rec['tradeDate']))
                    values.append(-999)
                    continue

                entry = recs[i + 1]
                exit_rec = recs[i + 1 + self.nday_]

                inflow = entry['moneyInflow']
                outflow = entry['moneyOutflow']
                # Entry days with little money flow on either side, or with an
                # extreme inflow/outflow ratio, produce no label at all.
                if outflow < 1000000 or inflow < 1000000:
                    continue
                if (inflow / outflow) > 100:
                    continue

                buy = entry[self.buyPrice_]
                indices.append((rec['secID'], rec['tradeDate']))
                values.append((exit_rec[self.sellPrice_] - buy) / buy)

            label_index = pd.MultiIndex.from_tuples(
                indices, names=['secID', 'tradeDate'])
            return pd.DataFrame(values,
                                index=label_index,
                                columns=[self.getName()])

        # NOTE(review): forEachSecID presumably calls process once per secID
        # and returns the resulting frames -- verify against dataio.
        return pd.concat(dataio.forEachSecID(joined, process))
    def extract(self):
        """Label each ``(secID, tradeDate)`` with a delayed round-trip return.

        The label of record ``i`` is ``(sell - buy) / buy`` with ``buy`` taken
        from ``self.buyPrice_`` of record ``i + 1`` and ``sell`` from
        ``self.sellPrice_`` of record ``i + 1 + self.nday_``.  Rows with a
        non-null ``STflg``, ``isOpen != 1`` or a non-positive buy/sell price
        are filtered out first.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with one column named
            ``self.getName()``; ``-999`` is stored where no future records
            are available.
        """
        wanted = list({self.buyPrice_, self.sellPrice_}) + ['isOpen']
        dfMkt = dataio.getdf('MktEqudAdjAfGet')[wanted]
        dfFlow = dataio.getdf('MktEquFlowGet')[['moneyInflow', 'moneyOutflow']]
        dfSt = dataio.getdf('SecSTGet')[['STflg']]

        df = dfMkt.join(dfFlow).join(dfSt, how='left')
        df = df[df['STflg'].isnull().values]
        df = df[(df['isOpen'] == 1).values]
        df = df[(df[self.buyPrice_] > 0).values]
        df = df[(df[self.sellPrice_] > 0).values]

        def process(sec, recs):
            """Produce the label frame for the records of one security."""
            indices = []
            values = []
            count = len(recs)
            horizon = self.nday_

            # Records 0..300 never receive a label.
            # NOTE(review): the original comment spoke of "100 days" although
            # the cut-off is 300 records -- confirm the intended value.
            for i in range(301, count):
                rec = recs[i]
                key = (rec['secID'], rec['tradeDate'])

                if i >= (count - horizon - 1):
                    # Tail of the series: no exit record exists yet.
                    indices.append(key)
                    values.append(-999)
                    continue

                rec1 = recs[i + 1]
                rec2 = recs[i + 1 + horizon]

                too_thin = (rec1['moneyOutflow'] < 1000000
                            or rec1['moneyInflow'] < 1000000)
                if too_thin:
                    continue
                if (rec1['moneyInflow'] / rec1['moneyOutflow']) > 100:
                    continue

                gain = rec2[self.sellPrice_] - rec1[self.buyPrice_]
                indices.append(key)
                values.append(gain / rec1[self.buyPrice_])

            return pd.DataFrame(
                values,
                index=pd.MultiIndex.from_tuples(
                    indices, names=['secID', 'tradeDate']),
                columns=[self.getName()],
            )

        dfs = dataio.forEachSecID(df, process)
        return pd.concat(dfs)
Beispiel #6
0
    def __init__(self, initMoney):
        """Initialise fees, starting funds and the market lookup tables.

        The lookup tables are built from ``MktEqudAdjAfGet`` the first time an
        instance is created and stored in the module globals
        ``globalMarket`` / ``globalTradingDays``; later instances reuse them.

        Args:
            initMoney: value stored in ``self.cache_``.
        """
        global globalMarket, globalTradingDays

        self.buyFee_ = 0.001
        self.sellFee_ = 0.002
        self.cache_ = initMoney
        self.position_ = []

        if globalMarket is not None:
            # Tables were already built by an earlier instance.
            self.market_ = globalMarket
            self.tradingDays_ = globalTradingDays
            return

        records = dataio.getdf('MktEqudAdjAfGet').to_records()
        # Record lookup keyed by (secID, tradeDate), plus the set of all
        # trade dates that occur in the table.
        self.market_ = {
            (rec['secID'], rec['tradeDate']): rec for rec in records
        }
        self.tradingDays_ = {rec['tradeDate'] for rec in records}

        globalMarket = self.market_
        globalTradingDays = self.tradingDays_
Beispiel #7
0
    def __init__(self, initMoney):
        """Set fees and funds, and attach the shared market tables.

        On first use the tables are filled from ``MktEqudAdjAfGet`` and
        published through the module globals ``globalMarket`` and
        ``globalTradingDays``; every instance then points at those objects.

        Args:
            initMoney: value stored in ``self.cache_``.
        """
        global globalMarket, globalTradingDays

        self.buyFee_ = 0.001
        self.sellFee_ = 0.002
        self.cache_ = initMoney

        if globalMarket is None:
            market = {}
            days = set()
            for rec in dataio.getdf('MktEqudAdjAfGet').to_records():
                market[(rec['secID'], rec['tradeDate'])] = rec
                days.add(rec['tradeDate'])
            globalMarket = market
            globalTradingDays = days

        self.market_ = globalMarket
        self.tradingDays_ = globalTradingDays

        self.position_ = []
    def extract(self):
        """Relate each close price to sliding-window highs and lows.

        For every window length in ``self.ndays_`` two columns are produced:
        ``(close - window_high) / window_high`` and
        ``(close - window_low) / window_low``; a reference value of 0 yields
        0.  Only rows with ``isOpen == 1`` are used.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with columns
            ``<ClassName>_max_<nday>`` and ``<ClassName>_min_<nday>``.
        """
        def relative(price, reference):
            """Return the relative distance of price to reference (0 if 0)."""
            return (price - reference) / reference if reference != 0 else 0

        def process(secID, recs):
            """Build the feature frame for the records of one security."""
            closes = [rec['closePrice'] for rec in recs]
            highs = [rec['highestPrice'] for rec in recs]
            # Lows of exactly 0 are replaced by 99999, presumably so that they
            # cannot become the window minimum -- TODO confirm.
            lows = [99999 if rec['lowestPrice'] == 0 else rec['lowestPrice']
                    for rec in recs]

            window_highs = []
            window_lows = []
            for nday in self.ndays_:
                window_highs.append(
                    utils_common.slideWindowMaximum(highs, nday))
                window_lows.append(
                    utils_common.slideWindowMinimum(lows, nday))

            fea = np.zeros((len(recs), len(self.ndays_) * 2),
                           dtype=np.float32)
            for row, close in enumerate(closes):
                for k in range(len(self.ndays_)):
                    fea[row, 2 * k] = relative(close, window_highs[k][row])
                    fea[row, 2 * k + 1] = relative(close, window_lows[k][row])

            indices = [(rec['secID'], rec['tradeDate']) for rec in recs]

            prefix = self.__class__.__name__
            columnNames = [prefix + tag + str(nday)
                           for nday in self.ndays_
                           for tag in ('_max_', '_min_')]

            return pd.DataFrame(fea,
                                index=pd.MultiIndex.from_tuples(
                                    indices, names=['secID', 'tradeDate']),
                                columns=columnNames)

        market = dataio.getdf('MktEqudAdjAfGet')[[
            'highestPrice', 'lowestPrice', 'closePrice', 'isOpen'
        ]]
        market = market[(market['isOpen'] == 1).values]

        return pd.concat(dataio.forEachSecID(market, process))
    def extract(self):
        """Run ``process`` over turnover and close-price data per security.

        Returns:
            Whatever ``dataio.forEachSecIDEx`` returns for the selected
            columns, ``process`` and ``self.getNames()``.
        """
        # NOTE(review): ``process`` is not defined inside this method; it has
        # to be resolved from an enclosing or module scope -- confirm.
        columns = ['turnoverVol', 'turnoverValue', 'closePrice']
        market = dataio.getdf('MktEqudAdjAfGet')[columns]
        return dataio.forEachSecIDEx(market, process, self.getNames())
Beispiel #10
0
def testIndex():
    """Print the ``FundETFConsGet`` rows of ``510300.XSHG`` keyed by consID.

    The fund's own ``secID`` column is dropped, ``consID`` is renamed to
    ``secID`` and the frame is indexed by ``(secID, tradeDate)``.
    """
    etf_cons = dataio.getdf('FundETFConsGet')
    is_300 = etf_cons.index.get_level_values('secID') == '510300.XSHG'
    df300 = (
        etf_cons[is_300]
        .reset_index()
        .drop('secID', axis=1)
        .rename(columns={'consID': 'secID'})
        .set_index(['secID', 'tradeDate'])
    )
    print(df300)
Beispiel #11
0
    def extract(self):
        """Compute gradient rates of window-averaged money-flow columns.

        For every window length in ``self.ndays_`` and every column in
        ``self.keynames_`` the series is passed through
        ``utils_common.slideWindowAverage`` and then
        ``utils_common.computeGradRate``.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with columns
            ``<ClassName>_<key>_<nday>``, ordered by window length first and
            key second.
        """
        def process(secID, recs):
            """Build the feature frame for the records of one security."""
            series = []
            for key in self.keynames_:
                series.append([rec[key] for rec in recs])

            grads = []
            for nday in self.ndays_:
                for column in series:
                    smoothed = utils_common.slideWindowAverage(column, nday)
                    grads.append(utils_common.computeGradRate(smoothed))

            # One row per record, one column per (nday, key) pair.
            fea = np.array(grads).T

            indices = [(rec['secID'], rec['tradeDate']) for rec in recs]

            prefix = self.__class__.__name__
            columnNames = [prefix + '_' + key + '_' + str(nday)
                           for nday in self.ndays_
                           for key in self.keynames_]

            return pd.DataFrame(fea,
                                index=pd.MultiIndex.from_tuples(
                                    indices, names=['secID', 'tradeDate']),
                                columns=columnNames)

        flow = dataio.getdf('MktEquFlowGet')[self.keynames_]

        return pd.concat(dataio.forEachSecID(flow, process))
Beispiel #12
0
def testIndex():
    """Print the ``510300.XSHG`` rows of ``FundETFConsGet`` re-indexed.

    After dropping the fund's ``secID`` column, ``consID`` takes over the
    name ``secID`` and forms the index together with ``tradeDate``.
    """
    df300 = dataio.getdf('FundETFConsGet')
    keep = df300.index.get_level_values('secID') == '510300.XSHG'
    df300 = df300[keep].reset_index()
    del df300['secID']
    df300 = df300.rename(columns={'consID': 'secID'})
    df300 = df300.set_index(['secID', 'tradeDate'])
    print(df300)
Beispiel #13
0
    def extract(self):
        """Derive gradient-rate features from averaged money-flow columns.

        Each column named in ``self.keynames_`` is averaged with
        ``utils_common.slideWindowAverage`` for every window length in
        ``self.ndays_`` and then turned into a rate by
        ``utils_common.computeGradRate``.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` whose columns are
            named ``<ClassName>_<key>_<nday>`` (window length varies slowest).
        """
        def process(secID, recs):
            """Return the feature frame of a single security."""
            columns_by_key = [[rec[key] for rec in recs]
                              for key in self.keynames_]

            grads = [
                utils_common.computeGradRate(
                    utils_common.slideWindowAverage(column, nday))
                for nday in self.ndays_
                for column in columns_by_key
            ]
            # Transpose: records become rows, (nday, key) pairs columns.
            fea = np.array(grads).T

            indices = []
            for rec in recs:
                indices.append((rec['secID'], rec['tradeDate']))

            namePrefix = self.__class__.__name__
            columnNames = []
            for nday in self.ndays_:
                for key in self.keynames_:
                    columnNames.append(
                        namePrefix + '_' + key + '_' + str(nday))

            frame_index = pd.MultiIndex.from_tuples(
                indices, names=['secID', 'tradeDate'])
            return pd.DataFrame(fea, index=frame_index, columns=columnNames)

        df = dataio.getdf('MktEquFlowGet')[self.keynames_]

        secResults = dataio.forEachSecID(df, process)

        return pd.concat(secResults)
    def extract(self):
        """Measure how far each close sits from its window high and low.

        Per window length in ``self.ndays_`` the method emits
        ``(close - high) / high`` and ``(close - low) / low`` using the values
        returned by ``utils_common.slideWindowMaximum`` /
        ``slideWindowMinimum``; 0 is stored when the reference value is 0.
        Rows with ``isOpen != 1`` are discarded beforehand.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with columns
            ``<ClassName>_max_<nday>`` followed by ``<ClassName>_min_<nday>``
            for each window length.
        """
        def process(secID, recs):
            """Return the feature frame of a single security."""
            n_windows = len(self.ndays_)
            fea = np.zeros((len(recs), n_windows * 2), dtype=np.float32)

            close = [rec['closePrice'] for rec in recs]
            high = [rec['highestPrice'] for rec in recs]
            # A low of exactly 0 is swapped for 99999 (presumably to keep it
            # from becoming the window minimum -- TODO confirm).
            low = [99999 if price == 0 else price
                   for price in (rec['lowestPrice'] for rec in recs)]

            ndayHigh = []
            ndayLow = []
            for nday in self.ndays_:
                ndayHigh.append(utils_common.slideWindowMaximum(high, nday))
                ndayLow.append(utils_common.slideWindowMinimum(low, nday))

            indices = []
            for i, rec in enumerate(recs):
                indices.append((rec['secID'], rec['tradeDate']))

                for ii in range(n_windows):
                    maxp = ndayHigh[ii][i]
                    minp = ndayLow[ii][i]

                    if maxp != 0:
                        fea[i, ii * 2] = (close[i] - maxp) / maxp
                    else:
                        fea[i, ii * 2] = 0

                    if minp != 0:
                        fea[i, ii * 2 + 1] = (close[i] - minp) / minp
                    else:
                        fea[i, ii * 2 + 1] = 0

            namePrefix = self.__class__.__name__
            columnNames = []
            for nday in self.ndays_:
                columnNames.extend([namePrefix + '_max_' + str(nday),
                                    namePrefix + '_min_' + str(nday)])

            frame_index = pd.MultiIndex.from_tuples(
                indices, names=['secID', 'tradeDate'])
            return pd.DataFrame(fea, index=frame_index, columns=columnNames)

        wanted = ['highestPrice', 'lowestPrice', 'closePrice', 'isOpen']
        df = dataio.getdf('MktEqudAdjAfGet')[wanted]
        df = df[(df['isOpen'] == 1).values]

        secResults = dataio.forEachSecID(df, process)

        return pd.concat(secResults)
Beispiel #15
0
    def extract(self):
        """Relate each close price to the previous record's window averages.

        For record ``i > 0`` and each window length in ``self.ndays_`` the
        feature is ``close[i] / average[i - 1]`` (0 when that average is 0);
        the first record of every security is set to 1.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with one
            ``<ClassName>_<nday>`` column per window length.
        """
        def process(secID, recs):
            """Build the feature frame for the records of one security."""
            closes = [rec['closePrice'] for rec in recs]
            averages = np.array([
                utils_common.slideWindowAverage(closes, nday)
                for nday in self.ndays_
            ]).T
            fea = np.zeros(averages.shape)

            for row in range(fea.shape[0]):
                if row == 0:
                    # No previous record to compare against.
                    fea[row, :] = 1
                    continue
                for col in range(fea.shape[1]):
                    previous = averages[row - 1, col]
                    fea[row, col] = (
                        0 if previous == 0 else closes[row] / previous)

            indices = [(rec['secID'], rec['tradeDate']) for rec in recs]

            prefix = self.__class__.__name__
            columnNames = [prefix + '_' + str(nday) for nday in self.ndays_]

            return pd.DataFrame(fea,
                                index=pd.MultiIndex.from_tuples(
                                    indices, names=['secID', 'tradeDate']),
                                columns=columnNames)

        market = dataio.getdf('MktEqudAdjAfGet')[['closePrice']]

        return pd.concat(dataio.forEachSecID(market, process))
    def extract(self):
        """Compare each turnover value with the previous window averages.

        The feature for record ``i > 0`` and window length ``nday`` is
        ``turnoverValue[i] / average_nday[i - 1]``, or 0 if that average is
        0.  The first record of each security gets 1 in every column.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with one
            ``<ClassName>_<nday>`` column per entry of ``self.ndays_``.
        """
        def process(secID, recs):
            """Return the feature frame of a single security."""
            val = [rec['turnoverValue'] for rec in recs]

            windows = []
            for nday in self.ndays_:
                windows.append(utils_common.slideWindowAverage(val, nday))
            windows = np.array(windows).T

            fea = np.zeros(windows.shape)
            n_rows = fea.shape[0]
            if n_rows > 0:
                # The first record has no predecessor.
                fea[0, :] = 1
            for i in range(1, n_rows):
                for j in range(fea.shape[1]):
                    denominator = windows[i - 1, j]
                    if denominator == 0:
                        fea[i, j] = 0
                    else:
                        fea[i, j] = val[i] / denominator

            indices = []
            for rec in recs:
                indices.append((rec['secID'], rec['tradeDate']))

            namePrefix = self.__class__.__name__
            columnNames = [namePrefix + '_' + str(nday)
                           for nday in self.ndays_]

            frame_index = pd.MultiIndex.from_tuples(
                indices, names=['secID', 'tradeDate'])
            return pd.DataFrame(fea, index=frame_index, columns=columnNames)

        df = dataio.getdf('MktEqudAdjAfGet')[['turnoverValue']]

        secResults = dataio.forEachSecID(df, process)

        return pd.concat(secResults)
Beispiel #17
0
    def extract(self):
        """Compute each flow-order column's share of the per-record total.

        For every window length in ``self.ndays_`` the absolute values of
        each column in ``self.keynames_`` are window-averaged and then
        L1-normalised across the columns, so the shares of one record add up
        to 1 within a window length.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with columns
            ``<ClassName>_<key>_<nday>``, window length varying slowest.
        """
        def process(secID, recs):
            """Build the feature frame for the records of one security."""
            n_keys = len(self.keynames_)

            shares = []
            for nday in self.ndays_:
                averaged = np.zeros((n_keys, len(recs)))
                for row, key in enumerate(self.keynames_):
                    magnitudes = [abs(rec[key]) for rec in recs]
                    averaged[row, :] = utils_common.slideWindowAverage(
                        magnitudes, nday)
                # Rows are keys and columns are records, so axis=0 normalises
                # across the keys of each record.
                shares.append(
                    sklearn.preprocessing.normalize(averaged,
                                                    norm='l1',
                                                    axis=0))

            fea = np.zeros((len(recs), len(self.ndays_) * n_keys),
                           dtype=np.float32)
            for block, share in enumerate(shares):
                fea[:, block * n_keys:(block + 1) * n_keys] = share.T

            indices = [(rec['secID'], rec['tradeDate']) for rec in recs]

            prefix = self.__class__.__name__
            columnNames = [prefix + '_' + key + '_' + str(nday)
                           for nday in self.ndays_
                           for key in self.keynames_]

            return pd.DataFrame(fea,
                                index=pd.MultiIndex.from_tuples(
                                    indices, names=['secID', 'tradeDate']),
                                columns=columnNames)

        orders = dataio.getdf('MktEquFlowOrderGet')[self.keynames_]

        return pd.concat(dataio.forEachSecID(orders, process))
Beispiel #18
0
    def extract(self):
        """Compute the inflow share of window-averaged money flow.

        For each window length in ``self.ndays_`` the feature is
        ``inflow / (inflow + outflow)`` of the window averages, or 0 when the
        sum is not positive.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with one
            ``<ClassName>_<nday>`` column per window length.
        """
        def process(secID, recs):
            """Build the feature frame for the records of one security."""
            raw_in = [rec['moneyInflow'] for rec in recs]
            raw_out = [rec['moneyOutflow'] for rec in recs]

            avg_in = []
            avg_out = []
            for nday in self.ndays_:
                avg_in.append(utils_common.slideWindowAverage(raw_in, nday))
                avg_out.append(utils_common.slideWindowAverage(raw_out, nday))

            fea = np.zeros((len(recs), len(self.ndays_)), dtype=np.float32)
            for col, (ins, outs) in enumerate(zip(avg_in, avg_out)):
                for row in range(len(recs)):
                    total = ins[row] + outs[row]
                    fea[row, col] = ins[row] / total if total > 0 else 0

            indices = [(rec['secID'], rec['tradeDate']) for rec in recs]

            prefix = self.__class__.__name__
            columnNames = [prefix + '_' + str(nday) for nday in self.ndays_]

            return pd.DataFrame(fea,
                                index=pd.MultiIndex.from_tuples(
                                    indices, names=['secID', 'tradeDate']),
                                columns=columnNames)

        flow = dataio.getdf('MktEquFlowGet')[['moneyInflow', 'moneyOutflow']]

        return pd.concat(dataio.forEachSecID(flow, process))
    def extract(self):
        """Attach gradient-rate features of index ``000001.ZICN`` to equities.

        ``closeIndex`` values from ``MktIdxdGet`` are handed to
        ``utils_common.computeGradRateIntervals`` with ``ndays_``; the
        per-date result is then joined onto the ``MktEqudGet`` frame.

        Returns:
            DataFrame with the ``MktEqudGet`` index and one
            ``<getName()>_<nday>`` column per entry of ``ndays_``.
        """
        # NOTE(review): ``ndays_`` is used without ``self.`` and therefore
        # must be a module-level name -- confirm this is intended.
        df = dataio.getdf('MktIdxdGet')[['closeIndex']]
        df = df[df.index.get_level_values('indexID') == '000001.ZICN']

        values = np.squeeze(df[['closeIndex']].values)
        fea = utils_common.computeGradRateIntervals(values, ndays_)
        # Transpose so that every interval forms a column.
        fea = np.array(fea).T

        namePrefix = self.getName()
        columnNames = ['{0}_{1}'.format(namePrefix, str(nday))
                       for nday in ndays_]

        date_index = pd.Index(df.index.get_level_values('tradeDate'),
                              name='tradeDate')
        dfIndex = pd.DataFrame(fea, index=date_index, columns=columnNames)

        # closePrice is only loaded to obtain the equity index to join on.
        dfMkt = dataio.getdf('MktEqudGet')[['closePrice']]
        joined = dfMkt.join(dfIndex)
        return joined[columnNames]
    def extract(self):
        """Compute lagged relative changes of the turnover value.

        ``self.ndays_`` is used as an integer number of lags.  Column ``k``
        of record ``i`` holds ``grad[i - k] / turnover[i - k - 1]``, where
        ``grad`` comes from ``utils_common.computeGrad``.  NaN is stored when
        ``i - k <= 0`` or when the denominator is 0.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with columns
            ``<ClassName>_0`` .. ``<ClassName>_<ndays_ - 1>``.
        """
        def process(secID, recs):
            """Build the feature frame for the records of one security."""
            turnover = [rec['turnoverValue'] for rec in recs]
            grad = utils_common.computeGrad(turnover)
            fea = np.zeros((len(recs), self.ndays_), dtype=np.float32)

            for i in range(len(recs)):
                for lag in range(self.ndays_):
                    pos = i - lag
                    if pos > 0 and turnover[pos - 1] != 0:
                        fea[i, lag] = grad[pos] / turnover[pos - 1]
                    else:
                        # No earlier record, or a zero denominator.
                        fea[i, lag] = np.nan

            indices = [(rec['secID'], rec['tradeDate']) for rec in recs]

            prefix = self.__class__.__name__
            columnNames = [prefix + '_' + str(lag)
                           for lag in range(self.ndays_)]

            return pd.DataFrame(fea,
                                index=pd.MultiIndex.from_tuples(
                                    indices, names=['secID', 'tradeDate']),
                                columns=columnNames)

        market = dataio.getdf('MktEqudAdjAfGet')[['turnoverValue']]

        return pd.concat(dataio.forEachSecID(market, process))
    def extract(self):
        """Compute lagged close-price change rates for fund ``510300.XSHG``.

        Only rows of ``MktFunddAdjGet`` with a positive ``closePrice`` are
        used.  ``self.ndays_`` is an integer number of lags; column ``k`` of
        record ``i`` holds ``(close[p] - close[p - 1]) / close[p - 1]`` with
        ``p = i - k``, or NaN when ``p <= 0``.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with columns
            ``<getName()>_0`` .. ``<getName()>_<ndays_ - 1>``.
        """
        fund = dataio.getdf('MktFunddAdjGet')[['closePrice']]
        fund = fund[(fund['closePrice'] > 0).values]
        fund = fund[fund.index.get_level_values('secID') == '510300.XSHG']

        def process(sec, recs):
            """Build the feature frame for the records of one security."""
            closes = [rec['closePrice'] for rec in recs]
            fea = np.zeros((len(recs), self.ndays_), dtype=np.float32)

            for i in range(len(recs)):
                for lag in range(self.ndays_):
                    pos = i - lag
                    if pos > 0:
                        previous = closes[pos - 1]
                        fea[i, lag] = (closes[pos] - previous) / previous
                    else:
                        # No earlier record to compare with.
                        fea[i, lag] = np.nan

            indices = [(rec['secID'], rec['tradeDate']) for rec in recs]

            prefix = self.getName()
            columnNames = [prefix + '_' + str(lag)
                           for lag in range(self.ndays_)]

            return pd.DataFrame(fea,
                                index=pd.MultiIndex.from_tuples(
                                    indices, names=['secID', 'tradeDate']),
                                columns=columnNames)

        return pd.concat(dataio.forEachSecID(fund, process))
Beispiel #22
0
    def extract(self):
        """Compute the close-price return over each look-back in ``ndays_``.

        For record ``i`` and look-back ``nday`` the feature is
        ``(close[i] - close[j]) / close[j]`` with ``j = max(i - nday, 0)``, so
        early records are compared with the first record of the security.
        Only rows with ``isOpen == 1`` are used.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with one
            ``<ClassName>_<nday>`` column per look-back.
        """
        def process(secID, recs):
            """Build the feature frame for the records of one security."""
            fea = np.zeros((len(recs), len(self.ndays_)), dtype=np.float32)

            for i, rec in enumerate(recs):
                current = rec['closePrice']
                for col, nday in enumerate(self.ndays_):
                    base = recs[max(i - nday, 0)]['closePrice']
                    fea[i, col] = (current - base) / base

            indices = [(rec['secID'], rec['tradeDate']) for rec in recs]

            prefix = self.__class__.__name__
            columnNames = [prefix + '_' + str(nday) for nday in self.ndays_]

            return pd.DataFrame(fea,
                                index=pd.MultiIndex.from_tuples(
                                    indices, names=['secID', 'tradeDate']),
                                columns=columnNames)

        market = dataio.getdf('MktEqudAdjAfGet')[['closePrice', 'isOpen']]
        market = market[(market['isOpen'] == 1).values]

        return pd.concat(dataio.forEachSecID(market, process))
    def extract(self):
        """Compute lagged relative changes of the close price.

        ``self.ndays_`` is used as an integer number of lags.  Column ``k``
        of record ``i`` holds ``grad[i - k] / close[i - k - 1]``, where
        ``grad`` is the result of ``utils_common.computeGrad`` on the close
        prices.  NaN is stored when ``i - k <= 0`` or when the denominator
        is 0.

        Returns:
            DataFrame indexed by ``(secID, tradeDate)`` with columns
            ``<ClassName>_0`` .. ``<ClassName>_<ndays_ - 1>``.
        """
        def process(secID, recs):
            """Build the feature frame for the records of one security."""
            closes = [rec['closePrice'] for rec in recs]
            grad = utils_common.computeGrad(closes)
            fea = np.zeros((len(recs), self.ndays_), dtype=np.float32)

            for i in range(len(recs)):
                for lag in range(self.ndays_):
                    pos = i - lag
                    # A zero previous close would otherwise yield inf (or
                    # raise ZeroDivisionError for plain floats); treat it
                    # like a missing value, as the turnover-based sibling
                    # extractor does.
                    if pos <= 0 or closes[pos - 1] == 0:
                        fea[i, lag] = np.nan
                    else:
                        fea[i, lag] = grad[pos] / closes[pos - 1]

            indices = [(rec['secID'], rec['tradeDate']) for rec in recs]

            prefix = self.__class__.__name__
            columnNames = [prefix + '_' + str(lag)
                           for lag in range(self.ndays_)]

            return pd.DataFrame(fea,
                                index=pd.MultiIndex.from_tuples(
                                    indices, names=['secID', 'tradeDate']),
                                columns=columnNames)

        market = dataio.getdf('MktEqudAdjAfGet')[['closePrice']]

        return pd.concat(dataio.forEachSecID(market, process))
    def extract(self):
        """Rank securities per trade date by their recent minimum turnover.

        Only rows with a positive ``turnoverValue`` are used.  For record
        ``i`` the helper takes the minimum turnover of the up to
        ``self.ndays_`` records before it (the record's own value when there
        is none).  Within each trade date the securities are then ranked so
        that the largest value receives rank 0.

        Returns:
            DataFrame indexed like the concatenated per-security frames with
            the single column ``FeaLastTurnoverRank``.
        """
        def lastTurnoverMedian(secID, recs):
            """Return the trailing minimum turnover of one security.

            NOTE(review): despite the "median" in the function and column
            names, the statistic computed below is ``np.min``.
            """
            # Non-positive values are replaced by a huge constant so they
            # cannot be picked as the minimum.
            turnover = [v if v > 0 else 999999999999.0
                        for v in (rec['turnoverValue'] for rec in recs)]

            fea = np.zeros((len(recs), 1), dtype=np.float32)
            indices = []
            for i, rec in enumerate(recs):
                indices.append((rec['secID'], rec['tradeDate']))
                start = max(0, i - self.ndays_)
                # The window [start, i) excludes the current record.
                fea[i, 0] = (turnover[i] if start == i
                             else np.min(turnover[start:i]))

            return pd.DataFrame(fea,
                                index=pd.MultiIndex.from_tuples(
                                    indices, names=['secID', 'tradeDate']),
                                columns=['turnoverMedian'])

        market = dataio.getdf('MktEqudAdjAfGet')[['turnoverValue']]
        market = market[market['turnoverValue'] > 0]

        dfMedian = pd.concat(dataio.forEachSecID(market, lastTurnoverMedian))

        def ranker(dfDt):
            """Replace the values of one trade date by their descending rank."""
            order = np.argsort(np.squeeze(dfDt.values))
            # argsort of argsort is the ascending rank; flip it so that the
            # largest value gets 0.
            rank = dfDt.shape[0] - order.argsort() - 1

            ranked = dfDt.copy()
            ranked['FeaLastTurnoverRank'] = rank
            return ranked.drop('turnoverMedian', axis=1)

        return dfMedian.groupby('tradeDate', group_keys=False).apply(ranker)
Beispiel #25
0
    def extract(self):
        """Label each 510300.XSHG record with the following open-to-open return.

        For the record at position i the label is
        (open[i+2] - open[i+1]) / open[i+1].  The last two records have no
        such pair ahead of them and receive the sentinel -999.

        Returns:
            DataFrame indexed by (secID, tradeDate) with one column named by
            self.getName().
        """
        opens = dataio.getdf('MktFunddAdjGet')[['openPrice']]
        # Keep rows with a positive open price, then only secID 510300.XSHG.
        opens = opens[(opens[['openPrice']] > 0).values]
        opens = opens[opens.index.get_level_values('secID') == '510300.XSHG']

        def process(sec, recs):
            # assumes recs is an indexable sequence of dict-like rows in date
            # order -- TODO confirm against dataio.forEachSecID
            total = len(recs)
            keys = []
            labels = []

            for pos, rec in enumerate(recs):
                keys.append((rec['secID'], rec['tradeDate']))
                if pos + 2 < total:
                    nextOpen = recs[pos + 1]['openPrice']
                    laterOpen = recs[pos + 2]['openPrice']
                    labels.append((laterOpen - nextOpen) / nextOpen)
                else:
                    # Not enough future records to form the return.
                    labels.append(-999)

            frameIndex = pd.MultiIndex.from_tuples(
                keys, names=['secID', 'tradeDate'])
            return pd.DataFrame(labels,
                                index=frameIndex,
                                columns=[self.getName()])

        return pd.concat(dataio.forEachSecID(opens, process))
Beispiel #26
0
    def extract(self):
        """Compute the next open-to-open return label for secID 510300.XSHG.

        Each record i is labelled with the relative change of 'openPrice'
        between records i+1 and i+2.  Records that do not have two successors
        are labelled with the sentinel -999.

        Returns:
            DataFrame indexed by (secID, tradeDate); its only column is named
            by self.getName().
        """
        priceFrame = dataio.getdf('MktFunddAdjGet')[['openPrice']]
        priceFrame = priceFrame[(priceFrame[['openPrice']] > 0).values]

        secMask = priceFrame.index.get_level_values('secID') == '510300.XSHG'
        priceFrame = priceFrame[secMask]

        def process(sec, recs):
            # assumes recs supports len() and integer indexing and holds
            # dict-like rows in date order -- TODO confirm
            count = len(recs)

            def forwardReturn(k):
                # Relative open-price change from record k+1 to record k+2.
                base = recs[k + 1]['openPrice']
                return (recs[k + 2]['openPrice'] - base) / base

            rowKeys = [(rec['secID'], rec['tradeDate']) for rec in recs]
            rowValues = [
                forwardReturn(k) if (k + 2) < count else -999
                for k in range(count)
            ]

            return pd.DataFrame(
                rowValues,
                index=pd.MultiIndex.from_tuples(
                    rowKeys, names=['secID', 'tradeDate']),
                columns=[self.getName()],
            )

        perSecurity = dataio.forEachSecID(priceFrame, process)
        return pd.concat(perSecurity)
    def extract(self):
        """Run `process` over the 'isOpen' column of 'MktEqudAdjAfGet'.

        The single-column frame, `process` and self.getNames() are handed to
        dataio.forEachSecIDEx, whose result is returned unchanged.

        NOTE(review): `process` is not defined inside this method; it has to
        be resolvable from an enclosing scope at call time -- confirm.
        """
        isOpenFrame = dataio.getdf('MktEqudAdjAfGet')[['isOpen']]
        return dataio.forEachSecIDEx(isOpenFrame, process, self.getNames())
    def extract(self):
        """Build lagged intraday price-ratio features for every open trading day.

        Four ratios are computed per record:
            OpenClose: (closePrice - openPrice) / openPrice
            Open:      (openPrice - preClosePrice) / preClosePrice
            High:      (highestPrice - openPrice) / openPrice
            Low:       (lowestPrice - openPrice) / openPrice
        A ratio is NaN when its denominator is not positive.  Every output row
        holds the ratios of the current record (lag 0) and of the preceding
        self.ndays_ - 1 records; lags that reach before the first record of a
        security are NaN.

        Returns:
            DataFrame indexed by (secID, tradeDate) whose columns are named
            '<ClassName>_<ratio>_<lag>', grouped by lag and, within a lag,
            ordered OpenClose, Open, High, Low.
        """
        def process(secID, recs):
            # assumes recs is a sequence of dict-like rows for one security,
            # in date order -- TODO confirm against dataio.forEachSecID
            def ratio(rec, topKey, baseKey):
                # Relative difference to the base price; NaN without a
                # positive base.
                base = rec[baseKey]
                if base > 0:
                    return (rec[topKey] - base) / base
                return np.nan

            # The series are addressed by position below, so they are kept in
            # an explicit list of (name, series) pairs.  A dict.items() view
            # cannot be indexed on Python 3 and has no guaranteed order on
            # Python 2.
            values = [
                ('OpenClose',
                 [ratio(rec, 'closePrice', 'openPrice') for rec in recs]),
                ('Open',
                 [ratio(rec, 'openPrice', 'preClosePrice') for rec in recs]),
                ('High',
                 [ratio(rec, 'highestPrice', 'openPrice') for rec in recs]),
                ('Low',
                 [ratio(rec, 'lowestPrice', 'openPrice') for rec in recs]),
            ]
            nSeries = len(values)

            fea = np.zeros((len(recs), self.ndays_ * nSeries),
                           dtype=np.float32)
            indices = []

            for i, rec in enumerate(recs):
                indices.append((rec['secID'], rec['tradeDate']))

                for ii in range(self.ndays_):
                    pos = i - ii
                    for iii, (_, series) in enumerate(values):
                        # Lag-major layout: all ratios of lag 0, then lag 1...
                        colIdx = ii * nSeries + iii
                        fea[i, colIdx] = series[pos] if pos >= 0 else np.nan

            namePrefix = self.__class__.__name__
            columnNames = [
                namePrefix + '_' + name + '_' + str(ii)
                for ii in range(self.ndays_)
                for name, _ in values
            ]

            return pd.DataFrame(fea,
                                index=pd.MultiIndex.from_tuples(
                                    indices, names=['secID', 'tradeDate']),
                                columns=columnNames)

        df = dataio.getdf('MktEqudAdjAfGet')[[
            'openPrice', 'closePrice', 'isOpen', 'preClosePrice',
            'highestPrice', 'lowestPrice'
        ]]
        # Only days on which the security actually traded.
        df = df[df['isOpen'] == 1]

        secResults = dataio.forEachSecID(df, process)

        return pd.concat(secResults)
    def extract(self):
        """Run `process` over turnover and close-price columns of 'MktEqudAdjAfGet'.

        The 'turnoverVol', 'turnoverValue' and 'closePrice' columns, `process`
        and self.getNames() are handed to dataio.forEachSecIDEx, whose result
        is returned unchanged.

        NOTE(review): `process` is not defined inside this method; it has to
        be resolvable from an enclosing scope at call time -- confirm.
        """
        wantedColumns = ['turnoverVol', 'turnoverValue', 'closePrice']
        marketFrame = dataio.getdf('MktEqudAdjAfGet')[wantedColumns]
        return dataio.forEachSecIDEx(marketFrame, process, self.getNames())