예제 #1
0
def main(stocks=None, args=[1, 2, 3, 4, 5]):
    """Build five forward open-to-open log-return labels and write them to CSV.

    For every stock the daily CSV is read from
    ``../../DataBase/StockDailyData/Stock/<stock>.csv``.  Prices are
    multiplied by ``adj_factor`` and log-transformed, then label ``k``
    (k = 1..5) is ``OPEN.shift(-(k + 1)) - OPEN.shift(-k)``.  The first 60
    valid observations of every column are blanked, rows matching the
    exclusion mask are set to NaN, and the result is saved as
    ``../Data/y<k>.csv``.

    Args:
        stocks: iterable of stock codes; when ``None`` the universe is taken
            from ``tools.get_stocks()``.
        args: unused by this function; kept only so existing callers keep
            working (it is never read or mutated here).

    Returns:
        None.  The output is the five CSV files.
    """
    if stocks is None:
        stocks = tools.get_stocks()
    data = {
        stock:
        pd.read_csv('../../DataBase/StockDailyData/Stock/%s.csv' % stock,
                    index_col=[0],
                    parse_dates=[0])
        for stock in stocks
    }

    def field(name):
        # One wide frame (dates x stocks) for a single CSV column.
        return DataFrame({stock: data[stock].loc[:, name] for stock in stocks})

    def blank_first_valid(s, n):
        # NaN-out the first ``n`` non-missing values of ``s``.  Unlike a
        # repeated ``s.loc[s.first_valid_index()] = nan`` this also works
        # when the column holds fewer than ``n`` valid values.
        valid = s.notna()
        return s.mask(valid & (valid.cumsum() <= n))

    OPEN = field('open')
    HIGH = field('high')
    LOW = field('low')
    CLOSE = field('close')
    ADJ = field('adj_factor')
    AMOUNT = field('amount')

    # Both flags are lagged by one row, so a label row is judged on the
    # previous row's status.
    st = field('st').shift()
    # Amount below the 5% cross-sectional quantile of the 5-row mean amount.
    no_liquid = (AMOUNT.lt(AMOUNT.rolling(5).mean().quantile(0.05, axis=1),
                           axis=0)).shift()

    # Missing close or zero amount (presumably a trading halt).  This must
    # use isna(): ``CLOSE == np.nan`` is False for every cell because NaN
    # never compares equal to anything, so the original test never fired.
    tingpai = CLOSE.isna() | (AMOUNT == 0)

    # Adjusted log prices; gaps in close are carried forward and the other
    # prices fall back to that filled close.
    CLOSE = np.log(CLOSE * ADJ).ffill()
    OPEN = (np.log(OPEN * ADJ)).fillna(value=CLOSE)
    HIGH = (np.log(HIGH * ADJ)).fillna(value=CLOSE)
    LOW = (np.log(LOW * ADJ)).fillna(value=CLOSE)

    # High equals low while sitting above the previous close (presumably a
    # one-price limit-up bar).
    yiziban = (HIGH == LOW) & (HIGH > CLOSE.shift())

    # The same mask applies to every horizon, so build it once.
    excluded = st | no_liquid | yiziban | tingpai

    for k in range(1, 6):
        label = OPEN.shift(-(k + 1)) - OPEN.shift(-k)
        label = label.apply(func=blank_first_valid,
                            args=(60, ),
                            axis=0,
                            result_type='expand')
        label[excluded] = np.nan
        label.to_csv('../Data/y%d.csv' % k)
예제 #2
0
        })
        ADJ = DataFrame({
            stock: pd.read_csv('%s/StockDailyData/Stock/%s.csv' %
                               (gc.DATABASE_PATH, stock),
                               index_col=[0],
                               parse_dates=[0]).loc[:, 'adj_factor']
            for stock in self.stocks
        })
        CLOSE = CLOSE * ADJ
        r = np.log(CLOSE).diff()
        n = 20
        a = r.rolling(n).mean() / r.rolling(n).std()
        a = a.loc[a.index >= self.start_date, :]
        a = a.loc[a.index <= self.end_date, :]
        self.factor = a


#%%
if __name__ == '__main__':
    # Fetch the stock universe from the project's tools module.
    stocks = tools.get_stocks()

    # Sharpe factor named 'Sharpe' over the 20200101..20201010 window.
    a = Sharpe(
        'Sharpe',
        stocks=stocks,
        start_date='20200101',
        end_date='20201010',
    )

    # Build the factor values first, then run the analysis on them.
    a.generate_factor()
    a.factor_analysis()
예제 #3
0
def main(start_date, end_date):
    """Refresh the industry dummy tables, then update every factor script.

    Step 1 writes one 0/1 membership table per level-1 industry to
    ``<gc.FACTORBASE_PATH>/Data/<industry>.csv``; when the file already
    exists only rows dated after its last row are appended.

    Step 2 scans the current working directory for ``*_1.py`` files and then
    ``*_2.py`` files.  For each file the class named after the file (file
    stem minus the two-character ``_1``/``_2`` suffix) is imported from it,
    instantiated as ``cls(name, stocks, start, end)`` and ``update_factor()``
    is called on the instance.

    Args:
        start_date: not used; the window is derived per factor (see below).
            Kept for interface compatibility.
        end_date: not used, for the same reason.

    Returns:
        None.
    """
    # Local import so this function does not rely on the module's import block.
    import importlib

    # Stock universe, rebuilt as the sorted union of all industry members.
    stocks = tools.get_stocks()
    industrys = dict(tools.get_industrys(level='L1', stocks=stocks))
    stocks = sorted(stock for members in industrys.values()
                    for stock in members)

    # The close prices are loaded only to obtain the date index.
    CLOSE = DataFrame({
        stock: pd.read_csv('%s/StockDailyData/Stock/%s.csv' %
                           (gc.DATABASE_PATH, stock),
                           index_col=[0],
                           parse_dates=[0]).loc[:, 'close']
        for stock in stocks
    })
    dates = CLOSE.index

    for ind, members in industrys.items():
        if len(members) == 0:
            continue
        path = '%s/Data/%s.csv' % (gc.FACTORBASE_PATH, ind)
        df = DataFrame(0, index=dates, columns=stocks)
        df.loc[:, members] = 1
        if os.path.exists(path):
            df_old = pd.read_csv(path, index_col=[0], parse_dates=[0])
            # Keep the stored history and append only the newer rows.
            df = pd.concat([df_old, df.loc[df.index > df_old.index[-1]]],
                           axis=0)
            # axis must be a keyword: recent pandas rejects a positional axis.
            df.sort_index(axis=0, inplace=True)
        df.sort_index(axis=1, inplace=True)
        df.to_csv(path)

    # Collect the factor scripts from the working directory.
    files = os.listdir('./')
    factors_1 = [name for name in files if name.endswith('_1.py')]
    factors_2 = [name for name in files if name.endswith('_2.py')]

    today = datetime.datetime.today().strftime('%Y%m%d')
    # Single factors (_1) first, then the composite factors (_2).
    for p in factors_1 + factors_2:
        module_name = p.split('.')[0]
        factor_name = module_name[:-2]
        # An existing base file means only today needs computing; otherwise
        # the factor is built from 20200101 up to today.
        if os.path.exists('%s/Base/%s.csv' %
                          (gc.FACTORBASE_PATH, factor_name)):
            factor_start = today
        else:
            factor_start = '20200101'
        # NOTE(review): this imports and runs whatever *_1.py / *_2.py files
        # sit in the working directory; only run it from a trusted directory.
        # import_module + getattr replaces exec/eval, which depended on
        # exec() writing into the function's locals (not the case on 3.13+).
        factor_cls = getattr(importlib.import_module(module_name),
                             factor_name)
        factor = factor_cls(factor_name, stocks, factor_start, today)
        factor.update_factor()