def main(stocks=None, args=[1, 2, 3, 4, 5]):
    """Build five forward open-to-open log-return labels and save them as CSV.

    For every stock a daily CSV is read from
    ``../../DataBase/StockDailyData/Stock/<stock>.csv`` (columns used:
    ``open``, ``high``, ``low``, ``close``, ``adj_factor``, ``st``,
    ``amount``). Prices are multiplied by ``adj_factor`` and log-transformed.
    For ``k`` in 1..5 the label is
    ``log_open.shift(-(k + 1)) - log_open.shift(-k)``; it is written to
    ``../Data/y<k>.csv`` after the first 60 valid values of each column and
    every excluded (date, stock) cell have been set to NaN.

    Args:
        stocks: Iterable of stock codes. When ``None`` the list comes from
            ``tools.get_stocks()``.
        args: Not read by this function; kept only so existing callers that
            pass it keep working. It is never mutated.

    Returns:
        None. The results are the five CSV files.
    """
    if stocks is None:
        stocks = tools.get_stocks()
    # Materialise once: the codes are iterated several times below.
    stocks = list(stocks)

    data = {
        stock: pd.read_csv(
            '../../DataBase/StockDailyData/Stock/%s.csv' % stock,
            index_col=[0],
            parse_dates=[0],
        )
        for stock in stocks
    }

    def field(name):
        """Collect one CSV column of every stock into a date x stock frame."""
        return DataFrame({stock: data[stock].loc[:, name] for stock in stocks})

    OPEN = field('open')
    HIGH = field('high')
    LOW = field('low')
    CLOSE = field('close')
    ADJ = field('adj_factor')
    st = field('st')
    AMOUNT = field('amount')

    # Both flags are taken from the previous row.
    st = st.shift()
    # Amount below the cross-sectional 5% quantile of the 5-row mean amount.
    no_liquid = AMOUNT.lt(
        AMOUNT.rolling(5).mean().quantile(0.05, axis=1), axis=0
    ).shift()
    # Missing close or zero amount ("tingpai" presumably means a trading
    # halt). NaN never compares equal to anything, so the missing-close test
    # has to be isna(); ``CLOSE == np.nan`` is always False. This must run
    # on the raw closes, before they are forward-filled below.
    tingpai = CLOSE.isna() | (AMOUNT == 0)

    # Adjusted log prices; gaps in close are carried forward and then used
    # to fill gaps in the other three price series.
    CLOSE = np.log(CLOSE * ADJ).ffill()
    OPEN = np.log(OPEN * ADJ).fillna(value=CLOSE)
    HIGH = np.log(HIGH * ADJ).fillna(value=CLOSE)
    LOW = np.log(LOW * ADJ).fillna(value=CLOSE)

    # High equals low and sits above the previous close.
    yiziban = (HIGH == LOW) & (HIGH > CLOSE.shift())

    # Same cells are excluded from every label, so build the mask once.
    excluded = st | no_liquid | yiziban | tingpai

    for k in range(1, 6):
        y = OPEN.shift(-(k + 1)) - OPEN.shift(-k)
        # Blank the first 60 valid values of each column. Columns with fewer
        # than 60 valid values simply become all-NaN.
        y = y.mask(y.notna().cumsum() <= 60)
        y[excluded] = np.nan
        y.to_csv('../Data/y%d.csv' % k)
}) ADJ = DataFrame({ stock: pd.read_csv('%s/StockDailyData/Stock/%s.csv' % (gc.DATABASE_PATH, stock), index_col=[0], parse_dates=[0]).loc[:, 'adj_factor'] for stock in self.stocks }) CLOSE = CLOSE * ADJ r = np.log(CLOSE).diff() n = 20 a = r.rolling(n).mean() / r.rolling(n).std() a = a.loc[a.index >= self.start_date, :] a = a.loc[a.index <= self.end_date, :] self.factor = a #%% if __name__ == '__main__': #获取股票 stocks = tools.get_stocks() a = Sharpe('Sharpe', stocks=stocks, start_date='20200101', end_date='20201010') a.generate_factor() a.factor_analysis()
def _update_factors(factor_files, stocks):
    """Import each factor script and call ``update_factor()`` on its class.

    For a script ``<Name>_1.py`` / ``<Name>_2.py`` the module name is the
    part of the file name before the first dot and the class name is that
    module name without its last two characters. If
    ``<FACTORBASE_PATH>/Base/<Name>.csv`` already exists, the factor is
    built for today only; otherwise from ``'20200101'`` through today.

    Args:
        factor_files: File names of the factor scripts to process.
        stocks: Stock codes passed to every factor constructor.
    """
    # Local import: a real import replaces the former exec()/eval() pair,
    # which depended on exec() being able to add names to the function's
    # local namespace.
    import importlib

    today = datetime.datetime.today().strftime('%Y%m%d')
    for p in factor_files:
        module_name = p.split('.')[0]
        class_name = module_name[:-2]
        base_csv = '%s/Base/%s.csv' % (gc.FACTORBASE_PATH, class_name)
        start_date = today if os.path.exists(base_csv) else '20200101'
        end_date = today
        factor_cls = getattr(importlib.import_module(module_name), class_name)
        factor = factor_cls(class_name, stocks, start_date, end_date)
        factor.update_factor()


def main(start_date, end_date):
    """Refresh the industry dummy tables, then update every factor script.

    Step 1 writes one 0/1 table (dates x stocks) per non-empty industry to
    ``<FACTORBASE_PATH>/Data/<industry>.csv``. When that file already exists
    only rows dated after its last row are appended. Step 2 scans the current
    directory for ``*_1.py`` scripts and then ``*_2.py`` scripts and calls
    ``update_factor()`` on the class each one defines.

    Args:
        start_date: Not used. Each factor's window is derived from whether
            its Base CSV already exists.
        end_date: Not used, for the same reason.
    """
    # Get the stock list.
    stocks = tools.get_stocks()
    # Get the industry -> member stocks mapping.
    industrys = tools.get_industrys(level='L1', stocks=stocks)
    industrys = {k: industrys[k] for k in industrys.keys()}
    # From here on the universe is the sorted union of all industry members.
    stocks = sorted(
        stock for members in industrys.values() for stock in members
    )

    # The close prices are loaded only to obtain the date index.
    CLOSE = DataFrame({
        stock: pd.read_csv(
            '%s/StockDailyData/Stock/%s.csv' % (gc.DATABASE_PATH, stock),
            index_col=[0],
            parse_dates=[0],
        ).loc[:, 'close']
        for stock in stocks
    })
    dates = CLOSE.index

    for ind, members in industrys.items():
        if len(members) == 0:
            continue
        df = DataFrame(0, index=dates, columns=stocks)
        df.loc[:, members] = 1
        csv_path = '%s/Data/%s.csv' % (gc.FACTORBASE_PATH, ind)
        if os.path.exists(csv_path):
            df_old = pd.read_csv(csv_path, index_col=[0], parse_dates=[0])
            # An existing file without rows has no last date to compare
            # against; in that case the freshly built table is written whole.
            if len(df_old.index) > 0:
                df = pd.concat(
                    [df_old, df.loc[df.index > df_old.index[-1]]], axis=0
                )
        # ``axis`` must be passed by keyword on current pandas.
        df.sort_index(axis=0, inplace=True)
        df.sort_index(axis=1, inplace=True)
        df.to_csv(csv_path)

    # Scan the working directory for factor scripts; sorted so the
    # processing order does not depend on os.listdir().
    files = os.listdir('./')
    factors_1 = sorted(f for f in files if f.endswith('_1.py'))
    factors_2 = sorted(f for f in files if f.endswith('_2.py'))

    # Generate the single factors.
    _update_factors(factors_1, stocks)
    # Generate the composite factors.
    _update_factors(factors_2, stocks)