def cal_factor_return(self, sf_ids):
    """Estimate per-date factor returns with cross-sectional OLS.

    For each pair of consecutive dates in the return index (after
    2000-01-01), the stock returns on the later date are regressed,
    without an intercept, on the factor exposures read on the earlier date.

    Args:
        sf_ids: iterable of factor ids; each must be a key of
            ``StockFactor.stock_factors``.

    Returns:
        Tuple ``(df_ret, df_sret)``:
        ``df_ret`` has one row per regression date and one column per
        factor id (the fitted coefficients); ``df_sret`` has one row per
        regression date and one column per stock (return minus the part
        explained by the factors).
    """
    sfs = [
        StockFactor.subclass(sf_id, StockFactor.stock_factors[sf_id])
        for sf_id in sf_ids
    ]

    # The stock universe does not change inside this call, so fetch it once
    # instead of on every loop iteration.
    stock_index = StockAsset.all_stock_info().index

    close = StockAsset.all_stock_nav()
    ret = close.pct_change()
    ret = ret[stock_index]
    dates = ret.index
    dates = dates[dates > '2000-01-01']

    df_ret = pd.DataFrame(columns=sf_ids)
    df_sret = pd.DataFrame(columns=stock_index)

    for date, next_date in zip(dates[:-1], dates[1:]):
        # Single pre-formatted argument: prints the same text on Python 2 and 3.
        print('cal_factor_return: %s' % (date,))
        tmp_ret = ret.loc[next_date].values

        tmp_exposure = {}
        for sf in sfs:
            tmp_exposure[sf.factor_id] = sf.exposure.loc[date]
        tmp_exposure_df = pd.DataFrame(tmp_exposure)
        # Missing exposures are treated as zero exposure.
        tmp_exposure_df = tmp_exposure_df[sf_ids].fillna(0.0)
        # Align rows with the column order of ``ret`` / ``tmp_ret``.
        tmp_exposure_df = tmp_exposure_df.loc[stock_index]

        # missing='drop' discards observations containing NaN before fitting.
        mod = sm.OLS(tmp_ret, tmp_exposure_df.values, missing='drop').fit()

        df_ret.loc[next_date] = mod.params
        df_sret.loc[next_date] = tmp_ret - np.dot(tmp_exposure_df.values,
                                                  mod.params)

    return df_ret, df_sret
def cal_earning(self, stock_id):
    """Return price divided by ``pettm`` for ``stock_id``.

    Reads ``pettm`` from the stock's fundamental data (``fdmt``) and
    ``tclose`` from its quote data, and returns ``tclose / pettm``.
    """
    pe_ttm = StockAsset.get_stock(stock_id).fdmt.pettm
    price = StockAsset.get_stock(stock_id).quote.tclose
    return price / pe_ttm
def cal_indexposure(self, stock_id):
    """Return a constant 0/1 exposure series for ``stock_id``.

    The value is 1 when the stock's ``sk_swlevel1code`` (from the stock
    info table) equals ``self.sf_ind`` and 0 otherwise. The series is
    indexed like the stock's quote data.
    """
    quote = StockAsset.get_stock(stock_id).quote
    info = StockAsset.all_stock_info()

    frame = pd.DataFrame(index=quote.index)
    # Same flag on every date: membership is read from the static info table.
    in_industry = info.loc[stock_id].sk_swlevel1code == self.sf_ind
    frame['exposure'] = 1 if in_industry else 0
    return frame.exposure
def cal_halpha(self, stock_id):
    """Return the rolling 60-month regression intercept for ``stock_id``.

    Monthly returns (sum of daily percentage changes, last month dropped)
    of the stock are regressed on those of the series loaded with id
    '120000016' (presumably a market benchmark - confirm). The intercept
    of each 60-month window is stamped on the window's last month, and the
    result is forward-filled to daily frequency up to ``get_today()``.

    Returns an empty series when fewer than 60 common months exist.
    """
    quote = StockAsset.get_stock(stock_id).quote
    prices = quote.tcloseaf.replace(0.0, method='pad')
    stock_daily = prices.pct_change()

    bench_daily = Asset.load_nav_series('120000016').pct_change()

    # Monthly sums; the trailing month is discarded by ``iloc[:-1]``.
    stock_monthly = stock_daily.resample('m').sum().iloc[:-1]
    bench_monthly = bench_daily.resample('m').sum().iloc[:-1]

    months = stock_monthly.index.intersection(bench_monthly.index)
    stock_monthly = stock_monthly.loc[months]
    bench_monthly = bench_monthly.loc[months]

    alpha = pd.Series()
    if len(months) < 60:
        return alpha

    # NOTE(review): the loop starts at position 60, so the window ending at
    # position 59 is never fitted - confirm this is intended.
    for end in range(60, len(months)):
        window = months[end - 59:end + 1]
        targets = stock_monthly.loc[window].values
        features = bench_monthly.loc[window].values.reshape(-1, 1)
        fitted = LinearRegression().fit(features, targets)
        alpha.loc[window[-1]] = fitted.intercept_

    # Add a NaN entry for today so the daily resample extends to today.
    alpha.loc[get_today()] = np.nan
    return alpha.resample('d').last().fillna(method='pad')
def cal_dastd(self, stock_id, period=23):
    """Return the rolling root-mean-square of daily returns.

    Args:
        stock_id: id passed to ``StockAsset.get_stock``.
        period: rolling window length in rows (default 23).

    Returns:
        Series of ``sqrt(mean(ret ** 2))`` over each window of ``period``
        daily percentage changes of ``tclose``.
    """
    def root_mean_square(window):
        return pow(pow(window, 2).mean(), 0.5)

    prices = StockAsset.get_stock(stock_id).quote.tclose
    # Zero closes are replaced by the previous value before computing returns.
    prices = prices.replace(0.0, method='pad')
    daily_ret = prices.pct_change()
    return daily_ret.rolling(period).apply(root_mean_square)
def cal_factor_return(self, sf_ids):
    """Estimate 21-row-horizon factor returns with cross-sectional OLS.

    Factor exposures are loaded in parallel through
    ``multiprocess_load_factor_exposure``. For each date ``d`` on or after
    2005-01-01, the 21-row percentage change ending 21 rows after ``d`` is
    regressed, without an intercept, on the exposures read at ``d``.

    Args:
        sf_ids: iterable of factor ids; each must be a key of
            ``StockFactor.stock_factors``.

    Returns:
        Tuple ``(df_ret, df_sret)``:
        ``df_ret`` holds the fitted factor coefficients (one column per
        factor id); ``df_sret`` holds, per stock, the return minus the part
        explained by the factors. Both are indexed by the later date.
    """
    period = 21
    sfs = [
        StockFactor.subclass(sf_id, StockFactor.stock_factors[sf_id])
        for sf_id in sf_ids
    ]

    # The stock universe does not change inside this call, so fetch it once
    # instead of on every loop iteration.
    stock_index = StockAsset.all_stock_info().index

    close = StockAsset.all_stock_nav()
    ret = close.pct_change(period).iloc[period:]
    ret = ret[stock_index]
    dates = ret.index
    dates = dates[dates >= '2005-01-01']

    df_ret = pd.DataFrame(columns=sf_ids)
    df_sret = pd.DataFrame(columns=stock_index)

    pool = Pool(len(sfs))
    try:
        sfs = pool.map(multiprocess_load_factor_exposure, sfs)
    finally:
        # Always release the worker processes, even when loading fails.
        pool.close()
        pool.join()

    for date, next_date in zip(dates[:-period], dates[period:]):
        tmp_ret = ret.loc[next_date].values

        tmp_exposure = {}
        for sf in sfs:
            tmp_exposure[sf.factor_id] = sf.exposure.loc[date]
        tmp_exposure_df = pd.DataFrame(tmp_exposure)
        # Missing exposures are treated as zero exposure.
        tmp_exposure_df = tmp_exposure_df[sf_ids].fillna(0.0)
        # Align rows with the column order of ``ret`` / ``tmp_ret``.
        tmp_exposure_df = tmp_exposure_df.loc[stock_index]

        # missing='drop' discards observations containing NaN before fitting.
        mod = sm.OLS(tmp_ret, tmp_exposure_df.values, missing='drop').fit()

        df_ret.loc[next_date] = mod.params
        df_sret.loc[next_date] = tmp_ret - np.dot(tmp_exposure_df.values,
                                                  mod.params)

    return df_ret, df_sret
def cal_mom(self, stock_id, period=23):
    """Return the turnover-weighted average daily return over a window.

    Args:
        stock_id: id passed to ``StockAsset.get_stock``.
        period: rolling window length in rows (default 23).

    Returns:
        Series equal to ``sum(ret * turnrate) / sum(turnrate)`` over each
        rolling window, where ``ret`` is the daily percentage change of
        ``tclose``.
    """
    quote = StockAsset.get_stock(stock_id).quote
    # Zero closes are replaced by the previous value before computing returns.
    prices = quote.tclose.replace(0.0, method='pad')
    daily_ret = prices.pct_change()
    turnover = quote.turnrate

    weighted_sum = (daily_ret * turnover).rolling(period).sum()
    turnover_sum = turnover.rolling(period).sum()
    return weighted_sum / turnover_sum
def cal_hilo(self, stock_id, period=23):
    """Return the log ratio of the rolling high to the rolling low.

    Args:
        stock_id: id passed to ``StockAsset.get_stock``.
        period: rolling window length in rows (default 23).

    Returns:
        Series of ``log(max(thigh) / min(tlow))`` over each window of
        ``period`` rows.
    """
    stock_quote = StockAsset.get_stock(stock_id).quote

    # Zero prices are replaced by the previous value before taking extremes.
    high = stock_quote.thigh.replace(0.0, method='pad')
    hi = high.rolling(period).max()

    low = stock_quote.tlow.replace(0.0, method='pad')
    lo = low.rolling(period).min()

    # Divide by the rolling minimum ``lo``; the previous code divided by the
    # raw daily ``low`` and left ``lo`` unused.
    hilo = np.log(hi / lo)
    return hilo
def cal_egro(self, stock_id):
    """Return a daily series of the 5-point trend of year-end ``tclose / pettm``.

    The series ``tclose / pettm`` is reduced to one value per calendar year
    (the last retained observation of the year). Over each run of five
    yearly values, a straight line is fitted and its slope is divided by
    the mean of the five values. The result is forward-filled to daily
    frequency up to the current date, with leading NaNs dropped.
    """
    def cal_egro_single(x):
        # Slope of a linear fit through five values, scaled by their mean.
        mod = LinearRegression().fit(np.arange(5).reshape(-1, 1), x)
        return mod.coef_[0] / np.mean(x)

    stock_fdmt = StockAsset.get_stock(stock_id).fdmt
    stock_quote = StockAsset.get_stock(stock_id).quote
    p = stock_quote.tclose
    pe = stock_fdmt.pettm
    eps = p / pe
    # NOTE(review): only observations where the value rose by more than
    # 0.001 are kept; decreases are discarded - confirm this is intended.
    eps = eps[eps.diff() > 0.001]

    eps_y = pd.Series()
    for _, v in eps.groupby(eps.index.strftime('%Y')):
        eps_y.loc[v.index[-1]] = v.values[-1]
    eps_y = eps_y.rolling(5).apply(cal_egro_single)

    today = datetime.now()
    # pd.Timestamp is the public name of the class formerly reached through
    # the deprecated pandas.tslib module.
    today_idx = pd.Timestamp(today.year, today.month, today.day)
    # Add a NaN entry for today so the daily resample extends to today.
    eps_y.loc[today_idx] = np.nan
    eps_y = eps_y.resample('d').last().fillna(method='pad').dropna()
    return eps_y
def cal_cmra(self, stock_id):
    """Return the log range of the cumulative log return over 5*252 rows.

    With ``z = log(tcloseaf / first tcloseaf)``, the result is
    ``log((1 + max z) / (1 + min z))`` over a rolling window of
    ``252 * 5`` rows, forward-filled and with remaining NaNs dropped.
    """
    quote = StockAsset.get_stock(stock_id).quote
    # Zero closes are replaced by the previous value.
    prices = quote.tcloseaf.replace(0.0, method='pad')

    log_nav = np.log(prices / prices.iloc[0])
    window = 252 * 5
    highest = log_nav.rolling(window=window).max()
    lowest = log_nav.rolling(window=window).min()

    log_range = np.log((1 + highest) / (1 + lowest))
    return log_range.fillna(method='pad').dropna()
def cal_factor_exposure(self):
    """Compute, store and return this factor's exposure table.

    Each callable in ``self.desc_methods`` is evaluated for every stock in
    the stock info table. The per-descriptor table is passed through
    ``StockFactor.stock_factor_filter`` and ``StockFactor.normalized``, and
    the resulting tables are averaged element-wise. The average, with
    columns ordered like the stock info index, is assigned to
    ``self.exposure`` and returned.
    """
    stock_info = StockAsset.all_stock_info()

    per_descriptor = []
    for method in self.desc_methods:
        raw = pd.DataFrame(
            {stock_id: method(stock_id) for stock_id in stock_info.index})
        filtered = StockFactor.stock_factor_filter(raw)
        per_descriptor.append(StockFactor.normalized(filtered))

    # Equal-weight average of the descriptor tables.
    total = reduce(lambda acc, cur: acc + cur, per_descriptor)
    averaged = total / len(per_descriptor)
    averaged = averaged[stock_info.index]

    self.exposure = averaged
    return averaged
def cal_btsg(self, stock_id):
    """Return ``sqrt(beta * sigma)`` from an expanding monthly regression.

    Monthly returns (sum of daily percentage changes, last month dropped)
    of the stock are regressed with an intercept on those of the series
    loaded with id '120000016' (presumably a market benchmark - confirm).
    Each fit uses all common months up to and including the current one;
    ``beta`` is the slope and ``sigma`` the standard deviation of the
    residuals. Values are forward-filled to daily frequency up to
    ``get_today()``.

    Returns an empty series when fewer than 60 common months exist.
    """
    quote = StockAsset.get_stock(stock_id).quote
    prices = quote.tcloseaf.replace(0.0, method='pad')
    stock_daily = prices.pct_change()

    bench_daily = Asset.load_nav_series('120000016').pct_change()

    # Monthly sums; the trailing month is discarded by ``iloc[:-1]``.
    stock_monthly = stock_daily.resample('m').sum().iloc[:-1]
    bench_monthly = bench_daily.resample('m').sum().iloc[:-1]

    months = stock_monthly.index.intersection(bench_monthly.index)
    stock_monthly = stock_monthly.loc[months]
    bench_monthly = bench_monthly.loc[months]

    result = pd.Series()
    if len(months) < 60:
        return result

    for end in range(60, len(months)):
        # Expanding window: every common month up to position ``end``.
        window = months[:end + 1]
        targets = stock_monthly.loc[window].values
        regressors = sm.add_constant(
            bench_monthly.loc[window].values.reshape(-1, 1))
        fitted = sm.OLS(targets, regressors).fit()
        slope = fitted.params[1]
        resid_std = fitted.resid.std()
        result.loc[window[-1]] = pow(slope * resid_std, 0.5)

    # Add a NaN entry for today so the daily resample extends to today.
    result.loc[get_today()] = np.nan
    return result.resample('d').last().fillna(method='pad')
def cal_equtotliab(self, stock_id):
    """Return the ``equtotliab`` field of the stock's fundamental data."""
    return StockAsset.get_stock(stock_id).fdmt.equtotliab
def cal_ltmliabtota(self, stock_id):
    """Return the ``ltmliabtota`` field of the stock's fundamental data."""
    return StockAsset.get_stock(stock_id).fdmt.ltmliabtota
def cal_cashrt(self, stock_id):
    """Return the ``cashrt`` field of the stock's fundamental data."""
    return StockAsset.get_stock(stock_id).fdmt.cashrt
def cal_currentrt(self, stock_id):
    """Return the ``currentrt`` field of the stock's fundamental data."""
    return StockAsset.get_stock(stock_id).fdmt.currentrt
def cal_sgpmargin(self, stock_id):
    """Return the ``sgpmargin`` field of the stock's fundamental data."""
    return StockAsset.get_stock(stock_id).fdmt.sgpmargin
def cal_roa(self, stock_id):
    """Return the ``roa`` field of the stock's fundamental data."""
    return StockAsset.get_stock(stock_id).fdmt.roa
def cal_bp(self, stock_id):
    """Return the reciprocal of the ``pb`` field of the fundamental data."""
    fundamentals = StockAsset.get_stock(stock_id).fdmt
    return 1 / fundamentals.pb
def cal_turnover(self, stock_id, period=23):
    """Return the rolling mean of the quote's ``turnrate``.

    Args:
        stock_id: id passed to ``StockAsset.get_stock``.
        period: rolling window length in rows (default 23).
    """
    turnover = StockAsset.get_stock(stock_id).quote.turnrate
    return turnover.rolling(period).mean()
def cal_size(self, stock_id):
    """Return the ``totmktcap`` field of the stock's quote data."""
    return StockAsset.get_stock(stock_id).quote.totmktcap
def cal_roe(self, stock_id):
    """Return the ``roedilutedcut`` field of the stock's fundamental data."""
    return StockAsset.get_stock(stock_id).fdmt.roedilutedcut
def cal_ep(self, stock_id):
    """Return the reciprocal of the ``pettm`` field of the fundamental data."""
    fundamentals = StockAsset.get_stock(stock_id).fdmt
    return 1 / fundamentals.pettm
def valid_stock_table():
    """Build the table of usable stock closes and persist it.

    Daily ``tclose`` and ``amount`` are read from the 'caihui' database for
    every stock in the stock info table. A close is blanked (set to NaN)
    when any of these holds:

    * fewer than 25 non-missing closes in the trailing 60 rows;
    * the stock's trailing-252-row mean ``amount`` is missing or within the
      lowest 20% of that date's cross-section;
    * the date lies between the stock's ``selecteddate`` and ``outdate`` in
      the ST table;
    * the date is on or before the listing date plus 365 days.

    Columns are renamed from ``sk_secode`` to ``globalid`` and the table is
    handed to ``asset_stock_factor.update_valid_stock_table``.
    """
    all_stocks = StockAsset.all_stock_info()
    all_stocks = all_stocks.reset_index()
    all_stocks = all_stocks.set_index(['sk_secode'])
    st_stocks = StockAsset.stock_st()
    # Shift the listing date by a year; it is used below as the cut-off for
    # "listed for less than one year".
    all_stocks.sk_listdate = all_stocks.sk_listdate + timedelta(365)

    engine = database.connection('caihui')
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        price_table = asset_stock.tq_qt_skdailyprice
        sql = session.query(
            price_table.tradedate,
            price_table.secode,
            price_table.tclose,
            price_table.amount).filter(
                price_table.secode.in_(all_stocks.index)).statement
        quotation_amount = pd.read_sql(sql, session.bind,
                                       index_col=['tradedate', 'secode'],
                                       parse_dates=['tradedate'])
        session.commit()
    finally:
        # Release the session even when the query fails.
        session.close()

    # Filter suspended stocks: a zero close is treated as "no quote".
    quotation = quotation_amount[['tclose']]
    quotation = quotation.replace(0.0, np.nan)
    quotation = quotation.unstack()
    quotation.columns = quotation.columns.droplevel(0)

    # At least 25 of the last 60 trading days must not be suspended.
    quotation_count = quotation.rolling(60).count()
    quotation[quotation_count < 25] = np.nan

    # Filter out stocks ranked in the bottom 20% by average daily trading
    # amount over the past year.
    amount = quotation_amount[['amount']]
    amount = amount.unstack()
    amount.columns = amount.columns.droplevel(0)
    year_amount = amount.rolling(252, min_periods=100).mean()

    def percentile20nan(x):
        # nanpercentile ignores missing stocks; np.percentile returns NaN as
        # soon as the row holds one NaN, which disabled the filter.
        x[x <= np.nanpercentile(x, 20)] = np.nan
        return x

    year_amount = year_amount.apply(percentile20nan, axis=1)
    quotation[year_amount.isnull()] = np.nan

    # Column membership is constant from here on; build the lookup once.
    quoted_secodes = set(quotation.columns)

    # Filter ST stocks.
    for i, secode in enumerate(st_stocks.index):
        record = st_stocks.iloc[i]
        if secode in quoted_secodes:
            quotation.loc[record.selecteddate:record.outdate,
                          secode] = np.nan

    # Filter stocks listed for less than one year.
    for secode in all_stocks.index:
        if secode in quoted_secodes:
            quotation.loc[:all_stocks.loc[secode, 'sk_listdate'],
                          secode] = np.nan

    quotation = quotation.rename(
        columns=dict(zip(all_stocks.index, all_stocks.globalid)))

    asset_stock_factor.update_valid_stock_table(quotation)