def fc_low_test(ctx, optid):
    """Print rolling factor clusters produced by ``clusterKMeansLow``.

    For each date returned by
    ``ATradeDate.month_trade_date(begin_date='2018-01-01')`` the data for a
    fixed list of factor ids is loaded with ``load_mv`` over the trailing
    ``365 * 15`` days, divided by 10 and clustered. The members of every
    cluster are printed (one cluster per line) followed by an empty line.

    ``ctx`` and ``optid`` are accepted but not read by this function.
    """
    window = datetime.timedelta(365 * 15)
    date_fmt = '%Y-%m-%d'

    factor_ids = [
        # first group
        '120000013', '120000020', '120000014', '120000015',
        # second group
        '120000010', '120000011', '120000039',
        # third group
        '120000053', '120000056', '120000058', '120000073',
        'MZ.F00010', 'MZ.F00050', 'MZ.F00060', 'MZ.F00070', 'MZ.F10010',
    ]

    for trade_date in ATradeDate.month_trade_date(begin_date='2018-01-01'):
        start_date = (trade_date - window).strftime(date_fmt)
        end_date = trade_date.strftime(date_fmt)
        print(start_date, end_date)

        scaled = load_mv(factor_ids, start_date, end_date) / 10
        _, clusters, _ = clusterKMeansLow(scaled, n_init=10)

        # Re-key the result: sorted cluster keys are paired with the sorted
        # member lists, so the printed order does not depend on the labels
        # the clustering routine happened to assign.
        ordered_keys = sorted(clusters)
        ordered_members = sorted(clusters.values())
        clusters = dict(zip(ordered_keys, ordered_members))

        for members in clusters.values():
            print(members)
        print()
def fc_low(ctx, optid):
    """Print rolling factor clusters produced by ``clusterSimple``.

    For each date returned by
    ``ATradeDate.month_trade_date(begin_date='2017-01-01')`` the outputs of
    ``load_corr`` and ``load_std`` over the trailing ``365 * 15`` days are
    passed to ``clusterSimple`` together with the look-back length in years.
    The members of every cluster are printed (one cluster per line) followed
    by an empty line.

    ``ctx`` and ``optid`` are accepted but not read by this function.
    """
    years = 15
    window = datetime.timedelta(365 * years)
    date_fmt = '%Y-%m-%d'

    factor_ids = [
        # first group
        '120000013', '120000014', '120000015', '120000028', '120000029',
        # second group
        '120000010', '120000011', '120000039',
        # third group
        '120000053', '120000056', '120000058', '120000073',
    ]

    for trade_date in ATradeDate.month_trade_date(begin_date='2017-01-01'):
        start_date = (trade_date - window).strftime(date_fmt)
        end_date = trade_date.strftime(date_fmt)
        print(start_date, end_date)

        corr0 = load_corr(factor_ids, start_date, end_date)
        std0 = load_std(factor_ids, start_date, end_date)

        # Alternatives that were left disabled in the original code:
        # asset_cluster = clusterSpectral(corr0*std0)
        # _, asset_cluster, _ = clusterKMeansHigh(corr0*std0, n_init=10)
        clusters = clusterSimple(corr0, std0, years)

        # Pair sorted cluster keys with the sorted member lists so the
        # printed order is independent of the labels that were assigned.
        ordered_keys = sorted(clusters)
        ordered_members = sorted(clusters.values())
        clusters = dict(zip(ordered_keys, ordered_members))

        for members in clusters.values():
            print(members)
        print()
def fc_high(ctx, optid):
    """Print rolling factor clusters from ``clusterSimple`` on cubed ``std0``.

    For each date returned by
    ``ATradeDate.month_trade_date(begin_date='2017-01-01')`` the outputs of
    ``load_corr`` and ``load_std`` over the trailing ``365 * 5`` days are
    loaded; ``clusterSimple`` is then called with ``corr0``, ``std0 ** 3`` and
    the look-back length in years. The members of every cluster are printed
    (one cluster per line) followed by an empty line.

    ``ctx`` and ``optid`` are accepted but not read by this function.
    """
    years = 5
    window = datetime.timedelta(365 * years)
    date_fmt = '%Y-%m-%d'

    factor_ids = [
        # first group
        '120000013', '120000015', '120000020', '120000014', '120000028',
        # second group
        '120000016', '120000051', '120000056', '120000073',
        'MZ.FA0010', 'MZ.FA0050', 'MZ.FA0070', 'MZ.FA1010',
    ]

    for trade_date in ATradeDate.month_trade_date(begin_date='2017-01-01'):
        start_date = (trade_date - window).strftime(date_fmt)
        end_date = trade_date.strftime(date_fmt)
        print(start_date, end_date)

        corr0 = load_corr(factor_ids, start_date, end_date)
        std0 = load_std(factor_ids, start_date, end_date)
        clusters = clusterSimple(corr0, std0**3, years)

        # Pair sorted cluster keys with the sorted member lists so the
        # printed order is independent of the labels that were assigned.
        ordered_keys = sorted(clusters)
        ordered_members = sorted(clusters.values())
        clusters = dict(zip(ordered_keys, ordered_members))

        for members in clusters.values():
            print(members)
        print()
def fc_update_nav(ctx, optid):
    """Cluster the factors on the latest trade date and store each layer's nav.

    Steps, as performed by the code below:

    1. Build the factor id list ``1200000NN`` for ``NN`` in 1..39, skipping
       the blacklisted numbers.
    2. Take the last date from
       ``ATradeDate.month_trade_date(begin_date='2018-01-01')`` and load the
       clustering input with ``load_ind`` over the trailing 365 days.
    3. Cluster with ``clusterKMeansBase`` and print the ``base_ra_index``
       rows belonging to every cluster.
    4. For every cluster ("layer"), average the members' simple returns
       across factors, compound them into a nav series and hand it to
       ``database.batch`` against table ``ra_composite_asset_nav`` under the
       id ``FC.000001.<layer + 1>``.

    ``ctx`` and ``optid`` are accepted but not read by this function.
    """
    lookback_days = 365
    blacklist = [24, 32, 40]
    factor_ids = [
        '1200000%02d' % i for i in range(1, 40) if i not in blacklist
    ]

    trade_dates = ATradeDate.month_trade_date(begin_date='2018-01-01')
    date = trade_dates[-1]
    start_date = (date -
                  datetime.timedelta(lookback_days)).strftime('%Y-%m-%d')
    end_date = date.strftime('%Y-%m-%d')

    corr0 = load_ind(factor_ids, start_date, end_date)
    res = clusterKMeansBase(corr0, maxNumClusters=10, n_init=100)
    asset_cluster = res[1]
    # Pair sorted cluster keys with the sorted member lists so layer numbers
    # do not depend on the labels the clustering routine assigned.
    asset_cluster = dict(
        zip(sorted(asset_cluster), sorted(asset_cluster.values())))

    factor_name = base_ra_index.load()
    # Python 3: dict.items() and print() replace the Python 2 iteritems() and
    # print statement, which no longer parse/run in a file that uses f-strings.
    for members in asset_cluster.values():
        member_ids = np.array(members).astype('int')
        print(factor_name.loc[member_ids])

    assets = {
        factor_id: Asset.load_nav_series(factor_id)
        for factor_id in factor_ids
    }
    df_assets = pd.DataFrame(assets)

    db = database.connection('asset')
    metadata = MetaData(bind=db)
    t = Table('ra_composite_asset_nav', metadata, autoload=True)

    for layer, layer_assets in asset_cluster.items():
        layer_id = 'FC.000001.%d' % (layer + 1)

        layer_nav = df_assets.loc[:, layer_assets]
        # Row-wise mean of the members' period returns; rows with any
        # missing return are dropped before averaging.
        layer_ret = layer_nav.pct_change().dropna()
        layer_ret = layer_ret.mean(1)
        layer_ret = layer_ret.reset_index()
        layer_ret.columns = ['ra_date', 'ra_inc']
        layer_ret['ra_nav'] = (1 + layer_ret['ra_inc']).cumprod()
        layer_ret['ra_asset_id'] = layer_id

        df_new = layer_ret.set_index(['ra_asset_id', 'ra_date'])
        df_old = asset_ra_composite_asset_nav.load_nav(layer_id)
        df_new = df_new.reindex(columns=['ra_nav', 'ra_inc'])
        database.batch(db, t, df_new, df_old, timestamp=False)
def fc_rolling(ctx, optid):
    """Print rolling ``clusterKMeansBase`` clusters for the factor universe.

    For each date returned by
    ``ATradeDate.month_trade_date(begin_date='2018-01-01')`` the clustering
    input is loaded with ``load_ind`` over the trailing 365 days, clustered
    with ``clusterKMeansBase`` and the ``base_ra_index`` rows of every
    cluster are printed, followed by an empty line.

    ``ctx`` and ``optid`` are accepted but not read by this function.

    NOTE(review): SOURCE contains more than one ``def fc_rolling``; if both
    live in the same module the later definition replaces this one.
    """
    lookback_days = 365
    blacklist = [24, 32, 40]
    factor_ids = [
        '1200000%02d' % i for i in range(1, 40) if i not in blacklist
    ]

    trade_dates = ATradeDate.month_trade_date(begin_date='2018-01-01')
    for date in trade_dates:
        start_date = (date -
                      datetime.timedelta(lookback_days)).strftime('%Y-%m-%d')
        end_date = date.strftime('%Y-%m-%d')
        # Python 3 print() calls replace the Python 2 print statements.
        print(start_date, end_date)

        corr0 = load_ind(factor_ids, start_date, end_date)
        factor_name = base_ra_index.load()

        res = clusterKMeansBase(corr0, maxNumClusters=10, n_init=100)
        asset_cluster = res[1]
        # Pair sorted cluster keys with the sorted member lists so the
        # printed order is independent of the labels that were assigned.
        asset_cluster = dict(
            zip(sorted(asset_cluster), sorted(asset_cluster.values())))

        # dict.iteritems() does not exist on Python 3; only the member lists
        # are needed here.
        for members in asset_cluster.values():
            member_ids = np.array(members).astype('int')
            print(factor_name.loc[member_ids])
        print()
def pos_n_nav_update(stock_portfolio_info, begin_date, end_date):
    """Recalculate one stock portfolio and replace its stored pos/nav rows.

    Args:
        stock_portfolio_info: Row-like object whose ``.name`` is the
            portfolio id and which provides ``.loc['sp_type']`` and
            ``.loc['sp_algo']``.
        begin_date: Start of the trade-date index passed to ``ATradeDate``.
        end_date: End of the trade-date index passed to ``ATradeDate``.

    Returns:
        None. On any validation failure (unknown period, empty trade-date
        index, unknown algo class, unknown portfolio type) a red message is
        echoed and the function returns before touching the database.

    Side effects visible in this block: rows with the portfolio's
    ``globalid`` are deleted from ``sp_stock_portfolio_pos`` and
    ``sp_stock_portfolio_nav`` and new frames are passed to
    ``asset_sp_stock_portfolio_pos.save`` / ``asset_sp_stock_portfolio_nav.save``.
    """
    stock_portfolio_id = stock_portfolio_info.name
    stock_portfolio_type = stock_portfolio_info.loc['sp_type']
    algo = stock_portfolio_info.loc['sp_algo']

    df_argv = asset_sp_stock_portfolio_argv.load(
        portfolio_id=stock_portfolio_id)
    kwargs = df_argv.loc[stock_portfolio_id].sp_value.to_dict()

    # Stored argument values are converted to the types listed here.
    list_int_arg = ['look_back', 'exclusion']
    list_float_arg = [
        'percentage', 'percentage_low_beta', 'percentage_low_volatility',
        'percentage_high_dividend'
    ]
    list_pd_index_arg = ['stock_portfolio_ids']

    for arg in list_int_arg:
        if kwargs.get(arg) is not None:
            kwargs[arg] = int(kwargs[arg])

    for arg in list_float_arg:
        if kwargs.get(arg) is not None:
            kwargs[arg] = float(kwargs[arg])

    for arg in list_pd_index_arg:
        if kwargs.get(arg) is not None:
            # Comma-separated ids become a pandas Index of stripped strings.
            kwargs[arg] = pd.Index(
                [s.strip() for s in kwargs[arg].split(',')])

    period = kwargs.get('period', 'day')
    if period == 'day':
        kwargs['reindex'] = ATradeDate.trade_date(
            begin_date=begin_date, end_date=end_date).rename('trade_date')
    elif period == 'week':
        kwargs['reindex'] = ATradeDate.week_trade_date(
            begin_date=begin_date, end_date=end_date).rename('trade_date')
    elif period == 'month':
        kwargs['reindex'] = ATradeDate.month_trade_date(
            begin_date=begin_date, end_date=end_date).rename('trade_date')
    else:
        click.echo(
            click.style(
                f'\n Period {period} is unknown for stock portfolio {stock_portfolio_id}.',
                fg='red'))
        return

    if kwargs['reindex'].size == 0:
        click.echo(
            click.style(
                f'\n Trade date index for stock portfolio {stock_portfolio_id} is empty.',
                fg='red'))
        return

    prefix = stock_portfolio_id[:3]

    # Ids that are not 'CS.' and do not end in '00' use the 'Industry' variant.
    if prefix != 'CS.' and stock_portfolio_id[-2:] != '00':
        algo = f'Industry{algo}'
        # kwargs['sw_industry_code'] = f'{stock_portfolio_id[-2:]}0000'

    if prefix in ('SP.', 'CS.'):
        class_name = f'StockPortfolio{algo}'
    elif prefix == 'FP.':
        class_name = f'FactorPortfolio{algo}'
    else:
        class_name = algo

    # Only the attribute lookup can raise AttributeError, so only it is
    # guarded.
    try:
        cls = getattr(stock_portfolio, class_name)
    except AttributeError:
        click.echo(
            click.style(
                f'\n Algo {algo} is unknown for stock portfolio {stock_portfolio_id}.',
                fg='red'))
        return

    class_stock_portfolio = cls(**kwargs)
    click.echo(
        click.style(
            f'\n Stock data for stock portfolio {stock_portfolio_id} loaded.',
            fg='yellow'))

    if stock_portfolio_type == 0:
        class_stock_portfolio.calc_portfolio_nav(considering_status=False,
                                                 considering_fee=False)
    elif stock_portfolio_type == 1:
        class_stock_portfolio.calc_portfolio_nav(considering_status=True,
                                                 considering_fee=False)
    elif stock_portfolio_type == 2:
        class_stock_portfolio.calc_portfolio_nav(considering_status=True,
                                                 considering_fee=True)
    else:
        click.echo(
            click.style(
                f'\n Type {stock_portfolio_type} is unknown for stock portfolio {stock_portfolio_id}.',
                fg='red'))
        # Fix: stop here like the other error branches. Without this return
        # the code went on to read results that were never calculated and to
        # delete the portfolio's stored rows.
        return

    df_pos = deepcopy(class_stock_portfolio.df_stock_pos_adjusted)
    df_nav = pd.DataFrame({
        'nav': class_stock_portfolio.ser_portfolio_nav,
        'inc': class_stock_portfolio.ser_portfolio_inc
    })
    click.echo(
        click.style(
            f'\n Nav of stock portfolio {stock_portfolio_id} calculated.',
            fg='yellow'))

    class_stock_portfolio.portfolio_analysis()
    if stock_portfolio_type == 1:
        # Maps the configured index id to the benchmark id handed to
        # portfolio_statistic; an unlisted index_id raises KeyError.
        dict_benchmark = {
            '000016.SH': 'h00016.SH',
            '000300.SH': 'h00300.CSI',
            '000905.SH': 'h00905.CSI',
            '000906.SH': 'h00906.CSI'
        }
        benchmark_id = dict_benchmark[kwargs.get('index_id', '000906.SH')]
        try:
            class_stock_portfolio.portfolio_statistic(benchmark_id)
        except np.linalg.LinAlgError:
            # Deliberately best-effort: statistics are skipped, the update
            # itself continues.
            print('numpy.linalg.LinAlgError: Matrix is not positive definite')

    engine = database.connection('asset')
    metadata = MetaData(bind=engine)
    table_sp_pos = Table('sp_stock_portfolio_pos', metadata, autoload=True)
    table_sp_nav = Table('sp_stock_portfolio_nav', metadata, autoload=True)

    # Remove the portfolio's existing rows before saving the new ones.
    table_sp_pos.delete(
        table_sp_pos.c.globalid == stock_portfolio_id).execute()
    table_sp_nav.delete(
        table_sp_nav.c.globalid == stock_portfolio_id).execute()

    # Wide position frame -> long rows, keeping strictly positive weights.
    df_pos = df_pos.stack().rename('sp_sk_pos').reset_index().rename(
        columns={
            'trade_date': 'sp_date',
            'stock_id': 'sp_sk_id'
        })
    df_pos['globalid'] = stock_portfolio_id
    df_pos = df_pos.loc[df_pos.sp_sk_pos > 0.0].set_index(
        ['globalid', 'sp_date', 'sp_sk_id'])

    df_nav = df_nav.reset_index().rename(columns={
        'trade_date': 'sp_date',
        'nav': 'sp_nav',
        'inc': 'sp_inc'
    })
    df_nav['globalid'] = stock_portfolio_id
    df_nav = df_nav.set_index(['globalid', 'sp_date'])

    asset_sp_stock_portfolio_pos.save(stock_portfolio_id, df_pos)
    asset_sp_stock_portfolio_nav.save(stock_portfolio_id, df_nav)

    click.echo(
        click.style(
            f'\n Successfully updated pos and nav of stock portfolio {stock_portfolio_info.name}!',
            fg='green'))
def fc_rolling(ctx, optid):
    """Print the best ``FactorCluster`` partition for each rolling date.

    For each date returned by
    ``ATradeDate.month_trade_date(begin_date='2017-01-01')`` a
    ``FactorCluster`` is fitted for every cluster count in 7..11 using a
    365-day look-back and a 90-day forecast horizon. The model with the
    highest ``silhouette_samples_value`` is selected and the
    ``base_ra_index`` rows of each of its clusters are printed.

    ``ctx`` and ``optid`` are accepted but not read by this function.

    NOTE(review): SOURCE contains more than one ``def fc_rolling``; if both
    live in the same module the later definition wins.
    """
    engine = database.connection('asset')
    Session = sessionmaker(bind=engine)
    session = Session()
    # try/finally guarantees the session is closed even when loading or
    # clustering raises; the commit still only happens on success.
    try:
        blacklist = [24, 40]
        asset_ids = [
            '1200000%02d' % i for i in range(1, 40) if i not in blacklist
        ]
        assets = {
            asset_id: Asset.load_nav_series(asset_id)
            for asset_id in asset_ids
        }

        lookback_days = 365
        forecast_days = 90
        # NOTE(review): df_result is filled below but never returned or
        # persisted in this block - confirm whether it should be saved.
        df_result = pd.DataFrame(columns=['date', 'factor_id', 'layer'])

        start_date = '2017-01-01'
        trade_dates = ATradeDate.month_trade_date(begin_date=start_date)
        for date in trade_dates:
            print(date)
            sdate = (date -
                     datetime.timedelta(lookback_days)).strftime('%Y-%m-%d')
            edate = date.strftime('%Y-%m-%d')
            fdate = (date +
                     datetime.timedelta(forecast_days)).strftime('%Y-%m-%d')

            # A disabled earlier approach grew the cluster count from 5 until
            # fc.inner_score reached 0.88; the sweep below replaced it.
            method = 'beta'
            models = {}
            for i in range(7, 12):
                fc = FactorCluster(assets,
                                   i,
                                   sdate,
                                   edate,
                                   fdate,
                                   method=method,
                                   bf_ids=None)
                fc.handle()
                print(i, 'silhouette_samples_value:',
                      fc.silhouette_samples_value)
                # Keyed by score: on equal scores the later model wins, as in
                # the original.
                models[fc.silhouette_samples_value] = fc

            # max() over the dict keys replaces np.max(dict.keys()), which
            # does not yield a usable key on Python 3.
            best_score = max(models)
            fc = models[best_score]
            print('best cluster num:', fc.n_clusters)

            factor_name = base_ra_index.load()
            for layer, members in fc.asset_cluster.items():
                member_ids = np.array(members).astype('int')
                print(factor_name.loc[member_ids])
                for member_id in member_ids:
                    df_result.loc[len(df_result)] = [date, member_id, layer]
            print()

        session.commit()
    finally:
        session.close()
for i in clstrs2.keys() ]) if newTstatMean <= meanRedoTstat: return corr1, clstrs, silh else: return corrNew, clstrsNew, silhNew if __name__ == '__main__': lookback_days = 365 blacklist = [24, 32, 40] factor_ids = [ '1200000%02d' % i for i in range(1, 40) if i not in blacklist ] trade_dates = ATradeDate.month_trade_date(begin_date='2018-01-01') for date in trade_dates: # start_date = '%d-%02d-01'%(year, month) # end_date = '%d-%02d-01'%(year+1, month) start_date = (date - datetime.timedelta(lookback_days)).strftime('%Y-%m-%d') end_date = date.strftime('%Y-%m-%d') print start_date, end_date # corr0 = load_fund(start_date, end_date) corr0 = load_ind(factor_ids, start_date, end_date) # corr1, clstrs, silh = clusterKMeansBase(corr0,maxNumClusters=10,n_init=1) factor_name = base_ra_index.load() # df_fund = base_ra_fund.load() # df_fund.index = df_fund.ra_code.astype('int') # df_fund = df_fund.set_index('ra_code') # factor_name = df_fund.ra_name