def fc_update_nav(ctx, optid):
    """Cluster the factor indices on the latest trade date and store one NAV per cluster.

    Steps performed:

    1. Build the factor ids '120000001' .. '120000039', skipping the
       blacklisted numbers.
    2. Take the last date returned by
       ``ATradeDate.month_trade_date(begin_date='2018-01-01')`` and the
       365-day window that ends on it.
    3. Feed ``load_ind(...)`` for that window to ``clusterKMeansBase`` and use
       element 1 of its result as the mapping cluster label -> members.
    4. Print the ``base_ra_index`` rows of every cluster.
    5. For every cluster, take the row-wise mean of the members'
       ``pct_change()``, compound it into a NAV and hand it to
       ``database.batch`` together with the rows currently returned by
       ``asset_ra_composite_asset_nav.load_nav`` for the same id.

    Args:
        ctx: Not used in this function body.
        optid: Not used in this function body.
    """
    window_days = 365
    # 40 is listed although range(1, 40) already stops at 39.
    excluded = [24, 32, 40]
    factor_ids = []
    for number in range(1, 40):
        if number not in excluded:
            factor_ids.append('1200000%02d' % number)

    trade_dates = ATradeDate.month_trade_date(begin_date='2018-01-01')
    last_date = trade_dates[-1]
    window_end = last_date.strftime('%Y-%m-%d')
    window_start = (last_date - datetime.timedelta(window_days)).strftime('%Y-%m-%d')

    corr0 = load_ind(factor_ids, window_start, window_end)
    clustering = clusterKMeansBase(corr0, maxNumClusters=10, n_init=100)
    raw_clusters = clustering[1]
    # Labels and member lists are sorted independently and then paired, so the
    # smallest label ends up with the member list that sorts first.
    ordered_labels = sorted(raw_clusters)
    ordered_members = sorted(raw_clusters.values())
    asset_cluster = dict(zip(ordered_labels, ordered_members))

    factor_name = base_ra_index.load()
    for members in asset_cluster.values():
        # Members are cast to int before being used as row labels.
        print(factor_name.loc[np.array(members).astype('int')])

    df_assets = pd.DataFrame(
        {factor_id: Asset.load_nav_series(factor_id) for factor_id in factor_ids}
    )

    db = database.connection('asset')
    t = Table('ra_composite_asset_nav', MetaData(bind=db), autoload=True)

    for layer, layer_assets in asset_cluster.items():
        # The stored id uses the cluster label shifted by one.
        layer_id = 'FC.000001.%d' % (layer + 1)

        # Row-wise mean of the members' period-over-period changes; rows with
        # any missing member value are dropped first.
        mean_inc = df_assets.loc[:, layer_assets].pct_change().dropna().mean(1)

        frame = mean_inc.reset_index()
        frame.columns = ['ra_date', 'ra_inc']
        frame['ra_nav'] = (1 + frame['ra_inc']).cumprod()
        frame['ra_asset_id'] = layer_id

        df_new = frame.set_index(['ra_asset_id', 'ra_date'])
        df_new = df_new.reindex(columns=['ra_nav', 'ra_inc'])
        df_old = asset_ra_composite_asset_nav.load_nav(layer_id)
        database.batch(db, t, df_new, df_old, timestamp=False)
def fc_rolling(ctx, optid):
    """Print the factor clusters found for each trade date's trailing window.

    For every date returned by
    ``ATradeDate.month_trade_date(begin_date='2018-01-01')`` the function
    prints the 365-day window ending on that date, clusters
    ``load_ind(...)`` for the window with ``clusterKMeansBase`` and prints the
    ``base_ra_index`` rows of each cluster, followed by an empty line.

    NOTE(review): a second function named ``fc_rolling`` appears later in this
    file; if both are plain module-level definitions, the later one replaces
    this one.

    Args:
        ctx: Not used in this function body.
        optid: Not used in this function body.
    """
    lookback_days = 365
    # 40 is listed although range(1, 40) already stops at 39.
    blacklist = [24, 32, 40]
    factor_ids = ['1200000%02d' % i for i in range(1, 40) if i not in blacklist]

    trade_dates = ATradeDate.month_trade_date(begin_date='2018-01-01')

    # The index table does not depend on the date, so load it once instead of
    # once per trade date.
    factor_name = base_ra_index.load()

    for date in trade_dates:
        start_date = (date - datetime.timedelta(lookback_days)).strftime('%Y-%m-%d')
        end_date = date.strftime('%Y-%m-%d')
        print('%s %s' % (start_date, end_date))

        corr0 = load_ind(factor_ids, start_date, end_date)
        res = clusterKMeansBase(corr0, maxNumClusters=10, n_init=100)

        # Element 1 of the result maps cluster label -> members. Labels and
        # member lists are sorted independently and then paired, so the
        # smallest label goes with the member list that sorts first.
        clusters = res[1]
        asset_cluster = dict(zip(sorted(clusters), sorted(clusters.values())))

        for members in asset_cluster.values():
            # Members are cast to int before being used as row labels.
            print(factor_name.loc[np.array(members).astype('int')])
        # Empty line between consecutive dates.
        print('')
def fc_rolling(ctx, optid):
    """Pick the best-scoring FactorCluster model per trade date and print its clusters.

    For every date returned by
    ``ATradeDate.month_trade_date(begin_date='2017-01-01')`` the function fits
    ``FactorCluster`` with 7 to 11 clusters (method ``'beta'``) on the window
    [date - 365 days, date] with a forecast end of date + 90 days, keeps the
    model with the highest ``silhouette_samples_value`` and prints the
    ``base_ra_index`` rows of each of its clusters.

    NOTE(review): an earlier function with the same name appears in this
    file; if both are plain module-level definitions, this one replaces it.

    Args:
        ctx: Not used in this function body.
        optid: Not used in this function body.
    """
    # 40 is listed although range(1, 40) already stops at 39.
    blacklist = [24, 40]
    asset_ids = ['1200000%02d' % i for i in range(1, 40) if i not in blacklist]
    assets = {asset_id: Asset.load_nav_series(asset_id) for asset_id in asset_ids}

    lookback_days = 365
    forecast_days = 90
    method = 'beta'
    start_date = '2017-01-01'

    # NOTE(review): df_result collects (date, factor_id, layer) rows but is
    # neither returned nor stored by this function.
    df_result = pd.DataFrame(columns=['date', 'factor_id', 'layer'])

    trade_dates = ATradeDate.month_trade_date(begin_date=start_date)

    # The index table does not depend on the date, so load it once.
    factor_name = base_ra_index.load()

    engine = database.connection('asset')
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        for date in trade_dates:
            print(date)
            sdate = (date - datetime.timedelta(lookback_days)).strftime('%Y-%m-%d')
            edate = date.strftime('%Y-%m-%d')
            fdate = (date + datetime.timedelta(forecast_days)).strftime('%Y-%m-%d')

            # An earlier variant (kept in the original only as a disabled
            # string block) started at 5 clusters and added one until
            # fc.inner_score reached 0.88; the code below scans a fixed range.
            best_score = None
            best_model = None
            for n_clusters in range(7, 12):
                candidate = FactorCluster(
                    assets, n_clusters, sdate, edate, fdate,
                    method=method, bf_ids=None
                )
                candidate.handle()
                score = candidate.silhouette_samples_value
                print('%s silhouette_samples_value: %s' % (n_clusters, score))
                # ">=" keeps the previous tie rule: among equal scores the
                # larger cluster count wins.
                if best_model is None or score >= best_score:
                    best_score = score
                    best_model = candidate

            fc = best_model
            print('best cluster num: %s' % (fc.n_clusters,))

            for k, v in fc.asset_cluster.items():
                # Members are cast to int before being used as row labels.
                v = np.array(v).astype('int')
                print(factor_name.loc[v])
                for vv in v:
                    df_result.loc[len(df_result)] = [date, vv, k]
            # Empty line between consecutive dates.
            print('')

        session.commit()
    finally:
        # Release the session even when a model fit or a lookup raises.
        session.close()
blacklist = [24, 32, 40] factor_ids = [ '1200000%02d' % i for i in range(1, 40) if i not in blacklist ] trade_dates = ATradeDate.month_trade_date(begin_date='2018-01-01') for date in trade_dates: # start_date = '%d-%02d-01'%(year, month) # end_date = '%d-%02d-01'%(year+1, month) start_date = (date - datetime.timedelta(lookback_days)).strftime('%Y-%m-%d') end_date = date.strftime('%Y-%m-%d') print start_date, end_date # corr0 = load_fund(start_date, end_date) corr0 = load_ind(factor_ids, start_date, end_date) # corr1, clstrs, silh = clusterKMeansBase(corr0,maxNumClusters=10,n_init=1) factor_name = base_ra_index.load() # df_fund = base_ra_fund.load() # df_fund.index = df_fund.ra_code.astype('int') # df_fund = df_fund.set_index('ra_code') # factor_name = df_fund.ra_name res = None while res is None: try: # res = clusterKMeansTop(corr0, maxNumClusters=10, n_init=1) res = clusterKMeansBase(corr0, maxNumClusters=10, n_init=10) except: pass for k, v in res[1].iteritems(): v = np.array(v).astype('int') print factor_name.loc[v]