def fc_update_nav(ctx, optid):
    """Cluster the factor indices and store one composite NAV per cluster.

    Correlation input is loaded with ``load_ind`` for the 365 days ending at
    the last date returned by
    ``ATradeDate.month_trade_date(begin_date='2018-01-01')`` and handed to
    ``clusterKMeansBase``.  For each resulting cluster the row-wise mean of
    the members' ``pct_change`` returns is compounded into a NAV series and
    passed to ``database.batch`` for table ``ra_composite_asset_nav`` under
    the asset id ``FC.000001.<cluster key + 1>``.

    ``ctx`` and ``optid`` are accepted but not used by this function.
    """
    window = datetime.timedelta(365)
    excluded = [24, 32, 40]
    factor_ids = ['1200000%02d' % n for n in range(1, 40) if n not in excluded]

    last_date = ATradeDate.month_trade_date(begin_date='2018-01-01')[-1]
    start_date = (last_date - window).strftime('%Y-%m-%d')
    end_date = last_date.strftime('%Y-%m-%d')

    corr0 = load_ind(factor_ids, start_date, end_date)
    clusters = clusterKMeansBase(corr0, maxNumClusters=10, n_init=100)[1]
    # Cluster keys and member lists are sorted separately and re-paired, so
    # the smallest key ends up with the member list that sorts first.
    ordered_keys = sorted(clusters)
    ordered_members = sorted(clusters.values())
    asset_cluster = dict(zip(ordered_keys, ordered_members))

    # Show which factors landed in each cluster.
    factor_name = base_ra_index.load()
    for members in asset_cluster.values():
        print(factor_name.loc[np.array(members).astype('int')])

    df_assets = pd.DataFrame(
        {fid: Asset.load_nav_series(fid) for fid in factor_ids})

    db = database.connection('asset')
    t = Table('ra_composite_asset_nav', MetaData(bind=db), autoload=True)

    for layer, members in asset_cluster.items():
        layer_id = 'FC.000001.%d' % (layer + 1)

        # Mean of the member returns per row, then compounded into a NAV.
        mean_ret = df_assets.loc[:, members].pct_change().dropna().mean(1)
        frame = mean_ret.reset_index()
        frame.columns = ['ra_date', 'ra_inc']
        frame['ra_nav'] = (1 + frame['ra_inc']).cumprod()
        frame['ra_asset_id'] = layer_id

        df_new = frame.set_index(['ra_asset_id', 'ra_date'])
        df_new = df_new.reindex(columns=['ra_nav', 'ra_inc'])
        df_old = asset_ra_composite_asset_nav.load_nav(layer_id)
        database.batch(db, t, df_new, df_old, timestamp=False)
def fc_rolling(ctx, optid):
    """Print the factor clusters found for each rolling one-year window.

    For every date returned by
    ``ATradeDate.month_trade_date(begin_date='2018-01-01')`` the correlation
    input for the preceding 365 days is loaded with ``load_ind``, clustered
    with ``clusterKMeansBase`` and the ``base_ra_index`` rows of each
    cluster's members are printed, followed by an empty line.

    ``ctx`` and ``optid`` are accepted but not used by this function.
    """
    window = datetime.timedelta(365)
    excluded = [24, 32, 40]
    factor_ids = ['1200000%02d' % n for n in range(1, 40) if n not in excluded]

    for date in ATradeDate.month_trade_date(begin_date='2018-01-01'):
        start_date = (date - window).strftime('%Y-%m-%d')
        end_date = date.strftime('%Y-%m-%d')
        print('%s %s' % (start_date, end_date))

        corr0 = load_ind(factor_ids, start_date, end_date)
        factor_name = base_ra_index.load()
        clusters = clusterKMeansBase(corr0, maxNumClusters=10, n_init=100)[1]
        # Cluster keys and member lists are sorted separately and re-paired.
        asset_cluster = dict(zip(sorted(clusters), sorted(clusters.values())))

        for members in asset_cluster.values():
            print(factor_name.loc[np.array(members).astype('int')])
        print('')
# Example #3
# 0
def fc_rolling(ctx, optid):
    """Print factor clusterings per trade date from two clustering passes.

    Pass 1: for every date returned by
    ``ATradeDate.month_trade_date(begin_date='2017-01-01')`` a
    ``FactorCluster`` is built for each cluster count from 7 to 11, given the
    date 365 days earlier, the date itself and the date 90 days later.  The
    instance with the highest ``silhouette_samples_value`` is kept and the
    ``base_ra_index`` rows of each of its clusters are printed.

    Pass 2: for every date returned for ``begin_date='2018-01-01'`` the
    ``load_ind`` data of the preceding 365 days is clustered with
    ``clusterKMeansBase`` and the members of each cluster are printed.

    ``ctx`` and ``optid`` are accepted but not used by this function.
    Nothing is returned.

    Raises:
        RuntimeError: if ``clusterKMeansBase`` keeps returning ``None``.
        Exception: whatever ``clusterKMeansBase`` raised on its final
            attempt, once the retry budget is used up.
    """
    lookback_days = 365
    forecast_days = 90
    max_cluster_attempts = 10

    # 40 lies outside range(1, 40), so only 24 is actually excluded here.
    blacklist = [24, 40]
    asset_ids = ['1200000%02d' % i for i in range(1, 40) if i not in blacklist]
    assets = {
        asset_id: Asset.load_nav_series(asset_id) for asset_id in asset_ids
    }

    # The loader takes no date argument, so one call serves both passes.
    factor_name = base_ra_index.load()

    engine = database.connection('asset')
    session = sessionmaker(bind=engine)()
    try:
        for date in ATradeDate.month_trade_date(begin_date='2017-01-01'):
            print(date)
            sdate = (date -
                     datetime.timedelta(lookback_days)).strftime('%Y-%m-%d')
            edate = date.strftime('%Y-%m-%d')
            fdate = (date +
                     datetime.timedelta(forecast_days)).strftime('%Y-%m-%d')

            fc = _select_best_factor_cluster(assets, sdate, edate, fdate)
            print('best cluster num: %s' % (fc.n_clusters,))
            _print_cluster_members(factor_name, fc.asset_cluster)
            print('')
        session.commit()
    finally:
        # Release the connection even when a clustering step fails.
        session.close()

    # Only 24 and 32 take effect; 40 is outside range(1, 40).
    blacklist = [24, 32, 40]
    factor_ids = [
        '1200000%02d' % i for i in range(1, 40) if i not in blacklist
    ]
    for date in ATradeDate.month_trade_date(begin_date='2018-01-01'):
        start_date = (date -
                      datetime.timedelta(lookback_days)).strftime('%Y-%m-%d')
        end_date = date.strftime('%Y-%m-%d')
        print('%s %s' % (start_date, end_date))
        corr0 = load_ind(factor_ids, start_date, end_date)
        res = _cluster_with_retry(corr0, max_cluster_attempts)
        _print_cluster_members(factor_name, res[1])


def _select_best_factor_cluster(assets, sdate, edate, fdate):
    """Return the FactorCluster (7..11 clusters) with the highest score."""
    best = None
    for n_clusters in range(7, 12):
        candidate = FactorCluster(assets,
                                  n_clusters,
                                  sdate,
                                  edate,
                                  fdate,
                                  method='beta',
                                  bf_ids=None)
        candidate.handle()
        score = candidate.silhouette_samples_value
        print('%s silhouette_samples_value: %s' % (n_clusters, score))
        # ``>=`` lets the later candidate win a tie.
        if best is None or score >= best.silhouette_samples_value:
            best = candidate
    return best


def _cluster_with_retry(corr0, max_attempts):
    """Run clusterKMeansBase, retrying a bounded number of times on failure."""
    # NOTE(review): retries are kept on the assumption that failures are
    # intermittent (e.g. random initialisation) -- confirm.
    for attempt in range(1, max_attempts + 1):
        try:
            res = clusterKMeansBase(corr0, maxNumClusters=10, n_init=10)
        except Exception as exc:
            print('clusterKMeansBase failed (attempt %d/%d): %s'
                  % (attempt, max_attempts, exc))
            if attempt == max_attempts:
                raise
            continue
        if res is not None:
            return res
    raise RuntimeError(
        'clusterKMeansBase returned no result after %d attempts'
        % max_attempts)


def _print_cluster_members(factor_name, clusters):
    """Print the factor_name rows belonging to each cluster in clusters."""
    for _, members in clusters.items():
        print(factor_name.loc[np.array(members).astype('int')])