Ejemplo n.º 1
0
def gold_view_update(ctx, startdate, enddate, viewid):
    #function:"更新结果"
    mv = cal_gold_view()
    today = datetime.now().strftime('%Y-%m-%d')
    df = {}
    df['globalid'] = np.repeat(viewid, len(mv))
    df['bl_date'] = mv.index
    df['bl_view'] = mv.view_01.values
    df['bl_index_id'] = np.repeat('120000014', len(mv))
    df['created_at'] = np.repeat(today, len(mv))
    df['updated_at'] = np.repeat(today, len(mv))
    df_new = pd.DataFrame(df).set_index(['globalid', 'bl_date'])

    db = database.connection('asset')
    metadata = MetaData(bind=db)
    t = Table('ra_bl_view', metadata, autoload=True)
    columns = [
        t.c.globalid,
        t.c.bl_date,
        t.c.bl_view,
        t.c.bl_index_id,
        t.c.created_at,
        t.c.updated_at,
    ]
    s = select(columns, (t.c.globalid == viewid))
    df_old = pd.read_sql(s,
                         db,
                         index_col=['globalid', 'bl_date'],
                         parse_dates=['bl_date'])
    df_new = df_new[df_old.columns]  #保持跟df_old列名同步
    database.batch(db, t, df_new, df_old, timestamp=False)
    print('########### id=BL.000009 #######保存到表格:asset/ra_bl_view  ####')
    print('########### gold view date:', mv.index[-1].strftime('%Y-%m-%d'),
          '#####value:', mv.view_01.values[-1])
Ejemplo n.º 2
0
def saveStockFlr(stockFlr, date, sdate=None, edate=None):

    df = pd.DataFrame(columns=['br_date', 'br_stock_id', 'br_flr'])
    df['br_stock_id'] = stockFlr.index
    df['br_flr'] = stockFlr.values
    df['br_date'] = date
    df.set_index(['br_date', 'br_stock_id'], inplace=True)

    db = create_engine(uris['multi_factor'])
    meta = MetaData(bind=db)
    t = Table('barra_resid_fluctuation_ratio', meta, autoload=True)
    columns = [
        t.c.br_date,
        t.c.br_stock_id,
        t.c.br_flr,
    ]
    sql = select(columns)
    if sdate != None:
        sql = sql.where(t.c.br_date >= sdate)
    if edate != None:
        sql = sql.where(t.c.br_date <= edate)
    dfBase = pd.read_sql(sql, db)
    dfBase['br_date'] = dfBase['br_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['br_date', 'br_stock_id'], inplace=True)

    database.batch(db, t, df, dfBase, timestamp=False)
Ejemplo n.º 3
0
def corr_update(corr, codes):
    ra_index = base_ra_index.find(corr['ra_index_id'])
    if ra_index is None:
        click.echo(click.style(
            "unknown index [%s]for calc corr!" % (corr['ra_index_id']), fg="yellow"))
        return False

    yesterday = (datetime.now() - timedelta(days=1)); 
    enddate = yesterday.strftime("%Y-%m-%d")        
    
    #
    # 加载指数数据
    #
    index_code = ra_index['ra_code']
    if corr['ra_date_type'] == 1:
        df_nav_index = DBData.db_index_value_daily('2015-10-08', enddate, codes=[index_code])
    else:
        df_nav_index = DBData.db_index_value('2015-10-08', enddate, codes=[index_code])
    df_inc_index = df_nav_index.pct_change().fillna(0.0)

    #
    # 加载基金列表
    #
    df_fund = base_ra_fund.load(codes=codes)
    
    data = []
    with click.progressbar(length=len(df_fund.index), label=('update corr for corr %d' % (corr['globalid'])).ljust(30)) as bar:
        for _,fund in df_fund.iterrows():
            bar.update(1)
            tmp = corr_update_fund(corr, fund, df_inc_index)
            if tmp is not None:
                data.append([
                    corr['globalid'],
                    fund['globalid'],
                    fund['ra_code'],
                    "%.4f" % (tmp),
                ])

    df_new = pd.DataFrame(data, columns=['ra_corr_id', 'ra_fund_id', 'ra_fund_code', 'ra_corr'])
    df_new = df_new.set_index(['ra_corr_id', 'ra_fund_id'])
    
    db = database.connection('base')
    # 加载旧数据
    t2 = Table('ra_corr_fund', MetaData(bind=db), autoload=True)
    columns2 = [
        t2.c.ra_corr_id,
        t2.c.ra_fund_id,
        t2.c.ra_fund_code,
        t2.c.ra_corr,
    ]
    stmt_select = select(columns2, (t2.c.ra_corr_id == corr['globalid']))
    if codes is not None:
        stmt_select = stmt_select.where(t2.c.ra_fund_code.in_(codes))
        
    df_old = pd.read_sql(stmt_select, db, index_col=['ra_corr_id', 'ra_fund_id'])
    if not df_old.empty:
        df_old['ra_corr'] = df_old['ra_corr'].map("{:.4f}".format)

    # 更新数据库
    database.batch(db, t2, df_new, df_old, timestamp=True)
def nav_update_cppi_benchmark(db, asset):

    df = load_index_for_asset(db['asset'], asset['globalid'])
    df = df.reset_index()
    globalid = df.loc[0, 'ra_fund_code']

    df = asset_ra_portfolio_nav.load_series(globalid, xtype=8)
    df = df.to_frame()
    df = df.reset_index()
    df['ra_asset_id'] = asset.globalid
    df['ra_inc'] = df['ra_nav'].pct_change().fillna(0.0)
    df.columns = ['ra_date', 'ra_nav', 'ra_asset_id', 'ra_inc']
    df = df.set_index(['ra_asset_id', 'ra_date'])
    df_new = df

    # 加载旧数据
    t2 = Table('ra_composite_asset_nav',
               MetaData(bind=db['asset']),
               autoload=True)
    columns2 = [
        t2.c.ra_asset_id,
        t2.c.ra_date,
        t2.c.ra_nav,
        t2.c.ra_inc,
    ]
    stmt_select = select(columns2, (t2.c.ra_asset_id == asset['globalid']))
    df_old = pd.read_sql(stmt_select,
                         db['asset'],
                         index_col=['ra_asset_id', 'ra_date'],
                         parse_dates=['ra_date'])
    if not df_old.empty:
        df_old = df_old.apply(format_nav_and_inc)

    # 更新数据库
    database.batch(db['asset'], t2, df_new, df_old, timestamp=False)
Ejemplo n.º 5
0
def macro_view_update(ctx, startdate, enddate, viewid, idx):

    backtest_interval = pd.date_range(startdate, enddate)
    rev = re_view(backtest_interval)
    irv = ir_view(backtest_interval)
    epsv = eps_view(backtest_interval, idx)
    #print rev.tail()
    #print irv.tail()
    #print epsv.tail()

    mv = pd.concat([rev, irv, epsv], 1)
    mv['mv'] = mv['rev'] + mv['irv'] + mv['epsv']
    #print mv.tail()
    #mv = mv.loc[:, ['mv']]

    today = datetime.now()
    mv_view_id = np.repeat(viewid, len(mv))
    mv_date = mv.index
    mv_inc = mv.mv.values

    created_at = np.repeat(today, len(mv))
    updated_at = np.repeat(today, len(mv))
    #df_inc_value = np.column_stack([mv_view_id, mv_date, mv_inc, created_at, updated_at])
    #df_inc = pd.DataFrame(df_inc_value, columns = ['mc_view_id', 'mc_date', 'mc_inc', 'created_at', 'updated_at'])
    union_mv = {}
    union_mv['globalid'] = mv_view_id
    union_mv['bl_date'] = mv_date
    union_mv['bl_view'] = np.sign(mv_inc)
    union_mv['created_at'] = created_at
    union_mv['updated_at'] = updated_at
    union_mv_df = pd.DataFrame(
        union_mv,
        columns=['globalid', 'bl_date', 'bl_view', 'created_at', 'updated_at'])

    for index_id in ['120000001', '120000002']:

        df_new = union_mv_df
        df_new['bl_index_id'] = index_id
        df_new = df_new.set_index(['globalid', 'bl_date', 'bl_index_id'])

        db = database.connection('asset')
        metadata = MetaData(bind=db)
        t = Table('ra_bl_view', metadata, autoload=True)
        columns = [
            t.c.globalid,
            t.c.bl_date,
            t.c.bl_view,
            t.c.bl_index_id,
            t.c.created_at,
            t.c.updated_at,
        ]
        s = select(columns).where(t.c.globalid == viewid).where(
            t.c.bl_index_id == index_id)
        df_old = pd.read_sql(s,
                             db,
                             index_col=['globalid', 'bl_date', 'bl_index_id'],
                             parse_dates=['bl_date'])
        database.batch(db, t, df_new, df_old, timestamp=False)
        print df_new.tail()
Ejemplo n.º 6
0
def save(gid,df):
    #function:"更新数据"
    db = database.connection('wind')
    t = Table('mc_gold_indicator',MetaData(bind=db),autoload=True)
    columns = [literal_column(c) for c in (df.index.names + list(df.columns))]
    s = select(columns).where(t.c.globalid==gid)
    df_old = pd.read_sql(s,db,index_col=['globalid','mc_gold_date'],parse_dates=['mc_gold_date'])
    database.batch(db,t,df,df_old,timestamp=False)
Ejemplo n.º 7
0
def fund_update_factor_pool(pool, adjust_points, optlimit, optcalc):
    ''' re calc fund for single fund pool
    '''
    lookback = pool.ra_lookback
    limit = optlimit

    if optcalc:
        #
        # 计算每个调仓点的最新配置
        #
        # ffr = asset_fund_factor.load_fund_factor_return()
        ffr = asset_stock_factor.load_stock_factor_return()
        ffr = ffr.unstack().T
        ffr.index = ffr.index.levels[1]
        ffe = asset_fund_factor.load_fund_factor_exposure()
        # ffe.to_csv('data/factor/fund_factor_exposure.csv', index_label = ['fund_id', 'ff_id', 'trade_date'])
        # ffe = pd.read_csv('data/factor/fund_factor_exposure.csv', index_col = ['fund_id', 'ff_id', 'trade_date'], parse_dates = ['trade_date'])
        ffe = ffe.reset_index()
        data = []
        pre_codes = None

        with click.progressbar(length=len(adjust_points), label='calc pool %s' % (pool.id)) as bar:
            for day in adjust_points:
                bar.update(1)
                # codes, codes_pool = pool_by_factor(pool, day, lookback, limit, ffr, ffe)
                codes, codes_pool = pool_by_single_factor(pool, day, lookback, limit, ffr, ffe)
                if pre_codes is None:
                    pre_codes = codes
                else:
                    pre_codes_origin = pre_codes.copy()
                    pre_codes = np.intersect1d(pre_codes, codes_pool)
                    new_codes = np.setdiff1d(codes, pre_codes)
                    fund_lack_num = limit - len(pre_codes)
                    codes = np.append(pre_codes, new_codes[:fund_lack_num])
                    pre_codes = codes
                    print('turnover rate:', 1 - len(np.intersect1d(codes, pre_codes_origin))/float(limit))

                print(day, len(codes), codes)
                if codes is None or len(codes) == 0:
                    continue
                ra_fund = base_ra_fund.load(codes = codes)
                ra_fund = ra_fund.set_index(['ra_code'])
                ra_pool = pool['id']
                for code in ra_fund.index:
                    ra_fund_id = ra_fund.loc[code, 'globalid']
                    data.append([ra_pool, day, ra_fund_id, code])
        fund_df = pd.DataFrame(data, columns = ['ra_pool', 'ra_date', 'ra_fund_id', 'ra_fund_code'])
        fund_df = fund_df.set_index(['ra_pool', 'ra_date', 'ra_fund_id'])

        df_new = fund_df
        columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
        s = select(columns)
        db = database.connection('asset')
        ra_pool_fund_t = Table('ra_pool_fund', MetaData(bind=db), autoload=True)
        s = s.where(ra_pool_fund_t.c.ra_pool.in_(df_new.index.get_level_values(0).tolist()))
        df_old = pd.read_sql(s, db, index_col = df_new.index.names)
        database.batch(db, ra_pool_fund_t, df_new, df_old)
Ejemplo n.º 8
0
def nav_update_category(db, pool, category):
    # 加载基金列表
    df = load_fund_category(pool['id'], category)

    # 构建均分仓位
    df['ra_ratio'] = 1.0
    df.set_index('ra_fund_code', append=True, inplace=True)
    df['ra_ratio'] = df['ra_ratio'].groupby(level=0, group_keys=False).apply(lambda x: x / len(x))
    df_position = df.unstack().fillna(0.0)
    df_position.columns = df_position.columns.droplevel(0)


    # 加载基金收益率
    min_date = df_position.index.min()
    #max_date = df_position.index.max()
    max_date = (datetime.now() - timedelta(days=1)) # yesterday


    df_nav = DBData.db_fund_value_daily(
        min_date, max_date, codes=df_position.columns)
    if '000000' in df_position.columns:
        df_nav['000000'] = 1
    df_inc = df_nav.pct_change().fillna(0.0)

    # 计算复合资产净值
    df_nav_portfolio = DFUtil.portfolio_nav(df_inc, df_position, result_col='portfolio')
    # df_nav_portfolio.to_csv(datapath('category_nav_' + category + '.csv'))

    df_result = df_nav_portfolio[['portfolio']].rename(columns={'portfolio':'ra_nav'}).copy()
    df_result.index.name = 'ra_date'
    df_result['ra_inc'] = df_result['ra_nav'].pct_change().fillna(0.0)
    df_result['ra_pool'] = pool['id']
    df_result['ra_category'] = category
    df_result['ra_type'] = pool['ra_type']
    df_result = df_result.reset_index().set_index(['ra_pool', 'ra_category', 'ra_type', 'ra_date'])

    df_new = df_result.apply(format_nav_and_inc)


    # 加载旧数据
    t2 = Table('ra_pool_nav', MetaData(bind=db), autoload=True)
    columns2 = [
        t2.c.ra_pool,
        t2.c.ra_category,
        t2.c.ra_type,
        t2.c.ra_date,
        t2.c.ra_nav,
        t2.c.ra_inc,
    ]
    stmt_select = select(columns2, (t2.c.ra_pool == pool['id']) & (t2.c.ra_category == category) & (t2.c.ra_type == pool['ra_type']))
    df_old = pd.read_sql(stmt_select, db, index_col=['ra_pool', 'ra_category', 'ra_type', 'ra_date'], parse_dates=['ra_date'])
    if not df_old.empty:
        df_old = df_old.apply(format_nav_and_inc)

    # 更新数据库
    database.batch(db, t2, df_new, df_old, timestamp=False)
def nav_update_index(db, asset):
    # 加载基金列表
    df = load_index_for_asset(db['asset'], asset['globalid'])

    # 构建均分仓位
    df_position = df.unstack().fillna(0.0)
    df_position.columns = df_position.columns.droplevel(0)

    # 加载基金收益率
    min_date = df_position.index.min()
    #max_date = df_position.index.max()
    max_date = (datetime.now() - timedelta(days=1))  # yesterday

    df_nav = DBData.db_index_value_daily(min_date,
                                         max_date,
                                         codes=df_position.columns)
    df_inc = df_nav.pct_change().fillna(0.0)

    dates = pd.date_range(df_position.index[0], datetime.now() - timedelta(1))
    df_position = df_position.reindex(dates).fillna(method='pad')
    # 计算复合资产净值
    df_nav_portfolio = DFUtil.portfolio_nav(df_inc,
                                            df_position,
                                            result_col='portfolio')
    # df_nav_portfolio.to_csv(datapath('category_nav_' + category + '.csv'))

    df_result = df_nav_portfolio[['portfolio']].rename(columns={
        'portfolio': 'ra_nav'
    }).copy()
    df_result.index.name = 'ra_date'
    df_result['ra_inc'] = df_result['ra_nav'].pct_change().fillna(0.0)
    df_result['ra_asset_id'] = asset['globalid']
    df_result = df_result.reset_index().set_index(['ra_asset_id', 'ra_date'])

    df_new = df_result.apply(format_nav_and_inc)

    # 加载旧数据
    t2 = Table('ra_composite_asset_nav',
               MetaData(bind=db['asset']),
               autoload=True)
    columns2 = [
        t2.c.ra_asset_id,
        t2.c.ra_date,
        t2.c.ra_nav,
        t2.c.ra_inc,
    ]
    stmt_select = select(columns2, (t2.c.ra_asset_id == asset['globalid']))
    df_old = pd.read_sql(stmt_select,
                         db['asset'],
                         index_col=['ra_asset_id', 'ra_date'],
                         parse_dates=['ra_date'])
    if not df_old.empty:
        df_old = df_old.apply(format_nav_and_inc)

    # 更新数据库
    database.batch(db['asset'], t2, df_new, df_old, timestamp=False)
Ejemplo n.º 10
0
def import_pool_sample(ctx, optpath):

    df = pd.read_csv(optpath.strip(), index_col=['ra_pool_id'])
    db = database.connection('asset')
    t = Table('ra_pool_sample', MetaData(bind=db), autoload=True)
    columns = [literal_column(c) for c in (df.index.names + list(df.columns))]

    for pool_id in set(df.index):
        s = select(columns, (t.c.ra_pool_id == pool_id))
        df_old = pd.read_sql(s, db, index_col=['ra_pool_id'])
        database.batch(db, t, df, df_old, timestamp=True)
Ejemplo n.º 11
0
def wavelet(ctx, startdate, enddate, viewid, idx, wavenum, max_wave_num,
            wave_name):

    if idx is None:
        #idx = ['120000001', '120000002', '120000013', '120000014', '120000015', '120000080' ,'ERI000001', 'ERI000002']
        idx = ['120000001']
    for _id in idx:
        trade_dates = ATradeDate.trade_date()
        nav = Asset(_id).nav(reindex=trade_dates).fillna(method='pad').dropna()
        dates = nav.index[1000:]
        views = []
        for d in dates:
            _tmp_nav = nav[nav.index <= d]
            wave_nav = wavefilter(_tmp_nav,
                                  wavenum,
                                  wname=wave_name,
                                  maxlevel=max_wave_num)
            inc = wave_nav.pct_change().fillna(0.0).iloc[-120:]
            wave_diff_rolling = wave_nav.diff().rolling(5).mean()
            views.append(wave_diff_rolling[-1])
            #print(d, views[-1])
            print(_id, d, inc.mean(), inc.std())

        view_df = pd.DataFrame(views, index=dates, columns=['bl_view'])
        view_df[view_df > 0] = 1.0
        view_df[view_df < 0] = 0
        view_df.index.name = 'bl_date'
        view_df['globalid'] = viewid
        view_df['bl_index_id'] = _id
        view_df['created_at'] = datetime.now()
        view_df['updated_at'] = datetime.now()
        df_new = view_df.reset_index().set_index(
            ['globalid', 'bl_date', 'bl_index_id'])

        #print(df_new.head())

        db = database.connection('asset')
        metadata = MetaData(bind=db)
        t = Table('ra_bl_view', metadata, autoload=True)
        columns = [
            t.c.globalid,
            t.c.bl_date,
            t.c.bl_view,
            t.c.bl_index_id,
            t.c.created_at,
            t.c.updated_at,
        ]
        s = select(columns).where(t.c.globalid == viewid).where(
            t.c.bl_index_id == _id)
        df_old = pd.read_sql(s,
                             db,
                             index_col=['globalid', 'bl_date', 'bl_index_id'],
                             parse_dates=['bl_date'])
        database.batch(db, t, df_new, df_old, timestamp=False)
Ejemplo n.º 12
0
def corr_update_category(pool, category, lookback):
    # 加载基金列表
    df = load_fund_category(pool['id'], category)

    data = {}
    for k0, v0 in df.groupby(level=0):
        index = DBData.trade_date_lookback_index(end_date=k0,
                                                 lookback=lookback)
        start_date = index.min().strftime("%Y-%m-%d")
        end_date = k0

        df_nav = DBData.db_fund_value_daily(start_date,
                                            end_date,
                                            codes=v0['ra_fund_code'])
        df_inc = df_nav.pct_change().fillna(0.0)
        df_corr = df_inc.corr()
        df_corr.fillna(0.0, inplace=True)

        if df_corr.empty:
            data[k0] = 0.0
        else:
            data[k0] = np.mean(df_corr.values)

    df_new = pd.DataFrame({'ra_corr': data})
    df_new.index.name = 'ra_date'
    df_new['ra_pool'] = pool['id']
    df_new['ra_category'] = category

    df_new = df_new.reset_index().set_index(
        ['ra_pool', 'ra_category', 'ra_date'])

    df_new = df_new.applymap("{:.4f}".format)

    db = database.connection('asset')
    # 加载旧数据
    t2 = Table('ra_pool_criteria', MetaData(bind=db), autoload=True)
    columns2 = [
        t2.c.ra_pool,
        t2.c.ra_category,
        t2.c.ra_date,
        t2.c.ra_corr,
    ]
    stmt_select = select(columns2, (t2.c.ra_pool == pool['id']) &
                         (t2.c.ra_category == category))
    df_old = pd.read_sql(stmt_select,
                         db,
                         index_col=['ra_pool', 'ra_category', 'ra_date'],
                         parse_dates=['ra_date'])
    if not df_old.empty:
        df_old = df_old.applymap("{:.4f}".format)

    # 更新数据库
    database.batch(db, t2, df_new, df_old, timestamp=False)
Ejemplo n.º 13
0
def sp_view_update(ctx, startdate, enddate, viewid):
    #backtest_interval = pd.date_range(startdate, enddate)
    mv = cal_sp_view()
    mv = mv.resample('d').last().fillna(method='pad')
    mv = mv.rolling(30).mean().dropna()
    print(mv.tail())

    today = datetime.now()
    mv_view_id = np.repeat(viewid, len(mv))
    mv_date = mv.index
    mv_inc = mv.view
    created_at = np.repeat(today, len(mv))
    updated_at = np.repeat(today, len(mv))
    #df_inc_value = np.column_stack([mv_view_id, mv_date, mv_inc, created_at, updated_at])
    #df_inc = pd.DataFrame(df_inc_value, columns = ['mc_view_id', 'mc_date', 'mc_inc', 'created_at', 'updated_at'])
    union_mv = {}
    union_mv['globalid'] = mv_view_id
    union_mv['bl_date'] = mv_date
    union_mv['bl_view'] = np.sign(mv_inc)
    union_mv['created_at'] = created_at
    union_mv['updated_at'] = updated_at
    union_mv_df = pd.DataFrame(
        union_mv,
        columns=['globalid', 'bl_date', 'bl_view', 'created_at', 'updated_at'])

    for index_id in ['120000013', 'ERI000001']:

        df_new = union_mv_df
        df_new['bl_index_id'] = index_id
        df_new = df_new.set_index(['globalid', 'bl_date', 'bl_index_id'])

        db = database.connection('asset')
        metadata = MetaData(bind=db)
        t = Table('ra_bl_view', metadata, autoload=True)
        columns = [
            t.c.globalid,
            t.c.bl_date,
            t.c.bl_view,
            t.c.bl_index_id,
            t.c.created_at,
            t.c.updated_at,
        ]
        s = select(columns).where(t.c.globalid == viewid).where(
            t.c.bl_index_id == index_id)
        df_old = pd.read_sql(s,
                             db,
                             index_col=['globalid', 'bl_date', 'bl_index_id'],
                             parse_dates=['bl_date'])
        database.batch(db, t, df_new, df_old, timestamp=False)

        print(df_new.tail())
Ejemplo n.º 14
0
def saveFactorCovariance(matAll, names, dates):

    dates.sort()
    factorNum = np.shape(matAll)[1]
    dfList = []
    k = 0
    for date in dates:
        mat = matAll[k, :, :].reshape(factorNum, factorNum)
        dfTmp = pd.DataFrame(
            columns=['bf_date', 'bf_factor1', 'bf_factor2', 'bf_cov'])
        dfFactor1 = list()
        dfFactor2 = list()
        covij = list()
        i = 0
        for name1 in names:
            j = i
            for name2 in names[i:]:
                dfFactor1.append(name1)
                dfFactor2.append(name2)
                covij.append(mat[i, j])
                j += 1
            i += 1
        dfTmp['bf_factor1'] = dfFactor1
        dfTmp['bf_factor2'] = dfFactor2
        dfTmp['bf_cov'] = covij
        dfTmp['bf_date'] = date
        dfList.append(dfTmp)
        k += 1
    df = pd.concat(dfList, axis=0)
    df['bf_date'] = df['bf_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    df.set_index(['bf_date', 'bf_factor1', 'bf_factor2'], inplace=True)

    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_factor_covariance', meta, autoload=True)
    columns = [
        t.c.bf_date,
        t.c.bf_factor1,
        t.c.bf_factor2,
        t.c.bf_cov,
    ]
    sql = select(columns)
    sql = sql.where(t.c.bf_date.in_(dates))
    dfBase = pd.read_sql(sql, db)
    dfBase['bf_date'] = dfBase['bf_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['bf_date', 'bf_factor1', 'bf_factor2'], inplace=True)

    database.batch(db, t, df, dfBase, timestamp=False)
Ejemplo n.º 15
0
def saveStockCovariance(dfAll, dates):

    dates.sort()
    dfList = []
    k = 0
    for date in dates:
        df = pd.DataFrame(
            columns=['bc_date', 'bc_stock1', 'bc_stock2', 'bc_cov'])
        names = list(dfAll[k].columns)
        dfStock1 = list()
        dfStock2 = list()
        covij = list()
        i = 0
        for name1 in names:
            j = i
            for name2 in names[i:]:
                dfStock1.append(name1)
                dfStock2.append(name2)
                covij.append(dfAll[k].iloc[i, j])
                j += 1
            i += 1
        df['bc_stock1'] = dfStock1
        df['bc_stock2'] = dfStock2
        df['bc_cov'] = covij
        df['bc_date'] = date
        df['bc_date'] = df['bc_date'].map(
            lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
        df.set_index(['bc_date', 'bc_stock1', 'bc_stock2'], inplace=True)
        dfList.append(df)
        k += 1
    dfNew = pd.concat(dfList, axis=0)

    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_covariance', meta, autoload=True)
    columns = [
        t.c.bc_date,
        t.c.bc_stock1,
        t.c.bc_stock2,
        t.c.bc_cov,
    ]
    sql = select(columns)
    sql = sql.where(t.c.bc_date.in_(dates))
    dfBase = pd.read_sql(sql, db)
    dfBase['bc_date'] = dfBase['bc_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['bc_date', 'bc_stock1', 'bc_stock2'], inplace=True)

    database.batch(db, t, dfNew, dfBase, timestamp=False)
Ejemplo n.º 16
0
def fund_update_corr_jensen(pool, adjust_points, optlimit, optcalc):
    ''' re calc fund for single fund pool
    '''
    lookback = pool.ra_lookback
    limit = optlimit

    if optcalc:
        #
        # 计算每个调仓点的最新配置
        #

        db = database.connection('asset')
        ra_pool_sample_t = Table('ra_pool_sample',
                                 MetaData(bind=db),
                                 autoload=True)
        ra_pool_fund_t = Table('ra_pool_fund',
                               MetaData(bind=db),
                               autoload=True)

        data = []
        with click.progressbar(length=len(adjust_points),
                               label='calc pool %s' % (pool.id)) as bar:
            for day in adjust_points:
                bar.update(1)
                codes = pool_by_corr_jensen(pool, day, lookback, limit)
                print day, codes
                if codes is None or len(codes) == 0:
                    continue
                ra_fund = base_ra_fund.load(codes=codes)
                ra_fund = ra_fund.set_index(['ra_code'])
                ra_pool = pool['id']
                for code in ra_fund.index:
                    ra_fund_id = ra_fund.loc[code, 'globalid']
                    data.append([ra_pool, day, ra_fund_id, code])
        fund_df = pd.DataFrame(
            data, columns=['ra_pool', 'ra_date', 'ra_fund_id', 'ra_fund_code'])
        fund_df = fund_df.set_index(['ra_pool', 'ra_date', 'ra_fund_id'])

        df_new = fund_df
        columns = [
            literal_column(c)
            for c in (df_new.index.names + list(df_new.columns))
        ]
        s = select(columns)
        s = s.where(
            ra_pool_fund_t.c.ra_pool.in_(
                df_new.index.get_level_values(0).tolist()))
        df_old = pd.read_sql(s, db, index_col=df_new.index.names)
        database.batch(db, ra_pool_fund_t, df_new, df_old)
Ejemplo n.º 17
0
def fund_update_monetary_pool(pool, adjust_points, optlimit, optcalc):
    ''' re calc fund for single fund pool
    '''
    lookback = pool.ra_lookback
    limit = optlimit

    if optcalc:
        #
        # 计算每个调仓点的最新配置
        #
        data = []

        with click.progressbar(length=len(adjust_points), label='calc pool %s' % (pool.id)) as bar:
            for day in adjust_points:
                bar.update(1)
                mnf = MonetaryFundFilter()
                mnf.handle()
                asset_ids = MonetaryAllocate.all_monetary_fund_globalid()
                trade_dates = ATradeDate.week_trade_date()
                dict_nav = {}
                for asset_id in asset_ids:
                    dict_nav[asset_id] = Asset.load_nav_series(asset_id, reindex=trade_dates)
                df_nav = pd.DataFrame(dict_nav).fillna(method='pad')
                df_inc = df_nav.pct_change().fillna(0.0)

                codes = pool_by_scale_return(pool, day, lookback, limit, mnf, df_inc)

                print(day, len(codes), codes)
                if codes is None or len(codes) == 0:
                    continue
                ra_fund = base_ra_fund.load(globalids=codes)
                ra_fund = ra_fund.set_index(['ra_code'])
                ra_pool = pool['id']
                for code in ra_fund.index:
                    ra_fund_id = ra_fund.loc[code, 'globalid']
                    data.append([ra_pool, day, ra_fund_id, code])

        fund_df = pd.DataFrame(data, columns = ['ra_pool', 'ra_date', 'ra_fund_id', 'ra_fund_code'])
        fund_df = fund_df.set_index(['ra_pool', 'ra_date', 'ra_fund_id'])

        df_new = fund_df
        columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
        s = select(columns)
        db = database.connection('asset')
        ra_pool_fund_t = Table('ra_pool_fund', MetaData(bind=db), autoload=True)
        s = s.where(ra_pool_fund_t.c.ra_pool.in_(df_new.index.get_level_values(0).tolist()))
        df_old = pd.read_sql(s, db, index_col = df_new.index.names)
        database.batch(db, ra_pool_fund_t, df_new, df_old)
def fc_update_nav(ctx, optid):

    lookback_days = 365
    blacklist = [24, 32, 40]
    factor_ids = [
        '1200000%02d' % i for i in range(1, 40) if i not in blacklist
    ]
    trade_dates = ATradeDate.month_trade_date(begin_date='2018-01-01')
    date = trade_dates[-1]

    start_date = (date -
                  datetime.timedelta(lookback_days)).strftime('%Y-%m-%d')
    end_date = date.strftime('%Y-%m-%d')
    corr0 = load_ind(factor_ids, start_date, end_date)
    res = clusterKMeansBase(corr0, maxNumClusters=10, n_init=100)
    asset_cluster = res[1]
    asset_cluster = dict(
        zip(sorted(asset_cluster), sorted(asset_cluster.values())))

    factor_name = base_ra_index.load()
    for k, v in asset_cluster.iteritems():
        v = np.array(v).astype('int')
        print factor_name.loc[v]

    assets = {}
    for factor_id in factor_ids:
        assets[factor_id] = Asset.load_nav_series(factor_id)
    df_assets = pd.DataFrame(assets)

    db = database.connection('asset')
    metadata = MetaData(bind=db)
    t = Table('ra_composite_asset_nav', metadata, autoload=True)

    for layer in asset_cluster.keys():
        layer_id = 'FC.000001.%d' % (layer + 1)
        layer_assets = asset_cluster[layer]
        layer_nav = df_assets.loc[:, layer_assets]
        layer_ret = layer_nav.pct_change().dropna()
        layer_ret = layer_ret.mean(1)
        layer_ret = layer_ret.reset_index()
        layer_ret.columns = ['ra_date', 'ra_inc']
        layer_ret['ra_nav'] = (1 + layer_ret['ra_inc']).cumprod()
        layer_ret['ra_asset_id'] = layer_id
        df_new = layer_ret.set_index(['ra_asset_id', 'ra_date'])
        df_old = asset_ra_composite_asset_nav.load_nav(layer_id)
        df_new = df_new.reindex(columns=['ra_nav', 'ra_inc'])
        database.batch(db, t, df_new, df_old, timestamp=False)
Ejemplo n.º 19
0
def saveStockFlr(df, dates):

    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_resid_fluctuation_ratio', meta, autoload=True)
    columns = [
        t.c.br_date,
        t.c.br_stock_id,
        t.c.br_flr,
    ]
    sql = select(columns)
    sql = sql.where(t.c.br_date.in_(dates))
    dfBase = pd.read_sql(sql, db)
    dfBase['br_date'] = dfBase['br_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['br_date', 'br_stock_id'], inplace=True)

    database.batch(db, t, df, dfBase, timestamp=False)
Ejemplo n.º 20
0
def turnover_update_category(pool, category):
    # 加载基金列表
    df = load_fund_category(pool['id'], category)

    # 构建均分仓位
    df['mask'] = 1
    df.set_index('ra_fund_code', append=True, inplace=True)
    df = df.unstack(fill_value=0)
    df_prev = df.shift(1).fillna(0).astype(int)

    df_prev.iloc[0] = df.iloc[0]

    df_and = np.bitwise_and(df, df_prev)

    df_new = (1 -
              df_and.sum(axis=1) / df_prev.sum(axis=1)).to_frame('ra_turnover')
    df_new['ra_pool'] = pool['id']
    df_new['ra_category'] = category

    df_new = df_new.reset_index().set_index(
        ['ra_pool', 'ra_category', 'ra_date'])

    df_new = df_new.applymap("{:.4f}".format)

    db = database.connection('asset')
    # 加载旧数据
    t2 = Table('ra_pool_criteria', MetaData(bind=db), autoload=True)
    columns2 = [
        t2.c.ra_pool,
        t2.c.ra_category,
        t2.c.ra_date,
        t2.c.ra_turnover,
    ]
    stmt_select = select(columns2, (t2.c.ra_pool == pool['id']) &
                         (t2.c.ra_category == category))
    df_old = pd.read_sql(stmt_select,
                         db,
                         index_col=['ra_pool', 'ra_category', 'ra_date'],
                         parse_dates=['ra_date'])
    if not df_old.empty:
        df_old = df_old.applymap("{:.4f}".format)

    # 更新数据库
    database.batch(db, t2, df_new, df_old, timestamp=False)
Ejemplo n.º 21
0
def saveFactorCovariance(mat, names, date, sdate=None, edate=None):

    df = pd.DataFrame(
        columns=['bf_date', 'bf_factor1', 'bf_factor2', 'bf_cov'])
    dfFactor1 = list()
    dfFactor2 = list()
    covij = list()
    i = 0
    for name1 in names:
        j = i
        for name2 in names[i:]:
            dfFactor1.append(name1)
            dfFactor2.append(name2)
            covij.append(mat[i, j])
            j += 1
        i += 1
    df['bf_factor1'] = dfFactor1
    df['bf_factor2'] = dfFactor2
    df['bf_cov'] = covij
    df['bf_date'] = date
    df['bf_date'] = df['bf_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    df.set_index(['bf_date', 'bf_factor1', 'bf_factor2'], inplace=True)

    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_factor_covariance', meta, autoload=True)
    columns = [
        t.c.bf_date,
        t.c.bf_factor1,
        t.c.bf_factor2,
        t.c.bf_cov,
    ]
    sql = select(columns)
    if sdate != None:
        sql = sql.where(t.c.bf_date >= sdate)
    if edate != None:
        sql = sql.where(t.c.bf_date <= edate)
    dfBase = pd.read_sql(sql, db)
    dfBase['bf_date'] = dfBase['bf_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['bf_date', 'bf_factor1', 'bf_factor2'], inplace=True)

    database.batch(db, t, df, dfBase, timestamp=False)
Ejemplo n.º 22
0
def gold_view_update2(ctx, startdate, enddate, viewid):
    #backtest_interval = pd.date_range(startdate, enddate)
    mv = cal_gold_view2()
    mv = mv.resample('d').last().fillna(method='pad')
    mv['view'] = mv['view'].shift(15)

    today = datetime.now()
    mv_view_id = np.repeat(viewid, len(mv))
    mv_date = mv.index
    mv_inc = mv.view
    created_at = np.repeat(today, len(mv))
    updated_at = np.repeat(today, len(mv))
    #df_inc_value = np.column_stack([mv_view_id, mv_date, mv_inc, created_at, updated_at])
    #df_inc = pd.DataFrame(df_inc_value, columns = ['mc_view_id', 'mc_date', 'mc_inc', 'created_at', 'updated_at'])
    union_mv = {}
    union_mv['mc_view_id'] = mv_view_id
    union_mv['mc_date'] = mv_date
    union_mv['mc_inc'] = mv_inc
    union_mv['created_at'] = created_at
    union_mv['updated_at'] = updated_at
    union_mv_df = pd.DataFrame(union_mv,
                               columns=[
                                   'mc_view_id', 'mc_date', 'mc_inc',
                                   'created_at', 'updated_at'
                               ])
    df_new = union_mv_df.set_index(['mc_view_id', 'mc_date'])

    db = database.connection('asset')
    metadata = MetaData(bind=db)
    t = Table('mc_view_strength', metadata, autoload=True)
    columns = [
        t.c.mc_view_id,
        t.c.mc_date,
        t.c.mc_inc,
        t.c.created_at,
        t.c.updated_at,
    ]
    s = select(columns, (t.c.mc_view_id == viewid))
    df_old = pd.read_sql(s,
                         db,
                         index_col=['mc_view_id', 'mc_date'],
                         parse_dates=['mc_date'])
    database.batch(db, t, df_new, df_old, timestamp=False)
Ejemplo n.º 23
0
def bond_view_update(ctx, startdate, enddate, viewid):
    backtest_interval = pd.date_range(startdate, enddate)
    mv = ir_view(backtest_interval)
    mv = mv.rolling(60).apply(lambda x: sum(x) / 10 - 1).fillna(0.0)

    today = datetime.now()
    mv_view_id = np.repeat(viewid, len(mv))
    mv_date = mv.index
    mv_inc = mv.irv.values
    created_at = np.repeat(today, len(mv))
    updated_at = np.repeat(today, len(mv))
    #df_inc_value = np.column_stack([mv_view_id, mv_date, mv_inc, created_at, updated_at])
    #df_inc = pd.DataFrame(df_inc_value, columns = ['mc_view_id', 'mc_date', 'mc_inc', 'created_at', 'updated_at'])
    union_mv = {}
    union_mv['mc_view_id'] = mv_view_id
    union_mv['mc_date'] = mv_date
    union_mv['mc_inc'] = mv_inc
    union_mv['created_at'] = created_at
    union_mv['updated_at'] = updated_at
    union_mv_df = pd.DataFrame(union_mv,
                               columns=[
                                   'mc_view_id', 'mc_date', 'mc_inc',
                                   'created_at', 'updated_at'
                               ])
    df_new = union_mv_df.set_index(['mc_view_id', 'mc_date'])

    db = database.connection('asset')
    metadata = MetaData(bind=db)
    t = Table('mc_view_strength', metadata, autoload=True)
    columns = [
        t.c.mc_view_id,
        t.c.mc_date,
        t.c.mc_inc,
        t.c.created_at,
        t.c.updated_at,
    ]
    s = select(columns).where(t.c.globalid == mc_view_id)
    df_old = pd.read_sql(s,
                         db,
                         index_col=['mc_view_id', 'mc_date'],
                         parse_dates=['mc_date'])
    database.batch(db, t, df_new, df_old, timestamp=False)
Ejemplo n.º 24
0
def filter_fund_by_list_to_pool_sample(ctx, optfundtype, optinpath, optpoolid):

    df = pd.read_csv(optinpath.strip())
    codes_set = set()
    for code in df['codes'].values:
        codes_set.add('%06d' % int(code))

    fund_type = int(optfundtype.strip())

    db = database.connection('base')
    t = Table('ra_fund', MetaData(bind=db), autoload=True)
    columns = [
        t.c.ra_code,
    ]
    s = select(columns)
    s = s.where(t.c.ra_type == fund_type)
    df = pd.read_sql(s, db)

    final_codes = []
    for code in df['ra_code'].values:
        code = '%06d' % int(code)
        if code in codes_set:
            final_codes.append(code)

    print final_codes

    df = pd.DataFrame(final_codes, columns=['ra_fund_code'])
    df['ra_pool_id'] = optpoolid.strip()
    df = df.set_index(['ra_pool_id', 'ra_fund_code'])

    db = database.connection('asset')
    t = Table('ra_pool_sample', MetaData(bind=db), autoload=True)
    columns = [literal_column(c) for c in (df.index.names + list(df.columns))]

    for pool_id in set(df.index.get_level_values(0)):
        s = select(columns, (t.c.ra_pool_id == pool_id))
        df_old = pd.read_sql(s, db, index_col=['ra_pool_id', 'ra_fund_code'])
        print df
        database.batch(db, t, df, df_old, timestamp=True)
Ejemplo n.º 25
0
def saveStockVariance(dfAll, dates):

    dates.sort()
    n = len(dfAll)
    dfList = []
    for i in range(n):
        dfTmp = pd.DataFrame(columns=['bs_date', 'bs_stock_id', 'bs_var'])
        stocks = list()
        stockVars = list()
        k = 0
        for column in dfAll[i].columns:
            stockVars.append(dfAll[i].iloc[k, k])
            stocks.append(column)
            k += 1
        dfTmp['bs_stock_id'] = stocks
        dfTmp['bs_var'] = stockVars
        dfTmp['bs_date'] = dates[i]
        dfList.append(dfTmp)
    df = pd.concat(dfList, axis=0)
    df['bs_date'] = df['bs_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    df.set_index(['bs_date', 'bs_stock_id'], inplace=True)

    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_stock_variance', meta, autoload=True)
    columns = [
        t.c.bs_date,
        t.c.bs_stock_id,
        t.c.bs_var,
    ]
    sql = select(columns)
    sql = sql.where(t.c.bs_date.in_(dates))
    dfBase = pd.read_sql(sql, db)
    dfBase['bs_date'] = dfBase['bs_date'].map(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['bs_date', 'bs_stock_id'], inplace=True)

    database.batch(db, t, df, dfBase, timestamp=False)
Ejemplo n.º 26
0
def imp_portf(df):

    db = database.connection('asset')
    metadata = MetaData(bind=db)
    portfolio_t = Table('ra_portfolio', metadata, autoload=True)
    portfolio_alloc_t = Table('ra_portfolio_alloc', metadata, autoload=True)
    portfolio_asset_t = Table('ra_portfolio_asset', metadata, autoload=True)
    portfolio_argv_t = Table('ra_portfolio_argv', metadata, autoload=True)

    df = df.copy()

    portfolio_id = df['ra_portfolio_id'].unique().item()
    portfolio_name = df['ra_portfolio_name'].unique().item()
    highlow_id = df['mz_highlow_id'].unique().item()
    portfolio_df = pd.DataFrame([[portfolio_id, portfolio_name, highlow_id]],
                                columns=['globalid', 'ra_name', 'ra_ratio_id'])
    portfolio_df['ra_type'] = 9
    portfolio_df = portfolio_df.set_index(['globalid'])
    portfolio_df['ra_algo'] = df['ra_portfolio_algo'].unique().item()
    portfolio_df['ra_persistent'] = 0

    df_new = portfolio_df
    columns = [
        literal_column(c) for c in (df_new.index.names + list(df_new.columns))
    ]
    s = select(columns)
    s = s.where(portfolio_t.c.globalid.in_(df_new.index.tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, portfolio_t, df_new, df_old)

    portfolio_alloc_data = []
    portfolio_asset_data = []
    portfolio_argv_data = []

    portfolio_alloc_columns = [
        'globalid', 'ra_name', 'ra_portfolio_id', 'ra_ratio_id', 'ra_risk',
        'ra_type'
    ]
    portfolio_asset_columns = [
        'ra_portfolio_id', 'ra_asset_id', 'ra_asset_name', 'ra_asset_type',
        'ra_pool_id'
    ]
    portfolio_argv_columns = ['ra_portfolio_id', 'ra_key', 'ra_value']

    portfolio_alloc_index = ['globalid']
    portfolio_asset_index = ['ra_portfolio_id', 'ra_asset_id']
    portfolio_argv_index = ['ra_portfolio_id', 'ra_key']

    for k, v in df.groupby(['risk']):

        portfolio_id = v['ra_portfolio_id'].unique().item()
        risk = v['risk'].unique().item()
        portfolio_id_num = portfolio_id.strip().split('.')[1]
        portfolio_risk_id = portfolio_id.replace(
            portfolio_id_num,
            str(string.atoi(portfolio_id_num) + int(risk * 10) % 10))
        highlow_id_num = highlow_id.strip().split('.')[1]
        highlow_risk_id = highlow_id.replace(
            highlow_id_num,
            str(string.atoi(highlow_id_num) + int(risk * 10) % 10))

        portfolio_alloc_data.append([
            portfolio_risk_id, portfolio_name, portfolio_id, highlow_risk_id,
            risk, 9
        ])

        for i in range(0, len(v)):
            record = v.iloc[i]
            asset_id = record['asset_id']
            pool_id = record['pool_id']
            asset_name = find_asset_name(asset_id)
            portfolio_asset_data.append(
                [portfolio_risk_id, asset_id, asset_name, 0, pool_id])

        portfolio_argv_data = []
        for col in v.columns:
            key = col.strip()
            if key.startswith('portfolio'):
                value = str(v[col].unique().item()).strip()
                value = value if not value == 'nan' else ''
                portfolio_argv_data.append([portfolio_risk_id, key, value])

    portfolio_alloc_df = pd.DataFrame(portfolio_alloc_data,
                                      columns=portfolio_alloc_columns)
    portfolio_alloc_df = portfolio_alloc_df.set_index(portfolio_alloc_index)

    portfolio_asset_df = pd.DataFrame(portfolio_asset_data,
                                      columns=portfolio_asset_columns)
    portfolio_asset_df = portfolio_asset_df.set_index(portfolio_asset_index)

    portfolio_argv_df = pd.DataFrame(portfolio_argv_data,
                                     columns=portfolio_argv_columns)
    portfolio_argv_df = portfolio_argv_df.set_index(portfolio_argv_index)

    #print portfolio_alloc_df
    #print portfolio_asset_df
    #print portfolio_argv_df

    df_new = portfolio_alloc_df
    columns = [
        literal_column(c) for c in (df_new.index.names + list(df_new.columns))
    ]
    s = select(columns)
    s = s.where(
        portfolio_alloc_t.c.ra_portfolio_id.in_(v['ra_portfolio_id'].ravel()))
    df_old = pd.read_sql(s, db, index_col=[df_new.index.name])
    database.batch(db, portfolio_alloc_t, df_new, df_old)

    df_new = portfolio_asset_df
    columns = [
        literal_column(c) for c in (df_new.index.names + list(df_new.columns))
    ]
    s = select(columns)
    s = s.where(
        portfolio_asset_t.c.ra_portfolio_id.in_(
            df_new.index.get_level_values(0).tolist()))
    #s = s.where(portfolio_asset_t.c.ra_asset_id.in_(df_new.index.get_level_values(1).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, portfolio_asset_t, df_new, df_old)

    df_new = portfolio_argv_df
    columns = [
        literal_column(c) for c in (df_new.index.names + list(df_new.columns))
    ]
    s = select(columns)
    s = s.where(
        portfolio_argv_t.c.ra_portfolio_id.in_(
            df_new.index.get_level_values(0).tolist()))
    #s = s.where(portfolio_argv_t.c.ra_key.in_(df_new.index.get_level_values(1).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, portfolio_argv_t, df_new, df_old)
Ejemplo n.º 27
0
def imp_markowitz(df):

    db = database.connection('asset')
    metadata = MetaData(bind=db)
    markowitz_t = Table('mz_markowitz', metadata, autoload=True)
    markowitz_alloc_t = Table('mz_markowitz_alloc', metadata, autoload=True)
    markowitz_asset_t = Table('mz_markowitz_asset', metadata, autoload=True)
    markowitz_argv_t = Table('mz_markowitz_argv', metadata, autoload=True)

    df = df.copy()

    markowitz_id = df['mz_markowitz_id'].unique().item()
    markowitz_name = df['mz_markowitz_name'].unique().item()
    markowitz_algo = 0
    markowitz_df = pd.DataFrame(
        [[markowitz_id, markowitz_name, markowitz_algo]],
        columns=['globalid', 'mz_name', 'mz_algo'])
    markowitz_df['mz_type'] = 9
    markowitz_df = markowitz_df.set_index(['globalid'])

    df_new = markowitz_df
    columns = [
        literal_column(c) for c in (df_new.index.names + list(df_new.columns))
    ]
    s = select(columns)
    s = s.where(markowitz_t.c.globalid.in_(df_new.index.tolist()))
    df_old = pd.read_sql(s, db, index_col=[df_new.index.name])
    database.batch(db, markowitz_t, df_new, df_old)

    markowitz_alloc_data = []
    markowitz_asset_data = []
    markowitz_argv_data = []

    markowitz_alloc_columns = [
        'globalid', 'mz_markowitz_id', 'mz_name', 'mz_type', 'mz_algo',
        'mz_risk'
    ]
    markowitz_asset_columns = [
        'mz_markowitz_id', 'mz_asset_id', 'mz_asset_name',
        'mz_markowitz_asset_id', 'mz_markowitz_asset_name', 'mz_asset_type',
        'mz_upper_limit', 'mz_lower_limit', 'mz_sum1_limit', 'mz_sum2_limit'
    ]
    markowitz_argv_columns = ['mz_markowitz_id', 'mz_key', 'mz_value']

    markowitz_alloc_index = ['globalid']
    markowitz_asset_index = ['mz_markowitz_id', 'mz_markowitz_asset_id']
    markowitz_argv_index = ['mz_markowitz_id', 'mz_key']

    for k, v in df.groupby(['risk']):

        markowitz_id = v['mz_markowitz_id'].unique().item()
        risk = v['risk'].unique().item()
        markowitz_id_num = markowitz_id.strip().split('.')[1]
        markowitz_risk_id = markowitz_id.replace(
            markowitz_id_num,
            str(string.atoi(markowitz_id_num) + int(risk * 10) % 10))
        markowitz_algo = v['allocate_algo'].unique().item()

        markowitz_alloc_data.append([
            markowitz_risk_id, markowitz_id, markowitz_name, 9, markowitz_algo,
            risk
        ])

        for i in range(0, len(v)):
            record = v.iloc[i]
            asset_id = record['asset_id']
            asset_name = find_asset_name(asset_id)
            sum1 = record['sum1']
            sum2 = record['sum2']
            lower = record['lower']
            upper = record['upper']
            markowitz_asset_data.append([
                markowitz_risk_id, asset_id, asset_name, asset_id, asset_name,
                0, upper, lower, sum1, sum2
            ])

        for col in v.columns:
            key = col.strip()
            if key.startswith('allocate'):
                value = str(v[col].unique().item()).strip()
                value = value if not value == 'nan' else ''
                markowitz_argv_data.append([markowitz_risk_id, key, value])

    markowitz_alloc_df = pd.DataFrame(markowitz_alloc_data,
                                      columns=markowitz_alloc_columns)
    markowitz_alloc_df = markowitz_alloc_df.set_index(markowitz_alloc_index)

    markowitz_asset_df = pd.DataFrame(markowitz_asset_data,
                                      columns=markowitz_asset_columns)
    markowitz_asset_df = markowitz_asset_df.set_index(markowitz_asset_index)

    markowitz_argv_df = pd.DataFrame(markowitz_argv_data,
                                     columns=markowitz_argv_columns)
    markowitz_argv_df = markowitz_argv_df.set_index(markowitz_argv_index)

    #print highlow_alloc_df

    df_new = markowitz_alloc_df
    #print df_new
    columns = [
        literal_column(c) for c in (df_new.index.names + list(df_new.columns))
    ]
    s = select(columns)
    s = s.where(
        markowitz_alloc_t.c.mz_markowitz_id.in_(
            df_new['mz_markowitz_id'].ravel()))
    #print s.compile(compile_kwargs={"literal_binds": True})
    df_old = pd.read_sql(s, db, index_col=[df_new.index.name])
    database.batch(db, markowitz_alloc_t, df_new, df_old)

    df_new = markowitz_asset_df
    columns = [
        literal_column(c) for c in (df_new.index.names + list(df_new.columns))
    ]
    s = select(columns)
    s = s.where(
        markowitz_asset_t.c.mz_markowitz_id.in_(
            df_new.index.get_level_values(0).tolist()))
    #s = s.where(markowitz_asset_t.c.mz_markowitz_asset_id.in_(df_new.index.get_level_values(1).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, markowitz_asset_t, df_new, df_old)

    df_new = markowitz_argv_df
    columns = [
        literal_column(c) for c in (df_new.index.names + list(df_new.columns))
    ]
    s = select(columns)
    s = s.where(
        markowitz_argv_t.c.mz_markowitz_id.in_(
            df_new.index.get_level_values(0).tolist()))
    #s = s.where(markowitz_argv_t.c.mz_key.in_(df_new.index.get_level_values(1).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, markowitz_argv_t, df_new, df_old)
Ejemplo n.º 28
0
def imp_highlow(df):

    db = database.connection('asset')
    metadata = MetaData(bind=db)
    highlow_t = Table('mz_highlow', metadata, autoload=True)
    highlow_alloc_t = Table('mz_highlow_alloc', metadata, autoload=True)
    highlow_asset_t = Table('mz_highlow_asset', metadata, autoload=True)
    highlow_argv_t = Table('mz_highlow_argv', metadata, autoload=True)

    df = df.copy()

    highlow_id = df['mz_highlow_id'].unique().item()
    highlow_name = df['mz_highlow_name'].unique().item()
    markowitz_id = df['mz_markowitz_id'].unique().item()
    highlow_df = pd.DataFrame(
        [[highlow_id, highlow_name, markowitz_id]],
        columns=['globalid', 'mz_name', 'mz_markowitz_id'])
    highlow_df['mz_type'] = 9
    highlow_df = highlow_df.set_index(['globalid'])
    highlow_df['mz_algo'] = df['mz_highlow_algo'].unique().item()
    highlow_df['mz_persistent'] = 0

    df_new = highlow_df
    columns = [
        literal_column(c) for c in (df_new.index.names + list(df_new.columns))
    ]
    s = select(columns)
    s = s.where(highlow_t.c.globalid.in_(df_new.index.tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, highlow_t, df_new, df_old)

    highlow_alloc_data = []
    highlow_asset_data = []
    highlow_argv_data = []

    highlow_alloc_columns = [
        'globalid', 'mz_name', 'mz_type', 'mz_highlow_id', 'mz_risk',
        'mz_algo', 'mz_markowitz_id'
    ]
    highlow_asset_columns = [
        'mz_highlow_id', 'mz_asset_id', 'mz_asset_name', 'mz_asset_type',
        'mz_origin_id', 'mz_riskmgr_id', 'mz_pool_id'
    ]
    highlow_argv_columns = ['mz_highlow_id', 'mz_key', 'mz_value']

    highlow_alloc_index = ['globalid']
    highlow_asset_index = ['mz_highlow_id', 'mz_asset_id']
    highlow_argv_index = ['mz_highlow_id', 'mz_key']

    for k, v in df.groupby(['risk']):

        highlow_id = v['mz_highlow_id'].unique().item()
        risk = v['risk'].unique().item()
        highlow_id_num = highlow_id.strip().split('.')[1]
        highlow_risk_id = highlow_id.replace(
            highlow_id_num,
            str(string.atoi(highlow_id_num) + int(risk * 10) % 10))
        highlow_name = v['mz_highlow_name'].unique().item()
        markowitz_id_num = markowitz_id.strip().split('.')[1]
        markowitz_risk_id = markowitz_id.replace(
            markowitz_id_num,
            str(string.atoi(markowitz_id_num) + int(risk * 10) % 10))
        highlow_algo = v['mz_highlow_algo'].unique().item()

        highlow_alloc_data.append([
            highlow_risk_id, highlow_name, 9, highlow_id, risk, highlow_algo,
            markowitz_risk_id
        ])

        for i in range(0, len(v)):
            record = v.iloc[i]
            asset_id = record['asset_id']
            pool_id = record['pool_id']
            riskmgr_id = record['riskmgr_id']
            asset_name = find_asset_name(asset_id)
            highlow_asset_data.append([
                highlow_risk_id, asset_id, asset_name, 0, markowitz_id,
                riskmgr_id, pool_id
            ])

        data = []
        for col in v.columns:
            key = col.strip()
            if key.startswith('highlow'):
                value = str(v[col].unique().item()).strip()
                value = value if not value == 'nan' else ''
                highlow_argv_data.append([highlow_risk_id, key, value])

        #print argv_df

    highlow_alloc_df = pd.DataFrame(highlow_alloc_data,
                                    columns=highlow_alloc_columns)
    highlow_alloc_df = highlow_alloc_df.set_index(highlow_alloc_index)

    highlow_asset_df = pd.DataFrame(highlow_asset_data,
                                    columns=highlow_asset_columns)
    highlow_asset_df = highlow_asset_df.set_index(highlow_asset_index)

    highlow_argv_df = pd.DataFrame(highlow_argv_data,
                                   columns=highlow_argv_columns)
    highlow_argv_df = highlow_argv_df.set_index(highlow_argv_index)

    #print highlow_alloc_df
    #print highlow_asset_df
    #print highlow_argv_df

    df_new = highlow_alloc_df
    columns = [
        literal_column(c) for c in (df_new.index.names + list(df_new.columns))
    ]
    s = select(columns)
    s = s.where(
        highlow_alloc_t.c.mz_highlow_id.in_(df_new['mz_highlow_id'].ravel()))
    df_old = pd.read_sql(s, db, index_col=[df_new.index.name])
    database.batch(db, highlow_alloc_t, df_new, df_old)

    df_new = highlow_asset_df.fillna('')
    columns = [
        literal_column(c) for c in (df_new.index.names + list(df_new.columns))
    ]
    s = select(columns)
    s = s.where(
        highlow_asset_t.c.mz_highlow_id.in_(
            df_new.index.get_level_values(0).tolist()))
    #s = s.where(highlow_asset_t.c.mz_asset_id.in_(df_new.index.get_level_values(1).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, highlow_asset_t, df_new, df_old)

    df_new = highlow_argv_df
    columns = [
        literal_column(c) for c in (df_new.index.names + list(df_new.columns))
    ]
    s = select(columns)
    s = s.where(
        highlow_argv_t.c.mz_highlow_id.in_(
            df_new.index.get_level_values(0).tolist()))
    #s = s.where(highlow_argv_t.c.mz_key.in_(df_new.index.get_level_values(1).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, highlow_argv_t, df_new, df_old)
Ejemplo n.º 29
0
def fund_update(pool, adjust_points, optlimit, opteliminateratio, optcalc):
    ''' re calc fund for single fund pool
    '''
    lookback = pool.ra_lookback
    limit = optlimit

    if optcalc:
        #
        # 计算每个调仓点的最新配置
        #
        data_fund = {}
        with click.progressbar(length=len(adjust_points),
                               label=('calc pool %s' %
                                      (pool.id)).ljust(30)) as bar:
            for day in adjust_points:
                #print day
                bar.update(1)
                if pool['ra_fund_type'] == 1:
                    df_indicator, df_label = LabelAsset.label_asset_stock_per_day(
                        day, lookback, limit)
                else:
                    df_indicator, df_label = LabelAsset.label_asset_bond_per_day(
                        day, lookback, limit)

                fund_dates = np.array(data_fund.keys())
                fund_date = fund_dates[fund_dates < day]
                fund_date = list(fund_date)
                fund_date.sort()
                if len(fund_date) == 0:
                    data_fund[day] = fund_lowliest_elimination(
                        None, df_indicator, df_label, opteliminateratio,
                        optlimit)
                else:
                    data_fund[day] = fund_lowliest_elimination(
                        data_fund[fund_date[-1]], df_indicator, df_label,
                        opteliminateratio, optlimit)

        df_fund = pd.concat(data_fund,
                            names=['ra_date', 'ra_category', 'ra_fund_code'])

        df_new = df_fund.rename(index=DFUtil.categories_types(True),
                                columns={
                                    'date': 'ra_date',
                                    'category': 'ra_category',
                                    'code': 'ra_fund_code',
                                    'sharpe': 'ra_sharpe',
                                    'jensen': 'ra_jensen',
                                    'sortino': 'ra_sortino',
                                    'ppw': 'ra_ppw'
                                })
        df_new.drop('stability', axis=1, inplace=True)

        df_new = df_new.applymap(lambda x: round(x, 4)
                                 if type(x) == float else x)

        codes = df_new.index.get_level_values(2)
        xtab = fund_code_to_globalid(codes)
        df_new['ra_fund_id'] = xtab[df_new.index.get_level_values(
            'ra_fund_code')].values
        df_new['ra_pool'] = pool.id
        df_new['ra_fund_type'] = 1
        df_new['ra_fund_level'] = 1

        df_new.reset_index(inplace=True)
        df_new = df_new.reindex_axis([
            'ra_pool', 'ra_category', 'ra_date', 'ra_fund_id', 'ra_fund_code',
            'ra_fund_type', 'ra_fund_level', 'ra_sharpe', 'ra_jensen',
            'ra_sortino', 'ra_ppw'
        ],
                                     axis='columns')
        df_new.sort_values(
            by=['ra_pool', 'ra_category', 'ra_date', 'ra_fund_id'],
            inplace=True)

        #df_new.to_csv(datapath('pool_%s.csv' % (pool['id'])), index=False)

    else:
        df_new = pd.read_csv(datapath('pool_%s.csv' % (pool['id'])),
                             parse_dates=['ra_date'],
                             dtype={'ra_fund_code': str})

    df_new.set_index(['ra_pool', 'ra_category', 'ra_date', 'ra_fund_id'],
                     inplace=True)
    df_new = df_new.applymap(lambda x: '%.4f' % (x) if type(x) == float else x)

    db = database.connection('asset')
    ra_pool_fund = Table('ra_pool_fund', MetaData(bind=db), autoload=True)

    # 加载就数据
    columns2 = [
        ra_pool_fund.c.ra_pool,
        ra_pool_fund.c.ra_category,
        ra_pool_fund.c.ra_date,
        ra_pool_fund.c.ra_fund_id,
        ra_pool_fund.c.ra_fund_code,
        ra_pool_fund.c.ra_fund_type,
        ra_pool_fund.c.ra_fund_level,
        ra_pool_fund.c.ra_sharpe,
        ra_pool_fund.c.ra_jensen,
        ra_pool_fund.c.ra_sortino,
        ra_pool_fund.c.ra_ppw,
    ]
    stmt_select = select(columns2, ra_pool_fund.c.ra_pool == pool.id)
    df_old = pd.read_sql(
        stmt_select,
        db,
        index_col=['ra_pool', 'ra_category', 'ra_date', 'ra_fund_id'])
    if not df_old.empty:
        df_old = df_old.applymap(lambda x: '%.4f' % (x)
                                 if type(x) == float else x)

    database.batch(db, ra_pool_fund, df_new, df_old)
Ejemplo n.º 30
0
def handle(sdate, edate):

    sdate = pd.Timestamp(sdate).strftime('%Y-%m-%d')
    edate = pd.Timestamp(edate).strftime('%Y-%m-%d')
    #
    styleFactors.sort()
    industryFactors.sort()

    # load daily returns of every stocks
    db = create_engine(uris['wind'])
    meta = MetaData(bind=db)
    t = Table('ashareeodprices', meta, autoload=True)
    columns = [t.c.S_INFO_WINDCODE, t.c.TRADE_DT, t.c.S_DQ_ADJCLOSE]
    sql = select(columns)
    #sql = sql.where(t.c.S_DQ_TRADESTATUS != '停牌').where(t.c.S_DQ_TRADESTATUS != '待核查')
    sql = sql.where(t.c.TRADE_DT <= pd.Timestamp(edate).strftime('%Y%m%d'))
    sql = sql.where(t.c.TRADE_DT >= pd.Timestamp(
        datetime.strptime(sdate, '%Y-%m-%d') -
        timedelta(days=100)).strftime('%Y%m%d'))
    dfAdjClose = pd.read_sql(sql, db)
    dfAdjClose.TRADE_DT = dfAdjClose.TRADE_DT.apply(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))

    if len(dfAdjClose[dfAdjClose.TRADE_DT >= sdate]) == 0:
        print('failed! there is no stock data between sdate and edate!')
        exit()

    # load factor exposures of every stocks
    db = create_engine(uris['factor'])
    sdateExposure = pd.Timestamp(
        datetime.strptime(sdate, '%Y-%m-%d') -
        timedelta(days=100)).strftime('%Y-%m-%d')
    sql = "select * from `factor_exposure_barra_20200220` where trade_date >= '" + sdateExposure + "' and trade_date <='" + edate + "'"
    dfExposure = pd.read_sql(sql, db)
    dfExposure.trade_date = dfExposure.trade_date.apply(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))

    if len(dfExposure) <= 1:
        print(
            'data is weird! please change sdate and edate and check exposure table!'
        )
        exit()

    sdateExposureListRaw = list(set(dfExposure.trade_date))
    helperList = [x for x in sdateExposureListRaw if x < sdate]
    if len(helperList) == 0:
        sdateExposureListRaw.sort()
        sdate = sdateExposureListRaw[1]
        helperList = sdateExposureListRaw[0:1]
    sdateNew = max(helperList)
    sdateExposureList = [x for x in sdateExposureListRaw if x >= sdateNew]
    sdateExposureList.sort()
    sdateExposureListNew = [x for x in sdateExposureListRaw if x >= sdate]

    if len(dfAdjClose[dfAdjClose.TRADE_DT > max(sdateExposureList)]) != 0:
        maxRealSdate = min(dfAdjClose[
            dfAdjClose.TRADE_DT > max(sdateExposureList)]['TRADE_DT'])
        sdateExposureListNew.append(maxRealSdate)
        sdateExposureListNew.sort()
        dfExposure = dfExposure[dfExposure.trade_date >= sdateNew].sort_values(
            by='trade_date', ascending=True)
        dfExposure.trade_date = dfExposure.trade_date.apply(
            lambda x: sdateExposureListNew[sdateExposureList.index(x)])
    else:
        sdateExposureListNew.sort()
        dfExposure = dfExposure[(dfExposure.trade_date >= sdateNew) & (
            dfExposure.trade_date < max(sdateExposureList))].sort_values(
                by='trade_date', ascending=True)
        dfExposure.trade_date = dfExposure.trade_date.apply(
            lambda x: sdateExposureListNew[sdateExposureList.index(x)])

    # it is necessary to make sure that stocks are both included in exposure table and wind table
    stocks = set(dfExposure['stock_id']).intersection(
        set(dfAdjClose['S_INFO_WINDCODE']))
    dfExposure = dfExposure[dfExposure['stock_id'].isin(stocks)]
    dfExposureG = dfExposure.groupby('trade_date')

    dfAdjClose = dfAdjClose[dfAdjClose['S_INFO_WINDCODE'].isin(stocks)]
    dfAdjCloseG = dfAdjClose.groupby('S_INFO_WINDCODE')
    dfAdjClose = pd.DataFrame(
        columns=['pct_change', 'S_INFO_WINDCODE', 'TRADE_DT', 'S_DQ_ADJCLOSE'])
    dfList = []
    for stock in stocks:
        dfTmp = dfAdjCloseG.get_group(stock).copy()
        dfTmp.sort_values(by='TRADE_DT', ascending=True, inplace=True)
        dfTmp.reset_index(inplace=True, drop=True)
        pct_change = dfTmp['S_DQ_ADJCLOSE'].pct_change()
        dfTmp.insert(0, 'pct_change', pct_change)
        dfTmp = dfTmp.fillna(0)
        dfList.append(dfTmp)
    dfAdjClose = pd.concat(dfList, axis=0, sort=True)
    dfAdjClose.drop_duplicates(['TRADE_DT', 'S_INFO_WINDCODE'], inplace=True)
    dfAdjCloseG = dfAdjClose.groupby('TRADE_DT')

    # main part
    dfResid = pd.DataFrame(columns=['trade_date', 'stock_id', 'resid'])
    dfParams = pd.DataFrame(columns=['trade_date'] + ['country'] +
                            styleFactors + industryFactors)
    dfParams.set_index('trade_date', inplace=True)
    dfPList = []
    dfRList = []
    # rn = fc + Sigma(Xi*fi) + Sigma(Xs*fs) + un  Sigma(w*fi) = 0  un is resid
    for date, Exposure in dfExposureG:
        date = pd.Timestamp(date).strftime('%Y-%m-%d')
        exposure = Exposure.copy()
        dfAdjClose = dfAdjCloseG.get_group(date).copy()
        exposure = exposure.dropna(subset=['industry']).fillna(0)
        windCodes = set(exposure['stock_id']).intersection(
            set(dfAdjClose['S_INFO_WINDCODE']))
        dfAdjClose = dfAdjClose[dfAdjClose['S_INFO_WINDCODE'].isin(windCodes)]
        dfAdjClose = dfAdjClose.fillna(0)
        exposure = exposure[exposure['stock_id'].isin(windCodes)]
        exposure.sort_values(by='stock_id', inplace=True)

        r = np.matrix(
            dfAdjClose.sort_values('S_INFO_WINDCODE')['pct_change']).T
        # exposures of country factor
        Xc = np.ones((len(exposure), 1))
        # exposures of style factor
        Xs = np.matrix(exposure[styleFactors])
        # exposures of industry factor
        Xi = np.matrix(pd.get_dummies(exposure['industry']).sort_index(axis=1))
        X = np.hstack((Xc, Xs, Xi))
        w = ((Xi.T) *
             (np.matrix(exposure['weight']).T)) / (exposure['weight'].sum())
        w = np.array(w).reshape(len(w), )
        # use generalized linear model
        model = sm.GLM(r, X, var_weights=np.sqrt(exposure['weight'].values))
        Q = np.hstack([[0], np.zeros(len(styleFactors)), w])
        result = model.fit_constrained((Q, 0.0))
        params = result.params
        resid = result.resid_response

        # industry changes.
        # sometimes new industries are added sometimes old industires are deleted
        # we only care about industries in industryList
        industryList = list(set(exposure['industry']))
        industryList.sort()
        factors = ['country'] + styleFactors + industryFactors
        dfP = pd.DataFrame(columns=['trade_date'] + factors)
        dfP.set_index('trade_date', inplace=True)
        for i in range(1 + len(styleFactors)):
            dfP.loc[date, factors[i]] = params[i]
        k = 1 + len(styleFactors)
        for ind in industryList:
            dfP.loc[date, 'industry_' + ind] = params[k]
            k += 1
        dfP = dfP.fillna(0)
        dfPList.append(dfP)

        dfR = pd.DataFrame(columns=['trade_date', 'stock_id', 'resid'])
        dfR['stock_id'] = exposure['stock_id']
        dfR['resid'] = resid
        dfR['trade_date'] = date
        dfRList.append(dfR)

    dfParams = pd.concat(dfPList, axis=0)
    dfResid = pd.concat(dfRList, axis=0)
    dfParams.sort_index(axis=1, inplace=True)

    # connect to database and update factor returns
    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_factor_return', meta, autoload=True)
    sql = "select trade_date, " + ','.join(
        dfParams.columns.values
    ) + " from `barra_factor_return` where trade_date >= '" + sdate + "' and trade_date <='" + edate + "'"
    dfBase = pd.read_sql(sql, db)
    dfBase.sort_index(axis=1, inplace=True)
    dfBase['trade_date'] = dfBase['trade_date'].apply(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index('trade_date', inplace=True)

    database.batch(db, t, dfParams, dfBase, timestamp=False)
    print('factor return updated!')

    dfResid.set_index(['trade_date', 'stock_id'], inplace=True)
    # connect to database and update regression resids
    db = create_engine(uris['factor'])
    meta = MetaData(bind=db)
    t = Table('barra_regression_resid', meta, autoload=True)
    columns = [
        t.c.trade_date,
        t.c.stock_id,
        t.c.resid,
    ]
    sql = select(columns)
    sql = sql.where(t.c.trade_date >= sdate)
    sql = sql.where(t.c.trade_date <= edate)
    dfBase = pd.read_sql(sql, db)
    dfBase['trade_date'] = dfBase['trade_date'].apply(
        lambda x: pd.Timestamp(x).strftime('%Y-%m-%d'))
    dfBase.set_index(['trade_date', 'stock_id'], inplace=True)

    database.batch(db, t, dfResid, dfBase, timestamp=False)
    print('regression reside updated!')