Esempio n. 1
0
def test_remote_data_service_industry():
    """Fetch raw industry data for an index's members and align it to trade dates.

    Steps:
      1. Query the components of index '000300.SH' and their raw industry
         records (``type_='ZZ'``) from ``RemoteDataService``.
      2. Split the records per symbol and build two wide frames (one column
         per security): the ``in_date`` values and the ``industry1_code``
         values.
      3. Run ``align`` on the wide frames, then again security by security
         for the first 10 securities, concatenating the per-security results.

    The function only checks that these calls run without raising; it makes
    no assertions about the resulting values.
    """
    from jaqs.data.align import align
    import pandas as pd

    ds = RemoteDataService()
    arr = ds.get_index_comp(index='000300.SH', start_date=20130101, end_date=20170505)
    df = ds.get_industry_raw(symbol=','.join(arr), type_='ZZ')
    df = df.astype(dtype={'in_date': int})

    from jaqs.data.dataview import DataView
    dic_sec = DataView._group_df_to_dict(df, by='symbol')
    # ``dict.items()`` instead of the Python 2-only ``viewitems()`` so the test
    # runs on both Python 2 and Python 3. The loop variable is named ``df_sec``
    # to avoid shadowing the full frame ``df`` above.
    dic_sec = {sec: df_sec.reset_index() for sec, df_sec in dic_sec.items()}

    # One column per security; rows are the positional index produced by
    # ``reset_index`` above.
    df_ann = pd.concat(
        [df_sec.loc[:, 'in_date'].rename(sec) for sec, df_sec in dic_sec.items()],
        axis=1)
    df_value = pd.concat(
        [df_sec.loc[:, 'industry1_code'].rename(sec) for sec, df_sec in dic_sec.items()],
        axis=1)

    dates_arr = ds.get_trade_date(20140101, 20170505)
    # Align all securities at once; the result is overwritten below.
    res = align(df_value, df_ann, dates_arr)

    def align_single_df(df_one_sec):
        """Align the industry codes of a single security to ``dates_arr``."""
        df_value = df_one_sec.loc[:, ['industry1_code']]
        df_ann = df_one_sec.loc[:, ['in_date']]
        res = align(df_value, df_ann, dates_arr)
        return res

    # Only the first 10 securities are aligned individually (presumably to keep
    # the test fast; the full set is exercised by the wide call above).
    # ``list(...)`` is required because dict views cannot be sliced on Python 3.
    first_secs = list(dic_sec.items())[:10]
    res_list = [align_single_df(df_sec) for sec, df_sec in first_secs]
    res = pd.concat(res_list, axis=1)
Esempio n. 2
0
def test_align():
    """Evaluate a parsed formula on financial-statement data and check the result.

    Queries 'oper_rev' from the 'income' statement of 600000.SH, reshapes the
    announcement dates and the values into frames indexed by report date with
    one column per symbol, evaluates ``revenue / Myfunc(close)`` through
    ``Parser`` against the trade-date calendar, and asserts three values of
    the evaluated frame.
    """
    # -------------------------------------------------------------------------------------
    # input and pre-process demo data
    ds = RemoteDataService()
    raw, msg = ds.query_lb_fin_stat(
        'income', '600000.SH', 20151225, 20170501, 'oper_rev')
    assert msg == '0,'

    index_cols = ['report_date', 'symbol']
    indexed = raw.set_index(index_cols).sort_index(axis=0, level=index_cols)

    # Select every (report_date, symbol) row, then move the symbol level
    # (level 1) into the columns.
    every_row = pd.IndexSlice[:, :]
    df_ann = indexed.loc[every_row, 'ann_date'].unstack(level=1)
    df_value = indexed.loc[every_row, 'oper_rev'].unstack(level=1)

    date_arr = ds.get_trade_date(20160101, 20170501)
    # Constant-valued stand-in for close prices, shaped trade dates x symbols.
    df_close = pd.DataFrame(data=1e3, index=date_arr, columns=df_value.columns)

    # -------------------------------------------------------------------------------------
    # demo usage of parser
    parser = Parser()
    # simultaneously test register function and align
    parser.register_function('Myfunc', lambda x: x * 0 + 1)
    expr_formula = 'revenue / Myfunc(close)'
    # The return value is not used below; the call itself is kept.
    expression = parser.parse(expr_formula)
    for _ in range(100):
        # A fresh mapping is built on every pass, as in the original loop.
        variables = {'revenue': df_value, 'close': df_close}
        df_res = parser.evaluate(variables, df_ann, date_arr)

    # -------------------------------------------------------------------------------------
    sec = '600000.SH'
    """
    # print to validate results
    print "\n======Expression Formula:\n{:s}".format(expr_formula)
    
    print "\n======Report date, ann_date and evaluation value:"
    tmp = pd.concat([df_ann.loc[:, sec], df_value.loc[:, sec]], axis=1)
    tmp.columns = ['df_ann', 'df_value']
    print tmp
    
    print "\n======Selection of result of expansion:"
    print "20161028  {:.4f}".format(df_res.loc[20161028, sec])
    print "20161031  {:.4f}".format(df_res.loc[20161031, sec])
    print "20170427  {:.4f}".format(df_res.loc[20170427, sec])
    
    """
    # (trade date, expected value) pairs, checked in this order.
    expected_values = (
        (20161028, 82172000000),
        (20161031, 120928000000),
        (20170427, 42360000000),
    )
    for trade_date, expected in expected_values:
        assert abs(df_res.loc[trade_date, sec] - expected) < 1