# Example 1
0
def test_remote_data_service_industry():
    """Smoke-test ``align`` on raw industry records fetched from the data service.

    Steps:
      1. Fetch the members of index '000300.SH' and their raw industry
         records (``type_='ZZ'``), casting ``in_date`` to int.
      2. Group the records by symbol and build two wide frames (one column
         per symbol): the ``in_date`` values and the ``industry1_code`` values.
      3. Run ``align`` once on the wide frames, then once per symbol for the
         first ten symbols, and print the concatenated per-symbol result.

    The test makes no assertions; it passes as long as nothing raises.
    Requires a reachable remote data service.
    """
    from quantos.data.align import align
    from quantos.data.dataview import DataView
    import pandas as pd

    ds = RemoteDataService()
    arr = ds.get_index_comp(index='000300.SH', start_date=20130101, end_date=20170505)
    df = ds.get_industry_raw(symbol=','.join(arr), type_='ZZ')
    df = df.astype(dtype={'in_date': int})

    # One frame per symbol. The loop variable is named df_sec so it never
    # rebinds the outer `df` (list-comprehension variables leak on Python 2).
    dic_sec = DataView._group_df_to_dict(df, by='symbol')
    dic_sec = {sec: df_sec.reset_index() for sec, df_sec in dic_sec.items()}

    # Wide frames: one column per symbol, named after the symbol.
    df_ann = pd.concat(
        [df_sec.loc[:, 'in_date'].rename(sec) for sec, df_sec in dic_sec.items()],
        axis=1)
    df_value = pd.concat(
        [df_sec.loc[:, 'industry1_code'].rename(sec) for sec, df_sec in dic_sec.items()],
        axis=1)

    dates_arr = ds.get_trade_date(20140101, 20170505)

    # Wide-form call: the result is not inspected, the call only has to succeed.
    align(df_value, df_ann, dates_arr)

    def align_single_df(df_one_sec):
        """Align one symbol's industry1_code onto dates_arr using its in_date."""
        value_col = df_one_sec.loc[:, ['industry1_code']]
        ann_col = df_one_sec.loc[:, ['in_date']]
        return align(value_col, ann_col, dates_arr)

    # Only the first ten symbols are aligned individually.
    # list(...) keeps the slice valid on both Python 2 and Python 3.
    res_list = [align_single_df(df_sec) for df_sec in list(dic_sec.values())[:10]]
    res = pd.concat(res_list, axis=1)
    print(res)
# Example 2
0
def test_remote_data_service_industry_df():
    """Cross-check the daily industry table against raw records for one symbol.

    Fetches the raw industry records of '000008.SZ' and the daily industry
    table for all members of index '000300.SH', starting at the earliest raw
    ``in_date``. For every raw record, the daily table's value for the symbol
    must equal the record's ``industry1_code`` on ``in_date`` itself when that
    date is in the daily index, otherwise on the date returned by
    ``Calendar.get_next_trade_date(in_date)``.

    Requires a reachable remote data service.
    """
    from quantos.data.calendar import Calendar

    sec = '000008.SZ'
    type_ = 'ZZ'
    last_date = 20170505

    calendar = Calendar()
    service = RemoteDataService()

    members = service.get_index_comp(index='000300.SH', start_date=20130101, end_date=last_date)
    symbol_csv = ','.join(members)

    raw_records = service.get_industry_raw(symbol=sec, type_=type_)
    first_date = raw_records['in_date'].min()
    daily = service.get_industry_daily(symbol=symbol_csv, start_date=first_date,
                                       end_date=last_date, type_=type_)

    for _, record in raw_records.iterrows():
        in_date = record['in_date']
        expected = record['industry1_code']
        # Use in_date when the daily table has that row; otherwise fall back
        # to the calendar's next trade date.
        lookup_date = in_date if in_date in daily.index else calendar.get_next_trade_date(in_date)
        assert daily.loc[lookup_date, sec] == expected