Example #1
0
def test_load():
    """Load a prepared dataview from disk and check its basic accessors.

    Verifies the loaded start date and symbol universe, then the shape and
    labels returned by ``get_snapshot`` and ``get_ts``.
    """
    view = DataView()
    view.load_dataview(folder='../output/prepared/20160601_20170601_freq=1D')

    expected_universe = set('000001.SZ,600030.SH,000063.SZ'.split(','))
    assert view.start_date == 20160601
    assert set(view.symbol) == expected_universe

    # Both queries below are restricted to the same pair of symbols.
    pair = '600030.SH,000063.SZ'
    pair_set = {'600030.SH', '000063.SZ'}

    # get_snapshot: one row per symbol, one column per requested field.
    snapshot = view.get_snapshot(20170504, symbol=pair, fields='close,pb')
    assert snapshot.shape == (2, 2)
    assert set(snapshot.columns.values) == {'close', 'pb'}
    assert set(snapshot.index.values) == pair_set

    # get_ts: one row per date in the requested range, one column per symbol.
    series = view.get_ts('close', symbol=pair,
                         start_date=20170101, end_date=20170302)
    assert series.shape == (38, 2)
    assert set(series.columns.values) == pair_set
    assert series.index.values[-1] == 20170302
Example #2
0
def store_ic_weight():
    """
    Calculate IC weight and save it to file.

    Steps:

    1. Load the dataview stored at ``dataview_dir_path``.
    2. For every date in ``dv.dates``, take the snapshot, keep the rows where
       all factors are present and pass them through ``Schmidt``.
    3. Append the results to the dataview as ``<factor>_adj`` fields and save
       the dataview back to ``dataview_dir_path``.
    4. Write the list of ``*_adj`` field names to ``custom_data_path``.
    5. Store the result of ``get_ic_weight(dv)`` under the key
       ``'ic_weight'`` in the HDF5 file at ``ic_weight_hd5_path``.
    """
    dv = DataView()
    dv.load_dataview(folder_path=dataview_dir_path)

    factorList = ['TO', 'BP', 'REVS20', 'float_mv_factor']
    factorList_adj = [x + '_adj' for x in factorList]

    # factor name -> {trade_date: Series of orthogonalized values by symbol}
    orthFactor_dic = {factor: {} for factor in factorList}

    # add the orthogonalized factor to dataview
    for trade_date in dv.dates:
        snapshot = dv.get_snapshot(trade_date)
        factorPanel = snapshot[factorList].dropna()

        # Dates with no complete row are skipped (nothing to orthogonalize).
        if factorPanel.empty:
            continue

        orthfactorPanel = Schmidt(factorPanel)
        # NOTE(review): assumes Schmidt keeps the input's index and column
        # order -- confirm against its implementation.
        orthfactorPanel.columns = factorList_adj

        for factor, factor_adj in zip(factorList, factorList_adj):
            # Store the *orthogonalized* column (previously the raw factor
            # column was stored and then saved under the '_adj' name).
            # Reindexing to the snapshot's index keeps one entry per symbol,
            # with NaN for rows that were dropped above, and avoids column
            # name clashes if the snapshot already contains '*_adj' fields.
            orthFactor_dic[factor][trade_date] = (
                orthfactorPanel[factor_adj].reindex(snapshot.index)
            )

    for factor, factor_adj in zip(factorList, factorList_adj):
        # Transpose so that rows are trade dates and columns are symbols.
        dv.append_df(pd.DataFrame(orthFactor_dic[factor]).T,
                     field_name=factor_adj, is_quarterly=False)
    dv.save_dataview(dataview_dir_path)

    fileio.save_json(factorList_adj, custom_data_path)

    w = get_ic_weight(dv)

    # The context manager closes the store even if the write fails.
    with pd.HDFStore(ic_weight_hd5_path) as store:
        store['ic_weight'] = w
Example #3
0
def calculate_pe_pb_of_index_single_day(date):
    """Compute median and equal-weighted PE/PB from one day's snapshot.

    Loads the dataview at ``dataview_dir_path``, takes the ``pe_ttm`` and
    ``pb`` snapshot for ``date`` and aggregates the two ratios.

    Returns:
        Tuple ``(date, pe_median, pe_equal, pb_median, pb_equal)``.

    Raises:
        Exception: if the share of rows without any NaN is 98% or less.
    """
    view = DataView()
    view.load_dataview(folder_path=dataview_dir_path)

    # NOTE(review): symbol='' presumably selects every symbol -- confirm.
    data = view.get_snapshot(date, symbol='', fields='pe_ttm,pb')

    # Data-quality gate: refuse to aggregate when too many rows have a NaN.
    complete_share = len(data.dropna(how='any')) / len(data)
    if complete_share <= 0.98:
        raise Exception('Nan of Data is too much.')
    data.dropna(how='any', inplace=True)

    # Number of constituents left after dropping incomplete rows.
    N = len(data)

    # Work on reciprocals (E/P, B/P): ranking by the reciprocal removes the
    # distortion caused by negative ratios.
    inv_pe = 1 / data['pe_ttm']
    inv_pb = 1 / data['pb']

    # Median of the reciprocals, converted back to a ratio.
    pe_median = 1 / inv_pe.median()
    pb_median = 1 / inv_pb.quantile(0.5)

    # Equal-weighted value, i.e. the harmonic mean of the ratios.
    pe_equal = N / inv_pe.sum()
    pb_equal = N / inv_pb.sum()

    print(data)
    summary = (date, pe_median, pe_equal, pb_median, pb_equal)
    print(*summary)
    return summary