Ejemplo n.º 1
0
def check_dict(path=FACTOR_DICT_FILE_PATH):
    '''
    Compare the factor dictionary stored on disk with the one derived from the
    factors defined in the current module, and regenerate the file when it is
    missing or differs.

    Parameter
    ---------
    path: str, default FACTOR_DICT_FILE_PATH
        Location of the pickled factor dictionary that is read for the comparison

    Notes
    -----
    Regeneration is delegated to update_factordict(), which is called without
    arguments; NOTE(review): confirm that it writes to the same location as
    `path` when a non-default path is passed.
    '''
    try:
        # Honour the caller-supplied path instead of always reading the default file
        file_dict = load_pickle(path)
    except FileNotFoundError:  # no dictionary file yet, so create it
        print('Dictionary file not found, initialization...')
        update_factordict()
        return
    module_dict = gen_path_dict(get_factor_dict())
    # The file content loaded above is reused; a second read is unnecessary
    if module_dict != file_dict:
        print('Updating dictionary file...')
        update_factordict()
Ejemplo n.º 2
0
def query(factor_name, time, codes=None, fillna=None):
    '''
    Serve an external request by reading the data of one factor from its database.

    Parameter
    ---------
    factor_name: str
        Name of the factor to query
    time: type that can be converted by pd.to_datetime or tuple of that
        A single value requests cross-sectional data, a tuple
        (start_time, end_time) requests time-series data
    codes: list, default None
        Stock codes to query; None (the default) means all stocks
    fillna: int or float, default None
        Value used to fill NA entries; None (the default) leaves NA values untouched

    Return
    ------
    out: pd.DataFrame
        Query result with time as index and stock codes as columns, or None
        when no data matches the request

    Raises
    ------
    ValueError
        If the factor dictionary file has not been initialized
    AssertionError
        If factor_name is not a key of the factor dictionary
    '''
    factor_dict = load_pickle(FACTOR_DICT_FILE_PATH)
    if factor_dict is None:
        raise ValueError('Dictionary file needs initialization...')
    assert factor_name in factor_dict, \
        'Error, factor name "{pname}" is'.format(pname=factor_name) +\
        ' not valid, valid names are {vnames}'.format(vnames=list(factor_dict.keys()))
    abs_path = factor_dict[factor_name]
    db = database.DBConnector(abs_path)
    data = db.query(time, codes)
    if data is None:
        # Nothing matched: return None as documented instead of failing on
        # None.fillna(...) when a fill value was supplied
        return None
    if fillna is not None:
        data = data.fillna(fillna)
    return data
Ejemplo n.º 3
0
def get_universe(path=UNIVERSE_FILE_PATH):
    '''
    Read the universe that the currently stored data is based on.

    Parameter
    ---------
    path: str, default UNIVERSE_FILE_PATH
        Location of the pickled universe file

    Return
    ------
    out: list
        The universe taken from the first element of the pickled object,
        in sorted order
    '''
    saved = datatoolkits.load_pickle(path)
    # Copy into a fresh list before sorting so the loaded object is left untouched
    ordered = list(saved[0])
    ordered.sort()
    return ordered
Ejemplo n.º 4
0
def update_universe(path=UNIVERSE_FILE_PATH):
    '''
    Fetch the latest universe, report any difference from the universe stored
    in the file, then overwrite the file with the tuple (universe, update_time).

    Parameter
    ---------
    path: str, default UNIVERSE_FILE_PATH
        File in which the universe data is stored

    Return
    ------
    universe: list
        The latest universe

    Notes
    -----
    Do not call this function yourself just to obtain the universe: the result
    may be inconsistent with the universe of the stored factor data. Use
    fmanger.factors.utils.get_universe to read the current universe instead.
    '''
    # NOTE(review): str.split() without an argument splits on whitespace, so for
    # an ordinary module name this is the full name; confirm whether
    # split('.') (top-level package logger) was intended.
    logger = logging.getLogger(__name__.split()[0])
    latest = fdgetter.get_db_data(fdgetter.BASIC_SQLs['A_UNIVERSE'],
                                  cols=('code', ),
                                  add_stockcode=False)
    latest['code'] = latest.code.apply(datatoolkits.add_suffix)
    latest_codes = latest.code.tolist()
    try:
        previous_codes, _ = datatoolkits.load_pickle(path)
        latest_set, previous_set = set(latest_codes), set(previous_codes)
        if latest_set != previous_set:
            added = list(latest_set - previous_set)
            dropped = list(previous_set - latest_set)
            template = 'Warning: universe UPDATED, {drop} are DROPED, {add} are ADDED'
            msg = template.format(drop=dropped, add=added)
            logger.info(msg)
            print(msg)
    except FileNotFoundError:
        # No earlier universe file to compare against; just write a new one below
        pass
    datatoolkits.dump_pickle((latest_codes, dt.datetime.now()), path)
    return latest_codes
Ejemplo n.º 5
0
def query(factor_name, time, codes=None, fillna=None):
    '''
    Serve an external request by reading the data of one factor from its database.

    Parameter
    ---------
    factor_name: str
        Name of the factor to query
    time: type that can be converted by pd.to_datetime or tuple of that
        A single value requests cross-sectional data, a tuple
        (start_time, end_time) requests time-series data
    codes: list, default None
        Stock codes to query; None (the default) means all stocks, in which
        case the columns are aligned to the stored universe
    fillna: int or float, default None (this parameter is slated for removal)
        Value used to fill NA entries; when None, the database's default_data
        value is used instead

    Return
    ------
    out: pd.DataFrame
        Query result with time as index and stock codes as columns, or None
        when no data matches the request

    Raises
    ------
    ValueError
        If the factor dictionary file has not been initialized
    AssertionError
        If factor_name is not a key of the factor dictionary
    '''
    # After moving to another machine the factor dictionary must be updated first
    factor_dict = load_pickle(FACTOR_DICT_FILE_PATH)
    if factor_dict is None:
        raise ValueError('Dictionary file needs initialization...')
    assert factor_name in factor_dict, \
        'Error, factor name "{pname}" is'.format(pname=factor_name) +\
        ' not valid, valid names are {vnames}'.format(vnames=sorted(factor_dict.keys()))
    abs_path = factor_dict[factor_name]
    db = database.DBConnector(abs_path)
    data = db.query(time, codes)
    if data is None:
        return None
    if codes is None:
        # Align to the stored universe so that cross-sections of different
        # factors have the same length; the universe file is read only in
        # this branch because it is not needed when codes are given
        data = data.reindex(columns=get_universe())
    if fillna is None:
        fillna = db.default_data
        if isinstance(fillna, np.bytes_):
            # default_data can be a numpy bytes scalar; fill with its text form
            fillna = fillna.decode('utf8')
    data = data.fillna(fillna)
    return data
Ejemplo n.º 6
0
        formater.get_basicformater(param)}

    Notes
    -----
    函数会根据参数的形式推断需要使用的formater的方法,推断方法如下:
    当字典值为tuple时会使用get_modformater,当字典值为str时,使用get_basicformater
    '''
    out = dict()
    for col, format_type in format_set.items():
        if isinstance(format_type, str):
            out[col] = formater.get_basicformater(format_type)
        else:
            f, p = format_type
            out[col] = formater.get_modformater(f, p)
    return out


if __name__ == '__main__':
    # Manual smoke run: format a pickled sample table with one basic formater
    # ('nav') and one parameterised formater ('CSI700').
    test_data = datatoolkits.load_pickle(
        r"F:\GeneralLib\CONST_DATAS\htmltable.pickle")
    column_formats = {'nav': 'pct2p', 'CSI700': ('pctnp', 4)}
    column_order = ['nav', 'index', 'CSI700']
    res = table_convertor.format_df(test_data.reset_index(),
                                    formater=trans2formater(column_formats),
                                    order=column_order)
    # Disabled scratch check, kept for reference:
    # ret = datatoolkits.load_pickle(r"F:\GeneralLib\CONST_DATAS\sample_ret.pickle")
    # sr = sortino_ratio(ret.group_05.pct_change().dropna(), 0.04)
    # print(sr)
Ejemplo n.º 7
0
#!/usr/bin/env python
# -*- coding:utf-8
"""
Author:  Hao Li
Email: [email protected]
Github: https://github.com/SAmmer0
Created: 2018/3/20
"""
import pandas as pd

from datatoolkits import load_pickle
import dateshandle
from tdtools.tradingcalendar import TradingCalendar

# Script-style check of TradingCalendar against the older dateshandle helpers.
# Pickled trading-day data is loaded from a fixed local path at import time.
TD_PATH = r"E:\GeneralLib\CONST_DATAS\tradingDays.pickle"
td_data = load_pickle(TD_PATH)
# Two (start, end) time pairs per day; presumably the morning and afternoon
# trading sessions -- confirm against TradingCalendar's documentation.
trading_times = (('09:30', '11:30'), ('13:00', '15:00'))

# Calendar under test, built from the loaded trading days and session times.
sse_calendar = TradingCalendar(td_data, trading_times)

# Trading-day count test: the new calendar must report the same count as the
# legacy dateshandle.tds_count for the same date range.
start_time = '2017-01-01'
end_time = '2018-03-02'
# NOTE(review): 'both' presumably makes both endpoints inclusive -- confirm.
mod_cnt = sse_calendar.count(start_time, end_time, 'both')
old_cnt = dateshandle.tds_count(start_time, end_time)
assert mod_cnt == old_cnt

# 交易日区间测试
start_time = '2016-03-01'
end_time = '2017-11-03'
mod_tds = sse_calendar.get_tradingdays(start_time,