Ejemplo n.º 1
0
def get_secs_name_from_sql(sec_ids=[], index_code="A"):
    """
    获取最新日期A股或港股股票名称,数据格式为 {股票代码:股票名称}

    @sec_id (list): 股票列表 默认为空 表示最新日期的所有A股
    @index_code (str): 数据库名称
    :return: {sec_id: sec_name}
    """

    last_date = get_schema('index_contents')[index_code]['end date']

    dbpath = os.path.join(DB_INDEX_CONTENTS, '{}.db'.format(last_date[:4]))
    with SqliteProxy(log=False) as proxy:
        proxy.connect(dbpath)
        if len(sec_ids) == 0:  # 默认取所有股票
            query = "SELECT sec_id, sec_name FROM [{}] WHERE date = '{}'".format(
                index_code, last_date)
        elif len(sec_ids) == 1:
            query = "SELECT sec_id, sec_name FROM [{}] WHERE date = '{}' AND sec_id = '{}'".format(
                index_code, last_date, sec_ids[0])
        else:
            query = "SELECT sec_id, sec_name FROM [{}] WHERE date = '{}' AND sec_id in {}".format(
                index_code, last_date, tuple(sec_ids))
        df = proxy.query_as_dataframe(query).set_index("sec_id")

        if len(df) == 0:
            Logger.warn("Empty result for query contents from {} on {}".format(
                index_code, last_date))

        output = {
            sec: df.at[sec, "sec_name"]
            for sec in sec_ids if sec in df.index
        }
        return output
Ejemplo n.º 2
0
def get_index_weights(index_code, date=""):
    """
    读取单个日期指数成分股的权重

    @index_code (str): 指数代码,目前支持 ['000016.SH', '000300.SH', '000905.SH']
    @date (%Y-%m-%d): 单个日期
    :return: {sec_id: weight}
    """

    if not date:
        Logger.error("Empty date")
        raise ValueError

    if index_code not in ['000016.SH', '000300.SH', '000905.SH']:
        Logger.error("Invalid index code: {}".format(index_code))

    dbpath = os.path.join(DB_INDEX_CONTENTS, '{}.db'.format(date[:4]))
    with SqliteProxy(log=False) as proxy:
        proxy.connect(dbpath)
        query = "SELECT sec_id, weight FROM [{}] WHERE date = '{}' ".format(
            index_code, date)
        df = proxy.query_as_dataframe(query)

        if len(df) == 0:
            Logger.warn("Empty result when reading {} at {}".format(
                index_code, date))
            output = {}
        else:
            output = {
                df.at[i, 'sec_id']: df.at[i, 'weight']
                for i in range(len(df))
            }

        return output
Ejemplo n.º 3
0
def get_previous_existed_day_in_table(date, db_path, table_name):
    """
    获取数据库特定表中已存在交易日中,比date小且距date最近的交易日

    :param file_dir: <str> 文件夹路径
    :param date: <%Y-%m-%d> 日期名称
    :return: last_factor_day <%Y-%m-%d>
    """

    year = date[:4]
    output = None
    while True:
        filepath = os.path.join(db_path, '{}.db'.format(year))
        if not os.path.exists(filepath):
            break

        with SqliteProxy(log=False) as proxy:
            proxy.connect(filepath)
            if table_name not in proxy.list_tables:
                break

            try:
                query = "SELECT MAX(date) as date FROM [{}] WHERE date < '{}'".format(
                    table_name, date)
                df = proxy.query_as_dataframe(query)
            except Exception:
                traceback.print_exc()
                break

            output = df.at[0, 'date']
            if output:
                break
        year = str(int(year) - 1)

    return output
Ejemplo n.º 4
0
def update_index_contents_to_sql(index_code,
                                 trading_days,
                                 override,
                                 log=False):
    with SqliteProxy(log=log) as proxy:
        date_classfier = classify_dates_by_year(trading_days)

        for year, date_list in date_classfier.items():
            path = os.path.join(DB_INDEX_CONTENTS, '{}.db'.format(year))
            proxy.connect(path)
            if index_code not in proxy.list_tables:
                create_table(proxy, "index_contents", index_code)

            # 判断已有数据
            query = "SELECT DISTINCT(date) FROM [{}]".format(index_code)
            lookup = proxy.query_as_dataframe(query)
            lookup = set(lookup['date'].tolist())

            for date in date_list:
                if date in lookup and not override:  # 更新的日期已经存在于数据库时,不覆盖则跳过
                    if log:
                        Logger.warn("{} records on {} is existed.".format(
                            index_code, date))
                    continue

                try:
                    loader = LOADER_MAP[index_code]
                    df = loader(index_code, date)
                    df['date'] = date
                except Exception:
                    Logger.error("Error occurred when loading {} on {}".format(
                        index_code, date))
                    raise ValueError

                if df is not None:  # 从Wind下载数据成功时
                    try:
                        if date in lookup and override:  # 覆盖时删除原记录
                            proxy.execute(
                                "DELETE FROM [{}] WHERE date = '{}'".format(
                                    index_code, date))

                        proxy.write_from_dataframe(df, index_code)
                    except Exception:
                        Logger.error(
                            "Error occurred when writing {} on {}".format(
                                index_code, date))
                        traceback.print_exc()
                        raise ValueError

                    Logger.info("{} on {} is updated successfully".format(
                        index_code, date))
                else:  # 从wind提取数据失败时
                    Logger.error("Fail to fetch {} data on {}".format(
                        index_code, date))
                    raise ValueError

    update_schema('index_contents', index_code)
Ejemplo n.º 5
0
def get_secs_factor_on_multidays(factor,
                                 sec_ids=[],
                                 trading_days=[],
                                 log=False):
    """
    从本地数据库中获取一段日期的单个factor的值,并返回 dict of DataFrame

    @factor (str): 单个factor
    @sec_ids (list): 支持多个股票查询,默认为[],表示查询范围是全A股
    @trading_days (["%Y-%m-%d"]): 日期列表
    @log (Bool): 是否打印log
    :return: {date: Dataframe},其中 DataFrame 列为factor名,index为sec_id
    """

    if log:
        Logger.info(
            "Reading {} from {} to {}".format(factor, trading_days[0],
                                              trading_days[-1]), "green")

    if factor not in get_schema("factor"):
        Logger.error("Unrecognized factor: {}".format(factor))
        raise ValueError

    if not isinstance(sec_ids, list):
        Logger.error("sec_ids must be list!")
        raise ValueError

    if not trading_days:
        Logger.error("Empty date")
        raise ValueError

    # 长连接效率更高,所以这里不是复用 get_secs_factor 而是重新写
    with SqliteProxy(log=log) as proxy:
        output = {}
        for year, date_list in classify_dates_by_year(trading_days).items():
            path = os.path.join(DB_FACTOR, '{}.db'.format(year))
            proxy.connect(path)
            for date in date_list:
                if len(sec_ids) == 0:  # 为空默认全A股
                    conds = ""
                elif len(sec_ids) == 1:
                    conds = "AND sec_id = '{}'".format(sec_ids[0])
                else:
                    conds = "AND sec_id IN {}".format(tuple(sec_ids))
                query = "SELECT sec_id, {} FROM [{}] WHERE date = '{}' {}".format(
                    factor, factor, date, conds)
                try:
                    df = proxy.query_as_dataframe(query)
                except Exception:
                    Logger.error("Error occurred when reading {} at {}".format(
                        factor, date))
                    traceback.print_exc()
                    raise ValueError

                output[date] = df

    return output
Ejemplo n.º 6
0
def get_trading_days(start, end, format=lambda x: x, log=False):
    db = SqliteProxy(log=log)
    db.connect(os.path.join(DB_CALENDAR_PATH, "calendar.db"))
    query = ("SELECT date FROM calendar WHERE date BETWEEN '{}' AND '{}'"
             "AND is_trading_day = 1").format(start, end)
    date = [format(r[0]) for r in db.execute(query)]
    db.close()
    return date
Ejemplo n.º 7
0
def get_secs_industry_from_sql(industry_code, sec_ids=[], date=""):
    dbpath = os.path.join(DB_INDUSTRY, '{}.db'.format(date[:4]))
    with SqliteProxy(log=False) as proxy:
        proxy.connect(dbpath)
        if len(sec_ids) == 1:
            query = "SELECT sec_id, industry FROM [{}] WHERE date='{}' and sec_id = '{}'".format(industry_code, date, sec_ids[0])
        else:
            query = "SELECT sec_id, industry FROM [{}] WHERE date='{}' and sec_id in {}".format(industry_code, date, tuple(sec_ids))
        df = proxy.query_as_dataframe(query).set_index('sec_id')
        return {sec_id: df.at[sec_id, 'industry'] for sec_id in sec_ids if sec_id in df.index}
Ejemplo n.º 8
0
def get_index_contents_on_multidays(index_code, trading_days=[], log=False):
    """
    读取多个日期某指数全部股票列表

    @index_code (str): 指数代码,目前支持 ['A', '000905.SH', '000300.SH', '000016.SH']
    @trading_days (['%Y-%m-%d']): 日期列表
    @log (Bool): 是否打印log
    :return: ({date: list}), key为date value为 股票代码列表
    """

    if log:
        Logger.info(
            "Reading all {} records between trading_days ...".format(
                index_code), "green")

    if len(trading_days) == 0:
        Logger.error("Empty date")
        raise ValueError
    elif len(trading_days) == 1:
        date = trading_days[0]
        return {date: get_index_contents(index_code, date, log=False)}

    output = {}
    if index_code in IDXCONT_AS_SQL:
        with SqliteProxy(log=log) as proxy:
            for year, date_list in classify_dates_by_year(
                    trading_days).items():
                path = os.path.join(DB_INDEX_CONTENTS, '{}.db'.format(year))
                proxy.connect(path)

                query = "SELECT date, sec_id FROM [{}] WHERE date IN {}".format(
                    index_code, tuple(date_list))
                try:
                    df = proxy.query_as_dataframe(query)
                except Exception:
                    Logger.error(
                        "Empty result when reading {} from {} to {}".format(
                            index_code, trading_days[0], trading_days[-1]))
                    traceback.print_exc()
                    raise ValueError

                if len(df) == 0:
                    Logger.warn(
                        "Empty result when reading {} from {} to {}".format(
                            index_code, trading_days[0], trading_days[-1]))

                for date in date_list:
                    output[date] = df[df.date == date]['sec_id'].tolist()
    elif index_code in IDXCONT_AS_CSV:
        info = get_index_contents_from_csv(index_code)
        output = {date: info for date in trading_days}
    else:
        Logger.error("Unrecognized index code: {}".format(index_code))
        raise ValueError
    return output
Ejemplo n.º 9
0
def delete_records(conds):
    """从数据库中删除满足指定条件的记录"""

    with SqliteProxy(log=True) as proxy:
        for db_name, db_path in DB_PATH_LIB.items():
            for db_file in listdir_advanced(db_path, "db"):
                db_path = os.path.join(db_path, db_file)
                proxy.connect(db_path)
                for table in proxy.list_tables:
                    try:
                        proxy.execute("DELETE FROM [{}] WHERE {}".format(
                            table, conds))
                    except Exception:
                        traceback.print_exc()
                        continue
Ejemplo n.º 10
0
def get_secs_factor(factor, sec_ids=[], date="", log=False):
    """
    从本地数据库中获取单个日期的单个factor的值,并返回 DataFrame

    @factor (str): 单个factor
    @sec_ids (list): 支持多个股票查询,默认为[],表示查询范围是全A股
    @date ('%Y-%m-%d'): 单个日期
    @log (Bool): 是否打印log
    :return: Dataframe 列为factor名,index为sec_id
    """

    if log:
        Logger.info("Reading {} at {}".format(factor, date), "green")

    if factor not in get_schema("factor"):
        Logger.error("Unrecognized factor: {}".format(factor))
        raise ValueError

    if not isinstance(sec_ids, list):
        Logger.error("sec_ids must be list!")
        raise ValueError

    if not date:
        Logger.error("Empty date")
        raise ValueError

    with SqliteProxy(log=log) as proxy:
        path = os.path.join(DB_FACTOR, '{}.db'.format(date[:4]))
        proxy.connect(path)

        if len(sec_ids) == 0:  # 为空默认全A股
            conds = ""
        elif len(sec_ids) == 1:
            conds = "AND sec_id = '{}'".format(sec_ids[0])
        else:
            conds = "AND sec_id IN {}".format(tuple(sec_ids))

        query = "SELECT sec_id, {} FROM [{}] WHERE date = '{}' {}".format(
            factor, factor, date, conds)
        try:
            df = proxy.query_as_dataframe(query)
        except Exception:
            Logger.error("Error occurred when reading {} at {}".format(
                factor, date))
            traceback.print_exc()
            raise ValueError
        return df.sort_values(by=['sec_id']).set_index(['sec_id'])
Ejemplo n.º 11
0
def get_date_lists_in_table(db_path, table_name):
    """获取某张表中已有的日期,该表存在于多个db中,返回一个list"""

    datelist = []
    with SqliteProxy(log=False) as proxy:
        for db in listdir_advanced(db_path, "db"):
            path = os.path.join(db_path, db)
            proxy.connect(path)

            if table_name in proxy.list_tables:
                query = "SELECT DISTINCT(date) FROM [{}]".format(table_name)
                try:
                    df = proxy.query_as_dataframe(query)
                    datelist += df['date'].tolist()
                except Exception:
                    traceback.print_exc()
    return sorted(datelist)
Ejemplo n.º 12
0
def get_index_contents_from_sql(index_code, date="", log=False):
    path = os.path.join(DB_INDEX_CONTENTS, '{}.db'.format(date[:4]))
    with SqliteProxy(log=log) as proxy:
        proxy.connect(path)
        query = "SELECT sec_id FROM [{}] WHERE date = '{}'".format(
            index_code, date)
        try:
            df = proxy.query_as_dataframe(query)
        except Exception:
            Logger.error("Error occurred when reading {} at {}".format(
                index_code, date))
            traceback.print_exc()
            raise ValueError

    if len(df) == 0:
        Logger.warn("Empty result when reading {} at {}".format(
            index_code, date))
        return []

    return df["sec_id"].tolist()
Ejemplo n.º 13
0
def sqlize_db(db_name, subdb_list=[]):
    """将数据库sql化"""

    if not subdb_list:
        subdb_list = list(get_schema(db_name).keys())
    else:
        subdb_list = [s for s in subdb_list if s in get_schema(db_name)]

    db_path = os.path.join(DB_PATH, db_name)

    with SqliteProxy(log=False) as proxy:
        for subdb in subdb_list:
            Logger.info("SQLing {}/{}".format(db_name, subdb), "green")

            subdb_path = os.path.join(db_path, subdb)
            trading_days = listdir_advanced(subdb_path,
                                            'csv',
                                            strip_suffix=True)
            for year, dates in classify_dates_by_year(trading_days).items():
                path = os.path.join(db_path, '{}.db'.format(year))
                proxy.connect(path)

                if subdb not in proxy.list_tables:
                    creator = DB_CREATOR_MAP[db_name]
                    creator(proxy, subdb)

                for date in dates:
                    df = pd.read_csv(
                        os.path.join(subdb_path, '{}.csv'.format(date)))
                    df['date'] = date
                    try:
                        proxy.write_from_dataframe(df, subdb)
                    except Exception:
                        Logger.error(
                            "Error occurred when sqlizing {} on {}.".format(
                                subdb, date))
                        traceback.print_exc()
Ejemplo n.º 14
0
def update_single_indicator(indicator,
                            sec_ids=[],
                            trading_days=[],
                            override=False,
                            log=False):
    """
    更新单个indicator的指定日期列表的数据

    @indicator (str): 单个indicator的名称
    @sec_ids<list> : 股票代码列表
    @trading_days ([%Y-%m-%d]): 日期列表
    @override (Bool): 是否覆盖原记录 默认为False 表示不覆盖
    @log (Bool): 是否打印log
    """

    if log:
        Logger.info("Updating indicator {}".format(indicator), "green")

    if indicator not in get_schema('indicator'):
        Logger.error("Unrecognized indicator: {}".format(indicator))
        raise ValueError

    if not trading_days:
        Logger.error("Empty date")
        raise ValueError

    with SqliteProxy(log=log) as proxy:
        date_classfier = classify_dates_by_year(trading_days)

        for year, date_list in date_classfier.items():
            path = os.path.join(DB_INDICATOR, '{}.db'.format(year))
            proxy.connect(path)

            if indicator not in proxy.list_tables:
                create_table(proxy, "indicator", indicator)

            # 判断已有数据
            if len(date_list) == 1:
                query = "SELECT DISTINCT(date) FROM {} WHERE date = '{}'".format(
                    indicator, date_list[0])
            else:
                query = "SELECT DISTINCT(date) FROM {} WHERE date in {}".format(
                    indicator, tuple(date_list))
            lookup = proxy.query_as_dataframe(query)
            lookup = set(lookup['date'].tolist())

            for date in date_list:
                if date in lookup and not override:  # 更新的日期已经存在于数据库时,不覆盖则跳过
                    if log:
                        Logger.warn("{} records on {} is existed.".format(
                            indicator, date))
                    continue

                try:
                    df = load_single_indicator_on_single_day_from_wind(
                        indicator=indicator, sec_ids=sec_ids, date=date)
                except Exception:
                    Logger.error("Error occurred when loading {} on {}".format(
                        indicator, date))
                    raise ValueError

                if df is not None:  # 从Wind下载数据成功时
                    if date in lookup and override:  # 覆盖时删除原记录
                        if len(sec_ids) == 0:
                            proxy.execute(
                                "DELETE FROM [{}] WHERE date = '{}'".format(
                                    indicator, date))
                        if len(sec_ids) == 1:
                            proxy.execute(
                                "DELETE FROM [{}] WHERE date = '{}' and sec_id = '{}'"
                                .format(indicator, date, sec_ids[0]))
                        else:
                            proxy.execute(
                                "DELETE FROM [{}] WHERE date = '{}' and sec_id in {}"
                                .format(indicator, date, tuple(sec_ids)))
                    df['date'] = date
                    try:
                        proxy.write_from_dataframe(df, indicator)
                    except Exception:
                        Logger.error(
                            "Error occurred when writing {} on {}".format(
                                indicator, date))
                        traceback.print_exc()
                        raise ValueError
                    if log:
                        Logger.info("{} on {} is updated successfully".format(
                            indicator, date))

                else:  # 从wind提取数据失败时
                    Logger.error("Fail to fetch {} data on {}".format(
                        indicator, date))
                    raise ValueError

    update_schema(db_name="indicator", sub_name=indicator)

    if log:
        Logger.info("indicator {} is updated.".format(indicator),
                    color="green")
        Logger.info("------------------------------------------")
Ejemplo n.º 15
0
def update_single_factor(factor, trading_days=[], override=False, log=False):
    """
    更新单个factor的指定日期列表的数据

    @factor (str): factor名称
    @trading_days ([%Y-%m-%d]): 日期列表
    @override (Bool): 是否覆盖原记录,默认为False,表示不覆盖
    @log (Bool): 是否打印log
    """

    Logger.info("Updating factor {}".format(factor), "green")

    _n_updated_date = 0

    if factor not in get_schema('factor'):
        Logger.error("Unrecognized factor: {}".format(factor))
        raise ValueError

    if not trading_days:
        Logger.error("Empty date")
        raise ValueError

    with SqliteProxy(log=log) as proxy:
        date_classfier = classify_dates_by_year(trading_days)

        for year, date_list in date_classfier.items():
            path = os.path.join(DB_FACTOR, '{}.db'.format(year))
            proxy.connect(path)

            if factor not in proxy.list_tables:
                create_table(proxy, "factor", factor)

            # 判断已有数据
            if len(date_list) == 1:
                query = "SELECT DISTINCT(date) FROM {} WHERE date = '{}'".format(
                    factor, date_list[0])
            else:
                query = "SELECT DISTINCT(date) FROM {} WHERE date in {}".format(
                    factor, tuple(date_list))
            lookup = proxy.query_as_dataframe(query)
            lookup = set(lookup['date'].tolist())

            for date in date_list:
                if date in lookup and not override:  # 更新的日期已经存在于数据库时,不覆盖则跳过
                    if log:
                        Logger.warn("{} records on {} is existed.".format(
                            factor, date))
                    continue

                try:
                    df = load_single_factor_on_single_day(factor=factor,
                                                          date=date)
                except Exception:
                    Logger.error("Error occurred when loading {} on {}".format(
                        factor, date))
                    traceback.print_exc()
                    continue

                if df is not None:  # 成功取得indicator
                    if date in lookup and override:  # 覆盖时删除原记录
                        proxy.execute(
                            "DELETE FROM [{}] WHERE date = '{}'".format(
                                factor, date))

                    df['date'] = date
                    try:
                        proxy.write_from_dataframe(df, factor)
                    except Exception:
                        Logger.error(
                            "Error occurred when writing {} on {}".format(
                                factor, date))
                        traceback.print_exc()
                        raise ValueError

                    if log:
                        Logger.info("{} on {} is updated successfully".format(
                            factor, date))
                    _n_updated_date += 1
                else:  # 从wind提取数据失败时
                    Logger.error("Fail to fetch {} data on {}".format(
                        factor, date))
                    raise ValueError

    update_schema(db_name="factor", sub_name=factor)

    if log:
        _n_all_date = len(trading_days)
        _n_existed_date = _n_all_date - _n_updated_date
        Logger.info("传入日期数:{}  已经存在个数:{}  实际写入次数:{}".format(
            _n_all_date, _n_existed_date, _n_updated_date))
        Logger.info("factor {} is updated.".format(factor), color="green")
        Logger.info("------------------------------------------")