def code_classify(code_list, classify_list):
    """Record, for each code in code_list, which classify groups it belongs to.

    Walks every classify group under each requested classify type, matches the
    group's stored code list against code_list, and rewrites the resulting
    (date, code, classify) table into the "other" HDF5 file.

    :param code_list: container of share codes to keep
    :param classify_list: classify type names (top-level HDF5 groups) to walk
    """
    f = h5py.File(conf.HDF5_FILE_CLASSIFY, 'a')
    console.write_head(conf.HDF5_OPERATE_ARRANGE, conf.HDF5_RESOURCE_TUSHARE,
                       conf.HDF5_OTHER_CODE_CLASSIFY)
    code_classify_df = tool.init_empty_df(["date", "code", "classify"])
    today_str = tradetime.get_today()
    for ctype in classify_list:
        for classify_name in f[ctype]:
            if f[ctype][classify_name].get(conf.HDF5_CLASSIFY_DS_CODE) is None:
                console.write_msg(classify_name + "的code列表不存在")
                # BUGFIX: previously fell through and tried to read the
                # missing dataset; skip this classify group instead.
                continue
            classify_df = tool.df_from_dataset(
                f[ctype][classify_name], conf.HDF5_CLASSIFY_DS_CODE, None)
            for index, row in classify_df.iterrows():
                code = row[0].astype(str)
                if code in code_list:
                    code_dict = dict()
                    code_dict["date"] = today_str
                    code_dict["code"] = code
                    code_dict["classify"] = classify_name
                    code_classify_df = code_classify_df.append(
                        code_dict, ignore_index=True)
    console.write_tail()
    f.close()
    f_other = h5py.File(conf.HDF5_FILE_OTHER, 'a')
    # Full rewrite: drop yesterday's mapping before merging in today's.
    tool.delete_dataset(f_other, conf.HDF5_OTHER_CODE_CLASSIFY)
    tool.merge_df_dataset(f_other, conf.HDF5_OTHER_CODE_CLASSIFY,
                          code_classify_df)
    f_other.close()
    return
def bitmex(symbol, bin_size, count):
    """Fetch bitmex futures bars and merge them into the bitmex HDF5 file.

    :param symbol: contract symbol (also the HDF5 group name)
    :param bin_size: bar size; D/1h/5m/1m are fetched directly, 30m/4h are
        merged from smaller bars by future.history_merge
    :param count: number of bars to request
    """
    console.write_head(conf.HDF5_OPERATE_GET, conf.HDF5_RESOURCE_BITMEX,
                       symbol + '-' + bin_size)
    f = h5py.File(conf.HDF5_FILE_BITMEX, 'a')
    if f.get(symbol) is None:
        f.create_group(symbol)
    # BUGFIX: an unsupported bin_size previously left df unbound and the
    # following check raised NameError.
    df = None
    if bin_size in [
            conf.BINSIZE_ONE_DAY,
            conf.BINSIZE_ONE_HOUR,
            conf.BINSIZE_FIVE_MINUTE,
            conf.BINSIZE_ONE_MINUTE,
    ]:
        df = future.history(symbol, bin_size, count)
    elif bin_size in [conf.BINSIZE_THIRTY_MINUTE, conf.BINSIZE_FOUR_HOUR]:
        df = future.history_merge(symbol, bin_size, count)
    if df is not None and df.empty is not True:
        # Stored data intentionally excludes derived macd columns.
        tool.merge_df_dataset(f[symbol], bin_size, df)
    f.close()
    console.write_tail()
    return
def all_classify_detail(classify_list, omit_list, start_date):
    """Aggregate each classify group's member codes into a mean dataset.

    For every classify group under each requested classify type, read the
    group's code list, compute the per-ktype aggregated mean dataframe via
    one_classify_detail(), and persist it under the classify group.

    :param classify_list: classify type names (top-level HDF5 groups) to walk
    :param omit_list: codes to skip during aggregation (passed through)
    :param start_date: incremental start date; None means a full rebuild
        (the existing per-ktype dataset is deleted first)
    """
    f = h5py.File(conf.HDF5_FILE_SHARE, 'a')
    f_classify = h5py.File(conf.HDF5_FILE_CLASSIFY, 'a')
    # Walk every classify group of every requested type.
    for ctype in classify_list:
        for classify_name in f_classify[ctype]:
            console.write_head(conf.HDF5_OPERATE_ARRANGE,
                               conf.HDF5_RESOURCE_TUSHARE, classify_name)
            # Groups without a stored code list cannot be aggregated.
            if f_classify[ctype][classify_name].get(
                    conf.HDF5_CLASSIFY_DS_CODE) is None:
                console.write_msg(classify_name + "的detail文件不存在")
                continue
            for ktype in conf.HDF5_SHARE_KTYPE:
                mean_df = one_classify_detail(
                    f, f_classify[ctype][classify_name].get(
                        conf.HDF5_CLASSIFY_DS_CODE), omit_list, ktype,
                    start_date)
                # Dataset is named after the bar type itself.
                ds_name = ktype
                # No start_date means a full rebuild: drop the old dataset.
                if start_date is None:
                    tool.delete_dataset(f_classify[ctype][classify_name],
                                        ds_name)
                if mean_df is not None:
                    tool.merge_df_dataset(f_classify[ctype][classify_name],
                                          ds_name, mean_df)
            console.write_tail()
    f_classify.close()
    f.close()
    return
def all_classify(classify_list, init_flag=True):
    """Compute macd / moving-average indicators for every classify group
    (depends on the aggregated classify detail datasets already existing).

    :param classify_list: classify type names (top-level HDF5 groups) to walk
    :param init_flag: when True, delete and rebuild each index dataset
    """
    f = h5py.File(conf.HDF5_FILE_CLASSIFY, 'a')
    for ctype in classify_list:
        for classify_name in f[ctype]:
            group = f[ctype][classify_name]
            console.write_head(conf.HDF5_OPERATE_INDEX,
                               conf.HDF5_RESOURCE_TUSHARE, classify_name)
            for ktype in conf.HDF5_SHARE_KTYPE:
                ds_name = ktype
                if group.get(ds_name) is None:
                    console.write_msg(classify_name + "分类聚合detail不存在")
                    continue
                detail_df = tool.df_from_dataset(group, ds_name, None)
                detail_df["close"] = detail_df["close"].apply(
                    lambda x: round(x, 2))
                try:
                    index_df = one_df(detail_df, init_flag, True)
                except Exception as er:
                    console.write_msg("[" + classify_name + "]" + str(er))
                    continue
                index_ds_name = conf.HDF5_INDEX_DETAIL + "_" + ktype
                if init_flag is True:
                    tool.delete_dataset(group, index_ds_name)
                tool.merge_df_dataset(group, index_ds_name,
                                      index_df.reset_index())
            console.write_tail()
    f.close()
    return
def all_classify(classify_list, init_flag=True):
    """Build chan-theory wrapped k-lines for every classify group.

    :param classify_list: classify type names (top-level HDF5 groups) to walk
    :param init_flag: when True, delete and rebuild each wrap dataset
    """
    f = h5py.File(conf.HDF5_FILE_SHARE, 'a')
    f_classify = h5py.File(conf.HDF5_FILE_CLASSIFY, 'a')
    for ctype in classify_list:
        for classify_name in f_classify[ctype]:
            group = f_classify[ctype][classify_name]
            console.write_head(conf.HDF5_OPERATE_WRAP,
                               conf.HDF5_RESOURCE_TUSHARE, classify_name)
            for ktype in conf.HDF5_SHARE_WRAP_KTYPE:
                source_ds = conf.HDF5_CLASSIFY_DS_DETAIL + "_" + ktype
                if group.get(source_ds) is None:
                    continue
                detail_df = tool.df_from_dataset(group, source_ds, None)
                wrapped = one_df(detail_df)
                target_ds = conf.HDF5_INDEX_WRAP + "_" + ktype
                if init_flag is True:
                    tool.delete_dataset(group, target_ds)
                if wrapped is not None:
                    tool.merge_df_dataset(group, target_ds, wrapped)
            console.write_tail()
    f_classify.close()
    f.close()
    return
def get_from_remote(ktype, stype, start_date, code, rewrite):
    """Fetch the latest k-line data from a remote source.

    :param ktype: bar size
    :param stype: data source, conf.STYPE_BITMEX or conf.STYPE_ASHARE
    :param start_date: earliest date to fetch
    :param code: contract symbol or A-share code
    :param rewrite: when True, also write the fetched data back into HDF5
    :return: DataFrame of fetched bars
    :raises Exception: unknown stype, or the remote returned no data
    """
    if stype == conf.STYPE_BITMEX:
        # Guard: an unsupported ktype would otherwise leave df unbound.
        df = None
        count = tradetime.get_barnum_by_date(start_date, ktype)
        if ktype in [
                conf.BINSIZE_ONE_DAY,
                conf.BINSIZE_ONE_HOUR,
                conf.BINSIZE_FIVE_MINUTE,
                conf.BINSIZE_ONE_MINUTE,
        ]:
            df = future.history(code, ktype, count)
        elif ktype in [conf.BINSIZE_THIRTY_MINUTE, conf.BINSIZE_FOUR_HOUR]:
            df = future.history_merge(code, ktype, count)
        if rewrite:
            # Write the fetched remote data back to the file.
            f = h5py.File(conf.HDF5_FILE_BITMEX, 'a')
            path = '/' + code
            if f.get(path) is None:
                # BUGFIX: h5py.File has no create(); create_group was meant.
                f.create_group(path)
            tool.merge_df_dataset(f[path], ktype, df)
            f.close()
    elif stype == conf.STYPE_ASHARE:
        # TODO (重要),支持ip池并发获取,要不然多code的高频获取过于缓慢
        if rewrite:
            # Backtest path: write fetched hist data back to the file.
            df = ts.get_hist_data(code, ktype=ktype,
                                  pause=conf.REQUEST_BLANK, start=start_date)
            if code.isdigit():
                f = h5py.File(conf.HDF5_FILE_SHARE, 'a')
                code_prefix = code[0:3]
                path = '/' + code_prefix + '/' + code
                df = df[tss.SHARE_COLS]
            else:
                f = h5py.File(conf.HDF5_FILE_INDEX, 'a')
                path = '/' + code
                df = df[tss.INDEX_COLS]
            df = df.reset_index().sort_values(by=[conf.HDF5_SHARE_DATE_INDEX])
            tool.merge_df_dataset(f[path], ktype, df)
            # BUGFIX: was `f.close` (bare attribute access) — the file handle
            # was never actually closed.
            f.close()
        else:
            # get_k_data lacks turnover, but live monitoring tolerates its lag.
            df = ts.get_k_data(code, ktype=ktype,
                               pause=conf.REQUEST_BLANK, start=start_date)
            time.sleep(conf.REQUEST_BLANK)
    else:
        raise Exception("数据源不存在或未配置")
    # BUGFIX: was `df is None and df.empty is True`, which raised
    # AttributeError when df was None and never detected an empty frame.
    if df is None or df.empty is True:
        raise Exception("无法获取" + code + "-" + ktype + ":" + start_date +
                        "以后的数据,休息30秒重新获取")
    return df
def merge_batch():
    """Flush the current error batch (batch_dict[index]) into the error
    HDF5 file, then empty the in-memory batch.

    Relies on module-level `index` (current batch key) and `batch_dict`.
    """
    global index
    if index in batch_dict and batch_dict[index].empty is not True:
        # Explicit 'a' mode, consistent with every other h5py.File call.
        f = h5py.File(conf.HDF5_FILE_ERROR, 'a')
        if f.get(index) is None:
            tool.create_df_dataset(f, index, batch_dict[index])
        else:
            tool.merge_df_dataset(f, index, batch_dict[index])
        # BUGFIX: the file handle was previously never closed.
        f.close()
        # Clear the flushed rows but keep the DataFrame object alive.
        batch_dict[index].drop(batch_dict[index].index, inplace=True)
    return
def get_sz_margins(f, reset_flag=False):
    """Fetch Shenzhen margin-trading totals and store them under f.

    :param f: open HDF5 file/group to write into
    :param reset_flag: False merges into the existing dataset; otherwise the
        dataset is created from scratch
    """
    margins_df = ts.sz_margins(pause=conf.REQUEST_BLANK).sort_values(
        by=["opDate"])
    writer = (tool.merge_df_dataset if reset_flag is False
              else tool.create_df_dataset)
    writer(f, conf.HDF5_FUNDAMENTAL_SZ_MARGINS, margins_df)
    return
def get_ipo(f, reset_flag=False):
    """Fetch new-stock (IPO) data and store it under f.

    :param f: open HDF5 file/group to write into
    :param reset_flag: False merges into the existing dataset; otherwise the
        dataset is created from scratch
    """
    ipo_df = ts.new_stocks(pause=conf.REQUEST_BLANK)
    ipo_df = ipo_df.drop("name", axis=1).sort_values(by=["ipo_date"])
    writer = (tool.merge_df_dataset if reset_flag is False
              else tool.create_df_dataset)
    writer(f, conf.HDF5_FUNDAMENTAL_IPO, ipo_df)
    return
def code_detail(code_list, start_date):
    """Arrange per-code basic data from the basic file into the share file.

    Reads each dated basic-detail dataset, collects the rows belonging to the
    codes in code_list, and merges each code's dated history into its share
    group.

    :param code_list: codes to extract
    :param start_date: only dates >= start_date are used; None means a full
        rebuild (the old per-code dataset is deleted first)
    """
    f = h5py.File(conf.HDF5_FILE_BASIC, 'a')
    f_share = h5py.File(conf.HDF5_FILE_SHARE, 'a')
    console.write_head(conf.HDF5_OPERATE_ARRANGE, conf.HDF5_RESOURCE_TUSHARE,
                       conf.HDF5_BASIC_DETAIL)
    path = '/' + conf.HDF5_BASIC_DETAIL
    if f.get(path) is None:
        # BUGFIX: previously returned without closing either HDF5 handle
        # and without closing the console section.
        console.write_tail()
        f_share.close()
        f.close()
        return
    code_basic_dict = dict()
    for date in f[path]:
        if start_date is not None and date < start_date:
            console.write_msg(start_date + "起始日期大于基本数据的最大日期")
            continue
        df = tool.df_from_dataset(f[path], date, None)
        df["code"] = df["code"].str.decode("utf-8")
        df = df.set_index("code")
        for code in df.index:
            if code not in code_list:
                continue
            if code not in code_basic_dict:
                code_basic_dict[code] = tool.init_empty_df(df.columns)
            code_basic_dict[code].loc[date] = df.loc[code, :]
    for code, code_df in code_basic_dict.items():
        code_df.index.name = conf.HDF5_SHARE_DATE_INDEX
        code_df = code_df.reset_index().sort_values(
            by=[conf.HDF5_SHARE_DATE_INDEX])
        code_prefix = code[0:3]
        code_group_path = '/' + code_prefix + '/' + code
        if f_share.get(code_group_path) is None:
            console.write_msg(code + "的detail文件不存在")
            continue
        if start_date is None:
            tool.delete_dataset(f_share[code_group_path],
                                conf.HDF5_BASIC_DETAIL)
        tool.merge_df_dataset(f_share[code_group_path],
                              conf.HDF5_BASIC_DETAIL, code_df)
        console.write_exec()
    console.write_blank()
    console.write_tail()
    f_share.close()
    f.close()
    return
def all_index(init_flag=True):
    """Build chan-theory wrapped k-lines for every index code.

    :param init_flag: when True, delete and rebuild each wrap dataset
    """
    f = h5py.File(conf.HDF5_FILE_INDEX, 'a')
    for code in f:
        console.write_head(conf.HDF5_OPERATE_WRAP,
                           conf.HDF5_RESOURCE_TUSHARE, code)
        group = f[code]
        for ktype in conf.HDF5_SHARE_WRAP_KTYPE:
            if group.get(ktype) is None:
                continue
            raw_df = tool.df_from_dataset(group, ktype, None)
            wrapped = one_df(raw_df)
            target_ds = conf.HDF5_INDEX_WRAP + "_" + ktype
            if init_flag is True:
                tool.delete_dataset(group, target_ds)
            if wrapped is not None:
                tool.merge_df_dataset(group, target_ds, wrapped)
        console.write_tail()
    f.close()
    return
def all_share(omit_list, init_flag=True):
    """Compute macd / moving-average indicators for every share.

    :param omit_list: code prefixes to skip entirely
    :param init_flag: when True, delete and rebuild each index dataset
    """
    f = h5py.File(conf.HDF5_FILE_SHARE, 'a')
    for code_prefix in f:
        if code_prefix in omit_list:
            continue
        console.write_head(conf.HDF5_OPERATE_INDEX,
                           conf.HDF5_RESOURCE_TUSHARE, code_prefix)
        for code in f[code_prefix]:
            # Skip delisted / suspended / unobtainable codes.
            if f[code_prefix][code].attrs.get(
                    conf.HDF5_BASIC_QUIT) is not None:
                continue
            if f[code_prefix][code].attrs.get(conf.HDF5_BASIC_ST) is not None:
                continue
            code_group_path = '/' + code_prefix + '/' + code
            for ktype in conf.HDF5_SHARE_KTYPE:
                try:
                    if f.get(code_group_path) is None or f[code_prefix][
                            code].get(ktype) is None:
                        console.write_msg(code + "-" + ktype + "的detail不存在")
                        continue
                    df = tool.df_from_dataset(f[code_prefix][code], ktype,
                                              None)
                    index_df = one_df(df, init_flag)
                    ds_name = conf.HDF5_INDEX_DETAIL + "_" + ktype
                    if init_flag is True:
                        tool.delete_dataset(f[code_prefix][code], ds_name)
                    tool.merge_df_dataset(f[code_prefix][code], ds_name,
                                          index_df.reset_index())
                except Exception as er:
                    # CONSISTENCY FIX: sibling walkers report errors through
                    # console.write_msg, not bare print().
                    console.write_msg("[" + code + "]" + str(er))
            console.write_exec()
        console.write_blank()
        console.write_tail()
    f.close()
    return
def all_index(init_flag=True):
    """Compute moving-average and macd indicators for every index code.

    :param init_flag: when True, delete and rebuild each index dataset
    """
    f = h5py.File(conf.HDF5_FILE_INDEX, 'a')
    for code in f:
        console.write_head(conf.HDF5_OPERATE_INDEX,
                           conf.HDF5_RESOURCE_TUSHARE, code)
        group = f[code]
        for ktype in conf.HDF5_SHARE_KTYPE:
            if group.get(ktype) is None:
                console.write_msg(code + "-" + ktype + "的detail不存在")
                continue
            source_df = tool.df_from_dataset(group, ktype, None)
            computed = one_df(source_df, init_flag)
            target_ds = conf.HDF5_INDEX_DETAIL + "_" + ktype
            if init_flag is True:
                tool.delete_dataset(group, target_ds)
            tool.merge_df_dataset(group, target_ds, computed.reset_index())
        console.write_tail()
    f.close()
    return
def all_exec(omit_list): """ 筛选出存在背离的股票 """ # 筛选记录内容如下: # 1. 月线macd趋势 # 2. 周线macd趋势 # 3. 日线macd趋势,是否背离,数值差距 # 4. 30min的macd趋势,是否背离,数值差距,震荡中枢数量 # 5. 5min的macd趋势,是否背离,数值差距,震荡中枢数量 console.write_head(conf.HDF5_OPERATE_SCREEN, conf.HDF5_RESOURCE_TUSHARE, conf.STRATEGY_TREND_AND_REVERSE) f = h5py.File(conf.HDF5_FILE_SHARE, 'a') filter_df = tool.init_empty_df(_ini_filter_columns()) for code_prefix in f: if code_prefix in omit_list: continue for code in f[code_prefix]: code_group_path = '/' + code_prefix + '/' + code if f.get(code_group_path) is None: console.write_blank() console.write_msg(code + "的tushare数据不存在") continue # 忽略停牌、退市、无法获取的情况 if f[code_prefix][code].attrs.get( conf.HDF5_BASIC_QUIT ) is not None or f[code_prefix][code].attrs.get( conf.HDF5_BASIC_ST) is not None: console.write_blank() console.write_msg(code + "已退市或停牌") continue try: code_dict = code_exec(f, code) if code_dict is None: console.write_pass() continue else: console.write_exec() filter_df = filter_df.append(code_dict, ignore_index=True) except Exception as er: console.write_msg("[" + code + "]" + str(er)) f.close() f_screen = h5py.File(conf.HDF5_FILE_SCREEN, 'a') if f_screen.get(conf.STRATEGY_TREND_AND_REVERSE) is None: f_screen.create_group(conf.STRATEGY_TREND_AND_REVERSE) if f_screen[conf.STRATEGY_TREND_AND_REVERSE].get( conf.SCREEN_SHARE_FILTER) is None: f_screen[conf.STRATEGY_TREND_AND_REVERSE].create_group( conf.SCREEN_SHARE_FILTER) today_str = tradetime.get_today() tool.delete_dataset( f_screen[conf.STRATEGY_TREND_AND_REVERSE][conf.SCREEN_SHARE_FILTER], today_str) tool.merge_df_dataset( f_screen[conf.STRATEGY_TREND_AND_REVERSE][conf.SCREEN_SHARE_FILTER], today_str, filter_df) f_screen.close() console.write_blank() console.write_tail() return
def mark_grade(today_str=None):
    """Grade the day's screening results.

    For each screened code, scores the monthly/weekly/daily macd status and
    measures price distance and macd value against the divergence start price
    for the daily and 30min bars, then stores the grade table under
    STRATEGY_TREND_AND_REVERSE/SCREEN_SHARE_GRADE/<today>.

    :param today_str: trading day to grade; None means today
    """
    console.write_head(conf.HDF5_OPERATE_SCREEN, conf.HDF5_RESOURCE_TUSHARE,
                       conf.SCREEN_SHARE_GRADE)
    f = h5py.File(conf.HDF5_FILE_SCREEN, 'a')
    f_share = h5py.File(conf.HDF5_FILE_SHARE, 'a')
    if today_str is None:
        today_str = tradetime.get_today()
    if f[conf.STRATEGY_TREND_AND_REVERSE][conf.SCREEN_SHARE_FILTER].get(
            today_str) is None:
        console.write_msg(today_str + "个股筛选结果不存在")
        # BUGFIX: previously returned without closing either HDF5 handle
        # and without closing the console section.
        console.write_tail()
        f_share.close()
        f.close()
        return
    grade_df = tool.init_empty_df([
        "code", "status", "d_price_space", "d_price_per", "30_price_space",
        "30_price_per", "d_macd", "30_macd"
    ])
    screen_df = tool.df_from_dataset(
        f[conf.STRATEGY_TREND_AND_REVERSE][conf.SCREEN_SHARE_FILTER],
        today_str, None)
    # HDF5 stores strings as bytes; decode the columns used below.
    screen_df["d_m_status"] = screen_df["d_m_status"].str.decode("utf-8")
    screen_df["w_m_status"] = screen_df["w_m_status"].str.decode("utf-8")
    screen_df["m_m_status"] = screen_df["m_m_status"].str.decode("utf-8")
    screen_df["code"] = screen_df["code"].str.decode("utf-8")
    for index, row in screen_df.iterrows():
        code = row["code"]
        grade_dict = dict()
        grade_dict["code"] = code
        # Sum the daily, weekly and monthly macd status scores.
        grade_dict["status"] = 0
        grade_dict["status"] += _status_grade(row["d_m_status"])
        grade_dict["status"] += _status_grade(row["w_m_status"])
        grade_dict["status"] += _status_grade(row["m_m_status"])
        code_prefix = code[0:3]
        code_group_path = '/' + code_prefix + '/' + code
        for ktype in ["D", "30"]:
            detail_ds_name = ktype
            index_ds_name = conf.HDF5_INDEX_DETAIL + "_" + ktype
            if f_share[code_group_path].get(detail_ds_name) is None:
                console.write_msg(code + "的detail数据不存在")
                continue
            if f_share[code_group_path].get(index_ds_name) is None:
                console.write_msg(code + "的index数据不存在")
                continue
            detail_df = tool.df_from_dataset(f_share[code_group_path],
                                             detail_ds_name, None)
            index_df = tool.df_from_dataset(f_share[code_group_path],
                                            index_ds_name, None)
            latest_price = detail_df.tail(1)["close"].values[0]
            latest_macd = index_df.tail(1)["macd"].values[0]
            diverse_price_start = row[str.lower(ktype) +
                                      INDEX_MACD_DIVERSE_PRICE_START]
            if diverse_price_start == 0:
                # No divergence start recorded for this bar size.
                grade_dict[str.lower(ktype) + "_price_space"] = 0
                grade_dict[str.lower(ktype) + "_price_per"] = 0
            else:
                grade_dict[str.lower(ktype) + "_price_space"] = round(
                    diverse_price_start - latest_price, 2)
                grade_dict[str.lower(ktype) + "_price_per"] = round(
                    grade_dict[str.lower(ktype) + "_price_space"] * 100 /
                    diverse_price_start, 2)
            grade_dict[str.lower(ktype) + "_macd"] = latest_macd
        grade_df = grade_df.append(grade_dict, ignore_index=True)
    if f[conf.STRATEGY_TREND_AND_REVERSE].get(
            conf.SCREEN_SHARE_GRADE) is None:
        f[conf.STRATEGY_TREND_AND_REVERSE].create_group(
            conf.SCREEN_SHARE_GRADE)
    # Replace any earlier grade table for the same day.
    tool.delete_dataset(
        f[conf.STRATEGY_TREND_AND_REVERSE][conf.SCREEN_SHARE_GRADE],
        today_str)
    tool.merge_df_dataset(
        f[conf.STRATEGY_TREND_AND_REVERSE][conf.SCREEN_SHARE_GRADE],
        today_str, grade_df)
    f_share.close()
    f.close()
    console.write_tail()
    return