def tushare_to_sqlite_batch(file_name, table_name, field_pair_list, batch_size=500, **kwargs):
    """Export a MySQL table into a sqlite file, one sqlite table per ts_code.

    Codes are fetched ``batch_size`` at a time, so memory use can be traded
    against speed. Each ts_code "CODE.EXCH" is written to a sqlite table
    named "EXCHCODE" (e.g. "600000.SH" -> "SH600000"), replacing any
    previous contents.

    :param file_name: sqlite file name inside the 'sqlite_db' folder
    :param table_name: source MySQL table; must contain a ts_code column
    :param field_pair_list: optional [(old_name, new_name), ...] used to
        select and rename columns; ts_code is always kept for grouping
    :param batch_size: number of ts_codes fetched per SQL query
    :param kwargs: ignored, kept for call-site compatibility
    :return: None
    """
    logger.info('mysql %s 导入到 sqlite %s 开始', table_name, file_name)
    sqlite_db_folder_path = get_folder_path('sqlite_db', create_if_not_found=False)
    db_file_path = os.path.join(sqlite_db_folder_path, file_name)
    conn = sqlite3.connect(db_file_path)
    try:
        sql_str = f"select ts_code from {table_name} group by ts_code"
        with with_db_session(engine_md) as session:
            table = session.execute(sql_str)
            code_list = [row[0] for row in table.fetchall()]
        code_count, data_count, num = len(code_list), 0, 0
        for code_sub_list in split_chunk(code_list, batch_size):
            in_clause = ", ".join([r'%s' for _ in code_sub_list])
            sql_str = f"select * from {table_name} where ts_code in ({in_clause})"
            df_tot = pd.read_sql(sql_str, engine_md, params=code_sub_list)
            # select and rename the requested fields
            if field_pair_list is not None:
                field_list = [_[0] for _ in field_pair_list]
                field_list.append('ts_code')
                df_tot = df_tot[field_list].rename(columns=dict(field_pair_list))
            dfg = df_tot.groupby('ts_code')
            # `num` keeps counting across batches (start=num + 1)
            for num, (ts_code, df) in enumerate(dfg, start=num + 1):
                code_exchange = ts_code.split('.')
                sqlite_table_name = f"{code_exchange[1]}{code_exchange[0]}"
                df_len = df.shape[0]
                data_count += df_len
                logger.debug('%4d/%d) mysql %s -> sqlite %s %s %d 条记录',
                             num, code_count, table_name, file_name, sqlite_table_name, df_len)
                df.drop('ts_code', axis=1, inplace=True)
                df.to_sql(sqlite_table_name, conn, index=False, if_exists='replace')
    finally:
        # BUG FIX: the sqlite connection used to leak; always close it.
        conn.close()
    logger.info('mysql %s 导入到 sqlite %s 结束,导出数据 %d 条', table_name, file_name, data_count)
def import_future_info(chain_param=None):
    """Update the futures contract list table ``ifind_future_info``.

    Walks each exchange's sector from its latest known IPO date (or its
    establishment date) to yesterday, collecting contract codes via the
    iFinD data pool, then fetches basic data for codes not yet stored and
    upserts them.

    :param chain_param: only used to pass parameters along a task.chain
    :return: None
    """
    table_name = 'ifind_future_info'
    has_table = engine_md.has_table(table_name)
    logger.info("更新 %s [%s] 开始", table_name, has_table)
    # load contracts already present so they can be skipped later
    if has_table:
        sql_str = f'SELECT ths_code, ths_start_trade_date_future FROM {table_name}'
        with with_db_session(engine_md) as session:
            table = session.execute(sql_str)
            code_ipo_date_dic = dict(table.fetchall())
        exchange_latest_ipo_date_dic = get_exchange_latest_data()
    else:
        code_ipo_date_dic = {}
        exchange_latest_ipo_date_dic = {}
    exchange_sectorid_dic_list = [
        {
            'exch_eng': 'SHFE',
            'exchange_name': '上海期货交易所',
            'sectorid': '091001',
            'date_establish': '1995-05-10'
        },
        {
            'exch_eng': 'CFFEX',
            'exchange_name': '中国金融期货交易所',
            'sectorid': '091004',
            'date_establish': '2013-09-10'
        },
        {
            'exch_eng': 'DCE',
            'exchange_name': '大连商品交易所',
            'sectorid': '091002',
            'date_establish': '1999-01-10'
        },
        {
            'exch_eng': 'CZCE',
            'exchange_name': '郑州商品交易所',
            'sectorid': '091003',
            'date_establish': '1999-01-10'
        },
    ]
    # (indicator, wss parameter, sqlalchemy dtype) triples
    indicator_param_list = [
        ('ths_future_short_name_future', '', String(50)),
        ('ths_future_code_future', '', String(20)),
        ('ths_sec_type_future', '', String(20)),
        ('ths_td_variety_future', '', String(20)),
        ('ths_td_unit_future', '', DOUBLE),
        ('ths_pricing_unit_future', '', String(20)),
        ('ths_mini_chg_price_future', '', DOUBLE),
        ('ths_chg_ratio_lmit_future', '', DOUBLE),
        ('ths_td_deposit_future', '', DOUBLE),
        ('ths_start_trade_date_future', '', Date),
        ('ths_last_td_date_future', '', Date),
        ('ths_last_delivery_date_future', '', Date),
        ('ths_delivery_month_future', '', String(10)),
        ('ths_listing_benchmark_price_future', '', DOUBLE),
        ('ths_initial_td_deposit_future', '', DOUBLE),
        ('ths_contract_month_explain_future', '', String(120)),
        ('ths_td_time_explain_future', '', String(120)),
        ('ths_last_td_date_explian_future', '', String(120)),
        ('ths_delivery_date_explain_future', '', String(120)),
        ('ths_exchange_short_name_future', '', String(50)),
        ('ths_contract_en_short_name_future', '', String(50)),
        ('ths_contract_en_name_future', '', String(50)),
    ]
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in indicator_param_list], sep=';')
    # dtype mapping used when writing to MySQL
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['exch_eng'] = String(20)
    # collect contract codes per exchange, stepping ndays_per_update at a time
    code_set = set()
    ndays_per_update = 90
    sector_count = len(exchange_sectorid_dic_list)
    for num, exchange_sectorid_dic in enumerate(exchange_sectorid_dic_list, start=1):
        exchange_name = exchange_sectorid_dic['exchange_name']
        exch_eng = exchange_sectorid_dic['exch_eng']
        sector_id = exchange_sectorid_dic['sectorid']
        date_establish = exchange_sectorid_dic['date_establish']
        # start from the latest known IPO date, falling back to the
        # exchange establishment date
        date_since = str_2_date(
            exchange_latest_ipo_date_dic.setdefault(exch_eng, date_establish))
        date_yestoday = date.today() - timedelta(days=1)
        logger.info("%d/%d) %s[%s][%s] %s ~ %s",
                    num, sector_count, exchange_name, exch_eng, sector_id, date_since, date_yestoday)
        while date_since <= date_yestoday:
            date_since_str = date_2_str(date_since)
            # fetch the sector members (futures contracts) for the date, e.g.
            # THS_DP('block','2021-01-15;091002003','date:Y,thscode:Y,security_name:Y,security_name_in_time:Y')
            try:
                future_info_df = invoker.THS_DataPool(
                    'block', '%s;%s' % (date_since_str, sector_id), 'thscode:Y,security_name:Y')
            except APIError as exp:
                if exp.ret_dic['error_code'] in (-4001, -4210,):
                    # "no data" style errors: treat as empty and continue stepping
                    future_info_df = None
                else:
                    logger.exception("THS_DataPool %s 获取失败, '%s;%s'",
                                     exchange_name, date_since_str, sector_id)
                    break
            if future_info_df is not None and future_info_df.shape[0] > 0:
                code_set |= set(future_info_df['THSCODE'])
            if date_since >= date_yestoday:
                break
            else:
                date_since += timedelta(days=ndays_per_update)
                if date_since > date_yestoday:
                    date_since = date_yestoday
        if DEBUG:
            break
    # only fetch basic data for contracts not already stored
    code_list = [wc for wc in code_set if wc not in code_ipo_date_dic]
    if len(code_list) > 0:
        # BUG FIX: the chunk variable used to shadow `code_list` itself
        for code_sub_list in split_chunk(code_list, 500):
            future_info_df = invoker.THS_BasicData(code_sub_list, json_indicator, json_param)
            if future_info_df is None or future_info_df.shape[0] == 0:
                data_count = 0
                logger.warning("更新 %s 结束 %d 条记录被更新", table_name, data_count)
            else:
                # fill the exch_eng column from the Chinese exchange name
                future_info_df['exch_eng'] = ''
                for exchange_sectorid_dic in exchange_sectorid_dic_list:
                    # BUG FIX: chained indexing (df[col][mask] = ...) may write
                    # to a copy; use .loc for a guaranteed in-place assignment
                    mask = future_info_df['ths_exchange_short_name_future'] == \
                        exchange_sectorid_dic['exchange_name']
                    future_info_df.loc[mask, 'exch_eng'] = exchange_sectorid_dic['exch_eng']
                data_count = bunch_insert_on_duplicate_update(
                    future_info_df, table_name, engine_md, dtype,
                    primary_keys=['ths_code'], schema=config.DB_SCHEMA_MD)
                logger.info("更新 %s 结束 %d 条记录被更新", table_name, data_count)
def import_stock_info_hk(chain_param=None, refresh=False):
    """Import the HK stock universe (codes and names) into ``wind_stock_info_hk``.

    :param chain_param: used by celery task.chain to pass upstream results
    :param refresh: False (default) fetches only today's universe; True
        rebuilds from DATE_BASE onwards, sampling one date per year
    :return: None
    """
    table_name = 'wind_stock_info_hk'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    param_list = [
        ('sec_name', String(20)),
        ('trade_code', String(20)),
        ('ipo_date', Date),
        ('delist_date', Date),
        ('mkt', String(20)),
        ('exch_city', String(20)),
        ('exch_eng', String(20)),
        ('prename', String(2000)),
    ]
    # comma-separated field names for the wss call
    param = ",".join([key for key, _ in param_list])
    rename_col_dic = {key.upper(): key.lower() for key, _ in param_list}
    # dtype mapping used when writing to MySQL
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    if refresh:
        date_fetch = DATE_BASE
    else:
        date_fetch = date.today()
    date_end = date.today()
    stock_code_set = set()
    # union the code universe sampled one date per year, plus today
    while date_fetch < date_end:
        stock_code_set_sub = get_stock_code_set(date_fetch)
        if stock_code_set_sub is not None:
            stock_code_set |= stock_code_set_sub
        date_fetch += timedelta(days=365)
    stock_code_set_sub = get_stock_code_set(date_end)
    if stock_code_set_sub is not None:
        stock_code_set |= stock_code_set_sub
    # fetch IPO / delist dates etc. for all codes, 1000 per wss call, e.g.
    # w.wss("300005.SZ,300372.SZ,000003.SZ", "ipo_date,trade_code,mkt,exch_city,exch_eng")
    stock_code_list = list(stock_code_set)
    seg_count = 1000
    stock_info_df_list = []
    for stock_code_list_sub in split_chunk(stock_code_list, seg_count):
        stock_info_df = invoker.wss(stock_code_list_sub, param)
        stock_info_df_list.append(stock_info_df)
        if DEBUG:
            break
    # BUG FIX: pd.concat raises ValueError on an empty list — bail out early
    if len(stock_info_df_list) == 0:
        logging.warning("更新 %s 结束 无数据可导入", table_name)
        return
    stock_info_all_df = pd.concat(stock_info_df_list)
    stock_info_all_df.index.rename('wind_code', inplace=True)
    stock_info_all_df.rename(columns=rename_col_dic, inplace=True)
    logging.info('%d data will be import', stock_info_all_df.shape[0])
    stock_info_all_df.reset_index(inplace=True)
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # on first creation switch the table to MyISAM and add the primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # refresh the code_mapping table
    update_from_info_table(table_name)
def tushare_to_sqlite_batch(file_name, table_name, field_pair_list, batch_size=500, sort_by='trade_date',
                            clean_old_file_first=True, **kwargs):
    """Export a MySQL table into a sqlite file, one sqlite table per ts_code.

    ``tushare_stock_index_daily_md`` gets special handling: only a fixed set
    of index codes is exported, each to a friendly table name. Any other
    table is exported code by code, ``batch_size`` codes per query, with
    sqlite tables named "EXCHCODE" (e.g. "600000.SH" -> "SH600000").

    :param file_name: sqlite file name inside the 'sqlite_db' folder
    :param table_name: source MySQL table; must contain a ts_code column
    :param field_pair_list: optional [(old_name, new_name), ...] used to
        select and rename columns; ts_code is always kept for grouping
    :param batch_size: number of ts_codes fetched per SQL query
    :param sort_by: source column each per-code frame is sorted by; mapped
        through field_pair_list when renaming is active; None disables sorting
    :param clean_old_file_first: delete an existing sqlite file first
    :param kwargs: ignored, kept for call-site compatibility
    :return: None
    """
    logger.info('mysql %s 导入到 sqlite %s 开始', table_name, file_name)
    sqlite_db_folder_path = get_folder_path('sqlite_db', create_if_not_found=False)
    db_file_path = os.path.join(sqlite_db_folder_path, file_name)
    # removing the old file up front speeds up the import
    if clean_old_file_first and os.path.exists(
            db_file_path) and os.path.isfile(db_file_path):
        os.remove(db_file_path)
    conn = sqlite3.connect(db_file_path)
    # column selection / renaming
    if field_pair_list is not None:
        field_list = [_[0] for _ in field_pair_list]
        field_list.append('ts_code')
        field_pair_dic = dict(field_pair_list)
        # map the sort column to its post-rename name
        sort_by = field_pair_dic[sort_by] if sort_by is not None else None
    else:
        field_list = None
        field_pair_dic = None
    try:
        if table_name == 'tushare_stock_index_daily_md':
            # special case: export a fixed set of index codes only
            ts_code_sqlite_table_name_dic = {
                # "": "CBIndex",
                # "h30024.CSI": "CYBZ",  # 中证800保险
                "399300.SZ": "HS300",  # 沪深300
                "000016.SH": "HS50",  # 上证50
                "399905.SZ": "HS500",  # 中证500
                "399678.SZ": "SCXG",  # 深次新股
                "399101.SZ": "ZXBZ",  # 中小板综
            }
            code_list = [_ for _ in ts_code_sqlite_table_name_dic.keys()]
            in_clause = ", ".join([r'%s' for _ in code_list])
            sql_str = f"select * from {table_name} where ts_code in ({in_clause})"
            df_tot = pd.read_sql(sql_str, engine_md, params=code_list)
            # select and rename the requested fields
            if field_pair_dic is not None:
                df_tot = df_tot[field_list].rename(columns=field_pair_dic)
            dfg = df_tot.groupby('ts_code')
            code_count, data_count = len(code_list), 0
            for num, (ts_code, df) in enumerate(dfg, start=1):
                sqlite_table_name = ts_code_sqlite_table_name_dic[ts_code]
                df_len = df.shape[0]
                data_count += df_len
                logger.debug('%2d/%d) mysql %s -> sqlite %s %s %d 条记录',
                             num, code_count, table_name, file_name, sqlite_table_name, df_len)
                df = df.drop('ts_code', axis=1)
                if sort_by is not None:
                    df = df.sort_values(sort_by)
                df.to_sql(sqlite_table_name, conn, index=False, if_exists='replace')
        else:
            # generic case: export every ts_code found in the table
            sql_str = f"select ts_code from {table_name} group by ts_code"
            with with_db_session(engine_md) as session:
                table = session.execute(sql_str)
                code_list = [row[0] for row in table.fetchall()]
            code_count, data_count, num = len(code_list), 0, 0
            for code_sub_list in split_chunk(code_list, batch_size):
                in_clause = ", ".join([r'%s' for _ in code_sub_list])
                sql_str = f"select * from {table_name} where ts_code in ({in_clause})"
                df_tot = pd.read_sql(sql_str, engine_md, params=code_sub_list)
                # select and rename the requested fields
                if field_pair_dic is not None:
                    df_tot = df_tot[field_list].rename(columns=field_pair_dic)
                dfg = df_tot.groupby('ts_code')
                # `num` keeps counting across batches (start=num + 1)
                for num, (ts_code, df) in enumerate(dfg, start=num + 1):
                    code_exchange = ts_code.split('.')
                    sqlite_table_name = f"{code_exchange[1]}{code_exchange[0]}"
                    df_len = df.shape[0]
                    data_count += df_len
                    logger.debug('%4d/%d) mysql %s -> sqlite %s %s %d 条记录',
                                 num, code_count, table_name, file_name, sqlite_table_name, df_len)
                    df = df.drop('ts_code', axis=1)
                    if sort_by is not None:
                        df = df.sort_values(sort_by)
                    df.to_sql(sqlite_table_name, conn, index=False, if_exists='replace')
    finally:
        # BUG FIX: the sqlite connection used to leak; always close it.
        conn.close()
    logger.info('mysql %s 导入到 sqlite %s 结束,导出数据 %d 条', table_name, file_name, data_count)
def df_2_table(doc, df, format_by_index=None, format_by_col=None, max_col_count=None, mark_top_n=None,
               mark_top_n_on_cols=None):
    """Render a DataFrame as one or more docx tables appended to *doc*.

    :param doc: python-docx Document the table(s) are appended to
    :param df: DataFrame to render; the index becomes the first column
    :param format_by_index: {index: formatter} applied to whole rows; a
        formatter is a format string or a callable
    :param format_by_col: {col_name: formatter} applied per column (a row
        formatter takes precedence)
    :param max_col_count: max data columns per table (index column excluded);
        wider frames are split into several tables
    :param mark_top_n: mark the top N ranked values (red, bold)
    :param mark_top_n_on_cols: restrict top-N marking to these columns;
        None marks across all columns
    :return: None
    """
    if max_col_count is None:
        max_col_count = df.shape[1]
    # pre-compute which cells belong to the top N (descending rank)
    if mark_top_n is not None:
        if mark_top_n_on_cols is not None:
            rank_df = df[mark_top_n_on_cols]
        else:
            rank_df = df
        rank_df = rank_df.rank(ascending=False)
        is_in_rank_df = rank_df <= mark_top_n
    else:
        is_in_rank_df = None
    for table_num, col_name_list in enumerate(
            split_chunk(list(df.columns), max_col_count)):
        if table_num > 0:
            # blank paragraph between consecutive tables
            doc.add_paragraph('')
        sub_df = df[col_name_list]
        row_num, col_num = sub_df.shape
        t = doc.add_table(row_num + 1, col_num + 1)
        # write head
        for j in range(col_num):
            paragraph = t.cell(0, j + 1).paragraphs[0]
            paragraph.add_run(str(col_name_list[j])).bold = True
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        # write head bg color
        for j in range(col_num + 1):
            t.cell(0, j)._tc.get_or_add_tcPr().append(
                parse_xml(r'<w:shd {} w:fill="00A2E8"/>'.format(nsdecls('w'))))
        # format table style to be a grid
        t.style = 'TableGrid'
        # populate the table with the dataframe
        for i in range(row_num):
            index = sub_df.index[i]
            paragraph = t.cell(i + 1, 0).paragraphs[0]
            index_str = str(date_2_str(index))
            paragraph.add_run(index_str).bold = True
            paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
            # row-level formatter wins over column-level formatters
            if format_by_index is not None and index in format_by_index:
                format_row = format_by_index[index]
            else:
                format_row = None
            for j in range(col_num):
                col_name = col_name_list[j]
                if format_row is None and format_by_col is not None and col_name in format_by_col:
                    format_cell = format_by_col[col_name]
                else:
                    format_cell = format_row
                content = sub_df.values[i, j]
                if format_cell is None:
                    text = str(content)
                elif isinstance(format_cell, str):
                    text = str.format(format_cell, content)
                elif callable(format_cell):
                    text = format_cell(content)
                else:
                    # BUG FIX: ValueError does not %-format its arguments —
                    # build the message explicitly
                    raise ValueError('%s: %s 无效' % (index, format_cell))
                paragraph = t.cell(i + 1, j + 1).paragraphs[0]
                paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
                try:
                    style = paragraph.add_run(text)
                    if is_in_rank_df is not None and col_name in is_in_rank_df and is_in_rank_df.loc[
                            index, col_name]:
                        style.font.color.rgb = RGBColor(0xed, 0x1c, 0x24)
                        style.bold = True
                except TypeError:
                    logger.exception('df.iloc[%d, %d] = df["%s", "%s"] = %s', i, j, index, col_name, text)
                    # BUG FIX: plain re-raise instead of `raise exp from exp`
                    # (which set the exception as its own cause)
                    raise
        # zebra stripes on even body rows
        for i in range(1, row_num + 1):
            for j in range(col_num + 1):
                if i % 2 == 0:
                    t.cell(i, j)._tc.get_or_add_tcPr().append(
                        parse_xml(r'<w:shd {} w:fill="A3D9EA"/>'.format(
                            nsdecls('w'))))
def import_cb_info(chain_param=None, first_time=False):
    """Import the whole-market convertible-bond universe into
    ``wind_convertible_bond_info``.

    :param chain_param: used by celery task.chain to pass upstream results
    :param first_time: when True, scan from 1999 onwards (one sample date per
        year) to collect the full history of basic info; otherwise only
        yesterday's universe is fetched
    :return: None
    """
    table_name = 'wind_convertible_bond_info'
    has_table = engine_md.has_table(table_name)
    # BUG FIX: the original list contained
    # ('clause_interest_compensationinterest', DOUBLE) twice, requesting the
    # same field twice from wss; the duplicate has been removed.
    name_param_list = [
        ('trade_code', DOUBLE),
        ('fullname', String(45)),
        ('sec_name', String(45)),
        ('issue_announcement', Date),
        ('ipo_date', Date),
        ('clause_conversion_2_swapsharestartdate', Date),
        ('clause_conversion_2_swapshareenddate', Date),
        ('clause_conversion_code', DOUBLE),
        ('clause_interest_5', String(8)),
        ('clause_interest_8', String(8)),
        ('clause_interest_6', String(200)),
        ('clause_interest_compensationinterest', DOUBLE),
        ('issueamount', DOUBLE),
        ('term', DOUBLE),
        ('underlyingcode', String(20)),
        ('underlyingname', String(20)),
        ('redemption_beginning', Date),
    ]
    # comma-separated field names for the wss call
    param = ",".join([key for key, _ in name_param_list])
    # dtype mapping used when writing to MySQL
    dtype = {key: val for key, val in name_param_list}
    dtype['wind_code'] = String(20)
    # build the list of dates whose universes will be unioned
    if first_time:
        date_since = datetime.strptime('1999-01-01', STR_FORMAT_DATE).date()
        date_list = []
        one_year = timedelta(days=365)
        while date_since < date.today() - ONE_DAY:
            date_list.append(date_since)
            date_since += one_year
        else:
            date_list.append(date.today() - ONE_DAY)
    else:
        date_list = [date.today() - ONE_DAY]
    # union the wind_code universe over all sample dates
    wind_code_set = set()
    for fetch_date in date_list:
        data_set = get_cb_set(fetch_date)
        if data_set is not None:
            wind_code_set |= data_set
    # fetch basic info 1000 codes per wss call, e.g.
    # w.wss("300005.SZ,300372.SZ,000003.SZ", "ipo_date,trade_code,mkt,exch_city,exch_eng")
    wind_code_list = list(wind_code_set)
    data_info_df_list = []
    try:
        for sub_list in split_chunk(wind_code_list, 1000):
            data_df = invoker.wss(sub_list, param, "unit=1")
            if data_df is not None and data_df.shape[0] > 0:
                data_info_df_list.append(data_df)
            # debug runs stop after the first couple of chunks
            if DEBUG and len(data_info_df_list) > 1:
                break
    finally:
        # persist whatever was fetched even if a later chunk failed
        if len(data_info_df_list) > 0:
            data_info_all_df = pd.concat(data_info_df_list)
            data_info_all_df.index.rename('wind_code', inplace=True)
            data_info_all_df.rename(
                columns={col: col.lower() for col in data_info_all_df.columns}, inplace=True)
            logging.info('%d data will be import', data_info_all_df.shape[0])
            data_info_all_df.reset_index(inplace=True)
            data_count = bunch_insert_on_duplicate_update(
                data_info_all_df, table_name, engine_md, dtype=dtype)
            logging.info("更新 %s 完成 新增数据 %d 条", table_name, data_count)
            # on first creation switch the table to MyISAM and add the primary key
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
        # refresh the code_mapping table
        if engine_md.has_table(table_name):
            update_from_info_table(table_name)
def import_wind_stock_info(chain_param=None, refresh=False):
    """Import the A-share stock universe (codes and names) into ``wind_stock_info``.

    :param chain_param: used by celery task.chain to pass upstream results
    :param refresh: False (default) fetches only today's universe; True
        rebuilds from 2005-01-01 onwards, sampling one date per year
    :return: None
    """
    table_name = 'wind_stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    wind_indicator_param_list = [
        ('sec_name', String(20)),
        ('trade_code', String(20)),
        ('ipo_date', Date),
        ('delist_date', Date),
        ('mkt', String(20)),
        ('exch_city', String(20)),
        ('exch_eng', String(20)),
        ('prename', String(2000)),
    ]
    # comma-separated field names for the wss call,
    # e.g. "ipo_date,trade_code,mkt,exch_city,exch_eng"
    param = ",".join([key for key, _ in wind_indicator_param_list])
    # dtype mapping used when writing to MySQL
    dtype = {key: val for key, val in wind_indicator_param_list}
    dtype['wind_code'] = String(20)
    if refresh:
        date_fetch = datetime.strptime('2005-1-1', STR_FORMAT_DATE).date()
    else:
        date_fetch = date.today()
    date_end = date.today()
    stock_code_set = set()
    # union the code universe sampled one date per year, plus today
    while date_fetch < date_end:
        stock_code_set_sub = get_stock_code_set(date_fetch)
        if stock_code_set_sub is not None:
            stock_code_set |= stock_code_set_sub
        date_fetch += timedelta(days=365)
    stock_code_set_sub = get_stock_code_set(date_end)
    if stock_code_set_sub is not None:
        stock_code_set |= stock_code_set_sub
    # fetch IPO / delist dates etc. for all codes, 1000 per wss call, e.g.
    # w.wss("300005.SZ,300372.SZ,000003.SZ", "ipo_date,trade_code,mkt,exch_city,exch_eng")
    stock_code_list = list(stock_code_set)
    seg_count = 1000
    stock_info_df_list = []
    for stock_code_list_sub in split_chunk(stock_code_list, seg_count):
        stock_info_df = invoker.wss(stock_code_list_sub, param)
        stock_info_df_list.append(stock_info_df)
        if DEBUG:
            break
    # BUG FIX: pd.concat raises ValueError on an empty list — bail out early
    if len(stock_info_df_list) == 0:
        logging.warning("更新 %s 结束 无数据可导入", table_name)
        return
    stock_info_all_df = pd.concat(stock_info_df_list)
    stock_info_all_df.index.rename('wind_code', inplace=True)
    logging.info('%d data will be import', stock_info_all_df.shape[0])
    stock_info_all_df.reset_index(inplace=True)
    # upsert into wind_stock_info
    data_count = bunch_insert_on_duplicate_update(stock_info_all_df, table_name, engine_md, dtype=dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # on first creation switch the table to MyISAM and add the primary key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])
    # refresh the code_mapping table
    update_from_info_table(table_name)
def df_2_table(doc, df, format_by_index=None, format_by_col=None, max_col_count=None):
    """Render a DataFrame as one or more docx tables appended to *doc*.

    :param doc: python-docx Document the table(s) are appended to
    :param df: DataFrame to render; the index becomes the first column
    :param format_by_index: {index: formatter} applied to whole rows; a
        formatter is a format string or a callable
    :param format_by_col: {col_name: formatter} applied per column (a row
        formatter takes precedence)
    :param max_col_count: max data columns per table (index column excluded);
        wider frames are split into several tables
    :return: None
    """
    # hoisted out of the render loop (they used to be re-imported per table)
    from docx.oxml.ns import nsdecls
    from docx.oxml import parse_xml
    from docx.enum.text import WD_ALIGN_PARAGRAPH
    if max_col_count is None:
        max_col_count = df.shape[1]
    for table_num, col_name_list in enumerate(split_chunk(list(df.columns), max_col_count)):
        if table_num > 0:
            # blank paragraph between consecutive tables
            doc.add_paragraph('')
        sub_df = df[col_name_list]
        row_num, col_num = sub_df.shape
        t = doc.add_table(row_num + 1, col_num + 1)
        # write head
        for j in range(col_num):
            paragraph = t.cell(0, j + 1).paragraphs[0]
            paragraph.add_run(str(col_name_list[j])).bold = True
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        # write head bg color
        for j in range(col_num + 1):
            t.cell(0, j)._tc.get_or_add_tcPr().append(
                parse_xml(r'<w:shd {} w:fill="00A2E8"/>'.format(nsdecls('w'))))
        # format table style to be a grid
        t.style = 'TableGrid'
        # populate the table with the dataframe
        for i in range(row_num):
            index = sub_df.index[i]
            paragraph = t.cell(i + 1, 0).paragraphs[0]
            index_str = str(date_2_str(index))
            paragraph.add_run(index_str).bold = True
            paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
            # row-level formatter wins over column-level formatters
            if format_by_index is not None and index in format_by_index:
                row_formater = format_by_index[index]
            else:
                row_formater = None
            for j in range(col_num):
                # BUG FIX: the formatter is now resolved per cell — it used to
                # leak from one column to all later columns of the same row
                if row_formater is None and format_by_col is not None and col_name_list[j] in format_by_col:
                    formater = format_by_col[col_name_list[j]]
                else:
                    formater = row_formater
                content = sub_df.values[i, j]
                if formater is None:
                    text = str(content)
                elif isinstance(formater, str):
                    text = str.format(formater, content)
                elif callable(formater):
                    text = formater(content)
                else:
                    # BUG FIX: ValueError does not %-format its arguments —
                    # build the message explicitly
                    raise ValueError('%s: %s 无效' % (index, formater))
                paragraph = t.cell(i + 1, j + 1).paragraphs[0]
                paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
                paragraph.add_run(text)
        # zebra stripes on even body rows
        for i in range(1, row_num + 1):
            for j in range(col_num + 1):
                if i % 2 == 0:
                    t.cell(i, j)._tc.get_or_add_tcPr().append(
                        parse_xml(r'<w:shd {} w:fill="A3D9EA"/>'.format(nsdecls('w'))))