Example #1
0
def etl_fact_market(*args):
    """Run the ETL job for the fact_market table.

    Walks the rows returned by ``extract.tag_counts()``.  For every row
    whose ``grandParentId`` is valid and has not been processed yet, the
    related zone, rent and industry data are extracted, transformed,
    combined into one record and handed to ``load.loading()``.  A failure
    while loading one market is logged and does not stop the run.

    :param args: exactly three positional arguments, in this order:
        engine_zone_macro, engine_draw, engine_target
    :raises TypeError: if the number of positional arguments is not three
    """
    # The engines are documented as positional arguments, so take them from
    # ``args`` instead of relying on same-named module globals.
    if len(args) != 3:
        raise TypeError(
            'etl_fact_market() expects 3 positional arguments '
            '(engine_zone_macro, engine_draw, engine_target), got %d'
            % len(args))
    engine_zone_macro, engine_draw, engine_target = args

    # Build the extract / transform / load helpers.
    extract = Extract(engine_zone_macro, engine_draw, engine_target)
    transform = Transform()
    load = Load(engine_target)

    # Markets reported as already processed, plus the inputs for this run.
    done_market = extract.done_market()
    df_tag_counts = extract.tag_counts()
    df_industry = extract.industry()
    has_dealed = set()  # ids already handled during this call

    for i, sample_tag_counts in df_tag_counts.iterrows():
        grandParentId = sample_tag_counts['grandParentId']

        # A valid id has length 36.  Values without a length (e.g. None/NaN
        # from a missing cell) are treated as invalid instead of letting
        # len() abort the whole run.
        if (not hasattr(grandParentId, '__len__')
                or len(grandParentId) != 36):
            logging.warning('Round %d, %s is invalid ,skipped.',
                            i, grandParentId)
            continue

        # Skip markets that extract.done_market() lists as already done.
        if grandParentId in done_market:
            logging.warning('Round %d, %s etl before', i, grandParentId)
            continue

        # Skip ids that an earlier row of this same run already handled.
        if grandParentId in has_dealed:
            logging.warning('Round %d, %s etl before', i, grandParentId)
            continue
        has_dealed.add(grandParentId)

        # Extract.  len() is used (not truthiness) because the result may
        # be a DataFrame-like object -- TODO confirm the return type.
        zone_grandparent = extract.zone_grandparent(grandParentId)
        if len(zone_grandparent) == 0:
            logging.warning('Round %d, has no draw samples', i)
            continue
        rent = extract.rent_details(grandParentId)
        industry_tmp = df_industry[df_industry['grandParentId'] ==
                                   grandParentId]

        # Transform.
        rent = transform.rent_calculate(rent)
        industry_dict = transform.reshape_industry(industry_tmp)

        # Combine everything into the record to load.
        clean = transform.compile_dfs(sample_tag_counts, rent, industry_dict,
                                      zone_grandparent)

        # Load; one failing market must not abort the remaining ones.
        try:
            load.loading(clean)
        except Exception as e:
            logging.error('Round %d, %s', i, e)
        else:
            logging.info('Round %d, %s etl secceed', i, grandParentId)
Example #2
0
def etl_fact_market(source_engine, target_engine, rec_path):
    """Run the ETL job for the fact_market table (record-file variant).

    :param source_engine: engine passed to ``Extract`` as its first argument
    :param target_engine: engine passed to ``Extract`` and to ``Load``
    :param rec_path: path handed to ``Record``

    NOTE(review): ``unique_marketguid`` and ``done_market`` are still empty
    placeholders, so the loop body below never executes as written.
    """
    extract = Extract(source_engine, target_engine)
    transform = Transform()
    load = Load(target_engine)
    # Use the caller-supplied path; it was previously ignored in favour of
    # a hard-coded 'rec.cfg'.
    record = Record(rec_path)

    # TODO(review): start_params is read but not used yet -- presumably it
    # is meant to drive which markets are extracted; confirm.
    start_params = record.get_record()
    unique_marketguid = []
    done_market = []
    has_dealed = set()  # ids already handled during this call

    for i, grandParentId in enumerate(unique_marketguid):

        # A valid id has length 36.
        if len(grandParentId) != 36:
            logging.error('Round %d, %s is not valid.', i, grandParentId)
            continue

        # Skip markets listed as already processed.
        if grandParentId in done_market:
            logging.warning('Round %d, %s etl before', i, grandParentId)
            continue

        # Skip ids that were already handled earlier in this loop.
        if grandParentId in has_dealed:
            logging.warning('Round %d, %s etl before', i, grandParentId)
            continue
        has_dealed.add(grandParentId)

        # Extract.
        zone_grandparent = extract.zone_grandparent(grandParentId)
        if len(zone_grandparent) == 0:
            logging.warning('Round %d, has no draw samples', i)
            continue

        rent = extract.rent_details(grandParentId)
        # NOTE(review): ``industry`` is not defined in this function; it
        # must come from module scope or this line raises NameError.
        industry_tmp = industry[industry['grandParentId'] == grandParentId]

        # Transform.
        rent = transform.rent_calculate(rent)
        industry_dict = transform.reshape_industry(industry_tmp)

        # Combine everything into the record to load.
        # NOTE(review): ``sample_tag_counts`` is not defined in this
        # function either; same caveat as ``industry`` above.
        clean = transform.compile_dfs(sample_tag_counts, rent, industry_dict,
                                      zone_grandparent)

        # Load; one failing market must not abort the remaining ones.
        try:
            load.loading(clean)
        except Exception as e:
            logging.error('Round %d, %s', i, e)
Example #3
0
def market_to_api2(source, target, record_file='api2.record'):
    """ETL entry point for the api2 table of the anti_fraud database.

    :param source: source database engine
    :param target: target database engine
    :param record_file: name of the file used to record loaded ids;
        defaults to ``'api2.record'``
    """
    # Set up the three pipeline stages.
    extractor = Extract(source, target, record_file)
    transformer = Transform()
    loader = Load(target, record_file)

    # Extract both inputs before any transformation starts.
    raw_market = extractor.market()
    raw_samples = extractor.draw_samples()

    # Reshape the market data, aggregate the samples, then merge the two.
    merged = transformer.compile_dfs(
        transformer.reshape_market(raw_market),
        transformer.aggregate_from_samples(raw_samples),
    )

    # Load the merged frame into the target.
    loader.loading(merged)