Beispiel #1
0
def statistic_rank_dimension(df,
                             groups,
                             na_field,
                             fields,
                             is_sa=True,
                             is_cube=False,
                             sort_field='-avg_salary',
                             need_total=False):
    """
    Ranking: aggregate ``df`` over the given dimensions and attach a rank.

    Rows in which any grouping column, ``na_field`` or ``fields`` is null are
    dropped before aggregating. The aggregation itself is delegated to
    ``statistic_cube`` or ``statistic_groups``; the ranking to ``add_rank``.

    :param df: input DataFrame (presumably a Spark DataFrame, judging by the
        ``filter`` / ``isNotNull`` calls -- confirm against the caller).
    :param groups: list of grouping column names.
    :param na_field: optional single column name; used as a one-element list
        when truthy, otherwise ignored.
    :param fields: optional single column name; wrapped as one list element
        when truthy (it is not split on commas here).
    :param is_sa: forwarded unchanged to the aggregation helper.
    :param is_cube: use ``statistic_cube`` when true, else ``statistic_groups``.
    :param sort_field: forwarded to ``add_rank`` as ``sort_field``.
    :param need_total: when true, the aggregate is passed through ``add_total``
        with ``groups + na_field`` before ranking.
    :return: the DataFrame produced by ``add_rank``.
    """
    na_columns = [na_field] if na_field else []
    value_fields = [fields] if fields else []

    # Discard rows with a null in any column that takes part in the analysis.
    for column in groups + na_columns + value_fields:
        df = df.filter(df[column].isNotNull())

    aggregate = statistic_cube if is_cube else statistic_groups
    md_df = aggregate(df, groups, na_columns, value_fields, is_sa)

    if need_total:
        md_df = add_total(md_df, *(groups + na_columns))

    # Without a value field the first grouping column is left out of the
    # columns handed to add_rank.
    rank_groups = groups if value_fields else groups[1:]
    return add_rank(md_df, *(rank_groups + na_columns), sort_field=sort_field)
Beispiel #2
0
def statistic_salary_dimension(df,
                               groups,
                               na_field,
                               fields,
                               is_sa=True,
                               is_cube=False,
                               is_age=False,
                               need_alias=False):
    """
    Salary analysis: aggregate ``df`` over the given dimensions.

    Rows in which any grouping column, ``na_field`` or any entry of ``fields``
    is null are dropped before aggregating.

    :param df: input DataFrame (presumably a Spark DataFrame -- confirm
        against the caller).
    :param groups: list of grouping column names.
    :param na_field: optional single column name; used as a one-element list
        when truthy, otherwise ignored.
    :param fields: optional comma-separated string of column names.
    :param is_sa: forwarded unchanged to the aggregation helper.
    :param is_cube: use ``statistic_cube`` when true, else ``statistic_groups``.
    :param is_age: when true, ``df`` is passed through ``filter_age`` and its
        ``age`` column is replaced by the result of applying ``str`` via a UDF.
    :param need_alias: when true, the aggregate is joined on
        ``"position_name"`` with the results of ``get_position_industry`` and
        ``get_position_alias``.
    :return: the aggregated DataFrame, with the alias joins applied if requested.
    """
    na_columns = [na_field] if na_field else []
    value_fields = fields.split(',') if fields else []

    # Discard rows with a null in any column that takes part in the analysis.
    for column in groups + na_columns + value_fields:
        df = df.filter(df[column].isNotNull())

    if is_age:
        age_filtered = filter_age(df)
        # The age column expression is taken from the pre-filter_age frame,
        # exactly as the single chained expression would evaluate it.
        df = age_filtered.withColumn("age", F.udf(str)(df.age))

    aggregate = statistic_cube if is_cube else statistic_groups
    md_df = aggregate(df, groups, na_columns, value_fields, is_sa)

    if not need_alias:
        return md_df

    # Both lookups are derived from the filtered input frame, not the aggregate.
    industry_df = get_position_industry(df)
    md_df = md_df.join(industry_df, "position_name")
    alias_df = get_position_alias(df)
    return md_df.join(alias_df, "position_name")
Beispiel #3
0
def statistic_position_address_change(df,
                                      groups,
                                      na_field,
                                      fields,
                                      is_sa=False):
    """
    Position continuity analysis.

    Narrows ``df`` to the first two of the combined analysis columns plus
    ``"resume_id"`` and ``"work_index"``, passes it through ``next_same`` keyed
    on the first grouping column, aggregates with ``statistic_cube`` and ranks
    the result by ``'-person_num'``.

    :param df: input DataFrame (presumably a Spark DataFrame -- confirm
        against the caller).
    :param groups: list of grouping column names; must be non-empty because
        ``groups[0]`` is used.
    :param na_field: optional single column name; used as a one-element list
        when truthy, otherwise ignored.
    :param fields: optional single column name; wrapped as one list element
        when truthy.
    :param is_sa: forwarded unchanged to ``statistic_cube``.
    :return: the DataFrame produced by ``add_rank``.
    :raises IndexError: if ``groups + na_field + fields`` has fewer than two
        entries.
    """
    na_columns = [na_field] if na_field else []
    value_fields = [fields] if fields else []
    combined = groups + na_columns + value_fields

    # Only the first two analysis columns and the two resume keys are kept.
    projection = [combined[0], combined[1], "resume_id", "work_index"]
    df = df.select(*projection)

    df = next_same(df, groups[0])
    cube_df = statistic_cube(df, groups, na_columns, value_fields, is_sa)

    # The first grouping column is left out of the columns handed to add_rank.
    rank_columns = groups[1:] + na_columns
    return add_rank(cube_df, *rank_columns, sort_field='-person_num')
Beispiel #4
0
def statistic_number_dimension(df,
                               groups,
                               na_field,
                               fields,
                               is_sa=False,
                               is_cube=False):
    """
    Headcount (weight) analysis: aggregate ``df`` over the given dimensions.

    Rows in which any grouping column, ``na_field`` or any entry of ``fields``
    is null are dropped before aggregating.

    :param df: input DataFrame (presumably a Spark DataFrame -- confirm
        against the caller).
    :param groups: list of grouping column names.
    :param na_field: optional single column name; used as a one-element list
        when truthy, otherwise ignored.
    :param fields: optional comma-separated string of column names.
    :param is_sa: forwarded unchanged to the aggregation helper.
    :param is_cube: use ``statistic_cube`` when true, else ``statistic_groups``.
    :return: the DataFrame returned by the selected aggregation helper.
    """
    na_columns = [na_field] if na_field else []
    value_fields = fields.split(',') if fields else []

    # Discard rows with a null in any column that takes part in the analysis.
    for column in groups + na_columns + value_fields:
        df = df.filter(df[column].isNotNull())

    aggregate = statistic_cube if is_cube else statistic_groups
    return aggregate(df, groups, na_columns, value_fields, is_sa)