Ejemplo n.º 1
0
def summarize_attributes(
    in_fc, group_fields, agg_cols, consolidations=None, melt_col=None
):
    """
    Summarize the rows of a feature class by group, following the column specs given.

    The table is read into a dataframe, consolidation columns are computed row-wise,
    an optional melt reshapes wide columns into label/value pairs, domains are
    applied, and the result is grouped and aggregated.

    Args:
        in_fc (str): path to the feature class to summarize, typically the result of
            intersecting a summary fc with a disaggregated fc
        group_fields (list): Column objects to group by; each may carry a rename and
            a domain
        agg_cols (list): AggColumn objects to aggregate; each supplies its agg_method
            and an optional rename
        consolidations (list): Consolidation objects whose input columns are combined
            row-wise (cons_method) into a new column, which is then aggregated
            (agg_method). Default: None
        melt_col (list): MeltColumn spec(s); only the first validated item is used.
            Default: None

    Returns:
        pandas.DataFrame: one row per group, with renames applied

    Raises:
        Exception: if any field needed by the specs is missing from in_fc
    """
    # --- Collect what to read, group by, aggregate, fill, and rename ---
    # Group fields (domain possible)
    group_specs = _validateAggSpecs(group_fields, Column)
    groupby_names = []
    read_fields = []
    value_cols = []
    null_fill = {}
    rename_map = {}
    for spec in group_specs:
        groupby_names.append(spec.name)
        read_fields.append(spec.name)
        null_fill[spec.name] = spec.default
        if spec.rename is not None:
            rename_map[spec.name] = spec.rename

    # Agg columns (no domain expected)
    how_to_agg = {}
    for spec in _validateAggSpecs(agg_cols, AggColumn):
        read_fields.append(spec.name)
        value_cols.append(spec.name)
        null_fill[spec.name] = spec.default
        how_to_agg[spec.name] = spec.agg_method
        if spec.rename is not None:
            rename_map[spec.name] = spec.rename

    # Consolidations (no domain expected); items lacking input_cols are ignored
    cons_specs = (
        _validateAggSpecs(consolidations, Consolidation) if consolidations else []
    )
    for cons in cons_specs:
        if not hasattr(cons, "input_cols"):
            continue
        read_fields.extend(cons.input_cols)
        value_cols.append(cons.name)
        null_fill.update(cons.defaultsDict())
        how_to_agg[cons.name] = cons.agg_method

    # Melt column (domain possible); only the first validated spec is used
    melt_spec = None
    if melt_col:
        melt_spec = _validateAggSpecs(melt_col, MeltColumn)[0]
        read_fields.extend(melt_spec.input_cols)
        groupby_names.append(melt_spec.label_col)
        value_cols.append(melt_spec.val_col)
        null_fill.update(melt_spec.defaultsDict())
        how_to_agg[melt_spec.val_col] = melt_spec.agg_method

    # --- Read the table ---
    # The same field can feed several consolidations/melts, so de-duplicate first
    unique_fields = list(set(read_fields))
    missing = PMT.which_missing(table=in_fc, field_list=unique_fields)
    if missing:
        raise Exception(
            f"\t\tthese cols were missing from the intersected FC: {missing}"
        )
    frame = PMT.table_to_df(
        in_tbl=in_fc, keep_fields=unique_fields, null_val=null_fill
    )

    # --- Consolidate: combine each spec's input columns row-wise ---
    for cons in cons_specs:
        if hasattr(cons, "input_cols"):
            frame[cons.name] = frame[cons.input_cols].agg(cons.cons_method, axis=1)

    # --- Melt: wide input columns become (label, value) rows ---
    if melt_spec:
        # Everything grouped or aggregated, other than the melt outputs, is carried
        # through as an identifier column
        id_vars = [name for name in groupby_names if name != melt_spec.label_col]
        id_vars.extend(name for name in value_cols if name != melt_spec.val_col)
        melted = frame.melt(
            id_vars=id_vars,
            value_vars=melt_spec.input_cols,
            var_name=melt_spec.label_col,
            value_name=melt_spec.val_col,
        )
        frame = melted.reset_index()

    # --- Domains: the domain's name is also grouped by ---
    # (apply_domain presumably adds a column with that name to the frame)
    for spec in group_specs:
        if spec.domain is None:
            continue
        spec.apply_domain(frame)
        groupby_names.append(spec.domain.name)
    if melt_spec and melt_spec.domain is not None:
        melt_spec.apply_domain(frame)
        groupby_names.append(melt_spec.domain.name)

    # --- Group by and aggregate ---
    selected = frame[groupby_names + value_cols]
    summary = selected.groupby(groupby_names).agg(how_to_agg).reset_index()

    # --- Apply renames ---
    if rename_map:
        summary.rename(columns=rename_map, inplace=True)

    return summary
Ejemplo n.º 2
0
def join_attributes(
    to_table,
    to_id_field,
    from_table,
    from_id_field,
    join_fields="*",
    null_value=0.0,
    renames=None,
    drop_dup_cols=False,
):
    """
    Helper function to join attributes of one table to another

    Args:
        to_table (str): path to table being extended
        to_id_field (str): primary key
        from_table (str): path to table being joined
        from_id_field (str): foreign key
        join_fields (list/str): field name(s) to be added to to_table; a single
            field name may be given as a plain string.
            Default: "*", indicates all non-required fields of from_table (other
            than from_id_field) are to be joined
        null_value (int/str): value to insert for nulls
        renames (dict): key/value pairs of existing field names/ new field names
        drop_dup_cols (bool): if True, joined fields whose (renamed) name already
            exists in to_table are dropped before joining; if every join field
            would be dropped, nothing is joined

    Returns:
        None
    """
    if renames is None:
        renames = {}

    # Normalize join_fields to a list. A bare field name must be wrapped, otherwise
    # it would be iterated character by character below and then fail when
    # concatenated with the id field.
    if isinstance(join_fields, str):
        if join_fields == "*":
            # All columns: get their names
            join_fields = [
                f.name
                for f in arcpy.ListFields(from_table)
                if not f.required and f.name != from_id_field
            ]
        else:
            join_fields = [join_fields]
    else:
        join_fields = list(join_fields)

    # List expected columns based on renames dict
    expected_fields = [renames.get(jf, jf) for jf in join_fields]

    # Check if expected outcomes will collide with fields in the table
    if drop_dup_cols:
        # All relevant fields in table (excluding the field to join by)
        tbl_fields = {
            f.name for f in arcpy.ListFields(to_table) if f.name != to_id_field
        }
        # Fields to drop, by their post-rename names
        drop_fields = [d for d in expected_fields if d in tbl_fields]
        # If all the fields to join will be dropped, exit
        if len(join_fields) == len(drop_fields):
            print("--- --- no new fields")
            return  # TODO: what if we want to update these fields?
    else:
        drop_fields = []

    # Dump from_table to df
    dump_fields = [from_id_field] + join_fields
    df = PMT.table_to_df(
        in_tbl=from_table, keep_fields=dump_fields, null_val=null_value
    )

    # Rename first, then drop: drop_fields holds the renamed column names
    if renames:
        df.rename(columns=renames, inplace=True)
    if drop_fields:
        df.drop(columns=drop_fields, inplace=True)

    # Join cols from df to to_table
    print(f"--- --- {list(df.columns)} to {to_table}")
    PMT.extend_table_df(
        in_table=to_table,
        table_match_field=to_id_field,
        df=df,
        df_match_field=from_id_field,
    )