Example #1
def build_enriched_tables(gdb, fc_dict, specs):
    """
    Helper function used to enrich and/or elongate data for a summarization area. Enrichment
    is based on the intersection of disaggregate features with summarization area features.
    Elongation melts tables for serial reporting (e.g., square footage by land use per
    summarization area).
    
    Args:
        gdb (str): path to geodatabase where outputs are written
        fc_dict (dict): dictionary returned from build_intersections
        specs (list of dicts): list of dictionaries specifying sources, grouping, aggregations,
            consolidations, melts/elongations, and an optional output table. The try/except
            clause uses the "out_table" key to write a new long table (elongation); if the key
            is absent, the summaries are appended to the existing feature class (widening).

    Returns:
        None
    """
    # Enrich features through summarization
    for spec in specs:
        summ, disag = spec["sources"]
        fc_name, fc_id, fc_fds = summ
        d_name, d_id, d_fds = disag
        if summ == disag:
            # Simple pivot wide to long
            fc = PMT.make_path(gdb, fc_fds, fc_name)
        else:
            # Pivot from intersection
            fc = fc_dict[summ][disag]

        print(f"--- Summarizing data from {d_name} to {fc_name}")
        # summary vars
        group = spec["grouping"]
        agg = spec["agg_cols"]
        consolidate = spec["consolidate"]
        melts = spec["melt_cols"]
        summary_df = summarize_attributes(
            in_fc=fc,
            group_fields=group,
            agg_cols=agg,
            consolidations=consolidate,
            melt_col=melts,
        )
        try:
            out_name = spec["out_table"]
            print(f"--- --- to long table {out_name}")
            out_table = PMT.make_path(gdb, out_name)
            PMT.df_to_table(df=summary_df, out_table=out_table, overwrite=True)
        except KeyError:
            # extend input table
            feature_class = PMT.make_path(gdb, fc_fds, fc_name)
            # if being run again, delete any previous data as da.ExtendTable will fail if a field exists
            summ_cols = [col for col in summary_df.columns.to_list() if col != fc_id]
            drop_fields = [
                f.name for f in arcpy.ListFields(feature_class) if f.name in summ_cols
            ]
            if drop_fields:
                print(
                    "--- --- deleting previously generated data and replacing with current summarizations"
                )
                arcpy.DeleteField_management(
                    in_table=feature_class, drop_field=drop_fields
                )
            PMT.extend_table_df(
                in_table=feature_class,
                table_match_field=fc_id,
                df=summary_df,
                df_match_field=fc_id,
                append_only=False,
            )  # TODO: handle append/overwrite more explicitly
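A minimal usage sketch follows. Everything below (geodatabase path, feature class names, ID fields, and attribute columns) is an illustrative placeholder; only the spec keys read inside the function ("sources", "grouping", "agg_cols", "consolidate", "melt_cols", and the optional "out_table") come from the code above.

# Hypothetical call to build_enriched_tables; all paths, names, and columns are placeholders.
summ_key = ("SummaryAreas", "RowID", "Polygons")   # (fc_name, fc_id, fc_fds)
disag_key = ("Parcels", "FOLIO", "Polygons")       # (d_name, d_id, d_fds)
specs = [
    {
        # Summarize parcel attributes up to summary areas via their intersection
        "sources": (summ_key, disag_key),
        "grouping": ["RowID"],
        "agg_cols": ["TOT_LVG_AREA", "NO_RES_UNTS"],
        "consolidate": [],
        "melt_cols": [],
        # Omitting "out_table" triggers the KeyError branch above, so the summaries
        # extend the SummaryAreas feature class rather than writing a new long table.
    },
]
# fc_dict is assumed to come from build_intersections and to map summary keys to
# intersection feature classes (fc_dict[summ_key][disag_key]).
build_enriched_tables(gdb="PMT_Snapshot.gdb", fc_dict=fc_dict, specs=specs)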
Example #2
def join_attributes(
    to_table,
    to_id_field,
    from_table,
    from_id_field,
    join_fields="*",
    null_value=0.0,
    renames=None,
    drop_dup_cols=False,
):
    """
    Helper function to join attributes of one table to another

    Args:
        to_table (str): path to table being extended
        to_id_field (str): primary key
        from_table (str): path to table being joined
        from_id_field (str): foreign key
        join_fields (list/str): list of fields to be added to to_table;
            Default: "*", indicates all fields are to be joined
        null_value (int/str): value to insert for nulls
        renames (dict): key/value pairs of existing field names/ new field names
        drop_dup_cols (bool): if True, fields that already exist in to_table are excluded from the join

    Returns:
        None
    """
    # If all columns, get their names
    if renames is None:
        renames = {}
    if join_fields == "*":
        join_fields = [
            f.name
            for f in arcpy.ListFields(from_table)
            if not f.required and f.name != from_id_field
        ]
    # List expected columns based on renames dict
    expected_fields = [renames.get(jf, jf) for jf in join_fields]
    # Check if expected outcomes will collide with fields in the table
    if drop_dup_cols:
        # All relevant fields in table (excluding the field to join by)
        tbl_fields = [
            f.name for f in arcpy.ListFields(to_table) if f.name != to_id_field
        ]
        # List of which fields to drop
        drop_fields = [d for d in expected_fields if d in tbl_fields]
        # If all the fields to join will be dropped, exit
        if len(join_fields) == len(drop_fields):
            print("--- --- no new fields")
            return  # TODO: what if we want to update these fields?
    else:
        drop_fields = []

    # Dump from_table to df
    dump_fields = [from_id_field] + join_fields
    df = PMT.table_to_df(
        in_tbl=from_table, keep_fields=dump_fields, null_val=null_value
    )

    # Rename columns and drop columns as needed
    if renames:
        df.rename(columns=renames, inplace=True)
    if drop_fields:
        df.drop(columns=drop_fields, inplace=True)

    # Join cols from df to to_table
    print(f"--- --- {list(df.columns)} to {to_table}")
    PMT.extend_table_df(
        in_table=to_table,
        table_match_field=to_id_field,
        df=df,
        df_match_field=from_id_field,
    )
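A rough usage sketch; the paths, field names, and rename mapping below are hypothetical placeholders rather than values from the source.

# Hypothetical call to join_attributes; paths and field names are placeholders.
join_attributes(
    to_table="PMT_Snapshot.gdb/SummaryAreas",
    to_id_field="RowID",
    from_table="PMT_Snapshot.gdb/area_summaries",
    from_id_field="RowID",
    join_fields=["TOT_LVG_AREA", "NO_RES_UNTS"],
    null_value=0.0,
    renames={"TOT_LVG_AREA": "LivingArea"},
    drop_dup_cols=True,
)
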
def process_years_to_trend(
    years,
    tables,
    long_features,
    diff_features,
    base_year=None,
    snapshot_year=None,
    out_gdb_name=None,
):
    """
    Utilizing a base year and a snapshot year, trend data are generated for the associated time period.

    Procedure:
        1) creates a blank output workspace with the necessary feature dataset categories, uniquely named
        2) generates tables long on year for all tabular data and summary areas
        3) generates difference tables (snapshot minus base) for all tabular data and summary features
            (Summary Areas, Census Blocks, MAZ, and TAZ)
        4) upon completion, replaces the existing copy of the Trend/NearTerm gdb with the newly
            processed version

    Args:
        years (list): years to include in the trend
        tables (list of dicts): table specs; each provides "table" (name criteria) and "index_cols" keys
        long_features (list of dicts): feature class specs elongated by year; each provides a "table"
            tuple whose first element is the feature class name criteria
        diff_features (list of dicts): feature class specs to difference; each provides a "table" tuple
            (name, id field, feature dataset) and "index_cols" keys
        base_year (int): baseline year for the trend; defaults to the first entry in `years`
        snapshot_year (int): snapshot year for the trend; defaults to the last entry in `years`
        out_gdb_name (str): name of the final output geodatabase; defaults to "Trend"

    Returns:
        None
    """
    # TODO: add a try/except to delete any intermediate data created
    # Validation
    if base_year is None:
        base_year = years[0]
    if snapshot_year is None:
        snapshot_year = years[-1]
    if base_year not in years or snapshot_year not in years:
        raise ValueError("Base year and snapshot year must be in years list")
    if out_gdb_name is None:
        out_gdb_name = "Trend"

    # Set criteria
    table_criteria = [spec["table"] for spec in tables]
    diff_criteria = [spec["table"][0] for spec in diff_features]
    long_criteria = [spec["table"][0] for spec in long_features]

    # make a blank geodatabase
    out_path = PMT.validate_directory(BUILD)
    out_gdb = b_help.make_trend_template(out_path)

    # Get snapshot data
    for yi, year in enumerate(years):
        process_year = year
        if year == snapshot_year:
            if year == "NearTerm":
                process_year = snapshot_year = "NearTerm"
            else:
                process_year = snapshot_year = "Current"
        in_gdb = PMT.validate_geodatabase(
            gdb_path=PMT.make_path(BUILD, f"Snapshot_{process_year}.gdb"),
            overwrite=False,
        )
        # Make every table extra long on year
        year_tables = PMT._list_table_paths(gdb=in_gdb,
                                            criteria=table_criteria)
        year_fcs = PMT._list_fc_paths(gdb=in_gdb,
                                      fds_criteria="*",
                                      fc_criteria=long_criteria)
        elongate = year_tables + year_fcs
        for elong_table in elongate:
            elong_out_name = os.path.split(elong_table)[1] + "_byYear"
            if yi == 0:
                # Initialize the output table
                print(f"Creating long table {elong_out_name}")
                arcpy.TableToTable_conversion(in_rows=elong_table,
                                              out_path=out_gdb,
                                              out_name=elong_out_name)
            else:
                # Append to the output table
                print(
                    f"Appending to long table {elong_out_name} ({process_year})"
                )
                out_table = PMT.make_path(out_gdb, elong_out_name)
                arcpy.Append_management(inputs=elong_table,
                                        target=out_table,
                                        schema_type="NO_TEST")
        # Get snapshot and base year params
        if process_year == base_year:
            base_tables = year_tables[:]
            base_fcs = PMT._list_fc_paths(gdb=in_gdb,
                                          fds_criteria="*",
                                          fc_criteria=diff_criteria)
        elif process_year == snapshot_year:
            snap_tables = year_tables[:]
            snap_fcs = PMT._list_fc_paths(gdb=in_gdb,
                                          fds_criteria="*",
                                          fc_criteria=diff_criteria)

    # Make difference tables (snapshot - base)
    for base_table, snap_table, specs in zip(base_tables, snap_tables, tables):
        out_name = os.path.split(base_table)[1] + "_diff"
        out_table = PMT.make_path(out_gdb, out_name)
        idx_cols = specs["index_cols"]
        diff_df = PMT.table_difference(this_table=snap_table,
                                       base_table=base_table,
                                       idx_cols=idx_cols)
        print(f"Creating table {out_name}")
        PMT.df_to_table(df=diff_df, out_table=out_table, overwrite=True)

    # Make difference fcs (snapshot - base)
    for base_fc, snap_fc, spec in zip(base_fcs, snap_fcs, diff_features):
        # TODO: will raise if not all diff features are found, but maybe that's good?
        # Get specs
        fc_name, fc_id, fc_fds = spec["table"]
        idx_cols = spec["index_cols"]
        if isinstance(idx_cols, string_types):
            idx_cols = [idx_cols]
        if fc_id not in idx_cols:
            idx_cols.append(fc_id)
        out_fds = PMT.make_path(out_gdb, fc_fds)
        out_name = fc_name + "_diff"
        out_table = PMT.make_path(out_fds, out_name)
        # Field mappings
        field_mappings = arcpy.FieldMappings()
        for idx_col in idx_cols:
            fm = arcpy.FieldMap()
            fm.addInputField(base_fc, idx_col)
            field_mappings.addFieldMap(fm)
        # Copy geoms
        print(f"Creating feature class {out_name}")
        arcpy.FeatureClassToFeatureClass_conversion(
            in_features=base_fc,
            out_path=out_fds,
            out_name=out_name,
            field_mapping=field_mappings,
        )
        # Get table difference
        diff_df = PMT.table_difference(this_table=snap_fc,
                                       base_table=base_fc,
                                       idx_cols=idx_cols)
        # Extend attribute table
        drop_cols = [
            c for c in diff_df.columns if c in idx_cols and c != fc_id
        ]
        diff_df.drop(columns=drop_cols, inplace=True)
        print("... adding difference columns")
        PMT.extend_table_df(
            in_table=out_table,
            table_match_field=fc_id,
            df=diff_df,
            df_match_field=fc_id,
        )

    # TODO: calculate percent change in value over base for summary areas

    print("Finalizing the trend")
    final_gdb = PMT.make_path(BUILD, f"{out_gdb_name}.gdb")
    b_help.finalize_output(intermediate_gdb=out_gdb, final_gdb=final_gdb)
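
As a sketch of how this function might be invoked, with spec contents that are illustrative assumptions mirroring only the keys read in the function body:

# Hypothetical call to process_years_to_trend; years and spec values are placeholders.
process_years_to_trend(
    years=[2015, 2016, 2017, 2018, 2019],
    tables=[{"table": "EconDemog_parcels", "index_cols": "GEOID"}],
    long_features=[{"table": ("SummaryAreas", "RowID", "Polygons")}],
    diff_features=[{"table": ("SummaryAreas", "RowID", "Polygons"), "index_cols": "RowID"}],
    base_year=2015,
    snapshot_year=2019,
    out_gdb_name="Trend",
)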