Example #1
def finalize_output(intermediate_gdb, final_gdb):
    """
    Takes an intermediate GDB path and the final GDB path for that data; if the final
    GDB already exists it is backed up to a TEMP folder and replaced with the intermediate
    GDB, otherwise the intermediate GDB is simply copied to the final location. On failure
    the backup is restored, and the intermediate GDB is deleted when processing completes.
    
    Args:
        intermediate_gdb (str): path to file geodatabase
        final_gdb (str): path to file geodatabase, cannot be the same as intermediate
    
    Returns:
        None
    """
    output_folder, _ = os.path.split(intermediate_gdb)
    temp_folder = PMT.validate_directory(PMT.make_path(output_folder, "TEMP"))
    _, copy_old_gdb = os.path.split(final_gdb)
    temp_gdb = PMT.make_path(temp_folder, copy_old_gdb)
    try:
        # make copy of existing data if it exists
        if arcpy.Exists(final_gdb):
            arcpy.Copy_management(in_data=final_gdb, out_data=temp_gdb)
            arcpy.Delete_management(in_data=final_gdb)
        arcpy.Copy_management(in_data=intermediate_gdb, out_data=final_gdb)
    except Exception:
        # replace old data with copy made in previous step
        print("An error occurred, rolling back changes")
        arcpy.Copy_management(in_data=temp_gdb, out_data=final_gdb)
    finally:
        arcpy.Delete_management(intermediate_gdb)
        print("")
Example #2
def build_access_by_mode(sum_area_fc, modes, id_field, out_gdb, year_val):
    """
    Helper function to generate access tables by mode.
    
    Args:
        sum_area_fc (str): path to summary area feature class
        modes (list): modes of travel
        id_field (str/list): field(s) to be used as index
        out_gdb (str): path to output geodatabase
        year_val (int): value to insert for year

    Returns:
        None
    """
    for mode in modes:
        print(f"--- --- {mode}")
        df = _createLongAccess(
            int_fc=sum_area_fc,
            id_field=id_field,
            activities=b_conf.ACTIVITIES,
            time_breaks=b_conf.TIME_BREAKS,
            mode=mode,
        )
        df["Year"] = year_val
        out_table = PMT.make_path(out_gdb, f"ActivityByTime_{mode}")
        PMT.df_to_table(df, out_table)
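A hedged usage sketch; the mode names and index fields below are placeholders, not values read from the build configuration:

access_modes = ["Auto", "Transit", "Walk", "Bike"]      # placeholder modes (b_conf.MODES in practice)
build_access_by_mode(
    sum_area_fc=r"snapshot.gdb\Polygons\SummaryAreas",  # hypothetical path
    modes=access_modes,
    id_field=["RowID", "Name", "Corridor"],             # placeholder index fields
    out_gdb="snapshot.gdb",
    year_val=2019,
)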
Example #3
def process_joins(in_gdb, out_gdb, fc_specs, table_specs):
    """
    Joins feature classes to their associated tabular data from the year's data set and appends the
    result to the feature classes in the output gdb, making a single feature class with a wide
    attribute table for each geographic unit of analysis.

    Args:
        in_gdb (str): path to geodatabase containing input feature classes and tables
        out_gdb (str): path to geodatabase that will hold output feature classes
        fc_specs (list): list of tuples defining the feature classes from `in_gdb` to retain
            in `out_gdb`. Each tuple consists of the feature class name, unique ID attribute,
            and the feature dataset (within `in_gdb`) where it is located.
        table_specs (list): list of tuples defining the tables from `in_gdb` to join to
            feature classes in `out_gdb`. Each tuple consists of table name, unique ID attribute,
            fields to retain ("*" = all fields), and a dictionary of field renaming specs ({}
            must be given if no renaming is desired). Columns from each table are joined to
            the FC associated with the table name (based on the presence of the FC name in the
            table name)
    
    Returns:
        list: List of paths to joined feature classes ordered as Blocks, Parcels, MAZ, TAZ, SummaryAreas, NetworkNodes
    """

    #   tables need to be ordered the same as FCs
    _table_specs_ = []
    for fc in fc_specs:
        t_specs = [spec for spec in table_specs if fc[0].lower() in spec[0].lower()]
        _table_specs_.append(t_specs)
    table_specs = _table_specs_

    # join tables to feature classes, making them WIDE
    joined_fcs = []  # --> blocks, parcels, maz, taz, sa, net_nodes
    for fc_spec, table_spec in zip(fc_specs, table_specs):
        fc_name, fc_id, fds = fc_spec
        fc = PMT.make_path(out_gdb, fds, fc_name)

        for spec in table_spec:
            tbl_name, tbl_id, tbl_fields, tbl_renames = spec
            tbl = PMT.make_path(in_gdb, tbl_name)
            print(f"--- Joining fields from {tbl_name} to {fc_name}")
            join_attributes(
                to_table=fc,
                to_id_field=fc_id,
                from_table=tbl,
                from_id_field=tbl_id,
                join_fields=tbl_fields,
                renames=tbl_renames,
                drop_dup_cols=True,
            )
            joined_fcs.append(fc)
    return joined_fcs
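To make the tuple formats concrete, a hedged sketch of one `fc_specs` entry and one matching `table_specs` entry; all names are illustrative, not taken from the project configuration:

# fc_spec: (feature class name, unique ID field, feature dataset in in_gdb)
fc_specs = [("Parcels", "FOLIO", "Polygons")]               # hypothetical names
# table_spec: (table name, unique ID field, fields to keep, rename dict)
table_specs = [("EconDemog_parcels", "FOLIO", "*", {})]     # hypothetical names; {} = no renames
joined = process_joins(in_gdb="PMT_2019.gdb", out_gdb="snapshot.gdb",
                       fc_specs=fc_specs, table_specs=table_specs)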
Example #4
def post_process_databases(basic_features_gdb, build_dir):
    """
    Copies the basic features gdb into the build dir and cleans up FCs and tables,
    renaming SummID to RowID. Finally, deletes the TEMP folder generated in the
    build process.

    Args:
        basic_features_gdb (str): path to the basic features geodatabase
        build_dir (str): path to the build directory
    
    Returns:
        None
    """
    print("Postprocessing build directory...")

    # copy BasicFeatures into Build
    print("--- Overwriting basic features in BUILD dir with current version")
    path, basename = os.path.split(basic_features_gdb)
    out_basic_features = PMT.make_path(build_dir, basename)
    PMT.check_overwrite_output(output=out_basic_features, overwrite=True)
    arcpy.Copy_management(in_data=basic_features_gdb, out_data=out_basic_features)

    # reset SummID to RowID
    print("--- updating SummID to RowID project wide...")
    arcpy.env.workspace = build_dir
    for gdb in arcpy.ListWorkspaces(workspace_type="FileGDB"):
        print(f"--- Cleaning up {gdb}")
        arcpy.env.workspace = gdb
        # update feature classes
        fcs = list(list_fcs_in_gdb())
        tbls = arcpy.ListTables()
        all_tbls = fcs + tbls
        alter_fields(
            table_list=all_tbls,
            field=p_conf.SUMMARY_AREAS_COMMON_KEY,
            new_field_name=b_conf.SUMMARY_AREAS_FINAL_KEY)
    # TODO: incorporate a more broad AlterField protocol for Popup configuration

    # delete TEMP folder
    temp = PMT.make_path(build_dir, "TEMP")
    if arcpy.Exists(temp):
        print("--- deleting TEMP folder from previous build steps")
        arcpy.Delete_management(temp)
Example #5
def sum_parcel_cols(gdb, par_spec, columns):
    """
    Helper function to summarize a provided list of columns for the parcel layer, creating
    region wide statistics

    Args:
        gdb (str): path to geodatabase that parcel layer exists in
        par_spec (tuple): tuple of format (fc name, unique id column, feature dataset location)
        columns (list): string list of fields/columns needing summarization

    Returns:
        pandas.Series: column totals across all parcels (region-wide sums)
    """
    par_name, par_id, par_fds = par_spec
    par_fc = PMT.make_path(gdb, par_fds, par_name)
    df = PMT.featureclass_to_df(
        in_fc=par_fc, keep_fields=columns, skip_nulls=False, null_val=0
    )
    return df.sum()
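A small hedged sketch of a call; the parcel spec follows the (name, id, feature dataset) layout described above, with hypothetical names and placeholder fields:

par_spec = ("Parcels", "FOLIO", "Polygons")                       # hypothetical (name, id, fds)
totals = sum_parcel_cols(gdb="snapshot.gdb", par_spec=par_spec,
                         columns=["TOT_LVG_AREA", "NO_RES_UNTS"]) # placeholder fields
print(totals["TOT_LVG_AREA"])                                     # region-wide sum for one column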
Example #6
def make_snapshot_template(in_gdb, out_path, out_gdb_name=None, overwrite=False):
    """
    Helper function to copy yearly feature classes into a reporting geodatabase;
    copies all feature datasets from corresponding clean data workspace
    
    Args:
        in_gdb (str): path to clean data workspace
        out_path (str): path where snapshot template gdb is written
        out_gdb_name (str): optional name of output gdb
        overwrite (bool): boolean flag to overwrite an existing copy of the out_gdb_name

    Returns (str):
        path to the newly created reporting geodatabase
    """
    out_gdb = make_reporting_gdb(out_path, out_gdb_name, overwrite)
    # copy in the geometry data containing minimal tabular data
    for fds in ["Networks", "Points", "Polygons"]:
        print(f"--- copying FDS {fds}")
        source_fd = PMT.make_path(in_gdb, fds)
        out_fd = PMT.make_path(out_gdb, fds)
        arcpy.Copy_management(source_fd, out_fd)
    return out_gdb
Example #7
def make_reporting_gdb(out_path, out_gdb_name=None, overwrite=False):
    """
    Helper function to create a temporary geodatabase to hold data as it is processed
    
    Args:
        out_path (str): path to folder
        out_gdb_name (str): name of geodatabase, Default is None, resulting in a unique name
        overwrite (bool): flag to delete an existing geodatabase

    Returns (str):
        path to output geodatabase
    """
    if not out_gdb_name:
        out_gdb_name = f"_{uuid.uuid4().hex}.gdb"
        out_gdb = PMT.make_path(out_path, out_gdb_name)
    elif out_gdb_name and overwrite:
        out_gdb = PMT.make_path(out_path, out_gdb_name)
        PMT.check_overwrite_output(output=out_gdb, overwrite=overwrite)
    else:
        out_gdb = PMT.make_path(out_path, out_gdb_name)
    arcpy.CreateFileGDB_management(out_path, out_gdb_name)
    return out_gdb
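Two hedged usage sketches: one relying on the unique default name, one naming the output and overwriting any existing copy (paths and names are hypothetical):

scratch_gdb = make_reporting_gdb(out_path=r"C:\PMT\BUILD")            # created as _<uuid>.gdb
named_gdb = make_reporting_gdb(out_path=r"C:\PMT\BUILD",
                               out_gdb_name="Snapshot_scratch.gdb",   # hypothetical name
                               overwrite=True)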
Example #8
def build_intersections(gdb, enrich_specs):
    """
    Helper function that performs a batch intersection of polygon feature classes

    Args:
        gdb (str): path to geodatabase that contains the source data
        enrich_specs (list): list of dictionaries specifying source data, groupings, aggregations,
            consolidations, melt/elongation, and boolean for full geometry or centroid use in intersection
    
    Returns:
        dict: dictionary of the format {summ_fc: {disag_fc: path/to/intersection}};
            a summ_fc maps to multiple entries if more than one intersection is made against it.
    """
    # Intersect features for long tables
    int_out = {}
    for intersect in enrich_specs:
        # Parse specs
        summ, disag = intersect["sources"]
        summ_name, summ_id, summ_fds = summ
        disag_name, disag_id, disag_fds = disag
        summ_in = PMT.make_path(gdb, summ_fds, summ_name)
        disag_in = PMT.make_path(gdb, disag_fds, disag_name)
        full_geometries = intersect["disag_full_geometries"]
        # Run intersect
        print(f"--- Intersecting {summ_name} with {disag_name}")
        int_fc = PMT.intersect_features(
            summary_fc=summ_in,
            disag_fc=disag_in,
            in_temp_dir=True,
            full_geometries=full_geometries,
        )
        # Record with specs
        sum_dict = int_out.get(summ, {})
        sum_dict[disag] = int_fc
        int_out[summ] = sum_dict

    return int_out
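A hedged sketch of a single `enrich_specs` entry and how the returned dictionary is keyed; only the keys read by this function are shown, and the source tuples are hypothetical:

summ_spec = ("SummaryAreas", "RowID", "Polygons")     # hypothetical (name, id, fds)
disag_spec = ("Parcels", "FOLIO", "Polygons")
enrich_specs = [{
    "sources": (summ_spec, disag_spec),
    "disag_full_geometries": False,   # False presumably uses centroids rather than full polygons
}]
int_fcs = build_intersections(gdb="snapshot.gdb", enrich_specs=enrich_specs)
parcel_int_fc = int_fcs[summ_spec][disag_spec]        # path to the intersection feature class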
Example #9
def apply_field_calcs(gdb, new_field_specs, recalculate=False):
    """
    Helper function that applies field calculations, adding a new field to a table

    Args:
        gdb (str): path to geodatabase containing table to have new calc added
        new_field_specs (list): list of dictionaries specifying table(s), new_field, field_type,
            expr, code_block
        recalculate (bool): flag to rerun a calculation if the field already exists in the table;
            currently unused

    Returns:
        None
    """
    # Iterate over new fields
    for nf_spec in new_field_specs:
        # Get params
        tables = nf_spec["tables"]
        new_field = nf_spec["new_field"]
        field_type = nf_spec["field_type"]
        expr = nf_spec["expr"]
        code_block = nf_spec["code_block"]
        try:
            # Get params
            if isinstance(nf_spec["params"], Iterable):
                params = nf_spec["params"]
                all_combos = list(itertools.product(*params))
                for combo in all_combos:
                    combo_spec = nf_spec.copy()
                    del combo_spec["params"]
                    combo_spec["new_field"] = combo_spec["new_field"].format(*combo)
                    combo_spec["expr"] = combo_spec["expr"].format(*combo)
                    combo_spec["code_block"] = combo_spec["code_block"].format(*combo)
                    apply_field_calcs(gdb, [combo_spec])
            else:
                raise Exception("Spec Params must be an iterable if provided")
        except KeyError:
            add_args = {"field_name": new_field, "field_type": field_type}
            calc_args = {
                "field": new_field,
                "expression": expr,
                "expression_type": "PYTHON3",
                "code_block": code_block,
            }
            # iterate over tables
            if isinstance(tables, string_types):
                tables = [tables]
            print(f"--- Adding field {new_field} to {len(tables)} tables")
            for table in tables:
                t_name, t_id, t_fds = table
                in_table = PMT.make_path(gdb, t_fds, t_name)
                # update params
                add_args["in_table"] = in_table
                calc_args["in_table"] = in_table
                if field_type == "TEXT":
                    length = nf_spec["length"]
                    add_args["field_length"] = length

                # TODO: fix the below to work, was failing previously
                # # check if new field already in dataset, if recalc True delete and recalculate
                # if PMT.which_missing(table=in_table, field_list=[new_field]):
                #     if recalculate:
                #         print(f"--- --- recalculating {new_field}")
                #         arcpy.DeleteField_management(in_table=in_table, drop_field=new_field)
                #     else:
                #         print(f"--- --- {new_field} already exists, skipping...")
                #         continue
                # add and calc field
                arcpy.AddField_management(**add_args)
                arcpy.CalculateField_management(**calc_args)
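A hedged sketch of one `new_field_specs` entry using the keys read by this function; the optional "params" key expands "{0}" style placeholders via itertools.product, and all table and field names here are placeholders:

spec = {
    "tables": [("Parcels", "FOLIO", "Polygons")],   # hypothetical (name, id, fds) tuples
    "new_field": "LVG_AREA_PER_{0}",                # "{0}" is filled from params
    "field_type": "DOUBLE",
    "expr": "!TOT_LVG_AREA! / {0}",                 # placeholder expression
    "code_block": "",
    "params": [[1, 1000]],                          # one iterable per placeholder
}
apply_field_calcs(gdb="snapshot.gdb", new_field_specs=[spec])  # adds LVG_AREA_PER_1 and LVG_AREA_PER_1000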
Example #10
def summarize_attributes(
    in_fc, group_fields, agg_cols, consolidations=None, melt_col=None
):
    """
    Helper function to summarize an input feature class according to the
    grouping, aggregation, consolidation, and melt columns/objects provided

    Args:
        in_fc (str): path to feature class, typically this will be the result of an
            intersection of a summary fc and disaggregated fc
        group_fields (list): list of Column objects with optional rename attribute
        agg_cols (list): list of AggColumn objects with optional agg_method and rename attributes
        consolidations (list): list of Consolidation objects with optional consolidation method attribute
        melt_col (list): list of MeltColumn objects with optional agg_method, default value, and
            DomainColumn object

    Returns:
        pandas.DataFrame: all data summarized according to the provided specs
    """
    # Validation (listify inputs, validate values)
    # - Group fields (domain possible)
    group_fields = _validateAggSpecs(group_fields, Column)
    gb_fields = [gf.name for gf in group_fields]
    dump_fields = [gf.name for gf in group_fields]
    keep_cols = []
    null_dict = dict([(gf.name, gf.default) for gf in group_fields])
    renames = [(gf.name, gf.rename) for gf in group_fields if gf.rename is not None]

    # - Agg columns (no domain expected)
    agg_cols = _validateAggSpecs(agg_cols, AggColumn)
    agg_methods = {}
    for ac in agg_cols:
        dump_fields.append(ac.name)
        keep_cols.append(ac.name)
        null_dict[ac.name] = ac.default
        agg_methods[ac.name] = ac.agg_method
        if ac.rename is not None:
            renames.append((ac.name, ac.rename))

    # - Consolidations (no domain expected)
    if consolidations:
        consolidations = _validateAggSpecs(consolidations, Consolidation)
        for c in consolidations:
            if hasattr(c, "input_cols"):
                dump_fields += [ic for ic in c.input_cols]
                keep_cols.append(c.name)
                null_dict.update(c.defaultsDict())
                agg_methods[c.name] = c.agg_method
    else:
        consolidations = []

    # - Melt columns (domain possible)
    if melt_col:
        melt_col = _validateAggSpecs(melt_col, MeltColumn)[0]
        dump_fields += [ic for ic in melt_col.input_cols]
        gb_fields.append(melt_col.label_col)
        keep_cols.append(melt_col.val_col)
        null_dict.update(melt_col.defaultsDict())
        agg_methods[melt_col.val_col] = melt_col.agg_method

    # Dump the intersect table to df
    dump_fields = list(
        set(dump_fields)
    )  # remove duplicated fields used in multiple consolidations/melts
    missing = PMT.which_missing(table=in_fc, field_list=dump_fields)
    if not missing:
        int_df = PMT.table_to_df(
            in_tbl=in_fc, keep_fields=dump_fields, null_val=null_dict
        )
    else:
        raise Exception(
            f"\t\tthese cols were missing from the intersected FC: {missing}"
        )
    # Consolidate columns
    for c in consolidations:
        if hasattr(c, "input_cols"):
            int_df[c.name] = int_df[c.input_cols].agg(c.cons_method, axis=1)

    # Melt columns
    if melt_col:
        id_fields = [f for f in gb_fields if f != melt_col.label_col]
        id_fields += [f for f in keep_cols if f != melt_col.val_col]
        int_df = int_df.melt(
            id_vars=id_fields,
            value_vars=melt_col.input_cols,
            var_name=melt_col.label_col,
            value_name=melt_col.val_col,
        ).reset_index()
    # Domains
    for group_field in group_fields:
        if group_field.domain is not None:
            group_field.apply_domain(int_df)
            gb_fields.append(group_field.domain.name)
    if melt_col:
        if melt_col.domain is not None:
            melt_col.apply_domain(int_df)
            gb_fields.append(melt_col.domain.name)

    # Group by - summarize
    all_fields = gb_fields + keep_cols
    sum_df = int_df[all_fields].groupby(gb_fields).agg(agg_methods).reset_index()

    # Apply renames
    if renames:
        sum_df.rename(columns=dict(renames), inplace=True)

    return sum_df
Example #11
def join_attributes(
    to_table,
    to_id_field,
    from_table,
    from_id_field,
    join_fields="*",
    null_value=0.0,
    renames=None,
    drop_dup_cols=False,
):
    """
    Helper function to join attributes of one table to another

    Args:
        to_table (str): path to table being extended
        to_id_field (str): primary key
        from_table (str): path to table being joined
        from_id_field (str): foreign key
        join_fields (list/str): list of fields to be added to to_table;
            Default: "*", indicates all fields are to be joined
        null_value (int/str): value to insert for nulls
        renames (dict): key/value pairs of existing field names/ new field names
        drop_dup_cols (bool): flag to eliminate duplicated fields

    Returns:
        None
    """
    # If all columns, get their names
    if renames is None:
        renames = {}
    if join_fields == "*":
        join_fields = [
            f.name
            for f in arcpy.ListFields(from_table)
            if not f.required and f.name != from_id_field
        ]
    # List expected columns based on renames dict
    expected_fields = [renames.get(jf, jf) for jf in join_fields]
    # Check if expected outcomes will collide with fields in the table
    if drop_dup_cols:
        # All relevant fields in table (excluding the field to join by)
        tbl_fields = [
            f.name for f in arcpy.ListFields(to_table) if f.name != to_id_field
        ]
        # List of which fields to drop
        drop_fields = [d for d in expected_fields if d in tbl_fields]  # join_fields
        # If all the fields to join will be dropped, exit
        if len(join_fields) == len(drop_fields):
            print("--- --- no new fields")
            return  # TODO: what if we want to update these fields?
    else:
        drop_fields = []

    # Dump from_table to df
    dump_fields = [from_id_field] + join_fields
    df = PMT.table_to_df(
        in_tbl=from_table, keep_fields=dump_fields, null_val=null_value
    )

    # Rename columns and drop columns as needed
    if renames:
        df.rename(columns=renames, inplace=True)
    if drop_fields:
        df.drop(columns=drop_fields, inplace=True)

    # Join cols from df to to_table
    print(f"--- --- {list(df.columns)} to {to_table}")
    PMT.extend_table_df(
        in_table=to_table,
        table_match_field=to_id_field,
        df=df,
        df_match_field=from_id_field,
    )
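A hedged usage sketch with hypothetical tables and fields; columns already present on the target are dropped before the join:

join_attributes(
    to_table=r"snapshot.gdb\Polygons\Parcels",      # hypothetical feature class
    to_id_field="FOLIO",
    from_table=r"PMT_2019.gdb\EconDemog_parcels",   # hypothetical source table
    from_id_field="FOLIO",
    join_fields="*",                                # join all non-required fields
    renames={"TotalJobs": "JOBS"},                  # placeholder rename
    drop_dup_cols=True,
)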
Example #12
def build_enriched_tables(gdb, fc_dict, specs):
    """
    Helper function used to enrich and/or elongate data for a summarization area. Enrichment
    is based on the intersection of disaggregate features with summarization area features. Elongation
    melts tables for serial reporting (e.g., square footage by land use per summarization area).
    
    Args:
        gdb (str): path to geodatabase where outputs are written
        fc_dict (dict): dictionary returned from build_intersections
        specs (list of dicts): list of dictionaries specifying sources, grouping, aggregations,
            consolidations, melts/elongations, and an output table (this is used by the try/except
            clause to make a new table (elongation) or append to an existing feature class (widening))

    Returns:
        None
    """
    # Enrich features through summarization
    for spec in specs:
        summ, disag = spec["sources"]
        fc_name, fc_id, fc_fds = summ
        d_name, d_id, d_fds = disag
        if summ == disag:
            # Simple pivot wide to long
            fc = PMT.make_path(gdb, fc_fds, fc_name)
        else:
            # Pivot from intersection
            fc = fc_dict[summ][disag]

        print(f"--- Summarizing data from {d_name} to {fc_name}")
        # summary vars
        group = spec["grouping"]
        agg = spec["agg_cols"]
        consolidate = spec["consolidate"]
        melts = spec["melt_cols"]
        summary_df = summarize_attributes(
            in_fc=fc,
            group_fields=group,
            agg_cols=agg,
            consolidations=consolidate,
            melt_col=melts,
        )
        try:
            out_name = spec["out_table"]
            print(f"--- --- to long table {out_name}")
            out_table = PMT.make_path(gdb, out_name)
            PMT.df_to_table(df=summary_df, out_table=out_table, overwrite=True)
        except KeyError:
            # extend input table
            feature_class = PMT.make_path(gdb, fc_fds, fc_name)
            # if being run again, delete any previous data as da.ExtendTable will fail if a field exists
            summ_cols = [col for col in summary_df.columns.to_list() if col != fc_id]
            drop_fields = [
                f.name for f in arcpy.ListFields(feature_class) if f.name in summ_cols
            ]
            if drop_fields:
                print(
                    f"--- --- deleting previously generated data and replacing with current summarizations"
                )
                arcpy.DeleteField_management(
                    in_table=feature_class, drop_field=drop_fields
                )
            PMT.extend_table_df(
                in_table=feature_class,
                table_match_field=fc_id,
                df=summary_df,
                df_match_field=fc_id,
                append_only=False,
            )  # TODO: handle append/overwrite more explicitly
def process_year_to_snapshot(year):
    """
    Process cleaned yearly data to a Snapshot database

    Procedure:
        1) Copies feature datasets into a temporary geodatabase
        2) Performs a series of permanent joins of tabular data onto feature classes, making wide tables
        3) Calculates a series of new fields in the existing feature classes
        4) Calculates a dataframe of region-wide parcel-level statistics
        5) Intersects a series of geometries together, allowing data to be aggregated and summarized from higher
            to lower spatial scales
        6) Enriches existing feature class tables with information from higher spatial resolution, in effect
            widening the tables (ex: roll parcel level data up to blocks, or parcel level data up to Station Areas)
        7) Generates new tables that are long on categorical information derived from the intersections
            (ex: pivot TOT_LVG_AREA on Land Use, taking the sum of living area by land use)
        8) Creates separate access by mode tables (bike, walk, transit, auto)
        9) Calculates new attributes based on region-wide summaries
        10) Calculates additional attributes for dashboards that require all previous steps to be run
        11) If successful, replaces the existing copy of Snapshot with the newly processed version.

    Returns:
        None
    """
    # define numeric for year in the case of NearTerm
    calc_year = year
    if year == "NearTerm":
        calc_year = 9998

    b_conf.YEAR_COL.default = year
    # Make output gdb and copy features
    print("Validating all data have a year attribute...")
    out_path = PMT.validate_directory(directory=BUILD)
    in_gdb = PMT.validate_geodatabase(gdb_path=PMT.make_path(
        CLEANED, f"PMT_{year}.gdb"),
                                      overwrite=False)
    b_help.add_year_columns(in_gdb=in_gdb, year=calc_year)
    print("Making Snapshot Template...")
    out_gdb = b_help.make_snapshot_template(in_gdb=in_gdb,
                                            out_path=out_path,
                                            out_gdb_name=None,
                                            overwrite=False)

    # Join tables to the features
    print("Joining tables to feature classes...")
    b_help.process_joins(
        in_gdb=in_gdb,
        out_gdb=out_gdb,
        fc_specs=b_conf.FC_SPECS,
        table_specs=b_conf.TABLE_SPECS,
    )

    # Calculate values as needed prior to intersections   # TODO: make this smarter, skip if already performed
    print("Adding and calculating new fields for dashboards...")
    b_help.apply_field_calcs(gdb=out_gdb, new_field_specs=b_conf.PRECALCS)

    # Summarize reference values
    print("Calculating parcels sums to generate regional statistics...")
    par_sums = b_help.sum_parcel_cols(gdb=out_gdb,
                                      par_spec=b_conf.PAR_FC_SPECS,
                                      columns=b_conf.PAR_SUM_FIELDS)

    # Intersect tables for enrichment
    print("Intersecting feature classes to generate summaries...")
    int_fcs = b_help.build_intersections(gdb=out_gdb,
                                         enrich_specs=b_conf.ENRICH_INTS)

    # # Store / load intersection fcs (temp locations) in debugging mode
    # if DEBUG:
    #     with open(PMT.makePath(ROOT, "int_fcs.pkl"), "wb") as __f__:
    #         pickle.dump(int_fcs, __f__)
    #     # with open(PMT.makePath(ROOT, "PROCESSING_TEST", "int_fcs.pkl"), "rb") as __f__:
    #     #     int_fcs = pickle.load(__f__)
    # enrich tables
    print("Enriching feature classes with tabular data...")
    b_help.build_enriched_tables(gdb=out_gdb,
                                 fc_dict=int_fcs,
                                 specs=b_conf.ENRICH_INTS)

    # elongate tables
    print("Elongating tabular data...")
    b_help.build_enriched_tables(gdb=out_gdb,
                                 fc_dict=int_fcs,
                                 specs=b_conf.ELONGATE_SPECS)

    # build access by mode tables
    print("Access scores by activity and time bin")
    sa_fc, sa_id, sa_fds = b_conf.SUM_AREA_FC_SPECS
    sum_areas_fc = PMT.make_path(out_gdb, sa_fds, sa_fc)
    id_fields = [
        p_conf.SUMMARY_AREAS_COMMON_KEY,
        p_conf.STN_NAME_FIELD,
        p_conf.CORRIDOR_NAME_FIELD,
    ]
    b_help.build_access_by_mode(sum_area_fc=sum_areas_fc,
                                modes=b_conf.MODES,
                                id_field=id_fields,
                                out_gdb=out_gdb,
                                year_val=calc_year)

    # Prepare regional reference columns
    reg_ref_calcs = []
    for new_field in b_conf.REG_REF_CALCS:
        nf_spec, ref_field = new_field
        if isinstance(ref_field, string_types):
            ref_val = [[par_sums[ref_field]]]
        else:
            # assume iterable
            ref_val = [[] for _ in ref_field]
            for ref_i, rf in enumerate(ref_field):
                ref_val[ref_i].append(par_sums[rf])
        nf_spec["params"] = ref_val
        reg_ref_calcs.append(nf_spec)

    # Calculated values - simple
    print("Calculating remaining fields for dashboards...")
    b_help.apply_field_calcs(gdb=out_gdb,
                             new_field_specs=b_conf.CALCS + reg_ref_calcs)

    # Delete tempfiles
    print("--- --- Removing temp files")
    for summ_key, summ_val in int_fcs.items():
        for disag_key, disag_val in summ_val.items():
            arcpy.Delete_management(in_data=disag_val)

    # Rename this output
    print("--- --- Finalizing the snapshot")
    if year == PMT.SNAPSHOT_YEAR:
        year = "Current"
    year_out_gdb = PMT.make_path(BUILD, f"Snapshot_{year}.gdb")
    b_help.finalize_output(intermediate_gdb=out_gdb, final_gdb=year_out_gdb)
def process_years_to_trend(
    years,
    tables,
    long_features,
    diff_features,
    base_year=None,
    snapshot_year=None,
    out_gdb_name=None,
):
    """
    Utilizing a base and snapshot year, trend data are generated for the associated time period.

    Procedure:
        1) creates a blank output workspace with necessary feature dataset categories, uniquely named
        2) generates tables long on year for all tabular data and summary areas
        3) generates difference tables for all tabular data and summary features
            (Summary Areas, Census Blocks, MAZ, and TAZ)
        4) upon completion, replaces the existing copy of the Trend/NearTerm gdb with the newly processed version

    Args:
        years (list): years to include in the trend
        tables (list): list of dictionaries specifying the table name ("table") and index columns
            ("index_cols") for each standalone table to elongate and difference
        long_features (list): list of dictionaries whose "table" entry identifies feature classes
            to make long on year
        diff_features (list): list of dictionaries whose "table" entry is a (name, id field, feature dataset)
            tuple and whose "index_cols" entry lists index fields for feature classes to difference
        base_year (int/str): base year of the trend; Default is None, using the first entry in `years`
        snapshot_year (int/str): snapshot year of the trend; Default is None, using the last entry in `years`
        out_gdb_name (str): name of the output geodatabase; Default is None, resulting in "Trend"

    Returns:
        None
    """
    # TODO: add a try/except to delete any intermediate data created
    # Validation
    if base_year is None:
        base_year = years[0]
    if snapshot_year is None:
        snapshot_year = years[-1]
    if base_year not in years or snapshot_year not in years:
        raise ValueError("Base year and snapshot year must be in years list")
    if out_gdb_name is None:
        out_gdb_name = "Trend"

    # Set criteria
    table_criteria = [spec["table"] for spec in tables]
    diff_criteria = [spec["table"][0] for spec in diff_features]
    long_criteria = [spec["table"][0] for spec in long_features]

    # make a blank geodatabase
    out_path = PMT.validate_directory(BUILD)
    out_gdb = b_help.make_trend_template(out_path)

    # Get snapshot data
    for yi, year in enumerate(years):
        process_year = year
        if year == snapshot_year:
            if year == "NearTerm":
                process_year = snapshot_year = "NearTerm"
            else:
                process_year = snapshot_year = "Current"
        in_gdb = PMT.validate_geodatabase(
            gdb_path=PMT.make_path(BUILD, f"Snapshot_{process_year}.gdb"),
            overwrite=False,
        )
        # Make every table extra long on year
        year_tables = PMT._list_table_paths(gdb=in_gdb,
                                            criteria=table_criteria)
        year_fcs = PMT._list_fc_paths(gdb=in_gdb,
                                      fds_criteria="*",
                                      fc_criteria=long_criteria)
        elongate = year_tables + year_fcs
        for elong_table in elongate:
            elong_out_name = os.path.split(elong_table)[1] + "_byYear"
            if yi == 0:
                # Initialize the output table
                print(f"Creating long table {elong_out_name}")
                arcpy.TableToTable_conversion(in_rows=elong_table,
                                              out_path=out_gdb,
                                              out_name=elong_out_name)
            else:
                # Append to the output table
                print(
                    f"Appending to long table {elong_out_name} ({process_year})"
                )
                out_table = PMT.make_path(out_gdb, elong_out_name)
                arcpy.Append_management(inputs=elong_table,
                                        target=out_table,
                                        schema_type="NO_TEST")
        # Get snapshot and base year params
        if process_year == base_year:
            base_tables = year_tables[:]
            base_fcs = PMT._list_fc_paths(gdb=in_gdb,
                                          fds_criteria="*",
                                          fc_criteria=diff_criteria)
        elif process_year == snapshot_year:
            snap_tables = year_tables[:]
            snap_fcs = PMT._list_fc_paths(gdb=in_gdb,
                                          fds_criteria="*",
                                          fc_criteria=diff_criteria)

    # Make difference tables (snapshot - base)
    for base_table, snap_table, specs in zip(base_tables, snap_tables, tables):
        out_name = os.path.split(base_table)[1] + "_diff"
        out_table = PMT.make_path(out_gdb, out_name)
        idx_cols = specs["index_cols"]
        diff_df = PMT.table_difference(this_table=snap_table,
                                       base_table=base_table,
                                       idx_cols=idx_cols)
        print(f"Creating table {out_name}")
        PMT.df_to_table(df=diff_df, out_table=out_table, overwrite=True)

    # Make difference fcs (snapshot - base)
    for base_fc, snap_fc, spec in zip(base_fcs, snap_fcs, diff_features):
        # TODO: will raise if not all diff features are found, but maybe that's good?
        # Get specs
        fc_name, fc_id, fc_fds = spec["table"]
        idx_cols = spec["index_cols"]
        if isinstance(idx_cols, string_types):
            idx_cols = [idx_cols]
        if fc_id not in idx_cols:
            idx_cols.append(fc_id)
        out_fds = PMT.make_path(out_gdb, fc_fds)
        out_name = fc_name + "_diff"
        out_table = PMT.make_path(out_fds, out_name)
        # Field mappings
        field_mappings = arcpy.FieldMappings()
        for idx_col in idx_cols:
            fm = arcpy.FieldMap()
            fm.addInputField(base_fc, idx_col)
            field_mappings.addFieldMap(fm)
        # Copy geoms
        print(f"Creating feature class {out_name}")
        arcpy.FeatureClassToFeatureClass_conversion(
            in_features=base_fc,
            out_path=out_fds,
            out_name=out_name,
            field_mapping=field_mappings,
        )
        # Get table difference
        diff_df = PMT.table_difference(this_table=snap_fc,
                                       base_table=base_fc,
                                       idx_cols=idx_cols)
        # Extend attribute table
        drop_cols = [
            c for c in diff_df.columns if c in idx_cols and c != fc_id
        ]
        diff_df.drop(columns=drop_cols, inplace=True)
        print("... adding difference columns")
        PMT.extend_table_df(
            in_table=out_table,
            table_match_field=fc_id,
            df=diff_df,
            df_match_field=fc_id,
        )

    # TODO: calculate percent change in value over base for summary areas

    print("Finalizing the trend")
    final_gdb = PMT.make_path(BUILD, f"{out_gdb_name}.gdb")
    b_help.finalize_output(intermediate_gdb=out_gdb, final_gdb=final_gdb)
        print(f"- Snapshot for {year}")
        process_year_to_snapshot(year)


# MAIN
if __name__ == "__main__":
    DEBUG = False
    if DEBUG:
        """
        if DEBUG is True, you can change the path of the root directory and test any
        changes to the code you might need to handle without munging the existing data
        """
        ROOT = (
            r"C:\OneDrive_RP\OneDrive - Renaissance Planning Group\SHARE\PMT_link\Data"
        )
        CLEANED = PMT.validate_directory(
            directory=PMT.make_path(ROOT, "CLEANED"))
        # BUILD = PMT.validate_directory(directory=PMT.makePath(r"C:\PMT_TEST_FOLDER", "BUILD"))
        BUILD = PMT.validate_directory(directory=PMT.make_path(ROOT, "BUILD"))
        DATA = ROOT
        BASIC_FEATURES = PMT.make_path(CLEANED, "PMT_BasicFeatures.gdb")
        REF = PMT.make_path(ROOT, "Reference")
        RIF_CAT_CODE_TBL = PMT.make_path(REF, "road_impact_fee_cat_codes.csv")
        DOR_LU_CODE_TBL = PMT.make_path(REF, "Land_Use_Recode.csv")
        YEAR_GDB_FORMAT = PMT.YEAR_GDB_FORMAT
        YEARS = ["NearTerm"]

    # Snapshot data
    print("Building snapshot databases...")
    process_all_snapshots(years=YEARS)

    # Generate Trend Database