Example #1
def gdb(gdb_file, select_table='all', input_fields=None):
    """Read the contents of a gdb table into a Pandas dataframe"""
    import numpy as np
    import pandas as pd
    from osgeo import ogr  # bare 'import ogr' is deprecated in modern GDAL

    # Initialize file
    driver = ogr.GetDriverByName("OpenFileGDB")
    gdb_obj = driver.Open(gdb_file)

    # Map layer names to layer indices
    tables = {gdb_obj.GetLayerByIndex(i).GetName(): i for i in range(gdb_obj.GetLayerCount())}
    table_names = sorted(tables.keys()) if select_table == 'all' else [select_table]
    for table_name in table_names:
        table = gdb_obj.GetLayer(tables[table_name])
        table_def = table.GetLayerDefn()
        table_fields = [table_def.GetFieldDefn(i).GetName() for i in range(table_def.GetFieldCount())]
        if input_fields is None:
            fields = table_fields
        else:
            # Use a local variable so one table's missing fields don't shrink the selection for later tables
            missing_fields = set(input_fields) - set(table_fields)
            if missing_fields:
                report("Fields {} not found in table {}".format(", ".join(missing_fields), table_name))
            fields = [field for field in input_fields if field not in missing_fields]
        data = np.array([[row.GetField(f) for f in fields] for row in table])
        df = pd.DataFrame(data=data, columns=fields)
        # Because this function contains 'yield' it is a generator, so a bare
        # 'return df' would never reach the caller; yield in both cases and stop early
        yield table_name, df
        if select_table != 'all':
            return
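A minimal usage sketch (the geodatabase path and field names below are hypothetical): since the function uses yield, it is a generator and must be iterated, even when a single table is selected.

for table_name, df in gdb("example.gdb", input_fields=["comid", "gridcode"]):
    print(table_name, df.shape)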
Example #2
def parse(self, field, data):
    """Parse a field value, logging ValueErrors and returning None instead of raising"""
    try:
        return FieldParser.parse(self, field, data)
    except ValueError as e:
        report(e)
        # raise e
        return None
Example #3
def main():
    fields.expand('horizon', max_horizons)
    region_filter = None  # no region filter: process all regions
    class_filter = [200, 210, 211]  # only process these crop class IDs

    for i, region, class_num, class_name, scenarios in get_scenarios(
            region_filter, class_filter):
        report(f"Working on Region {region} {class_name}...")
        selection = report_region(scenarios, region, class_name, class_num)
        write.selected_scenarios(selection, i == 0)
Example #4
def main():
    years = range(2015, 2020)  # range(2010, 2016)
    overwrite_raster = True
    overwrite_combos = False
    regions = ['07'] + list(nhd_regions)
    for region in regions:
        nhd_raster = nhd_raster_path.format(vpus_nhd[region], region)
        arcpy.env.snapRaster = nhd_raster
        arcpy.env.mask = nhd_raster
        for year in years:
            print(region, year)
            cdl_raster = cdl_path.format(year)
            combined_raster = combined_raster_path.format(region, year)
            combinations_table = combo_path.format(region, year)
            if overwrite_raster or not os.path.exists(combined_raster):
                report("Performing raster overlay for Region {}, {}...".format(region, year))
                overlay_rasters(combined_raster, cdl_raster, nhd_raster)
                report(f"Combined raster saved to {combined_raster}")
            if overwrite_combos or not os.path.exists(combinations_table):
                report("Building combinations table for Region {}, {}...".format(region, year))
                combos = generate_combos(combined_raster, year)
                combos.to_csv(combinations_table, index=False)
Example #5
def pwc_outfile(class_num=None, class_name=None, path=None):
    """
    Read a PWC output file (BatchOutputVVWM.txt) into a dataframe
    :param class_num: Numerical class ID (str, int)
    :param class_name: Descriptive class name (str)
    :param path: Override the default input path (optional, str)
    :return: Dataframe of PWC output (df)
    """
    pwc_header = ['line_num', 'run_id'] + pwc_durations
    tables = []
    if path is None:
        path = pwc_outfile_path
    # Look for all chunked output folders (so far the number of chunks has never exceeded 10)
    for i in range(10):
        for koc in kocs:
            p = os.path.join(path.format(class_num, class_name, i, koc),
                             'BatchOutputVVWM.txt')
            try:
                new_table = pd.read_csv(p, names=pwc_header, delimiter=r'\s+')
                tables.append(new_table)
                report(f"Read file {p}")
            except FileNotFoundError:
                break
    if not tables:
        raise FileNotFoundError(f"No PWC output files found for {class_name}")
    table = pd.concat(tables, axis=0)

    # Adjust line number so that header is not included
    table['line_num'] = table.line_num.astype(np.int32) - 1

    # Split the Batch Run ID field into constituent parts
    data = table.pop('run_id').str.split('_', expand=True)
    data.columns = ['bunk', 'koc', 'scenario_id', 'rep']
    data['koc'] = data.koc.str.slice(3).astype(np.int32)
    table = pd.concat([data, table], axis=1)
    table = table.melt(
        id_vars=[f for f in table.columns if f not in pwc_durations],
        value_vars=pwc_durations,
        var_name='duration',
        value_name='conc')

    return table
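A hedged usage sketch: the class number and name are hypothetical, but the koc, duration and conc columns are the ones created above, so a long-format summary is a one-line groupby.

table = pwc_outfile(class_num=200, class_name='corn')
median_conc = table.groupby(['koc', 'duration'])['conc'].median()
print(median_conc)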
Example #6
def pwc_infile(class_num, class_name, path=None, fixed_base=None):
    """
    Read the tabular scenarios that were used to parameterize the PWC run for a given crop
    :param class_num: Numerical class ID (str, int)
    :param class_name: Descriptive class name (str)
    :param path: Override the default input path (optional, str)
    :param fixed_base: Override the default table name (optional, str)
    :return: Pandas dataframe of the PWC input scenarios
    """
    if path is None:
        path = pwc_outfile_path  # "{}_Corn_all_{}_koc{}"
    tables = []

    # Look for all chunked tables (so far the number of chunks has never exceeded 10)
    for i in range(10):
        try:
            # Read the input table from the koc10 folder. It should be identical in all folders
            p = path.format(class_num, class_name, i, 10)
            if fixed_base is None:
                base = os.path.basename(p).replace("_koc10", ".csv")
            else:
                base = fixed_base
            new_table = pd.read_csv(os.path.join(p, base),
                                    dtype={'area': np.int64})
            tables.append(new_table)
            report(f"Read file {p}")
        except FileNotFoundError:
            break
    if tables:
        # Join together all the chunks
        table = pd.concat(tables, axis=0)
        table['region'] = table.region.astype('str').str.zfill(2)
        return table
    else:
        print(f"No infiles found for {class_name}")
        return None
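Usage sketch for pairing inputs with outputs (this assumes, as the run_id split in Example #5 suggests, that both tables carry a scenario_id column; the class arguments are hypothetical):

infile = pwc_infile(200, 'corn')
if infile is not None:
    outfile = pwc_outfile(200, 'corn')
    merged = outfile.merge(infile, on='scenario_id', how='left')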
Example #7
def chunk_combinations(combos):
    """
    Break the master combinations table into smaller chunks to avoid memory overflow.
    :param combos: Master scenarios table (df)
    """
    from parameters import chunk_size
    n_combinations = combos.shape[0]
    n_chunks = -(-n_combinations // chunk_size)  # ceiling division avoids an off-by-one count
    if n_combinations > chunk_size:
        report(
            f"Breaking {n_combinations} combinations into {n_chunks} chunks",
            1)
        for i, start_row in enumerate(range(0, n_combinations, chunk_size)):
            end_row = min(start_row + chunk_size, n_combinations)
            report(f"Processing chunk {i + 1}...", 2)
            chunk = combos.iloc[start_row:end_row]
            yield i + 1, chunk
    else:
        report("Processing all combinations...", 2)
        yield 1, combos
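A small sketch of the generator in use (chunk_size is read from the parameters module; the stand-in table below is illustrative):

import pandas as pd

combos = pd.DataFrame({'scenario_id': range(250000)})  # stand-in combinations table
for chunk_num, chunk in chunk_combinations(combos):
    print(f"chunk {chunk_num}: {len(chunk)} rows")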
Example #8
def scenarios_and_recipes(regions, years, mode, class_filter=None):
    """
    Main program routine. Creates scenario and recipe (if applicable) files
    for specified NHD Plus Hydroregions and years. Years and regions provided
    must have corresponding input data. Specify paths to input data in paths.py.
    Mode may be either 'sam' or 'pwc'. In 'sam' mode, recipes are created and
    aggregations are performed. In 'pwc' mode, different output files are created
    :param regions: NHD Plus Hydroregions to process (list of strings)
    :param years: Years to process (list of integers)
    :param mode: 'sam' or 'pwc'
    :param class_filter: Only process these crop class IDs (optional, list of ints)
    """
    report("Reading input files...")

    # Read and modify data indexed to weather grid
    report("Reading weather-indexed data...")
    met_params = read.met()
    met_params = modify.met(met_params)

    # Read crop related params. This has multiple functions since data are differently indexed
    report("Reading crop data...")
    crop_params = read.crop()
    crop_dates = read.crop_dates()
    irrigation = read.irrigation()

    # Read and modify data indexed to soil
    report("Reading soils data...")
    soil_params = read.soil()
    soil_params, aggregation_key = modify.soils(soil_params, mode)

    # Create a filter if only processing certain crops
    if class_filter is not None:
        class_filter = pd.DataFrame({pwc_selection_field: class_filter})

    # Soils, watersheds and combinations are broken up by NHD region
    for region in regions:
        report("Processing Region {}...".format(region))

        # Read curve numbers
        curve_numbers = read.curve_numbers(region)

        # Read and modify met/crop/land cover/soil/watershed combinations
        report("Reading combinations...")
        combinations = read.combinations(region, years)
        report("Processing combinations...")
        combinations = modify.combinations(combinations, crop_params, mode,
                                           aggregation_key)

        # Generate watershed 'recipes' for SAM and aggregate combinations after recipe fields removed
        if mode == 'sam':
            report("Creating watershed recipes and aggregating combinations...", 1)
            watershed_params = pd.read_csv(
                condensed_nhd_path.format(region))[['gridcode', 'comid']]
            recipes, recipe_map, combinations = create_recipes(
                combinations, watershed_params)
            write.recipes(region, recipes, recipe_map)

        # Create and modify scenarios, and write to file
        report(f"Creating scenarios...", 1)
        if mode == 'sam':
            # Because SAM datasets do not exclude any scenarios, break into pieces to avoid memory overload
            for chunk_num, chunk in chunk_combinations(combinations):
                scenarios = create_scenarios(chunk, soil_params, met_params,
                                             crop_params, crop_dates,
                                             irrigation, curve_numbers)

                # Filter out only the desired crop, if a filter is specified
                if class_filter is not None:
                    scenarios = scenarios.merge(class_filter,
                                                on=pwc_selection_field,
                                                how='inner')
                    if scenarios.empty:
                        continue

                scenarios = modify.scenarios(scenarios,
                                             mode,
                                             region,
                                             write_qc=False)
                report("Writing to file...", 2)
                write.scenarios(scenarios, mode, region, name=chunk_num)
        elif mode == 'pwc':
            scenarios = create_scenarios(combinations, soil_params, met_params,
                                         crop_params, crop_dates, irrigation,
                                         curve_numbers)
            scenarios = modify.scenarios(scenarios, mode, region)

            # For PWC, apply sampling and write crop-specific tables
            for crop_num, crop_name, crop_scenarios in select_pwc_scenarios(
                    scenarios, crop_params):
                if crop_num in (200, 210, 211):
                    report(
                        "Writing table for Region {} {}...".format(
                            region, crop_name), 2)

                    write.scenarios(crop_scenarios,
                                    mode,
                                    region,
                                    name=crop_name,
                                    num=crop_num)
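A hedged invocation sketch, mirroring the drivers in Examples #3 and #4 (the region, year and class values are illustrative):

scenarios_and_recipes(regions=['07'], years=list(range(2015, 2020)),
                      mode='pwc', class_filter=[200, 210, 211])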
Example #9
def check_raster_RAT(raster):
    """Build a raster attribute table for the given arcpy Raster if it lacks one"""
    if not raster.hasRAT:
        report("Building RAT for {}".format(raster.catalogPath), 1)
        arcpy.BuildRasterAttributeTable_management(raster)
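Usage sketch: the function expects an arcpy Raster object (hasRAT and catalogPath are properties of arcpy.Raster); the path construction borrows the variables from Example #4 and is illustrative.

raster = arcpy.Raster(nhd_raster_path.format(vpus_nhd['07'], '07'))
check_raster_RAT(raster)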
Example #10
def scenarios(in_scenarios, mode, region, write_qc=True):
    """
    Modify a table of field scenario parameters. This is primarily for computing parameters
    that are linked to multiple indices (e.g., land cover and soil). The major functions here include
    the assignment of runoff curve numbers, setting root and evaporation depth,
    and performing QAQC. QAQC parameters are specified in fields_and_qc.csv.
    :param in_scenarios: Input scenarios table (df)
    :param mode: 'sam' or 'pwc'
    :param region: NHD Plus region (str)
    :param write_qc: Write the results of the QAQC to file (bool)
    :return: Modified scenarios table (df)
    """
    from parameters import anetd

    # Assigns 'cover' and 'fallow' curve numbers for each scenario based on hydrologic soil group
    in_scenarios['cn_cov'] = in_scenarios['cn_fal'] = -1.

    # Do cultivated crops, then non-cultivated crops
    for cultivated, col in enumerate(('non-cultivated', 'cultivated')):
        # Convert from HSG number (hydro_group) to letter
        # For drained soils, fallow is set to D condition
        for hsg_num, hsg_letter in enumerate(hydro_soil_group[col]):
            sel = (in_scenarios.hydro_group
                   == hsg_num + 1) & (in_scenarios.cultivated == cultivated)
            in_scenarios.loc[sel, 'cn_cov'] = in_scenarios.loc[
                sel, f'cn_cov_{hsg_letter}']
            in_scenarios.loc[sel, 'cn_fal'] = in_scenarios.loc[
                sel, f'cn_fal_{hsg_letter}']

    # Calculate max irrigation rate by the USDA curve number method
    in_scenarios['max_irrigation'] = 0.2 * (
        ((2540. / in_scenarios.cn_cov) - 25.4))  # cm

    # Ensure that root and evaporation depths are 0.5 cm or more shallower than soil depth
    in_scenarios['root_depth'] = \
        np.minimum(in_scenarios.root_zone_max.values - 0.5, in_scenarios.max_root_depth)
    in_scenarios['evaporation_depth'] = \
        np.minimum(in_scenarios.root_zone_max.values - 0.5, anetd)

    # Choose output fields and perform data correction
    report("Performing data correction...", 3)
    fields.refresh()
    in_scenarios = in_scenarios.reset_index()

    # Capture index columns before any subsetting; the QC report below needs them
    # in either mode (this assumes both columns are present in SAM scenarios too)
    index_cols = in_scenarios[['scenario_id', pwc_selection_field]]

    if mode == 'pwc':
        qc_table = fields.perform_qc(
            in_scenarios[fields.fetch('pwc_qc')]).copy()
        in_scenarios = in_scenarios[qc_table.max(axis=1) < 2]
        fields.expand('horizon', max_horizons)
    else:
        fields.expand("depth_weight", depth_bins)
        in_scenarios = in_scenarios[fields.fetch('sam_scenario')]
        qc_table = fields.perform_qc(in_scenarios)
        in_scenarios = in_scenarios.mask(qc_table == 2, fields.fill(), axis=1)
    if write_qc:
        qc_table = pd.concat([index_cols, qc_table], axis=1)
        write.qc_report(region, mode, qc_table)
    if mode == 'pwc':
        # Drop scenarios flagged as SAM-only
        in_scenarios = in_scenarios[~in_scenarios.sam_only.fillna(0).astype(bool)]
    return in_scenarios[fields.fetch(mode + '_scenario')]
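As a worked check of the irrigation line above: it computes 0.2 * S, where S = 2540/CN - 25.4 is the USDA curve number retention parameter in cm. For a cover curve number of 80:

cn_cov = 80.
max_irrigation = 0.2 * ((2540. / cn_cov) - 25.4)  # 0.2 * 6.35 = 1.27 cm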