Code Example #1
def main(ini_path, start_date = None, end_date = None, crop_str = ''):
    """Compute Growing Season Statistics

    Args:
        ini_path (str): file path of project INI file
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separated list or range of crops to compare

    Returns:
        None
    """

    # Field names

    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field   = 'Day'
    season_field = 'Season'

    # Output file/folder names

    gs_summary_name = 'growing_season_full_summary.csv'
    gs_mean_annual_name = 'growing_season_mean_annual.csv'
    baddata_name = 'growing_season_bad_data.txt'

    # Delimiter

    sep = ','
    # sep = r"\s*"

    logging.info('\nComputing growing season statistics')
    logging.info('  INI: {}'.format(ini_path))

    # Check that INI file can be read
    
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get project workspace and daily ET folder from INI file
    # project workspace can use old or new ini file
    
    try:
        project_ws = config.get('PROJECT', 'project_folder')
    except:
        try:
            project_ws = config.get(crop_et_sec, 'project_folder')
        except:
            logging.error(
                'ERROR: project_folder ' +
                'parameter is not set in INI file')
            sys.exit()

    def get_config_param(config, param_name, section):
        """Return the INI parameter value or exit if it is not set"""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: {} parameter is not set' +
                           ' in INI file').format(param_name))
            sys.exit()
        return param_value

    daily_stats_ws = os.path.join(
        project_ws,
        get_config_param(config, 'daily_output_folder', crop_et_sec))
    gs_stats_ws = os.path.join(
        project_ws, get_config_param(config, 'gs_output_folder', crop_et_sec))
    try:
        name_format = config.get(crop_et_sec, 'name_format')
        if name_format is None or name_format == 'None': 
            # name_format = '%s_daily_crop_%c.csv'
            name_format = '%s_crop_%c.csv'
    except:
        # name_format = '%s_daily_crop_%c.csv'
        name_format = '%s_crop_%c.csv'
    if '%s' not in name_format or '%c' not in name_format:
        logging.error("crop et file name format requires"
                      " '%s' and '%c' wildcards.")
        sys.exit()
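    # Example: name_format = '%s_crop_%c.csv' yields
    #   prefix = '_crop_' (the text between the '%s' and '%c' wildcards)
    #   suffix = '.csv' and suf_no_ext = '' (the text after '%c')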
    swl = name_format.index('%s')
    cwl = name_format.index('%c')
    prefix = name_format[(swl + 2):cwl]
    suffix = name_format[(cwl + 2):len(name_format)]
    suf_no_ext = suffix[:(suffix.index('.'))]

    # Check workspaces

    if not os.path.isdir(daily_stats_ws):
        logging.error(('\nERROR: daily ET stats folder {0} ' +
                       'could not be found\n').format(daily_stats_ws))
        sys.exit()
    if not os.path.isdir(gs_stats_ws):
        os.mkdir(gs_stats_ws)

    # Range of data to use

    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end <= year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # Allow user to subset crops from INI

    try:
        crop_skip_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_skip_list'))))
    except:
        crop_skip_list = []
        # crop_skip_list = [44, 45, 46, 55, 56, 57]
    try:
        crop_test_list = sorted(list(util.parse_int_set(
            config.get(crop_et_sec, 'crop_test_list'))))
    except:
        crop_test_list = []

    # Overwrite INI crop list with user defined values
    # Could also append to INI crop list

    if crop_str:
        try:
            crop_test_list = list(util.parse_int_set(crop_str))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))
        except:
            pass
    logging.debug('\n  crop_test_list = {0}'.format(crop_test_list))
    logging.debug('  crop_skip_list = {0}'.format(crop_skip_list))

    # Output file paths

    gs_summary_path = os.path.join(gs_stats_ws, gs_summary_name)
    gs_mean_annual_path = os.path.join(gs_stats_ws, gs_mean_annual_name)
    baddata_path = os.path.join(gs_stats_ws, baddata_name)

    # Initialize output data arrays and open bad data log file

    gs_summary_data = []
    gs_mean_annual_data = []
    all_cuttings = pd.DataFrame()
    baddata_file = open(baddata_path, 'w')

    # make used file list using name_format attributes
    
    data_file_list = []
    for item in os.listdir(daily_stats_ws):
        if prefix in item and suffix in item:
            if item not in data_file_list:
                data_file_list.append(os.path.join(daily_stats_ws, item))
    if len(data_file_list) < 1:
        logging.info('No files found')
        sys.exit()
    data_file_list = sorted(data_file_list)

    # Process each file
    
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  Processing {0}'.format(file_name))
        station, crop_num = os.path.splitext(file_name)[0].split(prefix)
        # crop_num = int(crop_num[:crop_num.index(suf_no_ext)])
        crop_num = int(crop_num)
        logging.debug('    Station:         {0}'.format(station))
        logging.debug('    Crop Num:        {0}'.format(crop_num))
        if station == 'temp':
            continue

        # Get crop name

        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug('    Crop:            {0}'.format(crop_name))

        # Read data from file into record array (structured array)

        daily_df = pd.read_csv(file_path, header=0, comment='#',
                               sep=sep)
        logging.debug('    Fields: {0}'.format(
            ', '.join(daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year
        # daily_df[year_field] = daily_df[date_field].map(lambda x: x.year)

        # Build list of unique years

        year_array = np.sort(np.unique(
            np.array(daily_df[year_field]).astype(int)))
        logging.debug('    All Years: {0}'.format(
            ', '.join(list(util.ranges(year_array.tolist())))))
        # logging.debug('    All Years: {0}'.format(
        #    ','.join(map(str, year_array.tolist()))))

        # Don't include first year in stats

        crop_year_start = min(daily_df[year_field])
        logging.debug('    Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days

        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug('    Skipping {}, missing days'.format(
                crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug('    Skipping {}, missing days'.format(
                crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]
        del crop_year_start, crop_year_end

        # Only keep years between year_start and year_end
        
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]

        year_sub_array = np.sort(np.unique(
            np.array(daily_df[year_field]).astype(int)))
        logging.debug('    Data Years: {0}'.format(
            ', '.join(list(util.ranges(year_sub_array.tolist())))))
        # logging.debug('    Data Years: {0}'.format(
        #    ','.join(map(str, year_sub_array.tolist()))))

        # Get separate date related fields

        date_array = daily_df.index.date
        year_array = daily_df[year_field].values.astype(int)
        doy_array = daily_df[doy_field].values.astype(int)

        # Remove leap days
        # leap_array = (doy_array == 366)
        # doy_sub_array = np.delete(doy_array, np.where(leap_array)[0])

        # Build separate arrays for each set of crop specific fields

        season_array = np.array(daily_df[season_field])

        # Original code from growing_season script
        # Initialize mean annual growing season length variables

        gs_sum, gs_cnt, gs_mean = 0, 0, 0
        start_sum, start_cnt, start_mean = 0, 0, 0
        end_sum, end_cnt, end_mean = 0, 0, 0

        # Process each year
        for year_i, year in enumerate(year_sub_array):
            year_crop_str = "Crop: {0:2d} {1:32s}  Year: {2}".format(
                crop_num, crop_name, year)
            logging.debug(year_crop_str)

            # Extract data for target year
            year_mask = (year_array == year)
            date_sub_array = date_array[year_mask]
            doy_sub_array = doy_array[year_mask]
            season_sub_mask = season_array[year_mask]
            field_names = list(daily_df.columns.values)

            # Only Run if Cutting in field_names else fill with blanks
            # Max of 6 cuttings?
            # Initial arrays with nans (is np.full better?)
            if 'Cutting' in field_names:
                cutting_dates = [np.nan] * 6
                cutting_dates_doy = [np.nan] * 6
                cutting_sub_array = daily_df.Cutting[year_mask]
                cutting_number = len(cutting_sub_array[cutting_sub_array > 0])
                cutting_dates[0:cutting_number] = \
                    date_sub_array[cutting_sub_array > 0]
                cutting_dates_doy[0:cutting_number] = \
                    doy_sub_array[cutting_sub_array > 0]
            else:
                cutting_dates = [np.nan] * 6
                cutting_number = [np.nan] * 6
                cutting_sub_array = [np.nan] * 6
                cutting_dates_doy = [np.nan] * 6
            # Track all cutting DOY for mean annual by crop
            # (each column is a different cutting, 1-6)
            cutting_dates_temp = pd.DataFrame(cutting_dates_doy).transpose()
            all_cuttings = pd.concat([all_cuttings, cutting_dates_temp])

            

            # Look for transitions in season value
            # Start transitions up day before actual start
            # End transitions down on end date
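            # e.g. season = [0, 0, 1, 1, 1, 0] -> np.diff() = [0, 1, 0, 0, -1]
            #   start_i = 1 + 1 = 2 (first in-season index)
            #   end_i = 4 (last in-season index)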

            try:
                start_i = np.where(np.diff(season_sub_mask) == 1)[0][0] + 1
            except IndexError:
                start_i = None
            try:
                end_i = np.where(np.diff(season_sub_mask) == -1)[0][0]
            except IndexError:
                end_i = None

            # If start transition is not found, season starts on DOY 1

            if start_i is None and end_i is not None:
                start_i = 0
            # If end transition is not found, season ends on DOY 365/366

            elif start_i is not None and end_i is None:
                end_i = -1

            # If neither transition is found, season is always on
            # elif start_i is None and end_i is None:
            #     start_i, end_i = 0, -1

            # Calculate start and stop day of year
            # Set start/end to 0 if season never gets set to 1

            if not np.any(season_sub_mask):
                skip_str = "  Skipping, season flag was never set to 1"
                logging.debug(skip_str)
                baddata_file.write(
                    '{0}  {1} {2}\n'.format(station, year_crop_str, skip_str))
                start_doy, end_doy = 0, 0
                start_date, end_date = "", ""
            elif np.all(season_sub_mask):
                start_doy, end_doy = doy_sub_array[0], doy_sub_array[-1]
                start_date = date_sub_array[0].isoformat()
                end_date = date_sub_array[-1].isoformat()
            else:
                start_doy, end_doy = doy_sub_array[start_i],\
                                     doy_sub_array[end_i]
                start_date = date_sub_array[start_i].isoformat()
                end_date = date_sub_array[end_i].isoformat()
            gs_length = sum(season_sub_mask)
            logging.debug("Start: {0} ({1})  End: {2} ({3})".format(
                start_doy, start_date, end_doy, end_date))

            # Track growing season length and mean annual g.s. length

            if start_doy > 0 and end_doy > 0 and year_i != 0:
                start_sum += start_doy
                end_sum += end_doy
                gs_sum += gs_length
                start_cnt += 1
                end_cnt += 1
                gs_cnt += 1

            # Append data to list

            gs_summary_data.append(
                [station, crop_num, crop_name, year,
                 start_doy, end_doy, start_date, end_date, gs_length,
                 cutting_dates[0],
                 cutting_dates[1],
                 cutting_dates[2],
                 cutting_dates[3],
                 cutting_dates[4],
                 cutting_dates[5]])

            # Cleanup
            del year_mask, doy_sub_array, season_sub_mask
            del start_doy, end_doy, start_date, end_date, gs_length

        # Calculate mean annual growing season start/end/length

        if gs_cnt > 0:
            mean_start_doy = int(round(float(start_sum) / start_cnt))
            mean_end_doy = int(round(float(end_sum) / end_cnt))
            mean_length = int(round(float(gs_sum) / gs_cnt))
            mean_start_date = util.doy_2_date(year, mean_start_doy)
            mean_end_date = util.doy_2_date(year, mean_end_doy)
        else:
            mean_start_doy, mean_end_doy, mean_length = 0, 0, 0
            mean_start_date, mean_end_date = "", ""

        # Take mean of all cutting DOY columns
        mean_cuttings = all_cuttings.mean(skipna=True)
        
        # Append mean annual growing season data to list

        gs_mean_annual_data.append(
            [station, crop_num, crop_name,
             mean_start_doy, mean_end_doy,
             mean_start_date, mean_end_date, mean_length,
             round(mean_cuttings[0], 0),
             round(mean_cuttings[1], 0),
             round(mean_cuttings[2], 0),
             round(mean_cuttings[3], 0),
             round(mean_cuttings[4], 0),
             round(mean_cuttings[5], 0)])

        # Cleanup

        del season_array
        del gs_sum, gs_cnt, gs_mean
        del start_sum, start_cnt, start_mean
        del end_sum, end_cnt, end_mean
        del mean_start_doy, mean_end_doy, mean_length
        del mean_start_date, mean_end_date
        del year_array, year_sub_array, doy_array
        del daily_df
        del cutting_dates, cutting_number, cutting_sub_array
        del all_cuttings, mean_cuttings
        all_cuttings = pd.DataFrame()
        logging.debug("")

    # Close bad data file log

    baddata_file.close()

    # Build output record array file
    # https://stackoverflow.com/questions/3348460/
    # csv-file-written-with-python-has-blank-lines-between-each-row/3348664
    with open(gs_summary_path, 'w', newline='') as gs_summary_f:
        gs_summary_csv = csv.writer(gs_summary_f)
        gs_summary_csv.writerow(
            ['STATION', 'CROP_NUM', 'CROP_NAME', 'YEAR',
             'START_DOY', 'END_DOY', 'START_DATE', 'END_DATE', 'GS_LENGTH',
             'CUTTING_1', 'CUTTING_2', 'CUTTING_3', 'CUTTING_4', 'CUTTING_5',
             'CUTTING_6'])
        gs_summary_csv.writerows(gs_summary_data)

    # Build output record array file

    with open(gs_mean_annual_path, 'w', newline='') as gs_mean_annual_f:
        gs_mean_annual_csv = csv.writer(gs_mean_annual_f)
        gs_mean_annual_csv.writerow(
            ['STATION', 'CROP_NUM', 'CROP_NAME', 'MEAN_START_DOY',
             'MEAN_END_DOY', 'MEAN_START_DATE', 'MEAN_END_DATE',
             'MEAN_GS_LENGTH', 'MEAN_CUTTING_1', 'MEAN_CUTTING_2',
             'MEAN_CUTTING_3', 'MEAN_CUTTING_4', 'MEAN_CUTTING_5',
             'MEAN_CUTTING_6'])
        gs_mean_annual_csv.writerows(gs_mean_annual_data)

    # Cleanup

    del gs_summary_path, gs_summary_name
    del gs_summary_csv, gs_summary_data
    del gs_mean_annual_path, gs_mean_annual_name
    del gs_mean_annual_csv, gs_mean_annual_data
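
A minimal invocation sketch for this example (the INI path, dates, and crop list are placeholders, not values from the original project):

    if __name__ == '__main__':
        logging.basicConfig(level=logging.INFO, format='%(message)s')
        main('example_project.ini', start_date='2001-01-01',
             end_date='2010-12-31', crop_str='3,7-9')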
Code Example #2
def main(ini_path,
         figure_show_flag=False,
         figure_save_flag=True,
         figure_size=(1000, 300),
         start_date=None,
         end_date=None,
         crop_str='',
         overwrite_flag=False):
    """Plot full daily data by crop

    Args:
        ini_path (str): file path of the project INI file
        figure_show_flag (bool): if True, show figures
        figure_save_flag (bool): if True, save figures
        figure_size (tuple): width, height of figure in pixels
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separated list or range of crops to compare
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """

    # Input/output names
    # input_folder = 'daily_stats'
    # output_folder = 'daily_plots'

    # Only process a subset of the crops
    crop_keep_list = list(util.parse_int_set(crop_str))
    # These crops will not be processed (if set)
    crop_skip_list = [44, 45, 46]

    # Input field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    pmeto_field = 'PMETo'
    precip_field = 'PPT'
    # t30_field = 'T30'

    etact_field = 'ETact'
    etpot_field = 'ETpot'
    etbas_field = 'ETbas'
    irrig_field = 'Irrigation'
    season_field = 'Season'
    runoff_field = 'Runoff'
    dperc_field = 'DPerc'
    # niwr_field = 'NIWR'

    # Number of header lines in data file
    # header_lines = 2

    # Additional figure controls
    # figure_dynamic_size = False
    figure_ylabel_size = '12pt'

    # Delimiter
    sep = ','
    # sep = r"\s*"

    sub_x_range_flag = True

    logging.info('\nPlot mean daily data by crop')
    logging.info('  INI: {}'.format(ini_path))

    # Check that the INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    try:
        project_ws = config.get(crop_et_sec, 'project_folder')
    except:
        logging.error('ERROR: The project_folder ' +
                      'parameter is not set in the INI file')
        sys.exit()
    try:
        input_ws = os.path.join(project_ws,
                                config.get(crop_et_sec, 'daily_output_folder'))
    except:
        logging.error('ERROR: The daily_output_folder ' +
                      'parameter is not set in the INI file')
        sys.exit()
    try:
        output_ws = os.path.join(project_ws,
                                 config.get(crop_et_sec, 'daily_plots_folder'))
    except:
        if 'stats' in input_ws:
            output_ws = input_ws.replace('stats', 'plots')
        else:
            output_ws = os.path.join(project_ws, 'daily_stats_folder')

    # Check workspaces
    if not os.path.isdir(input_ws):
        logging.error(('\nERROR: The input ET folder {0} ' +
                       'could not be found\n').format(input_ws))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.mkdir(output_ws)

    # Range of data to plot
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # # Windows only
    # if figure_dynamic_size:
    #     try:
    #         logging.info('Setting plot width/height dynamically')
    #         from win32api import GetSystemMetrics
    #         figure_width = int(0.92 * GetSystemMetrics(0))
    #         figure_height = int(0.28 * GetSystemMetrics(1))
    #         logging.info('  {0} {1}'.format(
    #             GetSystemMetrics(0), GetSystemMetrics(1)))
    #         logging.info('  {0} {1}'.format(figure_width, figure_height))
    #     except ImportError:
    #         figure_width = 1200
    #         figure_height = 300

    # Regular expressions
    data_re = re.compile(
        r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+)\.csv$', re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(input_ws, f_name) for f_name in os.listdir(input_ws)
        if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error('  ERROR: No daily ET files were found\n' +
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))

        station, crop_num = os.path.splitext(file_name)[0].split(
            '_daily_crop_')
        crop_num = int(crop_num)
        logging.debug('    Station:         {0}'.format(station))
        logging.debug('    Crop Num:        {0}'.format(crop_num))
        if station == 'temp':
            logging.debug('      Skipping')
            continue
        elif crop_skip_list and crop_num in crop_skip_list:
            logging.debug('    Skipping, crop number in crop_skip_list')
            continue
        elif crop_keep_list and crop_num not in crop_keep_list:
            logging.debug('    Skipping, crop number not in crop_keep_list')
            continue

        # Get crop name
        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug('    Crop:            {0}'.format(crop_name))

        # Read data from file into record array (structured array)
        daily_df = pd.read_table(file_path, header=0, comment='#', sep=sep)
        logging.debug('    Fields: {0}'.format(', '.join(
            daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year
        # daily_df[year_field] = daily_df[date_field].map(lambda x: x.year)

        # Build list of unique years
        year_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug('    All Years: {0}'.format(', '.join(
            list(util.ranges(year_array.tolist())))))
        # logging.debug('    All Years: {0}'.format(
        #    ','.join(map(str, year_array.tolist()))))

        # Don't include the first year in the stats
        crop_year_start = min(daily_df[year_field])
        logging.debug('    Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days
        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]

        # Only keep years between year_start and year_end
        # Adjust crop years
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
            crop_year_start = max(year_start, crop_year_start)
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]
            crop_year_end = min(year_end, crop_year_end)

        year_sub_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug('    Plot Years: {0}'.format(', '.join(
            list(util.ranges(year_sub_array.tolist())))))
        # logging.debug('    Plot Years: {0}'.format(
        #    ','.join(map(str, year_sub_array.tolist()))))

        # Initial range of timeseries to show
        # For now, default to the last ~10 years
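        # Range1d 'bounds' restricts panning/zooming to the full period of
        # record while the initial view shows only the most recent years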
        if sub_x_range_flag:
            x_range = Range1d(
                np.datetime64(
                    dt.datetime(max(crop_year_end - 9, crop_year_start), 1, 1),
                    's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'),
                bounds=(np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                        np.datetime64(dt.datetime(crop_year_end + 1, 1, 1),
                                      's')))
        else:
            x_range = Range1d(
                np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'))

        # Build separate arrays for each field of non-crop specific data
        dt_array = daily_df.index.date
        doy_array = daily_df[doy_field].values.astype(int)
        pmeto_array = daily_df[pmeto_field].values
        precip_array = daily_df[precip_field].values

        # Remove leap days
        # leap_array = (doy_array == 366)
        # doy_sub_array = np.delete(doy_array, np.where(leap_array)[0])

        # Build separate arrays for each set of crop specific fields
        etact_array = daily_df[etact_field].values
        etpot_array = daily_df[etpot_field].values
        etbas_array = daily_df[etbas_field].values
        irrig_array = daily_df[irrig_field].values
        season_array = daily_df[season_field].values
        runoff_array = daily_df[runoff_field].values
        dperc_array = daily_df[dperc_field].values
        kc_array = etact_array / pmeto_array
        kcb_array = etbas_array / pmeto_array

        # NIWR is ET - precip + runoff + deep percolation
        # Don't include deep percolation when irrigating
        # niwr_array = etact_array - (precip_array - runoff_array)
        # niwr_array[irrig_array==0] += dperc_array[irrig_array == 0]

        # Remove leap days
        # etact_sub_array = np.delete(etact_array, np.where(leap_array)[0])
        # niwr_sub_array = np.delete(niwr_array, np.where(leap_array)[0])

        # Timeseries figures of daily data
        output_name = '{0}_crop_{1:02d}_{2}-{3}'.format(
            station, int(crop_num), crop_year_start, crop_year_end)
        output_path = os.path.join(output_ws, output_name + '.html')
        if overwrite_flag and os.path.isfile(output_path):
            os.remove(output_path)
        f = output_file(output_path, title=output_name)
        TOOLS = 'xpan,xwheel_zoom,box_zoom,reset,save'

        f1 = figure(x_axis_type='datetime',
                    x_range=x_range,
                    width=figure_size[0],
                    height=figure_size[1],
                    tools=TOOLS,
                    toolbar_location="right",
                    active_scroll="xwheel_zoom")
        # title='Evapotranspiration', x_axis_type='datetime',
        f1.line(dt_array, etact_array, color='blue', legend_label='ETact')
        f1.line(dt_array, etbas_array, color='green', legend_label='ETbas')
        f1.line(dt_array,
                pmeto_array,
                color='black',
                legend_label='ETos',
                line_dash="dotted")
        # line_dash="dashdot")
        # f1.title = 'Evapotranspiration [mm]'
        f1.grid.grid_line_alpha = 0.3
        f1.yaxis.axis_label = 'Evapotranspiration [mm]'
        f1.yaxis.axis_label_text_font_size = figure_ylabel_size
        # f1.xaxis.bounds = x_bounds

        f2 = figure(x_axis_type="datetime",
                    x_range=f1.x_range,
                    width=figure_size[0],
                    height=figure_size[1],
                    tools=TOOLS,
                    toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f2.line(dt_array, kc_array, color='blue', legend_label='Kc')
        f2.line(dt_array, kcb_array, color='green', legend_label='Kcb')
        f2.line(dt_array,
                season_array,
                color='black',
                legend_label='Season',
                line_dash="dashed")
        # f2.title = 'Kc and Kcb (dimensionless)'
        f2.grid.grid_line_alpha = 0.3
        f2.yaxis.axis_label = 'Kc and Kcb (dimensionless)'
        f2.yaxis.axis_label_text_font_size = figure_ylabel_size

        f3 = figure(x_axis_type="datetime",
                    x_range=f1.x_range,
                    width=figure_size[0],
                    height=figure_size[1],
                    tools=TOOLS,
                    toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f3.line(dt_array, precip_array, color='blue', legend_label='PPT')
        f3.line(dt_array,
                irrig_array,
                color='black',
                legend_label='Irrigation',
                line_dash="dotted")
        # f3.title = 'PPT and Irrigation [mm]'
        f3.grid.grid_line_alpha = 0.3
        # f3.xaxis.axis_label = 'Date'
        f3.yaxis.axis_label = 'PPT and Irrigation [mm]'
        f3.yaxis.axis_label_text_font_size = figure_ylabel_size

        if figure_show_flag:
            # Open in a browser
            show(column([f1, f2, f3], sizing_mode='stretch_both'))
            # show(vplot(f1, f2, f3))
        if figure_save_flag:
            save(column([f1, f2, f3], sizing_mode='stretch_both'))
            # save(vplot(f1, f2, f3))
        del f1, f2, f3, f

        # Cleanup
        del etact_array, etpot_array, etbas_array
        del irrig_array, season_array
        del runoff_array, dperc_array
        del kc_array, kcb_array
        # del niwr_array
        # del etact_sub_array, niwr_sub_array

        # Cleanup
        del file_path, daily_df
        del dt_array, year_array, year_sub_array, doy_array
        del pmeto_array
        del precip_array
        gc.collect()
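
A minimal invocation sketch for this example (the INI path and crop subset are placeholders; the flags shown are the function's own defaults):

    if __name__ == '__main__':
        logging.basicConfig(level=logging.INFO, format='%(message)s')
        main('example_project.ini', figure_show_flag=False,
             figure_save_flag=True, crop_str='3', overwrite_flag=False)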
Code Example #3
def main(ini_path,
         figure_show_flag=False,
         figure_save_flag=True,
         figure_size=(1000, 300),
         start_date=None,
         end_date=None,
         crop_str=''):
    """Plot full daily data by crop

    Args:
        ini_path (str): file path of project INI file
        figure_show_flag (bool): if True, show figures
        figure_save_flag (bool): if True, save figures
        figure_size (tuple): width, height of figure in pixels
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separated list or range of crops to compare

    Returns:
        None
    """

    # Input/output names
    # input_folder = 'daily_stats'
    # output_folder = 'daily_plots'

    # Only process subset of crops

    crop_keep_list = list(util.parse_int_set(crop_str))
    # These crops will not be processed (if set)
    crop_skip_list = [44, 45, 46]

    # Input field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    #    pmeto_field = 'PMETo'
    precip_field = 'PPT'
    # t30_field = 'T30'

    etact_field = 'ETact'
    etpot_field = 'ETpot'
    etbas_field = 'ETbas'
    irrig_field = 'Irrigation'
    season_field = 'Season'
    runoff_field = 'Runoff'
    dperc_field = 'DPerc'
    # niwr_field = 'NIWR'

    # Number of header lines in data file
    # header_lines = 2

    # Additional figure controls
    # figure_dynamic_size = False

    figure_ylabel_size = '12pt'

    # Delimiter

    sep = ','
    # sep = r"\s*"

    sub_x_range_flag = True

    logging.info('\nPlot mean daily data by crop')
    logging.info('  INI: {}'.format(ini_path))

    # Check that INI file can be read

    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get project workspace and daily ET folder from INI file
    # project workspace can use old or new ini file

    try:
        project_ws = config.get('PROJECT', 'project_folder')
    except:
        try:
            project_ws = config.get(crop_et_sec, 'project_folder')
        except:
            logging.error('ERROR: project_folder ' +
                          'parameter is not set in INI file')
            sys.exit()
    try:
        input_ws = os.path.join(project_ws,
                                config.get(crop_et_sec, 'daily_output_folder'))
    except:
        logging.error('ERROR: daily_output_folder ' +
                      'parameter is not set in INI file')
        sys.exit()
    try:
        output_ws = os.path.join(project_ws,
                                 config.get(crop_et_sec, 'daily_plots_folder'))
    except:
        if 'stats' in input_ws:
            output_ws = input_ws.replace('stats', 'plots')
        else:
            output_ws = os.path.join(project_ws, 'daily_stats_folder')

    # Check workspaces

    if not os.path.isdir(input_ws):
        logging.error(('\nERROR: input ET folder {0} ' +
                       'could not be found\n').format(input_ws))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.mkdir(output_ws)
    try:
        name_format = config.get(crop_et_sec, 'name_format')
        if name_format is None or name_format == 'None':
            # name_format = '%s_daily_crop_%c.csv'
            name_format = '%s_crop_%c.csv'
    except:
        # name_format = '%s_daily_crop_%c.csv'
        name_format = '%s_crop_%c.csv'
    if '%s' not in name_format or '%c' not in name_format:
        logging.error(
            "crop et file name format requires '%s' and '%c' wildcards.")
        sys.exit()
    swl = name_format.index('%s')
    cwl = name_format.index('%c')
    prefix = name_format[(swl + 2):cwl]
    suffix = name_format[(cwl + 2):len(name_format)]
    suf_no_ext = suffix[:(suffix.index('.'))]

    # Range of data to plot

    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # # Windows only
    # if figure_dynamic_size:
    #     try:
    #         logging.info('Setting plot width/height dynamically')
    #         from win32api import GetSystemMetrics
    #         figure_width = int(0.92 * GetSystemMetrics(0))
    #         figure_height = int(0.28 * GetSystemMetrics(1))
    #         logging.info('  {0} {1}'.format(
    #             GetSystemMetrics(0), GetSystemMetrics(1)))
    #         logging.info('  {0} {1}'.format(figure_width, figure_height))
    #     except ImportError:
    #         figure_width = 1200
    #         figure_height = 300

    # make used file list using name_format attributes

    data_file_list = []
    for item in os.listdir(input_ws):
        if prefix in item and suffix in item:
            if not item in data_file_list:
                data_file_list.append(os.path.join(input_ws, item))
    if len(data_file_list) < 1:
        logging.info('No files found')
        sys.exit()
    data_file_list = sorted(data_file_list)

    # Process each file

    for file_count, file_path in enumerate(data_file_list):
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  Processing {0}'.format(file_name))
        station, crop_num = os.path.splitext(file_name)[0].split(prefix)
        # crop_num = int(crop_num[:crop_num.index(suf_no_ext)])
        crop_num = int(crop_num)
        logging.debug('    Station:         {0}'.format(station))
        logging.debug('    Crop Num:        {0}'.format(crop_num))
        if station == 'temp':
            logging.debug('      Skipping')
            continue
        elif crop_skip_list and crop_num in crop_skip_list:
            logging.debug('    Skipping, crop number in crop_skip_list')
            continue
        elif crop_keep_list and crop_num not in crop_keep_list:
            logging.debug('    Skipping, crop number not in crop_keep_list')
            continue

        # Get crop name

        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug('    Crop:            {0}'.format(crop_name))

        # Read data from file into record array (structured array)

        daily_df = pd.read_csv(file_path, header=0, comment='#', sep=sep)
        logging.debug('    Fields: {0}'.format(', '.join(
            daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])

        # workaround for data before 1970 on a pc

        if not year_start or year_start < 1970:

            # test if running on Windows

            if os.getenv('OS') == 'Windows_NT':
                # check if data exist before 1970

                data_sy = daily_df[date_field][0].year
                if data_sy < 1970:
                    # add multiple of 4 years to actual dates
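                    # (a multiple of 4 keeps leap days aligned, so each
                    # shifted date keeps its original month and day)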

                    years_to_add = 1970 - data_sy + ((1970 - data_sy) % 4)
                    daily_df[date_field] = daily_df[date_field] + pd.Timedelta(
                        days=int(years_to_add * 365.25))
                    if file_count == 0:
                        logging.info('  Added {0} years to input dates'.format(
                            years_to_add))
                    if year_start and file_count == 0:
                        year_start += years_to_add
                    if year_end and file_count == 0:
                        year_end += years_to_add
                    del years_to_add
                del data_sy
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year
        # daily_df[year_field] = daily_df[date_field].map(lambda x: x.year)

        # Get PMET type from field names in daily .csv
        field_names = daily_df.columns
        PMET_str = field_names[4]
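        # NOTE: this assumes the reference ET column is always the fifth
        # field in the daily .csv; the commented name-based lookup below
        # is an alternative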
        # if 'PMETr' in field_names:
        #     PMET_str = 'PMETr'
        # else:
        #     PMET_str = 'PMETo'

        # Build list of unique years

        year_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug('    All Years: {0}'.format(', '.join(
            list(util.ranges(year_array.tolist())))))

        # Don't include first year in plots

        crop_year_start = min(daily_df[year_field])
        logging.debug('    Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days

        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]

        # Only keep years between year_start and year_end
        # Adjust crop years

        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
            crop_year_start = max(year_start, crop_year_start)
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]
            crop_year_end = min(year_end, crop_year_end)

        year_sub_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug('    Plot Years: {0}'.format(', '.join(
            list(util.ranges(year_sub_array.tolist())))))

        # Initial range of time series to show
        # For now, default to the last ~10 years

        if sub_x_range_flag:
            x_range = Range1d(
                np.datetime64(
                    dt.datetime(max(crop_year_end - 9, crop_year_start), 1, 1),
                    's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'),
                bounds=(np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                        np.datetime64(dt.datetime(crop_year_end + 1, 1, 1),
                                      's')))
        else:
            x_range = Range1d(
                np.datetime64(dt.datetime(crop_year_start, 1, 1), 's'),
                np.datetime64(dt.datetime(crop_year_end + 1, 1, 1), 's'))

        # Build separate arrays for each field of non-crop specific data

        dt_array = daily_df.index.date
        doy_array = daily_df[doy_field].values.astype(int)
        pmet_array = daily_df[PMET_str].values
        precip_array = daily_df[precip_field].values

        # Remove leap days
        # leap_array = (doy_array == 366)
        # doy_sub_array = np.delete(doy_array, np.where(leap_array)[0])

        # Build separate arrays for each set of crop specific fields

        etact_array = daily_df[etact_field].values
        etpot_array = daily_df[etpot_field].values
        etbas_array = daily_df[etbas_field].values
        irrig_array = daily_df[irrig_field].values
        season_array = daily_df[season_field].values
        runoff_array = daily_df[runoff_field].values
        dperc_array = daily_df[dperc_field].values
        kc_array = etact_array / pmet_array
        kcb_array = etbas_array / pmet_array

        # NIWR is ET - precip + runoff + deep percolation
        # Don't include deep percolation when irrigating
        # niwr_array = etact_array - (precip_array - runoff_array)
        # niwr_array[irrig_array==0] += dperc_array[irrig_array == 0]

        # Remove leap days
        # etact_sub_array = np.delete(etact_array, np.where(leap_array)[0])
        # niwr_sub_array = np.delete(niwr_array, np.where(leap_array)[0])

        # Time series figures of daily data

        output_name = '{0}_crop_{1:02d}_{2}-{3}'.format(
            station, int(crop_num), crop_year_start, crop_year_end)
        output_path = os.path.join(output_ws, output_name + '.html')

        f = output_file(output_path, title=output_name)
        TOOLS = 'xpan,xwheel_zoom,box_zoom,reset,save'
        f1 = figure(x_axis_type='datetime',
                    x_range=x_range,
                    width=figure_size[0],
                    height=figure_size[1],
                    tools=TOOLS,
                    toolbar_location="right",
                    active_scroll="xwheel_zoom")
        # title='Evapotranspiration', x_axis_type='datetime',
        f1.line(dt_array, etact_array, color='blue', legend_label='ETact')
        f1.line(dt_array, etbas_array, color='green', legend_label='ETbas')
        f1.line(dt_array,
                pmet_array,
                color='black',
                legend_label=PMET_str,
                line_dash="dotted")
        # line_dash="dashdot")
        # f1.title = 'Evapotranspiration [mm]'
        f1.grid.grid_line_alpha = 0.3
        f1.yaxis.axis_label = 'Evapotranspiration [mm]'
        f1.yaxis.axis_label_text_font_size = figure_ylabel_size

        f2 = figure(x_axis_type="datetime",
                    x_range=f1.x_range,
                    width=figure_size[0],
                    height=figure_size[1],
                    tools=TOOLS,
                    toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f2.line(dt_array, kc_array, color='blue', legend_label='Kc')
        f2.line(dt_array, kcb_array, color='green', legend_label='Kcb')
        f2.line(dt_array,
                season_array,
                color='black',
                legend_label='Season',
                line_dash="dashed")
        f2.grid.grid_line_alpha = 0.3
        f2.yaxis.axis_label = 'Kc and Kcb (dimensionless)'
        f2.yaxis.axis_label_text_font_size = figure_ylabel_size

        f3 = figure(x_axis_type="datetime",
                    x_range=f1.x_range,
                    width=figure_size[0],
                    height=figure_size[1],
                    tools=TOOLS,
                    toolbar_location="right",
                    active_scroll="xwheel_zoom")
        f3.line(dt_array, precip_array, color='blue', legend_label='PPT')
        f3.line(dt_array,
                irrig_array,
                color='black',
                legend_label='Irrigation',
                line_dash="dotted")
        f3.grid.grid_line_alpha = 0.3
        f3.yaxis.axis_label = 'PPT and Irrigation [mm]'
        f3.yaxis.axis_label_text_font_size = figure_ylabel_size

        if figure_save_flag:
            save(column([f1, f2, f3], sizing_mode='stretch_both'))
        if figure_show_flag:
            # Open in browser
            show(column([f1, f2, f3], sizing_mode='stretch_both'))

        # Cleanup

        del f1, f2, f3, f
        del etact_array, etpot_array, etbas_array
        del irrig_array, season_array
        del runoff_array, dperc_array
        del kc_array, kcb_array
        del file_path
        del dt_array, year_array, year_sub_array, doy_array
        del pmet_array
        del precip_array
        gc.collect()
Code Example #4
def main(ini_path, overwrite_flag=True, cleanup_flag=True, year_filter=''):
    """Create Median NIWR Shapefiles from annual_stat files

    Args:
        ini_path (str): file path of the project INI file
        overwrite_flag (bool): If True (default), overwrite existing files
        cleanup_flag (bool): If True, remove temporary files
        year_filter (str): only include data for these years in statistics

    Returns:
        None
    """
    logging.info('\nCreating Annual Stat Shapefiles')
    #  INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        gis_ws = config.get('CROP_ET', 'gis_folder')
    except:
        logging.error(
            'gis_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False
#    try:
#        calibration_ws = config.get(crop_et_sec, 'spatial_cal_folder')
#    except:
#        calibration_ws = os.path.join(project_ws, 'calibration')
    try:
        etref_field = config.get('REFET', 'etref_field')
    except:
        logging.error(
            'etref_field parameter must be set in the INI file, exiting')
        return False
    
    # Year filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
            logging.info('\nyear_list = {0}'.format(year_list))
        except:
            pass

    # Sub folder names
    monthly_ws = os.path.join(project_ws, 'monthly_stats')
    gs_ws = os.path.join(project_ws, 'growing_season_stats')

    # Check input folders
    if not os.path.exists(monthly_ws):
        logging.critical('ERROR: The monthly_stats folder does not exist.'
                         ' Check .ini settings')
        sys.exit()

    # Check input folders
    if not os.path.isdir(project_ws):
        logging.critical(('ERROR: The project folder ' +
                          'does not exist\n  {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.critical(('ERROR: The GIS folder ' +
                          'does not exist\n  {}').format(gis_ws))
        sys.exit()
    logging.info('\nGIS Workspace:      {0}'.format(gis_ws))

    # Output folder
    output_folder_path = os.path.join(gs_ws, 'gs_summary_shapefiles_allyears')
    if year_list:
        output_folder_path = os.path.join(
            gs_ws, 'gs_summary_shapefiles_{}to{}'.format(
                min(year_list), max(year_list)))
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Regular expressions
    data_re = re.compile(r'(?P<CELLID>\w+)_crop_(?P<CROP>\d+)\.csv$', re.I)
    # data_re = re.compile(
    #     r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+)\.csv$', re.I)
    
    # testing
    # monthly_ws = r"D:\upper_co_full\monthly_stats"
    # et_cells_path = os.path.join('D:\upper_co_full\gis','ETCells.shp')
    # etref_field = 'ETr_ASCE'

    # Build list of all data files
    data_file_list = sorted(
        [os.path.join(monthly_ws, f_name) for f_name in os.listdir(monthly_ws)
         if data_re.match(f_name)])
    if not data_file_list:
        logging.error(
            '  ERROR: No annual ET files were found\n' +
            '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))

        #station, crop_num = os.path.splitext(file_name)[0].split('_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = list(set(crop_nums))
    unique_stations = list(set(stations))

    # Set file paths
    # out_path = os.path.join(monthly_ws, 'Summary_Shapefiles')

    # Loop through each crop and station list to build summary dataframes for
    # variables to include in output (if not in .csv, skip)
    # Should PMETo/ETr come from the .ini?
    var_list = ['ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb',
                'PPT', 'Irrigation', 'Runoff', 'DPerc', 'NIWR', 'Season']
    PMET_field = 'PM{}'.format(etref_field)
    var_list.insert(0, PMET_field)
    
    # Arc field names can only be 10 characters, so shorten names to leave
    # room for the _stat suffix. The first field name is based on
    # etref_field: ETr, ETo, or ET (not ETo/ETr)
    if 'ETr' in etref_field:
        ret_fieldname = 'ETr'
    elif 'ETo' in etref_field:
        ret_fieldname = 'ETo'
    else:
        ret_fieldname = 'ET'
    var_fieldname_list = [ret_fieldname, 'ETact', 'ETpot', 'ETbas', 'Kc',
                          'Kcb', 'PPT', 'Irr', 'Runoff', 'DPerc', 'NIWR',
                          'Season']

    # Testing (should this be an input option?)
    # unique_crop_nums = [3]
    # unique_stations = [377392, 378777]
    logging.info('\nCreating Summary Shapefiles')
    if year_list:
        logging.info('\nOnly including years: {0}'.format(year_list))
    for crop in unique_crop_nums:
        logging.info('\nProcessing Crop: {:02d}'.format(crop))

        # Initialize df variable to check if pandas df needs to be created
        output_df = None
        for station in unique_stations:
            # Build file path
            file_path = os.path.join(
                monthly_ws, '{}_crop_{:02d}.csv'.format(station, crop))
            # Only process files that exist (crop/cell combinations)
            if not os.path.exists(file_path):
                continue

            # Read file into df
            monthly_df = pd.read_csv(file_path, skiprows=1)
            if year_list:
                monthly_df = monthly_df[monthly_df['Year'].isin(year_list)]
            # Remove all non-growing-season data
            monthly_df = monthly_df[(monthly_df['Month'] >= 4) &
                                    (monthly_df['Month'] <= 10)]
            
            # Dictionary to control aggregation of each variable
            agg_dict = {
                'ETact': 'sum',
                'ETpot': 'sum',
                'ETbas': 'sum',
                'PPT': 'sum',
                'Irrigation': 'sum',
                'Runoff': 'sum',
                'DPerc': 'sum',
                'NIWR': 'sum',
                'Season': 'sum',
                'Kc': 'mean',
                'Kcb': 'mean'}
            # Add etref_field to dictionary
            agg_dict[PMET_field] = 'sum'

            # Group stats by year; each column follows agg assignment above
            yearlygroup_df = monthly_df.groupby('Year',
                                                as_index=True).agg(agg_dict)
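            # e.g. for ETact: the monthly sums collapse to one annual value
            # per year; the mean/median below are then taken across years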
            # Take mean of yearly group stats
            mean_df = yearlygroup_df.mean(axis=0)
            mean_fieldnames = [v + '_mn' for v in var_fieldname_list]

            # Take median of yearly group stats
            median_df = yearlygroup_df.median(axis=0)
            median_fieldnames = [v + '_mdn' for v in var_fieldname_list]

            # Create dataframe if it doesn't exist
            if output_df is None:
                output_df = pd.DataFrame(
                    index=unique_stations,
                    columns=mean_fieldnames + median_fieldnames)

            # Write data to each station row
            output_df.loc[station] = \
                list(mean_df[var_list]) + list(median_df[var_list])

        # Create station ID column from index (ETCells GRIDMET ID is int)
        output_df['Station'] = output_df.index.map(int)

        # Remove rows with NA (is this the best option???)
        # Write all stations to index and then remove empty
        output_df = output_df.dropna()

        # Output file name
        out_name = "Crop_{:02d}_gs_stats.shp".format(crop)
        temp_name = "temp_annual.shp"

        # Copy ETCELLS.shp and join cropweighted data to it
        data = gpd.read_file(et_cells_path)

        # Data keep list (geometry is needed to write out as geodataframe)
        keep_list = ['geometry', 'GRIDMET_ID', 'LAT', 'LON', 'ELEV_M',
                     'ELEV_FT', 'COUNTYNAME', 'STATENAME', 'STPO', 'HUC8',
                     'AG_ACRES', 'CROP_{:02d}'.format(crop)]

        # Filter ETCells using keep list
        data = data[keep_list]

        # UPDATE TO NEWER ETCELLS STATION_ID FORMAT !!!!!
        merged_data = data.merge(output_df, left_on='GRIDMET_ID',
                                 right_on='Station')
        # Remove redundant Station column
        merged_data = merged_data.drop(columns='Station')
        # Write output .shp
        merged_data.to_file(os.path.join(output_folder_path, out_name),
                            driver='ESRI Shapefile')
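
A minimal invocation sketch for this example (the INI path and year range are placeholders; year_filter is parsed with util.parse_int_set, so a range string is assumed to work):

    if __name__ == '__main__':
        logging.basicConfig(level=logging.INFO, format='%(message)s')
        main('example_project.ini', year_filter='2000-2010')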
Code Example #5
def main(ini_path, overwrite_flag=True, cleanup_flag=True, year_filter=''):
    """Create Median NIWR Shapefiles from annual_stat files

    Args:
        ini_path (str): file path of the project INI file
        overwrite_flag (bool): If True (default), overwrite existing files
        cleanup_flag (bool): If True, remove temporary files
        year_filter (str): only include data for these years in statistics

    Returns:
        None
    """
    logging.info('\nCreating Annual Stat Shapefiles')
    #  INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        gis_ws = config.get('CROP_ET', 'gis_folder')
    except:
        logging.error(
            'gis_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False


    # try:
    #     calibration_ws = config.get(crop_et_sec, 'spatial_cal_folder')
    # except:
    #     calibration_ws = os.path.join(project_ws, 'calibration')
    try:
        etref_field = config.get('REFET', 'etref_field')
    except:
        logging.error(
            'etref_field parameter must be set in the INI file, exiting')
        return False

    # Sub folder names
    annual_ws = os.path.join(project_ws, 'annual_stats')

    # Check input folders
    if not os.path.exists(annual_ws):
        logging.critical('ERROR: The annual_stat folder does not exist.'
                         ' Check .ini settings')
        sys.exit()

    # Check input folders
    if not os.path.isdir(project_ws):
        logging.critical(('ERROR: The project folder ' +
                          'does not exist\n  {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.critical(
            ('ERROR: The GIS folder ' + 'does not exist\n  {}').format(gis_ws))
        sys.exit()
    logging.info('\nGIS Workspace:      {0}'.format(gis_ws))

    #Year Filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
            logging.info('\nyear_list = {0}'.format(year_list))
        except:
            pass

    #output folder
    output_folder_path = os.path.join(annual_ws, 'summary_shapefiles')
    if year_list:
        output_folder_path = os.path.join(
            annual_ws,
            'summary_shapefiles_{}to{}'.format(min(year_list), max(year_list)))
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Regular expressions
    data_re = re.compile(r'(?P<CELLID>\w+)_crop_(?P<CROP>\d+)\.csv$', re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(annual_ws, f_name) for f_name in os.listdir(annual_ws)
        if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error('  ERROR: No annual ET files were found\n' +
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = list(set(crop_nums))
    unique_stations = list(set(stations))

    # Loop through each crop and station list to build summary dataframes for
    # variables to include in output (if not in .csv skip)
    # Should PMETo/ETr come from the .ini?
    var_list = [
        'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irrigation', 'Runoff',
        'DPerc', 'NIWR', 'Season'
    ]
    PMET_field = 'PM{}'.format(etref_field)
    var_list.insert(0, PMET_field)

    # Arc fieldnames can only be 10 characters. Shorten names to include _stat
    if 'ETr' in etref_field:
        var_fieldname_list = [
            'ETr', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irr',
            'Runoff', 'DPerc', 'NIWR', 'Season'
        ]
    elif 'ETo' in etref_field:
        var_fieldname_list = [
            'ETo', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irr',
            'Runoff', 'DPerc', 'NIWR', 'Season'
        ]
    else:
        var_fieldname_list = [
            'ET', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irr',
            'Runoff', 'DPerc', 'NIWR', 'Season'
        ]

    # Testing (should this be an input option?)
    # unique_crop_nums = [3]

    print('\n Creating Summary Shapefiles')
    for crop in unique_crop_nums:
        print('\n Processing Crop: {:02d}'.format(crop))
        #create output dataframe
        output_df = pd.DataFrame(index=unique_stations)

        for var, var_name in zip(var_list, var_fieldname_list):
            # Initialize df variable to check if pandas df needs to be created
            df = None
            for station in unique_stations:
                # Build file path
                file_path = os.path.join(
                    annual_ws, '{}_crop_{:02d}.csv'.format(station, crop))
                logging.debug(file_path)
                # Only process files that exist (crop/cell combinations)
                if not os.path.exists(file_path):
                    continue
                # Read file into df
                annual_df = pd.read_csv(file_path, skiprows=1)

                # Filter to only include years specified by user
                if year_list:
                    annual_df = annual_df[annual_df['Year'].isin(year_list)]

                # Check to see if variable is in .csv (ETr vs ETo)
                # SHOULD THIS COME FROM THE .ini?
                if var not in annual_df.columns:
                    continue
                # Create dataframe if it doesn't exist
                if df is None:
                    years = list(map(str, annual_df['Year']))
                    year_fieldnames = ['Year_' + y for y in years]
                    df = pd.DataFrame(index=unique_stations,
                                      columns=year_fieldnames)
                # Write data to each station row
                df.loc[station] = list(annual_df[var])

            # Add column of mean and median of all years
            # Skip variable if it was never found in any .csv (ETr vs ETo)
            if df is None:
                continue

            # Median Fields
            median_fieldname = '{}_mdn'.format(var_name)
            output_df[median_fieldname] = df.median(axis=1)
            # Mean Fields
            mean_fieldname = '{}_mn'.format(var_name)
            output_df[mean_fieldname] = df.mean(axis=1)

        # Create station ID column from index (ETCells GRIDMET ID is int)
        output_df['Station'] = output_df.index.map(int)

        # Remove rows with NaN (is this the best option?)
        # Write all stations to index and then remove empty
        output_df = output_df.dropna()

        # Output file name
        out_name = "Crop_{:02d}_annual_stats.shp".format(crop)
        temp_name = "temp_annual.shp"

        # Copy ETCELLS.shp and join cropweighted data to it
        data = gpd.read_file(et_cells_path)

        # Data keep list (geometry is needed to write out as geodataframe)
        keep_list = [
            'geometry', 'GRIDMET_ID', 'LAT', 'LON', 'ELEV_M', 'ELEV_FT',
            'COUNTYNAME', 'STATENAME', 'STPO', 'HUC8', 'AG_ACRES',
            'CROP_{:02d}'.format(crop)
        ]

        # Filter ETCells using keep list
        data = data[keep_list]

        # UPDATE TO NEWER ETCELLS STATION_ID FORMAT !!!!!
        merged_data = data.merge(output_df,
                                 left_on='GRIDMET_ID',
                                 right_on='Station')
        # Remove redundant Station column
        merged_data = merged_data.drop(columns='Station')
        # Write output .shp
        merged_data.to_file(os.path.join(output_folder_path, out_name),
                            driver='ESRI Shapefile')
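Incidentally, the station and crop number parsed in the loop above could also be taken straight from the named groups in data_re rather than re-splitting the file name; a small sketch under the same naming convention (the sample file name is hypothetical):

import re

data_re = re.compile(r'(?P<CELLID>\w+)_crop_(?P<CROP>\d+)\.csv$', re.I)
match = data_re.match('377392_crop_03.csv')  # hypothetical file name
if match:
    station = match.group('CELLID')      # '377392'
    crop_num = int(match.group('CROP'))  # 3
    print(station, crop_num)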
Code Example #6
def main(ini_path, time_agg, year_filter=''):
    """Read monthly summary files and create monthly, calendar year, and water year (oct-sep)
    summary files for each crop cell combination


    Args:
        ini_path (str): file path of the project INI file
        year_filter (list): only include certain years for summary
            (single YYYY or range YYYY:YYYY)
    Returns:
        None
    """

    # INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False

    if time_agg == 'annual':
        print('\n Summarizing Annual Effective Precipitation Stats')
        ws = os.path.join(project_ws, r'annual_stats')
        date_var = 'Year'
    elif time_agg == 'wateryear':
        print('\n Summarizing Water Year Effective Precipitation Stats')
        ws = os.path.join(project_ws, r'monthly_stats')
        date_var = 'WY'
    else:
        print('\n Summarizing Monthly Effective Precipitation Stats')
        ws = os.path.join(project_ws, r'monthly_stats')
        date_var = 'Date'

    # Identify unique crops and station_ids in monthly_stats folder
    # Regular expressions
    data_re = re.compile(r'(?P<CELLID>\w+)_crop_(?P<CROP>\d+)\.csv$', re.I)
    # data_re = re.compile(r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+)\.csv$',
    #  re.I)

    # testing
    # monthly_ws = r"D:\upper_co_full\monthly_stats"
    # et_cells_path = os.path.join('D:\upper_co_full\gis','ETCells.shp')
    # etref_field = 'ETr_ASCE'

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(ws, f_name) for f_name in os.listdir(ws)
        if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error('  ERROR: No annual ET files were found\n' +
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))

        # station, crop_num = os.path.splitext(file_name)[0].split(
        # '_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = sorted(list(set(crop_nums)))
    unique_stations = sorted(list(set(stations)))

    # Year Filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
        except:
            pass
    # Min/Max for file naming (fall back to 'all' when no filter is given,
    # since min()/max() of None would raise a TypeError)
    if year_list:
        year_min, year_max = min(year_list), max(year_list)
    else:
        year_min, year_max = 'all', 'all'

    # Build full variable list for output order
    et_list = []
    for crop in unique_crop_nums:
        et_list.append('P_rz_{:02d}'.format(crop))
        et_list.append('P_rz_fraction_{:02d}'.format(crop))
        et_list.append('P_eft_{:02d}'.format(crop))
        et_list.append('P_eft_fraction_{:02d}'.format(crop))
    full_var_list = ['Station_ID', date_var] + ['PPT'] + et_list

    # Testing (cell with multiple crops)
    # unique_stations = [377392]
    # Loop through each station and crop list to build summary dataframes for
    print('\n Reading Data and Creating Effective PPT Files')

    out_df = pd.DataFrame(columns=full_var_list)
    for station in unique_stations:
        logging.info('\n Processing Station: {}'.format(station))
        loop_df = pd.DataFrame()
        for crop in unique_crop_nums:
            logging.info('\n Processing Crop: {:02d}'.format(crop))
            crop_vars_list = ['ETp_{:02d}'.format(crop), 'PPT', 'Season']
            # Initialize df variable to check if pandas df needs to be created
            # Build File Path
            file_path = os.path.join(
                ws, '{}_crop_{:02d}.csv'.format(station, crop))
            # Only process files that exists (crop/cell combinations)
            if not os.path.exists(file_path):
                logging.info('Crop not present in cell. Skipping')
                continue

            # Read file into df (skip header)
            df = pd.read_csv(file_path, skiprows=1)

            # Filter based on Year List
            if year_list:
                df = df[df['Year'].isin(year_list)]

            if time_agg == 'wateryear':
                # add water year column
                df['WY'] = df.Year.where(df.Month < 10, df.Year + 1)
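                # Oct-Dec rows roll forward into the next water year
                # (a water year runs Oct 1 through Sep 30)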
                # groupby WY (sum); select PPT variables
                df = df[['PPT', 'P_rz', 'P_eft']].groupby(df.WY).sum()

                # calculate WY fractions
                df['P_rz_fraction'] = df.P_rz / df.PPT
                df['P_eft_fraction'] = df.P_eft / df.PPT
                df = df.reset_index()
                if year_list:
                    df = df[df['WY'].isin(year_list)]

            # Rename Columns to Match USBR Naming
            df = df.rename(
                {
                    'P_rz': 'P_rz_{:02d}'.format(crop),
                    'P_eft': 'P_eft_{:02d}'.format(crop),
                    'P_rz_fraction': 'P_rz_fraction_{:02d}'.format(crop),
                    'P_eft_fraction': 'P_eft_fraction_{:02d}'.format(crop)
                },
                axis='columns')
            # Add Station_ID column
            df['Station_ID'] = station

            # First pass create loop DF with PPT and Season
            if loop_df.empty:
                loop_df = df[[
                    'Station_ID', date_var, 'PPT', 'P_rz_{:02d}'.format(crop),
                    'P_rz_fraction_{:02d}'.format(crop),
                    'P_eft_{:02d}'.format(crop),
                    'P_eft_fraction_{:02d}'.format(crop)
                ]]
            else:
                # After df is built merge new ET data to existing df
                # Merge on both Station_ID and Date
                loop_df = loop_df.merge(df[[
                    'Station_ID', date_var, 'P_rz_{:02d}'.format(crop),
                    'P_rz_fraction_{:02d}'.format(crop),
                    'P_eft_{:02d}'.format(crop),
                    'P_eft_fraction_{:02d}'.format(crop)
                ]],
                                        left_on=['Station_ID', date_var],
                                        right_on=['Station_ID', date_var],
                                        how='outer')

        # Concat station_df to output df
        out_df = pd.concat([out_df, loop_df],
                           axis=0,
                           ignore_index=True,
                           sort=True)
        #        df = pd.concat([df,loop_df])
        out_df = out_df.fillna(-9999)

    output_ws = os.path.join(project_ws, 'effective_ppt_stats')
    if not os.path.exists(output_ws):
        os.makedirs(output_ws)

    output_path = os.path.join(
        output_ws, 'effective_ppt_{}_{}_{}.csv'.format(time_agg, year_min,
                                                       year_max))

    # Write Output File
    out_df.to_csv(output_path, sep=',', columns=full_var_list, index=False)
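The merge step above widens loop_df one crop at a time with an outer join on Station_ID and the date column, so a station/year row survives even when a given crop has no data for it. A toy sketch of that pattern (the crop suffixes are illustrative):

import pandas as pd

base = pd.DataFrame({'Station_ID': ['A', 'A'], 'Year': [2000, 2001],
                     'P_rz_03': [1.0, 2.0]})
crop7 = pd.DataFrame({'Station_ID': ['A'], 'Year': [2000],
                      'P_rz_07': [3.0]})
# Outer join keeps the 2001 row even though crop 07 has no data for it
merged = base.merge(crop7, on=['Station_ID', 'Year'], how='outer')
print(merged)  # P_rz_07 is NaN for 2001 (filled with -9999 above)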
Code Example #7
def main(ini_path,
         zone_type='huc8',
         area_threshold=10,
         dairy_cuttings=5,
         beef_cuttings=4,
         crop_str='',
         remove_empty_flag=True,
         overwrite_flag=False,
         cleanup_flag=False):
    """Build a feature class for each crop and set default crop parameters

    Apply the values in the CropParams.txt as defaults to every cell

    Args:
        ini_path (str): file path of the project INI file
        zone_type (str): Zone type (huc8, huc10, county)
        area_threshold (float): CDL area threshold [acres]
        dairy_cuttings (int): Initial number of dairy hay cuttings
        beef_cuttings (int): Initial number of beef hay cuttings
        crop_str (str): comma separate list or range of crops to compare
        overwrite_flag (bool): If True, overwrite existing output rasters
        cleanup_flag (bool): If True, remove temporary files

    Returns:
        None
    """
    logging.info('\nCalculating ET-Demands Spatial Crop Parameters')

    remove_empty_flag = True
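    # NOTE: hardcoded True overrides the remove_empty_flag keyword argument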

    # Input paths
    # DEADBEEF - For now, get cropET folder from INI file
    # This function may eventually be moved into the main cropET code
    config = util.read_ini(ini_path, section='CROP_ET')
    crop_et_sec = 'CROP_ET'
    project_ws = config.get(crop_et_sec, 'project_folder')
    gis_ws = config.get(crop_et_sec, 'gis_folder')
    cells_path = config.get(crop_et_sec, 'cells_path')
    # try: cells_path = config.get(crop_et_sec, 'cells_path')
    # except: cells_path = os.path.join(gis_ws, 'ETCells.shp')
    stations_path = config.get(crop_et_sec, 'stations_path')
    crop_et_ws = config.get(crop_et_sec, 'crop_et_folder')
    bin_ws = os.path.join(crop_et_ws, 'bin')

    try:
        template_ws = config.get(crop_et_sec, 'template_folder')
    except:
        template_ws = os.path.join(os.path.dirname(crop_et_ws), 'static')
    try:
        calibration_ws = config.get(crop_et_sec, 'spatial_cal_folder')
    except:
        calibration_ws = os.path.join(project_ws, 'calibration')

    # Sub folder names
    static_ws = os.path.join(project_ws, 'static')
    pmdata_ws = os.path.join(project_ws, 'pmdata')
    crop_params_path = os.path.join(static_ws, 'CropParams.txt')

    # Input units
    cell_elev_units = 'FEET'
    station_elev_units = 'FEET'

    # Field names
    cell_id_field = 'CELL_ID'
    cell_name_field = 'CELL_NAME'
    crop_acres_field = 'CROP_ACRES'
    dairy_cutting_field = 'Dairy_Cut'
    beef_cutting_field = 'Beef_Cut'

    # Only keep the following ET Cell fields
    keep_field_list = [cell_id_field, cell_name_field, 'AG_ACRES']
    # keep_field_list = ['NLDAS_ID', 'CELL_ID', 'HUC8', 'COUNTY', 'AG_ACRES']
    # keep_field_list = ['FIPS', 'COUNTY']

    # The maximum crop name was ~50 characters
    string_field_len = 50

    # Check input folders
    if not os.path.isdir(crop_et_ws):
        logging.error(('ERROR: The INI cropET folder ' +
                       'does not exist\n  {}').format(crop_et_ws))
        sys.exit()
    elif not os.path.isdir(bin_ws):
        logging.error(('\nERROR: The Bin workspace ' +
                       'does not exist\n  {0}').format(bin_ws))
        sys.exit()
    elif not os.path.isdir(project_ws):
        logging.error(('ERROR: The project folder ' +
                       'does not exist\n  {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.error(
            ('ERROR: The GIS folder ' + 'does not exist\n  {}').format(gis_ws))
        sys.exit()
    if '.gdb' not in calibration_ws and not os.path.isdir(calibration_ws):
        os.makedirs(calibration_ws)
    logging.info('\nGIS Workspace:      {0}'.format(gis_ws))
    logging.info('Project Workspace:  {0}'.format(project_ws))
    logging.info('CropET Workspace:   {0}'.format(crop_et_ws))
    logging.info('Bin Workspace:      {0}'.format(bin_ws))
    logging.info('Calib. Workspace:   {0}'.format(calibration_ws))

    # Check input files
    if not os.path.isfile(crop_params_path):
        logging.error(('\nERROR: The crop parameters file {} ' +
                       'does not exist\n').format(crop_params_path))
        sys.exit()
    elif not arcpy.Exists(cells_path):
        logging.error(('\nERROR: The ET Cell shapefile {} ' +
                       'does not exist\n').format(cells_path))
        sys.exit()
    elif not os.path.isfile(stations_path) or not arcpy.Exists(stations_path):
        logging.error(('ERROR: The NLDAS station shapefile ' +
                       'does not exist\n  {}').format(stations_path))
        sys.exit()
    logging.debug('Crop Params Path:   {0}'.format(crop_params_path))
    logging.debug('ET Cells Path:      {0}'.format(cells_path))
    logging.debug('Stations Path:      {0}'.format(stations_path))

    # For now, only allow calibration parameters in separate shapefiles
    ext = '.shp'
    # # Build output geodatabase if necessary
    # if calibration_ws.endswith('.gdb'):
    #     logging.debug('GDB Path:           {0}'.format(calibration_ws))
    #     ext = ''
    #     if arcpy.Exists(calibration_ws) and overwrite_flag:
    #         try: arcpy.Delete_management(calibration_ws)
    #         except: pass
    #     if calibration_ws is not None and not arcpy.Exists(calibration_ws):
    #         arcpy.CreateFileGDB_management(
    #             os.path.dirname(calibration_ws),
    #             os.path.basename(calibration_ws))
    # else:
    #     ext = '.shp'

    # Field Name, Property, Field Type
    # Property is the string of the CropParameter class property value
    # It will be used to access the property using getattr
    dairy_cutting_field = 'Dairy_Cut'
    beef_cutting_field = 'Beef_Cut'
    param_list = [
        # ['Name', 'name', 'STRING'],
        # ['ClassNum', 'class_number', 'LONG'],
        # ['IsAnnual', 'is_annual', 'SHORT'],
        # ['IrrigFlag', 'irrigation_flag', 'SHORT'],
        # ['IrrigDays', 'days_after_planting_irrigation', 'LONG'],
        # ['Crop_FW', 'crop_fw', 'LONG'],
        # ['WinterCov', 'winter_surface_cover_class', 'SHORT'],
        # ['CropKcMax', 'kc_max', 'FLOAT'],
        ['MAD_Init', 'mad_initial', 'LONG'],
        ['MAD_Mid', 'mad_midseason', 'LONG'],
        # ['RootDepIni', 'rooting_depth_initial', 'FLOAT'],
        # ['RootDepMax', 'rooting_depth_max', 'FLOAT'],
        # ['EndRootGrw', 'end_of_root_growth_fraction_time', 'FLOAT'],
        # ['HeightInit', 'height_initial', 'FLOAT'],
        # ['HeightMax', 'height_max', 'FLOAT'],
        # ['CurveNum', 'curve_number', 'LONG'],
        # ['CurveName', 'curve_name', 'STRING'],
        # ['CurveType', 'curve_type', 'SHORT'],
        # ['PL_GU_Flag', 'flag_for_means_to_estimate_pl_or_gu', 'SHORT'],
        ['T30_CGDD', 't30_for_pl_or_gu_or_cgdd', 'FLOAT'],
        ['PL_GU_Date', 'date_of_pl_or_gu', 'FLOAT'],
        ['CGDD_Tbase', 'tbase', 'FLOAT'],
        ['CGDD_EFC', 'cgdd_for_efc', 'LONG'],
        ['CGDD_Term', 'cgdd_for_termination', 'LONG'],
        ['Time_EFC', 'time_for_efc', 'LONG'],
        ['Time_Harv', 'time_for_harvest', 'LONG'],
        ['KillFrostC', 'killing_frost_temperature', 'FLOAT'],
        # ['InvokeStrs', 'invoke_stress', 'SHORT'],
        # ['CN_Coarse', 'cn_coarse_soil', 'LONG'],
        # ['CN_Medium', 'cn_medium_soil', 'LONG'],
        # ['CN_Fine', 'cn_fine_soil', 'LONG']
    ]
    # if calibration_ws.endswith('.gdb'):
    #     dairy_cutting_field = 'Dairy_Cuttings'
    #     beef_cutting_field = 'Beef_Cuttings'
    #     param_list = [
    #        # ['Name', 'name', 'STRING'],
    #        # ['Class_Number', 'class_number', 'LONG'],
    #        # ['Is_Annual', 'is_annual', 'SHORT'],
    #        # ['Irrigation_Flag', 'irrigation_flag', 'SHORT'],
    #        # ['Irrigation_Days', 'days_after_planting_irrigation', 'LONG'],
    #        # ['Crop_FW', 'crop_fw', 'LONG'],
    #        # ['Winter_Cover_Class', 'winter_surface_cover_class', 'SHORT'],
    #        # ['Crop_Kc_Max', 'kc_max', 'FLOAT'],
    #        # ['MAD_Initial', 'mad_initial', 'LONG'],
    #        # ['MAD_Midseason', 'mad_midseason', 'LONG'],
    #        # ['Root_Depth_Ini', 'rooting_depth_initial', 'FLOAT'],
    #        # ['Root_Depth_Max', 'rooting_depth_max', 'FLOAT'],
    #        # ['End_Root_Growth', 'end_of_root_growth_fraction_time', 'FLOAT'],
    #        # ['Height_Initial', 'height_initial', 'FLOAT'],
    #        # ['Height_Maximum', 'height_max', 'FLOAT'],
    #        # ['Curve_Number', 'curve_number', 'LONG'],
    #        # ['Curve_Name', 'curve_name', 'STRING'],
    #        # ['Curve_Type', 'curve_type', 'SHORT'],
    #        # ['PL_GU_Flag', 'flag_for_means_to_estimate_pl_or_gu', 'SHORT'],
    #        ['T30_CGDD', 't30_for_pl_or_gu_or_cgdd', 'FLOAT'],
    #        ['PL_GU_Date', 'date_of_pl_or_gu', 'FLOAT'],
    #        ['CGDD_Tbase', 'tbase', 'FLOAT'],
    #        ['CGDD_EFC', 'cgdd_for_efc', 'LONG'],
    #        ['CGDD_Termination', 'cgdd_for_termination', 'LONG'],
    #        ['Time_EFC', 'time_for_efc', 'LONG'],
    #        ['Time_Harvest', 'time_for_harvest', 'LONG'],
    #        ['Killing_Frost_C', 'killing_frost_temperature', 'FLOAT'],
    #        # ['Invoke_Stress', 'invoke_stress', 'SHORT'],
    #        # ['CN_Coarse_Soil', 'cn_coarse_soil', 'LONG'],
    #        # ['CN_Medium_Soil', 'cn_medium_soil', 'LONG'],
    #        # ['CN_Fine_Soil', 'cn_fine_soil', 'LONG']
    #    ]

    # Allow user to subset crops and cells from INI
    try:
        crop_skip_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_skip_list'))))
    except:
        crop_skip_list = []
    try:
        crop_test_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_test_list'))))
    except:
        crop_test_list = []
    try:
        cell_skip_list = config.get(crop_et_sec, 'cell_skip_list').split(',')
        cell_skip_list = sorted([c.strip() for c in cell_skip_list])
    except:
        cell_skip_list = []
    try:
        cell_test_list = config.get(crop_et_sec, 'cell_test_list').split(',')
        cell_test_list = sorted([c.strip() for c in cell_test_list])
    except:
        cell_test_list = []

    # Overwrite INI crop list with user defined values
    # Could also append to the INI crop list
    if crop_str:
        try:
            crop_test_list = sorted(list(util.parse_int_set(crop_str)))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))
        except:
            pass
    # Don't build crop parameter files for non-crops
    crop_skip_list = sorted(
        list(set(crop_skip_list + [44, 45, 46, 55, 56, 57])))

    # crop_test_list = sorted(list(set(crop_test_list + [46])))
    logging.debug('\ncrop_test_list = {0}'.format(crop_test_list))
    logging.debug('crop_skip_list = {0}'.format(crop_skip_list))
    logging.debug('cell_test_list = {0}'.format(cell_test_list))
    logging.debug('cell_skip_list = {0}'.format(cell_skip_list))

    # Read crop parameters using ET Demands functions/methods
    logging.info('\nReading Default Crop Parameters')
    sys.path.append(bin_ws)
    import crop_parameters
    crop_param_dict = crop_parameters.read_crop_parameters(crop_params_path)

    # arcpy.CheckOutExtension('Spatial')
    # arcpy.env.pyramid = 'NONE 0'
    arcpy.env.overwriteOutput = overwrite_flag
    arcpy.env.parallelProcessingFactor = 8

    # Get list of crops specified in ET cells
    # Currently this may only be crops with CDL acreage
    crop_field_list = [
        field.name for field in arcpy.ListFields(cells_path)
        if re.match(r'CROP_\d{2}', field.name)
    ]
    logging.debug('Cell crop fields: {}'.format(', '.join(crop_field_list)))
    crop_number_list = [
        int(f_name.split('_')[1]) for f_name in crop_field_list
    ]
    crop_number_list = [
        crop_num for crop_num in crop_number_list
        if not ((crop_test_list and crop_num not in crop_test_list) or
                (crop_skip_list and crop_num in crop_skip_list))
    ]
    logging.info('Cell crop numbers: {}'.format(', '.join(
        list(util.ranges(crop_number_list)))))

    # Get crop acreages for each cell
    crop_acreage_dict = defaultdict(dict)
    field_list = [cell_id_field] + crop_field_list
    with arcpy.da.SearchCursor(cells_path, field_list) as cursor:
        for row in cursor:
            for i, crop_num in enumerate(crop_number_list):
                crop_acreage_dict[crop_num][row[0]] = row[i + 1]

    # Make an empty template crop feature class
    logging.info('')
    crop_template_path = os.path.join(calibration_ws, 'crop_00_template' + ext)
    if overwrite_flag and arcpy.Exists(crop_template_path):
        logging.debug('Overwriting template crop feature class')
        arcpy.Delete_management(crop_template_path)
    if arcpy.Exists(crop_template_path):
        logging.info('Template crop feature class already exists, skipping')
    else:
        logging.info('Building template crop feature class')
        arcpy.CopyFeatures_management(cells_path, crop_template_path)

        # Remove unneeded et cell fields
        for field in arcpy.ListFields(crop_template_path):
            if (field.name not in keep_field_list and field.editable
                    and not field.required):
                logging.debug('  Delete field: {0}'.format(field.name))
                arcpy.DeleteField_management(crop_template_path, field.name)
        field_list = [f.name for f in arcpy.ListFields(crop_template_path)]

        # Add crop acreage field
        if crop_acres_field not in field_list:
            logging.debug('  Add field: {0}'.format(crop_acres_field))
            arcpy.AddField_management(crop_template_path, crop_acres_field,
                                      'Float')
            arcpy.CalculateField_management(crop_template_path,
                                            crop_acres_field, '0',
                                            'PYTHON_9.3')

        # Add crop parameter fields if necessary
        for param_field, param_method, param_type in param_list:
            logging.debug('  Add field: {0}'.format(param_field))
            if param_field not in field_list:
                arcpy.AddField_management(crop_template_path, param_field,
                                          param_type)
        # if dairy_cutting_field not in field_list:
        #     logging.debug('  Add field: {0}'.format(dairy_cutting_field))
        #     arcpy.AddField_management(crop_template_path, dairy_cutting_field, 'Short')
        #     arcpy.CalculateField_management(
        #        crop_template_path, dairy_cutting_field, dairy_cuttings, 'PYTHON')
        # if beef_cutting_field not in field_list:
        #     logging.debug('  Add field: {0}'.format(beef_cutting_field))
        #     arcpy.AddField_management(crop_template_path, beef_cutting_field, 'Short')
        #     arcpy.CalculateField_management(
        #        crop_template_path, beef_cutting_field, beef_cuttings, 'PYTHON')

    # Add an empty/zero crop field for the field mappings below
    # if len(arcpy.ListFields(cells_path, 'CROP_EMPTY')) == 0:
    #     arcpy.AddField_management(cells_path, 'CROP_EMPTY', 'Float')
    #     arcpy.CalculateField_management(
    #        cells_path, 'CROP_EMPTY', '0', 'PYTHON_9.3')

    # Process each crop
    logging.info('\nBuild crop feature classes')
    for crop_num in crop_number_list:
        try:
            crop_param = crop_param_dict[crop_num]
        except:
            continue
        logging.info('{0:>2d} {1}'.format(crop_num, crop_param))
        # Replace other characters with spaces, then remove multiple spaces
        crop_name = re.sub('[-"().,/~]', ' ', str(crop_param.name).lower())
        crop_name = ' '.join(crop_name.strip().split()).replace(' ', '_')
        crop_path = os.path.join(
            calibration_ws,
            'crop_{0:02d}_{1}{2}'.format(crop_num, crop_name, ext))
        crop_field = 'CROP_{0:02d}'.format(crop_num)

        # Skip if all zone crop areas are below threshold
        if all(
            [v < area_threshold
             for v in crop_acreage_dict[crop_num].values()]):
            logging.info('  All crop acreages below threshold, skipping crop')
            continue

        # Remove existing shapefiles if necessary
        if overwrite_flag and arcpy.Exists(crop_path):
            logging.debug('  Overwriting: {}'.format(
                os.path.basename(crop_path)))
            arcpy.Delete_management(crop_path)

        # Don't check skip list until after existing files are removed
        # if ((crop_test_list and crop_num not in crop_test_list) or
        #         (crop_skip_list and crop_num in crop_skip_list)):
        #     logging.debug('  Skipping')
        #     continue

        # Copy ET cells for each crop if needed
        if arcpy.Exists(crop_path):
            logging.debug('  Shapefile already exists, skipping')
            continue
        else:
            # logging.debug('    {0}'.format(crop_path))
            arcpy.Copy_management(crop_template_path, crop_path)
            # Remove extra fields
            # for field in arcpy.ListFields(crop_path):
            #     if field.name not in keep_field_list:
            #         # logging.debug('    {0}'.format(field.name))
            #         arcpy.DeleteField_management(crop_path, field.name)

        # Add alfalfa cutting field
        if crop_num in [1, 2, 3, 4]:
            if len(arcpy.ListFields(crop_path, dairy_cutting_field)) == 0:
                logging.debug('  Add field: {0}'.format(dairy_cutting_field))
                arcpy.AddField_management(crop_path, dairy_cutting_field,
                                          'Short')
                arcpy.CalculateField_management(crop_path, dairy_cutting_field,
                                                dairy_cuttings, 'PYTHON')
            if len(arcpy.ListFields(crop_path, beef_cutting_field)) == 0:
                logging.debug('  Add field: {0}'.format(beef_cutting_field))
                arcpy.AddField_management(crop_path, beef_cutting_field,
                                          'Short')
                arcpy.CalculateField_management(crop_path, beef_cutting_field,
                                                beef_cuttings, 'PYTHON')

        # Write default crop parameters to file
        field_list = [p[0]
                      for p in param_list] + [cell_id_field, crop_acres_field]
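        # Cursor field order: crop parameters first, then CELL_ID (row[-2])
        # and CROP_ACRES (row[-1]); the row indexing below depends on this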
        with arcpy.da.UpdateCursor(crop_path, field_list) as cursor:
            for row in cursor:
                # Skip and/or remove zones without crop acreage
                if crop_acreage_dict[crop_num][row[-2]] < area_threshold:
                    if remove_empty_flag:
                        cursor.deleteRow()
                    continue
                # Write parameter values
                for i, (param_field, param_method,
                        param_type) in enumerate(param_list):
                    row[i] = getattr(crop_param, param_method)
                # Write crop acreage
                row[-1] = crop_acreage_dict[crop_num][row[-2]]
                cursor.updateRow(row)
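The cursor loop above is driven entirely by param_list: each (field, property, type) row names the CropParameter attribute to copy into the shapefile via getattr. A minimal sketch of that mapping, with a stand-in class rather than the real crop_parameters module:

# DemoParams stands in for the CropParameter objects from CropParams.txt
class DemoParams(object):
    mad_initial = 50
    mad_midseason = 55

demo_param_list = [['MAD_Init', 'mad_initial', 'LONG'],
                   ['MAD_Mid', 'mad_midseason', 'LONG']]
crop_param = DemoParams()
# Same pattern as the update cursor: pull each value by property name
values = [getattr(crop_param, prop) for field, prop, f_type in demo_param_list]
print(values)  # [50, 55]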
Code Example #8
def main(ini_path, time_filter, start_doy, end_doy, year_filter=''):
    """Create Median NIWR Shapefiles from annual_stat files

    Args:
        ini_path (str): file path of the project INI file
        year_filter (list): only include certain years for summary
            (single YYYY or range YYYY:YYYY)
        time_filter (str): 'annual', 'growing_season', 'doy'
        start_doy (int): starting julian doy (inclusive)
        end_doy (int): ending julian doy (inclusive)
    Returns:
        None
    """
    #  INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        gis_ws = config.get('CROP_ET', 'gis_folder')
    except:
        logging.error(
            'gis_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False

    try:
        daily_output_path = config.get('CROP_ET', 'daily_output_folder')
    except:
        logging.error('ERROR: daily_output_folder ' +
                      'parameter is not set in INI file')
        sys.exit()

    try:
        etref_field = config.get('REFET', 'etref_field')
    except:
        logging.error(
            'etref_field parameter must be set in the INI file, exiting')
        return False

    # elevation units (look up elevation field units. include if present in et cell .shp)
    try:
        station_elev_units = config.get('CROP_ET', 'elev_units')
    except:
        logging.error('elev_units must be set in crop_et section of INI '
                      'file, exiting')
        sys.exit()

    # Year Filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
            # logging.info('\nyear_list = {0}'.format(year_list))
        except:
            pass

    # Sub folder names
    daily_ws = os.path.join(project_ws, daily_output_path)
    output_ws = os.path.join(project_ws, 'summary_shapefiles')
    if not os.path.exists(output_ws):
        os.makedirs(output_ws)

    # Check input folders
    if not os.path.exists(daily_ws):
        logging.critical('ERROR: The daily_stat folder does not exist.'
                         ' Check .ini settings')
        sys.exit()

    # Check input folders
    if not os.path.isdir(project_ws):
        logging.critical(('ERROR: The project folder ' +
                          'does not exist\n  {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.critical(
            ('ERROR: The GIS folder ' + 'does not exist\n  {}').format(gis_ws))
        sys.exit()
    logging.info('\nGIS Workspace:      {0}'.format(gis_ws))

    # Regular expressions
    data_re = re.compile(r'(?P<CELLID>\w+)_crop_(?P<CROP>\d+)\.csv$', re.I)
    # data_re = re.compile(r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+)\.csv$', re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(daily_ws, f_name) for f_name in os.listdir(daily_ws)
        if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error('  ERROR: No daily files were found\n' +
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        # logging.info('  {0}'.format(file_name))

        #station, crop_num = os.path.splitext(file_name)[0].split('_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = list(set(crop_nums))
    unique_stations = list(set(stations))

    # Loop through each crop and station list to build summary dataframes for
    # variables to include in output (if not in .csv skip)
    var_list = [
        'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irrigation', 'Runoff',
        'DPerc', 'NIWR', 'Season', 'Start', 'End', 'P_rz', 'P_eft'
    ]
    pmet_field = 'PM{}'.format(etref_field)
    var_list.insert(0, pmet_field)

    # Arc fieldnames can only be 10 characters. Shorten names to include _stat
    # field name list will be based on etref_field ETr, ETo, or ET (not ETo/ETr)
    if 'ETR' in pmet_field.upper():
        var_fieldname_list = [
            'ETr', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irr',
            'Runoff', 'DPerc', 'NIWR', 'Season', 'Start', 'End', 'P_rz',
            'P_eft'
        ]

    elif 'ETO' in pmet_field.upper():
        var_fieldname_list = [
            'ETo', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irr',
            'Runoff', 'DPerc', 'NIWR', 'Season', 'Start', 'End', 'P_rz',
            'P_eft'
        ]

    else:
        var_fieldname_list = [
            'ET', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT', 'Irr',
            'Runoff', 'DPerc', 'NIWR', 'Season', 'Start', 'End', 'P_rz',
            'P_eft'
        ]

    # Testing (should this be an input option?)
    # unique_crop_nums = [86]
    # unique_stations = [608807]
    print('\nCreating summary shapefiles.')
    if year_list:
        logging.info('\nIncluding years {} to {}.'.format(
            min(year_list), max(year_list)))
    # Apply Time Filter (annual, etd growing season, doy (start/end))
    if time_filter == 'annual':
        logging.info('\nIncluding January-December data.')
    if time_filter == 'growing_season':
        logging.info(
            '\nFiltering data using ETDemands defined growing season.')
    if time_filter == 'doy':
        logging.info('\nFiltering data using doy inputs. Start doy: {:03d}'
                     '  End doy: {:03d}'.format(start_doy, end_doy))
    if time_filter == 'wateryear':
        logging.info('\nSummarizing data by water year (Oct-Sept).')

    for crop in unique_crop_nums:
        print('\nProcessing Crop: {:02d}'.format(crop))

        # Initialize df variable to check if pandas df needs to be created
        output_df = None
        for station in unique_stations:
            # Build file path
            file_path = os.path.join(
                daily_ws, '{}_crop_{:02d}.csv'.format(station, crop))
            # Only process files that exists (crop/cell combinations)
            if not os.path.exists(file_path):
                continue

            # Read file into df
            daily_df = pd.read_csv(file_path, skiprows=1)
            # Add more DOY columns to simplify start/end DOY agg below
            daily_df['Start'] = daily_df.DOY.copy()
            daily_df['End'] = daily_df.DOY.copy()
            # Replace Non-growing season DOY values with nan
            daily_df.loc[daily_df.Season == 0, ['Start', 'End']] = np.nan
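            # With off-season days masked to NaN, the annual min of 'Start'
            # and max of 'End' in the agg below give the first/last DOY of
            # the growing season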

            # Apply Year Filter (inclusive)
            if year_list:
                daily_df = daily_df[(daily_df['Year'] >= min(year_list))
                                    & (daily_df['Year'] <= max(year_list))]
                # logging.info('Including Years: {}'.format(year_list))

            # Apply Time Filter (annual, etd growing season, doy (start/end))
            if time_filter == 'growing_season':
                daily_df = daily_df[(daily_df['Season'] == 1)]
            if time_filter == 'doy':
                daily_df = daily_df[(daily_df['DOY'] >= start_doy)
                                    & (daily_df['DOY'] <= end_doy)]
            if time_filter == 'wateryear':
                daily_df['WY'] = daily_df.Year.where(daily_df.Month < 10,
                                                     daily_df.Year + 1)
                if year_list:
                    daily_df = daily_df[daily_df['WY'].isin(year_list)]

            if daily_df.empty:
                logging.info(' Growing Season never started. Skipping cell {}'
                             ' for crop {}.'.format(station, crop))
                continue

            # Dictionary to control agg of each variable
            a = {
                'ETact': 'sum',
                'ETpot': 'sum',
                'ETbas': 'sum',
                'PPT': 'sum',
                'Irrigation': 'sum',
                'Runoff': 'sum',
                'DPerc': 'sum',
                'NIWR': 'sum',
                'Season': 'sum',
                'Start': 'min',
                'End': 'max',
                'Kc': 'mean',
                'Kcb': 'mean',
                'P_rz': 'sum',
                'P_eft': 'sum'
            }
            # Add etref_field to dictionary
            a[pmet_field] = 'sum'

            # GroupStats by Year of each column follow agg assignment above
            if time_filter == 'wateryear':
                yearlygroup_df = daily_df.groupby('WY', as_index=True).agg(a)
            else:
                yearlygroup_df = daily_df.groupby('Year', as_index=True).agg(a)

            if time_filter == 'annual' or time_filter == 'wateryear':
                yearlygroup_df[
                    'P_rz_fraction'] = yearlygroup_df.P_rz / yearlygroup_df.PPT
                yearlygroup_df[
                    'P_eft_fraction'] = yearlygroup_df.P_eft / yearlygroup_df.PPT
                var_list = [
                    'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT',
                    'Irrigation', 'Runoff', 'DPerc', 'NIWR', 'Season', 'Start',
                    'End', 'P_rz', 'P_eft', 'P_rz_fraction', 'P_eft_fraction'
                ]
                pmet_field = 'PM{}'.format(etref_field)
                var_list.insert(0, pmet_field)
                if 'ETR' in pmet_field.upper():
                    var_fieldname_list = [
                        'ETr', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT',
                        'Irr', 'Runoff', 'DPerc', 'NIWR', 'Season', 'Start',
                        'End', 'P_rz', 'P_eft', 'Prz_F', 'Peft_F'
                    ]

                elif 'ETO' in pmet_field.upper():
                    var_fieldname_list = [
                        'ETo', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT',
                        'Irr', 'Runoff', 'DPerc', 'NIWR', 'Season', 'Start',
                        'End', 'P_rz', 'P_eft', 'Prz_F', 'Peft_F'
                    ]

                else:
                    var_fieldname_list = [
                        'ET', 'ETact', 'ETpot', 'ETbas', 'Kc', 'Kcb', 'PPT',
                        'Irr', 'Runoff', 'DPerc', 'NIWR', 'Season', 'Start',
                        'End', 'P_rz', 'P_eft', 'Prz_F', 'Peft_F'
                    ]

            # print(var_list)
            # Take Mean of Yearly GroupStats
            mean_df = yearlygroup_df.mean(axis=0)
            mean_fieldnames = [v + '_mn' for v in var_fieldname_list]

            # Take Median of Yearly GroupStats
            median_df = yearlygroup_df.median(axis=0)
            median_fieldnames = [v + '_mdn' for v in var_fieldname_list]

            # Create df if it doesn't exist
            if output_df is None:
                output_df = pd.DataFrame(index=unique_stations,
                                         columns=mean_fieldnames +
                                         median_fieldnames)

            # Write data to each station row
            output_df.loc[station] = list(mean_df[var_list]) + \
                              list(median_df[var_list])

            # Cast summary objects to floats
            output_df = output_df.astype(float)

            # Grab min/max year for output folder naming
            if time_filter == 'wateryear':
                min_year = min(daily_df['WY'])
                max_year = max(daily_df['WY'])
            else:
                min_year = min(daily_df['Year'])
                max_year = max(daily_df['Year'])

        # Create station ID column from index (ETCells GRIDMET ID is int)
        output_df['Station'] = output_df.index.map(int)

        # Remove rows with Na (is this the best option?)
        # Write all stations to index and then remove empty
        output_df = output_df.dropna()

        # Output file name
        out_name = "{}_crop_{:02d}.shp".format(time_filter, crop)
        if time_filter == 'doy':
            out_name = "{}_{:03d}_{:03d}_crop_{:02d}.shp".format(
                time_filter, start_doy, end_doy, crop)

        # output folder
        if time_filter == 'wateryear':
            output_folder_path = os.path.join(
                output_ws, 'summary_WY{}to{}'.format(min_year, max_year))
        else:
            output_folder_path = os.path.join(
                output_ws, 'summary_{}to{}'.format(min_year, max_year))

        if min_year == max_year:
            if time_filter == 'wateryear':
                output_folder_path = os.path.join(
                    output_ws, 'summary_WY{}'.format(min_year))
            else:
                output_folder_path = os.path.join(
                    output_ws, 'summary_{}'.format(min_year))

        if not os.path.exists(output_folder_path):
            os.makedirs(output_folder_path)

        # Copy ETCELLS.shp and join summary data to it
        data = gpd.read_file(et_cells_path)

        # Data keep list (geometry is needed to write out as geodataframe)
        # keep_list = ['geometry','CELL_ID', 'LAT', 'LON', 'ELEV_M', 'ELEV_FT',
        #              'COUNTYNAME', 'STATENAME', 'STPO', 'HUC8',
        #              'AG_ACRES', 'CROP_{:02d}'.format(crop)]

        if station_elev_units.upper() in ['FT', 'FEET']:
            station_elev_field = 'ELEV_FT'
        elif station_elev_units.upper() in ['M', 'METERS']:
            station_elev_field = 'ELEV_M'

        # Elevation field is included if found in et_cell .shp
        try:
            keep_list = [
                'geometry', 'CELL_ID', 'LAT', 'LON', station_elev_field,
                'AG_ACRES', 'CROP_{:02d}'.format(crop)
            ]
            # Filter ETCells using keep list
            data = data[keep_list]
        except:
            logging.info(
                'Elevation field not found in et_cell .shp. Not including elevation in output.'
            )
            keep_list = [
                'geometry', 'CELL_ID', 'LAT', 'LON', 'AG_ACRES',
                'CROP_{:02d}'.format(crop)
            ]
            # Filter ETCells using keep list
            data = data[keep_list]

        # UPDATE TO NEWER ETCELLS STATION_ID FORMAT !!!!!
        merged_data = data.merge(output_df,
                                 left_on='CELL_ID',
                                 right_on='Station')
        # Remove redundant Station column
        merged_data = merged_data.drop(columns='Station')
        # Write output .shp
        merged_data.to_file(os.path.join(output_folder_path, out_name),
                            driver='ESRI Shapefile')
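The closing join-and-export step is the same in each of these examples: the per-station summary rows are merged onto the ET cell polygons and written out as a shapefile. A hedged sketch of just that step; the paths, the CELL_ID join key, and the stats values are placeholders:

import geopandas as gpd
import pandas as pd

cells = gpd.read_file('ETCells.shp')  # placeholder path
stats = pd.DataFrame({'Station': ['377392'], 'NIWR_mn': [12.3]})
# Join summary rows onto the cell polygons, then drop the duplicate key
merged = cells.merge(stats, left_on='CELL_ID', right_on='Station')
merged = merged.drop(columns='Station')
merged.to_file('annual_crop_03.shp', driver='ESRI Shapefile')  # placeholder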
Code Example #9
def main(ini_path, time_agg, year_filter=''):
    """Read monthly or annual summary files and create effective
    precipitation plots for each crop/cell combination

    Args:
        ini_path (str): file path of the project INI file
        time_agg (str): time aggregation ('annual' or 'wateryear')
        year_filter (list): only include certain years for summary
            (single YYYY or range YYYY:YYYY)
    Returns:
        None
    """

    # INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False

    if time_agg == 'annual':
        print('\nSummarizing Annual Effective Precipitation')
        ws = os.path.join(project_ws, r'annual_stats')
        date_var = 'Year'
    elif time_agg == 'wateryear':
        print('\nSummarizing Water Year Effective Precipitation')
        ws = os.path.join(project_ws, r'monthly_stats')
        date_var = 'WY'
    else:
        # Monthly plotting is not currently supported; the commented branch
        # below is kept for reference
        # print('\nSummarizing Monthly Effective Precipitation')
        # ws = os.path.join(project_ws, r'monthly_stats')
        # date_var = 'Date'
        logging.error('time_agg must be "annual" or "wateryear", exiting')
        return False

    # Identify unique crops and station_ids in monthly_stats folder
    # Regular expressions
    data_re = re.compile(r'(?P<CELLID>\w+)_crop_(?P<CROP>\d+)\.csv$', re.I)
    # data_re = re.compile(r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+)\.csv$',
    #  re.I)

    # testing
    # monthly_ws = r"D:\upper_co_full\monthly_stats"
    # et_cells_path = os.path.join('D:\upper_co_full\gis','ETCells.shp')
    # etref_field = 'ETr_ASCE'

    # Build list of all data files
    data_file_list = sorted(
        [os.path.join(ws, f_name) for f_name in os.listdir(ws)
         if data_re.match(f_name)])
    if not data_file_list:
        logging.error(
            '  ERROR: No ET summary files were found\n' +
            '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))

        # station, crop_num = os.path.splitext(file_name)[0].split(
        # '_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = sorted(list(set(crop_nums)))
    unique_stations = sorted(list(set(stations)))

    # Year Filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
        except:
            pass

    print('\n Reading Data and Creating Effective PPT Plots')
    for station in unique_stations:
        logging.info('\n Processing Station: {}'.format(station))
        for crop in unique_crop_nums:
            logging.info('\n Processing Crop: {:02d}'.format(crop))
            crop_vars_list = ['ETp_{:02d}'.format(crop), 'PPT', 'Season']
            # Initialize df variable to check if pandas df needs to be created
            # Build File Path
            file_path = os.path.join(ws,
                                     '{}_crop_{:02d}.csv'.format(station,
                                                                 crop))
            # Only process files that exists (crop/cell combinations)
            if not os.path.exists(file_path):
                logging.info('Crop not present in cell. Skipping')
                continue

            # Read file into df (skip header)
            df = pd.read_csv(file_path, skiprows=1)
            # print(df.head())
            df.set_index('Year', inplace=True)

            # Filter based on Year List
            if year_list:
                df = df[df.index.isin(year_list)]

            # Min/Max for file naming
            year_min = min(df.index)
            year_max = max(df.index)

            if time_agg == 'wateryear':
                # add water year column
                df['WY'] = df.index.where(df.Month < 10, df.index + 1)
                # groupby WY (sum); select PPT variables
                df = df[['PPT', 'DPerc', 'P_rz', 'P_eft']].groupby(df.WY).sum()

                # calculate WY fractions
                df['P_rz_fraction'] = df.P_rz / df.PPT
                df['P_eft_fraction'] = df.P_eft / df.PPT
                df = df.reset_index()
                df.set_index('WY', inplace=True)
                if year_list:
                    # 'WY' is the index after set_index above
                    df = df[df.index.isin(year_list)]
                else:
                    print('Removing first and last year of dataset to avoid partial water year totals.')
                    df = df.iloc[:-1]  # drop last year
                    df = df.iloc[1:]  # drop first year

            # print(df.head())
            # sys.exit()

            fig, ax1 = plt.subplots(1, 1, figsize=(16, 5))
            ax2 = ax1.twinx()  # Create another axes sharing the x-axis of ax1

            ax1.plot(df.index, df.P_rz_fraction, color='k', linestyle='-', lw=1.5, label='P_rz')
            ax1.plot(df.index, df.P_eft_fraction, color='mediumblue', lw=1.5, linestyle='-',
                     label='P_eft')
            ax1.set_ylabel('Fraction of Effectiveness', size=12)

            ax2.bar(df.index, df.PPT, color='lightgrey', label='PPT')
            ax2.bar(df.index, df.DPerc, color='darkgrey', label='DPerc')
            ax2.set_ylabel('Annual Precipitation/Deep Percolation', size=12)

            ax1.axes.set_title(
                'Cell: {}, Crop: {:02d} ({})'.format(station, crop, time_agg),
                fontsize=12, color="black", alpha=1)

            fig.legend()
            ax1.set_zorder(1)  # default zorder is 0 for ax1 and ax2
            ax1.patch.set_visible(False)  # prevents ax1 from hiding ax2

            output_ws = os.path.join(project_ws, 'effective_ppt_plots',
                                     '{}'.format(time_agg))
            if not os.path.exists(output_ws):
                os.makedirs(output_ws)

            output_path = os.path.join(
                output_ws, '{}_crop_{:02d}_{}_{}_{}.png'.format(
                    station, crop, time_agg, year_min, year_max))
            # print(output_path)
            fig.savefig(output_path, dpi=300)
            plt.close()
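The plotting section relies on a small matplotlib layering trick: after twinx(), ax1 is raised above ax2 and its background patch is hidden so the fraction lines sit on top of the precipitation bars without blocking them. A self-contained sketch of just that trick, with made-up values:

import matplotlib.pyplot as plt

fig, ax1 = plt.subplots(figsize=(8, 4))
ax2 = ax1.twinx()  # second y-axis sharing the x-axis
ax1.plot([2000, 2001, 2002], [0.55, 0.70, 0.62], color='k', label='fraction')
ax2.bar([2000, 2001, 2002], [310, 255, 405], color='lightgrey', label='PPT')
ax1.set_zorder(1)             # default zorder is 0 for both axes
ax1.patch.set_visible(False)  # keep ax1's background from hiding the bars
fig.legend()
fig.savefig('demo_layering.png', dpi=300)  # placeholder output path
plt.close(fig)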
Code Example #10
def main(ini_path, year_filter=''):
    """Restructure monthly summary .csv files into a single .csv
    for input into USBR indicator method spreadsheet/workflow.


    Args:
        ini_path (str): file path of the project INI file
        year_filter (list): only include certain years for summary
            (single YYYY or range YYYY:YYYY)
    Returns:
        None
    """

    # INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False

    monthly_ws = os.path.join(project_ws, r'monthly_stats')

    # Identify unique crops and station_ids in monthly_stats folder
    # Regular expressions
    data_re = re.compile(r'(?P<CELLID>\w+)_crop_(?P<CROP>\d+)\.csv$', re.I)
    # data_re = re.compile('(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+).csv$',
    #  re.I)

    # testing
    # monthly_ws = r"D:\upper_co_full\monthly_stats"
    # et_cells_path = os.path.join('D:\upper_co_full\gis','ETCells.shp')
    # etref_field = 'ETr_ASCE'

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(monthly_ws, f_name) for f_name in os.listdir(monthly_ws)
        if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error('  ERROR: No annual ET files were found\n' +
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))

        # station, crop_num = os.path.splitext(file_name)[0].split(
        # '_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = sorted(list(set(crop_nums)))
    unique_stations = sorted(list(set(stations)))

    # Year Filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
        except:
            pass
    # Min/Max for file naming (min() of None would raise a TypeError, so
    #   fall back to a generic label when no year filter is given)
    if year_list:
        year_min, year_max = min(year_list), max(year_list)
    else:
        year_min, year_max = 'all', 'all'

    # Build full variable list for output order
    et_list = []
    for crop in unique_crop_nums:
        et_list.append('ETp_{:02d}'.format(crop))
        et_list.append('Season_{:02d}'.format(crop))
    full_var_list = ['Station_ID', 'Date'] + et_list + ['PPT']

    # Testing (cell with multiple crops)
    # unique_stations = [377392]
    # Loop through each station and crop list to build summary dataframes for
    print('\n Reading Data and Restructuring to USBR Format')

    df = pd.DataFrame(columns=full_var_list)
    for station in unique_stations:
        logging.info('\n Processing Station: {}'.format(station))
        loop_df = pd.DataFrame()
        for crop in unique_crop_nums:
            logging.info('\n Processing Crop: {:02d}'.format(crop))
            crop_vars_list = ['ETp_{:02d}'.format(crop), 'PPT', 'Season']
            # Initialize df variable to check if pandas df needs to be created
            # Build File Path
            file_path = os.path.join(
                monthly_ws, '{}_crop_{:02d}.csv'.format(station, crop))
            # Only process files that exist (crop/cell combinations)
            if not os.path.exists(file_path):
                logging.info('Crop not present in cell. Skipping')
                continue

            # Read file into df (skip header)
            monthly_df = pd.read_csv(file_path, skiprows=1)
            # Filter based on Year List
            if year_list:
                monthly_df = monthly_df[monthly_df['Year'].isin(year_list)]

            # Rename Columns to Match USBR Naming
            monthly_df = monthly_df.rename(
                {
                    'ETact': 'ETp_{:02d}'.format(crop),
                    'Season': 'Season_{:02d}'.format(crop)
                },
                axis='columns')
            # Add Station_ID column
            monthly_df['Station_ID'] = station

            # First pass create loop DF with PPT and Season
            if loop_df.empty:
                loop_df = monthly_df[[
                    'Station_ID', 'Date', 'ETp_{:02d}'.format(crop),
                    'Season_{:02d}'.format(crop), 'PPT'
                ]]
            else:
                # After df is built merge new ET data to existing df
                # Merge on both Station_ID and Date
                loop_df = loop_df.merge(monthly_df[[
                    'Station_ID', 'Date', 'ETp_{:02d}'.format(crop),
                    'Season_{:02d}'.format(crop)
                ]],
                                        left_on=['Station_ID', 'Date'],
                                        right_on=['Station_ID', 'Date'],
                                        how='outer')

        # Concat station loop_df to output df
        df = pd.concat([df, loop_df], axis=0, ignore_index=True, sort=True)
        df = df.fillna(-9999)

    output_ws = os.path.join(project_ws, 'indicatormethod_restructure')
    if not os.path.exists(output_ws):
        os.makedirs(output_ws)

    output_path = os.path.join(
        output_ws, 'ETp_Monthly_{}_{}.csv'.format(year_min, year_max))

    # Write Output File
    df.to_csv(output_path, sep=',', columns=full_var_list, index=False)
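The restructuring above accumulates one frame per station through repeated outer merges on Station_ID and Date, so months where a crop is absent survive as NaN and are flagged with -9999 at the end. A toy sketch of that merge pattern (frames and values are invented):

import pandas as pd

a = pd.DataFrame({'Station_ID': [1, 1], 'Date': ['2000-01', '2000-02'],
                  'ETp_03': [5.0, 6.0]})
b = pd.DataFrame({'Station_ID': [1], 'Date': ['2000-02'], 'ETp_07': [2.0]})
# Outer merge keeps every Station_ID/Date pair even when a crop is missing
merged = a.merge(b, on=['Station_ID', 'Date'], how='outer')
print(merged.fillna(-9999))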
Code example #11
def main(ini_path,
         show_flag=False,
         save_flag=True,
         full_size=(3.2, 4.0),
         sub_size=(6.5, 8.0),
         full_dpi=300,
         sub_dpi=200,
         simplify_tol=None,
         states_flag=False):
    """Plot future statistic maps

    For now, data is stored in excel files in stats_tables folder

    Args:
        ini_path (str): file path of the project INI file
        show_flag (bool): if True, show figure
        save_flag (bool): if True, save figure to disk
        full_size (tuple): full figure size in inches (width, height)
        sub_size (tuple): sub figure size in inches (width, height)
        full_dpi (int): full figure dots per inch
        sub_dpi (int): sub figure dots per inch
        simplify_tol (float): cell geometry simplification tolerance
        states_flag (bool): if True, draw state boundaries

    Returns:
        None
    """
    image_ext = 'png'

    period_list = [2020, 2050, 2080]
    scenario_list = [5, 25, 50, 75, 95]

    full_value_list = ['et', 'eto', 'niwr', 'ppt', 'q', 'rs', 'tmean', 'u']
    sub_value_list = ['et', 'eto', 'niwr', 'ppt', 'q', 'rs', 'tmean', 'u']
    sub_delta_list = ['et', 'eto', 'niwr', 'ppt', 'q', 'rs', 'tmean', 'u']

    delta_type = {
        'et': 'percent',
        'eto': 'percent',
        'niwr': 'percent',
        'ppt': 'percent',
        'q': 'percent',
        'rs': 'percent',
        'tmean': 'delta',
        'u': 'percent'
    }

    # Adjust data type names in output files
    output_var = {
        'area': 'area',
        'et': 'et',
        'eto': 'eto',
        'niwr': 'niwr',
        'ppt': 'ppt',
        'q': 'q',
        'rs': 'rs',
        'tmean': 'tmean',
        'u': 'wind'
    }

    # Figure caption text
    value_text = {
        'area': 'Crop Area [acres]',
        'et': 'Evapotranspiration [mm]',
        'eto': 'Reference ET [mm/year]',
        'niwr': 'Net Irrigation Water\nRequirement [mm]',
        'ppt': 'Precipitation [mm]',
        'q': 'Specific Humidity [kg/kg]',
        'rs': 'Solar Radiation [W/m^2]',
        'tmean': 'Mean Temperature [C]',
        'u': 'Wind speed [m/s]'
    }
    delta_text = {
        'et': 'Evapotranspiration\nPercent Change [%]',
        'eto': 'Reference ET\nPercent Change [%]',
        'niwr': 'NIWR\nPercent Change [%]',
        'ppt': 'Precipitation\nPercent Change [%]',
        'q': 'Specific Humidity\nPercent Change [%]',
        'rs': 'Solar Radiation\nPercent Change [%]',
        'tmean': 'Mean Temperature\nDelta [C]',
        'u': 'Wind speed\nPercent Change [%]'
    }

    # Colormap
    cmap_names = {
        'area': {
            'value': 'white_green'
        },
        'et': {
            'value': 'blue_red',
            'delta': ['red_white', 'white_blue']
        },
        'eto': {
            'value': 'blue_red',
            'delta': ['red_white', 'white_blue']
        },
        'niwr': {
            'value': 'blue_red',
            'delta': ['blue_white', 'white_red']
        },
        'ppt': {
            'value': 'red_blue',
            'delta': ['red_white', 'white_blue']
        },
        'q': {
            'value': 'blue_red',
            'delta': ['red_white', 'white_blue']
        },
        'rs': {
            'value': 'blue_red',
            'delta': ['red_white', 'white_blue']
        },
        'tmean': {
            'value': 'blue_red',
            'delta': ['red_white', 'white_blue']
        },
        'u': {
            'value': 'blue_red',
            'delta': ['red_white', 'white_blue']
        }
    }

    # Round values/deltas to next multiple of this amount
    round_factor = {
        'area': {
            'value': 1000
        },
        'et': {
            'value': 1,
            'delta': 1
        },
        'eto': {
            'value': 1,
            'delta': 1
        },
        'niwr': {
            'value': 1,
            'delta': 1
        },
        'ppt': {
            'value': 1,
            'delta': 1
        },
        'q': {
            'value': 0.0001,
            'delta': 1
        },
        'rs': {
            'value': 1,
            'delta': 1
        },
        'tmean': {
            'value': 1,
            'delta': 1
        },
        'u': {
            'value': 1,
            'delta': 1
        }
    }

    # ET Cells field names
    cell_id_field = 'CELL_ID'
    ag_acres_field = 'AG_ACRES'

    # Excel file parameters
    # BasinID should not be in the file name
    full_table_fmt = '{basin_id}_base_{var}.xlsx'
    full_value_tab = 'Sheet1'
    sub_table_fmt = '{basin_id}_future_{var}.xlsx'
    sub_value_tab = 'Values'
    sub_delta_tabs = {'delta': 'Difference', 'percent': 'Percent Difference'}
    table_id_field = 'HUC'
    period_field = 'Period'
    scenario_fields = {
        5: '5th percentile',
        25: '25th percentile',
        50: 'Median',
        75: '75th percentile',
        95: '95th percentile'
    }

    # Draw state boundaries on figures
    states_path = r'Z:\USBR_Ag_Demands_Project\CAT_Basins\common\gis\states\cb_2014_us_state_500k_albers.shp'
    states_field = 'NAME'
    # states_path = r'Z:\USBR_Ag_Demands_Project\CAT_Basins\common\gis\states\state_nrcs_a_mbr_albers.shp'
    # states_field = 'STATENAME'

    # full_table_re = re.compile(
    #     '(?P<basin_id>\w+)_base_(?P<var>\w+).xlsx', re.I)
    # sub_table_re = re.compile(
    #     '(?P<basin_id>\w+)_future_(?P<var>\w+).xlsx', re.I)

    # font = matplotlib.font_manager.FontProperties(
    #     family='Comic Sans MS', weight='semibold', size=8)
    # font = matplotlib.font_manager.FontProperties(
    #     family='Tahoma', weight='semibold', size=label_size)
    # font = matplotlib.font_manager.FontProperties(
    #     family='Consolas', weight='semibold', size=7)

    # Check that the INI file can be read
    logging.info('\nGenerate crop summary maps from daily data')
    logging.info('  INI: {}'.format(ini_path))
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    def get_config_param(config, param_name, section):
        """"""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: The {} parameter is not set' +
                           ' in the INI file').format(param_name))
            sys.exit()
        return param_value

    basin_id = get_config_param(config, 'basin_id', crop_et_sec)
    project_ws = get_config_param(config, 'project_folder', crop_et_sec)
    cells_path = get_config_param(config, 'cells_path', crop_et_sec)

    stats_ws = os.path.join(project_ws, 'stats_tables')
    output_ws = os.path.join(project_ws, 'stats_maps')

    # Check workspaces
    if not os.path.isdir(stats_ws):
        logging.error(('\nERROR: The stats tables folder {0} ' +
                       'could not be found\n').format(stats_ws))
        sys.exit()
    if not os.path.isfile(cells_path):
        logging.error(('\nERROR: The cells shapefile {0} ' +
                       'could not be found\n').format(cells_path))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.makedirs(output_ws)

    # Read ET Cells into memory with fiona and shapely
    # Convert multi-polygons to list of polygons
    logging.info('\nReading ET cells shapefile')
    cell_geom_dict = read_cell_geometries(cells_path, cell_id_field,
                                          simplify_tol)
    cell_extent = read_cell_extent(cells_path)
    if not cell_geom_dict:
        logging.error('  ET Cell shapefile not read in, exiting')
        return False

    # Read in state geometries
    state_geom_dict = {}
    if states_flag:
        logging.info('\nReading state shapefile')
        try:
            state_geom_dict = read_cell_geometries(states_path, states_field)
        except:
            logging.error('  State geometries not read in, ignoring')

        # Remove state features that don't intersect the cells extent
        # (iterate over a copy since entries may be deleted in the loop)
        for k, geom_list in list(state_geom_dict.items()):
            geom_list = [
                g for g in geom_list
                if extents_overlap(list(g.bounds), cell_extent)
            ]
            if geom_list:
                state_geom_dict[k] = geom_list
            else:
                del state_geom_dict[k]

    # Keyword arguments to plotting functions
    full_kwargs = {
        'table_id_field': table_id_field,
        'scenario_field': scenario_fields[50],
        'state_geom_dict': state_geom_dict,
        'cell_geom_dict': cell_geom_dict,
        'cell_extent': cell_extent,
        'figure_size': full_size,
        'figure_dpi': full_dpi,
        'save_flag': save_flag,
        'show_flag': show_flag
    }
    sub_kwargs = {
        'period_list': period_list,
        'scenario_list': scenario_list,
        'table_id_field': table_id_field,
        'period_field': period_field,
        'scenario_fields': scenario_fields,
        'state_geom_dict': state_geom_dict,
        'cell_geom_dict': cell_geom_dict,
        'cell_extent': cell_extent,
        'figure_size': sub_size,
        'figure_dpi': sub_dpi,
        'save_flag': save_flag,
        'show_flag': show_flag
    }

    # Plot the crop area
    var = 'area'
    logging.info('\nVariable: {}'.format(var))
    cell_area_dict = read_cell_data(cells_path, cell_id_field, ag_acres_field)
    # Convert the crop area dictionary to dataframe even though it
    #   immediately gets converted back to a dict in full_plot()
    # For simplicity, set column names to match excel file column names
    cell_area_df = pd.DataFrame(list(cell_area_dict.items()),
                                columns=[table_id_field, scenario_fields[50]])
    full_plot(os.path.join(
        output_ws, 'fullplot_{}_value.{}'.format(output_var[var], image_ext)),
              cell_area_df,
              caption=value_text[var],
              cmap_name=cmap_names[var]['value'],
              v_min=0,
              v_max=max(cell_area_dict.values()),
              **full_kwargs)

    # Build master type list
    type_list = sorted(set(full_value_list + sub_value_list + sub_delta_list))

    # Read in all tables
    for var in type_list:
        logging.info('\nVariable: {}'.format(var))
        full_table_name = full_table_fmt.format(basin_id=basin_id, var=var)
        logging.info('  {}'.format(full_table_name))
        full_value_df = pd.read_excel(os.path.join(stats_ws, full_table_name),
                                      sheet_name=full_value_tab,
                                      skiprows=1)
        full_value_df[table_id_field] = full_value_df[table_id_field].astype(
            'str')
        logging.debug('  {}'.format(full_value_tab))
        logging.debug(str(full_value_df.head()) + '\n')

        sub_table_name = sub_table_fmt.format(basin_id=basin_id, var=var)
        logging.info('  {}'.format(sub_table_name))
        sub_value_df = pd.read_excel(os.path.join(stats_ws, sub_table_name),
                                     sheet_name=sub_value_tab,
                                     skiprows=1)
        sub_value_df[table_id_field] = sub_value_df[table_id_field].astype(
            'str')
        logging.debug('  {}'.format(sub_value_tab))
        logging.debug(str(sub_value_df.head()) + '\n')

        # Switch tabs
        sub_delta_df = pd.read_excel(os.path.join(stats_ws, sub_table_name),
                                     sheet_name=sub_delta_tabs[delta_type[var]],
                                     skiprows=1)
        sub_delta_df[table_id_field] = sub_delta_df[table_id_field].astype(
            'str')
        logging.debug('  {}'.format(sub_delta_tabs[delta_type[var]]))
        logging.debug(str(sub_delta_df.head()) + '\n')

        # Build colorbar ranges
        logging.info('\n  Computing colorbar ranges')

        # Calculate min/max value
        # DEADBEEF - Make this a separate function
        f = list(scenario_fields.values())  # list so it works as a column indexer
        full_value_min = min(full_value_df[f].values.flatten())
        full_value_max = max(full_value_df[f].values.flatten())
        sub_value_min = min(sub_value_df[f].values.flatten())
        sub_value_max = max(sub_value_df[f].values.flatten())
        sub_delta_min = min(sub_delta_df[f].values.flatten())
        sub_delta_max = max(sub_delta_df[f].values.flatten())

        # Adjust very small negative min deltas
        # if delta_min_negative_override < sub_delta_min < 0:
        #     sub_delta_min = delta_min_negative_override

        # Calculate min/max for value and delta
        full_value_round_min = myround(full_value_min, 'floor',
                                       round_factor[var]['value'])
        full_value_round_max = myround(full_value_max, 'ceil',
                                       round_factor[var]['value'])
        sub_value_round_min = myround(sub_value_min, 'floor',
                                      round_factor[var]['value'])
        sub_value_round_max = myround(sub_value_max, 'ceil',
                                      round_factor[var]['value'])
        sub_delta_round_min = myround(sub_delta_min, 'floor',
                                      round_factor[var]['delta'])
        sub_delta_round_max = myround(sub_delta_max, 'ceil',
                                      round_factor[var]['delta'])

        # Print min/max value
        logging.info('    Full Value Min: {0:>10.2f} {1:>10}'.format(
            full_value_min, full_value_round_min))
        logging.info('    Full Value Max: {0:>10.2f} {1:>10}'.format(
            full_value_max, full_value_round_max))
        logging.info('    Sub Value Min:  {0:>10.2f} {1:>10}'.format(
            sub_value_min, sub_value_round_min))
        logging.info('    Sub Value Max:  {0:>10.2f} {1:>10}'.format(
            sub_value_max, sub_value_round_max))
        logging.info('    Sub Delta Min:  {0:>10.2f} {1:>10}'.format(
            sub_delta_min, sub_delta_round_min))
        logging.info('    Sub Delta Max:  {0:>10.2f} {1:>10}'.format(
            sub_delta_max, sub_delta_round_max))

        # Min/Max values will be the same across fullplots and subplots
        match_colorbar_flag = True
        if match_colorbar_flag:
            full_value_round_min = min(full_value_round_min,
                                       sub_value_round_min)
            full_value_round_max = max(full_value_round_max,
                                       sub_value_round_max)
            sub_value_round_min = min(full_value_round_min,
                                      sub_value_round_min)
            sub_value_round_max = max(full_value_round_max,
                                      sub_value_round_max)

        # Build full value plots
        if var in full_value_list:
            output_name = 'fullplot_{}_value.{}'.format(
                output_var[var], image_ext)
            output_path = os.path.join(output_ws, output_name)
            full_plot(output_path,
                      full_value_df,
                      caption=value_text[var],
                      cmap_name=cmap_names[var]['value'],
                      v_min=full_value_round_min,
                      v_max=full_value_round_max,
                      **full_kwargs)

        # Build sub value plots
        if var in sub_value_list:
            output_name = 'subplot_{}_value.{}'.format(output_var[var],
                                                       image_ext)
            output_path = os.path.join(output_ws, output_name)
            sub_plot(output_path,
                     sub_value_df,
                     caption=value_text[var],
                     cmap_name=cmap_names[var]['value'],
                     v_min=sub_value_round_min,
                     v_max=sub_value_round_max,
                     **sub_kwargs)

        # Build sub delta plots
        if var in sub_delta_list:
            output_name = 'subplot_{}_delta.{}'.format(output_var[var],
                                                       image_ext)
            output_path = os.path.join(output_ws, output_name)
            sub_plot(output_path,
                     sub_delta_df,
                     caption=delta_text[var],
                     cmap_name=cmap_names[var]['delta'],
                     v_min=sub_delta_round_min,
                     v_max=sub_delta_round_max,
                     **sub_kwargs)
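The colorbar bounds above are widened with a project helper `myround(value, direction, factor)` that is defined elsewhere in et-demands. It is not shown here, so the following is only a plausible sketch of such a helper, an assumption rather than the project's actual implementation:

import math

def myround(value, direction, factor):
    """Round value to a multiple of factor, down ('floor') or up ('ceil')."""
    if direction == 'floor':
        return math.floor(value / factor) * factor
    elif direction == 'ceil':
        return math.ceil(value / factor) * factor
    raise ValueError("direction must be 'floor' or 'ceil'")

print(myround(123.4, 'floor', 10), myround(123.4, 'ceil', 10))  # 120 130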
Code example #12
def main(ini_path,
         start_date=None,
         end_date=None,
         crop_str='',
         overwrite_flag=False):
    """Compuate Growing Season Statistics

    Args:
        ini_path (str): file path of the project INI file
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separated list or range of crops to compare
        overwrite_flag (bool): If True, overwrite existing files

    Returns:
        None
    """

    # Field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field   = 'Day'
    season_field = 'Season'

    # Output file/folder names
    gs_summary_name = 'growing_season_full_summary.csv'
    gs_mean_annual_name = 'growing_season_mean_annual.csv'
    baddata_name = 'growing_season_bad_data.txt'

    # Number of header lines in data file
    # header_lines = 2

    # Delimiter
    sep = ','
    # sep = r"\s*"

    logging.info('\nComputing growing season statistics')
    logging.info('  INI: {}'.format(ini_path))

    # Check that the INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    def get_config_param(config, param_name, section):
        """"""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: The {} parameter is not set' +
                           ' in the INI file').format(param_name))
            sys.exit()
        return param_value

    project_ws = get_config_param(config, 'project_folder', crop_et_sec)
    daily_stats_ws = os.path.join(
        project_ws, get_config_param(config, 'daily_output_folder',
                                     crop_et_sec))
    gs_stats_ws = os.path.join(
        project_ws, get_config_param(config, 'gs_output_folder', crop_et_sec))

    # Check workspaces
    if not os.path.isdir(daily_stats_ws):
        logging.error(('\nERROR: The daily ET stats folder {0} ' +
                       'could not be found\n').format(daily_stats_ws))
        sys.exit()
    if not os.path.isdir(gs_stats_ws):
        os.mkdir(gs_stats_ws)

    # Range of data to plot
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # Allow user to subset crops from INI
    try:
        crop_skip_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_skip_list'))))
    except:
        crop_skip_list = []
        # crop_skip_list = [44, 45, 46, 55, 56, 57]
    try:
        crop_test_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_test_list'))))
    except:
        crop_test_list = []
    # Overwrite INI crop list with user defined values
    # Could also append to the INI crop list
    if crop_str:
        try:
            crop_test_list = list(util.parse_int_set(crop_str))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))
        except:
            pass
    logging.debug('\n  crop_test_list = {0}'.format(crop_test_list))
    logging.debug('  crop_skip_list = {0}'.format(crop_skip_list))

    # Output file paths
    gs_summary_path = os.path.join(gs_stats_ws, gs_summary_name)
    gs_mean_annual_path = os.path.join(gs_stats_ws, gs_mean_annual_name)
    baddata_path = os.path.join(gs_stats_ws, baddata_name)

    # Build list of site files
    # site_file_re = '^RG\d{8}ETca.dat$'
    # site_file_list = sorted([item for item in os.listdir(workspace)
    #                         if re.match(site_file_re, item)])
    # site_file_list = sorted([
    #     item for item in os.listdir(daily_stats_ws)
    #     if re.match('\w+_daily_crop_\d{2}.csv$', item)])

    # Initialize output data arrays and open bad data log file
    gs_summary_data = []
    gs_mean_annual_data = []
    baddata_file = open(baddata_path, 'w')

    # Regular expressions
    data_re = re.compile(r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+)\.csv$', re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(daily_stats_ws, f_name)
        for f_name in os.listdir(daily_stats_ws) if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error('  ERROR: No daily ET files were found\n' +
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))

        station, crop_num = os.path.splitext(file_name)[0].split(
            '_daily_crop_')
        crop_num = int(crop_num)
        logging.debug('    Station:         {0}'.format(station))
        logging.debug('    Crop Num:        {0}'.format(crop_num))
        if station == 'temp':
            logging.debug('      Skipping')
            continue

        # Get crop name
        with open(file_path, 'r') as file_f:
            crop_name = file_f.readline().split('-', 1)[1].strip()
            logging.debug('    Crop:            {0}'.format(crop_name))

        # Read data from file into record array (structured array)
        daily_df = pd.read_table(file_path, header=0, comment='#', sep=sep)
        logging.debug('    Fields: {0}'.format(', '.join(
            daily_df.columns.values)))
        daily_df[date_field] = pd.to_datetime(daily_df[date_field])
        daily_df.set_index(date_field, inplace=True)
        daily_df[year_field] = daily_df.index.year
        # daily_df[year_field] = daily_df[date_field].map(lambda x: x.year)

        # Build list of unique years
        year_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug('    All Years: {0}'.format(', '.join(
            list(util.ranges(year_array.tolist())))))
        # logging.debug('    All Years: {0}'.format(
        #    ','.join(map(str, year_array.tolist()))))

        # Don't include the first year in the stats
        crop_year_start = min(daily_df[year_field])
        logging.debug('    Skipping {}, first year'.format(crop_year_start))
        daily_df = daily_df[daily_df[year_field] > crop_year_start]

        # Check if start and end years have >= 365 days
        crop_year_start = min(daily_df[year_field])
        crop_year_end = max(daily_df[year_field])
        if sum(daily_df[year_field] == crop_year_start) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]
        if sum(daily_df[year_field] == crop_year_end) < 365:
            logging.debug(
                '    Skipping {}, missing days'.format(crop_year_end))
            daily_df = daily_df[daily_df[year_field] < crop_year_end]
        del crop_year_start, crop_year_end

        # Only keep years between year_start and year_end
        if year_start:
            daily_df = daily_df[daily_df[year_field] >= year_start]
        if year_end:
            daily_df = daily_df[daily_df[year_field] <= year_end]

        year_sub_array = np.sort(
            np.unique(np.array(daily_df[year_field]).astype(int)))
        logging.debug('    Plot Years: {0}'.format(', '.join(
            list(util.ranges(year_sub_array.tolist())))))
        # logging.debug('    Plot Years: {0}'.format(
        #    ','.join(map(str, year_sub_array.tolist()))))

        # Get separate date related fields
        date_array = daily_df.index.date
        year_array = daily_df[year_field].values.astype(int)
        doy_array = daily_df[doy_field].values.astype(int)

        # Remove leap days
        # leap_array = (doy_array == 366)
        # doy_sub_array = np.delete(doy_array, np.where(leap_array)[0])

        # Build separate arrays for each set of crop specific fields
        season_array = np.array(daily_df[season_field])

        # # Original code from growing_season script
        # Initialize mean annual growing season length variables
        gs_sum, gs_cnt, gs_mean = 0, 0, 0
        start_sum, start_cnt, start_mean = 0, 0, 0
        end_sum, end_cnt, end_mean = 0, 0, 0

        # Process each year
        for year_i, year in enumerate(year_sub_array):
            year_crop_str = "Crop: {0:2d} {1:32s}  Year: {2}".format(
                crop_num, crop_name, year)
            logging.debug(year_crop_str)

            # Extract data for target year
            year_mask = (year_array == year)
            date_sub_array = date_array[year_mask]
            doy_sub_array = doy_array[year_mask]
            season_sub_mask = season_array[year_mask]

            # Look for transitions in season value
            # Start transitions up the day before the actual start
            # End transitions down on the end date
            try:
                start_i = np.where(np.diff(season_sub_mask) == 1)[0][0] + 1
            except:
                start_i = None
            try:
                end_i = np.where(np.diff(season_sub_mask) == -1)[0][0]
            except:
                end_i = None

            # If start transition is not found, season starts on DOY 1
            if start_i is None and end_i is not None:
                start_i = 0
            # If end transition is not found, season ends on DOY 365/366
            elif start_i is not None and end_i is None:
                end_i = -1
            # If neither transition is found, season is always on
            # elif start_i is None and end_i is None:
            #     start_i, end_i = 0, -1

            # Calculate start and stop day of year
            # Set start/end to 0 if season never gets set to 1
            if not np.any(season_sub_mask):
                skip_str = "  Skipping, season flag was never set to 1"
                logging.debug(skip_str)
                baddata_file.write('{0}  {1} {2}\n'.format(
                    station, year_crop_str, skip_str))
                start_doy, end_doy = 0, 0
                start_date, end_date = "", ""
            elif np.all(season_sub_mask):
                start_doy, end_doy = doy_sub_array[0], doy_sub_array[-1]
                start_date = date_sub_array[0].isoformat()
                end_date = date_sub_array[-1].isoformat()
            else:
                start_doy, end_doy = doy_sub_array[start_i], doy_sub_array[
                    end_i]
                start_date = date_sub_array[start_i].isoformat()
                end_date = date_sub_array[end_i].isoformat()
            gs_length = sum(season_sub_mask)
            logging.debug("Start: {0} ({1})  End: {2} ({3})".format(
                start_doy, start_date, end_doy, end_date))

            # Track growing season length and mean annual g.s. length
            if start_doy > 0 and end_doy > 0 and year_i != 0:
                start_sum += start_doy
                end_sum += end_doy
                gs_sum += gs_length
                start_cnt += 1
                end_cnt += 1
                gs_cnt += 1

            # Append data to list
            gs_summary_data.append([
                station, crop_num, crop_name, year, start_doy, end_doy,
                start_date, end_date, gs_length
            ])

            # Cleanup
            del year_mask, doy_sub_array, season_sub_mask
            del start_doy, end_doy, start_date, end_date, gs_length

        # Calculate mean annual growing season start/end/length
        if gs_cnt > 0:
            mean_start_doy = int(round(float(start_sum) / start_cnt))
            mean_end_doy = int(round(float(end_sum) / end_cnt))
            mean_length = int(round(float(gs_sum) / gs_cnt))
            mean_start_date = util.doy_2_date(year, mean_start_doy)
            mean_end_date = util.doy_2_date(year, mean_end_doy)
        else:
            mean_start_doy, mean_end_doy, mean_length = 0, 0, 0
            mean_start_date, mean_end_date = "", ""

        # Append mean annual growing season data to list
        gs_mean_annual_data.append([
            station, crop_num, crop_name, mean_start_doy, mean_end_doy,
            mean_start_date, mean_end_date, mean_length
        ])

        # Cleanup
        del season_array
        del gs_sum, gs_cnt, gs_mean
        del start_sum, start_cnt, start_mean
        del end_sum, end_cnt, end_mean
        del mean_start_doy, mean_end_doy, mean_length
        del mean_start_date, mean_end_date
        del year_array, year_sub_array, doy_array
        del daily_df
        logging.debug("")

    # Close bad data file log
    baddata_file.close()

    # Build output record array file
    gs_summary_csv = csv.writer(open(gs_summary_path, 'w', newline=''))
    gs_summary_csv.writerow([
        'STATION', 'CROP_NUM', 'CROP_NAME', 'YEAR', 'START_DOY', 'END_DOY',
        'START_DATE', 'END_DATE', 'GS_LENGTH'
    ])
    gs_summary_csv.writerows(gs_summary_data)

    # Build output record array file
    gs_mean_annual_csv = csv.writer(open(gs_mean_annual_path, 'w', newline=''))
    gs_mean_annual_csv.writerow([
        'STATION', 'CROP_NUM', 'CROP_NAME', 'MEAN_START_DOY', 'MEAN_END_DOY',
        'MEAN_START_DATE', 'MEAN_END_DATE', 'MEAN_GS_LENGTH'
    ])
    gs_mean_annual_csv.writerows(gs_mean_annual_data)

    # Cleanup
    del gs_summary_path, gs_summary_name
    del gs_summary_csv, gs_summary_data
    del gs_mean_annual_path, gs_mean_annual_name
    del gs_mean_annual_csv, gs_mean_annual_data
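The transition search above encodes the documented convention: `np.diff` of the 0/1 season flags steps up (+1) the day before the season starts and steps down (-1) on the end day. A minimal sketch with toy flags:

import numpy as np

season = np.array([0, 0, 1, 1, 1, 0, 0])
# +1 appears the day before the start; -1 appears on the end day itself
start_i = np.where(np.diff(season) == 1)[0][0] + 1   # -> 2
end_i = np.where(np.diff(season) == -1)[0][0]        # -> 4
print(start_i, end_i, int(season[start_i:end_i + 1].sum()))  # 2 4 3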
Code example #13
def main(ini_path, time_filter, start_doy, end_doy, year_filter=''):
    """Create Median NIWR Shapefiles from annual_stat files

    Args:
        ini_path (str): file path of the project INI file
        time_filter (str): 'annual', 'growing_season', 'doy', or 'water_year'
        start_doy (int): starting julian doy (inclusive)
        end_doy (int): ending julian doy (inclusive)
        year_filter (list): only include certain years for summary
            (single YYYY or range YYYY:YYYY)
    Returns:
        None
    """

    # INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        gis_ws = config.get('CROP_ET', 'gis_folder')
    except:
        logging.error(
            'gis_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'et_cells_path parameter must be set in the INI file, exiting')
        return False

    try:
        daily_output_path = config.get('CROP_ET', 'daily_output_folder')
    except:
        logging.error('ERROR: daily_output_folder ' +
                      'parameter is not set in INI file')
        sys.exit()

    # Year Filter
    year_list = None
    if year_filter:
        try:
            year_list = sorted(list(util.parse_int_set(year_filter)))
        except:
            pass

    # Sub folder names
    daily_ws = os.path.join(project_ws, daily_output_path)
    output_ws = os.path.join(project_ws, 'cropweighted_shapefiles')
    if not os.path.exists(output_ws):
        os.makedirs(output_ws)

    # Check input folders
    if not os.path.exists(daily_ws):
        logging.critical('ERROR: The daily_stat folder does not exist.'
                         ' Check .ini settings')
        sys.exit()

    # Check input folders
    if not os.path.isdir(project_ws):
        logging.critical(('ERROR: The project folder ' +
                          'does not exist\n  {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.critical(
            ('ERROR: The GIS folder ' + 'does not exist\n  {}').format(gis_ws))
        sys.exit()
    logging.info('\nGIS Workspace:      {0}'.format(gis_ws))


    # Regular expressions
    data_re = re.compile(r'(?P<CELLID>\w+)_crop_(?P<CROP>\d+)\.csv$', re.I)
    # data_re = re.compile('(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+).csv$',
    #  re.I)

    # Build list of all data files
    data_file_list = sorted([
        os.path.join(daily_ws, f_name) for f_name in os.listdir(daily_ws)
        if data_re.match(f_name)
    ])
    if not data_file_list:
        logging.error('  ERROR: No daily ET files were found\n' +
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    cells = read_shapefile(et_cells_path)

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))

        # station, crop_num = os.path.splitext(file_name)[0].split(
        # '_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = sorted(set(crop_nums))
    unique_stations = sorted(set(stations))

    # Testing (should this be an input option?)
    # unique_crop_nums = [86]
    # unique_stations = [608807]

    # Variables to calculate output statistics
    var_list = ['ETact', 'NIWR', 'P_rz', 'P_eft', 'PrzF', 'PeftF']

    logging.info('\nCreating Crop Area Weighted Shapefiles')
    # Apply Time Filter (annual, etd growing season, doy (start/end))
    if time_filter == 'annual':
        logging.info('\nSummarizing data based on calendar year.')
    if time_filter == 'growing_season':
        logging.info(
            '\nFiltering data using ETDemands defined growing season.')
    if time_filter == 'doy':
        logging.info('\nFiltering data using doy inputs. Start doy: {:03d} '
                     'End doy: {:03d}'.format(start_doy, end_doy))
    if time_filter == 'water_year':
        logging.info('\nSummarizing data based on water year.')

    for crop in unique_crop_nums:
        logging.info('\n Processing Crop: {:02d}'.format(crop))

        # Initialize df variable to check if pandas df needs to be created
        df = None
        for station in unique_stations:
            # Build File Path
            file_path = os.path.join(
                daily_ws, '{}_crop_{:02d}.csv'.format(station, crop))
            # Only process files that exists (crop/cell combinations)
            if not os.path.exists(file_path):
                continue

            # Read file into df
            daily_df = pd.read_csv(file_path, skiprows=1)

            # Apply Time Filter (annual, etd growing season, doy (start/end))
            if time_filter == 'growing_season':
                daily_df = daily_df[(daily_df['Season'] == 1)]
            if time_filter == 'doy':
                daily_df = daily_df[(daily_df['DOY'] >= start_doy)
                                    & (daily_df['DOY'] <= end_doy)]
            if time_filter == 'water_year':
                daily_df['WY'] = daily_df.Year.where(daily_df.Month < 10,
                                                     daily_df.Year + 1)

            # Apply Year Filter (apply after adding water year)
            if year_list:
                if time_filter == 'water_year':
                    daily_df = daily_df[(daily_df['WY'] >= min(year_list))
                                        & (daily_df['WY'] <= max(year_list))]
                else:
                    daily_df = daily_df[(daily_df['Year'] >= min(year_list))
                                        & (daily_df['Year'] <= max(year_list))]
                    # logging.info('Including Years: {}'.format(year_list))

            if daily_df.empty:
                logging.info(' Growing Season never started. Skipping cell {}'
                             ' for crop {}.'.format(station, crop))
                continue

            # Dictionary to control agg of each variable
            a = {
                'ETact': 'sum',
                'NIWR': 'sum',
                'P_rz': 'sum',
                'P_eft': 'sum',
                'PPT': 'sum'
            }

            # GroupStats by Year of each column follow agg assignment above
            if time_filter == 'water_year':
                yearlygroup_df = daily_df.groupby('WY', as_index=True).agg(a)
            else:
                yearlygroup_df = daily_df.groupby('Year', as_index=True).agg(a)

            yearlygroup_df['PrzF'] = yearlygroup_df.P_rz / yearlygroup_df.PPT
            yearlygroup_df['PeftF'] = yearlygroup_df.P_eft / yearlygroup_df.PPT

            # Take Mean of Yearly GroupStats
            mean_df = yearlygroup_df.mean(axis=0)
            mean_fieldnames = [v + '_mn_{:02d}'.format(crop) for v in var_list]

            # Take Median of Yearly GroupStats
            median_df = yearlygroup_df.median(axis=0)
            median_fieldnames = [
                v + '_md_{:02d}'.format(crop) for v in var_list
            ]

            # Create Dataframe if it doesn't exist
            if df is None:
                df = pd.DataFrame(index=unique_stations,
                                  columns=mean_fieldnames + median_fieldnames)

            # Write data to each station row
            df.loc[station] = list(mean_df[var_list]) + list(
                median_df[var_list])

            # Cast summary objects to floats
            df = df.astype(float)

            # Grab min/max year for output folder naming
            # assumes all daily files cover same time period
            # year represents CY or WY based on time_filter
            if year_list:
                min_year, max_year = min(year_list), max(year_list)
            else:
                min_year = min(yearlygroup_df.index)
                max_year = max(yearlygroup_df.index)

        # Convert index to integers
        df.index = df.index.map(int)

        # Remove rows with Na (Is this the best option???)
        df = df.dropna()

        # Merge Crop ETact and NIWR to cells dataframe
        cells = pd.merge(cells,
                         df,
                         how='left',
                         left_on=['CELL_ID'],
                         right_index=True)

    # Change Ag_Acres cells with zero area to nan (Avoid ZeroDivisionError)
    cells[cells['AG_ACRES'] == 0] = np.nan

    # Calculate CropArea Weighted ETact and NIWR for each cell
    # List Comprehension (all combinations of var_list and stat)
    # https://www.safaribooksonline.com/library/view/python-cookbook/0596001673/ch01s15.html
    for var, stat in [(var, stat) for var in var_list
                      for stat in ['mn', 'md']]:
        # initialize empty columns (zeros)
        cells['CW{0}_{1}'.format(var, stat)] = 0
        for crop in unique_crop_nums:
            # reset temp
            temp = []
            # calculate crop fraction of weighted rate
            temp = cells['CROP_{0:02d}'.format(crop)].multiply(
                cells['{0}_{1}_{2:02d}'.format(var, stat, crop)]).divide(
                    cells['AG_ACRES'])
            # replace nan with zero
            temp = temp.fillna(0)
            # add crop fraction to total calculate weighted rate
            cells['CW{0}_{1}'.format(var, stat)] = cells['CW{0}_{1}'.format(
                var, stat)].add(temp)

    # Subset to "Final" dataframe for merge to output .shp
    # final_df = cells[['GRIDMET_ID', 'CWETact_mn', 'CWNIWR_mn', 'CWETact_md',
    #                   'CWNIWR_md']]
    final_df = cells[[
        'CELL_ID', 'CWETact_mn', 'CWNIWR_mn', 'CWETact_md', 'CWNIWR_md',
        'CWPrzF_mn', 'CWPrzF_md', 'CWPeftF_mn', 'CWPeftF_md'
    ]]

    # Copy ETCELLS.shp and join cropweighted data to it
    data = gpd.read_file(et_cells_path)

    # UPDATE TO NEWER ETCELLS STATION_ID FORMAT !!!!!
    merged_data = data.merge(final_df, on='CELL_ID')

    # Output file name
    out_name = "{}_cropweighted.shp".format(time_filter)
    if time_filter == 'doy':
        out_name = "{}_{:03d}_{:03d}_cropweighted.shp".format(
            time_filter, start_doy, end_doy)

    # output folder
    output_folder_path = os.path.join(
        output_ws, 'cropweighted_{}to{}'.format(min_year, max_year))
    if min_year == max_year:
        output_folder_path = os.path.join(output_ws,
                                          'cropweighted_{}'.format(min_year))

    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)
    # Write output .shp
    merged_data.to_file(os.path.join(output_folder_path, out_name))
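The crop-weighted columns above are an acreage-weighted average: each crop's per-acre rate is scaled by its share of AG_ACRES and the shares are summed per cell. The arithmetic, worked with made-up numbers:

# Hypothetical cell: 60 ac of crop 03 at a rate of 24, 40 ac of crop 07 at 30
rates = {'03': 24.0, '07': 30.0}
acres = {'03': 60.0, '07': 40.0}
ag_acres = 100.0
cw = sum(acres[c] * rates[c] / ag_acres for c in rates)
print(cw)  # 0.6 * 24 + 0.4 * 30 = 26.4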
Code example #14
File: meancutting_update.py  Project: usbr/et-demands
def main(ini_path, start_yr=None, end_yr=None):
    """Update MeanCutting.txt file with cutting information from annual
    stat file. Updating the MeanCutting.txt occurs after an initial "test"
    run to determine the total number of cuttings in each cell for either crop
    02 of crop 03. The model should be re-run after updating the MeanCutting.txt
    to apply the new cutting numbers.

    Args:
        ini_path (str): file path of project INI file
        start_yr (int): YYYY
        end_yr(int): YYYY

    Returns:
        None
    """

    logging.info('\nUpdating Mean Cutting File')
    logging.info('  INI: {}'.format(ini_path))

    # Check that INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get project workspace and daily ET folder from INI file
    # project workspace can use old or new ini file
    try:
        project_ws = config.get('PROJECT', 'project_folder')
    except:
        try:
            project_ws = config.get(crop_et_sec, 'project_folder')
        except:
            logging.error('ERROR: project_folder ' +
                          'parameter is not set in INI file')
            sys.exit()

    def get_config_param(config, param_name, section):
        """"""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: {} parameter is not set' +
                           ' in INI file').format(param_name))
            sys.exit()
        return param_value

    ann_stats_ws = os.path.join(
        project_ws,
        get_config_param(config, 'annual_output_folder', crop_et_sec))

    static_ws = os.path.join(project_ws, 'static')

    try:
        mean_cutting_name = config.get(crop_et_sec, 'cell_cuttings_name')
    except:
        logging.error('cell_cuttings_name  must be set in the INI file, '
                      'exiting')
        sys.exit()

    mean_cuttings_path = os.path.join(static_ws, mean_cutting_name)

    # Check workspaces
    if not os.path.isdir(ann_stats_ws):
        logging.error(('\nERROR: Annual ET stats folder {0} ' +
                       'could not be found\n').format(ann_stats_ws))
        sys.exit()

    # Range of data to use
    # (plain assignments; the dead try/except blocks are not needed here)
    year_start = start_yr
    if year_start:
        logging.info('  Start Year:  {0}'.format(year_start))
    year_end = end_yr
    if year_end:
        logging.info('  End Year:    {0}'.format(year_end))
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End Year cannot be less than start year.\n')
        sys.exit()

    if year_start and year_end:
        logging.info('\nFiltering Cutting Statistic to include data from'
                     ' {}-{}.'.format(start_yr, end_yr))

    # Loop through annual result files and update cutting numbers for
    # both crop02 and crop03 in MeanCuttings.txt file (static folder)
    cutting_crop_list = ['02', '03']
    cutting_fieldname_list = ['Number Dairy', 'Number Beef']

    # initialize mean_cutting_df
    mean_cutting_df = []

    for crop, cuttingname in zip(cutting_crop_list, cutting_fieldname_list):
        logging.info('  Processing Crop: {}, Cutting Field: {}'.format(
            crop, cuttingname))
        mean_cutting_df = pd.read_csv(mean_cuttings_path,
                                      skiprows=[0],
                                      sep='\t')
        mean_cutting_df.set_index(['ET Cell ID'], inplace=True)
        # convert index to str to handle all cell ID data types
        mean_cutting_df.index = mean_cutting_df.index.map(str)
        # print(mean_cutting_df.head())
        for index, row in mean_cutting_df.iterrows():
            # cell_id = row['ET Cell ID']
            # Handle both str and float/int inputs and remove .0 decimal
            # https://docs.python.org/release/2.7.3/library/stdtypes.html#boolean-operations-and-or-not
            cell_id = (str(index)[-2:] == '.0' and str(index)[:-2]
                       or str(index))
            # print(cell_id)

            stats_path = os.path.join(ann_stats_ws,
                                      '{}_crop_{}.csv'.format(cell_id, crop))
            # print(stats_path)
            if os.path.exists(stats_path):
                stat_df = pd.read_csv(stats_path,
                                      usecols=['Cutting', 'Year'],
                                      skiprows=[0])
            else:
                logging.debug('\nCrop {} not present in cell {}. Not updating '
                              'cuttings information.'.format(crop, cell_id))
                continue

            # Filter df based on start and end year (if given)
            if year_start and year_end:
                stat_df = stat_df.loc[(stat_df.Year >= year_start)
                                      & (stat_df.Year <= year_end)]

            # take average of all years (round down to nearest int)
            avg_cutting = int(stat_df.Cutting.mean())
            # round up to 1 if avg is < 1
            if avg_cutting < 1:
                avg_cutting = 1
            # set cuttings value in output df
            mean_cutting_df.at[cell_id, cuttingname] = avg_cutting
            # print(mean_cutting_df.head())
    logging.info('\nUpdating MeanCuttings File: {}'.format(mean_cuttings_path))
    mean_cutting_df.to_csv(mean_cuttings_path, sep='\t')
    header_line = 'This file contains first (temporary) numbers of cutting ' \
                  'cycles for dairy and beef hay, based on latitude. ' \
                  'R.Allen 4/1/08\n'
    with open(mean_cuttings_path, 'r') as original:
        data = original.read()
    with open(mean_cuttings_path, 'w') as modified:
        modified.write(header_line + data)
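The `and`/`or` expression used above when building cell_id is the pre-ternary Python idiom for stripping a trailing '.0' from float-like IDs (see the linked docs). A small sketch showing the equivalent modern conditional expression:

for raw in ['1234', 1234.0]:
    s = str(raw)
    # equivalent to: s[-2:] == '.0' and s[:-2] or s
    cell_id = s[:-2] if s.endswith('.0') else s
    print(repr(cell_id))  # '1234' both times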
Code example #15
def main(ini_path,
         zone_type='huc8',
         area_threshold=10,
         dairy_cuttings=5,
         beef_cuttings=4,
         overwrite_flag=False,
         cleanup_flag=False):
    """Build static text files needed to run ET-Demands model

    Args:
        ini_path (str): file path of the project INI file
        zone_type (str): Zone type (huc8, huc10, county)
        area_threshold (float): CDL area threshold [acres]
        dairy_cuttings (int): Initial number of dairy hay cuttings
        beef_cuttings (int): Initial number of beef hay cuttings
        overwrite_flag (bool): If True, overwrite existing files
        cleanup_flag (bool): If True, remove temporary files

    Returns:
        None
    """
    logging.info('\nBuilding ET-Demands Static Files')

    # Input units
    cell_elev_units = 'FEET'
    station_elev_units = 'FEET'

    # Default values
    permeability = -999
    soil_depth = 60  # inches
    aridity = 50
    irrigation = 1
    crops = 85

    # Input paths
    # DEADBEEF - For now, get cropET folder from INI file
    # This function may eventually be moved into the main cropET code
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        gis_ws = config.get('CROP_ET', 'gis_folder')
    except:
        logging.error(
            'gis_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'cells_path parameter must be set in the INI file, exiting')
        return False
    try:
        stations_path = config.get('CROP_ET', 'stations_path')
    except:
        logging.error(
            'stations_path parameter must be set in the INI file, exiting')
        return False
    try:
        crop_et_ws = config.get('CROP_ET', 'crop_et_folder')
    except:
        logging.error(
            'crop_et_ws parameter must be set in the INI file, exiting')
        return False
    try:
        template_ws = config.get('CROP_ET', 'template_folder')
    except:
        template_ws = os.path.join(os.path.dirname(crop_et_ws), 'static')
        logging.info(
            ('\nStatic text file "template_folder" parameter was not set ' +
             'in the INI\n  Defaulting to: {}').format(template_ws))

    # Read data from geodatabase or shapefile
    # if '.gdb' in et_cells_path and not et_cells_path.endswith('.shp'):
    #     _flag = False
    #     _path = os.path.dirname(et_cells_path)
    #      gdb_path = r'D:\Projects\CAT_Basins\AltusOK\et-demands_py\et_demands.gdb'
    #     _cells_path = os.path.join(gdb_path, 'et_cells')

    # Output sub-folder names
    static_ws = os.path.join(project_ws, 'static')

    # Weather station shapefile
    # Generate by selecting the target NLDAS 4km cell intersecting each HUC
    station_id_field = 'NLDAS_ID'
    if zone_type == 'huc8':
        station_zone_field = 'HUC8'
    elif zone_type == 'huc10':
        station_zone_field = 'HUC10'
    elif zone_type == 'county':
        station_zone_field = 'COUNTYNAME'
        # station_zone_field = 'FIPS_C'
    else:
        logging.error('ERROR: zone_type must be huc8, huc10, or county')
        sys.exit()
    station_lat_field = 'LAT'
    station_lon_field = 'LON'
    if station_elev_units.upper() in ['FT', 'FEET']:
        station_elev_field = 'ELEV_FT'
    elif station_elev_units.upper() in ['M', 'METERS']:
        station_elev_field = 'ELEV_M'
    # station_elev_field = 'ELEV_FT'

    # Field names
    cell_lat_field = 'LAT'
    # cell_lon_field = 'LON'
    if cell_elev_units.upper() in ['FT', 'FEET']:
        cell_elev_field = 'ELEV_FT'
    elif cell_elev_units.upper() in ['M', 'METERS']:
        cell_elev_field = 'ELEV_M'
    # cell_elev_field = 'ELEV_FT'
    cell_id_field = 'CELL_ID'
    cell_name_field = 'CELL_NAME'
    met_id_field = 'STATION_ID'
    # awc_field = 'AWC'
    clay_field = 'CLAY'
    sand_field = 'SAND'
    awc_in_ft_field = 'AWC_IN_FT'
    hydgrp_num_field = 'HYDGRP_NUM'
    hydgrp_field = 'HYDGRP'
    # huc_field = 'HUC{}'.format(huc)
    # permeability_field = 'PERMEABILITY'
    # soil_depth_field = 'SOIL_DEPTH'
    # aridity_field = 'ARIDITY'
    # dairy_cutting_field = 'DAIRY_CUTTINGS'
    # beef_cutting_field = 'BEEF_CUTTINGS'

    # Static file names
    cell_props_name = 'ETCellsProperties.txt'
    cell_crops_name = 'ETCellsCrops.txt'
    cell_cuttings_name = 'MeanCuttings.txt'
    crop_params_name = 'CropParams.txt'
    crop_coefs_name = 'CropCoefs.txt'
    eto_ratio_name = 'EToRatiosMon.txt'
    static_list = [
        crop_params_name, crop_coefs_name, cell_props_name, cell_crops_name,
        cell_cuttings_name, eto_ratio_name
    ]

    # Check input folders
    if not os.path.isdir(crop_et_ws):
        logging.critical(('ERROR: The INI cropET folder ' +
                          'does not exist\n  {}').format(crop_et_ws))
        sys.exit()
    elif not os.path.isdir(project_ws):
        logging.critical(('ERROR: The project folder ' +
                          'does not exist\n  {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.critical(('ERROR: The GIS folder ' +
                          'does not exist\n  {}').format(gis_ws))
        sys.exit()
    logging.info('\nGIS Workspace:      {0}'.format(gis_ws))
    logging.info('Project Workspace:  {0}'.format(project_ws))
    logging.info('CropET Workspace:   {0}'.format(crop_et_ws))
    logging.info('Template Workspace: {0}'.format(template_ws))

    # Check input files
    if not arcpy.Exists(et_cells_path):
        logging.error(('\nERROR: The ET Cell shapefile {} ' +
                       'does not exist\n').format(et_cells_path))
        sys.exit()
    elif not os.path.isfile(stations_path) or not arcpy.Exists(stations_path):
        logging.critical(('ERROR: The NLDAS station shapefile does ' +
                          'not exist\n  {}').format(stations_path))
        sys.exit()
    for static_name in static_list:
        if not os.path.isfile(os.path.join(template_ws, static_name)):
            logging.error(
                ('\nERROR: The static template {} does not ' +
                 'exist\n').format(os.path.join(template_ws, static_name)))
            sys.exit()
    logging.debug('ET Cells Path: {0}'.format(et_cells_path))
    logging.debug('Stations Path: {0}'.format(stations_path))

    # Check units
    if cell_elev_units.upper() not in ['FEET', 'FT', 'METERS', 'M']:
        logging.error(
            ('\nERROR: ET Cell elevation units {} are invalid\n' +
             '  Units must be METERS or FEET').format(cell_elev_units))
        sys.exit()
    elif station_elev_units.upper() not in ['FEET', 'FT', 'METERS', 'M']:
        logging.error(
            ('\nERROR: Station elevation units {} are invalid\n' +
             '  Units must be METERS or FEET').format(station_elev_units))
        sys.exit()

    # Build output table folder if necessary
    if not os.path.isdir(static_ws):
        os.makedirs(static_ws)

    # Read weather station/NLDAS cell station data
    logging.info('\nReading station shapefile')
    logging.debug('  {}'.format(stations_path))
    fields = [
        station_zone_field, station_id_field, station_elev_field,
        station_lat_field, station_lon_field
    ]
    logging.debug('  Fields: {}'.format(fields))
    station_data_dict = defaultdict(dict)
    with arcpy.da.SearchCursor(stations_path, fields) as s_cursor:
        for row in s_cursor:
            for field in fields[1:]:
                # Key/match on strings even if ID is an integer
                station_data_dict[str(
                    row[0])][field] = row[fields.index(field)]
    for k, v in station_data_dict.iteritems():
        logging.debug('  {0}: {1}'.format(k, v))
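    # station_data_dict now maps each zone ID (as a string) to a dict of
    # station attributes, e.g. (values hypothetical):
    #   {'12345678': {'NLDAS_ID': 'X9_Y12', 'ELEV_FT': 4250.0,
    #                 'LAT': 39.53, 'LON': -119.81}}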

    # Read ET Cell zonal stats
    logging.info('\nReading ET Cell Zonal Stats')
    logging.debug('  {}'.format(et_cells_path))
    crop_field_list = sorted([
        f.name for f in arcpy.ListFields(et_cells_path)
        if re.match(r'CROP_\d{2}', f.name)
    ])
    fields = [
        cell_id_field, cell_name_field, cell_lat_field, cell_elev_field,
        awc_in_ft_field, clay_field, sand_field, hydgrp_num_field, hydgrp_field
    ]
    fields = fields + crop_field_list
    logging.debug('  Fields: {}'.format(fields))
    cell_data_dict = defaultdict(dict)
    with arcpy.da.SearchCursor(et_cells_path, fields) as s_cursor:
        for row in s_cursor:
            for field in fields[1:]:
                # Key/match on strings even if ID is an integer
                cell_data_dict[str(row[0])][field] = row[fields.index(field)]

    # Update ET Cell MET_ID/STATION_ID value
    fields = [cell_id_field, met_id_field]
    with arcpy.da.UpdateCursor(et_cells_path, fields) as u_cursor:
        for row in u_cursor:
            try:
                row[1] = station_data_dict[row[0]][station_id_field]
                u_cursor.updateRow(row)
            except KeyError:
                pass

    # Convert elevation units if necessary
    if station_elev_units.upper() in ['METERS', 'M']:
        logging.debug('  Convert station elevation from meters to feet')
        for k in station_data_dict.iterkeys():
            station_data_dict[k][station_elev_field] /= 0.3048
    if cell_elev_units.upper() in ['METERS', 'M']:
        logging.debug('  Convert et cell elevation from meters to feet')
        for k in cell_data_dict.iterkeys():
            cell_data_dict[k][cell_elev_field] /= 0.3048
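    # Dividing by 0.3048 converts meters to feet,
    # e.g. 1500 m / 0.3048 = 4921.26 ft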

    logging.info('\nCopying template static files')
    for static_name in static_list:
        # if (overwrite_flag or
        #      os.path.isfile(os.path.join(static_ws, static_name))):
        logging.debug('  {}'.format(static_name))
        shutil.copy(os.path.join(template_ws, static_name), static_ws)
        # shutil.copyfile(
        #     os.path.join(template_ws, static_name),
        #     os.path.join(static_ws, crop_params_name))

    logging.info('\nWriting static text files')
    cell_props_path = os.path.join(static_ws, cell_props_name)
    cell_crops_path = os.path.join(static_ws, cell_crops_name)
    cell_cuttings_path = os.path.join(static_ws, cell_cuttings_name)
    # crop_params_path = os.path.join(static_ws, crop_params_name)
    # crop_coefs_path = os.path.join(static_ws, crop_coefs_name)
    eto_ratio_path = os.path.join(static_ws, eto_ratio_name)

    # Write cell properties
    logging.debug('  {}'.format(cell_props_path))
    with open(cell_props_path, 'a') as output_f:
        for cell_id, cell_data in sorted(cell_data_dict.iteritems()):
            if cell_id in station_data_dict.keys():
                station_data = station_data_dict[cell_id]
                station_id = station_data[station_id_field]
                station_lat = '{:>9.4f}'.format(
                    station_data[station_lat_field])
                station_lon = '{:>9.4f}'.format(
                    station_data[station_lon_field])
                station_elev = '{:.2f}'.format(
                    station_data[station_elev_field])
            else:
                logging.debug(('    Cell_ID {} was not found in the ' +
                               'station data').format(cell_id))
                station_id = ''
                station_lat, station_lon, station_elev = '', '', ''
            # There is an extra/unused column in the template and excel files
            output_list = [
                cell_id, cell_data[cell_name_field], station_id, station_lat,
                station_lon, station_elev, permeability,
                '{:.4f}'.format(cell_data[awc_in_ft_field]), soil_depth,
                cell_data[hydgrp_field], cell_data[hydgrp_num_field], aridity,
                ''
            ]
            output_f.write('\t'.join(map(str, output_list)) + '\n')
            del output_list
            del station_id, station_lat, station_lon, station_elev

    # Write cell crops
    logging.debug('  {}'.format(cell_crops_path))
    with open(cell_crops_path, 'a') as output_f:
        for cell_id, cell_data in sorted(cell_data_dict.iteritems()):
            if cell_id in station_data_dict.keys():
                station_id = station_data_dict[cell_id][station_id_field]
            else:
                logging.debug(('    Cell_ID {} was not found in the ' +
                               'station data').format(cell_id))
                station_id = ''
            output_list = [
                cell_id, cell_data[cell_name_field], station_id, irrigation
            ]
            crop_list = ['CROP_{:02d}'.format(i) for i in range(1, crops + 1)]
            crop_area_list = [
                cell_data[crop] if crop in cell_data.keys() else 0
                for crop in crop_list
            ]
            crop_flag_list = [
                1 if area > area_threshold else 0 for area in crop_area_list
            ]
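            # e.g. crop areas [12.5, 0.0, 3.1] with area_threshold = 10
            # give flags [1, 0, 0]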
            output_list = output_list + crop_flag_list
            output_f.write('\t'.join(map(str, output_list)) + '\n')
            del crop_list, crop_area_list, crop_flag_list, output_list

    # Write cell cuttings
    logging.debug('  {}'.format(cell_cuttings_path))
    with open(cell_cuttings_path, 'a') as output_f:
        for cell_id, cell_data in sorted(cell_data_dict.iteritems()):
            output_list = [
                cell_id, cell_data[cell_name_field],
                '{:>9.4f}'.format(cell_data[cell_lat_field]), dairy_cuttings,
                beef_cuttings
            ]
            output_f.write('\t'.join(map(str, output_list)) + '\n')
            del output_list

    # Write monthly ETo ratios
    logging.debug('  {}'.format(eto_ratio_path))
    with open(eto_ratio_path, 'a') as output_f:
        for cell_id, cell_data in sorted(cell_data_dict.iteritems()):
            if cell_id in station_data_dict.keys():
                station_data = station_data_dict[cell_id]
                station_id = station_data[station_id_field]
                # station_lat = '{:>9.4f}'.format(station_data[station_lat_field])
                # station_lon = '{:>9.4f}'.format(station_data[station_lon_field])
                # station_elev = '{:.2f}'.format(station_data[station_elev_field])
            else:
                logging.debug(('    Cell_ID {} was not found in the ' +
                               'station data').format(cell_id))
                # station_id, lat, lon, elev = '', '', '', ''
                continue
            output_list = [station_id, ''] + [1.0] * 12
            output_f.write('\t'.join(map(str, output_list)) + '\n')
            del output_list
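A minimal sketch of a command line wrapper for the function above; the argparse flag names and the bare two-argument call are illustrative assumptions, not the original script's CLI (overwrite_flag and cleanup_flag are left at their defaults):

if __name__ == '__main__':
    import argparse

    # Hypothetical CLI wrapper; the flag names are assumptions
    parser = argparse.ArgumentParser(
        description='Build ET-Demands static text files')
    parser.add_argument('-i', '--ini', required=True,
                        help='project INI file path')
    parser.add_argument('--zone', default='huc8',
                        choices=['huc8', 'huc10', 'county'],
                        help='zone type used to pick the station zone field')
    args = parser.parse_args()
    main(ini_path=args.ini, zone_type=args.zone)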
Code Example #16
    ch.setLevel(getattr(logging, params['logLevel'].upper()))

    ## set logging format
    formatter = logging.Formatter("(" + str(os.getpid()) +
                                  ") %(asctime)s:%(levelname)s: %(message)s")
    ch.setFormatter(formatter)
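    # With this format a record renders like (PID and timestamp hypothetical):
    #   (12345) 2020-01-01 12:00:00,000:INFO: reading pipeline configs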

    ## add handlers to logging object
    logger.addHandler(ch)

    # -------------------------------
    # ---- read pipeline configs ----
    # -------------------------------

    # parse config file
    pipe_cfg = read_ini(params['pipeCfgPath'])

    # establish which header we'll be using to populate instrument parameters
    if params['instrument'] == 'SkyCamT':
        inst_cfg_header = "skycamt_params"
    elif params['instrument'] == 'SkyCamZ':
        inst_cfg_header = "skycamz_params"

    # add to params dict
    try:
        params['rootPath'] = str(
            pipe_cfg['paths']['path_root_skymine'].rstrip("/") + "/")
        params['resRootPath'] = str(
            pipe_cfg['paths']['path_root_res'].rstrip("/") + "/")
        params['path_pw_list'] = str(pipe_cfg['paths']['path_pw_list'])
        params['path_lock'] = str(pipe_cfg['paths']['path_lock'])
Code Example #17
def main(ini_path,
         show_flag=False,
         save_flag=True,
         label_flag=False,
         figure_size=(12, 12),
         figure_dpi=300,
         start_date=None,
         end_date=None,
         crop_str='',
         simplify_tol=None,
         area_threshold=0):
    """Plot crop summary maps using daily output files

    Args:
        ini_path (str): file path of the project INI file
        show_flag (bool): if True, show maps
        save_flag (bool): if True, save maps to disk
        label_flag (bool): if True, label maps with cell values
        figure_size (tuple): width, height tuple [inches]
        figure_dpi (int): figure dots per inch
        start_date (str): ISO format date string (YYYY-MM-DD)
        end_date (str): ISO format date string (YYYY-MM-DD)
        crop_str (str): comma separated list or range of crops to compare
        simplify_tol (float): simplify tolerance [in the units of ET Cells]
        area_threshold (float): CDL area threshold [acres]

    Returns:
        None
    """

    # ET Cells field names
    cell_id_field = 'CELL_ID'
    crop_area_field = 'AG_ACRES'

    # Input field names
    date_field = 'Date'
    doy_field = 'DOY'
    year_field = 'Year'
    # month_field = 'Month'
    # day_field = 'Day'
    # pmeto_field = 'PMETo'
    # precip_field = 'PPT'
    # t30_field = 'T30'
    etact_field = 'ETact'
    # etpot_field = 'ETpot'
    # etbas_field = 'ETbas'
    # irrig_field = 'Irrigation'
    season_field = 'Season'
    cutting_field = 'Cutting'
    # runoff_field = 'Runoff'
    # dperc_field = 'DPerc'
    # niwr_field = 'NIWR'
    # kc_field = 'Kc'
    # kcb_field = 'Kcb'

    # Output field names
    annual_et_field = 'Annual_ET'
    seasonal_et_field = 'Seasonal_ET'
    gs_start_doy_field = 'Start_DOY'
    gs_end_doy_field = 'End_DOY'
    gs_length_field = 'GS_Length'

    # Number of header lines in data file
    # header_lines = 2

    # Additional figure controls
    # figure_dynamic_size = False
    # figure_ylabel_size = '12pt'

    # Delimiter
    sep = ','
    # sep = r"\s*"

    daily_input_re = re.compile(
        r'(?P<cell_id>\w+)_daily_crop_(?P<crop_num>\d{2})\.csv', re.I)
    # gs_input_re = re.compile(
    #     r'(?P<cell_id>\w+)_gs_crop_(?P<crop_num>\d{2})\.csv', re.I)
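    # Example match for the daily pattern (file name hypothetical):
    #   '12345_daily_crop_07.csv' -> cell_id='12345', crop_num='07'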

    logging.info('\nGenerate crop summary maps from daily data')
    logging.info('  INI: {}'.format(ini_path))

    # Check that the INI file can be read
    crop_et_sec = 'CROP_ET'
    config = util.read_ini(ini_path, crop_et_sec)

    # Get the project workspace and daily ET folder from the INI file
    def get_config_param(config, param_name, section):
        """"""
        try:
            param_value = config.get(section, param_name)
        except:
            logging.error(('ERROR: The {} parameter is not set' +
                           ' in the INI file').format(param_name))
            sys.exit()
        return param_value

    cells_path = get_config_param(config, 'cells_path', crop_et_sec)
    project_ws = get_config_param(config, 'project_folder', crop_et_sec)
    daily_stats_ws = os.path.join(
        project_ws, get_config_param(config, 'daily_output_folder',
                                     crop_et_sec))

    try:
        output_ws = os.path.join(
            project_ws, config.get(crop_et_sec, 'summary_maps_folder'))
    except:
        if 'stats' in daily_stats_ws:
            output_ws = daily_stats_ws.replace('stats', 'maps')
        else:
            output_ws = os.path.join(project_ws, 'summary_maps_folder')

    # Check workspaces
    if not os.path.isdir(daily_stats_ws):
        logging.error(('\nERROR: The daily ET stats folder {0} ' +
                       'could not be found\n').format(daily_stats_ws))
        sys.exit()
    if not os.path.isfile(cells_path):
        logging.error(('\nERROR: The cells shapefile {0} ' +
                       'could not be found\n').format(cells_path))
        sys.exit()
    if not os.path.isdir(output_ws):
        os.mkdir(output_ws)

    # Range of data to plot
    try:
        year_start = dt.datetime.strptime(start_date, '%Y-%m-%d').year
        logging.info('  Start Year:  {0}'.format(year_start))
    except:
        year_start = None
    try:
        year_end = dt.datetime.strptime(end_date, '%Y-%m-%d').year
        logging.info('  End Year:    {0}'.format(year_end))
    except:
        year_end = None
    if year_start and year_end and year_end < year_start:
        logging.error('\n  ERROR: End date must be after start date\n')
        sys.exit()

    # Allow user to subset crops from INI
    try:
        crop_skip_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_skip_list'))))
    except:
        crop_skip_list = []
        # crop_skip_list = [44, 45, 46]
    try:
        crop_test_list = sorted(
            list(util.parse_int_set(config.get(crop_et_sec,
                                               'crop_test_list'))))
    except:
        crop_test_list = []

    # Allow user to subset cells from INI
    try:
        cell_skip_list = config.get(crop_et_sec, 'cell_skip_list').split(',')
        cell_skip_list = sorted([c.strip() for c in cell_skip_list])
    except:
        cell_skip_list = []
    try:
        cell_test_list = config.get(crop_et_sec, 'cell_test_list').split(',')
        cell_test_list = sorted([c.strip() for c in cell_test_list])
    except:
        cell_test_list = []

    # Overwrite INI crop list with user defined values
    # Could also append to the INI crop list
    if crop_str:
        try:
            crop_test_list = list(util.parse_int_set(crop_str))
        # try:
        #     crop_test_list = sorted(list(set(
        #         crop_test_list + list(util.parse_int_set(crop_str)))
        except:
            pass
    logging.debug('\n  crop_test_list = {0}'.format(crop_test_list))
    logging.debug('  crop_skip_list = {0}'.format(crop_skip_list))
    logging.debug('  cell_test_list = {0}'.format(cell_test_list))
    logging.debug('  cell_skip_list = {0}'.format(cell_skip_list))

    # Build list of all daily ET files
    daily_path_dict = defaultdict(dict)
    for f_name in os.listdir(daily_stats_ws):
        f_match = daily_input_re.match(os.path.basename(f_name))
        if not f_match:
            continue
        cell_id = f_match.group('cell_id')
        crop_num = int(f_match.group('crop_num'))
        if f_match.group('cell_id') == 'test':
            continue
        elif crop_skip_list and crop_num in crop_skip_list:
            continue
        elif crop_test_list and crop_num not in crop_test_list:
            continue
        elif cell_skip_list and cell_id in cell_skip_list:
            continue
        elif cell_test_list and cell_id not in cell_test_list:
            continue
        else:
            daily_path_dict[crop_num][cell_id] = os.path.join(
                daily_stats_ws, f_name)
    if not daily_path_dict:
        logging.error('  ERROR: No daily ET files were found\n' +
                      '  ERROR: Check the folder_name parameters\n')
        sys.exit()
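    # daily_path_dict is keyed by crop number, then by cell ID,
    # e.g. (hypothetical): daily_path_dict[7]['12345'] points at
    # '<daily_stats_ws>/12345_daily_crop_07.csv'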

    # Read ET Cells into memory with fiona and shapely
    # Convert multi-polygons to list of polygons
    cell_geom_dict = defaultdict(list)
    cell_data_dict = dict()
    cell_extent = []
    with fiona.open(cells_path, "r") as cell_f:
        cell_extent = cell_f.bounds[:]
        # Fiona is printing a debug statement here "Index: N"
        for item in cell_f:
            cell_id = item['properties'][cell_id_field]
            cell_data_dict[cell_id] = item['properties']

            # Simplify the geometry
            if simplify_tol is not None:
                item_geom = shape(item['geometry']).simplify(
                    simplify_tol, preserve_topology=False)
            else:
                item_geom = shape(item['geometry'])

            # Unpack multipolygons to lists of polygons
            if item_geom.is_empty:
                continue
            elif item_geom.geom_type == 'MultiPolygon':
                # Order the geometries from largest to smallest area
                item_geom_list = sorted([[g.area, g]
                                         for g in item_geom if not g.is_empty],
                                        reverse=True)
                for item_area, item_poly in item_geom_list:
                    cell_geom_dict[cell_id].append(item_poly)
            elif item_geom.geom_type == 'Polygon':
                cell_geom_dict[cell_id].append(item_geom)
            else:
                logging.error('Invalid geometry type')
                continue
    if not cell_geom_dict:
        logging.error('ET Cell shapefile not read in')
        sys.exit()
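    # cell_geom_dict maps each CELL_ID to a list of shapely Polygons
    # (multipolygons are unpacked, largest area first), while
    # cell_data_dict keeps the attribute table row for each cell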

    # Plot keyword arguments
    plot_kwargs = {
        'extent': cell_extent,
        'fig_size': figure_size,
        'fig_dpi': figure_dpi,
        'save_flag': save_flag,
        'show_flag': show_flag,
        'label_flag': label_flag,
    }

    # Plot CELL_ID
    logging.info('\nPlotting cell ID map')
    cell_id_dict = {k: k.replace(' ', '\n') for k in cell_data_dict.iterkeys()}
    # cell_id_dict = {k:k for k in cell_data_dict.iterkeys()}
    cell_plot_func(os.path.join(output_ws, 'cell_id.png'),
                   cell_geom_dict,
                   cell_id_dict,
                   cmap=None,
                   title_str='CELL_ID',
                   clabel_str='',
                   label_size=6,
                   **plot_kwargs)

    # Plot total CDL crop acreages
    logging.info('\nPlotting total crop acreage')
    crop_area_dict = {
        k: v[crop_area_field]
        for k, v in cell_data_dict.iteritems()
    }
    # crop_area_dict = {
    #     k: v[crop_area_field] for k, v in cell_data_dict.iteritems()
    #     if v[crop_area_field] > area_threshold}
    cell_plot_func(os.path.join(output_ws, 'total_crop_acreage.png'),
                   cell_geom_dict,
                   crop_area_dict,
                   cmap=cm.YlGn,
                   title_str='Total CDL Crop Area',
                   clabel_str='acres',
                   label_size=6,
                   **plot_kwargs)

    # Plot PMETo
    # pmeto_dict = {
    #     k: v[crop_area_field]
    #     for k, v in cell_data_dict.iteritems()}
    # cell_plot_func(
    #     os.path.join(output_ws, 'eto.png'),
    #     cell_geom_dict, pmeto_dict, cmap=cm.YlGn,
    #     title_str='Reference ET', clabel_str='mm',
    #     label_size=8, **plot_kwargs)

    # Build an empty dataframe to write the total area weighted ET
    # columns_dict = {cell_id_field:sorted(cell_data_dict.keys())}
    columns_dict = {
        'CROP_{0:02d}'.format(k): None
        for k in daily_path_dict.keys()
    }
    columns_dict[cell_id_field] = sorted(cell_data_dict.keys())
    crop_area_df = pd.DataFrame(columns_dict).set_index(cell_id_field)
    annual_et_df = pd.DataFrame(columns_dict).set_index(cell_id_field)
    seasonal_et_df = pd.DataFrame(columns_dict).set_index(cell_id_field)

    # First process by crop
    logging.info('')
    for crop_num in sorted(daily_path_dict.keys()):
        crop_column = 'CROP_{0:02d}'.format(crop_num)
        logging.info('Crop Num: {0:2d}'.format(crop_num))

        # First threshold CDL crop areas
        # Check all cell_id's against crop_area_dict keys
        crop_area_dict = {
            k: v[crop_column]
            for k, v in cell_data_dict.iteritems()
            if (k in daily_path_dict[crop_num].keys()
                and v[crop_column] > area_threshold)
        }
        # crop_area_dict = {
        #     k: v[crop_column] for k,v in cell_data_dict.iteritems()
        #     if k in daily_path_dict[crop_num].keys()}

        # Build an empty dataframe to write to
        crop_output_df = pd.DataFrame({
            cell_id_field:
            sorted(
                list(
                    set(daily_path_dict[crop_num].keys())
                    & set(crop_area_dict.keys()))),
            annual_et_field:
            None,
            seasonal_et_field:
            None,
            gs_start_doy_field:
            None,
            gs_end_doy_field:
            None,
            gs_length_field:
            None,
            cutting_field:
            None
        })
        crop_output_df.set_index(cell_id_field, inplace=True)

        # Process each cell
        for cell_id, input_path in sorted(daily_path_dict[crop_num].items()):
            logging.debug('  Cell ID:   {0}'.format(cell_id))

            # Skip if crop area is below threshold
            if cell_id not in crop_area_dict.keys():
                logging.debug('    Area below threshold, skipping')
                continue

            # Get crop name from the first line of the output file
            # DEADBEEF - This may not exist in the output file...
            with open(input_path, 'r') as file_f:
                crop_name = file_f.readline().split('-', 1)[1].strip()
                crop_name = crop_name.replace('--', ' - ')
                crop_name = crop_name.replace(' (', ' - ').replace(')', '')
                logging.debug('  Crop:      {0}'.format(crop_name))
            logging.debug('    {0}'.format(os.path.basename(input_path)))

            # Read data from file into record array (structured array)
            daily_df = pd.read_table(input_path,
                                     header=0,
                                     comment='#',
                                     sep=sep)
            logging.debug('    Fields: {0}'.format(', '.join(
                daily_df.columns.values)))
            daily_df[date_field] = pd.to_datetime(daily_df[date_field])
            daily_df.set_index(date_field, inplace=True)

            # Build list of unique years
            year_array = np.sort(
                np.unique(np.array(daily_df[year_field]).astype(np.int)))
            logging.debug('    All Years: {0}'.format(', '.join(
                list(util.ranges(year_array.tolist())))))
            # logging.debug('    All Years: {0}'.format(
            #    ','.join(map(str, year_array.tolist()))))

            # Don't include the first year in the stats
            crop_year_start = min(daily_df[year_field])
            logging.debug(
                '    Skipping {}, first year'.format(crop_year_start))
            daily_df = daily_df[daily_df[year_field] > crop_year_start]

            # Check if start and end years have >= 365 days
            crop_year_start = min(daily_df[year_field])
            crop_year_end = max(daily_df[year_field])
            if sum(daily_df[year_field] == crop_year_start) < 365:
                logging.debug(
                    '    Skipping {}, missing days'.format(crop_year_start))
                daily_df = daily_df[daily_df[year_field] > crop_year_start]
            if sum(daily_df[year_field] == crop_year_end) < 365:
                logging.debug(
                    '    Skipping {}, missing days'.format(crop_year_end))
                daily_df = daily_df[daily_df[year_field] < crop_year_end]
            del crop_year_start, crop_year_end

            # Only keep years between year_start and year_end
            if year_start:
                daily_df = daily_df[daily_df[year_field] >= year_start]
            if year_end:
                daily_df = daily_df[daily_df[year_field] <= year_end]

            year_sub_array = np.sort(
                np.unique(np.array(daily_df[year_field]).astype(np.int)))
            logging.debug('    Plot Years: {0}'.format(', '.join(
                list(util.ranges(year_sub_array.tolist())))))
            # logging.debug('    Plot Years: {0}'.format(
            #    ','.join(map(str, year_sub_array.tolist()))))

            # Seasonal/Annual ET
            crop_seasonal_et_df = daily_df[
                daily_df[season_field] > 0].resample('AS',
                                                     how={etact_field: np.sum})
            crop_annual_et_df = daily_df.resample('AS',
                                                  how={etact_field: np.sum})

            crop_output_df.set_value(cell_id, seasonal_et_field,
                                     float(crop_seasonal_et_df.mean()))
            crop_output_df.set_value(cell_id, annual_et_field,
                                     float(crop_annual_et_df.mean()))
            del crop_seasonal_et_df, crop_annual_et_df

            # Compute growing season start and end DOY from dailies
            crop_gs_df = daily_df[[year_field, season_field
                                   ]].resample('AS', how={year_field: np.mean})
            crop_gs_df[gs_start_doy_field] = None
            crop_gs_df[gs_end_doy_field] = None

            crop_gs_fields = [year_field, doy_field, season_field]
            crop_gs_groupby = daily_df[crop_gs_fields].groupby([year_field])
            for year, group in crop_gs_groupby:
                if not np.any(group[season_field].values):
                    logging.debug('  Skipping, season flag was never set to 1')
                    continue

                # Identify "changes" in season flag
                season_diff = np.diff(group[season_field].values)
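                # e.g. season flags [0, 0, 1, 1, 1, 0] -> diff [0, 1, 0, 0, -1];
                # the +1 marks the season start, the -1 the season end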

                # Growing season start
                try:
                    start_i = np.where(season_diff == 1)[0][0] + 1
                    gs_start_doy = float(group.ix[start_i, doy_field])
                except:
                    gs_start_doy = float(min(group[doy_field].values))
                crop_gs_df.set_value(group.index[0], gs_start_doy_field,
                                     gs_start_doy)

                # Growing season end
                try:
                    end_i = np.where(season_diff == -1)[0][0] + 1
                    gs_end_doy = float(group.ix[end_i, doy_field])
                except:
                    gs_end_doy = float(max(group[doy_field].values))
                crop_gs_df.set_value(group.index[0], gs_end_doy_field,
                                     gs_end_doy)
                del season_diff

            # Write mean growing season start and end DOY
            crop_output_df.set_value(
                cell_id, gs_start_doy_field,
                int(round(crop_gs_df[gs_start_doy_field].mean(), 0)))
            crop_output_df.set_value(
                cell_id, gs_end_doy_field,
                int(round(crop_gs_df[gs_end_doy_field].mean(), 0)))

            # Growing season length
            crop_output_df.set_value(
                cell_id, gs_length_field,
                int(round(crop_gs_groupby[season_field].sum().mean(), 0)))

            # Crop cuttings
            # Maybe only sum cuttings that are in season
            if (cutting_field in list(daily_df.columns.values)
                    and np.any(daily_df[cutting_field].values)):
                gs_input_fields = [year_field, cutting_field]
                crop_gs_groupby = daily_df[gs_input_fields].groupby(
                    [year_field])
                crop_output_df.set_value(
                    cell_id, cutting_field,
                    int(round(crop_gs_groupby[cutting_field].sum().mean(), 0)))

            # Cleanup
            del crop_gs_groupby, crop_gs_df, crop_gs_fields

        # Make the maps
        logging.debug('')
        title_fmt = 'Crop {0:02d} - {1} - {2}'.format(crop_num, crop_name,
                                                      '{}')

        # Crop acreages
        cell_plot_func(os.path.join(
            output_ws, 'crop_{0:02d}_cdl_acreage.png'.format(crop_num)),
                       cell_geom_dict,
                       crop_area_dict,
                       cmap=cm.YlGn,
                       clabel_str='acres',
                       title_str=title_fmt.format('CDL Area'),
                       **plot_kwargs)

        # Annual/Seasonal ET
        cell_plot_func(os.path.join(
            output_ws, 'crop_{0:02d}_et_actual.png'.format(crop_num)),
                       cell_geom_dict,
                       crop_output_df[annual_et_field].to_dict(),
                       cmap=cm.YlGn,
                       clabel_str='mm',
                       title_str=title_fmt.format('Annual Evapotranspiration'),
                       **plot_kwargs)
        cell_plot_func(
            os.path.join(output_ws,
                         'crop_{0:02d}_et_seasonal.png'.format(crop_num)),
            cell_geom_dict,
            crop_output_df[seasonal_et_field].to_dict(),
            cmap=cm.YlGn,
            clabel_str='mm',
            title_str=title_fmt.format('Seasonal Evapotranspiration'),
            **plot_kwargs)

        # Growing Season Start/End/Length
        cell_plot_func(os.path.join(
            output_ws, 'crop_{0:02d}_gs_start_doy.png'.format(crop_num)),
                       cell_geom_dict,
                       crop_output_df[gs_start_doy_field].to_dict(),
                       cmap=cm.RdYlBu,
                       clabel_str='Day of Year',
                       title_str=title_fmt.format('Growing Season Start'),
                       **plot_kwargs)
        cell_plot_func(os.path.join(
            output_ws, 'crop_{0:02d}_gs_end_doy.png'.format(crop_num)),
                       cell_geom_dict,
                       crop_output_df[gs_end_doy_field].to_dict(),
                       cmap=cm.RdYlBu_r,
                       clabel_str='Day of Year',
                       title_str=title_fmt.format('Growing Season End'),
                       **plot_kwargs)
        cell_plot_func(os.path.join(
            output_ws, 'crop_{0:02d}_gs_length.png'.format(crop_num)),
                       cell_geom_dict,
                       crop_output_df[gs_length_field].to_dict(),
                       cmap=cm.RdYlBu_r,
                       clabel_str='Days',
                       title_str=title_fmt.format('Growing Season Length'),
                       **plot_kwargs)

        # Crop cuttings
        if np.any(crop_output_df[cutting_field].values):
            cell_plot_func(os.path.join(
                output_ws, 'crop_{0:02d}_cuttings.png'.format(crop_num)),
                           cell_geom_dict,
                           crop_output_df[cutting_field].to_dict(),
                           cmap=cm.RdYlBu_r,
                           clabel_str='Cuttings',
                           title_str=title_fmt.format('Crop Cuttings'),
                           **plot_kwargs)

        # Crop area weighted ET
        crop_area_df[crop_column] = pd.Series(crop_area_dict)
        annual_et_df[crop_column] = crop_output_df[annual_et_field]
        seasonal_et_df[crop_column] = crop_output_df[seasonal_et_field]

        # Compute and plot crop weighted average ET
        annual_et = ((annual_et_df * crop_area_df).sum(axis=1) /
                     crop_area_df.sum(axis=1))
        seasonal_et = ((seasonal_et_df * crop_area_df).sum(axis=1) /
                       crop_area_df.sum(axis=1))
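        # e.g. annual ET of [500, 600] mm over areas [100, 50] acres gives
        # (500 * 100 + 600 * 50) / 150 = 533.33 mm area weighted ET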
        cell_plot_func(
            os.path.join(output_ws, 'et_actual.png'),
            cell_geom_dict,
            annual_et[annual_et.notnull()].to_dict(),
            cmap=cm.YlGn,
            clabel_str='mm',
            title_str='Crop Area Weighted Annual Evapotranspiration',
            **plot_kwargs)
        cell_plot_func(
            os.path.join(output_ws, 'et_seasonal.png'),
            cell_geom_dict,
            seasonal_et[seasonal_et.notnull()].to_dict(),
            cmap=cm.YlGn,
            clabel_str='mm',
            title_str='Crop Area Weighted Seasonal Evapotranspiration',
            **plot_kwargs)
        del annual_et, seasonal_et

        # Cleanup
        del crop_output_df
        gc.collect()

    # Compute and plot crop weighted average ET
    annual_et_df *= crop_area_df
    seasonal_et_df *= crop_area_df
    annual_et_df = annual_et_df.sum(axis=1) / crop_area_df.sum(axis=1)
    seasonal_et_df = seasonal_et_df.sum(axis=1) / crop_area_df.sum(axis=1)
    annual_et_df = annual_et_df[annual_et_df.notnull()]
    seasonal_et_df = seasonal_et_df[seasonal_et_df.notnull()]
    cell_plot_func(os.path.join(output_ws, 'et_actual.png'),
                   cell_geom_dict,
                   annual_et_df.to_dict(),
                   cmap=cm.YlGn,
                   clabel_str='mm',
                   title_str='Crop Area Weighted Annual Evapotranspiration',
                   **plot_kwargs)
    cell_plot_func(os.path.join(output_ws, 'et_seasonal.png'),
                   cell_geom_dict,
                   seasonal_et_df.to_dict(),
                   cmap=cm.YlGn,
                   clabel_str='mm',
                   title_str='Crop Area Weighted Seasonal Evapotranspiration',
                   **plot_kwargs)

    # Cleanup
    del crop_area_df, annual_et_df, seasonal_et_df
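A hedged usage sketch for the mapping function above; the INI path, crop subset, and area threshold are placeholder values, not taken from the original project:

# Hypothetical invocation; the INI path, crop list, and threshold are placeholders
main('example_project.ini', show_flag=False, save_flag=True,
     crop_str='3,7-9', area_threshold=10)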
Code Example #18
def main(ini_path, overwrite_flag=True, cleanup_flag=True,
         growing_season=False, year_filter=[]):
    """Create crop area weighted ETact and NIWR shapefiles
    from the daily_stats files

    Args:
        ini_path (str): file path of the project INI file
        overwrite_flag (bool): If True (default), overwrite existing files
        cleanup_flag (bool): If True, remove temporary files
        growing_season (bool): If True, filter data to April-October
        year_filter (int): if set, only include data for this year

    Returns:
        None
    """
    # print('SCRIPT STILL IN DEVELOPMENT (SEE CODE). EXITING')
    # sys.exit()

    logging.info('\nCreating Crop Area Weighted Shapefiles')
    #  INI path
    config = util.read_ini(ini_path, section='CROP_ET')
    try:
        project_ws = config.get('CROP_ET', 'project_folder')
    except:
        logging.error(
            'project_folder parameter must be set in the INI file, exiting')
        return False
    try:
        gis_ws = config.get('CROP_ET', 'gis_folder')
    except:
        logging.error(
            'gis_folder parameter must be set in the INI file, exiting')
        return False
    try:
        et_cells_path = config.get('CROP_ET', 'cells_path')
    except:
        logging.error(
            'cells_path parameter must be set in the INI file, exiting')
        return False

    # Year Filter
    if year_filter:
        logging.info('\nEstimating Data for {0}'.format(year_filter))

    # Sub folder names
    daily_ws = os.path.join(project_ws, 'daily_stats')
    gs_ws = os.path.join(project_ws, 'growing_season_stats')

    # Check input folders
    if not os.path.exists(daily_ws):
        logging.critical('ERROR: The daily_stat folder does not exist.'
                         ' Check .ini settings')
        sys.exit()

    # Check input folders
    if not os.path.isdir(project_ws):
        logging.critical(('ERROR: The project folder ' +
                          'does not exist\n  {}').format(project_ws))
        sys.exit()
    elif not os.path.isdir(gis_ws):
        logging.critical(('ERROR: The GIS folder ' +
                          'does not exist\n  {}').format(gis_ws))
        sys.exit()
    logging.info('\nGIS Workspace:      {0}'.format(gis_ws))

    # Create Output folder if it doesn't exist
    output_folder_path = os.path.join(project_ws, 'cropweighted_shapefile')
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    # Regular expressions
    data_re = re.compile(r'(?P<CELLID>\w+)_crop_(?P<CROP>\d+)\.csv$', re.I)
    # data_re = re.compile(r'(?P<CELLID>\w+)_daily_crop_(?P<CROP>\d+)\.csv$', re.I)
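    # Example match (file name hypothetical):
    #   '12345_crop_03.csv' -> CELLID='12345', CROP='03'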

    # Build list of all data files
    data_file_list = sorted(
        [os.path.join(daily_ws, f_name) for f_name in os.listdir(daily_ws)
         if data_re.match(f_name)])
    if not data_file_list:
        logging.error(
            '  ERROR: No daily ET files were found\n' +
            '  ERROR: Check the folder_name parameters\n')
        sys.exit()

    cells = read_shapefile(et_cells_path)

    # Start with empty lists
    stations = []
    crop_nums = []

    # Process each file
    for file_path in data_file_list:
        file_name = os.path.basename(file_path)
        logging.debug('')
        logging.info('  {0}'.format(file_name))

        # station, crop_num = os.path.splitext(file_name)[0].split('_daily_crop_')
        station, crop_num = os.path.splitext(file_name)[0].split('_crop_')
        stations.append(station)
        crop_num = int(crop_num)
        crop_nums.append(crop_num)

    # Find unique crops and station ids
    unique_crop_nums = list(set(crop_nums))
    unique_stations = list(set(stations))

    # Variables to calculate output statistics
    var_list = ['ETact', 'NIWR']

    logging.info('\n Creating Crop Area Weighted Shapefiles')
    if year_filter:
        logging.info('\nFiltering by Year: {}'.format(year_filter))
    if growing_season:
        logging.info('\nFiltering stats to Growing Season, Apr-Oct')

    for crop in unique_crop_nums:
        logging.info('\n Processing Crop: {:02d}'.format(crop))

        # Initialize df variable to check if pandas df needs to be created
        df = None
        for station in unique_stations:
            # Build File Path
            file_path = os.path.join(daily_ws,
                                     '{}_crop_{:02d}.csv'.format(station,
                                                                 crop))
            # Only process files that exist (crop/cell combinations)
            if not os.path.exists(file_path):
                continue

            # Read file into df
            daily_df = pd.read_csv(file_path, skiprows=1)
            # Filter data based on year_filter
            if year_filter:
                daily_df = daily_df[daily_df['Year'] == year_filter]
                logging.info('\nFiltering by Year: {}'.format(year_filter))

            # Remove all non-growing season data if growing season flag = True
            # UPDATE TO USE SEASON FLAG IN DAILY CSV FILES (0 or 1)
            if growing_season:
                daily_df = daily_df[
                    (daily_df['Month'] >= 4) & (daily_df['Month'] <= 10)]
                logging.info('\nFiltering stats to Growing Season, Apr-Oct')
            # if growing_season:
            #     daily_df = daily_df[(daily_df['Season'] == 1)]

            # Dictionary to control aggregation of each variable
            a = {
                'ETact': 'sum',
                'NIWR': 'sum'}

            # GroupStats by Year of each column following the agg assignment above
            yearlygroup_df = daily_df.groupby('Year', as_index=True).agg(a)

            # Take mean of yearly GroupStats
            mean_df = yearlygroup_df.mean(axis=0)
            mean_fieldnames = [v + '_mn_{:02d}'.format(crop) for v in var_list]

            # Take median of yearly GroupStats
            median_df = yearlygroup_df.median(axis=0)
            median_fieldnames = [v + '_md_{:02d}'.format(crop) for v in var_list]
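            # e.g. for crop 3 the generated field names are
            #   ['ETact_mn_03', 'NIWR_mn_03'] and ['ETact_md_03', 'NIWR_md_03']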

            # Create dataframe if it doesn't exist
            if df is None:
                df = pd.DataFrame(index=unique_stations,
                                  columns=mean_fieldnames + median_fieldnames)

            # Write data to each station row
            df.loc[station] = list(mean_df[var_list]) + \
                              list(median_df[var_list])

        # Convert index to integers
        df.index = df.index.map(int)

        # Remove rows with Na (Is this the best option???)
        df = df.dropna()

        # Merge Crop ETact and NIWR to cells dataframe
        cells = pd.merge(cells, df, how='left', left_on=['GRIDMET_ID'],
                         right_index=True)

    # Change Ag_Acres cells with zero area to nan (Avoid ZeroDivisionError)
    cells[cells['AG_ACRES'] == 0] = np.nan

    # Calculate CropArea Weighted ETact and NIWR for each cell
    # List Comprehension (All combinations of var_list and stat)
    # https://www.safaribooksonline.com/library/view/python-cookbook/0596001673/ch01s15.html
    for var, stat in [(var, stat) for var in var_list for stat in ['mn', 'md']]:
        # Initialize empty columns (zeros)
        cells['CW{0}_{1}'.format(var, stat)] = 0
        for crop in unique_crop_nums:
            # Reset temp
            temp = []
            # Calculate the crop fraction of the weighted rate
            temp = cells['CROP_{0:02d}'.format(crop)].multiply(
                cells['{0}_{1}_{2:02d}'.format(var, stat, crop)]).divide(
                    cells['AG_ACRES'])
            # Replace nan with zero
            temp = temp.fillna(0)
            # Add the crop fraction to the total weighted rate
            cells['CW{0}_{1}'.format(var, stat)] = \
                cells['CW{0}_{1}'.format(var, stat)].add(temp)
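    # e.g. a cell with AG_ACRES = 100, of which CROP_03 = 40 acres at
    # ETact_mn_03 = 500 mm, contributes 40 / 100 * 500 = 200 mm to CWETact_mn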

    # Subset to "Final" dataframe for merge to output .shp
    Final = cells[['GRIDMET_ID', 'CWETact_mn', 'CWNIWR_mn',
                   'CWETact_md', 'CWNIWR_md']]

    # Copy ETCELLS.shp and join cropweighted data to it
    data = gpd.read_file(et_cells_path)

    # UPDATE TO NEWER ETCELLS STATION_ID FORMAT !!!!!
    merged_data = data.merge(Final, on='GRIDMET_ID')
    if not year_filter:
        year_filter = 'AllYears'
    if growing_season:
        out_filepath = os.path.join(output_folder_path,
                                    '{}_GS_CropWeighted.shp'.format(year_filter))
    else:
        out_filepath = os.path.join(output_folder_path,
                                    '{}_Ann_CropWeighted.shp'.format(year_filter))
    # Write output .shp
    merged_data.to_file(out_filepath)
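A hedged usage sketch for the crop weighted export above; the INI path and year are placeholder values:

# Hypothetical invocation; the INI path and year are placeholders
main('example_project.ini', growing_season=True, year_filter=2015)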