def main(ini_path, show_flag=False, overwrite_flag=True):
    """Generate Bokeh figures

    Reads the zonal stats CSV named in the INI file, applies the INPUTS and
    SUMMARY filters, and then writes two HTML files per ZONE_NAME into the
    "figures" folder under the SUMMARY output workspace:
    "<zone>_timeseries_doy.html" (plot variables against DOY) and
    "<zone>_timeseries_date.html" (plot variables against DATE).
    Points are grouped into one data source per QA value and colored by QA.

    Bokeh issues:
    Adjust y range based on non-muted data
        https://stackoverflow.com/questions/43620837/how-to-get-bokeh-to-dynamically-adjust-y-range-when-panning
    Linked interactive legends so that there is only one legend for the gridplot
    Maybe hide or mute QA values above max (instead of filtering them in advance)

    Args:
        ini_path (str): path of the INI file passed to inputs.read().
        show_flag (bool): if True, show the figures in the browser and wait
            for ENTER after each zone.
            Default is False.
        overwrite_flag (bool): currently not consulted; existing HTML files
            are always removed and rewritten.
            Default is True (for now)

    Returns:
        False if the dataframe is empty after filtering, otherwise None.
        sys.exit() is called if a plot variable is missing from the CSV.
    """
    logging.info('\nGenerate interactive timeseries figures')

    # Eventually read from INI
    plot_var_list = ['NDVI_TOA', 'ALBEDO_SUR', 'TS', 'NDWI_TOA', 'EVI_SUR']
    output_folder = 'figures'

    # Read config file
    ini = inputs.read(ini_path)
    for section in ['INPUTS', 'ZONAL_STATS', 'SUMMARY', 'FIGURES', 'BEAMER']:
        inputs.parse_section(ini, section=section)

    # Output paths
    output_ws = os.path.join(ini['SUMMARY']['output_ws'], output_folder)
    if not os.path.isdir(output_ws):
        os.makedirs(output_ws)

    # Start/end year
    year_list = list(
        range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    # The months are only reported in the log here
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(
        utils.month_range(gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(', '.join(
        map(str, gridmet_months))))

    # Read in the zonal stats CSV
    logging.debug('  Reading zonal stats CSV file')
    input_df = pd.read_csv(
        os.path.join(ini['ZONAL_STATS']['output_ws'],
                     ini['BEAMER']['output_name']))
    logging.debug(input_df.head())

    logging.debug('  Filtering Landsat dataframe')
    input_df = input_df[input_df['PIXEL_COUNT'] > 0]

    # Zone filters
    if ini['INPUTS']['fid_keep_list']:
        input_df = input_df[input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_keep_list'])]
    if ini['INPUTS']['fid_skip_list']:
        input_df = input_df[~input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_skip_list'])]

    # Date filters (an empty list means "do not filter")
    if year_list:
        input_df = input_df[input_df['YEAR'].isin(year_list)]
    if month_list:
        input_df = input_df[input_df['MONTH'].isin(month_list)]
    if doy_list:
        input_df = input_df[input_df['DOY'].isin(doy_list)]

    # Path/row filters
    if ini['INPUTS']['path_keep_list']:
        input_df = input_df[input_df['PATH'].isin(
            ini['INPUTS']['path_keep_list'])]
    # A row keep list of exactly ['XXX'] is treated as "no row filter"
    if (ini['INPUTS']['row_keep_list']
            and ini['INPUTS']['row_keep_list'] != ['XXX']):
        input_df = input_df[input_df['ROW'].isin(
            ini['INPUTS']['row_keep_list'])]

    # Assume the default is for these to be True and only filter if False
    platform_flags = [
        ('landsat4_flag', 'LT04'), ('landsat5_flag', 'LT05'),
        ('landsat7_flag', 'LE07'), ('landsat8_flag', 'LC08')]
    for flag_name, platform in platform_flags:
        if not ini['INPUTS'][flag_name]:
            input_df = input_df[input_df['PLATFORM'] != platform]

    # SCENE_ID values can't be compared to the keep/skip lists directly
    # because they contain an XXX placeholder that the lists don't
    if ini['INPUTS']['scene_id_keep_list']:
        input_df = input_df[_scene_id_mask(
            input_df, ini['INPUTS']['scene_id_keep_list'])]
    if ini['INPUTS']['scene_id_skip_list']:
        input_df = input_df[np.logical_not(_scene_id_mask(
            input_df, ini['INPUTS']['scene_id_skip_list']))]

    # Filter by QA/QC value
    if ini['SUMMARY']['max_qa'] >= 0 and not input_df.empty:
        logging.debug('    Maximum QA: {0}'.format(ini['SUMMARY']['max_qa']))
        input_df = input_df[input_df['QA'] <= ini['SUMMARY']['max_qa']]

    # First filter by average cloud score
    if ini['SUMMARY']['max_cloud_score'] < 100 and not input_df.empty:
        logging.debug('    Maximum cloud score: {0}'.format(
            ini['SUMMARY']['max_cloud_score']))
        input_df = input_df[
            input_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

    # Filter by Fmask percentage
    if ini['SUMMARY']['max_fmask_pct'] < 100 and not input_df.empty:
        # assign() builds a new dataframe instead of writing into a
        # filtered slice, which avoids pandas' SettingWithCopyWarning
        input_df = input_df.assign(FMASK_PCT=100 * (
            input_df['FMASK_COUNT'] / input_df['FMASK_TOTAL']))
        logging.debug('    Max Fmask threshold: {}'.format(
            ini['SUMMARY']['max_fmask_pct']))
        input_df = input_df[
            input_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

    # Filter low count SLC-off images
    if ini['SUMMARY']['min_slc_off_pct'] > 0 and not input_df.empty:
        logging.debug('    Mininum SLC-off threshold: {}%'.format(
            ini['SUMMARY']['min_slc_off_pct']))
        # LE07 rows from 2003 after DOY 151 and from 2004 onward
        slc_off_mask = ((input_df['PLATFORM'] == 'LE07') &
                        ((input_df['YEAR'] >= 2004) |
                         ((input_df['YEAR'] == 2003) &
                          (input_df['DOY'] > 151))))
        slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / input_df['PIXEL_TOTAL'])
        # Only the SLC-off rows are subject to the minimum percentage
        input_df = input_df[((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct'])
                             & slc_off_mask) | (~slc_off_mask)]

    if input_df.empty:
        logging.error('  Empty dataframe after filtering, exiting')
        return False

    # Settings shared by both figures of every zone
    # Tooltip fields must be column names in the column data sources
    tooltips = [("LANDSAT", "@PLATFORM"), ("DATE", "@DATE_STR"),
                ("DOY", "@DOY")]
    base_figure_args = dict(
        plot_width=750,
        plot_height=250,
        title=None,
        tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select",
        active_scroll="xwheel_zoom")
    # Guard against a zero column grid if fewer than 3 variables are plotted
    grid_ncols = max(1, len(plot_var_list) // 3)

    # Process each zone separately
    logging.debug(input_df.head())
    zone_name_list = sorted(set(input_df['ZONE_NAME'].values))
    for zone_name in zone_name_list:
        logging.info('ZONE: {}'.format(zone_name))
        # The names are currently stored in the CSV as spaces
        zone_output_name = zone_name.replace(' ', '_')
        zone_df = input_df[input_df['ZONE_NAME'] == zone_name]
        if zone_df.empty:
            logging.info('  Empty zone dataframe, skipping zone')
            continue

        # Output file paths
        output_doy_path = os.path.join(
            output_ws, '{}_timeseries_doy.html'.format(zone_output_name))
        output_date_path = os.path.join(
            output_ws, '{}_timeseries_date.html'.format(zone_output_name))

        # Check that plot variables are present
        for plot_var in plot_var_list:
            if plot_var not in zone_df.columns.values:
                logging.error('  The variable {} does not exist in the '
                              'dataframe'.format(plot_var))
                sys.exit()

        # Compute colors for each QA value
        logging.debug('  Building column data source')
        qa_values = sorted(set(zone_df['QA'].values))
        qa_rgba = 255 * cm.viridis(mpl.colors.Normalize()(qa_values))
        colors = {
            qa: "#%02x%02x%02x" % (int(r), int(g), int(b))
            for qa, (r, g, b, _) in zip(qa_values, qa_rgba)
        }
        logging.debug('  QA values: {}'.format(', '.join(map(str, qa_values))))

        # Unpack the data by QA type to support interactive legends
        sources = dict()
        for qa_value in qa_values:
            qa_df = zone_df[zone_df['QA'] == qa_value]
            qa_dates = pd.to_datetime(qa_df['DATE'])
            qa_data = {
                'INDEX': list(range(len(qa_df.index))),
                'PLATFORM': qa_df['PLATFORM'],
                'DATE': qa_dates,
                'DATE_STR': qa_dates.map(lambda x: x.strftime('%Y-%m-%d')),
                'DOY': qa_df['DOY'].values,
                'QA': qa_df['QA'].values,
                'COLOR': [colors[qa] for qa in qa_df['QA'].values]
            }
            # Presence of every plot variable was checked above
            for plot_var in plot_var_list:
                qa_data[plot_var] = qa_df[plot_var].values
            sources[qa_value] = bokeh.models.ColumnDataSource(qa_data)

        # Selection
        glyphs = (
            Circle(fill_color='#ff0000', line_color='#ff0000'),
            Circle(fill_color='COLOR', line_color='COLOR'),
            Circle(fill_color='#aaaaaa', line_color='#aaaaaa'))

        plot_args = dict(size=4, alpha=0.9, color='COLOR')
        if ini['SUMMARY']['max_qa'] > 0:
            plot_args['legend'] = 'QA'

        # (log label, x field, output path, extra figure args, y bounds)
        # Only the date figure uses a datetime axis and bounds the TS range
        figure_specs = [
            ('DOY', 'DOY', output_doy_path, dict(), None),
            ('date', 'DATE', output_date_path,
             dict(x_axis_type="datetime"), {'TS': (270, None)}),
        ]
        for label, x_field, output_path, extra_args, y_bounds in figure_specs:
            logging.debug('  Building {} timeseries figure'.format(label))
            if os.path.isfile(output_path):
                os.remove(output_path)
            output_file(output_path, title=zone_name)

            figure_args = dict(base_figure_args)
            figure_args.update(extra_args)
            figures = _build_timeseries_figures(
                sources, plot_var_list, x_field, figure_args, plot_args,
                tooltips, glyphs, y_bounds=y_bounds)

            p = gridplot(figures,
                         ncols=grid_ncols,
                         sizing_mode='stretch_both')

            if show_flag:
                show(p)
            save(p)

        # Pause after each iteration if show is True
        if show_flag:
            input('Press ENTER to continue')


def _scene_id_mask(df, scene_id_list):
    """Return a boolean array marking rows whose SCENE_ID is in the list

    The XXX placeholder in each SCENE_ID is replaced with the zero padded
    ROW value of the same dataframe row before comparing to scene_id_list.
    """
    scene_ids = pd.Series([
        s.replace('XXX', '{:03d}'.format(int(r)))
        for s, r in zip(df['SCENE_ID'], df['ROW'])
    ])
    # Return a positional array so the mask ignores the dataframe index
    return scene_ids.isin(scene_id_list).values


def _build_timeseries_figures(sources, plot_var_list, x_field, figure_args,
                              plot_args, tooltips, glyphs, y_bounds=None):
    """Build one Bokeh figure per plot variable with a shared x range

    Args:
        sources (dict): column data sources keyed by QA value.
        plot_var_list (list): column names to plot, one figure for each.
        x_field (str): column name used for the x axis.
        figure_args (dict): keyword arguments passed to figure().
        plot_args (dict): keyword arguments passed to figure.circle().
        tooltips (list): hover tool tooltips.
        glyphs (tuple): hover, selection, and nonselection glyphs.  The
            nonselection glyph is also used as the muted glyph.
        y_bounds (dict): optional y range bounds keyed by plot variable.

    Returns:
        list of figures in plot_var_list order.
    """
    hover_glyph, selection_glyph, nonselection_glyph = glyphs
    figures = []
    shared_x_range = None
    for plot_var in plot_var_list:
        if shared_x_range is None:
            f = figure(y_axis_label=plot_var, **figure_args)
            # Later figures reuse this range so that pan/zoom are linked
            shared_x_range = f.x_range
        else:
            f = figure(x_range=shared_x_range,
                       y_axis_label=plot_var,
                       **figure_args)

        if y_bounds and plot_var in y_bounds:
            f.y_range.bounds = y_bounds[plot_var]

        # Add the renderers in QA order
        for qa in sorted(sources):
            r = f.circle(x_field, plot_var, source=sources[qa], **plot_args)
            r.hover_glyph = hover_glyph
            r.selection_glyph = selection_glyph
            r.nonselection_glyph = nonselection_glyph
            r.muted_glyph = nonselection_glyph

        f.add_tools(bokeh.models.HoverTool(tooltips=tooltips))

        f.legend.location = "top_left"
        f.legend.click_policy = "hide"
        f.legend.orientation = "horizontal"

        figures.append(f)
    return figures
def main(ini_path, show_flag=False, overwrite_flag=False):
    """Generate Beamer ETg summary figures

    Args:
        ini_path (str):
        show_flag (bool): if True, show the figures in the browser.
            Default is False.
        overwrite_flag (bool): if True, overwrite existing figures
            Default is True (for now)
    """

    logging.info('\nGenerate Beamer ETg summary figures')

    ncolors = [
        '#348ABD', '#7A68A6', '#A60628', '#467821',
        '#CF4457', '#188487', '#E24A33']
    xtick_fs = 8
    ytick_fs = 8
    xlabel_fs = 8
    ylabel_fs = 8
    ms = 2
    figsize = (3.0, 2.5)
    output_folder = 'figures'

    # For unit conversion
    eto_fields = [
        'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI',
        'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI',
        'WY_ETO']
    ppt_fields = ['WY_PPT']

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='FIGURES')
    inputs.parse_section(ini, section='BEAMER')

    # Output paths
    output_ws = os.path.join(
        ini['SUMMARY']['output_ws'], output_folder)
    if not os.path.isdir(output_ws):
        os.makedirs(output_ws)

    # Start/end year
    year_list = list(range(
        ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(utils.wrapped_range(
        ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(utils.wrapped_range(
        ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(utils.month_range(
        gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(
        ', '.join(map(str, gridmet_months))))

    # Read in the zonal stats CSV
    logging.debug('  Reading zonal stats CSV file')
    input_df = pd.read_csv(os.path.join(
        ini['ZONAL_STATS']['output_ws'], ini['BEAMER']['output_name']))
    logging.debug(input_df.head())

    logging.debug('  Filtering Landsat dataframe')
    input_df = input_df[input_df['PIXEL_COUNT'] > 0]

    # # This assumes that there are L5/L8 images in the dataframe
    # if not input_df.empty:
    #     max_pixel_count = max(input_df['PIXEL_COUNT'])
    # else:
    #     max_pixel_count = 0

    if ini['INPUTS']['fid_keep_list']:
        input_df = input_df[input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_keep_list'])]
    if ini['INPUTS']['fid_skip_list']:
        input_df = input_df[~input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_skip_list'])]

    if year_list:
        input_df = input_df[input_df['YEAR'].isin(year_list)]
    if month_list:
        input_df = input_df[input_df['MONTH'].isin(month_list)]
    if doy_list:
        input_df = input_df[input_df['DOY'].isin(doy_list)]

    if ini['INPUTS']['path_keep_list']:
        input_df = input_df[
            input_df['PATH'].isin(ini['INPUTS']['path_keep_list'])]
    if (ini['INPUTS']['row_keep_list'] and
            ini['INPUTS']['row_keep_list'] != ['XXX']):
        input_df = input_df[
            input_df['ROW'].isin(ini['INPUTS']['row_keep_list'])]

    # Assume the default is for these to be True and only filter if False
    if not ini['INPUTS']['landsat4_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT04']
    if not ini['INPUTS']['landsat5_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT05']
    if not ini['INPUTS']['landsat7_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LE07']
    if not ini['INPUTS']['landsat8_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LC08']

    if ini['INPUTS']['scene_id_keep_list']:
        # Replace XXX with primary ROW value for checking skip list SCENE_ID
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])])
        input_df = input_df[scene_id_df.isin(
            ini['INPUTS']['scene_id_keep_list']).values]
        # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
        # input_df = input_df[input_df['SCENE_ID'].isin(
        #     ini['INPUTS']['scene_id_keep_list'])]
    if ini['INPUTS']['scene_id_skip_list']:
        # Replace XXX with primary ROW value for checking skip list SCENE_ID
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])])
        input_df = input_df[np.logical_not(scene_id_df.isin(
            ini['INPUTS']['scene_id_skip_list']).values)]
        # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
        # input_df = input_df[~input_df['SCENE_ID'].isin(
        #     ini['INPUTS']['scene_id_skip_list'])]

    # Filter by QA/QC value
    if ini['SUMMARY']['max_qa'] >= 0 and not input_df.empty:
        logging.debug('    Maximum QA: {0}'.format(
            ini['SUMMARY']['max_qa']))
        input_df = input_df[input_df['QA'] <= ini['SUMMARY']['max_qa']]

    # First filter by average cloud score
    if ini['SUMMARY']['max_cloud_score'] < 100 and not input_df.empty:
        logging.debug('    Maximum cloud score: {0}'.format(
            ini['SUMMARY']['max_cloud_score']))
        input_df = input_df[
            input_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

    # Filter by Fmask percentage
    if ini['SUMMARY']['max_fmask_pct'] < 100 and not input_df.empty:
        input_df['FMASK_PCT'] = 100 * (
            input_df['FMASK_COUNT'] / input_df['FMASK_TOTAL'])
        logging.debug('    Max Fmask threshold: {}'.format(
            ini['SUMMARY']['max_fmask_pct']))
        input_df = input_df[
            input_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

    # Filter low count SLC-off images
    if ini['SUMMARY']['min_slc_off_pct'] > 0 and not input_df.empty:
        logging.debug('    Mininum SLC-off threshold: {}%'.format(
            ini['SUMMARY']['min_slc_off_pct']))
        # logging.debug('    Maximum pixel count: {}'.format(
        #     max_pixel_count))
        slc_off_mask = (
            (input_df['PLATFORM'] == 'LE07') &
            ((input_df['YEAR'] >= 2004) |
             ((input_df['YEAR'] == 2003) & (input_df['DOY'] > 151))))
        slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / input_df['PIXEL_TOTAL'])
        # slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / max_pixel_count)
        input_df = input_df[
            ((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) & slc_off_mask) |
            (~slc_off_mask)]

    if input_df.empty:
        logging.error('  Empty dataframe after filtering, exiting')
        return False


    # Process each zone separately
    logging.debug(input_df.head())
    zone_name_list = sorted(list(set(input_df['ZONE_NAME'].values)))
    for zone_name in zone_name_list:
        logging.info('ZONE: {}'.format(zone_name))
        # The names are currently stored in the CSV with spaces
        zone_output_name = zone_name.replace(' ', '_')
        zone_df = input_df[input_df['ZONE_NAME'] == zone_name]
        if zone_df.empty:
            logging.info('  Empty zone dataframe, skipping zone')
            continue


        logging.debug('  Computing annual summaries')
        annual_df = zone_df \
            .groupby(['ZONE_NAME', 'YEAR']) \
            .agg({
                'PIXEL_COUNT': ['count', 'mean'],
                'PIXEL_TOTAL': ['mean'],
                'FMASK_COUNT': 'mean',
                'FMASK_TOTAL': 'mean',
                'CLOUD_SCORE': 'mean',
                'ETSTAR_COUNT': 'mean',
                'NDVI_TOA': 'mean',
                'NDWI_TOA': 'mean',
                'ALBEDO_SUR': 'mean',
                'TS': 'mean',
                'EVI_SUR': 'mean',
                'ETSTAR_MEAN': 'mean',
                'ETG_MEAN': 'mean',
                'ETG_LPI': 'mean',
                'ETG_UPI': 'mean',
                'ETG_LCI': 'mean',
                'ETG_UCI': 'mean',
                'ET_MEAN': 'mean',
                'ET_LPI': 'mean',
                'ET_UPI': 'mean',
                'ET_LCI': 'mean',
                'ET_UCI': 'mean',
                'WY_ETO': 'mean',
                'WY_PPT': 'mean'
            })
        annual_df.columns = annual_df.columns.map('_'.join)
        annual_df = annual_df.rename(columns={
            'PIXEL_COUNT_count': 'SCENE_COUNT',
            'PIXEL_COUNT_mean': 'PIXEL_COUNT'})
        annual_df.rename(
            columns=lambda x: str(x).replace('_mean', ''), inplace=True)
        annual_df['SCENE_COUNT'] = annual_df['SCENE_COUNT'].astype(np.int)
        annual_df['PIXEL_COUNT'] = annual_df['PIXEL_COUNT'].astype(np.int)
        annual_df['PIXEL_TOTAL'] = annual_df['PIXEL_TOTAL'].astype(np.int)
        annual_df['FMASK_COUNT'] = annual_df['FMASK_COUNT'].astype(np.int)
        annual_df['FMASK_TOTAL'] = annual_df['FMASK_TOTAL'].astype(np.int)
        annual_df['ETSTAR_COUNT'] = annual_df['ETSTAR_COUNT'].astype(np.int)
        annual_df = annual_df.reset_index()

        # Convert ETo units
        if (ini['BEAMER']['eto_units'] == 'mm' and
                ini['FIGURES']['eto_units'] == 'mm'):
            pass
        elif (ini['BEAMER']['eto_units'] == 'mm' and
                ini['FIGURES']['eto_units'] == 'in'):
            annual_df[eto_fields] /= (25.4)
        elif (ini['BEAMER']['eto_units'] == 'mm' and
                ini['FIGURES']['eto_units'] == 'ft'):
            annual_df[eto_fields] /= (12 * 25.4)
        else:
            logging.error(
                ('\nERROR: Input units {} and output units {} are not ' +
                 'currently supported, exiting').format(
                    ini['BEAMER']['eto_units'], ini['FIGURES']['eto_units']))
            sys.exit()

        # Convert PPT units
        if (ini['BEAMER']['ppt_units'] == 'mm' and
                ini['FIGURES']['ppt_units'] == 'mm'):
            pass
        elif (ini['BEAMER']['ppt_units'] == 'mm' and
                ini['FIGURES']['ppt_units'] == 'in'):
            annual_df[ppt_fields] /= (25.4)
        elif (ini['BEAMER']['ppt_units'] == 'mm' and
                ini['FIGURES']['ppt_units'] == 'ft'):
            annual_df[ppt_fields] /= (12 * 25.4)
        else:
            logging.error(
                ('\nERROR: Input units {} and output units {} are not '
                 'currently supported, exiting').format(
                    ini['BEAMER']['ppt_units'], ini['FIGURES']['ppt_units']))
            sys.exit()


        logging.debug('  Generating figures')
        zone_df = annual_df[annual_df['ZONE_NAME'] == zone_name]
        year_min, year_max = min(zone_df['YEAR']), max(zone_df['YEAR'])

        # Set default PPT min/max scaling
        ppt_min = 0
        if ini['FIGURES']['ppt_units'] == 'mm':
            ppt_max = 100 * math.ceil((max(zone_df['WY_PPT']) + 100) / 100)
        elif ini['FIGURES']['ppt_units'] == 'ft':
            ppt_max = 0.2 * math.ceil((max(zone_df['WY_PPT']) + 0.1) / 0.2)
        else:
            ppt_max = 1.2 * max(zone_df['WY_PPT'])


        logging.debug('    EVI vs PPT')
        figure_path = os.path.join(
            output_ws, '{}_evi.png'.format(zone_output_name))
        fig = plt.figure(figsize=figsize)
        ax1 = fig.add_axes([0.20, 0.21, 0.65, 0.75])
        ax1.set_xlabel('Year', fontsize=xlabel_fs)
        ax2 = ax1.twinx()
        ax1.plot(
            zone_df['YEAR'].values, zone_df['WY_PPT'],
            marker='o', c='0.5', ms=ms, label='WY PPT')
        ax1.yaxis.tick_right()
        ax1.yaxis.set_label_position("right")
        ax1.set_xlim([year_min - 1, year_max + 1])
        ax1.set_ylim([ppt_min, ppt_max])
        ax1.tick_params(axis='y', labelsize=ytick_fs)
        ax1.tick_params(axis='x', labelsize=xtick_fs)
        ax1.tick_params(axis='x', which='both', top='off')
        ax1.xaxis.set_minor_locator(MultipleLocator(1))
        for tick in ax1.get_xticklabels():
            tick.set_rotation(45)
            tick.set_ha('right')
        ax1.set_ylabel(
            'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']),
            fontsize=ylabel_fs)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['EVI_SUR'].values,
            marker='o', c=ncolors[0], ms=ms,
            label='EVI')
        ax1.plot(0, 0, marker='o', c=ncolors[0], ms=ms, label='EVI')
        ax2.yaxis.tick_left()
        ax2.yaxis.set_label_position("left")
        ax2.set_ylim([
            0.05 * math.floor((min(zone_df['EVI_SUR']) - 0.01) / 0.05),
            0.05 * math.ceil((max(zone_df['EVI_SUR']) + 0.01) / 0.05)])
        ax2.tick_params(axis='y', labelsize=ytick_fs)
        ax2.set_ylabel('EVI [dimensionless]', fontsize=ylabel_fs)
        ax1.legend(
            loc='upper right', frameon=False, fontsize=6, numpoints=1)
        if overwrite_flag or not os.path.isfile(figure_path):
            plt.savefig(figure_path, dpi=300)
        plt.close()
        del fig, ax1, ax2


        logging.debug('    ETo vs PPT')
        figure_path = os.path.join(
            output_ws, '{}_eto.png'.format(zone_output_name))
        fig = plt.figure(figsize=figsize)
        ax1 = fig.add_axes([0.18, 0.21, 0.67, 0.75])
        ax1.set_xlabel('Year', fontsize=xlabel_fs)
        ax2 = ax1.twinx()
        ax1.plot(
            zone_df['YEAR'].values, zone_df['WY_PPT'],
            marker='o', c='0.5', ms=ms, label='WY PPT')
        ax1.yaxis.tick_right()
        ax1.yaxis.set_label_position("right")
        ax1.set_xlim([year_min - 1, year_max + 1])
        ax1.set_ylim([ppt_min, ppt_max])
        ax1.tick_params(axis='y', labelsize=ytick_fs)
        ax1.tick_params(axis='x', labelsize=xtick_fs)
        ax1.tick_params(axis='x', which='both', top='off')
        ax1.xaxis.set_minor_locator(MultipleLocator(1))
        for tick in ax1.get_xticklabels():
            tick.set_rotation(45)
            tick.set_ha('right')
        ax1.set_ylabel(
            'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']),
            fontsize=ylabel_fs)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['WY_ETO'].values,
            marker='o', c=ncolors[1], ms=ms, label='ETo')
        ax1.plot(0, 0, marker='o', c=ncolors[1], ms=ms, label='ETo')
        ax2.yaxis.tick_left()
        ax2.yaxis.set_label_position("left")
        ax2.set_ylim([
            max(0, 0.9 * min(zone_df['WY_ETO'])),
            1.1 * max(zone_df['WY_ETO'])])
        # ax2.set_ylim([
        #     max(0, 100 * math.floor((min(zone_df['WY_ETO']) - 100) / 100)),
        #     100 * math.ceil((max(zone_df['WY_ETO']) + 100) / 100)])
        ax2.tick_params(axis='y', labelsize=ytick_fs)
        ax2.set_ylabel(
            'ETo [{}/yr]'.format(ini['FIGURES']['eto_units']),
            fontsize=ylabel_fs)
        ax1.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1)
        if overwrite_flag or not os.path.isfile(figure_path):
            plt.savefig(figure_path, dpi=300)
        plt.close()
        del fig, ax1, ax2


        logging.debug('    ET vs PPT')
        figure_path = os.path.join(
            output_ws, '{}_et.png'.format(zone_output_name))
        fig = plt.figure(figsize=figsize)
        ax1 = fig.add_axes([0.18, 0.21, 0.67, 0.75])
        ax1.set_xlabel('Year', fontsize=xlabel_fs)
        ax2 = ax1.twinx()
        ax1.plot(
            zone_df['YEAR'].values, zone_df['WY_PPT'],
            marker='o', c='0.5', ms=ms, label='WY PPT')
        ax1.yaxis.tick_right()
        ax1.yaxis.set_label_position("right")
        ax1.set_xlim([year_min - 1, year_max + 1])
        ax1.set_ylim([ppt_min, ppt_max])
        ax1.tick_params(axis='y', labelsize=ytick_fs)
        ax1.tick_params(axis='x', labelsize=xtick_fs)
        ax1.tick_params(axis='x', which='both', top='off')
        ax1.xaxis.set_minor_locator(MultipleLocator(1))
        for tick in ax1.get_xticklabels():
            tick.set_rotation(45)
            tick.set_ha('right')
        ax1.set_ylabel(
            'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']),
            fontsize=ylabel_fs)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ET_UCI'].values,
            marker='', c=ncolors[2], alpha=0.5, lw=0.75)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ET_LCI'].values,
            marker='', c=ncolors[2], alpha=0.5, lw=0.75)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ET_MEAN'].values,
            marker='o', c=ncolors[2], ms=ms, label='ET')
        ax1.plot(0, 0, marker='o', c=ncolors[2], ms=ms, label='ET')
        ax2.yaxis.tick_left()
        ax2.yaxis.set_label_position("left")
        ax2.set_ylim([
            max(0, 0.9 * min(zone_df['ET_LCI'])),
            1.1 * max(zone_df['ET_UCI'])])
        # ax2.set_ylim([
        #     max(0, 100 * math.floor((min(zone_df['ET_MEAN']) - 100) / 100)),
        #     100 * math.ceil((max(zone_df['ET_MEAN']) + 100) / 100)])
        ax2.tick_params(axis='y', labelsize=ytick_fs)
        ax2.set_ylabel(
            'ET [{}/yr]'.format(ini['FIGURES']['eto_units']),
            fontsize=ylabel_fs)
        ax1.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1)
        if overwrite_flag or not os.path.isfile(figure_path):
            plt.savefig(figure_path, dpi=300)
        plt.close()
        del fig, ax1, ax2


        logging.debug('    ETg vs PPT')
        figure_path = os.path.join(
            output_ws, '{}_etg.png'.format(zone_output_name))
        fig = plt.figure(figsize=figsize)
        ax1 = fig.add_axes([0.18, 0.21, 0.67, 0.75])
        ax1.set_xlabel('Year', fontsize=xlabel_fs)
        ax2 = ax1.twinx()
        ax1.plot(
            zone_df['YEAR'].values, zone_df['WY_PPT'],
            marker='o', c='0.5', ms=ms, label='WY PPT')
        ax1.yaxis.tick_right()
        ax1.yaxis.set_label_position("right")
        ax1.set_xlim([year_min - 1, year_max + 1])
        ax1.set_ylim([ppt_min, ppt_max])
        ax1.tick_params(axis='y', labelsize=ytick_fs)
        ax1.tick_params(axis='x', labelsize=xtick_fs)
        ax1.tick_params(axis='x', which='both', top='off')
        ax1.xaxis.set_minor_locator(MultipleLocator(1))
        for tick in ax1.get_xticklabels():
            tick.set_rotation(45)
            tick.set_ha('right')
        ax1.set_ylabel(
            'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']),
            fontsize=ylabel_fs)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ETG_UCI'].values,
            marker='', c=ncolors[3], alpha=0.5, lw=0.75)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ETG_LCI'].values,
            marker='', c=ncolors[3], alpha=0.5, lw=0.75)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ETG_MEAN'].values,
            marker='o', c=ncolors[3], ms=ms, label='ETg')
        ax1.plot(0, 0, marker='o', c=ncolors[3], ms=ms, label='ETg')
        ax2.yaxis.tick_left()
        ax2.yaxis.set_label_position("left")
        ax2.set_ylim([
            max(0, 0.9 * min(zone_df['ETG_LCI'])),
            1.1 * max(zone_df['ETG_UCI'])])
        # ax2.set_ylim([
        #     max(0, 100 * math.floor((min(zone_df['ETG_MEAN']) - 100) / 100)),
        #     100 * math.ceil((max(zone_df['ETG_MEAN']) + 100) / 100)])
        ax2.tick_params(axis='y', labelsize=ytick_fs)
        ax2.set_ylabel(
            'ETg [{}/yr]'.format(ini['FIGURES']['eto_units']),
            fontsize=ylabel_fs)
        ax1.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1)
        if overwrite_flag or not os.path.isfile(figure_path):
            plt.savefig(figure_path, dpi=300)
        plt.close()
        del fig, ax1, ax2


        logging.debug('    Complimentary')
        figure_path = os.path.join(
            output_ws, '{}_complimentary.png'.format(zone_output_name))
        fig = plt.figure(figsize=(3, 2.5))
        ax = fig.add_axes([0.18, 0.16, 0.78, 0.80])
        # ax = fig.add_axes([0.18, 0.21, 0.67, 0.70])
        ax.plot(
            zone_df['WY_PPT'].values, zone_df['WY_ETO'].values,
            linestyle='', marker='o', c=ncolors[1], ms=3, label='ETo')
        ax.plot(
            zone_df['WY_PPT'].values, zone_df['ET_MEAN'].values,
            linestyle='', marker='o', c=ncolors[2], ms=3, label='ET')
        # xmax = 100 * math.ceil(max(zone_df['WY_PPT']) / 100)
        # ymax = 200 * math.ceil((max(zone_df['WY_ETO']) + 200) / 200)
        ax.set_xlim([ppt_min, ppt_max])
        ax.set_ylim([0, 1.2 * max(zone_df['WY_ETO'])])
        ax.tick_params(axis='y', labelsize=ytick_fs)
        ax.tick_params(axis='x', labelsize=xtick_fs)
        ax.tick_params(axis='x', which='both', top='off')
        ax.tick_params(axis='y', which='both', right='off')
        ax.set_xlabel('PPT [{}/yr]'.format(
            ini['FIGURES']['ppt_units']), fontsize=xlabel_fs)
        ax.set_ylabel('ET and ETo [{}/yr]'.format(
            ini['FIGURES']['eto_units']), fontsize=ylabel_fs)
        ax.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1)
        if overwrite_flag or not os.path.isfile(figure_path):
            plt.savefig(figure_path, dpi=300)
        plt.close()
        del fig, ax
def main(ini_path, overwrite_flag=True):
    """Generate Beamer ETg summary tables

    For each zone in the zone shapefile, the Landsat daily zonal stats CSV
    is read and filtered using the INPUTS and SUMMARY settings of the INI
    file, then merged with GRIDMET ETo/PPT totals (grouped by water year).
    ETg and ET are computed from the ETSTAR_* columns and two Excel
    workbooks are written to ini['SUMMARY']['output_ws'], each with one
    sheet per zone: a "daily" workbook with one row per Landsat scene and
    an "annual" workbook with the per-year aggregates (converted to the
    units requested in the TABLES section).

    Args:
        ini_path (str):
        overwrite_flag (bool): if True, overwrite existing tables
            Default is True (for now)

    Returns:
        True if both output workbooks already exist and overwrite_flag is
        False, False if no zone had any data to write, otherwise None.

    Raises:
        SystemExit: if the requested ETo/PPT unit conversion is not
            supported.
    """

    logging.info('\nGenerate Beamer ETg summary tables')

    # TODO: Eventually read the daily/annual output field lists from the INI
    #   (like ini['BEAMER']['landsat_products'])

    # ETg and ET are computed for each of these ETSTAR statistics
    stat_list = ['MEAN', 'LPI', 'UPI', 'LCI', 'UCI']

    # For unit conversion
    eto_fields = [
        'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI', 'ET_MEAN',
        'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI', 'ETO'
    ]
    ppt_fields = ['PPT']

    # Divisor for converting from mm to each supported output unit
    # (None means the values are left untouched)
    mm_per_unit = {'mm': None, 'in': 25.4, 'ft': 12 * 25.4}

    # Annual columns that are truncated to integers before writing
    annual_int_fields = [
        'SCENE_COUNT', 'PIXEL_COUNT', 'PIXEL_TOTAL',
        'FMASK_COUNT', 'FMASK_TOTAL', 'ETSTAR_COUNT']

    def _full_scene_ids(df):
        """Return the SCENE_ID values with 'XXX' replaced by the ROW value

        The SCENE_ID values can have XXX in place of the row, but the
        keep/skip lists don't, so the IDs can't be compared directly.
        The returned series has a fresh index, callers must use .values.
        """
        return pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(df['SCENE_ID'], df['ROW'])
        ])

    def _convert_from_mm(df, fields, input_units, output_units):
        """Convert the listed columns of df in place, exit if unsupported"""
        if input_units != 'mm' or output_units not in mm_per_unit:
            logging.error(
                ('\nERROR: Input units {} and output units {} are not ' +
                 'currently supported, exiting').format(
                     input_units, output_units))
            sys.exit()
        divisor = mm_per_unit[output_units]
        if divisor is not None:
            df[fields] /= divisor

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='BEAMER')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='TABLES')

    # Hardcode GRIDMET month range to the water year
    ini['SUMMARY']['gridmet_start_month'] = 10
    ini['SUMMARY']['gridmet_end_month'] = 9

    # Output paths
    output_daily_path = os.path.join(
        ini['SUMMARY']['output_ws'],
        ini['BEAMER']['output_name'].replace('.csv', '_daily.xlsx'))
    output_annual_path = os.path.join(
        ini['SUMMARY']['output_ws'],
        ini['BEAMER']['output_name'].replace('.csv', '_annual.xlsx'))

    # Check if files already exist
    if overwrite_flag:
        if os.path.isfile(output_daily_path):
            os.remove(output_daily_path)
        if os.path.isfile(output_annual_path):
            os.remove(output_annual_path)
    elif (os.path.isfile(output_daily_path)
            and os.path.isfile(output_annual_path)):
        logging.info('\nOutput files already exist and '
                     'overwrite is False, exiting')
        return True

    # Start/end year
    year_list = list(
        range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(
        utils.month_range(gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(', '.join(
        map(str, gridmet_months))))

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID before merging geometries
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']
        ]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']
        ]

    logging.info('\nProcessing zones')
    zone_df_dict = {}
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone_name = zone_name.replace(' ', '_')
        logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid))

        zone_stats_ws = os.path.join(ini['ZONAL_STATS']['output_ws'],
                                     zone_name)
        if not os.path.isdir(zone_stats_ws):
            logging.debug(
                '  Folder {} does not exist, skipping'.format(zone_stats_ws))
            continue

        # Input paths
        landsat_daily_path = os.path.join(
            zone_stats_ws, '{}_landsat_daily.csv'.format(zone_name))
        gridmet_daily_path = os.path.join(
            zone_stats_ws, '{}_gridmet_daily.csv'.format(zone_name))
        gridmet_monthly_path = os.path.join(
            zone_stats_ws, '{}_gridmet_monthly.csv'.format(zone_name))
        if not os.path.isfile(landsat_daily_path):
            logging.error('  Landsat daily CSV does not exist, skipping zone')
            continue
        elif (not os.path.isfile(gridmet_daily_path)
              and not os.path.isfile(gridmet_monthly_path)):
            # DEADBEEF - Eventually support generating only Landsat tables
            logging.error(
                '  GRIDMET daily or monthly CSV does not exist, skipping zone')
            continue

        logging.debug('  Reading Landsat CSV')
        landsat_df = pd.read_csv(landsat_daily_path)

        logging.debug('  Filtering Landsat dataframe')
        landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0]

        # QA field should have been written in zonal stats code
        # Eventually this block can be removed
        # assign() returns a new frame, so the filtered slice above is not
        #   modified in place (avoids SettingWithCopyWarning)
        if 'QA' not in landsat_df.columns:
            landsat_df = landsat_df.assign(QA=0)

        if year_list:
            landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)]
        if month_list:
            landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)]
        if doy_list:
            landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)]

        # Assume the default is for these to be True and only filter if False
        if not ini['INPUTS']['landsat4_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04']
        if not ini['INPUTS']['landsat5_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05']
        if not ini['INPUTS']['landsat7_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07']
        if not ini['INPUTS']['landsat8_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08']

        if ini['INPUTS']['path_keep_list']:
            landsat_df = landsat_df[landsat_df['PATH'].isin(
                ini['INPUTS']['path_keep_list'])]
        if (ini['INPUTS']['row_keep_list']
                and ini['INPUTS']['row_keep_list'] != ['XXX']):
            landsat_df = landsat_df[landsat_df['ROW'].isin(
                ini['INPUTS']['row_keep_list'])]

        # The masks are applied positionally (.values) since the rebuilt
        #   scene ID series does not share the dataframe index
        if ini['INPUTS']['scene_id_keep_list']:
            keep_mask = _full_scene_ids(landsat_df).isin(
                ini['INPUTS']['scene_id_keep_list']).values
            landsat_df = landsat_df[keep_mask]
        if ini['INPUTS']['scene_id_skip_list']:
            skip_mask = _full_scene_ids(landsat_df).isin(
                ini['INPUTS']['scene_id_skip_list']).values
            landsat_df = landsat_df[~skip_mask]

        # Filter by QA/QC value
        if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty:
            logging.debug('    Maximum QA: {0}'.format(
                ini['SUMMARY']['max_qa']))
            landsat_df = landsat_df[
                landsat_df['QA'] <= ini['SUMMARY']['max_qa']]

        # Filter by average cloud score
        if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty:
            logging.debug('    Maximum cloud score: {0}'.format(
                ini['SUMMARY']['max_cloud_score']))
            landsat_df = landsat_df[
                landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

        # Filter by Fmask percentage
        # The FMASK_PCT column is kept and ends up in the daily workbook
        if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty:
            landsat_df = landsat_df.assign(FMASK_PCT=100 * (
                landsat_df['FMASK_COUNT'] / landsat_df['FMASK_TOTAL']))
            logging.debug('    Max Fmask threshold: {}'.format(
                ini['SUMMARY']['max_fmask_pct']))
            landsat_df = landsat_df[
                landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

        # Filter low count SLC-off images
        if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty:
            logging.debug('    Mininum SLC-off threshold: {}%'.format(
                ini['SUMMARY']['min_slc_off_pct']))
            # Landsat 7 scenes after DOY 151 of 2003
            slc_off_mask = ((landsat_df['PLATFORM'] == 'LE07') &
                            ((landsat_df['YEAR'] >= 2004) |
                             ((landsat_df['YEAR'] == 2003) &
                              (landsat_df['DOY'] > 151))))
            slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] /
                                 landsat_df['PIXEL_TOTAL'])
            # Only the SLC-off scenes are subject to the threshold
            landsat_df = landsat_df[(
                (slc_off_pct >= ini['SUMMARY']['min_slc_off_pct'])
                & slc_off_mask) | (~slc_off_mask)]

        if landsat_df.empty:
            logging.error(
                '  Empty Landsat dataframe after filtering, skipping zone')
            continue

        # Aggregate GRIDMET (to water year)
        # At least one of the GRIDMET files exists (checked above),
        #   the monthly file is used if both are present
        if os.path.isfile(gridmet_monthly_path):
            logging.debug('  Reading montly GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_monthly_path)
        else:
            logging.debug('  Reading daily GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_daily_path)

        logging.debug('  Computing GRIDMET summaries')
        # Summarize GRIDMET for target months year
        # Months 10-12 are assigned to the following year
        if (gridmet_start_month in [10, 11, 12]
                and gridmet_end_month in [10, 11, 12]):
            month_mask = ((gridmet_df['MONTH'] >= gridmet_start_month) &
                          (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
        elif (gridmet_start_month in [10, 11, 12]
              and gridmet_end_month not in [10, 11, 12]):
            month_mask = gridmet_df['MONTH'] >= gridmet_start_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
            month_mask = gridmet_df['MONTH'] <= gridmet_end_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        else:
            month_mask = ((gridmet_df['MONTH'] >= gridmet_start_month) &
                          (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        # GROUP_YEAR for rows not in the GRIDMET month range will be NAN
        gridmet_df = gridmet_df[~pd.isnull(gridmet_df['GROUP_YEAR'])]

        if year_list:
            gridmet_df = gridmet_df[gridmet_df['GROUP_YEAR'].isin(year_list)]

        if gridmet_df.empty:
            logging.error(
                '    Empty GRIDMET dataframe after filtering by year')
            continue

        # Group GRIDMET data by user specified range (default is water year)
        # The string 'sum' is used instead of np.sum, which pandas already
        #   maps to the groupby sum but now warns about
        gridmet_group_df = gridmet_df \
            .groupby(['ZONE_NAME', 'ZONE_FID', 'GROUP_YEAR']) \
            .agg({'ETO': 'sum', 'PPT': 'sum'}) \
            .reset_index() \
            .sort_values(by='GROUP_YEAR')
        gridmet_group_df = gridmet_group_df.rename(
            columns={'GROUP_YEAR': 'YEAR'})
        gridmet_group_df['YEAR'] = gridmet_group_df['YEAR'].astype(int)

        # Merge Landsat and GRIDMET collections
        # Inner join, scenes in years without GRIDMET totals are dropped
        zone_df = landsat_df.merge(gridmet_group_df,
                                   on=['ZONE_NAME', 'ZONE_FID', 'YEAR'])
        if zone_df.empty:
            logging.info('  Empty zone dataframe, skipping zone')
            continue

        # Compute ETg
        eto_minus_ppt = zone_df['ETO'] - zone_df['PPT']
        for stat in stat_list:
            zone_df['ETG_' + stat] = zone_df['ETSTAR_' + stat] * eto_minus_ppt

        # Compute ET (separate loop to keep the ETG_*/ET_* column order)
        for stat in stat_list:
            zone_df['ET_' + stat] = zone_df['ETG_' + stat] + zone_df['PPT']

        # Append zone dataframes
        zone_df_dict[zone_name] = zone_df

    # Nothing to write: concat() and the Excel writer both fail on no data
    if not zone_df_dict:
        logging.error(
            '\nNo zones had data after filtering, no tables were written')
        return False

    # Export each zone to a separate tab
    if not os.path.isfile(output_daily_path):
        logging.info('\nWriting daily values to Excel')
        # The workbook is saved when the with block exits
        with ExcelWriter(output_daily_path) as excel_f:
            for zone_name in sorted(zone_df_dict):
                logging.info('  {}'.format(zone_name))
                zone_df_dict[zone_name].to_excel(
                    excel_f, sheet_name=zone_name, index=False,
                    float_format='%.4f')

    if not os.path.isfile(output_annual_path):
        logging.info('\nComputing annual summaries')
        annual_df = pd.concat(list(zone_df_dict.values())) \
            .groupby(['ZONE_NAME', 'YEAR']) \
            .agg({
                'PIXEL_COUNT': ['count', 'mean'],
                'PIXEL_TOTAL': ['mean'],
                'FMASK_COUNT': 'mean',
                'FMASK_TOTAL': 'mean',
                'CLOUD_SCORE': 'mean',
                'ETSTAR_COUNT': 'mean',
                'NDVI_TOA': 'mean',
                'NDWI_TOA': 'mean',
                'ALBEDO_SUR': 'mean',
                'TS': 'mean',
                'EVI_SUR': ['mean', 'median', 'min', 'max'],
                'ETSTAR_MEAN': 'mean',
                'ETG_MEAN': 'mean',
                'ETG_LPI': 'mean',
                'ETG_UPI': 'mean',
                'ETG_LCI': 'mean',
                'ETG_UCI': 'mean',
                'ET_MEAN': 'mean',
                'ET_LPI': 'mean',
                'ET_UPI': 'mean',
                'ET_LCI': 'mean',
                'ET_UCI': 'mean',
                'ETO': 'mean',
                'PPT': 'mean'
            })

        # Flatten the (field, statistic) column pairs to FIELD_statistic
        annual_df.columns = annual_df.columns.map('_'.join)
        # Rename the fields with more than one statistic first, then strip
        #   the "_mean" suffix from all of the remaining fields
        annual_df = annual_df.rename(columns={
            'PIXEL_COUNT_count': 'SCENE_COUNT',
            'PIXEL_COUNT_mean': 'PIXEL_COUNT',
            'EVI_SUR_mean': 'EVI_SUR_MEAN',
            'EVI_SUR_median': 'EVI_SUR_MEDIAN',
            'EVI_SUR_min': 'EVI_SUR_MIN',
            'EVI_SUR_max': 'EVI_SUR_MAX'
        })
        annual_df = annual_df.rename(
            columns=lambda x: str(x).replace('_mean', ''))
        # np.int was only an alias for the builtin int and has been removed
        #   from NumPy, so use int directly
        for field in annual_int_fields:
            annual_df[field] = annual_df[field].astype(int)
        annual_df = annual_df.reset_index()

        # Convert ETo units
        _convert_from_mm(
            annual_df, eto_fields,
            ini['BEAMER']['eto_units'], ini['TABLES']['eto_units'])

        # Convert PPT units
        _convert_from_mm(
            annual_df, ppt_fields,
            ini['BEAMER']['ppt_units'], ini['TABLES']['ppt_units'])

        logging.info('\nWriting annual values to Excel')
        with ExcelWriter(output_annual_path) as excel_f:
            for zone_name in sorted(zone_df_dict):
                logging.info('  {}'.format(zone_name))
                annual_df[annual_df['ZONE_NAME'] == zone_name].to_excel(
                    excel_f, sheet_name=zone_name, index=False,
                    float_format='%.4f')
def main(ini_path=None, overwrite_flag=True, show_flag=False):
    """Generate summary figures

    Args:
        ini_path (str): file path of the control file
        overwrite_flag (bool): if True, overwrite existing figures
        show_flag (bool): if True, show figures as they are being built
    """

    logging.info('\nGenerate summary figures')

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='FIGURES')

    # Band options
    band_list = [
        'albedo_sur',
        'cloud_score',
        'eto',
        'evi_sur',
        'fmask_count',
        'fmask_total',
        'ndvi_sur',
        'ndvi_toa',
        'ndwi_green_nir_sur',
        'ndwi_green_nir_toa',
        'ndwi_green_swir1_sur',
        'ndwi_green_swir1_toa',
        'ndwi_nir_swir1_sur',
        'ndwi_nir_swir1_toa',
        'ndwi_swir1_green_sur',
        'ndwi_swir1_green_toa',
        # 'ndwi_sur', 'ndwi_toa',
        'pixel_count',
        'pixel_total',
        'ppt',
        'tc_bright',
        'tc_green',
        'tc_wet',
        'ts'
    ]
    band_name = {
        'albedo_sur': 'Albedo',
        'cloud_score': 'Cloud Score',
        'eto': 'ETo',
        'evi_sur': 'EVI',
        'fmask_count': 'Fmask Count',
        'fmask_total': 'Fmask Total',
        'ndvi_sur': 'NDVI',
        'ndvi_toa': 'NDVI (TOA)',
        'ndwi_green_nir_sur': 'NDWI (Green, NIR)',
        'ndwi_green_nir_toa': 'NDWI (Green, NIR) (TOA)',
        'ndwi_green_swir1_sur': 'NDWI (Green, SWIR1)',
        'ndwi_green_swir1_toa': 'NDWI (Green, SWIR1) (TOA)',
        'ndwi_nir_swir1_sur': 'NDWI (NIR, SWIR1)',
        'ndwi_nir_swir1_toa': 'NDWI (NIR, SWIR1) (TOA)',
        'ndwi_swir1_green_sur': 'NDWI (SWIR1, Green)',
        'ndwi_swir1_green_toa': 'NDWI (SWIR1, Green) (TOA)',
        # 'ndwi_sur': 'NDWI (SWIR1, GREEN)',
        # 'ndwi_toa': 'NDWI (SWIR1, GREEN) (TOA)',
        'pixel_count': 'Pixel Count',
        'pixel_total': 'Pixel Total',
        'ppt': 'PPT',
        'tc_bright': 'Brightness',
        'tc_green': 'Greeness',
        'tc_wet': 'Wetness',
        'ts': 'Ts'
    }
    band_unit = {
        'albedo_sur': 'dimensionless',
        'cloud_score': 'dimensionless',
        'evi_sur': 'dimensionless',
        'eto': 'mm',
        'fmask_count': 'dimensionless',
        'fmask_total': 'dimensionless',
        'ndvi_sur': 'dimensionless',
        'ndvi_toa': 'dimensionless',
        'ndwi_green_nir_sur': 'dimensionless',
        'ndwi_green_nir_toa': 'dimensionless',
        'ndwi_green_swir1_sur': 'dimensionless',
        'ndwi_green_swir1_toa': 'dimensionless',
        'ndwi_nir_swir1_sur': 'dimensionless',
        'ndwi_nir_swir1_toa': 'dimensionless',
        'ndwi_swir1_green_sur': 'dimensionless',
        'ndwi_swir1_green_toa': 'dimensionless',
        # 'ndwi_sur': 'dimensionless',
        # 'ndwi_toa': 'dimensionless',
        'pixel_count': 'dimensionless',
        'pixel_total': 'dimensionless',
        'ppt': 'mm',
        'tc_bright': 'dimensionless',
        'tc_green': 'dimensionless',
        'tc_wet': 'dimensionless',
        'ts': 'K',
    }
    band_color = {
        'albedo_sur': '#CF4457',
        'cloud_score': '0.5',
        'eto': '#348ABD',
        'fmask_count': '0.5',
        'fmask_total': '0.5',
        'evi_sur': '#FFA500',
        'ndvi_sur': '#A60628',
        'ndvi_toa': '#A60628',
        'ndwi_green_nir_sur': '#4eae4b',
        'ndwi_green_nir_toa': '#4eae4b',
        'ndwi_green_swir1_sur': '#4eae4b',
        'ndwi_green_swir1_toa': '#4eae4b',
        'ndwi_nir_swir1_sur': '#4eae4b',
        'ndwi_nir_swir1_toa': '#4eae4b',
        'ndwi_swir1_green_sur': '#4eae4b',
        'ndwi_swir1_green_toa': '#4eae4b',
        # 'ndwi_sur': '#4eae4b',
        # 'ndwi_toa': '#4eae4b',
        'pixel_count': '0.5',
        'pixel_total': '0.5',
        'ppt': '0.5',
        'tc_bright': '#E24A33',
        'tc_green': '#E24A33',
        'tc_wet': '#E24A33',
        'ts': '#188487'
    }

    # A couple of color palettes to sample from
    # import seaborn as sns
    # print(sns.color_palette('hls', 20).as_hex())
    # print(sns.color_palette('husl', 20).as_hex())
    # print(sns.color_palette('hsv', 20).as_hex())
    # print(sns.color_palette('Set1', 20).as_hex())
    # print(sns.color_palette('Set2', 20).as_hex())

    # Hardcoded plot options
    figures_folder = 'figures'
    fig_type = 'large'

    plot_dict = dict()

    # Center y-labels in figure window (instead of centering on ticks/axes)
    plot_dict['center_ylabel'] = False

    # Axes percentages must be 0-1
    plot_dict['timeseries_band_ax_pct'] = [0.3, 0.92]
    plot_dict['timeseries_ppt_ax_pct'] = [0.0, 0.35]
    plot_dict['complement_band_ax_pct'] = [0.0, 0.5]
    plot_dict['complement_eto_ax_pct'] = [0.4, 1.0]

    if fig_type.lower() == 'large':
        plot_dict['title_fs'] = 12
        plot_dict['xtick_fs'] = 10
        plot_dict['ytick_fs'] = 10
        plot_dict['xlabel_fs'] = 10
        plot_dict['ylabel_fs'] = 10
        plot_dict['legend_fs'] = 10
        plot_dict['ts_ms'] = 3
        plot_dict['comp_ms'] = 4
        plot_dict['timeseries_ax'] = [0.12, 0.13, 0.78, 0.81]
        plot_dict['scatter_ax'] = [0.12, 0.10, 0.82, 0.84]
        plot_dict['complement_ax'] = [0.12, 0.10, 0.78, 0.84]
        plot_dict['fig_size'] = (6.0, 5.0)
    elif fig_type.lower() == 'small':
        plot_dict['title_fs'] = 10
        plot_dict['xtick_fs'] = 8
        plot_dict['ytick_fs'] = 8
        plot_dict['xlabel_fs'] = 8
        plot_dict['ylabel_fs'] = 8
        plot_dict['legend_fs'] = 8
        plot_dict['ts_ms'] = 1.5
        plot_dict['comp_ms'] = 2
        plot_dict['timeseries_ax'] = [0.18, 0.21, 0.67, 0.70]
        plot_dict['scatter_ax'] = [0.18, 0.21, 0.67, 0.70]
        plot_dict['complement_ax'] = [0.18, 0.16, 0.67, 0.75]
        plot_dict['fig_size'] = (3.0, 2.5)
    plot_dict['fig_dpi'] = 300
    plot_dict['show'] = show_flag
    plot_dict['overwrite'] = overwrite_flag

    # CSV parameters
    landsat_annual_fields = [
        'ZONE_FID',
        'ZONE_NAME',
        'YEAR',
        'SCENE_COUNT',
        'CLOUD_SCORE',
        'PIXEL_COUNT',
        'PIXEL_TOTAL',
        'FMASK_COUNT',
        'FMASK_TOTAL',
        'TS',
        'ALBEDO_SUR',
        'NDVI_TOA',
        'NDVI_SUR',
        'EVI_SUR',
        'NDWI_GREEN_NIR_SUR',
        'NDWI_GREEN_SWIR1_SUR',
        'NDWI_NIR_SWIR1_SUR',
        # 'NDWI_GREEN_NIR_TOA', 'NDWI_GREEN_SWIR1_TOA', 'NDWI_NIR_SWIR1_TOA',
        # 'NDWI_SWIR1_GREEN_TOA', 'NDWI_SWIR1_GREEN_SUR',
        # 'NDWI_TOA', 'NDWI_SUR',
        'TC_BRIGHT',
        'TC_GREEN',
        'TC_WET'
    ]

    # Add merged row XXX to keep list
    ini['INPUTS']['row_keep_list'].append('XXX')

    # Check figure bands
    timeseries_bands = ini['FIGURES']['timeseries_bands']
    scatter_bands = ini['FIGURES']['scatter_bands']
    complementary_bands = ini['FIGURES']['complementary_bands']
    if timeseries_bands:
        logging.info('Timeseries Bands:')
        for band in timeseries_bands:
            if band not in band_list:
                logging.info(
                    '  Invalid timeseries band: {}, exiting'.format(band))
                return False
            logging.info('  {}'.format(band))
    if scatter_bands:
        logging.info('Scatter Bands (x:y):')
        for band_x, band_y in scatter_bands:
            if band_x not in band_list:
                logging.info(
                    '  Invalid scatter band: {}, exiting'.format(band_x))
                return False
            elif band_y not in band_list:
                logging.info('  Invalid band: {}, exiting'.format(band_y))
                return False
            logging.info('  {}:{}'.format(band_x, band_y))
    if complementary_bands:
        logging.info('Complementary Bands:')
        for band in complementary_bands:
            if band not in band_list:
                logging.info(
                    '  Invalid complementary band: {}, exiting'.format(band))
                return False
            logging.info('  {}'.format(band))

    # Add input plot options
    plot_dict['ppt_plot_type'] = ini['FIGURES']['ppt_plot_type']
    plot_dict['scatter_best_fit'] = ini['FIGURES']['scatter_best_fit']

    # Start/end year
    year_list = list(
        range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(
        utils.month_range(gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(', '.join(
        map(str, gridmet_months))))

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID before merging geometries
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']
        ]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']
        ]

    # # Filter features by FID before merging geometries
    # if ini['INPUTS']['fid_keep_list']:
    #     landsat_df = landsat_df[landsat_df['ZONE_FID'].isin(
    #         ini['INPUTS']['fid_keep_list'])]
    # if ini['INPUTS']['fid_skip_list']:
    #     landsat_df = landsat_df[~landsat_df['ZONE_FID'].isin(
    #         ini['INPUTS']['fid_skip_list'])]

    logging.info('\nProcessing zones')
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone_name = zone_name.replace(' ', '_')
        logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid))

        zone_stats_ws = os.path.join(ini['ZONAL_STATS']['output_ws'],
                                     zone_name)
        zone_figures_ws = os.path.join(ini['SUMMARY']['output_ws'], zone_name,
                                       figures_folder)
        if not os.path.isdir(zone_stats_ws):
            logging.debug(
                '  Folder {} does not exist, skipping'.format(zone_stats_ws))
            continue
        elif not os.path.isdir(zone_figures_ws):
            os.makedirs(zone_figures_ws)

        # Input paths
        landsat_daily_path = os.path.join(
            zone_stats_ws, '{}_landsat_daily.csv'.format(zone_name))
        gridmet_daily_path = os.path.join(
            zone_stats_ws, '{}_gridmet_daily.csv'.format(zone_name))
        gridmet_monthly_path = os.path.join(
            zone_stats_ws, '{}_gridmet_monthly.csv'.format(zone_name))
        if not os.path.isfile(landsat_daily_path):
            logging.error('  Landsat daily CSV does not exist, skipping zone')
            continue
        elif (not os.path.isfile(gridmet_daily_path)
              and not os.path.isfile(gridmet_monthly_path)):
            logging.error(
                '  GRIDMET daily or monthly CSV does not exist, skipping zone')
            continue
            # DEADBEEF - Eventually support generating only Landsat figures
            # logging.error(
            #     '  GRIDMET daily and/or monthly CSV files do not exist.\n'
            #     '  ETo and PPT will not be processed.')

        # Output paths
        landsat_summary_path = os.path.join(
            zone_figures_ws, '{}_landsat_figures.csv'.format(zone_name))
        gridmet_summary_path = os.path.join(
            zone_figures_ws, '{}_gridmet_figures.csv'.format(zone_name))
        zone_summary_path = os.path.join(
            zone_figures_ws, '{}_zone_figures.csv'.format(zone_name))

        logging.debug('  Reading Landsat CSV')
        landsat_df = pd.read_csv(landsat_daily_path)

        logging.debug('  Filtering Landsat dataframe')
        landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0]

        # QA field should have been written in zonal stats code
        # Eventually this block can be removed
        if 'QA' not in landsat_df.columns.values:
            landsat_df['QA'] = 0

        # # This assumes that there are L5/L8 images in the dataframe
        # if not landsat_df.empty:
        #     max_pixel_count = max(landsat_df['PIXEL_COUNT'])
        # else:
        #     max_pixel_count = 0

        if year_list:
            landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)]
        if month_list:
            landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)]
        if doy_list:
            landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)]

        # Assume the default is for these to be True and only filter if False
        if not ini['INPUTS']['landsat4_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04']
        if not ini['INPUTS']['landsat5_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05']
        if not ini['INPUTS']['landsat7_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07']
        if not ini['INPUTS']['landsat8_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08']

        if ini['INPUTS']['path_keep_list']:
            landsat_df = landsat_df[landsat_df['PATH'].isin(
                ini['INPUTS']['path_keep_list'])]
        if (ini['INPUTS']['row_keep_list']
                and ini['INPUTS']['row_keep_list'] != ['XXX']):
            landsat_df = landsat_df[landsat_df['ROW'].isin(
                ini['INPUTS']['row_keep_list'])]

        if ini['INPUTS']['scene_id_keep_list']:
            # Replace XXX with primary ROW value for checking skip list SCENE_ID
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[scene_id_df.isin(
                ini['INPUTS']['scene_id_keep_list']).values]
            # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
            # landsat_df = landsat_df[landsat_df['SCENE_ID'].isin(
            #     ini['INPUTS']['scene_id_keep_list'])]
        if ini['INPUTS']['scene_id_skip_list']:
            # Replace XXX with primary ROW value for checking skip list SCENE_ID
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[np.logical_not(
                scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)]
            # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
            # landsat_df = landsat_df[np.logical_not(landsat_df['SCENE_ID'].isin(
            #     ini['INPUTS']['scene_id_skip_list']))]

        # Filter by QA/QC value
        if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty:
            logging.debug('    Maximum QA: {0}'.format(
                ini['SUMMARY']['max_qa']))
            landsat_df = landsat_df[
                landsat_df['QA'] <= ini['SUMMARY']['max_qa']]

        # Filter by average cloud score
        if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty:
            logging.debug('    Maximum cloud score: {0}'.format(
                ini['SUMMARY']['max_cloud_score']))
            landsat_df = landsat_df[
                landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

        # Filter by Fmask percentage
        if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty:
            landsat_df['FMASK_PCT'] = 100 * (landsat_df['FMASK_COUNT'] /
                                             landsat_df['FMASK_TOTAL'])
            logging.debug('    Max Fmask threshold: {}'.format(
                ini['SUMMARY']['max_fmask_pct']))
            landsat_df = landsat_df[
                landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

        # Filter low count SLC-off images
        if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty:
            logging.debug('    Mininum SLC-off threshold: {}%'.format(
                ini['SUMMARY']['min_slc_off_pct']))
            # logging.debug('    Maximum pixel count: {}'.format(
            #     max_pixel_count))
            slc_off_mask = ((landsat_df['LANDSAT'] == 'LE7') &
                            ((landsat_df['YEAR'] >= 2004) |
                             ((landsat_df['YEAR'] == 2003) &
                              (landsat_df['DOY'] > 151))))
            slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] /
                                 landsat_df['PIXEL_TOTAL'])
            # slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / max_pixel_count)
            landsat_df = landsat_df[(
                (slc_off_pct >= ini['SUMMARY']['min_slc_off_pct'])
                & slc_off_mask) | (~slc_off_mask)]

        if landsat_df.empty:
            logging.error(
                '  Empty Landsat dataframe after filtering, skipping zone')
            continue

        logging.debug('  Computing Landsat annual summaries')
        agg_dict = {
            'PIXEL_COUNT': {
                'PIXEL_COUNT': 'mean',
                'SCENE_COUNT': 'count'
            },
            'PIXEL_TOTAL': {
                'PIXEL_TOTAL': 'mean'
            },
            'FMASK_COUNT': {
                'FMASK_COUNT': 'mean'
            },
            'FMASK_TOTAL': {
                'FMASK_TOTAL': 'mean'
            },
            'CLOUD_SCORE': {
                'CLOUD_SCORE': 'mean'
            }
        }
        for field in landsat_df.columns.values:
            if field in landsat_annual_fields:
                agg_dict.update({field: {field: 'mean'}})
        landsat_df = landsat_df \
            .groupby(['ZONE_NAME', 'ZONE_FID', 'YEAR']) \
            .agg(agg_dict)
        landsat_df.columns = landsat_df.columns.droplevel(0)
        landsat_df.reset_index(inplace=True)
        # landsat_df = landsat_df[landsat_annual_fields]
        landsat_df['YEAR'] = landsat_df['YEAR'].astype(np.int)
        landsat_df['SCENE_COUNT'] = landsat_df['SCENE_COUNT'].astype(np.int)
        landsat_df['PIXEL_COUNT'] = landsat_df['PIXEL_COUNT'].astype(np.int)
        landsat_df['PIXEL_TOTAL'] = landsat_df['PIXEL_TOTAL'].astype(np.int)
        landsat_df['FMASK_COUNT'] = landsat_df['FMASK_COUNT'].astype(np.int)
        landsat_df['FMASK_TOTAL'] = landsat_df['FMASK_TOTAL'].astype(np.int)
        landsat_df.sort_values(by='YEAR', inplace=True)

        # Aggregate GRIDMET (to water year)
        if os.path.isfile(gridmet_monthly_path):
            logging.debug('  Reading montly GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_monthly_path)
        elif os.path.isfile(gridmet_daily_path):
            logging.debug('  Reading daily GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_daily_path)

        logging.debug('  Computing GRIDMET summaries')
        # Summarize GRIDMET for target months year
        if (gridmet_start_month in [10, 11, 12]
                and gridmet_end_month in [10, 11, 12]):
            month_mask = ((gridmet_df['MONTH'] >= gridmet_start_month) &
                          (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
        elif (gridmet_start_month in [10, 11, 12]
              and gridmet_end_month not in [10, 11, 12]):
            month_mask = gridmet_df['MONTH'] >= gridmet_start_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
            month_mask = gridmet_df['MONTH'] <= gridmet_end_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        else:
            month_mask = ((gridmet_df['MONTH'] >= gridmet_start_month) &
                          (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        # GROUP_YEAR for rows not in the GRIDMET month range will be NAN
        gridmet_df = gridmet_df[~pd.isnull(gridmet_df['GROUP_YEAR'])]

        if year_list:
            gridmet_df = gridmet_df[gridmet_df['GROUP_YEAR'].isin(year_list)]

        if gridmet_df.empty:
            logging.error(
                '    Empty GRIDMET dataframe after filtering by year')
            continue

        # Group GRIDMET data by user specified range (default is water year)
        gridmet_group_df = gridmet_df \
            .groupby(['ZONE_FID', 'ZONE_NAME', 'GROUP_YEAR']) \
            .agg({'ETO': {'ETO': 'sum'}, 'PPT': {'PPT': 'sum'}})
        gridmet_group_df.columns = gridmet_group_df.columns.droplevel(0)
        gridmet_group_df.reset_index(inplace=True)
        gridmet_group_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True)
        gridmet_group_df.sort_values(by='YEAR', inplace=True)

        # # Group GRIDMET data by month
        # gridmet_month_df = gridmet_df.groupby(
        #     ['ZONE_FID', 'ZONE_NAME', 'GROUP_YEAR', 'MONTH']).agg({
        #         'ETO': {'ETO': 'sum'}, 'PPT': {'PPT': 'sum'}})
        # gridmet_month_df.columns = gridmet_month_df.columns.droplevel(0)
        # gridmet_month_df.reset_index(inplace=True)
        # gridmet_month_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True)
        # # gridmet_month_df.sort_values(by=['YEAR', 'MONTH'], inplace=True)
        # gridmet_month_df.reset_index(inplace=True)
        # # Rename monthly PPT columns
        # gridmet_month_df['MONTH'] = 'PPT_M' + gridmet_month_df['MONTH'].astype(str)
        # # Pivot rows up to separate columns
        # gridmet_month_df = gridmet_month_df.pivot_table(
        #     'PPT', ['ZONE_FID', 'YEAR'], 'MONTH')
        # gridmet_month_df.reset_index(inplace=True)
        # columns = ['ZONE_FID', 'YEAR'] + ['PPT_M{}'.format(m) for m in gridmet_months]
        # gridmet_month_df = gridmet_month_df[columns]
        # del gridmet_month_df.index.name

        # Merge Landsat and GRIDMET collections
        zone_df = landsat_df.merge(gridmet_group_df,
                                   on=['ZONE_FID', 'ZONE_NAME', 'YEAR'])
        # gridmet_group_df, on=['ZONE_FID', 'YEAR'])
        # zone_df = zone_df.merge(
        #     gridmet_month_df, on=['ZONE_FID', 'ZONE_NAME', 'YEAR'])
        #     gridmet_month_df, on=['ZONE_FID', 'YEAR'])
        if zone_df is None or zone_df.empty:
            logging.info('  Empty zone dataframe, not generating figures')
            continue

        # Save annual Landsat and GRIDMET tables
        logging.debug('  Saving summary tables')

        logging.debug('  {}'.format(landsat_summary_path))
        landsat_df.sort_values(by=['YEAR'], inplace=True)
        landsat_df.to_csv(landsat_summary_path, index=False)
        # columns=export_fields

        logging.debug('  {}'.format(gridmet_summary_path))
        gridmet_group_df.sort_values(by=['YEAR'], inplace=True)
        gridmet_group_df.to_csv(gridmet_summary_path, index=False)
        # columns=export_fields

        logging.debug('  {}'.format(zone_summary_path))
        zone_df.sort_values(by=['YEAR'], inplace=True)
        zone_df.to_csv(zone_summary_path, index=False)
        # columns=export_fields

        # Adjust year range based on data availability?
        # start_year = min(zone_df['YEAR']),
        # end_year = max(zone_df['YEAR'])

        logging.debug('  Generating figures')
        for band in timeseries_bands:
            timeseries_plot(band, zone_df, zone_name, zone_figures_ws,
                            ini['INPUTS']['start_year'],
                            ini['INPUTS']['end_year'], band_name, band_unit,
                            band_color, plot_dict)

        for band_x, band_y in scatter_bands:
            scatter_plot(band_x, band_y, zone_df, zone_name, zone_figures_ws,
                         band_name, band_unit, band_color, plot_dict)

        for band in complementary_bands:
            complementary_plot(band, zone_df, zone_name, zone_figures_ws,
                               band_name, band_unit, band_color, plot_dict)

        del landsat_df, gridmet_df, zone_df
# Example no. 5
# 0
def main(ini_path, overwrite_flag=True):
    """Generate Beamer ETg summary tables

    Reads the Beamer zonal stats CSV, filters the rows using the INPUTS and
    SUMMARY settings of the INI file, and writes two Excel workbooks (daily
    values and annual summaries, one worksheet per zone) to the SUMMARY
    output workspace.

    Args:
        ini_path (str): path of the INI file passed to inputs.read().
        overwrite_flag (bool): if True, remove existing output workbooks
            before regenerating them.  If False, only missing workbooks
            are written.  Default is True (for now)

    Returns:
        bool or None: True if both workbooks already exist and
            overwrite_flag is False, False if no rows remain after
            filtering, otherwise None.

    Raises:
        SystemExit: if the BEAMER/TABLES unit combination for ETo or PPT
            is not supported.
    """

    logging.info('\nGenerate Beamer ETg summary tables')

    # TODO: Eventually get the daily/annual export fields from the INI
    #   (like ini['BEAMER']['landsat_products'])

    # Fields that are converted from the BEAMER units to the TABLES units
    eto_fields = [
        'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI', 'ET_MEAN',
        'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI', 'WY_ETO'
    ]
    ppt_fields = ['WY_PPT']

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='BEAMER')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='TABLES')

    # Output paths
    output_daily_path = os.path.join(
        ini['SUMMARY']['output_ws'],
        ini['BEAMER']['output_name'].replace('.csv', '_daily.xlsx'))
    output_annual_path = os.path.join(
        ini['SUMMARY']['output_ws'],
        ini['BEAMER']['output_name'].replace('.csv', '_annual.xlsx'))

    # Check if files already exist
    if overwrite_flag:
        if os.path.isfile(output_daily_path):
            os.remove(output_daily_path)
        if os.path.isfile(output_annual_path):
            os.remove(output_annual_path)
    elif (os.path.isfile(output_daily_path)
            and os.path.isfile(output_annual_path)):
        logging.info('\nOutput files already exist and '
                     'overwrite is False, exiting')
        return True

    # Start/end year
    year_list = list(
        range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (only reported in the log by this function)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(
        utils.month_range(gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(', '.join(
        map(str, gridmet_months))))

    # Read in the zonal stats CSV
    logging.debug('  Reading zonal stats CSV file')
    input_df = pd.read_csv(
        os.path.join(ini['ZONAL_STATS']['output_ws'],
                     ini['BEAMER']['output_name']))

    logging.debug('  Filtering Landsat dataframe')
    input_df = input_df[input_df['PIXEL_COUNT'] > 0]

    if ini['INPUTS']['fid_keep_list']:
        input_df = input_df[input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_keep_list'])]
    if ini['INPUTS']['fid_skip_list']:
        input_df = input_df[~input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_skip_list'])]

    # The zone list is built before the date/scene filters are applied, so
    #   a zone whose rows are all filtered out still gets a worksheet
    zone_name_list = sorted(set(input_df['ZONE_NAME'].values))

    if year_list:
        input_df = input_df[input_df['YEAR'].isin(year_list)]
    if month_list:
        input_df = input_df[input_df['MONTH'].isin(month_list)]
    if doy_list:
        input_df = input_df[input_df['DOY'].isin(doy_list)]

    if ini['INPUTS']['path_keep_list']:
        input_df = input_df[input_df['PATH'].isin(
            ini['INPUTS']['path_keep_list'])]
    if (ini['INPUTS']['row_keep_list']
            and ini['INPUTS']['row_keep_list'] != ['XXX']):
        input_df = input_df[input_df['ROW'].isin(
            ini['INPUTS']['row_keep_list'])]

    # Assume the default is for these to be True and only filter if False
    platform_flags = [
        ('landsat4_flag', 'LT04'), ('landsat5_flag', 'LT05'),
        ('landsat7_flag', 'LE07'), ('landsat8_flag', 'LC08')]
    for flag_name, platform in platform_flags:
        if not ini['INPUTS'][flag_name]:
            input_df = input_df[input_df['PLATFORM'] != platform]

    # The masks are positional arrays, so they are rebuilt after each filter
    if ini['INPUTS']['scene_id_keep_list']:
        input_df = input_df[_scene_id_mask(
            input_df, ini['INPUTS']['scene_id_keep_list'])]
    if ini['INPUTS']['scene_id_skip_list']:
        input_df = input_df[~_scene_id_mask(
            input_df, ini['INPUTS']['scene_id_skip_list'])]

    # Filter by QA/QC value
    if ini['SUMMARY']['max_qa'] >= 0 and not input_df.empty:
        logging.debug('    Maximum QA: {0}'.format(ini['SUMMARY']['max_qa']))
        input_df = input_df[input_df['QA'] <= ini['SUMMARY']['max_qa']]

    # First filter by average cloud score
    if ini['SUMMARY']['max_cloud_score'] < 100 and not input_df.empty:
        logging.debug('    Maximum cloud score: {0}'.format(
            ini['SUMMARY']['max_cloud_score']))
        input_df = input_df[
            input_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

    # Filter by Fmask percentage
    if ini['SUMMARY']['max_fmask_pct'] < 100 and not input_df.empty:
        # assign() returns a new dataframe instead of writing into a
        #   filtered slice (avoids SettingWithCopyWarning); the FMASK_PCT
        #   column is still included in the daily output
        input_df = input_df.assign(FMASK_PCT=100 * (
            input_df['FMASK_COUNT'] / input_df['FMASK_TOTAL']))
        logging.debug('    Max Fmask threshold: {}'.format(
            ini['SUMMARY']['max_fmask_pct']))
        input_df = input_df[
            input_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

    # Filter low count SLC-off images
    if ini['SUMMARY']['min_slc_off_pct'] > 0 and not input_df.empty:
        logging.debug('    Mininum SLC-off threshold: {}%'.format(
            ini['SUMMARY']['min_slc_off_pct']))
        # LE07 rows from 2004 onward, or from 2003 after DOY 151
        slc_off_mask = ((input_df['PLATFORM'] == 'LE07') &
                        ((input_df['YEAR'] >= 2004) |
                         ((input_df['YEAR'] == 2003) &
                          (input_df['DOY'] > 151))))
        slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / input_df['PIXEL_TOTAL'])
        # Rows outside the mask are always kept; rows inside the mask are
        #   kept only if they meet the minimum pixel percentage
        input_df = input_df[
            (slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) |
            (~slc_off_mask)]

    if input_df.empty:
        logging.error('  Empty dataframe after filtering, exiting')
        return False

    if not os.path.isfile(output_daily_path):
        logging.info('\nWriting daily values to Excel')
        _write_zone_sheets(output_daily_path, input_df, zone_name_list)

    if not os.path.isfile(output_annual_path):
        logging.info('\nComputing annual summaries')
        annual_df = input_df \
            .groupby(['ZONE_NAME', 'YEAR']) \
            .agg({
                'PIXEL_COUNT': ['count', 'mean'],
                'PIXEL_TOTAL': ['mean'],
                'FMASK_COUNT': 'mean',
                'FMASK_TOTAL': 'mean',
                'CLOUD_SCORE': 'mean',
                'ETSTAR_COUNT': 'mean',
                'NDVI_TOA': 'mean',
                'NDWI_TOA': 'mean',
                'ALBEDO_SUR': 'mean',
                'TS': 'mean',
                'EVI_SUR': ['mean', 'median', 'min', 'max'],
                'ETSTAR_MEAN': 'mean',
                'ETG_MEAN': 'mean',
                'ETG_LPI': 'mean',
                'ETG_UPI': 'mean',
                'ETG_LCI': 'mean',
                'ETG_UCI': 'mean',
                'ET_MEAN': 'mean',
                'ET_LPI': 'mean',
                'ET_UPI': 'mean',
                'ET_LCI': 'mean',
                'ET_UCI': 'mean',
                'WY_ETO': 'mean',
                'WY_PPT': 'mean'
            })
        # Flatten the (field, statistic) column pairs to 'FIELD_statistic'
        annual_df.columns = annual_df.columns.map('_'.join)
        annual_df = annual_df.rename(columns={
            'PIXEL_COUNT_count': 'SCENE_COUNT',
            'PIXEL_COUNT_mean': 'PIXEL_COUNT',
            'EVI_SUR_mean': 'EVI_SUR_MEAN',
            'EVI_SUR_median': 'EVI_SUR_MEDIAN',
            'EVI_SUR_min': 'EVI_SUR_MIN',
            'EVI_SUR_max': 'EVI_SUR_MAX'
        })
        # The remaining fields only have a mean, so drop the suffix
        annual_df.rename(columns=lambda x: str(x).replace('_mean', ''),
                         inplace=True)
        # Use the builtin int; the np.int alias no longer exists in
        #   NumPy >= 1.24
        count_fields = [
            'SCENE_COUNT', 'PIXEL_COUNT', 'PIXEL_TOTAL',
            'FMASK_COUNT', 'FMASK_TOTAL', 'ETSTAR_COUNT']
        for field in count_fields:
            annual_df[field] = annual_df[field].astype(int)
        annual_df = annual_df.reset_index()

        # Convert ETo and then PPT units
        # Only mm inputs are supported; values are divided by mm per unit
        mm_per_unit = {'mm': 1.0, 'in': 25.4, 'ft': 12 * 25.4}
        unit_fields = [('eto_units', eto_fields), ('ppt_units', ppt_fields)]
        for units_key, fields in unit_fields:
            input_units = ini['BEAMER'][units_key]
            output_units = ini['TABLES'][units_key]
            if input_units != 'mm' or output_units not in mm_per_unit:
                logging.error(
                    ('\nERROR: Input units {} and output units {} are not ' +
                     'currently supported, exiting').format(
                         input_units, output_units))
                # Non-zero status so that callers can detect the failure
                sys.exit(1)
            if output_units != 'mm':
                annual_df[fields] /= mm_per_unit[output_units]

        logging.info('\nWriting annual values to Excel')
        _write_zone_sheets(output_annual_path, annual_df, zone_name_list)


def _scene_id_mask(df, scene_id_list):
    """Return a boolean array flagging rows whose SCENE_ID is in the list

    Any 'XXX' in a SCENE_ID is replaced with the zero padded ROW value of
    the same row before checking, since SCENE_ID values can contain XXX
    but the keep/skip list values don't.
    """
    scene_ids = pd.Series([
        scene_id.replace('XXX', '{:03d}'.format(int(row)))
        for scene_id, row in zip(df['SCENE_ID'], df['ROW'])
    ])
    # Return the raw values so the mask is applied by position, not index
    return scene_ids.isin(scene_id_list).values


def _write_zone_sheets(output_path, df, zone_names):
    """Write one worksheet per zone name to a new Excel workbook"""
    # The context manager saves and closes the workbook on exit;
    #   ExcelWriter.save() was removed in pandas 2.0
    with ExcelWriter(output_path) as excel_f:
        for zone_name in zone_names:
            logging.info('  {}'.format(zone_name))
            df[df['ZONE_NAME'] == zone_name].to_excel(
                excel_f, sheet_name=zone_name, index=False,
                float_format='%.4f')
def main(ini_path, show_flag=False, overwrite_flag=True):
    """Generate interactive Bokeh timeseries figures for each zone

    For every zone read from the zones shapefile, the zone's Landsat daily
    CSV is filtered using the INPUTS/SUMMARY settings of the INI file, joined
    by year to GRIDMET ETo/PPT totals summed over the water year (the GRIDMET
    month range is hardcoded here to October-September), and written to two
    HTML files: one plotted against DOY and one plotted against DATE.

    Bokeh issues:
    Adjust y range based on non-muted data
        https://stackoverflow.com/questions/43620837/how-to-get-bokeh-to-dynamically-adjust-y-range-when-panning
    Linked interactive legends so that there is only one legend for the gridplot
    Maybe hide or mute QA values above max (instead of filtering them in advance)

    Args:
        ini_path (str): path of the INI file passed to inputs.read().
        show_flag (bool): if True, show the figures in the browser and pause
            for ENTER after each zone. Default is False.
        overwrite_flag (bool): currently not consulted by this function;
            existing HTML figures are always removed and regenerated.
            Default is True (for now)

    Raises:
        SystemExit: if one of the plot variables is not a column of the
            filtered Landsat dataframe.
    """
    logging.info('\nGenerate interactive timeseries figures')

    # Eventually read from INI
    plot_var_list = ['NDVI_TOA', 'ALBEDO_SUR', 'TS', 'NDWI_TOA', 'EVI_SUR']
    output_folder = 'figures'

    # Read config file
    ini = inputs.read(ini_path)
    for section in ('INPUTS', 'ZONAL_STATS', 'SUMMARY', 'FIGURES', 'BEAMER'):
        inputs.parse_section(ini, section=section)

    # Hardcode GRIDMET month range to the water year
    ini['SUMMARY']['gridmet_start_month'] = 10
    ini['SUMMARY']['gridmet_end_month'] = 9

    # Start/end year
    year_list = list(range(
        ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(utils.wrapped_range(
        ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(utils.wrapped_range(
        ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(utils.month_range(
        gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(
        ', '.join(map(str, gridmet_months))))

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'], zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID (first item of each zone tuple)
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']]

    # The QA legend is only added when more than one QA level can be shown
    legend_flag = ini['SUMMARY']['max_qa'] > 0

    logging.info('\nProcessing zones')
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone_name = zone_name.replace(' ', '_')
        logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid))

        zone_stats_ws = os.path.join(
            ini['ZONAL_STATS']['output_ws'], zone_name)
        zone_figures_ws = os.path.join(
            ini['SUMMARY']['output_ws'], zone_name, output_folder)
        if not os.path.isdir(zone_stats_ws):
            logging.debug('  Folder {} does not exist, skipping'.format(
                zone_stats_ws))
            continue
        elif not os.path.isdir(zone_figures_ws):
            os.makedirs(zone_figures_ws)

        # Input paths
        landsat_daily_path = os.path.join(
            zone_stats_ws, '{}_landsat_daily.csv'.format(zone_name))
        gridmet_daily_path = os.path.join(
            zone_stats_ws, '{}_gridmet_daily.csv'.format(zone_name))
        gridmet_monthly_path = os.path.join(
            zone_stats_ws, '{}_gridmet_monthly.csv'.format(zone_name))
        if not os.path.isfile(landsat_daily_path):
            logging.error('  Landsat daily CSV does not exist, skipping zone')
            continue
        elif (not os.path.isfile(gridmet_daily_path) and
              not os.path.isfile(gridmet_monthly_path)):
            # TODO: Eventually support generating only Landsat figures
            logging.error(
                '  GRIDMET daily or monthly CSV does not exist, skipping zone')
            continue

        # Output paths
        output_doy_path = os.path.join(
            zone_figures_ws, '{}_timeseries_doy.html'.format(zone_name))
        output_date_path = os.path.join(
            zone_figures_ws, '{}_timeseries_date.html'.format(zone_name))

        logging.debug('  Reading Landsat CSV')
        landsat_df = pd.read_csv(landsat_daily_path)

        logging.debug('  Filtering Landsat dataframe')
        landsat_df = _filter_landsat(
            landsat_df, ini, year_list, month_list, doy_list)
        if landsat_df.empty:
            logging.error(
                '  Empty Landsat dataframe after filtering, skipping zone')
            continue

        # Aggregate GRIDMET (to water year), preferring the monthly CSV
        # At least one of the two files exists (checked above)
        if os.path.isfile(gridmet_monthly_path):
            logging.debug('  Reading montly GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_monthly_path)
        else:
            logging.debug('  Reading daily GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_daily_path)

        logging.debug('  Computing GRIDMET summaries')
        gridmet_group_df = _summarize_gridmet(
            gridmet_df, gridmet_start_month, gridmet_end_month, year_list)
        if gridmet_group_df is None:
            logging.error(
                '    Empty GRIDMET dataframe after filtering by year')
            continue

        # Merge Landsat and GRIDMET collections (inner join)
        zone_df = landsat_df.merge(
            gridmet_group_df, on=['ZONE_NAME', 'ZONE_FID', 'YEAR'])
        if zone_df.empty:
            logging.info('  Empty zone dataframe, not generating figures')
            continue

        # Compute ETg = ET* x (ETo - PPT) and ET = ETg + PPT for the mean
        #   and each interval bound
        stat_list = ['MEAN', 'LPI', 'UPI', 'LCI', 'UCI']
        for stat in stat_list:
            zone_df['ETG_' + stat] = zone_df['ETSTAR_' + stat] * (
                zone_df['ETO'] - zone_df['PPT'])
        for stat in stat_list:
            zone_df['ET_' + stat] = zone_df['ETG_' + stat] + zone_df['PPT']

        # Check that plot variables are present
        for plot_var in plot_var_list:
            if plot_var not in landsat_df.columns.values:
                logging.error(
                    '  The plotting variable {} does not exist in the '
                    'dataframe'.format(plot_var))
                sys.exit()

        sources = _build_qa_sources(zone_df, plot_var_list)

        tooltips = [
            ("LANDSAT", "@PLATFORM"),
            ("DATE", "@TIME"),
            ("DOY", "@DOY")]

        # Glyphs used for the hover / selected / non-selected (and muted)
        #   states; the same objects are shared by every renderer
        hover_circle = Circle(
            fill_color='#ff0000', line_color='#ff0000')
        selected_circle = Circle(
            fill_color='COLOR', line_color='COLOR')
        nonselected_circle = Circle(
            fill_color='#aaaaaa', line_color='#aaaaaa')
        glyphs = (hover_circle, selected_circle, nonselected_circle)

        # Plot the data by DOY
        logging.debug('  Building DOY timeseries figure')
        _write_timeseries_figure(
            output_doy_path, zone_name, 'DOY', plot_var_list, sources,
            tooltips, glyphs, legend_flag, show_flag)

        # Plot the data by DATE
        logging.debug('  Building date timeseries figure')
        _write_timeseries_figure(
            output_date_path, zone_name, 'DATE', plot_var_list, sources,
            tooltips, glyphs, legend_flag, show_flag,
            x_axis_type="datetime", y_bounds={'TS': (270, None)})

        # Pause after each iteration if show is True
        if show_flag:
            input('Press ENTER to continue')


def _filter_landsat(landsat_df, ini, year_list, month_list, doy_list):
    """Apply the INI based scene filters to a Landsat daily dataframe

    Args:
        landsat_df (pandas.DataFrame): rows read from the Landsat daily CSV.
        ini (dict): parsed INI; the INPUTS and SUMMARY sections are read.
        year_list (list): YEAR values to keep (not applied if empty).
        month_list (list): MONTH values to keep (not applied if empty).
        doy_list (list): DOY values to keep (not applied if empty).

    Returns:
        pandas.DataFrame: the filtered rows (may be empty). The input
            dataframe is not modified.
    """
    inputs_ini = ini['INPUTS']
    summary_ini = ini['SUMMARY']

    landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0]

    # QA field should have been written in zonal stats code
    # Eventually this block can be removed
    # assign() returns a new frame instead of writing into a filtered slice
    if 'QA' not in landsat_df.columns.values:
        landsat_df = landsat_df.assign(QA=0)

    if year_list:
        landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)]
    if month_list:
        landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)]
    if doy_list:
        landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)]

    # Assume the default is for these to be True and only filter if False
    platform_flags = [
        ('landsat4_flag', 'LT04'), ('landsat5_flag', 'LT05'),
        ('landsat7_flag', 'LE07'), ('landsat8_flag', 'LC08')]
    for flag_name, platform in platform_flags:
        if not inputs_ini[flag_name]:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != platform]

    if inputs_ini['path_keep_list']:
        landsat_df = landsat_df[
            landsat_df['PATH'].isin(inputs_ini['path_keep_list'])]
    row_keep_list = inputs_ini['row_keep_list']
    if row_keep_list and row_keep_list != ['XXX']:
        landsat_df = landsat_df[landsat_df['ROW'].isin(row_keep_list)]

    def resolved_scene_ids(df):
        # SCENE_ID values contain XXX but the keep/skip lists do not, so
        #   replace XXX with the zero padded ROW value before comparing
        return pd.Series([
            scene_id.replace('XXX', '{:03d}'.format(int(row)))
            for scene_id, row in zip(df['SCENE_ID'], df['ROW'])])

    # The helper series has a fresh index, so mask with the raw values
    if inputs_ini['scene_id_keep_list']:
        keep_mask = resolved_scene_ids(landsat_df).isin(
            inputs_ini['scene_id_keep_list']).values
        landsat_df = landsat_df[keep_mask]
    if inputs_ini['scene_id_skip_list']:
        skip_mask = resolved_scene_ids(landsat_df).isin(
            inputs_ini['scene_id_skip_list']).values
        landsat_df = landsat_df[np.logical_not(skip_mask)]

    # Filter by QA/QC value
    if summary_ini['max_qa'] >= 0 and not landsat_df.empty:
        logging.debug('    Maximum QA: {0}'.format(summary_ini['max_qa']))
        landsat_df = landsat_df[landsat_df['QA'] <= summary_ini['max_qa']]

    # Filter by average cloud score
    if summary_ini['max_cloud_score'] < 100 and not landsat_df.empty:
        logging.debug('    Maximum cloud score: {0}'.format(
            summary_ini['max_cloud_score']))
        landsat_df = landsat_df[
            landsat_df['CLOUD_SCORE'] <= summary_ini['max_cloud_score']]

    # Filter by Fmask percentage
    if summary_ini['max_fmask_pct'] < 100 and not landsat_df.empty:
        landsat_df = landsat_df.assign(FMASK_PCT=100 * (
            landsat_df['FMASK_COUNT'] / landsat_df['FMASK_TOTAL']))
        logging.debug('    Max Fmask threshold: {}'.format(
            summary_ini['max_fmask_pct']))
        landsat_df = landsat_df[
            landsat_df['FMASK_PCT'] <= summary_ini['max_fmask_pct']]

    # Filter low count SLC-off images
    if summary_ini['min_slc_off_pct'] > 0 and not landsat_df.empty:
        logging.debug('    Mininum SLC-off threshold: {}%'.format(
            summary_ini['min_slc_off_pct']))
        # Landsat 7 rows after DOY 151 of 2003 are treated as SLC-off
        slc_off_mask = (
            (landsat_df['PLATFORM'] == 'LE07') &
            ((landsat_df['YEAR'] >= 2004) |
             ((landsat_df['YEAR'] == 2003) & (landsat_df['DOY'] > 151))))
        slc_off_pct = 100 * (
            landsat_df['PIXEL_COUNT'] / landsat_df['PIXEL_TOTAL'])
        # Keep all non SLC-off rows and the SLC-off rows above the threshold
        landsat_df = landsat_df[
            (slc_off_pct >= summary_ini['min_slc_off_pct']) | (~slc_off_mask)]

    return landsat_df


def _summarize_gridmet(gridmet_df, start_month, end_month, year_list):
    """Sum GRIDMET ETO and PPT by zone and group year

    Rows are tagged with a GROUP_YEAR based on the month range. Months from
    a range that starts in October-December are assigned to the following
    calendar year, so a 10-9 range groups by water year.

    Args:
        gridmet_df (pandas.DataFrame): rows read from a GRIDMET CSV.
        start_month (int): first month of the aggregation range.
        end_month (int): last month of the aggregation range.
        year_list (list): group years to keep (not applied if empty).

    Returns:
        pandas.DataFrame with columns ZONE_NAME, ZONE_FID, YEAR, ETO, PPT
            sorted by year, or None if no rows are left after filtering.
    """
    late_months = [10, 11, 12]

    # Work on a copy so the GROUP_YEAR column is not added to the input
    gridmet_df = gridmet_df.copy()

    if start_month in late_months and end_month not in late_months:
        # Range wraps around the end of the calendar year
        month_mask = gridmet_df['MONTH'] >= start_month
        gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
        month_mask = gridmet_df['MONTH'] <= end_month
        gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
    else:
        # Range does not wrap; a range starting (and therefore ending) in
        #   October-December is still assigned to the following year
        year_offset = 1 if start_month in late_months else 0
        month_mask = (
            (gridmet_df['MONTH'] >= start_month) &
            (gridmet_df['MONTH'] <= end_month))
        gridmet_df.loc[month_mask, 'GROUP_YEAR'] = (
            gridmet_df['YEAR'] + year_offset)

    # GROUP_YEAR for rows not in the GRIDMET month range will be NAN
    gridmet_df = gridmet_df[~pd.isnull(gridmet_df['GROUP_YEAR'])]

    if year_list:
        gridmet_df = gridmet_df[gridmet_df['GROUP_YEAR'].isin(year_list)]

    if gridmet_df.empty:
        return None

    # 'sum' is the behavior pandas applies for np.sum in a groupby agg
    gridmet_group_df = gridmet_df \
        .groupby(['ZONE_NAME', 'ZONE_FID', 'GROUP_YEAR']) \
        .agg({'ETO': 'sum', 'PPT': 'sum'}) \
        .reset_index() \
        .sort_values(by='GROUP_YEAR')
    gridmet_group_df = gridmet_group_df.rename(
        columns={'GROUP_YEAR': 'YEAR'})
    gridmet_group_df['YEAR'] = gridmet_group_df['YEAR'].astype(int)
    return gridmet_group_df


def _build_qa_sources(zone_df, plot_var_list):
    """Build one Bokeh ColumnDataSource per QA value

    The data is unpacked by QA value to support interactive legends.

    Args:
        zone_df (pandas.DataFrame): merged Landsat/GRIDMET rows for a zone.
        plot_var_list (list): columns to copy into each source (columns
            missing from zone_df are left out).

    Returns:
        dict: QA value -> bokeh.models.ColumnDataSource
    """
    logging.debug('  Building column data source')

    # Compute colors for each QA value
    qa_values = sorted(set(zone_df['QA'].values))
    colors = {
        qa: "#%02x%02x%02x" % (int(r), int(g), int(b))
        for qa, (r, g, b, _) in zip(
            qa_values,
            255 * cm.viridis(mpl.colors.Normalize()(qa_values)))
    }
    logging.debug('  QA values: {}'.format(
        ', '.join(map(str, qa_values))))

    sources = dict()
    for qa_value in qa_values:
        qa_df = zone_df[zone_df['QA'] == qa_value]
        qa_dates = pd.to_datetime(qa_df['DATE'])
        qa_data = {
            'INDEX': list(range(len(qa_df.index))),
            'PLATFORM': qa_df['PLATFORM'],
            'DATE': qa_dates,
            # String form of the date for the hover tooltip
            'TIME': qa_dates.map(lambda x: x.strftime('%Y-%m-%d')),
            'DOY': qa_df['DOY'].values,
            'QA': qa_df['QA'].values,
            # Every row in this source has the same QA value and color
            'COLOR': [colors[qa_value]] * len(qa_df.index)
        }
        for plot_var in plot_var_list:
            if plot_var in qa_df.columns.values:
                qa_data.update({plot_var: qa_df[plot_var].values})
        sources[qa_value] = bokeh.models.ColumnDataSource(qa_data)
    return sources


def _write_timeseries_figure(output_path, title, x_field, plot_var_list,
                             sources, tooltips, glyphs, legend_flag,
                             show_flag, x_axis_type=None, y_bounds=None):
    """Build a grid of linked scatter plots and save it as an HTML file

    One panel is built per plot variable; all panels share the x range of
    the first panel and each QA source is drawn as a separate renderer.

    Args:
        output_path (str): HTML file to write (removed first if it exists).
        title (str): HTML document title.
        x_field (str): source column used for the x axis.
        plot_var_list (list): source columns to plot, one panel each.
        sources (dict): QA value -> bokeh.models.ColumnDataSource.
        tooltips (list): hover tool tooltips.
        glyphs (tuple): (hover, selection, nonselection) glyphs; the
            nonselection glyph is also used as the muted glyph.
        legend_flag (bool): if True, add a legend keyed on the QA column.
        show_flag (bool): if True, show the figure before saving it.
        x_axis_type (str): passed to figure() when not None.
        y_bounds (dict): plot variable -> y range bounds, optional.
    """
    hover_glyph, selection_glyph, nonselection_glyph = glyphs
    if y_bounds is None:
        y_bounds = {}

    if os.path.isfile(output_path):
        os.remove(output_path)
    output_file(output_path, title=title)

    figure_args = dict(
        plot_width=750, plot_height=250, title=None,
        tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select",
        active_scroll="xwheel_zoom")
    if x_axis_type is not None:
        figure_args['x_axis_type'] = x_axis_type
    plot_args = dict(
        size=4, alpha=0.9, color='COLOR')
    if legend_flag:
        plot_args['legend'] = 'QA'

    figures = []
    for plot_var in plot_var_list:
        if figures:
            # Link the x range to the first panel so they pan/zoom together
            f = figure(
                x_range=figures[0].x_range, y_axis_label=plot_var,
                **figure_args)
        else:
            f = figure(y_axis_label=plot_var, **figure_args)

        if plot_var in y_bounds:
            f.y_range.bounds = y_bounds[plot_var]

        # Add each QA level as a separate object
        for qa in sorted(sources):
            r = f.circle(x_field, plot_var, source=sources[qa], **plot_args)
            r.hover_glyph = hover_glyph
            r.selection_glyph = selection_glyph
            r.nonselection_glyph = nonselection_glyph
            r.muted_glyph = nonselection_glyph

        f.add_tools(bokeh.models.HoverTool(tooltips=tooltips))

        f.legend.location = "top_left"
        f.legend.click_policy = "hide"
        f.legend.orientation = "horizontal"

        figures.append(f)

    # One column per three plot variables, but never fewer than one column
    #   (ncols=0 is not a valid grid when there are fewer than three plots)
    p = gridplot(
        figures, ncols=max(1, len(plot_var_list) // 3),
        sizing_mode='stretch_both')

    if show_flag:
        show(p)
    save(p)
# Example 7
# 0
def test_wrapped_range(a, b, x_min, x_max, expected):
    """Check that utils.wrapped_range(a, b, x_min, x_max) yields `expected`"""
    actual = list(utils.wrapped_range(a, b, x_min, x_max))
    assert actual == expected
def main(ini_path=None, overwrite_flag=False):
    """Generate summary thumbnails

    For each zone in the zone shapefile, read the zone's Landsat daily CSV,
    apply the INI INPUTS/SUMMARY filters, and download one PNG thumbnail
    from Earth Engine for each remaining row of the CSV.  Thumbnails are
    written to the "thumbnails_doy" and "thumbnails_date" folders of the
    zone output workspace.  Both folders receive the same image; only the
    file name ordering differs (DOY first vs. date first).

    Parameters
    ----------
    ini_path : str
        Path of the INI file passed to inputs.read().
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).
        Existing files in both thumbnail folders are removed before any
        new thumbnails are downloaded.

    Notes
    -----
    Zones without an output folder or without a Landsat daily CSV are
    skipped.  An image is skipped (and an error is logged) when its
    thumbnail URL can not be obtained after 10 attempts, so that a URL from
    a previously processed image is never saved under the wrong file name.

    """
    # urlretrieve is urllib.urlretrieve in Python 2 but moved to
    #   urllib.request.urlretrieve in Python 3, so resolve it once here
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    logging.info('\nGenerate summary thumbnails')

    # Inputs (eventually move to INI file?)
    vis_args = {
        'bands': ['red', 'green', 'blue'],
        # 'bands': ['swir1', 'nir', 'red'],
        'min': [0.01, 0.01, 0.01],
        'max': [0.4, 0.4, 0.4],
        'gamma': [1.8, 1.8, 1.8]
    }

    # Buffer zone polygon
    zone_buffer = 240

    # Generate images by DOY
    doy_flag = True
    # Generate images by date
    date_flag = True

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='SPATIAL')
    inputs.parse_section(ini, section='SUMMARY')

    year_list = range(ini['INPUTS']['start_year'],
                      ini['INPUTS']['end_year'] + 1)
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # Add merged row XXX to keep list
    ini['INPUTS']['row_keep_list'].append('XXX')

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID before merging geometries
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']
        ]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']
        ]

    # Need zone_shp_path projection to build EE geometries
    zone = {}
    zone['osr'] = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path'])
    zone['proj'] = gdc.osr_wkt(zone['osr'])
    # zone['proj'] = ee.Projection(zone['proj']).wkt().getInfo()
    # zone['proj'] = zone['proj'].replace('\n', '').replace(' ', '')
    # logging.debug('  Zone Projection: {}'.format(zone['proj']))

    # Initialize Earth Engine API key
    logging.debug('')
    ee.Initialize()

    # Earth Engine collection ID for each PLATFORM value in the Landsat CSV
    coll_dict = {
        'LT04': 'LANDSAT/LT04/C01/T1_SR',
        'LT05': 'LANDSAT/LT05/C01/T1_SR',
        'LE07': 'LANDSAT/LE07/C01/T1_SR',
        'LC08': 'LANDSAT/LC08/C01/T1_SR'
    }

    logging.info('\nProcessing zones')
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone['fid'] = zone_fid
        zone['name'] = zone_name.replace(' ', '_')
        zone['json'] = zone_json
        logging.info('ZONE: {} (FID: {})'.format(zone['name'], zone['fid']))

        # Build EE geometry object for zonal stats
        zone['geom'] = ee.Geometry(geo_json=zone['json'],
                                   opt_proj=zone['proj'],
                                   opt_geodesic=False)
        # logging.debug('  Centroid: {}'.format(
        #     zone['geom'].centroid(100).getInfo()['coordinates']))

        # Use feature geometry to build extent, transform, and shape
        zone['extent'] = gdc.Extent(
            ogr.CreateGeometryFromJson(json.dumps(zone['json'])).GetEnvelope())
        # zone['extent'] = gdc.Extent(zone['geom'].GetEnvelope())
        zone['extent'] = zone['extent'].ogrenv_swap()
        zone['extent'] = zone['extent'].buffer(zone_buffer)
        zone['extent'] = zone['extent'].adjust_to_snap(
            'EXPAND', ini['SPATIAL']['snap_x'], ini['SPATIAL']['snap_y'],
            ini['SPATIAL']['cellsize'])
        zone['geo'] = zone['extent'].geo(ini['SPATIAL']['cellsize'])
        zone['transform'] = gdc.geo_2_ee_transform(zone['geo'])
        # zone['transform'] = '[' + ','.join(map(str, zone['transform'])) + ']'
        zone['shape'] = zone['extent'].shape(ini['SPATIAL']['cellsize'])
        logging.debug('  Zone Shape: {}'.format(zone['shape']))
        logging.debug('  Zone Transform: {}'.format(zone['transform']))
        logging.debug('  Zone Extent: {}'.format(zone['extent']))
        # logging.debug('  Zone Geom: {}'.format(zone['geom'].getInfo()))

        # Build an EE geometry of the extent
        # The thumbnail is clipped to this buffered/snapped rectangle
        extent_geom = ee.Geometry.Rectangle(coords=list(zone['extent']),
                                            proj=zone['proj'],
                                            geodesic=False)

        if 'SUMMARY' in ini.keys():
            zone_output_ws = os.path.join(ini['SUMMARY']['output_ws'],
                                          zone['name'])
        elif 'EXPORT' in ini.keys():
            zone_output_ws = os.path.join(ini['EXPORT']['output_ws'],
                                          zone['name'])
        else:
            logging.error(
                'INI file does not contain a SUMMARY or EXPORT section')
            sys.exit()
        if not os.path.isdir(zone_output_ws):
            logging.debug(
                'Folder {} does not exist, skipping'.format(zone_output_ws))
            continue

        landsat_daily_path = os.path.join(
            zone_output_ws, '{}_landsat_daily.csv'.format(zone['name']))
        if not os.path.isfile(landsat_daily_path):
            logging.error('  Landsat daily CSV does not exist, skipping zone')
            continue

        # Existing thumbnails are only removed when overwriting,
        #   otherwise images already on disk are kept and skipped below
        output_doy_ws = os.path.join(zone_output_ws, 'thumbnails_doy')
        output_date_ws = os.path.join(zone_output_ws, 'thumbnails_date')
        if overwrite_flag and os.path.isdir(output_doy_ws):
            for file_name in os.listdir(output_doy_ws):
                os.remove(os.path.join(output_doy_ws, file_name))
        if overwrite_flag and os.path.isdir(output_date_ws):
            for file_name in os.listdir(output_date_ws):
                os.remove(os.path.join(output_date_ws, file_name))
        if doy_flag and not os.path.isdir(output_doy_ws):
            os.makedirs(output_doy_ws)
        if date_flag and not os.path.isdir(output_date_ws):
            os.makedirs(output_date_ws)

        logging.debug('  Reading Landsat CSV')
        landsat_df = pd.read_csv(landsat_daily_path)
        # landsat_df = pd.read_csv(
        #     landsat_daily_path, parse_dates=['DATE'], index_col='DATE')
        landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0]

        # Common summary filtering
        logging.debug('  Filtering using INI SUMMARY parameters')
        if year_list:
            landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)]
        if month_list:
            landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)]
        if doy_list:
            landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)]

        if ini['INPUTS']['path_keep_list']:
            landsat_df = landsat_df[landsat_df['PATH'].isin(
                ini['INPUTS']['path_keep_list'])]
        # 'XXX' was appended to the keep list above, so a list holding only
        #   'XXX' means no rows were requested and no ROW filter is applied
        if (ini['INPUTS']['row_keep_list']
                and ini['INPUTS']['row_keep_list'] != ['XXX']):
            landsat_df = landsat_df[landsat_df['ROW'].isin(
                ini['INPUTS']['row_keep_list'])]

        # Assume the default is for these to be True and only filter if False
        if not ini['INPUTS']['landsat4_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04']
        if not ini['INPUTS']['landsat5_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05']
        if not ini['INPUTS']['landsat7_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07']
        if not ini['INPUTS']['landsat8_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08']

        if ini['INPUTS']['scene_id_keep_list']:
            # Replace XXX with primary ROW value for checking skip list SCENE_ID
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            # Use .values so the mask is applied by position since the new
            #   series does not share the filtered dataframe index
            landsat_df = landsat_df[scene_id_df.isin(
                ini['INPUTS']['scene_id_keep_list']).values]
            # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
            # landsat_df = landsat_df[landsat_df['SCENE_ID'].isin(
            #     ini['INPUTS']['scene_id_keep_list'])]
        if ini['INPUTS']['scene_id_skip_list']:
            # Replace XXX with primary ROW value for checking skip list SCENE_ID
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[np.logical_not(
                scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)]
            # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
            # landsat_df = landsat_df[np.logical_not(landsat_df['SCENE_ID'].isin(
            #     ini['INPUTS']['scene_id_skip_list']))]

        # Filter by QA/QC value
        if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty:
            logging.debug('    Maximum QA: {0}'.format(
                ini['SUMMARY']['max_qa']))
            landsat_df = landsat_df[
                landsat_df['QA'] <= ini['SUMMARY']['max_qa']]

        # Filter by average cloud score
        if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty:
            logging.debug('    Maximum cloud score: {0}'.format(
                ini['SUMMARY']['max_cloud_score']))
            landsat_df = landsat_df[
                landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

        # Filter by Fmask percentage
        if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty:
            landsat_df['FMASK_PCT'] = 100 * (landsat_df['FMASK_COUNT'] /
                                             landsat_df['FMASK_TOTAL'])
            logging.debug('    Max Fmask threshold: {}'.format(
                ini['SUMMARY']['max_fmask_pct']))
            landsat_df = landsat_df[
                landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

        # Filter low count SLC-off images
        if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty:
            logging.debug('    Mininum SLC-off threshold: {}%'.format(
                ini['SUMMARY']['min_slc_off_pct']))
            # logging.debug('    Maximum pixel count: {}'.format(
            #     max_pixel_count))
            # LE07 images after DOY 151 of 2003 are treated as SLC-off
            slc_off_mask = ((landsat_df['PLATFORM'] == 'LE07') &
                            ((landsat_df['YEAR'] >= 2004) |
                             ((landsat_df['YEAR'] == 2003) &
                              (landsat_df['DOY'] > 151))))
            slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] /
                                 landsat_df['PIXEL_TOTAL'])
            # slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / max_pixel_count)
            # The threshold is only applied to the SLC-off images,
            #   all other images are kept
            landsat_df = landsat_df[(
                (slc_off_pct >= ini['SUMMARY']['min_slc_off_pct'])
                & slc_off_mask) | (~slc_off_mask)]

        if landsat_df.empty:
            logging.error(
                '  Empty Landsat dataframe after filtering, skipping zone')
            continue

        logging.debug('  Downloading thumbnails')
        for landsat, start_date in zip(landsat_df['PLATFORM'],
                                       landsat_df['DATE']):
            # Image collection is filtered to the single day
            #   [start_date, start_date + 1 day)
            start_dt = datetime.datetime.strptime(start_date, '%Y-%m-%d')
            end_dt = start_dt + datetime.timedelta(days=1)
            end_date = end_dt.strftime('%Y-%m-%d')

            output_doy_path = os.path.join(
                output_doy_ws,
                '{}_{}.png'.format(start_dt.strftime('%j_%Y-%m-%d'), landsat))
            output_date_path = os.path.join(
                output_date_ws,
                '{}_{}.png'.format(start_dt.strftime('%Y-%m-%d_%j'), landsat))

            # DEADBEEF - This seems like a poor approach
            save_doy_flag = False
            save_date_flag = False
            if doy_flag and not os.path.isfile(output_doy_path):
                save_doy_flag = True
            if date_flag and not os.path.isfile(output_date_path):
                save_date_flag = True
            if not save_doy_flag and not save_date_flag:
                logging.debug(
                    '  {} - file already exists, skipping'.format(start_date))
                continue
            logging.debug('  {}'.format(start_date))
            # logging.debug('  {}'.format(output_path))

            if landsat in ['LT04', 'LT05', 'LE07']:
                ee_coll = ee.ImageCollection(coll_dict[landsat]).select(
                    ['B1', 'B2', 'B3', 'B4', 'B5', 'B7'],
                    ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])
            elif landsat in ['LC08']:
                ee_coll = ee.ImageCollection(coll_dict[landsat]).select(
                    ['B2', 'B3', 'B4', 'B5', 'B6', 'B7'],
                    ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])
            else:
                # Without this branch ee_coll would be undefined (or left
                #   over from the previous image) for an unknown platform
                logging.warning(
                    '  {} - unsupported platform {}, skipping'.format(
                        start_date, landsat))
                continue
            ee_coll = ee_coll.filterDate(start_date, end_date)
            ee_image = ee.Image(ee_coll.median().divide(10000)) \
                .visualize(**vis_args) \
                .reproject(crs=zone['proj'], crsTransform=zone['transform']) \
                .paint(zone['geom'], color=0.5, width=1) \
                .clip(extent_geom)

            # Get the image thumbnail
            # Reset the URL for every image so that a failed request can
            #   not fall through to the URL of the previous image
            output_url = None
            for i in range(10):
                try:
                    output_url = ee_image.getThumbUrl({'format': 'png'})
                    break
                except Exception as e:
                    logging.error('  Exception: {}, retry {}'.format(e, i))
                    logging.debug('{}'.format(e))
                    sleep(i**2)
            if output_url is None:
                logging.error(
                    '  {} - could not get thumbnail URL, skipping'.format(
                        start_date))
                continue

            for i in range(10):
                try:
                    # DEADBEEF - This seems like a poor approach
                    # Only download once and copy when both files are needed
                    if save_doy_flag and save_date_flag:
                        urlretrieve(output_url, output_doy_path)
                        shutil.copy(output_doy_path, output_date_path)
                    elif save_doy_flag:
                        urlretrieve(output_url, output_doy_path)
                    elif save_date_flag:
                        urlretrieve(output_url, output_date_path)
                    break
                except Exception as e:
                    logging.error('  Exception: {}, retry {}'.format(e, i))
                    logging.debug('{}'.format(e))
                    sleep(i**2)
            else:
                # Only reached when every attempt raised an exception
                logging.error(
                    '  {} - could not download thumbnail, skipping'.format(
                        start_date))