Esempio n. 1
0
def main(ini_path, overwrite_flag=True):
    """Earth Engine Beamer ET Zonal Stats

    Args:
        ini_path (str):
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nEarth Engine Beamer ET Zonal Stats')

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='SPATIAL')
    inputs.parse_section(ini, section='BEAMER')
    inputs.parse_section(ini, section='EXPORT')
    inputs.parse_section(ini, section='ZONAL_STATS')

    # Overwrite landsat products with Beamer specific values
    ini['EXPORT']['landsat_products'] = [
        'ndvi_toa', 'ndwi_toa', 'albedo_sur', 'ts', 'evi_sur', 'etstar_mean',
        'etg_mean', 'etg_lpi', 'etg_upi', 'etg_lci', 'etg_uci', 'etg_mean',
        'et_lpi', 'et_upi', 'et_lci', 'et_uci'
    ]

    # First row  of csv is header
    header_list = [
        'ZONE_NAME', 'ZONE_FID', 'DATE', 'SCENE_ID', 'PLATFORM', 'PATH', 'ROW',
        'YEAR', 'MONTH', 'DAY', 'DOY', 'PIXEL_COUNT', 'PIXEL_TOTAL',
        'FMASK_COUNT', 'FMASK_TOTAL', 'FMASK_PCT', 'ETSTAR_COUNT',
        'CLOUD_SCORE', 'QA', 'NDVI_TOA', 'NDWI_TOA', 'ALBEDO_SUR', 'TS',
        'EVI_SUR', 'ETSTAR_MEAN', 'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI',
        'ETG_UCI', 'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI', 'WY_ETO',
        'WY_PPT'
    ]
    int_fields = [
        'ZONE_FID', 'PATH', 'ROW', 'YEAR', 'MONTH', 'DAY', 'DOY',
        'PIXEL_COUNT', 'PIXEL_TOTAL', 'FMASK_COUNT', 'FMASK_TOTAL',
        'ETSTAR_COUNT'
    ]
    float_fields = list(
        set(header_list) - set(int_fields) -
        set(['ZONE_NAME', 'DATE', 'SCENE_ID', 'PLATFORM']))

    # Regular expression to pull out Landsat scene_id
    # If RE has capturing groups, findall call below will fail to extract ID
    landsat_re = re.compile('L[ETC]0[4578]_\d{3}XXX_\d{4}\d{2}\d{2}')
    # landsat_re = re.compile('L[ETC][4578]\d{3}XXX\d{4}\d{3}')
    # landsat_re = re.compile('L[ETC][4578]\d{3}\d{3}\d{4}\d{3}\D{3}\d{2}')

    # Remove the existing CSV
    output_path = os.path.join(ini['ZONAL_STATS']['output_ws'],
                               ini['BEAMER']['output_name'])
    if overwrite_flag and os.path.isfile(output_path):
        os.remove(output_path)
    # Create an empty CSV
    if not os.path.isfile(output_path):
        data_df = pd.DataFrame(columns=header_list)
        data_df[int_fields] = data_df[int_fields].astype(np.int64)
        data_df[float_fields] = data_df[float_fields].astype(np.float32)
        data_df.to_csv(output_path, index=False)

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)
    # zone_count = len(zone_geom_list)
    # output_fmt = '_{0:0%sd}.csv' % str(int(math.log10(zone_count)) + 1)

    # Check if the zone_names are unique
    # Eventually support merging common zone_names
    if len(set([z[1] for z in zone_geom_list])) != len(zone_geom_list):
        logging.error(
            '\nERROR: There appear to be duplicate zone ID/name values.'
            '\n  Currently, the values in "{}" must be unique.'
            '\n  Exiting.'.format(ini['INPUTS']['zone_field']))
        return False

    # Filter features by FID
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']
        ]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']
        ]

    # Merge geometries
    if ini['INPUTS']['merge_geom_flag']:
        merge_geom = ogr.Geometry(ogr.wkbMultiPolygon)
        for zone in zone_geom_list:
            zone_multipolygon = ogr.ForceToMultiPolygon(
                ogr.CreateGeometryFromJson(json.dumps(zone[2])))
            for zone_polygon in zone_multipolygon:
                merge_geom.AddGeometry(zone_polygon)
        # merge_json = json.loads(merge_mp.ExportToJson())
        zone_geom_list = [[
            0, ini['INPUTS']['zone_filename'],
            json.loads(merge_geom.ExportToJson())
        ]]
        ini['INPUTS']['zone_field'] = ''

    # Set all zone specific parameters into a dictionary
    zone = {}

    # Need zone_shp_path projection to build EE geometries
    zone['osr'] = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path'])
    zone['proj'] = gdc.osr_wkt(zone['osr'])
    # zone['proj'] = ee.Projection(zone['proj']).wkt().getInfo()
    # zone['proj'] = zone['proj'].replace('\n', '').replace(' ', '')
    # logging.debug('  Zone Projection: {}'.format(zone['proj']))

    # Check that shapefile has matching spatial reference
    if not gdc.matching_spatref(zone['osr'], ini['SPATIAL']['osr']):
        logging.warning('  Zone OSR:\n{}\n'.format(zone['osr']))
        logging.warning('  Output OSR:\n{}\n'.format(
            ini['SPATIAL']['osr'].ExportToWkt()))
        logging.warning('  Zone Proj4:   {}'.format(
            zone['osr'].ExportToProj4()))
        logging.warning('  Output Proj4: {}'.format(
            ini['SPATIAL']['osr'].ExportToProj4()))
        logging.warning(
            '\nWARNING: \n'
            'The output and zone spatial references do not appear to match\n'
            'This will likely cause problems!')
        input('Press ENTER to continue')
    else:
        logging.debug('  Zone Projection:\n{}\n'.format(
            zone['osr'].ExportToWkt()))
        logging.debug('  Output Projection:\n{}\n'.format(
            ini['SPATIAL']['osr'].ExportToWkt()))
        logging.debug('  Output Cellsize: {}'.format(
            ini['SPATIAL']['cellsize']))

    # Initialize Earth Engine API key
    logging.info('\nInitializing Earth Engine')
    ee.Initialize()
    utils.ee_request(ee.Number(1).getInfo())

    # Read in ETo and PPT data from file
    if (ini['BEAMER']['eto_source'] == 'file'
            or ini['BEAMER']['ppt_source'] == 'file'):
        data_array = np.atleast_1d(
            np.genfromtxt(ini['BEAMER']['data_path'],
                          delimiter=',',
                          names=True,
                          encoding=None,
                          dtype=None))
        data_fields = data_array.dtype.names
        logging.debug('  CSV fields: {}'.format(', '.join(data_fields)))
        # DEADBEEF - Compare fields names assuming all upper case
        data_fields = [f.upper() for f in data_fields]
        eto_dict = defaultdict(dict)
        ppt_dict = defaultdict(dict)
        for row in data_array:
            z = str(row[data_fields.index(ini['BEAMER']['data_zone_field'])])
            y = row[data_fields.index(ini['BEAMER']['data_year_field'])]
            if ini['BEAMER']['eto_source'] == 'file':
                # DEADBEEF - Compare fields names assuming all upper case
                eto_dict[z][y] = row[data_fields.index(
                    ini['BEAMER']['data_eto_field'].upper())]
            if ini['BEAMER']['ppt_source'] == 'file':
                # DEADBEEF - Compare fields names assuming all upper case
                ppt_dict[z][y] = row[data_fields.index(
                    ini['BEAMER']['data_ppt_field'].upper())]

    # Get filtered/merged/prepped Landsat collection
    landsat_args = {
        k: v
        for section in ['INPUTS'] for k, v in ini[section].items() if k in [
            'landsat4_flag', 'landsat5_flag', 'landsat7_flag', 'landsat8_flag',
            'fmask_flag', 'acca_flag', 'start_year', 'end_year', 'start_month',
            'end_month', 'start_doy', 'end_doy', 'scene_id_keep_list',
            'scene_id_skip_list', 'path_keep_list', 'row_keep_list',
            'tile_geom', 'adjust_method', 'mosaic_method', 'refl_sur_method'
        ]
    }
    landsat_args['products'] = ini['EXPORT']['landsat_products']
    landsat = ee_common.Landsat(landsat_args)

    # Calculate zonal stats for each feature separately
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone['fid'] = zone_fid
        zone['name'] = zone_name.replace(' ', '_')
        zone['json'] = zone_json
        logging.info('ZONE: {} (FID: {})'.format(zone['name'], zone['fid']))

        # zone_key used for wy_ppt and wy_eto inputs from csv file
        if ini['INPUTS']['zone_field'] == 'FID':
            zone_key = str(zone['fid'])
            print('Using FID as zone_field')
        else:
            zone_key = zone['name']
            print('Using Name as zone_field')

        # Build EE geometry object for zonal stats
        zone['geom'] = ee.Geometry(geo_json=zone['json'],
                                   opt_proj=zone['proj'],
                                   opt_geodesic=False)
        # logging.debug('  Centroid: {}'.format(
        #     zone['geom'].centroid(100).getInfo()['coordinates']))

        # Use feature geometry to build extent, transform, and shape
        zone['extent'] = gdc.Extent(
            ogr.CreateGeometryFromJson(json.dumps(zone['json'])).GetEnvelope())
        # zone['extent'] = gdc.Extent(zone['geom'].GetEnvelope())
        zone['extent'] = zone['extent'].ogrenv_swap()
        zone['extent'] = zone['extent'].adjust_to_snap(
            'EXPAND', ini['SPATIAL']['snap_x'], ini['SPATIAL']['snap_y'],
            ini['SPATIAL']['cellsize'])
        zone['geo'] = zone['extent'].geo(ini['SPATIAL']['cellsize'])
        zone['transform'] = gdc.geo_2_ee_transform(zone['geo'])
        # zone['transform'] = '[' + ','.join(map(str, zone['transform'])) + ']'
        zone['shape'] = zone['extent'].shape(ini['SPATIAL']['cellsize'])
        logging.debug('  Zone Shape: {}'.format(zone['shape']))
        logging.debug('  Zone Transform: {}'.format(zone['transform']))
        logging.debug('  Zone Extent: {}'.format(zone['extent']))
        # logging.debug('  Zone Geom: {}'.format(zone['geom'].getInfo()))

        # Assume all pixels in all 14+2 images could be reduced
        zone['max_pixels'] = zone['shape'][0] * zone['shape'][1]
        logging.debug('  Max Pixels: {}'.format(zone['max_pixels']))

        # Set output spatial reference
        # Eventually allow user to manually set these
        # output_crs = zone['proj']
        # ini['INPUTS']['transform'] = zone['transform']
        logging.debug('  Output Projection: {}'.format(ini['SPATIAL']['crs']))
        logging.debug('  Output Transform: {}'.format(zone['transform']))

        # Process date range by year
        start_dt = datetime.datetime(ini['INPUTS']['start_year'], 1, 1)
        end_dt = datetime.datetime(ini['INPUTS']['end_year'] + 1, 1,
                                   1) - datetime.timedelta(0, 1)
        iter_months = ini['BEAMER']['month_step']
        for i, iter_start_dt in enumerate(
                rrule.rrule(
                    # rrule.YEARLY, interval=interval_cnt,
                    rrule.MONTHLY,
                    interval=iter_months,
                    dtstart=start_dt,
                    until=end_dt)):
            iter_end_dt = (
                iter_start_dt +
                # relativedelta.relativedelta(years=interval_cnt) -
                relativedelta.relativedelta(months=iter_months) -
                datetime.timedelta(0, 1))
            if ((ini['INPUTS']['start_month']
                 and iter_end_dt.month < ini['INPUTS']['start_month'])
                    or (ini['INPUTS']['end_month']
                        and iter_start_dt.month > ini['INPUTS']['end_month'])):
                logging.debug('  {}  {}  skipping'.format(
                    iter_start_dt.date(), iter_end_dt.date()))
                continue
            elif (
                (ini['INPUTS']['start_doy'] and
                 int(iter_end_dt.strftime('%j')) < ini['INPUTS']['start_doy'])
                    or
                (ini['INPUTS']['end_doy'] and int(iter_start_dt.strftime('%j'))
                 > ini['INPUTS']['end_doy'])):
                logging.debug('  {}  {}  skipping'.format(
                    iter_start_dt.date(), iter_end_dt.date()))
                continue
            else:
                logging.info('  {}  {}'.format(iter_start_dt.date(),
                                               iter_end_dt.date()))
            year = iter_start_dt.year

            # Filter the GRIDMET collection
            wy_start_date = '{}-10-01'.format(year - 1)
            wy_end_date = '{}-10-01'.format(year)
            logging.debug('  WY: {} {}'.format(wy_start_date, wy_end_date))
            gridmet_coll = ee.ImageCollection('IDAHO_EPSCOR/GRIDMET') \
                .filterDate(wy_start_date, wy_end_date)

            # # PRISM collection was uploaded as an asset
            # if ini['BEAMER']['ppt_source'] == 'prism':
            #     def prism_time_start(input_image):
            #         """Set time_start property on PRISM water year PPT collection"""
            #         # Assume year is the 4th item separated by "_"
            #         wy = ee.String(
            #             input_image.get('system:index')).split('_').get(3)
            #         date_start = ee.Date(ee.String(wy).cat('-10-01'))
            #         return input_image.select([0], ['ppt']).setMulti({
            #             'system:time_start': date_start.advance(-1, 'year').millis()
            #         })
            #     prism_coll = ee.ImageCollection('users/cgmorton/prism_800m_ppt_wy')
            #     prism_coll = ee.ImageCollection(prism_coll.map(prism_time_start)) \
            #         .filterDate(wy_start_dt, wy_end_dt)
            #     # prism_coll = ee.ImageCollection(
            #     #     ee_common.MapsEngineAssets.prism_ppt_wy).filterDate(
            #     #         wy_start_dt, wy_end_dt)

            # Get water year PPT for centroid of zone or read from file
            # Convert all input data to mm to match GRIDMET data
            if ini['BEAMER']['ppt_source'] == 'file':
                wy_ppt_input = ppt_dict[zone_key][year]
                if ini['BEAMER']['data_ppt_units'] == 'mm':
                    pass
                elif ini['BEAMER']['data_ppt_units'] == 'in':
                    wy_ppt_input *= 25.4
                elif ini['BEAMER']['data_ppt_units'] == 'ft':
                    wy_ppt_input *= (25.4 * 12)
            elif ini['BEAMER']['ppt_source'] == 'gridmet':
                wy_ppt_input = float(
                    utils.ee_getinfo(
                        ee.ImageCollection(
                            gridmet_coll.select(['pr'],
                                                ['ppt']).sum()).getRegion(
                                                    zone['geom'].centroid(1),
                                                    500))[1][4])
                # Calculate GRIDMET zonal mean of geometry
                # wy_ppt_input = float(ee.ImageCollection(
                #     gridmet_coll.select(['pr'], ['ppt'])).reduceRegion(
                #         reducer=ee.Reducer.sum(),
                #         geometry=zone['geom'],
                #         crs=ini['SPATIAL']['crs'],
                #         crsTransform=zone['transform'],
                #         bestEffort=False,
                #         tileScale=1).getInfo()['ppt']
            # elif ini['BEAMER']['ppt_source'] == 'prism':
            #     # Calculate PRISM zonal mean of geometry
            #     wy_ppt_input = float(utils.ee_getinfo(ee.ImageCollection(
            #         prism_coll.map(ee_common.prism_ppt_func)).sum().reduceRegion(
            #             reducer=ee.Reducer.mean(),
            #             geometry=zone['geom'],
            #             crs=ini['SPATIAL']['crs'],
            #             crsTransform=zone['transform'],
            #             bestEffort=False,
            #             tileScale=1))['ppt'])

            # Get water year ETo for centroid of zone or read from file
            # Convert all input data to mm for Beamer Method
            if ini['BEAMER']['eto_source'] == 'FILE':
                wy_eto_input = eto_dict[zone_key][year]
                if ini['BEAMER']['data_eto_units'] == 'mm':
                    pass
                elif ini['BEAMER']['data_eto_units'] == 'in':
                    wy_eto_input *= 25.4
                elif ini['BEAMER']['data_eto_units'] == 'ft':
                    wy_eto_input *= (25.4 * 12)
            # This assumes GRIMET data is in millimeters
            elif ini['BEAMER']['eto_source'] == 'gridmet':
                wy_eto_input = float(
                    utils.ee_getinfo(
                        ee.ImageCollection(gridmet_coll.select(
                            ['eto']).sum()).getRegion(zone['geom'].centroid(1),
                                                      500))[1][4])
                # wy_eto_input = float(ee.ImageCollection(
                #     gridmet_coll.select(['eto'])).reduceRegion(
                #         reducer=ee.Reducer.sum(),
                #         geometry=zone['geom'],
                #         crs=ini['SPATIAL']['crs'],
                #         crsTransform=zone['transform'],
                #         bestEffort=False,
                #         tileScale=1).getInfo()
            logging.debug('  Input  ETO: {} mm  PPT: {} mm'.format(
                wy_eto_input, wy_ppt_input))

            # Scale ETo & PPT
            wy_eto_input *= ini['BEAMER']['eto_factor']
            wy_ppt_input *= ini['BEAMER']['ppt_factor']

            # Convert output units from mm
            wy_eto_output = wy_eto_input
            wy_ppt_output = wy_ppt_input
            if ini['BEAMER']['ppt_units'] == 'mm':
                pass
            elif ini['BEAMER']['ppt_units'] == 'in':
                wy_ppt_output /= 25.4
            elif ini['BEAMER']['ppt_units'] == 'ft':
                wy_ppt_output /= (25.4 * 12)
            if ini['BEAMER']['eto_units'] == 'mm':
                pass
            elif ini['BEAMER']['eto_units'] == 'in':
                wy_eto_output /= 25.4
            elif ini['BEAMER']['eto_units'] == 'ft':
                wy_eto_output /= (25.4 * 12)
            logging.debug('  Output ETO: {} {}  PPT: {} {}'.format(
                wy_eto_output, ini['BEAMER']['eto_units'], wy_ppt_output,
                ini['BEAMER']['ppt_units']))

            # Initialize the Landsat object
            landsat.zone_geom = zone['geom']
            landsat.start_date = iter_start_dt.strftime('%Y-%m-%d')
            landsat.end_date = iter_end_dt.strftime('%Y-%m-%d')
            landsat_coll = landsat.get_collection()
            if ee.Image(landsat_coll.first()).getInfo() is None:
                logging.info('    No images, skipping')
                continue

            # # Print the collection SCENE_ID list
            # logging.debug('{}'.format(', '.join([
            #     f['properties']['SCENE_ID']
            #     for f in landsat_coll.getInfo()['features']])))
            # input('ENTER')

            # Add water year ETo and PPT values to each image
            def eto_ppt_func(img):
                """"""
                return ee.Image(img).setMulti({
                    'wy_eto': wy_eto_input,
                    'wy_ppt': wy_ppt_input
                })

            landsat_coll = ee.ImageCollection(landsat_coll.map(eto_ppt_func))

            # Compute ETg
            image_coll = ee.ImageCollection(landsat_coll.map(landsat_etg_func))

            # # Get the output image URL
            # output_url = ee.Image(landsat_coll.first()) \
            #     .select(['red', 'green', 'blue']) \
            #     .visualize(min=[0, 0, 0], max=[0.4, 0.4, 0.4]) \
            #     .getThumbUrl({'format': 'png', 'size': '600'})
            # # This would load the image in your browser
            # import webbrowser
            # webbrowser.open(output_url)
            # # webbrowser.read(output_url)

            # # Show the output image
            # window = tk.Tk()
            # output_file = Image.open(io.BytesIO(urllib.urlopen(output_url).read()))
            # output_photo = ImageTk.PhotoImage(output_file)
            # label = tk.Label(window, image=output_photo)
            # label.pack()
            # window.mainloop()

            # Compute zonal stats of polygon
            def beamer_zonal_stats_func(input_image):
                """"""
                # Beamer function adds 5 ETg and 1 ET* band
                # Landsat collection adds 3 ancillary bands
                bands = len(landsat_args['products']) + 3 + 6

                # .clip(zone['geom']) \
                input_mean = input_image \
                    .reduceRegion(
                        reducer=ee.Reducer.mean(),
                        geometry=zone['geom'],
                        crs=ini['SPATIAL']['crs'],
                        crsTransform=zone['transform'],
                        bestEffort=False,
                        tileScale=1,
                        maxPixels=zone['max_pixels'] * bands)

                fmask_img = input_image.select(['fmask'])
                input_count = fmask_img.gt(1) \
                    .addBands(fmask_img.gte(0).unmask()) \
                    .rename(['fmask', 'pixel']) \
                    .reduceRegion(
                        reducer=ee.Reducer.sum().combine(
                            ee.Reducer.count(), '', True),
                        geometry=zone['geom'],
                        crs=ini['SPATIAL']['crs'],
                        crsTransform=zone['transform'],
                        bestEffort=False,
                        tileScale=1,
                        maxPixels=zone['max_pixels'] * 3)

                etstar_count = input_image \
                    .select(['etstar_mean'], ['etstar_count']) \
                    .lte(ini['BEAMER']['etstar_threshold']) \
                    .reduceRegion(
                        reducer=ee.Reducer.sum(),
                        geometry=zone['geom'],
                        crs=ini['SPATIAL']['crs'],
                        crsTransform=zone['transform'],
                        bestEffort=False,
                        tileScale=1,
                        maxPixels=zone['max_pixels'] * 2)

                # Save as image properties
                return ee.Feature(
                    None, {
                        'scene_id': ee.String(input_image.get('SCENE_ID')),
                        'time': input_image.get('system:time_start'),
                        'row': input_mean.get('row'),
                        'pixel_count': input_count.get('pixel_sum'),
                        'pixel_total': input_count.get('pixel_count'),
                        'fmask_count': input_count.get('fmask_sum'),
                        'fmask_total': input_count.get('fmask_count'),
                        'cloud_score': input_mean.get('cloud_score'),
                        'etstar_count': etstar_count.get('etstar_count'),
                        'ndvi_toa': input_mean.get('ndvi_toa'),
                        'ndwi_toa': input_mean.get('ndwi_toa'),
                        'albedo_sur': input_mean.get('albedo_sur'),
                        'ts': input_mean.get('ts'),
                        'evi_sur': input_mean.get('evi_sur'),
                        'etstar_mean': input_mean.get('etstar_mean'),
                        'etg_mean': input_mean.get('etg_mean'),
                        'etg_lpi': input_mean.get('etg_lpi'),
                        'etg_upi': input_mean.get('etg_upi'),
                        'etg_lci': input_mean.get('etg_lci'),
                        'etg_uci': input_mean.get('etg_uci')
                    })

            # Calculate values and statistics
            stats_coll = ee.ImageCollection(
                image_coll.map(beamer_zonal_stats_func))

            # # DEADBEEF - Test the function for a single image
            # stats_info = beamer_zonal_stats_func(
            #     ee.Image(image_coll.first())).getInfo()
            # print(stats_info)
            # for k, v in sorted(stats_info['properties'].items()):
            #     logging.info('{:24s}: {}'.format(k, v))
            # input('ENTER')
            # return False

            # # DEADBEEF - Print the stats info to the screen
            # stats_info = stats_coll.getInfo()
            # import pprint
            # pp = pprint.PrettyPrinter(indent=4)
            # for ftr in stats_info['features']:
            #     pp.pprint(ftr)
            # input('ENTER')
            # # return False

            # Get the values from EE
            stats_desc = utils.ee_getinfo(stats_coll)
            if stats_desc is None:
                logging.error('  Timeout error, skipping')
                continue

            # Save data for writing
            row_list = []
            for ftr in stats_desc['features']:
                try:
                    count = int(ftr['properties']['pixel_count'])
                except (KeyError, TypeError) as e:
                    # logging.debug('  Exception: {}'.format(e))
                    continue
                if count == 0:
                    logging.info('  COUNT: 0, skipping')
                    continue

                # First get scene ID and time
                try:
                    scene_id = landsat_re.findall(
                        ftr['properties']['scene_id'])[0]
                    scene_time = datetime.datetime.utcfromtimestamp(
                        float(ftr['properties']['time']) / 1000)
                except:
                    pp = pprint.PrettyPrinter(indent=4)
                    pp.pprint(ftr)
                    input('ENTER')

                # Extract and save other properties
                try:
                    row_list.append({
                        'ZONE_FID':
                        zone_fid,
                        'ZONE_NAME':
                        zone_name,
                        'SCENE_ID':
                        scene_id,
                        'PLATFORM':
                        scene_id[0:4],
                        'PATH':
                        int(scene_id[5:8]),
                        'ROW':
                        int(ftr['properties']['row']),
                        # 'ROW': int(scene_id[8:11]),
                        'DATE':
                        scene_time.date().isoformat(),
                        'YEAR':
                        int(scene_time.year),
                        'MONTH':
                        int(scene_time.month),
                        'DAY':
                        int(scene_time.day),
                        'DOY':
                        int(scene_time.strftime('%j')),
                        'PIXEL_COUNT':
                        int(ftr['properties']['pixel_count']),
                        'PIXEL_TOTAL':
                        int(ftr['properties']['pixel_total']),
                        'FMASK_COUNT':
                        int(ftr['properties']['fmask_count']),
                        'FMASK_TOTAL':
                        int(ftr['properties']['fmask_total']),
                        'CLOUD_SCORE':
                        float(ftr['properties']['cloud_score']),
                        'ETSTAR_COUNT':
                        int(ftr['properties']['etstar_count']),
                        'NDVI_TOA':
                        float(ftr['properties']['ndvi_toa']),
                        'NDWI_TOA':
                        float(ftr['properties']['ndwi_toa']),
                        'ALBEDO_SUR':
                        float(ftr['properties']['albedo_sur']),
                        'TS':
                        float(ftr['properties']['ts']),
                        'EVI_SUR':
                        float(ftr['properties']['evi_sur']),
                        'ETSTAR_MEAN':
                        float(ftr['properties']['etstar_mean']),
                        'ETG_MEAN':
                        float(ftr['properties']['etg_mean']),
                        'ETG_LPI':
                        float(ftr['properties']['etg_lpi']),
                        'ETG_UPI':
                        float(ftr['properties']['etg_upi']),
                        'ETG_LCI':
                        float(ftr['properties']['etg_lci']),
                        'ETG_UCI':
                        float(ftr['properties']['etg_uci']),
                        'WY_ETO':
                        wy_eto_output,
                        'WY_PPT':
                        wy_ppt_output
                    })
                except (KeyError, TypeError) as e:
                    logging.info('  ERROR: {}\n  SCENE_ID: {}\n  '
                                 '  There may not be an SR image to join to\n'
                                 '  {}'.format(e, scene_id, ftr['properties']))
                    # input('ENTER')

            # Save all values to the dataframe (and export)
            if row_list:
                logging.debug('  Appending')
                data_df = data_df.append(row_list, ignore_index=True)

                # DEADBEEF
                if data_df['QA'].isnull().any():
                    data_df.loc[data_df['QA'].isnull(), 'QA'] = 0
                fmask_mask = data_df['FMASK_TOTAL'] > 0
                if fmask_mask.any():
                    data_df.loc[fmask_mask, 'FMASK_PCT'] = 100.0 * (
                        data_df.loc[fmask_mask, 'FMASK_COUNT'] /
                        data_df.loc[fmask_mask, 'FMASK_TOTAL'])

                logging.debug('  Saving')
                data_df[int_fields] = data_df[int_fields].astype(np.int64)
                data_df[float_fields] = data_df[float_fields].astype(
                    np.float32)

                # Compute ET from ETg and PPT offline
                # (must be after float conversion above)
                data_df['ET_MEAN'] = data_df['ETG_MEAN'] + data_df['WY_PPT']
                data_df['ET_LPI'] = data_df['ETG_LPI'] + data_df['WY_PPT']
                data_df['ET_UPI'] = data_df['ETG_UPI'] + data_df['WY_PPT']
                data_df['ET_LCI'] = data_df['ETG_LCI'] + data_df['WY_PPT']
                data_df['ET_UCI'] = data_df['ETG_UCI'] + data_df['WY_PPT']

                # Convert float fields to objects, set NaN to None
                for field in data_df.columns.values:
                    if field.upper() not in float_fields:
                        continue
                    data_df[field] = data_df[field].astype(object)
                    null_mask = data_df[field].isnull()
                    data_df.loc[null_mask, field] = None
                    data_df.loc[~null_mask,
                                field] = data_df.loc[~null_mask, field].map(
                                    lambda x: '{0:10.6f}'.format(x).strip())
                    # data_df.loc[~null_mask, [field]] = data_df.loc[~null_mask, [field]].apply(
                    #     lambda x: '{0:10.6f}'.format(x[0]).strip(), axis=1)

                # data_df = data_df.reindex_axis(header_list, axis=1)
                data_df = data_df.reindex(header_list, axis=1)
                # data_df.reset_index(drop=False, inplace=True)
                data_df.sort_values(['ZONE_FID', 'DATE', 'ROW'],
                                    ascending=True,
                                    inplace=True)
                # data_df.sort(
                #     ['ZONE_NAME', 'DATE'], ascending=[True, True], inplace=True)
                data_df.to_csv(output_path, index=False)
            del row_list
def main(ini_path, overwrite_flag=True):
    """Generate Beamer ETg summary tables

    Args:
        ini_path (str):
        overwrite_flag (bool): if True, overwrite existing tables
            Default is True (for now)
    """

    logging.info('\nGenerate Beamer ETg summary tables')

    # # Eventually get from INI (like ini['BEAMER']['landsat_products'])
    # daily_fields = [
    #     'ZONE_NAME', 'ZONE_FID', 'DATE', 'SCENE_ID', 'PLATFORM', 'PATH', 'ROW',
    #     'YEAR', 'MONTH', 'DAY', 'DOY', 'WATER_YEAR',
    #     'PIXEL_COUNT', 'ETSTAR_COUNT',
    #     'NDVI_TOA', 'NDWI_TOA', 'ALBEDO_SUR', 'TS', 'EVI_SUR', 'ETSTAR_MEAN',
    #     'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI',
    #     'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI',
    #     'ETO', 'PPT']
    # annual_fields = [
    #     'SCENE_COUNT', 'PIXEL_COUNT', 'ETSTAR_COUNT',
    #     'NDVI_TOA', 'NDWI_TOA', 'ALBEDO_SUR', 'TS',
    #     'EVI_SUR_MEAN', 'EVI_SUR_MEDIAN', 'EVI_SUR_MIN', 'EVI_SUR_MAX',
    #     'ETSTAR_MEAN',
    #     'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI',
    #     'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI',
    #     'ETO', 'PPT']

    # For unit conversion
    eto_fields = [
        'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI', 'ET_MEAN',
        'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI', 'ETO'
    ]
    ppt_fields = ['PPT']

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='BEAMER')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='TABLES')

    # Hardcode GRIDMET month range to the water year
    ini['SUMMARY']['gridmet_start_month'] = 10
    ini['SUMMARY']['gridmet_end_month'] = 9

    # Output paths
    output_daily_path = os.path.join(
        ini['SUMMARY']['output_ws'],
        ini['BEAMER']['output_name'].replace('.csv', '_daily.xlsx'))
    output_annual_path = os.path.join(
        ini['SUMMARY']['output_ws'],
        ini['BEAMER']['output_name'].replace('.csv', '_annual.xlsx'))

    # Check if files already exist
    if overwrite_flag:
        if os.path.isfile(output_daily_path):
            os.remove(output_daily_path)
        if os.path.isfile(output_annual_path):
            os.remove(output_annual_path)
    else:
        if (os.path.isfile(output_daily_path)
                and os.path.isfile(output_annual_path)):
            logging.info('\nOutput files already exist and '
                         'overwrite is False, exiting')
            return True

    # Start/end year
    year_list = list(
        range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(
        utils.month_range(gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(', '.join(
        map(str, gridmet_months))))

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID before merging geometries
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']
        ]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']
        ]

    # # Filter features by FID before merging geometries
    # if ini['INPUTS']['fid_keep_list']:
    #     landsat_df = landsat_df[landsat_df['ZONE_FID'].isin(
    #         ini['INPUTS']['fid_keep_list'])]
    # if ini['INPUTS']['fid_skip_list']:
    #     landsat_df = landsat_df[~landsat_df['ZONE_FID'].isin(
    #         ini['INPUTS']['fid_skip_list'])]

    logging.info('\nProcessing zones')
    zone_df_dict = {}
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone_name = zone_name.replace(' ', '_')
        logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid))

        zone_stats_ws = os.path.join(ini['ZONAL_STATS']['output_ws'],
                                     zone_name)
        if not os.path.isdir(zone_stats_ws):
            logging.debug(
                '  Folder {} does not exist, skipping'.format(zone_stats_ws))
            continue

        # Input paths
        landsat_daily_path = os.path.join(
            zone_stats_ws, '{}_landsat_daily.csv'.format(zone_name))
        gridmet_daily_path = os.path.join(
            zone_stats_ws, '{}_gridmet_daily.csv'.format(zone_name))
        gridmet_monthly_path = os.path.join(
            zone_stats_ws, '{}_gridmet_monthly.csv'.format(zone_name))
        if not os.path.isfile(landsat_daily_path):
            logging.error('  Landsat daily CSV does not exist, skipping zone')
            continue
        elif (not os.path.isfile(gridmet_daily_path)
              and not os.path.isfile(gridmet_monthly_path)):
            logging.error(
                '  GRIDMET daily or monthly CSV does not exist, skipping zone')
            continue
            # DEADBEEF - Eventually support generating only Landsat figures
            # logging.error(
            #     '  GRIDMET daily and/or monthly CSV files do not exist.\n'
            #     '  ETo and PPT will not be processed.')

        logging.debug('  Reading Landsat CSV')
        landsat_df = pd.read_csv(landsat_daily_path)

        logging.debug('  Filtering Landsat dataframe')
        landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0]

        # QA field should have been written in zonal stats code
        # Eventually this block can be removed
        if 'QA' not in landsat_df.columns.values:
            landsat_df['QA'] = 0

        # # This assumes that there are L5/L8 images in the dataframe
        # if not landsat_df.empty:
        #     max_pixel_count = max(landsat_df['PIXEL_COUNT'])
        # else:
        #     max_pixel_count = 0

        if year_list:
            landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)]
        if month_list:
            landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)]
        if doy_list:
            landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)]

        # Assume the default is for these to be True and only filter if False
        if not ini['INPUTS']['landsat4_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04']
        if not ini['INPUTS']['landsat5_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05']
        if not ini['INPUTS']['landsat7_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07']
        if not ini['INPUTS']['landsat8_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08']

        if ini['INPUTS']['path_keep_list']:
            landsat_df = landsat_df[landsat_df['PATH'].isin(
                ini['INPUTS']['path_keep_list'])]
        if (ini['INPUTS']['row_keep_list']
                and ini['INPUTS']['row_keep_list'] != ['XXX']):
            landsat_df = landsat_df[landsat_df['ROW'].isin(
                ini['INPUTS']['row_keep_list'])]

        if ini['INPUTS']['scene_id_keep_list']:
            # Replace XXX with primary ROW value for checking skip list SCENE_ID
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[scene_id_df.isin(
                ini['INPUTS']['scene_id_keep_list']).values]
            # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
            # landsat_df = landsat_df[landsat_df['SCENE_ID'].isin(
            #     ini['INPUTS']['scene_id_keep_list'])]
        if ini['INPUTS']['scene_id_skip_list']:
            # Replace XXX with primary ROW value for checking skip list SCENE_ID
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[np.logical_not(
                scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)]
            # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
            # landsat_df = landsat_df[np.logical_not(landsat_df['SCENE_ID'].isin(
            #     ini['INPUTS']['scene_id_skip_list']))]

        # Filter by QA/QC value
        if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty:
            logging.debug('    Maximum QA: {0}'.format(
                ini['SUMMARY']['max_qa']))
            landsat_df = landsat_df[
                landsat_df['QA'] <= ini['SUMMARY']['max_qa']]

        # Filter by average cloud score
        if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty:
            logging.debug('    Maximum cloud score: {0}'.format(
                ini['SUMMARY']['max_cloud_score']))
            landsat_df = landsat_df[
                landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

        # Filter by Fmask percentage
        if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty:
            landsat_df['FMASK_PCT'] = 100 * (landsat_df['FMASK_COUNT'] /
                                             landsat_df['FMASK_TOTAL'])
            logging.debug('    Max Fmask threshold: {}'.format(
                ini['SUMMARY']['max_fmask_pct']))
            landsat_df = landsat_df[
                landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

        # Filter low count SLC-off images
        if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty:
            logging.debug('    Mininum SLC-off threshold: {}%'.format(
                ini['SUMMARY']['min_slc_off_pct']))
            # logging.debug('    Maximum pixel count: {}'.format(
            #     max_pixel_count))
            slc_off_mask = ((landsat_df['PLATFORM'] == 'LE07') &
                            ((landsat_df['YEAR'] >= 2004) |
                             ((landsat_df['YEAR'] == 2003) &
                              (landsat_df['DOY'] > 151))))
            slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] /
                                 landsat_df['PIXEL_TOTAL'])
            # slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / max_pixel_count)
            landsat_df = landsat_df[(
                (slc_off_pct >= ini['SUMMARY']['min_slc_off_pct'])
                & slc_off_mask) | (~slc_off_mask)]

        if landsat_df.empty:
            logging.error(
                '  Empty Landsat dataframe after filtering, skipping zone')
            continue

        # Aggregate GRIDMET (to water year)
        if os.path.isfile(gridmet_monthly_path):
            logging.debug('  Reading montly GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_monthly_path)
        elif os.path.isfile(gridmet_daily_path):
            logging.debug('  Reading daily GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_daily_path)

        logging.debug('  Computing GRIDMET summaries')
        # Summarize GRIDMET for target months year
        if (gridmet_start_month in [10, 11, 12]
                and gridmet_end_month in [10, 11, 12]):
            month_mask = ((gridmet_df['MONTH'] >= gridmet_start_month) &
                          (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
        elif (gridmet_start_month in [10, 11, 12]
              and gridmet_end_month not in [10, 11, 12]):
            month_mask = gridmet_df['MONTH'] >= gridmet_start_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
            month_mask = gridmet_df['MONTH'] <= gridmet_end_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        else:
            month_mask = ((gridmet_df['MONTH'] >= gridmet_start_month) &
                          (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        # GROUP_YEAR for rows not in the GRIDMET month range will be NAN
        gridmet_df = gridmet_df[~pd.isnull(gridmet_df['GROUP_YEAR'])]

        if year_list:
            gridmet_df = gridmet_df[gridmet_df['GROUP_YEAR'].isin(year_list)]

        if gridmet_df.empty:
            logging.error(
                '    Empty GRIDMET dataframe after filtering by year')
            continue

        # Group GRIDMET data by user specified range (default is water year)
        gridmet_group_df = gridmet_df \
            .groupby(['ZONE_NAME', 'ZONE_FID', 'GROUP_YEAR']) \
            .agg({'ETO': np.sum, 'PPT': np.sum}) \
            .reset_index() \
            .sort_values(by='GROUP_YEAR')
        # .rename(columns={'ETO': 'ETO', 'PPT': 'PPT'}) \
        # Rename wasn't working when chained...
        gridmet_group_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True)
        gridmet_group_df['YEAR'] = gridmet_group_df['YEAR'].astype(int)

        # # Group GRIDMET data by month
        # gridmet_month_df = gridmet_df\
        #     .groupby(['ZONE_NAME', 'ZONE_FID', 'GROUP_YEAR', 'MONTH']) \
        #     .agg({'ETO': np.sum, 'PPT': np.sum}) \
        #     .reset_index() \
        #     .sort_values(by=['GROUP_YEAR', 'MONTH'])
        # gridmet_month_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True)
        # # Rename monthly PPT columns
        # gridmet_month_df['MONTH'] = 'PPT_M' + gridmet_month_df['MONTH'].astype(str)
        # # Pivot rows up to separate columns
        # gridmet_month_df = gridmet_month_df.pivot_table(
        #     'PPT', ['ZONE_NAME', 'YEAR'], 'MONTH')
        # gridmet_month_df.reset_index(inplace=True)
        # columns = ['ZONE_NAME', 'YEAR'] + ['PPT_M{}'.format(m) for m in gridmet_months]
        # gridmet_month_df = gridmet_month_df[columns]
        # del gridmet_month_df.index.name

        # Merge Landsat and GRIDMET collections
        zone_df = landsat_df.merge(gridmet_group_df,
                                   on=['ZONE_NAME', 'ZONE_FID', 'YEAR'])
        if zone_df is None or zone_df.empty:
            logging.info('  Empty zone dataframe, not generating figures')
            continue

        # Compute ETg
        zone_df['ETG_MEAN'] = zone_df['ETSTAR_MEAN'] * (zone_df['ETO'] -
                                                        zone_df['PPT'])
        zone_df['ETG_LPI'] = zone_df['ETSTAR_LPI'] * (zone_df['ETO'] -
                                                      zone_df['PPT'])
        zone_df['ETG_UPI'] = zone_df['ETSTAR_UPI'] * (zone_df['ETO'] -
                                                      zone_df['PPT'])
        zone_df['ETG_LCI'] = zone_df['ETSTAR_LCI'] * (zone_df['ETO'] -
                                                      zone_df['PPT'])
        zone_df['ETG_UCI'] = zone_df['ETSTAR_UCI'] * (zone_df['ETO'] -
                                                      zone_df['PPT'])

        # Compute ET
        zone_df['ET_MEAN'] = zone_df['ETG_MEAN'] + zone_df['PPT']
        zone_df['ET_LPI'] = zone_df['ETG_LPI'] + zone_df['PPT']
        zone_df['ET_UPI'] = zone_df['ETG_UPI'] + zone_df['PPT']
        zone_df['ET_LCI'] = zone_df['ETG_LCI'] + zone_df['PPT']
        zone_df['ET_UCI'] = zone_df['ETG_UCI'] + zone_df['PPT']

        # Append zone dataframes
        zone_df_dict[zone_name] = zone_df

    # Export each zone to a separate tab
    if not os.path.isfile(output_daily_path):
        logging.info('\nWriting daily values to Excel')
        excel_f = ExcelWriter(output_daily_path)
        for zone_name, zone_df in sorted(zone_df_dict.items()):
            logging.info('  {}'.format(zone_name))
            zone_df.to_excel(excel_f,
                             zone_name,
                             index=False,
                             float_format='%.4f')
            # zone_df.to_excel(excel_f, zone_name, index=False)
            del zone_df
        excel_f.save()

    if not os.path.isfile(output_annual_path):
        logging.info('\nComputing annual summaries')
        annual_df = pd.concat(list(zone_df_dict.values())) \
            .groupby(['ZONE_NAME', 'YEAR']) \
            .agg({
                'PIXEL_COUNT': ['count', 'mean'],
                'PIXEL_TOTAL': ['mean'],
                'FMASK_COUNT': 'mean',
                'FMASK_TOTAL': 'mean',
                'CLOUD_SCORE': 'mean',
                'ETSTAR_COUNT': 'mean',
                'NDVI_TOA': 'mean',
                'NDWI_TOA': 'mean',
                'ALBEDO_SUR': 'mean',
                'TS': 'mean',
                # 'EVI_SUR': 'mean',
                'EVI_SUR': ['mean', 'median', 'min', 'max'],
                'ETSTAR_MEAN': 'mean',
                'ETG_MEAN': 'mean',
                'ETG_LPI': 'mean',
                'ETG_UPI': 'mean',
                'ETG_LCI': 'mean',
                'ETG_UCI': 'mean',
                'ET_MEAN': 'mean',
                'ET_LPI': 'mean',
                'ET_UPI': 'mean',
                'ET_LCI': 'mean',
                'ET_UCI': 'mean',
                'ETO': 'mean',
                'PPT': 'mean'
            })
        annual_df.columns = annual_df.columns.map('_'.join)
        annual_df = annual_df.rename(columns={
            'PIXEL_COUNT_count': 'SCENE_COUNT',
            'PIXEL_COUNT_mean': 'PIXEL_COUNT'
        })
        annual_df = annual_df.rename(
            columns={
                'EVI_SUR_mean': 'EVI_SUR_MEAN',
                'EVI_SUR_median': 'EVI_SUR_MEDIAN',
                'EVI_SUR_min': 'EVI_SUR_MIN',
                'EVI_SUR_max': 'EVI_SUR_MAX'
            })
        annual_df.rename(columns=lambda x: str(x).replace('_mean', ''),
                         inplace=True)
        annual_df['SCENE_COUNT'] = annual_df['SCENE_COUNT'].astype(np.int)
        annual_df['PIXEL_COUNT'] = annual_df['PIXEL_COUNT'].astype(np.int)
        annual_df['PIXEL_TOTAL'] = annual_df['PIXEL_TOTAL'].astype(np.int)
        annual_df['FMASK_COUNT'] = annual_df['FMASK_COUNT'].astype(np.int)
        annual_df['FMASK_TOTAL'] = annual_df['FMASK_TOTAL'].astype(np.int)
        annual_df['ETSTAR_COUNT'] = annual_df['ETSTAR_COUNT'].astype(np.int)
        annual_df = annual_df.reset_index()

        # Convert ETo units
        if (ini['BEAMER']['eto_units'] == 'mm'
                and ini['TABLES']['eto_units'] == 'mm'):
            pass
        elif (ini['BEAMER']['eto_units'] == 'mm'
              and ini['TABLES']['eto_units'] == 'in'):
            annual_df[eto_fields] /= (25.4)
        elif (ini['BEAMER']['eto_units'] == 'mm'
              and ini['TABLES']['eto_units'] == 'ft'):
            annual_df[eto_fields] /= (12 * 25.4)
        else:
            logging.error(
                ('\nERROR: Input units {} and output units {} are not ' +
                 'currently supported, exiting').format(
                     ini['BEAMER']['eto_units'], ini['TABLES']['eto_units']))
            sys.exit()

        # Convert PPT units
        if (ini['BEAMER']['ppt_units'] == 'mm'
                and ini['TABLES']['ppt_units'] == 'mm'):
            pass
        elif (ini['BEAMER']['ppt_units'] == 'mm'
              and ini['TABLES']['ppt_units'] == 'in'):
            annual_df[ppt_fields] /= (25.4)
        elif (ini['BEAMER']['ppt_units'] == 'mm'
              and ini['TABLES']['ppt_units'] == 'ft'):
            annual_df[ppt_fields] /= (12 * 25.4)
        else:
            logging.error(
                ('\nERROR: Input units {} and output units {} are not ' +
                 'currently supported, exiting').format(
                     ini['BEAMER']['ppt_units'], ini['TABLES']['ppt_units']))
            sys.exit()

        logging.info('\nWriting annual values to Excel')
        excel_f = ExcelWriter(output_annual_path)
        for zone_name in sorted(zone_df_dict.keys()):
            logging.info('  {}'.format(zone_name))
            zone_df = annual_df[annual_df['ZONE_NAME'] == zone_name]
            zone_df.to_excel(excel_f,
                             zone_name,
                             index=False,
                             float_format='%.4f')
            del zone_df
        excel_f.save()
def main(ini_path, show_flag=False, overwrite_flag=True):
    """Generate Bokeh figures

    Bokeh issues:
    Adjust y range based on non-muted data
        https://stackoverflow.com/questions/43620837/how-to-get-bokeh-to-dynamically-adjust-y-range-when-panning
    Linked interactive legends so that there is only one legend for the gridplot
    Maybe hide or mute QA values above max (instead of filtering them in advance)

    Args:
        ini_path (str):
        show_flag (bool): if True, show the figures in the browser.
            Default is False.
        overwrite_flag (bool): if True, overwrite existing tables.
            Default is True (for now)
    """
    logging.info('\nGenerate interactive timeseries figures')

    # Eventually read from INI
    plot_var_list = ['NDVI_TOA', 'ALBEDO_SUR', 'TS', 'NDWI_TOA', 'EVI_SUR']
    # plot_var_list = [
    #     'NDVI_TOA', 'ALBEDO_SUR', 'TS', 'NDWI_TOA',
    #     'CLOUD_SCORE', 'FMASK_PCT']
    output_folder = 'figures'

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='FIGURES')
    inputs.parse_section(ini, section='BEAMER')

    # Output paths
    output_ws = os.path.join(ini['SUMMARY']['output_ws'], output_folder)
    if not os.path.isdir(output_ws):
        os.makedirs(output_ws)

    # Start/end year
    year_list = list(
        range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(
        utils.month_range(gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(', '.join(
        map(str, gridmet_months))))

    # Read in the zonal stats CSV
    logging.debug('  Reading zonal stats CSV file')
    input_df = pd.read_csv(
        os.path.join(ini['ZONAL_STATS']['output_ws'],
                     ini['BEAMER']['output_name']))
    logging.debug(input_df.head())

    logging.debug('  Filtering Landsat dataframe')
    input_df = input_df[input_df['PIXEL_COUNT'] > 0]

    # # This assumes that there are L5/L8 images in the dataframe
    # if not input_df.empty:
    #     max_pixel_count = max(input_df['PIXEL_COUNT'])
    # else:
    #     max_pixel_count = 0

    if ini['INPUTS']['fid_keep_list']:
        input_df = input_df[input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_keep_list'])]
    if ini['INPUTS']['fid_skip_list']:
        input_df = input_df[~input_df['ZONE_FID'].
                            isin(ini['INPUTS']['fid_skip_list'])]

    if year_list:
        input_df = input_df[input_df['YEAR'].isin(year_list)]
    if month_list:
        input_df = input_df[input_df['MONTH'].isin(month_list)]
    if doy_list:
        input_df = input_df[input_df['DOY'].isin(doy_list)]

    if ini['INPUTS']['path_keep_list']:
        input_df = input_df[input_df['PATH'].isin(
            ini['INPUTS']['path_keep_list'])]
    if (ini['INPUTS']['row_keep_list']
            and ini['INPUTS']['row_keep_list'] != ['XXX']):
        input_df = input_df[input_df['ROW'].isin(
            ini['INPUTS']['row_keep_list'])]

    # Assume the default is for these to be True and only filter if False
    if not ini['INPUTS']['landsat4_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT04']
    if not ini['INPUTS']['landsat5_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT05']
    if not ini['INPUTS']['landsat7_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LE07']
    if not ini['INPUTS']['landsat8_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LC08']

    if ini['INPUTS']['scene_id_keep_list']:
        # Replace XXX with primary ROW value for checking skip list SCENE_ID
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])
        ])
        input_df = input_df[scene_id_df.isin(
            ini['INPUTS']['scene_id_keep_list']).values]
        # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
        # input_df = input_df[input_df['SCENE_ID'].isin(
        #     ini['INPUTS']['scene_id_keep_list'])]
    if ini['INPUTS']['scene_id_skip_list']:
        # Replace XXX with primary ROW value for checking skip list SCENE_ID
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])
        ])
        input_df = input_df[np.logical_not(
            scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)]
        # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
        # input_df = input_df[~input_df['SCENE_ID'].isin(
        #     ini['INPUTS']['scene_id_skip_list'])]

    # Filter by QA/QC value
    if ini['SUMMARY']['max_qa'] >= 0 and not input_df.empty:
        logging.debug('    Maximum QA: {0}'.format(ini['SUMMARY']['max_qa']))
        input_df = input_df[input_df['QA'] <= ini['SUMMARY']['max_qa']]

    # First filter by average cloud score
    if ini['SUMMARY']['max_cloud_score'] < 100 and not input_df.empty:
        logging.debug('    Maximum cloud score: {0}'.format(
            ini['SUMMARY']['max_cloud_score']))
        input_df = input_df[
            input_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

    # Filter by Fmask percentage
    if ini['SUMMARY']['max_fmask_pct'] < 100 and not input_df.empty:
        input_df['FMASK_PCT'] = 100 * (input_df['FMASK_COUNT'] /
                                       input_df['FMASK_TOTAL'])
        logging.debug('    Max Fmask threshold: {}'.format(
            ini['SUMMARY']['max_fmask_pct']))
        input_df = input_df[
            input_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

    # Filter low count SLC-off images
    if ini['SUMMARY']['min_slc_off_pct'] > 0 and not input_df.empty:
        logging.debug('    Mininum SLC-off threshold: {}%'.format(
            ini['SUMMARY']['min_slc_off_pct']))
        # logging.debug('    Maximum pixel count: {}'.format(
        #     max_pixel_count))
        slc_off_mask = ((input_df['PLATFORM'] == 'LE07') &
                        ((input_df['YEAR'] >= 2004) |
                         ((input_df['YEAR'] == 2003) &
                          (input_df['DOY'] > 151))))
        slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / input_df['PIXEL_TOTAL'])
        # slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / max_pixel_count)
        input_df = input_df[((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct'])
                             & slc_off_mask) | (~slc_off_mask)]

    if input_df.empty:
        logging.error('  Empty dataframe after filtering, exiting')
        return False

    # Process each zone separately
    logging.debug(input_df.head())
    zone_name_list = sorted(list(set(input_df['ZONE_NAME'].values)))
    for zone_name in zone_name_list:
        logging.info('ZONE: {}'.format(zone_name))
        # The names are currently stored in the CSV as spaces
        zone_output_name = zone_name.replace(' ', '_')
        zone_df = input_df[input_df['ZONE_NAME'] == zone_name]
        if zone_df.empty:
            logging.info('  Empty zone dataframe, skipping zone')
            continue

        # Output file paths
        output_doy_path = os.path.join(
            output_ws, '{}_timeseries_doy.html'.format(zone_output_name))
        output_date_path = os.path.join(
            output_ws, '{}_timeseries_date.html'.format(zone_output_name))

        # # Check for QA field
        # if 'QA' not in zone_df.columns.values:
        #     # logging.warning(
        #     #     '  WARNING: QA field not present in CSV\n'
        #     #     '  To compute QA/QC values, please run "ee_summary_qaqc.py"\n'
        #     #     '  Script will continue with no QA/QC values')
        #     zone_df['QA'] = 0
        #     # raw_input('ENTER')
        #     # logging.error(
        #     #     '\nPlease run the "ee_summary_qaqc.py" script '
        #     #     'to compute QA/QC values\n')
        #     # sys.exit()

        # Check that plot variables are present
        for plot_var in plot_var_list:
            if plot_var not in zone_df.columns.values:
                logging.error('  The variable {} does not exist in the '
                              'dataframe'.format(plot_var))
                sys.exit()

        # if ini['INPUTS']['scene_id_keep_list']:
        #     # Replace XXX with primary ROW value for checking skip list SCENE_ID
        #     scene_id_df = pd.Series([
        #         s.replace('XXX', '{:03d}'.format(int(r)))
        #         for s, r in zip(zone_df['SCENE_ID'], zone_df['ROW'])])
        #     zone_df = zone_df[scene_id_df.isin(
        #         ini['INPUTS']['scene_id_keep_list']).values]
        #     # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
        #     # zone_df = zone_df[zone_df['SCENE_ID'].isin(
        #     #     ini['INPUTS']['scene_id_keep_list'])]
        # if ini['INPUTS']['scene_id_skip_list']:
        #     # Replace XXX with primary ROW value for checking skip list SCENE_ID
        #     scene_id_df = pd.Series([
        #         s.replace('XXX', '{:03d}'.format(int(r)))
        #         for s, r in zip(zone_df['SCENE_ID'], zone_df['ROW'])])
        #     zone_df = zone_df[np.logical_not(scene_id_df.isin(
        #         ini['INPUTS']['scene_id_skip_list']).values)]
        #     # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
        #     # zone_df = zone_df[np.logical_not(zone_df['SCENE_ID'].isin(
        #     #     ini['INPUTS']['scene_id_skip_list']))]

        # Compute colors for each QA value
        logging.debug('  Building column data source')
        qa_values = sorted(list(set(zone_df['QA'].values)))
        colors = {
            qa: "#%02x%02x%02x" % (int(r), int(g), int(b))
            for qa, (
                r, g, b,
                _) in zip(qa_values, 255 *
                          cm.viridis(mpl.colors.Normalize()(qa_values)))
        }
        logging.debug('  QA values: {}'.format(', '.join(map(str, qa_values))))

        # Unpack the data by QA type to support interactive legends
        sources = dict()
        for qa_value in qa_values:
            qa_df = zone_df[zone_df['QA'] == qa_value]
            qa_data = {
                'INDEX':
                list(range(len(qa_df.index))),
                'PLATFORM':
                qa_df['PLATFORM'],
                'DATE':
                pd.to_datetime(qa_df['DATE']),
                'DATE_STR':
                pd.to_datetime(
                    qa_df['DATE']).map(lambda x: x.strftime('%Y-%m-%d')),
                'DOY':
                qa_df['DOY'].values,
                'QA':
                qa_df['QA'].values,
                'COLOR': [colors[qa] for qa in qa_df['QA'].values]
            }
            for plot_var in plot_var_list:
                if plot_var in qa_df.columns.values:
                    qa_data.update({plot_var: qa_df[plot_var].values})
            sources[qa_value] = bokeh.models.ColumnDataSource(qa_data)

        tooltips = [("LANDSAT", "@PLATFORM"), ("DATE", "@TIME"),
                    ("DOY", "@DOY")]

        # Selection
        hover_circle = Circle(fill_color='#ff0000', line_color='#ff0000')
        selected_circle = Circle(fill_color='COLOR', line_color='COLOR')
        nonselected_circle = Circle(fill_color='#aaaaaa', line_color='#aaaaaa')

        # Plot the data by DOY
        logging.debug('  Building DOY timeseries figure')
        if os.path.isfile(output_doy_path):
            os.remove(output_doy_path)
        output_file(output_doy_path, title=zone_name)

        figure_args = dict(
            plot_width=750,
            plot_height=250,
            title=None,
            tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select",
            # tools="xwheel_zoom,xpan,xbox_zoom,reset,tap",
            active_scroll="xwheel_zoom")
        plot_args = dict(size=4, alpha=0.9, color='COLOR')
        if ini['SUMMARY']['max_qa'] > 0:
            plot_args['legend'] = 'QA'

        figures = []
        for plot_i, plot_var in enumerate(plot_var_list):
            if plot_i == 0:
                f = figure(
                    # x_range=Range1d(1, 366, bounds=(1, 366)),
                    y_axis_label=plot_var,
                    **figure_args)
            else:
                f = figure(x_range=f.x_range,
                           y_axis_label=plot_var,
                           **figure_args)

            for qa, source in sorted(sources.items()):
                r = f.circle('DOY', plot_var, source=source, **plot_args)
                r.hover_glyph = hover_circle
                r.selection_glyph = selected_circle
                r.nonselection_glyph = nonselected_circle
                r.muted_glyph = nonselected_circle

                # DEADBEEF - This will display high QA points as muted
                # if qa > ini['SUMMARY']['max_qa']:
                #     r.muted = True
                #     # r.visible = False

            f.add_tools(bokeh.models.HoverTool(tooltips=tooltips))

            # if ini['SUMMARY']['max_qa'] > 0:
            f.legend.location = "top_left"
            f.legend.click_policy = "hide"
            # f.legend.click_policy = "mute"
            f.legend.orientation = "horizontal"

            figures.append(f)

        # Try to not allow more than 4 plots in a column
        p = gridplot(figures,
                     ncols=len(plot_var_list) // 3,
                     sizing_mode='stretch_both')

        if show_flag:
            show(p)
        save(p)

        # Plot the data by DATE
        logging.debug('  Building date timeseries figure')
        if os.path.isfile(output_date_path):
            os.remove(output_date_path)
        output_file(output_date_path, title=zone_name)

        figure_args = dict(
            plot_width=750,
            plot_height=250,
            title=None,
            tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select",
            # tools="xwheel_zoom,xpan,xbox_zoom,reset,tap",
            active_scroll="xwheel_zoom",
            x_axis_type="datetime",
        )
        plot_args = dict(size=4, alpha=0.9, color='COLOR')
        if ini['SUMMARY']['max_qa'] > 0:
            plot_args['legend'] = 'QA'

        figures = []
        for plot_i, plot_var in enumerate(plot_var_list):
            if plot_i == 0:
                f = figure(
                    # x_range=Range1d(x_limit[0], x_limit[1], bounds=x_limit),
                    y_axis_label=plot_var,
                    **figure_args)
            else:
                f = figure(x_range=f.x_range,
                           y_axis_label=plot_var,
                           **figure_args)

            if plot_var == 'TS':
                f.y_range.bounds = (270, None)

            for qa, source in sorted(sources.items()):
                r = f.circle('DATE', plot_var, source=source, **plot_args)
                r.hover_glyph = hover_circle
                r.selection_glyph = selected_circle
                r.nonselection_glyph = nonselected_circle
                r.muted_glyph = nonselected_circle

                # DEADBEEF - This will display high QA points as muted
                # if qa > ini['SUMMARY']['max_qa']:
                #     r.muted = True
                #     # r.visible = False

            f.add_tools(bokeh.models.HoverTool(tooltips=tooltips))

            # if ini['SUMMARY']['max_qa'] > 0:
            f.legend.location = "top_left"
            f.legend.click_policy = "hide"
            # f.legend.click_policy = "mute"
            f.legend.orientation = "horizontal"

            figures.append(f)

        # Try to not allow more than 4 plots in a column
        p = gridplot(figures,
                     ncols=len(plot_var_list) // 3,
                     sizing_mode='stretch_both')

        if show_flag:
            show(p)
        save(p)

        # Pause after each iteration if show is True
        if show_flag:
            input('Press ENTER to continue')
def main(ini_path, show_flag=False, overwrite_flag=False):
    """Generate Beamer ETg summary figures

    Args:
        ini_path (str):
        show_flag (bool): if True, show the figures in the browser.
            Default is False.
        overwrite_flag (bool): if True, overwrite existing figures
            Default is True (for now)
    """

    logging.info('\nGenerate Beamer ETg summary figures')

    ncolors = [
        '#348ABD', '#7A68A6', '#A60628', '#467821',
        '#CF4457', '#188487', '#E24A33']
    xtick_fs = 8
    ytick_fs = 8
    xlabel_fs = 8
    ylabel_fs = 8
    ms = 2
    figsize = (3.0, 2.5)
    output_folder = 'figures'

    # For unit conversion
    eto_fields = [
        'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI',
        'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI',
        'WY_ETO']
    ppt_fields = ['WY_PPT']

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='FIGURES')
    inputs.parse_section(ini, section='BEAMER')

    # Output paths
    output_ws = os.path.join(
        ini['SUMMARY']['output_ws'], output_folder)
    if not os.path.isdir(output_ws):
        os.makedirs(output_ws)

    # Start/end year
    year_list = list(range(
        ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(utils.wrapped_range(
        ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(utils.wrapped_range(
        ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(utils.month_range(
        gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(
        ', '.join(map(str, gridmet_months))))

    # Read in the zonal stats CSV
    logging.debug('  Reading zonal stats CSV file')
    input_df = pd.read_csv(os.path.join(
        ini['ZONAL_STATS']['output_ws'], ini['BEAMER']['output_name']))
    logging.debug(input_df.head())

    logging.debug('  Filtering Landsat dataframe')
    input_df = input_df[input_df['PIXEL_COUNT'] > 0]

    # # This assumes that there are L5/L8 images in the dataframe
    # if not input_df.empty:
    #     max_pixel_count = max(input_df['PIXEL_COUNT'])
    # else:
    #     max_pixel_count = 0

    if ini['INPUTS']['fid_keep_list']:
        input_df = input_df[input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_keep_list'])]
    if ini['INPUTS']['fid_skip_list']:
        input_df = input_df[~input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_skip_list'])]

    if year_list:
        input_df = input_df[input_df['YEAR'].isin(year_list)]
    if month_list:
        input_df = input_df[input_df['MONTH'].isin(month_list)]
    if doy_list:
        input_df = input_df[input_df['DOY'].isin(doy_list)]

    if ini['INPUTS']['path_keep_list']:
        input_df = input_df[
            input_df['PATH'].isin(ini['INPUTS']['path_keep_list'])]
    if (ini['INPUTS']['row_keep_list'] and
            ini['INPUTS']['row_keep_list'] != ['XXX']):
        input_df = input_df[
            input_df['ROW'].isin(ini['INPUTS']['row_keep_list'])]

    # Assume the default is for these to be True and only filter if False
    if not ini['INPUTS']['landsat4_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT04']
    if not ini['INPUTS']['landsat5_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT05']
    if not ini['INPUTS']['landsat7_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LE07']
    if not ini['INPUTS']['landsat8_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LC08']

    if ini['INPUTS']['scene_id_keep_list']:
        # Replace XXX with primary ROW value for checking skip list SCENE_ID
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])])
        input_df = input_df[scene_id_df.isin(
            ini['INPUTS']['scene_id_keep_list']).values]
        # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
        # input_df = input_df[input_df['SCENE_ID'].isin(
        #     ini['INPUTS']['scene_id_keep_list'])]
    if ini['INPUTS']['scene_id_skip_list']:
        # Replace XXX with primary ROW value for checking skip list SCENE_ID
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])])
        input_df = input_df[np.logical_not(scene_id_df.isin(
            ini['INPUTS']['scene_id_skip_list']).values)]
        # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
        # input_df = input_df[~input_df['SCENE_ID'].isin(
        #     ini['INPUTS']['scene_id_skip_list'])]

    # Filter by QA/QC value
    if ini['SUMMARY']['max_qa'] >= 0 and not input_df.empty:
        logging.debug('    Maximum QA: {0}'.format(
            ini['SUMMARY']['max_qa']))
        input_df = input_df[input_df['QA'] <= ini['SUMMARY']['max_qa']]

    # First filter by average cloud score
    if ini['SUMMARY']['max_cloud_score'] < 100 and not input_df.empty:
        logging.debug('    Maximum cloud score: {0}'.format(
            ini['SUMMARY']['max_cloud_score']))
        input_df = input_df[
            input_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

    # Filter by Fmask percentage
    if ini['SUMMARY']['max_fmask_pct'] < 100 and not input_df.empty:
        input_df['FMASK_PCT'] = 100 * (
            input_df['FMASK_COUNT'] / input_df['FMASK_TOTAL'])
        logging.debug('    Max Fmask threshold: {}'.format(
            ini['SUMMARY']['max_fmask_pct']))
        input_df = input_df[
            input_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

    # Filter low count SLC-off images
    if ini['SUMMARY']['min_slc_off_pct'] > 0 and not input_df.empty:
        logging.debug('    Mininum SLC-off threshold: {}%'.format(
            ini['SUMMARY']['min_slc_off_pct']))
        # logging.debug('    Maximum pixel count: {}'.format(
        #     max_pixel_count))
        slc_off_mask = (
            (input_df['PLATFORM'] == 'LE07') &
            ((input_df['YEAR'] >= 2004) |
             ((input_df['YEAR'] == 2003) & (input_df['DOY'] > 151))))
        slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / input_df['PIXEL_TOTAL'])
        # slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / max_pixel_count)
        input_df = input_df[
            ((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) & slc_off_mask) |
            (~slc_off_mask)]

    if input_df.empty:
        logging.error('  Empty dataframe after filtering, exiting')
        return False


    # Process each zone separately
    logging.debug(input_df.head())
    zone_name_list = sorted(list(set(input_df['ZONE_NAME'].values)))
    for zone_name in zone_name_list:
        logging.info('ZONE: {}'.format(zone_name))
        # The names are currently stored in the CSV with spaces
        zone_output_name = zone_name.replace(' ', '_')
        zone_df = input_df[input_df['ZONE_NAME'] == zone_name]
        if zone_df.empty:
            logging.info('  Empty zone dataframe, skipping zone')
            continue


        logging.debug('  Computing annual summaries')
        annual_df = zone_df \
            .groupby(['ZONE_NAME', 'YEAR']) \
            .agg({
                'PIXEL_COUNT': ['count', 'mean'],
                'PIXEL_TOTAL': ['mean'],
                'FMASK_COUNT': 'mean',
                'FMASK_TOTAL': 'mean',
                'CLOUD_SCORE': 'mean',
                'ETSTAR_COUNT': 'mean',
                'NDVI_TOA': 'mean',
                'NDWI_TOA': 'mean',
                'ALBEDO_SUR': 'mean',
                'TS': 'mean',
                'EVI_SUR': 'mean',
                'ETSTAR_MEAN': 'mean',
                'ETG_MEAN': 'mean',
                'ETG_LPI': 'mean',
                'ETG_UPI': 'mean',
                'ETG_LCI': 'mean',
                'ETG_UCI': 'mean',
                'ET_MEAN': 'mean',
                'ET_LPI': 'mean',
                'ET_UPI': 'mean',
                'ET_LCI': 'mean',
                'ET_UCI': 'mean',
                'WY_ETO': 'mean',
                'WY_PPT': 'mean'
            })
        annual_df.columns = annual_df.columns.map('_'.join)
        annual_df = annual_df.rename(columns={
            'PIXEL_COUNT_count': 'SCENE_COUNT',
            'PIXEL_COUNT_mean': 'PIXEL_COUNT'})
        annual_df.rename(
            columns=lambda x: str(x).replace('_mean', ''), inplace=True)
        annual_df['SCENE_COUNT'] = annual_df['SCENE_COUNT'].astype(np.int)
        annual_df['PIXEL_COUNT'] = annual_df['PIXEL_COUNT'].astype(np.int)
        annual_df['PIXEL_TOTAL'] = annual_df['PIXEL_TOTAL'].astype(np.int)
        annual_df['FMASK_COUNT'] = annual_df['FMASK_COUNT'].astype(np.int)
        annual_df['FMASK_TOTAL'] = annual_df['FMASK_TOTAL'].astype(np.int)
        annual_df['ETSTAR_COUNT'] = annual_df['ETSTAR_COUNT'].astype(np.int)
        annual_df = annual_df.reset_index()

        # Convert ETo units
        if (ini['BEAMER']['eto_units'] == 'mm' and
                ini['FIGURES']['eto_units'] == 'mm'):
            pass
        elif (ini['BEAMER']['eto_units'] == 'mm' and
                ini['FIGURES']['eto_units'] == 'in'):
            annual_df[eto_fields] /= (25.4)
        elif (ini['BEAMER']['eto_units'] == 'mm' and
                ini['FIGURES']['eto_units'] == 'ft'):
            annual_df[eto_fields] /= (12 * 25.4)
        else:
            logging.error(
                ('\nERROR: Input units {} and output units {} are not ' +
                 'currently supported, exiting').format(
                    ini['BEAMER']['eto_units'], ini['FIGURES']['eto_units']))
            sys.exit()

        # Convert PPT units
        if (ini['BEAMER']['ppt_units'] == 'mm' and
                ini['FIGURES']['ppt_units'] == 'mm'):
            pass
        elif (ini['BEAMER']['ppt_units'] == 'mm' and
                ini['FIGURES']['ppt_units'] == 'in'):
            annual_df[ppt_fields] /= (25.4)
        elif (ini['BEAMER']['ppt_units'] == 'mm' and
                ini['FIGURES']['ppt_units'] == 'ft'):
            annual_df[ppt_fields] /= (12 * 25.4)
        else:
            logging.error(
                ('\nERROR: Input units {} and output units {} are not '
                 'currently supported, exiting').format(
                    ini['BEAMER']['ppt_units'], ini['FIGURES']['ppt_units']))
            sys.exit()


        logging.debug('  Generating figures')
        zone_df = annual_df[annual_df['ZONE_NAME'] == zone_name]
        year_min, year_max = min(zone_df['YEAR']), max(zone_df['YEAR'])

        # Set default PPT min/max scaling
        ppt_min = 0
        if ini['FIGURES']['ppt_units'] == 'mm':
            ppt_max = 100 * math.ceil((max(zone_df['WY_PPT']) + 100) / 100)
        elif ini['FIGURES']['ppt_units'] == 'ft':
            ppt_max = 0.2 * math.ceil((max(zone_df['WY_PPT']) + 0.1) / 0.2)
        else:
            ppt_max = 1.2 * max(zone_df['WY_PPT'])


        logging.debug('    EVI vs PPT')
        figure_path = os.path.join(
            output_ws, '{}_evi.png'.format(zone_output_name))
        fig = plt.figure(figsize=figsize)
        ax1 = fig.add_axes([0.20, 0.21, 0.65, 0.75])
        ax1.set_xlabel('Year', fontsize=xlabel_fs)
        ax2 = ax1.twinx()
        ax1.plot(
            zone_df['YEAR'].values, zone_df['WY_PPT'],
            marker='o', c='0.5', ms=ms, label='WY PPT')
        ax1.yaxis.tick_right()
        ax1.yaxis.set_label_position("right")
        ax1.set_xlim([year_min - 1, year_max + 1])
        ax1.set_ylim([ppt_min, ppt_max])
        ax1.tick_params(axis='y', labelsize=ytick_fs)
        ax1.tick_params(axis='x', labelsize=xtick_fs)
        ax1.tick_params(axis='x', which='both', top='off')
        ax1.xaxis.set_minor_locator(MultipleLocator(1))
        for tick in ax1.get_xticklabels():
            tick.set_rotation(45)
            tick.set_ha('right')
        ax1.set_ylabel(
            'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']),
            fontsize=ylabel_fs)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['EVI_SUR'].values,
            marker='o', c=ncolors[0], ms=ms,
            label='EVI')
        ax1.plot(0, 0, marker='o', c=ncolors[0], ms=ms, label='EVI')
        ax2.yaxis.tick_left()
        ax2.yaxis.set_label_position("left")
        ax2.set_ylim([
            0.05 * math.floor((min(zone_df['EVI_SUR']) - 0.01) / 0.05),
            0.05 * math.ceil((max(zone_df['EVI_SUR']) + 0.01) / 0.05)])
        ax2.tick_params(axis='y', labelsize=ytick_fs)
        ax2.set_ylabel('EVI [dimensionless]', fontsize=ylabel_fs)
        ax1.legend(
            loc='upper right', frameon=False, fontsize=6, numpoints=1)
        if overwrite_flag or not os.path.isfile(figure_path):
            plt.savefig(figure_path, dpi=300)
        plt.close()
        del fig, ax1, ax2


        logging.debug('    ETo vs PPT')
        figure_path = os.path.join(
            output_ws, '{}_eto.png'.format(zone_output_name))
        fig = plt.figure(figsize=figsize)
        ax1 = fig.add_axes([0.18, 0.21, 0.67, 0.75])
        ax1.set_xlabel('Year', fontsize=xlabel_fs)
        ax2 = ax1.twinx()
        ax1.plot(
            zone_df['YEAR'].values, zone_df['WY_PPT'],
            marker='o', c='0.5', ms=ms, label='WY PPT')
        ax1.yaxis.tick_right()
        ax1.yaxis.set_label_position("right")
        ax1.set_xlim([year_min - 1, year_max + 1])
        ax1.set_ylim([ppt_min, ppt_max])
        ax1.tick_params(axis='y', labelsize=ytick_fs)
        ax1.tick_params(axis='x', labelsize=xtick_fs)
        ax1.tick_params(axis='x', which='both', top='off')
        ax1.xaxis.set_minor_locator(MultipleLocator(1))
        for tick in ax1.get_xticklabels():
            tick.set_rotation(45)
            tick.set_ha('right')
        ax1.set_ylabel(
            'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']),
            fontsize=ylabel_fs)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['WY_ETO'].values,
            marker='o', c=ncolors[1], ms=ms, label='ETo')
        ax1.plot(0, 0, marker='o', c=ncolors[1], ms=ms, label='ETo')
        ax2.yaxis.tick_left()
        ax2.yaxis.set_label_position("left")
        ax2.set_ylim([
            max(0, 0.9 * min(zone_df['WY_ETO'])),
            1.1 * max(zone_df['WY_ETO'])])
        # ax2.set_ylim([
        #     max(0, 100 * math.floor((min(zone_df['WY_ETO']) - 100) / 100)),
        #     100 * math.ceil((max(zone_df['WY_ETO']) + 100) / 100)])
        ax2.tick_params(axis='y', labelsize=ytick_fs)
        ax2.set_ylabel(
            'ETo [{}/yr]'.format(ini['FIGURES']['eto_units']),
            fontsize=ylabel_fs)
        ax1.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1)
        if overwrite_flag or not os.path.isfile(figure_path):
            plt.savefig(figure_path, dpi=300)
        plt.close()
        del fig, ax1, ax2


        logging.debug('    ET vs PPT')
        figure_path = os.path.join(
            output_ws, '{}_et.png'.format(zone_output_name))
        fig = plt.figure(figsize=figsize)
        ax1 = fig.add_axes([0.18, 0.21, 0.67, 0.75])
        ax1.set_xlabel('Year', fontsize=xlabel_fs)
        ax2 = ax1.twinx()
        ax1.plot(
            zone_df['YEAR'].values, zone_df['WY_PPT'],
            marker='o', c='0.5', ms=ms, label='WY PPT')
        ax1.yaxis.tick_right()
        ax1.yaxis.set_label_position("right")
        ax1.set_xlim([year_min - 1, year_max + 1])
        ax1.set_ylim([ppt_min, ppt_max])
        ax1.tick_params(axis='y', labelsize=ytick_fs)
        ax1.tick_params(axis='x', labelsize=xtick_fs)
        ax1.tick_params(axis='x', which='both', top='off')
        ax1.xaxis.set_minor_locator(MultipleLocator(1))
        for tick in ax1.get_xticklabels():
            tick.set_rotation(45)
            tick.set_ha('right')
        ax1.set_ylabel(
            'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']),
            fontsize=ylabel_fs)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ET_UCI'].values,
            marker='', c=ncolors[2], alpha=0.5, lw=0.75)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ET_LCI'].values,
            marker='', c=ncolors[2], alpha=0.5, lw=0.75)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ET_MEAN'].values,
            marker='o', c=ncolors[2], ms=ms, label='ET')
        ax1.plot(0, 0, marker='o', c=ncolors[2], ms=ms, label='ET')
        ax2.yaxis.tick_left()
        ax2.yaxis.set_label_position("left")
        ax2.set_ylim([
            max(0, 0.9 * min(zone_df['ET_LCI'])),
            1.1 * max(zone_df['ET_UCI'])])
        # ax2.set_ylim([
        #     max(0, 100 * math.floor((min(zone_df['ET_MEAN']) - 100) / 100)),
        #     100 * math.ceil((max(zone_df['ET_MEAN']) + 100) / 100)])
        ax2.tick_params(axis='y', labelsize=ytick_fs)
        ax2.set_ylabel(
            'ET [{}/yr]'.format(ini['FIGURES']['eto_units']),
            fontsize=ylabel_fs)
        ax1.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1)
        if overwrite_flag or not os.path.isfile(figure_path):
            plt.savefig(figure_path, dpi=300)
        plt.close()
        del fig, ax1, ax2


        logging.debug('    ETg vs PPT')
        figure_path = os.path.join(
            output_ws, '{}_etg.png'.format(zone_output_name))
        fig = plt.figure(figsize=figsize)
        ax1 = fig.add_axes([0.18, 0.21, 0.67, 0.75])
        ax1.set_xlabel('Year', fontsize=xlabel_fs)
        ax2 = ax1.twinx()
        ax1.plot(
            zone_df['YEAR'].values, zone_df['WY_PPT'],
            marker='o', c='0.5', ms=ms, label='WY PPT')
        ax1.yaxis.tick_right()
        ax1.yaxis.set_label_position("right")
        ax1.set_xlim([year_min - 1, year_max + 1])
        ax1.set_ylim([ppt_min, ppt_max])
        ax1.tick_params(axis='y', labelsize=ytick_fs)
        ax1.tick_params(axis='x', labelsize=xtick_fs)
        ax1.tick_params(axis='x', which='both', top='off')
        ax1.xaxis.set_minor_locator(MultipleLocator(1))
        for tick in ax1.get_xticklabels():
            tick.set_rotation(45)
            tick.set_ha('right')
        ax1.set_ylabel(
            'PPT [{}/yr]'.format(ini['FIGURES']['ppt_units']),
            fontsize=ylabel_fs)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ETG_UCI'].values,
            marker='', c=ncolors[3], alpha=0.5, lw=0.75)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ETG_LCI'].values,
            marker='', c=ncolors[3], alpha=0.5, lw=0.75)
        ax2.plot(
            zone_df['YEAR'].values, zone_df['ETG_MEAN'].values,
            marker='o', c=ncolors[3], ms=ms, label='ETg')
        ax1.plot(0, 0, marker='o', c=ncolors[3], ms=ms, label='ETg')
        ax2.yaxis.tick_left()
        ax2.yaxis.set_label_position("left")
        ax2.set_ylim([
            max(0, 0.9 * min(zone_df['ETG_LCI'])),
            1.1 * max(zone_df['ETG_UCI'])])
        # ax2.set_ylim([
        #     max(0, 100 * math.floor((min(zone_df['ETG_MEAN']) - 100) / 100)),
        #     100 * math.ceil((max(zone_df['ETG_MEAN']) + 100) / 100)])
        ax2.tick_params(axis='y', labelsize=ytick_fs)
        ax2.set_ylabel(
            'ETg [{}/yr]'.format(ini['FIGURES']['eto_units']),
            fontsize=ylabel_fs)
        ax1.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1)
        if overwrite_flag or not os.path.isfile(figure_path):
            plt.savefig(figure_path, dpi=300)
        plt.close()
        del fig, ax1, ax2


        logging.debug('    Complimentary')
        figure_path = os.path.join(
            output_ws, '{}_complimentary.png'.format(zone_output_name))
        fig = plt.figure(figsize=(3, 2.5))
        ax = fig.add_axes([0.18, 0.16, 0.78, 0.80])
        # ax = fig.add_axes([0.18, 0.21, 0.67, 0.70])
        ax.plot(
            zone_df['WY_PPT'].values, zone_df['WY_ETO'].values,
            linestyle='', marker='o', c=ncolors[1], ms=3, label='ETo')
        ax.plot(
            zone_df['WY_PPT'].values, zone_df['ET_MEAN'].values,
            linestyle='', marker='o', c=ncolors[2], ms=3, label='ET')
        # xmax = 100 * math.ceil(max(zone_df['WY_PPT']) / 100)
        # ymax = 200 * math.ceil((max(zone_df['WY_ETO']) + 200) / 200)
        ax.set_xlim([ppt_min, ppt_max])
        ax.set_ylim([0, 1.2 * max(zone_df['WY_ETO'])])
        ax.tick_params(axis='y', labelsize=ytick_fs)
        ax.tick_params(axis='x', labelsize=xtick_fs)
        ax.tick_params(axis='x', which='both', top='off')
        ax.tick_params(axis='y', which='both', right='off')
        ax.set_xlabel('PPT [{}/yr]'.format(
            ini['FIGURES']['ppt_units']), fontsize=xlabel_fs)
        ax.set_ylabel('ET and ETo [{}/yr]'.format(
            ini['FIGURES']['eto_units']), fontsize=ylabel_fs)
        ax.legend(loc='upper right', frameon=False, fontsize=6, numpoints=1)
        if overwrite_flag or not os.path.isfile(figure_path):
            plt.savefig(figure_path, dpi=300)
        plt.close()
        del fig, ax
Esempio n. 5
0
def ee_image_download(ini_path=None, overwrite_flag=False):
    """Earth Engine Annual Mean Image Download

    Parameters
    ----------
    ini_path : str
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    """
    logging.info('\nEarth Engine EDDI Image Download')

    # 12 month EDDI
    aggregation_days = 365
    export_name = 'eddi_12month'
    output_name = 'eddi.12month'

    eddi_date_list = [
        '0131', '0228', '0331', '0430', '0531', '0630', '0731', '0831', '0930',
        '1031', '1130', '1231'
    ]
    # eddi_date_list = ['0930', '1231']
    # eddi_date_list = ['{:02d}01'.format(m) for m in range(1, 13)]
    # eddi_date_list = []

    eddi_folder = 'eddi'

    # Do we need to support separate EDDI years?
    # start_year = 1984
    # end_year = 2016

    #
    climo_year_start = 1979
    climo_year_end = 2017

    # Read config file
    # ini = inputs.ini_parse(ini_path, section='IMAGE')
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='SPATIAL')
    inputs.parse_section(ini, section='EXPORT')
    inputs.parse_section(ini, section='IMAGES')

    nodata_value = -9999

    # Manually set output spatial reference
    logging.info('\nHardcoding GRIDMET snap, cellsize and spatial reference')
    ini['output_x'], ini['output_y'] = -124.79299639209513, 49.41685579737572
    ini['SPATIAL']['cellsize'] = 0.041666001963701
    # ini['SPATIAL']['cellsize'] = [0.041666001963701, 0.041666001489718]
    # ini['output_x'] = -124.79166666666666666667
    # ini['output_y'] = 25.04166666666666666667
    # ini['SPATIAL']['cellsize'] = 1. / 24
    ini['SPATIAL']['osr'] = gdc.epsg_osr(4326)
    # ini['SPATIAL']['osr'] = gdc.epsg_osr(4269)
    ini['SPATIAL']['crs'] = 'EPSG:4326'
    logging.debug('  Snap: {} {}'.format(ini['output_x'], ini['output_y']))
    logging.debug('  Cellsize: {}'.format(ini['SPATIAL']['cellsize']))
    logging.debug('  OSR: {}'.format(ini['SPATIAL']['osr']))

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID before merging geometries
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']
        ]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']
        ]

    # Merge geometries
    if ini['INPUTS']['merge_geom_flag']:
        merge_geom = ogr.Geometry(ogr.wkbMultiPolygon)
        for zone in zone_geom_list:
            zone_multipolygon = ogr.ForceToMultiPolygon(
                ogr.CreateGeometryFromJson(json.dumps(zone[2])))
            for zone_polygon in zone_multipolygon:
                merge_geom.AddGeometry(zone_polygon)
        # merge_json = json.loads(merge_mp.ExportToJson())
        zone_geom_list = [[
            0, ini['INPUTS']['zone_filename'],
            json.loads(merge_geom.ExportToJson())
        ]]
        ini['INPUTS']['zone_field'] = ''

    # Need zone_shp_path projection to build EE geometries
    zone_osr = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path'])
    zone_proj = gdc.osr_wkt(zone_osr)
    # zone_proj = ee.Projection(zone_proj).wkt().getInfo()
    # zone_proj = zone_proj.replace('\n', '').replace(' ', '')
    logging.debug('  Zone Projection: {}'.format(zone_proj))

    # Initialize Earth Engine API key
    logging.info('\nInitializing Earth Engine')
    ee.Initialize()
    utils.ee_request(ee.Number(1).getInfo())

    # Get current running tasks
    tasks = utils.get_ee_tasks()

    # Download images for each feature separately
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone_name = zone_name.replace(' ', '_')
        logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid))

        # Build EE geometry object for zonal stats
        zone_geom = ee.Geometry(zone_json, zone_proj, False)

        # Project the zone_geom to the GRIDMET projection
        # if zone_proj != output_proj:
        zone_geom = zone_geom.transform(ini['SPATIAL']['crs'], 0.001)

        # Get the extent from the Earth Engine geometry object?
        zone_extent = zone_geom.bounds().getInfo()['coordinates'][0]
        zone_extent = gdc.Extent([
            min(zip(*zone_extent)[0]),
            min(zip(*zone_extent)[1]),
            max(zip(*zone_extent)[0]),
            max(zip(*zone_extent)[1])
        ])
        # # Use GDAL and geometry json to build extent, transform, and shape
        # zone_extent = gdc.Extent(
        #     ogr.CreateGeometryFromJson(json.dumps(zone_json)).GetEnvelope())
        # # zone_extent = gdc.Extent(zone_geom.GetEnvelope())
        # zone_extent.ymin, zone_extent.xmax = zone_extent.xmax, zone_extent.ymin

        # Adjust extent to match raster
        zone_extent = zone_extent.adjust_to_snap('EXPAND', ini['output_x'],
                                                 ini['output_y'],
                                                 ini['SPATIAL']['cellsize'])
        zone_geo = zone_extent.geo(ini['SPATIAL']['cellsize'])
        zone_transform = gdc.geo_2_ee_transform(zone_geo)
        zone_transform = '[' + ','.join(map(str, zone_transform)) + ']'
        zone_shape = zone_extent.shape(ini['SPATIAL']['cellsize'])
        logging.debug('  Zone Shape: {}'.format(zone_shape))
        logging.debug('  Zone Transform: {}'.format(zone_transform))
        logging.debug('  Zone Extent: {}'.format(zone_extent))
        # logging.debug('  Geom: {}'.format(zone_geom.getInfo()))

        # output_transform = zone_transform[:]
        output_transform = '[' + ','.join(map(str, zone_transform)) + ']'
        output_shape = '[{1}x{0}]'.format(*zone_shape)
        logging.debug('  Output Projection: {}'.format(ini['SPATIAL']['crs']))
        logging.debug('  Output Transform: {}'.format(output_transform))
        logging.debug('  Output Shape: {}'.format(output_shape))

        zone_eddi_ws = os.path.join(ini['IMAGES']['output_ws'], zone_name,
                                    eddi_folder)
        if not os.path.isdir(zone_eddi_ws):
            os.makedirs(zone_eddi_ws)

        # GRIDMET PDSI
        # Process each image in the collection by date
        export_list = []

        export_list = list(
            date_range(datetime.datetime(ini['INPUTS']['start_year'], 1, 1),
                       datetime.datetime(ini['INPUTS']['end_year'], 12, 31),
                       skip_leap_days=True))

        # Filter list to only keep last dekad of October and December
        if eddi_date_list:
            export_list = [
                tgt_dt for tgt_dt in export_list
                if tgt_dt.strftime('%m%d') in eddi_date_list
            ]

        for tgt_dt in export_list:
            date_str = tgt_dt.strftime('%Y%m%d')
            logging.info('{} {}'.format(tgt_dt.strftime('%Y-%m-%d'),
                                        output_name))

            if tgt_dt >= datetime.datetime.today():
                logging.info('  Date after current date, skipping')
                continue

            # Rename to match naming style from getDownloadURL
            #     image_name.band.tif
            export_id = '{}_{}_{}'.format(ini['INPUTS']['zone_filename'],
                                          date_str, export_name.lower())
            output_id = '{}_{}'.format(date_str, output_name)

            export_path = os.path.join(ini['EXPORT']['export_ws'],
                                       export_id + '.tif')
            output_path = os.path.join(zone_eddi_ws, output_id + '.tif')
            logging.debug('  Export: {}'.format(export_path))
            logging.debug('  Output: {}'.format(output_path))

            if overwrite_flag:
                if export_id in tasks.keys():
                    logging.debug('  Task already submitted, cancelling')
                    ee.data.cancelTask(tasks[export_id])
                    del tasks[export_id]
                if os.path.isfile(export_path):
                    logging.debug('  Export image already exists, removing')
                    utils.remove_file(export_path)
                    # os.remove(export_path)
                if os.path.isfile(output_path):
                    logging.debug('  Output image already exists, removing')
                    utils.remove_file(output_path)
                    # os.remove(output_path)
            else:
                if os.path.isfile(export_path):
                    logging.debug('  Export image already exists, moving')
                    shutil.move(export_path, output_path)
                    gdc.raster_path_set_nodata(output_path, nodata_value)
                    # DEADBEEF - should raster stats be computed?
                    # gdc.raster_statistics(output_path)
                    continue
                elif os.path.isfile(output_path):
                    logging.debug('  Output image already exists, skipping')
                    continue
                elif export_id in tasks.keys():
                    logging.debug('  Task already submitted, skipping')
                    continue

            eddi_image = ee_eddi_image(tgt_dt.strftime('%Y-%m-%d'),
                                       agg_days=aggregation_days,
                                       variable='eddi',
                                       year_start=climo_year_start,
                                       year_end=climo_year_end)

            logging.debug('  Building export task')
            # if ini['EXPORT']['export_dest'] == 'gdrive':
            task = ee.batch.Export.image.toDrive(
                image=eddi_image,
                description=export_id,
                # folder=ini['EXPORT']['export_folder'],
                fileNamePrefix=export_id,
                dimensions=output_shape,
                crs=ini['SPATIAL']['crs'],
                crsTransform=output_transform)
            # elif ini['EXPORT']['export_dest'] == 'gdrive':
            #     task = ee.batch.Export.image.toCloudStorage(
            #         image=eddi_image,
            #         description=export_id,
            #         bucket=ini['EXPORT']['export_folder'],
            #         fileNamePrefix=export_id,
            #         dimensions=output_shape,
            #         crs=ini['SPATIAL']['crs'],
            #         crsTransform=output_transform)

            logging.debug('  Starting export task')
            utils.ee_request(task.start())
Esempio n. 6
0
def ee_image_download(ini_path=None, overwrite_flag=False):
    """Earth Engine Annual Mean Image Download

    Parameters
    ----------
    ini_path : str
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    """
    logging.info('\nEarth Engine Landsat Image Download')
    images_folder = 'landsat'

    if overwrite_flag:
        logging.warning(
            '\nAre you sure you want to overwrite existing images?')
        input('Press ENTER to continue')

    # Regular expression to pull out Landsat scene_id
    # landsat_re = re.compile(
    #     'L[ETC][4578]\d{6}(?P<YEAR>\d{4})(?P<DOY>\d{3})\D{3}\d{2}')

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='SPATIAL')
    inputs.parse_section(ini, section='EXPORT')
    inputs.parse_section(ini, section='IMAGES')

    nodata_value = -9999

    # Float32/Float64
    float_output_type = 'Float32'
    float_nodata_value = np.finfo(np.float32).min
    # Byte/Int16/UInt16/UInt32/Int32
    int_output_type = 'Byte'
    int_nodata_value = 255
    int_bands = ['cloud_score', 'fmask']

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'], zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Check if the zone_names are unique
    # Eventually support merging common zone_names
    if len(set([z[1] for z in zone_geom_list])) != len(zone_geom_list):
        logging.error(
            '\nERROR: There appear to be duplicate zone ID/name values.'
            '\n  Currently, the values in "{}" must be unique.'
            '\n  Exiting.'.format(ini['INPUTS']['zone_field']))
        return False

    # Filter features by FID before merging geometries
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']]

    # Merge geometries
    if ini['INPUTS']['merge_geom_flag']:
        merge_geom = ogr.Geometry(ogr.wkbMultiPolygon)
        for zone in zone_geom_list:
            zone_multipolygon = ogr.ForceToMultiPolygon(
                ogr.CreateGeometryFromJson(json.dumps(zone[2])))
            for zone_polygon in zone_multipolygon:
                merge_geom.AddGeometry(zone_polygon)
        # merge_json = json.loads(merge_mp.ExportToJson())
        zone_geom_list = [[
            0, ini['INPUTS']['zone_filename'],
            json.loads(merge_geom.ExportToJson())]]
        ini['INPUTS']['zone_field'] = ''

    # Need zone_shp_path projection to build EE geometries
    zone_osr = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path'])
    zone_proj = gdc.osr_wkt(zone_osr)
    # zone_proj = ee.Projection(zone_proj).wkt().getInfo()
    # zone_proj = zone_proj.replace('\n', '').replace(' ', '')
    # logging.debug('  Zone Projection: {}'.format(zone_proj))

    # Check that shapefile has matching spatial reference
    if not gdc.matching_spatref(zone_osr, ini['SPATIAL']['osr']):
        logging.warning('  Zone OSR:\n{}\n'.format(zone_osr))
        logging.warning('  Output OSR:\n{}\n'.format(
            ini['SPATIAL']['osr']))
        logging.warning('  Zone Proj4:   {}'.format(zone_osr.ExportToProj4()))
        logging.warning('  Output Proj4: {}'.format(
            ini['SPATIAL']['osr'].ExportToProj4()))
        logging.warning(
            '\nWARNING: \n'
            'The output and zone spatial references do not appear to match\n'
            'This will likely cause problems!')
        input('Press ENTER to continue')
    else:
        logging.debug('  Zone Projection:\n{}\n'.format(
            zone_osr.ExportToWkt()))
        logging.debug('  Output Projection:\n{}\n'.format(
            ini['SPATIAL']['osr'].ExportToWkt()))
        logging.debug('  Output Cellsize: {}'.format(
            ini['SPATIAL']['cellsize']))

    # Keyword arguments for ee_common.get_landsat_collection() and
    #   ee_common.get_landsat_image()
    # Zone geom will be updated inside the loop
    landsat_args = {
        k: v for section in ['INPUTS']
        for k, v in ini[section].items()
        if k in [
            'landsat4_flag', 'landsat5_flag',
            'landsat7_flag', 'landsat8_flag',
            'fmask_flag', 'acca_flag',
            'start_year', 'end_year',
            'start_month', 'end_month',
            'start_doy', 'end_doy',
            'scene_id_keep_list', 'scene_id_skip_list',
            'path_keep_list', 'row_keep_list',
            'refl_sur_method', 'adjust_method', 'mosaic_method']}
    # landsat_args['start_date'] = start_date
    # landsat_args['end_date'] = end_date

    # For composite images, compute all components bands
    landsat_args['products'] = ini['IMAGES']['download_bands'][:]
    if 'refl_toa' in landsat_args['products']:
        landsat_args['products'].extend([
            'blue_toa', 'green_toa', 'red_toa',
            'nir_toa', 'swir1_toa', 'swir2_toa'])
        landsat_args['products'].remove('refl_toa')
    if 'refl_sur' in landsat_args['products']:
        landsat_args['products'].extend([
            'blue_sur', 'green_sur', 'red_sur',
            'nir_sur', 'swir1_sur', 'swir2_sur'])
        landsat_args['products'].remove('refl_sur')
    if 'tasseled_cap' in landsat_args['products']:
        landsat_args['products'].extend([
            'tc_green', 'tc_bright', 'tc_wet'])
        landsat_args['products'].remove('tasseled_cap')

    # Initialize Earth Engine API key
    logging.info('\nInitializing Earth Engine')
    ee.Initialize()
    utils.ee_request(ee.Number(1).getInfo())

    # Get current running tasks
    tasks = utils.get_ee_tasks()


    # Download images for each feature separately
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone_name = zone_name.replace(' ', '_')
        logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid))

        # Build EE geometry object for zonal stats
        zone_geom = ee.Geometry(
            geo_json=zone_json, opt_proj=zone_proj, opt_geodesic=False)
        landsat_args['zone_geom'] = zone_geom
        # logging.debug('  Centroid: {}'.format(
        #     zone_geom.centroid(100).getInfo()['coordinates']))

        # Use feature geometry to build extent, transform, and shape
        zone_extent = gdc.Extent(
            ogr.CreateGeometryFromJson(json.dumps(zone_json)).GetEnvelope())
        # zone_extent = gdc.Extent(zone_geom.GetEnvelope())
        zone_extent.ymin, zone_extent.xmax = zone_extent.xmax, zone_extent.ymin
        zone_extent = zone_extent.buffer(ini['IMAGES']['image_buffer'])
        zone_extent = zone_extent.adjust_to_snap(
            'EXPAND', ini['SPATIAL']['snap_x'], ini['SPATIAL']['snap_y'],
            ini['SPATIAL']['cellsize'])
        zone_geo = zone_extent.geo(ini['SPATIAL']['cellsize'])
        zone_transform = gdc.geo_2_ee_transform(zone_geo)
        zone_transform = '[' + ','.join(map(str, zone_transform)) + ']'
        zone_shape = zone_extent.shape(ini['SPATIAL']['cellsize'])
        logging.debug('  Zone Shape: {}'.format(zone_shape))
        logging.debug('  Zone Transform: {}'.format(zone_transform))
        logging.debug('  Zone Extent: {}'.format(zone_extent))
        # logging.debug('  Zone Geom: {}'.format(zone_geom.getInfo()))

        # output_transform = zone_transform[:]
        output_transform = '[' + ','.join(map(str, zone_transform)) + ']'
        output_shape = '{1}x{0}'.format(*zone_shape)
        # logging.debug('  Image Transform: {}'.format(output_transform))
        # logging.debug('  Image Shape: {}'.format(output_shape))

        zone_images_ws = os.path.join(
            ini['IMAGES']['output_ws'], zone_name, images_folder)
        if not os.path.isdir(zone_images_ws):
            os.makedirs(zone_images_ws)

        # Move to EE common?
        def get_collection_ids(image):
            return ee.Feature(None, {'id': image.get('SCENE_ID')})

        # Get list of available Landsat images
        landsat_obj = ee_common.Landsat(landsat_args)
        scene_id_list = [
            f['properties']['id']
            for f in landsat_obj.get_collection().map(
                get_collection_ids).getInfo()['features']]

        # Get list of unique image "dates"
        # Keep scene_id components as string for set operation
        # If not mosaicing images, include path/row in set
        #   otherwise set to None
        if not ini['INPUTS']['mosaic_method']:
            scene_id_list = set([
                (image_id[12:20], image_id[0:4], image_id[5:8], image_id[8:11])
                for image_id in scene_id_list])
        else:
            scene_id_list = set([
                (image_id[12:20], image_id[0:4], None, None)
                for image_id in scene_id_list])
        logging.debug('  Scene Count: {}\n'.format(len(scene_id_list)))

        # Process each image in the collection by date
        # Leave scene_id components as strings
        for date, landsat, path, row in sorted(scene_id_list):
            scene_dt = datetime.datetime.strptime(date, '%Y%m%d')
            year = scene_dt.strftime('%Y')
            doy = scene_dt.strftime('%j')

            # If not mosaicing images, include path/row in name
            if not ini['INPUTS']['mosaic_method']:
                landsat_str = '{}{}{}'.format(landsat, path, row)
            else:
                landsat_str = '{}'.format(landsat)
            logging.info('{} {} (DOY {})'.format(
                landsat.upper(), scene_dt.strftime('%Y-%m-%d'), doy))

            zone_year_ws = os.path.join(zone_images_ws, year)
            if not os.path.isdir(zone_year_ws):
                os.makedirs(zone_year_ws)

            # Get the prepped Landsat image by ID
            landsat_image = ee.Image(landsat_obj.get_image(
                landsat, year, doy, path, row))

            # Clip using the feature geometry
            if ini['IMAGES']['clip_landsat_flag']:
                landsat_image = landsat_image.clip(zone_geom)
            else:
                landsat_image = landsat_image.clip(ee.Geometry.Rectangle(
                    list(zone_extent), ini['SPATIAL']['crs'], False))

            # DEADBEEF - Display a single image
            # ee_common.show_thumbnail(landsat_image.visualize(
            #     bands=['fmask', 'fmask', 'fmask'], min=0, max=4))
            # ee_common.show_thumbnail(landsat_image.visualize(
            #     bands=['toa_red', 'toa_green', 'toa_blue'],
            #     min=0.05, max=0.35, gamma=1.4))
            # return True

            # Set the masked values to a nodata value
            # so that the TIF can have a nodata value other than 0 set
            landsat_image = landsat_image.unmask(nodata_value, False)

            for band in ini['IMAGES']['download_bands']:
                logging.debug('  Band: {}'.format(band))

                # Rename to match naming style from getDownloadURL
                #     image_name.band.tif
                export_id = '{}_{}_{}_{}_{}'.format(
                    ini['INPUTS']['zone_filename'], date, doy,
                    landsat_str.lower(), band.lower())
                output_id = '{}_{}_{}.{}'.format(
                    date, doy, landsat_str.lower(), band)

                export_path = os.path.join(
                    ini['EXPORT']['export_ws'], export_id + '.tif')
                output_path = os.path.join(
                    zone_year_ws, output_id + '.tif')
                logging.debug('  Export: {}'.format(export_path))
                logging.debug('  Output: {}'.format(output_path))

                if overwrite_flag:
                    if export_id in tasks.keys():
                        logging.debug('  Task already submitted, cancelling')
                        ee.data.cancelTask(tasks[export_id])
                        del tasks[export_id]
                    if os.path.isfile(export_path):
                        logging.debug(
                            '  Export image already exists, removing')
                        utils.remove_file(export_path)
                        # os.remove(export_path)
                    if os.path.isfile(output_path):
                        logging.debug(
                            '  Output image already exists, removing')
                        utils.remove_file(output_path)
                        # os.remove(output_path)
                else:
                    if os.path.isfile(export_path):
                        logging.debug('  Export image already exists, moving')
                        if band in int_bands:
                            subprocess.check_output([
                                'gdalwarp',
                                '-ot', int_output_type, '-overwrite',
                                '-of', 'GTiff', '-co', 'COMPRESS=LZW',
                                '-srcnodata', str(nodata_value),
                                '-dstnodata', str(int_nodata_value),
                                export_path, output_path])
                        else:
                            subprocess.check_output([
                                'gdalwarp',
                                '-ot', float_output_type, '-overwrite',
                                '-of', 'GTiff', '-co', 'COMPRESS=LZW',
                                '-srcnodata', str(nodata_value),
                                '-dstnodata', '{:f}'.format(float_nodata_value),
                                export_path, output_path])
                        with open(os.devnull, 'w') as devnull:
                            subprocess.check_call(
                                ['gdalinfo', '-stats', output_path],
                                stdout=devnull)
                        subprocess.check_output(
                            ['gdalmanage', 'delete', export_path])
                        continue
                    elif os.path.isfile(output_path):
                        logging.debug(
                            '  Output image already exists, skipping')
                        continue
                    elif export_id in tasks.keys():
                        logging.debug(
                            '  Task already submitted, skipping')
                        continue

                # Should composites include Ts?
                if band == 'refl_toa':
                    band_list = [
                        'blue_toa', 'green_toa', 'red_toa',
                        'nir_toa', 'swir1_toa', 'swir2_toa']
                elif band == 'refl_sur':
                    band_list = [
                        'blue_sur', 'green_sur', 'red_sur',
                        'nir_sur', 'swir1_sur', 'swir2_sur']
                elif band == 'tasseled_cap':
                    band_list = ['tc_bright', 'tc_green', 'tc_wet']
                else:
                    band_list = [band]
                band_image = landsat_image.select(band_list)

                # CGM 2016-09-26 - Don't apply any cloud masks to images
                # # Apply cloud mask before exporting
                # if fmask_flag and band not in ['refl_sur', 'cloud', 'fmask']:
                #     fmask = ee.Image(landsat_image.select(['fmask']))
                #     cloud_mask = fmask.eq(2).Or(fmask.eq(3)).Or(fmask.eq(4)).Not()
                #     band_image = band_image.updateMask(cloud_mask)

                logging.debug('  Building export task')
                # if ini['EXPORT']['export_dest'] == 'gdrive':
                task = ee.batch.Export.image.toDrive(
                    band_image,
                    description=export_id,
                    # folder=ini['EXPORT']['export_folder'],
                    fileNamePrefix=export_id,
                    dimensions=output_shape,
                    crs=ini['SPATIAL']['crs'],
                    crsTransform=output_transform)
                # elif ini['EXPORT']['export_dest'] == 'cloud':
                #     task = ee.batch.Export.image.toCloudStorage(
                #         band_image,
                #         description=export_id,
                #         bucket=ini['EXPORT']['export_folder'],
                #         fileNamePrefix=export_id,
                #         dimensions=output_shape,
                #         crs=ini['SPATIAL']['crs'],
                #         crsTransform=output_transform)

                logging.debug('  Starting export task')
                utils.ee_request(task.start())
def ee_beamer_et(ini_path=None, overwrite_flag=False):
    """Earth Engine Beamer ET Image Download

    Args:
        ini_path (str):
        overwrite_flag (bool): if True, overwrite existing files

    Returns:
        None
    """
    logging.info('\nEarth Engine Beamer Annual Mean ETg Image Download')

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='SPATIAL')
    inputs.parse_section(ini, section='IMAGES')
    inputs.parse_section(ini, section='BEAMER')

    ini['IMAGES']['download_bands'] = [
        'etg_mean', 'etg_lci', 'etg_uci', 'etg_lpi', 'etg_upi'
    ]
    stat_list = ['mean', 'median']
    nodata_value = -9999
    zips_folder = 'zips'
    annuals_folder = 'annuals'

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)
    # zone_count = len(zone_geom_list)
    # output_fmt = '_{0:0%sd}.csv' % str(int(math.log10(zone_count)) + 1)

    # Check if the zone_names are unique
    # Eventually support merging common zone_names
    if len(set([z[1] for z in zone_geom_list])) != len(zone_geom_list):
        logging.error(
            '\nERROR: There appear to be duplicate zone ID/name values.'
            '\n  Currently, the values in "{}" must be unique.'
            '\n  Exiting.'.format(ini['INPUTS']['zone_field']))
        return False

    # Filter features by FID
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']
        ]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']
        ]

    # Merge geometries
    if ini['INPUTS']['merge_geom_flag']:
        merge_geom = ogr.Geometry(ogr.wkbMultiPolygon)
        for zone in zone_geom_list:
            zone_multipolygon = ogr.ForceToMultiPolygon(
                ogr.CreateGeometryFromJson(json.dumps(zone[2])))
            for zone_polygon in zone_multipolygon:
                merge_geom.AddGeometry(zone_polygon)
        # merge_json = json.loads(merge_mp.ExportToJson())
        zone_geom_list = [[
            0, ini['INPUTS']['zone_filename'],
            json.loads(merge_geom.ExportToJson())
        ]]
        ini['INPUTS']['zone_field'] = ''

    # Set all zone specific parameters into a dictionary
    zone = {}

    # Need zone_shp_path projection to build EE geometries
    zone['osr'] = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path'])
    zone['proj'] = gdc.osr_wkt(zone['osr'])
    # zone['proj'] = ee.Projection(zone['proj']).wkt().getInfo()
    # zone['proj'] = zone['proj'].replace('\n', '').replace(' ', '')
    # logging.debug('  Zone Projection: {}'.format(zone['proj']))

    # Check that shapefile has matching spatial reference
    if not gdc.matching_spatref(zone['osr'], ini['SPATIAL']['osr']):
        logging.warning('  Zone OSR:\n{}\n'.format(zone['osr']))
        logging.warning('  Output OSR:\n{}\n'.format(
            ini['SPATIAL']['osr'].ExportToWkt()))
        logging.warning('  Zone Proj4:   {}'.format(
            zone['osr'].ExportToProj4()))
        logging.warning('  Output Proj4: {}'.format(
            ini['SPATIAL']['osr'].ExportToProj4()))
        logging.warning(
            '\nWARNING: \n'
            'The output and zone spatial references do not appear to match\n'
            'This will likely cause problems!')
        input('Press ENTER to continue')
    else:
        logging.debug('  Zone Projection:\n{}\n'.format(
            zone['osr'].ExportToWkt()))
        logging.debug('  Output Projection:\n{}\n'.format(
            ini['SPATIAL']['osr'].ExportToWkt()))
        logging.debug('  Output Cellsize: {}'.format(
            ini['SPATIAL']['cellsize']))

    # Initialize Earth Engine API key
    logging.info('\nInitializing Earth Engine')
    ee.Initialize()
    utils.ee_request(ee.Number(1).getInfo())

    # Get list of path/row strings to centroid coordinates
    if ini['INPUTS']['tile_keep_list']:
        ini['INPUTS']['tile_geom'] = [
            wrs2.tile_centroids[tile]
            for tile in ini['INPUTS']['tile_keep_list']
            if tile in wrs2.tile_centroids.keys()
        ]
        ini['INPUTS']['tile_geom'] = ee.Geometry.MultiPoint(
            ini['INPUTS']['tile_geom'], 'EPSG:4326')
    else:
        ini['INPUTS']['tile_geom'] = None

    # Read in ETo and PPT data from file
    if (ini['BEAMER']['eto_source'] == 'file'
            or ini['BEAMER']['ppt_source'] == 'file'):
        data_array = np.genfromtxt(ini['BEAMER']['data_path'],
                                   delimiter=',',
                                   names=True,
                                   dtype=None)
        data_fields = data_array.dtype.names
        logging.debug('  CSV fields: {}'.format(', '.join(data_fields)))
        # DEADBEEF - Compare fields names assuming all upper case
        data_fields = [f.upper() for f in data_fields]
        eto_dict = defaultdict(dict)
        ppt_dict = defaultdict(dict)
        for row in data_array:
            z = str(row[data_fields.index(ini['BEAMER']['data_zone_field'])])
            y = int(row[data_fields.index(ini['BEAMER']['data_year_field'])])
            if ini['BEAMER']['eto_source'] == 'file':
                # DEADBEEF - Compare fields names assuming all upper case
                eto_dict[z][y] = row[data_fields.index(
                    ini['BEAMER']['data_eto_field'].upper())]
            if ini['BEAMER']['ppt_source'] == 'file':
                # DEADBEEF - Compare fields names assuming all upper case
                ppt_dict[z][y] = row[data_fields.index(
                    ini['BEAMER']['data_ppt_field'].upper())]

    # Get filtered/merged/prepped Landsat collection
    landsat_args = {
        k: v
        for section in ['INPUTS'] for k, v in ini[section].items() if k in [
            'landsat4_flag', 'landsat5_flag', 'landsat7_flag', 'landsat8_flag',
            'fmask_flag', 'acca_flag', 'start_year', 'end_year', 'start_month',
            'end_month', 'start_doy', 'end_doy', 'scene_id_keep_list',
            'scene_id_skip_list', 'path_keep_list', 'row_keep_list',
            'tile_geom', 'adjust_method', 'mosaic_method', 'refl_sur_method'
        ]
    }
    landsat_args['products'] = ['evi_sur']
    landsat = ee_common.Landsat(landsat_args)

    # Download images for each feature separately
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone['fid'] = zone_fid
        zone['name'] = zone_name.replace(' ', '_')
        zone['json'] = zone_json
        logging.info('ZONE: {} (FID: {})'.format(zone['name'], zone['fid']))

        # Build EE geometry object for zonal stats
        zone['geom'] = ee.Geometry(geo_json=zone['json'],
                                   opt_proj=zone['proj'],
                                   opt_geodesic=False)
        # logging.debug('  Centroid: {}'.format(
        #     zone['geom'].centroid(100).getInfo()['coordinates']))

        # Use feature geometry to build extent, transform, and shape
        zone['extent'] = gdc.Extent(
            ogr.CreateGeometryFromJson(json.dumps(zone['json'])).GetEnvelope())
        # zone['extent'] = gdc.Extent(zone['geom'].GetEnvelope())
        zone['extent'] = zone['extent'].ogrenv_swap()
        zone['extent'] = zone['extent'].adjust_to_snap(
            'EXPAND', ini['SPATIAL']['snap_x'], ini['SPATIAL']['snap_y'],
            ini['SPATIAL']['cellsize'])
        zone['geo'] = zone['extent'].geo(ini['SPATIAL']['cellsize'])
        zone['transform'] = gdc.geo_2_ee_transform(zone['geo'])
        # zone['transform'] = '[' + ','.join(map(str, zone['transform'])) + ']'
        zone['shape'] = zone['extent'].shape(ini['SPATIAL']['cellsize'])
        logging.debug('  Zone Shape: {}'.format(zone['shape']))
        logging.debug('  Zone Transform: {}'.format(zone['transform']))
        logging.debug('  Zone Extent: {}'.format(zone['extent']))
        # logging.debug('  Zone Geom: {}'.format(zone['geom'].getInfo()))

        # Assume all pixels in all 14+2 images could be reduced
        zone['max_pixels'] = zone['shape'][0] * zone['shape'][1]
        logging.debug('  Max Pixels: {}'.format(zone['max_pixels']))

        # Set output spatial reference
        # Eventually allow user to manually set these
        # output_crs = zone['proj']
        logging.debug('  Image Projection: {}'.format(ini['SPATIAL']['crs']))

        # output_transform = zone['transform'][:]
        output_transform = '[' + ','.join(map(str, zone['transform'])) + ']'
        output_shape = '{1}x{0}'.format(*zone['shape'])
        logging.debug('  Image Transform: {}'.format(output_transform))
        logging.debug('  Image Shape: {}'.format(output_shape))

        zone_output_ws = os.path.join(ini['IMAGES']['output_ws'], zone_name)
        zone_zips_ws = os.path.join(zone_output_ws, zips_folder)
        zone_annuals_ws = os.path.join(zone_output_ws, annuals_folder)
        if not os.path.isdir(zone_zips_ws):
            os.makedirs(zone_zips_ws)
        if not os.path.isdir(zone_annuals_ws):
            os.makedirs(zone_annuals_ws)

        # Process date range by year
        interval_cnt = 1
        start_dt = datetime.datetime(ini['INPUTS']['start_year'], 1, 1)
        end_dt = datetime.datetime(ini['INPUTS']['end_year'] + 1, 1,
                                   1) - datetime.timedelta(0, 1)
        for i, iter_start_dt in enumerate(
                rrule.rrule(rrule.YEARLY,
                            interval=interval_cnt,
                            dtstart=start_dt,
                            until=end_dt)):
            iter_end_dt = (iter_start_dt +
                           relativedelta.relativedelta(years=interval_cnt) -
                           datetime.timedelta(0, 1))
            if ((ini['INPUTS']['start_month']
                 and iter_end_dt.month < ini['INPUTS']['start_month'])
                    or (ini['INPUTS']['end_month']
                        and iter_start_dt.month > ini['INPUTS']['end_month'])):
                logging.debug('  {}  {}  skipping'.format(
                    iter_start_dt.date(), iter_end_dt.date()))
                continue
            elif (
                (ini['INPUTS']['start_doy'] and
                 int(iter_end_dt.strftime('%j')) < ini['INPUTS']['start_doy'])
                    or
                (ini['INPUTS']['end_doy'] and int(iter_start_dt.strftime('%j'))
                 > ini['INPUTS']['end_doy'])):
                logging.debug('  {}  {}  skipping'.format(
                    iter_start_dt.date(), iter_end_dt.date()))
                continue
            else:
                logging.info('{}  {}'.format(iter_start_dt.date(),
                                             iter_end_dt.date()))
            year = iter_start_dt.year

            # image_id = 'etg_{}_{}'.format(
            image_id = '{}_{}'.format(zone_name.lower().replace(' ', '_'),
                                      year)
            zip_path = os.path.join(zone_zips_ws, image_id + '.zip')
            # median_path = os.path.join(
            #     zone_output_ws, image_id + '.img')
            logging.debug('  Zip: {}'.format(zip_path))

            if os.path.isfile(zip_path) and overwrite_flag:
                logging.debug('    Output already exists, removing zip')
                os.remove(zip_path)
            elif os.path.isfile(zip_path) and not overwrite_flag:
                # Check that existing ZIP files can be opened
                try:
                    with zipfile.ZipFile(zip_path, 'r') as z:
                        pass
                except Exception as e:
                    logging.warning('    Zip file error, removing'.format(i))
                    os.remove(zip_path)

            # Filter the GRIDMET collection
            wy_start_date = '{}-10-01'.format(year - 1)
            wy_end_date = '{}-10-01'.format(year)
            logging.debug('  WY: {} {}'.format(wy_start_date, wy_end_date))
            gridmet_coll = ee.ImageCollection('IDAHO_EPSCOR/GRIDMET') \
                .filterDate(wy_start_date, wy_end_date)

            # # PRISM collection was uploaded as an asset
            # if ini['BEAMER']['ppt_source'] == 'prism':
            #     def prism_time_start(input_image):
            #         """Set time_start property on PRISM water year PPT collection"""
            #         # Assume year is the 4th item separated by "_"
            #         water_year = ee.String(input_image.get('system:index')).split('_').get(3)
            #         date_start = ee.Date(ee.String(water_year).cat('-10-01'))
            #         return input_image.select([0], ['ppt']).set({
            #             'system:time_start': date_start.millis()
            #         })
            #     prism_coll = ee.ImageCollection('users/cgmorton/prism_800m_ppt_wy')
            #     prism_coll = prism_coll.map(prism_time_start) \
            #         .filterDate(wy_start_date, wy_end_date)

            # Get water year PPT from file
            # Convert all input data to mm to match GRIDMET data
            if ini['BEAMER']['ppt_source'] == 'file':
                wy_ppt_input = ppt_dict[zone_name][year]
                if ini['BEAMER']['data_ppt_units'] == 'mm':
                    pass
                elif ini['BEAMER']['data_ppt_units'] == 'in':
                    wy_ppt_input *= 25.4
                elif ini['BEAMER']['data_ppt_units'] == 'ft':
                    wy_ppt_input *= (25.4 * 12)
            elif ini['BEAMER']['ppt_source'] == 'gridmet':
                # GET GRIDMET value at centroid of geometry
                wy_ppt_input = float(
                    utils.ee_getinfo(
                        ee.ImageCollection(
                            gridmet_coll.select(['pr'],
                                                ['ppt']).sum()).getRegion(
                                                    zone['geom'].centroid(1),
                                                    500))[1][4])
                # Calculate GRIDMET zonal mean of geometry
                # wy_ppt_input = float(ee.ImageCollection(
                #     gridmet_coll.select(['pr'], ['ppt'])).reduceRegion(
                #         reducer=ee.Reducer.sum(),
                #         geometry=zone['geom'],
                #         crs=ini['SPATIAL']['crs'],
                #         crsTransform=zone['transform'],
                #         bestEffort=False,
                #         tileScale=1).getInfo()['ppt']
            # elif ini['BEAMER']['ppt_source'] == 'prism':
            #     # Calculate PRISM zonal mean of geometry
            #     wy_ppt_input = float(ee.ImageCollection(
            #         prism_coll.map(ee_common.prism_ppt_func)).sum().reduceRegion(
            #             reducer=ee.Reducer.mean(),
            #             geometry=zone['geom'],
            #             crs=ini['SPATIAL']['crs'],
            #             crsTransform=zone['transform'],
            #             bestEffort=False,
            #             tileScale=1).getInfo()['ppt'])

            # Get water year ETo read from file
            # Convert all input data to mm for Beamer Method
            if ini['BEAMER']['eto_source'] == 'file':
                wy_eto_input = eto_dict[zone_name][year]
                if ini['BEAMER']['data_eto_units'] == 'mm':
                    pass
                elif ini['BEAMER']['data_eto_units'] == 'in':
                    wy_eto_input *= 25.4
                elif ini['BEAMER']['data_eto_units'] == 'ft':
                    wy_eto_input *= (25.4 * 12)
            # This assumes GRIMET data is in millimeters
            elif ini['BEAMER']['eto_source'] == 'gridmet':
                wy_eto_input = float(
                    utils.ee_getinfo(
                        ee.ImageCollection(gridmet_coll.select(
                            ['eto']).sum()).getRegion(zone['geom'].centroid(1),
                                                      500))[1][4])
                # wy_eto_input = float(ee.ImageCollection(
                #     gridmet_coll.select(['eto'])).reduceRegion(
                #         reducer=ee.Reducer.sum(),
                #         geometry=zone['geom'],
                #         crs=ini['SPATIAL']['crs'],
                #         crsTransform=zone['transform'],
                #         bestEffort=False,
                #         tileScale=1).getInfo()
            logging.debug('  Input ETO: {} mm  PPT: {} mm'.format(
                wy_eto_input, wy_ppt_input))

            # Scale ETo & PPT
            wy_eto_input *= ini['BEAMER']['eto_factor']
            wy_ppt_input *= ini['BEAMER']['ppt_factor']

            # Convert output units from mm
            wy_ppt_output = wy_ppt_input
            wy_eto_output = wy_eto_input
            if ini['IMAGES']['ppt_units'] == 'mm':
                pass
            elif ini['IMAGES']['ppt_units'] == 'in':
                wy_ppt_output /= 25.4
            elif ini['IMAGES']['ppt_units'] == 'ft':
                wy_ppt_output /= (25.4 * 12)
            if ini['IMAGES']['eto_units'] == 'mm':
                pass
            elif ini['IMAGES']['eto_units'] == 'in':
                wy_eto_output /= 25.4
            elif ini['IMAGES']['eto_units'] == 'ft':
                wy_eto_output /= (25.4 * 12)
            logging.debug('  Output ETO: {} {} PPT: {} {}'.format(
                wy_eto_output, ini['IMAGES']['eto_units'], wy_ppt_output,
                ini['IMAGES']['ppt_units']))

            # Initialize the Landsat object for target zone and iteration
            landsat.zone_geom = zone['geom']
            landsat.start_date = iter_start_dt.strftime('%Y-%m-%d')
            landsat.end_date = iter_end_dt.strftime('%Y-%m-%d')
            landsat_coll = landsat.get_collection()
            # print(sorted(utils.ee_getinfo(
            #     landsat_coll.aggregate_histogram('SCENE_ID'))))
            # input('ENTER')

            # Skip if Landsat collection is empty
            if not utils.ee_getinfo(
                    landsat_coll.aggregate_histogram('SCENE_ID')):
                logging.info('  Empty Landsat collection, skipping')
                continue

            # Add water year ETo and PPT values to each image
            def eto_ppt_func(img):
                """"""
                return ee.Image(img).setMulti({
                    'wy_eto': wy_eto_output,
                    'wy_ppt': wy_ppt_output
                })

            landsat_coll = ee.ImageCollection(landsat_coll.map(eto_ppt_func))

            # Build each collection separately then merge
            etg_coll = ee.ImageCollection(landsat_coll.map(
                    ee_common.beamer_func)) \
                .select(ini['IMAGES']['download_bands'])

            # Clip using the feature geometry
            # Set the masked values to a nodata value
            # so that the TIF can have a nodata value other than 0 set
            etg_image = ee.Image(etg_coll.mean()) \
                .clip(zone['geom']) \
                .unmask(nodata_value, False)

            if not os.path.isfile(zip_path):
                # Get the download URL
                logging.debug('  Requesting URL')
                zip_url = utils.ee_request(
                    etg_image.getDownloadURL({
                        'name': image_id,
                        'crs': ini['SPATIAL']['crs'],
                        'crs_transform': output_transform,
                        'dimensions': output_shape
                    }))

                # Try downloading a few times
                logging.info('  Downloading')
                for i in range(1, 10):
                    try:
                        response = urlrequest.urlopen(zip_url)
                        with open(zip_path, 'wb') as output_f:
                            shutil.copyfileobj(response, output_f)
                        break
                    except Exception as e:
                        logging.info('  Resending query')
                        logging.debug('  {}'.format(e))
                        sleep(i**2)
                        os.remove(zip_path)

            # Try extracting the files
            try:
                logging.info('  Extracting')
                with zipfile.ZipFile(zip_path, 'r') as z:
                    z.extractall(zone_annuals_ws)
            except Exception as e:
                logging.warning('    Error: could not extract'.format(i))
                logging.debug('  {}'.format(e))
                try:
                    os.remove(zip_path)
                except Exception as e:
                    pass

            # Set nodata value
            for item in os.listdir(zone_annuals_ws):
                if item.startswith(image_id) and item.endswith('.tif'):
                    gdc.raster_path_set_nodata(
                        os.path.join(zone_annuals_ws, item), nodata_value)
                    raster_statistics(os.path.join(zone_annuals_ws, item))

        logging.info('\nComputing composite rasters from annual means')
        for stat in stat_list:
            logging.info('  Stat: {}'.format(stat))
            for band in ini['IMAGES']['download_bands']:
                logging.info('  {}'.format(band))
                image_band_list = [
                    os.path.join(zone_annuals_ws, item)
                    for item in os.listdir(zone_annuals_ws)
                    if item.endswith('.{}.tif'.format(band.lower()))
                ]
                # for image_path in image_band_list:
                #     raster_path_set_nodata(image_path, nodata_value)

                output_path = os.path.join(
                    # zone_output_ws, 'etg_{}_{}.{}.tif'.format(
                    zone_output_ws,
                    '{}_{}.{}.tif'.format(zone_name.lower().replace(' ', '_'),
                                          stat.lower(), band.lower()))
                logging.debug('  {}'.format(output_path))

                # Use GDAL to compute the composite raster
                cell_statistics(image_band_list, output_path, stat.lower())
                raster_statistics(output_path)
def main(ini_path=None, overwrite_flag=True, show_flag=False):
    """Generate summary figures

    Args:
        ini_path (str): file path of the control file
        overwrite_flag (bool): if True, overwrite existing figures
        show_flag (bool): if True, show figures as they are being built
    """

    logging.info('\nGenerate summary figures')

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='FIGURES')

    # Band options
    band_list = [
        'albedo_sur',
        'cloud_score',
        'eto',
        'evi_sur',
        'fmask_count',
        'fmask_total',
        'ndvi_sur',
        'ndvi_toa',
        'ndwi_green_nir_sur',
        'ndwi_green_nir_toa',
        'ndwi_green_swir1_sur',
        'ndwi_green_swir1_toa',
        'ndwi_nir_swir1_sur',
        'ndwi_nir_swir1_toa',
        'ndwi_swir1_green_sur',
        'ndwi_swir1_green_toa',
        # 'ndwi_sur', 'ndwi_toa',
        'pixel_count',
        'pixel_total',
        'ppt',
        'tc_bright',
        'tc_green',
        'tc_wet',
        'ts'
    ]
    band_name = {
        'albedo_sur': 'Albedo',
        'cloud_score': 'Cloud Score',
        'eto': 'ETo',
        'evi_sur': 'EVI',
        'fmask_count': 'Fmask Count',
        'fmask_total': 'Fmask Total',
        'ndvi_sur': 'NDVI',
        'ndvi_toa': 'NDVI (TOA)',
        'ndwi_green_nir_sur': 'NDWI (Green, NIR)',
        'ndwi_green_nir_toa': 'NDWI (Green, NIR) (TOA)',
        'ndwi_green_swir1_sur': 'NDWI (Green, SWIR1)',
        'ndwi_green_swir1_toa': 'NDWI (Green, SWIR1) (TOA)',
        'ndwi_nir_swir1_sur': 'NDWI (NIR, SWIR1)',
        'ndwi_nir_swir1_toa': 'NDWI (NIR, SWIR1) (TOA)',
        'ndwi_swir1_green_sur': 'NDWI (SWIR1, Green)',
        'ndwi_swir1_green_toa': 'NDWI (SWIR1, Green) (TOA)',
        # 'ndwi_sur': 'NDWI (SWIR1, GREEN)',
        # 'ndwi_toa': 'NDWI (SWIR1, GREEN) (TOA)',
        'pixel_count': 'Pixel Count',
        'pixel_total': 'Pixel Total',
        'ppt': 'PPT',
        'tc_bright': 'Brightness',
        'tc_green': 'Greeness',
        'tc_wet': 'Wetness',
        'ts': 'Ts'
    }
    band_unit = {
        'albedo_sur': 'dimensionless',
        'cloud_score': 'dimensionless',
        'evi_sur': 'dimensionless',
        'eto': 'mm',
        'fmask_count': 'dimensionless',
        'fmask_total': 'dimensionless',
        'ndvi_sur': 'dimensionless',
        'ndvi_toa': 'dimensionless',
        'ndwi_green_nir_sur': 'dimensionless',
        'ndwi_green_nir_toa': 'dimensionless',
        'ndwi_green_swir1_sur': 'dimensionless',
        'ndwi_green_swir1_toa': 'dimensionless',
        'ndwi_nir_swir1_sur': 'dimensionless',
        'ndwi_nir_swir1_toa': 'dimensionless',
        'ndwi_swir1_green_sur': 'dimensionless',
        'ndwi_swir1_green_toa': 'dimensionless',
        # 'ndwi_sur': 'dimensionless',
        # 'ndwi_toa': 'dimensionless',
        'pixel_count': 'dimensionless',
        'pixel_total': 'dimensionless',
        'ppt': 'mm',
        'tc_bright': 'dimensionless',
        'tc_green': 'dimensionless',
        'tc_wet': 'dimensionless',
        'ts': 'K',
    }
    band_color = {
        'albedo_sur': '#CF4457',
        'cloud_score': '0.5',
        'eto': '#348ABD',
        'fmask_count': '0.5',
        'fmask_total': '0.5',
        'evi_sur': '#FFA500',
        'ndvi_sur': '#A60628',
        'ndvi_toa': '#A60628',
        'ndwi_green_nir_sur': '#4eae4b',
        'ndwi_green_nir_toa': '#4eae4b',
        'ndwi_green_swir1_sur': '#4eae4b',
        'ndwi_green_swir1_toa': '#4eae4b',
        'ndwi_nir_swir1_sur': '#4eae4b',
        'ndwi_nir_swir1_toa': '#4eae4b',
        'ndwi_swir1_green_sur': '#4eae4b',
        'ndwi_swir1_green_toa': '#4eae4b',
        # 'ndwi_sur': '#4eae4b',
        # 'ndwi_toa': '#4eae4b',
        'pixel_count': '0.5',
        'pixel_total': '0.5',
        'ppt': '0.5',
        'tc_bright': '#E24A33',
        'tc_green': '#E24A33',
        'tc_wet': '#E24A33',
        'ts': '#188487'
    }

    # A couple of color palettes to sample from
    # import seaborn as sns
    # print(sns.color_palette('hls', 20).as_hex())
    # print(sns.color_palette('husl', 20).as_hex())
    # print(sns.color_palette('hsv', 20).as_hex())
    # print(sns.color_palette('Set1', 20).as_hex())
    # print(sns.color_palette('Set2', 20).as_hex())

    # Hardcoded plot options
    figures_folder = 'figures'
    fig_type = 'large'

    plot_dict = dict()

    # Center y-labels in figure window (instead of centering on ticks/axes)
    plot_dict['center_ylabel'] = False

    # Axes percentages must be 0-1
    plot_dict['timeseries_band_ax_pct'] = [0.3, 0.92]
    plot_dict['timeseries_ppt_ax_pct'] = [0.0, 0.35]
    plot_dict['complement_band_ax_pct'] = [0.0, 0.5]
    plot_dict['complement_eto_ax_pct'] = [0.4, 1.0]

    if fig_type.lower() == 'large':
        plot_dict['title_fs'] = 12
        plot_dict['xtick_fs'] = 10
        plot_dict['ytick_fs'] = 10
        plot_dict['xlabel_fs'] = 10
        plot_dict['ylabel_fs'] = 10
        plot_dict['legend_fs'] = 10
        plot_dict['ts_ms'] = 3
        plot_dict['comp_ms'] = 4
        plot_dict['timeseries_ax'] = [0.12, 0.13, 0.78, 0.81]
        plot_dict['scatter_ax'] = [0.12, 0.10, 0.82, 0.84]
        plot_dict['complement_ax'] = [0.12, 0.10, 0.78, 0.84]
        plot_dict['fig_size'] = (6.0, 5.0)
    elif fig_type.lower() == 'small':
        plot_dict['title_fs'] = 10
        plot_dict['xtick_fs'] = 8
        plot_dict['ytick_fs'] = 8
        plot_dict['xlabel_fs'] = 8
        plot_dict['ylabel_fs'] = 8
        plot_dict['legend_fs'] = 8
        plot_dict['ts_ms'] = 1.5
        plot_dict['comp_ms'] = 2
        plot_dict['timeseries_ax'] = [0.18, 0.21, 0.67, 0.70]
        plot_dict['scatter_ax'] = [0.18, 0.21, 0.67, 0.70]
        plot_dict['complement_ax'] = [0.18, 0.16, 0.67, 0.75]
        plot_dict['fig_size'] = (3.0, 2.5)
    plot_dict['fig_dpi'] = 300
    plot_dict['show'] = show_flag
    plot_dict['overwrite'] = overwrite_flag

    # CSV parameters
    landsat_annual_fields = [
        'ZONE_FID',
        'ZONE_NAME',
        'YEAR',
        'SCENE_COUNT',
        'CLOUD_SCORE',
        'PIXEL_COUNT',
        'PIXEL_TOTAL',
        'FMASK_COUNT',
        'FMASK_TOTAL',
        'TS',
        'ALBEDO_SUR',
        'NDVI_TOA',
        'NDVI_SUR',
        'EVI_SUR',
        'NDWI_GREEN_NIR_SUR',
        'NDWI_GREEN_SWIR1_SUR',
        'NDWI_NIR_SWIR1_SUR',
        # 'NDWI_GREEN_NIR_TOA', 'NDWI_GREEN_SWIR1_TOA', 'NDWI_NIR_SWIR1_TOA',
        # 'NDWI_SWIR1_GREEN_TOA', 'NDWI_SWIR1_GREEN_SUR',
        # 'NDWI_TOA', 'NDWI_SUR',
        'TC_BRIGHT',
        'TC_GREEN',
        'TC_WET'
    ]

    # Add merged row XXX to keep list
    ini['INPUTS']['row_keep_list'].append('XXX')

    # Check figure bands
    timeseries_bands = ini['FIGURES']['timeseries_bands']
    scatter_bands = ini['FIGURES']['scatter_bands']
    complementary_bands = ini['FIGURES']['complementary_bands']
    if timeseries_bands:
        logging.info('Timeseries Bands:')
        for band in timeseries_bands:
            if band not in band_list:
                logging.info(
                    '  Invalid timeseries band: {}, exiting'.format(band))
                return False
            logging.info('  {}'.format(band))
    if scatter_bands:
        logging.info('Scatter Bands (x:y):')
        for band_x, band_y in scatter_bands:
            if band_x not in band_list:
                logging.info(
                    '  Invalid scatter band: {}, exiting'.format(band_x))
                return False
            elif band_y not in band_list:
                logging.info('  Invalid band: {}, exiting'.format(band_y))
                return False
            logging.info('  {}:{}'.format(band_x, band_y))
    if complementary_bands:
        logging.info('Complementary Bands:')
        for band in complementary_bands:
            if band not in band_list:
                logging.info(
                    '  Invalid complementary band: {}, exiting'.format(band))
                return False
            logging.info('  {}'.format(band))

    # Add input plot options
    plot_dict['ppt_plot_type'] = ini['FIGURES']['ppt_plot_type']
    plot_dict['scatter_best_fit'] = ini['FIGURES']['scatter_best_fit']

    # Start/end year
    year_list = list(
        range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(
        utils.month_range(gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(', '.join(
        map(str, gridmet_months))))

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID before merging geometries
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']
        ]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']
        ]

    # # Filter features by FID before merging geometries
    # if ini['INPUTS']['fid_keep_list']:
    #     landsat_df = landsat_df[landsat_df['ZONE_FID'].isin(
    #         ini['INPUTS']['fid_keep_list'])]
    # if ini['INPUTS']['fid_skip_list']:
    #     landsat_df = landsat_df[~landsat_df['ZONE_FID'].isin(
    #         ini['INPUTS']['fid_skip_list'])]

    logging.info('\nProcessing zones')
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone_name = zone_name.replace(' ', '_')
        logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid))

        zone_stats_ws = os.path.join(ini['ZONAL_STATS']['output_ws'],
                                     zone_name)
        zone_figures_ws = os.path.join(ini['SUMMARY']['output_ws'], zone_name,
                                       figures_folder)
        if not os.path.isdir(zone_stats_ws):
            logging.debug(
                '  Folder {} does not exist, skipping'.format(zone_stats_ws))
            continue
        elif not os.path.isdir(zone_figures_ws):
            os.makedirs(zone_figures_ws)

        # Input paths
        landsat_daily_path = os.path.join(
            zone_stats_ws, '{}_landsat_daily.csv'.format(zone_name))
        gridmet_daily_path = os.path.join(
            zone_stats_ws, '{}_gridmet_daily.csv'.format(zone_name))
        gridmet_monthly_path = os.path.join(
            zone_stats_ws, '{}_gridmet_monthly.csv'.format(zone_name))
        if not os.path.isfile(landsat_daily_path):
            logging.error('  Landsat daily CSV does not exist, skipping zone')
            continue
        elif (not os.path.isfile(gridmet_daily_path)
              and not os.path.isfile(gridmet_monthly_path)):
            logging.error(
                '  GRIDMET daily or monthly CSV does not exist, skipping zone')
            continue
            # DEADBEEF - Eventually support generating only Landsat figures
            # logging.error(
            #     '  GRIDMET daily and/or monthly CSV files do not exist.\n'
            #     '  ETo and PPT will not be processed.')

        # Output paths
        landsat_summary_path = os.path.join(
            zone_figures_ws, '{}_landsat_figures.csv'.format(zone_name))
        gridmet_summary_path = os.path.join(
            zone_figures_ws, '{}_gridmet_figures.csv'.format(zone_name))
        zone_summary_path = os.path.join(
            zone_figures_ws, '{}_zone_figures.csv'.format(zone_name))

        logging.debug('  Reading Landsat CSV')
        landsat_df = pd.read_csv(landsat_daily_path)

        logging.debug('  Filtering Landsat dataframe')
        landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0]

        # QA field should have been written in zonal stats code
        # Eventually this block can be removed
        if 'QA' not in landsat_df.columns.values:
            landsat_df['QA'] = 0

        # # This assumes that there are L5/L8 images in the dataframe
        # if not landsat_df.empty:
        #     max_pixel_count = max(landsat_df['PIXEL_COUNT'])
        # else:
        #     max_pixel_count = 0

        if year_list:
            landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)]
        if month_list:
            landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)]
        if doy_list:
            landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)]

        # Assume the default is for these to be True and only filter if False
        if not ini['INPUTS']['landsat4_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04']
        if not ini['INPUTS']['landsat5_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05']
        if not ini['INPUTS']['landsat7_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07']
        if not ini['INPUTS']['landsat8_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08']

        if ini['INPUTS']['path_keep_list']:
            landsat_df = landsat_df[landsat_df['PATH'].isin(
                ini['INPUTS']['path_keep_list'])]
        if (ini['INPUTS']['row_keep_list']
                and ini['INPUTS']['row_keep_list'] != ['XXX']):
            landsat_df = landsat_df[landsat_df['ROW'].isin(
                ini['INPUTS']['row_keep_list'])]

        if ini['INPUTS']['scene_id_keep_list']:
            # Replace XXX with primary ROW value for checking skip list SCENE_ID
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[scene_id_df.isin(
                ini['INPUTS']['scene_id_keep_list']).values]
            # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
            # landsat_df = landsat_df[landsat_df['SCENE_ID'].isin(
            #     ini['INPUTS']['scene_id_keep_list'])]
        if ini['INPUTS']['scene_id_skip_list']:
            # Replace XXX with primary ROW value for checking skip list SCENE_ID
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[np.logical_not(
                scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)]
            # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
            # landsat_df = landsat_df[np.logical_not(landsat_df['SCENE_ID'].isin(
            #     ini['INPUTS']['scene_id_skip_list']))]

        # Filter by QA/QC value
        if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty:
            logging.debug('    Maximum QA: {0}'.format(
                ini['SUMMARY']['max_qa']))
            landsat_df = landsat_df[
                landsat_df['QA'] <= ini['SUMMARY']['max_qa']]

        # Filter by average cloud score
        if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty:
            logging.debug('    Maximum cloud score: {0}'.format(
                ini['SUMMARY']['max_cloud_score']))
            landsat_df = landsat_df[
                landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

        # Filter by Fmask percentage
        if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty:
            landsat_df['FMASK_PCT'] = 100 * (landsat_df['FMASK_COUNT'] /
                                             landsat_df['FMASK_TOTAL'])
            logging.debug('    Max Fmask threshold: {}'.format(
                ini['SUMMARY']['max_fmask_pct']))
            landsat_df = landsat_df[
                landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

        # Filter low count SLC-off images
        if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty:
            logging.debug('    Mininum SLC-off threshold: {}%'.format(
                ini['SUMMARY']['min_slc_off_pct']))
            # logging.debug('    Maximum pixel count: {}'.format(
            #     max_pixel_count))
            slc_off_mask = ((landsat_df['LANDSAT'] == 'LE7') &
                            ((landsat_df['YEAR'] >= 2004) |
                             ((landsat_df['YEAR'] == 2003) &
                              (landsat_df['DOY'] > 151))))
            slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] /
                                 landsat_df['PIXEL_TOTAL'])
            # slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / max_pixel_count)
            landsat_df = landsat_df[(
                (slc_off_pct >= ini['SUMMARY']['min_slc_off_pct'])
                & slc_off_mask) | (~slc_off_mask)]

        if landsat_df.empty:
            logging.error(
                '  Empty Landsat dataframe after filtering, skipping zone')
            continue

        logging.debug('  Computing Landsat annual summaries')
        agg_dict = {
            'PIXEL_COUNT': {
                'PIXEL_COUNT': 'mean',
                'SCENE_COUNT': 'count'
            },
            'PIXEL_TOTAL': {
                'PIXEL_TOTAL': 'mean'
            },
            'FMASK_COUNT': {
                'FMASK_COUNT': 'mean'
            },
            'FMASK_TOTAL': {
                'FMASK_TOTAL': 'mean'
            },
            'CLOUD_SCORE': {
                'CLOUD_SCORE': 'mean'
            }
        }
        for field in landsat_df.columns.values:
            if field in landsat_annual_fields:
                agg_dict.update({field: {field: 'mean'}})
        landsat_df = landsat_df \
            .groupby(['ZONE_NAME', 'ZONE_FID', 'YEAR']) \
            .agg(agg_dict)
        landsat_df.columns = landsat_df.columns.droplevel(0)
        landsat_df.reset_index(inplace=True)
        # landsat_df = landsat_df[landsat_annual_fields]
        landsat_df['YEAR'] = landsat_df['YEAR'].astype(np.int)
        landsat_df['SCENE_COUNT'] = landsat_df['SCENE_COUNT'].astype(np.int)
        landsat_df['PIXEL_COUNT'] = landsat_df['PIXEL_COUNT'].astype(np.int)
        landsat_df['PIXEL_TOTAL'] = landsat_df['PIXEL_TOTAL'].astype(np.int)
        landsat_df['FMASK_COUNT'] = landsat_df['FMASK_COUNT'].astype(np.int)
        landsat_df['FMASK_TOTAL'] = landsat_df['FMASK_TOTAL'].astype(np.int)
        landsat_df.sort_values(by='YEAR', inplace=True)

        # Aggregate GRIDMET (to water year)
        if os.path.isfile(gridmet_monthly_path):
            logging.debug('  Reading montly GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_monthly_path)
        elif os.path.isfile(gridmet_daily_path):
            logging.debug('  Reading daily GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_daily_path)

        logging.debug('  Computing GRIDMET summaries')
        # Summarize GRIDMET for target months year
        if (gridmet_start_month in [10, 11, 12]
                and gridmet_end_month in [10, 11, 12]):
            month_mask = ((gridmet_df['MONTH'] >= gridmet_start_month) &
                          (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
        elif (gridmet_start_month in [10, 11, 12]
              and gridmet_end_month not in [10, 11, 12]):
            month_mask = gridmet_df['MONTH'] >= gridmet_start_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
            month_mask = gridmet_df['MONTH'] <= gridmet_end_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        else:
            month_mask = ((gridmet_df['MONTH'] >= gridmet_start_month) &
                          (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        # GROUP_YEAR for rows not in the GRIDMET month range will be NAN
        gridmet_df = gridmet_df[~pd.isnull(gridmet_df['GROUP_YEAR'])]

        if year_list:
            gridmet_df = gridmet_df[gridmet_df['GROUP_YEAR'].isin(year_list)]

        if gridmet_df.empty:
            logging.error(
                '    Empty GRIDMET dataframe after filtering by year')
            continue

        # Group GRIDMET data by user specified range (default is water year)
        gridmet_group_df = gridmet_df \
            .groupby(['ZONE_FID', 'ZONE_NAME', 'GROUP_YEAR']) \
            .agg({'ETO': {'ETO': 'sum'}, 'PPT': {'PPT': 'sum'}})
        gridmet_group_df.columns = gridmet_group_df.columns.droplevel(0)
        gridmet_group_df.reset_index(inplace=True)
        gridmet_group_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True)
        gridmet_group_df.sort_values(by='YEAR', inplace=True)

        # # Group GRIDMET data by month
        # gridmet_month_df = gridmet_df.groupby(
        #     ['ZONE_FID', 'ZONE_NAME', 'GROUP_YEAR', 'MONTH']).agg({
        #         'ETO': {'ETO': 'sum'}, 'PPT': {'PPT': 'sum'}})
        # gridmet_month_df.columns = gridmet_month_df.columns.droplevel(0)
        # gridmet_month_df.reset_index(inplace=True)
        # gridmet_month_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True)
        # # gridmet_month_df.sort_values(by=['YEAR', 'MONTH'], inplace=True)
        # gridmet_month_df.reset_index(inplace=True)
        # # Rename monthly PPT columns
        # gridmet_month_df['MONTH'] = 'PPT_M' + gridmet_month_df['MONTH'].astype(str)
        # # Pivot rows up to separate columns
        # gridmet_month_df = gridmet_month_df.pivot_table(
        #     'PPT', ['ZONE_FID', 'YEAR'], 'MONTH')
        # gridmet_month_df.reset_index(inplace=True)
        # columns = ['ZONE_FID', 'YEAR'] + ['PPT_M{}'.format(m) for m in gridmet_months]
        # gridmet_month_df = gridmet_month_df[columns]
        # del gridmet_month_df.index.name

        # Merge Landsat and GRIDMET collections
        zone_df = landsat_df.merge(gridmet_group_df,
                                   on=['ZONE_FID', 'ZONE_NAME', 'YEAR'])
        # gridmet_group_df, on=['ZONE_FID', 'YEAR'])
        # zone_df = zone_df.merge(
        #     gridmet_month_df, on=['ZONE_FID', 'ZONE_NAME', 'YEAR'])
        #     gridmet_month_df, on=['ZONE_FID', 'YEAR'])
        if zone_df is None or zone_df.empty:
            logging.info('  Empty zone dataframe, not generating figures')
            continue

        # Save annual Landsat and GRIDMET tables
        logging.debug('  Saving summary tables')

        logging.debug('  {}'.format(landsat_summary_path))
        landsat_df.sort_values(by=['YEAR'], inplace=True)
        landsat_df.to_csv(landsat_summary_path, index=False)
        # columns=export_fields

        logging.debug('  {}'.format(gridmet_summary_path))
        gridmet_group_df.sort_values(by=['YEAR'], inplace=True)
        gridmet_group_df.to_csv(gridmet_summary_path, index=False)
        # columns=export_fields

        logging.debug('  {}'.format(zone_summary_path))
        zone_df.sort_values(by=['YEAR'], inplace=True)
        zone_df.to_csv(zone_summary_path, index=False)
        # columns=export_fields

        # Adjust year range based on data availability?
        # start_year = min(zone_df['YEAR']),
        # end_year = max(zone_df['YEAR'])

        logging.debug('  Generating figures')
        for band in timeseries_bands:
            timeseries_plot(band, zone_df, zone_name, zone_figures_ws,
                            ini['INPUTS']['start_year'],
                            ini['INPUTS']['end_year'], band_name, band_unit,
                            band_color, plot_dict)

        for band_x, band_y in scatter_bands:
            scatter_plot(band_x, band_y, zone_df, zone_name, zone_figures_ws,
                         band_name, band_unit, band_color, plot_dict)

        for band in complementary_bands:
            complementary_plot(band, zone_df, zone_name, zone_figures_ws,
                               band_name, band_unit, band_color, plot_dict)

        del landsat_df, gridmet_df, zone_df
Esempio n. 9
0
def ee_image_download(ini_path=None, overwrite_flag=False):
    """Earth Engine Annual Mean Image Download

    Parameters
    ----------
    ini_path : str
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    """
    logging.info('\nEarth Engine GRIDMET Image Download')

    # Do we need to support separate GRIDMET years?
    # start_year = 1984
    # end_year = 2016

    gridmet_download_bands = {
        'eto': 'ETo',
        'pr': 'PPT',
    }

    # If false, script will export annual and water year total images
    gridmet_monthly_flag = False

    gridmet_flag = True
    pdsi_flag = False

    pdsi_date_list = [
        '0120',
        '0220',
        '0320',
        '0420',
        '0520',
        '0620',
        '0720',
        '0820',
        '0920',
        '1020',
        '1120',
        '1220',
    ]
    # pdsi_date_list = ['0920', '1220']
    # pdsi_date_list = []

    if gridmet_monthly_flag:
        gridmet_folder = 'gridmet_monthly'
    else:
        gridmet_folder = 'gridmet_annual'
    if not pdsi_date_list:
        pdsi_folder = 'pdsi_full'
    else:
        pdsi_folder = 'pdsi'

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='SPATIAL')
    inputs.parse_section(ini, section='EXPORT')
    inputs.parse_section(ini, section='IMAGES')

    nodata_value = -9999

    # Manually set output spatial reference
    logging.info('\nHardcoding GRIDMET snap, cellsize and spatial reference')
    ini['output_x'], ini['output_y'] = -124.79299639209513, 49.41685579737572
    ini['SPATIAL']['cellsize'] = 0.041666001963701
    # ini['SPATIAL']['cellsize'] = [0.041666001963701, 0.041666001489718]
    # ini['output_x'], ini['output_y'] = -124.79166666666666666667, 25.04166666666666666667
    # ini['SPATIAL']['cellsize'] = 1. / 24
    ini['SPATIAL']['osr'] = gdc.epsg_osr(4326)
    # ini['SPATIAL']['osr'] = gdc.epsg_osr(4269)
    ini['SPATIAL']['crs'] = 'EPSG:4326'
    logging.debug('  Snap: {} {}'.format(ini['output_x'], ini['output_y']))
    logging.debug('  Cellsize: {}'.format(ini['SPATIAL']['cellsize']))
    logging.debug('  OSR: {}'.format(ini['SPATIAL']['osr']))

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID before merging geometries
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']
        ]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']
        ]

    # Merge geometries
    if ini['INPUTS']['merge_geom_flag']:
        merge_geom = ogr.Geometry(ogr.wkbMultiPolygon)
        for zone in zone_geom_list:
            zone_multipolygon = ogr.ForceToMultiPolygon(
                ogr.CreateGeometryFromJson(json.dumps(zone[2])))
            for zone_polygon in zone_multipolygon:
                merge_geom.AddGeometry(zone_polygon)
        # merge_json = json.loads(merge_mp.ExportToJson())
        zone_geom_list = [[
            0, ini['INPUTS']['zone_filename'],
            json.loads(merge_geom.ExportToJson())
        ]]
        ini['INPUTS']['zone_field'] = ''

    # Need zone_shp_path projection to build EE geometries
    zone_osr = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path'])
    zone_proj = gdc.osr_wkt(zone_osr)
    # zone_proj = ee.Projection(zone_proj).wkt().getInfo()
    # zone_proj = zone_proj.replace('\n', '').replace(' ', '')
    logging.debug('  Zone Projection: {}'.format(zone_proj))

    # Initialize Earth Engine API key
    logging.info('\nInitializing Earth Engine')
    ee.Initialize()
    utils.ee_request(ee.Number(1).getInfo())

    # Get current running tasks
    tasks = utils.get_ee_tasks()

    # Download images for each feature separately
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone_name = zone_name.replace(' ', '_')
        logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid))

        # Build EE geometry object for zonal stats
        zone_geom = ee.Geometry(zone_json, zone_proj, False)

        # Project the zone_geom to the GRIDMET projection
        # if zone_proj != output_proj:
        zone_geom = zone_geom.transform(ini['SPATIAL']['crs'], 0.001)

        # Get the extent from the Earth Engine geometry object?
        zone_extent = zone_geom.bounds().getInfo()['coordinates'][0]
        zone_extent = gdc.Extent([
            min(zip(*zone_extent)[0]),
            min(zip(*zone_extent)[1]),
            max(zip(*zone_extent)[0]),
            max(zip(*zone_extent)[1])
        ])
        # # Use GDAL and geometry json to build extent, transform, and shape
        # zone_extent = gdc.Extent(
        #     ogr.CreateGeometryFromJson(json.dumps(zone_json)).GetEnvelope())
        # # zone_extent = gdc.Extent(zone_geom.GetEnvelope())
        # zone_extent.ymin, zone_extent.xmax = zone_extent.xmax, zone_extent.ymin

        # Adjust extent to match raster
        zone_extent = zone_extent.adjust_to_snap('EXPAND',
                                                 ini['SPATIAL']['snap_x'],
                                                 ini['SPATIAL']['snap_y'],
                                                 ini['SPATIAL']['cellsize'])
        zone_geo = zone_extent.geo(ini['SPATIAL']['cellsize'])
        zone_transform = gdc.geo_2_ee_transform(zone_geo)
        zone_transform = '[' + ','.join(map(str, zone_transform)) + ']'
        zone_shape = zone_extent.shape(ini['SPATIAL']['cellsize'])
        logging.debug('  Zone Shape: {}'.format(zone_shape))
        logging.debug('  Zone Transform: {}'.format(zone_transform))
        logging.debug('  Zone Extent: {}'.format(zone_extent))
        # logging.debug('  Geom: {}'.format(zone_geom.getInfo()))

        # output_transform = zone_transform[:]
        output_transform = '[' + ','.join(map(str, zone_transform)) + ']'
        output_shape = '[{1}x{0}]'.format(*zone_shape)
        logging.debug('  Output Projection: {}'.format(ini['SPATIAL']['crs']))
        logging.debug('  Output Transform: {}'.format(output_transform))
        logging.debug('  Output Shape: {}'.format(output_shape))

        zone_gridmet_ws = os.path.join(ini['IMAGES']['output_ws'], zone_name,
                                       gridmet_folder)
        zone_pdsi_ws = os.path.join(ini['IMAGES']['output_ws'], zone_name,
                                    pdsi_folder)
        if not os.path.isdir(zone_gridmet_ws):
            os.makedirs(zone_gridmet_ws)
        if not os.path.isdir(zone_pdsi_ws):
            os.makedirs(zone_pdsi_ws)

        # GRIDMET PPT & ETo
        if gridmet_flag:
            # Process each image in the collection by date
            export_list = []
            for year in range(ini['INPUTS']['start_year'],
                              ini['INPUTS']['end_year'] + 1):
                for b_key, b_name in sorted(gridmet_download_bands.items()):
                    if gridmet_monthly_flag:
                        # Monthly
                        for start_month in range(1, 13):
                            start_dt = datetime.datetime(year, start_month, 1)
                            end_dt = (start_dt + relativedelta(months=1) -
                                      datetime.timedelta(0, 1))
                            export_list.append([
                                start_dt, end_dt,
                                '{:04d}{:02d}'.format(year, start_month),
                                b_key, b_name
                            ])
                    else:
                        # Calendar year
                        export_list.append([
                            datetime.datetime(year, 1, 1),
                            datetime.datetime(year + 1, 1, 1),
                            '{:04d}'.format(year), b_key, b_name
                        ])
                        # Water year
                        export_list.append([
                            datetime.datetime(year - 1, 10, 1),
                            datetime.datetime(year, 10, 1) -
                            datetime.timedelta(0, 1), '{:04d}wy'.format(year),
                            b_key, b_name
                        ])

            for start_dt, end_dt, date_str, b_key, b_name in export_list:
                logging.info('{} {}'.format(date_str, b_name))
                if end_dt > datetime.datetime.today():
                    logging.info('  End date after current date, skipping')
                    continue

                # Rename to match naming style from getDownloadURL
                #     image_name.band.tif
                export_id = '{}_{}_gridmet_{}'.format(
                    ini['INPUTS']['zone_filename'], date_str, b_name.lower())
                output_id = '{}_gridmet.{}'.format(date_str, b_name.lower())

                export_path = os.path.join(ini['EXPORT']['export_ws'],
                                           export_id + '.tif')
                output_path = os.path.join(zone_gridmet_ws, output_id + '.tif')
                logging.debug('  Export: {}'.format(export_path))
                logging.debug('  Output: {}'.format(output_path))

                if overwrite_flag:
                    if export_id in tasks.keys():
                        logging.debug('  Task already submitted, cancelling')
                        ee.data.cancelTask(tasks[export_id])
                        del tasks[export_id]
                    if os.path.isfile(export_path):
                        logging.debug(
                            '  Export image already exists, removing')
                        utils.remove_file(export_path)
                        # os.remove(export_path)
                    if os.path.isfile(output_path):
                        logging.debug(
                            '  Output image already exists, removing')
                        utils.remove_file(output_path)
                        # os.remove(output_path)
                else:
                    if os.path.isfile(export_path):
                        logging.debug('  Export image already exists, moving')
                        shutil.move(export_path, output_path)
                        gdc.raster_path_set_nodata(output_path, nodata_value)
                        # DEADBEEF - should raster stats be computed?
                        # gdc.raster_statistics(output_path)
                        # subprocess.check_output([
                        #         'gdalwarp',
                        #         '-ot', float_output_type, '-overwrite',
                        #         '-of', 'GTiff', '-co', 'COMPRESS=LZW',
                        #         '-srcnodata', str(nodata_value),
                        #         '-dstnodata', '{:f}'.format(float_nodata_value),
                        #         export_path, output_path])
                        # with open(os.devnull, 'w') as devnull:
                        #     subprocess.check_call(
                        #         ['gdalinfo', '-stats', output_path],
                        #         stdout=devnull)
                        # subprocess.check_output(
                        #     ['gdalmanage', 'delete', export_path])
                        continue
                    elif os.path.isfile(output_path):
                        logging.debug(
                            '  Output image already exists, skipping')
                        continue
                    elif export_id in tasks.keys():
                        logging.debug('  Task already submitted, skipping')
                        continue

                # GRIDMET collection is available in EarthEngine
                gridmet_coll = ee.ImageCollection('IDAHO_EPSCOR/GRIDMET')\
                    .filterDate(start_dt, end_dt) \
                    .select([b_key])

                gridmet_image = ee.Image(gridmet_coll.sum())

                logging.debug('  Starting download task')
                # if ini['EXPORT']['export_dest'] == 'gdrive':
                task = ee.batch.Export.image.toDrive(
                    image=gridmet_image,
                    description=export_id,
                    # folder=ini['EXPORT']['export_folder'],
                    fileNamePrefix=export_id,
                    dimensions=output_shape,
                    crs=ini['SPATIAL']['crs'],
                    crsTransform=output_transform)
                # elif ini['EXPORT']['export_dest'] == 'gdrive':
                #     task = ee.batch.Export.image.toCloudStorage(
                #         image=gridmet_image,
                #         description=export_id,
                #         bucket=ini['EXPORT']['export_folder'],
                #         fileNamePrefix=export_id,
                #         dimensions=output_shape,
                #         crs=ini['SPATIAL']['crs'],
                #         crsTransform=output_transform)

                logging.debug('  Starting export task')
                utils.ee_request(task.start())

        # GRIDMET PDSI
        if pdsi_flag:
            # Process each image in the collection by date
            export_list = []
            b_name = 'pdsi'
            for year in range(ini['INPUTS']['start_year'],
                              ini['INPUTS']['end_year'] + 1):
                # Dekad
                for start_month in range(1, 13):
                    for start_day, end_day in zip([1, 10, 20], [10, 20, 30]):
                        if start_month == 12 and start_day == 20:
                            # Go to the first day of the next year (and month)
                            start_dt = datetime.datetime(
                                year, start_month, start_day)
                            end_dt = datetime.datetime(year + 1, 1, 1)
                        elif start_month < 12 and start_day == 20:
                            # Go to the first day of the next month
                            start_dt = datetime.datetime(
                                year, start_month, start_day)
                            end_dt = datetime.datetime(year, start_month + 1,
                                                       1)
                        else:
                            start_dt = datetime.datetime(
                                year, start_month, start_day)
                            end_dt = datetime.datetime(year, start_month,
                                                       end_day)
                        end_dt = end_dt - datetime.timedelta(0, 1)
                        export_list.append([
                            start_dt, end_dt, '{:04d}{:02d}{:02d}'.format(
                                year, start_month, start_day), b_name
                        ])

            # Filter list to only keep last dekad of October and December
            if pdsi_date_list:
                export_list = [[
                    start_dt, end_dt, date_str, b_name
                ] for start_dt, end_dt, date_str, b_name in export_list
                               if start_dt.strftime('%m%d') in pdsi_date_list]

            for start_dt, end_dt, date_str, b_name in export_list:
                logging.info('{} {}'.format(date_str, b_name))

                # Rename to match naming style from getDownloadURL
                #     image_name.band.tif
                export_id = '{}_{}_{}'.format(
                    os.path.splitext(
                        ini['INPUTS']['zone_filename'])[0].lower(), date_str,
                    b_name.lower())
                output_id = '{}_{}'.format(date_str, b_name.lower())

                export_path = os.path.join(ini['EXPORT']['export_ws'],
                                           export_id + '.tif')
                output_path = os.path.join(zone_pdsi_ws, output_id + '.tif')
                logging.debug('  Export: {}'.format(export_path))
                logging.debug('  Output: {}'.format(output_path))

                if overwrite_flag:
                    if export_id in tasks.keys():
                        logging.debug('  Task already submitted, cancelling')
                        ee.data.cancelTask(tasks[export_id])
                        del tasks[export_id]
                    if os.path.isfile(export_path):
                        logging.debug(
                            '  Export image already exists, removing')
                        utils.remove_file(export_path)
                        # os.remove(export_path)
                    if os.path.isfile(output_path):
                        logging.debug(
                            '  Output image already exists, removing')
                        utils.remove_file(output_path)
                        # os.remove(output_path)
                else:
                    if os.path.isfile(export_path):
                        logging.debug('  Export image already exists, moving')
                        shutil.move(export_path, output_path)
                        gdc.raster_path_set_nodata(output_path, nodata_value)
                        # DEADBEEF - should raster stats be computed?
                        # gdc.raster_statistics(output_path)
                        continue
                    elif os.path.isfile(output_path):
                        logging.debug(
                            '  Output image already exists, skipping')
                        continue
                    elif export_id in tasks.keys():
                        logging.debug('  Task already submitted, skipping')
                        continue

                # PDSI collection is available in EarthEngine
                # Index the PDSI image directly
                pdsi_image = ee.Image('IDAHO_EPSCOR/PDSI/{}'.format(
                    start_dt.strftime('%Y%m%d')))
                # pdsi_coll = ee.ImageCollection('IDAHO_EPSCOR/PDSI')\
                #     .filterDate(start_dt, end_dt) \
                #     .select(['pdsi'])
                # pdsi_image = ee.Image(pdsi_coll.mean())

                logging.debug('  Building export task')
                # if ini['EXPORT']['export_dest'] == 'gdrive':
                task = ee.batch.Export.image.toDrive(
                    image=pdsi_image,
                    description=export_id,
                    # folder=ini['EXPORT']['export_folder'],
                    fileNamePrefix=export_id,
                    dimensions=output_shape,
                    crs=ini['SPATIAL']['crs'],
                    crsTransform=output_transform)
                # elif ini['EXPORT']['export_dest'] == 'cloud':
                #     task = ee.batch.Export.image.toCloudStorage(
                #         image=pdsi_image,
                #         description=export_id,
                #         bucket=ini['EXPORT']['export_folder'],
                #         fileNamePrefix=export_id,
                #         dimensions=output_shape,
                #         crs=ini['SPATIAL']['crs'],
                #         crsTransform=output_transform)

                logging.debug('  Starting export task')
                utils.ee_request(task.start())
Esempio n. 10
0
def main(ini_path, overwrite_flag=True):
    """Generate Beamer ETg summary tables

    Args:
        ini_path (str):
        overwrite_flag (bool): if True, overwrite existing figures
            Default is True (for now)
    """

    logging.info('\nGenerate Beamer ETg summary tables')

    # # Eventually get from INI (like ini['BEAMER']['landsat_products'])
    # daily_fields = [
    #     'ZONE_NAME', 'ZONE_FID', 'DATE', 'SCENE_ID', 'PLATFORM', 'PATH', 'ROW',
    #     'YEAR', 'MONTH', 'DAY', 'DOY', 'WATER_YEAR',
    #     'PIXEL_COUNT', 'ETSTAR_COUNT',
    #     'NDVI_TOA', 'NDWI_TOA', 'ALBEDO_SUR', 'TS', 'EVI_SUR', 'ETSTAR_MEAN',
    #     'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI',
    #     'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI',
    #     'WY_ETO', 'WY_PPT']
    # annual_fields = [
    #     'SCENE_COUNT', 'PIXEL_COUNT', 'ETSTAR_COUNT',
    #     'NDVI_TOA', 'NDWI_TOA', 'ALBEDO_SUR', 'TS',
    #     'EVI_SUR_MEAN', 'EVI_SUR_MEDIAN', 'EVI_SUR_MIN', 'EVI_SUR_MAX',
    #     'ETSTAR_MEAN',
    #     'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI',
    #     'ET_MEAN', 'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI',
    #     'WY_ETO', 'WY_PPT']

    # For unit conversion
    eto_fields = [
        'ETG_MEAN', 'ETG_LPI', 'ETG_UPI', 'ETG_LCI', 'ETG_UCI', 'ET_MEAN',
        'ET_LPI', 'ET_UPI', 'ET_LCI', 'ET_UCI', 'WY_ETO'
    ]
    ppt_fields = ['WY_PPT']

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='BEAMER')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='TABLES')

    # Output paths
    output_daily_path = os.path.join(
        ini['SUMMARY']['output_ws'],
        ini['BEAMER']['output_name'].replace('.csv', '_daily.xlsx'))
    output_annual_path = os.path.join(
        ini['SUMMARY']['output_ws'],
        ini['BEAMER']['output_name'].replace('.csv', '_annual.xlsx'))

    # Check if files already exist
    if overwrite_flag:
        if os.path.isfile(output_daily_path):
            os.remove(output_daily_path)
        if os.path.isfile(output_annual_path):
            os.remove(output_annual_path)
    else:
        if (os.path.isfile(output_daily_path)
                and os.path.isfile(output_annual_path)):
            logging.info('\nOutput files already exist and '
                         'overwrite is False, exiting')
            return True

    # Start/end year
    year_list = list(
        range(ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(
        utils.month_range(gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(', '.join(
        map(str, gridmet_months))))

    # Read in the zonal stats CSV
    logging.debug('  Reading zonal stats CSV file')
    input_df = pd.read_csv(
        os.path.join(ini['ZONAL_STATS']['output_ws'],
                     ini['BEAMER']['output_name']))

    logging.debug('  Filtering Landsat dataframe')
    input_df = input_df[input_df['PIXEL_COUNT'] > 0]

    # # This assumes that there are L5/L8 images in the dataframe
    # if not input_df.empty:
    #     max_pixel_count = max(input_df['PIXEL_COUNT'])
    # else:
    #     max_pixel_count = 0

    if ini['INPUTS']['fid_keep_list']:
        input_df = input_df[input_df['ZONE_FID'].isin(
            ini['INPUTS']['fid_keep_list'])]
    if ini['INPUTS']['fid_skip_list']:
        input_df = input_df[~input_df['ZONE_FID'].
                            isin(ini['INPUTS']['fid_skip_list'])]
    zone_name_list = sorted(list(set(input_df['ZONE_NAME'].values)))

    if year_list:
        input_df = input_df[input_df['YEAR'].isin(year_list)]
    if month_list:
        input_df = input_df[input_df['MONTH'].isin(month_list)]
    if doy_list:
        input_df = input_df[input_df['DOY'].isin(doy_list)]

    if ini['INPUTS']['path_keep_list']:
        input_df = input_df[input_df['PATH'].isin(
            ini['INPUTS']['path_keep_list'])]
    if (ini['INPUTS']['row_keep_list']
            and ini['INPUTS']['row_keep_list'] != ['XXX']):
        input_df = input_df[input_df['ROW'].isin(
            ini['INPUTS']['row_keep_list'])]

    # Assume the default is for these to be True and only filter if False
    if not ini['INPUTS']['landsat4_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT04']
    if not ini['INPUTS']['landsat5_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LT05']
    if not ini['INPUTS']['landsat7_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LE07']
    if not ini['INPUTS']['landsat8_flag']:
        input_df = input_df[input_df['PLATFORM'] != 'LC08']

    if ini['INPUTS']['scene_id_keep_list']:
        # Replace XXX with primary ROW value for checking skip list SCENE_ID
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])
        ])
        input_df = input_df[scene_id_df.isin(
            ini['INPUTS']['scene_id_keep_list']).values]
        # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
        # input_df = input_df[input_df['SCENE_ID'].isin(
        #     ini['INPUTS']['scene_id_keep_list'])]
    if ini['INPUTS']['scene_id_skip_list']:
        # Replace XXX with primary ROW value for checking skip list SCENE_ID
        scene_id_df = pd.Series([
            s.replace('XXX', '{:03d}'.format(int(r)))
            for s, r in zip(input_df['SCENE_ID'], input_df['ROW'])
        ])
        input_df = input_df[np.logical_not(
            scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)]
        # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
        # input_df = input_df[~input_df['SCENE_ID'].isin(
        #     ini['INPUTS']['scene_id_skip_list'])]

    # Filter by QA/QC value
    if ini['SUMMARY']['max_qa'] >= 0 and not input_df.empty:
        logging.debug('    Maximum QA: {0}'.format(ini['SUMMARY']['max_qa']))
        input_df = input_df[input_df['QA'] <= ini['SUMMARY']['max_qa']]

    # First filter by average cloud score
    if ini['SUMMARY']['max_cloud_score'] < 100 and not input_df.empty:
        logging.debug('    Maximum cloud score: {0}'.format(
            ini['SUMMARY']['max_cloud_score']))
        input_df = input_df[
            input_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

    # Filter by Fmask percentage
    if ini['SUMMARY']['max_fmask_pct'] < 100 and not input_df.empty:
        input_df['FMASK_PCT'] = 100 * (input_df['FMASK_COUNT'] /
                                       input_df['FMASK_TOTAL'])
        logging.debug('    Max Fmask threshold: {}'.format(
            ini['SUMMARY']['max_fmask_pct']))
        input_df = input_df[
            input_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

    # Filter low count SLC-off images
    if ini['SUMMARY']['min_slc_off_pct'] > 0 and not input_df.empty:
        logging.debug('    Mininum SLC-off threshold: {}%'.format(
            ini['SUMMARY']['min_slc_off_pct']))
        # logging.debug('    Maximum pixel count: {}'.format(
        #     max_pixel_count))
        slc_off_mask = ((input_df['PLATFORM'] == 'LE07') &
                        ((input_df['YEAR'] >= 2004) |
                         ((input_df['YEAR'] == 2003) &
                          (input_df['DOY'] > 151))))
        slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / input_df['PIXEL_TOTAL'])
        # slc_off_pct = 100 * (input_df['PIXEL_COUNT'] / max_pixel_count)
        input_df = input_df[((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct'])
                             & slc_off_mask) | (~slc_off_mask)]

    if input_df.empty:
        logging.error('  Empty dataframe after filtering, exiting')
        return False

    if not os.path.isfile(output_daily_path):
        logging.info('\nWriting daily values to Excel')
        excel_f = ExcelWriter(output_daily_path)
        for zone_name in sorted(zone_name_list):
            logging.info('  {}'.format(zone_name))
            zone_df = input_df[input_df['ZONE_NAME'] == zone_name]
            zone_df.to_excel(excel_f,
                             zone_name,
                             index=False,
                             float_format='%.4f')
            # zone_df.to_excel(excel_f, zone_name, index=False)
            del zone_df
        excel_f.save()

    if not os.path.isfile(output_annual_path):
        logging.info('\nComputing annual summaries')
        annual_df = input_df \
            .groupby(['ZONE_NAME', 'YEAR']) \
            .agg({
                'PIXEL_COUNT': ['count', 'mean'],
                'PIXEL_TOTAL': ['mean'],
                'FMASK_COUNT': 'mean',
                'FMASK_TOTAL': 'mean',
                'CLOUD_SCORE': 'mean',
                'ETSTAR_COUNT': 'mean',
                'NDVI_TOA': 'mean',
                'NDWI_TOA': 'mean',
                'ALBEDO_SUR': 'mean',
                'TS': 'mean',
                # 'EVI_SUR': 'mean',
                'EVI_SUR': ['mean', 'median', 'min', 'max'],
                'ETSTAR_MEAN': 'mean',
                'ETG_MEAN': 'mean',
                'ETG_LPI': 'mean',
                'ETG_UPI': 'mean',
                'ETG_LCI': 'mean',
                'ETG_UCI': 'mean',
                'ET_MEAN': 'mean',
                'ET_LPI': 'mean',
                'ET_UPI': 'mean',
                'ET_LCI': 'mean',
                'ET_UCI': 'mean',
                'WY_ETO': 'mean',
                'WY_PPT': 'mean'
            })
        annual_df.columns = annual_df.columns.map('_'.join)
        annual_df = annual_df.rename(columns={
            'PIXEL_COUNT_count': 'SCENE_COUNT',
            'PIXEL_COUNT_mean': 'PIXEL_COUNT'
        })
        annual_df = annual_df.rename(
            columns={
                'EVI_SUR_mean': 'EVI_SUR_MEAN',
                'EVI_SUR_median': 'EVI_SUR_MEDIAN',
                'EVI_SUR_min': 'EVI_SUR_MIN',
                'EVI_SUR_max': 'EVI_SUR_MAX'
            })
        annual_df.rename(columns=lambda x: str(x).replace('_mean', ''),
                         inplace=True)
        annual_df['SCENE_COUNT'] = annual_df['SCENE_COUNT'].astype(np.int)
        annual_df['PIXEL_COUNT'] = annual_df['PIXEL_COUNT'].astype(np.int)
        annual_df['PIXEL_TOTAL'] = annual_df['PIXEL_TOTAL'].astype(np.int)
        annual_df['FMASK_COUNT'] = annual_df['FMASK_COUNT'].astype(np.int)
        annual_df['FMASK_TOTAL'] = annual_df['FMASK_TOTAL'].astype(np.int)
        annual_df['ETSTAR_COUNT'] = annual_df['ETSTAR_COUNT'].astype(np.int)
        annual_df = annual_df.reset_index()

        # Convert ETo units
        if (ini['BEAMER']['eto_units'] == 'mm'
                and ini['TABLES']['eto_units'] == 'mm'):
            pass
        elif (ini['BEAMER']['eto_units'] == 'mm'
              and ini['TABLES']['eto_units'] == 'in'):
            annual_df[eto_fields] /= (25.4)
        elif (ini['BEAMER']['eto_units'] == 'mm'
              and ini['TABLES']['eto_units'] == 'ft'):
            annual_df[eto_fields] /= (12 * 25.4)
        else:
            logging.error(
                ('\nERROR: Input units {} and output units {} are not ' +
                 'currently supported, exiting').format(
                     ini['BEAMER']['eto_units'], ini['TABLES']['eto_units']))
            sys.exit()

        # Convert PPT units
        if (ini['BEAMER']['ppt_units'] == 'mm'
                and ini['TABLES']['ppt_units'] == 'mm'):
            pass
        elif (ini['BEAMER']['ppt_units'] == 'mm'
              and ini['TABLES']['ppt_units'] == 'in'):
            annual_df[ppt_fields] /= (25.4)
        elif (ini['BEAMER']['ppt_units'] == 'mm'
              and ini['TABLES']['ppt_units'] == 'ft'):
            annual_df[ppt_fields] /= (12 * 25.4)
        else:
            logging.error(
                ('\nERROR: Input units {} and output units {} are not ' +
                 'currently supported, exiting').format(
                     ini['BEAMER']['ppt_units'], ini['TABLES']['ppt_units']))
            sys.exit()

        logging.info('\nWriting annual values to Excel')
        excel_f = ExcelWriter(output_annual_path)
        for zone_name in sorted(zone_name_list):
            logging.info('  {}'.format(zone_name))
            zone_df = annual_df[annual_df['ZONE_NAME'] == zone_name]
            zone_df.to_excel(excel_f,
                             zone_name,
                             index=False,
                             float_format='%.4f')
            del zone_df
        excel_f.save()
def main(ini_path, show_flag=False, overwrite_flag=True):
    """Generate Bokeh figures

    Bokeh issues:
    Adjust y range based on non-muted data
        https://stackoverflow.com/questions/43620837/how-to-get-bokeh-to-dynamically-adjust-y-range-when-panning
    Linked interactive legends so that there is only one legend for the gridplot
    Maybe hide or mute QA values above max (instead of filtering them in advance)

    Args:
        ini_path (str):
        show_flag (bool): if True, show the figures in the browser.
            Default is False.
        overwrite_flag (bool): if True, overwrite existing tables.
            Default is True (for now)
    """
    logging.info('\nGenerate interactive timeseries figures')

    # Eventually read from INI
    plot_var_list = ['NDVI_TOA', 'ALBEDO_SUR', 'TS', 'NDWI_TOA', 'EVI_SUR']
    # plot_var_list = [
    #     'NDVI_TOA', 'ALBEDO_SUR', 'TS', 'NDWI_TOA',
    #     'CLOUD_SCORE', 'FMASK_PCT']
    output_folder = 'figures'

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='ZONAL_STATS')
    inputs.parse_section(ini, section='SUMMARY')
    inputs.parse_section(ini, section='FIGURES')
    inputs.parse_section(ini, section='BEAMER')

    # Hardcode GRIDMET month range to the water year
    ini['SUMMARY']['gridmet_start_month'] = 10
    ini['SUMMARY']['gridmet_end_month'] = 9

    # Start/end year
    year_list = list(range(
        ini['INPUTS']['start_year'], ini['INPUTS']['end_year'] + 1))
    month_list = list(utils.wrapped_range(
        ini['INPUTS']['start_month'], ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(utils.wrapped_range(
        ini['INPUTS']['start_doy'], ini['INPUTS']['end_doy'], 1, 366))

    # GRIDMET month range (default to water year)
    gridmet_start_month = ini['SUMMARY']['gridmet_start_month']
    gridmet_end_month = ini['SUMMARY']['gridmet_end_month']
    gridmet_months = list(utils.month_range(
        gridmet_start_month, gridmet_end_month))
    logging.info('\nGridmet months: {}'.format(
        ', '.join(map(str, gridmet_months))))

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'], zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID before merging geometries
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']]

    # # Filter features by FID before merging geometries
    # if ini['INPUTS']['fid_keep_list']:
    #     landsat_df = landsat_df[landsat_df['ZONE_FID'].isin(
    #         ini['INPUTS']['fid_keep_list'])]
    # if ini['INPUTS']['fid_skip_list']:
    #     landsat_df = landsat_df[~landsat_df['ZONE_FID'].isin(
    #         ini['INPUTS']['fid_skip_list'])]

    logging.info('\nProcessing zones')
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone_name = zone_name.replace(' ', '_')
        logging.info('ZONE: {} (FID: {})'.format(zone_name, zone_fid))

        zone_stats_ws = os.path.join(
            ini['ZONAL_STATS']['output_ws'], zone_name)
        zone_figures_ws = os.path.join(
            ini['SUMMARY']['output_ws'], zone_name, output_folder)
        if not os.path.isdir(zone_stats_ws):
            logging.debug('  Folder {} does not exist, skipping'.format(
                zone_stats_ws))
            continue
        elif not os.path.isdir(zone_figures_ws):
            os.makedirs(zone_figures_ws)

        # Input paths
        landsat_daily_path = os.path.join(
            zone_stats_ws, '{}_landsat_daily.csv'.format(zone_name))
        gridmet_daily_path = os.path.join(
            zone_stats_ws, '{}_gridmet_daily.csv'.format(zone_name))
        gridmet_monthly_path = os.path.join(
            zone_stats_ws, '{}_gridmet_monthly.csv'.format(zone_name))
        if not os.path.isfile(landsat_daily_path):
            logging.error('  Landsat daily CSV does not exist, skipping zone')
            continue
        elif (not os.path.isfile(gridmet_daily_path) and
              not os.path.isfile(gridmet_monthly_path)):
            logging.error(
                '  GRIDMET daily or monthly CSV does not exist, skipping zone')
            continue
            # DEADBEEF - Eventually support generating only Landsat figures
            # logging.error(
            #     '  GRIDMET daily and/or monthly CSV files do not exist.\n'
            #     '  ETo and PPT will not be processed.')

        # Output paths
        output_doy_path = os.path.join(
            zone_figures_ws, '{}_timeseries_doy.html'.format(zone_name))
        output_date_path = os.path.join(
            zone_figures_ws, '{}_timeseries_date.html'.format(zone_name))

        logging.debug('  Reading Landsat CSV')
        landsat_df = pd.read_csv(landsat_daily_path)

        logging.debug('  Filtering Landsat dataframe')
        landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0]

        # QA field should have been written in zonal stats code
        # Eventually this block can be removed
        if 'QA' not in landsat_df.columns.values:
            landsat_df['QA'] = 0

        # # This assumes that there are L5/L8 images in the dataframe
        # if not landsat_df.empty:
        #     max_pixel_count = max(landsat_df['PIXEL_COUNT'])
        # else:
        #     max_pixel_count = 0

        if year_list:
            landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)]
        if month_list:
            landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)]
        if doy_list:
            landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)]

        # Assume the default is for these to be True and only filter if False
        if not ini['INPUTS']['landsat4_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04']
        if not ini['INPUTS']['landsat5_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05']
        if not ini['INPUTS']['landsat7_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07']
        if not ini['INPUTS']['landsat8_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08']

        if ini['INPUTS']['path_keep_list']:
            landsat_df = landsat_df[
                landsat_df['PATH'].isin(ini['INPUTS']['path_keep_list'])]
        if (ini['INPUTS']['row_keep_list'] and
                ini['INPUTS']['row_keep_list'] != ['XXX']):
            landsat_df = landsat_df[
                landsat_df['ROW'].isin(ini['INPUTS']['row_keep_list'])]

        if ini['INPUTS']['scene_id_keep_list']:
            # Replace XXX with primary ROW value for checking skip list SCENE_ID
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])])
            landsat_df = landsat_df[scene_id_df.isin(
                ini['INPUTS']['scene_id_keep_list']).values]
            # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
            # landsat_df = landsat_df[landsat_df['SCENE_ID'].isin(
            #     ini['INPUTS']['scene_id_keep_list'])]
        if ini['INPUTS']['scene_id_skip_list']:
            # Replace XXX with primary ROW value for checking skip list SCENE_ID
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])])
            landsat_df = landsat_df[np.logical_not(scene_id_df.isin(
                ini['INPUTS']['scene_id_skip_list']).values)]
            # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
            # landsat_df = landsat_df[np.logical_not(landsat_df['SCENE_ID'].isin(
            #     ini['INPUTS']['scene_id_skip_list']))]

        # Filter by QA/QC value
        if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty:
            logging.debug('    Maximum QA: {0}'.format(
                ini['SUMMARY']['max_qa']))
            landsat_df = landsat_df[
                landsat_df['QA'] <= ini['SUMMARY']['max_qa']]

        # Filter by average cloud score
        if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty:
            logging.debug('    Maximum cloud score: {0}'.format(
                ini['SUMMARY']['max_cloud_score']))
            landsat_df = landsat_df[
                landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

        # Filter by Fmask percentage
        if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty:
            landsat_df['FMASK_PCT'] = 100 * (
                landsat_df['FMASK_COUNT'] / landsat_df['FMASK_TOTAL'])
            logging.debug('    Max Fmask threshold: {}'.format(
                ini['SUMMARY']['max_fmask_pct']))
            landsat_df = landsat_df[
                landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

        # Filter low count SLC-off images
        if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty:
            logging.debug('    Mininum SLC-off threshold: {}%'.format(
                ini['SUMMARY']['min_slc_off_pct']))
            # logging.debug('    Maximum pixel count: {}'.format(
            #     max_pixel_count))
            slc_off_mask = (
                (landsat_df['PLATFORM'] == 'LE07') &
                ((landsat_df['YEAR'] >= 2004) |
                 ((landsat_df['YEAR'] == 2003) & (landsat_df['DOY'] > 151))))
            slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / landsat_df['PIXEL_TOTAL'])
            # slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / max_pixel_count)
            landsat_df = landsat_df[
                ((slc_off_pct >= ini['SUMMARY']['min_slc_off_pct']) & slc_off_mask) |
                (~slc_off_mask)]

        if landsat_df.empty:
            logging.error(
                '  Empty Landsat dataframe after filtering, skipping zone')
            continue

        # Aggregate GRIDMET (to water year)
        if os.path.isfile(gridmet_monthly_path):
            logging.debug('  Reading montly GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_monthly_path)
        elif os.path.isfile(gridmet_daily_path):
            logging.debug('  Reading daily GRIDMET CSV')
            gridmet_df = pd.read_csv(gridmet_daily_path)

        logging.debug('  Computing GRIDMET summaries')
        # Summarize GRIDMET for target months year
        if (gridmet_start_month in [10, 11, 12] and
                gridmet_end_month in [10, 11, 12]):
            month_mask = (
                (gridmet_df['MONTH'] >= gridmet_start_month) &
                (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
        elif (gridmet_start_month in [10, 11, 12] and
              gridmet_end_month not in [10, 11, 12]):
            month_mask = gridmet_df['MONTH'] >= gridmet_start_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR'] + 1
            month_mask = gridmet_df['MONTH'] <= gridmet_end_month
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        else:
            month_mask = (
                (gridmet_df['MONTH'] >= gridmet_start_month) &
                (gridmet_df['MONTH'] <= gridmet_end_month))
            gridmet_df.loc[month_mask, 'GROUP_YEAR'] = gridmet_df['YEAR']
        # GROUP_YEAR for rows not in the GRIDMET month range will be NAN
        gridmet_df = gridmet_df[~pd.isnull(gridmet_df['GROUP_YEAR'])]

        if year_list:
            gridmet_df = gridmet_df[gridmet_df['GROUP_YEAR'].isin(year_list)]

        if gridmet_df.empty:
            logging.error(
                '    Empty GRIDMET dataframe after filtering by year')
            continue

        # Group GRIDMET data by user specified range (default is water year)
        gridmet_group_df = gridmet_df \
            .groupby(['ZONE_NAME', 'ZONE_FID', 'GROUP_YEAR']) \
            .agg({'ETO': np.sum, 'PPT': np.sum}) \
            .reset_index() \
            .sort_values(by='GROUP_YEAR')
            # .rename(columns={'ETO': 'ETO', 'PPT': 'ETO'}) \
        # Rename wasn't working when chained...
        gridmet_group_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True)
        gridmet_group_df['YEAR'] = gridmet_group_df['YEAR'].astype(int)

        # # Group GRIDMET data by month
        # gridmet_month_df = gridmet_df\
        #     .groupby(['ZONE_NAME', 'ZONE_FID', 'GROUP_YEAR', 'MONTH']) \
        #     .agg({'ETO': np.sum, 'PPT': np.sum}) \
        #     .reset_index() \
        #     .sort_values(by=['GROUP_YEAR', 'MONTH'])
        # gridmet_month_df.rename(columns={'GROUP_YEAR': 'YEAR'}, inplace=True)
        # # Rename monthly PPT columns
        # gridmet_month_df['MONTH'] = 'PPT_M' + gridmet_month_df['MONTH'].astype(str)
        # # Pivot rows up to separate columns
        # gridmet_month_df = gridmet_month_df.pivot_table(
        #     'PPT', ['ZONE_NAME', 'YEAR'], 'MONTH')
        # gridmet_month_df.reset_index(inplace=True)
        # columns = ['ZONE_NAME', 'YEAR'] + ['PPT_M{}'.format(m) for m in gridmet_months]
        # gridmet_month_df = gridmet_month_df[columns]
        # del gridmet_month_df.index.name

        # Merge Landsat and GRIDMET collections
        zone_df = landsat_df.merge(
            gridmet_group_df, on=['ZONE_NAME', 'ZONE_FID', 'YEAR'])
        if zone_df is None or zone_df.empty:
            logging.info('  Empty zone dataframe, not generating figures')
            continue

        # Compute ETg
        zone_df['ETG_MEAN'] = zone_df['ETSTAR_MEAN'] * (
            zone_df['ETO'] - zone_df['PPT'])
        zone_df['ETG_LPI'] = zone_df['ETSTAR_LPI'] * (
            zone_df['ETO'] - zone_df['PPT'])
        zone_df['ETG_UPI'] = zone_df['ETSTAR_UPI'] * (
            zone_df['ETO'] - zone_df['PPT'])
        zone_df['ETG_LCI'] = zone_df['ETSTAR_LCI'] * (
            zone_df['ETO'] - zone_df['PPT'])
        zone_df['ETG_UCI'] = zone_df['ETSTAR_UCI'] * (
            zone_df['ETO'] - zone_df['PPT'])

        # Compute ET
        zone_df['ET_MEAN'] = zone_df['ETG_MEAN'] + zone_df['PPT']
        zone_df['ET_LPI'] = zone_df['ETG_LPI'] + zone_df['PPT']
        zone_df['ET_UPI'] = zone_df['ETG_UPI'] + zone_df['PPT']
        zone_df['ET_LCI'] = zone_df['ETG_LCI'] + zone_df['PPT']
        zone_df['ET_UCI'] = zone_df['ETG_UCI'] + zone_df['PPT']



        # ORIGINAL PLOTTING CODE
        # Check that plot variables are present
        for plot_var in plot_var_list:
            if plot_var not in landsat_df.columns.values:
                logging.error(
                    '  The plotting variable {} does not exist in the '
                    'dataframe'.format(plot_var))
                sys.exit()

        # if ini['INPUTS']['scene_id_keep_list']:
        #     # Replace XXX with primary ROW value for checking skip list SCENE_ID
        #     scene_id_df = pd.Series([
        #         s.replace('XXX', '{:03d}'.format(int(r)))
        #         for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])])
        #     landsat_df = landsat_df[scene_id_df.isin(
        #         ini['INPUTS']['scene_id_keep_list']).values]
        #     # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
        #     # landsat_df = landsat_df[landsat_df['SCENE_ID'].isin(
        #     #     ini['INPUTS']['scene_id_keep_list'])]
        # if ini['INPUTS']['scene_id_skip_list']:
        #     # Replace XXX with primary ROW value for checking skip list SCENE_ID
        #     scene_id_df = pd.Series([
        #         s.replace('XXX', '{:03d}'.format(int(r)))
        #         for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])])
        #     landsat_df = landsat_df[np.logical_not(scene_id_df.isin(
        #         ini['INPUTS']['scene_id_skip_list']).values)]
        #     # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
        #     # landsat_df = landsat_df[np.logical_not(landsat_df['SCENE_ID'].isin(
        #     #     ini['INPUTS']['scene_id_skip_list']))]


        # Compute colors for each QA value
        logging.debug('  Building column data source')
        qa_values = sorted(list(set(zone_df['QA'].values)))
        colors = {
            qa: "#%02x%02x%02x" % (int(r), int(g), int(b))
            for qa, (r, g, b, _) in zip(
                qa_values,
                255 * cm.viridis(mpl.colors.Normalize()(qa_values)))
        }
        logging.debug('  QA values: {}'.format(
            ', '.join(map(str, qa_values))))

        # Unpack the data by QA type to support interactive legends
        sources = dict()
        # sources = defaultdict(dict)
        # platform_list = ['LT04', 'LT05', 'LE07', 'LC08']
        for qa_value in qa_values:
            # for platform in platform_list:
            # qa_df = zone_df[
            #     (zone_df['PLATFORM'] == platform) &
            #     (zone_df['QA'] == qa_value)]
            qa_df = zone_df[zone_df['QA'] == qa_value]
            qa_data = {
                'INDEX': list(range(len(qa_df.index))),
                'PLATFORM': qa_df['PLATFORM'],
                'DATE': pd.to_datetime(qa_df['DATE']),
                'TIME': pd.to_datetime(qa_df['DATE']).map(
                    lambda x: x.strftime('%Y-%m-%d')),
                'DOY': qa_df['DOY'].values,
                'QA': qa_df['QA'].values,
                'COLOR': [colors[qa] for qa in qa_df['QA'].values]
            }
            for plot_var in plot_var_list:
                if plot_var in qa_df.columns.values:
                    qa_data.update({plot_var: qa_df[plot_var].values})
            sources[qa_value] = bokeh.models.ColumnDataSource(qa_data)
            # sources[qa_value][platform] = bokeh.models.ColumnDataSource(
            #     qa_data)

        tooltips = [
            ("LANDSAT", "@PLATFORM"),
            ("DATE", "@TIME"),
            ("DOY", "@DOY")]
        # hover_tool = bokeh.models.HoverTool(tooltips=tooltips)
        # tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select"
        # tools = [
        #     hover_tool,
        #     bokeh.models.WheelZoomTool(dimensions='width'),
        #     bokeh.models.PanTool(dimensions='width'),
        #     bokeh.models.BoxZoomTool(dimensions='width'),
        #     bokeh.models.ResetTool(),
        #     bokeh.models.BoxSelectTool()]

        # Selection
        hover_circle = Circle(
            fill_color='#ff0000', line_color='#ff0000')
        selected_circle = Circle(
            fill_color='COLOR', line_color='COLOR')
        nonselected_circle = Circle(
            fill_color='#aaaaaa', line_color='#aaaaaa')

        # Plot the data by DOY
        logging.debug('  Building DOY timeseries figure')
        if os.path.isfile(output_doy_path):
            os.remove(output_doy_path)
        output_file(output_doy_path, title=zone_name)

        figure_args = dict(
            plot_width=750, plot_height=250, title=None,
            tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select",
            # tools="xwheel_zoom,xpan,xbox_zoom,reset,tap",
            active_scroll="xwheel_zoom")
        plot_args = dict(
            size=4, alpha=0.9, color='COLOR')
        if ini['SUMMARY']['max_qa'] > 0:
            plot_args['legend'] = 'QA'

        figures = []
        for plot_i, plot_var in enumerate(plot_var_list):
            if plot_i == 0:
                f = figure(
                    # x_range=bokeh.models.Range1d(1, 366, bounds=(1, 366)),
                    y_axis_label=plot_var, **figure_args)
            else:
                f = figure(
                    x_range=f.x_range, y_axis_label=plot_var, **figure_args)

            # # Add each QA level as a separate object
            # for qa, platform_sources in sorted(sources.items()):
            #     for platform, source in platform_sources.items():
            #         if platform == 'LT05':
            #             r = f.triangle(
            #                 'DOY', plot_var, source=source, **plot_args)
            #         elif platform == 'LE07':
            #             r = f.square(
            #                 'DOY', plot_var, source=source, **plot_args)
            #         elif platform == 'LC08':
            #             r = f.circle(
            #                 'DOY', plot_var, source=source, **plot_args)
            #         else:
            #             r = f.diamond(
            #                 'DOY', plot_var, source=source, **plot_args)
            #         r.hover_glyph = hover_circle
            #         r.selection_glyph = selected_circle
            #         r.nonselection_glyph = nonselected_circle
            #         r.muted_glyph = nonselected_circle
            #         hover_tool.renderers.append(r)

            # Add each QA level as a separate object
            for qa, source in sorted(sources.items()):
                r = f.circle('DOY', plot_var, source=source, **plot_args)
                r.hover_glyph = hover_circle
                r.selection_glyph = selected_circle
                r.nonselection_glyph = nonselected_circle
                r.muted_glyph = nonselected_circle

                # # DEADBEEF - This will display high QA points as muted
                # if qa > ini['SUMMARY']['max_qa']:
                #     r.muted = True
                #     # r.visible = False

            f.add_tools(bokeh.models.HoverTool(tooltips=tooltips))

            # if ini['SUMMARY']['max_qa'] > 0:
            f.legend.location = "top_left"
            f.legend.click_policy = "hide"
            # f.legend.click_policy = "mute"
            f.legend.orientation = "horizontal"

            figures.append(f)
        del f

        # Try to not allow more than 4 plots in a column
        p = gridplot(
            figures, ncols=len(plot_var_list) // 3,
            sizing_mode='stretch_both')

        if show_flag:
            show(p)
        save(p)


        # Plot the data by DATE
        logging.debug('  Building date timeseries figure')
        if os.path.isfile(output_date_path):
            os.remove(output_date_path)
        output_file(output_date_path, title=zone_name)

        figure_args = dict(
            plot_width=750, plot_height=250, title=None,
            tools="xwheel_zoom,xpan,xbox_zoom,reset,box_select",
            # tools="xwheel_zoom,xpan,xbox_zoom,reset,tap",
            active_scroll="xwheel_zoom",
            x_axis_type="datetime",)
        plot_args = dict(
            size=4, alpha=0.9, color='COLOR')
        if ini['SUMMARY']['max_qa'] > 0:
            plot_args['legend'] = 'QA'

        figures = []
        for plot_i, plot_var in enumerate(plot_var_list):
            if plot_i == 0:
                f = figure(
                    # x_range=bokeh.models.Range1d(x_limit[0], x_limit[1], bounds=x_limit),
                    y_axis_label=plot_var, **figure_args)
            else:
                f = figure(
                    x_range=f.x_range, y_axis_label=plot_var, **figure_args)

            if plot_var == 'TS':
                f.y_range.bounds = (270, None)

            # # Add each QA level as a separate object
            # for qa, platform_sources in sorted(sources.items()):
            #     for platform, source in sorted(platform_sources.items()):
            #         if platform == 'LT05':
            #             r = f.triangle(
            #                 'DATE', plot_var, source=source, **plot_args)
            #         elif platform == 'LE07':
            #             r = f.square(
            #                 'DATE', plot_var, source=source, **plot_args)
            #         elif platform == 'LC08':
            #             r = f.circle(
            #                 'DATE', plot_var, source=source, **plot_args)
            #         else:
            #             r = f.diamond(
            #                 'DATE', plot_var, source=source, **plot_args)
            #         r.hover_glyph = hover_circle
            #         r.selection_glyph = selected_circle
            #         r.nonselection_glyph = nonselected_circle
            #         r.muted_glyph = nonselected_circle
            #         hover_tool.renderers.append(r)

            # Add each QA level as a separate object
            for qa, source in sorted(sources.items()):
                r = f.circle('DATE', plot_var, source=source, **plot_args)
                r.hover_glyph = hover_circle
                r.selection_glyph = selected_circle
                r.nonselection_glyph = nonselected_circle
                r.muted_glyph = nonselected_circle

                # # DEADBEEF - This will display high QA points as muted
                # if qa > ini['SUMMARY']['max_qa']:
                #     r.muted = True
                #     # r.visible = False
            f.add_tools(bokeh.models.HoverTool(tooltips=tooltips))

            # if ini['SUMMARY']['max_qa'] > 0:
            f.legend.location = "top_left"
            f.legend.click_policy = "hide"
            # f.legend.click_policy = "mute"
            f.legend.orientation = "horizontal"

            figures.append(f)
        del f

        # Try to not allow more than 4 plots in a column
        p = gridplot(
            figures, ncols=len(plot_var_list) // 3,
            sizing_mode='stretch_both')

        if show_flag:
            show(p)
        save(p)

        # Pause after each iteration if show is True
        if show_flag:
            input('Press ENTER to continue')
def main(ini_path=None, overwrite_flag=False):
    """Generate summary thumbnails

    Parameters
    ----------
    ini_path : str
    overwrite_flag : bool, optional
        If True, overwrite existing files (the default is False).

    """
    logging.info('\nGenerate summary thumbnails')

    # Inputs (eventually move to INI file?)
    vis_args = {
        'bands': ['red', 'green', 'blue'],
        # 'bands': ['swir1', 'nir', 'red'],
        'min': [0.01, 0.01, 0.01],
        'max': [0.4, 0.4, 0.4],
        'gamma': [1.8, 1.8, 1.8]
    }

    # Buffer zone polygon
    zone_buffer = 240

    # Generate images by DOY
    doy_flag = True
    # Generate images by date
    date_flag = True

    # Read config file
    ini = inputs.read(ini_path)
    inputs.parse_section(ini, section='INPUTS')
    inputs.parse_section(ini, section='SPATIAL')
    inputs.parse_section(ini, section='SUMMARY')

    year_list = range(ini['INPUTS']['start_year'],
                      ini['INPUTS']['end_year'] + 1)
    month_list = list(
        utils.wrapped_range(ini['INPUTS']['start_month'],
                            ini['INPUTS']['end_month'], 1, 12))
    doy_list = list(
        utils.wrapped_range(ini['INPUTS']['start_doy'],
                            ini['INPUTS']['end_doy'], 1, 366))

    # Add merged row XXX to keep list
    ini['INPUTS']['row_keep_list'].append('XXX')

    # Get ee features from shapefile
    zone_geom_list = gdc.shapefile_2_geom_list_func(
        ini['INPUTS']['zone_shp_path'],
        zone_field=ini['INPUTS']['zone_field'],
        reverse_flag=False)

    # Filter features by FID before merging geometries
    if ini['INPUTS']['fid_keep_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] in ini['INPUTS']['fid_keep_list']
        ]
    if ini['INPUTS']['fid_skip_list']:
        zone_geom_list = [
            zone_obj for zone_obj in zone_geom_list
            if zone_obj[0] not in ini['INPUTS']['fid_skip_list']
        ]

    # Need zone_shp_path projection to build EE geometries
    zone = {}
    zone['osr'] = gdc.feature_path_osr(ini['INPUTS']['zone_shp_path'])
    zone['proj'] = gdc.osr_wkt(zone['osr'])
    # zone['proj'] = ee.Projection(zone['proj']).wkt().getInfo()
    # zone['proj'] = zone['proj'].replace('\n', '').replace(' ', '')
    # logging.debug('  Zone Projection: {}'.format(zone['proj']))

    # Initialize Earth Engine API key
    logging.debug('')
    ee.Initialize()

    coll_dict = {
        'LT04': 'LANDSAT/LT04/C01/T1_SR',
        'LT05': 'LANDSAT/LT05/C01/T1_SR',
        'LE07': 'LANDSAT/LE07/C01/T1_SR',
        'LC08': 'LANDSAT/LC08/C01/T1_SR'
    }

    logging.info('\nProcessing zones')
    for zone_fid, zone_name, zone_json in zone_geom_list:
        zone['fid'] = zone_fid
        zone['name'] = zone_name.replace(' ', '_')
        zone['json'] = zone_json
        logging.info('ZONE: {} (FID: {})'.format(zone['name'], zone['fid']))

        # Build EE geometry object for zonal stats
        zone['geom'] = ee.Geometry(geo_json=zone['json'],
                                   opt_proj=zone['proj'],
                                   opt_geodesic=False)
        # logging.debug('  Centroid: {}'.format(
        #     zone['geom'].centroid(100).getInfo()['coordinates']))

        # Use feature geometry to build extent, transform, and shape
        zone['extent'] = gdc.Extent(
            ogr.CreateGeometryFromJson(json.dumps(zone['json'])).GetEnvelope())
        # zone['extent'] = gdc.Extent(zone['geom'].GetEnvelope())
        zone['extent'] = zone['extent'].ogrenv_swap()
        zone['extent'] = zone['extent'].buffer(zone_buffer)
        zone['extent'] = zone['extent'].adjust_to_snap(
            'EXPAND', ini['SPATIAL']['snap_x'], ini['SPATIAL']['snap_y'],
            ini['SPATIAL']['cellsize'])
        zone['geo'] = zone['extent'].geo(ini['SPATIAL']['cellsize'])
        zone['transform'] = gdc.geo_2_ee_transform(zone['geo'])
        # zone['transform'] = '[' + ','.join(map(str, zone['transform'])) + ']'
        zone['shape'] = zone['extent'].shape(ini['SPATIAL']['cellsize'])
        logging.debug('  Zone Shape: {}'.format(zone['shape']))
        logging.debug('  Zone Transform: {}'.format(zone['transform']))
        logging.debug('  Zone Extent: {}'.format(zone['extent']))
        # logging.debug('  Zone Geom: {}'.format(zone['geom'].getInfo()))

        # Build an EE geometry of the extent
        extent_geom = ee.Geometry.Rectangle(coords=list(zone['extent']),
                                            proj=zone['proj'],
                                            geodesic=False)

        if 'SUMMARY' in ini.keys():
            zone_output_ws = os.path.join(ini['SUMMARY']['output_ws'],
                                          zone['name'])
        elif 'EXPORT' in ini.keys():
            zone_output_ws = os.path.join(ini['EXPORT']['output_ws'],
                                          zone['name'])
        else:
            logging.error(
                'INI file does not contain a SUMMARY or EXPORT section')
            sys.exit()
        if not os.path.isdir(zone_output_ws):
            logging.debug(
                'Folder {} does not exist, skipping'.format(zone_output_ws))
            continue

        landsat_daily_path = os.path.join(
            zone_output_ws, '{}_landsat_daily.csv'.format(zone['name']))
        if not os.path.isfile(landsat_daily_path):
            logging.error('  Landsat daily CSV does not exist, skipping zone')
            continue

        output_doy_ws = os.path.join(zone_output_ws, 'thumbnails_doy')
        output_date_ws = os.path.join(zone_output_ws, 'thumbnails_date')
        if overwrite_flag and os.path.isdir(output_doy_ws):
            for file_name in os.listdir(output_doy_ws):
                os.remove(os.path.join(output_doy_ws, file_name))
        if overwrite_flag and os.path.isdir(output_date_ws):
            for file_name in os.listdir(output_date_ws):
                os.remove(os.path.join(output_date_ws, file_name))
        if doy_flag and not os.path.isdir(output_doy_ws):
            os.makedirs(output_doy_ws)
        if date_flag and not os.path.isdir(output_date_ws):
            os.makedirs(output_date_ws)

        logging.debug('  Reading Landsat CSV')
        landsat_df = pd.read_csv(landsat_daily_path)
        # landsat_df = pd.read_csv(
        #     landsat_daily_path, parse_dates=['DATE'], index_col='DATE')
        landsat_df = landsat_df[landsat_df['PIXEL_COUNT'] > 0]

        # Common summary filtering
        logging.debug('  Filtering using INI SUMMARY parameters')
        if year_list:
            landsat_df = landsat_df[landsat_df['YEAR'].isin(year_list)]
        if month_list:
            landsat_df = landsat_df[landsat_df['MONTH'].isin(month_list)]
        if doy_list:
            landsat_df = landsat_df[landsat_df['DOY'].isin(doy_list)]

        if ini['INPUTS']['path_keep_list']:
            landsat_df = landsat_df[landsat_df['PATH'].isin(
                ini['INPUTS']['path_keep_list'])]
        if (ini['INPUTS']['row_keep_list']
                and ini['INPUTS']['row_keep_list'] != ['XXX']):
            landsat_df = landsat_df[landsat_df['ROW'].isin(
                ini['INPUTS']['row_keep_list'])]

        # Assume the default is for these to be True and only filter if False
        if not ini['INPUTS']['landsat4_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT04']
        if not ini['INPUTS']['landsat5_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LT05']
        if not ini['INPUTS']['landsat7_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LE07']
        if not ini['INPUTS']['landsat8_flag']:
            landsat_df = landsat_df[landsat_df['PLATFORM'] != 'LC08']

        if ini['INPUTS']['scene_id_keep_list']:
            # Replace XXX with primary ROW value for checking skip list SCENE_ID
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[scene_id_df.isin(
                ini['INPUTS']['scene_id_keep_list']).values]
            # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
            # landsat_df = landsat_df[landsat_df['SCENE_ID'].isin(
            #     ini['INPUTS']['scene_id_keep_list'])]
        if ini['INPUTS']['scene_id_skip_list']:
            # Replace XXX with primary ROW value for checking skip list SCENE_ID
            scene_id_df = pd.Series([
                s.replace('XXX', '{:03d}'.format(int(r)))
                for s, r in zip(landsat_df['SCENE_ID'], landsat_df['ROW'])
            ])
            landsat_df = landsat_df[np.logical_not(
                scene_id_df.isin(ini['INPUTS']['scene_id_skip_list']).values)]
            # This won't work: SCENE_ID have XXX but scene_id_skip_list don't
            # landsat_df = landsat_df[np.logical_not(landsat_df['SCENE_ID'].isin(
            #     ini['INPUTS']['scene_id_skip_list']))]

        # Filter by QA/QC value
        if ini['SUMMARY']['max_qa'] >= 0 and not landsat_df.empty:
            logging.debug('    Maximum QA: {0}'.format(
                ini['SUMMARY']['max_qa']))
            landsat_df = landsat_df[
                landsat_df['QA'] <= ini['SUMMARY']['max_qa']]

        # Filter by average cloud score
        if ini['SUMMARY']['max_cloud_score'] < 100 and not landsat_df.empty:
            logging.debug('    Maximum cloud score: {0}'.format(
                ini['SUMMARY']['max_cloud_score']))
            landsat_df = landsat_df[
                landsat_df['CLOUD_SCORE'] <= ini['SUMMARY']['max_cloud_score']]

        # Filter by Fmask percentage
        if ini['SUMMARY']['max_fmask_pct'] < 100 and not landsat_df.empty:
            landsat_df['FMASK_PCT'] = 100 * (landsat_df['FMASK_COUNT'] /
                                             landsat_df['FMASK_TOTAL'])
            logging.debug('    Max Fmask threshold: {}'.format(
                ini['SUMMARY']['max_fmask_pct']))
            landsat_df = landsat_df[
                landsat_df['FMASK_PCT'] <= ini['SUMMARY']['max_fmask_pct']]

        # Filter low count SLC-off images
        if ini['SUMMARY']['min_slc_off_pct'] > 0 and not landsat_df.empty:
            logging.debug('    Mininum SLC-off threshold: {}%'.format(
                ini['SUMMARY']['min_slc_off_pct']))
            # logging.debug('    Maximum pixel count: {}'.format(
            #     max_pixel_count))
            slc_off_mask = ((landsat_df['PLATFORM'] == 'LE07') &
                            ((landsat_df['YEAR'] >= 2004) |
                             ((landsat_df['YEAR'] == 2003) &
                              (landsat_df['DOY'] > 151))))
            slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] /
                                 landsat_df['PIXEL_TOTAL'])
            # slc_off_pct = 100 * (landsat_df['PIXEL_COUNT'] / max_pixel_count)
            landsat_df = landsat_df[(
                (slc_off_pct >= ini['SUMMARY']['min_slc_off_pct'])
                & slc_off_mask) | (~slc_off_mask)]

        if landsat_df.empty:
            logging.error(
                '  Empty Landsat dataframe after filtering, skipping zone')
            continue

        logging.debug('  Downloading thumbnails')
        for landsat, start_date in zip(landsat_df['PLATFORM'],
                                       landsat_df['DATE']):
            start_dt = datetime.datetime.strptime(start_date, '%Y-%m-%d')
            end_dt = start_dt + datetime.timedelta(days=1)
            end_date = end_dt.strftime('%Y-%m-%d')

            output_doy_path = os.path.join(
                output_doy_ws,
                '{}_{}.png'.format(start_dt.strftime('%j_%Y-%m-%d'), landsat))
            output_date_path = os.path.join(
                output_date_ws,
                '{}_{}.png'.format(start_dt.strftime('%Y-%m-%d_%j'), landsat))

            # DEADBEEF - This seems like a poor approach
            save_doy_flag = False
            save_date_flag = False
            if doy_flag and not os.path.isfile(output_doy_path):
                save_doy_flag = True
            if date_flag and not os.path.isfile(output_date_path):
                save_date_flag = True
            if not save_doy_flag and not save_date_flag:
                logging.debug(
                    '  {} - file already exists, skipping'.format(start_date))
                continue
            logging.debug('  {}'.format(start_date))
            # logging.debug('  {}'.format(output_path))

            if landsat in ['LT04', 'LT05', 'LE07']:
                ee_coll = ee.ImageCollection(coll_dict[landsat]).select(
                    ['B1', 'B2', 'B3', 'B4', 'B5', 'B7'],
                    ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])
            elif landsat in ['LC08']:
                ee_coll = ee.ImageCollection(coll_dict[landsat]).select(
                    ['B2', 'B3', 'B4', 'B5', 'B6', 'B7'],
                    ['blue', 'green', 'red', 'nir', 'swir1', 'swir2'])
            ee_coll = ee_coll.filterDate(start_date, end_date)
            ee_image = ee.Image(ee_coll.median().divide(10000)) \
                .visualize(**vis_args) \
                .reproject(crs=zone['proj'], crsTransform=zone['transform']) \
                .paint(zone['geom'], color=0.5, width=1) \
                .clip(extent_geom)

            # Get the image thumbnail
            for i in range(10):
                try:
                    output_url = ee_image.getThumbUrl({'format': 'png'})
                    break
                except Exception as e:
                    logging.error('  Exception: {}, retry {}'.format(e, i))
                    logging.debug('{}'.format(e))
                    sleep(i**2)

            for i in range(10):
                try:
                    # DEADBEEF - This seems like a poor approach
                    if save_doy_flag and save_date_flag:
                        urllib.urlretrieve(output_url, output_doy_path)
                        shutil.copy(output_doy_path, output_date_path)
                    elif save_doy_flag:
                        urllib.urlretrieve(output_url, output_doy_path)
                    elif save_date_flag:
                        urllib.urlretrieve(output_url, output_date_path)
                    break
                except Exception as e:
                    logging.error('  Exception: {}, retry {}'.format(e, i))
                    logging.debug('{}'.format(e))
                    sleep(i**2)