def find_updated_data(self, raster_url_list):
    """
    Return the rasters whose bucket timestamp is newer than the date stored in the config sheet.

    The layer's 'last_updated' value is read from the config sheet and parsed as m/d/Y.
    Each raster URL is reduced to its path with every '/' removed, and that name is used
    to look up the raster's timestamp in the mapping returned by aws.get_timestamps.

    :param raster_url_list: list of raster URLs to check
    :return: list of the URLs from raster_url_list that are newer than the config sheet date
    :raises KeyError: if a raster name is not present in the bucket timestamps
    """
    last_updated_text = gs.get_value('tech_title', self.name, 'last_updated', self.gfw_env)
    last_updated = datetime.datetime.strptime(last_updated_text, '%m/%d/%Y')

    # umd_landsat_alerts has its own bucket; anything else falls back to terra-i
    bucket = 'gfw-gee-glad-export' if self.name == 'umd_landsat_alerts' else 'terra-i'

    # Work on a private copy of whatever aws.get_timestamps hands back
    timestamps_by_name = dict(aws.get_timestamps(bucket))

    def is_newer(raster_url):
        # The lookup key is the URL path with all slashes stripped out
        raster_key = urlparse.urlparse(raster_url).path.replace('/', '')
        return timestamps_by_name[raster_key] > last_updated

    return [raster_url for raster_url in raster_url_list if is_newer(raster_url)]
def check_current_version(self, wdpa_gdb):
    """
    Check the filename of the gdb we've just downloaded against the version recorded in the metadata doc.

    If the versions match, log that the dataset was 'checked' and exit the workflow with status 0.
    Otherwise write the new version text and an updated citation to the metadata doc and return,
    so the caller continues processing.

    :param wdpa_gdb: the unzipped gdb just downloaded from wdpa
    :return:
    """
    # Parameters required to check the metadata response spreadsheet
    unique_col = 'Technical Title'
    unique_val = 'wdpa_protected_areas'
    update_col = 'Frequency of Updates'
    sheet_name = 'Form Responses 1'
    gs_key = r'1hJ48cMrADMEJ67L5hTQbT5hhV20YCJHpN1NwjXiC3pI'

    current_version_text = gs.get_value(unique_col, unique_val, update_col, sheet_name, gs_key)

    # Strip the directory, the extension and the WDPA_/_Public markers to get the version token
    gdb_name = os.path.splitext(os.path.basename(wdpa_gdb))[0]
    download_version = gdb_name.replace('WDPA_', '').replace('_Public', '')

    download_month, download_year = self.parse_month_abbrev(download_version)

    # Format to match what's in the metadata
    download_version_text = 'Monthly. Current version: {0}, {1}.'.format(download_month, download_year)

    # Versions match; no need to update
    if current_version_text == download_version_text:
        logging.info('No new data on the wdpa site')

        # Important for the script that reads the log file and sends an email
        # Including this 'Checked' message will show that we checked the layer but it didn't need updating
        logging.critical('Checked | {0}'.format(self.name))
        sys.exit(0)

    # Versions differ: update the value in the metadata table and continue processing the dataset
    logging.debug(
        'Current WDPA version text is {0}, downloaded version is {1} Updating '
        'dataset now.'.format(current_version_text, download_version_text))

    gs.set_value(unique_col, unique_val, update_col, sheet_name, download_version_text, gs_key)

    # Update the citation as well. The literal is built from adjacent pieces inside
    # parentheses so that no piece contains a raw line break.
    citation_str = (
        'IUCN and UNEP-WCMC ({y}), The World Database on Protected Areas (WDPA) [On-line], '
        '{m}, {y}, Cambridge, UK: UNEP-WCMC. Available at: www.protectedplanet.net. '
        'Accessed through Global Forest Watch in [insert month/year]. '
        'www.globalforestwatch.org'
    ).format(m=download_month, y=download_year)

    gs.set_value(unique_col, unique_val, 'Citation', sheet_name, citation_str, gs_key)
def check_current_version(self, wdpa_gdb):
    """
    Compare the version of the gdb we just downloaded with the version recorded in the metadata doc.

    When the two match there is nothing to do: log that the dataset was 'checked' and exit
    the workflow with status 0. When they differ, log the mismatch and return so that
    processing continues.

    :param wdpa_gdb: the unzipped gdb just downloaded from wdpa
    :return:
    """
    # Lookup arguments for the metadata response spreadsheet, in the order gs.get_value takes them:
    # unique column, unique value, column to read, sheet name, spreadsheet key
    metadata_lookup = (
        "Technical Title",
        "wdpa_protected_areas",
        "Frequency of Updates",
        "sheet1",
        r"1hJ48cMrADMEJ67L5hTQbT5hhV20YCJHpN1NwjXiC3pI",
    )
    recorded_text = gs.get_value(*metadata_lookup)

    # Reduce the gdb path to its bare version token
    gdb_stem, _ = os.path.splitext(os.path.basename(wdpa_gdb))
    version_token = gdb_stem.replace("WDPA_", "").replace("_Public", "")
    version_label = self.parse_month_abbrev(version_token)

    # Same wording as the metadata doc, so the two can be compared directly
    downloaded_text = "Monthly. Current version: {0}.".format(version_label)

    if recorded_text != downloaded_text:
        # New version: note the difference and let the caller carry on processing.
        # NOTE(review): nothing is written back to the metadata table here -- confirm that is intended
        logging.debug(
            "Current WDPA version text is {0}, downloaded version is {1} Updating dataset now.".format(
                recorded_text, downloaded_text
            )
        )
        return

    # Already up to date
    logging.info("No new data on the wdpa site")

    # The script that reads the log file and sends an email looks for this 'Checked' message;
    # it shows that we checked the layer but it didn't need updating
    logging.critical("Checked | {0}".format(self.name))
    sys.exit(0)
def find_updated_data(self, raster_url_list):
    """
    Return the rasters whose bucket timestamp is newer than the date stored in the config sheet.

    The bucket name is taken from the first URL in the list: it is the part of the host name
    before the first '.'. Each raster URL is reduced to its path with every '/' removed, and
    that name is used to look up the raster's timestamp in the mapping returned by
    aws.get_timestamps.

    :param raster_url_list: list of raster URLs to check
    :return: list of the URLs from raster_url_list that are newer than the config sheet date;
             an empty list if raster_url_list is empty
    :raises KeyError: if a raster name is not present in the bucket timestamps
    """
    # Nothing to compare, and there is no first URL to derive the bucket from
    if not raster_url_list:
        return []

    config_sheet_datetime_text = gs.get_value('tech_title', self.name, 'last_updated', self.gfw_env)
    config_sheet_datetime = datetime.datetime.strptime(config_sheet_datetime_text, '%m/%d/%Y')

    # NOTE(review): only the first URL is inspected, so this assumes every raster in the
    # list lives in the same bucket -- confirm against callers
    first_url = raster_url_list[0]
    netloc = urlparse.urlparse(first_url).netloc
    bucket = netloc.split('.')[0]

    bucket_timestamps = aws.get_timestamps(bucket)

    updated_raster_url_list = []

    for raster_url in raster_url_list:
        # The lookup key is the URL path with all slashes stripped out
        raster_name = urlparse.urlparse(raster_url).path.replace('/', '')
        raster_timestamp = bucket_timestamps[raster_name]

        if raster_timestamp > config_sheet_datetime:
            updated_raster_url_list.append(raster_url)

    return updated_raster_url_list