def archive_log_files(order_id, product_id):
    """Archive the log files for the current job
    """

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    try:
        # Determine the destination path for the logs
        output_dir = Environment().get_distribution_directory()
        destination_path = os.path.join(output_dir, 'logs', order_id)

        # Create the path
        utilities.create_directory(destination_path)

        # Job log file
        logfile_path = EspaLogging.get_filename(settings.PROCESSING_LOGGER)
        full_logfile_path = os.path.abspath(logfile_path)
        log_name = os.path.basename(full_logfile_path)
        # Determine full destination
        destination_file = os.path.join(destination_path, log_name)
        # Copy it
        shutil.copyfile(full_logfile_path, destination_file)

        # Mapper log file
        full_logfile_path = os.path.abspath(MAPPER_LOG_FILENAME)
        final_log_name = '-'.join([MAPPER_LOG_PREFIX, order_id, product_id])
        final_log_name = '.'.join([final_log_name, 'log'])
        # Determine full destination
        destination_file = os.path.join(destination_path, final_log_name)
        # Copy it
        shutil.copyfile(full_logfile_path, destination_file)

    except Exception:
        # We don't care because we are at the end of processing
        # And if we are on the successful path, we don't care either
        logger.exception('Exception encountered and follows')
def main():
    """Some parameter and logging setup, then call the process routine
    """

    # Create a command line argument parser
    description = 'Main mapper for a request'
    parser = ArgumentParser(description=description)

    # Add our only option, which determines whether we are a developer or not
    parser.add_argument('--developer',
                        action='store_true', dest='developer', default=False,
                        help='use a developer mode for sleeping')

    # Parse the command line arguments
    args = parser.parse_args()

    proc_cfg = retrieve_cfg(PROC_CFG_FILENAME)

    EspaLogging.configure_base_logger(filename=MAPPER_LOG_FILENAME)
    # Initially set to the base logger
    logger = EspaLogging.get_logger('base')

    try:
        # Joe-Developer doesn't want to wait, so if set, skip sleeping
        developer_sleep_mode = args.developer

        process(proc_cfg, developer_sleep_mode)
    except Exception:
        logger.exception('Processing failed stacktrace follows')
def set_product_error(server, order_id, product_id, processing_location):
    """Call the API server routine to set a product request to error

    Provides a sleep/retry implementation to hopefully bypass transient
    errors, so that we do not end up with requests that have failed but
    still show a status of processing.
    """

    if server is not None:
        logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

        attempt = 0
        sleep_seconds = settings.DEFAULT_SLEEP_SECONDS

        while True:
            try:
                logger.info('Product ID is [{}]'.format(product_id))
                logger.info('Order ID is [{}]'.format(order_id))
                logger.info('Processing Location is [{}]'
                            .format(processing_location))

                logged_contents = \
                    EspaLogging.read_logger_file(settings.PROCESSING_LOGGER)

                status = server.set_scene_error(product_id, order_id,
                                                processing_location,
                                                logged_contents)

                if not status:
                    logger.critical('Failed processing API call to'
                                    ' set_scene_error')
                    return False

                break

            except Exception:
                logger.critical('Failed processing API call to'
                                ' set_scene_error')
                logger.exception('Exception encountered and follows')

                if attempt < settings.MAX_SET_SCENE_ERROR_ATTEMPTS:
                    sleep(sleep_seconds)  # sleep before trying again
                    attempt += 1
                    sleep_seconds = int(sleep_seconds * 1.5)
                    continue
                else:
                    return False

    return True
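
# Example (illustrative sketch, not part of the original module): how the
# retry delay above grows geometrically.  The starting delay and attempt
# limit are hypothetical stand-ins for settings.DEFAULT_SLEEP_SECONDS and
# settings.MAX_SET_SCENE_ERROR_ATTEMPTS.
def _example_backoff_schedule():
    sleep_seconds = 20        # hypothetical DEFAULT_SLEEP_SECONDS
    for attempt in range(5):  # hypothetical MAX_SET_SCENE_ERROR_ATTEMPTS
        print('attempt {0}: sleep {1}s'.format(attempt, sleep_seconds))
        sleep_seconds = int(sleep_seconds * 1.5)
    # prints sleeps of 20, 30, 45, 67, 100 seconds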
def untar_data(source_file, destination_directory):
    '''
    Description:
        Using tar extract the file contents into a destination directory.

    Notes:
        Works with '*.tar.gz' and '*.tar' files.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Build the tar extraction command
    cmd = ' '.join(['tar', '--directory', destination_directory,
                    '-xvf', source_file])

    logger.info("Unpacking [%s] to [%s]"
                % (source_file, destination_directory))

    # Unpack the data and raise any errors
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    except Exception:
        logger.exception("Failed to unpack data")
        raise
    finally:
        if len(output) > 0:
            logger.info(output)
def change_ownership(product_path, user, group, recursive=False):
    """Change the ownership of a product

    Args:
        product_path: The full path to a file or folder whose ownership
            will be updated
        user: The new owner user
        group: The new owner group
        recursive: Whether or not to apply chown recursively

    Returns:
        None
    """
    try:
        logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)
    except Exception:
        logger = get_base_logger()

    ownership = '{u}:{g}'.format(u=user, g=group)

    if recursive:
        cmd = ' '.join(['chown', '-R', ownership, product_path])
    else:
        cmd = ' '.join(['chown', ownership, product_path])

    output = execute_cmd(cmd)
    if len(output) > 0:
        logger.info(output)
def untar_data(source_file, destination_directory):
    '''
    Description:
        Using tar extract the file contents into a destination directory.

    Notes:
        Works with '*.tar.gz' and '*.tar' files.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Look up the configured level of multithreading
    num_threads_str = retrieve_pigz_cfg(PROC_CFG_FILENAME)

    # Build the unpigz/tar pipeline command
    cmd = ' '.join(['unpigz', '-p', num_threads_str, '<', source_file,
                    '| tar', '--directory', destination_directory, '-xv'])

    logger.info("Unpacking [%s] to [%s]"
                % (source_file, destination_directory))

    # Unpack the data and raise any errors
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    except Exception:
        logger.exception("Failed to unpack data")
        raise
    finally:
        if len(output) > 0:
            logger.info(output)
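
# Example (illustrative sketch, not part of the original module): the shell
# pipeline untar_data() assembles, shown for hypothetical inputs.
def _example_unpigz_cmd():
    num_threads_str = '4'                      # hypothetical config value
    source_file = '/tmp/input/product.tar.gz'  # hypothetical path
    destination_directory = '/tmp/work'        # hypothetical path
    cmd = ' '.join(['unpigz', '-p', num_threads_str, '<', source_file,
                    '| tar', '--directory', destination_directory, '-xv'])
    print(cmd)
    # unpigz -p 4 < /tmp/input/product.tar.gz | tar --directory /tmp/work -xv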
def remote_copy_file_to_file(source_host, source_file, destination_file):
    '''
    Description:
        Use unix 'cp' to copy a file from one place to another on a remote
        machine using ssh.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                    source_host, 'cp', source_file, destination_file])

    # Transfer the data and raise any errors
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    except Exception:
        logger.error("Failed to copy file")
        raise
    finally:
        if len(output) > 0:
            logger.info(output)

    logger.info("Transfer complete - SSH-CP")
def get_sleep_duration(cfg, start_time, dont_sleep,
                       key='espa_min_request_duration_in_seconds'):
    """Logs details and returns number of seconds to sleep
    """
    try:
        logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)
    except Exception:
        logger = get_base_logger()

    # Determine if we need to sleep
    end_time = datetime.datetime.now()
    seconds_elapsed = (end_time - start_time).seconds
    logger.info('Processing Time Elapsed {0} Seconds'
                .format(seconds_elapsed))

    min_seconds = int(cfg.get(key))

    seconds_to_sleep = 1
    if dont_sleep:
        # We don't need to sleep
        seconds_to_sleep = 1
    elif seconds_elapsed < min_seconds:
        seconds_to_sleep = (min_seconds - seconds_elapsed)

    logger.info('Sleeping An Additional {0} Seconds'
                .format(seconds_to_sleep))

    return seconds_to_sleep
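
# Example (illustrative sketch, not part of the original module): a plain
# dict stands in for the real config object, since both expose get().
# Values are hypothetical.
def _example_sleep_duration():
    cfg = {'espa_min_request_duration_in_seconds': '30'}
    start_time = datetime.datetime.now() - datetime.timedelta(seconds=12)
    # 30s minimum - 12s elapsed -> roughly 18 seconds to sleep
    print(get_sleep_duration(cfg, start_time, dont_sleep=False))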
def archive_log_files(args, proc_cfg, proc_status):
    """Archive the log files for the current execution

    Args:
        args <args>: Command line arguments
        proc_cfg <ConfigParser>: Configuration
        proc_status <bool>: True = Success, False = Error
    """

    base_log = cli_log_filename(args)
    proc_log = EspaLogging.get_filename(settings.PROCESSING_LOGGER)
    dist_path = proc_cfg.get('processing', 'espa_log_archive')
    destination_path = os.path.join(dist_path, args.order_id)

    # Create the archive path
    util.create_directory(destination_path)

    # Copy them
    copy_log_file(base_log, destination_path, proc_status)
    copy_log_file(proc_log, destination_path, proc_status)

    # Remove the source versions
    if os.path.exists(base_log):
        os.unlink(base_log)

    if os.path.exists(proc_log):
        os.unlink(proc_log)
def transfer_file(source_host, source_file,
                  destination_host, destination_file,
                  source_username=None, source_pw=None,
                  destination_username=None, destination_pw=None):
    '''
    Description:
        Using cp/FTP/SCP transfer a file from a source location to a
        destination location.

    Notes:
        We are not doing anything significant here other than some logic
        and a fallback to SCP if FTP fails.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    logger.info("Transferring [%s:%s] to [%s:%s]"
                % (source_host, source_file,
                   destination_host, destination_file))

    # If both source and destination are localhost we can just copy the data
    if source_host == 'localhost' and destination_host == 'localhost':
        shutil.copyfile(source_file, destination_file)
        return

    # If both source and destination hosts are the same, we can use ssh to
    # copy the files locally on the remote host
    if source_host == destination_host:
        remote_copy_file_to_file(source_host, source_file, destination_file)
        return

    # Try FTP first before SCP if usernames and passwords are provided
    if source_username is not None and source_pw is not None:
        try:
            ftp_from_remote_location(source_username, source_pw, source_host,
                                     source_file, destination_file)
            return
        except Exception as excep:
            logger.warning("FTP failed; will attempt transfer using SCP")
            logger.warning("FTP Errors: %s" % str(excep))

    elif destination_username is not None and destination_pw is not None:
        try:
            ftp_to_remote_location(destination_username, destination_pw,
                                   source_file, destination_host,
                                   destination_file)
            return
        except Exception as excep:
            logger.warning("FTP failed; will attempt transfer using SCP")
            logger.warning("FTP Errors: %s" % str(excep))

    # As a last resort try SCP
    scp_transfer_file(source_host, source_file,
                      destination_host, destination_file)
def scp_transfer_file(source_host, source_file,
                      destination_host, destination_file):
    '''
    Description:
        Using SCP transfer a file from a source location to a destination
        location.

    Note:
        - It is assumed ssh has been set up for access between the localhost
          and destination system
        - If wild cards are to be used with the source, then the destination
          file must be a directory.  ***No checking is performed in this
          code***
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    if source_host == destination_host:
        msg = "source and destination hosts match; unable to scp"
        logger.error(msg)
        raise Exception(msg)

    cmd = ['scp', '-q', '-o', 'StrictHostKeyChecking=no', '-C']

    # Build the source portion of the command
    # Single quote the source to allow for wild cards
    if source_host == 'localhost':
        cmd.append(source_file)
    else:
        cmd.append("'%s:%s'" % (source_host, source_file))

    # Build the destination portion of the command
    if destination_host == 'localhost':
        cmd.append(destination_file)
    else:
        cmd.append('%s:%s' % (destination_host, destination_file))

    cmd = ' '.join(cmd)

    # Transfer the data and raise any errors
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    except Exception:
        if len(output) > 0:
            logger.info(output)
        logger.error("Failed to transfer data")
        raise

    logger.info("Transfer complete - SCP")
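
# Example (illustrative sketch, not part of the original module): the
# command built for a remote source and a local destination; host and paths
# are hypothetical.  The single quotes preserve the wildcard for the remote
# shell instead of letting the local shell expand it.
def _example_scp_cmd():
    cmd = ['scp', '-q', '-o', 'StrictHostKeyChecking=no', '-C']
    cmd.append("'%s:%s'" % ('remotehost', '/data/orders/order-123/*.tar.gz'))
    cmd.append('/tmp/downloads')
    print(' '.join(cmd))
    # scp -q -o StrictHostKeyChecking=no -C
    #     'remotehost:/data/orders/order-123/*.tar.gz' /tmp/downloads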
def http_transfer_file(download_url, destination_file):
    """
    Description:
        Using http transfer a file from a source location to a destination
        file on the localhost.
    """

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)
    logger.info(download_url)

    session = requests.Session()

    session.mount('http://', HTTPAdapter(max_retries=1))
    session.mount('https://', HTTPAdapter(max_retries=1))

    req = None
    try:
        # Use .netrc credentials by default since no auth= is specified
        # We'll use this method for now to get through to the LP DAAC
        req = session.get(url=download_url, timeout=300.0)

        if not req.ok:
            logger.error("Transfer Failed - HTTP")
            req.raise_for_status()

        with open(destination_file, 'wb') as local_fd:
            local_fd.write(req.content)
    except Exception:
        logger.exception("Transfer Issue - HTTP - {0}".format(download_url))
        msg = "Connection timed out"

        # Sleep randomly from 1 to 10 minutes before raising the exception
        sleep_seconds = int(random.random() * 540) + 60
        logger.debug(
            'Transfer Issue - Sleeping for {} seconds'.format(sleep_seconds))
        sleep(sleep_seconds)

        raise Exception(msg)
    finally:
        if req is not None:
            req.close()

    logger.info("Transfer Complete - HTTP")
def ftp_from_remote_location(username, pword, host, remotefile, localfile):
    '''
    Description:
        Transfers files from a remote location to the local machine using
        ftplib.

    Parameters:
        username = Username for the ftp account
        pword = Password for the ftp account
        host = The ftp server host
        remotefile = The file to transfer
        localfile = Full path to where the local file should be created
                    (Parent directories must exist)

    Returns: None

    Errors: Raises Exception() in the event of error
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Make sure the remote file is absolute, otherwise ftp will choke
    if not remotefile.startswith('/'):
        remotefile = ''.join(['/', remotefile])

    password = urllib2.unquote(pword)

    url = 'ftp://%s/%s' % (host, remotefile)
    logger.info("Transferring file from %s to %s" % (url, localfile))

    ftp = None
    try:
        with open(localfile, 'wb') as loc_file:
            def callback(data):
                loc_file.write(data)

            ftp = ftplib.FTP(host, timeout=60)
            ftp.login(user=username, passwd=password)
            ftp.set_debuglevel(0)
            ftp.retrbinary(' '.join(['RETR', remotefile]), callback)
    finally:
        if ftp:
            ftp.quit()

    logger.info("Transfer complete - FTP")
def sentinel2_sensor_info(product_id):
    """Determine information from Product ID

    Example ID: S2A_MSI_L1C_T16TDS_20190723_20190723

    Note:
        - This assumes the ESPA formatted Sentinel-2 naming convention
    """

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    (sensor_code, sensor, proc_level,
     tile, date_acq, date_proc) = product_id.split('_')

    date_acquired = datetime.datetime.strptime(date_acq, '%Y%m%d').date()

    # Determine the product prefix
    product_prefix = ('{sc}{s}{p}{t:>06}{d:>08}'
                      .format(sc=sensor_code, s=sensor, p=proc_level,
                              t=tile, d=date_acq))

    # Determine the default pixel sizes
    meters = DEFAULT_PIXEL_SIZE['meters'][sensor_code]
    dd = DEFAULT_PIXEL_SIZE['dd'][sensor_code]

    default_pixel_size = {'meters': meters, 'dd': dd}

    # Sensor string is used in plotting
    sensor_name = None
    if is_sentinel2_l1_old(product_id):
        sensor_name = 'SENTINEL-2A'
    elif is_sentinel2_l1_new(product_id):
        sensor_name = 'SENTINEL-2B'

    return SensorInfo(product_prefix=product_prefix,
                      date_acquired=date_acquired,
                      sensor_name=sensor_name,
                      default_pixel_size=default_pixel_size,
                      horizontal=0, vertical=0,
                      path=0, row=0, tile=tile)
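
# Example (illustrative sketch, not part of the original module): taking
# the docstring's example Product ID apart with the same split as above.
def _example_sentinel2_split():
    product_id = 'S2A_MSI_L1C_T16TDS_20190723_20190723'
    (sensor_code, sensor, proc_level,
     tile, date_acq, date_proc) = product_id.split('_')
    print(' '.join([sensor_code, sensor, proc_level,
                    tile, date_acq, date_proc]))
    # S2A MSI L1C T16TDS 20190723 20190723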
def ftp_to_remote_location(username, pword, localfile, host, remotefile):
    '''
    Description:
        Transfers files from the local machine to a remote location using
        ftplib.

    Parameters:
        username = Username for the ftp account
        pword = Password for the ftp account
        localfile = Full path of the file to transfer out
        host = The ftp server host
        remotefile = Full path of where to store the file
                     (Directories must exist)

    Returns: None

    Errors: Raises Exception() in the event of error
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Make sure the remote file is absolute, otherwise ftp will choke
    if not remotefile.startswith('/'):
        remotefile = ''.join(['/', remotefile])

    password = urllib2.unquote(pword)

    logger.info("Transferring file from %s to %s"
                % (localfile, 'ftp://%s/%s' % (host, remotefile)))

    ftp = None

    try:
        ftp = ftplib.FTP(host, user=username, passwd=password, timeout=60)
        with open(localfile, 'rb') as tmp_fd:
            ftp.storbinary(' '.join(['STOR', remotefile]), tmp_fd, 1024)
    finally:
        if ftp:
            ftp.quit()

    logger.info("Transfer complete - FTP")
def get_filename(work_dir, product_id):
    """Retrieve the Landsat metadata filename to use

    The file may have issues, so call the fix function to remove those
    issues.
    """

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    filename = ''

    # Save the current directory and change to the work directory
    current_directory = os.getcwd()
    os.chdir(work_dir)

    try:
        for meta_file in glob.glob('{0}_MTL.*'.format(product_id)):
            if ('old' not in meta_file
                    and not meta_file.startswith('lnd')):

                # Save the filename and break out of the directory loop
                filename = meta_file
                break

        if filename == '':
            raise ESPAException('Unable to locate the MTL file in [{0}]'
                                .format(work_dir))

        logger.info('Located MTL file: [{0}]'.format(filename))

        filename = fix_file(filename)

        logger.info('Using MTL file: [{0}]'.format(filename))
    finally:
        # Change back to the original directory
        os.chdir(current_directory)

    return filename
def copy_files_to_directory(source_files, destination_directory):
    '''
    Description:
        Use unix 'cp' to copy files from one place to another on the
        localhost.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    if isinstance(source_files, list):
        for source_file in source_files:
            cmd = ' '.join(['cp', source_file, destination_directory])

            # Transfer the data and raise any errors
            output = ''
            try:
                output = utilities.execute_cmd(cmd)
            except Exception:
                logger.error("Failed to copy file")
                raise
            finally:
                if len(output) > 0:
                    logger.info(output)

    logger.info("Transfer complete - CP")
def move_files_to_directory(source_files, destination_directory):
    '''
    Description:
        Move files from one place to another on the localhost.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    if isinstance(source_files, str):
        filename = os.path.basename(source_files)
        new_name = os.path.join(destination_directory, filename)
        os.rename(source_files, new_name)

    elif isinstance(source_files, list):
        for source_file in source_files:
            filename = os.path.basename(source_file)
            new_name = os.path.join(destination_directory, filename)
            os.rename(source_file, new_name)

    logger.info("Transfer complete - MOVE")
def distribute_product_local(immutability, product_name, source_path,
                             packaging_path):
    """Package the product and deliver it to the local online cache
    """

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Deliver the product files
    # Attempt X times sleeping between each attempt
    sleep_seconds = settings.DEFAULT_SLEEP_SECONDS
    max_number_of_attempts = settings.MAX_DISTRIBUTION_ATTEMPTS
    max_package_attempts = settings.MAX_PACKAGING_ATTEMPTS

    attempt = 0
    product_file = 'ERROR'
    cksum_file = 'ERROR'

    while True:
        try:
            # Package the product files to the online cache location
            # Attempt X times sleeping between each sub_attempt
            sub_attempt = 0
            while True:
                try:
                    (product_file, cksum_file,
                     local_cksum_value) = package_product(immutability,
                                                          source_path,
                                                          packaging_path,
                                                          product_name)

                    # Change the attributes on the files so that we can't
                    # remove them
                    if immutability:
                        cmd = ' '.join(['sudo', 'chattr', '+i',
                                        product_file, cksum_file])
                        output = utilities.execute_cmd(cmd)
                        if len(output) > 0:
                            logger.info(output)
                except Exception:
                    logger.exception("An exception occurred processing %s"
                                     % product_name)
                    if sub_attempt < max_package_attempts:
                        sleep(sleep_seconds)  # sleep before trying again
                        sub_attempt += 1
                        continue
                    else:
                        raise
                break

            # Always log where we placed the files
            logger.info("Delivered product to location %s"
                        " and checksum location %s"
                        % (product_file, cksum_file))
        except Exception:
            if attempt < max_number_of_attempts:
                sleep(sleep_seconds)  # sleep before trying again
                attempt += 1
                # adjust for next set
                sleep_seconds = int(sleep_seconds * 1.5)
                continue
            else:
                raise
        break

    return (product_file, cksum_file)
def distribute_product_remote(immutability, product_name, source_path,
                              packaging_path, cache_path, parms):
    """Package the product and deliver it to a remote online cache
    """

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    opts = parms['options']

    env = Environment()

    # Determine the remote hostname to use
    destination_host = utilities.get_cache_hostname(
        env.get_cache_host_list())

    # Deliver the product files
    # Attempt X times sleeping between each attempt
    sleep_seconds = settings.DEFAULT_SLEEP_SECONDS
    max_number_of_attempts = settings.MAX_DISTRIBUTION_ATTEMPTS
    max_package_attempts = settings.MAX_PACKAGING_ATTEMPTS
    max_delivery_attempts = settings.MAX_DELIVERY_ATTEMPTS

    attempt = 0
    product_file = 'ERROR'
    cksum_file = 'ERROR'

    while True:
        try:
            # Package the product files
            # Attempt X times sleeping between each sub_attempt
            sub_attempt = 0
            while True:
                try:
                    (product_full_path, cksum_full_path,
                     local_cksum_value) = package_product(immutability,
                                                          source_path,
                                                          packaging_path,
                                                          product_name)
                except Exception:
                    logger.exception("An exception occurred processing %s"
                                     % product_name)
                    if sub_attempt < max_package_attempts:
                        sleep(sleep_seconds)  # sleep before trying again
                        sub_attempt += 1
                        continue
                    else:
                        raise
                break

            # Distribute the product
            # Attempt X times sleeping between each sub_attempt
            sub_attempt = 0
            while True:
                try:
                    (remote_cksum_value, product_file, cksum_file) = \
                        transfer_product(immutability, destination_host,
                                         cache_path,
                                         opts['destination_username'],
                                         opts['destination_pw'],
                                         product_full_path,
                                         cksum_full_path)
                except Exception:
                    logger.exception("An exception occurred processing %s"
                                     % product_name)
                    if sub_attempt < max_delivery_attempts:
                        sleep(sleep_seconds)  # sleep before trying again
                        sub_attempt += 1
                        continue
                    else:
                        raise
                break

            # Checksum validation
            if local_cksum_value.split()[0] != remote_cksum_value.split()[0]:
                raise ESPAException("Failed checksum validation between"
                                    " %s and %s:%s" % (product_full_path,
                                                       destination_host,
                                                       product_file))

            # Always log where we placed the files
            logger.info("Delivered product to %s at location %s"
                        " and cksum location %s" % (destination_host,
                                                    product_file,
                                                    cksum_file))
        except Exception:
            if attempt < max_number_of_attempts:
                sleep(sleep_seconds)  # sleep before trying again
                attempt += 1
                # adjust for next set
                sleep_seconds = int(sleep_seconds * 1.5)
                continue
            else:
                raise
        break

    return (product_file, cksum_file)
def distribute_statistics_local(immutability, product_id, source_path,
                                destination_path):
    '''
    Description:
        Copies the statistics to the specified directory on the local
        system

    Parameters:
        product_id - The unique product ID associated with the files.
        source_path - The full path to where the statistics files to
                      distribute reside.
        destination_path - The full path on the local system to copy the
                           statistics files into.

    Note:
        - It is assumed a stats directory exists under the source_path
        - A stats directory will be created under the destination path
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    d_name = 'stats'

    # Save the current directory location and change to the source directory
    current_directory = os.getcwd()
    os.chdir(source_path)

    try:
        stats_wildcard = ''.join([product_id, '*'])
        stats_path = os.path.join(destination_path, d_name)
        stats_files = os.path.join(d_name, stats_wildcard)
        dest_stats_wildcard = os.path.join(stats_path, stats_wildcard)

        # Create the statistics directory under the destination path
        logger.info("Creating directory {0}".format(stats_path))
        utilities.create_directory(stats_path)

        # Change the attributes on the files so that we can remove them
        if immutability:
            cmd = ' '.join(['sudo', 'chattr', '-if', dest_stats_wildcard])
            output = ''
            try:
                output = utilities.execute_cmd(cmd)
            except Exception:
                pass
            finally:
                if len(output) > 0:
                    logger.info(output)

        # Remove any pre-existing statistics for this product ID
        cmd = ' '.join(['rm', '-f', dest_stats_wildcard])
        output = ''
        try:
            output = utilities.execute_cmd(cmd)
        finally:
            if len(output) > 0:
                logger.info(output)

        # Transfer the statistics files
        for file_path in glob.glob(stats_files):
            filename = os.path.basename(file_path)
            dest_file_path = os.path.join(stats_path, filename)

            logger.info("Copying {0} to {1}".format(filename,
                                                    dest_file_path))
            shutil.copyfile(file_path, dest_file_path)

        # Change the attributes on the files so that we can't remove them
        if immutability:
            cmd = ' '.join(['sudo', 'chattr', '+i', dest_stats_wildcard])
            output = ''
            try:
                output = utilities.execute_cmd(cmd)
            finally:
                if len(output) > 0:
                    logger.info(output)

    except Exception:
        logger.exception('An exception occurred processing {0}'
                         .format(product_id))
        raise

    finally:
        # Change back to the previous directory
        os.chdir(current_directory)
def distribute_statistics_remote(immutability, product_id, source_path,
                                 destination_host, destination_path,
                                 destination_username, destination_pw):
    '''
    Description:
        Transfers the statistics to the specified directory on the
        destination host

    Parameters:
        product_id - The unique product ID associated with the files.
        source_path - The full path to where the statistics files to
                      distribute reside.
        destination_host - The hostname/url for where to distribute the
                           files.
        destination_path - The full path on the destination host to copy
                           the statistics files into.
        destination_username - The user name to use for FTP
        destination_pw - The password to use for FTP

    Note:
        - It is assumed ssh has been set up for access between the
          localhost and destination system
        - It is assumed a stats directory exists under the current
          directory
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    d_name = 'stats'

    # Save the current directory location
    current_directory = os.getcwd()

    # Attempt X times sleeping between each attempt
    attempt = 0
    sleep_seconds = settings.DEFAULT_SLEEP_SECONDS

    while True:
        # Change to the source directory
        os.chdir(source_path)
        try:
            stats_wildcard = ''.join([product_id, '*'])
            stats_path = os.path.join(destination_path, d_name)
            stats_files = os.path.join(d_name, stats_wildcard)
            remote_stats_wildcard = os.path.join(stats_path, stats_wildcard)

            # Create the statistics directory on the destination host
            logger.info("Creating directory {0} on {1}"
                        .format(stats_path, destination_host))
            cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                            destination_host, 'mkdir', '-p', stats_path])

            output = ''
            try:
                logger.debug(' '.join(["mkdir cmd:", cmd]))
                output = utilities.execute_cmd(cmd)
            finally:
                if len(output) > 0:
                    logger.info(output)

            # Change the attributes on the files so that we can remove them
            if immutability:
                cmd = ' '.join(['ssh', '-q', '-o',
                                'StrictHostKeyChecking=no',
                                destination_host, 'sudo', 'chattr', '-if',
                                remote_stats_wildcard])
                output = ''
                try:
                    logger.debug(' '.join(["chattr remote stats cmd:", cmd]))
                    output = utilities.execute_cmd(cmd)
                except Exception:
                    pass
                finally:
                    if len(output) > 0:
                        logger.info(output)

            # Remove any pre-existing statistics
            cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                            destination_host, 'rm', '-f',
                            remote_stats_wildcard])
            output = ''
            try:
                logger.debug(' '.join(["rm remote stats cmd:", cmd]))
                output = utilities.execute_cmd(cmd)
            finally:
                if len(output) > 0:
                    logger.info(output)

            # Transfer the statistics files
            transfer.transfer_file('localhost', stats_files,
                                   destination_host, stats_path,
                                   destination_username=destination_username,
                                   destination_pw=destination_pw)

            logger.info("Verifying statistics transfers")

            # NOTE - Re-purposing the stats_files variable
            stats_files = glob.glob(stats_files)
            for file_name in stats_files:
                local_cksum_value = 'a b'
                remote_cksum_value = 'b c'

                # Generate a local checksum value
                cmd = ' '.join([settings.ESPA_CHECKSUM_TOOL, file_name])
                try:
                    logger.debug(' '.join(["checksum cmd:", cmd]))
                    local_cksum_value = utilities.execute_cmd(cmd)
                except Exception:
                    if len(local_cksum_value) > 0:
                        logger.error(local_cksum_value)
                    raise

                # Generate a remote checksum value
                remote_file = os.path.join(destination_path, file_name)
                cmd = ' '.join(['ssh', '-q', '-o',
                                'StrictHostKeyChecking=no',
                                destination_host,
                                settings.ESPA_CHECKSUM_TOOL, remote_file])
                try:
                    remote_cksum_value = utilities.execute_cmd(cmd)
                except Exception:
                    if len(remote_cksum_value) > 0:
                        logger.error(remote_cksum_value)
                    raise

                # Checksum validation
                if (local_cksum_value.split()[0] !=
                        remote_cksum_value.split()[0]):
                    raise ESPAException("Failed checksum validation between"
                                        " %s and %s:%s"
                                        % (file_name, destination_host,
                                           remote_file))

            # Change the attributes on the files so that we can't remove
            # them
            if immutability:
                cmd = ' '.join(['ssh', '-q', '-o',
                                'StrictHostKeyChecking=no',
                                destination_host, 'sudo', 'chattr', '+i',
                                remote_stats_wildcard])
                output = ''
                try:
                    logger.debug(' '.join(["chattr remote stats cmd:", cmd]))
                    output = utilities.execute_cmd(cmd)
                finally:
                    if len(output) > 0:
                        logger.info(output)

        except Exception:
            logger.exception("An exception occurred processing %s"
                             % product_id)
            if attempt < settings.MAX_DELIVERY_ATTEMPTS:
                sleep(sleep_seconds)  # sleep before trying again
                attempt += 1
                continue
            else:
                raise
        finally:
            # Change back to the previous directory
            os.chdir(current_directory)

        break
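
# Example (illustrative sketch, not part of the original module): the
# checksum tool is assumed to print '<checksum> <path>', so comparing only
# the first whitespace-separated token ignores the differing local and
# remote path portions.  Values below are hypothetical.
def _example_cksum_compare():
    local_cksum_value = '3f5a9c /work/stats/product.stats'
    remote_cksum_value = '3f5a9c /cache/order/stats/product.stats'
    assert local_cksum_value.split()[0] == remote_cksum_value.split()[0]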
def package_product(immutability, source_directory, destination_directory,
                    product_name):
    '''
    Description:
        Package the contents of the source directory into a gzipped tarball
        located in the destination directory and generate a checksum file
        for it.

        The filename will be prefixed with the specified product name.

    Returns:
        product_full_path - The full path to the product including filename
        cksum_full_path - The full path to the check sum including filename
        cksum_value - The checksum value
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    product_full_path = os.path.join(destination_directory, product_name)

    # Make sure the directory exists.
    utilities.create_directory(destination_directory)

    # Remove any pre-existing files
    # Grab the first part of the filename, which is not unique
    filename_parts = product_full_path.split('-')
    filename_parts[-1] = '*'  # Replace the last element of the list
    filename = '-'.join(filename_parts)  # Join with '-'

    # Name of the checksum to be created
    cksum_filename = '.'.join([product_name,
                               settings.ESPA_CHECKSUM_EXTENSION])

    # Change the attributes on the files so that we can remove them
    if immutability:
        cmd = ' '.join(['sudo', 'chattr', '-if', filename, cksum_filename])
        output = ''
        try:
            output = utilities.execute_cmd(cmd)
        except Exception:
            pass
        finally:
            if len(output) > 0:
                logger.info(output)

    # Remove the file first just in case this is a second run
    cmd = ' '.join(['rm', '-f', filename])
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    finally:
        if len(output) > 0:
            logger.info(output)

    # Change to the source directory
    current_directory = os.getcwd()
    os.chdir(source_directory)
    try:
        # Tar the files
        logger.info("Packaging completed product to %s.tar.gz"
                    % product_full_path)

        # Grab the files to tar and gzip
        product_files = glob.glob("*")

        # Execute tar with zipping, the full/path/*.tar.gz name is returned
        product_full_path = utilities.tar_files(product_full_path,
                                                product_files, gzip=True)

        # Change file permissions
        logger.info("Changing file permissions on %s to 0644"
                    % product_full_path)
        os.chmod(product_full_path, 0644)

        # Verify that the archive is good
        output = ''
        cmd = ' '.join(['tar', '-tf', product_full_path])
        try:
            output = utilities.execute_cmd(cmd)
        finally:
            if len(output) > 0:
                logger.info(output)

        # If it was good, create a checksum file
        cksum_output = ''
        cmd = ' '.join([settings.ESPA_CHECKSUM_TOOL, product_full_path])
        try:
            cksum_output = utilities.execute_cmd(cmd)
        finally:
            if len(cksum_output) > 0:
                logger.info(cksum_output)

        # Get the base filename of the file that was checksum'd
        cksum_prod_filename = os.path.basename(product_full_path)

        logger.debug("Checksum file = %s" % cksum_filename)
        logger.debug("Checksum'd file = %s" % cksum_prod_filename)

        # Make sure they are strings
        cksum_values = cksum_output.split()
        cksum_value = "%s %s" % (str(cksum_values[0]),
                                 str(cksum_prod_filename))
        logger.info("Generating cksum: %s" % cksum_value)

        cksum_full_path = os.path.join(destination_directory,
                                       cksum_filename)

        try:
            with open(cksum_full_path, 'wb+') as cksum_fd:
                cksum_fd.write(cksum_value)
        except Exception:
            logger.exception('Error building checksum file')
            raise
    finally:
        # Change back to the previous directory
        os.chdir(current_directory)

    return (product_full_path, cksum_full_path, cksum_value)
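
# Example (illustrative sketch, not part of the original module): how the
# cleanup wildcard is derived by replacing the last '-'-separated (unique)
# component of a hypothetical product path.
def _example_cleanup_wildcard():
    product_full_path = '/cache/orders/espa-LC08_L1TP_042034-SC20190723'
    filename_parts = product_full_path.split('-')
    filename_parts[-1] = '*'
    print('-'.join(filename_parts))
    # /cache/orders/espa-LC08_L1TP_042034-*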
def transfer_product(immutability, destination_host, destination_directory,
                     destination_username, destination_pw,
                     product_filename, cksum_filename):
    '''
    Description:
        Transfers the product and associated checksum to the specified
        directory on the destination host

    Returns:
        cksum_value - The check sum value from the destination
        destination_product_file - The full path on the destination

    Note:
        - It is assumed ssh has been setup for access between the localhost
          and destination system
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Create the destination directory on the destination host
    logger.info("Creating destination directory %s on %s"
                % (destination_directory, destination_host))
    cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                    destination_host, 'mkdir', '-p',
                    destination_directory])

    output = ''
    try:
        logger.debug(' '.join(["mkdir cmd:", cmd]))
        output = utilities.execute_cmd(cmd)
    finally:
        if len(output) > 0:
            logger.info(output)

    # Figure out the destination full paths
    destination_cksum_file = os.path.join(
        destination_directory, os.path.basename(cksum_filename))
    destination_product_file = os.path.join(
        destination_directory, os.path.basename(product_filename))

    # Remove any pre-existing files
    # Grab the first part of the filename, which is not unique
    remote_filename_parts = destination_product_file.split('-')
    remote_filename_parts[-1] = '*'  # Replace the last element of the list
    remote_filename = '-'.join(remote_filename_parts)  # Join with '-'

    # Change the attributes on the files so that we can remove them
    if immutability:
        cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                        destination_host, 'sudo', 'chattr', '-if',
                        remote_filename])
        output = ''
        try:
            logger.debug(' '.join(["chattr remote file cmd:", cmd]))
            output = utilities.execute_cmd(cmd)
        except Exception:
            pass
        finally:
            if len(output) > 0:
                logger.info(output)

    # Remove the files on the remote system
    cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                    destination_host, 'rm', '-f', remote_filename])
    output = ''
    try:
        logger.debug(' '.join(["rm remote file cmd:", cmd]))
        output = utilities.execute_cmd(cmd)
    finally:
        if len(output) > 0:
            logger.info(output)

    # Transfer the checksum file
    transfer.transfer_file('localhost', cksum_filename, destination_host,
                           destination_cksum_file,
                           destination_username=destination_username,
                           destination_pw=destination_pw)

    # Transfer the product file
    transfer.transfer_file('localhost', product_filename, destination_host,
                           destination_product_file,
                           destination_username=destination_username,
                           destination_pw=destination_pw)

    # Change the attributes on the files so that we can't remove them
    if immutability:
        cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                        destination_host, 'sudo', 'chattr', '+i',
                        remote_filename])
        output = ''
        try:
            logger.debug(' '.join(["chattr remote file cmd:", cmd]))
            output = utilities.execute_cmd(cmd)
        finally:
            if len(output) > 0:
                logger.info(output)

    # Get the remote checksum value
    cksum_value = ''
    cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                    destination_host, settings.ESPA_CHECKSUM_TOOL,
                    destination_product_file])
    try:
        logger.debug(' '.join(["ssh cmd:", cmd]))
        cksum_value = utilities.execute_cmd(cmd)
    except Exception:
        if len(cksum_value) > 0:
            logger.error(cksum_value)
        raise

    return (cksum_value, destination_product_file, destination_cksum_file)
    finally:
        # Change back to the previous directory
        os.chdir(current_directory)


if __name__ == '__main__':
    '''
    Description:
        This is test code only used during prototyping.
        It only provides stats for landsat and modis data.
    '''

    # Configure logging
    EspaLogging.configure(settings.PROCESSING_LOGGER, order='test',
                          product='statistics')
    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Hold the wild card strings in a type based dictionary
    files_to_search_for = dict()

    # Landsat files
    files_to_search_for['SR'] = ['*_sr_band[0-9].img']
    files_to_search_for['TOA'] = ['*_toa_band[0-9].img']
    files_to_search_for['INDEX'] = ['*_nbr.img', '*_nbr2.img', '*_ndmi.img',
                                    '*_ndvi.img', '*_evi.img', '*_savi.img',
                                    '*_msavi.img']

    # MODIS files
def http_transfer_file(download_url, destination_file):
    '''
    Description:
        Using http transfer a file from a source location to a destination
        file on the localhost.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    logger.info(download_url)

    # First way
    # file_size = 0
    # retrieved_bytes = 0
    # with closing(requests.get(download_url, stream=True)) as req:
    #     if not req.ok:
    #         raise Exception("Transfer Failed - HTTP - Reason(%s)"
    #                         % req.reason)
    #
    #     file_size = int(req.headers['content-length'])
    #
    #     with open(destination_file, 'wb') as local_fd:
    #         for data_chunk in req.iter_content(
    #                 settings.TRANSFER_BLOCK_SIZE):
    #             local_fd.write(data_chunk)
    #             retrieved_bytes += len(data_chunk)
    #
    # if retrieved_bytes != file_size:
    #     raise Exception("Transfer Failed - HTTP - Retrieved %d"
    #                     " out of %d bytes" % (retrieved_bytes, file_size))
    # else:
    #     logger.info("Transfer Complete - HTTP")

    # Second way
    # req = None
    # try:
    #     req = requests.get(download_url)
    #
    #     if not req.ok:
    #         logger.error("Transfer Failed - HTTP")
    #         req.raise_for_status()
    #
    #     with open(destination_file, 'wb') as local_fd:
    #         local_fd.write(req.content)
    # except:
    #     logger.error("Transfer Failed - HTTP")
    #     raise
    # finally:
    #     if req is not None:
    #         req.close()

    # Third way
    session = requests.Session()

    session.mount('http://', requests.adapters.HTTPAdapter(max_retries=3))
    session.mount('https://', requests.adapters.HTTPAdapter(max_retries=3))

    retry_attempt = 0
    done = False
    while not done:
        req = None
        try:
            req = session.get(url=download_url, timeout=300.0)

            if not req.ok:
                logger.error("Transfer Failed - HTTP")
                req.raise_for_status()

            with open(destination_file, 'wb') as local_fd:
                local_fd.write(req.content)
            done = True
        except Exception:
            logger.exception("Transfer Issue - HTTP")
            if retry_attempt > 3:
                raise Exception("Transfer Failed - HTTP"
                                " - exceeded retry limit")
            retry_attempt += 1
            # Sleep randomly from 1 to 10 minutes
            sleep_seconds = int(random.random() * 540) + 60
            sleep(sleep_seconds)
        finally:
            if req is not None:
                req.close()

    logger.info("Transfer Complete - HTTP")
def main():
    """Configures an order from the command line input and calls the
    processing code using the order
    """

    args = parse_command_line()
    proc_cfg = config.retrieve_cfg(PROC_CFG_FILENAME)

    proc_cfg = override_config(args, proc_cfg)

    # Configure the base logger for this request
    EspaLogging.configure_base_logger(filename=cli_log_filename(args))

    # Configure the processing logger for this request
    EspaLogging.configure(settings.PROCESSING_LOGGER,
                          order=args.order_id,
                          product=args.product_id,
                          debug=args.debug)

    # CLI will use the base logger
    logger = EspaLogging.get_logger('base')

    logger.info('*** Begin ESPA Processing on host [{}] ***'
                .format(socket.gethostname()))

    # Set to error condition
    proc_status = False

    try:
        # Extra command line validation
        if args.pixel_size is not None and args.pixel_size_units is None:
            raise CliError('Must specify --pixel-size-units if specifying'
                           ' --pixel-size')

        export_environment_variables(proc_cfg)

        template = load_template(filename=TEMPLATE_FILENAME)

        order = update_template(args=args, template=template)

        # Change to the processing directory
        current_directory = os.getcwd()
        os.chdir(proc_cfg.get('processing', 'espa_work_dir'))

        try:
            # All processors are implemented in the processor module
            pp = processor.get_instance(proc_cfg, order)
            (destination_product_file,
             destination_cksum_file) = pp.process()

            # Set to success condition
            proc_status = True

        finally:
            # Change back to the previous directory
            os.chdir(current_directory)

    except Exception:
        logger.exception('*** Errors during processing ***')
        sys.exit(1)

    finally:
        logger.info('*** ESPA Processing Terminated ***')

        if not args.bridge_mode:
            archive_log_files(args, proc_cfg, proc_status)
def process(proc_cfg, developer_sleep_mode=False):
    """Read all lines from STDIN and process them

    Each line is converted to a JSON dictionary of the parameters for
    processing.  Validation is performed on the JSON dictionary to test if
    valid for this mapper.  After validation the generation of the products
    is performed.
    """

    # Initially set to the base logger
    logger = EspaLogging.get_logger('base')

    processing_location = socket.gethostname()

    # Process each line from stdin
    for line in sys.stdin:
        if not line or len(line) < 1 or not line.strip().find('{') > -1:
            # this is how the nlineinputformat is supplying values:
            # 341104 {"orderid":
            # logger.info('BAD LINE:{}##'.format(line))
            continue
        else:
            # take the entry starting at the first opening brace to the end
            line = line[line.find('{'):]

        line = line.strip()

        # Reset these for each line
        (server, order_id, product_id) = (None, None, None)

        start_time = datetime.datetime.now()

        # Initialize so that we don't sleep
        dont_sleep = True

        try:
            line = line.replace('#', '')
            parms = json.loads(line)

            if not parameters.test_for_parameter(parms, 'options'):
                raise ValueError('Error missing JSON [options] record')

            # TODO scene will be replaced with product_id someday
            (order_id, product_id, product_type, options) = \
                (parms['orderid'], parms['scene'], parms['product_type'],
                 parms['options'])

            if product_id != 'plot':
                # Developer mode is always false unless you are a
                # developer, so sleeping will always occur for non-plotting
                # requests
                # Override with the developer mode
                dont_sleep = developer_sleep_mode

            # Fix the orderid in case it contains any single quotes
            # The processors cannot handle single quotes in the email
            # portion due to usage in command lines.
            parms['orderid'] = order_id.replace("'", '')

            # If it is missing due to above TODO, then add it
            if not parameters.test_for_parameter(parms, 'product_id'):
                parms['product_id'] = product_id

            # Figure out if debug level logging was requested
            debug = False
            if parameters.test_for_parameter(options, 'debug'):
                debug = options['debug']

            # Configure and get the logger for this order request
            EspaLogging.configure(settings.PROCESSING_LOGGER,
                                  order=order_id,
                                  product=product_id,
                                  debug=debug)
            logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

            logger.info('Processing {}:{}'.format(order_id, product_id))

            # Update the status in the database
            if parameters.test_for_parameter(parms, 'espa_api'):
                if parms['espa_api'] != 'skip_api':
                    server = api_interface.api_connect(parms['espa_api'])
                    if server is not None:
                        status = server.update_status(product_id, order_id,
                                                      processing_location,
                                                      'processing')
                        if not status:
                            logger.warning('Failed processing API call'
                                           ' to update_status to'
                                           ' processing')

            if product_id != 'plot':
                # Make sure we can process the sensor
                tmp_info = sensor.info(product_id)
                del tmp_info

                # Make sure we have a valid output format
                if not parameters.test_for_parameter(options,
                                                     'output_format'):
                    logger.warning('[output_format] parameter missing'
                                   ' defaulting to envi')
                    options['output_format'] = 'envi'

                if (options['output_format']
                        not in parameters.VALID_OUTPUT_FORMATS):
                    raise ValueError('Invalid Output format {}'
                                     .format(options['output_format']))

            # ----------------------------------------------------------------
            # NOTE: The first thing the product processor does during
            #       initialization is validate the input parameters.
            # ----------------------------------------------------------------

            destination_product_file = 'ERROR'
            destination_cksum_file = 'ERROR'
            pp = None
            try:
                # All processors are implemented in the processor module
                pp = processor.get_instance(proc_cfg, parms)
                (destination_product_file,
                 destination_cksum_file) = pp.process()
            finally:
                # Free disk space to be nice to the whole system.
                if pp is not None:
                    pp.remove_product_directory()

            # Sleep the number of seconds for minimum request duration
            sleep(get_sleep_duration(proc_cfg, start_time, dont_sleep))

            archive_log_files(order_id, product_id)

            # Everything was successful so mark the scene complete
            if server is not None:
                status = server.mark_scene_complete(
                    product_id, order_id, processing_location,
                    destination_product_file, destination_cksum_file, '')
                if not status:
                    logger.warning('Failed processing API call to'
                                   ' mark_scene_complete')

        except Exception as excep:
            # First log the exception
            logger.exception('Exception encountered stacktrace follows')

            # Sleep the number of seconds for minimum request duration
            sleep(get_sleep_duration(proc_cfg, start_time, dont_sleep))

            archive_log_files(order_id, product_id)

            if server is not None:
                try:
                    status = set_product_error(server,
                                               order_id,
                                               product_id,
                                               processing_location)
                except Exception:
                    logger.exception('Exception encountered stacktrace'
                                     ' follows')
        finally:
            # Reset back to the base logger
            logger = EspaLogging.get_logger('base')
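
# Example (illustrative sketch, not part of the original module): a line as
# supplied on STDIN by Hadoop's NLineInputFormat, per the comment in
# process() -- a record number, then the JSON payload.  The keys match the
# ones process() reads; the values are hypothetical.
_EXAMPLE_STDIN_LINE = ('341104 {"orderid": "someone@example.com-0123",'
                       ' "scene": "LC080420342019072301T1",'
                       ' "product_type": "landsat",'
                       ' "options": {"output_format": "envi",'
                       ' "debug": false}}')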
def validate_reprojection_parameters(parms, product_id):
    '''
    Description:
        Perform a check on the possible reprojection parameters

    Note:
        We blindly convert values to float or int without checking them.
        It is assumed that the web tier has validated them.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Create this and set to None if not present
    if not test_for_parameter(parms, 'projection'):
        logger.warning('projection: missing defaulting to None')
        parms['projection'] = None

    # Create this and set to 'near' if not present
    if not test_for_parameter(parms, 'resample_method'):
        logger.warning('resample_method: missing defaulting to near')
        parms['resample_method'] = 'near'

    # Make sure these have at least a False value
    required_parameters = ['reproject', 'image_extents', 'resize']
    for parameter in required_parameters:
        if not test_for_parameter(parms, parameter):
            logger.warning('{0}: missing defaulting to False'
                           .format(parameter))
            parms[parameter] = False

    if parms['reproject']:
        if not test_for_parameter(parms, 'target_projection'):
            raise RuntimeError('Missing target_projection parameter')
        else:
            # Convert to lower case
            target_projection = parms['target_projection'].lower()
            parms['target_projection'] = target_projection

            # Verify a valid projection
            if target_projection not in VALID_PROJECTIONS:
                msg = ('Invalid target_projection [{0}]:'
                       ' Argument must be one of ({1})'
                       .format(target_projection,
                               ', '.join(VALID_PROJECTIONS)))
                raise ValueError(msg)

            if target_projection == 'sinu':
                if not test_for_parameter(parms, 'central_meridian'):
                    raise RuntimeError('Missing central_meridian parameter')
                else:
                    parms['central_meridian'] = \
                        float(parms['central_meridian'])
                if not test_for_parameter(parms, 'false_easting'):
                    raise RuntimeError('Missing false_easting parameter')
                else:
                    parms['false_easting'] = float(parms['false_easting'])
                if not test_for_parameter(parms, 'false_northing'):
                    raise RuntimeError('Missing false_northing parameter')
                else:
                    parms['false_northing'] = float(parms['false_northing'])

                if not test_for_parameter(parms, 'datum'):
                    parms['datum'] = None

            if target_projection == 'aea':
                if not test_for_parameter(parms, 'std_parallel_1'):
                    raise RuntimeError('Missing std_parallel_1 parameter')
                else:
                    parms['std_parallel_1'] = float(parms['std_parallel_1'])
                if not test_for_parameter(parms, 'std_parallel_2'):
                    raise RuntimeError('Missing std_parallel_2 parameter')
                else:
                    parms['std_parallel_2'] = float(parms['std_parallel_2'])
                if not test_for_parameter(parms, 'origin_lat'):
                    raise RuntimeError('Missing origin_lat parameter')
                else:
                    parms['origin_lat'] = float(parms['origin_lat'])
                if not test_for_parameter(parms, 'central_meridian'):
                    raise RuntimeError('Missing central_meridian parameter')
                else:
                    parms['central_meridian'] = \
                        float(parms['central_meridian'])
                if not test_for_parameter(parms, 'false_easting'):
                    raise RuntimeError('Missing false_easting parameter')
                else:
                    parms['false_easting'] = float(parms['false_easting'])
                if not test_for_parameter(parms, 'false_northing'):
                    raise RuntimeError('Missing false_northing parameter')
                else:
                    parms['false_northing'] = float(parms['false_northing'])

                # The datum must be in uppercase for the processing code to
                # work, so if it is present here, we force it
                if not test_for_parameter(parms, 'datum'):
                    raise RuntimeError('Missing datum parameter')
                else:
                    parms['datum'] = parms['datum'].upper()
                if parms['datum'] not in settings.VALID_DATUMS:
                    valid_items = ', '.join(settings.VALID_DATUMS)
                    raise ValueError('Invalid datum [{0}]:'
                                     ' Argument must be one of [{1}]'
                                     .format(parms['datum'], valid_items))

            if target_projection == 'utm':
                if not test_for_parameter(parms, 'utm_zone'):
                    raise RuntimeError('Missing utm_zone parameter')
                else:
                    zone = int(parms['utm_zone'])
                    if zone < 0 or zone > 60:
                        raise ValueError('Invalid utm_zone [{0}]:'
                                         ' Value must be 0-60'.format(zone))
                    parms['utm_zone'] = zone
                if not test_for_parameter(parms, 'utm_north_south'):
                    raise RuntimeError('Missing utm_north_south parameter')
                elif parms['utm_north_south'] not in VALID_NS:
                    raise ValueError('Invalid utm_north_south [{0}]:'
                                     ' Argument must be one of [{1}]'
                                     .format(parms['utm_north_south'],
                                             ', '.join(VALID_NS)))

                if not test_for_parameter(parms, 'datum'):
                    parms['datum'] = None

            if target_projection == 'ps':
                if not test_for_parameter(parms, 'latitude_true_scale'):
                    # Must be tested before origin_lat
                    raise RuntimeError('Missing latitude_true_scale'
                                       ' parameter')
                else:
                    value = float(parms['latitude_true_scale'])
                    if ((value < 60.0 and value > -60.0)
                            or value > 90.0 or value < -90.0):
                        raise ValueError('Invalid latitude_true_scale'
                                         ' [{0}]: Value must be between'
                                         ' (-60.0 and -90.0) or'
                                         ' (60.0 and 90.0)'.format(value))
                    parms['latitude_true_scale'] = value

                if not test_for_parameter(parms, 'longitude_pole'):
                    raise RuntimeError('Missing longitude_pole parameter')
                else:
                    parms['longitude_pole'] = float(parms['longitude_pole'])

                if not test_for_parameter(parms, 'origin_lat'):
                    # If the user did not specify the origin_lat value, then
                    # set it based on the latitude true scale
                    lat_ts = float(parms['latitude_true_scale'])
                    if lat_ts < 0:
                        parms['origin_lat'] = -90.0
                    else:
                        parms['origin_lat'] = 90.0
                else:
                    value = float(parms['origin_lat'])
                    if value != -90.0 and value != 90.0:
                        raise ValueError('Invalid origin_lat [{0}]:'
                                         ' Value must be -90.0 or 90.0'
                                         .format(value))
                    parms['origin_lat'] = value

                if not test_for_parameter(parms, 'false_easting'):
                    raise RuntimeError('Missing false_easting parameter')
                else:
                    parms['false_easting'] = float(parms['false_easting'])
                if not test_for_parameter(parms, 'false_northing'):
                    raise RuntimeError('Missing false_northing parameter')
                else:
                    parms['false_northing'] = float(parms['false_northing'])
                if not test_for_parameter(parms, 'datum'):
                    parms['datum'] = None

            if target_projection == 'lonlat':
                if not test_for_parameter(parms, 'datum'):
                    parms['datum'] = None

    if parms['resample_method'] not in VALID_RESAMPLE_METHODS:
        raise ValueError('Invalid resample_method [{0}]:'
                         ' Argument must be one of [{1}]'
                         .format(parms['resample_method'],
                                 ', '.join(VALID_RESAMPLE_METHODS)))

    if parms['image_extents']:
        if not test_for_parameter(parms, 'image_extents_units'):
            raise RuntimeError('Missing image_extents_units parameter')
        else:
            if (parms['image_extents_units']
                    not in VALID_IMAGE_EXTENTS_UNITS):
                raise ValueError('Invalid image_extents_units [{0}]:'
                                 ' Argument must be one of [{1}]'
                                 .format(parms['image_extents_units'],
                                         ', '.join(
                                             VALID_IMAGE_EXTENTS_UNITS)))
        if not test_for_parameter(parms, 'minx'):
            raise RuntimeError('Missing minx parameter')
        else:
            parms['minx'] = float(parms['minx'])
        if not test_for_parameter(parms, 'miny'):
            raise RuntimeError('Missing miny parameter')
        else:
            parms['miny'] = float(parms['miny'])
        if not test_for_parameter(parms, 'maxx'):
            raise RuntimeError('Missing maxx parameter')
        else:
            parms['maxx'] = float(parms['maxx'])
        if not test_for_parameter(parms, 'maxy'):
            raise RuntimeError('Missing maxy parameter')
        else:
            parms['maxy'] = float(parms['maxy'])
    else:
        # Default these
        parms['minx'] = None
        parms['miny'] = None
        parms['maxx'] = None
        parms['maxy'] = None
        parms['image_extents_units'] = None

    if parms['resize']:
        if not test_for_parameter(parms, 'pixel_size'):
            raise RuntimeError('Missing pixel_size parameter')
        else:
            parms['pixel_size'] = float(parms['pixel_size'])
        if not test_for_parameter(parms, 'pixel_size_units'):
            raise RuntimeError('Missing pixel_size_units parameter')
        else:
            if parms['pixel_size_units'] not in VALID_PIXEL_SIZE_UNITS:
                valid_items = ', '.join(VALID_PIXEL_SIZE_UNITS)
                raise ValueError('Invalid pixel_size_units [{0}]:'
                                 ' Argument must be one of [{1}]'
                                 .format(parms['pixel_size_units'],
                                         valid_items))
    else:
        # Default these
        parms['pixel_size'] = None
        parms['pixel_size_units'] = None

    if ((parms['reproject'] or parms['image_extents'])
            and not parms['resize']):
        # Somebody asked for reproject or extents, but didn't specify a
        # pixel size

        units = 'meters'
        if parms['reproject'] and parms['target_projection'] == 'lonlat':
            units = 'dd'

        # Default to the sensor specific meters or dd equivalent
        parms['pixel_size'] = \
            sensor.info(product_id).default_pixel_size[units]
        parms['pixel_size_units'] = units

        logger.warning('resize: parameter not provided'
                       ' but required for reprojection or image extents'
                       ' (Defaulting pixel_size({0})'
                       ' and pixel_size_units({1}))'
                       .format(parms['pixel_size'],
                               parms['pixel_size_units']))
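
# Example (illustrative sketch, not part of the original module): a
# hypothetical UTM reprojection request.  The keys match the checks above;
# product_id must name a real, supported product because the sensor's
# default pixel size is looked up from it when resize is not requested.
def _example_utm_parms(product_id):
    parms = {'reproject': True,
             'target_projection': 'UTM',  # lower-cased in place to 'utm'
             'utm_zone': '16',            # converted to int, range-checked
             'utm_north_south': 'north',  # assumed member of VALID_NS
             'image_extents': False,
             'resize': False}
    validate_reprojection_parameters(parms, product_id)
    # parms now also carries defaults: datum=None, minx/miny/maxx/maxy=None,
    # and pixel_size/pixel_size_units taken from the sensor defaults.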
def generate_statistics(work_directory, files_to_search_for):
    '''
    Description:
        Create the stats output directory and each output stats file for
        each file specified.

    Notes:
        The stats directory is created here because we only want it in the
        product if we need statistics.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Change to the working directory
    current_directory = os.getcwd()
    os.chdir(work_directory)

    try:
        stats_output_path = 'stats'
        try:
            os.makedirs(stats_output_path)
        except OSError as excep:  # Python >2.5
            if (excep.errno == errno.EEXIST
                    and os.path.isdir(stats_output_path)):
                pass
            else:
                raise

        # Build the list of files to process
        file_names = dict()
        for band_type in files_to_search_for:
            file_names[band_type] = list()
            for search in files_to_search_for[band_type]:
                file_names[band_type].extend(glob.glob(search))

        # Generate the requested statistics for each tile
        for band_type in file_names:
            for file_name in file_names[band_type]:
                logger.info("Generating statistics for: %s" % file_name)

                (minimum, maximum, mean, stddev,
                 valid) = get_statistics(file_name, band_type)

                # Drop the filename extension so we can replace it with
                # 'stats'
                base = os.path.splitext(file_name)[0]
                base_name = '.'.join([base, 'stats'])

                # Figure out the full path filename
                stats_output_file = os.path.join(stats_output_path,
                                                 base_name)

                # Buffer the stats
                data_io = StringIO()
                data_io.write("FILENAME=%s\n" % file_name)
                data_io.write("MINIMUM=%f\n" % minimum)
                data_io.write("MAXIMUM=%f\n" % maximum)
                data_io.write("MEAN=%f\n" % mean)
                data_io.write("STDDEV=%f\n" % stddev)
                data_io.write("VALID=%s\n" % valid)

                # Create the stats file
                with open(stats_output_file, 'w+') as stat_fd:
                    stat_fd.write(data_io.getvalue())

    finally:
        # Change back to the previous directory
        os.chdir(current_directory)
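
# Example (illustrative sketch, not part of the original module): the
# key=value layout of one generated stats file, following the %f / %s
# writes above.  File name and values are hypothetical.
_EXAMPLE_STATS_FILE = ("FILENAME=product_sr_band4.img\n"
                       "MINIMUM=0.012300\n"
                       "MAXIMUM=0.456700\n"
                       "MEAN=0.187600\n"
                       "STDDEV=0.034500\n"
                       "VALID=True\n")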