def reformat(metadata_filename, work_directory, input_format, output_format):
    '''
    Description:
        Re-format the bands to the specified format using our raw binary
        tools or gdal, whichever is appropriate for the task.

        Input espa:
            Output Formats: envi (espa), gtiff, and hdf
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Don't do anything if they match
    if input_format == output_format:
        return

    # Change to the working directory
    current_directory = os.getcwd()
    os.chdir(work_directory)

    try:
        # Convert from our internal ESPA/ENVI format to GeoTIFF
        if input_format == 'envi' and output_format == 'gtiff':
            # NOTE: str.rstrip strips a character set, not a suffix, so
            # split on '.xml' to remove the extension
            gtiff_name = metadata_filename.split('.xml')[0]

            # Call with deletion of source files
            cmd = ' '.join(['convert_espa_to_gtif', '--del_src_files',
                            '--xml', metadata_filename,
                            '--gtif', gtiff_name])

            output = ''
            try:
                output = utilities.execute_cmd(cmd)

                # Rename the XML file back to *.xml from *_gtif.xml
                meta_gtiff_name = metadata_filename.split('.xml')[0]
                meta_gtiff_name = ''.join([meta_gtiff_name, '_gtif.xml'])

                os.rename(meta_gtiff_name, metadata_filename)
            except Exception as e:
                raise ee.ESPAException(ee.ErrorCodes.reformat,
                                       str(e)), None, sys.exc_info()[2]
            finally:
                if len(output) > 0:
                    logger.info(output)

            # Remove all the *.tfw files since gtiff was chosen a bunch
            # may be present
            files_to_remove = glob.glob('*.tfw')
            if len(files_to_remove) > 0:
                cmd = ' '.join(['rm', '-rf'] + files_to_remove)
                logger.info(' '.join(['REMOVING TFW DATA COMMAND:', cmd]))

                output = ''
                try:
                    output = utilities.execute_cmd(cmd)
                except Exception as e:
                    raise ee.ESPAException(ee.ErrorCodes.reformat,
                                           str(e)), None, sys.exc_info()[2]
                finally:
                    if len(output) > 0:
                        logger.info(output)

        # Convert from our internal ESPA/ENVI format to HDF
        elif input_format == 'envi' and output_format == 'hdf-eos2':
            # convert_espa_to_hdf
            hdf_name = metadata_filename.replace('.xml', '.hdf')

            # Call with deletion of source files
            cmd = ' '.join(['convert_espa_to_hdf', '--del_src_files',
                            '--xml', metadata_filename,
                            '--hdf', hdf_name])

            output = ''
            try:
                output = utilities.execute_cmd(cmd)

                # Rename the XML file back to *.xml from *_hdf.xml
                meta_hdf_name = metadata_filename.replace('.xml', '_hdf.xml')

                os.rename(meta_hdf_name, metadata_filename)
            except Exception as e:
                raise ee.ESPAException(ee.ErrorCodes.reformat,
                                       str(e)), None, sys.exc_info()[2]
            finally:
                if len(output) > 0:
                    logger.info(output)

        # Requested conversion not implemented
        else:
            raise ValueError("Unsupported reformat combination (%s, %s)"
                             % (input_format, output_format))

    finally:
        # Change back to the previous directory
        os.chdir(current_directory)
def untar_data(source_file, destination_directory):
    '''
    Description:
        Using tar extract the file contents into a destination directory.

    Notes:
        Works with '*.tar.gz' and '*.tar' files.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    cmd = ' '.join(['tar', '--directory', destination_directory,
                    '-xvf', source_file])

    logger.info("Unpacking [%s] to [%s]"
                % (source_file, destination_directory))

    # Unpack the data and raise any errors
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    except Exception as e:
        logger.error("Failed to unpack data")
        raise e
    finally:
        if len(output) > 0:
            logger.info(output)
def untar_data(source_file, destination_directory):
    '''
    Description:
        Using tar extract the file contents into a destination directory.

    Notes:
        Works with '*.tar.gz' and '*.tar' files.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    cmd = ' '.join(['tar', '--directory', destination_directory,
                    '-xvf', source_file])

    logger.info("Unpacking [%s] to [%s]"
                % (source_file, destination_directory))

    # Unpack the data and raise any errors
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    except Exception:
        logger.exception("Failed to unpack data")
        raise
    finally:
        if len(output) > 0:
            logger.info(output)
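# Usage sketch (hypothetical paths): both untar_data() variants above shell
# out to tar, and GNU tar auto-detects gzip compression on extraction, so a
# plain '-xvf' handles '*.tar' and '*.tar.gz' alike:
#
#     untar_data('/stage/LE07_scene.tar.gz', '/work/orders/abc123')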
def remote_copy_file_to_file(source_host, source_file, destination_file):
    '''
    Description:
        Use unix 'cp' to copy a file from one place to another on a remote
        machine using ssh.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                    source_host, 'cp', source_file, destination_file])

    # Transfer the data and raise any errors
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    except Exception:
        logger.error("Failed to copy file")
        raise
    finally:
        if len(output) > 0:
            logger.info(output)

    logger.info("Transfer complete - SSH-CP")
def untar_data(source_file, destination_directory):
    '''
    Description:
        Using tar extract the file contents into a destination directory.

    Notes:
        Works with '*.tar.gz' and '*.tar' files.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Look up the configured level of multithreading
    num_threads_str = retrieve_pigz_cfg(PROC_CFG_FILENAME)

    # Decompress with pigz and pipe the archive into tar
    cmd = ' '.join(['unpigz -p ', num_threads_str, ' < ', source_file,
                    ' | tar', '--directory', destination_directory,
                    ' -xv'])

    logger.info("Unpacking [%s] to [%s]"
                % (source_file, destination_directory))

    # Unpack the data and raise any errors
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    except Exception:
        logger.exception("Failed to unpack data")
        raise
    finally:
        if len(output) > 0:
            logger.info(output)
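# The pigz variant above assembles a shell pipeline instead of letting tar
# fork its own single-threaded gzip, so decompression can use multiple
# cores.  With num_threads_str == '4', the composed command is roughly:
#
#     unpigz -p 4 < /stage/scene.tar.gz | tar --directory /work -xv
#
# A minimal, self-contained sketch of the same idea using subprocess
# directly (assumes 'unpigz' and 'tar' are on PATH; paths are hypothetical):
import subprocess

def untar_with_pigz_sketch(source_file, destination_directory,
                           num_threads=4):
    # Decompress with pigz ('-c' streams to stdout) and pipe into tar
    unpigz = subprocess.Popen(['unpigz', '-p', str(num_threads), '-c',
                               source_file],
                              stdout=subprocess.PIPE)
    tar = subprocess.Popen(['tar', '--directory', destination_directory,
                            '-x'],
                           stdin=unpigz.stdout)
    unpigz.stdout.close()  # let unpigz receive SIGPIPE if tar exits early
    if tar.wait() != 0 or unpigz.wait() != 0:
        raise RuntimeError('unpack failed for %s' % source_file)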
def process(cfg, parms):
    """ Product processing state flow management and fulfilment

    Args:
        cfg (dict): environmental configuration
        parms (dict): processing request options

    Returns:
        list: execution status, resources used, and metadata for each
              command executed
    """
    # Verify work request schema
    parms = schema.load(parms)

    # Make a sub-directory bucket
    bucket_name = get_product_bucket(parms, cfg.get('output_bucket_fmt'))

    # Initialize the processing directory.
    directories = staging.initialize_processing_directory(
        cfg.get('work_dir'), bucket_name)

    # Build the product name
    product_name = get_product_name(parms['input_name'],
                                    cfg.get('output_filename_fmt'))

    # Stage the required input data
    staging.stage_input_data(
        download_urls=parms['input_urls'],
        staging=directories.get('stage'),
        destination=directories.get('work'),
        unpack=cfg.get('auto_unpack_files'),
        remove_staged=cfg.get('keep_intermediate', '').lower() != 'false')

    shell_sequence = providers.sequence(parms['products'][0],
                                        product_id=parms['input_name'])
    logging.warning(shell_sequence)

    results = list()
    for cmd in shell_sequence.split(';'):
        r = utilities.execute_cmd(cmd, directories.get('work'))
        r.update(utilities.snapshot_resources(log=False))
        results.append(r)

    # Remove science products and intermediate data not requested
    cleanup_work_dir()

    # Customize products
    customize_products()

    # Generate statistics products
    generate_statistics()

    # Reformat product
    reformat_products()

    # Package and deliver product
    destination_product, destination_cksum = distribute_product()

    # Remove the product directory
    # Free disk space to be nice to the whole system.
    remove_product_directory(directories['base'])

    return results
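# Illustrative only: a minimal cfg/parms pair built from the keys that
# process() actually reads above.  All values are hypothetical; the real
# schemas live in schema.load() and the deployment configuration.
example_cfg = {
    'work_dir': '/data/work',                  # hypothetical path
    'output_bucket_fmt': '{orderid}',          # assumed format string
    'output_filename_fmt': '{product_id}-SC',  # assumed format string
    'auto_unpack_files': True,
    'keep_intermediate': 'false',
}
example_parms = {
    'input_name': 'LC08_L1TP_012034_20170430_20170501_01_T1',
    'input_urls': ['file:///data/staged/'
                   'LC08_L1TP_012034_20170430_20170501_01_T1.tar.gz'],
    'products': ['sr'],
}
# process(example_cfg, example_parms)  # would run the full pipeline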
def scp_transfer_file(source_host, source_file,
                      destination_host, destination_file):
    '''
    Description:
        Using SCP transfer a file from a source location to a destination
        location.

    Note:
        - It is assumed ssh has been setup for access between the
          localhost and destination system
        - If wild cards are to be used with the source, then the
          destination file must be a directory.  ***No checking is
          performed in this code***
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    if source_host == destination_host:
        msg = "source and destination host match unable to scp"
        logger.error(msg)
        raise Exception(msg)

    cmd = ['scp', '-q', '-o', 'StrictHostKeyChecking=no',
           '-c', 'arcfour', '-C']

    # Build the source portion of the command
    # Single quote the source to allow for wild cards
    if source_host == 'localhost':
        cmd.append(source_file)
    else:
        cmd.append("'%s:%s'" % (source_host, source_file))

    # Build the destination portion of the command
    if destination_host == 'localhost':
        cmd.append(destination_file)
    else:
        cmd.append('%s:%s' % (destination_host, destination_file))

    cmd = ' '.join(cmd)

    # Transfer the data and raise any errors
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    except Exception as e:
        if len(output) > 0:
            logger.info(output)
        logger.error("Failed to transfer data")
        raise e

    logger.info("Transfer complete - SCP")
def scp_transfer_file(source_host, source_file,
                      destination_host, destination_file):
    '''
    Description:
        Using SCP transfer a file from a source location to a destination
        location.

    Note:
        - It is assumed ssh has been setup for access between the
          localhost and destination system
        - If wild cards are to be used with the source, then the
          destination file must be a directory.  ***No checking is
          performed in this code***
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    if source_host == destination_host:
        msg = "source and destination host match unable to scp"
        logger.error(msg)
        raise Exception(msg)

    cmd = ['scp', '-q', '-o', 'StrictHostKeyChecking=no', '-C']

    # Build the source portion of the command
    # Single quote the source to allow for wild cards
    if source_host == 'localhost':
        cmd.append(source_file)
    else:
        cmd.append("'%s:%s'" % (source_host, source_file))

    # Build the destination portion of the command
    if destination_host == 'localhost':
        cmd.append(destination_file)
    else:
        cmd.append('%s:%s' % (destination_host, destination_file))

    cmd = ' '.join(cmd)

    # Transfer the data and raise any errors
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    except Exception:
        if len(output) > 0:
            logger.info(output)
        logger.error("Failed to transfer data")
        raise

    logger.info("Transfer complete - SCP")
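# Usage sketch (hypothetical hosts and paths): pull packaged products from
# a processing node onto the local cache.  Wildcards in the source require
# the destination to be a directory, per the docstring.  Note the first
# variant above pins '-c arcfour' while the second drops it, presumably
# because newer OpenSSH builds no longer offer the arcfour cipher.
#
#     scp_transfer_file(source_host='proc-node-01',
#                       source_file='/work/orders/LE07*.tar.gz',
#                       destination_host='localhost',
#                       destination_file='/cache/orders/')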
def warp_image(source_file, output_file,
               base_warp_command=None,
               resample_method='near',
               pixel_size=None,
               no_data_value=None):
    '''
    Description:
        Executes the warping command on the specified source file
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    try:
        # Turn GDAL PAM off to prevent *.aux.xml files
        os.environ['GDAL_PAM_ENABLED'] = 'NO'

        cmd = copy.deepcopy(base_warp_command)

        # Resample method to use
        cmd.extend(['-r', resample_method])

        # Resize the pixels
        if pixel_size is not None:
            cmd.extend(['-tr', str(pixel_size), str(pixel_size)])

        # Specify the fill/nodata value
        if no_data_value is not None:
            cmd.extend(['-srcnodata', no_data_value])
            cmd.extend(['-dstnodata', no_data_value])

        # Now add the filenames
        cmd.extend([source_file, output_file])

        cmd = ' '.join(cmd)
        logger.info("Warping %s with %s" % (source_file, cmd))

        output = utilities.execute_cmd(cmd)
        if len(output) > 0:
            logger.info(output)
    finally:
        # Remove the environment variable we set above
        del os.environ['GDAL_PAM_ENABLED']
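# Usage sketch: warp_image() expects the caller to supply the base gdalwarp
# invocation; only resampling, pixel size, and nodata handling are appended
# here.  The options and filenames below are assumptions for illustration,
# and no_data_value is passed as a string because the command is joined
# with ' '.join().
#
#     base_cmd = ['gdalwarp', '-wm', '2048', '-multi',
#                 '-t_srs', 'EPSG:5070']    # hypothetical base options
#     warp_image('sr_band4.img', 'sr_band4_warped.img',
#                base_warp_command=base_cmd,
#                resample_method='cubic',
#                pixel_size=30.0,
#                no_data_value='-9999')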
def copy_files_to_directory(source_files, destination_directory):
    '''
    Description:
        Use unix 'cp' to copy files from one place to another on the
        localhost.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    if isinstance(source_files, list):
        for source_file in source_files:
            cmd = ' '.join(['cp', source_file, destination_directory])

            # Transfer the data and raise any errors
            output = ''
            try:
                output = utilities.execute_cmd(cmd)
            except Exception:
                logger.error("Failed to copy file")
                raise
            finally:
                if len(output) > 0:
                    logger.info(output)

    logger.info("Transfer complete - CP")
def copy_files_to_directory(source_files, destination_directory):
    '''
    Description:
        Use unix 'cp' to copy files from one place to another on the
        localhost.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    if isinstance(source_files, list):
        for source_file in source_files:
            cmd = ' '.join(['cp', source_file, destination_directory])

            # Transfer the data and raise any errors
            output = ''
            try:
                output = utilities.execute_cmd(cmd)
            except Exception as e:
                logger.error("Failed to copy file")
                raise e
            finally:
                if len(output) > 0:
                    logger.info(output)

    logger.info("Transfer complete - CP")
def remote_copy_file_to_file(source_host, source_file, destination_file):
    '''
    Description:
        Use unix 'cp' to copy a file from one place to another on a remote
        machine using ssh.
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                    source_host, 'cp', source_file, destination_file])

    # Transfer the data and raise any errors
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    except Exception as e:
        logger.error("Failed to copy file")
        raise e
    finally:
        if len(output) > 0:
            logger.info(output)

    logger.info("Transfer complete - SSH-CP")
def reformat(metadata_filename, work_directory, input_format, output_format):
    '''
    Description:
        Re-format the bands to the specified format using our raw binary
        tools or gdal, whichever is appropriate for the task.

        Input espa:
            Output Formats: envi (espa), gtiff, and hdf
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Don't do anything if they match
    if input_format == output_format:
        return

    # Change to the working directory
    current_directory = os.getcwd()
    os.chdir(work_directory)

    try:
        # Convert from our internal ESPA/ENVI format to GeoTIFF
        if input_format == 'envi' and output_format == 'gtiff':
            # NOTE: str.rstrip strips a character set, not a suffix, so
            # split on '.xml' to remove the extension
            gtiff_name = metadata_filename.split('.xml')[0]

            # Call with deletion of source files
            cmd = ' '.join(['convert_espa_to_gtif', '--del_src_files',
                            '--xml', metadata_filename,
                            '--gtif', gtiff_name])

            output = ''
            try:
                output = utilities.execute_cmd(cmd)

                # Rename the XML file back to *.xml from *_gtif.xml
                meta_gtiff_name = metadata_filename.split('.xml')[0]
                meta_gtiff_name = ''.join([meta_gtiff_name, '_gtif.xml'])

                os.rename(meta_gtiff_name, metadata_filename)
            finally:
                if len(output) > 0:
                    logger.info(output)

            # Remove all the *.tfw files since gtiff was chosen a bunch
            # may be present
            files_to_remove = glob.glob('*.tfw')
            if len(files_to_remove) > 0:
                cmd = ' '.join(['rm', '-f'] + files_to_remove)
                logger.info(' '.join(['REMOVING TFW DATA COMMAND:', cmd]))

                output = ''
                try:
                    output = utilities.execute_cmd(cmd)
                finally:
                    if len(output) > 0:
                        logger.info(output)

        # Convert from our internal ESPA/ENVI format to HDF
        elif input_format == 'envi' and output_format == 'hdf-eos2':
            # convert_espa_to_hdf
            hdf_name = metadata_filename.replace('.xml', '.hdf')

            # Call with deletion of source files
            cmd = ' '.join(['convert_espa_to_hdf', '--del_src_files',
                            '--xml', metadata_filename,
                            '--hdf', hdf_name])

            output = ''
            try:
                output = utilities.execute_cmd(cmd)

                # Rename the XML file back to *.xml from *_hdf.xml
                meta_hdf_name = metadata_filename.replace('.xml',
                                                          '_hdf.xml')

                os.rename(meta_hdf_name, metadata_filename)
            finally:
                if len(output) > 0:
                    logger.info(output)

        # Convert from our internal ESPA/ENVI format to ENVI-BIP
        elif input_format == 'envi' and output_format == 'envi-bip':
            # convert_espa_to_bip
            bip_name = metadata_filename.replace('.xml', '.img')

            # Call with deletion of source files
            cmd = ' '.join(['convert_espa_to_bip', '--del_src_files',
                            '--convert_qa',
                            '--xml', metadata_filename,
                            '--bip', bip_name])

            output = ''
            try:
                output = utilities.execute_cmd(cmd)

                # Rename the XML file back to *.xml from *_bip.xml
                # NOTE: assumes convert_espa_to_bip writes '*_bip.xml',
                # matching the gtif/hdf naming pattern; the original code
                # looked for '*_hdf.xml' here
                meta_bip_name = metadata_filename.replace('.xml',
                                                          '_bip.xml')

                os.rename(meta_bip_name, metadata_filename)
            finally:
                if len(output) > 0:
                    logger.info(output)

        # Requested conversion not implemented
        else:
            raise ValueError("Unsupported reformat combination (%s, %s)"
                             % (input_format, output_format))

    finally:
        # Change back to the previous directory
        os.chdir(current_directory)
def distribute_product_local(immutability, product_name, source_path,
                             packaging_path):

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Deliver the product files
    # Attempt X times sleeping between each attempt
    sleep_seconds = settings.DEFAULT_SLEEP_SECONDS
    max_number_of_attempts = settings.MAX_DISTRIBUTION_ATTEMPTS
    max_package_attempts = settings.MAX_PACKAGING_ATTEMPTS

    attempt = 0
    product_file = 'ERROR'
    cksum_file = 'ERROR'

    while True:
        try:
            # Package the product files to the online cache location
            # Attempt X times sleeping between each sub_attempt
            sub_attempt = 0
            while True:
                try:
                    (product_file, cksum_file,
                     local_cksum_value) = package_product(immutability,
                                                          source_path,
                                                          packaging_path,
                                                          product_name)

                    # Change the attributes on the files so that we can't
                    # remove them
                    if immutability:
                        cmd = ' '.join(['sudo', 'chattr', '+i',
                                        product_file, cksum_file])
                        output = utilities.execute_cmd(cmd)
                        if len(output) > 0:
                            logger.info(output)
                except Exception:
                    logger.exception("An exception occurred processing %s"
                                     % product_name)
                    if sub_attempt < max_package_attempts:
                        sleep(sleep_seconds)  # sleep before trying again
                        sub_attempt += 1
                        continue
                    else:
                        raise
                break

            # Always log where we placed the files
            logger.info("Delivered product to location %s"
                        " and checksum location %s"
                        % (product_file, cksum_file))
        except Exception:
            if attempt < max_number_of_attempts:
                sleep(sleep_seconds)  # sleep before trying again
                attempt += 1
                # adjust for next set
                sleep_seconds = int(sleep_seconds * 1.5)
                continue
            else:
                raise
        break

    return (product_file, cksum_file)
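# The delivery loop above nests two retry loops: an inner one that retries
# just the packaging step with a fixed delay, and an outer one that retries
# the whole delivery while stretching the delay by 1.5x each round.  A
# distilled, self-contained sketch of that pattern (names hypothetical):
import time

def retry(func, max_attempts, sleep_seconds, backoff=1.5):
    # Retry func() up to max_attempts extra times, sleeping between tries
    # and growing the delay by 'backoff' each time.
    attempt = 0
    while True:
        try:
            return func()
        except Exception:
            if attempt >= max_attempts:
                raise
            time.sleep(sleep_seconds)
            sleep_seconds *= backoff
            attempt += 1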
def reformat(metadata_filename, work_directory, input_format, output_format):
    """
    Description:
        Re-format the bands to the specified format using our raw binary
        tools or gdal, whichever is appropriate for the task.

        Input espa:
            Output Formats: envi (espa), gtiff, and hdf
    """

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Don't do anything if they match
    if input_format == output_format:
        return

    # Change to the working directory
    current_directory = os.getcwd()
    os.chdir(work_directory)

    try:
        # Convert from our internal ESPA/ENVI format to GeoTIFF
        if input_format == 'envi' and output_format == 'gtiff':
            # NOTE: str.rstrip strips a character set, not a suffix, so
            # split on '.xml' to remove the extension
            gtiff_name = metadata_filename.split('.xml')[0]

            # Call with deletion of source files
            cmd = ' '.join(['convert_espa_to_gtif', '--del_src_files',
                            '--xml', metadata_filename,
                            '--gtif', gtiff_name])

            output = ''
            try:
                output = utilities.execute_cmd(cmd)

                # Rename the XML file back to *.xml from *_gtif.xml
                meta_gtiff_name = metadata_filename.split('.xml')[0]
                meta_gtiff_name = ''.join([meta_gtiff_name, '_gtif.xml'])

                os.rename(meta_gtiff_name, metadata_filename)
            finally:
                if len(output) > 0:
                    logger.info(output)

            # Remove all *.tfw files that are likely present
            files_to_remove = glob.glob('*.tfw')
            if len(files_to_remove) > 0:
                cmd = ' '.join(['rm', '-f'] + files_to_remove)
                logger.info(' '.join(['REMOVING TFW DATA COMMAND:', cmd]))

                output = ''
                try:
                    output = utilities.execute_cmd(cmd)
                finally:
                    if len(output) > 0:
                        logger.info(output)

        # Convert from our internal ESPA/ENVI format to HDF
        elif input_format == 'envi' and output_format == 'hdf-eos2':
            # convert_espa_to_hdf
            hdf_name = metadata_filename.replace('.xml', '.hdf')

            # Call with deletion of source files
            cmd = ' '.join(['convert_espa_to_hdf', '--del_src_files',
                            '--xml', metadata_filename,
                            '--hdf', hdf_name])

            output = ''
            try:
                output = utilities.execute_cmd(cmd)

                # Rename the XML file back to *.xml from *_hdf.xml
                meta_name = metadata_filename.replace('.xml', '_hdf.xml')
                os.rename(meta_name, metadata_filename)

                # Remove the .hdf.hdr file - it will likely be there if
                # all of the bands in the HDF have the same resolution
                # (use a separate variable so the conversion output above
                # is still logged by the outer finally)
                files_to_remove = glob.glob('*.hdf.hdr')
                if len(files_to_remove) > 0:
                    hdr_cmd = ' '.join(['rm', '-f'] + files_to_remove)
                    logger.info(' '.join(['REMOVING HDF.HDR COMMAND:',
                                          hdr_cmd]))

                    hdr_output = ''
                    try:
                        hdr_output = utilities.execute_cmd(hdr_cmd)
                    finally:
                        if len(hdr_output) > 0:
                            logger.info(hdr_output)
            finally:
                if len(output) > 0:
                    logger.info(output)

        # Convert from our internal ESPA/ENVI format to NetCDF
        elif input_format == 'envi' and output_format == 'netcdf':
            # convert_espa_to_netcdf
            netcdf_name = metadata_filename.replace('.xml', '.nc')

            # Call with deletion of source files
            cmd = ' '.join(['convert_espa_to_netcdf', '--del_src_files',
                            '--xml', metadata_filename,
                            '--netcdf', netcdf_name])

            output = ''
            try:
                output = utilities.execute_cmd(cmd)

                # Rename the XML file back to *.xml from *_nc.xml
                meta_name = metadata_filename.replace('.xml', '_nc.xml')
                os.rename(meta_name, metadata_filename)
            finally:
                if len(output) > 0:
                    logger.info(output)

        # Requested conversion not implemented
        else:
            raise ValueError("Unsupported reformat combination (%s, %s)"
                             % (input_format, output_format))

    finally:
        # Change back to the previous directory
        os.chdir(current_directory)
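# Usage sketch (hypothetical filenames): convert an ESPA/ENVI product in
# place to NetCDF.  The function is a no-op when the formats already match.
#
#     reformat(metadata_filename='LE07_L1TP_042033_20170830_C01_T1.xml',
#              work_directory='/work/orders/abc123',
#              input_format='envi',
#              output_format='netcdf')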
def distribute_statistics_remote(immutability, product_id, source_path,
                                 destination_host, destination_path,
                                 destination_username, destination_pw):
    '''
    Description:
        Transfers the statistics to the specified directory on the
        destination host

    Parameters:
        product_id - The unique product ID associated with the files.
        source_path - The full path to where the statistics files to
                      distribute reside.
        destination_host - The hostname/url for where to distribute the
                           files.
        destination_path - The full path on the local system to copy the
                           statistics files into.
        destination_username - The user name to use for FTP
        destination_pw - The password to use for FTP

    Note:
        - It is assumed ssh has been setup for access between the
          localhost and destination system
        - It is assumed a stats directory exists under the current
          directory
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    d_name = 'stats'

    # Save the current directory location
    current_directory = os.getcwd()

    # Attempt X times sleeping between each attempt
    attempt = 0
    sleep_seconds = settings.DEFAULT_SLEEP_SECONDS

    while True:
        # Change to the source directory
        os.chdir(source_path)
        try:
            stats_wildcard = ''.join([product_id, '*'])
            stats_path = os.path.join(destination_path, d_name)
            stats_files = os.path.join(d_name, stats_wildcard)
            remote_stats_wildcard = os.path.join(stats_path, stats_wildcard)

            # Create the statistics directory on the destination host
            logger.info("Creating directory {0} on {1}"
                        .format(stats_path, destination_host))
            cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                            destination_host, 'mkdir', '-p', stats_path])

            output = ''
            try:
                logger.debug(' '.join(["mkdir cmd:", cmd]))
                output = utilities.execute_cmd(cmd)
            finally:
                if len(output) > 0:
                    logger.info(output)

            # Change the attributes on the files so that we can remove them
            if immutability:
                cmd = ' '.join(['ssh', '-q', '-o',
                                'StrictHostKeyChecking=no',
                                destination_host, 'sudo', 'chattr', '-if',
                                remote_stats_wildcard])

                output = ''
                try:
                    logger.debug(' '.join(["chattr remote stats cmd:",
                                           cmd]))
                    output = utilities.execute_cmd(cmd)
                except Exception:
                    pass
                finally:
                    if len(output) > 0:
                        logger.info(output)

            # Remove any pre-existing statistics
            cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                            destination_host, 'rm', '-f',
                            remote_stats_wildcard])

            output = ''
            try:
                logger.debug(' '.join(["rm remote stats cmd:", cmd]))
                output = utilities.execute_cmd(cmd)
            finally:
                if len(output) > 0:
                    logger.info(output)

            # Transfer the statistics
            transfer.transfer_file('localhost', stats_files,
                                   destination_host, stats_path,
                                   destination_username=destination_username,
                                   destination_pw=destination_pw)

            logger.info("Verifying statistics transfers")

            # NOTE - Re-purposing the stats_files variable
            stats_files = glob.glob(stats_files)
            for file_name in stats_files:
                # Dummy defaults that can never match each other, in case
                # a checksum command fails below
                local_cksum_value = 'a b'
                remote_cksum_value = 'b c'

                # Generate a local checksum value
                cmd = ' '.join([settings.ESPA_CHECKSUM_TOOL, file_name])
                try:
                    logger.debug(' '.join(["checksum cmd:", cmd]))
                    local_cksum_value = utilities.execute_cmd(cmd)
                except Exception:
                    if len(local_cksum_value) > 0:
                        logger.error(local_cksum_value)
                    raise

                # Generate a remote checksum value
                remote_file = os.path.join(destination_path, file_name)
                cmd = ' '.join(['ssh', '-q', '-o',
                                'StrictHostKeyChecking=no',
                                destination_host,
                                settings.ESPA_CHECKSUM_TOOL, remote_file])
                try:
                    remote_cksum_value = utilities.execute_cmd(cmd)
                except Exception:
                    if len(remote_cksum_value) > 0:
                        logger.error(remote_cksum_value)
                    raise

                # Checksum validation
                if (local_cksum_value.split()[0] !=
                        remote_cksum_value.split()[0]):
                    raise ESPAException("Failed checksum validation between"
                                        " %s and %s:%s"
                                        % (file_name, destination_host,
                                           remote_file))

            # Change the attributes on the files so that we can't remove
            # them
            if immutability:
                cmd = ' '.join(['ssh', '-q', '-o',
                                'StrictHostKeyChecking=no',
                                destination_host, 'sudo', 'chattr', '+i',
                                remote_stats_wildcard])

                output = ''
                try:
                    logger.debug(' '.join(["chattr remote stats cmd:",
                                           cmd]))
                    output = utilities.execute_cmd(cmd)
                finally:
                    if len(output) > 0:
                        logger.info(output)

        except Exception:
            logger.exception("An exception occurred processing %s"
                             % product_id)
            if attempt < settings.MAX_DELIVERY_ATTEMPTS:
                sleep(sleep_seconds)  # sleep before trying again
                attempt += 1
                continue
            else:
                raise
        finally:
            # Change back to the previous directory
            os.chdir(current_directory)

        break
def distribute_statistics_local(immutability, product_id, source_path,
                                destination_path):
    '''
    Description:
        Copies the statistics to the specified directory on the local
        system

    Parameters:
        product_id - The unique product ID associated with the files.
        source_path - The full path to where the statistics files to
                      distribute reside.
        destination_path - The full path on the local system to copy the
                           statistics files into.

    Note:
        - It is assumed a stats directory exists under the source_path
        - A stats directory will be created under the destination path
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    d_name = 'stats'

    # Save the current directory location and change to the source
    # directory
    current_directory = os.getcwd()
    os.chdir(source_path)

    try:
        stats_wildcard = ''.join([product_id, '*'])
        stats_path = os.path.join(destination_path, d_name)
        stats_files = os.path.join(d_name, stats_wildcard)
        dest_stats_wildcard = os.path.join(stats_path, stats_wildcard)

        # Create the statistics directory under the destination path
        logger.info("Creating directory {0}".format(stats_path))
        utilities.create_directory(stats_path)

        # Change the attributes on the files so that we can remove them
        if immutability:
            cmd = ' '.join(['sudo', 'chattr', '-if', dest_stats_wildcard])
            output = ''
            try:
                output = utilities.execute_cmd(cmd)
            except Exception:
                pass
            finally:
                if len(output) > 0:
                    logger.info(output)

        # Remove any pre-existing statistics for this product ID
        cmd = ' '.join(['rm', '-f', dest_stats_wildcard])
        output = ''
        try:
            output = utilities.execute_cmd(cmd)
        finally:
            if len(output) > 0:
                logger.info(output)

        # Transfer the statistics files
        for file_path in glob.glob(stats_files):
            filename = os.path.basename(file_path)
            dest_file_path = os.path.join(stats_path, filename)

            logger.info("Copying {0} to {1}".format(filename,
                                                    dest_file_path))
            shutil.copyfile(file_path, dest_file_path)

        # Change the attributes on the files so that we can't remove them
        if immutability:
            cmd = ' '.join(['sudo', 'chattr', '+i', dest_stats_wildcard])
            output = ''
            try:
                output = utilities.execute_cmd(cmd)
            finally:
                if len(output) > 0:
                    logger.info(output)

    except Exception:
        logger.exception('An exception occurred processing {0}'
                         .format(product_id))
        raise

    finally:
        # Change back to the previous directory
        os.chdir(current_directory)
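# Usage sketch (hypothetical paths): copy a product's statistics from its
# work directory into the local cache; a 'stats' sub-directory is created
# under the destination if needed.
#
#     distribute_statistics_local(
#         immutability=True,
#         product_id='LE07_L1TP_042033_20170830_C01_T1',
#         source_path='/work/orders/abc123',
#         destination_path='/cache/orders/abc123')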
def transfer_product(immutability, destination_host, destination_directory,
                     destination_username, destination_pw,
                     product_filename, cksum_filename):
    '''
    Description:
        Transfers the product and associated checksum to the specified
        directory on the destination host

    Returns:
        cksum_value - The check sum value from the destination
        destination_product_file - The full path on the destination

    Note:
        - It is assumed ssh has been setup for access between the
          localhost and destination system
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    # Create the destination directory on the destination host
    logger.info("Creating destination directory %s on %s"
                % (destination_directory, destination_host))
    cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                    destination_host, 'mkdir', '-p',
                    destination_directory])

    output = ''
    try:
        logger.debug(' '.join(["mkdir cmd:", cmd]))
        output = utilities.execute_cmd(cmd)
    finally:
        if len(output) > 0:
            logger.info(output)

    # Figure out the destination full paths
    destination_cksum_file = os.path.join(
        destination_directory, os.path.basename(cksum_filename))
    destination_product_file = os.path.join(
        destination_directory, os.path.basename(product_filename))

    # Remove any pre-existing files
    # Grab the first part of the filename, which is not unique
    remote_filename_parts = destination_product_file.split('-')
    remote_filename_parts[-1] = '*'  # Replace the last element of the list
    remote_filename = '-'.join(remote_filename_parts)  # Join with '-'

    # Change the attributes on the files so that we can remove them
    if immutability:
        cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                        destination_host, 'sudo', 'chattr', '-if',
                        remote_filename])
        output = ''
        try:
            logger.debug(' '.join(["chattr remote file cmd:", cmd]))
            output = utilities.execute_cmd(cmd)
        except Exception:
            pass
        finally:
            if len(output) > 0:
                logger.info(output)

    # Remove the files on the remote system
    cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                    destination_host, 'rm', '-f', remote_filename])
    output = ''
    try:
        logger.debug(' '.join(["rm remote file cmd:", cmd]))
        output = utilities.execute_cmd(cmd)
    finally:
        if len(output) > 0:
            logger.info(output)

    # Transfer the checksum file
    transfer.transfer_file('localhost', cksum_filename, destination_host,
                           destination_cksum_file,
                           destination_username=destination_username,
                           destination_pw=destination_pw)

    # Transfer the product file
    transfer.transfer_file('localhost', product_filename, destination_host,
                           destination_product_file,
                           destination_username=destination_username,
                           destination_pw=destination_pw)

    # Change the attributes on the files so that we can't remove them
    if immutability:
        cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                        destination_host, 'sudo', 'chattr', '+i',
                        remote_filename])
        output = ''
        try:
            logger.debug(' '.join(["chattr remote file cmd:", cmd]))
            output = utilities.execute_cmd(cmd)
        finally:
            if len(output) > 0:
                logger.info(output)

    # Get the remote checksum value
    cksum_value = ''
    cmd = ' '.join(['ssh', '-q', '-o', 'StrictHostKeyChecking=no',
                    destination_host, settings.ESPA_CHECKSUM_TOOL,
                    destination_product_file])
    try:
        logger.debug(' '.join(["ssh cmd:", cmd]))
        cksum_value = utilities.execute_cmd(cmd)
    except Exception:
        if len(cksum_value) > 0:
            logger.error(cksum_value)
        raise

    return (cksum_value, destination_product_file, destination_cksum_file)
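# The pre-clean step above turns the unique product filename into a
# wildcard by replacing the final '-'-separated element, so stale copies of
# the same product from earlier runs are removed before the new transfer.
# A worked example (hypothetical name):
parts = ('/cache/orders/LE07_042033_20170830-SC20170901120000.tar.gz'
         .split('-'))
parts[-1] = '*'  # replace the unique trailing element with a wildcard
assert '-'.join(parts) == '/cache/orders/LE07_042033_20170830-*'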
def package_product(immutability, source_directory, destination_directory,
                    product_name):
    '''
    Description:
        Package the contents of the source directory into a gzipped
        tarball located in the destination directory and generates a
        checksum file for it.

        The filename will be prefixed with the specified product name.

    Returns:
        product_full_path - The full path to the product including
                            filename
        cksum_full_path - The full path to the check sum including
                          filename
        cksum_value - The checksum value
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    product_full_path = os.path.join(destination_directory, product_name)

    # Make sure the directory exists.
    utilities.create_directory(destination_directory)

    # Remove any pre-existing files
    # Grab the first part of the filename, which is not unique
    filename_parts = product_full_path.split('-')
    filename_parts[-1] = '*'  # Replace the last element of the list
    filename = '-'.join(filename_parts)  # Join with '-'

    # Name of the checksum to be created
    cksum_filename = '.'.join([product_name,
                               settings.ESPA_CHECKSUM_EXTENSION])

    # Change the attributes on the files so that we can remove them
    if immutability:
        cmd = ' '.join(['sudo', 'chattr', '-if', filename, cksum_filename])
        output = ''
        try:
            output = utilities.execute_cmd(cmd)
        except Exception:
            pass
        finally:
            if len(output) > 0:
                logger.info(output)

    # Remove the file first just in-case this is a second run
    cmd = ' '.join(['rm', '-f', filename])
    output = ''
    try:
        output = utilities.execute_cmd(cmd)
    finally:
        if len(output) > 0:
            logger.info(output)

    # Change to the source directory
    current_directory = os.getcwd()
    os.chdir(source_directory)

    try:
        # Tar the files
        logger.info("Packaging completed product to %s.tar.gz"
                    % product_full_path)

        # Grab the files to tar and gzip
        product_files = glob.glob("*")

        # Execute tar with zipping, the full/path/*.tar.gz name is
        # returned
        product_full_path = utilities.tar_files(product_full_path,
                                                product_files, gzip=True)

        # Change file permissions
        logger.info("Changing file permissions on %s to 0644"
                    % product_full_path)
        os.chmod(product_full_path, 0o644)

        # Verify that the archive is good
        output = ''
        cmd = ' '.join(['tar', '-tf', product_full_path])
        try:
            output = utilities.execute_cmd(cmd)
        finally:
            if len(output) > 0:
                logger.info(output)

        # If it was good create a checksum file
        cksum_output = ''
        cmd = ' '.join([settings.ESPA_CHECKSUM_TOOL, product_full_path])
        try:
            cksum_output = utilities.execute_cmd(cmd)
        finally:
            if len(cksum_output) > 0:
                logger.info(cksum_output)

        # Get the base filename of the file that was checksum'd
        cksum_prod_filename = os.path.basename(product_full_path)

        logger.debug("Checksum file = %s" % cksum_filename)
        logger.debug("Checksum'd file = %s" % cksum_prod_filename)

        # Make sure they are strings
        cksum_values = cksum_output.split()
        cksum_value = "%s %s" % (str(cksum_values[0]),
                                 str(cksum_prod_filename))
        logger.info("Generating cksum: %s" % cksum_value)

        cksum_full_path = os.path.join(destination_directory,
                                       cksum_filename)

        try:
            with open(cksum_full_path, 'wb+') as cksum_fd:
                cksum_fd.write(cksum_value)
        except Exception:
            logger.exception('Error building checksum file')
            raise
    finally:
        # Change back to the previous directory
        os.chdir(current_directory)

    return (product_full_path, cksum_full_path, cksum_value)
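# Usage sketch (hypothetical paths): package a completed work directory and
# keep the returned triple for later verification.
#
#     (product_path, cksum_path, cksum_value) = package_product(
#         immutability=False,
#         source_directory='/work/orders/abc123',
#         destination_directory='/cache/orders',
#         product_name='LE07_L1TP_042033_20170830_C01_T1-SC20170901')
#
# The checksum file holds '<digest> <tarball-basename>', mirroring the
# common layout that md5sum-style verification tools expect.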
def process_test_order(request_file, products_file, env_vars,
                       keep_log, plot, pre, post):
    '''
    Description:
        Process the test order file.
    '''

    logger = logging.getLogger(__name__)

    template_file = 'template.json'
    template_dict = None

    tmp_order = 'tmp-test-order'

    order_id = (request_file.split('.json')[0]).replace("'", '')

    if pre:
        order_id = ''.join([order_id, '-PRE'])
    if post:
        order_id = ''.join([order_id, '-POST'])

    have_error = False
    status = True
    error_msg = ''

    products = list()
    if not plot:
        with open(products_file, 'r') as scenes_fd:
            while True:
                product = scenes_fd.readline().strip()
                if not product:
                    break
                products.append(product)
    else:
        products = ['plot']

    logger.info("Processing Products [%s]" % ', '.join(products))

    with open(template_file, 'r') as template_fd:
        template_contents = template_fd.read()
        if not template_contents:
            raise Exception("Template file [%s] is empty" % template_file)

        template_dict = json.loads(template_contents)
        if template_dict is None:
            logger.error("Loading template.json")

    for product in products:
        logger.info("Processing Product [%s]" % product)

        with open(request_file, 'r') as request_fd:
            request_contents = request_fd.read()
            if not request_contents:
                raise Exception("Order file [%s] is empty" % request_file)

            request_dict = json.loads(request_contents)
            if request_dict is None:
                logger.error("Loading [%s]" % request_file)

            # Merge the requested options with the template options, to
            # create a new dict with the requested options overriding the
            # template.
            new_dict = template_dict.copy()
            new_dict.update(request_dict)
            new_dict['options'] = template_dict['options'].copy()
            new_dict['options'].update(request_dict['options'])

            # Turn it into a string for follow-on processing
            order_contents = json.dumps(new_dict, indent=4, sort_keys=True)

            logger.info("Processing Request File [%s]" % request_file)

            with open(tmp_order, 'w') as tmp_fd:
                logger.info("Creating [%s]" % tmp_order)

                tmp_line = order_contents

                # Update the order for the developer
                tmp = product[:3]
                download_url = 'null'
                is_modis = False
                if tmp == 'MOD' or tmp == 'MYD':
                    is_modis = True

                # for plots
                if not is_modis and not plot:
                    product_path = ('%s/%s/%s%s'
                                    % (env_vars['dev_data_dir']['value'],
                                       product[:3], product, '.tar.gz'))

                    logger.info("Using Product Path [%s]" % product_path)
                    if not os.path.isfile(product_path):
                        error_msg = ("Missing product data (%s)"
                                     % product_path)
                        have_error = True
                        break

                    download_url = ('file://%s' % product_path)

                elif not plot:
                    if tmp == 'MOD':
                        base_source_path = settings.TERRA_BASE_SOURCE_PATH
                    else:
                        base_source_path = settings.AQUA_BASE_SOURCE_PATH

                    short_name = sensor.instance(product).short_name
                    version = sensor.instance(product).version
                    archive_date = utilities.date_from_doy(
                        sensor.instance(product).year,
                        sensor.instance(product).doy)
                    xxx = '%s.%s.%s' % (str(archive_date.year).zfill(4),
                                        str(archive_date.month).zfill(2),
                                        str(archive_date.day).zfill(2))

                    product_path = ('%s/%s.%s/%s'
                                    % (base_source_path, short_name,
                                       version, xxx))

                    if tmp == 'MOD' or tmp == 'MYD':
                        download_url = ('http://%s/%s/%s.hdf'
                                        % (settings.MODIS_INPUT_CHECK_HOST,
                                           product_path, product))

                sensor_name = 'plot'
                if not plot:
                    sensor_name = sensor.instance(product).sensor_name
                    logger.info("Processing Sensor [%s]" % sensor_name)
                else:
                    logger.info("Processing Plot Request")

                tmp_line = tmp_line.replace('\n', '')
                tmp_line = tmp_line.replace("ORDER_ID", order_id)
                tmp_line = tmp_line.replace("SCENE_ID", product)

                if sensor_name in ['tm', 'etm', 'olitirs']:
                    tmp_line = tmp_line.replace("PRODUCT_TYPE", 'landsat')
                elif sensor_name in ['terra', 'aqua']:
                    tmp_line = tmp_line.replace("PRODUCT_TYPE", 'modis')
                else:
                    tmp_line = tmp_line.replace("PRODUCT_TYPE", 'plot')

                tmp_line = tmp_line.replace("DOWNLOAD_URL", download_url)

                tmp_fd.write(tmp_line)

                # Validate again, since we modified it
                parms = json.loads(tmp_line)
                # parms = parameters.instance(json.loads(tmp_line))

                print(json.dumps(parms, indent=4, sort_keys=True))
            # END - with tmp_order
        # END - with request_file

    if have_error:
        logger.error(error_msg)
        return False

    keep_log_str = ''
    if keep_log:
        keep_log_str = '--keep-log'

    cmd = ("cd ..; cat test-orders/%s | ./ondemand_mapper.py %s"
           % (tmp_order, keep_log_str))

    output = ''
    try:
        logger.info("Processing [%s]" % cmd)
        output = utilities.execute_cmd(cmd)
        if len(output) > 0:
            print(output)
    except Exception:
        logger.exception("Processing failed")
        status = False

    return status
def process_test_order(args, request_file, products_file, env_vars):
    """Process the test order file
    """

    logger = logging.getLogger(__name__)

    template_file = 'template.json'
    template_dict = None

    order_id = args.request

    if args.pre:
        order_id = ''.join([order_id, '-PRE'])
    if args.post:
        order_id = ''.join([order_id, '-POST'])

    have_error = False
    status = True
    error_msg = ''

    products = list()
    if not args.plot:
        with open(products_file, 'r') as scenes_fd:
            while True:
                product = scenes_fd.readline().strip()
                if not product:
                    break
                products.append(product)
    else:
        products = ['plot']

    logger.info('Processing Products [{0}]'.format(', '.join(products)))

    with open(template_file, 'r') as template_fd:
        template_contents = template_fd.read()
        if not template_contents:
            raise Exception('Template file [{0}] is empty'
                            .format(template_file))

        template_dict = json.loads(template_contents)
        if template_dict is None:
            logger.error('Loading template.json')

    for product_id in products:
        logger.info('Processing Product [{0}]'.format(product_id))

        tmp_order = 'test-{0}-{1}'.format(order_id, product_id)

        with open(request_file, 'r') as request_fd:
            request_contents = request_fd.read()
            if not request_contents:
                raise Exception('Order file [{0}] is empty'
                                .format(request_file))

            logger.info('Processing Request File [{0}]'
                        .format(request_file))

            request_dict = json.loads(request_contents)
            if request_dict is None:
                logger.error('Loading [{0}]'.format(request_file))

            # Merge the requested options with the template options, to
            # create a new dict with the requested options overriding the
            # template.
            new_dict = template_dict.copy()
            new_dict.update(request_dict)
            new_dict['options'] = template_dict['options'].copy()
            new_dict['options'].update(request_dict['options'])

            # Turn it into a string for follow-on processing
            order_contents = json.dumps(new_dict, indent=4, sort_keys=True)

            sensor_code = get_satellite_sensor_code(product_id)

            with open(tmp_order, 'w') as tmp_fd:
                logger.info('Creating [{0}]'.format(tmp_order))

                tmp_line = order_contents

                # Update the order for the developer
                download_url = 'null'

                # for plots
                if not sensor.is_modis(product_id) and not args.plot:
                    product_path = (
                        '{0}/{1}/{2}{3}'
                        .format(env_vars['dev_data_dir']['value'],
                                sensor_code, product_id, '.tar.gz'))

                    logger.info('Using Product Path [{0}]'
                                .format(product_path))
                    if not os.path.isfile(product_path):
                        error_msg = ('Missing product data [{0}]'
                                     .format(product_path))
                        have_error = True
                        break

                    download_url = 'file://{0}'.format(product_path)

                elif not args.plot:
                    if sensor.is_terra(product_id):
                        base_source_path = '/MOLT'
                    else:
                        base_source_path = '/MOLA'

                    parts = product_id.split('.')
                    short_name = parts[0]
                    version = parts[3]
                    date_YYYYDDD = parts[1][1:]
                    date_acquired = datetime.datetime.strptime(
                        date_YYYYDDD, '%Y%j').date()

                    xxx = ('{0}.{1}.{2}'
                           .format(str(date_acquired.year).zfill(4),
                                   str(date_acquired.month).zfill(2),
                                   str(date_acquired.day).zfill(2)))

                    product_path = ('{0}/{1}.{2}/{3}'
                                    .format(base_source_path, short_name,
                                            version, xxx))

                    if sensor.is_modis(product_id):
                        download_url = ('http://{0}/{1}/{2}.hdf'
                                        .format(DAAC_HOSTNAME,
                                                product_path, product_id))

                sensor_name = 'plot'
                if not args.plot:
                    sensor_name = sensor.info(product_id).sensor_name
                    logger.info('Processing Sensor [{0}]'
                                .format(sensor_name))
                else:
                    logger.info('Processing Plot Request')

                tmp_line = tmp_line.replace('\n', '')
                tmp_line = tmp_line.replace('ORDER_ID', order_id)
                tmp_line = tmp_line.replace('SCENE_ID', product_id)

                if sensor_name in ['tm', 'etm', 'olitirs']:
                    tmp_line = tmp_line.replace('PRODUCT_TYPE', 'landsat')
                elif sensor_name in ['terra', 'aqua']:
                    tmp_line = tmp_line.replace('PRODUCT_TYPE', 'modis')
                else:
                    tmp_line = tmp_line.replace('PRODUCT_TYPE', 'plot')

                tmp_line = tmp_line.replace('DOWNLOAD_URL', download_url)

                tmp_fd.write(tmp_line)

                # Validate again, since we modified it
                parms = json.loads(tmp_line)

                print(json.dumps(parms, indent=4, sort_keys=True))

    if have_error:
        logger.error(error_msg)
        return False

    cmd = ('cd ..; cat test-orders/{0} | ./ondemand_mapper.py --developer'
           .format(tmp_order))

    output = ''
    try:
        logger.info('Processing [{0}]'.format(cmd))
        output = utilities.execute_cmd(cmd)
        if len(output) > 0:
            print(output)
    except Exception:
        logger.exception('Processing failed')
        status = False

    os.unlink(tmp_order)

    return status
def distribute_statistics_local(product_id, source_path, destination_path):
    '''
    Description:
        Copies the statistics to the specified directory on the local
        system

    Parameters:
        product_id - The unique product ID associated with the files.
        source_path - The full path to where the statistics files to
                      distribute reside.
        destination_path - The full path on the local system to copy the
                           statistics files into.

    Note:
        - It is assumed a stats directory exists under the source_path
        - A stats directory will be created under the destination path
    '''

    logger = EspaLogging.get_logger(settings.PROCESSING_LOGGER)

    d_name = 'stats'

    # Save the current directory location and change to the source
    # directory
    current_directory = os.getcwd()
    os.chdir(source_path)

    try:
        stats_path = os.path.join(destination_path, d_name)
        stats_files = ''.join([d_name, '/', product_id, '*'])

        # Create the statistics directory under the destination path
        logger.info("Creating directory {0}".format(stats_path))
        utilities.create_directory(stats_path)

        # Remove any pre-existing statistics for this product ID
        cmd = ' '.join(['rm', '-f',
                        os.path.join(destination_path, stats_files)])
        output = ''
        try:
            output = utilities.execute_cmd(cmd)
        except Exception as e:
            raise ee.ESPAException(ee.ErrorCodes.distributing_product,
                                   str(e)), None, sys.exc_info()[2]
        finally:
            if len(output) > 0:
                logger.info(output)

        # Transfer the statistics files
        for file_path in glob.glob(stats_files):
            filename = os.path.basename(file_path)
            dest_file_path = os.path.join(stats_path, filename)

            logger.info("Copying {0} to {1}".format(filename,
                                                    dest_file_path))
            shutil.copyfile(file_path, dest_file_path)

    except Exception as e:
        logger.exception("An exception occurred processing {0}"
                         .format(product_id))
        e_code = ee.ErrorCodes.distributing_product
        raise ee.ESPAException(e_code, str(e)), None, sys.exc_info()[2]

    finally:
        # Change back to the previous directory
        os.chdir(current_directory)