def main(user_args):
    log_level = user_args.log_level

    PDS_info = json.load(open(pds_info, 'r'))
    redis_queue = RedisQueue('UPC_ReadyQueue')

    logger = logging.getLogger('UPC_Queueing')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info("UPC Queue: %s", redis_queue.id_name)

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
    except Exception as e:
        logger.error("%s", e)
        return 1

    # For each archive in the db, test if there are files that are ready to
    # process
    for archive_id in session.query(Files.archiveid).distinct():
        result = session.query(Files).filter(Files.archiveid == archive_id,
                                             Files.upc_required == 't')

        # Get filepath from archive id
        archive_name = session.query(Archives.archive_name).filter(
            Archives.archiveid == archive_id).first()

        # No archive name = no path. Skip these values.
        if archive_name is None:
            logger.warning("No archive name found for archive id: %s", archive_id)
            continue

        try:
            # Since results are returned as lists, we have to access the 0th
            # element to pull out the string archive name.
            fpath = PDS_info[archive_name[0]]['path']
        except KeyError:
            logger.warning("Unable to locate file path for archive id %s", archive_id)
            continue

        # Add each file in the archive to the redis queue.
        for element in result:
            fname = fpath + element.filename
            fid = element.fileid
            redis_queue.QueueAdd((fname, fid, archive_name[0]))

        logger.info("Added %s files from %s", result.count(), archive_name)

    return 0
def test_redis_queue():
    # Redis Queue Objects
    RQ_main = RedisQueue('UPC_ReadyQueue')
    RQ_main.QueueAdd(("/Path/to/my/file.img", "1", "ARCHIVE"))

    if int(RQ_main.QueueSize()) > 0:
        # get a file from the queue
        item = literal_eval(RQ_main.QueueGet())
        inputfile = item[0]
        fid = item[1]
        archive = item[2]

        assert inputfile == "/Path/to/my/file.img"
        assert fid == "1"
        assert archive == "ARCHIVE"
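# The queue items above round-trip through Redis as Python-literal strings:
# a producer pushes a tuple and a consumer recovers it with ast.literal_eval,
# as seen throughout these scripts. A minimal, Redis-free sketch of that
# convention (illustrative only; the real serialization is handled inside
# RedisQueue):
def _example_tuple_round_trip():
    from ast import literal_eval

    queued = repr(("/Path/to/my/file.img", "1", "ARCHIVE"))   # string form stored in the queue
    inputfile, fid, archive = literal_eval(queued)            # recovered on the consumer side
    assert (inputfile, fid, archive) == ("/Path/to/my/file.img", "1", "ARCHIVE")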
def main(user_args):
    archive = user_args.archive
    volume = user_args.volume
    search = user_args.search
    log_level = user_args.log_level

    logger = logging.getLogger('Browse_Queueing.' + archive)
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting Process')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    archiveID = PDSinfoDICT[archive]['archiveid']

    RQ = RedisQueue('Browse_ReadyQueue')
    logger.info("Browse Queue: %s", RQ.id_name)

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection Success')
    except Exception:
        logger.error('Database Connection Error')
        return 1

    if volume:
        volstr = '%' + volume + '%'
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.filename.like(volstr),
                                           Files.upc_required == 't')
    else:
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.upc_required == 't')

    if qOBJ:
        addcount = 0
        for element in qOBJ:
            fname = PDSinfoDICT[archive]['path'] + element.filename
            fid = element.fileid
            RQ.QueueAdd((fname, fid, archive))
            addcount = addcount + 1
        logger.info('Files Added to UPC Queue: %s', addcount)
def main():
    args = Args()
    args.parse_args()

    logger = logging.getLogger('Thumbnail_Queueing.' + args.archive)
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting Process')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    archiveID = PDSinfoDICT[args.archive]['archiveid']

    RQ = RedisQueue('Thumbnail_ReadyQueue')

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection Success')
    except Exception:
        # Without a session there is nothing to queue, so bail out.
        logger.error('Database Connection Error')
        return 1

    if args.volume:
        volstr = '%' + args.volume + '%'
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.filename.like(volstr),
                                           Files.upc_required == 't')
    else:
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.upc_required == 't')

    if qOBJ:
        addcount = 0
        for element in qOBJ:
            fname = PDSinfoDICT[args.archive]['path'] + element.filename
            fid = element.fileid
            RQ.QueueAdd((fname, fid, args.archive))
            addcount = addcount + 1
        logger.info('Files Added to Thumbnail Queue: %s', addcount)
class IngestQueueProcess(QueueProcess):
    def __init__(self, *args, link_only=False, **kwargs):
        super().__init__(*args, **kwargs)
        # Default to empty string if a falsy value was specified
        self.search = self.search or ''
        self.link_only = link_only
        self.link_queue = RedisQueue('LinkQueue', self.namespace)

    def get_matching_files(self):
        """ Gets the files matching the processing parameters.

        Returns
        -------
        results : list
            A list of matching file paths
        """
        archivepath = self.get_archive_att('path')

        if self.volume:
            archivepath = join(archivepath, self.volume)

        results = []
        for dirpath, _, files in os.walk(archivepath):
            for filename in files:
                if self.search in filename:
                    results.append(join(dirpath, filename))
        return results

    def enqueue(self, element):
        """ Enqueue a single element.

        Parameters
        ----------
        element : str
            The filename to be enqueued

        Returns
        -------
        None
        """
        # Volume descriptors also feed the link queue so archive links can be rebuilt.
        if basename(element.lower()) == "voldesc.cat":
            self.link_queue.QueueAdd((element, self.archive))

        if not self.link_only:
            self.ready_queue.QueueAdd((element, self.archive))
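# A minimal usage sketch for IngestQueueProcess. The QueueProcess constructor
# signature is not shown in this module, so the keyword arguments below
# (archive, volume, search, namespace) are assumptions inferred from the
# attributes the class reads; treat this as an illustrative sketch rather
# than the project's documented API.
def _example_ingest_queueing():
    proc = IngestQueueProcess(archive='example_archive',   # hypothetical archive key
                              volume=None,                 # walk the whole archive
                              search='',                   # no filename filter
                              namespace='queue',           # hypothetical Redis namespace
                              link_only=False)
    for filepath in proc.get_matching_files():
        proc.enqueue(filepath)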
def main(user_args):
    log_level = user_args.log_level

    logger = logging.getLogger('DI_Queueing')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    PDS_info = json.load(open(pds_info, 'r'))
    redis_queue = RedisQueue('DI_ReadyQueue')
    logger.info("DI Queue: %s", redis_queue.id_name)

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
    except Exception as e:
        logger.error("%s", e)
        return 1

    for target in PDS_info:
        archive_id = PDS_info[target]['archiveid']
        # Files whose last DI check is more than 30 days old are due again.
        td = (datetime.datetime.now(pytz.utc)
              - datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
        testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")

        expired = archive_expired(session, archive_id, testing_date)

        # If any files within the archive are expired, send them to the queue
        if expired.count():
            for f in expired:
                redis_queue.QueueAdd((f.filename, target))
            logger.info('Archive %s DI Ready: %s Files', target,
                        str(expired.count()))
        else:
            logger.info('Archive %s DI Current', target)
    return 0
def main(user_args):
    archive = user_args.archive
    volume = user_args.volume
    jobarray = user_args.jobarray
    log_level = user_args.log_level

    RQ = RedisQueue('DI_ReadyQueue')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archiveID = PDSinfoDICT[archive]['archiveid']
    except KeyError:
        print("\nArchive '{}' not found in {}\n".format(archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        exit()

    logger = logging.getLogger('DI_Queueing.' + archive)
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info("DI Queue: %s", RQ.id_name)
    logger.info('Starting %s DI Queueing', archive)
    if volume:
        logger.info('Queueing %s Volume', volume)

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection: Success')
    except Exception:
        logger.error('Database Connection: Error')
        return 1

    if volume:
        volstr = '%' + volume + '%'
        vol_exists = session.query(Files).filter(
            Files.archiveid == archiveID, Files.filename.like(volstr)).first()

        if not vol_exists:
            print(f"No files exist in the database for volume \"{volume}\"."
                  " Either the volume does not exist or it has not been properly ingested.\n")
            exit()

    # Files are due for DI verification once their last check is over 30 days old.
    td = (datetime.datetime.now(pytz.utc)
          - datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
    testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")

    if volume:
        volstr = '%' + volume + '%'
        testQ = session.query(Files).filter(
            Files.archiveid == archiveID, Files.filename.like(volstr)).filter(
                or_(cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date).is_(None)))
    else:
        testQ = session.query(Files).filter(
            Files.archiveid == archiveID).filter(
                or_(cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date).is_(None)))

    addcount = 0
    for element in testQ:
        try:
            RQ.QueueAdd((element.filename, archive))
            addcount = addcount + 1
        except Exception:
            logger.warning('File %s Not Added to DI_ReadyQueue',
                           element.filename)

    logger.info('Files Added to Queue %s', addcount)
    logger.info('DI Queueing Complete')
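# The di_date filter above needs an explicit IS NULL clause so that files
# which have never been checked are also selected. SQLAlchemy expresses that
# with `.is_(None)`; Python's `is None` cannot be overloaded and would not
# produce SQL. A small self-contained illustration using a throwaway table
# rather than the pipeline's models:
def _example_is_null_clause():
    from sqlalchemy import Column, Date, Integer, MetaData, Table

    files = Table('files', MetaData(),
                  Column('fileid', Integer),
                  Column('di_date', Date))
    print(files.c.di_date.is_(None))   # renders as: files.di_date IS NULL
    print(files.c.di_date is None)     # False -- plain Python, not a SQL clause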
def main(user_args):
    upc_session_maker, upc_engine = db_connect(upc_db)

    persist = user_args.persist
    log_level = user_args.log_level
    namespace = user_args.namespace

    try:
        slurm_job_id = os.environ['SLURM_ARRAY_JOB_ID']
        slurm_array_id = os.environ['SLURM_ARRAY_TASK_ID']
    except KeyError:
        slurm_job_id = ''
        slurm_array_id = ''

    inputfile = ''
    context = {'job_id': slurm_job_id, 'array_id': slurm_array_id,
               'inputfile': inputfile}

    logger = logging.getLogger('UPC_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    log_file_handle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(job_id)s - %(array_id)s - %(inputfile)s - %(name)s - %(levelname)s, %(message)s')
    log_file_handle.setFormatter(formatter)
    logger.addHandler(log_file_handle)
    logger = logging.LoggerAdapter(logger, context)

    # Redis Queue Objects
    RQ_main = RedisQueue('UPC_UpdateQueue', namespace)
    RQ_work = RedisQueue('UPC_UpdateWorkQueue', namespace)
    logger.info("UPC Update Queue: %s", RQ_main.id_name)
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    # If the queue isn't registered, add it and set it to "running"
    RQ_lock.add({RQ_main.id_name: '1'})

    # while there are items in the redis queue
    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        # get a file from the queue
        item = RQ_main.Qfile2Qwork(RQ_main.getQueueName(), RQ_work.getQueueName())
        item_list = literal_eval(item)
        inputfile = item_list[0]
        archive = item_list[1]
        failing_command = item_list[2]
        update_type = item_list[3]
        upc_id = None

        if not os.path.isfile(inputfile):
            RQ_error.QueueAdd(f'Unable to locate or access {inputfile} during UPC update')
            logger.debug("%s is not a file\n", inputfile)
            exit()

        # Build URL for edr_source
        edr_source = inputfile.replace(workarea, web_base)

        # Update the logger context to include inputfile
        context['inputfile'] = inputfile

        try:
            session = upc_session_maker()
            session.close()
        except TypeError as e:
            logger.error("Unable to create a database session/connection to the upc database: %s", e)
            raise e

        try:
            if update_type.lower() == 'upc':
                recipe_file = recipe_base + "/" + archive + '.json'
                no_extension_inputfile = os.path.splitext(inputfile)[0]
                cam_info_file = no_extension_inputfile + '_caminfo.pvl'
                footprint_file = no_extension_inputfile + '_footprint.json'
                catlab_output = no_extension_inputfile + '_catlab.pvl'

                with open(recipe_file) as fp:
                    upc_json = json.load(fp)['upc']

                # Attempt to get the optional search_term_mapping for the upc
                # process
                try:
                    search_term_mapping = upc_json['search_term_mapping']
                except KeyError:
                    search_term_mapping = {}

                # Some datasets with attached PDS labels cause PVL to hang,
                # so the recipe includes a call to dump the label using `catlab`.
                # If present, use the catlab output as pds_label instead of inputfile
                if os.path.exists(catlab_output):
                    pds_label = pvl.load(catlab_output)
                else:
                    pds_label = pvl.load(inputfile)

                instrument_name = get_instrument_name(pds_label)
                spacecraft_name = get_spacecraft_name(pds_label)
                target_name = get_target_name(pds_label)

                with session_scope(upc_session_maker) as session:
                    target_qobj = Targets.create(session, targetname=target_name,
                                                 displayname=target_name.title(),
                                                 system=target_name)
                    target_id = target_qobj.targetid

                with session_scope(upc_session_maker) as session:
                    instrument_qobj = Instruments.create(session, instrument=instrument_name,
                                                         spacecraft=spacecraft_name)
                    instrument_id = instrument_qobj.instrumentid

                ######## Generate DataFiles Record ########
                datafile_attributes = create_datafiles_atts(pds_label, edr_source,
                                                            no_extension_inputfile + '.cub')
                datafile_attributes['instrumentid'] = instrument_id
                datafile_attributes['targetid'] = target_id

                with session_scope(upc_session_maker) as session:
                    datafile_qobj = DataFiles.create(session, **datafile_attributes)
                    upc_id = datafile_qobj.upcid

                ######## Generate SearchTerms Record ########
                search_term_attributes = create_search_terms_atts(cam_info_file, upc_id,
                                                                  no_extension_inputfile + '.cub',
                                                                  footprint_file,
                                                                  search_term_mapping)
                search_term_attributes['targetid'] = target_id
                search_term_attributes['instrumentid'] = instrument_id

                with session_scope(upc_session_maker) as session:
                    SearchTerms.create(session, **search_term_attributes)

                ######## Generate JsonKeywords Record ########
                json_keywords_attributes = create_json_keywords_atts(cam_info_file, upc_id,
                                                                     inputfile, failing_command,
                                                                     logger)

                with session_scope(upc_session_maker) as session:
                    JsonKeywords.create(session, **json_keywords_attributes)

            # Derived Processing:

            # If we don't have a upcid, get the matching ID from the database
            if not upc_id:
                with session_scope(upc_session_maker) as session:
                    src = inputfile.replace(workarea, web_base)
                    datafile = session.query(DataFiles).filter(
                        or_(DataFiles.source == src,
                            DataFiles.detached_label == src)).first()
                    if not datafile:
                        RQ_error.QueueAdd(f'No matching upcid was found for {inputfile}, '
                                          'derived product paths could not be added')
                        logger.warning('No matching upcid was found for %s, '
                                       'derived product paths could not be added', inputfile)
                    upc_id = datafile.upcid

            final_path = makedir(inputfile)
            src = os.path.splitext(inputfile)[0]
            derived_product = os.path.join(final_path,
                                           os.path.splitext(os.path.basename(inputfile))[0])

            # If derived products exist, copy them to the derived area and add the path to the db
            try:
                shutil.move(src + '.browse.jpg', derived_product + '.browse.jpg')
                shutil.move(src + '.thumbnail.jpg', derived_product + '.thumbnail.jpg')
                add_url(derived_product, upc_id, upc_session_maker)
            except FileNotFoundError:
                RQ_error.QueueAdd(f'Unable to locate or access derived products for {inputfile}')
                logger.warning('Unable to locate or access derived products for %s', inputfile)

            if not persist:
                # Remove all files from the workarea except for the copied
                # source file
                file_prefix = os.path.splitext(inputfile)[0]
                workarea_files = glob(file_prefix + '*')
                for file in workarea_files:
                    os.remove(file)

        # Handle SQL specific database errors
        except SQLAlchemyError as e:
            logger.error("Database operation failed: %s \nRequeueing (%s, %s)",
                         e, inputfile, archive)
            RQ_main.QueueAdd((inputfile, archive, failing_command, update_type))
            raise e

        RQ_work.QueueRemove(item)

    # Disconnect from the engines
    upc_engine.dispose()
def main(user_args):
    log_level = user_args.log_level
    override = user_args.override

    logger = logging.getLogger('Ingest_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Ingest.log')
    print("Log File: {}Ingest.log".format(pds_log))
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info("Starting Ingest Process")
    PDSinfoDICT = json.load(open(pds_info, 'r'))

    RQ_main = RedisQueue('Ingest_ReadyQueue')
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ_main.id_name: '1'})
    RQ_work = RedisQueue('Ingest_WorkQueue')

    try:
        Session, engine = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection: Success')
    except Exception:
        logger.error('Database Connection: Error')
        return 1

    index = 1

    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        item = literal_eval(RQ_main.QueueGet())
        inputfile = item[0]
        archive = item[1]
        if not os.path.isfile(inputfile):
            RQ_error.QueueAdd(
                f'Unable to locate or access {inputfile} during ingest processing')
            logger.warning("%s is not a file\n", inputfile)
            continue
        RQ_work.QueueAdd(inputfile)

        subfile = inputfile.replace(PDSinfoDICT[archive]['path'], '')
        # Calculate the checksum in chunks of 4096 bytes
        f_hash = hashlib.md5()
        with open(inputfile, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                f_hash.update(chunk)
        filechecksum = f_hash.hexdigest()

        QOBJ = session.query(Files).filter_by(filename=subfile).first()

        runflag = False
        if QOBJ is None or filechecksum != QOBJ.checksum:
            runflag = True

        if runflag or override:
            date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            fileURL = inputfile.replace(archive_base, web_base)

            # If all upc requirements are in 'inputfile', flag for upc
            try:
                upcflag = all(req in inputfile for req in PDSinfoDICT[archive]['upc_reqs'])
            except KeyError:
                logger.warning(
                    "No upc_reqs found for %s\nSetting upc eligibility False for all related files.",
                    str(archive))
                upcflag = False

            filesize = os.path.getsize(inputfile)

            try:
                ingest_entry = Files()

                if QOBJ is not None and override:
                    ingest_entry.fileid = QOBJ.fileid

                ingest_entry.archiveid = PDSinfoDICT[archive]['archiveid']
                ingest_entry.filename = subfile
                ingest_entry.entry_date = date
                ingest_entry.checksum = filechecksum
                ingest_entry.upc_required = upcflag
                ingest_entry.validation_required = True
                ingest_entry.header_only = False
                ingest_entry.release_date = date
                ingest_entry.file_url = fileURL
                ingest_entry.file_size = filesize
                ingest_entry.di_pass = True
                ingest_entry.di_date = date

                session.merge(ingest_entry)
                session.flush()

                RQ_work.QueueRemove(inputfile)

                index = index + 1

            except Exception as e:
                logger.error("Error During File Insert %s : %s", str(subfile), str(e))

        elif not runflag and not override:
            RQ_work.QueueRemove(inputfile)
            logger.warning(
                "Not running ingest: file %s already present"
                " in database and no override flag supplied", inputfile)

        # Commit in batches of 250 files to keep transactions small
        if index >= 250:
            try:
                session.commit()
                logger.info("Commit 250 files to Database: Success")
                index = 1
            except Exception as e:
                session.rollback()
                logger.warning("Unable to commit to database: %s", str(e))
    else:
        logger.info("No Files Found in Ingest Queue")

    try:
        session.commit()
        logger.info("Commit to Database: Success")
    except Exception as e:
        logger.error("Unable to commit to database: %s", str(e))
        session.rollback()

    # Close connection to database
    session.close()
    engine.dispose()

    if RQ_main.QueueSize() == 0 and RQ_work.QueueSize() == 0:
        logger.info("Process Complete All Queues Empty")
    elif RQ_main.QueueSize() == 0 and RQ_work.QueueSize() != 0:
        logger.warning("Process Done Work Queue NOT Empty Contains %s Files",
                       str(RQ_work.QueueSize()))

    logger.info("Ingest Complete")
def main(user_args):
    log_level = user_args.log_level

    logger = logging.getLogger('Browse_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    RQ_main = RedisQueue('Browse_ReadyQueue')
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ_main.id_name: '1'})

    PDSinfoDICT = json.load(open(pds_info, 'r'))

    pds_session_maker, pds_engine = db_connect(pds_db)
    pds_session = pds_session_maker()
    upc_session_maker, upc_engine = db_connect(upc_db)
    upc_session = upc_session_maker()

    tid = get_tid('fullimageurl', upc_session)

    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        item = literal_eval(RQ_main.QueueGet())
        inputfile = item[0]
        fid = item[1]
        archive = item[2]
        if os.path.isfile(inputfile):
            logger.info('Starting Process: %s', inputfile)
            finalpath = makedir(inputfile)

            recipeOBJ = Recipe()
            recipeOBJ.addMissionJson(archive, 'reduced')

            infile = workarea + os.path.splitext(
                os.path.basename(inputfile))[0] + '.Binput.cub'
            outfile = workarea + os.path.splitext(
                os.path.basename(inputfile))[0] + '.Boutput.cub'
            status = 'success'
            for item in recipeOBJ.getProcesses():
                if status == 'error':
                    logger.error("Error processing %s", inputfile)
                    break
                elif status == 'success':
                    processOBJ = Process()
                    processOBJ.ProcessFromRecipe(item, recipeOBJ.getRecipe())

                    if '2isis' in item:
                        processOBJ.updateParameter('from_', inputfile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'spiceinit':
                        processOBJ.updateParameter('from_', infile)
                    elif item == 'cubeatt':
                        label = pvl.load(infile)
                        bands = PDSinfoDICT[archive]['bandorder']
                        query_bands = label['IsisCube']['BandBin'][
                            PDSinfoDICT[archive]['bandbinQuery']]
                        # Create a set from the list / single value
                        try:
                            query_band_set = set(query_bands)
                        except Exception:
                            query_band_set = set([query_bands])

                        # Iterate through 'bands' and grab the first value that is present in the
                        # set defined by 'bandbinquery' -- if not present, default to 1
                        exband = next(
                            (band for band in bands if band in query_band_set), 1)

                        band_infile = infile + '+' + str(exband)
                        processOBJ.updateParameter('from_', band_infile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'ctxevenodd':
                        label = pvl.load(infile)
                        SS = label['IsisCube']['Instrument']['SpatialSumming']
                        if SS != 1:
                            break
                        else:
                            processOBJ.updateParameter('from_', infile)
                            processOBJ.updateParameter('to', outfile)
                    elif item == 'reduce':
                        label = pvl.load(infile)
                        Nline = label['IsisCube']['Core']['Dimensions']['Lines']
                        Nsample = label['IsisCube']['Core']['Dimensions']['Samples']
                        Nline = int(Nline)
                        Nsample = int(Nsample)
                        Sfactor = scaleFactor(Nline, Nsample, recip_json)
                        processOBJ.updateParameter('lscale', Sfactor)
                        processOBJ.updateParameter('sscale', Sfactor)
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'isis2std':
                        final_outfile = finalpath + '/' + os.path.splitext(
                            os.path.basename(inputfile))[0] + '.browse.jpg'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', final_outfile)
                    else:
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)

                    for k, v in processOBJ.getProcess().items():
                        func = getattr(isis, k)
                        try:
                            func(**v)
                            logger.info('Process %s :: Success', k)
                            if os.path.isfile(outfile):
                                if '.cub' in outfile:
                                    os.rename(outfile, infile)
                            status = 'success'
                            if '2isis' in item:
                                isisSerial = getISISid(infile)
                        except ProcessError as e:
                            print(e)
                            logger.error('Process %s :: Error', k)
                            status = 'error'
            if status == 'success':
                DB_addURL(upc_session, isisSerial, final_outfile, tid)
                os.remove(infile)
                logger.info('Browse Process Success: %s', inputfile)
                AddProcessDB(pds_session, fid, 't')
        else:
            RQ_error.QueueAdd(
                f'Unable to locate or access {inputfile} during browse processing')
            logger.error('File %s Not Found', inputfile)

    upc_session.close()
    pds_session.close()
    upc_engine.dispose()
    pds_engine.dispose()
def main(user_args):
    proc = user_args.proc
    derived = user_args.derived
    log_level = user_args.log_level
    namespace = user_args.namespace

    try:
        slurm_job_id = os.environ['SLURM_ARRAY_JOB_ID']
        slurm_array_id = os.environ['SLURM_ARRAY_TASK_ID']
    except KeyError:
        slurm_job_id = ''
        slurm_array_id = ''

    inputfile = ''
    context = {'job_id': slurm_job_id, 'array_id': slurm_array_id,
               'inputfile': inputfile}

    logger = logging.getLogger('UPC_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    log_file_handle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(job_id)s - %(array_id)s - %(inputfile)s - %(name)s - %(levelname)s, %(message)s')
    log_file_handle.setFormatter(formatter)
    logger.addHandler(log_file_handle)
    logger = logging.LoggerAdapter(logger, context)

    # Redis Queue Objects
    RQ_main = RedisQueue('UPC_ReadyQueue', namespace)
    RQ_error = RedisQueue(upc_error_queue, namespace)
    RQ_work = RedisQueue('UPC_WorkQueue', namespace)
    RQ_update = RedisQueue('UPC_UpdateQueue', namespace)
    logger.info("UPC Processing Queue: %s", RQ_main.id_name)

    RQ_lock = RedisLock(lock_obj)
    # If the queue isn't registered, add it and set it to "running"
    RQ_lock.add({RQ_main.id_name: '1'})

    # while there are items in the redis queue
    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        # get a file from the queue
        item = RQ_main.Qfile2Qwork(RQ_main.getQueueName(), RQ_work.getQueueName())
        inputfile = literal_eval(item)[0]
        archive = literal_eval(item)[1]

        if not os.path.isfile(inputfile):
            RQ_error.QueueAdd(f'Unable to locate or access {inputfile} during UPC processing')
            logger.debug("%s is not a file\n", inputfile)
            exit()

        # Update the logger context to include inputfile
        context['inputfile'] = inputfile

        recipe_file = recipe_base + "/" + archive + '.json'
        no_extension_inputfile = os.path.splitext(inputfile)[0]
        cam_info_file = no_extension_inputfile + '_caminfo.pvl'
        footprint_file = no_extension_inputfile + '_footprint.json'
        catlab_output = no_extension_inputfile + '_catlab.pvl'

        if proc:
            with open(recipe_file) as fp:
                upc_json = json.load(fp)['upc']
                recipe_string = json.dumps(upc_json['recipe'])

            processes = generate_processes(inputfile,
                                           recipe_string, logger,
                                           no_extension_inputfile=no_extension_inputfile,
                                           catlab_output=catlab_output,
                                           cam_info_file=cam_info_file,
                                           footprint_file=footprint_file)
            failing_command, _ = process(processes, workarea, logger)

        if derived:
            if os.path.isfile(inputfile):
                recipe_file = recipe_base + "/" + archive + '.json'
                with open(recipe_file) as fp:
                    recipe = json.load(fp, object_pairs_hook=parse_pairs)['reduced']
                    recipe_string = json.dumps(recipe['recipe'])

                logger.info('Starting Process: %s', inputfile)

                work_dir = os.path.dirname(inputfile)
                derived_product = os.path.join(work_dir,
                                               os.path.splitext(os.path.basename(inputfile))[0])

                no_extension_inputfile = os.path.splitext(inputfile)[0]
                processes = generate_processes(inputfile, recipe_string, logger,
                                               no_extension_inputfile=no_extension_inputfile,
                                               derived_product=derived_product)
                failing_command, _ = process(processes, workarea, logger)

        if failing_command:
            logger.error('%s Processing Error: %s', inputfile, failing_command)

        if proc:
            RQ_update.QueueAdd((inputfile, archive, failing_command, 'upc'))
        elif derived:
            RQ_update.QueueAdd((inputfile, archive, failing_command, 'derived'))

        RQ_work.QueueRemove(item)

    logger.info("UPC processing exited")
def main(user_args): key = user_args.key norun = user_args.norun namespace = user_args.namespace if namespace is None: namespace = default_namespace # Set up logging logger = logging.getLogger(key) logger.setLevel(logging.INFO) logFileHandle = logging.FileHandler(pds_log + 'Service.log') formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s, %(message)s') logFileHandle.setFormatter(formatter) logger.addHandler(logFileHandle) RQ_lock = RedisLock(lock_obj) RQ_lock.add({'Services': '1'}) if not RQ_lock.available('Services'): exit() # Connect to database and access 'jobs' table DBQO = PDS_DBquery('JOBS') if key is None: # If no key is specified, grab the first key try: key = DBQO.jobKey() try: key = key.decode('utf-8') except: pass # If the queue is empty, it'll throw a type error. except TypeError: logger.debug('No keys found in clusterjobs database') exit(1) try: # Set the 'queued' column to current time i.e. prep for processing DBQO.setJobsQueued(key) except KeyError as e: logger.error('%s', e) exit(1) logger.info('Starting Process') xmlOBJ = jobXML(DBQO.jobXML4Key(key)) # Make directory if it doesn't exist directory = scratch + key if not os.path.exists(directory): os.makedirs(directory) logger.info('Working Area: %s', directory) # Set up Redis Hash for ground range RedisH = RedisHash(key + '_info') RedisH.RemoveAll() RedisErrorH = RedisHash(key + '_error') RedisErrorH.RemoveAll() RedisH_DICT = {} RedisH_DICT['service'] = xmlOBJ.getProcess() RedisH_DICT['fileformat'] = xmlOBJ.getOutFormat() RedisH_DICT['outbit'] = xmlOBJ.getOutBit() if xmlOBJ.getRangeType() is not None: RedisH_DICT['grtype'] = xmlOBJ.getRangeType() RedisH_DICT['minlat'] = xmlOBJ.getMinLat() RedisH_DICT['maxlat'] = xmlOBJ.getMaxLat() RedisH_DICT['minlon'] = xmlOBJ.getMinLon() RedisH_DICT['maxlon'] = xmlOBJ.getMaxLon() if RedisH.IsInHash('service'): pass else: RedisH.AddHash(RedisH_DICT) if RedisH.IsInHash('service'): logger.info('Redis info Hash: Success') else: logger.error('Redis info Hash Not Found') # End ground range RQ_recipe = RedisQueue(key + '_recipe', namespace) RQ_recipe.RemoveAll() RQ_file = RedisQueue(key + '_FileQueue', namespace) RQ_file.RemoveAll() RQ_WorkQueue = RedisQueue(key + '_WorkQueue', namespace) RQ_WorkQueue.RemoveAll() RQ_loggy = RedisQueue(key + '_loggy', namespace) RQ_loggy.RemoveAll() RQ_zip = RedisQueue(key + '_ZIP', namespace) RQ_zip.RemoveAll() if xmlOBJ.getProcess() == 'POW': fileList = xmlOBJ.getFileListWB() elif xmlOBJ.getProcess() == 'MAP2': fileList = xmlOBJ.getMFileListWB() for List_file in fileList: # Input and output file naming and path stuff if xmlOBJ.getProcess() == 'POW': if xmlOBJ.getInst() == 'THEMIS_IR': Input_file = List_file.replace('odtie1_', 'odtir1_') Input_file = Input_file.replace('xxedr', 'xxrdr') Input_file = Input_file.replace('EDR.QUB', 'RDR.QUB') Input_file = Input_file.replace( 'http://pdsimage.wr.usgs.gov/Missions/', archive_base) elif xmlOBJ.getInst() == 'ISSNA': Input_file = List_file.replace('.IMG', '.LBL') Input_file = Input_file.replace( 'http://pdsimage.wr.usgs.gov/Missions/', archive_base) elif xmlOBJ.getInst() == 'ISSWA': Input_file = List_file.replace('.IMG', '.LBL') Input_file = Input_file.replace( 'http://pdsimage.wr.usgs.gov/Missions/', archive_base) elif xmlOBJ.getInst() == 'SOLID STATE IMAGING SYSTEM': Input_file = List_file.replace('.img', '.lbl') Input_file = Input_file.replace( 'http://pdsimage.wr.usgs.gov/Missions/', archive_base) else: Input_file = List_file.replace( 'http://pdsimage.wr.usgs.gov/Missions/', archive_base) elif 
xmlOBJ.getProcess() == 'MAP2': Input_file = List_file.replace('file://pds_san', '/pds_san') if '+' in Input_file: tempsplit = Input_file.split('+') tempFile = tempsplit[0] else: tempFile = Input_file label = pvl.load(tempFile) # Output final file naming Tbasename = os.path.splitext(os.path.basename(tempFile))[0] splitBase = Tbasename.split('_') labP = xmlOBJ.getProjection() if labP == 'INPUT': lab_proj = label['IsisCube']['Mapping']['ProjectionName'][0:4] else: lab_proj = labP[0:4] if xmlOBJ.getClat() is None or xmlOBJ.getClon() is None: basefinal = splitBase[0] + splitBase[1] + \ splitBase[2] + '_MAP2_' + lab_proj.upper() else: lab_clat = float(xmlOBJ.getClat()) if lab_clat >= 0: labH = 'N' elif lab_clat < 0: labH = 'S' lab_clon = float(xmlOBJ.getClon()) basefinal = splitBase[0] + splitBase[1] + splitBase[ 2] + '_MAP2_' + str(lab_clat) + labH + str( lab_clon) + '_' + lab_proj.upper() RedisH.MAPname(basefinal) try: RQ_file.QueueAdd(Input_file) logger.info('File %s Added to Redis Queue', Input_file) except Exception as e: logger.warn('File %s NOT Added to Redis Queue', Input_file) print('Redis Queue Error', e) RedisH.FileCount(RQ_file.QueueSize()) logger.info('Count of Files Queue: %s', str(RQ_file.QueueSize())) # Map Template Stuff logger.info('Making Map File') mapOBJ = MakeMap() if xmlOBJ.getProcess() == 'MAP2' and xmlOBJ.getProjection() == 'INPUT': proj = label['IsisCube']['Mapping']['ProjectionName'] mapOBJ.Projection(proj) else: mapOBJ.Projection(xmlOBJ.getProjection()) if xmlOBJ.getClon() is not None: mapOBJ.CLon(float(xmlOBJ.getClon())) if xmlOBJ.getClat() is not None: mapOBJ.CLat(float(xmlOBJ.getClat())) if xmlOBJ.getFirstParallel() is not None: mapOBJ.FirstParallel(float(xmlOBJ.getFirstParallel())) if xmlOBJ.getSecondParallel() is not None: mapOBJ.SecondParallel(float(xmlOBJ.getSecondParallel())) if xmlOBJ.getResolution() is not None: mapOBJ.PixelRes(float(xmlOBJ.getResolution())) if xmlOBJ.getTargetName() is not None: mapOBJ.Target(xmlOBJ.getTargetName()) if xmlOBJ.getERadius() is not None: mapOBJ.ERadius(float(xmlOBJ.getERadius())) if xmlOBJ.getPRadius() is not None: mapOBJ.PRadius(float(xmlOBJ.getPRadius())) if xmlOBJ.getLatType() is not None: mapOBJ.LatType(xmlOBJ.getLatType()) if xmlOBJ.getLonDirection() is not None: mapOBJ.LonDirection(xmlOBJ.getLonDirection()) if xmlOBJ.getLonDomain() is not None: mapOBJ.LonDomain(int(xmlOBJ.getLonDomain())) if xmlOBJ.getProcess() == 'MAP2': if xmlOBJ.getMinLat() is not None: mapOBJ.MinLat(float(xmlOBJ.getMinLat())) if xmlOBJ.getMaxLat() is not None: mapOBJ.MaxLat(float(xmlOBJ.getMaxLat())) if xmlOBJ.getMinLon() is not None: mapOBJ.MinLon(float(xmlOBJ.getMinLon())) if xmlOBJ.getMaxLon() is not None: mapOBJ.MaxLon(float(xmlOBJ.getMaxLon())) mapOBJ.Map2pvl() MAPfile = directory + "/" + key + '.map' mapOBJ.Map2File(MAPfile) try: f = open(MAPfile) f.close logger.info('Map File Creation: Success') except IOError as e: logger.error('Map File %s Not Found', MAPfile) # ** End Map Template Stuff ** logger.info('Building Recipe') recipeOBJ = Recipe() if xmlOBJ.getProcess() == 'POW': recipeOBJ.AddJsonFile(recipe_base + xmlOBJ.getCleanName() + '.json', "pow") elif xmlOBJ.getProcess() == 'MAP2': recipeOBJ.AddJsonFile(recipe_base + "map2_process.json", "map") # Test for stretch and add to recipe # if MAP2 and 8 or 16 bit run stretch to set range if xmlOBJ.getOutBit() == 'input': testBitType = str(label['IsisCube']['Core']['Pixels']['Type']).upper() else: testBitType = xmlOBJ.getOutBit().upper() if xmlOBJ.getProcess() == 'MAP2' and xmlOBJ.STR_Type() is 
None: if str(label['IsisCube']['Core']['Pixels']['Type']).upper( ) != xmlOBJ.getOutBit().upper() and str( label['IsisCube']['Core']['Pixels']['Type']).upper() != 'REAL': if str(label['IsisCube']['Core']['Pixels'] ['Type']).upper() == 'SIGNEDWORD': strpairs = '0:-32765 0:-32765 100:32765 100:32765' elif str(label['IsisCube']['Core']['Pixels'] ['Type']).upper() == 'UNSIGNEDBYTE': strpairs = '0:1 0:1 100:254 100:254' STRprocessOBJ = Process() STRprocessOBJ.newProcess('stretch') STRprocessOBJ.AddParameter('from_', 'value') STRprocessOBJ.AddParameter('to', 'value') STRprocessOBJ.AddParameter('usepercentages', 'yes') STRprocessOBJ.AddParameter('pairs', strpairs) recipeOBJ.AddProcess(STRprocessOBJ.getProcess()) strType = xmlOBJ.STR_Type() if strType == 'StretchPercent' and xmlOBJ.STR_PercentMin( ) is not None and xmlOBJ.STR_PercentMax( ) is not None and testBitType != 'REAL': if float(xmlOBJ.STR_PercentMin()) != 0 and float( xmlOBJ.STR_PercentMax()) != 100: if testBitType == 'UNSIGNEDBYTE': strpairs = '0:1 ' + xmlOBJ.STR_PercentMin() + ':1 ' + \ xmlOBJ.STR_PercentMax() + ':254 100:254' elif testBitType == 'SIGNEDWORD': strpairs = '0:-32765 ' + xmlOBJ.STR_PercentMin() + ':-32765 ' + \ xmlOBJ.STR_PercentMax() + ':32765 100:32765' STRprocessOBJ = Process() STRprocessOBJ.newProcess('stretch') STRprocessOBJ.AddParameter('from_', 'value') STRprocessOBJ.AddParameter('to', 'value') STRprocessOBJ.AddParameter('usepercentages', 'yes') STRprocessOBJ.AddParameter('pairs', strpairs) recipeOBJ.AddProcess(STRprocessOBJ.getProcess()) elif strType == 'GaussStretch': STRprocessOBJ = Process() STRprocessOBJ.newProcess('gaussstretch') STRprocessOBJ.AddParameter('from_', 'value') STRprocessOBJ.AddParameter('to', 'value') STRprocessOBJ.AddParameter('gsigma', xmlOBJ.STR_GaussSigma()) recipeOBJ.AddProcess(STRprocessOBJ.getProcess()) elif strType == 'HistogramEqualization': STRprocessOBJ = Process() STRprocessOBJ.newProcess('histeq') STRprocessOBJ.AddParameter('from_', 'value') STRprocessOBJ.AddParameter('to', 'value') if xmlOBJ.STR_PercentMin() is None: STRprocessOBJ.AddParameter('minper', '0') else: STRprocessOBJ.AddParameter('minper', xmlOBJ.STR_PercentMin()) if xmlOBJ.STR_PercentMax() is None: STRprocessOBJ.AddParameter('maxper', '100') else: STRprocessOBJ.AddParameter('maxper', xmlOBJ.STR_PercentMax()) recipeOBJ.AddProcess(STRprocessOBJ.getProcess()) elif strType == 'SigmaStretch': STRprocessOBJ = Process() STRprocessOBJ.newProcess('sigmastretch') STRprocessOBJ.AddParameter('from_', 'value') STRprocessOBJ.AddParameter('to', 'value') STRprocessOBJ.AddParameter('variance', xmlOBJ.STR_SigmaVariance()) recipeOBJ.AddProcess(STRprocessOBJ.getProcess()) # Test for output bit type and add to recipe if xmlOBJ.getProcess() == 'POW': if xmlOBJ.getOutBit().upper() == 'UNSIGNEDBYTE' or xmlOBJ.getOutBit( ).upper() == 'SIGNEDWORD': CAprocessOBJ = Process() CAprocessOBJ.newProcess('cubeatt-bit') CAprocessOBJ.AddParameter('from_', 'value') CAprocessOBJ.AddParameter('to', 'value') recipeOBJ.AddProcess(CAprocessOBJ.getProcess()) elif xmlOBJ.getProcess() == 'MAP2': if xmlOBJ.getOutBit().upper() != 'INPUT': if xmlOBJ.getOutBit().upper( ) == 'UNSIGNEDBYTE' or xmlOBJ.getOutBit().upper() == 'SIGNEDWORD': if str(label['IsisCube']['Core']['Pixels'] ['Type']).upper() != xmlOBJ.getOutBit().upper(): CAprocessOBJ = Process() CAprocessOBJ.newProcess('cubeatt-bit') CAprocessOBJ.AddParameter('from_', 'value') CAprocessOBJ.AddParameter('to', 'value') recipeOBJ.AddProcess(CAprocessOBJ.getProcess()) # Add Grid(MAP2) if xmlOBJ.getGridInterval() is not 
None: GprocessOBJ = Process() GprocessOBJ.newProcess('grid') GprocessOBJ.AddParameter('from_', 'value') GprocessOBJ.AddParameter('to', 'value') GprocessOBJ.AddParameter('latinc', xmlOBJ.getGridInterval()) GprocessOBJ.AddParameter('loninc', xmlOBJ.getGridInterval()) GprocessOBJ.AddParameter('outline', 'yes') GprocessOBJ.AddParameter('boundary', 'yes') GprocessOBJ.AddParameter('linewidth', '3') recipeOBJ.AddProcess(GprocessOBJ.getProcess()) # OUTPUT FORMAT # Test for GDAL and add to recipe Oformat = xmlOBJ.getOutFormat() if Oformat == 'GeoTiff-BigTiff' or Oformat == 'GeoJPEG-2000' or Oformat == 'JPEG' or Oformat == 'PNG': if Oformat == 'GeoJPEG-2000': Oformat = 'JP2KAK' if Oformat == 'GeoTiff-BigTiff': Oformat = 'GTiff' GDALprocessOBJ = Process() GDALprocessOBJ.newProcess('gdal_translate') if xmlOBJ.getOutBit() != 'input': GDALprocessOBJ.AddParameter( '-ot', GDALprocessOBJ.GDAL_OBit(xmlOBJ.getOutBit())) GDALprocessOBJ.AddParameter('-of', Oformat) if Oformat == 'GTiff' or Oformat == 'JP2KAK' or Oformat == 'JPEG': GDALprocessOBJ.AddParameter('-co', GDALprocessOBJ.GDAL_Creation(Oformat)) recipeOBJ.AddProcess(GDALprocessOBJ.getProcess()) # set up pds2isis and add to recipe elif Oformat == 'PDS': pdsProcessOBJ = Process() pdsProcessOBJ.newProcess('isis2pds') pdsProcessOBJ.AddParameter('from_', 'value') pdsProcessOBJ.AddParameter('to', 'value') if xmlOBJ.getOutBit() == 'unsignedbyte': pdsProcessOBJ.AddParameter('bittype', '8bit') elif xmlOBJ.getOutBit() == 'signedword': pdsProcessOBJ.AddParameter('bittype', 's16bit') recipeOBJ.AddProcess(pdsProcessOBJ.getProcess()) for item in recipeOBJ.getProcesses(): processOBJ = Process() processOBJ.ProcessFromRecipe(item, recipeOBJ.getRecipe()) if item == 'cam2map': processOBJ.updateParameter('map', MAPfile) if xmlOBJ.getResolution() is None: processOBJ.updateParameter('pixres', 'CAMERA') else: processOBJ.updateParameter('pixres', 'MAP') if xmlOBJ.getRangeType() is None: processOBJ.updateParameter('defaultrange', 'MINIMIZE') elif xmlOBJ.getRangeType() == 'smart' or xmlOBJ.getRangeType( ) == 'fill': processOBJ.updateParameter('defaultrange', 'CAMERA') processOBJ.AddParameter('trim', 'YES') elif item == 'map2map': processOBJ.updateParameter('map', MAPfile) if xmlOBJ.getResolution() is None: processOBJ.updateParameter('pixres', 'FROM') else: processOBJ.updateParameter('pixres', 'MAP') if xmlOBJ.OutputGeometry() is not None: processOBJ.updateParameter('defaultrange', 'MAP') processOBJ.AddParameter('trim', 'YES') else: processOBJ.updateParameter('defaultrange', 'FROM') processJSON = processOBJ.Process2JSON() try: RQ_recipe.QueueAdd(processJSON) logger.info('Recipe Element Added to Redis: %s : Success', item) except Exception as e: logger.warn('Recipe Element NOT Added to Redis: %s', item) # HPC job stuff logger.info('HPC Cluster job Submission Starting') jobOBJ = HPCjob() jobOBJ.setJobName(key + '_Service') jobOBJ.setStdOut(slurm_log + key + '_%A_%a.out') jobOBJ.setStdError(slurm_log + key + '_%A_%a.err') jobOBJ.setWallClock('24:00:00') jobOBJ.setMemory('24576') jobOBJ.setPartition('pds') JAsize = RQ_file.QueueSize() jobOBJ.setJobArray(JAsize) logger.info('Job Array Size : %s', str(JAsize)) # @TODO replace with source activate <env> #jobOBJ.addPath('/usgs/apps/anaconda/bin') # Whether or not we use the default namespace, this guarantees that the POW/MAP queues will match the namespace # used in the job manager. 
if xmlOBJ.getProcess() == 'POW': cmd = cmd_dir + "pow_process.py -k {} -n {}".format(key, namespace) elif xmlOBJ.getProcess() == 'MAP2': cmd = cmd_dir + "map_process.py -k {} -n {}".format(key, namespace) logger.info('HPC Command: %s', cmd) jobOBJ.setCommand(cmd) SBfile = directory + '/' + key + '.sbatch' jobOBJ.MakeJobFile(SBfile) try: sb = open(SBfile) sb.close logger.info('SBATCH File Creation: Success') except IOError as e: logger.error('SBATCH File %s Not Found', SBfile) if norun: logger.info('No-run mode, will not submit HPC job.') else: try: jobOBJ.Run() logger.info('Job Submission to HPC: Success') DBQO.setJobsStarted(key) except IOError as e: logger.error('Jobs NOT Submitted to HPC')
def main(key, namespace=None): key = user_args.key namespace = user_args.namespace print(namespace) if namespace is None: namespace = default_namespace workarea = scratch + key + '/' RQ_file = RedisQueue(key + '_FileQueue', namespace) RQ_work = RedisQueue(key + '_WorkQueue', namespace) RQ_zip = RedisQueue(key + '_ZIP', namespace) RQ_loggy = RedisQueue(key + '_loggy', namespace) RQ_final = RedisQueue('FinalQueue', namespace) RQ_recipe = RedisQueue(key + '_recipe', namespace) RHash = RedisHash(key + '_info') RHerror = RedisHash(key + '_error') RQ_lock = RedisLock(lock_obj) RQ_lock.add({'POW': '1'}) if int(RQ_file.QueueSize()) == 0: print("No Files Found in Redis Queue") elif RQ_lock.available('POW'): print(RQ_file.getQueueName()) jobFile = RQ_file.Qfile2Qwork(RQ_file.getQueueName(), RQ_work.getQueueName()) # Setup system logging basename = os.path.splitext(os.path.basename(jobFile))[0] logger = logging.getLogger(key + '.' + basename) logger.setLevel(logging.INFO) logFileHandle = logging.FileHandler(pds_log + '/Service.log') formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s, %(message)s') logFileHandle.setFormatter(formatter) logger.addHandler(logFileHandle) logger.info('Starting POW Processing') # set up loggy loggyOBJ = Loggy(basename) # File Naming if '+' in jobFile: bandSplit = jobFile.split('+') inputFile = bandSplit[0] else: inputFile = jobFile infile = workarea + \ os.path.splitext(os.path.basename(jobFile))[0] + '.input.cub' outfile = workarea + \ os.path.splitext(os.path.basename(jobFile))[0] + '.output.cub' status = 'success' for element in RQ_recipe.RecipeGet(): if status == 'error': break elif status == 'success': processOBJ = Process() process = processOBJ.JSON2Process(element) if 'gdal_translate' not in processOBJ.getProcessName(): print(processOBJ.getProcessName()) if '2isis' in processOBJ.getProcessName(): processOBJ.updateParameter('from_', inputFile) processOBJ.updateParameter('to', outfile) elif 'cubeatt-band' in processOBJ.getProcessName(): if '+' in jobFile: infileB = infile + '+' + bandSplit[1] processOBJ.updateParameter('from_', infileB) processOBJ.updateParameter('to', outfile) processOBJ.ChangeProcess('cubeatt') else: continue elif 'cubeatt-bit' in processOBJ.getProcessName(): if RHash.OutBit() == 'unsignedbyte': temp_outfile = outfile + '+lsb+tile+attached+unsignedbyte+1:254' elif RHash.OutBit() == 'signedword': temp_outfile = outfile + '+lsb+tile+attached+signedword+-32765:32765' processOBJ.updateParameter('from_', infile) processOBJ.updateParameter('to', temp_outfile) processOBJ.ChangeProcess('cubeatt') elif 'spice' in processOBJ.getProcessName(): processOBJ.updateParameter('from_', infile) elif 'ctxevenodd' in processOBJ.getProcessName(): label = pvl.load(infile) SS = label['IsisCube']['Instrument']['SpatialSumming'] print(SS) if SS != 1: continue else: processOBJ.updateParameter('from_', infile) processOBJ.updateParameter('to', outfile) elif 'mocevenodd' in processOBJ.getProcessName(): label = pvl.load(infile) CTS = label['IsisCube']['Instrument'][ 'CrosstrackSumming'] print(CTS) if CTS != 1: continue else: processOBJ.updateParameter('from_', infile) processOBJ.updateParameter('to', outfile) elif 'mocnoise50' in processOBJ.getProcessName(): label = pvl.load(infile) CTS = label['IsisCube']['Instrument'][ 'CrosstrackSumming'] if CTS != 1: continue else: processOBJ.updateParameter('from_', infile) processOBJ.updateParameter('to', outfile) elif 'cam2map' in processOBJ.getProcessName(): processOBJ.updateParameter('from', infile) 
processOBJ.updateParameter('to', outfile) if RHash.getGRtype() == 'smart' or RHash.getGRtype( ) == 'fill': subloggyOBJ = SubLoggy('cam2map') camrangeOUT = workarea + basename + '_camrange.txt' isis.camrange(from_=infile, to=camrangeOUT) cam = pvl.load(camrangeOUT) if cam['UniversalGroundRange']['MaximumLatitude'] < float(RHash.getMinLat()) or \ cam['UniversalGroundRange']['MinimumLatitude'] > float(RHash.getMaxLat()) or \ cam['UniversalGroundRange']['MaximumLongitude'] < float(RHash.getMinLon()) or \ cam['UniversalGroundRange']['MinimumLongitude'] > float(RHash.getMaxLon()): status = 'error' eSTR = "Error Ground Range Outside Extent Range" RHerror.addError( os.path.splitext( os.path.basename(jobFile))[0], eSTR) subloggyOBJ.setStatus('ERROR') subloggyOBJ.errorOut(eSTR) loggyOBJ.AddProcess(subloggyOBJ.getSLprocess()) break elif RHash.getGRtype() == 'smart': if cam['UniversalGroundRange'][ 'MinimumLatitude'] > float( RHash.getMinLat()): minlat = cam['UniversalGroundRange'][ 'MinimumLatitude'] else: minlat = RHash.getMinLat() if cam['UniversalGroundRange'][ 'MaximumLatitude'] < float( RHash.getMaxLat()): maxlat = cam['UniversalGroundRange'][ 'MaximumLatitude'] else: maxlat = RHash.getMaxLat() if cam['UniversalGroundRange'][ 'MinimumLongitude'] > float( RHash.getMinLon()): minlon = cam['UniversalGroundRange'][ 'MinimumLongitude'] else: minlon = RHash.getMinLon() if cam['UniversalGroundRange'][ 'MaximumLongitude'] < float( RHash.getMaxLon()): maxlon = cam['UniversalGroundRange'][ 'MaximumLongitude'] else: maxlon = RHash.getMaxLon() elif RHash.getGRtype() == 'fill': minlat = RHash.getMinLat() maxlat = RHash.getMaxLat() minlon = RHash.getMinLon() maxlon = RHash.getMaxLon() processOBJ.AddParameter('minlat', minlat) processOBJ.AddParameter('maxlat', maxlat) processOBJ.AddParameter('minlon', minlon) processOBJ.AddParameter('maxlon', maxlon) #os.remove(camrangeOUT) elif 'isis2pds' in processOBJ.getProcessName(): finalfile = infile.replace('.input.cub', '_final.img') processOBJ.updateParameter('from_', infile) processOBJ.updateParameter('to', finalfile) else: processOBJ.updateParameter('from_', infile) processOBJ.updateParameter('to', outfile) print(processOBJ.getProcess()) for k, v in processOBJ.getProcess().items(): func = getattr(isis, k) subloggyOBJ = SubLoggy(k) try: func(**v) logger.info('Process %s :: Success', k) subloggyOBJ.setStatus('SUCCESS') subloggyOBJ.setCommand(processOBJ.LogCommandline()) subloggyOBJ.setHelpLink(processOBJ.LogHelpLink()) loggyOBJ.AddProcess(subloggyOBJ.getSLprocess()) if os.path.isfile(outfile): os.rename(outfile, infile) status = 'success' except ProcessError as e: logger.error('Process %s :: Error', k) logger.error(e) status = 'error' eSTR = 'Error Executing ' + k + \ ' Standard Error: ' + str(e) RHerror.addError( os.path.splitext(os.path.basename(jobFile))[0], eSTR) subloggyOBJ.setStatus('ERROR') subloggyOBJ.setCommand(processOBJ.LogCommandline()) subloggyOBJ.setHelpLink(processOBJ.LogHelpLink()) subloggyOBJ.errorOut(eSTR) loggyOBJ.AddProcess(subloggyOBJ.getSLprocess()) else: GDALcmd = "" for process, v, in processOBJ.getProcess().items(): subloggyOBJ = SubLoggy(process) GDALcmd += process for dict_key, value in v.items(): GDALcmd += ' ' + dict_key + ' ' + value frmt = RHash.Format() if frmt == 'GeoTiff-BigTiff': fileext = 'tif' elif frmt == 'GeoJPEG-2000': fileext = 'jp2' elif frmt == 'JPEG': fileext = 'jpg' elif frmt == 'PNG': fileext = 'png' elif frmt == 'GIF': fileext = 'gif' logGDALcmd = GDALcmd + ' ' + basename + \ '.input.cub ' + basename + '_final.' 
+ fileext finalfile = infile.replace('.input.cub', '_final.' + fileext) GDALcmd += ' ' + infile + ' ' + finalfile print(GDALcmd) result = subprocess.call(GDALcmd, shell=True) if result == 0: logger.info('Process GDAL translate :: Success') status = 'success' subloggyOBJ.setStatus('SUCCESS') subloggyOBJ.setCommand(logGDALcmd) subloggyOBJ.setHelpLink( 'http://www.gdal.org/gdal_translate.html') loggyOBJ.AddProcess(subloggyOBJ.getSLprocess()) #os.remove(infile) else: errmsg = 'Error Executing GDAL translate: Error' logger.error(errmsg) status = 'error' RHerror.addError( os.path.splitext(os.path.basename(jobFile))[0], errmsg) subloggyOBJ.setStatus('ERROR') subloggyOBJ.setCommand(logGDALcmd) subloggyOBJ.setHelpLink( 'http://www.gdal.org/gdal_translate.html') subloggyOBJ.errorOut('Process GDAL translate :: Error') loggyOBJ.AddProcess(subloggyOBJ.getSLprocess()) if status == 'success': if RHash.Format() == 'ISIS3': finalfile = infile.replace('.input.cub', '_final.cub') shutil.move(infile, finalfile) if RHash.getStatus() != 'ERROR': RHash.Status('SUCCESS') try: RQ_zip.QueueAdd(finalfile) logger.info('File Added to ZIP Queue') except: logger.error('File NOT Added to ZIP Queue') elif status == 'error': RHash.Status('ERROR') if os.path.isfile(infile): #os.remove(infile) pass try: RQ_loggy.QueueAdd(loggyOBJ.Loggy2json()) RQ_work.QueueRemove(jobFile) logger.info('JSON Added to Loggy Queue') except: logger.error('JSON NOT Added to Loggy Queue') if RQ_file.QueueSize() == 0 and RQ_work.QueueSize() == 0: try: RQ_final.QueueAdd(key) logger.info('Key %s Added to Final Queue: Success', key) logger.info( 'Both Queues Empty: filequeue = %s work queue = %s', str(RQ_file.QueueSize()), str(RQ_work.QueueSize())) logger.info('JOB Complete') except: logger.error('Key NOT Added to Final Queue') elif RQ_file.QueueSize() == 0 and RQ_work.QueueSize() != 0: logger.warning( 'Work Queue Not Empty: filequeue = %s work queue = %s', str(RQ_file.QueueSize()), str(RQ_work.QueueSize()))
def main(user_args):
    key = user_args.key
    norun = user_args.norun
    namespace = user_args.namespace

    if namespace is None:
        namespace = default_namespace

    # Set up logging
    logger = logging.getLogger(key)
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'Service.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({'Services': '1'})
    if not RQ_lock.available('Services'):
        exit()

    # Connect to database and access 'jobs' table
    DBQO = PDS_DBquery('JOBS')
    if key is None:
        # If no key is specified, grab the first key
        try:
            key = DBQO.jobKey()
            try:
                key = key.decode('utf-8')
            except AttributeError:
                pass
        # If the queue is empty, it'll throw a type error.
        except TypeError:
            logger.debug('No keys found in clusterjobs database')
            exit(1)

    try:
        # Set the 'queued' column to current time i.e. prep for processing
        DBQO.setJobsQueued(key)
    except KeyError as e:
        logger.error('%s', e)
        exit(1)

    logger.info('Starting Process')

    xmlOBJ = jobXML(DBQO.jobXML4Key(key))

    # Make directory if it doesn't exist
    directory = os.path.join(workarea, key)
    if not os.path.exists(directory):
        os.makedirs(directory)

    logger.info('Working Area: %s', directory)

    # Set up Redis Hash for ground range
    RedisH = RedisHash(key + '_info')
    RedisH.RemoveAll()
    RedisErrorH = RedisHash(key + '_error')
    RedisErrorH.RemoveAll()

    RedisH_DICT = {}
    RedisH_DICT['service'] = xmlOBJ.getProcess()
    RedisH_DICT['fileformat'] = xmlOBJ.getOutFormat()
    RedisH_DICT['outbit'] = xmlOBJ.getOutBit()
    if xmlOBJ.getRangeType() is not None:
        RedisH_DICT['grtype'] = xmlOBJ.getRangeType()
        RedisH_DICT['minlat'] = xmlOBJ.getMinLat()
        RedisH_DICT['maxlat'] = xmlOBJ.getMaxLat()
        RedisH_DICT['minlon'] = xmlOBJ.getMinLon()
        RedisH_DICT['maxlon'] = xmlOBJ.getMaxLon()

    if not RedisH.IsInHash('service'):
        RedisH.AddHash(RedisH_DICT)

    if RedisH.IsInHash('service'):
        logger.info('Redis info Hash: Success')
    else:
        logger.error('Redis info Hash Not Found')
    # End ground range

    RQ_recipe = RedisQueue(key + '_recipe', namespace)
    RQ_recipe.RemoveAll()
    RQ_file = RedisQueue(key + '_FileQueue', namespace)
    RQ_file.RemoveAll()
    RQ_WorkQueue = RedisQueue(key + '_WorkQueue', namespace)
    RQ_WorkQueue.RemoveAll()
    RQ_loggy = RedisQueue(key + '_loggy', namespace)
    RQ_loggy.RemoveAll()
    RQ_zip = RedisQueue(key + '_ZIP', namespace)
    RQ_zip.RemoveAll()

    if xmlOBJ.getProcess() == 'POW':
        fileList = xmlOBJ.getFileListWB()
    elif xmlOBJ.getProcess() == 'MAP2':
        fileList = xmlOBJ.getMFileListWB()

    for List_file in fileList:
        # Input and output file naming and path stuff
        if xmlOBJ.getProcess() == 'POW':
            if xmlOBJ.getInst() == 'THEMIS_IR':
                Input_file = List_file.replace('odtie1_', 'odtir1_')
                Input_file = Input_file.replace('xxedr', 'xxrdr')
                Input_file = Input_file.replace('EDR.QUB', 'RDR.QUB')
                Input_file = Input_file.replace(
                    'http://pdsimage.wr.usgs.gov/Missions/', archive_base)
            elif xmlOBJ.getInst() == 'ISSNA':
                Input_file = List_file.replace('.IMG', '.LBL')
                Input_file = Input_file.replace(
                    'http://pdsimage.wr.usgs.gov/Missions/', archive_base)
            elif xmlOBJ.getInst() == 'ISSWA':
                Input_file = List_file.replace('.IMG', '.LBL')
                Input_file = Input_file.replace(
                    'http://pdsimage.wr.usgs.gov/Missions/', archive_base)
            elif xmlOBJ.getInst() == 'SOLID STATE IMAGING SYSTEM':
                Input_file = List_file.replace('.img', '.lbl')
                Input_file = Input_file.replace(
                    'http://pdsimage.wr.usgs.gov/Missions/', archive_base)
            else:
                Input_file = List_file.replace(
                    'http://pdsimage.wr.usgs.gov/Missions/', archive_base)

        elif xmlOBJ.getProcess() == 'MAP2':
            Input_file = List_file.replace('file://pds_san', '/pds_san')

            if '+' in Input_file:
                tempsplit = Input_file.split('+')
                tempFile = tempsplit[0]
            else:
                tempFile = Input_file

            # Output final file naming
            Tbasename = os.path.splitext(os.path.basename(tempFile))[0]
            splitBase = Tbasename.split('_')

            labP = xmlOBJ.getProjection()
            isis_label = pvl.load(tempFile)
            if labP == 'INPUT':
                lab_proj = isis_label['IsisCube']['Mapping']['ProjectionName'][0:4]
            else:
                lab_proj = labP[0:4]

            if xmlOBJ.getClat() is None or xmlOBJ.getClon() is None:
                basefinal = splitBase[0] + splitBase[1] + \
                    splitBase[2] + '_MAP2_' + lab_proj.upper()
            else:
                lab_clat = float(xmlOBJ.getClat())
                if lab_clat >= 0:
                    labH = 'N'
                elif lab_clat < 0:
                    labH = 'S'
                lab_clon = float(xmlOBJ.getClon())

                basefinal = splitBase[0] + splitBase[1] + splitBase[2] + \
                    '_MAP2_' + str(lab_clat) + labH + str(lab_clon) + \
                    '_' + lab_proj.upper()
            RedisH.MAPname(basefinal)

        try:
            basename = os.path.splitext(os.path.basename(Input_file))[0]
            RQ_file.QueueAdd(Input_file)
            logger.info('File %s Added to Redis Queue', Input_file)
        except Exception as e:
            logger.warning('File %s NOT Added to Redis Queue', Input_file)
            print('Redis Queue Error', e)

    RedisH.FileCount(RQ_file.QueueSize())
    logger.info('Count of Files Queue: %s', str(RQ_file.QueueSize()))

    # Map Template Stuff
    logger.info('Making Map File')
    mapOBJ = MakeMap()

    if xmlOBJ.getProcess() == 'MAP2' and xmlOBJ.getProjection() == 'INPUT':
        proj = isis_label['IsisCube']['Mapping']['ProjectionName']
        mapOBJ.Projection(proj)
    else:
        mapOBJ.Projection(xmlOBJ.getProjection())

    if xmlOBJ.getClon() is not None:
        mapOBJ.CLon(float(xmlOBJ.getClon()))
    if xmlOBJ.getClat() is not None:
        mapOBJ.CLat(float(xmlOBJ.getClat()))
    if xmlOBJ.getFirstParallel() is not None:
        mapOBJ.FirstParallel(float(xmlOBJ.getFirstParallel()))
    if xmlOBJ.getSecondParallel() is not None:
        mapOBJ.SecondParallel(float(xmlOBJ.getSecondParallel()))
    if xmlOBJ.getResolution() is not None:
        mapOBJ.PixelRes(float(xmlOBJ.getResolution()))
    if xmlOBJ.getTargetName() is not None:
        mapOBJ.Target(xmlOBJ.getTargetName())
    if xmlOBJ.getERadius() is not None:
        mapOBJ.ERadius(float(xmlOBJ.getERadius()))
    if xmlOBJ.getPRadius() is not None:
        mapOBJ.PRadius(float(xmlOBJ.getPRadius()))
    if xmlOBJ.getLatType() is not None:
        mapOBJ.LatType(xmlOBJ.getLatType())
    if xmlOBJ.getLonDirection() is not None:
        mapOBJ.LonDirection(xmlOBJ.getLonDirection())
    if xmlOBJ.getLonDomain() is not None:
        mapOBJ.LonDomain(int(xmlOBJ.getLonDomain()))

    if xmlOBJ.getProcess() == 'MAP2':
        if xmlOBJ.getMinLat() is not None:
            mapOBJ.MinLat(float(xmlOBJ.getMinLat()))
        if xmlOBJ.getMaxLat() is not None:
            mapOBJ.MaxLat(float(xmlOBJ.getMaxLat()))
        if xmlOBJ.getMinLon() is not None:
            mapOBJ.MinLon(float(xmlOBJ.getMinLon()))
        if xmlOBJ.getMaxLon() is not None:
            mapOBJ.MaxLon(float(xmlOBJ.getMaxLon()))

    mapOBJ.Map2pvl()

    MAPfile = directory + "/" + key + '.map'
    mapOBJ.Map2File(MAPfile)

    try:
        f = open(MAPfile)
        f.close()
        logger.info('Map File Creation: Success')
    except IOError as e:
        logger.error('Map File %s Not Found', MAPfile)
    # ** End Map Template Stuff **

    logger.info('Building Recipe')
    if xmlOBJ.getProcess() == 'POW':
        try:
            pds_label = pvl.load(Input_file.split('+')[0])
        # If pvl fails to load a label, chances are that the input file is in
        # a pre-PDS format, in which case generate_pow_recipe() doesn't need
        # the label anyway. Catch all exceptions because pvl can throw many
        # different types of errors when attempting to read such files.
        except Exception:
            logger.warning('PVL was unable to parse PDS label for %s',
                           Input_file.split('+')[0])
            pds_label = None
        recipeOBJ = generate_pow_recipe(xmlOBJ, pds_label, MAPfile)

    elif xmlOBJ.getProcess() == 'MAP2':
        recipeOBJ = generate_map2_recipe(xmlOBJ, isis_label, MAPfile)

    # OUTPUT FORMAT
    # Test for GDAL and add to recipe
    Oformat = xmlOBJ.getOutFormat()
    if Oformat == 'GeoTiff-BigTiff' or Oformat == 'GeoJPEG-2000' or Oformat == 'JPEG' or Oformat == 'PNG':
        if Oformat == 'GeoJPEG-2000':
            Oformat = 'JP2KAK'
        if Oformat == 'GeoTiff-BigTiff':
            Oformat = 'GTiff'

        gdal_translate_dict = {}

        def GDAL_OBit(ibit):
            bitDICT = {
                'unsignedbyte': 'Byte',
                'signedword': 'Int16',
                'real': 'Float32'
            }
            try:
                return bitDICT[ibit]
            except KeyError:
                raise Exception(
                    f"Unsupported ibit type given {ibit}. "
                    f"Currently supported bit types are {list(bitDICT.keys())}")

        def GDAL_Creation(fmt):
            cDICT = {
                'JPEG': 'quality=100',
                'JP2KAK': 'quality=100',
                'GTiff': 'bigtiff=if_safer'
            }
            try:
                return cDICT[fmt]
            except KeyError:
                raise Exception(
                    f"Unsupported format {fmt}. "
                    f"Currently supported formats are {list(cDICT.keys())}")

        if xmlOBJ.getOutBit() != 'input':
            gdal_translate_dict['outputType'] = GDAL_OBit(xmlOBJ.getOutBit())
        gdal_translate_dict['format'] = Oformat

        if Oformat == 'GTiff' or Oformat == 'JP2KAK' or Oformat == 'JPEG':
            gdal_translate_dict['creationOptions'] = [GDAL_Creation(Oformat)]

        frmt = xmlOBJ.getOutFormat()
        if frmt == 'GeoTiff-BigTiff':
            fileext = 'tif'
        elif frmt == 'GeoJPEG-2000':
            fileext = 'jp2'
        elif frmt == 'JPEG':
            fileext = 'jpg'
        elif frmt == 'PNG':
            fileext = 'png'
        elif frmt == 'GIF':
            fileext = 'gif'

        gdal_translate_dict['src'] = list(
            recipeOBJ.items())[-1][-1]['to'].split('+')[0]
        gdal_translate_dict['dest'] = "{{no_extension_inputfile}}_final." + fileext

        recipeOBJ['gdal_translate'] = gdal_translate_dict

    # set up isis2pds and add to recipe
    elif Oformat == 'PDS':
        isis2pds_dict = {}
        isis2pds_dict['from_'] = list(recipeOBJ.items())[-1][-1]['to']
        isis2pds_dict['to'] = "{{no_extension_inputfile}}_final.img"
        if xmlOBJ.getOutBit() == 'unsignedbyte':
            isis2pds_dict['bittype'] = '8bit'
        elif xmlOBJ.getOutBit() == 'signedword':
            isis2pds_dict['bittype'] = 's16bit'

        recipeOBJ['isis.isis2pds'] = isis2pds_dict

    try:
        RQ_recipe.QueueAdd(json.dumps(recipeOBJ))
        logger.info('Recipe Added to Redis')
    except Exception as e:
        logger.warning('Recipe NOT Added to Redis: %s', recipeOBJ)

    # HPC job stuff
    logger.info('HPC Cluster job Submission Starting')
    jobOBJ = HPCjob()
    jobOBJ.setJobName(key + '_Service')
    jobOBJ.setStdOut(slurm_log + key + '_%A_%a.out')
    jobOBJ.setStdError(slurm_log + key + '_%A_%a.err')
    jobOBJ.setWallClock('24:00:00')
    jobOBJ.setMemory('24576')
    jobOBJ.setPartition('pds')
    JAsize = RQ_file.QueueSize()
    jobOBJ.setJobArray(JAsize)
    logger.info('Job Array Size : %s', str(JAsize))

    # Whether or not we use the default namespace, this guarantees that the
    # POW/MAP queues will match the namespace used in the job manager.
    if xmlOBJ.getProcess() == 'POW':
        cmd = cmd_dir + "pow_process.py -k {} -n {}".format(key, namespace)
    elif xmlOBJ.getProcess() == 'MAP2':
        cmd = cmd_dir + "map_process.py -k {} -n {}".format(key, namespace)

    logger.info('HPC Command: %s', cmd)
    jobOBJ.setCommand(cmd)

    SBfile = directory + '/' + key + '.sbatch'
    jobOBJ.MakeJobFile(SBfile)

    try:
        sb = open(SBfile)
        sb.close()
        logger.info('SBATCH File Creation: Success')
    except IOError as e:
        logger.error('SBATCH File %s Not Found', SBfile)

    if norun:
        logger.info('No-run mode, will not submit HPC job.')
    else:
        try:
            jobOBJ.Run()
            logger.info('Job Submission to HPC: Success')
            DBQO.setJobsStarted(key)
        except IOError as e:
            logger.error('Jobs NOT Submitted to HPC')
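
# Editor-added sketch (not part of the pipeline): an illustration of the recipe
# dictionary that the job manager above serializes into the '<key>_recipe'
# queue. Only the 'gdal_translate' keys ('outputType', 'format',
# 'creationOptions', 'src', 'dest') are taken directly from the code above;
# the preceding entries come from generate_pow_recipe()/generate_map2_recipe()
# and are assumptions here, as are the concrete values.
example_recipe = {
    # ... entries produced by generate_map2_recipe() would precede this ...
    'gdal_translate': {
        'outputType': 'Byte',                      # GDAL_OBit('unsignedbyte')
        'format': 'GTiff',                         # 'GeoTiff-BigTiff' mapped to GTiff
        'creationOptions': ['bigtiff=if_safer'],   # GDAL_Creation('GTiff')
        'src': '{{no_extension_inputfile}}.output.cub',   # hypothetical last 'to' value
        'dest': '{{no_extension_inputfile}}_final.tif',
    },
}
# A worker would recover this structure with json.loads(RQ_recipe.QueueGet()).
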
def main(user_args):
    archive = user_args.archive
    volume = user_args.volume
    search = user_args.search
    log_level = user_args.log_level
    ingest = user_args.ingest

    RQ_ingest = RedisQueue('Ingest_ReadyQueue')
    RQ_linking = RedisQueue('LinkQueue')

    # Set up logging
    logger = logging.getLogger(archive + '_INGEST')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Ingest.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    print("Log File: {}Ingest.log".format(pds_log))

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archivepath = PDSinfoDICT[archive]['path'][:-1]
    except KeyError:
        print("\nArchive '{}' not found in {}\n".format(archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        logger.error("Unable to locate %s", archive)
        exit()

    if volume:
        archivepath = archivepath + '/' + volume

    logger.info('Starting Ingest for: %s', archivepath)
    logger.info('Ingest Queue: %s', str(RQ_ingest.id_name))
    logger.info('Linking Queue: %s', str(RQ_linking.id_name))

    # Possible bug in RQ? Can't add to queue in "if fname == voldesc"
    queue_size = RQ_ingest.QueueSize()
    voldescs = []
    for dirpath, _, files in os.walk(archivepath):
        for filename in files:
            fname = os.path.join(dirpath, filename)
            if search in fname:
                try:
                    if os.path.basename(fname).lower() == "voldesc.cat":
                        voldescs.append(fname)
                    if ingest:
                        RQ_ingest.QueueAdd((fname, archive))
                except Exception as e:
                    logger.warning('File %s NOT added to Ingest Queue: %s',
                                   fname, str(e))
            else:
                continue

    n_added = RQ_ingest.QueueSize() - queue_size
    for fpath in voldescs:
        RQ_linking.QueueAdd((fpath, archive))
    logger.info('Files added to Ingest Queue: %s', n_added)
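
# Editor-added sketch (not part of the pipeline): how a downstream consumer is
# expected to unpack the (filename, archive) tuples queued above. The queue
# stores each tuple's string representation, so consumers recover it with
# ast.literal_eval, as the UPC and DI workers below do. The queue name and
# RedisQueue API are taken from the surrounding code; the loop itself is
# illustrative.
from ast import literal_eval

def drain_ingest_queue_example():
    RQ_ingest = RedisQueue('Ingest_ReadyQueue')
    while int(RQ_ingest.QueueSize()) > 0:
        fname, archive = literal_eval(RQ_ingest.QueueGet())
        print(fname, archive)
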
def main(user_args):
    upc_session_maker, upc_engine = db_connect(upc_db)
    pds_session_maker, pds_engine = db_connect(pds_db)

    persist = user_args.persist
    log_level = user_args.log_level

    try:
        slurm_job_id = os.environ['SLURM_ARRAY_JOB_ID']
        slurm_array_id = os.environ['SLURM_ARRAY_TASK_ID']
    except KeyError:
        slurm_job_id = ''
        slurm_array_id = ''

    inputfile = ''
    context = {
        'job_id': slurm_job_id,
        'array_id': slurm_array_id,
        'inputfile': inputfile
    }

    # ***************** Set up logging *****************
    logger = logging.getLogger('UPC_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    log_file_handle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(job_id)s - %(array_id)s - %(inputfile)s - %(name)s - %(levelname)s, %(message)s'
    )
    log_file_handle.setFormatter(formatter)
    logger.addHandler(log_file_handle)
    logger = logging.LoggerAdapter(logger, context)

    # Redis Queue Objects
    RQ_main = RedisQueue('UPC_ReadyQueue')
    logger.info("UPC Processing Queue: %s", RQ_main.id_name)

    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    # If the queue isn't registered, add it and set it to "running"
    RQ_lock.add({RQ_main.id_name: '1'})

    # if there are items in the redis queue
    if int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        # get a file from the queue
        item = literal_eval(RQ_main.QueueGet())
        inputfile = item[0]
        fid = item[1]
        archive = item[2]

        if not os.path.isfile(inputfile):
            RQ_error.QueueAdd(
                f'Unable to locate or access {inputfile} during UPC processing')
            logger.debug("%s is not a file\n", inputfile)
            exit()

        # Build URL for edr_source based on archive path from PDSinfo.json
        PDSinfoDICT = json.load(open(pds_info, 'r'))
        archive_path = PDSinfoDICT[archive]['path']
        orig_file = inputfile.replace(workarea, archive_path)
        edr_source = orig_file.replace(archive_base, web_base)

        # Update the logger context to include inputfile
        context['inputfile'] = inputfile

        recipe_file = recipe_base + "/" + archive + '.json'
        with open(recipe_file) as fp:
            upc_json = json.load(fp)['upc']
            recipe_string = json.dumps(upc_json['recipe'])
            # Attempt to get the optional search_term_mapping for the upc
            # process
            try:
                search_term_mapping = upc_json['search_term_mapping']
            except KeyError:
                search_term_mapping = {}

        processes, infile, caminfoOUT, footprint_file, workarea_pwd = generate_processes(
            inputfile, recipe_string, logger)
        failing_command = process(processes, workarea_pwd, logger)

        pds_label = pvl.load(inputfile)

        # ######## Generate DataFiles Record ########
        upc_id = create_datafiles_record(pds_label, edr_source,
                                         infile + '.cub', upc_session_maker)

        # ######## Generate SearchTerms Record ########
        create_search_terms_record(pds_label, caminfoOUT, upc_id,
                                   infile + '.cub', footprint_file,
                                   search_term_mapping, upc_session_maker)

        # ######## Generate JsonKeywords Record ########
        create_json_keywords_record(caminfoOUT, upc_id, inputfile,
                                    failing_command, upc_session_maker, logger)

        try:
            pds_session = pds_session_maker()
            pds_session.flush()
        except Exception:
            logger.debug("Unable to flush database connection")

        AddProcessDB(pds_session, fid, True)
        pds_session.close()

        if not persist:
            # Remove all files from the workarea except for the copied
            # source file
            workarea_files = glob(workarea_pwd + '/*')
            workarea_files.remove(inputfile)
            for file in workarea_files:
                os.remove(file)

    # Disconnect from the engines
    pds_engine.dispose()
    upc_engine.dispose()

    logger.info("UPC processing exited")
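
# Editor-added sketch (standard library only): the LoggerAdapter pattern used
# above. The formatter references %(job_id)s, %(array_id)s and %(inputfile)s,
# which are supplied by the adapter's context mapping rather than by each log
# call. Because the same dict object is shared, updating context['inputfile']
# later (as the UPC worker does) is reflected in subsequent log records. Names
# and values here are illustrative.
import logging

def logger_adapter_example():
    context = {'job_id': '123', 'array_id': '4', 'inputfile': '/tmp/example.img'}
    base = logging.getLogger('UPC_Process_example')
    base.setLevel(logging.INFO)
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(
        '%(asctime)s - %(job_id)s - %(array_id)s - %(inputfile)s - '
        '%(name)s - %(levelname)s, %(message)s'))
    base.addHandler(handler)
    adapted = logging.LoggerAdapter(base, context)
    adapted.info('formatter fields come from the adapter context')
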
class QueueProcess():
    def __init__(self, process_name, archive, volume=None, search=None,
                 log_level='INFO', namespace=None):
        self.process_name = process_name
        self.archive = archive
        self.logger = self.get_logger(log_level)
        self.archive_info = json.load(open(pds_info, 'r'))
        try:
            self.archive_id = self.get_archive_att('archiveid')
        except KeyError:
            self.logger.error("Archive %s not found in %s", archive, pds_info)
            raise
        self.volume = volume
        self.search = search
        self.namespace = namespace
        self.ready_queue = RedisQueue(f"{process_name}_ReadyQueue", namespace)
        self.error_queue = RedisQueue(f"{process_name}_ErrorQueue", namespace)
        self.logger.info("%s queue: %s", process_name, self.ready_queue.id_name)

        try:
            pds_session_maker, _ = db_connect(pds_db)
            self.logger.info('Database Connection Success')
        except Exception as e:
            self.logger.error('Database Connection Error\n\n%s', e)
            raise
        self.session_maker = pds_session_maker

    def get_logger(self, log_level):
        """ Instantiate and return a logger based on process information.

        Parameters
        ----------
        log_level : str
            The string descriptor of the log level
            (critical, error, warning, info, debug)

        Returns
        -------
        logger : logging.Logger
            The parameterized logger
        """
        if not hasattr(self, "logger"):
            logger = logging.getLogger(
                f"{self.process_name}_Queueing.{self.archive}")
            level = logging.getLevelName(log_level)
            logger.setLevel(level)
            logFileHandle = logging.FileHandler(pds_log + 'Process.log')
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
            logFileHandle.setFormatter(formatter)
            logger.addHandler(logFileHandle)
            self.logger = logger
        return self.logger

    def get_matching_files(self):
        """ Get the files matching the process characteristics.
            Overridden by child classes."""
        raise NotImplementedError()

    def enqueue(self, element):
        """ Add an element to the queue. Overridden by child classes."""
        raise NotImplementedError()

    def run(self, elements=None, copy=True):
        """ Copies and queues a set of elements into the process's queue(s).

        Parameters
        ----------
        elements : sqlalchemy.orm.query.Query or list
            The elements to be processed

        copy : boolean
            If true, copies files to the work area.  If false, skips the
            copying step.

        Returns
        -------
        None
        """
        source_path = self.get_archive_att('path')
        if copy and not has_space(elements, source_path, workarea,
                                  disk_usage_ratio):
            self.logger.error(
                "Unable to copy files: Insufficient disk space in %s.",
                workarea)
            raise IOError(f"Insufficient disk space in {workarea}.")

        addcount = 0
        for element in elements:
            try:
                # Query results carry a .filename attribute; plain strings
                # are used as-is.
                try:
                    fname = element.filename
                except AttributeError:
                    fname = element
                fname = join(source_path, fname)
                if copy:
                    fname = copy_files(fname, archive_base, workarea)
                self.enqueue(fname)
                addcount = addcount + 1
            except Exception as e:
                self.error_queue.QueueAdd(f'Unable to copy / queue {fname}: {e}')
                self.logger.error('Unable to copy / queue %s: %s', fname, e)

        self.logger.info('Files Added to %s Queue: %s', self.process_name,
                         addcount)

    def get_archive_att(self, att):
        """ Get an attribute of this process's archive from the PDS info file.

        Returns
        -------
        archive_att
            The value associated with the given attribute name for this
            archive (e.g. the integer archive id for 'archiveid').
        """
        try:
            archive_att = self.archive_info[self.archive][att]
        except KeyError:
            msg = "\nArchive '{}' not found in {}\n".format(self.archive,
                                                            pds_info)
            msg += "The following archives are available:\n"
            for k in self.archive_info.keys():
                msg += "\t{}\n".format(k)
            print(msg)
            raise
        return archive_att
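
# Editor-added sketch (not one of the project's actual subclasses): a minimal
# hypothetical QueueProcess child. The base class above only requires
# get_matching_files() and enqueue() to be overridden; the query and the
# queued tuple layout mirror the standalone queueing scripts in this file but
# are assumptions here.
class ExampleQueueProcess(QueueProcess):
    def get_matching_files(self):
        # Return the Files rows flagged for UPC processing in this archive.
        session = self.session_maker()
        return session.query(Files).filter(
            Files.archiveid == self.archive_id,
            Files.upc_required == 't')

    def enqueue(self, element):
        # run() passes a (possibly copied) file path; pair it with the archive.
        self.ready_queue.QueueAdd((element, self.archive))
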
def main(user_args):
    log_level = user_args.log_level

    PDSinfoDICT = json.load(open(pds_info, 'r'))

    # Set up logging
    logger = logging.getLogger('DI_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting DI Process')

    try:
        Session, engine = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection: Success')
    except Exception as e:
        logger.error('Database Connection Error: %s', str(e))
        return 1

    RQ = RedisQueue('DI_ReadyQueue')
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ.id_name: '1'})

    index = 0

    logger.info("DI Queue: %s", RQ.id_name)

    while int(RQ.QueueSize()) > 0 and RQ_lock.available(RQ.id_name):
        item = literal_eval(RQ.QueueGet())
        inputfile = item[0]
        archive = item[1]
        logger.debug("%s - %s", inputfile, archive)

        try:
            Qelement = session.query(Files).filter(
                Files.filename == inputfile).one()
        except Exception as e:
            logger.warning('Filename query failed for inputfile %s: %s',
                           inputfile, str(e))
            continue

        archive_path = PDSinfoDICT[archive]['path']

        cpfile = archive_path + Qelement.filename
        if os.path.isfile(cpfile):
            # Compute the md5 checksum of the file in 4 KB chunks
            f_hash = hashlib.md5()
            with open(cpfile, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    f_hash.update(chunk)
            checksum = f_hash.hexdigest()

            Qelement.di_pass = checksum == Qelement.checksum
            if not Qelement.di_pass:
                logger.warning('File %s checksum %s does not match the '
                               'database entry checksum %s',
                               cpfile, checksum, Qelement.checksum)

            Qelement.di_date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            session.flush()
            index = index + 1
            if index > 50:
                session.commit()
                logger.info('Session Commit for 50 Records: Success')
                index = 0
        else:
            RQ_error.QueueAdd(
                f'Unable to locate or access {inputfile} during DI processing')
            logger.warning('File %s Not Found', cpfile)

    try:
        session.commit()
        logger.info("End Commit DI process to Database: Success")
        index = 1
    except Exception as e:
        logger.warning("Unable to commit changes to database\n\n%s", e)
        session.rollback()

    # Close connection to database
    session.close()
    engine.dispose()
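
# Editor-added helper sketch: the chunked md5 pattern from the DI loop above,
# factored into a standalone function. hashlib is standard library; the
# 4096-byte chunk size matches the loop above, and the function name is
# illustrative.
import hashlib

def md5_checksum_example(path, chunk_size=4096):
    file_hash = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            file_hash.update(chunk)
    return file_hash.hexdigest()
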
def main():
    # pdb.set_trace()
    args = Args()
    args.parse_args()

    logger = logging.getLogger('Derived_Queueing.' + args.archive)
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting Process')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    archiveID = PDSinfoDICT[args.archive]['archiveid']

    RQ = RedisQueue('Derived_ReadyQueue')
    error_queue = RedisQueue('UPC_ErrorQueue')

    try:
        Session, _ = db_connect(pds_db)
        session = Session()
        logger.info('Database Connection Success')
    except Exception:
        logger.error('Database Connection Error')
        return 1

    if args.volume:
        volstr = '%' + args.volume + '%'
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.filename.like(volstr),
                                           Files.upc_required == 't')
    else:
        qOBJ = session.query(Files).filter(Files.archiveid == archiveID,
                                           Files.upc_required == 't')

    if args.search:
        qf = '%' + args.search + '%'
        qOBJ = qOBJ.filter(Files.filename.like(qf))

    if qOBJ:
        path = PDSinfoDICT[args.archive]['path']
        addcount = 0

        # Refuse to queue if the candidate files would overfill the work area
        size = 0
        for element in qOBJ:
            fname = path + element.filename
            size += getsize(fname)

        size_free = disk_usage(workarea).free
        if size >= (disk_usage_ratio * size_free):
            logger.error("Unable to process %s: size %d exceeds %d",
                         args.volume, size, (size_free * disk_usage_ratio))
            exit()

        for element in qOBJ:
            fname = path + element.filename
            fid = element.fileid
            try:
                dest_path = dirname(fname)
                dest_path = dest_path.replace(archive_base, workarea)
                pathlib.Path(dest_path).mkdir(parents=True, exist_ok=True)
                # Copy the file and any sibling files sharing its stem
                for f in glob.glob(splitext(fname)[0] + r'.*'):
                    if not exists(join(dest_path, basename(f))):
                        copy2(f, dest_path)
                RQ.QueueAdd((join(dest_path, basename(element.filename)),
                             fid, args.archive))
                addcount = addcount + 1
            except Exception as e:
                error_queue.QueueAdd(f'Unable to copy / queue {fname}: {e}')
                logger.error('Unable to copy / queue %s: %s', fname, e)

        logger.info('Files Added to Derived Queue: %s', addcount)
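
# Editor-added sketch of the disk-space guard used above, isolated so the
# arithmetic is explicit: queueing proceeds only while the total size of the
# candidate files stays below disk_usage_ratio times the free space in the
# work area. shutil.disk_usage and os.path.getsize are standard library; the
# ratio value and function name are illustrative.
from os.path import getsize
from shutil import disk_usage

def has_enough_space_example(filenames, dest, ratio=0.5):
    required = sum(getsize(f) for f in filenames)
    return required < ratio * disk_usage(dest).free
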
def main(user_args):
    key = user_args.key
    namespace = user_args.namespace

    if namespace is None:
        namespace = default_namespace

    workarea = scratch + key + '/'

    RQ_file = RedisQueue(key + '_FileQueue', namespace)
    RQ_work = RedisQueue(key + '_WorkQueue', namespace)
    RQ_zip = RedisQueue(key + '_ZIP', namespace)
    RQ_loggy = RedisQueue(key + '_loggy', namespace)
    RQ_final = RedisQueue('FinalQueue', namespace)
    RQ_recipe = RedisQueue(key + '_recipe', namespace)
    RHash = RedisHash(key + '_info')
    RHerror = RedisHash(key + '_error')
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({'MAP': '1'})

    if int(RQ_file.QueueSize()) == 0:
        print("No Files Found in Redis Queue")
    elif RQ_lock.available('MAP'):
        jobFile = RQ_file.Qfile2Qwork(RQ_file.getQueueName(),
                                      RQ_work.getQueueName())

        # Setup system logging
        basename = os.path.splitext(os.path.basename(jobFile))[0]
        logger = logging.getLogger(key + '.' + basename)
        logger.setLevel(logging.INFO)
        logFileHandle = logging.FileHandler(pds_log + '/Service.log')
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
        logFileHandle.setFormatter(formatter)
        logger.addHandler(logFileHandle)

        logger.info('Starting MAP Processing')

        loggyOBJ = Loggy(basename)

        # File Naming
        infile = workarea + \
            os.path.splitext(os.path.basename(jobFile))[0] + '.input.cub'
        outfile = workarea + \
            os.path.splitext(os.path.basename(jobFile))[0] + '.output.cub'

        # Recipe Stuff
        status = 'success'

        for element in RQ_recipe.RecipeGet():
            if status == 'error':
                break
            elif status == 'success':
                processOBJ = Process()
                process = processOBJ.JSON2Process(element)

                if 'gdal_translate' not in processOBJ.getProcessName():
                    # Adjust input/output parameters per ISIS program
                    if 'cubeatt-band' in processOBJ.getProcessName():
                        if '+' in jobFile:
                            processOBJ.updateParameter('from_', jobFile)
                            processOBJ.updateParameter('to', outfile)
                            processOBJ.ChangeProcess('cubeatt')
                        else:
                            continue
                    elif 'map2map' in processOBJ.getProcessName():
                        if '+' in jobFile:
                            processOBJ.updateParameter('from_', infile)
                        else:
                            processOBJ.updateParameter('from_', jobFile)
                        processOBJ.updateParameter('to', outfile)
                    elif 'cubeatt-bit' in processOBJ.getProcessName():
                        if RHash.OutBit() == 'unsignedbyte':
                            temp_outfile = outfile + '+lsb+tile+attached+unsignedbyte+1:254'
                        elif RHash.OutBit() == 'signedword':
                            temp_outfile = outfile + '+lsb+tile+attached+signedword+-32765:32765'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', temp_outfile)
                        processOBJ.ChangeProcess('cubeatt')
                    elif 'isis2pds' in processOBJ.getProcessName():
                        # finalfile = infile.replace('.input.cub', '_final.img')
                        finalfile = workarea + RHash.getMAPname() + '.img'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', finalfile)
                    else:
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)

                    print(processOBJ.getProcess())

                    # Run each ISIS program named in the process dict
                    for k, v in processOBJ.getProcess().items():
                        func = getattr(isis, k)
                        subloggyOBJ = SubLoggy(k)
                        try:
                            func(**v)
                            logger.info('Process %s :: Success', k)
                            subloggyOBJ.setStatus('SUCCESS')
                            subloggyOBJ.setCommand(processOBJ.LogCommandline())
                            subloggyOBJ.setHelpLink(processOBJ.LogHelpLink())
                            loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())

                            if os.path.isfile(outfile):
                                os.rename(outfile, infile)
                            status = 'success'
                        except ProcessError as e:
                            logger.error('Process %s :: Error', k)
                            logger.error(e)
                            status = 'error'
                            eSTR = 'Error Executing ' + k + \
                                ' Standard Error: ' + str(e)
                            RHerror.addError(
                                os.path.splitext(os.path.basename(jobFile))[0],
                                eSTR)
                            subloggyOBJ.setStatus('ERROR')
                            subloggyOBJ.setCommand(processOBJ.LogCommandline())
                            subloggyOBJ.setHelpLink(processOBJ.LogHelpLink())
                            subloggyOBJ.errorOut(eSTR)
                            loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())
                            RQ_work.QueueRemove(jobFile)
                            break
                else:
                    # gdal_translate runs as an external command
                    GDALcmd = ""
                    for process, v in processOBJ.getProcess().items():
                        subloggyOBJ = SubLoggy(process)
                        GDALcmd += process
                        for dict_key, value in v.items():
                            GDALcmd += ' ' + dict_key + ' ' + value

                    img_format = RHash.Format()
                    if img_format == 'GeoTiff-BigTiff':
                        fileext = 'tif'
                    elif img_format == 'GeoJPEG-2000':
                        fileext = 'jp2'
                    elif img_format == 'JPEG':
                        fileext = 'jpg'
                    elif img_format == 'PNG':
                        fileext = 'png'
                    elif img_format == 'GIF':
                        fileext = 'gif'

                    logGDALcmd = GDALcmd + ' ' + basename + '.input.cub ' + \
                        RHash.getMAPname() + '.' + fileext
                    finalfile = workarea + RHash.getMAPname() + '.' + fileext
                    GDALcmd += ' ' + infile + ' ' + finalfile
                    print(GDALcmd)

                    try:
                        subprocess.call(GDALcmd, shell=True)
                        logger.info('Process GDAL translate :: Success')
                        status = 'success'
                        subloggyOBJ.setStatus('SUCCESS')
                        subloggyOBJ.setCommand(logGDALcmd)
                        subloggyOBJ.setHelpLink(
                            'www.gdal.org/gdal_translate.html')
                        loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())
                        os.remove(infile)
                    except OSError as e:
                        logger.error('Process GDAL translate :: Error')
                        logger.error(e)
                        status = 'error'
                        RHerror.addError(
                            os.path.splitext(os.path.basename(jobFile))[0],
                            'Process GDAL translate :: Error')
                        subloggyOBJ.setStatus('ERROR')
                        subloggyOBJ.setCommand(logGDALcmd)
                        subloggyOBJ.setHelpLink(
                            'http://www.gdal.org/gdal_translate.html')
                        subloggyOBJ.errorOut(e)
                        loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())

        if status == 'success':
            if RHash.Format() == 'ISIS3':
                finalfile = workarea + RHash.getMAPname() + '.cub'
                shutil.move(infile, finalfile)
            if RHash.getStatus() != 'ERROR':
                RHash.Status('SUCCESS')

            try:
                RQ_zip.QueueAdd(finalfile)
                logger.info('File Added to ZIP Queue')
            except Exception:
                logger.error('File NOT Added to ZIP Queue')

            try:
                RQ_loggy.QueueAdd(loggyOBJ.Loggy2json())
                logger.info('JSON Added to Loggy Queue')
            except Exception:
                logger.error('JSON NOT Added to Loggy Queue')

            RQ_work.QueueRemove(jobFile)
        elif status == 'error':
            RHash.Status('ERROR')
            if os.path.isfile(infile):
                os.remove(infile)

        if RQ_file.QueueSize() == 0 and RQ_work.QueueSize() == 0:
            try:
                RQ_final.QueueAdd(key)
                logger.info('Key %s Added to Final Queue: Success', key)
                logger.info('Job Complete')
            except Exception:
                logger.error('Key NOT Added to Final Queue')
        else:
            logger.warning('Queues Not Empty: filequeue = %s work queue = %s',
                           str(RQ_file.QueueSize()), str(RQ_work.QueueSize()))
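
# Editor-added sketch of the dispatch pattern in the recipe loop above: each
# recipe entry maps an ISIS program name to its keyword arguments, and the
# program is looked up on the ISIS wrapper module with getattr and called with
# **kwargs. `isis_module` stands in for whichever ISIS Python wrapper the
# pipeline imports as `isis`; the program name and parameter values below are
# hypothetical.
def run_recipe_step_example(isis_module, program, parameters):
    # e.g. program = 'map2map',
    #      parameters = {'from_': 'in.cub', 'to': 'out.cub', 'map': 'x.map'}
    func = getattr(isis_module, program)
    func(**parameters)
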
def main(user_args):
    key = user_args.key
    namespace = user_args.namespace

    if namespace is None:
        namespace = default_namespace

    work_dir = os.path.join(workarea, key)

    RQ_file = RedisQueue(key + '_FileQueue', namespace)
    RQ_work = RedisQueue(key + '_WorkQueue', namespace)
    RQ_zip = RedisQueue(key + '_ZIP', namespace)
    RQ_loggy = RedisQueue(key + '_loggy', namespace)
    RQ_final = RedisQueue('FinalQueue', namespace)
    RQ_recipe = RedisQueue(key + '_recipe', namespace)
    RQ_error = RedisQueue(upc_error_queue, namespace)
    RHash = RedisHash(key + '_info')
    RHerror = RedisHash(key + '_error')
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({'MAP': '1'})

    if int(RQ_file.QueueSize()) > 0 and RQ_lock.available('MAP'):
        jobFile = RQ_file.Qfile2Qwork(RQ_file.getQueueName(),
                                      RQ_work.getQueueName())

        # Setup system logging
        basename = os.path.splitext(os.path.basename(jobFile))[0]
        logger = logging.getLogger(key + '.' + basename)
        logger.setLevel(logging.INFO)
        logFileHandle = logging.FileHandler(pds_log + '/Service.log')
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
        logFileHandle.setFormatter(formatter)
        logger.addHandler(logFileHandle)

        logger.info('Starting MAP Processing')

        # File Naming
        infile = os.path.join(
            work_dir,
            os.path.splitext(os.path.basename(jobFile))[0] + '.input.cub')
        outfile = os.path.join(
            work_dir,
            os.path.splitext(os.path.basename(jobFile))[0] + '.output.cub')

        # Recipe Stuff
        status = 'success'
        recipe_string = RQ_recipe.QueueGet()
        no_extension_inputfile = os.path.join(
            work_dir, os.path.splitext(os.path.basename(jobFile))[0])
        processes = generate_processes(
            jobFile, recipe_string, logger,
            no_extension_inputfile=no_extension_inputfile)
        failing_command, error = process(processes, work_dir, logger)
        process_log = generate_log_json(processes, basename, failing_command,
                                        error)

        if failing_command:
            status = 'error'

        if status == 'success':
            final_file_list = []
            img_format = RHash.Format()

            # If the final output format is ISIS3 or PDS, an ISIS program
            # creates the final product.
            if img_format == 'ISIS3' or img_format == 'PDS':
                last_output = list(processes.items())[-1][-1]['to']
                last_output = last_output.split('+')[0]
                if img_format == 'ISIS3':
                    finalfile = os.path.join(work_dir,
                                             RHash.getMAPname() + '.cub')
                else:
                    finalfile = os.path.join(work_dir,
                                             RHash.getMAPname() + '.img')
            # Otherwise GDAL creates it, so set the extension and handle
            # possible ancillary files.
            else:
                if img_format == 'GeoTiff-BigTiff' or img_format == 'GTiff':
                    fileext = 'tif'
                elif img_format == 'GeoJPEG-2000':
                    fileext = 'jp2'
                elif img_format == 'JPEG':
                    fileext = 'jpg'
                elif img_format == 'PNG':
                    fileext = 'png'
                elif img_format == 'GIF':
                    fileext = 'gif'

                last_output = list(processes.items())[-1][-1]['dest']
                finalfile = os.path.join(work_dir,
                                         RHash.getMAPname() + '.' + fileext)

                # Possible ancillary files
                last_output_msk = last_output + '.msk'
                last_output_aux = last_output + '.aux.xml'

                if os.path.isfile(last_output_msk):
                    finalfile_msk = os.path.join(
                        work_dir, RHash.getMAPname() + '.' + fileext + '.msk')
                    shutil.move(last_output_msk, finalfile_msk)
                    final_file_list.append(finalfile_msk)

                if os.path.isfile(last_output_aux):
                    finalfile_aux = os.path.join(
                        work_dir,
                        RHash.getMAPname() + '.' + fileext + '.aux.xml')
                    shutil.move(last_output_aux, finalfile_aux)
                    final_file_list.append(finalfile_aux)

            shutil.move(last_output, finalfile)
            final_file_list.append(finalfile)

            if RHash.getStatus() != 'ERROR':
                RHash.Status('SUCCESS')

            # Loop over the list of final output files and add them to RQ_zip
            for item in final_file_list:
                try:
                    RQ_zip.QueueAdd(item)
                    logger.info('File Added to ZIP Queue')
                except Exception:
                    logger.error('File NOT Added to ZIP Queue')

            try:
                RQ_loggy.QueueAdd(process_log)
                logger.info('JSON Added to Loggy Queue')
            except Exception as e:
                logger.error(f'JSON NOT Added to Loggy Queue with error: {e}')

            RQ_work.QueueRemove(jobFile)
        elif status == 'error':
            RHash.Status('ERROR')
            logger.error(f'Process {failing_command} :: Error')
            logger.error(error)
            error_string = f'Error Executing {failing_command} ' \
                           f'Standard Error: {error}'
            RHerror.addError(basename, error_string)
            RQ_error.QueueAdd(
                f'Process {failing_command} failed for {jobFile}')

            if os.path.isfile(infile):
                os.remove(infile)

        if RQ_file.QueueSize() == 0 and RQ_work.QueueSize() == 0:
            try:
                RQ_final.QueueAdd(key)
                logger.info('Key %s Added to Final Queue: Success', key)
                logger.info('Job Complete')
            except Exception:
                logger.error('Key NOT Added to Final Queue')
        else:
            logger.warning('Queues Not Empty: filequeue = %s work queue = %s',
                           str(RQ_file.QueueSize()), str(RQ_work.QueueSize()))
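
# Editor-added sketch: the GDAL output-format to file-extension mapping appears
# as an if/elif chain in several functions above; a module-level dict expresses
# the same table once. The format/extension pairs are taken from those chains;
# the helper name is illustrative.
GDAL_FILE_EXTENSIONS = {
    'GeoTiff-BigTiff': 'tif',
    'GTiff': 'tif',
    'GeoJPEG-2000': 'jp2',
    'JPEG': 'jpg',
    'PNG': 'png',
    'GIF': 'gif',
}

def output_extension_example(img_format):
    return GDAL_FILE_EXTENSIONS[img_format]
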