def main(user_args):
    log_level = user_args.log_level
    namespace = user_args.namespace

    if namespace is None:
        namespace = default_namespace

    logger = logging.getLogger('final_job_manager')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Service.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    # *************** Look at Final queue for work ***************
    RQ_final = RedisQueue('FinalQueue', namespace)
    logger.debug("Redis Queue: %s", RQ_final.id_name)

    if int(RQ_final.QueueSize()) == 0:
        logger.debug('Nothing Found in Final Queue')
    else:
        FKey = RQ_final.QueueGet()
        logger.info('Found %s in Final Queue', str(FKey))

        # ******************** HPC job stuff ***********************
        logger.info('HPC Cluster job Submission Starting')
        jobOBJ = HPCjob()
        jobOBJ.setJobName(FKey + '_Final')
        jobOBJ.setStdOut(slurm_log + FKey + '_%A_%a.out')
        jobOBJ.setStdError(slurm_log + FKey + '_%A_%a.err')
        jobOBJ.setWallClock('24:00:00')
        jobOBJ.setMemory('8192')
        jobOBJ.setPartition('pds')

        cmd = "{}service_final.py -n {} -k {}".format(cmd_dir, namespace, FKey)
        jobOBJ.setCommand(cmd)
        logger.info('HPC Command: %s', cmd)

        # SBfile = '/scratch/pds_services/' + FKey + '/' + FKey + '_final.sbatch'
        SBfile = os.path.join(workarea, FKey, (FKey + '_final.sbatch'))
        jobOBJ.MakeJobFile(SBfile)

        try:
            sb = open(SBfile)
            sb.close()
            logger.info('SBATCH File Creation: Success')
        except IOError:
            logger.error('SBATCH File %s Not Found', SBfile)

        try:
            jobOBJ.Run()
            logger.info('Job Submission to HPC: Success')
        except IOError as e:
            logger.error('Jobs NOT Submitted to HPC\n%s', e)
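# A minimal sketch of a command-line entry point that could drive main() above.
# The flag names and defaults are assumptions inferred from the attributes read
# off user_args (log_level, namespace); the project's real parser may differ.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Submit final-zip jobs to the HPC cluster')
    parser.add_argument('--log_level', default='INFO',
                        help='Logging level, e.g. DEBUG, INFO, WARNING')
    parser.add_argument('--namespace', default=None,
                        help='Redis namespace; falls back to default_namespace when omitted')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())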
def test_redis_queue():
    # Redis Queue Objects
    RQ_main = RedisQueue('UPC_ReadyQueue')
    RQ_main.QueueAdd(("/Path/to/my/file.img", "1", "ARCHIVE"))

    if int(RQ_main.QueueSize()) > 0:
        # Get a file from the queue
        item = literal_eval(RQ_main.QueueGet())
        inputfile = item[0]
        fid = item[1]
        archive = item[2]

        assert inputfile == "/Path/to/my/file.img"
        assert fid == "1"
        assert archive == "ARCHIVE"
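# The test above talks to a live Redis instance, so leftover items in
# 'UPC_ReadyQueue' can break its assertions. A possible sketch, using only the
# QueueSize/QueueGet calls already shown, for draining the queue before adding
# the known tuple; whether this matches the project's fixture strategy is an
# assumption.
def drain_queue(queue):
    # Pop items until the queue reports empty
    while int(queue.QueueSize()) > 0:
        queue.QueueGet()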
def main(user_args):
    log_level = user_args.log_level

    RQ = RedisQueue('LinkQueue')

    logger = logging.getLogger('LINK_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Link.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    while int(RQ.QueueSize()) > 0:
        # Grab a tuple of values from the redis queue
        item = literal_eval(RQ.QueueGet())
        # Split the tuple into its input file and archive values
        inputfile = item[0]
        archive = item[1]

        json_file_path = recipe_base + archive + '.json'
        try:
            with open(json_file_path, 'r') as f:
                json_dict = json.load(f)
        except ValueError as e:
            logger.warning(e)
            continue

        link_src_path = json_dict['src']

        voldesc = load_pvl(inputfile)
        dataset_id = voldesc['VOLUME']['DATA_SET_ID']
        volume_id = voldesc['VOLUME']['VOLUME_ID']

        # If more than one dataset id exists, link each of them
        if isinstance(dataset_id, (list, tuple, set)):
            for x in dataset_id:
                link(link_src_path, link_dest, volume_id, x)
        else:
            # Not a container type
            link(link_src_path, link_dest, volume_id, dataset_id)
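# Why the isinstance check above exists: a VOLDESC label may carry one
# DATA_SET_ID or several, and pvl returns a scalar for the former and a
# sequence for the latter. A rough illustration with made-up label text
# (the exact container type depends on the pvl version installed):
import pvl

single = pvl.loads('DATA_SET_ID = "MRO-M-CTX-2-EDR-L0-V1.0"\nEND')
multi = pvl.loads('DATA_SET_ID = ("EXAMPLE-DS-ID-1", "EXAMPLE-DS-ID-2")\nEND')

print(type(single['DATA_SET_ID']))  # a plain string
print(type(multi['DATA_SET_ID']))   # a list-like sequence, so each id is linked in turn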
def main(user_args):
    log_level = user_args.log_level

    PDSinfoDICT = json.load(open(pds_info, 'r'))

    # Set up logging
    logger = logging.getLogger('DI_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting DI Process')

    try:
        Session, engine = db_connect(pds_db)
        session = Session()
        logger.info('DataBase Connection: Success')
    except Exception as e:
        logger.error('DataBase Connection Error: %s', str(e))
        return 1

    RQ = RedisQueue('DI_ReadyQueue')
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ.id_name: '1'})

    index = 0

    logger.info("DI Queue: %s", RQ.id_name)

    while int(RQ.QueueSize()) > 0 and RQ_lock.available(RQ.id_name):
        item = literal_eval(RQ.QueueGet())
        inputfile = item[0]
        archive = item[1]
        logger.debug("%s - %s", inputfile, archive)

        try:
            Qelement = session.query(Files).filter(
                Files.filename == inputfile).one()
        except Exception as e:
            logger.warning('Filename query failed for inputfile %s: %s',
                           inputfile, str(e))
            continue

        archive_path = PDSinfoDICT[archive]['path']
        cpfile = archive_path + Qelement.filename
        if os.path.isfile(cpfile):
            # Hash the file in 4096-byte chunks and compare against the stored checksum
            f_hash = hashlib.md5()
            with open(cpfile, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    f_hash.update(chunk)
            checksum = f_hash.hexdigest()

            Qelement.di_pass = checksum == Qelement.checksum
            if not Qelement.di_pass:
                logger.warning('File %s checksum %s does not match the '
                               'database entry checksum %s',
                               cpfile, checksum, Qelement.checksum)

            Qelement.di_date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            session.flush()
            index = index + 1
            if index > 50:
                session.commit()
                logger.info('Session Commit for 50 Records: Success')
                index = 0
        else:
            RQ_error.QueueAdd(f'Unable to locate or access {inputfile} during DI processing')
            logger.warning('File %s Not Found', cpfile)

    try:
        session.commit()
        logger.info("End Commit DI process to Database: Success")
        index = 1
    except Exception as e:
        logger.warning("Unable to commit changes to database\n\n%s", e)
        session.rollback()

    # Close connection to database
    session.close()
    engine.dispose()
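# Both the DI and Ingest passes hash files in 4096-byte chunks so that large
# PDS products never have to be held in memory at once. A standalone sketch of
# that idiom; the helper name md5_in_chunks is illustrative, not part of the
# project.
import hashlib

def md5_in_chunks(path, chunk_size=4096):
    """Return the hex md5 digest of a file, reading it chunk_size bytes at a time."""
    file_hash = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            file_hash.update(chunk)
    return file_hash.hexdigest()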
def main(user_args):
    log_level = user_args.log_level
    override = user_args.override

    logger = logging.getLogger('Ingest_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Ingest.log')
    print("Log File: {}Ingest.log".format(pds_log))
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info("Starting Ingest Process")
    PDSinfoDICT = json.load(open(pds_info, 'r'))

    RQ_main = RedisQueue('Ingest_ReadyQueue')
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ_main.id_name: '1'})
    RQ_work = RedisQueue('Ingest_WorkQueue')

    try:
        Session, engine = db_connect(pds_db)
        session = Session()
        logger.info('DataBase Connection: Success')
    except Exception:
        logger.error('DataBase Connection: Error')
        return 1

    index = 1

    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        item = literal_eval(RQ_main.QueueGet())
        inputfile = item[0]
        archive = item[1]
        if not os.path.isfile(inputfile):
            RQ_error.QueueAdd(
                f'Unable to locate or access {inputfile} during ingest processing')
            logger.warning("%s is not a file\n", inputfile)
            continue

        RQ_work.QueueAdd(inputfile)

        subfile = inputfile.replace(PDSinfoDICT[archive]['path'], '')
        # Calculate checksum in chunks of 4096
        f_hash = hashlib.md5()
        with open(inputfile, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                f_hash.update(chunk)
        filechecksum = f_hash.hexdigest()

        QOBJ = session.query(Files).filter_by(filename=subfile).first()

        runflag = False
        if QOBJ is None or filechecksum != QOBJ.checksum:
            runflag = True

        if runflag or override:
            date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            fileURL = inputfile.replace(archive_base, web_base)

            # If all upc requirements are in 'inputfile', flag the file for upc
            try:
                upcflag = all(req in inputfile for req in PDSinfoDICT[archive]['upc_reqs'])
            except KeyError:
                logger.warning(
                    "No upc_reqs found for %s\nSetting upc eligibility False for all related files.",
                    str(archive))
                upcflag = False

            filesize = os.path.getsize(inputfile)

            try:
                ingest_entry = Files()

                if QOBJ is not None and override:
                    ingest_entry.fileid = QOBJ.fileid

                ingest_entry.archiveid = PDSinfoDICT[archive]['archiveid']
                ingest_entry.filename = subfile
                ingest_entry.entry_date = date
                ingest_entry.checksum = filechecksum
                ingest_entry.upc_required = upcflag
                ingest_entry.validation_required = True
                ingest_entry.header_only = False
                ingest_entry.release_date = date
                ingest_entry.file_url = fileURL
                ingest_entry.file_size = filesize
                ingest_entry.di_pass = True
                ingest_entry.di_date = date

                session.merge(ingest_entry)
                session.flush()

                RQ_work.QueueRemove(inputfile)

                index = index + 1

            except Exception as e:
                logger.error("Error During File Insert %s : %s", str(subfile), str(e))

        elif not runflag and not override:
            RQ_work.QueueRemove(inputfile)
            logger.warning(
                "Not running ingest: file %s already present"
                " in database and no override flag supplied", inputfile)

        if index >= 250:
            try:
                session.commit()
                logger.info("Commit 250 files to Database: Success")
                index = 1
            except Exception as e:
                session.rollback()
                logger.warning("Unable to commit to database: %s", str(e))
    else:
        logger.info("No Files Found in Ingest Queue")

    try:
        session.commit()
        logger.info("Commit to Database: Success")
    except Exception as e:
        logger.error("Unable to commit to database: %s", str(e))
        session.rollback()

    # Close connection to database
    session.close()
    engine.dispose()

    if RQ_main.QueueSize() == 0 and RQ_work.QueueSize() == 0:
        logger.info("Process Complete All Queues Empty")
    elif RQ_main.QueueSize() == 0 and RQ_work.QueueSize() != 0:
        logger.warning("Process Done Work Queue NOT Empty Contains %s Files",
                       str(RQ_work.QueueSize()))

    logger.info("Ingest Complete")
def main(user_args):
    log_level = user_args.log_level

    logger = logging.getLogger('Browse_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    logFileHandle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    RQ_main = RedisQueue('Browse_ReadyQueue')
    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({RQ_main.id_name: '1'})

    PDSinfoDICT = json.load(open(pds_info, 'r'))

    pds_session_maker, pds_engine = db_connect(pds_db)
    pds_session = pds_session_maker()

    upc_session_maker, upc_engine = db_connect(upc_db)
    upc_session = upc_session_maker()

    tid = get_tid('fullimageurl', upc_session)

    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        item = literal_eval(RQ_main.QueueGet())
        inputfile = item[0]
        fid = item[1]
        archive = item[2]
        if os.path.isfile(inputfile):
            logger.info('Starting Process: %s', inputfile)

            finalpath = makedir(inputfile)

            recipeOBJ = Recipe()
            recipeOBJ.addMissionJson(archive, 'reduced')

            infile = workarea + os.path.splitext(
                os.path.basename(inputfile))[0] + '.Binput.cub'
            outfile = workarea + os.path.splitext(
                os.path.basename(inputfile))[0] + '.Boutput.cub'
            status = 'success'
            for item in recipeOBJ.getProcesses():
                if status == 'error':
                    logger.error("Error processing %s", inputfile)
                    break
                elif status == 'success':
                    processOBJ = Process()
                    processOBJ.ProcessFromRecipe(item, recipeOBJ.getRecipe())

                    if '2isis' in item:
                        processOBJ.updateParameter('from_', inputfile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'spiceinit':
                        processOBJ.updateParameter('from_', infile)
                    elif item == 'cubeatt':
                        label = pvl.load(infile)
                        bands = PDSinfoDICT[archive]['bandorder']
                        query_bands = label['IsisCube']['BandBin'][
                            PDSinfoDICT[archive]['bandbinQuery']]
                        # Create a set from the list / single value
                        try:
                            query_band_set = set(query_bands)
                        except TypeError:
                            query_band_set = set([query_bands])

                        # Iterate through 'bands' and grab the first value that is
                        # present in the set defined by 'bandbinQuery' -- if not
                        # present, default to 1
                        exband = next(
                            (band for band in bands if band in query_band_set), 1)

                        band_infile = infile + '+' + str(exband)
                        processOBJ.updateParameter('from_', band_infile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'ctxevenodd':
                        label = pvl.load(infile)
                        SS = label['IsisCube']['Instrument']['SpatialSumming']
                        if SS != 1:
                            break
                        else:
                            processOBJ.updateParameter('from_', infile)
                            processOBJ.updateParameter('to', outfile)
                    elif item == 'reduce':
                        label = pvl.load(infile)
                        Nline = label['IsisCube']['Core']['Dimensions']['Lines']
                        Nsample = label['IsisCube']['Core']['Dimensions']['Samples']
                        Nline = int(Nline)
                        Nsample = int(Nsample)
                        Sfactor = scaleFactor(Nline, Nsample, recip_json)
                        processOBJ.updateParameter('lscale', Sfactor)
                        processOBJ.updateParameter('sscale', Sfactor)
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'isis2std':
                        final_outfile = finalpath + '/' + os.path.splitext(
                            os.path.basename(inputfile))[0] + '.browse.jpg'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', final_outfile)
                    else:
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)

                    for k, v in processOBJ.getProcess().items():
                        func = getattr(isis, k)
                        try:
                            func(**v)
                            logger.info('Process %s :: Success', k)
                            if os.path.isfile(outfile):
                                if '.cub' in outfile:
                                    os.rename(outfile, infile)
                            status = 'success'
                            if '2isis' in item:
                                isisSerial = getISISid(infile)
                        except ProcessError as e:
                            print(e)
                            logger.error('Process %s :: Error', k)
                            status = 'error'

            if status == 'success':
                DB_addURL(upc_session, isisSerial, final_outfile, tid)
                os.remove(infile)
                logger.info('Browse Process Success: %s', inputfile)
                AddProcessDB(pds_session, fid, 't')
        else:
            RQ_error.QueueAdd(
                f'Unable to locate or access {inputfile} during browse processing')
            logger.error('File %s Not Found', inputfile)

    upc_session.close()
    pds_session.close()
    upc_engine.dispose()
    pds_engine.dispose()
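# The cubeatt branch above picks the first band from the archive's preferred
# 'bandorder' that actually occurs in the label's BandBin values, defaulting to
# band 1. A toy illustration with made-up band numbers:
bands = [5, 3, 1]              # preferred order from PDSinfoDICT[archive]['bandorder']
query_band_set = {1, 2, 3}     # bands present according to the label's BandBin group

exband = next((band for band in bands if band in query_band_set), 1)
print(exband)  # 3: the first preferred band that the label actually contains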
def main(user_args):
    upc_session_maker, upc_engine = db_connect(upc_db)
    pds_session_maker, pds_engine = db_connect(pds_db)

    persist = user_args.persist
    log_level = user_args.log_level

    try:
        slurm_job_id = os.environ['SLURM_ARRAY_JOB_ID']
        slurm_array_id = os.environ['SLURM_ARRAY_TASK_ID']
    except KeyError:
        slurm_job_id = ''
        slurm_array_id = ''

    inputfile = ''
    context = {'job_id': slurm_job_id,
               'array_id': slurm_array_id,
               'inputfile': inputfile}

    # ***************** Set up logging *****************
    logger = logging.getLogger('UPC_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    log_file_handle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(job_id)s - %(array_id)s - %(inputfile)s - %(name)s - %(levelname)s, %(message)s')
    log_file_handle.setFormatter(formatter)
    logger.addHandler(log_file_handle)
    logger = logging.LoggerAdapter(logger, context)

    # Redis Queue Objects
    RQ_main = RedisQueue('UPC_ReadyQueue')
    logger.info("UPC Processing Queue: %s", RQ_main.id_name)

    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    # If the queue isn't registered, add it and set it to "running"
    RQ_lock.add({RQ_main.id_name: '1'})

    # If there are items in the redis queue
    if int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        # Get a file from the queue
        item = literal_eval(RQ_main.QueueGet())
        inputfile = item[0]
        fid = item[1]
        archive = item[2]

        if not os.path.isfile(inputfile):
            RQ_error.QueueAdd(
                f'Unable to locate or access {inputfile} during UPC processing')
            logger.debug("%s is not a file\n", inputfile)
            exit()

        # Build URL for edr_source based on archive path from PDSinfo.json
        PDSinfoDICT = json.load(open(pds_info, 'r'))
        archive_path = PDSinfoDICT[archive]['path']
        orig_file = inputfile.replace(workarea, archive_path)
        edr_source = orig_file.replace(archive_base, web_base)

        # Update the logger context to include inputfile
        context['inputfile'] = inputfile

        recipe_file = recipe_base + "/" + archive + '.json'
        with open(recipe_file) as fp:
            upc_json = json.load(fp)['upc']
            recipe_string = json.dumps(upc_json['recipe'])
            # Attempt to get the optional search_term_mapping for the upc process
            try:
                search_term_mapping = upc_json['search_term_mapping']
            except KeyError:
                search_term_mapping = {}

        processes, infile, caminfoOUT, footprint_file, workarea_pwd = generate_processes(
            inputfile, recipe_string, logger)
        failing_command = process(processes, workarea_pwd, logger)

        pds_label = pvl.load(inputfile)

        # ######## Generate DataFiles Record ########
        upc_id = create_datafiles_record(pds_label, edr_source, infile + '.cub',
                                         upc_session_maker)

        # ######## Generate SearchTerms Record ########
        create_search_terms_record(pds_label, caminfoOUT, upc_id, infile + '.cub',
                                   footprint_file, search_term_mapping, upc_session_maker)

        # ######## Generate JsonKeywords Record ########
        create_json_keywords_record(caminfoOUT, upc_id, inputfile, failing_command,
                                    upc_session_maker, logger)

        try:
            pds_session = pds_session_maker()
            pds_session.flush()
        except Exception:
            logger.debug("Unable to flush database connection")

        AddProcessDB(pds_session, fid, True)
        pds_session.close()

        if not persist:
            # Remove all files from the workarea except for the copied source file
            workarea_files = glob(workarea_pwd + '/*')
            workarea_files.remove(inputfile)
            for file in workarea_files:
                os.remove(file)

    # Disconnect from the engines
    pds_engine.dispose()
    upc_engine.dispose()

    logger.info("UPC processing exited")
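# How the LoggerAdapter wiring above gets job_id/array_id/inputfile into each
# record: the adapter's extra dict supplies those fields to the Formatter, and
# because 'context' is held by reference, updating context['inputfile'] later
# changes what subsequent log lines report. A self-contained sketch:
import logging
import sys

base = logging.getLogger('adapter_demo')
base.setLevel(logging.INFO)
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter('%(job_id)s - %(inputfile)s - %(message)s'))
base.addHandler(handler)

ctx = {'job_id': '12345', 'array_id': '1', 'inputfile': ''}
log = logging.LoggerAdapter(base, ctx)

log.info('starting')             # "12345 -  - starting"
ctx['inputfile'] = '/path/to/file.img'
log.info('processing')           # "12345 - /path/to/file.img - processing"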
def main(user_args):
    key = user_args.key
    namespace = user_args.namespace

    if namespace is None:
        namespace = default_namespace

    work_dir = os.path.join(workarea, key)
    RQ_file = RedisQueue(key + '_FileQueue', namespace)
    RQ_work = RedisQueue(key + '_WorkQueue', namespace)
    RQ_zip = RedisQueue(key + '_ZIP', namespace)
    RQ_loggy = RedisQueue(key + '_loggy', namespace)
    RQ_final = RedisQueue('FinalQueue', namespace)
    RQ_recipe = RedisQueue(key + '_recipe', namespace)
    RQ_error = RedisQueue(upc_error_queue, namespace)
    RHash = RedisHash(key + '_info')
    RHerror = RedisHash(key + '_error')
    RQ_lock = RedisLock(lock_obj)
    RQ_lock.add({'MAP': '1'})

    if int(RQ_file.QueueSize()) > 0 and RQ_lock.available('MAP'):
        jobFile = RQ_file.Qfile2Qwork(RQ_file.getQueueName(),
                                      RQ_work.getQueueName())

        # Set up system logging
        basename = os.path.splitext(os.path.basename(jobFile))[0]
        logger = logging.getLogger(key + '.' + basename)
        logger.setLevel(logging.INFO)
        logFileHandle = logging.FileHandler(pds_log + '/Service.log')
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
        logFileHandle.setFormatter(formatter)
        logger.addHandler(logFileHandle)

        logger.info('Starting MAP Processing')

        # File naming
        infile = os.path.join(
            work_dir, os.path.splitext(os.path.basename(jobFile))[0] + '.input.cub')
        outfile = os.path.join(
            work_dir, os.path.splitext(os.path.basename(jobFile))[0] + '.output.cub')

        # Recipe stuff
        status = 'success'
        recipe_string = RQ_recipe.QueueGet()
        no_extension_inputfile = os.path.join(
            work_dir, os.path.splitext(os.path.basename(jobFile))[0])
        processes = generate_processes(
            jobFile, recipe_string, logger,
            no_extension_inputfile=no_extension_inputfile)
        failing_command, error = process(processes, work_dir, logger)
        process_log = generate_log_json(processes, basename, failing_command, error)

        if failing_command:
            status = 'error'

        if status == 'success':
            final_file_list = []
            img_format = RHash.Format()

            # If the final output format is ISIS3 or PDS, an ISIS program creates it
            if img_format == 'ISIS3' or img_format == 'PDS':
                last_output = list(processes.items())[-1][-1]['to']
                last_output = last_output.split('+')[0]
                if img_format == 'ISIS3':
                    finalfile = os.path.join(work_dir, RHash.getMAPname() + '.cub')
                else:
                    finalfile = os.path.join(work_dir, RHash.getMAPname() + '.img')

            # Else GDAL creates it, so set the extension and define possible ancillary files
            else:
                if img_format == 'GeoTiff-BigTiff' or img_format == 'GTiff':
                    fileext = 'tif'
                elif img_format == 'GeoJPEG-2000':
                    fileext = 'jp2'
                elif img_format == 'JPEG':
                    fileext = 'jpg'
                elif img_format == 'PNG':
                    fileext = 'png'
                elif img_format == 'GIF':
                    fileext = 'gif'

                last_output = list(processes.items())[-1][-1]['dest']
                finalfile = os.path.join(work_dir, RHash.getMAPname() + '.' + fileext)

                # Possible ancillary files
                last_output_msk = last_output + '.msk'
                last_output_aux = last_output + '.aux.xml'

                if os.path.isfile(last_output_msk):
                    finalfile_msk = os.path.join(
                        work_dir, RHash.getMAPname() + '.' + fileext + '.msk')
                    shutil.move(last_output_msk, finalfile_msk)
                    final_file_list.append(finalfile_msk)

                if os.path.isfile(last_output_aux):
                    finalfile_aux = os.path.join(
                        work_dir, RHash.getMAPname() + '.' + fileext + '.aux.xml')
                    shutil.move(last_output_aux, finalfile_aux)
                    final_file_list.append(finalfile_aux)

            shutil.move(last_output, finalfile)
            final_file_list.append(finalfile)

            if RHash.getStatus() != 'ERROR':
                RHash.Status('SUCCESS')

            # Loop over the list of final output files and add them to RQ_zip
            for item in final_file_list:
                try:
                    RQ_zip.QueueAdd(item)
                    logger.info('File Added to ZIP Queue')
                except Exception:
                    logger.error('File NOT Added to ZIP Queue')

            try:
                RQ_loggy.QueueAdd(process_log)
                logger.info('JSON Added to Loggy Queue')
            except Exception as e:
                logger.error(f'JSON NOT Added to Loggy Queue with error: {e}')

            RQ_work.QueueRemove(jobFile)

        elif status == 'error':
            RHash.Status('ERROR')
            logger.error(f'Process {failing_command} :: Error')
            logger.error(error)
            error_string = f'Error Executing {failing_command} ' \
                           f'Standard Error: {error}'
            RHerror.addError(basename, error_string)
            RQ_error.QueueAdd(f'Process {failing_command} failed for {jobFile}')

        if os.path.isfile(infile):
            os.remove(infile)

        if RQ_file.QueueSize() == 0 and RQ_work.QueueSize() == 0:
            try:
                RQ_final.QueueAdd(key)
                logger.info('Key %s Added to Final Queue: Success', key)
                logger.info('Job Complete')
            except Exception:
                logger.error('Key NOT Added to Final Queue')
        else:
            logger.warning('Queues Not Empty: filequeue = %s work queue = %s',
                           str(RQ_file.QueueSize()), str(RQ_work.QueueSize()))
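# The GDAL branch above maps the requested output format to a file extension
# with an if/elif chain. An equivalent lookup table, shown only as a possible
# tidier alternative (same formats, same extensions as the code above):
GDAL_EXTENSIONS = {
    'GeoTiff-BigTiff': 'tif',
    'GTiff': 'tif',
    'GeoJPEG-2000': 'jp2',
    'JPEG': 'jpg',
    'PNG': 'png',
    'GIF': 'gif',
}

fileext = GDAL_EXTENSIONS.get('GeoJPEG-2000')  # 'jp2'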