def main():
    # pdb.set_trace()

    # ***************** Set up logging *****************
    logger = logging.getLogger('FinalJobber')
    logger.setLevel(logging.INFO)
    #logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/Service.log')
    logFileHandle = logging.FileHandler(pds_log + 'Service.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    # *************** Look at Final queue for work ************
    RQ_final = RedisQueue('FinalQueue')
    if int(RQ_final.QueueSize()) == 0:
        # logger.info('NO Keys Found in FinalQueue')
        print('Nothing Found in Final Queue')
    else:
        FKey = RQ_final.QueueGet()
        logger.info('Found %s in Final Queue', FKey)

        # *************** HPC job stuff ***********************
        logger.info('HPC Cluster Job Submission Starting')
        jobOBJ = HPCjob()
        jobOBJ.setJobName(FKey + '_Final')
        #jobOBJ.setStdOut('/usgs/cdev/PDS/output/' + FKey + '_%A_%a.out')
        #jobOBJ.setStdError('/usgs/cdev/PDS/output/' + FKey + '_%A_%a.err')
        jobOBJ.setStdOut(slurm_log + FKey + '_%A_%a.out')
        jobOBJ.setStdError(slurm_log + FKey + '_%A_%a.err')
        jobOBJ.setWallClock('24:00:00')
        jobOBJ.setMemory('8192')
        jobOBJ.setPartition('pds')

        cmd = cmd_dir + 'ServiceFinal.py ' + FKey
        jobOBJ.setCommand(cmd)
        logger.info('HPC Command: %s', cmd)

        #SBfile = '/scratch/pds_services/' + FKey + '/' + FKey + '_final.sbatch'
        SBfile = scratch + FKey + '/' + FKey + '_final.sbatch'
        jobOBJ.MakeJobFile(SBfile)

        try:
            sb = open(SBfile)
            sb.close()
            logger.info('SBATCH File Creation: Success')
        except IOError as e:
            logger.error('SBATCH File %s Not Found', SBfile)

        try:
            jobOBJ.Run()
            logger.info('Job Submission to HPC: Success')
        except IOError as e:
            logger.error('Jobs NOT Submitted to HPC')
def main():
    # pdb.set_trace()

    archiveID = {16: '/pds_san/PDS_Archive/Mars_Reconnaissance_Orbiter/CTX/',
                 74: '/pds_san/PDS_Archive/Lunar_Reconnaissance_Orbiter/LROC/EDR/',
                 124: '/pds_san/PDS_Archive/Mars_Reconnaissance_Orbiter/HiRISE/',
                 101: '/pds_san/PDS_Archive/Apollo/Rock_Sample_Images/'
                 }

    # ********* Set up logging *************
    logger = logging.getLogger('DI_Process')
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting DI Process')

    try:
        # Throws away engine information
        session, _ = db_connect(pds_db)
        logger.info('Database Connection: Success')
    except:
        logger.error('Database Connection: Error')
        return 1

    RQ = RedisQueue('ChecksumUpdate_Queue')
    index = 0
    count = 0

    while int(RQ.QueueSize()) > 0:
        inputfile = RQ.QueueGet()
        Qelement = session.query(Files).filter(
            Files.filename == inputfile).one()
        cpfile = archiveID[Qelement.archiveid] + Qelement.filename
        if os.path.isfile(cpfile):
            """
            CScmd = 'md5sum ' + cpfile
            process = subprocess.Popen(
                CScmd, stdout=subprocess.PIPE, shell=True)
            (stdout, stderr) = process.communicate()
            temp_checksum = stdout.split()[0]
            # temp_checksum = hashlib.md5(open(tempfile, 'rb').read()).hexdigest()
            # os.remove(tempfile)
            """
            # Calculate the checksum in chunks of 4096 bytes
            f_hash = hashlib.md5()
            with open(cpfile, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    f_hash.update(chunk)
            checksum = f_hash.hexdigest()

            if checksum != Qelement.checksum:
                Qelement.checksum = checksum
                Qelement.di_pass = '******'
                Qelement.di_date = datetime.datetime.now(
                    pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
                session.flush()
                index = index + 1
                count = count + 1
                logger.info('Update Checksum %s: Success', inputfile)
            if count > 25:
                session.commit()
                logger.info('Session Commit for 25 Records: Success')
                count = 0
        else:
            logger.error('File %s Not Found', cpfile)

    try:
        session.commit()
    except:
        session.rollback()
        logger.error('Error during commit')
    logger.info("End Commit DI process to Database: Success")
    logger.info('Checksum for %s Files Updated', str(index))
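# The chunked-MD5 pattern above is repeated verbatim in the ingest process
# further below. A minimal sketch of a shared helper the two loops could be
# factored into; the name md5_in_chunks and its placement are assumptions for
# illustration, not part of the original code.
import hashlib

def md5_in_chunks(path, blocksize=4096):
    """Return the hex MD5 digest of a file, reading it in fixed-size chunks."""
    f_hash = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(blocksize), b""):
            f_hash.update(chunk)
    return f_hash.hexdigest()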
def main():
    # pdb.set_trace()

    args = Args()
    args.parse_args()

    RQ = RedisQueue('DI_ReadyQueue')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archiveID = PDSinfoDICT[args.archive]['archiveid']
    except KeyError:
        print("\nArchive '{}' not found in {}\n".format(args.archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        exit()

    # ********* Set up logging *************
    logger = logging.getLogger('DI_Queueing.' + args.archive)
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    #logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting %s DI Queueing', args.archive)
    if args.volume:
        logger.info('Queueing %s Volume', args.volume)

    try:
        # @TODO switch back to prd
        session, _ = db_connect(pds_db)
        logger.info('Database Connection: Success')
    except:
        logger.error('Database Connection: Error')

    # ************* Date stuff ***************
    td = (datetime.datetime.now(pytz.utc) -
          datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
    testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")

    if args.volume:
        volstr = '%' + args.volume + '%'
        testcount = session.query(Files).filter(
            Files.archiveid == archiveID, Files.filename.like(volstr)).filter(
                or_(cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date) == None)).count()
        # logger.info('Query Count %s', testcount)
        testQ = session.query(Files).filter(
            Files.archiveid == archiveID, Files.filename.like(volstr)).filter(
                or_(cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date) == None))
    else:
        testcount = session.query(Files).filter(
            Files.archiveid == archiveID).filter(
                or_(cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date) == None)).count()
        testQ = session.query(Files).filter(
            Files.archiveid == archiveID).filter(
                or_(cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date) == None))

    addcount = 0
    for element in testQ:
        try:
            RQ.QueueAdd((element.filename, args.archive))
            addcount = addcount + 1
        except:
            logger.error('File %s Not Added to DI_ReadyQueue', element.filename)

    logger.info('Files Added to Queue %s', addcount)

    logger.info('DI Queueing Complete')
def main():
    # pdb.set_trace()

    # ***************** Set up logging *****************
    logger = logging.getLogger('ProjectionBrowse_Process')
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    RQ_main = RedisQueue('PilotB_ReadyQueue')

    while int(RQ_main.QueueSize()) > 0:
        inputfile = RQ_main.QueueGet()
        if os.path.isfile(inputfile):
            logger.info('Starting Process: %s', inputfile)

            if 'Mars_Reconnaissance_Orbiter/CTX/' in inputfile:
                mission = 'CTX'

            # ********** Derived DIR path stuff **********************
            finalpath = makedir(inputfile)

            recipeOBJ = Recipe()
            recipe_json = recipeOBJ.getRecipeJSON(mission)
            recipeOBJ.AddJsonFile(recipe_json, 'reduced')

            infile = workarea + \
                os.path.splitext(os.path.basename(inputfile))[0] + '.Pinput.cub'
            outfile = workarea + \
                os.path.splitext(os.path.basename(inputfile))[0] + '.Poutput.cub'

            status = 'success'
            for item in recipeOBJ.getProcesses():
                if status == 'error':
                    break
                elif status == 'success':
                    processOBJ = Process()
                    processR = processOBJ.ProcessFromRecipe(
                        item, recipeOBJ.getRecipe())

                    if '2isis' in item:
                        processOBJ.updateParameter('from_', inputfile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'spiceinit':
                        processOBJ.updateParameter('from_', infile)
                    elif item == 'ctxevenodd':
                        label = pvl.load(infile)
                        SS = label['IsisCube']['Instrument']['SpatialSumming']
                        if SS != 1:
                            break
                        else:
                            processOBJ.updateParameter('from_', infile)
                            processOBJ.updateParameter('to', outfile)
                    elif item == 'reduce':
                        label = pvl.load(infile)
                        Nline = label['IsisCube']['Core']['Dimensions']['Lines']
                        Nsample = label['IsisCube']['Core']['Dimensions']['Samples']
                        Sfactor = scaleFactor(Nline, Nsample)
                        processOBJ.updateParameter('lscale', Sfactor)
                        processOBJ.updateParameter('sscale', Sfactor)
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)
                    elif item == 'isis2std':
                        outfile = finalpath + '/' + \
                            os.path.splitext(os.path.basename(inputfile))[0] + \
                            '.projectionbrowse.png'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)
                    else:
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)

                    print(processOBJ.getProcess())

                    for k, v in processOBJ.getProcess().items():
                        func = getattr(isis, k)
                        try:
                            func(**v)
                            logger.info('Process %s :: Success', k)
                            if os.path.isfile(outfile):
                                if '.cub' in outfile:
                                    os.rename(outfile, infile)
                            status = 'success'
                        except ProcessError as e:
                            logger.error('Process %s :: Error', k)
                            status = 'error'

            if status == 'success':
                os.remove(infile)
                logger.info('Browse Process Success: %s', inputfile)
        else:
            logger.error('File %s Not Found', inputfile)
def main():
    # pdb.set_trace()

    FKey = sys.argv[-1]
    # FKey = "0f9ce6e5d6c9f241a3e4c2704d9e2c83"

    # ***************** Set up logging *****************
    logger = logging.getLogger('ServiceFinal.' + FKey)
    logger.setLevel(logging.INFO)
    #logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/Service.log')
    logFileHandle = logging.FileHandler(pds_log + 'Service.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting Final Process')

    # ************ Set up REDIS Queues ****************
    zipQueue = RedisQueue(FKey + '_ZIP')
    loggyQueue = RedisQueue(FKey + '_loggy')
    infoHash = RedisHash(FKey + '_info')
    recipeQueue = RedisQueue(FKey + '_recipe')
    errorHash = RedisHash(FKey + '_error')

    DBQO = PDS_DBquery('JOBS')

    if errorHash.HashCount() > 0:
        root = ET.Element('errors')
        test = errorHash.getKeys()
        for key in test:
            sub = ET.Element('error')
            root.append(sub)
            field1 = ET.SubElement(sub, 'file')
            field1.text = key
            Eval = errorHash.getError(key)
            field2 = ET.SubElement(sub, 'message')
            field2.text = Eval
        tree = ET.ElementTree(root)
        # testfile = 'test.xml'
        # with open(testfile, "w") as fh:
        fh = BytesIO()
        tree.write(fh, encoding='utf-8', xml_declaration=True)
        testval = DBQO.addErrors(FKey, fh.getvalue())
        if testval == 'Success':
            logger.info('Error XML Added to JOBS DB')
        elif testval == 'Error':
            logger.error('Adding Error XML to JOBS DB: Error')
        print(fh.getvalue())

    #Fdir = '/pds_san/PDS_Services/' + infoHash.Service() + '/' + FKey
    Fdir = pow_map2_base + infoHash.Service() + '/' + FKey
    # Fdir = '/scratch/bsucharski/PDS_service/' + FKey
    #Wpath = '/scratch/pds_services/' + FKey
    Wpath = scratch + FKey

    # ********* Make final directory ************
    if not os.path.exists(Fdir):
        try:
            os.makedirs(Fdir)
            logger.info('Final Location Success: %s', Fdir)
        except:
            logger.error('Error Making Final Directory')

    # ********** Block to build job log file **************
    outputLOG = Wpath + "/" + FKey + '.log'
    logOBJ = open(outputLOG, "w")

    logOBJ.write(" U.S. Geological Survey Cloud Processing Services\n")
    logOBJ.write(" http://astrocloud.wr.usgs.gov\n\n")
    if infoHash.Service() == 'POW':
        logOBJ.write(" Processing On the Web (POW)\n\n")
    logOBJ.write(" Processing Provided by ASTROGEOLOGY USGS Flagstaff\n")
    logOBJ.write(" Contact Information: [email protected]\n\n")
    logOBJ.write(
        "____________________________________________________________________\n\n")
    logOBJ.write("JOB INFORMATION\n\n")
    logOBJ.write(" SERVICE: " + infoHash.Service() + "\n")
    logOBJ.write(" JOB KEY: " + FKey + "\n")
    logOBJ.write(" PROCESSING DATE: " +
                 datetime.datetime.now().strftime("%Y-%m-%d %H:%M") + "\n")

    # /usgs/pkgs/isis3 is a symlink; the ISIS version is the link target
    isisV = subprocess.check_output(['ls', '-la', '/usgs/pkgs/isis3'])
    isisA = isisV.decode().split('>')
    logOBJ.write(" ISIS VERSION: " + isisA[-1])

    if infoHash.getStatus() == 'ERROR':
        logOBJ.write(" JOB STATUS: " + infoHash.getStatus() +
                     " See Details Below\n")
    else:
        logOBJ.write(" JOB STATUS: " + infoHash.getStatus() + "\n")
    logOBJ.write(" FILE COUNT: " + infoHash.getFileCount() + "\n\n")
    logOBJ.write(
        "_____________________________________________________________________\n\n")

    logOBJ.write("PROCESSING INFORMATION\n\n")
    for element in loggyQueue.ListGet():
        procDICT = json.loads(element, object_pairs_hook=OrderedDict)
        for infile in procDICT:
            logOBJ.write(" IMAGE: " + infile + "\n")
            for proc, testD in procDICT[infile].items():
                logOBJ.write(" PROCESS: " + str(proc) + "\n")
                for k, val in procDICT[infile][proc].items():
                    if k == 'status':
                        logOBJ.write(" STATUS: " + val + "\n")
                    elif k == 'command':
                        logOBJ.write(" COMMAND: " + val + "\n")
                    elif k == 'helplink':
                        logOBJ.write(" HELP LINK: " + val + "\n\n")
                    elif k == 'error':
                        logOBJ.write(" ERROR: " + val + "\n\n")
    logOBJ.write("END-PROCESSING\n")
    logOBJ.close()

    # ******** Block to copy and zip files to final directory ******
    Zfile = Wpath + '/' + FKey + '.zip'
    logger.info('Making Zip File %s', Zfile)

    # Log file stuff
    try:
        Lfile = FKey + '.log'
        Zcmd = 'zip -j ' + Zfile + " -q " + outputLOG
        process = subprocess.Popen(
            Zcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        (stdout, stderr) = process.communicate()
        # zOBJ.write(outputLOG, arcname=Lfile)
        logger.info('Log file %s Added to Zip File: Success', Lfile)
        logger.info('zip stdout: ' + stdout.decode())
        logger.info('zip stderr: ' + stderr.decode())
    except:
        logger.error('Log File %s NOT Added to Zip File', Lfile)

    try:
        shutil.copyfile(outputLOG, Fdir + "/" + Lfile)
        logger.info('Copied Log File %s to Final Area: Success', Lfile)
        os.remove(outputLOG)
    except IOError as e:
        logger.error('Log File %s NOT COPIED to Final Area', Lfile)
        logger.error(e)

    # File stuff
    for Lelement in zipQueue.ListGet():
        Pfile = os.path.basename(Lelement)
        # auxfile = os.path.basename(Lelement) + '.aux.xml'
        auxfile = Wpath + '/' + os.path.basename(Lelement) + '.aux.xml'
        try:
            Zcmd = 'zip -j ' + Zfile + " -q " + Lelement
            process = subprocess.Popen(
                Zcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
            (stdout, stderr) = process.communicate()
            logger.info('File %s Added to Zip File: Success', Pfile)
            if os.path.isfile(auxfile):
                Zcmd = 'zip -j ' + Zfile + " -q " + auxfile
                process = subprocess.Popen(
                    Zcmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                    shell=True)
                (stdout, stderr) = process.communicate()
                logger.info('File %s Added to Zip File: Success',
                            os.path.basename(Lelement) + '.aux.xml')
        except:
            logger.error('Error During Zip Operation')

        try:
            shutil.copyfile(Wpath + '/' + Pfile, Fdir + '/' + Pfile)
            logger.info('Copy File %s : Success', Pfile)
            os.remove(Wpath + "/" + Pfile)
            if os.path.isfile(auxfile):
                shutil.copyfile(auxfile, Fdir + '/' +
                                os.path.basename(Lelement) + '.aux.xml')
                logger.info('Copy File %s : Success',
                            os.path.basename(Lelement) + '.aux.xml')
                os.remove(auxfile)
        except IOError as e:
            logger.error('Error During File Copy Operation')
            logger.error(e)

    # zOBJ.close()

    try:
        shutil.copy(Zfile, Fdir + '/' + FKey + '.zip')
        os.remove(Zfile)
        logger.info('Zip File Copied to Final Directory')
    except IOError as e:
        logger.error('Error During Zip File Copy Operation')
        logger.error(e)

    # ************** Clean up *******************
    os.remove(Wpath + '/' + FKey + '.map')
    os.remove(Wpath + '/' + FKey + '.sbatch')
    try:
        # os.rmdir(Wpath)
        shutil.rmtree(Wpath)
        logger.info('Working Directory Removed: Success')
    except:
        logger.error('Working Directory NOT Removed')

    DBQO2 = PDS_DBquery('JOBS')
    DBQO2.setJobsFinished(FKey)

    infoHash.RemoveAll()
    loggyQueue.RemoveAll()
    zipQueue.RemoveAll()
    recipeQueue.RemoveAll()

    logger.info('Job %s is Complete', FKey)
def main():
    # pdb.set_trace()

    Key = sys.argv[-1]
    workarea = '/scratch/pds_services/' + Key + '/'

    RQ_file = RedisQueue(Key + '_FileQueue')
    RQ_work = RedisQueue(Key + '_WorkQueue')
    RQ_zip = RedisQueue(Key + '_ZIP')
    RQ_loggy = RedisQueue(Key + '_loggy')
    RQ_final = RedisQueue('FinalQueue')
    RHash = RedisHash(Key + '_info')
    RHerror = RedisHash(Key + '_error')

    if int(RQ_file.QueueSize()) == 0:
        print("No Files Found in Redis Queue")
    else:
        jobFile = RQ_file.Qfile2Qwork(
            RQ_file.getQueueName(), RQ_work.getQueueName())

        # ******************** Set up system logging **********************
        basename = os.path.splitext(os.path.basename(jobFile))[0]
        logger = logging.getLogger(Key + '.' + basename)
        logger.setLevel(logging.INFO)
        logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/Service.log')
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
        logFileHandle.setFormatter(formatter)
        logger.addHandler(logFileHandle)

        logger.info('Starting MAP Processing')

        loggyOBJ = Loggy(basename)

        # *************** File Naming ***************
        infile = workarea + \
            os.path.splitext(os.path.basename(jobFile))[0] + '.input.cub'
        outfile = workarea + \
            os.path.splitext(os.path.basename(jobFile))[0] + '.output.cub'

        # *********** Recipe Stuff ********************
        RQ_recipe = RedisQueue(Key + '_recipe')

        status = 'success'
        for element in RQ_recipe.RecipeGet():
            if status == 'error':
                break
            elif status == 'success':
                processOBJ = Process()
                process = processOBJ.JSON2Process(element)

                if 'gdal_translate' not in processOBJ.getProcessName():
                    if 'cubeatt-band' in processOBJ.getProcessName():
                        if '+' in jobFile:
                            # bandSplit = jobFile.split('+')
                            # infileB = infile + '+' + bandSplit[1]
                            processOBJ.updateParameter('from_', jobFile)
                            processOBJ.updateParameter('to', outfile)
                            processOBJ.ChangeProcess('cubeatt')
                        else:
                            continue
                    elif 'map2map' in processOBJ.getProcessName():
                        if '+' in jobFile:
                            processOBJ.updateParameter('from_', infile)
                        else:
                            processOBJ.updateParameter('from_', jobFile)
                        processOBJ.updateParameter('to', outfile)
                    elif 'cubeatt-bit' in processOBJ.getProcessName():
                        if RHash.OutBit() == 'unsignedbyte':
                            temp_outfile = outfile + '+lsb+tile+attached+unsignedbyte+1:254'
                        elif RHash.OutBit() == 'signedword':
                            temp_outfile = outfile + '+lsb+tile+attached+signedword+-32765:32765'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', temp_outfile)
                        processOBJ.ChangeProcess('cubeatt')
                    elif 'isis2pds' in processOBJ.getProcessName():
                        # finalfile = infile.replace('.input.cub', '_final.img')
                        finalfile = workarea + RHash.getMAPname() + '.img'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', finalfile)
                    else:
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)

                    print(processOBJ.getProcess())

                    for k, v in processOBJ.getProcess().items():
                        func = getattr(isis, k)
                        subloggyOBJ = SubLoggy(k)
                        try:
                            func(**v)
                            logger.info('Process %s :: Success', k)
                            subloggyOBJ.setStatus('SUCCESS')
                            subloggyOBJ.setCommand(processOBJ.LogCommandline())
                            subloggyOBJ.setHelpLink(processOBJ.LogHelpLink())
                            loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())

                            if os.path.isfile(outfile):
                                os.rename(outfile, infile)
                            status = 'success'
                        except ProcessError as e:
                            logger.error('Process %s :: Error', k)
                            logger.error(e)
                            status = 'error'
                            eSTR = 'Error Executing ' + k + \
                                ' Standard Error: ' + str(e)
                            RHerror.addError(
                                os.path.splitext(os.path.basename(jobFile))[0],
                                eSTR)
                            subloggyOBJ.setStatus('ERROR')
                            subloggyOBJ.setCommand(processOBJ.LogCommandline())
                            subloggyOBJ.setHelpLink(processOBJ.LogHelpLink())
                            subloggyOBJ.errorOut(eSTR)
                            loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())
                else:
                    GDALcmd = ""
                    for process, v in processOBJ.getProcess().items():
                        subloggyOBJ = SubLoggy(process)
                        GDALcmd += process
                        for key, value in v.items():
                            GDALcmd += ' ' + key + ' ' + value

                    if RHash.Format() == 'GeoTiff-BigTiff':
                        fileext = 'tif'
                    elif RHash.Format() == 'GeoJPEG-2000':
                        fileext = 'jp2'
                    elif RHash.Format() == 'JPEG':
                        fileext = 'jpg'
                    elif RHash.Format() == 'PNG':
                        fileext = 'png'
                    elif RHash.Format() == 'GIF':
                        fileext = 'gif'

                    logGDALcmd = GDALcmd + ' ' + basename + '.input.cub ' + \
                        RHash.getMAPname() + '.' + fileext
                    finalfile = workarea + RHash.getMAPname() + '.' + fileext
                    # finalfile = infile.replace('.input.cub', '_final.' + fileext)
                    GDALcmd += ' ' + infile + ' ' + finalfile
                    print(GDALcmd)

                    try:
                        result = subprocess.call(GDALcmd, shell=True)
                        logger.info('Process GDAL translate :: Success')
                        status = 'success'
                        subloggyOBJ.setStatus('SUCCESS')
                        subloggyOBJ.setCommand(logGDALcmd)
                        subloggyOBJ.setHelpLink(
                            'www.gdal.org/gdal_translate.html')
                        loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())
                        os.remove(infile)
                    except OSError as e:
                        logger.error('Process GDAL translate :: Error')
                        logger.error(e)
                        status = 'error'
                        RHerror.addError(
                            os.path.splitext(os.path.basename(jobFile))[0],
                            'Process GDAL translate :: Error')
                        subloggyOBJ.setStatus('ERROR')
                        subloggyOBJ.setCommand(logGDALcmd)
                        subloggyOBJ.setHelpLink(
                            'http://www.gdal.org/gdal_translate.html')
                        subloggyOBJ.errorOut(str(e))
                        loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())

        if status == 'success':
            if RHash.Format() == 'ISIS3':
                finalfile = workarea + RHash.getMAPname() + '.cub'
                # finalfile = infile.replace('.input.cub', '_final.cub')
                shutil.move(infile, finalfile)
            if RHash.getStatus() != 'ERROR':
                RHash.Status('SUCCESS')

            try:
                RQ_zip.QueueAdd(finalfile)
                logger.info('File Added to ZIP Queue')
            except:
                logger.error('File NOT Added to ZIP Queue')

            try:
                RQ_loggy.QueueAdd(loggyOBJ.Loggy2json())
                logger.info('JSON Added to Loggy Queue')
            except:
                logger.error('JSON NOT Added to Loggy Queue')

            RQ_work.QueueRemove(jobFile)
        elif status == 'error':
            RHash.Status('ERROR')
            if os.path.isfile(infile):
                os.remove(infile)

        if RQ_file.QueueSize() == 0 and RQ_work.QueueSize() == 0:
            try:
                RQ_final.QueueAdd(Key)
                logger.info('Key %s Added to Final Queue: Success', Key)
                logger.info('Job Complete')
            except:
                logger.error('Key NOT Added to Final Queue')
        else:
            logger.warning('Queues Not Empty: filequeue = %s work queue = %s',
                           str(RQ_file.QueueSize()), str(RQ_work.QueueSize()))
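# The GDAL branch above builds one concatenated command string and runs it with
# shell=True, which is fragile for paths containing spaces or shell metacharacters.
# A minimal sketch of the same call built as an argument list instead; the helper
# name run_gdal_translate and the assumed shape of the process dictionary
# (program -> {flag: value}) are illustrative assumptions, not the original API.
import subprocess

def run_gdal_translate(process_dict, infile, finalfile):
    """Run gdal_translate-style commands from a {program: {flag: value}} dict."""
    cmd = []
    for program, params in process_dict.items():
        cmd.append(program)
        for flag, value in params.items():
            cmd.extend([flag, value])
    cmd.extend([infile, finalfile])
    # check_call raises CalledProcessError on a non-zero exit status,
    # unlike subprocess.call, which only returns the status code
    subprocess.check_call(cmd)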
def main():
    args = Args()
    args.parse_args()

    override = args.override

    # ********* Set up logging *************
    logger = logging.getLogger('Ingest_Process')
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'Ingest.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info("Starting Process")

    PDSinfoDICT = json.load(open(pds_info, 'r'))

    RQ_main = RedisQueue('Ingest_ReadyQueue')
    RQ_work = RedisQueue('Ingest_WorkQueue')
    RQ_upc = RedisQueue('UPC_ReadyQueue')
    RQ_thumb = RedisQueue('Thumbnail_ReadyQueue')
    RQ_browse = RedisQueue('Browse_ReadyQueue')
    #RQ_pilotB = RedisQueue('PilotB_ReadyQueue')

    try:
        session, _ = db_connect(pds_db)
        logger.info('Database Connection: Success')
    except:
        logger.error('Database Connection: Error')

    index = 1

    while int(RQ_main.QueueSize()) > 0:
        item = literal_eval(RQ_main.QueueGet().decode("utf-8"))
        inputfile = item[0]
        archive = item[1]
        RQ_work.QueueAdd(inputfile)

        subfile = inputfile.replace(PDSinfoDICT[archive]['path'], '')

        # Calculate the checksum in chunks of 4096 bytes
        f_hash = hashlib.md5()
        with open(inputfile, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                f_hash.update(chunk)
        filechecksum = f_hash.hexdigest()

        QOBJ = session.query(Files).filter_by(filename=subfile).first()

        runflag = False
        if QOBJ is None:
            runflag = True
        elif filechecksum != QOBJ.checksum:
            runflag = True

        if runflag or override:
            date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            fileURL = inputfile.replace(archive_base, web_base)

            # If all upc requirements are in 'inputfile,' flag for upc
            upcflag = all(x in inputfile for x in PDSinfoDICT[archive]['upc_reqs'])

            filesize = os.path.getsize(inputfile)

            try:
                # If we found an existing file and want to overwrite the data
                if QOBJ is not None and override:
                    testIN = QOBJ
                # If the file was not found, create a new entry
                else:
                    testIN = Files()

                testIN.archiveid = PDSinfoDICT[archive]['archiveid']
                testIN.filename = subfile
                testIN.entry_date = date
                testIN.checksum = filechecksum
                testIN.upc_required = upcflag
                testIN.validation_required = True
                testIN.header_only = False
                testIN.release_date = date
                testIN.file_url = fileURL
                testIN.file_size = filesize
                testIN.di_pass = True
                testIN.di_date = date

                session.merge(testIN)
                session.flush()

                if upcflag:
                    RQ_upc.QueueAdd((inputfile, testIN.fileid, archive))
                    RQ_thumb.QueueAdd((inputfile, testIN.fileid, archive))
                    RQ_browse.QueueAdd((inputfile, testIN.fileid, archive))
                    #RQ_pilotB.QueueAdd((inputfile, testIN.fileid, archive))

                RQ_work.QueueRemove(inputfile)

                index = index + 1

            except Exception as e:
                print(e)
                logger.error("Error During File Insert %s", subfile)

        elif not runflag:
            RQ_work.QueueRemove(inputfile)

        if index >= 250:
            try:
                session.commit()
                logger.info("Commit 250 files to Database: Success")
                index = 1
            except:
                session.rollback()
                logger.error("Something Went Wrong During DB Insert")
    else:
        logger.info("No Files Found in Ingest Queue")

    try:
        session.commit()
        logger.info("Commit to Database: Success")
    except:
        session.rollback()

    if RQ_main.QueueSize() == 0 and RQ_work.QueueSize() == 0:
        logger.info("Process Complete All Queues Empty")
    elif RQ_main.QueueSize() == 0 and RQ_work.QueueSize() != 0:
        logger.warning("Process Done Work Queue NOT Empty Contains %s Files",
                       str(RQ_work.QueueSize()))

    logger.info("Ingest Complete")
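# The ingest loop above commits roughly every 250 records and once more at the
# end of the run. A minimal sketch of that batching as a reusable helper; the
# name commit_in_batches and the logger argument are assumptions for
# illustration, not part of the original code.
def commit_in_batches(session, logger, index, batch_size=250):
    """Commit the session when `index` reaches `batch_size`; return the new index."""
    if index >= batch_size:
        try:
            session.commit()
            logger.info("Commit %s files to Database: Success", batch_size)
            return 1
        except Exception:
            session.rollback()
            logger.error("Something Went Wrong During DB Insert")
    return index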
def main():
    # pdb.set_trace()

    args = Args()
    args.parse_args()

    RQ = RedisQueue('ChecksumUpdate_Queue')

    archiveDICT = {
        'cassiniISS': '/pds_san/PDS_Archive/Cassini/ISS',
        'mroCTX': '/pds_san/PDS_Archive/Mars_Reconnaissance_Orbiter/CTX',
        'mroHIRISE': '/pds_san/PDS_Archive/Mars_Reconnaissance_Orbiter/HiRISE',
        'LROLRC_EDR': '/pds_san/PDS_Archive/Lunar_Reconnaissance_Orbiter/LROC/EDR/'
    }

    archiveID = {
        'cassiniISS': 'cassini_iss_edr',
        'mroCTX': 16,
        'mroHIRISE_EDR': '124',
        'LROLRC_EDR': 74
    }

    # ********* Set up logging *************
    logger = logging.getLogger('ChecksumUpdate_Queueing.' + args.archive)
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting %s Checksum Update Queueing', args.archive)
    if args.volume:
        logger.info('Queueing %s Volume', args.volume)

    try:
        # Throws away engine information
        session, _ = db_connect(pds_db)
        logger.info('Database Connection: Success')
    except:
        logger.error('Database Connection: Error')
        return 1

    if args.volume:
        volstr = '%' + args.volume + '%'
        QueryOBJ = session.query(Files).filter(
            Files.archiveid == archiveID[args.archive],
            Files.filename.like(volstr))
    else:
        QueryOBJ = session.query(Files).filter(
            Files.archiveid == archiveID[args.archive])

    addcount = 0
    for element in QueryOBJ:
        try:
            RQ.QueueAdd(element.filename)
            addcount = addcount + 1
        except:
            logger.error('File %s Not Added to DI_ReadyQueue', element.filename)

    logger.info('Files Added to Queue %s', addcount)

    logger.info('DI Queueing Complete')
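# Each main() above is a command-line entry point for its script. A minimal
# sketch of the usual guard, assuming these scripts are invoked directly; the
# sys.exit() wrapper is an assumption, not taken from the original sources.
if __name__ == '__main__':
    import sys
    sys.exit(main())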