def main():
    """Queue archive files for data-integrity (DI) checking.

    Looks up the requested archive in the PDS info file, selects files
    whose last DI check is older than 30 days (or has never happened),
    and adds each one to the 'DI_ReadyQueue' Redis queue as a
    (filename, archive) pair.

    Returns:
        1 on unknown archive or database-connection failure, else None.
    """
    args = Args()
    args.parse_args()

    RQ = RedisQueue('DI_ReadyQueue')
    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archiveID = PDSinfoDICT[args.archive]['archiveid']
    except KeyError:
        # Unknown archive: list the valid choices and bail out.
        print("\nArchive '{}' not found in {}\n".format(
            args.archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        return 1

    # ********* Set up logging *************
    logger = logging.getLogger('DI_Queueing.' + args.archive)
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting %s DI Queueing', args.archive)
    if args.volume:
        logger.info('Queueing %s Volume', args.volume)

    try:
        # @TODO switch back to prd
        session, _ = db_connect(pds_db)
        logger.info('DataBase Connecton: Success')
    except Exception:
        # Without a session nothing below can run; stop here instead of
        # crashing later on an undefined 'session'.
        logger.error('DataBase Connection: Error')
        return 1

    # ************* date stuff ***************
    # A file is due for a DI check when its last check is more than 30
    # days old or has never been recorded (di_date is NULL).
    td = (datetime.datetime.now(pytz.utc)
          - datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
    testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")

    # Build the filter once; previously the identical query was
    # constructed twice (once for an unused .count(), once to iterate).
    di_due = or_(cast(Files.di_date, Date) < testing_date,
                 cast(Files.di_date, Date).is_(None))
    if args.volume:
        volstr = '%' + args.volume + '%'
        testQ = session.query(Files).filter(
            Files.archiveid == archiveID,
            Files.filename.like(volstr)).filter(di_due)
    else:
        testQ = session.query(Files).filter(
            Files.archiveid == archiveID).filter(di_due)

    addcount = 0
    for element in testQ:
        try:
            RQ.QueueAdd((element.filename, args.archive))
            addcount = addcount + 1
        except Exception:
            logger.error('File %s Not Added to DI_ReadyQueue',
                         element.filename)

    logger.info('Files Added to Queue %s', addcount)
    logger.info('DI Queueing Complete')
def main():
    """Drain the 'Ingest_ReadyQueue': checksum each file, upsert its
    Files row, and fan successfully ingested files out to the UPC,
    thumbnail, and browse queues.

    A file is (re-)ingested when it is new, its MD5 checksum changed,
    or --override was given.  Commits are batched every 250 inserts.

    Returns:
        1 on database-connection failure, else None.
    """
    args = Args()
    args.parse_args()
    override = args.override

    # ********* Set up logging *************
    logger = logging.getLogger('Ingest_Process')
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'Ingest.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)
    logger.info("Starting Process")

    PDSinfoDICT = json.load(open(pds_info, 'r'))

    RQ_main = RedisQueue('Ingest_ReadyQueue')
    RQ_work = RedisQueue('Ingest_WorkQueue')
    RQ_upc = RedisQueue('UPC_ReadyQueue')
    RQ_thumb = RedisQueue('Thumbnail_ReadyQueue')
    RQ_browse = RedisQueue('Browse_ReadyQueue')
    #RQ_pilotB = RedisQueue('PilotB_ReadyQueue')

    try:
        session, _ = db_connect(pds_db)
        logger.info('DataBase Connecton: Success')
    except Exception:
        # No session means nothing below can work; bail out instead of
        # crashing later on an undefined 'session'.
        logger.error('DataBase Connection: Error')
        return 1

    if int(RQ_main.QueueSize()) == 0:
        # Previously this was a while/else, which fired after EVERY run
        # (the loop never breaks); log it only when the queue really is
        # empty at startup.
        logger.info("No Files Found in Ingest Queue")

    index = 1
    while int(RQ_main.QueueSize()) > 0:
        item = literal_eval(RQ_main.QueueGet().decode("utf-8"))
        inputfile = item[0]
        archive = item[1]
        # Track in-flight files so a crash leaves evidence in the
        # work queue.
        RQ_work.QueueAdd(inputfile)

        subfile = inputfile.replace(PDSinfoDICT[archive]['path'], '')

        # Calculate checksum in chunks of 4096
        f_hash = hashlib.md5()
        with open(inputfile, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                f_hash.update(chunk)
        filechecksum = f_hash.hexdigest()

        QOBJ = session.query(Files).filter_by(filename=subfile).first()
        # Ingest when the file is new or its contents changed.
        runflag = QOBJ is None or filechecksum != QOBJ.checksum

        if runflag or override:
            date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            fileURL = inputfile.replace(archive_base, web_base)
            # If all upc requirements are in 'inputfile,' flag for upc
            upcflag = all(x in inputfile
                          for x in PDSinfoDICT[archive]['upc_reqs'])
            filesize = os.path.getsize(inputfile)
            try:
                if QOBJ is not None and override:
                    # Existing row and --override: overwrite its data.
                    testIN = QOBJ
                else:
                    # File not found in the DB: create a new entry.
                    testIN = Files()
                testIN.archiveid = PDSinfoDICT[archive]['archiveid']
                testIN.filename = subfile
                testIN.entry_date = date
                testIN.checksum = filechecksum
                testIN.upc_required = upcflag
                testIN.validation_required = True
                testIN.header_only = False
                testIN.release_date = date
                testIN.file_url = fileURL
                testIN.file_size = filesize
                testIN.di_pass = True
                testIN.di_date = date
                session.merge(testIN)
                # Flush so testIN.fileid is populated before queueing.
                session.flush()

                if upcflag:
                    RQ_upc.QueueAdd((inputfile, testIN.fileid, archive))
                RQ_thumb.QueueAdd((inputfile, testIN.fileid, archive))
                RQ_browse.QueueAdd((inputfile, testIN.fileid, archive))
                #RQ_pilotB.QueueAdd((inputfile, testIN.fileid, archive))
                RQ_work.QueueRemove(inputfile)
                index = index + 1
            except Exception as e:
                print(e)
                logger.error("Error During File Insert %s", subfile)
        else:
            # Unchanged file: nothing to do, just clear the work marker.
            RQ_work.QueueRemove(inputfile)

        if index >= 250:
            # Batch commit every 250 successful inserts.
            try:
                session.commit()
                logger.info("Commit 250 files to Database: Success")
                index = 1
            except Exception:
                session.rollback()
                logger.error("Something Went Wrong During DB Insert")

    # Commit whatever remains from the final partial batch.
    try:
        session.commit()
        logger.info("Commit to Database: Success")
    except Exception:
        session.rollback()

    if RQ_main.QueueSize() == 0 and RQ_work.QueueSize() == 0:
        logger.info("Process Complete All Queues Empty")
    elif RQ_main.QueueSize() == 0 and RQ_work.QueueSize() != 0:
        logger.warning("Process Done Work Queue NOT Empty Contains %s Files",
                       str(RQ_work.QueueSize()))

    logger.info("Ingest Complete")
def main():
    # Process one job file from the per-key Redis file queue through a
    # stored ISIS "recipe" (a sequence of processing steps), producing a
    # final map product in the scratch workarea.  Results, per-step logs,
    # and errors are reported back through Redis queues/hashes.
    #
    # NOTE(review): this block uses Python 2 syntax (`print` statements,
    # `except OSError, e`) -- it will not run under Python 3.
    # pdb.set_trace()
    # The job key is the last command-line argument; all Redis structures
    # for this job are namespaced by it.
    Key = sys.argv[-1]
    workarea = '/scratch/pds_services/' + Key + '/'
    RQ_file = RedisQueue(Key + '_FileQueue')
    RQ_work = RedisQueue(Key + '_WorkQueue')
    RQ_zip = RedisQueue(Key + '_ZIP')
    RQ_loggy = RedisQueue(Key + '_loggy')
    RQ_final = RedisQueue('FinalQueue')
    RHash = RedisHash(Key + '_info')
    RHerror = RedisHash(Key + '_error')

    if int(RQ_file.QueueSize()) == 0:
        print "No Files Found in Redis Queue"
    else:
        # Atomically move one file from the file queue to the work queue.
        jobFile = RQ_file.Qfile2Qwork(
            RQ_file.getQueueName(), RQ_work.getQueueName())

        #******************** Setup system logging **********************
        basename = os.path.splitext(os.path.basename(jobFile))[0]
        logger = logging.getLogger(Key + '.' + basename)
        logger.setLevel(logging.INFO)
        logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/Service.log')
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
        logFileHandle.setFormatter(formatter)
        logger.addHandler(logFileHandle)

        logger.info('Starting MAP Processing')

        loggyOBJ = Loggy(basename)

        # *************** File Naming ***************
        # Each recipe step reads 'infile' and writes 'outfile'; after a
        # successful step, outfile is renamed over infile (see below), so
        # the steps chain through the same pair of paths.
        infile = workarea + \
            os.path.splitext(os.path.basename(jobFile))[0] + '.input.cub'
        outfile = workarea + \
            os.path.splitext(os.path.basename(jobFile))[0] + '.output.cub'

        # *********** Recipe Stuff ********************
        RQ_recipe = RedisQueue(Key + '_recipe')

        status = 'success'
        for element in RQ_recipe.RecipeGet():
            # Stop at the first failed step; remaining steps are skipped.
            if status == 'error':
                break
            elif status == 'success':
                processOBJ = Process()
                process = processOBJ.JSON2Process(element)

                if 'gdal_translate' not in processOBJ.getProcessName():
                    # ---- ISIS recipe step ----
                    if 'cubeatt-band' in processOBJ.getProcessName():
                        # Band selection only applies when the job file
                        # carries a '+band' suffix; otherwise skip step.
                        if '+' in jobFile:
                            # bandSplit = jobFile.split('+')
                            # infileB = infile + '+' + bandSplit[1]
                            processOBJ.updateParameter('from_', jobFile)
                            processOBJ.updateParameter('to', outfile)
                            processOBJ.ChangeProcess('cubeatt')
                        else:
                            continue
                    elif 'map2map' in processOBJ.getProcessName():
                        # If a band was split off earlier, read the
                        # intermediate cube; otherwise the original file.
                        if '+' in jobFile:
                            processOBJ.updateParameter('from_', infile)
                        else:
                            processOBJ.updateParameter('from_', jobFile)
                        processOBJ.updateParameter('to', outfile)
                    elif 'cubeatt-bit' in processOBJ.getProcessName():
                        # Append ISIS output attributes for the requested
                        # bit type.
                        # NOTE(review): if OutBit() is neither
                        # 'unsignedbyte' nor 'signedword', temp_outfile is
                        # never assigned and the updateParameter call
                        # below raises NameError -- confirm OutBit() is
                        # restricted to these two values.
                        if RHash.OutBit() == 'unsignedbyte':
                            temp_outfile = outfile + '+lsb+tile+attached+unsignedbyte+1:254'
                        elif RHash.OutBit() == 'signedword':
                            temp_outfile = outfile + '+lsb+tile+attached+signedword+-32765:32765'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', temp_outfile)
                        processOBJ.ChangeProcess('cubeatt')
                    elif 'isis2pds' in processOBJ.getProcessName():
                        # finalfile = infile.replace('.input.cub', '_final.img')
                        finalfile = workarea + RHash.getMAPname() + '.img'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', finalfile)
                    else:
                        # Generic step: chain infile -> outfile.
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)

                    print processOBJ.getProcess()

                    # Run each ISIS command of the step; on success the
                    # output cube replaces the input cube so the next
                    # step picks it up.
                    for k, v in processOBJ.getProcess().items():
                        func = getattr(isis, k)
                        subloggyOBJ = SubLoggy(k)
                        try:
                            func(**v)
                            logger.info('Process %s :: Success', k)
                            subloggyOBJ.setStatus('SUCCESS')
                            subloggyOBJ.setCommand(processOBJ.LogCommandline())
                            subloggyOBJ.setHelpLink(processOBJ.LogHelpLink())
                            loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())

                            if os.path.isfile(outfile):
                                os.rename(outfile, infile)
                            status = 'success'
                        except ProcessError as e:
                            logger.error('Process %s :: Error', k)
                            logger.error(e)
                            status = 'error'
                            eSTR = 'Error Executing ' + k + \
                                ' Standard Error: ' + str(e)
                            RHerror.addError(
                                os.path.splitext(os.path.basename(jobFile))[0],
                                eSTR)
                            subloggyOBJ.setStatus('ERROR')
                            subloggyOBJ.setCommand(processOBJ.LogCommandline())
                            subloggyOBJ.setHelpLink(processOBJ.LogHelpLink())
                            subloggyOBJ.errorOut(eSTR)
                            loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())
                else:
                    # ---- GDAL translate step: build a shell command ----
                    GDALcmd = ""
                    for process, v, in processOBJ.getProcess().items():
                        subloggyOBJ = SubLoggy(process)
                        GDALcmd += process
                        for key, value in v.items():
                            GDALcmd += ' ' + key + ' ' + value

                    # Map the requested output format to a file extension.
                    # NOTE(review): no fallback branch -- an unlisted
                    # Format() value leaves 'fileext' unassigned and the
                    # concatenation below raises NameError.
                    if RHash.Format() == 'GeoTiff-BigTiff':
                        fileext = 'tif'
                    elif RHash.Format() == 'GeoJPEG-2000':
                        fileext = 'jp2'
                    elif RHash.Format() == 'JPEG':
                        fileext = 'jpg'
                    elif RHash.Format() == 'PNG':
                        fileext = 'png'
                    elif RHash.Format() == 'GIF':
                        fileext = 'gif'

                    # logGDALcmd is the user-facing command (basename
                    # paths); GDALcmd is the one actually executed.
                    logGDALcmd = GDALcmd + ' ' + basename + '.input.cub ' + RHash.getMAPname() + \
                        '.' + fileext
                    finalfile = workarea + RHash.getMAPname() + '.' + fileext
                    # finalfile = infile.replace('.input.cub', '_final.' + fileext)
                    GDALcmd += ' ' + infile + ' ' + finalfile
                    print GDALcmd
                    try:
                        # NOTE(review): subprocess.call returns the exit
                        # code and does NOT raise on a nonzero status, so
                        # a failed gdal_translate is still logged as
                        # Success here; 'result' is never checked.  The
                        # except branch only fires if the shell itself
                        # cannot be spawned.
                        result = subprocess.call(GDALcmd, shell=True)
                        logger.info('Process GDAL translate :: Success')
                        status = 'success'
                        subloggyOBJ.setStatus('SUCCESS')
                        subloggyOBJ.setCommand(logGDALcmd)
                        subloggyOBJ.setHelpLink(
                            'www.gdal.org/gdal_translate.html')
                        loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())
                        os.remove(infile)
                    except OSError, e:
                        # NOTE(review): OSError has no 'stderr' attribute;
                        # e.stderr would itself raise AttributeError here.
                        logger.error('Process GDAL translate :: Error')
                        logger.error(e.stderr)
                        status = 'error'
                        RHerror.addError(
                            os.path.splitext(os.path.basename(jobFile))[0],
                            'Process GDAL translate :: Error')
                        subloggyOBJ.setStatus('ERROR')
                        subloggyOBJ.setCommand(logGDALcmd)
                        subloggyOBJ.setHelpLink(
                            'http://www.gdal.org/gdal_translate.html')
                        subloggyOBJ.errorOut(e.stderr)
                        loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())

        if status == 'success':
            # Recipe finished cleanly: move/queue the final product.
            # NOTE(review): if no recipe step assigned 'finalfile'
            # (e.g. ISIS3 output), it is only set in the branch below;
            # for other formats it must have been set by isis2pds or the
            # GDAL step -- confirm every recipe ends with one of those.
            if RHash.Format() == 'ISIS3':
                finalfile = workarea + RHash.getMAPname() + '.cub'
                # finalfile = infile.replace('.input.cub', '_final.cub')
                shutil.move(infile, finalfile)
            if RHash.getStatus() != 'ERROR':
                RHash.Status('SUCCESS')

            try:
                RQ_zip.QueueAdd(finalfile)
                logger.info('File Added to ZIP Queue')
            except:
                logger.error('File NOT Added to ZIP Queue')

            try:
                RQ_loggy.QueueAdd(loggyOBJ.Loggy2json())
                logger.info('JSON Added to Loggy Queue')
            except:
                logger.error('JSON NOT Added to Loggy Queue')

            RQ_work.QueueRemove(jobFile)
        elif status == 'error':
            # Failed run: flag the job and clean up the intermediate cube.
            RHash.Status('ERROR')
            if os.path.isfile(infile):
                os.remove(infile)

        # When both queues are drained this was the last file of the job;
        # hand the key to the final queue for packaging.
        if RQ_file.QueueSize() == 0 and RQ_work.QueueSize() == 0:
            try:
                RQ_final.QueueAdd(Key)
                logger.info('Key %s Added to Final Queue: Success', Key)
                logger.info('Job Complete')
            except:
                logger.error('Key NOT Added to Final Queue')
        else:
            logger.warning('Queues Not Empty: filequeue = %s work queue = %s',
                           str(RQ_file.QueueSize()), str(RQ_work.QueueSize()))
def main():
    """Queue every file of an archive (optionally one volume) onto the
    'ChecksumUpdate_Queue' Redis queue for checksum recalculation.

    Returns:
        1 on unknown archive or database-connection failure, else None.
    """
    args = Args()
    args.parse_args()

    RQ = RedisQueue('ChecksumUpdate_Queue')

    # Supported archives and their database archive ids.
    # NOTE(review): values mix str and int, and the key 'mroHIRISE_EDR'
    # disagreed with the (unused, now removed) path table's 'mroHIRISE'
    # -- confirm these ids against the Files table.
    archiveID = {'cassiniISS': 'cassini_iss_edr',
                 'mroCTX': 16,
                 'mroHIRISE_EDR': '124',
                 'LROLRC_EDR': 74}

    # ********* Set up logging *************
    logger = logging.getLogger('ChecksumUpdate_Queueing.' + args.archive)
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    if args.archive not in archiveID:
        # Unknown archive: list the valid choices and bail out instead of
        # raising a bare KeyError below.
        print("\nArchive '{}' not found".format(args.archive))
        print("The following archives are available:")
        for k in archiveID.keys():
            print("\t{}".format(k))
        return 1

    logger.info('Starting %s Checksum update Queueing', args.archive)
    if args.volume:
        logger.info('Queueing %s Volume', args.volume)

    try:
        # Throws away engine information
        session, _ = db_connect(pds_db)
        logger.info('DataBase Connecton: Success')
    except Exception:
        logger.error('DataBase Connection: Error')
        return 1

    if args.volume:
        volstr = '%' + args.volume + '%'
        QueryOBJ = session.query(Files).filter(
            Files.archiveid == archiveID[args.archive],
            Files.filename.like(volstr))
    else:
        QueryOBJ = session.query(Files).filter(
            Files.archiveid == archiveID[args.archive])

    addcount = 0
    for element in QueryOBJ:
        try:
            RQ.QueueAdd(element.filename)
            addcount = addcount + 1
        except Exception:
            # Message previously named DI_ReadyQueue, which this script
            # does not use.
            logger.error('File %s Not Added to ChecksumUpdate_Queue',
                         element.filename)

    logger.info('Files Added to Queue %s', addcount)
    logger.info('ChecksumUpdate Queueing Complete')