Esempio n. 1
0
def main():

    #pdb.set_trace()

    args = Args()
    args.parse_args()

    RQ = RedisQueue('DI_ReadyQueue')

    PDSinfoDICT = json.load(open(pds_info, 'r'))
    try:
        archiveID = PDSinfoDICT[args.archive]['archiveid']
    except KeyError:
        print("\nArchive '{}' not found in {}\n".format(
            args.archive, pds_info))
        print("The following archives are available:")
        for k in PDSinfoDICT.keys():
            print("\t{}".format(k))
        exit()

# ********* Set up logging *************
    logger = logging.getLogger('DI_Queueing.' + args.archive)
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'DI.log')
    #logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting %s DI Queueing', args.archive)
    if args.volume:
        logger.info('Queueing %s Volume', args.volume)

    try:
        # @TODO switch back to prd
        session, _ = db_connect(pds_db)

        logger.info('DataBase Connecton: Success')
    except:
        logger.error('DataBase Connection: Error')


# ************* date stuff ***************

    td = (datetime.datetime.now(pytz.utc) -
          datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
    testing_date = datetime.datetime.strptime(str(td), "%Y-%m-%d %H:%M:%S")

    if args.volume:
        volstr = '%' + args.volume + '%'

        testcount = session.query(Files).filter(
            Files.archiveid == archiveID, Files.filename.like(volstr)).filter(
                or_(
                    cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date) == None)).count()
        #        logger.info('Query Count %s', testcount)
        testQ = session.query(Files).filter(
            Files.archiveid == archiveID, Files.filename.like(volstr)).filter(
                or_(
                    cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date) == None))
    else:
        testcount = session.query(Files).filter(
            Files.archiveid == archiveID).filter(
                or_(
                    cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date) == None)).count()
        testQ = session.query(Files).filter(
            Files.archiveid == archiveID).filter(
                or_(
                    cast(Files.di_date, Date) < testing_date,
                    cast(Files.di_date, Date) == None))

    addcount = 0
    for element in testQ:
        try:
            RQ.QueueAdd((element.filename, args.archive))
            addcount = addcount + 1
        except:
            logger.error('File %s Not Added to DI_ReadyQueue',
                         element.filename)

    logger.info('Files Added to Queue %s', addcount)

    logger.info('DI Queueing Complete')
Esempio n. 2
0
def main():
    args = Args()
    args.parse_args()
    override = args.override
    # ********* Set up logging *************
    logger = logging.getLogger('Ingest_Process')
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler(pds_log + 'Ingest.log')

    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info("Starting Process")
    PDSinfoDICT = json.load(open(pds_info, 'r'))

    RQ_main = RedisQueue('Ingest_ReadyQueue')
    RQ_work = RedisQueue('Ingest_WorkQueue')

    RQ_upc = RedisQueue('UPC_ReadyQueue')
    RQ_thumb = RedisQueue('Thumbnail_ReadyQueue')
    RQ_browse = RedisQueue('Browse_ReadyQueue')
    #RQ_pilotB = RedisQueue('PilotB_ReadyQueue')

    try:
        session, _ = db_connect(pds_db)
        logger.info('DataBase Connecton: Success')
    except:
        logger.error('DataBase Connection: Error')

    index = 1

    while int(RQ_main.QueueSize()) > 0:

        item = literal_eval(RQ_main.QueueGet().decode("utf-8"))
        inputfile = item[0]
        archive = item[1]
        RQ_work.QueueAdd(inputfile)
        
        subfile = inputfile.replace(PDSinfoDICT[archive]['path'], '')
        # Calculate checksum in chunks of 4096
        f_hash = hashlib.md5()
        with open(inputfile, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                f_hash.update(chunk)
        filechecksum = f_hash.hexdigest()

        QOBJ = session.query(Files).filter_by(filename=subfile).first()

        runflag = False
        if QOBJ is None:
            runflag = True
        elif filechecksum != QOBJ.checksum:
            runflag = True

        if runflag == True or override == True:
            date = datetime.datetime.now(
                pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
            fileURL = inputfile.replace(archive_base, web_base)

            # If all upc requirements are in 'inputfile,' flag for upc
            upcflag =  all(x in inputfile for x in PDSinfoDICT[archive]['upc_reqs'])
            filesize = os.path.getsize(inputfile)

            try:
                # If we found an existing file and want to overwrite the data
                if QOBJ is not None and override == True:
                    testIN = QOBJ
                # If the file was not found, create a new entry
                else:
                    testIN = Files()
                testIN.archiveid = PDSinfoDICT[archive]['archiveid']
                testIN.filename = subfile
                testIN.entry_date = date
                testIN.checksum = filechecksum
                testIN.upc_required = upcflag
                testIN.validation_required = True
                testIN.header_only = False
                testIN.release_date = date
                testIN.file_url = fileURL
                testIN.file_size = filesize
                testIN.di_pass = True
                testIN.di_date = date

                session.merge(testIN)
                session.flush()

                if upcflag == True:
                    RQ_upc.QueueAdd((inputfile, testIN.fileid, archive))
                    RQ_thumb.QueueAdd((inputfile, testIN.fileid, archive))
                    RQ_browse.QueueAdd((inputfile, testIN.fileid, archive))
                    #RQ_pilotB.QueueAdd((inputfile, testIN.fileid, archive))


                RQ_work.QueueRemove(inputfile)

                index = index + 1

            except Exception as e:
                print(e)
                logger.error("Error During File Insert %s", subfile)

        elif runflag == False:
            RQ_work.QueueRemove(inputfile)

        if index >= 250:
            try:
                session.commit()
                logger.info("Commit 250 files to Database: Success")
                index = 1
            except:
                session.rollback()
                logger.error("Something Went Wrong During DB Insert")
    else:
        logger.info("No Files Found in Ingest Queue")
        try:
            session.commit()
            logger.info("Commit to Database: Success")
        except:
            session.rollback()

    if RQ_main.QueueSize() == 0 and RQ_work.QueueSize() == 0:
        logger.info("Process Complete All Queues Empty")
    elif RQ_main.QueueSize() == 0 and RQ_work.QueueSize() != 0:
        logger.warning("Process Done Work Queue NOT Empty Contains %s Files", str(
            RQ_work.QueueSize()))

    logger.info("Ingest Complete")
Esempio n. 3
0
def main():

    #    pdb.set_trace()

    Key = sys.argv[-1]

    workarea = '/scratch/pds_services/' + Key + '/'

    RQ_file = RedisQueue(Key + '_FileQueue')
    RQ_work = RedisQueue(Key + '_WorkQueue')
    RQ_zip = RedisQueue(Key + '_ZIP')
    RQ_loggy = RedisQueue(Key + '_loggy')
    RQ_final = RedisQueue('FinalQueue')
    RHash = RedisHash(Key + '_info')
    RHerror = RedisHash(Key + '_error')

    if int(RQ_file.QueueSize()) == 0:
        print "No Files Found in Redis Queue"
    else:
        jobFile = RQ_file.Qfile2Qwork(RQ_file.getQueueName(),
                                      RQ_work.getQueueName())

        #******************** Setup system logging **********************
        basename = os.path.splitext(os.path.basename(jobFile))[0]
        logger = logging.getLogger(Key + '.' + basename)
        logger.setLevel(logging.INFO)

        logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/Service.log')
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
        logFileHandle.setFormatter(formatter)
        logger.addHandler(logFileHandle)

        logger.info('Starting MAP Processing')

        loggyOBJ = Loggy(basename)

        # *************** File Naming ***************
        infile = workarea + \
            os.path.splitext(os.path.basename(jobFile))[0] + '.input.cub'
        outfile = workarea + \
            os.path.splitext(os.path.basename(jobFile))[0] + '.output.cub'

        # *********** Recipe Stuff ********************

        RQ_recipe = RedisQueue(Key + '_recipe')

        status = 'success'

        for element in RQ_recipe.RecipeGet():

            if status == 'error':
                break
            elif status == 'success':
                processOBJ = Process()
                process = processOBJ.JSON2Process(element)

                if 'gdal_translate' not in processOBJ.getProcessName():

                    if 'cubeatt-band' in processOBJ.getProcessName():
                        if '+' in jobFile:
                            #                            bandSplit = jobFile.split('+')
                            #                           infileB = infile + '+' + bandSplit[1]
                            processOBJ.updateParameter('from_', jobFile)
                            processOBJ.updateParameter('to', outfile)
                            processOBJ.ChangeProcess('cubeatt')
                        else:
                            continue

                    elif 'map2map' in processOBJ.getProcessName():
                        if '+' in jobFile:
                            processOBJ.updateParameter('from_', infile)
                        else:
                            processOBJ.updateParameter('from_', jobFile)
                        processOBJ.updateParameter('to', outfile)

                    elif 'cubeatt-bit' in processOBJ.getProcessName():
                        if RHash.OutBit() == 'unsignedbyte':
                            temp_outfile = outfile + '+lsb+tile+attached+unsignedbyte+1:254'
                        elif RHash.OutBit() == 'signedword':
                            temp_outfile = outfile + '+lsb+tile+attached+signedword+-32765:32765'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', temp_outfile)
                        processOBJ.ChangeProcess('cubeatt')

                    elif 'isis2pds' in processOBJ.getProcessName():
                        #                        finalfile = infile.replace('.input.cub', '_final.img')
                        finalfile = workarea + RHash.getMAPname() + '.img'
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', finalfile)

                    else:
                        processOBJ.updateParameter('from_', infile)
                        processOBJ.updateParameter('to', outfile)

                    print processOBJ.getProcess()

                    for k, v in processOBJ.getProcess().items():
                        func = getattr(isis, k)
                        subloggyOBJ = SubLoggy(k)
                        try:
                            func(**v)
                            logger.info('Process %s :: Success', k)
                            subloggyOBJ.setStatus('SUCCESS')
                            subloggyOBJ.setCommand(processOBJ.LogCommandline())
                            subloggyOBJ.setHelpLink(processOBJ.LogHelpLink())
                            loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())

                            if os.path.isfile(outfile):
                                os.rename(outfile, infile)
                            status = 'success'

                        except ProcessError as e:
                            logger.error('Process %s :: Error', k)
                            logger.error(e)
                            status = 'error'
                            eSTR = 'Error Executing ' + k + \
                                ' Standard Error: ' + str(e)
                            RHerror.addError(
                                os.path.splitext(os.path.basename(jobFile))[0],
                                eSTR)
                            subloggyOBJ.setStatus('ERROR')
                            subloggyOBJ.setCommand(processOBJ.LogCommandline())
                            subloggyOBJ.setHelpLink(processOBJ.LogHelpLink())
                            subloggyOBJ.errorOut(eSTR)
                            loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())

                else:

                    GDALcmd = ""
                    for process, v, in processOBJ.getProcess().items():
                        subloggyOBJ = SubLoggy(process)
                        GDALcmd += process
                        for key, value in v.items():
                            GDALcmd += ' ' + key + ' ' + value

                    if RHash.Format() == 'GeoTiff-BigTiff':
                        fileext = 'tif'
                    elif RHash.Format() == 'GeoJPEG-2000':
                        fileext = 'jp2'
                    elif RHash.Format() == 'JPEG':
                        fileext = 'jpg'
                    elif RHash.Format() == 'PNG':
                        fileext = 'png'
                    elif RHash.Format() == 'GIF':
                        fileext = 'gif'

                    logGDALcmd = GDALcmd + ' ' + basename + '.input.cub ' + RHash.getMAPname() + \
                        '.' + fileext
                    finalfile = workarea + RHash.getMAPname() + '.' + fileext
                    #                    finalfile = infile.replace('.input.cub', '_final.' + fileext)
                    GDALcmd += ' ' + infile + ' ' + finalfile
                    print GDALcmd
                    try:
                        result = subprocess.call(GDALcmd, shell=True)
                        logger.info('Process GDAL translate :: Success')
                        status = 'success'
                        subloggyOBJ.setStatus('SUCCESS')
                        subloggyOBJ.setCommand(logGDALcmd)
                        subloggyOBJ.setHelpLink(
                            'www.gdal.org/gdal_translate.html')
                        loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())
                        os.remove(infile)
                    except OSError, e:
                        logger.error('Process GDAL translate :: Error')
                        logger.error(e.stderr)
                        status = 'error'
                        RHerror.addError(
                            os.path.splitext(os.path.basename(jobFile))[0],
                            'Process GDAL translate :: Error')
                        subloggyOBJ.setStatus('ERROR')
                        subloggyOBJ.setCommand(logGDALcmd)
                        subloggyOBJ.setHelpLink(
                            'http://www.gdal.org/gdal_translate.html')
                        subloggyOBJ.errorOut(e.stderr)
                        loggyOBJ.AddProcess(subloggyOBJ.getSLprocess())

        if status == 'success':
            if RHash.Format() == 'ISIS3':
                finalfile = workarea + RHash.getMAPname() + '.cub'
                #                finalfile = infile.replace('.input.cub', '_final.cub')
                shutil.move(infile, finalfile)
            if RHash.getStatus() != 'ERROR':
                RHash.Status('SUCCESS')

            try:
                RQ_zip.QueueAdd(finalfile)
                logger.info('File Added to ZIP Queue')
            except:
                logger.error('File NOT Added to ZIP Queue')

            try:
                RQ_loggy.QueueAdd(loggyOBJ.Loggy2json())
                logger.info('JSON Added to Loggy Queue')
            except:
                logger.error('JSON NOT Added to Loggy Queue')

            RQ_work.QueueRemove(jobFile)
        elif status == 'error':
            RHash.Status('ERROR')
            if os.path.isfile(infile):
                os.remove(infile)

        if RQ_file.QueueSize() == 0 and RQ_work.QueueSize() == 0:
            try:
                RQ_final.QueueAdd(Key)
                logger.info('Key %s Added to Final Queue: Success', Key)
                logger.info('Job Complete')
            except:
                logger.error('Key NOT Added to Final Queue')
        else:
            logger.warning('Queues Not Empty: filequeue = %s  work queue = %s',
                           str(RQ_file.QueueSize()), str(RQ_work.QueueSize()))
Esempio n. 4
0
def main():

    #    pdb.set_trace()

    args = Args()
    args.parse_args()

    RQ = RedisQueue('ChecksumUpdate_Queue')

    archiveDICT = {
        'cassiniISS': '/pds_san/PDS_Archive/Cassini/ISS',
        'mroCTX': '/pds_san/PDS_Archive/Mars_Reconnaissance_Orbiter/CTX',
        'mroHIRISE': '/pds_san/PDS_Archive/Mars_Reconnaissance_Orbiter/HiRISE',
        'LROLRC_EDR':
        '/pds_san/PDS_Archive/Lunar_Reconnaissance_Orbiter/LROC/EDR/'
    }

    archiveID = {
        'cassiniISS': 'cassini_iss_edr',
        'mroCTX': 16,
        'mroHIRISE_EDR': '124',
        'LROLRC_EDR': 74
    }

    # ********* Set up logging *************
    logger = logging.getLogger('ChecksumUpdate_Queueing.' + args.archive)
    logger.setLevel(logging.INFO)
    logFileHandle = logging.FileHandler('/usgs/cdev/PDS/logs/DI.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s, %(message)s')
    logFileHandle.setFormatter(formatter)
    logger.addHandler(logFileHandle)

    logger.info('Starting %s Checksum update Queueing', args.archive)
    if args.volume:
        logger.info('Queueing %s Volume', args.volume)

    try:
        # Throws away engine information
        session, _ = db_connect(pds_db)
        logger.info('DataBase Connecton: Success')
    except:
        logger.error('DataBase Connection: Error')
        return 1

    if args.volume:
        volstr = '%' + args.volume + '%'
        QueryOBJ = session.query(Files).filter(
            Files.archiveid == archiveID[args.archive],
            Files.filename.like(volstr))
    else:
        QueryOBJ = session.query(Files).filter(
            Files.archiveid == archiveID[args.archive])
    addcount = 0
    for element in QueryOBJ:
        try:
            RQ.QueueAdd(element.filename)
            addcount = addcount + 1
        except:
            logger.error('File %s Not Added to DI_ReadyQueue',
                         element.filename)

    logger.info('Files Added to Queue %s', addcount)

    logger.info('DI Queueing Complete')