Esempio n. 1
0
def process_submitted_files():
    """Promote the oldest submitted batch file to "in progress" and process it.

    Reads the oldest JSON file found in BATCHSUBMITED, replaces its
    ``header`` entry with fresh bookkeeping data (start/modified time,
    status, subject count, completion counter, OCR engine version), writes
    the result under the same file name in BATCHINPROGRESS, removes the
    submitted copy and finally hands the in-progress file to
    ``process_files``.

    Returns None without doing anything when ``get_oldest_file`` reports no
    submitted file.

    Raises:
        KeyError: if the submitted JSON has no ``subjects`` entry.
        OSError / ValueError: if the file cannot be read, parsed or written.
    """
    version = utils.get_tesseract_version()
    oldest_file_path = get_oldest_file(BATCHSUBMITED)
    logger.info(['I am inside processsubmitted files', oldest_file_path])
    if oldest_file_path is None:
        # The file may have disappeared between the caller's check and this
        # call; bail out instead of failing inside os.path.basename(None).
        logger.info(['no files found for batch processing'])
        return

    # Start and modified time describe the same instant at creation, so
    # take a single timestamp to guarantee they agree.
    starttime = modifiedtime = time.strftime("%c")

    filepath = oldest_file_path
    filealtpath = os.path.join(BATCHINPROGRESS, os.path.basename(filepath))
    logger.info([filepath])
    logger.info([filealtpath])

    with open(filepath, 'r') as submitted_file:
        jsonobj = json.load(submitted_file)
    logger.info([jsonobj])

    logger.info([filepath, 'Constructing Header'])
    jsonobj['header'] = {}
    logger.info([filealtpath, 'adding start and modified time to header'])
    jsonobj['header']['start time'] = starttime
    jsonobj['header']['modified time'] = modifiedtime
    logger.info([filealtpath, 'adding status to in progress'])
    jsonobj['header']['status'] = "in progress"
    logger.info([filealtpath, 'adding total count of subjects'])
    jsonobj['header']['total'] = len(jsonobj['subjects'])
    logger.info([filealtpath, 'adding complete flag and setting it to 0'])
    jsonobj['header']['complete'] = 0
    logger.info([filealtpath, 'adding tesseract version'])
    jsonobj['header']['OCR engine'] = version

    # Close (and therefore flush) the in-progress copy before the submitted
    # original is deleted and before process_files reads it back.
    with open(filealtpath, "w") as inprogress_file:
        json.dump(jsonobj, inprogress_file)

    logger.info([filepath, ' Removing from batchsubmited folder '])
    os.remove(filepath)
    process_files(filealtpath)
Esempio n. 2
0
def cron_jobs():
    """Run one scheduling pass of the batch OCR pipeline.

    Does nothing when ``isBatchInProgress(process_path)`` reports that a
    batch is already being worked on. Otherwise:

    * if a file exists in BATCHINPROGRESS, resume it via ``process_files``;
    * else, if a file exists in BATCHSUBMITED, start it via
      ``process_submitted_files``.

    Any exception raised while looking up or processing files is caught and
    logged together with its traceback, so a failing batch does not
    propagate out of the cron entry point.
    """
    if isBatchInProgress(process_path):
        return

    # Defined before the try block so the exception handler can always
    # report which file (if any) was being handled.
    inprogress_file_path = None
    oldest_file_path = None
    try:
        inprogress_file_path = get_oldest_file(BATCHINPROGRESS)
        logger.info([inprogress_file_path, 'Inprogress file path'])
        oldest_file_path = get_oldest_file(BATCHSUBMITED)
        logger.info([oldest_file_path, 'Oldest File Path'])
        if inprogress_file_path is not None:
            logger.info('calling process_files')
            process_files(inprogress_file_path)
        elif oldest_file_path is not None:
            logger.info('calling process_submitted_files')
            logger.info('about to call process_submitted_files')
            process_submitted_files()
    except Exception as cronexcept:
        # An in-progress file takes priority in the dispatch above, so it is
        # also the one to blame first when something fails.
        if inprogress_file_path is not None:
            failed_path = inprogress_file_path
        else:
            failed_path = oldest_file_path
        logger.info([failed_path, 'Exception: {0}'.format(cronexcept)])
        logger.info([failed_path, traceback.format_exc()])