def step(self):
    '''
    Check that the output from a LIMB or OCR process matches certain
    criteria: the page count of the color pdf and of the bw pdf must
    each match the number of input pictures they were built from.

    Requires the param process_title from the command line.
    Other values are taken from config.ini.

    Returns None when every check passes. In the case of errors a
    message is returned, which is sent to the previous step.
    '''
    try:
        self.getVariables()
        tools.ensureDirsExist(self.bw_pdf_input_dir,
                              self.color_pdf_input_dir,
                              self.input_files_dir)
        # Check if color pdf is ok
        color_ok = limb_tools.pageCountMatches(self.color_pdf_input_dir,
                                               self.input_files_dir,
                                               self.valid_exts)
        if not color_ok:
            raise DataError('PDF page count does not match input picture count in "{0}"!'.format(self.color_pdf_input_dir))
        # Check if bw pdf is ok
        bw_ok = limb_tools.pageCountMatches(self.bw_pdf_input_dir,
                                            self.preprocessed_input_files,
                                            self.valid_exts)
        if not bw_ok:
            raise DataError('PDF page count does not match input picture count in "{0}"!'.format(self.bw_pdf_input_dir))
    except IOError as e:
        # strerror is None when the IOError was raised with a single
        # message argument; fall back to the exception text so the
        # message never reads "IOError - None".
        return "IOError - {0}".format(e.strerror or e)
    except DataError as e:
        # Same fallback, in case the error carries no usable strerror.
        reason = getattr(e, 'strerror', None) or e
        return "Validation error - {0}.".format(reason)
    return None
def ocrIsReady(self):
    '''
    Check to see if OCR is finished.

    OCR is considered finished when both self.pdf_input_dir and
    self.input_files exist and limb_tools.pageCountMatches reports a
    match for them (restricted to self.valid_exts).

    Returns a boolean. A missing directory is not an error: a debug
    message is logged and False is returned.
    '''
    try:
        # raises error if one of our directories is missing
        tools.ensureDirsExist(self.pdf_input_dir, self.input_files)
    except IOError as e:
        msg = ('One of the output folder from OCR is not yet created.'
               ' Waiting for OCR to be ready. Error: {0}')
        # strerror is None when the IOError was raised with a single
        # message argument; fall back to the exception text so the
        # log never reads "Error: None".
        self.debug_message(msg.format(e.strerror or e))
        return False
    # legr: we can use limb_tools generally - they are not Limb specific
    # we should rename them someday
    return bool(limb_tools.pageCountMatches(self.pdf_input_dir,
                                            self.input_files,
                                            self.valid_exts))