def deleteWorkingFolders(self):
    '''
    Delete the temporary folders used while processing.

    The temp folder is always deleted. The temp pdf folder is deleted too
    when the 'output_pdf' setting is truthy.
    '''
    def remove(folder):
        # also_folder=True is passed on every call, exactly as before
        fs.clear_folder(folder, also_folder=True)

    remove(self.temp_folder)
    if not self.settings['output_pdf']:
        return
    remove(self.temp_pdf_folder)
def step(self):
    '''
    Wait until OCR processing is complete before finishing.

    The existing bw-pdf folder is cleared first, then the step waits for the
    PDF-file to be ready on the OCR-server.

    Returns:
        Whatever self.waitForOcr() returns when no exception occurs
        (NOTE(review): per the original comment a value here signals a
        timeout that sends the process back to the previous step - confirm
        against waitForOcr), or an error message string when an IOError or
        ValueError was raised.
    '''
    error = None
    try:
        # Get and set variables
        self.getVariables()
        # Delete existing bw-pdf file
        fs.clear_folder(self.goobi_pdf)
        # Wait for PDF-file to be ready on OCR-server
        error = self.waitForOcr()
    except IOError as e:
        # An IOError raised with only a message has strerror == None, so
        # fall back to the exception text instead of reporting "None".
        error = 'Error reading from directory {0}'.format(e.strerror or e)
    except ValueError as e:
        # Caused by conversion of non-numeric strings in config to nums.
        # ValueError has no 'strerror' attribute; format the exception itself.
        error = 'Invalid config data supplied, error: {0}'.format(e)
    return error
def step(self):
    '''
    Create one color pdf from the images in the input folder.

    Returns:
        None when the conversion went through, otherwise the text of the
        image_tools.ConvertError that was raised.
    '''
    try:
        # Get and set variables
        self.getVariables()

        # Folder for the temporary pdf-files
        fs.create_folder(self.temp_folder)

        # Remove a color pdf left behind by an earlier run
        fs.clear_folder(self.pdf_color_folder_path)

        # Log what is about to be done, then convert the images to one pdf
        template = ('Creating PDF-file from images in "{0}". Outputting to temp '
                    'folder "{1}" and creates one PDF with path "{2}". Resize '
                    'PDF to {3}% and compress with quality {4}%')
        self.debug_message(template.format(self.input_folder,
                                           self.temp_folder,
                                           self.color_pdf_path,
                                           self.resize,
                                           self.quality))
        convert.createPdfFromFolder(src=self.input_folder,
                                    file_dest=self.color_pdf_path,
                                    temp_folder=self.temp_folder,
                                    quality=self.quality,
                                    resize_pct=self.resize,
                                    valid_exts=self.valid_exts)
    except image_tools.ConvertError as e:
        return str(e)
    return None
def addFrontispiecesToPdfs(self):
    '''
    Add the frontispieces to both the bw-pdf and the color pdf.

    A temp folder named after the process title is created under temp_root
    and handed to addFrontispiecesToPdf for each of the two pdf-files. The
    temp folder is deleted afterwards, also when one of the calls raises,
    so a failed run does not leave temp files behind.
    '''
    # Create temp folder for temp pdf-files
    temp_folder = os.path.join(self.temp_root, self.process_title)
    tools.create_folder(temp_folder)
    try:
        self.addFrontispiecesToPdf(self.frontispieces_bw,
                                   self.pdf_bw_path,
                                   temp_folder)
        self.addFrontispiecesToPdf(self.frontispieces,
                                   self.pdf_color_path,
                                   temp_folder)
    finally:
        # Delete temp_folder whether or not the pdf-files were updated
        fs.clear_folder(temp_folder, also_folder=True)
def processFiles(self):
    '''
    Process all the files, then take care of the bindings.

    Every image path in img_proc_info is processed in sorted order; the
    returned time statistics are added to the running average and the temp
    folder is cleared after each file.

    When the 'has_binding' setting is set and 'remove_binding' is not, each
    binding is copied to the output image location ('output_images') and/or
    compressed to a pdf in the temp pdf folder ('output_pdf').
    '''
    for path in sorted(self.img_proc_info['images'].keys()):
        stats = self.processFile(path, self.img_proc_info['images'][path])
        self.add_to_avg_time_stat(stats)
        fs.clear_folder(self.temp_folder)

    # Nothing more to do unless bindings exist and are meant to be kept
    if not self.settings['has_binding'] or self.settings['remove_binding']:
        return

    for binding in self.bindings:
        # Strip a trailing separator so basename yields the last component
        base_name = os.path.basename(binding.rstrip(os.sep))
        stem = os.path.splitext(base_name)[0]
        pdf_target = os.path.join(self.temp_pdf_folder, stem + '.pdf')
        if self.settings['output_images']:
            shutil.copy2(binding, self.output_image_location)
        if self.settings['output_pdf']:
            image_tools.compressFile(binding, pdf_target, resize=50, quality=33)
def addBindingsToPdf(self):
    '''
    Put the front and back binding around the bw-pdf.

    The first and last image in the master image folder are compressed to
    pdf-files using the density read from the existing bw-pdf. The three
    pdf-files are joined in a temp folder and the result is moved over the
    bw-pdf (overwrite).

    The temp folder is deleted afterwards, also when one of the steps
    raises, so a failed run does not leave temp files behind.

    NOTE(review): images[0] / images[-1] fail with IndexError when no image
    with a valid extension is found - presumably the caller guarantees at
    least one image; confirm.
    '''
    # Get density for bw-pdf (i.e. DPI/PixelsPerInch)
    density = pdf_tools.getDensity(src=self.pdf_bw_path, layer=0)

    # Create temp folder for temp pdf-files
    temp_folder = os.path.join(self.temp_root, self.process_title)
    tools.create_folder(temp_folder)
    try:
        # Get path for first and last image
        images = fs.getFilesInFolderWithExts(self.img_master_path,
                                             self.valid_exts,
                                             absolute=True)

        # Create PDF of bindings (first and last image in master image folder)
        front_image_path = images[0]
        end_image_path = images[-1]
        front_pdf_path = os.path.join(temp_folder, 'front.pdf')
        end_pdf_path = os.path.join(temp_folder, 'end.pdf')
        image_tools.compressFile(input_file=front_image_path,
                                 output_file=front_pdf_path,
                                 quality=self.quality,
                                 resize=self.resize,
                                 density=density)
        image_tools.compressFile(input_file=end_image_path,
                                 output_file=end_pdf_path,
                                 quality=self.quality,
                                 resize=self.resize,
                                 density=density)

        # Add front and back-binding to pdf
        pdf_list = [front_pdf_path, self.pdf_bw_path, end_pdf_path]
        temp_dest = os.path.join(temp_folder, self.process_title + '.pdf')
        pdf_tools.joinPdfFiles(pdf_list, temp_dest)

        # Move new pdf from temp to bw-pdf location (overwrite)
        shutil.move(temp_dest, self.pdf_bw_path)
    finally:
        # Delete temp_folder whether or not the bw-pdf was replaced
        fs.clear_folder(temp_folder, also_folder=True)
def getCropCoordinates(self):
    '''
    Find the crop coordinates for every image in img_proc_info.

    For each image (in sorted path order) innercrop is run, optionally on a
    bw version of the image when the 'bw_for_innercrop' setting is set. The
    coordinates are stored under 'crop_coordinates' for that image, the temp
    folder is cleared and the timings are added to the average time stats.

    When debugging is on, progress is logged each time the number of
    processed images is a multiple of the 'debug_pivot' setting.
    '''
    image_paths = sorted(self.img_proc_info['images'].keys())
    debug_pivot = self.settings['debug_pivot']
    if self.debug:
        self.logger.debug('Get crop coordinates')

    for image_path in image_paths:
        entry = self.img_proc_info['images'][image_path]
        width = entry['image_width']
        height = entry['image_height']

        # Step 1: pick the source for innercrop (bw copy or the image itself)
        started = time.time()
        if not self.settings['bw_for_innercrop']:
            crop_src = image_path
        else:
            bw_threshold = self.settings['innercrop_bw_src_threshold']
            stem = os.path.splitext(os.path.basename(image_path))[0]
            bw_dest = os.path.join(self.temp_folder,
                                   stem + '_bw_for_innercrop.tif')
            crop_src = image_tools.convertToBw(image_path, bw_dest,
                                               threshold=bw_threshold)
        bw_elapsed = time.time() - started

        # Step 2: run innercrop and store the coordinates
        started = time.time()
        fuzzval = self.settings['innercrop_fuzzval']
        mode = self.settings['innercrop_mode']
        #===================================================================
        # TODO: Add a try-except here. If non-valid output, raise error
        # except error and set crop to False (e.g. use mean/avg crops later)
        #===================================================================
        result = image_tools.innercrop(crop_src, self.temp_folder,
                                       w=width, h=height,
                                       innercrop_path=self.innercrop_exe_path,
                                       mode=mode, fuzzval=fuzzval)
        _, coordinates = result
        entry['crop_coordinates'] = coordinates
        crop_elapsed = time.time() - started

        fs.clear_folder(self.temp_folder)
        self.add_to_avg_time_stat({
            'BW to get crop coordinates': bw_elapsed,
            'Get crop coordinates': crop_elapsed,
        })

        if not self.debug:
            continue
        crop_stat = self.img_proc_info['avg_time_stat']['Get crop coordinates']
        count = crop_stat[1]
        avg = crop_stat[2]
        # Only log once per debug_pivot processed images
        if (count % debug_pivot) != 0:
            continue
        left = len(image_paths) - count
        time_used = get_delta_time(count * avg)
        time_left = get_delta_time(left * avg)
        msg = ('\t{0} images cropped, {1} images left, '
               '{2} time elapsed, {3} est. time left.')
        self.logger.debug(msg.format(count, left, time_used, time_left))
def createPdfFromFolder(src, file_dest, temp_folder,
                        quality=50, resize_pct=50, valid_exts=None):
    '''
    Use ImageMagick to create one pdf from all the images in a folder and
    output to a given destination.

    Create a pdf of each image and place in temp folder. Merge output
    pdf-files to pdf-dest and remove temp folder.

    Parameters:
        src: folder holding the input images.
        file_dest: path of the merged pdf-file.
        temp_folder: folder for the per-image pdf-files; it is removed at
            the end.
        quality: passed on to image_tools.compressFile.
        resize_pct: passed on to image_tools.compressFile.
        valid_exts: extensions of the images to include; defaults to
            ['jpg', 'tif'] when omitted.
    '''
    # A fresh list per call instead of one shared mutable default
    if valid_exts is None:
        valid_exts = ['jpg', 'tif']

    image_names = fs.getFilesInFolderWithExts(src, valid_exts)
    for image_name in image_names:
        # Take the stem from the unquoted name. Splitting the quoted name
        # left the closing quote on the extension, so the output name came
        # out as '"name.pdf' with an unbalanced quote.
        stem, _ = os.path.splitext(image_name)
        # Handle spaces in filenames
        # NOTE(review): quoting the file names assumes compressFile builds a
        # shell command string from these paths - confirm.
        input_path = os.path.join(src, '"' + image_name + '"')
        output_path = os.path.join(temp_folder, '"' + stem + '.pdf"')
        image_tools.compressFile(input_path, output_path, quality, resize_pct)
    pdf_misc.mergePdfFilesInFolder(temp_folder, file_dest)
    fs.clear_folder(temp_folder, also_folder=True)
def step(self):
    '''
    Preprocess the master images.

    Previously preprocessed images are removed before an ImagePreprocessor
    is run on the master image folder.

    Returns:
        None when preprocessing went through, otherwise the text of the
        image_tools.ConvertError that was raised.
    '''
    try:
        # Get and set variables and settings for preprocessing;
        # self.settings contains the settings for the preprocessing script
        self.getVariables()

        # Remove previously preprocessed images
        fs.clear_folder(self.img_pre_processed_path)

        # Preprocess images
        preprocessor = image_preprocessor.ImagePreprocessor(
            self.img_master_path,
            self.settings,
            self.glogger,
            self.debug)
        preprocessor.processFolder()
    except image_tools.ConvertError as e:
        return str(e)
    return None
def step(self):
    '''
    Reset the process: clean up the METS file and clear generated files.

    Returns:
        None when everything went through. When a ValueError is raised the
        exception object itself is returned (kept as is for callers). When
        an IOError is raised a non-empty description of it is returned.
    '''
    error = None
    try:
        self.get_variables()
        # Check if we have a METS file (meta.xml)
        self.check_paths()
        # Clean up the METS file (meta.xml)
        reset.reset_mets_file(self.command_line.process_path)
        # Clear files from Limb and Goobi (if any)
        fs.clear_folder(self.goobi_altos)
        fs.clear_folder(self.goobi_toc)
        fs.clear_folder(self.goobi_ojs)
        fs.clear_folder(self.goobi_pdf)
        fs.clear_folder(self.splitted_pdfs)
        fs.clear_folder(self.thumbnails)
    except ValueError as e:
        error = e
    except IOError as e:
        # strerror is None for an IOError raised with only a message;
        # returning it would make the failure look like success. Fall back
        # to the exception text, and to repr() so the result is never empty.
        error = e.strerror or str(e) or repr(e)
    return error