def deleteWorkingFolders(self):
     '''
     Remove the temporary working folders.

     self.temp_folder is always cleared and removed; self.temp_pdf_folder
     is cleared and removed only when the 'output_pdf' setting is truthy.
     '''
     fs.clear_folder(self.temp_folder, also_folder=True)
     if not self.settings['output_pdf']:
         # No PDF output was requested, so there is no PDF folder to drop
         return
     fs.clear_folder(self.temp_pdf_folder, also_folder=True)
# Example no. 2
# 0
 def step(self):
     '''
     This script's role is to wait until
     ocr processing is complete before finishing.
     In the event of a timeout, it reports back to
     previous step before exiting.

     Returns whatever self.waitForOcr() returns when no exception is
     raised, otherwise a message describing the IOError or ValueError
     that interrupted the step.
     '''
     error = None
     try:
         #===================================================================
         # Get and set variables
         #===================================================================
         self.getVariables()
         #===================================================================
         # Delete existing bw-pdf file
         #===================================================================
         fs.clear_folder(self.goobi_pdf)
         #===================================================================
         # Wait for PDF-file to be ready on OCR-server
         #===================================================================
         error = self.waitForOcr()
     except IOError as e:
         # if we get an IO error we need to crash
         error = ('Error reading from directory {0}')
         # strerror is None when the IOError was raised with a bare
         # message; fall back to the exception text in that case
         error = error.format(e.strerror or e)
         return error
     except ValueError as e:
         # caused by conversion of non-numeric strings in config to nums
         error = "Invalid config data supplied, error: {0}"
         # ValueError has no strerror attribute, so format the exception
         # itself instead of raising AttributeError from the handler
         error = error.format(e)
         return error
     # if we've gotten this far, we've timed out and need to go back to the previous step
     return error
 def step(self):
     '''
     Create one color PDF from the images in the input folder.

     Reads the step variables, creates the temp folder, clears the color
     pdf folder, logs what is about to happen and then calls
     convert.createPdfFromFolder.

     Returns None when no image_tools.ConvertError is raised, otherwise
     the text of that error.
     '''
     try:
         # Get and set variables
         self.getVariables()
         # Create folder for temporary pdf-files
         fs.create_folder(self.temp_folder)
         # Delete previously created color pdf
         fs.clear_folder(self.pdf_color_folder_path)
         # Report the conversion parameters before starting
         template = ('Creating PDF-file from images in "{0}". Outputting to temp '
                     'folder "{1}" and creates one PDF with path "{2}". Resize '
                     'PDF to {3}% and compress with quality {4}%')
         self.debug_message(template.format(self.input_folder,
                                            self.temp_folder,
                                            self.color_pdf_path,
                                            self.resize,
                                            self.quality))
         # Convert input images to one pdf
         convert.createPdfFromFolder(src=self.input_folder,
                                     file_dest=self.color_pdf_path,
                                     temp_folder=self.temp_folder,
                                     quality=self.quality,
                                     resize_pct=self.resize,
                                     valid_exts=self.valid_exts)
     except image_tools.ConvertError as exc:
         return str(exc)
     return None
 def addFrontispiecesToPdfs(self):
     '''
     Add frontispieces to the PDFs at self.pdf_bw_path and
     self.pdf_color_path.

     A temp folder named after the process title is created under
     self.temp_root, handed to self.addFrontispiecesToPdf for both PDFs
     and removed again afterwards, also when one of the calls raises.
     '''
     # Create temp folder for temp pdf-files
     temp_folder = os.path.join(self.temp_root,self.process_title)
     tools.create_folder(temp_folder)
     try:
         self.addFrontispiecesToPdf(self.frontispieces_bw,self.pdf_bw_path,temp_folder)
         self.addFrontispiecesToPdf(self.frontispieces,self.pdf_color_path,temp_folder)
     finally:
         # Delete temp_folder even on failure so no stale files are left
         # behind for the next run with the same process title
         fs.clear_folder(temp_folder, also_folder=True)
 def processFiles(self):
     '''
     Process every registered image and, if requested, the bindings.

     Each image path in self.img_proc_info['images'] is passed, in sorted
     order, to self.processFile together with its info entry; the
     returned timing is added to the running averages and the temp folder
     is cleared after each image.

     When the settings say the material has a binding that must not be
     removed, each binding file is copied to the image output location
     and/or compressed to a pdf in the temp pdf folder, depending on the
     'output_images' and 'output_pdf' settings.
     '''
     for file_path in sorted(self.img_proc_info['images'].keys()):
         proc_time_stat = self.processFile(
             file_path, self.img_proc_info['images'][file_path])
         self.add_to_avg_time_stat(proc_time_stat)
         fs.clear_folder(self.temp_folder)
     # Nothing more to do unless bindings exist and are to be kept
     if not self.settings['has_binding'] or self.settings['remove_binding']:
         return
     for binding in self.bindings:
         base_name = os.path.basename(binding.rstrip(os.sep))
         stem = os.path.splitext(base_name)[0]
         pdf_dest = os.path.join(self.temp_pdf_folder,stem+'.pdf')
         if self.settings['output_images']:
             shutil.copy2(binding,self.output_image_location)
         if self.settings['output_pdf']:
             image_tools.compressFile(binding,pdf_dest,resize=50,quality=33)
 def addBindingsToPdf(self):
     '''
     Put a front and an end binding page around the bw-pdf.

     The first and the last entry of the image list found in
     self.img_master_path are compressed to 'front.pdf' and 'end.pdf' in
     a temp folder, joined with the bw-pdf in between, and the result is
     moved over self.pdf_bw_path.

     The temp folder is removed afterwards, also when a step fails (for
     instance IndexError when no images are found).
     '''
     #=======================================================================
     # Get density for bw-pdf (i.e. DPI/PixelsPerInch)
     #=======================================================================
     density = pdf_tools.getDensity(src=self.pdf_bw_path,layer=0)
     #=======================================================================
     # Create temp folder for temp pdf-files
     #=======================================================================
     temp_folder = os.path.join(self.temp_root,self.process_title)
     tools.create_folder(temp_folder)
     try:
         #===================================================================
         # Get path for first and last image
         # NOTE(review): assumes the helper returns the files in page
         # order -- confirm against fs.getFilesInFolderWithExts
         #===================================================================
         images = fs.getFilesInFolderWithExts(self.img_master_path,
                                              self.valid_exts,
                                              absolute=True)
         #===================================================================
         # Create PDF of bindings (first and last image in master image
         # folder)
         #===================================================================
         front_pdf_path = os.path.join(temp_folder,'front.pdf')
         end_pdf_path = os.path.join(temp_folder,'end.pdf')
         bindings = ((images[0],front_pdf_path),
                     (images[-1],end_pdf_path))
         for image_path,pdf_path in bindings:
             image_tools.compressFile(input_file     = image_path,
                                      output_file    = pdf_path,
                                      quality        = self.quality,
                                      resize         = self.resize,
                                      density        = density)
         #===================================================================
         # Add front and back-binding to pdf
         #===================================================================
         pdf_list = [front_pdf_path,self.pdf_bw_path,end_pdf_path]
         temp_dest = os.path.join(temp_folder,self.process_title+'.pdf')
         pdf_tools.joinPdfFiles(pdf_list, temp_dest)
         #===================================================================
         # Move new pdf from temp to bw-pdf location (overwrite)
         #===================================================================
         shutil.move(temp_dest, self.pdf_bw_path)
     finally:
         #===================================================================
         # Delete temp_folder, whether or not the steps above succeeded
         #===================================================================
         fs.clear_folder(temp_folder, also_folder=True)
 def getCropCoordinates(self):
     '''
     Find and store crop coordinates for every registered image.

     For each image path (in sorted order) the image is optionally
     converted to bw first, image_tools.innercrop is run on the result
     and the returned coordinates are stored under 'crop_coordinates' in
     the image's info entry. The time used by each of the two phases is
     handed to self.add_to_avg_time_stat and the temp folder is cleared
     after every image.

     With self.debug set, a progress line is logged each time the
     processed count is a multiple of the 'debug_pivot' setting.
     '''
     image_paths = sorted(self.img_proc_info['images'].keys())
     debug_pivot = self.settings['debug_pivot']
     if self.debug:
         self.logger.debug('Get crop coordinates')
     total = len(image_paths)
     for image_path in image_paths:
         image_info = self.img_proc_info['images'][image_path]
         w = image_info['image_width']
         h = image_info['image_height']
         timings = {}
         started = time.time()
         # Default to cropping on the original; swap in a bw copy below
         src = image_path
         if self.settings['bw_for_innercrop']:
             threshold = self.settings['innercrop_bw_src_threshold']
             stem = os.path.splitext(os.path.basename(image_path))[0]
             dest = os.path.join(self.temp_folder,stem+'_bw_for_innercrop.tif')
             src = image_tools.convertToBw(image_path,dest,threshold=threshold)
         timings['BW to get crop coordinates'] = time.time()-started
         started = time.time()
         fuzzval = self.settings['innercrop_fuzzval']
         mode = self.settings['innercrop_mode']
         #===================================================================
         # TODO: Add an try-except here. If non-valid output, raise error
         # except error and set crop to False (e.g. use mean/avg crops later)
         #===================================================================
         _,coordinates = image_tools.innercrop(
             src,self.temp_folder,w=w,h=h,
             innercrop_path=self.innercrop_exe_path,
             mode=mode,fuzzval=fuzzval)
         self.img_proc_info['images'][image_path]['crop_coordinates'] = coordinates
         timings['Get crop coordinates'] = time.time()-started
         fs.clear_folder(self.temp_folder)
         self.add_to_avg_time_stat(timings)
         if not self.debug:
             continue
         crop_stat = self.img_proc_info['avg_time_stat']['Get crop coordinates']
         count = crop_stat[1]
         avg = crop_stat[2]
         # Only log for every debug_pivot'th processed image
         if (count%debug_pivot) != 0:
             continue
         left = total-count
         time_used = get_delta_time(count*avg)
         time_left = get_delta_time(left*avg)
         msg = ('\t{0} images cropped, {1} images left, '
                '{2} time elapsed, {3} est. time left.')
         self.logger.debug(msg.format(count,left,time_used,time_left))
def createPdfFromFolder(src, file_dest,temp_folder,
                        quality=50,resize_pct=50,valid_exts=None):
    '''
    Use ImageMagick to create one pdf from all the images in a folder and 
    output to a given destination.
    
    Create a pdf of each image and place in temp folder. Merge output pdf-files
    to pdf-dest and remove temp folder.
    
    :param src: folder holding the source images
    :param file_dest: path of the merged pdf to create
    :param temp_folder: folder for the per-image pdf-files; it is cleared
        and removed when the merge is done
    :param quality: passed on to image_tools.compressFile
    :param resize_pct: passed on to image_tools.compressFile
    :param valid_exts: extensions of the images to include, defaults to
        ['jpg','tif']
    '''
    if valid_exts is None:
        # Fresh list per call instead of one shared mutable default
        valid_exts = ['jpg','tif']
    image_paths = fs.getFilesInFolderWithExts(src, valid_exts)
    for image in image_paths:
        # Split off the extension BEFORE quoting; splitting the quoted name
        # drops the closing quote and leaves an unbalanced '"' in the
        # output path
        file_name,_ = os.path.splitext(image)
        # Handle spaces in filenames
        # NOTE(review): assumes compressFile hands these paths to a shell,
        # which is what the embedded quotes are for -- confirm
        input_path = os.path.join(src,'"' + image + '"')
        output_path = os.path.join(temp_folder,'"' + file_name + '.pdf"')
        image_tools.compressFile(input_path, output_path, quality, resize_pct)
    pdf_misc.mergePdfFilesInFolder(temp_folder,file_dest)
    fs.clear_folder(temp_folder,also_folder=True)
 def step(self):
     '''
     Preprocess the master images.

     Reads the step variables and settings (self.settings holds the
     settings for the preprocessing script), clears the folder with
     previously preprocessed images and runs an ImagePreprocessor over
     the master image folder.

     Returns None when no image_tools.ConvertError is raised, otherwise
     the text of that error.
     '''
     try:
         # Get and set variables and settings for preprocessing
         self.getVariables()
         # Remove previously preprocessed images
         fs.clear_folder(self.img_pre_processed_path)
         # Preprocess images
         preprocessor = image_preprocessor.ImagePreprocessor(
             self.img_master_path,
             self.settings,
             self.glogger,
             self.debug)
         preprocessor.processFolder()
     except image_tools.ConvertError as exc:
         return str(exc)
     return None
 def step(self):
     '''
     Reset the process: clean the METS file and clear generated files.

     Returns None on success. A ValueError is returned as the exception
     object itself; an IOError is returned as a text describing it.
     '''
     error = None
     try:
         self.get_variables()
         # Check if we have a METS file (meta.xml)
         self.check_paths()
         # Clean up the METS file (meta.xml)
         reset.reset_mets_file(self.command_line.process_path)
         # Clear files from Limb and Goobi (if any)
         fs.clear_folder(self.goobi_altos)
         fs.clear_folder(self.goobi_toc)
         fs.clear_folder(self.goobi_ojs)
         fs.clear_folder(self.goobi_pdf)
         fs.clear_folder(self.splitted_pdfs)
         fs.clear_folder(self.thumbnails)
     except ValueError as e:
         error = e
     except IOError as e:
         # strerror is None when the IOError carries only a message;
         # returning that None would report the failure as success, so
         # fall back to the exception text (or its repr when empty)
         error = e.strerror or str(e) or repr(e)
     return error