def waitForPreprocessedImages(self):
     """
     Poll the preprocessed-image folder until it holds the expected number
     of images.

     The folder ``self.source_folder`` is checked up to
     ``self.pp_retry_num + 1`` times, sleeping ``self.pp_retry_wait``
     seconds between two consecutive checks.

     Returns True when the number of files matching ``self.valid_exts``
     equals ``self.expected_image_count`` (after an extra 30 second pause),
     and False when there are more files than expected or when all checks
     have been used up.
     """
     for attempt in range(self.pp_retry_num + 1):
         # ==================================================================
         # Get current number of preprocessed images
         # ==================================================================
         pp_files = fs.getFilesInFolderWithExts(self.source_folder, self.valid_exts)
         pp_count = len(pp_files)
         # ==================================================================
         # Exit loop when preprocessed images are ready
         # ==================================================================
         if pp_count == self.expected_image_count:
             # Wait 30 sec to make sure images are completely copied
             time.sleep(30)
             return True
         # ==================================================================
         # This shouldn't happen, but we have seen pdf's with duplicate
         # pages, so better check. The "pp_count > 0" part keeps waiting
         # when the folder is still empty and the expected count is negative.
         # ==================================================================
         if pp_count > self.expected_image_count and pp_count > 0:
             # Report the number of files, not the file list itself
             self.debug_message("Der er flere preprocesserede billeder ({}) end scannede billeder ({})"
                                .format(pp_count, self.expected_image_count))
             return False
         # ==================================================================
         # No further check follows the last attempt, so don't wait after it
         # ==================================================================
         if attempt == self.pp_retry_num:
             break
         # ==================================================================
         # Wait "self.pp_retry_wait" seconds before the next check
         # ==================================================================
         self.debug_message("Preprocesserede billeder ikke klar, venter {} sek".format(self.pp_retry_wait))
         self.debug_message("Retry {} of {}".format(attempt + 1, self.pp_retry_num))
         time.sleep(self.pp_retry_wait)
     return False
 def addBindingsToPdf(self):
     """
     Wrap the bw-pdf in a front and a back binding page.

     The first and last image in ``self.img_master_path`` are each
     converted to a single pdf in a temporary folder, joined around
     ``self.pdf_bw_path``, and the joined file then replaces the bw-pdf.
     The temporary folder is removed afterwards.
     """
     # Density of the bw-pdf (i.e. DPI/PixelsPerInch), reused for the bindings
     density = pdf_tools.getDensity(src=self.pdf_bw_path, layer=0)

     # Working folder for the intermediate pdf-files
     work_dir = os.path.join(self.temp_root, self.process_title)
     tools.create_folder(work_dir)

     # Master images; the bindings are the first and the last of them
     master_images = fs.getFilesInFolderWithExts(self.img_master_path,
                                                 self.valid_exts,
                                                 absolute=True)
     front_pdf = os.path.join(work_dir, 'front.pdf')
     end_pdf = os.path.join(work_dir, 'end.pdf')
     bindings = [(master_images[0], front_pdf),
                 (master_images[-1], end_pdf)]

     # Convert each binding image to a pdf (front first, then end)
     for image_path, pdf_path in bindings:
         image_tools.compressFile(input_file=image_path,
                                  output_file=pdf_path,
                                  quality=self.quality,
                                  resize=self.resize,
                                  density=density)

     # Join front binding + bw-pdf + back binding into one temporary pdf
     merged_pdf = os.path.join(work_dir, self.process_title + '.pdf')
     pdf_tools.joinPdfFiles([front_pdf, self.pdf_bw_path, end_pdf], merged_pdf)

     # Move new pdf from temp to bw-pdf location (overwrite)
     shutil.move(merged_pdf, self.pdf_bw_path)

     # Remove the working folder and its contents
     fs.clear_folder(work_dir, also_folder=True)
def createPdfFromFolder(src, file_dest,temp_folder,
                        quality=50,resize_pct=50,valid_exts=['jpg','tif']):
    '''
    Create one pdf from all the images in a folder and output it to a
    given destination.

    A pdf is created for each image in "src" whose extension is in
    "valid_exts" and placed in "temp_folder". The resulting pdf-files are
    then merged to "file_dest" and "temp_folder" is removed.

    src:         folder containing the source images
    file_dest:   path of the merged pdf
    temp_folder: folder for the intermediate per-image pdf-files
    quality:     passed on to image_tools.compressFile
    resize_pct:  passed on to image_tools.compressFile
    valid_exts:  image extensions to include

    NOTE(review): the file names are wrapped in double quotes, presumably
    because compressFile builds a shell command line - confirm.
    '''
    image_paths = fs.getFilesInFolderWithExts(src, valid_exts)
    for image in image_paths:
        # Split the extension off the *unquoted* name. Splitting the quoted
        # name would leave the closing quote in the discarded extension and
        # produce an output name with an unbalanced quote ('"name.pdf').
        file_name,_ = os.path.splitext(image)
        # Handle spaces in filenames: quote input and output name alike
        input_path = os.path.join(src, '"' + image + '"')
        output_path = os.path.join(temp_folder, '"' + file_name + '.pdf"')
        image_tools.compressFile(input_path, output_path, quality, resize_pct)
    pdf_misc.mergePdfFilesInFolder(temp_folder,file_dest)
    fs.clear_folder(temp_folder,also_folder=True)
 def step(self):
     """
     Copy the preprocessed images to the OCR-server hotfolder.

     Reads the required variables, waits for the preprocessed images to be
     ready and then copies them from ``self.source_folder`` to
     ``self.hotfolder_dir`` via ``self.transit_dir``.

     Returns None on success, otherwise a string describing the error.
     Exceptions are not propagated to the caller.
     """
     error = None
     try:
         self.getVariables()
         msg = 'Copying files from {0} to {1} via transit {2}.'
         msg = msg.format(self.source_folder, self.hotfolder_dir, self.transit_dir)
         self.debug_message(msg)
         # ==================================================================
         # Wait for preprocessed images to be ready
         # Returns false if it times out or finds too many images
         # ==================================================================
         if not self.waitForPreprocessedImages():
             pp_files = fs.getFilesInFolderWithExts(self.source_folder, self.valid_exts)
             # Report the number of files, not the file list itself
             raise Exception('Timed out or count error while waiting for pre-processing of '
                             'images. Current number of processed images: '
                             '{0}. Expected amount: {1}'.format(len(pp_files), self.expected_image_count))
         # ==================================================================
         # Copy files to OCR-server
         # ==================================================================
         self.debug_message("Start copy of preprocessed images to OCR-server")
         tools.copy_files(source          = self.source_folder,
                          dest            = self.hotfolder_dir,
                          transit         = self.transit_dir,
                          delete_original = False,
                          wait_interval   = self.retry_wait,
                          max_retries     = self.retry_num,
                          logger          = self.glogger,
                          valid_exts      = self.valid_exts)
         self.debug_message("Finished copy of preprocessed images to OCR-server")
     except (errors.TransferError, errors.TransferTimedOut) as e:
         # Transfer errors carry their message in "strerror"
         error = e.strerror
     except Exception as e:
         error = str(e)
     return error
    def getVariables(self):
        """
        Read every value this step needs from the command line and the
        config, and store them as attributes on the instance.

        Sets: master_folder, source_folder, transit_dir, hotfolder_dir,
        retry_wait, retry_num, valid_exts, pp_retry_wait, pp_retry_num and
        expected_image_count.
        """
        title = self.command_line.process_title
        process_root = self.command_line.process_path

        # Folder with master image files
        master_rel = self.getConfigItem('img_master_path',
                                        section=self.folder_structure_section)
        self.master_folder = os.path.join(process_root, master_rel)

        # Folder with preprocessed files
        preprocessed_rel = self.getConfigItem('img_pre_processed_path',
                                              section=self.folder_structure_section)
        self.source_folder = os.path.join(process_root, preprocessed_rel)

        # legr: Get the correct OCR server for the process - antikva or
        # fraktur. Report an error if the argument is missing or has an
        # invalid name.
        # legr: currently antikva on ocr-01 and fraktur on ocr-02
        server_settings = {
            'antikva': ('ocr_antikva_transit', 'ocr_antikva_hotfolder'),
            'fraktur': ('ocr_fraktur_transit', 'ocr_fraktur_hotfolder'),
        }
        try:
            workflow = self.getSetting('ocr_workflow_type').lower()
        except KeyError:
            self.error_message('{0} er ikke givet med som variabel til scriptet.'.format('ocr_workflow_type'))

        if workflow in server_settings:
            transit_key, hotfolder_key = server_settings[workflow]
            transit_root = self.getSetting(transit_key)
            hotfolder_root = self.getSetting(hotfolder_key)
        else:
            err = ('Variablen "{0}" fra kaldet af "{1}" skal enten vaere '
                   '"fraktur" eller "antikva", men er pt. "{2}".')
            self.error_message(err.format('ocr_workflow_type', self.name, workflow))

        self.transit_dir = os.path.join(transit_root, title)
        self.hotfolder_dir = os.path.join(hotfolder_root, title)

        # Retry wait time and retry count for copying files
        self.retry_wait = int(self.getConfigItem('retry_wait'))
        self.retry_num = int(self.getConfigItem('retry_num'))

        # Valid extensions for image files to check as preprocessed
        raw_exts = self.getConfigItem('valid_file_exts', None, self.valid_exts_section)
        self.valid_exts = raw_exts.split(';')

        # Wait time and retry count for preprocessed images to be ready
        self.pp_retry_wait = int(self.getConfigItem('preprocess_retry_wait'))
        self.pp_retry_num = int(self.getConfigItem('preprocess_retry_num'))

        # Source images minus first and last image
        master_images = fs.getFilesInFolderWithExts(self.master_folder, self.valid_exts)
        self.expected_image_count = len(master_images) - 2