def run(self):
        """Run Tesseract on the job's page image and record the output paths.

        Returns whatever ``self.results`` returns. Exit code 1 is reported,
        without launching anything, when the job has no image path or the
        image file does not exist. A non-zero exit code from the command is
        passed through together with the process's stdout and stderr.
        """
        image_path = self.job.image_path
        if not image_path:
            return self.results(
                stdout=None,
                stderr="No image path could be determined",
                exitcode=1,
            )
        if not os.path.isfile(image_path):
            return self.results(
                stdout=None,
                stderr="Could not find page image %s" % image_path,
                exitcode=1,
            )

        # The output parent directory has to exist before the command runs.
        parent_dir = self.output_parent_dir
        if not os.path.isdir(parent_dir):
            mkdirs_exists_ok(parent_dir)

        # The job's font name is handed to tesseract through -l; self.cfg is
        # always appended as the final argument.
        proc = exec_cmd([
            "tesseract",
            image_path,
            self.output_filename,
            "-l",
            self.job.font.name,
            self.cfg,
        ])
        if proc.exitcode != 0:
            return self.results(stdout=proc.stdout, stderr=proc.stderr, exitcode=proc.exitcode)

        # Move the hOCR output to the XML path, but never replace an XML
        # file that is already there.
        hocr_file = self.job.hocr_file
        xml_file = self.job.xml_file
        if os.path.isfile(hocr_file):
            if not os.path.isfile(xml_file):
                logger.debug("Renaming %s to %s" % (hocr_file, xml_file))
                os.rename(hocr_file, xml_file)

        page_result = self.job.page_result
        page_result.ocr_text_path = self.job.txt_file
        page_result.ocr_xml_path = self.job.xml_file
        return self.results(stdout=None, stderr=None, exitcode=0)
    def run(self):
        """Run the Ocular ``Transcribe`` main class and collect its outputs.

        The input document list is generated first. Exit code 1 is reported,
        without launching Java, when the input font path is unset or is not
        an existing file. A non-zero exit code from the command is passed
        through together with the process's stdout and stderr. On success,
        each page job found in ``self.job.jobs`` gets its text and ALTO XML
        result paths set, but only for files that actually exist.
        """
        self.generate_input_doc_list()

        font_path = self.input_font_path
        if not font_path:
            return self.results(
                stdout=None,
                stderr="No input font path could be determined",
                exitcode=1,
            )
        if not os.path.isfile(font_path):
            return self.results(
                stdout=None,
                stderr="Could not find input font path %s" % font_path,
                exitcode=1,
            )

        # The output directory has to exist before the command runs.
        if not os.path.isdir(self.output_path):
            mkdirs_exists_ok(self.output_path)

        # Fixed head of the command line, followed by flag/value pairs.
        cmd = [
            "java", self.java_max_heap,
            "-Done-jar.main.class=edu.berkeley.cs.nlp.ocular.main.Transcribe",
            "-jar", self.jar,
        ]
        options = (
            ("-outputPath", self.output_path),
            ("-inputDocListPath", self.input_doc_list_path),
            ("-inputFontPath", self.input_font_path),
            ("-inputLmPath", self.input_lm_path),
            ("-inputGsmPath", self.input_gsm_path),
            ("-allowGlyphSubstitution", "true"),
            ("-skipAlreadyTranscribedDocs", "true"),
            ("-emissionEngine", self.job.settings.ocular_emission_engine),
        )
        for flag, value in options:
            cmd.extend((flag, value))

        extras = self.extra_command_parameters
        if extras:
            cmd = cmd + extras

        proc = exec_cmd(cmd, realtime=True)
        if proc.exitcode != 0:
            return self.results(stdout=proc.stdout, stderr=proc.stderr, exitcode=proc.exitcode)

        # Derive the expected transcription file names from each page's image
        # name; a result path is only recorded when the file is present.
        for page_job in self.job.jobs:
            stem = os.path.splitext(os.path.basename(page_job.image_path))[0]
            txt_name = "%s%s" % (stem, self.ocr_text_suffix)
            alto_name = "%s.alto.xml" % stem
            txt_path = os.path.join(self.transcribed_output_path, txt_name)
            alto_path = os.path.join(self.transcribed_output_path, alto_name)
            if os.path.isfile(txt_path):
                page_job.page_result.ocr_text_path = txt_path
            if os.path.isfile(alto_path):
                page_job.page_result.ocr_xml_path = alto_path

        # Register the transcription directory as an extra transfer if it exists.
        if os.path.isdir(self.transcription_dir):
            self.job.extra_transfers.append(self.transcription_dir)

        return self.results(stdout=None, stderr=None, exitcode=0)
    def run(self):
        """Run the Ocular ``TrainFont`` main class and record the trained models.

        The input document list is generated first. Exit code 1 is reported,
        without launching Java, when the input font path is unset or is not
        an existing file. A non-zero exit code from the command is passed
        through together with the process's stdout and stderr. On success,
        the font, language-model and glyph-substitution-model output paths
        are stored on ``self.job.font_training_result``, each only if the
        corresponding file exists.
        """
        self.generate_input_doc_list()

        font_path = self.input_font_path
        if not font_path:
            return self.results(
                stdout=None,
                stderr="No input font path could be determined",
                exitcode=1,
            )
        if not os.path.isfile(font_path):
            return self.results(
                stdout=None,
                stderr="Could not find input font path %s" % font_path,
                exitcode=1,
            )

        # The output directory has to exist before the command runs.
        if not os.path.isdir(self.output_path):
            mkdirs_exists_ok(self.output_path)

        # Fixed head of the command line, followed by flag/value pairs.
        cmd = [
            "java", self.java_max_heap,
            "-Done-jar.main.class=edu.berkeley.cs.nlp.ocular.main.TrainFont",
            "-jar", self.jar,
        ]
        options = (
            ("-outputPath", self.output_path),
            ("-inputDocListPath", self.input_doc_list_path),
            ("-inputFontPath", self.input_font_path),
            ("-inputLmPath", self.input_lm_path),
            ("-inputGsmPath", self.input_gsm_path),
            # ("-numDocs", str((len(self.images)))),
            ("-outputFontPath", self.output_font_path),
            ("-outputLmPath", self.output_lm_path),
            ("-outputGsmPath", self.output_gsm_path),
            ("-continueFromLastCompleteIteration", "true"),
            ("-allowGlyphSubstitution", "true"),
            ("-updateLM", "true"),
            ("-updateGsm", "true"),
            ("-emissionEngine", self.job.settings.ocular_emission_engine),
        )
        for flag, value in options:
            cmd.extend((flag, value))

        extras = self.extra_command_parameters
        if extras:
            cmd = cmd + extras

        proc = exec_cmd(cmd, realtime=True)
        if proc.exitcode != 0:
            return self.results(stdout=proc.stdout, stderr=proc.stderr, exitcode=proc.exitcode)

        # The training result is per work rather than per page, so it is
        # stored once on this job's font_training_result.
        produced = (
            ("font_path", self.output_font_path),
            ("language_model_path", self.output_lm_path),
            ("glyph_substitution_model_path", self.output_gsm_path),
        )
        for attr_name, model_path in produced:
            if os.path.isfile(model_path):
                setattr(self.job.font_training_result, attr_name, model_path)

        return self.results(stdout=None, stderr=None, exitcode=0)
    def save(self, data, dirname, filename, overwrite=False):
        """Write ``data`` as JSON to ``filename``.

        ``dirname`` is created if it is not already a directory. Note that
        ``filename`` is opened exactly as given; it is not joined with
        ``dirname`` here.

        Returns ``True`` after the file has been written, or ``None`` when
        ``overwrite`` is false and ``filename`` already exists (in which case
        nothing is written).
        """
        if not os.path.isdir(dirname):
            logger.debug("Creating payload directory %s" % dirname)
            mkdirs_exists_ok(dirname)

        if overwrite:
            logger.debug("Overwriting payload file at %s" % filename)
        elif os.path.exists(filename):
            # Refuse to clobber an existing payload unless explicitly asked to.
            logger.error("payload file %s already exists" % filename)
            return None
        else:
            logger.debug("Saving payload to %s" % filename)

        with open(filename, 'w') as payload_file:
            json.dump(data, payload_file)
        return True