def process(self, pages, target_path):
    """ Run the most recent image of every page through ScanTailor.

    :param pages:       Pages to be processed
    :param target_path: Directory the generated ``.tif`` files are copied to
    :raises MissingDependencyException: if the GUI is needed (autopilot is
                        disabled) but no `scantailor` executable was found
    """
    # Local import: only needed to close the descriptor from mkstemp.
    import os

    autopilot = self.config['autopilot'].get(bool)
    if not autopilot and not find_in_path('scantailor'):
        raise MissingDependencyException(
            "Could not find executable `scantailor` in"
            " $PATH. Please install the appropriate"
            " package(s)!")

    # Create temporary files/directories.
    # mkstemp() hands back an *open* descriptor; close it right away, we only
    # need the path. Leaving it open leaks the descriptor on every call.
    fd, projectfile_name = tempfile.mkstemp(suffix='.ScanTailor')
    os.close(fd)
    projectfile = Path(projectfile_name)
    out_dir = Path(tempfile.mkdtemp(prefix='st-out'))

    try:
        # Map input paths to their pages so we can more easily associate
        # the generated output files with their pages later on
        in_paths = {}
        for page in pages:
            fpath = page.get_latest_processed(image_only=True)
            if fpath is None:
                fpath = page.raw_image
            in_paths[unicode(fpath)] = page

        logger.info("Generating ScanTailor configuration")
        self._generate_configuration(sorted(in_paths.keys()),
                                     projectfile, out_dir)

        if not autopilot:
            logger.warn("If you are changing output settings (in the last "
                        "step, you *have* to run the last step from the GUI. "
                        "Due to a bug in ScanTailor, your settings would "
                        "otherwise be ignored.")
            # Give the user a moment to read the warning
            time.sleep(5)
            logger.info("Opening ScanTailor GUI for manual adjustment")
            subprocess.call([find_in_path('scantailor'),
                             unicode(projectfile)])

        # Check if the user already generated output files from the GUI
        if not sum(1 for x in out_dir.glob('*.tif')) == len(pages):
            logger.info("Generating output images from ScanTailor "
                        "configuration.")
            self._generate_output(projectfile, out_dir, len(pages))

        # Index the pages by the stem of their input file once, instead of
        # re-scanning all inputs for every output file. setdefault() keeps
        # the first page seen for a stem, like the former linear search.
        pages_by_stem = {}
        for in_path, page in in_paths.iteritems():
            pages_by_stem.setdefault(Path(in_path).stem, page)

        # Associate generated output files with our pages
        for fname in out_dir.glob('*.tif'):
            page = pages_by_stem.get(fname.stem)
            if page is None:
                logger.warn("Could not find page for output file {0}"
                            .format(fname))
                continue
            target_fname = target_path/fname.name
            shutil.copyfile(unicode(fname), unicode(target_fname))
            page.processed_images[self.__name__] = target_fname
    finally:
        # Remove temporary files/directories, even if processing failed
        shutil.rmtree(unicode(out_dir))
        projectfile.unlink()
def process(self, path):
    """ Run the images below `path` through ScanTailor.

    A project file named after the directory is generated unless it is
    already there. Unless autopilot is enabled, the ScanTailor GUI is opened
    on it before the output images are generated.

    :param path: Project directory; the code reads ``path / 'raw'`` and
                 writes to ``path / 'done'``
    :raises MissingDependencyException: if the GUI is needed but no
                 `scantailor` executable could be found
    """
    unattended = self.config['autopilot'].get(bool)
    if not unattended:
        # The GUI is only required for manual adjustment
        if not find_in_path('scantailor'):
            raise MissingDependencyException(
                "Could not find executable `scantailor` in"
                " $PATH. Please install the appropriate"
                " package(s)!")

    project_name = "{0}.ScanTailor".format(path.name)
    projectfile = path / project_name
    source_dir = path / 'raw'
    target_dir = path / 'done'

    # Re-use an existing project file instead of overwriting it
    if not projectfile.exists():
        self._generate_configuration(projectfile, source_dir, target_dir)

    if not unattended:
        logger.info("Opening ScanTailor GUI for manual adjustment")
        gui_cmd = ['scantailor', unicode(projectfile)]
        subprocess.call(gui_cmd)

    logger.info("Generating output images from ScanTailor configuration.")
    self._generate_output(projectfile, target_dir)
def output(self, pages, target_path, metadata, table_of_contents):
    """ Bundle the most recent image of every page into ``book.pdf``.

    :param pages:             Pages to bundle
    :param target_path:       Directory the PDF is written to
    :param metadata:          Currently unused (see TODO below)
    :param table_of_contents: Currently unused (see TODO below)
    """
    logger.info("Assembling PDF.")

    tmpdir = Path(tempfile.mkdtemp())
    # NOTE: pdfbeads only finds *html files for the text layer in the
    #       working directory, so we have to chdir() into it
    old_path = os.path.abspath(os.path.curdir)
    os.chdir(unicode(tmpdir))
    try:
        images = []
        for page in pages:
            fpath = page.get_latest_processed(image_only=True)
            if fpath is None:
                fpath = page.raw_image
            link_path = (tmpdir / fpath.name)
            link_path.symlink_to(fpath)
            if 'tesseract' in page.processed_images:
                ocr_path = page.processed_images['tesseract']
                (tmpdir / ocr_path.name).symlink_to(ocr_path)
            images.append(link_path)

        # TODO: Use metadata to create a METAFILE for pdfbeads
        # TODO: Use table_of_contents to create a TOCFILE for pdfbeads
        # TODO: Use page.page_label to create a LSPEC for pdfbeads

        pdf_file = target_path / "book.pdf"
        cmd = [find_in_path("pdfbeads"), "-d"]
        cmd.extend([f.name for f in images])
        cmd.extend(["-o", unicode(pdf_file)])
        logger.debug("Running " + " ".join(cmd))

        # Collect the output in a temporary file instead of a pipe: nobody
        # drains a pipe while we poll, so a chatty pdfbeads would block on a
        # full pipe buffer and this loop would spin forever.
        with tempfile.TemporaryFile() as output_log:
            proc = subprocess.Popen(cmd, stdout=output_log,
                                    stderr=subprocess.STDOUT)
            last_count = 0
            while proc.poll() is None:
                # Every encoded page shows up as a *.jbig2 file in tmpdir
                current_count = sum(1 for x in tmpdir.glob('*.jbig2'))
                if current_count > last_count:
                    last_count = current_count
                    self.on_progressed.send(
                        self, progress=float(current_count) / len(images))
                time.sleep(.01)
            output_log.seek(0)
            logger.debug("Output:\n{0}".format(output_log.read()))
    finally:
        # Never leave the process stranded in the temporary directory
        os.chdir(old_path)
def output(self, pages, target_path, metadata, table_of_contents):
    """ Bundle the most recent image of every page into ``book.pdf``.

    :param pages:             Pages to bundle
    :param target_path:       Directory the PDF is written to
    :param metadata:          Currently unused (see TODO below)
    :param table_of_contents: Currently unused (see TODO below)
    """
    logger.info("Assembling PDF.")

    tmpdir = Path(tempfile.mkdtemp())
    # NOTE: pdfbeads only finds *html files for the text layer in the
    #       working directory, so we have to chdir() into it
    old_path = os.path.abspath(os.path.curdir)
    os.chdir(unicode(tmpdir))
    try:
        images = []
        for page in pages:
            fpath = page.get_latest_processed(image_only=True)
            if fpath is None:
                fpath = page.raw_image
            link_path = (tmpdir/fpath.name)
            link_path.symlink_to(fpath)
            if 'tesseract' in page.processed_images:
                ocr_path = page.processed_images['tesseract']
                (tmpdir/ocr_path.name).symlink_to(ocr_path)
            images.append(link_path)

        # TODO: Use metadata to create a METAFILE for pdfbeads
        # TODO: Use table_of_contents to create a TOCFILE for pdfbeads
        # TODO: Use page.page_label to create a LSPEC for pdfbeads

        pdf_file = target_path/"book.pdf"
        cmd = [find_in_path("pdfbeads"), "-d"]
        cmd.extend([f.name for f in images])
        cmd.extend(["-o", unicode(pdf_file)])
        logger.debug("Running " + " ".join(cmd))

        # A pipe that is only read after the process has exited fills up as
        # soon as pdfbeads prints more than the pipe buffer holds; the child
        # then blocks and poll() never reports completion. A temporary file
        # has no such limit.
        with tempfile.TemporaryFile() as output_log:
            proc = subprocess.Popen(cmd, stdout=output_log,
                                    stderr=subprocess.STDOUT)
            last_count = 0
            while proc.poll() is None:
                # Every encoded page shows up as a *.jbig2 file in tmpdir
                current_count = sum(1 for x in tmpdir.glob('*.jbig2'))
                if current_count > last_count:
                    last_count = current_count
                    self.on_progressed.send(
                        self, progress=float(current_count)/len(images))
                time.sleep(.01)
            output_log.seek(0)
            logger.debug("Output:\n{0}".format(output_log.read()))
    finally:
        # Restore the working directory even if something above failed
        os.chdir(old_path)
def process(self, path):
    """ Run the images below `path` through ScanTailor.

    Autopilot is active when either the plugin-specific or the global
    ``autopilot`` option is set. Without it, the ScanTailor GUI is opened on
    the project file before the output images are generated.

    :param path: Project directory as a string; the code reads
                 ``<path>/raw`` and writes to ``<path>/done``
    :raises MissingDependencyException: if the GUI is needed but no
                 `scantailor` executable could be found
    """
    # Plugin-level setting first, global setting as fallback
    autopilot = self.config['scantailor']['autopilot'].get(bool)
    if not autopilot:
        autopilot = self.config['autopilot'].get(bool)

    if not autopilot:
        if not find_in_path('scantailor'):
            raise MissingDependencyException(
                "Could not find executable `scantailor` in"
                " $PATH. Please install the appropriate"
                " package(s)!")

    project_name = "{0}.ScanTailor".format(os.path.basename(path))
    projectfile = os.path.join(path, project_name)
    source_dir = os.path.join(path, 'raw')
    target_dir = os.path.join(path, 'done')

    # Re-use an existing project file instead of overwriting it
    if not os.path.exists(projectfile):
        self._generate_configuration(projectfile, source_dir, target_dir)

    if not autopilot:
        logger.info("Opening ScanTailor GUI for manual adjustment")
        gui_cmd = ['scantailor', projectfile]
        subprocess.call(gui_cmd)

    logger.info("Generating output images from ScanTailor configuration.")
    self._generate_output(projectfile, target_dir)
def _generate_output(self, projectfile, out_dir, num_pages):
    """ Render the output images with parallel `scantailor-cli` processes.

    The project file is split into several configurations, one
    `scantailor-cli` process is started per part, and progress is reported
    from the number of ``.tif`` files that have appeared in `out_dir`.

    :param projectfile: ScanTailor project file to render
    :param out_dir:     Directory the ``.tif`` files are written to
    :param num_pages:   Total number of pages, used to scale the progress
    """
    logger.debug("Generating output...")
    temp_dir = Path(tempfile.mkdtemp(prefix="spreads."))
    split_config = self._split_configuration(projectfile, temp_dir)
    logger.debug("Launching those subprocesses!")

    workers = []
    for cfgfile in split_config:
        worker_cmd = [find_in_path('scantailor-cli'), '--start-filter=6',
                      unicode(cfgfile), unicode(out_dir)]
        workers.append(subprocess.Popen(worker_cmd))

    reported = 0
    while workers:
        finished = len(list(out_dir.glob('*.tif')))
        if finished > reported:
            # This step accounts for the second half of the progress range
            progress = 0.5 + (float(finished)/num_pages)/2
            self.on_progressed.send(self, progress=progress)
            reported = finished
        # Only keep the workers that are still running
        workers = [w for w in workers if w.poll() is None]
        time.sleep(.01)

    shutil.rmtree(unicode(temp_dir))
def process(self, path):
    """ Run the images below `path` through ScanTailor.

    A project file named after the directory is generated unless it is
    already there. Unless autopilot is enabled, the ScanTailor GUI is opened
    on it before the output images are generated.

    :param path: Project directory; the code reads ``path / 'raw'`` and
                 writes to ``path / 'done'``
    :raises MissingDependencyException: if the GUI is needed but no
                 `scantailor` executable could be found
    """
    unattended = self.config['autopilot'].get(bool)
    if not unattended:
        # The GUI is only required for manual adjustment
        if not find_in_path('scantailor'):
            raise MissingDependencyException(
                "Could not find executable `scantailor` in"
                " $PATH. Please install the appropriate"
                " package(s)!")

    project_name = "{0}.ScanTailor".format(path.name)
    projectfile = path / project_name
    source_dir = path / 'raw'
    target_dir = path / 'done'

    # Re-use an existing project file instead of overwriting it
    if not projectfile.exists():
        self._generate_configuration(projectfile, source_dir, target_dir)

    if not unattended:
        logger.info("Opening ScanTailor GUI for manual adjustment")
        gui_cmd = ['scantailor', unicode(projectfile)]
        subprocess.call(gui_cmd)

    logger.info("Generating output images from ScanTailor configuration.")
    # Count the JPEG files in the input directory for progress reporting
    jpeg_suffixes = ('.jpeg', '.jpg')
    jpeg_files = [entry for entry in source_dir.iterdir()
                  if entry.suffix.lower() in jpeg_suffixes]
    self._generate_output(projectfile, target_dir, len(jpeg_files))
def process(self, pages, target_path):
    """ Run the most recent image of every page through ScanTailor.

    :param pages:       Pages to be processed
    :type pages:        list of :py:class:`spreads.workflow.Page`
    :param target_path: Base directory where the images generated by
                        ScanTailor are to be stored
    :type target_path:  :py:class:`pathlib.Path`
    :raises:            :py:class:`spreads.util.MissingDependencyException`
                        if the GUI is needed (autopilot is disabled) but no
                        `scantailor` executable was found
    """
    # Local import: only needed to close the descriptor from mkstemp.
    import os

    autopilot = self.config['autopilot'].get(bool)
    if not autopilot and not util.find_in_path('scantailor'):
        raise util.MissingDependencyException(
            "Could not find executable `scantailor` in"
            " $PATH. Please install the appropriate"
            " package(s)!")

    # Create temporary files/directories.
    # mkstemp() hands back an *open* descriptor. It has to be closed, as an
    # open handle leaks and, on Windows, prevents the file from being deleted.
    fd, projectfile_name = tempfile.mkstemp(suffix='.ScanTailor')
    os.close(fd)
    projectfile = Path(projectfile_name)
    out_dir = Path(tempfile.mkdtemp(prefix='st-out'))

    try:
        # Map input paths to their pages so we can more easily associate
        # the generated output files with their pages later on
        in_paths = {}
        for page in pages:
            fpath = page.get_latest_processed(image_only=True)
            if fpath is None:
                fpath = page.raw_image
            in_paths[unicode(fpath)] = page

        logger.info("Generating ScanTailor configuration")
        self._generate_configuration(sorted(in_paths.keys()),
                                     projectfile, out_dir)

        if not autopilot:
            logger.warn("If you are changing output settings (in the last "
                        "step, you *have* to run the last step from the GUI. "
                        "Due to a bug in ScanTailor, your settings would "
                        "otherwise be ignored.")
            # Give the user a moment to read the warning
            time.sleep(5)
            logger.info("Opening ScanTailor GUI for manual adjustment")
            # Block until the user has closed the GUI; otherwise the check
            # below runs before any manual adjustment has taken place.
            util.get_subprocess([GUI_BIN, unicode(projectfile)]).wait()

        # Check if the user already generated output files from the GUI
        if not sum(1 for x in out_dir.glob('*.tif')) == len(pages):
            logger.info("Generating output images from ScanTailor "
                        "configuration.")
            self._generate_output(projectfile, out_dir, len(pages))

        # Index the pages by the stem of their input file once, instead of
        # re-scanning all inputs for every output file. setdefault() keeps
        # the first page seen for a stem, like the former linear search.
        pages_by_stem = {}
        for in_path, page in in_paths.iteritems():
            pages_by_stem.setdefault(Path(in_path).stem, page)

        # Associate generated output files with our pages
        for fname in out_dir.glob('*.tif'):
            page = pages_by_stem.get(fname.stem)
            if page is None:
                logger.warn(
                    "Could not find page for output file {0}".format(fname))
                continue
            target_fname = target_path / fname.name
            shutil.copyfile(unicode(fname), unicode(target_fname))
            page.processed_images[self.__name__] = target_fname
    finally:
        # Remove temporary files/directories, even if processing failed
        shutil.rmtree(unicode(out_dir))
        try:
            projectfile.unlink()
        except OSError as e:
            # Best effort: a left-over temporary project file must not make
            # the whole step fail. OSError also covers WindowsError, which
            # is not even defined on other platforms.
            logger.warn("Could not remove temporary project file {0}: {1}"
                        .format(projectfile, e))
import codecs import logging import os import re import shutil import subprocess import tempfile import time from spreads.vendor.pathlib import Path import spreads.util as util from spreads.plugin import HookPlugin, OutputHooksMixin BIN = util.find_in_path('pdfbeads') IS_WIN = util.is_os('windows') if not BIN: raise util.MissingDependencyException( "Could not find executable `pdfbeads`. Please install the appropriate " "package(s)!") logger = logging.getLogger('spreadsplug.pdfbeads') class PDFBeadsPlugin(HookPlugin, OutputHooksMixin): __name__ = 'pdfbeads' def output(self, pages, target_path, metadata, table_of_contents):
import re import shutil import subprocess import tempfile import time import xml.etree.cElementTree as ET import psutil from spreads.vendor.pathlib import Path import spreads.util as util from spreads.config import OptionTemplate from spreads.plugin import HookPlugin, ProcessHooksMixin IS_WIN = util.is_os('windows') CLI_BIN = util.find_in_path('scantailor-cli') GUI_BIN = util.find_in_path('scantailor') if not CLI_BIN: raise util.MissingDependencyException( "Could not find executable `scantailor-cli`. Please" " install the" " appropriate package(s)!") logger = logging.getLogger('spreadsplug.scantailor') class ScanTailorPlugin(HookPlugin, ProcessHooksMixin): __name__ = 'scantailor' @classmethod
import re import shutil import subprocess import tempfile import time import xml.etree.cElementTree as ET import psutil from pathlib import Path import spreads.util as util from spreads.config import OptionTemplate from spreads.plugin import HookPlugin, ProcessHooksMixin IS_WIN = util.is_os('windows') CLI_BIN = util.find_in_path('scantailor-cli') GUI_BIN = util.find_in_path('scantailor') if not CLI_BIN: raise util.MissingDependencyException( "Could not find executable `scantailor-cli`. Please" " install the" " appropriate package(s)!") logger = logging.getLogger('spreadsplug.scantailor') class ScanTailorPlugin(HookPlugin, ProcessHooksMixin): __name__ = 'scantailor' @classmethod def configuration_template(cls):
def process(self, pages, target_path):
    """ Run the most recent image of every page through ScanTailor.

    :param pages:       Pages to be processed
    :type pages:        list of :py:class:`spreads.workflow.Page`
    :param target_path: Base directory where the images generated by
                        ScanTailor are to be stored
    :type target_path:  :py:class:`pathlib.Path`
    :raises:            :py:class:`spreads.util.MissingDependencyException`
                        if the GUI is needed (autopilot is disabled) but no
                        `scantailor` executable was found
    """
    # Local import: only needed to close the descriptor from mkstemp.
    import os

    autopilot = self.config['autopilot'].get(bool)
    if not autopilot and not util.find_in_path('scantailor'):
        raise util.MissingDependencyException(
            "Could not find executable `scantailor` in"
            " $PATH. Please install the appropriate"
            " package(s)!")

    # Create temporary files/directories.
    # mkstemp() hands back an *open* descriptor. It has to be closed, as an
    # open handle leaks and, on Windows, prevents the file from being deleted.
    fd, projectfile_name = tempfile.mkstemp(suffix='.ScanTailor')
    os.close(fd)
    projectfile = Path(projectfile_name)
    out_dir = Path(tempfile.mkdtemp(prefix='st-out'))

    try:
        # Map input paths to their pages so we can more easily associate
        # the generated output files with their pages later on
        in_paths = {}
        for page in pages:
            fpath = page.get_latest_processed(image_only=True)
            if fpath is None:
                fpath = page.raw_image
            in_paths[unicode(fpath)] = page

        logger.info("Generating ScanTailor configuration")
        self._generate_configuration(sorted(in_paths.keys()),
                                     projectfile, out_dir)

        if not autopilot:
            logger.warn("If you are changing output settings (in the last "
                        "step, you *have* to run the last step from the GUI. "
                        "Due to a bug in ScanTailor, your settings would "
                        "otherwise be ignored.")
            # Give the user a moment to read the warning
            time.sleep(5)
            logger.info("Opening ScanTailor GUI for manual adjustment")
            proc = util.get_subprocess([GUI_BIN, unicode(projectfile)])
            # Block until the user has closed the GUI
            proc.wait()

        # Check if the user already generated output files from the GUI
        if not sum(1 for x in out_dir.glob('*.tif')) == len(pages):
            logger.info("Generating output images from ScanTailor "
                        "configuration.")
            self._generate_output(projectfile, out_dir, len(pages))

        # Index the pages by the stem of their input file once, instead of
        # re-scanning all inputs for every output file. setdefault() keeps
        # the first page seen for a stem, like the former linear search.
        pages_by_stem = {}
        for in_path, page in in_paths.iteritems():
            pages_by_stem.setdefault(Path(in_path).stem, page)

        # Associate generated output files with our pages
        for fname in out_dir.glob('*.tif'):
            page = pages_by_stem.get(fname.stem)
            if page is None:
                logger.warn("Could not find page for output file {0}"
                            .format(fname))
                continue
            target_fname = target_path/fname.name
            shutil.copyfile(unicode(fname), unicode(target_fname))
            page.processed_images[self.__name__] = target_fname
    finally:
        # Remove temporary files/directories, even if processing failed
        shutil.rmtree(unicode(out_dir))
        try:
            projectfile.unlink()
        except OSError as e:
            # Best effort: a left-over temporary project file must not make
            # the whole step fail. OSError also covers WindowsError, which
            # is not even defined on other platforms.
            logger.warn("Could not remove temporary project file {0}: {1}"
                        .format(projectfile, e))
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Affero General Public License for more details. # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. from __future__ import division, unicode_literals import logging import os import subprocess from spreads.plugin import HookPlugin, OutputHookMixin from spreads.util import MissingDependencyException, find_in_path if not find_in_path('djvubind'): raise MissingDependencyException("Could not find executable `djvubind` in" " $PATH. Please install the appropriate" " package(s)!") logger = logging.getLogger('spreadsplug.djvubind') class DjvuBindPlugin(HookPlugin, OutputHookMixin): __name__ = 'djvubind' def output(self, path): logger.info("Assembling DJVU.") img_dir = path / 'done' djvu_file = path / 'out' / "{0}.djvu".format(path.name) cmd = ["djvubind", unicode(img_dir)]
def __init__(self, config):
    """ Initialize the plugin and probe the installed `scantailor-cli`.

    ``self._enhanced`` is set to ``True`` when the text printed by
    `scantailor-cli` (invoked without arguments) contains the
    ``<images|directory|->`` marker.

    :param config: Configuration that is handed on to the parent class
    """
    super(ScanTailorPlugin, self).__init__(config)
    usage_text = subprocess.check_output(find_in_path('scantailor-cli'))
    # Look at every line instead of a hard-coded line number: indexing a
    # fixed line raises IndexError on shorter output and silently misses the
    # marker as soon as it moves to another line.
    # NOTE(review): assumes the marker only ever shows up in the usage of
    #               the enhanced version -- confirm
    self._enhanced = any(
        re.match(r".*<images\|directory\|->.*", line)
        for line in usage_text.splitlines())
import logging import os import re import subprocess import xml.etree.cElementTree as ET from concurrent import futures from spreads.plugin import HookPlugin from spreads.util import find_in_path, MissingDependencyException if not find_in_path('tesseract'): raise MissingDependencyException("Could not find executable `tesseract`" " in $PATH. Please install the" " appropriate package(s)!") logger = logging.getLogger('spreadsplug.tesseract') class TesseractPlugin(HookPlugin): @classmethod def add_arguments(cls, command, parser): if command == 'postprocess': parser.add_argument("--language", "-l", dest="language", default="eng", help="OCR language (3-letter language code)" " [default: eng]") def process(self, path):
from __future__ import division, unicode_literals import logging import os import shutil import subprocess import tempfile import time from spreads.vendor.pathlib import Path from spreads.plugin import HookPlugin, OutputHookMixin from spreads.util import MissingDependencyException, find_in_path if not find_in_path('pdfbeads'): raise MissingDependencyException("Could not find executable `pdfbeads`." "Please install the appropriate " "package(s)!") logger = logging.getLogger('spreadsplug.pdfbeads') class PDFBeadsPlugin(HookPlugin, OutputHookMixin): __name__ = 'pdfbeads' def output(self, pages, target_path, metadata, table_of_contents): logger.info("Assembling PDF.") tmpdir = Path(tempfile.mkdtemp()) # NOTE: pdfbeads only finds *html files for the text layer in the
from __future__ import division, unicode_literals import logging import math import multiprocessing import os import re import shutil import subprocess import tempfile from xml.etree.cElementTree import ElementTree as ET from spreads.plugin import HookPlugin from spreads.util import find_in_path, MissingDependencyException if not find_in_path('scantailor-cli'): raise MissingDependencyException("Could not find executable" " `scantailor-cli` in $PATH. Please" " install the appropriate package(s)!") logger = logging.getLogger('spreadsplug.scantailor') class ScanTailorPlugin(HookPlugin): _enhanced = bool(re.match(r".*<images\|directory\|->.*", subprocess.check_output('scantailor-cli') .splitlines()[7])) @classmethod def add_arguments(cls, command, parser): if command == "postprocess":
import multiprocessing import os import re import shutil import subprocess import tempfile import time import xml.etree.cElementTree as ET from itertools import chain import spreads.util as util from spreads.config import OptionTemplate from spreads.plugin import HookPlugin, ProcessHooksMixin from spreads.vendor.pathlib import Path BIN = util.find_in_path('tesseract') if not BIN: raise util.MissingDependencyException( "Could not find executable `tesseract`. Please install the appropriate" " package(s)!") # Newer versions of Tesseract provide a flag to obtain a list of installed # OCR languages, for older versions we have to read out the directory # containing the training data for languages. try: AVAILABLE_LANGS = (util.get_subprocess( [BIN, "--list-langs"], stderr=subprocess.STDOUT, stdout=subprocess.PIPE).communicate()[0].split("\n")[1:-1]) # There should be at least a single language if not AVAILABLE_LANGS:
import multiprocessing import os import re import shutil import subprocess import tempfile import time import xml.etree.cElementTree as ET from itertools import chain import spreads.util as util from spreads.config import OptionTemplate from spreads.plugin import HookPlugin, ProcessHooksMixin from pathlib import Path BIN = util.find_in_path('tesseract') if not BIN: raise util.MissingDependencyException( "Could not find executable `tesseract`. Please install the appropriate" " package(s)!") # Newer versions of Tesseract provide a flag to obtain a list of installed # OCR languages, for older versions we have to read out the directory # containing the training data for languages. try: AVAILABLE_LANGS = (util.get_subprocess([BIN, "--list-langs"], stderr=subprocess.STDOUT, stdout=subprocess.PIPE) .communicate()[0] .split("\n")[1:-1]) # There should be at least a single language
def _generate_configuration(self, in_paths, projectfile, out_dir):
    """ Run `scantailor-cli` to create a project file for the given images.

    While the process is running, its open files are polled to find out
    which input image it is working on; this is reported via
    ``self.on_progressed``, scaled by a factor of 0.5.

    :param in_paths:    Paths of the input images; used with ``.index()``,
                        so this has to be a sequence
    :param projectfile: Path handed to `scantailor-cli` via ``-o``
    :param out_dir:     Output directory handed to `scantailor-cli`
    :raises ValueError: from ``list.index`` if none of the filter options
                        is enabled
    :raises SpreadsException: on Windows, if no wildcard for the input paths
                        could be determined
    """
    # One flag per filter, in the order in which the filters are numbered
    filterconf = [self.config[x].get(bool)
                  for x in ('rotate', 'split_pages', 'deskew', 'content',
                            'auto_margins')]
    # 1-based numbers of the first and the last enabled filter
    start_filter = filterconf.index(True)+1
    end_filter = len(filterconf) - list(reversed(filterconf)).index(True)
    marginconf = self.config['margins'].as_str_seq()
    generation_cmd = [find_in_path('scantailor-cli'),
                      '--start-filter={0}'.format(start_filter),
                      '--end-filter={0}'.format(end_filter),
                      '--layout=1.5',
                      '-o={0}'.format(projectfile)]
    page_detection = self.config['detection'].get() == 'page'
    # The page detection flags are only passed when `self._enhanced` is set;
    # otherwise the configured margins are passed instead
    if self._enhanced and page_detection:
        generation_cmd.extend([
            '--enable-page-detection',
            '--disable-content-detection',
            '--enable-fine-tuning'
        ])
    else:
        generation_cmd.extend([
            '--margins-top={0}'.format(marginconf[0]),
            '--margins-right={0}'.format(marginconf[1]),
            '--margins-bottom={0}'.format(marginconf[2]),
            '--margins-left={0}'.format(marginconf[3]),
        ])
    # NOTE: We cannot pass individual filenames on windows, since we have
    # a limit of 32,768 characters for commands. Thus, we first try to
    # find a wildcard for our paths that matches only them, and if that
    # fails, throw an Exception and tell the user to use a proper OS...
    wildcard = wildcardify(in_paths)
    if not wildcard and IS_WIN:
        raise SpreadsException("Please use a proper operating system.")
    elif not wildcard:
        generation_cmd.extend(in_paths)
    else:
        generation_cmd.append(wildcard)
    generation_cmd.append(unicode(out_dir))
    logger.debug(" ".join(generation_cmd))
    # Wrap the child in a psutil handle so its open files can be inspected
    proc = psutil.Process(subprocess.Popen(generation_cmd).pid)

    num_images = len(in_paths)
    num_steps = (end_filter - start_filter)+1
    last_fileidx = 0
    recent_fileidx = 0
    finished_steps = 0
    while proc.is_running():
        try:
            # Index of the first input image the process has open right now
            recent_fileidx = next(in_paths.index(x.path)
                                  for x in proc.open_files()
                                  if x.path in in_paths)
        except StopIteration:
            # No input image is open at the moment, keep the previous index
            pass
        except psutil.AccessDenied:
            # This means the process is no longer running
            break
        if recent_fileidx == last_fileidx:
            # Nothing new to report, poll again shortly
            time.sleep(.01)
            continue
        if recent_fileidx < last_fileidx:
            # The index dropped, which is counted as one finished step
            finished_steps += 1
        last_fileidx = recent_fileidx
        progress = 0.5*((finished_steps*num_images+last_fileidx) /
                        float(num_steps*num_images))
        self.on_progressed.send(self, progress=progress)
import logging import re import subprocess import xml.etree.cElementTree as ET from concurrent import futures from spreads.plugin import HookPlugin, PluginOption from spreads.util import find_in_path, MissingDependencyException if not find_in_path('tesseract'): raise MissingDependencyException("Could not find executable `tesseract`" " in $PATH. Please install the" " appropriate package(s)!") AVAILABLE_LANGS = (subprocess.check_output(["tesseract", "--list-langs"], stderr=subprocess.STDOUT) .split("\n")[1:-1]) logger = logging.getLogger('spreadsplug.tesseract') class TesseractPlugin(HookPlugin): __name__ = 'tesseract' @classmethod def add_arguments(cls, command, parser): if command == 'postprocess': parser.add_argument("--language", "-l", dest="language", default="eng", help="OCR language (3-letter language code)"
import codecs import logging import os import re import shutil import subprocess import tempfile import time from pathlib import Path import spreads.util as util from spreads.plugin import HookPlugin, OutputHooksMixin BIN = util.find_in_path('pdfbeads') IS_WIN = util.is_os('windows') if not BIN: raise util.MissingDependencyException( "Could not find executable `pdfbeads`. Please install the appropriate " "package(s)!") logger = logging.getLogger('spreadsplug.pdfbeads') class PDFBeadsPlugin(HookPlugin, OutputHooksMixin): __name__ = 'pdfbeads' def output(self, pages, target_path, metadata, table_of_contents): """ Go through pages and bundle their most recent images into a PDF
import logging import math import multiprocessing import re import shutil import subprocess import tempfile from xml.etree.cElementTree import ElementTree as ET from spreads.vendor.pathlib import Path from spreads.plugin import HookPlugin, ProcessHookMixin, PluginOption from spreads.util import find_in_path, MissingDependencyException if not find_in_path('scantailor-cli'): raise MissingDependencyException("Could not find executable" " `scantailor-cli` in $PATH. Please" " install the appropriate package(s)!") logger = logging.getLogger('spreadsplug.scantailor') class ScanTailorPlugin(HookPlugin, ProcessHookMixin): __name__ = 'scantailor' @classmethod def configuration_template(cls): conf = {'autopilot': PluginOption(value=False, docstring="Skip manual correction"), 'rotate': PluginOption(value=False, docstring="Rotate pages"),
import logging import os import re import subprocess import tempfile import time from fractions import Fraction from itertools import chain from spreads.vendor.pathlib import Path from spreads.plugin import DevicePlugin, PluginOption, DeviceFeatures from spreads.util import (DeviceException, find_in_path, MissingDependencyException) if not find_in_path('exiftool'): raise MissingDependencyException("Could not find executable `exiftool`" " in $PATH. Please install the" " appropriate package(s)!") class CHDKPTPException(Exception): pass class CHDKCameraDevice(DevicePlugin): """ Plugin for digital cameras running the CHDK firmware. """ features = (DeviceFeatures.PREVIEW, DeviceFeatures.IS_CAMERA)
# -*- coding: utf-8 -*- from __future__ import division, unicode_literals import logging import os import subprocess from spreads.plugin import HookPlugin from spreads.util import MissingDependencyException, find_in_path if not find_in_path('djvubind'): raise MissingDependencyException("Could not find executable `djvubind` in" " $PATH. Please install the appropriate" " package(s)!") logger = logging.getLogger('spreadsplug.djvubind') class DjvuBindPlugin(HookPlugin): __name__ = 'djvubind' def output(self, path): logger.info("Assembling DJVU.") img_dir = path / 'done' djvu_file = path / 'out' / "{0}.djvu".format(path.name) cmd = ["djvubind", unicode(img_dir)] if not img_dir.glob("*.html"): cmd.append("--no-ocr") logger.debug("Running " + " ".join(cmd)) subprocess.check_output(cmd, stderr=subprocess.STDOUT)
# -*- coding: utf-8 -*- from __future__ import division, unicode_literals import logging import os import subprocess from spreads.plugin import HookPlugin from spreads.util import MissingDependencyException, find_in_path if not find_in_path("djvubind"): raise MissingDependencyException( "Could not find executable `djvubind` in" " $PATH. Please install the appropriate" " package(s)!" ) logger = logging.getLogger("spreadsplug.djvubind") class DjvuBindPlugin(HookPlugin): def output(self, path): logger.info("Assembling DJVU.") img_dir = os.path.join(path, "done") djvu_file = os.path.join(path, "out", "{0}.djvu".format(os.path.basename(path))) cmd = ["djvubind", img_dir] if self.config["djvubind"]["ocr"].get(unicode) == "none": cmd.append("--no-ocr") logger.debug("Running " + " ".join(cmd)) _ = subprocess.check_output(cmd, stderr=subprocess.STDOUT) os.rename("book.djvu", djvu_file)