def __init__(self, file_name):
    """Set up the processor for the PDF document at ``file_name``.

    Runs the ``DocumentProcessor`` initialiser, switches on FreeType and
    both antialiasing options in poppler's global parameters, opens the
    document with ``poppler.PDFDoc`` and initialises the remaining
    per-instance state (index, url md5, boolean-search keyword).

    :param file_name: path of the PDF file to process; its last path
        component is taken to be the md5 of the source url.
    """
    DocumentProcessor.__init__(self, file_name)

    # Rendering options are set on poppler's global parameter object.
    global_params = poppler.cvar.globalParams
    global_params.setEnableFreeType("yes")
    global_params.setAntialias("yes")
    global_params.setVectorAntialias("yes")

    # presumably self._file_name is assigned by DocumentProcessor.__init__
    # -- confirm against the base class.
    self._doc = poppler.PDFDoc(self._file_name)
    self._index = None

    # Keep the url md5.
    # NOTE: the file is assumed to be named after the md5 of its url, so
    # the last path component is stored as-is.
    self._url_md5 = os.path.basename(self._file_name)
    #self.logger.debug("PDFProcessor, got url_md5=[%s]"%self._url_md5)

    # Keyword used for boolean search.
    self._AND = ' and'
def __init__(self, file_name):
    """Set up the processor for the PDF document at ``file_name``.

    Runs the ``DocumentProcessor`` initialiser, configures poppler's
    global parameters (FreeType on, ``setErrQuiet(True)``, antialiasing
    and vector antialiasing on), opens the document with
    ``poppler.PDFDoc`` and initialises the remaining per-instance state
    (index, url md5, boolean-search keyword).

    :param file_name: path of the PDF file to process; its last path
        component is taken to be the md5 of the source url.
    """
    DocumentProcessor.__init__(self, file_name)

    # All options below are set on poppler's global parameter object,
    # in the same order as before.
    global_params = poppler.cvar.globalParams
    global_params.setEnableFreeType("yes")
    global_params.setErrQuiet(True)
    global_params.setAntialias("yes")
    global_params.setVectorAntialias("yes")

    # presumably self._file_name is assigned by DocumentProcessor.__init__
    # -- confirm against the base class.
    self._doc = poppler.PDFDoc(self._file_name)
    self._index = None

    # Keep the url md5.
    # NOTE: the file is assumed to be named after the md5 of its url, so
    # the last path component is stored as-is.
    self._url_md5 = os.path.basename(self._file_name)
    #self.logger.debug("PDFProcessor, got url_md5=[%s]"%self._url_md5)

    # Keyword used for boolean search.
    self._AND = ' and'
def __init__(self, file_name):
    """Set up the processor for the image stored at ``file_name``.

    Runs the ``DocumentProcessor`` initialiser and then opens the file
    with ``Image.open``, keeping the result on ``self._img``.

    :param file_name: path of the image file to process.
    """
    DocumentProcessor.__init__(self, file_name)

    # The image is opened from the constructor argument itself, not from
    # any attribute the base initialiser may have set.
    opened_image = Image.open(file_name)
    self._img = opened_image