def __init__(self, rootdir, callback=dummy_progress_cb):
    """
    Open the on-disk whoosh index used to search the documents of rootdir.

    The index directory is "<base>/paperwork/index", where <base> is the
    value of the XDG_DATA_HOME environment variable, or "~/.local/share"
    (with the user part expanded) when that variable is not set. The index
    found there is opened and its schema is compared with
    self.WHOOSH_SCHEMA.

    When the index cannot be opened (whoosh.index.EmptyIndexError), warnings
    are logged and self.index is not assigned by this code.

    Arguments:
        rootdir --- stored as-is in self.rootdir
        callback --- called during the indexation (may be called *often*).
            Documented arguments of the callback:
            step : DocSearch.INDEX_STEP_READING or
                DocSearch.INDEX_STEP_SORTING
            progression : how many elements done yet
            total : number of elements to do
            document (only if step == DocSearch.INDEX_STEP_READING):
                file being read
            NOTE(review): callback is not invoked by the code visible
            here --- confirm where it is consumed.
    """
    self.rootdir = rootdir
    base_indexdir = os.getenv("XDG_DATA_HOME",
                              os.path.expanduser("~/.local/share"))
    self.indexdir = os.path.join(base_indexdir, "paperwork", "index")
    # mkdir_p is defined elsewhere; presumably it creates the directory and
    # its missing parents --- TODO confirm
    mkdir_p(self.indexdir)

    self.__docs_by_id = {}  # docid --> doc
    self.label_list = []

    # Stays True unless an index could be opened *and* its schema matches.
    # NOTE(review): this flag is not read by the code visible here.
    need_index_rewrite = True
    try:
        logger.info("Opening index dir '%s' ...", self.indexdir)
        self.index = whoosh.index.open_dir(self.indexdir)
        # check that the schema is up-to-date
        # We use the string representation of the schemas, because previous
        # versions of whoosh don't always implement __eq__
        if str(self.index.schema) == str(self.WHOOSH_SCHEMA):
            need_index_rewrite = False
    except whoosh.index.EmptyIndexError as exc:
        # "as" form: accepted by Python 2.6+ and Python 3 alike
        logger.warning("Failed to open index '%s'", self.indexdir)
        logger.warning("Exception was: %s", exc)
def steal_page(self, page):
    """
    Steal a page from another document

    Nothing happens when the page already belongs to this document.
    Otherwise ImgPage(self, self.nb_pages) is created, the content of
    `page` is handed over to it through its _steal_content() method, and
    the cache of this document is dropped with self.drop_cache().

    Arguments:
        page --- the page to take over; page.doc is the document that
            currently owns it
    """
    if page.doc == self:
        # already ours: nothing to steal
        return
    # mkdir_p is defined elsewhere; presumably it makes sure the directory
    # of this document exists --- TODO confirm
    mkdir_p(self.path)

    # NOTE(review): these two values are not used by the code visible here.
    # They are kept so the attribute reads still happen exactly as before.
    other_doc = page.doc
    other_doc_nb_pages = page.doc.nb_pages

    new_page = ImgPage(self, self.nb_pages)
    # Parenthesised form: same output whether print is the Python 2
    # statement or the Python 3 function
    print("%s --> %s" % (str(page), str(new_page)))
    new_page._steal_content(page)
    self.drop_cache()
def __init__(self, rootdir, callback=dummy_progress_cb):
    """
    Open the on-disk whoosh index used to search the documents of rootdir,
    creating a new index when none can be opened.

    The index directory is "<base>/paperwork/index", where <base> is the
    value of the XDG_DATA_HOME environment variable, or "~/.local/share"
    (with the user part expanded) when that variable is not set.

    If whoosh.index.open_dir() raises whoosh.index.EmptyIndexError, a new
    index is created in the same directory with the fields docid, doctype,
    content, label and last_read.

    Arguments:
        rootdir --- stored as-is in self.rootdir
        callback --- called during the indexation (may be called *often*).
            Documented arguments of the callback:
            step : DocSearch.INDEX_STEP_READING or
                DocSearch.INDEX_STEP_SORTING
            progression : how many elements done yet
            total : number of elements to do
            document (only if step == DocSearch.INDEX_STEP_READING):
                file being read
            NOTE(review): callback is not invoked by the code visible
            here --- confirm where it is consumed.
    """
    self.rootdir = rootdir
    base_indexdir = os.getenv("XDG_DATA_HOME",
                              os.path.expanduser("~/.local/share"))
    self.indexdir = os.path.join(base_indexdir, "paperwork", "index")
    # mkdir_p is defined elsewhere; presumably it creates the directory and
    # its missing parents --- TODO confirm
    mkdir_p(self.indexdir)

    self.__docs_by_id = {}  # docid --> doc
    self.label_list = []

    try:
        print("Opening index dir '%s' ..." % self.indexdir)
        self.index = whoosh.index.open_dir(self.indexdir)
    except whoosh.index.EmptyIndexError:
        # The exception object itself is not needed, so it is not bound;
        # this form is accepted by Python 2 and Python 3 alike.
        print("Failed to open index '%s'" % self.indexdir)
        print("Will try to create a new one")
        schema = whoosh.fields.Schema(
            docid=whoosh.fields.ID(stored=True, unique=True),
            doctype=whoosh.fields.ID(stored=True, unique=False),
            content=whoosh.fields.TEXT(spelling=True),
            label=whoosh.fields.KEYWORD(stored=True, commas=True,
                                        spelling=True, scorable=True),
            last_read=whoosh.fields.DATETIME(stored=True),
        )
        self.index = whoosh.index.create_in(self.indexdir, schema)
        print("Index '%s' created" % self.indexdir)
def __init__(self, rootdir, callback=dummy_progress_cb):
    """
    Open the whoosh index stored on disk for the documents of rootdir; when
    that index cannot be opened, build an empty one in its place.

    Location of the index: os.path.join(<base>, "paperwork", "index"), with
    <base> taken from the XDG_DATA_HOME environment variable and defaulting
    to the expanded "~/.local/share".

    Fallback: on whoosh.index.EmptyIndexError a schema made of the fields
    docid, doctype, content, label and last_read is declared and passed to
    whoosh.index.create_in() for the same directory.

    Arguments:
        rootdir --- stored as-is in self.rootdir
        callback --- called during the indexation (may be called *often*).
            Documented arguments of the callback:
            step : DocSearch.INDEX_STEP_READING or
                DocSearch.INDEX_STEP_SORTING
            progression : how many elements done yet
            total : number of elements to do
            document (only if step == DocSearch.INDEX_STEP_READING):
                file being read
            NOTE(review): callback is not invoked by the code visible
            here --- confirm where it is consumed.
    """
    self.rootdir = rootdir

    default_data_home = os.path.expanduser("~/.local/share")
    base_indexdir = os.getenv("XDG_DATA_HOME", default_data_home)
    self.indexdir = os.path.join(base_indexdir, "paperwork", "index")
    # mkdir_p is defined elsewhere; presumably it creates the directory and
    # its missing parents --- TODO confirm
    mkdir_p(self.indexdir)

    self.__docs_by_id = {}  # docid --> doc
    self.label_list = []

    try:
        print("Opening index dir '%s' ..." % self.indexdir)
        self.index = whoosh.index.open_dir(self.indexdir)
    except whoosh.index.EmptyIndexError:
        # No name is bound to the exception: it was never used, and the
        # old "except X, name" spelling is rejected by Python 3.
        print("Failed to open index '%s'" % self.indexdir)
        print("Will try to create a new one")
        fields = whoosh.fields
        schema = fields.Schema(
            docid=fields.ID(stored=True, unique=True),
            doctype=fields.ID(stored=True, unique=False),
            content=fields.TEXT(spelling=True),
            label=fields.KEYWORD(stored=True, commas=True,
                                 spelling=True, scorable=True),
            last_read=fields.DATETIME(stored=True),
        )
        self.index = whoosh.index.create_in(self.indexdir, schema)
        print("Index '%s' created" % self.indexdir)
def check_workdir(self):
    """
    Make sure the current work dir (see config.PaperworkConfig) is usable.

    The work dir is self.rootdir; it is handed to mkdir_p(), which is
    defined elsewhere and presumably creates the directory when it is
    missing (TODO confirm). This method itself does not open any dialog.
    """
    workdir = self.rootdir
    mkdir_p(workdir)