Esempio n. 1
0
 def __index_file(self, filepath):
     """Index the file at ``filepath`` when the index reports it as changed.

     The underlying index is asked whether the file was updated; the
     module-level ``FORCE_INDEX_REBUILD`` flag overrides a negative answer
     for files that already have a record.

     NOTE(review): the record is compared against ``None`` for the forced
     rebuild, yet ``.id`` is read from it unconditionally below -- confirm
     that ``has_file_updated`` never reports a change without a record.
     """
     changed, record = self._index.has_file_updated(filepath)
     # a forced rebuild only applies to files that already have a record
     forced = FORCE_INDEX_REBUILD and record is not None
     if not (forced or changed):
         return
     message = 'indexing file: %s' % filepath
     log.debug(message)
     self._index.index_file(filepath, document_id=record.id)
Esempio n. 2
0
    def __index_dir(self, dpath):
        """Indexes the contents of the directory at the specified path.

        Entries whose name starts with "." are skipped, as are files whose
        name ends with one of ``settings.EXCLUDE_FILE_SUFFIX``. When
        ``settings.INCLUDE_FILE_SUFFIX`` is set, only files ending with one
        of those suffixes are indexed. Exclusion wins over inclusion.

        In recursive mode hidden sub-directories are pruned from the walk, so
        nothing beneath them is visited; the directory passed in as ``dpath``
        is always processed.

        Raises:
            Exception: if either suffix setting is neither a tuple nor None.
        """
        log.debug('Checking directory: %s' % dpath)
        excluded = settings.EXCLUDE_FILE_SUFFIX
        included = settings.INCLUDE_FILE_SUFFIX
        # sanity checks
        if not isinstance(excluded, (tuple, type(None))):
            raise Exception(
                "settings.EXCLUDE_FILE_SUFFIX must be a tuple or None, found: %s"
                % type(excluded))
        if not isinstance(included, (tuple, type(None))):
            raise Exception(
                "settings.INCLUDE_FILE_SUFFIX must be a tuple or None, found: %s"
                % type(included))

        # nested, reused code block
        def check_name(name):
            """Returns True if the item with the specified name can be indexed"""
            # ignore hidden files
            if name.startswith("."):
                return False
            # ignore excluded files (str.endswith accepts a tuple of suffixes)
            if excluded and name.endswith(excluded):
                return False
            # with an include list, only files carrying one of its suffixes pass
            if included:
                return name.endswith(included)
            return True

        # perform item indexing
        if not self._is_recursive:
            # just check the entries in the target directory
            for item in os.listdir(dpath):
                if check_name(item):
                    self.__index_file(os.path.join(dpath, item))
        else:
            # traverse the given path
            for dirpath, dirnames, filenames in os.walk(dpath):
                # prune hidden dirs in place so os.walk never descends into
                # them; merely skipping the hidden dir itself would still
                # visit its (non-hidden) sub-directories
                dirnames[:] = [d for d in dirnames if not d.startswith('.')]
                for name in filenames:
                    if check_name(name):
                        self.__index_file(os.path.join(dirpath, name))
Esempio n. 3
0
    def __index_dir(self, dpath):
        """Indexes the contents of the directory at the specified path.

        Entries whose name starts with "." are skipped, as are files whose
        name ends with one of ``settings.EXCLUDE_FILE_SUFFIX``. When
        ``settings.INCLUDE_FILE_SUFFIX`` is set, only files ending with one
        of those suffixes are indexed. Exclusion wins over inclusion.

        In recursive mode hidden sub-directories are pruned from the walk, so
        nothing beneath them is visited; the directory passed in as ``dpath``
        is always processed.

        Raises:
            Exception: if either suffix setting is neither a tuple nor None.
        """
        log.debug("Checking directory: %s" % dpath)
        excluded = settings.EXCLUDE_FILE_SUFFIX
        included = settings.INCLUDE_FILE_SUFFIX
        # sanity checks
        if not isinstance(excluded, (tuple, type(None))):
            raise Exception(
                "settings.EXCLUDE_FILE_SUFFIX must be a tuple or None, found: %s" % type(excluded)
            )
        if not isinstance(included, (tuple, type(None))):
            raise Exception(
                "settings.INCLUDE_FILE_SUFFIX must be a tuple or None, found: %s" % type(included)
            )

        # nested, reused code block
        def check_name(name):
            """Returns True if the item with the specified name can be indexed"""
            # ignore hidden files
            if name.startswith("."):
                return False
            # ignore excluded files (str.endswith accepts a tuple of suffixes)
            if excluded and name.endswith(excluded):
                return False
            # with an include list, only files carrying one of its suffixes pass
            if included:
                return name.endswith(included)
            return True

        # perform item indexing
        if not self._is_recursive:
            # just check the entries in the target directory
            for item in os.listdir(dpath):
                if check_name(item):
                    self.__index_file(os.path.join(dpath, item))
        else:
            # traverse the given path
            for dirpath, dirnames, filenames in os.walk(dpath):
                # prune hidden dirs in place so os.walk never descends into
                # them; merely skipping the hidden dir itself would still
                # visit its (non-hidden) sub-directories
                dirnames[:] = [d for d in dirnames if not d.startswith(".")]
                for name in filenames:
                    if check_name(name):
                        self.__index_file(os.path.join(dirpath, name))
Esempio n. 4
0
 def __index_file(self, filepath):
     """Send ``filepath`` to the index if it is stale or a rebuild is forced.

     Staleness comes from ``self._index.has_file_updated``. When the
     module-level ``FORCE_INDEX_REBUILD`` flag is set, any file that already
     has a record is treated as stale as well.

     NOTE(review): ``.id`` is read from the record without a ``None`` guard
     even though the forced-rebuild check allows for ``None`` -- verify
     against ``has_file_updated``.
     """
     is_stale, doc = self._index.has_file_updated(filepath)
     if FORCE_INDEX_REBUILD and doc is not None:
         # rebuilds re-index every file that is already recorded
         is_stale = True
     if is_stale:
         log.debug("indexing file: %s" % filepath)
         self._index.index_file(filepath, document_id=doc.id)
Esempio n. 5
0
 def clean_index(self):
     """Purge index entries whose file is gone from the file system.

     Walks every record returned by ``get_indexed_files()``. For each path
     that no longer exists, the document is removed from the index by its
     ``path`` term and the record itself is deleted.
     """
     for entry in self.get_indexed_files():
         if os.path.exists(entry.path):
             # file is still on disk -- keep its index entry
             continue
         self._index.delete_by_term('path', entry.path)
         entry.delete_instance()
         logger.debug('removed indexed file: %s' % entry)
Esempio n. 6
0
    def __index_dir(self, dpath):
        """Indexes the contents of the directory at the specified path.

        Entries whose name starts with "." are skipped, as are files whose
        name ends with one of ``settings.EXCLUDE_FILE_SUFFIX``. When
        ``settings.INCLUDE_FILE_SUFFIX`` is set, only files ending with one
        of those suffixes are indexed. Exclusion wins over inclusion.

        In recursive mode hidden sub-directories are pruned from the walk.

        Raises:
            Exception: if a suffix setting is not a tuple, list or None, or
                if ``dpath`` contains no entries at all.
        """
        log.debug('Checking directory: %s' % dpath)
        excluded = settings.EXCLUDE_FILE_SUFFIX
        included = settings.INCLUDE_FILE_SUFFIX
        # sanity checks
        if not isinstance(excluded, (tuple, list, type(None))):
            raise Exception(
                'settings.EXCLUDE_FILE_SUFFIX must be a tuple, list or None, '
                'found: %s' % type(excluded))
        if not isinstance(included, (tuple, list, type(None))):
            raise Exception(
                'settings.INCLUDE_FILE_SUFFIX must be a tuple, list or None, '
                'found: %s' % type(included))
        # list the directory once; reused below for the non-recursive case
        entries = os.listdir(dpath)
        if not entries:
            raise Exception('Directory to index is empty: %s' % dpath)

        # str.endswith only takes a str or a tuple, so normalise list/None
        excluded = tuple(excluded or ())
        included = tuple(included or ())

        # nested, reused code block
        def check_name(name):
            """Returns True if the item with the specified name can be indexed."""
            # ignore hidden files
            if name.startswith('.'):
                return False
            # ignore excluded files (an empty tuple never matches)
            if name.endswith(excluded):
                return False
            # with an include list, only files carrying one of its suffixes pass
            if included:
                return name.endswith(included)
            return True

        # perform item indexing
        if not self._is_recursive:
            # just check the entries in the target directory
            for item in filter(check_name, entries):
                self.__index_file(os.path.join(dpath, item))
        else:
            # traverse the given path
            for dirpath, dirnames, filenames in os.walk(dpath):
                # prune hidden dirs in place so os.walk skips their subtrees
                dirnames[:] = [d for d in dirnames if not d.startswith('.')]
                for name in filter(check_name, filenames):
                    self.__index_file(os.path.join(dirpath, name))
Esempio n. 7
0
 def clean_index(self):
     """Purge index entries whose file is gone from the file system.

     Walks every record returned by ``get_indexed_files()``. For each path
     that no longer exists, the document is deleted from the index by the
     record's id and the record itself is deleted.
     """
     for entry in self.get_indexed_files():
         if os.path.exists(entry.path):
             # file is still on disk -- keep its index entry
             continue
         try:
             self.index.delete_document(entry.id)
         except xapian.DocNotFoundError:
             # the index no longer holds this document; the record is
             # still removed below
             pass
         entry.delete_instance()
         logger.debug('removed indexed file: %s' % entry)
Esempio n. 8
0
 def clean_index(self):
     """Drop every indexed document whose file can no longer be found.

     Records come from ``get_indexed_files()``. A record whose ``path`` is
     missing on disk has its document deleted from the index (a document
     that is already absent is tolerated) and is then deleted itself.
     """
     # lazily narrow the records down to those whose file has vanished
     orphans = (
         rec for rec in self.get_indexed_files()
         if not os.path.exists(rec.path)
     )
     for rec in orphans:
         try:
             self.index.delete_document(rec.id)
         except xapian.DocNotFoundError:
             # it is safe to continue: nothing was left to delete
             pass
         rec.delete_instance()
         logger.debug('removed indexed file: %s' % rec)
Esempio n. 9
0
 def clean_index(self):
     """Drop every indexed document whose file can no longer be found.

     Records come from ``get_indexed_files()``. A record whose ``path`` is
     missing on disk is removed from the index by its ``path`` term and is
     then deleted itself.
     """
     # lazily narrow the records down to those whose file has vanished
     orphans = (
         rec for rec in self.get_indexed_files()
         if not os.path.exists(rec.path)
     )
     for rec in orphans:
         self._index.delete_by_term('path', rec.path)
         rec.delete_instance()
         logger.debug('removed indexed file: %s' % rec)
     # TODO: find a way to 'purge' deleted documents. Deleting removes them
     # from query results, but the index info is stored until purged. The
     # docs say the index has a commit() method for this; it doesn't.
     # http://packages.python.org/Whoosh/indexing.html#deleting-documents
Esempio n. 10
0
 def clean_index(self):
     """Purge index entries whose file is gone from the file system.

     Walks every record returned by ``get_indexed_files()``. For each path
     that no longer exists, the document is removed from the index by its
     ``path`` term and the record itself is deleted.
     """
     for entry in self.get_indexed_files():
         if os.path.exists(entry.path):
             # file is still on disk -- keep its index entry
             continue
         self._index.delete_by_term('path', entry.path)
         entry.delete_instance()
         logger.debug('removed indexed file: %s' % entry)
     # TODO: deleted documents disappear from queries, but their index info
     # is stored until purged. The docs say the index has a commit() method
     # for that; it doesn't, so another way to purge must be found.
     # http://packages.python.org/Whoosh/indexing.html#deleting-documents
Esempio n. 11
0
 def open(self, index_path, **kwargs):
     """Create or open this index inside the ``index_path`` directory.

     The index lives in a sub-directory named after ``self._name``, which
     is created when missing. A new index is created when a rebuild was
     requested or none exists yet; otherwise the existing one is opened.
     The resolved directory is stored on ``self._path``.

     ``kwargs`` is accepted but not used here.

     Raises:
         Exception: if ``index_path`` is not an existing directory.
     """
     if os.path.isdir(index_path):
         target = os.path.join(index_path, self._name)
     else:
         reason = 'Directory `%s` is not a valid index directory.' % index_path
         log.warning(reason)
         raise Exception(reason)
     # make sure the index's own sub-directory exists
     if not os.path.isdir(target):
         os.mkdir(target)
         log.warning('created index directory at %s' % target)
     # a requested rebuild skips the existence check entirely
     must_create = self._rebuild_index or not self._index.index_exists(target)
     if must_create:
         log.debug('creating index at %s' % target)
         self._index.create_index(target)
     else:
         log.debug('opening index at %s' % target)
         self._index.open_index(target, writable=self._is_writable)
     # remember where the index lives
     self._path = target
Esempio n. 12
0
 def open(self, index_path, **kwargs):
     """Open the index stored under ``index_path``, creating it if required.

     ``index_path`` must already be a directory. The index itself is kept
     in a child directory named ``self._name``, which is created on demand.
     An existing index is opened unless a rebuild was requested; in every
     other case a new index is created. The child directory is recorded on
     ``self._path`` afterwards.

     ``kwargs`` is accepted but not used here.

     Raises:
         Exception: if ``index_path`` is not an existing directory.
     """
     if not os.path.isdir(index_path):
         error = "Directory `%s` is not a valid index directory." % index_path
         log.warning(error)
         raise Exception(error)
     index_dir = os.path.join(index_path, self._name)
     # create the dir, if needed
     if not os.path.isdir(index_dir):
         os.mkdir(index_dir)
         log.warning("created index directory at %s" % index_dir)
     # reuse the existing index only when no rebuild was requested
     if not self._rebuild_index and self._index.index_exists(index_dir):
         log.debug("opening index at %s" % index_dir)
         self._index.open_index(index_dir, writable=self._is_writable)
     else:
         log.debug("creating index at %s" % index_dir)
         self._index.create_index(index_dir)
     # store indexes path
     self._path = index_dir
Esempio n. 13
0
 def find_path(self, path):
     """Look up the document stored under ``path``.

     Logs the lookup at debug level and returns whatever the searcher's
     ``find_path`` returns.
     """
     message = 'search for path: %s' % path
     log.debug(message)
     match = self._searcher.find_path(path)
     return match
Esempio n. 14
0
 def find_text(self, text, pagenum=1, limit=10):
     """Search the internal index for ``text``.

     Logs the timestamped query at debug level. ``pagenum`` and ``limit``
     are passed through unchanged to the searcher's ``find_text``, whose
     result is returned.
     """
     stamp = datetime.now()
     log.debug('[%s] searching for: %s' % (stamp, text))
     hits = self._searcher.find_text(text, pagenum, limit)
     return hits
Esempio n. 15
0
 def find_path(self, path):
     """Return the searcher's result for the document at ``path``.

     The lookup is logged at debug level before it is delegated to
     ``self._searcher.find_path``.
     """
     note = 'search for path: %s' % path
     log.debug(note)
     found = self._searcher.find_path(path)
     return found
Esempio n. 16
0
 def find_text(self, text, pagenum=1, limit=10):
     """Search the internal index for ``text``.

     Logs the timestamped query at debug level (formatting is left to the
     logger). ``pagenum`` and ``limit`` are passed through unchanged to the
     searcher's ``find_text``, whose result is returned.
     """
     stamp = datetime.now()
     log.debug('[%s] searching for: %s', stamp, text)
     hits = self._searcher.find_text(text, pagenum, limit)
     return hits