Exemple #1
0
 def test_toplevel_addition_causes_change(self):
   """The cache reports a change for the top-level dir after write1('READMEx')."""
   cache = DirCache()
   root = self.test_data.path_to('')
   cache.listdir(root)

   # Nothing has been written yet, so the changed flag (index 1) is false.
   status_before = cache.listdir_with_changed_status(root)
   self.assertFalse(status_before[1])

   # NOTE(review): the pause presumably lets the directory timestamp advance
   # before the write -- confirm against DirCache's change detection.
   time.sleep(1.2)
   self.test_data.write1('READMEx')

   status_after = cache.listdir_with_changed_status(root)
   self.assertTrue(status_after[1])
 def test_toplevel_deletion_causes_changed(self):
   """Removing 'something' flags both it and the top-level dir as changed."""
   cache = DirCache()
   root = self.test_data.path_to('')
   victim = self.test_data.path_to('something')

   # Prime the cache with both directories.
   cache.listdir(root)
   cache.listdir(victim)

   # Before the removal neither directory reports a change (flag at index 1).
   root_before = cache.listdir_with_changed_status(root)
   self.assertFalse(root_before[1])
   victim_before = cache.listdir_with_changed_status(victim)
   self.assertFalse(victim_before[1])

   # NOTE(review): the pause presumably lets the directory timestamp advance
   # before the removal -- confirm against DirCache's change detection.
   time.sleep(1.2)
   self.test_data.rm_rf(victim)

   root_after = cache.listdir_with_changed_status(root)
   self.assertTrue(root_after[1])
   victim_after = cache.listdir_with_changed_status(victim)
   self.assertTrue(victim_after[1])
Exemple #3
0
 def test_toplevel_deletion_causes_changed(self):
   """Removing 'something' flags both it and the top-level dir as changed."""
   cache = DirCache()
   root = self.test_data.path_to('')
   victim = self.test_data.path_to('something')

   # Prime the cache with both directories.
   cache.listdir(root)
   cache.listdir(victim)

   # Before the removal neither directory reports a change (flag at index 1).
   root_before = cache.listdir_with_changed_status(root)
   self.assertFalse(root_before[1])
   victim_before = cache.listdir_with_changed_status(victim)
   self.assertFalse(victim_before[1])

   # NOTE(review): the pause presumably lets the directory timestamp advance
   # before the removal -- confirm against DirCache's change detection.
   time.sleep(1.2)
   self.test_data.rm_rf(victim)

   root_after = cache.listdir_with_changed_status(root)
   self.assertTrue(root_after[1])
   victim_after = cache.listdir_with_changed_status(victim)
   self.assertTrue(victim_after[1])
Exemple #4
0
  def __init__(self, settings):
    """Stores `settings`, creates the bookkeeping state and registers settings callbacks.

    The 'dirs' and 'ignores' settings are registered with their change
    callbacks, and each callback is invoked once with the current value. An
    empty 'ignores' list is replaced by DEFAULT_IGNORES first.
    """
    self.settings = settings
    self.needs_indexing = Event() # fired when the database gets dirtied and needs syncing
    self._pending_indexer = None # non-None if a DBIndex is running
    self._cur_index = None # the last DBIndex object --> actually runs the searches

    self._dir_cache = DirCache() # thread only state

    # if we are currently looking for changed dirs, this is the iterator
    # over the directories remaining to be checked
    self._pending_up_to_date_generator = None

    self.settings.register('dirs', list, [], self._on_settings_dirs_changed)
    self._on_settings_dirs_changed(None, self.settings.dirs)

    self.settings.register('ignores', list, [], self._on_settings_ignores_changed)
    if self.settings.ignores == []:
      # Indented with spaces: the original tab here mixed tabs and spaces,
      # which Python 3 rejects with a TabError.
      self.settings.ignores = DEFAULT_IGNORES

    self._on_settings_ignores_changed(None, self.settings.ignores)
 def test_toplevel_modification_doesnt_cause_change(self):
   """After write1 the dir reports a change; after a later write2 it does not."""
   cache = DirCache()
   root = self.test_data.path_to('')
   cache.listdir(root)

   # Freshly cached: the changed flag (index 1) is false.
   initial = cache.listdir_with_changed_status(root)
   self.assertFalse(initial[1])

   # NOTE(review): the pauses presumably let timestamps advance between
   # steps -- confirm against DirCache's change detection.
   time.sleep(1.2)
   self.test_data.write1('READMEx')
   after_first_write = cache.listdir_with_changed_status(root)
   self.assertTrue(after_first_write[1])

   time.sleep(1.2)
   self.test_data.write2('READMEx')
   after_second_write = cache.listdir_with_changed_status(root)
   self.assertFalse(after_second_write[1])
Exemple #6
0
    def __init__(self, settings):
        """Stores `settings`, creates the bookkeeping state and registers settings callbacks.

        The 'dirs', 'ignores' and 'token' settings are registered with their
        change callbacks, and each callback is invoked once with the current
        value. An empty 'ignores' list is replaced by DEFAULT_IGNORES first.
        """
        self.settings = settings

        # Fired when the database gets dirtied and needs syncing.
        self.needs_indexing = Event()
        # Non-None if a DBIndexer is running.
        self._pending_indexer = None
        # The current DBShardManager object. This has all the DBIndexShards.
        self._cur_shard_manager = None
        self._cur_query_cache = None

        # Thread only state.
        self._dir_cache = DirCache()

        # 'dirs': register, then replay the current value through the callback.
        on_dirs = self._on_settings_dirs_changed
        self.settings.register('dirs', list, [], on_dirs)
        on_dirs(None, self.settings.dirs)

        # 'ignores': same pattern, but seed the defaults when nothing is set.
        on_ignores = self._on_settings_ignores_changed
        self.settings.register('ignores', list, [], on_ignores)
        ignores_unset = self.settings.ignores == []
        if ignores_unset:
            self.settings.ignores = DEFAULT_IGNORES
        on_ignores(None, self.settings.ignores)

        # 'token': register, then replay the current value.
        on_token = self._on_settings_token_changed
        self.settings.register('token', str, "", on_token)
        on_token(None, self.settings.token)
Exemple #7
0
  def __init__(self, settings):
    """Stores `settings`, creates the bookkeeping state and registers settings callbacks.

    The 'dirs', 'ignores' and 'token' settings are registered with their
    change callbacks, and each callback is invoked once with the current
    value. An empty 'ignores' list is replaced by DEFAULT_IGNORES first.
    """
    self.settings = settings

    # Fired when the database gets dirtied and needs syncing.
    self.needs_indexing = Event()
    # Non-None if a DBIndexer is running.
    self._pending_indexer = None
    # The current DBShardManager object. This has all the DBIndexShards.
    self._cur_shard_manager = None
    self._cur_query_cache = None

    # Thread only state.
    self._dir_cache = DirCache()

    # 'dirs': register, then replay the current value through the callback.
    on_dirs = self._on_settings_dirs_changed
    self.settings.register('dirs', list, [], on_dirs)
    on_dirs(None, self.settings.dirs)

    # 'ignores': same pattern, but seed the defaults when nothing is set.
    on_ignores = self._on_settings_ignores_changed
    self.settings.register('ignores', list, [], on_ignores)
    ignores_unset = self.settings.ignores == []
    if ignores_unset:
      self.settings.ignores = DEFAULT_IGNORES
    on_ignores(None, self.settings.ignores)

    # 'token': register, then replay the current value.
    on_token = self._on_settings_token_changed
    self.settings.register('token', str, "", on_token)
    on_token(None, self.settings.token)
Exemple #8
0
class DB(object):
  """Database of indexed directories with an incrementally built search index.

  The directory list, ignore patterns and token are kept in the `settings`
  object. Every change to one of them marks the database dirty, which
  schedules a reindex that callers advance with step_indexer() or sync().
  """

  def __init__(self, settings):
    """Stores `settings`, creates the bookkeeping state and registers settings callbacks.

    Each of the 'dirs', 'ignores' and 'token' settings is registered with its
    change callback, and the callback is invoked once with the current value.
    An empty 'ignores' list is replaced by DEFAULT_IGNORES first.
    """
    self.settings = settings
    self.needs_indexing = Event() # fired when the database gets dirtied and needs syncing
    self._pending_indexer = None # non-None if a DBIndexer is running
    self._cur_shard_manager = None # the current DBShardManager object. This has all the DBIndexShards.
    self._cur_query_cache = None

    self._dir_cache = DirCache() # thread only state

    # if we are currently looking for changed dirs, this is the iterator
    # over the directories remaining to be checked
    self._pending_up_to_date_generator = None

    self.settings.register('dirs', list, [], self._on_settings_dirs_changed)
    self._on_settings_dirs_changed(None, self.settings.dirs)

    self.settings.register('ignores', list, [], self._on_settings_ignores_changed)
    if self.settings.ignores == []:
      self.settings.ignores = DEFAULT_IGNORES

    self._on_settings_ignores_changed(None, self.settings.ignores)

    self.settings.register('token', str, "", self._on_settings_token_changed)
    self._on_settings_token_changed(None, self.settings.token)

  def close(self):
    """Closes the current shard manager, if there is one, and forgets it."""
    if self._cur_shard_manager:
      self._cur_shard_manager.close()
      self._cur_shard_manager = None

  ###########################################################################

  def _on_settings_dirs_changed(self, old, new):
    """Settings callback: rebuilds the DBDir list from `new` and marks the DB dirty."""
    # Build a real list. Under Python 3 map() returns a one-shot iterator, so
    # the `dirs` property would come back empty on every access after the first.
    self._dirs = [DBDir(d) for d in new]
    self._set_dirty()

  @property
  def dirs(self):
    """A fresh list of the DBDir objects for the configured directories."""
    return list(self._dirs)

  def add_dir(self, d):
    """Adds directory `d` (stored as its real path) and returns its DBDir.

    Raises DBException if the resolved path is already configured.
    """
    real_d = os.path.realpath(d)

    cur = list(self.settings.dirs)
    if real_d in cur:
      raise DBException("Directory %s exists already as %s" % (d, real_d))

    # commit change
    cur.append(real_d)
    self.settings.dirs = cur  # triggers _on_settings_dirs_changed
    return self.dirs[-1]

  def delete_dir(self, d):
    """Removes the directory described by the DBDir `d`.

    Raises Exception if `d` is not a DBDir, and DBException if its path is
    not among the configured directories.
    """
    if not isinstance(d, DBDir):
      raise Exception("Expected DBDir")
    cur = list(self.settings.dirs)
    if d.path not in cur:
      raise DBException("not found")
    cur.remove(d.path)
    self.settings.dirs = cur # triggers _on_settings_dirs_changed

  ###########################################################################

  def _on_settings_ignores_changed(self, old, new):
    """Settings callback: any change to the ignore list marks the DB dirty."""
    self._set_dirty()

  @property
  def ignores(self):
    """A copy of the configured ignore patterns."""
    return list(self.settings.ignores)

  def ignore(self,pattern):
    """Adds `pattern` to the ignore list; does nothing if it is already present."""
    i = list(self.settings.ignores)
    if pattern in i:
      return
    i.append(pattern)
    self.settings.ignores = i

  def unignore(self,pattern):
    """Removes `pattern` from the ignore list.

    Raises ValueError (from list.remove) if the pattern is not present.
    """
    i = list(self.settings.ignores)
    i.remove(pattern)
    self.settings.ignores = i

  ###########################################################################

  def _on_settings_token_changed(self, old, new):
    """Settings callback: any change to the token marks the DB dirty."""
    self._set_dirty()

  @property
  def token(self):
    """The 'token' setting; assigning to it writes the value back to settings."""
    return self.settings.token

  @token.setter
  def token(self, token):
    self.settings.token = token

  ###########################################################################

  @property
  def has_index(self):
    """True once a shard manager exists, i.e. an index has been built."""
    return self._cur_shard_manager is not None

  @property
  def is_up_to_date(self):
    """True when no reindex is scheduled or running."""
    return self._pending_indexer is None

  def check_up_to_date(self):
    """Checks every cached directory for changes, marking the DB dirty on the first hit.

    Returns False immediately if a reindex is already pending; otherwise runs
    the whole scan and returns None.
    """
    if not self.is_up_to_date:
      return False
    self.check_up_to_date_a_bit_more()
    while self._pending_up_to_date_generator is not None:
      self.check_up_to_date_a_bit_more()

  @traced
  def check_up_to_date_a_bit_more(self):
    """Checks up to ten more cached directories for changes.

    Starts a new scan when none is in progress. The scan ends when the
    directories run out or when a change is found; in the latter case the
    database is marked dirty.
    """
    if not self.is_up_to_date:
      return

    if self._pending_up_to_date_generator is None:
      logging.debug("Starting to check for changed directories.")
      self._pending_up_to_date_generator = iter(self._dir_cache.iterdirnames())

    for _ in range(10):
      try:
        # next() builtin instead of the Python-2-only .next() method.
        d = next(self._pending_up_to_date_generator)
      except StopIteration:
        self._pending_up_to_date_generator = None
        logging.debug("Done checking for changed directories.")
        break
      if self._dir_cache.listdir_with_changed_status(d)[1]:
        logging.debug("Change detected in %s!", d)
        self._pending_up_to_date_generator = None
        self._set_dirty()
        break

  def begin_reindex(self):
    """Schedules a full reindex."""
    self._set_dirty()

  def _set_dirty(self):
    """Schedules a new indexer, firing needs_indexing unless one was already pending."""
    was_indexing = self._pending_indexer is not None
    # Any in-flight indexer is simply replaced by the marker below.
    self._pending_indexer = 1 # set to 1 as indication to step_indexer to create new indexer
    if not was_indexing:
      self.needs_indexing.fire()

  @traced
  def status(self):
    """Returns a DBStatus with is_up_to_date, has_index and a human-readable status string."""
    if self._pending_indexer:
      if isinstance(self._pending_indexer, DBIndexer): # is an integer briefly between _set_dirty and first step_indexer
        if self._cur_shard_manager:
          status = "syncing: %s, %s" % (self._pending_indexer.progress, self._cur_shard_manager.status)
        else:
          status = "first-time sync: %s" % self._pending_indexer.progress
      else:
        status = "sync scheduled"
    else:
      if self._cur_shard_manager:
        status = "up-to-date: %s" % self._cur_shard_manager.status
      else:
        status = "sync required"

    res = DBStatus()
    res.is_up_to_date = self.is_up_to_date
    res.has_index = self.has_index
    res.status = status
    return res

  @traced
  def step_indexer(self):
    """Advances the pending reindex by one step; does nothing if none is pending.

    Creates the DBIndexer on the first step after the DB was dirtied. Once the
    indexer reports completion, its result is installed as the new shard
    manager together with a fresh query cache.
    """
    if not self._pending_indexer:
      return

    if not isinstance(self._pending_indexer, DBIndexer):
      self._dir_cache.set_ignores(self.settings.ignores)
      self._pending_indexer = DBIndexer(self.settings.dirs, self._dir_cache)

    if self._pending_indexer.complete:
      self._cur_shard_manager = DBShardManager(self._pending_indexer)
      self._cur_query_cache = QueryCache()
      self._pending_indexer = None
    else:
      self._pending_indexer.index_a_bit_more()

  def sync(self):
    """Ensures database index is up-to-date"""
    self.check_up_to_date()
    while self._pending_indexer:
      self.step_indexer()

  ###########################################################################
  @traced
  def search(self, *args, **kwargs):
    """
    Searches the database.

    args/kwargs should be either a Query object, or arguments to the Query-object constructor.

    Returns an empty QueryResult when no index is available.
    """
    if self._pending_indexer:
      # step sync might change the db sync status
      self.step_indexer()

    # Also covers searching after close(), when nothing is pending but the
    # shard manager has been dropped.
    if not self._cur_shard_manager:
      return QueryResult()

    query = Query.from_kargs(args, kwargs)
    return query.execute(self._cur_shard_manager, self._cur_query_cache)
Exemple #9
0
 def test_up_to_date_after_change(self):
   """Listing 'something' after it was cached and then removed yields an empty list."""
   c = DirCache()
   something = self.test_data.path_to('something')
   c.listdir(something)
   self.test_data.rm_rf(something)
   # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
   self.assertEqual([], c.listdir(self.test_data.path_to('something')))
Exemple #10
0
 def test_listdir_when_gone(self):
   """Listing 'something' after it was cached and then removed yields an empty list."""
   c = DirCache()
   something = self.test_data.path_to('something')
   c.listdir(something)
   self.test_data.rm_rf(something)
   # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
   self.assertEqual([], c.listdir(self.test_data.path_to('something')))
Exemple #11
0
 def test_listdir_on_invalid_dir(self):
   """Listing the path for 'xxx' must complete without raising."""
   cache = DirCache()
   bogus_path = self.test_data.path_to('xxx')
   # Should not raise an exception; the return value is not checked.
   cache.listdir(bogus_path)
Exemple #12
0
 def test_up_to_date_after_change(self):
   """Listing 'something' after it was cached and then removed yields an empty list."""
   c = DirCache()
   something = self.test_data.path_to('something')
   c.listdir(something)
   self.test_data.rm_rf(something)
   # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
   self.assertEqual([], c.listdir(self.test_data.path_to('something')))
Exemple #13
0
 def test_listdir_when_gone(self):
   """Listing 'something' after it was cached and then removed yields an empty list."""
   c = DirCache()
   something = self.test_data.path_to('something')
   c.listdir(something)
   self.test_data.rm_rf(something)
   # assertEqual: the assertEquals alias is deprecated and gone in Python 3.12.
   self.assertEqual([], c.listdir(self.test_data.path_to('something')))
Exemple #14
0
 def test_listdir_on_invalid_dir(self):
   """Listing the path for 'xxx' must complete without raising."""
   cache = DirCache()
   bogus_path = self.test_data.path_to('xxx')
   # Should not raise an exception; the return value is not checked.
   cache.listdir(bogus_path)
Exemple #15
0
class DB(object):
  """Database of indexed directories with an incrementally built search index.

  The directory list and ignore patterns are kept in the `settings` object.
  Every change to one of them marks the database dirty, which schedules a
  reindex that callers advance with step_indexer() or sync().
  """

  def __init__(self, settings):
    """Stores `settings`, creates the bookkeeping state and registers settings callbacks.

    The 'dirs' and 'ignores' settings are registered with their change
    callbacks, and each callback is invoked once with the current value. An
    empty 'ignores' list is replaced by DEFAULT_IGNORES first.
    """
    self.settings = settings
    self.needs_indexing = Event() # fired when the database gets dirtied and needs syncing
    self._pending_indexer = None # non-None if a DBIndex is running
    self._cur_index = None # the last DBIndex object --> actually runs the searches

    self._dir_cache = DirCache() # thread only state

    # if we are currently looking for changed dirs, this is the iterator
    # over the directories remaining to be checked
    self._pending_up_to_date_generator = None

    self.settings.register('dirs', list, [], self._on_settings_dirs_changed)
    self._on_settings_dirs_changed(None, self.settings.dirs)

    self.settings.register('ignores', list, [], self._on_settings_ignores_changed)
    if self.settings.ignores == []:
      # Indented with spaces: the original tab here mixed tabs and spaces,
      # which Python 3 rejects with a TabError.
      self.settings.ignores = DEFAULT_IGNORES

    self._on_settings_ignores_changed(None, self.settings.ignores)

  ###########################################################################

  def _on_settings_dirs_changed(self, old, new):
    """Settings callback: rebuilds the DBDir list from `new` and marks the DB dirty."""
    # Build a real list. Under Python 3 map() returns a one-shot iterator, so
    # the `dirs` property would come back empty on every access after the first.
    self._dirs = [DBDir(d) for d in new]
    self._set_dirty()

  @property
  def dirs(self):
    """A fresh list of the DBDir objects for the configured directories."""
    return list(self._dirs)

  def add_dir(self, d):
    """Adds directory `d` (stored as its real path) and returns its DBDir.

    Raises DBException if the resolved path is already configured.
    """
    real_d = os.path.realpath(d)

    cur = list(self.settings.dirs)
    if real_d in cur:
      raise DBException("Directory %s exists already as %s" % (d, real_d))

    # commit change
    cur.append(real_d)
    self.settings.dirs = cur  # triggers _on_settings_dirs_changed
    return self.dirs[-1]

  def delete_dir(self, d):
    """Removes the directory described by the DBDir `d`.

    Raises Exception if `d` is not a DBDir, and DBException if its path is
    not among the configured directories.
    """
    if not isinstance(d, DBDir):
      raise Exception("Expected DBDir")
    cur = list(self.settings.dirs)
    if d.path not in cur:
      raise DBException("not found")
    cur.remove(d.path)
    self.settings.dirs = cur # triggers _on_settings_dirs_changed

  ###########################################################################

  def _on_settings_ignores_changed(self, old, new):
    """Settings callback: any change to the ignore list marks the DB dirty."""
    self._set_dirty()

  @property
  def ignores(self):
    """A copy of the configured ignore patterns."""
    return list(self.settings.ignores)

  def ignore(self,pattern):
    """Adds `pattern` to the ignore list; does nothing if it is already present."""
    i = list(self.settings.ignores)
    if pattern in i:
      return
    i.append(pattern)
    self.settings.ignores = i

  def unignore(self,pattern):
    """Removes `pattern` from the ignore list.

    Raises ValueError (from list.remove) if the pattern is not present.
    """
    i = list(self.settings.ignores)
    i.remove(pattern)
    self.settings.ignores = i

  ###########################################################################

  @property
  def has_index(self):
    """True once an index object exists."""
    return self._cur_index is not None

  @property
  def is_up_to_date(self):
    """True when no reindex is scheduled or running."""
    return self._pending_indexer is None

  def check_up_to_date(self):
    """Checks every cached directory for changes, marking the DB dirty on the first hit.

    Returns False immediately if a reindex is already pending; otherwise runs
    the whole scan and returns None.
    """
    if not self.is_up_to_date:
      return False
    self.check_up_to_date_a_bit_more()
    while self._pending_up_to_date_generator is not None:
      self.check_up_to_date_a_bit_more()

  @trace
  def check_up_to_date_a_bit_more(self):
    """Checks up to ten more cached directories for changes.

    Starts a new scan when none is in progress. The scan ends when the
    directories run out or when a change is found; in the latter case the
    database is marked dirty.
    """
    if not self.is_up_to_date:
      return

    if self._pending_up_to_date_generator is None:
      logging.debug("Starting to check for changed directories.")
      self._pending_up_to_date_generator = iter(self._dir_cache.iterdirnames())

    for _ in range(10):
      try:
        # next() builtin instead of the Python-2-only .next() method.
        d = next(self._pending_up_to_date_generator)
      except StopIteration:
        self._pending_up_to_date_generator = None
        logging.debug("Done checking for changed directories.")
        break
      if self._dir_cache.listdir_with_changed_status(d)[1]:
        logging.debug("Change detected in %s!", d)
        self._pending_up_to_date_generator = None
        self._set_dirty()
        break

  def begin_reindex(self):
    """Schedules a full reindex."""
    self._set_dirty()

  def _set_dirty(self):
    """Schedules a new indexer, firing needs_indexing unless one was already pending."""
    was_indexing = self._pending_indexer is not None
    # Any in-flight indexer is simply replaced by the marker below.
    self._pending_indexer = 1 # set to 1 as indication to step_indexer to create new indexer
    if not was_indexing:
      self.needs_indexing.fire()

  @trace
  def status(self):
    """Returns a DBStatus with is_up_to_date, has_index and a human-readable status string."""
    if self._pending_indexer:
      if isinstance(self._pending_indexer, DBIndexer): # is an integer briefly between _set_dirty and first step_indexer
        if self._cur_index:
          status = "syncing: %s, %s" % (self._pending_indexer.progress, self._cur_index.status)
        else:
          status = "first-time sync: %s" % self._pending_indexer.progress
      else:
        status = "sync scheduled"
    else:
      if self._cur_index:
        status = "up-to-date: %s" % self._cur_index.status
      else:
        status = "sync required"

    res = DBStatus()
    res.is_up_to_date = self.is_up_to_date
    res.has_index = self.has_index
    res.status = status
    return res

  @trace
  def step_indexer(self):
    """Advances the pending reindex by one step; does nothing if none is pending.

    Creates the DBIndexer on the first step after the DB was dirtied. Once the
    indexer reports completion, a DBIndex built from it becomes the current
    index.
    """
    if not self._pending_indexer:
      return

    if not isinstance(self._pending_indexer, DBIndexer):
      self._dir_cache.set_ignores(self.settings.ignores)
      self._pending_indexer = DBIndexer(self.settings.dirs, self._dir_cache)

    if self._pending_indexer.complete:
      self._cur_index = DBIndex(self._pending_indexer)
      self._pending_indexer = None
    else:
      self._pending_indexer.index_a_bit_more()

  def sync(self):
    """Ensures database index is up-to-date"""
    self.check_up_to_date()
    while self._pending_indexer:
      self.step_indexer()

  ###########################################################################
  def _empty_result(self):
    """Returns a new, empty DBIndexSearchResult."""
    return DBIndexSearchResult()

  @trace
  def search(self, query, max_hits = -1):
    """Searches the current index for `query`.

    Advances a pending reindex by one step first. Returns an empty result
    when no index exists yet after that step, or when `query` is the empty
    string. A `max_hits` of -1 leaves the hit limit to the index's default.
    """
    if self._pending_indexer:
      self.step_indexer()
      # step sync might change the db sync status
      if not self._cur_index:
        return self._empty_result()

    if query == '':
      return self._empty_result()

    if max_hits == -1:
      return self._cur_index.search(query)
    else:
      return self._cur_index.search(query, max_hits)
Exemple #16
0
class DB(object):
    """Database of indexed directories with an incrementally built search index.

    The directory list, ignore patterns and token are kept in the `settings`
    object. Every change to one of them marks the database dirty, which
    schedules a reindex that callers advance with step_indexer() or sync().
    """

    def __init__(self, settings):
        """Stores `settings`, creates the bookkeeping state and registers settings callbacks.

        Each of the 'dirs', 'ignores' and 'token' settings is registered with
        its change callback, and the callback is invoked once with the current
        value. An empty 'ignores' list is replaced by DEFAULT_IGNORES first.
        """
        self.settings = settings
        # Fired when the database gets dirtied and needs syncing.
        self.needs_indexing = Event()
        self._pending_indexer = None  # non-None if a DBIndexer is running
        # time.time() at which the pending indexer was created; set by
        # step_indexer and initialised here so the attribute always exists.
        self._pending_indexer_start_time = None
        self._cur_shard_manager = None  # the current DBShardManager object. This has all the DBIndexShards.
        self._cur_query_cache = None

        self._dir_cache = DirCache()  # thread only state

        self.settings.register('dirs', list, [],
                               self._on_settings_dirs_changed)
        self._on_settings_dirs_changed(None, self.settings.dirs)

        self.settings.register('ignores', list, [],
                               self._on_settings_ignores_changed)
        if self.settings.ignores == []:
            self.settings.ignores = DEFAULT_IGNORES

        self._on_settings_ignores_changed(None, self.settings.ignores)

        self.settings.register('token', str, "",
                               self._on_settings_token_changed)
        self._on_settings_token_changed(None, self.settings.token)

    def close(self):
        """Closes the current shard manager, if there is one, and forgets it."""
        if self._cur_shard_manager:
            self._cur_shard_manager.close()
            self._cur_shard_manager = None

    ###########################################################################

    def _on_settings_dirs_changed(self, old, new):
        """Settings callback: rebuilds the DBDir list from `new` and marks the DB dirty."""
        # Build a real list. Under Python 3 map() returns a one-shot iterator,
        # so the `dirs` property would come back empty on every access after
        # the first.
        self._dirs = [DBDir(d) for d in new]
        self._set_dirty()

    @property
    def dirs(self):
        """A fresh list of the DBDir objects for the configured directories."""
        return list(self._dirs)

    def add_dir(self, d):
        """Adds directory `d` (stored as its real path) and returns its DBDir.

        Raises DBException if the resolved path is already configured.
        """
        real_d = os.path.realpath(d)

        cur = list(self.settings.dirs)
        if real_d in cur:
            raise DBException("Directory %s exists already as %s" %
                              (d, real_d))

        # commit change
        cur.append(real_d)
        self.settings.dirs = cur  # triggers _on_settings_dirs_changed
        return self.dirs[-1]

    def delete_dir(self, d):
        """Removes the directory described by the DBDir `d`.

        Raises Exception if `d` is not a DBDir, and DBException if its path is
        not among the configured directories.
        """
        if not isinstance(d, DBDir):
            raise Exception("Expected DBDir")
        cur = list(self.settings.dirs)
        if d.path not in cur:
            raise DBException("not found")
        cur.remove(d.path)
        self.settings.dirs = cur  # triggers _on_settings_dirs_changed

    ###########################################################################

    def _on_settings_ignores_changed(self, old, new):
        """Settings callback: any change to the ignore list marks the DB dirty."""
        self._set_dirty()

    @property
    def ignores(self):
        """A copy of the configured ignore patterns."""
        return list(self.settings.ignores)

    def ignore(self, pattern):
        """Adds `pattern` to the ignore list; does nothing if it is already present."""
        i = list(self.settings.ignores)
        if pattern in i:
            return
        i.append(pattern)
        self.settings.ignores = i

    def unignore(self, pattern):
        """Removes `pattern` from the ignore list.

        Raises ValueError (from list.remove) if the pattern is not present.
        """
        i = list(self.settings.ignores)
        i.remove(pattern)
        self.settings.ignores = i

    ###########################################################################

    def _on_settings_token_changed(self, old, new):
        """Settings callback: any change to the token marks the DB dirty."""
        self._set_dirty()

    @property
    def token(self):
        """The 'token' setting; assigning to it writes the value back to settings."""
        return self.settings.token

    @token.setter
    def token(self, token):
        self.settings.token = token

    ###########################################################################

    @property
    def has_index(self):
        """True once a shard manager exists, i.e. an index has been built."""
        return self._cur_shard_manager is not None

    @property
    def is_up_to_date(self):
        """True when no reindex is scheduled or running."""
        return self._pending_indexer is None

    def begin_reindex(self):
        """Schedules a full reindex."""
        self._set_dirty()

    def _set_dirty(self):
        """Schedules a new indexer, firing needs_indexing unless one was already pending."""
        was_indexing = self._pending_indexer is not None
        # Any in-flight indexer is simply replaced by the marker below.
        self._pending_indexer = 1  # set to 1 as indication to step_indexer to create new indexer
        if not was_indexing:
            self.needs_indexing.fire()

    @traced
    def status(self):
        """Returns a DBStatus with is_up_to_date, has_index and a human-readable status string."""
        if self._pending_indexer:
            # Is an integer briefly between _set_dirty and first step_indexer
            if not isinstance(self._pending_indexer, int):
                if self._cur_shard_manager:
                    status = "syncing: %s, %s" % (
                        self._pending_indexer.progress,
                        self._cur_shard_manager.status)
                else:
                    status = "first-time sync: %s" % self._pending_indexer.progress
            else:
                status = "sync scheduled"
        else:
            if self._cur_shard_manager:
                status = "up-to-date: %s" % self._cur_shard_manager.status
            else:
                status = "sync required"

        res = DBStatus()
        res.is_up_to_date = self.is_up_to_date
        res.has_index = self.has_index
        res.status = status
        return res

    @traced
    def step_indexer(self):
        """Advances the pending reindex by one step; does nothing if none is pending.

        Creates the indexer via db_indexer.Create on the first step after the
        DB was dirtied and records the start time. Once the indexer reports
        completion, the elapsed time is logged and its result is installed as
        the new shard manager together with a fresh query cache.
        """
        if not self._pending_indexer:
            return

        # _pending_indexer is an integer if recreation should be triggered.
        if isinstance(self._pending_indexer, int):
            self._dir_cache.set_ignores(self.settings.ignores)
            self._pending_indexer = db_indexer.Create(self.settings.dirs,
                                                      self._dir_cache)
            self._pending_indexer_start_time = time.time()

        if self._pending_indexer.complete:
            elapsed = time.time() - self._pending_indexer_start_time
            logging.debug("Indexing with %s took %s seconds",
                          type(self._pending_indexer), elapsed)
            self._cur_shard_manager = DBShardManager(self._pending_indexer)
            self._cur_query_cache = QueryCache()
            self._pending_indexer = None

        else:
            self._pending_indexer.index_a_bit_more()

    def sync(self):
        """Ensures database index is up-to-date"""
        self.begin_reindex()
        while not self.is_up_to_date:
            self.step_indexer()

    ###########################################################################
    @traced
    def search(self, *args, **kwargs):
        """
        Searches the database.

        args/kwargs should be either a Query object, or arguments to the
        Query-object constructor.

        Returns an empty QueryResult when no index is available.
        """
        if self._pending_indexer:
            # step sync might change the db sync status
            self.step_indexer()

        # Also covers searching after close(), when nothing is pending but
        # the shard manager has been dropped.
        if not self._cur_shard_manager:
            return QueryResult()

        query = Query.from_kargs(args, kwargs)
        return query.execute(self._cur_shard_manager, self._cur_query_cache)