コード例 #1
0
class SearchMigrationTest(TestCase, TempDirMixin):
    """Search index migration testing"""

    def setUp(self):
        # Fresh temporary directory backing a new whoosh storage per test.
        self.create_temp()
        self.storage = FileStorage(self.tempdir)
        self.storage.create()

    def tearDown(self):
        self.remove_temp()

    def do_test(self):
        """Index one source and one target document, then verify that every
        field is searchable through the fulltext helper."""
        fulltext = Fulltext()
        fulltext.storage = self.storage

        source_index = fulltext.get_source_index()
        self.assertIsNotNone(source_index)
        target_index = fulltext.get_target_index('cs')
        self.assertIsNotNone(target_index)

        writer = source_index.writer()
        writer.update_document(pk=1, source="source", context="context",
                               location="location")
        writer.commit()

        writer = target_index.writer()
        writer.update_document(pk=1, target="target", comment="comment")
        writer.commit()

        for field in ('source', 'context', 'location', 'target'):
            matches = fulltext.search(field, ['cs'], {field: True})
            self.assertEqual(matches, set([1]))

    def test_nonexisting(self):
        self.do_test()

    def test_nonexisting_dir(self):
        # The search code must cope with the storage directory vanishing.
        shutil.rmtree(self.tempdir)
        self.tempdir = None
        self.do_test()
コード例 #2
0
def test_storage_creation():
    """Exercise the FileStorage lifecycle: create, index documents, destroy."""
    import tempfile
    import uuid
    from whoosh import fields
    from whoosh.filedb.filestore import FileStorage

    schema = fields.Schema(text=fields.TEXT)
    # A random UUID directory name guarantees the path does not exist yet.
    target_dir = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
    assert not os.path.exists(target_dir)

    # create() must materialize the directory on disk.
    storage = FileStorage(target_dir)
    storage.create()
    assert os.path.exists(target_dir)

    index = storage.create_index(schema)
    with index.writer() as writer:
        writer.add_document(text=u("alfa bravo"))
        writer.add_document(text=u("bracho charlie"))

    # destroy() must remove the directory again.
    storage.destroy()
    assert not os.path.exists(target_dir)
コード例 #3
0
ファイル: test_misc.py プロジェクト: JunjieHu/dl
def test_storage_creation():
    """Exercise the FileStorage lifecycle: create, index documents, destroy."""
    import tempfile, uuid
    from whoosh import fields
    from whoosh.filedb.filestore import FileStorage

    schema = fields.Schema(text=fields.TEXT)
    # A random UUID directory name guarantees the path does not exist yet.
    uid = uuid.uuid4()
    dirpath = os.path.join(tempfile.gettempdir(), str(uid))
    assert not os.path.exists(dirpath)

    # create() must materialize the directory on disk.
    st = FileStorage(dirpath)
    st.create()
    assert os.path.exists(dirpath)

    ix = st.create_index(schema)
    with ix.writer() as w:
        w.add_document(text=u("alfa bravo"))
        w.add_document(text=u("bracho charlie"))

    # destroy() must remove the directory again.
    st.destroy()
    assert not os.path.exists(dirpath)
コード例 #4
0
ファイル: __init__.py プロジェクト: Galaxyinternship/Galaxy
def _temp_storage(self, name=None):
    """Create and return a FileStorage rooted in a fresh temp directory.

    The *name* argument is accepted for signature compatibility and ignored.
    """
    scratch_dir = tempfile.mkdtemp()
    return FileStorage(scratch_dir).create()
コード例 #5
0
ファイル: test_search.py プロジェクト: pschneider/weblate
class SearchMigrationTest(TestCase, TempDirMixin):
    """Search index migration testing"""

    def setUp(self):
        # Swap the global search storage for a per-test temporary one,
        # remembering the original so tearDown can restore it.
        self.create_temp()
        self.backup = weblate.trans.search.STORAGE
        self.storage = FileStorage(self.tempdir)
        weblate.trans.search.STORAGE = self.storage
        self.storage.create()

    def tearDown(self):
        self.remove_temp()
        weblate.trans.search.STORAGE = self.backup

    def do_test(self, source, target):
        """Optionally pre-create indices with legacy schemas (None means the
        index is absent), then verify migration, writing and searching."""
        for schema, index_name in ((source, 'source'), (target, 'target-cs')):
            if schema is not None:
                self.storage.create_index(schema, index_name)

        source_index = weblate.trans.search.get_source_index()
        self.assertIsNotNone(source_index)
        target_index = weblate.trans.search.get_target_index('cs')
        self.assertIsNotNone(target_index)

        writer = source_index.writer()
        writer.update_document(pk=1, source="source", context="context",
                               location="location")
        writer.commit()

        writer = target_index.writer()
        writer.update_document(pk=1, target="target", comment="comment")
        writer.commit()

        for field in ('source', 'context', 'location', 'target'):
            found = fulltext_search(field, ['cs'], {field: True})
            self.assertEqual(found, set([1]))

    def test_nonexisting(self):
        self.do_test(None, None)

    def test_nonexisting_dir(self):
        # Migration must cope with the storage directory having vanished.
        shutil.rmtree(self.tempdir)
        self.tempdir = None
        self.do_test(None, None)

    def test_current(self):
        self.do_test(weblate.trans.search.SourceSchema,
                     weblate.trans.search.TargetSchema)

    def test_2_4(self):
        # Schemas as they looked in weblate 2.4.
        source = Schema(checksum=ID(stored=True, unique=True), source=TEXT(),
                        context=TEXT(), location=TEXT())
        target = Schema(checksum=ID(stored=True, unique=True), target=TEXT(),
                        comment=TEXT())
        self.do_test(source, target)

    def test_2_1(self):
        # Schemas as they looked in weblate 2.1.
        source = Schema(checksum=ID(stored=True, unique=True), source=TEXT(),
                        context=TEXT())
        target = Schema(checksum=ID(stored=True, unique=True), target=TEXT())
        self.do_test(source, target)
コード例 #6
0
 def temp_storage(self, name=None):
     """Create and return a FileStorage under the system temp directory.

     A random ``*.tmp`` name is generated when *name* is not supplied.
     """
     storage_name = name or "%s.tmp" % random_name()
     storage_path = os.path.join(tempfile.gettempdir(), storage_name)
     return FileStorage(storage_path).create()
コード例 #7
0
def setup_index():
    """Create the 'memory' storage directory and return a fresh TM index."""
    # FileStorage.create() returns the storage, so the calls chain.
    return FileStorage(data_dir('memory')).create().create_index(TMSchema())
コード例 #8
0
 def temp_storage(self, name=None):
     """Return a created FileStorage in the system temp directory.

     A random ``*.tmp`` name is generated when *name* is not given.
     """
     tdir = tempfile.gettempdir()
     name = name or "%s.tmp" % random_name()
     path = os.path.join(tdir, name)
     tempstore = FileStorage(path)
     return tempstore.create()
コード例 #9
0
ファイル: store.py プロジェクト: mrvanes/pyFFplus
class RedisWhooshStore(SAMLStoreBase):  # TODO: This needs a gc mechanism for keys (uuids)
    """SAML metadata store keeping entities and collection parts in redis
    and maintaining a whoosh index on disk for attribute and free-text
    search."""

    def json_dict(self, name):
        """Return an LRU-fronted, redis-backed JSON dict for *name*,
        namespaced by this store's name."""
        return LRUProxyDict(JSONDict(key='{}_{}'.format(self._name, name),
                                     redis=self._redis,
                                     writeback=True),
                            maxsize=config.cache_size)

    def xml_dict(self, name):
        """Return an LRU-fronted, redis-backed XML dict for *name*,
        namespaced by this store's name."""
        return LRUProxyDict(XMLDict(key='{}_{}'.format(self._name, name),
                                    redis=self._redis,
                                    writeback=True),
                            maxsize=config.cache_size)

    def __init__(self, *args, **kwargs):
        """Recognized kwargs: directory (whoosh index dir), clear (wipe all
        state on startup), name (redis key prefix), redis (client)."""
        self._dir = kwargs.pop('directory', '.whoosh')
        clear = bool(kwargs.pop('clear', config.store_clear))
        self._name = kwargs.pop('name', config.store_name)
        self._redis = kwargs.pop('redis', redis())
        if clear:
            shutil.rmtree(self._dir)
        now = datetime.now()
        self._last_index_time = now
        self._last_modified = now
        self._setup()
        if clear:
            self.reset()

    def _setup(self):
        """Build the schema, the redis-backed dicts and open (or create)
        the whoosh index.  Also invoked from __setstate__ on unpickling."""
        self._redis = getattr(self, '_redis', None)
        if not self._redis:
            self._redis = redis(
            )  # XXX test cases won't get correctly unpicked because of this
        self.schema = Schema(content=NGRAMWORDS(stored=False))
        self.schema.add("object_id", ID(stored=True, unique=True))
        self.schema.add("entity_id", ID(stored=True, unique=True))
        self.schema.add('sha1', ID(stored=True, unique=True))
        for a in list(ATTRS.keys()):
            self.schema.add(a, KEYWORD())
        self.objects = self.xml_dict('objects')
        self.parts = self.json_dict('parts')
        self.storage = FileStorage(os.path.join(self._dir, self._name))
        try:
            self.index = self.storage.open_index(schema=self.schema)
        except BaseException as ex:
            # No usable index on disk -- create one and rebuild from redis.
            log.warn(ex)
            self.storage.create()
            self.index = self.storage.create_index(self.schema)
            self._reindex()

    def __getstate__(self):
        """Pickle only the cheap, serializable configuration state; the
        redis client, dicts and index are rebuilt in __setstate__."""
        state = dict()
        for p in ('_dir', '_name', '_last_index_time', '_last_modified'):
            state[p] = getattr(self, p)
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        self._setup()

    def __call__(self, *args, **kwargs):
        """Pipeline hook: when given a scheduler, register a periodic
        _reindex job for this store."""
        watched = kwargs.pop('watched', None)
        scheduler = kwargs.pop('scheduler', None)
        if watched is not None and scheduler is not None:
            super(RedisWhooshStore, self).__call__(watched=watched,
                                                   scheduler=scheduler)
            log.debug("indexing using {}".format(scheduler))
            if scheduler is not None:  # and self._last_modified > self._last_index_time and :
                scheduler.add_job(RedisWhooshStore._reindex,
                                  args=[self],
                                  max_instances=1,
                                  coalesce=True,
                                  misfire_grace_time=2 *
                                  config.update_frequency)

    def _reindex(self):
        """Rebuild the whoosh index from the objects stored in redis,
        dropping objects no longer referenced by any collection part."""
        log.debug("indexing the store...")
        self._last_index_time = datetime.now()
        # An object ref is 'seen' if at least one part still references it.
        seen = set()
        refs = set([b2u(s) for s in self.objects.keys()])
        parts = self.parts.values()
        for ref in refs:
            for part in parts:
                if ref in part['items']:
                    seen.add(ref)

        ix = self.storage.open_index()
        lock = ix.lock("reindex")
        try:
            log.debug("waiting for index lock")
            lock.acquire(True)
            log.debug("got index lock")
            with ix.writer() as writer:
                for ref in refs:
                    if ref not in seen:
                        log.debug("removing unseen ref {}".format(ref))
                        del self.objects[ref]
                        del self.parts[ref]

                log.debug("updating index")
                for e in self.objects.values():
                    info = self._index_prep(entity_simple_info(e))
                    ref = object_id(e)
                    writer.add_document(object_id=ref, **info)

                # CLEAR makes the commit replace all previous segments.
                writer.mergetype = CLEAR
        finally:
            try:
                log.debug("releasing index lock")
                lock.release()
            except ThreadError:
                # Lock already released -- nothing left to do.
                pass

    def dump(self):
        """Debug helper: print every indexed document."""
        ix = self.storage.open_index()
        from whoosh.query import Every
        with ix.searcher() as searcher:
            # Fix: reuse the managed searcher; the original opened a second,
            # never-closed searcher inside this block.
            for result in searcher.search(Every('object_id')):
                print(result)

    def _index_prep(self, info):
        """Flatten entity info into indexable fields, deriving the free-text
        'content' blob and the sha1 id field."""
        res = dict()
        if 'entity_attributes' in info:
            # Hoist entity attributes to top-level keys for field mapping.
            for a, v in list(info.pop('entity_attributes').items()):
                info[a] = v

        content = " ".join(
            filter(lambda x: x is not None, [
                info.get(x, '') for x in ('service_name', 'title', 'domain',
                                          'keywords', 'scopes')
            ]))
        res['content'] = content.strip()
        for a, v in info.items():
            k = a
            if a in ATTRS_INV:
                k = ATTRS_INV[a]

            if k in self.schema.names():
                # isinstance instead of exact type() checks: also accepts
                # subclasses, otherwise identical behavior.
                if isinstance(v, (list, tuple)):
                    res[k] = " ".join([vv.lower() for vv in v])
                elif isinstance(v, six.string_types):
                    res[k] = info[a].lower()
        res['sha1'] = hash_id(info['entity_id'], prefix=False)
        return res

    def update(self, t, tid=None, etag=None, lazy=True):
        """Store an EntityDescriptor or EntitiesDescriptor tree *t*.

        Content is only (re)stored when the etag changed; the index is
        rebuilt immediately unless *lazy* is true.
        """
        relt = root(t)
        assert (relt is not None)

        if relt.tag == "{%s}EntityDescriptor" % NS['md']:
            # Single entity: stored as a one-item part keyed by object id.
            ref = object_id(relt)
            parts = None
            if ref in self.parts:
                parts = self.parts[ref]
            if etag is not None and (parts is None
                                     or parts.get('etag', None) != etag):
                self.parts[ref] = {
                    'id': relt.get('entityID'),
                    'etag': etag,
                    'count': 1,
                    'items': [ref]
                }
                self.objects[ref] = relt
                self._last_modified = datetime.now()
        elif relt.tag == "{%s}EntitiesDescriptor" % NS['md']:
            # Collection: store every contained entity under a single part.
            if tid is None:
                tid = relt.get('Name')
            if etag is None:
                etag = hex_digest(dumptree(t, pretty_print=False), 'sha256')
            parts = None
            if tid in self.parts:
                parts = self.parts[tid]
            if parts is None or parts.get('etag', None) != etag:
                items = set()
                for e in iter_entities(t):
                    ref = object_id(e)
                    items.add(ref)
                    self.objects[ref] = e
                self.parts[tid] = {
                    'id': tid,
                    'count': len(items),
                    'etag': etag,
                    'items': list(items)
                }
                self._last_modified = datetime.now()

        if not lazy:
            self._reindex()

    @ttl_cache(ttl=config.cache_ttl, maxsize=config.cache_size)
    def collections(self):
        """Return the ids of all stored collection parts."""
        return [b2u(ref) for ref in self.parts.keys()]

    def reset(self):
        """Delete this store's redis keys ('parts' and 'objects')."""
        for k in ('{}_{}'.format(self._name, 'parts'),
                  '{}_{}'.format(self._name, 'objects')):
            # Fix: delete the loop key -- the original ignored k and deleted
            # the same two hard-coded keys on every iteration.
            self._redis.delete(k)

    def size(self, a=None, v=None):
        """Count all objects, the values of attribute *a*, or the entities
        matching a=v, depending on which arguments are given."""
        if a is None:
            return len(self.objects.keys())
        elif a is not None and v is None:
            return len(self.attribute(a))
        else:
            return len(self.lookup("{!s}={!s}".format(a, v)))

    def _attributes(self):
        """Yield the attribute URIs that actually occur in the index."""
        ix = self.storage.open_index()
        with ix.reader() as reader:
            for n in reader.indexed_field_names():
                if n in ATTRS:
                    yield b2u(ATTRS[n])

    def attributes(self):
        return b2u(list(self._attributes()))

    def attribute(self, a):
        """Return all indexed values of attribute *a* (given by its URI)."""
        if a in ATTRS_INV:
            n = ATTRS_INV[a]
            ix = self.storage.open_index()
            with ix.searcher() as searcher:
                return b2u(list(searcher.lexicon(n)))
        else:
            return []

    def _prep_key(self, key):
        """Translate a pyFF lookup expression into whoosh query syntax."""
        # import pdb; pdb.set_trace()
        key = key.strip('+')
        key = key.replace('+', ' AND ')
        key = key.replace('-', ' AND NOT ')
        for uri, a in list(ATTRS_INV.items()):
            key = key.replace(uri, a)
        key = " {!s} ".format(key)
        # Raw strings: the original non-raw "\S" is a deprecated escape.
        key = re.sub(r"([^=]+)=(\S+)", r"\1:\2", key)
        key = re.sub(r"{([^}]+)}(\S+)", r"\1:\2", key)
        key = key.strip()

        return key

    def _entities(self):
        """Return every stored entity referenced by at least one part."""
        lst = set()
        for ref_data in self.parts.values():
            for ref in ref_data['items']:
                e = self.objects.get(ref, None)
                if e is not None:
                    lst.add(e)

        return b2u(list(lst))

    @ttl_cache(ttl=config.cache_ttl, maxsize=config.cache_size)
    def lookup(self, key):
        """Resolve *key* to a list of entities.

        Handles, in order: the 'entities' pseudo-key, a direct object id,
        a collection part id, and finally a whoosh query over object_id.
        """
        if key == 'entities' or key is None:
            return self._entities()

        bkey = six.b(key)
        if bkey in self.objects:
            return [self.objects.get(bkey)]

        if bkey in self.parts:
            res = []
            part = self.parts.get(bkey)
            for item in part['items']:
                res.extend(self.lookup(item))
            return res

        key = self._prep_key(key)
        qp = QueryParser("object_id", schema=self.schema)
        q = qp.parse(key)
        lst = set()
        with self.index.searcher() as searcher:
            results = searcher.search(q, limit=None)
            for result in results:
                e = self.objects.get(result['object_id'], None)
                if e is not None:
                    lst.add(e)

        return b2u(list(lst))

    @ttl_cache(ttl=config.cache_ttl, maxsize=config.cache_size)
    def search(self, query=None, path=None, entity_filter=None, related=None):
        """Free-text search over content/domain; return discojson dicts."""
        if entity_filter:
            query = "{!s} AND {!s}".format(query, entity_filter)
        query = self._prep_key(query)
        qp = MultifieldParser(['content', 'domain'], schema=self.schema)
        q = qp.parse(query)
        lst = set()
        with self.index.searcher() as searcher:
            results = searcher.search(q, limit=None)
            log.debug(results)
            for result in results:
                lst.add(result['object_id'])

        res = list()
        for ref in lst:
            e = self.objects.get(ref, None)
            if e is not None:
                res.append(discojson(e))
        return res
コード例 #10
0
def _temp_storage(self, name=None):
    """Return a created FileStorage rooted in a fresh temp directory.

    The *name* argument is currently unused.
    """
    path = tempfile.mkdtemp()
    tempstore = FileStorage(path)
    return tempstore.create()
コード例 #11
0
ファイル: test_search.py プロジェクト: nblock/weblate
class SearchMigrationTest(TestCase, TempDirMixin):
    """Search index migration testing"""
    def setUp(self):
        self.create_temp()
        self.backup = weblate.trans.search.STORAGE
        self.storage = FileStorage(self.tempdir)
        weblate.trans.search.STORAGE = self.storage
        self.storage.create()

    def tearDown(self):
        self.remove_temp()
        weblate.trans.search.STORAGE = self.backup

    def do_test(self, source, target):
        if source is not None:
            self.storage.create_index(source, 'source')
        if target is not None:
            self.storage.create_index(target, 'target-cs')

        sindex = weblate.trans.search.get_source_index()
        self.assertIsNotNone(sindex)
        tindex = weblate.trans.search.get_target_index('cs')
        self.assertIsNotNone(tindex)
        writer = sindex.writer()
        writer.update_document(
            pk=1,
            source="source",
            context="context",
            location="location",
        )
        writer.commit()
        writer = tindex.writer()
        writer.update_document(
            pk=1,
            target="target",
            comment="comment"
        )
        writer.commit()
        for item in ('source', 'context', 'location', 'target'):
            self.assertEqual(
                fulltext_search(item, ['cs'], {item: True}),
                set([1])
            )

    def test_nonexisting(self):
        self.do_test(None, None)

    def test_nonexisting_dir(self):
        shutil.rmtree(self.tempdir)
        self.tempdir = None
        self.do_test(None, None)

    def test_current(self):
        source = weblate.trans.search.SourceSchema
        target = weblate.trans.search.TargetSchema
        self.do_test(source, target)

    def test_2_4(self):
        source = Schema(
            checksum=ID(stored=True, unique=True),
            source=TEXT(),
            context=TEXT(),
            location=TEXT()
        )
        target = Schema(
            checksum=ID(stored=True, unique=True),
            target=TEXT(),
            comment=TEXT(),
        )
        self.do_test(source, target)

    def test_2_1(self):
        source = Schema(
            checksum=ID(stored=True, unique=True),
            source=TEXT(),
            context=TEXT(),
        )
        target = Schema(
            checksum=ID(stored=True, unique=True),
            target=TEXT(),
        )
        self.do_test(source, target)
コード例 #12
0
ファイル: db.py プロジェクト: mickael9/fac
class DB:
    """Local mod-database cache: a JSON file of mod data plus a whoosh
    search index, both kept in the user cache directory."""

    def __init__(self, config, api):
        self.config = config
        self.api = api
        self.cache_dir = user_cache_dir('fac', appauthor=False)
        self.storage = FileStorage(os.path.join(self.cache_dir, 'index'))

        # Search schema: boosted name/owner fields, plus a stored name_id
        # used to look the mod back up in the JSON database.
        self.schema = Schema(
            name=TEXT(sortable=True, phrase=True, field_boost=3,
                      analyzer=intraword),
            owner=TEXT(sortable=True, field_boost=2.5,
                       analyzer=intraword),
            title=TEXT(field_boost=2.0, phrase=False),
            summary=TEXT(phrase=True),
            downloads=NUMERIC(sortable=True),
            sort_name=SortColumn(),
            name_id=ID(stored=True),
        )

        try:
            self.index = self.storage.open_index()
        except EmptyIndexError:
            # No index built yet; needs_update() will then report True.
            self.index = None

        self.db = JSONFile(os.path.join(self.cache_dir, 'mods.json'))

    def maybe_update(self):
        """Refresh the database only when it is missing or stale."""
        if self.needs_update():
            self.update()

    def needs_update(self):
        """Return True when the index/db is missing or older than the
        configured update period."""
        if not self.index or not self.db.get('mods'):
            return True

        last_update = self.db.mtime
        period = int(self.config.get('db', 'update_period'))
        db_age = time.time() - last_update

        return db_age > period

    def update(self):
        """Download the mod list and rebuild the search index if the mod
        data actually changed."""
        with ProgressWidget("Downloading mod database...") as progress:
            mods = self.api.get_mods(progress)

        old_mods = self.db.get('mods', {})

        self.db.mods = {mod.name: mod.data
                        for mod in mods}

        if old_mods != self.db['mods']:
            print("Building search index...")
            # Recreate storage and index from scratch, then fill it.
            self.index = self.storage.create().create_index(self.schema)

            with self.index.writer() as w:
                for mod in mods:
                    w.add_document(
                        name_id=mod.name,
                        name=mod.name,
                        sort_name=mod.name.lower(),
                        title=mod.title.lower(),
                        owner=mod.owner.lower(),
                        summary=mod.summary.lower(),
                        downloads=mod.downloads_count
                    )
                self.db.save()
            print("Updated mods database (%d mods)" % len(mods))
        else:
            print("Index is up to date")
            self.db.utime()

    def search(self, query, sortedby=None, limit=None):
        """Yield matching mods as JSONDicts (with a .score attribute).

        *sortedby* is a comma-separated field list; a '-' prefix reverses a
        field, and 'sort_<field>' columns are used when available.
        """
        parser = qparser.MultifieldParser(
            ['owner', 'name', 'title', 'summary'],
            schema=self.schema
        )
        parser.add_plugin(qparser.FuzzyTermPlugin())

        if not isinstance(query, Query):
            # Default to matching everything when no query is given.
            query = parser.parse(query or 'name:*')

        with self.index.searcher() as searcher:
            if sortedby:
                facets = []
                for field in sortedby.split(','):
                    reverse = field.startswith('-')
                    if reverse:
                        field = field[1:]

                    if 'sort_' + field in self.schema:
                        field = 'sort_' + field
                    facets.append(FieldFacet(field, reverse=reverse))

                if len(facets) == 1:
                    sortedby = facets[0]
                else:
                    sortedby = MultiFacet(facets)

            # NOTE: results are yielded while the searcher is still open.
            for result in searcher.search(
                    query,
                    limit=limit,
                    sortedby=sortedby):

                d = JSONDict(self.db.mods[result['name_id']])
                d.score = result.score
                yield d

    @property
    def mods(self):
        return self.db.mods
コード例 #13
0
class SearchMigrationTest(TestCase):
    """Search index migration testing"""

    def setUp(self):
        # Redirect the global search storage into a scratch directory,
        # keeping the original for restoration in tearDown.
        self.path = tempfile.mkdtemp()
        self.backup = weblate.trans.search.STORAGE
        self.storage = FileStorage(self.path)
        weblate.trans.search.STORAGE = self.storage
        self.storage.create()

    def tearDown(self):
        if os.path.exists(self.path):
            shutil.rmtree(self.path)
        weblate.trans.search.STORAGE = self.backup

    def do_test(self, source, target):
        """Pre-create indices with the given schemas (None means absent)
        and check that both indices can still be obtained afterwards."""
        for schema, index_name in ((source, 'source'), (target, 'target-cs')):
            if schema is not None:
                self.storage.create_index(schema, index_name)

        self.assertIsNotNone(weblate.trans.search.get_source_index())
        self.assertIsNotNone(weblate.trans.search.get_target_index('cs'))

    def test_nonexisting(self):
        self.do_test(None, None)

    def test_nonexisting_dir(self):
        # Migration must cope with the storage directory having vanished.
        shutil.rmtree(self.path)
        self.do_test(None, None)

    def test_current(self):
        self.do_test(weblate.trans.search.SourceSchema,
                     weblate.trans.search.TargetSchema)

    def test_2_4(self):
        # Schemas as they looked in weblate 2.4.
        source = Schema(checksum=ID(stored=True, unique=True), source=TEXT(),
                        context=TEXT(), location=TEXT())
        target = Schema(checksum=ID(stored=True, unique=True), target=TEXT(),
                        comment=TEXT())
        self.do_test(source, target)

    def test_2_1(self):
        # Schemas as they looked in weblate 2.1.
        source = Schema(checksum=ID(stored=True, unique=True), source=TEXT(),
                        context=TEXT())
        target = Schema(checksum=ID(stored=True, unique=True), target=TEXT())
        self.do_test(source, target)