コード例 #1
0
    def is_xapiancachedb_need_update(self):
        """Check whether the cached xapian db needs to be rebuilt.

        Reads the special "the_#ukxapiandb#_version" document from both the
        source db and the cached copy and compares the version stored in
        value slot 1.  Any failure (e.g. the cache db does not exist yet)
        is treated as "update needed".

        Returns:
            bool: True when the cache is missing or stale, False otherwise.
        """
        xapian_srcFile = XAPIAN_DB_SOURCE_PATH
        xapian_destFile = os.path.join(UKSC_CACHE_DIR, "xapiandb")

        try:
            src_xapiandb = xapian.Database(xapian_srcFile)
            new_enquire = xapian.Enquire(src_xapiandb)
            new_enquire.set_query(xapian.Query("the_#ukxapiandb#_version"))
            new_matches = new_enquire.get_mset(0, 1)

            for new_item in new_matches:
                new_doc = new_item.document
                if new_doc.get_data() == "XAPIANDB_VERSION":
                    # value slot 1 holds the xapiandb version string
                    new_version = new_doc.get_value(1)
                    des_xapiandb = xapian.Database(xapian_destFile)
                    old_enquire = xapian.Enquire(des_xapiandb)
                    old_enquire.set_query(
                        xapian.Query("the_#ukxapiandb#_version"))
                    old_matches = old_enquire.get_mset(0, 1)
                    for old_item in old_matches:
                        old_doc = old_item.document
                        # value slot 1 holds the xapiandb version string
                        old_version = old_doc.get_value(1)
            #if (Globals.DEBUG_SWITCH):
            print(("old xapiandb  version:", old_version,
                   " new xapiandb version:", new_version))
        except Exception:
            # BUG FIX: was a bare ``except:`` which also swallowed
            # SystemExit/KeyboardInterrupt.  Any real failure here (missing
            # cache db, version document absent so the locals above are
            # undefined) still means "needs update".
            return True
        else:
            # NOTE(review): versions are compared as strings, i.e.
            # lexicographically -- confirm the version format sorts correctly.
            if new_version > old_version:
                return True
            else:
                return False
コード例 #2
0
ファイル: nvim.py プロジェクト: cwoac/nvvim
    def reload_database(self):  # {{{
        """Reopen the xapian database and rebuild the query helpers."""
        # Writable handle, creating the database on first use.
        self.database_handle = xapian.WritableDatabase(
            self.database, xapian.DB_CREATE_OR_OPEN)

        # Query parser: binding it to the database is required for
        # incremental search.
        parser = xapian.QueryParser()
        parser.set_database(self.database_handle)
        parser.set_stemmer(xapian.Stem(self.language))
        parser.set_stemming_strategy(parser.STEM_SOME)
        parser.add_prefix("title", "S")
        self.query_parser = parser

        # Term generator used when (re)indexing documents.
        generator = xapian.TermGenerator()
        generator.set_stemmer(xapian.Stem(self.language))
        try:
            generator.set_stemming_strategy(generator.STEM_SOME)
        except AttributeError:
            # Older xapian bindings lack this method on TermGenerator.
            pass
        self.term_generator = generator

        self.enquire = xapian.Enquire(self.database_handle)
        self.sorted_e = xapian.Enquire(self.database_handle)
        # Value slot 2 stores the lowercase form of the title.
        self.sorted_e.set_sort_by_value(2, False)
コード例 #3
0
def test_eset_iter():
    """Test iterators over ESets.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, "was", "it")
    rset = xapian.RSet()
    rset.add_document(3)

    context("getting eset items without a query")
    enquire = xapian.Enquire(db)
    eset = enquire.get_eset(10, rset)
    no_query_items = list(eset)
    expect(len(no_query_items), 3)
    expect(len(no_query_items), len(eset))

    context("getting eset items with a query")
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    eset = enquire.get_eset(10, rset)
    query_items = list(eset)
    expect(len(query_items), 2)
    expect(len(query_items), len(eset))

    # With a query, term 1 of the unqueried eset is filtered out, so the
    # remaining items line up at indices 0 and 2.
    context("comparing eset items with a query to those without")
    expect(query_items[0].term, no_query_items[0].term)
    expect(query_items[1].term, no_query_items[2].term)

    context("comparing eset weights with a query to those without")
    expect(query_items[0].weight, no_query_items[0].weight)
    expect(query_items[1].weight, no_query_items[2].weight)
コード例 #4
0
def test_weight_normalise():
    """Test normalising of query weights using the OP_SCALE_WEIGHT feature.

    For each query: run a search asking for no results to learn the maximum
    possible weight, then rescale the query by 1/max and check that every
    returned weight lands in (0, 1] and get_max_possible() is ~1.
    """
    db = setup_database()
    querystrings = (
        "it",
        "was",
        "it was",
        "it was four",
        "it was four five",
        "\"was it warm\" four notpresent",
        "notpresent",
    )
    for querystring in querystrings:
        context(
            "checking query %r using OP_SCALE_WEIGHT to normalise the weights"
            % querystring)
        parsed = xapian.QueryParser().parse_query(querystring)
        enquire = xapian.Enquire(db)
        enquire.set_query(parsed)
        probe_mset = enquire.get_mset(0, 0)

        # Asking for zero results should mean no weights were actually
        # computed, so max_attained stays 0.
        expect(probe_mset.get_max_attained(), 0)

        max_possible = probe_mset.get_max_possible()
        if querystring == "notpresent":
            expect(max_possible, 0)
            continue
        scaled = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, parsed,
                              1.0 / max_possible)

        enquire = xapian.Enquire(db)
        enquire.set_query(scaled)
        mset = enquire.get_mset(0, 10)
        # Excluding rounding errors, max_possible should now be exactly 1.
        expect(int(mset.get_max_possible() * 1000000.0 + 0.5), 1000000)
        for item in mset:
            expect(item.weight > 0, True)
            expect(item.weight <= 1, True)
コード例 #5
0
def xapian_get_bitset(index, query):
    """
    Queries a Xapian index.
    Returns: an intbitset containing all record ids
    """
    # Lazily open the databases on first use.
    if not DATABASES:
        xapian_init_databases()

    database = DATABASES[index]

    # Build a stemming query parser bound to this database.
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("english"))
    parser.set_database(database)
    parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    parsed = parser.parse_query(query, xapian.QueryParser.FLAG_PHRASE)

    enquire = xapian.Enquire(database)
    enquire.set_query(parsed)
    # Ask for every possible document so the bitset is complete.
    matches = enquire.get_mset(0, database.get_lastdocid())

    result = intbitset()
    for match in matches:
        result.add(match.docid)
    return result
コード例 #6
0
    def __init__(self, dbpath='simplehaha'):
        """Open the xapian database at *dbpath*, build a stemming query
        parser, record the value-slot layout and connect the redis caches.
        """
        database = xapian.Database(dbpath)
        enquire = xapian.Enquire(database)
        qp = xapian.QueryParser()
        stemmer = xapian.Stem("english")
        qp.set_stemmer(stemmer)
        qp.set_database(database)
        qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
        self.qp = qp
        self.enquire = enquire
        # Xapian value-slot numbers for the fields stored on each document.
        self.emotionvi = 0
        self.keywordsvi = 1
        self.timestampvi = 2
        self.loctvi = 3
        self.reploctvi = 4
        self.emotiononlyvi = 5
        #usernamevi = 6
        self.hashtagsvi = 7
        #uidvi = 8
        #repnameslistvi = 9
        #widvi = 10
        # Upper bound on how many matches a single query may retrieve.
        self.maxitems = 1000000000

        # redis db 1: set of low-frequency keywords (flushed on startup).
        pool = redis.ConnectionPool(host='localhost', port=6379, db=1)
        self.r = redis.Redis(connection_pool=pool)
        self.r.flushdb()
        self.lowkeywords_set_rds = 'lowkeywords'

        # redis db 2: keyword hash cache (flushed on startup).
        pool1 = redis.ConnectionPool(host='localhost', port=6379, db=2)
        self.r1 = redis.Redis(connection_pool=pool1)
        self.r1.flushdb()
        self.keywords_hash_rds = 'keywords_hash'
コード例 #7
0
 def test_reinstall_purchased_xapian(self):
     """Verify that a purchased app needing reinstall is added to the
     xapian index and carries the expected pkgname, signing key id and
     deb line values.
     """
     db = StoreDatabase("/var/cache/software-center/xapian", self.cache)
     db.open(use_axi=False)
     # now create purchased debs xapian index (in memory because
     # we store the repository passwords in here)
     old_db_len = len(db)
     query = add_from_purchased_but_needs_reinstall_data(
         self.available_to_me, db, self.cache)
     # ensure we have a new item (the available for reinstall one)
     self.assertEqual(len(db), old_db_len + 1)
     # query
     enquire = xapian.Enquire(db.xapiandb)
     enquire.set_query(query)
     matches = enquire.get_mset(0, len(db))
     # exactly one document should match the purchased query
     self.assertEqual(len(matches), 1)
     for m in matches:
         doc = db.xapiandb.get_document(m.docid)
         self.assertEqual(doc.get_value(XapianValues.PKGNAME), "photobomb")
         self.assertEqual(
             doc.get_value(XapianValues.ARCHIVE_SIGNING_KEY_ID),
             "1024R/75254D99")
         self.assertEqual(
             doc.get_value(XapianValues.ARCHIVE_DEB_LINE),
             "deb https://username:random3atoken@"
             "private-ppa.launchpad.net/commercial-ppa-uploaders"
             "/photobomb/ubuntu precise main")
コード例 #8
0
ファイル: qa_server.py プロジェクト: anoop2019/simple-qa
    def _build_index(self, filepath, recreate=False):
        """Build (or open) the xapian index for *filepath*.

        The index lives next to the source file as ``<filepath>.index``;
        each sentence of the text becomes one xapian document.  Afterwards
        ``self.parser`` and ``self.enquire`` are ready for querying.

        Input:
            - filepath: text file path; .gz, .bz2 and plain text supported
            - recreate: bool, True forces the index to be rebuilt,
              default is False
        """
        cached_index = filepath + ".index"

        if os.path.exists(cached_index):
            if recreate:
                # Drop the stale index directory so it is rebuilt below.
                shutil.rmtree(cached_index)
        else:
            recreate = True

        stemmer = xapian.Stem("english")

        if not recreate:
            database = xapian.Database(cached_index)
        else:
            database = xapian.WritableDatabase(cached_index,
                                               xapian.DB_CREATE_OR_OPEN)
            indexer = xapian.TermGenerator()
            indexer.set_stemmer(stemmer)

            # Pick an opener matching the file's compression, if any.
            ext = os.path.splitext(filepath)[-1]
            if ext == ".bz2":
                import bz2
                open_func = bz2.open
            elif ext == ".gz":
                import gzip
                open_func = gzip.open
            else:
                open_func = open

            with open_func(filepath, mode="rt", encoding="utf-8") as f:
                # totN: sentences total, totP: paragraphs,
                # totS: sentences in the current paragraph.
                totN, totP, totS = 0, 0, 0
                for l in tqdm(f, desc="Building index", unit=" lines"):
                    l = l.strip()
                    if len(l) < 1:
                        # Blank line terminates the current paragraph.
                        if totS > 0: totP += 1
                        totS = 0
                        continue
                    for sent in nltk.sent_tokenize(l):
                        # BUG FIX: str.strip() returns a new string; the
                        # original called it and discarded the result.
                        sent = sent.strip()
                        doc = xapian.Document()
                        doc.set_data(sent)
                        indexer.set_document(doc)
                        indexer.index_text(sent)
                        database.add_document(doc)

                        totN += 1
                        totS += 1

        self.parser = xapian.QueryParser()
        self.parser.set_stemmer(stemmer)
        self.parser.set_database(database)
        self.parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
        self.enquire = xapian.Enquire(database)
コード例 #9
0
def search():
    """Interactive console search loop (Python 2).

    Reads keywords from stdin, segments them with jieba, ORs the resulting
    terms into a xapian query and prints the top 10 matches, repeating
    until the user answers 0 at the "again?" prompt.
    """
    database = xapian.Database('indexes/')
    enquire = xapian.Enquire(database)
    running = 1
    while int(running):
        # NOTE(review): shadows the builtin ``str`` for this loop body.
        str = raw_input("input the key words:")
        terms = []
        a = jieba.cut_for_search(str)
        for b in a:
            terms.append(b.encode("utf-8"))
        qp = xapian.QueryParser()  # build the query parser
        qp.set_database(database)
        qp.set_default_op(xapian.Query.OP_AND)  # set the default query operator
        #query = qp.parse_query(terms)
        query = xapian.Query(xapian.Query.OP_OR, terms)  # OR together all segmented terms
        enquire.set_query(query)
        matches = enquire.get_mset(0, 10)
        print "%i results found" % matches.get_matches_estimated()
        for match in matches:
            a = match.document.get_data()
            # NOTE(review): eval() on stored document data -- unsafe if the
            # index content is untrusted.
            d = eval(a)
            print "贴吧:", d["title"]
            print "作者:", d["reply"]["name"]
            print "回复:", d["reply"]["content"]
            print "时间:", d["reply"]["time"]
        running = raw_input("again?(1(yse)/0(no) :")
    print "thank you for using!"
コード例 #10
0
def test_matchingterms_iter():
    """Test Enquire.matching_terms iterator.

    """
    db = setup_database()
    query = xapian.Query(xapian.Query.OP_OR, ("was", "it", "warm", "two"))

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10)

    for item in mset:
        # matching_terms accepts either a raw docid or the MSetItem itself;
        # both forms must yield the same term sequence.
        by_docid = [term for term in enquire.matching_terms(item.docid)]
        by_item = [term for term in enquire.matching_terms(item)]
        expect(by_docid, by_item)

    # An MSet hit object works as the argument too.
    hit_terms = [term for term in enquire.matching_terms(mset.get_hit(0))]
    expect(hit_terms, ['it', 'two', 'warm', 'was'])
コード例 #11
0
 def get_most_popular_applications_for_mimetype(self,
                                                mimetype,
                                                only_uninstalled=True,
                                                num=3):
     """ return a list of the most popular applications for the given
         mimetype

         only_uninstalled -- if True, include only apps whose package is
                             not currently installed
         num -- maximum number of Application objects to return
     """
     # sort by popularity by default
     enquire = xapian.Enquire(self.xapiandb)
     enquire.set_sort_by_value_then_relevance(XapianValues.POPCON)
     # query mimetype ("AM" is the mimetype term prefix)
     query = xapian.Query("AM%s" % mimetype)
     enquire.set_query(query)
     # mset just needs to be "big enough""
     matches = enquire.get_mset(0, 100)
     apps = []
     for match in matches:
         doc = match.document
         app = Application(self.get_appname(doc),
                          self.get_pkgname(doc),
                           popcon=self.get_popcon(doc))
         if only_uninstalled:
             # skip apps that are already installed
             if app.get_details(self).pkg_state == PkgStates.UNINSTALLED:
                 apps.append(app)
         else:
             apps.append(app)
         # stop as soon as we have collected enough apps
         if len(apps) == num:
             break
     return apps
コード例 #12
0
ファイル: search.py プロジェクト: neeserg/WSTA_fact_checker
def search_query(claim):
    """Search the titles index for documents matching *claim*.

    The claim is stripped of English stopwords, reduced to noun phrases and
    run through a stemming query parser; the top five matches are returned
    as decoded JSON dicts.
    """
    stopWords = set(stopwords.words('english'))
    claim = word_tokenize(claim)
    claim = " ".join([w for w in claim if w not in stopWords])
    claim = noun_phrases(claim)

    db = xapian.Database('/home/xusheng/Downloads/ano-titles')
    query_parser = xapian.QueryParser()
    query_parser.set_stemmer(xapian.Stem('en'))
    query_parser.set_stemming_strategy(query_parser.STEM_SOME)
    #query = query_parser.parse_query("title:"+claim)
    query = query_parser.parse_query(claim)

    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    matches = []

    for match in enquire.get_mset(0, 5):
        # The stored match data is a JSON-encoded python dict.
        # (The unused ``doc_title`` local from the original was removed.)
        match_doc = json.loads(match.document.get_data().decode('utf8'))
        matches.append(match_doc)

    return matches
コード例 #13
0
    def search(self, searchterm, extractlength=32):
        """Run *searchterm* over the whole index and return a tuple of
        (estimated match count, match count, list of result dicts)."""
        # Parse the user-supplied query string.
        query = self.__queryparser.parse_query(searchterm)

        # One "page" covering the entire database.
        offset = 0
        limit = self.__db.get_doccount()

        enquire = xapian.Enquire(self.__db)
        enquire.set_query(query)
        matches = enquire.get_mset(offset, limit)

        results = []
        for match in matches:
            content = match.document.get_data()
            # Highlight the search term inside a short extract.
            extract = TextMachine(extractlength,
                                  '*%s*').process(searchterm, content)
            results.append({
                "rank": match.rank,
                "docid": match.docid,
                "text": extract,
            })

        return (matches.get_matches_estimated(), matches.size(), results)
コード例 #14
0
    def __init__(self, path=None, name='master_timeline_weibo', stub=None, include_remote=False, schema=Schema, schema_version=SCHEMA_VERSION):
        """Open one or more xapian database shards (directly or via stub
        files) and prepare a shared Enquire object configured from *schema*.
        """
        def create(dbpath):
            # Open a single concrete database directory.
            return _database(dbpath)

        def merge(db1, db2):
            # Fold db2 into db1 so all shards are searched together.
            db1.add_database(db2)
            return db1

        if stub:
            # If a list is given, every entry is assumed to be a stub file.
            if isinstance(stub, list):
                self.database = reduce(merge,
                                       map(_stub_database, stub))
            elif os.path.isfile(stub):
                self.database = _stub_database(stub)
            elif os.path.isdir(stub):
                # A directory of stub files: open and merge all of them.
                self.database = reduce(merge,
                                       map(_stub_database, [os.path.join(stub, p) for p in os.listdir(stub)]))
        else:
            # No stub: merge every shard under *path* named "_<name>...".
            self.database = reduce(merge,
                                   map(create, [os.path.join(path, p) for p in os.listdir(path) if p.startswith('_%s' % name)]))

        self.schema = getattr(schema, 'v%s' % schema_version)
        enquire = xapian.Enquire(self.database)
        enquire.set_weighting_scheme(xapian.BoolWeight())  # simplest weighting scheme, for speed
        enquire.set_docid_order(xapian.Enquire.DONT_CARE)  # mset ordering does not matter here

        if 'collapse_valueno' in self.schema:
            # Collapse duplicate documents on the configured value slot.
            enquire.set_collapse_key(self.schema['collapse_valueno'])

        self.enquire = enquire
        self.include_remote = include_remote
コード例 #15
0
 def _update_channel_list_installed_view(self):
     """Refresh the subnodes of the "installed" tree node: append one
     child per channel that has at least one installed package, then
     remove the previous children.
     """
     # see comments for _update_channel_list_available_view() method above
     child = self.iter_children(self.installed_iter)
     iters_to_kill = set()
     while child:
         iters_to_kill.add(child)
         child = self.iter_next(child)
     # iterate the channels and add as subnodes of the installed node
     for channel in self.channel_manager.channels_installed_only:
         # check for no installed items for each channel and do not
         # append the channel item in this case
         enquire = xapian.Enquire(self.db.xapiandb)
         query = channel.query
         enquire.set_query(query)
         matches = enquire.get_mset(0, len(self.db))
         # only check channels that have a small number of items
         add_channel_item = True
         if len(matches) < 200:
             add_channel_item = False
             # include the channel as soon as one match is installed
             for m in matches:
                 doc = m.document
                 pkgname = self.db.get_pkgname(doc)
                 if (pkgname in self.cache
                         and self.cache[pkgname].is_installed):
                     add_channel_item = True
                     break
         if add_channel_item:
             self.append(self.installed_iter, [
                 channel.icon, channel.display_name, ViewPages.CHANNEL,
                 channel, None
             ])
     # delete the old ones
     for child in iters_to_kill:
         self.remove(child)
コード例 #16
0
    def xmlrpc_search(self, text, page):
        """Handle an XML-RPC search request.

        Returns a dict with the estimated match count plus one result dict
        per match on the requested page, or None for a negative page or an
        unparsable query.
        """
        # TODO: Run queries in threads because it's blocking operation.
        if page < 0:
            return
        try:
            query = self.query_parser.parse_query(text)
        except xapian.QueryParserError:
            return
        enquire = xapian.Enquire(self.db)
        enquire.set_query(query)
        # NOTE(review): reopen() runs after the Enquire was created --
        # confirm this still refreshes the view used by get_mset below.
        self.db.reopen()

        def process_match(match):
            # Flatten one match into a plain serialisable dict.
            doc = match.document
            return dict(id=doc.get_value(Indexer.ID),
                        user=doc.get_value(Indexer.USER),
                        date=float(doc.get_value(Indexer.DATE_ORIG)),
                        type=doc.get_value(Indexer.TYPE),
                        tags_info=doc.get_value(Indexer.TAGS_INFO),
                        text=doc.get_data().decode('utf-8'),
                        percent=match.percent)

        matches = enquire.get_mset(page * self.PAGE_SIZE, self.PAGE_SIZE)
        estimated = matches.get_matches_estimated()
        # BUG FIX: on Python 3 ``map`` returns a lazy iterator, which is not
        # XML-RPC serialisable; materialise it (a no-op on Python 2).
        results = list(map(process_match, matches))
        return dict(estimated=estimated, results=results)
コード例 #17
0
ファイル: searcher.py プロジェクト: gnuaha7/arachne
    def search(self,
               query,
               offset,
               count,
               check_at_least,
               site_ids=(),
               filetype=SEARCH_ALL):
        """Query the index.

        The `query` argument is the user supplied query string. The `sites` and
        `filetype` arguments can be used to restrict the domain of the search.
        """
        # Normalise byte strings to unicode before parsing (Python 2).
        if type(query) is not unicode:
            query = query.decode('utf-8')
        enquire = xapian.Enquire(self._db)
        enquire.set_query(self._parse_query(query, site_ids, filetype))
        mset = enquire.get_mset(offset, count, check_at_least)

        results = []
        for match in mset:
            doc = match.get_document()
            is_dir_value = doc.get_value(
                IndexProcessor.IS_DIR_SLOT).decode('utf-8')
            results.append({
                'url': doc.get_data().decode('utf-8'),
                'is_dir': is_dir_value == IndexProcessor.TRUE_VALUE,
            })
        return (mset.get_matches_estimated(), results)
コード例 #18
0
def test_director_exception():
    """Test handling of an exception raised in a director.

    """
    db = setup_database()
    query = xapian.Query('it')
    enq = xapian.Enquire(db)
    enq.set_query(query)

    # Custom exception whose message is built from both constructor args.
    class TestException(Exception):
        def __init__(self, a, b):
            Exception.__init__(self, a + b)

    rset = xapian.RSet()
    rset.add_document(1)

    # An ExpandDecider that always raises: the Python exception must
    # propagate back out through xapian's C++ layer.
    class EDecider(xapian.ExpandDecider):
        def __call__(self, term):
            raise TestException("foo", "bar")

    edecider = EDecider()
    # Calling the decider directly raises, and the same exception surfaces
    # when it is invoked internally by get_eset.
    expect_exception(TestException, "foobar", edecider, "foo")
    expect_exception(TestException, "foobar", enq.get_eset, 10, rset, edecider)

    # Same check for a MatchDecider used during get_mset.
    class MDecider(xapian.MatchDecider):
        def __call__(self, doc):
            raise TestException("foo", "bar")

    mdecider = MDecider()
    expect_exception(TestException, "foobar", mdecider, xapian.Document())
    expect_exception(TestException, "foobar", enq.get_mset, 0, 10, None,
                     mdecider)
コード例 #19
0
ファイル: processor.py プロジェクト: gnuaha7/arachne
 def _rmtree(self, site_id, dirpath):
     """Remove documents for entries in the given directory tree. The
     document of the root of the directory tree is also removed.
     """
     enquire = xapian.Enquire(self._db)
     # Match order is irrelevant here; let xapian pick the cheapest.
     enquire.set_docid_order(xapian.Enquire.DONT_CARE)
     site_id_query = xapian.Query(self.SITE_ID_PREFIX + site_id)
     # Remove document of the directory itself.
     path_query = xapian.Query(xapian.Query.OP_VALUE_RANGE,
                               self.PATH_SLOT, dirpath, dirpath)
     query = xapian.Query(xapian.Query.OP_FILTER, site_id_query, path_query)
     enquire.set_query(query)
     for match in enquire.get_mset(0, self._db.get_doccount()):
         doc = match.get_document()
         self._db.delete_document(doc.get_docid())
     # Remove documents of the decendants.
     dirname_start = dirpath.rstrip(u'/') + u'/'
     # U+10FFFF is the highest code point, so this range covers every
     # dirname that starts with dirname_start.
     dirname_end = dirname_start + u'\U0010ffff'
     dirname_query = xapian.Query(xapian.Query.OP_VALUE_RANGE,
                                  self.DIRNAME_SLOT, dirname_start,
                                  dirname_end)
     query = xapian.Query(xapian.Query.OP_FILTER, site_id_query,
                          dirname_query)
     enquire.set_query(query)
     for match in enquire.get_mset(0, self._db.get_doccount()):
         doc = match.get_document()
         self._db.delete_document(doc.get_docid())
コード例 #20
0
ファイル: bench.py プロジェクト: dongshige/wikidpad
 def searcher(self):
     """Open the xappy index for this benchmark and prepare query objects."""
     index_dir = "%s_xappy" % self.options.indexname
     path = os.path.join(self.options.dir, index_dir)
     self.db = xapian.Database(path)
     self.enq = xapian.Enquire(self.db)
     self.qp = xapian.QueryParser()
     self.qp.set_database(self.db)
コード例 #21
0
def test_scale_weight():
    """Test query OP_SCALE_WEIGHT feature.

    """
    db = setup_database()
    for mult in (0, 1, 2.5):
        context(
            "checking queries with OP_SCALE_WEIGHT with a multiplier of %r" %
            mult)
        base_query = xapian.Query("it")
        scaled_query = xapian.Query(xapian.Query.OP_SCALE_WEIGHT, base_query,
                                    mult)

        enquire = xapian.Enquire(db)
        enquire.set_query(base_query)
        base_mset = enquire.get_mset(0, 10)
        enquire.set_query(scaled_query)
        scaled_mset = enquire.get_mset(0, 10)

        if mult <= 0:
            # All weights collapse to zero, so order by docid for comparison.
            expected = sorted((0, item.docid) for item in base_mset)
        else:
            expected = [(int(item.weight * mult * 1000000), item.docid)
                        for item in base_mset]
        expect([(int(item.weight * 1000000), item.docid)
                for item in scaled_mset], expected)

    context("checking queries with OP_SCALE_WEIGHT with a multiplier of -1")
    base_query = xapian.Query("it")
    expect_exception(
        xapian.InvalidArgumentError,
        "Xapian::Query: SCALE_WEIGHT requires a non-negative parameter.",
        xapian.Query, xapian.Query.OP_SCALE_WEIGHT, base_query, -1)
コード例 #22
0
def search(dbpath, querystring, offset=0, pagesize=100, ident=0):
    """Search *dbpath* for *querystring* and print one TREC-style line per
    match.

    - offset: starting point within the result set
    - pagesize: number of records to retrieve
    - ident: query identifier echoed into each output line

    Returns the list of matching docids (the original built this list but
    never exposed it).
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("pt"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    # Start of prefix configuration.
    #queryparser.add_prefix("text", "XD")
    # End of prefix configuration.

    # And parse the query
    query = queryparser.parse_query(querystring)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # And print out something about each match
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        # NOTE(review): get_data() returns bytes on Python 3; the split
        # below assumes a str payload of the form "id:..." -- confirm.
        field = match.document.get_data()
        print(u"%(ident)s Q0 %(id)s %(rank)i %(weight)s danielatkinson_filipemoreira" % {
            'ident': ident,
            'rank': match.rank,
            'weight': match.weight,
            'id': field.split(":")[0]
            })
        matches.append(match.docid)
    # BUG FIX: the collected docids were built but never returned.
    return matches
コード例 #23
0
ファイル: models.py プロジェクト: lamby/nm2
def query(keywords):
    """
    Get changelog entries matching the given keywords
    """
    xdb = xapian.Database(MINECHANGELOGS_INDEXDIR)

    # OR together one subquery per keyword; multi-word keywords become
    # phrase queries.
    q = None
    for keyword in keywords:
        keyword = keyword.strip()
        if not keyword:
            continue
        if ' ' in keyword:
            part = xapian.Query(xapian.Query.OP_PHRASE, keyword.split())
        else:
            part = xapian.Query(keyword)
        q = part if q is None else xapian.Query(xapian.Query.OP_OR, q, part)
    if q is None:
        return

    enquire = xapian.Enquire(xdb)
    enquire.set_query(q)
    enquire.set_sort_by_value(0, True)

    # Page through the full result set, 100 matches at a time.
    first = 0
    while True:
        matches = enquire.get_mset(first, 100)
        if matches.size() == 0:
            break
        for m in matches:
            yield m.document.get_data()
        first += 100
コード例 #24
0
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search *dbpath* for *querystring*, print one formatted line per match
    and log the displayed docids via support.log_matches.  (Python 2.)
    """
    # offset - defines starting point within result set
    # pagesize - defines number of records to retrieve

    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")

    # And parse the query
    query = queryparser.parse_query(querystring)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # And print out something about each match
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        # Document data is a JSON object holding the indexed fields.
        fields = json.loads(match.document.get_data())
        print u"%(rank)i: #%(docid)3.3i %(title)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'title': fields.get('TITLE', u''),
        }
        matches.append(match.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, matches)
コード例 #25
0
def search(dbpath, querystring, offset=0, pagesize=10):
    """Run *querystring* against the database at *dbpath* and return the
    formatted matches as a single newline-terminated string.

    - offset: starting point within the result set
    - pagesize: number of records to retrieve
    """
    db = xapian.Database(dbpath)

    # English stemming with field prefixes for title/description.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")

    query = queryparser.parse_query(querystring)

    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    matches = []
    lines = []
    for match in enquire.get_mset(offset, pagesize):
        # Document data is a JSON object holding the indexed fields.
        fields = json.loads(match.document.get_data())
        lines.append(u"%(rank)i: #%(docid)3.3i %(title)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'title': fields.get('TITLE', u''),
        })
        matches.append(match.docid)

    support.log_matches(querystring, offset, pagesize, matches)
    return ''.join(line + '\n' for line in lines)


### END of function
コード例 #26
0
ファイル: testxapian.py プロジェクト: pombredanne/pytst
 def keyPressed(self, event):
     """Tk key handler: rerun the wildcard query for the current entry
     text and repopulate the listbox with matching source lines.
     (Python 2.)
     """
     self.list.delete(0, END)
     start = time()
     if self.entry.get():
         query_parser = xapian.QueryParser()
         # NOTE(review): ``ti`` is a module-level index object -- confirm.
         enq = xapian.Enquire(ti)
         query = query_parser.parse_query(
             self.entry.get(), query_parser.FLAG_WILDCARD)
         print query.get_description()
         enq.set_query(query)
         elapsed = time() - start
         result = enq.get_mset(0, 100)
         count = 0
         for doc in result:
             count += 1
             # NOTE(review): presumably doc[4] is the document and doc[1]
             # its weight (old MSetItem tuple interface) -- confirm.
             ln = doc[4].get_data()
             r = doc[1]
             # stored data has the form "<filename>:<lineno>"
             i = ln.rindex(':')
             d = ln[:i].strip()
             l = int(ln[i + 1:])
             self.list.insert(
                 END, '%.2f:%s:%i:%s' %
                 (r, d, l, linecache.getline(d, l + 1).strip()))
         self.label.config(text='%i lines in %.2fs' %
                           (count, elapsed))
コード例 #27
0
    def find(self, wordlist):
        '''Look up all the words in the wordlist.

        Returns a list of tuples built by splitting each matching
        document's data on ":"; an empty list when no words are given.
        * more rules here
        '''
        if not wordlist:
            # BUG FIX: the normal path returns a list, but this path
            # returned {}.  Return [] for a consistent type (both are
            # falsy, so truthiness-based callers are unaffected).
            return []

        database = self._get_database()

        enquire = xapian.Enquire(database)
        stemmer = xapian.Stem("english")
        terms = []
        # NOTE(review): words are upper-cased for the stopword check, then
        # lower-cased again before stemming -- confirm is_stopword expects
        # upper-case input.
        for term in [
                word.upper() for word in wordlist
                if self.minlength <= len(word) <= self.maxlength
        ]:
            if not self.is_stopword(term):
                terms.append(stemmer(s2b(term.lower())))
        # All terms must match.
        query = xapian.Query(xapian.Query.OP_AND, terms)

        enquire.set_query(query)
        matches = enquire.get_mset(0, database.get_doccount())

        return [tuple(b2s(m.document.get_data()).split(':')) for m in matches]
コード例 #28
0
    def handle_query(self, q):
        """Parse *q*, run it against the index and return a list of
        (path, title, context) tuples for the top 100 matches."""
        database = xapian.Database(self.db_path)
        enquire = xapian.Enquire(database)

        # Stemming query parser bound to this database.
        parser = xapian.QueryParser()
        parser.set_stemmer(xapian.Stem("english"))
        parser.set_database(database)
        parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
        query = parser.parse_query(q)

        # Find the top 100 results for the query.
        enquire.set_query(query)
        matches = enquire.get_mset(0, 100)

        results = []
        for m in matches:
            data = m.document.get_data()
            # Normalise bytes payloads to text before extracting context.
            if not isinstance(data, string_types):
                data = data.decode("utf-8")
            context = self.extract_context(data)
            results.append((m.document.get_value(self.DOC_PATH),
                            m.document.get_value(self.DOC_TITLE),
                            ''.join(context)))
        return results
コード例 #29
0
    def search(self, server_guid, store_guid, folder_ids, fields_terms, query,
               log):
        """ handle query; see links in the top for a description of the Xapian API """

        db = self.open_db(server_guid, store_guid, log=log)
        if not db:
            # No index exists for this store yet.
            return [], ''
        qp = xapian.QueryParser()
        qp.add_prefix("sourcekey", "XK:")
        qp.add_prefix("folderid", "XF:")
        suggest = []
        for fields, terms in fields_terms:
            for field in fields:
                # One query prefix per MAPI field id, e.g. mapi42 -> "XM42:".
                qp.add_prefix('mapi%d' % field, "XM%d:" % field)
            for term in terms:
                # Collect spelling suggestions (currently unused; see the
                # XXX on the return line below).
                suggest.append(db.get_spelling_suggestion(term) or term)
        log.info('performing query: %s' % query)
        qp.set_database(db)
        query = qp.parse_query(
            query,
            xapian.QueryParser.FLAG_BOOLEAN | xapian.QueryParser.FLAG_PHRASE
            | xapian.QueryParser.FLAG_WILDCARD)
        enquire = xapian.Enquire(db)
        enquire.set_query(query)
        matches = []
        for match in enquire.get_mset(0, db.get_doccount(
        )):  # XXX catch exception if database is being updated?
            # Value slot 0 holds the identifier handed back to callers.
            matches.append(match.document.get_value(0))
        db.close()
        return matches, None  # XXX get_spelling_suggestion, decode utf-*? ' '.join(suggest)
コード例 #30
0
def parse_query(parser, search_strings, verbose=True):
    """Translate "prefix:term" strings into xapian queries and run each
    against the module-level ``db``, printing timing and (optionally) the
    matching documents.  (Python 2.)

    NOTE(review): the *parser* argument is unused in this body -- confirm
    whether it is still needed by callers.
    """
    # Map the user-facing prefix to the corresponding xapian term prefix.
    str_to_prefix = {'section': 'AE', 'type': 'AT', 'category': 'AC'}
    for st in search_strings:
        (search_prefix, search_term) = st.split(":")
        if search_prefix == "section":
            t = str_to_prefix[search_prefix]
            s = search_term.lower()
            query = xapian.Query(t + s)
            # Sections may be stored component-qualified, so OR in the
            # "<component>/<section>" variants as well.
            for pre in ["universe", "multiverse", "restricted"]:
                query = xapian.Query(xapian.Query.OP_OR, query,
                                     xapian.Query("%s%s/%s" % (t, pre, s)))
                query = xapian.Query(xapian.Query.OP_OR, query,
                                     xapian.Query("XS%s/%s" % (pre, s)))

        else:
            query = xapian.Query(str_to_prefix[search_prefix] +
                                 search_term.lower())
        enquire = xapian.Enquire(db)
        enquire.set_query(query)
        with ExecutionTime("Search took"):
            mset = enquire.get_mset(0, db.get_doccount())
            print "Found %i documents for search '%s'" % (len(mset), st)
            if verbose:
                for m in mset:
                    doc = m.document
                    appname = doc.get_data()
                    pkgname = doc.get_value(XAPIAN_VALUE_PKGNAME)
                    print "%s ; %s" % (appname, pkgname)
        print