Ejemplo n.º 1
0
    def testEquality(self):
        """Two separately built, structurally identical queries must be equal.

        Each query ORs two term clauses on "field" with a nested
        BooleanQuery that itself ORs two further term clauses.
        """

        def build():
            # A fresh object graph per call, so equals() has to compare
            # structure rather than identity.
            outer = BooleanQuery()
            for text in ("value1", "value2"):
                outer.add(TermQuery(Term("field", text)),
                          BooleanClause.Occur.SHOULD)

            nested = BooleanQuery()
            for text in ("nestedvalue1", "nestedvalue2"):
                nested.add(TermQuery(Term("field", text)),
                           BooleanClause.Occur.SHOULD)
            outer.add(nested, BooleanClause.Occur.SHOULD)
            return outer

        # assertTrue replaces the deprecated assert_ alias.
        self.assertTrue(build().equals(build()))
Ejemplo n.º 2
0
    def testBraces(self):
        """Check conversion of '(cats)' and '(cats AND dogs) OR mice'."""
        occur = BooleanClause.Occur
        self.assertConversion(TermQuery(Term('unqualified', 'cats')), '(cats)')

        # Left operand: both terms are added as MUST clauses.
        catsAndDogs = BooleanQuery()
        for animal in ('cats', 'dogs'):
            catsAndDogs.add(TermQuery(Term('unqualified', animal)), occur.MUST)

        # The group and the extra term are both SHOULD clauses of the outer
        # query.
        expected = BooleanQuery()
        expected.add(catsAndDogs, occur.SHOULD)
        expected.add(TermQuery(Term('unqualified', 'mice')), occur.SHOULD)

        self.assertConversion(expected, '(cats AND dogs) OR mice')
Ejemplo n.º 3
0
    def testParenthesisMust2(self):
        """Search (t1 OR t2) as SHOULD plus (c1 OR c2) as MUST; expect 1."""
        should = BooleanClause.Occur.SHOULD

        optionalGroup = BooleanQuery()
        for sub in (self.t1, self.t2):
            optionalGroup.add(BooleanClause(sub, should))

        requiredGroup = BooleanQuery()
        for sub in (self.c1, self.c2):
            requiredGroup.add(BooleanClause(sub, should))

        combined = BooleanQuery()
        combined.add(optionalGroup, should)
        combined.add(requiredGroup, BooleanClause.Occur.MUST)
        self.assertEqual(1, self.search(combined))
Ejemplo n.º 4
0
def run(searcher, analyzer):
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        command = unicode(command, 'UTF-8')
        if command == '':
            return

        print
        print "Searching for:", command
        querys = BooleanQuery()
        command_dict = parseCommand(command)
        for k, v in command_dict.iteritems():
            query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)

        scoreDocs = searcher.search(querys, 50).scoreDocs
        print "%s total matching documents." % len(scoreDocs)

        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            print '------------------------------------------------'
            print 'title:', doc.get('title')
            print 'url:', doc.get('url')
            print 'src:', doc.get('src')
Ejemplo n.º 5
0
def run(searcher, analyzer):
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        if command == '':
            print "=== [ QUIT ] ==="
            return

        print
        print "Searching for:", command

        command_dict = parseCommand(command)
        querys = BooleanQuery()
        for k, v in command_dict.iteritems():
            if 'contents' == k:
                v = " ".join(jieba.cut(v))
            if DEBUG_MODE:
                print k, v
            query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)
        scoreDocs = searcher.search(querys, MAX_ITEMS_PER_PAGE).scoreDocs
        print "%s total matching documents." % len(scoreDocs)

        for idx, scoreDoc in enumerate(scoreDocs):
            doc = searcher.doc(scoreDoc.doc)
            # # explanation = searcher.explain(query, scoreDoc.doc)
            print "-- #", str(idx + 1), "--"
            print '\ttitle:\t', doc.get("title")
            print '\turl:\t', doc.get("url")
            print '\tpath:\t', doc.get("path")
            print '\tname:\t', doc.get("name")
            print
Ejemplo n.º 6
0
def run_pic(valueFromOut, searcher, analyzer):
    command = valueFromOut

    seg_list = jieba.cut(command)
    command = " ".join(seg_list)
    if command == '':
        return

    result = []
    command_dict = parseCommand(command)
    querys = BooleanQuery()
    for k, v in command_dict.iteritems():
        query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
        querys.add(query, BooleanClause.Occur.MUST)
    scoreDocs = searcher.search(querys, 10).scoreDocs
    print "%s total matching documents." % len(scoreDocs)

    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        partResult = {}

        partResult['title'] = doc.get('title')
        partResult['url'] = doc.get('url')
        partResult['imgurl'] = doc.get('imgurl')

        result.append(partResult)

    return result
Ejemplo n.º 7
0
def run(searcher, analyzer):
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        command = unicode(command, 'UTF-8')
        if command == '':
            return

        print
        print "Searching for:", command #朱莉与茱莉娅

        # final = jieba.cut(command)
        # query = QueryParser(Version.LUCENE_CURRENT, "contents",
        #                     analyzer).parse(' '.join(final))
        
        querys = BooleanQuery()
        command_dict = parseCommand(command)
        for k,v in command_dict.iteritems():            
            if(k=='site'):
                t = Term('url','*'+v.strip()+'*')
                query = WildcardQuery(t)
            else:
                query = QueryParser(Version.LUCENE_CURRENT, k,analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)
        
        scoreDocs = searcher.search(querys, 50).scoreDocs
        print "%s total matching documents." % len(scoreDocs)

        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            print '------------------------------------------'
            #print 'path:', doc.get("path"), 'name:', doc.get("name"),'site:', doc.get('site')
            print 'title:',doc.get('title'),
            print 'url:',doc.get('url')
Ejemplo n.º 8
0
    def search(**kwargs):
        """Search by keyword arguments mapping a field name to raw text.

        Each value is segmented with jieba.cut_for_search; every non-blank
        token becomes a SHOULD clause on its field.  Tokens of the
        'keywords' field are additionally searched in 'ent_name'.  Returns
        retrieve(doc) for each of at most 50 hits.
        """
        vm_env.attachCurrentThread()
        combined = BooleanQuery()

        def add_optional(target_field, tokens):
            # One optional clause per token on the given field.
            for token in tokens:
                parsed = QueryParser(Version.LUCENE_CURRENT, target_field, analyzer).parse(token)
                combined.add(parsed, BooleanClause.Occur.SHOULD)

        print("Searched keywords:")
        for field_name, raw_text in kwargs.items():
            stripped = (piece.strip() for piece in jieba.cut_for_search(raw_text))
            tokens = [token for token in stripped if token]

            for token in tokens:
                print(token)

            add_optional(field_name, tokens)
            if field_name == 'keywords':
                add_optional('ent_name', tokens)

        hits = searcher.search(combined, 50).scoreDocs
        return [retrieve(searcher.doc(hit.doc)) for hit in hits]
Ejemplo n.º 9
0
def run(searcher, analyzer):
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        command = unicode(command)

        if command == '':
            return

        command_dict = parseCommand(command)

        seg_list = jieba.cut(command_dict['contents'])
        command_dict['contents'] = (" ".join(seg_list))
        querys = BooleanQuery()
        for k, v in command_dict.iteritems():
            query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)

        print
        print "Searching for:", command

        scoreDocs = searcher.search(querys, 50).scoreDocs
        print "%s total matching documents." % len(scoreDocs)

        for i, scoreDoc in enumerate(scoreDocs):
            doc = searcher.doc(scoreDoc.doc)
            print 'path:', doc.get("path"), \
                '\nname:', doc.get("name"), \
                '\ntitle:', doc.get("title"), \
                "url:",doc.get("url"), \
                "\nsite:",doc.get("site"), "\n"
Ejemplo n.º 10
0
def run(searcher, analyzer):
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        if command == '':
            return
        print
        print "Searching for:", command

        command_dict = parseCommand(command)
        querys = BooleanQuery()
        for k, v in command_dict.iteritems():
            query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)

        scoreDocs = searcher.search(querys, 50).scoreDocs

        finalDocTitles = []
        for i, scoreDoc in enumerate(scoreDocs):
            doc = searcher.doc(scoreDoc.doc)
            if (doc.get("title") not in finalDocTitles):
                print 'title:', doc.get("title"), 'url:', doc.get(
                    "url"), 'score:', scoreDoc.score, 'contents:', doc.get(
                        'contents')
                finalDocTitles.append(doc.get("title"))
            # print 'explain:', searcher.explain(query, scoreDoc.doc)
        print "%s total matching documents." % len(finalDocTitles)
Ejemplo n.º 11
0
def run_img(command):
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()
    STORE_DIR = "index2"
    directory = SimpleFSDirectory(File(STORE_DIR))
    searcher = IndexSearcher(DirectoryReader.open(directory))
    analyzer = WhitespaceAnalyzer(Version.LUCENE_CURRENT)
    querys = BooleanQuery()
    query_content = QueryParser(Version.LUCENE_CURRENT, "urlcontent",
                                analyzer).parse(command)
    query_title = QueryParser(Version.LUCENE_CURRENT, "title",
                              analyzer).parse(command)
    querys.add(query_content, BooleanClause.Occur.SHOULD)
    querys.add(query_title, BooleanClause.Occur.SHOULD)
    scoreDocs = searcher.search(querys, 50).scoreDocs
    if len(scoreDocs) == 0:
        print "WARNING: No result"
    result = []
    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        print doc.get("title")
        data = {}
        data['title'] = doc.get('title')
        data['url'] = doc.get('url')
        data['imgurl'] = doc.get('imgurl')
        result.append(data)
    return result
def text_search(command):
    """Search the text index and return [title, url, snippet] per hit.

    Every (field, text) pair returned by parseCommand(command, "contents")
    becomes a MUST clause; at most 30 hits are fetched.  The snippet is the
    concatenation of up to three highlighted fragments of the stored
    "contents" text with all whitespace removed.  It is an empty string
    when the command has no "contents" entry or the document stores no
    "contents" text.
    """
    envir.vm_env.attachCurrentThread()
    command_dict = parseCommand(command, "contents")
    querys = BooleanQuery()
    for field, text in command_dict.iteritems():
        query = QueryParser(Version.LUCENE_CURRENT, field,
                            envir.analyzer).parse(text)
        querys.add(query, BooleanClause.Occur.MUST)

    scoreDocs = envir.text_searcher.search(querys, 30).scoreDocs

    # Build the highlighter against the "contents" field explicitly.  The
    # previous code reused the loop variable left over from the loop above,
    # which is an arbitrary field name and is unbound (NameError) when the
    # parsed command is empty.
    myhighlighter = None
    contents_query = command_dict.get("contents")
    if contents_query:
        query_highlight = QueryParser(Version.LUCENE_CURRENT, "contents",
                                      envir.analyzer).parse(contents_query)
        myhighlighter = Highlighter(
            SimpleHTMLFormatter(), QueryScorer(query_highlight))
        myhighlighter.setTextFragmenter(SimpleFragmenter(50))

    res = []
    for scoreDoc in scoreDocs:
        doc = envir.text_searcher.doc(scoreDoc.doc)
        text = doc.get("contents")
        key_text = ""
        if myhighlighter is not None and text is not None:
            # Text surrounding the matched keywords.
            key_text = "".join(myhighlighter.getBestFragments(
                envir.analyzer, "contents", text, 3))
            key_text = re.sub(r'\s', '', key_text)
        res.append([doc.get("title"), doc.get('url'), key_text])
    return res
Ejemplo n.º 13
0
 def createDrilldownQuery(self, luceneQuery, drilldownQueries):
     """Combine luceneQuery with one required term per drilldown pair.

     drilldownQueries is iterated as (field, path) pairs; each pair is
     turned into a term by self._fieldRegistry.makeDrilldownTerm.  A falsy
     luceneQuery is left out.  Returns the resulting BooleanQuery.
     """
     must = BooleanClause.Occur.MUST
     # NOTE(review): in Lucene 4.x the boolean constructor argument is
     # disableCoord - confirm against the Lucene version in use.
     combined = BooleanQuery(True)
     if luceneQuery:
         combined.add(luceneQuery, must)
     for fieldname, path in drilldownQueries:
         term = self._fieldRegistry.makeDrilldownTerm(fieldname, path)
         combined.add(TermQuery(term), must)
     return combined
Ejemplo n.º 14
0
def lucene_sample_query_parse(sampleq, ftypes):
    """Build a BooleanQuery from sample-metadata constraint strings.

    Each entry of *sampleq* is matched against
    snapconf.RANGE_QUERY_FIELD_PATTERN (whose first group is the operator)
    and split on snapconf.RANGE_QUERY_OPS into field, operator and value.
    *ftypes* maps the typed field name to a (type char, type method) pair.

    Depending on the type char and the value, a range query ('i' / 'f'),
    an exact phrase query (value contains a space or tab) or a term query
    is added with BOOLEAN_OCCUR.  Entries the pattern does not match are
    skipped.  An operator missing from snapconf.operators writes a message
    to stderr and exits the process with status -1.
    """
    bq = BooleanQuery()
    for query_tuple in sampleq:
        # Validate before unpacking: the 3-way unpack below raises
        # ValueError on text without an operator, which made the original
        # skip (performed after the unpack) unreachable for such input.
        m = snapconf.RANGE_QUERY_FIELD_PATTERN.search(query_tuple)
        if m is None:
            continue
        # NOTE(review): the unpack assumes RANGE_QUERY_OPS has exactly one
        # capturing group so the split yields three parts - confirm.
        (field, _op_text, value) = re.split(snapconf.RANGE_QUERY_OPS,
                                            query_tuple)
        op = m.group(1)
        if op not in snapconf.operators:
            sys.stderr.write("bad operator %s in range query,exiting\n" % (str(op)))
            sys.exit(-1)
        field_w_type = snapconf.SAMPLE_HEADER_FIELDS_TYPE_MAP[field]
        (fieldtypechar, ftype_method) = ftypes[field_w_type]
        if fieldtypechar == 'i' or fieldtypechar == 'f':
            # Range query.
            bq.add(lucene_range_query_parse(field_w_type, op, value,
                                            fieldtypechar, ftype_method),
                   BOOLEAN_OCCUR)
        elif ' ' in value or '\t' in value:
            # Phrase query: one lower-cased term per whitespace-separated
            # word; slop 0 forces exact phrase matching only.
            pquery = PhraseQuery()
            for word in re.split(r'\s+', value):
                pquery.add(Term(field_w_type, word.lower()))
            pquery.setSlop(0)
            bq.add(pquery, BOOLEAN_OCCUR)
        else:
            # Single term query.
            bq.add(TermQuery(Term(field_w_type, value.lower())), BOOLEAN_OCCUR)
        sys.stderr.write("value + fields: %s %s\n" % (value.lower(), field_w_type))
    return bq
Ejemplo n.º 15
0
def search_kw(kw, mode):
    """Search movies whose "introduction" matches every jieba token of kw.

    A truthy *mode* sorts on "score" (STRING, reversed); otherwise on
    "comments" (FLOAT, reversed).  Returns, for at most 20 hits, a list of
    [url, picture, title, score, genre, stars, comments] lists.
    """
    vm_env.attachCurrentThread()

    query = BooleanQuery()
    for token in jieba.cut(kw):
        parsed = QueryParser(Version.LUCENE_CURRENT, "introduction",
                             analyzer).parse(token)
        query.add(parsed, BooleanClause.Occur.MUST)

    if mode:
        sort_field = SortField("score", SortField.Type.STRING, True)
    else:
        sort_field = SortField("comments", SortField.Type.FLOAT, True)
    hits = searcher1.search(query, 20, Sort(sort_field)).scoreDocs

    # Stored fields in the order they appear in each returned row.
    columns = ("url", "picture", "title", "score", "genre", "stars",
               "comments")
    lists = []
    for hit in hits:
        stored = searcher1.doc(hit.doc)
        lists.append([stored.get(column) for column in columns])

    return lists
Ejemplo n.º 16
0
def func1(genre, year):
    """Search movies by genre and/or year, sorted on "score" (reversed).

    The string "111" for either argument means that filter is not applied.
    Returns, for at most 20 hits, a list of
    [url, picture, title, score, genre, stars, comments] lists.
    """
    vm_env.attachCurrentThread()

    query = BooleanQuery()
    for field, wanted in (("genre", genre), ("year", year)):
        if wanted == "111":
            continue
        parsed = QueryParser(Version.LUCENE_CURRENT, field,
                             analyzer).parse(wanted)
        query.add(parsed, BooleanClause.Occur.MUST)

    ordering = Sort(SortField("score", SortField.Type.STRING, True))
    hits = searcher1.search(query, 20, ordering).scoreDocs

    # Stored fields in the order they appear in each returned row.
    columns = ("url", "picture", "title", "score", "genre", "stars",
               "comments")
    lists = []
    for hit in hits:
        stored = searcher1.doc(hit.doc)
        lists.append([stored.get(column) for column in columns])
    return lists
Ejemplo n.º 17
0
 def get_or_query(self, queries):
     """Return a BooleanQuery holding every given query as a SHOULD clause."""
     # NOTE(review): in Lucene 4.x the boolean constructor argument is
     # disableCoord, so False would leave Similarity.coord() enabled -
     # confirm which behaviour is intended.
     combined = BooleanQuery(False)
     should = BooleanClause.Occur.SHOULD
     for sub_query in queries:
         combined.add(sub_query, should)
     return combined
Ejemplo n.º 18
0
def run(searcher, analyzer):
    while True:
        print
        print "Hit enter with no input to quit."
        command = raw_input("Query:")
        command = unicode(command, 'utf-8')
        if command == '':
            return

        print "Searching for:", command
        
        command_dict = parseCommand(command)
        querys = BooleanQuery()
        for k, v in command_dict.iteritems():
            print k, v
            query = QueryParser(Version.LUCENE_CURRENT, k,
                                analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)
        scoreDocs = searcher.search(querys, 10).scoreDocs
        print "%s total matching documents." % len(scoreDocs)

        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
##            explanation = searcher.explain(query, scoreDoc.doc)
            print "------------------------"
            print 'path:', doc.get("path")
            print 'name:', doc.get("name")
            print 'title:', doc.get('title')
            print 'url:', doc.get("url")
Ejemplo n.º 19
0
def main():
    _vm = lucene.initVM(vmargs=['-Djava.awt.headless=true'])

    query = BooleanQuery()
    query.add(MatchAllDocsQuery(), BooleanClause.Occur.MUST)
    query.add(TermQuery(Term('type', 'user')), BooleanClause.Occur.MUST)
    i = 0
    with zh_iatd.create_searcher() as searcher:
        with open('pagerank_data.txt', 'w') as fout:
            reslst = searcher.searcher.search(query, 100)
            initval = 1.0 / reslst.totalHits
            while len(reslst.scoreDocs) > 0:
                for x in reslst.scoreDocs:
                    realdoc = searcher.searcher.doc(x.doc)
                    obj = document_to_obj(realdoc)
                    if not obj.data.followed_users is None:
                        print '{0:8}'.format(i), '  user', obj.index, len(
                            obj.data.followed_users)
                        fout.write('{0}\t{1}\t{2}\n'.format(
                            obj.index, initval, ' '.join(
                                (x.encode('utf8')
                                 for x in obj.data.followed_users))))
                    else:
                        print '{0:8}'.format(i), 'I user', obj.index
                    i += 1
                reslst = searcher.searcher.searchAfter(reslst.scoreDocs[-1],
                                                       query, 100)
Ejemplo n.º 20
0
    def ch_seach(self,
                 command_dict,
                 target_range=None,
                 targets=('title', 'author', 'text', 'likes', 'imgurl',
                          'label')):
        """Search self.chSearcher and return (total_match, rows).

        command_dict maps a field name to an indexable value whose item 0 is
        the query text (segmented by utils.jieba_seg) and whose item 1, when
        truthy, makes the clause MUST instead of SHOULD.  Only the fields
        'author', 'title', 'label' and 'content' are used.

        target_range, when given, is a (start, stop) pair that slices the
        hit list; both ends are clamped to the valid range.  Each row is a
        dict of the stored values named in targets.
        """
        searchable = ['author', 'title', 'label', 'content']

        combined = BooleanQuery()
        for field, spec in command_dict.items():
            if field not in searchable:
                continue
            parsed = QueryParser(Version.LUCENE_CURRENT, field,
                                 self.Analyzer).parse(utils.jieba_seg(spec[0]))
            occur = (BooleanClause.Occur.MUST if spec[1]
                     else BooleanClause.Occur.SHOULD)
            combined.add(parsed, occur)

        all_hits = self.chSearcher.search(combined, utils.MAX_RESULTS).scoreDocs
        total_match = len(all_hits)

        if target_range is None:
            page = all_hits[:]
        else:
            start = max(0, int(target_range[0]))
            stop = min(total_match, int(target_range[1]))
            page = all_hits[start:stop]
        del all_hits

        res = []
        for hit in page:
            stored = self.chSearcher.doc(hit.doc)
            res.append({name: stored.get(name) for name in targets})

        return total_match, res
Ejemplo n.º 21
0
    def testWildcards(self):
        """Check conversion of CQL terms containing '*' wildcards.

        The final part composes "prefix*" with two boosted unqualified term
        fields and compares the type and repr of the result with a
        hand-built BooleanQuery.
        """
        query = PrefixQuery(Term('unqualified', 'prefix'))
        self.assertConversion(query, 'prefix*')
        self.assertConversion(query, 'PREfix*')
        query = PrefixQuery(Term('field', 'prefix'))
        self.assertConversion(query, 'field="PREfix*"')
        self.assertConversion(query, 'field=prefix*')
        query = PrefixQuery(Term('field', 'oc-0123'))
        self.assertConversion(query, 'field="oc-0123*"')
        # A one-letter prefix is expected as a plain TermQuery.
        query = TermQuery(Term('field', 'p'))
        self.assertConversion(query, 'field="P*"')
        # Only prefix queries for now: a leading '*' is expected as a plain
        # TermQuery on the remaining text.
        query = TermQuery(Term('field', 'post'))
        self.assertConversion(query, 'field="*post"')

        query = TermQuery(Term('field', 'prefix'))
        self.assertConversion(query, 'field=prefix**')

        result = LuceneQueryComposer(unqualifiedTermFields=[("field0", 0.2), ("field1", 2.0)], luceneSettings=LuceneSettings()).compose(parseCql("prefix*"))

        query = BooleanQuery()
        left = PrefixQuery(Term("field0", "prefix"))
        left.setBoost(0.2)
        query.add(left, BooleanClause.Occur.SHOULD)

        right = PrefixQuery(Term("field1", "prefix"))
        right.setBoost(2.0)
        query.add(right, BooleanClause.Occur.SHOULD)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(type(query), type(result))
        self.assertEqual(repr(query), repr(result))
Ejemplo n.º 22
0
    def testFlat(self):
        """Search t1, t2, c1 and c2 as flat SHOULD clauses; expect 1."""
        should = BooleanClause.Occur.SHOULD

        flat = BooleanQuery()
        for sub in (self.t1, self.t2, self.c1, self.c2):
            flat.add(BooleanClause(sub, should))
        self.assertEqual(1, self.search(flat))
Ejemplo n.º 23
0
 def visitSCOPED_CLAUSE(self, node):
     """Turn a visited scoped clause into a query.

     A one-element clause is returned as its single element.  Otherwise the
     clause is unpacked as (lhs, operator, rhs) and both operands are added
     to a BooleanQuery with the occurrences looked up in LHS_OCCUR and
     RHS_OCCUR for that operator.
     """
     parts = CqlVisitor.visitSCOPED_CLAUSE(self, node)
     if len(parts) == 1:
         return parts[0]

     left, op, right = parts
     combined = BooleanQuery()
     for operand, occurByOperator in ((left, LHS_OCCUR), (right, RHS_OCCUR)):
         combined.add(operand, occurByOperator[op])
     return combined
Ejemplo n.º 24
0
def search_dianping(province, kind, query):
    """Return details of the highest-ranked shop matching the query.

    *kind* must be 'food' or 'foodshop' and *query* must be non-empty;
    otherwise None is returned without opening the index.  The command
    "<kind>:<query> province:<province>" is parsed by parseCommand and
    every resulting (field, text) pair becomes a MUST clause.

    Among at most 50 hits, the document with the highest 'rank' is chosen;
    on equal ranks the later hit wins.  Returns a dict of UTF-8 encoded
    stored values, or an empty dict when no hit has a rank of at least 0.
    """
    STORE_DIR = "index"
    vm_env.attachCurrentThread()

    # Validate before touching the index so rejected calls open nothing.
    allowed_opt = ['food', 'foodshop']
    if kind not in allowed_opt:
        return None
    if query == '':
        return None

    command = '%s:%s province:%s' % (kind, query, province)
    # unicode() with an encoding raises TypeError on text that is already
    # unicode, so only decode byte strings.
    if not isinstance(command, unicode):
        command = unicode(command, 'utf8', 'ignore')
    command_dict = parseCommand(command)

    directory = SimpleFSDirectory(File(STORE_DIR))
    try:
        reader = DirectoryReader.open(directory)
        try:
            searcher = IndexSearcher(reader)
            analyzer = WhitespaceAnalyzer(Version.LUCENE_CURRENT)

            querys = BooleanQuery()
            for field, text in command_dict.iteritems():
                # Named "clause" so the query parameter is not shadowed.
                clause = QueryParser(Version.LUCENE_CURRENT, field,
                                     analyzer).parse(text)
                querys.add(clause, BooleanClause.Occur.MUST)
            scoreDocs = searcher.search(querys, 50).scoreDocs

            # Compare ratings in one pass.  ">=" keeps the later of two
            # equally ranked hits, and the initial 0 excludes negative
            # ranks, matching the previous two-pass selection.
            max_rank = 0
            best_doc = None
            for scoreDoc in scoreDocs:
                doc = searcher.doc(scoreDoc.doc)
                cur_rank = float(doc.get('rank'))
                if cur_rank >= max_rank:
                    max_rank = cur_rank
                    best_doc = doc

            result = {}
            if best_doc is not None:
                # The shop name is the last whitespace-separated token of
                # the stored "foodshop" value.
                shop = best_doc.get("foodshop").split()[-1]
                result['name'] = shop.encode('utf8', 'ignore')
                for key in ('rank', 'food', 'location', 'tel',
                            'environment_score', 'flavour_score',
                            'service_score', 'price_level'):
                    result[key] = best_doc.get(key).encode('utf8', 'ignore')
            return result
        finally:
            # "del searcher" only dropped the reference; close the reader.
            reader.close()
    finally:
        directory.close()
Ejemplo n.º 25
0
def firstsearch(searcher, analyzer, command):
    """Search "name_not_cut" for a single-word command.

    Returns an empty list when the command contains more than one
    whitespace-separated word; otherwise the scoreDocs of at most 1000
    hits.
    """
    if len(command.split()) > 1:
        return []

    parser = QueryParser(Version.LUCENE_CURRENT, "name_not_cut", analyzer)
    required = BooleanQuery()
    required.add(parser.parse(command), BooleanClause.Occur.MUST)
    return searcher.search(required, 1000).scoreDocs
Ejemplo n.º 26
0
def run(searcher, analyzer, command):
    """Look up the best hit for the longest word of *command*.

    The longest whitespace-separated word (the first one on ties) is
    segmented with jieba and every token must match the "name" field.  If
    that finds nothing, every single character of the tokens must match
    the "not_seg" field instead.

    Returns [org, path, price, imgsrc] of the top hit, or four 'unknown'
    strings when nothing matches or the command contains no word.
    """

    def build_required(field, pieces):
        # One MUST clause per piece; pieces the parser rejects are skipped.
        combined = BooleanQuery()
        for piece in pieces:
            try:
                parsed = QueryParser(Version.LUCENE_CURRENT, field,
                                     analyzer).parse(piece)
                combined.add(parsed, BooleanClause.Occur.MUST)
            except Exception:
                # Narrowed from a bare except so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                continue
        return combined

    words = command.split()
    if not words:
        # An empty or whitespace-only command used to raise IndexError.
        return ['unknown'] * 4

    # max() returns the first of several equally long words.
    longest = max(words, key=len)
    tokens = " ".join(jieba.cut(longest)).split()

    scoreDocs = searcher.search(build_required("name", tokens), 50).scoreDocs
    if len(scoreDocs) == 0:
        # Fall back to character-level matching.
        characters = [ch for token in tokens for ch in token]
        scoreDocs = searcher.search(build_required("not_seg", characters),
                                    50).scoreDocs

    if len(scoreDocs) == 0:
        return ['unknown'] * 4

    doc = searcher.doc(scoreDocs[0].doc)
    return [
        doc.get("org"),
        doc.get("path"),
        doc.get("price"),
        doc.get("imgsrc")
    ]
Ejemplo n.º 27
0
def run(searcher, analyzer):
    while True:
        print
        print "Hit enter with no input to quit."

        command = raw_input("Query:")
        #command = unicode(command, 'GBK')
        command = unicode(command, 'utf8')
        if command == '':
            return

        print
        print 'searching for : ' + command
        command_dict = parseCommand(command)
        querys = BooleanQuery()
        for k, v in command_dict.iteritems():
            query = QueryParser(Version.LUCENE_CURRENT, k, analyzer).parse(v)
            querys.add(query, BooleanClause.Occur.MUST)
        scoreDocs = searcher.search(querys, 50).scoreDocs
        print "%s total matching documents." % len(scoreDocs)

        #比较评分
        max_rank = 0
        best_shop = ''
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            cur_shop = doc.get("foodshop").split()[-1]
            cur_rank = float(doc.get('rank'))
            if cur_rank > max_rank:
                max_rank = cur_rank
                best_shop = cur_shop
        result = {}
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            cur_shop = doc.get("foodshop").split()[-1]
            cur_rank = float(doc.get('rank'))

            if cur_rank == max_rank:
                result['name'] = cur_shop.encode('utf8', 'ignore')
                result['rank'] = doc.get('rank').encode('utf8', 'ignore')
                result['food'] = doc.get('food').encode('utf8', 'ignore')
                result['location'] = doc.get('location').encode(
                    'utf8', 'ignore')
                result['tel'] = doc.get('tel').encode('utf8', 'ignore')
                result['environment_score'] = doc.get(
                    'environment_score').encode('utf8', 'ignore')
                result['flavour_score'] = doc.get('flavour_score').encode(
                    'utf8', 'ignore')
                result['service_score'] = doc.get('service_score').encode(
                    'utf8', 'ignore')
                result['price_level'] = doc.get('price_level').encode(
                    'utf8', 'ignore')
        print result
Ejemplo n.º 28
0
 def search(self, value, stopwords=None, min_length=0):
     """Search the three wiki-name fields for *value*.

     value is tokenized with nltk.word_tokenize; tokens found in
     *stopwords* or not longer than *min_length* are dropped.

     First a phrase search (slop 2) is run on each field, any of which may
     match.  If that returns hits they are returned.  Otherwise a second
     search requires every word to match in at least one of the fields.

     Returns the scoreDocs of at most self.num_docs_to_return hits.
     """
     # None instead of a shared mutable [] default.
     if stopwords is None:
         stopwords = ()
     fields = ("wiki_name_analyzed",
               "wiki_name_analyzed_nopunct",
               "wiki_name_analyzed_nopunct_nostop")
     should = BooleanClause.Occur.SHOULD

     words = [
         x for x in nltk.word_tokenize(value)
         if x not in stopwords and len(x) > min_length
     ]

     # Pass 1: the words as a sloppy phrase in any one field.
     query = BooleanQuery()
     for field in fields:
         phrase = PhraseQuery()
         phrase.setSlop(2)
         for word in words:
             phrase.add(Term(field, word))
         query.add(phrase, should)
     scoreDocs = self.searcher.search(query,
                                      self.num_docs_to_return).scoreDocs
     if len(scoreDocs) > 0:
         return scoreDocs

     # Pass 2: every word is required, each in any of the fields.
     query = BooleanQuery()
     for word in words:
         query_word = BooleanQuery()
         for field in fields:
             query_word.add(TermQuery(Term(field, word)), should)
         query.add(query_word, BooleanClause.Occur.MUST)
     return self.searcher.search(query, self.num_docs_to_return).scoreDocs
Ejemplo n.º 29
0
    def testUnqualifiedTermFields(self):
        """Compose "value" with two boosted unqualified term fields.

        The composed result is compared by type and repr with a BooleanQuery
        holding one boosted SHOULD TermQuery per configured field.
        """
        composer = LuceneQueryComposer(unqualifiedTermFields=[("field0", 0.2), ("field1", 2.0)], luceneSettings=LuceneSettings())
        result = composer.compose(parseCql("value"))

        query = BooleanQuery()
        for fieldname, boost in (("field0", 0.2), ("field1", 2.0)):
            termQuery = TermQuery(Term(fieldname, "value"))
            termQuery.setBoost(boost)
            query.add(termQuery, BooleanClause.Occur.SHOULD)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(type(query), type(result))
        self.assertEqual(repr(query), repr(result))
Ejemplo n.º 30
0
def func2(name):
    """Search the "name" field and return [picture, url, name] per hit.

    At most 20 hits are returned, in the order the searcher delivers them.
    """
    vm_env.attachCurrentThread()
    query = BooleanQuery()

    item = QueryParser(Version.LUCENE_CURRENT, "name", analyzer).parse(name)
    query.add(item, BooleanClause.Occur.MUST)
    scoreDocs = searcher2.search(query, 20).scoreDocs

    lists = []
    for scoreDoc in scoreDocs:
        doc = searcher2.doc(scoreDoc.doc)
        # The row was previously a local named "list", shadowing the
        # builtin.
        row = [doc.get("picture"), doc.get("url"), doc.get("name")]
        lists.append(row)
    return lists