Esempio n. 1
0
def test_add_sortable():
    """Check that ``sorting.add_sortable`` adds columns to existing fields.

    Two batches of documents are written first and the reader is checked to
    have no ``chapter``/``price`` columns. A third writer then calls
    ``sorting.add_sortable`` for both fields, after which the columns must
    exist and be readable by document number.
    """
    first_batch = [("alfa", 100), ("bravo", 200),
                   ("charlie", 300), ("delta", 400)]
    second_batch = [("bravo", 500), ("alfa", 600),
                    ("delta", 100), ("charlie", 200)]
    column_names = ("chapter", "price")

    schema = fields.Schema(chapter=fields.ID(stored=True), price=fields.NUMERIC)
    ix = RamStorage().create_index(schema)

    with ix.writer() as writer:
        for chapter, price in first_batch:
            writer.add_document(chapter=u(chapter), price=price)
    with ix.writer() as writer:
        for chapter, price in second_batch:
            writer.add_document(chapter=u(chapter), price=price)
        # NOTE(review): presumably stops the commit from merging the two
        # batches into a single segment -- confirm against the writer API.
        writer.merge = False

    # Neither field was declared sortable, so no columns exist yet.
    with ix.reader() as reader:
        for name in column_names:
            assert not reader.has_column(name)

    with ix.writer() as writer:
        sorting.add_sortable(writer, "chapter",
                             sorting.StoredFieldFacet("chapter"))
        sorting.add_sortable(writer, "price", sorting.FieldFacet("price"))
        # NOTE(review): sets an ad-hoc attribute on the writer's schema; its
        # purpose is not evident from this test -- confirm before removing.
        writer.schema.test = 100

    with ix.reader() as reader:
        for name in column_names:
            assert reader.has_column(name)

        chapter_column = reader.column_reader("chapter")
        price_column = reader.column_reader("price")
        # Document 0 is the first document of the first batch.
        assert chapter_column[0] == "alfa"
        assert price_column[0] == 100
Esempio n. 2
0
def test_groupby_phrase():
    """Check grouping by multi-word city names.

    Every document matches the term ``alan``. Grouping the hits either by
    the ``city_g`` ID field or by a ``StoredFieldFacet`` on the ``city``
    TEXT field must produce one group key per complete city name.
    """
    domain = {
        "Alan Ball": "Tel Aviv",
        "Alan Charles": "San Francisco",
        "Alan Darwin": "London",
        "Alan Eames": "Paris"
    }
    expected_keys = ["London", "Paris", "San Francisco", "Tel Aviv"]

    schema = fields.Schema(name=fields.TEXT(stored=True),
                           city=fields.TEXT(stored=True),
                           city_g=fields.ID(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as writer:
        for person, town in domain.items():
            writer.add_document(name=u(person), city=u(town), city_g=u(town))

    with ix.searcher() as searcher:
        alan_query = query.Term("name", "alan")
        # First group by field name, then by a facet over the stored value.
        groupings = ("city_g", sorting.StoredFieldFacet("city"))
        for grouping in groupings:
            results = searcher.search(alan_query, groupedby=grouping)
            group_keys = sorted(str(key) for key in results.groups().keys())
            assert_equal(group_keys, expected_keys)
Esempio n. 3
0
    def search(self, query, limit=10, ranking=CUSTOM):
        """Return a list of ``Document`` objects matching *query*, best first.

        Args:
            query: Query string; parsed with ``self.qp_custom`` for the
                custom ranking and with ``self.qp`` otherwise.
            limit: Maximum number of documents to return.
            ranking: Key into ``self.scorers_dict`` selecting the weighting
                model. ``SearchEngine.CUSTOM`` re-ranks the hits with
                ``0.6 * score + 0.4 * normalised log-pagerank``;
                ``SearchEngine.PAGERANK`` sorts by the stored ``pagerank``
                field in reverse; any other key uses the searcher's order.

        Returns:
            A list of ``Document`` instances built from the stored fields of
            each hit; empty when nothing matches.

        Raises:
            ValueError: If *ranking* is not a key of ``self.scorers_dict``.
        """
        logger.info(
            "Received search request: Query: %s | Limit: %d | Ranking: %s",
            query, limit, ranking)

        try:
            scoring_method = self.scorers_dict[ranking]
        except KeyError:
            logger.error("Invalid ranking: %s", ranking)
            # Unlike logging calls, exceptions do not apply %-formatting to
            # their arguments, so the message is interpolated here.
            raise ValueError("Ranking must be one of these: %s" %
                             ', '.join(self.rankings))

        docs = []

        with self.ix.searcher(weighting=scoring_method) as s:
            if ranking == SearchEngine.CUSTOM:
                q = self.qp_custom.parse(query)
                # Fetch at least 100 hits so the re-ranking below has a
                # candidate pool larger than the requested page.
                results = s.search(q, limit=max(limit, 100))

                if not results.is_empty():
                    # Read each hit's score and stored fields exactly once.
                    candidates = [(hit.score, hit.fields())
                                  for hit in results]
                    max_pagerank = math.log10(
                        max(stored["pagerank"]
                            for _, stored in candidates) + 1)

                    ranked = []
                    for score, stored in candidates:
                        if max_pagerank:
                            pagerank_normalised = math.log10(
                                stored["pagerank"] + 1) / max_pagerank
                        else:
                            # Every hit has pagerank 0: log10(1) == 0, so
                            # there is nothing to normalise against.
                            pagerank_normalised = 0.0
                        combined = 0.6 * score + 0.4 * pagerank_normalised
                        ranked.append((combined, stored))

                    # Stable sort: ties keep the searcher's original order.
                    ranked.sort(key=lambda pair: pair[0], reverse=True)
                    docs = [Document(**stored)
                            for _, stored in ranked[:limit]]
            else:
                facet = None
                reverse = False
                q = self.qp.parse(query)

                if ranking == SearchEngine.PAGERANK:
                    reverse = True
                    facet = sorting.StoredFieldFacet('pagerank')

                results = s.search(q,
                                   limit=limit,
                                   sortedby=facet,
                                   reverse=reverse)

                logger.info("\tMatched docs: %d", len(results))
                logger.info("\tScored docs: %d", results.scored_length())

                for result in results:
                    docs.append(Document(**result.fields()))
        return docs
Esempio n. 4
0
def test_nested_children():
    """Check ``query.NestedChildren`` on three albums of three tracks each.

    Each album is written as one document group: the album document first,
    followed by its track documents numbered from 1.
    """
    albums = [
        ("alfa bravo charlie",
         ["delta echo foxtrot", "golf hotel india", "juliet kilo lima"]),
        ("mike november oskar",
         ["papa quebec romeo", "sierra tango ultra", "victor whiskey xray"]),
        ("yankee zulu one",
         ["two three four", "five six seven", "eight nine ten"]),
    ]

    schema = fields.Schema(t=fields.ID(stored=True),
                           track=fields.NUMERIC(stored=True),
                           album_name=fields.TEXT(stored=True),
                           song_name=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as writer:
        for album_name, song_names in albums:
            with writer.group():
                writer.add_document(t=u("album"), album_name=u(album_name))
                for number, song_name in enumerate(song_names, 1):
                    writer.add_document(t=u("track"),
                                        track=number,
                                        song_name=u(song_name))

    with ix.searcher() as searcher:
        parents = query.Term("t", "album")
        november_album = query.Term("album_name", "november")

        # Children of every album: all nine tracks and no album documents.
        all_children = query.NestedChildren(parents, parents)
        hits = searcher.search(all_children, limit=None)
        assert len(hits) == 9
        assert [str(hit["t"]) for hit in hits] == ["track"] * 9

        # Children of the second album only (document numbers 5 to 7).
        november_children = query.NestedChildren(parents, november_album)
        assert list(november_children.docs(searcher)) == [5, 6, 7]
        hits = searcher.search(november_children, limit=None)
        assert len(hits) == 3
        assert [str(hit["song_name"]) for hit in hits] == [
            "papa quebec romeo", "sierra tango ultra", "victor whiskey xray"
        ]

        # Sorting the third album's tracks by song name reverses their
        # track order: "eight...", "five...", "two...".
        zulu_children = query.NestedChildren(
            parents, query.Term("album_name", "zulu"))
        by_song_name = sorting.StoredFieldFacet("song_name")
        hits = searcher.search(zulu_children, sortedby=by_song_name)
        assert [hit["track"] for hit in hits] == [3, 2, 1]
Esempio n. 5
0
# Benchmark script: times grouping search results by an overlapping "tags"
# field, first with a FieldFacet and then with a StoredFieldFacet.
#
# The print calls use the single-argument parenthesised form so the script
# parses and prints the same text on both Python 2 and Python 3.
# NOTE(review): xrange, now, reindex, dirname, schema, tagcount and doccount
# are expected to be defined earlier in the file -- confirm.

# (Re)build the index when asked to, or when none exists in dirname yet.
if reindex or not index.exists_in(dirname):
    # Each tag is five random lowercase ASCII letters.
    tags = [
        u"".join(random.choice(string.ascii_lowercase) for _ in xrange(5))
        for _ in xrange(tagcount)
    ]

    ix = index.create_in(dirname, schema)
    t = now()
    with ix.writer() as w:
        for i in xrange(doccount):
            # Every document gets between 10 and 20 distinct tags.
            doc = u" ".join(random.sample(tags, random.randint(10, 20)))
            w.add_document(tags=doc)
            # Progress marker every 10000 documents (including document 0).
            if not i % 10000:
                print(i)
    # Elapsed indexing time.
    print(now() - t)

ix = index.open_dir(dirname)
with ix.searcher() as s:
    tags = list(s.lexicon("tags"))
    # Query for one randomly chosen indexed tag.
    qtag = random.choice(tags)
    print("tag= %s" % (qtag,))
    q = query.Term("tags", qtag)

    # Run the same grouped search with each facet type and print its runtime.
    for facet_type in (sorting.FieldFacet, sorting.StoredFieldFacet):
        facet = facet_type("tags", allow_overlap=True)
        r = s.search(q, groupedby={"tags": facet})
        print(r.runtime)