Python NGRAMWORDS Beispiele, whoosh.fields.NGRAMWORDS Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: import.py Projekt: eoglethorpe/sidr

    def indexloc(self):
        """Index every row of the ``geoname`` table into the "adms" index.

        The Whoosh index lives in the ``indexer`` directory and is created
        on first use.  Each row becomes one document holding its id
        (``gid``), its ``country_code`` and a single ``names`` string built
        from the ``name``, ``asciiname`` and ``name_alternate`` columns.
        ``names`` is indexed as word n-grams of 3 to 15 characters.

        Any exception raised while reading rows or adding documents
        propagates to the caller; in that case nothing is committed.
        """
        from sidr.orm import db
        from whoosh import fields
        from whoosh.index import create_in, exists_in, open_dir

        schema = fields.Schema(gid=fields.TEXT(stored=True),
                               country_code=fields.ID(stored=True),
                               names=fields.NGRAMWORDS(stored=True,
                                                       minsize=3,
                                                       maxsize=15))
        # create_in() already returns a usable index, so only reopen from
        # disk when the index existed beforehand.
        if exists_in("indexer", indexname="adms"):
            ix = open_dir("indexer", indexname="adms")
        else:
            ix = create_in("indexer", schema, indexname="adms")

        rows = db.engine.execute('SELECT * FROM geoname')

        # Used as a context manager the writer commits on a clean exit and
        # cancels (releasing the index write lock) if anything raises.
        with ix.writer() as writer:
            for row in rows:
                # A NULL column would otherwise be formatted as the literal
                # text "None" and become searchable as a place name.
                name_parts = tuple(
                    "" if row[column] is None else row[column]
                    for column in ("name", "asciiname", "name_alternate"))
                writer.add_document(
                    gid=str(row['id']),
                    country_code=row['country_code'],
                    names="%s , %s , %s" % name_parts)

Beispiel #2

0

Datei anzeigen

Datei: test_parsing.py Projekt: altamir-bricks/whooshing-opendata

def test_ngramwords():
    """Parsing against an NGRAMWORDS(queryor=True) field splits words into grams.

    "Hello Tom" must parse to an And whose first child is an Or over the
    grams of "hello" and whose second child is the single term "tom".
    """
    ngram_schema = fields.Schema(grams=fields.NGRAMWORDS(queryor=True))
    qp = default.QueryParser('grams', ngram_schema)
    parsed = qp.parse(u("Hello Tom"))

    # Top level: one sub-query per input word, joined with And.
    assert parsed.__class__ == query.And

    # "hello" is longer than one gram, so its grams are Or-ed together.
    hello_part = parsed[0]
    assert hello_part.__class__ == query.Or

    # "tom" yields exactly one gram and stays a plain Term.
    tom_part = parsed[1]
    assert tom_part.__class__ == query.Term

    assert hello_part[0].text == "hell"
    assert hello_part[1].text == "ello"
    assert tom_part.text == "tom"

Beispiel #3

0

Datei anzeigen

Datei: test_parsing.py Projekt: datakortet/whoosh

def test_ngramwords():
    """Check the query tree produced for an NGRAMWORDS(queryor=True) field.

    "Hello Tom" is expected to become And(Or("hell", "ello"), Term("tom")).
    """
    ngram_schema = fields.Schema(grams=fields.NGRAMWORDS(queryor=True))
    qp = default.QueryParser('grams', ngram_schema)
    parsed = qp.parse(u("Hello Tom"))

    # One sub-query per input word, combined with And.
    assert_equal(parsed.__class__, query.And)

    # The grams of "hello" are alternatives of each other.
    hello_part = parsed[0]
    assert_equal(hello_part.__class__, query.Or)

    # "tom" produces a single gram, hence a bare Term.
    tom_part = parsed[1]
    assert_equal(tom_part.__class__, query.Term)

    assert_equal(hello_part[0].text, "hell")
    assert_equal(hello_part[1].text, "ello")
    assert_equal(tom_part.text, "tom")

Beispiel #4

0

Datei anzeigen

def test_highlight_ngrams():
    """Highlighting a stored NGRAMWORDS field marks the matched gram spans.

    A single document is indexed in memory, searched for "multiplication",
    and its highlighted snippet is compared against the expected text in
    which the matched character ranges are upper-cased.
    """
    ngram_schema = fields.Schema(text=fields.NGRAMWORDS(stored=True))
    index = RamStorage().create_index(ngram_schema)
    with index.writer() as writer:
        writer.add_document(text=u("Multiplication and subtraction are good"))

    with index.searcher() as searcher:
        parser = qparser.QueryParser("text", index.schema)
        parsed = parser.parse(u("multiplication"))
        results = searcher.search(parsed)
        assert results.scored_length() == 1

        # Configure highlighting on the results object before asking the
        # hit for its snippet.
        results.fragmenter = highlight.SentenceFragmenter()
        results.formatter = highlight.UppercaseFormatter()
        highlighted = results[0].highlights("text")
        assert highlighted == "MULTIPLICATIon and subtracTION are good"

Beispiel #5

0

Datei anzeigen

def test_nested_skip():
    """Combine a NestedChildren query with a plain And query.

    One "book" document followed by four "chapter" documents is indexed.
    The test checks three searches: all children of the matching book,
    the chapters whose name contains "hunt", and the And of both.
    """
    schema = fields.Schema(
        id=fields.ID(unique=True, stored=True),
        name=fields.TEXT(stored=True),
        name_ngrams=fields.NGRAMWORDS(minsize=4, field_boost=1.2),
        type=fields.TEXT,
    )

    # (id, name, type) triples; the parent book comes first, then its chapters.
    documents = [
        (u"book_1", u"The Dark Knight Returns", u"book"),
        (u"chapter_1", u"The Dark Knight Returns", u"chapter"),
        (u"chapter_2", u"The Dark Knight Triumphant", u"chapter"),
        (u"chapter_3", u"Hunt the Dark Knight", u"chapter"),
        (u"chapter_4", u"The Dark Knight Falls", u"chapter"),
    ]

    def hit_ids(results):
        # Stored "id" of every hit, in result order.
        return [hit["id"] for hit in results]

    with TempIndex(schema) as ix:
        with ix.writer() as writer:
            for doc_id, title, kind in documents:
                writer.add_document(id=doc_id, name=title,
                                    name_ngrams=title, type=kind)

        with ix.searcher() as searcher:
            # Children of every "book" parent whose name contains "dark".
            is_book = query.Term("type", "book")
            name_has_dark = query.Term("name", "dark")
            chapters_of_dark_books = query.NestedChildren(
                is_book, name_has_dark)

            children_hits = searcher.search(chapters_of_dark_books)
            assert children_hits.scored_length() == 4
            assert hit_ids(children_hits) == [
                "chapter_1", "chapter_2", "chapter_3", "chapter_4"]

            # Chapters whose name contains "hunt".
            hunt_chapters = query.And([
                query.Term("type", "chapter"),
                query.Term("name", "hunt"),
            ])

            hunt_hits = searcher.search(hunt_chapters)
            assert hunt_hits.scored_length() == 1
            assert hit_ids(hunt_hits) == ["chapter_3"]

            # Both conditions together must still select only chapter_3.
            both = query.And([chapters_of_dark_books, hunt_chapters])

            both_hits = searcher.search(both)
            assert both_hits.scored_length() == 1
            assert hit_ids(both_hits) == ["chapter_3"]

Beispiel #6

0

Datei anzeigen

class CatalogueResourceSchema(fields.SchemaClass):
    """Declarative Whoosh schema describing one catalogue resource.

    Each class attribute declares one field.  Some are given as configured
    field instances and others as the bare field class.
    """

    # Identity: ``pk`` is stored and declared unique.
    pk = fields.ID(stored=True, unique=True)
    vendor_name = fields.ID

    # Descriptive text fields whose values are stored with the document.
    name = fields.TEXT(stored=True)
    vendor = fields.TEXT(stored=True, spelling=True)
    version = fields.TEXT(stored=True)
    template_uri = fields.STORED
    type = fields.TEXT(stored=True)
    creation_date = fields.DATETIME
    title = fields.TEXT(stored=True, spelling=True)
    image = fields.STORED
    smartphoneimage = fields.STORED
    description = fields.TEXT(stored=True, spelling=True)
    wiring = fields.TEXT(spelling=True)

    # Visibility: ``users`` and ``groups`` are comma-separated keyword lists.
    # NOTE(review): presumably these restrict who may see the resource -
    # confirm against the code that queries this schema.
    public = fields.BOOLEAN
    users = fields.KEYWORD(commas=True)
    groups = fields.KEYWORD(commas=True)

    # N-gram field created with NGRAMWORDS' default arguments.
    content = fields.NGRAMWORDS()

Beispiel #7

0

Datei anzeigen

Datei: schema.py Projekt: rdzanyM/science_points

from whoosh import fields

# Whoosh schema for searchable entries (journals, conferences, publishers).
#   id      - identifier of the entry in its respective DB table
#   name    - name (or names) of the entry, indexed as word n-grams
#   domains - comma-separated names of the science domains of the entry
schema = fields.Schema(
    id=fields.ID(stored=True),
    name=fields.NGRAMWORDS(stored=True, queryor=True),
    domains=fields.KEYWORD(stored=True, commas=True),
)

Beispiel #8

0

Datei anzeigen

Datei: test_analysis.py Projekt: CuteCha/dssm-theano

def test_ngramwords_tokenizer():
    tk = analysis.CommaSeparatedTokenizer()
    tags = fields.NGRAMWORDS(minsize=3, maxsize=50, tokenizer=tk, stored=True,
                             queryor=True)
    schema = fields.Schema(tags=tags)