Code Example #1
    def test_lengths(self):
        s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                          f2=fields.KEYWORD(stored=True, scorable=True))
        ix = self.make_index("testindex", s)

        try:
            w = ix.writer()
            tokens = u"ABCDEFG"
            from itertools import cycle, islice
            lengths = [10, 20, 2, 102, 45, 3, 420, 2]
            for length in lengths:
                w.add_document(f2=u" ".join(islice(cycle(tokens), length)))
            w.commit()
            dr = ix.doc_reader()
            ls1 = [
                dr.doc_field_length(i, "f1") for i in xrange(0, len(lengths))
            ]
            ls2 = [
                dr.doc_field_length(i, "f2") for i in xrange(0, len(lengths))
            ]
            self.assertEqual(ls1, [0] * len(lengths))
            self.assertEqual(ls2, lengths)
            dr.close()

            ix.close()
        finally:
            self.destroy_index("testindex")
Code Example #2
def make_whoosh_schema():
    """
    Creates and returns the whoosh schema being used.
    Note: typically you will want to retrieve the schema from
          the index itself (ix.schema).
          This function exists to create a schema object during
          the creation of the index.
    """
    from whoosh import fields
    schema = fields.Schema(name=fields.TEXT,
                           rules_text=fields.TEXT,
                           flavor_text=fields.TEXT,
                           sets=fields.KEYWORD(stored=True),
                           types=fields.KEYWORD(stored=True),
                           subtypes=fields.KEYWORD(stored=True),
                           power=fields.NUMERIC,
                           toughness=fields.NUMERIC,
                           cmc=fields.NUMERIC,
                           mana_cost=fields.KEYWORD,
                           white=fields.NUMERIC,
                           blue=fields.NUMERIC,
                           black=fields.NUMERIC,
                           red=fields.NUMERIC,
                           green=fields.NUMERIC,
                           legal_formats=fields.KEYWORD(stored=True),
                           data_obj=fields.STORED)
    return schema
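
For context, a minimal sketch of how a schema factory like this is typically consumed when the index is first built; the directory name and the sample document values are hypothetical. Note that the plain KEYWORD fields above (commas left at the default False) expect whitespace-separated terms.

import os

from whoosh import index

INDEX_DIR = "card_index"  # hypothetical location for the index files
if not os.path.exists(INDEX_DIR):
    os.mkdir(INDEX_DIR)

if index.exists_in(INDEX_DIR):
    # An existing index already carries its schema (ix.schema), as the
    # docstring above points out.
    ix = index.open_dir(INDEX_DIR)
else:
    ix = index.create_in(INDEX_DIR, make_whoosh_schema())

with ix.writer() as w:
    # KEYWORD fields such as types/subtypes take space-separated terms.
    w.add_document(name=u"Example Card",  # hypothetical sample data
                   types=u"Creature",
                   subtypes=u"Elf Druid",
                   cmc=1)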
Code Example #3
    def test_merged_lengths(self):
        s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                          f2=fields.KEYWORD(stored=True, scorable=True))
        st = store.RamStorage()
        ix = index.Index(st, s, create=True)
        w = writing.IndexWriter(ix)
        w.add_document(f1=u"A B C", f2=u"X")
        w.add_document(f1=u"B C D E", f2=u"Y Z")
        w.commit()

        w = writing.IndexWriter(ix)
        w.add_document(f1=u"A", f2=u"B C D E X Y")
        w.add_document(f1=u"B C", f2=u"X")
        w.commit(writing.NO_MERGE)

        w = writing.IndexWriter(ix)
        w.add_document(f1=u"A B X Y Z", f2=u"B C")
        w.add_document(f1=u"Y X", f2=u"A B")
        w.commit(writing.NO_MERGE)

        dr = ix.doc_reader()
        self.assertEqual(dr[0]["f1"], u"A B C")
        self.assertEqual(dr.doc_field_length(0, "f1"), 3)
        self.assertEqual(dr.doc_field_length(2, "f2"), 6)
        self.assertEqual(dr.doc_field_length(4, "f1"), 5)
Code Example #4
File: test_indexing.py, Project: rwinterw/Whoosh
    def test_merged_lengths(self):
        s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                          f2=fields.KEYWORD(stored=True, scorable=True))
        st = RamStorage()
        ix = st.create_index(s)
        w = ix.writer()
        w.add_document(f1=u"A B C", f2=u"X")
        w.add_document(f1=u"B C D E", f2=u"Y Z")
        w.commit()

        w = ix.writer()
        w.add_document(f1=u"A", f2=u"B C D E X Y")
        w.add_document(f1=u"B C", f2=u"X")
        w.commit(NO_MERGE)

        w = ix.writer()
        w.add_document(f1=u"A B X Y Z", f2=u"B C")
        w.add_document(f1=u"Y X", f2=u"A B")
        w.commit(NO_MERGE)

        dr = ix.reader()
        self.assertEqual(dr.stored_fields(0)["f1"], u"A B C")
        self.assertEqual(dr.doc_field_length(0, "f1"), 3)
        self.assertEqual(dr.doc_field_length(2, "f2"), 6)
        self.assertEqual(dr.doc_field_length(4, "f1"), 5)
        dr.close()
Code Example #5
File: search.py, Project: mahrozapradana/bestja
class OffersSchema(fields.SchemaClass):
    pk = fields.ID(unique=True, stored=True)
    slug = fields.ID(stored=True)
    name = fields.TEXT(stored=True)
    wishes = fields.KEYWORD(commas=True)
    target_group = fields.KEYWORD(commas=True)
    organization = fields.TEXT(stored=True, sortable=True)
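
A short hedged sketch of how a declarative SchemaClass like this is typically instantiated and queried; the storage, documents, and values are made up for illustration. Because wishes and target_group are declared with commas=True, each comma-separated value is indexed as one KEYWORD term, so multi-word tags stay intact.

from whoosh import query
from whoosh.filedb.filestore import RamStorage

ix = RamStorage().create_index(OffersSchema())

with ix.writer() as w:
    w.add_document(pk=u"1", slug=u"food-drive", name=u"Food drive",
                   wishes=u"canned food,volunteers",
                   target_group=u"families,seniors",
                   organization=u"Example Org")

with ix.searcher() as s:
    # "canned food" is a single term because the field splits on commas.
    results = s.search(query.Term("wishes", u"canned food"))
    print([hit["name"] for hit in results])  # expected: ['Food drive']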
Code Example #6
    def create_index(self):
        if not os.path.exists("twitter_index"):
            os.mkdir("twitter_index")

        schema = fields.Schema(tweet_id=fields.TEXT(stored=True),
                               batch=fields.NUMERIC(stored=True),
                               content=fields.TEXT(stored=True),
                               posted=fields.DATETIME(stored=True),
                               owner_sn=fields.TEXT(stored=True),
                               owner_id=fields.TEXT(stored=True),
                               owner_name=fields.TEXT(stored=True),
                               isRT=fields.BOOLEAN(stored=True),
                               timesRT=fields.NUMERIC(stored=True),
                               timesFav=fields.NUMERIC(stored=True),
                               orig_timesRT=fields.NUMERIC(stored=True),
                               orig_timesFav=fields.NUMERIC(stored=True),
                               hashtags=fields.KEYWORD(stored=True),
                               orgnlTweet=fields.TEXT(stored=True),
                               mentions=fields.KEYWORD(stored=True),
                               media=fields.TEXT(stored=True),
                               url=fields.TEXT(stored=True),
                               liwc=fields.TEXT(stored=True))

        self.INDEX = index.create_in("twitter_index", schema, indexname="TWTTR")
        print("New search index successfully created")

        return self.INDEX
Code Example #7
class WorkspaceSchema(fields.SchemaClass):

    id = fields.ID(stored=True, unique=True)
    owner = fields.TEXT(stored=True, spelling=True)
    name = fields.TEXT(stored=True, spelling=True)
    description = fields.NGRAM(stored=True, minsize=1, phrase=True)
    lastmodified = fields.DATETIME(stored=True)
    longdescription = fields.NGRAM(stored=True, minsize=1, phrase=True)
    public = fields.BOOLEAN(stored=True)
    users = fields.KEYWORD(commas=True)
    groups = fields.KEYWORD(commas=True)
    shared = fields.BOOLEAN(stored=True)
Code Example #8
File: whoosh_index.py, Project: t-8ch/devpi
    def project_schema(self):
        return fields.Schema(
            path=fields.ID(stored=True, unique=True),
            name=fields.ID(stored=True),
            user=fields.ID(stored=True),
            index=fields.ID(stored=True),
            classifiers=fields.KEYWORD(commas=True, scorable=True),
            keywords=fields.KEYWORD(stored=True, commas=False, scorable=True),
            version=fields.STORED(),
            doc_version=fields.STORED(),
            type=fields.ID(stored=True),
            text_path=fields.STORED(),
            text_title=fields.STORED(),
            text=fields.TEXT(analyzer=NgramWordAnalyzer(), stored=False, phrase=False))
Code Example #9
def test_or_nots2():
    # Issue #286
    schema = fields.Schema(a=fields.KEYWORD(stored=True),
                           b=fields.KEYWORD(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)
    with ix.writer() as w:
        w.add_document(b=u("bravo"))

    with ix.searcher() as s:
        q = query.Or([query.Term("a", "alfa"),
                      query.Not(query.Term("b", "alfa"))
                      ])
        r = s.search(q)
        assert len(r) == 1
Code Example #10
def _create_index():
    s = fields.Schema(f1=fields.KEYWORD(stored=True),
                      f2=fields.KEYWORD,
                      f3=fields.KEYWORD)
    st = RamStorage()
    ix = st.create_index(s)
    return ix
Code Example #11
def test_scoring():
    schema = fields.Schema(kind=fields.ID,
                           name=fields.KEYWORD(scorable=True, stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        with w.group():
            w.add_document(kind=u("class"), name=u("Index"))
            w.add_document(kind=u("method"), name=u("add document"))
            w.add_document(kind=u("method"), name=u("add reader"))
            w.add_document(kind=u("method"), name=u("close"))
        with w.group():
            w.add_document(kind=u("class"), name=u("Accumulator"))
            w.add_document(kind=u("method"), name=u("add"))
            w.add_document(kind=u("method"), name=u("get result"))
        with w.group():
            w.add_document(kind=u("class"), name=u("Calculator"))
            w.add_document(kind=u("method"), name=u("add"))
            w.add_document(kind=u("method"), name=u("add all"))
            w.add_document(kind=u("method"), name=u("add some"))
            w.add_document(kind=u("method"), name=u("multiply"))
            w.add_document(kind=u("method"), name=u("close"))

    with ix.searcher() as s:
        q = query.NestedParent(query.Term("kind", "class"),
                               query.Term("name", "add"))
        r = s.search(q)
        assert [hit["name"]
                for hit in r] == ["Calculator", "Index", "Accumulator"]
Code Example #12
    def create_whoosh_schema(self):
        schema_classname = "WhooshSchema"
        schema_classname = str(schema_classname)
        attrs = OrderedDict()
        for c in self.columns:
            if c in self.ngram_columns:
                field = fields.NGRAM(
                    minsize=self.ngram_minsize,
                    maxsize=self.ngram_maxsize,
                    stored=True,
                )
            elif c in self.phrase_columns:
                field = fields.TEXT(stored=True)
            elif c in self.keyword_columns:
                field = fields.KEYWORD(
                    lowercase=self.keyword_lowercase,
                    commas=self.keyword_commas,
                    stored=True,
                )
            else:
                field = fields.STORED()
            attrs[c] = field
        SchemaClass = type(schema_classname, (fields.SchemaClass,), attrs)
        schema = SchemaClass()
        return schema
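
A brief hedged sketch of what the type(...) call above produces, using made-up column names: the dynamically built class behaves just like a hand-written fields.SchemaClass subclass.

from collections import OrderedDict

from whoosh import fields

# Hypothetical equivalent of two passes through the loop above.
attrs = OrderedDict()
attrs["title"] = fields.NGRAM(minsize=2, maxsize=4, stored=True)
attrs["tags"] = fields.KEYWORD(lowercase=True, commas=True, stored=True)

WhooshSchema = type("WhooshSchema", (fields.SchemaClass,), attrs)
schema = WhooshSchema()
print(schema.names())  # expected: ['tags', 'title'] (names() is sorted)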
Code Example #13
    def _create_index(self):
        s = fields.Schema(f1=fields.KEYWORD(stored=True),
                          f2=fields.KEYWORD,
                          f3=fields.KEYWORD)
        st = store.RamStorage()
        ix = index.Index(st, s, create=True)
        return ix
Code Example #14
    def create_whoosh_schema(self):
        """
        Dynamically create whoosh.fields.SchemaClass schema object.

        It defines how you index your dataset.

        :rtype: SchemaClass
        """
        schema_classname = "WhooshSchema"
        schema_classname = str(schema_classname)
        attrs = OrderedDict()
        for c_setting in self.columns:
            if c_setting.type_is_ngram:
                field = fields.NGRAM(
                    minsize=c_setting.ngram_minsize,
                    maxsize=c_setting.ngram_maxsize,
                    stored=True,
                )
            elif c_setting.type_is_phrase:
                field = fields.TEXT(stored=True)
            elif c_setting.type_is_keyword:
                field = fields.KEYWORD(
                    lowercase=c_setting.keyword_lowercase,
                    commas=c_setting.keyword_commas,
                    stored=True,
                )
            else:
                field = fields.STORED()
            attrs[c_setting.name] = field
        SchemaClass = type(schema_classname, (fields.SchemaClass,), attrs)
        schema = SchemaClass() # type: SchemaClass
        return schema
Code Example #15
def test_batchsize_eq_doccount():
    check_multi()
    schema = fields.Schema(a=fields.KEYWORD(stored=True))
    with TempIndex(schema) as ix:
        with ix.writer(procs=4, batchsize=10) as w:
            for i in xrange(10):
                w.add_document(a=u(str(i)))
Code Example #16
class Fields(object):
    INDEX_DIR = os.path.join(Globals.BASE_DIR, 'fields')
    INDEX = None
    SCHEMA = fields.Schema(name=fields.TEXT(analyzer=analysis.FancyAnalyzer(),
                                            stored=True,
                                            chars=True),
                           tags=fields.KEYWORD(scorable=True))

    @classmethod
    def get_index(cls):
        if cls.INDEX is None:
            if not os.path.exists(cls.INDEX_DIR):
                os.mkdir(cls.INDEX_DIR)
            if index.exists_in(cls.INDEX_DIR):
                cls.INDEX = index.open_dir(cls.INDEX_DIR)
            else:
                cls.INDEX = index.create_in(cls.INDEX_DIR, cls.SCHEMA)
                writer = cls.INDEX.writer()
                for att in Definitions.all_atts():
                    writer.add_document(name=unicode(att['ov']))
                writer.add_document(name=u'ov')
                writer.add_document(name=u'nov')
                writer.add_document(name=u'id')
                writer.commit()
        return cls.INDEX

    @classmethod
    def search(cls, query_string):
        qp = qparser.MultifieldParser(cls.SCHEMA.names(), schema=cls.SCHEMA)
        q = qp.parse(query_string)
        s = cls.get_index().searcher()
        results = s.search(q, limit=None)
        return [r['name'] for r in results]
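
For reference, a small hedged sketch of what the search() classmethod does with a query string; the query text is arbitrary. MultifieldParser expands an unfielded term across every field it was given.

from whoosh import qparser

qp = qparser.MultifieldParser(["name", "tags"], schema=Fields.SCHEMA)
q = qp.parse(u"identifier")
# Roughly equivalent to (name:identifier OR tags:identifier); the KEYWORD
# "tags" field is matched term-for-term, without phrase handling.
print(q)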
Code Example #17
File: test_sorting.py, Project: datakortet/whoosh
def test_query_facet2():
    domain = u("abcdefghi")
    schema = fields.Schema(v=fields.KEYWORD(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        for i, ltr in enumerate(domain):
            v = "%s %s" % (ltr, domain[0 - i])
            w.add_document(v=v)

    with ix.searcher() as s:
        q1 = query.TermRange("v", "a", "c")
        q2 = query.TermRange("v", "d", "f")
        q3 = query.TermRange("v", "g", "i")

        facets = sorting.Facets()
        facets.add_query("myfacet",
                         {"a-c": q1, "d-f": q2, "g-i": q3},
                         allow_overlap=True)
        r = s.search(query.Every(), groupedby=facets)
        assert_equal(r.groups("myfacet"), {
            'a-c': [0, 1, 2, 7, 8],
            'd-f': [4, 5],
            'g-i': [3, 6]
        })
Code Example #18
    def test_creation(self):
        s = fields.Schema()
        s.add("content", fields.TEXT(phrase=True))
        s.add("title", fields.TEXT(stored=True))
        s.add("path", fields.ID(stored=True))
        s.add("tags", fields.KEYWORD(stored=True))
        s.add("quick", fields.NGRAM)
        s.add("note", fields.STORED)
        st = store.RamStorage()

        ix = index.Index(st, s, create=True)
        w = writing.IndexWriter(ix)
        w.add_document(title=u"First",
                       content=u"This is the first document",
                       path=u"/a",
                       tags=u"first second third",
                       quick=u"First document",
                       note=u"This is the first document")
        w.start_document()
        w.add_field("content", u"Let's try this again")
        w.add_field("title", u"Second")
        w.add_field("path", u"/b")
        w.add_field("tags", u"Uno Dos Tres")
        w.add_field("quick", u"Second document")
        w.add_field("note", u"This is the second document")
        w.end_document()

        w.commit()
Code Example #19
File: test_indexing.py, Project: CuteCha/dssm-theano
def test_index_decimals():
    from decimal import Decimal

    schema = fields.Schema(name=fields.KEYWORD(stored=True),
                           num=fields.NUMERIC(int))
    ix = RamStorage().create_index(schema)

    with ix.writer() as w:
        with pytest.raises(TypeError):
            w.add_document(name=u("hello"), num=Decimal("3.2"))

    schema = fields.Schema(name=fields.KEYWORD(stored=True),
                           num=fields.NUMERIC(Decimal, decimal_places=5))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(name=u("hello"), num=Decimal("3.2"))
Code Example #20
File: test_flexible.py, Project: CuteCha/dssm-theano
def test_removefield():
    schema = fields.Schema(id=fields.ID(stored=True),
                           content=fields.TEXT,
                           city=fields.KEYWORD(stored=True))
    with TempIndex(schema, "removefield") as ix:
        w = ix.writer()
        w.add_document(id=u("b"), content=u("bravo"), city=u("baghdad"))
        w.add_document(id=u("c"), content=u("charlie"), city=u("cairo"))
        w.add_document(id=u("d"), content=u("delta"), city=u("dakar"))
        w.commit()

        with ix.searcher() as s:
            assert s.document(id=u("c")) == {"id": "c", "city": "cairo"}

        w = ix.writer()
        w.remove_field("content")
        w.remove_field("city")
        w.commit()

        ixschema = ix._current_schema()
        assert ixschema.names() == ["id"]
        assert ixschema.stored_names() == ["id"]

        with ix.searcher() as s:
            assert ("content", b("charlie")) not in s.reader()
            assert s.document(id=u("c")) == {"id": u("c")}
Code Example #21
def test_closed_searcher():
    from whoosh.reading import ReaderClosed

    schema = fields.Schema(key=fields.KEYWORD(stored=True, sortable=True))

    with TempStorage() as st:
        ix = st.create_index(schema)
        with ix.writer() as w:
            w.add_document(key=u"alfa")
            w.add_document(key=u"bravo")
            w.add_document(key=u"charlie")
            w.add_document(key=u"delta")
            w.add_document(key=u"echo")

        s = ix.searcher()
        r = s.search(query.TermRange("key", "b", "d"))
        s.close()
        assert s.is_closed
        with pytest.raises(ReaderClosed):
            assert r[0]["key"] == "bravo"
        with pytest.raises(ReaderClosed):
            s.reader().column_reader("key")
        with pytest.raises(ReaderClosed):
            s.suggest("key", "brovo")

        s = ix.searcher()
        r = s.search(query.TermRange("key", "b", "d"))
        assert r[0]
        assert r[0]["key"] == "bravo"
        c = s.reader().column_reader("key")
        assert c[1] == "bravo"
        assert s.suggest("key", "brovo") == ["bravo"]
Code Example #22
def test_overlapping_vector():
    schema = fields.Schema(id=fields.STORED, tags=fields.KEYWORD(vector=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(id=0, tags=u("alfa bravo charlie"))
        w.add_document(id=1, tags=u("bravo charlie delta"))
        w.add_document(id=2, tags=u("charlie delta echo"))
        w.add_document(id=3, tags=u("delta echo alfa"))
        w.add_document(id=4, tags=u("echo alfa bravo"))

    with ix.searcher() as s:
        of = sorting.FieldFacet("tags", allow_overlap=True)
        cat = of.categorizer(s)
        assert cat._use_vectors

        r = s.search(query.Every(), groupedby={"tags": of})
        assert r.groups("tags") == {'alfa': [0, 3, 4], 'bravo': [0, 1, 4],
                                    'charlie': [0, 1, 2], 'delta': [1, 2, 3],
                                    'echo': [2, 3, 4]}

        fcts = sorting.Facets()
        fcts.add_field("tags", allow_overlap=True)
        r = s.search(query.Every(), groupedby=fcts)
        assert r.groups("tags") == {'alfa': [0, 3, 4], 'bravo': [0, 1, 4],
                                    'charlie': [0, 1, 2], 'delta': [1, 2, 3],
                                    'echo': [2, 3, 4]}
Code Example #23
def test_not_order():
    schema = fields.Schema(id=fields.STORED,
                           count=fields.KEYWORD(lowercase=True),
                           cats=fields.KEYWORD(lowercase=True))
    qp = default.QueryParser("count", schema)

    q1 = qp.parse(u("(NOT (count:0) AND cats:1)"))
    assert q1.__class__ == query.And
    assert q1[0].__class__ == query.Not
    assert q1[1].__class__ == query.Term
    assert q1.__unicode__() == '(NOT count:0 AND cats:1)'

    q2 = qp.parse(u("(cats:1 AND NOT (count:0))"))
    assert q2.__class__ == query.And
    assert q2[0].__class__ == query.Term
    assert q2[1].__class__ == query.Not
    assert q2.__unicode__() == '(cats:1 AND NOT count:0)'
Code Example #24
def test_batchsize_eq_doccount():
    from whoosh.filedb.multiproc import MpWriter

    schema = fields.Schema(a=fields.KEYWORD(stored=True))
    with TempIndex(schema) as ix:
        with ix.writer(procs=4, batchsize=10) as w:
            for i in xrange(10):
                w.add_document(a=u(str(i)))
Code Example #25
File: test_fields.py, Project: CuteCha/dssm-theano
def test_creation2():
    s = fields.Schema(a=fields.ID(stored=True),
                      b=fields.ID,
                      c=fields.KEYWORD(scorable=True))

    assert s.names() == ["a", "b", "c"]
    assert "a" in s
    assert "b" in s
    assert "c" in s
Code Example #26
class CatalogueResourceSchema(fields.SchemaClass):

    pk = fields.ID(stored=True, unique=True)
    vendor_name = fields.ID
    name = fields.TEXT(stored=True)
    vendor = fields.TEXT(stored=True, spelling=True)
    version = fields.TEXT(stored=True)
    template_uri = fields.STORED
    type = fields.TEXT(stored=True)
    creation_date = fields.DATETIME
    title = fields.TEXT(stored=True, spelling=True)
    image = fields.STORED
    smartphoneimage = fields.STORED
    description = fields.TEXT(stored=True, spelling=True)
    wiring = fields.TEXT(spelling=True)
    public = fields.BOOLEAN
    users = fields.KEYWORD(commas=True)
    groups = fields.KEYWORD(commas=True)
    content = fields.NGRAMWORDS()
Code Example #27
def test_nested_delete():
    schema = fields.Schema(kind=fields.ID,
                           name=fields.KEYWORD(scorable=True, stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        with w.group():
            w.add_document(kind=u("class"), name=u("Index"))
            w.add_document(kind=u("method"), name=u("add document"))
            w.add_document(kind=u("method"), name=u("add reader"))
            w.add_document(kind=u("method"), name=u("close"))
        with w.group():
            w.add_document(kind=u("class"), name=u("Accumulator"))
            w.add_document(kind=u("method"), name=u("add"))
            w.add_document(kind=u("method"), name=u("get result"))
        with w.group():
            w.add_document(kind=u("class"), name=u("Calculator"))
            w.add_document(kind=u("method"), name=u("add"))
            w.add_document(kind=u("method"), name=u("add all"))
            w.add_document(kind=u("method"), name=u("add some"))
            w.add_document(kind=u("method"), name=u("multiply"))
            w.add_document(kind=u("method"), name=u("close"))
        with w.group():
            w.add_document(kind=u("class"), name=u("Deleter"))
            w.add_document(kind=u("method"), name=u("add"))
            w.add_document(kind=u("method"), name=u("delete"))

    # Delete "Accumulator" class
    with ix.writer() as w:
        q = query.NestedParent(query.Term("kind", "class"),
                               query.Term("name", "Accumulator"))
        w.delete_by_query(q)

    # Check that Accumulator AND ITS METHODS are deleted
    with ix.searcher() as s:
        r = s.search(query.Term("kind", "class"))
        assert sorted(hit["name"]
                      for hit in r) == ["Calculator", "Deleter", "Index"]

        names = [fs["name"] for _, fs in s.iter_docs()]
        assert names == [
            "Index", "add document", "add reader", "close", "Calculator",
            "add", "add all", "add some", "multiply", "close", "Deleter",
            "add", "delete"
        ]

    # Delete any class with a close method
    with ix.writer() as w:
        q = query.NestedParent(query.Term("kind", "class"),
                               query.Term("name", "close"))
        w.delete_by_query(q)

    # Check the CLASSES AND METHODS are gone
    with ix.searcher() as s:
        names = [fs["name"] for _, fs in s.iter_docs()]
        assert names == ["Deleter", "add", "delete"]
Code Example #28
class TweetSchema(fields.SchemaClass):
    id = fields.ID(stored=True, unique=True)
    url = fields.ID(stored=True, unique=True)

    text = fields.TEXT(stored=True)
    source = fields.TEXT(stored=True)

    reply = fields.BOOLEAN(stored=True)
    in_reply_to_id = fields.TEXT(stored=True)
    in_reply_to_name = fields.TEXT(stored=True)

    user_mentions = fields.KEYWORD(stored=True)
    hashtags = fields.KEYWORD(stored=True)
    urls = fields.KEYWORD(stored=True)

    geo = fields.BOOLEAN(stored=True)
    latitude = fields.NUMERIC(stored=True)
    longitude = fields.NUMERIC(stored=True)

    date = fields.DATETIME(stored=True)
Code Example #29
def test_lengths():
    s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                      f2=fields.KEYWORD(stored=True, scorable=True))
    with TempIndex(s, "testlengths") as ix:
        w = ix.writer()
        tokens = u("ABCDEFG")
        from itertools import cycle, islice
        lengths = [10, 20, 2, 102, 45, 3, 420, 2]
        for length in lengths:
            w.add_document(f2=u(" ").join(islice(cycle(tokens), length)))
        w.commit()

        with ix.reader() as dr:
            ls1 = [dr.doc_field_length(i, "f1")
                   for i in xrange(0, len(lengths))]
            assert_equal(ls1, [0] * len(lengths))
            ls2 = [dr.doc_field_length(i, "f2")
                   for i in xrange(0, len(lengths))]
            assert_equal(ls2, [byte_to_length(length_to_byte(l))
                               for l in lengths])
Code Example #30
File: test_fields.py, Project: CuteCha/dssm-theano
def test_creation1():
    s = fields.Schema()
    s.add("content", fields.TEXT(phrase=True))
    s.add("title", fields.TEXT(stored=True))
    s.add("path", fields.ID(stored=True))
    s.add("tags", fields.KEYWORD(stored=True))
    s.add("quick", fields.NGRAM)
    s.add("note", fields.STORED)

    assert s.names() == ["content", "note", "path", "quick", "tags", "title"]
    assert "content" in s
    assert "buzz" not in s
    assert isinstance(s["tags"], fields.KEYWORD)