Example #1
0
    def create_index(self):
        if not os.path.exists("twitter_index"):
            os.mkdir("twitter_index")


        schema = fields.Schema(tweet_id=fields.TEXT(stored=True),
                                batch=fields.NUMERIC(stored=True),
                                content=fields.TEXT(stored=True),
                                posted=fields.DATETIME(stored=True),
                                owner_sn=fields.TEXT(stored=True),
                                owner_id=fields.TEXT(stored=True),
                                owner_name=fields.TEXT(stored=True),
                                isRT=fields.BOOLEAN(stored=True),
                                timesRT=fields.NUMERIC(stored=True),
                                timesFav= fields.NUMERIC(stored=True),
                                orig_timesRT=fields.NUMERIC(stored=True),
                                orig_timesFav= fields.NUMERIC(stored=True),
                                hashtags=fields.KEYWORD(stored=True),
                                orgnlTweet = fields.TEXT(stored=True),
                                mentions=fields.KEYWORD(stored=True),
                                media = fields.TEXT(stored=True),
                                url = fields.TEXT(stored=True),
                                liwc=fields.TEXT(stored=True))


        self.INDEX = index.create_in("twitter_index", schema, indexname="TWTTR")
        print("New searching index succesfully created")

        return self.INDEX
Example #2
0
def get_schema():
    return fields.Schema(id=fields.ID(stored=True),
                         num=fields.NUMERIC(stored=True),
                         frac=fields.NUMERIC(float, stored=True),
                         tag=fields.ID(stored=True),
                         ev=fields.ID,
                         )
Example #3
0
 def __init__(self, index_dir: str):
     ts = TurkishStemmer()
     self.__schema = fields.Schema(
         message=fields.TEXT(stored=True,
                             field_boost=1.5,
                             analyzer=analysis.StemmingAnalyzer()
                             | analysis.NgramFilter(minsize=2, maxsize=5)),
         meta_content=fields.TEXT(
             stored=True,
             analyzer=analysis.StemmingAnalyzer()
             | analysis.NgramFilter(minsize=2, maxsize=5)),
         message_id=fields.NUMERIC(stored=True, bits=64),
         chat_id=fields.NUMERIC(stored=True, bits=64),
         message_tr=fields.TEXT(
             stored=False,
             field_boost=1.5,
             analyzer=analysis.StemmingAnalyzer(stemfn=ts.stem,
                                                stoplist=STOP_WORDS_TR)
             | analysis.NgramFilter(minsize=2, maxsize=5)),
         meta_content_tr=fields.TEXT(
             stored=False,
             analyzer=analysis.StemmingAnalyzer(stemfn=ts.stem,
                                                stoplist=STOP_WORDS_TR)
             | analysis.NgramFilter(minsize=2, maxsize=5)),
     )
     if not os.path.isdir(index_dir):
         os.mkdir(index_dir)
         self.__index = index.create_in(index_dir, self.__schema)
     else:
         self.__index = index.open_dir(index_dir)
Example #4
0
def test_numeric():
    schema = fields.Schema(id=fields.ID(stored=True),
                           integer=fields.NUMERIC(int),
                           floating=fields.NUMERIC(float))
    ix = RamStorage().create_index(schema)

    w = ix.writer()
    w.add_document(id=u("a"), integer=5820, floating=1.2)
    w.add_document(id=u("b"), integer=22, floating=2.3)
    w.add_document(id=u("c"), integer=78, floating=3.4)
    w.add_document(id=u("d"), integer=13, floating=4.5)
    w.add_document(id=u("e"), integer=9, floating=5.6)
    w.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("integer", schema)

        q = qp.parse(u("5820"))
        r = s.search(q)
        assert len(r) == 1
        assert r[0]["id"] == "a"

    with ix.searcher() as s:
        r = s.search(qp.parse("floating:4.5"))
        assert len(r) == 1
        assert r[0]["id"] == "d"

    q = qp.parse("integer:*")
    assert q.__class__ == query.Every
    assert q.field() == "integer"

    q = qp.parse("integer:5?6")
    assert q == query.NullQuery
Example #5
0
class ProductSchema(fields.SchemaClass):
    '''Indexing schema for Products.'''
    ID = fields.NUMERIC(stored=True)
    image = fields.ID(stored=True)
    name = fields.TEXT(stored=True)
    description = fields.TEXT(stored=True)
    price = fields.NUMERIC(stored=True)
Example #6
0
class test_translate():
    domain = [
        ("alfa", 100, 50),
        ("bravo", 20, 80),
        ("charlie", 10, 10),
        ("delta", 82, 39),
        ("echo", 20, 73),
        ("foxtrot", 81, 59),
        ("golf", 39, 93),
        ("hotel", 57, 48),
        ("india", 84, 75),
    ]

    schema = fields.Schema(name=fields.TEXT(sortable=True),
                           a=fields.NUMERIC(sortable=True),
                           b=fields.NUMERIC(sortable=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        for name, a, b in domain:
            w.add_document(name=u(name), a=a, b=b)

    with ix.searcher() as s:
        q = query.Every()

        # Baseline: just sort by a field
        r = s.search(q, sortedby="a")
        assert " ".join([
            hit["name"] for hit in r
        ]) == "charlie bravo echo golf hotel foxtrot delta india alfa"

        # Sort by reversed name
        target = [x[0] for x in sorted(domain, key=lambda x: x[0][::-1])]
        tf = sorting.TranslateFacet(lambda name: name[::-1],
                                    sorting.FieldFacet("name"))
        r = s.search(q, sortedby=tf)
        assert [hit["name"] for hit in r] == target

        # Sort by average of a and b
        def avg(a, b):
            return (a + b) / 2

        target = [
            x[0] for x in sorted(domain, key=lambda x: (x[1] + x[2]) / 2)
        ]
        af = sorting.FieldFacet("a")
        bf = sorting.FieldFacet("b")
        tf = sorting.TranslateFacet(avg, af, bf)
        r = s.search(q, sortedby=tf)
        assert [hit["name"] for hit in r] == target
Example #7
0
def crear_esquema():
    licorSchema = fields.Schema(
        id=fields.NUMERIC(stored=True),
        titulo=fields.TEXT(sortable=True, field_boost=1.5),
        descripcion=fields.TEXT,
        categoria=fields.TEXT(sortable=True),
        precio=fields.NUMERIC(Decimal, decimal_places=2, sortable=True),
        precioGroup=fields.NUMERIC(sortable=True),
        origen=fields.TEXT(sortable=True),
        graduacion=fields.NUMERIC(sortable=True),
        enStock=fields.BOOLEAN(stored=True),
        urlProducto=fields.TEXT(field_boost=0.5),
    )

    return licorSchema
Example #8
0
    def populate(self, items, text_fields):
        '''
        Populates the index with the given items.

        items: a list of items where each item is represented as a dictionary
        text_fields: a list of the searchable text fields in the items

        Example:
        index.populate([
            {'uid': 1, 'name': 'ABC', 'description': 'ABC is good'},
            {'uid': 2, 'name': 'DEF', 'description': 'DEF is great'},
        ], ['description'])
        '''

        fields = {}
        for field in items[0]:
            if field in text_fields:
                fields[field] = wf.TEXT(stored=True)
            elif isinstance(items[0][field], str):
                fields[field] = wf.ID(stored=True)
            else:
                fields[field] = wf.NUMERIC(stored=True)

        schema = wf.Schema(**fields)

        os.makedirs(self.path, exist_ok=True)

        self.index = wi.create_in(self.path, schema)

        writer = self.index.writer()
        for item in items:
            writer.add_document(**item)
        writer.commit()
Example #9
0
def test_index_decimals():
    from decimal import Decimal

    schema = fields.Schema(name=fields.KEYWORD(stored=True),
                           num=fields.NUMERIC(int))
    ix = RamStorage().create_index(schema)

    with ix.writer() as w:
        with pytest.raises(TypeError):
            w.add_document(name=u("hello"), num=Decimal("3.2"))

    schema = fields.Schema(name=fields.KEYWORD(stored=True),
                           num=fields.NUMERIC(Decimal, decimal_places=5))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(name=u("hello"), num=Decimal("3.2"))
def test_column_query():
    schema = fields.Schema(id=fields.STORED,
                           a=fields.ID(sortable=True),
                           b=fields.NUMERIC(sortable=True))
    with TempIndex(schema, "columnquery") as ix:
        with ix.writer(codec=W3Codec()) as w:
            w.add_document(id=1, a=u("alfa"), b=10)
            w.add_document(id=2, a=u("bravo"), b=20)
            w.add_document(id=3, a=u("charlie"), b=30)
            w.add_document(id=4, a=u("delta"), b=40)
            w.add_document(id=5, a=u("echo"), b=50)
            w.add_document(id=6, a=u("foxtrot"), b=60)

        with ix.searcher() as s:
            def check(q):
                return [s.stored_fields(docnum)["id"] for docnum in q.docs(s)]

            q = query.ColumnQuery("a", u("bravo"))
            assert check(q) == [2]

            q = query.ColumnQuery("b", 30)
            assert check(q) == [3]

            q = query.ColumnQuery("a", lambda v: v != u("delta"))
            assert check(q) == [1, 2, 3, 5, 6]

            q = query.ColumnQuery("b", lambda v: v > 30)
            assert check(q) == [4, 5, 6]
Example #11
0
def test_gtlt():
    schema = fields.Schema(a=fields.KEYWORD, b=fields.NUMERIC,
                           c=fields.KEYWORD,
                           d=fields.NUMERIC(float), e=fields.DATETIME)
    qp = qparser.QueryParser("a", schema)
    qp.add_plugin(plugins.GtLtPlugin())
    qp.add_plugin(dateparse.DateParserPlugin())

    q = qp.parse(u("a:hello b:>100 c:<=z there"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 4)
    assert_equal(q[0], query.Term("a", "hello"))
    assert_equal(q[1], query.NumericRange("b", 100, None, startexcl=True))
    assert_equal(q[2], query.TermRange("c", None, 'z'))
    assert_equal(q[3], query.Term("a", "there"))

    q = qp.parse(u("hello e:>'29 mar 2001' there"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 3)
    assert_equal(q[0], query.Term("a", "hello"))
    # As of this writing, date ranges don't support startexcl/endexcl
    assert_equal(q[1], query.DateRange("e", datetime(2001, 3, 29, 0, 0), None))
    assert_equal(q[2], query.Term("a", "there"))

    q = qp.parse(u("a:> alfa c:<= bravo"))
    assert_equal(text_type(q), "(a:a: AND a:alfa AND a:c: AND a:bravo)")

    qp.remove_plugin_class(plugins.FieldsPlugin)
    qp.remove_plugin_class(plugins.RangePlugin)
    q = qp.parse(u("hello a:>500 there"))
    assert_equal(text_type(q), "(a:hello AND a:a: AND a:500 AND a:there)")
Example #12
0
def test_decimal_numeric():
    from decimal import Decimal

    f = fields.NUMERIC(int, decimal_places=4)
    schema = fields.Schema(id=fields.ID(stored=True), deci=f)
    ix = RamStorage().create_index(schema)

    # assert f.from_text(f.to_text(Decimal("123.56"))), Decimal("123.56"))

    w = ix.writer()
    w.add_document(id=u("a"), deci=Decimal("123.56"))
    w.add_document(id=u("b"), deci=Decimal("0.536255"))
    w.add_document(id=u("c"), deci=Decimal("2.5255"))
    w.add_document(id=u("d"), deci=Decimal("58"))
    w.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("deci", schema)
        q = qp.parse(u("123.56"))
        r = s.search(q)
        assert len(r) == 1
        assert r[0]["id"] == "a"

        r = s.search(qp.parse(u("0.536255")))
        assert len(r) == 1
        assert r[0]["id"] == "b"
Example #13
0
def make_index():
    ana = analysis.StandardAnalyzer(stoplist=None)
    sc = fields.Schema(id=fields.ID(stored=True),
                       text=fields.TEXT(analyzer=ana,
                                        vector=formats.Frequency()),
                       subs=fields.NUMERIC(int, stored=True))
    ix = RamIndex(sc)
    ix.add_document(
        id=u("fieldtype"),
        text=u("The FieldType object supports the following attributes"),
        subs=56)
    ix.add_document(id=u("format"),
                    text=u("the storage format for the field contents"),
                    subs=100)
    ix.add_document(
        id=u("vector"),
        text=u("the storage format for the field vectors (forward index)"),
        subs=23)
    ix.add_document(
        id=u("scorable"),
        text=u("whether searches against this field may be scored."),
        subs=34)
    ix.add_document(
        id=u("stored"),
        text=u(
            "whether the content of this field is stored for each document."),
        subs=575)
    ix.add_document(
        id=u("unique"),
        text=u("whether this field value is unique to each document."),
        subs=2)
    ix.add_document(id=u("const"),
                    text=u("The constructor for the base field type simply"),
                    subs=58204)
    return ix
Example #14
0
def test_decimal_ranges():
    from decimal import Decimal

    schema = fields.Schema(id=fields.STORED,
                           num=fields.NUMERIC(int, decimal_places=2))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    count = Decimal("0.0")
    inc = Decimal("0.2")
    for _ in xrange(500):
        w.add_document(id=str(count), num=count)
        count += inc
    w.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("num", schema)

        def check(qs, start, end):
            q = qp.parse(qs)
            result = [s.stored_fields(d)["id"] for d in q.docs(s)]

            target = []
            count = Decimal(start)
            limit = Decimal(end)
            while count <= limit:
                target.append(str(count))
                count += inc

            assert result == target

        check("[10.2 to 80.8]", "10.2", "80.8")
        check("{10.2 to 80.8]", "10.4", "80.8")
        check("[10.2 to 80.8}", "10.2", "80.6")
        check("{10.2 to 80.8}", "10.4", "80.6")
Example #15
0
class Organization(fields.SchemaClass):
    # numero_de_da : Numéro de la Déclaration d'Activité -
    numero_de_da = fields.ID(stored=True, unique=True)
    # form_total : Nombre de formateurs -
    form_total = fields.NUMERIC(stored=True)
    # da_siren : Numéro de SIREN de la structure -
    da_siren = fields.ID(stored=True, unique=True)
    # da_no_etab : Numéro d'établissement de la structure -
    da_no_etab = fields.ID(stored=True)
    # da_raison_sociale : Raison Sociale -
    da_raison_sociale = fields.TEXT(stored=True,
                                    analyzer=ngram_analyzer,
                                    phrase=False)
    # adr_rue_physique : Voie de l'adresse physique -
    adr_rue_physique = fields.TEXT(stored=True)
    # adr_rue_complement_physique : Complément de l'adresse physique -
    adr_rue_complement_physique = fields.TEXT(stored=True)
    # adr_code_postal_physique : Code postal de l'adresse physique -
    adr_code_postal_physique = fields.ID(stored=True)
    # adr_ville_physique : Ville de l'adresse physique -
    adr_ville_physique = fields.TEXT(stored=True)
    # adr_rue_postale : Voie de l'adresse postale -
    adr_rue_postale = fields.TEXT(stored=True)
    # adr_rue_complement_postale : Complément de l'adresse postale -
    adr_rue_complement_postale = fields.TEXT(stored=True)
    # adr_code_postal_postale : Code postal de l'adresse postale -
    adr_code_postal_postale = fields.ID(stored=True)
    # adr_ville_postale : Ville de l'adresse postale
    adr_ville_postale = fields.TEXT(stored=True)
Example #16
0
def test_query_facet_overlap():
    domain = u("abcdefghi")
    schema = fields.Schema(v=fields.KEYWORD(stored=True),
                           num=fields.NUMERIC(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        for i, ltr in enumerate(domain):
            v = "%s %s" % (ltr, domain[8 - i])
            w.add_document(num=i, v=v)

    with ix.searcher() as s:
        q1 = query.TermRange("v", "a", "c")
        q2 = query.TermRange("v", "d", "f")
        q3 = query.TermRange("v", "g", "i")

        facets = sorting.Facets()
        facets.add_query("myfacet", {
            "a-c": q1,
            "d-f": q2,
            "g-i": q3
        },
                         allow_overlap=True)
        r = s.search(query.Every(), groupedby=facets)
        gr = r.groups("myfacet")
        assert r.groups("myfacet") == {
            'a-c': [0, 1, 2, 6, 7, 8],
            'd-f': [3, 4, 5],
            'g-i': [0, 1, 2, 6, 7, 8]
        }
Example #17
0
def test_numeric_errors():
    f = fields.NUMERIC(int, bits=16, signed=True)
    schema = fields.Schema(f=f)

    with pytest.raises(ValueError):
        list(f.index(-32769))
    with pytest.raises(ValueError):
        list(f.index(32768))
Example #18
0
def test_index_numeric():
    schema = fields.Schema(a=fields.NUMERIC(int, 32, signed=False),
                           b=fields.NUMERIC(int, 32, signed=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(a=1, b=1)
    with ix.searcher() as s:
        assert list(s.lexicon("a")) == \
                     [b('\x00\x00\x00\x00\x01'), b('\x04\x00\x00\x00\x00'),
                      b('\x08\x00\x00\x00\x00'), b('\x0c\x00\x00\x00\x00'),
                      b('\x10\x00\x00\x00\x00'), b('\x14\x00\x00\x00\x00'),
                      b('\x18\x00\x00\x00\x00'), b('\x1c\x00\x00\x00\x00')]
        assert list(s.lexicon("b")) == \
                     [b('\x00\x80\x00\x00\x01'), b('\x04\x08\x00\x00\x00'),
                      b('\x08\x00\x80\x00\x00'), b('\x0c\x00\x08\x00\x00'),
                      b('\x10\x00\x00\x80\x00'), b('\x14\x00\x00\x08\x00'),
                      b('\x18\x00\x00\x00\x80'), b('\x1c\x00\x00\x00\x08')]
def test_sort_text_field():
    domain = (("Visual Display of Quantitative Information, The", 10),
              ("Envisioning Information", 10),
              ("Visual Explanations", 10),
              ("Beautiful Evidence", -10),
              ("Visual and Statistical Thinking", -10),
              ("Cognitive Style of Powerpoint", -10))
    sorted_titles = sorted(d[0] for d in domain)

    schema = fields.Schema(title=fields.TEXT(stored=True, sortable=True),
                           num=fields.NUMERIC(sortable=True))

    def test(ix):
        with ix.searcher() as s:
            # Sort by title
            r = s.search(query.Every(), sortedby="title")
            titles = [hit["title"] for hit in r]
            assert titles == sorted_titles

            # Sort by reverse title
            facet = sorting.FieldFacet("title", reverse=True)
            r = s.search(query.Every(), sortedby=facet)
            assert [hit["title"] for hit in r] == list(reversed(sorted_titles))

            # Sort by num (-10 to 10) first, and within that, by reverse title
            facet = sorting.MultiFacet()
            facet.add_field("num")
            facet.add_field("title", reverse=True)

            r = s.search(query.Every(), sortedby=facet)
            target = ["Visual and Statistical Thinking",
                      "Cognitive Style of Powerpoint",
                      "Beautiful Evidence",
                      "Visual Explanations",
                      "Visual Display of Quantitative Information, The",
                      "Envisioning Information",
                      ]
            assert [hit["title"] for hit in r] == target

    # Single segment
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for title, num in domain:
                w.add_document(title=u(title), num=num)
        test(ix)

    # Multisegment
    with TempIndex(schema) as ix:
        # Segment 1
        with ix.writer() as w:
            for title, num in domain[:3]:
                w.add_document(title=u(title), num=num)
        # Segment 2
        with ix.writer() as w:
            for title, num in domain[3:]:
                w.add_document(title=u(title), num=num)
            w.merge = False
        test(ix)
Example #20
0
def test_memory_codec():
    from whoosh.codec import memory
    from whoosh.searching import Searcher

    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(a=fields.TEXT(vector=True),
                           b=fields.STORED,
                           c=fields.NUMERIC(stored=True, sortable=True),
                           d=fields.TEXT(analyzer=ana, spelling=True))

    codec = memory.MemoryCodec()
    with codec.writer(schema) as w:
        w.add_document(a=u("alfa bravo charlie"),
                       b="hello",
                       c=100,
                       d=u("quelling whining echoing"))
        w.add_document(a=u("bravo charlie delta"),
                       b=1000,
                       c=200,
                       d=u("rolling timing yelling"))
        w.add_document(a=u("charlie delta echo"),
                       b=5.5,
                       c=300,
                       d=u("using opening pulling"))
        w.add_document(a=u("delta echo foxtrot"),
                       b=True,
                       c=-100,
                       d=u("aching selling dipping"))
        w.add_document(a=u("echo foxtrot india"),
                       b=None,
                       c=-200,
                       d=u("filling going hopping"))

    reader = codec.reader(schema)
    s = Searcher(reader)

    assert ("a", "delta") in reader
    q = query.Term("a", "delta")
    r = s.search(q)
    assert len(r) == 3
    assert [hit["b"] for hit in r] == [1000, 5.5, True]

    assert (" ".join(
        s.field_terms("a")) == "alfa bravo charlie delta echo foxtrot india")

    cfield = schema["c"]
    c_sortables = cfield.sortable_terms(reader, "c")
    c_values = [cfield.from_bytes(t) for t in c_sortables]
    assert c_values, [-200, -100, 100, 200, 300]

    assert reader.has_column("c")
    c_values = list(reader.column_reader("c"))
    assert c_values == [100, 200, 300, -100, -200]

    assert s.has_vector(2, "a")
    v = s.vector(2, "a")
    assert " ".join(v.all_ids()) == "charlie delta echo"
class TweetSchema(fields.SchemaClass):
    id = fields.ID(stored=True, unique=True)
    url = fields.ID(stored=True, unique=True)

    text = fields.TEXT(stored=True)
    source = fields.TEXT(stored=True)

    reply = fields.BOOLEAN(stored=True)
    in_reply_to_id = fields.TEXT(stored=True)
    in_reply_to_name = fields.TEXT(stored=True)

    user_mentions = fields.KEYWORD(stored=True)
    hashtags = fields.KEYWORD(stored=True)
    urls = fields.KEYWORD(stored=True)

    geo = fields.BOOLEAN(stored=True)
    latitude = fields.NUMERIC(stored=True)
    longitude = fields.NUMERIC(stored=True)

    date = fields.DATETIME(stored=True)
Example #22
0
 def _init_schema():
     schema = fields.Schema()
     schema.add("id", fields.ID(unique=True, stored=True))
     schema.add("short_id", fields.ID(stored=True))
     schema.add("status", fields.ID(stored=True))
     schema.add("started", fields.DATETIME(stored=True))
     schema.add("stopped", fields.DATETIME(stored=True))
     schema.add("pkg_type", fields.ID(stored=True))
     schema.add("pkg_name", fields.ID(stored=True))
     schema.add("pkg_version", fields.ID(stored=True))
     schema.add("model_name", fields.ID(stored=True))
     schema.add("op_name", fields.ID(stored=True))
     schema.add("label", fields.TEXT(stored=True))
     schema.add("scalar_*", fields.NUMERIC(float, stored=True), glob=True)
     schema.add("flagi_*", fields.NUMERIC(int, stored=True), glob=True)
     schema.add("flagf_*", fields.NUMERIC(int, stored=True), glob=True)
     schema.add("flagb_*", fields.BOOLEAN(stored=True), glob=True)
     schema.add("flags_*", fields.ID(stored=True), glob=True)
     schema.add("priv_*", fields.STORED, glob=True)
     return schema
Example #23
0
def create_index(directory):
    ''' create_index creates a Whoosh index for clips in the given directory '''
    global clip_index

    if not os.path.exists(directory):
        os.makedirs(directory)

    if index.exists_in(directory):
        clip_index = index.open_dir(directory)
    else:
        schema = fields.Schema(id=fields.NUMERIC(stored=True), title=fields.TEXT, description=fields.TEXT, tags=fields.TEXT(stored=True), user=fields.TEXT)
        clip_index = index.create_in(directory, schema)
Example #24
0
def test_numeric_support():
    intf = fields.NUMERIC(int, shift_step=0)
    longf = fields.NUMERIC(long_type, shift_step=0)
    floatf = fields.NUMERIC(float, shift_step=0)

    def roundtrip(obj, num):
        assert_equal(obj.from_text(obj.to_text(num)), num)

    roundtrip(intf, 0)
    roundtrip(intf, 12345)
    roundtrip(intf, -12345)
    roundtrip(longf, 0)
    roundtrip(longf, 85020450482)
    roundtrip(longf, -85020450482)
    roundtrip(floatf, 0)
    roundtrip(floatf, 582.592)
    roundtrip(floatf, -582.592)
    roundtrip(floatf, -99.42)

    from random import shuffle

    def roundtrip_sort(obj, start, end, step):
        count = start
        rng = []
        while count < end:
            rng.append(count)
            count += step

        scrabled = rng[:]
        shuffle(scrabled)
        round = [
            obj.from_text(t)
            for t in sorted([obj.to_text(n) for n in scrabled])
        ]
        assert_equal(round, rng)

    roundtrip_sort(intf, -100, 100, 1)
    roundtrip_sort(longf, -58902, 58249, 43)
    roundtrip_sort(floatf, -99.42, 99.83, 2.38)
Example #25
0
def test_numeric_ranges_unsigned():
    values = [1, 10, 100, 1000, 2, 20, 200, 2000, 9, 90, 900, 9000]
    schema = fields.Schema(num2=fields.NUMERIC(stored=True, signed=False))

    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        for v in values:
            w.add_document(num2=v)

    with ix.searcher() as s:
        q = query.NumericRange("num2", 55, None, True, False)
        r = s.search(q, limit=None)
        for hit in r:
            assert int(hit["num2"]) >= 55
def test_multivalue():
    schema = fields.Schema(s=fields.TEXT(sortable=True),
                           n=fields.NUMERIC(sortable=True))
    ix = RamStorage().create_index(schema)
    with ix.writer(codec=W3Codec()) as w:
        w.add_document(s=u("alfa foxtrot charlie").split(), n=[100, 200, 300])
        w.add_document(s=u("juliet bravo india").split(), n=[10, 20, 30])

    with ix.reader() as r:
        scr = r.column_reader("s")
        assert list(scr) == ["alfa", "juliet"]

        ncr = r.column_reader("n")
        assert list(ncr) == [100, 10]
Example #27
0
def test_update_numeric():
    schema = fields.Schema(num=fields.NUMERIC(unique=True, stored=True),
                           text=fields.ID(stored=True))
    with TempIndex(schema, "updatenum") as ix:
        nums = list(range(5)) * 3
        random.shuffle(nums)
        for num in nums:
            with ix.writer() as w:
                w.update_document(num=num, text=text_type(num))

        with ix.searcher() as s:
            results = [d["text"] for _, d in s.iter_docs()]
            results = " ".join(sorted(results))
            assert results == "0 1 2 3 4"
Example #28
0
class PydocSchema(fields.SchemaClass):
    path = fields.STORED

    title = fields.TEXT(stored=True,
                        sortable=True,
                        spelling=True,
                        analyzer=ana)
    tgrams = fields.NGRAMWORDS

    content = fields.TEXT(spelling=True, analyzer=ana)

    chapter = fields.ID(sortable=True)

    size = fields.NUMERIC(sortable=True)
    rev = fields.NUMERIC(sortable=True)
    revised = fields.DATETIME(sortable=True)

    modref = fields.TEXT(analyzer=tech_ana, phrase=False)
    clsref = fields.TEXT(analyzer=tech_ana, phrase=False)
    funcref = fields.TEXT(analyzer=tech_ana, phrase=False)
    pep = fields.TEXT(analyzer=tech_ana, phrase=False)

    cls = fields.TEXT(analyzer=cls_ana)
    mod = fields.TEXT(analyzer=tech_ana, phrase=False)
Example #29
0
 def project_schema(self):
     return fields.Schema(
         path=fields.ID(stored=True, unique=True),
         name=fields.ID(stored=True),
         user=fields.ID(stored=True),
         index=fields.ID(stored=True),
         serial=fields.NUMERIC(stored=True),
         classifiers=fields.KEYWORD(commas=True, scorable=True),
         keywords=fields.KEYWORD(stored=True, commas=False, scorable=True),
         version=fields.STORED(),
         doc_version=fields.STORED(),
         type=fields.ID(stored=True),
         text_path=fields.STORED(),
         text_title=fields.STORED(),
         text=fields.TEXT(analyzer=NgramWordAnalyzer(), stored=False, phrase=False))
Example #30
0
def test_missing_overlap():
    schema = fields.Schema(a=fields.NUMERIC(stored=True),
                           b=fields.KEYWORD(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(a=0, b=u("one two"))
        w.add_document(a=1)
        w.add_document(a=2, b=u("two three"))
        w.add_document(a=3)
        w.add_document(a=4, b=u("three four"))

    with ix.searcher() as s:
        facet = sorting.FieldFacet("b", allow_overlap=True)
        r = s.search(query.Every(), groupedby=facet)
        target = {"one": [0], "two": [0, 2], "three": [2, 4],"four": [4],
                  None: [1, 3]}
        assert r.groups() == target