def create_index(self):
    if not os.path.exists("twitter_index"):
        os.mkdir("twitter_index")
    schema = fields.Schema(tweet_id=fields.TEXT(stored=True),
                           batch=fields.NUMERIC(stored=True),
                           content=fields.TEXT(stored=True),
                           posted=fields.DATETIME(stored=True),
                           owner_sn=fields.TEXT(stored=True),
                           owner_id=fields.TEXT(stored=True),
                           owner_name=fields.TEXT(stored=True),
                           isRT=fields.BOOLEAN(stored=True),
                           timesRT=fields.NUMERIC(stored=True),
                           timesFav=fields.NUMERIC(stored=True),
                           orig_timesRT=fields.NUMERIC(stored=True),
                           orig_timesFav=fields.NUMERIC(stored=True),
                           hashtags=fields.KEYWORD(stored=True),
                           orgnlTweet=fields.TEXT(stored=True),
                           mentions=fields.KEYWORD(stored=True),
                           media=fields.TEXT(stored=True),
                           url=fields.TEXT(stored=True),
                           liwc=fields.TEXT(stored=True))
    self.INDEX = index.create_in("twitter_index", schema, indexname="TWTTR")
    print("New search index successfully created")
    return self.INDEX
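# A minimal usage sketch for create_index() above, assuming the index was
# created in "twitter_index" with indexname "TWTTR"; the document values are
# hypothetical placeholders.
ix = index.open_dir("twitter_index", indexname="TWTTR")
with ix.writer() as w:  # the context manager commits on exit
    w.add_document(tweet_id=u"1234567890",
                   content=u"example tweet text",
                   isRT=False,
                   timesRT=0,
                   timesFav=0)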
def get_schema():
    return fields.Schema(id=fields.ID(stored=True),
                         num=fields.NUMERIC(stored=True),
                         frac=fields.NUMERIC(float, stored=True),
                         tag=fields.ID(stored=True),
                         ev=fields.ID,
                         )
def __init__(self, index_dir: str):
    ts = TurkishStemmer()
    self.__schema = fields.Schema(
        message=fields.TEXT(stored=True, field_boost=1.5,
                            analyzer=analysis.StemmingAnalyzer()
                            | analysis.NgramFilter(minsize=2, maxsize=5)),
        meta_content=fields.TEXT(
            stored=True,
            analyzer=analysis.StemmingAnalyzer()
            | analysis.NgramFilter(minsize=2, maxsize=5)),
        message_id=fields.NUMERIC(stored=True, bits=64),
        chat_id=fields.NUMERIC(stored=True, bits=64),
        message_tr=fields.TEXT(
            stored=False, field_boost=1.5,
            analyzer=analysis.StemmingAnalyzer(stemfn=ts.stem,
                                               stoplist=STOP_WORDS_TR)
            | analysis.NgramFilter(minsize=2, maxsize=5)),
        meta_content_tr=fields.TEXT(
            stored=False,
            analyzer=analysis.StemmingAnalyzer(stemfn=ts.stem,
                                               stoplist=STOP_WORDS_TR)
            | analysis.NgramFilter(minsize=2, maxsize=5)),
    )
    if not os.path.isdir(index_dir):
        os.mkdir(index_dir)
        self.__index = index.create_in(index_dir, self.__schema)
    else:
        self.__index = index.open_dir(index_dir)
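# A hedged search sketch meant to live inside the same class (so the
# name-mangled self.__index attribute resolves). MultifieldParser and OrGroup
# are standard whoosh.qparser APIs; the method name and return shape are
# assumptions, not part of the original class.
from whoosh import qparser

def search(self, text, limit=10):
    parser = qparser.MultifieldParser(
        ["message", "meta_content", "message_tr", "meta_content_tr"],
        self.__index.schema, group=qparser.OrGroup)
    with self.__index.searcher() as searcher:
        results = searcher.search(parser.parse(text), limit=limit)
        return [(hit["chat_id"], hit["message_id"], hit["message"])
                for hit in results]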
def test_numeric():
    schema = fields.Schema(id=fields.ID(stored=True),
                           integer=fields.NUMERIC(int),
                           floating=fields.NUMERIC(float))
    ix = RamStorage().create_index(schema)

    w = ix.writer()
    w.add_document(id=u("a"), integer=5820, floating=1.2)
    w.add_document(id=u("b"), integer=22, floating=2.3)
    w.add_document(id=u("c"), integer=78, floating=3.4)
    w.add_document(id=u("d"), integer=13, floating=4.5)
    w.add_document(id=u("e"), integer=9, floating=5.6)
    w.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("integer", schema)

        q = qp.parse(u("5820"))
        r = s.search(q)
        assert len(r) == 1
        assert r[0]["id"] == "a"

    with ix.searcher() as s:
        r = s.search(qp.parse("floating:4.5"))
        assert len(r) == 1
        assert r[0]["id"] == "d"

        q = qp.parse("integer:*")
        assert q.__class__ == query.Every
        assert q.field() == "integer"

        q = qp.parse("integer:5?6")
        assert q == query.NullQuery
class ProductSchema(fields.SchemaClass):
    '''Indexing schema for Products.'''
    ID = fields.NUMERIC(stored=True)
    image = fields.ID(stored=True)
    name = fields.TEXT(stored=True)
    description = fields.TEXT(stored=True)
    price = fields.NUMERIC(stored=True)
def test_translate():
    domain = [("alfa", 100, 50), ("bravo", 20, 80), ("charlie", 10, 10),
              ("delta", 82, 39), ("echo", 20, 73), ("foxtrot", 81, 59),
              ("golf", 39, 93), ("hotel", 57, 48), ("india", 84, 75)]

    schema = fields.Schema(name=fields.TEXT(sortable=True),
                           a=fields.NUMERIC(sortable=True),
                           b=fields.NUMERIC(sortable=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        for name, a, b in domain:
            w.add_document(name=u(name), a=a, b=b)

    with ix.searcher() as s:
        q = query.Every()

        # Baseline: just sort by a field
        r = s.search(q, sortedby="a")
        assert " ".join([hit["name"] for hit in r]) == \
            "charlie bravo echo golf hotel foxtrot delta india alfa"

        # Sort by reversed name
        target = [x[0] for x in sorted(domain, key=lambda x: x[0][::-1])]
        tf = sorting.TranslateFacet(lambda name: name[::-1],
                                    sorting.FieldFacet("name"))
        r = s.search(q, sortedby=tf)
        assert [hit["name"] for hit in r] == target

        # Sort by average of a and b
        def avg(a, b):
            return (a + b) / 2

        target = [x[0] for x in
                  sorted(domain, key=lambda x: (x[1] + x[2]) / 2)]
        af = sorting.FieldFacet("a")
        bf = sorting.FieldFacet("b")
        tf = sorting.TranslateFacet(avg, af, bf)
        r = s.search(q, sortedby=tf)
        assert [hit["name"] for hit in r] == target
def crear_esquema():
    from decimal import Decimal

    licorSchema = fields.Schema(
        id=fields.NUMERIC(stored=True),
        titulo=fields.TEXT(sortable=True, field_boost=1.5),
        descripcion=fields.TEXT,
        categoria=fields.TEXT(sortable=True),
        precio=fields.NUMERIC(Decimal, decimal_places=2, sortable=True),
        precioGroup=fields.NUMERIC(sortable=True),
        origen=fields.TEXT(sortable=True),
        graduacion=fields.NUMERIC(sortable=True),
        enStock=fields.BOOLEAN(stored=True),
        urlProducto=fields.TEXT(field_boost=0.5),
    )
    return licorSchema
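# A hedged usage sketch for crear_esquema(): build an on-disk index and add
# one product. The directory name "indice_licores" and the sample values are
# hypothetical.
import os
from decimal import Decimal
from whoosh import index

if not os.path.exists("indice_licores"):
    os.mkdir("indice_licores")
ix = index.create_in("indice_licores", crear_esquema())
with ix.writer() as w:
    w.add_document(id=1, titulo=u"Ron añejo", categoria=u"Ron",
                   precio=Decimal("24.95"), enStock=True)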
def populate(self, items, text_fields):
    '''
    Populates the index with the given items.

    items: a list of items where each item is represented as a dictionary
    text_fields: a list of the searchable text fields in the items

    Example:
        index.populate([
            {'uid': 1, 'name': 'ABC', 'description': 'ABC is good'},
            {'uid': 2, 'name': 'DEF', 'description': 'DEF is great'},
        ], ['description'])
    '''
    fields = {}
    for field in items[0]:
        if field in text_fields:
            fields[field] = wf.TEXT(stored=True)
        elif isinstance(items[0][field], str):
            fields[field] = wf.ID(stored=True)
        else:
            fields[field] = wf.NUMERIC(stored=True)
    schema = wf.Schema(**fields)

    os.makedirs(self.path, exist_ok=True)
    self.index = wi.create_in(self.path, schema)

    writer = self.index.writer()
    for item in items:
        writer.add_document(**item)
    writer.commit()
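# A hedged companion sketch for populate(): a simple search method for the
# same class. The method name, signature, and inline qparser import are
# assumptions; only self.index comes from populate() above.
def search(self, text_field, query_text, limit=10):
    from whoosh import qparser
    parser = qparser.QueryParser(text_field, self.index.schema)
    with self.index.searcher() as searcher:
        results = searcher.search(parser.parse(query_text), limit=limit)
        return [hit.fields() for hit in results]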
def test_index_decimals():
    from decimal import Decimal

    schema = fields.Schema(name=fields.KEYWORD(stored=True),
                           num=fields.NUMERIC(int))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        with pytest.raises(TypeError):
            w.add_document(name=u("hello"), num=Decimal("3.2"))

    schema = fields.Schema(name=fields.KEYWORD(stored=True),
                           num=fields.NUMERIC(Decimal, decimal_places=5))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(name=u("hello"), num=Decimal("3.2"))
def test_column_query():
    schema = fields.Schema(id=fields.STORED,
                           a=fields.ID(sortable=True),
                           b=fields.NUMERIC(sortable=True))
    with TempIndex(schema, "columnquery") as ix:
        with ix.writer(codec=W3Codec()) as w:
            w.add_document(id=1, a=u("alfa"), b=10)
            w.add_document(id=2, a=u("bravo"), b=20)
            w.add_document(id=3, a=u("charlie"), b=30)
            w.add_document(id=4, a=u("delta"), b=40)
            w.add_document(id=5, a=u("echo"), b=50)
            w.add_document(id=6, a=u("foxtrot"), b=60)

        with ix.searcher() as s:
            def check(q):
                return [s.stored_fields(docnum)["id"] for docnum in q.docs(s)]

            q = query.ColumnQuery("a", u("bravo"))
            assert check(q) == [2]

            q = query.ColumnQuery("b", 30)
            assert check(q) == [3]

            q = query.ColumnQuery("a", lambda v: v != u("delta"))
            assert check(q) == [1, 2, 3, 5, 6]

            q = query.ColumnQuery("b", lambda v: v > 30)
            assert check(q) == [4, 5, 6]
def test_gtlt():
    schema = fields.Schema(a=fields.KEYWORD, b=fields.NUMERIC,
                           c=fields.KEYWORD,
                           d=fields.NUMERIC(float), e=fields.DATETIME)
    qp = qparser.QueryParser("a", schema)
    qp.add_plugin(plugins.GtLtPlugin())
    qp.add_plugin(dateparse.DateParserPlugin())

    q = qp.parse(u("a:hello b:>100 c:<=z there"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 4)
    assert_equal(q[0], query.Term("a", "hello"))
    assert_equal(q[1], query.NumericRange("b", 100, None, startexcl=True))
    assert_equal(q[2], query.TermRange("c", None, 'z'))
    assert_equal(q[3], query.Term("a", "there"))

    q = qp.parse(u("hello e:>'29 mar 2001' there"))
    assert_equal(q.__class__, query.And)
    assert_equal(len(q), 3)
    assert_equal(q[0], query.Term("a", "hello"))
    # As of this writing, date ranges don't support startexcl/endexcl
    assert_equal(q[1], query.DateRange("e", datetime(2001, 3, 29, 0, 0), None))
    assert_equal(q[2], query.Term("a", "there"))

    q = qp.parse(u("a:> alfa c:<= bravo"))
    assert_equal(text_type(q), "(a:a: AND a:alfa AND a:c: AND a:bravo)")

    qp.remove_plugin_class(plugins.FieldsPlugin)
    qp.remove_plugin_class(plugins.RangePlugin)
    q = qp.parse(u("hello a:>500 there"))
    assert_equal(text_type(q), "(a:hello AND a:a: AND a:500 AND a:there)")
def test_decimal_numeric():
    from decimal import Decimal

    f = fields.NUMERIC(int, decimal_places=4)
    schema = fields.Schema(id=fields.ID(stored=True), deci=f)
    ix = RamStorage().create_index(schema)
    # assert f.from_text(f.to_text(Decimal("123.56"))), Decimal("123.56"))

    w = ix.writer()
    w.add_document(id=u("a"), deci=Decimal("123.56"))
    w.add_document(id=u("b"), deci=Decimal("0.536255"))
    w.add_document(id=u("c"), deci=Decimal("2.5255"))
    w.add_document(id=u("d"), deci=Decimal("58"))
    w.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("deci", schema)
        q = qp.parse(u("123.56"))
        r = s.search(q)
        assert len(r) == 1
        assert r[0]["id"] == "a"

        r = s.search(qp.parse(u("0.536255")))
        assert len(r) == 1
        assert r[0]["id"] == "b"
def make_index():
    ana = analysis.StandardAnalyzer(stoplist=None)
    sc = fields.Schema(id=fields.ID(stored=True),
                       text=fields.TEXT(analyzer=ana,
                                        vector=formats.Frequency()),
                       subs=fields.NUMERIC(int, stored=True))
    ix = RamIndex(sc)
    ix.add_document(id=u("fieldtype"),
                    text=u("The FieldType object supports the following attributes"),
                    subs=56)
    ix.add_document(id=u("format"),
                    text=u("the storage format for the field contents"),
                    subs=100)
    ix.add_document(id=u("vector"),
                    text=u("the storage format for the field vectors (forward index)"),
                    subs=23)
    ix.add_document(id=u("scorable"),
                    text=u("whether searches against this field may be scored."),
                    subs=34)
    ix.add_document(id=u("stored"),
                    text=u("whether the content of this field is stored for each document."),
                    subs=575)
    ix.add_document(id=u("unique"),
                    text=u("whether this field value is unique to each document."),
                    subs=2)
    ix.add_document(id=u("const"),
                    text=u("The constructor for the base field type simply"),
                    subs=58204)
    return ix
def test_decimal_ranges():
    from decimal import Decimal

    schema = fields.Schema(id=fields.STORED,
                           num=fields.NUMERIC(int, decimal_places=2))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    count = Decimal("0.0")
    inc = Decimal("0.2")
    for _ in xrange(500):
        w.add_document(id=str(count), num=count)
        count += inc
    w.commit()

    with ix.searcher() as s:
        qp = qparser.QueryParser("num", schema)

        def check(qs, start, end):
            q = qp.parse(qs)
            result = [s.stored_fields(d)["id"] for d in q.docs(s)]

            target = []
            count = Decimal(start)
            limit = Decimal(end)
            while count <= limit:
                target.append(str(count))
                count += inc

            assert result == target

        check("[10.2 to 80.8]", "10.2", "80.8")
        check("{10.2 to 80.8]", "10.4", "80.8")
        check("[10.2 to 80.8}", "10.2", "80.6")
        check("{10.2 to 80.8}", "10.4", "80.6")
class Organization(fields.SchemaClass):
    # numero_de_da: activity declaration number (Numéro de Déclaration d'Activité)
    numero_de_da = fields.ID(stored=True, unique=True)
    # form_total: number of trainers
    form_total = fields.NUMERIC(stored=True)
    # da_siren: SIREN number of the organization
    da_siren = fields.ID(stored=True, unique=True)
    # da_no_etab: establishment number of the organization
    da_no_etab = fields.ID(stored=True)
    # da_raison_sociale: company name
    da_raison_sociale = fields.TEXT(stored=True, analyzer=ngram_analyzer,
                                    phrase=False)
    # adr_rue_physique: street of the physical address
    adr_rue_physique = fields.TEXT(stored=True)
    # adr_rue_complement_physique: additional line of the physical address
    adr_rue_complement_physique = fields.TEXT(stored=True)
    # adr_code_postal_physique: postal code of the physical address
    adr_code_postal_physique = fields.ID(stored=True)
    # adr_ville_physique: city of the physical address
    adr_ville_physique = fields.TEXT(stored=True)
    # adr_rue_postale: street of the mailing address
    adr_rue_postale = fields.TEXT(stored=True)
    # adr_rue_complement_postale: additional line of the mailing address
    adr_rue_complement_postale = fields.TEXT(stored=True)
    # adr_code_postal_postale: postal code of the mailing address
    adr_code_postal_postale = fields.ID(stored=True)
    # adr_ville_postale: city of the mailing address
    adr_ville_postale = fields.TEXT(stored=True)
def test_query_facet_overlap():
    domain = u("abcdefghi")
    schema = fields.Schema(v=fields.KEYWORD(stored=True),
                           num=fields.NUMERIC(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        for i, ltr in enumerate(domain):
            v = "%s %s" % (ltr, domain[8 - i])
            w.add_document(num=i, v=v)

    with ix.searcher() as s:
        q1 = query.TermRange("v", "a", "c")
        q2 = query.TermRange("v", "d", "f")
        q3 = query.TermRange("v", "g", "i")

        facets = sorting.Facets()
        facets.add_query("myfacet", {"a-c": q1, "d-f": q2, "g-i": q3},
                         allow_overlap=True)
        r = s.search(query.Every(), groupedby=facets)
        gr = r.groups("myfacet")
        assert gr == {'a-c': [0, 1, 2, 6, 7, 8],
                      'd-f': [3, 4, 5],
                      'g-i': [0, 1, 2, 6, 7, 8]}
def test_numeric_errors():
    f = fields.NUMERIC(int, bits=16, signed=True)
    schema = fields.Schema(f=f)

    with pytest.raises(ValueError):
        list(f.index(-32769))
    with pytest.raises(ValueError):
        list(f.index(32768))
def test_index_numeric():
    schema = fields.Schema(a=fields.NUMERIC(int, 32, signed=False),
                           b=fields.NUMERIC(int, 32, signed=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(a=1, b=1)
    with ix.searcher() as s:
        assert (list(s.lexicon("a")) ==
                [b('\x00\x00\x00\x00\x01'), b('\x04\x00\x00\x00\x00'),
                 b('\x08\x00\x00\x00\x00'), b('\x0c\x00\x00\x00\x00'),
                 b('\x10\x00\x00\x00\x00'), b('\x14\x00\x00\x00\x00'),
                 b('\x18\x00\x00\x00\x00'), b('\x1c\x00\x00\x00\x00')])
        assert (list(s.lexicon("b")) ==
                [b('\x00\x80\x00\x00\x01'), b('\x04\x08\x00\x00\x00'),
                 b('\x08\x00\x80\x00\x00'), b('\x0c\x00\x08\x00\x00'),
                 b('\x10\x00\x00\x80\x00'), b('\x14\x00\x00\x08\x00'),
                 b('\x18\x00\x00\x00\x80'), b('\x1c\x00\x00\x00\x08')])
def test_sort_text_field():
    domain = (("Visual Display of Quantitative Information, The", 10),
              ("Envisioning Information", 10),
              ("Visual Explanations", 10),
              ("Beautiful Evidence", -10),
              ("Visual and Statistical Thinking", -10),
              ("Cognitive Style of Powerpoint", -10))
    sorted_titles = sorted(d[0] for d in domain)

    schema = fields.Schema(title=fields.TEXT(stored=True, sortable=True),
                           num=fields.NUMERIC(sortable=True))

    def test(ix):
        with ix.searcher() as s:
            # Sort by title
            r = s.search(query.Every(), sortedby="title")
            titles = [hit["title"] for hit in r]
            assert titles == sorted_titles

            # Sort by reverse title
            facet = sorting.FieldFacet("title", reverse=True)
            r = s.search(query.Every(), sortedby=facet)
            assert [hit["title"] for hit in r] == list(reversed(sorted_titles))

            # Sort by num (-10 to 10) first, and within that, by reverse title
            facet = sorting.MultiFacet()
            facet.add_field("num")
            facet.add_field("title", reverse=True)
            r = s.search(query.Every(), sortedby=facet)
            target = ["Visual and Statistical Thinking",
                      "Cognitive Style of Powerpoint",
                      "Beautiful Evidence",
                      "Visual Explanations",
                      "Visual Display of Quantitative Information, The",
                      "Envisioning Information",
                      ]
            assert [hit["title"] for hit in r] == target

    # Single segment
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for title, num in domain:
                w.add_document(title=u(title), num=num)
        test(ix)

    # Multisegment
    with TempIndex(schema) as ix:
        # Segment 1
        with ix.writer() as w:
            for title, num in domain[:3]:
                w.add_document(title=u(title), num=num)
        # Segment 2
        with ix.writer() as w:
            for title, num in domain[3:]:
                w.add_document(title=u(title), num=num)
            w.merge = False
        test(ix)
def test_memory_codec():
    from whoosh.codec import memory
    from whoosh.searching import Searcher

    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(a=fields.TEXT(vector=True),
                           b=fields.STORED,
                           c=fields.NUMERIC(stored=True, sortable=True),
                           d=fields.TEXT(analyzer=ana, spelling=True))

    codec = memory.MemoryCodec()
    with codec.writer(schema) as w:
        w.add_document(a=u("alfa bravo charlie"), b="hello", c=100,
                       d=u("quelling whining echoing"))
        w.add_document(a=u("bravo charlie delta"), b=1000, c=200,
                       d=u("rolling timing yelling"))
        w.add_document(a=u("charlie delta echo"), b=5.5, c=300,
                       d=u("using opening pulling"))
        w.add_document(a=u("delta echo foxtrot"), b=True, c=-100,
                       d=u("aching selling dipping"))
        w.add_document(a=u("echo foxtrot india"), b=None, c=-200,
                       d=u("filling going hopping"))

    reader = codec.reader(schema)
    s = Searcher(reader)

    assert ("a", "delta") in reader
    q = query.Term("a", "delta")
    r = s.search(q)
    assert len(r) == 3
    assert [hit["b"] for hit in r] == [1000, 5.5, True]

    assert (" ".join(s.field_terms("a"))
            == "alfa bravo charlie delta echo foxtrot india")

    cfield = schema["c"]
    c_sortables = cfield.sortable_terms(reader, "c")
    c_values = [cfield.from_bytes(t) for t in c_sortables]
    assert c_values == [-200, -100, 100, 200, 300]

    assert reader.has_column("c")
    c_values = list(reader.column_reader("c"))
    assert c_values == [100, 200, 300, -100, -200]

    assert s.has_vector(2, "a")
    v = s.vector(2, "a")
    assert " ".join(v.all_ids()) == "charlie delta echo"
class TweetSchema(fields.SchemaClass):
    id = fields.ID(stored=True, unique=True)
    url = fields.ID(stored=True, unique=True)
    text = fields.TEXT(stored=True)
    source = fields.TEXT(stored=True)
    reply = fields.BOOLEAN(stored=True)
    in_reply_to_id = fields.TEXT(stored=True)
    in_reply_to_name = fields.TEXT(stored=True)
    user_mentions = fields.KEYWORD(stored=True)
    hashtags = fields.KEYWORD(stored=True)
    urls = fields.KEYWORD(stored=True)
    geo = fields.BOOLEAN(stored=True)
    latitude = fields.NUMERIC(stored=True)
    longitude = fields.NUMERIC(stored=True)
    date = fields.DATETIME(stored=True)
def _init_schema():
    schema = fields.Schema()
    schema.add("id", fields.ID(unique=True, stored=True))
    schema.add("short_id", fields.ID(stored=True))
    schema.add("status", fields.ID(stored=True))
    schema.add("started", fields.DATETIME(stored=True))
    schema.add("stopped", fields.DATETIME(stored=True))
    schema.add("pkg_type", fields.ID(stored=True))
    schema.add("pkg_name", fields.ID(stored=True))
    schema.add("pkg_version", fields.ID(stored=True))
    schema.add("model_name", fields.ID(stored=True))
    schema.add("op_name", fields.ID(stored=True))
    schema.add("label", fields.TEXT(stored=True))
    schema.add("scalar_*", fields.NUMERIC(float, stored=True), glob=True)
    schema.add("flagi_*", fields.NUMERIC(int, stored=True), glob=True)
    schema.add("flagf_*", fields.NUMERIC(int, stored=True), glob=True)
    schema.add("flagb_*", fields.BOOLEAN(stored=True), glob=True)
    schema.add("flags_*", fields.ID(stored=True), glob=True)
    schema.add("priv_*", fields.STORED, glob=True)
    return schema
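# A hedged sketch of how the glob fields above behave: any keyword argument
# matching "scalar_*", "flagb_*", or "priv_*" is accepted at indexing time.
# The directory name and values are hypothetical.
import os
from whoosh import index

os.makedirs("runs_index", exist_ok=True)
ix = index.create_in("runs_index", _init_schema())
with ix.writer() as w:
    w.add_document(id=u"run-1",
                   status=u"completed",
                   scalar_loss=0.042,        # matches scalar_* -> NUMERIC(float)
                   flagb_verbose=True,       # matches flagb_* -> BOOLEAN
                   priv_config={"seed": 7})  # matches priv_* -> STORED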
def create_index(directory):
    ''' create_index creates a Whoosh index for clips in the given directory '''
    global clip_index
    if not os.path.exists(directory):
        os.makedirs(directory)
    if index.exists_in(directory):
        clip_index = index.open_dir(directory)
    else:
        schema = fields.Schema(id=fields.NUMERIC(stored=True),
                               title=fields.TEXT,
                               description=fields.TEXT,
                               tags=fields.TEXT(stored=True),
                               user=fields.TEXT)
        clip_index = index.create_in(directory, schema)
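# A hedged usage sketch for create_index(): index one clip, then search its
# tags. The directory name and clip values are hypothetical.
from whoosh import qparser

create_index("clip_index")
with clip_index.writer() as w:
    w.add_document(id=1, title=u"Sunset timelapse",
                   tags=u"nature sunset", user=u"alice")

with clip_index.searcher() as s:
    qp = qparser.QueryParser("tags", clip_index.schema)
    for hit in s.search(qp.parse(u"sunset")):
        print(hit["id"], hit["tags"])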
def test_numeric_support():
    intf = fields.NUMERIC(int, shift_step=0)
    longf = fields.NUMERIC(long_type, shift_step=0)
    floatf = fields.NUMERIC(float, shift_step=0)

    def roundtrip(obj, num):
        assert_equal(obj.from_text(obj.to_text(num)), num)

    roundtrip(intf, 0)
    roundtrip(intf, 12345)
    roundtrip(intf, -12345)
    roundtrip(longf, 0)
    roundtrip(longf, 85020450482)
    roundtrip(longf, -85020450482)
    roundtrip(floatf, 0)
    roundtrip(floatf, 582.592)
    roundtrip(floatf, -582.592)
    roundtrip(floatf, -99.42)

    from random import shuffle

    def roundtrip_sort(obj, start, end, step):
        count = start
        rng = []
        while count < end:
            rng.append(count)
            count += step

        scrambled = rng[:]
        shuffle(scrambled)
        result = [obj.from_text(t)
                  for t in sorted([obj.to_text(n) for n in scrambled])]
        assert_equal(result, rng)

    roundtrip_sort(intf, -100, 100, 1)
    roundtrip_sort(longf, -58902, 58249, 43)
    roundtrip_sort(floatf, -99.42, 99.83, 2.38)
def test_numeric_ranges_unsigned():
    values = [1, 10, 100, 1000, 2, 20, 200, 2000, 9, 90, 900, 9000]
    schema = fields.Schema(num2=fields.NUMERIC(stored=True, signed=False))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        for v in values:
            w.add_document(num2=v)

    with ix.searcher() as s:
        q = query.NumericRange("num2", 55, None, True, False)
        r = s.search(q, limit=None)
        for hit in r:
            assert int(hit["num2"]) >= 55
def test_multivalue():
    schema = fields.Schema(s=fields.TEXT(sortable=True),
                           n=fields.NUMERIC(sortable=True))
    ix = RamStorage().create_index(schema)
    with ix.writer(codec=W3Codec()) as w:
        w.add_document(s=u("alfa foxtrot charlie").split(), n=[100, 200, 300])
        w.add_document(s=u("juliet bravo india").split(), n=[10, 20, 30])

    with ix.reader() as r:
        scr = r.column_reader("s")
        assert list(scr) == ["alfa", "juliet"]

        ncr = r.column_reader("n")
        assert list(ncr) == [100, 10]
def test_update_numeric():
    schema = fields.Schema(num=fields.NUMERIC(unique=True, stored=True),
                           text=fields.ID(stored=True))
    with TempIndex(schema, "updatenum") as ix:
        nums = list(range(5)) * 3
        random.shuffle(nums)
        for num in nums:
            with ix.writer() as w:
                w.update_document(num=num, text=text_type(num))

        with ix.searcher() as s:
            results = [d["text"] for _, d in s.iter_docs()]
            results = " ".join(sorted(results))
            assert results == "0 1 2 3 4"
class PydocSchema(fields.SchemaClass):
    path = fields.STORED
    title = fields.TEXT(stored=True, sortable=True, spelling=True,
                        analyzer=ana)
    tgrams = fields.NGRAMWORDS
    content = fields.TEXT(spelling=True, analyzer=ana)
    chapter = fields.ID(sortable=True)
    size = fields.NUMERIC(sortable=True)
    rev = fields.NUMERIC(sortable=True)
    revised = fields.DATETIME(sortable=True)
    modref = fields.TEXT(analyzer=tech_ana, phrase=False)
    clsref = fields.TEXT(analyzer=tech_ana, phrase=False)
    funcref = fields.TEXT(analyzer=tech_ana, phrase=False)
    pep = fields.TEXT(analyzer=tech_ana, phrase=False)
    cls = fields.TEXT(analyzer=cls_ana)
    mod = fields.TEXT(analyzer=tech_ana, phrase=False)
def project_schema(self):
    return fields.Schema(
        path=fields.ID(stored=True, unique=True),
        name=fields.ID(stored=True),
        user=fields.ID(stored=True),
        index=fields.ID(stored=True),
        serial=fields.NUMERIC(stored=True),
        classifiers=fields.KEYWORD(commas=True, scorable=True),
        keywords=fields.KEYWORD(stored=True, commas=False, scorable=True),
        version=fields.STORED(),
        doc_version=fields.STORED(),
        type=fields.ID(stored=True),
        text_path=fields.STORED(),
        text_title=fields.STORED(),
        text=fields.TEXT(analyzer=NgramWordAnalyzer(), stored=False,
                         phrase=False))
def test_missing_overlap():
    schema = fields.Schema(a=fields.NUMERIC(stored=True),
                           b=fields.KEYWORD(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(a=0, b=u("one two"))
        w.add_document(a=1)
        w.add_document(a=2, b=u("two three"))
        w.add_document(a=3)
        w.add_document(a=4, b=u("three four"))

    with ix.searcher() as s:
        facet = sorting.FieldFacet("b", allow_overlap=True)
        r = s.search(query.Every(), groupedby=facet)
        target = {"one": [0], "two": [0, 2], "three": [2, 4],
                  "four": [4], None: [1, 3]}
        assert r.groups() == target