def test_resetsearchindexes_command_existing_dir(self, getdefaultlocale_mock):
    self.options["interactive"] = False
    os.mkdir(self.new_index_dir)
    index.create_in(self.new_index_dir, fields.Schema(content=fields.TEXT), 'resource')
    self.assertTrue(os.path.exists(self.new_index_dir))

    with self.settings(WIRECLOUD_INDEX_DIR=self.new_index_dir):
        try:
            call_command('resetsearchindexes', **self.options)
        except SystemExit:
            raise CommandError('')

    self.options['stdout'].seek(0)
    self.assertEqual(self.options['stdout'].read(), '')
    self.options['stderr'].seek(0)
    self.assertEqual(self.options['stderr'].read(), '')
    self.assertTrue(os.path.exists(self.new_index_dir))
    for search_index in get_available_search_engines():
        self.assertTrue(index.exists_in(self.new_index_dir,
                                        indexname=search_index.indexname))
def test_merged_lengths():
    s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                      f2=fields.KEYWORD(stored=True, scorable=True))
    with TempIndex(s, "mergedlengths") as ix:
        w = ix.writer()
        w.add_document(f1=u("A B C"), f2=u("X"))
        w.add_document(f1=u("B C D E"), f2=u("Y Z"))
        w.commit()

        w = ix.writer()
        w.add_document(f1=u("A"), f2=u("B C D E X Y"))
        w.add_document(f1=u("B C"), f2=u("X"))
        w.commit(NO_MERGE)

        w = ix.writer()
        w.add_document(f1=u("A B X Y Z"), f2=u("B C"))
        w.add_document(f1=u("Y X"), f2=u("A B"))
        w.commit(NO_MERGE)

        with ix.reader() as dr:
            assert_equal(dr.stored_fields(0)["f1"], u("A B C"))
            assert_equal(dr.doc_field_length(0, "f1"), 3)
            assert_equal(dr.doc_field_length(2, "f2"), 6)
            assert_equal(dr.doc_field_length(4, "f1"), 5)
def test_lengths_ram():
    s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                      f2=fields.KEYWORD(stored=True, scorable=True))
    st = RamStorage()
    ix = st.create_index(s)
    w = ix.writer()
    w.add_document(f1=u("A B C D E"), f2=u("X Y Z"))
    w.add_document(f1=u("B B B B C D D Q"), f2=u("Q R S T"))
    w.add_document(f1=u("D E F"), f2=u("U V A B C D E"))
    w.commit()

    dr = ix.reader()
    assert_equal(dr.stored_fields(0)["f1"], "A B C D E")
    assert_equal(dr.doc_field_length(0, "f1"), 5)
    assert_equal(dr.doc_field_length(1, "f1"), 8)
    assert_equal(dr.doc_field_length(2, "f1"), 3)
    assert_equal(dr.doc_field_length(0, "f2"), 3)
    assert_equal(dr.doc_field_length(1, "f2"), 4)
    assert_equal(dr.doc_field_length(2, "f2"), 7)

    assert_equal(dr.field_length("f1"), 16)
    assert_equal(dr.field_length("f2"), 14)
    assert_equal(dr.max_field_length("f1"), 8)
    assert_equal(dr.max_field_length("f2"), 7)
def test_resultspage():
    schema = fields.Schema(id=fields.STORED, content=fields.TEXT)
    ix = RamStorage().create_index(schema)

    domain = ("alfa", "bravo", "bravo", "charlie", "delta")
    w = ix.writer()
    for i, lst in enumerate(permutations(domain, 3)):
        w.add_document(id=text_type(i), content=u(" ").join(lst))
    w.commit()

    with ix.searcher() as s:
        q = query.Term("content", u("bravo"))
        r = s.search(q, limit=10)
        tops = list(r)

        rp = s.search_page(q, 1, pagelen=5)
        assert_equal(rp.scored_length(), 5)
        assert_equal(list(rp), tops[0:5])
        assert_equal(rp[10:], [])

        rp = s.search_page(q, 2, pagelen=5)
        assert_equal(list(rp), tops[5:10])

        rp = s.search_page(q, 1, pagelen=10)
        assert_equal(len(rp), 54)
        assert_equal(rp.pagecount, 6)
        rp = s.search_page(q, 6, pagelen=10)
        assert_equal(len(list(rp)), 4)
        assert rp.is_last_page()

        assert_raises(ValueError, s.search_page, q, 0)
        assert_raises(ValueError, s.search_page, q, 7)

        rp = s.search_page(query.Term("content", "glonk"), 1)
        assert_equal(len(rp), 0)
        assert rp.is_last_page()
def test_extend_empty():
    schema = fields.Schema(id=fields.STORED, words=fields.KEYWORD)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(id=1, words=u("alfa bravo charlie"))
    w.add_document(id=2, words=u("bravo charlie delta"))
    w.add_document(id=3, words=u("charlie delta echo"))
    w.add_document(id=4, words=u("delta echo foxtrot"))
    w.add_document(id=5, words=u("echo foxtrot golf"))
    w.commit()

    with ix.searcher() as s:
        # Get an empty results object
        r1 = s.search(query.Term("words", u("hotel")))
        # Copy it
        r1c = r1.copy()

        # Get a non-empty results object
        r2 = s.search(query.Term("words", u("delta")))
        # Copy it
        r2c = r2.copy()

        # Extend r1 with r2
        r1c.extend(r2c)
        assert_equal([hit["id"] for hit in r1c], [2, 3, 4])
        assert_equal(r1c.scored_length(), 3)
def test_lengths_ram(self):
    s = fields.Schema(f1=fields.KEYWORD(stored=True, scorable=True),
                      f2=fields.KEYWORD(stored=True, scorable=True))
    st = RamStorage()
    ix = st.create_index(s)
    w = ix.writer()
    w.add_document(f1=u"A B C D E", f2=u"X Y Z")
    w.add_document(f1=u"B B B B C D D Q", f2=u"Q R S T")
    w.add_document(f1=u"D E F", f2=u"U V A B C D E")
    w.commit()

    dr = ix.reader()
    ls1 = [dr.doc_field_length(i, "f1") for i in xrange(0, 3)]
    ls2 = [dr.doc_field_length(i, "f2") for i in xrange(0, 3)]
    self.assertEqual(dr.stored_fields(0)["f1"], "A B C D E")
    self.assertEqual(dr.doc_field_length(0, "f1"), 5)
    self.assertEqual(dr.doc_field_length(1, "f1"), 8)
    self.assertEqual(dr.doc_field_length(2, "f1"), 3)
    self.assertEqual(dr.doc_field_length(0, "f2"), 3)
    self.assertEqual(dr.doc_field_length(1, "f2"), 4)
    self.assertEqual(dr.doc_field_length(2, "f2"), 7)

    self.assertEqual(ix.field_length("f1"), 16)
    self.assertEqual(ix.field_length("f2"), 14)
def test_euro_chars():
    schema = fields.Schema(text=fields.TEXT)
    qp = default.QueryParser("text", schema)
    q = qp.parse(u("stra\xdfe"))
    assert q.__class__ == query.Term
    assert q.text == u("stra\xdfe")
def test_andmaybe_none():
    schema = fields.Schema(f=fields.TEXT, year=fields.NUMERIC)
    qp = default.QueryParser("f", schema)
    _ = qp.parse(u("Dahmen ANDMAYBE @year:[2000 TO]"))
def get_schema(self):
    return fields.Schema(post_id=fields.ID(stored=True),
                         tags=fields.KEYWORD(commas=True),
                         title=fields.TEXT(stored=True),
                         text=fields.TEXT)
DEFAULT_SIGNATURE = """
Thanks!
The Tree.io Team
http://www.tree.io
"""

#
# Search index (Whoosh)
#
SEARCH_DISABLED = False
SEARCH_ENGINE = 'db'

from whoosh import fields
WHOOSH_SCHEMA = fields.Schema(id=fields.ID(stored=True, unique=True),
                              name=fields.TEXT(stored=True),
                              type=fields.TEXT(stored=True),
                              content=fields.TEXT,
                              url=fields.ID(stored=True))
WHOOSH_INDEX = path.join(PROJECT_ROOT, 'storage/search')

#
# CACHING
#
#CACHE_BACKEND = 'dummy://'
CACHE_BACKEND = 'locmem://?timeout=30'
#CACHE_BACKEND = 'memcached://127.0.0.1:11211/?timeout=30'
#CACHE_BACKEND = "johnny.backends.locmem://"
JOHNNY_MIDDLEWARE_KEY_PREFIX = 'jc_treeio'
def test_analyzing_terms():
    schema = fields.Schema(text=fields.TEXT(analyzer=analysis.StemmingAnalyzer()))
    qp = default.QueryParser("text", schema)
    q = qp.parse(u("Indexed!"))
    assert_equal(q.__class__, query.Term)
    assert_equal(q.text, "index")
def test_empty_index(self):
    schema = fields.Schema(key=fields.ID(stored=True), value=fields.TEXT)
    st = store.RamStorage()
    self.assertRaises(index.EmptyIndexError, index.Index, st, schema)
def _do_merge(writerclass):
    schema = fields.Schema(key=fields.ID(stored=True, unique=True),
                           value=fields.TEXT(stored=True, spelling=True,
                                             vector=True))

    domain = {"a": "aa", "b": "bb cc", "c": "cc dd ee", "d": "dd ee ff gg",
              "e": "ee ff gg hh ii", "f": "ff gg hh ii jj kk",
              "g": "gg hh ii jj kk ll mm", "h": "hh ii jj kk ll mm nn oo",
              "i": "ii jj kk ll mm nn oo pp qq ww ww ww ww ww ww",
              "j": "jj kk ll mm nn oo pp qq rr ss",
              "k": "kk ll mm nn oo pp qq rr ss tt uu"}

    with TempIndex(schema) as ix:
        w = ix.writer()
        for key in "abc":
            w.add_document(key=u(key), value=u(domain[key]))
        w.commit()

        w = ix.writer()
        for key in "def":
            w.add_document(key=u(key), value=u(domain[key]))
        w.commit(merge=False)

        w = writerclass(ix, procs=3)

        del domain["b"]
        w.delete_by_term("key", u("b"))

        domain["e"] = "xx yy zz"
        w.update_document(key=u("e"), value=u(domain["e"]))

        for key in "ghijk":
            w.add_document(key=u(key), value=u(domain[key]))
        w.commit(optimize=True)

        assert len(ix._segments()) == 1

        with ix.searcher() as s:
            r = s.reader()

            assert s.doc_count() == len(domain)
            assert "".join(r.field_terms("key")) == "acdefghijk"
            assert " ".join(r.field_terms("value")) == "aa cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu ww xx yy zz"

            for key in domain:
                docnum = s.document_number(key=key)
                assert docnum is not None

                length = r.doc_field_length(docnum, "value")
                assert length
                assert _byten(len(domain[key].split())) == length

                sf = r.stored_fields(docnum)
                assert domain[key] == sf["value"]

            words = sorted(set((" ".join(domain.values())).split()))
            assert words == list(r.field_terms("value"))

            for word in words:
                hits = s.search(query.Term("value", word))
                for hit in hits:
                    assert word in hit["value"].split()
def test_short_prefix():
    s = fields.Schema(name=fields.ID, value=fields.TEXT)
    qp = qparser.QueryParser("value", schema=s)
    q = qp.parse(u("s*"))
    assert_equal(q.__class__.__name__, "Prefix")
    assert_equal(q.text, "s")
def test_empty_index():
    schema = fields.Schema(key=fields.ID(stored=True), value=fields.TEXT)
    st = RamStorage()
    assert_raises(index.EmptyIndexError, st.open_index, schema=schema)
def test_badnames():
    s = fields.Schema()
    with pytest.raises(fields.FieldConfigurationError):
        s.add("_test", fields.ID)
    with pytest.raises(fields.FieldConfigurationError):
        s.add("a f", fields.ID)
def __init__(self, notebooks):
    # Index directory of whoosh, located in notebookPath.
    self.schema = fields.Schema(
        path=fields.TEXT(stored=True),
        title=fields.TEXT(stored=True),
        content=fields.TEXT(stored=True),
        tags=fields.KEYWORD(commas=True))

    self.notebookName = notebooks[0][0]
    self.notebookPath = notebooks[0][1]
    self.notePath = os.path.join(self.notebookPath, "notes").replace(os.sep, '/')
    self.htmlPath = os.path.join(self.notebookPath, "html", "notes").replace(os.sep, '/')
    self.indexdir = os.path.join(self.notePath, ".indexdir").replace(os.sep, '/')
    self.attachmentPath = os.path.join(self.notebookPath, "attachments").replace(os.sep, '/')
    self.configfile = os.path.join(self.notebookPath, "notebook.conf").replace(os.sep, '/')
    cssPath = os.path.join(self.notebookPath, "css").replace(os.sep, '/')
    self.cssfile = os.path.join(cssPath, "notebook.css").replace(os.sep, '/')
    self.searchcssfile = os.path.join(cssPath, "search-window.css").replace(os.sep, '/')
    self.qsettings = QSettings(self.configfile, QSettings.IniFormat)

    if os.path.exists(self.configfile):
        self.extensions = readListFromSettings(self.qsettings, "extensions")
        self.fileExt = self.qsettings.value("fileExt")
        self.attachmentImage = self.qsettings.value("attachmentImage")
        self.attachmentDocument = self.qsettings.value("attachmentDocument")
        self.version = self.qsettings.value("version")
        self.geometry = self.qsettings.value("geometry")
        self.windowstate = self.qsettings.value("windowstate")
        self.mathjax = self.qsettings.value('mathJax')
        if 'extensionsConfig' not in set(self.qsettings.childGroups()):
            self.extcfg = self.qsettings.value('extensionsConfig', defaultValue={})
            writeDictToSettings(self.qsettings, 'extensionsConfig', self.extcfg)
        else:
            self.extcfg = readDictFromSettings(self.qsettings, 'extensionsConfig')
    else:
        self.extensions = []
        self.fileExt = ""
        self.attachmentImage = []
        self.attachmentDocument = []
        self.version = None
        self.geometry = None
        self.windowstate = None
        self.mathjax = ''
        self.extcfg = {}
    self.faulty_exts = []

    # Default enabled python-markdown extensions.
    # http://pythonhosted.org/Markdown/extensions/index.html
    if not self.extensions:
        self.extensions = [
              'nl2br'        # newline to break
            , 'strkundr'     # bold-italics-underline-delete style
            , 'codehilite'   # code syntax highlight
            , 'fenced_code'  # code block
            , 'headerid'     # add id to headers
            , 'headerlink'   # add anchor to headers
            , 'footnotes'
            , 'asciimathml'
        ]
        writeListToSettings(self.qsettings, "extensions", self.extensions)

    while True:
        print(self.extensions)
        try:
            markdown.markdown("", extensions=self.extensions)
        except AttributeError as e:
            remove_this = NOT_EXT.findall(e.args[0])[0]
            if remove_this in self.extensions:
                print("Found invalid markdown extension", remove_this,
                      ". Please consider removing it.")
                print('If you want to permanently disable this, just hit OK in the Notebook Settings dialog')
                self.extensions.remove(remove_this)
                self.faulty_exts.append(remove_this)
        except ImportError as e:
            if e.name.startswith('mdx_') and e.name[4:] in self.extensions:
                print('Found missing markdown extension', e.name[4:], ', temporarily disabling.')
                print('If you want to permanently disable this, just hit OK in the Notebook Settings dialog')
                self.extensions.remove(e.name[4:])
                self.faulty_exts.append(e.name[4:])
            elif e.name in self.extensions:
                print('Found missing markdown extension', e.name, ', temporarily disabling.')
                print('If you want to permanently disable this, just hit OK in the Notebook Settings dialog')
                self.extensions.remove(e.name)
                self.faulty_exts.append(e.name)
        else:
            self.md = markdown.Markdown(self.extensions, extension_configs=self.extcfg)
            break

    # Default file extension name
    if not self.fileExt:
        self.fileExt = ".md"
        self.qsettings.setValue("fileExt", self.fileExt)

    # Image file types that will be copied to attachmentDir
    # Inserted as image link
    if not self.attachmentImage:
        self.attachmentImage = [".jpg", ".jpeg", ".png", ".gif", ".svg"]
        self.qsettings.setValue("attachmentImage", self.attachmentImage)

    # Document file types that will be copied to attachmentDir
    # Inserted as link
    if not self.attachmentDocument:
        self.attachmentDocument = [".pdf", ".doc", ".odt"]
        self.qsettings.setValue("attachmentDocument", self.attachmentDocument)

    # Migrate notebookPath to v0.3.0 folder structure
    if not self.version:
        notebookDir = QDir(self.notebookPath)

        # move all markdown files to notes/
        dirList = notebookDir.entryList(QDir.Dirs | QDir.NoDotAndDotDot)
        if 'css' in dirList:
            dirList.remove('css')
        fileList = notebookDir.entryList(['*.md', '*.mkd', '*.markdown'])
        notebookDir.mkdir('notes')
        for d in dirList + fileList:
            notebookDir.rename(d, os.path.join('notes', d).replace(os.sep, '/'))

        # remove .indexdir folder
        oldIndexDir = QDir(os.path.join(self.notebookPath, '.indexdir'.replace(os.sep, '/')))
        indexFileList = oldIndexDir.entryList()
        for f in indexFileList:
            oldIndexDir.remove(f)
        notebookDir.rmdir('.indexdir')

        # rename notes.css to css/notebook.css
        oldCssFile = os.path.join(self.notebookPath, 'notes.css').replace(os.sep, '/')
        QDir().mkpath(cssPath)
        if os.path.exists(oldCssFile):
            QFile.rename(oldCssFile, self.cssfile)

        self.version = '0'

    if not self.mathjax:
        self.mathjax = 'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML'
        self.qsettings.setValue('mathJax', self.mathjax)
def _do_basic(writerclass):
    # Create the domain data

    # List of individual words added to the index
    words = []
    # List of string values added to the index
    docs = []
    # A ring buffer for creating string values
    buf = deque()
    for ls in permutations(u("abcd")):
        word = "".join(ls)
        # Remember this word is in the index (to check lexicon)
        words.append(word)

        # Add this word on to the end, pop the first word off to create N word
        # documents where N <= 10
        buf.append(word)
        if len(buf) > 10:
            buf.popleft()

        # Create a copy of the buffer and shuffle it to create a document
        # value and add it to the list of document values
        doc = list(buf)
        random.shuffle(doc)
        docs.append(" ".join(doc))
    # Shuffle the list of document values
    random.shuffle(docs)

    schema = fields.Schema(text=fields.TEXT(stored=True, spelling=True,
                                            vector=True),
                           row=fields.NUMERIC(stored=True))

    with TempIndex(schema, storage_debug=True) as ix:
        # Add the domain data to the index
        with writerclass(ix, procs=3) as w:
            for i, value in enumerate(docs):
                w.add_document(text=value, row=i)

        with ix.searcher() as s:
            r = s.reader()

            # Check the lexicon
            for word, term in izip(words, r.field_terms("text")):
                assert word == term
            # Check the doc count
            assert r.doc_count_all() == len(docs)

            # Check the word graph
            assert r.has_word_graph("text")
            flat = [w.decode("latin1") for w in r.word_graph("text").flatten()]
            assert flat == words

            # Check there are lengths
            total = sum(r.doc_field_length(docnum, "text", 0)
                        for docnum in xrange(r.doc_count_all()))
            assert total > 0

            # Check per-doc info
            for i, value in enumerate(docs):
                pieces = value.split()
                docnum = s.document_number(row=i)

                # Check stored value
                sv = r.stored_fields(docnum)
                assert sv["text"] == value

                # Check vectors
                vr = r.vector(docnum, "text")
                # Get the terms and positions from the vector matcher
                iv = list(vr.items_as("positions"))
                # What the vector should look like
                ov = sorted((text, [i]) for i, text in enumerate(pieces))
                assert iv == ov

                # Check field length
                assert r.doc_field_length(docnum, "text") == len(pieces)
def test_stopped():
    schema = fields.Schema(text=fields.TEXT)
    qp = default.QueryParser("text", schema)
    q = qp.parse(u("a b"), debug=True)
    assert_equal(q, query.NullQuery)
class ISPWhoosh(object):
    """
    Helper class to index the ISP model with Whoosh to allow full-text search
    """
    schema = fields.Schema(
        id=fields.ID(unique=True, stored=True),
        is_ffdn_member=fields.BOOLEAN(),
        is_disabled=fields.BOOLEAN(),
        name=fields.TEXT(),
        shortname=fields.TEXT(),
        description=fields.TEXT(),
        covered_areas=fields.KEYWORD(scorable=True, commas=True,
                                     lowercase=True),
        step=fields.NUMERIC(signed=False),
    )
    primary_key = schema._fields['id']

    @staticmethod
    def get_index_dir():
        return current_app.config.get('WHOOSH_INDEX_DIR', 'whoosh')

    @classmethod
    def get_index(cls):
        idxdir = cls.get_index_dir()
        if index.exists_in(idxdir):
            idx = index.open_dir(idxdir)
        else:
            if not os.path.exists(idxdir):
                os.makedirs(idxdir)
            idx = index.create_in(idxdir, cls.schema)
        return idx

    @classmethod
    def _search(cls, s, terms):
        return s.search(qparser.MultifieldParser(
            ['name', 'shortname', 'description', 'covered_areas'],
            schema=cls.schema).parse(terms),
            mask=whoosh.query.Term('is_disabled', True))

    @classmethod
    def search(cls, terms):
        with ISPWhoosh.get_index().searcher() as s:
            sres = cls._search(s, terms)
            ranks = {}
            for rank, r in enumerate(sres):
                ranks[r['id']] = rank

            if not len(ranks):
                return []

            _res = ISP.query.filter(ISP.id.in_(ranks.keys()))
            return sorted(_res, key=lambda r: ranks[r.id])

    @classmethod
    def update_document(cls, writer, model):
        kw = {
            'id': unicode(model.id),
            '_stored_id': model.id,
            'is_ffdn_member': model.is_ffdn_member,
            'is_disabled': model.is_disabled,
            'name': model.name,
            'shortname': model.shortname,
            'description': model.json.get('description'),
            'covered_areas': ','.join(model.covered_areas_names()),
            'step': model.json.get('progressStatus'),
        }
        writer.update_document(**kw)

    @classmethod
    def _after_flush(cls, app, changes):
        isp_changes = []
        for change in changes:
            if change[0].__class__ == ISP:
                update = change[1] in ('update', 'insert')
                isp_changes.append((update, change[0]))

        if not len(changes):
            return

        idx = cls.get_index()
        with idx.writer() as writer:
            for update, model in isp_changes:
                if update:
                    cls.update_document(writer, model)
                else:
                    writer.delete_by_term(cls.primary_key, model.id)
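# How _after_flush gets called is not shown in the snippet above. Its
# (app, changes) signature matches Flask-SQLAlchemy's models_committed
# signal, so a plausible hookup sketch could look like this ("app" is
# assumed to be the Flask application object):
from flask_sqlalchemy import models_committed

models_committed.connect(ISPWhoosh._after_flush, sender=app)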
def test_write_empty_vector():
    schema = fields.Schema(text=fields.TEXT(vector=True))
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            w.add_document(text=u". . . . . . . . . . . . . . . . . . . . . . . . 1")
def test_nested_children():
    schema = fields.Schema(t=fields.ID(stored=True),
                           track=fields.NUMERIC(stored=True),
                           album_name=fields.TEXT(stored=True),
                           song_name=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        with w.group():
            w.add_document(t=u("album"), album_name=u("alfa bravo charlie"))
            w.add_document(t=u("track"), track=1,
                           song_name=u("delta echo foxtrot"))
            w.add_document(t=u("track"), track=2,
                           song_name=u("golf hotel india"))
            w.add_document(t=u("track"), track=3,
                           song_name=u("juliet kilo lima"))
        with w.group():
            w.add_document(t=u("album"), album_name=u("mike november oskar"))
            w.add_document(t=u("track"), track=1,
                           song_name=u("papa quebec romeo"))
            w.add_document(t=u("track"), track=2,
                           song_name=u("sierra tango ultra"))
            w.add_document(t=u("track"), track=3,
                           song_name=u("victor whiskey xray"))
        with w.group():
            w.add_document(t=u("album"), album_name=u("yankee zulu one"))
            w.add_document(t=u("track"), track=1,
                           song_name=u("two three four"))
            w.add_document(t=u("track"), track=2,
                           song_name=u("five six seven"))
            w.add_document(t=u("track"), track=3,
                           song_name=u("eight nine ten"))

    with ix.searcher() as s:
        pq = query.Term("t", "album")
        aq = query.Term("album_name", "november")

        r = s.search(query.NestedChildren(pq, pq), limit=None)
        assert len(r) == 9
        assert [str(hit["t"]) for hit in r] == ["track"] * 9

        ncq = query.NestedChildren(pq, aq)
        assert list(ncq.docs(s)) == [5, 6, 7]
        r = s.search(ncq, limit=None)
        assert len(r) == 3
        assert [str(hit["song_name"]) for hit in r] == ["papa quebec romeo",
                                                        "sierra tango ultra",
                                                        "victor whiskey xray"]

        zq = query.NestedChildren(pq, query.Term("album_name", "zulu"))
        f = sorting.StoredFieldFacet("song_name")
        r = s.search(zq, sortedby=f)
        assert [hit["track"] for hit in r] == [3, 2, 1]
import os

from whoosh import index, store, fields
from whoosh.index import create_in
from whoosh.qparser import QueryParser

from django.db.models.signals import post_syncdb
from django.conf import settings

PAGES_WHOOSH_SCHEMA = fields.Schema(title=fields.TEXT(stored=True),
                                    content=fields.TEXT(stored=True),
                                    url=fields.ID(stored=True, unique=True))


def create_index(sender=None, **kwargs):
    if not os.path.exists(settings.HAYSTACK_WHOOSH_PATH):
        os.mkdir(settings.HAYSTACK_WHOOSH_PATH)
        ix = create_in(settings.HAYSTACK_WHOOSH_PATH, PAGES_WHOOSH_SCHEMA,
                       "ZORNA_PAGES")

post_syncdb.connect(create_index)
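# The QueryParser import above is never used in this module. A minimal sketch
# of how the "ZORNA_PAGES" index could be queried (the function name and the
# returned shape are illustrative assumptions, not part of the original code):
def search_pages(q):
    # Open the named index created by create_index above.
    ix = index.open_dir(settings.HAYSTACK_WHOOSH_PATH, indexname="ZORNA_PAGES")
    parser = QueryParser("content", schema=PAGES_WHOOSH_SCHEMA)
    with ix.searcher() as searcher:
        # Return the stored (title, url) pair for each matching document.
        return [(hit["title"], hit["url"])
                for hit in searcher.search(parser.parse(q))]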
from whoosh import index, fields, qparser
import os

schema = fields.Schema(title=fields.TEXT(stored=True),
                       description=fields.TEXT(stored=True),
                       transcript=fields.TEXT,
                       url=fields.STORED)

if not os.path.exists("index"):
    os.mkdir("index")
    search_index = index.create_in("index", schema)
else:
    search_index = index.open_dir("index")

queryparser = qparser.QueryParser("transcript", schema)


def search(q):
    with search_index.searcher() as s:
        results = s.search(queryparser.parse(q))
        r = list(results)
        r.sort(key=lambda x: x.rank)
        return [res.fields() for res in r]


def add_to_index(title, description, transcript, url):
    w = search_index.writer()
    w.add_document(title=title, description=description,
                   transcript=transcript, url=url)
    # Commit so the document is actually written; without this the writer
    # never persists the change and keeps holding the index lock.
    w.commit()
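# Quick usage sketch for the two helpers above (the document values and URL
# are made up for illustration):
if __name__ == "__main__":
    add_to_index(title=u"Episode 1", description=u"The pilot episode",
                 transcript=u"hello world", url=u"https://example.com/ep1")
    print(search(u"hello"))  # -> [{'title': u'Episode 1', ...}]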
def test_unicode_num():
    schema = fields.Schema(num=fields.NUMERIC)
    parser = default.QueryParser(u("num"), schema=schema)
    q = parser.parse(u("num:1"))
    _ = text_type(q)
def test_add_reader():
    schema = fields.Schema(i=fields.ID(stored=True, unique=True),
                           a=fields.TEXT(stored=True, spelling=True),
                           b=fields.TEXT(vector=True))
    with TempIndex(schema, "addreader") as ix:
        with ix.writer() as w:
            w.add_document(i=u("0"), a=u("alfa bravo charlie delta"),
                           b=u("able baker coxwell dog"))
            w.add_document(i=u("1"), a=u("bravo charlie delta echo"),
                           b=u("elf fabio gong hiker"))
            w.add_document(i=u("2"), a=u("charlie delta echo foxtrot"),
                           b=u("india joker king loopy"))
            w.add_document(i=u("3"), a=u("delta echo foxtrot golf"),
                           b=u("mister noogie oompah pancake"))

        with ix.writer() as w:
            w.delete_by_term("i", "1")
            w.delete_by_term("i", "3")

        with ix.writer() as w:
            w.add_document(i=u("4"), a=u("hotel india juliet kilo"),
                           b=u("quick rhubarb soggy trap"))
            w.add_document(i=u("5"), a=u("india juliet kilo lima"),
                           b=u("umber violet weird xray"))

        with ix.reader() as r:
            assert_equal(r.doc_count_all(), 4)

            sfs = list(r.all_stored_fields())
            assert_equal(sfs, [
                {"i": u("4"), "a": u("hotel india juliet kilo")},
                {"i": u("5"), "a": u("india juliet kilo lima")},
                {"i": u("0"), "a": u("alfa bravo charlie delta")},
                {"i": u("2"), "a": u("charlie delta echo foxtrot")},
            ])

            assert_equal(list(r.lexicon("a")),
                         ["alfa", "bravo", "charlie", "delta", "echo",
                          "foxtrot", "hotel", "india", "juliet", "kilo",
                          "lima"])

            vs = []
            for docnum in r.all_doc_ids():
                v = r.vector(docnum, "b")
                vs.append(list(v.all_ids()))
            assert_equal(vs, [["quick", "rhubarb", "soggy", "trap"],
                              ["umber", "violet", "weird", "xray"],
                              ["able", "baker", "coxwell", "dog"],
                              ["india", "joker", "king", "loopy"]])

            gr = r.word_graph("a")
            assert_equal(list(gr.flatten_strings()),
                         ["alfa", "bravo", "charlie", "delta", "echo",
                          "foxtrot", "hotel", "india", "juliet", "kilo",
                          "lima"])
def test_empty_querystring():
    s = fields.Schema(content=fields.TEXT, title=fields.TEXT, id=fields.ID)
    qp = default.QueryParser("content", s)
    q = qp.parse(u(""))
    assert q == query.NullQuery
from __future__ import absolute_import
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import division

import hashlib

import whoosh.fields as F

# This schema defines the structure of a single knowhow snippet.
SCHEMA = F.Schema(
    # unique identifier
    id=F.ID(unique=True, stored=True),
    # a multi-valued analyzed field
    tag=F.KEYWORD(stored=True, field_boost=2.0),
    # the text content of the snippet
    content=F.TEXT(stored=True),
    # all searchable fields, for use as a default field
    text=F.TEXT(stored=False),
    # when the snippet was last modified
    updated=F.DATETIME(stored=True),
)

# Function to create a hasher object for generating the id of a snippet.
IdGenerator = hashlib.sha256

# The number of hexadecimal characters in an id
ID_LENGTH = IdGenerator().digest_size * 2


def identifier(doc):
    """Return a hex id of ID_LENGTH characters for a snippet document."""
    # Assumption: the id is derived by hashing the snippet's content with
    # IdGenerator (the original body is truncated in the source).
    return IdGenerator(doc['content'].encode('utf-8')).hexdigest()
def test_stopped():
    schema = fields.Schema(text=fields.TEXT)
    qp = default.QueryParser("text", schema)
    q = qp.parse(u("a b"))
    assert q == query.NullQuery
def test_noscorables2():
    schema = fields.Schema(field=fields.ID)
    with TempIndex(schema, "noscorables2") as ix:
        writer = ix.writer()
        writer.add_document(field=u('foo'))
        writer.commit()