def test_toindex_dirname():
    """Round-trip a table through ``toindex`` and ``fromindex`` by directory name.

    The table is written to the index at ``dirname`` with an explicit schema
    in which every field is stored, read back with ``fromindex``, and the
    result is compared against the input table with ``ieq``.
    """
    # name fields in ascending order as whoosh sorts fields on the way out
    tbl = (
        ('f0', 'f1', 'f2', 'f3', 'f4'),
        (u'AAA', 12, 4.3, True, datetime.datetime.now()),
        # Month is written as 1 rather than 01: a zero-prefixed decimal
        # literal is a SyntaxError on Python 3 (and octal on Python 2).
        (u'BBB', 6, 3.4, False, datetime.datetime(1900, 1, 31)),
        (u'CCC', 42, 7.8, True, datetime.datetime(2100, 12, 25)),
    )

    # One schema field per table column, all stored so they can be read back.
    schema = Schema(
        f0=TEXT(stored=True),
        f1=NUMERIC(int, stored=True),
        f2=NUMERIC(float, stored=True),
        f3=BOOLEAN(stored=True),
        f4=DATETIME(stored=True),
    )

    toindex(tbl, dirname, schema=schema)

    actual = fromindex(dirname)
    ieq(tbl, actual)
def test_fromindex_docnum_field():
    """Check that ``fromindex`` emits a document-number column on request.

    Two documents are written to a fresh index at ``dirname``; only ``title``
    and ``path`` are stored fields.  Reading the index back with
    ``docnum_field='docnum'`` is expected to give a leading ``docnum`` column
    followed by the stored fields.
    """
    documents = (
        dict(title=u"First document", path=u"/a",
             content=u"This is the first document we've added!"),
        dict(title=u"Second document", path=u"/b",
             content=u"The second one is even more interesting!"),
    )

    doc_schema = Schema(title=TEXT(stored=True),
                        path=ID(stored=True),
                        content=TEXT)
    index_writer = create_in(dirname, doc_schema).writer()
    for document in documents:
        index_writer.add_document(**document)
    index_writer.commit()

    actual = fromindex(dirname, docnum_field='docnum')

    # N.B., fields get sorted
    expect = (
        (u'docnum', u'path', u'title'),
        (0, u'/a', u'First document'),
        (1, u'/b', u'Second document'),
    )
    ieq(expect, actual)
# fromindex
###########

# set up an index and load some documents via the Whoosh API
from whoosh.index import create_in
from whoosh.fields import *

schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
index = create_in('tmp/example', schema)
writer = index.writer()
writer.add_document(title=u"First document", path=u"/a",
                    content=u"This is the first document we've added!")
writer.add_document(title=u"Second document", path=u"/b",
                    content=u"The second one is even more interesting!")
writer.commit()

# extract documents as a table
from petl import look
from petlx.index import fromindex

tbl = fromindex('tmp/example')
look(tbl)


# toindex
#########

import datetime

# The month of the second row is written as 1 rather than 01: a zero-prefixed
# decimal literal is a SyntaxError on Python 3 (and octal on Python 2).
tbl = (('f0', 'f1', 'f2', 'f3', 'f4'),
       (u'AAA', 12, 4.3, True, datetime.datetime.now()),
       (u'BBB', 6, 3.4, False, datetime.datetime(1900, 1, 31)),
       (u'CCC', 42, 7.8, True, datetime.datetime(2100, 12, 25)))

# imports repeated so this example can be read on its own
from petl import look
from petlx.index import toindex, fromindex

# here is the table we want to load into an index
look(tbl)
# set up an index and load some documents via the Whoosh API
from whoosh.index import create_in
from whoosh.fields import *

schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
index = create_in('tmp/example', schema)
writer = index.writer()
writer.add_document(title=u"First document", path=u"/a",
                    content=u"This is the first document we've added!")
writer.add_document(title=u"Second document", path=u"/b",
                    content=u"The second one is even more interesting!")
writer.commit()

# extract documents as a table
from petl import look
from petlx.index import fromindex

tbl = fromindex('tmp/example')
look(tbl)


# toindex
#########

import datetime

# The month of the second row is written as 1 rather than 01: a zero-prefixed
# decimal literal is a SyntaxError on Python 3 (and octal on Python 2).
tbl = (('f0', 'f1', 'f2', 'f3', 'f4'),
       (u'AAA', 12, 4.3, True, datetime.datetime.now()),
       (u'BBB', 6, 3.4, False, datetime.datetime(1900, 1, 31)),
       (u'CCC', 42, 7.8, True, datetime.datetime(2100, 12, 25)))

# imports repeated so this example can be read on its own
from petl import look
from petlx.index import toindex, fromindex

# here is the table we want to load into an index