def appendindex(tbl, index_or_dirname, indexname=None, merge=True,
                optimize=False):
    """
    Load all rows from `tbl` into a Whoosh index, adding them to any existing
    data in the index.

    .. versionadded:: 0.16

    Parameters
    ----------

    tbl
        A table-like object (row container) containing the data to be loaded.
    index_or_dirname
        Either an instance of `whoosh.index.Index` or a string containing the
        directory path where the index is to be stored.
    indexname
        String containing the name of the index, if multiple indexes are stored
        in the same directory.
    merge
        Merge small segments during commit?
    optimize
        Merge all segments together?

    Raises
    ------

    UnsatisfiedDependency
        If the `whoosh` package cannot be imported.
    Exception
        If `index_or_dirname` is neither a string nor a
        `whoosh.index.Index`.

    Notes
    -----

    An index opened here from a directory path is closed before returning;
    an index instance supplied by the caller is left open.  If anything goes
    wrong while adding documents or committing, the writer is cancelled and
    the original exception is re-raised.

    """

    try:
        # Import the submodule explicitly: a bare ``import whoosh`` does not
        # guarantee that ``whoosh.index`` is loaded and bound on the package.
        import whoosh.index
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)

    # deal with polymorphic argument
    if isinstance(index_or_dirname, basestring):
        index = whoosh.index.open_dir(index_or_dirname, indexname=indexname,
                                      readonly=False)
        needs_closing = True
    elif isinstance(index_or_dirname, whoosh.index.Index):
        index = index_or_dirname
        needs_closing = False
    else:
        # Wrap the argument in a 1-tuple so that a tuple argument is shown
        # via %r instead of being unpacked as multiple format arguments.
        raise Exception('expected string or index, found %r'
                        % (index_or_dirname,))

    try:
        # Creating the writer sits inside the try/finally so that an index
        # opened above is still closed if the writer cannot be obtained.
        writer = index.writer()
        try:
            for d in dicts(tbl):
                writer.add_document(**d)
            writer.commit(merge=merge, optimize=optimize)
        except BaseException:
            # Cancel on any failure, including interrupts (consistent with
            # toindex), then let the original exception propagate.
            writer.cancel()
            raise
    finally:
        if needs_closing:
            index.close()
def test_dicts_shortrows():
    # A data row shorter than the header is expected to come back with None
    # filled in for the missing trailing field.
    header = ('foo', 'bar')
    table = (header, ('a', 1), ('b',))
    expected = (
        {'foo': 'a', 'bar': 1},
        {'foo': 'b', 'bar': None},
    )
    ieq(expected, dicts(table))
def test_dicts():
    # Each data row is expected to come back as a dict keyed by the header
    # fields, in row order.
    header = ('foo', 'bar')
    table = (header, ('a', 1), ('b', 2))
    expected = (
        {'foo': 'a', 'bar': 1},
        {'foo': 'b', 'bar': 2},
    )
    ieq(expected, dicts(table))
def toindex(tbl, index_or_dirname, schema=None, indexname=None, merge=False,
            optimize=False):
    """
    Load all rows from `tbl` into a Whoosh index. N.B., this will clear any
    existing data in the index before loading. E.g.::

        >>> from petl import look
        >>> from petlx.index import toindex, fromindex
        >>> # here is the table we want to load into an index
        ... look(tbl)
        +--------+------+------+-------+--------------------------------------------------+
        | 'f0'   | 'f1' | 'f2' | 'f3'  | 'f4'                                             |
        +========+======+======+=======+==================================================+
        | u'AAA' |   12 |  4.3 | True  | datetime.datetime(2014, 6, 30, 14, 7, 2, 333199) |
        +--------+------+------+-------+--------------------------------------------------+
        | u'BBB' |    6 |  3.4 | False | datetime.datetime(1900, 1, 31, 0, 0)             |
        +--------+------+------+-------+--------------------------------------------------+
        | u'CCC' |   42 |  7.8 | True  | datetime.datetime(2100, 12, 25, 0, 0)            |
        +--------+------+------+-------+--------------------------------------------------+

        >>> # define a schema for the index
        ... from whoosh.fields import *
        >>> schema = Schema(f0=TEXT(stored=True),
        ...                 f1=NUMERIC(int, stored=True),
        ...                 f2=NUMERIC(float, stored=True),
        ...                 f3=BOOLEAN(stored=True),
        ...                 f4=DATETIME(stored=True))
        >>> # load data
        ... toindex(tbl, 'tmp/example', schema=schema)
        >>> # look what it did
        ... look(fromindex('tmp/example'))
        +--------+------+------+-------+--------------------------------------------------+
        | 'f0'   | 'f1' | 'f2' | 'f3'  | 'f4'                                             |
        +========+======+======+=======+==================================================+
        | u'AAA' |   12 |  4.3 | True  | datetime.datetime(2014, 6, 30, 14, 7, 2, 333199) |
        +--------+------+------+-------+--------------------------------------------------+
        | u'BBB' |    6 |  3.4 | False | datetime.datetime(1900, 1, 31, 0, 0)             |
        +--------+------+------+-------+--------------------------------------------------+
        | u'CCC' |   42 |  7.8 | True  | datetime.datetime(2100, 12, 25, 0, 0)            |
        +--------+------+------+-------+--------------------------------------------------+

    .. versionadded:: 0.16

    Parameters
    ----------

    tbl
        A table-like object (row container) containing the data to be loaded.
    index_or_dirname
        Either an instance of `whoosh.index.Index` or a string containing the
        directory path where the index is to be stored.
    schema
        Index schema, passed to `whoosh.index.create_in` when
        `index_or_dirname` is a directory path. Not used when an index
        instance is supplied.
    indexname
        String containing the name of the index, if multiple indexes are stored
        in the same directory.
    merge
        Merge small segments during commit?
    optimize
        Merge all segments together?

    Raises
    ------

    UnsatisfiedDependency
        If the `whoosh` package cannot be imported.
    Exception
        If `index_or_dirname` is neither a string nor a
        `whoosh.index.Index`.

    Notes
    -----

    An index created here from a directory path is closed before returning;
    an index instance supplied by the caller is left open.  If anything goes
    wrong while adding documents or committing, the writer is cancelled and
    the original exception is re-raised.

    """

    try:
        # Import the submodules explicitly: a bare ``import whoosh`` does not
        # guarantee that ``whoosh.index`` and ``whoosh.writing`` are loaded
        # and bound on the package.
        import whoosh.index
        import whoosh.writing
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)

    # deal with polymorphic argument
    if isinstance(index_or_dirname, basestring):
        index = whoosh.index.create_in(index_or_dirname, schema,
                                       indexname=indexname)
        needs_closing = True
    elif isinstance(index_or_dirname, whoosh.index.Index):
        index = index_or_dirname
        needs_closing = False
    else:
        # Wrap the argument in a 1-tuple so that a tuple argument is shown
        # via %r instead of being unpacked as multiple format arguments.
        raise Exception('expected string or index, found %r'
                        % (index_or_dirname,))

    try:
        # Creating the writer sits inside the try/finally so that an index
        # created above is still closed if the writer cannot be obtained.
        writer = index.writer()
        try:
            for d in dicts(tbl):
                writer.add_document(**d)
            # mergetype=CLEAR is what discards the previously indexed data.
            writer.commit(merge=merge, optimize=optimize,
                          mergetype=whoosh.writing.CLEAR)
        except BaseException:
            # Cancel on any failure, including interrupts, then let the
            # original exception propagate.
            writer.cancel()
            raise
    finally:
        if needs_closing:
            index.close()