Exemple #1
0
def appendindex(tbl, index_or_dirname, indexname=None, merge=True,
                optimize=False):
    """
    Load all rows from `tbl` into a Whoosh index, adding them to any existing
    data in the index.

    .. versionadded:: 0.16

    Parameters
    ----------

    tbl
        A table-like object (row container) containing the data to be loaded.
    index_or_dirname
        Either an instance of `whoosh.index.Index` or a string containing the
        directory path where the index is to be stored. An index opened here
        from a directory path is closed again before returning; an index
        instance passed in by the caller is left open.
    indexname
        String containing the name of the index, if multiple indexes are stored
        in the same directory.
    merge
        Merge small segments during commit?
    optimize
        Merge all segments together?

    Raises
    ------

    UnsatisfiedDependency
        If the whoosh package cannot be imported.
    Exception
        If `index_or_dirname` is neither a string nor an index. Any error
        raised while adding or committing documents is re-raised after the
        writer has been cancelled.

    """
    try:
        # Import the submodule explicitly: importing the top-level package
        # does not by itself load ``whoosh.index``.
        import whoosh.index
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)

    # deal with polymorphic argument
    if isinstance(index_or_dirname, basestring):
        index = whoosh.index.open_dir(index_or_dirname, indexname=indexname,
                                      readonly=False)
        needs_closing = True
    elif isinstance(index_or_dirname, whoosh.index.Index):
        index = index_or_dirname
        needs_closing = False
    else:
        # wrap in a 1-tuple so that a tuple argument is formatted, not
        # unpacked, by the % operator
        raise Exception('expected string or index, found %r'
                        % (index_or_dirname,))

    try:
        # Creating the writer happens inside the try block so that an index
        # opened above is still closed if writer creation fails.
        writer = index.writer()
        try:
            for d in dicts(tbl):
                writer.add_document(**d)
            writer.commit(merge=merge, optimize=optimize)
        except:
            # Deliberately catches everything (as toindex does): the writer
            # is cancelled and the original error is re-raised unchanged.
            writer.cancel()
            raise
    finally:
        if needs_closing:
            index.close()
Exemple #2
0
def test_dicts_shortrows():
    """Check that `dicts` yields None for fields missing from a short row."""
    rows = (
        ('foo', 'bar'),
        ('a', 1),
        ('b',),  # shorter than the header
    )
    expected = (
        {'foo': 'a', 'bar': 1},
        {'foo': 'b', 'bar': None},
    )
    ieq(expected, dicts(rows))
Exemple #3
0
def test_dicts():
    """Check that `dicts` yields one header-keyed dict per data row."""
    rows = (
        ('foo', 'bar'),
        ('a', 1),
        ('b', 2),
    )
    expected = (
        {'foo': 'a', 'bar': 1},
        {'foo': 'b', 'bar': 2},
    )
    ieq(expected, dicts(rows))
Exemple #4
0
def toindex(tbl, index_or_dirname, schema=None, indexname=None, merge=False,
            optimize=False):
    """
    Load all rows from `tbl` into a Whoosh index. N.B., this will clear any
    existing data in the index before loading. E.g.::

        >>> from petl import look
        >>> from petlx.index import toindex, fromindex
        >>> # here is the table we want to load into an index
        ... look(tbl)
        +--------+------+------+-------+--------------------------------------------------+
        | 'f0'   | 'f1' | 'f2' | 'f3'  | 'f4'                                             |
        +========+======+======+=======+==================================================+
        | u'AAA' |   12 |  4.3 | True  | datetime.datetime(2014, 6, 30, 14, 7, 2, 333199) |
        +--------+------+------+-------+--------------------------------------------------+
        | u'BBB' |    6 |  3.4 | False | datetime.datetime(1900, 1, 31, 0, 0)             |
        +--------+------+------+-------+--------------------------------------------------+
        | u'CCC' |   42 |  7.8 | True  | datetime.datetime(2100, 12, 25, 0, 0)            |
        +--------+------+------+-------+--------------------------------------------------+

        >>> # define a schema for the index
        ... from whoosh.fields import *
        >>> schema = Schema(f0=TEXT(stored=True),
        ...                 f1=NUMERIC(int, stored=True),
        ...                 f2=NUMERIC(float, stored=True),
        ...                 f3=BOOLEAN(stored=True),
        ...                 f4=DATETIME(stored=True))
        >>> # load data
        ... toindex(tbl, 'tmp/example', schema=schema)
        >>> # look what it did
        ... look(fromindex('tmp/example'))
        +--------+------+------+-------+--------------------------------------------------+
        | 'f0'   | 'f1' | 'f2' | 'f3'  | 'f4'                                             |
        +========+======+======+=======+==================================================+
        | u'AAA' |   12 |  4.3 | True  | datetime.datetime(2014, 6, 30, 14, 7, 2, 333199) |
        +--------+------+------+-------+--------------------------------------------------+
        | u'BBB' |    6 |  3.4 | False | datetime.datetime(1900, 1, 31, 0, 0)             |
        +--------+------+------+-------+--------------------------------------------------+
        | u'CCC' |   42 |  7.8 | True  | datetime.datetime(2100, 12, 25, 0, 0)            |
        +--------+------+------+-------+--------------------------------------------------+

    .. versionadded:: 0.16

    Parameters
    ----------

    tbl
        A table-like object (row container) containing the data to be loaded.
    index_or_dirname
        Either an instance of `whoosh.index.Index` or a string containing the
        directory path where the index is to be stored. An index created here
        from a directory path is closed again before returning; an index
        instance passed in by the caller is left open.
    schema
        Index schema, passed to `whoosh.index.create_in` when
        `index_or_dirname` is a directory path. Not used when an index
        instance is given.
    indexname
        String containing the name of the index, if multiple indexes are stored
        in the same directory.
    merge
        Merge small segments during commit?
    optimize
        Merge all segments together?

    Raises
    ------

    UnsatisfiedDependency
        If the whoosh package cannot be imported.
    Exception
        If `index_or_dirname` is neither a string nor an index. Any error
        raised while adding or committing documents is re-raised after the
        writer has been cancelled.

    """
    try:
        # Import the submodules explicitly: importing the top-level package
        # does not by itself load ``whoosh.index`` or ``whoosh.writing``.
        import whoosh.index
        import whoosh.writing
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)

    # deal with polymorphic argument
    if isinstance(index_or_dirname, basestring):
        index = whoosh.index.create_in(index_or_dirname, schema,
                                       indexname=indexname)
        needs_closing = True
    elif isinstance(index_or_dirname, whoosh.index.Index):
        index = index_or_dirname
        needs_closing = False
    else:
        # wrap in a 1-tuple so that a tuple argument is formatted, not
        # unpacked, by the % operator
        raise Exception('expected string or index, found %r'
                        % (index_or_dirname,))

    try:
        # Creating the writer happens inside the try block so that an index
        # created above is still closed if writer creation fails.
        writer = index.writer()
        try:
            for d in dicts(tbl):
                writer.add_document(**d)
            # CLEAR merge type: existing index content is dropped on commit
            writer.commit(merge=merge, optimize=optimize,
                          mergetype=whoosh.writing.CLEAR)
        except:
            # Deliberately catches everything: the writer is cancelled and
            # the original error is re-raised unchanged.
            writer.cancel()
            raise
    finally:
        if needs_closing:
            index.close()
Exemple #5
0
def appendindex(tbl,
                index_or_dirname,
                indexname=None,
                merge=True,
                optimize=False):
    """
    Load all rows from `tbl` into a Whoosh index, adding them to any existing
    data in the index.

    .. versionadded:: 0.16

    Parameters
    ----------

    tbl
        A table-like object (row container) containing the data to be loaded.
    index_or_dirname
        Either an instance of `whoosh.index.Index` or a string containing the
        directory path where the index is to be stored. An index opened here
        from a directory path is closed again before returning; an index
        instance passed in by the caller is left open.
    indexname
        String containing the name of the index, if multiple indexes are stored
        in the same directory.
    merge
        Merge small segments during commit?
    optimize
        Merge all segments together?

    Raises
    ------

    UnsatisfiedDependency
        If the whoosh package cannot be imported.
    Exception
        If `index_or_dirname` is neither a string nor an index. Any error
        raised while adding or committing documents is re-raised after the
        writer has been cancelled.

    """
    try:
        # Import the submodule explicitly: importing the top-level package
        # does not by itself load ``whoosh.index``.
        import whoosh.index
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)

    # deal with polymorphic argument
    if isinstance(index_or_dirname, basestring):
        index = whoosh.index.open_dir(index_or_dirname,
                                      indexname=indexname,
                                      readonly=False)
        needs_closing = True
    elif isinstance(index_or_dirname, whoosh.index.Index):
        index = index_or_dirname
        needs_closing = False
    else:
        # wrap in a 1-tuple so that a tuple argument is formatted, not
        # unpacked, by the % operator
        raise Exception('expected string or index, found %r' %
                        (index_or_dirname,))

    try:
        # Creating the writer happens inside the try block so that an index
        # opened above is still closed if writer creation fails.
        writer = index.writer()
        try:
            for d in dicts(tbl):
                writer.add_document(**d)
            writer.commit(merge=merge, optimize=optimize)
        except:
            # Deliberately catches everything (as toindex does): the writer
            # is cancelled and the original error is re-raised unchanged.
            writer.cancel()
            raise
    finally:
        if needs_closing:
            index.close()
Exemple #6
0
def toindex(tbl,
            index_or_dirname,
            schema=None,
            indexname=None,
            merge=False,
            optimize=False):
    """
    Load all rows from `tbl` into a Whoosh index. N.B., this will clear any
    existing data in the index before loading. E.g.::

        >>> from petl import look
        >>> from petlx.index import toindex, fromindex
        >>> # here is the table we want to load into an index
        ... look(tbl)
        +--------+------+------+-------+--------------------------------------------------+
        | 'f0'   | 'f1' | 'f2' | 'f3'  | 'f4'                                             |
        +========+======+======+=======+==================================================+
        | u'AAA' |   12 |  4.3 | True  | datetime.datetime(2014, 6, 30, 14, 7, 2, 333199) |
        +--------+------+------+-------+--------------------------------------------------+
        | u'BBB' |    6 |  3.4 | False | datetime.datetime(1900, 1, 31, 0, 0)             |
        +--------+------+------+-------+--------------------------------------------------+
        | u'CCC' |   42 |  7.8 | True  | datetime.datetime(2100, 12, 25, 0, 0)            |
        +--------+------+------+-------+--------------------------------------------------+

        >>> # define a schema for the index
        ... from whoosh.fields import *
        >>> schema = Schema(f0=TEXT(stored=True),
        ...                 f1=NUMERIC(int, stored=True),
        ...                 f2=NUMERIC(float, stored=True),
        ...                 f3=BOOLEAN(stored=True),
        ...                 f4=DATETIME(stored=True))
        >>> # load data
        ... toindex(tbl, 'tmp/example', schema=schema)
        >>> # look what it did
        ... look(fromindex('tmp/example'))
        +--------+------+------+-------+--------------------------------------------------+
        | 'f0'   | 'f1' | 'f2' | 'f3'  | 'f4'                                             |
        +========+======+======+=======+==================================================+
        | u'AAA' |   12 |  4.3 | True  | datetime.datetime(2014, 6, 30, 14, 7, 2, 333199) |
        +--------+------+------+-------+--------------------------------------------------+
        | u'BBB' |    6 |  3.4 | False | datetime.datetime(1900, 1, 31, 0, 0)             |
        +--------+------+------+-------+--------------------------------------------------+
        | u'CCC' |   42 |  7.8 | True  | datetime.datetime(2100, 12, 25, 0, 0)            |
        +--------+------+------+-------+--------------------------------------------------+

    .. versionadded:: 0.16

    Parameters
    ----------

    tbl
        A table-like object (row container) containing the data to be loaded.
    index_or_dirname
        Either an instance of `whoosh.index.Index` or a string containing the
        directory path where the index is to be stored. An index created here
        from a directory path is closed again before returning; an index
        instance passed in by the caller is left open.
    schema
        Index schema, passed to `whoosh.index.create_in` when
        `index_or_dirname` is a directory path. Not used when an index
        instance is given.
    indexname
        String containing the name of the index, if multiple indexes are stored
        in the same directory.
    merge
        Merge small segments during commit?
    optimize
        Merge all segments together?

    Raises
    ------

    UnsatisfiedDependency
        If the whoosh package cannot be imported.
    Exception
        If `index_or_dirname` is neither a string nor an index. Any error
        raised while adding or committing documents is re-raised after the
        writer has been cancelled.

    """
    try:
        # Import the submodules explicitly: importing the top-level package
        # does not by itself load ``whoosh.index`` or ``whoosh.writing``.
        import whoosh.index
        import whoosh.writing
    except ImportError as e:
        raise UnsatisfiedDependency(e, dep_message)

    # deal with polymorphic argument
    if isinstance(index_or_dirname, basestring):
        index = whoosh.index.create_in(index_or_dirname,
                                       schema,
                                       indexname=indexname)
        needs_closing = True
    elif isinstance(index_or_dirname, whoosh.index.Index):
        index = index_or_dirname
        needs_closing = False
    else:
        # wrap in a 1-tuple so that a tuple argument is formatted, not
        # unpacked, by the % operator
        raise Exception('expected string or index, found %r' %
                        (index_or_dirname,))

    try:
        # Creating the writer happens inside the try block so that an index
        # created above is still closed if writer creation fails.
        writer = index.writer()
        try:
            for d in dicts(tbl):
                writer.add_document(**d)
            # CLEAR merge type: existing index content is dropped on commit
            writer.commit(merge=merge,
                          optimize=optimize,
                          mergetype=whoosh.writing.CLEAR)
        except:
            # Deliberately catches everything: the writer is cancelled and
            # the original error is re-raised unchanged.
            writer.cancel()
            raise
    finally:
        if needs_closing:
            index.close()