コード例 #1
0
ファイル: test_index_whoosh.py プロジェクト: ianfiske/petlx
def test_toindex_dirname():
    """Round-trip a table through an index directory and compare the result.

    The table is written with ``toindex`` using an explicit schema in which
    every field is stored, then read back with ``fromindex`` and compared
    to the input with ``ieq``.

    ``dirname`` is not defined in this function; it is expected to be
    provided by the enclosing module.
    """

    # name fields in ascending order as whoosh sorts fields on the way out
    tbl = (('f0', 'f1', 'f2', 'f3', 'f4'),
           (u'AAA', 12, 4.3, True, datetime.datetime.now()),
           # The month is written as 1 rather than 01: a leading-zero decimal
           # literal is a SyntaxError on Python 3, and 1 is the same value on
           # Python 2.
           (u'BBB', 6, 3.4, False, datetime.datetime(1900, 1, 31)),
           (u'CCC', 42, 7.8, True, datetime.datetime(2100, 12, 25)))

    # One schema field per table column, all stored so that they can be
    # retrieved again by fromindex.
    schema = Schema(f0=TEXT(stored=True),
                    f1=NUMERIC(int, stored=True),
                    f2=NUMERIC(float, stored=True),
                    f3=BOOLEAN(stored=True),
                    f4=DATETIME(stored=True))

    toindex(tbl, dirname, schema=schema)

    actual = fromindex(dirname)
    ieq(tbl, actual)
コード例 #2
0
ファイル: test_index_whoosh.py プロジェクト: ianfiske/petlx
def test_fromindex_docnum_field():
    """Check that ``fromindex`` adds a document-number column on request.

    Two documents are written to a fresh index in ``dirname`` through the
    Whoosh writer API. The index is then read back with
    ``docnum_field='docnum'`` and compared with the expected table.
    """

    # 'content' is declared without stored=True and is absent from the
    # expected table below.
    index_schema = Schema(title=TEXT(stored=True),
                          path=ID(stored=True),
                          content=TEXT)

    documents = [
        dict(title=u"First document", path=u"/a",
             content=u"This is the first document we've added!"),
        dict(title=u"Second document", path=u"/b",
             content=u"The second one is even more interesting!"),
    ]

    index_writer = create_in(dirname, index_schema).writer()
    for document in documents:
        index_writer.add_document(**document)
    index_writer.commit()

    # N.B., fields get sorted
    header = (u'docnum', u'path', u'title')
    expect = (header,
              (0, u'/a', u'First document'),
              (1, u'/b', u'Second document'))
    ieq(expect, fromindex(dirname, docnum_field='docnum'))
コード例 #3
0
def test_toindex_dirname():
    """Write a table to an index directory, read it back, and compare.

    ``toindex`` is called with an explicit schema whose fields are all
    stored; ``fromindex`` then reads the same directory and the result is
    checked against the original table with ``ieq``.

    ``dirname`` comes from outside this function (it is not defined here).
    """

    # name fields in ascending order as whoosh sorts fields on the way out
    tbl = (
        ('f0', 'f1', 'f2', 'f3', 'f4'),
        (u'AAA', 12, 4.3, True, datetime.datetime.now()),
        # Plain 1 for the month: the former 01 spelling is rejected by the
        # Python 3 parser, while 1 means the same thing on Python 2.
        (u'BBB', 6, 3.4, False, datetime.datetime(1900, 1, 31)),
        (u'CCC', 42, 7.8, True, datetime.datetime(2100, 12, 25)),
    )

    # Schema field names match the table header; every field is stored.
    schema = Schema(f0=TEXT(stored=True),
                    f1=NUMERIC(int, stored=True),
                    f2=NUMERIC(float, stored=True),
                    f3=BOOLEAN(stored=True),
                    f4=DATETIME(stored=True))

    toindex(tbl, dirname, schema=schema)

    actual = fromindex(dirname)
    ieq(tbl, actual)
コード例 #4
0
def test_fromindex_docnum_field():
    """Verify the ``docnum_field`` option of ``fromindex``.

    Builds an index in ``dirname`` holding two documents, then reads it back
    asking for the document number under the field name ``'docnum'`` and
    compares against the expected rows.
    """

    stored_text = TEXT(stored=True)
    stored_id = ID(stored=True)
    # 'content' uses the bare TEXT type (no stored=True); the expected table
    # below has no 'content' column.
    schema = Schema(title=stored_text, path=stored_id, content=TEXT)

    w = create_in(dirname, schema).writer()
    first = {
        'title': u"First document",
        'path': u"/a",
        'content': u"This is the first document we've added!",
    }
    second = {
        'title': u"Second document",
        'path': u"/b",
        'content': u"The second one is even more interesting!",
    }
    w.add_document(**first)
    w.add_document(**second)
    w.commit()

    # N.B., fields get sorted
    expected_rows = (
        (u'docnum', u'path', u'title'),
        (0, u'/a', u'First document'),
        (1, u'/b', u'Second document'),
    )
    observed = fromindex(dirname, docnum_field='docnum')
    ieq(expected_rows, observed)
コード例 #5
0
# fromindex
###########

# set up an index in 'tmp/example' and load two documents via the Whoosh API
from whoosh.index import create_in
from whoosh.fields import *
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
index = create_in('tmp/example', schema)
writer = index.writer()
writer.add_document(title=u"First document",
                    path=u"/a",
                    content=u"This is the first document we've added!")
writer.add_document(title=u"Second document",
                    path=u"/b",
                    content=u"The second one is even more interesting!")
writer.commit()
# extract documents as a table
from petl import look
from petlx.index import fromindex
tbl = fromindex('tmp/example')
look(tbl)

# toindex
#########

import datetime
tbl = (('f0', 'f1', 'f2', 'f3', 'f4'),
       (u'AAA', 12, 4.3, True, datetime.datetime.now()),
       # Month given as 1 instead of 01: leading-zero decimal literals are a
       # SyntaxError on Python 3; the value is unchanged on Python 2.
       (u'BBB', 6, 3.4, False, datetime.datetime(1900, 1, 31)),
       (u'CCC', 42, 7.8, True, datetime.datetime(2100, 12, 25)))

from petl import look
from petlx.index import toindex, fromindex
# here is the table we want to load into an index
look(tbl)
コード例 #6
0
ファイル: examples_index.py プロジェクト: ianfiske/petlx
# set up an index and load some documents via the Whoosh API
from whoosh.index import create_in
from whoosh.fields import *
# 'title' and 'path' are stored; 'content' uses the bare TEXT type
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
index = create_in('tmp/example', schema)
writer = index.writer()
writer.add_document(title=u"First document", path=u"/a",
                    content=u"This is the first document we've added!")
writer.add_document(title=u"Second document", path=u"/b",
                    content=u"The second one is even more interesting!")
writer.commit()
# extract documents as a table
from petl import look
from petlx.index import fromindex
tbl = fromindex('tmp/example')
look(tbl)


# toindex
#########

import datetime
tbl = (('f0', 'f1', 'f2', 'f3', 'f4'),
       (u'AAA', 12, 4.3, True, datetime.datetime.now()),
       # The month is spelled 1, not 01: Python 3 rejects decimal literals
       # with a leading zero, and 1 is the same value on Python 2.
       (u'BBB', 6, 3.4, False, datetime.datetime(1900, 1, 31)),
       (u'CCC', 42, 7.8, True, datetime.datetime(2100, 12, 25)))

from petl import look
from petlx.index import toindex, fromindex
# here is the table we want to load into an index