Exemple #1
0
def test_extras():
    """Round-trip the ``extras`` mapping through the hash file classes.

    The same two entries are written first with ``HashWriter`` and then with
    ``OrderedHashWriter``; after each write the file is re-opened and the
    entries are compared with what was stored.
    """
    storage = RamStorage()

    def store_extras(writer_class):
        # Create "test" holding nothing but the two extras entries.
        writer = writer_class(storage.create_file("test"))
        writer.extras["test"] = 100
        writer.extras["blah"] = "foo"
        writer.close()

    def verify_extras(reader_class):
        # Re-open "test" and check that both entries came back unchanged.
        reader = reader_class(storage.open_file("test"),
                              storage.file_length("test"))
        assert reader.extras["test"] == 100
        assert reader.extras["blah"] == "foo"
        reader.close()

    store_extras(HashWriter)
    verify_extras(HashReader)

    # The file produced by the ordered writer is read with both reader classes.
    store_extras(OrderedHashWriter)
    verify_extras(HashReader)
    verify_extras(OrderedHashReader)
Exemple #2
0
def test_extras():
    """Check that ``extras`` entries survive a write/read cycle.

    Covers ``HashWriter`` -> ``HashReader`` and ``OrderedHashWriter`` ->
    ``HashReader`` / ``OrderedHashReader``, always using the file "test".
    """
    ram = RamStorage()

    def write_with(make_writer):
        # Store the two extras and close the writer straight away.
        out = make_writer(ram.create_file("test"))
        out.extras["test"] = 100
        out.extras["blah"] = "foo"
        out.close()

    def read_with(make_reader):
        # Open "test" with the given reader class and compare the extras.
        src = make_reader(ram.open_file("test"), ram.file_length("test"))
        assert src.extras["test"] == 100
        assert src.extras["blah"] == "foo"
        src.close()

    write_with(HashWriter)
    read_with(HashReader)

    write_with(OrderedHashWriter)
    read_with(HashReader)
    read_with(OrderedHashReader)
Exemple #3
0
def test_wordfile():
    """Build a corrector from a gzipped word list and round-trip it to a file.

    The word list ``english-words.10.gz`` is looked up in ``./tests`` or in
    the current directory. If it cannot be found the function returns
    without checking anything.
    """
    import os.path

    files = os.listdir(".")
    testdir = "tests"
    fname = "english-words.10.gz"
    if testdir in files:
        path = os.path.join(testdir, fname)
    elif fname in files:
        path = fname
    else:
        return

    if not os.path.exists(path):
        return

    wordfile = gzip.open(path, "r")
    try:
        cor = spelling.GraphCorrector.from_word_list(
            word.decode("latin-1") for word in wordfile)
    finally:
        # Release the gzip handle even when building the corrector fails.
        wordfile.close()

    assert_equal(cor.suggest("specail"), ["special"])

    st = RamStorage()
    gf = st.create_file("test.dawg")
    cor.to_file(gf)

    gf = st.open_file("test.dawg")
    try:
        cor = spelling.GraphCorrector.from_graph_file(gf)
        assert_equal(cor.suggest("specail", maxdist=1), ["special"])
    finally:
        # Close the graph file whether or not the assertion holds.
        gf.close()
def test_wordfile():
    """Round-trip a corrector built from ``english-words.10.gz``.

    The word list is searched for in ``./tests`` and then in the current
    directory; when it is missing the function returns early and nothing
    is checked.
    """
    import os.path

    files = os.listdir(".")
    testdir = "tests"
    fname = "english-words.10.gz"
    if testdir in files:
        path = os.path.join(testdir, fname)
    elif fname in files:
        path = fname
    else:
        return

    if not os.path.exists(path):
        return

    wordfile = gzip.open(path, "r")
    try:
        cor = spelling.GraphCorrector.from_word_list(word.decode("latin-1")
                                                     for word in wordfile)
    finally:
        # The gzip handle must not leak if from_word_list raises.
        wordfile.close()

    assert_equal(cor.suggest("specail"), ["special"])

    st = RamStorage()
    gf = st.create_file("test.dawg")
    cor.to_file(gf)

    gf = st.open_file("test.dawg")
    try:
        cor = spelling.GraphCorrector.from_graph_file(gf)
        assert_equal(cor.suggest("specail", maxdist=1), ["special"])
    finally:
        # Always close the graph file, including on assertion failure.
        gf.close()
Exemple #5
0
def test_ondisk():
    """Write a ``BitSet`` to a file and read it back two ways.

    The stored bits are first accessed through ``OnDiskBitSet`` (iteration,
    ``after`` and ``before``) and then reloaded with ``BitSet.from_disk``.
    """
    bs = BitSet([10, 11, 30, 50, 80])

    st = RamStorage()
    f = st.create_file("test")
    size = bs.to_disk(f)
    f.close()

    f = st.open_file("test")
    try:
        b = OnDiskBitSet(f, 0, size)
        assert list(b) == list(bs)

        assert b.after(0) == 10
        assert b.after(10) == 11
        assert b.after(80) is None
        assert b.after(99) is None

        assert b.before(0) is None
        assert b.before(99) == 80
        assert b.before(80) == 50
        assert b.before(10) is None

        # Rewind so from_disk starts at the beginning of the stored bits.
        f.seek(0)
        b = BitSet.from_disk(f, size)
        assert list(b) == list(bs)
    finally:
        # The original never closed the file opened for reading.
        f.close()
Exemple #6
0
def test_ondisk():
    """Check the on-disk form of a ``BitSet`` against the in-memory one.

    After ``to_disk`` the file is re-opened; ``OnDiskBitSet`` is checked for
    iteration plus ``after``/``before`` lookups, and ``BitSet.from_disk``
    must yield the same members as the source set.
    """
    bs = BitSet([10, 11, 30, 50, 80])

    st = RamStorage()
    f = st.create_file("test")
    size = bs.to_disk(f)
    f.close()

    f = st.open_file("test")
    try:
        b = OnDiskBitSet(f, 0, size)
        assert list(b) == list(bs)

        assert b.after(0) == 10
        assert b.after(10) == 11
        assert b.after(80) is None
        assert b.after(99) is None

        assert b.before(0) is None
        assert b.before(99) == 80
        assert b.before(80) == 50
        assert b.before(10) is None

        # from_disk reads from the current position, so go back to offset 0.
        f.seek(0)
        b = BitSet.from_disk(f, size)
        assert list(b) == list(bs)
    finally:
        # Close the read handle, which was previously left open.
        f.close()
Exemple #7
0
def test_hash_single():
    """Store one key/value pair in a hash file and look it up again.

    A key that was never added must come back as ``None``.
    """
    st = RamStorage()
    hw = HashWriter(st.create_file("test.hsh"))
    hw.add(b("alfa"), b("bravo"))
    hw.close()

    hr = HashReader.open(st, "test.hsh")
    try:
        assert hr.get(b("alfa")) == b("bravo")
        assert hr.get(b("foo")) is None
    finally:
        # Close the reader, as test_hash_extras does; it used to be left open.
        hr.close()
Exemple #8
0
def test_hash_single():
    """Write a single entry with ``HashWriter`` and read it via ``HashReader``.

    Also checks that ``get`` returns ``None`` for a key that is not stored.
    """
    st = RamStorage()
    hw = HashWriter(st.create_file("test.hsh"))
    hw.add(b("alfa"), b("bravo"))
    hw.close()

    hr = HashReader.open(st, "test.hsh")
    try:
        assert hr.get(b("alfa")) == b("bravo")
        assert hr.get(b("foo")) is None
    finally:
        # The reader was never closed before; release it on every path.
        hr.close()
def _rt(c, values, default):
    """Round-trip ``values`` through column ``c`` in two layouts.

    First every document number from 0 upward gets a value; then the values
    are placed at every seventh document number starting at 10, and all other
    documents are expected to read back as ``default``. In both cases the
    column data follows a 5-byte prefix in the file.
    """
    storage = RamStorage()
    count = len(values)

    # Continuous
    out = storage.create_file("test1")
    out.write(b("hello"))
    writer = c.writer(out)
    for docnum, value in enumerate(values):
        writer.add(docnum, value)
    writer.finish(count)
    # The column starts after the 5-byte prefix written above.
    length = out.tell() - 5
    out.close()

    src = storage.open_file("test1")
    reader = c.reader(src, 5, length, count)
    assert values == list(reader)
    for docnum, expected in enumerate(values):
        assert expected == reader[docnum]
    src.close()

    # Sparse
    doccount = count * 7 + 15
    target = [default] * doccount

    out = storage.create_file("test2")
    out.write(b("hello"))
    writer = c.writer(out)
    for docnum, value in izip(xrange(10, doccount, 7), values):
        target[docnum] = value
        writer.add(docnum, value)
    writer.finish(doccount)
    length = out.tell() - 5
    out.close()

    src = storage.open_file("test2")
    reader = c.reader(src, 5, length, doccount)
    assert target == list(reader)
    for docnum, expected in enumerate(target):
        assert expected == reader[docnum]

    # The object returned by load() must iterate to the same values.
    loaded = reader.load()
    assert target == list(loaded)
    src.close()
def _rt(c, values, default):
    """Write ``values`` with column type ``c`` and verify what is read back.

    Two files are produced: "test1" with one value per consecutive document
    and "test2" with the values spread over document numbers 10, 17, 24, ...
    where untouched documents must read as ``default``. Each file begins with
    a 5-byte prefix that the reader is told to skip.
    """
    st = RamStorage()
    nvalues = len(values)

    # Continuous
    fileobj = st.create_file("test1")
    fileobj.write(b("hello"))
    colwriter = c.writer(fileobj)
    for docnum, v in enumerate(values):
        colwriter.add(docnum, v)
    colwriter.finish(nvalues)
    length = fileobj.tell() - 5  # exclude the prefix
    fileobj.close()

    fileobj = st.open_file("test1")
    colreader = c.reader(fileobj, 5, length, nvalues)
    assert values == list(colreader)
    for i, want in enumerate(values):
        assert want == colreader[i]
    fileobj.close()

    # Sparse
    doccount = nvalues * 7 + 15
    target = [default] * doccount

    fileobj = st.create_file("test2")
    fileobj.write(b("hello"))
    colwriter = c.writer(fileobj)
    for docnum, v in izip(xrange(10, doccount, 7), values):
        target[docnum] = v
        colwriter.add(docnum, v)
    colwriter.finish(doccount)
    length = fileobj.tell() - 5  # exclude the prefix
    fileobj.close()

    fileobj = st.open_file("test2")
    colreader = c.reader(fileobj, 5, length, doccount)
    assert target == list(colreader)
    for i, want in enumerate(target):
        assert want == colreader[i]

    # load() is checked against the same expected list.
    assert target == list(colreader.load())
    fileobj.close()
Exemple #11
0
def test_find_self():
    """Check that the first suggestion for a stored word is not that word."""
    words = sorted(u("book bake bike bone").split())
    storage = RamStorage()
    graph_file = storage.create_file("test")
    spelling.wordlist_to_graph_file(words, graph_file)

    reader = fst.GraphReader(storage.open_file("test"))
    corrector = spelling.GraphCorrector(reader)
    for word in ("book", "bake", "bike", "bone"):
        assert corrector.suggest(word)[0] != word
Exemple #12
0
def test_find_self():
    """For each word in the graph, the top suggestion must be a different word."""
    vocabulary = u("book bake bike bone").split()
    vocabulary = sorted(vocabulary)
    st = RamStorage()
    out = st.create_file("test")
    spelling.wordlist_to_graph_file(vocabulary, out)

    graph = fst.GraphReader(st.open_file("test"))
    gc = spelling.GraphCorrector(graph)
    queries = ["book", "bake", "bike", "bone"]
    for query in queries:
        best = gc.suggest(query)[0]
        assert best != query
Exemple #13
0
def test_hash_extras():
    """Store an ``extras`` entry next to regular pairs and read both back."""
    storage = RamStorage()

    writer = HashWriter(storage.create_file("test.hsh"))
    writer.extras["test"] = 100
    writer.add(b("foo"), b("bar"))
    writer.add(b("glonk"), b("baz"))
    writer.close()

    reader = HashReader.open(storage, "test.hsh")
    assert reader.extras["test"] == 100
    assert reader.get(b("foo")) == b("bar")
    # "baz" was stored as a value only, so looking it up as a key gives None.
    assert reader.get(b("baz")) is None
    reader.close()
Exemple #14
0
def test_insert_bytes():
    """Insert bytes keys into a graph and read them back with ``flatten``."""
    # This test is only meaningful on Python 3
    keys = [b("alfa"), b("bravo"), b("charlie")]

    storage = RamStorage()
    writer = fst.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for item in keys:
        writer.insert(item)
    writer.close()

    reader = fst.GraphReader(storage.open_file("test"))
    cursor = reader.cursor()
    assert list(cursor.flatten()) == keys
Exemple #15
0
def test_insert_bytes():
    """Check that bytes keys written to a graph come back in the same order."""
    # This test is only meaningful on Python 3
    expected = [b("alfa"), b("bravo"), b("charlie")]

    st = RamStorage()
    graph_out = st.create_file("test")
    gw = fst.GraphWriter(graph_out)
    gw.start_field("test")
    for k in expected:
        gw.insert(k)
    gw.close()

    graph_in = st.open_file("test")
    flattened = list(fst.GraphReader(graph_in).cursor().flatten())
    assert flattened == expected
Exemple #16
0
def test_hash_extras():
    """Write pairs plus an ``extras`` entry to a hash file, then verify them."""
    st = RamStorage()
    out = HashWriter(st.create_file("test.hsh"))
    out.extras["test"] = 100
    for key, value in (("foo", "bar"), ("glonk", "baz")):
        out.add(b(key), b(value))
    out.close()

    src = HashReader.open(st, "test.hsh")
    assert src.extras["test"] == 100
    assert src.get(b("foo")) == b("bar")
    # "baz" only exists as a value, never as a key.
    assert src.get(b("baz")) is None
    src.close()
Exemple #17
0
def test_checksum_file():
    """Compare ``ChecksumFile.checksum()`` with a CRC32 of the same bytes.

    The same payload is written once to a plain file (to compute the
    reference CRC32) and once through a ``ChecksumFile``; the checksum is
    checked after writing and again after reading everything back.
    """
    from whoosh.filedb.structfile import ChecksumFile
    from zlib import crc32

    def write_payload(stream):
        # One call for each write method used by this test.
        stream.write(b("Testing"))
        stream.write_int(-100)
        stream.write_varint(10395)
        stream.write_string(b("Hello"))
        stream.write_ushort(32959)

    storage = RamStorage()

    # Write a file normally
    plain = storage.create_file("control")
    write_payload(plain)
    plain.close()

    # Checksum the contents
    plain = storage.open_file("control")
    expected = crc32(plain.read()) & 0xffffffff
    plain.close()

    # Write a file with checksumming
    raw = storage.create_file("test")
    summed = ChecksumFile(raw)
    write_payload(summed)
    assert summed.checksum() == expected
    raw.close()

    # Read the file with checksumming
    raw = storage.open_file("test")
    summed = ChecksumFile(raw)
    assert summed.read(7) == b("Testing")
    assert summed.read_int() == -100
    assert summed.read_varint() == 10395
    assert summed.read_string() == b("Hello")
    assert summed.read_ushort() == 32959
    assert summed.checksum() == expected
    summed.close()
Exemple #18
0
def test_checksum_file():
    """Verify that ``ChecksumFile`` reports the CRC32 of the data it handled.

    A control file gives the reference value; the checksum reported by the
    wrapper must match it both after writing and after reading the payload.
    """
    from whoosh.filedb.structfile import ChecksumFile
    from zlib import crc32

    def emit(target):
        # The payload mixes raw bytes with several typed write calls.
        target.write(b("Testing"))
        target.write_int(-100)
        target.write_varint(10395)
        target.write_string(b("Hello"))
        target.write_ushort(32959)

    st = RamStorage()

    # Write a file normally
    control = st.create_file("control")
    emit(control)
    control.close()

    # Checksum the contents
    control = st.open_file("control")
    contents = control.read()
    reference = crc32(contents) & 0xffffffff
    control.close()

    # Write a file with checksumming
    backing = st.create_file("test")
    wrapper = ChecksumFile(backing)
    emit(wrapper)
    assert wrapper.checksum() == reference
    backing.close()

    # Read the file with checksumming
    backing = st.open_file("test")
    wrapper = ChecksumFile(backing)
    assert wrapper.read(7) == b("Testing")
    assert wrapper.read_int() == -100
    assert wrapper.read_varint() == 10395
    assert wrapper.read_string() == b("Hello")
    assert wrapper.read_ushort() == 32959
    assert wrapper.checksum() == reference
    wrapper.close()
Exemple #19
0
def test_insert_unicode():
    """Insert non-ASCII keys into a graph and read them with ``flatten_strings``."""
    keys = [
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        u("\uc774\uc124\ud76c"),
    ]

    storage = RamStorage()
    writer = fst.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for item in keys:
        writer.insert(item)
    writer.close()

    reader = fst.GraphReader(storage.open_file("test"))
    cursor = reader.cursor()
    assert list(cursor.flatten_strings()) == keys
Exemple #20
0
def test_termindex():
    """Write eight two-string keys to a term index and read them back.

    Key number ``i`` is stored with ``FileTermInfo(1.0, i)``; on reading,
    the keys must come back in the order written, with ``weight()`` equal to
    1.0 and ``doc_frequency()`` equal to ``i``.
    """
    expected_terms = [
        ("a", "alfa"), ("a", "bravo"), ("a", "charlie"), ("a", "delta"),
        ("b", "able"), ("b", "baker"), ("b", "dog"), ("b", "easy"),
    ]
    storage = RamStorage()

    writer = TermIndexWriter(storage.create_file("test.trm"))
    for position, term in enumerate(expected_terms):
        writer.add(term, FileTermInfo(1.0, position))
    writer.close()

    reader = TermIndexReader(storage.open_file("test.trm"))
    pairs = zip(reader.keys(), expected_terms)
    for position, (stored, expected) in enumerate(pairs):
        assert_equal(stored, expected)
        info = reader.get(stored)
        assert_equal(info.weight(), 1.0)
        assert_equal(info.doc_frequency(), position)
Exemple #21
0
def test_insert_unicode():
    """Check that non-ASCII keys written to a graph come back in order."""
    expected = [u("\u280b\u2817\u2801\u281d\u2809\u2811"),
                u("\u65e5\u672c"),
                u("\uc774\uc124\ud76c")]

    st = RamStorage()
    graph_out = st.create_file("test")
    gw = fst.GraphWriter(graph_out)
    gw.start_field("test")
    for k in expected:
        gw.insert(k)
    gw.close()

    graph_in = st.open_file("test")
    flattened = list(fst.GraphReader(graph_in).cursor().flatten_strings())
    assert flattened == expected
Exemple #22
0
def test_termindex():
    """Round-trip a list of keys and their term info through a term index.

    Each key is written with ``FileTermInfo(1.0, i)`` where ``i`` is its
    position; the reader must yield the keys in that order and report the
    same weight and document frequency.
    """
    wanted = [("a", "alfa"), ("a", "bravo"), ("a", "charlie"), ("a", "delta"),
              ("b", "able"), ("b", "baker"), ("b", "dog"), ("b", "easy")]
    st = RamStorage()

    out = TermIndexWriter(st.create_file("test.trm"))
    for n, key in enumerate(wanted):
        out.add(key, FileTermInfo(1.0, n))
    out.close()

    src = TermIndexReader(st.open_file("test.trm"))
    for n, (found, want) in enumerate(zip(src.keys(), wanted)):
        assert_equal(found, want)
        terminfo = src.get(found)
        assert_equal(terminfo.weight(), 1.0)
        assert_equal(terminfo.doc_frequency(), n)
Exemple #23
0
def test_within_unicode():
    """Run ``fst.within`` with a non-ASCII query against non-ASCII keys.

    Of the three stored keys, only the one sharing the query's first and
    last characters is expected in the result.
    """
    keys = [
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        u("\uc774\uc124\ud76c"),
    ]

    storage = RamStorage()
    writer = fst.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for item in keys:
        writer.insert(item)
    writer.close()

    reader = fst.GraphReader(storage.open_file("test"))
    matches = list(fst.within(reader, u("\uc774.\ud76c")))
    assert matches == [u("\uc774\uc124\ud76c")]
Exemple #24
0
def test_within_unicode():
    """Check that ``fst.within`` returns exactly one of three non-ASCII keys."""
    stored = [u("\u280b\u2817\u2801\u281d\u2809\u2811"),
              u("\u65e5\u672c"),
              u("\uc774\uc124\ud76c")]

    st = RamStorage()
    graph_out = st.create_file("test")
    gw = fst.GraphWriter(graph_out)
    gw.start_field("test")
    for k in stored:
        gw.insert(k)
    gw.close()

    graph = fst.GraphReader(st.open_file("test"))
    query = u("\uc774.\ud76c")
    found = list(fst.within(graph, query))
    assert found == [u("\uc774\uc124\ud76c")]
Exemple #25
0
def test_types():
    """Exercise the ``fst`` value types.

    For each type this checks how ``common``, ``subtract`` and ``add`` treat
    ``None`` operands and that a value and an empty value survive a
    ``write``/``read`` cycle. Some concrete ``common``/``add``/``subtract``
    results are then checked for ints, bytes and arrays.
    """
    st = RamStorage()

    # (value type, sample value, empty value)
    types = ((fst.IntValues, 100, 0),
             (fst.BytesValues, b('abc'), b('')),
             (fst.ArrayValues("i"), array("i", [0, 123, 42]), array("i")),
             (fst.IntListValues, [0, 6, 97], []))

    for t, v, z in types:
        assert t.common(None, v) is None
        assert t.common(v, None) is None
        assert t.common(None, None) is None
        assert t.subtract(v, None) == v
        assert t.subtract(None, v) is None
        assert t.subtract(None, None) is None
        assert t.add(v, None) == v
        assert t.add(None, v) == v
        assert t.add(None, None) is None
        f = st.create_file("test")
        t.write(f, v)
        t.write(f, z)
        f.close()
        f = st.open_file("test")
        try:
            assert t.read(f) == v
            assert t.read(f) == z
        finally:
            # The read handle used to be left open on every iteration.
            f.close()

    assert fst.IntValues.common(100, 20) == 20
    assert fst.IntValues.add(20, 80) == 100
    assert fst.IntValues.subtract(100, 80) == 20

    assert fst.BytesValues.common(b("abc"), b("abc")) == b("abc")
    assert fst.BytesValues.common(b("abcde"), b("abfgh")) == b("ab")
    assert fst.BytesValues.common(b("abcde"), b("ab")) == b("ab")
    assert fst.BytesValues.common(b("ab"), b("abcde")) == b("ab")
    assert fst.BytesValues.common(None, b("abcde")) is None
    assert fst.BytesValues.common(b("ab"), None) is None

    a1 = array("i", [0, 12, 123, 42])
    a2 = array("i", [0, 12, 420])
    cm = array("i", [0, 12])
    assert fst.ArrayValues.common(a1, a1) == a1
    assert fst.ArrayValues.common(a1, a2) == cm
    assert fst.ArrayValues.common(a2, a1) == cm
    assert fst.ArrayValues.common(None, a1) is None
    assert fst.ArrayValues.common(a2, None) is None
Exemple #26
0
def test_random_termkeys():
    """Store 1000 random two-string keys and check ``in`` finds each one.

    The keys are sorted before being added to the writer.
    """
    def make_fieldname():
        # 19 characters with code points in 65..90
        chars = [chr(random.randint(65, 90)) for _ in xrange(1, 20)]
        return "".join(chars)

    def make_token():
        # 19 characters with code points in 0..0xd7ff
        chars = [unichr(random.randint(0, 0xd7ff)) for _ in xrange(1, 20)]
        return "".join(chars)

    keys = []
    for _ in xrange(1000):
        keys.append((make_fieldname(), make_token()))
    keys.sort()

    storage = RamStorage()
    writer = TermIndexWriter(storage.create_file("test.trm"))
    for key in keys:
        writer.add(key, FileTermInfo(1.0, 1))
    writer.close()

    reader = TermIndexReader(storage.open_file("test.trm"))
    for key in keys:
        assert key in reader
def test_multistream():
    """Interleave writes to three sub-files and read them from a compound file.

    Each sub-file must read back as the concatenation of the chunks written
    to it, in the order they were written.
    """
    chunks = [("a", "12345"), ("b", "abc"), ("c", "AaBbC"),
              ("a", "678"), ("c", "cDdEeF"), ("b", "defgh"),
              ("b", "ijk"), ("c", "fGgHh"), ("a", "9abc")]

    storage = RamStorage()
    writer = compound.CompoundWriter(storage)
    streams = {}
    for name in "abc":
        streams[name] = writer.create_file(name)
    for name, text in chunks:
        streams[name].write(b(text))
    target = storage.create_file("test")
    writer.save_as_compound(target)

    source = storage.open_file("test")
    reader = compound.CompoundStorage(source)
    expected = (("a", "123456789abc"),
                ("b", "abcdefghijk"),
                ("c", "AaBbCcDdEeFfGgHh"))
    for name, text in expected:
        assert reader.open_file(name).read() == b(text)
def test_multistream():
    """Check that a compound file keeps interleaved sub-file writes separate."""
    writes = [("a", "12345"), ("b", "abc"), ("c", "AaBbC"),
              ("a", "678"), ("c", "cDdEeF"), ("b", "defgh"),
              ("b", "ijk"), ("c", "fGgHh"), ("a", "9abc")]

    st = RamStorage()
    cw = compound.CompoundWriter(st)

    # One sub-file per letter, created in the order a, b, c.
    subfiles = {}
    for letter in "abc":
        subfiles[letter] = cw.create_file(letter)

    for letter, payload in writes:
        subfile = subfiles[letter]
        subfile.write(b(payload))

    outfile = st.create_file("test")
    cw.save_as_compound(outfile)

    infile = st.open_file("test")
    cs = compound.CompoundStorage(infile)
    content_a = cs.open_file("a").read()
    assert content_a == b("123456789abc")
    content_b = cs.open_file("b").read()
    assert content_b == b("abcdefghijk")
    content_c = cs.open_file("c").read()
    assert content_c == b("AaBbCcDdEeFfGgHh")
Exemple #29
0
def test_types():
    """Check ``None`` handling, serialization and results of ``fst`` value types.

    Every type is checked for how ``common``/``subtract``/``add`` behave when
    an operand is ``None`` and for a ``write``/``read`` round trip of a
    sample value followed by an empty value. Concrete results are then
    checked for ``IntValues``, ``BytesValues`` and ``ArrayValues``.
    """
    st = RamStorage()

    # (value type, sample value, empty value)
    types = ((fst.IntValues, 100, 0), (fst.BytesValues, b('abc'), b('')),
             (fst.ArrayValues("i"), array("i", [0, 123, 42]),
              array("i")), (fst.IntListValues, [0, 6, 97], []))

    for t, v, z in types:
        assert t.common(None, v) is None
        assert t.common(v, None) is None
        assert t.common(None, None) is None
        assert t.subtract(v, None) == v
        assert t.subtract(None, v) is None
        assert t.subtract(None, None) is None
        assert t.add(v, None) == v
        assert t.add(None, v) == v
        assert t.add(None, None) is None
        f = st.create_file("test")
        t.write(f, v)
        t.write(f, z)
        f.close()
        f = st.open_file("test")
        try:
            assert t.read(f) == v
            assert t.read(f) == z
        finally:
            # Close the reader before the next iteration recreates "test".
            f.close()

    assert fst.IntValues.common(100, 20) == 20
    assert fst.IntValues.add(20, 80) == 100
    assert fst.IntValues.subtract(100, 80) == 20

    assert fst.BytesValues.common(b("abc"), b("abc")) == b("abc")
    assert fst.BytesValues.common(b("abcde"), b("abfgh")) == b("ab")
    assert fst.BytesValues.common(b("abcde"), b("ab")) == b("ab")
    assert fst.BytesValues.common(b("ab"), b("abcde")) == b("ab")
    assert fst.BytesValues.common(None, b("abcde")) is None
    assert fst.BytesValues.common(b("ab"), None) is None

    a1 = array("i", [0, 12, 123, 42])
    a2 = array("i", [0, 12, 420])
    cm = array("i", [0, 12])
    assert fst.ArrayValues.common(a1, a1) == a1
    assert fst.ArrayValues.common(a1, a2) == cm
    assert fst.ArrayValues.common(a2, a1) == cm
    assert fst.ArrayValues.common(None, a1) is None
    assert fst.ArrayValues.common(a2, None) is None
Exemple #30
0
def test_types():
    """Exercise the ``dawg`` value types.

    For each type this checks how ``common``, ``subtract`` and ``add`` treat
    ``None`` operands and that a sample value and an empty value survive a
    ``write``/``read`` cycle. Concrete results are then checked for
    ``IntValues``, ``BytesValues`` and ``ArrayValues``.
    """
    st = RamStorage()

    # (value type, sample value, empty value)
    types = ((dawg.IntValues, 100, 0), (dawg.BytesValues, b('abc'), b('')),
             (dawg.ArrayValues("i"), array("i", [0, 123, 42]),
              array("i")), (dawg.IntListValues, [0, 6, 97], []))

    for t, v, z in types:
        assert_equal(t.common(None, v), None)
        assert_equal(t.common(v, None), None)
        assert_equal(t.common(None, None), None)
        assert_equal(t.subtract(v, None), v)
        assert_equal(t.subtract(None, v), None)
        assert_equal(t.subtract(None, None), None)
        assert_equal(t.add(v, None), v)
        assert_equal(t.add(None, v), v)
        assert_equal(t.add(None, None), None)
        f = st.create_file("test")
        t.write(f, v)
        t.write(f, z)
        f.close()
        f = st.open_file("test")
        try:
            assert_equal(t.read(f), v)
            assert_equal(t.read(f), z)
        finally:
            # The read handle used to be left open on every iteration.
            f.close()

    assert_equal(dawg.IntValues.common(100, 20), 20)
    assert_equal(dawg.IntValues.add(20, 80), 100)
    assert_equal(dawg.IntValues.subtract(100, 80), 20)

    assert_equal(dawg.BytesValues.common(b("abc"), b("abc")), b("abc"))
    assert_equal(dawg.BytesValues.common(b("abcde"), b("abfgh")), b("ab"))
    assert_equal(dawg.BytesValues.common(b("abcde"), b("ab")), b("ab"))
    assert_equal(dawg.BytesValues.common(b("ab"), b("abcde")), b("ab"))
    assert_equal(dawg.BytesValues.common(None, b("abcde")), None)
    assert_equal(dawg.BytesValues.common(b("ab"), None), None)

    a1 = array("i", [0, 12, 123, 42])
    a2 = array("i", [0, 12, 420])
    cm = array("i", [0, 12])
    assert_equal(dawg.ArrayValues.common(a1, a1), a1)
    assert_equal(dawg.ArrayValues.common(a1, a2), cm)
    assert_equal(dawg.ArrayValues.common(a2, a1), cm)
    assert_equal(dawg.ArrayValues.common(None, a1), None)
    assert_equal(dawg.ArrayValues.common(a2, None), None)
Exemple #31
0
def test_block():
    """Fill a posting block, check its statistics, then write and re-read it.

    The same statistics are checked three times: on the in-memory block, on
    a ``FileTermInfo`` fed with that block, and on the block loaded back
    from the file.
    """
    storage = RamStorage()
    postfile = storage.create_file("postfile")

    block = current(postfile, 0)
    # Arguments are passed positionally; the test reads them back as ids,
    # weights and (through min/max_length) lengths.
    for posting in ((0, 1.0, '', 1),
                    (1, 2.0, '', 2),
                    (2, 12.0, '', 6),
                    (5, 6.5, '', 420)):
        block.append(*posting)
    assert block

    assert_equal(len(block), 4)
    assert_equal(list(block.ids), [0, 1, 2, 5])
    assert_equal(list(block.weights), [1.0, 2.0, 12.0, 6.5])
    assert_equal(block.values, None)
    assert_equal(block.min_length(), 1)
    # The expected maximum is 420 after a length -> byte -> length conversion.
    assert_equal(block.max_length(), byte_to_length(length_to_byte(420)))
    assert_equal(block.max_weight(), 12.0)
    assert_equal(block.max_wol(), 2.0)

    terminfo = FileTermInfo()
    terminfo.add_block(block)
    assert_equal(terminfo.weight(), 21.5)
    assert_equal(terminfo.doc_frequency(), 4)
    assert_equal(terminfo.min_length(), 1)
    assert_equal(terminfo.max_length(), byte_to_length(length_to_byte(420)))
    assert_equal(terminfo.max_weight(), 12.0)
    assert_equal(terminfo.max_wol(), 2.0)

    block.write(compression=3)
    postfile.close()
    postfile = storage.open_file("postfile")
    loaded = current.from_file(postfile, 0)

    loaded.read_ids()
    assert_equal(list(loaded.ids), [0, 1, 2, 5])
    loaded.read_weights()
    assert_equal(list(loaded.weights), [1.0, 2.0, 12.0, 6.5])
    loaded.read_values()
    # NOTE(review): this checks the block that was written, not the one just
    # loaded -- possibly a copy/paste slip; confirm before changing it.
    assert_equal(block.values, None)
    assert_equal(loaded.min_length(), 1)
    assert_equal(loaded.max_length(), byte_to_length(length_to_byte(420)))
    assert_equal(loaded.max_weight(), 12.0)
    assert_equal(loaded.max_wol(), 2.0)
Exemple #32
0
def test_random_termkeys():
    """Add 1000 sorted random keys to a term index and look each one up."""
    def random_name():
        # 19 characters with code points in 65..90
        picked = []
        for _ in xrange(1, 20):
            picked.append(chr(random.randint(65, 90)))
        return "".join(picked)

    def random_text():
        # 19 characters with code points in 0..0xd7ff
        picked = []
        for _ in xrange(1, 20):
            picked.append(unichr(random.randint(0, 0xd7ff)))
        return "".join(picked)

    domain = [(random_name(), random_text()) for _ in xrange(1000)]
    domain.sort()

    st = RamStorage()
    index_out = TermIndexWriter(st.create_file("test.trm"))
    for entry in domain:
        index_out.add(entry, FileTermInfo(1.0, 1))
    index_out.close()

    index_in = TermIndexReader(st.open_file("test.trm"))
    for entry in domain:
        assert entry in index_in
    def rw(size):
        """Write ``size`` distinct values with ``col`` and read each one back."""
        storage = RamStorage()

        out = storage.create_file("test")
        writer = col.writer(out)
        for docnum in xrange(size):
            writer.add(docnum, hex(docnum).encode("latin1"))
        writer.finish(size)
        nbytes = out.tell()
        out.close()

        src = storage.open_file("test")
        reader = col.reader(src, 0, nbytes, size)
        for docnum in xrange(size):
            stored = reader[docnum]
            # Column ignores additional unique values after 65535
            if docnum < 65535:
                expected = hex(docnum).encode("latin1")
            else:
                expected = b('')
            assert stored == expected
        src.close()
    def rw(size):
        """Round-trip ``size`` unique byte strings through the column ``col``."""
        st = RamStorage()

        def value_for(n):
            # The value stored for document n is its hex() text as bytes.
            return hex(n).encode("latin1")

        fileobj = st.create_file("test")
        cw = col.writer(fileobj)
        for n in xrange(size):
            cw.add(n, value_for(n))
        cw.finish(size)
        length = fileobj.tell()
        fileobj.close()

        fileobj = st.open_file("test")
        cr = col.reader(fileobj, 0, length, size)
        for n in xrange(size):
            got = cr[n]
            # Column ignores additional unique values after 65535
            if n >= 65535:
                assert got == b('')
            else:
                assert got == value_for(n)
        fileobj.close()
Exemple #35
0
def test_lowlevel_block_writing():
    """Write eight postings with a block limit of 4 and check the term info.

    The statistics returned by ``finish()`` must cover all eight postings.
    """
    storage = RamStorage()
    postfile = storage.create_file("postfile")
    writer = FilePostingWriter(postfile, blocklimit=4)
    fmt = formats.Frequency()
    writer.start(fmt)

    # Three numbers per posting; the second is also passed through
    # fmt.encode() to build the third argument of write().
    postings = [
        (0, 1.0, 1),
        (1, 2.0, 2),
        (2, 12.0, 6),
        (5, 6.5, 420),
        (11, 1.5, 1),
        (12, 2.5, 2),
        (26, 100.5, 21),
        (50, 8.0, 1020),
    ]
    for docnum, weight, length in postings:
        writer.write(docnum, weight, fmt.encode(weight), length)
    terminfo = writer.finish()

    assert_equal(terminfo.weight(), 134.0)
    assert_equal(terminfo.doc_frequency(), 8)
    assert_equal(terminfo.min_length(), 1)
    assert_equal(terminfo.max_length(), byte_to_length(length_to_byte(1020)))
    assert_equal(terminfo.max_weight(), 100.5)
    assert_equal(terminfo.max_wol(),
                 100.5 / byte_to_length(length_to_byte(21)))
Exemple #36
0
def words_to_corrector(words):
    """Return a ``GraphCorrector`` reading a graph file built from ``words``.

    The graph is written to, and re-opened from, a fresh ``RamStorage``.
    """
    storage = RamStorage()
    graph_out = storage.create_file("test")
    spelling.wordlist_to_graph_file(words, graph_out)
    graph_in = storage.open_file("test")
    reader = fst.GraphReader(graph_in)
    return spelling.GraphCorrector(reader)
Exemple #37
0
def words_to_corrector(words):
    """Build a word graph from ``words`` and wrap it in a ``GraphCorrector``."""
    ram = RamStorage()

    # Write the graph to the file "test" ...
    dest = ram.create_file("test")
    spelling.wordlist_to_graph_file(words, dest)

    # ... then open that file again for the corrector.
    source = ram.open_file("test")
    graph = fst.GraphReader(source)
    corrector = spelling.GraphCorrector(graph)
    return corrector