Esempio n. 1
0
def test_random_hash():
    with TempStorage("randomhash") as st:
        domain = "abcdefghijklmnopqrstuvwxyz"
        domain += domain.upper()
        times = 1000
        minlen = 1
        maxlen = len(domain)

        samp = dict((randstring(domain, minlen, maxlen),
                     randstring(domain, minlen, maxlen))
                    for _ in xrange(times))

        hwf = st.create_file("test.hsh")
        hw = HashWriter(hwf)
        for k, v in iteritems(samp):
            hw.add(k, v)
        hw.close()

        keys = list(samp.keys())
        random.shuffle(keys)
        hrf = st.open_file("test.hsh")
        hr = HashReader(hrf)
        for k in keys:
            v = hr[k]
            assert_equal(v, b(samp[k]))
        hr.close()
def test_huge_postfile():
    with TempStorage("hugeindex") as st:
        pf = st.create_file("test.pst")

        gb5 = 5 * 1024 * 1024 * 1024
        pf.seek(gb5)
        pf.write("\x00\x00\x00\x00")
        assert_equal(pf.tell(), gb5 + 4)

        fpw = FilePostingWriter(pf)
        format = formats.Frequency(None)
        offset = fpw.start(format)
        for i in xrange(10):
            fpw.write(i, float(i), struct.pack("!I", i), 10)
        posttotal = fpw.finish()
        assert_equal(posttotal, 10)
        fpw.close()

        pf = st.open_file("test.pst")
        pfr = FilePostingReader(pf, offset, format)
        i = 0
        while pfr.is_active():
            assert_equal(pfr.id(), i)
            assert_equal(pfr.weight(), float(i))
            assert_equal(pfr.value(), struct.pack("!I", i))
            pfr.next()
            i += 1
        pf.close()
Esempio n. 3
0
def test_hash_contents():
    samp = [
        ('alfa', 'bravo'),
        ('charlie', 'delta'),
        ('echo', 'foxtrot'),
        ('golf', 'hotel'),
        ('india', 'juliet'),
        ('kilo', 'lima'),
        ('mike', 'november'),
        ('oskar', 'papa'),
        ('quebec', 'romeo'),
        ('sierra', 'tango'),
        ('ultra', 'victor'),
        ('whiskey', 'xray'),
    ]
    # Convert to bytes
    samp = set((b(k), b(v)) for k, v in samp)

    with TempStorage("hashcontents") as st:
        hwf = st.create_file("test.hsh")
        hw = HashWriter(hwf)
        hw.add_all(samp)
        hw.close()

        hrf = st.open_file("test.hsh")
        hr = HashReader(hrf)
        assert_equal(set(hr.items()), samp)
        hr.close()
Esempio n. 4
0
def test_bigtable():
    with TempStorage("bigtable") as st:

        def randstring(min, max):
            return "".join(
                chr(randint(1, 255)) for _ in xrange(randint(min, max)))

        count = 100000
        samp = dict(
            (randstring(1, 50), randstring(1, 50)) for _ in xrange(count))

        fhw = HashWriter(st.create_file("big.hsh"))
        fhw.add_all(iteritems(samp))
        fhw.close()

        fhr = HashReader(st.open_file("big.hsh"))
        keys = list(samp.keys())
        shuffle(keys)
        for key in keys:
            assert_equal(samp[key], fhr[key])

        set1 = set(iteritems(samp))
        set2 = set(fhr.items())
        assert_equal(set1, set2)

        fhr.close()
Esempio n. 5
0
def test_ordered_closest():
    keys = [
        'alfa', 'bravo', 'charlie', 'delta', 'echo', 'foxtrot', 'golf',
        'hotel', 'india', 'juliet', 'kilo', 'lima', 'mike', 'november'
    ]
    # Make into bytes for Python 3
    keys = [b(k) for k in keys]
    values = [b('')] * len(keys)

    with TempStorage("orderedclosest") as st:
        hwf = st.create_file("test.hsh")
        hw = OrderedHashWriter(hwf)
        hw.add_all(zip(keys, values))
        hw.close()

        hrf = st.open_file("test.hsh")
        hr = OrderedHashReader(hrf)
        ck = hr.closest_key
        assert_equal(ck(b('')), b('alfa'))
        assert_equal(ck(b(' ')), b('alfa'))
        assert_equal(ck(b('alfa')), b('alfa'))
        assert_equal(ck(b('bravot')), b('charlie'))
        assert_equal(ck(b('charlie')), b('charlie'))
        assert_equal(ck(b('kiloton')), b('lima'))
        assert_equal(ck(b('oskar')), None)
        assert_equal(list(hr.keys()), keys)
        assert_equal(list(hr.values()), values)
        assert_equal(list(hr.keys_from(b('f'))), keys[5:])
        hr.close()
Esempio n. 6
0
def test_random_hash():
    from string import ascii_letters as domain

    times = 1000
    minlen = 1
    maxlen = len(domain)

    def randstring():
        s = "".join(random.sample(domain, random.randint(minlen, maxlen)))
        return b(s)

    with TempStorage("randomhash") as st:
        samp = dict((randstring(), randstring()) for _ in xrange(times))

        hwf = st.create_file("test.hsh")
        hw = HashWriter(hwf)
        for k, v in iteritems(samp):
            hw.add(k, v)
        hw.close()

        keys = list(samp.keys())
        random.shuffle(keys)
        hrf = st.open_file("test.hsh")
        hr = HashReader(hrf)
        for k in keys:
            assert_equal(hr[k], samp[k])
        hr.close()
Esempio n. 7
0
def test_threaded_filelock():
    with TempStorage("threadedfilelock") as st:
        lock1 = st.lock("testlock")
        result = []

        # The thread function tries to acquire the lock and then quits
        def fn():
            lock2 = st.lock("testlock")
            gotit = try_for(lock2.acquire, 1.0, 0.1)
            if gotit:
                result.append(True)
                lock2.release()

        t = threading.Thread(target=fn)

        # Acquire the lock in this thread
        lock1.acquire()
        # Start the other thread trying to acquire the lock
        t.start()
        # Wait for a bit
        time.sleep(0.15)
        # Release the lock
        lock1.release()
        # Wait for the other thread to finish
        t.join()
        # If the other thread got the lock, it should have appended True to the
        # "results" list.
        assert_equal(result, [True])
Esempio n. 8
0
def test_within():
    with TempStorage() as st:
        gwrite(enlist("0 00 000 001 01 010 011 1 10 100 101 11 110 111"), st)
        gr = greader(st)
        s = set(dawg.within(gr, "01", k=1))
        gr.close()
    assert_equal(
        s, set(["0", "00", "01", "011", "010", "001", "10", "101", "1", "11"]))
Esempio n. 9
0
def test_words():
    words = enlist("alfa alpaca amtrak bellow fellow fiona zebulon")
    with TempStorage() as st:
        gwrite(words, st)
        gr = greader(st)
        cur = dawg.Cursor(gr)
        assert_equal(list(cur.flatten_strings()), words)
        gr.close()
Esempio n. 10
0
def test_stored_fields():
    codec = default_codec()
    fieldobj = fields.TEXT(stored=True)
    with TempStorage("storedfields") as st:
        seg = codec.new_segment(st, "test")

        dw = codec.per_document_writer(st, seg)
        dw.start_doc(0)
        dw.add_field("a", fieldobj, "hello", 1)
        dw.add_field("b", fieldobj, "there", 1)
        dw.finish_doc()

        dw.start_doc(1)
        dw.add_field("a", fieldobj, "one", 1)
        dw.add_field("b", fieldobj, "two", 1)
        dw.add_field("c", fieldobj, "three", 1)
        dw.finish_doc()

        dw.start_doc(2)
        dw.finish_doc()

        dw.start_doc(3)
        dw.add_field("a", fieldobj, "alfa", 1)
        dw.add_field("b", fieldobj, "bravo", 1)
        dw.finish_doc()

        dw.close()

        dr = codec.stored_fields_reader(st, seg)
        assert_equal(dr[0], {"a": "hello", "b": "there"})
        # Note: access out of order
        assert_equal(dr[3], {"a": "alfa", "b": "bravo"})
        assert_equal(dr[1], {"a": "one", "b": "two", "c": "three"})
        dr.close()

        dr = codec.stored_fields_reader(st, seg)
        sfs = list(dr)
        assert_equal(sfs, [
            {
                "a": "hello",
                "b": "there"
            },
            {
                "a": "one",
                "b": "two",
                "c": "three"
            },
            {},
            {
                "a": "alfa",
                "b": "bravo"
            },
        ])
        dr.close()
Esempio n. 11
0
def test_filelock_simple():
    with TempStorage("simplefilelock") as st:
        lock1 = st.lock("testlock")
        lock2 = st.lock("testlock")
        assert lock1 is not lock2

        assert lock1.acquire()
        assert st.file_exists("testlock")
        assert not lock2.acquire()
        lock1.release()
        assert lock2.acquire()
        assert not lock1.acquire()
        lock2.release()
Esempio n. 12
0
def test_dawg():
    from whoosh.support.dawg import DawgBuilder

    with TempStorage() as st:
        df = st.create_file("test.dawg")

        dw = DawgBuilder(field_root=True)
        dw.insert(["test"] + list("special"))
        dw.insert(["test"] + list("specials"))
        dw.write(df)

        assert_equal(list(dawg.flatten(dw.root.edge("test"))),
                     ["special", "specials"])
Esempio n. 13
0
def test_hash():
    with TempStorage("hash") as st:
        hwf = st.create_file("test.hsh")
        hw = HashWriter(hwf)
        hw.add(b("foo"), b("bar"))
        hw.add(b("glonk"), b("baz"))
        hw.close()

        hrf = st.open_file("test.hsh")
        hr = HashReader(hrf)
        assert_equal(hr.get(b("foo")), b("bar"))
        assert_equal(hr.get(b("baz")), None)
        hr.close()
Esempio n. 14
0
def test_stored_fields():
    with TempStorage("storedfields") as st:
        sf = st.create_file("test.sf")
        sfw = StoredFieldWriter(sf, ["a", "b"])
        sfw.append({"a": "hello", "b": "there"})
        sfw.append({"a": "one", "b": "two"})
        sfw.append({"a": "alfa", "b": "bravo"})
        sfw.close()

        sf = st.open_file("test.sf")
        sfr = StoredFieldReader(sf)
        assert_equal(sfr[0], {"a": "hello", "b": "there"})
        assert_equal(sfr[2], {"a": "alfa", "b": "bravo"})
        assert_equal(sfr[1], {"a": "one", "b": "two"})
        sfr.close()
Esempio n. 15
0
def _fst_roundtrip(domain, t):
    with TempStorage() as st:
        f = st.create_file("test")
        gw = dawg.GraphWriter(f, vtype=t)
        gw.start_field("_")
        for key, value in domain:
            gw.insert(key, value)
        gw.finish_field()
        gw.close()

        f = st.open_file("test")
        gr = dawg.GraphReader(f, vtype=t)
        cur = dawg.Cursor(gr)
        assert_equal(list(cur.flatten_v()), domain)
        f.close()
Esempio n. 16
0
def test_termkey():
    with TempStorage("termkey") as st:
        tw = TermIndexWriter(st.create_file("test.trm"))
        tw.add(("alfa", u("bravo")), FileTermInfo(1.0, 3))
        tw.add(("alfa", u('\xc3\xa6\xc3\xaf\xc5\ufffd\xc3\xba')),
               FileTermInfo(4.0, 6))
        tw.add(("text", u('\xe6\u2014\xa5\xe6\u0153\xac\xe8\xaa\u017e')),
               FileTermInfo(7.0, 9))
        tw.close()

        tr = TermIndexReader(st.open_file("test.trm"))
        assert ("alfa", u("bravo")) in tr
        assert ("alfa", u('\xc3\xa6\xc3\xaf\xc5\ufffd\xc3\xba')) in tr
        assert ("text", u('\xe6\u2014\xa5\xe6\u0153\xac\xe8\xaa\u017e')) in tr
        tr.close()
Esempio n. 17
0
def test_ordered_hash():
    times = 10000
    with TempStorage("orderedhash") as st:
        hwf = st.create_file("test.hsh")
        hw = HashWriter(hwf)
        hw.add_all((b("%08x" % x), b(str(x))) for x in xrange(times))
        hw.close()

        keys = list(range(times))
        random.shuffle(keys)
        hrf = st.open_file("test.hsh")
        hr = HashReader(hrf)
        for x in keys:
            assert_equal(hr[b("%08x" % x)], b(str(x)))
        hr.close()
Esempio n. 18
0
def test_readwrite():
    schema = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
    with TempStorage("threading") as st:
        domain = ("alfa", "bravo", "charlie", "delta", "echo", "foxtrot",
                  "golf", "hotel", "india", "juliet", "kilo", "lima", "mike",
                  "november", "oscar", "papa", "quebec", "romeo", "sierra",
                  "tango", "uniform", "victor", "whiskey", "xray", "yankee",
                  "zulu")

        class WriterThread(threading.Thread):
            def run(self):
                ix = st.create_index(dir, schema)
                num = 0

                for i in xrange(50):
                    print(i)
                    w = ix.writer()
                    for _ in xrange(random.randint(1, 100)):
                        content = u(" ").join(
                            random.sample(domain, random.randint(5, 20)))
                        w.add_document(id=text_type(num), content=content)
                        num += 1
                    w.commit()

                    time.sleep(0.1)

        class SearcherThread(threading.Thread):
            def run(self):
                print(self.name + " starting")
                for _ in xrange(10):
                    ix = st.open_index()
                    s = ix.searcher()
                    q = query.Term("content", random.choice(domain))
                    s.search(q, limit=10)
                    s.close()
                    ix.close()
                    time.sleep(0.1)
                print(self.name + " done")

        wt = WriterThread()
        wt.start()
        time.sleep(0.5)
        for _ in xrange(20):
            SearcherThread().start()
            time.sleep(0.5)
        wt.join()
Esempio n. 19
0
def test_readwrite():
    with TempStorage("readwrite") as st:
        format = Frequency()
        postings = make_postings()

        postfile = st.create_file("readwrite")
        fpw = FilePostingWriter(postfile, blocklimit=8)
        fpw.start(format)
        for id, freq in postings:
            fpw.write(id, float(freq), format.encode(freq), 0)
        fpw.finish()
        fpw.close()

        postfile = st.open_file("readwrite")
        fpr = FilePostingReader(postfile, 0, format)
        assert_equal(postings, list(fpr.items_as("frequency")))
        postfile.close()
Esempio n. 20
0
def roundtrip(postings, format, astype):
    with TempStorage("roundtrip") as st:
        postfile = st.create_file(astype)
        getweight = format.decoder("weight")
        fpw = FilePostingWriter(postfile, blocklimit=8)
        fpw.start(format)
        for id, value in postings:
            v = format.encode(value)
            fpw.write(id, getweight(v), v, 0)
        fpw.finish()
        fpw.close()

        postfile = st.open_file(astype)
        fpr = FilePostingReader(postfile, 0, format)
        readback = list(fpr.items_as(astype))
        postfile.close()
        return readback
Esempio n. 21
0
def test_fields():
    with TempStorage() as st:
        f = st.create_file("test")
        gw = dawg.GraphWriter(f)
        gw.start_field("f1")
        gw.insert("a")
        gw.insert("aa")
        gw.insert("ab")
        gw.finish_field()
        gw.start_field("f2")
        gw.insert("ba")
        gw.insert("baa")
        gw.insert("bab")
        gw.close()

        gr = dawg.GraphReader(st.open_file("test"))
        cur1 = dawg.Cursor(gr, gr.root("f1"))
        cur2 = dawg.Cursor(gr, gr.root("f2"))
        assert_equal(list(cur1.flatten_strings()), ["a", "aa", "ab"])
        assert_equal(list(cur2.flatten_strings()), ["ba", "baa", "bab"])
        gr.close()
Esempio n. 22
0
def _roundtrip(content, format_, astype, ana=None):
    with TempStorage("roundtrip") as st:
        codec = default_codec()
        seg = codec.new_segment(st, "")
        ana = ana or analysis.StandardAnalyzer()
        field = fields.FieldType(format=format_, analyzer=ana)

        fw = codec.field_writer(st, seg)
        fw.start_field("f1", field)
        for text, _, weight, valuestring in sorted(field.index(content)):
            fw.start_term(text)
            fw.add(0, weight, valuestring, None)
            fw.finish_term()
        fw.finish_field()
        fw.close()

        tr = codec.terms_reader(st, seg)
        ps = []
        for fieldname, text in tr.keys():
            m = tr.matcher(fieldname, text, format_)
            ps.append((text, m.value_as(astype)))
        tr.close()
        return ps
Esempio n. 23
0
def test_random():
    def randstring():
        length = random.randint(1, 10)
        a = array("B", (random.randint(0, 255) for _ in xrange(length)))
        return array_tobytes(a)

    keys = sorted(randstring() for _ in xrange(1000))

    with TempStorage() as st:
        gwrite(keys, st)
        gr = greader(st)
        cur = dawg.Cursor(gr)
        s1 = cur.flatten()
        s2 = sorted(set(keys))
        for i, (k1, k2) in enumerate(zip(s1, s2)):
            assert k1 == k2, "%s: %r != %r" % (i, k1, k2)

        sample = list(keys)
        random.shuffle(sample)
        for key in sample:
            cur.reset()
            cur.find_path(key)
            assert_equal(cur.prefix_bytes(), key)
        gr.close()
Esempio n. 24
0
def test_simple_compound_mmap():
    with TempStorage("compound") as st:
        assert st.supports_mmap
        _test_simple_compound(st)