def _rt(c, values, default):
    # Continuous
    st = RamStorage()
    f = st.create_file("test1")
    f.write(b("hello"))
    w = c.writer(f)
    for docnum, v in enumerate(values):
        w.add(docnum, v)
    w.finish(len(values))
    length = f.tell() - 5
    f.close()

    f = st.open_file("test1")
    r = c.reader(f, 5, length, len(values))
    assert values == list(r)
    for x in range(len(values)):
        assert values[x] == r[x]
    f.close()

    # Sparse
    doccount = len(values) * 7 + 15
    target = [default] * doccount

    f = st.create_file("test2")
    f.write(b("hello"))
    w = c.writer(f)
    for docnum, v in izip(xrange(10, doccount, 7), values):
        target[docnum] = v
        w.add(docnum, v)
    w.finish(doccount)
    length = f.tell() - 5
    f.close()

    f = st.open_file("test2")
    r = c.reader(f, 5, length, doccount)
    assert target == list(r)
    for x in range(doccount):
        assert target[x] == r[x]

    lr = r.load()
    assert target == list(lr)
    f.close()
Exemple #2
0
 def __eq__(self, other):
     for a, b in izip(self, other):
         if a != b:
             return False
     return True
Exemple #3
0
 def key_to_name(self, key):
     return tuple(catter.key_to_name(keypart)
                  for catter, keypart
                  in izip(self.catters, key))
 def __iter__(self):
     for idset, offset in izip(self.idsets, self.offsets):
         for docnum in idset:
             yield docnum + offset
Exemple #5
0
 def all_doc_ids(self):
     for r, offset in izip(self._readers, self._doc_offsets):
         for docnum in r.all_doc_ids():
             yield docnum + offset
Exemple #6
0
 def deleted_docs(self):
     for r, offset in izip(self._readers, self._doc_offsets):
         for docnum in r.deleted_docs():
             yield docnum + offset
Exemple #7
0
    def all_items(self):
        values = self._values
        if values is None:
            values = repeat('')

        return izip(self._ids, values)
Exemple #8
0
def _do_basic(writerclass):
    # Create the domain data

    # List of individual words added to the index
    words = []
    # List of string values added to the index
    docs = []
    # A ring buffer for creating string values
    buf = deque()
    for ls in permutations(u("abcd")):
        word = "".join(ls)
        # Remember this word is in the index (to check lexicon)
        words.append(word)

        # Add this word on to the end, pop the first word off to create N word
        # documents where N <= 10
        buf.append(word)
        if len(buf) > 10:
            buf.popleft()
        # Create a copy of the buffer and shuffle it to create a document value
        # and add it to the list of document values
        doc = list(buf)
        random.shuffle(doc)
        docs.append(" ".join(doc))
    # Shuffle the list of document values
    random.shuffle(docs)

    schema = fields.Schema(text=fields.TEXT(stored=True, spelling=True,
                                            vector=True),
                           row=fields.NUMERIC(stored=True))

    with TempIndex(schema, storage_debug=True) as ix:
        # Add the domain data to the index
        with writerclass(ix, procs=3) as w:
            for i, value in enumerate(docs):
                w.add_document(text=value, row=i)

        with ix.searcher() as s:
            r = s.reader()

            # Check the lexicon
            for word, term in izip(words, r.field_terms("text")):
                assert word == term
            # Check the doc count
            assert r.doc_count_all() == len(docs)

            # Check the word graph
            assert r.has_word_graph("text")
            flat = [w.decode("latin1") for w in r.word_graph("text").flatten()]
            assert flat == words

            # Check there are lengths
            total = sum(r.doc_field_length(docnum, "text", 0)
                        for docnum in xrange(r.doc_count_all()))
            assert total > 0

            # Check per-doc info
            for i, value in enumerate(docs):
                pieces = value.split()
                docnum = s.document_number(row=i)

                # Check stored value
                sv = r.stored_fields(docnum)
                assert sv["text"] == value

                # Check vectors
                vr = r.vector(docnum, "text")
                # Get the terms and positions from the vector matcher
                iv = list(vr.items_as("positions"))
                # What the vector should look like
                ov = sorted((text, [i]) for i, text in enumerate(pieces))
                assert iv == ov

                # Check field length
                assert r.doc_field_length(docnum, "text") == len(pieces)