def test_random_multistream():
    """Round-trip randomly interleaved streams through a compound file.

    Builds up to 100 streams (names are random, so collisions collapse into
    one entry) of 2500 random lowercase bytes each, writes them to a
    ``CompoundWriter`` in 1000-byte chunks while choosing the stream to
    advance at random, saves the result as the compound file ``"test"``,
    and checks that every stream reads back unchanged.
    """
    letters = "abcdefghijklmnopqrstuvwxyz"
    chunksize = 1000

    def randstring(n):
        # n random lowercase letters, returned as bytes
        s = "".join(random.choice(letters) for _ in xrange(n))
        return s.encode("latin1")

    # Expected contents, keyed by stream name
    domain = {}
    for _ in xrange(100):
        name = randstring(random.randint(5, 10))
        domain[name] = randstring(2500)

    # One in-memory source per stream; an entry is removed once it runs dry
    outfiles = dict((name, BytesIO(value)) for name, value in domain.items())

    with TempStorage() as st:
        # NOTE(review): buffersize=1024 is smaller than each 2500-byte
        # stream -- presumably chosen to exercise the writer's buffering;
        # confirm against CompoundWriter.
        msw = compound.CompoundWriter(st, buffersize=1024)
        mfiles = dict((name, msw.create_file(name)) for name in domain)

        while outfiles:
            name = random.choice(list(outfiles))
            v = outfiles[name].read(chunksize)
            mfiles[name].write(v)
            if len(v) < chunksize:
                # A short read means this source is exhausted
                del outfiles[name]

        f = st.create_file("test")
        msw.save_as_compound(f)

        f = st.open_file("test")
        msr = compound.CompoundStorage(f)
        try:
            for name, value in domain.items():
                assert msr.open_file(name).read() == value
        finally:
            # Close the reader even when a comparison fails, so the handle
            # is released before the temporary storage is torn down.
            msr.close()
def test_multistream():
    """Check that interleaved writes to several streams stay separated.

    Three streams ("a", "b", "c") are created on one ``CompoundWriter`` and
    written to in alternating order. After saving as the compound file
    ``"test"``, each stream must read back as the concatenation of its own
    chunks in the order they were written.
    """
    # (stream name, chunk) pairs in the order they are written
    domain = [("a", "12345"), ("b", "abc"), ("c", "AaBbC"),
              ("a", "678"), ("c", "cDdEeF"), ("b", "defgh"),
              ("b", "ijk"), ("c", "fGgHh"), ("a", "9abc")]

    st = RamStorage()
    msw = compound.CompoundWriter(st)
    files = dict((name, msw.create_file(name)) for name in "abc")
    for name, data in domain:
        files[name].write(b(data))
    f = st.create_file("test")
    msw.save_as_compound(f)

    f = st.open_file("test")
    msr = compound.CompoundStorage(f)
    try:
        assert msr.open_file("a").read() == b("123456789abc")
        assert msr.open_file("b").read() == b("abcdefghijk")
        assert msr.open_file("c").read() == b("AaBbCcDdEeFfGgHh")
    finally:
        # Release the reader whether or not the comparisons pass
        msr.close()
def __init__(self, codec, storage, segment):
    """Initialise the writer's state and open its "_stored" column.

    :param codec: kept as ``self._codec``.
    :param storage: kept as ``self._storage``; must provide
        ``temp_storage(name)``.
    :param segment: kept as ``self._segment``; its ``indexname`` is used
        to name the temporary storage.
    """
    self._codec, self._storage, self._segment = codec, storage, segment

    # Columns are written through a CompoundWriter backed by a temporary
    # storage named "<indexname>.tmp"
    self._cols = compound.CompoundWriter(
        storage.temp_storage("%s.tmp" % segment.indexname)
    )
    self._colwriters = {}
    self._create_column("_stored", STORED_COLUMN)

    # Counters
    self._fieldlengths = defaultdict(int)
    self._doccount = 0

    # State of the current document (none yet)
    self._indoc = False
    self._docnum = None
    self._storedfields = None

    self.is_closed = False

    # The vector file is created lazily, the first time a vector is
    # actually added
    self._vpostfile = None