def test_uncommitted_read_on_closed(self):
    """Read back every record from a closed file with uncommitted buckets visible.

    Appends ``length`` records through a gzip-compressed writer, closes the
    writer, then opens a reader with ``uncommitted_bucket_visible=True`` and
    checks both the reported record count and each record's payload.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        path = os.path.join(temp_dir, 'loss')
        length = 1000

        writer = Writer(path, compression='gzip')
        try:
            for index in range(length):
                writer.append_record(str(index).encode())
        finally:
            # Close even when an append raises, so the writer is released
            # before the temporary directory is cleaned up.
            writer.close()

        reader = Reader(path, uncommitted_bucket_visible=True)
        try:
            assert reader.get_records_num() == length
            for index in range(length):
                # Each payload is the decimal text of its own position.
                assert index == int(reader.get(index).decode())
        finally:
            # A failing assertion must not leave the reader open.
            reader.close()
def test_read_write(self):
    """Check that flushed-but-unclosed records are hidden from a default reader.

    Appends ``length`` uncompressed records and calls ``flush()`` without
    closing the writer, then opens a reader with
    ``uncommitted_bucket_visible=False`` and expects it to report zero
    records.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        path = os.path.join(temp_dir, 'loss')
        length = 1000

        writer = Writer(path, compression=None)
        try:
            for index in range(length):
                writer.append_record(str(index).encode())
            writer.flush()

            # The writer is intentionally still open while the reader looks
            # at the file.
            # NOTE(review): presumably flushed data stays in an uncommitted
            # bucket until close() -- confirm against Writer.
            reader = Reader(path, uncommitted_bucket_visible=False)
            try:
                assert reader.get_records_num() == 0
            finally:
                # A failing assertion must not leave the reader open.
                reader.close()
        finally:
            # Runs after the reader is closed, matching the original
            # close order, and also when an earlier step raises.
            writer.close()
def test_simple_int(self):
    """Check index-filtered reads across two separate writer sessions.

    Session one appends 1000 records tagged ``subset='train'`` followed by
    500 tagged ``subset='val'``; session two reopens the same path and
    appends 100 more of each. The reader is then queried per tag and must
    return the payloads in append order: 0..999 then 0..99 for ``train``,
    and 0..499 then 0..99 for ``val``.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        path = os.path.join(temp_dir, 'loss')

        # First writer session.
        writer = Writer(path, compression=None)
        try:
            for number in range(1000):
                writer.append_record(
                    str(number).encode(),
                    index={'subset': 'train', 'subtask': 'domain'},
                )
            for number in range(500):
                writer.append_record(
                    str(number).encode(),
                    index={'subset': 'val', 'subtask': 'domain'},
                )
        finally:
            # Close even when an append raises so the writer is released.
            writer.close()

        # Second writer session on the same path.
        writer = Writer(path, compression=None)
        try:
            for number in range(100):
                writer.append_record(
                    str(number).encode(),
                    index={'subset': 'train', 'subtask': 'domain'},
                )
            for number in range(100):
                writer.append_record(
                    str(number).encode(),
                    index={'subset': 'val', 'subtask': 'domain'},
                )
        finally:
            writer.close()

        reader = Reader(path)
        try:
            # 'train': positions 0..999 come from session one.
            for position in range(1000):
                assert position == int(
                    reader.get(position, {'subset': 'train', 'subtask': 'domain'})
                )
            # 'train': positions 1000..1099 come from session two, whose
            # payloads restart at 0.
            for position in range(1000, 1100):
                assert position - 1000 == int(
                    reader.get(position, {'subset': 'train', 'subtask': 'domain'})
                )
            # 'val': positions 0..499 come from session one.
            for position in range(500):
                assert position == int(
                    reader.get(position, {'subset': 'val', 'subtask': 'domain'})
                )
            # 'val': positions 500..599 come from session two, whose
            # payloads restart at 0.
            for position in range(500, 600):
                assert position - 500 == int(
                    reader.get(position, {'subset': 'val', 'subtask': 'domain'})
                )
        finally:
            # A failing assertion must not leave the reader open.
            reader.close()