def test_uncommitted_read_on_closed(self):
        """Read back every record from a closed gzip-compressed file.

        Writes 1000 records (the decimal string of each position, encoded
        to bytes), closes the writer, then opens a reader with
        ``uncommitted_bucket_visible=True`` and checks that the record
        count and every record's content match what was written.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')
            length = 1000

            writer = Writer(path, compression='gzip')
            try:
                for index in range(length):
                    writer.append_record(str(index).encode())
            finally:
                # Close even when a write fails so no handle is left open
                # inside the temporary directory while it is being removed.
                writer.close()

            reader = Reader(path, uncommitted_bucket_visible=True)
            try:
                assert reader.get_records_num() == length
                for index in range(length):
                    assert index == int(reader.get(index).decode())
            finally:
                # A failing assertion must not skip the close: an open
                # handle can break directory cleanup and hide the failure.
                reader.close()
    def test_read_write(self):
        """Check flushed records stay hidden while the writer is open.

        After every appended-and-flushed record, a fresh reader is opened
        with ``uncommitted_bucket_visible=False`` and must report zero
        records; the writer is only closed once all 1000 records have
        been written.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            writer = Writer(path, compression=None)
            try:
                length = 1000
                for index in range(length):
                    writer.append_record(str(index).encode())
                    writer.flush()

                    reader = Reader(path, uncommitted_bucket_visible=False)
                    try:
                        assert reader.get_records_num() == 0
                    finally:
                        # Release this iteration's reader even when the
                        # assertion fails.
                        reader.close()
            finally:
                # Always close the writer so the temporary directory can
                # be removed without open handles inside it.
                writer.close()
Exemple #3
0
    def test_simple_int(self):
        """Check indexed reads across two separate write sessions.

        Session one writes 1000 'train' and 500 'val' records; session two
        reopens the same path and writes 100 more of each. Every record is
        the decimal string of its position within its own session and
        subset. The test expects that, per index key, the second session's
        records are readable at the positions directly after the first
        session's records.
        """
        def index_key(subset):
            # Build a fresh dict per call, as the original literals did.
            return {'subset': subset, 'subtask': 'domain'}

        def write_session(path, train_count, val_count):
            # One open/append/close cycle: all 'train' records first,
            # then all 'val' records.
            writer = Writer(path, compression=None)
            try:
                batches = (('train', train_count), ('val', val_count))
                for subset, count in batches:
                    for value in range(count):
                        writer.append_record(str(value).encode(),
                                             index=index_key(subset))
            finally:
                # Close even on failure so no handle outlives the test.
                writer.close()

        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            write_session(path, train_count=1000, val_count=500)
            write_session(path, train_count=100, val_count=100)

            # (subset, first position, record count) for each written
            # batch, in the order the positions are checked.
            expected_batches = (
                ('train', 0, 1000),
                ('train', 1000, 100),
                ('val', 0, 500),
                ('val', 500, 100),
            )

            reader = Reader(path)
            try:
                for subset, start, count in expected_batches:
                    for offset in range(count):
                        record = reader.get(start + offset,
                                            index_key(subset))
                        # Each batch restarts its payload counter at 0.
                        assert offset == int(record)
            finally:
                # A failing assertion must not leave the reader open.
                reader.close()