Exemple #1
0
    def test_simple_int(self):
        """Check keyed reads after two consecutive writer sessions.

        Two ``Writer`` sessions are run against the same path. Each one
        appends records under the 'train' key and then under the 'val' key.
        The assertions expect the second session's records to be addressable
        right after the first session's records of the same key.
        """
        def make_key(subset):
            # Build a new dict on every call, so no key object is shared
            # between separate append/get calls.
            return {'subset': subset, 'subtask': 'domain'}

        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            # (train records, val records) written by each session, in order.
            sessions = ((1000, 500), (100, 100))
            for train_count, val_count in sessions:
                writer = Writer(path, compression=None)
                for number in range(train_count):
                    writer.append_record(str(number).encode(),
                                         index=make_key('train'))
                for number in range(val_count):
                    writer.append_record(str(number).encode(),
                                         index=make_key('val'))
                writer.close()

            reader = Reader(path)
            # (key, records from the first session, records from the second).
            expectations = (('train', 1000, 100), ('val', 500, 100))
            for subset, first_count, second_count in expectations:
                # Records of the first session sit at positions 0..first-1.
                for position in range(first_count):
                    assert position == int(reader.get(position,
                                                      make_key(subset)))
                # Records of the second session follow and restart from 0.
                for position in range(first_count,
                                      first_count + second_count):
                    assert position - first_count == int(
                        reader.get(position, make_key(subset)))
            reader.close()
    def test_uncommitted_read_on_closed(self):
        """Read a closed, gzip-compressed store with uncommitted data visible.

        The writer is closed before the reader is opened with
        ``uncommitted_bucket_visible=True``; every record must still be
        counted and returned in the order it was written.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')
            total = 1000

            writer = Writer(path, compression='gzip')
            for number in range(total):
                payload = str(number).encode()
                writer.append_record(payload)
            writer.close()

            reader = Reader(path, uncommitted_bucket_visible=True)
            assert reader.get_records_num() == total
            for position in range(total):
                stored = reader.get(position).decode()
                assert position == int(stored)
            reader.close()
Exemple #3
0
    def test_simple_int(self):
        """Write 1000 integer records uncompressed and read them back.

        Each record is the decimal text of its position, so a read at
        ``index`` must decode to ``index``.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            writer = Writer(path, compression=None)
            length = 1000
            for index in range(length):
                writer.append_record(str(index).encode())
            writer.close()

            reader = Reader(path)
            try:
                assert reader.get_records_num() == length

                for index in range(length):
                    assert index == int(reader.get(index).decode())
            finally:
                # Close the reader even when an assertion fails, so it is
                # not left open while the temporary directory is removed.
                reader.close()
Exemple #4
0
    def test_simple_binary(self):
        """Write 5000 variable-length records and read them back verbatim.

        Record ``index`` is the decimal text of ``index`` repeated ``index``
        times, so sizes grow with the position and record 0 is empty.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            writer = Writer(path, compression=None)
            length = 5000
            for index in range(length):
                writer.append_record((str(index) * index).encode())
            writer.close()

            reader = Reader(path)
            try:
                assert reader.get_records_num() == length

                for index in range(length):
                    expected = (str(index) * index).encode()
                    assert expected == reader.get(index)
            finally:
                # Close the reader even when an assertion fails, so it is
                # not left open while the temporary directory is removed.
                reader.close()
Exemple #5
0
    def test_append_mode_binary(self):
        """Verify that ``rewrite=False`` appends across writer sessions.

        The 1000 records are written in 5 separate sessions of 200 records
        each; the reader must then see all of them as one continuous
        sequence.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            length = 1000
            chunks = 5
            chunk_len = length // chunks

            for chunk in range(chunks):
                # A new writer per chunk; rewrite=False must keep the
                # records of the earlier chunks.
                writer = Writer(path, rewrite=False)
                for index in range(chunk * chunk_len, (chunk + 1) * chunk_len):
                    writer.append_record(str(index).encode())
                writer.close()

            reader = Reader(path)
            try:
                assert reader.get_records_num() == length

                for index in range(length):
                    assert str(index).encode() == reader.get(index)
            finally:
                # Close the reader even when an assertion fails, so it is
                # not left open while the temporary directory is removed.
                reader.close()
Exemple #6
0
    def test_write_mode_binary(self):
        """Verify that ``rewrite=True`` discards records of an earlier session.

        The first session writes 500 placeholder records. The second session,
        also opened with ``rewrite=True``, writes 500 numbered records. Only
        the second session's records may remain.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            length = 1000
            half = length // 2

            # First session: placeholder content that must be overwritten.
            writer = Writer(path, rewrite=True)
            for _ in range(half):
                writer.append_record(b'0')
            writer.close()

            # Second session: records numbered half..length-1.
            writer = Writer(path, rewrite=True)
            for index in range(half, length):
                writer.append_record(str(index).encode())
            writer.close()

            reader = Reader(path)
            try:
                assert reader.get_records_num() == half

                # Positions restart at 0, so shift the written number back.
                for index in range(half, length):
                    assert str(index).encode() == reader.get(index - half)
            finally:
                # Close the reader even when an assertion fails, so it is
                # not left open while the temporary directory is removed.
                reader.close()