def test_gzip_compression(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            writer = Writer(path, compression='gzip')
            length = 1000
            for i in range(length):
                writer.append_record(str(i).encode())
            writer.close()

            reader = Reader(path)
            assert reader.get_records_num() == length

            for index in range(length):
                assert index == int(reader.get(index).decode())
    def test_read_write(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            writer = Writer(path, compression=None)

            length = 1000
            for index in range(length):
                writer.append_record(str(index).encode())
                writer.flush()

                reader = Reader(path, uncommitted_bucket_visible=False)
                assert reader.get_records_num() == 0
                reader.close()

            writer.close()
Exemple #3
0
    def test_simple_binary(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            writer = Writer(path, compression=None)
            length = 5000
            for index in range(length):
                entry = (str(index) * index).encode()
                writer.append_record(entry)
            writer.close()

            reader = Reader(path)
            assert reader.get_records_num() == length

            for index in range(length):
                entry = (str(index) * index).encode()
                assert entry == reader.get(index)
Exemple #4
0
    def test_append_mode_binary(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            length = 1000
            chunks = 5
            chunk_len = length // chunks

            for chunk in range(chunks):
                writer = Writer(path, rewrite=False)
                for index in range(chunk * chunk_len, (chunk + 1) * chunk_len):
                    entry = str(index).encode()
                    writer.append_record(entry)
                writer.close()

            reader = Reader(path)
            assert reader.get_records_num() == length

            for index in range(length):
                entry = str(index).encode()
                assert entry == reader.get(index)
Exemple #5
0
    def test_write_mode_binary(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            length = 1000

            writer = Writer(path, rewrite=True)
            for index in range(length // 2):
                writer.append_record(b'0')
            writer.close()

            writer = Writer(path, rewrite=True)
            for index in range(length // 2, length):
                entry = str(index).encode()
                writer.append_record(entry)
            writer.close()

            reader = Reader(path)
            assert reader.get_records_num() == length // 2

            for index in range(length // 2, length):
                entry = str(index).encode()
                assert entry == reader.get(index - length // 2)
Exemple #6
0
    def test_simple_int(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            writer = Writer(path, compression=None)
            for index in range(1000):
                writer.append_record(str(index).encode(),
                                     index={'subset': 'train',
                                            'subtask': 'domain'})
            for index in range(500):
                writer.append_record(str(index).encode(),
                                     index={'subset': 'val',
                                            'subtask': 'domain'})
            writer.close()

            writer = Writer(path, compression=None)
            for index in range(100):
                writer.append_record(str(index).encode(),
                                     index={'subset': 'train',
                                            'subtask': 'domain'})
            for index in range(100):
                writer.append_record(str(index).encode(),
                                     index={'subset': 'val',
                                            'subtask': 'domain'})
            writer.close()

            reader = Reader(path)
            for index in range(1000):
                assert index == int(reader.get(index, {'subset': 'train',
                                                       'subtask': 'domain'}))
            for index in range(1000, 1100):
                assert index - 1000 == int(reader.get(index,
                                                      {'subset': 'train',
                                                       'subtask': 'domain'}))
            for index in range(500):
                assert index == int(reader.get(index, {'subset': 'val',
                                                       'subtask': 'domain'}))
            for index in range(500, 600):
                assert index - 500 == int(reader.get(index,
                                                     {'subset': 'val',
                                                      'subtask': 'domain'}))
            reader.close()
    def test_dirty_read(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            writer = Writer(path, compression='gzip')

            length = 1000
            for index in range(length):
                writer.append_record(str(index).encode())
                writer.flush()

                reader = Reader(path, uncommitted_bucket_visible=True)
                assert reader.get_records_num() == index + 1
                assert index == int(reader.get(index).decode())
                reader.close()

            writer.close()
    def test_modification_time(self):
        with tempfile.TemporaryDirectory() as temp_dir:
            path = os.path.join(temp_dir, 'loss')

            writer = Writer(path, compression=None)
            length = 100
            for index in range(length):
                writer.append_record(str(index).encode())
            writer.close()

            reader = Reader(path)
            first_mod_time = reader.get_modification_time()

            writer = Writer(path, compression=None)
            length = 100
            for index in range(length):
                writer.append_record(str(index).encode())
            writer.close()

            reader = Reader(path)
            second_mod_time = reader.get_modification_time()

            assert first_mod_time != second_mod_time