Esempio n. 1
0
    def write(self, key: str, value: np.ndarray) -> str:
        """
        Store ``value`` under ``key`` in ``self.hdf`` as a sequence of
        compressed chunks.

        The array is compressed with ``lilcom_compress_chunked`` (using
        ``self.tick_power`` and ``self.chunk_size``), and every resulting
        chunk becomes one variable-length ``uint8`` row of a new dataset.

        :param key: name of the dataset created in ``self.hdf``.
        :param value: the array to compress and store.
        :return: ``key``, unchanged.
        """
        import h5py
        from lhotse.features.compression import lilcom_compress_chunked

        chunks = lilcom_compress_chunked(
            value,
            tick_power=self.tick_power,
            chunk_size=self.chunk_size,
        )

        # Chunks are stored as variable-length byte rows,
        # one dataset row per chunk.
        byte_row_dtype = h5py.vlen_dtype(np.dtype('uint8'))
        dataset = self.hdf.create_dataset(
            key,
            dtype=byte_row_dtype,
            shape=(len(chunks), ),
        )
        for row, blob in enumerate(chunks):
            # View the compressed chunk as a uint8 array for storage.
            dataset[row] = np.frombuffer(blob, dtype=np.uint8)
        return key
Esempio n. 2
0
    def write(self, key: str, value: np.ndarray) -> str:
        """
        Compress ``value`` in chunks, write the chunks to ``self.file``,
        and return a string describing where they were written.

        The array is compressed with ``lilcom_compress_chunked`` (using
        ``self.tick_power`` and ``self.CHUNK_SIZE``). Each chunk is written
        to ``self.file`` in order, and ``self.curr_offset`` is advanced by
        the number of bytes reported by each write.

        :param key: not used by this method; the returned string is built
            from offsets only.
        :param value: the array to compress and store.
        :return: a comma-separated string such as ``"14601,31,23,42"``.
            The first number is the offset for the whole array
            (``self.curr_offset`` before writing), and the following
            numbers are relative offsets for each chunk, i.e. relative
            to the previous chunk start.
        """
        from lhotse.features.compression import lilcom_compress_chunked

        chunks = lilcom_compress_chunked(
            value,
            tick_power=self.tick_power,
            chunk_size=self.CHUNK_SIZE,
        )

        offsets = [self.curr_offset]
        for chunk in chunks:
            # NOTE: relies on ``self.file.write`` returning the number of
            # bytes written.
            nbytes = self.file.write(chunk)
            offsets.append(nbytes)
            self.curr_offset += nbytes

        return ",".join(map(str, offsets))