Exemple #1
0
    def load(
            self,
            start: Optional[Seconds] = None,
            duration: Optional[Seconds] = None,
    ) -> np.ndarray:
        """
        Read the features (or a sub-span of them) from the underlying storage.

        :param start: requested beginning of the span; when ``None``,
            ``self.start`` is used.
        :param duration: requested length of the span; when ``None``,
            no right-side trimming is requested.
        :return: whatever the storage reader returns for ``self.storage_key``
            with the computed frame offsets.
        :raises ValueError: when ``start`` is earlier than ``self.start`` by
            more than ``1e-5``.
        """

        def as_frames(span):
            # Attributes are looked up only when a conversion is actually needed.
            return compute_num_frames(span, frame_shift=self.frame_shift,
                                      sampling_rate=self.sampling_rate)

        # noinspection PyArgumentList
        reader = get_reader(self.storage_type)(self.storage_path)

        start = self.start if start is None else start

        # Left boundary: a tiny undershoot (float noise) is tolerated, anything
        # earlier than that is outside of the available range.
        if start < self.start - 1e-5:
            raise ValueError(f"Cannot load features for recording {self.recording_id} starting from {start}s. "
                             f"The available range is ({self.start}, {self.end}) seconds.")
        if isclose(start, self.start):
            first_frame = 0
        else:
            first_frame = as_frames(start - self.start)

        # Right boundary: ``None`` means "no right offset"; it is kept when no
        # duration was given or when the requested end matches ``self.end``.
        last_frame = None
        if duration is not None:
            end = start + duration
            if not isclose(end, self.end):
                last_frame = first_frame + as_frames(duration)

        return reader.read(
            self.storage_key,
            left_offset_frames=first_frame,
            right_offset_frames=last_frame
        )
Exemple #2
0
    def load(
            self,
            start: Optional[Seconds] = None,
            duration: Optional[Seconds] = None,
    ) -> np.ndarray:
        """
        Read the features (or a sub-span of them) from the underlying storage.

        :param start: requested beginning of the span; when ``None``,
            ``self.start`` is used.
        :param duration: requested length of the span; when ``None``,
            no right-side trimming is requested.
        :return: whatever the storage reader returns for ``self.storage_key``
            with the computed frame offsets.
        :raises ValueError: when ``start`` is earlier than ``self.start`` by
            more than ``1e-5``.
        """
        # noinspection PyArgumentList
        reader = get_reader(self.storage_type)(self.storage_path)

        start = self.start if start is None else start

        # Left boundary: a tiny undershoot (float noise) is tolerated, anything
        # earlier than that is outside of the available range.
        if start < self.start - 1e-5:
            raise ValueError(f"Cannot load features for recording {self.recording_id} starting from {start}s. "
                             f"The available range is ({self.start}, {self.end}) seconds.")
        if isclose(start, self.start):
            first_frame = 0
        else:
            first_frame = round((start - self.start) / self.frame_shift)

        # Right boundary: expressed as a negated frame count measured from
        # ``self.end`` (think of slicing an array like a[20:-100]).
        last_frame = None
        if duration is not None:
            end = start + duration
            if not isclose(end, self.end):
                # NOTE(review): when ``end`` exceeds ``self.end`` this value becomes
                # positive; how the reader treats that is not visible here - confirm.
                trailing_frames = -round((self.end - end) / self.frame_shift)
                # A duration that is almost the full length can round to zero
                # frames; fall back to "no right offset" in that case.
                last_frame = trailing_frames if trailing_frames != 0 else None

        return reader.read(
            self.storage_key,
            left_offset_frames=first_frame,
            right_offset_frames=last_frame
        )
Exemple #3
0
    def load(self) -> np.ndarray:
        """
        Read the whole array from its storage backend.

        The reader type is looked up from ``self.storage_type``, instantiated
        with ``self.storage_path`` and asked for ``self.storage_key``.

        :return: the result of the reader's ``read`` call.
        """
        from lhotse.features.io import get_reader

        # noinspection PyArgumentList
        reader_type = get_reader(self.storage_type)
        reader = reader_type(self.storage_path)
        return reader.read(self.storage_key)
Exemple #4
0
    def load(
        self,
        start: Optional[Seconds] = None,
        duration: Optional[Seconds] = None,
    ) -> np.ndarray:
        """
        Read the array from its storage backend, optionally only a part of it
        along the ``temporal_dim``.

        :param start: beginning of the requested span; it is converted to a
            frame offset (relative to ``self.start``) using ``self.frame_shift``.
            When ``None``, ``self.start`` is used.
        :param duration: length of the requested span; it is converted to a
            number of frames using ``self.frame_shift``. When ``None``, no right
            offset is passed to the reader.
        :return: the result of the reader's ``read`` call for the computed offsets.
        :raises ValueError: when ``start`` is earlier than ``self.start`` by
            more than ``1e-5``.
        """
        from lhotse.features.io import get_reader

        def as_frames(span):
            # The shape is read lazily, only when a conversion actually happens.
            return seconds_to_frames(
                span,
                frame_shift=self.frame_shift,
                max_index=self.shape[self.temporal_dim],
            )

        # noinspection PyArgumentList
        reader = get_reader(self.array.storage_type)(self.array.storage_path)

        start = self.start if start is None else start

        # Left boundary: a tiny undershoot (float noise) is tolerated, anything
        # earlier than that is outside of the available range.
        if start < self.start - 1e-5:
            raise ValueError(
                f"Cannot load array starting from {start}s. "
                f"The available range is ({self.start}, {self.end}) seconds.")
        if isclose(start, self.start):
            first_frame = 0
        else:
            first_frame = as_frames(start - self.start)

        # Right boundary: left offset plus the requested number of frames,
        # or ``None`` when the caller did not limit the duration.
        if duration is None:
            last_frame = None
        else:
            last_frame = first_frame + as_frames(duration)

        return reader.read(
            self.array.storage_key,
            left_offset_frames=first_frame,
            right_offset_frames=last_frame,
        )
Exemple #5
0
def test_chunky_writer_left_right_offsets_equal(writer_type, ext):
    """
    Write a small random array with ``writer_type`` and check that reading it
    back with equal left and right frame offsets gives an empty array.
    """
    # Generate small random numbers that are nicely compressed with lilcom
    uniform = np.random.uniform(size=(11, 80)).astype(np.float32)
    expected = np.log(uniform / 100)

    with NamedTemporaryFile(suffix=ext) as tmp_file:

        with writer_type(tmp_file.name) as writer:
            stored_key = writer.write("dummy-key", expected)

        tmp_file.flush()
        reader_type = get_reader(writer.name)
        reader = reader_type(tmp_file.name)

        # Sanity check: the full read matches what was written (to 1 decimal place)
        full = reader.read(stored_key)
        np.testing.assert_almost_equal(expected, full, decimal=1)

        # An empty span (both offsets equal to zero) has to yield an empty array
        empty = reader.read(stored_key, left_offset_frames=0, right_offset_frames=0)
        assert empty.shape == (0,)