def load(
    self,
    start: Optional[Seconds] = None,
    duration: Optional[Seconds] = None,
) -> np.ndarray:
    """
    Read the features from storage, optionally restricted to a sub-span.

    :param start: beginning of the requested span, in seconds, expressed on the
        same timeline as ``self.start``. ``None`` means "begin at ``self.start``".
    :param duration: length of the requested span in seconds. ``None`` means
        "read until the end of the stored features".
    :return: the result of the storage reader's ``read`` call.
    :raises ValueError: if ``start`` precedes ``self.start`` by more than ``1e-5``.
    """
    # The reader is created before any argument validation takes place.
    # noinspection PyArgumentList
    storage = get_reader(self.storage_type)(self.storage_path)

    if start is None:
        start = self.start

    # A request that begins before the stored span cannot be satisfied.
    if start < self.start - 1e-5:
        raise ValueError(
            f"Cannot load features for recording {self.recording_id} starting from {start}s. "
            f"The available range is ({self.start}, {self.end}) seconds."
        )

    # Left boundary: stays 0 unless the request begins after ``self.start``.
    first_frame = 0
    if not isclose(start, self.start):
        first_frame = compute_num_frames(
            start - self.start,
            frame_shift=self.frame_shift,
            sampling_rate=self.sampling_rate,
        )

    # Right boundary: stays ``None`` (read to the end) unless the request
    # finishes somewhere other than ``self.end``.
    last_frame = None
    if duration is not None:
        requested_end = start + duration
        if not isclose(requested_end, self.end):
            span_frames = compute_num_frames(
                duration,
                frame_shift=self.frame_shift,
                sampling_rate=self.sampling_rate,
            )
            last_frame = first_frame + span_frames

    return storage.read(
        self.storage_key,
        left_offset_frames=first_frame,
        right_offset_frames=last_frame,
    )
def load(
    self,
    start: Optional[Seconds] = None,
    duration: Optional[Seconds] = None,
) -> np.ndarray:
    """
    Load the features from storage, optionally restricted to a sub-span.

    :param start: beginning of the requested span, in seconds, expressed on the
        same timeline as ``self.start``. ``None`` means "begin at ``self.start``".
    :param duration: length of the requested span in seconds. ``None`` means
        "read until the end of the stored features". A span that reaches
        ``self.end`` or extends past it is also read until the end.
    :return: the result of the storage reader's ``read`` call.
    :raises ValueError: if ``start`` precedes ``self.start`` by more than ``1e-5``.
    """
    # noinspection PyArgumentList
    storage = get_reader(self.storage_type)(self.storage_path)
    left_offset_frames, right_offset_frames = 0, None

    if start is None:
        start = self.start

    # In case the caller requested only a sub-span of the features, trim them.

    # Left trim
    if start < self.start - 1e-5:
        raise ValueError(
            f"Cannot load features for recording {self.recording_id} starting from {start}s. "
            f"The available range is ({self.start}, {self.end}) seconds."
        )
    if not isclose(start, self.start):
        left_offset_frames = round((start - self.start) / self.frame_shift)

    # Right trim
    if duration is not None:
        end = start + duration
        if not isclose(end, self.end):
            # Note the "minus" sign below before round - we're slicing a numpy
            # array, e.g. a[20:-100]
            right_offset_frames = -round((self.end - end) / self.frame_shift)
            # The right offset is only meaningful when it is strictly negative
            # (counted from the end of the array):
            #   * zero happens when the duration is very close to the original
            #     one and would produce an empty slice;
            #   * a positive value happens when the requested end lies past
            #     ``self.end`` and would be interpreted as an absolute index
            #     counted from the start, silently returning the wrong span.
            # In both cases fall back to reading until the end.
            if right_offset_frames >= 0:
                right_offset_frames = None

    # Load and return the features (subset) from the storage
    return storage.read(
        self.storage_key,
        left_offset_frames=left_offset_frames,
        right_offset_frames=right_offset_frames,
    )
def load(self) -> np.ndarray:
    """
    Read the complete array from the storage backend described by this object.

    The reader type is looked up from ``self.storage_type``, instantiated with
    ``self.storage_path``, and asked for the entry under ``self.storage_key``.
    """
    from lhotse.features.io import get_reader

    reader_type = get_reader(self.storage_type)
    # noinspection PyArgumentList
    reader = reader_type(self.storage_path)
    return reader.read(self.storage_key)
def load(
    self,
    start: Optional[Seconds] = None,
    duration: Optional[Seconds] = None,
) -> np.ndarray:
    """
    Read the array from storage, optionally only a part of it along ``temporal_dim``.

    :param start: if given, the read is offset by ``start - self.start`` seconds,
        converted to a frame count with ``self.frame_shift``.
    :param duration: if given, the read is limited to the number of frames that
        ``duration`` corresponds to under ``self.frame_shift``.
    :return: A numpy array or a relevant slice of it.
    :raises ValueError: if ``start`` precedes ``self.start`` by more than ``1e-5``.
    """
    from lhotse.features.io import get_reader

    # The reader is created before any argument validation takes place.
    # noinspection PyArgumentList
    storage = get_reader(self.array.storage_type)(self.array.storage_path)

    if start is None:
        start = self.start

    # A request that begins before the stored span cannot be satisfied.
    if start < self.start - 1e-5:
        raise ValueError(
            f"Cannot load array starting from {start}s. "
            f"The available range is ({self.start}, {self.end}) seconds."
        )

    # Left boundary: stays 0 unless the request begins after ``self.start``.
    first_frame = 0
    if not isclose(start, self.start):
        first_frame = seconds_to_frames(
            start - self.start,
            frame_shift=self.frame_shift,
            max_index=self.shape[self.temporal_dim],
        )

    # Right boundary: stays ``None`` (read to the end) unless a duration is given.
    last_frame = None
    if duration is not None:
        span_frames = seconds_to_frames(
            duration,
            frame_shift=self.frame_shift,
            max_index=self.shape[self.temporal_dim],
        )
        last_frame = first_frame + span_frames

    return storage.read(
        self.array.storage_key,
        left_offset_frames=first_frame,
        right_offset_frames=last_frame,
    )
def test_chunky_writer_left_right_offsets_equal(writer_type, ext):
    """
    Check that reading with equal left and right frame offsets yields an empty array.

    ``writer_type`` and ``ext`` are presumably supplied by a pytest
    parametrization defined outside this block -- TODO confirm.
    """
    # Small random values whose logs compress nicely with lilcom.
    samples = np.random.uniform(size=(11, 80)).astype(np.float32)
    expected = np.log(samples / 100)

    with NamedTemporaryFile(suffix=ext) as tmp:
        # The writer is closed before the file is flushed and read back.
        with writer_type(tmp.name) as writer:
            key = writer.write("dummy-key", expected)
        tmp.flush()

        reader_type = get_reader(writer.name)
        reader = reader_type(tmp.name)

        # A full read round-trips the data (up to one decimal place).
        full = reader.read(key)
        np.testing.assert_almost_equal(expected, full, decimal=1)

        # A zero-length span must come back as an empty array.
        empty = reader.read(key, left_offset_frames=0, right_offset_frames=0)
        assert empty.shape == (0,)