예제 #1
0
파일: io.py 프로젝트: lhotse-speech/lhotse
 def read(self,
          key: str,
          left_offset_frames: int = 0,
          right_offset_frames: Optional[int] = None) -> np.ndarray:
     """
     Fetch the lilcom-compressed array stored under ``key`` and return a slice of it.

     The object is opened at ``{self.base_url}/{key}`` in binary mode, read in full
     and decompressed with ``lilcom``; only then is the requested slice taken.

     :param key: location of the array relative to ``self.base_url``; a single
         leading ``/`` is dropped before the URL is built.
     :param left_offset_frames: start index of the slice along the first axis.
     :param right_offset_frames: end index (exclusive) of the slice along the
         first axis; ``None`` means "until the end".
     :return: ``arr[left_offset_frames:right_offset_frames]`` of the decompressed array.
     """
     # The URL is assembled with an explicit '/', so a key that already starts
     # with one must lose it to avoid a doubled separator.
     relative_key = key[1:] if key.startswith('/') else key
     with SmartOpen.open(f'{self.base_url}/{relative_key}', 'rb') as stream:
         compressed = stream.read()
         frames = lilcom.decompress(compressed)
     return frames[left_offset_frames:right_offset_frames]
예제 #2
0
파일: io.py 프로젝트: desh2608/lhotse
 def write(self, key: str, value: np.ndarray) -> str:
     """
     Compress ``value`` with lilcom and store it under ``key`` below ``self.base_url``.

     :param key: target location relative to ``self.base_url``. A single leading
         ``/`` is removed and the ``.llc`` extension is appended when missing.
     :param value: the array to compress (using ``self.tick_power``) and write.
     :return: the normalized key that was actually used for storage.
     """
     # The URL below is joined with an explicit '/', so strip one from the key.
     normalized_key = key[1:] if key.startswith("/") else key
     # Make sure the stored object carries the lilcom extension.
     if not normalized_key.endswith(".llc"):
         normalized_key = f"{normalized_key}.llc"
     target_url = f"{self.base_url}/{normalized_key}"
     payload = lilcom.compress(value, tick_power=self.tick_power)
     with SmartOpen.open(target_url, "wb") as sink:
         sink.write(payload)
     return normalized_key
예제 #3
0
    def load_audio(
        self,
        offset: Seconds = 0.0,
        duration: Optional[Seconds] = None,
    ) -> np.ndarray:
        """
        Load the samples of this AudioSource (a file, a shell command, or a URL)
        through ``read_audio`` and return them as a numpy array.

        The returned array has shape ``(n_samples,)`` for single-channel audio and
        ``(n_channels, n_samples)`` for multi-channel audio.

        Note: The elements in the returned array are in the range [-1.0, 1.0]
        and are of dtype ``np.float32``.

        :param offset: where to start reading, passed on to ``read_audio``.
        :param duration: how much audio to read, passed on to ``read_audio``;
            ``None`` means no duration limit is requested.
        :return: the loaded samples cast to ``np.float32``.
        :raises ValueError: when ``duration`` is given and the audio that was
            actually read is shorter than requested (beyond a 1 ms allowance).
        """
        assert self.type in ('file', 'command', 'url')

        # TODO: refactor when another source type is added
        if self.type == 'url':
            if duration is not None or offset != 0.0:
                # TODO(pzelasko): How should we support chunking for URLs?
                #                 Without some caching scheme, reading many chunks of the
                #                 same recording means downloading the whole file each time.
                warnings.warn(
                    'You requested a subset of a recording that is read from URL. '
                    'Expect large I/O overhead if you are going to read many chunks like these, '
                    'since every time we will download the whole file rather than its subset.'
                )
            # The whole remote object is buffered in memory before decoding.
            with SmartOpen.open(self.source, 'rb') as remote:
                buffered = BytesIO(remote.read())
                samples, sampling_rate = read_audio(
                    buffered, offset=offset, duration=duration)

        elif self.type == 'command':
            if duration is not None or offset != 0.0:
                # TODO(pzelasko): How should we support chunking for commands?
                #                 Without some caching scheme, reading many chunks of the
                #                 same recording means re-running the command each time.
                warnings.warn(
                    'You requested a subset of a recording that is read from disk via a bash command. '
                    'Expect large I/O overhead if you are going to read many chunks like these, '
                    'since every time we will read the whole file rather than its subset.'
                )
            # The command is executed through the shell and its stdout is treated as the audio data.
            completed = run(self.source, shell=True, stdout=PIPE)
            samples, sampling_rate = read_audio(
                BytesIO(completed.stdout), offset=offset, duration=duration)

        else:  # self.type == 'file'
            use_audioread = extension_contains('.opus', Path(self.source))
            samples, sampling_rate = read_audio(
                self.source,
                offset=offset,
                duration=duration,
                force_audioread=use_audioread)

        # Explicit sanity check for duration, as soundfile does not complain here.
        if duration is not None:
            # The sample axis is the only axis for mono, and the second axis otherwise.
            num_samples = samples.shape[0] if samples.ndim == 1 else samples.shape[1]
            available_duration = num_samples / sampling_rate
            # Allow 1 ms of slack to avoid spurious failures from float error.
            shortest_acceptable = duration - 1e-3
            if available_duration < shortest_acceptable:
                raise ValueError(
                    f'Requested more audio ({duration}s) than available ({available_duration}s)'
                )

        return samples.astype(np.float32)