예제 #1
0
    def load_audio(
        self,
        offset: Seconds = 0.0,
        duration: Optional[Seconds] = None,
    ) -> np.ndarray:
        """
        Read the audio behind this AudioSource and return it as a ``np.float32`` array.

        Three source types are handled:

        * ``'command'`` -- ``self.source`` is run through the shell and its stdout
          is decoded as audio;
        * ``'url'`` -- ``self.source`` is opened with ``SmartOpen`` and the whole
          payload is fetched into memory before decoding;
        * ``'file'`` -- ``self.source`` is handed directly to ``read_audio``
          (with ``force_audioread`` set according to the ``.opus`` extension check).

        For the first two types the complete stream is always fetched, even when only
        a sub-segment is requested, so a warning is emitted in that case.

        :param offset: where the requested segment starts, in seconds.
        :param duration: length of the requested segment in seconds;
            ``None`` means "until the end".
        :return: array of shape ``(n_samples,)`` for single-channel audio or
            ``(n_channels, n_samples)`` for multi-channel audio, cast to ``np.float32``.
            Sample values are expected to lie in ``[-1.0, 1.0]``.
        :raises ValueError: when ``duration`` exceeds the decoded audio length
            by more than 1 ms.
        """
        kind = self.type
        assert kind in ('file', 'command', 'url')

        # TODO: refactor when another source type is added
        if kind == 'command':
            partial_read = offset != 0.0 or duration is not None
            if partial_read:
                # TODO(pzelasko): How should we support chunking for commands?
                #                 Without a caching scheme, every chunk request re-runs
                #                 the command, which can be very inefficient.
                warnings.warn(
                    'You requested a subset of a recording that is read from disk via a bash command. '
                    'Expect large I/O overhead if you are going to read many chunks like these, '
                    'since every time we will read the whole file rather than its subset.'
                )
            completed = run(self.source, shell=True, stdout=PIPE)
            audio, sample_rate = read_audio(
                BytesIO(completed.stdout),
                offset=offset,
                duration=duration,
            )
        elif kind == 'url':
            partial_read = offset != 0.0 or duration is not None
            if partial_read:
                # TODO(pzelasko): How should we support chunking for URLs?
                #                 Without a caching scheme, every chunk request downloads
                #                 the whole file again, which can be very inefficient.
                warnings.warn(
                    'You requested a subset of a recording that is read from URL. '
                    'Expect large I/O overhead if you are going to read many chunks like these, '
                    'since every time we will download the whole file rather than its subset.'
                )
            with SmartOpen.open(self.source, 'rb') as remote:
                payload = remote.read()
                audio, sample_rate = read_audio(
                    BytesIO(payload),
                    offset=offset,
                    duration=duration,
                )
        else:
            # Plain file on disk.
            needs_audioread = extension_contains('.opus', Path(self.source))
            audio, sample_rate = read_audio(
                self.source,
                offset=offset,
                duration=duration,
                force_audioread=needs_audioread,
            )

        if duration is None:
            return audio.astype(np.float32)

        # soundfile does not complain when asked for more audio than exists,
        # so the returned length is verified explicitly.
        if audio.ndim == 1:
            n_frames = audio.shape[0]
        else:
            n_frames = audio.shape[1]
        available_duration = n_frames / sample_rate

        # 1 ms of slack absorbs floating-point error in the comparison.
        if available_duration < duration - 1e-3:
            raise ValueError(
                f'Requested more audio ({duration}s) than available ({available_duration}s)'
            )

        return audio.astype(np.float32)
예제 #2
0
 def __init__(self, path: Pathlike) -> None:
     """
     Store ``path`` and assert that it passes the ``.jsonl`` extension check.

     :param path: location of the file this object works with;
         ``extension_contains(".jsonl", path)`` must be truthy, otherwise an
         ``AssertionError`` is raised (unless assertions are disabled).
     """
     # Starts out as None; presumably a cached length filled in later -- confirm
     # against the rest of the class.
     self._len = None
     self.path = path
     assert extension_contains(".jsonl", self.path)