예제 #1
0
    def __call__(self, inputs: np.array) -> Tuple[np.array, int, List[str]]:
        """
        Produce a text prediction from a waveform via the S2T interface and,
        when a TTS model is configured, turn that text back into audio.

        Args:
            inputs (:obj:`np.array`):
                Raw audio waveform of shape `T` (time axis only). By default it is
                expected to be sampled at `self.sampling_rate`.
        Return:
            A :obj:`tuple` containing:
              - :obj:`np.array`: the output audio, intended to be shaped `C'`x`T'`.
                 When `self.tts_model` is `None` this is an empty array of shape
                 `(0,)`; otherwise it is the waveform returned by the TTS
                 interface with a leading axis added.
              - a :obj:`int`: the sampling rate in Hz. This is `self.sampling_rate`
                 when no TTS model is set, else the rate returned by the TTS
                 interface.
              - a :obj:`List[str]`: the annotation for each output channel; here a
                 single-element list holding the predicted text.
        """
        # Add a leading axis of size 1 before handing the waveform to the S2T interface.
        batched = torch.from_numpy(inputs).unsqueeze(0)
        s2t_sample = S2THubInterface.get_model_input(self.task, batched)
        text = S2THubInterface.get_prediction(
            self.task, self.model, self.generator, s2t_sample
        )

        # Text-only mode: there is no audio to return, so emit an empty waveform.
        if self.tts_model is None:
            return np.zeros((0, )), self.sampling_rate, [text]

        tts_input = TTSHubInterface.get_model_input(self.tts_task, text)
        waveform, rate = TTSHubInterface.get_prediction(
            self.tts_task, self.tts_model, self.tts_generator, tts_input
        )
        return waveform.unsqueeze(0).numpy(), rate, [text]
예제 #2
0
 def from_pretrained(
     cls,
     model_name_or_path,
     checkpoint_file="model.pt",
     data_name_or_path=".",
     config_yaml="config.yaml",
     **kwargs,
 ):
     """Load a checkpoint through fairseq's hub utilities and wrap it.

     Args:
         model_name_or_path: Forwarded as the first positional argument to
             ``hub_utils.from_pretrained``.
         checkpoint_file: Forwarded as the second positional argument
             (default ``"model.pt"``).
         data_name_or_path: Forwarded as the third positional argument
             (default ``"."``).
         config_yaml: Forwarded as the ``config_yaml`` keyword
             (default ``"config.yaml"``).
         **kwargs: Extra keyword arguments passed through unchanged.

     Returns:
         An ``S2THubInterface`` built from the loaded ``"args"``, ``"task"``
         and the first entry of ``"models"``.
     """
     # Imported lazily, inside the function, exactly as the original did.
     from fairseq import hub_utils

     known_archives = cls.hub_models()
     loaded = hub_utils.from_pretrained(
         model_name_or_path,
         checkpoint_file,
         data_name_or_path,
         archive_map=known_archives,
         config_yaml=config_yaml,
         **kwargs,
     )

     args = loaded["args"]
     task = loaded["task"]
     # Only the first loaded model is handed to the interface.
     first_model = loaded["models"][0]
     return S2THubInterface(args, task, first_model)