Ejemplo n.º 1
0
    def test_audio_4(self):
        """Load audio files from a listing file and verify feature extraction works."""
        # Skip when the test-data location is not configured in the environment.
        if self.data_dir is None:
            self.skipTest("DLPY_DATA_DIR is not set in the environment variables")

        audio_table = AudioTable.load_audio_files(self.conn, self.data_dir + 'listingFile.txt')
        if audio_table is None:
            # Loading failed outright; fail with an explicit reason instead of assertTrue(False).
            self.fail("AudioTable.load_audio_files returned None")
        # Feature extraction on a successfully loaded table must produce a table.
        self.assertIsNotNone(AudioTable.extract_audio_features(self.conn, audio_table))
Ejemplo n.º 2
0
    def transcribe(self,
                   audio_path,
                   max_path_size=100,
                   alpha=1.0,
                   beta=0.0,
                   gpu=None):
        """
        Transcribe the audio file into text.

        Notice that for this API, we are assuming that the speech-to-text models published by SAS Viya 3.4 will be used.
        Please download the acoustic and language model files from here:
        https://support.sas.com/documentation/prod-p/vdmml/zip/speech_19w21.zip

        Parameters
        ----------
        audio_path : string
            Specifies the location of the audio file (client-side, absolute/relative).
        max_path_size : int, optional
            Specifies the maximum number of paths kept as candidates of the final results during the decoding process.
            Default = 100
        alpha : double, optional
            Specifies the weight of the language model, relative to the acoustic model.
            Default = 1.0
        beta : double, optional
            Specifies the weight of the sentence length, relative to the acoustic model.
            Default = 0.0
        gpu : class : `dlpy.model.Gpu`, optional
            When specified, the action uses  Graphics Processing Unit hardware.
            The simplest way to use GPU processing is to specify "gpu=1". In this case, the default values of
            other GPU parameters are used.
            Setting gpu=1 enables all available GPU devices for use. Setting gpu=0 disables GPU processing.

        Returns
        -------
        string
            Transcribed text from audio file located at 'audio_path'.

        Raises
        ------
        DLPyError
            If the acoustic or language model is not loaded, if the audio files
            cannot be loaded onto the server, or if score decoding fails.
        """

        # Both models must be loaded before any server-side work is started.
        if self.acoustic_model is None:
            raise DLPyError(
                "acoustic model not found. "
                "Please load the acoustic model with \"load_acoustic_model\" before calling \"transcribe\"."
            )

        if self.language_model_caslib is None:
            raise DLPyError(
                "language model not found. "
                "Please load the language model with \"load_language_model\" before calling \"transcribe\"."
            )

        # step 1: split the input audio into segments (10 s, 16 kHz, 2 bytes/sample
        # per the fixed arguments) and write a listing file describing them.
        listing_path_after_caslib, listing_path_local, segment_path_after_caslib_list, segment_path_local_list = \
            segment_audio(audio_path, self.local_path, self.data_path_after_caslib, 10, 16000, 2)
        # Server-side (caslib-qualified) paths of the segments, used later to
        # reassemble decoded pieces in their original order.
        segment_path_list = [
            self.data_caslib_path + segment_path_after_caslib
            for segment_path_after_caslib in segment_path_after_caslib_list
        ]

        # The temporary listing/segment files must be removed on every exit path;
        # previously a failure in scoring (step 4) or decoding (step 5) leaked them.
        try:
            # step 2: load the segmented audio onto the CAS server
            try:
                audio_table = AudioTable.load_audio_files(
                    self.conn,
                    path=listing_path_after_caslib,
                    caslib=self.data_caslib)
            except DLPyError as err:
                if "cannot load audio files, something is wrong!" in str(err):
                    # Most common cause: client path and server caslib path disagree.
                    raise DLPyError(
                        "Error: Cannot load the audio files. "
                        "Please verify that \"data_path\" and \"local_path\" are pointing to the same position."
                    ) from err
                raise err

            # step 3: extract acoustic features (fixed-length frame window),
            # keeping "_path_" so results can be matched back to segments
            feature_table = AudioTable.extract_audio_features(self.conn,
                                                              table=audio_table,
                                                              n_output_frames=3500,
                                                              copyvars=["_path_"])

            # step 4: score the features with the loaded acoustic model
            self.acoustic_model.score(table=feature_table,
                                      model="asr",
                                      init_weights="asr_weights",
                                      copy_vars=["_path_"],
                                      gpu=gpu,
                                      casout=dict(name="score_table",
                                                  replace=True))
            score_table = self.conn.CASTable(name="score_table")

            # step 5: decode the acoustic scores with the language model
            rt = self.conn.retrieve("langModel.lmDecode",
                                    _messagelevel='error',
                                    table=score_table,
                                    casout=dict(name="result_table", replace=True),
                                    langModelTable=dict(
                                        name=self.language_model_name,
                                        caslib=self.language_model_caslib),
                                    blankLabel=" ",
                                    spaceLabel="&",
                                    maxPathSize=max_path_size,
                                    alpha=alpha,
                                    beta=beta,
                                    copyvars=["_path_"])
            if rt.severity > 1:
                for msg in rt.messages:
                    print(msg)
                raise DLPyError("Failed to decode the scores.")
            result_table = self.conn.CASTable(name="result_table")

            # step 6: stitch per-segment transcriptions back together in the
            # original segment order, dropping empty pieces
            result_dict = dict(
                zip(list(result_table["_path_"]),
                    list(result_table["_audio_content_"])))
            result_list = [
                result_dict[segment_path] for segment_path in segment_path_list
            ]
            result_list = [result.strip() for result in result_list]
            result_list = [result for result in result_list if len(result) > 0]
            result = " ".join(result_list)
        finally:
            # step 7: always clean up the temporary listing and segment files
            clean_audio(listing_path_local, segment_path_local_list)

        return result
Ejemplo n.º 3
0
 def test_audio_5(self):
     """Extracting features from an invalid (non-table) input should return None."""
     # A plain string is not an audio table, so extraction must yield None.
     self.assertIsNone(AudioTable.extract_audio_features(self.s, 'string'))