Example #1
0
    def test_audio_12(self):
        """Expect DLPyError when loading with the first caslib reported by the server."""
        if self.data_dir is None:
            self.skipTest("DLPY_DATA_DIR is not set in the environment variables")

        with self.assertRaises(DLPyError):
            # NOTE(review): if data_dir is a str, str.join inserts data_dir between
            # every character of 'listingFile.txt' rather than appending the file
            # name; sibling tests use '+'. Confirm the malformed path is intended.
            listing_path = self.data_dir.join('listingFile.txt')
            # Name of the first caslib listed by the caslibinfo action.
            first_caslib = self.conn.caslibinfo().CASLibInfo['Name'][0]
            AudioTable.load_audio_files(self.conn, listing_path, caslib=first_caslib)
Example #2
0
    def test_audio_5(self):
        """Check that the first fetched row's '_path_' value contains 'recording'."""
        if self.data_dir is None:
            self.skipTest("DLPY_DATA_DIR is not set in the environment variables")

        audio_table = AudioTable.load_audio_files(self.conn, self.data_dir + 'listingFile.txt')
        fetched = self.conn.fetch(audio_table)
        # DataFrame.ix was deprecated and later removed from pandas (1.0);
        # iloc picks the first row by position instead.
        first_path = fetched.Fetch.iloc[0]['_path_']
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn('recording', first_path)
Example #3
0
    def test_audio_2(self):
        """Loading the listing file located under self.data_dir must not return None."""
        if self.data_dir is None:
            self.skipTest("DLPY_DATA_DIR is not set in the environment variables")

        listing_path = self.data_dir + 'listingFile.txt'
        loaded = AudioTable.load_audio_files(self.s, listing_path)
        self.assertTrue(loaded is not None)
Example #4
0
    def test_audio_11(self):
        """Loading with a CASTable passed as the third positional argument must not return None."""
        if self.data_dir is None:
            self.skipTest("DLPY_DATA_DIR is not set in the environment variables")

        listing_path = self.data_dir + 'listingFile.txt'
        print(listing_path)

        # Table handle handed to load_audio_files as its third positional argument.
        target_table = self.conn.CASTable('hebele')
        loaded = AudioTable.load_audio_files(self.conn, listing_path, target_table)
        self.assertTrue(loaded is not None)
Example #5
0
    def test_audio_4(self):
        """Load the listing file, then check that feature extraction returns a result."""
        if self.data_dir is None:
            self.skipTest("DLPY_DATA_DIR is not set in the environment variables")

        audio_table = AudioTable.load_audio_files(self.conn, self.data_dir + 'listingFile.txt')
        # A failed load now reports "unexpectedly None" instead of the opaque
        # "False is not true" produced by assertTrue(False).
        self.assertIsNotNone(audio_table)
        self.assertIsNotNone(AudioTable.extract_audio_features(self.conn, audio_table))
Example #6
0
    def test_audio_local_audio_path(self):
        """
        Load audio through paired local/server directories and build feature tables.

        The test is skipped when self.local_dir or self.server_dir is None, or
        when the soundfile / sounddevice packages cannot be imported. It expects
        the loaded table to contain 15 rows and the first feature variable of
        the labeled feature table to be '_f0_v0_'.

        """
        if self.local_dir is None:
            self.skipTest(
                "DLPY_DATA_DIR_LOCAL is not set in the environment variables")

        if self.server_dir is None:
            self.skipTest(
                "DLPY_DATA_DIR_SERVER is not set in the environment variables")

        # The packages are imported only to confirm that they are installed.
        # ModuleNotFoundError is a subclass of ImportError and is undefined
        # before Python 3.6, so catching ImportError alone is both sufficient
        # and portable.
        try:
            import soundfile  # noqa: F401
            import sounddevice  # noqa: F401
        except ImportError:
            self.skipTest(
                "skipping, soundfile and sounddevice packages are not installed"
            )

        local_audio_dir = os.path.join(self.local_dir, 'lang_id', 'train')
        # The server path is assembled by plain concatenation with '/'.
        server_audio_dir = self.server_dir + 'lang_id' + '/train'

        audio_table = AudioTable.load_audio_files(
            self.s,
            local_audio_path=local_audio_dir,
            server_audio_path=server_audio_dir)
        numrows = audio_table.numrows()
        print(numrows['numrows'])
        self.assertEqual(numrows['numrows'], 15)

        # test extracting features
        feature_table = audio_table.create_audio_feature_table()
        print(feature_table.summary())

        # test extracting features with labels
        feature_table = audio_table.create_audio_feature_table(label_level=-2)
        print(feature_table.freq(inputs='_label_'))
        print(feature_table.label_freq)
        print(feature_table.feature_vars)
        self.assertEqual(feature_table.feature_vars[0], '_f0_v0_')
Example #7
0
    def test_audio_local_audio_path_specgram_label_level(self):
        """
        Load audio as spectrograms with labels taken from label_level=-2.

        The test is skipped when self.local_dir or self.server_dir is None, or
        when the soundfile / sounddevice packages cannot be imported. It expects
        15 rows in the resulting table and a frequency of 5 for the first entry
        of label_freq, then calls show() on the table.

        """
        if self.local_dir is None:
            self.skipTest(
                "DLPY_DATA_DIR_LOCAL is not set in the environment variables")

        if self.server_dir is None:
            self.skipTest(
                "DLPY_DATA_DIR_SERVER is not set in the environment variables")

        # The packages are imported only to confirm that they are installed.
        # ModuleNotFoundError is a subclass of ImportError and is undefined
        # before Python 3.6, so catching ImportError alone is both sufficient
        # and portable.
        try:
            import soundfile  # noqa: F401
            import sounddevice  # noqa: F401
        except ImportError:
            self.skipTest(
                "skipping, soundfile and sounddevice packages are not installed"
            )

        local_audio_dir = os.path.join(self.local_dir, 'lang_id', 'train')
        # The server path is assembled by plain concatenation with '/'.
        server_audio_dir = self.server_dir + 'lang_id' + '/train'

        image_table = AudioTable.load_audio_files(
            self.s,
            local_audio_path=local_audio_dir,
            server_audio_path=server_audio_dir,
            as_specgram=True,
            label_level=-2)
        numrows = image_table.numrows()
        print(numrows['numrows'])
        self.assertEqual(numrows['numrows'], 15)

        print(image_table.columns)

        print(image_table.label_freq)

        self.assertEqual(image_table.label_freq['Frequency'][0], 5)

        image_table.show(id='_path_', ncol=2, nimages=2)
0
    def transcribe(self,
                   audio_path,
                   max_path_size=100,
                   alpha=1.0,
                   beta=0.0,
                   gpu=None):
        """
        Transcribe the audio file into text.

        Notice that for this API, we are assuming that the speech-to-text models published by SAS Viya 3.4 will be used.
        Please download the acoustic and language model files from here:
        https://support.sas.com/documentation/prod-p/vdmml/zip/speech_19w21.zip

        Parameters
        ----------
        audio_path : string
            Specifies the location of the audio file (client-side, absolute/relative).
        max_path_size : int, optional
            Specifies the maximum number of paths kept as candidates of the final results during the decoding process.
            Default = 100
        alpha : double, optional
            Specifies the weight of the language model, relative to the acoustic model.
            Default = 1.0
        beta : double, optional
            Specifies the weight of the sentence length, relative to the acoustic model.
            Default = 0.0
        gpu : class : `dlpy.model.Gpu`, optional
            When specified, the action uses  Graphics Processing Unit hardware.
            The simplest way to use GPU processing is to specify "gpu=1". In this case, the default values of
            other GPU parameters are used.
            Setting gpu=1 enables all available GPU devices for use. Setting gpu=0 disables GPU processing.

        Returns
        -------
        string
            Transcribed text from audio file located at 'audio_path'.

        Raises
        ------
        DLPyError
            If the acoustic model or the language model has not been loaded,
            if the audio files cannot be loaded, or if decoding the scores fails.

        """

        # check if acoustic model is loaded
        if self.acoustic_model is None:
            raise DLPyError(
                "acoustic model not found. "
                "Please load the acoustic model with \"load_acoustic_model\" before calling \"transcribe\"."
            )

        # check if language model is loaded
        if self.language_model_caslib is None:
            raise DLPyError(
                "language model not found. "
                "Please load the language model with \"load_language_model\" before calling \"transcribe\"."
            )

        # step 1: preparation and segmentation
        # NOTE(review): the literals 10, 16000 and 2 are passed straight to
        # segment_audio; presumably segment length, sample rate and sample
        # width -- confirm against segment_audio's signature.
        listing_path_after_caslib, listing_path_local, segment_path_after_caslib_list, segment_path_local_list = \
            segment_audio(audio_path, self.local_path, self.data_path_after_caslib, 10, 16000, 2)
        segment_path_list = [
            self.data_caslib_path + segment_path_after_caslib
            for segment_path_after_caslib in segment_path_after_caslib_list
        ]

        # From here on, clean_audio must run exactly once whatever happens:
        # previously it was skipped when feature extraction, scoring, decoding
        # or result concatenation failed. (clean_audio is assumed to remove the
        # listing/segment files created in step 1 -- TODO confirm.)
        try:
            # step 2: load audio
            try:
                audio_table = AudioTable.load_audio_files(
                    self.conn,
                    path=listing_path_after_caslib,
                    caslib=self.data_caslib)
            except DLPyError as err:
                # Replace the generic loader message with a hint about the
                # likely misconfiguration; any other DLPyError propagates as-is.
                if "cannot load audio files, something is wrong!" in str(err):
                    raise DLPyError(
                        "Error: Cannot load the audio files. "
                        "Please verify that \"data_path\" and \"local_path\" are pointing to the same position."
                    )
                raise

            # step 3: extract features
            feature_table = AudioTable.extract_audio_features(self.conn,
                                                              table=audio_table,
                                                              n_output_frames=3500,
                                                              copyvars=["_path_"])

            # step 4: score features
            self.acoustic_model.score(table=feature_table,
                                      model="asr",
                                      init_weights="asr_weights",
                                      copy_vars=["_path_"],
                                      gpu=gpu,
                                      casout=dict(name="score_table",
                                                  replace=True))
            score_table = self.conn.CASTable(name="score_table")

            # step 5: decode scores
            rt = self.conn.retrieve("langModel.lmDecode",
                                    _messagelevel='error',
                                    table=score_table,
                                    casout=dict(name="result_table", replace=True),
                                    langModelTable=dict(
                                        name=self.language_model_name,
                                        caslib=self.language_model_caslib),
                                    blankLabel=" ",
                                    spaceLabel="&",
                                    maxPathSize=max_path_size,
                                    alpha=alpha,
                                    beta=beta,
                                    copyvars=["_path_"])
            if rt.severity > 1:
                for msg in rt.messages:
                    print(msg)
                raise DLPyError("Failed to decode the scores.")
            result_table = self.conn.CASTable(name="result_table")

            # step 6: concatenate results
            # Map each segment path to its decoded text, then emit the texts in
            # the original segment order, dropping segments that decoded to
            # nothing but whitespace.
            result_dict = dict(
                zip(list(result_table["_path_"]),
                    list(result_table["_audio_content_"])))
            segment_texts = [
                result_dict[segment_path].strip()
                for segment_path in segment_path_list
            ]
            return " ".join(text for text in segment_texts if text)
        finally:
            # step 7: cleaning
            clean_audio(listing_path_local, segment_path_local_list)
Example #9
0
 def test_audio_1(self):
     """Loading audio files from the path "/u/" is expected to yield None."""
     loaded = AudioTable.load_audio_files(self.s, "/u/")
     self.assertTrue(loaded is None)