def test_audio_12(self):
    if self.data_dir is None:
        unittest.TestCase.skipTest(self, "DLPY_DATA_DIR is not set in the environment variables")

    # combining a full server-side path with an explicit caslib is expected to fail
    # (note: str.join would interleave the strings, so concatenation is used here)
    with self.assertRaises(DLPyError):
        AudioTable.load_audio_files(self.conn, self.data_dir + 'listingFile.txt',
                                    caslib=self.conn.caslibinfo().CASLibInfo['Name'][0])
def test_audio_5(self):
    if self.data_dir is None:
        unittest.TestCase.skipTest(self, "DLPY_DATA_DIR is not set in the environment variables")

    audio_table = AudioTable.load_audio_files(self.conn, self.data_dir + 'listingFile.txt')
    fe = self.conn.fetch(audio_table)
    # DataFrame.ix was removed in recent pandas versions; use .loc instead
    self.assertTrue('recording' in fe.Fetch.loc[0, '_path_'])
def test_audio_2(self):
    if self.data_dir is None:
        unittest.TestCase.skipTest(self, "DLPY_DATA_DIR is not set in the environment variables")

    self.assertTrue(AudioTable.load_audio_files(self.s, self.data_dir + 'listingFile.txt') is not None)
def test_audio_11(self):
    if self.data_dir is None:
        unittest.TestCase.skipTest(self, "DLPY_DATA_DIR is not set in the environment variables")

    print(self.data_dir + 'listingFile.txt')
    self.assertTrue(AudioTable.load_audio_files(self.conn, self.data_dir + 'listingFile.txt',
                                                self.conn.CASTable('hebele')) is not None)
def test_audio_4(self):
    if self.data_dir is None:
        unittest.TestCase.skipTest(self, "DLPY_DATA_DIR is not set in the environment variables")

    audio_table = AudioTable.load_audio_files(self.conn, self.data_dir + 'listingFile.txt')
    if audio_table is not None:
        self.assertTrue(AudioTable.extract_audio_features(self.conn, audio_table) is not None)
    else:
        self.fail('could not load the audio files')
def test_audio_local_audio_path(self):
    if self.local_dir is None:
        unittest.TestCase.skipTest(self, "DLPY_DATA_DIR_LOCAL is not set in the environment variables")
    if self.server_dir is None:
        unittest.TestCase.skipTest(self, "DLPY_DATA_DIR_SERVER is not set in the environment variables")

    try:
        # imported only to verify that the optional audio packages are available
        import soundfile as sf
        import sounddevice as sd
    except (ModuleNotFoundError, ImportError):
        unittest.TestCase.skipTest(self, "skipping, soundfile and sounddevice packages are not installed")

    local_audio_dir = os.path.join(self.local_dir, 'lang_id', 'train')
    server_audio_dir = self.server_dir + 'lang_id' + '/train'

    audio_table = AudioTable.load_audio_files(self.s,
                                              local_audio_path=local_audio_dir,
                                              server_audio_path=server_audio_dir)
    numrows = audio_table.numrows()
    print(numrows['numrows'])
    self.assertEqual(numrows['numrows'], 15)

    # test extracting features
    feature_table = audio_table.create_audio_feature_table()
    print(feature_table.summary())

    # test extracting features with labels
    feature_table = audio_table.create_audio_feature_table(label_level=-2)
    print(feature_table.freq(inputs='_label_'))
    print(feature_table.label_freq)
    print(feature_table.feature_vars)
    self.assertEqual(feature_table.feature_vars[0], '_f0_v0_')
def test_audio_local_audio_path_specgram_label_level(self):
    if self.local_dir is None:
        unittest.TestCase.skipTest(self, "DLPY_DATA_DIR_LOCAL is not set in the environment variables")
    if self.server_dir is None:
        unittest.TestCase.skipTest(self, "DLPY_DATA_DIR_SERVER is not set in the environment variables")

    try:
        # imported only to verify that the optional audio packages are available
        import soundfile as sf
        import sounddevice as sd
    except (ModuleNotFoundError, ImportError):
        unittest.TestCase.skipTest(self, "skipping, soundfile and sounddevice packages are not installed")

    local_audio_dir = os.path.join(self.local_dir, 'lang_id', 'train')
    server_audio_dir = self.server_dir + 'lang_id' + '/train'

    # as_specgram=True converts the audio files into spectrogram images,
    # so load_audio_files returns an image table here
    image_table = AudioTable.load_audio_files(self.s,
                                              local_audio_path=local_audio_dir,
                                              server_audio_path=server_audio_dir,
                                              as_specgram=True,
                                              label_level=-2)
    numrows = image_table.numrows()
    print(numrows['numrows'])
    self.assertEqual(numrows['numrows'], 15)

    print(image_table.columns)
    print(image_table.label_freq)
    self.assertEqual(image_table.label_freq['Frequency'][0], 5)

    image_table.show(id='_path_', ncol=2, nimages=2)
def transcribe(self, audio_path, max_path_size=100, alpha=1.0, beta=0.0, gpu=None):
    """
    Transcribe the audio file into text.

    Notice that for this API, we are assuming that the speech-to-text models
    published with SAS Viya 3.4 will be used. Please download the acoustic and
    language model files from here:
    https://support.sas.com/documentation/prod-p/vdmml/zip/speech_19w21.zip

    Parameters
    ----------
    audio_path : string
        Specifies the location of the audio file (client-side, absolute/relative).
    max_path_size : int, optional
        Specifies the maximum number of paths kept as candidates of the final
        results during the decoding process.
        Default = 100
    alpha : double, optional
        Specifies the weight of the language model, relative to the acoustic model.
        Default = 1.0
    beta : double, optional
        Specifies the weight of the sentence length, relative to the acoustic model.
        Default = 0.0
    gpu : class : `dlpy.model.Gpu`, optional
        When specified, the action uses Graphics Processing Unit hardware.
        The simplest way to use GPU processing is to specify "gpu=1". In this case,
        the default values of other GPU parameters are used. Setting gpu=1 enables
        all available GPU devices for use. Setting gpu=0 disables GPU processing.

    Returns
    -------
    string
        Transcribed text from the audio file located at `audio_path`.

    """
    # check if the acoustic model is loaded
    if self.acoustic_model is None:
        raise DLPyError("acoustic model not found. "
                        "Please load the acoustic model with \"load_acoustic_model\" "
                        "before calling \"transcribe\".")

    # check if the language model is loaded
    if self.language_model_caslib is None:
        raise DLPyError("language model not found. "
                        "Please load the language model with \"load_language_model\" "
                        "before calling \"transcribe\".")

    # step 1: preparation and segmentation
    listing_path_after_caslib, listing_path_local, segment_path_after_caslib_list, segment_path_local_list = \
        segment_audio(audio_path, self.local_path, self.data_path_after_caslib, 10, 16000, 2)
    segment_path_list = [self.data_caslib_path + segment_path_after_caslib
                         for segment_path_after_caslib in segment_path_after_caslib_list]

    # step 2: load audio
    try:
        audio_table = AudioTable.load_audio_files(self.conn,
                                                  path=listing_path_after_caslib,
                                                  caslib=self.data_caslib)
    except DLPyError as err:
        if "cannot load audio files, something is wrong!" in str(err):
            clean_audio(listing_path_local, segment_path_local_list)
            raise DLPyError("Error: Cannot load the audio files. "
                            "Please verify that \"data_path\" and \"local_path\" "
                            "are pointing to the same position.")
        raise err

    # step 3: extract features
    feature_table = AudioTable.extract_audio_features(self.conn,
                                                      table=audio_table,
                                                      n_output_frames=3500,
                                                      copyvars=["_path_"])

    # step 4: score the features with the acoustic model
    self.acoustic_model.score(table=feature_table,
                              model="asr",
                              init_weights="asr_weights",
                              copy_vars=["_path_"],
                              gpu=gpu,
                              casout=dict(name="score_table", replace=True))
    score_table = self.conn.CASTable(name="score_table")

    # step 5: decode the scores with the language model
    rt = self.conn.retrieve("langModel.lmDecode",
                            _messagelevel='error',
                            table=score_table,
                            casout=dict(name="result_table", replace=True),
                            langModelTable=dict(name=self.language_model_name,
                                                caslib=self.language_model_caslib),
                            blankLabel=" ",
                            spaceLabel="&",
                            maxPathSize=max_path_size,
                            alpha=alpha,
                            beta=beta,
                            copyvars=["_path_"])
    if rt.severity > 1:
        for msg in rt.messages:
            print(msg)
        raise DLPyError("Failed to decode the scores.")
    result_table = self.conn.CASTable(name="result_table")

    # step 6: concatenate the per-segment results in segment order
    result_dict = dict(zip(list(result_table["_path_"]),
                           list(result_table["_audio_content_"])))
    result_list = [result_dict[segment_path] for segment_path in segment_path_list]
    result_list = [result.strip() for result in result_list]
    result_list = [result for result in result_list if len(result) > 0]
    result = " ".join(result_list)

    # step 7: clean up the temporary listing and segment files
    clean_audio(listing_path_local, segment_path_local_list)

    return result
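# A minimal usage sketch for transcribe(), kept as a comment so it does not run
# inside the class body. The host, directories, and model file names below are
# hypothetical placeholders, and the constructor/loader arguments are assumptions
# based on the attributes referenced above (data_path, local_path,
# load_acoustic_model, load_language_model); adjust them to your deployment.
# It assumes the pretrained SAS speech models linked in the docstring have been
# downloaded to a location visible to both the client and the CAS server:
#
#     import swat
#     from dlpy.speech import Speech
#
#     conn = swat.CAS('cas-host.example.com', 5570)
#     speech = Speech(conn,
#                     data_path='/shared/audio/data',   # server-side view of the staging dir
#                     local_path='/shared/audio/data')  # client-side view of the same dir
#     speech.load_acoustic_model('acoustic_model.sashdat')
#     speech.load_language_model('language_model.sashdat')
#     print(speech.transcribe('hello.wav', max_path_size=100, alpha=1.0, beta=0.0))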
def test_audio_1(self):
    self.assertTrue(AudioTable.load_audio_files(self.s, "/u/") is None)