def show_open_dialog(self):
    """Prompt for a .wav file, load it, and prime the UI for extraction.

    Clears the features table, reads the raw and silence-trimmed signals,
    fills the info labels, and enables the play/extract buttons.
    """
    self.audioFile = QtGui.QFileDialog.getOpenFileName(
        self, 'Open audio file', '', "Audio Files (*.wav)", None,
        QtGui.QFileDialog.DontUseNativeDialog)
    # An empty path means the dialog was cancelled — nothing to do.
    if self.audioFile == "":
        return
    # Drop any features shown for a previously loaded file.
    self.featuresTbl.setRowCount(0)
    self.featuresTbl.setColumnCount(0)
    self.audioClassInput.setText("")
    path = str(self.audioFile)
    # Both readers also report the sample rate; the second assignment
    # overwrites the first with the trimmed file's rate.
    self.audio_signal, self.audio_fs = FileReader.read_audio(path)
    self.silenced_signal, self.audio_fs = self.mfcc.remove_silence(path)
    self.fsValLbl.setText(": " + str(self.audio_fs) + " Hz")
    self.sampleValLbl.setText(
        ": " + str(len(self.audio_signal)) + " | "
        + str(len(self.silenced_signal)) + " (silenced)")
    # Display only the file name (everything after the last '/').
    self.audioFilenameLbl.setText(": " + path[path.rfind('/') + 1:])
    self.audioClassInput.setText(FileReader.get_output_class(path))
    self.audioPlayBtn.setDisabled(False)
    self.extractSaveBtn.setDisabled(False)
    self.player.set_audio_source(self.audioFile)
def show_open_dialog(self):
    """Let the user pick a .wav file, load it, and plot the raw signal."""
    self.audioFile = QtGui.QFileDialog.getOpenFileName(
        self, 'Open audio file', '', "Audio Files (*.wav)", None,
        QtGui.QFileDialog.DontUseNativeDialog)
    # An empty path means the dialog was cancelled.
    if self.audioFile != "":
        fileName = str(self.audioFile)
        # Read raw samples and a silence-trimmed copy; both calls also
        # report the sample rate (the second overwrites the first).
        self.audio_signal, self.audio_fs = FileReader.read_audio(fileName)
        self.silenced_signal, self.audio_fs = self.mfcc.remove_silence(fileName)
        self.fsValLbl.setText(": " + str(self.audio_fs) + " Hz")
        self.sampleValLbl.setText(
            ": " + str(len(self.audio_signal)) + " | " +
            str(len(self.silenced_signal)) + " (silenced)")
        # Show just the file name (text after the last '/').
        self.audioFilenameLbl.setText(
            ": " + fileName[fileName.rfind('/') + 1:len(fileName)])
        self.audioPlayBtn.setDisabled(False)
        # Replace any previous plots with a fresh plot of the raw signal.
        self.clear_all_layout()
        fig = Figure()
        self.origSignalPlot = fig.add_subplot(111)
        self.origSignalPlot.plot(self.audio_signal)
        self.add_figure(fig, self.originalPlotLyt)
        self.extractSaveBtn.setDisabled(False)
        self.player.set_audio_source(self.audioFile)
        # NOTE(review): assumes tab index 0 shows the plot — confirm.
        self.testDataTab.setCurrentIndex(0)
def run(self):
    """Thread body: compute MFCC features for each queued audio file and
    insert them into the database.

    Emits ``update()`` after each file (progress tick) and ``finish()``
    when every file has been processed.
    """
    self.emit(QtCore.SIGNAL("update()"))
    # Frame size comes from the UI combo box; overlap is half a frame.
    self.mfcc.frame_size = int(self.par.frameSizeVal.currentText())
    self.mfcc.overlap = self.mfcc.frame_size / 2
    for index, file_audio in enumerate(self.audio_files):
        file_audio = str(file_audio)
        # MFCC pipeline: read -> trim silence -> frame -> window ->
        # FFT -> filter bank -> cepstral features.
        self.audio_signal, self.audio_fs = FileReader.read_audio(
            file_audio)
        self.silenced_signal, self.audio_fs = self.mfcc.remove_silence(
            file_audio)
        self.num_frames, self.framed_signal = self.mfcc.frame_blocking(
            self.silenced_signal)
        self.windowed_signal = self.mfcc.hamm_window(self.framed_signal)
        self.fft_signal = self.mfcc.calc_fft(self.windowed_signal)
        self.log_energy, self.fbank = self.mfcc.fbank(
            self.fft_signal, self.audio_fs)
        self.features = self.mfcc.features(self.log_energy)
        # var = [st.variance(self.features[:,i]) for i in xrange(self.mfcc.num_filter)]
        # [self.all_features.append(self.features[i,:]) for i in xrange(self.features.shape[0])]
        # self.variances.append(var)
        features = []
        if TYPE == 1:
            # One "files" row per audio file; each frame stores a
            # hand-picked, partly reordered subset of coefficients plus
            # the class label from column 1 of the features table.
            file_id = self.db.insert("files", {"file_path": file_audio})
            for i in xrange(self.features.shape[0]):
                # [31, 28, 29, 30, 27, 26, 25, 24, 23, 22, 20, 21, 19
                # features.append([file_id, i, self.features[i, 1:14], str(self.par.featuresTbl.item(index,1).text())])
                features.append([
                    file_id, i, self.features[
                        i, [1, 2, 3, 4, 5, 7, 6, 9, 8, 10, 11, 12, 13]],
                    str(self.par.featuresTbl.item(index, 1).text())
                ])
            self.db.insert_features(features)
        else:
            # Otherwise record the file's output class and store
            # coefficients 1..13 in their natural order.
            output_class_id = self.db.insert(
                "output_classes", {
                    "file_path": file_audio,
                    "class": str(FileReader.get_output_class(file_audio))
                })
            for i in xrange(self.features.shape[0]):
                features.append(
                    [output_class_id, i, self.features[i, 1:14]])
            self.db.insert_features(features)
        # Progress tick for the GUI after each file.
        self.emit(QtCore.SIGNAL("update()"))
    # self.variances = np.asarray(self.variances)
    # rata2 = [st.mean(self.variances[:,i]) for i in xrange(self.mfcc.num_filter)]
    # self.write_excel(rata2)
    # print str(np.sort(rata2))
    # print str(np.argsort(rata2))
    self.emit(QtCore.SIGNAL("finish()"))
def remove_silence(self, audio):
    """Trim leading and trailing silence from *audio* using ``sox``.

    Writes a silence-trimmed copy of the file to a temporary path, reads
    it back, and removes the copy.

    :param audio: path to the source .wav file
    :return: ``[signal, sample_rate]`` of the trimmed audio
    """
    # Hoist the temp path: the original recomputed add_temp() three times.
    temp_path = FileReader.add_temp(audio)
    # sox 'silence' effect, applied once from each end with the
    # configured period/duration/threshold settings.
    call([
        'sox', audio, temp_path, 'silence',
        self.above_period, self.duration, self.threshold,
        self.below_period, self.duration, self.threshold
    ])
    try:
        silenced_signal, silenced_fs = FileReader.read_audio(temp_path)
    finally:
        # Always clean up the temp file — previously a failed read (e.g.
        # sox produced no output) leaked it.
        if os.path.exists(temp_path):
            os.remove(temp_path)
    return [silenced_signal, silenced_fs]
def run(self):
    """Thread body: extract MFCC features per file, classify them with
    LVQ, write the predictions into the table, and track accuracy.

    Emits ``update()`` so the GUI can refresh, and ``finish()`` at the end.
    """
    self.emit(QtCore.SIGNAL("update()"))
    self.mfcc.frame_size = int(self.par.frameSizeVal.currentText())
    self.mfcc.overlap = self.mfcc.frame_size / 2
    speaker_correct = 0
    speaker_word_correct = 0
    for index, file_audio in enumerate(self.audio_files):
        file_audio = str(file_audio)
        # MFCC pipeline: read -> trim silence -> frame -> window ->
        # FFT -> filter bank -> cepstral features.
        self.audio_signal, self.audio_fs = FileReader.read_audio(
            file_audio)
        self.silenced_signal, self.audio_fs = self.mfcc.remove_silence(
            file_audio)
        self.num_frames, self.framed_signal = self.mfcc.frame_blocking(
            self.silenced_signal)
        self.windowed_signal = self.mfcc.hamm_window(self.framed_signal)
        self.fft_signal = self.mfcc.calc_fft(self.windowed_signal)
        self.log_energy, self.fbank = self.mfcc.fbank(
            self.fft_signal, self.audio_fs)
        self.features = self.mfcc.features(self.log_energy)
        # Codebook chosen in the UI; a fresh LVQ is built for every file.
        self.lvq = LVQ(str(self.par.databaseSelect.currentText()))
        # result = self.lvq.test_data(self.features[:, 1:14])
        # [31, 28, 29, 30, 27, 26, 25, 24, 23, 22, 20, 21, 19]
        result = self.lvq.test_data(
            self.features[:, [1, 2, 3, 4, 5, 7, 6, 9, 8, 10, 11, 12, 13]])
        print "vote for file " + str(index) + " : " + str(result)
        # full = str(result[1][0]) if len(result) >= 2 else str(result[0][0])
        # Winning label has the form "<speaker>-<word>"; a label without
        # a '-' is treated as speaker-only.
        full = str(result[0][0])
        speaker = full[:full.rfind('-')] if full.rfind('-') != -1 else full
        word = full[full.rfind('-') + 1:] if full.rfind('-') != -1 else "-"
        self.par.featuresTbl.setItem(index, 2,
                                     QtGui.QTableWidgetItem(speaker))
        self.par.featuresTbl.setItem(index, 3,
                                     QtGui.QTableWidgetItem(word))
        # Columns 0/1 hold the expected speaker/word for this file.
        if speaker == self.par.featuresTbl.item(index, 0).text():
            speaker_correct += 1
        if speaker == self.par.featuresTbl.item(
                index, 0).text() and word == self.par.featuresTbl.item(
                    index, 1).text():
            speaker_word_correct += 1
        # Running accuracies as a percent of ALL files, so they climb as
        # the loop progresses; update() refreshes the GUI each iteration.
        self.par.speaker_word_acc = (speaker_word_correct /
                                     float(len(self.audio_files))) * 100
        self.par.speaker_only_acc = (speaker_correct /
                                     float(len(self.audio_files))) * 100
        self.emit(QtCore.SIGNAL("update()"))
    self.emit(QtCore.SIGNAL("finish()"))
def run(self):
    """Worker thread: extract MFCC features for every queued file and
    persist them to the database.

    Emits ``update()`` after each file and ``finish()`` when done.
    """
    self.emit(QtCore.SIGNAL("update()"))
    # Frame size comes from the UI; overlap is fixed at half a frame.
    self.mfcc.frame_size = int(self.par.frameSizeVal.currentText())
    self.mfcc.overlap = self.mfcc.frame_size / 2
    for idx, audio_path in enumerate(self.audio_files):
        audio_path = str(audio_path)
        # MFCC pipeline: read -> trim silence -> frame -> window ->
        # FFT -> filter bank -> cepstral features.
        self.audio_signal, self.audio_fs = FileReader.read_audio(audio_path)
        self.silenced_signal, self.audio_fs = self.mfcc.remove_silence(audio_path)
        self.num_frames, self.framed_signal = self.mfcc.frame_blocking(self.silenced_signal)
        self.windowed_signal = self.mfcc.hamm_window(self.framed_signal)
        self.fft_signal = self.mfcc.calc_fft(self.windowed_signal)
        self.log_energy, self.fbank = self.mfcc.fbank(self.fft_signal, self.audio_fs)
        self.features = self.mfcc.features(self.log_energy)
        rows = []
        if TYPE == 1:
            # One "files" row per audio file; every frame stores a
            # hand-picked (partly reordered) subset of coefficients plus
            # the label from column 1 of the features table.
            file_id = self.db.insert("files", {"file_path": audio_path})
            for frame in xrange(self.features.shape[0]):
                rows.append([
                    file_id,
                    frame,
                    self.features[frame, [1, 2, 3, 4, 5, 7, 6, 9, 8, 10, 11, 12, 13]],
                    str(self.par.featuresTbl.item(idx, 1).text()),
                ])
        else:
            # Otherwise record the file's output class and keep
            # coefficients 1..13 in their natural order.
            output_class_id = self.db.insert(
                "output_classes",
                {"file_path": audio_path,
                 "class": str(FileReader.get_output_class(audio_path))})
            for frame in xrange(self.features.shape[0]):
                rows.append([output_class_id, frame, self.features[frame, 1:14]])
        self.db.insert_features(rows)
        # Progress tick for the GUI after each file.
        self.emit(QtCore.SIGNAL("update()"))
    self.emit(QtCore.SIGNAL("finish()"))
def run(self): self.emit(QtCore.SIGNAL("update()")) self.mfcc.frame_size = int(self.par.frameSizeVal.currentText()) self.mfcc.overlap = self.mfcc.frame_size/2 speaker_correct = 0 speaker_word_correct = 0 for index,file_audio in enumerate(self.audio_files): file_audio = str(file_audio) self.audio_signal, self.audio_fs = FileReader.read_audio(file_audio) self.silenced_signal, self.audio_fs = self.mfcc.remove_silence(file_audio) self.num_frames, self.framed_signal = self.mfcc.frame_blocking(self.silenced_signal) self.windowed_signal = self.mfcc.hamm_window(self.framed_signal) self.fft_signal = self.mfcc.calc_fft(self.windowed_signal) self.log_energy, self.fbank = self.mfcc.fbank(self.fft_signal, self.audio_fs) self.features = self.mfcc.features(self.log_energy) self.lvq = LVQ(str(self.par.databaseSelect.currentText())) # result = self.lvq.test_data(self.features[:, 1:14]) # [31, 28, 29, 30, 27, 26, 25, 24, 23, 22, 20, 21, 19] result = self.lvq.test_data(self.features[:, [1, 2, 3, 4, 5, 7, 6, 9, 8, 10, 11, 12, 13]]) print "vote for file " + str(index) + " : " + str(result) # full = str(result[1][0]) if len(result) >= 2 else str(result[0][0]) full = str(result[0][0]) speaker = full[:full.rfind('-')] if full.rfind('-') != -1 else full word = full[full.rfind('-')+1:] if full.rfind('-') != -1 else "-" self.par.featuresTbl.setItem(index, 2, QtGui.QTableWidgetItem(speaker)) self.par.featuresTbl.setItem(index, 3, QtGui.QTableWidgetItem(word)) if speaker == self.par.featuresTbl.item(index,0).text(): speaker_correct += 1 if speaker == self.par.featuresTbl.item(index,0).text() and word == self.par.featuresTbl.item(index,1).text(): speaker_word_correct += 1 self.par.speaker_word_acc = (speaker_word_correct / float(len(self.audio_files))) * 100 self.par.speaker_only_acc = (speaker_correct / float(len(self.audio_files))) * 100 self.emit(QtCore.SIGNAL("update()")) self.emit(QtCore.SIGNAL("finish()"))
def remove_silence(self, audio):
    """Trim leading and trailing silence from *audio* using ``sox``.

    Writes a silence-trimmed copy of the file to a temporary path, reads
    it back, and removes the copy.

    :param audio: path to the source .wav file
    :return: ``[signal, sample_rate]`` of the trimmed audio
    """
    # Hoist the temp path: the original recomputed add_temp() three times.
    temp_path = FileReader.add_temp(audio)
    # sox 'silence' effect, applied once from each end with the
    # configured period/duration/threshold settings.
    call(['sox', audio, temp_path, 'silence',
          self.above_period, self.duration, self.threshold,
          self.below_period, self.duration, self.threshold])
    try:
        silenced_signal, silenced_fs = FileReader.read_audio(temp_path)
    finally:
        # Always clean up the temp file — previously a failed read (e.g.
        # sox produced no output) leaked it.
        if os.path.exists(temp_path):
            os.remove(temp_path)
    return [silenced_signal, silenced_fs]