def show_open_dialog(self):
        """Prompt the user for a .wav file and load it into the UI.

        Clears the features table and class field, reads both the raw and
        the silence-trimmed signal, refreshes the info labels, and enables
        the play/extract buttons.  Does nothing if the dialog is cancelled
        (empty selection).
        """
        # DontUseNativeDialog forces Qt's own dialog for consistent behaviour.
        self.audioFile = QtGui.QFileDialog.getOpenFileName(
            self, 'Open audio file', '', "Audio Files (*.wav)", None,
            QtGui.QFileDialog.DontUseNativeDialog)
        if self.audioFile != "":
            # Reset any previously extracted features and the class label.
            self.featuresTbl.setRowCount(0)
            self.featuresTbl.setColumnCount(0)
            self.audioClassInput.setText("")

            fileName = str(self.audioFile)
            self.audio_signal, self.audio_fs = FileReader.read_audio(fileName)
            # NOTE(review): remove_silence re-reads the file from disk and its
            # returned rate overwrites self.audio_fs from the line above.
            self.silenced_signal, self.audio_fs = self.mfcc.remove_silence(
                fileName)

            self.fsValLbl.setText(": " + str(self.audio_fs) + " Hz")
            # Show raw vs. silence-trimmed sample counts side by side.
            self.sampleValLbl.setText(": " + str(len(self.audio_signal)) +
                                      " | " + str(len(self.silenced_signal)) +
                                      " (silenced)")
            # Display only the basename (text after the last '/').
            self.audioFilenameLbl.setText(": " + fileName[fileName.rfind('/') +
                                                          1:len(fileName)])
            self.audioClassInput.setText(FileReader.get_output_class(fileName))

            self.audioPlayBtn.setDisabled(False)

            self.extractSaveBtn.setDisabled(False)
            self.player.set_audio_source(self.audioFile)
    def show_open_dialog(self):
        """Prompt the user for a .wav file, load it, and plot its waveform.

        Reads the raw and silence-trimmed signals, refreshes the info
        labels, draws the original signal into the plot layout, enables
        the play/extract buttons, and switches to the first tab.  Does
        nothing if the dialog is cancelled (empty selection).
        """
        self.audioFile = QtGui.QFileDialog.getOpenFileName(self, 'Open audio file',
                                                           '',
                                                           "Audio Files (*.wav)",
                                                           None, QtGui.QFileDialog.DontUseNativeDialog)

        if self.audioFile != "":
            fileName = str(self.audioFile)
            self.audio_signal, self.audio_fs = FileReader.read_audio(fileName)
            # NOTE(review): remove_silence re-reads the file from disk and its
            # returned rate overwrites self.audio_fs from the line above.
            self.silenced_signal, self.audio_fs = self.mfcc.remove_silence(fileName)

            self.fsValLbl.setText(": " + str(self.audio_fs) + " Hz")
            # Raw vs. silence-trimmed sample counts side by side.
            self.sampleValLbl.setText(
                ": " + str(len(self.audio_signal)) + " | " + str(len(self.silenced_signal)) + " (silenced)")
            # Display only the basename (text after the last '/').
            self.audioFilenameLbl.setText(": " + fileName[fileName.rfind('/') + 1:len(fileName)])

            self.audioPlayBtn.setDisabled(False)

            # Drop previous plots before drawing the new waveform.
            self.clear_all_layout()

            fig = Figure()
            self.origSignalPlot = fig.add_subplot(111)
            self.origSignalPlot.plot(self.audio_signal)
            self.add_figure(fig, self.originalPlotLyt)

            self.extractSaveBtn.setDisabled(False)
            self.player.set_audio_source(self.audioFile)

            self.testDataTab.setCurrentIndex(0)
    def run(self):
        """Thread body: extract MFCC features for every queued audio file
        and persist them to the database.

        Emits the old-style Qt signal "update()" once up front and after
        each file (for UI progress), and "finish()" when all files are
        done.  (Python 2 / PyQt4 signal API.)
        """
        self.emit(QtCore.SIGNAL("update()"))
        # Frame size comes from the UI combo box; overlap is fixed at 50%.
        self.mfcc.frame_size = int(self.par.frameSizeVal.currentText())
        self.mfcc.overlap = self.mfcc.frame_size / 2
        for index, file_audio in enumerate(self.audio_files):
            file_audio = str(file_audio)
            # MFCC pipeline: read -> trim silence -> frame -> Hamming
            # window -> FFT -> mel filter bank -> cepstral features.
            self.audio_signal, self.audio_fs = FileReader.read_audio(
                file_audio)
            self.silenced_signal, self.audio_fs = self.mfcc.remove_silence(
                file_audio)
            self.num_frames, self.framed_signal = self.mfcc.frame_blocking(
                self.silenced_signal)
            self.windowed_signal = self.mfcc.hamm_window(self.framed_signal)
            self.fft_signal = self.mfcc.calc_fft(self.windowed_signal)
            self.log_energy, self.fbank = self.mfcc.fbank(
                self.fft_signal, self.audio_fs)
            self.features = self.mfcc.features(self.log_energy)
            # var = [st.variance(self.features[:,i]) for i in xrange(self.mfcc.num_filter)]
            # [self.all_features.append(self.features[i,:]) for i in xrange(self.features.shape[0])]
            # self.variances.append(var)
            features = []

            # TYPE selects the storage schema: 1 -> per-file rows keyed by
            # file_id, otherwise rows keyed by an output-class record.
            if TYPE == 1:
                file_id = self.db.insert("files", {"file_path": file_audio})
                for i in xrange(self.features.shape[0]):
                    # Hand-picked coefficient order (note 7/6 and 9/8 are
                    # swapped relative to the natural 1..13 order).
                    # [31, 28, 29, 30, 27, 26, 25, 24, 23, 22, 20, 21, 19
                    # features.append([file_id, i, self.features[i, 1:14], str(self.par.featuresTbl.item(index,1).text())])
                    features.append([
                        file_id, i, self.features[
                            i, [1, 2, 3, 4, 5, 7, 6, 9, 8, 10, 11, 12, 13]],
                        str(self.par.featuresTbl.item(index, 1).text())
                    ])

                self.db.insert_features(features)

            else:
                # Derive the target class label from the file name/path.
                output_class_id = self.db.insert(
                    "output_classes", {
                        "file_path": file_audio,
                        "class": str(FileReader.get_output_class(file_audio))
                    })
                for i in xrange(self.features.shape[0]):
                    features.append(
                        [output_class_id, i, self.features[i, 1:14]])
                self.db.insert_features(features)

            self.emit(QtCore.SIGNAL("update()"))

        # self.variances = np.asarray(self.variances)
        # rata2 = [st.mean(self.variances[:,i]) for i in xrange(self.mfcc.num_filter)]
        # self.write_excel(rata2)
        # print str(np.sort(rata2))
        # print str(np.argsort(rata2))
        self.emit(QtCore.SIGNAL("finish()"))
# 예제 #4 (Example #4) — snippet separator left over from the original paste;
# the stray "0" below it was extraction junk.
 def remove_silence(self, audio):
     """Trim silence from the file at *audio* using the ``sox`` CLI.

     Runs sox's ``silence`` effect (with this object's period/duration/
     threshold settings) into a temporary file, reads the trimmed signal
     back, deletes the temporary file, and returns
     ``[silenced_signal, silenced_fs]``.
     """
     # Build the sox command: trim from the front, then from the back.
     sox_cmd = ['sox', audio, FileReader.add_temp(audio), 'silence']
     sox_cmd += [self.above_period, self.duration, self.threshold]
     sox_cmd += [self.below_period, self.duration, self.threshold]
     call(sox_cmd)
     trimmed, rate = FileReader.read_audio(FileReader.add_temp(audio))
     # Clean up the temporary sox output before returning.
     os.remove(FileReader.add_temp(audio))
     return [trimmed, rate]
    def run(self):
        """Thread body: classify every queued audio file with LVQ and
        write predictions plus running accuracies back to the parent UI.

        For each file, extracts MFCC features, asks the LVQ model for a
        vote, splits the winning label "speaker-word" on its last '-',
        fills table columns 2/3 with the prediction, and compares against
        the expected speaker (col 0) and word (col 1).  Emits "update()"
        per file and "finish()" at the end.  (Python 2 / PyQt4.)
        """
        self.emit(QtCore.SIGNAL("update()"))
        # Frame size comes from the UI combo box; overlap is fixed at 50%.
        self.mfcc.frame_size = int(self.par.frameSizeVal.currentText())
        self.mfcc.overlap = self.mfcc.frame_size / 2
        speaker_correct = 0
        speaker_word_correct = 0

        for index, file_audio in enumerate(self.audio_files):
            file_audio = str(file_audio)
            # MFCC pipeline: read -> trim silence -> frame -> Hamming
            # window -> FFT -> mel filter bank -> cepstral features.
            self.audio_signal, self.audio_fs = FileReader.read_audio(
                file_audio)
            self.silenced_signal, self.audio_fs = self.mfcc.remove_silence(
                file_audio)
            self.num_frames, self.framed_signal = self.mfcc.frame_blocking(
                self.silenced_signal)
            self.windowed_signal = self.mfcc.hamm_window(self.framed_signal)
            self.fft_signal = self.mfcc.calc_fft(self.windowed_signal)
            self.log_energy, self.fbank = self.mfcc.fbank(
                self.fft_signal, self.audio_fs)
            self.features = self.mfcc.features(self.log_energy)

            # Load the codebook selected in the UI; same hand-picked
            # coefficient order as used at training time (7/6, 9/8 swapped).
            self.lvq = LVQ(str(self.par.databaseSelect.currentText()))
            # result = self.lvq.test_data(self.features[:, 1:14])
            # [31, 28, 29, 30, 27, 26, 25, 24, 23, 22, 20, 21, 19]
            result = self.lvq.test_data(
                self.features[:, [1, 2, 3, 4, 5, 7, 6, 9, 8, 10, 11, 12, 13]])
            print "vote for file " + str(index) + " : " + str(result)
            # full = str(result[1][0]) if len(result) >= 2 else str(result[0][0])
            full = str(result[0][0])
            # Labels look like "speaker-word"; no '-' means speaker only.
            speaker = full[:full.rfind('-')] if full.rfind('-') != -1 else full
            word = full[full.rfind('-') + 1:] if full.rfind('-') != -1 else "-"
            self.par.featuresTbl.setItem(index, 2,
                                         QtGui.QTableWidgetItem(speaker))
            self.par.featuresTbl.setItem(index, 3,
                                         QtGui.QTableWidgetItem(word))

            if speaker == self.par.featuresTbl.item(index, 0).text():
                speaker_correct += 1

            if speaker == self.par.featuresTbl.item(
                    index, 0).text() and word == self.par.featuresTbl.item(
                        index, 1).text():
                speaker_word_correct += 1

            # Running accuracies (percent) published on the parent widget;
            # float() forces true division under Python 2.
            self.par.speaker_word_acc = (speaker_word_correct /
                                         float(len(self.audio_files))) * 100
            self.par.speaker_only_acc = (speaker_correct /
                                         float(len(self.audio_files))) * 100

            self.emit(QtCore.SIGNAL("update()"))

        self.emit(QtCore.SIGNAL("finish()"))
    def run(self):
        """Thread body: extract MFCC features for every queued audio file
        and persist them to the database.

        Emits the old-style Qt signal "update()" once up front and after
        each file (for UI progress), and "finish()" when all files are
        done.  (Python 2 / PyQt4 signal API.)
        """
        self.emit(QtCore.SIGNAL("update()"))
        # Frame size comes from the UI combo box; overlap is fixed at 50%.
        self.mfcc.frame_size = int(self.par.frameSizeVal.currentText())
        self.mfcc.overlap = self.mfcc.frame_size / 2
        for index, file_audio in enumerate(self.audio_files):
            file_audio = str(file_audio)
            # MFCC pipeline: read -> trim silence -> frame -> Hamming
            # window -> FFT -> mel filter bank -> cepstral features.
            self.audio_signal, self.audio_fs = FileReader.read_audio(file_audio)
            self.silenced_signal, self.audio_fs = self.mfcc.remove_silence(file_audio)
            self.num_frames, self.framed_signal = self.mfcc.frame_blocking(self.silenced_signal)
            self.windowed_signal = self.mfcc.hamm_window(self.framed_signal)
            self.fft_signal = self.mfcc.calc_fft(self.windowed_signal)
            self.log_energy, self.fbank = self.mfcc.fbank(self.fft_signal, self.audio_fs)
            self.features = self.mfcc.features(self.log_energy)
            # var = [st.variance(self.features[:,i]) for i in xrange(self.mfcc.num_filter)]
            # [self.all_features.append(self.features[i,:]) for i in xrange(self.features.shape[0])]
            # self.variances.append(var)
            features = []


            # TYPE selects the storage schema: 1 -> per-file rows keyed by
            # file_id, otherwise rows keyed by an output-class record.
            if TYPE == 1:
                file_id = self.db.insert("files", {"file_path": file_audio})
                for i in xrange(self.features.shape[0]):
                    # Hand-picked coefficient order (note 7/6 and 9/8 are
                    # swapped relative to the natural 1..13 order).
                    # [31, 28, 29, 30, 27, 26, 25, 24, 23, 22, 20, 21, 19
                    # features.append([file_id, i, self.features[i, 1:14], str(self.par.featuresTbl.item(index,1).text())])
                    features.append([file_id, i, self.features[i, [1, 2, 3, 4, 5, 7, 6, 9, 8, 10, 11, 12, 13]],
                                     str(self.par.featuresTbl.item(index, 1).text())])

                self.db.insert_features(features)

            else:
                # Derive the target class label from the file name/path.
                output_class_id = self.db.insert("output_classes",
                                                 {"file_path": file_audio,
                                                  "class": str(FileReader.get_output_class(file_audio))})
                for i in xrange(self.features.shape[0]):
                    features.append([output_class_id, i, self.features[i, 1:14]])
                self.db.insert_features(features)

            self.emit(QtCore.SIGNAL("update()"))

        # self.variances = np.asarray(self.variances)
        # rata2 = [st.mean(self.variances[:,i]) for i in xrange(self.mfcc.num_filter)]
        # self.write_excel(rata2)
        # print str(np.sort(rata2))
        # print str(np.argsort(rata2))
        self.emit(QtCore.SIGNAL("finish()"))
    def run(self):
        """Thread body: classify every queued audio file with LVQ and
        write predictions plus running accuracies back to the parent UI.

        For each file, extracts MFCC features, asks the LVQ model for a
        vote, splits the winning label "speaker-word" on its last '-',
        fills table columns 2/3 with the prediction, and compares against
        the expected speaker (col 0) and word (col 1).  Emits "update()"
        per file and "finish()" at the end.  (Python 2 / PyQt4.)
        """
        self.emit(QtCore.SIGNAL("update()"))
        # Frame size comes from the UI combo box; overlap is fixed at 50%.
        self.mfcc.frame_size = int(self.par.frameSizeVal.currentText())
        self.mfcc.overlap = self.mfcc.frame_size/2
        speaker_correct = 0
        speaker_word_correct = 0

        for index,file_audio in enumerate(self.audio_files):
            file_audio = str(file_audio)
            # MFCC pipeline: read -> trim silence -> frame -> Hamming
            # window -> FFT -> mel filter bank -> cepstral features.
            self.audio_signal, self.audio_fs = FileReader.read_audio(file_audio)
            self.silenced_signal, self.audio_fs = self.mfcc.remove_silence(file_audio)
            self.num_frames, self.framed_signal = self.mfcc.frame_blocking(self.silenced_signal)
            self.windowed_signal = self.mfcc.hamm_window(self.framed_signal)
            self.fft_signal = self.mfcc.calc_fft(self.windowed_signal)
            self.log_energy, self.fbank = self.mfcc.fbank(self.fft_signal, self.audio_fs)
            self.features = self.mfcc.features(self.log_energy)

            # Load the codebook selected in the UI; same hand-picked
            # coefficient order as used at training time (7/6, 9/8 swapped).
            self.lvq = LVQ(str(self.par.databaseSelect.currentText()))
            # result = self.lvq.test_data(self.features[:, 1:14])
            # [31, 28, 29, 30, 27, 26, 25, 24, 23, 22, 20, 21, 19]
            result = self.lvq.test_data(self.features[:, [1, 2, 3, 4, 5, 7, 6, 9, 8, 10, 11, 12, 13]])
            print "vote for file " + str(index) + " : " + str(result)
            # full = str(result[1][0]) if len(result) >= 2 else str(result[0][0])
            full = str(result[0][0])
            # Labels look like "speaker-word"; no '-' means speaker only.
            speaker = full[:full.rfind('-')] if full.rfind('-') != -1 else full
            word = full[full.rfind('-')+1:] if full.rfind('-') != -1 else "-"
            self.par.featuresTbl.setItem(index, 2, QtGui.QTableWidgetItem(speaker))
            self.par.featuresTbl.setItem(index, 3, QtGui.QTableWidgetItem(word))

            if speaker == self.par.featuresTbl.item(index,0).text():
                speaker_correct += 1

            if speaker == self.par.featuresTbl.item(index,0).text() and word == self.par.featuresTbl.item(index,1).text():
                speaker_word_correct += 1

            # Running accuracies (percent) published on the parent widget;
            # float() forces true division under Python 2.
            self.par.speaker_word_acc = (speaker_word_correct / float(len(self.audio_files))) * 100
            self.par.speaker_only_acc = (speaker_correct / float(len(self.audio_files))) * 100

            self.emit(QtCore.SIGNAL("update()"))

        self.emit(QtCore.SIGNAL("finish()"))
# 예제 #8 (Example #8) — snippet separator left over from the original paste;
# the stray "0" below it was extraction junk.
 def remove_silence(self, audio):
     """Strip silence from the file at *audio* with the ``sox`` CLI.

     Invokes sox's ``silence`` effect using this object's configured
     period/duration/threshold values, reads the trimmed result back from
     the temporary output, removes that file, and returns
     ``[silenced_signal, silenced_fs]``.
     """
     # Same threshold spec applied twice: leading then trailing silence.
     trim_spec = [self.above_period, self.duration, self.threshold,
                  self.below_period, self.duration, self.threshold]
     call(['sox', audio, FileReader.add_temp(audio), 'silence'] + trim_spec)
     signal, fs = FileReader.read_audio(FileReader.add_temp(audio))
     # Delete the temporary sox output before handing the data back.
     os.remove(FileReader.add_temp(audio))
     return [signal, fs]