예제 #1
0
파일: app.py 프로젝트: mamut/voicecmd
class VoiceCmd:
    """Recognise a spoken command with a linear SVM and execute it.

    One ``Command`` is created per entry of the configured commands folder.
    The feature vectors each command exposes through ``objects`` train the
    classifier, labelled with that command's ``index``.
    """

    def __init__(self, config):
        """Load every command and train the classifier.

        Args:
            config: Mapping that provides the ``"commands_folder"`` path.
        """
        self.config = config
        self.commands = {}       # command name -> Command
        self.command_names = {}  # numeric class label -> command name
        commands_folder = self.config["commands_folder"]
        for index, command_name in enumerate(os.listdir(commands_folder)):
            cmd = Command(command_name, index, os.path.join(commands_folder, command_name))
            self.commands[command_name] = cmd
            self.command_names[index] = command_name

        self.svm = LibSvm(svm_type="c_svc", kernel_type="linear")
        x, y = [], []
        # ``values()`` exists on both Python 2 and 3 (``itervalues`` does not).
        for command in self.commands.values():
            for feature in command.objects:
                x.append(feature)
                y.append(command.index)
        self.svm.learn(x, y)

    def run(self):
        """Record a command, classify it, print the decision and execute it."""
        # Single-argument ``print(...)`` prints the same text on Python 2 and 3.
        print("Speak now.")
        signal = self.read_voice()
        print("Recording stopped")
        decision = self.predict(signal)
        print(decision)
        self.commands[decision].execute()

    def predict(self, signal):
        """Return the name of the command the classifier assigns to *signal*.

        The classifier's prediction is truncated to ``int`` and used as the
        key into ``self.command_names``.
        """
        return self.command_names[int(self.svm.pred(signal))]

    def distance(self, template, query):
        """Return the first element of ``dtw(template, query)``.

        NOTE(review): presumably the DTW distance between the two
        sequences -- confirm against the ``dtw`` helper in use.
        """
        return dtw(template, query)[0]

    def read_voice(self):
        """Record three seconds of mono 16-bit audio and unpack it.

        The raw recording is also written to ``debug.wav`` in the current
        working directory.

        Returns:
            The result of ``unpack_wav`` applied to the recorded frames.
        """
        rate, fpb, seconds, channels = 44100, 1024, 3, 1
        # Floor division keeps the chunk count an integer on Python 3 too.
        chunks_needed = rate // fpb * seconds
        read = []
        p = pyaudio.PyAudio()
        try:
            # Query the sample width while the PyAudio instance is still live.
            sample_width = p.get_sample_size(pyaudio.paInt16)
            stream = p.open(format=pyaudio.paInt16, channels=channels, rate=rate, input=True, frames_per_buffer=fpb)
            try:
                for _ in range(chunks_needed):
                    read.append(stream.read(fpb))
            finally:
                # Release the stream even when a read fails.
                stream.close()
        finally:
            p.terminate()
        # Recorded chunks are byte strings; ``b""`` is plain ``str`` on Python 2.
        data = b"".join(read)

        debug = wave.open("debug.wav", "wb")
        try:
            debug.setnchannels(channels)
            debug.setsampwidth(sample_width)
            debug.setframerate(rate)
            debug.writeframes(data)
        finally:
            debug.close()

        return unpack_wav(data, chunks_needed * fpb, channels)
예제 #2
0
def bench_mlpy(X, y, T, valid):
    """Benchmark an mlpy RBF-kernel ``LibSvm`` on one train/predict cycle.

    The model is trained on ``X`` with ``y`` cast to ``float64`` and then
    predicts ``T``. The kernel ``gamma`` is ``1 / sigma``, where ``sigma``
    is looked up outside this function.

    Returns:
        ``(score, elapsed)``: the mean of the element-wise comparison of
        the predictions with ``valid``, and the difference between two
        ``datetime.now()`` readings taken around the work (the import of
        mlpy is not timed).
    """
    from mlpy import LibSvm

    started = datetime.now()
    classifier = LibSvm(kernel_type='rbf', C=1., gamma=1. / sigma)
    classifier.learn(X, y.astype(np.float64))
    predicted = classifier.pred(T)
    accuracy = np.mean(predicted == valid)
    elapsed = datetime.now() - started
    return accuracy, elapsed
예제 #3
0
def bench_mlpy(X, y, T, valid):
    """Fit an mlpy ``LibSvm`` (RBF kernel), predict, and report score and time.

    Args:
        X: Training samples passed to ``learn``.
        y: Training labels; converted with ``astype(np.float64)``.
        T: Samples passed to ``pred``.
        valid: Reference values compared element-wise with the predictions.

    Returns:
        A ``(score, duration)`` tuple. ``score`` is the mean of
        ``predictions == valid``. ``duration`` is ``datetime.now()`` after
        scoring minus ``datetime.now()`` taken right after importing mlpy.
    """
    from mlpy import LibSvm

    t0 = datetime.now()
    # ``sigma`` is resolved from the enclosing module scope.
    svm_options = {'kernel_type': 'rbf', 'C': 1., 'gamma': 1. / sigma}
    model = LibSvm(**svm_options)
    model.learn(X, y.astype(np.float64))
    hits = model.pred(T) == valid
    score = np.mean(hits)
    return score, datetime.now() - t0
예제 #4
0
    def metric(self):
        """Train a ``LibSvm`` model and collect runtime and quality metrics.

        The model is built from ``self.build_opts`` and trained on
        ``self.data_split[0]`` / ``self.data_split[1]``. When ``self.data``
        has at least two entries, ``self.data[1]`` is predicted inside the
        timed section.

        Returns:
            A dict that always holds ``"runtime"``. When ``self.data`` has
            exactly three entries it also holds ``'ACC'``, ``'MCC'``,
            ``'Precision'``, ``'Recall'`` and ``'MSE'``, computed from
            ``self.data[2]`` and the predictions.
        """
        timer = Timer()
        with timer:
            classifier = LibSvm(**self.build_opts)
            train_x, train_y = self.data_split[0], self.data_split[1]
            classifier.learn(train_x, train_y)

            if len(self.data) >= 2:
                predictions = classifier.pred(self.data[1])

        results = {"runtime": timer.ElapsedTime()}

        # Quality metrics are only added when exactly three entries exist.
        if len(self.data) != 3:
            return results

        confusion = Metrics.ConfusionMatrix(self.data[2], predictions)
        results['ACC'] = Metrics.AverageAccuracy(confusion)
        results['MCC'] = Metrics.MCCMultiClass(confusion)
        results['Precision'] = Metrics.AvgPrecision(confusion)
        results['Recall'] = Metrics.AvgRecall(confusion)
        results['MSE'] = Metrics.SimpleMeanSquaredError(self.data[2],
                                                        predictions)
        return results
예제 #5
0
class VoiceCmd:
    """Voice-command dispatcher backed by a linear SVM classifier.

    Each entry of the configured commands folder becomes a ``Command``.
    The classifier is trained on the feature vectors found in every
    command's ``objects``, using the command's ``index`` as class label.
    """

    def __init__(self, config):
        """Build the command table and train the classifier.

        Args:
            config: Mapping that provides the ``'commands_folder'`` path.
        """
        self.config = config
        self.commands = {}       # command name -> Command
        self.command_names = {}  # numeric class label -> command name
        commands_folder = self.config['commands_folder']
        for index, command_name in enumerate(os.listdir(commands_folder)):
            cmd = Command(command_name, index,
                          os.path.join(commands_folder, command_name))
            self.commands[command_name] = cmd
            self.command_names[index] = command_name

        self.svm = LibSvm(svm_type='c_svc', kernel_type='linear')
        x, y = [], []
        # ``values()`` works on Python 2 and 3; ``itervalues()`` is 2-only.
        for command in self.commands.values():
            for feature in command.objects:
                x.append(feature)
                y.append(command.index)
        self.svm.learn(x, y)

    def run(self):
        """Record audio, classify it, print the decision and run the command."""
        # Parenthesised single-argument print is valid on Python 2 and 3.
        print("Speak now.")
        signal = self.read_voice()
        print("Recording stopped")
        decision = self.predict(signal)
        print(decision)
        self.commands[decision].execute()

    def predict(self, signal):
        """Map the classifier's prediction for *signal* to a command name.

        The prediction is converted with ``int`` and looked up in
        ``self.command_names``.
        """
        return self.command_names[int(self.svm.pred(signal))]

    def distance(self, template, query):
        """Return element 0 of ``dtw(template, query)``.

        NOTE(review): presumably the DTW distance -- verify against the
        ``dtw`` implementation that is imported.
        """
        return dtw(template, query)[0]

    def read_voice(self):
        """Capture three seconds of mono 16-bit audio and unpack the frames.

        A copy of the raw recording is written to ``debug.wav`` in the
        current working directory.

        Returns:
            The value returned by ``unpack_wav`` for the recorded data.
        """
        rate, fpb, seconds, channels = 44100, 1024, 3, 1
        # ``//`` keeps the count an int on Python 3, as ``/`` did on Python 2.
        chunks_needed = rate // fpb * seconds
        read = []
        p = pyaudio.PyAudio()
        try:
            # Read the sample width before the PyAudio instance is terminated.
            sample_width = p.get_sample_size(pyaudio.paInt16)
            stream = p.open(format=pyaudio.paInt16,
                            channels=channels,
                            rate=rate,
                            input=True,
                            frames_per_buffer=fpb)
            try:
                for _ in range(chunks_needed):
                    read.append(stream.read(fpb))
            finally:
                # Always release the stream, even if a read raises.
                stream.close()
        finally:
            p.terminate()
        # Chunks are byte strings; ``b''`` is ordinary ``str`` on Python 2.
        data = b''.join(read)

        debug = wave.open('debug.wav', 'wb')
        try:
            debug.setnchannels(channels)
            debug.setsampwidth(sample_width)
            debug.setframerate(rate)
            debug.writeframes(data)
        finally:
            debug.close()

        return unpack_wav(data, chunks_needed * fpb, channels)