Example #1
0
    def processTrainingSet(self, basedir="sounds/train/", signalword="oknavsa", savedir="data/"):
        """Extract features from the training clips, save them, then train.

        Every ``.wav`` file in *basedir* is classified by its lower-cased
        file name: names starting with *signalword* are keyword (signal)
        clips, names starting with ``random`` or ``background`` are
        non-keyword clips, and anything else is ignored.  Each accepted clip
        is read by a ``Trigger``, split into subsamples, and one row from
        ``self.getFeatures`` is collected per subsample.

        Side effects:
            * ``self.YXtot`` is replaced by an array of all feature rows.
            * ``self.keywordDurations`` is replaced by an array holding the
              duration of every subsample that came from a signal clip.
            * ``imagedata_<Nfreq>_<Ntime>.npy`` (features) and
              ``metadata_<Nfreq>_<Ntime>.npy`` (keyword durations) are
              written into *savedir*.
            * ``self.trainAndTest()`` is called.

        Args:
            basedir: Directory containing the ``.wav`` training clips (and,
                optionally, a ``thresh.dat`` with per-clip thresholds).
            signalword: File-name prefix identifying keyword clips; compared
                case-insensitively.
            savedir: Directory the ``.npy`` outputs are written to.

        Returns:
            A tuple ``(fnames, durations, YXtest)``: the clip names and
            subsample durations selected by the index returned from
            ``self.trainAndTest()``, plus the second value returned by
            ``self.trainAndTest()``.
            NOTE(review): presumably the index selects the held-out test
            rows -- confirm against ``trainAndTest``.
        """
        # Threshold applied to clips that have no entry in thresh.dat.
        DEFAULT_THRESHOLD = 600

        clips = [clip for clip in os.listdir(basedir) if clip.endswith(".wav")]
        bestThresh = self._loadBestThresholds(basedir)

        # Clip names are lower-cased before matching, so the prefix must be
        # lower-cased too or a signal word with capitals would never match.
        keyword = signalword.lower()

        tr = Trigger()

        self.YXtot = []
        fnames = []
        durations = []
        self.keywordDurations = []

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=Warning)

            for clip in clips:
                tr.setParams({"THRESHOLD": bestThresh.get(clip, DEFAULT_THRESHOLD)})

                name = clip.lower()
                if name.startswith(keyword):
                    isSignal = True
                elif name.startswith(("random", "background")):
                    isSignal = False
                else:
                    # Neither a keyword clip nor a known negative: not used.
                    continue

                # os.path.join tolerates a basedir without a trailing slash.
                tr.readWav(os.path.join(basedir, clip))
                subsamples = tr.getSubsamples()
                framerate = tr.getFramerate()

                if self.verbosity > 1:
                    print("Loading clip %s (isSignal: %i) ==> %i subsamples" % (clip, isSignal, len(subsamples)))

                for ss in subsamples:
                    duration = self.getSampleDuration(ss, framerate)
                    if isSignal:
                        self.keywordDurations.append(duration)

                    self.YXtot.append(self.getFeatures(ss, framerate, isSignal))
                    # Kept parallel to self.YXtot so the index returned by
                    # trainAndTest() can select matching names/durations.
                    fnames.append(clip)
                    durations.append(duration)

        self.YXtot = np.array(self.YXtot)
        self.keywordDurations = np.array(self.keywordDurations)

        outputname = os.path.join(savedir, "imagedata_%i_%i.npy" % (self.Nfreq, self.Ntime))
        outputname_meta = os.path.join(savedir, "metadata_%i_%i.npy" % (self.Nfreq, self.Ntime))
        np.save(outputname, self.YXtot)
        np.save(outputname_meta, self.keywordDurations)
        if self.verbosity > 1:
            print("made %s and %s" % (outputname, outputname_meta))

        idx_test, YXtest = self.trainAndTest()

        return np.array(fnames)[idx_test], np.array(durations)[idx_test], YXtest

    def _loadBestThresholds(self, basedir):
        """Return a ``{clip name: threshold}`` dict parsed from ``thresh.dat``.

        The file is looked up in *basedir* first; if it cannot be opened
        there, the historical location ``sounds/train/thresh.dat`` is tried
        so existing setups keep working.  Each useful line has the form
        ``<clip name>,<integer threshold>``; blank or malformed lines are
        skipped individually rather than discarding the whole table.  An
        empty dict is returned when no file can be read.
        """
        legacyPath = "sounds/train/thresh.dat"
        candidates = [os.path.join(basedir, "thresh.dat")]
        if legacyPath not in candidates:
            candidates.append(legacyPath)

        for path in candidates:
            try:
                with open(path, "r") as fh:
                    rows = [row.strip() for row in fh]
            except (IOError, OSError):
                # Missing/unreadable file: thresholds are optional.
                continue

            thresholds = {}
            for row in rows:
                fields = row.split(",")
                try:
                    thresholds[fields[0]] = int(fields[1])
                except (IndexError, ValueError):
                    # No comma, or a non-integer threshold: ignore this line.
                    continue
            return thresholds

        return {}
Example #2
0
File: do.py Project: Amarang/navsa
parser = Parser()

# Collect the Trigger constructor arguments first; the LED-related ones are
# only added when haveLeds (defined outside this excerpt) is true.
triggerArgs = {"verbose": True}
if haveLeds:
    from Lights import Lights
    led = Lights()
    led.stop()
    triggerArgs["led"] = led
    triggerArgs["AMBIENT_MULT"] = 1.9
tr = Trigger(**triggerArgs)

# Alternative: rebuild the training data from raw clips instead of loading
# the saved feature file below.
# proc.processTrainingSet(basedir="sounds/train/", signalword="oknavsa", savedir="data/")
# proc.processTrainingSet(basedir="16khz/", signalword="oknavsa", savedir="data/")
proc.loadTrainData("data/imagedata_15_15.npy")

# Sounds whose duration falls outside the keyword duration range are not
# worth fingerprinting, so hand that range to the trigger as its word-time
# limits.
lower, upper = proc.getKeywordDurationRange()
tr.setParams({
    "MIN_WORD_TIME": lower,
    "MAX_WORD_TIME": upper,
})

tr.setParams({"THRESHOLD": 150})
# tr.getAmbientLevels(duration=0.7)


print("Now will score realtime audio")

def myCallback(trigger, data, data_raw):
    print
    framerate = trigger.getFramerate()
    if not trigger.hasSaidKeyword():
        t0 = time.time()
        confidence = proc.getKeywordProbability(data, framerate)
        print "took %.2fms to classify %.2fs clip, score: %.2f" \
                % (1000.0*(time.time()-t0), 1.0*len(data)/framerate, confidence)