Exemple #1
0
    def processTrainingSet(self, basedir="sounds/train/", signalword="oknavsa", savedir="data/"):

        clips = [clip for clip in os.listdir(basedir) if clip.endswith(".wav")]

        try:
            with open("sounds/train/thresh.dat","r") as fh:
                lines = [line.strip() for line in fh.readlines() if len(line)>1]
                bestThresh = {line.split(",")[0]:int(line.split(",")[1]) for line in lines}
        except:
            bestThresh = {}

        tr = Trigger()

        self.YXtot = []
        fnames = []
        durations = []
        self.keywordDurations = []

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",category=Warning)

            for clip in clips:

                if clip in bestThresh: tr.setParams({"THRESHOLD": bestThresh[clip]})
                else: tr.setParams({"THRESHOLD": 600})

                if clip.lower().startswith(signalword): isSignal = True
                elif clip.lower().startswith("random"): isSignal = False
                elif clip.lower().startswith("background"): isSignal = False
                else: continue

                tr.readWav(basedir+clip)
                subsamples = tr.getSubsamples()
                framerate = tr.getFramerate()

                if self.verbosity > 1: print "Loading clip %s (isSignal: %i) ==> %i subsamples" % (clip, isSignal, len(subsamples))

                for ss in subsamples:
                    duration = self.getSampleDuration(ss, framerate) 
                    if isSignal: self.keywordDurations.append(duration)

                    self.YXtot.append( self.getFeatures(ss,framerate,isSignal) )
                    fnames.append(clip)
                    durations.append(duration)

        self.YXtot = np.array(self.YXtot)

        self.keywordDurations = np.array(self.keywordDurations)

        outputname = "%simagedata_%i_%i.npy" % (savedir,self.Nfreq,self.Ntime)
        outputname_meta = "%smetadata_%i_%i.npy" % (savedir,self.Nfreq,self.Ntime)
        np.save(outputname, self.YXtot)
        np.save(outputname_meta, self.keywordDurations)
        if self.verbosity > 1: print "made %s and %s" % (outputname, outputname_meta)

        idx_test, YXtest = self.trainAndTest()

        return np.array(fnames)[idx_test], np.array(durations)[idx_test], YXtest