def __init__(self, stream, control, windowLength=2048, windowHop=800,
             sampleRate=16000, scaleScoresByFequency=True, gaussianStdDev=2):
    '''
    At the least, an EventDetector has to be initialized with:
    stream: a stream of audio samples that contains events
    control: a spec of a control, including its tine frequencies
        and envelope information

    Optional parameters:
    windowLength: number of samples in each analysis window
    windowHop: number of samples between the starts of consecutive windows
    sampleRate: sample rate passed to the FFT helper and onset detection
    scaleScoresByFequency: stored as scaleScoresOnFrequency; when true,
        class scores are scaled by the tine bin when windows are scanned
    gaussianStdDev: standard deviation of the precomputed Gaussian windows
    '''
    # Single worker: submitted chunks are processed one at a time.
    self.executorPool = ThreadPoolExecutor(max_workers=1)

    self.windowLength = windowLength
    self.windowHop = windowHop
    self.sampleRate = sampleRate
    self.scaleScoresOnFrequency = scaleScoresByFequency
    self.fft = Fft(self.windowLength, self.sampleRate)

    # Transfer control characteristics to detector specifications
    self.onsetAmplitude = control.onsetAmplitude
    self.transientTime = control.transientSeconds
    self.steadyStateTime = control.steadyStateSeconds
    tineBins = [self.fft.getFrequencyBin(freq)
                for freq in control.tineFrequencies]
    # Class index -> FFT bin of the corresponding tine frequency
    self.classes = dict(enumerate(tineBins))

    # Precompute one Gaussian window per tine bin so that we don't have to
    # repeat this work for every analysis window. No frequency scaling is
    # applied to the windows themselves.
    binCount = self.fft.getBinCount()
    self.gaussianWindows = [
        self._getGaussianWindow(binCount, gaussianStdDev, bin_)
        for bin_ in tineBins]

    # State variables
    self.fullSignal = np.zeros(2)
    self.windowIndex = -1
    self.lastWindowStart = None
    # Smallest integer sentinel: no onset has been seen yet.
    self.firstRecentOnsetWindow = -maxint - 1
    self.eventListeners = []
    self.recentDetections = []
    self.strikeIndex = 0

    # Register with the stream only once every attribute exists, so that a
    # stream that is already delivering data never calls back into a
    # partially constructed detector.
    stream.addEventListener(self)
class EventDetector(object):
    '''
    Listens to a stream of audio samples, slides a fixed-length analysis
    window over the accumulated signal, and reports events to registered
    listeners.

    A window in the NEUTRAL state is checked for a transient onset; windows
    in the STEADY_STATE that follows an onset are scored against each tine
    frequency, and the best-scoring class is reported as an Event.
    '''

    def __init__(self, stream, control, windowLength=2048, windowHop=800,
                 sampleRate=16000, scaleScoresByFequency=True,
                 gaussianStdDev=2):
        '''
        At the least, an EventDetector has to be initialized with:
        stream: a stream of audio samples that contains events
        control: a spec of a control, including its tine frequencies
            and envelope information

        Optional parameters:
        windowLength: number of samples in each analysis window
        windowHop: number of samples between the starts of consecutive
            windows
        sampleRate: sample rate passed to the FFT helper and onset detection
        scaleScoresByFequency: stored as scaleScoresOnFrequency; when true,
            scanWindow scales each class score by the square of its tine bin
        gaussianStdDev: standard deviation of the precomputed Gaussian
            windows
        '''
        # Single worker: submitted chunks are processed one at a time.
        self.executorPool = ThreadPoolExecutor(max_workers=1)

        self.windowLength = windowLength
        self.windowHop = windowHop
        self.sampleRate = sampleRate
        self.scaleScoresOnFrequency = scaleScoresByFequency
        self.fft = Fft(self.windowLength, self.sampleRate)

        # Transfer control characteristics to detector specifications
        self.onsetAmplitude = control.onsetAmplitude
        self.transientTime = control.transientSeconds
        self.steadyStateTime = control.steadyStateSeconds
        tineBins = [self.fft.getFrequencyBin(freq)
                    for freq in control.tineFrequencies]
        # Class index -> FFT bin of the corresponding tine frequency
        self.classes = dict(enumerate(tineBins))

        # Precompute one Gaussian window per tine bin so that we don't have
        # to repeat this work for every analysis window. Frequency scaling
        # is not baked into the windows; scanWindow applies it.
        binCount = self.fft.getBinCount()
        self.gaussianWindows = [
            self._getGaussianWindow(binCount, gaussianStdDev, bin_)
            for bin_ in tineBins]

        # State variables
        self.fullSignal = np.zeros(2)
        self.windowIndex = -1
        self.lastWindowStart = None
        # Smallest integer sentinel: no onset has been seen yet.
        self.firstRecentOnsetWindow = -maxint - 1
        self.eventListeners = []
        self.recentDetections = []
        self.strikeIndex = 0

        # Register with the stream only once every attribute exists, so that
        # a stream that is already delivering data never calls back into a
        # partially constructed detector.
        stream.addEventListener(self)

    def onNewStreamData(self, chunk):
        '''
        While this might look like a useless wrapper, in fact we need to
        perform processing asynchronously so that PySoundcard doesn't get
        overloaded by our processing.
        '''
        self.executorPool.submit(self._processChunk, chunk)

    def finishProcessing(self):
        '''
        Block until every submitted chunk has been processed, then create a
        fresh executor so that the detector can keep accepting data.
        '''
        # shutdown(wait=True) waits for all tasks to complete before
        # continuing.
        self.executorPool.shutdown(wait=True)
        self.executorPool = ThreadPoolExecutor(max_workers=1)

    def _processChunk(self, chunk):
        '''
        Append a chunk to the accumulated signal and process every analysis
        window that has become available.

        Chunks for which np.any(chunk) is false (empty or all zeros) are
        ignored entirely.
        '''
        if not np.any(chunk):
            return

        # de_stereo is defined elsewhere; presumably it collapses the chunk
        # to a single channel -- TODO confirm.
        chunk = de_stereo(chunk)
        self.fullSignal = np.concatenate((self.fullSignal, chunk))

        # Slide window as far as it can go through the new signal
        while True:
            windowIndex, window = self._getNextWindow()
            if windowIndex == -1:
                break

            state = self._getWindowState(
                windowIndex, self.firstRecentOnsetWindow, self.windowHop,
                self.windowLength, self.sampleRate, self.transientTime,
                self.steadyStateTime)

            if state == WindowState.NEUTRAL:
                if self._hasTransientOnset(window):
                    logging.info("Found transient at window %d", windowIndex)
                    self.strikeIndex += 1
                    self.firstRecentOnsetWindow = windowIndex
                    self.recentDetections = []
            elif state == WindowState.STEADY_STATE:
                # We only scan and classify windows that are considered in
                # steady state. This means large frequencies from the
                # strike have subsided.
                logging.debug("Steady state at window %d", windowIndex)
                classScores = self.scanWindow(window)
                for classIndex, score in classScores.items():
                    self.recentDetections.append(
                        Event(classIndex, score, windowIndex,
                              self.strikeIndex))
                event = self._findEvent(self.recentDetections)
                if event is not None:
                    self._reportEvent(event)
            # Any other state (the transient itself) is skipped.

    def scanWindow(self, window=None):
        '''
        Score a window of samples against every known class.

        window: samples handed to the FFT helper; when omitted, an empty
            list is used.
        Returns a dict that maps class index to score.
        '''
        if window is None:
            window = []
        fftCoefficients = self.fft.performFft(window)

        # Normalize FFT so that the frequency bins sum to 1
        # NOTE(review): a window whose coefficients sum to zero divides by
        # zero here -- confirm whether callers can ever pass one.
        fftNormal = fftCoefficients / sum(fftCoefficients)

        # Find scores for all possible classifications.
        scores = {}
        for classId, tineBin in self.classes.items():
            score = fftNormal.dot(self.gaussianWindows[classId])
            # Scale by the value of the tine bin if option specified to aid
            # the shorter-time, softer higher frequencies to get detected.
            if self.scaleScoresOnFrequency:
                score *= (tineBin * tineBin)
            scores[classId] = score
        return scores

    def _findEvent(self, detections, minWindows=1):
        '''
        Filter through recent detections to determine if there has been an
        event. Effectively a smoother.

        detections: Events collected since the most recent onset
        minWindows: minimum number of distinct windows that must have
            contributed detections before an event is reported
        Returns an Event for the class with the highest summed score,
        carrying the window and strike index of that class's most recent
        detection, or None when there is not enough evidence.
        '''
        windows = {d.windowIndex for d in detections}
        if len(windows) < minWindows:
            return None
        if not detections:
            # Only reachable when minWindows <= 0: there is nothing to rank.
            return None

        scores = defaultdict(int)
        for detection in detections:
            scores[detection.classId] += detection.score
        bestClass, highestScore = max(
            scores.items(), key=lambda item: item[1])

        # A list comprehension can be indexed regardless of whether the
        # builtin filter returns a list or a lazy iterator.
        detectionsWithClass = [
            d for d in detections if d.classId == bestClass]
        recentDetection = detectionsWithClass[-1]
        return Event(
            bestClass, highestScore,
            recentDetection.windowIndex, recentDetection.strikeIndex)

    def _reportEvent(self, event):
        ''' Pass an event on to every registered listener. '''
        for listener in self.eventListeners:
            listener.onEventDetected(event)

    def _getNextWindow(self):
        '''
        Return (windowIndex, window) for the next analysis window, or
        (-1, None) when the accumulated signal does not yet hold a complete
        window.

        State is only advanced when a window is actually returned, so window
        indices stay consecutive no matter how often this is polled while
        waiting for data.
        '''
        if self.lastWindowStart is None:
            newWindowStart = 0
            newWindowIndex = 0
        else:
            newWindowStart = self.lastWindowStart + self.windowHop
            newWindowIndex = self.windowIndex + 1
        newWindowEnd = newWindowStart + self.windowLength

        # A window that ends exactly at the end of the signal is complete.
        if newWindowEnd > len(self.fullSignal):
            return -1, None

        window = self.fullSignal[newWindowStart:newWindowEnd]
        logging.debug(
            "Returning window with start %d, end %d",
            newWindowStart, newWindowEnd)
        self.lastWindowStart = newWindowStart
        self.windowIndex = newWindowIndex
        return self.windowIndex, window

    def _hasTransientOnset(self, signal):
        '''
        Check whether a window contains onset of a transient response to
        strike. Both conditions must hold: the loudest sample exceeds the
        control's onset amplitude, and some onset-strength value exceeds a
        fixed threshold.
        '''
        THRESHOLD_ONSET_STRENGTH = 10  # not sure how we found this number

        # Cheap amplitude gate first; skip the onset analysis for windows
        # that are too quiet to qualify.
        loudestAmplitude = np.max(np.abs(signal))
        if not loudestAmplitude > self.onsetAmplitude:
            return False

        onset_strength = librosa.onset.onset_strength(
            y=signal, sr=self.sampleRate)
        return bool(np.any(onset_strength > THRESHOLD_ONSET_STRENGTH))

    def _getWindowState(
            self, windowIndex, recentFirstOnsetWindowIndex, hopLength,
            windowLength, sampleRate, transientTime, steadyStateTime):
        '''
        Check whether window represents transient, steady state, or
        something else.

        windowIndex: index of the window to classify
        recentFirstOnsetWindowIndex: index of the window in which the most
            recent onset was found
        hopLength, windowLength: hop and window sizes in samples
        sampleRate: samples per second
        transientTime, steadyStateTime: durations in seconds
        Returns a WindowState: TRANSIENT from the onset window up to the
        steady-state start, STEADY_STATE from there through the steady-state
        end (inclusive), NEUTRAL otherwise.
        '''
        # NOTE(review): this division is integer or true division depending
        # on the interpreter / __future__ imports of the file -- confirm
        # which is intended.
        hopsToJustifyTransient = (windowLength / hopLength) - 1
        timeOfHop = float(hopLength) / sampleRate
        hopsInTransient = transientTime / timeOfHop
        steadyStateStart = (recentFirstOnsetWindowIndex +
                            hopsToJustifyTransient + hopsInTransient)
        hopsInSteadyState = steadyStateTime / timeOfHop
        steadyStateEnd = steadyStateStart + hopsInSteadyState

        if steadyStateStart <= windowIndex <= steadyStateEnd:
            return WindowState.STEADY_STATE
        if recentFirstOnsetWindowIndex <= windowIndex < steadyStateStart:
            return WindowState.TRANSIENT
        return WindowState.NEUTRAL

    def _getGaussianWindow(self, length, std, center=None):
        '''
        Build a Gaussian window of the given length.

        length: number of samples in the returned window
        std: standard deviation of the Gaussian, in samples
        center: the index of the Gaussian window that represents the
            center; defaults to the middle of the window
        '''
        if center is None:
            # Floor division: the center is used to compute slice indices.
            center = length // 2

        # By default, Scipy Gaussian function puts center in middle of
        # window. To allow us to move it either all the way to the left or
        # all the way to the right, we generate the full window at twice
        # the length, and then align it to the center that the user
        # specified.
        # scipy.signal.windows.gaussian is the documented location; the
        # scipy.signal.gaussian alias is gone from recent SciPy releases.
        scipyGaussian = scipy.signal.windows.gaussian(M=length * 2, std=std)
        scipyCenter = length
        shiftAmount = scipyCenter - center
        return scipyGaussian[shiftAmount:shiftAmount + length]

    def addEventListener(self, listener):
        ''' Register a listener whose onEventDetected receives events. '''
        self.eventListeners.append(listener)