def illustrate_match(self, analyzer, ht, filename):
    """ Plot the query landmarks and the matching landmarks over a
        spectrogram of the query, and save the figure as a PNG.

        :params:
          analyzer : object supplying target_sr, n_fft, n_hop and
            wavfile2hashes() for the query audio
          ht : hash table handed to self.match_hashes(); ht.names maps
            a match id to its stored name
          filename : str
            path of the query sound file

        :returns:
          results : the match results from self.match_hashes(),
            re-sorted by descending x[2] when self.sort_by_time is set

        Side effect: writes "./src/static/sgram<random hex>.png".
    """
    # Make the spectrogram
    d, sr = audio_read.audio_read(filename, sr=analyzer.target_sr,
                                  channels=1)
    sgram = np.abs(
        stft.stft(d, n_fft=analyzer.n_fft,
                  hop_length=analyzer.n_hop,
                  window=np.hanning(analyzer.n_fft + 2)[1:-1]))
    # Convert to dB, flooring at 120 dB below the peak magnitude
    sgram = 20.0 * np.log10(np.maximum(sgram, np.max(sgram) / 1e6))
    sgram = sgram - np.mean(sgram)
    # High-pass filter onset emphasis
    # [:-1,] discards top bin (nyquist) of sgram so bins fit in 8 bits
    # spectrogram enhancement
    if self.illustrate_hpf:
        HPF_POLE = 0.98
        sgram = np.array([
            scipy.signal.lfilter([1, -1], [1, -HPF_POLE], s_row)
            for s_row in sgram
        ])[:-1, ]
    # Shift so the maximum sits at 0 dB, matching vmax below
    sgram = sgram - np.max(sgram)
    librosa.display.specshow(sgram, sr=sr, hop_length=analyzer.n_hop,
                             y_axis='linear', x_axis='time',
                             cmap='gray_r', vmin=-80.0, vmax=0)
    # Do the match?
    q_hashes = analyzer.wavfile2hashes(filename)
    # Run query, get back the hashes for match zero
    results, matchhashes = self.match_hashes(ht, q_hashes, hashesfor=0)
    if self.sort_by_time:
        results = sorted(results, key=lambda x: -x[2])
    # Convert the hashes to landmarks
    lms = audfprint_analyze.hashes2landmarks(q_hashes)
    mlms = audfprint_analyze.hashes2landmarks(matchhashes)
    # Overplot on the spectrogram: all query landmarks in green, then the
    # matching ones in red on top
    plt.plot(
        np.array([[x[0], x[0] + x[3]] for x in lms]).T,
        np.array([[x[1], x[2]] for x in lms]).T,
        '.-g')
    plt.plot(
        np.array([[x[0], x[0] + x[3]] for x in mlms]).T,
        np.array([[x[1], x[2]] for x in mlms]).T,
        '.-r')
    # Add title: second "/"-separated component of the stored name, cut
    # at its first "."
    plt.title("Matched as "
              + ht.names[results[0][0]].split("/")[1].split(".")[0])
    # Save the figure.  The keyword must be spelled bbox_inches; the
    # previous misspelling meant the tight bounding box was never applied.
    # NOTE(review): the figure is not closed after saving, so repeated
    # calls appear to keep drawing on the same current figure - confirm
    # whether a plt.close() is wanted here.
    plt.savefig("./src/static/sgram" + uuid.uuid4().hex + ".png",
                bbox_inches="tight")
    # Return
    return results
def process_audio(af, auddata, audctr):
    """Extract LPC-derived features from one audio file into auddata.

    The file is read with ar.audio_read at rate SR and analysed with
    aud.lpc_analysis (order LPC_ORDER, window SPF, step SPF*OVERLAP).
    The LPC coefficients are converted with aud.lpc_to_lsf, then both the
    LSF rows and the gain rows are trimmed to the same row window and
    each even-indexed row is joined side by side with the following
    odd-indexed row.

    af      -- audio file handed to ar.audio_read
    auddata -- 2-D array written in place; rows audctr up to
               int(audctr + SAMPLE_LEN) receive the features
    audctr  -- index of the first row to write

    Returns int(audctr + SAMPLE_LEN), the next free row index.
    """
    samples, _rate = ar.audio_read(af, sr=SR)
    frame_size = SPF
    frame_step = int(SPF * OVERLAP)
    lpc_coeffs, gains, _residual = aud.lpc_analysis(
        samples, LPC_ORDER, window_step=frame_step, window_size=frame_size)
    line_spectra = aud.lpc_to_lsf(lpc_coeffs)

    # Row window shared by the LSF and gain matrices.
    rows_per_unit = int(1 / OVERLAP)
    first_row = int(MARGIN * rows_per_unit)
    last_row = int((SAMPLE_LEN + MARGIN) * rows_per_unit)
    line_spectra = line_spectra[first_row:last_row, :]
    gains = gains[first_row:last_row, :]

    # Even rows next to odd rows; the stride of 2 assumes half overlap.
    feat = np.concatenate(
        (line_spectra[::2, :], line_spectra[1::2, :],
         gains[::2, :], gains[1::2, :]),
        axis=1)

    next_row = int(audctr + SAMPLE_LEN)
    auddata[audctr:next_row, :] = feat
    return next_row
def illustrate_match(self, analyzer, ht, filename):
    """ Draw the query's landmarks (green) and the landmarks shared with
        the top match (red) over a spectrogram of the query, show the
        plot, and return the match results.
    """

    def _overplot(landmarks, fmt):
        # One line segment per landmark: (x[0], x[1]) -> (x[0]+x[3], x[2])
        times = np.array([[lm[0], lm[0] + lm[3]] for lm in landmarks]).T
        bins = np.array([[lm[1], lm[2]] for lm in landmarks]).T
        plt.plot(times, bins, fmt)

    # Spectrogram magnitude of the mono query audio
    samples, rate = audio_read.audio_read(
        filename, sr=analyzer.target_sr, channels=1)
    window = np.hanning(analyzer.n_fft + 2)[1:-1]
    magnitude = np.abs(librosa.stft(samples, n_fft=analyzer.n_fft,
                                    hop_length=analyzer.n_hop,
                                    window=window))
    # dB scale with a floor 120 dB under the peak, then remove the mean
    floor = np.max(magnitude) / 1e6
    spec_db = 20.0 * np.log10(np.maximum(magnitude, floor))
    spec_db = spec_db - np.mean(spec_db)
    # Optional per-row high-pass filter for onset emphasis; the trailing
    # [:-1,] drops the top (nyquist) bin so bins fit in 8 bits
    if self.illustrate_hpf:
        HPF_POLE = 0.98
        filtered_rows = [
            scipy.signal.lfilter([1, -1], [1, -HPF_POLE], row)
            for row in spec_db
        ]
        spec_db = np.array(filtered_rows)[:-1,]
    # Put the maximum at 0 dB to line up with vmax
    spec_db = spec_db - np.max(spec_db)
    librosa.display.specshow(spec_db, sr=rate, hop_length=analyzer.n_hop,
                             y_axis='linear', x_axis='time',
                             cmap='gray_r', vmin=-80.0, vmax=0)

    # Query the table, asking for the hashes behind match zero
    q_hashes = analyzer.wavfile2hashes(filename)
    results, matchhashes = self.match_hashes(ht, q_hashes, hashesfor=0)
    if self.sort_by_time:
        results = sorted(results, key=lambda x: -x[2])

    # Hashes -> landmarks, then draw query first and matches on top
    query_landmarks = audfprint_analyze.hashes2landmarks(q_hashes)
    match_landmarks = audfprint_analyze.hashes2landmarks(matchhashes)
    _overplot(query_landmarks, '.-g')
    _overplot(match_landmarks, '.-r')

    # Title names the top match and how many hashes agreed
    prefix = filename + " : Matched as "
    matched_name = ht.names[results[0][0]]
    hash_summary = (" with %d of %d hashes"
                    % (len(matchhashes), len(q_hashes)))
    plt.title(prefix + matched_name + hash_summary)

    plt.show()
    return results
def wavfile2peaks(self, filename, shifts=None, return_spectrogram=False):
    """ Read a soundfile and return its landmark peaks as
        a list of (time, bin) pairs.  If specified, resample to sr first.
        shifts > 1 causes hashes to be extracted from multiple shifts of
        waveform, to reduce frame effects.

        :params:
          filename : str
            sound file, or a precomputed peaks file when its extension
            equals PRECOMPPKEXT
          shifts : int or None
            number of part-frame shifts; None or < 2 analyses the
            waveform once, otherwise a list with one peak list per
            shift is returned
          return_spectrogram : bool
            when true, return (peaks, sgram) instead of peaks

        :returns:
          peaks, or (peaks, sgram) when return_spectrogram is true.
          sgram is the spectrogram reported by find_peaks for the
          unshifted waveform; it is None for a precomputed peaks file,
          where no spectrogram is computed.

        :raises:
          IOError when the file cannot be read and self.fail_on_error
          is set; otherwise the file is treated as empty audio.
    """
    # Bound up front so every return path has a value for it.
    sgram = None
    ext = os.path.splitext(filename)[1]
    if ext == PRECOMPPKEXT:
        # short-circuit - precomputed fingerprint file
        peaks = peaks_load(filename)
        dur = np.max(peaks, axis=0)[0] * self.n_hop / self.target_sr
    else:
        try:
            d, sr = audio_read.audio_read(filename, sr=self.target_sr,
                                          channels=1)
        except Exception as e:  # audioread.NoBackendError:
            message = "wavfile2peaks: Error reading " + filename
            if self.fail_on_error:
                print(e)
                raise IOError(message)
            print(message, "skipping")
            d = []
            sr = self.target_sr
        # Duration is kept on the analyzer (see the counters below)
        dur = len(d) / sr
        if shifts is None or shifts < 2:
            if return_spectrogram:
                peaks, sgram = self.find_peaks(
                    d, sr, return_spectrogram=return_spectrogram)
            else:
                peaks = self.find_peaks(
                    d, sr, return_spectrogram=return_spectrogram)
        else:
            # Calculate hashes with optional part-frame shifts
            peaklists = []
            for shift in range(shifts):
                # Divide by the requested number of shifts (the argument),
                # so the offsets always cover a single hop in equal steps
                # even when the caller's count differs from self.shifts.
                shiftsamps = int(shift / shifts * self.n_hop)
                if return_spectrogram and shift == 0:
                    # Shift 0 is the whole waveform, so its spectrogram
                    # is the one the single-pass path would return.
                    shifted_peaks, sgram = self.find_peaks(
                        d[shiftsamps:], sr, return_spectrogram=True)
                else:
                    shifted_peaks = self.find_peaks(d[shiftsamps:], sr)
                peaklists.append(shifted_peaks)
            peaks = peaklists

    # instrumentation to track total amount of sound processed
    self.soundfiledur = dur
    self.soundfiletotaldur += dur
    self.soundfilecount += 1
    if return_spectrogram:
        return peaks, sgram
    else:
        return peaks
def wavfile2peaks(self, filename, shifts=None):
    """ Read a soundfile and return its landmark peaks as
        a list of (time, bin) pairs.  If specified, resample to sr first.
        shifts > 1 causes hashes to be extracted from multiple shifts of
        waveform, to reduce frame effects.

        :params:
          filename : str
            sound file, or a precomputed peaks file when its extension
            equals PRECOMPPKEXT
          shifts : int or None
            number of part-frame shifts; None or < 2 analyses the
            waveform once, otherwise a list with one peak list per
            shift is returned

        :returns:
          peaks : the peaks from find_peaks (or peaks_load), or a list
            of such peak lists when shifts >= 2

        :raises:
          IOError when the file cannot be read and self.fail_on_error
          is set; otherwise the file is treated as empty audio.
    """
    ext = os.path.splitext(filename)[1]
    if ext == PRECOMPPKEXT:
        # short-circuit - precomputed fingerprint file
        peaks = peaks_load(filename)
        dur = np.max(peaks, axis=0)[0] * self.n_hop / self.target_sr
    else:
        try:
            d, sr = audio_read.audio_read(filename, sr=self.target_sr,
                                          channels=1)
        except Exception as e:  # audioread.NoBackendError:
            message = "wavfile2peaks: Error reading " + filename
            if self.fail_on_error:
                print(e)
                raise IOError(message)
            print(message, "skipping")
            d = []
            sr = self.target_sr
        # Duration is kept on the analyzer (see the counters below)
        dur = len(d) / sr
        if shifts is None or shifts < 2:
            peaks = self.find_peaks(d, sr)
        else:
            # Calculate hashes with optional part-frame shifts
            peaklists = []
            for shift in range(shifts):
                # Divide by the requested number of shifts (the argument),
                # so the offsets always cover a single hop in equal steps
                # even when the caller's count differs from self.shifts.
                shiftsamps = int(shift / shifts * self.n_hop)
                peaklists.append(self.find_peaks(d[shiftsamps:], sr))
            peaks = peaklists

    # instrumentation to track total amount of sound processed
    self.soundfiledur = dur
    self.soundfiletotaldur += dur
    self.soundfilecount += 1
    return peaks
def wavfile2samples(self, filename, label=True, subsample=None, subratio=None):
    """ Build one feature row per landmark of a soundfile, plus a
        per-landmark score of how often that landmark reappears when the
        audio is perturbed.

        :params:
          filename : str
            sound file to analyse
          label : bool
            when true, fill probs by re-extracting landmarks from
            shifted and noise-perturbed versions of the audio; when
            false, probs stays all zeros
          subsample : int or None
            when truthy and smaller than the number of landmarks, keep
            only a random subset (drawn without replacement)
          subratio : float or None
            only consulted when subsampling happens; replaces the
            subset size by int(len(landmarks) * subratio)

        :returns:
          (feats, probs) : np.array(feats_list) with one row per kept
            landmark, and probs, a (len(landmarks), 1) array
    """
    landmarks = self.peaks2landmarks(self.wavfile2peaks(filename))
    # The file is read again here to get the waveform and the two
    # spectrogram arrays; `peaks` from this call is not used below.
    d, sr = audio_read.audio_read(filename, sr=self.target_sr, channels=1)
    peaks,sgram,sgramo = self.find_peaks_sgram(d, sr)
    if subsample and subsample<len(landmarks):
        if subratio:
            subsample = int(len(landmarks)*subratio)
        index = np.random.choice(len(landmarks), subsample, replace=False)
        landmarks = [ landmarks[idx] for idx in index]
    # Hit counter per kept landmark, keyed by the landmark tuple itself
    lms_map = {}
    for lm in landmarks:
        lms_map[lm] = 0.0
    # probs
    probs = np.zeros((len(landmarks),1))
    if label:
        test_cnt = 0.0
        # move a slide: 40 shifted analyses, of which entries 5..34 are kept
        peaklist = self.wavfile2peaks(filename, 40)
        peaklist = peaklist[5:35]
        # test with wgn: 81 further analyses, one per db in 40..120
        # (presumably wgn adds white Gaussian noise at level db/3.0 -
        # TODO confirm against its definition)
        for db in range(40,121):
            test_d = wgn(d, db/3.0)
            peaklist.append(self.find_peaks(test_d, sr))
        for idx in range(len(peaklist)):
            test_cnt += 1.0
            lms_test = self.peaks2landmarks(peaklist[idx])
            for (t1,f1,f2,dt) in lms_test:
                # range(t1, t1+1) yields only t1, so this is an exact
                # match on the landmark tuple
                for t in range(t1,t1+1):
                    key = (t,f1,f2,dt)
                    if key in lms_map:
                        lms_map[key] += 1.0
                        break
        # Score = hits divided by the number of perturbed analyses
        for idx, key in enumerate(landmarks):
            probs[idx] = lms_map[key] / test_cnt
    # features
    feats_list = []
    (Freq,Time) = np.shape(sgram)
    for idx in range(len(landmarks)):
        # floats, so the ratio features below use true division
        (Freq,Time) = (float(Freq),float(Time))
        (t1,f1,f2,dt) = landmarks[idx]
        t2 = t1 + dt
        # make sure f1 < f2 (ties broken so that t1 <= t2)
        if f1 > f2 or (f1==f2 and t1 > t2):
            f1,f2 = f2,f1
            t1,t2 = t2,t1
        # raw positions and differences
        feats_1 = [t1, t2, f1, f2, t2-t1, f2-f1]
        # ratio: the same six values relative to the spectrogram size
        feats_2 = [t1/Time, t2/Time, f1/Freq, f2/Freq, (t2-t1)/Time, (f2-f1)/Freq]
        # distance
        # NOTE(review): the first term mixes feats_1[4] (raw) with
        # feats_2[5] (ratio); feats_1[5] may have been intended - confirm
        # before changing, since it alters the feature values.
        dist = [math.sqrt(feats_1[4]**2+feats_2[5]**2), math.sqrt(feats_2[4]**2+feats_2[5]**2)]
        # energy at the two landmark points, then their sum and product
        feats_e = [sgram[f1][t1], sgram[f2][t2]]
        feats_e.extend([feats_e[0]+feats_e[1], feats_e[0]*feats_e[1]])
        # NOTE(review): at this point feats_e[2] is the sum e1+e2, so
        # feats_e[1]-feats_e[2] is e2-(e1+e2); feats_e[1]-feats_e[0] may
        # have been intended - confirm.
        feats_e.extend([(feats_e[1]-feats_e[2]), (feats_e[1]-feats_e[2])/dist[0], (feats_e[1]-feats_e[2])/dist[1]])
        # same seven energy features taken from sgramo
        feats_eo = [sgramo[f1][t1], sgramo[f2][t2]]
        feats_eo.extend([feats_eo[0]+feats_eo[1], feats_eo[0]*feats_eo[1]])
        feats_eo.extend([(feats_eo[1]-feats_eo[2]), (feats_eo[1]-feats_eo[2])/dist[0],
                         (feats_eo[1]-feats_eo[2])/dist[1]])
        # distance: two more entries that add the squared energy difference
        dist += [math.sqrt(feats_1[4]**2+feats_2[5]**2+(feats_eo[0]-feats_eo[1])**2)]
        dist += [math.sqrt(feats_2[4]**2+feats_2[5]**2+(feats_e[0]-feats_e[1])**2)]
        # energy surrounding
        # `locs` is referenced only by the disabled string block below
        locs = [(-1,1),(0,1),(1,1),(-1,0),(1,0),(-1,-1),(0,-1),(1,-1)]
        poss = [(-1,1),(0,1),(1,1),(1,0)]
        feats_surs = []
        sgrams = [sgram,sgramo]
        fts = [(f1, t1), (f2, t2)]
        # one neighbourhood feature list per (point, spectrogram) pair
        for (fi,ti) in fts:
            for sgrami in sgrams:
                # disabled variant, kept as a bare string
                ''' sq_i = squares(sgrami, fi, ti, 2) feats_sur_i = np.concatenate(sq_i.tolist()).tolist() feats_sur_i.extend([sq_i[2*loc[0]][2*loc[1]]-2*sq_i[loc[0]][loc[1]] for loc in locs]) feats_sur_i.extend([curvature(sq_i,pos) for pos in poss]) '''
                sq_i = squares(sgrami, fi, ti, 1)
                feats_sur_i = np.concatenate(sq_i.tolist()).tolist()
                feats_sur_i.extend([curvature(sq_i,pos) for pos in poss])
                feats_surs.append(feats_sur_i)
        # delta E / delta x; the +0.1 keeps the denominators non-zero
        # when f2 == f1 or t2 == t1
        feats_delta = [ (feats_e[1]-feats_e[0])/(f2-f1+0.1), (feats_e[1]-feats_e[0])/(t2-t1+0.1), (t2-t1)/(f2-f1+0.1) ]
        feats_delta.extend([ (feats_eo[1]-feats_eo[0])/(f2-f1+0.1), (feats_eo[1]-feats_eo[0])/(t2-t1+0.1) ])
        # Freq*Energy: three combinations per point, then their
        # element-wise products across the two points
        feats_fe_1 = [feats_eo[0]*f1, feats_eo[0]*math.log1p(f1), math.log1p(feats_eo[0])*f1]
        feats_fe_2 = [feats_eo[1]*f2, feats_eo[1]*math.log1p(f2), math.log1p(feats_eo[1])*f2]
        feats_fe_12 = (np.array(feats_fe_1)*np.array(feats_fe_2)).tolist()
        feats_fe = feats_fe_1 + feats_fe_2 + feats_fe_12
        # line points (disabled, kept as a bare string)
        ''' line = [ ( f1+(f2-f1)*i/10.0, t1+(t2-t1)*i/10.0 ) for i in range(1,10)] line_values = [ value_at(sgram,p[0],p[1]) for p in line] line_valueso = [ value_at(sgramo,p[0],p[1]) for p in line] feats_line = line_values + line_valueso + [np.mean(line_values),np.std(line_values),np.mean(line_valueso),np.std(line_valueso)] '''
        # square points (disabled, kept as a bare string)
        ''' square_line = [] for i in range(1,6): for j in range(1,6): square_line.append( (f1+(f2-f1)*i/6.0, t1+(t2-t1)*i/6.0) ) sql_values = [ value_at(sgram,p[0],p[1]) for p in square_line] sql_valueso = [ value_at(sgramo,p[0],p[1]) for p in square_line] feats_sql = 
sql_values + sql_valueso + [np.mean(sql_values),np.std(sql_values),np.mean(sql_valueso),np.std(sql_valueso)] '''
        # append to feats; row layout is: Time, Freq, feats_1, feats_2,
        # dist, feats_e, feats_eo, the neighbourhood lists, feats_delta,
        # feats_fe
        feats = [Time,Freq]
        feats.extend(feats_1)
        feats.extend(feats_2)
        feats.extend(dist)
        feats.extend(feats_e)
        feats.extend(feats_eo)
        for feats_sur_i in feats_surs:
            feats.extend(feats_sur_i)
        feats.extend(feats_delta)
        feats.extend(feats_fe)
        #feats.extend(feats_line)
        #feats.extend(feats_sql)
        feats_list.append(feats)
    return np.array(feats_list), probs