Python audio_read Examples, audio_read.audio_read Python Examples

Example #1

0

Show file

 def illustrate_match(self, analyzer, ht, filename):
     """Plot query and matched landmarks over a spectrogram and save the figure.

     The audio in ``filename`` is read as a single channel at
     ``analyzer.target_sr``, turned into a log-magnitude spectrogram and
     drawn with ``librosa.display.specshow``.  The query landmarks are
     overplotted in green and the landmarks of the top match in red.  The
     figure is written to ``./src/static/sgram<uuid>.png``.

     Args:
         analyzer: object providing ``target_sr``, ``n_fft``, ``n_hop`` and
             ``wavfile2hashes``.
         ht: hash table passed to ``self.match_hashes``; its ``names``
             attribute is indexed with the id of the first result.
         filename: path of the query sound file.

     Returns:
         The results from ``self.match_hashes`` (re-sorted by descending
         third field when ``self.sort_by_time`` is set).

     Raises:
         IndexError: if there are no results, or if the matched name
             contains no "/" (the title uses the second path component).
     """
     # Make the spectrogram
     d, sr = audio_read.audio_read(filename,
                                   sr=analyzer.target_sr,
                                   channels=1)
     sgram = np.abs(
         stft.stft(d,
                   n_fft=analyzer.n_fft,
                   hop_length=analyzer.n_hop,
                   window=np.hanning(analyzer.n_fft + 2)[1:-1]))
     # Convert to dB, clamping everything more than a factor of 1e6
     # (120 dB) below the peak, then remove the mean level.
     sgram = 20.0 * np.log10(np.maximum(sgram, np.max(sgram) / 1e6))
     sgram = sgram - np.mean(sgram)
     # High-pass filter onset emphasis (spectrogram enhancement).
     # [:-1,] discards top bin (nyquist) of sgram so bins fit in 8 bits;
     # note this only happens when the filter is enabled.
     if self.illustrate_hpf:
         HPF_POLE = 0.98
         sgram = np.array([
             scipy.signal.lfilter([1, -1], [1, -HPF_POLE], s_row)
             for s_row in sgram
         ])[:-1, ]
     # Shift so the maximum is 0 dB, matching the vmin/vmax range below.
     sgram = sgram - np.max(sgram)
     librosa.display.specshow(sgram,
                              sr=sr,
                              hop_length=analyzer.n_hop,
                              y_axis='linear',
                              x_axis='time',
                              cmap='gray_r',
                              vmin=-80.0,
                              vmax=0)
     # Do the match
     q_hashes = analyzer.wavfile2hashes(filename)
     # Run query, get back the hashes for match zero
     results, matchhashes = self.match_hashes(ht, q_hashes, hashesfor=0)
     if self.sort_by_time:
         results = sorted(results, key=lambda x: -x[2])
     # Convert the hashes to landmarks
     lms = audfprint_analyze.hashes2landmarks(q_hashes)
     mlms = audfprint_analyze.hashes2landmarks(matchhashes)
     # Overplot on the spectrogram: one line segment per landmark, from
     # (x[0], x[1]) to (x[0] + x[3], x[2]).
     plt.plot(
         np.array([[x[0], x[0] + x[3]] for x in lms]).T,
         np.array([[x[1], x[2]] for x in lms]).T, '.-g')
     plt.plot(
         np.array([[x[0], x[0] + x[3]] for x in mlms]).T,
         np.array([[x[1], x[2]] for x in mlms]).T, '.-r')
     # Add title: second "/"-separated component of the matched name,
     # with everything from the first "." onwards removed.
     plt.title("Matched as " +
               ht.names[results[0][0]].split("/")[1].split(".")[0])
     # Save the figure under a unique name.  The keyword must be spelled
     # ``bbox_inches`` for the tight bounding box to be applied.
     plt.savefig("./src/static/sgram" + uuid.uuid4().hex + ".png",
                 bbox_inches="tight")
     return results

Example #2

0

Show file

File: process_data.py Project: QuYingKun/vid2speech-new-

def process_audio(af, auddata, audctr):
    """Compute audio features for one file and store them in ``auddata``.

    The file ``af`` is read at rate ``SR``, analysed with
    ``aud.lpc_analysis`` (window ``SPF``, step ``int(SPF * OVERLAP)``) and
    the first analysis output is converted with ``aud.lpc_to_lsf``.  The
    converted coefficients and the second analysis output are cropped to the
    same frame range, their even- and odd-indexed frames are placed side by
    side, and the two results are joined column-wise.

    Args:
        af: audio file passed to ``ar.audio_read``.
        auddata: 2-D array that receives the features in rows
            ``audctr : audctr + SAMPLE_LEN``.
        audctr: index of the first row to fill.

    Returns:
        The row counter advanced by ``SAMPLE_LEN`` (as an int).
    """
    samples, _rate = ar.audio_read(af, sr=SR)
    step = int(SPF * OVERLAP)
    coeffs, gains, _unused = aud.lpc_analysis(
        samples, LPC_ORDER, window_step=step, window_size=SPF)
    lsf = aud.lpc_to_lsf(coeffs)

    # Frame range to keep, shared by both feature streams.
    frames_per_unit = int(1 / OVERLAP)
    first_frame = int((MARGIN) * frames_per_unit)
    last_frame = int((SAMPLE_LEN + MARGIN) * frames_per_unit)

    def _crop_and_pair(frames):
        # Crop to the frame range, then put even- and odd-indexed frames
        # next to each other (the stride of 2 is tied to half overlap).
        kept = frames[first_frame:last_frame, :]
        return np.concatenate((kept[::2, :], kept[1::2, :]), axis=1)

    lsf_pairs = _crop_and_pair(lsf)
    gain_pairs = _crop_and_pair(gains)
    feat = np.concatenate((lsf_pairs, gain_pairs), axis=1)

    next_ctr = int(audctr + SAMPLE_LEN)
    auddata[audctr:next_ctr, :] = feat
    return next_ctr

Example #3

0

Show file

File: audfprint_match.py Project: wanjinchang/audfprint

 def illustrate_match(self, analyzer, ht, filename):
     """Display the query landmarks and the matched landmarks on a spectrogram.

     Reads ``filename`` as one channel at ``analyzer.target_sr``, draws its
     log-magnitude spectrogram, overlays the query landmarks (green) and the
     landmarks of match zero (red), shows the figure and returns the match
     results.
     """
     # Build the magnitude spectrogram
     samples, rate = audio_read.audio_read(filename, sr=analyzer.target_sr,
                                           channels=1)
     taper = np.hanning(analyzer.n_fft + 2)[1:-1]
     magnitude = np.abs(librosa.stft(samples,
                                     n_fft=analyzer.n_fft,
                                     hop_length=analyzer.n_hop,
                                     window=taper))
     # To dB with a floor a factor of 1e6 below the peak, then zero-mean
     floor = np.max(magnitude) / 1e6
     sgram = 20.0 * np.log10(np.maximum(magnitude, floor))
     sgram = sgram - np.mean(sgram)
     # Optional high-pass filtering along each row for onset emphasis.
     # The trailing [:-1, ] drops the top (nyquist) bin so bins fit in 8 bits.
     if self.illustrate_hpf:
         HPF_POLE = 0.98
         numer = [1, -1]
         denom = [1, -HPF_POLE]
         filtered_rows = [scipy.signal.lfilter(numer, denom, row)
                          for row in sgram]
         sgram = np.array(filtered_rows)[:-1, ]
     # Normalise so the maximum sits at 0
     sgram = sgram - np.max(sgram)
     librosa.display.specshow(
         sgram,
         sr=rate,
         hop_length=analyzer.n_hop,
         y_axis='linear',
         x_axis='time',
         cmap='gray_r',
         vmin=-80.0,
         vmax=0,
     )
     # Query the hash table; ask for the hashes belonging to match zero
     q_hashes = analyzer.wavfile2hashes(filename)
     results, matchhashes = self.match_hashes(ht, q_hashes, hashesfor=0)
     if self.sort_by_time:
         results = sorted(results, key=lambda row: -row[2])
     # Turn both hash sets into landmarks and overplot them
     query_lms = audfprint_analyze.hashes2landmarks(q_hashes)
     matched_lms = audfprint_analyze.hashes2landmarks(matchhashes)
     for landmarks, line_style in ((query_lms, '.-g'), (matched_lms, '.-r')):
         xs = np.array([[lm[0], lm[0] + lm[3]] for lm in landmarks]).T
         ys = np.array([[lm[1], lm[2]] for lm in landmarks]).T
         plt.plot(xs, ys, line_style)
     # Title names the best match and the hash counts
     title = filename + " : Matched as " + ht.names[results[0][0]]
     title = title + (" with %d of %d hashes" % (len(matchhashes),
                                                 len(q_hashes)))
     plt.title(title)
     plt.show()
     return results

Example #4

0

Show file

File: audfprint_analyze.py Project: mthormati/searchproj

    def wavfile2peaks(self, filename, shifts=None, return_spectrogram=False):
        """ Read a soundfile and return its landmark peaks as a
            list of (time, bin) pairs.  If specified, resample to sr first.
            shifts > 1 causes hashes to be extracted from multiple shifts of
            waveform, to reduce frame effects.

            Args:
                filename: sound file, or a precomputed peaks file when its
                    extension equals PRECOMPPKEXT.
                shifts: number of part-frame shifts; None or < 2 means a
                    single pass, otherwise a list with one peak list per
                    shift is returned.
                return_spectrogram: when true, return (peaks, sgram).  The
                    spectrogram is only produced by the single-pass audio
                    path; for precomputed files and for multi-shift
                    analysis ``sgram`` is None.

            Returns:
                ``peaks``, or ``(peaks, sgram)`` if return_spectrogram.

            Raises:
                IOError: if the file cannot be read and
                    ``self.fail_on_error`` is set.
        """
        # Default so that every path can honour return_spectrogram; without
        # this the precomputed and multi-shift paths hit an unbound local.
        sgram = None
        ext = os.path.splitext(filename)[1]
        if ext == PRECOMPPKEXT:
            # short-circuit - precomputed fingerprint file
            peaks = peaks_load(filename)
            # Duration estimated from the largest value in the first peak
            # column, converted with hop size and sample rate.
            dur = np.max(peaks, axis=0)[0] * self.n_hop / self.target_sr
        else:
            try:
                d, sr = audio_read.audio_read(filename, sr=self.target_sr, channels=1)
            except Exception as e:  # audioread.NoBackendError:
                message = "wavfile2peaks: Error reading " + filename
                if self.fail_on_error:
                    print(e)
                    raise IOError(message)
                # Best effort: carry on with an empty signal.
                print(message, "skipping")
                d = []
                sr = self.target_sr
            # Duration of the audio just read, in seconds; stored on self
            # below.
            dur = len(d) / sr
            if shifts is None or shifts < 2:
                if return_spectrogram:
                    peaks, sgram = self.find_peaks(d, sr, return_spectrogram=return_spectrogram)
                else:
                    peaks = self.find_peaks(d, sr, return_spectrogram=return_spectrogram)
            else:
                # Calculate hashes with optional part-frame shifts
                # NOTE(review): the offset divides by self.shifts rather
                # than the ``shifts`` argument - confirm they always agree.
                peaklists = []
                for shift in range(shifts):
                    shiftsamps = int(shift / self.shifts * self.n_hop)
                    peaklists.append(self.find_peaks(d[shiftsamps:], sr))
                peaks = peaklists

        # instrumentation to track total amount of sound processed
        self.soundfiledur = dur
        self.soundfiletotaldur += dur
        self.soundfilecount += 1
        if return_spectrogram:
            return peaks, sgram
        else:
            return peaks

Example #5

0

Show file

File: audfprint_analyze.py Project: dpwe/audfprint

    def wavfile2peaks(self, filename, shifts=None):
        """ Return the landmark peaks for a soundfile.

            A file whose extension equals PRECOMPPKEXT is treated as a
            precomputed peaks file and loaded directly.  Anything else is
            read as single-channel audio at self.target_sr and passed to
            self.find_peaks.  With shifts >= 2 the result is a list holding
            one find_peaks result per part-frame shift of the waveform.
            The duration and file-count counters on self are updated in
            every case.  """
        if os.path.splitext(filename)[1] == PRECOMPPKEXT:
            # Precomputed fingerprint file: no audio decoding needed
            peaks = peaks_load(filename)
            last_frame = np.max(peaks, axis=0)[0]
            dur = last_frame * self.n_hop / self.target_sr
        else:
            try:
                d, sr = audio_read.audio_read(filename, sr=self.target_sr, channels=1)
            except Exception as e:  # audioread.NoBackendError:
                message = "wavfile2peaks: Error reading " + filename
                if self.fail_on_error:
                    print(e)
                    raise IOError(message)
                # Not failing on errors: continue with an empty signal
                print(message, "skipping")
                d, sr = [], self.target_sr
            # Length of the decoded audio in seconds
            dur = len(d) / sr
            if shifts is not None and not shifts < 2:
                # One peak list per part-frame shift of the waveform
                peaks = [
                    self.find_peaks(d[int(shift / self.shifts * self.n_hop):], sr)
                    for shift in range(shifts)
                ]
            else:
                peaks = self.find_peaks(d, sr)

        # Bookkeeping: how much sound has been processed so far
        self.soundfiledur = dur
        self.soundfiletotaldur += dur
        self.soundfilecount += 1
        return peaks

Example #6

0

Show file

File: audfprint_analyze.py Project: h2597650/Crawler

    def wavfile2samples(self, filename, label=True, subsample=None, subratio=None):
        """ Build one feature vector per landmark of a sound file, plus an
            optional per-landmark stability score.

            Landmarks come from peaks2landmarks(wavfile2peaks(filename)).
            The audio is also read directly so that find_peaks_sgram can
            supply two spectrograms (sgram, sgramo) for the energy features.

            Args:
                filename: sound file to analyse.
                label: when true, fill ``probs`` with the fraction of
                    perturbed analyses (shifted peak lists plus copies of
                    the signal passed through ``wgn``) in which each
                    landmark reappears unchanged; when false ``probs``
                    stays all zeros.
                subsample: if truthy and smaller than the number of
                    landmarks, keep only a random subset (drawn without
                    replacement) of this many landmarks.
                subratio: if truthy while subsampling is active, the subset
                    size becomes int(len(landmarks) * subratio) instead.

            Returns:
                (np.array(feats_list), probs) where probs has shape
                (number of landmarks, 1).
        """
        landmarks = self.peaks2landmarks(self.wavfile2peaks(filename))
        # Raw samples are needed for find_peaks_sgram and for the noisy
        # copies generated further down.
        d, sr = audio_read.audio_read(filename, sr=self.target_sr, channels=1)
        peaks,sgram,sgramo = self.find_peaks_sgram(d, sr)
        # Optional random subsampling; subratio is only consulted when
        # subsample itself is set and below the landmark count.
        if subsample and subsample<len(landmarks):
            if subratio:
                subsample = int(len(landmarks)*subratio)
            index = np.random.choice(len(landmarks), subsample, replace=False)
            landmarks = [ landmarks[idx] for idx in index]
        # Hit counter per landmark (landmarks are used as dict keys)
        lms_map = {}
        for lm in landmarks:
            lms_map[lm] = 0.0

        # probs
        probs = np.zeros((len(landmarks),1))
        if label:
            test_cnt = 0.0
            # Perturbation 1: peak lists from 40 part-frame shifts, of which
            # entries 5..34 are kept
            peaklist = self.wavfile2peaks(filename, 40)
            peaklist = peaklist[5:35]
            # Perturbation 2: peaks of wgn(d, db/3.0) for db = 40..120
            # (presumably white Gaussian noise at the given level - confirm
            # against the wgn helper)
            for db in range(40,121):
                test_d = wgn(d, db/3.0)
                peaklist.append(self.find_peaks(test_d, sr))
            # Count, for every perturbed analysis, which reference landmarks
            # reappear
            for idx in range(len(peaklist)):
                test_cnt += 1.0
                lms_test = self.peaks2landmarks(peaklist[idx])
                for (t1,f1,f2,dt) in lms_test:
                    # range(t1, t1+1) yields only t1, so this is an exact
                    # match on the start time
                    for t in range(t1,t1+1):
                        key = (t,f1,f2,dt)
                        if key in lms_map:
                            lms_map[key] += 1.0
                            break
            # Fraction of perturbed analyses containing each landmark
            for idx, key in enumerate(landmarks):
                probs[idx] = lms_map[key] / test_cnt
        # features
        feats_list = []
        # sgram is indexed [frequency][time] below, so its shape is
        # (Freq, Time)
        (Freq,Time) = np.shape(sgram)
        for idx in range(len(landmarks)):
            # Floats so that the ratios below use true division
            (Freq,Time) = (float(Freq),float(Time))
            (t1,f1,f2,dt) = landmarks[idx]
            t2 = t1 + dt
            # Order the endpoints so that f1 <= f2 (ties ordered by time).
            # Times are swapped together with the frequencies, so t2-t1 is
            # -dt for a swapped landmark.
            if f1 > f2 or (f1==f2 and t1 > t2):
                f1,f2 = f2,f1
                t1,t2 = t2,t1
            # absolute positions and deltas
            feats_1 = [t1, t2, f1, f2, t2-t1, f2-f1]
            # ratio: the same quantities relative to the spectrogram size
            feats_2 = [t1/Time, t2/Time, f1/Freq, f2/Freq, (t2-t1)/Time, (f2-f1)/Freq]
            # distance
            # NOTE(review): the first entry combines the absolute time delta
            # feats_1[4] with the relative frequency delta feats_2[5];
            # feats_1[5] may have been intended - confirm.
            dist = [math.sqrt(feats_1[4]**2+feats_2[5]**2), math.sqrt(feats_2[4]**2+feats_2[5]**2)]
            # energy at the two endpoints, then their sum and product
            feats_e = [sgram[f1][t1], sgram[f2][t2]]
            feats_e.extend([feats_e[0]+feats_e[1], feats_e[0]*feats_e[1]])
            # NOTE(review): feats_e[2] is the sum appended on the previous
            # line, so feats_e[1]-feats_e[2] is algebraically -feats_e[0];
            # feats_e[0] may have been intended - confirm.  The same applies
            # to feats_eo below.
            feats_e.extend([(feats_e[1]-feats_e[2]), (feats_e[1]-feats_e[2])/dist[0], (feats_e[1]-feats_e[2])/dist[1]])
            # the same energy features from the second spectrogram
            feats_eo = [sgramo[f1][t1], sgramo[f2][t2]]
            feats_eo.extend([feats_eo[0]+feats_eo[1], feats_eo[0]*feats_eo[1]])
            feats_eo.extend([(feats_eo[1]-feats_eo[2]), (feats_eo[1]-feats_eo[2])/dist[0], (feats_eo[1]-feats_eo[2])/dist[1]])
            # distance including the energy difference as a third axis
            # (appended after dist[0] and dist[1] were used above)
            dist += [math.sqrt(feats_1[4]**2+feats_2[5]**2+(feats_eo[0]-feats_eo[1])**2)]
            dist += [math.sqrt(feats_2[4]**2+feats_2[5]**2+(feats_e[0]-feats_e[1])**2)]
            # energy surrounding each endpoint
            # locs is only referenced by the disabled code in the string
            # below; poss lists the offsets handed to curvature
            locs = [(-1,1),(0,1),(1,1),(-1,0),(1,0),(-1,-1),(0,-1),(1,-1)]
            poss = [(-1,1),(0,1),(1,1),(1,0)]
            feats_surs = []
            sgrams = [sgram,sgramo]
            fts = [(f1, t1), (f2, t2)]
            # One feature group per (endpoint, spectrogram) combination
            for (fi,ti) in fts:
                for sgrami in sgrams:
                    '''
                    sq_i = squares(sgrami, fi, ti, 2)
                    feats_sur_i = np.concatenate(sq_i.tolist()).tolist()
                    feats_sur_i.extend([sq_i[2*loc[0]][2*loc[1]]-2*sq_i[loc[0]][loc[1]] for loc in locs])
                    feats_sur_i.extend([curvature(sq_i,pos) for pos in poss])
                    '''
                    # Neighbourhood returned by squares(..., 1), flattened,
                    # followed by one curvature value per offset in poss
                    # (squares/curvature are defined elsewhere)
                    sq_i = squares(sgrami, fi, ti, 1)
                    feats_sur_i = np.concatenate(sq_i.tolist()).tolist()
                    feats_sur_i.extend([curvature(sq_i,pos) for pos in poss])
                    feats_surs.append(feats_sur_i)
            # delta E / delta x; the +0.1 keeps the denominators away from
            # zero when the endpoints share a frequency or a time
            feats_delta = [ (feats_e[1]-feats_e[0])/(f2-f1+0.1), (feats_e[1]-feats_e[0])/(t2-t1+0.1), (t2-t1)/(f2-f1+0.1) ]
            feats_delta.extend([ (feats_eo[1]-feats_eo[0])/(f2-f1+0.1), (feats_eo[1]-feats_eo[0])/(t2-t1+0.1) ])
            # Freq*Energy: products of endpoint frequency and sgramo energy,
            # with log1p applied to one factor or the other
            feats_fe_1 = [feats_eo[0]*f1, feats_eo[0]*math.log1p(f1), math.log1p(feats_eo[0])*f1]
            feats_fe_2 = [feats_eo[1]*f2, feats_eo[1]*math.log1p(f2), math.log1p(feats_eo[1])*f2]
            feats_fe_12 = (np.array(feats_fe_1)*np.array(feats_fe_2)).tolist()
            feats_fe = feats_fe_1 + feats_fe_2 + feats_fe_12
            # line points (disabled)
            '''
            line = [ ( f1+(f2-f1)*i/10.0, t1+(t2-t1)*i/10.0 ) for i in range(1,10)]
            line_values = [ value_at(sgram,p[0],p[1]) for p in line]
            line_valueso = [ value_at(sgramo,p[0],p[1]) for p in line]
            feats_line = line_values + line_valueso + [np.mean(line_values),np.std(line_values),np.mean(line_valueso),np.std(line_valueso)]
            '''
            # square points (disabled)
            '''
            square_line = []
            for i in range(1,6):
                for j in range(1,6):
                    square_line.append( (f1+(f2-f1)*i/6.0, t1+(t2-t1)*i/6.0) )
            sql_values = [ value_at(sgram,p[0],p[1]) for p in square_line]
            sql_valueso = [ value_at(sgramo,p[0],p[1]) for p in square_line]
            feats_sql = sql_values + sql_valueso + [np.mean(sql_values),np.std(sql_values),np.mean(sql_valueso),np.std(sql_valueso)]
            '''
            # append to feats: the order here fixes the column layout of
            # the returned matrix
            feats = [Time,Freq]
            feats.extend(feats_1)
            feats.extend(feats_2)
            feats.extend(dist)
            feats.extend(feats_e)
            feats.extend(feats_eo)
            for feats_sur_i in feats_surs:
                feats.extend(feats_sur_i)
            feats.extend(feats_delta)
            feats.extend(feats_fe)
            #feats.extend(feats_line)
            #feats.extend(feats_sql)
            feats_list.append(feats)
        return np.array(feats_list), probs