def features_gtzan(filename, directory=""):
    """Extract texture-window statistics of spectral features from one wav file.

    The file at ``directory + filename`` is converted to a magnitude
    spectrogram (window and DFT length 2048, step 1024). For each of the
    centroid, rolloff, flatness, flux and MFCC extractors, the feature track
    and its first and second ``Diff`` tracks are joined into a single track.
    That track goes through ``ToTextureWindow.to_texture`` with a length of 40
    and is then summarised by ``Stats.stats`` with mean and variance enabled.

    :param filename: name of the wav file, appended verbatim to ``directory``.
    :param directory: prefix placed in front of ``filename`` by plain string
        concatenation; include the trailing path separator yourself.
    :return: whatever ``stats.Stats().stats([...], mean=True, variance=True)``
        returns for the texture-windowed track.
    """
    # Calculate spectrogram (normalizes wavfile).
    # The wav file is binary data, so it is opened in 'rb' mode, and the
    # context manager closes the handle as soon as the conversion is done.
    converter = spec.Wav2Spectrogram()
    with open(directory + filename, 'rb') as wav_file:
        spectrogram = converter.convert(wav_file,
                                        window_length=2048,
                                        dft_length=2048,
                                        window_step=1024,
                                        spectrum_type='magnitude',
                                        save_metadata=True)

    # Extract low-level features, derivatives, and run texture windows
    differentiator = diff.Diff()
    extractors = (cent.Centroid(), roll.Rolloff(), flat.Flatness(),
                  flux.Flux(), mfcc.Mfcc())

    # all_feats is None on the first pass, so Join.join receives
    # [None, track] for the very first feature.
    all_feats = None
    for extractor in extractors:
        track = extractor.calc_track(spectrogram)  # Feature track
        all_feats = join.Join().join([all_feats, track])
        dtrack = differentiator.calc_track(track)  # Differentiate
        all_feats = join.Join().join([all_feats, dtrack])
        ddtrack = differentiator.calc_track(dtrack)  # Differentiate again
        all_feats = join.Join().join([all_feats, ddtrack])

    # Texture window
    # NOTE(review): 40 is presumably the window length in analysis frames;
    # confirm against ToTextureWindow.to_texture.
    textures = tex.ToTextureWindow().to_texture(all_feats, 40)

    # Statistics
    return stats.Stats().stats([textures], mean=True, variance=True)
plt.ylabel('Energia (dB)')
plt.show()
display.Audio(fname)

import mir3.modules.features.flux as flux

fname = 'audio/tabla.wav'
wav2spec = spec.Wav2Spectrogram()  # Object that converts wav files into spectrograms
# The context manager closes the wav handle once the spectrogram is built,
# instead of leaving an anonymous open file behind.
with open(fname, 'rb') as wav_file:
    s = wav2spec.convert(wav_file,
                         window_length=1024,
                         window_step=512,
                         spectrum_type='magnitude')

# Spectral flux track of the recording.
fx = flux.Flux()
f = fx.calc_track(s)

# Time axis: number of flux frames divided by the track's `ofs` value.
T = f.metadata.sampling_configuration.ofs
t = np.linspace(0, len(f.data) / float(T), len(f.data))

f.data += 10**(-2)  # Keeps zeros out of the log10 computed below

plt.figure(figsize=(10, 6))
plt.plot(t, np.log10(f.data / np.max(f.data)))
plt.xlabel('Tempo (s)')
plt.ylabel('Fluxo espectral (dB)')
plt.show()
display.Audio(fname)
## We see an interesting phenomenon this time. The main beats ...
def calculate_features_per_band(self, frequency_band, also_one_band=False, discard_bin_zero=False):
    """
    Compute band-limited feature tracks and stack them onto ``self.band_features``.

    For every band of ``frequency_band`` (plus, optionally, one band spanning
    ``frequency_band.low`` to ``frequency_band.high``) six tracks are computed
    in this order: flatness, energy, flux, centroid, rolloff, low energy.
    Each track's ``metadata.feature`` gets the suffix ``_<low>_<high>``.
    After the bands, one 13-coefficient MFCC track and one zero-crossing
    track are stacked as well. ``self.features_per_band`` is set to the
    number of tracks produced for a band.

    :param frequency_band: FrequencyBand
    :param also_one_band: boolean; also process the single (low, high) band
    :param discard_bin_zero: boolean; when a band starts at bin 0, start at bin 1 instead
    :return: no return statement is visible in this method; results accumulate in
        ``self.band_features`` (NOTE(review): the previous docstring advertised
        list[FeatureTrack] - confirm what callers expect)
    """
    # (extractor, extra positional arguments placed before the bin range).
    # Only LowEnergy takes an extra leading argument (10).
    extractors = (
        (feat_flat.Flatness(), ()),
        (feat_energy.Energy(), ()),
        (feat_flux.Flux(), ()),
        (feat_centroid.Centroid(), ()),
        (feat_rolloff.Rolloff(), ()),
        (feat_lowenergy.LowEnergy(), (10,)),
    )

    band_edges = list(frequency_band.bands())
    if also_one_band:
        band_edges.append((int(frequency_band.low), int(frequency_band.high)))

    for edges in band_edges:
        first_bin = self.spectrogram.freq_bin(edges[0])
        if first_bin == 0 and discard_bin_zero:
            first_bin = 1
        last_bin = self.spectrogram.freq_bin(edges[1])

        # Suffix that tags every track of this band with its edges.
        suffix = ("_" + str(edges[0])) + ("_" + str(edges[1]))

        band_tracks = []
        for extractor, leading_args in extractors:
            bin_args = leading_args + (first_bin, last_bin)
            band_track = extractor.calc_track_band(self.spectrogram, *bin_args)
            band_track.metadata.feature += suffix
            band_tracks.append(band_track)

        self.features_per_band = len(band_tracks)
        self.band_features = np.hstack((self.band_features, band_tracks))

    # MFCC hack: a single track labelled "MFCC_0 MFCC_1 ... MFCC_12 "
    # (every label, including the last, is followed by a space).
    mfcc_track = track.FeatureTrack()
    mfcc_track.data = mfcc.mfcc(self.spectrogram, 13)
    mfcc_track.metadata.sampling_configuration = self.spectrogram.metadata.sampling_configuration
    mfcc_track.metadata.feature = "".join("MFCC_" + str(i) + " " for i in range(13))
    mfcc_track.metadata.filename = self.spectrogram.metadata.input.name
    self.band_features = np.hstack((self.band_features, mfcc_track))

    # Zero crossings, computed from the raw audio rather than the spectrogram.
    # NOTE(review): the call passes (1024, 512) while the metadata records
    # window_length=512 and ofs=samplingrate/1024 - confirm the argument
    # order of zero_crossings.
    crossings_track = track.FeatureTrack()
    crossings_track.data = tdomf.zero_crossings(self.audio_data, 1024, 512)
    crossings_track.metadata.sampling_configuration.fs = self.samplingrate
    crossings_track.metadata.sampling_configuration.ofs = self.samplingrate / 1024
    crossings_track.metadata.sampling_configuration.window_length = 512
    crossings_track.metadata.feature = "TDZeroCrossings"
    crossings_track.metadata.filename = self.spectrogram.metadata.input.name
    self.band_features = np.hstack((self.band_features, crossings_track))
f = fness.calc_track(s) f1 = [np.average(f.data)] flat_rock.append(f1) centr = cent.Centroid() centroid = centr.calc_track(s) centroid1 = [np.average(centroid.data)] cent_rock.append(centroid1) # roff = roll.Rolloff() # roll_off = roff.calc_track(s) # roll_off1 = [np.average(roll_off)] # rolloff_rock.append(roll_off1) en = energ.Energy() energy = en.calc_track(s) energy1 = [np.average(energy.data)] energy_rock.append(energy1) fl = specfl.Flux() flux = fl.calc_track(s) flux1 = [np.average(flux.data)] sflux_rock.append(flux1) aux = [np.average(f.data)*np.average(centroid.data)*np.average(energy.data)*np.average(flux.data)] cent_en_rock.append(aux) flat_pop = [[]] cent_pop = [[]] rolloff_pop = [[]] energy_pop = [[]] sflux_pop = [[]] cent_en_pop = [[]] for fname in pop: wav2spec = spectrogram.Wav2Spectrogram() # Objeto que converte arquivos wav para espectrogramas s = wav2spec.convert(open(fname, 'rb'), window_length=1024, window_step=512, spectrum_type='magnitude')