def create_features():
    """Extract a feature matrix from every .wav file listed in ``file_names``.

    Reads the module-level settings ``file_names``, ``source_location``,
    ``audio_processing_choice``, ``reduced_dim``, ``frame_time_len`` and
    ``frame_len``. Depending on ``audio_processing_choice`` the audio is
    passed to ``chroma_stft`` ("chroma"), ``chroma_cqt`` ("cqt"), ``mfcc``
    ("mfcc"), ``fbank`` ("fbank") or, for any other value, ``logfbank``.

    Returns
    -------
    list
        One feature array per file, in the order of ``file_names``. The
        original author's notes describe each array as
        ('# frames' x '# features').
    """
    import os  # local import: only needed for portable path joining

    print("\nConverting LM data to features...")
    features_list = []
    for file_number, file_name in enumerate(file_names, start=1):
        print("File " + str(file_number) + "...")
        # os.path.join instead of a hard-coded "\\" so the path is also
        # valid on non-Windows systems.
        full_file_name = os.path.join(source_location, file_name)
        rate, data = wvf.read(full_file_name)

        if audio_processing_choice in ("chroma", "cqt"):
            # Flatten to 1D, keep every other value, convert to float.
            # NOTE(review): for interleaved 2-channel data this presumably
            # keeps a single channel; for mono input it would drop every
            # other sample - confirm against the data set.
            samples = data.flatten()[0::2].astype(np.float64)
            if audio_processing_choice == "chroma":
                # Transposed so frames are rows and chroma bins are columns.
                features = chroma_stft(y=samples, sr=rate).T
                # np.repeat duplicates every column three times in place
                # (c0, c0, c0, c1, ...), tripling the feature count; the
                # original comment says 36 features are needed for CNNv3.
                features = np.repeat(features, 3, axis=1)
            else:
                features = chroma_cqt(y=samples, sr=rate,
                                      n_chroma=reduced_dim).T
        elif audio_processing_choice == "mfcc":
            # The raw .wav data is passed straight through; window length
            # and step are equal (frame_time_len).
            features = mfcc(signal=data, samplerate=rate,
                            winlen=frame_time_len,
                            winstep=frame_time_len,
                            numcep=reduced_dim, nfilt=reduced_dim*2,
                            nfft=frame_len)
        elif audio_processing_choice == "fbank":
            # fbank returns two items; only the first (the feature array) is
            # kept. Per the original comment the other holds frame energies.
            features = fbank(signal=data, samplerate=rate,
                             winlen=frame_time_len,
                             winstep=frame_time_len,
                             nfilt=reduced_dim,
                             nfft=frame_len)[0]
        else:
            # Any other choice falls back to log filterbank features.
            features = logfbank(signal=data,
                                samplerate=rate,
                                winlen=frame_time_len,
                                winstep=frame_time_len,
                                nfilt=reduced_dim,
                                nfft=frame_len)

        features_list.append(features)
    return features_list
def beat_synchronous_chroma(song, sr):
    """Return a CQT chromagram aggregated between detected beats.

    Parameters
    ----------
    song : audio signal passed as ``y`` to librosa
    sr : sampling rate of ``song``

    Returns
    -------
    The result of ``librosa.util.sync`` applied to the chromagram and the
    detected beat frames (default aggregation of ``sync``).
    """
    from librosa.beat import beat_track
    from librosa.feature import chroma_cqt
    from librosa.util import sync

    hop_length = 1024
    # beat_track reports beats as frame indices counted in units of its own
    # hop_length. It must use the same hop as the chromagram, otherwise the
    # indices handed to sync() point at the wrong chroma frames.
    _tempo, beat_frames = beat_track(y=song, sr=sr, hop_length=hop_length)
    chromagram = chroma_cqt(y=song, sr=sr, hop_length=hop_length)
    return sync(chromagram, beat_frames)
def get_chromagram(y, sr, chroma):
    """
    returns chromagram

    Parameters
    ----------
    y : audio signal, forwarded unchanged to the selected chroma extractor
    sr : sampling rate argument; not read by this function - the rate
        actually used is ``params["sr"]`` parsed from ``chroma``
    chroma : str
        chroma-samplerate-framesize-overlap, parsed by
        ``get_parameters_chroma`` into the keys "chroma", "sr", "fr", "off"

    Returns
    -------
    The chromagram produced by the selected extractor ('nnls', 'cqt',
    'cens' or 'stft'), or None when the parsed chroma type is none of these.
    """
    params = get_parameters_chroma(chroma)
    kind = params["chroma"]

    if kind == 'nnls':
        return get_nnls(y, params["sr"], params["fr"], params["off"])

    if kind == 'cqt':
        win = get_window('blackmanharris', params["fr"])
        # A sampling rate of 5525 gets 4 octaves, every other rate gets 5.
        octaves = 4 if params["sr"] == 5525 else 5
        return chroma_cqt(y=y, sr=params["sr"], C=None,
                          hop_length=params["off"], norm=None,
                          # threshold=0.0,
                          window=win, fmin=110, n_chroma=12,
                          n_octaves=octaves, bins_per_octave=36)

    if kind == 'cens':
        win = get_window('blackmanharris', params["fr"])
        return chroma_cens(y=y, sr=params["sr"], C=None,
                           hop_length=params["off"], norm=None,
                           window=win, fmin=110, n_chroma=12,
                           n_octaves=5, bins_per_octave=36)

    if kind == 'stft':
        win = get_window('blackmanharris', params["fr"])
        return chroma_stft(y=y, sr=params["sr"],
                           hop_length=params["off"], norm=None,
                           window=win, n_chroma=12)

    # Unknown chroma type: no chromagram is produced.
    return None
def compareto(audio, reference):
    """Score how closely ``audio`` follows ``reference``.

    Both arguments are ``(signal, sampling_rate)`` pairs. The score is a
    weighted average of two DTW-based scores: the first MFCC row counts
    twice, the CQT chromagram counts once.
    """
    signal_a, rate_a = audio
    signal_b, rate_b = reference

    mfcc_a = feature.mfcc(y=signal_a, sr=rate_a)
    mfcc_b = feature.mfcc(y=signal_b, sr=rate_b)
    chroma_a = feature.chroma_cqt(y=signal_a, sr=rate_a)
    chroma_b = feature.chroma_cqt(y=signal_b, sr=rate_b)

    # Only row 0 of each MFCC matrix is aligned; its score is weighted x2.
    _, warp_path = dtw(mfcc_a[0], mfcc_b[0])
    mfcc_score = getscore(warp_path) * 2

    _, warp_path = dtw(chroma_a, chroma_b)
    chroma_score = getscore(warp_path)

    # Total weight is 3 (2 for MFCC + 1 for chroma).
    return (mfcc_score + chroma_score) / 3
def encode(self, data: np.ndarray, *args, **kwargs) -> np.ndarray:
    """
    Segments the audio signal of each Chunk into short chroma frames, extracts chromagrams for each frame and
    concatenates Chunk frame chromagrams into a single Chunk embedding.

    :param data: a `Batch x Signal Length` ndarray, where `Signal Length` is a number of samples
    :return: a `Batch x Concatenated Features` ndarray, where `Concatenated Features` is a 12-dimensional feature
        vector times the number of the chroma frames
    """
    from librosa.feature import chroma_cqt

    embeds = [
        chroma_cqt(y=chunk_data, sr=self.input_sample_rate, n_chroma=12,
                   hop_length=self.hop_length).flatten()
        for chunk_data in data
    ]
    # Return an ndarray, as the annotation and docstring promise, instead of
    # a plain list of per-chunk vectors.
    return np.array(embeds)
def analyse_track(dset, index):
    """analyse track, extract bpm and distribution of notes from the bass line.

    Returns ``(spec, None)`` on success, or ``(None, track)`` when the drums
    (``track[0]``) or the bass (``track[1]``) are too quiet relative to the
    mix to be analysed. Results are cached as a pickle under
    ``CACHE / dset.sig / f"{index}.pkl"``.
    """
    track = dset[index]
    mix = track.sum(0).mean(0)
    ref = mix.std()
    # Trim everything before the first sample reaching 1% of the mix std.
    starts = (abs(mix) >= 1e-2 * ref).float().argmax().item()
    track = track[..., starts:]

    cache = CACHE / dset.sig
    cache.mkdir(exist_ok=True, parents=True)
    cache_file = cache / f"{index}.pkl"
    cached = None
    if cache_file.exists():
        cached = try_load(cache_file)
        if cached is not None:
            tempo, events, hist_kr = cached

    if cached is None:
        drums = track[0].mean(0)
        if drums.std() > 1e-2 * ref:
            # Audio passed by keyword: newer librosa releases no longer
            # accept it positionally.
            tempo, events = beat_track(y=drums.numpy(), units='time', sr=SR)
        else:
            print("failed drums", drums.std(), ref)
            return None, track

        bass = track[1].mean(0)
        r = rms(bass)
        peak = r.max()
        # Keep only the samples whose rms value reaches 5% of the peak.
        mask = r >= 0.05 * peak
        bass = bass[mask]
        if bass.std() > 1e-2 * ref:
            kr = torch.from_numpy(chroma_cqt(y=bass.numpy(), sr=SR))
            # Per chroma bin: fraction of frames in which that bin holds
            # the frame maximum.
            hist_kr = (kr.max(dim=0, keepdim=True)[0] == kr).float().mean(1)
        else:
            print("failed bass", bass.std(), ref)
            return None, track

    # Context manager so the cache file handle is closed deterministically.
    with open(cache_file, 'wb') as cache_handle:
        pickle.dump([tempo, events, hist_kr], cache_handle)
    spec = Spec(tempo, events, hist_kr, track, index)
    return spec, None
def calculate(self, frame):
    """Compute one chroma vector for ``frame`` and hand it to the consumer.

    The frame is converted to float32. When its L2 norm exceeds 0.008 a CQT
    chromagram (36 bins per octave) is computed, filtered, median-smoothed
    and averaged over axis 1. Otherwise an all-zero vector is used. The
    result is put on ``self.outputqueue`` and ``self.signalToOnlineDTW`` is
    emitted.
    """
    #print("calculating chroma...")
    y = frame.astype('float32')
    sr = self.rate
    mag = np.linalg.norm(y)
    if mag > .008:
        # Keyword arguments: newer librosa releases reject positional y/sr.
        chroma = feature.chroma_cqt(y=y, sr=sr, bins_per_octave=12 * 3)
        #filtering reduces volume of noise/partials
        chroma_filtered = np.minimum(
            chroma,
            decompose.nn_filter(chroma, aggregate=np.median, metric='cosine'))
        chroma_smooth = ndimage.median_filter(chroma_filtered, size=(1, 9))
        # Replace NaNs with zeros before averaging.
        np.place(chroma_smooth, np.isnan(chroma_smooth), [0])
        chroma_smooth = np.mean(chroma_smooth, axis=1)
    else:
        # Frame treated as silence: 12 zero entries.
        # NOTE(review): this is a (12, 1) integer column while the branch
        # above yields a 1-D result - kept as-is, confirm the consumer
        # handles both shapes.
        chroma_smooth = np.zeros((12, 1), dtype=int)
    self.outputqueue.put_nowait(chroma_smooth)
    self.signalToOnlineDTW.emit()
import matplotlib.pyplot as plt

# Load the first file listed in the Bhairav directory.
raga_dir = "data/Hindustani/mp3/Bhairav"
sampFile = os.listdir(raga_dir)
my_file = raga_dir + "/" + sampFile[0]
y, sr = load(my_file)

# Magnitude spectrogram shown in dB on a log-frequency axis.
D = np.abs(stft(y))
D_db = amplitude_to_db(D, ref=np.max)
specshow(D_db, y_axis='log', x_axis='time')
plt.title("Power Spectrogram")
plt.colorbar(format='%+2.0f dB')
plt.tight_layout()

# Constant-Q chromagram.
chroma_cq = chroma_cqt(y=y, sr=sr)
specshow(chroma_cq, y_axis='chroma', x_axis='time')
plt.title("Chromagram Constant Q Transform")
plt.colorbar()
plt.tight_layout()

# Tonnetz features.
# NOTE(review): this rebinds the name `tonnetz` from the function to its
# result, so the function cannot be called again afterwards.
tonnetz = tonnetz(y=y, sr=sr)
specshow(tonnetz, y_axis='tonnetz')
plt.title("Tonnetz Example")
plt.colorbar()
plt.tight_layout()

# Mel spectrogram shown in dB, frequency axis capped at 8 kHz.
ms = melspectrogram(y=y, sr=sr)
ms_db = power_to_db(ms, ref=np.max)
specshow(ms_db, y_axis='mel', fmax=8000, x_axis='time')
plt.title("Mel Spectrogram Example")
plt.colorbar(format="%+2.0f dB")
def chroma_cqt(args):
    """Compute a constant-Q chromagram for the signal described by ``args``.

    The signal comes from ``get_sig(args)``; ``fs``, ``nfft`` and
    ``noverlap`` are unpacked from ``args``. The hop length handed to
    ``rosaft.chroma_cqt`` is ``nfft - noverlap``.
    """
    signal = get_sig(args)
    sample_rate, window_len, overlap = unroll_args(
        args, ['fs', 'nfft', 'noverlap'])
    return rosaft.chroma_cqt(y=signal, sr=sample_rate,
                             hop_length=window_len - overlap)
#dat_24 = np.load("/media/wuyiming/TOSHIBA EXT/midihcqt_24/000005.npz")
#spec_dnn = U.Embed(U.PreprocessSpec(dat_24["spec"]),size=7)

# Restrict both network inputs to their first 250 frames.
spec = spec[:, :250, :]
spec_dnn = spec_dnn[:250, :]

# Load the two pre-trained feature extractors.
cnn = networks.FullCNNFeatExtractor()
cnn.load("fullcnn_crossentropy_6000.model")
deepchroma = networks.FeatureDNN()
deepchroma.load(
    "/home/wuyiming/Projects/TranscriptionChordRecognition/dnn3500.model")

# Columns 12..23 of each network output are used, transposed for plotting.
chroma_cnn = cnn.GetFeature(spec).data[:, 12:24].T
chroma_dnn = deepchroma.GetFeature(spec_dnn).data[:, 12:24].T

# Log-compressed chromagram of the waveform. The audio is passed by keyword:
# newer librosa releases no longer accept it positionally.
chroma = np.log(
    1 + chroma_cqt(y=wav, sr=C.SR, hop_length=C.H,
                   bins_per_octave=24)[:, :250])

target = chromatemplate.GetConvnetTargetFromPianoroll(
    U.GetPianoroll(
        "/media/wuyiming/TOSHIBA EXT/AIST.RWC-MDB-P-2001.SMF_SYNC/RM-P051.SMF_SYNC.MID"
    ))
target = target[10:260, 12:24].T

# Stacked panels of a 4-row figure.
plt.subplot(4, 1, 1)
specshow(chroma, y_axis="chroma")
plt.ylabel("(a)")
plt.subplot(4, 1, 2)
specshow(chroma_dnn, y_axis="chroma")
plt.ylabel("(b)")
plt.subplot(4, 1, 3)
specshow(chroma_cnn, y_axis="chroma")
def extract_features(soundwave, sampling_rate, sound_name="test", feature_list=None):
    """
    extracts features with help of librosa
    :param soundwave: extracted soundwave from file
    :param sampling_rate: sampling rate
    :param feature_list: list of features to compute; None or an empty
        list selects every supported feature
    :param sound_name: type of sound, i.e. dog
    :return: np.array of all features for the soundwave, concatenated with
        np.concatenate in the fixed order of the table below (not in the
        order of feature_list)
    :raises ValueError: from np.concatenate when no requested name is a
        supported feature
    """
    print("Computing features for ", sound_name)
    if feature_list is None or len(feature_list) == 0:
        feature_list = ["chroma_stft", "chroma_cqt", "chroma_cens", "melspectrogram",
                        "mfcc", "rmse", "spectral_centroid", "spectral_bandwidth",
                        "spectral_contrast", "spectral_flatness", "spectral_rolloff",
                        "poly_features", "tonnetz", "zero_crossing_rate"]

    # (feature name, whether the extractor takes the sampling rate).
    # Trailing numbers are the feature lengths noted by the original author.
    extractors = (
        ("chroma_stft", True),          # 12
        ("chroma_cqt", True),           # 12
        ("chroma_cens", True),          # 12
        ("melspectrogram", True),       # 128
        ("mfcc", True),                 # 20
        ("rmse", False),                # 1
        ("spectral_centroid", True),    # 1
        ("spectral_bandwidth", True),   # 1
        ("spectral_contrast", True),    # 7
        ("spectral_flatness", False),   # 1
        ("spectral_rolloff", True),     # 1
        ("poly_features", True),        # 2
        ("tonnetz", True),              # 6
        ("zero_crossing_rate", False),  # 1
    )

    features = []
    for name, uses_rate in extractors:
        if name not in feature_list:
            continue
        if name == "rmse":
            # Newer librosa releases expose this feature as `rms`; fall back
            # to the legacy `rmse` name when `rms` is not available.
            extract = getattr(feat, "rms", None) or feat.rmse
        else:
            extract = getattr(feat, name)
        # Keyword arguments: newer librosa releases reject positional y/sr.
        if uses_rate:
            features.append(extract(y=soundwave, sr=sampling_rate))
        else:
            features.append(extract(y=soundwave))
    return np.concatenate(features)
train_data = np.array([])
counter = 0
for chunk in train_data_reader:
    #print(chunk)
    chunk1 = np.array(chunk)
    for thing in chunk1:
        print(counter)
        thing1 = np.array(thing)
        #print(thing1)
        # Every feature is computed on the record without its last element.
        # NOTE(review): presumably the last element is a label - confirm.
        signal = thing1[:-1]
        row = np.array([])
        # Each feature is averaged over axis 0 of its transpose. The audio
        # is passed by keyword because newer librosa releases no longer
        # accept it positionally.
        cstft = np.mean(lf.chroma_stft(y=signal).T, axis=0)
        cqt = np.mean(lf.chroma_cqt(y=signal).T, axis=0)
        sens = np.mean(lf.chroma_cens(y=signal).T, axis=0)
        spcent = np.mean(lf.spectral_centroid(y=signal).T, axis=0)
        flatness = np.mean(lf.spectral_flatness(y=signal).T, axis=0)
        rolloff = np.mean(lf.spectral_rolloff(y=signal).T, axis=0)
        mspec = np.mean(lf.melspectrogram(y=signal).T, axis=0)
        mfcc = np.mean(lf.mfcc(y=signal, n_mfcc=30).T, axis=0)
        tonnetz = np.mean(lf.tonnetz(y=signal).T, axis=0)
        # `row` (empty float64 array) stays first so the result dtype
        # matches incremental concatenation.
        row = np.concatenate((row, cstft, cqt, sens, spcent, flatness,
                              rolloff, mspec, mfcc, tonnetz))