Example #1
def create_features():                                      #Function extracts features from all files
    print("\nConverting LM data to features...")            #prints to user when the function starts up
    features_list = []                                      #creates an empty list to hold features from all files
    for i, file_name in enumerate(file_names):              #for each smaller .wav file in specified directory
        print("File " + str(i+1) + "...")                   #prints to user which file features are being extracted from
        full_file_name = os.path.join(source_location,
                                      file_name)            #get full name of file, inc directory
        rate, data = wvf.read(full_file_name)               #read in file as .wav as data and its sampling rate

        if audio_processing_choice == "chroma":             #if feature extraction choice is 'chroma',
            data = np.asarray(                              #process data by flattening data to 1D, taking every
                [float(datum) for datum in                  #other value of data, converting each to floats, and
                 data.flatten()[0::2]])                     #converting to a numpy array
            features = chroma_stft(y=data, sr=rate).T       #This modified data is passed to chroma function
                                                            #with sampling rate and result transposed
                                                            #to give ('# frames' x '12 features')
            features = np.repeat(features, 3, axis=1)       #Append several copies of this horizontally to give
                                                            #36 features (enough for CNNv3 to work with)

        elif audio_processing_choice == "cqt":              #else if feature extraction choice is 'cqt',
            data = np.asarray(                              #process data by flattening data to 1D, taking every
                [float(datum) for datum in                  #other value of data, converting each to floats, and
                 data.flatten()[0::2]])                     #converting to a numpy array
            features = chroma_cqt(y=data, sr=rate,          #This modified data is passed to cqt function with
                                  n_chroma=reduced_dim).T   #sampling rate and result transposed to give
                                                            #('# frames' x 'reduced_dim features')

        elif audio_processing_choice == "mfcc":             #else if feature extraction choice is 'mfcc',
            features = mfcc(signal=data, samplerate=rate,   #pass .wav data directly with sampling rate
                            winlen=frame_time_len,          #to 'mfcc' function and result is feature vector as
                            winstep=frame_time_len,         #('# frames' x 'reduced_dim features')
                            numcep=reduced_dim,
                            nfilt=reduced_dim*2,
                            nfft=frame_len)

        elif audio_processing_choice == "fbank":
            features = fbank(signal=data, samplerate=rate,  #else if feature extraction choice is 'fbank',
                             winlen=frame_time_len,         #pass .wav data directly with sampling rate
                             winstep=frame_time_len,        #to 'fbank' function and result is feature vector as
                             nfilt=reduced_dim,             #('# frames' x 'reduced_dim features')
                             nfft=frame_len)[0]             #(with only first item from list as this is the numpy
                                                            #array we're interested in; the other being array
                                                            #of energies in each frame)

        else:                                               #else if feature extraction choice is anything else
            features = logfbank(signal=data,                #pass .wav data directly with sampling rate
                                samplerate=rate,            #to 'logfbank' function and result is feature vector
                                winlen=frame_time_len,      #as ('# frames' x 'reduced_dim features')
                                winstep=frame_time_len,
                                nfilt=reduced_dim,
                                nfft=frame_len)
        features_list.append(features)                      #Add the extracted features of current .wav file to
                                                            #list, and return this list after features
    return features_list                                    #of all files have been extracted
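
This function leans on module-level names that are not shown above. A minimal sketch of what it appears to assume: scipy for .wav reading, librosa for the chroma features, and python_speech_features for mfcc/fbank/logfbank (all concrete values below are illustrative guesses, not from the source):

import os
import numpy as np
import scipy.io.wavfile as wvf
from librosa.feature import chroma_stft, chroma_cqt
from python_speech_features import mfcc, fbank, logfbank

source_location = "lm_data"              # hypothetical folder of short .wav clips
file_names = os.listdir(source_location)
audio_processing_choice = "cqt"          # "chroma", "cqt", "mfcc", "fbank" or "logfbank"
reduced_dim = 12                         # features per frame
frame_time_len = 0.025                   # window length and step, in seconds
frame_len = 512                          # FFT size, in samples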
Example #2
def beat_synchronous_chroma(song, sr):
    from librosa.beat import beat_track
    from librosa.feature import chroma_cqt
    from librosa.util import sync

    hop_length = 1024

    tempo, beat_frames = beat_track(y=song, sr=sr,
                                    hop_length=hop_length)  # same hop as the chromagram, so beat frames index chroma frames

    chromagram = chroma_cqt(y=song, sr=sr, hop_length=hop_length)

    return sync(chromagram, beat_frames)
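
A quick usage sketch, assuming librosa is installed and "audio.wav" stands in for any mono recording:

import librosa

y, sr = librosa.load("audio.wav")            # hypothetical input file
beat_chroma = beat_synchronous_chroma(y, sr)
print(beat_chroma.shape)                     # (12, number of beat segments)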
Example #3
def get_chromagram(y, sr, chroma):
	"""
		returns chromagram

		Parameters
		----------
		y : number > 0 [scalar]
			audio

		sr: number > 0 [scalar]
			target sampling rate

		chroma: str 
		    chroma-samplerate-framesize-overlap

		
		Returns
		-------
		list of chromagrams	
	"""
	params = get_parameters_chroma(chroma)
	chroma = params["chroma"]
	doce_bins_tuned_chroma = None
	if chroma == 'nnls':
		doce_bins_tuned_chroma = get_nnls(y, params["sr"], params["fr"], params["off"])
	elif chroma == 'cqt':
		win = get_window('blackmanharris', params["fr"])
		doce_bins_tuned_chroma = chroma_cqt(y=y, sr=params["sr"],
		                                    C=None,
		                                    hop_length=params["off"],
		                                    norm=None,
		                                    # threshold=0.0,
		                                    window=win,
		                                    fmin=110,
		                                    n_chroma=12,
		                                    n_octaves=4 if params["chroma"] == "cqt" and params["sr"] == 5525 else 5,
		                                    bins_per_octave=36)
	elif chroma == 'cens':
		win = get_window('blackmanharris', params["fr"])
		doce_bins_tuned_chroma = chroma_cens(y=y, sr=params["sr"],
		                                     C=None,
		                                     hop_length=params["off"],
		                                     norm=None,
		                                     window=win,
		                                     fmin=110,
		                                     n_chroma=12,
		                                     n_octaves=5,
		                                     bins_per_octave=36)
	elif chroma == 'stft':
		win = get_window('blackmanharris', params["fr"])
		doce_bins_tuned_chroma = chroma_stft(y=y, sr=params["sr"], hop_length=params["off"], norm=None, window=win,
		                                     n_chroma=12)
	return doce_bins_tuned_chroma
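
get_parameters_chroma and get_nnls are helpers from the source project and are not shown. A plausible sketch of the parser, assuming the "chroma-samplerate-framesize-overlap" string format named in the docstring:

def get_parameters_chroma(chroma):
    # Hypothetical parser for strings like "cqt-22050-4096-1024":
    # chroma type, sample rate, frame size, hop (overlap offset).
    chroma_type, sr, fr, off = chroma.split("-")
    return {"chroma": chroma_type, "sr": int(sr), "fr": int(fr), "off": int(off)}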
Example #4
def compareto(audio, reference):
    xy, xsr = audio
    yy, ysr = reference
    
    mfccX = feature.mfcc(y=xy, sr=xsr)
    mfccY = feature.mfcc(y=yy, sr=ysr) 
    
    chromaX = feature.chroma_cqt(y=xy, sr=xsr)
    chromaY = feature.chroma_cqt(y=yy, sr=ysr) 
    
    score = 0

    D, wp = dtw(mfccX[0], mfccY[0])
    score += getscore(wp) * 2

    D, wp = dtw(chromaX, chromaY)
    score += getscore(wp)

    return score / 3
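
A usage sketch with the imports the snippet assumes, plus a hypothetical getscore() (the original is not shown) that rates a warping path by its average deviation from the diagonal:

import numpy as np
import librosa
from librosa import feature
from librosa.sequence import dtw

def getscore(wp):
    # Hypothetical stand-in: a smaller mean deviation from the diagonal
    # means the two sequences align more directly.
    wp = np.asarray(wp)
    return float(np.mean(np.abs(wp[:, 0] - wp[:, 1])))

audio = librosa.load("take.wav")             # placeholder paths
reference = librosa.load("reference.wav")
print(compareto(audio, reference))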
Example #5
    def encode(self, data: np.ndarray, *args, **kwargs) -> np.ndarray:
        """
        Segments the audio signal of each Chunk into short chroma frames, extracts chromagrams for each frame and
        concatenates Chunk frame chromagrams into a single Chunk embedding.

        :param data: a `Batch x Signal Length` ndarray, where `Signal Length` is a number of samples
        :return: a `Batch x Concatenated Features` ndarray, where `Concatenated Features` is a 12-dimensional feature
        vector times the number of the chroma frames
        """
        from librosa.feature import chroma_cqt
        embeds = []
        for chunk_data in data:
            chromagrams = chroma_cqt(y=chunk_data,
                                     sr=self.input_sample_rate,
                                     n_chroma=12,
                                     hop_length=self.hop_length)
            embeds.append(chromagrams.flatten())
        return np.stack(embeds)  # stack into the documented Batch x Features ndarray
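
A standalone sketch of the same idea, assuming default librosa parameters and four hypothetical one-second chunks:

import numpy as np
from librosa.feature import chroma_cqt

def encode_chunks(data, sr=22050, hop_length=512):
    # One flattened chromagram per chunk, stacked into a batch.
    return np.stack([chroma_cqt(y=chunk, sr=sr, hop_length=hop_length).flatten()
                     for chunk in data])

batch = np.random.randn(4, 22050).astype(np.float32)   # hypothetical input batch
print(encode_chunks(batch).shape)                      # (4, 12 * n_frames)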
Example #6
def analyse_track(dset, index):
    """analyse track, extract bpm and distribution of notes from the bass line."""
    track = dset[index]
    mix = track.sum(0).mean(0)
    ref = mix.std()

    starts = (abs(mix) >= 1e-2 * ref).float().argmax().item()
    track = track[..., starts:]

    cache = CACHE / dset.sig
    cache.mkdir(exist_ok=True, parents=True)

    cache_file = cache / f"{index}.pkl"
    cached = None
    if cache_file.exists():
        cached = try_load(cache_file)
        if cached is not None:
            tempo, events, hist_kr = cached

    if cached is None:
        drums = track[0].mean(0)
        if drums.std() > 1e-2 * ref:
            tempo, events = beat_track(drums.numpy(), units='time', sr=SR)
        else:
            print("failed drums", drums.std(), ref)
            return None, track

        bass = track[1].mean(0)
        r = rms(bass)
        peak = r.max()
        mask = r >= 0.05 * peak
        bass = bass[mask]
        if bass.std() > 1e-2 * ref:
            kr = torch.from_numpy(chroma_cqt(bass.numpy(), sr=SR))
            hist_kr = (kr.max(dim=0, keepdim=True)[0] == kr).float().mean(1)
        else:
            print("failed bass", bass.std(), ref)
            return None, track

    pickle.dump([tempo, events, hist_kr], open(cache_file, 'wb'))
    spec = Spec(tempo, events, hist_kr, track, index)
    return spec, None
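
The snippet assumes several imports plus project-specific pieces: the dataset object, the CACHE directory, the SR constant, the Spec record and the try_load helper. A sketch of plausible definitions, flagged as guesses where the source does not show them:

import pickle
from collections import namedtuple
from pathlib import Path

import torch
from librosa.beat import beat_track
from librosa.feature import chroma_cqt, rms

SR = 44100                                    # hypothetical sample rate
CACHE = Path("./cache")                       # hypothetical cache root
Spec = namedtuple("Spec", "tempo events hist_kr track index")

def try_load(path):
    # Hypothetical helper: unpickle the cached analysis, or None on failure.
    try:
        with open(path, "rb") as f:
            return pickle.load(f)
    except Exception:
        return None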
Example #7
    def calculate(self, frame):

        #print("calculating chroma...")
        y = frame.astype('float32')
        sr = self.rate
        mag = np.linalg.norm(y)
        if mag > .008:
            chroma = feature.chroma_cqt(y=y, sr=sr, bins_per_octave=12 * 3)
            #filtering reduces volume of noise/partials
            chroma_filtered = np.minimum(
                chroma,
                decompose.nn_filter(chroma,
                                    aggregate=np.median,
                                    metric='cosine'))
            chroma_smooth = ndimage.median_filter(chroma_filtered, size=(1, 9))
            np.place(chroma_smooth, np.isnan(chroma_smooth), [0])
            chroma_smooth = np.mean(chroma_smooth, axis=1)
        else:
            chroma_smooth = np.zeros(12)    #silence: zero chroma vector matching
                                            #the 12 mean values produced above
        self.outputqueue.put_nowait(chroma_smooth)
        self.signalToOnlineDTW.emit()
Example #8
import os

import matplotlib.pyplot as plt
import numpy as np
from librosa import load, stft, amplitude_to_db, power_to_db
from librosa.display import specshow
from librosa.feature import chroma_cqt, melspectrogram, tonnetz

sampFile = os.listdir("data/Hindustani/mp3/Bhairav")

my_file = f"data/Hindustani/mp3/Bhairav/{sampFile[0]}"

y, sr = load(my_file)

D = np.abs(stft(y))

specshow(amplitude_to_db(D, ref=np.max), y_axis='log', x_axis='time')
plt.title("Power Spectrogram")
plt.colorbar(format='%+2.0f dB')
plt.tight_layout()

chroma_cq = chroma_cqt(y=y, sr=sr)
specshow(chroma_cq, y_axis='chroma', x_axis='time')
plt.title("Chromagram Constant Q Transform")
plt.colorbar()
plt.tight_layout()

tonnetz_feat = tonnetz(y=y, sr=sr)   # renamed so the tonnetz function is not shadowed
specshow(tonnetz_feat, y_axis='tonnetz')
plt.title("Tonnetz Example")
plt.colorbar()
plt.tight_layout()

ms = melspectrogram(y=y, sr=sr)
specshow(power_to_db(ms, ref=np.max), y_axis='mel', fmax=8000, x_axis='time')
plt.title("Mel Spectrogram Example")
plt.colorbar(format="%+2.0f dB")
Example #9
def chroma_cqt(args):
    sig = get_sig(args)
    fs, nfft, noverlap = unroll_args(args, ['fs', 'nfft', 'noverlap'])
    hopsize = nfft - noverlap
    return rosaft.chroma_cqt(y=sig, sr=fs, hop_length=hopsize)
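
get_sig and unroll_args are project helpers not shown here; rosaft is presumably an alias along the lines of:

# Assumed alias for librosa's feature module.
import librosa.feature as rosaft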
Example #10
#dat_24 = np.load("/media/wuyiming/TOSHIBA EXT/midihcqt_24/000005.npz")
#spec_dnn = U.Embed(U.PreprocessSpec(dat_24["spec"]),size=7)

spec = spec[:, :250, :]
spec_dnn = spec_dnn[:250, :]
cnn = networks.FullCNNFeatExtractor()
cnn.load("fullcnn_crossentropy_6000.model")

deepchroma = networks.FeatureDNN()
deepchroma.load(
    "/home/wuyiming/Projects/TranscriptionChordRecognition/dnn3500.model")

chroma_cnn = cnn.GetFeature(spec).data[:, 12:24].T
chroma_dnn = deepchroma.GetFeature(spec_dnn).data[:, 12:24].T
chroma = np.log(
    1 + chroma_cqt(wav, sr=C.SR, hop_length=C.H, bins_per_octave=24)[:, :250])

target = chromatemplate.GetConvnetTargetFromPianoroll(
    U.GetPianoroll(
        "/media/wuyiming/TOSHIBA EXT/AIST.RWC-MDB-P-2001.SMF_SYNC/RM-P051.SMF_SYNC.MID"
    ))
target = target[10:260, 12:24].T

plt.subplot(4, 1, 1)
specshow(chroma, y_axis="chroma")
plt.ylabel("(a)")
plt.subplot(4, 1, 2)
specshow(chroma_dnn, y_axis="chroma")
plt.ylabel("(b)")
plt.subplot(4, 1, 3)
specshow(chroma_cnn, y_axis="chroma")
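
For context, a sketch of the library imports this snippet assumes (networks, U, C and chromatemplate are modules from the source project and are not reproduced here):

import numpy as np
import matplotlib.pyplot as plt
from librosa.feature import chroma_cqt
from librosa.display import specshow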
Example #11
def extract_features(soundwave, sampling_rate, sound_name="test", feature_list=None):
    """
    extracts features with help of librosa
    :param soundwave: extracted soundwave from file
    :param sampling_rate: sampling rate
    :param feature_list: list of features to compute
    :param sound_name: type of sound, i.e. dog
    :return: np.array of all features for the soundwave
    """
    print("Computing features for ",sound_name)

    if not feature_list:
        feature_list=["chroma_stft","chroma_cqt","chroma_cens","melspectrogram",
                      "mfcc","rmse","spectral_centroid","spectral_bandwidth",
                      "spectral_contrast","spectral_flatness","spectral_rolloff",
                      "poly_features","tonnetz","zero_crossing_rate"]

    features=[]


    #feature_len
    #"chroma_stft":12
    if "chroma_stft" in feature_list:
        features.append(feat.chroma_stft(soundwave, sampling_rate))

    #"chroma_cqt":12
    if "chroma_cqt" in feature_list:
        features.append(feat.chroma_cqt(soundwave, sampling_rate))

    #"chroma_cens":12
    if "chroma_cens" in feature_list:
        features.append(feat.chroma_cens(soundwave, sampling_rate))

    #"malspectrogram":128
    if "melspectrogram" in feature_list:
        features.append(feat.melspectrogram(soundwave, sampling_rate))

    #"mfcc":20
    if "mfcc" in feature_list:
        features.append(feat.mfcc(soundwave, sampling_rate))

    #"rmse":1
    if "rmse" in feature_list:
        features.append(feat.rmse(soundwave))

    #"spectral_centroid":1
    if "spectral_centroid" in feature_list:
        features.append(feat.spectral_centroid(soundwave, sampling_rate))

    #"spectral_bandwidth":1
    if "spectral_bandwidth" in feature_list:
        features.append(feat.spectral_bandwidth(soundwave, sampling_rate))

    #"spectral_contrast":7
    if "spectral_contrast" in feature_list:
        features.append(feat.spectral_contrast(soundwave, sampling_rate))

    #"spectral_flatness":1
    if "spectral_flatness" in feature_list:
        features.append(feat.spectral_flatness(soundwave))

    #"spectral_rolloff":1
    if "spectral_rolloff" in feature_list:
        features.append(feat.spectral_rolloff(soundwave, sampling_rate))

    #"poly_features":2
    if "poly_features" in feature_list:
        features.append(feat.poly_features(soundwave, sampling_rate))

    #"tonnetz":6
    if "tonnetz" in feature_list:
        features.append(feat.tonnetz(soundwave, sampling_rate))

    #"zero_crossing_rate":1
    if "zero_crossing_rate" in feature_list:
        features.append(feat.zero_crossing_rate(soundwave))


    return np.concatenate(features)
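
A usage sketch, assuming librosa.feature is imported as feat and an older librosa release (pre-0.8, where feat.rmse and positional (y, sr) calls still exist):

import librosa
import librosa.feature as feat
import numpy as np

y, sr = librosa.load("dog_bark.wav")            # hypothetical input clip
all_feats = extract_features(y, sr, sound_name="dog")
print(all_feats.shape)                          # (total feature bins, n_frames)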
Example #12
import numpy as np
import librosa.feature as lf   # assumed alias for librosa.feature; train_data_reader
                               # is presumed defined upstream (e.g. pandas read_csv
                               # with chunksize), as it is not shown in this snippet

counter = 0
train_data = np.array([])
for chunk in train_data_reader:
    #print(chunk)
    chunk1 = np.array(chunk)
    for thing in chunk1:
        counter += 1
        print(counter)
        thing1 = np.array(thing)
        #print(thing1)
        row = np.array([])
        cstft = np.mean(lf.chroma_stft(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, cstft))
        cqt = np.mean(lf.chroma_cqt(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, cqt))
        sens = np.mean(lf.chroma_cens(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, sens))
        spcent = np.mean(lf.spectral_centroid(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, spcent))
        flatness = np.mean(lf.spectral_flatness(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, flatness))
        rolloff = np.mean(lf.spectral_rolloff(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, rolloff))
        mspec = np.mean(lf.melspectrogram(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, mspec))
        mfcc = np.mean(lf.mfcc(thing1[:-1], n_mfcc=30).T, axis=0)
        row = np.concatenate((row, mfcc))
        tonnetz = np.mean(lf.tonnetz(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, tonnetz))