예제 #1
0
def generate_cqt(i, file_path, offset=0, duration=None):
    """Load an audio file and return the magnitude of its constant-Q transform.

    Progress messages are printed to stdout, each prefixed with ``[i]``, and
    the mono conversion, resampling and CQT stages are wrapped in ``Timer``
    context managers.

    Args:
        i: Label inserted into the ``[i]`` prefix of every message.
        file_path: Path of the audio file handed to ``load``.
        offset: Forwarded unchanged to ``load`` as ``offset``.
        duration: Forwarded unchanged to ``load`` as ``duration``.

    Returns:
        ``np.abs`` of the ``cqt`` output, computed on the signal resampled to
        ``TARGET_SAMPLE_RATE`` with ``HOP_LENGTH``, ``TOTAL_BINS`` and
        ``BINS_PER_OCTAVE``.
    """
    print('[{}] Opening'.format(i), file_path)
    # sr=None is passed so that `load` reports the rate it actually used.
    samples, native_sr = load(
        file_path, sr=None, offset=offset, duration=duration)
    print('[{}] Sample Rate:'.format(i), native_sr, 'shape:', samples.shape)

    # A two-dimensional array is treated as multi-channel audio.
    is_multichannel = len(samples.shape) == 2
    if is_multichannel:
        with Timer('[{}] Converted to mono'.format(i)):
            print('[{}] Converting to mono channel...'.format(i))
            samples = to_mono(samples)

    with Timer('[{}] Resampling'.format(i)):
        print('[{}] Resampling to'.format(i), TARGET_SAMPLE_RATE, 'Hz...')
        resampled = resample(
            samples, orig_sr=native_sr, target_sr=TARGET_SAMPLE_RATE)
        print('[{}] Downsampled to'.format(i), TARGET_SAMPLE_RATE,
              'Hz shape is now', resampled.shape)

    with Timer('[{}] CQT'.format(i)):
        print('[{}] Generating CQT...'.format(i))
        transform = cqt(
            resampled,
            sr=TARGET_SAMPLE_RATE,
            hop_length=HOP_LENGTH,
            n_bins=TOTAL_BINS,
            bins_per_octave=BINS_PER_OCTAVE)
        magnitude = np.abs(transform)

    return magnitude
예제 #2
0
def generate_cqt(file_path, st_status):
    """Load an audio file and return the magnitude of its constant-Q transform.

    Stage-level progress is reported through ``st_status.text(...)``; the
    sample-rate and mono-conversion details are printed to stdout.

    Args:
        file_path: Path of the audio file handed to ``auto_load``.
        st_status: Object exposing a ``text(message)`` method used for
            progress reporting.

    Returns:
        ``np.abs`` of the ``cqt`` output, computed on the signal resampled to
        ``TARGET_SAMPLE_RATE`` with ``HOP_LENGTH``, ``TOTAL_BINS`` and
        ``BINS_PER_OCTAVE``.
    """
    st_status.text('Opening {}'.format(file_path))
    samples, native_sr = auto_load(file_path, sr=None)
    print('Sample Rate:', native_sr, 'shape:', samples.shape)

    # A two-dimensional array is treated as multi-channel audio.
    is_multichannel = len(samples.shape) == 2
    if is_multichannel:
        print('Converting to mono channel...')
        samples = to_mono(samples)

    st_status.text('Resampling to {} Hz...'.format(TARGET_SAMPLE_RATE))
    resampled = resample(
        samples, orig_sr=native_sr, target_sr=TARGET_SAMPLE_RATE)
    resampled_message = 'Downsampled to {} Hz, shape is now {}'.format(
        TARGET_SAMPLE_RATE, resampled.shape)
    st_status.text(resampled_message)

    st_status.text('Generating CQT...')
    transform = cqt(
        resampled,
        sr=TARGET_SAMPLE_RATE,
        hop_length=HOP_LENGTH,
        n_bins=TOTAL_BINS,
        bins_per_octave=BINS_PER_OCTAVE)
    return np.abs(transform)
예제 #3
0
def calculateFeaturesForDownbeat(audio):
	"""Build a feature vector from windowed loudness statistics of a CQT.

	The CQT of `audio` is computed (sr=44100, 12 bins per octave, 5 octaves,
	hop length 512), converted to dB relative to its maximum and min-max
	normalised. The spectrogram is then cut into a grid of 31 time windows
	by 10 frequency windows and the mean of each cell is stored.

	Args:
		audio: Audio signal passed as the first argument to `cqt`.

	Returns:
		A list of 128 values: the mean, variance, skew and kurtosis across
		the frequency windows for each of the 31 time windows (4 * 31
		values), followed by 4 summary statistics of the per-frequency-window
		means and variances.

	Side effects:
		Prints a message when the dB spectrogram is constant, and draws a
		matplotlib figure when the module-level flag `isPlot` is truthy.
	"""
	BINS_PER_OCTAVE = 12
	NUM_OCTAVES = 5
	NUM_FREQ_WINDOWS = NUM_OCTAVES * 2		# Two windows per octave
	NUM_TIME_WINDOWS = 4 * 4 * 2 - 1		# 31 overlapping windows over 32 time slots
	spectogram = cqt(audio, sr=44100, bins_per_octave=BINS_PER_OCTAVE, n_bins=BINS_PER_OCTAVE*NUM_OCTAVES, hop_length=512)
	# Take the magnitude explicitly: the CQT is complex-valued, and the dB
	# conversion should receive amplitudes rather than discard the phase itself.
	spec_db = librosa.amplitude_to_db(np.absolute(spectogram), ref=np.max)
	max_spec = np.max(spec_db)
	min_spec = np.min(spec_db)
	if max_spec != min_spec:
		# Min-max normalise to the range [0, 1]
		spec_db = (spec_db - min_spec) / (max_spec - min_spec)
	else:
		# Constant spectrogram: only shift, to avoid dividing by zero
		print('Max equals min!')
		spec_db = spec_db - min_spec
	NUM_FRAMES = spectogram.shape[1]
	# window_features[i_time, i_freq] holds the mean of one time/frequency cell
	window_features = np.zeros((NUM_TIME_WINDOWS, NUM_FREQ_WINDOWS))
	for i_freq in range(NUM_FREQ_WINDOWS):
		# Non-overlapping half-octave windows along the frequency axis.
		# Floor division keeps the slice bounds integral on Python 2 and 3.
		f_bin_start = (i_freq * BINS_PER_OCTAVE) // 2
		f_bin_end = ((i_freq + 1) * BINS_PER_OCTAVE) // 2
		for i_time in range(NUM_TIME_WINDOWS):
			# Each time window spans 2/32 of the frames and advances by 1/32
			frame_start = int(i_time * NUM_FRAMES / 32.0)
			frame_end = int((i_time + 2) * NUM_FRAMES / 32.0)
			# Current section of the spectrogram
			cur_w = spec_db[f_bin_start:f_bin_end, frame_start:frame_end]
			window_features[i_time, i_freq] = np.mean(cur_w)	# Approximate 'loudness' of this window

	# Statistics across the frequency windows, one value per time window
	freq_axis_features = []
	freq_axis_features.extend(np.mean(window_features, axis=1))
	freq_axis_features.extend(np.var(window_features, axis=1))
	freq_axis_features.extend(skew(window_features, axis=1))
	freq_axis_features.extend(kurtosis(window_features, axis=1))

	# Statistics across the time windows, one value per frequency window
	time_means = np.mean(window_features, axis=0)
	time_vars = np.var(window_features, axis=0)
	# NOTE(review): the second entry repeats np.var(time_vars); np.var(time_means)
	# looks like the intended value. Left unchanged because fixing it alters the
	# feature vector that downstream consumers may have been fitted on - confirm.
	freq_axis_features.extend([np.mean(time_means), np.var(time_vars), np.mean(time_vars), np.var(time_vars)])

	datarow = freq_axis_features
	# Show
	if isPlot:
		plt.figure()
		plt.subplot(2,1,1)
		librosa.display.specshow(spec_db, x_axis='time', y_axis='cqt_note')
		plt.subplot(2,1,2)
		plt.plot(datarow[::4])
		plt.plot(datarow[1::4])
		plt.plot(datarow[2::4])
		plt.plot(datarow[3::4])
	return datarow
예제 #4
0
 def __call__(self, data):
     """Apply the constant-Q transform configured on this object to `data`.

     The transform is computed with `self.sampling_rate`, `self.hop_length`,
     `self.n_bins`, `self.bins_per_octave` and `self.window`.

     Returns:
         The `cqt` output, passed through `np.abs` only when `self.is_abs`
         is exactly `True`.
     """
     transform = cqt(
         data,
         sr=self.sampling_rate,
         hop_length=self.hop_length,
         n_bins=self.n_bins,
         bins_per_octave=self.bins_per_octave,
         window=self.window,
     )
     # Identity check on purpose: truthy values other than True leave the
     # transform untouched.
     if self.is_abs is True:
         return np.abs(transform)
     return transform
예제 #5
0
def calcAndWriteFeatureFile(audio, filename):
	'''
		Generate the CQT matrix in dB and write it as a raw binary array to a
		file, so that it can later be read by tensorflow with a
		FixedLengthRecordReader.

		audio: audio signal passed as the first argument to `cqt`
		filename: destination handed to `ndarray.tofile`; only the array
			data is written, so the reader must already know the shape and dtype

		The CQT uses sr=44100, 12 bins per octave and 84 bins; the dB values
		are relative to the maximum magnitude (ref=np.max).
	'''
	spectogram = cqt(audio, sr=44100, bins_per_octave=12, n_bins=84)
	# The CQT is complex-valued: hand the dB conversion explicit magnitudes
	# instead of relying on it to discard the phase.
	spec_db = librosa.amplitude_to_db(np.abs(spectogram), ref=np.max)

	spec_db.tofile(filename)
예제 #6
0
# FFT
# Transform `channel1` (defined outside this excerpt) with `fft.fft`.
fourier = fft.fft(channel1)
#print(fourier)
# The string literal below is disabled plotting code; as a bare expression it
# is never executed.
"""plt.figure()
plt.plot(fourier, alpha=0.9, color='blue')
plt.xlabel('k')
plt.ylabel('Amplitude')
plt.show()
"""

# CQT
# Load the wav file with librosa
x, sr = librosa.load(
    "test.wav", sr=44100,
    mono=True)  # mono=True converts the audio to mono (to do)
# NOTE: this binds the module-level name `cqt` to the transform result.
cqt = librosa.cqt(x, sr=sr, bins_per_octave=36)
# Magnitude of the complex CQT, converted to decibels.
log_cqt = librosa.amplitude_to_db(np.abs(cqt))

# Spectrogram FFT
"""
plt.figure(2, figsize=(8,6))
plt.subplot(211)
Pxx, freqs, bins, im = plt.specgram(channel1, Fs=rate, NFFT=1024, cmap=plt.get_cmap('plasma'))
cbar=plt.colorbar(im)
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
cbar.set_label('Intensity dB')
plt.subplot(212)
Pxx, freqs, bins, im = plt.specgram(channel2, Fs=rate, NFFT=1024, cmap=plt.get_cmap('plasma'))
cbar=plt.colorbar(im)
plt.xlabel('Time (s)')
# Export one .npz file (spectrogram + target) per entry of `midilist`.
# `midilist`, `C`, `utils` and `chromatemplate` are defined outside this excerpt.
itemcnt = len(midilist)  # not used in the visible part of the script
# Output files are numbered from 5001 upwards (i is incremented before use).
i = 5000
for midifile in midilist:
    i += 1
    print("Processing %d" % (i))
    # NOTE(review): `wav` is overwritten by the fluidsynth output below, and
    # this call sits outside the try block, so a failure here is not skipped.
    # Confirm whether loading the MIDI path as audio is intended.
    wav, sr = load(midifile, sr=C.SR)
    try:
        # Synthesise the MIDI file to audio at C.SR.
        pm = pretty_midi.PrettyMIDI(midifile)
        wav = pm.fluidsynth(fs=C.SR)
        target = chromatemplate.GetConvnetTargetFromPianoroll(
            utils.GetPianoroll(midifile))
        fmin = note_to_hz("C1")
        # One magnitude CQT per h in range(C.CQT_H), each with its minimum
        # frequency scaled by (h + 1), transposed, cast to float32 and
        # stacked vertically.
        spec = np.vstack([
            np.abs(
                cqt(wav,
                    sr=C.SR,
                    hop_length=C.H,
                    n_bins=C.BIN_CNT,
                    bins_per_octave=C.OCT_BIN,
                    fmin=fmin * (h + 1))).T.astype(np.float32)
            for h in range(C.CQT_H)
        ])
    except (Exception):
        # Any failure while synthesising or transforming skips this file.
        print("Got error.Skip...")
        continue

    # NOTE(review): the common size is taken from axis 1 of both arrays, but
    # the slices below truncate along axis 0 - confirm which axis is time.
    minsz = min([spec.shape[1], target.shape[1]])
    np.savez(C.PATH_MIDIHCQT + "additional/" + "%06d.npz" % i,
             spec=spec[:minsz],
             target=target[:minsz])
예제 #8
0
import sys

import numpy as np

import matplotlib.pyplot as plt

import librosa
import librosa.display
from librosa.core import cqt

# Usage: <script> AUDIO_FILE LENGTH
#   AUDIO_FILE  path handed to librosa.load (resampled to 44100 Hz)
#   LENGTH      integer; the excerpt shown is LENGTH * 60480 samples long

# Skip the first 50 seconds of the recording (44100 samples per second).
START = 44100 * 50
audio, sr = librosa.load(sys.argv[1], sr=44100)
# NOTE(review): the meaning of the 60480-sample unit is not visible here - confirm.
audio = audio[START:START + int(sys.argv[2]) * 60480]

FRAME_SIZE = 1024
# Floor division keeps the hop size an integer on Python 3 as well.
HOP_SIZE = FRAME_SIZE // 2

spectogram = cqt(audio, sr=44100, bins_per_octave=12, n_bins=84)
# The CQT is complex-valued: convert explicit magnitudes to dB (relative to
# the maximum) instead of relying on amplitude_to_db to discard the phase.
spec_db = librosa.amplitude_to_db(np.abs(spectogram), ref=np.max)
print(np.shape(spec_db), spec_db.dtype)

librosa.display.specshow(spec_db, sr=44100)
plt.show()
예제 #9
0
# `parser` and the option read below as `args.f` are defined outside this excerpt.
parser.add_argument("-d", help="Name of the BLSTM-CRF decoder parameter file. (default: 'nblstm_crf.model')",\
                    type=str, default="nblstm_crf.model", action="store")
args = parser.parse_args()

audio_list = find_files("Datas/audios_estimation")

# Estimate a label sequence for every audio file found above.
for audiofile in audio_list:
    # Last path component; assumes "/" as the separator.
    fname = audiofile.split("/")[-1]
    print("Processing: %s" % fname)
    #load audio
    y,sr = load(audiofile,sr=C.SR)
    
    #extract Harmonic-CQT from audio
    # One magnitude CQT per h in range(C.CQT_H), each with its minimum
    # frequency scaled by (h+1), transposed, cast to float32 and stacked
    # along a new leading axis.
    fmin = note_to_hz("C1")
    hcqt = np.stack([np.abs(cqt(y,sr=C.SR,hop_length=C.H,n_bins=C.BIN_CNT,bins_per_octave=C.OCT_BIN,fmin=fmin*(h+1),filter_scale=2,tuning=None)).T.astype(np.float32) for h in range(C.CQT_H)])
    
    #extract feature using trained CNN extractor
    # NOTE(review): the extractor (and the decoder below) is rebuilt and
    # reloaded from disk for every file; it could probably be created once
    # before the loop - confirm the models keep no per-file state.
    cnn_feat_extractor = N.FullCNNFeatExtractor()
    cnn_feat_extractor.load(args.f)
    
    feat = cnn_feat_extractor.GetFeature(U.PreprocessSpec(hcqt)).data
    
    #decode label sequence
    decoder = N.NBLSTMCRF()
    decoder.load(args.d)
    
    labels = decoder.argmax(feat)
    
    #convert into .lab file
    # Output path: the audio file name with ".lab" appended.
    labfile = os.path.join("Datas/labs_estimated",fname+".lab")
예제 #10
0
import chromatemplate

# Inference mode: no training-time behaviour, no gradient bookkeeping.
config.enable_backprop = False
config.train = False

audiofile = "/home/wuyiming/Projects/ChordData/Audio/16_RWC/050.wav"

wav, sr = load(audiofile, sr=C.SR)
fmin = note_to_hz("C1")


def _magnitude_cqt(signal, **cqt_options):
    """Return the transposed float32 magnitude CQT of `signal`.

    The settings shared by every transform in this script (sr, hop length,
    filter_scale=2, tuning=None) are fixed here; the remaining options are
    forwarded unchanged to `cqt`.
    """
    transform = cqt(signal,
                    sr=C.SR,
                    hop_length=C.H,
                    filter_scale=2,
                    tuning=None,
                    **cqt_options)
    return np.abs(transform).T.astype(np.float32)


# Harmonic CQT: one transform per h, with the minimum frequency scaled by
# (h + 1), stacked along a new leading axis.
_harmonic_layers = [
    _magnitude_cqt(wav,
                   n_bins=C.BIN_CNT,
                   bins_per_octave=C.OCT_BIN,
                   fmin=fmin * (h + 1))
    for h in range(C.CQT_H)
]
spec = U.PreprocessSpec(np.stack(_harmonic_layers))

# Single 144-bin CQT at 24 bins per octave with the default minimum frequency.
_dnn_cqt = _magnitude_cqt(wav, n_bins=144, bins_per_octave=24)
spec_dnn = U.Embed(U.PreprocessSpec(_dnn_cqt), size=1)
예제 #11
0
            timing.append(time_used)

        print("mean = ", np.mean(timing))
        print("std = ", np.std(timing))

        data = pd.DataFrame(timing, columns=['t_avg'])
        data['Type'] = f'torch_{args.device}'
        data.to_csv(
            Path(__file__).parent / f'./result/CQT2010v1_torch_{args.device}')

    elif args.device == "librosa":
        spec_list = []
        timing = []
        for e in range(5):
            t_start = time.time()
            for i in tqdm.tqdm(y_list, leave=True):
                spec = cqt(i, sr=44100, n_bins=84, bins_per_octave=24, fmin=55)
                spec_list.append(abs(spec))
            time_used = time.time() - t_start
            print(time_used)
            timing.append(time_used)

        print("mean = ", np.mean(timing))
        print("std = ", np.std(timing))

        data = pd.DataFrame(timing, columns=['t_avg'])
        data['Type'] = 'librosa'
        data.to_csv(Path(__file__).parent / f'./result/librosa_CQT')

    else:
        print("Please select a correct device")