def test_replay(jam_in, audio_file):
    """Verify that muda.replay reproduces a pipeline's output on a fresh jam."""
    stretch = muda.deformers.LogspaceTimeStretch()
    shift = muda.deformers.PitchShift(n_semitones=1)
    pipeline = muda.Pipeline([('time', stretch), ('pitch', shift)])

    # Transform one copy of the jam through the pipeline
    jam_orig = muda.load_jam_audio(jam_in, audio_file)
    jam_muda = next(pipeline.transform(jam_orig))

    # Load a second, untouched copy of the same jam
    jam_fresh = muda.load_jam_audio(jam_in, audio_file)

    # Keep a handle on the deformed audio before replaying
    deformed_audio = jam_muda.sandbox.muda._audio

    jam_new = muda.replay(jam_muda, jam_fresh)

    # 1: every annotation must match pairwise
    for ann_a, ann_b in zip(jam_muda.annotations, jam_new.annotations):
        assert ann_a == ann_b

    # 2: audio samples and sampling rate must match
    assert np.allclose(deformed_audio['y'], jam_new.sandbox.muda._audio['y'])
    assert deformed_audio['sr'] == jam_new.sandbox.muda._audio['sr']

    # Replay must yield a distinct object, not an alias
    assert jam_muda is not jam_new
def test_reload_jampack(jam_in, audio_file):
    """Regression test for #42: the muda sandbox must round-trip as a
    jams.Sandbox instead of decaying to a plain dict on reload."""
    original = muda.load_jam_audio(jam_in, audio_file)
    reloaded = muda.load_jam_audio(six.StringIO(original.dumps()), audio_file)

    for candidate in (original, reloaded):
        assert isinstance(candidate.sandbox.muda, jams.Sandbox)
def __test(jam_in, audio_file):
    """Check that a loaded jam carries a muda sandbox and that its audio
    duration matches the jam's file metadata."""
    jam = muda.load_jam_audio(jam_in, audio_file)
    assert hasattr(jam.sandbox, 'muda')
    expected = librosa.get_duration(**jam.sandbox.muda._audio)
    eq_(jam.file_metadata.duration, expected)
def augment(afile, jfile, deformer, outpath):
    """Apply `deformer` to one (audio, jams) pair and write each variant
    to `outpath` as numbered .flac/.jams files."""
    jam = muda.load_jam_audio(jfile, afile, strict=False)
    stem = os.path.join(outpath, root(afile))
    for idx, jam_out in enumerate(deformer.transform(jam)):
        muda.save('{}.{}.flac'.format(stem, idx),
                  '{}.{}.jams'.format(stem, idx),
                  jam_out, strict=False)
def test_load_jam_audio(jam_loader, audio_file, validate, strict, fmt):
    """load_jam_audio must attach a muda sandbox and record the true duration."""
    jam = muda.load_jam_audio(jam_loader, audio_file,
                              validate=validate, strict=strict, fmt=fmt)
    assert hasattr(jam.sandbox, 'muda')
    assert jam.file_metadata.duration == librosa.get_duration(
        **jam.sandbox.muda._audio)
def run():
    """Walk the dataset class folders and, for each .wav, write pitch-shifted
    and background-noise variants (audio + .jams) next to the original.

    Relies on module-level names `path_database`, `audio_format` and `files`
    defined elsewhere in this file.
    """
    # Collect the class folder paths
    class_dirs = glob.glob(path_database + '*')
    for class_dir in class_dirs:
        # Full path of every audio file in this folder
        audio_paths = glob.glob(class_dir + '/*.' + audio_format)
        for audio_path in audio_paths:
            # Audio file name, with and without extension
            audio_name = re.sub(class_dir, "", audio_path)
            audio_name = re.sub(r'/', "", audio_name)
            audio_stem = re.sub(r'\.wav', "", audio_name)
            # Full path of the matching annotation file
            jam_path = re.sub(r'\.wav', ".jams", audio_path)
            # Class label is the second dash-separated field of the name
            label = audio_name.split('-')[1]
            # Classes 1 and 6 get a wider semitone range
            if label in ('1', '6'):
                semitones = [1, 1.5, 2, 2.5, 3, 3.5]
            else:
                semitones = [1.5, 2, 2.5, 3]
            jam_orig = muda.load_jam_audio(jam_path, audio_path)

            # Pitch-shift deformations: each deformed audio is saved to a
            # new .wav with the applied deformation recorded in its .jams
            ps = muda.deformers.PitchShift(n_semitones=semitones)
            ps_prefix = class_dir + '/' + audio_stem + '-ps-'
            for i, jam_out in enumerate(ps.transform(jam_orig)):
                muda.save(ps_prefix + str(semitones[i]) + '.wav',
                          ps_prefix + str(semitones[i]) + '.jams',
                          jam_out)

            # Background-noise deformations, named after the noise file
            bg = muda.deformers.BackgroundNoise(n_samples=1, files=files,
                                                weight_min=0.1,
                                                weight_max=0.5)
            bg_prefix = class_dir + '/' + audio_stem + '-bg-'
            for i, jam_out in enumerate(bg.transform(jam_orig)):
                noise_name = files[i].split('/')[5]
                noise_stem = re.sub(r'\.wav', "", noise_name)
                muda.save(bg_prefix + noise_stem + '.wav',
                          bg_prefix + noise_stem + '.jams',
                          jam_out)
def deform_audio(aud, ann, deformer, aud_out, ann_out, out_dict):
    """Run `deformer` over one (audio, annotation) pair, saving each output
    under a suffix taken cyclically from `out_dict`."""
    build_dirs(aud_out)
    build_dirs(ann_out)
    source = muda.load_jam_audio(ann, aud)
    stem = base(aud)
    n_suffixes = len(out_dict)
    for idx, jam_out in enumerate(deformer.transform(source)):
        suffix = out_dict[idx % n_suffixes]
        wav_path = os.path.join(aud_out, stem + '_' + suffix + '.wav')
        jams_path = os.path.join(ann_out, stem + '_' + suffix + '.jams')
        muda.save(wav_path, jams_path, jam_out)
def augment(afile, jfile, deformer, outpath, audio_ext, jams_ext, sr=44100):
    '''Run the data through muda'''
    # Load the (jams, audio) pair, resampling to the requested rate
    jam = muda.load_jam_audio(jfile, afile, sr=sr)
    stem = os.path.join(outpath, milsed.utils.base(afile))
    # One numbered (audio, jams) pair per deformation
    for idx, jam_out in enumerate(deformer.transform(jam)):
        muda.save('{}.{}.{}'.format(stem, idx, audio_ext),
                  '{}.{}.{}'.format(stem, idx, jams_ext),
                  jam_out, strict=False)
def augment(afile, jfile, deformer, outpath, audio_ext, jams_ext):
    '''Run the data through muda'''
    jam = muda.load_jam_audio(jfile, afile)
    stem = os.path.join(outpath, crema.utils.base(afile))
    # One numbered (audio, jams) pair per deformation
    for idx, jam_out in enumerate(deformer.transform(jam)):
        muda.save('{}.{}.{}'.format(stem, idx, audio_ext),
                  '{}.{}.{}'.format(stem, idx, jams_ext),
                  jam_out, strict=False)
def test_save():
    """Round-trip a jam through muda.save: the audio payload must be
    restored by load_jam_audio but absent from a raw jams load."""
    jam = muda.load_jam_audio('data/fixture.jams', 'data/fixture.wav')

    _, jamfile = tempfile.mkstemp(suffix='.jams')
    _, audfile = tempfile.mkstemp(suffix='.wav')
    muda.save(audfile, jamfile, jam)

    jam2 = muda.load_jam_audio(jamfile, audfile)
    jam2_raw = jams.load(jamfile)

    # Clean up the temp files before asserting
    os.unlink(audfile)
    os.unlink(jamfile)

    assert hasattr(jam2.sandbox, 'muda')
    assert '_audio' in jam2.sandbox.muda
    assert '_audio' not in jam2_raw.sandbox.muda
    eq_(jam2.file_metadata.duration,
        librosa.get_duration(**jam2.sandbox.muda['_audio']))
def test_save(jam_in, audio_file, strict, fmt):
    """Round-trip save/load with explicit strictness and format options."""
    jam = muda.load_jam_audio(jam_in, audio_file)

    _, jamfile = tempfile.mkstemp(suffix='.jams')
    _, audfile = tempfile.mkstemp(suffix='.wav')
    muda.save(audfile, jamfile, jam, strict=strict, fmt=fmt)

    jam2 = muda.load_jam_audio(jamfile, audfile, fmt=fmt)
    jam2_raw = jams.load(jamfile, fmt=fmt)

    # Clean up the temp files before asserting
    os.unlink(audfile)
    os.unlink(jamfile)

    assert hasattr(jam2.sandbox, 'muda')
    assert '_audio' in jam2.sandbox.muda
    assert '_audio' not in jam2_raw.sandbox.muda
    assert jam2.file_metadata.duration == librosa.get_duration(
        **jam2.sandbox.muda['_audio'])
def pitch_shifting(audio_fname, jams_fname, audio_folder, jams_folder,
                   n_samples=5, l=-2, u=2):
    """Write `n_samples` linearly spaced pitch-shifted copies (between `l`
    and `u` semitones) of one (audio, jams) pair, prefixing each output
    file name with its sample index."""
    print(audio_folder, audio_fname)
    source = muda.load_jam_audio(os.path.join(jams_folder, jams_fname),
                                 os.path.join(audio_folder, audio_fname))
    shifter = muda.deformers.LinearPitchShift(n_samples=n_samples,
                                              lower=l, upper=u)
    for idx, jam_out in enumerate(shifter.transform(source)):
        muda.save(os.path.join(audio_folder, '{}_{}'.format(idx, audio_fname)),
                  os.path.join(jams_folder, '{}_{}'.format(idx, jams_fname)),
                  jam_out)
def shift_annotations(jams_path, jams_fname, audio_path, audio_fname):
    """Use the IRConvolution deformer to shift F0 annotations according to
    the estimated group delay introduced by an impulse response."""
    ir_muda = muda.deformers.IRConvolution(ir_files='./ir/IR_greathall.wav',
                                           n_fft=2048, rolloff_value=-24)

    # muda requires a non-null duration on the annotation; copy it from
    # the file metadata and write the jams back in place
    annotation_file = os.path.join(jams_path, jams_fname)
    jm = jams.load(annotation_file)
    jm.annotations[0].duration = jm.file_metadata.duration
    jm.save(annotation_file)

    # Load the jam together with its associated audio
    jam = muda.load_jam_audio(annotation_file,
                              os.path.join(audio_path, audio_fname))
    for state in ir_muda.states(jam):
        ir_muda.deform_times(jam.annotations[0], state)

    # Store the deformed annotations in the reverb folder
    jam.save(os.path.join(jams_path, 'reverb', jams_fname))
def jam_silence_8k():
    """Fixture: a silent 8 kHz recording with its annotation, loaded via muda."""
    jams_file = 'tests/data/silence_8k.jams'
    audio_file = 'tests/data/silence_8k.wav'
    return muda.load_jam_audio(jams_file, audio_file)
def test_background_short_file():
    """BackgroundNoise must cope with a noise file shorter than the input."""
    deformer = muda.deformers.BackgroundNoise(files='tests/data/fixture.wav')
    jam_orig = muda.load_jam_audio('tests/data/fixture.jams',
                                   'tests/data/noise_sample.ogg')
    # Only checks that one deformation can be produced without error
    jam_new = next(deformer.transform(jam_orig))
def jam_fixture():
    """Fixture: the standard test jam with its audio attached."""
    jams_file = 'tests/data/fixture.jams'
    audio_file = 'tests/data/fixture.wav'
    return muda.load_jam_audio(jams_file, audio_file)
import muda
import jams
import wave

# Start from an empty JAMS object and attach the audio file to it
jam=jams.JAMS()
j_orig = muda.load_jam_audio(jam, '01-Make_It.wav')

# (Earlier experiment: inspecting the raw wav header with the wave module)
# f=wave.open('01-Make_It.wav','rb')
# params=f.getparams()
# n_channels,samp_width,frame_rate,n_frames=params[:4]
#
# print(samp_width)

# Pipeline: +2 semitone pitch shift followed by a 0.8x time stretch
pitch=muda.deformers.PitchShift(n_semitones=2)
stretch=muda.deformers.TimeStretch(rate=0.8)
pipline=muda.Pipeline(steps=[('pitch_shift',pitch),
                             ('time_stretch',stretch)])
# Materialize every deformation produced by the pipeline
output_jams=list(pipline.transform(j_orig))
# Extract WAV and JAMS files corresponding to recording unit. wav_paths = sorted(glob.glob(os.path.join(in_unit_dir, "*.wav"))) jam_paths = sorted(glob.glob(os.path.join(in_unit_dir, "*.jams"))) # Loop over examples. for wav_path, jam_path in zip(wav_paths, jam_paths): # Sample a background noise file uniformly at random if aug_str[:5] == "noise": noise_path_id = np.random.randint(0, n_noise_paths) noise_path = noise_paths[noise_path_id] deformer = muda.deformers.BackgroundNoise( n_samples=1, files=[noise_path], weight_min=0.0625, weight_max=0.25) # Load WAV and JAMS files into muda object. jam_original = muda.load_jam_audio(jam_path, wav_path) # Apply data augmentation. jam_transformer = deformer.transform(jam_original) # Get jam from jam iterator. The iterator has only one element. jam = next(jam_transformer) # Split name of WAV path to remove the "_original.wav" suffix. original_wav_name = os.path.split(wav_path)[-1] original_wav_split = original_wav_name.split("_") suffix = "_".join([aug_str, instance_str]) # Generate path of augmented WAV file. wav_suffix = suffix + ".wav" augmented_wav_split = original_wav_split[:-1] + [wav_suffix]
import jams
import six

# NOTE(review): this snippet calls `muda` below but the import is not
# visible here — confirm `import muda` appears earlier in the file.

# Candidate background-noise recordings
noise_file = [
    'background_noise/150993__saphe__street-scene-1.wav',
    'background_noise/173955__saphe__street-scene-3.wav',
    'background_noise/207208__jormarp__high-street-of-gandia-valencia-spain.wav',
    'background_noise/268903__yonts__city-park-tel-aviv-israel.wav'
]

# Deformation parameter grids: time-stretch rates, two pitch-shift ranges
# (in semitones), and the background-noise weight range.
TS_param = [0.81, 0.93, 1.07, 1.23]
PS1_param = [-2, -1, 1, 2]
PS2_param = [-3.5, -2.5, 2.5, 3.5]
BG_Range = [0.1, 0.5]

JAMs = '7061-6-0-0.jams'
wavpath = '7061-6-0-0.wav'
j_orig = muda.load_jam_audio(JAMs, wavpath)

# Accumulates deformed audio arrays (for the comparison plots below)
X = []

# Comparison plots
# for i in range(len(TS_param)):
#     TS = muda.deformers.TimeStretch(rate=TS_param[i])
#     out_jams = list(TS.transform(j_orig))
#     audio = out_jams[0].sandbox.muda._audio['y']
#     X.append(audio)
# for i in range(len(PS1_param)):
#     PS1 = muda.deformers.PitchShift(n_semitones=PS1_param[i])
#     out_jams = list(PS1.transform(j_orig))
#     audio = out_jams[0].sandbox.muda._audio['y']
#     X.append(audio)
# for i in range(len(PS2_param)):
#     PS2 = muda.deformers.PitchShift(n_semitones=PS2_param[i])
#     out_jams = list(PS2.transform(j_orig))
# NOTE(review): mirdata, muda, json, os and tqdm are presumably imported
# earlier in this file (not visible in this chunk) — confirm.

# Load the GiantSteps Key dataset index
giantsteps_key = mirdata.initialize('giantsteps_key',
                                    data_home='/scratch/qx244/data/gskey')
gs_data = giantsteps_key.load_tracks()

# Whitelist of track ids to augment
with open('/scratch/qx244/data/gskey/good_files.json', 'r') as fp:
    good_files = json.load(fp)

# 12 pitch-shifted versions per track, linearly spaced in [-5, +6] semitones
pitch_shifter = muda.deformers.LinearPitchShift(n_samples=12,
                                                lower=-5, upper=6)

for idx in tqdm(good_files.keys()):
    track = gs_data[idx]
    track_jams_path = os.path.join('/scratch/qx244/data/gskey/jams/',
                                   track.title + '.jams')

    # Skip tracks whose final (12th) augmentation already exists on disk
    if os.path.isfile(
            '/scratch/qx244/data/gskey/augmentation/{}.11.jams'.format(
                track.title)):
        continue

    j_orig = muda.load_jam_audio(track_jams_path, track.audio_path)

    # Write one zero-padded (.ogg, .jams) pair per pitch-shifted version
    for i, jam_out in enumerate(pitch_shifter.transform(j_orig)):
        muda.save(
            '/scratch/qx244/data/gskey/augmentation/{}.{:02d}.ogg'.format(
                track.title, i),
            '/scratch/qx244/data/gskey/augmentation/{}.{:02d}.jams'.format(
                track.title, i),
            jam_out)
# JAMS template applied to every source file
TEMPLATE_FILE_NAME = 'id_05_muda_augment_template.jams'
# Semitone offsets for pitch-shift augmentation
PITCH_SHIFT_LIST = [-1, 1, -2, 2, -2.5, 2.5, -3.5, 3.5]
# Parameters for log-spaced time stretching
TIME_STRETCH = {'samples': 4, 'lower_bound': -0.3, 'upper_bound': 0.3}

# NOTE(review): class_folder, SRC_PATH and DST_PATH are defined earlier
# in this file (not visible in this chunk).
for class_id in class_folder:  # range(0,len(classfolder)):#len(classfolder)
    files = os.listdir(os.path.join(SRC_PATH, class_id))
    files.sort()
    category_path = os.path.join(DST_PATH, class_id)
    if not os.path.exists(category_path):
        os.makedirs(category_path)
    for file_name in files:
        # Destination path stem: original file name without its extension
        file_path = os.path.join(category_path, file_name.rsplit('.', 1)[0])
        j_orig = muda.load_jam_audio(
            TEMPLATE_FILE_NAME,
            os.path.join(SRC_PATH, class_id, file_name))

        for pitch_shift in PITCH_SHIFT_LIST:
            pitch = muda.deformers.PitchShift(n_semitones=pitch_shift)
            # BUG FIX: `generator.next()` is Python-2-only and raises
            # AttributeError on Python 3 — use the builtin next() instead
            # (works on both Python 2 and 3).
            jam_out = next(pitch.transform(j_orig))
            wav_filename = file_path + '_pitch_{0:+}.wav'.format(pitch_shift)
            jams_filename = wav_filename.replace('.wav', '.jams')
            muda.save(wav_filename, jams_filename, jam_out)

        stretch = muda.deformers.LogspaceTimeStretch(
            n_samples=TIME_STRETCH['samples'],
            lower=TIME_STRETCH['lower_bound'],
            upper=TIME_STRETCH['upper_bound'])
import re import six import muda from copy import deepcopy from nose.tools import eq_, raises def ap_(a, b, msg=None, rtol=1e-5, atol=1e-5): """Shorthand for 'assert np.allclose(a, b, rtol, atol), "%r != %r" % (a, b) """ if not np.allclose(a, b, rtol=rtol, atol=atol): raise AssertionError(msg or "{} != {}".format(a, b)) jam_fixture = muda.load_jam_audio('data/fixture.jams', 'data/fixture.wav') @raises(RuntimeError) def test_raw(): jam_raw = jams.load('data/fixture.jams') D = muda.deformers.TimeStretch(rate=2.0) six.next(D.transform(jam_raw)) def __test_time(jam_orig, jam_new, rate): # Test the track length ap_(librosa.get_duration(**jam_orig.sandbox.muda['_audio']),
def extract_fold(parent_dir, fold, augment_folders, bands=128, frames=128,
                 channels=1, **kwargs):
    """Extract features from the original and augmented audio in one fold.

    Expects one folder per augmentation, each holding a ``jams`` subfolder
    with the JAMS files; the augmentations are replayed from those files.

    Parameters
    ----------
    parent_dir : str
        Root directory containing the fold folders.
    fold : str
        Name of the fold folder to process.
    augment_folders : iterable of str
        Names of the augmentation folders to replay (4 variants each).
    bands, frames, channels : int
        Dimensions of each feature tensor.
    **kwargs
        Passed through to ``extract_features``.

    Returns
    -------
    features : np.ndarray, shape (n_samples, bands, frames, channels)
    labels : np.ndarray, shape (n_samples,)

    Notes
    -----
    Relies on module-level names ``original_folder``, ``sr``,
    ``load_jam_audio``, ``replay``, ``resample`` and ``extract_features``
    defined elsewhere in this file — confirm they are in scope.
    """
    features = np.empty(shape=[0, bands, frames, channels], dtype='float32')
    labels = np.empty(shape=0, dtype=int)

    for filename in os.listdir(os.path.join(parent_dir, fold)):
        if not filename.endswith(".wav"):
            continue
        audio_path = os.path.join(parent_dir, fold, filename)
        filename = filename[:-4]  # discard extension

        # --- original recording ---
        jams_original_path = os.path.join(parent_dir, fold, original_folder,
                                          "jams", filename + ".jams")
        jams_original = load_jam_audio(jams_original_path, audio_path)
        # BUG FIX: the original code read the undefined name `jam_original`
        # here (the variable loaded above is `jams_original`) -> NameError.
        audio_original = jams_original.sandbox.muda._audio['y']
        orig_sr = jams_original.sandbox.muda._audio['sr']
        audio_original = resample(audio_original, orig_sr, sr)
        features_yield = extract_features(audio_original,
                                          **kwargs).astype('float32')
        features = np.concatenate((features, features_yield))

        # filenames: [fsID]-[classID]-[occurrenceID]-[sliceID]
        labels_yield = int(filename.split('-')[-3])
        labels = np.append(labels, labels_yield)

        # --- replay and extract data from each augmentation ---
        for augment_folder in augment_folders:
            for i in range(4):
                # BUG FIX: the original compared strings with `is`
                # (identity), which is implementation-dependent — use `==`.
                if augment_folder == "pitch1":
                    augmented_filename = filename + "_pitch" + str(i)
                elif augment_folder == "pitch2":
                    augmented_filename = filename + "_pitch3-" + str(i)
                else:
                    augmented_filename = (filename + "_" + augment_folder
                                          + str(i))
                jams_augmented_path = os.path.join(
                    parent_dir, fold, augment_folder, "jams",
                    augmented_filename + ".jams")
                jams_augmented = load_jam_audio(jams_augmented_path,
                                                audio_path)
                # Apply the recorded augmentations to the fresh original
                jams_augmented = replay(jams_augmented, jams_original)
                audio_augmented = jams_augmented.sandbox.muda._audio['y']
                audio_augmented = resample(audio_augmented, orig_sr, sr)
                features_yield = extract_features(
                    audio_augmented, **kwargs).astype('float32')
                features = np.concatenate((features, features_yield))
                labels = np.append(labels, labels_yield)

    return features, labels
# Additional positive pitch-shift deformers (fractional semitones)
D_PS_p2p5 = muda.deformers.PitchShift(n_semitones=2.5)
D_PS_p3p5 = muda.deformers.PitchShift(n_semitones=3.5)

# dynamic range compression
D_DRC_ms = muda.deformers.DynamicRangeCompression(preset='music standard')
D_DRC_fs = muda.deformers.DynamicRangeCompression(preset='film standard')
D_DRC_sp = muda.deformers.DynamicRangeCompression(preset='speech')
D_DRC_ra = muda.deformers.DynamicRangeCompression(preset='radio')

# NOTE(review): parent_dir, parent_dir_da, deform_and_save and the
# D_TS_* / D_PS_m* / D_PS_p1p0 / D_PS_p2p0 deformers are defined earlier
# in this file (not visible in this chunk).
for fn in glob.glob(os.path.join(parent_dir, '*.wav')):
    print(fn)
    # Output path stem: file name without directory or extension
    fn_wav = fn.split('/')[-1].split('.')[0]
    fn_da = os.path.join(parent_dir_da, fn_wav)

    # load audio with jams via muda (empty JAMS template, fixed 44.1 kHz)
    jam = jams.JAMS()
    j_orig = muda.load_jam_audio(jam, fn, sr=44100)

    # deformation: one output per deformer, suffixed with its parameters
    deform_and_save(j_orig, D_TS_0p81, fn_da+'_TS_0p81')
    deform_and_save(j_orig, D_TS_0p93, fn_da+'_TS_0p93')
    deform_and_save(j_orig, D_TS_1p07, fn_da+'_TS_1p07')
    deform_and_save(j_orig, D_TS_1p23, fn_da+'_TS_1p23')
    deform_and_save(j_orig, D_PS_m3p5, fn_da+'_PS_m3p5')
    deform_and_save(j_orig, D_PS_m2p5, fn_da+'_PS_m2p5')
    deform_and_save(j_orig, D_PS_m2p0, fn_da+'_PS_m2p0')
    deform_and_save(j_orig, D_PS_m1p0, fn_da+'_PS_m1p0')
    deform_and_save(j_orig, D_PS_p1p0, fn_da+'_PS_p1p0')
    deform_and_save(j_orig, D_PS_p2p0, fn_da+'_PS_p2p0')
    deform_and_save(j_orig, D_PS_p2p5, fn_da+'_PS_p2p5')