def validate_features_dataset(output_dataset_path, ds_validation_path):
    """Plot the features of a sample of files so a dataset can be inspected.

    Every dataset key containing ``'indices_'`` is treated as the index
    table of the feature whose name is the text following that marker
    (e.g. ``'indices_mfcc'`` -> ``'mfcc'``).  File names are sampled from
    the first index table (``k=250``, seeded with ``Config.SUPER_SEED``) and
    only names present in every index table are kept.  For each kept file
    the ``[start:end]`` slice of every feature is plotted, and the figures
    are finally written with ``V.plot_save``.

    Parameters
    ----------
    output_dataset_path : path given to ``F.Dataset`` (opened read-only)
    ds_validation_path : output path given to ``V.plot_save``
    """
    ds = F.Dataset(output_dataset_path, read_only=True)
    print(ds)
    # feature name -> (index table, feature data)
    features = {}
    for key, indices in ds.items():
        # Take everything after the marker rather than the last '_' token,
        # so feature names that themselves contain '_' are not truncated.
        _, marker, feat_name = key.partition('indices_')
        if marker:
            features[feat_name] = (indices, ds[feat_name])
    index_tables = [indices for indices, _ in features.values()]
    # ====== sampling 250 files ====== #
    all_files = sampling_iter(it=index_tables[0].keys(), k=250,
                              seed=Config.SUPER_SEED)
    # keep only the files that every feature has an entry for
    all_files = [f for f in all_files
                 if all(f in table for table in index_tables)]
    print("#Samples:", ctext(len(all_files), 'cyan'))
    # ====== ignore the 20-figures warning ====== #
    with catch_warnings_ignore(RuntimeWarning):
        for file_name in all_files:
            X = {}
            for feat_name, (table, data) in features.items():
                start, end = table[file_name]
                X[feat_name] = data[start:end][:].astype('float32')
            V.plot_multiple_features(
                features=X, fig_width=20,
                title='[%s]%s' % (ds['dsname'][file_name], file_name))
    V.plot_save(ds_validation_path, dpi=12)
def visualize_latent_space(X_org, X_latent, name, labels, title):
    """Plot original features next to their latent representation.

    Three samples are requested from ``sampling_iter`` (fixed seed) and each
    one fills a column of the subplot grid: row 1 shows the original
    features, row 2 the latent averaged over its last axis, and the
    following rows show at most 8 individual latent channels (only when the
    latent has more than one channel).

    Parameters
    ----------
    X_org : [n_samples, n_timesteps, n_features]
    X_latent : [n_samples, n_timesteps, n_latents]
        Higher-rank latents are accepted; per sample, every axis after the
        second one is flattened into a single channel axis.
    name : sequence with one entry per sample, shown in the subplot title
    labels : sequence with one entry per sample, shown in the subplot title
    title : prefix shown in the title of the first column only

    Raises
    ------
    AssertionError : sample counts disagree or an input contains NaN
    ValueError : a single latent sample has fewer than 2 dimensions
    """
    assert X_org.shape[0] == X_latent.shape[0] == len(name) == len(labels)
    assert not np.any(np.isnan(X_org))
    assert not np.any(np.isnan(X_latent))
    X_org = X_org.astype('float32')
    X_latent = X_latent.astype('float32')
    # ====== evaluation of the latent space ====== #
    n_samples = X_org.shape[0]
    n_channels = (1 if X_latent.ndim == 3
                  else int(np.prod(X_latent.shape[3:])))
    # individual channels are only drawn when there is more than one,
    # and never more than `max_channels` of them
    max_channels = 8
    n_shown = min(max_channels, n_channels) if n_channels > 1 else 0
    # 1 row for original, 1 for the mean, then one per drawn channel; sizing
    # the grid by drawn rows avoids a huge, mostly empty figure when the
    # latent has many channels
    n_row = 1 + 1 + n_shown
    n_col = 3
    V.plot_figure(nrow=n_row + 1, ncol=16)

    def cell(row, col):
        # 1-based subplot position of (row, col) in a row-major grid
        return (n_row, n_col, row * n_col + col + 1)

    # only select 3 random samples, one per column
    for col, idx in enumerate(
            sampling_iter(it=range(n_samples), k=n_col, seed=1234)):
        x = X_org[idx]
        # a single latent sample can be 2D or higher; normalize it to 3D
        z = X_latent[idx]
        if z.ndim == 2:
            z = z[:, :, np.newaxis]
        elif z.ndim > 3:
            # method form: the `newshape=` keyword of np.reshape is deprecated
            z = z.reshape(z.shape[0], z.shape[1], -1)
        elif z.ndim != 3:
            raise ValueError("No support for z value: %s" % str(z.shape))
        # plot original acoustic
        ax = V.plot_spectrogram(x.T, ax=cell(0, col), title='Org')
        if col == 0:
            ax.set_title("[%s]'%s-%s'" %
                         (str(title), str(name[idx]), str(labels[idx])),
                         fontsize=8)
        else:
            ax.set_title("'%s-%s'" % (str(name[idx]), str(labels[idx])),
                         fontsize=8)
        # plot the mean over the channel axis
        V.plot_spectrogram(np.mean(z, axis=-1).T, ax=cell(1, col),
                           title='Zmean')
        # plot the first `n_shown` channels, one row each
        for j in range(n_shown):
            V.plot_spectrogram(z[:, :, j].T, ax=cell(j + 2, col),
                               title='Z%d' % j)
def visualize_latent_space(X_org, X_latent, name, labels, title):
    """Plot original features next to their latent representation.

    Three samples are requested from ``sampling_iter`` (fixed seed) and each
    one fills a column of the subplot grid: row 1 shows the original
    features, row 2 the latent averaged over its last axis, and the
    following rows show at most 8 individual latent channels (only when the
    latent has more than one channel).

    Parameters
    ----------
    X_org : [n_samples, n_timesteps, n_features]
    X_latent : [n_samples, n_timesteps, n_latents]
        Higher-rank latents are accepted; per sample, every axis after the
        second one is flattened into a single channel axis.
    name : sequence with one entry per sample, shown in the subplot title
    labels : sequence with one entry per sample, shown in the subplot title
    title : prefix shown in the title of the first column only

    Raises
    ------
    AssertionError : sample counts disagree or an input contains NaN
    ValueError : a single latent sample has fewer than 2 dimensions
    """
    assert X_org.shape[0] == X_latent.shape[0] == len(name) == len(labels)
    assert not np.any(np.isnan(X_org))
    assert not np.any(np.isnan(X_latent))
    X_org = X_org.astype('float32')
    X_latent = X_latent.astype('float32')
    # ====== evaluation of the latent space ====== #
    n_samples = X_org.shape[0]
    n_channels = (1 if X_latent.ndim == 3
                  else int(np.prod(X_latent.shape[3:])))
    # individual channels are only drawn when there is more than one,
    # and never more than `max_channels` of them
    max_channels = 8
    n_shown = min(max_channels, n_channels) if n_channels > 1 else 0
    # 1 row for original, 1 for the mean, then one per drawn channel; sizing
    # the grid by drawn rows avoids a huge, mostly empty figure when the
    # latent has many channels
    n_row = 1 + 1 + n_shown
    n_col = 3
    V.plot_figure(nrow=n_row + 1, ncol=16)

    def cell(row, col):
        # 1-based subplot position of (row, col) in a row-major grid
        return (n_row, n_col, row * n_col + col + 1)

    # only select 3 random samples, one per column
    for col, idx in enumerate(
            sampling_iter(it=range(n_samples), k=n_col, seed=5218)):
        x = X_org[idx]
        # a single latent sample can be 2D or higher; normalize it to 3D
        z = X_latent[idx]
        if z.ndim == 2:
            z = z[:, :, np.newaxis]
        elif z.ndim > 3:
            # method form: the `newshape=` keyword of np.reshape is deprecated
            z = z.reshape(z.shape[0], z.shape[1], -1)
        elif z.ndim != 3:
            raise ValueError("No support for z value: %s" % str(z.shape))
        # plot original acoustic
        ax = V.plot_spectrogram(x.T, ax=cell(0, col), title='Org')
        if col == 0:
            ax.set_title("[%s]'%s-%s'" %
                         (str(title), str(name[idx]), str(labels[idx])),
                         fontsize=8)
        else:
            ax.set_title("'%s-%s'" % (str(name[idx]), str(labels[idx])),
                         fontsize=8)
        # plot the mean over the channel axis
        V.plot_spectrogram(np.mean(z, axis=-1).T, ax=cell(1, col),
                           title='Zmean')
        # plot the first `n_shown` channels, one row each
        for j in range(n_shown):
            V.plot_spectrogram(z[:, :, j].T, ax=cell(j + 2, col),
                               title='Z%d' % j)
def prepare_dnn_data(recipe, feat, utt_length, seed=87654321):
    """Build the train/valid feeders and the cached, segmented test set.

    Files listed in ``TRAIN_DATA`` are split 90/10 into train and valid
    sets (using `seed`); every other file in the dataset index is test
    data.  Test utterances are cut into fixed-length, zero-padded segments
    and cached under ``PATH_EXP`` as a ``.dat`` memory map plus a pickled
    ``.ids`` mapping, then a few cached segments are plotted to a PDF next
    to the cache.

    Return
    ------
    train_feeder : Feeder for training
    valid_feeder : Feeder for validating
    test_ids : Test indices
    test_dat : Data array
    all_speakers : list of all speaker in training set
    """
    # Load dataset
    frame_length = int(utt_length / FRAME_SHIFT)
    ds = F.Dataset(os.path.join(PATH_ACOUSTIC_FEAT, recipe), read_only=True)
    X = ds[feat]
    indices = ds['indices']
    train_indices = {name: indices[name] for name in TRAIN_DATA}
    test_indices = {
        name: start_end
        for name, start_end in indices.items()
        if name not in TRAIN_DATA
    }
    train_indices, valid_indices = train_valid_test_split(
        x=list(train_indices.items()), train=0.9, inc_test=False, seed=seed)
    all_speakers = sorted(set(TRAIN_DATA.values()))
    # labels are integer ids, so the one-hot width is the largest id + 1
    n_speakers = max(all_speakers) + 1
    print("#Train files:", ctext(len(train_indices), 'cyan'))
    print("#Valid files:", ctext(len(valid_indices), 'cyan'))
    print("#Test files:", ctext(len(test_indices), 'cyan'))
    print("#Speakers:", ctext(n_speakers, 'cyan'))
    recipes = [
        F.recipes.Sequencing(frame_length=frame_length,
                             step_length=frame_length,
                             end='pad', pad_value=0, pad_mode='post',
                             data_idx=0),
        F.recipes.Name2Label(lambda name: TRAIN_DATA[name], ref_idx=0),
        F.recipes.LabelOneHot(nb_classes=n_speakers, data_idx=1)
    ]
    train_feeder = F.Feeder(
        data_desc=F.IndexedData(data=X, indices=train_indices),
        batch_mode='batch', ncpu=7, buffer_size=12)
    valid_feeder = F.Feeder(
        data_desc=F.IndexedData(data=X, indices=valid_indices),
        batch_mode='batch', ncpu=2, buffer_size=4)
    train_feeder.set_recipes(recipes)
    valid_feeder.set_recipes(recipes)
    print(train_feeder)
    # ====== cache the test data ====== #
    cache_dat = os.path.join(PATH_EXP,
                             'test_%s_%d.dat' % (feat, int(utt_length)))
    cache_ids = os.path.join(PATH_EXP,
                             'test_%s_%d.ids' % (feat, int(utt_length)))
    # The cache is trusted only when BOTH files exist and the number of
    # cached utterances matches the current test set.
    # NOTE: pickle is only safe here because the file is written by this
    # function; never point PATH_EXP at untrusted content.
    cache_valid = False
    if os.path.exists(cache_ids) and os.path.exists(cache_dat):
        with open(cache_ids, 'rb') as f:
            cache_valid = len(pickle.load(f)) == len(test_indices)
    # caching
    if not cache_valid:
        # Drop both files first: the ids file is written last, so an
        # interrupted rebuild can never leave a stale ids file paired with
        # a partially written data file.
        for path in (cache_ids, cache_dat):
            if os.path.exists(path):
                os.remove(path)
        dat = F.MmapData(cache_dat, dtype='float16',
                         shape=(0, frame_length, X.shape[1]))
        ids = {}
        prog = Progbar(target=len(test_indices))
        offset = 0
        try:
            for name, (start, end) in test_indices.items():
                y = X[start:end]
                y = segment_axis(y, axis=0,
                                 frame_length=frame_length,
                                 step_length=frame_length,
                                 end='pad', pad_value=0, pad_mode='post')
                dat.append(y)
                # update indices: segment range of this utterance in `dat`
                ids[name] = (offset, offset + len(y))
                offset += len(y)
                # update progress
                prog.add(1)
            dat.flush()
        finally:
            dat.close()
        with open(cache_ids, 'wb') as f:
            pickle.dump(ids, f)
    # ====== re-load ====== #
    dat = F.MmapData(cache_dat, read_only=True)
    with open(cache_ids, 'rb') as f:
        ids = pickle.load(f)
    # ====== save some sample ====== #
    sample_path = os.path.join(PATH_EXP,
                               'test_%s_%d.pdf' % (feat, int(utt_length)))
    V.plot_figure(nrow=9, ncol=6)
    # utterances are sampled with a fixed literal seed (independent of the
    # `seed` argument); the segment drawn within each one is not seeded
    for i, (name, (start, end)) in enumerate(
            sampling_iter(it=sorted(ids.items(), key=lambda x: x[0]),
                          k=12, seed=87654321)):
        x = dat[start:end][:].astype('float32')
        ax = V.plot_spectrogram(x[np.random.randint(0, len(x))].T,
                                ax=(12, 1, i + 1), title='')
        ax.set_title(name)
    V.plot_save(sample_path)
    return (train_feeder, valid_feeder, ids, dat, all_speakers)
os.mkdir(PATH_ACOUSTIC_FEAT) # ====== remove '_quarter' if you want full training data ====== # FILE_LIST = "voxceleb_files_quarter" TRAIN_LIST = "voxceleb_sys_train_with_labels_quarter" TRIAL_LIST = "voxceleb_trials" # ====== Load the file list ====== # ds = F.load_voxceleb_list() WAV_FILES = {} # dictionary mapping 'file_path' -> 'file_name' for path, channel, name in ds[FILE_LIST]: path = os.path.join(PATH_TO_WAV, path) # validate all files are exist assert os.path.exists(path), path WAV_FILES[path] = name # some sampled files for testing SAMPLED_WAV_FILE = sampling_iter(it=sorted(WAV_FILES.items(), key=lambda x: x[0]), k=8, seed=87654321) # ====== extract the list of all train files ====== # # mapping from name of training file to speaker label TRAIN_DATA = {} for x, y in ds[TRAIN_LIST]: TRAIN_DATA[x] = int(y) # =========================================================================== # Path helpers # =========================================================================== def get_model_path(system_name, args): """Return: exp_dir, model_path, log_path, train_path, test_path""" name = '_'.join([str(system_name).lower(), args.recipe, args.feat]) if 'l' in args:
def prepare_dnn_data(recipe, feat, utt_length, seed=52181208):
    """Build the train/valid feeders and the cached, segmented test set.

    Files listed in ``TRAIN_DATA`` are split 90/10 into train and valid
    sets (using `seed`); every other file in the dataset index is test
    data.  Test utterances are cut into fixed-length, zero-padded segments
    and cached under ``PATH_EXP`` as a ``.dat`` memory map plus a pickled
    ``.ids`` mapping, then a few cached segments are plotted to a PDF next
    to the cache.

    Return
    ------
    train_feeder : Feeder for training
    valid_feeder : Feeder for validating
    test_ids : Test indices
    test_dat : Data array
    all_speakers : list of all speaker in training set
    """
    # Load dataset
    frame_length = int(utt_length / FRAME_SHIFT)
    ds = F.Dataset(os.path.join(PATH_ACOUSTIC_FEAT, recipe), read_only=True)
    X = ds[feat]
    indices = ds['indices']
    train_indices = {name: indices[name] for name in TRAIN_DATA}
    test_indices = {
        name: start_end
        for name, start_end in indices.items()
        if name not in TRAIN_DATA
    }
    train_indices, valid_indices = train_valid_test_split(
        x=list(train_indices.items()), train=0.9, inc_test=False, seed=seed)
    all_speakers = sorted(set(TRAIN_DATA.values()))
    # labels are integer ids, so the one-hot width is the largest id + 1
    n_speakers = max(all_speakers) + 1
    print("#Train files:", ctext(len(train_indices), 'cyan'))
    print("#Valid files:", ctext(len(valid_indices), 'cyan'))
    print("#Test files:", ctext(len(test_indices), 'cyan'))
    print("#Speakers:", ctext(n_speakers, 'cyan'))
    recipes = [
        F.recipes.Sequencing(frame_length=frame_length,
                             step_length=frame_length,
                             end='pad', pad_value=0, pad_mode='post',
                             data_idx=0),
        F.recipes.Name2Label(lambda name: TRAIN_DATA[name], ref_idx=0),
        F.recipes.LabelOneHot(nb_classes=n_speakers, data_idx=1)
    ]
    train_feeder = F.Feeder(
        data_desc=F.IndexedData(data=X, indices=train_indices),
        batch_mode='batch', ncpu=7, buffer_size=12)
    valid_feeder = F.Feeder(
        data_desc=F.IndexedData(data=X, indices=valid_indices),
        batch_mode='batch', ncpu=2, buffer_size=4)
    train_feeder.set_recipes(recipes)
    valid_feeder.set_recipes(recipes)
    print(train_feeder)
    # ====== cache the test data ====== #
    cache_dat = os.path.join(PATH_EXP,
                             'test_%s_%d.dat' % (feat, int(utt_length)))
    cache_ids = os.path.join(PATH_EXP,
                             'test_%s_%d.ids' % (feat, int(utt_length)))
    # The cache is trusted only when BOTH files exist and the number of
    # cached utterances matches the current test set.
    # NOTE: pickle is only safe here because the file is written by this
    # function; never point PATH_EXP at untrusted content.
    cache_valid = False
    if os.path.exists(cache_ids) and os.path.exists(cache_dat):
        with open(cache_ids, 'rb') as f:
            cache_valid = len(pickle.load(f)) == len(test_indices)
    # caching
    if not cache_valid:
        # Drop both files first: the ids file is written last, so an
        # interrupted rebuild can never leave a stale ids file paired with
        # a partially written data file.
        for path in (cache_ids, cache_dat):
            if os.path.exists(path):
                os.remove(path)
        dat = F.MmapData(cache_dat, dtype='float16',
                         shape=(0, frame_length, X.shape[1]))
        ids = {}
        prog = Progbar(target=len(test_indices))
        offset = 0
        try:
            for name, (start, end) in test_indices.items():
                y = X[start:end]
                y = segment_axis(y, axis=0,
                                 frame_length=frame_length,
                                 step_length=frame_length,
                                 end='pad', pad_value=0, pad_mode='post')
                dat.append(y)
                # update indices: segment range of this utterance in `dat`
                ids[name] = (offset, offset + len(y))
                offset += len(y)
                # update progress
                prog.add(1)
            dat.flush()
        finally:
            dat.close()
        with open(cache_ids, 'wb') as f:
            pickle.dump(ids, f)
    # ====== re-load ====== #
    dat = F.MmapData(cache_dat, read_only=True)
    with open(cache_ids, 'rb') as f:
        ids = pickle.load(f)
    # ====== save some sample ====== #
    sample_path = os.path.join(PATH_EXP,
                               'test_%s_%d.pdf' % (feat, int(utt_length)))
    V.plot_figure(nrow=9, ncol=6)
    # utterances are sampled with a fixed literal seed (independent of the
    # `seed` argument); the segment drawn within each one is not seeded
    for i, (name, (start, end)) in enumerate(
            sampling_iter(it=sorted(ids.items(), key=lambda x: x[0]),
                          k=12, seed=52181208)):
        x = dat[start:end][:].astype('float32')
        ax = V.plot_spectrogram(x[np.random.randint(0, len(x))].T,
                                ax=(12, 1, i + 1), title='')
        ax.set_title(name)
    V.plot_save(sample_path)
    return (train_feeder, valid_feeder, ids, dat, all_speakers)
os.mkdir(PATH_ACOUSTIC_FEAT) # ====== remove '_quarter' if you want full training data ====== # FILE_LIST = "voxceleb_files_quarter" TRAIN_LIST = "voxceleb_sys_train_with_labels_quarter" TRIAL_LIST = "voxceleb_trials" # ====== Load the file list ====== # ds = F.load_voxceleb_list() WAV_FILES = {} # dictionary mapping 'file_path' -> 'file_name' for path, channel, name in ds[FILE_LIST]: path = os.path.join(PATH_TO_WAV, path) # validate all files are exist assert os.path.exists(path), path WAV_FILES[path] = name # some sampled files for testing SAMPLED_WAV_FILE = sampling_iter(it=sorted(WAV_FILES.items(), key=lambda x: x[0]), k=8, seed=52181208) # ====== extract the list of all train files ====== # # mapping from name of training file to speaker label TRAIN_DATA = {} for x, y in ds[TRAIN_LIST]: TRAIN_DATA[x] = int(y) # =========================================================================== # Path helpers # =========================================================================== def get_model_path(system_name, args): """Return: exp_dir, model_path, log_path, train_path, test_path""" name = '_'.join([str(system_name).lower(), args.recipe, args.feat]) if 'l' in args: name += '_' + str(int(args.l))