Example #1
def validate_features_dataset(output_dataset_path, ds_validation_path):
    ds = F.Dataset(output_dataset_path, read_only=True)
    print(ds)

    features = {}
    for key, val in ds.items():
        if 'indices_' in key:
            name = key.split('_')[-1]
            features[name] = (val, ds[name])

    all_indices = [val[0] for val in features.values()]
    # ====== sampling 250 files ====== #
    all_files = sampling_iter(it=all_indices[0].keys(),
                              k=250,
                              seed=Config.SUPER_SEED)
    all_files = [f for f in all_files if all(f in ids for ids in all_indices)]
    print("#Samples:", ctext(len(all_files), 'cyan'))

    # ====== ignore the 20-figures warning ====== #
    with catch_warnings_ignore(RuntimeWarning):
        for file_name in all_files:
            X = {}
            for feat_name, (ids, data) in features.items():
                start, end = ids[file_name]
                X[feat_name] = data[start:end][:].astype('float32')
            V.plot_multiple_features(features=X,
                                     fig_width=20,
                                     title='[%s]%s' %
                                     (ds['dsname'][file_name], file_name))

    V.plot_save(ds_validation_path, dpi=12)
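
The pattern this example relies on is the dataset's storage layout: for every feature array ds[name] there is a companion ds['indices_' + name] that maps a file name to a (start, end) row range inside the stacked feature matrix. A minimal numpy-only sketch of that layout, using hypothetical toy_indices and toy_data in place of the F.Dataset entries:

import numpy as np

# hypothetical stand-ins for ds['indices_mspec'] and ds['mspec']
toy_indices = {'file_a': (0, 100), 'file_b': (100, 250)}
toy_data = np.random.rand(250, 40).astype('float32')

for file_name, (start, end) in toy_indices.items():
    # slicing the stacked matrix recovers one utterance's feature frames
    x = toy_data[start:end]
    print(file_name, x.shape)   # (100, 40) and (150, 40)
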
Example #2
def validate_features_dataset(output_dataset_path, ds_validation_path):
  ds = F.Dataset(output_dataset_path, read_only=True)
  print(ds)

  features = {}
  for key, val in ds.items():
    if 'indices_' in key:
      name = key.split('_')[-1]
      features[name] = (val, ds[name])

  all_indices = [val[0] for val in features.values()]
  # ====== sampling 250 files ====== #
  all_files = sampling_iter(it=all_indices[0].keys(), k=250,
                            seed=Config.SUPER_SEED)
  all_files = [f for f in all_files
               if all(f in ids for ids in all_indices)]
  print("#Samples:", ctext(len(all_files), 'cyan'))

  # ====== ignore the 20-figures warning ====== #
  with catch_warnings_ignore(RuntimeWarning):
    for file_name in all_files:
      X = {}
      for feat_name, (ids, data) in features.items():
        start, end = ids[file_name]
        X[feat_name] = data[start:end][:].astype('float32')
      V.plot_multiple_features(features=X, fig_width=20,
            title='[%s]%s' % (ds['dsname'][file_name], file_name))

  V.plot_save(ds_validation_path, dpi=12)
Example #3
def visualize_latent_space(X_org, X_latent, name, labels, title):
    """
    X_org : [n_samples, n_timesteps, n_features]
    X_latent : [n_samples, n_timesteps, n_latents]
    """
    assert X_org.shape[0] == X_latent.shape[0] == len(name) == len(labels)
    assert not np.any(np.isnan(X_org))
    assert not np.any(np.isnan(X_latent))
    X_org = X_org.astype('float32')
    X_latent = X_latent.astype('float32')
    # ====== evaluation of the latent space ====== #
    n_channels = 1 if X_latent.ndim == 3 else int(np.prod(X_latent.shape[3:]))
    n_samples = X_org.shape[0]
    # 1 for original, 1 for mean channel, then the rest
    n_row = 1 + 1 + n_channels
    n_col = 3
    V.plot_figure(nrow=n_row + 1, ncol=16)
    # only select 3 random samples
    for i, idx in enumerate(
            sampling_iter(it=range(n_samples), k=n_col, seed=1234)):
        x = X_org[idx]
        # latent tensor can be 3D or 4D
        z = X_latent[idx]
        if z.ndim > 3:
            z = np.reshape(z, newshape=(z.shape[0], z.shape[1], -1))
        elif z.ndim == 2:
            z = np.reshape(z, newshape=(z.shape[0], z.shape[1], 1))
        elif z.ndim == 3:
            pass
        else:
            raise ValueError("No support for z value: %s" % str(z.shape))
        # plot original acoustic
        ax = V.plot_spectrogram(x.T, ax=(n_row, n_col, i + 1), title='Org')
        if i == 0:
            ax.set_title("[%s]'%s-%s'" %
                         (str(title), str(name[idx]), str(labels[idx])),
                         fontsize=8)
        else:
            ax.set_title("'%s-%s'" % (str(name[idx]), str(labels[idx])),
                         fontsize=8)
        # plot the mean
        V.plot_spectrogram(np.mean(z, axis=-1).T,
                           ax=(n_row, n_col, i + 4),
                           title='Zmean')
        # plot at most the first 8 channels
        if n_channels > 1:
            for j in range(min(8, n_channels)):
                V.plot_spectrogram(z[:, :, j].T,
                                   ax=(n_row, n_col, j * 3 + 7 + i),
                                   title='Z%d' % j)
Example #4
def visualize_latent_space(X_org, X_latent, name, labels, title):
  """
  X_org : [n_samples, n_timesteps, n_features]
  X_latent : [n_samples, n_timesteps, n_latents]
  """
  assert X_org.shape[0] == X_latent.shape[0] == len(name) == len(labels)
  assert not np.any(np.isnan(X_org))
  assert not np.any(np.isnan(X_latent))
  X_org = X_org.astype('float32')
  X_latent = X_latent.astype('float32')
  # ====== evaluation of the latent space ====== #
  n_channels = 1 if X_latent.ndim == 3 else int(np.prod(X_latent.shape[3:]))
  n_samples = X_org.shape[0]
  # 1 for original, 1 for mean channel, then the rest
  n_row = 1 + 1 + n_channels
  n_col = 3
  V.plot_figure(nrow=n_row + 1, ncol=16)
  # only select 3 random samples
  for i, idx in enumerate(
      sampling_iter(it=range(n_samples), k=n_col, seed=5218)):
    x = X_org[idx]
    # latent tensor can be 3D or 4D
    z = X_latent[idx]
    if z.ndim > 3:
      z = np.reshape(z, newshape=(z.shape[0], z.shape[1], -1))
    elif z.ndim == 2:
      z = np.reshape(z, newshape=(z.shape[0], z.shape[1], 1))
    elif z.ndim == 3:
      pass
    else:
      raise ValueError("No support for z value: %s" % str(z.shape))
    # plot original acoustic
    ax = V.plot_spectrogram(x.T, ax=(n_row, n_col, i + 1), title='Org')
    if i == 0:
      ax.set_title("[%s]'%s-%s'" % (str(title), str(name[idx]), str(labels[idx])),
                   fontsize=8)
    else:
      ax.set_title("'%s-%s'" % (str(name[idx]), str(labels[idx])),
                   fontsize=8)
    # plot the mean
    V.plot_spectrogram(np.mean(z, axis=-1).T,
                       ax=(n_row, n_col, i + 4), title='Zmean')
    # plot at most the first 8 channels
    if n_channels > 1:
      for j in range(min(8, n_channels)):
        V.plot_spectrogram(z[:, :, j].T,
                           ax=(n_row, n_col, j * 3 + 7 + i),
                           title='Z%d' % j)
Example #5
def prepare_dnn_data(recipe, feat, utt_length, seed=87654321):
    """
    Return
    ------
    train_feeder : Feeder for training
    valid_feeder : Feeder for validation
    test_ids : Test indices
    test_dat : Data array
    all_speakers : list of all speakers in the training set
    """
    # Load dataset
    frame_length = int(utt_length / FRAME_SHIFT)
    ds = F.Dataset(os.path.join(PATH_ACOUSTIC_FEAT, recipe), read_only=True)
    X = ds[feat]
    train_indices = {name: ds['indices'][name] for name in TRAIN_DATA.keys()}
    test_indices = {
        name: start_end
        for name, start_end in ds['indices'].items() if name not in TRAIN_DATA
    }
    train_indices, valid_indices = train_valid_test_split(x=list(
        train_indices.items()),
                                                          train=0.9,
                                                          inc_test=False,
                                                          seed=seed)
    all_speakers = sorted(set(TRAIN_DATA.values()))
    n_speakers = max(all_speakers) + 1
    print("#Train files:", ctext(len(train_indices), 'cyan'))
    print("#Valid files:", ctext(len(valid_indices), 'cyan'))
    print("#Test files:", ctext(len(test_indices), 'cyan'))
    print("#Speakers:", ctext(n_speakers, 'cyan'))
    recipes = [
        F.recipes.Sequencing(frame_length=frame_length,
                             step_length=frame_length,
                             end='pad',
                             pad_value=0,
                             pad_mode='post',
                             data_idx=0),
        F.recipes.Name2Label(lambda name: TRAIN_DATA[name], ref_idx=0),
        F.recipes.LabelOneHot(nb_classes=n_speakers, data_idx=1)
    ]
    train_feeder = F.Feeder(data_desc=F.IndexedData(data=X,
                                                    indices=train_indices),
                            batch_mode='batch',
                            ncpu=7,
                            buffer_size=12)
    valid_feeder = F.Feeder(data_desc=F.IndexedData(data=X,
                                                    indices=valid_indices),
                            batch_mode='batch',
                            ncpu=2,
                            buffer_size=4)
    train_feeder.set_recipes(recipes)
    valid_feeder.set_recipes(recipes)
    print(train_feeder)
    # ====== cache the test data ====== #
    cache_dat = os.path.join(PATH_EXP,
                             'test_%s_%d.dat' % (feat, int(utt_length)))
    cache_ids = os.path.join(PATH_EXP,
                             'test_%s_%d.ids' % (feat, int(utt_length)))
    # validate cache files
    if os.path.exists(cache_ids):
        with open(cache_ids, 'rb') as f:
            ids = pickle.load(f)
        if len(ids) != len(test_indices):
            os.remove(cache_ids)
            if os.path.exists(cache_dat):
                os.remove(cache_dat)
    elif os.path.exists(cache_dat):
        os.remove(cache_dat)
    # caching
    if not os.path.exists(cache_dat):
        dat = F.MmapData(cache_dat,
                         dtype='float16',
                         shape=(0, frame_length, X.shape[1]))
        ids = {}
        prog = Progbar(target=len(test_indices))
        s = 0
        for name, (start, end) in test_indices.items():
            y = X[start:end]
            y = segment_axis(y,
                             axis=0,
                             frame_length=frame_length,
                             step_length=frame_length,
                             end='pad',
                             pad_value=0,
                             pad_mode='post')
            dat.append(y)
            # update indices
            ids[name] = (s, s + len(y))
            s += len(y)
            # update progress
            prog.add(1)
        dat.flush()
        dat.close()
        with open(cache_ids, 'wb') as f:
            pickle.dump(ids, f)
    # ====== re-load ====== #
    dat = F.MmapData(cache_dat, read_only=True)
    with open(cache_ids, 'rb') as f:
        ids = pickle.load(f)
    # ====== save some samples ====== #
    sample_path = os.path.join(PATH_EXP,
                               'test_%s_%d.pdf' % (feat, int(utt_length)))
    V.plot_figure(nrow=9, ncol=6)
    for i, (name, (start, end)) in enumerate(
            sampling_iter(it=sorted(ids.items(), key=lambda x: x[0]),
                          k=12,
                          seed=87654321)):
        x = dat[start:end][:].astype('float32')
        ax = V.plot_spectrogram(x[np.random.randint(0, len(x))].T,
                                ax=(12, 1, i + 1),
                                title='')
        ax.set_title(name)
    V.plot_save(sample_path)
    return (train_feeder, valid_feeder, ids, dat, all_speakers)
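
The caching loop in this example does two things per test utterance: it cuts the feature matrix into fixed-length, zero-padded segments and records where those segments land in the growing cache via ids[name] = (s, s + len(y)). A numpy-only sketch of that bookkeeping, where pad_and_segment is a hypothetical stand-in for segment_axis and the shapes are arbitrary:

import numpy as np

def pad_and_segment(x, frame_length):
    # zero-pad the end so the length divides evenly, then split into segments
    n = int(np.ceil(len(x) / frame_length)) * frame_length
    x = np.pad(x, [(0, n - len(x)), (0, 0)], mode='constant')
    return x.reshape(-1, frame_length, x.shape[1])

frame_length = 100
utterances = {'utt1': np.random.rand(250, 40), 'utt2': np.random.rand(130, 40)}
segments, ids, s = [], {}, 0
for name, x in utterances.items():
    y = pad_and_segment(x, frame_length)
    segments.append(y)
    ids[name] = (s, s + len(y))   # segment range of this utterance in the cache
    s += len(y)
cache = np.concatenate(segments, axis=0)
print(ids, cache.shape)   # {'utt1': (0, 3), 'utt2': (3, 5)} (5, 100, 40)
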
Example #6
    os.mkdir(PATH_ACOUSTIC_FEAT)
# ====== remove '_quarter' if you want full training data ====== #
FILE_LIST = "voxceleb_files_quarter"
TRAIN_LIST = "voxceleb_sys_train_with_labels_quarter"
TRIAL_LIST = "voxceleb_trials"
# ====== Load the file list ====== #
ds = F.load_voxceleb_list()
WAV_FILES = {}  # dictionary mapping 'file_path' -> 'file_name'
for path, channel, name in ds[FILE_LIST]:
    path = os.path.join(PATH_TO_WAV, path)
    # validate that all files exist
    assert os.path.exists(path), path
    WAV_FILES[path] = name
# some sampled files for testing
SAMPLED_WAV_FILE = sampling_iter(it=sorted(WAV_FILES.items(),
                                           key=lambda x: x[0]),
                                 k=8,
                                 seed=87654321)
# ====== extract the list of all train files ====== #
# mapping from name of training file to speaker label
TRAIN_DATA = {}
for x, y in ds[TRAIN_LIST]:
    TRAIN_DATA[x] = int(y)


# ===========================================================================
# Path helpers
# ===========================================================================
def get_model_path(system_name, args):
    """Return: exp_dir, model_path, log_path, train_path, test_path"""
    name = '_'.join([str(system_name).lower(), args.recipe, args.feat])
    if 'l' in args:
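
Every example in this listing draws a reproducible subset with sampling_iter(it=..., k=..., seed=...), as in SAMPLED_WAV_FILE above. A rough standard-library approximation of that pattern (sorting first and using random.Random(seed).sample is an assumption about the behaviour, not sampling_iter's actual implementation):

import random

wav_files = {'/data/a.wav': 'a', '/data/b.wav': 'b', '/data/c.wav': 'c'}
# sort first so the draw depends only on the seed, not on dict iteration order
items = sorted(wav_files.items(), key=lambda x: x[0])
sampled = random.Random(52181208).sample(items, k=2)
print(sampled)
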
Example #7
def prepare_dnn_data(recipe, feat, utt_length, seed=52181208):
  """
  Return
  ------
  train_feeder : Feeder for training
  valid_feeder : Feeder for validation
  test_ids : Test indices
  test_dat : Data array
  all_speakers : list of all speakers in the training set
  """
  # Load dataset
  frame_length = int(utt_length / FRAME_SHIFT)
  ds = F.Dataset(os.path.join(PATH_ACOUSTIC_FEAT, recipe),
                 read_only=True)
  X = ds[feat]
  train_indices = {name: ds['indices'][name]
                   for name in TRAIN_DATA.keys()}
  test_indices = {name: start_end
                  for name, start_end in ds['indices'].items()
                  if name not in TRAIN_DATA}
  train_indices, valid_indices = train_valid_test_split(
      x=list(train_indices.items()), train=0.9, inc_test=False, seed=seed)
  all_speakers = sorted(set(TRAIN_DATA.values()))
  n_speakers = max(all_speakers) + 1
  print("#Train files:", ctext(len(train_indices), 'cyan'))
  print("#Valid files:", ctext(len(valid_indices), 'cyan'))
  print("#Test files:", ctext(len(test_indices), 'cyan'))
  print("#Speakers:", ctext(n_speakers, 'cyan'))
  recipes = [
      F.recipes.Sequencing(frame_length=frame_length, step_length=frame_length,
                           end='pad', pad_value=0, pad_mode='post',
                           data_idx=0),
      F.recipes.Name2Label(lambda name: TRAIN_DATA[name], ref_idx=0),
      F.recipes.LabelOneHot(nb_classes=n_speakers, data_idx=1)
  ]
  train_feeder = F.Feeder(
      data_desc=F.IndexedData(data=X, indices=train_indices),
      batch_mode='batch', ncpu=7, buffer_size=12)
  valid_feeder = F.Feeder(
      data_desc=F.IndexedData(data=X, indices=valid_indices),
      batch_mode='batch', ncpu=2, buffer_size=4)
  train_feeder.set_recipes(recipes)
  valid_feeder.set_recipes(recipes)
  print(train_feeder)
  # ====== cache the test data ====== #
  cache_dat = os.path.join(PATH_EXP, 'test_%s_%d.dat' % (feat, int(utt_length)))
  cache_ids = os.path.join(PATH_EXP, 'test_%s_%d.ids' % (feat, int(utt_length)))
  # validate cache files
  if os.path.exists(cache_ids):
    with open(cache_ids, 'rb') as f:
      ids = pickle.load(f)
    if len(ids) != len(test_indices):
      os.remove(cache_ids)
      if os.path.exists(cache_dat):
        os.remove(cache_dat)
  elif os.path.exists(cache_dat):
    os.remove(cache_dat)
  # caching
  if not os.path.exists(cache_dat):
    dat = F.MmapData(cache_dat, dtype='float16',
                     shape=(0, frame_length, X.shape[1]))
    ids = {}
    prog = Progbar(target=len(test_indices))
    s = 0
    for name, (start, end) in test_indices.items():
      y = X[start:end]
      y = segment_axis(y, axis=0,
                       frame_length=frame_length, step_length=frame_length,
                       end='pad', pad_value=0, pad_mode='post')
      dat.append(y)
      # update indices
      ids[name] = (s, s + len(y))
      s += len(y)
      # update progress
      prog.add(1)
    dat.flush()
    dat.close()
    with open(cache_ids, 'wb') as f:
      pickle.dump(ids, f)
  # ====== re-load ====== #
  dat = F.MmapData(cache_dat, read_only=True)
  with open(cache_ids, 'rb') as f:
    ids = pickle.load(f)
  # ====== save some samples ====== #
  sample_path = os.path.join(PATH_EXP,
                             'test_%s_%d.pdf' % (feat, int(utt_length)))
  V.plot_figure(nrow=9, ncol=6)
  for i, (name, (start, end)) in enumerate(
      sampling_iter(it=sorted(ids.items(), key=lambda x: x[0]), k=12, seed=52181208)):
    x = dat[start:end][:].astype('float32')
    ax = V.plot_spectrogram(x[np.random.randint(0, len(x))].T,
                            ax=(12, 1, i + 1), title='')
    ax.set_title(name)
  V.plot_save(sample_path)
  return (train_feeder, valid_feeder,
          ids, dat, all_speakers)
Example #8
  os.mkdir(PATH_ACOUSTIC_FEAT)
# ====== remove '_quarter' if you want full training data ====== #
FILE_LIST = "voxceleb_files_quarter"
TRAIN_LIST = "voxceleb_sys_train_with_labels_quarter"
TRIAL_LIST = "voxceleb_trials"
# ====== Load the file list ====== #
ds = F.load_voxceleb_list()
WAV_FILES = {} # dictionary mapping 'file_path' -> 'file_name'
for path, channel, name in ds[FILE_LIST]:
  path = os.path.join(PATH_TO_WAV, path)
  # validate that all files exist
  assert os.path.exists(path), path
  WAV_FILES[path] = name
# some sampled files for testing
SAMPLED_WAV_FILE = sampling_iter(it=sorted(WAV_FILES.items(),
                                           key=lambda x: x[0]),
                                 k=8, seed=52181208)
# ====== extract the list of all train files ====== #
# mapping from name of training file to speaker label
TRAIN_DATA = {}
for x, y in ds[TRAIN_LIST]:
  TRAIN_DATA[x] = int(y)

# ===========================================================================
# Path helpers
# ===========================================================================
def get_model_path(system_name, args):
  """Return: exp_dir, model_path, log_path, train_path, test_path"""
  name = '_'.join([str(system_name).lower(), args.recipe, args.feat])
  if 'l' in args:
    name += '_' + str(int(args.l))
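
The visible part of get_model_path builds the experiment name by joining the lowercased system name with args.recipe and args.feat, then appending optional flags such as l. A minimal sketch of that naming scheme with a hypothetical args object (the rest of the function is truncated above):

class Args(dict):
    # hypothetical stand-in that supports both attribute access and 'in'
    __getattr__ = dict.__getitem__

args = Args(recipe='mfcc', feat='mspec', l=3)
name = '_'.join(['xvec', args.recipe, args.feat])
if 'l' in args:
    name += '_' + str(int(args.l))
print(name)   # xvec_mfcc_mspec_3
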