Code example #1
def evaluate_latent(fn, feeder, title):
    """Run `fn` over every file yielded by `feeder`, stack the latent
    outputs into Z, and visualize them as spectrograms and a 3D t-SNE scatter."""
    y_true = []
    Z = []
    for outputs in Progbar(feeder.set_batch(batch_mode='file'),
                           name=title,
                           print_report=True,
                           print_summary=False,
                           count_func=lambda x: x[-1].shape[0]):
        name = str(outputs[0])
        idx = int(outputs[1])
        data = outputs[2:]
        assert idx == 0
        y_true.append(name)
        Z.append(fn(*data))
    Z = np.concatenate(Z, axis=0)
    # ====== visualize spectrogram ====== #
    if Z.ndim >= 3:
        sample = np.random.choice(range(len(Z)), size=3, replace=False)
        spec = Z[sample.astype('int32')]
        y = [y_true[int(i)] for i in sample]
        plot_figure(nrow=6, ncol=6)
        for i, (s, tit) in enumerate(zip(spec, y)):
            s = s.reshape(len(s), -1)
            plot_spectrogram(s.T, ax=(1, 3, i + 1), title=tit)
    # ====== visualize each point ====== #
    # flatten to 2D
    Z = np.reshape(Z, newshape=(len(Z), -1))
    # tsne if necessary
    if Z.shape[-1] > 3:
        Z = fast_tsne(Z,
                      n_components=3,
                      n_jobs=8,
                      random_state=K.get_rng().randint(0, 10e8))
    # color and marker
    Z_color = [digit_color_map[i.split('_')[-1]] for i in y_true]
    Z_marker = [gender_marker_map[i.split('_')[1]] for i in y_true]
    plot_figure(nrow=6, ncol=20)
    for i, azim in enumerate((15, 60, 120)):
        plot_scatter(x=Z[:, 0],
                     y=Z[:, 1],
                     z=Z[:, 2],
                     ax=(1, 3, i + 1),
                     size=4,
                     color=Z_color,
                     marker=Z_marker,
                     azim=azim,
                     legend=legends if i == 1 else None,
                     legend_ncol=11,
                     fontsize=10,
                     title=title)
    plot_save(os.path.join(FIG_PATH, '%s.pdf' % title))
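The helpers above (fast_tsne, plot_scatter, plot_figure, plot_save) are odin-specific. Below is a minimal, self-contained sketch of the same visualization idea using scikit-learn and matplotlib instead: flatten the latent codes, embed them in 3D with t-SNE, and draw the cloud from the same three azimuth angles. The data, labels, and output filename are synthetic stand-ins.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

rng = np.random.RandomState(0)
Z = rng.randn(120, 8, 2)                    # stand-in latent outputs
labels = rng.randint(0, 3, size=len(Z))     # stand-in digit labels

Z2 = Z.reshape(len(Z), -1)                  # flatten to 2D, as above
Z3 = TSNE(n_components=3, random_state=0).fit_transform(Z2)

fig = plt.figure(figsize=(12, 4))
for i, azim in enumerate((15, 60, 120)):    # same three viewpoints
    ax = fig.add_subplot(1, 3, i + 1, projection='3d')
    ax.scatter(Z3[:, 0], Z3[:, 1], Z3[:, 2], c=labels, s=4)
    ax.view_init(elev=15, azim=azim)
fig.savefig('latent_tsne.pdf')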
Code example #2
def visualize_latent_space(X_org, X_latent, name, labels, title):
    """
  X_org : [n_samples, n_timesteps, n_features]
  X_latent : [n_samples, n_timesteps, n_latents]
  """
    assert X_org.shape[0] == X_latent.shape[0] == len(name) == len(labels)
    assert not np.any(np.isnan(X_org))
    assert not np.any(np.isnan(X_latent))
    X_org = X_org.astype('float32')
    X_latent = X_latent.astype('float32')
    # ====== evaluation of the latent space ====== #
    n_channels = 1 if X_latent.ndim == 3 else int(np.prod(X_latent.shape[3:]))
    n_samples = X_org.shape[0]
    # 1 for original, 1 for mean channel, then the rest
    n_row = 1 + 1 + n_channels
    n_col = 3
    V.plot_figure(nrow=n_row + 1, ncol=16)
    # only select 3 random samples
    for i, idx in enumerate(
            sampling_iter(it=range(n_samples), k=n_col, seed=1234)):
        x = X_org[idx]
        # latent tensor can be 3D or 4D
        z = X_latent[idx]
        if z.ndim > 3:
            z = np.reshape(z, newshape=(z.shape[0], z.shape[1], -1))
        elif z.ndim == 2:
            z = np.reshape(z, newshape=(z.shape[0], z.shape[1], 1))
        elif z.ndim == 3:
            pass
        else:
            raise ValueError("No support for z value: %s" % str(z.shape))
        # plot original acoustic
        ax = V.plot_spectrogram(x.T, ax=(n_row, n_col, i + 1), title='Org')
        if i == 0:
            ax.set_title("[%s]'%s-%s'" %
                         (str(title), str(name[idx]), str(labels[idx])),
                         fontsize=8)
        else:
            ax.set_title("'%s-%s'" % (str(name[idx]), str(labels[idx])),
                         fontsize=8)
        # plot the mean
        V.plot_spectrogram(np.mean(z, axis=-1).T,
                           ax=(n_row, n_col, i + 4),
                           title='Zmean')
        # plot up to the first 8 channels
        if n_channels > 1:
            for j in range(min(8, n_channels)):
                V.plot_spectrogram(z[:, :, j].T,
                                   ax=(n_row, n_col, j * 3 + 7 + i),
                                   title='Z%d' % j)
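For reference, a minimal standalone sketch of the channel handling above, with matplotlib's imshow standing in for V.plot_spectrogram: a 4D latent block is flattened to [time, latent, channel], then the channel mean and the first few channels are drawn as heat maps. All data here is synthetic.

import numpy as np
import matplotlib.pyplot as plt

z = np.random.randn(100, 20, 2, 3)              # a single 4D latent sample
if z.ndim > 3:
    z = np.reshape(z, newshape=(z.shape[0], z.shape[1], -1))
n_channels = z.shape[-1]

fig, axes = plt.subplots(1, 1 + min(8, n_channels), figsize=(16, 2))
axes[0].imshow(np.mean(z, axis=-1).T, aspect='auto', origin='lower')
axes[0].set_title('Zmean')
for j in range(min(8, n_channels)):             # first channels only
    axes[1 + j].imshow(z[:, :, j].T, aspect='auto', origin='lower')
    axes[1 + j].set_title('Z%d' % j)
fig.savefig('latent_channels.pdf')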
Code example #3
File: utils.py  Project: imito/odin
def visualize_latent_space(X_org, X_latent, name, labels, title):
  """
  X_org : [n_samples, n_timesteps, n_features]
  X_latent : [n_samples, n_timesteps, n_latents]
  """
  assert X_org.shape[0] == X_latent.shape[0] == len(name) == len(labels)
  assert not np.any(np.isnan(X_org))
  assert not np.any(np.isnan(X_latent))
  X_org = X_org.astype('float32')
  X_latent = X_latent.astype('float32')
  # ====== evaluation of the latent space ====== #
  n_channels = 1 if X_latent.ndim == 3 else int(np.prod(X_latent.shape[3:]))
  n_samples = X_org.shape[0]
  # 1 for original, 1 for mean channel, then the rest
  n_row = 1 + 1 + n_channels
  n_col = 3
  V.plot_figure(nrow=n_row + 1, ncol=16)
  # only select 3 random samples
  for i, idx in enumerate(
      sampling_iter(it=range(n_samples), k=n_col, seed=5218)):
    x = X_org[idx]
    # latent tensor can be 3D or 4D
    z = X_latent[idx]
    if z.ndim > 3:
      z = np.reshape(z, newshape=(z.shape[0], z.shape[1], -1))
    elif z.ndim == 2:
      z = np.reshape(z, newshape=(z.shape[0], z.shape[1], 1))
    elif z.ndim == 3:
      pass
    else:
      raise ValueError("No support for z value: %s" % str(z.shape))
    # plot original acoustic
    ax = V.plot_spectrogram(x.T, ax=(n_row, n_col, i + 1), title='Org')
    if i == 0:
      ax.set_title("[%s]'%s-%s'" % (str(title), str(name[idx]), str(labels[idx])),
                   fontsize=8)
    else:
      ax.set_title("'%s-%s'" % (str(name[idx]), str(labels[idx])),
                   fontsize=8)
    # plot the mean
    V.plot_spectrogram(np.mean(z, axis=-1).T,
                       ax=(n_row, n_col, i + 4), title='Zmean')
    # plot up to the first 8 channels
    if n_channels > 1:
      for j in range(min(8, n_channels)):
        V.plot_spectrogram(z[:, :, j].T,
                           ax=(n_row, n_col, j * 3 + 7 + i),
                           title='Z%d' % j)
Code example #4
File: utils.py  Project: trungnt13/odin-ai
def prepare_dnn_data(recipe, feat, utt_length, seed=87654321):
    """
  Return
  ------
  train_feeder : Feeder for training
  valid_feeder : Feeder for validating
  test_ids : Test indices
  test_dat : Data array
  all_speakers : list of all speaker in training set
  """
    # Load dataset
    frame_length = int(utt_length / FRAME_SHIFT)
    ds = F.Dataset(os.path.join(PATH_ACOUSTIC_FEAT, recipe), read_only=True)
    X = ds[feat]
    train_indices = {name: ds['indices'][name] for name in TRAIN_DATA.keys()}
    test_indices = {
        name: start_end
        for name, start_end in ds['indices'].items() if name not in TRAIN_DATA
    }
    train_indices, valid_indices = train_valid_test_split(
        x=list(train_indices.items()), train=0.9, inc_test=False, seed=seed)
    all_speakers = sorted(set(TRAIN_DATA.values()))
    n_speakers = max(all_speakers) + 1
    print("#Train files:", ctext(len(train_indices), 'cyan'))
    print("#Valid files:", ctext(len(valid_indices), 'cyan'))
    print("#Test files:", ctext(len(test_indices), 'cyan'))
    print("#Speakers:", ctext(n_speakers, 'cyan'))
    recipes = [
        F.recipes.Sequencing(frame_length=frame_length,
                             step_length=frame_length,
                             end='pad',
                             pad_value=0,
                             pad_mode='post',
                             data_idx=0),
        F.recipes.Name2Label(lambda name: TRAIN_DATA[name], ref_idx=0),
        F.recipes.LabelOneHot(nb_classes=n_speakers, data_idx=1)
    ]
    train_feeder = F.Feeder(data_desc=F.IndexedData(data=X,
                                                    indices=train_indices),
                            batch_mode='batch',
                            ncpu=7,
                            buffer_size=12)
    valid_feeder = F.Feeder(data_desc=F.IndexedData(data=X,
                                                    indices=valid_indices),
                            batch_mode='batch',
                            ncpu=2,
                            buffer_size=4)
    train_feeder.set_recipes(recipes)
    valid_feeder.set_recipes(recipes)
    print(train_feeder)
    # ====== cache the test data ====== #
    cache_dat = os.path.join(PATH_EXP,
                             'test_%s_%d.dat' % (feat, int(utt_length)))
    cache_ids = os.path.join(PATH_EXP,
                             'test_%s_%d.ids' % (feat, int(utt_length)))
    # validate cache files
    if os.path.exists(cache_ids):
        with open(cache_ids, 'rb') as f:
            ids = pickle.load(f)
        if len(ids) != len(test_indices):
            os.remove(cache_ids)
            if os.path.exists(cache_dat):
                os.remove(cache_dat)
    elif os.path.exists(cache_dat):
        os.remove(cache_dat)
    # caching
    if not os.path.exists(cache_dat):
        dat = F.MmapData(cache_dat,
                         dtype='float16',
                         shape=(0, frame_length, X.shape[1]))
        ids = {}
        prog = Progbar(target=len(test_indices))
        s = 0
        for name, (start, end) in test_indices.items():
            y = X[start:end]
            y = segment_axis(y,
                             axis=0,
                             frame_length=frame_length,
                             step_length=frame_length,
                             end='pad',
                             pad_value=0,
                             pad_mode='post')
            dat.append(y)
            # update indices
            ids[name] = (s, s + len(y))
            s += len(y)
            # update progress
            prog.add(1)
        dat.flush()
        dat.close()
        with open(cache_ids, 'wb') as f:
            pickle.dump(ids, f)
    # ====== re-load ====== #
    dat = F.MmapData(cache_dat, read_only=True)
    with open(cache_ids, 'rb') as f:
        ids = pickle.load(f)
    # ====== save some sample ====== #
    sample_path = os.path.join(PATH_EXP,
                               'test_%s_%d.pdf' % (feat, int(utt_length)))
    V.plot_figure(nrow=9, ncol=6)
    for i, (name, (start, end)) in enumerate(
            sampling_iter(it=sorted(ids.items(), key=lambda x: x[0]),
                          k=12,
                          seed=87654321)):
        x = dat[start:end][:].astype('float32')
        ax = V.plot_spectrogram(x[np.random.randint(0, len(x))].T,
                                ax=(12, 1, i + 1),
                                title='')
        ax.set_title(name)
    V.plot_save(sample_path)
    return (train_feeder, valid_feeder, ids, dat, all_speakers)
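The caching loop above relies on odin's segment_axis to cut each utterance into fixed-length, non-overlapping segments with zero padding at the tail (end='pad', pad_mode='post'). Below is a numpy-only sketch of just that step, assuming frame_length equals step_length as in the call above; the real segment_axis also supports overlap and other padding modes.

import numpy as np

def segment_pad_post(x, frame_length):
    """Split [n_frames, n_features] into [n_segments, frame_length,
    n_features], zero-padding the last segment at the end."""
    n = int(np.ceil(len(x) / frame_length)) * frame_length
    pad = np.zeros((n - len(x),) + x.shape[1:], dtype=x.dtype)
    x = np.concatenate([x, pad], axis=0)
    return x.reshape(-1, frame_length, x.shape[1])

y = segment_pad_post(np.ones((250, 40), dtype='float32'), frame_length=100)
print(y.shape)   # (3, 100, 40); the last segment carries 50 padded frames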
Code example #5
File: helpers.py  Project: professorlust/odin-ai
def prepare_dnn_data(save_dir,
                     feat_name=None,
                     utt_length=None,
                     seq_mode=None,
                     min_dur=None,
                     min_utt=None,
                     exclude=None,
                     train_proportion=None,
                     return_dataset=False):
    """Filter the cached acoustic features, split them into train/valid
    sets stratified by speaker, and wrap both into Feeders (optionally
    also return the underlying Dataset)."""
    assert os.path.isdir(save_dir), \
        "Path to '%s' is not a directory" % save_dir
    if feat_name is None:
        feat_name = FEATURE_NAME
    if utt_length is None:
        utt_length = int(_args.utt)
    if seq_mode is None:
        seq_mode = str(_args.seq).strip().lower()
    if min_dur is None:
        min_dur = MINIMUM_UTT_DURATION
    if min_utt is None:
        min_utt = MINIMUM_UTT_PER_SPEAKERS
    if exclude is None:
        exclude = str(_args.exclude).strip()
    print("Minimum duration: %s(s)" % ctext(min_dur, 'cyan'))
    print("Minimum utt/spk : %s(utt)" % ctext(min_utt, 'cyan'))
    # ******************** prepare dataset ******************** #
    path = os.path.join(PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE)
    assert os.path.exists(
        path), "Cannot find acoustic dataset at path: %s" % path
    ds = F.Dataset(path=path, read_only=True)
    rand = np.random.RandomState(seed=Config.SUPER_SEED)
    # ====== find the right feature ====== #
    assert feat_name in ds, "Cannot find feature with name: %s" % feat_name
    X = ds[feat_name]
    ids_name = 'indices_%s' % feat_name
    assert ids_name in ds, "Cannot find indices with name: %s" % ids_name
    # ====== basic path ====== #
    path_filtered_data = os.path.join(save_dir, 'filtered_files.pkl')
    path_train_files = os.path.join(save_dir, 'train_files.pkl')
    path_speaker_info = os.path.join(save_dir, 'speaker_info.pkl')
    # ******************** cannot find cached data ******************** #
    if any(not os.path.exists(p)
           for p in [path_filtered_data, path_train_files, path_speaker_info]):
        # ====== exclude some dataset ====== #
        if len(exclude) > 0:
            exclude_dataset = {i: 1 for i in exclude.split(',')}
            print("* Excluded dataset:", ctext(exclude_dataset, 'cyan'))
            indices = {
                name: (start, end)
                for name, (start, end) in ds[ids_name].items()
                if ds['dsname'][name] not in exclude_dataset
            }
            # special case: exclude all the noise data
            if 'noise' in exclude_dataset:
                indices = {
                    name: (start, end)
                    for name, (start, end) in indices.items()
                    if '/' not in name
                }
        else:
            indices = {i: j for i, j in ds[ids_name].items()}
        # ====== down-sampling if necessary ====== #
        if _args.downsample > 1000:
            dataset2name = defaultdict(list)
            # ordering the indices so we sample the same set every time
            for name in sorted(indices.keys()):
                dataset2name[ds['dsname'][name]].append(name)
            n_total_files = len(indices)
            n_sample_files = int(_args.downsample)
            # get the percentage of each dataset
            dataset2per = {
                i: len(j) / n_total_files
                for i, j in dataset2name.items()
            }
            # sampling based on percentage
            _ = {}
            for dsname, flist in dataset2name.items():
                rand.shuffle(flist)
                n_dataset_files = int(dataset2per[dsname] * n_sample_files)
                _.update({i: indices[i] for i in flist[:n_dataset_files]})
            indices = _
        # ====== filter out "bad" samples ====== #
        indices = filter_utterances(X=X,
                                    indices=indices,
                                    spkid=ds['spkid'],
                                    min_utt=min_utt,
                                    min_dur=min_dur,
                                    remove_min_length=True,
                                    remove_min_uttspk=True,
                                    n_speakers=None,
                                    ncpu=None,
                                    save_path=path_filtered_data)
        # ====== all training file name ====== #
        # modify here to train full dataset
        all_name = sorted(indices.keys())
        rand.shuffle(all_name)
        rand.shuffle(all_name)
        n_files = len(all_name)
        print("#Files:", ctext(n_files, 'cyan'))
        # ====== speaker mapping ====== #
        name2spk = {name: ds['spkid'][name] for name in all_name}
        all_speakers = sorted(set(name2spk.values()))
        spk2label = {spk: i for i, spk in enumerate(all_speakers)}
        name2label = {name: spk2label[spk] for name, spk in name2spk.items()}
        assert len(name2label) == len(all_name)
        print("#Speakers:", ctext(len(all_speakers), 'cyan'))
        # ====== stratify sampling based on speaker ====== #
        valid_name = []
        # create speakers' cluster
        label2name = defaultdict(list)
        for name, label in sorted(name2label.items(), key=lambda x: x[0]):
            label2name[label].append(name)
        # for each speaker with >= 3 utterances
        for label, name_list in sorted(label2name.items(), key=lambda x: x[0]):
            if len(name_list) < 3:
                continue
            n = max(1, int(0.05 * len(name_list)))  # 5% for validation
            valid_name += rand.choice(a=name_list, size=n,
                                      replace=False).tolist()
        # train list is the rest
        _ = set(valid_name)
        train_name = [i for i in all_name if i not in _]
        # ====== split training and validation ====== #
        train_indices = {name: indices[name] for name in train_name}
        valid_indices = {name: indices[name] for name in valid_name}
        # ====== save cached data ====== #
        with open(path_train_files, 'wb') as fout:
            pickle.dump({'train': train_indices, 'valid': valid_indices}, fout)
        with open(path_speaker_info, 'wb') as fout:
            pickle.dump(
                {
                    'all_speakers': all_speakers,
                    'name2label': name2label,
                    'spk2label': spk2label
                }, fout)
    # ******************** load cached data ******************** #
    else:
        with open(path_train_files, 'rb') as fin:
            obj = pickle.load(fin)
            train_indices = obj['train']
            valid_indices = obj['valid']
        with open(path_speaker_info, 'rb') as fin:
            obj = pickle.load(fin)
            all_speakers = obj['all_speakers']
            name2label = obj['name2label']
            spk2label = obj['spk2label']

    # ******************** print log ******************** #

    def summary_indices(ids):
        datasets = defaultdict(int)
        speakers = defaultdict(list)
        text = ''
        for name in sorted(ids.keys()):
            text += name + str(ids[name])
            dsname = ds['dsname'][name]
            datasets[dsname] += 1
            speakers[dsname].append(ds['spkid'][name])
        for dsname in sorted(datasets.keys()):
            print('  %-18s: %s(utt) %s(spk)' %
                  (dsname, ctext('%6d' % datasets[dsname], 'cyan'),
                   ctext(len(set(speakers[dsname])), 'cyan')))
        print('  MD5 checksum:', ctext(crypto.md5_checksum(text), 'lightcyan'))

    # ====== training files ====== #
    print(
        "#Train files:", ctext('%-8d' % len(train_indices), 'cyan'), "#spk:",
        ctext(len(set(name2label[name] for name in train_indices.keys())),
              'cyan'), "#noise:",
        ctext(len([name for name in train_indices.keys() if '/' in name]),
              'cyan'))
    summary_indices(ids=train_indices)
    # ====== valid files ====== #
    print(
        "#Valid files:", ctext('%-8d' % len(valid_indices), 'cyan'), "#spk:",
        ctext(len(set(name2label[name] for name in valid_indices.keys())),
              'cyan'), "#noise:",
        ctext(len([name for name in valid_indices.keys() if '/' in name]),
              'cyan'))
    summary_indices(ids=valid_indices)
    # ******************** create the recipe ******************** #
    assert all(name in name2label for name in train_indices.keys())
    assert all(name in name2label for name in valid_indices.keys())
    recipes = prepare_dnn_feeder_recipe(name2label=name2label,
                                        n_speakers=len(all_speakers),
                                        utt_length=utt_length,
                                        seq_mode=seq_mode)
    # ====== downsample training set for analyzing if required ====== #
    if train_proportion is not None:
        assert 0 < train_proportion < 1
        n_training = len(train_indices)
        train_indices = list(train_indices.items())
        rand.shuffle(train_indices)
        rand.shuffle(train_indices)
        train_indices = dict(train_indices[:int(n_training *
                                                train_proportion)])
    # ====== create feeder ====== #
    train_feeder = F.Feeder(data_desc=F.IndexedData(data=X,
                                                    indices=train_indices),
                            batch_mode='batch',
                            ncpu=NCPU,
                            buffer_size=256)

    valid_feeder = F.Feeder(data_desc=F.IndexedData(data=X,
                                                    indices=valid_indices),
                            batch_mode='batch',
                            ncpu=max(2, NCPU // 4),
                            buffer_size=64)

    train_feeder.set_recipes(recipes)
    valid_feeder.set_recipes(recipes)
    print(train_feeder)
    print(valid_feeder)
    # ====== debugging ====== #
    if IS_DEBUGGING:
        import matplotlib
        matplotlib.use('Agg')
        prog = Progbar(target=len(valid_feeder),
                       print_summary=True,
                       name="Iterating validation set")
        samples = []
        n_visual = 250
        for name, idx, X, y in valid_feeder.set_batch(batch_size=100000,
                                                      batch_mode='file',
                                                      seed=None,
                                                      shuffle_level=0):
            assert idx == 0, "Utterances longer than %.2f(sec)" % (
                100000 * Config.STEP_LENGTH)
            prog['X'] = X.shape
            prog['y'] = y.shape
            prog.add(X.shape[0])
            # random sampling
            if rand.rand(1) < 0.5 and len(samples) < n_visual:
                for i in rand.randint(0, X.shape[0], size=4, dtype='int32'):
                    samples.append((name, X[i], np.argmax(y[i], axis=-1)))
        # plot the spectrogram
        n_visual = len(samples)
        V.plot_figure(nrow=n_visual, ncol=8)
        for i, (name, X, y) in enumerate(samples):
            is_noise = '/' in name
            assert name2label[
                name] == y, "Speaker label mismatch for file: %s" % name
            name = name.split('/')[0]
            dsname = ds['dsname'][name]
            spkid = ds['spkid'][name]
            y = np.argmax(y, axis=-1)
            ax = V.plot_spectrogram(X.T,
                                    ax=(n_visual, 1, i + 1),
                                    title='#%d' % (i + 1))
            ax.set_title(
                '[%s][%s]%s  %s' %
                ('noise' if is_noise else 'clean', dsname, name, spkid),
                fontsize=6)
        # doesn't need to be high resolution
        V.plot_save('/tmp/tmp.pdf', dpi=12)
        exit()
    # ====== return ====== #
    if bool(return_dataset):
        return train_feeder, valid_feeder, all_speakers, ds
    return train_feeder, valid_feeder, all_speakers
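The heart of this function is the stratified split: for every speaker with at least 3 utterances, 5% of their files (at least one) go to validation and the rest to training. A condensed standalone sketch of that logic follows, with a toy name2label mapping standing in for the real speaker labels.

import numpy as np
from collections import defaultdict

rand = np.random.RandomState(1234)
name2label = {'utt%03d' % i: i % 10 for i in range(200)}   # toy mapping

# cluster utterance names by speaker label
label2name = defaultdict(list)
for name, label in sorted(name2label.items()):
    label2name[label].append(name)

valid_name = []
for label, name_list in sorted(label2name.items()):
    if len(name_list) < 3:
        continue
    n = max(1, int(0.05 * len(name_list)))        # 5% for validation
    valid_name += rand.choice(a=name_list, size=n, replace=False).tolist()

valid_set = set(valid_name)
train_name = [n for n in sorted(name2label) if n not in valid_set]
print(len(train_name), len(valid_name))           # 190 10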
Code example #6
File: utils.py  Project: imito/odin
def prepare_dnn_data(recipe, feat, utt_length, seed=52181208):
  """
  Return
  ------
  train_feeder : Feeder for training
  valid_feeder : Feeder for validating
  test_ids : Test indices
  test_dat : Data array
  all_speakers : list of all speakers in the training set
  """
  # Load dataset
  frame_length = int(utt_length / FRAME_SHIFT)
  ds = F.Dataset(os.path.join(PATH_ACOUSTIC_FEAT, recipe),
                 read_only=True)
  X = ds[feat]
  train_indices = {name: ds['indices'][name]
                   for name in TRAIN_DATA.keys()}
  test_indices = {name: start_end
                  for name, start_end in ds['indices'].items()
                  if name not in TRAIN_DATA}
  train_indices, valid_indices = train_valid_test_split(
      x=list(train_indices.items()), train=0.9, inc_test=False, seed=seed)
  all_speakers = sorted(set(TRAIN_DATA.values()))
  n_speakers = max(all_speakers) + 1
  print("#Train files:", ctext(len(train_indices), 'cyan'))
  print("#Valid files:", ctext(len(valid_indices), 'cyan'))
  print("#Test files:", ctext(len(test_indices), 'cyan'))
  print("#Speakers:", ctext(n_speakers, 'cyan'))
  recipes = [
      F.recipes.Sequencing(frame_length=frame_length, step_length=frame_length,
                           end='pad', pad_value=0, pad_mode='post',
                           data_idx=0),
      F.recipes.Name2Label(lambda name: TRAIN_DATA[name], ref_idx=0),
      F.recipes.LabelOneHot(nb_classes=n_speakers, data_idx=1)
  ]
  train_feeder = F.Feeder(
      data_desc=F.IndexedData(data=X, indices=train_indices),
      batch_mode='batch', ncpu=7, buffer_size=12)
  valid_feeder = F.Feeder(
      data_desc=F.IndexedData(data=X, indices=valid_indices),
      batch_mode='batch', ncpu=2, buffer_size=4)
  train_feeder.set_recipes(recipes)
  valid_feeder.set_recipes(recipes)
  print(train_feeder)
  # ====== cache the test data ====== #
  cache_dat = os.path.join(PATH_EXP, 'test_%s_%d.dat' % (feat, int(utt_length)))
  cache_ids = os.path.join(PATH_EXP, 'test_%s_%d.ids' % (feat, int(utt_length)))
  # validate cache files
  if os.path.exists(cache_ids):
    with open(cache_ids, 'rb') as f:
      ids = pickle.load(f)
    if len(ids) != len(test_indices):
      os.remove(cache_ids)
      if os.path.exists(cache_dat):
        os.remove(cache_dat)
  elif os.path.exists(cache_dat):
    os.remove(cache_dat)
  # caching
  if not os.path.exists(cache_dat):
    dat = F.MmapData(cache_dat, dtype='float16',
                     shape=(0, frame_length, X.shape[1]))
    ids = {}
    prog = Progbar(target=len(test_indices))
    s = 0
    for name, (start, end) in test_indices.items():
      y = X[start:end]
      y = segment_axis(y, axis=0,
                       frame_length=frame_length, step_length=frame_length,
                       end='pad', pad_value=0, pad_mode='post')
      dat.append(y)
      # update indices
      ids[name] = (s, s + len(y))
      s += len(y)
      # update progress
      prog.add(1)
    dat.flush()
    dat.close()
    with open(cache_ids, 'wb') as f:
      pickle.dump(ids, f)
  # ====== re-load ====== #
  dat = F.MmapData(cache_dat, read_only=True)
  with open(cache_ids, 'rb') as f:
    ids = pickle.load(f)
  # ====== save some sample ====== #
  sample_path = os.path.join(PATH_EXP,
                             'test_%s_%d.pdf' % (feat, int(utt_length)))
  V.plot_figure(nrow=9, ncol=6)
  for i, (name, (start, end)) in enumerate(
      sampling_iter(it=sorted(ids.items(), key=lambda x: x[0]), k=12, seed=52181208)):
    x = dat[start:end][:].astype('float32')
    ax = V.plot_spectrogram(x[np.random.randint(0, len(x))].T,
                            ax=(12, 1, i + 1), title='')
    ax.set_title(name)
  V.plot_save(sample_path)
  return (train_feeder, valid_feeder,
          ids, dat, all_speakers)
Code example #7
                              get_vad=True,
                              get_energy=True,
                              get_delta=None,
                              pitch_threshold=0.8,
                              fmin=64,
                              fmax=None,
                              sr_new=None,
                              preemphasis=0.97)

for i, j in feat.items():
    print(i, j.shape)

plt.subplot(7, 1, 1)
plt.plot(y)

plt.subplot(7, 1, 2)
visual.plot_spectrogram(feat['spec'].T, vad=feat['vad'])
plt.subplot(7, 1, 3)
visual.plot_spectrogram(feat['mspec'].T, vad=feat['vad'])
plt.subplot(7, 1, 4)
visual.plot_spectrogram(feat['mfcc'].T, vad=feat['vad'])

plt.subplot(7, 1, 5)
visual.plot_spectrogram(feat['qspec'].T, vad=feat['vad'])
plt.subplot(7, 1, 6)
visual.plot_spectrogram(feat['qmspec'].T, vad=feat['vad'])
plt.subplot(7, 1, 7)
visual.plot_spectrogram(feat['qmfcc'].T, vad=feat['vad'])

visual.plot_show(block=True, tight_layout=False)
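The snippet above is truncated at the feature-extraction call and plots several spectrogram variants with odin's visual.plot_spectrogram. As a self-contained stand-in, the sketch below computes a log-magnitude spectrogram of a synthetic tone with a plain numpy STFT and draws it with matplotlib; all parameters here are illustrative.

import numpy as np
import matplotlib.pyplot as plt

sr = 8000
t = np.arange(sr) / sr
y = np.sin(2 * np.pi * 440 * t) + 0.5 * np.sin(2 * np.pi * 880 * t)

n_fft, hop = 256, 128
window = np.hanning(n_fft)
frames = [y[i:i + n_fft] * window
          for i in range(0, len(y) - n_fft, hop)]       # framed signal
spec = np.log(np.abs(np.fft.rfft(frames, axis=-1)) + 1e-8)

plt.figure(figsize=(8, 3))
plt.imshow(spec.T, aspect='auto', origin='lower')       # [freq, time]
plt.title('log-magnitude spectrogram')
plt.show()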
Code example #8
File: helpers.py  Project: imito/odin
def prepare_dnn_data(save_dir, feat_name=None,
                     utt_length=None, seq_mode=None,
                     min_dur=None, min_utt=None,
                     exclude=None, train_proportion=None,
                     return_dataset=False):
  assert os.path.isdir(save_dir), \
      "Path to '%s' is not a directory" % save_dir
  if feat_name is None:
    feat_name = FEATURE_NAME
  if utt_length is None:
    utt_length = int(_args.utt)
  if seq_mode is None:
    seq_mode = str(_args.seq).strip().lower()
  if min_dur is None:
    min_dur = MINIMUM_UTT_DURATION
  if min_utt is None:
    min_utt = MINIMUM_UTT_PER_SPEAKERS
  if exclude is None:
    exclude = str(_args.exclude).strip()
  print("Minimum duration: %s(s)" % ctext(min_dur, 'cyan'))
  print("Minimum utt/spk : %s(utt)" % ctext(min_utt, 'cyan'))
  # ******************** prepare dataset ******************** #
  path = os.path.join(PATH_ACOUSTIC_FEATURES, FEATURE_RECIPE)
  assert os.path.exists(path), "Cannot find acoustic dataset at path: %s" % path
  ds = F.Dataset(path=path, read_only=True)
  rand = np.random.RandomState(seed=Config.SUPER_SEED)
  # ====== find the right feature ====== #
  assert feat_name in ds, "Cannot find feature with name: %s" % feat_name
  X = ds[feat_name]
  ids_name = 'indices_%s' % feat_name
  assert ids_name in ds, "Cannot find indices with name: %s" % ids_name
  # ====== basic path ====== #
  path_filtered_data = os.path.join(save_dir, 'filtered_files.pkl')
  path_train_files = os.path.join(save_dir, 'train_files.pkl')
  path_speaker_info = os.path.join(save_dir, 'speaker_info.pkl')
  # ******************** cannot find cached data ******************** #
  if any(not os.path.exists(p) for p in [path_filtered_data,
                                         path_train_files,
                                         path_speaker_info]):
    # ====== exclude some dataset ====== #
    if len(exclude) > 0:
      exclude_dataset = {i: 1 for i in exclude.split(',')}
      print("* Excluded dataset:", ctext(exclude_dataset, 'cyan'))
      indices = {name: (start, end)
                 for name, (start, end) in ds[ids_name].items()
                 if ds['dsname'][name] not in exclude_dataset}
      # special case: exclude all the noise data
      if 'noise' in exclude_dataset:
        indices = {name: (start, end)
                   for name, (start, end) in indices.items()
                   if '/' not in name}
    else:
      indices = {i: j for i, j in ds[ids_name].items()}
    # ====== down-sampling if necessary ====== #
    if _args.downsample > 1000:
      dataset2name = defaultdict(list)
      # ordering the indices so we sample the same set every time
      for name in sorted(indices.keys()):
        dataset2name[ds['dsname'][name]].append(name)
      n_total_files = len(indices)
      n_sample_files = int(_args.downsample)
      # get the percentage of each dataset
      dataset2per = {i: len(j) / n_total_files
                     for i, j in dataset2name.items()}
      # sampling based on percentage
      _ = {}
      for dsname, flist in dataset2name.items():
        rand.shuffle(flist)
        n_dataset_files = int(dataset2per[dsname] * n_sample_files)
        _.update({i: indices[i]
                  for i in flist[:n_dataset_files]})
      indices = _
    # ====== filter out "bad" samples ====== #
    indices = filter_utterances(X=X, indices=indices, spkid=ds['spkid'],
                                min_utt=min_utt, min_dur=min_dur,
                                remove_min_length=True,
                                remove_min_uttspk=True,
                                n_speakers=None, ncpu=None,
                                save_path=path_filtered_data)
    # ====== all training file name ====== #
    # modify here to train full dataset
    all_name = sorted(indices.keys())
    rand.shuffle(all_name); rand.shuffle(all_name)
    n_files = len(all_name)
    print("#Files:", ctext(n_files, 'cyan'))
    # ====== speaker mapping ====== #
    name2spk = {name: ds['spkid'][name]
                for name in all_name}
    all_speakers = sorted(set(name2spk.values()))
    spk2label = {spk: i
                 for i, spk in enumerate(all_speakers)}
    name2label = {name: spk2label[spk]
                  for name, spk in name2spk.items()}
    assert len(name2label) == len(all_name)
    print("#Speakers:", ctext(len(all_speakers), 'cyan'))
    # ====== stratify sampling based on speaker ====== #
    valid_name = []
    # create speakers' cluster
    label2name = defaultdict(list)
    for name, label in sorted(name2label.items(),
                              key=lambda x: x[0]):
      label2name[label].append(name)
    # for each speaker with >= 3 utterances
    for label, name_list in sorted(label2name.items(),
                                   key=lambda x: x[0]):
      if len(name_list) < 3:
        continue
      n = max(1, int(0.05 * len(name_list))) # 5% for validation
      valid_name += rand.choice(a=name_list, size=n, replace=False).tolist()
    # train list is the rest
    _ = set(valid_name)
    train_name = [i for i in all_name if i not in _]
    # ====== split training and validation ====== #
    train_indices = {name: indices[name] for name in train_name}
    valid_indices = {name: indices[name] for name in valid_name}
    # ====== save cached data ====== #
    with open(path_train_files, 'wb') as fout:
      pickle.dump({'train': train_indices, 'valid': valid_indices},
                  fout)
    with open(path_speaker_info, 'wb') as fout:
      pickle.dump({'all_speakers': all_speakers,
                   'name2label': name2label,
                   'spk2label': spk2label},
                  fout)
  # ******************** load cached data ******************** #
  else:
    with open(path_train_files, 'rb') as fin:
      obj = pickle.load(fin)
      train_indices = obj['train']
      valid_indices = obj['valid']
    with open(path_speaker_info, 'rb') as fin:
      obj = pickle.load(fin)
      all_speakers = obj['all_speakers']
      name2label = obj['name2label']
      spk2label = obj['spk2label']

  # ******************** print log ******************** #
  def summary_indices(ids):
    datasets = defaultdict(int)
    speakers = defaultdict(list)
    text = ''
    for name in sorted(ids.keys()):
      text += name + str(ids[name])
      dsname = ds['dsname'][name]
      datasets[dsname] += 1
      speakers[dsname].append(ds['spkid'][name])
    for dsname in sorted(datasets.keys()):
      print('  %-18s: %s(utt) %s(spk)' % (
          dsname,
          ctext('%6d' % datasets[dsname], 'cyan'),
          ctext(len(set(speakers[dsname])), 'cyan')))
    print('  MD5 checksum:', ctext(crypto.md5_checksum(text), 'lightcyan'))
  # ====== training files ====== #
  print("#Train files:", ctext('%-8d' % len(train_indices), 'cyan'),
        "#spk:", ctext(len(set(name2label[name]
                               for name in train_indices.keys())), 'cyan'),
        "#noise:", ctext(len([name for name in train_indices.keys()
                              if '/' in name]), 'cyan'))
  summary_indices(ids=train_indices)
  # ====== valid files ====== #
  print("#Valid files:", ctext('%-8d' % len(valid_indices), 'cyan'),
        "#spk:", ctext(len(set(name2label[name]
                               for name in valid_indices.keys())), 'cyan'),
        "#noise:", ctext(len([name for name in valid_indices.keys()
                              if '/' in name]), 'cyan'))
  summary_indices(ids=valid_indices)
  # ******************** create the recipe ******************** #
  assert all(name in name2label
             for name in train_indices.keys())
  assert all(name in name2label
             for name in valid_indices.keys())
  recipes = prepare_dnn_feeder_recipe(name2label=name2label,
                                      n_speakers=len(all_speakers),
                                      utt_length=utt_length, seq_mode=seq_mode)
  # ====== downsample training set for analyzing if required ====== #
  if train_proportion is not None:
    assert 0 < train_proportion < 1
    n_training = len(train_indices)
    train_indices = list(train_indices.items())
    rand.shuffle(train_indices); rand.shuffle(train_indices)
    train_indices = dict(train_indices[:int(n_training * train_proportion)])
  # ====== create feeder ====== #
  train_feeder = F.Feeder(
      data_desc=F.IndexedData(data=X,
                              indices=train_indices),
      batch_mode='batch', ncpu=NCPU, buffer_size=256)

  valid_feeder = F.Feeder(
      data_desc=F.IndexedData(data=X,
                              indices=valid_indices),
      batch_mode='batch', ncpu=max(2, NCPU // 4), buffer_size=64)

  train_feeder.set_recipes(recipes)
  valid_feeder.set_recipes(recipes)
  print(train_feeder)
  print(valid_feeder)
  # ====== debugging ====== #
  if IS_DEBUGGING:
    import matplotlib
    matplotlib.use('Agg')
    prog = Progbar(target=len(valid_feeder), print_summary=True,
                   name="Iterating validation set")
    samples = []
    n_visual = 250
    for name, idx, X, y in valid_feeder.set_batch(batch_size=100000,
                                                  batch_mode='file',
                                                  seed=None, shuffle_level=0):
      assert idx == 0, "Utterances longer than %.2f(sec)" % (100000 * Config.STEP_LENGTH)
      prog['X'] = X.shape
      prog['y'] = y.shape
      prog.add(X.shape[0])
      # random sampling
      if rand.rand(1) < 0.5 and len(samples) < n_visual:
        for i in rand.randint(0, X.shape[0], size=4, dtype='int32'):
          samples.append((name, X[i], np.argmax(y[i], axis=-1)))
    # plot the spectrogram
    n_visual = len(samples)
    V.plot_figure(nrow=n_visual, ncol=8)
    for i, (name, X, y) in enumerate(samples):
      is_noise = '/' in name
      assert name2label[name] == y, "Speaker label mismatch for file: %s" % name
      name = name.split('/')[0]
      dsname = ds['dsname'][name]
      spkid = ds['spkid'][name]
      y = np.argmax(y, axis=-1)
      ax = V.plot_spectrogram(X.T,
                              ax=(n_visual, 1, i + 1),
                              title='#%d' % (i + 1))
      ax.set_title('[%s][%s]%s  %s' %
                   ('noise' if is_noise else 'clean', dsname, name, spkid),
                   fontsize=6)
    # doesn't need to be high resolution
    V.plot_save('/tmp/tmp.pdf', dpi=12)
    exit()
  # ====== return ====== #
  if bool(return_dataset):
    return train_feeder, valid_feeder, all_speakers, ds
  return train_feeder, valid_feeder, all_speakers