Example No. 1
# Assumed imports (not shown in the original excerpt):
import numpy as np
from nnmnkwii import preprocessing as P
def test_meanvar_incremental():
    np.random.seed(1234)
    N = 32
    X = np.random.randn(N, 100, 24)
    lengths = [len(x) for x in X]
    X_mean = np.mean(X, axis=(0, 1))
    X_var = np.var(X, axis=(0, 1))
    X_std = np.sqrt(X_var)

    # Check consistency with numpy
    X_mean_inc, X_var_inc = P.meanvar(X)
    assert np.allclose(X_mean, X_mean_inc)
    assert np.allclose(X_var, X_var_inc)

    # Split the dataset and compute mean/var incrementally
    X_a = X[:N // 2]
    X_b = X[N // 2:]
    X_mean_a, X_var_a, last_sample_count = P.meanvar(
        X_a, return_last_sample_count=True)
    assert last_sample_count == np.sum(lengths[:N // 2])
    X_mean_b, X_var_b = P.meanvar(
        X_b, mean_=X_mean_a, var_=X_var_a,
        last_sample_count=last_sample_count)
    assert np.allclose(X_mean, X_mean_b)
    assert np.allclose(X_var, X_var_b)

    # meanstd
    X_mean_a, X_std_a, last_sample_count = P.meanstd(
        X_a, return_last_sample_count=True)
    assert last_sample_count == np.sum(lengths[:N // 2])
    X_mean_b, X_std_b = P.meanstd(
        X_b, mean_=X_mean_a, var_=X_std_a**2,
        last_sample_count=last_sample_count)
    assert np.allclose(X_mean, X_mean_b)
    assert np.allclose(X_std, X_std_b)
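For reference, the incremental update that the split computation above relies on can be written directly in NumPy. The sketch below is the standard pooled mean/variance combination (Chan et al.); `combine_meanvar` is a hypothetical helper for illustration, not part of nnmnkwii's API:

import numpy as np

def combine_meanvar(mean_a, var_a, n_a, mean_b, var_b, n_b):
    # Pooled mean: the two chunk means, weighted by frame counts.
    n = n_a + n_b
    delta = mean_b - mean_a
    mean = mean_a + delta * (n_b / n)
    # Pooled (biased) variance: within-chunk sums of squares plus a
    # between-chunk correction term.
    m2 = var_a * n_a + var_b * n_b + delta ** 2 * (n_a * n_b / n)
    return mean, m2 / n

# Sanity check against a direct computation over the concatenated data.
rng = np.random.RandomState(0)
a, b = rng.randn(100, 24), rng.randn(200, 24)
mean, var = combine_meanvar(a.mean(0), a.var(0), len(a),
                            b.mean(0), b.var(0), len(b))
full = np.concatenate([a, b])
assert np.allclose(mean, full.mean(0)) and np.allclose(var, full.var(0))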
Example No. 2
# Assumed imports (not shown in the original excerpt):
import numpy as np
from nnmnkwii import preprocessing as P
from nnmnkwii.datasets import FileSourceDataset, PaddedFileSourceDataset
from nnmnkwii.util import example_file_data_sources_for_acoustic_model
def test_meanvar():
    # Pick acoustic features for testing
    _, X = example_file_data_sources_for_acoustic_model()
    X = FileSourceDataset(X)
    lengths = [len(x) for x in X]
    D = X[0].shape[-1]
    X_mean, X_var = P.meanvar(X)
    X_std = np.sqrt(X_var)
    assert np.isfinite(X_mean).all()
    assert np.isfinite(X_var).all()
    assert X_mean.shape[-1] == D
    assert X_var.shape[-1] == D

    _, X_std_hat = P.meanstd(X)
    assert np.allclose(X_std, X_std_hat)

    x = X[0]
    x_scaled = P.scale(x, X_mean, X_std)
    assert np.isfinite(x_scaled).all()

    # For padded dataset
    _, X = example_file_data_sources_for_acoustic_model()
    X = PaddedFileSourceDataset(X, 1000)
    # Should get same results with padded features
    X_mean_hat, X_var_hat = P.meanvar(X, lengths)
    assert np.allclose(X_mean, X_mean_hat)
    assert np.allclose(X_var, X_var_hat)

    # Inverse transform
    x = X[0]
    x_hat = P.inv_scale(P.scale(x, X_mean, X_std), X_mean, X_std)
    assert np.allclose(x, x_hat, atol=1e-5)
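One detail worth calling out: in the padded case above, `lengths` must be passed so the zero frames appended by `PaddedFileSourceDataset` are excluded from the statistics. A toy illustration of the bias that padding would otherwise introduce (shapes chosen arbitrarily):

import numpy as np

x = np.ones((2, 5, 3))   # 2 utterances padded to 5 frames, 3-dim features
x[0, 3:] = 0.0           # utterance 0 really has 3 frames
x[1, 4:] = 0.0           # utterance 1 really has 4 frames
lengths = [3, 4]
naive_mean = x.reshape(-1, 3).mean(axis=0)   # 0.7: biased toward the zero padding
true_mean = np.concatenate([x[i, :n] for i, n in enumerate(lengths)]).mean(axis=0)  # 1.0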
Example No. 3
def _get_t_stat(self):
    return meanvar(self.ts, self._get_lengths(self.ts))
Example No. 4
            NPYDataSource(outputs_dir, train=train, max_files=max_files))
        # Assuming X and Y are time aligned.
        x_lengths = np.array([len(x) for x in X[phase]])
        y_lengths = np.array([len(y) for y in Y[phase]])
        assert np.allclose(x_lengths, y_lengths)
        utt_lengths[phase] = x_lengths
        print("Size of dataset for {}: {}".format(phase, len(X[phase])))

    # Collect stats for normalization (from training data).
    # If this ever becomes a performance bottleneck (it isn't now), it can be
    # moved into a separate script.
    phase = "train"
    # TODO: ugly?
    if hp == hparams.vc:
        # Collect mean/var from source and target features
        data_mean, data_var, last_sample_count = P.meanvar(
            X[phase], utt_lengths[phase], return_last_sample_count=True)
        data_mean, data_var = P.meanvar(Y[phase],
                                        utt_lengths[phase],
                                        mean_=data_mean,
                                        var_=data_var,
                                        last_sample_count=last_sample_count)
        data_std = np.sqrt(data_var)

        np.save(join(data_dir, "data_mean"), data_mean)
        np.save(join(data_dir, "data_var"), data_var)

        if hp.generator_params["in_dim"] is None:
            hp.generator_params["in_dim"] = data_mean.shape[-1]
        if hp.generator_params["out_dim"] is None:
            hp.generator_params["out_dim"] = data_mean.shape[-1]
Example No. 5
                          dim=x_dim))
    Y[ty] = FileSourceDataset(
        FeatureFileSource(os.path.join(DATA_ROOT, "Y_{}".format(ty)),
                          dim=y_dim))
    # This triggers file loads, but the cost is negligible in practice.
    utt_lengths[ty] = [len(x) for x in X[ty]]

X_min = {}
X_max = {}
Y_mean = {}
Y_var = {}
Y_scale = {}

for typ in ["acoustic", "duration"]:
    X_min[typ], X_max[typ] = minmax(X[typ], utt_lengths[typ])
    Y_mean[typ], Y_var[typ] = meanvar(Y[typ], utt_lengths[typ])
    Y_scale[typ] = np.sqrt(Y_var[typ])

fname_list = [
    'X_min.pkl', 'X_max.pkl', 'Y_mean.pkl', 'Y_var.pkl', 'Y_scale.pkl'
]

with ExitStack() as stack:
    f = [
        stack.enter_context(open(os.path.join(DATA_ROOT, fname), 'wb'))
        for fname in fname_list
    ]
    pickle.dump(X_min, f[0])
    pickle.dump(X_max, f[1])
    pickle.dump(Y_mean, f[2])
    pickle.dump(Y_var, f[3])
    pickle.dump(Y_scale, f[4])  # fname_list opens five files, one per stat
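A matching loader for the pickled statistics could look like this sketch (same `fname_list` and `DATA_ROOT` as above; the dict-building loop is hypothetical, not part of the original script):

import os
import pickle

stats = {}
for fname in fname_list:
    with open(os.path.join(DATA_ROOT, fname), 'rb') as fh:
        stats[fname[:-len('.pkl')]] = pickle.load(fh)  # keys: 'X_min', 'X_max', ...

X_min, X_max = stats['X_min'], stats['X_max']
Y_mean, Y_var, Y_scale = stats['Y_mean'], stats['Y_var'], stats['Y_scale']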
Example No. 6
        Y[ty][phase] = FileSourceDataset(
            BinaryFileSource(join(DATA_ROOT, "Y_{}".format(ty)),
                             dim=y_dim,
                             train=train))
        utt_lengths[ty][phase] = np.array([len(x) for x in X[ty][phase]],
                                          dtype=int)  # the np.int alias was removed in NumPy 1.24

X_min = {}
X_max = {}
Y_mean = {}
Y_var = {}
Y_scale = {}

for typ in ["acoustic"]:
    X_min[typ], X_max[typ] = minmax(X[typ]["train"], utt_lengths[typ]["train"])
    Y_mean[typ], Y_var[typ] = meanvar(Y[typ]["train"],
                                      utt_lengths[typ]["train"])
    Y_scale[typ] = np.sqrt(Y_var[typ])

import torch
import torch.nn.functional as F
from torch import nn, optim
from torch.autograd import Variable
from torch.utils import data as data_utils
from tqdm import tnrange, tqdm

z_dim = args.z_dim
dropout = args.dropout_ratio

device = 'cuda' if torch.cuda.is_available() else 'cpu'
Example No. 7
def create_loader(test=False):
    DATA_ROOT = "./data/basic5000"
    X = {"acoustic": {}}
    Y = {"acoustic": {}}
    utt_lengths = {"acoustic": {}}
    for ty in ["acoustic"]:
        for phase in ["train", "test"]:
            train = phase == "train"
            x_dim = (duration_linguistic_dim
                     if ty == "duration" else acoustic_linguisic_dim)
            y_dim = duration_dim if ty == "duration" else acoustic_dim
            X[ty][phase] = FileSourceDataset(
                BinaryFileSource(join(DATA_ROOT, "X_{}".format(ty)),
                                 dim=x_dim,
                                 train=train))
            Y[ty][phase] = FileSourceDataset(
                BinaryFileSource(join(DATA_ROOT, "Y_{}".format(ty)),
                                 dim=y_dim,
                                 train=train))
            utt_lengths[ty][phase] = np.array([len(x) for x in X[ty][phase]],
                                              dtype=int)  # the np.int alias was removed in NumPy 1.24

    X_min = {}
    X_max = {}
    Y_mean = {}
    Y_var = {}
    Y_scale = {}

    for typ in ["acoustic"]:
        X_min[typ], X_max[typ] = minmax(X[typ]["train"],
                                        utt_lengths[typ]["train"])
        Y_mean[typ], Y_var[typ] = meanvar(Y[typ]["train"],
                                          utt_lengths[typ]["train"])
        Y_scale[typ] = np.sqrt(Y_var[typ])

    mora_index_lists = sorted(
        glob(join("data/basic5000/mora_index", "squeezed_*.csv")))
    mora_index_lists_for_model = [
        np.loadtxt(path).reshape(-1) for path in mora_index_lists
    ]

    train_mora_index_lists = []
    test_mora_index_lists = []
    test_not_valid = []

    for i, mora_i in enumerate(mora_index_lists_for_model):
        if (i - 1) % 20 == 0:  # held-out test utterances
            if test:
                test_not_valid.append(i)
        elif i % 20 == 0:  # validation utterances
            test_mora_index_lists.append(mora_i)
        else:
            train_mora_index_lists.append(mora_i)

    X_acoustic_train = [
        minmax_scale(
            X["acoustic"]["train"][i],
            X_min["acoustic"],
            X_max["acoustic"],
            feature_range=(0.01, 0.99),
        ) for i in range(len(X["acoustic"]["train"]))
    ]
    Y_acoustic_train = list(Y["acoustic"]["train"])
    # Y_acoustic_train = [scale(Y["acoustic"]["train"][i], Y_mean["acoustic"], Y_scale["acoustic"]) for i in range(len(Y["acoustic"]["train"]))]

    X_acoustic_test = [
        minmax_scale(
            X["acoustic"]["test"][i],
            X_min["acoustic"],
            X_max["acoustic"],
            feature_range=(0.01, 0.99),
        ) for i in range(len(X["acoustic"]["test"]))
    ]
    Y_acoustic_test = list(Y["acoustic"]["test"])
    # Y_acoustic_test = [scale(Y["acoustic"]["test"][i], Y_mean["acoustic"], Y_scale["acoustic"]) for i in range(len(Y["acoustic"]["test"]))]

    train_loader = [[
        X_acoustic_train[i], Y_acoustic_train[i], train_mora_index_lists[i]
    ] for i in range(len(train_mora_index_lists))]
    test_loader = [[
        X_acoustic_test[i], Y_acoustic_test[i], test_mora_index_lists[i]
    ] for i in range(len(test_mora_index_lists))]

    if test:
        # `test_not_valid` holds the indices of the held-out test utterances.
        return train_loader, test_loader, test_not_valid
    else:
        return train_loader, test_loader
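A usage sketch based on the signature above (hypothetical driver code, not part of the original):

train_loader, test_loader = create_loader()
for x, y, mora_index in train_loader:
    # x: min-max scaled linguistic features, y: acoustic features,
    # mora_index: per-utterance mora index array
    ...

# With test=True, the held-out test indices are returned as well:
train_loader, test_loader, test_not_valid = create_loader(test=True)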
Example No. 8
X_melmfcc = FileSourceDataset(MelspecMfccSource(data_root=DATA_ROOT))
print('X_linguistic with dimension = {}'.format(X_linguistic[0].shape[1]))
print('X_pyworld with dimension = {}'.format(X_pyworld[0].shape[1]))
print('X_melmfcc with dimension = {}'.format(X_melmfcc[0].shape[1]))
# Calculate Scale factors:

print('Calculating scale factors: this will take more than 10 minutes...')
#wav_len = [len(y) for y in Y]
#y_min, y_max = minmax(Y, wav_len)

scale_factors = {}
scale_factors['linguistic_len'] = [len(x) for x in X_linguistic]
scale_factors['linguistic_min'], scale_factors['linguistic_max'] = minmax(X_linguistic, scale_factors['linguistic_len'])

scale_factors['pyworld_len'] = [len(x) for x in X_pyworld]
scale_factors['pyworld_mean'], scale_factors['pyworld_var'] = meanvar(X_pyworld, scale_factors['pyworld_len'])
scale_factors['pyworld_std'] = np.sqrt(scale_factors['pyworld_var'])
scale_factors['pyworld_min'], scale_factors['pyworld_max'] = minmax(X_pyworld, scale_factors['pyworld_len'])

# NOTE: this reuses the pyworld frame counts, which assumes the mel/MFCC
# features are frame-aligned with the pyworld features.
scale_factors['melmfcc_mean'], scale_factors['melmfcc_var'] = meanvar(X_melmfcc, scale_factors['pyworld_len'])
scale_factors['melmfcc_std'] = np.sqrt(scale_factors['melmfcc_var'])
scale_factors['melmfcc_min'], scale_factors['melmfcc_max'] = minmax(X_melmfcc, scale_factors['pyworld_len'])
np.save(DST_ROOT + 'scale_factors.npy', scale_factors)

# To reload the scale factors later; a dict payload needs allow_pickle=True
# on NumPy >= 1.16.3.
scale_factors = np.load(DST_ROOT + 'scale_factors.npy', allow_pickle=True).item()
# <wav>
# Resampling silence index -->  Remove silence   --> IF Trainset: sliding window(winsz=5000, overlap=2500) -->  save .npy
#
# <Features>   
Example No. 9
    if not exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # utt_lengths = []

    X = FileSourceDataset(TextDataSource())
    Mel = FileSourceDataset(MelSpecDataSource())
    Y = FileSourceDataset(LinearSpecDataSource())

    print("Size of dataset for {}: {}".format(phase, len(X)))

    ty = "acoustic" if hp == hparams_gan.tts_acoustic else "duration"
    X_data_min, X_data_max = P.minmax(X)
    Mel_data_mean, Mel_data_var = P.meanvar(Mel)
    Mel_data_std = np.sqrt(Mel_data_var)

    np.save(join(data_dir, "X_{}_data_min".format(ty)), X_data_min)
    np.save(join(data_dir, "X_{}_data_max".format(ty)), X_data_max)
    np.save(join(data_dir, "Mel_{}_data_mean".format(ty)), Mel_data_mean)
    np.save(join(data_dir, "Mel_{}_data_var".format(ty)), Mel_data_var)

    if hp.discriminator_params["in_dim"] is None:
        sizes = get_static_stream_sizes(
            hp.stream_sizes, hp.has_dynamic_features, len(hp.windows))
        D = int(np.array(sizes[hp.adversarial_streams]).sum())
        if hp.adversarial_streams[0]:
            D -= hp.mask_nth_mgc_for_adv_loss
        if hp.discriminator_linguistic_condition:
            D = D + X_data_min.shape[-1]