Example 1
def open_stream(which_sets=('train', ), port=5557, num_examples=None):

    dataset = Blizzard(which_sets=which_sets)

    if num_examples is None:
        num_examples = dataset.num_examples

    data_stream = DataStream.default_stream(dataset,
                                            iteration_scheme=SequentialScheme(
                                                num_examples, batch_size))

    data_stream = ScaleAndShift(data_stream,
                                scale=1 / data_std,
                                shift=-data_mean / data_std)
    data_stream = Mapping(data_stream,
                          _downsample_and_upsample,
                          add_sources=('upsampled', ))
    data_stream = Mapping(data_stream, _equalize_size)
    data_stream = Mapping(data_stream,
                          _get_residual,
                          add_sources=('residual', ))
    data_stream = FilterSources(data_stream,
                                sources=(
                                    'upsampled',
                                    'residual',
                                ))
    data_stream = Mapping(data_stream, _segment_axis)
    data_stream = Mapping(data_stream, _transpose)
    data_stream = ForceFloatX(data_stream)

    start_server(data_stream, port=port)
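open_stream blocks in start_server, so the batches have to be consumed from another process. A minimal client-side sketch, assuming Fuel's ServerDataStream and the default localhost host; the source names match the FilterSources call above:

from fuel.streams import ServerDataStream

# The server publishes batches (not single examples) of the two
# filtered sources, in the order set by FilterSources.
client_stream = ServerDataStream(('upsampled', 'residual'),
                                 produces_examples=False,
                                 port=5557)

for upsampled, residual in client_stream.get_epoch_iterator():
    pass  # feed the batch to the training loop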
Example 2
def define_stream(which_sets=('train',),
                  initial_scale=1,
                  scale=0.5,
                  batch_size=64,
                  seq_length=64,
                  frame_size=128,
                  tbptt_flag=True,
                  num_examples=None):

    def _segment_axis(data):
        # Defined inside so that frame_size is available
        return tuple([numpy.array([segment_axis(x, frame_size, 0)
                                   for x in var])
                      for var in data])

    scale = float(scale)

    dataset = Blizzard(which_sets=which_sets)

    if num_examples is None:
        # Largest multiple of batch_size that fits in the dataset
        num_examples = batch_size * (dataset.num_examples // batch_size)

    data_stream = DataStream.default_stream(
            dataset,
            iteration_scheme=SequentialScheme(num_examples, batch_size))

    data_stream = ScaleAndShift(data_stream,
                                scale=1/data_std,
                                shift=-data_mean/float(data_std))

    # Original sampling rate
    data_stream = Resample(data_stream, scale=initial_scale)
    data_stream = Mapping(data_stream, _copy, add_sources=('upsampled',))
    data_stream = Resample(data_stream, scale=scale, which_sources=('upsampled',))
    data_stream = Resample(data_stream, scale=1/scale, which_sources=('upsampled',))

    # data_stream = Mapping(data_stream, _downsample_and_upsample,
    #                       add_sources=('upsampled',))
    data_stream = Mapping(data_stream, _equalize_size)
    data_stream = Mapping(data_stream, _get_residual,
                          add_sources=('residual',))
    data_stream = FilterSources(data_stream,
                                sources=('upsampled', 'residual',))
    data_stream = Mapping(data_stream, _segment_axis)
    data_stream = Mapping(data_stream, _transpose)
    return data_stream
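A minimal usage sketch of define_stream; the shape comment reflects my reading of _segment_axis and _transpose and is an assumption:

stream = define_stream(which_sets=('train',), batch_size=64)

for upsampled, residual in stream.get_epoch_iterator():
    # After _segment_axis and _transpose, each source should be
    # shaped (time_steps, batch_size, frame_size).
    break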
Example 3
# Prepare dataset
#################


def _transpose(data):
    return tuple(array.swapaxes(0, 1) for array in data)


data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'sp_standardize.npz')

data_stats = numpy.load(data_dir)
sp_mean = data_stats['sp_mean']
sp_std = data_stats['sp_std']

dataset = Blizzard(which_sets=('train', ), filename="sp_blizzard.hdf5")
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(
        batch_size * (dataset.num_examples // batch_size), batch_size))
data_stream = FilterSources(data_stream, ('sp', ))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / sp_std,
                            shift=-sp_mean / sp_std,
                            which_sources=('sp', ))
data_stream = Mapping(data_stream, _transpose)
data_stream = SegmentSequence(data_stream, seq_size, add_flag=True)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream

num_valid_examples = 4 * 64
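num_valid_examples suggests a validation stream follows. A hedged sketch of how it might be built, mirroring the training pipeline above; the 'valid' split name follows the other examples and every transformer is reused as-is:

dataset = Blizzard(which_sets=('valid', ), filename="sp_blizzard.hdf5")
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(num_valid_examples, batch_size))
data_stream = FilterSources(data_stream, ('sp', ))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / sp_std,
                            shift=-sp_mean / sp_std,
                            which_sources=('sp', ))
data_stream = Mapping(data_stream, _transpose)
data_stream = SegmentSequence(data_stream, seq_size, add_flag=True)
data_stream = ForceFloatX(data_stream)
valid_stream = data_stream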
Example 4
from play.datasets.blizzard import Blizzard
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot

from fuel.schemes import SequentialScheme
from fuel.streams import DataStream

dataset = Blizzard(which_sets=('test', ), filename="mgc_blizzard.hdf5")

batch_size = 1

data_stream = DataStream.default_stream(dataset,
                                        iteration_scheme=SequentialScheme(
                                            dataset.num_examples, batch_size))

f0, mgc = next(data_stream.get_epoch_iterator())

pyplot.plot(f0[0][:500])
pyplot.savefig('plot_f0.png')
pyplot.close()

pyplot.plot(mgc[0, :500, 3])
pyplot.savefig('plot_mgc.png')
pyplot.close()

pyplot.hist(f0[0, f0[0] > 0])
pyplot.savefig('plot_hist_cond.png')
pyplot.close()

pyplot.hist(mgc.reshape(-1))
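The snippet ends before the last figure is written out; following the pattern above, it would presumably conclude like this (the filename is an assumption):

pyplot.savefig('plot_hist_mgc.png')
pyplot.close()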
Example 5
def _get_residual(data):
    # Is the subtraction order correct? This computes first source
    # minus second source (original - upsampled), example by example.
    ds = numpy.array([x[0] - x[1] for x in zip(*data)])
    return (ds,)

data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'blizzard_standardize.npz')

data_stats = numpy.load(data_dir)
data_mean = data_stats['data_mean']
data_std = data_stats['data_std']

which_sets = ('test',)

dataset = Blizzard(which_sets=which_sets)

data_stream = DataStream.default_stream(
    dataset, iteration_scheme=SequentialScheme(
        dataset.num_examples, batch_size))

epoch_iterator = data_stream.get_epoch_iterator()
raw_audio = next(epoch_iterator)[0]

for i in xrange(n_iter-1):
    x_tr = next(epoch_iterator)[0]
    raw_audio = numpy.hstack([raw_audio, x_tr])

job_id = sys.argv[1]
save_dir = os.environ['RESULTS_DIR']
exp_path = os.path.join(save_dir, 'blizzard/', job_id + "/")
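To make _get_residual concrete, a toy check with made-up values; zip(*data) pairs example i of the two sources, so the result is original - upsampled per example:

import numpy

original = numpy.array([[1.0, 2.0], [3.0, 4.0]])
upsampled = numpy.array([[0.5, 1.5], [2.5, 3.5]])

(residual,) = _get_residual((original, upsampled))
print(residual)  # [[ 0.5  0.5]
                 #  [ 0.5  0.5]]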
Example 6
def _transpose(data):
    return tuple(array.swapaxes(0, 1) for array in data)


def _segment_axis(data):
    x = numpy.array([segment_axis(x, frame_size, 0) for x in data[0]])
    return (x, )


data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'blizzard_standardize.npz')

data_stats = numpy.load(data_dir)
data_mean = data_stats['data_mean']
data_std = data_stats['data_std']

dataset = Blizzard(which_sets=('train', ))
data_stream = DataStream.default_stream(dataset,
                                        iteration_scheme=SequentialScheme(
                                            dataset.num_examples, batch_size))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / data_std,
                            shift=-data_mean / data_std)
data_stream = Mapping(data_stream, _segment_axis)
data_stream = Mapping(data_stream, _transpose)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream

num_valid_examples = 4 * 64 * 5
dataset = Blizzard(which_sets=('valid', ))
data_stream = DataStream.default_stream(dataset,
                                        iteration_scheme=SequentialScheme(
                                            num_valid_examples, batch_size))
Example 7
def _zero_for_unvoiced(data):
    # Zero out the unvoiced components: multiply source 0 by the
    # voiced mask in source 3. The indices are hardcoded.
    return tuple([data[0] * data[3], data[1], data[2], data[3]])


data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'sp_standardize.npz')

data_stats = numpy.load(data_dir)
sp_mean = data_stats['sp_mean']
sp_std = data_stats['sp_std']
f0_mean = data_stats['f0_mean']
f0_std = data_stats['f0_std']

dataset = Blizzard(which_sets=('test', ), filename="sp_blizzard_80h_phon.hdf5")

data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(
        batch_size * (dataset.num_examples // batch_size), batch_size))

epoch_iterator = data_stream.get_epoch_iterator()

# data_stream = Mapping(data_stream, _is_nonzero, add_sources = ('voiced',))
# data_stream = ScaleAndShift(data_stream,
#                             scale = 1/sp_std,
#                             shift = -sp_mean/sp_std,
#                             which_sources = ('sp',))
# data_stream = ScaleAndShift(data_stream,
#                             scale = 1/f0_std,
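To make _zero_for_unvoiced concrete, a toy check with made-up values; the source names are my assumption, and only positions 0 and 3 matter:

import numpy

f0 = numpy.array([100., 120., 50., 90.])
sp = numpy.array([1., 2., 3., 4.])
phonemes = numpy.array([5., 6., 7., 8.])
voiced = numpy.array([1., 1., 0., 1.])

masked = _zero_for_unvoiced((f0, sp, phonemes, voiced))
print(masked[0])  # [ 100.  120.    0.   90.] -- f0 zeroed where unvoiced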
Example 8
mgc = np.apply_along_axis(SPTK.mgcep, 1, frames, order, alpha, gamma)
mgc_sp = np.apply_along_axis(SPTK.mgc2sp, 1, mgc, alpha, gamma,
                             frame_length).real

mgc_sp_test = np.hstack([mgc_sp, mgc_sp[:, ::-1][:, 1:-1]])
mgc_sp_test = mgc_sp_test.copy(order='C')

# Check against the original data that the processing was correct

import h5py
import fuel
from play.datasets.blizzard import Blizzard
from fuel.schemes import SequentialScheme
from fuel.streams import DataStream

dataset_mgc = Blizzard(which_sets=('train', 'valid', 'test'),
                       filename="mgc_blizzard.hdf5")
dataset = Blizzard(which_sets=('train', 'valid', 'test'))

batch_size = 2

data_stream = DataStream.default_stream(dataset,
                                        iteration_scheme=SequentialScheme(
                                            dataset.num_examples, batch_size))

data_stream_mgc = DataStream.default_stream(dataset_mgc,
                                            iteration_scheme=SequentialScheme(
                                                dataset_mgc.num_examples,
                                                batch_size))

raw = next(data_stream.get_epoch_iterator())[0]
f0, mgc = next(data_stream_mgc.get_epoch_iterator())
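The hstack that builds mgc_sp_test rebuilds a full symmetric spectrum from the one-sided half returned by mgc2sp. A toy 1D version of the same mirroring (mgc_sp_test applies it per row; the values are made up):

import numpy as np

half = np.array([0., 1., 2., 3., 4.])       # n // 2 + 1 points, n = 8
full = np.hstack([half, half[::-1][1:-1]])  # mirror the interior points
print(full)  # [ 0.  1.  2.  3.  4.  3.  2.  1.] -- length n, symmetric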