Code Example #1
File: test_fuel.py Project: zhaobozb/test-fuel
def test_transformer():
    from collections import OrderedDict
    import numpy

    from fuel.transformers import ScaleAndShift
    from fuel.datasets import IndexableDataset
    from fuel.streams import DataStream
    from fuel.schemes import ShuffledScheme

    seed = 1234
    rng = numpy.random.RandomState(seed)
    features = rng.randint(256, size=(8, 2, 2))
    targets = rng.randint(4, size=(8, 1))

    dataset = IndexableDataset(indexables=OrderedDict([('features', features),
                                                       ('targets', targets)]),
                               axis_labels=OrderedDict([('features', ('batch', 'height', 'width')),
                                                        ('targets', ('batch', 'index'))]))
    scheme = ShuffledScheme(examples=dataset.num_examples, batch_size=2)
    data_stream = DataStream(dataset=dataset, iteration_scheme=scheme)

    scale = 1.0 / features.std()
    shift = - scale * features.mean()

    standardized_stream = ScaleAndShift(data_stream=data_stream,
                                        scale=scale, shift=shift,
                                        which_sources=('features',))

    for batch in standardized_stream.get_epoch_iterator():
        print(batch)
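Since ScaleAndShift computes scale * data + shift, choosing scale = 1/std and shift = -scale * mean (as above) standardizes the features to zero mean and unit variance. A quick sanity check of that arithmetic, reusing the arrays from the snippet (not part of the original test):

expected = features * scale + shift
assert abs(expected.mean()) < 1e-10
assert abs(expected.std() - 1.0) < 1e-10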
Code Example #2
def test_scale_and_shift():
    stream = DataStream(
        IterableDataset({
            'features': [1, 2, 3],
            'targets': [0, 1, 0]
        }))
    wrapper = ScaleAndShift(stream, 2, -1, which_sources=('targets', ))
    assert list(wrapper.get_epoch_iterator()) == [(1, -1), (2, 1), (3, -1)]
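Note that which_sources restricts the transformer to the named sources: with scale=2 and shift=-1, only the targets are mapped through 2*t - 1 while the features pass through unchanged, which is exactly what the assertion checks:

# (1, 2*0 - 1) == (1, -1);  (2, 2*1 - 1) == (2, 1);  (3, 2*0 - 1) == (3, -1)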
Code Example #3
def create_data(data, size, batch_size, _port):
    if data == "train":
        cats = DogsVsCats(('train', ), subset=slice(0, 20000))
        port = _port + 2
    elif data == "valid":
        cats = DogsVsCats(('train', ), subset=slice(20000, 25000))
        port = _port + 3
    print('port', port)
    stream = DataStream.default_stream(cats,
                                       iteration_scheme=ShuffledScheme(
                                           cats.num_examples, batch_size))
    stream_downscale = MinimumImageDimensions(
        stream, size, which_sources=('image_features', ))
    stream_rotate = FlipAsYouCan(stream_downscale, )
    stream_max = ScikitResize(stream_rotate,
                              image_size,
                              which_sources=('image_features', ))
    stream_scale = ScaleAndShift(stream_max,
                                 1. / 255,
                                 0,
                                 which_sources=('image_features', ))
    stream_data = Cast(stream_scale,
                       dtype='float32',
                       which_sources=('image_features', ))
    start_server(stream_data, port=port)
Code Example #4
def create_data(data):

    stream = DataStream(data,
                        iteration_scheme=ShuffledScheme(
                            data.num_examples, batch_size))

    # Data Augmentation
    stream = MinimumImageDimensions(stream,
                                    image_size,
                                    which_sources=('image_features', ))
    stream = MaximumImageDimensions(stream,
                                    image_size,
                                    which_sources=('image_features', ))
    stream = RandomHorizontalSwap(stream, which_sources=('image_features', ))
    stream = Random2DRotation(stream, which_sources=('image_features', ))
    #stream = ScikitResize(stream, image_size, which_sources=('image_features',))

    # Data Preprocessing

    # Data Transformation
    stream = ScaleAndShift(stream,
                           1. / 255,
                           0,
                           which_sources=('image_features', ))
    stream = Cast(stream, dtype='float32', which_sources=('image_features', ))
    return stream
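A hedged usage sketch for the pipeline above; it assumes the module-level globals batch_size and image_size that create_data references, and any Fuel dataset with an 'image_features' source, e.g. DogsVsCats:

from fuel.datasets.dogs_vs_cats import DogsVsCats

train_stream = create_data(DogsVsCats(('train', )))
first_batch = next(train_stream.get_epoch_iterator())  # images are float32 in [0, 1]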
Code Example #5
File: l3.py Project: donghyunlee/play
def open_stream(which_sets=('train', ), port=5557, num_examples=None):

    dataset = Blizzard(which_sets=which_sets)

    if num_examples is None:
        num_examples = dataset.num_examples

    data_stream = DataStream.default_stream(dataset,
                                            iteration_scheme=SequentialScheme(
                                                num_examples, batch_size))

    data_stream = ScaleAndShift(data_stream,
                                scale=1 / data_std,
                                shift=-data_mean / data_std)
    data_stream = Mapping(data_stream,
                          _downsample_and_upsample,
                          add_sources=('upsampled', ))
    data_stream = Mapping(data_stream, _equalize_size)
    data_stream = Mapping(data_stream,
                          _get_residual,
                          add_sources=('residual', ))
    data_stream = FilterSources(data_stream,
                                sources=(
                                    'upsampled',
                                    'residual',
                                ))
    data_stream = Mapping(data_stream, _segment_axis)
    data_stream = Mapping(data_stream, _transpose)
    data_stream = ForceFloatX(data_stream)

    start_server(data_stream, port=port)
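start_server publishes the stream over a ZMQ socket; a consuming process would typically attach with a matching ServerDataStream. A minimal sketch, assuming the server above is running on localhost with its default port and the two filtered sources:

from fuel.streams import ServerDataStream

train_stream = ServerDataStream(('upsampled', 'residual'),
                                produces_examples=False, port=5557)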
Code Example #6
def batch_iterator(dataset, batchsize, shuffle=False):
    if shuffle:
        train_scheme = ShuffledScheme(examples=dataset.num_examples, batch_size=batchsize)
    else:
        train_scheme = SequentialScheme(examples=dataset.num_examples, batch_size=batchsize)
    stream = DataStream.default_stream(dataset=dataset, iteration_scheme=train_scheme)
    stream_scale = ScaleAndShift(stream, 1./256.0, 0, which_sources=('features',))
    stream_data = Cast(stream_scale, dtype=theano.config.floatX, which_sources=('features',))
    return stream_data.get_epoch_iterator()
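Hypothetical usage of batch_iterator, assuming an HDF5 file laid out with Fuel's H5PYDataset conventions and containing uint8 'features' plus 'targets' sources (the file name here is made up):

from fuel.datasets import H5PYDataset

dataset = H5PYDataset('dataset.hdf5', which_sets=('train',), load_in_memory=True)
for features, targets in batch_iterator(dataset, batchsize=128, shuffle=True):
    break  # features arrive as floatX scaled by 1/256; targets pass through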
Code Example #7
def apply_transformers(data_stream):

    data_stream_ = Flatten(data_stream,
                           which_sources=['features_1', 'features_2'])
    data_stream_ = ScaleAndShift(data_stream_,
                                 which_sources=['features_1', 'features_2'],
                                 scale=2.0,
                                 shift=-1.0)

    return data_stream_
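If features_1 and features_2 are already normalized to [0, 1] (an assumption about the upstream data, not stated in the snippet), this maps them onto [-1, 1]:

# 2.0 * 0.0 - 1.0 == -1.0  and  2.0 * 1.0 - 1.0 == 1.0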
Code Example #8
def make_gen(batch_size, examples=4):
    file_path_f = file_path
    names_select = i_names
    train_set = H5PYDataset(file_path_f,
                            which_sets=('train', 'test'))

    scheme = SequentialScheme(examples=examples, batch_size=batch_size)

    data_stream_train = DataStream(dataset=train_set, iteration_scheme=scheme)

    stand_stream_train = ScaleAndShift(data_stream=data_stream_train,
                                       scale=scale, shift=shift,
                                       which_sources=(names_select[-1],))
    return stand_stream_train, train_set, data_stream_train
Code Example #9
File: test_transformers.py Project: markusnagel/fuel
class TestScaleAndShift(object):
    def setUp(self):
        dataset = IterableDataset(
            OrderedDict([('features', [1, 2, 3]), ('targets', [0, 1, 0])]),
            axis_labels={'features': ('batch',), 'targets': ('batch',)})
        self.stream = DataStream(dataset)
        self.wrapper = ScaleAndShift(
            self.stream, 2, -1, which_sources=('targets',))

    def test_scale_and_shift(self):
        assert_equal(list(self.wrapper.get_epoch_iterator()),
                     [(1, -1), (2, 1), (3, -1)])

    def test_axis_labels_are_passed_through(self):
        assert_equal(self.wrapper.axis_labels, self.stream.axis_labels)
Code Example #10
File: server_stream.py Project: anirudh9119/play
def define_stream(which_sets=('train',),
                  initial_scale=1,
                  scale=0.5,
                  batch_size=64,
                  seq_length=64,
                  frame_size=128,
                  tbptt_flag=True,
                  num_examples=None):

    def _segment_axis(data):
        # Defined inside so that frame_size is available
        x = tuple([numpy.array([segment_axis(x, frame_size, 0) for x in var])
                   for var in data])
        return x

    scale = float(scale)

    dataset = Blizzard(which_sets=which_sets)

    if num_examples is None:
        num_examples = batch_size * (dataset.num_examples // batch_size)

    data_stream = DataStream.default_stream(
            dataset,
            iteration_scheme=SequentialScheme(num_examples, batch_size))

    data_stream = ScaleAndShift(data_stream,
                                scale=1/data_std,
                                shift=-data_mean/float(data_std))

    # Original sampling rate
    data_stream = Resample(data_stream, scale=initial_scale)
    data_stream = Mapping(data_stream, _copy, add_sources=('upsampled',))
    data_stream = Resample(data_stream, scale=scale, which_sources=('upsampled',))
    data_stream = Resample(data_stream, scale=1/scale, which_sources=('upsampled',))

    # data_stream = Mapping(data_stream, _downsample_and_upsample,
    #                       add_sources=('upsampled',))
    data_stream = Mapping(data_stream, _equalize_size)
    data_stream = Mapping(data_stream, _get_residual,
                          add_sources=('residual',))
    data_stream = FilterSources(data_stream,
                                sources=('upsampled', 'residual',))
    data_stream = Mapping(data_stream, _segment_axis)
    data_stream = Mapping(data_stream, _transpose)
    return data_stream
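A plausible reading of this pipeline (hedged, since _copy, _equalize_size, _get_residual and _transpose are defined elsewhere in the project): resampling 'upsampled' down by scale and back up by 1/scale yields a low-passed copy of the signal, so that roughly

# residual = original - upsampled   (what _get_residual presumably computes)

and FilterSources keeps exactly the (input, target) pair a model would be trained on.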
Code Example #11
File: test5.py Project: jpilaul/IFT6266_project
def create_data(data):
    stream = DataStream.default_stream(data,
                                       iteration_scheme=ShuffledScheme(
                                           data.num_examples, batch_size))
    stream_downscale = MinimumImageDimensions(
        stream, image_size, which_sources=('image_features', ))
    #stream_rotate = Random2DRotation(stream_downscale, which_sources=('image_features',))
    stream_max = ScikitResize(stream_downscale,
                              image_size,
                              which_sources=('image_features', ))
    stream_scale = ScaleAndShift(stream_max,
                                 1. / 255,
                                 0,
                                 which_sources=('image_features', ))
    stream_cast = Cast(stream_scale,
                       dtype='float32',
                       which_sources=('image_features', ))
    #stream_flat = Flatten(stream_scale, which_sources=('image_features',))
    return stream_cast
Code Example #12
def monk_music_stream(which_sets=('train', ),
                      batch_size=64,
                      seq_size=128,
                      frame_size=160,
                      num_examples=None,
                      which_sources=('features', )):
    """
    This function generates the stream for the monk_music dataset.
    It doesn't compute incremental windows and instead simply separates the
    dataset into sequences
    """

    dataset = MonkMusic(which_sets=which_sets,
                        filename="dataset.hdf5",
                        load_in_memory=True)

    large_batch_size = batch_size * frame_size * seq_size
    if not num_examples:
        num_examples = large_batch_size * (dataset.num_examples //
                                           large_batch_size)

    # If there are memory problems revert to SequentialScheme
    data_stream = DataStream.default_stream(dataset,
                                            iteration_scheme=SequentialScheme(
                                                num_examples,
                                                large_batch_size))

    data_stream = ScaleAndShift(data_stream,
                                scale=1. / data_stats["std"],
                                shift=-data_stats["mean"] / data_stats["std"])

    data_stream = Mapping(
        data_stream,
        lambda data: _get_subsequences(data, batch_size, seq_size, frame_size))

    data_stream = ForceFloatX(data_stream)

    return data_stream
Code Example #13
def make_gen(Nchunks=True, classif=False, train=True):
    '''
    Nchunks=True: 10 chunks in the generator.
    Nchunks=False: 1 chunk in the generator.
    Distinguishes classification from regression, and train from test.
    '''

    file_path_f = file_path_R
    shift_f = shift_R
    scale_f = scale_R
    if classif:
        file_path_f = file_path_C
        shift_f = shift_C
        scale_f = scale_C

    if Nchunks:
        batch_size = 13
    else:
        batch_size = 130
    t_scheme = SequentialScheme(examples=130, batch_size=batch_size)
    t_source = 'train'
    if not train:
        if Nchunks:
            batch_size = 2
        else:
            batch_size = 20
        t_source = 'test'
        t_scheme = SequentialScheme(examples=20, batch_size=batch_size)

    t_set = H5PYDataset(file_path_f, which_sets=[t_source])
    data_stream_t = DataStream(dataset=t_set, iteration_scheme=t_scheme)

    # which_sources expects a tuple of source names, not a bare string
    stand_stream_t = ScaleAndShift(data_stream=data_stream_t,
                                   scale=scale_f, shift=shift_f,
                                   which_sources=(t_source,))

    return stand_stream_t, t_set, data_stream_t
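A hedged usage sketch (file_path_R, scale_R, shift_R and their classification counterparts are module globals in the original project):

gen, t_set, raw_stream = make_gen(Nchunks=True, classif=False, train=True)
for batch in gen.get_epoch_iterator():
    pass  # 10 chunks of 13 training examples each, with the t_source-named source rescaled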
Code Example #14
File: train_softmax.py Project: fmschleif/lvq
def preprocessing(data_stream):
    return ForceFloatX(ScaleAndShift(data_stream,
                                     1 / 255.0,
                                     0.0,
                                     which_sources=('features', )),
                       which_sources=('features', ))
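Here 1 / 255.0 rescales 8-bit pixel values from [0, 255] into [0, 1], and ForceFloatX then casts the result to theano.config.floatX; the endpoints of the mapping:

# 0 * (1 / 255.0) + 0.0 == 0.0
# 255 * (1 / 255.0) + 0.0 == 1.0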
Code Example #15
File: sample_deep_m3.py Project: donghyunlee/play
save_dir = os.path.join(save_dir, 'blizzard/', job_id + "/", "samples/")

main_loop = load(os.path.join(exp_path, exp_file))

rate = 16000

for i, sample in enumerate(raw_audio[:n_samples]):
    pyplot.plot(sample)
    pyplot.savefig(save_dir +"original_%i.png" % i)
    pyplot.close()

    wavfile.write(save_dir + "original_{}.wav".format(i),
        rate, sample)

data_stream = ScaleAndShift(data_stream, scale=1 / data_std,
                            shift=-data_mean / data_std)
data_stream = Mapping(data_stream, _downsample_and_upsample,
                      add_sources=('upsampled',))

epoch_iterator = data_stream.get_epoch_iterator()

raw_audio_std, upsampled_audio = next(epoch_iterator)

for i in range(n_iter - 1):
    x_tr, y_tr = next(epoch_iterator)
    raw_audio_std = numpy.hstack([raw_audio_std, x_tr])
    upsampled_audio = numpy.hstack([upsampled_audio, y_tr])

for i, (original_, upsampled_) in enumerate(
        list(zip(raw_audio_std, upsampled_audio))[:n_samples]):
Code Example #16
File: sample_deep_m3.py Project: soroushmehr/play
        exp_file = file_

save_dir = os.path.join(save_dir, "blizzard/", job_id + "/", "samples/")

main_loop = load(os.path.join(exp_path, exp_file))

rate = 16000

for i, sample in enumerate(raw_audio[:n_samples]):
    pyplot.plot(sample)
    pyplot.savefig(save_dir + "original_%i.png" % i)
    pyplot.close()

    wavfile.write(save_dir + "original_{}.wav".format(i), rate, sample)

data_stream = ScaleAndShift(data_stream, scale=1 / data_std, shift=-data_mean / data_std)
data_stream = Mapping(data_stream, _downsample_and_upsample, add_sources=("upsampled",))

epoch_iterator = data_stream.get_epoch_iterator()

raw_audio_std, upsampled_audio = next(epoch_iterator)

for i in range(n_iter - 1):
    x_tr, y_tr = next(epoch_iterator)
    raw_audio_std = numpy.hstack([raw_audio_std, x_tr])
    upsampled_audio = numpy.hstack([upsampled_audio, y_tr])

for i, (original_, upsampled_) in enumerate(list(zip(raw_audio_std, upsampled_audio))[:n_samples]):

    f, (ax1, ax2) = pyplot.subplots(2, sharex=True, sharey=True)
    ax1.plot(original_)
Code Example #17
File: test_transformers.py Project: DavidDJChen/fuel
def test_scale_and_shift():
    stream = DataStream(
        IterableDataset(
            OrderedDict([('features', [1, 2, 3]), ('targets', [0, 1, 0])])))
    wrapper = ScaleAndShift(stream, 2, -1, which_sources=('targets',))
    assert list(wrapper.get_epoch_iterator()) == [(1, -1), (2, 1), (3, -1)]
Code Example #18
File: sp_only.py Project: anirudh9119/play
data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'sp_standardize.npz')

data_stats = numpy.load(data_dir)
sp_mean = data_stats['sp_mean']
sp_std = data_stats['sp_std']

dataset = Blizzard(which_sets=('train', ), filename="sp_blizzard.hdf5")
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(
        batch_size * (dataset.num_examples // batch_size), batch_size))
data_stream = FilterSources(data_stream, ('sp', ))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / sp_std,
                            shift=-sp_mean / sp_std,
                            which_sources=('sp', ))
data_stream = Mapping(data_stream, _transpose)
data_stream = SegmentSequence(data_stream, seq_size, add_flag=True)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream

num_valid_examples = 4 * 64
dataset = Blizzard(which_sets=('valid', ), filename="sp_blizzard.hdf5")
data_stream = DataStream.default_stream(dataset,
                                        iteration_scheme=SequentialScheme(
                                            num_valid_examples, batch_size))
data_stream = FilterSources(data_stream, ('sp', ))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / sp_std,
                            shift=-sp_mean / sp_std,
Code Example #19
                                  batch_size=args.batch_size)))
    test_stream = Flatten(DataStream.default_stream(
        dataset_test,
        iteration_scheme=ShuffledScheme(
            examples=dataset_test.num_examples,
            batch_size=args.batch_size)))

    shp = next(train_stream.get_epoch_iterator())[0].shape

    # make the training data 0 mean and variance 1
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1./np.sqrt(np.mean((Xbatch-np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch*scl)
    # scale is applied before shift
    train_stream = ScaleAndShift(train_stream, scl, shft)
    test_stream = ScaleAndShift(test_stream, scl, shft)
    baseline_uniform_noise = 1./255. # appropriate for MNIST and CIFAR10 Fuel datasets, which are scaled [0,1]
    uniform_noise = baseline_uniform_noise/scl

    ## initialize the model
    dpm = model.DiffusionModel(spatial_width, n_colors, uniform_noise=uniform_noise, **model_args)
    dpm.initialize()

    ## set up optimization
    features = T.matrix('features', dtype=theano.config.floatX)
    cost = dpm.cost(features)
    blocks_model = blocks.model.Model(cost)
    cg_nodropout = ComputationGraph(cost)
    if args.dropout_rate > 0:
        # DEBUG this triggers an error on my machine
Code Example #20
## Load cifar10 stream
batch_size = 32
num_train_example = slice_train.stop - slice_train.start
num_valid_example = slice_valid.stop - slice_valid.start
num_test_example = slice_test.stop - slice_test.start

train_dataset = CIFAR10(('train', ), subset=slice_train)
train_stream = DataStream.default_stream(train_dataset,
                                         iteration_scheme=SequentialScheme(
                                             train_dataset.num_examples,
                                             batch_size))
train_stream = OneHotEncode10(train_stream, which_sources=('targets', ))
train_stream = RandomHorizontalFlip(train_stream, which_sources=('features', ))
train_stream = MinimumImageDimensions(train_stream, (224, 224),
                                      which_sources=('features', ))
train_stream = ScaleAndShift(train_stream, 1., 0, which_sources=('features', ))
train_stream = Cast(train_stream, 'floatX', which_sources=('features', ))

valid_dataset = CIFAR10(('train', ), subset=slice_valid)
valid_stream = DataStream.default_stream(valid_dataset,
                                         iteration_scheme=SequentialScheme(
                                             valid_dataset.num_examples,
                                             batch_size))
valid_stream = OneHotEncode10(valid_stream, which_sources=('targets', ))
valid_stream = MinimumImageDimensions(valid_stream, (224, 224),
                                      which_sources=('features', ))
valid_stream = ScaleAndShift(valid_stream, 1., 0, which_sources=('features', ))
valid_stream = Cast(valid_stream, 'floatX', which_sources=('features', ))

test_dataset = CIFAR10(('train', ), subset=slice_test)
test_stream = DataStream.default_stream(test_dataset,
Code Example #21
#get the test stream
from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.streams import DataStream, ServerDataStream
from fuel.schemes import ShuffledScheme, SequentialExampleScheme
from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, MaximumImageDimensions, Random2DRotation
from fuel.transformers import Flatten, Cast, ScaleAndShift
size = (128, 128)
cats = DogsVsCats(('test', ))
stream = DataStream.default_stream(cats,
                                   iteration_scheme=SequentialExampleScheme(
                                       cats.num_examples))
stream_upscale = MaximumImageDimensions(stream,
                                        size,
                                        which_sources=('image_features', ))
stream_scale = ScaleAndShift(stream_upscale,
                             1. / 255,
                             0,
                             which_sources=('image_features', ))
stream_data = Cast(stream_scale,
                   dtype='float32',
                   which_sources=('image_features', ))

#Load the parameters of the model
params = load_parameter_values('convnet_parameters.pkl')
mo = Model(predict)
mo.set_parameter_values(params)
#Create the forward propagation function
fprop = function(mo.inputs, mo.outputs[0], allow_input_downcast=True)
tab = []
i = 1
#Get the prediction for each example of the test set
for data in stream_data.get_epoch_iterator():
Code Example #22
                                   iteration_scheme=SequentialScheme(
                                       train.num_examples, batch_size))

# upscaled_stream = MinimumImageDimensions(stream, (100, 100), which_sources=('image_features',))
downscaled_stream = DownscaleMinDimension(stream,
                                          100,
                                          which_sources=('image_features', ))

# Our images are of different sizes, so we'll use a Fuel transformer
# to take random crops of size (32 x 32) from each image
cropped_stream = RandomFixedSizeCrop(downscaled_stream, (100, 100),
                                     which_sources=('image_features', ))

rotated_stream = Random2DRotation(cropped_stream,
                                  math.pi / 6,
                                  which_sources=('image_features', ))
flipped_stream = RandomHorizontalFlip(rotated_stream,
                                      which_sources=('image_features', ))

# We'll use a simple MLP, so we need to flatten the images
# from (channel, width, height) to simply (features,)
float_stream = ScaleAndShift(flipped_stream,
                             1. / 255,
                             0,
                             which_sources=('image_features', ))
float32_stream = Cast(float_stream,
                      numpy.float32,
                      which_sources=('image_features', ))

start_server(float32_stream, port=port)
Code Example #23
File: deep_m1.py Project: donghyunlee/play
    return (x, )


data_dir = os.environ['FUEL_DATA_PATH']
data_dir = os.path.join(data_dir, 'blizzard/', 'blizzard_standardize.npz')

data_stats = numpy.load(data_dir)
data_mean = data_stats['data_mean']
data_std = data_stats['data_std']

dataset = Blizzard(which_sets=('train', ))
data_stream = DataStream.default_stream(dataset,
                                        iteration_scheme=SequentialScheme(
                                            dataset.num_examples, batch_size))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / data_std,
                            shift=-data_mean / data_std)
data_stream = Mapping(data_stream, _segment_axis)
data_stream = Mapping(data_stream, _transpose)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream

num_valid_examples = 4 * 64 * 5
dataset = Blizzard(which_sets=('valid', ))
data_stream = DataStream.default_stream(dataset,
                                        iteration_scheme=SequentialScheme(
                                            num_valid_examples,
                                            10 * batch_size))
data_stream = ScaleAndShift(data_stream,
                            scale=1 / data_std,
                            shift=-data_mean / data_std)
Code Example #24
data_stream = DataStream.default_stream(
    dataset, iteration_scheme=ShuffledScheme(
        dataset.num_examples, batch_size))

x_tr = next(data_stream.get_epoch_iterator())
#ipdb.set_trace()

# Standardize data
all_data = numpy.array([])
for batch in data_stream.get_epoch_iterator():
    for element in batch[0]:
        all_data = numpy.hstack([all_data, element])
mean_data = all_data.mean()
std_data = all_data.std()

data_stream = ScaleAndShift(data_stream, scale=1 / std_data,
                            shift=-mean_data / std_data)
data_stream = Mapping(data_stream, _segment_axis)
data_stream = Padding(data_stream)
data_stream = Mapping(data_stream, _transpose)
data_stream = ForceFloatX(data_stream)

#################
# Model
#################

activations_x = [Rectifier()]*depth_x

dims_x = [frame_size] + [hidden_size_mlp_x]*(depth_x-1) + \
         [hidden_size_recurrent]

activations_theta = [Rectifier()]*depth_theta
Code Example #25
File: kaggle-to-hdf5.py Project: adetante/ml-kaggle
split_dict = {
    'train': {
        'images': (0, train_feature.shape[0]),
        'targets': (0, train_target.shape[0])
    }
}

f.attrs['split'] = H5PYDataset.create_split_array(split_dict)

f.flush()
f.close()

train_set = H5PYDataset('../../data/dataset.hdf5', which_sets=('train',))
#data_stream = DataStream(dataset=train_set, iteration_scheme=scheme)

#state = train_set.open()
scheme = ShuffledScheme(examples=train_set.num_examples, batch_size=4)

data_stream = DataStream(dataset=train_set, iteration_scheme=scheme)
for data in data_stream.get_epoch_iterator():
    print(data[0], data[1])

standardized_stream = ScaleAndShift(data_stream=data_stream,
                                    scale=255, shift=0,
                                    which_sources=('features',))

for data in standardized_stream.get_epoch_iterator():
    print(data[0], data[1])

#train_set.close(state)
Code Example #26
File: main_sp.py Project: anirudh9119/play
data_stats = numpy.load(data_dir)
mgc_mean = data_stats['mgc_mean']
mgc_std = data_stats['mgc_std']
f0_mean = data_stats['f0_mean']
f0_std = data_stats['f0_std']

dataset = Blizzard(which_sets=('train', ), filename="mgc_blizzard.hdf5")
data_stream = DataStream.default_stream(dataset,
                                        iteration_scheme=SequentialScheme(
                                            dataset.num_examples, batch_size))

data_stream = Mapping(data_stream, _transform_to_sp)

data_stream = ScaleAndShift(data_stream,
                            scale=1 / f0_std,
                            shift=-f0_mean / f0_std,
                            which_sources=('f0', ))
# data_stream = ScaleAndShift(data_stream,
#                             scale = 1/mgc_std,
#                             shift = -mgc_mean/mgc_std,
#                             which_sources = ('mgc',))
data_stream = Mapping(data_stream, _transpose)
data_stream = SegmentSequence(data_stream, seq_size, add_flag=True)

data_stream = ForceFloatX(data_stream)
train_stream = data_stream

num_valid_examples = 4 * 64
dataset = Blizzard(which_sets=('valid', ), filename="mgc_blizzard.hdf5")
data_stream = DataStream.default_stream(dataset,
                                        iteration_scheme=SequentialScheme(
Code Example #27
data_stream = DataStream.default_stream(dataset,
                                        iteration_scheme=ShuffledScheme(
                                            dataset.num_examples, batch_size))

x_tr = next(data_stream.get_epoch_iterator())

# Standardize data
all_data = numpy.array([])
for batch in data_stream.get_epoch_iterator():
    for element in batch[0]:
        all_data = numpy.hstack([all_data, element])
mean_data = all_data.mean()
std_data = all_data.std()

data_stream = ScaleAndShift(data_stream,
                            scale=1 / std_data,
                            shift=-mean_data / std_data)
data_stream = Mapping(data_stream,
                      _downsample_and_upsample,
                      add_sources=('upsampled', ))
data_stream = Mapping(data_stream, _equalize_size)
data_stream = Mapping(data_stream, _get_residual, add_sources=('residual', ))
data_stream = FilterSources(data_stream, sources=(
    'upsampled',
    'residual',
))
data_stream = Mapping(data_stream, _segment_axis)
data_stream = Padding(data_stream)
data_stream = FilterSources(data_stream,
                            sources=('upsampled', 'residual', 'residual_mask'))
data_stream = Mapping(data_stream, _transpose)
Code Example #28
File: monk_music.py Project: TiSU32/ift6266h16
    data_stream = dataset.get_example_stream()
    it = data_stream.get_epoch_iterator()
    sequence = next(it)
    length = len(sequence[0])
    temp = numpy.random.choice(length, 100000)
    temp = list(map(lambda l: float(l[0]), sequence[0][temp]))
    temp = numpy.array(temp)
    data_stats["mean"] = temp.mean()
    data_stats["std"] = temp.std()
    numpy.save(data_stats_file, data_stats)

data_stream = Batch(data_stream=data_stream,
                    iteration_scheme=ConstantScheme(batch_size))

data_stream = ScaleAndShift(data_stream,
                            scale=1. / data_stats["std"],
                            shift=-data_stats["mean"] / data_stats["std"])
#data_stream = Mapping(data_stream, _transpose)
#data_stream = SegmentSequence(data_stream, 16*seq_size, add_flag=True)
data_stream = ForceFloatX(data_stream)
train_stream = data_stream
"""
dataset = MonkMusic(which_sets = ('valid',), filename = "XqaJ2Ol5cC4.hdf5",
    load_in_memory=True)

data_stream = dataset.get_example_stream()
data_stream = Batch(data_stream=data_stream,iteration_scheme=ConstantScheme(batch_size))
data_stream = ScaleAndShift(data_stream,
                             scale = 1/data_stats["std"],
                             shift = -data_stats["mean"]/data_stats["std"])
#data_stream = SegmentSequence(data_stream, 16*seq_size, add_flag=True)