forked from negar-rostamzadeh/LSTM-Attention
-
Notifications
You must be signed in to change notification settings - Fork 0
/
datasets.py
102 lines (79 loc) · 3.49 KB
/
datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import numpy
import theano
from fuel.streams import DataStream
from fuel.transformers import Flatten
from fuel.datasets import H5PYDataset
from fuel.schemes import ShuffledScheme
from fuel.transformers import Transformer
floatX = theano.config.floatX
def get_mnist_streams(num_train_examples, batch_size):
    """Split the MNIST training set into train and validation streams.

    Parameters
    ----------
    num_train_examples : int
        Number of examples (after a fixed shuffle) to use for training;
        every remaining example of the training split becomes validation.
    batch_size : int
        Batch size used by both streams.

    Returns
    -------
    (train_stream, valid_stream)
        Fuel streams yielding flattened 'features' batches.
    """
    from fuel.datasets import MNIST
    dataset = MNIST(("train",))
    all_ind = numpy.arange(dataset.num_examples)
    # Fixed seed so the train/valid split is reproducible across runs.
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(all_ind)
    indices_train = all_ind[:num_train_examples]
    indices_valid = all_ind[num_train_examples:]
    train_stream = Flatten(DataStream.default_stream(
        dataset,
        iteration_scheme=ShuffledScheme(indices_train, batch_size)),
        which_sources=('features',))
    valid_stream = Flatten(DataStream.default_stream(
        dataset,
        iteration_scheme=ShuffledScheme(indices_valid, batch_size)),
        which_sources=('features',))
    return train_stream, valid_stream
def get_memory_streams(num_train_examples, batch_size, time_length=15, dim=2):
    """Build synthetic delayed-copy sequences for a memory task.

    Targets reproduce the inputs with a per-channel delay (1 step for
    channel 0, 3 steps for channel 1) plus small Gaussian noise, so a
    model must remember past inputs to predict the targets.

    Parameters
    ----------
    num_train_examples : int
        Total number of examples; divided by `batch_size` to get the
        number of pre-batched sequences.
    batch_size : int
        Batch dimension baked into the generated arrays.
    time_length : int
        Number of time steps per sequence.
    dim : int
        Feature dimensionality (delays are defined for the first two
        channels).

    Returns
    -------
    (train_stream, valid_stream)
        Two DataStreams over the same synthetic dataset.
    """
    from fuel.datasets import IterableDataset
    numpy.random.seed(0)
    # Floor division: true division yields a float under Python 3, which
    # numpy.random.randn rejects as a shape argument.
    num_sequences = num_train_examples // batch_size
    # generating random sequences
    seq_u = numpy.random.randn(num_sequences, time_length, batch_size, dim)
    seq_y = numpy.zeros((num_sequences, time_length, batch_size, dim))
    seq_y[:, 1:, :, 0] = seq_u[:, :-1, :, 0]  # 1 time-step delay
    seq_y[:, 3:, :, 1] = seq_u[:, :-3, :, 1]  # 3 time-step delay
    seq_y += 0.01 * numpy.random.standard_normal(seq_y.shape)
    dataset = IterableDataset({'features': seq_u.astype(floatX),
                               'targets': seq_y.astype(floatX)})
    train_stream = DataStream(dataset)
    valid_stream = DataStream(dataset)
    return train_stream, valid_stream
class PreprocessTransformer(Transformer):
    """Reshape video batches from (B, T, X, Y, C) to (T, B, X*Y*C).

    Collapses the per-frame spatial and channel axes of the first
    source ('features') into a single feature axis and moves time to
    the leading axis; the second source (targets) passes through
    unchanged.
    """

    def __init__(self, data_stream, **kwargs):
        super(PreprocessTransformer, self).__init__(data_stream, **kwargs)

    def get_data(self, request=None):
        data = next(self.child_epoch_iterator)
        n_b, n_t, n_x, n_y, n_c = data[0].shape
        # Flatten each frame to a vector, then swap batch and time so
        # the result is T x B x F.
        features = data[0].reshape((n_b, n_t, n_x * n_y * n_c)).swapaxes(0, 1)
        return [features, data[1]]
class ClutteredMNISTVideo(H5PYDataset):
    """H5PYDataset wrapper for the cluttered-MNIST-video HDF5 file.

    Parameters
    ----------
    which_sets : iterable of str
        Splits to load, e.g. ``["train"]`` or ``["valid"]``.
    path : str, optional
        Location of the HDF5 file. Defaults to the original hard-coded
        shared-filesystem path, so existing callers are unaffected.
    **kwargs
        Forwarded to ``H5PYDataset``; ``load_in_memory`` defaults to
        False because the video data is large.
    """

    # Original hard-coded location, kept as the default.
    DEFAULT_PATH = ("/data/lisatmp3/cooijmat/datasets/"
                    "cluttered-mnist-video/cluttered-mnist-video.hdf5")

    def __init__(self, which_sets, path=None, **kwargs):
        kwargs.setdefault('load_in_memory', False)
        super(ClutteredMNISTVideo, self).__init__(
            self.DEFAULT_PATH if path is None else path,
            which_sets, **kwargs)
def get_mnist_video_streams(batch_size):
    """Create shuffled train/valid streams over cluttered-MNIST-video.

    Parameters
    ----------
    batch_size : int
        Batch size for both streams.

    Returns
    -------
    (train_datastream, valid_datastream)
        Streams whose 'features' are reshaped to T x B x F by
        PreprocessTransformer.
    """
    # One seeded RNG shared by both shuffles; the train permutation is
    # drawn first, matching the original RNG consumption order exactly.
    rng = numpy.random.RandomState(seed=1)

    def make_stream(which_set):
        # Build one shuffled, preprocessed stream for the given split.
        dataset = ClutteredMNISTVideo(which_sets=[which_set])
        indices = numpy.arange(dataset.num_examples)
        rng.shuffle(indices)
        stream = DataStream.default_stream(
            dataset,
            iteration_scheme=ShuffledScheme(indices, batch_size))
        return PreprocessTransformer(stream)

    train_datastream = make_stream("train")
    valid_datastream = make_stream("valid")
    return train_datastream, valid_datastream