Beispiel #1
0
 def test_two_sources(self):
     """Check the size of the first batch of a padded two-source stream.

     A dataset with ``features`` and ``targets`` sources is batched two
     examples at a time and wrapped in ``Padding``; the first batch is
     expected to hold four entries (presumably each source plus its mask).
     """
     examples = dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]])
     batched = Batch(DataStream(IterableDataset(examples)),
                     ConstantScheme(2))
     transformer = Padding(batched)
     first_batch = next(transformer.get_epoch_iterator())
     assert len(first_batch) == 4
Beispiel #2
0
 def test_two_sources(self):
     """Padding a stream with two sources should give four-entry batches.

     The stream is built from ``features`` and ``targets`` sequences that
     are grouped in batches of two before being padded.
     """
     raw_sources = dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]])
     example_stream = DataStream(IterableDataset(raw_sources))
     transformer = Padding(Batch(example_stream, ConstantScheme(2)))
     epoch_iterator = transformer.get_epoch_iterator()
     # Only the first batch is inspected.
     assert len(next(epoch_iterator)) == 4
 def test_mask_dtype(self):
     """Check that ``mask_dtype`` controls the dtype of the emitted mask.

     The entry at index 1 of the first padded batch (presumably a mask)
     must report ``uint8`` as its dtype.
     """
     examples = dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]])
     batched = Batch(DataStream(IterableDataset(examples)), ConstantScheme(2))
     transformer = Padding(batched, mask_dtype="uint8")
     first_batch = next(transformer.get_epoch_iterator())
     assert_equal(str(first_batch[1].dtype), "uint8")
Beispiel #4
0
 def test_mask_sources(self):
     """Check batch size when only one source is listed in ``mask_sources``.

     With ``mask_sources=('features',)`` the first batch is expected to
     hold three entries (presumably both sources plus a single mask).
     """
     ordered_sources = OrderedDict([
         ('features', [[1], [2, 3]]),
         ('targets', [[4, 5, 6], [7]]),
     ])
     batched = Batch(DataStream(IterableDataset(ordered_sources)),
                     ConstantScheme(2))
     transformer = Padding(batched, mask_sources=('features',))
     first_batch = next(transformer.get_epoch_iterator())
     assert_equal(len(first_batch), 3)
Beispiel #5
0
 def test_mask_dtype(self):
     """The ``mask_dtype`` argument should set the dtype of the mask.

     Inspects the second entry of the first padded batch and compares the
     string form of its dtype with ``'uint8'``.
     """
     raw_sources = dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]])
     example_stream = DataStream(IterableDataset(raw_sources))
     transformer = Padding(Batch(example_stream, ConstantScheme(2)),
                           mask_dtype='uint8')
     second_entry = next(transformer.get_epoch_iterator())[1]
     assert_equal(str(second_entry.dtype), 'uint8')
Beispiel #6
0
 def test_mask_sources(self):
     """Restricting ``mask_sources`` should shrink the padded batch.

     Two ordered sources are padded with a mask requested for
     ``'features'`` only; the first batch must then have three entries.
     """
     feature_seqs = [[1], [2, 3]]
     target_seqs = [[4, 5, 6], [7]]
     dataset = IterableDataset(
         OrderedDict([('features', feature_seqs), ('targets', target_seqs)]))
     batched = Batch(DataStream(dataset), ConstantScheme(2))
     transformer = Padding(batched, mask_sources=('features',))
     epoch_iterator = transformer.get_epoch_iterator()
     assert_equal(len(next(epoch_iterator)), 3)
 def test_1d_sequences(self):
     """Check padded data and masks for one-dimensional sequences.

     Five sequences of differing length (one of them empty) are batched
     in pairs.  The padded stream must expose the ``data`` and
     ``data_mask`` sources and yield the expected (data, mask) pairs.
     """
     sequences = [[1], [2, 3], [], [4, 5, 6], [7]]
     batched = Batch(DataStream(IterableDataset(sequences)),
                     ConstantScheme(2))
     transformer = Padding(batched)
     assert_equal(transformer.sources, ("data", "data_mask"))

     # Materialise the epoch first, then compare against the expectation.
     produced = list(transformer.get_epoch_iterator())
     expected = [
         (numpy.array([[1, 0], [2, 3]]),
          numpy.array([[1, 0], [1, 1]])),
         (numpy.array([[0, 0, 0], [4, 5, 6]]),
          numpy.array([[0, 0, 0], [1, 1, 1]])),
         (numpy.array([[7]]), numpy.array([[1]])),
     ]
     assert_equal(produced, expected)
Beispiel #8
0
 def test_1d_sequences(self):
     """Padding of 1-D sequences: source names, padded data and masks.

     The input holds five sequences (lengths 1, 2, 0, 3 and 1) grouped in
     batches of two, so three batches are expected in one epoch.
     """
     example_stream = DataStream(
         IterableDataset([[1], [2, 3], [], [4, 5, 6], [7]]))
     transformer = Padding(Batch(example_stream, ConstantScheme(2)))
     assert_equal(transformer.sources, ("data", "data_mask"))

     all_batches = list(transformer.get_epoch_iterator())
     # Each entry is a (padded data, mask) pair for one batch.
     first = (numpy.array([[1, 0], [2, 3]]),
              numpy.array([[1, 0], [1, 1]]))
     second = (numpy.array([[0, 0, 0], [4, 5, 6]]),
               numpy.array([[0, 0, 0], [1, 1, 1]]))
     third = (numpy.array([[7]]), numpy.array([[1]]))
     assert_equal(all_batches, [first, second, third])
def test_padding():
    """Exercise ``Padding`` on 1-D sequences, 2-D sequences and two sources.

    Each section builds a batched stream (batches of two examples), wraps
    it in ``Padding`` and checks the padded data and/or the masks.
    """
    # --- 1-D sequences -----------------------------------------------------
    sequences = [[1], [2, 3], [], [4, 5, 6], [7]]
    padded_1d = Padding(
        Batch(DataStream(IterableDataset(sequences)), ConstantScheme(2)))
    assert padded_1d.sources == ("data", "data_mask")

    # Expected (data, mask) contents of the three batches, in order.
    expected_batches = [
        ([[1, 0], [2, 3]], [[1, 0], [1, 1]]),
        ([[0, 0, 0], [4, 5, 6]], [[0, 0, 0], [1, 1, 1]]),
        ([[7]], [[1]]),
    ]
    batch_iter = padded_1d.get_epoch_iterator()
    for want_data, want_mask in expected_batches:
        data, mask = next(batch_iter)
        assert (data == numpy.array(want_data)).all()
        assert (mask == numpy.array(want_mask)).all()

    # --- 2-D sequences -----------------------------------------------------
    matrices = [numpy.ones((3, 4)), 2 * numpy.ones((2, 4))]
    batched_2d = Batch(DataStream(IterableDataset(matrices)),
                       ConstantScheme(2))
    data, mask = next(Padding(batched_2d).get_epoch_iterator())
    assert data.shape == (2, 3, 4)
    assert (data[0, :, :] == 1).all()
    # Only the first two rows of the shorter example carry real values.
    assert (data[1, :2, :] == 2).all()
    assert (mask == numpy.array([[1, 1, 1], [1, 1, 0]])).all()

    # --- two sources -------------------------------------------------------
    two_sources = dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]])
    padded_two = Padding(
        Batch(DataStream(IterableDataset(two_sources)), ConstantScheme(2)))
    first_batch = next(padded_two.get_epoch_iterator())
    assert len(first_batch) == 4
def test_padding():
    """Check ``Padding`` output for three kinds of input stream.

    Covers one-dimensional sequences (including an empty one),
    two-dimensional sequences of different length, and a dataset with
    two sources.  All streams use batches of two examples.
    """
    scheme_size = 2

    # One-dimensional sequences: verify every batch of the epoch.
    dataset_1d = IterableDataset([[1], [2, 3], [], [4, 5, 6], [7]])
    mask_stream = Padding(
        Batch(DataStream(dataset_1d), ConstantScheme(scheme_size)))
    assert mask_stream.sources == ("data", "data_mask")
    epoch = mask_stream.get_epoch_iterator()
    checks = (
        ([[1, 0], [2, 3]], [[1, 0], [1, 1]]),
        ([[0, 0, 0], [4, 5, 6]], [[0, 0, 0], [1, 1, 1]]),
        ([[7]], [[1]]),
    )
    for data_rows, mask_rows in checks:
        data, mask = next(epoch)
        assert (data == numpy.array(data_rows)).all()
        assert (mask == numpy.array(mask_rows)).all()

    # Two-dimensional sequences: a (3, 4) and a (2, 4) example.
    dataset_2d = IterableDataset(
        [numpy.ones((3, 4)), 2 * numpy.ones((2, 4))])
    padded_2d = Padding(
        Batch(DataStream(dataset_2d), ConstantScheme(scheme_size)))
    data, mask = next(padded_2d.get_epoch_iterator())
    assert data.shape == (2, 3, 4)
    assert (data[0, :, :] == 1).all()
    assert (data[1, :2, :] == 2).all()
    assert (mask == numpy.array([[1, 1, 1], [1, 1, 0]])).all()

    # Two sources: the padded batch is expected to have four entries.
    dataset_two = IterableDataset(
        dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]]))
    padded_two = Padding(
        Batch(DataStream(dataset_two), ConstantScheme(scheme_size)))
    assert len(next(padded_two.get_epoch_iterator())) == 4
# Script fragment: finishes the training stream, builds the validation
# stream, declares the symbolic inputs and assembles the recurrent
# transition.  `data_stream`, `batch_size`, `hidden_size_recurrent` and
# `_transpose` are defined outside this fragment.

# Disabled step (left commented out by the author):
#data_stream = FilterSources(data_stream,
#                          sources = ('features',))
# Wrap the training stream in Padding, then apply `_transpose` to every
# batch.  NOTE(review): `_transpose` is not visible here; confirm what it
# does to each source before relying on the batch layout.
data_stream = Padding(data_stream)
data_stream = Mapping(data_stream, _transpose)
# Disabled step (left commented out by the author):
#data_stream = ForceFloatX(data_stream)

# Validation stream: same Padding + `_transpose` pipeline over the 'valid'
# split.  The value passed to SequentialScheme is ten times the training
# `batch_size`.  Note that this rebinds the module-level name `dataset`.
dataset = Handwriting(('valid', ))
valid_stream = DataStream.default_stream(dataset,
                                         iteration_scheme=SequentialScheme(
                                             dataset.num_examples,
                                             10 * batch_size))
valid_stream = Padding(valid_stream)
valid_stream = Mapping(valid_stream, _transpose)

# Pull a single batch from the training stream.
x_tr = next(data_stream.get_epoch_iterator())

# Variables named after the stream sources and their masks
# (presumably Theano symbolic variables — `tensor` is imported elsewhere).
x = tensor.tensor3('features')
x_mask = tensor.matrix('features_mask')

context = tensor.imatrix('transcripts')
context_mask = tensor.matrix('transcripts_mask')

# Three GatedRecurrent layers named gru_0, gru_1 and gru_2 ...
transition = [
    GatedRecurrent(dim=hidden_size_recurrent, name="gru_{}".format(i))
    for i in range(3)
]

# ... combined into one RecurrentStack with skip connections enabled.
# `transition` is rebound from the list of layers to the stack.
transition = RecurrentStack(transition,
                            name="transition",
                            skip_connections=True)
# Script fragment: builds the training and validation streams, declares
# the symbolic inputs, and assembles the recurrent transition and emitter.
# `dataset`, `batch_size`, `hidden_size_recurrent`, `k` and `_transpose`
# are defined outside this fragment.

# Training stream over the `dataset` bound before this fragment.
data_stream = DataStream.default_stream(dataset, iteration_scheme=SequentialScheme(dataset.num_examples, batch_size))

# Disabled step (left commented out by the author):
# data_stream = FilterSources(data_stream,
#                          sources = ('features',))
# Wrap the stream in Padding, then apply `_transpose` to every batch.
# NOTE(review): `_transpose` is not visible here; confirm its effect on
# each source before relying on the batch layout.
data_stream = Padding(data_stream)
data_stream = Mapping(data_stream, _transpose)
# Disabled step (left commented out by the author):
# data_stream = ForceFloatX(data_stream)

# Validation stream: same pipeline over the "valid" split, with ten times
# the training `batch_size` passed to SequentialScheme.  This rebinds the
# module-level name `dataset`.
dataset = Handwriting(("valid",))
valid_stream = DataStream.default_stream(
    dataset, iteration_scheme=SequentialScheme(dataset.num_examples, 10 * batch_size)
)
valid_stream = Padding(valid_stream)
valid_stream = Mapping(valid_stream, _transpose)

# Pull a single batch from the training stream.
x_tr = next(data_stream.get_epoch_iterator())

# Variables named after the stream sources and their masks
# (presumably Theano symbolic variables — `tensor` is imported elsewhere).
x = tensor.tensor3("features")
x_mask = tensor.matrix("features_mask")

context = tensor.imatrix("transcripts")
context_mask = tensor.matrix("transcripts_mask")

# Three GatedRecurrent layers named gru_0, gru_1 and gru_2 ...
transition = [GatedRecurrent(dim=hidden_size_recurrent, name="gru_{}".format(i)) for i in range(3)]

# ... combined into one RecurrentStack with skip connections enabled.
# `transition` is rebound from the list of layers to the stack.
transition = RecurrentStack(transition, name="transition", skip_connections=True)

# Emitter built with `k` passed through as the keyword argument `k`.
emitter = BivariateGMMEmitter(k=k)

# Keep only the state names of the stack that contain the substring "states".
source_names = [name for name in transition.apply.states if "states" in name]