def test_two_sources(self):
    """Check the size of a padded batch built from two sources.

    The dataset exposes ``features`` and ``targets``; the first batch of
    the padded stream must contain four entries (presumably each source
    followed by its mask -- confirm against ``Padding``).
    """
    examples = dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]])
    batches = Batch(DataStream(IterableDataset(examples)), ConstantScheme(2))
    transformer = Padding(batches)
    first_batch = next(transformer.get_epoch_iterator())
    assert len(first_batch) == 4
def test_two_sources(self):
    """Padding a ``features``/``targets`` batch must yield four entries."""
    dataset = IterableDataset(
        dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]]))
    # Batches of two examples, so both examples land in the first batch.
    batched_stream = Batch(DataStream(dataset), ConstantScheme(2))
    transformer = Padding(batched_stream)
    epoch_iterator = transformer.get_epoch_iterator()
    assert len(next(epoch_iterator)) == 4
def test_mask_dtype(self):
    """Check that ``mask_dtype`` is honoured by ``Padding``.

    The entry at index 1 of the first padded batch (presumably the mask
    of the first source -- confirm against ``Padding``) must report the
    dtype ``uint8`` that was requested.
    """
    dataset = IterableDataset(
        dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]]))
    batched_stream = Batch(DataStream(dataset), ConstantScheme(2))
    transformer = Padding(batched_stream, mask_dtype="uint8")
    first_batch = next(transformer.get_epoch_iterator())
    assert_equal(str(first_batch[1].dtype), "uint8")
def test_mask_sources(self):
    """Check that ``mask_sources`` limits the size of the padded batch.

    With ``mask_sources=('features',)`` the first batch built from the
    ``features`` and ``targets`` sources must contain three entries.
    """
    sources = OrderedDict([('features', [[1], [2, 3]]),
                           ('targets', [[4, 5, 6], [7]])])
    batched_stream = Batch(DataStream(IterableDataset(sources)),
                           ConstantScheme(2))
    transformer = Padding(batched_stream, mask_sources=('features',))
    first_batch = next(transformer.get_epoch_iterator())
    assert_equal(len(first_batch), 3)
def test_mask_dtype(self):
    """The dtype requested through ``mask_dtype`` must show up in the batch.

    Only the entry at index 1 of the first batch is inspected; it is
    expected to be a mask (assumption -- verify against ``Padding``).
    """
    examples = dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]])
    batches = Batch(DataStream(IterableDataset(examples)), ConstantScheme(2))
    transformer = Padding(batches, mask_dtype='uint8')
    second_entry = next(transformer.get_epoch_iterator())[1]
    assert_equal(str(second_entry.dtype), 'uint8')
def test_mask_sources(self):
    """Restricting ``mask_sources`` to ``features`` must give three entries."""
    examples = OrderedDict([
        ('features', [[1], [2, 3]]),
        ('targets', [[4, 5, 6], [7]]),
    ])
    dataset = IterableDataset(examples)
    batches = Batch(DataStream(dataset), ConstantScheme(2))
    transformer = Padding(batches, mask_sources=('features',))
    epoch_iterator = transformer.get_epoch_iterator()
    assert_equal(len(next(epoch_iterator)), 3)
def test_1d_sequences(self):
    """Check padded data and masks for variable-length 1-D sequences.

    Five sequences (one of them empty) are batched two at a time, so the
    epoch must produce three ``(data, mask)`` pairs under the sources
    ``("data", "data_mask")``.
    """
    sequences = [[1], [2, 3], [], [4, 5, 6], [7]]
    stream = Batch(DataStream(IterableDataset(sequences)), ConstantScheme(2))
    transformer = Padding(stream)
    assert_equal(transformer.sources, ("data", "data_mask"))

    # One (data, mask) pair per batch; the last batch holds a single example.
    expected_batches = [
        (numpy.array([[1, 0], [2, 3]]),
         numpy.array([[1, 0], [1, 1]])),
        (numpy.array([[0, 0, 0], [4, 5, 6]]),
         numpy.array([[0, 0, 0], [1, 1, 1]])),
        (numpy.array([[7]]),
         numpy.array([[1]])),
    ]
    assert_equal(list(transformer.get_epoch_iterator()), expected_batches)
def test_1d_sequences(self):
    """Padding 1-D sequences must yield the expected data/mask batches."""
    dataset = IterableDataset([[1], [2, 3], [], [4, 5, 6], [7]])
    stream = Batch(DataStream(dataset), ConstantScheme(2))
    transformer = Padding(stream)
    assert_equal(transformer.sources, ("data", "data_mask"))

    # Expected (data, mask) pair for each of the three batches in the epoch.
    first = (numpy.array([[1, 0], [2, 3]]),
             numpy.array([[1, 0], [1, 1]]))
    second = (numpy.array([[0, 0, 0], [4, 5, 6]]),
              numpy.array([[0, 0, 0], [1, 1, 1]]))
    third = (numpy.array([[7]]), numpy.array([[1]]))

    produced = list(transformer.get_epoch_iterator())
    assert_equal(produced, [first, second, third])
def test_padding():
    """Exercise ``Padding`` on 1-D sequences, 2-D sequences and two sources.

    Whole-array comparisons use ``numpy.array_equal`` rather than
    ``(a == b).all()``: the ``==`` form broadcasts its operands, so a
    result with the wrong shape (for example ``[[7, 7]]`` compared with
    ``[[7]]``) would still pass. ``numpy.array_equal`` also requires the
    shapes to match.
    """
    # 1-D sequences
    stream = Batch(
        DataStream(IterableDataset([[1], [2, 3], [], [4, 5, 6], [7]])),
        ConstantScheme(2))
    mask_stream = Padding(stream)
    assert mask_stream.sources == ("data", "data_mask")
    it = mask_stream.get_epoch_iterator()
    data, mask = next(it)
    assert numpy.array_equal(data, numpy.array([[1, 0], [2, 3]]))
    assert numpy.array_equal(mask, numpy.array([[1, 0], [1, 1]]))
    data, mask = next(it)
    assert numpy.array_equal(data, numpy.array([[0, 0, 0], [4, 5, 6]]))
    assert numpy.array_equal(mask, numpy.array([[0, 0, 0], [1, 1, 1]]))
    data, mask = next(it)
    assert numpy.array_equal(data, numpy.array([[7]]))
    assert numpy.array_equal(mask, numpy.array([[1]]))

    # 2D sequences
    stream2 = Batch(
        DataStream(
            IterableDataset([numpy.ones((3, 4)), 2 * numpy.ones((2, 4))])),
        ConstantScheme(2))
    it = Padding(stream2).get_epoch_iterator()
    data, mask = next(it)
    assert data.shape == (2, 3, 4)
    # Comparisons against a scalar: broadcasting is intended here.
    assert (data[0, :, :] == 1).all()
    assert (data[1, :2, :] == 2).all()
    assert numpy.array_equal(mask, numpy.array([[1, 1, 1], [1, 1, 0]]))

    # 2 sources
    stream3 = Padding(
        Batch(
            DataStream(
                IterableDataset(
                    dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]]))),
            ConstantScheme(2)))
    assert len(next(stream3.get_epoch_iterator())) == 4
def test_padding():
    """Exercise ``Padding`` on 1-D sequences, 2-D sequences and two sources.

    Array-to-array checks go through ``numpy.array_equal`` so that a
    shape mismatch fails the test; the element-wise ``(a == b).all()``
    form would let broadcasting hide such a mismatch.
    """
    # 1-D sequences
    stream = Batch(
        DataStream(
            IterableDataset([[1], [2, 3], [], [4, 5, 6], [7]])),
        ConstantScheme(2))
    mask_stream = Padding(stream)
    assert mask_stream.sources == ("data", "data_mask")
    it = mask_stream.get_epoch_iterator()

    data, mask = next(it)
    assert numpy.array_equal(data, numpy.array([[1, 0], [2, 3]]))
    assert numpy.array_equal(mask, numpy.array([[1, 0], [1, 1]]))

    data, mask = next(it)
    assert numpy.array_equal(data, numpy.array([[0, 0, 0], [4, 5, 6]]))
    assert numpy.array_equal(mask, numpy.array([[0, 0, 0], [1, 1, 1]]))

    data, mask = next(it)
    assert numpy.array_equal(data, numpy.array([[7]]))
    assert numpy.array_equal(mask, numpy.array([[1]]))

    # 2D sequences
    stream2 = Batch(
        DataStream(
            IterableDataset([numpy.ones((3, 4)), 2 * numpy.ones((2, 4))])),
        ConstantScheme(2))
    it = Padding(stream2).get_epoch_iterator()
    data, mask = next(it)
    assert data.shape == (2, 3, 4)
    # Scalar comparisons: broadcasting against 1 and 2 is intentional.
    assert (data[0, :, :] == 1).all()
    assert (data[1, :2, :] == 2).all()
    assert numpy.array_equal(mask, numpy.array([[1, 1, 1], [1, 1, 0]]))

    # 2 sources
    stream3 = Padding(Batch(
        DataStream(
            IterableDataset(
                dict(features=[[1], [2, 3]], targets=[[4, 5, 6], [7]]))),
        ConstantScheme(2)))
    assert len(next(stream3.get_epoch_iterator())) == 4
# NOTE: two further transformers are disabled in this script:
# FilterSources(data_stream, sources=('features',)) before padding and
# ForceFloatX(data_stream) after the transpose mapping.

# Pad the incoming stream, then run every batch through `_transpose`.
data_stream = Mapping(Padding(data_stream), _transpose)

# Validation split, read with batches ten times larger than `batch_size`
# and pushed through the same padding/transpose steps.
dataset = Handwriting(('valid', ))
valid_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(dataset.num_examples,
                                      10 * batch_size))
valid_stream = Mapping(Padding(valid_stream), _transpose)

# Pull a single batch from `data_stream`.
x_tr = next(data_stream.get_epoch_iterator())

# Symbolic variables, named after the stream sources and their masks.
x = tensor.tensor3('features')
x_mask = tensor.matrix('features_mask')
context = tensor.imatrix('transcripts')
context_mask = tensor.matrix('transcripts_mask')

# Three GatedRecurrent layers of size `hidden_size_recurrent`, wrapped in a
# RecurrentStack with skip connections enabled.
transition = RecurrentStack(
    [GatedRecurrent(dim=hidden_size_recurrent, name="gru_{}".format(i))
     for i in range(3)],
    name="transition",
    skip_connections=True)
# Stream over the current `dataset`, iterated with a SequentialScheme of
# `batch_size` examples, then padded and passed through `_transpose`.
# NOTE: FilterSources(data_stream, sources=('features',)) and
# ForceFloatX(data_stream) are disabled in this script.
data_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(dataset.num_examples, batch_size),
)
data_stream = Mapping(Padding(data_stream), _transpose)

# Validation split: same pipeline, with batches ten times larger.
dataset = Handwriting(("valid",))
valid_stream = DataStream.default_stream(
    dataset,
    iteration_scheme=SequentialScheme(dataset.num_examples, 10 * batch_size),
)
valid_stream = Mapping(Padding(valid_stream), _transpose)

# Pull a single batch from `data_stream`.
x_tr = next(data_stream.get_epoch_iterator())

# Symbolic variables, named after the stream sources and their masks.
x = tensor.tensor3("features")
x_mask = tensor.matrix("features_mask")
context = tensor.imatrix("transcripts")
context_mask = tensor.matrix("transcripts_mask")

# Three GatedRecurrent layers stacked with skip connections.
transition = RecurrentStack(
    [
        GatedRecurrent(dim=hidden_size_recurrent, name="gru_{}".format(i))
        for i in range(3)
    ],
    name="transition",
    skip_connections=True,
)

emitter = BivariateGMMEmitter(k=k)

# Keep only the transition states whose name contains "states".
source_names = [name for name in transition.apply.states if "states" in name]