Ejemplo n.º 1
0
def test_batch_data_alt(samples):
    """Check ``corpus.batch_data`` with ``subsamples=False`` on mixed data.

    The samples used here are:

    - not in sub-batches
    - not numpy arrays (and not cast to them)
    - of variable shape
    - of variable type

    First the un-batched iteration is compared sample by sample, then
    every batch size from 1 to ``len(samples) + 1`` is compared against
    the matching slice of ``samples``.
    """

    def check_equal(expected, actual):
        # fall back to plain equality when np.allclose raises TypeError
        try:
            matched = np.allclose(expected, actual)
        except TypeError:
            matched = expected == actual
        assert matched

    total = len(samples)

    # without a batch size, iterate sample by sample
    num_seen = 0
    unbatched = corpus.batch_data(samples, subsamples=False)
    for expected, actual in zip(samples, unbatched):
        check_equal(expected, actual)
        num_seen += 1
    assert num_seen == total

    # with a batch size, each batch is compared to a slice of samples
    for batch_size in range(1, total + 2):
        offset = 0
        batches = corpus.batch_data(
            samples, batch_size=batch_size, subsamples=False)
        for actual_batch in batches:
            expected_batch = samples[offset:offset + batch_size]
            assert len(expected_batch) == len(actual_batch)
            for expected, actual in zip(expected_batch, actual_batch):
                check_equal(expected, actual)
            offset += len(actual_batch)
        # every sample must have been covered for this batch size
        assert offset == total
Ejemplo n.º 2
0
def test_batch_data_alt(samples):
    """Exercise ``corpus.batch_data`` (no sub-samples) on irregular data.

    Data in this test are not in sub-batches, are not numpy arrays (and
    are not cast), and vary in both shape and type.
    """

    def _samples_match(want, got):
        # np.allclose may raise TypeError; compare with == in that case
        try:
            return np.allclose(want, got)
        except TypeError:
            return want == got

    n_samples = len(samples)

    # pass 1: no batch size given, compare element-wise
    n_compared = 0
    pairs = zip(samples, corpus.batch_data(samples, subsamples=False))
    for n_compared, (want, got) in enumerate(pairs, start=1):
        assert _samples_match(want, got)
    assert n_compared == n_samples

    # pass 2: every batch size from 1 up to one more than the sample count
    for batch_size in range(1, n_samples + 2):
        cursor = 0
        for got_batch in corpus.batch_data(
                samples, batch_size=batch_size, subsamples=False):
            want_batch = samples[cursor:cursor + batch_size]
            assert len(want_batch) == len(got_batch)
            for want, got in zip(want_batch, got_batch):
                assert _samples_match(want, got)
            cursor += len(got_batch)
        # the batches together must span all of samples
        assert cursor == n_samples
Ejemplo n.º 3
0
def test_batch_data_numpy(samples):
    """Check ``corpus.batch_data`` with ``subsamples=False`` on numpy data.

    ``samples`` are numpy data of the same shape and type. The un-batched
    iteration is compared sample by sample; then, for every batch axis
    from -1 to ``len(samples.shape) - 1`` and every batch size from 1 to
    ``len(samples) + 1``, each batch is compared against the matching
    slice of ``samples``.
    """
    batch_start = 0
    for ex_samp, act_samp in zip(
            samples, corpus.batch_data(samples, subsamples=False)):
        try:
            assert np.allclose(ex_samp, act_samp)
        except TypeError:
            # np.allclose raised TypeError; compare exactly
            assert ex_samp.tolist() == act_samp.tolist()
        batch_start += 1
    assert batch_start == len(samples)
    for axis in range(-1, len(samples.shape)):
        # one index entry per dimension of samples; the entry at ``axis``
        # is overwritten below to pick a single sample out of a batch
        batch_slice = [slice(None)] * len(samples.shape)
        for batch_size in range(1, len(samples) + 2):
            batch_start = 0
            for act_batch in corpus.batch_data(
                    samples, batch_size=batch_size, subsamples=False,
                    axis=axis):
                ex_batch = samples[batch_start:batch_start + batch_size]
                assert len(ex_batch) == act_batch.shape[axis]
                for samp_idx in range(len(ex_batch)):
                    batch_slice[axis] = samp_idx
                    ex_samp = ex_batch[samp_idx]
                    act_samp = act_batch[tuple(batch_slice)]
                    try:
                        assert np.allclose(ex_samp, act_samp)
                    except TypeError:
                        assert (
                            ex_samp.flatten().tolist() ==
                            act_samp.flatten().tolist())
                batch_start += len(ex_batch)
            # checked for every batch size (not only the last one) so a
            # batch size that drops samples cannot slip through
            assert batch_start == len(samples)
Ejemplo n.º 4
0
def test_batch_data_tups_alt(samples):
    """Check ``corpus.batch_data`` on samples made of sub-samples.

    Un-batched, every yielded sample must be a tuple whose elements match
    the corresponding sub-samples. Batched, every yielded batch holds one
    sub-batch per sub-sample position, and each sub-batch holds one entry
    per sample in the batch.
    """
    batch_start = 0
    for ex_samp, act_samp in zip(samples, corpus.batch_data(samples)):
        assert isinstance(act_samp, tuple)
        for ex_sub_samp, act_sub_samp in zip(ex_samp, act_samp):
            try:
                assert np.allclose(ex_sub_samp, act_sub_samp)
            except TypeError:
                # np.allclose raised TypeError; compare with ==
                assert ex_sub_samp == act_sub_samp
        batch_start += 1
    assert batch_start == len(samples)
    for batch_size in range(1, len(samples) + 2):
        batch_start = 0
        for act_batch in corpus.batch_data(samples, batch_size=batch_size):
            ex_batch = samples[batch_start:batch_start + batch_size]
            assert len(ex_batch[0]) == len(act_batch)  # same num sub-batches
            for sub_batch_idx, act_sub_batch in enumerate(act_batch):
                assert len(act_sub_batch) == len(ex_batch)
                for sub_samp_idx, act_sub_samp in enumerate(act_sub_batch):
                    # batches are indexed [sub-batch][sample], the expected
                    # data [sample][sub-sample]
                    ex_sub_samp = ex_batch[sub_samp_idx][sub_batch_idx]
                    try:
                        assert np.allclose(ex_sub_samp, act_sub_samp)
                    except TypeError:
                        # compare the current sub-samples, not the stale
                        # ex_samp/act_samp left over from the first loop
                        assert ex_sub_samp == act_sub_samp
            batch_start += len(ex_batch)
        assert batch_start == len(samples)
Ejemplo n.º 5
0
def test_batch_data_numpy(samples):
    """Check ``corpus.batch_data`` with ``subsamples=False`` on numpy data.

    ``samples`` are numpy data of the same shape and type. The un-batched
    iteration is compared sample by sample; then, for every batch axis
    from -1 to ``len(samples.shape) - 1`` and every batch size from 1 to
    ``len(samples) + 1``, each batch is compared against the matching
    slice of ``samples``.
    """
    batch_start = 0
    for ex_samp, act_samp in zip(samples,
                                 corpus.batch_data(samples, subsamples=False)):
        try:
            assert np.allclose(ex_samp, act_samp)
        except TypeError:
            # np.allclose raised TypeError; compare exactly
            assert ex_samp.tolist() == act_samp.tolist()
        batch_start += 1
    assert batch_start == len(samples)
    for axis in range(-1, len(samples.shape)):
        # one index entry per dimension of samples; the entry at ``axis``
        # is overwritten below to pick a single sample out of a batch
        batch_slice = [slice(None)] * len(samples.shape)
        for batch_size in range(1, len(samples) + 2):
            batch_start = 0
            for act_batch in corpus.batch_data(samples,
                                               batch_size=batch_size,
                                               subsamples=False,
                                               axis=axis):
                ex_batch = samples[batch_start:batch_start + batch_size]
                assert len(ex_batch) == act_batch.shape[axis]
                for samp_idx in range(len(ex_batch)):
                    batch_slice[axis] = samp_idx
                    ex_samp = ex_batch[samp_idx]
                    # numpy needs a tuple here: a list of slices is not
                    # treated as a multi-dimensional index
                    act_samp = act_batch[tuple(batch_slice)]
                    try:
                        assert np.allclose(ex_samp, act_samp)
                    except TypeError:
                        assert (ex_samp.flatten().tolist() ==
                                act_samp.flatten().tolist())
                batch_start += len(ex_batch)
            # checked for every batch size (not only the last one) so a
            # batch size that drops samples cannot slip through
            assert batch_start == len(samples)
Ejemplo n.º 6
0
def test_batch_data_tups_alt(samples):
    """Check ``corpus.batch_data`` on samples made of sub-samples.

    Un-batched, every yielded sample must be a tuple whose elements match
    the corresponding sub-samples. Batched, every yielded batch holds one
    sub-batch per sub-sample position, and each sub-batch holds one entry
    per sample in the batch.
    """
    batch_start = 0
    for ex_samp, act_samp in zip(samples, corpus.batch_data(samples)):
        assert isinstance(act_samp, tuple)
        for ex_sub_samp, act_sub_samp in zip(ex_samp, act_samp):
            try:
                assert np.allclose(ex_sub_samp, act_sub_samp)
            except TypeError:
                # np.allclose raised TypeError; compare with ==
                assert ex_sub_samp == act_sub_samp
        batch_start += 1
    assert batch_start == len(samples)
    for batch_size in range(1, len(samples) + 2):
        batch_start = 0
        for act_batch in corpus.batch_data(samples, batch_size=batch_size):
            ex_batch = samples[batch_start:batch_start + batch_size]
            assert len(ex_batch[0]) == len(act_batch)  # same num sub-batches
            for sub_batch_idx, act_sub_batch in enumerate(act_batch):
                assert len(act_sub_batch) == len(ex_batch)
                for sub_samp_idx, act_sub_samp in enumerate(act_sub_batch):
                    # batches are indexed [sub-batch][sample], the expected
                    # data [sample][sub-sample]
                    ex_sub_samp = ex_batch[sub_samp_idx][sub_batch_idx]
                    try:
                        assert np.allclose(ex_sub_samp, act_sub_samp)
                    except TypeError:
                        # compare the current sub-samples, not the stale
                        # ex_samp/act_samp left over from the first loop
                        assert ex_sub_samp == act_sub_samp
            batch_start += len(ex_batch)
        assert batch_start == len(samples)
Ejemplo n.º 7
0
def test_batch_data_tups_numpy(samples):
    """Check ``corpus.batch_data`` on samples whose sub-samples are numpy.

    Un-batched, every yielded sample must be a tuple whose elements match
    the expected sub-samples in shape and value. Batched, the test walks
    over every combination of per-sub-sample batch axes (plus the plain
    integer axes 0 and -1, applied to every sub-sample) and every batch
    size from 1 to ``len(samples) + 1``, comparing each entry of each
    sub-batch with the expected sub-sample.
    """
    input_shapes = tuple(sample.shape for sample in samples[0])
    # tuples with one axis per sub-sample, then two plain integer axes
    axes = tuple(product(*(range(len(shape) + 1) for shape in input_shapes)))
    axes += (0, -1)
    batch_start = 0
    for ex_samp, act_samp in zip(samples, corpus.batch_data(samples)):
        assert isinstance(act_samp, tuple)
        for ex_sub_samp, act_sub_samp in zip(ex_samp, act_samp):
            assert ex_sub_samp.shape == act_sub_samp.shape
            try:
                assert np.allclose(ex_sub_samp, act_sub_samp)
            except TypeError:
                # np.allclose raised TypeError; compare exactly
                assert (
                    ex_sub_samp.flatten().tolist() ==
                    act_sub_samp.flatten().tolist()
                )
        batch_start += 1
    assert batch_start == len(samples)
    for axis in axes:
        # one index list per sub-batch, with one more entry than the
        # sub-sample has dimensions (the extra one is the batch axis)
        batch_slices = tuple(
            [slice(None)] * (len(shape) + 1) for shape in input_shapes)
        if isinstance(axis, int):
            # a single integer axis is used for every sub-batch
            axis_iter = repeat(axis)
        else:
            axis_iter = axis
        for batch_size in range(1, len(samples) + 2):
            batch_start = 0
            for act_batch in corpus.batch_data(
                    samples, subsamples=True, batch_size=batch_size,
                    axis=axis):
                assert len(act_batch) == len(input_shapes)
                assert isinstance(act_batch, tuple)
                ex_batch = samples[batch_start:batch_start + batch_size]
                for sub_batch_idx, sub_axis in zip(
                        range(len(input_shapes)), axis_iter):
                    act_sub_batch = act_batch[sub_batch_idx]
                    assert len(ex_batch) == act_sub_batch.shape[sub_axis]
                    sub_batch_slice = batch_slices[sub_batch_idx]
                    for sub_samp_idx in range(len(ex_batch)):
                        sub_batch_slice[sub_axis] = sub_samp_idx
                        ex_sub_samp = ex_batch[sub_samp_idx, sub_batch_idx]
                        act_sub_samp = act_sub_batch[tuple(sub_batch_slice)]
                        # the == 2 is to account for the case when
                        # ex_sub_samp are going to be np.generics (as
                        # opposed to arrays).
                        # sub_axis is the assertion message; placing it
                        # inside the parentheses would build a non-empty
                        # tuple and make the assertion always pass.
                        assert (
                            len(input_shapes) == 2 or
                            ex_sub_samp.shape == act_sub_samp.shape
                        ), sub_axis
                        try:
                            assert np.allclose(ex_sub_samp, act_sub_samp)
                        except TypeError:
                            assert (
                                ex_sub_samp.flatten().tolist() ==
                                act_sub_samp.flatten().tolist()
                            )
                batch_start += len(ex_batch)
            assert batch_start == len(samples)
Ejemplo n.º 8
0
def test_batch_data_tups_numpy(samples):
    """Check ``corpus.batch_data`` on samples whose sub-samples are numpy.

    Un-batched, every yielded sample must be a tuple whose elements match
    the expected sub-samples in shape and value. Batched, the test walks
    over every combination of per-sub-sample batch axes (plus the plain
    integer axes 0 and -1, applied to every sub-sample) and every batch
    size from 1 to ``len(samples) + 1``, comparing each entry of each
    sub-batch with the expected sub-sample.
    """
    input_shapes = tuple(sample.shape for sample in samples[0])
    # tuples with one axis per sub-sample, then two plain integer axes
    axes = tuple(product(*(range(len(shape) + 1) for shape in input_shapes)))
    axes += (0, -1)
    batch_start = 0
    for ex_samp, act_samp in zip(samples, corpus.batch_data(samples)):
        assert isinstance(act_samp, tuple)
        for ex_sub_samp, act_sub_samp in zip(ex_samp, act_samp):
            assert ex_sub_samp.shape == act_sub_samp.shape
            try:
                assert np.allclose(ex_sub_samp, act_sub_samp)
            except TypeError:
                # np.allclose raised TypeError; compare exactly
                assert (ex_sub_samp.flatten().tolist() ==
                        act_sub_samp.flatten().tolist())
        batch_start += 1
    assert batch_start == len(samples)
    for axis in axes:
        # one index list per sub-batch, with one more entry than the
        # sub-sample has dimensions (the extra one is the batch axis)
        batch_slices = tuple([slice(None)] * (len(shape) + 1)
                             for shape in input_shapes)
        if isinstance(axis, int):
            # a single integer axis is used for every sub-batch
            axis_iter = repeat(axis)
        else:
            axis_iter = axis
        for batch_size in range(1, len(samples) + 2):
            batch_start = 0
            for act_batch in corpus.batch_data(samples,
                                               subsamples=True,
                                               batch_size=batch_size,
                                               axis=axis):
                assert len(act_batch) == len(input_shapes)
                assert isinstance(act_batch, tuple)
                ex_batch = samples[batch_start:batch_start + batch_size]
                for sub_batch_idx, sub_axis in zip(range(len(input_shapes)),
                                                   axis_iter):
                    act_sub_batch = act_batch[sub_batch_idx]
                    assert len(ex_batch) == act_sub_batch.shape[sub_axis]
                    sub_batch_slice = batch_slices[sub_batch_idx]
                    for sub_samp_idx in range(len(ex_batch)):
                        sub_batch_slice[sub_axis] = sub_samp_idx
                        ex_sub_samp = ex_batch[sub_samp_idx, sub_batch_idx]
                        # numpy needs a tuple here: a list of slices is
                        # not treated as a multi-dimensional index
                        act_sub_samp = act_sub_batch[tuple(sub_batch_slice)]
                        # the == 2 is to account for the case when
                        # ex_sub_samp are going to be np.generics (as
                        # opposed to arrays).
                        # sub_axis is the assertion message; placing it
                        # inside the parentheses would build a non-empty
                        # tuple and make the assertion always pass.
                        assert (len(input_shapes) == 2 or
                                ex_sub_samp.shape == act_sub_samp.shape
                                ), sub_axis
                        try:
                            assert np.allclose(ex_sub_samp, act_sub_samp)
                        except TypeError:
                            assert (ex_sub_samp.flatten().tolist() ==
                                    act_sub_samp.flatten().tolist())
                batch_start += len(ex_batch)
            assert batch_start == len(samples)
Ejemplo n.º 9
0
def test_padding():
    """Check the batches ``corpus.batch_data`` yields when ``pad_mode`` is set.

    Three samples with differing numbers and lengths of rows are batched
    with batch sizes 1, 2 and 3 while casting to ``np.int32``, and the
    resulting batches are compared against hand-written expected values.
    A final call omits ``cast_to_array`` and expects the samples back
    unchanged.
    """
    samples = [([1], [2], [3]), ([4, 5], [6, 7]), ([8], [9])]

    def batches_for(batch_size, pad_mode):
        # collect every batch for the given size/mode, cast to int32
        return tuple(
            corpus.batch_data(
                samples,
                subsamples=False,
                batch_size=batch_size,
                pad_mode=pad_mode,
                cast_to_array=np.int32,
            ))

    # batch size 1: each sample comes back on its own
    singles = batches_for(1, 'maximum')
    assert len(singles) == 3
    assert np.allclose(singles[0], [[1], [2], [3]])
    assert np.allclose(singles[1], [[4, 5], [6, 7]])
    assert np.allclose(singles[2], [[8], [9]])

    # batch size 2: first two samples share a batch, the third is alone
    pairs = batches_for(2, 'maximum')
    assert len(pairs) == 2
    expected_pair = [
        [[1, 1], [2, 2], [3, 3]],
        [[4, 5], [6, 7], [6, 7]],
    ]
    assert np.allclose(pairs[0], expected_pair)
    assert np.allclose(pairs[1], [[8], [9]])

    # batch size 3: all samples in one batch
    triples = batches_for(3, 'wrap')
    assert len(triples) == 1
    expected_triple = [
        [[1, 1], [2, 2], [3, 3]],
        [[4, 5], [6, 7], [4, 5]],
        [[8, 8], [9, 9], [8, 8]],
    ]
    assert np.allclose(triples[0], expected_triple)

    # if we do not set cast_to_array, no padding should occur
    uncast = tuple(
        corpus.batch_data(
            samples, subsamples=False, batch_size=3, pad_mode='wrap'))
    assert len(uncast) == 1
    assert uncast[0] == samples
Ejemplo n.º 10
0
def test_padding():
    """Verify ``corpus.batch_data`` output for the padded batching cases.

    Batches of size 1, 2 and 3 are drawn from three ragged samples with
    ``cast_to_array=np.int32`` and compared with literal expected values;
    a last call leaves ``cast_to_array`` unset and expects the original
    samples back.
    """
    samples = [([1], [2], [3]), ([4, 5], [6, 7]), ([8], [9])]
    # keyword arguments shared by every casting call below
    cast_kwargs = dict(subsamples=False, cast_to_array=np.int32)

    size_one = tuple(corpus.batch_data(
        samples, batch_size=1, pad_mode='maximum', **cast_kwargs))
    assert len(size_one) == 3
    expected_size_one = (
        [[1], [2], [3]],
        [[4, 5], [6, 7]],
        [[8], [9]],
    )
    for position in range(3):
        assert np.allclose(size_one[position], expected_size_one[position])

    size_two = tuple(corpus.batch_data(
        samples, batch_size=2, pad_mode='maximum', **cast_kwargs))
    assert len(size_two) == 2
    first_of_two = [
        [[1, 1], [2, 2], [3, 3]],
        [[4, 5], [6, 7], [6, 7]],
    ]
    assert np.allclose(size_two[0], first_of_two)
    assert np.allclose(size_two[1], [[8], [9]])

    size_three = tuple(corpus.batch_data(
        samples, batch_size=3, pad_mode='wrap', **cast_kwargs))
    assert len(size_three) == 1
    only_batch = [
        [[1, 1], [2, 2], [3, 3]],
        [[4, 5], [6, 7], [4, 5]],
        [[8, 8], [9, 9], [8, 8]],
    ]
    assert np.allclose(size_three[0], only_batch)

    # if we do not set cast_to_array, no padding should occur
    raw_batches = tuple(corpus.batch_data(
        samples, subsamples=False, batch_size=3, pad_mode='wrap'))
    assert len(raw_batches) == 1
    assert raw_batches[0] == samples