Beispiel #1
0
def test_MLTask_yield_batch_use_intermediate(mocked_ds, mocked_boto3,
                                             mocked_s3_path, mltask_kwargs):
    """Check `MLTask.yield_batch` with `use_intermediate_inputs=True`.

    Four keys are served by the mocked bucket listing; the test expects
    exactly two distinct batches to be yielded, each with dummy indexes
    (0, -1) and '.json' input/output keys.

    NOTE(review): `mocked_ds` is not referenced in the body; presumably it
    only needs to be active as a patch/fixture -- confirm against the
    decorators or fixtures, which are outside this view.
    """
    # Mock some keys to return
    _keys = [
        Key(k) for k in [
            'a/first_key.json',  # Good
            'b/second_key.json',  # Not in subbucket
            'a/third_key.json',  # Good
            'a/third_key.other',  # Not json
        ]
    ]

    # Mock the full return iterable when iterating over objects
    # in a bucket
    mocked_boto3.resource.return_value.Bucket.return_value.objects.all.return_value = _keys

    # Test the yielding
    mltask = MLTask(input_task=SomeTask,
                    use_intermediate_inputs=True,
                    **mltask_kwargs)
    out_keys = []
    for first_idx, last_idx, _in_key, out_key in mltask.yield_batch():
        # Indexes are always dummies in use_intermediate_inputs
        assert first_idx == 0
        assert last_idx == -1
        # Test keys look right
        assert _in_key.endswith('.json')
        assert out_key.endswith('.json')
        # Compare against the mock's return value, not the mock object
        # itself: a string never equals a mock object, so comparing to the
        # bare mock would make this assertion pass unconditionally.
        assert _in_key != mocked_s3_path()
        out_keys.append(out_key)
    # Every yielded output key must be unique
    assert len(out_keys) == len(set(out_keys))
    # Only the two keys marked "Good" above are expected to be yielded
    assert len(out_keys) == 2
Beispiel #2
0
def test_MLTask_yield_batch_not_use_intermediate(mocked_len, mocked_s3_path,
                                                 mltask_kwargs):
    """Check `MLTask.yield_batch` with `use_intermediate_inputs=False`.

    With `n_batches=100`, every yielded batch must have `first_idx` below
    `last_idx`, both indexes must strictly increase from one batch to the
    next, the input key must equal the value returned by `mocked_s3_path()`,
    and the number of yielded output keys must equal `mltask.n_batches`.

    NOTE(review): `mocked_len` is not referenced in the body; presumably it
    only needs to be active as a patch/fixture -- confirm against the
    decorators or fixtures, which are outside this view.
    """
    task = MLTask(input_task=SomeTask,
                  n_batches=100,
                  use_intermediate_inputs=False,
                  **mltask_kwargs)

    # Start below any index checked by the "strictly increasing" asserts
    prev_first, prev_last = -1, -1
    collected = []
    for batch in task.yield_batch():
        first, last, in_key, out_key = batch
        # A batch must span a forward range
        assert first < last
        # Both bounds must move forward relative to the previous batch
        assert first > prev_first
        assert last > prev_last
        # Every batch reads from the same (mocked) input path
        assert in_key == mocked_s3_path()
        collected.append(out_key)
        prev_first, prev_last = first, last

    # One output key per requested batch
    assert len(collected) == task.n_batches