Python default_missing_value_for_dtype Examples, catalyst.utils.numpy_utils.default_missing_value_for_dtype Python Examples

Example #1

0

Show file

File: test_adjusted_array.py Project: Aeroglyphic/catalyst-crypto

def _gen_overwrite_1d_array_adjustment_case(dtype):
    """
    Build test cases for 1-D array overwrite adjustments.

    The recipe mirrors the multiplicative cases: for each row index we write
    down the buffer we expect to see once the adjustments keyed on that index
    have been applied, then hand everything to ``_gen_expectations``.

    Every adjustment here replaces an inclusive span of rows in a single
    column with a 1-D array carrying one value per overwritten row.

    Parameters
    ----------
    dtype : np.dtype
        Either ``float64_dtype`` or ``datetime64ns_dtype``.  Any other dtype
        fails the adjustment-class lookup below with a ``KeyError``.

    Returns
    -------
    Whatever ``_gen_expectations`` produces for the baseline, adjustments and
    per-row expected buffers assembled here.
    """
    overwrite_cls = {
        float64_dtype: Float641DArrayOverwrite,
        datetime64ns_dtype: Datetime641DArrayOverwrite,
    }[dtype]
    to_expected = as_dtype(dtype)
    # NOTE(review): the missing value is always looked up for datetime64ns,
    # regardless of ``dtype`` -- confirm this is intentional.
    missing_value = default_missing_value_for_dtype(datetime64ns_dtype)

    def overwrite(first_row, last_row, col, values):
        # Row indices are inclusive; only one column is touched.
        replacement = array([coerce_to_dtype(dtype, v) for v in values])
        return overwrite_cls(first_row, last_row, col, col, replacement)

    baseline = to_expected([[2, 2, 2] for _ in range(6)])

    adjustments = {
        1: [overwrite(0, 0, 0, [1])],
        3: [overwrite(0, 2, 0, [4, 4, 1])],
        4: [overwrite(0, 3, 2, [5] * 4)],
        5: [overwrite(0, 4, 1, range(1, 6))],
    }

    as_of_row_1 = to_expected([[1, 2, 2], [2, 2, 2], [2, 2, 2],
                               [2, 2, 2], [2, 2, 2], [2, 2, 2]])
    buffer_as_of = [
        # Index 0: nothing applied yet (a separate array from ``baseline``).
        to_expected([[2, 2, 2] for _ in range(6)]),
        as_of_row_1,
        # No adjustment at index 2, so the previous snapshot is reused.
        as_of_row_1,
        to_expected([[4, 2, 2], [4, 2, 2], [1, 2, 2],
                     [2, 2, 2], [2, 2, 2], [2, 2, 2]]),
        to_expected([[4, 2, 5], [4, 2, 5], [1, 2, 5],
                     [2, 2, 5], [2, 2, 2], [2, 2, 2]]),
        to_expected([[4, 1, 5], [4, 2, 5], [1, 3, 5],
                     [2, 4, 5], [2, 5, 2], [2, 2, 2]]),
    ]

    return _gen_expectations(
        baseline,
        missing_value,
        adjustments,
        buffer_as_of,
        nrows=6,
        perspective_offsets=(0, 1),
    )

Example #2

0

Show file

File: test_adjusted_array.py Project: Aeroglyphic/catalyst-crypto

class AdjustedArrayTestCase(TestCase):
    """
    Tests for AdjustedArray: window traversal with and without adjustments,
    masking, argument validation, and read-only views.
    """

    # Cases without adjustments, covering numeric/datetime ndarrays, string
    # ndarrays, and LabelArrays passed in directly.
    @parameterized.expand(
        chain(
            _gen_unadjusted_cases(
                'float',
                make_input=as_dtype(float64_dtype),
                make_expected_output=as_dtype(float64_dtype),
                missing_value=default_missing_value_for_dtype(float64_dtype),
            ),
            _gen_unadjusted_cases(
                'datetime',
                make_input=as_dtype(datetime64ns_dtype),
                make_expected_output=as_dtype(datetime64ns_dtype),
                missing_value=default_missing_value_for_dtype(
                    datetime64ns_dtype),
            ),
            # Test passing an array of strings to AdjustedArray.
            _gen_unadjusted_cases(
                'bytes_ndarray',
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_ndarray',
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_ndarray',
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value='',
            ),
            # Test passing a LabelArray directly to AdjustedArray.
            _gen_unadjusted_cases(
                'bytes_labelarray',
                make_input=as_labelarray(bytes_dtype, b''),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_labelarray',
                make_input=as_labelarray(unicode_dtype, None),
                make_expected_output=as_labelarray(unicode_dtype, None),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_labelarray',
                make_input=(lambda a: LabelArray(
                    a.astype(unicode).astype(object), u'')),
                make_expected_output=as_labelarray(unicode_dtype, ''),
                missing_value='',
            ),
        ))
    def test_no_adjustments(self, name, data, lookback, adjustments,
                            missing_value, perspective_offset,
                            expected_output):
        # Each window yielded by traverse() must match the expected window at
        # the same position.

        array = AdjustedArray(data, NOMASK, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            in_out = zip(array.traverse(lookback), expected_output)
            for yielded, expected_yield in in_out:
                check_arrays(yielded, expected_yield)

    @parameterized.expand(_gen_multiplicative_adjustment_cases(float64_dtype))
    def test_multiplicative_adjustments(self, name, data, lookback,
                                        adjustments, missing_value,
                                        perspective_offset, expected):
        # zip_longest pads the shorter side with None, so a mismatch in the
        # number of windows reaches check_arrays instead of being dropped.

        array = AdjustedArray(data, NOMASK, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    @parameterized.expand(
        chain(
            _gen_overwrite_adjustment_cases(int64_dtype),
            _gen_overwrite_adjustment_cases(float64_dtype),
            _gen_overwrite_adjustment_cases(datetime64ns_dtype),
            _gen_overwrite_1d_array_adjustment_case(float64_dtype),
            _gen_overwrite_1d_array_adjustment_case(datetime64ns_dtype),
            # There are six cases here:
            # Using np.bytes/np.unicode/object arrays as inputs.
            # Passing np.bytes/np.unicode/object arrays to LabelArray,
            # and using those as input.
            #
            # The outputs should always be LabelArrays.
            _gen_unadjusted_cases(
                'bytes_ndarray',
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_ndarray',
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_ndarray',
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'bytes_labelarray',
                make_input=as_labelarray(bytes_dtype, b''),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_labelarray',
                make_input=as_labelarray(unicode_dtype, u''),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_labelarray',
                make_input=(lambda a: LabelArray(
                    a.astype(unicode).astype(object),
                    None,
                )),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=None,
            ),
        ))
    def test_overwrite_adjustment_cases(self, name, baseline, lookback,
                                        adjustments, missing_value,
                                        perspective_offset, expected):
        # Same traversal check as the multiplicative test, driven by the
        # overwrite case generators plus the unadjusted string cases.
        array = AdjustedArray(baseline, NOMASK, adjustments, missing_value)

        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    @parameter_space(
        __fail_fast=True,
        dtype=[
            float64_dtype,
            int64_dtype,
            datetime64ns_dtype,
        ],
        missing_value=[0, 10000],
        window_length=[2, 3],
    )
    def test_masking(self, dtype, missing_value, window_length):
        missing_value = coerce_to_dtype(dtype, missing_value)
        baseline_ints = arange(15).reshape(5, 3)
        baseline = baseline_ints.astype(dtype)
        # True at odd values; positions where the mask is False are expected
        # to read back as missing_value.
        mask = (baseline_ints % 2).astype(bool)
        masked_baseline = where(mask, baseline, missing_value)

        array = AdjustedArray(
            baseline,
            mask,
            adjustments={},
            missing_value=missing_value,
        )

        gen_expected = moving_window(masked_baseline, window_length)
        gen_actual = array.traverse(window_length)
        for expected, actual in zip(gen_expected, gen_actual):
            check_arrays(expected, actual)

    @parameter_space(
        __fail_fast=True,
        dtype=[bytes_dtype, unicode_dtype, object_dtype],
        missing_value=["0", "-1", ""],
        window_length=[2, 3],
    )
    def test_masking_with_strings(self, dtype, missing_value, window_length):
        missing_value = coerce_to_dtype(dtype, missing_value)
        baseline_ints = arange(15).reshape(5, 3)

        # Coerce to string first so that coercion to object gets us an array of
        # string objects.
        baseline = baseline_ints.astype(str).astype(dtype)
        mask = (baseline_ints % 2).astype(bool)

        # Expected output is a LabelArray with the masked-out (False)
        # positions replaced by missing_value.
        masked_baseline = LabelArray(baseline, missing_value=missing_value)
        masked_baseline[~mask] = missing_value

        array = AdjustedArray(
            baseline,
            mask,
            adjustments={},
            missing_value=missing_value,
        )

        gen_expected = moving_window(masked_baseline, window_length)
        gen_actual = array.traverse(window_length=window_length)

        for expected, actual in zip(gen_expected, gen_actual):
            check_arrays(expected, actual)

    def test_invalid_lookback(self):
        # The array has 6 rows: a window of 7 is too long, and windows of
        # length 0 or -1 are not positive.

        data = arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, NOMASK, {}, float('nan'))

        with self.assertRaises(WindowLengthTooLong):
            adj_array.traverse(7)

        with self.assertRaises(WindowLengthNotPositive):
            adj_array.traverse(0)

        with self.assertRaises(WindowLengthNotPositive):
            adj_array.traverse(-1)

    def test_array_views_arent_writable(self):
        # Assigning into any yielded window must raise ValueError.

        data = arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, NOMASK, {}, float('nan'))

        for frame in adj_array.traverse(3):
            with self.assertRaises(ValueError):
                frame[0, 0] = 5.0

    def test_bad_input(self):
        # Raw string: ``\(`` is a regex escape, not a Python string escape.
        # The optional ``L`` matches the long-integer suffix that can appear
        # in shape reprs.
        msg = r"Mask shape \(2L?, 3L?\) != data shape \(5L?, 5L?\)"
        data = arange(25).reshape(5, 5)
        bad_mask = array([[0, 1, 1], [0, 0, 1]], dtype=bool)

        # NOTE(review): assertRaisesRegexp is the legacy spelling of
        # assertRaisesRegex; kept because the file also targets Python 2.
        with self.assertRaisesRegexp(ValueError, msg):
            AdjustedArray(data, bad_mask, {}, missing_value=-1)

    # NOTE(review): the leading underscore keeps this method from being
    # collected as a test -- presumably disabled on purpose; confirm.
    def _test_inspect(self):
        data = arange(15, dtype=float).reshape(5, 3)
        adj_array = AdjustedArray(
            data,
            NOMASK,
            {4: [Float64Multiply(2, 3, 0, 0, 4.0)]},
            float('nan'),
        )

        expected = dedent("""\
            Adjusted Array (float64):

            Data:
            array([[  0.,   1.,   2.],
                   [  3.,   4.,   5.],
                   [  6.,   7.,   8.],
                   [  9.,  10.,  11.],
                   [ 12.,  13.,  14.]])

            Adjustments:
            {4: [Float64Multiply(first_row=2, last_row=3, first_col=0, \
last_col=0, value=4.000000)]}
            """)
        got = adj_array.inspect()
        self.assertEqual(expected, got)

Example #3

0

Show file

File: test_adjusted_array.py Project: Aeroglyphic/catalyst-crypto

def _gen_multiplicative_adjustment_cases(dtype):
    """
    Generate expected moving windows on a buffer with adjustments.

    For each row index we write down the view of the array we expect in all
    windows anchored on that row.  An adjustment keyed on index N should be
    visible in the underlying buffer for any window containing row N.  The
    snapshots are then handed to ``_gen_expectations``.

    Parameters
    ----------
    dtype : np.dtype
        Only ``float64_dtype`` is supported; any other dtype fails the
        adjustment-class lookup with a ``KeyError``.
    """
    multiply_cls = {
        float64_dtype: Float64Multiply,
    }[dtype]

    nrows, ncols = 6, 3

    def multiply(first_row, last_row, first_col, last_col, factor):
        # Row indices are inclusive.
        return multiply_cls(
            first_row, last_row, first_col, last_col,
            coerce_to_dtype(dtype, factor),
        )

    def snapshot(rows):
        return array(rows, dtype=dtype)

    # The baseline doubles as the expected buffer at index 0.
    baseline = full((nrows, ncols), 1, dtype=dtype)

    adjustments = {
        1: [multiply(0, 0, 0, 0, 2)],
        3: [multiply(1, 2, 1, 1, 3), multiply(0, 1, 0, 0, 4)],
        4: [multiply(0, 3, 2, 2, 5)],
        5: [multiply(0, 4, 1, 1, 6), multiply(2, 2, 2, 2, 7)],
    }

    as_of_row_1 = snapshot(
        [[2, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]])
    buffer_as_of = [
        baseline,
        as_of_row_1,
        # No adjustment at index 2.
        as_of_row_1,
        # (0, 0) was already doubled at index 1, so x4 yields 8.
        snapshot([[8, 1, 1], [4, 3, 1], [1, 3, 1],
                  [1, 1, 1], [1, 1, 1], [1, 1, 1]]),
        snapshot([[8, 1, 5], [4, 3, 5], [1, 3, 5],
                  [1, 1, 5], [1, 1, 1], [1, 1, 1]]),
        snapshot([[8, 6, 5], [4, 18, 5], [1, 18, 35],
                  [1, 6, 5], [1, 6, 1], [1, 1, 1]]),
    ]

    return _gen_expectations(
        baseline,
        default_missing_value_for_dtype(dtype),
        adjustments,
        buffer_as_of,
        nrows,
        perspective_offsets=(0, 1),
    )

Example #4

0

Show file

File: test_adjusted_array.py Project: Aeroglyphic/catalyst-crypto

def _gen_overwrite_adjustment_cases(dtype):
    """
    Build test cases for scalar overwrite adjustments.

    The recipe is the same one used for multiplicative adjustments; only the
    semantics differ: each adjustment replaces every cell in its (inclusive)
    row/column range with a single value instead of scaling it.

    Parameters
    ----------
    dtype : np.dtype
        One of the dtypes keyed in the adjustment-class table below; any
        other dtype fails the lookup with a ``KeyError``.

    Returns
    -------
    Whatever ``_gen_expectations`` produces for the baseline, adjustments and
    per-row expected buffers assembled here.
    """
    overwrite_cls = {
        float64_dtype: Float64Overwrite,
        datetime64ns_dtype: Datetime64Overwrite,
        int64_dtype: Int64Overwrite,
        bytes_dtype: ObjectOverwrite,
        unicode_dtype: ObjectOverwrite,
        object_dtype: ObjectOverwrite,
    }[dtype]
    to_expected = as_dtype(dtype)
    # NOTE(review): the missing value is always looked up for datetime64ns,
    # regardless of ``dtype`` -- confirm this is intentional.
    missing_value = default_missing_value_for_dtype(datetime64ns_dtype)

    if dtype == object_dtype:
        # When we're testing object dtypes, we expect to have strings, but
        # coerce_to_dtype(object, 3) just gives 3 as a Python integer.
        def to_value(raw):
            return str(raw)
    else:
        def to_value(raw):
            return coerce_to_dtype(dtype, raw)

    def overwrite(first_row, last_row, first_col, last_col, raw):
        # Row indices are inclusive.
        return overwrite_cls(
            first_row, last_row, first_col, last_col, to_value(raw),
        )

    baseline = to_expected([[2, 2, 2] for _ in range(6)])

    adjustments = {
        1: [overwrite(0, 0, 0, 0, 1)],
        3: [overwrite(1, 2, 1, 1, 3), overwrite(0, 1, 0, 0, 4)],
        4: [overwrite(0, 3, 2, 2, 5)],
        5: [overwrite(0, 4, 1, 1, 6), overwrite(2, 2, 2, 2, 7)],
    }

    as_of_row_1 = to_expected([[1, 2, 2], [2, 2, 2], [2, 2, 2],
                               [2, 2, 2], [2, 2, 2], [2, 2, 2]])
    buffer_as_of = [
        # Index 0: nothing applied yet (a separate array from ``baseline``).
        to_expected([[2, 2, 2] for _ in range(6)]),
        as_of_row_1,
        # No adjustment at index 2.
        as_of_row_1,
        to_expected([[4, 2, 2], [4, 3, 2], [2, 3, 2],
                     [2, 2, 2], [2, 2, 2], [2, 2, 2]]),
        to_expected([[4, 2, 5], [4, 3, 5], [2, 3, 5],
                     [2, 2, 5], [2, 2, 2], [2, 2, 2]]),
        to_expected([[4, 6, 5], [4, 6, 5], [2, 6, 7],
                     [2, 6, 5], [2, 6, 2], [2, 2, 2]]),
    ]

    return _gen_expectations(
        baseline,
        missing_value,
        adjustments,
        buffer_as_of,
        nrows=6,
        perspective_offsets=(0, 1),
    )

Example #5

0

Show file

File: term.py Project: Aeroglyphic/catalyst-crypto

def validate_dtype(termname, dtype, missing_value):
    """
    Check the `dtype` and `missing_value` given to Term.__new__.

    Confirms that ``dtype`` is something we can represent and that
    ``missing_value`` can be safely stored in an array of that dtype, filling
    in the dtype's default missing value when none was supplied.

    Returns
    -------
    validated_dtype, validated_missing_value : np.dtype, any
        The dtype and missing_value to use for the new term.

    Raises
    ------
    DTypeNotSpecified
        When ``dtype`` is NotSpecified.
    NotDType
        When ``dtype`` cannot be coerced to a numpy dtype.
    UnsupportedDType
        When ``can_represent_dtype`` rejects the coerced dtype.
    NoDefaultMissingValue
        When dtype requires an explicit missing_value, but ``missing_value``
        is NotSpecified (presumably raised by
        ``default_missing_value_for_dtype``; verify there).
    TypeError
        When ``missing_value`` is not a valid choice for the dtype.
    """
    if dtype is NotSpecified:
        raise DTypeNotSpecified(termname=termname)

    try:
        resolved_dtype = dtype_class(dtype)
    except TypeError:
        # Report the value exactly as the caller supplied it.
        raise NotDType(dtype=dtype, termname=termname)

    if not can_represent_dtype(resolved_dtype):
        raise UnsupportedDType(dtype=resolved_dtype, termname=termname)

    if missing_value is NotSpecified:
        fill_value = default_missing_value_for_dtype(resolved_dtype)
    else:
        fill_value = missing_value

    try:
        if resolved_dtype == categorical_dtype:
            # Categoricals use object dtype, and numpy would happily promote
            # numerical values to object even though we don't support them,
            # so they need an explicit check.
            _assert_valid_categorical_missing_value(fill_value)

        # Wrap the value in an array and attempt a safe conversion to the
        # target dtype.  'same_kind' allows casting between things like
        # float32 and float64, but not str and int.
        probe = array([fill_value])
        probe.astype(dtype=resolved_dtype, casting='same_kind')
    except TypeError as error:
        template = (
            "Missing value {value!r} is not a valid choice "
            "for term {termname} with dtype {dtype}.\n\n"
            "Coercion attempt failed with: {error}"
        )
        raise TypeError(template.format(
            termname=termname,
            value=fill_value,
            dtype=resolved_dtype,
            error=error,
        ))

    return resolved_dtype, fill_value

Example #6

0

Show file

File: term.py Project: zhoukalex/catalyst

def validate_dtype(termname, dtype, missing_value):
    """
    Validate the `dtype` / `missing_value` pair passed to Term.__new__.

    The dtype is coerced to a numpy dtype and checked for representability.
    The missing value defaults to the dtype's registered default when it is
    NotSpecified, and is then checked for safe coercion to the dtype.

    Returns
    -------
    validated_dtype, validated_missing_value : np.dtype, any
        The dtype and missing_value to use for the new term.

    Raises
    ------
    DTypeNotSpecified
        When ``dtype`` is NotSpecified.
    NotDType
        When ``dtype`` cannot be coerced to a numpy dtype.
    UnsupportedDType
        When ``can_represent_dtype`` rejects the coerced dtype.
    NoDefaultMissingValue
        When dtype requires an explicit missing_value, but ``missing_value``
        is NotSpecified (presumably raised by
        ``default_missing_value_for_dtype``; verify there).
    TypeError
        When ``missing_value`` is not a valid choice for the dtype.
    """
    if dtype is NotSpecified:
        raise DTypeNotSpecified(termname=termname)

    try:
        dtype = dtype_class(dtype)
    except TypeError:
        # ``dtype`` still holds the caller's value, since coercion failed.
        raise NotDType(dtype=dtype, termname=termname)

    representable = can_represent_dtype(dtype)
    if not representable:
        raise UnsupportedDType(dtype=dtype, termname=termname)

    if missing_value is NotSpecified:
        missing_value = default_missing_value_for_dtype(dtype)

    try:
        is_categorical = (dtype == categorical_dtype)
        if is_categorical:
            # Object dtype backs categoricals, and numpy will promote
            # numerical values to object even though we don't support them.
            _assert_valid_categorical_missing_value(missing_value)

        # Safety check: build a one-element array and try to convert it.
        # 'same_kind' allows casting between things like float32 and
        # float64, but not str and int.
        one_element = array([missing_value])
        one_element.astype(dtype=dtype, casting='same_kind')
    except TypeError as e:
        details = {
            'termname': termname,
            'value': missing_value,
            'dtype': dtype,
            'error': e,
        }
        raise TypeError(
            "Missing value {value!r} is not a valid choice "
            "for term {termname} with dtype {dtype}.\n\n"
            "Coercion attempt failed with: {error}".format(**details)
        )

    return dtype, missing_value

Example #7

0

Show file

File: test_adjusted_array.py Project: zhoukalex/catalyst

def _gen_overwrite_1d_array_adjustment_case(dtype):
    """
    Generate test cases for 1-D array overwrite adjustments.

    Uses the same approach as the multiplicative cases: for each row index,
    record the buffer expected once the adjustments keyed on that index have
    been applied, then pass everything to ``_gen_expectations``.

    Each adjustment overwrites an inclusive range of rows in one column with
    a 1-D array that supplies one value per overwritten row.

    Parameters
    ----------
    dtype : np.dtype
        Either ``float64_dtype`` or ``datetime64ns_dtype``.  Any other dtype
        fails the adjustment-class lookup below with a ``KeyError``.
    """
    array_overwrite = {
        float64_dtype: Float641DArrayOverwrite,
        datetime64ns_dtype: Datetime641DArrayOverwrite,
    }[dtype]
    expected_as_dtype = as_dtype(dtype)
    # NOTE(review): the missing value is always looked up for datetime64ns,
    # regardless of ``dtype`` -- confirm this is intentional.
    missing_value = default_missing_value_for_dtype(datetime64ns_dtype)

    unadjusted_rows = [2, 2, 2]
    baseline = expected_as_dtype([list(unadjusted_rows) for _ in range(6)])

    # (index the adjustment is keyed on, first_row, last_row, column, values)
    # Note that row indices are inclusive!
    plan = [
        (1, 0, 0, 0, [1]),
        (3, 0, 2, 0, [4, 4, 1]),
        (4, 0, 3, 2, [5] * 4),
        (5, 0, 4, 1, range(1, 6)),
    ]
    adjustments = {}
    for key, first_row, last_row, column, raw_values in plan:
        coerced = array([coerce_to_dtype(dtype, raw) for raw in raw_values])
        adjustments[key] = [
            array_overwrite(first_row, last_row, column, column, coerced),
        ]

    buffer_as_of = [None] * 6
    buffer_as_of[0] = expected_as_dtype(
        [list(unadjusted_rows) for _ in range(6)])
    buffer_as_of[1] = expected_as_dtype([[1, 2, 2],
                                         [2, 2, 2],
                                         [2, 2, 2],
                                         [2, 2, 2],
                                         [2, 2, 2],
                                         [2, 2, 2]])
    # No adjustment at index 2.
    buffer_as_of[2] = buffer_as_of[1]
    buffer_as_of[3] = expected_as_dtype([[4, 2, 2],
                                         [4, 2, 2],
                                         [1, 2, 2],
                                         [2, 2, 2],
                                         [2, 2, 2],
                                         [2, 2, 2]])
    buffer_as_of[4] = expected_as_dtype([[4, 2, 5],
                                         [4, 2, 5],
                                         [1, 2, 5],
                                         [2, 2, 5],
                                         [2, 2, 2],
                                         [2, 2, 2]])
    buffer_as_of[5] = expected_as_dtype([[4, 1, 5],
                                         [4, 2, 5],
                                         [1, 3, 5],
                                         [2, 4, 5],
                                         [2, 5, 2],
                                         [2, 2, 2]])

    return _gen_expectations(
        baseline,
        missing_value,
        adjustments,
        buffer_as_of,
        nrows=6,
        perspective_offsets=(0, 1),
    )

Example #8

0

Show file

File: test_adjusted_array.py Project: zhoukalex/catalyst

def _gen_overwrite_adjustment_cases(dtype):
    """
    Generate test cases for scalar overwrite adjustments.

    Follows the algorithm used for multiplicative adjustments.  The only
    difference is the semantics: every cell in an adjustment's (inclusive)
    row/column range is replaced by one value rather than multiplied.

    Parameters
    ----------
    dtype : np.dtype
        One of the dtypes keyed in the adjustment-class table below; any
        other dtype fails the lookup with a ``KeyError``.
    """
    value_overwrite = {
        float64_dtype: Float64Overwrite,
        datetime64ns_dtype: Datetime64Overwrite,
        int64_dtype: Int64Overwrite,
        bytes_dtype: ObjectOverwrite,
        unicode_dtype: ObjectOverwrite,
        object_dtype: ObjectOverwrite,
    }[dtype]
    expected_as_dtype = as_dtype(dtype)
    # NOTE(review): the missing value is always looked up for datetime64ns,
    # regardless of ``dtype`` -- confirm this is intentional.
    missing_value = default_missing_value_for_dtype(datetime64ns_dtype)

    if dtype == object_dtype:
        # When we're testing object dtypes, we expect to have strings, but
        # coerce_to_dtype(object, 3) just gives 3 as a Python integer.
        def make_overwrite_value(_unused_dtype, value):
            return str(value)
    else:
        make_overwrite_value = coerce_to_dtype

    unadjusted_row = [2, 2, 2]
    baseline = expected_as_dtype([list(unadjusted_row) for _ in range(6)])

    # index the adjustment is keyed on ->
    #     [(first_row, last_row, first_col, last_col, value), ...]
    # Note that row indices are inclusive!
    plan = [
        (1, [(0, 0, 0, 0, 1)]),
        (3, [(1, 2, 1, 1, 3), (0, 1, 0, 0, 4)]),
        (4, [(0, 3, 2, 2, 5)]),
        (5, [(0, 4, 1, 1, 6), (2, 2, 2, 2, 7)]),
    ]
    adjustments = {}
    for key, specs in plan:
        adjustments[key] = [
            value_overwrite(
                first_row, last_row, first_col, last_col,
                make_overwrite_value(dtype, value),
            )
            for first_row, last_row, first_col, last_col, value in specs
        ]

    buffer_as_of = [None] * 6
    buffer_as_of[0] = expected_as_dtype(
        [list(unadjusted_row) for _ in range(6)])
    buffer_as_of[1] = expected_as_dtype([[1, 2, 2],
                                         [2, 2, 2],
                                         [2, 2, 2],
                                         [2, 2, 2],
                                         [2, 2, 2],
                                         [2, 2, 2]])
    # No adjustment at index 2.
    buffer_as_of[2] = buffer_as_of[1]
    buffer_as_of[3] = expected_as_dtype([[4, 2, 2],
                                         [4, 3, 2],
                                         [2, 3, 2],
                                         [2, 2, 2],
                                         [2, 2, 2],
                                         [2, 2, 2]])
    buffer_as_of[4] = expected_as_dtype([[4, 2, 5],
                                         [4, 3, 5],
                                         [2, 3, 5],
                                         [2, 2, 5],
                                         [2, 2, 2],
                                         [2, 2, 2]])
    buffer_as_of[5] = expected_as_dtype([[4, 6, 5],
                                         [4, 6, 5],
                                         [2, 6, 7],
                                         [2, 6, 5],
                                         [2, 6, 2],
                                         [2, 2, 2]])

    return _gen_expectations(
        baseline,
        missing_value,
        adjustments,
        buffer_as_of,
        nrows=6,
        perspective_offsets=(0, 1),
    )

Example #9

0

Show file

File: test_adjusted_array.py Project: zhoukalex/catalyst

def _gen_multiplicative_adjustment_cases(dtype):
    """
    Generate expected moving windows on a buffer with adjustments.

    At each row we construct the view of the array we expect in all windows
    anchored on that row.  If an adjustment is to be applied once we process
    the row at index N, it should be visible in the underlying buffer for any
    window containing the row at index N.  The per-row views are then passed
    to ``_gen_expectations``.

    Parameters
    ----------
    dtype : np.dtype
        Only ``float64_dtype`` is supported; any other dtype fails the
        adjustment-class lookup with a ``KeyError``.
    """
    scaler = {
        float64_dtype: Float64Multiply,
    }[dtype]

    nrows, ncols = 6, 3

    # The all-ones baseline is also the expected buffer at index 0.
    baseline = full((nrows, ncols), 1, dtype=dtype)

    # index the adjustment is keyed on ->
    #     [(first_row, last_row, first_col, last_col, factor), ...]
    # Note that row indices are inclusive!
    plan = [
        (1, [(0, 0, 0, 0, 2)]),
        (3, [(1, 2, 1, 1, 3), (0, 1, 0, 0, 4)]),
        (4, [(0, 3, 2, 2, 5)]),
        (5, [(0, 4, 1, 1, 6), (2, 2, 2, 2, 7)]),
    ]
    adjustments = {}
    for key, specs in plan:
        adjustments[key] = [
            scaler(
                first_row, last_row, first_col, last_col,
                coerce_to_dtype(dtype, factor),
            )
            for first_row, last_row, first_col, last_col, factor in specs
        ]

    buffer_as_of = [None] * 6
    buffer_as_of[0] = baseline
    buffer_as_of[1] = array([[2, 1, 1],
                             [1, 1, 1],
                             [1, 1, 1],
                             [1, 1, 1],
                             [1, 1, 1],
                             [1, 1, 1]], dtype=dtype)
    # No adjustment at index 2.
    buffer_as_of[2] = buffer_as_of[1]
    buffer_as_of[3] = array([[8, 1, 1],
                             [4, 3, 1],
                             [1, 3, 1],
                             [1, 1, 1],
                             [1, 1, 1],
                             [1, 1, 1]], dtype=dtype)
    buffer_as_of[4] = array([[8, 1, 5],
                             [4, 3, 5],
                             [1, 3, 5],
                             [1, 1, 5],
                             [1, 1, 1],
                             [1, 1, 1]], dtype=dtype)
    buffer_as_of[5] = array([[8,  6,  5],
                             [4, 18,  5],
                             [1, 18, 35],
                             [1,  6,  5],
                             [1,  6,  1],
                             [1,  1,  1]], dtype=dtype)

    return _gen_expectations(
        baseline,
        default_missing_value_for_dtype(dtype),
        adjustments,
        buffer_as_of,
        nrows,
        perspective_offsets=(0, 1),
    )