コード例 #1
0
ファイル: utils.py プロジェクト: t330883522/zipline
def make_default_missing_values_for_expr(expr):
    """为表达式生成各字段的缺省默认值"""
    missing_values = {}
    for name, type_ in expr.dshape.measure.fields:
        # 可选项目,需要使用选项内部类型
        if isinstance(type_, Option):
            from_t = type_.ty
        else:
            from_t = type_

        if isinstance(from_t, Date):
            missing_values[name] = default_missing_value_for_dtype(
                datetime64ns_dtype)
        elif isinstance(from_t, DateTime):
            missing_values[name] = default_missing_value_for_dtype(
                datetime64ns_dtype)
        elif isinstance(from_t, String):
            missing_values[name] = 'unknown'
        elif from_t in boolean:
            missing_values[name] = False
        #elif from_t in integral:
        elif from_t is int64_dtype:
            missing_values[name] = -1
        else:
            missing_values[name] = np.nan
    return missing_values
コード例 #2
0
def _gen_expectations(baseline, adjustments, buffer_as_of, nrows):

    missing_value = default_missing_value_for_dtype(baseline.dtype)
    for windowlen in valid_window_lengths(nrows):

        num_legal_windows = num_windows_of_length_M_on_buffers_of_length_N(
            windowlen, nrows)

        yield (
            "dtype_%s_length_%d" % (baseline.dtype, windowlen),
            baseline,
            windowlen,
            adjustments,
            missing_value,
            [
                # This is a nasty expression...
                #
                # Reading from right to left: we want a slice of length
                # 'windowlen', starting at 'offset', from the buffer on which
                # we've applied all adjustments corresponding to the last row
                # of the data, which will be (offset + windowlen - 1).
                buffer_as_of[offset + windowlen - 1][offset:offset + windowlen]
                for offset in range(num_legal_windows)
            ],
        )
コード例 #3
0
def _gen_expectations(baseline, adjustments, buffer_as_of, nrows):

    missing_value = default_missing_value_for_dtype(baseline.dtype)
    for windowlen in valid_window_lengths(nrows):

        num_legal_windows = num_windows_of_length_M_on_buffers_of_length_N(
            windowlen, nrows
        )

        yield (
            "dtype_%s_length_%d" % (baseline.dtype, windowlen),
            baseline,
            windowlen,
            adjustments,
            missing_value,
            [
                # This is a nasty expression...
                #
                # Reading from right to left: we want a slice of length
                # 'windowlen', starting at 'offset', from the buffer on which
                # we've applied all adjustments corresponding to the last row
                # of the data, which will be (offset + windowlen - 1).
                buffer_as_of[offset + windowlen - 1][offset:offset + windowlen]
                for offset in range(num_legal_windows)
            ],
        )
コード例 #4
0
ファイル: term.py プロジェクト: Giruvegan/zipline
def validate_dtype(termname, dtype, missing_value):
    """
    Validate a `dtype` and `missing_value` passed to Term.__new__.

    Ensures that we know how to represent ``dtype``, and that missing_value
    is specified for types without default missing values.

    Returns
    -------
    validated_dtype, validated_missing_value : np.dtype, any
        The dtype and missing_value to use for the new term.

    Raises
    ------
    DTypeNotSpecified
        When no dtype was passed to the instance, and the class doesn't
        provide a default.
    NotDType
        When either the class or the instance provides a value not
        coercible to a numpy dtype.
    NoDefaultMissingValue
        When dtype requires an explicit missing_value, but
        ``missing_value`` is NotSpecified.
    """
    if dtype is NotSpecified:
        raise DTypeNotSpecified(termname=termname)

    try:
        dtype = dtype_class(dtype)
    except TypeError:
        raise NotDType(dtype=dtype, termname=termname)

    if not can_represent_dtype(dtype):
        raise UnsupportedDType(dtype=dtype, termname=termname)

    if missing_value is NotSpecified:
        missing_value = default_missing_value_for_dtype(dtype)

    try:
        if dtype == categorical_dtype:
            # This check is necessary because we use object dtype for
            # categoricals, and numpy will allow us to promote numerical
            # values to object even though we don't support them.
            _assert_valid_categorical_missing_value(missing_value)

        # For any other type, we can check if the missing_value is safe by
        # making an array of that value and trying to safely convert it to
        # the desired type.
        # 'same_kind' allows casting between things like float32 and
        # float64, but not str and int.
        array([missing_value]).astype(dtype=dtype, casting="same_kind")
    except TypeError as e:
        raise TypeError(
            "Missing value {value!r} is not a valid choice "
            "for term {termname} with dtype {dtype}.\n\n"
            "Coercion attempt failed with: {error}".format(termname=termname, value=missing_value, dtype=dtype, error=e)
        )

    return dtype, missing_value
コード例 #5
0
ファイル: term.py プロジェクト: zahid0/zipline-trader
def validate_dtype(termname, dtype, missing_value):
    """
    Validate a `dtype` and `missing_value` passed to Term.__new__.

    Ensures that we know how to represent ``dtype``, and that missing_value
    is specified for types without default missing values.

    Returns
    -------
    validated_dtype, validated_missing_value : np.dtype, any
        The dtype and missing_value to use for the new term.

    Raises
    ------
    DTypeNotSpecified
        When no dtype was passed to the instance, and the class doesn't
        provide a default.
    NotDType
        When either the class or the instance provides a value not
        coercible to a numpy dtype.
    NoDefaultMissingValue
        When dtype requires an explicit missing_value, but
        ``missing_value`` is NotSpecified.
    """
    if dtype is NotSpecified:
        raise DTypeNotSpecified(termname=termname)

    try:
        dtype = dtype_class(dtype)
    except TypeError:
        raise NotDType(dtype=dtype, termname=termname)

    if not can_represent_dtype(dtype):
        raise UnsupportedDType(dtype=dtype, termname=termname)

    if missing_value is NotSpecified:
        missing_value = default_missing_value_for_dtype(dtype)

    try:
        _coerce_to_dtype(missing_value, dtype)
    except TypeError as e:
        raise TypeError(
            "Missing value {value!r} is not a valid choice "
            "for term {termname} with dtype {dtype}.\n\n"
            "Coercion attempt failed with: {error}".format(
                termname=termname,
                value=missing_value,
                dtype=dtype,
                error=e,
            )
        )

    return dtype, missing_value
コード例 #6
0
ファイル: utils.py プロジェクト: t330883522/zipline
def make_default_missing_values_for_df(dtypes):
    """DataFrame对象各字段生成缺省默认值"""
    missing_values = {}
    # 此处name为字段名称
    for f_name, type_ in dtypes.items():
        name = type_.name
        if name.startswith('int'):
            missing_values[f_name] = 0
        elif name.startswith('object'):
            missing_values[f_name] = 'unknown'
        else:
            missing_values[f_name] = default_missing_value_for_dtype(type_)
    return missing_values
コード例 #7
0
def make_default_missing_values_for_expr(expr):
    """数据集输出时的字段缺省默认值"""
    missing_values = {}
    for name, type_ in expr.dshape.measure.fields:
        n_type = datashape_type_to_numpy(type_)
        if n_type is object_dtype:
            missing_values[name] = '未定义'
        elif n_type is bool_dtype:
            missing_values[name] = False
        elif n_type is int64_dtype:
            missing_values[name] = -1
        else:
            missing_values[name] = default_missing_value_for_dtype(n_type)
    return missing_values
コード例 #8
0
    def validate_dtype(termname, dtype, missing_value):
        """
        Validate a `dtype` and `missing_value` passed to Term.__new__.

        Ensures that we know how to represent ``dtype``, and that missing_value
        is specified for types without default missing values.

        Returns
        -------
        validated_dtype, validated_missing_value : np.dtype, any
            The dtype and missing_value to use for the new term.

        Raises
        ------
        DTypeNotSpecified
            When no dtype was passed to the instance, and the class doesn't
            provide a default.
        NotDType
            When either the class or the instance provides a value not
            coercible to a numpy dtype.
        NoDefaultMissingValue
            When dtype requires an explicit missing_value, but
            ``missing_value`` is NotSpecified.
        """
        if dtype is NotSpecified:
            raise DTypeNotSpecified(termname=termname)
        try:
            dtype = dtype_class(dtype)
        except TypeError:
            raise NotDType(dtype=dtype, termname=termname)

        if not can_represent_dtype(dtype):
            raise UnsupportedDType(dtype=dtype, termname=termname)

        if missing_value is NotSpecified:
            missing_value = default_missing_value_for_dtype(dtype)

        return dtype, missing_value
コード例 #9
0
ファイル: term.py プロジェクト: FrankVigilante/zipline
    def validate_dtype(termname, dtype, missing_value):
        """
        Validate a `dtype` and `missing_value` passed to Term.__new__.

        Ensures that we know how to represent ``dtype``, and that missing_value
        is specified for types without default missing values.

        Returns
        -------
        validated_dtype, validated_missing_value : np.dtype, any
            The dtype and missing_value to use for the new term.

        Raises
        ------
        DTypeNotSpecified
            When no dtype was passed to the instance, and the class doesn't
            provide a default.
        NotDType
            When either the class or the instance provides a value not
            coercible to a numpy dtype.
        NoDefaultMissingValue
            When dtype requires an explicit missing_value, but
            ``missing_value`` is NotSpecified.
        """
        if dtype is NotSpecified:
            raise DTypeNotSpecified(termname=termname)
        try:
            dtype = dtype_class(dtype)
        except TypeError:
            raise NotDType(dtype=dtype, termname=termname)

        if not can_represent_dtype(dtype):
            raise UnsupportedDType(dtype=dtype, termname=termname)

        if missing_value is NotSpecified:
            missing_value = default_missing_value_for_dtype(dtype)

        return dtype, missing_value
コード例 #10
0
def _gen_unadjusted_cases(dtype):

    nrows = 6
    ncols = 3
    data = arange(nrows * ncols).astype(dtype).reshape(nrows, ncols)
    missing_value = default_missing_value_for_dtype(dtype)

    for windowlen in valid_window_lengths(nrows):

        num_legal_windows = num_windows_of_length_M_on_buffers_of_length_N(
            windowlen, nrows)

        yield (
            "dtype_%s_length_%d" % (dtype, windowlen),
            data,
            windowlen,
            {},
            missing_value,
            [
                data[offset:offset + windowlen]
                for offset in range(num_legal_windows)
            ],
        )
コード例 #11
0
def _gen_unadjusted_cases(dtype):

    nrows = 6
    ncols = 3
    data = arange(nrows * ncols).astype(dtype).reshape(nrows, ncols)
    missing_value = default_missing_value_for_dtype(dtype)

    for windowlen in valid_window_lengths(nrows):

        num_legal_windows = num_windows_of_length_M_on_buffers_of_length_N(
            windowlen, nrows
        )

        yield (
            "dtype_%s_length_%d" % (dtype, windowlen),
            data,
            windowlen,
            {},
            missing_value,
            [
                data[offset:offset + windowlen]
                for offset in range(num_legal_windows)
            ],
        )
コード例 #12
0
def _gen_multiplicative_adjustment_cases(dtype):
    """
    Generate expected moving windows on a buffer with adjustments.

    We proceed by constructing, at each row, the view of the array we expect in
    in all windows anchored on that row.

    In general, if we have an adjustment to be applied once we process the row
    at index N, should see that adjustment applied to the underlying buffer for
    any window containing the row at index N.

    We then build all legal windows over these buffers.
    """
    adjustment_type = {
        float64_dtype: Float64Multiply,
    }[dtype]

    nrows, ncols = 6, 3
    adjustments = {}
    buffer_as_of = [None] * 6
    baseline = buffer_as_of[0] = full((nrows, ncols), 1, dtype=dtype)

    # Note that row indices are inclusive!
    adjustments[1] = [
        adjustment_type(0, 0, 0, 0, coerce_to_dtype(dtype, 2)),
    ]
    buffer_as_of[1] = array(
        [[2, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]],
        dtype=dtype)

    # No adjustment at index 2.
    buffer_as_of[2] = buffer_as_of[1]

    adjustments[3] = [
        adjustment_type(1, 2, 1, 1, coerce_to_dtype(dtype, 3)),
        adjustment_type(0, 1, 0, 0, coerce_to_dtype(dtype, 4)),
    ]
    buffer_as_of[3] = array(
        [[8, 1, 1], [4, 3, 1], [1, 3, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1]],
        dtype=dtype)

    adjustments[4] = [adjustment_type(0, 3, 2, 2, coerce_to_dtype(dtype, 5))]
    buffer_as_of[4] = array(
        [[8, 1, 5], [4, 3, 5], [1, 3, 5], [1, 1, 5], [1, 1, 1], [1, 1, 1]],
        dtype=dtype)

    adjustments[5] = [
        adjustment_type(0, 4, 1, 1, coerce_to_dtype(dtype, 6)),
        adjustment_type(2, 2, 2, 2, coerce_to_dtype(dtype, 7)),
    ]
    buffer_as_of[5] = array(
        [[8, 6, 5], [4, 18, 5], [1, 18, 35], [1, 6, 5], [1, 6, 1], [1, 1, 1]],
        dtype=dtype)

    return _gen_expectations(
        baseline,
        default_missing_value_for_dtype(dtype),
        adjustments,
        buffer_as_of,
        nrows,
        perspective_offsets=(0, 1),
    )
コード例 #13
0
def _gen_overwrite_adjustment_cases(dtype):
    """
    Generate test cases for overwrite adjustments.

    The algorithm used here is the same as the one used above for
    multiplicative adjustments.  The only difference is the semantics of how
    the adjustments are expected to modify the arrays.

    This is parameterized on `make_input` and `make_expected_output` functions,
    which take 2-D lists of values and transform them into desired input/output
    arrays. We do this so that we can easily test both vanilla numpy ndarrays
    and our own LabelArray class for strings.
    """
    adjustment_type = {
        float64_dtype: Float64Overwrite,
        datetime64ns_dtype: Datetime64Overwrite,
        int64_dtype: Int64Overwrite,
        bytes_dtype: ObjectOverwrite,
        unicode_dtype: ObjectOverwrite,
        object_dtype: ObjectOverwrite,
        bool_dtype: BooleanOverwrite,
    }[dtype]
    make_expected_dtype = as_dtype(dtype)
    missing_value = default_missing_value_for_dtype(datetime64ns_dtype)

    if dtype == object_dtype:
        # When we're testing object dtypes, we expect to have strings, but
        # coerce_to_dtype(object, 3) just gives 3 as a Python integer.
        def make_overwrite_value(dtype, value):
            return str(value)
    else:
        make_overwrite_value = coerce_to_dtype

    adjustments = {}
    buffer_as_of = [None] * 6
    baseline = make_expected_dtype([[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2],
                                    [2, 2, 2], [2, 2, 2]])

    buffer_as_of[0] = make_expected_dtype([[2, 2, 2], [2, 2, 2], [2, 2, 2],
                                           [2, 2, 2], [2, 2, 2], [2, 2, 2]])

    # Note that row indices are inclusive!
    adjustments[1] = [
        adjustment_type(0, 0, 0, 0, make_overwrite_value(dtype, 1)),
    ]
    buffer_as_of[1] = make_expected_dtype([[1, 2, 2], [2, 2, 2], [2, 2, 2],
                                           [2, 2, 2], [2, 2, 2], [2, 2, 2]])

    # No adjustment at index 2.
    buffer_as_of[2] = buffer_as_of[1]

    adjustments[3] = [
        adjustment_type(1, 2, 1, 1, make_overwrite_value(dtype, 3)),
        adjustment_type(0, 1, 0, 0, make_overwrite_value(dtype, 4)),
    ]
    buffer_as_of[3] = make_expected_dtype([[4, 2, 2], [4, 3, 2], [2, 3, 2],
                                           [2, 2, 2], [2, 2, 2], [2, 2, 2]])

    adjustments[4] = [
        adjustment_type(0, 3, 2, 2, make_overwrite_value(dtype, 5))
    ]
    buffer_as_of[4] = make_expected_dtype([[4, 2, 5], [4, 3, 5], [2, 3, 5],
                                           [2, 2, 5], [2, 2, 2], [2, 2, 2]])

    adjustments[5] = [
        adjustment_type(0, 4, 1, 1, make_overwrite_value(dtype, 6)),
        adjustment_type(2, 2, 2, 2, make_overwrite_value(dtype, 7)),
    ]
    buffer_as_of[5] = make_expected_dtype([[4, 6, 5], [4, 6, 5], [2, 6, 7],
                                           [2, 6, 5], [2, 6, 2], [2, 2, 2]])

    return _gen_expectations(
        baseline,
        missing_value,
        adjustments,
        buffer_as_of,
        nrows=6,
        perspective_offsets=(0, 1),
    )
コード例 #14
0
class AdjustedArrayTestCase(TestCase):

    @parameterized.expand(
        chain(
            _gen_unadjusted_cases(
                'float',
                make_input=as_dtype(float64_dtype),
                make_expected_output=as_dtype(float64_dtype),
                missing_value=default_missing_value_for_dtype(float64_dtype),
            ),
            _gen_unadjusted_cases(
                'datetime',
                make_input=as_dtype(datetime64ns_dtype),
                make_expected_output=as_dtype(datetime64ns_dtype),
                missing_value=default_missing_value_for_dtype(
                    datetime64ns_dtype
                ),
            ),
            # Test passing an array of strings to AdjustedArray.
            _gen_unadjusted_cases(
                'bytes_ndarray',
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_ndarray',
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_ndarray',
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value='',
            ),
            # Test passing a LabelArray directly to AdjustedArray.
            _gen_unadjusted_cases(
                'bytes_labelarray',
                make_input=as_labelarray(bytes_dtype, b''),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_labelarray',
                make_input=as_labelarray(unicode_dtype, None),
                make_expected_output=as_labelarray(unicode_dtype, None),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_labelarray',
                make_input=(
                    lambda a: LabelArray(a.astype(unicode).astype(object), u'')
                ),
                make_expected_output=as_labelarray(unicode_dtype, ''),
                missing_value='',
            ),
        )
    )
    def test_no_adjustments(self,
                            name,
                            data,
                            lookback,
                            adjustments,
                            missing_value,
                            perspective_offset,
                            expected_output):

        array = AdjustedArray(data, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            in_out = zip(array.traverse(lookback), expected_output)
            for yielded, expected_yield in in_out:
                check_arrays(yielded, expected_yield)

    @parameterized.expand(_gen_multiplicative_adjustment_cases(float64_dtype))
    def test_multiplicative_adjustments(self,
                                        name,
                                        data,
                                        lookback,
                                        adjustments,
                                        missing_value,
                                        perspective_offset,
                                        expected):

        array = AdjustedArray(data, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    @parameterized.expand(
        chain(
            _gen_overwrite_adjustment_cases(int64_dtype),
            _gen_overwrite_adjustment_cases(float64_dtype),
            _gen_overwrite_adjustment_cases(datetime64ns_dtype),
            _gen_overwrite_1d_array_adjustment_case(float64_dtype),
            _gen_overwrite_1d_array_adjustment_case(datetime64ns_dtype),
            # There are six cases here:
            # Using np.bytes/np.unicode/object arrays as inputs.
            # Passing np.bytes/np.unicode/object arrays to LabelArray,
            # and using those as input.
            #
            # The outputs should always be LabelArrays.
            _gen_unadjusted_cases(
                'bytes_ndarray',
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_ndarray',
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_ndarray',
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'bytes_labelarray',
                make_input=as_labelarray(bytes_dtype, b''),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_labelarray',
                make_input=as_labelarray(unicode_dtype, u''),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_labelarray',
                make_input=(
                    lambda a: LabelArray(
                        a.astype(unicode).astype(object),
                        None,
                    )
                ),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=None,
            ),
        )
    )
    def test_overwrite_adjustment_cases(self,
                                        name,
                                        baseline,
                                        lookback,
                                        adjustments,
                                        missing_value,
                                        perspective_offset,
                                        expected):
        array = AdjustedArray(baseline, adjustments, missing_value)

        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    def test_invalid_lookback(self):

        data = arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, {}, float('nan'))

        with self.assertRaises(WindowLengthTooLong):
            adj_array.traverse(7)

        with self.assertRaises(WindowLengthNotPositive):
            adj_array.traverse(0)

        with self.assertRaises(WindowLengthNotPositive):
            adj_array.traverse(-1)

    def test_array_views_arent_writable(self):

        data = arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, {}, float('nan'))

        for frame in adj_array.traverse(3):
            with self.assertRaises(ValueError):
                frame[0, 0] = 5.0

    def test_inspect(self):
        data = arange(15, dtype=float).reshape(5, 3)
        adj_array = AdjustedArray(
            data,
            {4: [Float64Multiply(2, 3, 0, 0, 4.0)]},
            float('nan'),
        )

        expected = dedent(
            """\
            Adjusted Array (float64):

            Data:
            array([[  0.,   1.,   2.],
                   [  3.,   4.,   5.],
                   [  6.,   7.,   8.],
                   [  9.,  10.,  11.],
                   [ 12.,  13.,  14.]])

            Adjustments:
            {4: [Float64Multiply(first_row=2, last_row=3, first_col=0, \
last_col=0, value=4.000000)]}
            """
        )
        got = adj_array.inspect()
        self.assertEqual(expected, got)
コード例 #15
0
def validate_dtype(termname, dtype, missing_value):
    """
    Validate a `dtype` and `missing_value` passed to Term.__new__.

    Ensures that we know how to represent ``dtype``, and that missing_value
    is specified for types without default missing values.

    Returns
    -------
    validated_dtype, validated_missing_value : np.dtype, any
        The dtype and missing_value to use for the new term.

    Raises
    ------
    DTypeNotSpecified
        When no dtype was passed to the instance, and the class doesn't
        provide a default.
    NotDType
        When either the class or the instance provides a value not
        coercible to a numpy dtype.
    NoDefaultMissingValue
        When dtype requires an explicit missing_value, but
        ``missing_value`` is NotSpecified.
    """
    if dtype is NotSpecified:
        raise DTypeNotSpecified(termname=termname)

    try:
        dtype = dtype_class(dtype)
    except TypeError:
        raise NotDType(dtype=dtype, termname=termname)

    if not can_represent_dtype(dtype):
        raise UnsupportedDType(dtype=dtype, termname=termname)

    if missing_value is NotSpecified:
        missing_value = default_missing_value_for_dtype(dtype)

    try:
        if (dtype == categorical_dtype):
            # This check is necessary because we use object dtype for
            # categoricals, and numpy will allow us to promote numerical
            # values to object even though we don't support them.
            _assert_valid_categorical_missing_value(missing_value)

        # For any other type, we can check if the missing_value is safe by
        # making an array of that value and trying to safely convert it to
        # the desired type.
        # 'same_kind' allows casting between things like float32 and
        # float64, but not str and int.
        array([missing_value]).astype(dtype=dtype, casting='same_kind')
    except TypeError as e:
        raise TypeError(
            "Missing value {value!r} is not a valid choice "
            "for term {termname} with dtype {dtype}.\n\n"
            "Coercion attempt failed with: {error}".format(
                termname=termname,
                value=missing_value,
                dtype=dtype,
                error=e,
            )
        )

    return dtype, missing_value
コード例 #16
0
def _gen_overwrite_1d_array_adjustment_case(dtype):
    """
    Generate test cases for overwrite adjustments.

    The algorithm used here is the same as the one used above for
    multiplicative adjustments.  The only difference is the semantics of how
    the adjustments are expected to modify the arrays.

    This is parameterized on `make_input` and `make_expected_output` functions,
    which take 1-D lists of values and transform them into desired input/output
    arrays. We do this so that we can easily test both vanilla numpy ndarrays
    and our own LabelArray class for strings.
    """
    adjustment_type = {
        float64_dtype: Float641DArrayOverwrite,
        datetime64ns_dtype: Datetime641DArrayOverwrite,
    }[dtype]
    make_expected_dtype = as_dtype(dtype)
    missing_value = default_missing_value_for_dtype(datetime64ns_dtype)

    adjustments = {}
    buffer_as_of = [None] * 6
    baseline = make_expected_dtype([[2, 2, 2],
                                    [2, 2, 2],
                                    [2, 2, 2],
                                    [2, 2, 2],
                                    [2, 2, 2],
                                    [2, 2, 2]])

    buffer_as_of[0] = make_expected_dtype([[2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2]])

    vals1 = [1]
    # Note that row indices are inclusive!
    adjustments[1] = [
        adjustment_type(
            0, 0, 0, 0,
            array([coerce_to_dtype(dtype, val) for val in vals1])
        )
    ]
    buffer_as_of[1] = make_expected_dtype([[1, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2]])

    # No adjustment at index 2.
    buffer_as_of[2] = buffer_as_of[1]

    vals3 = [4, 4, 1]
    adjustments[3] = [
        adjustment_type(
            0, 2, 0, 0,
            array([coerce_to_dtype(dtype, val) for val in vals3])
        )
    ]
    buffer_as_of[3] = make_expected_dtype([[4, 2, 2],
                                           [4, 2, 2],
                                           [1, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2]])

    vals4 = [5] * 4
    adjustments[4] = [
        adjustment_type(
            0, 3, 2, 2,
            array([coerce_to_dtype(dtype, val) for val in vals4]))
    ]
    buffer_as_of[4] = make_expected_dtype([[4, 2, 5],
                                           [4, 2, 5],
                                           [1, 2, 5],
                                           [2, 2, 5],
                                           [2, 2, 2],
                                           [2, 2, 2]])

    vals5 = range(1, 6)
    adjustments[5] = [
        adjustment_type(
            0, 4, 1, 1,
            array([coerce_to_dtype(dtype, val) for val in vals5])),
    ]
    buffer_as_of[5] = make_expected_dtype([[4, 1, 5],
                                           [4, 2, 5],
                                           [1, 3, 5],
                                           [2, 4, 5],
                                           [2, 5, 2],
                                           [2, 2, 2]])
    return _gen_expectations(
        baseline,
        missing_value,
        adjustments,
        buffer_as_of,
        nrows=6,
        perspective_offsets=(0, 1),
    )
コード例 #17
0
def _gen_overwrite_adjustment_cases(dtype):
    """
    Generate test cases for overwrite adjustments.

    The algorithm used here is the same as the one used above for
    multiplicative adjustments.  The only difference is the semantics of how
    the adjustments are expected to modify the arrays.

    This is parameterized on `make_input` and `make_expected_output` functions,
    which take 2-D lists of values and transform them into desired input/output
    arrays. We do this so that we can easily test both vanilla numpy ndarrays
    and our own LabelArray class for strings.
    """
    adjustment_type = {
        float64_dtype: Float64Overwrite,
        datetime64ns_dtype: Datetime64Overwrite,
        int64_dtype: Int64Overwrite,
        bytes_dtype: ObjectOverwrite,
        unicode_dtype: ObjectOverwrite,
        object_dtype: ObjectOverwrite,
    }[dtype]
    make_expected_dtype = as_dtype(dtype)
    missing_value = default_missing_value_for_dtype(datetime64ns_dtype)

    if dtype == object_dtype:
        # When we're testing object dtypes, we expect to have strings, but
        # coerce_to_dtype(object, 3) just gives 3 as a Python integer.
        def make_overwrite_value(dtype, value):
            return str(value)
    else:
        make_overwrite_value = coerce_to_dtype

    adjustments = {}
    buffer_as_of = [None] * 6
    baseline = make_expected_dtype([[2, 2, 2],
                                    [2, 2, 2],
                                    [2, 2, 2],
                                    [2, 2, 2],
                                    [2, 2, 2],
                                    [2, 2, 2]])

    buffer_as_of[0] = make_expected_dtype([[2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2]])

    # Note that row indices are inclusive!
    adjustments[1] = [
        adjustment_type(0, 0, 0, 0, make_overwrite_value(dtype, 1)),
    ]
    buffer_as_of[1] = make_expected_dtype([[1, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2]])

    # No adjustment at index 2.
    buffer_as_of[2] = buffer_as_of[1]

    adjustments[3] = [
        adjustment_type(1, 2, 1, 1, make_overwrite_value(dtype, 3)),
        adjustment_type(0, 1, 0, 0, make_overwrite_value(dtype, 4)),
    ]
    buffer_as_of[3] = make_expected_dtype([[4, 2, 2],
                                           [4, 3, 2],
                                           [2, 3, 2],
                                           [2, 2, 2],
                                           [2, 2, 2],
                                           [2, 2, 2]])

    adjustments[4] = [
        adjustment_type(0, 3, 2, 2, make_overwrite_value(dtype, 5))
    ]
    buffer_as_of[4] = make_expected_dtype([[4, 2, 5],
                                           [4, 3, 5],
                                           [2, 3, 5],
                                           [2, 2, 5],
                                           [2, 2, 2],
                                           [2, 2, 2]])

    adjustments[5] = [
        adjustment_type(0, 4, 1, 1, make_overwrite_value(dtype, 6)),
        adjustment_type(2, 2, 2, 2, make_overwrite_value(dtype, 7)),
    ]
    buffer_as_of[5] = make_expected_dtype([[4, 6, 5],
                                           [4, 6, 5],
                                           [2, 6, 7],
                                           [2, 6, 5],
                                           [2, 6, 2],
                                           [2, 2, 2]])

    return _gen_expectations(
        baseline,
        missing_value,
        adjustments,
        buffer_as_of,
        nrows=6,
        perspective_offsets=(0, 1),
    )
コード例 #18
0
def _gen_multiplicative_adjustment_cases(dtype):
    """
    Generate expected moving windows on a buffer with adjustments.

    We proceed by constructing, at each row, the view of the array we expect in
    in all windows anchored on that row.

    In general, if we have an adjustment to be applied once we process the row
    at index N, should see that adjustment applied to the underlying buffer for
    any window containing the row at index N.

    We then build all legal windows over these buffers.
    """
    adjustment_type = {
        float64_dtype: Float64Multiply,
    }[dtype]

    nrows, ncols = 6, 3
    adjustments = {}
    buffer_as_of = [None] * 6
    baseline = buffer_as_of[0] = full((nrows, ncols), 1, dtype=dtype)

    # Note that row indices are inclusive!
    adjustments[1] = [
        adjustment_type(0, 0, 0, 0, coerce_to_dtype(dtype, 2)),
    ]
    buffer_as_of[1] = array([[2, 1, 1],
                             [1, 1, 1],
                             [1, 1, 1],
                             [1, 1, 1],
                             [1, 1, 1],
                             [1, 1, 1]], dtype=dtype)

    # No adjustment at index 2.
    buffer_as_of[2] = buffer_as_of[1]

    adjustments[3] = [
        adjustment_type(1, 2, 1, 1, coerce_to_dtype(dtype, 3)),
        adjustment_type(0, 1, 0, 0, coerce_to_dtype(dtype, 4)),
    ]
    buffer_as_of[3] = array([[8, 1, 1],
                             [4, 3, 1],
                             [1, 3, 1],
                             [1, 1, 1],
                             [1, 1, 1],
                             [1, 1, 1]], dtype=dtype)

    adjustments[4] = [
        adjustment_type(0, 3, 2, 2, coerce_to_dtype(dtype, 5))
    ]
    buffer_as_of[4] = array([[8, 1, 5],
                             [4, 3, 5],
                             [1, 3, 5],
                             [1, 1, 5],
                             [1, 1, 1],
                             [1, 1, 1]], dtype=dtype)

    adjustments[5] = [
        adjustment_type(0, 4, 1, 1, coerce_to_dtype(dtype, 6)),
        adjustment_type(2, 2, 2, 2, coerce_to_dtype(dtype, 7)),
    ]
    buffer_as_of[5] = array([[8,  6,  5],
                             [4, 18,  5],
                             [1, 18, 35],
                             [1,  6,  5],
                             [1,  6,  1],
                             [1,  1,  1]], dtype=dtype)

    return _gen_expectations(
        baseline,
        default_missing_value_for_dtype(dtype),
        adjustments,
        buffer_as_of,
        nrows,
        perspective_offsets=(0, 1),
    )
コード例 #19
0
class AdjustedArrayTestCase(TestCase):
    @parameterized.expand(
        chain(
            _gen_unadjusted_cases(
                'float',
                make_input=as_dtype(float64_dtype),
                make_expected_output=as_dtype(float64_dtype),
                missing_value=default_missing_value_for_dtype(float64_dtype),
            ),
            _gen_unadjusted_cases(
                'datetime',
                make_input=as_dtype(datetime64ns_dtype),
                make_expected_output=as_dtype(datetime64ns_dtype),
                missing_value=default_missing_value_for_dtype(
                    datetime64ns_dtype),
            ),
            # Test passing an array of strings to AdjustedArray.
            _gen_unadjusted_cases(
                'bytes_ndarray',
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_ndarray',
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_ndarray',
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value='',
            ),
            # Test passing a LabelArray directly to AdjustedArray.
            _gen_unadjusted_cases(
                'bytes_labelarray',
                make_input=as_labelarray(bytes_dtype, b''),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_labelarray',
                make_input=as_labelarray(unicode_dtype, None),
                make_expected_output=as_labelarray(unicode_dtype, None),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_labelarray',
                make_input=(lambda a: LabelArray(
                    a.astype(unicode).astype(object), u'')),
                make_expected_output=as_labelarray(unicode_dtype, ''),
                missing_value='',
            ),
        ))
    def test_no_adjustments(self, name, data, lookback, adjustments,
                            missing_value, perspective_offset,
                            expected_output):

        array = AdjustedArray(data, NOMASK, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            in_out = zip(array.traverse(lookback), expected_output)
            for yielded, expected_yield in in_out:
                check_arrays(yielded, expected_yield)

    @parameterized.expand(_gen_multiplicative_adjustment_cases(float64_dtype))
    def test_multiplicative_adjustments(self, name, data, lookback,
                                        adjustments, missing_value,
                                        perspective_offset, expected):

        array = AdjustedArray(data, NOMASK, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    @parameterized.expand(
        chain(
            _gen_overwrite_adjustment_cases(float64_dtype),
            _gen_overwrite_adjustment_cases(datetime64ns_dtype),
            _gen_overwrite_1d_array_adjustment_case(float64_dtype),
            _gen_overwrite_1d_array_adjustment_case(datetime64ns_dtype),
            # There are six cases here:
            # Using np.bytes/np.unicode/object arrays as inputs.
            # Passing np.bytes/np.unicode/object arrays to LabelArray,
            # and using those as input.
            #
            # The outputs should always be LabelArrays.
            _gen_unadjusted_cases(
                'bytes_ndarray',
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_ndarray',
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_ndarray',
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'bytes_labelarray',
                make_input=as_labelarray(bytes_dtype, b''),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_labelarray',
                make_input=as_labelarray(unicode_dtype, u''),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_labelarray',
                make_input=(lambda a: LabelArray(
                    a.astype(unicode).astype(object),
                    None,
                )),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=None,
            ),
        ))
    def test_overwrite_adjustment_cases(self, name, baseline, lookback,
                                        adjustments, missing_value,
                                        perspective_offset, expected):
        array = AdjustedArray(baseline, NOMASK, adjustments, missing_value)

        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    @parameter_space(
        __fail_fast=True,
        dtype=[
            float64_dtype,
            int64_dtype,
            datetime64ns_dtype,
        ],
        missing_value=[0, 10000],
        window_length=[2, 3],
    )
    def test_masking(self, dtype, missing_value, window_length):
        missing_value = coerce_to_dtype(dtype, missing_value)
        baseline_ints = arange(15).reshape(5, 3)
        baseline = baseline_ints.astype(dtype)
        mask = (baseline_ints % 2).astype(bool)
        masked_baseline = where(mask, baseline, missing_value)

        array = AdjustedArray(
            baseline,
            mask,
            adjustments={},
            missing_value=missing_value,
        )

        gen_expected = moving_window(masked_baseline, window_length)
        gen_actual = array.traverse(window_length)
        for expected, actual in zip(gen_expected, gen_actual):
            check_arrays(expected, actual)

    @parameter_space(
        __fail_fast=True,
        dtype=[bytes_dtype, unicode_dtype, object_dtype],
        missing_value=["0", "-1", ""],
        window_length=[2, 3],
    )
    def test_masking_with_strings(self, dtype, missing_value, window_length):
        missing_value = coerce_to_dtype(dtype, missing_value)
        baseline_ints = arange(15).reshape(5, 3)

        # Coerce to string first so that coercion to object gets us an array of
        # string objects.
        baseline = baseline_ints.astype(str).astype(dtype)
        mask = (baseline_ints % 2).astype(bool)

        masked_baseline = LabelArray(baseline, missing_value=missing_value)
        masked_baseline[~mask] = missing_value

        array = AdjustedArray(
            baseline,
            mask,
            adjustments={},
            missing_value=missing_value,
        )

        gen_expected = moving_window(masked_baseline, window_length)
        gen_actual = array.traverse(window_length=window_length)

        for expected, actual in zip(gen_expected, gen_actual):
            check_arrays(expected, actual)

    def test_invalid_lookback(self):

        data = arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, NOMASK, {}, float('nan'))

        with self.assertRaises(WindowLengthTooLong):
            adj_array.traverse(7)

        with self.assertRaises(WindowLengthNotPositive):
            adj_array.traverse(0)

        with self.assertRaises(WindowLengthNotPositive):
            adj_array.traverse(-1)

    def test_array_views_arent_writable(self):

        data = arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, NOMASK, {}, float('nan'))

        for frame in adj_array.traverse(3):
            with self.assertRaises(ValueError):
                frame[0, 0] = 5.0

    def test_bad_input(self):
        msg = "Mask shape \(2L?, 3L?\) != data shape \(5L?, 5L?\)"
        data = arange(25).reshape(5, 5)
        bad_mask = array([[0, 1, 1], [0, 0, 1]], dtype=bool)

        with self.assertRaisesRegexp(ValueError, msg):
            AdjustedArray(data, bad_mask, {}, missing_value=-1)

    def test_inspect(self):
        data = arange(15, dtype=float).reshape(5, 3)
        adj_array = AdjustedArray(
            data,
            NOMASK,
            {4: [Float64Multiply(2, 3, 0, 0, 4.0)]},
            float('nan'),
        )

        expected = dedent("""\
            Adjusted Array (float64):

            Data:
            array([[  0.,   1.,   2.],
                   [  3.,   4.,   5.],
                   [  6.,   7.,   8.],
                   [  9.,  10.,  11.],
                   [ 12.,  13.,  14.]])

            Adjustments:
            {4: [Float64Multiply(first_row=2, last_row=3, first_col=0, \
last_col=0, value=4.000000)]}
            """)
        got = adj_array.inspect()
        self.assertEqual(expected, got)
コード例 #20
0
def _gen_overwrite_1d_array_adjustment_case(dtype):
    """
    Generate test cases for overwrite adjustments.

    The algorithm used here is the same as the one used above for
    multiplicative adjustments.  The only difference is the semantics of how
    the adjustments are expected to modify the arrays.

    This is parameterized on `make_input` and `make_expected_output` functions,
    which take 1-D lists of values and transform them into desired input/output
    arrays. We do this so that we can easily test both vanilla numpy ndarrays
    and our own LabelArray class for strings.
    """
    adjustment_type = {
        bool_dtype: Boolean1DArrayOverwrite,
        float64_dtype: Float641DArrayOverwrite,
        datetime64ns_dtype: Datetime641DArrayOverwrite,
    }[dtype]
    make_expected_dtype = as_dtype(dtype)
    missing_value = default_missing_value_for_dtype(datetime64ns_dtype)

    adjustments = {}
    buffer_as_of = [None] * 6
    baseline = make_expected_dtype([[2, 2, 2], [2, 2, 2], [2, 2, 2], [2, 2, 2],
                                    [2, 2, 2], [2, 2, 2]])

    buffer_as_of[0] = make_expected_dtype([[2, 2, 2], [2, 2, 2], [2, 2, 2],
                                           [2, 2, 2], [2, 2, 2], [2, 2, 2]])

    vals1 = [1]
    # Note that row indices are inclusive!
    adjustments[1] = [
        adjustment_type(0, 0, 0, 0,
                        array([coerce_to_dtype(dtype, val) for val in vals1]))
    ]
    buffer_as_of[1] = make_expected_dtype([[1, 2, 2], [2, 2, 2], [2, 2, 2],
                                           [2, 2, 2], [2, 2, 2], [2, 2, 2]])

    # No adjustment at index 2.
    buffer_as_of[2] = buffer_as_of[1]

    vals3 = [4, 4, 1]
    adjustments[3] = [
        adjustment_type(0, 2, 0, 0,
                        array([coerce_to_dtype(dtype, val) for val in vals3]))
    ]
    buffer_as_of[3] = make_expected_dtype([[4, 2, 2], [4, 2, 2], [1, 2, 2],
                                           [2, 2, 2], [2, 2, 2], [2, 2, 2]])

    vals4 = [5] * 4
    adjustments[4] = [
        adjustment_type(0, 3, 2, 2,
                        array([coerce_to_dtype(dtype, val) for val in vals4]))
    ]
    buffer_as_of[4] = make_expected_dtype([[4, 2, 5], [4, 2, 5], [1, 2, 5],
                                           [2, 2, 5], [2, 2, 2], [2, 2, 2]])

    vals5 = range(1, 6)
    adjustments[5] = [
        adjustment_type(0, 4, 1, 1,
                        array([coerce_to_dtype(dtype, val) for val in vals5])),
    ]
    buffer_as_of[5] = make_expected_dtype([[4, 1, 5], [4, 2, 5], [1, 3, 5],
                                           [2, 4, 5], [2, 5, 2], [2, 2, 2]])
    return _gen_expectations(
        baseline,
        missing_value,
        adjustments,
        buffer_as_of,
        nrows=6,
        perspective_offsets=(0, 1),
    )
コード例 #21
0
class AdjustedArrayTestCase(TestCase):
    def test_traverse_invalidating(self):
        data = arange(5 * 3, dtype='f8').reshape(5, 3)
        original_data = data.copy()
        adjustments = {2: [Float64Multiply(0, 4, 0, 2, 2.0)]}
        adjusted_array = AdjustedArray(data, adjustments, float('nan'))

        for _ in adjusted_array.traverse(1, copy=False):
            pass

        assert_equal(data, original_data * 2)

        with self.assertRaises(ValueError) as e:
            adjusted_array.traverse(1)

        assert_equal(
            str(e.exception),
            'cannot traverse invalidated AdjustedArray',
        )

    def test_copy(self):
        data = arange(5 * 3, dtype='f8').reshape(5, 3)
        original_data = data.copy()
        adjustments = {2: [Float64Multiply(0, 4, 0, 2, 2.0)]}
        adjusted_array = AdjustedArray(data, adjustments, float('nan'))
        traverse_copy = adjusted_array.copy()
        clean_copy = adjusted_array.copy()

        a_it = adjusted_array.traverse(2, copy=False)
        b_it = traverse_copy.traverse(2, copy=False)
        for a, b in zip(a_it, b_it):
            assert_equal(a, b)

        with self.assertRaises(ValueError) as e:
            adjusted_array.copy()

        assert_equal(
            str(e.exception),
            'cannot copy invalidated AdjustedArray',
        )

        # the clean copy should have the original data even though the
        # original adjusted array has it's data mutated in place
        assert_equal(clean_copy.data, original_data)
        assert_equal(adjusted_array.data, original_data * 2)

    @parameterized.expand(
        chain(
            _gen_unadjusted_cases(
                'float',
                make_input=as_dtype(float64_dtype),
                make_expected_output=as_dtype(float64_dtype),
                missing_value=default_missing_value_for_dtype(float64_dtype),
            ),
            _gen_unadjusted_cases(
                'datetime',
                make_input=as_dtype(datetime64ns_dtype),
                make_expected_output=as_dtype(datetime64ns_dtype),
                missing_value=default_missing_value_for_dtype(
                    datetime64ns_dtype),
            ),
            # Test passing an array of strings to AdjustedArray.
            _gen_unadjusted_cases(
                'bytes_ndarray',
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_ndarray',
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_ndarray',
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value='',
            ),
            # Test passing a LabelArray directly to AdjustedArray.
            _gen_unadjusted_cases(
                'bytes_labelarray',
                make_input=as_labelarray(bytes_dtype, b''),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_labelarray',
                make_input=as_labelarray(unicode_dtype, None),
                make_expected_output=as_labelarray(unicode_dtype, None),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_labelarray',
                make_input=(lambda a: LabelArray(
                    a.astype(unicode).astype(object), u'')),
                make_expected_output=as_labelarray(unicode_dtype, ''),
                missing_value='',
            ),
        ))
    def test_no_adjustments(self, name, data, lookback, adjustments,
                            missing_value, perspective_offset,
                            expected_output):

        array = AdjustedArray(data, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            in_out = zip(array.traverse(lookback), expected_output)
            for yielded, expected_yield in in_out:
                check_arrays(yielded, expected_yield)

    @parameterized.expand(_gen_multiplicative_adjustment_cases(float64_dtype))
    def test_multiplicative_adjustments(self, name, data, lookback,
                                        adjustments, missing_value,
                                        perspective_offset, expected):

        array = AdjustedArray(data, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    @parameterized.expand(
        chain(
            _gen_overwrite_adjustment_cases(bool_dtype),
            _gen_overwrite_adjustment_cases(int64_dtype),
            _gen_overwrite_adjustment_cases(float64_dtype),
            _gen_overwrite_adjustment_cases(datetime64ns_dtype),
            _gen_overwrite_1d_array_adjustment_case(float64_dtype),
            _gen_overwrite_1d_array_adjustment_case(datetime64ns_dtype),
            _gen_overwrite_1d_array_adjustment_case(bool_dtype),
            # There are six cases here:
            # Using np.bytes/np.unicode/object arrays as inputs.
            # Passing np.bytes/np.unicode/object arrays to LabelArray,
            # and using those as input.
            #
            # The outputs should always be LabelArrays.
            _gen_unadjusted_cases(
                'bytes_ndarray',
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_ndarray',
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_ndarray',
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'bytes_labelarray',
                make_input=as_labelarray(bytes_dtype, b''),
                make_expected_output=as_labelarray(bytes_dtype, b''),
                missing_value=b'',
            ),
            _gen_unadjusted_cases(
                'unicode_labelarray',
                make_input=as_labelarray(unicode_dtype, u''),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=u'',
            ),
            _gen_unadjusted_cases(
                'object_labelarray',
                make_input=(lambda a: LabelArray(
                    a.astype(unicode).astype(object),
                    None,
                )),
                make_expected_output=as_labelarray(unicode_dtype, u''),
                missing_value=None,
            ),
        ))
    def test_overwrite_adjustment_cases(self, name, baseline, lookback,
                                        adjustments, missing_value,
                                        perspective_offset, expected):
        array = AdjustedArray(baseline, adjustments, missing_value)

        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    def test_object1darrayoverwrite(self):
        pairs = [u + l for u, l in product(ascii_uppercase, ascii_lowercase)]
        categories = pairs + ['~' + c for c in pairs]
        baseline = LabelArray(
            array([[''.join((r, c)) for c in 'abc'] for r in ascii_uppercase]),
            None,
            categories,
        )
        full_expected = baseline.copy()

        def flip(cs):
            if cs is None:
                return None
            if cs[0] != '~':
                return '~' + cs
            return cs

        def make_overwrite(fr, lr, fc, lc):
            fr, lr, fc, lc = map(ord, (fr, lr, fc, lc))
            fr -= ord('A')
            lr -= ord('A')
            fc -= ord('a')
            lc -= ord('a')

            return Object1DArrayOverwrite(
                fr,
                lr,
                fc,
                lc,
                baseline[fr:lr + 1, fc].map(flip),
            )

        overwrites = {
            3: [make_overwrite('A', 'B', 'a', 'a')],
            4: [make_overwrite('A', 'C', 'b', 'c')],
            5: [make_overwrite('D', 'D', 'a', 'b')],
        }

        it = AdjustedArray(baseline, overwrites, None).traverse(3)

        window = next(it)
        expected = full_expected[:3]
        check_arrays(window, expected)

        window = next(it)
        full_expected[0:2, 0] = LabelArray(['~Aa', '~Ba'], None)
        expected = full_expected[1:4]
        check_arrays(window, expected)

        window = next(it)
        full_expected[0:3, 1:3] = LabelArray(
            [['~Ab', '~Ac'], ['~Bb', '~Bc'], ['~Cb', '~Cb']], None)
        expected = full_expected[2:5]
        check_arrays(window, expected)

        window = next(it)
        full_expected[3, :2] = '~Da'
        expected = full_expected[3:6]
        check_arrays(window, expected)

    def test_invalid_lookback(self):

        data = arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, {}, float('nan'))

        with self.assertRaises(WindowLengthTooLong):
            adj_array.traverse(7)

        with self.assertRaises(WindowLengthNotPositive):
            adj_array.traverse(0)

        with self.assertRaises(WindowLengthNotPositive):
            adj_array.traverse(-1)

    def test_array_views_arent_writable(self):

        data = arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, {}, float('nan'))

        for frame in adj_array.traverse(3):
            with self.assertRaises(ValueError):
                frame[0, 0] = 5.0

    def test_inspect(self):
        data = arange(15, dtype=float).reshape(5, 3)
        adj_array = AdjustedArray(
            data,
            {4: [Float64Multiply(2, 3, 0, 0, 4.0)]},
            float('nan'),
        )

        expected = dedent("""\
            Adjusted Array (float64):

            Data:
            array([[  0.,   1.,   2.],
                   [  3.,   4.,   5.],
                   [  6.,   7.,   8.],
                   [  9.,  10.,  11.],
                   [ 12.,  13.,  14.]])

            Adjustments:
            {4: [Float64Multiply(first_row=2, last_row=3, first_col=0, \
last_col=0, value=4.000000)]}
            """)
        got = adj_array.inspect()
        self.assertEqual(expected, got)

    def test_update_labels(self):
        data = array([
            ['aaa', 'bbb', 'ccc'],
            ['ddd', 'eee', 'fff'],
            ['ggg', 'hhh', 'iii'],
            ['jjj', 'kkk', 'lll'],
            ['mmm', 'nnn', 'ooo'],
        ])
        label_array = LabelArray(data, missing_value='')

        adj_array = AdjustedArray(
            data=label_array,
            adjustments={4: [ObjectOverwrite(2, 3, 0, 0, 'ppp')]},
            missing_value='',
        )

        expected_data = array([
            ['aaa-foo', 'bbb-foo', 'ccc-foo'],
            ['ddd-foo', 'eee-foo', 'fff-foo'],
            ['ggg-foo', 'hhh-foo', 'iii-foo'],
            ['jjj-foo', 'kkk-foo', 'lll-foo'],
            ['mmm-foo', 'nnn-foo', 'ooo-foo'],
        ])
        expected_label_array = LabelArray(expected_data, missing_value='')

        expected_adj_array = AdjustedArray(
            data=expected_label_array,
            adjustments={4: [ObjectOverwrite(2, 3, 0, 0, 'ppp-foo')]},
            missing_value='',
        )

        adj_array.update_labels(lambda x: x + '-foo')

        # Check that the mapped AdjustedArray has the expected baseline
        # values and adjustment values.
        check_arrays(adj_array.data, expected_adj_array.data)
        self.assertEqual(adj_array.adjustments, expected_adj_array.adjustments)

    A = Float64Multiply(0, 4, 1, 1, 0.5)
    B = Float64Overwrite(3, 3, 4, 4, 4.2)
    C = Float64Multiply(0, 2, 0, 0, 0.14)
    D = Float64Overwrite(0, 3, 0, 0, 4.0)
    E = Float64Overwrite(0, 0, 1, 1, 3.7)
    F = Float64Multiply(0, 4, 3, 3, 10.0)
    G = Float64Overwrite(5, 5, 4, 4, 1.7)
    H = Float64Multiply(0, 4, 2, 2, 0.99)
    S = Float64Multiply(0, 1, 4, 4, 5.06)

    @parameterized.expand([(
        # Initial adjustments
        {
            1: [A, B],
            2: [C],
            4: [D],
        },

        # Adjustments to add
        {
            1: [E],
            2: [F, G],
            3: [H, S],
        },

        # Expected adjustments with 'append'
        {
            1: [A, B, E],
            2: [C, F, G],
            3: [H, S],
            4: [D],
        },

        # Expected adjustments with 'prepend'
        {
            1: [E, A, B],
            2: [F, G, C],
            3: [H, S],
            4: [D],
        },
    )])
    def test_update_adjustments(self, initial_adjustments, adjustments_to_add,
                                expected_adjustments_with_append,
                                expected_adjustments_with_prepend):
        methods = ['append', 'prepend']
        expected_outputs = [
            expected_adjustments_with_append, expected_adjustments_with_prepend
        ]

        for method, expected_output in zip(methods, expected_outputs):
            data = arange(30, dtype=float).reshape(6, 5)
            adjusted_array = AdjustedArray(data, initial_adjustments,
                                           float('nan'))

            adjusted_array.update_adjustments(adjustments_to_add, method)
            self.assertEqual(adjusted_array.adjustments, expected_output)
コード例 #22
0
class TestAdjustedArray:
    def test_traverse_invalidating(self):
        data = np.arange(5 * 3, dtype="f8").reshape(5, 3)
        original_data = data.copy()
        adjustments = {2: [Float64Multiply(0, 4, 0, 2, 2.0)]}
        adjusted_array = AdjustedArray(data, adjustments, float("nan"))

        for _ in adjusted_array.traverse(1, copy=False):
            pass

        assert_equal(data, original_data * 2)

        err_msg = "cannot traverse invalidated AdjustedArray"
        with pytest.raises(ValueError, match=err_msg):
            adjusted_array.traverse(1)

    def test_copy(self):
        data = np.arange(5 * 3, dtype="f8").reshape(5, 3)
        original_data = data.copy()
        adjustments = {2: [Float64Multiply(0, 4, 0, 2, 2.0)]}
        adjusted_array = AdjustedArray(data, adjustments, float("nan"))
        traverse_copy = adjusted_array.copy()
        clean_copy = adjusted_array.copy()

        a_it = adjusted_array.traverse(2, copy=False)
        b_it = traverse_copy.traverse(2, copy=False)
        for a, b in zip(a_it, b_it):
            assert_equal(a, b)

        err_msg = "cannot copy invalidated AdjustedArray"
        with pytest.raises(ValueError, match=err_msg):
            adjusted_array.copy()

        # the clean copy should have the original data even though the
        # original adjusted array has it's data mutated in place
        assert_equal(clean_copy.data, original_data)
        assert_equal(adjusted_array.data, original_data * 2)

    @pytest.mark.parametrize(
        """name, data, lookback, adjustments, missing_value,\
            perspective_offset, expected_output""",
        chain(
            _gen_unadjusted_cases(
                "float",
                make_input=as_dtype(float64_dtype),
                make_expected_output=as_dtype(float64_dtype),
                missing_value=default_missing_value_for_dtype(float64_dtype),
            ),
            _gen_unadjusted_cases(
                "datetime",
                make_input=as_dtype(datetime64ns_dtype),
                make_expected_output=as_dtype(datetime64ns_dtype),
                missing_value=default_missing_value_for_dtype(datetime64ns_dtype),
            ),
            # Test passing an array of strings to AdjustedArray.
            _gen_unadjusted_cases(
                "bytes_ndarray",
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b""),
                missing_value=b"",
            ),
            _gen_unadjusted_cases(
                "unicode_ndarray",
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value="",
            ),
            _gen_unadjusted_cases(
                "object_ndarray",
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value="",
            ),
            # Test passing a LabelArray directly to AdjustedArray.
            _gen_unadjusted_cases(
                "bytes_labelarray",
                make_input=as_labelarray(bytes_dtype, b""),
                make_expected_output=as_labelarray(bytes_dtype, b""),
                missing_value=b"",
            ),
            _gen_unadjusted_cases(
                "unicode_labelarray",
                make_input=as_labelarray(unicode_dtype, None),
                make_expected_output=as_labelarray(unicode_dtype, None),
                missing_value="",
            ),
            _gen_unadjusted_cases(
                "object_labelarray",
                make_input=(lambda a: LabelArray(a.astype(unicode).astype(object), "")),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value="",
            ),
        ),
    )
    def test_no_adjustments(
        self,
        name,
        data,
        lookback,
        adjustments,
        missing_value,
        perspective_offset,
        expected_output,
    ):

        array = AdjustedArray(data, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            in_out = zip(array.traverse(lookback), expected_output)
            for yielded, expected_yield in in_out:
                check_arrays(yielded, expected_yield)

    @pytest.mark.parametrize(
        "name, data, lookback, adjustments, missing_value,\
        perspective_offset, expected",
        _gen_multiplicative_adjustment_cases(float64_dtype),
    )
    def test_multiplicative_adjustments(
        self,
        name,
        data,
        lookback,
        adjustments,
        missing_value,
        perspective_offset,
        expected,
    ):

        array = AdjustedArray(data, adjustments, missing_value)
        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    @pytest.mark.parametrize(
        "name, baseline, lookback, adjustments,\
        missing_value, perspective_offset, expected",
        chain(
            _gen_overwrite_adjustment_cases(bool_dtype),
            _gen_overwrite_adjustment_cases(int64_dtype),
            _gen_overwrite_adjustment_cases(float64_dtype),
            _gen_overwrite_adjustment_cases(datetime64ns_dtype),
            _gen_overwrite_1d_array_adjustment_case(float64_dtype),
            _gen_overwrite_1d_array_adjustment_case(datetime64ns_dtype),
            _gen_overwrite_1d_array_adjustment_case(bool_dtype),
            # There are six cases here:
            # Using np.bytes/np.unicode/object arrays as inputs.
            # Passing np.bytes/np.unicode/object arrays to LabelArray,
            # and using those as input.
            #
            # The outputs should always be LabelArrays.
            _gen_unadjusted_cases(
                "bytes_ndarray",
                make_input=as_dtype(bytes_dtype),
                make_expected_output=as_labelarray(bytes_dtype, b""),
                missing_value=b"",
            ),
            _gen_unadjusted_cases(
                "unicode_ndarray",
                make_input=as_dtype(unicode_dtype),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value="",
            ),
            _gen_unadjusted_cases(
                "object_ndarray",
                make_input=lambda a: a.astype(unicode).astype(object),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value="",
            ),
            _gen_unadjusted_cases(
                "bytes_labelarray",
                make_input=as_labelarray(bytes_dtype, b""),
                make_expected_output=as_labelarray(bytes_dtype, b""),
                missing_value=b"",
            ),
            _gen_unadjusted_cases(
                "unicode_labelarray",
                make_input=as_labelarray(unicode_dtype, ""),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value="",
            ),
            _gen_unadjusted_cases(
                "object_labelarray",
                make_input=(
                    lambda a: LabelArray(
                        a.astype(unicode).astype(object),
                        None,
                    )
                ),
                make_expected_output=as_labelarray(unicode_dtype, ""),
                missing_value=None,
            ),
        ),
    )
    def test_overwrite_adjustment_cases(
        self,
        name,
        baseline,
        lookback,
        adjustments,
        missing_value,
        perspective_offset,
        expected,
    ):
        array = AdjustedArray(baseline, adjustments, missing_value)

        for _ in range(2):  # Iterate 2x ensure adjusted_arrays are re-usable.
            window_iter = array.traverse(
                lookback,
                perspective_offset=perspective_offset,
            )
            for yielded, expected_yield in zip_longest(window_iter, expected):
                check_arrays(yielded, expected_yield)

    def test_object1darrayoverwrite(self):
        pairs = [u + l for u, l in product(ascii_uppercase, ascii_lowercase)]
        categories = pairs + ["~" + c for c in pairs]
        baseline = LabelArray(
            np.array([["".join((r, c)) for c in "abc"] for r in ascii_uppercase]),
            None,
            categories,
        )
        full_expected = baseline.copy()

        def flip(cs):
            if cs is None:
                return None
            if cs[0] != "~":
                return "~" + cs
            return cs

        def make_overwrite(fr, lr, fc, lc):
            fr, lr, fc, lc = map(ord, (fr, lr, fc, lc))
            fr -= ord("A")
            lr -= ord("A")
            fc -= ord("a")
            lc -= ord("a")

            return Object1DArrayOverwrite(
                fr,
                lr,
                fc,
                lc,
                baseline[fr : lr + 1, fc].map(flip),
            )

        overwrites = {
            3: [make_overwrite("A", "B", "a", "a")],
            4: [make_overwrite("A", "C", "b", "c")],
            5: [make_overwrite("D", "D", "a", "b")],
        }

        it = AdjustedArray(baseline, overwrites, None).traverse(3)

        window = next(it)
        expected = full_expected[:3]
        check_arrays(window, expected)

        window = next(it)
        full_expected[0:2, 0] = LabelArray(["~Aa", "~Ba"], None)
        expected = full_expected[1:4]
        check_arrays(window, expected)

        window = next(it)
        full_expected[0:3, 1:3] = LabelArray(
            [["~Ab", "~Ac"], ["~Bb", "~Bc"], ["~Cb", "~Cb"]], None
        )
        expected = full_expected[2:5]
        check_arrays(window, expected)

        window = next(it)
        full_expected[3, :2] = "~Da"
        expected = full_expected[3:6]
        check_arrays(window, expected)

    def test_invalid_lookback(self):

        data = np.arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, {}, float("nan"))

        with pytest.raises(WindowLengthTooLong):
            adj_array.traverse(7)

        with pytest.raises(WindowLengthNotPositive):
            adj_array.traverse(0)

        with pytest.raises(WindowLengthNotPositive):
            adj_array.traverse(-1)

    def test_array_views_arent_writable(self):

        data = np.arange(30, dtype=float).reshape(6, 5)
        adj_array = AdjustedArray(data, {}, float("nan"))

        for frame in adj_array.traverse(3):
            with pytest.raises(ValueError):
                frame[0, 0] = 5.0

    def test_inspect(self):
        data = np.arange(15, dtype=float).reshape(5, 3)
        adj_array = AdjustedArray(
            data,
            {4: [Float64Multiply(2, 3, 0, 0, 4.0)]},
            float("nan"),
        )
        # TODO: CHECK WHY DO I NEED TO FIX THE INDENT IN THE EXPECTED?
        expected = dedent(
            """\
            Adjusted Array (float64):

            Data:
            array([[ 0.,  1.,  2.],
                   [ 3.,  4.,  5.],
                   [ 6.,  7.,  8.],
                   [ 9., 10., 11.],
                   [12., 13., 14.]])

            Adjustments:
            {4: [Float64Multiply(first_row=2, last_row=3, first_col=0, \
last_col=0, value=4.000000)]}
            """
        )
        got = adj_array.inspect()
        assert expected == got

    def test_update_labels(self):
        data = np.array(
            [
                ["aaa", "bbb", "ccc"],
                ["ddd", "eee", "fff"],
                ["ggg", "hhh", "iii"],
                ["jjj", "kkk", "lll"],
                ["mmm", "nnn", "ooo"],
            ]
        )
        label_array = LabelArray(data, missing_value="")

        adj_array = AdjustedArray(
            data=label_array,
            adjustments={4: [ObjectOverwrite(2, 3, 0, 0, "ppp")]},
            missing_value="",
        )

        expected_data = np.array(
            [
                ["aaa-foo", "bbb-foo", "ccc-foo"],
                ["ddd-foo", "eee-foo", "fff-foo"],
                ["ggg-foo", "hhh-foo", "iii-foo"],
                ["jjj-foo", "kkk-foo", "lll-foo"],
                ["mmm-foo", "nnn-foo", "ooo-foo"],
            ]
        )
        expected_label_array = LabelArray(expected_data, missing_value="")

        expected_adj_array = AdjustedArray(
            data=expected_label_array,
            adjustments={4: [ObjectOverwrite(2, 3, 0, 0, "ppp-foo")]},
            missing_value="",
        )

        adj_array.update_labels(lambda x: x + "-foo")

        # Check that the mapped AdjustedArray has the expected baseline
        # values and adjustment values.
        check_arrays(adj_array.data, expected_adj_array.data)
        assert adj_array.adjustments == expected_adj_array.adjustments

    A = Float64Multiply(0, 4, 1, 1, 0.5)
    B = Float64Overwrite(3, 3, 4, 4, 4.2)
    C = Float64Multiply(0, 2, 0, 0, 0.14)
    D = Float64Overwrite(0, 3, 0, 0, 4.0)
    E = Float64Overwrite(0, 0, 1, 1, 3.7)
    F = Float64Multiply(0, 4, 3, 3, 10.0)
    G = Float64Overwrite(5, 5, 4, 4, 1.7)
    H = Float64Multiply(0, 4, 2, 2, 0.99)
    S = Float64Multiply(0, 1, 4, 4, 5.06)

    @pytest.mark.parametrize(
        "initial_adjustments, adjustments_to_add,\
        expected_adjustments_with_append, expected_adjustments_with_prepend",
        [
            (
                # Initial adjustments
                {
                    1: [A, B],
                    2: [C],
                    4: [D],
                },
                # Adjustments to add
                {
                    1: [E],
                    2: [F, G],
                    3: [H, S],
                },
                # Expected adjustments with 'append'
                {
                    1: [A, B, E],
                    2: [C, F, G],
                    3: [H, S],
                    4: [D],
                },
                # Expected adjustments with 'prepend'
                {
                    1: [E, A, B],
                    2: [F, G, C],
                    3: [H, S],
                    4: [D],
                },
            )
        ],
    )
    def test_update_adjustments(
        self,
        initial_adjustments,
        adjustments_to_add,
        expected_adjustments_with_append,
        expected_adjustments_with_prepend,
    ):
        methods = ["append", "prepend"]
        expected_outputs = [
            expected_adjustments_with_append,
            expected_adjustments_with_prepend,
        ]

        for method, expected_output in zip(methods, expected_outputs):
            data = np.arange(30, dtype=float).reshape(6, 5)
            adjusted_array = AdjustedArray(data, initial_adjustments, float("nan"))

            adjusted_array.update_adjustments(adjustments_to_add, method)
            assert adjusted_array.adjustments == expected_output