def test_get_item_mask(mask):
    """Boolean-mask indexing must match masking the source object array.

    ``mask`` is converted to a boolean NumPy array before use; it is
    presumably supplied by a pytest parametrization that is not visible
    in this part of the file.
    """
    # dtype='object' is required here: the rows have different lengths and
    # NumPy >= 1.24 raises ValueError instead of silently building an
    # object array from ragged nested sequences.
    arg = np.array([[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]],
                   dtype='object')
    rarray = RaggedArray(arg, dtype='int16')
    mask = np.array(mask, dtype='bool')

    assert_ragged_arrays_equal(rarray[mask],
                               RaggedArray(arg[mask], dtype='int16'))
def test_construct_ragged_array():
    """Build a RaggedArray from nested lists and check its core properties.

    Covers the flat value buffer, the start indices, ``len``, ``isna``,
    ``nbytes`` and the type of the array's dtype.
    """
    rarray = RaggedArray([[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]],
                         dtype='int32')

    # Check flat array: all row values concatenated in order
    assert rarray.flat_array.dtype == 'int32'
    assert np.array_equal(
        rarray.flat_array,
        np.array([1, 2, 10, 20, 30, 11, 22, 33, 44], dtype='int32'))

    # Check start indices; the expected array uses the same dtype that is
    # asserted on the line above so the two checks agree with each other.
    assert rarray.start_indices.dtype == 'uint8'
    assert np.array_equal(rarray.start_indices,
                          np.array([0, 2, 2, 5, 5], dtype='uint8'))

    # Check len
    assert len(rarray) == 5

    # Check isna: the empty row and the None row are both reported missing
    assert rarray.isna().dtype == 'bool'
    assert np.array_equal(rarray.isna(), [False, True, False, True, False])

    # Check nbytes: sum of the two underlying buffers
    expected = (
        9 * np.int32().nbytes +  # flat_array
        5 * np.uint8().nbytes  # start_indices
    )
    assert rarray.nbytes == expected

    # Check dtype: exact type match, written as an identity comparison
    assert type(rarray.dtype) is RaggedDtype
# Example #3
def test_get_item_list(inds):
    """Indexing with ``inds`` must match indexing the source object array.

    ``inds`` is presumably supplied by a pytest parametrization that is not
    visible in this part of the file.
    """
    # dtype='object' is required here: the rows have different lengths and
    # NumPy >= 1.24 raises ValueError instead of silently building an
    # object array from ragged nested sequences.
    arg = np.array([[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]],
                   dtype='object')
    rarray = RaggedArray(arg, dtype='int16')

    assert_ragged_arrays_equal(
        rarray[inds],
        RaggedArray(arg[inds], dtype='int16'))
def test_factorization():
    """``factorize`` must label equal rows alike and give missing rows -1."""
    # dtype='object' is required here: the rows have different lengths and
    # NumPy >= 1.24 raises ValueError instead of silently building an
    # object array from ragged nested sequences.
    arg = np.array([[1, 2], [], [1, 2], None, [11, 22, 33, 44]],
                   dtype='object')
    rarray = RaggedArray(arg, dtype='int16')
    labels, uniques = rarray.factorize()

    # The empty row and the None row are both expected to get label -1.
    np.testing.assert_array_equal(labels, [0, -1, 0, -1, 1])
    assert_ragged_arrays_equal(
        uniques, RaggedArray([[1, 2], [11, 22, 33, 44]], dtype='int16'))
def test_concat_same_type():
    """Concatenating RaggedArrays must equal building one from all rows."""
    parts = (
        [[1, 2], [], [10, 20], None, [11, 22, 33, 44]],
        [[100, 200], None, [99, 100, 101]],
        [None, [27, 28]],
    )
    ragged_parts = [RaggedArray(part, dtype='float32') for part in parts]

    combined = RaggedArray._concat_same_type(ragged_parts)

    # Reference result: every row of every part, in order, in a single array.
    all_rows = [row for part in parts for row in part]
    assert_ragged_arrays_equal(combined,
                               RaggedArray(all_rows, dtype='float32'))
def test_take():
    """Check ``take`` both with and without ``allow_fill``."""
    source = RaggedArray._from_sequence(
        [[1, 2], [], [10, 20], None, [11, 22, 33, 44]])

    # allow_fill=False: negative positions are expected to count back from
    # the end of the array.
    taken = source.take([0, 2, 1, -1, -2, 0], allow_fill=False)
    assert_ragged_arrays_equal(
        taken,
        RaggedArray([[1, 2], [10, 20], [], [11, 22, 33, 44], None, [1, 2]]))

    # allow_fill=True: -1 is expected to produce a missing entry.
    taken = source.take([0, 2, 1, -1, -1, 0], allow_fill=True)
    assert_ragged_arrays_equal(
        taken, RaggedArray([[1, 2], [10, 20], [], None, None, [1, 2]]))
def test_get_item_scalar():
    """Scalar indexing must return each row, by positive or negative index."""
    rows = [[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]]
    rarray = RaggedArray(rows, dtype='float16')

    # Offset 0 walks positions 0..4 (forward); offset -5 walks the
    # equivalent negative positions -5..-1 (reversed addressing).
    for offset in (0, -5):
        for position, row in enumerate(rows):
            item = rarray[position + offset]
            # A None row is compared against an empty float16 array.
            wanted = np.array([], dtype='float16') if row is None else row

            if not isinstance(item, np.ndarray):
                assert np.isnan(item)
            else:
                assert item.dtype == 'float16'

            np.testing.assert_array_equal(item, wanted)
def data():
    """Return a length-100 float64 RaggedArray.

    * data[0] and data[1] should both be non-missing
    * data[0] and data[1] should not be equal
    """
    # Five-row pattern repeated 20 times gives the 100 rows.
    pattern = [[0, 1], [1, 2, 3, 4], [], [-1, -2], []]
    return RaggedArray(pattern * 20, dtype='float64')
def data_for_grouping():
    """Return data for factorization, grouping, and unique tests.

    The rows are laid out as [B, B, NA, NA, A, A, B, C], where
    A < B < C and NA is missing (an empty row).
    """
    rows = [
        [1, 0], [1, 0],  # B, B
        [], [],          # NA, NA
        [0, 0], [0, 0],  # A, A
        [1, 0],          # B
        [2, 0],          # C
    ]
    return RaggedArray(rows)
def test_series_construction():
    """A Series built with a ragged dtype string must wrap a RaggedArray."""
    rows = [[0, 1], [1.0, 2, 3.0, 4], None, [-1, -2]] * 2
    series = pd.Series(rows, dtype='Ragged[int64]')

    # The Series' backing array must equal a directly constructed array.
    assert_ragged_arrays_equal(series.array,
                               RaggedArray(rows, dtype='int64'))
def test_equality_validation(other):
    """Comparing a RaggedArray with ``other`` must raise ValueError.

    ``other`` is presumably an incompatible operand supplied by a pytest
    parametrization.
    """
    ragged = RaggedArray([[1, 2], [], [1, 2], None, [11, 22, 33, 44]],
                         dtype='int32')

    # The comparison result is discarded; only the raised error matters.
    with pytest.raises(ValueError, match="Cannot check equality"):
        ragged == other
def test_array_eq_ragged():
    """``==`` and ``!=`` between two RaggedArrays compare row by row."""
    left = RaggedArray(
        [[1, 2], [], [1, 2], [3, 2, 1], [11, 22, 33, 44]], dtype='int32')
    right = RaggedArray(
        [[1, 2], [2, 3, 4, 5], [1, 2], [11, 22, 33], [11]], dtype='int32')

    # Only rows 0 and 2 hold identical values on both sides.
    matches = np.array([1, 0, 1, 0, 0], dtype='bool')

    np.testing.assert_array_equal(left == right, matches)
    np.testing.assert_array_equal(left != right, ~matches)
def test_get_item_slice():
    """Slicing a RaggedArray must match slicing the source list of rows."""
    rows = [[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]]
    rarray = RaggedArray(rows, dtype='int16')

    # A full slice is compared against the array itself.
    assert_ragged_arrays_equal(rarray[:], rarray)

    partial_slices = (
        slice(1, None),   # all but the first
        slice(None, -1),  # all but the last
        slice(2, -1),     # middle
        slice(2, 1),      # empty
    )
    for window in partial_slices:
        assert_ragged_arrays_equal(
            rarray[window], RaggedArray(rows[window], dtype='int16'))
def test_start_indices_dtype():
    """start_indices should use the smallest unsigned int dtype that fits.

    The dtype should be an unsigned int only as large as needed to handle
    the length of the flat array.
    """
    max_uint8 = np.iinfo('uint8').max
    max_uint16 = np.iinfo('uint16').max

    # Each case: (rows, expected start_indices dtype, expected start_indices)
    cases = [
        # Empty
        ([[]], 'uint8', [0]),
        # Small
        ([[23, 24]], 'uint8', [0]),
        # Max uint8
        ([np.zeros(max_uint8), []], 'uint8', [0, max_uint8]),
        # Min uint16
        ([np.zeros(max_uint8 + 1), []], 'uint16', [0, max_uint8 + 1]),
        # Max uint16
        ([np.zeros(max_uint16), []], 'uint16', [0, max_uint16]),
        # Min uint32
        ([np.zeros(max_uint16 + 1), []], 'uint32', [0, max_uint16 + 1]),
    ]

    for rows, dtype_name, starts in cases:
        rarray = RaggedArray(rows, dtype='int64')
        assert rarray.start_indices.dtype == np.dtype(dtype_name)
        np.testing.assert_array_equal(rarray.start_indices, starts)
def test_array_eq_scalar(scalar):
    """``==`` and ``!=`` against ``scalar`` compare it with every row.

    ``scalar`` is presumably a [1, 2]-like value supplied by a pytest
    parametrization that is not visible here; only rows 0 and 2 are
    expected to match it.
    """
    ragged = RaggedArray(
        [[1, 2], [], [1, 2], [1, 3], [11, 22, 33, 44]], dtype='int32')
    matches = np.array([1, 0, 1, 0, 0], dtype='bool')

    np.testing.assert_array_equal(ragged == scalar, matches)
    np.testing.assert_array_equal(ragged != scalar, ~matches)
def test_array_eq_numpy2d():
    """``==`` and ``!=`` against a 2D NumPy array compare row by row."""
    ragged = RaggedArray([[1, 2], [1], [1, 2], None, [33, 44]],
                         dtype='int32')
    dense = np.array([[1, 2], [2, 3], [1, 2], [0, 1], [11, 22]],
                     dtype='int32')

    # Only rows 0 and 2 hold the same values in both operands.
    matches = np.array([1, 0, 1, 0, 0], dtype='bool')

    np.testing.assert_array_equal(ragged == dense, matches)
    np.testing.assert_array_equal(ragged != dense, ~matches)
def test_copy():
    """A deep copy must not share its flat buffer with the original."""
    source = RaggedArray._from_sequence(
        [[1, 2], [], [1, 2], None, [11, 22, 33, 44]])
    duplicate = source.copy(deep=True)

    # Straight after copying, both arrays hold the same content.
    assert_ragged_arrays_equal(source, duplicate)

    # Writing into the original's buffer ...
    source.flat_array[0] = 99
    assert source.flat_array[0] == 99

    # ... must leave the copy's buffer untouched.
    assert duplicate.flat_array[0] == 1
# Example #18
def test_validate_ragged_array_fastpath():
    """The dict-based constructor must reject malformed buffers.

    A valid ``start_indices`` / ``flat_array`` pair is accepted; each
    invalid variation must raise ValueError with a matching message.
    """
    start_indices = np.array([0, 2, 5, 6, 6, 11], dtype='uint16')
    flat_array = np.array(
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='float32')
    valid = {'start_indices': start_indices, 'flat_array': flat_array}

    # Valid args are accepted without raising
    RaggedArray(valid)

    # Last start index pushed past the end of flat_array
    out_of_bounds = start_indices.copy()
    out_of_bounds[-1] = 99

    start_msg = 'start_indices property of a RaggedArray'
    flat_msg = 'flat_array property of a RaggedArray'

    # Each case: (overrides applied on top of the valid dict, error pattern)
    invalid_cases = [
        # start_indices: not an ndarray
        ({'start_indices': 25}, start_msg),
        # start_indices: not an unsigned int
        ({'start_indices': start_indices.astype('float32')}, start_msg),
        # start_indices: not 1d
        ({'start_indices': np.array([start_indices])}, start_msg),
        # flat_array: not an ndarray
        ({'flat_array': 'foo'}, flat_msg),
        # flat_array: not 1d
        ({'flat_array': np.array([flat_array])}, flat_msg),
        # start_indices out of bounds
        ({'start_indices': out_of_bounds}, 'start_indices must be less than'),
    ]

    for overrides, pattern in invalid_cases:
        with pytest.raises(ValueError, match=pattern):
            RaggedArray({**valid, **overrides})
def test_array_eq_numpy1():
    """``==`` and ``!=`` against a 1D object array compare row by row."""
    ragged = RaggedArray([[1, 2], [], [1, 2], None, [11, 22, 33, 44]],
                         dtype='int32')
    objects = np.array([[1, 2], [2], [1, 2], None, [10, 20, 30, 40]],
                       dtype='object')

    # Rows 0 and 2 match by value; row 3 is None on both sides and is
    # expected to compare equal as well.
    matches = np.array([1, 0, 1, 1, 0], dtype='bool')

    np.testing.assert_array_equal(ragged == objects, matches)
    np.testing.assert_array_equal(ragged != objects, ~matches)
def test_construct_ragged_array_fastpath():
    """Building from pre-made buffers must keep them and decode the rows."""
    starts = np.array([0, 2, 5, 6, 6, 11], dtype='uint16')
    flat = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='float32')

    rarray = RaggedArray({'start_indices': starts, 'flat_array': flat})

    # The buffers must come back with unchanged contents.
    assert np.array_equal(rarray.start_indices, starts)
    assert np.array_equal(rarray.flat_array, flat)

    # Converting to an object array must yield one float32 row per start
    # index.
    rows = np.asarray(rarray)
    expected_rows = np.array(
        [np.array(values, dtype='float32')
         for values in ([0, 1], [2, 3, 4], [5], [], [6, 7, 8, 9, 10], [])],
        dtype='object')

    assert len(rows) == len(expected_rows)
    for actual, wanted in zip(rows, expected_rows):
        np.testing.assert_array_equal(actual, wanted)
def test_isna():
    """``isna`` must flag both empty rows and None rows as missing."""
    rows = [[], [1, 3], [10, 20, 30], None, [11, 22, 33, 44], []]
    rarray = RaggedArray(rows, dtype='int32')

    missing = np.array([True, False, False, True, False, True])
    np.testing.assert_array_equal(rarray.isna(), missing)
def test_get_item_scalar_out_of_bounds(index):
    """Scalar indexing with ``index`` must raise IndexError.

    ``index`` is presumably an out-of-range position supplied by a pytest
    parametrization that is not visible here.
    """
    rows = [[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]]
    rarray = RaggedArray(rows)

    # The lookup result is discarded; only the raised error matters.
    with pytest.raises(IndexError):
        rarray[index]
def test_construct_ragged_array_from_ragged_array():
    """Passing a RaggedArray to the constructor must give an equal array."""
    # np.nan stands in for a missing row here.
    rows = [[1, 2], [], [10, 20, 30], np.nan, [11, 22, 33, 44]]
    source = RaggedArray(rows, dtype='int32')

    rebuilt = RaggedArray(source)
    assert_ragged_arrays_equal(rebuilt, source)
def data_for_sorting():
    """Return a length-3 array with a known sort order.

    The three rows are [B, C, A], where A < B < C.
    """
    rows = [
        [1, 0],  # B
        [2, 0],  # C
        [0, 0],  # A
    ]
    return RaggedArray(rows)
def data_missing_for_sorting():
    """Return a length-3 array with a known sort order and a missing row.

    The three rows are [B, NA, A], where A < B and NA is missing
    (an empty row).
    """
    rows = [
        [1, 0],  # B
        [],      # NA
        [0, 0],  # A
    ]
    return RaggedArray(rows)
def data_missing():
    """Return a length-2 int16 array laid out as [NA, Valid]."""
    rows = [
        [],          # NA
        [-1, 0, 1],  # Valid
    ]
    return RaggedArray(rows, dtype='int16')
def test_flat_array_type_inference(arg, expected):
    """Without an explicit dtype, flat_array's dtype must be ``expected``.

    ``arg`` and ``expected`` are presumably supplied by a pytest
    parametrization that is not visible here.
    """
    inferred = RaggedArray(arg).flat_array.dtype
    assert inferred == np.dtype(expected)
def test_from_sequence():
    """``_from_sequence`` must build the same array as the constructor."""
    sequence = [[1, 2], [], [1, 2], None, [11, 22, 33, 44]]
    rarray = RaggedArray._from_sequence(sequence)

    # The statements that used to follow compared `ra1` and `ra2`, names
    # never defined in this function (a stray copy of the equality test's
    # tail), so the test always failed with NameError. They are removed.
    assert_ragged_arrays_equal(rarray, RaggedArray(sequence))


@pytest.mark.parametrize(
    'other',
    [
        # Incompatible scalars
        'a string',
        32,
        # RaggedArray of wrong length
        RaggedArray([[0, 1], [2, 3, 4]]),
        # 1D array of wrong length
        np.array([[0, 1], [2, 3, 4]], dtype='object'),
        # 2D array with wrong row count
        np.array([[0, 1], [2, 3]], dtype='int32'),
    ])
def test_equality_validation(other):
    """Comparing a RaggedArray with an incompatible operand must raise."""
    ragged = RaggedArray([[1, 2], [], [1, 2], None, [11, 22, 33, 44]],
                         dtype='int32')

    # The comparison result is discarded; only the raised error matters.
    with pytest.raises(ValueError, match="Cannot check equality"):
        ragged == other


# Pandas-provided extension array tests
# -------------------------------------
def test_pandas_array_construction():
    """``pd.array`` with a ragged dtype string must build a RaggedArray."""
    rows = [[0, 1], [1, 2, 3, 4], None, [-1, -2]] * 2
    built = pd.array(rows, dtype='ragged[int64]')

    # Must equal an array constructed directly from the same rows.
    assert_ragged_arrays_equal(built, RaggedArray(rows, dtype='int64'))