def test_get_item_mask(mask):
    """Boolean-mask indexing matches masking the source data directly.

    ``mask`` is converted to a boolean NumPy array, applied both to the
    RaggedArray and to the object array it was built from, and the two
    results are compared.
    """
    # dtype=object is required here: the rows have different lengths (and
    # one entry is None), and NumPy >= 1.24 raises ValueError when asked to
    # infer an array from such a ragged nested sequence.
    arg = np.array(
        [[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]], dtype=object)
    rarray = RaggedArray(arg, dtype='int16')
    mask = np.array(mask, dtype='bool')

    assert_ragged_arrays_equal(
        rarray[mask], RaggedArray(arg[mask], dtype='int16'))
def test_construct_ragged_array():
    """Build a RaggedArray from nested lists and check its basic properties."""
    rows = [[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]]
    rarray = RaggedArray(rows, dtype='int32')

    # Flat buffer holds every present value, in order, in the given dtype
    flat = rarray.flat_array
    expected_flat = np.array(
        [1, 2, 10, 20, 30, 11, 22, 33, 44], dtype='int32')
    assert flat.dtype == 'int32'
    assert np.array_equal(flat, expected_flat)

    # Start indices
    starts = rarray.start_indices
    assert starts.dtype == 'uint8'
    assert np.array_equal(starts, np.array([0, 2, 2, 5, 5], dtype='uint64'))

    # Length
    assert len(rarray) == 5

    # Missing-value mask
    na_mask = rarray.isna()
    assert na_mask.dtype == 'bool'
    assert np.array_equal(na_mask, [False, True, False, True, False])

    # nbytes is the sum of both underlying buffers
    flat_bytes = 9 * np.int32().nbytes
    start_bytes = 5 * np.uint8().nbytes
    assert rarray.nbytes == flat_bytes + start_bytes

    # Extension dtype
    assert type(rarray.dtype) == RaggedDtype
def test_get_item_list(inds):
    """Indexing with ``inds`` matches indexing the source data directly.

    The same ``inds`` object is applied to the RaggedArray and to the object
    array it was built from; the two selections must be equal.
    """
    # dtype=object is required here: the rows have different lengths (and
    # one entry is None), and NumPy >= 1.24 raises ValueError when asked to
    # infer an array from such a ragged nested sequence.
    arg = np.array(
        [[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]], dtype=object)
    rarray = RaggedArray(arg, dtype='int16')

    assert_ragged_arrays_equal(
        rarray[inds], RaggedArray(arg[inds], dtype='int16'))
def test_factorization():
    """factorize() returns the expected labels and unique rows.

    The expectations encode that the empty and None rows get label -1 and
    that the two ``[1, 2]`` rows share label 0.
    """
    # dtype=object is required here: the rows have different lengths (and
    # one entry is None), and NumPy >= 1.24 raises ValueError when asked to
    # infer an array from such a ragged nested sequence.
    arg = np.array(
        [[1, 2], [], [1, 2], None, [11, 22, 33, 44]], dtype=object)
    rarray = RaggedArray(arg, dtype='int16')

    labels, uniques = rarray.factorize()

    np.testing.assert_array_equal(labels, [0, -1, 0, -1, 1])
    assert_ragged_arrays_equal(
        uniques, RaggedArray([[1, 2], [11, 22, 33, 44]], dtype='int16'))
def test_concat_same_type():
    """Concatenating RaggedArrays equals building one from the joined rows."""
    pieces = [
        [[1, 2], [], [10, 20], None, [11, 22, 33, 44]],
        [[100, 200], None, [99, 100, 101]],
        [None, [27, 28]],
    ]
    rarrays = [RaggedArray(piece, dtype='float32') for piece in pieces]

    result = RaggedArray._concat_same_type(rarrays)

    # Flatten one level so the rows of all pieces appear in order
    all_rows = [row for piece in pieces for row in piece]
    expected = RaggedArray(all_rows, dtype='float32')
    assert_ragged_arrays_equal(result, expected)
def test_take():
    """Exercise take() with allow_fill disabled and enabled."""
    source_rows = [[1, 2], [], [10, 20], None, [11, 22, 33, 44]]
    rarray = RaggedArray._from_sequence(source_rows)

    # allow_fill=False: the expectation treats -1 / -2 as positions
    # counted from the end
    taken = rarray.take([0, 2, 1, -1, -2, 0], allow_fill=False)
    assert_ragged_arrays_equal(
        taken,
        RaggedArray([[1, 2], [10, 20], [], [11, 22, 33, 44], None, [1, 2]]))

    # allow_fill=True: the expectation treats -1 as a missing entry
    taken = rarray.take([0, 2, 1, -1, -1, 0], allow_fill=True)
    assert_ragged_arrays_equal(
        taken,
        RaggedArray([[1, 2], [10, 20], [], None, None, [1, 2]]))
def test_get_item_scalar():
    """Scalar indexing returns each row, by positive and negative position."""
    arg = [[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]]
    rarray = RaggedArray(arg, dtype='float16')

    # shift == 0 walks the array forward with positions 0..4; shift == 5
    # revisits the same rows through the negative positions -5..-1.
    for shift in (0, 5):
        for pos, row in enumerate(arg):
            result = rarray[pos - shift]

            # A None row is compared against an empty float16 array
            expected = np.array([], dtype='float16') if row is None else row

            if not isinstance(result, np.ndarray):
                assert np.isnan(result)
            else:
                assert result.dtype == 'float16'

            np.testing.assert_array_equal(result, expected)
def data():
    """Length-100 array for this type.

    * data[0] and data[1] should both be non missing
    * data[0] and data[1] should not be equal
    """
    # Five rows repeated twenty times gives the required hundred elements
    pattern = [[0, 1], [1, 2, 3, 4], [], [-1, -2], []]
    return RaggedArray(pattern * 20, dtype='float64')
def data_for_grouping():
    """Data for factorization, grouping, and unique tests.

    Expected to be like [B, B, NA, NA, A, A, B, C]

    Where A < B < C and NA is missing
    """
    a, b, c = [0, 0], [1, 0], [2, 0]
    rows = [list(b), list(b), [], [], list(a), list(a), list(b), list(c)]
    return RaggedArray(rows)
def test_series_construction():
    """A Series built with the 'Ragged[int64]' dtype wraps a RaggedArray."""
    arg = [[0, 1], [1.0, 2, 3.0, 4], None, [-1, -2]] * 2

    series = pd.Series(arg, dtype='Ragged[int64]')

    assert_ragged_arrays_equal(series.array, RaggedArray(arg, dtype='int64'))
def test_equality_validation(other):
    """Comparing a RaggedArray with an incompatible ``other`` raises."""
    ra1 = RaggedArray(
        [[1, 2], [], [1, 2], None, [11, 22, 33, 44]], dtype='int32')

    # The comparison itself is what must raise; its value is discarded
    with pytest.raises(ValueError, match="Cannot check equality"):
        ra1 == other
def test_array_eq_ragged():
    """Element-wise == and != between two RaggedArrays of equal length."""
    left = RaggedArray(
        [[1, 2], [], [1, 2], [3, 2, 1], [11, 22, 33, 44]], dtype='int32')
    right = RaggedArray(
        [[1, 2], [2, 3, 4, 5], [1, 2], [11, 22, 33], [11]], dtype='int32')

    # Only rows 0 and 2 hold the same values on both sides
    expected = np.array([True, False, True, False, False], dtype='bool')

    np.testing.assert_array_equal(left == right, expected)

    # != must be the exact complement of ==
    np.testing.assert_array_equal(left != right, np.logical_not(expected))
def test_get_item_slice():
    """Slicing a RaggedArray matches slicing the list it was built from."""
    arg = [[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]]
    rarray = RaggedArray(arg, dtype='int16')

    # A full slice reproduces the array itself
    assert_ragged_arrays_equal(rarray[:], rarray)

    windows = (
        slice(1, None),   # all but the first
        slice(None, -1),  # all but the last
        slice(2, -1),     # middle
        slice(2, 1),      # empty
    )
    for window in windows:
        assert_ragged_arrays_equal(
            rarray[window], RaggedArray(arg[window], dtype='int16'))
def test_start_indices_dtype():
    """start_indices uses the smallest unsigned int dtype that fits.

    The start_indices dtype should be an unsigned int that is only as large
    as needed to handle the length of the flat array.
    """
    max_uint8 = np.iinfo('uint8').max
    max_uint16 = np.iinfo('uint16').max

    # (rows, expected start_indices dtype, expected start_indices values)
    cases = [
        # Empty
        ([[]], 'uint8', [0]),
        # Small
        ([[23, 24]], 'uint8', [0]),
        # Max uint8
        ([np.zeros(max_uint8), []], 'uint8', [0, max_uint8]),
        # Min uint16
        ([np.zeros(max_uint8 + 1), []], 'uint16', [0, max_uint8 + 1]),
        # Max uint16
        ([np.zeros(max_uint16), []], 'uint16', [0, max_uint16]),
        # Min uint32
        ([np.zeros(max_uint16 + 1), []], 'uint32', [0, max_uint16 + 1]),
    ]

    for rows, dtype_name, expected_starts in cases:
        rarray = RaggedArray(rows, dtype='int64')
        assert rarray.start_indices.dtype == np.dtype(dtype_name)
        np.testing.assert_array_equal(rarray.start_indices, expected_starts)
def test_array_eq_scalar(scalar):
    """Element-wise == and != between a RaggedArray and ``scalar``."""
    ra = RaggedArray(
        [[1, 2], [], [1, 2], [1, 3], [11, 22, 33, 44]], dtype='int32')

    # The expectation marks rows 0 and 2 (both [1, 2]) as the only matches
    expected = np.array([True, False, True, False, False], dtype='bool')

    np.testing.assert_array_equal(ra == scalar, expected)

    # != must be the exact complement of ==
    np.testing.assert_array_equal(ra != scalar, np.logical_not(expected))
def test_array_eq_numpy2d():
    """Element-wise == and != between a RaggedArray and a 2D int32 array."""
    ra = RaggedArray([[1, 2], [1], [1, 2], None, [33, 44]], dtype='int32')
    npa = np.array(
        [[1, 2], [2, 3], [1, 2], [0, 1], [11, 22]], dtype='int32')

    # Each ragged row is compared against the matching row of the 2D array
    expected = np.array([True, False, True, False, False], dtype='bool')

    np.testing.assert_array_equal(ra == npa, expected)

    # != must be the exact complement of ==
    np.testing.assert_array_equal(ra != npa, np.logical_not(expected))
def test_copy():
    """A deep copy does not share its flat buffer with the original."""
    rows = [[1, 2], [], [1, 2], None, [11, 22, 33, 44]]
    original = RaggedArray._from_sequence(rows)

    copied = original.copy(deep=True)

    # Right after copying, both arrays compare equal
    assert_ragged_arrays_equal(original, copied)

    # Write through the original's buffer ...
    original.flat_array[0] = 99
    assert original.flat_array[0] == 99

    # ... and confirm the copy still holds the old value
    assert copied.flat_array[0] == 1
def test_validate_ragged_array_fastpath():
    """Invalid start_indices / flat_array dicts are rejected with ValueError."""
    start_indices = np.array([0, 2, 5, 6, 6, 11], dtype='uint16')
    flat_array = np.array(
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='float32')
    valid_dict = dict(start_indices=start_indices, flat_array=flat_array)

    # The untouched dict must be accepted
    RaggedArray(valid_dict)

    # Copy of start_indices whose last entry is changed to 99
    bad_start_indices = start_indices.copy()
    bad_start_indices[-1] = 99

    start_msg = 'start_indices property of a RaggedArray'
    flat_msg = 'flat_array property of a RaggedArray'

    # (override applied on top of valid_dict, expected error message pattern)
    invalid_cases = [
        # start_indices: not ndarray
        (dict(start_indices=25), start_msg),
        # start_indices: not unsigned int
        (dict(start_indices=start_indices.astype('float32')), start_msg),
        # start_indices: not 1d
        (dict(start_indices=np.array([start_indices])), start_msg),
        # flat_array: not ndarray
        (dict(flat_array='foo'), flat_msg),
        # flat_array: not 1d
        (dict(flat_array=np.array([flat_array])), flat_msg),
        # start_indices out of bounds
        (dict(start_indices=bad_start_indices),
         'start_indices must be less than'),
    ]

    for override, message in invalid_cases:
        with pytest.raises(ValueError, match=message):
            RaggedArray(dict(valid_dict, **override))
def test_array_eq_numpy1():
    """Element-wise == and != between a RaggedArray and a 1D object array."""
    ra = RaggedArray(
        [[1, 2], [], [1, 2], None, [11, 22, 33, 44]], dtype='int32')
    npa = np.array(
        [[1, 2], [2], [1, 2], None, [10, 20, 30, 40]], dtype='object')

    # Row 3 is None on both sides and is expected to compare equal
    expected = np.array([True, False, True, True, False], dtype='bool')

    np.testing.assert_array_equal(ra == npa, expected)

    # != must be the exact complement of ==
    np.testing.assert_array_equal(ra != npa, np.logical_not(expected))
def test_construct_ragged_array_fastpath():
    """Construct from a start_indices / flat_array dict and read it back."""
    start_indices = np.array([0, 2, 5, 6, 6, 11], dtype='uint16')
    flat_array = np.array(
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype='float32')
    buffers = dict(start_indices=start_indices, flat_array=flat_array)

    rarray = RaggedArray(buffers)

    # Both buffers come back with the same contents
    assert np.array_equal(rarray.start_indices, start_indices)
    assert np.array_equal(rarray.flat_array, flat_array)

    # Converting to a NumPy array yields one entry per ragged row
    object_array = np.asarray(rarray)
    expected_lists = [[0, 1], [2, 3, 4], [5], [], [6, 7, 8, 9, 10], []]
    expected_array = np.array(
        [np.array(values, dtype='float32') for values in expected_lists],
        dtype='object')

    assert len(object_array) == len(expected_array)
    for actual_row, expected_row in zip(object_array, expected_array):
        np.testing.assert_array_equal(actual_row, expected_row)
def test_isna():
    """isna() flags both empty-list rows and None rows as missing."""
    rows = [[], [1, 3], [10, 20, 30], None, [11, 22, 33, 44], []]
    rarray = RaggedArray(rows, dtype='int32')

    expected = np.array([True, False, False, True, False, True])
    np.testing.assert_array_equal(rarray.isna(), expected)
def test_get_item_scalar_out_of_bounds(index):
    """Indexing a length-5 RaggedArray with ``index`` raises IndexError."""
    rows = [[1, 2], [], [10, 20, 30], None, [11, 22, 33, 44]]
    rarray = RaggedArray(rows)

    with pytest.raises(IndexError):
        rarray[index]
def test_construct_ragged_array_from_ragged_array():
    """Passing a RaggedArray to the constructor yields an equal array."""
    # np.nan stands in for one of the rows in the source data
    rows = [[1, 2], [], [10, 20, 30], np.nan, [11, 22, 33, 44]]
    rarray = RaggedArray(rows, dtype='int32')

    rebuilt = RaggedArray(rarray)

    assert_ragged_arrays_equal(rebuilt, rarray)
def data_for_sorting():
    """Length-3 array with a known sort order.

    This should be three items [B, C, A] with
    A < B < C
    """
    a, b, c = [0, 0], [1, 0], [2, 0]
    return RaggedArray([b, c, a])
def data_missing_for_sorting():
    """Length-3 array with a known sort order.

    This should be three items [B, NA, A] with
    A < B and NA missing.
    """
    a, b, missing = [0, 0], [1, 0], []
    return RaggedArray([b, missing, a])
def data_missing():
    """Length-2 array with [NA, Valid]"""
    missing, valid = [], [-1, 0, 1]
    return RaggedArray([missing, valid], dtype='int16')
def test_flat_array_type_inference(arg, expected):
    """Without an explicit dtype, flat_array gets the ``expected`` dtype."""
    inferred = RaggedArray(arg).flat_array.dtype
    assert inferred == np.dtype(expected)
def test_from_sequence():
    """_from_sequence builds the same array as the regular constructor."""
    sequence = [[1, 2], [], [1, 2], None, [11, 22, 33, 44]]

    via_classmethod = RaggedArray._from_sequence(sequence)
    via_constructor = RaggedArray(sequence)

    assert_ragged_arrays_equal(via_classmethod, via_constructor)
# NOTE(review): the statements below use ra1 / ra2 without defining them; they
# look like the tail of a test whose header lies outside this view -- confirm
# where they belong before relying on them.
result = ra1 == ra2
expected = np.array([1, 0, 1, 0, 0], dtype='bool')
np.testing.assert_array_equal(result, expected)

# Check non-equality
result_negated = ra1 != ra2
expected_negated = ~expected
np.testing.assert_array_equal(result_negated, expected_negated)


@pytest.mark.parametrize(
    'other', [
        'a string',  # Incompatible scalars
        32,
        RaggedArray([[0, 1], [2, 3, 4]]),  # RaggedArray of wrong length
        np.array([[0, 1], [2, 3, 4]], dtype='object'),  # 1D array wrong length
        np.array([[0, 1], [2, 3]], dtype='int32'),  # 2D array wrong row count
    ])
def test_equality_validation(other):
    """Comparing a length-5 RaggedArray with ``other`` must raise ValueError.

    Each parametrized ``other`` is expected to trigger an error whose message
    matches "Cannot check equality".
    """
    # Build RaggedArray
    arg1 = [[1, 2], [], [1, 2], None, [11, 22, 33, 44]]
    ra1 = RaggedArray(arg1, dtype='int32')

    # invalid scalar
    # The comparison is evaluated only for the exception it should raise.
    with pytest.raises(ValueError, match="Cannot check equality"):
        ra1 == other


# Pandas-provided extension array tests
# -------------------------------------
def test_pandas_array_construction():
    """pd.array with the 'ragged[int64]' dtype builds a RaggedArray."""
    arg = [[0, 1], [1, 2, 3, 4], None, [-1, -2]] * 2

    built = pd.array(arg, dtype='ragged[int64]')

    assert_ragged_arrays_equal(built, RaggedArray(arg, dtype='int64'))