def test_infer_categories(self):
    """
    Categories that are not passed explicitly should be inferred from the
    data and stored in sorted order.
    """
    strs = self.strs
    labels = LabelArray(strs, missing_value='')
    codes = labels.as_int_array()

    # Neither wrapping the strings nor viewing them as integer codes may
    # change the shape.
    self.assertEqual(labels.shape, strs.shape)
    self.assertEqual(labels.shape, codes.shape)

    categories = labels.categories
    expected_categories = sorted(set(self.rowvalues))

    # Each unique row value gets one category, in sorted order ...
    self.assertEqual(list(categories), expected_categories)
    # ... and the stored integers are exactly the indices into categories.
    self.assertEqual(
        set(codes.ravel()),
        set(range(len(expected_categories))),
    )

    # The code at each position must point at the string stored there.
    for code, label in enumerate(categories):
        check_arrays(strs == label, codes == code)

    # Passing the inferred categories back in explicitly should produce an
    # equivalent array.
    explicit = LabelArray(
        strs,
        missing_value='',
        categories=categories,
    )
    check_arrays(labels, explicit)

    # Repeat the inference checks on multi-dimensional reshapes of the same
    # data; the inferred categories must match the 1d case.
    for shape in [(9, 3), (3, 9), (3, 3, 3)]:
        reshaped_strs = strs.reshape(shape)
        reshaped_labels = LabelArray(reshaped_strs, missing_value='')
        reshaped_codes = reshaped_labels.as_int_array()

        self.assertEqual(reshaped_labels.shape, shape)
        check_arrays(reshaped_labels.categories, categories)

        for code, label in enumerate(reshaped_labels.categories):
            check_arrays(reshaped_strs == label, reshaped_codes == code)
def test_infer_categories(self):
    """
    When no categories are supplied, LabelArray should infer them from its
    input and keep them in sorted order.
    """
    source = self.strs
    inferred = LabelArray(source, missing_value='')
    int_codes = inferred.as_int_array()

    # Shape is preserved by construction and by the integer view.
    self.assertEqual(inferred.shape, source.shape)
    self.assertEqual(inferred.shape, int_codes.shape)

    categories = inferred.categories
    sorted_unique = sorted(set(self.rowvalues))

    # One category per unique row value, sorted.
    self.assertEqual(list(categories), sorted_unique)

    # The integer codes in use are exactly 0..len(categories) - 1.
    valid_indices = set(range(len(sorted_unique)))
    self.assertEqual(set(int_codes.ravel()), valid_indices)

    # Positions holding a given string are the positions holding its code.
    for position, category in enumerate(categories):
        string_hits = source == category
        code_hits = int_codes == position
        check_arrays(string_hits, code_hits)

    # Supplying the inferred categories by hand must change nothing.
    check_arrays(
        inferred,
        LabelArray(source, missing_value='', categories=categories),
    )

    # The same properties must hold for reshaped views of the input.
    for shape in ((9, 3), (3, 9), (3, 3, 3)):
        source_nd = source.reshape(shape)
        inferred_nd = LabelArray(source_nd, missing_value='')
        int_codes_nd = inferred_nd.as_int_array()

        self.assertEqual(inferred_nd.shape, shape)
        check_arrays(inferred_nd.categories, categories)

        for position, category in enumerate(inferred_nd.categories):
            check_arrays(source_nd == category, int_codes_nd == position)
def test_string_not_equal(self, compval, missing, labelarray_dtype):
    """
    ``classifier != compval`` should be True exactly where the label is
    neither ``compval`` nor the classifier's missing value.
    """
    # Coerce the comparison value to the scalar type of the array dtype.
    compval = labelarray_dtype.type(compval)

    class C(Classifier):
        dtype = categorical_dtype
        missing_value = missing
        inputs = ()
        window_length = 0

    classifier = C()

    # The particular strings are arbitrary; they only need to mix the
    # comparison value with other values.
    raw_labels = np.asarray(
        [
            ["", "a", "ab", "ba"],
            ["z", "ab", "a", "ab"],
            ["aa", "ab", "", "ab"],
            ["aa", "a", "ba", "ba"],
        ],
        dtype=labelarray_dtype,
    )
    data = LabelArray(raw_labels, missing_value=missing)

    codes = data.as_int_array()
    code_for = data.reverse_categories
    # compval may not be a category of ``data``; fall back to -1 then
    # (presumably a value no stored code equals -- confirm).
    compval_code = code_for.get(compval, -1)
    missing_code = code_for[C.missing_value]
    expected = (codes != compval_code) & (codes != missing_code)

    self.check_terms(
        terms={"ne": classifier != compval},
        expected={"ne": expected},
        initial_workspace={classifier: data},
        mask=self.build_mask(self.ones_mask(shape=data.shape)),
    )
def test_string_not_equal(self, compval, missing, labelarray_dtype):
    """
    Check the ``!=`` operator of a string classifier: the result should
    exclude both entries equal to ``compval`` and entries holding the
    missing value.
    """
    # Convert the comparison value to a scalar of the array's dtype.
    compval = labelarray_dtype.type(compval)

    class C(Classifier):
        dtype = categorical_dtype
        missing_value = missing
        inputs = ()
        window_length = 0

    term = C()

    # These values carry no meaning beyond mixing the comparison value with
    # other strings.
    rows = [
        ['', 'a', 'ab', 'ba'],
        ['z', 'ab', 'a', 'ab'],
        ['aa', 'ab', '', 'ab'],
        ['aa', 'a', 'ba', 'ba'],
    ]
    data = LabelArray(
        np.asarray(rows, dtype=labelarray_dtype),
        missing_value=missing,
    )

    int_codes = data.as_int_array()
    lookup = data.reverse_categories
    # -1 is the fallback code when compval is not among the categories.
    differs_from_compval = int_codes != lookup.get(compval, -1)
    is_not_missing = int_codes != lookup[C.missing_value]
    expected = differs_from_compval & is_not_missing

    all_ones = self.ones_mask(shape=data.shape)
    self.check_terms(
        terms={'ne': term != compval},
        expected={'ne': expected},
        initial_workspace={term: data},
        mask=self.build_mask(all_ones),
    )