def test_cat_base_index_0(self):
    """Categorical.str.removetrailing strips trailing whitespace from category labels when base_index=0."""
    codes = np.tile([0, 1], 100)
    cat = rt.Categorical(codes, ['abc ', 'bcd'], base_index=0)

    trimmed = cat.str.removetrailing()

    # 'abc ' loses its trailing space; categories stay 4-byte bytestrings ('S4').
    expected_categories = np.asarray(['abc', 'bcd']).astype('S4')
    expected = rt.Categorical(codes, expected_categories, base_index=0)
    assert_array_or_cat_equal(trimmed, expected)
def test_fill_forward(self):
    """
    Test that Categorical.fill_forward fills values forward *per group*.
    """
    values = rt.FA([1.0, 4.0, 9.0, 16.0, np.nan, np.nan])
    groups = rt.Categorical(['A', 'B', 'A', 'B', 'A', 'B'])

    filled = groups.fill_forward(values)

    # The result of this function should be a Dataset.
    assert isinstance(filled, rt.Dataset)

    # The dataset should have the same number of rows as the data arrays
    # we operated on (an invariant of apply_nonreduce/"scan"/"prefix sum").
    assert filled.shape[0] == len(values)

    # The dataset should have (N+M) columns, where N is the number
    # of keys within the Categorical and M is the number of columns
    # we performed the operation on.
    assert filled.shape[1] == len(groups.category_dict) + 1

    # Check the resulting data; each NaN picks up the last valid value
    # seen in its own group ('A' -> 9.0, 'B' -> 16.0).
    assert_array_equal(filled[0], rt.FA([1.0, 4.0, 9.0, 16.0, 9.0, 16.0]))
def test_single_col_categoricals(self, func, func_name: str, data_dtype):
    """Compare a riptable Categorical groupby reduction against the pandas equivalent."""
    values = [0, 1, 1, 2, 2, 2, 3, 3, 3, 4]
    bin_ids = ['a', 'b', 'c', 'd', 'e']
    # Fixed sample originally produced by:
    #   np.random.rand(10) + np.random.randint(0, 10, size=10)
    data = rt.FastArray(
        np.array([
            7.19200901, 0.14907245, 2.28258397, 5.07872708, 0.76125165,
            1.32797916, 3.40280423, 4.48942476, 6.98713656, 4.39541456,
        ]),
        dtype=data_dtype,
    )

    pd_grouped = pd.DataFrame({'vs': data, 'ks': values}).groupby(by='ks')
    rt_grouped = rt.Categorical(values=values, categories=bin_ids, base_index=0)

    pd_out = self.groupby_func(pd_grouped, func, None)
    rt_out = self.groupby_func(rt_grouped, func, data)

    # 'count' results are keyed by a 'Count' column; other reductions are positional.
    col_index = 'Count' if func_name == 'count' else 0
    assert_array_almost_equal(rt_out[col_index], pd_out['vs'].values, decimal=3)
class TestPyarrowConvertDataset:
    """Round-trip tests for converting an rt.Dataset to a pyarrow.Table and back."""

    @pytest.mark.parametrize(('rt_dset',), [
        pytest.param(rt.Dataset({}), id='empty'),
        pytest.param(rt.Dataset({
            'ink_capacity': rt.FA([15, 10, 15, 25, 10, 15, 25, 15]),
            'purchase_date': rt.Date(['2019-06-19', '2019-06-19', '2020-01-15', '2020-05-22', '2020-02-10', '2020-02-10', '2020-03-17', '2020-03-17']),
            'country_code': rt.Categorical(
                # Country codes -- adapted from TestCategorical.test_hstack_fails_for_different_mode_cats.
                [36, 36, 344, 840, 840, 124, 36, 484],
                {
                    'IRL': 372, 'USA': 840, 'AUS': 36, 'HKG': 344, 'JPN': 392,
                    'MEX': 484, 'KHM': 116, 'THA': 764, 'JAM': 388, 'ARM': 51
                },
                ordered=True)
        }))
    ])
    def test_roundtrip_rt_pa_rt(self, rt_dset: rt.Dataset) -> None:
        """Test round-tripping from rt.Dataset to pyarrow.Table and back."""
        result_pa_tbl = rt_dset.to_arrow()
        result_rt_dset = rt.Dataset.from_arrow(result_pa_tbl, zero_copy_only=False)
        # Column names and their order must survive the round trip.
        assert rt_dset.keys() == result_rt_dset.keys()
        for col_name in rt_dset.keys():
            # relaxed_cat_check=True, because we're not trying to test specific details of Categorical conversion
            # here, we're more interested in the dataset-level stuff.
            assert_array_or_cat_equal(rt_dset[col_name], result_rt_dset[col_name], relaxed_cat_check=True)
def test_unordered_categorical_disallowed(self):
    """Test which verifies rt.nanmin raises an exception if called with an unordered Categorical."""
    states = ["PA", "NY", "NY", "AL", "LA", "PA", "CA", "IL", "IL", "FL", "FL", "LA"]
    unordered_cat = rt.Categorical(states, ordered=False)
    assert not unordered_cat.ordered

    # The minimum of an unordered Categorical is undefined, so this must raise.
    with pytest.raises(ValueError):
        rt.nanmin(unordered_cat)
def test_aggs_sum_symb_0_10_ncols_7(self):
    """Compare Categorical.sum against pandas groupby-sum on identical data (7 cols, 10% symbols)."""
    fixture = categorical_base(7, 0.10, "sum")
    cat = rt.Categorical(
        values=fixture.bin_ids,
        categories=fixture.keys,
        base_index=default_base_index,
    )
    rt_result = cat.sum(rt.Dataset(fixture.data))

    pd_result = pd.DataFrame(fixture.data).groupby(fixture.bin_ids).sum()

    # Compare column by column, ignoring NaN cells on both sides.
    for col in fixture.data:
        safe_assert(remove_nan(pd_result[col]), remove_nan(rt_result[col]))
def test_aggs_mean_symb_0_40_ncols_6(self):
    """Compare Categorical.mean against pandas groupby-mean on identical data (6 cols, 40% symbols)."""
    fixture = categorical_base(6, 0.40, "mean")
    cat = rt.Categorical(
        values=fixture.bin_ids,
        categories=fixture.keys,
        base_index=default_base_index,
    )
    rt_result = cat.mean(rt.Dataset(fixture.data))

    pd_result = pd.DataFrame(fixture.data).groupby(fixture.bin_ids).mean()

    # Compare column by column, ignoring NaN cells on both sides.
    for col in fixture.data:
        safe_assert(remove_nan(pd_result[col]), remove_nan(rt_result[col]))
def test_aggs_var_symb_0_25_ncols_5(self):
    """Compare Categorical.var against pandas groupby-var on identical data (5 cols, 25% symbols)."""
    fixture = categorical_base(5, 0.25, "var")
    cat = rt.Categorical(
        values=fixture.bin_ids,
        categories=fixture.keys,
        base_index=default_base_index,
    )
    rt_result = cat.var(rt.Dataset(fixture.data))

    pd_result = pd.DataFrame(fixture.data).groupby(fixture.bin_ids).var()

    # Compare column by column, ignoring NaN cells on both sides.
    for col in fixture.data:
        safe_assert(remove_nan(pd_result[col]), remove_nan(rt_result[col]))
def test_alignmk(self):
    """alignmk produces the same alignment for Categorical keys and char-array keys."""
    left = rt.Dataset()
    left['Time'] = [0, 1, 4, 6, 8, 9, 11, 16, 19, 30]
    left['Px'] = [10, 12, 15, 11, 10, 9, 13, 7, 9, 10]

    right = rt.Dataset()
    right['Time'] = [0, 0, 5, 7, 8, 10, 12, 15, 17, 20]
    right['Vols'] = [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]

    expected = rt.FastArray([0, 0, 0, 2, 4, 4, 4, 6, 8, 8])

    # Categorical keys
    left['Ticker'] = rt.Categorical(['Test'] * 10)
    right['Ticker'] = rt.Categorical(['Test', 'Blah'] * 5)
    result = alignmk(left.Ticker, right.Ticker, left.Time, right.Time)
    assert_array_equal(result, expected)

    # char array keys
    left['Ticker'] = rt.FastArray(['Test'] * 10)
    right['Ticker'] = rt.FastArray(['Test', 'Blah'] * 5)
    result = alignmk(left.Ticker, right.Ticker, left.Time, right.Time)
    assert_array_equal(result, expected)
def test_ordered_categorical_returns_scalar(self):
    """
    Test which verifies rt.nanmin returns a scalar (Python object or numpy scalar)
    representing the min Category given an ordered Categorical.
    """
    # Create an ordered Categorical (aka 'ordinal').
    cat = rt.Categorical(
        ["PA", "NY", "", "NY", "AL", "LA", "PA", "", "CA", "IL", "IL", "FL", "FL", "LA"],
        ordered=True)
    assert cat.ordered

    result = rt.nanmin(cat)

    # The result should either be a Python string, a numpy string scalar, or a Categorical scalar (if we implement one).
    is_py_str = isinstance(result, (bytes, str))
    # BUGFIX: was `np.str`, a deprecated alias of builtin `str` that was removed in
    # NumPy 1.24 (deprecated since 1.20) and raised AttributeError on import-time use.
    # `np.str_` is the actual numpy string scalar type intended here.
    is_np_scalar = isinstance(result, np.str_)
    is_rt_cat = isinstance(result, rt.Categorical)
    assert is_py_str or is_np_scalar or is_rt_cat

    # Check the result is correct.
    assert result == "PA"
def test_groupby_categorical_sort(self):
    """
    Test that groupby on a categorical sorts the dataset correctly
    """
    cats = ['z', 'y', 'x', 'w', 'a', 'b', 'c', 'd']
    vals = [0, 1, 2, 3, 4, 5, 6, 7]
    expected = dict(zip(cats, vals))

    ds = rt.Dataset()
    ds["Cat"] = rt.Categorical([cats[i % len(cats)] for i in range(100)])
    # two identical columns
    ds["Value1"] = [vals[i % len(cats)] for i in range(100)]
    ds["Value2"] = [vals[i % len(cats)] for i in range(100)]

    grp = ds.groupby("Cat").mean()
    grp["Expected"] = [expected[label] for label in grp.Cat.astype('U')]

    # Each group's mean equals its constant value, so any nonzero difference
    # indicates the grouped rows were misaligned with their categories.
    total_error = rt.sum(rt.abs(grp.Expected - grp.Value1))
    total_error += rt.sum(rt.abs(grp.Expected - grp.Value2))
    assert total_error <= 1e-9
def get_all_categorical_data() -> List[rt.Categorical]:
    """Returns a list of all the Categorical test data of all supported CategoryModes."""
    return list(map(rt.Categorical, get_categorical_data_factory_method()))
def test_accum_table(self):
    """
    End-to-end test of rt.AccumTable: building inner tables with totals,
    handling blank rows/columns, margin-column ordering, and footer rows.
    """
    # Create the test data
    def unpivot(frame):
        # Melt a (date x variable) frame into long format:
        # one row per (date, variable, value) triple.
        N, K = frame.shape
        data = {
            'value': frame.values.ravel('F'),
            'variable': np.asarray(frame.columns).repeat(N),
            'date': np.tile(np.asarray(frame.index), K),
        }
        return pd.DataFrame(data, columns=['date', 'variable', 'value'])

    np.random.seed(1234)
    df = unpivot(pd.concat([tm.makeTimeDataFrame(), tm.makeTimeDataFrame()]))
    ds = dataset_from_pandas_df(df)
    # Keep just the 'YYYY-MM-DD' prefix of the ISO timestamp string.
    ds.date = DateTimeNano(ds.date, from_tz='NYC').to_iso()
    ds.date = rt.FastArray([d[:10] for d in ds.date])
    ds.variable = rt.Categorical(ds.variable)
    ds.date = rt.Categorical(ds.date)
    at = rt.AccumTable(ds.date, ds.variable)

    # Add and view inner tables with totals
    at['Sum'] = at.sum(ds.value)
    self.assertEqual(at['Sum'].shape, (3, 7))
    assert_array_almost_equal(at['Sum']['A'], np.array([0.47, -0.79, 1.72]), decimal=2)

    vw = at.gen('Sum')
    self.assertEqual(vw.shape, (3, 7))
    assert_array_almost_equal(vw['A'], np.array([0.47, -0.79, 1.72]), decimal=2)
    assert_array_almost_equal(vw['Sum'], np.array([-0.10, -5.02, 5.37]), decimal=2)
    assert_array_almost_equal(vw.footer_get_values(columns=['Sum'])['Sum'], np.array([0.25]), decimal=2)

    at['Mean'] = at.mean(ds.value)
    self.assertEqual(at['Mean'].shape, (3, 7))
    assert_array_almost_equal(at['Mean']['A'], np.array([0.24, -0.39, 0.86]), decimal=2)

    at['Half'] = at['Mean'] / at['Sum']
    self.assertEqual(at['Half'].shape, (3, 7))
    assert_array_almost_equal(at['Half']['A'], np.array([0.5, 0.5, 0.5]), decimal=2)

    # Add and view inner tables with blanks
    at['Blanks'] = at['Sum'].copy()
    at['Blanks']['C'] = 0.0
    for col in at['Blanks'][:, 1:]:
        at['Blanks'][col][2] = np.nan

    vw = at.gen('Blanks')
    self.assertEqual(vw.shape, (2, 9))
    assert_array_almost_equal(vw['A'], np.array([0.47, -0.79]), decimal=2)
    assert_array_almost_equal(vw['Blanks'], np.array([-0.10, -5.02]), decimal=2)
    self.assertAlmostEqual(vw.footer_get_dict()['Blanks']['Blanks'], 0.245, places=2)

    # With remove_blanks=False, the NaN row stays in the view.
    vw = at.gen('Blanks', remove_blanks=False)
    self.assertEqual(vw.shape, (3, 10))
    assert_array_almost_equal(vw['A'], np.array([0.47, -0.79, np.nan]), decimal=2)
    assert_array_almost_equal(vw['Blanks'], np.array([-0.10, -5.02, np.nan]), decimal=2)

    # Test division with zeros and nans
    at['Bad'] = at['Blanks'] / at['Half']
    self.assertEqual(at['Blanks'].shape, (3, 7))
    vw = at.gen('Bad')
    self.assertEqual(vw.shape, (2, 10))
    vw = at.gen('Blanks')
    self.assertEqual(vw.shape, (2, 10))
    vw = at.gen('Half')
    self.assertEqual(vw.shape, (3, 11))

    # Set margin columns to the right
    at.set_margin_columns(['Blanks', 'Mean'])
    vw = at.gen('Half')
    self.assertEqual(vw.shape, (3, 9))
    self.assertEqual(vw.keys()[6], 'Half')
    self.assertEqual(vw.keys()[7], 'Blanks')
    self.assertEqual(vw.keys()[8], 'Mean')
    self.assertEqual(list(vw.footer_get_dict().keys()), ['Half', 'Sum', 'Mean', 'Blanks', 'Bad'])

    vw = at.gen()
    self.assertEqual(vw.keys()[6], 'Half')

    vw = at.gen('Sum')
    self.assertEqual(vw.keys()[6], 'Sum')
    self.assertEqual(vw.keys()[7], 'Blanks')
    self.assertEqual(vw.keys()[8], 'Mean')
    self.assertEqual(list(vw.footer_get_dict().keys()), ['Sum', 'Mean', 'Half', 'Blanks', 'Bad'])

    # Set footer rows at the bottom
    at.set_footer_rows(['Mean'])
    vw = at.gen('Half')
    self.assertEqual(vw.shape, (3, 9))
    self.assertEqual(vw.keys()[6], 'Half')
    self.assertEqual(vw.keys()[7], 'Blanks')
    self.assertEqual(vw.keys()[8], 'Mean')
    self.assertEqual(list(vw.footer_get_dict().keys()), ['Half', 'Mean'])

    vw = at.gen('Sum')
    self.assertEqual(vw.keys()[6], 'Sum')
    self.assertEqual(vw.keys()[7], 'Blanks')
    self.assertEqual(vw.keys()[8], 'Mean')
    self.assertEqual(list(vw.footer_get_dict().keys()), ['Sum', 'Mean'])

    # Access view Dataset elements
    vw = at.gen('Sum')
    assert_array_equal(
        vw.date,
        rt.FastArray(['2000-01-03', '2000-01-04', '2000-01-05']))
    assert_array_almost_equal(vw['Sum'], np.array([-0.10, -5.02, 5.37]), decimal=2)
    assert_almost_equal(vw[vw.date == '2000-01-03', 'A'][0], 0.47355353, decimal=2)
    assert_almost_equal(
        list(vw.footer_get_values('Sum', columns=['A']).values())[0],
        1.409830,
        decimal=2,
    )
class TestPyarrowConvertCategorical:
    """Conversion tests between rt.Categorical and pyarrow dictionary-encoded arrays."""

    @pytest.mark.parametrize(('rt_cat',), [
        # TODO: Add test cases for CategoryMode.IntEnum; at present, it appears IntEnum support is broken, can't seem to create a Categorical in that mode.
        # pytest.param(rt.Categorical([]), id='empty', marks=pytest.mark.skip(reason="rt.Categorical does not support creation from an empty list/array.")),
        pytest.param(rt.Categorical(['red', 'red', 'green', 'blue', 'green', 'red', 'blue'], ordered=False), id='CategoryMode.StringArray'),
        pytest.param(rt.Categorical(['red', 'red', 'green', 'blue', 'green', 'red', 'blue'], ordered=True), id='CategoryMode.StringArray--ordered'),
        pytest.param(rt.Categorical(['red', 'red', 'green', 'blue', 'green', 'red', 'blue'], dtype=np.int8, ordered=False), id='CategoryMode.StringArray;int8;ordered=False'),
        pytest.param(rt.Categorical(['red', 'red', 'green', 'blue', 'green', 'red', 'blue'], dtype=np.int8, ordered=True), id='CategoryMode.StringArray;int8;ordered=True'),
        pytest.param(rt.Categorical([f"x{i}" for i in range(0, 127)], dtype=np.int8), id="max number of categories for a signed int backing array without causing overflow"),
        # N.B. The test cases below for Categorical[Date] require pyarrow 5.0.0 or higher; dictionary-encoded date32() arrays didn't work before then.
        pytest.param(
            rt.Categorical(
                rt.Date(['2019-06-19', '2019-06-19', '2020-01-15', '2020-05-22', '2020-02-10', '2020-02-10', '2020-03-17', '2020-03-17']),
                ordered=False),
            id="Categorical[Date];ordered=False"),
        pytest.param(
            rt.Categorical(
                rt.Date(['2019-06-19', '2019-06-19', '2020-01-15', '2020-05-22', '2020-02-10', '2020-02-10', '2020-03-17', '2020-03-17']),
                ordered=True),
            id="Categorical[Date];ordered=True"
        ),
        pytest.param(rt.Categorical(
            # Country codes -- adapted from TestCategorical.test_hstack_fails_for_different_mode_cats.
            [36, 36, 344, 840, 840, 372, 840, 372, 840, 124, 840, 124, 36, 484],
            {
                'IRL': 372, 'USA': 840, 'AUS': 36, 'HKG': 344, 'JPN': 392,
                'MEX': 484, 'KHM': 116, 'THA': 764, 'JAM': 388, 'ARM': 51
            },
            ordered=False
        ), id="CategoryMode.Dictionary;ordered=False;Unicode"),
        pytest.param(rt.Categorical(
            # Country codes -- adapted from TestCategorical.test_hstack_fails_for_different_mode_cats.
            [36, 36, 344, 840, 840, 372, 840, 372, 840, 124, 840, 124, 36, 484],
            {
                'IRL': 372, 'USA': 840, 'AUS': 36, 'HKG': 344, 'JPN': 392,
                'MEX': 484, 'KHM': 116, 'THA': 764, 'JAM': 388, 'ARM': 51
            },
            ordered=True
        ), id="CategoryMode.Dictionary;ordered=True;Unicode"),
        pytest.param(rt.Categorical(
            # Country codes -- adapted from TestCategorical.test_hstack_fails_for_different_mode_cats.
            [36, 36, 344, 840, 840, 372, 840, 372, 840, 124, 840, 124, 36, 484],
            {
                b'IRL': 372, b'USA': 840, b'AUS': 36, b'HKG': 344, b'JPN': 392,
                b'MEX': 484, b'KHM': 116, b'THA': 764, b'JAM': 388, b'ARM': 51
            },
            ordered=False
        ), id="CategoryMode.Dictionary;ordered=False;ASCII"),
        pytest.param(rt.Categorical(
            # Country codes -- adapted from TestCategorical.test_hstack_fails_for_different_mode_cats.
            [36, 36, 344, 840, 840, 372, 840, 372, 840, 124, 840, 124, 36, 484],
            {
                b'IRL': 372, b'USA': 840, b'AUS': 36, b'HKG': 344, b'JPN': 392,
                b'MEX': 484, b'KHM': 116, b'THA': 764, b'JAM': 388, b'ARM': 51
            },
            ordered=True
        ), id="CategoryMode.Dictionary;ordered=True;ASCII"),
        pytest.param(rt.Categorical(
            [
                rt.FastArray(['Cyan', 'Magenta', 'Yellow', 'Black', 'Magenta', 'Cyan', 'Black', 'Yellow']).set_name('InkColor'),
                rt.Date(['2019-06-19', '2019-06-19', '2020-01-15', '2020-05-22', '2020-02-10', '2020-02-10', '2020-03-17', '2020-03-17']).set_name('CartridgeInstallDate')
            ]
        ), id="CategoryMode.MultiKey")
    ])
    @pytest.mark.parametrize('output_writable', [False, True])
    @pytest.mark.parametrize('have_nulls', [False, True])
    def test_roundtrip_rt_pa_rt(self, rt_cat: rt.Categorical, output_writable: bool, have_nulls: bool) -> None:
        """Test round-tripping from rt.Categorical to pyarrow.Array/pyarrow.Table and back."""
        orig_cat_shape = rt_cat.shape
        if have_nulls:
            # riptable's filtering/masking uses a valid mask (where False means null/NA).
            indices = np.arange(len(rt_cat))
            valid_mask = indices % 3 != 1
            rt_cat = rt_cat.filter(valid_mask)
            # Filtering marks elements as null/NA but does not change the shape.
            assert rt_cat.shape == orig_cat_shape

            # isfiltered() doesn't work as expected for Dictionary/IntEnum-mode Categorical as of riptable 1.1.0.
            filtered_element_count = (rt.isnan(rt_cat._fa) if rt_cat.category_mode in (rt.rt_enum.CategoryMode.Dictionary, rt.rt_enum.CategoryMode.IntEnum) else rt_cat.isfiltered()).sum()
            assert filtered_element_count == (len(rt_cat) - valid_mask.sum())

        result_pa_arr = rt_cat.to_arrow()

        # Verify the pyarrow array has the correct length, number of categories, etc.
        assert len(rt_cat) == len(result_pa_arr)
        assert pat.is_dictionary(result_pa_arr.type)
        assert len(result_pa_arr.dictionary) >= len(next(iter(rt_cat.category_dict.values()))), \
            "The number of categories in the pyarrow array's dictionary is smaller than the number of categories in the input Categorical."
        if have_nulls:
            assert valid_mask.sum() > 0
            assert (len(rt_cat) - valid_mask.sum()) == result_pa_arr.null_count

        # TEMP: Certain cases are marked as XFAIL here due to issues in Categorical.
        #   * Cannot create a pre-filtered (i.e. filtered at construction time) Dictionary- or IntEnum-mode Categorical.
        #   * Filtering a Dictionary- or IntEnum-mode Categorical causes unused categories to be dropped,
        #     which is not the same behavior as for other Categorical modes.
        #   * MultiKey Categoricals can't be created with an explicit list of category arrays + an index array,
        #     like what is supported for other Categorical modes.
        if rt_cat.category_mode == rt.rt_enum.CategoryMode.MultiKey or (have_nulls and rt_cat.category_mode == rt.rt_enum.CategoryMode.Dictionary):
            pytest.xfail("Expected failure due to issues with the Categorical constructor and/or filtering.")

        result_cat = rt.Categorical.from_arrow(result_pa_arr, zero_copy_only=False, writable=output_writable)

        # relaxed_cat_check <==> rt_cat.ordered, because if the categories are ordered, we expect them to be
        # in the same position after being roundtripped, so they should be mapped to the same integer before/after.
        # multi-key cats always seem to be ordered, even if ordered=False is specified when creating them.
        # TODO: Remove CategoryMode.Dictionary from the relaxed_cat_check here -- it's failing because our encoding in
        #       pyarrow doesn't currenly preserve unused entries from the name <-> code mapping. Once that's fixed
        #       we should be able to use the stronger equality check.
        assert_array_or_cat_equal(rt_cat, result_cat, relaxed_cat_check=rt_cat.ordered or rt_cat.category_mode == rt.rt_enum.CategoryMode.MultiKey or rt_cat.category_mode == rt.rt_enum.CategoryMode.Dictionary)

    @pytest.mark.parametrize(('num_cats', 'dtype'), [
        pytest.param(127, np.uint8),
        pytest.param(128, np.uint8),
        pytest.param(129, np.uint8),
        pytest.param(32769, np.uint16)
    ])
    @pytest.mark.parametrize('ordered', [False, True])
    @pytest.mark.parametrize('output_writable', [False, True])
    @pytest.mark.parametrize('have_nulls', [False, True])
    def test_pa_to_rt_unsigned(self, num_cats, dtype, ordered: bool, output_writable: bool, have_nulls: bool) -> None:
        """Test converting a pyarrow dictionary array with an unsigned-integer index dtype to rt.Categorical."""
        # Create a numpy array containing `num_cats` distinct strings.
        cat_labels = np.array([f"x{i}" for i in range(0, num_cats)])
        indices = np.arange(num_cats, dtype=dtype)

        # Create the pyarrow dict-encoded array.
        if have_nulls:
            # pyarrow uses an INvalid mask (where True means null/NA).
            invalid_mask = indices % 7 == 3
            pa_indices = pa.array(indices, mask=invalid_mask)
            pa_arr = pa.DictionaryArray.from_arrays(pa_indices, cat_labels, ordered=ordered)
        else:
            pa_arr = pa.DictionaryArray.from_arrays(indices, cat_labels, ordered=ordered)
        assert len(pa_arr.dictionary) == num_cats

        # Create the Categorical from the pyarrow array.
        result_cat = rt.Categorical.from_arrow(pa_arr, zero_copy_only=False, writable=output_writable)

        if have_nulls:
            # Null entries in the pyarrow array should come back as filtered elements.
            result_invalid_mask = result_cat.isfiltered()
            assert_array_equal(result_invalid_mask, invalid_mask)