def test_combiner_computation(self,
                              data,
                              expected_accumulator_output,
                              expected_extract_output,
                              compute_idf=True):
    """Check the combiner's accumulator computation and extracted output.

    Args:
        data: Input handed unchanged to the validation helpers.
        expected_accumulator_output: Values applied, through
            `self.update_accumulator`, onto an accumulator freshly created
            by the combiner to form the expected accumulator.
        expected_extract_output: Expected result passed to
            `self.validate_accumulator_extract`.
        compute_idf: Forwarded to the combiner constructor.
    """
    # NOTE(review): the arguments are presumably supplied by a parameterized
    # decorator that is outside this view -- confirm against the class.
    idf_combiner = categorical_encoding._CategoricalEncodingCombiner(
        compute_idf=compute_idf)

    # Start from the combiner's own empty accumulator so the expectation has
    # the same structure as whatever the combiner produces.
    blank_accumulator = idf_combiner._create_accumulator()
    wanted_accumulator = self.update_accumulator(
        blank_accumulator, expected_accumulator_output)

    self.validate_accumulator_computation(
        idf_combiner, data, wanted_accumulator)
    self.validate_accumulator_extract(
        idf_combiner, data, expected_extract_output)
def test_combiner_api_compatibility_int_mode(self):
    """Run the combiner API validation helpers with `compute_idf=False`.

    Uses a fixed 2x4 integer array and checks accumulator
    serialization/deserialization, accumulator uniqueness and the extracted
    output through the corresponding `self.validate_*` helpers.
    """
    sample_batch = np.array([[1, 2, 3, 4], [1, 2, 3, 0]])
    plain_combiner = categorical_encoding._CategoricalEncodingCombiner(
        compute_idf=False)

    # Expected accumulator fields for the two rows above (largest value 4).
    accumulator_fields = {
        "max_element": np.array(4),
        "num_documents": np.array(2),
    }
    # Expected extract result for the same input.
    extract_fields = {
        "num_elements": np.array(5),
    }

    # Overlay the expected fields onto an accumulator created by the combiner.
    blank_accumulator = plain_combiner._create_accumulator()
    wanted_accumulator = self.update_accumulator(
        blank_accumulator, accumulator_fields)

    self.validate_accumulator_serialize_and_deserialize(
        plain_combiner, sample_batch, wanted_accumulator)
    self.validate_accumulator_uniqueness(plain_combiner, sample_batch)
    self.validate_accumulator_extract(
        plain_combiner, sample_batch, extract_fields)