def test_onetarget(self, generate_dataframe_one_target):
    """Check the attributes CategoricalTarget derives from one target column.

    The expectations describe a single column, "target1", with two classes
    ("below50" and "above49") of 50 instances each, 100 instances in total.
    """
    column, low, high = "target1", "below50", "above49"

    # Expected values are built before the object under test is created.
    want_categories = np.array([low, high], dtype=object)
    want_bit_array = bit_mask(100)
    want_number_classes = {column: 2}
    want_bitsets = {
        column: {
            low: indexes2bitset(np.arange(50)),
            high: indexes2bitset(np.arange(50, 100)),
        }
    }
    want_counts = {column: {low: 50, high: 50}}
    want_probabilities = {column: {low: 0.50, high: 0.50}}

    target = CategoricalTarget(generate_dataframe_one_target)

    assert want_bit_array == target.bit_array
    # Categories are numpy arrays, so they need an element-wise comparison.
    np.testing.assert_array_equal(want_categories, target.categories[column])
    assert want_number_classes == target.number_classes
    assert want_bitsets == target.bit_arrays_var_class
    assert want_counts == target.counts
    assert want_probabilities == target.prob_var_class
def create_item(indexes, variable_name, min_val, max_val, description,
                number_operations):
    """ Builds an Item for one interval of a numeric attribute.

    Parameters
    ----------
    indexes : np.ndarray
        Indexes where the item is present; converted to the item's bit array
        with ``indexes2bitset``.
    variable_name : str
        Name of the attribute/variable this item is attached to. It is also
        bound as ``attribute_name`` of the activation function.
    min_val : float
        Lower bound of the interval, bound as ``minval`` of the activation
        function.
    max_val : float
        Upper bound of the interval, bound as ``maxval`` of the activation
        function.
    description : str
        Text describing the interval defined by the item.
    number_operations : int
        Number of logical operators used to define the interval (presumably
        1 for a one-sided interval such as ``item < max_val`` and 2 for
        ``min_val < item < max_val`` -- confirm against the callers).

    Returns
    ----------
    Item : Item class object
        Item built from the bit array, name, description, operator count and
        activation function.
    """
    # Pre-bind the interval so the activation only needs the data to test.
    activation = partial(
        activation_numeric,
        attribute_name=variable_name,
        minval=min_val,
        maxval=max_val,
    )
    return Item(indexes2bitset(indexes), variable_name, description,
                number_operations, activation)
def test_add_rule_2items(self, search_parameters,
                         generate_input_dataframe_two_target_normal,
                         generate_subgroup_2item_numeric_and_nominal):
    """Add one two-item subgroup to a GaussianRuleList and check its bookkeeping.

    The expectations are that instances 0..1000 end up covered, every later
    instance stays uncovered, the rule list holds exactly one rule, and the
    model length equals the sum of the code lengths listed below.
    """
    data = generate_input_dataframe_two_target_normal
    # The first search parameter (target model) is not needed by this test.
    (_target_model, max_depth, beam_width,
     min_support, max_rules, alpha_gain) = search_parameters

    ruleset = GaussianRuleList(data, "discovery", max_depth, beam_width,
                               min_support, max_rules, alpha_gain)
    ruleset.add_rule(generate_subgroup_2item_numeric_and_nominal, data)

    total = data.number_instances
    first_uncovered = 1000 + 1
    want_uncovered = indexes2bitset(list(range(first_uncovered, total)))
    want_covered = indexes2bitset(list(range(min(first_uncovered, total))))
    want_length = (
        universal_code_integers(1) + universal_code_integers(2)
        + uniform_combination_code(2, 2)
        + universal_code_integers_maximum(1, 2) + uniform_code(10)
        + universal_code_integers_maximum(1, 1) + uniform_code(2)
    )

    # Four independent ways of counting instances; each must give the total.
    rule = ruleset.subgroups[0]
    uncovered_count = popcount(ruleset.bitset_uncovered)
    observed_totals = (
        popcount(rule.bitarray) + uncovered_count,
        ruleset.support_covered + ruleset.support_uncovered,
        popcount(ruleset.bitset_covered) + uncovered_count,
        rule.usage + ruleset.default_rule_statistics.usage,
    )
    for observed in observed_totals:
        assert total == observed

    assert want_uncovered == ruleset.bitset_uncovered
    assert want_covered == ruleset.bitset_covered
    assert 1 == ruleset.number_rules
    assert want_length == pytest.approx(ruleset.length_model)
def init_bitarrays_class(
    self, target_values
) -> Tuple[Dict[Any, Dict[Any, Any]], Dict[Any, Dict[Any, int]],
           Dict[Any, Dict[Any, float]]]:
    """ Initializes the bit array, count and probability of each category.

    For every column of ``target_values`` and every category listed in
    ``self.categories`` for that column, stores the bit array of the rows
    equal to the category, the number of such rows, and that number divided
    by the total number of rows.

    Parameters
    ----------
    target_values : pandas.DataFrame
        Target columns; expected to provide ``items()`` yielding
        ``(column name, column)`` pairs and a ``shape`` attribute.

    Returns
    ----------
    Tuple[Dict, Dict, Dict] :
        ``self.bit_arrays_var_class``, ``self.counts`` and
        ``self.prob_var_class``, each keyed by column name and then by
        category.
    """
    number_instances = target_values.shape[0]
    # ``iteritems`` was deprecated in pandas 1.5 and removed in pandas 2.0;
    # ``items`` yields the same (name, column) pairs on old and new versions.
    for namecol, colvals in target_values.items():
        self.bit_arrays_var_class[namecol] = dict()
        self.counts[namecol] = dict()
        self.prob_var_class[namecol] = dict()
        for category in self.categories[namecol]:
            category_indexes = np.where(colvals.values == category)[0]
            category_count = len(category_indexes)
            self.bit_arrays_var_class[namecol][category] = indexes2bitset(
                category_indexes)
            self.counts[namecol][category] = category_count
            self.prob_var_class[namecol][category] = (
                category_count / number_instances)
    return self.bit_arrays_var_class, self.counts, self.prob_var_class
def create_items(self) -> Tuple[List[Item], Dict[int, int]]:
    """ Creates the items of the nominal attribute, one per category.

    Each item uses an equality relationship with a single category.
    Example: "eye_colour = blue" could be the description of one item of the
    attribute with name "eye_colour".

    Returns
    ----------
    Tuple[List[Item], Dict[int, int]] :
        ``self.items`` after one item per category has been appended (only
        equality relationships, no logical ORs), and
        ``self.cardinality_operator``, which maps the single operator count 1
        to the number of categories.
    """
    self.cardinality_operator = {1: len(self.categories)}
    for category in self.categories:
        matching_rows = np.where(self.values == category)[0]
        self.items.append(
            Item(
                indexes2bitset(matching_rows),
                self.name,
                f"{self.name!s} = {category!s}",
                1,  # an equality test uses a single operator
                partial(activation_nominal,
                        attribute_name=self.name,
                        category=category),
            )
        )
    return self.items, self.cardinality_operator
def test_allconsecutive_array(self):
    """Indexes 0, 1, 2 and 3 are expected to give the bitset 15 (0b1111)."""
    indexes = np.array([0, 1, 2, 3], dtype=np.int32)
    want = mpz(15)
    assert want == indexes2bitset(indexes)
def test_dtypefloat_array(self):
    """A float64 array holding index 4 is expected to give the bitset 16."""
    indexes = np.array([4], dtype=np.float64)
    want = mpz(16)
    assert want == indexes2bitset(indexes)
def test_oneatend_array(self):
    """The single int32 index 4 is expected to give the bitset 16 (0b10000)."""
    indexes = np.array([4], dtype=np.int32)
    want = mpz(16)
    assert want == indexes2bitset(indexes)
def test_oneinbeggining_array(self):
    """The single int32 index 0 is expected to give the bitset 1."""
    indexes = np.array([0], dtype=np.int32)
    want = mpz(1)
    assert want == indexes2bitset(indexes)
def test_empty_array(self):
    """An empty int32 index array is expected to give the bitset 0."""
    indexes = np.array([], dtype=np.int32)
    want = mpz(0)
    assert want == indexes2bitset(indexes)