예제 #1
0
    def test_onetarget(self, generate_dataframe_one_target):
        """CategoricalTarget built from the single-target fixture.

        The expected values describe 100 instances for "target1": the first
        50 indexes labelled "below50" and indexes 50..99 labelled "above49".
        """
        target = "target1"
        low, high = "below50", "above49"

        # Expected state of the target object after construction.
        want_categories = np.array([low, high], dtype=object)
        want_bit_array = bit_mask(100)
        want_number_classes = {target: 2}
        want_class_bitsets = {
            target: {
                low: indexes2bitset(np.arange(50)),
                high: indexes2bitset(np.arange(50, 100)),
            }
        }
        want_counts = {target: {low: 50, high: 50}}
        want_probabilities = {target: {low: 0.50, high: 0.50}}

        built = CategoricalTarget(generate_dataframe_one_target)

        assert want_bit_array == built.bit_array
        # Categories are arrays, so compare them element-wise.
        np.testing.assert_array_equal(want_categories,
                                      built.categories[target])
        assert want_number_classes == built.number_classes
        assert want_class_bitsets == built.bit_arrays_var_class
        assert want_counts == built.counts
        assert want_probabilities == built.prob_var_class
예제 #2
0
def create_item(indexes, variable_name, min_val, max_val, description,
                number_operations):
    """ Builds an Item for one interval of a numeric attribute.

    Parameters
    ----------
    indexes : np.ndarray
        Indexes of the training instances where the item is present; they are
        converted to a bitset with ``indexes2bitset``.
    variable_name : str
        Name of the attribute/variable that this item is attached to.
    min_val : float
        Lower bound of the interval (item > min_val).
    max_val : float
        Upper bound of the interval (item < max_val).
    description : str
        Text describing the interval defined by the item.
    number_operations : int
        Number of logical operators used to define the interval, e.g. 1 for
        ``item < max_val`` and 2 for ``min_val < item < max_val``.

    Returns
    ----------
    Item : Item class object
        Item carrying the bitset, the name, the description, the operator
        count and an activation function bound to this interval.
    """
    covered_instances = indexes2bitset(indexes)
    # Freeze the attribute name and bounds so the activation only needs data.
    interval_bounds = {
        "attribute_name": variable_name,
        "minval": min_val,
        "maxval": max_val,
    }
    interval_activation = partial(activation_numeric, **interval_bounds)
    return Item(covered_instances, variable_name, description,
                number_operations, interval_activation)
예제 #3
0
    def test_add_rule_2items(self, search_parameters,
                             generate_input_dataframe_two_target_normal,
                             generate_subgroup_2item_numeric_and_nominal):
        """Adding one two-item subgroup to an empty GaussianRuleList.

        The rule is expected to cover instances 0..1000 and leave the rest
        uncovered; every way of counting covered plus uncovered instances must
        add up to the size of the dataset.
        """
        data = generate_input_dataframe_two_target_normal
        # The first search parameter (target model) is not needed here.
        (_target_model, max_depth, beam_width, minsupp, max_rules,
         alpha_gain) = search_parameters
        subgroup = generate_subgroup_2item_numeric_and_nominal
        task = "discovery"

        ruleset = GaussianRuleList(data, task, max_depth, beam_width, minsupp,
                                   max_rules, alpha_gain)
        ruleset.add_rule(subgroup, data)

        total_instances = data.number_instances
        want_uncovered = indexes2bitset(list(range(1001, total_instances)))
        want_covered = indexes2bitset(
            list(range(min(total_instances, 1001))))
        want_number_rules = 1
        want_length_model = (
            universal_code_integers(1)
            + universal_code_integers(2)
            + uniform_combination_code(2, 2)
            + universal_code_integers_maximum(1, 2)
            + uniform_code(10)
            + universal_code_integers_maximum(1, 1)
            + uniform_code(2)
        )

        first_rule = ruleset.subgroups[0]
        uncovered_size = popcount(ruleset.bitset_uncovered)
        # Four independent ways of counting all instances.
        total_from_rule_bitset = popcount(first_rule.bitarray) + uncovered_size
        total_from_supports = (ruleset.support_covered
                               + ruleset.support_uncovered)
        total_from_bitsets = popcount(ruleset.bitset_covered) + uncovered_size
        total_from_usages = (first_rule.usage
                             + ruleset.default_rule_statistics.usage)

        assert total_instances == total_from_rule_bitset
        assert total_instances == total_from_supports
        assert total_instances == total_from_bitsets
        assert total_instances == total_from_usages
        assert want_uncovered == ruleset.bitset_uncovered
        assert want_covered == ruleset.bitset_covered
        assert want_number_rules == ruleset.number_rules
        assert want_length_model == pytest.approx(ruleset.length_model)
예제 #4
0
    def init_bitarrays_class(
            self, target_values
    ) -> Tuple[Dict[Any, Dict[Any, Any]], Dict[Any, Dict[Any, int]],
               Dict[Any, Dict[Any, float]]]:
        """ Initializes the bitset, count and probability of each category.

        For every column of ``target_values`` and every category listed in
        ``self.categories`` for that column, stores the bitset of the rows
        equal to the category, the number of such rows, and that number
        divided by the total number of rows.

        Parameters
        ----------
        target_values : pandas.DataFrame
            Target columns; iterated column-wise as (name, values) pairs.
            NOTE(review): assumed to be a DataFrame because ``.shape`` and
            column iteration are used - confirm against the caller.

        Returns
        ----------
        Tuple[Dict, Dict, Dict] :
            ``self.bit_arrays_var_class``, ``self.counts`` and
            ``self.prob_var_class``, each keyed by column name and then by
            category.
        """
        number_rows = target_values.shape[0]
        # ``items()`` replaces ``iteritems()``, which pandas 2.0 removed.
        for namecol, colvals in target_values.items():
            column_values = colvals.values
            self.bit_arrays_var_class[namecol] = dict()
            self.counts[namecol] = dict()
            self.prob_var_class[namecol] = dict()
            for category in self.categories[namecol]:
                category_indexes = np.where(column_values == category)[0]
                category_count = len(category_indexes)
                self.bit_arrays_var_class[namecol][category] = indexes2bitset(
                    category_indexes)
                self.counts[namecol][category] = category_count
                self.prob_var_class[namecol][category] = (category_count /
                                                          number_rows)
        return self.bit_arrays_var_class, self.counts, self.prob_var_class
예제 #5
0
    def create_items(self) -> Tuple[List[Item], Dict[int, int]]:
        """ Builds one equality item per category of the nominal attribute.

        Each item describes the condition "<name> = <category>"; for example,
        an attribute named "eye_colour" could yield an item described as
        "eye_colour = blue_eyes". Only equality relationships are generated,
        not logical ORs of categories.

        Returns
        ----------
        List[Item] : List of Items
            ``self.items`` after one item per category has been appended.
        Dict[int, int] :
            ``self.cardinality_operator``, mapping the operator count (1) to
            the number of categories.
        """
        operators_per_item = 1
        self.cardinality_operator = {operators_per_item: len(self.categories)}
        for category in self.categories:
            # Row positions whose value equals this category.
            matching_rows = np.where(self.values == category)[0]
            equality_item = Item(
                indexes2bitset(matching_rows),
                self.name,
                "{} = {}".format(str(self.name), str(category)),
                operators_per_item,
                partial(activation_nominal,
                        attribute_name=self.name,
                        category=category),
            )
            self.items.append(equality_item)
        return self.items, self.cardinality_operator
예제 #6
0
 def test_allconsecutive_array(self):
     """Indexes 0..3 are expected to give the bitset with value 15."""
     consecutive_indexes = np.array([0, 1, 2, 3], dtype=np.int32)
     assert mpz(15) == indexes2bitset(consecutive_indexes)
예제 #7
0
 def test_dtypefloat_array(self):
     """A float64 index array [4] is expected to give the bitset 16."""
     float_indexes = np.array([4], dtype=np.float64)
     assert mpz(16) == indexes2bitset(float_indexes)
예제 #8
0
 def test_oneatend_array(self):
     """The single index 4 is expected to give the bitset with value 16."""
     single_index = np.array([4], dtype=np.int32)
     assert mpz(16) == indexes2bitset(single_index)
예제 #9
0
 def test_oneinbeggining_array(self):
     """The single index 0 is expected to give the bitset with value 1."""
     first_index = np.array([0], dtype=np.int32)
     assert mpz(1) == indexes2bitset(first_index)
예제 #10
0
 def test_empty_array(self):
     """An empty index array is expected to give the zero bitset."""
     no_indexes = np.array([], dtype=np.int32)
     assert mpz(0) == indexes2bitset(no_indexes)