def fit(self, X, Y):
    """Fit a rule list to the given training data.

    The inputs are wrapped in a ``Data`` object, an empty rule list matching
    the kind of target is created, and the list is then populated by
    ``greedy_and_beamsearch``.

    Parameters
    ----------
    X : object
        Explanatory data; passed unchanged to ``Data`` as ``input_data``.
    Y : object
        Target data; passed unchanged to ``Data`` as ``target_data``.

    Returns
    -------
    self : object
        This estimator, with ``_rulelist``, ``runtime``, ``number_rules``
        and ``rule_sets`` set.
    """
    nominal_target = is_classifier(self)
    t_begin = time()

    dataset = Data(
        input_data=X,
        n_cutpoints=self.n_cutpoints,
        discretization=self.discretization,
        target_data=Y,
        target_model=self.target_model,
        min_support=self.min_support,
    )

    # A classifier gets a categorical rule list, anything else a Gaussian one;
    # both constructors take the same arguments.
    rulelist_cls = CategoricalRuleList if nominal_target else GaussianRuleList
    self._rulelist = rulelist_cls(
        dataset,
        self.task,
        self.max_depth,
        self.beam_width,
        self.min_support,
        self.max_rules,
        self.alpha_gain,
    )

    self._rulelist = greedy_and_beamsearch(dataset, self._rulelist)
    self._rulelist.add_description()

    # Elapsed time covers data preparation, search and description building.
    self.runtime = time() - t_begin
    self.number_rules = self._rulelist.number_rules
    self.rule_sets = list(map(bitset2indexes, self._rulelist.bitset_rules))
    return self
def generate_items(self, bitset_uncovered) -> Iterator[Item]:
    """Yield candidate items according to ``self.discretization``.

    Parameters
    ----------
    bitset_uncovered : object
        Bitset converted to indexes with ``bitset2indexes``; only consulted
        when the discretization mode is ``'dynamic'``.

    Yields
    ------
    Item
        With ``'static'`` discretization, every element of ``self.items``.
        With ``'dynamic'`` discretization, the items freshly built by
        ``self.create_items`` from the uncovered indexes. Any other mode
        yields nothing.
    """
    # TODO: make dynamic generation of items based on "candidate"
    mode = self.discretization
    if mode == 'static':
        candidates = self.items
    elif mode == 'dynamic':
        uncovered_rows = np.array(bitset2indexes(bitset_uncovered))
        # create_items returns a pair; only the items are needed here.
        candidates, _ = self.create_items(uncovered_rows)
    else:
        return

    for candidate in candidates:
        yield candidate
def replace_stats(self, data, bitarray_indices):
    """Recompute usage and the per-target statistics for a subgroup.

    Parameters
    ----------
    data : object
        Supplies ``targets_info.array_data`` and ``number_targets``.
    bitarray_indices : object
        Bitset describing the subgroup; turned into indexes with
        ``bitset2indexes``.

    Returns
    -------
    self : object
        This object, after its statistics were updated in place.
    """
    self.usage = self.update_usage(bitarray_indices)
    covered_rows = bitset2indexes(bitarray_indices)
    target_table = data.targets_info.array_data
    subgroup_targets = target_table[covered_rows, :]

    if self.usage > 2:
        # Enough points: update the statistic of every target column.
        for col in range(data.number_targets):
            self._compute_statistic_free(data, col, subgroup_targets[:, col])
    elif self.usage <= 2:
        # Kept as an explicit comparison so that a usage comparing false
        # both ways still triggers neither branch.
        self._not_enough_points(data)
    return self
def replace_stats(self, data, bitarray_indices):
    """Recompute usage and the residual sum of squares for a subgroup.

    Parameters
    ----------
    data : object
        Supplies ``targets_info.array_data`` and ``number_targets``.
    bitarray_indices : object
        Bitset describing the subgroup; turned into indexes with
        ``bitset2indexes``.

    Returns
    -------
    self : object
        This object, with ``usage`` and ``rss`` updated in place. ``rss`` is
        left untouched when ``data.number_targets`` is below one.
    """
    self.usage = self.update_usage(bitarray_indices)
    covered_rows = bitset2indexes(bitarray_indices)

    if data.number_targets == 1:
        # Single target: self.mean is handed over whole, not indexed.
        only_column = data.targets_info.array_data[covered_rows, 0]
        self.rss[0] = compute_RSS(only_column, self.mean)
    elif data.number_targets > 1:
        subgroup_targets = data.targets_info.array_data[covered_rows, :]
        # Transposing lets the loop walk the target columns one by one.
        for target_idx, target_column in enumerate(subgroup_targets.T):
            self.rss[target_idx] = compute_RSS(
                target_column, self.mean[target_idx]
            )
    return self
def test_oneatend_array(self):
    # 16 has only bit 4 set, so the single expected index is 4.
    bitset = mpz(16)
    wanted = np.array([4], dtype=np.int32)
    np.testing.assert_array_equal(wanted, bitset2indexes(bitset))
def test_oneinbeggining_array(self):
    # 1 has only bit 0 set, so the single expected index is 0.
    bitset = mpz(1)
    wanted = np.array([0])
    np.testing.assert_array_equal(wanted, bitset2indexes(bitset))
def test_empty_array(self):
    # 0 has no bits set, so no indexes are expected at all.
    bitset = mpz(0)
    wanted = np.array([], dtype=np.int32)
    np.testing.assert_array_equal(wanted, bitset2indexes(bitset))
def test_allconsecutive_array(self):
    # 15 is binary 1111: bits 0 through 3 are set, in ascending order.
    bitset = mpz(15)
    wanted = np.array([0, 1, 2, 3], dtype=np.int32)
    np.testing.assert_array_equal(wanted, bitset2indexes(bitset))