import warnings
from heapq import heappop, heappush
from itertools import chain, combinations
from math import factorial

import numpy as np

import pysubgroup as ps


def execute(self, task):
    result = []
    # max-heap via negated qualities: the candidate with the highest
    # optimistic estimate is popped first
    queue = [(float("-inf"), ps.Conjunction([]))]
    operator = ps.StaticSpecializationOperator(task.search_space)
    task.qf.calculate_constant_statistics(task)
    while queue:
        q, old_description = heappop(queue)
        q = -q
        if not (q > ps.minimum_required_quality(result, task)):
            break
        for candidate_description in operator.refinements(old_description):
            sg = candidate_description
            statistics = task.qf.calculate_statistics(sg, task.data)
            ps.add_if_required(result, sg, task.qf.evaluate(sg, statistics), task)
            optimistic_estimate = task.qf.optimistic_estimate(sg, statistics)

            # compute refinements and fill the queue
            if len(candidate_description) < task.depth and \
                    optimistic_estimate >= ps.minimum_required_quality(result, task):
                heappush(queue, (-optimistic_estimate, candidate_description))
    result.sort(key=lambda x: x[0], reverse=True)
    return ps.SubgroupDiscoveryResult(result, task)

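# A minimal sketch of the max-heap idiom used above, with hypothetical
# quality values (standard library only): heapq is a min-heap, so qualities
# and optimistic estimates are pushed negated and re-negated after popping,
# which always expands the candidate with the highest optimistic estimate.
#
#     from heapq import heappush, heappop
#     queue = []
#     for quality, description in [(0.3, "a"), (0.7, "b"), (0.5, "c")]:
#         heappush(queue, (-quality, description))
#     q, best = heappop(queue)
#     assert (-q, best) == (0.7, "b")
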
def execute(self, task):
    if not isinstance(task.qf, ps.StandardQFNumeric):
        warnings.warn("BSD_numeric so far is only implemented for StandardQFNumeric")
    self.pop_size = len(task.data)
    sorted_data = task.data.sort_values(task.target.get_attributes(), ascending=False)

    # generate target bitset
    self.target_values = sorted_data[task.target.get_attributes()[0]].to_numpy()

    task.qf.calculate_constant_statistics(task)
    self.evaluate = task.qf.evaluate

    # generate selector bitsets: one boolean mask per selector
    self.bitsets = {}
    for sel in task.search_space:
        self.bitsets[sel] = sel.covers(sorted_data)
    result = self.search_internal(task, [], task.search_space, [],
                                  np.ones(len(sorted_data), dtype=bool))
    result.sort(key=lambda x: x[0], reverse=True)
    return ps.SubgroupDiscoveryResult(result, task)

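# A small sketch of the selector-bitset idea, with toy masks (numpy only,
# names are illustrative): each selector is materialised once as a boolean
# mask over the sorted rows, so conjunctions reduce to cheap logical ANDs.
#
#     import numpy as np
#     covers_a = np.array([True, True, False, False])
#     covers_b = np.array([True, False, True, False])
#     covers_both = covers_a & covers_b
#     assert covers_both.tolist() == [True, False, False, False]
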
def execute(self, task):
    task.qf.calculate_constant_statistics(task)
    result = []
    all_selectors = chain.from_iterable(
        combinations(task.search_space, r) for r in range(1, task.depth + 1))
    if self.show_progress:
        try:
            from tqdm import tqdm

            def binomial(x, y):
                try:
                    binom = factorial(x) // factorial(y) // factorial(x - y)
                except ValueError:
                    binom = 0
                return binom

            total = sum(binomial(len(task.search_space), k)
                        for k in range(1, task.depth + 1))
            all_selectors = tqdm(all_selectors, total=total)
        except ImportError:
            pass
    for selectors in all_selectors:
        sg = ps.Conjunction(selectors)
        statistics = task.qf.calculate_statistics(sg, task.data)
        quality = task.qf.evaluate(sg, statistics)
        ps.add_if_required(result, sg, quality, task)
    result.sort(key=lambda x: x[0], reverse=True)
    return ps.SubgroupDiscoveryResult(result, task)

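# A sketch of the exhaustive candidate enumeration above, on a toy search
# space (standard library only): all selector subsets of size 1..depth.
#
#     from itertools import chain, combinations
#     space = ["a", "b", "c"]
#     cands = chain.from_iterable(combinations(space, r) for r in range(1, 3))
#     assert list(cands) == [("a",), ("b",), ("c",),
#                            ("a", "b"), ("a", "c"), ("b", "c")]
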
def execute(self, task):
    self.operator = ps.StaticSpecializationOperator(task.search_space)
    task.qf.calculate_constant_statistics(task)
    result = []
    with self.apply_representation(task.data, task.search_space) as representation:
        self.search_internal(task, result, representation.Conjunction([]))
    result.sort(key=lambda x: x[0], reverse=True)
    return ps.SubgroupDiscoveryResult(result, task)

def execute(self, task):
    result = []
    queue = []
    operator = ps.StaticGeneralizationOperator(task.search_space)
    # init the first level
    for sel in task.search_space:
        queue.append((float("-inf"), ps.Disjunction([sel])))
    task.qf.calculate_constant_statistics(task)
    while queue:
        q, candidate_description = heappop(queue)
        q = -q
        if q < ps.minimum_required_quality(result, task):
            break
        sg = candidate_description
        statistics = task.qf.calculate_statistics(sg, task.data)
        quality = task.qf.evaluate(sg, statistics)
        ps.add_if_required(result, sg, quality, task, statistics=statistics)
        qual = ps.minimum_required_quality(result, task)

        if (quality, sg) in result:
            # candidate made it into the result set: re-filter the queue
            # against the (possibly raised) quality threshold
            new_queue = []
            for q_tmp, c_tmp in queue:
                if (-q_tmp) > qual:
                    heappush(new_queue, (q_tmp, c_tmp))
            queue = new_queue

        optimistic_estimate = task.qf.optimistic_estimate(sg, statistics)

        # compute refinements and fill the queue; the optimistic estimate is
        # discounted by alpha per additional selector before pruning
        if len(candidate_description) < task.depth and (
                optimistic_estimate / self.alpha ** (len(candidate_description) + 1)
        ) >= ps.minimum_required_quality(result, task):
            self.refined[len(candidate_description)] += 1
            for new_description in operator.refinements(candidate_description):
                heappush(queue, (-optimistic_estimate, new_description))
        else:
            self.discarded[len(candidate_description)] += 1
    result.sort(key=lambda x: x[0], reverse=True)
    for qual, sg in result:
        print("{} {}".format(qual, sg))
    print("discarded " + str(self.discarded))
    return ps.SubgroupDiscoveryResult(result, task)

def execute(self, task):
    # a warning (not an error): an unbounded quality function disables
    # optimistic-estimate pruning, so the search still runs, just slowly
    if not isinstance(task.qf, ps.BoundedInterestingnessMeasure):
        warnings.warn("Quality function is unbounded, long runtime expected")
    task.qf.calculate_constant_statistics(task)
    with self.representation_type(task.data, task.search_space) as representation:
        combine_selectors = getattr(representation.__class__, self.combination_name)
        result = []
        # init the first level
        next_level_candidates = []
        for sel in task.search_space:
            next_level_candidates.append(combine_selectors([sel]))

        # level-wise search
        depth = 1
        while next_level_candidates:
            # check sgs from the last level
            if self.use_vectorization:
                promising_candidates = self.get_next_level_candidates_vectorized(
                    task, result, next_level_candidates)
            else:
                promising_candidates = self.get_next_level_candidates(
                    task, result, next_level_candidates)
            if depth == task.depth:
                break

            if self.use_repruning:
                promising_candidates = self.reprune_lower_levels(
                    promising_candidates, depth)

            next_level_candidates_no_pruning = self.next_level(promising_candidates)

            # select those selectors and build a subgroup from them
            # for which all subsets of length depth (= candidate length - 1)
            # are in the set of promising candidates
            set_promising_candidates = set(tuple(p) for p in promising_candidates)
            next_level_candidates = [
                combine_selectors(selectors)
                for selectors in next_level_candidates_no_pruning
                if all(subset in set_promising_candidates
                       for subset in combinations(selectors, depth))
            ]
            depth = depth + 1
    result.sort(key=lambda x: x[0], reverse=True)
    return ps.SubgroupDiscoveryResult(result, task)

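# A sketch of the apriori pruning step above, with toy tuples: a candidate
# of length depth + 1 is kept only if every subset of length depth is
# itself promising.
#
#     from itertools import combinations
#     promising = {("a",), ("b",), ("a", "b"), ("a", "c")}
#     candidate = ("a", "b", "c")
#     keep = all(sub in promising for sub in combinations(candidate, 2))
#     # ("b", "c") is not promising, so the candidate is pruned
#     assert not keep
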
def execute(self, task):
    # adapt beam width to the result set size if desired
    if self.beam_width_adaptive:
        self.beam_width = task.result_set_size

    # check if the beam width is too small for the result set
    if self.beam_width < task.result_set_size:
        raise RuntimeError(
            'Beam width in the beam search algorithm is smaller than the result set size!')

    task.qf.calculate_constant_statistics(task)

    # init
    beam = [(0, ps.Conjunction([]),
             task.qf.calculate_statistics(slice(None), task.data))]
    last_beam = None

    depth = 0
    while beam != last_beam and depth < task.depth:
        last_beam = beam.copy()
        for (_, last_sg, _) in last_beam:
            if not getattr(last_sg, 'visited', False):
                setattr(last_sg, 'visited', True)
                for sel in task.search_space:
                    # create a clone
                    new_selectors = list(last_sg.selectors)
                    if sel not in new_selectors:
                        new_selectors.append(sel)
                        sg = ps.Conjunction(new_selectors)
                        statistics = task.qf.calculate_statistics(sg, task.data)
                        quality = task.qf.evaluate(sg, statistics)
                        ps.add_if_required(beam, sg, quality, task,
                                           check_for_duplicates=True,
                                           statistics=statistics)
        depth += 1

    # TODO make sure there is no bug here
    result = beam[:task.result_set_size]
    result.sort(key=lambda x: x[0], reverse=True)
    return ps.SubgroupDiscoveryResult(result, task)

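# A toy sketch of one beam-search level (hypothetical qualities and subgroup
# labels): every subgroup in the beam is extended by every selector, and only
# the beam_width best candidates survive into the next level.
#
#     beam_width = 2
#     scored = [(0.4, "a"), (0.9, "a AND b"), (0.1, "c"), (0.7, "a AND c")]
#     beam = sorted(scored, reverse=True)[:beam_width]
#     assert [sg for _, sg in beam] == ["a AND b", "a AND c"]
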
def execute(self, task): result = [] queue = [(float("-inf"), ps.Conjunction([]))] operator = SpecializationOperator(data=task.data.drop(['target'], axis=1), n_bins=self.n_bins, max_features=self.max_features, intervals_only=self.intervals_only, binning=self.binning, specialization=self.specialization, search_space=task.search_space) task.qf.calculate_constant_statistics(task.data, task.target) while queue: q, old_description = heappop(queue) q = -q if not q > ps.minimum_required_quality(result, task): break for candidate_description in operator.refinements(old_description): score_eval = task.qf.evaluate(candidate_description, task.target, task.data, None) ps.add_if_required(result, candidate_description, score_eval, task) if len(candidate_description) < task.depth: heappush(queue, (-score_eval, candidate_description)) result.sort(key=lambda x: x[0], reverse=True) return ps.SubgroupDiscoveryResult(result, task)
def execute(self, task, use_optimistic_estimates=True):
    task.qf.calculate_constant_statistics(task)
    result = self.search_internal(task, [], task.search_space, [],
                                  use_optimistic_estimates)
    result.sort(key=lambda x: x[0], reverse=True)
    return ps.SubgroupDiscoveryResult(result, task)

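# A minimal end-to-end sketch, assuming these execute methods belong to
# pysubgroup's algorithm classes and using pysubgroup's documented task API;
# the file name, column name, and parameters below are illustrative only.
#
#     import pandas as pd
#     import pysubgroup as ps
#
#     data = pd.read_csv("data.csv")               # hypothetical dataset
#     target = ps.BinaryTarget("outcome", True)    # hypothetical column
#     search_space = ps.create_selectors(data, ignore=["outcome"])
#     task = ps.SubgroupDiscoveryTask(
#         data, target, search_space,
#         result_set_size=10, depth=2, qf=ps.WRAccQF())
#     result = ps.BeamSearch().execute(task)
#     print(result.to_dataframe())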