def get_next_level_candidates(self, task, result, next_level_candidates):
    promising_candidates = []
    optimistic_estimate_function = getattr(task.qf, self.optimistic_estimate_name)
    for sg in next_level_candidates:
        statistics = task.qf.calculate_statistics(sg, task.data)
        ps.add_if_required(result, sg, task.qf.evaluate(sg, statistics), task, statistics=statistics)
        optimistic_estimate = optimistic_estimate_function(sg, statistics)

        if optimistic_estimate >= ps.minimum_required_quality(result, task):
            if ps.constraints_hold(task.constraints_monotone, sg, statistics, task.data):
                promising_candidates.append((optimistic_estimate, sg.selectors))

    min_quality = ps.minimum_required_quality(result, task)
    promising_candidates = [selectors for estimate, selectors in promising_candidates
                            if estimate > min_quality]
    return promising_candidates

def execute(self, task):
    result = []
    queue = [(float("-inf"), ps.Conjunction([]))]
    operator = ps.StaticSpecializationOperator(task.search_space)
    task.qf.calculate_constant_statistics(task)

    while queue:
        q, old_description = heappop(queue)
        q = -q
        if not (q > ps.minimum_required_quality(result, task)):
            break
        for candidate_description in operator.refinements(old_description):
            sg = candidate_description
            statistics = task.qf.calculate_statistics(sg, task.data)
            ps.add_if_required(result, sg, task.qf.evaluate(sg, statistics), task)
            optimistic_estimate = task.qf.optimistic_estimate(sg, statistics)

            # compute refinements and fill the queue
            if len(candidate_description) < task.depth and \
                    optimistic_estimate >= ps.minimum_required_quality(result, task):
                heappush(queue, (-optimistic_estimate, candidate_description))

    result.sort(key=lambda x: x[0], reverse=True)
    return ps.SubgroupDiscoveryResult(result, task)

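# Hedged usage sketch (not part of the method above): one plausible way to drive
# a best-first search like this through pysubgroup's task API. The CSV path and
# column names ("titanic.csv", "Survived") are illustrative assumptions only.
import pandas as pd
import pysubgroup as ps

data = pd.read_csv("titanic.csv")                              # hypothetical dataset
target = ps.BinaryTarget("Survived", True)
search_space = ps.create_selectors(data, ignore=["Survived"])
task = ps.SubgroupDiscoveryTask(data, target, search_space,
                                result_set_size=10, depth=2, qf=ps.WRAccQF())
result = ps.BestFirstSearch().execute(task)
print(result.to_dataframe())
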
def execute(self, task):
    result = []
    queue = []
    operator = ps.StaticGeneralizationOperator(task.search_space)
    # init the first level
    for sel in task.search_space:
        queue.append((float("-inf"), ps.Disjunction([sel])))
    task.qf.calculate_constant_statistics(task)

    while queue:
        q, candidate_description = heappop(queue)
        q = -q
        if q < ps.minimum_required_quality(result, task):
            break

        sg = candidate_description
        statistics = task.qf.calculate_statistics(sg, task.data)
        quality = task.qf.evaluate(sg, statistics)
        ps.add_if_required(result, sg, quality, task, statistics=statistics)

        qual = ps.minimum_required_quality(result, task)

        if (quality, sg) in result:
            # the result set changed, so drop queue entries that can no longer qualify
            new_queue = []
            for q_tmp, c_tmp in queue:
                if (-q_tmp) > qual:
                    heappush(new_queue, (q_tmp, c_tmp))
            queue = new_queue

        optimistic_estimate = task.qf.optimistic_estimate(sg, statistics)
        # else:
        #     ps.add_if_required(result, sg, task.qf.evaluate_from_dataset(task.data, sg), task)
        #     optimistic_estimate = task.qf.optimistic_generalisation_from_dataset(task.data, sg) if qf_is_bounded else float("inf")

        # compute refinements and fill the queue
        if len(candidate_description) < task.depth and \
                (optimistic_estimate / self.alpha ** (len(candidate_description) + 1)) >= ps.minimum_required_quality(result, task):
            self.refined[len(candidate_description)] += 1
            for new_description in operator.refinements(candidate_description):
                heappush(queue, (-optimistic_estimate, new_description))
        else:
            self.discarded[len(candidate_description)] += 1

    result.sort(key=lambda x: x[0], reverse=True)
    for qual, sg in result:
        print("{} {}".format(qual, sg))
    print("discarded " + str(self.discarded))
    return ps.SubgroupDiscoveryResult(result, task)

def execute(self, task):
    result = []
    queue = []
    measure_statistics_based = hasattr(task.qf, 'optimistic_estimate_from_statistics')

    # init the first level
    for sel in task.search_space:
        queue.append((float("-inf"), [sel]))

    while queue:
        q, candidate_description = heappop(queue)
        q = -q
        if q < ps.minimum_required_quality(result, task):
            break

        sg = ps.Subgroup(task.target, candidate_description)

        if measure_statistics_based:
            statistics = sg.get_base_statistics(task.data)
            ps.add_if_required(result, sg, task.qf.evaluate_from_statistics(*statistics), task)
            optimistic_estimate = task.qf.optimistic_estimate_from_statistics(*statistics) \
                if isinstance(task.qf, ps.BoundedInterestingnessMeasure) else float("inf")
        else:
            ps.add_if_required(result, sg, task.qf.evaluate_from_dataset(task.data, sg), task)
            optimistic_estimate = task.qf.optimistic_estimate_from_dataset(task.data, sg) \
                if isinstance(task.qf, ps.BoundedInterestingnessMeasure) else float("inf")

        # compute refinements and fill the queue
        if len(candidate_description) < task.depth and \
                optimistic_estimate >= ps.minimum_required_quality(result, task):
            # iterate over all selectors that are behind the last selector contained
            # in the evaluated candidate, according to the initial order
            index_of_last_selector = min(task.search_space.index(candidate_description[-1]),
                                         len(task.search_space) - 1)
            for sel in islice(task.search_space, index_of_last_selector + 1, None):
                new_description = candidate_description + [sel]
                heappush(queue, (-optimistic_estimate, new_description))

    result.sort(key=lambda x: x[0], reverse=True)
    return result

def search_internal(self, task, prefix, modificationSet, result, bitset):
    sg = ps.Subgroup(task.target, copy.copy(prefix))

    sgSize = bitset.count()
    positiveInstances = bitset & self.targetBitset
    sgPositiveCount = positiveInstances.count()

    optimisticEstimate = task.qf.optimistic_estimate_from_statistics(
        self.popSize, self.popPositives, sgSize, sgPositiveCount)
    if optimisticEstimate <= ps.minimum_required_quality(result, task):
        return result

    quality = task.qf.evaluate_from_statistics(self.popSize, self.popPositives,
                                               sgSize, sgPositiveCount)
    ps.add_if_required(result, sg, quality, task)

    if len(prefix) < task.depth:
        newModificationSet = copy.copy(modificationSet)
        for sel in modificationSet:
            prefix.append(sel)
            newBitset = bitset & self.bitsets[sel]
            newModificationSet.pop(0)
            self.search_internal(task, prefix, newModificationSet, result, newBitset)
            # remove the sel again
            prefix.pop(-1)
    return result

def search_internal(self, task, prefix, modification_set, result, bitset):
    self.num_calls += 1
    sg_size = bitset.sum()
    if sg_size == 0:
        return result
    target_values_sg = self.target_values[bitset]

    target_values_cs = np.cumsum(target_values_sg)
    sizes = np.arange(1, len(target_values_cs) + 1)
    mean_values_cs = target_values_cs / sizes
    tpl = DFSNumeric.tpl(sizes, mean_values_cs)
    qualities = self.evaluate(None, tpl)
    optimistic_estimate = np.max(qualities)

    if optimistic_estimate <= ps.minimum_required_quality(result, task):
        return result

    sg = ps.Conjunction(copy.copy(prefix))
    quality = qualities[-1]
    ps.add_if_required(result, sg, quality, task)

    if len(prefix) < task.depth:
        new_modification_set = copy.copy(modification_set)
        for sel in modification_set:
            prefix.append(sel)
            new_bitset = bitset & self.bitsets[sel]
            new_modification_set.pop(0)
            self.search_internal(task, prefix, new_modification_set, result, new_bitset)
            # remove the sel again
            prefix.pop(-1)
    return result

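# Self-contained numpy illustration of the cumulative-mean trick used above
# (illustrative only; it assumes the subgroup's target values arrive sorted in
# descending order and uses a simple size * (mean - population_mean) quality as
# a stand-in for the real quality function). The maximum over all prefix sizes
# acts as the optimistic estimate; the last entry is the actual quality.
import numpy as np

target_values_sg = np.array([1.0, 0.9, 0.4, 0.1])   # sorted descending
population_mean = 0.5

sizes = np.arange(1, len(target_values_sg) + 1)
mean_values_cs = np.cumsum(target_values_sg) / sizes
qualities = sizes * (mean_values_cs - population_mean)

optimistic_estimate = qualities.max()   # best quality over any prefix size
actual_quality = qualities[-1]          # quality of the full subgroup
print(optimistic_estimate, actual_quality)
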
def search_internal(self, task, prefix, modification_set, result, use_optimistic_estimates):
    sg = ps.Conjunction(copy.copy(prefix))
    statistics = task.qf.calculate_statistics(sg, task.data)

    if use_optimistic_estimates and len(prefix) < task.depth and \
            isinstance(task.qf, ps.BoundedInterestingnessMeasure):
        optimistic_estimate = task.qf.optimistic_estimate(sg, statistics)
        if not (optimistic_estimate > ps.minimum_required_quality(result, task)):
            return result

    quality = task.qf.evaluate(sg, statistics)
    ps.add_if_required(result, sg, quality, task)

    if len(prefix) < task.depth:
        new_modification_set = copy.copy(modification_set)
        for sel in modification_set:
            prefix.append(sel)
            new_modification_set.pop(0)
            self.search_internal(task, prefix, new_modification_set, result, use_optimistic_estimates)
            # remove the sel again
            prefix.pop(-1)
    return result

def search_internal(self, task, prefix, modification_set, result, bitset):
    sg_size = bitset.sum()
    if sg_size == 0:
        return result
    target_values_sg = self.target_values[bitset]

    target_values_cs = np.cumsum(target_values_sg)
    mean_values_cs = target_values_cs / (np.arange(len(target_values_cs)) + 1)
    qualities = self.f(np.arange(len(target_values_cs)) + 1, mean_values_cs)
    optimistic_estimate = np.max(qualities)

    if optimistic_estimate <= ps.minimum_required_quality(result, task):
        return result

    sg = ps.Subgroup(task.target, copy.copy(prefix))
    quality = qualities[-1]
    ps.add_if_required(result, sg, quality, task)

    if len(prefix) < task.depth:
        new_modification_set = copy.copy(modification_set)
        for sel in modification_set:
            prefix.append(sel)
            new_bitset = bitset & self.bitsets[sel]
            new_modification_set.pop(0)
            self.search_internal(task, prefix, new_modification_set, result, new_bitset)
            # remove the sel again
            prefix.pop(-1)
    return result

def search_internal(self, task, prefix, modification_set, result, bitset):
    sg_size = bitset.sum()
    positive_instances = np.logical_and(bitset, self.target_bitset)
    sg_positive_count = positive_instances.sum()

    optimistic_estimate = task.qf.optimistic_estimate_from_statistics(
        self.pop_size, self.pop_positives, sg_size, sg_positive_count)
    if optimistic_estimate <= ps.minimum_required_quality(result, task):
        return result

    sg = ps.Subgroup(task.target, copy.copy(prefix))
    quality = task.qf.evaluate_from_statistics(self.pop_size, self.pop_positives,
                                               sg_size, sg_positive_count)
    ps.add_if_required(result, sg, quality, task)

    if len(prefix) < task.depth:
        new_modification_set = copy.copy(modification_set)
        for sel in modification_set:
            prefix.append(sel)
            new_bitset = np.logical_and(bitset, self.bitsets[sel])
            new_modification_set.pop(0)
            self.search_internal(task, prefix, new_modification_set, result, new_bitset)
            # remove the sel again
            prefix.pop(-1)
    return result

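# Tiny illustration (example data, not part of the algorithm) of the boolean-mask
# arithmetic used above: subgroup membership and the positive-target mask are
# numpy boolean arrays, so intersection is logical_and and counting is sum().
import numpy as np

bitset = np.array([True, True, False, True, False])         # subgroup cover
target_bitset = np.array([True, False, False, True, True])  # positive labels

sg_size = bitset.sum()                                       # 3 instances
positives = np.logical_and(bitset, target_bitset)
sg_positive_count = positives.sum()                          # 2 positives
print(sg_size, sg_positive_count)
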
def search_internal(self, task, prefix, modification_set, result, use_optimistic_estimates):
    sg = ps.Subgroup(task.target, ps.SubgroupDescription(copy.copy(prefix)))

    if use_optimistic_estimates and len(prefix) < task.depth and \
            isinstance(task.qf, ps.BoundedInterestingnessMeasure):
        optimistic_estimate = task.qf.optimistic_estimate_from_dataset(task.data, sg)
        if optimistic_estimate <= ps.minimum_required_quality(result, task):
            return result

    if task.qf.supports_weights():
        quality = task.qf.evaluate_from_dataset(task.data, sg, task.weighting_attribute)
    else:
        quality = task.qf.evaluate_from_dataset(task.data, sg)
    ps.add_if_required(result, sg, quality, task)

    if len(prefix) < task.depth:
        new_modification_set = copy.copy(modification_set)
        for sel in modification_set:
            prefix.append(sel)
            new_modification_set.pop(0)
            self.search_internal(task, prefix, new_modification_set, result, use_optimistic_estimates)
            # remove the sel again
            prefix.pop(-1)
    return result

def get_next_level_candidates_vectorized(self, task, result, next_level_candidates):
    promising_candidates = []
    statistics = []
    optimistic_estimate_function = getattr(task.qf, self.optimistic_estimate_name)
    for sg in next_level_candidates:
        statistics.append(task.qf.calculate_statistics(sg, task.data))

    tpl_class = statistics[0].__class__
    vec_statistics = tpl_class._make(np.array(tpl) for tpl in zip(*statistics))
    qualities = task.qf.evaluate(None, vec_statistics)
    optimistic_estimates = optimistic_estimate_function(None, vec_statistics)

    for sg, quality, stats in zip(next_level_candidates, qualities, statistics):
        ps.add_if_required(result, sg, quality, task, statistics=stats)

    min_quality = ps.minimum_required_quality(result, task)
    for sg, optimistic_estimate in zip(next_level_candidates, optimistic_estimates):
        if optimistic_estimate >= min_quality:
            promising_candidates.append(sg.selectors)
    return promising_candidates

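# Sketch of the vectorisation step above (illustrative; SimpleStats is a
# hypothetical statistics namedtuple standing in for the quality function's real
# tuple class): the per-candidate tuples are transposed into one tuple of numpy
# arrays so the quality function can score all candidates in a single call.
from collections import namedtuple
import numpy as np

SimpleStats = namedtuple("SimpleStats", ["size_sg", "positives_sg"])
statistics = [SimpleStats(10, 4), SimpleStats(25, 20), SimpleStats(7, 1)]

vec_statistics = SimpleStats._make(np.array(col) for col in zip(*statistics))
print(vec_statistics.size_sg)       # array([10, 25,  7])
print(vec_statistics.positives_sg)  # array([ 4, 20,  1])
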
def search_internal(self, task, result, sg):
    statistics = task.qf.calculate_statistics(sg)
    optimistic_estimate = task.qf.optimistic_estimate(sg, statistics)
    if not (optimistic_estimate > ps.minimum_required_quality(result, task)):
        return

    quality = task.qf.evaluate(sg, statistics)
    ps.add_if_required(result, sg, quality, task)

    if sg.depth < task.depth and statistics.size > 0:
        for new_sg in self.operator.refinements(sg):
            self.search_internal(task, result, new_sg)

def search_internal(self, task, result, sg):
    statistics = task.qf.calculate_statistics(sg)
    if not constraints_hold(task.constraints_monotone, sg, statistics, task.data):
        return
    optimistic_estimate = task.qf.optimistic_estimate(sg, statistics)
    if not optimistic_estimate > ps.minimum_required_quality(result, task):
        return

    quality = task.qf.evaluate(sg, statistics)
    ps.add_if_required(result, sg, quality, task, statistics=statistics)

    if sg.depth < task.depth:
        for new_sg in self.operator.refinements(sg):
            self.search_internal(task, result, new_sg)

def execute(self, task):
    result = []
    queue = [(float("-inf"), ps.Conjunction([]))]
    operator = SpecializationOperator(
        data=task.data.drop(['target'], axis=1),
        n_bins=self.n_bins,
        max_features=self.max_features,
        intervals_only=self.intervals_only,
        binning=self.binning,
        specialization=self.specialization,
        search_space=task.search_space)
    task.qf.calculate_constant_statistics(task.data, task.target)

    while queue:
        q, old_description = heappop(queue)
        q = -q
        if not (q > ps.minimum_required_quality(result, task)):
            break
        for candidate_description in operator.refinements(old_description):
            score_eval = task.qf.evaluate(candidate_description, task.target, task.data, None)
            ps.add_if_required(result, candidate_description, score_eval, task)
            if len(candidate_description) < task.depth:
                heappush(queue, (-score_eval, candidate_description))

    result.sort(key=lambda x: x[0], reverse=True)
    return ps.SubgroupDiscoveryResult(result, task)

def execute(self, task):
    measure_statistics_based = hasattr(task.qf, 'optimistic_estimate_from_statistics')
    result = []

    # init the first level
    next_level_candidates = []
    for sel in task.search_space:
        next_level_candidates.append(ps.Subgroup(task.target, [sel]))

    # level-wise search
    depth = 1
    while next_level_candidates:
        # check sgs from the last level
        promising_candidates = []
        for sg in next_level_candidates:
            if measure_statistics_based:
                statistics = sg.get_base_statistics(task.data)
                ps.add_if_required(result, sg, task.qf.evaluate_from_statistics(*statistics), task)
                optimistic_estimate = task.qf.optimistic_estimate_from_statistics(*statistics) \
                    if isinstance(task.qf, ps.BoundedInterestingnessMeasure) else float("inf")
            else:
                ps.add_if_required(result, sg, task.qf.evaluate_from_dataset(task.data, sg), task)
                optimistic_estimate = task.qf.optimistic_estimate_from_dataset(task.data, sg) \
                    if isinstance(task.qf, ps.BoundedInterestingnessMeasure) else float("inf")

            if optimistic_estimate >= ps.minimum_required_quality(result, task):
                promising_candidates.append(sg.subgroup_description.selectors)

        if depth == task.depth:
            break

        # generate candidates for the next level
        next_level_candidates = []
        for i, sg1 in enumerate(promising_candidates):
            for j, sg2 in enumerate(promising_candidates):
                if i < j and sg1[:-1] == sg2[:-1]:
                    candidate = list(sg1) + [sg2[-1]]
                    # check if ALL generalizations are contained in promising_candidates
                    generalization_descriptions = [[x for x in candidate if x != sel]
                                                   for sel in candidate]
                    if all(g in promising_candidates for g in generalization_descriptions):
                        next_level_candidates.append(ps.Subgroup(task.target, candidate))
        depth = depth + 1

    result.sort(key=lambda x: x[0], reverse=True)
    return result

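# Small illustration (hypothetical selectors, not library objects) of the
# candidate join and the "all generalizations must be promising" pruning step
# used in the level-wise search above.
promising_candidates = [["a", "b"], ["a", "c"], ["b", "c"]]

sg1, sg2 = ["a", "b"], ["a", "c"]          # share the same prefix ["a"]
candidate = list(sg1) + [sg2[-1]]          # -> ["a", "b", "c"]

generalizations = [[x for x in candidate if x != sel] for sel in candidate]
# -> [["b", "c"], ["a", "c"], ["a", "b"]]
if all(g in promising_candidates for g in generalizations):
    print("keep candidate", candidate)     # all subsets promising -> keep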