def getMaxGeneralizationTargetShare(data, subgroup, weightingAttribute=None):
    # evaluate every generalization of the subgroup (i.e., every subset of its
    # selectors) and return the highest target share among them
    selectors = subgroup.subgroupDescription.selectors
    generalizations = ut.powerset(selectors)
    maxTargetShare = 0
    for sels in generalizations:
        sgd = SubgroupDescription(list(sels))
        sg = Subgroup(subgroup.target, sgd)
        (_, _, instancesSubgroup, positivesSubgroup) = sg.get_base_statistics(data, weightingAttribute)
        if instancesSubgroup == 0:
            # guard against division by zero for descriptions that cover no instances
            continue
        targetShare = positivesSubgroup / instancesSubgroup
        maxTargetShare = max(maxTargetShare, targetShare)
    return maxTargetShare
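# The generalization enumeration above relies on ut.powerset. A minimal,
# self-contained sketch of such a helper, built on itertools, is given below;
# the name powerset_sketch is hypothetical, and the actual ut implementation
# may differ:

from itertools import chain, combinations

def powerset_sketch(iterable):
    # yield all subsets of `iterable`, from the empty set up to the full set
    items = list(iterable)
    return chain.from_iterable(combinations(items, r) for r in range(len(items) + 1))

# Example: list(powerset_sketch([1, 2])) == [(), (1,), (2,), (1, 2)]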
def execute(self, task):
    result = []
    queue = []
    measure_statistics_based = hasattr(task.qf, 'optimisticEstimateFromStatistics')

    # init the first level
    for sel in task.searchSpace:
        queue.append((float("-inf"), [sel]))

    while queue:
        q, candidate_description = heappop(queue)
        # priorities are negated for heapq's min-heap; undo the negation
        q = -q
        if q < ut.minimumRequiredQuality(result, task):
            break

        sg = Subgroup(task.target, candidate_description)
        if measure_statistics_based:
            statistics = sg.get_base_statistics(task.data)
            ut.addIfRequired(result, sg, task.qf.evaluateFromStatistics(*statistics), task)
            optimistic_estimate = task.qf.optimisticEstimateFromStatistics(*statistics) \
                if isinstance(task.qf, m.BoundedInterestingnessMeasure) else float("inf")
        else:
            ut.addIfRequired(result, sg, task.qf.evaluateFromDataset(task.data, sg), task)
            optimistic_estimate = task.qf.optimisticEstimateFromDataset(task.data, sg) \
                if isinstance(task.qf, m.BoundedInterestingnessMeasure) else float("inf")

        # compute refinements and fill the queue
        if len(candidate_description) < task.depth and \
                optimistic_estimate >= ut.minimumRequiredQuality(result, task):
            # iterate over all selectors that are behind the last selector
            # contained in the evaluated candidate, according to the initial order
            index_of_last_selector = min(task.searchSpace.index(candidate_description[-1]),
                                         len(task.searchSpace) - 1)
            for sel in islice(task.searchSpace, index_of_last_selector + 1, None):
                new_description = candidate_description + [sel]
                heappush(queue, (-optimistic_estimate, new_description))

    result.sort(key=lambda x: x[0], reverse=True)
    return result
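# The best-first loop above emulates a max-priority queue with Python's
# min-heap module heapq by pushing negated priorities. A minimal,
# self-contained sketch of that pattern (illustration only, not part of the
# algorithm; max_priority_order is a hypothetical name):

from heapq import heappush, heappop

def max_priority_order(scored_items):
    # return (score, item) pairs in descending score order via a negated-key min-heap
    heap = []
    for score, item in scored_items:
        heappush(heap, (-score, item))  # negate so the largest score pops first
    while heap:
        neg_score, item = heappop(heap)
        yield -neg_score, item

# Example: list(max_priority_order([(1, 'a'), (3, 'b'), (2, 'c')]))
# yields (3, 'b'), (2, 'c'), (1, 'a').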
def searchInternal(self, task, prefix, modificationSet, result, bitset):
    sg = Subgroup(task.target, copy.copy(prefix))

    sgSize = len(bitset)
    positiveInstances = ut.intersect_of_ordered_list(bitset, self.targetBitset)
    sgPositiveCount = len(positiveInstances)

    optimisticEstimate = task.qf.optimisticEstimateFromStatistics(
        self.popSize, self.popPositives, sgSize, sgPositiveCount)
    if optimisticEstimate <= ut.minimumRequiredQuality(result, task):
        return result

    quality = task.qf.evaluateFromStatistics(self.popSize, self.popPositives, sgSize, sgPositiveCount)
    ut.addIfRequired(result, sg, quality, task)

    if len(prefix) < task.depth:
        newModificationSet = copy.copy(modificationSet)
        for sel in modificationSet:
            prefix.append(sel)
            newBitset = ut.intersect_of_ordered_list(bitset, self.bitsets[sel])
            newModificationSet.pop(0)
            self.searchInternal(task, prefix, newModificationSet, result, newBitset)
            # remove the sel again
            prefix.pop()
    return result
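# The bitset-based search above represents covered rows as sorted lists of row
# indices and intersects them via ut.intersect_of_ordered_list. A minimal
# sketch of a two-pointer intersection with those semantics (an assumption
# about the helper, kept for illustration under a hypothetical name):

def intersect_of_ordered_list_sketch(list1, list2):
    # intersect two ascending lists in O(len(list1) + len(list2))
    result = []
    i = j = 0
    while i < len(list1) and j < len(list2):
        if list1[i] < list2[j]:
            i += 1
        elif list1[i] > list2[j]:
            j += 1
        else:  # equal: the element is in both lists
            result.append(list1[i])
            i += 1
            j += 1
    return result

# Example: intersect_of_ordered_list_sketch([0, 2, 5, 9], [2, 3, 5]) == [2, 5]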
def searchInternal(self, task: SubgroupDiscoveryTask, prefix: List,
                   modificationSet: List, result: List,
                   useOptimisticEstimates: bool) -> List:
    sg = Subgroup(task.target, SubgroupDescription(copy.copy(prefix)))

    optimisticEstimate = float("inf")
    if useOptimisticEstimates and len(prefix) < task.depth and \
            isinstance(task.qf, m.BoundedInterestingnessMeasure):
        optimisticEstimate = task.qf.optimisticEstimateFromDataset(task.data, sg)
    if optimisticEstimate <= ut.minimumRequiredQuality(result, task):
        return result

    if task.qf.supportsWeights():
        quality = task.qf.evaluateFromDataset(task.data, sg, task.weightingAttribute)
    else:
        quality = task.qf.evaluateFromDataset(task.data, sg)
    ut.addIfRequired(result, sg, quality, task)

    if len(prefix) < task.depth:
        newModificationSet = copy.copy(modificationSet)
        for sel in modificationSet:
            prefix.append(sel)
            newModificationSet.pop(0)
            self.searchInternal(task, prefix, newModificationSet, result, useOptimisticEstimates)
            # remove the sel again
            prefix.pop()
    return result
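# Both searchInternal variants share the same enumeration scheme: a growing
# prefix plus a shrinking modification set, so that each selector set is
# visited exactly once. A minimal, self-contained sketch of that scheme on
# plain integers (illustration only; enumerate_prefixes is hypothetical):

def enumerate_prefixes(prefix, modification_set, max_depth, out):
    # collect every subset of the search space up to max_depth, without duplicates
    out.append(list(prefix))
    if len(prefix) < max_depth:
        remaining = list(modification_set)
        for sel in modification_set:
            prefix.append(sel)
            remaining.pop(0)  # deeper calls may only add selectors that come after `sel`
            enumerate_prefixes(prefix, list(remaining), max_depth, out)
            prefix.pop()  # backtrack

# Example: with out = [], enumerate_prefixes([], [1, 2, 3], 2, out) collects
# [], [1], [1, 2], [1, 3], [2], [2, 3], [3].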
def execute(self, task):
    # adapt beam width to the result set size if desired
    if self.beamWidthAdaptive:
        self.beamWidth = task.resultSetSize

    # check if the beam width is too small for the result set
    if self.beamWidth < task.resultSetSize:
        raise RuntimeError('Beam width in the beam search algorithm is smaller than the result set size!')

    # init: start from the empty subgroup description
    beam = [(0, Subgroup(task.target, []))]
    last_beam = None

    depth = 0
    # expand until the beam no longer changes or the depth limit is reached
    while beam != last_beam and depth < task.depth:
        last_beam = beam.copy()
        for (_, last_sg) in last_beam:
            for sel in task.searchSpace:
                # create a clone extended by the new selector
                new_selectors = list(last_sg.subgroupDescription.selectors)
                if sel not in new_selectors:
                    new_selectors.append(sel)
                    sg = Subgroup(task.target, new_selectors)
                    quality = task.qf.evaluateFromDataset(task.data, sg)
                    ut.addIfRequired(beam, sg, quality, task, check_for_duplicates=True)
        depth += 1

    result = beam[:task.resultSetSize]
    result.sort(key=lambda x: x[0], reverse=True)
    return result
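# A minimal, self-contained sketch of the beam search idea used above: keep
# only the beam_width best partial solutions per level and extend each of them
# with every possible item. The toy scoring function and the name
# beam_search_sketch are assumptions for illustration only:

def beam_search_sketch(items, score, beam_width, max_depth):
    # greedy level-wise search that keeps the best beam_width item sets
    beam = [((), 0.0)]  # (item set, score), starting from the empty set
    for _ in range(max_depth):
        candidates = list(beam)
        for selected, _ in beam:
            for item in items:
                if item not in selected:
                    extended = tuple(sorted(selected + (item,)))
                    if all(extended != c for c, _ in candidates):
                        candidates.append((extended, score(extended)))
        # keep only the best beam_width candidates for the next level
        beam = sorted(candidates, key=lambda c: c[1], reverse=True)[:beam_width]
    return beam

# Example with a toy quality function:
# beam_search_sketch(['a', 'b', 'c'], lambda s: len(set(s)), beam_width=2, max_depth=2)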
def execute(self, task):
    measure_statistics_based = hasattr(task.qf, 'optimisticEstimateFromStatistics')
    result = []

    # init the first level
    next_level_candidates = []
    for sel in task.searchSpace:
        next_level_candidates.append(Subgroup(task.target, [sel]))

    # level-wise search
    depth = 1
    while next_level_candidates:
        # check sgs from the last level
        promising_candidates = []
        for sg in next_level_candidates:
            if measure_statistics_based:
                statistics = sg.get_base_statistics(task.data)
                ut.addIfRequired(result, sg, task.qf.evaluateFromStatistics(*statistics), task)
                optimistic_estimate = task.qf.optimisticEstimateFromStatistics(*statistics) \
                    if isinstance(task.qf, m.BoundedInterestingnessMeasure) else float("inf")
            else:
                ut.addIfRequired(result, sg, task.qf.evaluateFromDataset(task.data, sg), task)
                optimistic_estimate = task.qf.optimisticEstimateFromDataset(task.data, sg) \
                    if isinstance(task.qf, m.BoundedInterestingnessMeasure) else float("inf")

            if optimistic_estimate >= ut.minimumRequiredQuality(result, task):
                promising_candidates.append(sg.subgroupDescription.selectors)

        if depth == task.depth:
            break

        # generate the candidates for the next level
        next_level_candidates = []
        for i, sg1 in enumerate(promising_candidates):
            for j, sg2 in enumerate(promising_candidates):
                # join two descriptions that agree on all but the last selector
                if i < j and sg1[:-1] == sg2[:-1]:
                    candidate = list(sg1) + [sg2[-1]]
                    # check if ALL generalizations are contained in promising_candidates
                    generalization_descriptions = [[x for x in candidate if x != sel]
                                                   for sel in candidate]
                    if all(g in promising_candidates for g in generalization_descriptions):
                        next_level_candidates.append(Subgroup(task.target, candidate))
        depth += 1

    result.sort(key=lambda x: x[0], reverse=True)
    return result
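# The candidate generation above is the classic Apriori join: two promising
# k-sets that agree on their first k-1 elements are merged into a (k+1)-set,
# which is kept only if all of its k-subsets are promising as well. A minimal,
# self-contained sketch of that step (illustration only; apriori_join_sketch
# is a hypothetical name):

def apriori_join_sketch(promising):
    # join sorted k-tuples that share a (k-1)-prefix; prune by the subset check
    promising_set = set(promising)
    next_level = []
    for i, s1 in enumerate(promising):
        for s2 in promising[i + 1:]:
            if s1[:-1] == s2[:-1]:
                candidate = s1 + (s2[-1],)
                # keep the candidate only if every generalization
                # (candidate minus one element) is promising, too
                subsets = [candidate[:k] + candidate[k + 1:] for k in range(len(candidate))]
                if all(s in promising_set for s in subsets):
                    next_level.append(candidate)
    return next_level

# Example: apriori_join_sketch([('a', 'b'), ('a', 'c'), ('b', 'c')]) == [('a', 'b', 'c')]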