def getMaxGeneralizationTargetShare(data, subgroup, weightingAttribute=None):
    """Return the largest target share among the generalizations of *subgroup*.

    Every selector combination yielded by ``ut.powerset`` for the subgroup's
    selectors is turned into its own subgroup (same target), and its target
    share ``positives / instances`` is computed from ``get_base_statistics``.

    Args:
        data: Dataset handed through to ``Subgroup.get_base_statistics``.
        subgroup: Subgroup whose ``subgroupDescription.selectors`` are
            generalized and whose ``target`` is reused.
        weightingAttribute: Optional weighting attribute forwarded unchanged
            to ``get_base_statistics``.

    Returns:
        The maximum target share found, or ``0`` if no generalization covers
        any instance (``0`` is also the lower bound of the result).
    """
    selectors = subgroup.subgroupDescription.selectors
    maxTargetShare = 0
    for sels in ut.powerset(selectors):
        generalization = Subgroup(subgroup.target, SubgroupDescription(list(sels)))
        _, _, instancesSubgroup, positivesSubgroup = generalization.get_base_statistics(
            data, weightingAttribute)
        # A generalization that covers no instances has no defined target
        # share; skip it rather than dividing by zero.
        if instancesSubgroup == 0:
            continue
        maxTargetShare = max(maxTargetShare, positivesSubgroup / instancesSubgroup)
    return maxTargetShare
def execute(self, task):
    """Run a best-first search over conjunctions of selectors.

    Candidates are kept in a heap keyed by the negated optimistic estimate of
    their parent, so the most promising description is expanded first. The
    search stops as soon as the best remaining priority falls below
    ``ut.minimumRequiredQuality(result, task)``.

    Args:
        task: Search task; this method reads ``qf``, ``searchSpace``,
            ``target``, ``data`` and ``depth`` from it.

    Returns:
        The result list maintained by ``ut.addIfRequired``, sorted in
        descending order of each entry's first element (presumably the
        quality -- confirm against ``ut.addIfRequired``).
    """
    result = []
    uses_statistics = hasattr(task.qf, 'optimisticEstimateFromStatistics')

    # Seed the frontier with every single-selector description. The priority
    # -inf becomes +inf after negation, so first-level candidates are never
    # cut off by the quality threshold.
    frontier = [(float("-inf"), [selector]) for selector in task.searchSpace]

    while frontier:
        negated_priority, description = heappop(frontier)
        if -negated_priority < ut.minimumRequiredQuality(result, task):
            break

        candidate = Subgroup(task.target, description)

        if uses_statistics:
            statistics = candidate.get_base_statistics(task.data)
            ut.addIfRequired(
                result, candidate,
                task.qf.evaluateFromStatistics(*statistics), task)
            if isinstance(task.qf, m.BoundedInterestingnessMeasure):
                bound = task.qf.optimisticEstimateFromStatistics(*statistics)
            else:
                bound = float("inf")
        else:
            ut.addIfRequired(
                result, candidate,
                task.qf.evaluateFromDataset(task.data, candidate), task)
            if isinstance(task.qf, m.BoundedInterestingnessMeasure):
                bound = task.qf.optimisticEstimateFromDataset(task.data, candidate)
            else:
                bound = float("inf")

        # Refine only while below the depth limit and while the optimistic
        # estimate can still reach the required quality.
        refinable = (
            len(description) < task.depth
            and bound >= ut.minimumRequiredQuality(result, task))
        if not refinable:
            continue

        # Extend only with selectors located after the candidate's last
        # selector in the initial search-space order.
        last_position = min(
            task.searchSpace.index(description[-1]),
            len(task.searchSpace) - 1)
        child_priority = -bound
        for selector in islice(task.searchSpace, last_position + 1, None):
            heappush(frontier, (child_priority, description + [selector]))

    result.sort(key=lambda entry: entry[0], reverse=True)
    return result