def construct_new_rule(self, rule, idx, ddist, bdist, attr, used, negate):
    """Yield candidate rules that refine ``rule`` with a condition on ``attr``.

    Discrete attribute: the values with a positive count in ``ddist`` are
    handed to ``block_iter`` and one candidate is built per block it yields
    (via ``rule.cloneAndAddCondition``).

    Continuous attribute: the value range is cut into ``self.fanout``
    equal-width intervals and one candidate is built per non-empty interval
    (via ``rule.cloneAndAddContCondition``).

    Every candidate gets ``parent_rule`` set to ``rule``; candidates already
    present in ``self.bad_rules`` are not yielded.

    NOTE(review): a second ``construct_new_rule`` definition follows this one
    in the file; if both live in the same scope the later one replaces this
    one -- confirm which is intended to be live.

    Parameters:
        rule: rule to refine; its ``filter.conditions`` are inspected when
            ``used`` is true.
        idx: position of ``attr``, compared against ``cond.position``.
        ddist: mapping-like object whose ``items()`` gives (value, count)
            pairs for a discrete attribute.
        bdist: distribution object providing ``variable``, ``min`` and
            ``max`` for a continuous attribute.
        attr: the attribute (``var_type`` selects the branch).
        used: forwarded to the clone call; when true, the continuous range
            is taken from the existing conditions on ``idx``.
        negate: forwarded to the clone call.
    """
    if attr.var_type == Orange.feature.Type.Discrete:
        keys = [k for k, v in ddist.items() if v > 0]
        # Explicit floor division keeps the fanout an integer regardless of
        # the interpreter's division semantics.
        if len(keys) // self.fanout > 5:
            fanout = len(keys) // 5
        else:
            fanout = self.fanout
        for keyblock in block_iter(keys, fanout):
            new_rule = rule.cloneAndAddCondition(attr, keyblock, used=used, negate=negate)
            new_rule.parent_rule = rule
            if new_rule not in self.bad_rules:
                yield new_rule
    else:
        # Default range: the distribution's extremes widened by 0.5 per side.
        minv = Orange.data.Value(bdist.variable, bdist.min) - 0.5
        maxv = Orange.data.Value(bdist.variable, bdist.max) + 0.5
        if used:
            # Intersect all existing conditions on this attribute.  ``None``
            # (not falsiness) marks "no bound yet", so a bound of exactly 0
            # is still intersected correctly.
            lo, hi = None, None
            for cond in rule.filter.conditions:
                if cond.position != idx:
                    continue
                lo = cond.min if lo is None else max(cond.min, lo)
                hi = cond.max if hi is None else min(cond.max, hi)
            # Keep the distribution-based bound when no condition supplied
            # one, instead of failing on ``None - None`` below.
            if lo is not None:
                minv = lo
            if hi is not None:
                maxv = hi
        # float() avoids truncating the interval width under integer division.
        block = (maxv - minv) / float(self.fanout)
        ranges = [(minv + i * block, minv + (i + 1) * block) for i in range(self.fanout)]
        for lower, upper in ranges:
            if lower == upper:
                # Degenerate (zero-width) interval: nothing to match.
                continue
            new_rule = rule.cloneAndAddContCondition(attr, lower, upper, orange.ValueFilter.Between, used=used, negate=negate)
            new_rule.parent_rule = rule
            if new_rule not in self.bad_rules:
                yield new_rule
def construct_new_rule(self, rule, idx, ddist, bdist, attr, used, negate):
    """Yield candidate rules that refine ``rule`` with a condition on ``attr``.

    Discrete attribute: if ``rule`` already has a condition at position
    ``idx``, only the values of the first such condition are re-split;
    otherwise the values with a positive count in ``ddist`` are used.  The
    values are handed to ``block_iter`` with a fanout of
    ``min(self.fanout, len(keys))`` and one candidate is built per block it
    yields.  Nothing is yielded when there is at most one value.

    Continuous attribute: the range is split in two at the distribution's
    50th percentile, giving ``[min, p50]`` and ``[p50, max]``.  Nothing is
    yielded when the distribution's 0th and 100th percentiles coincide.

    Every candidate gets ``parent_rule`` set to ``rule``; candidates already
    present in ``self.bad_rules`` are not yielded.

    Parameters:
        rule: rule to refine; its ``filter.conditions`` are inspected.
        idx: position of ``attr``, compared against ``cond.position``.
        ddist: mapping-like object whose ``items()`` gives (value, count)
            pairs for a discrete attribute.
        bdist: distribution object providing ``percentile(p)`` for a
            continuous attribute.
        attr: the attribute (``var_type`` selects the branch; ``values`` is
            indexed for discrete attributes).
        used: forwarded to the clone call; when true, the continuous bounds
            are taken from the existing conditions on ``idx``.
        negate: forwarded to the clone call.
    """
    if attr.var_type == Orange.feature.Type.Discrete:
        # A list (not a lazy ``filter`` object) so it can be truth-tested
        # and indexed.
        matches = [c for c in rule.filter.conditions if idx == c.position]
        if matches:
            keys = [attr.values[int(v)] for v in matches[0].values]
        else:
            keys = [k for k, v in ddist.items() if v > 0]
        if len(keys) <= 1:
            # A single value (or none) cannot be split any further.
            return
        fanout = min(self.fanout, len(keys))
        for keyblock in block_iter(keys, fanout):
            new_rule = rule.cloneAndAddCondition(attr, keyblock, used=used, negate=negate)
            new_rule.parent_rule = rule
            if new_rule not in self.bad_rules:
                yield new_rule
    else:
        minv = bdist.percentile(0)
        maxv = bdist.percentile(100)
        avgv = bdist.percentile(50)
        if minv == maxv:
            # Constant attribute: no useful split exists.
            return
        if used:
            # Intersect all existing conditions on this attribute.  ``None``
            # (not falsiness) marks "no bound yet", so a bound of exactly 0
            # is still intersected correctly.
            # Original author's note: the distribution is expected to be
            # computed from the rule's own examples, so it should already be
            # consistent with these conditions.
            lo, hi = None, None
            for cond in rule.filter.conditions:
                if cond.position != idx:
                    continue
                lo = cond.min if lo is None else max(cond.min, lo)
                hi = cond.max if hi is None else min(cond.max, hi)
            # Keep the distribution-based bound when no condition supplied
            # one, rather than passing ``None`` on as a range endpoint.
            if lo is not None:
                minv = lo
            if hi is not None:
                maxv = hi
        # NOTE(review): the split point comes from the distribution while the
        # bounds may come from the conditions; this assumes the split point
        # lies between them -- confirm.
        ranges = [[minv, avgv], [avgv, maxv]]
        for lower, upper in ranges:
            if lower == upper:
                # Degenerate (zero-width) interval: nothing to match.
                continue
            new_rule = rule.cloneAndAddContCondition(attr, lower, upper, orange.ValueFilter.Between, used=used, negate=negate)
            new_rule.parent_rule = rule
            if new_rule in self.bad_rules:
                continue
            yield new_rule