def testinducer(self):
    """Smoke-test AssociationRulesInducer.

    Calling the inducer class directly on data and calling a pre-built
    instance must produce identical rules, and continuous data (iris)
    must be rejected with a TypeError.
    """
    # NOTE(review): admittedly a weak smoke test; proper regression tests
    # should cover this behaviour in detail.
    zoo = orange.ExampleTable("zoo")
    direct = orange.AssociationRulesInducer(zoo, support=0.5, confidence=0.9)
    via_instance = orange.AssociationRulesInducer(support=0.5, confidence=0.9)(zoo)
    self.assertEqual(direct, via_instance)

    # iris has continuous attributes, which the plain inducer must refuse
    iris = orange.ExampleTable("iris")
    self.assertRaises(TypeError, orange.AssociationRulesInducer, iris)
def generateRules(self):
    """Induce association rules from self.dataset and send them downstream.

    Support is lowered step-by-step from 1.0 toward the user-selected
    minimum (self.minSupport, in percent); induction stops as soon as at
    least self.maxRules rules are found or the minimum support is reached.
    """
    self.error()
    self.warning(0)
    if self.dataset:
        # fix: dropped the redundant `self.dataset and` re-check — we are
        # already inside `if self.dataset:`.
        if self.useSparseAlgorithm and not self.datasetIsSparse:
            self.warning(
                0,
                "Using algorithm for sparse data, but data does not appear to be sparse!"
            )
        try:
            num_steps = 20
            for i in range(num_steps):
                # fix: replaced the `cond and a or b` idiom with a real
                # conditional — the old form silently took the wrong branch
                # whenever minSupport was 0 (0/100. is falsy).
                if i == num_steps - 1:
                    build_support = self.minSupport / 100.
                else:
                    build_support = 1 - float(i) / num_steps * (1 - self.minSupport / 100.0)
                if self.useSparseAlgorithm:
                    rules = orange.AssociationRulesSparseInducer(
                        self.dataset,
                        support=build_support,
                        confidence=self.minConfidence / 100.,
                        storeExamples=True)
                else:
                    rules = orange.AssociationRulesInducer(
                        self.dataset,
                        support=build_support,
                        confidence=self.minConfidence / 100.,
                        classificationRules=self.classificationRules,
                        storeExamples=True)
                if len(rules) >= self.maxRules:
                    break
            self.send("Association Rules", rules)
        # fix: `except X as e` (valid since Python 2.6) for consistency with
        # the sibling generateRules implementation in this file.
        except orange.KernelException as errValue:
            self.error(str(errValue))
            self.send("Association Rules", None)
def startMining(self, var, sup): data = orange.ExampleTable("data/finalData.csv") #was47 data = data.select(range(44)) minSupport = float(sup) rules = orange.AssociationRulesInducer(data, support=minSupport, max_item_sets=30000000) print "%i rules with support higher than or equal to %5.3f found." % ( len(rules), minSupport) orngAssoc.printRules(rules[:10], ["support", "confidence"])
def findItemsets(self):
    """Find frequent itemsets in self.dataset and send them downstream.

    On failure, the error is reported on the widget and None is sent;
    if the failure looks like continuous data fed to the dense algorithm,
    a hint about the sparse algorithm is appended to the message.
    """
    self.error()
    if self.dataset:
        try:
            # Build the appropriate inducer once, then run it on the data.
            if self.useSparseAlgorithm:
                inducer = orange.AssociationRulesSparseInducer(
                    support=self.minSupport / 100., storeExamples=True)
            else:
                inducer = orange.AssociationRulesInducer(
                    support=self.minSupport / 100., storeExamples=True)
            self.itemsets = inducer.getItemsets(self.dataset)
            self.send("Itemsets", (self.dataset, self.itemsets))
        # fix: `except X as e` (Python 2.6+) instead of the legacy
        # `except Exception, (errValue)` form, consistent with generateRules.
        except Exception as errValue:
            errValue = str(errValue)
            if "non-discrete attributes" in errValue and not self.useSparseAlgorithm:
                errValue += "\nTry using the algorithm for sparse data"
            self.error(str(errValue))
            self.send("Itemsets", None)
def generateRules(self):
    """Induce association rules from self.dataset and send them downstream.

    Support is lowered step-by-step from 1.0 toward the user-selected
    minimum (self.minSupport, in percent); induction stops as soon as at
    least self.maxRules rules are found or the minimum support is reached.
    With no dataset, None is sent to clear the output.
    """
    self.error()
    if self.dataset:
        try:
            num_steps = 20
            for i in range(num_steps):
                # fix: replaced the `cond and a or b` idiom with a real
                # conditional — the old form silently took the wrong branch
                # whenever minSupport was 0 (0/100. is falsy).
                if i == num_steps - 1:
                    build_support = self.minSupport / 100.
                else:
                    build_support = 1 - float(i) / num_steps * (1 - self.minSupport / 100.0)
                if self.useSparseAlgorithm:
                    rules = orange.AssociationRulesSparseInducer(
                        self.dataset,
                        support=build_support,
                        confidence=self.minConfidence / 100.,
                        storeExamples=True)
                else:
                    rules = orange.AssociationRulesInducer(
                        self.dataset,
                        support=build_support,
                        confidence=self.minConfidence / 100.,
                        classificationRules=self.classificationRules,
                        storeExamples=True)
                if len(rules) >= self.maxRules:
                    break
            self.send("Association Rules", rules)
        except orange.KernelException as errValue:
            self.error(str(errValue))
            self.send("Association Rules", None)
    else:
        self.send("Association Rules", None)
def startMining(self, var): data = orange.ExampleTable("data/finalData.csv") #was47 data = data.select(range(44)) minSupport = 0.4 rules = orange.AssociationRulesInducer(data, support = minSupport, max_item_sets = 30000000) orig_stdout = sys.stdout f = open('results/{}_assocrules.txt'.format(var), 'w') sys.stdout = f print "%i rules with support higher than or equal to %5.3f found." % (len(rules), minSupport) orngAssoc.printRules(rules[:10], ["support", "confidence"]) > f sys.stdout = orig_stdout f.close()
        # Cache the support range of the current rule set for axis scaling.
        # NOTE(review): this is the tail of a method whose `def` (and the
        # matching `if`, presumably `if self.rules:`) lies outside this view;
        # `supps` is built before this point — confirm against the full file.
        self.supp_min = min(supps)
        self.supp_max = max(supps)
        del supps  # free the temporary list
        confs = [rule.confidence for rule in self.rules]
        self.conf_min = min(confs)
        self.conf_max = max(confs)
        del confs  # free the temporary list
        self.checkScale()
    else:
        # No rules available: fall back to the full [0, 1] range on both axes.
        self.supp_min, self.supp_max = self.conf_min, self.conf_max = 0., 1.
    # The "all*" bounds record the unzoomed extent; start fully zoomed out.
    self.supp_allmin, self.supp_allmax, self.conf_allmin, self.conf_allmax = self.supp_min, self.supp_max, self.conf_min, self.conf_max
    self.rezoom(self.supp_allmin, self.supp_allmax, self.conf_allmin, self.conf_allmax)


# Standalone demo: open the rule viewer on rules mined from the car dataset.
if __name__ == "__main__":
    a = QApplication(sys.argv)
    ow = OWAssociationRulesViewer()
    dataset = orange.ExampleTable('../../doc/datasets/car.tab')
    rules = orange.AssociationRulesInducer(dataset, minSupport=0.3, maxItemSets=15000)
    ow.arules(rules)
    ow.show()
    a.exec_()
    ow.saveSettings()
import orange data = orange.ExampleTable("lenses") rules = orange.AssociationRulesInducer(data, support=0.3, storeExamples=True) rule = rules[0] print print "Rule: ", rule print print rule print "Match left: " print "\n".join(str(rule.examples[i]) for i in rule.matchLeft) print "\nMatch both: " print "\n".join(str(rule.examples[i]) for i in rule.matchBoth) inducer = orange.AssociationRulesInducer(support=0.3, storeExamples=True) itemsets = inducer.getItemsets(data) print itemsets[8]
# Description: Association rule sorting and filtering # Category: description # Uses: imports-85 # Classes: orngAssoc.build, Preprocessor_discretize, EquiNDiscretization # Referenced: assoc.htm import orange, orngAssoc data = orange.ExampleTable("imports-85") data = orange.Preprocessor_discretize(data, \ method=orange.EquiNDiscretization(numberOfIntervals=3)) data = data.select(range(10)) rules = orange.AssociationRulesInducer(data, support=0.4) n = 5 print "%i most confident rules:" % (n) orngAssoc.sort(rules, ["confidence", "support"]) orngAssoc.printRules(rules[0:n], ['confidence', 'support', 'lift']) conf = 0.8 lift = 1.1 print "\nRules with confidence>%5.3f and lift>%5.3f" % (conf, lift) rulesC = rules.filter(lambda x: x.confidence > conf and x.lift > lift) orngAssoc.sort(rulesC, ['confidence']) orngAssoc.printRules(rulesC, ['confidence', 'support', 'lift'])
import orange data = orange.ExampleTable("lenses") print "\nAssociation rules" rules = orange.AssociationRulesInducer(data, support=0.3) for r in rules: print "%5.3f %5.3f %s" % (r.support, r.confidence, r) print "\nClassification rules" rules = orange.AssociationRulesInducer(data, support=0.3, classificationRules=1) for r in rules: print "%5.3f %5.3f %s" % (r.support, r.confidence, r)
def __call__(self, data, targetClass, max_rules=0):
    '''Returns the Apriori-C classifier.

    Builds classification association rules for `targetClass` from `data`
    (discretizing continuous attributes first), filters them by target
    class and minimum confidence, performs weighted covering to pick a
    rule subset, and returns the result converted to SD format.

    data        -- orange.ExampleTable to learn from
    targetClass -- class value the induced rules must predict
    max_rules   -- cap on selected rules; 0 means unlimited
    '''
    data_discretized = False
    # If any of the attributes are continuous, discretize them
    if data.domain.hasContinuousAttributes():
        original_data = data
        data_discretized = True
        new_domain = []
        discretize = orange.EntropyDiscretization(forceAttribute=True)
        for attribute in data.domain.attributes:
            if attribute.varType == orange.VarTypes.Continuous:
                d_attribute = discretize(attribute, data)
                # An attribute is irrelevant, if it is discretized into a single interval
                # if len(d_attribute.getValueFrom.transformer.points) > 0:
                # NOTE(review): the relevance check above is commented out,
                # so every discretized attribute is kept — confirm intended.
                new_domain.append(d_attribute)
            else:
                new_domain.append(attribute)
        # Rebuild the table over the (partly) discretized domain.
        data = original_data.select(new_domain + [original_data.domain.classVar])
    self.data = data
    self.rulesSD = []
    # build association classification rules
    rules = orange.AssociationRulesInducer(data,
                                           support=self.minSup,
                                           classificationRules=1,
                                           maxItemSets=10000000)
    #_______________________________ post-processing step 1
    # select rules that classify in the target class
    # `right` is an all-don't-know example whose class is targetClass; rules
    # are kept only if their right-hand side equals it.
    right = orange.Example(
        data.domain,
        [orange.Value(orange.VarTypes.Discrete, orange.ValueTypes.DK)] * len(data.domain))
    right.setclass(targetClass)
    rules = rules.filter(lambda rule: rule.right == right)
    # select rules with confidence >= minConfidence
    rules = rules.filter(lambda rule: rule.confidence >= self.minConf)
    #________________________________ post processing step 2
    # weighted covering
    self.data.addMetaAttribute(
        self.weightID)  # set weights of all examples to 1
    # Greedily pick the best rule (by WRAcc), down-weight the examples it
    # covers, and repeat until rules or uncovered examples run out, WRAcc
    # drops to zero, or the max_rules cap is hit.
    bestRuleWRacc = 100
    while len(rules) > 0 and self.uncoveredExamples(
    ) > 0 and bestRuleWRacc > 0 and (max_rules == 0
                                     or len(self.rulesSD) < max_rules):
        (bestRule, bestRuleWRacc) = self.findBestRule(rules)
        rules.remove(bestRule)
        self.removeSimilarRules(bestRule, rules)
        self.decreaseExampleWeights(bestRule)
        self.rulesSD.append(bestRule)
    #____________________________ transform rules to SD format
    beam = []
    # NOTE(review): this assignment is dead — both branches of the
    # `if data_discretized:` below overwrite targetClassRule before use.
    targetClassRule = SDRule(data, targetClass, conditions=[], g=1)
    for r in self.rulesSD:
        cond = []
        # Turn each non-don't-care left-hand value into a discrete filter.
        for i in range(len(r.left)):
            if not orange.Value.is_DC(r.left[i]):
                cond.append(
                    orange.ValueFilter_discrete(
                        position=i,
                        values=[
                            orange.Value(data.domain.attributes[i],
                                         r.left[i])
                        ]))
        rSD = SDRule(data, targetClass, cond)
        beam.append(rSD)
    if data_discretized:
        targetClassRule = SDRule(original_data,
                                 targetClass,
                                 conditions=[],
                                 g=1)
        # change beam so the rules apply to original data
        beam = [rule.getUndiscretized(original_data) for rule in beam]
    else:
        targetClassRule = SDRule(data, targetClass, conditions=[], g=1)
    return SDRules(beam, targetClassRule, "Apriori-SD")