# Description: Uses cross-validation to compare regression tree and k-nearest neighbors # Category: modelling, evaluation # Uses: housing # Classes: orngStat.MSE, orngTest.crossValidation, MajorityLearner, orngTree.TreeLearner, orange.kNNLearner # Referenced: regression.htm import orange, orngTree, orngTest, orngStat data = orange.ExampleTable("housing.tab") maj = orange.MajorityLearner() maj.name = "default" rt = orngTree.TreeLearner(measure="retis", mForPruning=2, minExamples=20) rt.name = "regression tree" k = 5 knn = orange.kNNLearner(k=k) knn.name = "k-NN (k=%i)" % k learners = [maj, rt, knn] data = orange.ExampleTable("housing.tab") results = orngTest.crossValidation(learners, data, folds=10) mse = orngStat.MSE(results) print "Learner MSE" for i in range(len(learners)): print "%-15s %5.3f" % (learners[i].name, mse[i])
def __call__(self, data, y=None, x=None, nc=None, weight=None, **kwds):
    """Fit a partial-least-squares model on ``data`` and return a PLSRegression.

    NOTE(review): this is a method of a learner class whose header is outside
    this chunk; ``normalize``, ``dot`` and ``PLSRegression`` come from the
    enclosing module.

    Parameters:
        data   -- example table to learn from.
        y      -- list of response variables; defaults to the class variable.
        x      -- list of predictor variables; defaults to every domain
                  variable that is not in ``y``.
        nc     -- number of components; defaults to ``len(x)``.
        weight -- accepted but not used by this implementation.
        kwds   -- accepted but not used by this implementation.

    Returns:
        PLSRegression built from the regression matrix ``BPls`` and the
        centring/scaling statistics; when ``self.save_partial`` is set the
        intermediate matrices T, U, C, W and P are passed along as well.
    """
    # Identity tests: ``== None`` would be evaluated element-wise should a
    # caller ever hand in an array-like.
    if y is None:
        y = [data.domain.classVar]
    if x is None:
        x = [v for v in data.domain.variables if v not in y]

    Ncomp = nc if nc is not None else len(x)

    dataX = orange.ExampleTable(orange.Domain(x, False), data)
    dataY = orange.ExampleTable(orange.Domain(y, False), data)

    # transformation to numpy arrays
    X = dataX.toNumpy()[0]
    Y = dataY.toNumpy()[0]

    # data dimensions
    n, mx = numpy.shape(X)
    my = numpy.shape(Y)[1]

    # Z-scores of original matrices
    YMean = numpy.mean(Y, axis=0)
    YStd = numpy.std(Y, axis=0)
    XMean = numpy.mean(X, axis=0)
    XStd = numpy.std(X, axis=0)

    #FIXME: standard deviation should never be 0. Ask Lan, if the following
    #fix is ok.
    # Clamp so that constant columns do not cause a division by zero.
    XStd = numpy.maximum(XStd, 10e-16)
    YStd = numpy.maximum(YStd, 10e-16)

    X = (X - XMean) / XStd
    Y = (Y - YMean) / YStd

    P = numpy.empty((mx, Ncomp))
    C = numpy.empty((my, Ncomp))
    T = numpy.empty((n, Ncomp))
    U = numpy.empty((n, Ncomp))
    B = numpy.zeros((Ncomp, Ncomp))
    W = numpy.empty((mx, Ncomp))
    E, F = X, Y

    # main algorithm: extract one component per pass, then deflate E and F
    for i in range(Ncomp):
        u = numpy.random.random_sample((n, 1))  #FIXME random seed?
        w = normalize(dot(E.T, u))
        t = normalize(dot(E, w))
        dif = t

        # iterations for loading vector t
        # NOTE(review): there is no iteration cap; the loop only ends once
        # successive score vectors agree to within 10e-16.
        while numpy.linalg.norm(dif) > 10e-16:
            c = normalize(dot(F.T, t))
            u = dot(F, c)
            w = normalize(dot(E.T, u))
            t0 = normalize(dot(E, w))
            dif = t - t0
            t = t0

        T[:, i] = t.T
        U[:, i] = u.T
        C[:, i] = c.T
        W[:, i] = w.T
        b = dot(t.T, u)[0]
        B[i][i] = b
        p = dot(E.T, t)
        P[:, i] = p.T
        E = E - dot(t, p.T)
        F = F - b * dot(t, c.T)

    # The former "estimated Y" (YE) and the re-scaled Y were computed here but
    # never used; the re-scaling also multiplied by the std of the already
    # standardised Y instead of YStd. Both dead statements were removed.
    BPls = dot(dot(numpy.linalg.pinv(P.T), B), C.T)

    partial = {}
    if self.save_partial:
        partial["T"] = T
        partial["U"] = U
        partial["C"] = C
        partial["W"] = W
        partial["P"] = P

    return PLSRegression(domain=data.domain, BPls=BPls,
                         YMean=YMean, YStd=YStd,
                         XMean=XMean, XStd=XStd,
                         name=self.name, **partial)
self.progressBarFinished() self.classifier.name = self.LearnerName self.classifier.setattr("data", self.data) if self.data.domain.classVar: self.send("Classifier", self.classifier) self.send("SOM", self.classifier) def sendReport(self): self.reportSettings( "Topology", [("Shape", ["hexagonal", "rectangular"][self.topology]), ("Size", "%i columns, %i rows" % (self.xdim, self.ydim))]) self.reportSettings( "Optimization", [("Initialization", ["linear", "random"][self.initialization]), ("Neighborhood", ["Gaussian", "bubble"][self.neighborhood]), ("Radius", "initial: %i, final: %i" % (self.radius1, self.radius2)), ("Number of iterations", self.iterations1)]) if __name__ == "__main__": app = QApplication(sys.argv) w = OWSOM() ## app.setMainWidget(w) w.show() data = orange.ExampleTable("../../doc/datasets/iris.tab") w.setData(data) app.exec_()
# Description: Creates a list of association rules, selects five rules and prints them out # Category: description # Uses: imports-85 # Classes: orngAssoc.build, Preprocessor_discretize, EquiNDiscretization # Referenced: assoc.htm import orange, orngAssoc data = orange.ExampleTable("imports-85") data = orange.Preprocessor_discretize(data, \ method=orange.EquiNDiscretization(numberOfIntervals=3)) data = data.select(range(10)) rules = orange.AssociationRulesInducer(data, support=0.4) print "%i rules with support higher than or equal to %5.3f found.\n" % (len(rules), 0.4) orngAssoc.sort(rules, ["support", "confidence"]) orngAssoc.printRules(rules[:5], ["support", "confidence"]) print del rules[:3] orngAssoc.printRules(rules[:5], ["support", "confidence"]) print
def __call__(self, newRule, examples, weightID, targetClass, prior):
    """Return the ``wracc`` score of ``newRule`` on ``examples``.

    NOTE(review): this is a method of an evaluator class whose header is
    outside this chunk.

    The score is ``n1xy / N1 - ny * n1x / (N1 * N)`` where
        N    -- number of examples (unweighted),
        ny   -- number of examples of ``targetClass`` (unweighted),
        N1   -- total weight of all examples,
        n1x  -- total weight of the examples accepted by ``newRule.filter``,
        n1xy -- total weight of the accepted examples of ``targetClass``.

    ``prior`` is accepted but not used. When there are no examples, or their
    total weight is zero, the score is undefined and 0.0 is returned instead
    of raising ZeroDivisionError.
    """
    N = len(examples)
    ny = sum(1 for e in examples if e.getclass() == targetClass)

    # Float accumulators keep the final divisions from truncating when the
    # weights happen to be integers.
    N1 = n1x = n1xy = 0.0
    for e in examples:
        tmp = e.getweight(weightID)
        N1 += tmp
        if newRule.filter(e):
            n1x += tmp
            if e.getclass() == targetClass:
                n1xy += tmp

    if not N or not N1:
        return 0.0

    wracc = n1xy / N1 - ny * n1x / (N1 * N)
    return wracc


if __name__ == "__main__":
    filename = "..\\..\\doc\\datasets\\lenses.tab"
    if 'linux' in sys.platform:
        filename = "/usr/doc/orange/datasets/lenses.tab"
    data = orange.ExampleTable(filename)

    # Three blank lines; the single-argument parenthesised form prints the
    # same text as the bare print statement.
    print("")
    print("")
    print("")

    learner = CN2_SD(3)
    targetClass = orange.Value(data.domain["lenses"], "none")
    rules = learner(data, targetClass, 10)

    print("____________________SN2-SD results______________________")
    rules.printRules()
def makeTable(data, engine, tagLayer, useFarAway=False):
    """Build an example table for ``engine`` from annotated geometry records.

    NOTE(review): the indentation below was reconstructed from a
    whitespace-collapsed source; confirm the nesting against the original
    file, in particular the placement of the far-away ``break``.

    Parameters:
        data       -- iterable of (engineName, landmarkName, geometry) triples;
                      geometry is a dict passed to engine.makeExample and must
                      contain "ground" (copied over "landmark").
        engine     -- object providing domain(), name() and makeExample().
        tagLayer   -- iterable of (name, landmark) pairs, used only when
                      useFarAway is set.
        useFarAway -- when true (and the engine is neither "through" nor
                      "down") extra negative examples flagged farAway=True
                      are appended.

    Returns:
        The filled orange.ExampleTable.
    """
    table = orange.ExampleTable(engine.domain())
    # Source engines listed in skipMap for this engine are ignored entirely.
    if engine.name() in skipMap:
        skipList = skipMap[engine.name()]
    else:
        skipList = []
    labeledCount = 0
    negativeCount = 0
    farAwayCount = 0
    for engineName, landmarkName, geometry in data:
        #geometry["landmark"] = createLandmarkPt(math2d.centroid(geometry["landmark"]))
        if not (engineName in skipList):  # or True:
            try:
                geometry["landmark"] = geometry["ground"]
                ex = engine.makeExample(**geometry)
            except preposition.InsaneExample:
                # Records the engine rejects as insane are dropped silently.
                continue
            except:
                # Dump the offending record for debugging, then re-raise.
                print "dc", engineName
                print "dc", geometry
                print "dc", landmarkName
                raise
            # Records annotated for this engine are positives; records from
            # any other engine are negatives.
            if engineName == engine.name():
                # The "and False" disables this special case on purpose.
                if engineName == "down" and False:
                    print "doing down differently"
                    if landmarkName == "hallway":
                        cls = "True"
                        labeledCount += 1
                    else:
                        continue
                        #cls = "False"
                        #negativeCount -= 1
                else:
                    cls = "True"
                    labeledCount += 1
            else:
                cls = "False"
                negativeCount += 1
            ex['class'] = cls
            ex['sourceEngineName'] = engineName
            ex['engineName'] = engine.name()
            ex['landmarkName'] = landmarkName
            ex['farAway'] = False
            table.append(ex)
    if useFarAway and engine.name() != "through" and engine.name() != "down":
        for name, landmark in tagLayer:
            centroid1 = math2d.centroid(landmark)
            for engineName, landmarkName, geometry in data:
                if engineName == engine.name():
                    centroid2 = math2d.centroid(geometry["landmark"])
                    d1 = math2d.dist(centroid1, centroid2)
                    d2 = math2d.length(geometry["figure"])
                    # A tag-layer landmark farther from the annotated landmark
                    # than the figure is long yields a "far away" negative.
                    if d1 > d2:
                        ex = engine.makeExample(figure=geometry["figure"],
                                                landmark=landmark)
                        ex['class'] = "False"
                        ex['landmarkName'] = landmarkName
                        ex['sourceEngineName'] = engineName
                        ex['engineName'] = engine.name()
                        ex['farAway'] = True
                        table.append(ex)
                        farAwayCount += 1
            # NOTE(review): presumably caps the far-away examples at roughly
            # 100 by leaving the landmark loop -- confirm the nesting level.
            if farAwayCount >= 100:
                break
    for ex in table:
        ex['drawMap'] = None
        #ex['geometry'] = None
    print "counts"
    print labeledCount, "labeled examples."
    print negativeCount, "negative examples."
    print farAwayCount, "far away examples."
    return table
# Classes: ExampleTable # Uses: iris, heart_disease # Referenced: ExampleTable.htm import orange import orange, random random.seed(0) values = ["0", "1"] mynames = ["orange", "green", "red", "yellow", "black", "magenta"] attributes = [orange.EnumVariable(mynames[i], values=values) for i in range(6)] classattr = orange.EnumVariable("classname", values=["0", "1"]) domain = orange.Domain(attributes + [classattr]) print "attributes", attributes print "classattr", classattr print "domain:", domain card = [1, 1, 1, 1, 1, 1] data = orange.ExampleTable(domain) for i in range(5): ex = [random.randint(0, c) for c in card] ex.append(ex[0] == ex[1] or ex[4] == 0) data.append(ex) for ex in data: print ex classifier = orange.BayesLearner(data) print classifier(data[0], orange.GetBoth)
# Description: Add a new attribute price to a car data set, compute it from two existing attributes (buying, maint)
# Category: preprocessing
# Uses: car
# Classes: Domain, Value, getValueFrom, EnumVariable
# Referenced: domain.htm

import orange

data = orange.ExampleTable('../datasets/car')

# add attribute price = f(buying, maint)
# see also http://www.ailab.si/hint/car_dataset.asp
# Keys have the form '<first>:<second>'; presumably '<buying>:<maint>', given
# the comment above -- confirm against the code that reads the table.
_first_levels = ('v-high', 'high', 'med', 'low')
_prices_by_second = (
    ('v-high', ('v-high', 'v-high', 'high', 'high')),
    ('high', ('v-high', 'high', 'high', 'med')),
    ('med', ('high', 'high', 'med', 'low')),
    ('low', ('high', 'high', 'low', 'low')),
)

priceTable = {}
for _second, _prices in _prices_by_second:
    for _first, _price in zip(_first_levels, _prices):
        priceTable['%s:%s' % (_first, _second)] = _price
# Description: Shows how to sample example by random divisions into two groups # Category: sampling # Classes: MakeRandomIndices, MakeRandomIndices2, RandomGenerator # Uses: lenses # Referenced: RandomIndices.htm import orange data = orange.ExampleTable("lenses") indices2 = orange.MakeRandomIndices2(p0=6) ind = indices2(data) print ind data0 = data.select(ind, 0) data1 = data.select(ind, 1) print len(data0), len(data1) print "\nIndices without playing with random generator" for i in range(5): print indices2(data) print "\nIndices with random generator" indices2.randomGenerator = orange.RandomGenerator(42) for i in range(5): print indices2(data) print "\nIndices with randseed" indices2.randomGenerator = None indices2.randseed = 42 for i in range(5):
# Description: Learn decision tree from data and output class probabilities for first few instances # Category: modelling # Uses: voting.tab # Classes: orngTree.TreeLearner # Referenced: c_otherclass.htm import orange, orngTree data = orange.ExampleTable("voting") tree = orngTree.TreeLearner(data, sameMajorityPruning=1, mForPruning=2) print "Possible classes:", data.domain.classVar.values print "Probabilities for democrats:" for i in range(5): p = tree(data[i], orange.GetProbabilities) print "%d: %5.3f (originally %s)" % (i + 1, p[1], data[i].getclass()) print orngTree.printTxt(tree) orngTree.printDot(tree, fileName='tree.dot', internalNodeShape="ellipse", leafShape="box")
supps = [rule.support for rule in self.rules] self.supp_min = min(supps) self.supp_max = max(supps) del supps confs = [rule.confidence for rule in self.rules] self.conf_min = min(confs) self.conf_max = max(confs) del confs self.checkScale() else: self.supp_min, self.supp_max = self.conf_min, self.conf_max = 0., 1. self.supp_allmin, self.supp_allmax, self.conf_allmin, self.conf_allmax = self.supp_min, self.supp_max, self.conf_min, self.conf_max self.rezoom(self.supp_allmin, self.supp_allmax, self.conf_allmin, self.conf_allmax) if __name__=="__main__": a=QApplication(sys.argv) ow=OWAssociationRulesViewer() dataset = orange.ExampleTable('../../doc/datasets/car.tab') rules=orange.AssociationRulesInducer(dataset, minSupport = 0.3, maxItemSets=15000) ow.arules(rules) ow.show() a.exec_() ow.saveSettings()
# Description: Shows what the contingency matrix looks like and which are its common methods # Category: statistics # Classes: Contingency, ContingencyAttrClass # Uses: monk1 # Referenced: contingency.htm import orange data = orange.ExampleTable("monk1") cont = orange.ContingencyAttrClass("e", data) print "Contingency items:" for val, dist in cont.items(): print val, dist print print "Contingency keys: ", cont.keys() print "Contingency values: ", cont.values() print "Contingency items: ", cont.items() print print "cont[0] =", cont[0] print 'cont[\"1\"] =', cont["1"] print 'cont[orange.Value(data.domain["e"], "1")] =', cont[orange.Value( data.domain["e"], "1")] print print "Iteration through contingency:" for i in cont: print i print
def getSelectionsAsExampleTables(self, attrList, useAnchorData=1, addProjectedPositions=0):
    """Split the data into (selected, unselected) example tables.

    NOTE(review): this is a method of a plot class whose header is outside
    this chunk.

    The method is currently disabled: it returns ``(None, None)``
    unconditionally. The implementation below the early return is kept for
    when it is re-enabled; two latent defects in it were fixed:
      * the write position for unselected rows is now advanced, so projected
        coordinates no longer all land on the first unselected example;
      * the projection array is tested with ``is None`` (``== None`` on a
        numpy array compares element-wise).

    Parameters (used by the disabled implementation):
        attrList              -- attribute labels to project; needs >= 3.
        useAnchorData         -- take attribute indices from self.anchor_data
                                 instead of attrList.
        addProjectedPositions -- 0: plain domain, 1: X/Y/Z positions added as
                                 ordinary attributes, 2: added as metas.

    Returns:
        Tuple (selected, unselected); an empty part is returned as None.
    """
    return (None, None)  # TODO: this is disabled for now

    # ---- disabled implementation (unreachable until the return is removed) ----
    if not self.have_data:
        return (None, None)

    selected = self.get_selected_indices()

    if addProjectedPositions == 0 and not numpy.any(selected):
        return (None, self.raw_data)
    if (useAnchorData and len(self.anchor_data) < 3) or len(attrList) < 3:
        return (None, None)

    x_attr = orange.FloatVariable("X Positions")
    y_attr = orange.FloatVariable("Y Positions")
    z_attr = orange.FloatVariable("Z Positions")

    if addProjectedPositions == 1:
        domain = orange.Domain([x_attr, y_attr, z_attr] +
                               [v for v in self.data_domain.variables])
    elif addProjectedPositions == 2:
        domain = orange.Domain(self.data_domain)
        domain.addmeta(orange.newmetaid(), x_attr)
        domain.addmeta(orange.newmetaid(), y_attr)
        domain.addmeta(orange.newmetaid(), z_attr)
    else:
        domain = orange.Domain(self.data_domain)

    domain.addmetas(self.data_domain.getmetas())

    if useAnchorData:
        indices = [self.attribute_name_index[val[3]] for val in self.anchor_data]
    else:
        indices = [self.attribute_name_index[label] for label in attrList]
    valid_data = self.getValidList(indices)
    if len(valid_data) == 0:
        return (None, None)

    array = self.create_projection_as_numeric_array(
        attrList, scaleFactor=self.scaleFactor,
        useAnchorData=useAnchorData, removeMissingData=0)
    if array is None:
        return (None, None)

    unselected = numpy.logical_not(selected)
    selected_indices, unselected_indices = list(selected), list(unselected)

    if addProjectedPositions:
        selected = orange.ExampleTable(
            domain, self.raw_data.selectref(selected_indices))
        unselected = orange.ExampleTable(
            domain, self.raw_data.selectref(unselected_indices))
        selected_index = 0
        unselected_index = 0
        # Walk the rows once, copying each projected position into whichever
        # of the two tables the row went to.
        for i in range(len(selected_indices)):
            if selected_indices[i]:
                selected[selected_index][x_attr] = array[i][0]
                selected[selected_index][y_attr] = array[i][1]
                selected[selected_index][z_attr] = array[i][2]
                selected_index += 1
            else:
                unselected[unselected_index][x_attr] = array[i][0]
                unselected[unselected_index][y_attr] = array[i][1]
                unselected[unselected_index][z_attr] = array[i][2]
                unselected_index += 1
    else:
        selected = self.raw_data.selectref(selected_indices)
        unselected = self.raw_data.selectref(unselected_indices)

    if len(selected) == 0:
        selected = None
    if len(unselected) == 0:
        unselected = None
    return (selected, unselected)
import orange data = orange.ExampleTable("inquisition") rules = orange.AssociationRulesSparseInducer(data, support = 0.5, storeExamples = True) rule0 = rules[10] print "Rule:", rule0 print "Match left: " print [rule0.examples[i] for i in rule0.matchLeft] print "\nMatch both: " print [rule0.examples[i] for i in rule0.matchBoth] inducer = orange.AssociationRulesSparseInducer(support = 0.5) itemsets = inducer.getItemsets(data) print itemsets[5]
import orange import orngCN2 data = orange.ExampleTable("titanic.tab") # create learner learner = orange.RuleLearner() cl = learner(data) for r in cl.rules: print orngCN2.ruleToString(r) print "*****" learner.ruleFinder = orange.RuleBeamFinder() learner.ruleFinder.evaluator = orngCN2.mEstimate(m=50) cl = learner(data) for r in cl.rules: print orngCN2.ruleToString(r) print "****" learner.ruleFinder.ruleStoppingValidator = orange.RuleValidator_LRS( alpha=0.01, min_coverage=10, max_rule_complexity=2) learner.ruleFinder.ruleFilter = orange.RuleBeamFilter_Width(width=50) cl = learner(data) for r in cl.rules: print orngCN2.ruleToString(r)
# NOTE(review): the four functions below are methods of a widget class whose
# header is outside this chunk.
def dataset(self, data):
    """Store ``data`` when isDataWithClass accepts it (else None), then rebuild."""
    accepted = self.isDataWithClass(data, orange.VarTypes.Discrete)
    self.data = data if (accepted and data) else None
    self.setLearner()


def qualityButtonPressed(self, id=0):
    """Record the chosen rule-quality option and sync the button group."""
    self.QualityButton = id
    for position, button in enumerate(self.ruleQualityBG.buttons):
        button.setChecked(position == id)
    # The m spin box is enabled only for option 1.
    self.mSpin.control.setEnabled(id == 1)


def coveringAlgButtonPressed(self, id=0):
    """Record the chosen covering option and sync the button group."""
    self.CoveringButton = id
    for position, button in enumerate(self.coveringAlgBG.buttons):
        button.setChecked(position == id)
    # The weight spin box is enabled only for option 1.
    self.weightSpin.control.setEnabled(id == 1)


def applySettings(self):
    """Rebuild the learner with the current settings."""
    self.setLearner()


if __name__ == "__main__":
    app = QApplication(sys.argv)
    w = OWCN2()
    #w.dataset(orange.ExampleTable("titanic.tab"))
    sample = orange.ExampleTable(
        r"E:\Development\Orange Datasets\UCI\titanic.tab")
    w.dataset(sample)
    w.show()
    app.exec_()
    w.saveSettings()
def nway(): engine_to_examples = {} trainer = Trainer() classes = set() for i, key in enumerate(trainer.annotationEngines): engine = trainer.engineMap[key] table = trainer.makeTable(engine) for ex in table: if ex["farAway"].value: cls = "null" else: cls = ex["sourceEngineName"].value geometry = ex["geometry"].value engine_to_examples.setdefault(cls, []) classes.add(cls) examples = [ trainer.engineMap[key].makeExample(expectInsane=True, **geometry) for key in trainer.annotationEngines if not len(geometry["figure"]) == 0 ] engine_to_examples[cls].append(examples) if i >= 1: #break pass variables = [] for ex in examples: for attr in ex.domain: if attr.name == "class": continue new_attr = orange.FloatVariable(attr.name) variables.append(new_attr) domain = orange.Domain(variables, orange.EnumVariable("class", values=list(classes))) table = orange.ExampleTable(domain) for engine_name, example_lists in engine_to_examples.iteritems(): for example_list in example_lists: ex = orange.Example(domain) for engine_ex in example_list: for attr in engine_ex.domain: ex[attr.name] = engine_ex[attr.name] ex["class"] = engine_name table.append(ex) print "domain", domain cv_indices = orange.MakeRandomIndices2(table, p0=0.75) training = table.select(cv_indices, 0, negate=True) testing = table.select(cv_indices, 0, negate=False) #classifier = orngBayes.BayesLearner(training) classifier = orangePickle.PickleableClassifier(training, orngBayes.BayesLearner) results = orngTest.testOnData([classifier], testing) print orngStat.CA(results) cm = orngStat.confusionMatrices(results)[0] classes = list(domain.classVar.values) print " ", " ".join([c.rjust(12) for c in classes + ["", ""]]) for className, classConfusions in zip(classes, cm): #format = ("%s" + ("\t%i" * len(classes))) values = (className, ) + tuple(classConfusions) print " ".join([str(c).rjust(12) for c in values]) #print format % values for name in classes: classIndex = classes.index(name) mpl.figure() rocCurve(results, "", classIndex, 
stepSize=0.001, plotArgs=dict(linewidth=5, markersize=10)) mpl.title(name, size=30) mpl.xlabel("FP", fontsize=30) mpl.ylabel("TP", fontsize=30) mpl.xticks([0, 1], fontsize=17) mpl.yticks([0, 1], fontsize=17) fname = "nway.pck" print "saving", fname with open(fname, "w") as f: pickle.dump(classifier, f, protocol=2) mpl.show()
def setLearner(self):
    """Build the unordered CN2 learner from the widget settings and send it.

    NOTE(review): this is a method of a widget class whose header is outside
    this chunk.

    The learner is sent on "Learner"; when data is present a classifier is
    trained and sent on both "Classifier" and "Unordered CN2 Classifier"
    (None is sent when there is no data).
    """
    if hasattr(self, "btnApply"):
        self.btnApply.setFocus()

    #progress bar
    self.progressBarInit()

    #learner
    self.learner = orngCN2.CN2UnorderedLearner()
    self.learner.name = self.name
    self.learner.progressCallback = CN2ProgressBar(self)
    self.send("Learner", self.learner)

    finder = orange.RuleBeamFinder()

    # Rule quality measure: 0 = Laplace, 1 = m-estimate, 2 = WRACC.
    quality = self.QualityButton
    if quality == 0:
        finder.evaluator = orange.RuleEvaluator_Laplace()
    elif quality == 1:
        finder.evaluator = orngCN2.mEstimate(self.m)
    elif quality == 2:
        finder.evaluator = orngCN2.WRACCEvaluator()

    # -1 is passed when the rule-length limit is switched off.
    length_limit = self.MaxRuleLength if self.useMaxRuleLength else -1

    finder.ruleStoppingValidator = orange.RuleValidator_LRS(
        alpha=self.stepAlpha,
        min_coverage=self.MinCoverage,
        max_rule_complexity=length_limit)
    finder.validator = orange.RuleValidator_LRS(
        alpha=self.Alpha,
        min_coverage=self.MinCoverage,
        max_rule_complexity=length_limit)
    finder.ruleFilter = orange.RuleBeamFilter_Width(width=self.BeamWidth)
    self.learner.ruleFinder = finder

    # Covering algorithm: 0 = default, 1 = multiplicative weights.
    covering = self.CoveringButton
    if covering == 0:
        self.learner.coverAndRemove = orange.RuleCovererAndRemover_Default()
    elif covering == 1:
        self.learner.coverAndRemove = orngCN2.CovererAndRemover_multWeights(
            mult=self.Weight)

    self.classifier = None
    self.error()
    if self.data:
        # Learn on a copy of the domain, then convert the examples kept by the
        # rules and by the classifier through that same domain copy.
        domain_copy = orange.Domain(self.data.domain)
        training = orange.ExampleTable(domain_copy, self.data)
        self.classifier = self.learner(training)
        self.classifier.name = self.name
        for rule in self.classifier.rules:
            rule.examples = orange.ExampleTable(domain_copy, rule.examples)
        self.classifier.examples = orange.ExampleTable(
            domain_copy, self.classifier.examples)
        self.classifier.setattr("data", self.classifier.examples)
        self.error("")
        # (An exception handler reporting "Classless domain." /
        # "CN2 can learn only from discrete class." / "Unknown error" used to
        # follow here and is commented out in the original.)

    self.send("Classifier", self.classifier)
    self.send("Unordered CN2 Classifier", self.classifier)
    self.progressBarFinished()
labels = [m.name for m in d.domain.getmetas().values()] + \ [a.name for a in d.domain.variables] self.labelCombo.addItems(labels) # here we would need to use the domain dependent setting of the label id self.labelCombo.setCurrentIndex(0) self.Label = labels[0] self.setLabel() def dataset(self, data): if data and len(data.domain.attributes): self.data = data self.setLabelComboItems() self.computeMatrix() else: self.send("Distance Matrix", None) ################################################################################################## # test script if __name__ == "__main__": import os data = orange.ExampleTable(r'../../doc/datasets/glass') data = orange.ExampleTable('glass') a = QApplication(sys.argv) ow = OWExampleDistance() ow.show() ow.dataset(data) a.exec_() ow.saveSettings()
# Description: Shows how to derive a Python class from orange.TreeSplitConstructor
# Category: classification, decision trees, callbacks to Python
# Classes: TreeSplitConstructor, Classifier, SubsetsGenerator_constSize, orngMisc.BestOnTheFly
# Uses: lenses
# Referenced: callbacks.htm

import orange, orngTree, orngMisc

tab = orange.ExampleTable(r"lenses.tab")


class CartesianClassifier(orange.Classifier):
    """Classifier whose class is the Cartesian product of two variables."""

    def __init__(self, var1, var2):
        self.var1 = var1
        self.var2 = var2
        self.noValues2 = len(var2.values)

        # One class value per (v1, v2) pair, with v2 varying fastest.
        product_name = "%sx%s" % (var1.name, var2.name)
        product_values = []
        for v1 in var1.values:
            for v2 in var2.values:
                product_values.append("%s-%s" % (v1, v2))
        self.classVar = orange.EnumVariable(product_name)
        self.classVar.values = product_values

    def __call__(self, ex, what=orange.Classifier.GetValue):
        # Position of the (var1, var2) pair in the ordering built in __init__.
        val = ex[self.var1] * self.noValues2 + ex[self.var2]
        if what == orange.Classifier.GetValue:
            return orange.Value(self.classVar, val)

        # All probability mass goes to the single matching class value.
        probs = orange.DiscDistribution(self.classVar)
        probs[val] = 1.0
        if what != orange.Classifier.GetProbabilities:
            return (orange.Value(self.classVar, val), probs)
        return probs
# NOTE(review): the four functions below are methods of a widget class whose
# header is outside this chunk.
def setLearner(self, learner=None):
    """Store the base learner and recompute the outputs."""
    self.learner = learner
    self.commit()


def setData(self, data):
    """Store the input data and recompute the outputs."""
    self.data = data
    self.commit()


def onChange(self):
    """Settings-change hook; intentionally does nothing."""
    pass


def commit(self):
    """Send the wrapped learner and, when data is present, its classifier."""
    if self.learner:
        # Element 1 of the selected METHODS entry is called with the base
        # learner and t to build the wrapped learner.
        build = self.METHODS[self.method][1]
        wrapped = build(self.learner, t=self.t)
    else:
        wrapped = None
    self.send("Learner", wrapped)

    if self.data and wrapped:
        self.send("Classifier", wrapped(self.data))


if __name__ == "__main__":
    app = QApplication(sys.argv)
    w = OWEnsemble()
    w.setLearner(orange.BayesLearner())
    w.setData(orange.ExampleTable("../../doc/datasets/iris"))
    w.show()
    app.exec_()
def __call__(self, examples, weightID=0, **kwds):
    """Select attributes stepwise and return the learner trained on them.

    NOTE(review): this is a method of a wrapper-learner class whose header is
    outside this chunk.

    Starting from a domain without attributes, each round first tries to
    remove one attribute (only when at least two are selected) and then tries
    to add one. Candidates are scored with ``self.stat`` on cross-validation
    results that all share the same fold indices; ``self.statsign`` is the
    ``cmp`` result that means "better". The loop ends after a round in which
    nothing was removed or added.

    Parameters:
        examples -- example table to learn from.
        weightID -- id of the weight meta attribute (0 by default).
        kwds     -- copied into the instance attributes before running.

    Returns:
        ``self.learner`` trained on ``examples`` restricted to the selected
        attributes.

    Raises:
        ValueError -- when ``removeThreshold`` is smaller than
        ``addThreshold``. The original raised a plain string, which Python
        2.6+ rejects with a TypeError.

    Fixes relative to the original: the removal branch assigned the
    per-fold statistics to ``bestAttr`` and the attribute to ``bestStats``
    (swapped tuple); and "no candidate yet" is now tested with ``is None`` so
    that a legitimate score of 0 is not mistaken for "nothing found".
    """
    import orngTest, orngStat, statc

    self.__dict__.update(kwds)

    if self.removeThreshold < self.addThreshold:
        raise ValueError(
            "'removeThreshold' should be larger or equal to 'addThreshold'")

    classVar = examples.domain.classVar
    indices = orange.MakeRandomIndicesCV(examples,
                                         folds=getattr(self, "folds", 10))

    def evaluate(candidate):
        # Score one candidate domain: overall statistic plus per-fold values.
        res = orngTest.testWithIndices(
            [self.learner],
            (orange.ExampleTable(candidate, examples), weightID), indices)
        return (self.stat(res)[0],
                [self.stat(x)[0] for x in orngStat.splitByIterations(res)])

    domain = orange.Domain([], classVar)
    # The baseline run is unweighted, exactly as in the original.
    res = orngTest.testWithIndices([self.learner],
                                   orange.ExampleTable(domain, examples),
                                   indices)
    oldStat = self.stat(res)[0]
    oldStats = [self.stat(x)[0] for x in orngStat.splitByIterations(res)]
    print(".", oldStat, domain)

    stop = False
    while not stop:
        stop = True

        # --- removal step ---
        if len(domain.attributes) >= 2:
            bestStat = None
            for attr in domain.attributes:
                newdomain = orange.Domain(
                    [x for x in domain.attributes if x != attr], classVar)
                newStat, newStats = evaluate(newdomain)
                print("-", newStat, newdomain)
                ## If stat has increased (ie newStat is better than bestStat)
                if bestStat is None or cmp(newStat, bestStat) == self.statsign:
                    if cmp(newStat, oldStat) == self.statsign:
                        bestStat, bestStats, bestAttr = newStat, newStats, attr
                    elif statc.wilcoxont(oldStats,
                                         newStats)[1] > self.removeThreshold:
                        # Not better than the current set, but not
                        # significantly different either: still removable.
                        bestStat, bestStats, bestAttr = newStat, newStats, attr
            if bestStat is not None:
                domain = orange.Domain(
                    [x for x in domain.attributes if x != bestAttr], classVar)
                oldStat, oldStats = bestStat, bestStats
                stop = False
                print("removed", bestAttr.name)

        # --- addition step ---
        bestStat, bestAttr = oldStat, None
        for attr in examples.domain.attributes:
            if not attr in domain.attributes:
                newdomain = orange.Domain(domain.attributes + [attr], classVar)
                newStat, newStats = evaluate(newdomain)
                print("+", newStat, newdomain)
                ## If stat has increased (ie newStat is better than bestStat)
                if cmp(newStat, bestStat) == self.statsign and statc.wilcoxont(
                        oldStats, newStats)[1] < self.addThreshold:
                    bestStat, bestStats, bestAttr = newStat, newStats, attr
        if bestAttr is not None:
            domain = orange.Domain(domain.attributes + [bestAttr], classVar)
            oldStat, oldStats = bestStat, bestStats
            stop = False
            print("added", bestAttr.name)

    return self.learner(orange.ExampleTable(domain, examples), weightID)
# Description: Shows different uses of orange.Domain # Category: preprocessing # Uses: glass # Classes: Domain # Referenced: domain.htm import orange domain = orange.ExampleTable("glass").domain tests = ( '(["Na", "Mg"], domain)', '(["Na", "Mg"], 1, domain)', '(["Na", "Mg"], 0, domain)', '(["Na", "Mg"], domain.variables)', '(["Na", "Mg"], 1, domain.variables)', '(["Na", "Mg"], 0, domain.variables)', '([domain["Na"], "Mg"], 0, domain.variables)', '([domain["Na"], "Mg"], 0, domain)', '([domain["Na"], "Mg"], 0, domain.variables)', '([domain["Na"], domain["Mg"]], 0)', '([domain["Na"], domain["Mg"]], 1)', '([domain["Na"], domain["Mg"]], None)', '([domain["Na"], domain["Mg"]], orange.EnumVariable("something completely different"))', '(domain)', '(domain, 0)', '(domain, 1)', '(domain, "Mg")', '(domain, domain[0])', '(domain, None)', '(domain, orange.FloatVariable("nothing completely different"))') for args in tests: line = "orange.Domain%s" % args d = eval(line) print line print " classVar: %s" % d.classVar print " attributes: %s" % d.attributes
import orange import orngClustering def callback(km): print "Iteration: %d, changes: %d, score: %.4f" % (km.iteration, km.nchanges, km.score) data = orange.ExampleTable("iris") km = orngClustering.KMeans(data, 3, minscorechange=0, inner_callback=callback)
# Description: Builds a regression tree and prints it out
# Category: modelling
# Uses: housing
# Classes: orngTree.TreeLearner
# Referenced: regression.htm

import orange, orngTree

data = orange.ExampleTable("housing")

# Passing the data straight to the learner constructor yields the fitted tree.
tree_settings = dict(measure="retis", mForPruning=2, minExamples=20)
rt = orngTree.TreeLearner(data, **tree_settings)

orngTree.printTxt(rt, leafStr="%V %I")
# Description: Read data, show mean for continuous attributes and contingency matrix for nominal attributes # Category: description # Uses: adult_sample.tab # Classes: DomainContingency # Referenced: basic_exploration.htm import orange data = orange.ExampleTable("../datasets/adult_sample") print "Continuous attributes:" for a in range(len(data.domain.attributes)): if data.domain.attributes[a].varType == orange.VarTypes.Continuous: d = 0. n = 0 for e in data: if not e[a].isSpecial(): d += e[a] n += 1 print " %s, mean=%3.2f" % (data.domain.attributes[a].name, d / n) print "\nNominal attributes (contingency matrix for classes:", data.domain.classVar.values, ")" cont = orange.DomainContingency(data) for a in data.domain.attributes: if a.varType == orange.VarTypes.Discrete: print " %s:" % a.name for v in range(len(a.values)): sum = 0 for cv in cont[a][v]: sum += cv print " %s, total %d, %s" % (a.values[v], sum, cont[a][v]) print
def __call__(self, data, weight=None):
    """Fit a (weighted) least-squares linear model and return a LinearRegression.

    NOTE(review): this is a method of a learner class whose header is outside
    this chunk; ``dot``, ``sqrt``, ``inv``, ``stepwise``, ``statc`` and
    ``LinearRegression`` come from the enclosing module.

    Pipeline: optional restriction to ``self.use_attributes``; optional
    stepwise selection (before or after continuization, per
    ``self.stepwise_before``); continuization of discrete attributes;
    imputation with majority learners; conversion to numpy; solution of the
    weighted normal equations via the pseudo-inverse; model statistics.

    Parameters:
        data   -- example table to learn from.
        weight -- id of the weight meta attribute, or None for unit weights.

    Returns:
        LinearRegression carrying the statistics dictionary, the final
        domain, the fitted imputer and ``self.beta0``.

    Fixes relative to the original:
      * ``A == None`` on a numpy array compares element-wise; ``is None`` is
        used instead.
      * The F statistic divided by ``SST - SSR / (n - m - 1)``; the
        denominator is the residual mean square ``(SST - SSR) / (n - m - 1)``.
      * A dead ``df = m - 1`` assignment was removed and the bare ``except:``
        narrowed to ``except Exception:``.
    """
    if self.use_attributes is not None:
        new_domain = orange.Domain(self.use_attributes, data.domain.classVar)
        new_domain.addmetas(data.domain.getmetas())
        data = orange.ExampleTable(new_domain, data)

    if self.stepwise and self.stepwise_before:
        # NOTE(review): unlike the post-continuization call below, this call
        # does not pass ``weight`` -- confirm whether that is intended.
        use_attributes = stepwise(data, add_sig=self.add_sig,
                                  remove_sig=self.remove_sig)
        new_domain = orange.Domain(use_attributes, data.domain.classVar)
        new_domain.addmetas(data.domain.getmetas())
        data = orange.ExampleTable(new_domain, data)

    # continuization (replaces discrete with continuous attributes)
    continuizer = orange.DomainContinuizer()
    continuizer.multinomialTreatment = continuizer.FrequentIsBase
    continuizer.zeroBased = True
    domain0 = continuizer(data)
    data = data.translate(domain0)

    if self.stepwise and not self.stepwise_before:
        use_attributes = stepwise(data, weight, add_sig=self.add_sig,
                                  remove_sig=self.remove_sig)
        new_domain = orange.Domain(use_attributes, data.domain.classVar)
        new_domain.addmetas(data.domain.getmetas())
        data = orange.ExampleTable(new_domain, data)

    # missing values handling (impute missing)
    imputer = orange.ImputerConstructor_model()
    imputer.learnerContinuous = orange.MajorityLearner()
    imputer.learnerDiscrete = orange.MajorityLearner()
    imputer = imputer(data)
    data = imputer(data)

    # conversion to numpy
    A, y, w = data.toNumpy()  # weights ??
    if A is None:
        n = len(data)
        m = 0
    else:
        n, m = numpy.shape(A)

    if self.beta0 == True:
        if A is None:
            X = numpy.ones([len(data), 1])
        else:
            X = numpy.insert(A, 0, 1, axis=1)  # adds a column of ones
    else:
        X = A

    # set weights
    W = numpy.identity(len(data))
    if weight:
        for di, d in enumerate(data):
            W[di, di] = float(d[weight])

    # adds some robustness by computing the pseudo inverse; normal inverse
    # could fail due to singularity of the X.T*W*X
    D = dot(dot(numpy.linalg.pinv(dot(dot(X.T, W), X)), X.T), W)
    beta = dot(D, y)

    yEstimated = dot(X, beta)  # estimation

    # some descriptive statistics
    muY, sigmaY = numpy.mean(y), numpy.std(y)
    muX, covX = numpy.mean(X, axis=0), numpy.cov(X, rowvar=0)

    # model statistics
    SST, SSR = numpy.sum((y - muY)**2), numpy.sum((yEstimated - muY)**2)
    SSE, RSquare = SST - SSR, SSR / SST
    R = numpy.sqrt(RSquare)  # coefficient of determination
    RAdjusted = 1 - (1 - RSquare) * (n - 1) / (n - m - 1)
    F = (SSR / m) / (SSE / (n - m - 1))  # F statistic
    sigmaSquare = SSE / (n - m - 1)

    # standard error of estimated coefficients
    errCoeff = sqrt(sigmaSquare * inv(dot(X.T, X)).diagonal())

    # t statistic, significance
    t = beta / errCoeff
    df = n - 2
    significance = []
    for tt in t:
        try:
            significance.append(
                statc.betai(df * 0.5, 0.5, df / (df + tt * tt)))
        except Exception:
            # Best effort: a failed computation is reported as p = 1.0.
            significance.append(1.0)

    # standardized coefficients
    if m > 0:
        stdCoeff = (sqrt(covX.diagonal()) / sigmaY) * beta
    else:
        stdCoeff = (sqrt(covX) / sigmaY) * beta

    # NOTE(review): 'ExplVar' receives SSE (= SST - SSR) and 'ResVar'
    # receives SSR; the labels look swapped but are kept because callers may
    # rely on them -- confirm.
    model = {
        'descriptives': {'meanX': muX, 'covX': covX,
                         'meanY': muY, 'sigmaY': sigmaY},
        'model': {'estCoeff': beta, 'stdErrorEstimation': errCoeff},
        'model summary': {'TotalVar': SST, 'ExplVar': SSE, 'ResVar': SSR,
                          'R': R, 'RAdjusted': RAdjusted, 'F': F,
                          't': t, 'sig': significance},
    }
    return LinearRegression(statistics=model, domain=data.domain,
                            name=self.name, beta0=self.beta0, imputer=imputer)
print "Attributes in favor of %s = %s [%f]" % ( t.domain.classVar.name, t.domain.classVar.values[1], m.probfunc(m.example_c[idx][0])) printpie(e1, m.probfunc(m.example_c[idx][0])) print "\nProjection of the example in the basis space:" j = 0 for i in range(len(m.coeff_names)): print m.coeff_names[i][0], ':' for x in m.coeff_names[i][1:]: print '\t', x, '=', vector[j] j += 1 print "beta:", -m.beta #t = orange.ExampleTable('c:/proj/domains/voting.tab') # discrete t = orange.ExampleTable( r"E:\Development\Orange Datasets\UCI\shuttle.tab") # discrete #t = orange.ExampleTable('c_cmc.tab') # continuous print "NAIVE BAYES" print "===========" bl = orange.BayesLearner() bl.estimatorConstructor = orange.ProbabilityEstimatorConstructor_Laplace() # prevent too many estimation points # increase the smoothing level bl.conditionalEstimatorConstructorContinuous = orange.ConditionalProbabilityEstimatorConstructor_loess( windowProportion=0.5, nPoints=10) c = bl(t) printmodel(t, c, printexamples=0) print "\n\nLOGISTIC REGRESSION"
## data = orange.ExampleTable(r'..\..\doc\datasets\adult_sample.tab') ## dataA = orange.ExampleTable(r'c:\Documents and Settings\peterjuv\My Documents\STEROLTALK\Sterolgene v.0 mouse\sterolgene v.0 mouse probeRatios.tab') ## dataA = orange.ExampleTable(r'c:\Documents and Settings\peterjuv\My Documents\STEROLTALK\Sterolgene v.0 mouse\Copy of sterolgene v.0 mouse probeRatios.tab') ## dataB = orange.ExampleTable(r'c:\Documents and Settings\peterjuv\My Documents\STEROLTALK\Sterolgene v.0 mouse\sterolgene v.0 mouse probeRatios.tab') dataA = orange.ExampleTable(r'c:\Documents and Settings\peterjuv\My Documents\et1.tab') dataB = orange.ExampleTable(r'c:\Documents and Settings\peterjuv\My Documents\et2.tab') a=QApplication(sys.argv) ow=OWMergeData() a.setMainWidget(ow) ow.show() ow.onDataAInput(dataA) ow.onDataBInput(dataB) # data table dt = OWDataTable.OWDataTable(signalManager = signalManager) signalManager.addWidget(ow) signalManager.addWidget(dt) signalManager.setFreeze(1) signalManager.addLink(ow, dt, 'Merged Examples A+B', 'Examples', 1) signalManager.addLink(ow, dt, 'Merged Examples B+A', 'Examples', 1) signalManager.setFreeze(0) dt.show() a.exec_() """ import sys a = QApplication(sys.argv) ow = OWMergeData() ow.show() data = orange.ExampleTable("iris.tab") ow.onDataAInput(data) a.exec_()
# Description: using your own imputer and continuizer in PCA # Category: projection # Uses: adult_sample # Referenced: orngPCA.htm # Classes: orngPCA.PCA import orange, orngPCA data = orange.ExampleTable("bridges.tab") imputer = orange.ImputerConstructor_maximal continuizer = orange.DomainContinuizer() continuizer.multinomialTreatment = continuizer.AsNormalizedOrdinal continuizer.classTreatment = continuizer.Ignore continuizer.continuousTreatment = continuizer.Leave pca = orngPCA.PCA(data, standardize=True, imputer=imputer, continuizer=continuizer) print pca