def main(): print "loading" annotations = annotation_reader.from_file("%s/data/directions/breadbox/nouns_stefie10.txt" % TKLIB_HOME) table = annotations.as_orange_table() cv_indices = orange.MakeRandomIndices2(table, p0=0.5) print "indices", set(cv_indices) print "splitting" training, testing = annotation_reader.split(annotations, cv_indices) print "features" engine = PairwiseEngine(training) training_table = engine.training_table testing_table = engine.makeTable(testing) print len(training_table), "training" print len(testing_table), "testing" learners = [orange.MajorityLearner(), orngEnsemble.RandomForestLearner(), ] results = orngTest.learnAndTestOnTestData(learners, training_table, testing_table) for accuracy, cm in zip(orngStat.CA(results), orngStat.confusionMatrices(results)): print orangeUtils.confusion_matrix_to_string(table.domain, cm) print "accuracy: %.2f%%" % (accuracy*100)
def constructLearner(self):
    """Build a random forest over orngTree base trees per the settings."""
    rng = random.Random(self.rseed)
    # Attributes considered per split; None leaves the Orange default.
    attrs = self.attributesP if self.attributes else None

    base = orngTree.TreeLearner()
    base.stop.minExamples = self.preNodeInstP if self.preNodeInst else 0
    # Keep full per-node information on the grown trees.
    base.storeExamples = 1
    base.storeNodeClassifier = 1
    base.storeContingencies = 1
    base.storeDistributions = 1
    if self.limitDepth:
        base.maxDepth = self.limitDepthP

    learner = orngEnsemble.RandomForestLearner(base_learner=base,
                                               trees=self.trees,
                                               rand=rng,
                                               attributes=attrs)
    if self.preprocessor:
        learner = self.preprocessor.wrapLearner(learner)
    learner.name = self.name
    return learner
def constructLearner(self):
    """Build a random forest over lightweight SimpleTreeLearner trees."""
    rng = random.Random(self.rseed)
    attrs = self.attributesP if self.attributes else None

    # Imported lazily, as in the original, so module import does not
    # require the newer Orange API to be present.
    from Orange.classification.tree import SimpleTreeLearner
    base = SimpleTreeLearner()
    base.min_instances = self.preNodeInstP if self.preNodeInst else 0
    if self.limitDepth:
        base.max_depth = self.limitDepthP

    learner = orngEnsemble.RandomForestLearner(base_learner=base,
                                               trees=self.trees,
                                               rand=rng,
                                               attributes=attrs)
    if self.preprocessor:
        learner = self.preprocessor.wrapLearner(learner)
    learner.name = self.name
    return learner
def train(self, trainset):
    """
    Trains a random forest using Orange.
    """
    self.n_classes = len(trainset.metadata['targets'])
    orange_data = make_orange_dataset(trainset)
    self.trainset_domain = orange_data.domain
    import random
    # Calling the learner on the table trains and returns the classifier.
    forest_learner = orngEnsemble.RandomForestLearner(
        trees=self.n_trees,
        attributes=self.n_features_per_node,
        rand=random.Random(self.seed),
        name="forest")
    self.forest = forest_learner(orange_data)
def main(): print "loading" annotations = annotation_reader.from_file( "%s/data/directions/breadbox/nouns_stefie10.txt" % TKLIB_HOME) annotator2 = annotation_reader.from_file( "%s/data/directions/breadbox/nouns_dlaude.partial.txt" % TKLIB_HOME) #histogram(annotations) print "table" table = annotations.as_orange_table() cv_indices = orange.MakeRandomIndices2(table, p0=0.5) print "indices", set(cv_indices) print "splitting" training, testing = annotation_reader.split(annotations, cv_indices) print "features" engine = WordnetParentsEngine(training) training_table = engine.makeTable(training) testing_table = engine.makeTable(testing) #training_table, testing_table = wordnet_parents(training, testing) #training_table, testing_table = wordnet_glosses(training, testing) #training_table, testing_table = flickr_parents(training, testing) print len(training_table), "training examples" print len(testing_table), "testing examples" #training_table = annotation_reader.to_big_small(training_table) #testing_table = annotation_reader.to_big_small(testing_table) #information_gain = orange.MeasureAttribute_info() #for x in training_table.domain.attributes: # print "x", information_gain(x, training_table) learners = [ orange.MajorityLearner(), orngEnsemble.RandomForestLearner(), WordnetKnnClassifier, agreement.WizardOfOzLearner(annotator2.as_orange_table()) ] results = orngTest.learnAndTestOnTestData(learners, training_table, testing_table) for accuracy, cm in zip(orngStat.CA(results), orngStat.confusionMatrices(results)): print orangeUtils.confusion_matrix_to_string(table.domain, cm) print "accuracy: %.2f%%" % (accuracy * 100)
def __init__(self, training): self.training = training self.ancestor_to_count = training.ancestor_map() self.all_ancestors = list(self.ancestor_to_count.keys()) self.all_ancestors.sort(key=lambda a: self.ancestor_to_count[a], reverse=True) self.used_ancestors = self.all_ancestors print "name", self.used_ancestors[0].name self.attributes = [ orange.EnumVariable(a.name, values=["True", "False"]) for a in self.used_ancestors ] #self.attributes = [orange.FloatVariable(a.name) # for a in self.used_ancestors] print "got", len(self.used_ancestors), "features" self.domain = orange.Domain(self.attributes, training.orange_class_var) self.domain.addmeta(orange.newmetaid(), orange.StringVariable("word")) table = self.makeTable(self.training) self.classifier = orngEnsemble.RandomForestLearner()(table)
def setLearner(self): self.progressBarInit() if hasattr(self, "btnApply"): self.btnApply.setFocus() #assemble learner rand = random.Random(self.rseed) attrs = None if self.attributes: attrs = self.attributesP self.learner = orngEnsemble.RandomForestLearner(trees = self.trees, rand=rand, attributes=attrs, callback=self.pbchange) if self.preNodeInst: self.learner.learner.stop.minExamples = self.preNodeInstP else: self.learner.learner.stop.minExamples = 0 self.learner.learner.storeExamples = 1 self.learner.learner.storeNodeClassifier = 1 self.learner.learner.storeContigencies = 1 self.learner.learner.storeDistributions = 1 if self.limitDepth: self.learner.learner.maxDepth = self.limitDepthP self.learner.name = self.name self.send("Learner", self.learner) self.error() if self.data: try: self.classifier = self.learner(self.data) self.classifier.name = self.name self.streeEnabled(True) except Exception, (errValue): self.error(str(errValue)) self.classifier = None self.streeEnabled(False)
def learnModel(self, X, y):
    """Fit a weighted Orange random forest on binary-labelled (X, y)."""
    if numpy.unique(y).shape[0] != 2:
        raise ValueError("Can only operate on binary data")
    classes = numpy.unique(y)
    self.worstResponse = classes[classes != self.bestResponse][0]

    # Orange wants class indices packed alongside the features.
    newY = self.labelsToInds(y)
    XY = numpy.c_[X, newY]

    attrList = [orange.FloatVariable("X" + str(col))
                for col in range(X.shape[1])]
    classVar = orange.EnumVariable("y")
    classVar.addValue(str(self.bestResponse))
    classVar.addValue(str(self.worstResponse))
    attrList.append(classVar)
    self.domain = orange.Domain(attrList)
    eTable = orange.ExampleTable(self.domain, XY)

    # Re-weight examples so the positive class carries self.weight.
    preprocessor = orange.Preprocessor_addClassWeight(equalize=1)
    preprocessor.classWeights = [1 - self.weight, self.weight]
    eTable, weightID = preprocessor(eTable)
    eTable.domain.addmeta(weightID, orange.FloatVariable("w"))

    tree = orngTree.TreeLearner(mForPruning=self.m,
                                measure="gainRatio",
                                minExamples=self.minSplit,
                                maxDepth=self.maxDepth).instance()
    self.learner = orngEnsemble.RandomForestLearner(
        learner=tree,
        trees=self.numTrees,
        attributes=numpy.round(X.shape[1] * self.featureSize))
    self.classifier = self.learner(eTable, weightID)
def fit(self, X, y):
    """Fit an Orange random forest behind an sklearn-style interface."""
    self.n_features_ = X.shape[1]

    # Resolve max_features into a concrete per-split attribute count.
    if isinstance(self.max_features, str):
        if self.max_features in ("auto", "sqrt"):
            max_features = max(1, int(np.sqrt(self.n_features_)))
        elif self.max_features == "log2":
            max_features = max(1, int(np.log2(self.n_features_)))
        else:
            raise ValueError(
                'Invalid value for max_features. Allowed string '
                'values are "auto", "sqrt" or "log2".')
    elif self.max_features is None:
        max_features = self.n_features_
    elif isinstance(self.max_features, (numbers.Integral, np.integer)):
        max_features = self.max_features
    else:  # float: a fraction of the feature count
        max_features = int(self.max_features * self.n_features_)

    # Convert data: labels become class indices, features float32.
    self.classes_ = np.unique(y)
    self.n_classes_ = len(self.classes_)
    y = np.searchsorted(self.classes_, y)
    X = X.astype(np.float32)
    self.table_ = make_orange_dataset(X, y, self.n_classes_)

    # Passing the table to the learner trains the forest immediately.
    self.model_ = orngEnsemble.RandomForestLearner(
        self.table_, trees=self.n_estimators, attributes=max_features)
    return self
def constructLearner(self):
    """Create a random-forest learner configured from the widget settings."""
    rng = random.Random(self.rseed)
    attrs = self.attributesP if self.attributes else None

    learner = orngEnsemble.RandomForestLearner(trees=self.trees,
                                               rand=rng,
                                               attributes=attrs)
    # Configure the forest's internal base tree learner.
    base = learner.learner
    base.stop.minExamples = self.preNodeInstP if self.preNodeInst else 0
    base.storeExamples = 1
    base.storeNodeClassifier = 1
    base.storeContingencies = 1
    base.storeDistributions = 1
    if self.limitDepth:
        base.maxDepth = self.limitDepthP

    learner.name = self.name
    return learner
def setUp(self):
    """Create a fresh random-forest learner before each test."""
    import orngEnsemble, orngTree
    self.learner = orngEnsemble.RandomForestLearner()
def train(self, keyword, learner="svm"): training_docs, test_docs, train_label, test_label = self.get_training_test_sets( keyword, 0.8) if (len(train_label) == 0): return None F_train = [] print "--------------TRAIN:", keyword, "-------------------" for i, doc_i in enumerate(training_docs): #print self.documents[i] myfeatures = self.get_features(keyword, self.documents[doc_i]) if (not '1' in myfeatures): continue myfeatures.append(str(int(train_label[i]))) F_train.append(myfeatures) if (len(F_train) == 0): return None #create the attributes and domain table = orange.ExampleTable(self.get_domain()) #define the rest of the table by addign elements to it for i in range(len(F_train)): #print self.known_objects #print "i=", i #print "ftrain[i]", zip(self.known_objects, F_train[i]), #print " label[i]", train_label[i] #F_train[i].append(str(int(train_label[i]))) table.append(F_train[i]) #perform the learning if (learner == "bayes"): print "running bayes" classifier = orngBayes.BayesLearner(table) #classifier = orngBayes.BayesLearner(table, m=2) elif (learner == "tree"): print "running tree" classifier = orngTree.TreeLearner(table) elif (learner == "svm"): #can't load the svmlearner print "running svm" classifier = orngSVM.SVMLearner(table, svm_type=orange.SVMLearner.Nu_SVC, nu=0.3, probability=True) elif (learner == "boosting"): #problem here too #this is meant to be adaboost classifier = orngTree.BoostedLearner(table) elif (learner == "randomforest"): #problem here too classifier = orngEnsemble.RandomForestLearner(table, trees=50, name="forest") else: print "unknown learner" raise return classifier
# Description: Demonstrates the use of random forests from orngEnsemble module # Category: classification, ensembles # Classes: RandomForestLearner # Uses: bupa.tab # Referenced: orngEnsemble.htm import orange, orngTree, orngEnsemble data = orange.ExampleTable('bupa.tab') tree = orngTree.TreeLearner(minExamples=2, mForPrunning=2, \ sameMajorityPruning=True, name='tree') forest = orngEnsemble.RandomForestLearner(trees=50, name="forest") learners = [tree, forest] import orngTest, orngStat results = orngTest.crossValidation(learners, data, folds=3) print "Learner CA Brier AUC" for i in range(len(learners)): print "%-8s %5.3f %5.3f %5.3f" % (learners[i].name, \ orngStat.CA(results)[i], orngStat.BrierScore(results)[i], orngStat.AUC(results)[i])
# Description: Defines a tree learner (trunks of depth less than 5) and uses them in forest tree, prints out the number of nodes in each tree # Category: classification, ensembles # Classes: RandomForestLearner # Uses: bupa.tab # Referenced: orngEnsemble.htm import orange, orngTree, orngEnsemble data = orange.ExampleTable('bupa.tab') tree = orngTree.TreeLearner(storeNodeClassifier = 0, storeContingencies=0, \ storeDistributions=1, minExamples=5, ).instance() gini = orange.MeasureAttribute_gini() tree.split.discreteSplitConstructor.measure = \ tree.split.continuousSplitConstructor.measure = gini tree.maxDepth = 5 tree.split = orngEnsemble.SplitConstructor_AttributeSubset(tree.split, 3) forestLearner = orngEnsemble.RandomForestLearner(learner=tree, trees=50) forest = forestLearner(data) for c in forest.classifiers: print orngTree.countNodes(c), print
def train(self): F_train_obs = [] F_train_trans = [] print "--------------creating tables-------------------" for i, obs in enumerate(self.dataset.observations): print i, "of", len(self.dataset.observations) f_obs, f_trans = self.dataset.to_orange_entries(obs) for i, fs in enumerate(f_obs): print i, f_obs[0] F_train_obs.append(fs) for fs in f_trans: F_train_trans.append(fs) if (len(F_train_obs) == 0): return None print ">> adding elements to tables" #create the attributes and domain #define the rest of the table by addign elements to it table_obs = orange.ExampleTable(self.get_domain_obs()) for i in range(len(F_train_obs)): table_obs.append(F_train_obs[i]) table_trans = orange.ExampleTable(self.get_domain_trans()) for i in range(len(F_train_trans)): table_trans.append(F_train_trans[i]) #perform the learning print "training" if (self.learner == "bayes"): print "training bayes obs" self.classifier_obs = orngBayes.BayesLearner(table_obs) print "training bayes trans" self.classifier_trans = orngBayes.BayesLearner(table_trans) elif (self.learner == "tree"): print "running tree" self.classifier_obs = orngTree.TreeLearner(table_obs) self.classifier_trans = orngTree.TreeLearner(table_trans) elif (self.learner == "svm"): #can't load the svmlearner print "trianing observation svm" self.classifier_obs = orngSVM.SVMLearner( table_obs, svm_type=orange.SVMLearner.Nu_SVC, nu=0.3, probability=True) print "trianing transition svm" self.classifier_trans = orngSVM.SVMLearner( table_trans, svm_type=orange.SVMLearner.Nu_SVC, nu=0.3, probability=True) elif (self.learner == "boosting"): #problem here too #this is meant to be adaboost self.classifier_obs = orngTree.BoostedLearner(table_obs) self.classifier_trans = orngTree.BoostedLearner(table_trans) elif (self.learner == "randomforest"): #problem here too self.classifier_obs = orngEnsemble.RandomForestLearner( table_obs, trees=50, name="forest") self.classifier_trans = orngEnsemble.RandomForestLearner( table_trans, trees=50, 
name="forest") else: print "unknown learner" raise return self.classifier_obs, self.classifier_trans