def readQuinTreeRec(fle, domain):
    """Recursively read one node, and its whole subtree, of a QUIN tree.

    Exactly one line is consumed from ``fle`` for the node itself.

    * If the line matches ``re_node`` it describes a binary split on
      ``splitattr`` at ``thresh``.  The two subtrees are then read
      recursively, first the ``<= thresh`` branch and then the
      ``> thresh`` branch.
    * Otherwise the line must match ``re_constraint`` and describes a leaf
      carrying a qualitative constraint together with its coverage,
      ambiguity and number of examples.

    Parameters:
        fle: open file-like object positioned at the line of this node.
        domain: Orange domain; indexed by attribute name for splits and
            iterated through ``domain.attributes`` for leaves.

    Returns:
        The ``orange.TreeNode`` built for this line.  Every returned node
        carries an ``nExamples`` attribute; leaves additionally carry
        ``coverage`` and ``ambiguity``.

    Raises:
        ValueError: if the line matches neither pattern (this includes
            hitting end of file, where ``readline`` returns ``""``).
    """
    node = orange.TreeNode()
    line = fle.readline()

    match = re_node.search(line)
    if match:
        splitattr, thresh = match.group("splitattr", "thresh")
        # Default arguments bind the attribute and threshold now, so the
        # selector does not depend on names that change later.
        node.branchSelector = orange.Classifier(
            lambda ex, rw, attr=domain[splitattr], thr=float(thresh):
                ex[attr] > thr)
        node.branchDescriptions = ["<= " + thresh, "> " + thresh]
        # The list literal is evaluated left to right, which matches the
        # order in which the two subtrees appear in the file.
        node.branches = [
            readQuinTreeRec(fle, domain),
            readQuinTreeRec(fle, domain),
        ]
        sizes = [node.branches[0].nExamples, node.branches[1].nExamples]
        node.branchSizes = sizes
        # Internal nodes need nExamples too: a parent split reads it from
        # its children when filling in its own branchSizes.
        node.setattr("nExamples", sizes[0] + sizes[1])
        return node

    match = re_constraint.search(line)
    if not match:
        # String exceptions are not valid; raise a real exception type.
        raise ValueError("Cannot read line '%s'" % line)

    attrs, signs, cov, amb, xmpls = match.group(
        "attrs", "signs", "cov", "amb", "xmpls")
    # Map attribute name -> sign as listed in the constraint.
    mqc = dict(zip(attrs.split(","), signs.split(",")))
    # One base-3 digit per domain attribute; attributes not mentioned in
    # the constraint are looked up in sidx under the key None.
    digits = "".join(
        [sidx[mqc.get(attr.name, None)] for attr in domain.attributes])
    node.nodeClassifier = orange.DefaultClassifier(defaultVal=int(digits, 3))

    # "**" marks a coverage without a number; otherwise the value is the
    # number preceding the percent sign.
    if cov == "**":
        coverage = -1
    else:
        coverage = float(cov[:cov.index("%")])
    node.setattr("coverage", coverage)
    node.setattr("ambiguity", float(amb))
    node.setattr("nExamples", float(xmpls))
    return node
def __call__(self, example, result_type=orange.GetValue):
    """Classify ``example`` with the first applicable rule, else the prior.

    The rules in ``self.rules`` are tried in order.  The first rule for
    which ``rule(example)`` is true and which has a classifier supplies the
    classifier; its ``defaultDistribution`` is set to the rule's
    ``classDistribution``.  If no rule qualifies, a default classifier is
    built from ``self.prior.modus()`` and given ``self.prior`` as its
    distribution.

    Parameters:
        example: the example to classify.
        result_type: ``orange.GetValue`` returns only the predicted value,
            ``orange.GetProbabilities`` returns only the distribution, and
            any other value returns the pair ``(value, distribution)``.
    """
    for rule in self.rules:
        # r.filter.domain = example.domain
        if rule(example) and rule.classifier:
            chosen = rule.classifier
            chosen.defaultDistribution = rule.classDistribution
            break
    else:
        # No rule fired: fall back to the prior distribution.
        chosen = orange.DefaultClassifier(example.domain.classVar,
                                          self.prior.modus())
        chosen.defaultDistribution = self.prior

    if result_type == orange.GetValue:
        return chosen(example)
    elif result_type == orange.GetProbabilities:
        return chosen.defaultDistribution
    else:
        return (chosen(example), chosen.defaultDistribution)
# NOTE(review): this view starts mid-script.  The leading bare print emits
# an empty line; it may belong to a statement that begins above this chunk.
print

# Apply the imputer configured earlier (outside this view) to the data and
# show rows 20-24 before and after imputation, with a blank line after
# each pair.
impdata = imputer(data)
for i in range(20, 25):
    print data[i]
    print impdata[i]
    print

print "\n*** CUSTOM IMPUTATION BY MODELS ***\n"

# Model-based imputer with one model slot per entry of the domain; all
# slots start as None.
# NOTE(review): presumably a None slot means "no model for this
# attribute" - confirm against the Orange Imputer_model documentation.
imputer = orange.Imputer_model()
imputer.models = [None] * len(data.domain)

# LANES: model is a DefaultClassifier constructed from the constant 2.0.
imputer.models[data.domain.index("LANES")] = orange.DefaultClassifier(2.0)

# T-OR-D: model is a DefaultClassifier constructed from the value "THROUGH"
# of that attribute.
tord = orange.DefaultClassifier(orange.Value(data.domain["T-OR-D"], "THROUGH"))
imputer.models[data.domain.index("T-OR-D")] = tord

# LENGTH: model is a tree learned on a reduced domain made of MATERIAL,
# SPAN, ERECTED and LENGTH.
# NOTE(review): presumably the last listed attribute (LENGTH) acts as the
# class variable of len_domain - confirm.
import orngTree
len_domain = orange.Domain(["MATERIAL", "SPAN", "ERECTED", "LENGTH"], data.domain)
len_data = orange.ExampleTable(len_domain, data)
len_tree = orngTree.TreeLearner(len_data, minSubset=20)
imputer.models[data.domain.index("LENGTH")] = len_tree
# Print the learned tree in text form.
orngTree.printTxt(len_tree)

# SPAN: hand-written function; spanVar is the SPAN attribute used to build
# the values it returns.
spanVar = data.domain["SPAN"]
# NOTE(review): only the first branch of computeSpan is visible in this
# chunk; the definition may continue past the end of the view.
def computeSpan(ex, rw):
    # Examples whose TYPE is "WOOD" or whose PURPOSE is "WALK" get "SHORT".
    if ex["TYPE"] == "WOOD" or ex["PURPOSE"] == "WALK":
        return orange.Value(spanVar, "SHORT")