Example no. 1
# Description: Uses cross-validation to compare regression tree and k-nearest neighbors
# Category:    modelling, evaluation
# Uses:        housing
# Classes:     orngStat.MSE, orngTest.crossValidation, MajorityLearner, orngTree.TreeLearner, orange.kNNLearner
# Referenced:  regression.htm

import orange, orngTree, orngTest, orngStat

data = orange.ExampleTable("housing.tab")

maj = orange.MajorityLearner()
maj.name = "default"
rt = orngTree.TreeLearner(measure="retis", mForPruning=2, minExamples=20)
rt.name = "regression tree"
k = 5
knn = orange.kNNLearner(k=k)
knn.name = "k-NN (k=%i)" % k
learners = [maj, rt, knn]

results = orngTest.crossValidation(learners, data, folds=10)
mse = orngStat.MSE(results)

print "Learner        MSE"
for i in range(len(learners)):
  print "%-15s %5.3f" % (learners[i].name, mse[i])
Example no. 2
    def __call__(self, data, y=None, x=None, nc=None, weight=None, **kwds):

        if y is None:
            y = [data.domain.classVar]
        if x is None:
            x = [v for v in data.domain.variables if v not in y]

        Ncomp = nc if nc is not None else len(x)

        dataX = orange.ExampleTable(orange.Domain(x, False), data)
        dataY = orange.ExampleTable(orange.Domain(y, False), data)

        # transformation to numpy arrays
        X = dataX.toNumpy()[0]
        Y = dataY.toNumpy()[0]

        # data dimensions
        n, mx = numpy.shape(X)
        my = numpy.shape(Y)[1]

        # Z-scores of original matrices
        YMean = numpy.mean(Y, axis=0)
        YStd = numpy.std(Y, axis=0)
        XMean = numpy.mean(X, axis=0)
        XStd = numpy.std(X, axis=0)

        #FIXME: standard deviation should never be 0. Ask Lan, if the following
        #fix is ok.
        XStd = numpy.maximum(XStd, 10e-16)
        YStd = numpy.maximum(YStd, 10e-16)

        X = (X - XMean) / XStd
        Y = (Y - YMean) / YStd

        P = numpy.empty((mx, Ncomp))
        C = numpy.empty((my, Ncomp))
        T = numpy.empty((n, Ncomp))
        U = numpy.empty((n, Ncomp))
        B = numpy.zeros((Ncomp, Ncomp))
        W = numpy.empty((mx, Ncomp))
        E, F = X, Y

        # main algorithm
        for i in range(Ncomp):
            u = numpy.random.random_sample((n, 1))  #FIXME random seed?
            w = normalize(dot(E.T, u))
            t = normalize(dot(E, w))
            dif = t

            # iterations for loading vector t
            while numpy.linalg.norm(dif) > 10e-16:
                c = normalize(dot(F.T, t))
                u = dot(F, c)
                w = normalize(dot(E.T, u))
                t0 = normalize(dot(E, w))
                dif = t - t0
                t = t0

            #print "T", T
            #print "X*W", numpy.dot(X,W)

            T[:, i] = t.T
            U[:, i] = u.T
            C[:, i] = c.T
            W[:, i] = w.T
            b = dot(t.T, u)[0]
            B[i][i] = b
            p = dot(E.T, t)
            P[:, i] = p.T
            E = E - dot(t, p.T)
            F = F - b * dot(t, c.T)

        # estimated Y
        YE = dot(dot(T, B), C.T) * YStd + YMean
        Y = Y * numpy.std(Y, axis=0) + YMean
        BPls = dot(dot(numpy.linalg.pinv(P.T), B), C.T)

        partial = {}
        if self.save_partial:
            partial["T"] = T
            partial["U"] = U
            partial["C"] = C
            partial["W"] = W
            partial["P"] = P

        return PLSRegression(domain=data.domain,
                             BPls=BPls,
                             YMean=YMean,
                             YStd=YStd,
                             XMean=XMean,
                             XStd=XStd,
                             name=self.name,
                             **partial)
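
# The fragment above relies on two helpers, dot and normalize, that are not
# shown; a minimal sketch of what they presumably do (dot as an alias for
# numpy.dot, normalize as division by the Euclidean norm), inferred from how
# they are used in the loop:
import numpy

def dot(a, b):
    # plain matrix/vector product
    return numpy.dot(a, b)

def normalize(v):
    # scale a vector to unit Euclidean length
    return v / numpy.linalg.norm(v)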
Example no. 3
            self.progressBarFinished()
            self.classifier.name = self.LearnerName
            self.classifier.setattr("data", self.data)
            if self.data.domain.classVar:
                self.send("Classifier", self.classifier)
            self.send("SOM", self.classifier)

    def sendReport(self):
        self.reportSettings(
            "Topology",
            [("Shape", ["hexagonal", "rectangular"][self.topology]),
             ("Size", "%i columns, %i rows" % (self.xdim, self.ydim))])
        self.reportSettings(
            "Optimization",
            [("Initialization", ["linear", "random"][self.initialization]),
             ("Neighborhood", ["Gaussian", "bubble"][self.neighborhood]),
             ("Radius", "initial: %i, final: %i" %
              (self.radius1, self.radius2)),
             ("Number of iterations", self.iterations1)])


if __name__ == "__main__":
    app = QApplication(sys.argv)
    w = OWSOM()
    ##    app.setMainWidget(w)
    w.show()
    data = orange.ExampleTable("../../doc/datasets/iris.tab")

    w.setData(data)
    app.exec_()
Example no. 4
# Description: Creates a list of association rules, selects five rules and prints them out
# Category:    description
# Uses:        imports-85
# Classes:     orngAssoc.build, Preprocessor_discretize, EquiNDiscretization
# Referenced:  assoc.htm

import orange, orngAssoc

data = orange.ExampleTable("imports-85")
data = orange.Preprocessor_discretize(data, \
  method=orange.EquiNDiscretization(numberOfIntervals=3))
data = data.select(range(10))

rules = orange.AssociationRulesInducer(data, support=0.4)

print "%i rules with support higher than or equal to %5.3f found.\n" % (len(rules), 0.4)

orngAssoc.sort(rules, ["support", "confidence"])

orngAssoc.printRules(rules[:5], ["support", "confidence"])
print

del rules[:3]
orngAssoc.printRules(rules[:5], ["support", "confidence"])
print
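
# A short follow-up sketch (assuming the rules list above is still in scope):
# individual association rules expose their quality measures as attributes, so
# the values printed by orngAssoc.printRules can also be read directly.
for r in rules[:5]:
    print "support=%5.3f confidence=%5.3f  %s" % (r.support, r.confidence, r)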
Example no. 5
    def __call__(self, newRule, examples, weightID, targetClass, prior):
        N = len(examples)
        ny = len(filter(lambda e: e.getclass() == targetClass, examples))
        N1 = n1x = n1xy = 0

        for e in examples:
            tmp = e.getweight(weightID)
            N1 += tmp
            if newRule.filter(e):
                n1x += tmp
                if e.getclass() == targetClass:
                    n1xy += tmp
        wracc = n1xy / N1 - ny * n1x / (N1 * N)
        return wracc


if __name__ == "__main__":
    filename = "..\\..\\doc\\datasets\\lenses.tab"
    if 'linux' in sys.platform:
        filename = "/usr/doc/orange/datasets/lenses.tab"
    data = orange.ExampleTable(filename)

    print
    print
    print
    learner = CN2_SD(3)
    targetClass = orange.Value(data.domain["lenses"], "none")
    rules = learner(data, targetClass, 10)
    print "____________________SN2-SD results______________________"
    rules.printRules()
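
# The evaluator above follows the same calling convention as the rule
# evaluators shipped with orngCN2; a minimal sketch of plugging such an
# evaluator (here the stock orngCN2.WRACCEvaluator) into the standard beam
# search, mirroring the rule-learner examples elsewhere in this collection:
import orange, orngCN2

data = orange.ExampleTable("lenses")
learner = orange.RuleLearner()
learner.ruleFinder = orange.RuleBeamFinder()
learner.ruleFinder.evaluator = orngCN2.WRACCEvaluator()
classifier = learner(data)
for r in classifier.rules:
    print orngCN2.ruleToString(r)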
Example no. 6
def makeTable(data, engine, tagLayer, useFarAway=False):
    table = orange.ExampleTable(engine.domain())
    if engine.name() in skipMap:
        skipList = skipMap[engine.name()]
    else:
        skipList = []

    labeledCount = 0
    negativeCount = 0
    farAwayCount = 0
    for engineName, landmarkName, geometry in data:

        #geometry["landmark"] = createLandmarkPt(math2d.centroid(geometry["landmark"]))

        if not (engineName in skipList):  # or True:

            try:
                geometry["landmark"] = geometry["ground"]
                ex = engine.makeExample(**geometry)
            except preposition.InsaneExample:
                continue
            except:
                print "dc", engineName
                print "dc", geometry
                print "dc", landmarkName
                raise
            if engineName == engine.name():
                if engineName == "down" and False:
                    print "doing down differently"
                    if landmarkName == "hallway":
                        cls = "True"
                        labeledCount += 1
                    else:
                        continue
                        #cls = "False"
                        #negativeCount -= 1
                else:
                    cls = "True"
                    labeledCount += 1
            else:
                cls = "False"
                negativeCount += 1
            ex['class'] = cls
            ex['sourceEngineName'] = engineName
            ex['engineName'] = engine.name()
            ex['landmarkName'] = landmarkName
            ex['farAway'] = False
            table.append(ex)
    if useFarAway and engine.name() != "through" and engine.name() != "down":
        for name, landmark in tagLayer:
            centroid1 = math2d.centroid(landmark)
            for engineName, landmarkName, geometry in data:
                if engineName == engine.name():
                    centroid2 = math2d.centroid(geometry["landmark"])
                    d1 = math2d.dist(centroid1, centroid2)
                    d2 = math2d.length(geometry["figure"])
                    if d1 > d2:
                        ex = engine.makeExample(figure=geometry["figure"],
                                                landmark=landmark)
                        ex['class'] = "False"
                        ex['landmarkName'] = landmarkName
                        ex['sourceEngineName'] = engineName
                        ex['engineName'] = engine.name()
                        ex['farAway'] = True
                        table.append(ex)
                        farAwayCount += 1
                        if farAwayCount >= 100:
                            break
    for ex in table:
        ex['drawMap'] = None
        #ex['geometry'] = None

    print "counts"
    print labeledCount, "labeled examples."
    print negativeCount, "negative examples."
    print farAwayCount, "far away examples."

    return table
Example no. 7
# Classes:     ExampleTable
# Uses:        iris, heart_disease
# Referenced:  ExampleTable.htm

import orange, random

random.seed(0)
values = ["0", "1"]
mynames = ["orange", "green", "red", "yellow", "black", "magenta"]
attributes = [orange.EnumVariable(mynames[i], values=values) for i in range(6)]
classattr = orange.EnumVariable("classname", values=["0", "1"])
domain = orange.Domain(attributes + [classattr])

print "attributes", attributes
print "classattr", classattr
print "domain:", domain

card = [1, 1, 1, 1, 1, 1]
data = orange.ExampleTable(domain)
for i in range(5):
    ex = [random.randint(0, c) for c in card]
    ex.append(ex[0] == ex[1] or ex[4] == 0)
    data.append(ex)

for ex in data:
    print ex

classifier = orange.BayesLearner(data)
print classifier(data[0], orange.GetBoth)
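
# The constructed table can also be written out and reloaded; a short sketch,
# assuming the objects above are still in scope (ExampleTable.save infers the
# tab-delimited format from the .tab extension):
data.save("random_binary.tab")
reloaded = orange.ExampleTable("random_binary.tab")
print len(reloaded), "examples reloaded"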
Example no. 8
# Description: Add a new attribute price to a car data set, computed from two existing attributes (buying, maint)
# Category:    preprocessing
# Uses:        car
# Classes:     Domain, Value, getValueFrom, EnumVariable
# Referenced:  domain.htm

import orange
data = orange.ExampleTable('../datasets/car')

# add attribute price = f(buying, maint)
# see also http://www.ailab.si/hint/car_dataset.asp

priceTable = {}
priceTable['v-high:v-high'] = 'v-high'
priceTable['high:v-high'] = 'v-high'
priceTable['med:v-high'] = 'high'
priceTable['low:v-high'] = 'high'
priceTable['v-high:high'] = 'v-high'
priceTable['high:high'] = 'high'
priceTable['med:high'] = 'high'
priceTable['low:high'] = 'med'
priceTable['v-high:med'] = 'high'
priceTable['high:med'] = 'high'
priceTable['med:med'] = 'med'
priceTable['low:med'] = 'low'
priceTable['v-high:low'] = 'high'
priceTable['high:low'] = 'high'
priceTable['med:low'] = 'low'
priceTable['low:low'] = 'low'
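
# The script stops after building the lookup table; a sketch of the intended
# continuation, based on the classes listed in the header (EnumVariable, Value,
# getValueFrom, Domain). The value set and the helper below are assumptions
# that follow the usual Orange getValueFrom protocol:
price = orange.EnumVariable("price", values=["v-high", "high", "med", "low"])

def priceFromBuyingMaint(example, returnWhat):
    # derive the new value from the two existing attributes
    key = "%s:%s" % (example["buying"], example["maint"])
    return orange.Value(price, priceTable[key])

price.getValueFrom = priceFromBuyingMaint

newdomain = orange.Domain(data.domain.attributes + [price, data.domain.classVar])
newdata = orange.ExampleTable(newdomain, data)
for ex in newdata[:5]:
    print ex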

Example no. 9

# Description: Shows how to sample examples by random division into two groups
# Category:    sampling
# Classes:     MakeRandomIndices, MakeRandomIndices2, RandomGenerator
# Uses:        lenses
# Referenced:  RandomIndices.htm

import orange

data = orange.ExampleTable("lenses")

indices2 = orange.MakeRandomIndices2(p0=6)

ind = indices2(data)
print ind
data0 = data.select(ind, 0)
data1 = data.select(ind, 1)
print len(data0), len(data1)

print "\nIndices without playing with random generator"
for i in range(5):
    print indices2(data)

print "\nIndices with random generator"
indices2.randomGenerator = orange.RandomGenerator(42)
for i in range(5):
    print indices2(data)

print "\nIndices with randseed"
indices2.randomGenerator = None
indices2.randseed = 42
for i in range(5):
    print indices2(data)
Example no. 10
# Description: Learn decision tree from data and output class probabilities for first few instances
# Category:    modelling
# Uses:        voting.tab
# Classes:     orngTree.TreeLearner
# Referenced:  c_otherclass.htm

import orange, orngTree

data = orange.ExampleTable("voting")

tree = orngTree.TreeLearner(data, sameMajorityPruning=1, mForPruning=2)
print "Possible classes:", data.domain.classVar.values
print "Probabilities for democrats:"
for i in range(5):
    p = tree(data[i], orange.GetProbabilities)
    print "%d: %5.3f (originally %s)" % (i + 1, p[1], data[i].getclass())

print
orngTree.printTxt(tree)
orngTree.printDot(tree,
                  fileName='tree.dot',
                  internalNodeShape="ellipse",
                  leafShape="box")
Example no. 11
            supps = [rule.support for rule in self.rules]
            self.supp_min = min(supps)
            self.supp_max = max(supps)
            del supps

            confs = [rule.confidence for rule in self.rules]
            self.conf_min = min(confs)
            self.conf_max = max(confs)
            del confs

            self.checkScale()
        else:
            self.supp_min, self.supp_max = self.conf_min, self.conf_max = 0., 1.

        self.supp_allmin, self.supp_allmax, self.conf_allmin, self.conf_allmax = self.supp_min, self.supp_max, self.conf_min, self.conf_max
        self.rezoom(self.supp_allmin, self.supp_allmax, self.conf_allmin, self.conf_allmax)



if __name__=="__main__":
    a=QApplication(sys.argv)
    ow=OWAssociationRulesViewer()

    dataset = orange.ExampleTable('../../doc/datasets/car.tab')
    rules=orange.AssociationRulesInducer(dataset, minSupport = 0.3, maxItemSets=15000)
    ow.arules(rules)

    ow.show()
    a.exec_()
    ow.saveSettings()
Example no. 12
# Description: Shows what the contingency matrix looks like and which are its common methods
# Category:    statistics
# Classes:     Contingency, ContingencyAttrClass
# Uses:        monk1
# Referenced:  contingency.htm

import orange
data = orange.ExampleTable("monk1")
cont = orange.ContingencyAttrClass("e", data)

print "Contingency items:"
for val, dist in cont.items():
    print val, dist
print

print "Contingency keys: ", cont.keys()
print "Contingency values: ", cont.values()
print "Contingency items: ", cont.items()
print

print "cont[0] =", cont[0]
print 'cont[\"1\"] =', cont["1"]
print 'cont[orange.Value(data.domain["e"], "1")] =', cont[orange.Value(
    data.domain["e"], "1")]
print

print "Iteration through contingency:"
for i in cont:
    print i
print
Example no. 13
    def getSelectionsAsExampleTables(self,
                                     attrList,
                                     useAnchorData=1,
                                     addProjectedPositions=0):
        return (None, None)  # TODO: this is disabled for now

        if not self.have_data:
            return (None, None)

        selected = self.get_selected_indices()

        if addProjectedPositions == 0 and not numpy.any(selected):
            return (None, self.raw_data)
        if (useAnchorData and len(self.anchor_data) < 3) or len(attrList) < 3:
            return (None, None)

        x_attr = orange.FloatVariable("X Positions")
        y_attr = orange.FloatVariable("Y Positions")
        z_attr = orange.FloatVariable("Z Positions")

        if addProjectedPositions == 1:
            domain = orange.Domain([x_attr, y_attr, z_attr] +
                                   [v for v in self.data_domain.variables])
        elif addProjectedPositions == 2:
            domain = orange.Domain(self.data_domain)
            domain.addmeta(orange.newmetaid(), x_attr)
            domain.addmeta(orange.newmetaid(), y_attr)
            domain.addmeta(orange.newmetaid(), z_attr)
        else:
            domain = orange.Domain(self.data_domain)

        domain.addmetas(self.data_domain.getmetas())

        if useAnchorData:
            indices = [
                self.attribute_name_index[val[3]] for val in self.anchor_data
            ]
        else:
            indices = [self.attribute_name_index[label] for label in attrList]
        valid_data = self.getValidList(indices)
        if len(valid_data) == 0:
            return (None, None)

        array = self.create_projection_as_numeric_array(
            attrList,
            scaleFactor=self.scaleFactor,
            useAnchorData=useAnchorData,
            removeMissingData=0)
        if array is None:
            return (None, None)

        unselected = numpy.logical_not(selected)
        selected_indices, unselected_indices = list(selected), list(unselected)

        if addProjectedPositions:
            selected = orange.ExampleTable(
                domain, self.raw_data.selectref(selected_indices))
            unselected = orange.ExampleTable(
                domain, self.raw_data.selectref(unselected_indices))
            selected_index = 0
            unselected_index = 0
            for i in range(len(selected_indices)):
                if selected_indices[i]:
                    selected[selected_index][x_attr] = array[i][0]
                    selected[selected_index][y_attr] = array[i][1]
                    selected[selected_index][z_attr] = array[i][2]
                    selected_index += 1
                else:
                    unselected[unselected_index][x_attr] = array[i][0]
                    unselected[unselected_index][y_attr] = array[i][1]
                    unselected[unselected_index][z_attr] = array[i][2]
        else:
            selected = self.raw_data.selectref(selected_indices)
            unselected = self.raw_data.selectref(unselected_indices)

        if len(selected) == 0:
            selected = None
        if len(unselected) == 0:
            unselected = None
        return (selected, unselected)
Example no. 14
import orange

data = orange.ExampleTable("inquisition")
rules = orange.AssociationRulesSparseInducer(data,
            support = 0.5, storeExamples = True)

rule0 = rules[10]

print "Rule:", rule0
print "Match left: "
print [rule0.examples[i] for i in rule0.matchLeft]
print "\nMatch both: "
print [rule0.examples[i] for i in rule0.matchBoth]

inducer = orange.AssociationRulesSparseInducer(support = 0.5)
itemsets = inducer.getItemsets(data)
print itemsets[5]
Example no. 15
import orange
import orngCN2

data = orange.ExampleTable("titanic.tab")

# create learner
learner = orange.RuleLearner()

cl = learner(data)
for r in cl.rules:
    print orngCN2.ruleToString(r)
print "*****"

learner.ruleFinder = orange.RuleBeamFinder()
learner.ruleFinder.evaluator = orngCN2.mEstimate(m=50)

cl = learner(data)
for r in cl.rules:
    print orngCN2.ruleToString(r)
print "****"

learner.ruleFinder.ruleStoppingValidator = orange.RuleValidator_LRS(
    alpha=0.01, min_coverage=10, max_rule_complexity=2)
learner.ruleFinder.ruleFilter = orange.RuleBeamFilter_Width(width=50)

cl = learner(data)
for r in cl.rules:
    print orngCN2.ruleToString(r)
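
# orngCN2 wraps the same machinery for unordered rule sets; a minimal sketch,
# reusing the titanic data loaded above (CN2UnorderedLearner is the same class
# the CN2 widget example later in this collection instantiates):
unordered = orngCN2.CN2UnorderedLearner()
cl2 = unordered(data)
for r in cl2.rules:
    print orngCN2.ruleToString(r)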
Example no. 16
    def dataset(self, data):
        #self.data=data
        self.data = self.isDataWithClass(
            data, orange.VarTypes.Discrete) and data or None
        self.setLearner()

    def qualityButtonPressed(self, id=0):
        self.QualityButton = id
        for i in range(len(self.ruleQualityBG.buttons)):
            self.ruleQualityBG.buttons[i].setChecked(id == i)
        self.mSpin.control.setEnabled(id == 1)

    def coveringAlgButtonPressed(self, id=0):
        self.CoveringButton = id
        for i in range(len(self.coveringAlgBG.buttons)):
            self.coveringAlgBG.buttons[i].setChecked(id == i)
        self.weightSpin.control.setEnabled(id == 1)

    def applySettings(self):
        self.setLearner()

if __name__ == "__main__":
    app = QApplication(sys.argv)
    w = OWCN2()
    #w.dataset(orange.ExampleTable("titanic.tab"))
    w.dataset(
        orange.ExampleTable(r"E:\Development\Orange Datasets\UCI\titanic.tab"))
    w.show()
    app.exec_()
    w.saveSettings()
Example no. 17
def nway():
    engine_to_examples = {}

    trainer = Trainer()
    classes = set()
    for i, key in enumerate(trainer.annotationEngines):
        engine = trainer.engineMap[key]
        table = trainer.makeTable(engine)

        for ex in table:
            if ex["farAway"].value:
                cls = "null"
            else:
                cls = ex["sourceEngineName"].value
            geometry = ex["geometry"].value
            engine_to_examples.setdefault(cls, [])

            classes.add(cls)

            examples = [
                trainer.engineMap[key].makeExample(expectInsane=True,
                                                   **geometry)
                for key in trainer.annotationEngines
                if not len(geometry["figure"]) == 0
            ]

            engine_to_examples[cls].append(examples)

        if i >= 1:
            #break
            pass
    variables = []
    for ex in examples:
        for attr in ex.domain:
            if attr.name == "class":
                continue
            new_attr = orange.FloatVariable(attr.name)
            variables.append(new_attr)
    domain = orange.Domain(variables,
                           orange.EnumVariable("class", values=list(classes)))
    table = orange.ExampleTable(domain)
    for engine_name, example_lists in engine_to_examples.iteritems():
        for example_list in example_lists:
            ex = orange.Example(domain)
            for engine_ex in example_list:
                for attr in engine_ex.domain:
                    ex[attr.name] = engine_ex[attr.name]
            ex["class"] = engine_name
            table.append(ex)
    print "domain", domain

    cv_indices = orange.MakeRandomIndices2(table, p0=0.75)

    training = table.select(cv_indices, 0, negate=True)
    testing = table.select(cv_indices, 0, negate=False)
    #classifier = orngBayes.BayesLearner(training)

    classifier = orangePickle.PickleableClassifier(training,
                                                   orngBayes.BayesLearner)

    results = orngTest.testOnData([classifier], testing)
    print orngStat.CA(results)
    cm = orngStat.confusionMatrices(results)[0]
    classes = list(domain.classVar.values)
    print "           ", " ".join([c.rjust(12) for c in classes + ["", ""]])
    for className, classConfusions in zip(classes, cm):
        #format = ("%s" + ("\t%i" * len(classes)))
        values = (className, ) + tuple(classConfusions)
        print " ".join([str(c).rjust(12) for c in values])
        #print  format % values

    for name in classes:
        classIndex = classes.index(name)
        mpl.figure()
        rocCurve(results,
                 "",
                 classIndex,
                 stepSize=0.001,
                 plotArgs=dict(linewidth=5, markersize=10))
        mpl.title(name, size=30)
        mpl.xlabel("FP", fontsize=30)
        mpl.ylabel("TP", fontsize=30)
        mpl.xticks([0, 1], fontsize=17)
        mpl.yticks([0, 1], fontsize=17)
    fname = "nway.pck"
    print "saving", fname
    with open(fname, "w") as f:
        pickle.dump(classifier, f, protocol=2)
    mpl.show()
Example no. 18
    def setLearner(self):
        if hasattr(self, "btnApply"):
            self.btnApply.setFocus()
        #progress bar
        self.progressBarInit()

        #learner
        self.learner = orngCN2.CN2UnorderedLearner()
        self.learner.name = self.name
        self.learner.progressCallback = CN2ProgressBar(self)
        self.send("Learner", self.learner)

        ruleFinder = orange.RuleBeamFinder()
        if self.QualityButton == 0:
            ruleFinder.evaluator = orange.RuleEvaluator_Laplace()
        elif self.QualityButton == 1:
            ruleFinder.evaluator = orngCN2.mEstimate(self.m)
        elif self.QualityButton == 2:
            ruleFinder.evaluator = orngCN2.WRACCEvaluator()

        if self.useMaxRuleLength:
            maxRuleLength = self.MaxRuleLength
        else:
            maxRuleLength = -1
        ruleFinder.ruleStoppingValidator = orange.RuleValidator_LRS(
            alpha=self.stepAlpha,
            min_coverage=self.MinCoverage,
            max_rule_complexity=maxRuleLength)
        ruleFinder.validator = orange.RuleValidator_LRS(
            alpha=self.Alpha,
            min_coverage=self.MinCoverage,
            max_rule_complexity=maxRuleLength)
        ruleFinder.ruleFilter = orange.RuleBeamFilter_Width(
            width=self.BeamWidth)
        self.learner.ruleFinder = ruleFinder

        if self.CoveringButton == 0:
            self.learner.coverAndRemove = orange.RuleCovererAndRemover_Default()
        elif self.CoveringButton == 1:
            self.learner.coverAndRemove = orngCN2.CovererAndRemover_multWeights(
                mult=self.Weight)

        self.classifier = None
        self.error()
        if self.data:
            oldDomain = orange.Domain(self.data.domain)
            learnData = orange.ExampleTable(oldDomain, self.data)
            self.classifier = self.learner(learnData)
            self.classifier.name = self.name
            for r in self.classifier.rules:
                r.examples = orange.ExampleTable(oldDomain, r.examples)
            self.classifier.examples = orange.ExampleTable(
                oldDomain, self.classifier.examples)
            self.classifier.setattr("data", self.classifier.examples)
            self.error("")


##            except orange.KernelException, (errValue):
##                self.classifier=None
##                self.error(errValue)
##            except Exception:
##                self.classifier=None
##                if not self.data.domain.classVar:
##                    self.error("Classless domain.")
##                elif self.data.domain.classVar.varType == orange.VarTypes.Continuous:
##                    self.error("CN2 can learn only from discrete class.")
##                else:
##                    self.error("Unknown error")
        self.send("Classifier", self.classifier)
        self.send("Unordered CN2 Classifier", self.classifier)
        self.progressBarFinished()
Example no. 19
        labels = [m.name for m in d.domain.getmetas().values()] + \
                 [a.name for a in d.domain.variables]
        self.labelCombo.addItems(labels)
        # here we would need to use the domain dependent setting of the label id
        self.labelCombo.setCurrentIndex(0)
        self.Label = labels[0]
        self.setLabel()

    def dataset(self, data):
        if data and len(data.domain.attributes):
            self.data = data
            self.setLabelComboItems()
            self.computeMatrix()
        else:
            self.send("Distance Matrix", None)


##################################################################################################
# test script

if __name__ == "__main__":
    import os
    data = orange.ExampleTable(r'../../doc/datasets/glass')
    data = orange.ExampleTable('glass')
    a = QApplication(sys.argv)
    ow = OWExampleDistance()
    ow.show()
    ow.dataset(data)
    a.exec_()
    ow.saveSettings()
Example no. 20

# Description: Shows how to derive a Python class from orange.TreeSplitConstructor
# Category:    classification, decision trees, callbacks to Python
# Classes:     TreeSplitConstructor, Classifier, SubsetsGenerator_constSize, orngMisc.BestOnTheFly
# Uses:        lenses
# Referenced:  callbacks.htm

import orange, orngTree, orngMisc

tab = orange.ExampleTable(r"lenses.tab")


class CartesianClassifier(orange.Classifier):
    def __init__(self, var1, var2):
        self.var1 = var1
        self.var2 = var2
        self.noValues2 = len(var2.values)
        self.classVar = orange.EnumVariable("%sx%s" % (var1.name, var2.name))
        self.classVar.values = [
            "%s-%s" % (v1, v2) for v1 in var1.values for v2 in var2.values
        ]

    def __call__(self, ex, what=orange.Classifier.GetValue):
        val = ex[self.var1] * self.noValues2 + ex[self.var2]
        if what == orange.Classifier.GetValue:
            return orange.Value(self.classVar, val)
        probs = orange.DiscDistribution(self.classVar)
        probs[val] = 1.0
        if what == orange.Classifier.GetProbabilities:
            return probs
        else:
            return (orange.Value(self.classVar, val), probs)
Example no. 21
    def setLearner(self, learner=None):
        self.learner = learner
        self.commit()

    def setData(self, data):
        self.data = data
        self.commit()

    def onChange(self):
        pass

    def commit(self):
        wrapped = None
        if self.learner:
            wrapped = self.METHODS[self.method][1](self.learner, t=self.t)
            self.send("Learner", wrapped)

        if self.data and wrapped:
            classifier = wrapped(self.data)
            self.send("Classifier", classifier)


if __name__ == "__main__":
    app = QApplication(sys.argv)
    w = OWEnsemble()
    w.setLearner(orange.BayesLearner())
    w.setData(orange.ExampleTable("../../doc/datasets/iris"))
    w.show()
    app.exec_()
Example no. 22
    def __call__(self, examples, weightID=0, **kwds):
        import orngTest, orngStat, statc

        self.__dict__.update(kwds)

        if self.removeThreshold < self.addThreshold:
            raise ValueError("'removeThreshold' should be greater than or equal to 'addThreshold'")

        classVar = examples.domain.classVar

        indices = orange.MakeRandomIndicesCV(examples,
                                             folds=getattr(self, "folds", 10))
        domain = orange.Domain([], classVar)

        res = orngTest.testWithIndices([self.learner],
                                       orange.ExampleTable(domain, examples),
                                       indices)

        oldStat = self.stat(res)[0]
        oldStats = [self.stat(x)[0] for x in orngStat.splitByIterations(res)]
        print(".", oldStat, domain)
        stop = False
        while not stop:
            stop = True
            if len(domain.attributes) >= 2:
                bestStat = None
                for attr in domain.attributes:
                    newdomain = orange.Domain(
                        [x for x in domain.attributes if x != attr], classVar)
                    res = orngTest.testWithIndices(
                        [self.learner],
                        (orange.ExampleTable(newdomain, examples), weightID),
                        indices)

                    newStat = self.stat(res)[0]
                    newStats = [
                        self.stat(x)[0]
                        for x in orngStat.splitByIterations(res)
                    ]
                    print("-", newStat, newdomain)
                    ## If stat has increased (ie newStat is better than bestStat)
                    if not bestStat or cmp(newStat, bestStat) == self.statsign:
                        if cmp(newStat, oldStat) == self.statsign:
                            bestStat, bestStats, bestAttr = newStat, newStats, attr
                        elif statc.wilcoxont(
                                oldStats, newStats)[1] > self.removeThreshold:
                            bestStat, bestAttr, bestStats = newStat, newStats, attr
                if bestStat:
                    domain = orange.Domain(
                        [x for x in domain.attributes if x != bestAttr],
                        classVar)
                    oldStat, oldStats = bestStat, bestStats
                    stop = False
                    print("removed", bestAttr.name)

            bestStat, bestAttr = oldStat, None
            for attr in examples.domain.attributes:
                if not attr in domain.attributes:
                    newdomain = orange.Domain(domain.attributes + [attr],
                                              classVar)
                    res = orngTest.testWithIndices(
                        [self.learner],
                        (orange.ExampleTable(newdomain, examples), weightID),
                        indices)

                    newStat = self.stat(res)[0]
                    newStats = [
                        self.stat(x)[0]
                        for x in orngStat.splitByIterations(res)
                    ]
                    print("+", newStat, newdomain)

                    ## If stat has increased (ie newStat is better than bestStat)
                    if cmp(newStat,
                           bestStat) == self.statsign and statc.wilcoxont(
                               oldStats, newStats)[1] < self.addThreshold:
                        bestStat, bestStats, bestAttr = newStat, newStats, attr
            if bestAttr:
                domain = orange.Domain(domain.attributes + [bestAttr],
                                       classVar)
                oldStat, oldStats = bestStat, bestStats
                stop = False
                print("added", bestAttr.name)

        return self.learner(orange.ExampleTable(domain, examples), weightID)
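
# Usage sketch for the stepwise wrapper above; the surrounding class and its
# constructor are not shown, so the name StepwiseLearner and the keyword
# arguments below (learner, stat, statsign, addThreshold, removeThreshold) are
# assumptions inferred from the attributes the method reads:
#
#     fss = StepwiseLearner(learner=orange.BayesLearner(),
#                           stat=orngStat.CA, statsign=1,
#                           addThreshold=0.2, removeThreshold=0.3)
#     classifier = fss(orange.ExampleTable("voting"))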
Example no. 23
# Description: Shows different uses of orange.Domain
# Category:    preprocessing
# Uses:        glass
# Classes:     Domain
# Referenced:  domain.htm

import orange

domain = orange.ExampleTable("glass").domain

tests = (
    '(["Na", "Mg"], domain)', '(["Na", "Mg"], 1, domain)',
    '(["Na", "Mg"], 0, domain)', '(["Na", "Mg"], domain.variables)',
    '(["Na", "Mg"], 1, domain.variables)',
    '(["Na", "Mg"], 0, domain.variables)',
    '([domain["Na"], "Mg"], 0, domain.variables)',
    '([domain["Na"], "Mg"], 0, domain)',
    '([domain["Na"], "Mg"], 0, domain.variables)',
    '([domain["Na"], domain["Mg"]], 0)', '([domain["Na"], domain["Mg"]], 1)',
    '([domain["Na"], domain["Mg"]], None)',
    '([domain["Na"], domain["Mg"]], orange.EnumVariable("something completely different"))',
    '(domain)', '(domain, 0)', '(domain, 1)', '(domain, "Mg")',
    '(domain, domain[0])', '(domain, None)',
    '(domain, orange.FloatVariable("nothing completely different"))')

for args in tests:
    line = "orange.Domain%s" % args
    d = eval(line)
    print line
    print "  classVar: %s" % d.classVar
    print "  attributes: %s" % d.attributes
Example no. 24
import orange
import orngClustering


def callback(km):
    print "Iteration: %d, changes: %d, score: %.4f" % (km.iteration,
                                                       km.nchanges, km.score)


data = orange.ExampleTable("iris")
km = orngClustering.KMeans(data, 3, minscorechange=0, inner_callback=callback)
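
# A short follow-up sketch, assuming the fitted km object exposes the final
# per-example cluster indices in km.clusters (alongside the score already used
# by the callback):
print "final score: %.4f" % km.score
sizes = {}
for c in km.clusters:
    sizes[c] = sizes.get(c, 0) + 1
print "cluster sizes:", sizes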
Example no. 25
# Description: Builds a regression tree and prints it out
# Category:    modelling
# Uses:        housing
# Classes:     orngTree.TreeLearner
# Referenced:  regression.htm

import orange, orngTree

data = orange.ExampleTable("housing")
rt = orngTree.TreeLearner(data, measure="retis", mForPruning=2, minExamples=20)
orngTree.printTxt(rt, leafStr="%V %I")
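
# The same regression tree can also be exported to a dot file for rendering
# with Graphviz, exactly as the classification-tree example above does:
orngTree.printDot(rt, fileName="regression_tree.dot", leafShape="box")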
Example no. 26
# Description: Read data, show mean for continuous attributes and contingency matrix for nominal attributes
# Category:    description
# Uses:        adult_sample.tab
# Classes:     DomainContingency
# Referenced:  basic_exploration.htm

import orange
data = orange.ExampleTable("../datasets/adult_sample")

print "Continuous attributes:"
for a in range(len(data.domain.attributes)):
    if data.domain.attributes[a].varType == orange.VarTypes.Continuous:
        d = 0.
        n = 0
        for e in data:
            if not e[a].isSpecial():
                d += e[a]
                n += 1
        print "  %s, mean=%3.2f" % (data.domain.attributes[a].name, d / n)

print "\nNominal attributes (contingency matrix for classes:", data.domain.classVar.values, ")"
cont = orange.DomainContingency(data)
for a in data.domain.attributes:
    if a.varType == orange.VarTypes.Discrete:
        print "  %s:" % a.name
        for v in range(len(a.values)):
            sum = 0
            for cv in cont[a][v]:
                sum += cv
            print "    %s, total %d, %s" % (a.values[v], sum, cont[a][v])
        print
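
# The manual averaging loop above can also be written with DomainDistributions,
# which computes one distribution per variable in a single pass (a sketch over
# the same data; continuous distributions expose average()):
dist = orange.DomainDistributions(data)
for a in data.domain.attributes:
    if a.varType == orange.VarTypes.Continuous:
        print "  %s, mean=%3.2f" % (a.name, dist[a].average())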
Example no. 27
    def __call__(self, data, weight=None):
        if self.use_attributes is not None:
            new_domain = orange.Domain(self.use_attributes,
                                       data.domain.classVar)
            new_domain.addmetas(data.domain.getmetas())
            data = orange.ExampleTable(new_domain, data)

        if self.stepwise and self.stepwise_before:
            use_attributes = stepwise(data,
                                      add_sig=self.add_sig,
                                      remove_sig=self.remove_sig)
            new_domain = orange.Domain(use_attributes, data.domain.classVar)
            new_domain.addmetas(data.domain.getmetas())
            data = orange.ExampleTable(new_domain, data)

        # continuization (replaces discrete with continuous attributes)
        continuizer = orange.DomainContinuizer()
        continuizer.multinomialTreatment = continuizer.FrequentIsBase
        continuizer.zeroBased = True
        domain0 = continuizer(data)
        data = data.translate(domain0)

        if self.stepwise and not self.stepwise_before:
            use_attributes = stepwise(data,
                                      weight,
                                      add_sig=self.add_sig,
                                      remove_sig=self.remove_sig)
            new_domain = orange.Domain(use_attributes, data.domain.classVar)
            new_domain.addmetas(data.domain.getmetas())
            data = orange.ExampleTable(new_domain, data)

        # missing values handling (impute missing)
        imputer = orange.ImputerConstructor_model()
        imputer.learnerContinuous = orange.MajorityLearner()
        imputer.learnerDiscrete = orange.MajorityLearner()
        imputer = imputer(data)
        data = imputer(data)

        # conversion to numpy
        A, y, w = data.toNumpy()  # weights ??
        if A is None:
            n = len(data)
            m = 0
        else:
            n, m = numpy.shape(A)

        if self.beta0:
            if A is None:
                X = numpy.ones([len(data), 1])
            else:
                X = numpy.insert(A, 0, 1, axis=1)  # adds a column of ones
        else:
            X = A

        # set weights
        W = numpy.identity(len(data))
        if weight:
            for di, d in enumerate(data):
                W[di, di] = float(d[weight])

        D = dot(
            dot(numpy.linalg.pinv(dot(dot(X.T, W), X)), X.T), W
        )  # adds some robustness by computing the pseudo inverse; normal inverse could fail due to singularity of the X.T*W*X
        beta = dot(D, y)

        yEstimated = dot(X, beta)  # estimation
        # some descriptive statistics
        muY, sigmaY = numpy.mean(y), numpy.std(y)
        muX, covX = numpy.mean(X, axis=0), numpy.cov(X, rowvar=0)

        # model statistics
        SST, SSR = numpy.sum((y - muY)**2), numpy.sum((yEstimated - muY)**2)
        SSE, RSquare = SST - SSR, SSR / SST
        R = numpy.sqrt(RSquare)  # coefficient of determination
        RAdjusted = 1 - (1 - RSquare) * (n - 1) / (n - m - 1)
        F = (SSR / m) / (SSE / (n - m - 1))  # F statistic
        df = m - 1

        sigmaSquare = SSE / (n - m - 1)

        # standard error of estimated coefficients
        errCoeff = sqrt(sigmaSquare * inv(dot(X.T, X)).diagonal())

        # t statistic, significance
        t = beta / errCoeff
        df = n - 2
        significance = []
        for tt in t:
            try:
                significance.append(
                    statc.betai(df * 0.5, 0.5, df / (df + tt * tt)))
            except:
                significance.append(1.0)

        # standardized coefficients
        if m > 0:
            stdCoeff = (sqrt(covX.diagonal()) / sigmaY) * beta
        else:
            stdCoeff = (sqrt(covX) / sigmaY) * beta

        model = {
            'descriptives': {
                'meanX': muX,
                'covX': covX,
                'meanY': muY,
                'sigmaY': sigmaY
            },
            'model': {
                'estCoeff': beta,
                'stdErrorEstimation': errCoeff
            },
            'model summary': {
                'TotalVar': SST,
                'ExplVar': SSE,
                'ResVar': SSR,
                'R': R,
                'RAdjusted': RAdjusted,
                'F': F,
                't': t,
                'sig': significance
            }
        }
        return LinearRegression(statistics=model,
                                domain=data.domain,
                                name=self.name,
                                beta0=self.beta0,
                                imputer=imputer)
Example no. 28
        print "Attributes in favor of %s = %s [%f]" % (
            t.domain.classVar.name, t.domain.classVar.values[1],
            m.probfunc(m.example_c[idx][0]))
        printpie(e1, m.probfunc(m.example_c[idx][0]))

        print "\nProjection of the example in the basis space:"
        j = 0
        for i in range(len(m.coeff_names)):
            print m.coeff_names[i][0], ':'
            for x in m.coeff_names[i][1:]:
                print '\t', x, '=', vector[j]
                j += 1
        print "beta:", -m.beta

    #t = orange.ExampleTable('c:/proj/domains/voting.tab') # discrete
    t = orange.ExampleTable(
        r"E:\Development\Orange Datasets\UCI\shuttle.tab")  # discrete

    #t = orange.ExampleTable('c_cmc.tab') # continuous

    print "NAIVE BAYES"
    print "==========="
    bl = orange.BayesLearner()
    bl.estimatorConstructor = orange.ProbabilityEstimatorConstructor_Laplace()
    # prevent too many estimation points
    # increase the smoothing level
    bl.conditionalEstimatorConstructorContinuous = orange.ConditionalProbabilityEstimatorConstructor_loess(
        windowProportion=0.5, nPoints=10)
    c = bl(t)
    printmodel(t, c, printexamples=0)

    print "\n\nLOGISTIC REGRESSION"
Example no. 29
##    data = orange.ExampleTable(r'..\..\doc\datasets\adult_sample.tab')
##    dataA = orange.ExampleTable(r'c:\Documents and Settings\peterjuv\My Documents\STEROLTALK\Sterolgene v.0 mouse\sterolgene v.0 mouse probeRatios.tab')
##    dataA = orange.ExampleTable(r'c:\Documents and Settings\peterjuv\My Documents\STEROLTALK\Sterolgene v.0 mouse\Copy of sterolgene v.0 mouse probeRatios.tab')
##    dataB = orange.ExampleTable(r'c:\Documents and Settings\peterjuv\My Documents\STEROLTALK\Sterolgene v.0 mouse\sterolgene v.0 mouse probeRatios.tab')
    dataA = orange.ExampleTable(r'c:\Documents and Settings\peterjuv\My Documents\et1.tab')
    dataB = orange.ExampleTable(r'c:\Documents and Settings\peterjuv\My Documents\et2.tab')
    a=QApplication(sys.argv)
    ow=OWMergeData()
    a.setMainWidget(ow)
    ow.show()
    ow.onDataAInput(dataA)
    ow.onDataBInput(dataB)
    # data table
    dt = OWDataTable.OWDataTable(signalManager = signalManager)
    signalManager.addWidget(ow)
    signalManager.addWidget(dt)
    signalManager.setFreeze(1)
    signalManager.addLink(ow, dt, 'Merged Examples A+B', 'Examples', 1)
    signalManager.addLink(ow, dt, 'Merged Examples B+A', 'Examples', 1)
    signalManager.setFreeze(0)
    dt.show()
    a.exec_()
    """
    import sys
    a = QApplication(sys.argv)
    ow = OWMergeData()
    ow.show()
    data = orange.ExampleTable("iris.tab")
    ow.onDataAInput(data)
    a.exec_()
Example no. 30
# Description: using your own imputer and continuizer in PCA
# Category:    projection
# Uses:        bridges
# Referenced:  orngPCA.htm
# Classes:     orngPCA.PCA

import orange, orngPCA

data = orange.ExampleTable("bridges.tab")

imputer = orange.ImputerConstructor_maximal

continuizer = orange.DomainContinuizer()
continuizer.multinomialTreatment = continuizer.AsNormalizedOrdinal
continuizer.classTreatment = continuizer.Ignore
continuizer.continuousTreatment = continuizer.Leave

pca = orngPCA.PCA(data,
                  standardize=True,
                  imputer=imputer,
                  continuizer=continuizer)
print pca
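
# A short follow-up sketch, assuming the fitted orngPCA object is callable on
# data and returns the examples projected onto the principal components (treat
# this calling convention as an assumption rather than part of the example above):
projected = pca(data)
print projected[0]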