Example #1
    def test_call_construction(self):
        d = orange.ExampleTable("iris")
        e1 = d[0]
        e2 = d[50]
        eud = orange.ExamplesDistanceConstructor_Euclidean(d, ignore_class=True, normalize=False, foo=42)
        self.assertEqual(eud(e1, e2), math.sqrt(sum((x-y)**2 for x, y in zip(list(e1)[:-1], e2))))
        self.assertEqual(eud.foo, 42)

        eud = orange.ExamplesDistanceConstructor_Euclidean(d, ignore_class=False, normalize=False, foo=42)
        self.assertEqual(eud(e1, e2), math.sqrt(sum((x-y)**2 for x, y in zip(list(e1), e2))))
        self.assertEqual(eud.foo, 42)
Example #2
def kNNratioInd(train, calSet, measure=None):
    """
    Use the fraction of kNN with the same response.
    """
    if not measure:
        #measure = instances.MahalanobisConstructor(extTrain)
        measure = orange.ExamplesDistanceConstructor_Euclidean(train)

    alphaList = []
    for predEx in calSet:
        distList = []
        for runIdx in range(len(train)):
            dist = measure(predEx, train[runIdx])
            distList.append(dist)

        # Get the distance of the 10th NN
        distList.sort()
        thresDist = distList[9]

        # Find the labels of the 10 NN
        sameCount = 0
        for runIdx in range(len(train)):
            dist = measure(predEx, train[runIdx])
            if dist <= thresDist:
                if predEx.get_class().value == train[runIdx].get_class().value:
                    sameCount = sameCount + 1
        alpha = 1.00 - float(sameCount) / 10.0
        alphaList.append(alpha)

    return alphaList, train
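A minimal usage sketch for the function above (the split is an illustrative assumption, not part of the original project): divide a data set into a proper training set and a calibration set, then score every calibration example.

import orange

data = orange.ExampleTable("iris")
indices = orange.MakeRandomIndices2(data, 0.7)  # 70/30 split
train = data.select(indices, 0)
calSet = data.select(indices, 1)
alphaList, _ = kNNratioInd(train, calSet)  # one score per calibration example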
Example #3
def avgNN(idx, extTrain, measure=None):
    """
    Use the ratio between the distance to the kNN of the same and of the other class
    """
    attrList = ["SMILES_1"]
    extTrain = dataUtilities.attributeDeselectionData(extTrain, attrList)

    distListSame = []
    distListDiff = []
    #measure = Orange.distance.Euclidean(extTrain)
    if not measure:
        measure = orange.ExamplesDistanceConstructor_Euclidean(extTrain)
    for runIdx in range(len(extTrain)):
        if runIdx != idx:
            dist = measure(extTrain[idx], extTrain[runIdx])
            if extTrain[idx].get_class().value == extTrain[runIdx].get_class(
            ).value:
                distListSame.append(dist)
            else:
                distListDiff.append(dist)
    distListSame.sort()
    avgSame = sum(distListSame[0:10]) / 10.0
    distListDiff.sort()
    avgDiff = sum(distListDiff[0:10]) / 10.0
    if avgDiff == 0:
        alpha = max(distListDiff)
    else:
        alpha = avgSame / float(avgDiff)

    return alpha
Example #4
def kNNratio(idx, extTrain, measure=None):
    """
    Use the fraction of kNN with the same response.
    """
    attrList = ["SMILES_1"]
    extTrain = dataUtilities.attributeDeselectionData(extTrain, attrList)

    distList = []
    if not measure:
        #measure = instances.MahalanobisConstructor(extTrain)
        measure = orange.ExamplesDistanceConstructor_Euclidean(extTrain)
    for runIdx in range(len(extTrain)):
        if runIdx != idx:
            dist = measure(extTrain[idx], extTrain[runIdx])
            distList.append(dist)

    # Get the distance of the 10th NN
    distList.sort()
    thresDist = distList[9]

    # Find the labels of the 10 NN
    sameCount = 0
    for runIdx in range(len(extTrain)):
        if runIdx != idx:
            dist = measure(extTrain[idx], extTrain[runIdx])
            if dist <= thresDist:
                if extTrain[idx].get_class(
                ).value == extTrain[runIdx].get_class().value:
                    sameCount = sameCount + 1
    alpha = 1.00 - float(sameCount) / 10.0

    return alpha
Example #5
def getMinDistRatio(train):
    """
    Calculate the minDistSame and minDistDiff ratio for all ex in the data set and select the greatest quotient.
    Used to scale the minDist ratios in the non-conf score.
    """

    # Get the min dist for all ex in the data set
    minSame = []
    minDiff = []
    minRatio = []
    measure = orange.ExamplesDistanceConstructor_Euclidean(train)
    for idx in range(len(train)):
        distListSame = []
        distListDiff = []
        for iidx in range(len(train)):
            if idx != iidx:
                dist = measure(train[idx], train[iidx])
                if train[idx].get_class().value == train[iidx].get_class(
                ).value:
                    distListSame.append(dist)
                else:
                    distListDiff.append(dist)
        minSame.append(min(distListSame))
        minDiff.append(min(distListDiff))
        # The ratio is undefined when the nearest different-class distance
        # is zero, so skip such examples
        if min(distListDiff) != 0:
            minRatio.append(min(distListSame) / float(min(distListDiff)))

    # Calculate mean and std of all the min distances
    meanSame, stdSame = meanStd(minSame)
    meanDiff, stdDiff = meanStd(minDiff)
    maxDistRatio = max(minRatio)

    return maxDistRatio
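Both this example and the next call a meanStd helper that the listing does not show; a minimal stand-in (an assumption about its behaviour, not the original implementation) would be:

import math

def meanStd(values):
    # Mean and population standard deviation of a sequence of numbers
    mean = sum(values) / float(len(values))
    std = math.sqrt(sum((v - mean) ** 2 for v in values) / float(len(values)))
    return mean, std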
Example #6
def getMeanStd(extTrain):

    # Get the min dist for all ex in the data set
    minSame = []
    minDiff = []
    measure = orange.ExamplesDistanceConstructor_Euclidean(extTrain)
    for idx in range(len(extTrain)):
        distListSame = []
        distListDiff = []
        for iidx in range(len(extTrain)):
            if idx != iidx:
                dist = measure(extTrain[idx], extTrain[iidx])
                if extTrain[idx].get_class().value == extTrain[iidx].get_class(
                ).value:
                    distListSame.append(dist)
                else:
                    distListDiff.append(dist)
        minSame.append(min(distListSame))
        minDiff.append(min(distListDiff))

    # Calculate mean and std of all the min distances
    meanSame, stdSame = meanStd(minSame)
    meanDiff, stdDiff = meanStd(minDiff)

    return meanSame, stdSame, meanDiff, stdDiff
Example #7
def CVByPairs(data, dimensions=None, method=None, **dic):
    import orngTree
    cv = orange.MakeRandomIndicesCV(data, 10)
    meter = orange.ExamplesDistanceConstructor_Euclidean(data)

    maxDist = 0
    for i in range(100):
        maxDist = max(maxDist, meter(data.randomexample(),
                                     data.randomexample()))
    weightK = 10.0 / maxDist

    acc = amb = unre = 0
    for fold in range(10):
        train = data.select(cv, fold, negate=1)
        test = data.select(cv, fold)
        pa, qid, did, cid = pade(train,
                                 dimensions,
                                 method,
                                 originalAsMeta=True,
                                 **dic)
        tree = orngTree.TreeLearner(pa, maxDepth=4)

        tacc, tamb, tunre = computeDirectionAccuracyForPairs(
            tree, data, meter, weightK, -1)
        acc += tacc
        amb += tamb
        unre += tunre

    return acc / 10, amb / 10, unre / 10
Example #8
def distanceMatrix(data):
    dist = orange.ExamplesDistanceConstructor_Euclidean(data)
    matrix = orange.SymMatrix(len(data))
    matrix.setattr('items', data)
    for i in range(len(data)):
        for j in range(i + 1):
            matrix[i, j] = dist(data[i], data[j])
    return matrix
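The matrix built above can be fed straight into Orange's clustering routines, as later examples in this listing do; a minimal sketch:

import orange

data = orange.ExampleTable("iris")
matrix = distanceMatrix(data)
root = orange.HierarchicalClustering(matrix,
                                     linkage=orange.HierarchicalClustering.Average)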
Example #9
def LLOOprob(idx, extTrain, measure=None):
    """
    Use the fraction of kNN correctly predicted by a local model.
    Nominally hard coded to 20 NN, though the threshold below actually uses
    the neighbour at index 50 (see the inline comment).
    Modeling method: RF or Tree?
    """

    distList = []
    if not measure:
        measure = orange.ExamplesDistanceConstructor_Euclidean(extTrain)
    for runIdx in range(len(extTrain)):
        if runIdx != idx:
            dist = measure(extTrain[idx], extTrain[runIdx])
            distList.append(dist)

    # Get the distance of the neighbour at index 50 (not the 20th the
    # docstring mentions; a smaller number of NN does not work with returnDFV)
    distList.sort()
    thresDist = distList[50]

    # Collect predEx's neighbours within the threshold distance
    kNN = []
    for runIdx in range(len(extTrain)):
        dist = measure(extTrain[idx], extTrain[runIdx])
        if dist <= thresDist:
            kNN.append(extTrain[runIdx])
    kNNtrain = dataUtilities.DataTable(kNN)

    # Find the fraction of correctly predicted ex in a LOO over kNN
    alphaList = []
    for iidx in range(len(kNNtrain)):

        # Deselect example iidx in kNNtrain
        idxList = range(0, iidx)
        idxList.extend(range(iidx + 1, len(kNNtrain)))
        train = kNNtrain.get_items(idxList)

        # Get prediction and pred probability
        model = AZorngRF.RFLearner(train)
        predList = model(kNNtrain[iidx], returnDFV=True)
        pred = predList[0].value
        prob = predList[1]
        actual = kNNtrain[iidx].get_class().value
        # alpha should grow as the model becomes less certain
        try:
            if pred != actual:
                alpha = 1.0 + abs(prob)
            else:
                alpha = 1.0 - abs(prob)
            alphaList.append(alpha)
        except:
            pass

    alpha = sum(alphaList) / float(len(alphaList))

    return alpha
Example #10
    def test_learner_on(self, data):
        """ Test custom kernel wrapper
        """
        # Need the data for ExamplesDistanceConstructor_Euclidean
        self.learner = self.LEARNER(
            kernel_type=SVMLearner.Custom,
            kernel_func=RBFKernelWrapper(
                orange.ExamplesDistanceConstructor_Euclidean(data), gamma=0.5))

        testing.LearnerTestCase.test_learner_on(self, data)
Example #11
def construct_distance_matrix(data):
    '''
    Constructs a distance matrix using Euclidean distance
    '''
    euclidean = orange.ExamplesDistanceConstructor_Euclidean(data)
    distance = orange.SymMatrix(len(data))
    for i in range(len(data)):
        for j in range(i + 1):
            distance[i, j] = euclidean(data[i], data[j])
    return distance
Example #12
    def test_learner_on(self, data):
        """ Test custom kernel wrapper
        """
        if data.domain.has_continuous_attributes():
            dist = orange.ExamplesDistanceConstructor_Euclidean(data)
        else:
            dist = orange.ExamplesDistanceConstructor_Hamming(data)
        self.learner = self.LEARNER(kernel_type=SVMLearner.Custom,
                                    kernel_func=RBFKernelWrapper(dist,
                                                                 gamma=0.5))

        testing.LearnerTestCase.test_learner_on(self, data)
        svm_test_binary_classifier(self, data)
Example #13
    def test_iris(self):
        data = orange.ExampleTable("iris")
        dss = orange.ExamplesDistanceConstructor_Euclidean(data)
        t = orange.HierarchicalClustering.Linkage
        for linkage in [t.Single, t.Average, t.Complete, t.Ward]:
            dist = orange.SymMatrix(len(data))
            for i, e in enumerate(data):
                for j in range(i):
                    dist[i, j] = dss(e, data[j])
            root = orange.HierarchicalClustering(dist, linkage=linkage)
            self.assertEqual(len(root), len(data))
            self.rectestlen(root)
            root.mapping.objects = data
            self.assertEqual(root[0], data[0])
Example #14
def LLOO(idx, extTrain, measure=None):
    """
    Use the fraction of kNN correctly predicted by a local model
    Hard coded to 20 NN.
    Modeling method: RF or Tree?
    """
    attrList = ["SMILES_1"]
    extTrain = dataUtilities.attributeDeselectionData(extTrain, attrList)

    distList = []
    if not measure:
        measure = orange.ExamplesDistanceConstructor_Euclidean(extTrain)
    for runIdx in range(len(extTrain)):
        if runIdx != idx:
            dist = measure(extTrain[idx], extTrain[runIdx])
            distList.append(dist)

    # Get the distance of the 20th NN
    distList.sort()
    thresDist = distList[19]

    # Find the labels of the 20 NN
    kNN = []
    for runIdx in range(len(extTrain)):
        dist = measure(extTrain[idx], extTrain[runIdx])
        if dist <= thresDist:
            kNN.append(extTrain[runIdx])
    kNNtrain = dataUtilities.DataTable(kNN)

    # Find the fraction of correctly predicted ex in a LOO over kNN
    corrPred = 0
    for idx in range(len(kNNtrain)):

        # Deselect example idx in kNNtrain
        idxList = range(0, idx)
        idxList.extend(range(idx + 1, len(kNNtrain)))
        train = kNNtrain.get_items(idxList)

        # Train a model
        model = AZorngRF.RFLearner(train)
        #model = Orange.classification.tree.TreeLearner(train)

        pred = model(kNNtrain[idx]).value
        actual = kNNtrain[idx].get_class().value
        if pred == actual:
            corrPred = corrPred + 1
    alpha = 1.0 - float(corrPred) / len(kNNtrain)

    return alpha
Example #15
    def analysis(self):
        markers = [d.marker for d in self.data]
        if len(filter(lambda x: x is not None, markers)) < 3:
            self.sbox.setDisabled(True)
            for item in self.scene.items():
                self.scene.removeItem(item)
#                i.setCanvas(None)
#            self.canvas.update()
            return
        self.sbox.setEnabled(True)

        pa, pb, pab = [self.data[markers.index(x)] for x in range(3)]

        pb = orange.ExampleTable(pa.domain, pb)
        pab = orange.ExampleTable(pa.domain, pab)

        dist = orange.ExamplesDistanceConstructor_Euclidean(pa,
                                                            normalize=False)

        ave = [0] * 3
        vote = [0] * 3
        genevote = []
        for g in range(len(pa)):
            d = [dist(pb[g], pab[g]), dist(pa[g], pab[g]), dist(pa[g], pb[g])]
            voteindx = d.index(min(d))
            vote[voteindx] += 1
            genevote.append(voteindx)
            if self.distype == 1:
                for i in range(3):
                    d[i] = d[i] * d[i]
            for i in range(3):
                ave[i] += d[i]
        if self.distype == 1:
            ave = [math.sqrt(x) / len(pa) for x in ave]
        else:
            ave = [x / len(pa) for x in ave]

        # compute Chi^2 statistics,
        # update the interface (report on results)
        for i in range(3):
            self.selChkbox[i].setText(self.cbinfo[i][1] +
                                      "  (%d genes)" % vote[i])
        p = statc.chisquare([len(pa) / 3.] * 3, vote)[1]
        self.infochi.setText('Chi Square: ' +
                             ['p = %6.4f' % p, 'p < 0.0001'][p < 0.0001])

        self.setAnalysisPlot(ave)
        self.senddata(genevote)
Example #16
def test():
    app = QApplication(sys.argv)
    w = OWHierarchicalClustering()
    w.show()
    data = orange.ExampleTable("../../doc/datasets/iris.tab")
    id = orange.newmetaid()
    data.domain.addmeta(id, orange.FloatVariable("a"))
    data.addMetaAttribute(id)
    dist = orange.ExamplesDistanceConstructor_Euclidean(data)
    matrix = orange.SymMatrix(len(data))
    matrix.setattr('items', data)
    for i in range(len(data)):
        for j in range(i + 1):
            matrix[i, j] = dist(data[i], data[j])

    w.set_matrix(matrix)
    app.exec_()
Example #17
def minNN(idx, extTrain, maxDistRatio=None, measure=None):
    """
    Use the ratio between the distance to the nearest neighbor of the same and of the other class
    Two versions exist, with and without scaling with the max distance ratio within the train set. 
    """

    attrList = ["SMILES_1"]
    extTrain = dataUtilities.attributeDeselectionData(extTrain, attrList)

    distListSame = []
    distListDiff = []
    #measure = Orange.distance.Euclidean(extTrain)
    if not measure:
        measure = orange.ExamplesDistanceConstructor_Euclidean(extTrain)
    for runIdx in range(len(extTrain)):
        if runIdx != idx:
            dist = measure(extTrain[idx], extTrain[runIdx])
            if extTrain[idx].get_class().value == extTrain[runIdx].get_class(
            ).value:
                distListSame.append(dist)
            else:
                distListDiff.append(dist)
    minDistSame = min(distListSame)
    minDistDiff = min(distListDiff)
    if minDistDiff == 0:
        if maxDistRatio:
            alpha = 1.0
        else:
            alpha = max(distListDiff)
    else:
        if maxDistRatio:
            alpha = minDistSame / (float(minDistDiff) * maxDistRatio)
        else:
            alpha = minDistSame / float(minDistDiff)

    #fid = open("tempFile.txt", "a")
    #fid.write(str(minDistSame)+"\t"+str(minDistDiff)+"\t"+str(maxDistRatio)+"\t"+str(alpha)+"\n")
    #fid.close()

    return alpha
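The two variants mentioned in the docstring differ only in whether the ratio is rescaled by the largest ratio seen on the training set (getMinDistRatio from Example #5); a minimal sketch of both calls, with extTrain as above:

alphaUnscaled = minNN(0, extTrain)
alphaScaled = minNN(0, extTrain, maxDistRatio=getMinDistRatio(extTrain))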
Example #18
    def test_euclidean(self):
        d = orange.ExampleTable("iris")
        e1 = d[0]
        e2 = d[50]
        euc = orange.ExamplesDistanceConstructor_Euclidean()
        euc.normalize = False
        euc.ignoreClass = True
        eud = euc(d)
        self.assertEqual(eud(e1, e2), math.sqrt(sum((x-y)**2 for x, y in zip(list(e1)[:-1], e2))))
        
        euc.ignoreClass = False
        eud = euc(d)
        self.assertEqual(eud(e1, e2), math.sqrt(sum((x-y)**2 for x, y in zip(e1, e2))))

        euc.normalize = True        
        eud = euc(d)
        eud(e1, e2) # Returns whatever

        d2 = orange.ExampleTable("zoo")
        self.assertRaises(ValueError, eud, d2[0], d2[1])
        self.assertRaises(ValueError, eud, d2[0], d[0])
        eud = euc(d2)
        self.assertEqual(eud(d2[0], d2[0]), 0)
        eud(d2[0], d2[1]) # Returns whatever
Example #19
def repTime(msg):
    #print "%s: %s" % (time.asctime(), msg)
    pass


def callback(f, o):
    print int(round(100 * f)),


repTime("Loading data")
data = orange.ExampleTable("iris")

repTime("Computing distances")
matrix = orange.SymMatrix(len(data))
matrix.setattr("objects", data)
distance = orange.ExamplesDistanceConstructor_Euclidean(data)
for i1, ex1 in enumerate(data):
    for i2 in range(i1 + 1, len(data)):
        matrix[i1, i2] = distance(ex1, data[i2])

repTime("Hierarchical clustering (single linkage)")
clustering = orange.HierarchicalClustering()
clustering.linkage = clustering.Average
clustering.overwriteMatrix = 1
root = clustering(matrix)

repTime("Done.")


def prune(cluster, togo):
    if cluster.branches:
        # Plausible completion (the original snippet is cut off here):
        # prune subtrees deeper than the remaining height budget
        if togo < 0:
            cluster.branches = None
        else:
            for branch in cluster.branches:
                prune(branch, togo - cluster.height)
Example #20
def tubedRegression(cache, dimensions, progressCallback=None, **args):
    if not cache.findNearest:
        cache.findNearest = orange.FindNearestConstructor_BruteForce(
            cache.data,
            distanceConstructor=orange.ExamplesDistanceConstructor_Euclidean(),
            includeSame=True)

    if not cache.attrStat:
        cache.attrStat = orange.DomainBasicAttrStat(cache.data)

    normalizers = cache.findNearest.distance.normalizers

    if progressCallback:
        nExamples = len(cache.data)
        nPoints = 100.0 / nExamples / len(dimensions)

    effNeighbours = len(cache.contAttributes) > 1 and cache.nNeighbours or len(
        cache.deltas)

    for di, d in enumerate(dimensions):
        contIdx = cache.contIndices[d]

        minV, maxV = cache.attrStat[contIdx].min, cache.attrStat[contIdx].max
        if minV == maxV:
            continue

        oldNormalizer = normalizers[cache.contIndices[d]]
        normalizers[cache.contIndices[d]] = 0

        for exi, ref_example in enumerate(cache.data):
            if ref_example[contIdx].isSpecial():
                cache.deltas[exi][d] = "?"
                continue

            ref_x = float(ref_example[contIdx])

            Sx = Sy = Sxx = Syy = Sxy = n = 0.0

            nn = cache.findNearest(ref_example, 0, True)
            nn = [ex for ex in nn
                  if not ex[contIdx].isSpecial()][:effNeighbours]
            mx = [abs(ex[contIdx] - ref_x) for ex in nn]
            if not mx:
                cache.deltas[exi][d] = "?"
                continue
            if max(mx) < 1e-10:
                kw = math.log(.001)
            else:
                kw = math.log(.001) / max(mx)**2
            for ex in nn[:effNeighbours]:
                ex_x = float(ex[contIdx])
                ex_y = float(ex.getclass())
                w = math.exp(kw * (ex_x - ref_x)**2)
                Sx += w * ex_x
                Sy += w * ex_y
                Sxx += w * ex_x**2
                Syy += w * ex_y**2
                Sxy += w * ex_x * ex_y
                n += w

            div = n * Sxx - Sx**2
            if div:  # and i<40:
                b = (Sxy * n - Sx * Sy) / div

                #                div = Sx*Sy/n - Sxy
                #                if abs(div) < 1e-10:
                #                    cache.errors[exi][d] = 1
                #                else:
                #                    B = ((Syy - Sy**2/n) - (Sxx - Sx**2/n)) / 2 / div
                #
                #                    b_p = -B + math.sqrt(B**2+1)
                #                    a = Sy/n - b_p * Sx/n
                #                    error1 = 1/(1+b_p**2) * (Syy + a**2 + b_p**2*Sxx - 2*a*Sy + 2*a*b_p*Sx - 2*b_p*Sxy)
                #
                #                    b_2 = -B - math.sqrt(B**2+1)
                #                    a = Sy/n - b_p * Sx/n
                #                    error2 = 1/(1+b_p**2) * (Syy + a**2 + b_p**2*Sxx - 2*a*Sy + 2*a*b_p*Sx - 2*b_p*Sxy)
                #
                #                    if error1 < error2 and error1 >= 0:
                #                        cache.errors[exi][d] = error1
                #                    elif error2 >= 0:
                #                        cache.errors[exi][d] = error2
                #                    else:
                #                        cache.errors[exi][d] = 42
                #                        print error1, error2

                a = (Sy - b * Sx) / n
                err = (n * a**2 + b**2 * Sxx + Syy + 2 * a * b * Sx -
                       2 * a * Sy - 2 * b * Sxy)
                tot = Syy - Sy**2 / n
                mod = tot - err
                merr = err / (n - 2)
                if merr < 1e-10:
                    F = 0
                    Fprob = 1
                else:
                    F = mod / merr
                    Fprob = statc.fprob(F, 1, int(n - 2))
                cache.errors[exi][d] = Fprob
                #                        print "%.4f" % Fprob,
                #print ("%.3f\t" + "%.0f\t"*6 + "%f\t%f") % (w, ref_x, ex_x, n, a, b, merr, F, Fprob)
                cache.deltas[exi][d] = b
            else:
                cache.deltas[exi][d] = "?"

            if progressCallback:
                progressCallback((nExamples * di + exi) * nPoints)

        normalizers[cache.contIndices[d]] = oldNormalizer
Example #21
import orange, orngSVM
data = orange.ExampleTable("iris.tab")
l1 = orngSVM.SVMLearner()
l1.kernelFunc = orngSVM.RBFKernelWrapper(
    orange.ExamplesDistanceConstructor_Euclidean(data), gamma=0.5)
l1.kernel_type = orange.SVMLearner.Custom
l1.probability = True
c1 = l1(data)
l1.name = "SVM - RBF(Euclidean)"

l2 = orngSVM.SVMLearner()
l2.kernelFunc = orngSVM.RBFKernelWrapper(
    orange.ExamplesDistanceConstructor_Hamming(data), gamma=0.5)
l2.kernel_type = orange.SVMLearner.Custom
l2.probability = True
c2 = l2(data)
l2.name = "SVM - RBF(Hamming)"

l3 = orngSVM.SVMLearner()
l3.kernelFunc = orngSVM.CompositeKernelWrapper(
    orngSVM.RBFKernelWrapper(
        orange.ExamplesDistanceConstructor_Euclidean(data), gamma=0.5),
    orngSVM.RBFKernelWrapper(
        orange.ExamplesDistanceConstructor_Hamming(data), gamma=0.5),
    l=0.5)
l3.kernel_type = orange.SVMLearner.Custom
l3.probability = True
c3 = l3(data)
l3.name = "SVM - Composite"

import orngTest, orngStat
tests = orngTest.crossValidation([l1, l2, l3], data, folds=5)
[ca1, ca2, ca3] = orngStat.CA(tests)
print l1.name, "CA: %.2f" % ca1
print l2.name, "CA: %.2f" % ca2
print l3.name, "CA: %.2f" % ca3
Example #22
    def showWidget(self):
        self.information()
        
        if self.ow is not None:
            self.ow.topWidgetPart.hide()
            self.ow.setLayout(self.layout())
        elif self.layout() is not None: 
            sip.delete(self.layout())
            
        self.ow = None
        if self.data is None: 
            self.information("No learning data given.")
            return
        if self.model is None: return
        if "model" not in self.model.domain: return
        if "label" in self.model.domain:
            attr = self.model["label"].value.split(', ')
        
        modelType = self.model["model"].value.upper()
        
        projWidget = None
        if modelType == "SCATTERPLOT" or modelType == "SCATTTERPLOT": 
            projWidget = self.setWidget(OWScatterPlot.OWScatterPlot)

        if modelType == "RADVIZ":
            projWidget = self.setWidget(OWRadviz.OWRadviz) 
            
        if modelType == "POLYVIZ": 
            projWidget = self.setWidget(OWPolyviz.OWPolyviz) 
            
        if projWidget is not None:
            self.ow.setData(self.data)
            self.ow.setShownAttributes(attr)
            self.ow.handleNewSignals() 
        
        ################################
        ### add new model types here ###
        ################################
        
        if modelType == "SPCA" or modelType == "LINPROJ": 
            self.setWidget(OWLinProj.OWLinProj) 
            self.ow.setData(self.data)
            self.ow.setShownAttributes(attr)
            self.ow.handleNewSignals() 
            xAnchors, yAnchors = self.model["anchors"].value
            self.ow.updateGraph(None, setAnchors=1, XAnchors=xAnchors, YAnchors=yAnchors)
            
        if modelType == "TREE":
            self.setWidget(OWClassificationTreeGraph.OWClassificationTreeGraph)
            classifier = self.model["classifier"].value
            self.ow.ctree(classifier)
            
        if modelType == "BAYES":
            self.setWidget(OWNomogram.OWNomogram) 
            classifier = self.model["classifier"].value
            self.ow.classifier(classifier)
            
        if modelType == "KNN":
            exclude = [att for att in self.data.domain if att.name not in attr + [self.data.domain.classVar.name]]
            data2 = orange.Preprocessor_ignore(self.data, attributes = exclude)
            dist = orange.ExamplesDistanceConstructor_Euclidean(data2)
            smx = orange.SymMatrix(len(data2))
            smx.setattr('items', data2)
            pb = OWGUI.ProgressBar(self, 100)
            milestones = orngMisc.progressBarMilestones(len(data2)*(len(data2)-1)/2, 100)
            count = 0
            for i in range(len(data2)):
                for j in range(i+1):
                    smx[i, j] = dist(data2[i], data2[j])
                    if count in milestones:
                        pb.advance()
                    count += 1
            pb.finish()
            self.setWidget(OWMDS.OWMDS)
            self.ow.cmatrix(smx)
            
        if self.ow is not None:
            self.ow.send = self.send
            if self.layout() is not None: sip.delete(self.layout())
            self.setLayout(self.ow.layout())
            self.ow.topWidgetPart.show()
        
        self.update()
Example #23
# Description: Shows how to find the nearest neighbours of the given example
# Category:    basic classes, distances
# Classes:     FindNearest, FindNearestConstructor, FindNearest_BruteForce, FindNearestConstructor_BruteForce
# Uses:        lenses
# Referenced:  FindNearest.htm

import orange

data = orange.ExampleTable("lenses")

nnc = orange.FindNearestConstructor_BruteForce()
nnc.distanceConstructor = orange.ExamplesDistanceConstructor_Euclidean()

did = -42
# Note that this is wrong: id should be assigned by
# did = orange.newmetaid()
# We only do this so that the script gives the same output each time it's run
nn = nnc(data, 0, did)

print "*** Reference example: ", data[0]
for ex in sorted(nn(data[0], 5)):
    print ex