Exemple #1
0
    def test_bruteforce(self):
        """Check that the generic nearest-neighbour classes are the brute-force ones.

        The generic names must compare equal to their ``_BruteForce``
        counterparts, and finders built through either constructor must
        return the same 15 neighbours of the first iris example (compared
        through the checksum of the returned tables).
        """
        # The generic names are expected to be the brute-force classes.
        self.assertEqual(orange.FindNearestConstructor,
                         orange.FindNearestConstructor_BruteForce)
        self.assertEqual(orange.FindNearest, orange.FindNearest_BruteForce)

        iris = orange.ExampleTable("iris")

        # Neighbours found through the generic constructor ...
        generic_constructor = orange.FindNearestConstructor()
        generic_finder = generic_constructor(iris)
        generic_neighbours = generic_finder(iris[0], 15)

        # ... and through the explicitly named brute-force constructor.
        brute_constructor = orange.FindNearestConstructor_BruteForce()
        brute_finder = brute_constructor(iris)
        brute_neighbours = brute_finder(iris[0], 15)

        self.assertEqual(generic_neighbours.checksum(),
                         brute_neighbours.checksum())
Exemple #2
0
def _tubedRegressionFit(cache, ref_example, contIdx, effNeighbours):
    """Fit a weighted univariate regression line around one reference example.

    The class value is regressed on the attribute at index ``contIdx`` over
    (at most) the first ``effNeighbours`` examples returned by
    ``cache.findNearest`` whose value of that attribute is known.

    Returns a tuple ``(b, Fprob)`` -- the slope of the fitted line and the
    significance computed for it -- or ``None`` when no line can be fitted
    (the reference value is unknown, there are no usable neighbours, or the
    neighbours do not differ in the attribute).
    """
    if ref_example[contIdx].isSpecial():
        return None

    ref_x = float(ref_example[contIdx])

    nn = cache.findNearest(ref_example, 0, True)
    # Neighbours with an unknown attribute value cannot take part in the fit.
    nn = [ex for ex in nn if not ex[contIdx].isSpecial()][:effNeighbours]
    mx = [abs(ex[contIdx] - ref_x) for ex in nn]
    if not mx:
        return None

    # Weights are exp(kw * dx**2); kw is scaled so that the neighbour that is
    # farthest along this attribute gets the weight 0.001.
    maxDist = max(mx)
    if maxDist < 1e-10:
        kw = math.log(.001)
    else:
        kw = math.log(.001) / maxDist**2

    # Weighted sums needed for the least-squares line y = a + b*x.
    Sx = Sy = Sxx = Syy = Sxy = n = 0.0
    for ex in nn:
        ex_x = float(ex[contIdx])
        ex_y = float(ex.getclass())
        w = math.exp(kw * (ex_x - ref_x)**2)
        Sx += w * ex_x
        Sy += w * ex_y
        Sxx += w * ex_x**2
        Syy += w * ex_y**2
        Sxy += w * ex_x * ex_y
        n += w

    div = n * Sxx - Sx**2
    if not div:
        # All neighbours share the same x: the slope is undefined.
        return None

    b = (Sxy * n - Sx * Sy) / div
    a = (Sy - b * Sx) / n

    # Weighted residual sum of squares of the fitted line ...
    err = (n * a**2 + b**2 * Sxx + Syy + 2 * a * b * Sx -
           2 * a * Sy - 2 * b * Sxy)
    # ... total sum of squares, and the part explained by the line.
    tot = Syy - Sy**2 / n
    mod = tot - err

    dof = n - 2
    if dof <= 0:
        # No residual degrees of freedom (n is a sum of weights, so it can
        # be 2 or less): avoid dividing by zero and report "not significant".
        Fprob = 1
    else:
        merr = err / dof
        if merr < 1e-10:
            Fprob = 1
        else:
            # NOTE(review): the argument order (F, 1, dof) should be
            # confirmed against the signature of statc.fprob.
            Fprob = statc.fprob(mod / merr, 1, int(dof))

    return b, Fprob


def tubedRegression(cache, dimensions, progressCallback=None, **args):
    """Estimate, for every example, the slope of the class along each dimension.

    For each ``d`` in ``dimensions`` and each example in ``cache.data`` a
    weighted univariate regression is fitted over the example's nearest
    neighbours. While a dimension is processed, the normalizer of its
    attribute in ``cache.findNearest.distance`` is set to 0 (presumably so
    that the attribute does not contribute to the distance -- confirm
    against the distance implementation); it is restored afterwards, even
    if the computation fails.

    Arguments:
        cache: object that provides ``data``, ``contIndices``,
            ``contAttributes``, ``nNeighbours``, ``deltas`` and ``errors``.
            ``findNearest`` and ``attrStat`` are constructed and stored in
            the cache when they are not set yet.
        dimensions: indices into ``cache.contIndices`` (and into the rows of
            ``cache.deltas`` / ``cache.errors``) to process.
        progressCallback: optional callable; called after each example with
            ``(nExamples * di + exi) * 100.0 / nExamples / len(dimensions)``.
        **args: accepted and ignored.

    Results are written in place: ``cache.deltas[exi][d]`` gets the slope,
    or ``"?"`` when it cannot be computed, and ``cache.errors[exi][d]`` gets
    the corresponding significance (only when the slope is defined).
    Dimensions whose attribute is constant are skipped entirely.
    """
    if not cache.findNearest:
        cache.findNearest = orange.FindNearestConstructor_BruteForce(
            cache.data,
            distanceConstructor=orange.ExamplesDistanceConstructor_Euclidean(),
            includeSame=True)

    if not cache.attrStat:
        cache.attrStat = orange.DomainBasicAttrStat(cache.data)

    normalizers = cache.findNearest.distance.normalizers

    if progressCallback:
        nExamples = len(cache.data)
        nDimensions = len(dimensions)
        if nExamples and nDimensions:
            nPoints = 100.0 / nExamples / nDimensions
        else:
            # Nothing will be processed, so the step size is never used;
            # just avoid dividing by zero.
            nPoints = 0.0

    # Use cache.nNeighbours only when there are several continuous attributes
    # and the setting is non-zero; otherwise fall back to len(cache.deltas).
    if len(cache.contAttributes) > 1 and cache.nNeighbours:
        effNeighbours = cache.nNeighbours
    else:
        effNeighbours = len(cache.deltas)

    for di, d in enumerate(dimensions):
        contIdx = cache.contIndices[d]

        stat = cache.attrStat[contIdx]
        if stat.min == stat.max:
            # Constant attribute: no slope can be estimated along it.
            continue

        oldNormalizer = normalizers[contIdx]
        normalizers[contIdx] = 0
        try:
            for exi, ref_example in enumerate(cache.data):
                fit = _tubedRegressionFit(cache, ref_example, contIdx,
                                          effNeighbours)
                if fit is None:
                    cache.deltas[exi][d] = "?"
                else:
                    b, Fprob = fit
                    cache.errors[exi][d] = Fprob
                    cache.deltas[exi][d] = b

                if progressCallback:
                    progressCallback((nExamples * di + exi) * nPoints)
        finally:
            # The finder is shared through the cache; never leave it with a
            # zeroed normalizer, not even when an exception escapes.
            normalizers[contIdx] = oldNormalizer
# Description: Shows how to find the nearest neighbours of the given example
# Category:    basic classes, distances
# Classes:     FindNearest, FindNearestConstructor, FindNearest_BruteForce, FindNearestConstructor_BruteForce
# Uses:        lenses
# Referenced:  FindNearest.htm

import orange

data = orange.ExampleTable("lenses")

nnc = orange.FindNearestConstructor_BruteForce()
nnc.distanceConstructor = orange.ExamplesDistanceConstructor_Euclidean()

did = -42
# Note that this is wrong: id should be assigned by
# did = orange.newmetaid()
# We only do this so that the script gives the same output each time it's run
nn = nnc(data, 0, did)

print "*** Reference example: ", data[0]
for ex in sorted(nn(data[0], 5)):
    print ex