コード例 #1
0
ファイル: orngRegression.py プロジェクト: AutumnLight/orange
def get_sig(m1, m2, n):
    """Return the significance of extending model ``m1`` to model ``m2``.

    Both models must expose ``domain.attributes`` (its length is used as the
    number of model terms) and ``statistics["model summary"]["ExplVar"]``.
    An F ratio is built from the two statistics and handed to
    ``statc.fprob`` together with the degrees of freedom ``p2 - p1`` and
    ``n - p2``.

    NOTE(review): the values are read from the "ExplVar" entry but take the
    role of residual sums of squares in the F ratio -- confirm what that
    entry actually holds.

    1.0 is returned without calling ``statc.fprob`` when either model is
    None, when ``m2``'s statistic is not smaller than ``m1``'s, when ``m2``
    has no more attributes than ``m1``, when ``n`` does not exceed the number
    of attributes of ``m2``, or when ``m2``'s statistic is not positive.
    """
    if m1 is None or m2 is None:
        return 1.0

    small_size = len(m1.domain.attributes)
    large_size = len(m2.domain.attributes)
    small_stat = m1.statistics["model summary"]["ExplVar"]
    large_stat = m2.statistics["model summary"]["ExplVar"]

    # Any of these makes the F ratio undefined or meaningless.
    if (small_stat <= large_stat
            or large_size <= small_size
            or n <= large_size
            or large_stat <= 0):
        return 1.0

    extra_terms = large_size - small_size
    residual_dof = n - large_size
    numerator = (small_stat - large_stat) / extra_terms
    denominator = large_stat / residual_dof
    f_ratio = numerator / denominator
    return statc.fprob(int(extra_terms), int(residual_dof), f_ratio)
コード例 #2
0
def get_sig(m1, m2, n):
    """Return the significance of extending model ``m1`` to model ``m2``.

    ``m1`` and ``m2`` are fitted models exposing ``domain.attributes`` and
    ``statistics["model summary"]["ExplVar"]``; ``n`` enters the denominator
    degrees of freedom as ``n - p2``. The result is whatever ``statc.fprob``
    returns for the F ratio with ``p2 - p1`` and ``n - p2`` degrees of
    freedom.

    NOTE(review): the "ExplVar" entries are used in the position of residual
    sums of squares -- confirm that this is what the entry contains.

    Returns 1.0 (without evaluating the F ratio) if either model is None or
    if the comparison is degenerate: the statistic did not decrease, no
    attribute was added, ``n <= p2``, or the larger model's statistic is not
    positive.
    """
    # Identity test: ``== None`` would dispatch to a model-defined __eq__,
    # which may raise or return a non-boolean object.
    if m1 is None or m2 is None:
        return 1.0
    p1, p2 = len(m1.domain.attributes), len(m2.domain.attributes)
    RSS1 = m1.statistics["model summary"]["ExplVar"]
    RSS2 = m2.statistics["model summary"]["ExplVar"]
    if RSS1 <= RSS2 or p2 <= p1 or n <= p2 or RSS2 <= 0:
        return 1.0
    F = ((RSS1 - RSS2) / (p2 - p1)) / (RSS2 / (n - p2))
    return statc.fprob(int(p2 - p1), int(n - p2), F)
コード例 #3
0
def tubedRegression(cache, dimensions, progressCallback=None, **args):
    """Estimate, per example, the slope of the class along each dimension.

    For every index ``d`` in ``dimensions`` and every example in
    ``cache.data``, the nearest examples are fetched with the normalizer of
    the selected attribute temporarily set to 0 (presumably so that the
    attribute does not contribute to the distance). A weighted univariate
    least-squares line of the class value against the attribute is fitted
    through those neighbours.

    Results are written in place:

    * ``cache.deltas[exi][d]`` receives the slope, or ``"?"`` when the
      reference value is special, no usable neighbour exists, or the
      attribute has no spread among the neighbours.
    * ``cache.errors[exi][d]`` receives the F-test value of the fit (1 when
      the fit is exact or there are no residual degrees of freedom); it is
      left untouched when the delta is ``"?"``.

    Dimensions whose attribute has equal ``min`` and ``max`` in
    ``cache.attrStat`` are skipped entirely.

    ``cache.findNearest`` and ``cache.attrStat`` are created on demand.
    ``progressCallback``, if given, is called with a number after each
    processed example (not for examples skipped early). Extra keyword
    arguments are accepted and ignored. Returns None.
    """
    if not cache.findNearest:
        cache.findNearest = orange.FindNearestConstructor_BruteForce(
            cache.data,
            distanceConstructor=orange.ExamplesDistanceConstructor_Euclidean(),
            includeSame=True)

    if not cache.attrStat:
        cache.attrStat = orange.DomainBasicAttrStat(cache.data)

    normalizers = cache.findNearest.distance.normalizers

    if progressCallback:
        nExamples = len(cache.data)
        nDimensions = len(dimensions)
        # With no examples or no dimensions the callback is never reached;
        # avoid the ZeroDivisionError the plain division would raise.
        if nExamples and nDimensions:
            nPoints = 100.0 / nExamples / nDimensions
        else:
            nPoints = 0.0

    # Falls back to the number of rows in cache.deltas when there is at most
    # one continuous attribute or when nNeighbours is unset/zero.
    if len(cache.contAttributes) > 1 and cache.nNeighbours:
        effNeighbours = cache.nNeighbours
    else:
        effNeighbours = len(cache.deltas)

    for di, d in enumerate(dimensions):
        contIdx = cache.contIndices[d]

        minV, maxV = cache.attrStat[contIdx].min, cache.attrStat[contIdx].max
        if minV == maxV:
            continue

        oldNormalizer = normalizers[contIdx]
        normalizers[contIdx] = 0
        try:
            for exi, ref_example in enumerate(cache.data):
                if ref_example[contIdx].isSpecial():
                    cache.deltas[exi][d] = "?"
                    continue

                ref_x = float(ref_example[contIdx])

                nn = cache.findNearest(ref_example, 0, True)
                nn = [ex for ex in nn
                      if not ex[contIdx].isSpecial()][:effNeighbours]
                if not nn:
                    cache.deltas[exi][d] = "?"
                    continue

                # Gaussian-shaped weights scaled so that the farthest
                # neighbour (along this attribute) gets weight 0.001.
                maxDist = max(abs(ex[contIdx] - ref_x) for ex in nn)
                if maxDist < 1e-10:
                    kw = math.log(.001)
                else:
                    kw = math.log(.001) / maxDist**2

                Sx = Sy = Sxx = Syy = Sxy = n = 0.0
                for ex in nn:
                    ex_x = float(ex[contIdx])
                    ex_y = float(ex.getclass())
                    w = math.exp(kw * (ex_x - ref_x)**2)
                    Sx += w * ex_x
                    Sy += w * ex_y
                    Sxx += w * ex_x**2
                    Syy += w * ex_y**2
                    Sxy += w * ex_x * ex_y
                    n += w

                div = n * Sxx - Sx**2
                if div:
                    b = (Sxy * n - Sx * Sy) / div

                    dof = int(n - 2)
                    if dof < 1:
                        # No residual degrees of freedom: the F test is
                        # undefined (and n == 2 would divide by zero).
                        Fprob = 1
                    else:
                        a = (Sy - b * Sx) / n
                        err = (n * a**2 + b**2 * Sxx + Syy + 2 * a * b * Sx -
                               2 * a * Sy - 2 * b * Sxy)
                        mod = (Syy - Sy**2 / n) - err
                        merr = err / (n - 2)
                        if merr < 1e-10:
                            Fprob = 1
                        else:
                            # fprob takes (dfnum, dfden, F); the statistic
                            # was previously passed as the first argument.
                            Fprob = statc.fprob(1, dof, mod / merr)
                    cache.errors[exi][d] = Fprob
                    cache.deltas[exi][d] = b
                else:
                    cache.deltas[exi][d] = "?"

                if progressCallback:
                    progressCallback((nExamples * di + exi) * nPoints)
        finally:
            # Always undo the temporary change to the shared distance
            # object, even if a neighbour search or the callback raises.
            normalizers[contIdx] = oldNormalizer
コード例 #4
0
ファイル: orngPade.py プロジェクト: testmana2/orange
def _weightedSlope(neighbours, contIdx, ref_x):
    """Fit a weighted least-squares line of class value against one attribute.

    ``neighbours`` is a non-empty list of examples whose value at
    ``contIdx`` is known; ``ref_x`` is the attribute value of the reference
    example. Each neighbour is weighted by ``exp(kw * (x - ref_x)**2)`` with
    ``kw`` chosen so that the neighbour farthest from ``ref_x`` gets weight
    0.001.

    Returns ``(slope, Fprob)``, or None when the weighted spread of the
    attribute is exactly zero and the slope is therefore undefined.
    ``Fprob`` is 1 when the fit is (numerically) exact or when fewer than one
    residual degree of freedom remains; otherwise it is ``statc.fprob`` of
    the regression F statistic.
    """
    farthest = max(abs(ex[contIdx] - ref_x) for ex in neighbours)
    if farthest < 1e-10:
        kw = math.log(0.001)
    else:
        kw = math.log(0.001) / farthest ** 2

    Sx = Sy = Sxx = Syy = Sxy = n = 0.0
    for ex in neighbours:
        ex_x = float(ex[contIdx])
        ex_y = float(ex.getclass())
        w = math.exp(kw * (ex_x - ref_x) ** 2)
        Sx += w * ex_x
        Sy += w * ex_y
        Sxx += w * ex_x ** 2
        Syy += w * ex_y ** 2
        Sxy += w * ex_x * ex_y
        n += w

    div = n * Sxx - Sx ** 2
    if not div:
        return None

    b = (Sxy * n - Sx * Sy) / div

    dof = int(n - 2)
    if dof < 1:
        # The F test needs at least one residual degree of freedom; this
        # also avoids dividing by zero when the weights sum to exactly 2.
        return b, 1

    a = (Sy - b * Sx) / n
    err = n * a ** 2 + b ** 2 * Sxx + Syy + 2 * a * b * Sx - 2 * a * Sy - 2 * b * Sxy
    tot = Syy - Sy ** 2 / n
    merr = err / (n - 2)
    if merr < 1e-10:
        return b, 1

    # fprob expects (dfnum, dfden, F); the F statistic goes last.
    return b, statc.fprob(1, dof, (tot - err) / merr)


def tubedRegression(cache, dimensions, progressCallback=None, **args):
    """Fill ``cache.deltas`` and ``cache.errors`` with local regression results.

    For each index ``d`` in ``dimensions`` (looked up through
    ``cache.contIndices``) and each example in ``cache.data``:

    * the normalizer of the selected attribute in the nearest-neighbour
      distance is temporarily set to 0 (presumably to leave that attribute
      out of the distance) and restored afterwards, even on error;
    * the nearest examples with a known value of the attribute are taken, at
      most ``cache.nNeighbours`` of them (or ``len(cache.deltas)`` when there
      is at most one continuous attribute or ``nNeighbours`` is zero);
    * a weighted line is fitted through them; its slope is stored in
      ``cache.deltas[exi][d]`` and the F-test value in
      ``cache.errors[exi][d]``.

    ``"?"`` is stored as the delta (and the error is left untouched) when the
    reference example's value is special, when there are no usable
    neighbours, or when the slope is undefined. Dimensions whose attribute
    has ``min == max`` in ``cache.attrStat`` are skipped.

    ``cache.findNearest`` and ``cache.attrStat`` are constructed if missing.
    ``progressCallback`` is called with a number after each fully processed
    example. Additional keyword arguments are ignored. Returns None.
    """
    if not cache.findNearest:
        cache.findNearest = orange.FindNearestConstructor_BruteForce(
            cache.data, distanceConstructor=orange.ExamplesDistanceConstructor_Euclidean(), includeSame=True
        )

    if not cache.attrStat:
        cache.attrStat = orange.DomainBasicAttrStat(cache.data)

    normalizers = cache.findNearest.distance.normalizers

    if progressCallback:
        nExamples = len(cache.data)
        # Nothing is reported for empty data or no dimensions, so any step
        # size works there; the plain division would raise instead.
        if nExamples and len(dimensions):
            nPoints = 100.0 / nExamples / len(dimensions)
        else:
            nPoints = 0.0

    if len(cache.contAttributes) > 1 and cache.nNeighbours:
        effNeighbours = cache.nNeighbours
    else:
        effNeighbours = len(cache.deltas)

    for di, d in enumerate(dimensions):
        contIdx = cache.contIndices[d]

        attrStat = cache.attrStat[contIdx]
        if attrStat.min == attrStat.max:
            continue

        oldNormalizer = normalizers[contIdx]
        normalizers[contIdx] = 0
        try:
            for exi, ref_example in enumerate(cache.data):
                if ref_example[contIdx].isSpecial():
                    cache.deltas[exi][d] = "?"
                    continue

                ref_x = float(ref_example[contIdx])

                found = cache.findNearest(ref_example, 0, True)
                neighbours = [ex for ex in found if not ex[contIdx].isSpecial()][:effNeighbours]
                if not neighbours:
                    cache.deltas[exi][d] = "?"
                    continue

                fit = _weightedSlope(neighbours, contIdx, ref_x)
                if fit is None:
                    cache.deltas[exi][d] = "?"
                else:
                    slope, Fprob = fit
                    cache.errors[exi][d] = Fprob
                    cache.deltas[exi][d] = slope

                if progressCallback:
                    progressCallback((nExamples * di + exi) * nPoints)
        finally:
            # The distance object is shared through the cache; never leave
            # its normalizer zeroed if something above raises.
            normalizers[contIdx] = oldNormalizer