def get_sig(m1, m2, n):
    """Return the p-value of a partial F-test comparing nested linear models.

    m1 is the smaller (restricted) model, m2 the larger one; n is the number
    of observations.  Returns 1.0 (not significant) for any degenerate
    comparison: a missing model, no extra parameters, too few observations,
    or a residual sum that does not decrease.
    """
    # No comparison possible without both models.
    if m1 is None or m2 is None:
        return 1.0

    p1 = len(m1.domain.attributes)
    p2 = len(m2.domain.attributes)
    # NOTE(review): the key is named "ExplVar" but is used as a residual
    # sum of squares in the F statistic below -- confirm against the producer.
    RSS1 = m1.statistics["model summary"]["ExplVar"]
    RSS2 = m2.statistics["model summary"]["ExplVar"]

    # Guard every degenerate case in one place.
    degenerate = (
        RSS1 <= RSS2
        or p2 <= p1
        or n <= p2
        or RSS2 <= 0
    )
    if degenerate:
        return 1.0

    F = ((RSS1 - RSS2) / (p2 - p1)) / (RSS2 / (n - p2))
    return statc.fprob(int(p2 - p1), int(n - p2), F)
def get_sig(m1, m2, n):
    """Return the p-value of a partial F-test comparing nested linear models.

    m1 is the smaller (restricted) model, m2 the larger one; n is the number
    of observations.  Returns 1.0 (not significant) whenever the comparison
    is degenerate: a missing model, no additional parameters in m2, too few
    observations, or no reduction in the residual sum.
    """
    # FIX: use identity (`is None`) rather than `== None`; equality may be
    # hijacked by a custom __eq__ and is non-idiomatic for None checks.
    if m1 is None or m2 is None:
        return 1.0
    p1, p2 = len(m1.domain.attributes), len(m2.domain.attributes)
    # NOTE(review): the key is named "ExplVar" but is consumed as a residual
    # sum of squares below -- confirm against the code that fills statistics.
    RSS1 = m1.statistics["model summary"]["ExplVar"]
    RSS2 = m2.statistics["model summary"]["ExplVar"]
    # Degenerate comparisons cannot be significant.
    if RSS1 <= RSS2 or p2 <= p1 or n <= p2 or RSS2 <= 0:
        return 1.0
    # Partial F statistic: ((RSS1-RSS2)/(p2-p1)) / (RSS2/(n-p2)).
    F = ((RSS1 - RSS2) / (p2 - p1)) / (RSS2 / (n - p2))
    return statc.fprob(int(p2 - p1), int(n - p2), F)
def tubedRegression(cache, dimensions, progressCallback=None, **args):
    """Estimate a local slope of the class value along each given attribute.

    For every example and every dimension in `dimensions`, fits a
    Gaussian-weighted least-squares line through the example's nearest
    neighbours, measured in a "tube" around the attribute's axis (the
    attribute itself is excluded from the neighbour distance).

    Results are written in place:
      - cache.deltas[exi][d] -- fitted slope b, or "?" when undefined
      - cache.errors[exi][d] -- F-test p-value of the local regression

    Parameters:
      cache: holder of data, deltas/errors matrices, and lazily-built
          nearest-neighbour searcher and attribute statistics (Orange types).
      dimensions: indices (into cache.contIndices) of attributes to process.
      progressCallback: optional callable taking percent done (0..100).
      **args: ignored; accepted for call-site compatibility.
    """
    # Lazily build and cache the brute-force nearest-neighbour searcher.
    if not cache.findNearest:
        cache.findNearest = orange.FindNearestConstructor_BruteForce(
            cache.data,
            distanceConstructor=orange.ExamplesDistanceConstructor_Euclidean(),
            includeSame=True)

    # Lazily build basic per-attribute statistics (min/max used below).
    if not cache.attrStat:
        cache.attrStat = orange.DomainBasicAttrStat(cache.data)

    normalizers = cache.findNearest.distance.normalizers

    if progressCallback:
        nExamples = len(cache.data)
        nPoints = 100.0 / nExamples / len(dimensions)

    # With a single continuous attribute, use all examples as neighbours.
    # NOTE(review): `cond and a or b` falls through to len(cache.deltas)
    # whenever cache.nNeighbours is 0 (falsy) -- confirm this is intended.
    effNeighbours = len(cache.contAttributes) > 1 and cache.nNeighbours or len(
        cache.deltas)

    for di, d in enumerate(dimensions):
        contIdx = cache.contIndices[d]

        minV, maxV = cache.attrStat[contIdx].min, cache.attrStat[contIdx].max
        if minV == maxV:
            # Constant attribute: no slope to estimate.
            continue

        # Exclude this attribute from the distance measure so the neighbour
        # "tube" extends along its axis; the normalizer is restored below.
        oldNormalizer = normalizers[cache.contIndices[d]]
        normalizers[cache.contIndices[d]] = 0

        for exi, ref_example in enumerate(cache.data):
            if ref_example[contIdx].isSpecial():
                cache.deltas[exi][d] = "?"  # missing value: slope undefined
                continue

            ref_x = float(ref_example[contIdx])

            # Weighted running sums for the least-squares fit.
            Sx = Sy = Sxx = Syy = Sxy = n = 0.0

            nn = cache.findNearest(ref_example, 0, True)
            nn = [ex for ex in nn if not ex[contIdx].isSpecial()][:effNeighbours]
            mx = [abs(ex[contIdx] - ref_x) for ex in nn]
            if not mx:
                cache.deltas[exi][d] = "?"  # no usable neighbours
                continue

            # Gaussian kernel width chosen so the weight decays to ~0.001 at
            # the farthest neighbour; guard against a zero spread.
            if max(mx) < 1e-10:
                kw = math.log(.001)
            else:
                kw = math.log(.001) / max(mx)**2

            for ex in nn[:effNeighbours]:
                ex_x = float(ex[contIdx])
                ex_y = float(ex.getclass())
                w = math.exp(kw * (ex_x - ref_x)**2)
                Sx += w * ex_x
                Sy += w * ex_y
                Sxx += w * ex_x**2
                Syy += w * ex_y**2
                Sxy += w * ex_x * ex_y
                n += w

            div = n * Sxx - Sx**2
            if div:
                # Weighted least-squares slope and intercept.
                # (A commented-out total-least-squares variant was removed.)
                b = (Sxy * n - Sx * Sy) / div
                a = (Sy - b * Sx) / n

                # Residual / total / model sums of squares for the F-test.
                err = (n * a**2 + b**2 * Sxx + Syy + 2 * a * b * Sx - 2 * a * Sy - 2 * b * Sxy)
                tot = Syy - Sy**2 / n
                mod = tot - err
                merr = err / (n - 2)
                if merr < 1e-10:
                    F = 0
                    Fprob = 1
                else:
                    F = mod / merr
                    # NOTE(review): get_sig calls statc.fprob(df1, df2, F);
                    # the (F, 1, n-2) order here looks inconsistent with
                    # that usage -- confirm the intended argument order.
                    Fprob = statc.fprob(F, 1, int(n - 2))
                cache.errors[exi][d] = Fprob
                cache.deltas[exi][d] = b
            else:
                # Degenerate x-spread among neighbours: slope undefined.
                cache.deltas[exi][d] = "?"

            if progressCallback:
                progressCallback((nExamples * di + exi) * nPoints)

        # Restore the distance normalizer for this attribute.
        normalizers[cache.contIndices[d]] = oldNormalizer
def tubedRegression(cache, dimensions, progressCallback=None, **args):
    """Estimate a local slope of the class value along each given attribute.

    For every example and every dimension in `dimensions`, fits a
    Gaussian-weighted least-squares line through the example's nearest
    neighbours, measured in a "tube" around the attribute's axis (the
    attribute itself is excluded from the neighbour distance).

    Results are written in place:
      - cache.deltas[exi][d] -- fitted slope b, or "?" when undefined
      - cache.errors[exi][d] -- F-test p-value of the local regression

    Parameters:
      cache: holder of data, deltas/errors matrices, and lazily-built
          nearest-neighbour searcher and attribute statistics (Orange types).
      dimensions: indices (into cache.contIndices) of attributes to process.
      progressCallback: optional callable taking percent done (0..100).
      **args: ignored; accepted for call-site compatibility.
    """
    # Lazily build and cache the brute-force nearest-neighbour searcher.
    if not cache.findNearest:
        cache.findNearest = orange.FindNearestConstructor_BruteForce(
            cache.data,
            distanceConstructor=orange.ExamplesDistanceConstructor_Euclidean(),
            includeSame=True
        )

    # Lazily build basic per-attribute statistics (min/max used below).
    if not cache.attrStat:
        cache.attrStat = orange.DomainBasicAttrStat(cache.data)

    normalizers = cache.findNearest.distance.normalizers

    if progressCallback:
        nExamples = len(cache.data)
        nPoints = 100.0 / nExamples / len(dimensions)

    # FIX: the original used `len(...) > 1 and cache.nNeighbours or
    # len(cache.deltas)` -- the classic and/or pitfall: when
    # cache.nNeighbours is 0 (falsy) it silently fell through to
    # len(cache.deltas) instead of honouring the configured value.
    if len(cache.contAttributes) > 1:
        effNeighbours = cache.nNeighbours
    else:
        # Single continuous attribute: use all examples as neighbours.
        effNeighbours = len(cache.deltas)

    for di, d in enumerate(dimensions):
        contIdx = cache.contIndices[d]

        minV, maxV = cache.attrStat[contIdx].min, cache.attrStat[contIdx].max
        if minV == maxV:
            # Constant attribute: no slope to estimate.
            continue

        # Exclude this attribute from the distance measure so the neighbour
        # "tube" extends along its axis; the normalizer is restored below.
        oldNormalizer = normalizers[cache.contIndices[d]]
        normalizers[cache.contIndices[d]] = 0

        for exi, ref_example in enumerate(cache.data):
            if ref_example[contIdx].isSpecial():
                cache.deltas[exi][d] = "?"  # missing value: slope undefined
                continue

            ref_x = float(ref_example[contIdx])

            # Weighted running sums for the least-squares fit.
            Sx = Sy = Sxx = Syy = Sxy = n = 0.0

            nn = cache.findNearest(ref_example, 0, True)
            nn = [ex for ex in nn if not ex[contIdx].isSpecial()][:effNeighbours]
            mx = [abs(ex[contIdx] - ref_x) for ex in nn]
            if not mx:
                cache.deltas[exi][d] = "?"  # no usable neighbours
                continue

            # Gaussian kernel width chosen so the weight decays to ~0.001 at
            # the farthest neighbour; guard against a zero spread.
            if max(mx) < 1e-10:
                kw = math.log(0.001)
            else:
                kw = math.log(0.001) / max(mx) ** 2

            for ex in nn[:effNeighbours]:
                ex_x = float(ex[contIdx])
                ex_y = float(ex.getclass())
                w = math.exp(kw * (ex_x - ref_x) ** 2)
                Sx += w * ex_x
                Sy += w * ex_y
                Sxx += w * ex_x ** 2
                Syy += w * ex_y ** 2
                Sxy += w * ex_x * ex_y
                n += w

            div = n * Sxx - Sx ** 2
            if div:
                # Weighted least-squares slope and intercept.
                b = (Sxy * n - Sx * Sy) / div
                a = (Sy - b * Sx) / n

                # Residual / total / model sums of squares for the F-test.
                err = n * a ** 2 + b ** 2 * Sxx + Syy + 2 * a * b * Sx - 2 * a * Sy - 2 * b * Sxy
                tot = Syy - Sy ** 2 / n
                mod = tot - err
                merr = err / (n - 2)
                if merr < 1e-10:
                    F = 0
                    Fprob = 1
                else:
                    F = mod / merr
                    # NOTE(review): get_sig calls statc.fprob(df1, df2, F);
                    # the (F, 1, n-2) order here looks inconsistent with
                    # that usage -- confirm the intended argument order.
                    Fprob = statc.fprob(F, 1, int(n - 2))
                cache.errors[exi][d] = Fprob
                cache.deltas[exi][d] = b
            else:
                # Degenerate x-spread among neighbours: slope undefined.
                cache.deltas[exi][d] = "?"

            if progressCallback:
                progressCallback((nExamples * di + exi) * nPoints)

        # Restore the distance normalizer for this attribute.
        normalizers[cache.contIndices[d]] = oldNormalizer