import math

# Helper modules assumed to ship with this package: `central` provides
# fsum(), fmean() and fmedian(); `variability` provides lstdev().
# Both spellings `variability.lstdev` and bare `lstdev` are used below,
# hence the two import forms.
import central
import variability
from variability import lstdev

# ----------------------------------------------------------------------------


def tansey_linear_regression(points):
    """ Estimate a simple linear regression from the sums of squares
        and of cross-products (codeviates).

        @param points is a list of tuples (x,y) of float values.
        @return intercept,slope

    """
    if len(points) == 0:
        return 0., 0.

    sumOfXSq = 0.
    sumCodeviates = 0.
    n = len(points)
    for x, y in points:
        sumCodeviates += x * y
        sumOfXSq += x * x

    sum_x = central.fsum([x for x, y in points])
    sum_y = central.fsum([y for x, y in points])
    mean_x = central.fmean([x for x, y in points])
    mean_y = central.fmean([y for x, y in points])

    ssx = sumOfXSq - ((sum_x * sum_x) / n)
    sco = sumCodeviates - ((sum_x * sum_y) / n)

    # Note: ssx is zero when all x values are identical,
    # in which case the slope is undefined.
    b = mean_y - ((sco / ssx) * mean_x)
    m = sco / ssx

    return b, m
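
# Example usage (added sketch, not part of the original module): on points
# lying exactly on y = 2x + 1, the formulas above should recover an
# intercept of 1 and a slope of 2, up to floating-point rounding.
def _example_tansey():
    points = [(float(x), 2. * x + 1.) for x in range(10)]
    b, m = tansey_linear_regression(points)
    assert abs(b - 1.) < 1e-9 and abs(m - 2.) < 1e-9
    return b, m
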
def tga_linear_regression(points):
    """ Estimate a simple linear regression from the deviations
        to the means.

        @param points is a list of tuples (x,y) of float values.
        @return intercept,slope

    """
    if len(points) == 0:
        return 0., 0.

    # Means
    mean_x = central.fmean([x for x, y in points])
    mean_y = central.fmean([y for x, y in points])

    xysum = 0.
    xsqsum = 0.
    for x, y in points:
        dx = x - mean_x
        dy = y - mean_y
        xysum += dx * dy
        xsqsum += dx * dx

    # Slope
    if xsqsum == 0:
        m = xysum
    else:
        m = xysum / xsqsum

    # Intercept
    b = mean_y - m * mean_x

    return b, m
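
# Consistency check (added sketch): both functions above implement ordinary
# least squares with algebraically equivalent formulas, so they should agree
# whenever the x values are not all identical.
def _example_compare_regressions():
    points = [(1., 2.1), (2., 3.9), (3., 6.2), (4., 7.8), (5., 10.1)]
    b1, m1 = tansey_linear_regression(points)
    b2, m2 = tga_linear_regression(points)
    assert abs(b1 - b2) < 1e-9 and abs(m1 - m2) < 1e-9
    return b1, m1
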
def lvariation(items):
    """ Calculates the coefficient of variation of data values.
        It shows the extent of variability in relation to the mean.
        It's a standardized measure of dispersion, stdev / mean,
        returned as a percentage.

        @param items (list) list of data values
        @return (float)

    """
    return variability.lstdev(items) / float(central.fmean(items)) * 100.0
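
# Example usage (added sketch, assuming variability.lstdev() behaves as a
# standard deviation): scaling every value by a positive constant leaves the
# coefficient of variation unchanged, since stdev and mean scale together.
def _example_lvariation():
    items = [2., 4., 4., 4., 5., 5., 7., 9.]
    cv1 = lvariation(items)
    cv2 = lvariation([10. * i for i in items])
    assert abs(cv1 - cv2) < 1e-6
    return cv1
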
def lmoment(items, moment=1):
    """ Calculates the r-th moment about the mean for a sample:
        1/n * SUM((items(i)-mean)**r)

        @param items (list) list of data values
        @param moment (int) the order r of the moment (default is 1)
        @return (float)

    """
    if moment == 1:
        return 0.0
    mn = central.fmean(items)
    momentlist = [(i - mn) ** moment for i in items]
    return sum(momentlist) / float(len(items))
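
# Worked example (added sketch): the second moment about the mean is the
# population variance. For items 1..5 the mean is 3 and the squared
# deviations are 4,1,0,1,4, so lmoment(items, 2) = 10/5 = 2.
def _example_lmoment():
    items = [1., 2., 3., 4., 5.]
    assert lmoment(items, 1) == 0.0            # first central moment is zero by definition
    assert abs(lmoment(items, 2) - 2.0) < 1e-9
    return lmoment(items, 3)                   # zero for this symmetric sample
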
def lvariance(items):
    """ Calculates the variance of the data values,
        using N for the denominator.
        The variance is a measure of dispersion near the mean.

        @param items (list) list of data values
        @return (float)

    """
    if len(items) < 2:
        return 0.0
    mn = central.fmean(items)
    return central.fsum(pow(i - mn, 2) for i in items) / len(items)
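
# Cross-check (added sketch): with N as the denominator this is the
# population variance, so it should match statistics.pvariance() from the
# Python 3 standard library.
def _example_lvariance():
    import statistics
    items = [2., 4., 4., 4., 5., 5., 7., 9.]
    assert abs(lvariance(items) - statistics.pvariance(items)) < 1e-9
    return lvariance(items)    # 4.0 for this sample
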
def lz(items, score):
    """ Calculates the z-score for a given input score, given that score
        and the data values from which that score came.
        The z-score determines the relative location of a data value.

        @param items (list) list of data values
        @param score (float) a score of any items
        @return (float)

    """
    if len(items) < 2:
        return 0.0
    return (score - central.fmean(items)) / lstdev(items)
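
# Example usage (added sketch): the mean itself gets a z-score of 0, and a
# score one standard deviation above the mean gets a z-score of 1.
def _example_lz():
    items = [2., 4., 4., 4., 5., 5., 7., 9.]
    mn = central.fmean(items)
    assert abs(lz(items, mn)) < 1e-9
    assert abs(lz(items, mn + lstdev(items)) - 1.0) < 1e-9
    return lz(items, 6.)
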
def nPVI(items):
    """ Calculates the normalized Pairwise Variability Index.

        @param items (list) list of data values
        @return (float)

    """
    if len(items) < 2:
        return 0.0
    n = len(items) - 1
    sumd = 0.0
    for i in range(n):
        d1 = items[i]
        d2 = items[i + 1]
        delta = math.fabs(d1 - d2)
        meand = (d1 + d2) / 2.0
        sumd += delta / meand
    return 100.0 * sumd / n

# ----------------------------------------------------------------------------


if __name__ == "__main__":
    l = [x * x for x in range(1, 11)]
    print(l)
    print("mean:", central.fmean(l))
    print("median:", central.fmedian(l))
    print("variance:", lvariance(l))
    print("standard deviation:", lstdev(l))
    print("rPVI:", rPVI(l))
    print("nPVI:", nPVI(l))
def lkurtosis(items):
    """ Calculates the kurtosis of the data values:
        a high kurtosis distribution has a sharper peak and fatter tails,
        while a low kurtosis distribution has a more rounded peak
        and thinner tails.

        @param items (list) list of data values
        @return (float)

    """
    return lmoment(items, 4) / pow(lmoment(items, 2), 2.0)

# ----------------------------------------------------------------------------


if __name__ == "__main__":
    import datetime

    l = [x * x for x in range(1, 500)]

    print("moment:")
    print(datetime.datetime.now().isoformat())
    moment = 10
    mn = central.fmean(l)
    s = 0
    for x in l:
        s = s + (x - mn) ** moment
    print(s / float(len(l)))
    print(datetime.datetime.now().isoformat())
    print(lmoment(l, moment))
    print(datetime.datetime.now().isoformat())
    print()
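    # Added check (sketch): kurtosis as computed above is the 4th central
    # moment divided by the squared 2nd central moment, so recomputing it
    # from lmoment() directly should give the same value.
    m2 = lmoment(l, 2)
    m4 = lmoment(l, 4)
    print("kurtosis:", lkurtosis(l), "==", m4 / (m2 * m2))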