def _convertToCommonFormat(data):
    """
      Convert either a distribution or a set of data to a (stats, cdf, pdf) pair
      @ In, data, Distributions.Distribution or tuple or sized container, one of:
        - a distribution, whose untruncated mean/stdev, cdf and pdf are used directly;
        - a (points, weights) tuple holding two containers of equal length;
        - any other object with a length, taken as raw points with uniform weights.
        Malformed or empty input raises IOError.
      @ Out, result, tuple, for a distribution the triple (stats, cdf, pdf) where stats
        holds "mean" and "stdev"; otherwise whatever _getPDFandCDFfromWeightedData
        returns for the (weighted) points
    """
    if isinstance(data, Distributions.Distribution):
        # data is a distribution: collect the needed stats and hand back its cdf and pdf
        stats = {
            "mean": data.untruncatedMean(),
            "stdev": data.untruncatedStdDev(),
        }
        return stats, data.cdf, data.pdf

    if isinstance(data, tuple):
        # data is (list, list): the second entry is the list of weights
        # Explicit raises instead of assert, so the checks survive "python -O"
        if len(data) != 2:
            raise IOError("Expected a (points, weights) tuple in _convertToCommonFormat")
        points, weights = data
        if len(points) != len(weights):
            raise IOError("Points and weights differ in length in _convertToCommonFormat")
    elif hasattr(data, '__len__'):
        # data is a plain collection of points; uniform weights are generated below
        points, weights = data, None
    else:
        raise IOError("Unknown type in _convertToCommonFormat")

    # len() rather than truthiness: array-like containers may not define a usable bool()
    numPoints = len(points)
    if numPoints == 0:
        # without this guard the uniform weights divide by zero and the bin count takes log2(0)
        raise IOError("Empty data set in _convertToCommonFormat")
    if weights is None:
        weights = [1.0 / numPoints] * numPoints

    # Sturges method for determining number of bins
    numBins = int(math.ceil(mathUtils.log2(numPoints) + 1))
    return _getPDFandCDFfromWeightedData(points, weights, numBins, False, 'linear')
def __processData(self, data, methodInfo):
    """
      Method to process the computed data: sorts the samples, bins them, collects
      their summary statistics and estimates skew-normal parameters from the moments
      @ In, data, np.array, the data to process (uses tolist() when available, list() otherwise)
      @ In, methodInfo, dict, the info about which processing method needs to be used;
        keys read here: 'binMethod' ('square-root' or 'sturges'), 'numBins' (only used
        when 'binMethod' is absent, default 10) and 'kind' ('uniformBins', the default,
        or 'equalProbability')
      @ Out, ret, dict, the processed data: 'low', 'high', 'numBins', 'minBinSize',
        'bins', 'counts', every entry returned by mathUtils.calculateStats, and the
        skew-normal estimates 'alpha' (shape), 'omega' (scale) and 'xi' (location)
    """
    ret = {}
    sortedData = data.tolist() if hasattr(data, 'tolist') else list(data)
    if not sortedData:
        # fail with a clear message instead of an IndexError on sortedData[0]
        raise ValueError("No data points were provided to __processData")
    sortedData.sort()
    low = sortedData[0]
    high = sortedData[-1]
    dataRange = high - low
    ret['low'] = low
    ret['high'] = high

    # number of bins: either derived from the sample size or taken from the request
    if 'binMethod' in methodInfo:
        binMethod = methodInfo['binMethod']
        dataN = len(sortedData)
        if binMethod == 'square-root':
            numBins = int(math.ceil(math.sqrt(dataN)))
        elif binMethod == 'sturges':
            numBins = int(math.ceil(mathUtils.log2(dataN) + 1))
        else:
            self.raiseADebug("Unknown binMethod " + binMethod, 'ExceptedError')
            numBins = 5
    else:
        numBins = methodInfo.get("numBins", 10)
    ret['numBins'] = numBins

    # interior bin boundaries
    kind = methodInfo.get("kind", "uniformBins")
    if kind == "uniformBins":
        bins = [low + x * dataRange / numBins for x in range(1, numBins)]
        ret['minBinSize'] = dataRange / numBins
    elif kind == "equalProbability":
        stride = len(sortedData) // numBins
        if stride == 0:
            # more bins than samples: range() below would reject a zero step
            raise ValueError("numBins (" + str(numBins) + ") exceeds the number of data points ("
                             + str(len(sortedData)) + ") for equalProbability bins")
        bins = [sortedData[x] for x in range(stride - 1, len(sortedData) - stride + 1, stride)]
        if len(bins) > 1:
            ret['minBinSize'] = min(upper - lower for lower, upper in zip(bins[:-1], bins[1:]))
        else:
            ret['minBinSize'] = dataRange
    else:
        # previously fell through and failed later because 'bins' was never assigned
        raise ValueError("Unknown kind " + str(kind))

    counts = mathUtils.countBins(sortedData, bins)
    ret['bins'] = bins
    ret['counts'] = counts
    ret.update(mathUtils.calculateStats(sortedData))

    # Method-of-moments estimate of the skew-normal parameters.
    # delta is recovered from the sample skewness; it must satisfy |delta| < 1.
    skewness = ret["skewness"]
    skewTerm = abs(skewness) ** (2.0 / 3.0)
    deltaSquared = (math.pi / 2.0) * skewTerm / (skewTerm + ((4.0 - math.pi) / 2.0) ** (2.0 / 3.0))
    if deltaSquared >= 1.0:
        # happens once |skewness| reaches roughly 0.9953; alpha is undefined there
        raise ValueError("Sample skewness " + str(skewness)
                         + " is too large in magnitude for a skew-normal moment fit")
    delta = math.copysign(math.sqrt(deltaSquared), skewness)
    alpha = delta / math.sqrt(1.0 - deltaSquared)
    # variance = omega**2 * (1 - 2*delta**2/pi), so the scale omega is the square
    # root of the ratio; the mean relation below needs omega itself, not omega**2
    variance = ret["sampleVariance"]
    omega = math.sqrt(variance / (1.0 - 2.0 * deltaSquared / math.pi))
    # mean = xi + omega*delta*sqrt(2/pi)
    xi = ret['mean'] - omega * delta * math.sqrt(2.0 / math.pi)
    ret['alpha'] = alpha
    ret['omega'] = omega
    ret['xi'] = xi
    return ret
2.1,2.2,2.3,2.4, 3.1,3.2,3.3] boundaries = [1,2,3] counted = mathUtils.countBins(data,boundaries) checkArray('countBins',counted,[2,3,4,3],1e-5) ### check "log2" data = [(1e-15,-49.82892), (0.5,-1.0), (1.0,0.0), (4,2.0), (10,3.32193), (1e34,112.945556)] for d in data: dat,soln = d val = mathUtils.log2(dat) checkAnswer('log2',val,soln,1e-5) ### check "calculateStats" data = [0.6752,0.0610,0.1172,0.5233,0.0056] moms = mathUtils.calculateStats(data) checkAnswer('calculateStats.mean' ,moms['mean' ], 0.27646 ,1e-5) checkAnswer('calculateStats.stdev' ,moms['stdev' ], 0.30211 ,1e-5) checkAnswer('calculateStats.variance' ,moms['variance' ], 0.073015,1e-5) checkAnswer('calculateStats.skewness' ,moms['skewness' ], 0.45134 ,1e-5) checkAnswer('calculateStats.kurtosis' ,moms['kurtosis' ],-1.60548 ,1e-5) checkAnswer('calculateStats.sampleVariance',moms['sampleVariance'], 0.09127 ,1e-5) ### check "historySetWindows" # TODO I think this takes a historySet? Documentation is poor.