def _getPDFandCDFfromData(dataName, data, csv, methodInfo, interpolation, generateCSV):
  """
    Bins the provided data and builds interpolated CDF and PDF functions from
    the resulting histogram. Optionally the per-bin table and the remaining
    statistics are written to the csv file while they are computed.
    Note, it might be better done by scipy.stats.gaussian_kde
    @ In, dataName, str, The name of the data.
    @ In, data, np.array, one dimensional array of the data to process
    @ In, csv, File, file to write out information on data.
    @ In, methodInfo, dict, the info about which processing method needs to be used
    @ In, interpolation, str, "linear" or "quadratic", depending on which interpolation is used
    @ In, generateCSV, bool, True if the csv should be written
    @ Out, (dataStats, cdfFunc, pdfFunc), tuple, dataStats is dictionary with things like "mean" and "stdev", cdfFunction is a function that returns the CDF value and pdfFunc is a function that returns the PDF value.
  """
  stats = __processData(data, methodInfo)
  binCounts = stats['counts']
  innerEdges = stats['bins']
  total = sum(binCounts)
  # full list of bin edges: lower limit, interior boundaries, upper limit
  edges = [stats['low']] + innerEdges + [stats['high']]
  nBins = len(binCounts)

  if generateCSV:
    utils.printCsv(csv, '"' + dataName + '"')
    utils.printCsv(csv, '"numBins"', stats['numBins'])
    utils.printCsv(csv, '"binBoundary"', '"binMidpoint"', '"binCount"', '"normalizedBinCount"', '"f_prime"', '"cdf"')

  # first pass: normalized counts, running (cumulative) sum and bin midpoints
  fractions = []
  cumulative = []
  centers = []
  running = 0.0
  for k, count in enumerate(binCounts):
    fraction = count / total
    running += fraction
    fractions.append(fraction)
    cumulative.append(running)
    centers.append((edges[k] + edges[k + 1]) / 2.0)
  cdfFunc = mathUtils.createInterp(centers, cumulative, 0.0, 1.0, interpolation)

  def cdfAt(index):
    """
      Returns the cumulative value at a bin index, using 1.0 past the last bin
      @ In, index, int, the bin index
      @ Out, cdfAt, float, the cumulative value
    """
    if index < nBins:
      return cumulative[index]
    return 1.0

  # second pass: forward finite difference of the cumulative values
  useLinear = interpolation == 'linear'
  slopes = []
  for k in range(nBins):
    width = edges[k + 1] - edges[k]
    f0 = cumulative[k]
    f1 = cdfAt(k + 1)
    f2 = cdfAt(k + 2)
    if useLinear:
      slope = (f1 - f0) / width
    else:
      slope = (-1.5 * f0 + 2.0 * f1 + -0.5 * f2) / width
    slopes.append(slope)
    if generateCSV:
      utils.printCsv(csv, edges[k + 1], centers[k], binCounts[k], fractions[k], slope, cumulative[k])
  pdfFunc = mathUtils.createInterp(centers, slopes, 0.0, 0.0, interpolation)

  if generateCSV:
    # everything that was not already reported above
    remainingKeys = set(stats.keys())
    remainingKeys -= {'numBins', 'counts', 'bins'}
    for key in remainingKeys:
      utils.printCsv(csv, '"' + key + '"', stats[key])
  return stats, cdfFunc, pdfFunc
def collectOutput(self, finishedJob, output):
  """
    Function to place all of the computed data into the output object.
    For every compare group the pulled data are binned, converted into
    interpolated CDF/PDF functions and compared through mathUtils.getGraphs.
    If output is a Files.File the results are printed as CSV and, in addition,
    one small CSV file holding the scalar statistics is written for every
    compared entry; if output is a PointSet the first list-valued graph is
    stored in it.
    @ In, finishedJob, JobHandler External or Internal instance, A JobHandler object that is in charge of running this post-processor
    @ In, output, dataObjects, The object where we want to place our computed results
    @ Out, None
  """
  def delist(item):
    """
      Method to create a string out of a list
      @ In, item, list, the list to be 'stringed' out
      @ Out, delist, string, the string representing the list
    """
    if isinstance(item, list):
      return '_'.join([delist(x) for x in item])
    return str(item)

  self.raiseADebug("finishedJob: " + str(finishedJob) + ", output " + str(output))
  evaluation = finishedJob.getEvaluation()
  if isinstance(evaluation, Runners.Error):
    self.raiseAnError(RuntimeError, "No available output to collect (run possibly not finished yet)")
  outputDictionary = evaluation[1]
  self.dataDict.update(outputDictionary)

  # collect, for every compare group, the arrays that have to be compared
  dataToProcess = []
  for compareGroup in self.compareGroups:
    dataPulls = compareGroup.dataPulls
    reference = compareGroup.referenceData
    foundDataObjects = []
    for name, kind, rest in dataPulls:
      data = self.dataDict[name].getParametersValues(kind)
      # NOTE(review): a pull whose "rest" does not hold exactly one entry adds
      # nothing here, so the zip(dataPulls, datas) below would pair the
      # remaining data with the wrong pull -- confirm this cannot happen
      if len(rest) == 1:
        foundDataObjects.append(data[rest[0]])
    dataToProcess.append((dataPulls, foundDataObjects, reference))

  # decide which kind of output has to be produced
  generateCSV = False
  generatePointSet = False
  if isinstance(output, Files.File):
    generateCSV = True
  elif output.type == 'PointSet':
    generatePointSet = True
  else:
    self.raiseAnError(IOError, 'unsupported type ' + str(type(output)))
  if generateCSV:
    csv = output

  for dataPulls, datas, reference in dataToProcess:
    graphData = []
    if "name" in reference:
      distributionName = reference["name"]
      if distributionName not in self.distributions:
        self.raiseAnError(IOError, 'Did not find ' + distributionName + ' in ' + str(self.distributions.keys()))
      else:
        distribution = self.distributions[distributionName]
      refDataStats = {"mean": distribution.untruncatedMean(),
                      "stdev": distribution.untruncatedStdDev()}
      refDataStats["minBinSize"] = refDataStats["stdev"] / 2.0
      # bound methods are used instead of lambdas so that the functions stay
      # tied to this distribution even if the loop variable is rebound later
      refPdf = distribution.pdf
      refCdf = distribution.cdf
      graphData.append((refDataStats, refCdf, refPdf, "ref_" + distributionName))

    for dataPull, data in zip(dataPulls, datas):
      # Convert data to pdf and cdf.
      dataStats = self.__processData(data, self.methodInfo)
      dataKeys = set(dataStats.keys())
      counts = dataStats['counts']
      bins = dataStats['bins']
      countSum = sum(counts)
      numCounts = len(counts)
      binBoundaries = [dataStats['low']] + bins + [dataStats['high']]
      if generateCSV:
        utils.printCsv(csv, '"' + str(dataPull) + '"')
        utils.printCsv(csv, '"numBins"', dataStats['numBins'])
        utils.printCsv(csv, '"binBoundary"', '"binMidpoint"', '"binCount"', '"normalizedBinCount"', '"f_prime"', '"cdf"')
      # cumulative sum of the normalized counts and the bin midpoints
      cdf = [0.0] * numCounts
      midpoints = [0.0] * numCounts
      cdfSum = 0.0
      for i in range(numCounts):
        cdfSum += counts[i] / countSum
        cdf[i] = cdfSum
        midpoints[i] = (binBoundaries[i] + binBoundaries[i + 1]) / 2.0
      cdfFunc = mathUtils.createInterp(midpoints, cdf, 0.0, 1.0, self.interpolation)
      # forward finite difference of the cdf; past the last bin the cdf is 1.0
      fPrimeData = [0.0] * numCounts
      for i in range(numCounts):
        h = binBoundaries[i + 1] - binBoundaries[i]
        nCount = counts[i] / countSum # normalized count
        f0 = cdf[i]
        f1 = cdf[i + 1] if i + 1 < numCounts else 1.0
        f2 = cdf[i + 2] if i + 2 < numCounts else 1.0
        if self.interpolation == 'linear':
          fPrime = (f1 - f0) / h
        else:
          fPrime = (-1.5 * f0 + 2.0 * f1 + -0.5 * f2) / h
        fPrimeData[i] = fPrime
        if generateCSV:
          utils.printCsv(csv, binBoundaries[i + 1], midpoints[i], counts[i], nCount, fPrime, cdf[i])
      pdfFunc = mathUtils.createInterp(midpoints, fPrimeData, 0.0, 0.0, self.interpolation)
      # the remaining statistics are printed one per line
      dataKeys -= {'numBins', 'counts', 'bins'}
      if generateCSV:
        for key in dataKeys:
          utils.printCsv(csv, '"' + key + '"', dataStats[key])
      self.raiseADebug("dataStats: " + str(dataStats))
      graphData.append((dataStats, cdfFunc, pdfFunc, str(dataPull)))

    graphDataDict = mathUtils.getGraphs(graphData, self.fZStats)

    if generateCSV:
      for key, value in graphDataDict.items():
        if isinstance(value, list):
          # each entry is a column: its first element is the header,
          # the remaining elements are printed row by row
          utils.printCsv(csv, *(['"' + l[0] + '"' for l in value]))
          for i in range(1, len(value[0])):
            utils.printCsv(csv, *([l[i] for l in value]))
        else:
          utils.printCsv(csv, '"' + key + '"', value)

    if generatePointSet:
      for key, value in graphDataDict.items():
        if isinstance(value, list):
          # the first column goes to the input space, the others to the output space
          for i, subvalue in enumerate(value):
            name = subvalue[0]
            subdata = subvalue[1:]
            if i == 0:
              output.updateInputValue(name, subdata)
            else:
              output.updateOutputValue(name, subdata)
          break # XXX Need to figure out way to specify which data to return

    if generateCSV:
      for i, graphEntry in enumerate(graphData):
        dataStat = graphEntry[0]
        if not isinstance(dataStat, dict):
          # not expected to happen; when assertions are disabled the entry is skipped
          assert False
          continue
        newFileName = output.getBase() + "_" + delist(dataPulls) + "_" + str(i) + ".csv"
        # only the scalar statistics are written, sorted by name
        dataPairs = []
        for key in sorted(dataStat.keys()):
          value = dataStat[key]
          if np.isscalar(value):
            dataPairs.append((key, value))
        extraCsv = Files.returnInstance('CSV', self)
        extraCsv.initialize(newFileName, self.messageHandler)
        extraCsv.open("w")
        try:
          extraCsv.write(",".join(['"' + str(x[0]) + '"' for x in dataPairs]))
          extraCsv.write("\n")
          extraCsv.write(",".join([str(x[1]) for x in dataPairs]))
          extraCsv.write("\n")
        finally:
          # make sure the file is closed even if one of the writes fails
          extraCsv.close()
      utils.printCsv(csv)
def collectOutput(self, finishedJob, output):
  """
    Function to place all of the computed data into the output object.
    For every compare group the pulled data are converted into CDF/PDF
    functions (through _getPDFandCDFfromData), compared through _getGraphs and
    printed as CSV into output. In addition, one small CSV file holding the
    scalar statistics is written for every compared entry.
    Only Files.File outputs are supported.
    @ In, finishedJob, JobHandler External or Internal instance, A JobHandler object that is in charge of running this post-processor
    @ In, output, dataObjects, The object where we want to place our computed results
    @ Out, None
  """
  def delist(item):
    """
      Method to create a string out of a list
      @ In, item, list, the list to be 'stringed' out
      @ Out, delist, string, the string representing the list
    """
    if isinstance(item, list):
      return '_'.join([delist(x) for x in item])
    return str(item)

  self.raiseADebug("finishedJob: " + str(finishedJob) + ", output " + str(output))
  evaluation = finishedJob.getEvaluation()
  if isinstance(evaluation, Runners.Error):
    self.raiseAnError(RuntimeError, "No available output to collect (run possibly not finished yet)")
  outputDictionary = evaluation[1]
  self.dataDict.update(outputDictionary)

  # collect, for every compare group, the arrays that have to be compared
  dataToProcess = []
  for compareGroup in self.compareGroups:
    dataPulls = compareGroup.dataPulls
    reference = compareGroup.referenceData
    foundDataObjects = []
    for name, _kind, rest in dataPulls:
      dataSet = self.dataDict[name].asDataset()
      # NOTE(review): a pull whose "rest" does not hold exactly one entry adds
      # nothing here, so the zip(dataPulls, datas) below would pair the
      # remaining data with the wrong pull -- confirm this cannot happen
      if len(rest) == 1:
        foundDataObjects.append(copy.copy(dataSet[rest[0]].values))
    dataToProcess.append((dataPulls, foundDataObjects, reference))

  if not isinstance(output, Files.File):
    self.raiseAnError(IOError, 'unsupported type ' + str(type(output)))

  for dataPulls, datas, reference in dataToProcess:
    graphData = []
    if "name" in reference:
      distributionName = reference["name"]
      if distributionName not in self.distributions:
        self.raiseAnError(IOError, 'Did not find ' + distributionName + ' in ' + str(self.distributions.keys()))
      else:
        distribution = self.distributions[distributionName]
      refDataStats = {"mean": distribution.untruncatedMean(),
                      "stdev": distribution.untruncatedStdDev()}
      refDataStats["minBinSize"] = refDataStats["stdev"] / 2.0
      # bound methods are used instead of lambdas so that the functions stay
      # tied to this distribution even if the loop variable is rebound later
      refPdf = distribution.pdf
      refCdf = distribution.cdf
      graphData.append((refDataStats, refCdf, refPdf, "ref_" + distributionName))

    for dataPull, data in zip(dataPulls, datas):
      dataStats, cdfFunc, pdfFunc = _getPDFandCDFfromData(str(dataPull), data, output, self.methodInfo, self.interpolation, True)
      self.raiseADebug("dataStats: " + str(dataStats))
      graphData.append((dataStats, cdfFunc, pdfFunc, str(dataPull)))

    graphDataDict = _getGraphs(graphData, self.fZStats)
    for key, value in graphDataDict.items():
      if isinstance(value, list):
        # each entry is a column: its first element is the header,
        # the remaining elements are printed row by row
        utils.printCsv(output, *(['"' + l[0] + '"' for l in value]))
        for i in range(1, len(value[0])):
          utils.printCsv(output, *([l[i] for l in value]))
      else:
        utils.printCsv(output, '"' + key + '"', value)

    for i, graphEntry in enumerate(graphData):
      dataStat = graphEntry[0]
      if not isinstance(dataStat, dict):
        # not expected to happen; when assertions are disabled the entry is skipped
        assert False
        continue
      newFileName = output.getBase() + "_" + delist(dataPulls) + "_" + str(i) + ".csv"
      # only the scalar statistics are written, sorted by name
      dataPairs = []
      for key in sorted(dataStat.keys()):
        value = dataStat[key]
        if np.isscalar(value):
          dataPairs.append((key, value))
      extraCsv = Files.returnInstance('CSV', self)
      extraCsv.initialize(newFileName, self.messageHandler)
      extraCsv.open("w")
      try:
        extraCsv.write(",".join(['"' + str(x[0]) + '"' for x in dataPairs]))
        extraCsv.write("\n")
        extraCsv.write(",".join([str(x[1]) for x in dataPairs]))
        extraCsv.write("\n")
      finally:
        # make sure the file is closed even if one of the writes fails
        extraCsv.close()
    utils.printCsv(output)