def _compareRoundTrip(self, data):
    """
    Check that ``data`` survives a pack/unpack cycle unchanged.

    The data is packed with ``database3.packSpecialData`` and reconstituted
    with ``database3.unpackSpecialData`` (both under the parameter name
    "testing"); the original and the reconstituted result are then handed to
    ``self._compareArrays``.
    """
    paramName = "testing"
    packedData, packAttrs = database3.packSpecialData(data, paramName)
    self._compareArrays(
        data, database3.unpackSpecialData(packedData, packAttrs, paramName)
    )
def test_replaceNones(self):
    """
    Exercise packSpecialData/unpackSpecialData on a variety of awkward arrays.

    This definitely needs some work: nothing is asserted yet, the packed and
    round-tripped results are only printed for manual inspection. The inputs
    cover a regular 2-D array, 1-D arrays containing ``None``, an array mixing
    ``None`` with a nested list, a jagged array, and an array of dictionaries.
    """
    data3 = numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    data1iNones = numpy.array([1, 2, None, 5, 6])
    data1fNones = numpy.array([None, 2.0, None, 5.0, 6.0])
    # Ragged nested sequences must be created with dtype=object explicitly:
    # NumPy >= 1.24 raises ValueError instead of silently falling back to an
    # object array. The resulting arrays are the same shape-(2,) object arrays
    # that older NumPy versions produced implicitly.
    data2fNones = numpy.array(
        [None, [[1.0, 2.0, 6.0], [2.0, 3.0, 4.0]]], dtype=object
    )
    data_jag = numpy.array(
        [[[1, 2], [3, 4]], [[1, 2, 3], [4, 5, 6], [7, 8, 9]]], dtype=object
    )
    data_dict = numpy.array(
        [
            {"bar": 2, "baz": 3},
            {"foo": 4, "baz": 6},
            {"foo": 7, "bar": 8},
        ]
    )

    print("data3: ", database.packSpecialData(data3, ""))
    print("data_jag", database.packSpecialData(data_jag, ""))
    print("data1iNones", database.packSpecialData(data1iNones, ""))
    print("data1fNones", database.packSpecialData(data1fNones, ""))
    print("data2fNones", database.packSpecialData(data2fNones, ""))
    print("dataDict", database.packSpecialData(data_dict, ""))

    # Round-trip the jagged array and show what comes back.
    packedData, attrs = database.packSpecialData(data_jag, "")
    roundTrip = database.unpackSpecialData(packedData, attrs, "")
    print("round-tripped jagged:", roundTrip)
    print("round-tripped dtype:", roundTrip.dtype)

    # Round-trip the array of dictionaries.
    packedData, attrs = database.packSpecialData(data_dict, "")
    roundTrip = database.unpackSpecialData(packedData, attrs, "")
    print("round-tripped dict:", roundTrip)
def _diffSpecialData(
    refData: h5py.Dataset,
    srcData: h5py.Dataset,
    out: OutputWriter,
    diffResults: DiffResults,
):
    """
    Compare specially-formatted datasets.

    This employs the pack/unpackSpecialData functions to reconstitute
    complicated datasets for comparison. These usually don't behave well as
    giant numpy arrays, so we go element-by-element to calculate the diffs,
    then concatenate them.

    Parameters
    ----------
    refData
        Reference dataset. Its ``name`` supplies the component and parameter
        names (the last two "/"-separated pieces) used when reporting.
    srcData
        Source dataset to compare against the reference.
    out
        Writer that receives human-readable messages through ``writeln``.
    diffResults
        Accumulator that receives structure-diff counts and the computed
        difference statistics.

    Notes
    -----
    Nothing is returned; results are reported through ``out`` and
    ``diffResults``. The comparison stops early when the formatting attribute
    keys differ (an infinite diff is recorded), when the data is flagged as
    dictionary data (not supported yet), when formatting attribute values
    differ (only a message is written), or when two array elements have
    different shapes (an infinite diff is recorded).
    """
    name = refData.name
    pathParts = name.split("/")
    paramName = pathParts[-1]
    compName = pathParts[-2]

    nDiffs = _compareSets(
        set(srcData.attrs.keys()), set(refData.attrs.keys()), "formatting data"
    )
    keysMatch = nDiffs == 0
    diffResults.addStructureDiffs(nDiffs)

    if not keysMatch:
        diffResults.addDiff(name, name, [numpy.inf], [numpy.inf], [numpy.inf])
        return

    if srcData.attrs.get("dict", False):
        # not bothering with dictionaries yet, though we will need to for things like
        # number densities
        return

    attrsMatch = True
    for k in srcData.attrs.keys():
        srcAttr = srcData.attrs[k]
        refAttr = refData.attrs[k]
        if isinstance(srcAttr, numpy.ndarray) or isinstance(refAttr, numpy.ndarray):
            # Compare flattened values. array_equal returns False (rather than
            # raising) when the sizes differ or only one side is an array, so
            # such cases are reported as a mismatch below.
            same = numpy.array_equal(numpy.ravel(srcAttr), numpy.ravel(refAttr))
        else:
            same = srcAttr == refAttr

        if not same:
            attrsMatch = False
            out.writeln(
                "Special formatting parameters for {} do not match for {}. Src: {} "
                "Ref: {}".format(name, k, srcData.attrs[k], refData.attrs[k])
            )

    if not attrsMatch:
        return

    src = database3.unpackSpecialData(srcData[()], srcData.attrs, paramName)
    ref = database3.unpackSpecialData(refData[()], refData.attrs, paramName)

    diff = []
    for dSrc, dRef in zip(src.tolist(), ref.tolist()):
        if isinstance(dSrc, numpy.ndarray) and isinstance(dRef, numpy.ndarray):
            if dSrc.shape != dRef.shape:
                out.writeln("Shapes did not match for {}".format(refData))
                # Record an infinite diff through the same addDiff entry point
                # used everywhere else in this function.
                diffResults.addDiff(
                    compName, paramName, [numpy.inf], [numpy.inf], [numpy.inf]
                )
                return

            # make sure not to try to compare empty arrays. Numpy is mediocre at
            # these; they are super degenerate and cannot participate in
            # concatenation.
            if 0 not in dSrc.shape:
                # Use the mean of the two to calc relative error. This is more
                # robust to changes that cause one of the values to be zero,
                # while the other is non-zero, leading to infinite relative error
                dMean = (dSrc + dRef) / 2
                diff.append((dSrc - dRef) / dMean)
            continue

        if (dSrc is None) ^ (dRef is None):
            out.writeln("Mismatched Nones for {} in {}".format(paramName, compName))
            diff.append([numpy.inf])
            continue

        if dSrc is None:
            # Both entries are None (the mismatched case was handled above).
            diff.append([0.0])
            continue

        try:
            # Use mean to avoid some infinities; see above
            dMean = (dSrc + dRef) / 2
            diff.append([(dSrc - dRef) / dMean])
        except ZeroDivisionError:
            # The mean is zero: either both values are zero (no difference) or
            # they are equal and opposite (treated as an infinite difference).
            if dSrc == dRef:
                diff.append([0.0])
            else:
                diff.append([numpy.inf])

    if diff:
        try:
            diff = [numpy.array(d).flatten() for d in diff]
            diff = numpy.concatenate(diff)
        except ValueError as e:
            out.writeln(
                "Failed to concatenate diff data for {} in {}: {}".format(
                    paramName, compName, diff
                )
            )
            out.writeln("Because: {}".format(e))
            return

        # nan-aware reductions: 0/0 entries from the relative-error calculation
        # show up as NaN and are ignored here.
        absDiff = numpy.abs(diff)
        mean = numpy.nanmean(diff)
        absMax = numpy.nanmax(absDiff)
        absMean = numpy.nanmean(absDiff)

        diffResults.addDiff(compName, paramName, absMean, mean, absMax)

    return