예제 #1
0
 def _compareRoundTrip(self, data):
     """
     Pack ``data``, unpack the result, and check it against the original.

     The data goes through ``database3.packSpecialData`` and back through
     ``database3.unpackSpecialData`` under the parameter name "testing"; the
     reconstituted value is then handed to ``self._compareArrays`` together
     with the original input.
     """
     paramName = "testing"
     packedData, packAttrs = database3.packSpecialData(data, paramName)
     unpacked = database3.unpackSpecialData(packedData, packAttrs, paramName)
     self._compareArrays(data, unpacked)
예제 #2
0
    def test_replaceNones(self):
        """
        Smoke-test pack/unpackSpecialData on several awkward inputs.

        The inputs cover a regular 2-D array, 1-D arrays containing ``None``,
        an array mixing ``None`` with a nested list, a jagged array, and an
        array of dictionaries. Results are only printed; there are no
        assertions, so this test fails only if packing or unpacking raises.
        This definitely needs some work.

        The ragged inputs are created with ``dtype=object`` explicitly. NumPy
        no longer builds object arrays implicitly from ragged nested sequences
        (it raises ``ValueError`` since NumPy 1.24), and passing the dtype
        yields the same 1-D object array the implicit conversion used to give.
        """
        data3 = numpy.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
        data1iNones = numpy.array([1, 2, None, 5, 6])
        data1fNones = numpy.array([None, 2.0, None, 5.0, 6.0])
        data2fNones = numpy.array(
            [None, [[1.0, 2.0, 6.0], [2.0, 3.0, 4.0]]], dtype=object
        )
        data_jag = numpy.array(
            [[[1, 2], [3, 4]], [[1, 2, 3], [4, 5, 6], [7, 8, 9]]], dtype=object
        )
        data_dict = numpy.array([{
            "bar": 2,
            "baz": 3
        }, {
            "foo": 4,
            "baz": 6
        }, {
            "foo": 7,
            "bar": 8
        }])

        # Show the packed form (data plus formatting attributes) of each input.
        print("data3: ", database.packSpecialData(data3, ""))
        print("data_jag", database.packSpecialData(data_jag, ""))
        print("data1iNones", database.packSpecialData(data1iNones, ""))
        print("data1fNones", database.packSpecialData(data1fNones, ""))
        print("data2fNones", database.packSpecialData(data2fNones, ""))
        print("dataDict", database.packSpecialData(data_dict, ""))

        # Round-trip the jagged array and show what comes back.
        packedData, attrs = database.packSpecialData(data_jag, "")
        roundTrip = database.unpackSpecialData(packedData, attrs, "")
        print("round-tripped jagged:", roundTrip)
        print("round-tripped dtype:", roundTrip.dtype)

        # Round-trip the array of dictionaries.
        packedData, attrs = database.packSpecialData(data_dict, "")
        roundTrip = database.unpackSpecialData(packedData, attrs, "")
        print("round-tripped dict:", roundTrip)
예제 #3
0
def _diffSpecialData(
    refData: h5py.Dataset,
    srcData: h5py.Dataset,
    out: OutputWriter,
    diffResults: DiffResults,
):
    """
    Compare specially-formatted datasets.

    This employs the pack/unpackSpecialData functions to reconstitute complicated
    datasets for comparison. These usually don't behave well as giant numpy arrays, so
    we go element-by-element to calculate the diffs, then concatenate them.

    Parameters
    ----------
    refData
        Reference dataset. Its ``name`` is split on "/" and the last two segments
        are used as the component and parameter labels.
    srcData
        Dataset compared against the reference.
    out
        Writer that receives human-readable messages about mismatches.
    diffResults
        Accumulator that receives the structure-diff count and diff statistics.

    Notes
    -----
    The comparison stops early, without computing element-wise diffs, when:

    * the two datasets have different sets of attribute keys (an infinite diff
      is recorded under the dataset name);
    * the source dataset carries a truthy "dict" attribute (dictionaries are
      not compared yet);
    * any attribute value differs (a message is written, no diff is recorded);
    * two array elements have different shapes (an infinite diff is recorded
      under the component/parameter labels).
    """
    name = refData.name
    pathParts = name.split("/")
    paramName = pathParts[-1]
    compName = pathParts[-2]

    nDiffs = _compareSets(set(srcData.attrs.keys()), set(refData.attrs.keys()),
                          "formatting data")
    keysMatch = nDiffs == 0
    diffResults.addStructureDiffs(nDiffs)

    if not keysMatch:
        diffResults.addDiff(name, name, [numpy.inf], [numpy.inf], [numpy.inf])
        return

    if srcData.attrs.get("dict", False):
        # not bothering with dictionaries yet, though we will need to for things like
        # number densities
        return

    attrsMatch = True
    for k in srcData.attrs.keys():
        srcAttr = srcData.attrs[k]
        refAttr = refData.attrs[k]
        if isinstance(srcAttr, numpy.ndarray) or isinstance(refAttr, numpy.ndarray):
            # array_equal returns a plain bool and treats a size mismatch as "not
            # equal". An element-wise ``==`` on arrays of different sizes either
            # yields a scalar or raises, depending on the NumPy version. The
            # values are ravelled so only the flattened contents are compared.
            same = numpy.array_equal(numpy.ravel(srcAttr), numpy.ravel(refAttr))
        else:
            same = srcAttr == refAttr
        if not same:
            attrsMatch = False
            out.writeln(
                "Special formatting parameters for {} do not match for {}. Src: {} "
                "Ref: {}".format(name, k, srcAttr, refAttr))
    if not attrsMatch:
        return

    src = database3.unpackSpecialData(srcData[()], srcData.attrs, paramName)
    ref = database3.unpackSpecialData(refData[()], refData.attrs, paramName)

    diff = []
    # NOTE(review): zip() stops at the shorter sequence, so trailing elements of
    # a longer dataset are not compared -- confirm whether lengths can differ here.
    for dSrc, dRef in zip(src.tolist(), ref.tolist()):
        if isinstance(dSrc, numpy.ndarray) and isinstance(dRef, numpy.ndarray):
            if dSrc.shape != dRef.shape:
                out.writeln("Shapes did not match for {}".format(refData))
                # Record the mismatch through addDiff, the same reporting call
                # used everywhere else in this function.
                diffResults.addDiff(compName, paramName, [numpy.inf],
                                    [numpy.inf], [numpy.inf])
                return

            # make sure not to try to compare empty arrays. Numpy is mediocre at
            # these; they are super degenerate and cannot participate in concatenation.
            # Why?
            if 0 not in dSrc.shape:
                # Use the mean of the two to calc relative error. This is more robust to
                # changes that cause one of the values to be zero, while the other is
                # non-zero, leading to infinite relative error
                dMean = (dSrc + dRef) / 2
                diff.append((dSrc - dRef) / dMean)
            continue

        # Exactly one side is None: count it as an infinite difference.
        if (dSrc is None) ^ (dRef is None):
            out.writeln("Mismatched Nones for {} in {}".format(
                paramName, compName))
            diff.append([numpy.inf])
            continue

        # Both sides are None: no difference.
        if dSrc is None:
            diff.append([0.0])
            continue

        try:
            # Use mean to avoid some infinities; see above
            dMean = (dSrc + dRef) / 2
            diff.append([(dSrc - dRef) / dMean])
        except ZeroDivisionError:
            # The mean is zero: either both values are zero (no difference) or
            # they are equal and opposite (treated as infinitely different).
            if dSrc == dRef:
                diff.append([0.0])
            else:
                diff.append([numpy.inf])

    if diff:
        try:
            diff = [numpy.array(d).flatten() for d in diff]
            diff = numpy.concatenate(diff)
        except ValueError as e:
            out.writeln(
                "Failed to concatenate diff data for {} in {}: {}".format(
                    paramName, compName, diff))
            out.writeln("Because: {}".format(e))
            return
        # NaNs (e.g. 0/0 from array elements that are both zero) are ignored by
        # the nan-aware reductions below.
        absDiff = numpy.abs(diff)
        mean = numpy.nanmean(diff)
        absMax = numpy.nanmax(absDiff)
        absMean = numpy.nanmean(absDiff)

        diffResults.addDiff(compName, paramName, absMean, mean, absMax)