Example #1
0
    def _run_tstSaveAsBinarySeries(self, testIdx, narys_, valDtype, groupingDim_):
        """Save generated test images as a binary series and verify what lands on disk.

        Written as a parameter-taking helper rather than a standalone test, so that
        several parameter combinations can share one existing spark context.

        Parameters
        ----------
        testIdx : int
            Index of this run; names the output directory and appears in failure messages.
        narys_ : int
            Number of test arrays requested; also the expected number of values per record.
        valDtype : str
            Dtype name of the generated values; compared against conf.json's 'valuetype'.
        groupingDim_ : int
            Dimension given as many splits as it has indices; the test expects one
            output .bin file per index along it.
        """
        import json

        paramStr = "(groupingdim=%d, valuedtype='%s')" % (groupingDim_, valDtype)
        arys, aryShape, _ = _generateTestArrays(narys_, dtype_=valDtype)
        outdir = os.path.join(self.outputdir, "anotherdir%02d" % testIdx)

        images = ImagesLoader(self.sc).fromArrays(arys)

        # one split everywhere, except along the grouping dimension, which gets as
        # many splits as it has indices
        firstAry = arys[0]
        splits = [1] * firstAry.ndim
        splits[groupingDim_] = firstAry.shape[groupingDim_]
        images.toBlocks(splits, units="splits").saveAsBinarySeries(outdir)

        nkeyFields = len(aryShape)
        # each record is nkeyFields 'h' keys followed by narys_ values; the leading "="
        # specifies no alignment, which prevents padding to 4-byte boundaries
        recordFormat = struct.Struct('=' + 'h'*nkeyFields + dtypeFunc(valDtype).char*narys_)

        # every file should hold one record per position in the non-grouping dimensions
        otherExtents = list(aryShape[:])
        del otherExtents[groupingDim_]
        expectedRecordsPerFile = prod(otherExtents)

        def readRecords(fileobj):
            # yields (keys, values) tuples until read() comes back empty
            while True:
                raw = fileobj.read(recordFormat.size)
                if not raw:
                    break
                fields = recordFormat.unpack(raw)
                yield fields[:nkeyFields], fields[nkeyFields:]

        binPaths = glob.glob(os.path.join(outdir, "*.bin"))
        assert_equals(aryShape[groupingDim_], len(binPaths))
        for binPath in binPaths:
            with open(binPath, 'rb') as binFile:
                nrecords = 0
                for keys, vals in readRecords(binFile):
                    nrecords += 1
                    assert_equals(narys_, len(vals))
                    for valIdx, actual in enumerate(vals):
                        expected = arys[valIdx][keys]
                        assert_equals(expected, actual, "Expected %g, got %g, for test %d %s" %
                                      (expected, actual, testIdx, paramStr))
                assert_equals(expectedRecordsPerFile, nrecords)

        # the accompanying conf.json must describe the records written above
        confName = os.path.join(outdir, "conf.json")
        assert_true(os.path.isfile(confName))
        with open(confName, 'r') as fconf:
            conf = json.load(fconf)
            expectedConf = [
                ('input', outdir),
                ('nkeys', len(aryShape)),
                ('nvalues', narys_),
                ('valuetype', valDtype),
                ('keytype', 'int16'),
            ]
            for field, expectedValue in expectedConf:
                assert_equals(expectedValue, conf[field])

        assert_true(os.path.isfile(os.path.join(outdir, 'SUCCESS')))
Example #2
0
    def test_toSeriesWithPack(self):
        """Turn one 2x4 int16 image into a Series; check key order, dims, values and pack()."""
        ary = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))

        image = ImagesLoader(self.sc).fromArrays(ary)
        series = image.toBlocks("150M").toSeries()

        seriesVals = series.collect()
        packed = series.pack()
        packedTransposed = series.pack(transpose=True)

        # check ordering of keys: the first index is expected to vary fastest
        expectedKeys = [(0, 0), (1, 0),
                        (0, 1), (1, 1),
                        (0, 2), (1, 2),
                        (0, 3), (1, 3)]
        for recordIdx, expectedKey in enumerate(expectedKeys):
            assert_equals(expectedKey, seriesVals[recordIdx][0])

        # check dimensions tuple matches numpy shape
        seriesDims = series.dims.count
        assert_equals(image.dims.count, seriesDims)
        assert_equals(ary.shape, seriesDims)

        # check that values are in Fortran-convention order
        collectedVals = array([record[1] for record in seriesVals], dtype=dtypeFunc('int16')).ravel()
        assert_true(array_equal(ary.ravel(order='F'), collectedVals))

        # check that packing returns original array (or its transpose, when requested)
        assert_true(array_equal(ary, packed))
        assert_true(array_equal(ary.T, packedTransposed))
Example #3
0
    def test_fromStackToSeriesWithPack(self):
        """Load a 2x4 int16 array from a stack file as dims (4, 2), then check the Series."""
        ary = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))
        stackPath = os.path.join(self.outputdir, "test.stack")
        ary.tofile(stackPath)

        image = ImagesLoader(self.sc).fromStack(stackPath, dims=(4, 2))
        blockingStrategy = SimpleBlockingStrategy.generateFromBlockSize(image, "150M")
        series = image.toBlocks(blockingStrategy).toSeries()

        seriesVals = series.collect()
        packed = series.pack()

        # check ordering of keys: the first index is expected to vary fastest
        expectedKeys = [(0, 0), (1, 0), (2, 0), (3, 0),
                        (0, 1), (1, 1), (2, 1), (3, 1)]
        for recordIdx, expectedKey in enumerate(expectedKeys):
            assert_equals(expectedKey, seriesVals[recordIdx][0])

        # check dimensions tuple is reversed from numpy shape
        assert_equals(ary.shape[::-1], series.dims.count)

        # check that values are in original order
        collectedVals = array([record[1] for record in seriesVals], dtype=dtypeFunc('int16')).ravel()
        assert_true(array_equal(ary.ravel(), collectedVals))

        # check that packing returns transpose of original array
        assert_true(array_equal(ary.T, packed))
Example #4
0
    def test_toSeriesWithInefficientSplitAndSortedPack(self):
        """Split a 4x2 image with (2, 1) splits and check per-block key order and sorted pack()."""
        ary = arange(8, dtype=dtypeFunc('int16')).reshape((4, 2))

        image = ImagesLoader(self.sc).fromArrays(ary)
        series = image.toBlocks((2, 1), units="s").toSeries()

        seriesVals = series.collect()
        sortedPacked = series.pack(sorting=True)

        # check ordering of keys: all records of the first block come before the second's
        firstBlockKeys = [(0, 0), (1, 0), (0, 1), (1, 1)]
        secondBlockKeys = [(2, 0), (3, 0), (2, 1), (3, 1)]
        for recordIdx, expectedKey in enumerate(firstBlockKeys + secondBlockKeys):
            assert_equals(expectedKey, seriesVals[recordIdx][0])

        # check dimensions tuple matches numpy shape
        assert_equals(ary.shape, series.dims.count)

        # check that values are in expected order: Fortran order within each block
        collectedVals = array([record[1] for record in seriesVals], dtype=dtypeFunc('int16')).ravel()
        firstBlockVals = ary[:2, :].ravel(order='F')
        assert_true(array_equal(firstBlockVals, collectedVals[:4]))
        secondBlockVals = ary[2:4, :].ravel(order='F')
        assert_true(array_equal(secondBlockVals, collectedVals[4:]))

        # check that packing returns original array (after sort)
        assert_true(array_equal(ary, sortedPacked))
 def setUp(self):
     """Collect (10, 10)-sized blocks of three 30x30 all-ones images, plus their keys."""
     super(TestBlockKeys, self).setUp()
     imageShape = (30, 30)
     onesImages = [ones(imageShape) for _ in range(3)]
     blocked = ImagesLoader(self.sc).fromArrays(onesImages).toBlocks(size=(10, 10))
     self.blocks = blocked.collect()
     # keep the keys separately from the (key, value) pairs
     self.keys = [key for key, _ in self.blocks]
Example #6
0
    def test_toSeries(self):
        """Convert generated test images to series via (4, 1, 1) splits and evaluate them."""
        # 3 arrays of 4x3x3 images (C-order), containing sequential integers
        arys, _, arySize = _generateTestArrays(3)

        blocks = ImagesLoader(self.sc).fromArrays(arys).toBlocks((4, 1, 1), units="s")
        collectedSeries = blocks.toSeries().collect()

        self.evaluateSeries(arys, collectedSeries, arySize)
Example #7
0
    def _run_tst_roundtripThroughBlocks(self, strategy):
        """Check that images come back unchanged after a round trip through blocks.

        Loads the tif images under "utils/data/fish/images" in the source tree,
        converts them to blocks using `strategy`, converts the blocks back to images,
        and compares each original image array with its round-tripped counterpart.

        Parameters
        ----------
        strategy
            Blocking strategy passed straight through to `images.toBlocks()`.
        """
        imagepath = findSourceTreeDir("utils/data/fish/images")
        images = ImagesLoader(self.sc).fromTif(imagepath)
        blockedimages = images.toBlocks(strategy)
        recombinedimages = blockedimages.toImages()

        collectedimages = images.collect()
        roundtrippedimages = recombinedimages.collect()

        # zip() stops at the shorter sequence, so without this check a round trip
        # that dropped (or added) images would still pass the comparison below
        assert_true(len(collectedimages) == len(roundtrippedimages),
                    "Expected %d images after round trip, got %d" %
                    (len(collectedimages), len(roundtrippedimages)))

        # element [1] of each collected record is the image array
        for orig, roundtripped in zip(collectedimages, roundtrippedimages):
            assert_true(array_equal(orig[1], roundtripped[1]))
Example #8
0
    def test_toBlocksWithSplit(self):
        """Split a 2x4 image with (1, 2) splits and check each block's spatial key and values."""
        ary = arange(8, dtype=dtypeFunc('int16')).reshape((2, 4))

        image = ImagesLoader(self.sc).fromArrays(ary)
        collectedBlocks = image.toBlocks((1, 2), units="s").collect()

        # (expected spatial key, expected slice of the source array), in collected order
        expectedBlocks = [
            ((0, 0), ary[:, :2]),
            ((0, 2), ary[:, 2:]),
        ]
        for blockIdx, (expectedKey, expectedAry) in enumerate(expectedBlocks):
            assert_equals(expectedKey, collectedBlocks[blockIdx][0].spatialKey)
            assert_true(array_equal(expectedAry.ravel(), collectedBlocks[blockIdx][1].ravel()))
Example #9
0
    def test_toSeriesBySlices(self):
        """Evaluate the series produced for each combination of splits per dimension."""
        arys, _, arySize = _generateTestArrays(3)

        imageData = ImagesLoader(self.sc).fromArrays(arys)
        # the same images are re-blocked once per parameter set below
        imageData.cache()

        # every (first, second, third) split count with first in 1..2 and the others
        # in 1..3, with the last dimension varying fastest
        splitCombinations = [(first, second, third)
                             for first in (1, 2)
                             for second in (1, 2, 3)
                             for third in (1, 2, 3)]

        for splitsPerDim in splitCombinations:
            collectedSeries = imageData.toBlocks(splitsPerDim, units="s").toSeries().collect()
            self.evaluateSeries(arys, collectedSeries, arySize)