Beispiel #1
0
    def loadSeriesFromArray(self, values, index=None, npartitions=None):
        """
        Load Series data from a local array

        Parameters
        ----------
        values : list or ndarray
            A list of 1d numpy arrays, or a single 2d numpy array

        index : array-like, optional, deafult = None
            Index to set for Series object, if None will use linear indices.

        npartitions : position int, optional, default = None
            Number of partitions for RDD, if unspecified will use
            default parallelism.
        """
        from numpy import ndarray, asarray
        from thunder.rdds.fileio.seriesloader import SeriesLoader
        loader = SeriesLoader(self._sc)

        if not npartitions:
            npartitions = self._sc.defaultParallelism

        if isinstance(values, list):
            values = asarray(values)

        if isinstance(values, ndarray) and values.ndim > 1:
            values = list(values)

        data = loader.fromArrays(values, npartitions=npartitions)

        if index:
            data.index = index

        return data
Beispiel #2
0
    def loadSeriesFromArray(self, values, index=None, npartitions=None):
        """
        Load Series data from a local array

        Parameters
        ----------
        values : list or ndarray
            A list of 1d numpy arrays, or a single 2d numpy array

        index : array-like, optional, deafult = None
            Index to set for Series object, if None will use linear indices.

        npartitions : position int, optional, default = None
            Number of partitions for RDD, if unspecified will use
            default parallelism.
        """
        from numpy import ndarray, asarray
        from thunder.rdds.fileio.seriesloader import SeriesLoader
        loader = SeriesLoader(self._sc)

        if not npartitions:
            npartitions = self._sc.defaultParallelism

        if isinstance(values, list):
            values = asarray(values)

        if isinstance(values, ndarray) and values.ndim > 1:
            values = list(values)

        data = loader.fromArrays(values, npartitions=npartitions)

        if index:
            data.index = index

        return data
Beispiel #3
0
    def _run_roundtrip_exception_tst(self, nimages, aryShape, dtypeSpec,
                                     sizeSpec):
        testArrays = TestSeriesBinaryWriteFromStack.generateTestImages(
            nimages, aryShape, dtypeSpec)
        loader = SeriesLoader(self.sc)
        series = loader.fromArrays(testArrays)

        assert_raises(ValueError, series.toBlocks, sizeSpec)
    def _run_roundtrip_tst(self, nimages, aryShape, dtypeSpec, sizeSpec):
        testArrays = TestSeriesBinaryWriteFromStack.generateTestImages(nimages, aryShape, dtypeSpec)
        loader = SeriesLoader(self.sc)
        series = loader.fromArrays(testArrays)

        blocks = series.toBlocks(sizeSpec)

        roundtrippedSeries = blocks.toSeries(newDType=series.dtype)

        packedSeries = series.pack()
        packedRoundtrippedSeries = roundtrippedSeries.pack()

        assert_true(array_equal(packedSeries, packedRoundtrippedSeries))
Beispiel #5
0
    def _run_roundtrip_tst(self, nimages, aryShape, dtypeSpec, sizeSpec):
        testArrays = TestSeriesBinaryWriteFromStack.generateTestImages(
            nimages, aryShape, dtypeSpec)
        loader = SeriesLoader(self.sc)
        series = loader.fromArrays(testArrays)

        blocks = series.toBlocks(sizeSpec)

        roundtrippedSeries = blocks.toSeries(newDType=series.dtype)

        packedSeries = series.pack()
        packedRoundtrippedSeries = roundtrippedSeries.pack()

        assert_true(array_equal(packedSeries, packedRoundtrippedSeries))
Beispiel #6
0
    def _run_roundtrip_tst(self, testIdx, nimages, aryShape, dtypeSpec,
                           npartitions):
        testArrays = TestSeriesBinaryWriteFromStack.generateTestImages(
            nimages, aryShape, dtypeSpec)
        loader = SeriesLoader(self.sc)
        series = loader.fromArrays(testArrays)

        saveDirPath = os.path.join(self.outputdir, 'save%d' % testIdx)
        series.repartition(
            npartitions
        )  # note: this does an elementwise shuffle! won't be in sorted order
        series.saveAsBinarySeries(saveDirPath)

        nnonemptyPartitions = 0
        for partitionList in series.rdd.glom().collect():
            if partitionList:
                nnonemptyPartitions += 1
        del partitionList
        nsaveFiles = len(glob.glob(saveDirPath + os.sep + "*.bin"))

        roundtrippedSeries = loader.fromBinary(saveDirPath)

        with open(os.path.join(saveDirPath, "conf.json"), 'r') as fp:
            conf = json.load(fp)

        # sorting is required here b/c of the randomization induced by the repartition.
        # orig and roundtripped will in general be different from each other, since roundtripped
        # will have (0, 0, 0) index as first element (since it will be the lexicographically first
        # file) while orig has only a 1 in npartitions chance of starting with (0, 0, 0) after repartition.
        expectedPackedAry = series.pack(sorting=True)
        actualPackedAry = roundtrippedSeries.pack(sorting=True)

        assert_true(array_equal(expectedPackedAry, actualPackedAry))

        assert_equals(nnonemptyPartitions, nsaveFiles)

        assert_equals(len(aryShape), conf["nkeys"])
        assert_equals(nimages, conf["nvalues"])
        assert_equals("int16", conf["keytype"])
        assert_equals(str(series.dtype), conf["valuetype"])
        # check that we have converted ourselves to an appropriate float after reloading
        assert_equals(str(smallestFloatType(series.dtype)),
                      str(roundtrippedSeries.dtype))
    def _run_roundtrip_tst(self, testIdx, nimages, aryShape, dtypeSpec, npartitions):
        testArrays = TestSeriesBinaryWriteFromStack.generateTestImages(nimages, aryShape, dtypeSpec)
        loader = SeriesLoader(self.sc)
        series = loader.fromArrays(testArrays)

        saveDirPath = os.path.join(self.outputdir, 'save%d' % testIdx)
        series.repartition(npartitions)  # note: this does an elementwise shuffle! won't be in sorted order
        series.saveAsBinarySeries(saveDirPath)

        nnonemptyPartitions = 0
        for partitionList in series.rdd.glom().collect():
            if partitionList:
                nnonemptyPartitions += 1
        del partitionList
        nsaveFiles = len(glob.glob(saveDirPath + os.sep + "*.bin"))

        roundtrippedSeries = loader.fromBinary(saveDirPath)

        with open(os.path.join(saveDirPath, "conf.json"), 'r') as fp:
            conf = json.load(fp)

        # sorting is required here b/c of the randomization induced by the repartition.
        # orig and roundtripped will in general be different from each other, since roundtripped
        # will have (0, 0, 0) index as first element (since it will be the lexicographically first
        # file) while orig has only a 1 in npartitions chance of starting with (0, 0, 0) after repartition.
        expectedPackedAry = series.pack(sorting=True)
        actualPackedAry = roundtrippedSeries.pack(sorting=True)

        assert_true(array_equal(expectedPackedAry, actualPackedAry))

        assert_equals(nnonemptyPartitions, nsaveFiles)

        assert_equals(len(aryShape), conf["nkeys"])
        assert_equals(nimages, conf["nvalues"])
        assert_equals("int16", conf["keytype"])
        assert_equals(str(series.dtype), conf["valuetype"])
        # check that we have converted ourselves to an appropriate float after reloading
        assert_equals(str(smallestFloatType(series.dtype)), str(roundtrippedSeries.dtype))
    def _run_roundtrip_exception_tst(self, nimages, aryShape, dtypeSpec, sizeSpec):
        testArrays = TestSeriesBinaryWriteFromStack.generateTestImages(nimages, aryShape, dtypeSpec)
        loader = SeriesLoader(self.sc)
        series = loader.fromArrays(testArrays)

        assert_raises(ValueError, series.toBlocks, sizeSpec)