Beispiel #1
0
    def indToSub(self, order='F', isOneBased=True, dims=None):
        """
        Convert linear indexing to subscript indexing

        Parameters
        ----------
        dims : array-like, optional
            Maximum dimensions. If not provided, will use dims property.

        order : str, 'C' or 'F', default = 'F'
            Specifies row-major or column-major array indexing. See numpy.unravel_index.

        onebased : boolean, default = True
            True if generated subscript indices are to start at 1, False to start at 0
        """
        from thunder.rdds.keys import _indToSubConverter

        if dims is None:
            dims = self.dims.max

        converter = _indToSubConverter(dims,
                                       order=order,
                                       isOneBased=isOneBased)
        rdd = self.rdd.map(lambda (k, v): (converter(k), v))
        return self._constructor(rdd, index=self._index).__finalize__(self)
Beispiel #2
0
 def check_indtosubResult(indsubParam):
     data = indsubParam.indices
     converter = _indToSubConverter(dims=indsubParam.dims, order=indsubParam.order, isOneBased=indsubParam.onebased)
     results = map(lambda x: converter(x), data)
     for res, expected, index in zip(results, indsubParam.subscripts, indsubParam.indices):
         assert_equals(expected, res, 'Got subscript %s instead of %s for index:%d, dims:%s' %
                       (res, expected, index, str(indsubParam.dims)))
Beispiel #3
0
    def query(self, inds, var='inds', order='F', isOneBased=True):
        """
        Extract records with indices matching those provided

        Keys will be automatically linearized before matching to provided indices.
        This will not affect

        Parameters
        ----------
        inds : str, or array-like (2D)
            Array of indices, each an array-like of integer indices, or
            filename of a MAT file containing a set of indices as a cell array

        var : str, optional, default = 'inds'
            Variable name if loading from a MAT file

        order : str, optional, default = 'F'
            Specify ordering for linearizing indices (see subtoind)

        onebased : boolean, optional, default = True
            Specify zero or one based indexing for linearizing (see subtoind)

        Returns
        -------
        keys : array, shape (n, k) where k is the length of each value
            Averaged values

        values : array, shape (n, d) where d is the number of keys
            Averaged keys
        """

        if isinstance(inds, str):
            inds = loadMatVar(inds, var)[0]
        else:
            inds = asarray(inds)

        n = len(inds)

        from thunder.rdds.keys import _indToSubConverter
        converter = _indToSubConverter(dims=self.dims.max,
                                       order=order,
                                       isOneBased=isOneBased)

        keys = zeros((n, len(self.dims.count)))
        values = zeros((n, len(self.first()[1])))

        data = self.subToInd(order=order, isOneBased=isOneBased)

        for idx, indList in enumerate(inds):
            if len(indList) > 0:
                indsSet = set(asarray(indList).flat)
                bcInds = self.rdd.context.broadcast(indsSet)
                values[idx, :] = data.filterOnKeys(
                    lambda k: k in bcInds.value).values().mean()
                keys[idx, :] = mean(map(lambda k: converter(k), indList),
                                    axis=0)

        return keys, values
Beispiel #4
0
    def query(self, inds, var='inds', order='F', isOneBased=True):
        """
        Extract records with indices matching those provided

        Keys will be automatically linearized before matching to provided indices.
        This will not affect

        Parameters
        ----------
        inds : str, or array-like (2D)
            Array of indices, each an array-like of integer indices, or
            filename of a MAT file containing a set of indices as a cell array

        var : str, optional, default = 'inds'
            Variable name if loading from a MAT file

        order : str, optional, default = 'F'
            Specify ordering for linearizing indices (see subtoind)

        onebased : boolean, optional, default = True
            Specify zero or one based indexing for linearizing (see subtoind)

        Returns
        -------
        keys : array, shape (n, k) where k is the length of each value
            Averaged values

        values : array, shape (n, d) where d is the number of keys
            Averaged keys
        """

        if isinstance(inds, str):
            inds = loadMatVar(inds, var)[0]
        else:
            inds = asarray(inds)

        n = len(inds)

        from thunder.rdds.keys import _indToSubConverter
        converter = _indToSubConverter(dims=self.dims.max, order=order, isOneBased=isOneBased)

        keys = zeros((n, len(self.dims.count)))
        values = zeros((n, len(self.first()[1])))

        data = self.subToInd(order=order, isOneBased=isOneBased)

        for idx, indList in enumerate(inds):
            if len(indList) > 0:
                indsSet = set(asarray(indList).flat)
                bcInds = self.rdd.context.broadcast(indsSet)
                values[idx, :] = data.filterOnKeys(lambda k: k in bcInds.value).values().mean()
                keys[idx, :] = mean(map(lambda k: converter(k), indList), axis=0)

        return keys, values
Beispiel #5
0
    def indToSub(self, order='F', isOneBased=True, dims=None):
        """
        Convert linear indexing to subscript indexing

        Parameters
        ----------
        dims : array-like, optional
            Maximum dimensions. If not provided, will use dims property.

        order : str, 'C' or 'F', default = 'F'
            Specifies row-major or column-major array indexing. See numpy.unravel_index.

        onebased : boolean, default = True
            True if generated subscript indices are to start at 1, False to start at 0
        """
        from thunder.rdds.keys import _indToSubConverter

        if dims is None:
            dims = self.dims.max

        converter = _indToSubConverter(dims, order=order, isOneBased=isOneBased)
        rdd = self.rdd.map(lambda (k, v): (converter(k), v))
        return self._constructor(rdd, index=self._index).__finalize__(self)
Beispiel #6
0
 def test_ind_to_sub_array(self):
     inds = range(1, 13)
     converter = _indToSubConverter(dims=[2, 3, 2])
     subs = map(lambda x: converter(x), inds)
     assert(allclose(subs, array([(1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1), (2, 3, 1),
                                  (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2), (1, 3, 2), (2, 3, 2)])))