Example #1
0
 def check_indtosub_result(indsub_param):
     """Assert that indtosub yields the expected subscripts for one parameter set.

     Every entry of ``indsub_param.indices`` is paired with a placeholder
     value, passed through indtosub together with the parameter set's
     ``dims``, ``order`` and ``onebased`` settings, and the first element of
     each result is compared with the matching entry of
     ``indsub_param.subscripts``.
     """
     # indtosub takes (key, value) pairs, so give each index a throwaway value
     keyed_indices = map(lambda ind: (ind, 'x'), indsub_param.indices)
     converted = indtosub(
         keyed_indices,
         indsub_param.dims,
         order=indsub_param.order,
         onebased=indsub_param.onebased,
     )
     triples = zip(converted, indsub_param.subscripts, indsub_param.indices)
     for pair, want, ind in triples:
         got = pair[0]
         failure_msg = 'Got subscript %s instead of %s for index:%d, dims:%s' % (
             got, want, ind, str(indsub_param.dims))
         assert_equals(want, got, failure_msg)
Example #2
0
    def test_ind_to_sub_rdd(self):
        """Check indtosub on a distributed collection of (index, value) pairs.

        Linear indices 1..12 are distributed with ``self.sc.parallelize``,
        converted by indtosub for dims [2, 3, 2] using its default arguments,
        and the collected keys are compared with the twelve expected
        3-element subscripts (first coordinate varying fastest).
        """
        data_local = [(x, array([1.0])) for x in range(1, 13)]

        data = self.sc.parallelize(data_local)
        dims = [2, 3, 2]
        # Index into the pair instead of unpacking it in the lambda signature:
        # tuple parameters are Python 2-only syntax (removed by PEP 3113).
        subs = indtosub(data, dims).map(lambda kv: kv[0]).collect()
        expected = array(
            [
                (1, 1, 1),
                (2, 1, 1),
                (1, 2, 1),
                (2, 2, 1),
                (1, 3, 1),
                (2, 3, 1),
                (1, 1, 2),
                (2, 1, 2),
                (1, 2, 2),
                (2, 2, 2),
                (1, 3, 2),
                (2, 3, 2),
            ]
        )
        assert allclose(subs, expected)
Example #3
0
    def calc(self, data):
        """Calculate averages. Keys (tuples) are converted
        into linear indices based on their dimensions

        For every index list in ``self.inds`` the values returned by
        ``self.select`` are summed and divided by the length of the list, and
        the indices themselves are converted back to subscripts and averaged
        to give a mean key. Rows that belong to an empty index list are left
        as zeros.

        Parameters
        ----------
        data : RDD of (tuple, array) pairs, each array of shape (ncols,)
            Data to compute averages from

        Returns
        -------
        self : returns an instance of self.
            ``self.keys`` and ``self.values`` are set as a side effect; each
            has ``self.n`` rows.
        """

        dims = getdims(data)
        data = subtoind(data, dims.max)

        # loop over indices, computing average keys and average values
        keys = zeros((self.n, len(dims.count())))
        values = zeros((self.n, len(data.first()[1])))
        for idx, indlist in enumerate(self.inds):
            count = len(indlist)
            if count > 0:
                # Index into each pair rather than unpacking it in the lambda
                # signature (tuple parameters were removed by PEP 3113).
                total = self.select(data, idx).map(lambda kv: kv[1]).sum()
                # Divide by a float so integer-valued data is not
                # floor-divided under Python 2.
                values[idx, :] = total / float(count)
                # Build real lists: map() is lazy on Python 3 and mean()
                # needs a sequence it can turn into an array.
                pairs = [(k, 0) for k in indlist]
                subs = [kv[0] for kv in indtosub(pairs, dims.max)]
                keys[idx, :] = mean(subs, axis=0)

        self.keys = keys
        self.values = values

        return self
Example #4
0
    def calc(self, data):
        """Calculate averages. Keys (tuples) are converted
        into linear indices based on their dimensions

        Each non-empty index list in ``self.inds`` produces one row of
        averaged values (sum of the records picked by ``self.select`` divided
        by the list length) and one row holding the mean of the subscripts
        that correspond to its indices. Empty index lists leave their rows
        at zero.

        Parameters
        ----------
        data : RDD of (tuple, array) pairs, each array of shape (ncols,)
            Data to compute averages from

        Returns
        -------
        self : returns an instance of self.
            The results are stored on ``self.keys`` and ``self.values``,
            both with ``self.n`` rows.
        """

        dims = getdims(data)
        data = subtoind(data, dims.max)

        # loop over indices, computing average keys and average values
        keys = zeros((self.n, len(dims.count())))
        values = zeros((self.n, len(data.first()[1])))
        for idx, indlist in enumerate(self.inds):
            if len(indlist) > 0:
                # Pairs are indexed, not unpacked in the lambda signature:
                # tuple parameters are Python 2-only syntax (PEP 3113).
                selected = self.select(data, idx).map(lambda kv: kv[1])
                # float() keeps the average exact for integer-valued data,
                # which Python 2 would otherwise floor-divide.
                values[idx, :] = selected.sum() / float(len(indlist))
                # Lists instead of map(): on Python 3 map() returns a lazy
                # iterator that mean() cannot reduce along an axis.
                subscripts = [
                    kv[0]
                    for kv in indtosub([(k, 0) for k in indlist], dims.max)
                ]
                keys[idx, :] = mean(subscripts, axis=0)

        self.keys = keys
        self.values = values

        return self
Example #5
0
 def test_ind_to_sub_array(self):
     """Check indtosub on a local list of (index, value) pairs.

     Linear indices 1..12 are converted by indtosub for dims [2, 3, 2] using
     its default arguments, and the resulting keys are compared with the
     twelve expected 3-element subscripts (first coordinate varying fastest).
     """
     data_local = [(x, array([1.0])) for x in range(1, 13)]
     dims = [2, 3, 2]
     # Collect the keys into a list: on Python 3 map() returns a lazy
     # iterator, which allclose cannot compare against an array.
     subs = [pair[0] for pair in indtosub(data_local, dims)]
     assert allclose(
         subs,
         array([(1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1),
                (2, 3, 1), (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2),
                (1, 3, 2), (2, 3, 2)]))
Example #6
0
    def test_ind_to_sub_rdd(self):
        """Check indtosub on (index, value) pairs distributed via ``self.sc``.

        Linear indices 1..12 are parallelized, converted by indtosub for
        dims [2, 3, 2] using its default arguments, and the collected keys
        are compared with the twelve expected 3-element subscripts (first
        coordinate varying fastest).
        """
        data_local = [(x, array([1.0])) for x in range(1, 13)]

        data = self.sc.parallelize(data_local)
        dims = [2, 3, 2]
        # Take the key by index: unpacking a tuple in the lambda signature is
        # Python 2-only syntax (removed by PEP 3113).
        subs = indtosub(data, dims).map(lambda kv: kv[0]).collect()
        assert allclose(
            subs,
            array([(1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1),
                   (2, 3, 1), (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2),
                   (1, 3, 2), (2, 3, 2)]))
Example #7
0
 def check_indtosub_result(indsub_param):
     """Compare indtosub output with the subscripts expected by ``indsub_param``.

     The indices of the parameter set are wrapped as (index, 'x') pairs and
     converted by indtosub with the set's ``dims``, ``order`` and
     ``onebased`` values; the first element of every result must equal the
     corresponding entry of ``indsub_param.subscripts``.
     """
     template = 'Got subscript %s instead of %s for index:%d, dims:%s'

     # the 'x' is only a placeholder value so the input looks like (key, value)
     wrapped = map(lambda raw: (raw, 'x'), indsub_param.indices)
     outcome = indtosub(wrapped, indsub_param.dims,
                        order=indsub_param.order,
                        onebased=indsub_param.onebased)

     for entry, wanted, raw_index in zip(outcome,
                                         indsub_param.subscripts,
                                         indsub_param.indices):
         actual = entry[0]
         details = (actual, wanted, raw_index, str(indsub_param.dims))
         assert_equals(wanted, actual, template % details)
Example #8
0
 def test_ind_to_sub_array(self):
     """Check indtosub on a local list of (index, value) pairs.

     Linear indices 1..12 are converted by indtosub for dims [2, 3, 2] using
     its default arguments; the keys of the result must match the twelve
     expected 3-element subscripts (first coordinate varying fastest).
     """
     data_local = [(x, array([1.0])) for x in range(1, 13)]
     dims = [2, 3, 2]
     # A list comprehension instead of map(): on Python 3 map() is lazy and
     # allclose cannot compare an iterator against an array.
     subs = [pair[0] for pair in indtosub(data_local, dims)]
     expected = array(
         [
             (1, 1, 1),
             (2, 1, 1),
             (1, 2, 1),
             (2, 2, 1),
             (1, 3, 1),
             (2, 3, 1),
             (1, 1, 2),
             (2, 1, 2),
             (1, 2, 2),
             (2, 2, 2),
             (1, 3, 2),
             (2, 3, 2),
         ]
     )
     assert allclose(subs, expected)