Exemple #1
0
 def test_sub_to_ind_array(self):
     """Check subtoind on a plain local list of (subscript, value) records.

     Twelve one-based subscripts covering a 2 x 3 x 2 volume, listed with
     the first coordinate changing fastest, are expected to come back
     keyed by the linear indices 1 through 12, in that order.
     """
     dims = [2, 3, 2]
     subs = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1), (2, 3, 1),
             (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2), (1, 3, 2), (2, 3, 2)]

     # every subscript carries a dummy one-element array as its value
     data_local = [(sub, array([1.0])) for sub in subs]

     # only the converted keys matter for this check
     inds = [record[0] for record in subtoind(data_local, dims)]

     expected = array(range(1, 13))
     assert allclose(inds, expected)
Exemple #2
0
    def calc(self, data):
        """Calculate averages. Keys (tuples) are converted
        into linear indices based on their dimensions

        Parameters
        ----------
        data : RDD of (tuple, array) pairs, each array of shape (ncols,)
            Data to compute averages from

        Returns
        -------
        ts : array, shape (n, ncols)
            Row i is the sum of the values selected for group i divided by
            the number of indices in self.inds[i]. Rows whose index list is
            empty are left as zeros.
        """

        dims = getdims(data)
        data = subtoind(data, dims.max)

        # the width of the output is taken from the first record's value
        ncols = len(data.first()[1])
        ts = zeros((self.n, ncols))

        # loop over indices, averaging time series
        for i in range(self.n):
            count = len(self.inds[i])
            if count > 0:
                # Index into the (key, value) pair rather than unpacking it in
                # the lambda signature: tuple parameters are a SyntaxError on
                # Python 3 (PEP 3113), while indexing works on both 2 and 3.
                total = self.select(data, i).map(lambda kv: kv[1]).sum()
                ts[i, :] = total / count

        return ts
Exemple #3
0
    def calc(self, data):
        """Calculate averages. Keys (tuples) are converted
        into linear indices based on their dimensions

        For every non-empty index list in self.inds this stores the average
        value and the average key (subscript) of that group. Groups with an
        empty index list keep all-zero rows in both outputs.

        Parameters
        ----------
        data : RDD of (tuple, array) pairs, each array of shape (ncols,)
            Data to compute averages from

        Returns
        -------
        self : returns an instance of self, with the attributes ``keys``
            (one row per group, one column per dimension) and ``values``
            (one row per group, ncols columns) set.
        """

        dims = getdims(data)
        data = subtoind(data, dims.max)

        # loop over indices, computing average keys and average values
        keys = zeros((self.n, len(dims.count())))
        values = zeros((self.n, len(data.first()[1])))
        for idx, indlist in enumerate(self.inds):
            if len(indlist) > 0:
                # Index into the (key, value) pair instead of unpacking it in
                # the lambda signature: tuple parameters are a SyntaxError on
                # Python 3 (PEP 3113), while indexing works on both 2 and 3.
                total = self.select(data, idx).map(lambda kv: kv[1]).sum()
                values[idx, :] = total / len(indlist)

                # indtosub expects (key, value) records, so attach a dummy 0
                # value to each linear index, then keep only the subscripts.
                # List comprehensions keep this a concrete sequence for mean()
                # even where map() is lazy.
                subs = indtosub([(ind, 0) for ind in indlist], dims.max)
                keys[idx, :] = mean([kv[0] for kv in subs], axis=0)

        self.keys = keys
        self.values = values

        return self
Exemple #4
0
    def calc(self, data):
        """Calculate averages. Keys (tuples) are converted
        into linear indices based on their dimensions

        Parameters
        ----------
        data : RDD of (tuple, array) pairs, each array of shape (ncols,)
            Data to compute averages from

        Returns
        -------
        self : returns an instance of self. ``self.keys`` holds the mean
            subscript of each group and ``self.values`` the mean value of
            each group; groups with no indices keep all-zero rows.
        """

        dims = getdims(data)
        data = subtoind(data, dims.max)

        ndims = len(dims.count())
        ncols = len(data.first()[1])

        # loop over indices, computing average keys and average values
        keys = zeros((self.n, ndims))
        values = zeros((self.n, ncols))
        for idx, indlist in enumerate(self.inds):
            nmembers = len(indlist)
            if nmembers > 0:
                # kv[1] / kv[0] replace tuple-unpacking lambdas, which are a
                # SyntaxError on Python 3 (PEP 3113); indexing is valid on
                # both Python 2 and 3.
                selected = self.select(data, idx)
                values[idx, :] = selected.map(lambda kv: kv[1]).sum() / nmembers

                # pair each linear index with a placeholder value so the list
                # has the (key, value) record layout passed to indtosub
                placeholders = [(ind, 0) for ind in indlist]
                subscripts = [kv[0] for kv in indtosub(placeholders, dims.max)]
                keys[idx, :] = mean(subscripts, axis=0)

        self.keys = keys
        self.values = values

        return self
Exemple #5
0
 def check_subtoind_result(si_param):
     """Compare subtoind output against the expected indices in si_param.

     si_param supplies the subscripts, dims, order and onebased settings
     passed to subtoind, plus the expected linear indices. Each result is
     asserted separately so a failure names the offending subscript.
     """
     # subtoind is handed (subscript, value) records; 'x' is a throwaway value
     records = [(sub, 'x') for sub in si_param.subscripts]
     converted = subtoind(records, si_param.dims,
                          order=si_param.order, onebased=si_param.onebased)

     triples = zip(converted, si_param.indices, si_param.subscripts)
     for record, want, sub in triples:
         got = record[0]
         message = ('Got index %d instead of %d for subscript:%s, dims:%s' %
                    (got, want, str(sub), str(si_param.dims)))
         assert_equals(want, got, message)
Exemple #6
0
 def test_sub_to_ind_array(self):
     """Verify subtoind on a local list for a 2 x 3 x 2 volume.

     The one-based subscripts are enumerated with the first coordinate
     varying fastest and the last slowest; the keys returned by subtoind
     are expected to be the linear indices 1..12 in the same order.
     """
     dims = [2, 3, 2]

     # yields (1, 1, 1), (2, 1, 1), (1, 2, 1), ... , (2, 3, 2)
     subs = [(x, y, z)
             for z in (1, 2)
             for y in (1, 2, 3)
             for x in (1, 2)]

     # a one-element array stands in for each record's value
     data_local = [(sub, array([1.0])) for sub in subs]
     converted = subtoind(data_local, dims)
     inds = [pair[0] for pair in converted]

     assert allclose(inds, array(range(1, 13)))
Exemple #7
0
    def test_sub_to_ind_rdd(self):
        """Check subtoind on records distributed through self.sc.parallelize.

        Twelve one-based subscripts covering a 2 x 3 x 2 volume, listed with
        the first coordinate changing fastest, are expected to be re-keyed
        by the linear indices 1 through 12, in order.
        """
        subs = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1), (2, 3, 1),
                (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2), (1, 3, 2), (2, 3, 2)]
        # every subscript carries a dummy one-element array as its value
        data_local = [(sub, array([1.0])) for sub in subs]

        data = self.sc.parallelize(data_local)
        dims = [2, 3, 2]
        # Index into the (key, value) pair instead of unpacking it in the
        # lambda signature: tuple parameters are a SyntaxError on Python 3
        # (PEP 3113), while indexing works on both Python 2 and 3.
        inds = subtoind(data, dims).map(lambda kv: kv[0]).collect()
        assert allclose(inds, array(range(1, 13)))
Exemple #8
0
    def test_sub_to_ind_rdd(self):
        """Verify subtoind on an RDD built with self.sc.parallelize.

        The input holds the twelve one-based subscripts of a 2 x 3 x 2
        volume (first coordinate varying fastest); the collected keys are
        expected to equal the linear indices 1..12.
        """
        dims = [2, 3, 2]
        subs = [(1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1),
                (2, 3, 1), (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2),
                (1, 3, 2), (2, 3, 2)]

        # a one-element array stands in for each record's value
        data_local = [(sub, array([1.0])) for sub in subs]
        data = self.sc.parallelize(data_local)

        # kv[0] picks the key; a tuple-unpacking lambda such as
        # ``lambda (k, _): k`` is rejected by Python 3 (PEP 3113)
        converted = subtoind(data, dims)
        inds = converted.map(lambda kv: kv[0]).collect()

        expected = array(range(1, 13))
        assert allclose(inds, expected)
Exemple #9
0
 def check_subtoind_result(si_param):
     """Run subtoind with the settings in si_param and check every index.

     The subscripts, dims, order and onebased fields of si_param form the
     subtoind call; the indices field holds the expected keys. Results are
     asserted one at a time so the message pinpoints the failing subscript.
     """
     # wrap each subscript in a (key, value) record with a dummy 'x' value
     data = [(subscript, 'x') for subscript in si_param.subscripts]
     options = dict(order=si_param.order, onebased=si_param.onebased)
     results = subtoind(data, si_param.dims, **options)

     template = 'Got index %d instead of %d for subscript:%s, dims:%s'
     for position, res in enumerate(zip(results, si_param.indices,
                                        si_param.subscripts)):
         outcome, expected, subscript = res
         actual = outcome[0]
         assert_equals(
             expected, actual,
             template % (actual, expected, str(subscript),
                         str(si_param.dims)))
Exemple #10
0
    def calc(self, data):
        """Calculate averages. Keys (tuples) are converted
        into linear indices based on their dimensions

        Parameters
        ----------
        data : RDD of (tuple, array) pairs, each array of shape (ncols,)
            Data to compute averages from

        Returns
        -------
        ts : array, shape (n, ncols)
            Row i is the sum of the values selected for group i divided by
            the number of indices in self.inds[i]. Rows whose index list is
            empty are left as zeros.
        """

        dims = getdims(data)
        data = subtoind(data, dims.max)

        # loop over indices, averaging time series
        ts = zeros((self.n, len(data.first()[1])))
        for i in range(self.n):
            count = len(self.inds[i])
            # Skip empty groups: dividing by a zero count would raise instead
            # of producing a row, so such rows simply stay at zero.
            if count == 0:
                continue
            # kv[1] replaces a tuple-unpacking lambda, which is a SyntaxError
            # on Python 3 (PEP 3113); indexing works on both Python 2 and 3.
            ts[i, :] = self.select(data, i).map(lambda kv: kv[1]).sum() / count

        return ts