def check_indtosub_result(indsub_param):
    """Check indtosub output against the expected subscripts of one case.

    Parameters
    ----------
    indsub_param : object
        Test case exposing ``indices``, ``dims``, ``order``, ``onebased``
        and ``subscripts`` attributes.
    """
    # indtosub is fed (key, value) pairs, so pair each index with a dummy 'x'
    keyed = [(ind, 'x') for ind in indsub_param.indices]
    converted = indtosub(
        keyed,
        indsub_param.dims,
        order=indsub_param.order,
        onebased=indsub_param.onebased
    )
    triples = zip(converted, indsub_param.subscripts, indsub_param.indices)
    for got, want, ind in triples:
        subscript = got[0]
        message = 'Got subscript %s instead of %s for index:%d, dims:%s' % (
            subscript, want, ind, str(indsub_param.dims))
        assert_equals(want, subscript, message)
def test_ind_to_sub_rdd(self):
    """Check that indtosub maps the keys 1..12 of an RDD to subscripts.

    The expected subscripts enumerate a (2, 3, 2) volume with one-based
    values and the first dimension varying fastest.
    """
    dims = [2, 3, 2]
    # a concrete list (not a lazy ``map`` object) keeps the fixture the
    # same on Python 2 and Python 3
    data_local = [(ind, array([1.0])) for ind in range(1, 13)]
    data = self.sc.parallelize(data_local)
    # index into each record instead of using a tuple-parameter lambda,
    # which is a syntax error on Python 3 (PEP 3113)
    subs = indtosub(data, dims).map(lambda kv: kv[0]).collect()
    expected = array([
        (1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1), (2, 3, 1),
        (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2), (1, 3, 2), (2, 3, 2),
    ])
    assert allclose(subs, expected)
def calc(self, data):
    """Calculate averages.

    Keys (tuples) are converted into linear indices based on their
    dimensions; for every index list in ``self.inds`` the selected values
    and the corresponding subscript keys are then averaged.

    Parameters
    ----------
    data : RDD of (tuple, array) pairs, each array of shape (ncols,)
        Data to compute averages from

    Returns
    -------
    self : returns an instance of self.
        ``self.keys`` and ``self.values`` are set to arrays with
        ``self.n`` rows; row ``idx`` is filled from ``self.inds[idx]`` and
        stays zero when that index list is empty.
    """
    dims = getdims(data)
    data = subtoind(data, dims.max)

    # loop over indices, computing average keys and average values
    keys = zeros((self.n, len(dims.count())))
    values = zeros((self.n, len(data.first()[1])))

    for idx, indlist in enumerate(self.inds):
        count = len(indlist)
        # explicit length test: indlist may be an array, whose truth value
        # is not a reliable emptiness check
        if count > 0:
            selected = self.select(data, idx)
            # index into each record rather than using a tuple-parameter
            # lambda, which is a syntax error on Python 3 (PEP 3113)
            total = selected.map(lambda kv: kv[1]).sum()
            # float divisor so integer-valued data is not floor-divided
            # on Python 2
            values[idx, :] = total / float(count)
            # indtosub is fed (key, value) pairs; 0 is a dummy value
            subs = indtosub([(ind, 0) for ind in indlist], dims.max)
            keys[idx, :] = mean([sub for sub, _ in subs], axis=0)

    self.keys = keys
    self.values = values
    return self
def calc(self, data):
    """Calculate averages.

    Keys (tuples) are converted into linear indices based on their
    dimensions; for every index list in ``self.inds`` the selected values
    and the corresponding subscript keys are then averaged.

    Parameters
    ----------
    data : RDD of (tuple, array) pairs, each array of shape (ncols,)
        Data to compute averages from

    Returns
    -------
    self : returns an instance of self.
        ``self.keys`` and ``self.values`` are set to arrays with
        ``self.n`` rows; row ``idx`` is filled from ``self.inds[idx]`` and
        stays zero when that index list is empty.
    """
    dims = getdims(data)
    data = subtoind(data, dims.max)

    # loop over indices, computing average keys and average values
    keys = zeros((self.n, len(dims.count())))
    values = zeros((self.n, len(data.first()[1])))

    for idx, indlist in enumerate(self.inds):
        count = len(indlist)
        # explicit length test: indlist may be an array, whose truth value
        # is not a reliable emptiness check
        if count > 0:
            selected = self.select(data, idx)
            # index into each record rather than using a tuple-parameter
            # lambda, which is a syntax error on Python 3 (PEP 3113)
            total = selected.map(lambda kv: kv[1]).sum()
            # float divisor so integer-valued data is not floor-divided
            # on Python 2
            values[idx, :] = total / float(count)
            # indtosub is fed (key, value) pairs; 0 is a dummy value
            subs = indtosub([(ind, 0) for ind in indlist], dims.max)
            keys[idx, :] = mean([sub for sub, _ in subs], axis=0)

    self.keys = keys
    self.values = values
    return self
def test_ind_to_sub_array(self):
    """Check that indtosub maps the keys 1..12 of a local list to subscripts.

    The expected subscripts enumerate a (2, 3, 2) volume with one-based
    values and the first dimension varying fastest.
    """
    dims = [2, 3, 2]
    # list comprehensions instead of ``map``: on Python 3 ``map`` is lazy
    # and ``allclose`` cannot compare a map object against an array
    data_local = [(ind, array([1.0])) for ind in range(1, 13)]
    subs = [record[0] for record in indtosub(data_local, dims)]
    expected = array([
        (1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1), (2, 3, 1),
        (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2), (1, 3, 2), (2, 3, 2),
    ])
    assert allclose(subs, expected)
def test_ind_to_sub_rdd(self):
    """Check that indtosub maps the keys 1..12 of an RDD to subscripts.

    The expected subscripts enumerate a (2, 3, 2) volume with one-based
    values and the first dimension varying fastest.
    """
    dims = [2, 3, 2]
    # a concrete list (not a lazy ``map`` object) keeps the fixture the
    # same on Python 2 and Python 3
    data_local = [(ind, array([1.0])) for ind in range(1, 13)]
    data = self.sc.parallelize(data_local)
    # index into each record instead of using a tuple-parameter lambda,
    # which is a syntax error on Python 3 (PEP 3113)
    subs = indtosub(data, dims).map(lambda kv: kv[0]).collect()
    expected = array([
        (1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1), (2, 3, 1),
        (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2), (1, 3, 2), (2, 3, 2),
    ])
    assert allclose(subs, expected)
def check_indtosub_result(indsub_param):
    """Check indtosub output against the expected subscripts of one case.

    Parameters
    ----------
    indsub_param : object
        Test case exposing ``indices``, ``dims``, ``order``, ``onebased``
        and ``subscripts`` attributes.
    """
    # indtosub is fed (key, value) pairs, so pair each index with a dummy 'x'
    keyed = [(ind, 'x') for ind in indsub_param.indices]
    converted = indtosub(
        keyed,
        indsub_param.dims,
        order=indsub_param.order,
        onebased=indsub_param.onebased
    )
    triples = zip(converted, indsub_param.subscripts, indsub_param.indices)
    for got, want, ind in triples:
        subscript = got[0]
        message = 'Got subscript %s instead of %s for index:%d, dims:%s' % (
            subscript, want, ind, str(indsub_param.dims))
        assert_equals(want, subscript, message)
def test_ind_to_sub_array(self):
    """Check that indtosub maps the keys 1..12 of a local list to subscripts.

    The expected subscripts enumerate a (2, 3, 2) volume with one-based
    values and the first dimension varying fastest.
    """
    dims = [2, 3, 2]
    # list comprehensions instead of ``map``: on Python 3 ``map`` is lazy
    # and ``allclose`` cannot compare a map object against an array
    data_local = [(ind, array([1.0])) for ind in range(1, 13)]
    subs = [record[0] for record in indtosub(data_local, dims)]
    expected = array([
        (1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1), (2, 3, 1),
        (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2), (1, 3, 2), (2, 3, 2),
    ])
    assert allclose(subs, expected)