def test_sub_to_ind_array(self):
    """Check subtoind on a plain Python list of (subscript, value) records.

    The subscripts enumerate a 2 x 3 x 2 volume starting at 1, with the
    first coordinate varying fastest. The first element of each record
    returned by ``subtoind`` is expected to be 1..12, in that order.
    """
    # Same twelve subscripts, generated with the first coordinate innermost.
    subscripts = [(x, y, z) for z in (1, 2) for y in (1, 2, 3) for x in (1, 2)]
    records = [(sub, array([1.0])) for sub in subscripts]
    shape = [2, 3, 2]
    converted = subtoind(records, shape)
    inds = [record[0] for record in converted]
    expected = array(range(1, 13))
    assert allclose(inds, expected)
def calc(self, data):
    """Calculate averages.

    Keys (tuples) are converted into linear indices based on their
    dimensions

    Parameters
    ----------
    data : RDD of (tuple, array) pairs, each array of shape (ncols,)
        Data to compute averages from

    Returns
    -------
    ts : array, shape (n, ncols)
        Row ``i`` is the sum of the values selected for group ``i``
        divided by ``len(self.inds[i])``. Rows whose group is empty are
        left as zeros.
    """
    dims = getdims(data)
    data = subtoind(data, dims.max)

    # loop over indices, averaging time series
    ncols = len(data.first()[1])
    ts = zeros((self.n, ncols))
    for i in range(self.n):
        count = len(self.inds[i])
        if count == 0:
            # nothing to average; keep the zero row
            continue
        # Index into the (key, value) record rather than unpacking it in
        # the lambda signature: tuple parameters are a SyntaxError on
        # Python 3 (PEP 3113) and this form behaves the same on Python 2.
        total = self.select(data, i).map(lambda kv: kv[1]).sum()
        ts[i, :] = total / count
    return ts
def calc(self, data):
    """Calculate averages.

    Keys (tuples) are converted into linear indices based on their
    dimensions

    Parameters
    ----------
    data : RDD of (tuple, array) pairs, each array of shape (ncols,)
        Data to compute averages from

    Returns
    -------
    self : returns an instance of self.
        ``self.values`` (one row per entry of ``self.inds``) holds the sum
        of the selected values divided by the number of indices in the
        group. ``self.keys`` holds the mean, over the group's indices, of
        the keys returned by ``indtosub``. Rows for empty groups are left
        as zeros in both arrays.
    """
    dims = getdims(data)
    data = subtoind(data, dims.max)

    # loop over indices, computing average keys and average values
    keys = zeros((self.n, len(dims.count())))
    values = zeros((self.n, len(data.first()[1])))
    for idx, indlist in enumerate(self.inds):
        if len(indlist) == 0:
            continue
        # Indexed lambda instead of a tuple-parameter lambda, which is a
        # SyntaxError on Python 3 (PEP 3113).
        total = self.select(data, idx).map(lambda kv: kv[1]).sum()
        values[idx, :] = total / len(indlist)
        # indtosub is given (key, value) records, so pair every linear
        # index with a dummy 0. Lists (not lazy map objects) are used so
        # that mean() receives a real sequence on any Python version.
        subs = indtosub([(ind, 0) for ind in indlist], dims.max)
        keys[idx, :] = mean([record[0] for record in subs], axis=0)

    self.keys = keys
    self.values = values
    return self
def calc(self, data):
    """Calculate averages.

    Keys (tuples) are converted into linear indices based on their
    dimensions

    Parameters
    ----------
    data : RDD of (tuple, array) pairs, each array of shape (ncols,)
        Data to compute averages from

    Returns
    -------
    self : returns an instance of self.
        Sets ``self.values`` (per-group sum of selected values divided by
        the group's index count) and ``self.keys`` (per-group mean of the
        keys returned by ``indtosub``). Empty groups keep zero rows.
    """
    dims = getdims(data)
    data = subtoind(data, dims.max)

    # loop over indices, computing average keys and average values
    keys = zeros((self.n, len(dims.count())))
    values = zeros((self.n, len(data.first()[1])))
    for idx, indlist in enumerate(self.inds):
        nmembers = len(indlist)
        if nmembers == 0:
            continue

        # Tuple-parameter lambdas (``lambda (k, x): x``) do not parse on
        # Python 3 (PEP 3113); index the record instead.
        selected = self.select(data, idx)
        values[idx, :] = selected.map(lambda kv: kv[1]).sum() / nmembers

        # Wrap each linear index as an (index, 0) record for indtosub, then
        # average the returned keys. Built as lists so the result does not
        # depend on map() returning a list.
        placeholders = [(ind, 0) for ind in indlist]
        subs = indtosub(placeholders, dims.max)
        keys[idx, :] = mean([record[0] for record in subs], axis=0)

    self.keys = keys
    self.values = values
    return self
def check_subtoind_result(si_param):
    """Compare subtoind output with the expected indices held by si_param.

    ``si_param`` supplies ``subscripts``, ``dims``, ``order`` and
    ``onebased`` for the call, plus the expected ``indices``.
    """
    # subtoind is fed (subscript, value) records, so pair every subscript
    # with a dummy 'x' value
    records = [(sub, 'x') for sub in si_param.subscripts]
    converted = subtoind(records, si_param.dims,
                         order=si_param.order, onebased=si_param.onebased)

    # one assertion per record, so a failure names the offending subscript
    triples = zip(converted, si_param.indices, si_param.subscripts)
    for record, want, sub in triples:
        got = record[0]
        message = 'Got index %d instead of %d for subscript:%s, dims:%s' % (
            got, want, str(sub), str(si_param.dims))
        assert_equals(want, got, message)
def test_sub_to_ind_array(self):
    """Verify subtoind key conversion for records held in a local list.

    Subscripts cover a 2 x 3 x 2 volume, counted from 1 with the first
    coordinate changing fastest. The leading element of each record coming
    back from ``subtoind`` must match 1 through 12 in sequence.
    """
    extent = [2, 3, 2]
    # Identical ordering to the hand-written list: x innermost, z outermost.
    grid = [(x, y, z) for z in (1, 2) for y in (1, 2, 3) for x in (1, 2)]
    local_records = [(point, array([1.0])) for point in grid]
    linear = [pair[0] for pair in subtoind(local_records, extent)]
    assert allclose(linear, array(range(1, 13)))
def test_sub_to_ind_rdd(self):
    """Check subtoind on an RDD of (subscript, value) records.

    The twelve subscripts of a 2 x 3 x 2 volume (starting at 1, first
    coordinate varying fastest) are parallelized via ``self.sc``. The keys
    collected from the result of ``subtoind`` are expected to be 1..12.
    """
    subs = [
        (1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1), (2, 3, 1),
        (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2), (1, 3, 2), (2, 3, 2),
    ]
    data_local = [(sub, array([1.0])) for sub in subs]
    data = self.sc.parallelize(data_local)
    dims = [2, 3, 2]
    # Take the key by index: ``lambda (k, _): k`` is a SyntaxError on
    # Python 3 (PEP 3113), while this form works on Python 2 and 3 alike.
    inds = subtoind(data, dims).map(lambda kv: kv[0]).collect()
    assert allclose(inds, array(range(1, 13)))
def test_sub_to_ind_rdd(self):
    """Verify subtoind key conversion for records distributed as an RDD.

    Builds (subscript, value) records for every position of a 2 x 3 x 2
    volume (subscripts start at 1, first coordinate fastest), parallelizes
    them with ``self.sc`` and asserts that the collected keys are 1..12.
    """
    subs = [
        (1, 1, 1), (2, 1, 1), (1, 2, 1), (2, 2, 1), (1, 3, 1), (2, 3, 1),
        (1, 1, 2), (2, 1, 2), (1, 2, 2), (2, 2, 2), (1, 3, 2), (2, 3, 2),
    ]
    data_local = [(sub, array([1.0])) for sub in subs]
    data = self.sc.parallelize(data_local)
    dims = [2, 3, 2]
    converted = subtoind(data, dims)
    # Tuple-parameter lambdas were removed in Python 3 (PEP 3113); pick the
    # key out of each (key, value) record by position instead.
    inds = converted.map(lambda kv: kv[0]).collect()
    assert allclose(inds, array(range(1, 13)))
def check_subtoind_result(si_param):
    """Run subtoind with si_param's settings and check each returned index.

    Reads ``subscripts``, ``dims``, ``order``, ``onebased`` and the
    expected ``indices`` from ``si_param``.
    """
    # attach a dummy value 'x' to each subscript so the input has the
    # (subscript, value) record form passed to subtoind
    tagged = [(subscript, 'x') for subscript in si_param.subscripts]
    results = subtoind(
        tagged,
        si_param.dims,
        order=si_param.order,
        onebased=si_param.onebased,
    )

    # results are checked one by one so that a failure reports the exact
    # subscript involved
    for result, expected, subscript in zip(
            results, si_param.indices, si_param.subscripts):
        actual = result[0]
        failure_text = (
            'Got index %d instead of %d for subscript:%s, dims:%s'
            % (actual, expected, str(subscript), str(si_param.dims)))
        assert_equals(expected, actual, failure_text)
def calc(self, data):
    """Calculate averages.

    Keys (tuples) are converted into linear indices based on their
    dimensions

    Parameters
    ----------
    data : RDD of (tuple, array) pairs, each array of shape (ncols,)
        Data to compute averages from

    Returns
    -------
    ts : array, shape (n, ncols)
        Row ``i`` is the sum of the values selected for group ``i``
        divided by ``len(self.inds[i])``. Rows whose group has no indices
        are left as zeros.
    """
    dims = getdims(data)
    data = subtoind(data, dims.max)

    # loop over indices, averaging time series
    ts = zeros((self.n, len(data.first()[1])))
    for i in range(self.n):
        count = len(self.inds[i])
        if count == 0:
            # An empty group has nothing to average, and dividing by its
            # zero length would fail or yield non-finite values; leave the
            # row at zero instead.
            continue
        # Index into the (key, value) record: a tuple-parameter lambda is a
        # SyntaxError on Python 3 (PEP 3113).
        total = self.select(data, i).map(lambda kv: kv[1]).sum()
        ts[i, :] = total / count
    return ts