def test_large(self):
    """Stacking many copies of a small array (~15 GB of source data) and
    saving the stack should result in every output chunk being written."""
    tile = self._small_array()
    stacked = biggus.ArrayStack([[tile] * 1000] * 5)
    counter = _WriteCounter(stacked.shape)
    biggus.save([stacked], [counter])
    self.assertTrue(counter.all_written())
def test_ndarray(self):
    """Realising an ArrayStack via ndarray() should yield a concrete
    numpy array with the combined shape, dtype and element values.

    Each case is (stack shape, item shape, expected realised array).
    """
    cases = [
        ((1, ), (3, ), np.arange(3).reshape(1, 3)),
        ((1, ), (3, 4), np.arange(12).reshape(1, 3, 4)),
        ((6, ), (3, 4), np.arange(72).reshape(6, 3, 4)),
        ((6, 70), (3, 4), np.arange(5040).reshape(6, 70, 3, 4)),
    ]
    for outer_shape, inner_shape, expected in cases:
        tiles = np.empty(outer_shape, dtype='O')
        tile_size = np.array(inner_shape).prod()
        for idx in np.ndindex(tiles.shape):
            # Offset each tile so the stacked result is one global arange.
            offset = np.ravel_multi_index(idx, outer_shape) * tile_size
            block = np.arange(tile_size).reshape(inner_shape)
            block += offset
            tiles[idx] = biggus.NumpyArrayAdapter(block)
        stacked = biggus.ArrayStack(tiles)
        actual = stacked.ndarray()
        self.assertIsInstance(actual, np.ndarray)
        self.assertEqual(stacked.dtype, actual.dtype)
        self.assertEqual(stacked.shape, actual.shape)
        np.testing.assert_array_equal(actual, expected)
def test_shape_and_getitem(self):
    """Indexing an ArrayStack should produce correctly-shaped lazy arrays,
    and invalid index types should raise TypeError.

    Each case is (stack shape, item shape, sequence of indexing operations
    to apply in order, expected final shape or expected exception type).
    """
    tests = [
        [(6, 70), (30, 40), [], (6, 70, 30, 40)],
        [(6, 70), (30, 40), [5], (70, 30, 40)],
        [(6, 70), (30, 40), [(5, )], (70, 30, 40)],
        [(6, 70), (30, 40), [5, 3], (30, 40)],
        [(6, 70), (30, 40), [(5, 3)], (30, 40)],
        [(6, 70), (30, 40), [5, 3, 2], (40, )],
        [(6, 70), (30, 40), [(5, 3, 2)], (40, )],
        [(6, 70), (30, 40), [5, 3, 2, 1], ()],
        [(6, 70), (30, 40), [(5, 3, 2, 1)], ()],
        [(6, 70), (30, 40), [5, (3, 2), 1], ()],
        [(6, 70), (30, 40), [(slice(None, None), 6)], (6, 30, 40)],
        [(6, 70), (30, 40),
         [(slice(None, None), slice(1, 5))], (6, 4, 30, 40)],
        [(6, 70), (30, 40), [(slice(None, None), ), 4], (70, 30, 40)],
        [(6, 70), (30, 40), [5, (slice(None, None), )], (70, 30, 40)],
        [(6, 70), (30, 40), [(slice(None, 10), )], (6, 70, 30, 40)],
        [(6, 70), (30, 40), [(slice(None, 10), ), 5], (70, 30, 40)],
        [(6, 70), (30, 40),
         [(slice(None, 10), ), (slice(None, 3), )], (3, 70, 30, 40)],
        [(6, 70), (30, 40),
         [(slice(None, 10), ), (slice(None, None, 2), )], (3, 70, 30, 40)],
        [(6, 70), (30, 40),
         [(slice(5, 10), ), (slice(None, None), slice(2, 6))],
         (1, 4, 30, 40)],
        [(6, 70), (30, 40),
         [(slice(None, None), slice(2, 6)), (slice(5, 10), )],
         (1, 4, 30, 40)],
        [(6, 70), (30, 40), [3.5], TypeError],
        [(6, 70), (30, 40), ['foo'], TypeError],
        [(6, 70), (30, 40), [object()], TypeError],
    ]
    dtype = np.dtype('f4')
    for stack_shape, item_shape, cuts, target in tests:
        # Build a stack of lazy adapters over uninitialised data; only
        # shapes matter here, never the element values.
        tiles = np.empty(stack_shape, dtype='O')
        for idx in np.ndindex(tiles.shape):
            tiles[idx] = biggus.NumpyArrayAdapter(np.empty(item_shape, dtype))
        array = biggus.ArrayStack(tiles)
        if isinstance(target, type):
            with self.assertRaises(target):
                for cut in cuts:
                    array = array[cut]
        else:
            for cut in cuts:
                array = array[cut]
            self.assertIsInstance(array, biggus.Array)
            self.assertEqual(array.shape, target,
                             '\nCuts: {!r}'.format(cuts))
def test_dtype(self):
    """An ArrayStack should report the dtype of its constituent arrays."""
    expected = np.dtype('f4')
    adapter = biggus.NumpyArrayAdapter(np.empty(6, dtype=expected))
    stacked = biggus.ArrayStack(np.array([adapter], dtype='O'))
    self.assertEqual(stacked.dtype, expected)
def get_seasonal_means_with_ttest_stats(
        self, season_to_monthperiod=None, start_year=None, end_year=None,
        convert_monthly_accumulators_to_daily=False):
    """Compute per-season climatological mean/std statistics (biggus backend).

    NOTE: this implementation is disabled — it unconditionally raises
    NotImplementedError immediately below, so everything after the raise
    is dead code retained for reference. Use the dask version of the
    method instead.

    :param season_to_monthperiod: mapping of season name -> MonthPeriod
    :param start_year: first year (inclusive) of the interval of interest
    :param end_year: last year (inclusive) of the interval of interest
    :param convert_monthly_accumulators_to_daily: if true converts monthly
        accumulators to daily,
    :return dict(season: [mean, std, nobs])
    """
    # Hard-disabled: the biggus-based computation was never finished.
    if True:
        raise NotImplementedError(
            "Biggus way of calculation is not implemented, use the dask version of the method"
        )

    # ---- dead code from here on (kept for reference) ----

    # select the interval of interest
    timesel = [
        i for i, d in enumerate(self.time)
        if start_year <= d.year <= end_year
    ]
    data = self.data[timesel, :, :]
    times = [self.time[i] for i in timesel]

    if convert_monthly_accumulators_to_daily:
        # Divide each monthly accumulator by the number of days in its month
        # to obtain daily-rate values.
        ndays = np.array(
            [calendar.monthrange(d.year, d.month)[1] for d in times])
        data = biggus.divide(data, ndays[:, np.newaxis, np.newaxis])
    else:
        # NOTE(review): this discards the time subsetting done above and
        # reverts to the full, unselected dataset — looks like a bug in the
        # original (probably meant to leave `data` as-is); confirm before
        # reviving this branch.
        data = self.data

    # Group time indices by (year, month) for the monthly-mean step.
    year_month_to_index_arr = defaultdict(list)
    for i, t in enumerate(times):
        year_month_to_index_arr[t.year, t.month].append(i)

    # calculate monthly means
    # NOTE(review): assumes every (year, month) in the interval has at least
    # one time step and that its indices are contiguous; an empty month would
    # raise IndexError on the [0] lookup — TODO confirm against the data.
    monthly_data = {}
    for y in range(start_year, end_year + 1):
        for m in range(1, 13):
            aslice = slice(year_month_to_index_arr[y, m][0],
                           year_month_to_index_arr[y, m][-1] + 1)
            monthly_data[y, m] = biggus.mean(
                data[aslice.start:aslice.stop, :, :], axis=0)

    result = {}
    for season, month_period in season_to_monthperiod.items():
        assert isinstance(month_period, MonthPeriod)

        seasonal_means = []
        ndays_per_season = []
        for p in month_period.get_season_periods(start_year=start_year,
                                                 end_year=end_year):
            # Stack the monthly means of this season occurrence and form a
            # day-count-weighted seasonal mean.
            lmos = biggus.ArrayStack([
                monthly_data[start.year, start.month]
                for start in p.range("months")
            ])
            ndays_per_month = np.array([
                calendar.monthrange(start.year, start.month)[1]
                for start in p.range("months")
            ])
            seasonal_mean = biggus.sum(biggus.multiply(
                lmos, ndays_per_month[:, np.newaxis, np.newaxis]),
                                       axis=0)
            seasonal_mean = biggus.divide(seasonal_mean,
                                          ndays_per_month.sum())
            seasonal_means.append(seasonal_mean)
            ndays_per_season.append(ndays_per_month.sum())

        seasonal_means = biggus.ArrayStack(seasonal_means)
        ndays_per_season = np.array(ndays_per_season)

        print(seasonal_means.shape, ndays_per_season.shape)
        assert seasonal_means.shape[0] == ndays_per_season.shape[0]

        # Day-count-weighted climatological mean over all season occurrences.
        clim_mean = biggus.sum(biggus.multiply(
            seasonal_means, ndays_per_season[:, np.newaxis, np.newaxis]),
                               axis=0) / ndays_per_season.sum()

        # Weighted variance of the seasonal means around the climatology,
        # then std as its square root.
        diff = biggus.subtract(seasonal_means,
                               clim_mean.masked_array()[np.newaxis, :, :])
        sq_mean = biggus.sum(biggus.multiply(
            diff**2, ndays_per_season[:, np.newaxis, np.newaxis]),
                             axis=0) / ndays_per_season.sum()
        clim_std = biggus.power(sq_mean, 0.5)

        clim_mean = clim_mean.masked_array()
        print("calculated mean")
        clim_std = clim_std.masked_array()
        print("calculated std")

        # nobs = number of season occurrences contributing to the stats.
        result[season] = [clim_mean, clim_std, ndays_per_season.shape[0]]

    return result