def calc_diff_trends(sthext_cube, notsthext_cube, window=109):
    """Calculate rolling trends in the southern extratropics minus rest-of-globe difference.

    A linear regression of the difference against the time coordinate is
    fitted inside every rolling window along the leading (time) axis, and
    the slope of each fit is returned.

    A window of 109 matches the length of the Argo record
    (i.e. 10 years of annually smoothed monthly data).

    Args:
      sthext_cube: Cube for the southern extratropics.
      notsthext_cube: Cube for the rest of the globe; it is subtracted
        from ``sthext_cube``.
      window (int): Number of consecutive time steps in each regression.

    Returns:
      numpy.ndarray: One slope per window, expressed per second rather
      than per day (i.e. Watts, assuming the cube data are in Joules).

    Raises:
      AssertionError: If the time coordinate units are not expressed in days.

    """
    diff = sthext_cube - notsthext_cube
    time_coord = diff.coord('time')

    # The slopes come out per unit of the time axis and the conversion at the
    # end assumes that unit is a day, so check it before doing any real work.
    assert 'days' in str(time_coord.units), \
        "time coordinate must be in days to convert the trends to per-second"

    diff_windows = rolling_window(diff.data, window=window, axis=0)
    x_axis_windows = rolling_window(time_coord.points, window=window, axis=0)

    ntimes = diff_windows.shape[0]
    trends = numpy.zeros(ntimes)
    for i in range(ntimes):
        # Only the slope (first element of the regression result) is needed.
        trends[i] = stats.linregress(x_axis_windows[i, :], diff_windows[i, :])[0]

    # Convert from per day to per second, so that J/day becomes J/s
    # and can be expressed as Watts (1 J = 1 W.s).
    hours_in_day = 24
    minutes_in_hour = 60
    seconds_in_minute = 60
    seconds_in_day = hours_in_day * minutes_in_hour * seconds_in_minute

    return trends / seconds_in_day
def test_1d(self):
    """Rolling a window of 2 over a 1-d array yields consecutive pairs."""
    values = np.arange(5, dtype=np.int32)
    expected_pairs = np.array(
        [[0, 1],
         [1, 2],
         [2, 3],
         [3, 4]],
        dtype=np.int32)
    self.assertArrayEqual(rolling_window(values, window=2), expected_pairs)
def test_step(self):
    """The step argument sets how far apart consecutive windows start."""
    grid = np.arange(10, dtype=np.int32).reshape(2, 5)
    # With window=3 and step=2 each row of five gives windows at 0 and 2.
    expected_windows = np.array(
        [
            [[0, 1, 2], [2, 3, 4]],
            [[5, 6, 7], [7, 8, 9]],
        ],
        dtype=np.int32,
    )
    windows = rolling_window(grid, window=3, step=2, axis=1)
    self.assertArrayEqual(windows, expected_windows)
def test_2d(self):
    """Rolling along axis 1 of a 2-d array windows each row independently."""
    first_row = [0, 1, 2, 3, 4]
    second_row = [5, 6, 7, 8, 9]
    grid = np.array([first_row, second_row], dtype=np.int32)

    expected_windows = np.array(
        [
            [[0, 1, 2], [1, 2, 3], [2, 3, 4]],
            [[5, 6, 7], [6, 7, 8], [7, 8, 9]],
        ],
        dtype=np.int32,
    )
    self.assertArrayEqual(
        rolling_window(grid, window=3, axis=1), expected_windows)
def test_1d_masked(self):
    """A masked 1-d input carries its mask into every window."""
    masked_values = ma.array(
        [0, 1, 2, 3, 4], mask=[0, 0, 1, 0, 0], dtype=np.int32)

    # The masked element (index 2) appears in the second and third windows.
    window_values = [[0, 1], [1, 2], [2, 3], [3, 4]]
    window_mask = [[0, 0], [0, 1], [1, 0], [0, 0]]
    expected = ma.array(window_values, mask=window_mask, dtype=np.int32)

    self.assertMaskedArrayEqual(
        rolling_window(masked_values, window=2), expected)
def test_step(self):
    """Windows of length 3 taken every 2 elements along axis 1."""
    top = [0, 1, 2, 3, 4]
    bottom = [5, 6, 7, 8, 9]
    source = np.array([top, bottom], dtype=np.int32)

    # Each row contributes the windows starting at offsets 0 and 2.
    wanted = np.array(
        [[top[0:3], top[2:5]],
         [bottom[0:3], bottom[2:5]]],
        dtype=np.int32)

    self.assertArrayEqual(
        rolling_window(source, window=3, step=2, axis=1), wanted)
def count_spells(data, threshold, axis, spell_length):
    """
    Count spells of consecutive threshold exceedances along an axis.

    Custom statistical operation written to meet the requirements of
    iris.analysis.Aggregator: the calculation is done over an arbitrary
    (given) data axis, so it works on multiple time sequences arranged
    on one axis of a multidimensional array.

    The time axis is cut into consecutive, non-overlapping windows of
    ``spell_length`` points; a window counts when every point in it is
    a hit.

    Parameters
    ----------
    data: ndarray
        raw data to be compared with value threshold.
    threshold: float or None
        threshold point for 'significant' datapoints; a point is a hit
        when its value is strictly greater than the threshold. If None,
        no comparison is made and ``data`` itself is used as the array
        of hits. A threshold of zero is a genuine threshold.
    axis: int
        number of the array dimension mapping the time sequences.
        (Can also be negative, e.g. '-1' means last dimension)
    spell_length: int
        number of consecutive times at which value > threshold to "count".

    Returns
    -------
    int or ndarray
        Number of full windows along ``axis``; the result of summing
        over that axis, so ``axis`` is absent from the output.
    """
    if axis < 0:
        # just cope with negative axis numbers
        axis += data.ndim

    # Threshold the data to find the 'significant' points.
    # Test against None explicitly: a plain truthiness test would treat a
    # threshold of 0 as "no threshold" and silently skip the comparison.
    if threshold is None:
        data_hits = data
    else:
        data_hits = data > float(threshold)

    # Make an array with data values "windowed" along the time axis.
    # WARNING: the step is set equal to the window size, i.e. the windows
    # do not overlap. For overlapping windows use a step smaller than
    # spell_length.
    hit_windows = rolling_window(data_hits,
                                 window=spell_length,
                                 step=spell_length,
                                 axis=axis)

    # Find the windows "full of True-s" (along the added 'window axis').
    full_windows = np.all(hit_windows, axis=axis + 1)

    # Count points fulfilling the condition (along the time axis).
    spell_point_counts = np.sum(full_windows, axis=axis, dtype=int)
    return spell_point_counts
def test_degenerate_mask(self):
    """A masked array built without a mask yields a fully unmasked result."""
    unmasked_input = ma.array(
        [[0, 1, 2, 3, 4],
         [5, 6, 7, 8, 9]],
        dtype=np.int32)

    window_values = [
        [[0, 1, 2], [1, 2, 3], [2, 3, 4]],
        [[5, 6, 7], [6, 7, 8], [7, 8, 9]],
    ]
    # The expected mask is spelled out in full: nothing is masked.
    nothing_masked = np.zeros((2, 3, 3), dtype=bool)
    expected = ma.array(window_values, mask=nothing_masked, dtype=np.int32)

    self.assertMaskedArrayEqual(
        rolling_window(unmasked_input, window=3, axis=1), expected)
def count_spells(data, threshold, axis, spell_length):
    """Count windows along ``axis`` in which every value exceeds ``threshold``.

    Args:
        data: Array of values to compare against ``threshold``.
        threshold: Values strictly greater than this are hits.
        axis: Dimension to window and count along; may be negative.
        spell_length: Window length passed to ``rolling_window``.

    Returns:
        The number of all-hit windows, summed along ``axis``.
    """
    # Normalise a negative axis so that ``time_axis + 1`` below is valid.
    time_axis = axis + data.ndim if axis < 0 else axis

    exceeds = data > threshold
    windows = rolling_window(exceeds, window=spell_length, axis=time_axis)

    # The window contents are reduced over the dimension after ``time_axis``.
    all_exceed = np.all(windows, axis=time_axis + 1)
    return np.sum(all_exceed, axis=time_axis, dtype=int)
def count_spells(data, threshold, axis, spell_length):
    """
    Count the spells in which the data stay above a threshold.

    A spell is a window of ``spell_length`` consecutive points along
    ``axis`` whose values all exceed ``threshold``. Works on multiple
    time sequences arranged on one axis of a multidimensional array.

    Args:

    * data (array):
        raw data to be compared with value threshold.

    * threshold (float):
        threshold point for 'significant' datapoints.

    * axis (int):
        number of the array dimension mapping the time sequences.
        (Can also be negative, e.g. '-1' means last dimension)

    * spell_length (int):
        number of consecutive times at which value > threshold to "count".

    """
    # Turn a negative axis number into its non-negative equivalent.
    time_axis = axis + data.ndim if axis < 0 else axis

    # Boolean array marking the 'significant' points.
    above = data > threshold

    # Window the hits along the time axis.
    # WARNING: the step equals the window size, so the windows do not
    # overlap. For overlapping windows pass a step smaller than
    # spell_length instead.
    windowed = rolling_window(
        above,
        window=spell_length,
        step=spell_length,
        axis=time_axis,
    )

    # A window qualifies when it is True throughout (the window axis is
    # the one added right after the time axis).
    qualifying = np.all(windowed, axis=time_axis + 1)

    # Total number of qualifying windows along the time axis.
    return np.sum(qualifying, axis=time_axis, dtype=int)
def test_2d_masked(self):
    """Masks of a 2-d masked input follow the data into each window."""
    values = [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
    mask = [[0, 0, 1, 0, 0], [1, 0, 1, 0, 0]]
    masked_grid = ma.array(values, mask=mask, dtype=np.int32)

    window_values = [
        [[0, 1, 2], [1, 2, 3], [2, 3, 4]],
        [[5, 6, 7], [6, 7, 8], [7, 8, 9]],
    ]
    window_mask = [
        [[0, 0, 1], [0, 1, 0], [1, 0, 0]],
        [[1, 0, 1], [0, 1, 0], [1, 0, 0]],
    ]
    expected = ma.array(window_values, mask=window_mask, dtype=np.int32)

    self.assertMaskedArrayEqual(
        rolling_window(masked_grid, window=3, axis=1), expected)
def count_spells(data, threshold, axis, spell_length):
    """
    Count the windows of consecutive points that all exceed a threshold.

    Every window of ``spell_length`` points along ``axis`` is examined
    (no step is passed to ``rolling_window``), and those whose values all
    exceed ``threshold`` are counted. Works on multiple time sequences
    arranged on one axis of a multidimensional array.

    Args:

    * data (array):
        raw data to be compared with value threshold.

    * threshold (float):
        threshold point for 'significant' datapoints.

    * axis (int):
        number of the array dimension mapping the time sequences.
        (Can also be negative, e.g. '-1' means last dimension)

    * spell_length (int):
        number of consecutive times at which value > threshold to "count".

    """
    # Map a negative axis number onto the matching non-negative one.
    time_axis = axis + data.ndim if axis < 0 else axis

    # Mark the 'significant' points.
    significant = data > threshold

    # View the marks through windows laid along the time axis.
    windows = rolling_window(significant, window=spell_length, axis=time_axis)

    # Keep the windows that are True throughout; the window axis sits
    # immediately after the time axis.
    complete = np.all(windows, axis=time_axis + 1)

    # Add up the complete windows along the time axis.
    return np.sum(complete, axis=time_axis, dtype=int)
def num_frozen(data, threshold, axis, frozen_length):
    """
    Count frozen spells along an axis.

    A frozen spell is a window of ``frozen_length`` consecutive points
    along ``axis`` whose values are all below ``threshold``. Works on
    multiple time sequences arranged on one axis of a multidimensional
    array; ``axis`` may be negative.
    """
    # Resolve a negative axis number to its non-negative equivalent.
    time_axis = axis + data.ndim if axis < 0 else axis

    # Points colder than the threshold are the hits.
    below = data < threshold

    # Window the hits along the time axis.
    windows = ut.rolling_window(below, window=frozen_length, axis=time_axis)

    # A window qualifies when it is True throughout (window axis follows
    # the time axis).
    entirely_below = np.all(windows, axis=time_axis + 1)

    # Number of qualifying windows along the time axis.
    return np.sum(entirely_below, axis=time_axis, dtype=int)
def test_invalid_step(self):
    """A step between windows of less than 1 must raise ValueError."""
    values = np.empty((5,))
    self.assertRaises(ValueError, rolling_window, values, step=0)
def test_window_too_long(self):
    """A window longer than the chosen array dimension must raise ValueError."""
    # Axis 1 has length 5, so a window of 6 cannot fit.
    values = np.empty((7, 5))
    self.assertRaises(ValueError, rolling_window, values, window=6, axis=1)
def test_window_too_short(self):
    """A window length of less than 1 must raise ValueError."""
    values = np.empty((5,))
    self.assertRaises(ValueError, rolling_window, values, window=0)