def testDataset3(self):
    """Dataset with only missing values.

    Both statistics are expected to come back as the missing-value
    placeholder (-9999) when there is nothing valid to average.
    """
    mean = compute_mean(self.dataset3, -9999)
    self.assertEqual(mean, -9999)

    # Exercise compute_std here; the earlier version called compute_mean a
    # second time, so the standard-deviation path was never actually tested.
    std = compute_std(self.dataset3, -9999)
    self.assertAlmostEqual(std, -9999, delta=1e-3)
def testDataset1(self):
    """Dataset with one missing value.

    Checks the mean and standard deviation reported for self.dataset1 when
    -9999 is used as the missing-value placeholder.
    """
    mean = compute_mean(self.dataset1, -9999)
    # assertEqual/assertAlmostEqual replace the deprecated *Equals aliases,
    # which no longer exist on TestCase in Python 3.12+.
    self.assertEqual(mean, 40.0)

    std = compute_std(self.dataset1, -9999)
    self.assertAlmostEqual(std, 18.2574, delta=1e-3)
def testDataset2(self):
    """Dataset with no missing values.

    Checks the mean and standard deviation reported for self.dataset2 when
    -9999 is used as the missing-value placeholder.
    """
    mean = compute_mean(self.dataset2, -9999)
    # assertEqual/assertAlmostEqual replace the deprecated *Equals aliases,
    # which no longer exist on TestCase in Python 3.12+.
    self.assertEqual(mean, 45.0)

    std = compute_std(self.dataset2, -9999)
    self.assertAlmostEqual(std, 18.7082, delta=1e-3)
def standardize(data, missing_val=-9999):
    """Convert a data series to normal scores, leaving missing values in place.

    Each valid observation Q_i is mapped to

        Z_i = (Q_i - Qbar) / sigma_Q,

    the score needed for the standard normal homogeneity test. Qbar is the
    mean of the valid observations, and sigma_Q is computed here as
    sqrt(sum((Q_i - Qbar)**2) / (n - 2)) with n the number of valid
    observations.

    For more information, please refer to Alexandersson and Moberg, 1997,
    Int'l Journal of Climatology Vol. 17, pp 25-34. This is a direct port of
    splitmerge.v21f.f > subroutine 'standard'.

    :Param data: The dataset to standardize.
    :Param missing_val: The placeholder for missing data.
    :Return: A list with one entry per element of data: the normal score of
        each value that is not missing_val, and missing_val itself wherever
        the input held missing_val.

    NOTE(review): the divisor is (n - 2), not the usual (n - 1); presumably
    this mirrors the Fortran routine - confirm before changing it. With
    exactly two valid values the divisor is zero, and with identical valid
    values the spread is zero, so the scores are undefined in those cases;
    verify that callers never pass such input.
    """
    # Only the non-missing observations contribute to the mean and spread.
    observed = get_valid_data(data, missing_val)
    n_observed = len(observed)
    center = compute_mean(observed, valid=True)

    # Accumulate the squared deviations from the mean one term at a time.
    squared_dev_total = 0.0
    for value in observed:
        squared_dev_total += (value - center) ** 2

    # Root of the summed squared error over (n - 2).
    spread = sqrt(squared_dev_total / (n_observed - 2))

    # Score every input position; missing entries pass through unchanged so
    # the output stays aligned with the input.
    return [
        (value - center) / spread if value != missing_val else missing_val
        for value in data
    ]