def decompose(self, output: Dataset, levels: np.ma.MaskedArray) -> np.ma.MaskedArray:
    """Decompose the variable event by event with an SVD and write the factors to ``output``.

    For every event the variable is reduced with ``Quadrant.transform`` to
    the number of levels given in ``levels`` and factorised into
    ``U``, ``s`` and ``Vh``. Only the first ``k = self.target_rank(s)``
    singular triplets are kept. The factors of all events are collected in
    masked arrays and written to the group returned by
    ``self.create_target_group(output)`` as variables ``k``, ``U``, ``s``
    and ``Vh``; the ``rank`` dimension is sized to the largest ``k`` seen.

    Events are skipped (their entries stay masked) when the level is masked
    or greater than 29, when ``self.matrix_ok`` rejects the reduced matrix,
    or when the SVD fails with a ``LinAlgError`` (which is logged).

    Args:
        output: dataset that receives the target group.
        levels: per-event number of levels used to reduce the matrix.

    Returns:
        Nothing is returned explicitly (the annotation is kept unchanged
        for interface compatibility).

    Raises:
        ValueError: if any of the SVD factors contains complex values.
    """
    q: Quadrant = Quadrant.for_assembly(self.group.name, self.var.name, self.var)
    events, upper_bound, lower_bound = q.transformed_shape()
    # transformed shape 1: (e, gl, gl), 2: (e, 2*gl, gl), 4: (e, 2*gl, 2*gl)
    all_U = np.ma.masked_all((events, upper_bound, lower_bound))
    all_s = np.ma.masked_all((events, lower_bound))
    all_Vh = np.ma.masked_all((events, lower_bound, lower_bound))
    # builtin int instead of the np.int alias, which newer NumPy no longer provides
    all_k = np.ma.masked_all((events), dtype=int)
    max_k = 0
    for event in range(self.var.shape[0]):
        if np.ma.is_masked(levels[event]) or levels.data[event] > 29:
            continue
        # reduce array dimensions
        level = int(levels.data[event])
        matrix = q.transform(self.var[event][...], level)
        if not self.matrix_ok(event, self.target_path(), matrix):
            continue
        # decompose reduced array
        try:
            U, s, Vh = la.svd(matrix.data, full_matrices=False, lapack_driver='gesdd')
        except np.linalg.LinAlgError as err:
            logger.error(f'{err} at {self.target_path()}:{event}')
            # without a factorisation there is nothing to store for this
            # event; falling through would reuse stale (or unbound) factors
            continue
        if np.iscomplex(U).any():
            raise ValueError(
                f'Left-singuar values are complex for {self.target_path()}:{event}')
        if np.iscomplex(s).any():
            raise ValueError(f'Eigenvalues are complex for {self.target_path()}:{event}')
        if np.iscomplex(Vh).any():
            raise ValueError(
                f'Right-singlar values are complex for {self.target_path()}:{event}')
        # find k eigenvalues
        k = self.target_rank(s)
        sigma = s[:k]
        max_k = max(k, max_k)
        # assign sliced decomposition to the per-event slots; the untouched
        # remainder of each slot stays masked
        all_k[event] = k
        all_U[event, :U.shape[0], :k] = U[:, :k]
        all_s[event, :k] = sigma
        all_Vh[event, :k, :Vh.shape[1]] = Vh[:k, :]
    # write all to output
    upper_dim, lower_dim = q.upper_and_lower_dimension()
    group = self.create_target_group(output)
    group.createDimension('rank', size=max_k)
    group.description = f'singular value decomposistion of {self.var.description}. reconstruction with (U * s).dot(Vh)'
    k_out = group.createVariable('k', 'i1', ('event'))
    k_out[:] = all_k[:]
    k_out.description = 'target rank of decomposition (number of eigenvalues)'
    U_dim = ('event', upper_dim, 'rank')
    U_out = group.createVariable('U', 'f', U_dim, zlib=True)
    U_out[:] = all_U[:, :, :max_k]
    # set the attribute on the variable (a plain assignment to U_out would
    # only rebind the local name and leave the variable undocumented)
    U_out.description = 'left-singular vectors of decompositon'
    s_dim = ('event', 'rank')
    s_out = group.createVariable('s', 'f', s_dim, zlib=True)
    s_out[:] = all_s[:, :max_k]
    s_out.description = 'eigenvalues of decomposition'
    Vh_dim = ('event', 'rank', lower_dim)
    Vh_out = group.createVariable('Vh', 'f', Vh_dim, zlib=True)
    Vh_out[:] = all_Vh[:, :max_k, :]
    Vh_out.description = 'transposed right-singular vectors of decompositon'
def test_single_quadrant_disassembly(self):
    """Disassembling a single-quadrant variable crops the matrix to the requested level."""
    noise_vectors = self.compressed['state/HNO3/n/Q']
    quadrant: Quadrant = Quadrant.for_disassembly('HNO3', 'n', noise_vectors)
    self.assertIsInstance(quadrant, Quadrant)
    expected_full_shape = (1, 29, 29)
    self.assertTupleEqual(quadrant.transformed_shape(), expected_full_shape)
    # a random 29x29 matrix reduced to 23 levels must come back as 23x23
    full_matrix = np.random.uniform(size=(29, 29))
    reduced = quadrant.transform(full_matrix, 23)
    expected_reduced_shape = (23, 23)
    self.assertTupleEqual(reduced.shape, expected_reduced_shape)
def test_two_quadrants_assembly(self):
    """Assembling a two-quadrant variable stacks both reduced quadrants vertically."""
    cross_avk = self.uncompressed['state/GHG/Tatmxavk']
    quadrant: Quadrant = Quadrant.for_assembly('GHG', cross_avk.name, cross_avk)
    self.assertIsInstance(quadrant, AssembleTwoQuadrants)
    expected_full_shape = (1, 58, 29)
    self.assertTupleEqual(quadrant.transformed_shape(), expected_full_shape)
    # two 29x29 quadrants reduced to 23 levels give a (2 * 23, 23) matrix
    quadrants = np.random.uniform(size=(2, 29, 29))
    stacked = quadrant.transform(quadrants, 23)
    expected_stacked_shape = (46, 23)
    self.assertTupleEqual(stacked.shape, expected_stacked_shape)
def __init__(self, group: Group):
    """Bind the variables ``Q``, ``s`` and ``k`` of ``group`` and set up the disassembly quadrant.

    The group must contain the variables ``Q``, ``s`` and ``k``; this is
    checked with an assertion.
    """
    super().__init__(group)
    available = group.variables.keys()
    required = ('Q', 's', 'k')
    assert all(name in available for name in required)
    # expose each required variable as an attribute of the same name
    for name in required:
        setattr(self, name, group[name])
    self.quadrant = Quadrant.for_disassembly(
        group.parent.name, group.name, self.Q)
def test_single_quadrant_assembly(self):
    """Assembling a single-quadrant variable keeps the upper-left block of the matrix."""
    kernel = self.uncompressed['state/HNO3/avk']
    quadrant: Quadrant = Quadrant.for_assembly('HNO3', 'avk', kernel)
    self.assertIsInstance(quadrant, Quadrant)
    expected_full_shape = (1, 29, 29)
    self.assertTupleEqual(quadrant.transformed_shape(), expected_full_shape)
    full_matrix = np.random.uniform(size=(29, 29))
    reduced = quadrant.transform(full_matrix, 23)
    expected_reduced_shape = (23, 23)
    self.assertTupleEqual(reduced.shape, expected_reduced_shape)
    # the reduced matrix must equal the first 23 rows and columns of the input
    matches_upper_left = np.allclose(full_matrix[:23, :23], reduced)
    self.assertTrue(matches_upper_left)
def _export_reconstruction(self, target: Dataset, array: np.ma.MaskedArray, quadrant: Quadrant):
    """Write ``array`` into ``target`` as a variable created by ``quadrant``.

    Every dimension of the root group of ``self.group`` that ``target``
    does not have yet is created there first, keeping unlimited dimensions
    unlimited.
    """
    source_root = root_group_of(self.group)
    known_dimensions = target.dimensions.keys()
    for dim_name, dimension in source_root.dimensions.items():
        if dim_name not in known_dimensions:
            # a size of None requests an unlimited dimension
            size = None if dimension.isunlimited() else len(dimension)
            target.createDimension(dim_name, size)
    out_var = quadrant.create_variable(target, self.group.path)
    out_var[:] = array[:]
def test_four_quadrants_assembly(self):
    """Assembling a four-quadrant variable arranges the reduced quadrants in a 2x2 grid."""
    kernel = self.uncompressed['/state/WV/avk']
    quadrant: Quadrant = Quadrant.for_assembly('WV', 'avk', kernel)
    self.assertIsInstance(quadrant, AssembleFourQuadrants)
    expected_full_shape = (1, 58, 58)
    self.assertTupleEqual(quadrant.transformed_shape(), expected_full_shape)
    quadrants = np.random.uniform(size=(2, 2, 29, 29))
    assembled = quadrant.transform(quadrants, 23)
    expected_assembled_shape = (46, 46)
    self.assertTupleEqual(assembled.shape, expected_assembled_shape)
    # the lower-left block of the assembled matrix holds quadrant [0, 1]
    lower_left = assembled[23:23 * 2, :23]
    source_block = quadrants[0, 1, :23, :23]
    self.assertTrue(np.allclose(lower_left, source_block),
                    'Four quadrant assembly not close')
def test_two_quadrant_disassembly(self):
    """Disassembling a two-quadrant matrix splits it into its upper and lower quadrant."""
    left_vectors = self.compressed['state/GHG/Tatmxavk/U']
    quadrant: Quadrant = Quadrant.for_disassembly('GHG', 'Tatmxavk', left_vectors)
    self.assertIsInstance(quadrant, DisassembleTwoQuadrants)
    expected_full_shape = (1, 2, 29, 29)
    self.assertTupleEqual(quadrant.transformed_shape(), expected_full_shape)
    stacked = np.arange(29*58).reshape(58, 29)
    parts = quadrant.transform(stacked, 23)
    expected_parts_shape = (2, 23, 23)
    self.assertTupleEqual(parts.shape, expected_parts_shape)
    # first quadrant: rows 0..22 of the stacked matrix
    upper_matches = np.allclose(stacked[:23, :23], parts[0, :23, :23])
    self.assertTrue(upper_matches)
    # second quadrant: rows 29..51 of the stacked matrix
    lower_matches = np.allclose(stacked[29:52, :23], parts[1, :23, :23])
    self.assertTrue(lower_matches)
def test_four_quadrant_disassembly(self):
    """Disassembling a four-quadrant matrix and re-assembling it preserves every quadrant."""
    left_vectors = self.compressed['state/WV/avk/U']
    quadrant: Quadrant = Quadrant.for_disassembly('WV', 'avk', left_vectors)
    self.assertIsInstance(quadrant, DisassembleFourQuadrants)
    expected_full_shape = (1, 2, 2, 29, 29)
    self.assertTupleEqual(quadrant.transformed_shape(), expected_full_shape)
    kernel = self.uncompressed['state/WV/avk/']
    assembler = Quadrant.for_assembly('WV', 'avk', kernel)
    full_matrix = np.arange(58*58).reshape(58, 58)
    parts = quadrant.transform(full_matrix, 23)
    reassembled = assembler.transform(parts, 23)
    expected_parts_shape = (2, 2, 23, 23)
    self.assertTupleEqual(parts.shape, expected_parts_shape)
    # lower-right quadrant: rows and columns 29..51 of the full matrix
    lower_right_matches = np.allclose(
        full_matrix[29:52, 29:52], parts[1, 1, :23, :23])
    self.assertTrue(lower_right_matches)
    # every quadrant of the round trip must match its source block
    index_pairs = [(row, col) for row in range(2) for col in range(2)]
    for row, col in index_pairs:
        source_block = full_matrix[row * 29:23 + row * 29, col * 29:23 + col * 29]
        round_trip_block = reassembled[row * 23:(row+1) * 23, col * 23:(col+1) * 23]
        self.assertTrue(np.allclose(source_block, round_trip_block))
def report_for(self, variable: Variable, original, reconstructed, rc_error) -> pd.DataFrame:
    """Estimate errors at the levels of interest for every usable event.

    The estimation method is chosen by ``variable.name`` (``avk``, ``n`` or
    ``Tatmxavk``). For each event the original (and, if ``rc_error`` is
    set, the reconstructed) matrix is assembled for the event's number of
    levels and handed to the estimation method. If ``self.type_two`` is
    set the estimation runs twice per event: first as type 2, then as
    type 1; otherwise only once as type 1.

    Events are skipped when the number of levels is masked or greater than
    29, or when ``self.matrix_ok`` rejects one of the involved matrices.

    Args:
        variable: variable the report is created for.
        original: original values, indexed by event.
        reconstructed: reconstructed values, indexed by event; only read
            when ``rc_error`` is set.
        rc_error: flag selecting whether reconstructed values are used;
            it is copied into the ``rc_error`` column.

    Returns:
        DataFrame with the columns ``event``, ``level_of_interest``,
        ``err``, ``rc_error`` and ``type``.

    Raises:
        ValueError: if no estimation method exists for ``variable.name``.
    """
    estimators = {
        'avk': self.averaging_kernel,
        'n': self.noise_matrix,
        'Tatmxavk': self.cross_averaging_kernel
    }
    estimate = estimators.get(variable.name)
    if estimate is None:
        raise ValueError(
            f'No error estimation method for variable {variable.name}')

    assembler = Quadrant.for_assembly(self.gas, variable.name, variable)
    path = f'/state/{self.gas}/{variable.name}'
    # one tuple per report row: (event, level_of_interest, err, rc_error, type)
    rows = []
    for event in range(original.shape[0]):
        if np.ma.is_masked(self.nol[event]) or self.nol.data[event] > 29:
            continue
        n_levels = self.nol.data[event]
        if not self.matrix_ok(event, path, self.alt[event, :n_levels]):
            continue
        covariance = Covariance(n_levels, self.alt[event])
        original_matrix = assembler.transform(original[event], n_levels)
        if not self.matrix_ok(event, path, original_matrix):
            continue

        # use reconstructed values iff rc_error flag is set
        rc_matrix = None
        if rc_error:
            rc_masked = assembler.transform(reconstructed[event], n_levels)
            if not self.matrix_ok(event, path, rc_masked):
                continue
            rc_matrix = rc_masked.data

        # only water vapour passes its averaging kernel to the estimation
        avk_matrix = None
        if isinstance(self, WaterVapour):
            avk_masked = AssembleFourQuadrants(n_levels).transform(
                self.avk[event], n_levels)
            if not self.matrix_ok(event, 'wv_avk', avk_masked):
                continue
            avk_matrix = avk_masked.data

        # type 2 (if requested) is calculated first, followed by type 1;
        # without type 2 there is a single type 1 pass
        first_pass = self.type_two
        passes = (first_pass, False) if first_pass else (first_pass,)
        for calc_type_two in passes:
            error = estimate(event, original_matrix.data, rc_matrix,
                             covariance, type2=calc_type_two, avk=avk_matrix)
            error_type = 2 if calc_type_two else 1
            for loi in self.levels_of_interest:
                if loi == 0:
                    # zero == surface (special value)
                    level = 0
                else:
                    # other levels are counted relative to the highest level
                    level = n_levels + loi
                    if level < 2:
                        continue
                rows.append((event, loi, error[level, level], rc_error, error_type))
                if self.gas == 'GHG':
                    # for greenhouse gases export also CH4 (lower right quadrant)
                    # nol as index offset for error level
                    shifted = level + n_levels
                    rows.append((event, loi - 29, error[shifted, shifted],
                                 rc_error, error_type))

    columns = ('event', 'level_of_interest', 'err', 'rc_error', 'type')
    result = {
        column: [row[position] for row in rows]
        for position, column in enumerate(columns)
    }
    return pd.DataFrame(result)
def decompose(self, output: Dataset, levels: np.ma.MaskedArray) -> np.ma.MaskedArray:
    """Decompose the variable event by event into eigenpairs and write them to ``output``.

    For every event the variable is reduced with ``Quadrant.transform`` to
    the number of levels given in ``levels``, symmetrised and decomposed
    with ``np.linalg.eig``. Only eigenpairs whose eigenvalue is positive
    and larger than ``self.threshold`` times the largest eigenvalue are
    kept. The results of all events are collected in masked arrays and
    written to the group returned by ``self.create_target_group(output)``
    as variables ``k``, ``Q`` and ``s``; the ``rank`` dimension is sized to
    the largest number of kept eigenpairs.

    Events are skipped (their entries stay masked) when the level is masked
    or greater than 29, or when ``self.matrix_ok`` rejects the reduced
    matrix. An event for which no eigenpair passes the filter is stored
    with ``k = 0``.

    Args:
        output: dataset that receives the target group.
        levels: per-event number of levels used to reduce the matrix.

    Returns:
        Nothing is returned explicitly (the annotation is kept unchanged
        for interface compatibility).

    Raises:
        ValueError: if the reduced matrix is not close to symmetric, or if
            eigenvalues or eigenvectors are complex.
    """
    q: Quadrant = Quadrant.for_assembly(self.group.name, self.var.name, self.var)
    events, _, bound = q.transformed_shape()
    # should be always the same because reshaped variable is square
    all_Q = np.ma.masked_all((events, bound, bound))
    all_s = np.ma.masked_all((events, bound))
    # integer counts, matching the dtype used by the SVD-based decomposition
    all_k = np.ma.masked_all((events), dtype=int)
    max_k = 0
    for event in range(self.var.shape[0]):
        if np.ma.is_masked(levels[event]) or levels.data[event] > 29:
            continue
        level = int(levels.data[event])
        # reduce array dimensions
        matrix = q.transform(self.var[event][...], level)
        if not self.matrix_ok(event, self.target_path(), matrix):
            continue
        # test if nearly symmetric
        matrix = matrix.data
        if not np.allclose(matrix, matrix.T):
            raise ValueError(
                f'Noise matrix is not symmeric for {self.target_path()}:{event}')
        # make matrix symmetric by fixing rounding errors
        matrix = (matrix + matrix.T) / 2
        # decompose matrix
        eigenvalues, eigenvectors = np.linalg.eig(matrix)
        # should not be complex anymore because matrix is symmetric (see above)
        if np.iscomplex(eigenvalues).any():
            raise ValueError(f'Eigenvalues are complex for {self.target_path()}:{event}')
        if np.iscomplex(eigenvectors).any():
            raise ValueError(
                f'Eigenvectors are complex for {self.target_path()}:{event}')
        # covariance matrices are positive semi-definite, which implies
        # non-negative eigenvalues. Due to floating point errors this is
        # not guaranteed, so non-positive and negligibly small eigenpairs
        # are filtered out.
        max_eigenvalue = eigenvalues.max()
        keep = (eigenvalues > 0) & (max_eigenvalue * self.threshold < eigenvalues)
        selected_eigenvalues = eigenvalues[keep]
        # column selection keeps the array two-dimensional even when no
        # eigenpair is selected, so the assignments below also work for k == 0
        selected_eigenvectors = eigenvectors[:, keep]
        k = int(selected_eigenvalues.shape[0])
        max_k = max(k, max_k)
        all_k[event] = k
        all_Q[event, :selected_eigenvectors.shape[0], :k] = selected_eigenvectors
        all_s[event, :k] = selected_eigenvalues
    # write all to output
    dimension_name, _ = q.upper_and_lower_dimension()
    target_group = self.create_target_group(output)
    target_group.createDimension('rank', size=max_k)
    target_group.description = f'eigen decomposition of {self.var.description}. reconstruction with (Q * s).dot(Q.T)'
    k_out = target_group.createVariable('k', 'i1', ('event'))
    k_out[:] = all_k[:]
    k_out.description = 'target rank of decomposition (number of eigenvalues)'
    Q_dim = ('event', dimension_name, 'rank')
    Q_out = target_group.createVariable('Q', 'f', Q_dim, zlib=True)
    Q_out[:] = all_Q[:, :, :max_k]
    Q_out.description = 'eigenvectors of noise matrix'
    s_dim = ('event', 'rank')
    s_out = target_group.createVariable('s', 'f', s_dim, zlib=True)
    s_out[:] = all_s[:, :max_k]
    s_out.description = 'eigenvalues of noise matrix'