예제 #1
0
 def decompose(self, output: Dataset, levels: np.ma.MaskedArray) -> np.ma.MaskedArray:
     """Write a truncated singular value decomposition of ``self.var`` to ``output``.

     Every event is reduced to its number of levels with the assembly
     quadrant, decomposed with ``la.svd`` and cut to the rank chosen by
     ``self.target_rank``. Events that are skipped (see below) stay masked
     in all output variables.

     Args:
         output: dataset in which the target group
             (``self.create_target_group``) with the variables ``k``,
             ``U``, ``s`` and ``Vh`` is created.
         levels: number of levels per event. Events whose entry is masked
             or greater than 29 are skipped.

     Returns:
         Nothing is returned explicitly (``None``) despite the annotation;
         the result is written to ``output``.

     Raises:
         ValueError: if ``U``, ``s`` or ``Vh`` contain complex values.
     """
     q: Quadrant = Quadrant.for_assembly(self.group.name, self.var.name, self.var)
     events, upper_bound, lower_bound = q.transformed_shape()
     # transformed shape 1: (e, gl, gl), 2: (e, 2*gl, gl), 4: (e, 2*gl, 2*gl)
     all_U = np.ma.masked_all((events, upper_bound, lower_bound))
     all_s = np.ma.masked_all((events, lower_bound))
     all_Vh = np.ma.masked_all((events, lower_bound, lower_bound))
     # builtin int: the np.int alias was removed in NumPy 1.24
     all_k = np.ma.masked_all((events,), dtype=int)
     max_k = 0
     for event in range(self.var.shape[0]):
         if np.ma.is_masked(levels[event]) or levels.data[event] > 29:
             continue
         # reduce array dimensions
         level = int(levels.data[event])
         matrix = q.transform(self.var[event][...], level)
         if not self.matrix_ok(event, self.target_path(), matrix):
             continue
         # decompose reduced array
         try:
             U, s, Vh = la.svd(matrix.data, full_matrices=False, lapack_driver='gesdd')
         except np.linalg.LinAlgError as err:
             logger.error(f'{err} at {self.target_path()}:{event}')
             # without a result there is nothing to store: leave the event
             # masked instead of reusing the factors of a previous event
             continue
         if np.iscomplex(U).any():
             raise ValueError(
                 f'Left-singuar values are complex for {self.target_path()}:{event}')
         if np.iscomplex(s).any():
             raise ValueError(f'Eigenvalues are complex for {self.target_path()}:{event}')
         if np.iscomplex(Vh).any():
             raise ValueError(
                 f'Right-singlar values are complex for {self.target_path()}:{event}')
         # find k eigenvalues
         k = self.target_rank(s)
         max_k = max(k, max_k)
         # assign sliced decomposition to all; index the event directly so
         # the assignment does not go through a temporary masked view
         all_k[event] = k
         all_U[event, :U.shape[0], :k] = U[:, :k]
         all_s[event, :k] = s[:k]
         all_Vh[event, :k, :Vh.shape[1]] = Vh[:k, :]
     # write all to output
     upper_dim, lower_dim = q.upper_and_lower_dimension()
     group = self.create_target_group(output)
     group.createDimension('rank', size=max_k)
     group.description = f'singular value decomposistion of {self.var.description}. reconstruction with (U * s).dot(Vh)'
     k_out = group.createVariable('k', 'i1', ('event',))
     k_out[:] = all_k[:]
     k_out.description = 'target rank of decomposition (number of eigenvalues)'
     U_dim = ('event', upper_dim, 'rank')
     U_out = group.createVariable('U', 'f', U_dim, zlib=True)
     U_out[:] = all_U[:, :, :max_k]
     # set the attribute; rebinding U_out to the string would drop it
     U_out.description = 'left-singular vectors of decompositon'
     s_dim = ('event', 'rank')
     s_out = group.createVariable('s', 'f', s_dim, zlib=True)
     s_out[:] = all_s[:, :max_k]
     s_out.description = 'eigenvalues of decomposition'
     Vh_dim = ('event', 'rank', lower_dim)
     Vh_out = group.createVariable('Vh', 'f', Vh_dim, zlib=True)
     Vh_out[:] = all_Vh[:, :max_k, :]
     Vh_out.description = 'transposed right-singular vectors of decompositon'
예제 #2
0
 def test_single_quadrant_disassembly(self):
     """A single-quadrant variable is only cropped to the requested levels."""
     noise = self.compressed['state/HNO3/n/Q']
     quadrant: Quadrant = Quadrant.for_disassembly('HNO3', 'n', noise)
     self.assertIsInstance(quadrant, Quadrant)
     self.assertTupleEqual(quadrant.transformed_shape(), (1, 29, 29))
     # transform a random full-size matrix down to 23 levels
     levels = 23
     full_matrix = np.random.uniform(size=(29, 29))
     cropped = quadrant.transform(full_matrix, levels)
     self.assertTupleEqual(cropped.shape, (levels, levels))
예제 #3
0
 def test_two_quadrants_assembly(self):
     """Two quadrants are assembled into one (2 * levels, levels) matrix."""
     cross_avk = self.uncompressed['state/GHG/Tatmxavk']
     quadrant: Quadrant = Quadrant.for_assembly('GHG', cross_avk.name, cross_avk)
     self.assertIsInstance(quadrant, AssembleTwoQuadrants)
     self.assertTupleEqual(quadrant.transformed_shape(), (1, 58, 29))
     # assemble two random full-size quadrants with 23 levels each
     levels = 23
     quadrants = np.random.uniform(size=(2, 29, 29))
     assembled = quadrant.transform(quadrants, levels)
     self.assertTupleEqual(assembled.shape, (2 * levels, levels))
예제 #4
0
 def __init__(self, group: Group):
     """Bind the variables ``Q``, ``s`` and ``k`` of ``group`` to this instance.

     The group must contain all three variables (checked with ``assert``).
     The disassembly quadrant is looked up from the name of the parent
     group, the name of the group itself and the variable ``Q``.
     """
     super().__init__(group)
     available = group.variables.keys()
     assert all(name in available for name in ('Q', 's', 'k'))
     self.Q = group['Q']
     self.s = group['s']
     self.k = group['k']
     parent_name = group.parent.name
     self.quadrant = Quadrant.for_disassembly(parent_name, group.name, self.Q)
예제 #5
0
 def test_single_quadrant_assembly(self):
     """Assembly of a single quadrant keeps the upper-left levels x levels part."""
     kernel = self.uncompressed['state/HNO3/avk']
     quadrant: Quadrant = Quadrant.for_assembly('HNO3', 'avk', kernel)
     self.assertIsInstance(quadrant, Quadrant)
     self.assertTupleEqual(quadrant.transformed_shape(), (1, 29, 29))
     levels = 23
     full_matrix = np.random.uniform(size=(29, 29))
     assembled = quadrant.transform(full_matrix, levels)
     self.assertTupleEqual(assembled.shape, (levels, levels))
     # the values must be taken over unchanged
     expected = full_matrix[:levels, :levels]
     self.assertTrue(np.allclose(expected, assembled))
예제 #6
0
 def _export_reconstruction(self, target: Dataset, array: np.ma.MaskedArray, quadrant: Quadrant):
     """Copy missing root dimensions to ``target`` and store ``array`` there.

     Dimensions of the root group of ``self.group`` that ``target`` does not
     have yet are created with the same length (unlimited ones stay
     unlimited). The variable itself is created by
     ``quadrant.create_variable`` at ``self.group.path``.
     """
     source_root = root_group_of(self.group)
     known_dimensions = target.dimensions
     for dim_name, dimension in source_root.dimensions.items():
         if dim_name not in known_dimensions:
             # None requests an unlimited dimension
             size = None if dimension.isunlimited() else len(dimension)
             target.createDimension(dim_name, size)
     reconstructed = quadrant.create_variable(target, self.group.path)
     reconstructed[:] = array[:]
예제 #7
0
 def test_four_quadrants_assembly(self):
     """Four quadrants are assembled into one (2 * levels, 2 * levels) matrix."""
     kernel = self.uncompressed['/state/WV/avk']
     quadrant: Quadrant = Quadrant.for_assembly('WV', 'avk', kernel)
     self.assertIsInstance(quadrant, AssembleFourQuadrants)
     self.assertTupleEqual(quadrant.transformed_shape(), (1, 58, 58))
     levels = 23
     quadrants = np.random.uniform(size=(2, 2, 29, 29))
     assembled = quadrant.transform(quadrants, levels)
     self.assertTupleEqual(assembled.shape, (2 * levels, 2 * levels))
     # the lower-left block of the result must equal quadrant [0, 1]
     lower_left = assembled[levels:levels * 2, :levels]
     expected = quadrants[0, 1, :levels, :levels]
     self.assertTrue(np.allclose(lower_left, expected),
                     'Four quadrant assembly not close')
예제 #8
0
 def test_two_quadrant_disassembly(self):
     """A (58, 29) matrix is split into two quadrants cropped to 23 levels."""
     compressed_u = self.compressed['state/GHG/Tatmxavk/U']
     quadrant: Quadrant = Quadrant.for_disassembly('GHG', 'Tatmxavk', compressed_u)
     self.assertIsInstance(quadrant, DisassembleTwoQuadrants)
     self.assertTupleEqual(quadrant.transformed_shape(), (1, 2, 29, 29))
     levels = 23
     stacked = np.arange(29 * 58).reshape(58, 29)
     parts = quadrant.transform(stacked, levels)
     self.assertTupleEqual(parts.shape, (2, levels, levels))
     # first quadrant starts at row 0, second quadrant at row 29
     upper_matches = np.allclose(stacked[:levels, :levels], parts[0, :levels, :levels])
     self.assertTrue(upper_matches)
     lower_matches = np.allclose(stacked[29:29 + levels, :levels], parts[1, :levels, :levels])
     self.assertTrue(lower_matches)
예제 #9
0
    def test_four_quadrant_disassembly(self):
        """Disassembling and re-assembling four quadrants keeps the cropped blocks."""
        compressed_u = self.compressed['state/WV/avk/U']
        splitter: Quadrant = Quadrant.for_disassembly('WV', 'avk', compressed_u)

        self.assertIsInstance(splitter, DisassembleFourQuadrants)
        self.assertTupleEqual(splitter.transformed_shape(), (1, 2, 2, 29, 29))
        kernel = self.uncompressed['state/WV/avk/']
        joiner = Quadrant.for_assembly('WV', 'avk', kernel)
        levels = 23
        full_matrix = np.arange(58 * 58).reshape(58, 58)
        parts = splitter.transform(full_matrix, levels)
        rejoined = joiner.transform(parts, levels)
        self.assertTupleEqual(parts.shape, (2, 2, levels, levels))
        # lower-right quadrant starts at offset 29 in both directions
        lower_right = full_matrix[29:29 + levels, 29:29 + levels]
        self.assertTrue(np.allclose(lower_right, parts[1, 1, :levels, :levels]))
        # every block of the round trip must equal its source block
        for i, j in np.ndindex(2, 2):
            source_block = full_matrix[i * 29:levels + i * 29,
                                       j * 29:levels + j * 29]
            rejoined_block = rejoined[i * levels:(i + 1) * levels,
                                      j * levels:(j + 1) * levels]
            self.assertTrue(np.allclose(source_block, rejoined_block))
예제 #10
0
    def report_for(self, variable: Variable, original, reconstructed,
                   rc_error) -> pd.DataFrame:
        """Collect error estimates of one variable for all levels of interest.

        For every usable event the matching estimation method is called and
        the diagonal element of the returned error matrix is recorded for
        each entry of ``self.levels_of_interest``.

        Args:
            variable: variable whose ``name`` selects the estimation method
                (``avk``, ``n`` or ``Tatmxavk``).
            original: original values, indexed by event along the first axis.
            reconstructed: reconstructed values, indexed by event; only read
                if ``rc_error`` is set.
            rc_error: if truthy, the reconstructed values are passed to the
                estimation method; the flag is also stored in each row.

        Returns:
            A data frame with the columns ``event``, ``level_of_interest``,
            ``err``, ``rc_error`` and ``type``.

        Raises:
            ValueError: if no estimation method exists for ``variable.name``.
        """
        # if not original.shape == reconstructed.shape:
        #     message = f'Different shape for {type(self).__name__} {variable.name}: original {original.shape}, reconstructed {reconstructed.shape}'
        #     logger.error(message)
        #     raise ValueError(message)
        # column-wise accumulator, converted to a data frame at the end
        result = {
            'event': [],
            'level_of_interest': [],
            'err': [],
            'rc_error': [],
            'type': []
        }
        # dispatch table: variable name -> estimation method
        error_estimation_methods = {
            'avk': self.averaging_kernel,
            'n': self.noise_matrix,
            'Tatmxavk': self.cross_averaging_kernel
        }
        estimation_method = error_estimation_methods.get(variable.name)
        if estimation_method is None:
            raise ValueError(
                f'No error estimation method for variable {variable.name}')

        reshaper = Quadrant.for_assembly(self.gas, variable.name, variable)
        # path is only passed to matrix_ok
        path = f'/state/{self.gas}/{variable.name}'
        for event in range(original.shape[0]):
            # skip events without a number of levels or with more than 29 levels
            if np.ma.is_masked(self.nol[event]) or self.nol.data[event] > 29:
                continue
            # NOTE(review): nol_event is used as slice bound and index offset
            # without an explicit int() cast - confirm self.nol has an integer dtype
            nol_event = self.nol.data[event]
            if not self.matrix_ok(event, path, self.alt[event, :nol_event]):
                continue
            covariance = Covariance(nol_event, self.alt[event])
            original_event = reshaper.transform(original[event], nol_event)
            if not self.matrix_ok(event, path, original_event):
                continue
            # use reconstructed values iff rc_error flag is set
            if rc_error:
                rc_event = reshaper.transform(reconstructed[event], nol_event)
                if not self.matrix_ok(event, path, rc_event):
                    continue
                rc_event = rc_event.data
            else:
                rc_event = None
            # only water vapour passes its assembled averaging kernel to the
            # estimation method; all other gases pass None
            if isinstance(self, WaterVapour):
                avk_event = AssembleFourQuadrants(nol_event).transform(
                    self.avk[event], nol_event)
                if not self.matrix_ok(event, 'wv_avk', avk_event):
                    continue
                avk_event = avk_event.data
            else:
                avk_event = None
            # type two error only exists for water vapour
            # if gas does not require type 2 error estimation, break loop after first iteration
            calc_type_two = self.type_two
            while True:
                error = estimation_method(event,
                                          original_event.data,
                                          rc_event,
                                          covariance,
                                          type2=calc_type_two,
                                          avk=avk_event)
                for loi in self.levels_of_interest:
                    # zero == surface (special value)
                    if loi == 0:
                        level = 0
                    # for other levels the offset is added to the number of levels
                    # (presumably loi is negative, i.e. counted from the top - TODO confirm)
                    else:
                        level = nol_event + loi
                        # resulting levels below 2 are not reported
                        if level < 2:
                            continue
                    result['event'].append(event)
                    result['level_of_interest'].append(loi)
                    result['err'].append(error[level, level])
                    result['rc_error'].append(rc_error)
                    result['type'].append(2 if calc_type_two else 1)
                    if self.gas == 'GHG':
                        # for greenhouse gases export also CH4 (lower right quadrant)
                        # nol as index offset for error level
                        # the second row is stored under level of interest loi - 29
                        result['event'].append(event)
                        result['level_of_interest'].append(loi - 29)
                        result['err'].append(error[level + nol_event,
                                                   level + nol_event])
                        result['rc_error'].append(rc_error)
                        result['type'].append(2 if calc_type_two else 1)
                # stop if type 1 is calculated
                if not calc_type_two:
                    break
                # just finished type 2 in first iteration -> repeat with type 1
                calc_type_two = False
        return pd.DataFrame(result)
예제 #11
0
 def decompose(self, output: Dataset, levels: np.ma.MaskedArray) -> np.ma.MaskedArray:
     """Write a truncated eigen decomposition of ``self.var`` to ``output``.

     Every event is reduced to its number of levels with the assembly
     quadrant, symmetrized and decomposed with ``np.linalg.eig``. Only
     eigenpairs whose eigenvalue is positive and larger than
     ``self.threshold`` times the largest eigenvalue are kept. Events that
     are skipped stay masked in all output variables.

     Args:
         output: dataset in which the target group
             (``self.create_target_group``) with the variables ``k``, ``Q``
             and ``s`` is created.
         levels: number of levels per event. Events whose entry is masked
             or greater than 29 are skipped.

     Returns:
         Nothing is returned explicitly (``None``) despite the annotation;
         the result is written to ``output``.

     Raises:
         ValueError: if the reduced matrix is not nearly symmetric or if
             eigenvalues or eigenvectors are complex.
     """
     q: Quadrant = Quadrant.for_assembly(self.group.name, self.var.name, self.var)
     events, _, bound = q.transformed_shape()
     # should be always the same because reshaped variable is square
     all_Q = np.ma.masked_all((events, bound, bound))
     all_s = np.ma.masked_all((events, bound))
     # integer dtype: k is a count and is written to an 'i1' variable
     all_k = np.ma.masked_all((events,), dtype=int)
     max_k = 0
     for event in range(self.var.shape[0]):
         if np.ma.is_masked(levels[event]) or levels.data[event] > 29:
             continue
         level = int(levels.data[event])
         # reduce array dimensions
         matrix = q.transform(self.var[event][...], level)
         if not self.matrix_ok(event, self.target_path(), matrix):
             continue
         # test if nearly symmetric
         matrix = matrix.data
         if not np.allclose(matrix, matrix.T):
             raise ValueError(
                 f'Noise matrix is not symmeric for {self.target_path()}:{event}')
         # make matrix symmetric by fixing rounding errors
         matrix = (matrix + matrix.T) / 2
         # decompose matrix
         eigenvalues, eigenvectors = np.linalg.eig(matrix)
         # should not be complex anymore because matrix is symmetric (see above)
         if np.iscomplex(eigenvalues).any():
             raise ValueError(f'Eigenvalues are complex for {self.target_path()}:{event}')
         if np.iscomplex(eigenvectors).any():
             raise ValueError(
                 f'Eigenvectors are complex for {self.target_path()}:{event}')
         # covariance matrices are positive semi definite
         # this implies that eigenvalues are non-negative
         # unfortunately, due to floating point errors this is not guaranteed
         # to address this problem we assume negative eigenpairs as negligible and filter them
         #
         # a boolean mask keeps the eigenvector array two-dimensional even
         # if no eigenpair is selected (shape (n, 0)); a list based
         # selection turns into a 1-d array then and cannot be sliced
         keep = (eigenvalues > 0) & (eigenvalues.max() * self.threshold < eigenvalues)
         selected_eigenvalues = eigenvalues[keep]
         # eigenvectors are the columns of the array returned by eig
         selected_eigenvectors = eigenvectors[:, keep]
         k = selected_eigenvalues.shape[0]
         max_k = max(k, max_k)
         # index the event directly so the assignment does not go through
         # a temporary masked view
         all_k[event] = k
         all_Q[event, :selected_eigenvectors.shape[0], :k] = selected_eigenvectors
         all_s[event, :k] = selected_eigenvalues
     # write all to output
     dimension_name, _ = q.upper_and_lower_dimension()
     target_group = self.create_target_group(output)
     target_group.createDimension('rank', size=max_k)
     target_group.description = f'eigen decomposition of {self.var.description}. reconstruction with (Q * s).dot(Q.T)'
     k_out = target_group.createVariable('k', 'i1', ('event',))
     k_out[:] = all_k[:]
     k_out.description = 'target rank of decomposition (number of eigenvalues)'
     Q_dim = ('event', dimension_name, 'rank')
     Q_out = target_group.createVariable('Q', 'f', Q_dim, zlib=True)
     Q_out[:] = all_Q[:, :, :max_k]
     Q_out.description = 'eigenvectors of noise matrix'
     s_dim = ('event', 'rank')
     s_out = target_group.createVariable('s', 'f', s_dim, zlib=True)
     s_out[:] = all_s[:, :max_k]
     s_out.description = 'eigenvalues of noise matrix'