def test_get_scaling_factor():
    """The scaling factor for a seeded 10x10 random matrix is sqrt(10 / 6)."""
    Event = namedtuple("named_dict", ["keys", "threshold"])
    event = Event(["one_random_key"], 0.95)

    np.random.seed(123)
    frame = pd.DataFrame(data=np.random.rand(10, 10))
    data_matrix = DataMatrix(frame)

    assert data_matrix.get_scaling_factor(event) == np.sqrt(10 / 6.0)
def test_std_normalization():
    """Dividing all-ones data rows by an STD row of 0.1 yields a matrix of tens."""
    raw = pd.DataFrame(np.ones((3, 3)))
    raw.loc["OBS"] = np.ones(3)
    raw.loc["STD"] = np.ones(3) * 0.1

    expected = [[10.0] * 3 for _ in range(3)]

    data_matrix = DataMatrix(pd.concat({"A_KEY": raw}, axis=1))
    normalized = data_matrix.get_normalized_by_std()

    assert (normalized.loc[[0, 1, 2]].values == expected).all()
def scale(self, measured_data):
    """
    Collects data, performs scaling and applies scaling, assumes
    validated input.
    """
    config = self._config.snapshot

    # Drop data that cannot contribute to the statistics before scaling.
    measured_data.remove_failed_realizations()
    measured_data.remove_inactive_observations()
    measured_data.filter_ensemble_mean_obs(config.CALCULATE_KEYS.alpha)
    measured_data.filter_ensemble_std(config.CALCULATE_KEYS.std_cutoff)

    matrix = DataMatrix(measured_data.data)
    matrix.normalize_by_std()

    events = config.CALCULATE_KEYS
    data_matrix = matrix.get_data_matrix()

    # Number of principal components needed to reach the variance threshold,
    # plus the singular values for reporting.
    nr_components, singular_values = matrix.get_nr_primary_components(
        threshold=events.threshold
    )
    self._reporter.publish("svd", list(singular_values))

    nr_observations = data_matrix.shape[1]
    logging.info("Scaling factor calculated from {}".format(events.keys))
    # BUGFIX: the original also assigned
    # scale_factor = matrix.get_scaling_factor(config.CALCULATE_KEYS)
    # before this point and immediately overwrote it here — a dead store
    # (and a redundant computation); it has been removed.
    scale_factor = np.sqrt(nr_observations / float(nr_components))
    self._reporter.publish("scale_factor", scale_factor)

    update_data = create_active_lists(self._obs, config.UPDATE_KEYS.keys)
    self._update_scaling(self._obs, scale_factor, update_data)
def scale(self, measured_data):
    """
    Collects data, performs scaling and applies scaling, assumes
    validated input.
    """
    snapshot = self._config.snapshot
    calc_keys = snapshot.CALCULATE_KEYS

    # Prune data that cannot contribute to the scaling statistics.
    measured_data.remove_failed_realizations()
    measured_data.remove_inactive_observations()
    measured_data.filter_ensemble_mean_obs(calc_keys.alpha)
    measured_data.filter_ensemble_std(calc_keys.std_cutoff)

    data_matrix = DataMatrix(measured_data.data)
    data_matrix.std_normalization(inplace=True)
    factor = data_matrix.get_scaling_factor(calc_keys)

    active = create_active_lists(self._obs, snapshot.UPDATE_KEYS.keys)
    self._update_scaling(self._obs, factor, active)
def _observation_scaling(facade, config):
    """
    Collects data, performs scaling and applies scaling, assumes
    validated input.
    """
    events = config.CALCULATE_KEYS.keys
    keys = [event.key for event in events]
    indices = [event.index for event in events]

    data = MeasuredData(facade, keys, indices)
    # Prune data that cannot contribute to the scaling statistics.
    data.remove_failed_realizations()
    data.remove_inactive_observations()
    data.filter_ensemble_mean_obs(config.CALCULATE_KEYS.alpha)
    data.filter_ensemble_std(config.CALCULATE_KEYS.std_cutoff)

    data_matrix = DataMatrix(data.data)
    data_matrix.std_normalization(inplace=True)
    factor = data_matrix.get_scaling_factor(config.CALCULATE_KEYS)

    active = _create_active_lists(facade.get_observations(), config.UPDATE_KEYS.keys)
    _update_scaling(facade.get_observations(), factor, active)
def test_get_nr_primary_components(threshold, expected_result):
    """Component count for a seeded random matrix matches the expectation."""
    np.random.seed(123)
    data_matrix = DataMatrix(pd.DataFrame(data=np.random.rand(10, 10)))

    nr_components, _ = data_matrix.get_nr_primary_components(threshold)

    assert nr_components == expected_result
def test_get_nr_primary_components(threshold, expected_result):
    """Component count for a seeded random matrix matches the expectation."""
    np.random.seed(123)
    data = np.random.rand(10, 10)

    nr_components, _ = DataMatrix._get_nr_primary_components(data, threshold)

    assert nr_components == expected_result