예제 #1
0
def test_get_scaling_factor():
    """Check ``DataMatrix.get_scaling_factor`` on a seeded 10x10 random matrix.

    The event passed in carries a single key and a threshold of 0.95; the
    expected scaling factor for this input is ``sqrt(10 / 6.0)``.
    """
    new_event = namedtuple("named_dict", ["keys", "threshold"])
    event = new_event(["one_random_key"], 0.95)
    # Fixed seed so the random input, and thus the expected factor, is stable.
    np.random.seed(123)
    input_matrix = np.random.rand(10, 10)

    matrix = DataMatrix(pd.DataFrame(data=input_matrix))
    # Compare with a tolerance: the expected value is an irrational float, so
    # exact equality would hinge on the implementation's rounding order.
    assert np.isclose(matrix.get_scaling_factor(event), np.sqrt(10 / 6.0))
예제 #2
0
def test_std_normalization():
    """Check ``DataMatrix.get_normalized_by_std`` on a small hand-built frame.

    The frame holds three data rows of ones plus an ``OBS`` row of ones and a
    ``STD`` row of 0.1, nested under the column key ``A_KEY``. Every value in
    data rows 0-2 of the result is expected to equal 10.0.
    """
    frame = pd.DataFrame(np.ones((3, 3)))
    frame.loc["OBS"] = np.ones(3)
    frame.loc["STD"] = np.full(3, 0.1)
    keyed_frame = pd.concat({"A_KEY": frame}, axis=1)

    normalized = DataMatrix(keyed_frame).get_normalized_by_std()

    # Only the three data rows are checked.
    data_rows = normalized.loc[[0, 1, 2]].values
    expected_rows = np.full((3, 3), 10.0)
    assert (data_rows == expected_rows).all()
예제 #3
0
파일: job.py 프로젝트: hnformentin/semeio
    def scale(self, measured_data):
        """
        Collects data, performs scaling and applies scaling, assumes validated input.

        The scaling factor is ``sqrt(nr_observations / nr_components)``, where
        ``nr_observations`` is the number of columns of the data matrix and
        ``nr_components`` comes from ``DataMatrix.get_nr_primary_components``
        called with the configured threshold. The singular values ("svd") and
        the resulting factor ("scale_factor") are published via
        ``self._reporter``.

        :param measured_data: measured data object; its ``remove_*`` and
            ``filter_*`` methods are invoked before its ``data`` attribute is
            read to build the ``DataMatrix``.
        """
        config = self._config.snapshot
        events = config.CALCULATE_KEYS

        measured_data.remove_failed_realizations()
        measured_data.remove_inactive_observations()
        measured_data.filter_ensemble_mean_obs(events.alpha)
        measured_data.filter_ensemble_std(events.std_cutoff)

        matrix = DataMatrix(measured_data.data)
        matrix.normalize_by_std()

        data_matrix = matrix.get_data_matrix()
        nr_components, singular_values = matrix.get_nr_primary_components(
            threshold=events.threshold)
        self._reporter.publish("svd", list(singular_values))
        nr_observations = data_matrix.shape[1]

        logging.info("Scaling factor calculated from {}".format(events.keys))

        # The factor is computed inline (not via matrix.get_scaling_factor)
        # because the singular values are needed for reporting as well; a
        # separate get_scaling_factor call would only produce a value that is
        # discarded.
        scale_factor = np.sqrt(nr_observations / float(nr_components))
        self._reporter.publish("scale_factor", scale_factor)

        update_data = create_active_lists(self._obs, config.UPDATE_KEYS.keys)
        self._update_scaling(self._obs, scale_factor, update_data)
예제 #4
0
    def scale(self, measured_data):
        """
        Filter the measured data, compute a scaling factor from the
        std-normalized data matrix and hand it to ``self._update_scaling``.

        Input is assumed to have been validated already.
        """
        snapshot = self._config.snapshot

        # Drop unusable data first.
        measured_data.remove_failed_realizations()
        measured_data.remove_inactive_observations()

        # Then apply the thresholds configured under CALCULATE_KEYS.
        calculate_keys = snapshot.CALCULATE_KEYS
        measured_data.filter_ensemble_mean_obs(calculate_keys.alpha)
        measured_data.filter_ensemble_std(calculate_keys.std_cutoff)

        normalized = DataMatrix(measured_data.data)
        normalized.std_normalization(inplace=True)
        factor = normalized.get_scaling_factor(calculate_keys)

        # Active lists are built from the UPDATE_KEYS entries.
        active_lists = create_active_lists(self._obs, snapshot.UPDATE_KEYS.keys)
        self._update_scaling(self._obs, factor, active_lists)
예제 #5
0
def _observation_scaling(facade, config):
    """
    Build the measured data for the CALCULATE_KEYS events, filter it, compute
    a scaling factor from the std-normalized data matrix and pass it to
    ``_update_scaling`` for the UPDATE_KEYS entries.

    Input is assumed to have been validated already.
    """
    calc_config = config.CALCULATE_KEYS
    events = calc_config.keys
    # One key and one index entry per configured event, in matching order.
    keys = [ev.key for ev in events]
    indices = [ev.index for ev in events]

    measured = MeasuredData(facade, keys, indices)
    measured.remove_failed_realizations()
    measured.remove_inactive_observations()
    measured.filter_ensemble_mean_obs(calc_config.alpha)
    measured.filter_ensemble_std(calc_config.std_cutoff)

    normalized = DataMatrix(measured.data)
    normalized.std_normalization(inplace=True)
    factor = normalized.get_scaling_factor(calc_config)

    active_lists = _create_active_lists(
        facade.get_observations(), config.UPDATE_KEYS.keys
    )
    _update_scaling(facade.get_observations(), factor, active_lists)
예제 #6
0
def test_get_nr_primary_components(threshold, expected_result):
    """Check the component count for a seeded 10x10 random matrix.

    ``threshold`` and ``expected_result`` are supplied by the caller
    (presumably a parametrize decorator that is not visible here).
    """
    np.random.seed(123)  # fixed seed so the random input is reproducible
    frame = pd.DataFrame(data=np.random.rand(10, 10))
    data_matrix = DataMatrix(frame)
    # The second element of the returned pair is not checked by this test.
    nr_components, _unused = data_matrix.get_nr_primary_components(threshold)
    assert nr_components == expected_result
예제 #7
0
def test_get_nr_primary_components(threshold, expected_result):
    """Check ``DataMatrix._get_nr_primary_components`` on a raw random array.

    ``threshold`` and ``expected_result`` are supplied by the caller
    (presumably a parametrize decorator that is not visible here).
    """
    np.random.seed(123)  # fixed seed so the random input is reproducible
    random_values = np.random.rand(10, 10)
    # The helper is called on the class with the array passed in directly;
    # the second element of the returned pair is not checked by this test.
    outcome = DataMatrix._get_nr_primary_components(random_values, threshold)
    nr_components, _unused = outcome
    assert nr_components == expected_result