Example #1
def test_problem_residuals(problem: Problem):
    problem.calculate_residual()
    if problem.grouped:
        assert isinstance(problem.residuals, list)
        assert all(isinstance(r, np.ndarray) for r in problem.residuals)
        assert len(problem.residuals) == suite.e_axis.size
    else:
        assert isinstance(problem.residuals, dict)
        assert "dataset1" in problem.residuals
        assert all(
            isinstance(r, np.ndarray) for r in problem.residuals["dataset1"])
        assert len(problem.residuals["dataset1"]) == suite.e_axis.size
    assert isinstance(problem.reduced_clps, dict)
    assert "dataset1" in problem.reduced_clps
    assert all(
        isinstance(c, np.ndarray) for c in problem.reduced_clps["dataset1"])
    assert len(problem.reduced_clps["dataset1"]) == suite.e_axis.size
    assert isinstance(problem.clps, dict)
    assert "dataset1" in problem.clps
    assert all(isinstance(c, np.ndarray) for c in problem.clps["dataset1"])
    assert len(problem.clps["dataset1"]) == suite.e_axis.size
    assert isinstance(problem.additional_penalty, np.ndarray)
    assert problem.additional_penalty.size == 1
    assert problem.additional_penalty[0] == 0.1
    assert isinstance(problem.full_penalty, np.ndarray)
    assert (
        problem.full_penalty.size == (suite.c_axis.size * suite.e_axis.size) +
        problem.additional_penalty.size)
Example #2
def _create_result(
    problem: Problem,
    ls_result: OptimizeResult | None,
    free_parameter_labels: list[str],
    termination_reason: str,
) -> Result:

    success = ls_result is not None

    number_of_function_evaluation = (ls_result.nfev if ls_result is not None
                                     else len(problem.parameter_history))
    number_of_jacobian_evaluation = ls_result.njev if success else None
    optimality = ls_result.optimality if success else None
    number_of_data_points = ls_result.fun.size if success else None
    number_of_variables = ls_result.x.size if success else None
    degrees_of_freedom = number_of_data_points - number_of_variables if success else None
    chi_square = np.sum(ls_result.fun**2) if success else None
    reduced_chi_square = chi_square / degrees_of_freedom if success else None
    root_mean_square_error = np.sqrt(reduced_chi_square) if success else None
    jacobian = ls_result.jac if success else None

    problem.save_parameters_for_history()
    history_index = None if success else -2
    data = problem.create_result_data(history_index=history_index)
    # the optimized parameters are those of the last run if the optimization has crashed
    parameters = problem.parameters
    covariance_matrix = None
    if success:
        try:
            covariance_matrix = np.linalg.inv(jacobian.T.dot(jacobian))
            standard_errors = np.sqrt(np.diagonal(covariance_matrix))
            for label, error in zip(free_parameter_labels, standard_errors):
                parameters.get(label).standard_error = error
        except np.linalg.LinAlgError:
            warn(
                "The resulting Jacobian is singular, cannot compute covariance matrix and "
                "standard errors.")

    return Result(
        additional_penalty=problem.additional_penalty,
        cost=problem.cost,
        data=data,
        free_parameter_labels=free_parameter_labels,
        number_of_function_evaluations=number_of_function_evaluation,
        initial_parameters=problem.scheme.parameters,
        optimized_parameters=parameters,
        scheme=problem.scheme,
        success=success,
        termination_reason=termination_reason,
        chi_square=chi_square,
        covariance_matrix=covariance_matrix,
        degrees_of_freedom=degrees_of_freedom,
        jacobian=jacobian,
        number_of_data_points=number_of_data_points,
        number_of_jacobian_evaluations=number_of_jacobian_evaluation,
        number_of_variables=number_of_variables,
        optimality=optimality,
        reduced_chi_square=reduced_chi_square,
        root_mean_square_error=root_mean_square_error,
    )
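
The covariance step above is plain Gauss-Newton error propagation: the covariance matrix is approximated as the inverse of J^T J, and the standard errors are the square roots of its diagonal. A minimal, self-contained numpy sketch of that same formula, using a made-up toy Jacobian rather than glotaran data:

import numpy as np

# Toy Jacobian: 100 residuals, 3 free parameters (illustrative values only).
rng = np.random.default_rng(0)
jacobian = rng.normal(size=(100, 3))

# covariance ~ (J^T J)^-1; standard errors are the roots of its diagonal.
covariance_matrix = np.linalg.inv(jacobian.T @ jacobian)
standard_errors = np.sqrt(np.diagonal(covariance_matrix))
print(standard_errors)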
Example #3
def test_problem_matrices(problem: Problem):
    problem.calculate_matrices()

    if problem.grouped:
        if problem.index_dependent:
            assert all(isinstance(m, list) for m in problem.reduced_clp_labels)
            assert all(
                isinstance(m, np.ndarray) for m in problem.reduced_matrices)
            assert len(problem.reduced_clp_labels) == suite.e_axis.size
            assert len(problem.reduced_matrices) == suite.e_axis.size
        else:
            assert "dataset1" in problem.reduced_clp_labels
            assert "dataset1" in problem.reduced_matrices
            assert isinstance(problem.reduced_clp_labels["dataset1"], list)
            assert isinstance(problem.reduced_matrices["dataset1"], np.ndarray)
    else:
        if problem.index_dependent:
            assert isinstance(problem.reduced_clp_labels, dict)
            assert isinstance(problem.reduced_matrices, dict)
            assert isinstance(problem.reduced_matrices["dataset1"], list)
            assert all(
                isinstance(c, list)
                for c in problem.reduced_clp_labels["dataset1"])
            assert all(
                isinstance(m, np.ndarray)
                for m in problem.reduced_matrices["dataset1"])
        else:
            assert isinstance(problem.reduced_matrices["dataset1"], np.ndarray)

        assert isinstance(problem.clp_labels, dict)
        assert isinstance(problem.matrices, dict)
        assert isinstance(problem.reduced_clp_labels["dataset1"], list)
        assert "dataset1" in problem.reduced_clp_labels
        assert "dataset1" in problem.reduced_matrices
Example #4
def _calculate_penalty(parameters: np.ndarray,
                       free_parameter_labels: list[str] | None = None,
                       problem: Problem | None = None):
    problem.save_parameters_for_history()
    problem.parameters.set_from_label_and_value_arrays(free_parameter_labels,
                                                       parameters)
    problem.reset()
    return problem.full_penalty
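
`_calculate_penalty` is the residual callback handed to the optimizer in Example #5. A hedged sketch of evaluating it once for a candidate parameter vector, assuming a `problem` built as in the other examples:

# Assumes `problem` is an already constructed Problem, as in the examples above.
labels, values, lower, upper = problem.scheme.parameters.get_label_value_and_bounds_arrays(
    exclude_non_vary=True)
penalty = _calculate_penalty(values, free_parameter_labels=labels, problem=problem)
print(penalty.shape)  # flat residual vector (data residuals plus any penalties)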
Example #5
def optimize_problem(problem: Problem, verbose: bool = True) -> Result:

    if problem.scheme.optimization_method not in SUPPORTED_METHODS:
        raise ValueError(
            f"Unsupported optimization method {problem.scheme.optimization_method}. "
            f"Supported methods are '{list(SUPPORTED_METHODS.keys())}'")

    (
        free_parameter_labels,
        initial_parameter,
        lower_bounds,
        upper_bounds,
    ) = problem.scheme.parameters.get_label_value_and_bounds_arrays(
        exclude_non_vary=True)
    method = SUPPORTED_METHODS[problem.scheme.optimization_method]
    nfev = problem.scheme.maximum_number_function_evaluations
    ftol = problem.scheme.ftol
    gtol = problem.scheme.gtol
    xtol = problem.scheme.xtol
    verbose = 2 if verbose else 0
    termination_reason = ""

    try:
        ls_result = least_squares(
            _calculate_penalty,
            initial_parameter,
            bounds=(lower_bounds, upper_bounds),
            method=method,
            max_nfev=nfev,
            verbose=verbose,
            ftol=ftol,
            gtol=gtol,
            xtol=xtol,
            kwargs={
                "free_parameter_labels": free_parameter_labels,
                "problem": problem
            },
        )
        termination_reason = ls_result.message
    except Exception as e:
        warn(f"Optimization failed:\n\n{e}")
        termination_reason = str(e)
        ls_result = None

    problem.save_parameters_for_history()

    return _create_result(problem, ls_result, free_parameter_labels,
                          termination_reason)
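
Example #5 is a direct use of `scipy.optimize.least_squares` with bounds and per-call `kwargs`. A self-contained toy fit showing the same call pattern (hypothetical model and data, not glotaran):

import numpy as np
from scipy.optimize import least_squares

def residuals(params, x=None, y=None):
    # Same signature style as _calculate_penalty: extra inputs arrive via kwargs.
    a, b = params
    return a * np.exp(b * x) - y

x = np.linspace(0, 1, 50)
y = 2.0 * np.exp(-1.5 * x)
result = least_squares(
    residuals,
    np.array([1.0, -1.0]),            # initial parameters
    bounds=([0.0, -5.0], [10.0, 0.0]),
    method="trf",
    max_nfev=100,
    kwargs={"x": x, "y": y},
)
print(result.x, result.nfev, result.message)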
Example #6
def test_multi_dataset_overlap():
    model = MockModel.from_dict({
        "dataset": {
            "dataset1": {
                "megacomplex": [],
            },
            "dataset2": {
                "megacomplex": [],
            },
        }
    })

    model.grouped = lambda: True
    print(model.validate())
    assert model.valid()
    assert model.grouped()

    parameters = ParameterGroup.from_list([1, 10])
    print(model.validate(parameters))
    assert model.valid(parameters)

    axis_e_1 = [1, 2, 3, 5]
    axis_c_1 = [5, 7]
    axis_e_2 = [0, 1.4, 2.4, 3.4, 9]
    axis_c_2 = [5, 7, 9, 12]
    data = {
        "dataset1":
        xr.DataArray(np.ones((4, 2)),
                     coords=[("e", axis_e_1),
                             ("c", axis_c_1)]).to_dataset(name="data"),
        "dataset2":
        xr.DataArray(np.ones((5, 4)),
                     coords=[("e", axis_e_2),
                             ("c", axis_c_2)]).to_dataset(name="data"),
    }

    scheme = Scheme(model, parameters, data, group_tolerance=5e-1)
    problem = Problem(scheme)
    bag = list(problem.bag)
    assert len(problem.groups) == 3
    assert "dataset1dataset2" in problem.groups
    assert problem.groups["dataset1dataset2"] == ["dataset1", "dataset2"]
    assert len(bag) == 6

    assert all(p.data.size == 4 for p in bag[:1])
    assert all(p.descriptor[0].label == "dataset1" for p in bag[1:5])
    assert all(all(p.descriptor[0].axis == axis_c_1) for p in bag[1:5])
    assert [p.descriptor[0].index for p in bag[1:5]] == axis_e_1

    assert all(p.data.size == 6 for p in bag[1:4])
    assert all(p.descriptor[1].label == "dataset2" for p in bag[1:4])
    assert all(all(p.descriptor[1].axis == axis_c_2) for p in bag[1:4])
    assert [p.descriptor[1].index for p in bag[1:4]] == axis_e_2[1:4]

    assert all(p.data.size == 4 for p in bag[5:])
    assert bag[4].descriptor[0].label == "dataset1"
    assert bag[5].descriptor[0].label == "dataset2"
    assert np.array_equal(bag[4].descriptor[0].axis, axis_c_1)
    assert np.array_equal(bag[5].descriptor[0].axis, axis_c_2)
    assert [p.descriptor[0].index for p in bag[1:4]] == axis_e_1[:-1]
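
The three groups above come from merging global-axis points of the two datasets that lie within `group_tolerance` of each other. A rough numpy illustration of that merging rule (hypothetical helper logic, not the actual glotaran implementation; axes copied from the test):

import numpy as np

axis_e_1 = [1, 2, 3, 5]
axis_e_2 = np.array([0, 1.4, 2.4, 3.4, 9])
tolerance = 5e-1

# For each point of dataset1, check whether dataset2 has a point within tolerance.
overlapping = [e for e in axis_e_1 if np.any(np.abs(axis_e_2 - e) <= tolerance)]
print(overlapping)  # [1, 2, 3] -> these points form the "dataset1dataset2" group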
Example #7
def test_single_dataset():
    model = MockModel.from_dict(
        {"dataset": {
            "dataset1": {
                "megacomplex": [],
            },
        }})
    model.grouped = lambda: True
    print(model.validate())
    assert model.valid()
    assert model.grouped()

    parameters = ParameterGroup.from_list([1, 10])
    print(model.validate(parameters))
    assert model.valid(parameters)
    axis_e = [1, 2, 3]
    axis_c = [5, 7, 9, 12]

    data = {
        "dataset1":
        xr.DataArray(np.ones((3, 4)),
                     coords=[("e", axis_e),
                             ("c", axis_c)]).to_dataset(name="data")
    }

    scheme = Scheme(model, parameters, data)
    problem = Problem(scheme)
    bag = problem.bag
    datasets = problem.groups
    assert len(datasets) == 1
    assert len(bag) == 3
    assert all(p.data.size == 4 for p in bag)
    assert all(p.descriptor[0].label == "dataset1" for p in bag)
    assert all(all(p.descriptor[0].axis == axis_c) for p in bag)
    assert [p.descriptor[0].index for p in bag] == axis_e
Example #8
def test_multi_dataset_no_overlap():
    model = MockModel.from_dict({
        "dataset": {
            "dataset1": {
                "megacomplex": [],
            },
            "dataset2": {
                "megacomplex": [],
            },
        }
    })

    model.grouped = lambda: True
    print(model.validate())
    assert model.valid()
    assert model.grouped()

    parameters = ParameterGroup.from_list([1, 10])
    print(model.validate(parameters))
    assert model.valid(parameters)

    axis_e_1 = [1, 2, 3]
    axis_c_1 = [5, 7]
    axis_e_2 = [4, 5, 6]
    axis_c_2 = [5, 7, 9]
    data = {
        "dataset1":
        xr.DataArray(np.ones((3, 2)),
                     coords=[("e", axis_e_1),
                             ("c", axis_c_1)]).to_dataset(name="data"),
        "dataset2":
        xr.DataArray(np.ones((3, 3)),
                     coords=[("e", axis_e_2),
                             ("c", axis_c_2)]).to_dataset(name="data"),
    }

    scheme = Scheme(model, parameters, data)
    problem = Problem(scheme)
    bag = list(problem.bag)
    assert len(problem.groups) == 2
    assert len(bag) == 6
    assert all(p.data.size == 2 for p in bag[:3])
    assert all(p.descriptor[0].label == "dataset1" for p in bag[:3])
    assert all(all(p.descriptor[0].axis == axis_c_1) for p in bag[:3])
    assert [p.descriptor[0].index for p in bag[:3]] == axis_e_1

    assert all(p.data.size == 3 for p in bag[3:])
    assert all(p.descriptor[0].label == "dataset2" for p in bag[3:])
    assert all(all(p.descriptor[0].axis == axis_c_2) for p in bag[3:])
    assert [p.descriptor[0].index for p in bag[3:]] == axis_e_2
Example #9
def test_problem_result_data(problem: Problem):

    data = problem.create_result_data()

    assert "dataset1" in data

    dataset = data["dataset1"]

    assert "clp_label" in dataset.coords
    assert np.array_equal(dataset.clp_label, ["s1", "s2", "s3", "s4"])

    assert problem.model.global_dimension in dataset.coords
    assert np.array_equal(dataset.coords[problem.model.global_dimension],
                          suite.e_axis)

    assert problem.model.model_dimension in dataset.coords
    assert np.array_equal(dataset.coords[problem.model.model_dimension],
                          suite.c_axis)

    assert "matrix" in dataset
    matrix = dataset.matrix
    if problem.index_dependent:
        assert len(matrix.shape) == 3
        assert matrix.shape[0] == suite.e_axis.size
        assert matrix.shape[1] == suite.c_axis.size
        assert matrix.shape[2] == 4
    else:
        assert len(matrix.shape) == 2
        assert matrix.shape[0] == suite.c_axis.size
        assert matrix.shape[1] == 4

    assert "clp" in dataset
    clp = dataset.clp
    assert len(clp.shape) == 2
    assert clp.shape[0] == suite.e_axis.size
    assert clp.shape[1] == 4

    assert "weighted_residual" in dataset
    assert dataset.data.shape == dataset.weighted_residual.shape

    assert "residual" in dataset
    assert dataset.data.shape == dataset.residual.shape

    assert "residual_singular_values" in dataset
    assert "weighted_residual_singular_values" in dataset
Example #10
def problem(request) -> Problem:
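    # Parametrized pytest fixture (the @pytest.fixture decorator is not shown
    # in this excerpt): request.param is assumed to carry the pair
    # (is_grouped, is_index_dependent).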
    model = suite.model
    model.is_grouped = request.param[0]
    model.is_index_dependent = request.param[1]

    dataset = simulate(
        suite.sim_model,
        "dataset1",
        suite.wanted_parameters,
        {
            "e": suite.e_axis,
            "c": suite.c_axis
        },
    )
    scheme = Scheme(model=model,
                    parameters=suite.initial_parameters,
                    data={"dataset1": dataset})
    return Problem(scheme)
Example #11
def optimize(scheme: Scheme, verbose: bool = True) -> Result:
    problem = Problem(scheme)
    return optimize_problem(problem, verbose=verbose)
Example #12
# Note: data_path, model_path, parameter_path and output_folder are defined
# elsewhere in the original script.
dataset = read_data_file(data_path)
model = read_model_from_yaml_file(model_path)
parameter = read_parameters_from_yaml_file(parameter_path)
scheme = Scheme(
    model,
    parameter,
    {"dataset1": dataset},
    maximum_number_function_evaluations=9,
    non_negative_least_squares=True,
)

print(model.validate(parameters=parameter))

# The problem is constructed automatically from the scheme by the optimize call,
# but can also be created manually for debug purposes:
test_problem = Problem(scheme)

# %%
start = timer()
# Warning: this may take a while (several seconds per iteration)
result = optimize(scheme, verbose=True)
end = timer()
print(f"Total time: {end - start}")

result.save(str(output_folder))
end2 = timer()
print(f"Saving took: {end2 - end}")

# %%
print(result.markdown(True))
Example #13
def test_prepare_data():
    model_dict = {
        "dataset": {
            "dataset1": {
                "megacomplex": [],
            },
        },
        "weights": [
            {
                "datasets": ["dataset1"],
                "global_interval": (np.inf, 200),
                "model_interval": (4, 8),
                "value": 0.5,
            },
        ],
    }
    model = SimpleTestModel.from_dict(model_dict)
    print(model.validate())
    assert model.valid()

    parameters = ParameterGroup.from_list([])

    global_axis = np.asarray(range(50, 300))
    model_axis = np.asarray(range(15))

    dataset = xr.DataArray(
        np.ones((global_axis.size, model_axis.size)),
        coords={
            "e": global_axis,
            "c": model_axis
        },
        dims=("e", "c"),
    )

    scheme = Scheme(model, parameters, {"dataset1": dataset})
    problem = Problem(scheme)

    data = problem.data["dataset1"]
    print(data)
    assert "data" in data
    assert "weight" in data

    assert data.data.shape == (model_axis.size, global_axis.size)
    assert data.data.shape == data.weight.shape
    assert np.all(
        data.weight.sel(e=slice(0, 200), c=slice(4, 8)).values == 0.5)
    assert np.all(data.weight.sel(c=slice(0, 3)).values == 1)

    model_dict["weights"].append({
        "datasets": ["dataset1"],
        "value": 0.2,
    })
    model = SimpleTestModel.from_dict(model_dict)
    print(model.validate())
    assert model.valid()

    scheme = Scheme(model, parameters, {"dataset1": dataset})
    problem = Problem(scheme)
    data = problem.data["dataset1"]
    assert np.all(
        data.weight.sel(e=slice(0, 200), c=slice(4, 8)).values == 0.5 * 0.2)
    assert np.all(data.weight.sel(c=slice(0, 3)).values == 0.2)

    with pytest.warns(
            UserWarning,
            match="Ignoring model weight for dataset 'dataset1'"
            " because weight is already supplied by dataset.",
    ):
        Problem(Scheme(model, parameters, {"dataset1": data}))
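
The weight in Example #13 is an interval-scoped multiplier on the data. A small self-contained xarray sketch of the same label-based slicing idea (toy coordinates, not the glotaran prepare step):

import numpy as np
import xarray as xr

data = xr.DataArray(
    np.ones((250, 15)),
    coords={"e": np.arange(50, 300), "c": np.arange(15)},
    dims=("e", "c"),
)
weight = xr.ones_like(data)
# Apply a 0.5 weight on the interval e <= 200, c in [4, 8]
# (label-based slicing; endpoints are inclusive).
weight.loc[{"e": slice(50, 200), "c": slice(4, 8)}] = 0.5
print(float(weight.sel(e=100, c=5)))  # 0.5
print(float(weight.sel(e=250, c=5)))  # 1.0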