def test_problem_residuals(problem: Problem):
    problem.calculate_residual()
    if problem.grouped:
        assert isinstance(problem.residuals, list)
        assert all(isinstance(r, np.ndarray) for r in problem.residuals)
        assert len(problem.residuals) == suite.e_axis.size
    else:
        assert isinstance(problem.residuals, dict)
        assert "dataset1" in problem.residuals
        assert all(isinstance(r, np.ndarray) for r in problem.residuals["dataset1"])
        assert len(problem.residuals["dataset1"]) == suite.e_axis.size
    assert isinstance(problem.reduced_clps, dict)
    assert "dataset1" in problem.reduced_clps
    assert all(isinstance(c, np.ndarray) for c in problem.reduced_clps["dataset1"])
    assert len(problem.reduced_clps["dataset1"]) == suite.e_axis.size
    assert isinstance(problem.clps, dict)
    assert "dataset1" in problem.clps
    assert all(isinstance(c, np.ndarray) for c in problem.clps["dataset1"])
    assert len(problem.clps["dataset1"]) == suite.e_axis.size
    assert isinstance(problem.additional_penalty, np.ndarray)
    assert problem.additional_penalty.size == 1
    assert problem.additional_penalty[0] == 0.1
    assert isinstance(problem.full_penalty, np.ndarray)
    assert (
        problem.full_penalty.size
        == (suite.c_axis.size * suite.e_axis.size) + problem.additional_penalty.size
    )
def _create_result(
    problem: Problem,
    ls_result: OptimizeResult | None,
    free_parameter_labels: list[str],
    termination_reason: str,
) -> Result:

    success = ls_result is not None

    number_of_function_evaluation = (
        ls_result.nfev if ls_result is not None else len(problem.parameter_history)
    )
    number_of_jacobian_evaluation = ls_result.njev if success else None
    optimality = ls_result.optimality if success else None
    number_of_data_points = ls_result.fun.size if success else None
    number_of_variables = ls_result.x.size if success else None
    degrees_of_freedom = number_of_data_points - number_of_variables if success else None
    chi_square = np.sum(ls_result.fun**2) if success else None
    reduced_chi_square = chi_square / degrees_of_freedom if success else None
    root_mean_square_error = np.sqrt(reduced_chi_square) if success else None
    jacobian = ls_result.jac if success else None

    problem.save_parameters_for_history()
    history_index = None if success else -2
    data = problem.create_result_data(history_index=history_index)
    # the optimized parameters are those of the last run if the optimization has crashed
    parameters = problem.parameters

    covariance_matrix = None
    if success:
        try:
            covariance_matrix = np.linalg.inv(jacobian.T.dot(jacobian))
            standard_errors = np.sqrt(np.diagonal(covariance_matrix))
            for label, error in zip(free_parameter_labels, standard_errors):
                parameters.get(label).standard_error = error
        except np.linalg.LinAlgError:
            warn(
                "The resulting Jacobian is singular, cannot compute covariance matrix and "
                "standard errors."
            )

    return Result(
        additional_penalty=problem.additional_penalty,
        cost=problem.cost,
        data=data,
        free_parameter_labels=free_parameter_labels,
        number_of_function_evaluations=number_of_function_evaluation,
        initial_parameters=problem.scheme.parameters,
        optimized_parameters=parameters,
        scheme=problem.scheme,
        success=success,
        termination_reason=termination_reason,
        chi_square=chi_square,
        covariance_matrix=covariance_matrix,
        degrees_of_freedom=degrees_of_freedom,
        jacobian=jacobian,
        number_of_data_points=number_of_data_points,
        number_of_jacobian_evaluations=number_of_jacobian_evaluation,
        number_of_variables=number_of_variables,
        optimality=optimality,
        reduced_chi_square=reduced_chi_square,
        root_mean_square_error=root_mean_square_error,
    )
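# Minimal standalone sketch (illustration only, not part of the library) of the
# covariance / standard-error step used in _create_result above: the covariance
# matrix is taken as the inverse of J^T J for the final Jacobian, and the standard
# error of each free parameter is the square root of the corresponding diagonal
# entry. The 3x2 Jacobian below is made up for demonstration.
import numpy as np

toy_jacobian = np.array(
    [
        [1.0, 0.5],
        [0.0, 1.0],
        [2.0, 1.0],
    ]
)
toy_covariance = np.linalg.inv(toy_jacobian.T.dot(toy_jacobian))
toy_standard_errors = np.sqrt(np.diagonal(toy_covariance))
print(toy_standard_errors)  # one standard error per free parameter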
def test_problem_matrices(problem: Problem):
    problem.calculate_matrices()

    if problem.grouped:
        if problem.index_dependent:
            assert all(isinstance(m, list) for m in problem.reduced_clp_labels)
            assert all(isinstance(m, np.ndarray) for m in problem.reduced_matrices)
            assert len(problem.reduced_clp_labels) == suite.e_axis.size
            assert len(problem.reduced_matrices) == suite.e_axis.size
        else:
            assert "dataset1" in problem.reduced_clp_labels
            assert "dataset1" in problem.reduced_matrices
            assert isinstance(problem.reduced_clp_labels["dataset1"], list)
            assert isinstance(problem.reduced_matrices["dataset1"], np.ndarray)
    else:
        if problem.index_dependent:
            assert isinstance(problem.reduced_clp_labels, dict)
            assert isinstance(problem.reduced_matrices, dict)
            assert isinstance(problem.reduced_matrices["dataset1"], list)
            assert all(isinstance(c, list) for c in problem.reduced_clp_labels["dataset1"])
            assert all(
                isinstance(m, np.ndarray) for m in problem.reduced_matrices["dataset1"]
            )
        else:
            assert isinstance(problem.reduced_matrices["dataset1"], np.ndarray)

        assert isinstance(problem.clp_labels, dict)
        assert isinstance(problem.matrices, dict)
        assert isinstance(problem.reduced_clp_labels["dataset1"], list)
        assert "dataset1" in problem.reduced_clp_labels
        assert "dataset1" in problem.reduced_matrices
def _calculate_penalty(
    parameters: np.ndarray,
    free_parameter_labels: list[str] = None,
    problem: Problem = None,
):
    problem.save_parameters_for_history()
    problem.parameters.set_from_label_and_value_arrays(free_parameter_labels, parameters)
    problem.reset()
    return problem.full_penalty
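# Minimal standalone sketch (illustration only; the toy names are not part of the
# library) of the calling convention used by optimize_problem below:
# scipy.optimize.least_squares passes the current parameter vector positionally
# and forwards the entries of `kwargs` as keyword arguments, which is how
# _calculate_penalty receives `free_parameter_labels` and `problem`.
import numpy as np
from scipy.optimize import least_squares


def _toy_penalty(parameters, labels=None, offset=0.0):
    # pretend residual vector: distance of each parameter from `offset`
    return np.asarray(parameters) - offset


toy_result = least_squares(
    _toy_penalty,
    np.array([1.0, 2.0]),
    kwargs={"labels": ["a", "b"], "offset": 0.5},
)
print(toy_result.x)  # both parameters converge to ~0.5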
def optimize_problem(problem: Problem, verbose: bool = True) -> Result:

    if problem.scheme.optimization_method not in SUPPORTED_METHODS:
        raise ValueError(
            f"Unsupported optimization method {problem.scheme.optimization_method}. "
            f"Supported methods are '{list(SUPPORTED_METHODS.keys())}'"
        )

    (
        free_parameter_labels,
        initial_parameter,
        lower_bounds,
        upper_bounds,
    ) = problem.scheme.parameters.get_label_value_and_bounds_arrays(exclude_non_vary=True)
    method = SUPPORTED_METHODS[problem.scheme.optimization_method]
    nfev = problem.scheme.maximum_number_function_evaluations
    ftol = problem.scheme.ftol
    gtol = problem.scheme.gtol
    xtol = problem.scheme.xtol
    verbose = 2 if verbose else 0
    termination_reason = ""

    try:
        ls_result = least_squares(
            _calculate_penalty,
            initial_parameter,
            bounds=(lower_bounds, upper_bounds),
            method=method,
            max_nfev=nfev,
            verbose=verbose,
            ftol=ftol,
            gtol=gtol,
            xtol=xtol,
            kwargs={"free_parameter_labels": free_parameter_labels, "problem": problem},
        )
        termination_reason = ls_result.message
    except Exception as e:
        warn(f"Optimization failed:\n\n{e}")
        termination_reason = str(e)
        ls_result = None

    problem.save_parameters_for_history()
    return _create_result(problem, ls_result, free_parameter_labels, termination_reason)
def test_multi_dataset_overlap():
    model = MockModel.from_dict(
        {
            "dataset": {
                "dataset1": {
                    "megacomplex": [],
                },
                "dataset2": {
                    "megacomplex": [],
                },
            }
        }
    )

    model.grouped = lambda: True
    print(model.validate())
    assert model.valid()
    assert model.grouped()

    parameters = ParameterGroup.from_list([1, 10])
    print(model.validate(parameters))
    assert model.valid(parameters)

    axis_e_1 = [1, 2, 3, 5]
    axis_c_1 = [5, 7]
    axis_e_2 = [0, 1.4, 2.4, 3.4, 9]
    axis_c_2 = [5, 7, 9, 12]
    data = {
        "dataset1": xr.DataArray(
            np.ones((4, 2)), coords=[("e", axis_e_1), ("c", axis_c_1)]
        ).to_dataset(name="data"),
        "dataset2": xr.DataArray(
            np.ones((5, 4)), coords=[("e", axis_e_2), ("c", axis_c_2)]
        ).to_dataset(name="data"),
    }

    scheme = Scheme(model, parameters, data, group_tolerance=5e-1)
    problem = Problem(scheme)
    bag = list(problem.bag)

    assert len(problem.groups) == 3
    assert "dataset1dataset2" in problem.groups
    assert problem.groups["dataset1dataset2"] == ["dataset1", "dataset2"]
    assert len(bag) == 6

    assert all(p.data.size == 4 for p in bag[:1])
    assert all(p.descriptor[0].label == "dataset1" for p in bag[1:5])
    assert all(all(p.descriptor[0].axis == axis_c_1) for p in bag[1:5])
    assert [p.descriptor[0].index for p in bag[1:5]] == axis_e_1

    assert all(p.data.size == 6 for p in bag[1:4])
    assert all(p.descriptor[1].label == "dataset2" for p in bag[1:4])
    assert all(all(p.descriptor[1].axis == axis_c_2) for p in bag[1:4])
    assert [p.descriptor[1].index for p in bag[1:4]] == axis_e_2[1:4]

    assert all(p.data.size == 4 for p in bag[5:])
    assert bag[4].descriptor[0].label == "dataset1"
    assert bag[5].descriptor[0].label == "dataset2"
    assert np.array_equal(bag[4].descriptor[0].axis, axis_c_1)
    assert np.array_equal(bag[5].descriptor[0].axis, axis_c_2)
    assert [p.descriptor[0].index for p in bag[1:4]] == axis_e_1[:-1]
def test_single_dataset():
    model = MockModel.from_dict(
        {
            "dataset": {
                "dataset1": {
                    "megacomplex": [],
                },
            }
        }
    )
    model.grouped = lambda: True
    print(model.validate())
    assert model.valid()
    assert model.grouped()

    parameters = ParameterGroup.from_list([1, 10])
    print(model.validate(parameters))
    assert model.valid(parameters)

    axis_e = [1, 2, 3]
    axis_c = [5, 7, 9, 12]
    data = {
        "dataset1": xr.DataArray(
            np.ones((3, 4)), coords=[("e", axis_e), ("c", axis_c)]
        ).to_dataset(name="data")
    }

    scheme = Scheme(model, parameters, data)
    problem = Problem(scheme)
    bag = problem.bag
    datasets = problem.groups

    assert len(datasets) == 1
    assert len(bag) == 3
    assert all(p.data.size == 4 for p in bag)
    assert all(p.descriptor[0].label == "dataset1" for p in bag)
    assert all(all(p.descriptor[0].axis == axis_c) for p in bag)
    assert [p.descriptor[0].index for p in bag] == axis_e
def test_multi_dataset_no_overlap():
    model = MockModel.from_dict(
        {
            "dataset": {
                "dataset1": {
                    "megacomplex": [],
                },
                "dataset2": {
                    "megacomplex": [],
                },
            }
        }
    )

    model.grouped = lambda: True
    print(model.validate())
    assert model.valid()
    assert model.grouped()

    parameters = ParameterGroup.from_list([1, 10])
    print(model.validate(parameters))
    assert model.valid(parameters)

    axis_e_1 = [1, 2, 3]
    axis_c_1 = [5, 7]
    axis_e_2 = [4, 5, 6]
    axis_c_2 = [5, 7, 9]
    data = {
        "dataset1": xr.DataArray(
            np.ones((3, 2)), coords=[("e", axis_e_1), ("c", axis_c_1)]
        ).to_dataset(name="data"),
        "dataset2": xr.DataArray(
            np.ones((3, 3)), coords=[("e", axis_e_2), ("c", axis_c_2)]
        ).to_dataset(name="data"),
    }

    scheme = Scheme(model, parameters, data)
    problem = Problem(scheme)
    bag = list(problem.bag)

    assert len(problem.groups) == 2
    assert len(bag) == 6

    assert all(p.data.size == 2 for p in bag[:3])
    assert all(p.descriptor[0].label == "dataset1" for p in bag[:3])
    assert all(all(p.descriptor[0].axis == axis_c_1) for p in bag[:3])
    assert [p.descriptor[0].index for p in bag[:3]] == axis_e_1

    assert all(p.data.size == 3 for p in bag[3:])
    assert all(p.descriptor[0].label == "dataset2" for p in bag[3:])
    assert all(all(p.descriptor[0].axis == axis_c_2) for p in bag[3:])
    assert [p.descriptor[0].index for p in bag[3:]] == axis_e_2
def test_problem_result_data(problem: Problem):
    data = problem.create_result_data()

    assert "dataset1" in data
    dataset = data["dataset1"]

    assert "clp_label" in dataset.coords
    assert np.array_equal(dataset.clp_label, ["s1", "s2", "s3", "s4"])

    assert problem.model.global_dimension in dataset.coords
    assert np.array_equal(dataset.coords[problem.model.global_dimension], suite.e_axis)

    assert problem.model.model_dimension in dataset.coords
    assert np.array_equal(dataset.coords[problem.model.model_dimension], suite.c_axis)

    assert "matrix" in dataset
    matrix = dataset.matrix
    if problem.index_dependent:
        assert len(matrix.shape) == 3
        assert matrix.shape[0] == suite.e_axis.size
        assert matrix.shape[1] == suite.c_axis.size
        assert matrix.shape[2] == 4
    else:
        assert len(matrix.shape) == 2
        assert matrix.shape[0] == suite.c_axis.size
        assert matrix.shape[1] == 4

    assert "clp" in dataset
    clp = dataset.clp
    assert len(clp.shape) == 2
    assert clp.shape[0] == suite.e_axis.size
    assert clp.shape[1] == 4

    assert "weighted_residual" in dataset
    assert dataset.data.shape == dataset.weighted_residual.shape

    assert "residual" in dataset
    assert dataset.data.shape == dataset.residual.shape

    assert "residual_singular_values" in dataset
    assert "weighted_residual_singular_values" in dataset
# A fixture that reads request.param needs to be parametrized; the exact parameter
# list below is an assumption, chosen to cover the (grouped, index_dependent)
# combinations exercised by the tests above.
@pytest.fixture(params=[[True, True], [True, False], [False, True], [False, False]])
def problem(request) -> Problem:
    model = suite.model
    model.is_grouped = request.param[0]
    model.is_index_dependent = request.param[1]

    dataset = simulate(
        suite.sim_model,
        "dataset1",
        suite.wanted_parameters,
        {"e": suite.e_axis, "c": suite.c_axis},
    )
    scheme = Scheme(
        model=model, parameters=suite.initial_parameters, data={"dataset1": dataset}
    )
    return Problem(scheme)
def optimize(scheme: Scheme, verbose: bool = True) -> Result:
    problem = Problem(scheme)
    return optimize_problem(problem, verbose=verbose)
dataset = read_data_file(data_path)
model = read_model_from_yaml_file(model_path)
parameter = read_parameters_from_yaml_file(parameter_path)

scheme = Scheme(
    model,
    parameter,
    {"dataset1": dataset},
    maximum_number_function_evaluations=9,
    non_negative_least_squares=True,
)
print(model.validate(parameters=parameter))

# The problem is constructed automatically from the scheme by the optimize call,
# but can also be created manually for debug purposes:
test_problem = Problem(scheme)

# %%
start = timer()
# Warning: this may take a while (several seconds per iteration)
result = optimize(scheme, verbose=True)
end = timer()

print(f"Total time: {end - start}")

result.save(str(output_folder))
end2 = timer()
print(f"Saving took: {end2 - end}")

# %%
print(result.markdown(True))
def test_prepare_data():
    model_dict = {
        "dataset": {
            "dataset1": {
                "megacomplex": [],
            },
        },
        "weights": [
            {
                "datasets": ["dataset1"],
                "global_interval": (np.inf, 200),
                "model_interval": (4, 8),
                "value": 0.5,
            },
        ],
    }
    model = SimpleTestModel.from_dict(model_dict)
    print(model.validate())
    assert model.valid()

    parameters = ParameterGroup.from_list([])

    global_axis = np.asarray(range(50, 300))
    model_axis = np.asarray(range(15))

    dataset = xr.DataArray(
        np.ones((global_axis.size, model_axis.size)),
        coords={"e": global_axis, "c": model_axis},
        dims=("e", "c"),
    )

    scheme = Scheme(model, parameters, {"dataset1": dataset})
    problem = Problem(scheme)

    data = problem.data["dataset1"]
    print(data)
    assert "data" in data
    assert "weight" in data
    assert data.data.shape == (model_axis.size, global_axis.size)
    assert data.data.shape == data.weight.shape
    assert np.all(data.weight.sel(e=slice(0, 200), c=slice(4, 8)).values == 0.5)
    assert np.all(data.weight.sel(c=slice(0, 3)).values == 1)

    model_dict["weights"].append(
        {
            "datasets": ["dataset1"],
            "value": 0.2,
        }
    )
    model = SimpleTestModel.from_dict(model_dict)
    print(model.validate())
    assert model.valid()

    scheme = Scheme(model, parameters, {"dataset1": dataset})
    problem = Problem(scheme)
    data = problem.data["dataset1"]
    assert np.all(data.weight.sel(e=slice(0, 200), c=slice(4, 8)).values == 0.5 * 0.2)
    assert np.all(data.weight.sel(c=slice(0, 3)).values == 0.2)

    with pytest.warns(
        UserWarning,
        match="Ignoring model weight for dataset 'dataset1'"
        " because weight is already supplied by dataset.",
    ):
        Problem(Scheme(model, parameters, {"dataset1": data}))