def test_convolve_normal(self):
    """Convolving two Gaussians with equal centers adds variances in quadrature."""
    from flavio.statistics.probability import convolve_distributions
    narrow = NormalDistribution(12.4, 0.346)
    wide = NormalDistribution(12.4, 2.463)
    shifted = NormalDistribution(12.3, 2.463)
    # distributions with different central values must be rejected
    with self.assertRaises(AssertionError):
        convolve_distributions([narrow, shifted])
    combined = convolve_distributions([narrow, wide])
    self.assertIsInstance(combined, NormalDistribution)
    self.assertEqual(combined.central_value, 12.4)
    self.assertEqual(combined.standard_deviation, sqrt(0.346**2 + 2.463**2))
def test_convolve_delta(self):
    """Convolving a delta distribution with a Gaussian returns the Gaussian unchanged."""
    from flavio.statistics.probability import convolve_distributions
    delta_same = DeltaDistribution(12.4)
    gauss = NormalDistribution(12.4, 2.463)
    delta_other = DeltaDistribution(12.3)
    # summing central values of two deltas is not implemented
    with self.assertRaises(NotImplementedError):
        convolve_distributions([delta_same, delta_other], central_values='sum')
    # mismatched central values must be rejected
    with self.assertRaises(AssertionError):
        convolve_distributions([delta_other, gauss])
    combined = convolve_distributions([delta_same, gauss])
    self.assertIsInstance(combined, NormalDistribution)
    self.assertEqual(combined.central_value, 12.4)
    self.assertEqual(combined.standard_deviation, 2.463)
def test_convolve_normal(self):
    """Gaussian convolution: quadrature errors, optional summed central values."""
    from flavio.statistics.probability import convolve_distributions
    narrow = NormalDistribution(12.4, 0.346)
    wide = NormalDistribution(12.4, 2.463)
    shifted = NormalDistribution(12.3, 2.463)
    expected_sd = sqrt(0.346**2 + 2.463**2)
    # distributions with different central values must be rejected by default
    with self.assertRaises(AssertionError):
        convolve_distributions([narrow, shifted])
    combined = convolve_distributions([narrow, wide])
    self.assertIsInstance(combined, NormalDistribution)
    self.assertEqual(combined.central_value, 12.4)
    self.assertEqual(combined.standard_deviation, expected_sd)
    # with central_values='sum', central values are added instead
    summed = convolve_distributions([narrow, shifted], central_values='sum')
    self.assertIsInstance(summed, NormalDistribution)
    self.assertAlmostEqual(summed.central_value, 24.7)
    self.assertEqual(summed.standard_deviation, expected_sd)
def test_convolve_multivariate_gaussian(self):
    """Convolving two multivariate Gaussians adds their covariance matrices.

    Fix: the function previously imported the private, unused
    `_convolve_multivariate_gaussians` while actually calling
    `convolve_distributions`; import the function that is used instead.
    """
    from flavio.statistics.probability import convolve_distributions
    cov1 = np.array([[(0.2e-3)**2, 0.2e-3 * 0.5 * 0.3],
                     [0.2e-3 * 0.5 * 0.3, 0.5**2]])
    cov2 = np.array([[0.2**2, 0.5 * 0.2 * 0.4],
                     [0.5 * 0.2 * 0.4, 0.4**2]])
    cov12 = cov1 + cov2
    c1 = [2, 5]
    c2 = [-100, -250]
    p_11 = MultivariateNormalDistribution(c1, cov1)
    p_12 = MultivariateNormalDistribution(c1, cov2)
    p_22 = MultivariateNormalDistribution(c2, cov2)
    # equal central values: kept as-is, covariances add
    conv_11_12 = convolve_distributions([p_11, p_12])
    self.assertIsInstance(conv_11_12, MultivariateNormalDistribution)
    npt.assert_array_equal(conv_11_12.central_value, [2, 5])
    npt.assert_array_almost_equal(conv_11_12.covariance, cov12, decimal=15)
    # differing central values must be rejected unless summing is requested
    with self.assertRaises(AssertionError):
        convolve_distributions([p_11, p_22])
    conv_11_22 = convolve_distributions([p_11, p_22], central_values='sum')
    self.assertIsInstance(conv_11_22, MultivariateNormalDistribution)
    npt.assert_array_almost_equal(conv_11_22.covariance, cov12, decimal=15)
    npt.assert_array_equal(conv_11_22.central_value, [-100 + 2, -250 + 5])
def test_convolve_multivariate_gaussian(self):
    """Convolving two multivariate Gaussians adds their covariance matrices.

    Fix: the function previously imported the private, unused
    `_convolve_multivariate_gaussians` while actually calling
    `convolve_distributions`; import the function that is used instead.
    """
    from flavio.statistics.probability import convolve_distributions
    cov1 = np.array([[(0.2e-3)**2, 0.2e-3 * 0.5 * 0.3],
                     [0.2e-3 * 0.5 * 0.3, 0.5**2]])
    cov2 = np.array([[0.2**2, 0.5 * 0.2 * 0.4],
                     [0.5 * 0.2 * 0.4, 0.4**2]])
    cov12 = cov1 + cov2
    c1 = [2, 5]
    c2 = [-100, -250]
    p_11 = MultivariateNormalDistribution(c1, cov1)
    p_12 = MultivariateNormalDistribution(c1, cov2)
    p_22 = MultivariateNormalDistribution(c2, cov2)
    # equal central values: kept as-is, covariances add
    conv_11_12 = convolve_distributions([p_11, p_12])
    self.assertIsInstance(conv_11_12, MultivariateNormalDistribution)
    npt.assert_array_equal(conv_11_12.central_value, [2, 5])
    npt.assert_array_almost_equal(conv_11_12.covariance, cov12, decimal=15)
    # differing central values must be rejected unless summing is requested
    with self.assertRaises(AssertionError):
        convolve_distributions([p_11, p_22])
    conv_11_22 = convolve_distributions([p_11, p_22], central_values='sum')
    self.assertIsInstance(conv_11_22, MultivariateNormalDistribution)
    npt.assert_array_almost_equal(conv_11_22.covariance, cov12, decimal=15)
    npt.assert_array_equal(conv_11_22.central_value, [-100 + 2, -250 + 5])
def test_convolve_multivariate_gaussian_numerical(self):
    """Numerical multivariate convolution must agree with the analytic result.

    Fix: removed the unused local `n_11` (a numerical copy of `p_11` that
    was built but never referenced).
    """
    from flavio.statistics.probability import convolve_distributions
    cov1 = [[0.1**2, 0.1 * 0.5 * 0.3], [0.1 * 0.5 * 0.3, 0.5**2]]
    cov2 = [[0.2**2, 0.5 * 0.2 * 0.4], [0.5 * 0.2 * 0.4, 0.4**2]]
    c1 = [2, 5]
    c2 = [-100, -250]
    p_11 = MultivariateNormalDistribution(c1, cov1)
    p_12 = MultivariateNormalDistribution(c1, cov2)
    p_22 = MultivariateNormalDistribution(c2, cov2)
    n_12 = MultivariateNumericalDistribution.from_pd(p_12)
    n_22 = MultivariateNumericalDistribution.from_pd(p_22)
    # mixed analytic/numerical convolution vs. the purely analytic one
    conv_11_12_gauss = convolve_distributions([p_11, p_12])
    conv_11_12 = convolve_distributions([p_11, n_12])
    self.assertIsInstance(conv_11_12, MultivariateNumericalDistribution)
    npt.assert_array_almost_equal(conv_11_12.central_value, [2, 5], decimal=1)
    # log-pdfs should agree within numerical tolerance at several points
    self.assertAlmostEqual(conv_11_12.logpdf([2.2, 4]),
                           conv_11_12_gauss.logpdf([2.2, 4]), delta=0.1)
    self.assertAlmostEqual(conv_11_12.logpdf([2.2, 6]),
                           conv_11_12_gauss.logpdf([2.2, 6]), delta=0.1)
    self.assertAlmostEqual(conv_11_12.logpdf([1.4, 4]),
                           conv_11_12_gauss.logpdf([1.4, 4]), delta=0.2)
    self.assertAlmostEqual(conv_11_12.logpdf([1.4, 6]),
                           conv_11_12_gauss.logpdf([1.4, 6]), delta=0.1)
    # differing central values must be rejected unless summing is requested
    with self.assertRaises(AssertionError):
        convolve_distributions([p_11, n_22])
    conv_11_22 = convolve_distributions([p_11, n_22], central_values='sum')
    conv_11_22_gauss = convolve_distributions([p_11, p_22], central_values='sum')
    self.assertIsInstance(conv_11_22, MultivariateNumericalDistribution)
    npt.assert_array_almost_equal(conv_11_22.central_value,
                                  [-100 + 2, -250 + 5], decimal=1)
    self.assertAlmostEqual(conv_11_22.logpdf([2.2 - 100, 4 - 250]),
                           conv_11_22_gauss.logpdf([2.2 - 100, 4 - 250]),
                           delta=0.1)
    self.assertAlmostEqual(conv_11_22.logpdf([1.6 - 100, 5.5 - 250]),
                           conv_11_22_gauss.logpdf([1.6 - 100, 5.5 - 250]),
                           delta=0.1)
def test_convolve_multivariate_gaussian_numerical(self):
    """Numerical multivariate convolution must agree with the analytic result.

    Fix: removed the unused local `n_11` (a numerical copy of `p_11` that
    was built but never referenced).
    """
    from flavio.statistics.probability import convolve_distributions
    cov1 = [[0.1**2, 0.1 * 0.5 * 0.3], [0.1 * 0.5 * 0.3, 0.5**2]]
    cov2 = [[0.2**2, 0.5 * 0.2 * 0.4], [0.5 * 0.2 * 0.4, 0.4**2]]
    c1 = [2, 5]
    c2 = [-100, -250]
    p_11 = MultivariateNormalDistribution(c1, cov1)
    p_12 = MultivariateNormalDistribution(c1, cov2)
    p_22 = MultivariateNormalDistribution(c2, cov2)
    n_12 = MultivariateNumericalDistribution.from_pd(p_12)
    n_22 = MultivariateNumericalDistribution.from_pd(p_22)
    # mixed analytic/numerical convolution vs. the purely analytic one
    conv_11_12_gauss = convolve_distributions([p_11, p_12])
    conv_11_12 = convolve_distributions([p_11, n_12])
    self.assertIsInstance(conv_11_12, MultivariateNumericalDistribution)
    npt.assert_array_almost_equal(conv_11_12.central_value, [2, 5], decimal=1)
    # log-pdfs should agree within numerical tolerance at several points
    self.assertAlmostEqual(conv_11_12.logpdf([2.2, 4]),
                           conv_11_12_gauss.logpdf([2.2, 4]), delta=0.1)
    self.assertAlmostEqual(conv_11_12.logpdf([2.2, 6]),
                           conv_11_12_gauss.logpdf([2.2, 6]), delta=0.1)
    self.assertAlmostEqual(conv_11_12.logpdf([1.4, 4]),
                           conv_11_12_gauss.logpdf([1.4, 4]), delta=0.2)
    self.assertAlmostEqual(conv_11_12.logpdf([1.4, 6]),
                           conv_11_12_gauss.logpdf([1.4, 6]), delta=0.1)
    # differing central values must be rejected unless summing is requested
    with self.assertRaises(AssertionError):
        convolve_distributions([p_11, n_22])
    conv_11_22 = convolve_distributions([p_11, n_22], central_values='sum')
    conv_11_22_gauss = convolve_distributions([p_11, p_22], central_values='sum')
    self.assertIsInstance(conv_11_22, MultivariateNumericalDistribution)
    npt.assert_array_almost_equal(conv_11_22.central_value,
                                  [-100 + 2, -250 + 5], decimal=1)
    self.assertAlmostEqual(conv_11_22.logpdf([2.2 - 100, 4 - 250]),
                           conv_11_22_gauss.logpdf([2.2 - 100, 4 - 250]),
                           delta=0.1)
    self.assertAlmostEqual(conv_11_22.logpdf([1.6 - 100, 5.5 - 250]),
                           conv_11_22_gauss.logpdf([1.6 - 100, 5.5 - 250]),
                           delta=0.1)
def _load(obj):
    """Read measurements from a YAML stream or file.

    Returns a list of the names of all measurements read.

    Fixes:
    - use `yaml.SafeLoader` explicitly: calling `yaml.load` without a
      Loader is deprecated and can execute arbitrary Python objects from
      untrusted input; measurement files are plain data.
    - replace manual squared-error accumulation loops with `sum()` and
      `isinstance(x, dict) or isinstance(x, list)` with a type tuple.
    """
    measurements = yaml.load(obj, Loader=yaml.SafeLoader)
    for m_name, m_data in measurements.items():
        m = Measurement(m_name)
        # copy optional metadata attributes when present
        for arg in ['inspire', 'hepdata', 'experiment', 'url']:
            if arg in m_data:
                setattr(m, arg, m_data[arg])
        if 'observables' in m_data:
            # multivariate constraints given as explicit distributions
            pd = probability.dict2dist(m_data['values'])
            pd = probability.convolve_distributions(pd)
            # observables without arguments are plain strings; observables
            # with arguments are dicts {'name': myname, 'arg1': v1, ...}
            # converted to tuples (myname, v1, ...)
            obs_list = [obs if isinstance(obs, str)
                        else tuple([obs['name']]
                                   + [obs[arg]
                                      for arg in Observable[obs['name']].arguments])
                        for obs in m_data['observables']]
            m.add_constraint(obs_list, pd)
        elif 'correlation' not in m_data:
            # univariate constraints
            if isinstance(m_data['values'], list):
                for value_dict in m_data['values']:
                    args = Observable[value_dict['name']].arguments
                    # numerical values of arguments, e.g. [1, 6]
                    args_num = [value_dict[a] for a in args]
                    # prepend the name to get e.g. ('FL(B0->K*mumu)', 1, 6)
                    args_num.insert(0, value_dict['name'])
                    obs_tuple = tuple(args_num)
                    if isinstance(value_dict['value'], dict):
                        m.set_constraint(obs_tuple,
                                         constraint_dict=value_dict['value'])
                    else:
                        m.set_constraint(obs_tuple, value_dict['value'])
            else:
                # 'values' is a dict of the form name: constraint_string
                for obs, value in m_data['values'].items():
                    if isinstance(value, (dict, list)):
                        m.set_constraint(obs, constraint_dict=value)
                    else:
                        m.set_constraint(obs, value)
        else:
            # multivariate normal constraints built from per-observable
            # errors plus a correlation matrix
            observables = []
            central_values = []
            errors = []
            if isinstance(m_data['values'], list):
                for value_dict in m_data['values']:
                    # the dict contains the values of observable arguments
                    # (like q^2) alongside 'name' and 'value'
                    args = Observable[value_dict['name']].arguments
                    args_num = [value_dict[a] for a in args]
                    error_dict = errors_from_string(value_dict['value'])
                    args_num.insert(0, value_dict['name'])
                    observables.append(tuple(args_num))
                    central_values.append(error_dict['central_value'])
                    # total error: symmetric errors in quadrature plus the
                    # product of each asymmetric error pair
                    squared_error = sum(e**2 for e in error_dict['symmetric_errors'])
                    squared_error += sum(a[0] * a[1]
                                         for a in error_dict['asymmetric_errors'])
                    errors.append(sqrt(squared_error))
            else:
                # 'values' is a dict of the form name: constraint_string
                for obs, value in m_data['values'].items():
                    observables.append(obs)
                    error_dict = errors_from_string(value)
                    central_values.append(error_dict['central_value'])
                    squared_error = sum(e**2 for e in error_dict['symmetric_errors'])
                    squared_error += sum(a[0] * a[1]
                                         for a in error_dict['asymmetric_errors'])
                    errors.append(sqrt(squared_error))
            correlation = _fix_correlation_matrix(m_data['correlation'],
                                                  len(observables))
            covariance = np.outer(np.asarray(errors),
                                  np.asarray(errors)) * correlation
            if not np.all(np.linalg.eigvals(covariance) > 0):
                # if the covariance matrix is not positive definite, try a
                # dirty trick: multiply all the correlations by 0.99
                n_dim = len(correlation)
                correlation = (correlation - np.eye(n_dim)) * 0.99 + np.eye(n_dim)
                covariance = np.outer(np.asarray(errors),
                                      np.asarray(errors)) * correlation
                # if it still isn't positive definite, give up
                assert np.all(
                    np.linalg.eigvals(covariance) > 0
                ), "The covariance matrix is not positive definite!" + str(
                    covariance)
            m.add_constraint(
                observables,
                probability.MultivariateNormalDistribution(
                    central_values, covariance))
    return list(measurements.keys())
def _load(obj):
    """Read measurements from a YAML stream or file.

    Returns a list of the names of all measurements read.

    Fixes:
    - use `yaml.SafeLoader` explicitly: calling `yaml.load` without a
      Loader is deprecated and can execute arbitrary Python objects from
      untrusted input; measurement files are plain data.
    - replace manual squared-error accumulation loops with `sum()` and
      `isinstance(x, dict) or isinstance(x, list)` with a type tuple.
    """
    measurements = yaml.load(obj, Loader=yaml.SafeLoader)
    for m_name, m_data in measurements.items():
        m = Measurement(m_name)
        # copy optional metadata attributes when present
        for arg in ['inspire', 'hepdata', 'experiment', 'url', 'description']:
            if arg in m_data:
                setattr(m, arg, m_data[arg])
        if 'observables' in m_data:
            # multivariate constraints given as explicit distributions
            pd = probability.dict2dist(m_data['values'])
            pd = probability.convolve_distributions(pd)
            # observables without arguments are plain strings; observables
            # with arguments are dicts {'name': myname, 'arg1': v1, ...}
            # converted to tuples (myname, v1, ...)
            obs_list = [obs if isinstance(obs, str)
                        else tuple([obs['name']]
                                   + [obs[arg]
                                      for arg in Observable[obs['name']].arguments])
                        for obs in m_data['observables']]
            m.add_constraint(obs_list, pd)
        elif 'correlation' not in m_data:
            # univariate constraints
            if isinstance(m_data['values'], list):
                for value_dict in m_data['values']:
                    args = Observable[value_dict['name']].arguments
                    # numerical values of arguments, e.g. [1, 6]
                    args_num = [value_dict[a] for a in args]
                    # prepend the name to get e.g. ('FL(B0->K*mumu)', 1, 6)
                    args_num.insert(0, value_dict['name'])
                    obs_tuple = tuple(args_num)
                    if isinstance(value_dict['value'], dict):
                        m.set_constraint(obs_tuple,
                                         constraint_dict=value_dict['value'])
                    else:
                        m.set_constraint(obs_tuple, value_dict['value'])
            else:
                # 'values' is a dict of the form name: constraint_string
                for obs, value in m_data['values'].items():
                    if isinstance(value, (dict, list)):
                        m.set_constraint(obs, constraint_dict=value)
                    else:
                        m.set_constraint(obs, value)
        else:
            # multivariate normal constraints built from per-observable
            # errors plus a correlation matrix
            observables = []
            central_values = []
            errors = []
            if isinstance(m_data['values'], list):
                for value_dict in m_data['values']:
                    # the dict contains the values of observable arguments
                    # (like q^2) alongside 'name' and 'value'
                    args = Observable[value_dict['name']].arguments
                    args_num = [value_dict[a] for a in args]
                    error_dict = errors_from_string(value_dict['value'])
                    args_num.insert(0, value_dict['name'])
                    observables.append(tuple(args_num))
                    central_values.append(error_dict['central_value'])
                    # total error: symmetric errors in quadrature plus the
                    # product of each asymmetric error pair
                    squared_error = sum(e**2 for e in error_dict['symmetric_errors'])
                    squared_error += sum(a[0] * a[1]
                                         for a in error_dict['asymmetric_errors'])
                    errors.append(sqrt(squared_error))
            else:
                # 'values' is a dict of the form name: constraint_string
                for obs, value in m_data['values'].items():
                    observables.append(obs)
                    error_dict = errors_from_string(value)
                    central_values.append(error_dict['central_value'])
                    squared_error = sum(e**2 for e in error_dict['symmetric_errors'])
                    squared_error += sum(a[0] * a[1]
                                         for a in error_dict['asymmetric_errors'])
                    errors.append(sqrt(squared_error))
            correlation = _fix_correlation_matrix(m_data['correlation'],
                                                  len(observables))
            covariance = np.outer(np.asarray(errors),
                                  np.asarray(errors)) * correlation
            m.add_constraint(
                observables,
                probability.MultivariateNormalDistribution(
                    central_values, covariance))
    return list(measurements.keys())