def test_multisource(xes: fd.ERSource):
    """Compare one source at doubled rate with two copies of that source.

    A single-source likelihood is evaluated with ``er_rate_multiplier=2.``.
    A second likelihood holds two sources of the same class and is evaluated
    with no arguments. The first elements of both ``log_likelihood`` results
    must be exactly equal.
    """
    source_class = xes.__class__
    elife_spec = (100e3, 500e3, 5)

    single = fd.LogLikelihood(
        sources={'er': source_class},
        elife=elife_spec,
        free_rates='er',
        data=xes.data)
    doubled_rate_result = single.log_likelihood(er_rate_multiplier=2.)

    double = fd.LogLikelihood(
        sources={'er': source_class, 'er2': source_class},
        elife=elife_spec,
        data=xes.data)

    # Reuse the first likelihood's mu interpolator for both sources, so that
    # jitter from the mu interpolator simulation cannot make the test fail.
    shared_itp = single.mu_itps['er']
    double.mu_itps = {'er': shared_itp, 'er2': shared_itp}

    assert double.log_likelihood()[0] == doubled_rate_result[0]
def test_constraint(xes: fd.ERSource):
    """Check that a constant ``log_constraint`` shifts the likelihood value.

    Two likelihoods are built on copies of the fixture data. The second one
    receives a constraint callable that always returns 100, and its value is
    expected to equal the unconstrained value plus 100.
    """
    plain_lf = fd.LogLikelihood(
        sources={'er': fd.ERSource},
        data=xes.data.copy())
    unconstrained = plain_lf()

    constrained_lf = fd.LogLikelihood(
        sources={'er': fd.ERSource},
        log_constraint=lambda **kwargs: 100.,
        data=xes.data.copy())

    # Fix interpolator nondeterminism: share the first likelihood's
    # mu interpolator with the second one.
    constrained_lf.mu_itps = {'er': plain_lf.mu_itps['er']}
    constrained = constrained_lf()

    np.testing.assert_almost_equal(unconstrained + 100., constrained)
def test_hessian_rateonly(xes: fd.ERSource):
    """Check the inverse Hessian of a likelihood with two free rates.

    The likelihood has two sources, the fixture's class and a subclass with
    a different ``electron_detection_eff``, both listed in ``free_rates``.
    The test checks the size of the guess, the shape and dtype of the
    inverse Hessian, and that its off-diagonal elements agree to a relative
    tolerance of 1e-3.
    """

    class Bla(xes.__class__):
        """ER source with slightly different elife
        to prevent a singular matrix
        """

        @staticmethod
        def electron_detection_eff(drift_time, *,
                                   different_elife=333e3,
                                   extraction_eff=0.96):
            return extraction_eff * tf.exp(-drift_time / different_elife)

    # Test the hessian at the guess position
    lf = fd.LogLikelihood(
        sources=dict(er=xes.__class__, er2=Bla),
        free_rates=['er', 'er2'],
        data=xes.data)

    guess = lf.guess()
    assert len(guess) == 2

    print(guess)
    # Also exercises the second-order code path of log_likelihood.
    print(lf.log_likelihood(second_order=True, **guess))

    inv_hess = lf.inverse_hessian(guess)
    assert inv_hess.shape == (2, 2)
    assert inv_hess.dtype == np.float64

    # Check symmetry of hessian
    # The hessian is explicitly symmetrized before being passed to
    # the optimizer in bestfit
    a = inv_hess[0, 1]
    b = inv_hess[1, 0]
    # Scale the tolerance by |a + b| instead of dividing by the signed sum:
    # a negative sum would make the ratio negative and the check pass
    # trivially, and a zero sum would produce NaN/inf.
    assert abs(a - b) <= 1e-3 * abs(a + b)
def test_inference(xes: fd.ERSource):
    """Exercise single-batch and full likelihood evaluation with one shape
    parameter (``elife``).

    The single-batch ``_log_likelihood`` is called for two ``elife`` values.
    The test checks the types and dtypes of the value and gradient, and that
    both change between the two calls. The full likelihood is then called
    three ways without checking the results.

    NOTE(review): a second ``test_inference`` is defined in this file. If both
    live in the same module, the later definition replaces the earlier one
    and only one of them is collected by pytest. Consider renaming one.
    """
    lf = fd.LogLikelihood(
        sources={'er': xes.__class__},
        elife=(100e3, 500e3, 5),
        data=xes.data)

    def single_batch(elife_value):
        # Evaluate batch 0 of the default dataset at the given elife
        return lf._log_likelihood(
            i_batch=tf.constant(0),
            dsetname=DEFAULT_DSETNAME,
            data_tensor=lf.data_tensors[DEFAULT_DSETNAME][0],
            batch_info=lf.batch_info,
            elife=tf.constant(elife_value))

    # Test single-batch likelihood
    x, x_grad, _ = single_batch(200e3)
    for tensor in (x, x_grad):
        assert isinstance(tensor, tf.Tensor)
    assert x.dtype == fd.float_type()
    assert x.numpy() < 0
    assert x_grad.dtype == fd.float_type()
    assert x_grad.numpy().shape == (1, )

    # Test a different parameter gives a different likelihood
    x2, x2_grad, _ = single_batch(300e3)
    assert (x - x2).numpy() != 0
    assert (x_grad - x2_grad).numpy().sum() != 0

    # Test batching (results are not inspected)
    lf.log_likelihood()
    lf()
    lf.log_likelihood(elife=tf.constant(200e3))
def test_multisource_er_nr(xes: fd.ERSource):
    """Smoke test: evaluate a likelihood with the fixture's source class and
    ``fd.NRSource`` together. The result is not inspected.
    """
    combined = fd.LogLikelihood(
        sources={'er': xes.__class__, 'nr': fd.NRSource},
        elife=(100e3, 500e3, 5),
        data=xes.data)
    combined()
def test_inference(xes: fd.ERSource):
    """Exercise likelihood evaluation with ``autograph=False``.

    The single-batch ``_log_likelihood`` is called for two ``elife`` values.
    The test checks the types and dtypes of the value and gradient, and that
    both change between the two calls. The full likelihood is then called
    three ways without checking the results.

    NOTE(review): another ``test_inference`` is defined in this file. If both
    live in the same module, the later definition replaces the earlier one
    and only one of them is collected by pytest. Consider renaming one.
    """
    lf = fd.LogLikelihood(
        sources={'er': xes.__class__},
        elife=(100e3, 500e3, 5),
        data=xes.data)

    def single_batch(elife_value):
        # Evaluate batch 0 of the default dataset, non-autograph version
        return lf._log_likelihood(
            i_batch=tf.constant(0),
            dsetname=DEFAULT_DSETNAME,
            autograph=False,
            elife=tf.constant(elife_value))

    # Test non-autograph version
    x, x_grad = single_batch(200e3)
    for tensor in (x, x_grad):
        assert isinstance(tensor, tf.Tensor)
    assert x.dtype == fd.float_type()
    assert x.numpy() < 0
    assert x_grad.dtype == fd.float_type()
    assert x_grad.numpy().shape == (1,)

    # Test a different parameter gives a different likelihood
    x2, x2_grad = single_batch(300e3)
    assert (x - x2).numpy() != 0
    assert (x_grad - x2_grad).numpy().sum() != 0

    # Test batching (results are not inspected)
    lf.log_likelihood(autograph=False)
    lf(autograph=False)
    lf.log_likelihood(elife=tf.constant(200e3), autograph=False)
def test_multi_dset(xes: fd.ERSource):
    """Check that two datasets with the same data double the log likelihood.

    A single-dataset likelihood is compared with a two-dataset likelihood in
    which each dataset is a copy of the same fixture data. Twice the first
    value must match the second to two decimals.
    """
    single_lf = fd.LogLikelihood(
        sources={'er': fd.ERSource},
        data=xes.data.copy())
    single_value = single_lf()

    multi_lf = fd.LogLikelihood(
        sources={
            'data1': {'er1': fd.ERSource},
            'data2': {'er2': fd.ERSource},
        },
        data={
            'data1': xes.data.copy(),
            'data2': xes.data.copy(),
        })

    # Fix interpolator nondeterminism: both sources reuse the interpolator
    # of the single-dataset likelihood.
    shared_itp = single_lf.mu_itps['er']
    multi_lf.mu_itps = {'er1': shared_itp, 'er2': shared_itp}
    multi_value = multi_lf()

    np.testing.assert_almost_equal(2 * single_value, multi_value, decimal=2)
def test_wimp_SR1_source(xes):
    """Simulate from an SR1 ER + WIMP likelihood and set the result as data.

    Regression test for the KeyError 't' issue caused by an
    add_extra_columns bug. The test passes if nothing raises.
    """
    sr1_lf = fd.LogLikelihood(
        sources={'er': fd.SR1ERSource, 'wimp': fd.SR1WIMPSource},
        free_rates=('er', 'wimp'))
    simulated = sr1_lf.simulate(
        er_rate_multiplier=1.0,
        wimp_rate_multiplier=0.)
    sr1_lf.set_data(simulated)
def test_simulate_column(xes):
    """Check that ``simulate`` does not crash with a ``ColumnSource`` present.

    Test for issue #47: ColumnSource has no simulator. The simulated events
    are not inspected; the test passes if no call raises.
    """
    lf = fd.LogLikelihood(
        sources={'er': fd.ERSource, 'muur': fd.ColumnSource},
        data=None)

    # Default call, a rate multiplier, and fixed truth values
    lf.simulate()
    lf.simulate(er_rate_multiplier=2.)
    lf.simulate(fix_truth={'x': 0., 'y': 0., 'z': -50.})
def test_columnsource(xes: fd.ERSource):
    """Check the likelihood value of a ``ColumnSource`` subclass.

    The source reads the ``diffrate`` column, which is set to 5 for every
    event, and has ``mu = 3.14``. The likelihood is expected to equal
    ``-mu + n_events * log(5)``.
    """

    class myColumnSource(fd.ColumnSource):
        column = "diffrate"
        mu = 3.14

    # Note: this writes the column into the fixture's own dataframe
    xes.data['diffrate'] = 5.

    lf = fd.LogLikelihood(
        sources={'muur': myColumnSource},
        data=xes.data)

    # Evaluate the likelihood before computing the expectation, keeping
    # the original evaluation order.
    observed = lf()
    expected = -3.14 + len(xes.data) * np.log(5.)
    np.testing.assert_almost_equal(observed, expected)
def test_retrace_set_data(xes: fd.ERSource):
    """Check that the likelihood value changes after ``set_data`` (issue #53).

    The likelihood is evaluated, then given a copy of the data whose ``s2``
    column is doubled, and evaluated again. The two values must differ.
    """
    lf = fd.LogLikelihood(
        sources={'er': fd.ERSource},
        data=xes.data.copy())
    before = lf()

    modified = xes.data.copy()
    modified['s2'] *= 2
    lf.set_data(modified)
    after = lf()

    # issue 53 would not have retraced ll so lf() would be unchanged
    assert before != after
def test_set_data_on_no_dset(xes: fd.ERSource):
    """Set data on likelihoods that were constructed with ``data=None``.

    For a single-dataset likelihood with ``batch_size=4``, the test checks
    the source's ``batch_size``, ``n_batches`` and ``n_padding`` after
    ``set_data`` and then evaluates it. A two-dataset likelihood is put
    through ``set_data`` and evaluated as well; its value is not inspected.
    """
    # The batch_size can be at most 2 * len(data) or padding wont work
    # which is why it is set explicitly in this test with only 2 events
    # Usually when constructing the likelihood with a very small dataset
    # the batch_size is set accordingly, but in this test with data=None
    # that is not possible (an assert has been put in Source._init_padding)
    lf = fd.LogLikelihood(
        sources={'er': fd.ERSource},
        data=None,
        batch_size=4)
    lf.set_data(xes.data.copy())

    assert lf.sources['er'].batch_size == 4
    assert lf.sources['er'].n_batches == 1
    assert lf.sources['er'].n_padding == 2

    lf()

    multi_lf = fd.LogLikelihood(
        sources={
            'data1': {'er1': fd.ERSource},
            'data2': {'er2': fd.ERSource},
        },
        data={'data1': None, 'data2': None},
        batch_size=4)
    multi_lf.set_data({
        'data1': xes.data.copy(),
        'data2': xes.data.copy(),
    })

    multi_lf()
def test_set_data(xes: fd.ERSource):
    """Check that ``set_data`` replaces the data held by each source.

    Three dataframes of different lengths are derived from the fixture data.
    After each ``set_data`` call, the ``s1`` column stored in the affected
    source, with its trailing ``n_padding`` rows removed, must equal the
    ``s1`` column of the dataframe that was passed in.
    """
    data1 = xes.data
    data2 = pd.concat([data1.copy(), data1.iloc[:1].copy()])
    data2['s1'] *= 1.3
    data3 = pd.concat([data2, data2.iloc[:1]])

    for frame in (data1, data2, data3):
        frame.reset_index(drop=True, inplace=True)

    lf = fd.LogLikelihood(
        sources={
            'data1': {'er1': fd.ERSource},
            'data2': {'er2': fd.ERSource},
        },
        data={'data1': data1, 'data2': data2})

    def internal_data(sname, col):
        # Column stored in the source, without the trailing padding rows
        source = lf.sources[sname]
        series = source.data[col]
        return series.iloc[:len(series) - source.n_padding]

    def assert_s1_matches(sname, expected):
        # Compare only the S1 column (DFs are annotated)
        pd.testing.assert_series_equal(internal_data(sname, 's1'),
                                       expected['s1'])

    # Here we don't have any padding since batch_size is n_events
    assert_s1_matches('er1', data1)
    assert_s1_matches('er2', data2)

    # Set new data for only one dataset
    lf.set_data({'data1': data2})

    # Here we might have padding
    assert_s1_matches('er1', data2)
    assert_s1_matches('er2', data2)

    # Set new data for both datasets
    lf.set_data({'data1': data1, 'data2': data3})

    assert_s1_matches('er1', data1)
    assert_s1_matches('er2', data3)

    # Test padding for smaller dsets
    lf.set_data({'data2': data1})
    assert_s1_matches('er2', data1)
def test_hessian_rate_and_shape(xes: fd.ERSource):
    """Check the inverse Hessian with one free rate and one shape parameter.

    The likelihood has a single source with ``elife`` as a shape parameter
    and ``er`` in ``free_rates``. The test checks the size of the guess, the
    shape and dtype of the inverse Hessian, and that its off-diagonal
    elements agree to a relative tolerance of 1e-3.
    """
    lf = fd.LogLikelihood(
        sources=dict(er=xes.__class__),
        elife=(100e3, 500e3, 5),
        free_rates='er',
        data=xes.data)

    guess = lf.guess()
    assert len(guess) == 2

    print(guess)
    # Also exercises the second-order code path of log_likelihood.
    print(lf.log_likelihood(second_order=True, **guess))

    inv_hess = lf.inverse_hessian(guess)
    assert inv_hess.shape == (2, 2)
    assert inv_hess.dtype == np.float64

    # Check symmetry via the two off-diagonal elements
    a = inv_hess[0, 1]
    b = inv_hess[1, 0]
    # Scale the tolerance by |a + b| instead of dividing by the signed sum:
    # a negative sum would make the ratio negative and the check pass
    # trivially, and a zero sum would produce NaN/inf.
    assert abs(a - b) <= 1e-3 * abs(a + b)
def test_bestfit_scipy(xes):
    """Run ``bestfit`` with the scipy optimizer and check its result.

    The starting ``er_rate_multiplier`` is taken from a coarse scan: the
    candidate with the smallest negative log likelihood wins. The fit
    result must be a dict with two entries.
    """
    # Test bestfit (including hessian)
    lf = fd.LogLikelihood(
        sources={'er': xes.__class__},
        elife=(100e3, 500e3, 5),
        free_rates='er',
        data=xes.data)

    guess = lf.guess()

    # Set reasonable rate
    # Evaluate the likelihood curve around the minimum
    candidates = [
        *np.linspace(0.001, 0.004, 20),  # ER source range
        *np.linspace(0.04, 0.1, 20),     # NR source range
    ]
    neg_lls = np.array([-lf(er_rate_multiplier=rate) for rate in candidates])
    guess['er_rate_multiplier'] = candidates[np.argmin(neg_lls)]
    assert len(guess) == 2

    bestfit = lf.bestfit(guess, optimizer='scipy')
    assert isinstance(bestfit, dict)
    assert len(bestfit) == 2
def test_hessian(xes: fd.ERSource):
    """Check the inverse Hessian at the guess position.

    The likelihood has a single source with ``elife`` as a shape parameter
    and ``er`` in ``free_rates``. The test checks the size of the guess, the
    shape and dtype of the inverse Hessian, and that its off-diagonal
    elements agree to a relative tolerance of 1e-3.
    """
    # Test the hessian at the guess position
    lf = fd.LogLikelihood(
        sources=dict(er=xes.__class__),
        elife=(100e3, 500e3, 5),
        free_rates='er',
        data=xes.data)

    guess = lf.guess()
    assert len(guess) == 2

    inv_hess = lf.inverse_hessian(guess)
    inv_hess_np = inv_hess.numpy()
    assert inv_hess_np.shape == (2, 2)
    assert inv_hess.dtype == fd.float_type()

    # Check symmetry of hessian
    # The hessian is explicitly symmetrized before being passed to
    # the optimizer in bestfit
    a = inv_hess_np[0, 1]
    b = inv_hess_np[1, 0]
    # Scale the tolerance by |a + b| instead of dividing by the signed sum:
    # a negative sum would make the ratio negative and the check pass
    # trivially, and a zero sum would produce NaN/inf.
    assert abs(a - b) <= 1e-3 * abs(a + b)
def test_one_parameter_interval(xes):
    """Check that limits on ``er_rate_multiplier`` bracket the best fit.

    After a scipy best fit, the 90% upper, lower and central limits are
    computed and compared with the best-fit rate multiplier. The upper
    limit is then recomputed with ``elife`` fixed to its best-fit value.
    """
    lf = fd.LogLikelihood(
        sources={'er': xes.__class__},
        elife=(100e3, 500e3, 5),
        free_rates='er',
        data=xes.data)

    guess = lf.guess()

    # Set reasonable rate
    # Evaluate the likelihood curve around the minimum
    candidates = [
        *np.linspace(0.001, 0.004, 20),  # ER source range
        *np.linspace(0.04, 0.1, 20),     # NR source range
    ]
    neg_lls = np.array([-lf(er_rate_multiplier=rate) for rate in candidates])
    guess['er_rate_multiplier'] = candidates[np.argmin(neg_lls)]
    assert len(guess) == 2

    # First find global best so we can check intervals
    bestfit = lf.bestfit(guess, optimizer='scipy')
    best_rate = bestfit['er_rate_multiplier']

    def rate_limit(kind, **extra):
        # 90% limit on the rate multiplier, starting from the best fit
        return lf.limit('er_rate_multiplier', bestfit,
                        confidence_level=0.9, kind=kind, **extra)

    upper = rate_limit('upper')
    assert upper > best_rate

    lower = rate_limit('lower')
    assert lower < best_rate

    lower, upper = rate_limit('central')
    assert lower < bestfit['er_rate_multiplier'] < upper

    # Test fixed parameter
    upper = rate_limit('upper', fix={'elife': bestfit['elife']})
    assert bestfit['er_rate_multiplier'] < upper
def test_no_dset(xes: fd.ERSource):
    """Construct likelihoods without data, in single- and multi-dataset form.

    The test passes if neither constructor raises.
    """
    fd.LogLikelihood(
        sources={'er': fd.ERSource},
        data=None)

    fd.LogLikelihood(
        sources={
            'data1': {'er1': fd.ERSource},
            'data2': {'er2': fd.ERSource},
        },
        data={'data1': None, 'data2': None})
def test_simulate(xes):
    """Call ``simulate`` on a likelihood that was constructed without data.

    The simulated events are not inspected; the test passes if no call
    raises.
    """
    lf = fd.LogLikelihood(
        sources={'er': fd.ERSource},
        data=None)

    # Default call, a rate multiplier, and fixed truth values
    lf.simulate()
    lf.simulate(er_rate_multiplier=2.)
    lf.simulate(fix_truth={'x': 0., 'y': 0., 'z': -50.})