def build_histogram(self):
    """Histogram the density-estimate events and derive a PDF from them.

    Fills a Histdd over ``self.config['analysis_space']`` with the events
    provided by ``self.get_events_for_density_estimate``, then stores:

    - ``self.fraction_in_range``: events in the histogram / events simulated
    - ``self._pdf_histogram``: counts / (events in histogram * bin volume)
    - ``self._bin_volumes``: volume of every bin in the analysis space
    - ``self._n_events_histogram``: the raw counts histogram

    :return: the raw counts histogram (same object as
        ``self._n_events_histogram``)
    """
    # Get the events to estimate the PDF
    dimnames, bins = zip(*self.config['analysis_space'])
    mh = Histdd(bins=bins, axis_names=dimnames)

    # Get a generator function which will give us the events
    get = self.get_events_for_density_estimate
    if not inspect.isgeneratorfunction(get):
        # Plain function: wrap its single result so the loop below can
        # treat both cases the same way.
        def get():
            return [self.get_events_for_density_estimate()]

    n_events = 0
    for events, n_simulated in get():
        n_events += n_simulated
        mh.add(*utils._events_to_analysis_dimensions(
            events, self.config['analysis_space']))

    self.fraction_in_range = mh.n / n_events

    # Convert the histogram to a density estimate
    # This means we have to divide by
    #  - the number of events IN RANGE received
    #    (fraction_in_range keeps track of how many events were not in range)
    #  - the bin sizes
    self._pdf_histogram = mh.similar_blank_hist()
    # The builtin float replaces the np.float alias, which newer NumPy
    # releases no longer provide; the resulting dtype is the same.
    self._pdf_histogram.histogram = mh.histogram.astype(float) / mh.n

    # For the bin widths we need to take an outer product of several vectors,
    # for which numpy has no builtin. This reduce trick does the job instead,
    # see http://stackoverflow.com/questions/17138393
    self._bin_volumes = reduce(np.multiply,
                               np.ix_(*[np.diff(bs) for bs in bins]))
    self._pdf_histogram.histogram /= self._bin_volumes

    self._n_events_histogram = mh
    return mh
def set_data(self, d):
    """Store the dataset ``d`` and bin it in the model's analysis space.

    After the base-class bookkeeping, the binned counts are kept in
    ``self.data_events_per_bin``.
    """
    LogLikelihoodBase.set_data(self, d)

    # One (axis name, bin edges) pair per analysis dimension
    axis_names, bin_edges = zip(*self.base_model.config['analysis_space'])
    binned = Histdd(bins=bin_edges, axis_names=axis_names)
    self.data_events_per_bin = binned
    binned.add(*self.base_model.to_analysis_dimensions(d))
def xes(request):
    """Build the source selected by ``request.param`` on a two-event dataset.

    Supported values of ``request.param`` are 'ER', 'NR', 'WIMP' and
    'ER_spatial'. The last one is an ER source subclass carrying a
    (r, theta, z) rate histogram and the matching bin volumes.

    :raises ValueError: if ``request.param`` is not one of the above.
    """
    # warnings.filterwarnings("error")
    data = pd.DataFrame([
        dict(s1=56., s2=2905., drift_time=143465.,
             x=2., y=0.4, z=-20, r=2.1, theta=0.1,
             event_time=1579784955000000000),
        dict(s1=23, s2=1080., drift_time=445622.,
             x=1.12, y=0.35, z=-59., r=1., theta=0.3,
             event_time=1579784956000000000)
    ])
    source_kwargs = dict(batch_size=2, max_sigma=8)

    if request.param == 'ER':
        x = fd.ERSource(data.copy(), **source_kwargs)
    elif request.param == 'NR':
        x = fd.NRSource(data.copy(), **source_kwargs)
    elif request.param == 'WIMP':
        x = fd.WIMPSource(data.copy(), **source_kwargs)
    elif request.param == 'ER_spatial':
        nbins = 100
        r = np.linspace(0, 47.9, nbins + 1)
        z = np.linspace(-97.6, 0, nbins + 1)
        theta = np.linspace(0, 2 * np.pi, nbins + 1)

        # Construct PDF histogram
        h = Histdd(bins=[r, theta, z], axis_names=['r', 'theta', 'z'])
        h.histogram = np.ones((nbins, nbins, nbins))

        # Calculate bin volumes for cylindrical coords (r dr dtheta)
        r_c, _, _ = h.bin_centers()
        bin_volumes = h.bin_volumes() * r_c[:, np.newaxis, np.newaxis]

        # Convert to events per bin histogram
        h.histogram *= bin_volumes

        class ERSpatial(fd.ERSource):
            spatial_rate_hist = h
            spatial_rate_bin_volumes = bin_volumes

        x = ERSpatial(data.copy(), **source_kwargs)
    else:
        # Without this branch an unknown parameter only surfaced as an
        # UnboundLocalError on the return statement below.
        raise ValueError(
            f"Unknown source type requested: {request.param!r}")
    return x
def xes(request):
    """Build the source selected by ``request.param`` on a two-event dataset.

    Supported values of ``request.param`` are 'ER', 'NR', 'WIMP' and
    'ER_spatial'. The last one is an ER source subclass whose
    ``spatial_hist`` is a flat (r, theta, z) histogram.

    :raises ValueError: if ``request.param`` is not one of the above.
    """
    # warnings.filterwarnings("error")
    data = pd.DataFrame([
        dict(s1=56., s2=2905., drift_time=143465.,
             x=2., y=0.4, z=-20, r=2.1, theta=0.1,
             event_time=1483488000000000000),
        dict(s1=23, s2=1080., drift_time=445622.,
             x=1.12, y=0.35, z=-59., r=1., theta=0.3,
             event_time=1483488000000000000)
    ])
    source_kwargs = dict(batch_size=2, max_sigma=8)

    if request.param == 'ER':
        x = fd.ERSource(data.copy(), **source_kwargs)
    elif request.param == 'NR':
        x = fd.NRSource(data.copy(), **source_kwargs)
    elif request.param == 'WIMP':
        x = fd.WIMPSource(data.copy(), **source_kwargs)
    elif request.param == 'ER_spatial':
        nbins = 100
        r = np.linspace(0, 47.9, nbins + 1)
        z = np.linspace(-97.6, 0, nbins + 1)
        theta = np.linspace(0, 2 * np.pi, nbins + 1)

        h = Histdd(bins=[r, theta, z], axis_names=['r', 'theta', 'z'])
        h.histogram = np.ones((nbins, nbins, nbins))

        class ERSpatial(fd.ERSource):
            spatial_hist = h

        x = ERSpatial(data.copy(), **source_kwargs)
    else:
        # Without this branch an unknown parameter only surfaced as an
        # UnboundLocalError on the return statement below.
        raise ValueError(
            f"Unknown source type requested: {request.param!r}")
    return x
def _plot_mh_percentile(
        mh: multihist.Histdd,
        percentile: int,
        **kwargs,
):
    """Draw one percentile curve of ``mh`` on the current matplotlib axes.

    The percentile is taken along the second axis of ``mh``
    (``mh.axis_names[1]``) and plotted against the bin centers of the
    resulting histogram.

    :param mh: histogram to take the percentile of
    :param percentile: percentile passed on to ``mh.percentile``
    :param kwargs: forwarded to ``plt.plot``; 'drawstyle' defaults to
        'steps-mid' when not given
    """
    if 'drawstyle' not in kwargs:
        kwargs['drawstyle'] = 'steps-mid'

    along_axis = mh.axis_names[1]
    curve = mh.percentile(percentile, along_axis)
    plt.plot(curve.bin_centers, curve, **kwargs)
def __init__(self, *args, wimp_kwargs=None, **kwargs):
    """Precompute the (time, energy) WIMP spectrum, then set up the source.

    :param wimp_kwargs: optional dict of keyword arguments forwarded to
        ``wr.rate_wimp_std``. When given it must contain 'mw' and
        'sigma_nucleon'; an optional 'es' entry holds the energy bin edges
        (``self.es`` is used otherwise). The dict is not modified.
        When None, ``self.mw``, ``self.sigma_nucleon`` and ``self.es``
        are used.
    :param args: positional arguments for the parent initializer
    :param kwargs: keyword arguments for the parent initializer
    :raises RuntimeError: if the removed ``n_in`` attribute is still set
    """
    # Compute the energy spectrum in a given time range
    # Times used by wimprates are J2000 timestamps
    assert self.n_time_bins >= 1, "Need >= 1 time bin"
    if hasattr(self, 'n_in'):
        raise RuntimeError(
            "n_in is gone! Use n_time_bins to control accuracy, or set "
            "pretend_wimps_dont_modulate to use a time-averaged spectrum.")

    times = np.linspace(wr.j2000(self.t_start.value),
                        wr.j2000(self.t_stop.value),
                        self.n_time_bins + 1)
    time_centers = self.bin_centers(times)

    if wimp_kwargs is None:
        # No arguments given at all;
        # use default mass, xsec and energy range
        wimp_kwargs = dict(mw=self.mw,
                           sigma_nucleon=self.sigma_nucleon)
    else:
        assert 'mw' in wimp_kwargs and 'sigma_nucleon' in wimp_kwargs, \
            "Pass at least 'mw' and 'sigma_nucleon' in wimp_kwargs"
        # Work on a copy: 'es' is stripped below, and doing that on the
        # caller's dict would drop their energy grid if they reuse it.
        wimp_kwargs = dict(wimp_kwargs)

    # Energy bin edges: the caller's if given, the defaults otherwise.
    # They are removed from wimp_kwargs to avoid confusion centers / edges.
    if 'es' in wimp_kwargs:
        es = wimp_kwargs.pop('es')
    else:
        es = self.es
    es_centers = self.bin_centers(es)
    bin_widths = np.diff(es)

    # wimprates wants energies at bin centers; multiplying the
    # differential rate by the bin width gives the rate per bin.
    spectra = np.array([
        wr.rate_wimp_std(t=t, es=es_centers, **wimp_kwargs) * bin_widths
        for t in time_centers])
    assert spectra.shape == (len(time_centers), len(es_centers))

    self.energy_hist = Histdd.from_histogram(spectra, bin_edges=(times, es))

    if self.pretend_wimps_dont_modulate:
        # Replace every time bin by the time-averaged spectrum
        self.energy_hist.histogram = (
            np.ones_like(self.energy_hist.histogram)
            * self.energy_hist.sum(axis=0).histogram.reshape(1, -1)
            / self.n_time_bins)

    # Initialize the rest of the source, needs to be after energy_hist is
    # computed because of _populate_tensor_cache
    super().__init__(*args, **kwargs)
def build_histogram(self):
    """Histogram the density-estimate events and derive a PDF from them.

    Fills a Histdd over ``self.config['analysis_space']`` with the events
    provided by ``self.get_events_for_density_estimate``, then stores:

    - ``self.fraction_in_range``: events in the histogram / events simulated
    - ``self._pdf_histogram``: counts / (events in histogram * bin volume)
    - ``self._bin_volumes``: volume of every bin in the analysis space
    - ``self._n_events_histogram``: the raw counts histogram

    :return: the raw counts histogram (same object as
        ``self._n_events_histogram``)
    """
    # Get the events to estimate the PDF
    dimnames, bins = zip(*self.config['analysis_space'])
    mh = Histdd(bins=bins, axis_names=dimnames)

    # Get a generator function which will give us the events
    get = self.get_events_for_density_estimate
    if not inspect.isgeneratorfunction(get):
        # Plain function: wrap its single result so the loop below can
        # treat both cases the same way.
        def get():
            return [self.get_events_for_density_estimate()]

    n_events = 0
    for events, n_simulated in get():
        n_events += n_simulated
        mh.add(*utils._events_to_analysis_dimensions(
            events, self.config['analysis_space']))

    self.fraction_in_range = mh.n / n_events

    # Convert the histogram to a density estimate
    # This means we have to divide by
    #  - the number of events IN RANGE received
    #    (fraction_in_range keeps track of how many events were not in range)
    #  - the bin sizes
    self._pdf_histogram = mh.similar_blank_hist()
    # The builtin float replaces the np.float alias, which newer NumPy
    # releases no longer provide; the resulting dtype is the same.
    self._pdf_histogram.histogram = mh.histogram.astype(float) / mh.n

    # For the bin widths we need to take an outer product of several vectors,
    # for which numpy has no builtin. This reduce trick does the job instead,
    # see http://stackoverflow.com/questions/17138393
    self._bin_volumes = reduce(np.multiply,
                               np.ix_(*[np.diff(bs) for bs in bins]))
    self._pdf_histogram.histogram /= self._bin_volumes

    self._n_events_histogram = mh
    return mh
def __init__(self, *args, wimp_kwargs=None, **kwargs):
    """Precompute the (time, energy) WIMP spectrum, then set up the source.

    :param wimp_kwargs: optional dict of keyword arguments forwarded to
        ``wr.rate_wimp_std``. When given it must contain 'mw' and
        'sigma_nucleon'; an optional 'es' entry holds the energy bin edges
        (``self.es`` is used otherwise). The dict is not modified.
        When None, ``self.mw``, ``self.sigma_nucleon`` and ``self.es``
        are used.
    :param args: positional arguments for the parent initializer
    :param kwargs: keyword arguments for the parent initializer
    """
    # Compute the energy spectrum in a given time range
    # Times used by wimprates are J2000 timestamps
    assert self.n_in > 1, \
        "Number of time bin edges needs to be at least 2"

    # self.n_in counts the time bin EDGES
    times = np.linspace(wr.j2000(date=self.t_start),
                        wr.j2000(date=self.t_stop),
                        self.n_in)
    time_centers = self.bin_centers(times)

    if wimp_kwargs is None:
        # No arguments given at all;
        # use default mass, xsec and energy range
        wimp_kwargs = dict(mw=self.mw,
                           sigma_nucleon=self.sigma_nucleon)
    else:
        assert 'mw' in wimp_kwargs and 'sigma_nucleon' in wimp_kwargs, \
            "Pass at least 'mw' and 'sigma_nucleon' in wimp_kwargs"
        # Work on a copy: 'es' is stripped below, and doing that on the
        # caller's dict would drop their energy grid if they reuse it.
        wimp_kwargs = dict(wimp_kwargs)

    # Energy bin edges: the caller's if given, the defaults otherwise.
    # They are removed from wimp_kwargs to avoid confusion centers / edges.
    if 'es' in wimp_kwargs:
        es = wimp_kwargs.pop('es')
    else:
        es = self.es
    es_centers = self.bin_centers(es)
    bin_widths = np.diff(es)

    # wimprates wants energies at bin centers; multiplying the
    # differential rate by the bin width gives the rate per bin.
    spectra = np.array([
        wr.rate_wimp_std(t=t, es=es_centers, **wimp_kwargs) * bin_widths
        for t in time_centers])
    assert spectra.shape == (len(time_centers), len(es_centers))

    self.energy_hist = Histdd.from_histogram(spectra, bin_edges=(times, es))

    # Initialize the rest of the source, needs to be after energy_hist is
    # computed because of _populate_tensor_cache
    super().__init__(*args, **kwargs)
class BinnedLogLikelihood(LogLikelihoodBase):
    """Log likelihood for data binned in the model's analysis space.

    Every bin contributes a Poisson term (see ``_compute_likelihood``).
    When the 'model_statistical_uncertainty_handling' config option equals
    'bb_single', the expectation of one source is adjusted per bin before
    the likelihood is evaluated (see ``adjust_expectations``).
    """

    def __init__(self, pdf_base_config, likelihood_config=None, **kwargs):
        LogLikelihoodBase.__init__(self, pdf_base_config, likelihood_config,
                                   **kwargs)
        # NOTE(review): this writes into the caller's dict, and only after
        # the base initializer has already received it -- confirm intended.
        pdf_base_config['pdf_interpolation_method'] = 'piecewise'
        handling = self.config.get('model_statistical_uncertainty_handling')
        self.model_statistical_uncertainty_handling = handling

    @inherit_docstring_from(LogLikelihoodBase)
    def prepare(self, *args):
        # NOTE(review): every other base call in this class goes through
        # LogLikelihoodBase; confirm LogLikelihood is the intended name here.
        LogLikelihood.prepare(self, *args)
        self.ps, self.n_model_events = self.base_model.pmf_grids()

        # Interpolators are only built when there are shape parameters
        if not len(self.shape_parameters):
            return

        def interpolator_for(grid_index):
            # pmf_grids() yields (ps, n_model_events); interpolate one of them
            return self.morpher.make_interpolator(
                f=lambda m: m.pmf_grids()[grid_index],
                extra_dims=list(self.ps.shape),
                anchor_models=self.anchor_models)

        self.ps_interpolator = interpolator_for(0)
        if self.model_statistical_uncertainty_handling is not None:
            self.n_model_events_interpolator = interpolator_for(1)

    @inherit_docstring_from(LogLikelihoodBase)
    def set_data(self, d):
        LogLikelihoodBase.set_data(self, d)

        # Bin the data in the analysis space
        axis_names, bin_edges = zip(*self.base_model.config['analysis_space'])
        self.data_events_per_bin = Histdd(bins=bin_edges,
                                          axis_names=axis_names)
        self.data_events_per_bin.add(
            *self.base_model.to_analysis_dimensions(d))

    @inherit_docstring_from(LogLikelihoodBase)
    def _compute_single_pdf(self, **kwargs):
        model = self._compute_single_model(**kwargs)
        mus = model.expected_events()
        ps, n_model_events = model.pmf_grids()
        return mus, ps, n_model_events

    @_needs_data
    @inherit_docstring_from(LogLikelihoodBase)
    def adjust_expectations(self, mus, pmfs, n_model_events):
        # Only the 'bb_single' mode changes anything
        if self.model_statistical_uncertainty_handling != 'bb_single':
            return mus, pmfs

        source_i = self.config.get('bb_single_source')
        if source_i is None:
            raise ValueError("You need to specify bb_single_source to use bb_single_source expectation adjustment")
        source_i = self.base_model.get_source_i(source_i)
        assert pmfs.shape == n_model_events.shape

        # Get the number of events expected for the sources we will NOT
        # adjust: scale every other source by its mu, zero the adjusted one.
        counts_per_bin = pmfs.copy()
        for i, (mu, source_counts) in enumerate(zip(mus, counts_per_bin)):
            # source_counts is a view, so this edits counts_per_bin in place
            source_counts *= 0. if i == source_i else mu
        u_bins = np.sum(counts_per_bin, axis=0)

        a_bins = n_model_events[source_i]
        p_calibration = mus[source_i] / a_bins.sum()
        observed = self.data_events_per_bin.histogram

        A_bins_1, A_bins_2 = beeston_barlow_roots(a_bins, p_calibration,
                                                  u_bins, observed)
        assert np.all(A_bins_1 <= 0)  # it seems(?) the 1st root is always negative

        # For U=0, the solution above is singular; we need to use a special
        # case instead
        A_bins_special = (observed + a_bins) / (1. + p_calibration)
        A_bins = np.choose(u_bins == 0, [A_bins_2, A_bins_special])
        assert np.all(0 <= A_bins)

        A_total = A_bins.sum()
        pmfs[source_i] = A_bins / A_total
        mus[source_i] = A_total * p_calibration
        return mus, pmfs

    def _compute_likelihood(self, mus, pmfs):
        """Return binned Poisson log likelihood

        :param mus: numpy array with expected rates for each source
        :param pmfs: array (sources, *analysis_space) of PMFs for each source
            in each bin
        """
        expected_counts = pmfs.copy()
        for mu, per_source in zip(mus, expected_counts):
            # per_source is a view into expected_counts, so the in-place
            # multiplication scales the copy itself
            per_source *= mu
        expected_total = np.sum(expected_counts, axis=0)

        observed_counts = self.data_events_per_bin.histogram
        per_bin = (observed_counts * np.log(expected_total)
                   - expected_total
                   - gammaln(observed_counts + 1.).real)
        return np.sum(per_bin)
def plot_peaks_aft_histogram(context, run_id, peaks,
                             pe_bins=np.logspace(0, 7, 120),
                             width_bins=np.geomspace(2, 1e5, 120),
                             extra_labels=tuple(),
                             rate_range=(1e-4, 1),
                             aft_range=(0, .85),
                             figsize=(14, 5)):
    """Plot side-by-side (area, width) histograms of the peak rate
    and mean area fraction top.

    :param context: context used to look up the run's start/end metadata
    :param run_id: run whose metadata is looked up
    :param peaks: peaks to histogram; the 'area', 'range_50p_area' and
        'area_fraction_top' fields are binned
    :param pe_bins: bin edges for the 'area' dimension
    :param width_bins: bin edges for the 'range_50p_area' dimension
    :param extra_labels: iterable of (area, width, text, color) labels drawn
        in addition to the built-in ones
    :param rate_range: (vmin, vmax) of the rate color scale, in
        peaks / (bin * s)
    :param aft_range: (vmin, vmax) of the mean area fraction top color scale
    :param figsize: figure size handed to ``plt.subplots``
    """
    try:
        md = context.run_metadata(run_id, projection=('start', 'end'))
        livetime_sec = (md['end'] - md['start']).total_seconds()
    except strax.RunMetadataNotAvailable:
        # No metadata: use the time span covered by the peaks (ns -> s)
        livetime_sec = (strax.endtime(peaks)[-1] - peaks[0]['time']) / 1e9

    mh = Histdd(peaks,
                dimensions=(
                    ('area', pe_bins),
                    ('range_50p_area', width_bins),
                    ('area_fraction_top', np.linspace(0, 1, 100))))

    f, axes = plt.subplots(1, 2, figsize=figsize)

    def std_axes():
        # Shared styling and region labels for both panels
        plt.gca().set_facecolor('k')
        plt.yscale('log')
        plt.xscale('log')
        plt.xlabel("Area [PE]")
        plt.ylabel("Range 50% area [ns]")
        labels = [
            (12, 8, "AP?", 'white'),
            (3, 150, "1PE\npileup", 'gray'),
            (30, 200, "1e", 'gray'),
            (100, 1000, "n-e", 'w'),
            (2000, 2e4, "Train", 'gray'),
            (1200, 50, "S1", 'w'),
            (45e3, 60, "αS1", 'w'),
            (2e5, 800, "S2", 'w'),
        ] + list(extra_labels)
        for x, w, text, color in labels:
            plt.text(x, w, text, color=color,
                     verticalalignment='center',
                     horizontalalignment='center')

    plt.sca(axes[0])
    # The counts are divided by a livetime in seconds, so the rate is per
    # second; the label previously claimed per hour.
    (mh / livetime_sec).sum(axis=2).plot(
        log_scale=True,
        vmin=rate_range[0], vmax=rate_range[1],
        colorbar_kwargs=dict(extend='both'),
        cblabel='Peaks / (bin * s)')
    std_axes()

    plt.sca(axes[1])
    mh.average(axis=2).plot(
        vmin=aft_range[0], vmax=aft_range[1],
        colorbar_kwargs=dict(extend='max'),
        cmap=plt.cm.jet, cblabel='Mean area fraction top')
    std_axes()
    plt.tight_layout()
df = aft_peak_cut(df) #Binning window_length = 10**8 t_bins = np.linspace(0, window_length, 201) t_bin_width = t_bins[1:] - t_bins[:-1] r_bins = np.linspace(0, (R_tpc)**2, 101) dist_bins = np.linspace(-R_tpc, R_tpc, 101) s2_bins = np.linspace(2, 6, 51) s2_p_bins = np.linspace(0, 4, 51) #Define Blank Hists livet_histogram = Histdd(bins=[ t_bins, s2_bins, ], axis_names=[ 'delta_T', 's2_area', ]) livet_weights_histogram = Histdd(bins=[ t_bins, s2_bins, ], axis_names=[ 'delta_T', 's2_area', ]) events_histogram = Histdd(bins=[ t_bins,
def setUp(self):
    """Give every test a fresh, empty histogram with axes 'foo' and 'bar'."""
    axis_names = ['foo', 'bar']
    self.m = Histdd(bins=test_bins_2d,
                    range=test_range_2d,
                    axis_names=axis_names)
class TestHistdd(TestCase):
    """Tests for filling, projecting and cumulating a two-dimensional Histdd."""

    def setUp(self):
        self.m = Histdd(range=test_range_2d,
                        bins=test_bins_2d,
                        axis_names=['foo', 'bar'])

    @staticmethod
    def _numpy_counts(xs, ys):
        """Reference counts from numpy for the binning shared by all tests."""
        counts, _, _ = np.histogram2d(xs, ys,
                                      range=test_range_2d,
                                      bins=test_bins_2d)
        return counts.tolist()

    def test_is_instance(self):
        self.assertIsInstance(self.m, Histdd)

    def test_add_data(self):
        hist = self.m
        x = [0.1, 0.8, -0.4]
        y = [0, 0, 0]

        hist.add(x, y)
        self.assertEqual(hist.histogram.tolist(), self._numpy_counts(x, y))

        # Adding the same points again must match numpy fed every point
        # twice (x * 2 repeats the list, it does not scale the values).
        hist.add(x, y)
        self.assertEqual(hist.histogram.tolist(),
                         self._numpy_counts(x * 2, y * 2))

        # These points must leave the counts unchanged
        hist.add([999, 999], [111, 111])
        self.assertEqual(hist.histogram.tolist(),
                         self._numpy_counts(x * 2, y * 2))

    def test_pandas(self):
        import pandas as pd
        hist = self.m
        frame = pd.DataFrame([{'foo': 0, 'bar': 0},
                              {'foo': 0, 'bar': 5}])
        hist.add(frame)
        self.assertEqual(hist.histogram.tolist(),
                         self._numpy_counts([0, 0], [0, 5]))

    def test_projection(self):
        hist = self.m
        x = [0.1, 0.8, -0.4]
        y = [0, 0, 0]
        hist.add(x, y)

        # NOTE(review): the edge checks below sum signed differences, so
        # deviations of opposite sign cancel; an element-wise comparison
        # would be stricter.
        reference_edges = np.array([-1, -1 / 3, 1 / 3, 1])

        p1 = hist.projection(0)
        self.assertEqual(p1.histogram.tolist(), [1, 1, 1])
        self.assertAlmostEqual(np.sum(p1.bin_edges - reference_edges), 0)

        p2 = hist.projection(1)
        self.assertEqual(p2.histogram.tolist(), [0, 3, 0])
        self.assertAlmostEqual(np.sum(p2.bin_edges - reference_edges), 0)

        # Selecting the axis by name must agree with selecting it by index
        by_name = hist.projection('bar')
        self.assertEqual(p2.histogram.tolist(), by_name.histogram.tolist())
        self.assertEqual(p2.bin_edges.tolist(), by_name.bin_edges.tolist())

    def test_cumulate(self):
        points = ([-1, 0, 1], [-10, 0, 10])
        self.m.add(*points)
        np.testing.assert_equal(
            self.m.histogram,
            np.array([[1, 0, 0],
                      [0, 1, 0],
                      [0, 0, 1]]))
        np.testing.assert_equal(
            self.m.cumulate(0).histogram,
            np.array([[1, 0, 0],
                      [1, 1, 0],
                      [1, 1, 1]]))
        np.testing.assert_equal(
            self.m.cumulate(1).histogram,
            np.array([[1, 1, 1],
                      [0, 1, 1],
                      [0, 0, 1]]))
        np.testing.assert_equal(
            self.m.cumulate(1).histogram,
            self.m.cumulative_density(1).histogram)

        # After a second fill the cumulated counts are expected to be
        # twice the cumulative density.
        self.m.add(*points)
        np.testing.assert_equal(
            self.m.cumulate(1).histogram,
            2 * self.m.cumulative_density(1).histogram)
class TestHistdd(TestCase):
    """Tests for filling, projecting and cumulating a two-dimensional Histdd."""

    def setUp(self):
        self.m = Histdd(range=test_range_2d,
                        bins=test_bins_2d,
                        axis_names=['foo', 'bar'])

    def test_is_instance(self):
        self.assertIsInstance(self.m, Histdd)

    def test_add_data(self):
        def numpy_counts(xs, ys):
            # Reference result from numpy with the same binning
            return np.histogram2d(xs, ys,
                                  range=test_range_2d,
                                  bins=test_bins_2d)[0].tolist()

        hist = self.m
        x = [0.1, 0.8, -0.4]
        y = [0, 0, 0]

        hist.add(x, y)
        self.assertEqual(hist.histogram.tolist(), numpy_counts(x, y))

        # Adding the same points again must match numpy fed every point
        # twice (x*2 repeats the list, it does not scale the values).
        hist.add(x, y)
        self.assertEqual(hist.histogram.tolist(), numpy_counts(x*2, y*2))

        # These points must leave the counts unchanged
        hist.add([999, 999], [111, 111])
        self.assertEqual(hist.histogram.tolist(), numpy_counts(x*2, y*2))

    def test_pandas(self):
        import pandas as pd
        hist = self.m
        rows = [{'foo': 0, 'bar': 0}, {'foo': 0, 'bar': 5}]
        hist.add(pd.DataFrame(rows))
        expected = np.histogram2d([0, 0], [0, 5],
                                  range=test_range_2d,
                                  bins=test_bins_2d)[0].tolist()
        self.assertEqual(hist.histogram.tolist(), expected)

    def test_projection(self):
        hist = self.m
        hist.add([0.1, 0.8, -0.4], [0, 0, 0])

        # NOTE(review): the edge checks below sum signed differences, so
        # deviations of opposite sign cancel; an element-wise comparison
        # would be stricter.
        reference_edges = np.array([-1, -1/3, 1/3, 1])

        first = hist.projection(0)
        self.assertEqual(first.histogram.tolist(), [1, 1, 1])
        self.assertAlmostEqual(np.sum(first.bin_edges - reference_edges), 0)

        second = hist.projection(1)
        self.assertEqual(second.histogram.tolist(), [0, 3, 0])
        self.assertAlmostEqual(np.sum(second.bin_edges - reference_edges), 0)

        # Selecting the axis by name must agree with selecting it by index
        named = hist.projection('bar')
        self.assertEqual(second.histogram.tolist(), named.histogram.tolist())
        self.assertEqual(second.bin_edges.tolist(), named.bin_edges.tolist())

    def test_cumulate(self):
        xs, ys = [-1, 0, 1], [-10, 0, 10]
        self.m.add(xs, ys)

        diagonal = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        lower = np.array([[1, 0, 0], [1, 1, 0], [1, 1, 1]])
        upper = np.array([[1, 1, 1], [0, 1, 1], [0, 0, 1]])

        np.testing.assert_equal(self.m.histogram, diagonal)
        np.testing.assert_equal(self.m.cumulate(0).histogram, lower)
        np.testing.assert_equal(self.m.cumulate(1).histogram, upper)
        np.testing.assert_equal(self.m.cumulate(1).histogram,
                                self.m.cumulative_density(1).histogram)

        # After a second fill the cumulated counts are expected to be
        # twice the cumulative density.
        self.m.add(xs, ys)
        np.testing.assert_equal(self.m.cumulate(1).histogram,
                                2 * self.m.cumulative_density(1).histogram)
def set_data(self, d):
    """Store the dataset ``d`` and bin it in the model's analysis space.

    After the base-class bookkeeping, the binned counts are kept in
    ``self.data_events_per_bin``.
    """
    LogLikelihoodBase.set_data(self, d)

    # The analysis space is a sequence of (axis name, bin edges) pairs
    analysis_space = self.base_model.config['analysis_space']
    axis_names, bin_edges = zip(*analysis_space)

    self.data_events_per_bin = Histdd(bins=bin_edges, axis_names=axis_names)
    coordinates = self.base_model.to_analysis_dimensions(d)
    self.data_events_per_bin.add(*coordinates)
def plot_peaks_aft_histogram(
        context, run_id, peaks,
        pe_bins=np.logspace(0, 7, 120),
        rt_bins=np.geomspace(2, 1e5, 120),
        extra_labels=tuple(),
        rate_range=(1e-4, 1),
        aft_range=(0, .85),
        figsize=(14, 5)):
    """Plot the peak rate and the mean area fraction top side by side,
    both as 2D histograms of peak area versus 50% area range.

    :param context: passed to ``straxen.get_livetime_sec``
    :param run_id: passed to ``straxen.get_livetime_sec``
    :param peaks: peaks to histogram; the 'area', 'range_50p_area' and
        'area_fraction_top' fields are binned
    :param pe_bins: Array of bin edges for the peak area dimension [PE]
    :param rt_bins: Array of bin edges for the 'range_50p_area'
        dimension [ns]
    :param extra_labels: List of (area, range, text, color) extra labels
        to put on the plot
    :param rate_range: Range of rates to show [peaks/(bin*s)]
    :param aft_range: Range of mean area fraction top / bin to show
    :param figsize: Figure size to use
    """
    livetime_sec = straxen.get_livetime_sec(context, run_id, peaks)

    aft_bins = np.linspace(0, 1, 100)
    dimensions = (
        ('area', pe_bins),
        ('range_50p_area', rt_bins),
        ('area_fraction_top', aft_bins),
    )
    mh = Histdd(peaks, dimensions=dimensions)

    _, axes = plt.subplots(1, 2, figsize=figsize)

    def std_axes():
        # Styling and region labels shared by both panels
        plt.gca().set_facecolor('k')
        plt.yscale('log')
        plt.xscale('log')
        plt.xlabel("Area [PE]")
        plt.ylabel("Range 50% area [ns]")

        builtin_labels = [
            (12, 8, "AP?", 'white'),
            (3, 150, "1PE\npileup", 'gray'),
            (30, 200, "1e", 'gray'),
            (100, 1000, "n-e", 'w'),
            (2000, 2e4, "Train", 'gray'),
            (1200, 50, "S1", 'w'),
            (45e3, 60, "αS1", 'w'),
            (2e5, 800, "S2", 'w'),
        ]
        for x, w, text, color in builtin_labels + list(extra_labels):
            plt.text(x, w, text,
                     color=color,
                     verticalalignment='center',
                     horizontalalignment='center')

    # Left panel: peak rate, summed over the area fraction top axis
    plt.sca(axes[0])
    rate_hist = (mh / livetime_sec).sum(axis=2)
    rate_hist.plot(
        log_scale=True,
        vmin=rate_range[0],
        vmax=rate_range[1],
        colorbar_kwargs=dict(extend='both'),
        cblabel='Peaks / (bin * s)')
    std_axes()

    # Right panel: average along the area fraction top axis
    plt.sca(axes[1])
    mean_aft = mh.average(axis=2)
    mean_aft.plot(
        vmin=aft_range[0],
        vmax=aft_range[1],
        colorbar_kwargs=dict(extend='max'),
        cmap=plt.cm.jet,
        cblabel='Mean area fraction top')
    std_axes()

    plt.tight_layout()
for idx, x_edge in tqdm(enumerate(xv[:-1])): for idy, y_edge in enumerate(yv[:-1]): df_xy = df.loc[(df.x_s2_tpf > xv[idx][idy]) & (df.x_s2_tpf < xv[idx][idy + 1]) & (df.y_s2_tpf > yv[idx][idy]) & (df.y_s2_tpf < yv[idx + 1][idy])] unique_s2s = pd.unique(df_xy[['s2_time', 'x_s2_tpf', 'y_s2_tpf']].values) num_events = len(unique_s2s) #Define Blank Hists dt_r2_histogram = Histdd(bins=[ t_reduced_bins, r_bins, ], axis_names=[ 'delta_T', 'r_dist', ]) xy_histogram = Histdd(bins=[ t_reduced_bins, x_bins, y_bins, ], axis_names=[ 'delta_T', 'x_p_pos', 'y_p_pos', ])