Example #1
0
def write_hist(histfile: Path, name: str, hist: np.array,
               bin_edges: np.array) -> None:
    """Write a histogram and its bin edges to a new file.

    The output format is selected from the suffix of ``histfile``:

    * ``.npz``  -- compressed NumPy archive holding ``hist`` under ``name``
      and the edges under ``"bin_edges"``.
    * ``.txt``  -- ``np.savetxt`` dump of ``hist``; the bin edges and
      ``name`` are placed in the header.
    * ``.hdf5`` -- gzip-compressed datasets ``name`` and ``"bin_edges"``
      (needs ``h5py``).
    * ``.root`` -- a TH1 (1-D ``hist``) or TH2 (otherwise) stored under the
      key ``name`` (needs ``uproot`` and ``uproot_methods``).

    Args:
        histfile: Destination path; must not exist yet.
        name: Key / dataset name used for the histogram contents.
        hist: Histogram contents.
        bin_edges: Bin edges belonging to ``hist``.

    Raises:
        Exception: If ``histfile`` already exists, if its suffix is not one
            of the supported formats, or if ``h5py`` is missing for
            ``.hdf5`` output.
    """
    if histfile.exists():
        raise Exception(f"Error writing {histfile}, already exists.")
    if histfile.suffix == ".npz":
        np.savez_compressed(histfile, **{
            name: hist,
            "bin_edges": bin_edges,
        })
    elif histfile.suffix == ".txt":
        import sys

        # str() of a large array is abbreviated with "..." by default, which
        # would silently drop bin edges from the header; lift the threshold
        # so every edge is written.
        with np.printoptions(threshold=sys.maxsize):
            edges_text = str(bin_edges)
        np.savetxt(
            histfile,
            hist,
            header="bin edges:\n" + edges_text + f"\n{name}:",
        )
    elif histfile.suffix == ".hdf5":
        # Only the import is guarded, so that a ModuleNotFoundError raised
        # while writing is not misreported as a missing h5py installation.
        try:
            import h5py
        except ModuleNotFoundError as err:
            raise Exception(
                "Please install h5py to write hdf5 files") from err

        with h5py.File(histfile, "w") as f:
            f.create_dataset(name,
                             data=hist,
                             compression="gzip",
                             compression_opts=9)
            f.create_dataset(
                "bin_edges",
                data=bin_edges,
                compression="gzip",
                compression_opts=9,
            )
    elif histfile.suffix == ".root":
        import uproot

        # TODO: Discard sumw2?
        if hist.ndim == 1:
            from uproot_methods.classes.TH1 import from_numpy

            h = from_numpy([hist, bin_edges])
        else:
            from uproot_methods.classes.TH2 import from_numpy

            # The first set of edges is a plain integer range over
            # hist.shape[0]; bin_edges is passed as the second set.
            h = from_numpy([hist, np.arange(0, hist.shape[0] + 1), bin_edges])
        with uproot.create(histfile) as f:
            f[name] = h
    else:
        raise Exception(f"Unknown output format: {histfile.suffix}")
Example #2
0
    def test_th1(self):
        """Check the TH1 that ``from_numpy`` builds from two bins over [0, 2]."""
        from uproot_methods.classes.TH1 import Methods, _histtype, from_numpy

        bin_edges = np.array((0., 1., 2.))
        contents = np.array([2, 3])
        hist = from_numpy((contents, bin_edges))

        # Scalar metadata of the freshly built histogram.
        assert hist.name is None
        assert hist.numbins == 2
        assert hist.title == b""
        assert hist.low == 0
        assert hist.high == 2
        assert hist.underflows == 0
        assert hist.overflows == 0

        # In-range edges, contents and variances.
        squared = contents**2
        np.testing.assert_equal(hist.edges, bin_edges)
        np.testing.assert_equal(hist.values, contents)
        np.testing.assert_equal(hist.variances, squared)

        # The "all" variants add an underflow and an overflow entry.
        np.testing.assert_equal(hist.alledges,
                                [-np.inf, *bin_edges, np.inf])
        np.testing.assert_equal(hist.allvalues, [0, *contents, 0])
        np.testing.assert_equal(hist.allvariances, [0, *squared, 0])

        expected_allbins = ((-np.inf, 0), (0, 1), (1, 2), (2, np.inf))
        np.testing.assert_equal(hist.bins, ((0, 1), (1, 2)))
        np.testing.assert_equal(hist.allbins, expected_allbins)

        # interval(i) is checked against the same table as allbins;
        # index -1 must give the same result as index 3.
        for index, expected in enumerate(expected_allbins):
            assert hist.interval(index) == expected
        assert hist.interval(-1) == hist.interval(3)
Example #3
0
 def test_histogram(self):
     """Fill two histograms via ``self.ha`` and write them to ``test.root``.

     The first histogram uses six fixed values and unit-width bins on
     [0, 5]; the second uses 10000 normally distributed values with
     ``np.linspace(-1, 1, 100)`` as bin edges. Both are converted with
     ``from_numpy`` and stored under the keys ``"hist"`` and ``"hist2"``.
     """
     np = TestHistogram.NUMPY_LIB
     data = np.array([2,3,4,5,6,7], dtype=np.float32)
     data[data<2] = 0
     weights = np.ones_like(data, dtype=np.float32)
     w, w2, e = self.ha.histogram_from_vector(data, weights, np.array([0,1,2,3,4,5], dtype=np.float32))
     # NOTE(review): this reference histogram is computed but never compared
     # against ``w``; the call is kept so the test exercises the same code.
     np.histogram(data, np.array([0,1,2,3,4,5]))
     hr = from_numpy((w, e))
     f = uproot.recreate("test.root")
     # Close the output file even when a later step raises, so a failing
     # test does not leave an open handle on test.root.
     try:
         f["hist"]  = hr

         data = np.random.normal(size=10000)
         data = np.array(data, dtype=np.float32)
         weights = np.ones_like(data, dtype=np.float32)
         w, w2, e = self.ha.histogram_from_vector(data, weights, np.linspace(-1,1,100, dtype=np.float32))
         hr = from_numpy((w, e))
         f["hist2"]  = hr
     finally:
         f.close()
Example #4
0
def to_th1(hdict, name):
    """Build a named TH1 object from a histogram record.

    Args:
        hdict: Object exposing ``contents``, ``contents_w2`` and ``edges``
            (presumably bin sums of weights, sums of squared weights and
            bin edges -- verify against the caller).
        name: Value stored in the ``_fName`` attribute of the result.

    Returns:
        The object produced by ``from_numpy((contents, edges))`` with
        ``_fName``, ``_fSumw2``, ``_fTsumw2`` and ``_fTsumwx2`` overwritten.
    """
    # np.array copies, so the clean-up below never touches hdict itself.
    content = np.array(hdict.contents)
    content_w2 = np.array(hdict.contents_w2)
    edges = np.array(hdict.edges)

    # Remove inf/nan just in case.
    content[~np.isfinite(content)] = 0
    content_w2[~np.isfinite(content_w2)] = 0

    # Update the error bars. The cleaned content_w2 is used here; the raw
    # hdict.contents_w2 would carry inf/nan straight back into the result.
    centers = (edges[:-1] + edges[1:]) / 2.0
    th1 = from_numpy((content, edges))
    th1._fName = name
    th1._fSumw2 = content_w2
    th1._fTsumw2 = content_w2.sum()
    # NOTE(review): this is sum(w2 * center); confirm that is the intended
    # definition of fTsumwx2.
    th1._fTsumwx2 = (content_w2 * centers).sum()

    return th1
def save_shapes(var, hist, edges, args):
    """Write per-channel, per-region shape histograms to ROOT files.

    For every pair from ``args['channels']`` x ``args['regions']`` the file
    ``combine_new/shapes_{channel}_{region}_{year}_{label}.root`` is
    recreated. For each accepted (``v``, ``w``) pair found in the table, the
    non-data rows are summed per ``grouping`` entry and written under the
    key ``{group}_{vwname}``. The summed data rows are written once per
    file as ``data_obs``.

    Args:
        var: Object whose ``name`` selects the table inside ``hist``.
        hist: Mapping from variable name to a DataFrame with columns
            ``s``, ``v``, ``w``, ``r``, ``c`` plus columns containing
            ``'bin'`` and ``'sumw2'`` in their names (presumably sample,
            variation, weight, region and channel -- verify against the
            caller).
        edges: Array of bin edges shared by all histograms.
        args: Mapping with keys ``'channels'``, ``'regions'``, ``'year'``
            and ``'label'``.
    """
    def get_vwname(v, w):
        """Return the label for variation ``v`` with weight ``w``.

        An empty string means the combination must be skipped. Every skip
        path returns ``''`` so that the caller's emptiness check catches it;
        returning ``()`` would slip past a comparison with ``''``.
        """
        if 'nominal' in v:
            if 'off' in w:
                return ''
            if 'nominal' in w:
                return 'nominal'
            if '_up' in w:
                return w.replace('_up', 'Up').replace('wgt_', '')
            if '_down' in w:
                return w.replace('_down', 'Down').replace('wgt_', '')
            return ''
        if 'nominal' not in w:
            return ''
        if '_up' in v:
            return v.replace('_up', 'Up')
        if '_down' in v:
            return v.replace('_down', 'Down')
        return ''

    def make_th1(values, sumw2, th1_name):
        """Build a TH1 from ``values`` and attach name and sumw2 fields."""
        th1 = from_numpy([values, edges])
        th1._fName = th1_name
        th1._fSumw2 = np.array(sumw2)
        th1._fTsumw2 = np.array(sumw2).sum()
        th1._fTsumwx2 = np.array(sumw2 * centers).sum()
        return th1

    hist = hist[var.name]
    centers = (edges[:-1] + edges[1:]) / 2.0
    bin_columns = [c for c in hist.columns if 'bin' in c]
    sumw2_columns = [c for c in hist.columns if 'sumw2' in c]
    data_names = [n for n in hist.s.unique() if 'data' in n]
    is_data = hist.s.isin(data_names)
    for c in args['channels']:
        for r in args['regions']:
            out_fn = f'combine_new/shapes_{c}_{r}_{args["year"]}_{args["label"]}.root'
            rname = r.replace('-', '_')
            in_region = (hist.r == r) & (hist.c == c)
            out_file = uproot.recreate(out_fn)
            # Close the file even if building one of the histograms fails.
            try:
                data_obs_hist = np.zeros(len(bin_columns), dtype=float)
                data_obs_sumw2 = np.zeros(len(sumw2_columns), dtype=float)
                for v in hist.v.unique():
                    for w in hist.w.unique():
                        vwname = get_vwname(v, w)
                        if not vwname:
                            continue
                        if vwname == 'nominal':
                            data_obs = hist[is_data & in_region]
                            data_obs_hist = data_obs[bin_columns].sum(
                                axis=0).values
                            data_obs_sumw2 = data_obs[sumw2_columns].sum(
                                axis=0).values
                        # Work on a copy so the 'group' assignment below
                        # never writes into a view of the full table.
                        mc_hist = hist[~is_data & in_region & (hist.v == v)
                                       & (hist.w == w)].copy()
                        for s in mc_hist.s.unique():
                            if s in grouping:
                                mc_hist.loc[mc_hist.s == s,
                                            'group'] = grouping[s]
                        if 'group' not in mc_hist.columns:
                            # No sample of this selection belongs to a
                            # group, so there is nothing to write.
                            continue
                        mc_hist = mc_hist.groupby('group').aggregate(
                            np.sum).reset_index()
                        for g in mc_hist.group.unique():
                            rows = mc_hist[mc_hist.group == g]
                            histo = rows[bin_columns].values[0]
                            if len(histo) == 0:
                                continue
                            sumw2 = rows[sumw2_columns].values[0]
                            out_file[f'{g}_{vwname}'] = make_th1(
                                histo, sumw2, f'{rname}_{g}_{vwname}')
                out_file['data_obs'] = make_th1(data_obs_hist,
                                                data_obs_sumw2, 'data_obs')
            finally:
                out_file.close()