Beispiel #1
0
def generate_toy(config, N=1000, force=True, max_N=100000):
    """Draw toy events from the configured amplitude by repeated sampling.

    Batches are requested from ``single_sampling`` until at least ``N``
    events have been accepted. After every batch the next batch size is
    re-estimated from the acceptance rate seen so far and capped at ``max_N``.

    :param config: Object providing ``get_decay()`` and ``get_amplitude()``;
        it is also forwarded to ``generate_phsp``.
    :param N: Number of accepted events wanted.
    :param force: If true, the merged sample is masked down to its first
        ``N`` events; otherwise every accepted event is returned.
    :param max_N: Upper bound on the number of candidates requested per batch.
    :return: The merged (and, with ``force``, truncated) sample.
    """
    # The return value is not needed here; the call itself is kept because it
    # may initialise state inside ``config`` -- TODO confirm.
    config.get_decay()
    amp = config.get_amplitude()

    def gen(M):
        return generate_phsp(config, M)

    all_data = []
    n_accept = 0
    n_total = 0
    test_N = 10 * N
    while N > n_accept:
        test_N = abs(min(max_N, test_N))
        data = single_sampling(gen, amp, test_N)
        n_total += test_N
        n_accept += data_shape(data)
        # Extrapolate how many candidates are still needed (with a 1% margin).
        # The estimate is floored at 1: when almost every candidate is
        # accepted it can truncate to 0, and an empty batch would never
        # advance the loop.
        estimate = int(1.01 * n_total / (n_accept + 1) * (N - n_accept))
        test_N = max(1, estimate)
        all_data.append(data)

    ret = data_merge(*all_data)

    if force:
        # Keep only the first N entries of the merged sample.
        cut = tf.range(data_shape(ret)) < N
        ret = data_mask(ret, cut)

    return ret
Beispiel #2
0
 def _get_bg_weight(self, data=None, bg=None, display=True):
     """Return the per-group background weights and injection ratios.

     Both settings are read from ``self.config["data"]`` (``bg_weight`` and
     ``inject_ratio``, default ``0.0``). A non-list value is repeated for
     each of the ``self._Ngroup`` groups. String background weights are
     passed to ``self.data.load_weight_file``. When ``weight_scale`` is set
     in the config, each background weight is multiplied by the ratio of the
     data size to the background size.

     :param data: Data samples; fetched with ``self.get_data("data")`` when
         ``None`` (only used if ``weight_scale`` is enabled).
     :param bg: Background samples; fetched with ``self.get_data("bg")``
         when ``None`` (only used if ``weight_scale`` is enabled).
     :param display: Print the scaled background weights.
     :return: Tuple ``(w_bkg, w_inmc)``.
     """
     data_config = self.config["data"]

     def as_group_list(value):
         # A single setting is shared by every group.
         if isinstance(value, list):
             return value
         return [value] * self._Ngroup

     def resolve(entry):
         # String entries are handed to the weight-file loader.
         if isinstance(entry, str):
             return self.data.load_weight_file(entry)
         return entry

     w_bkg = as_group_list(data_config.get("bg_weight", 0.0))
     assert len(w_bkg) == self._Ngroup
     w_inmc = as_group_list(data_config.get("inject_ratio", 0.0))
     assert len(w_inmc) == self._Ngroup

     if not data_config.get("weight_scale", False):
         return [resolve(wb) for wb in w_bkg], w_inmc

     if data is None:
         data = self.get_data("data")
     if bg is None:
         bg = self.get_data("bg")
     w_bkg = [
         resolve(wb) * data_shape(dt) / data_shape(sb)
         for wb, dt, sb in zip(w_bkg, data, bg)
     ]
     if display:
         print("background weight:", w_bkg)
     return w_bkg, w_inmc
Beispiel #3
0
 def get_n_data(self):
     """Return the summed event weight of every ``"data"`` sample.

     A sample without a ``"weight"`` entry counts each event with weight one.

     :return: List with one weight sum per sample.
     """
     totals = []
     for sample in self.get_data("data"):
         unit_weights = np.ones((data_shape(sample),))
         totals.append(np.sum(sample.get("weight", unit_weights)))
     return totals
Beispiel #4
0
 def process_scale(self, idx, data):
     """Rescale the weights of ``data`` when weight scaling applies to ``idx``.

     Scaling happens only if ``idx`` is listed in ``self.scale_list`` and the
     ``weight_scale`` option in ``self.dic`` is truthy. The weights (ones
     when absent) are multiplied by ``self.get_n_data()`` divided by the
     size of ``data`` and stored back under ``"weight"``.

     :param idx: Identifier checked against ``self.scale_list``.
     :param data: Sample to rescale; modified in place.
     :return: The same ``data`` object.
     """
     if idx not in self.scale_list or not self.dic.get("weight_scale", False):
         return data
     n_bg = data_shape(data)
     scale_factor = self.get_n_data() / n_bg
     current_weight = data.get("weight", np.ones((n_bg, )))
     data["weight"] = current_weight * scale_factor
     return data
Beispiel #5
0
 def load_data(self,
               files,
               weights=None,
               weights_sign=1,
               charge=None) -> dict:
     """Load momenta from a NumPy archive and compute the decay angles.

     The arrays named by ``self.get_particle_p()`` are read from ``files``
     and paired with the particles from ``self.get_dat_order()``. Optional
     ``"weight"`` and ``"charge_conjugation"`` arrays are taken from the
     same archive; a missing ``"charge_conjugation"`` defaults to ones.

     :param files: Path (or file object) accepted by ``np.load``; ``None``
         makes the method return ``None``.
     :param weights: Not used by this implementation.
     :param weights_sign: Not used by this implementation.
     :param charge: Not used by this implementation.
     :return: The dictionary produced by ``cal_angle_from_momentum`` with
         the weight and charge-conjugation entries added, or ``None``.
     """
     if files is None:
         return None
     order = self.get_dat_order()
     p_list = self.get_particle_p()
     center_mass = self.dic.get("center_mass", True)
     r_boost = self.dic.get("r_boost", False)
     random_z = self.dic.get("random_z", False)
     npz_data = np.load(files)
     try:
         # Pull every needed array out while the archive is still open.
         p = {
             get_particle(str(v)): npz_data[str(k)]
             for k, v in zip(p_list, order)
         }
         weight = npz_data["weight"] if "weight" in npz_data else None
         if "charge_conjugation" in npz_data:
             charge_conjugation = npz_data["charge_conjugation"]
         else:
             charge_conjugation = None
     finally:
         # An .npz archive keeps its file handle open until closed; objects
         # without a close() method (e.g. plain arrays) are left alone.
         close = getattr(npz_data, "close", None)
         if callable(close):
             close()
     data = cal_angle_from_momentum(
         p,
         self.decay_struct,
         center_mass=center_mass,
         r_boost=r_boost,
         random_z=random_z,
     )
     if weight is not None:
         data["weight"] = weight
     if charge_conjugation is None:
         charge_conjugation = np.ones((data_shape(data), ))
     data["charge_conjugation"] = charge_conjugation
     return data
Beispiel #6
0
 def _amp2s(data, cached_data):
     """Return the squared amplitude per event from cached matrices.

     Each parameter vector from ``build_params_vector(dg, data)`` is paired
     with one entry of ``cached_data``, multiplied with the stacked cached
     tensors and summed over axis 1. The contributions are added, and the
     squared modulus is summed over every axis except the first.

     :param data: Sample passed to ``data_shape`` and ``build_params_vector``.
     :param cached_data: Iterable of tensor sequences, one per parameter vector.
     :return: Tensor reduced over all axes but the first.
     """
     n_data = data_shape(data)
     contributions = []
     for params, matrices in zip(build_params_vector(dg, data), cached_data):
         # Append singleton axes so the parameters broadcast against the
         # stacked cached tensors.
         trailing_ones = [1] * (len(matrices[0].shape) - 1)
         coeff = tf.reshape(params, [n_data, -1] + trailing_ones)
         stacked = tf.stack(matrices, axis=1)
         contributions.append(tf.reduce_sum(coeff * stacked, axis=1))
     amp = tf.reduce_sum(contributions, axis=0)
     amp2s = tf.math.real(amp * tf.math.conj(amp))
     summed_axes = list(range(1, len(amp2s.shape)))
     return tf.reduce_sum(amp2s, summed_axes)
Beispiel #7
0
    def cal_signal_yields(self, params={}, mcdata=None, batch=25000):
        """Estimate signal yields and their errors from the fit fractions.

        For every dataset the fit fractions (from ``fit_fractions``) are
        multiplied by the number of signal events. Without background that
        number is the data size ``N`` with error ``sqrt(N)``; with
        background it is ``N - w * N_bg`` with error
        ``sqrt(N + w**2 * N_bg)``, where ``w`` is the background weight.

        :param params: Parameters forwarded to ``fit_fractions``; if the
            object has a ``params`` attribute, that attribute is used
            instead. The default dict is never modified in this method.
        :param mcdata: Phase-space samples; ``self.get_data("phsp")`` when
            ``None``.
        :param batch: Batch size forwarded to ``fit_fractions``.
        :return: List with one dict per dataset, mapping each fraction key
            to ``(yield, error)``.
        """
        if hasattr(params, "params"):
            params = getattr(params, "params")
        if mcdata is None:
            mcdata = self.get_data("phsp")
        amp = self.get_amplitude()
        fracs = [
            fit_fractions(amp, i, self.inv_he, params, batch) for i in mcdata
        ]
        data = self.get_data("data")
        bg = self.get_data("bg")
        # One (N, N_err) pair per dataset. The list must start empty so that
        # every entry unpacks as a pair in the loop below.
        N_total = []
        if bg is None:
            for i in data:
                N_data = data_shape(i)
                N_total.append((N_data, np.sqrt(N_data)))
        else:
            bg_weight, _ = self._get_bg_weight(data, bg)
            for i, j, w in zip(data, bg, bg_weight):
                N_data = data_shape(i)
                N_bg = data_shape(j)
                N_total.append(
                    (N_data - w * N_bg, np.sqrt(N_data + w * w * N_bg)))

        N_sig_s = []
        for frac_e, N_e in zip(fracs, N_total):
            frac, frac_err = frac_e
            N, N_err = N_e
            N_sig = {}
            for i in frac:
                # Combine the fraction error and the counting error in
                # quadrature; a missing fraction error counts as zero.
                N_sig[i] = (
                    frac[i] * N,
                    np.sqrt((N * frac_err.get(i, 0.0))**2 +
                            (N_err * frac[i])**2),
                )
            N_sig_s.append(N_sig)
        return N_sig_s
Beispiel #8
0
def build_params_vector(dg, data):
    """Build one flattened parameter matrix per entry of ``dg.get_m_dep``.

    Every entry is a sequence of tensors. The first one is tiled to
    ``n_data`` rows if its leading dimension is 1 and then flattened to
    ``(n_data, -1)``. Each further tensor is flattened the same way and
    combined through a per-row outer product, which is flattened again.

    :param dg: Object providing ``get_m_dep(data)``.
    :param data: Sample whose size is given by ``data_shape``.
    :return: List of tensors of shape ``(n_data, -1)``.
    """
    n_data = data_shape(data)
    vectors = []
    for factors in dg.get_m_dep(data):
        head = factors[0]
        if head.shape[0] == 1:
            # Repeat a single shared row once per event.
            trailing_ones = [1] * (len(head.shape) - 1)
            head = tf.tile(head, [n_data] + trailing_ones)
        flat = tf.reshape(head, (n_data, -1))
        for factor in factors[1:]:
            factor_flat = tf.reshape(factor, (factor.shape[0], -1))
            outer = flat[:, :, None] * factor_flat[:, None, :]
            flat = tf.reshape(outer, (n_data, -1))
        vectors.append(flat)
    return vectors
Beispiel #9
0
 def load_data(
     self, files, weights=None, weights_sign=1, charge=None
 ) -> dict:
     """Load a sample with ``prepare_data_from_decay`` and attach weights.

     :param files: Input forwarded to ``prepare_data_from_decay``; ``None``
         makes the method return ``None``.
     :param weights: ``None`` for no weight entry, a ``float`` for a
         constant weight per event, or a ``str`` passed to
         ``self.load_weight_file``.
     :param weights_sign: Factor multiplied onto the weights.
     :param charge: Optional argument for ``self.load_weight_file`` giving
         the charge-conjugation values; ones are used when ``None``.
     :return: The prepared data with ``"weight"`` (if requested) and
         ``"charge_conjugation"`` entries, or ``None``.
     :raises TypeError: If ``weights`` is neither ``None``, ``float`` nor
         ``str``.
     """
     if files is None:
         return None
     options = {
         "center_mass": self.dic.get("center_mass", True),
         "r_boost": self.dic.get("r_boost", False),
         "random_z": self.dic.get("random_z", False),
     }
     order = self.get_dat_order()
     data = prepare_data_from_decay(files, self.decay_struct, order, **options)

     if isinstance(weights, float):
         # Same constant weight for every event.
         constant = weights * weights_sign
         data["weight"] = np.array([constant] * data_shape(data))
     elif isinstance(weights, str):
         # Weight file: keep only as many entries as there are events.
         loaded = self.load_weight_file(weights)
         data["weight"] = loaded[: data_shape(data)] * weights_sign
     elif weights is not None:
         raise TypeError("weight format error: {}".format(type(weights)))

     if charge is None:
         data["charge_conjugation"] = np.ones((data_shape(data),))
     else:
         charges = self.load_weight_file(charge)
         data["charge_conjugation"] = charges[: data_shape(data)]
     return data
Beispiel #10
0
 def reweight_init_value(amp, phsp, ns=None):
     """Reset every decay-chain total so its weighted integral matches ``ns``.

     All totals are first set to ``rho = 1.0``. Then ``rho`` of each chain
     is set to ``sqrt(n / sum(weight * w) * sum(weight))``, where ``w`` is
     that chain's entry in ``amp.partial_weight(phsp)``.

     :param amp: Amplitude providing ``decay_group`` and ``partial_weight``.
     :param phsp: Sample with an optional ``"weight"`` entry (ones if absent).
     :param ns: Target per chain: ``None`` means 1 for each chain, a number
         is split evenly over the chains, anything else is used as is.
     """
     chain_totals = [chain.total for chain in amp.decay_group]
     n_chains = len(chain_totals)
     n_phsp = data_shape(phsp)
     weight = np.array(phsp.get("weight", [1] * n_phsp))
     sw = np.sum(weight)

     if ns is None:
         ns = [1] * n_chains
     elif isinstance(ns, (int, float)):
         ns = [ns / n_chains] * n_chains

     # Partial weights are evaluated with every chain at unit magnitude.
     for total in chain_totals:
         total.set_rho(1.0)
     partial_weights = amp.partial_weight(phsp)

     for total, w, target in zip(chain_totals, partial_weights, ns):
         total.set_rho(np.sqrt(target / np.sum(weight * w) * sw))
Beispiel #11
0
def cached_amp(dg, data, matrix_method=build_angle_amp_matrix):
    """Create a ``tf.function`` that evaluates the amplitude from cached matrices.

    ``matrix_method(dg, data)`` is evaluated once; only the second element
    of its result is kept. The returned callable rebuilds the parameter
    vectors with ``build_params_vector`` on every call and contracts them
    with the cached tensors.

    :param dg: Decay group forwarded to ``matrix_method`` and
        ``build_params_vector``.
    :param data: Sample the amplitude is evaluated on.
    :param matrix_method: Callable returning ``(idx, cached_matrices)``.
    :return: Zero-argument callable returning the summed amplitude tensor.
    """
    _, c_amp = matrix_method(dg, data)
    n_data = data_shape(data)

    @tf.function
    def _amp():
        contributions = []
        for params, matrices in zip(build_params_vector(dg, data), c_amp):
            # Append singleton axes so the parameters broadcast against the
            # stacked cached tensors.
            trailing_ones = [1] * (len(matrices[0].shape) - 1)
            coeff = tf.reshape(params, [n_data, -1] + trailing_ones)
            stacked = tf.stack(matrices, axis=1)
            contributions.append(tf.reduce_sum(coeff * stacked, axis=1))
        return tf.reduce_sum(contributions, axis=0)

    return _amp
Beispiel #12
0
    def fit(
        self,
        data=None,
        phsp=None,
        bg=None,
        inmc=None,
        batch=65000,
        method="BFGS",
        check_grad=False,
        improve=False,
        reweight=False,
        maxiter=None,
    ):
        """Run the minimisation and store the result in ``self.fit_params``.

        When neither ``data`` nor ``phsp`` is given, the samples come from
        ``self.get_all_data()`` and the FCN from ``self.get_fcn(batch=batch)``;
        otherwise the FCN is built from the supplied samples.

        :param data: Data samples, or ``None``.
        :param phsp: Phase-space samples, or ``None``.
        :param bg: Background samples, or ``None``.
        :param inmc: Injected MC samples, or ``None``.
        :param batch: Batch size forwarded to ``self.get_fcn``.
        :param method: Minimisation method forwarded to ``fit``.
        :param check_grad: Forwarded to ``fit``.
        :param improve: Accepted but not forwarded; the minimiser is always
            called with ``improve=False``.
        :param reweight: If true, call ``ConfigLoader.reweight_init_value``
            with the first phase-space sample and the size of the first
            data sample before fitting.
        :param maxiter: Forwarded to ``fit``.
        :return: The fit result, also stored as ``self.fit_params``.
        """
        if data is None and phsp is None:
            data, phsp, bg, inmc = self.get_all_data()
            fcn = self.get_fcn(batch=batch)
        else:
            fcn = self.get_fcn([data, phsp, bg, inmc], batch=batch)
        amp = self.get_amplitude()
        print("decay chains included: ")
        for i in self.full_decay:
            # The fallback is invoked without arguments, so it must accept
            # none; it yields None for objects lacking ``get_ls_list``.
            ls_list = [getattr(j, "get_ls_list", lambda: None)() for j in i]
            print("  ", i, " ls: ", *ls_list)
        if reweight:
            ConfigLoader.reweight_init_value(amp,
                                             phsp[0],
                                             ns=data_shape(data[0]))

        print("\n########### initial parameters")
        print(json.dumps(amp.get_params(), indent=2), flush=True)
        print("initial NLL: ", fcn({}))  # amp.get_params()))
        self.fit_params = fit(
            fcn=fcn,
            method=method,
            bounds_dict=self.bound_dic,
            check_grad=check_grad,
            improve=False,
            maxiter=maxiter,
        )
        # Keep the previous inverse Hessian when the fit did not produce one.
        if self.fit_params.hess_inv is not None:
            self.inv_he = self.fit_params.hess_inv
        return self.fit_params
Beispiel #13
0
 def sum_nll_grad_bacth(self, data):
     """Return the negated log-likelihood sum and gradients over batches.

     The cached amplitude matrices are stored in ``self.cached_data`` under
     the ``id`` of the object passed as ``data``, and are built on first use.

     :param data: Iterable of batches; each batch may carry a ``"weight"``
         entry (ones are used otherwise).
     :return: Tuple ``(-ln_data, [-g for g in gradients])`` from
         ``sum_gradient_data2``.
     """
     # The key is taken before ``data`` is materialised into a list.
     cache_key = id(data)
     batches = list(data)
     weights = []
     for batch in batches:
         weights.append(batch.get("weight", tf.ones((data_shape(batch), ))))

     if cache_key not in self.cached_data:
         matrices = []
         for batch in batches:
             built = build_amp.build_angle_amp_matrix(
                 self.Amp.decay_group, batch)
             matrices.append(built[1])
         self.cached_data[cache_key] = matrices

     log_sum, log_grads = sum_gradient_data2(
         self.cached_amp,
         self.Amp.trainable_variables,
         batches,
         self.cached_data[cache_key],
         weight=weights,
         trans=clip_log,
     )
     nll = -log_sum
     nll_grads = [-grad for grad in log_grads]
     return nll, nll_grads
Beispiel #14
0
 def get_n_data(self):
     """Return the summed event weight of the ``"data"`` sample.

     Every event counts with weight one when there is no ``"weight"`` entry.
     """
     sample = self.get_data("data")
     unit_weights = np.ones((data_shape(sample),))
     return np.sum(sample.get("weight", unit_weights))
Beispiel #15
0
    def nll_grad_hessian(self,
                         data,
                         mcdata,
                         weight=1.0,
                         batch=24000,
                         bg=None,
                         mc_weight=1.0):
        """
        The parameters are the same with ``self.nll()``, but it will return Hessian as well.

        The data term comes from ``sum_hessian`` evaluated in batches; the
        normalisation term comes from the cached integral of ``mcdata``,
        differentiated twice with nested gradient tapes.

        :param data: Data sample, passed through ``self.get_weight_data``.
        :param mcdata: MC sample used for the normalisation integral.
        :param weight: Data weight, passed through ``self.get_weight_data``.
        :param batch: Batch size used to split ``data`` and ``weight``.
        :param bg: Background sample forwarded to ``self.get_weight_data``.
        :param mc_weight: MC weight; a ``float`` is expanded to one value per MC event.
        :return NLL: Real number. The value of NLL.
        :return gradients: List of real numbers. The gradients for each variable.
        :return Hessian: 2-D Array of real numbers. The Hessian matrix of the variables.
        """
        data, weight = self.get_weight_data(data, weight, bg=bg)
        # A scalar MC weight becomes a float64 tensor with one entry per MC event.
        if isinstance(mc_weight, float):
            mc_weight = tf.convert_to_tensor([mc_weight] * data_shape(mcdata),
                                             dtype="float64")
        n_mc = tf.reduce_sum(mc_weight)
        sw = tf.reduce_sum(weight)
        # Value, gradient and Hessian of the data term, accumulated over batches.
        ln_data, g_ln_data, h_ln_data = sum_hessian(
            self.Amp,
            split_generator(data, batch),
            self.Amp.trainable_variables,
            weight=split_generator(weight, batch),
            trans=clip_log,
        )

        # int_mc, g_int_mc, h_int_mc = sum_hessian(self.Amp, split_generator(mcdata, batch),
        #                                         self.Amp.trainable_variables, weight=split_generator(
        #        mc_weight, batch))
        # NOTE(review): a float ``mc_weight`` was already replaced by a tensor
        # above, so this branch (and its normalisation) looks unreachable --
        # confirm before relying on normalised MC weights here.
        if isinstance(mc_weight, float):
            mc_weight = tf.convert_to_tensor([mc_weight] * data_shape(mcdata),
                                             dtype="float64")
            mc_weight = mc_weight / tf.reduce_sum(mc_weight)
        # The cached integral is keyed by the identity of the ``mcdata`` object.
        mc_id = id(mcdata)
        if mc_id not in self.cached_int:
            self.build_cached_int(mcdata, mc_weight)
        # Outer persistent tape records the inner gradient computation so each
        # gradient component can be differentiated again below.
        with tf.GradientTape(persistent=True) as tape0:
            with tf.GradientTape() as tape:
                y_i = self.get_cached_int(mc_id)
            g_i = tape.gradient(y_i,
                                self.Amp.trainable_variables,
                                unconnected_gradients="zero")
        h_s_i = []
        for gi in g_i:
            # 2nd order derivative
            h_s_i.append(
                tape0.gradient(
                    gi,
                    self.Amp.trainable_variables,
                    unconnected_gradients="zero",
                ))
        # Drop the persistent tape explicitly once all rows are computed.
        del tape0
        int_mc = y_i
        g_int_mc = tf.convert_to_tensor(g_i)
        h_int_mc = tf.convert_to_tensor(h_s_i)

        # NOTE(review): ``n_var`` is not used afterwards.
        n_var = len(g_ln_data)
        nll = -ln_data + sw * tf.math.log(int_mc / n_mc)
        g = -g_ln_data + sw * g_int_mc / int_mc

        # From here on ``g_int_mc`` holds the gradient of log(int_mc).
        g_int_mc = g_int_mc / int_mc
        g_outer = tf.reshape(g_int_mc, (-1, 1)) * tf.reshape(g_int_mc, (1, -1))

        # Hessian of sw * log(int_mc): sw * (H / int_mc - outer(grad log, grad log)).
        h = -h_ln_data - sw * g_outer + sw / int_mc * h_int_mc
        return nll, g, h
Beispiel #16
0
def test_generate_phsp(toy_config):
    """Check the size of toy samples with and without forced truncation."""
    n_events = 1000
    # Without truncation the sample holds at least the requested events.
    unforced = toy_config.generate_toy(n_events, force=False)
    assert data_shape(unforced) >= n_events
    # By default the sample is cut to exactly the requested size.
    forced = toy_config.generate_toy(n_events)
    assert data_shape(forced) == n_events