def __init__(
    self,
    model=None,
    data=None,
    mcdata=None,
    bg=None,
    fcns=None,
    batch=65000,
    gauss_constr=None,
):
    """Combine several FCN objects into a single objective.

    Either provide ``fcns`` directly, or provide parallel sequences of
    ``model``/``data``/``mcdata`` (and optionally ``bg``) from which one
    :class:`FCN` per entry is constructed.

    :param model: Sequence of model objects; required when ``fcns`` is None.
    :param data: Sequence of data samples, one per model.
    :param mcdata: Sequence of phase-space MC samples, one per model.
    :param bg: Optional sequence of background samples; ``None`` means no
        background for every entry.
    :param fcns: Pre-built FCN objects; when given, model/data/mcdata are ignored.
    :param batch: Kept for interface compatibility; not used in this method.
    :param gauss_constr: Dict of Gaussian constraints on variables
        (``None`` means no constraints).
    """
    # FIX(review): original default was a shared mutable ``{}``; use a None
    # sentinel so instances never share one constraint dict.
    if gauss_constr is None:
        gauss_constr = {}
    # FIX(review): originally only set inside the ``fcns is None`` branch,
    # leaving the attribute undefined when ``fcns`` was passed in.
    self.cached_nll = 0.0
    if fcns is None:
        assert model is not None, "model required"
        assert data is not None, "data required"
        assert mcdata is not None, "mcdata required"
        self.fcns = []
        if bg is None:
            bg = _loop_generator(None)
        for model_i, data_i, mcdata_i, bg_i in zip(model, data, mcdata, bg):
            self.fcns.append(FCN(model_i, data_i, mcdata_i, bg_i))
    else:
        self.fcns = list(fcns)
    # All sub-FCNs are assumed to share one variable manager; take the first.
    self.vm = self.fcns[0].vm
    self.gauss_constr = GaussianConstr(self.vm, gauss_constr)
def sum_hessian(
    f, data, var, weight=1.0, trans=tf.identity, args=(), kwargs=None
):
    """
    The parameters are the same with ``sum_gradient()``, but this function
    will return hessian as well, which is the matrix of the second-order
    derivative.

    :return: Real number NLL, list gradient, 2-D list hessian
    """
    kwargs = {} if kwargs is None else kwargs
    if isinstance(weight, float):
        weight = _loop_generator(weight)
    nll_parts = []
    grad_parts = []
    hess_parts = []
    for batch_i, batch_weight in zip(data, weight):
        # The persistent outer tape records the first-order gradient
        # computation so second-order derivatives can be taken from it.
        with tf.GradientTape(persistent=True) as tape_outer:
            with tf.GradientTape() as tape_inner:
                transformed = trans(f(batch_i, *args, **kwargs))
                nll_i = tf.reduce_sum(
                    tf.cast(batch_weight, transformed.dtype) * transformed
                )
            grad_i = tape_inner.gradient(
                nll_i, var, unconnected_gradients="zero"
            )
        # 2nd order derivative: one row of the hessian per gradient component.
        hess_i = [
            tape_outer.gradient(g_k, var, unconnected_gradients="zero")
            for g_k in grad_i
        ]
        del tape_outer  # free the persistent tape before the next batch
        nll_parts.append(nll_i)
        grad_parts.append(grad_i)
        hess_parts.append(hess_i)
    nll = tf.reduce_sum(nll_parts)
    g = tf.reduce_sum(grad_parts, axis=0)
    h = tf.reduce_sum(hess_parts, axis=0)
    return nll, g, h
def sum_gradient(
    f, data, var, weight=1.0, trans=tf.identity, args=(), kwargs=None
):
    """
    NLL is the sum of trans(f(data)):math:`*`weight; gradient is the
    derivatives for each variable in ``var``.

    :param f: Function. The amplitude PDF.
    :param data: Data array
    :param var: List of strings. Names of the trainable variables in the PDF.
    :param weight: Weight factor for each data point. It's either a real
        number or an array of the same shape with ``data``.
    :param trans: Function. Transformation of ``data`` before multiplied by
        ``weight``.
    :param kwargs: Further arguments for ``f``.

    :return: Real number NLL, list gradient
    """
    kwargs = {} if kwargs is None else kwargs
    if isinstance(weight, float):
        weight = _loop_generator(weight)
    nll_parts = []
    grad_parts = []
    for batch_i, batch_weight in zip(data, weight):
        with tf.GradientTape() as tape:
            transformed = trans(f(batch_i, *args, **kwargs))
            nll_i = tf.reduce_sum(
                tf.cast(batch_weight, transformed.dtype) * transformed
            )
        grad_parts.append(
            tape.gradient(nll_i, var, unconnected_gradients="zero")
        )
        nll_parts.append(nll_i)
    nll = sum(nll_parts)
    # Sum per-variable gradients across all batches.
    g = [sum(per_var) for per_var in zip(*grad_parts)]
    return nll, g
def __init__(
    self,
    model,
    data,
    mcdata,
    bg=None,
    batch=65000,
    gauss_constr=None,
):
    """Build a combined FCN over merged data and shared phase-space weights.

    For each (model, data, mcdata, bg) entry: mixes data with background,
    normalizes the MC weights to sum to 1, and builds an :class:`FCN`.
    All mixed data samples are merged and re-split into batches.

    :param model: Sequence of model objects.
    :param data: Sequence of data samples, one per model.
    :param mcdata: Sequence of phase-space MC samples, one per model.
    :param bg: Optional sequence of background samples; ``None`` means none.
    :param batch: Batch size used when splitting merged data and MC weights.
    :param gauss_constr: Dict of Gaussian constraints on variables
        (``None`` means no constraints).
    """
    # FIX(review): original default was a shared mutable ``{}``; use a None
    # sentinel so instances never share one constraint dict.
    if gauss_constr is None:
        gauss_constr = {}
    # FIX(review): ``self.cached_nll`` was assigned twice in the original;
    # a single initialization is sufficient.
    self.cached_nll = 0.0
    assert model is not None, "model required"
    assert data is not None, "data required"
    assert mcdata is not None, "mcdata required"
    self.fcns = []
    if bg is None:
        bg = _loop_generator(None)
    self.datas = []
    self.weight_phsps = []
    self.n_datas = []
    self.model = model
    for model_i, data_i, mcdata_i, bg_i in zip(model, data, mcdata, bg):
        # NOTE(review): "bakcground" is a typo in the project API name;
        # kept as-is because renaming would break the external method call.
        data_s = model_i.mix_data_bakcground(data_i, bg_i)
        self.datas.append(data_s)
        # Shallow copy of the MC sample so its weights can be normalized
        # without mutating the caller's object.
        weight_phsp = type(mcdata_i)({k: v for k, v in mcdata_i.items()})
        w = weight_phsp.get_weight()
        weight_phsp["weight"] = w / tf.reduce_sum(w)
        self.n_datas.append(tf.reduce_sum(data_s.get_weight()))
        self.weight_phsps.append(list(split_generator(weight_phsp, batch)))
        self.fcns.append(FCN(model_i, data_i, mcdata_i, bg_i))
    self.data_merge = list(split_generator(data_merge(*self.datas), batch))
    # All models are assumed to share one variable manager; take the first.
    self.vm = self.model[0].vm
    self.gauss_constr = GaussianConstr(self.vm, gauss_constr)
def sum_hessian_new(
    amp,
    data,
    mcdata,
    weight,
    mcweight,
    var,
    trans=tf.math.log,
    w_flatmc=lambda: 0,
    args=(),
    kwargs=None,
):
    """
    The parameters are the same with ``sum_gradient()``, but this function
    will return hessian as well, which is the matrix of the second-order
    derivative.

    :return: Real number NLL, list gradient, 2-D list hessian
    """
    kwargs = kwargs if kwargs is not None else {}
    if isinstance(weight, float):
        weight = _loop_generator(weight)
    ys = []
    ymc = []
    # The persistent outer tape (tape0) must also record the first-order
    # gradient computation below, so the hessian can be derived from it.
    with tf.GradientTape(persistent=True) as tape0:
        with tf.GradientTape() as tape:
            # Normalization integral: weighted sum of the amplitude over
            # the MC sample.
            for mcdata_i, mcweight_i in zip(mcdata, mcweight):
                part_y = amp(mcdata_i, *args, **kwargs)
                y_i = tf.reduce_sum(
                    tf.cast(mcweight_i, part_y.dtype) * part_y
                )
                ymc.append(y_i)
            int_dt = tf.reduce_sum(ymc)
            # Per-batch data term: normalized pdf mixed with a flat-MC
            # fraction w_flatmc(), then transformed (log by default).
            for data_i, weight_i in zip(data, weight):
                wmc = w_flatmc()
                part_y = (amp(data_i, *args, **kwargs) / int_dt + wmc) / (
                    1 + wmc
                )
                part_y = trans(part_y)
                y_i = tf.reduce_sum(
                    tf.cast(weight_i, part_y.dtype) * part_y
                )
                ys.append(y_i)
            nll = -tf.reduce_sum(ys)
        # First-order gradient, taken from the inner tape while still being
        # recorded by tape0.
        gradient = tape.gradient(nll, var, unconnected_gradients="zero")
        hessian = []
        for gi in gradient:
            # 2nd order derivative
            hessian.append(
                tape0.gradient(gi, var, unconnected_gradients="zero")
            )
    # Release the persistent tape's resources explicitly.
    del tape0
    return nll, gradient, hessian
def sum_gradient_new(
    amp,
    data,
    mcdata,
    weight,
    mcweight,
    var,
    trans=tf.math.log,
    w_flatmc=lambda: 0,
    args=(),
    kwargs=None,
):
    """
    NLL is the sum of trans(f(data)):math:`*`weight; gradient is the
    derivatives for each variable in ``var``.

    :param f: Function. The amplitude PDF.
    :param data: Data array
    :param var: List of strings. Names of the trainable variables in the PDF.
    :param weight: Weight factor for each data point. It's either a real
        number or an array of the same shape with ``data``.
    :param trans: Function. Transformation of ``data`` before multiplied by
        ``weight``.
    :param kwargs: Further arguments for ``f``.

    :return: Real number NLL, list gradient
    """
    kwargs = {} if kwargs is None else kwargs
    if isinstance(weight, float):
        weight = _loop_generator(weight)
    data_terms = []
    mc_terms = []
    # Everything from the normalization integral through the final NLL is
    # computed inside one tape so the full graph is recorded.
    with tf.GradientTape() as tape:
        # Normalization integral over the MC sample.
        for mc_i, mcw_i in zip(mcdata, mcweight):
            amp_mc = amp(mc_i, *args, **kwargs)
            mc_terms.append(
                tf.reduce_sum(tf.cast(mcw_i, amp_mc.dtype) * amp_mc)
            )
        int_dt = tf.reduce_sum(mc_terms)
        # Per-batch data term: normalized pdf mixed with a flat-MC
        # fraction, then transformed (log by default).
        for data_i, w_i in zip(data, weight):
            flat_frac = w_flatmc()
            normalized = (
                amp(data_i, *args, **kwargs) / int_dt + flat_frac
            ) / (1 + flat_frac)
            transformed = trans(normalized)
            data_terms.append(
                tf.reduce_sum(tf.cast(w_i, transformed.dtype) * transformed)
            )
        nll = -tf.reduce_sum(data_terms)
    g = tape.gradient(nll, var, unconnected_gradients="zero")
    return nll, g