def _create_intermediate_nodes(d, c, f_name, h_name, verbose=False):
    """Returns 'intermediate' nodes; i.e., false-alarm and hit probabilities."""
    f = pm.Deterministic(
        f_name, (1 + T.erf((-d / 2 - c) / T.sqrt(2))) / 2)
    h = pm.Deterministic(
        h_name, (1 + T.erf((d / 2 - c) / T.sqrt(2))) / 2)
    return f, h
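# A minimal sketch (not from the source) of how these intermediate nodes
# might be wired into a PyMC3 signal-detection model; the Normal priors on
# d (sensitivity) and c (criterion) are illustrative assumptions.
import pymc3 as pm
import theano.tensor as T

with pm.Model() as sdt_model:
    d = pm.Normal('d', mu=0.0, sd=1.0)  # sensitivity (assumed prior)
    c = pm.Normal('c', mu=0.0, sd=1.0)  # criterion (assumed prior)
    f, h = _create_intermediate_nodes(d, c, 'f', 'h')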
def get_output_for(self, input, eps=1e-7, **kwargs):
    x_axis = theano.shared(np.arange(0, 600, dtype='float32')).dimshuffle('x', 0)
    sigma = input[:, 1].dimshuffle(0, 'x')
    x = (x_axis - input[:, 0].dimshuffle(0, 'x')) / (
        sigma * np.sqrt(2).astype('float32'))
    return (T.erf(x) + 1.0) / 2.0
def __init__(self, mu=0.0, sigma=1.0):
    super(Normal, self).__init__(mu=mu, sigma=sigma)

    # pdf
    self.pdf_ = (
        (1. / np.sqrt(2. * np.pi)) / self.sigma *
        T.exp(-(self.X - self.mu) ** 2 / (2. * self.sigma ** 2))).ravel()
    self.make_(self.pdf_, "pdf")

    # -log pdf
    self.nll_ = bound(
        T.log(self.sigma) + T.log(np.sqrt(2. * np.pi)) +
        (self.X - self.mu) ** 2 / (2. * self.sigma ** 2),
        np.inf,
        self.sigma > 0.).ravel()
    self.make_(self.nll_, "nll")

    # cdf
    self.cdf_ = 0.5 * (1. + T.erf(
        (self.X - self.mu) / (self.sigma * np.sqrt(2.)))).ravel()
    self.make_(self.cdf_, "cdf")

    # ppf
    self.ppf_ = (self.mu +
                 np.sqrt(2.) * self.sigma * T.erfinv(2. * self.p - 1.))
    self.make_(self.ppf_, "ppf", args=[self.p])
def __init__(self, random_state=None, mu=0.0, sigma=1.0):
    super(Normal, self).__init__(mu=mu, sigma=sigma,
                                 random_state=random_state,
                                 optimizer=None)

    # pdf
    self.pdf_ = (
        (1. / np.sqrt(2. * np.pi)) / self.sigma *
        T.exp(-(self.X - self.mu) ** 2 / (2. * self.sigma ** 2))).ravel()
    self.make_(self.pdf_, "pdf")

    # -log pdf
    self.nnlf_ = bound(
        T.log(self.sigma) + T.log(np.sqrt(2. * np.pi)) +
        (self.X - self.mu) ** 2 / (2. * self.sigma ** 2),
        np.inf,
        self.sigma > 0.).ravel()
    self.make_(self.nnlf_, "nnlf")

    # cdf
    self.cdf_ = 0.5 * (1. + T.erf(
        (self.X - self.mu) / (self.sigma * np.sqrt(2.)))).ravel()
    self.make_(self.cdf_, "cdf")

    # ppf
    self.ppf_ = (self.mu +
                 np.sqrt(2.) * self.sigma * T.erfinv(2. * self.p - 1.))
    self.make_(self.ppf_, "ppf", args=[self.p])
def __init__(self, x, mu, sigma, *args, **kwargs):
    super(Normal, self).__init__(*args, **kwargs)
    self._logp = bound(
        -(x - mu) ** 2 / (2 * sigma ** 2) +
        T.log(1 / T.sqrt(sigma ** 2 * 2 * np.pi)),
        sigma > 0)
    self._cdf = 0.5 * (1 + T.erf((x - mu) / (sigma * T.sqrt(2))))
    self._add_expr('x', x)
    self._add_expr('mu', mu)
    self._add_expr('sigma', sigma)
def lognormal_cdf_math(x, mu, sigma, eps=1e-12):
    # wikipedia gives the cdf as
    #   .5 + .5 * erf((log(x) - mu) / sqrt(2 sigma^2))
    #
    # the maximum is used to move negative values and 0 up to a point
    # where they do not cause nan or inf, but also don't contribute much
    # to the cdf.
    return 0.5 + 0.5 * tensor.erf(
        (tensor.log(tensor.maximum(x, eps)) - mu)
        / tensor.sqrt(2 * sigma ** 2))
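# A quick numerical check (assumed usage, not from the source) of
# lognormal_cdf_math against scipy, which parameterizes the lognormal
# with s=sigma and scale=exp(mu).
import numpy as np
import theano
import theano.tensor as tensor
from scipy.stats import lognorm

xs = tensor.dvector('xs')
mu, sigma = 0.5, 1.2
f = theano.function([xs], lognormal_cdf_math(xs, mu, sigma))
grid = np.array([1e-3, 0.5, 1.0, 5.0])
assert np.allclose(f(grid), lognorm.cdf(grid, s=sigma, scale=np.exp(mu)))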
def forward_theano(self, x):
    abs_x = tt.abs_(x)
    y = tt.switch(
        abs_x < self.c,
        tt.erf(x / 2. ** 0.5),
        (((self.beta ** 2 -
           4 * self.alpha * (self.gamma - abs_x)) ** 0.5 -
          self.beta) / (2 * self.alpha)) * tt.sgn(x))
    return y
def probit(x):
    """Probit function that ensures result is in (0, 1)"""
    eps = np.finfo(float).eps
    result = 0.5 + 0.5 * tt.erf(x / tt.sqrt(2))
    result = tt.switch(tt.eq(result, 0), eps, result)
    result = tt.switch(tt.eq(result, 1), 1 - eps, result)
    return result
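# A small demonstration (assumed usage) that the clamping works: in
# float64, erf saturates to exactly +/-1 well before |x| = 40, yet the
# returned probability never hits exactly 0 or 1.
import numpy as np
import theano
import theano.tensor as tt

x = tt.dvector('x')
f = theano.function([x], probit(x))
print(f(np.array([-40.0, 0.0, 40.0])))  # [eps, 0.5, 1 - eps]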
def theano_mu_sigma_erf(mu_erf, sigma_erf, eps=1e-7):
    x_axis = theano.shared(np.arange(0, 600, dtype='float32')).dimshuffle('x', 0)
    if sigma_erf.ndim == 0:
        sigma_erf = T.clip(sigma_erf.dimshuffle('x', 'x'), eps, 1)
    elif sigma_erf.ndim == 1:
        sigma_erf = T.clip(sigma_erf.dimshuffle(0, 'x'), eps, 1)
    x = (x_axis - mu_erf.dimshuffle(0, 'x')) / (
        sigma_erf * np.sqrt(2).astype('float32'))
    return (T.erf(x) + 1) / 2
def get_output_for(self, input, **kwargs):
    eps = 1e-7
    x_axis = theano.shared(np.arange(0, 600, dtype='float32')).dimshuffle('x', 0)
    # This needs to be clipped to avoid NaN's!
    sigma = T.exp(T.clip(input[:, 1].dimshuffle(0, 'x'), -10, 10))
    x = (x_axis - input[:, 0].dimshuffle(0, 'x')) / (
        sigma * np.sqrt(2).astype('float32'))
    return (T.erf(x) + 1) / 2
def normal_cdf(x, location=0, scale=1):
    location = T.cast(location, theano.config.floatX)
    scale = T.cast(scale, theano.config.floatX)
    div = T.sqrt(2 * scale ** 2 + epsilon)
    div = T.cast(div, theano.config.floatX)
    erf_arg = (x - location) / div
    return .5 * (1 + T.erf(erf_arg + epsilon))
def get_output_for(self, inputs, **kwargs):
    """
    :param inputs[0]: (batch, 600)
    :param inputs[1]: (batch, 1)
    :return:
    """
    # clip keeps erfinv's argument strictly inside (-1, 1), where it is finite
    result = (T.erf(
        T.erfinv(
            T.clip(inputs[1].dimshuffle(0, 'x', 1) * 2 - 1,
                   -1 + 3e-8, 1 - 3e-8))
        * inputs[0].dimshuffle(0, 1, 'x')) + 1) / 2
    return result[:, 0, :]
def get_output_for(self, input, **kwargs):
    mu = input[0]
    sigma = input[1]

    x_range = T.arange(0, self.max_support).dimshuffle('x', 0)
    mu = T.repeat(mu, self.max_support, axis=1)
    sigma = T.repeat(sigma, self.max_support, axis=1)

    x = (x_range - mu) / (sigma * T.sqrt(2.) + 1e-16)
    cdf = (T.erf(x) + 1.) / 2.
    return cdf
def logp(self, value):
    tau = self.tau
    sd = self.sd
    mu = self.mu
    alpha = self.alpha
    return bound(
        tt.log(1 + tt.erf(
            ((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2))) +
        (-tau * (value - mu) ** 2 + tt.log(tau / np.pi / 2.)) / 2.,
        tau > 0, sd > 0)
def get_output_for(self, inputs, **kwargs):
    eps = 1e-7
    mu_a, sigma_a, mu_b, sigma_b = inputs

    # Rescale input
    if self.trainable_scale:
        mu_a = mu_a * T.exp(self.W_mu[0]) + T.exp(self.b_mu[0])
        sigma_a = sigma_a * T.exp(self.W_sigma[0]) + T.exp(self.b_sigma[0])
        mu_b = mu_b * T.exp(self.W_mu[0]) + T.exp(self.b_mu[0])
        sigma_b = sigma_b * T.exp(self.W_sigma[0]) + T.exp(self.b_sigma[0])

    # Compute the distance between slices
    h = 0.1  # mm to cm

    # Compute mu for each slice pair
    mu_volumes = mu_a * mu_b * h  # (batch, time, height)

    # Compute sigma for each slice pair
    var_a = sigma_a ** 2
    var_b = sigma_b ** 2
    var_volumes = (var_a * var_b + var_a * mu_b ** 2 +
                   var_b * mu_a ** 2) * h ** 2  # (batch, time, height)

    # Compute mu and sigma per patient
    mu_volume_patient = np.pi / 4. * T.sum(mu_volumes, axis=2)  # (batch, time)
    sigma_volume_patient = np.pi / 4. * T.sqrt(
        T.clip(T.sum(var_volumes, axis=2), eps, utils.maxfloat))
    sigma_volume_patient = T.clip(
        sigma_volume_patient, eps, utils.maxfloat)  # (batch, time)

    x_axis = theano.shared(
        np.arange(0, 600, dtype='float32')).dimshuffle('x', 'x', 0)
    x = (x_axis - mu_volume_patient.dimshuffle(0, 1, 'x')) / (
        sigma_volume_patient.dimshuffle(0, 1, 'x'))
    prediction_matrix = (T.erf(x) + 1) / 2  # (batch, time, 600)

    # max because distribution of smaller one will lie higher
    l_systole = T.max(prediction_matrix, axis=1)
    l_diastole = T.min(prediction_matrix, axis=1)  # (batch, 600)

    return T.concatenate([
        l_systole.dimshuffle(0, 1, 'x'),
        l_diastole.dimshuffle(0, 1, 'x')
    ], axis=2)
def cdf(x, location=0, scale=1):
    epsilon = np.array(1e-32, dtype=theano.config.floatX)
    # Adapted from breze
    location = tt.cast(location, theano.config.floatX)
    scale = tt.cast(scale, theano.config.floatX)
    div = tt.sqrt(2 * scale ** 2 + epsilon)
    div = tt.cast(div, theano.config.floatX)
    erf_arg = (x - location) / div
    return .5 * (1 + tt.erf(erf_arg + epsilon))
def reluOfGaussian(m, v, eps=1e-8):
    # Gaussian approximation of the relu applied to a Gaussian
    # Implementation according to:
    # Hernandez-Lobato and Adams; Probabilistic Backpropagation for
    # Scalable Learning of Bayesian Neural Networks
    pdf_norm = float(1. / (2. * np.pi) ** 0.5)
    alpha = m / T.sqrt(v + eps)
    alpha_inv = T.inv(alpha)
    alpha_div_sqrt2 = alpha * (0.5 ** 0.5)
    pdf_alpha = pdf_norm * T.exp(-0.5 * T.sqr(alpha))
    cdf_alpha_pos = 0.5 * (1. + T.erf(alpha_div_sqrt2))
    # TODO: try with 1. - cdf_alpha_pos
    cdf_alpha_neg = 0.5 * (1. + T.erf(-alpha_div_sqrt2))
    gamma1 = pdf_alpha / cdf_alpha_pos
    gamma2 = -alpha - alpha_inv + 2.0 * alpha_inv ** 3.
    gamma = T.switch(T.ge(alpha, -10.), gamma1, gamma2)
    v_aux = m + T.sqrt(v + eps) * gamma
    m_out = cdf_alpha_pos * v_aux
    v_out = m_out * v_aux * cdf_alpha_neg + \
        cdf_alpha_pos * v * (1. - gamma * (gamma + alpha))
    v_out = T.maximum(v_out, eps)
    return m_out, v_out
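# A Monte-Carlo sanity check (assumed usage): for alpha >= -10 the
# moment-matching formulas give the exact mean and variance of
# max(0, X) with X ~ N(m, v), so they should track empirical moments.
import numpy as np
import theano
import theano.tensor as T

m, v = T.dvectors('m', 'v')
f = theano.function([m, v], reluOfGaussian(m, v))

rng = np.random.RandomState(0)
m_np = np.array([-1.0, 0.0, 2.0])
v_np = np.array([0.5, 1.0, 0.25])
samples = np.maximum(0.0, rng.randn(200000, 3) * np.sqrt(v_np) + m_np)
m_out, v_out = f(m_np, v_np)
print(np.allclose(m_out, samples.mean(axis=0), atol=1e-2))  # True
print(np.allclose(v_out, samples.var(axis=0), atol=1e-2))   # True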
def log_diff_normal_cdf(mu, sigma, x, y):
    r"""
    Compute :math:`\log(\Phi(\frac{x - \mu}{\sigma}) - \Phi(\frac{y - \mu}{\sigma}))`
    safely in log space.

    Parameters
    ----------
    mu: float
        mean
    sigma: float
        std
    x: float
    y: float
        must be strictly less than x.

    Returns
    -------
    log (\Phi(x) - \Phi(y))
    """
    x = (x - mu) / sigma / tt.sqrt(2.0)
    y = (y - mu) / sigma / tt.sqrt(2.0)

    # To stabilize the computation, consider these three regions:
    # 1) x > y > 0 => Use erf(x) = 1 - e^{-x^2} erfcx(x)
    #    and erf(y) = 1 - e^{-y^2} erfcx(y)
    # 2) 0 > x > y => Use erf(x) = e^{-x^2} erfcx(-x) - 1
    #    and erf(y) = e^{-y^2} erfcx(-y) - 1 (the -1's cancel in the difference)
    # 3) x > 0 > y => Naive formula log( (erf(x) - erf(y)) / 2 ) works fine.
    return tt.log(0.5) + tt.switch(
        tt.gt(y, 0),
        # x > y > 0
        -tt.square(y) + tt.log(
            tt.erfcx(y) -
            tt.exp(tt.square(y) - tt.square(x)) * tt.erfcx(x)),
        tt.switch(
            tt.lt(x, 0),
            # 0 > x > y
            -tt.square(x) + tt.log(
                tt.erfcx(-x) -
                tt.exp(tt.square(x) - tt.square(y)) * tt.erfcx(-y)),
            # x > 0 > y
            tt.log(tt.erf(x) - tt.erf(y)),
        ),
    )
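# A tail-stability demonstration (assumed usage): deep in the upper tail
# the naive log(Phi(x) - Phi(y)) underflows to log(0), while the
# erfcx-based branch stays finite.
import numpy as np
import theano
import theano.tensor as tt
from scipy.stats import norm

xv, yv = tt.dscalars('xv', 'yv')
f = theano.function([xv, yv], log_diff_normal_cdf(0.0, 1.0, xv, yv))
print(f(10.5, 10.0))  # finite, roughly log(Phi(10.5) - Phi(10))
print(np.log(norm.cdf(10.5) - norm.cdf(10.0)))  # -inf in float64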
def get_output_for(self, input, **kwargs):
    mu = input[0]
    sigma = input[1]
    w = input[2]

    if self.log:
        sigma = T.exp(sigma)

    x_range = T.arange(0, 600).dimshuffle('x', 0, 'x')
    mu = mu.dimshuffle(0, 'x', 1)
    sigma = sigma.dimshuffle(0, 'x', 1)
    x = (x_range - mu) / (sigma * T.sqrt(2.) + 1e-16)
    cdf = (T.erf(x) + 1.) / 2.  # (bs, 600, n_mix)
    cdf = T.sum(cdf * w.dimshuffle(0, 'x', 1), axis=-1)
    return cdf
def get_output_for(self, input, **kwargs):
    if input.ndim > 3:
        # input: (batch, time, axis, verti, horiz)
        # needs: (batch, time, pixels)
        input = input.flatten(ndim=3)
    eps = 1e-7
    clipped_input = T.clip(input, eps, 1 - eps)
    # Gaussian approximation of a sum of independent Bernoulli pixels:
    # mean = sum of p_i, variance = sum of p_i * (1 - p_i)
    mu = T.sum(clipped_input, axis=2).dimshuffle(0, 1, 'x')
    sigma = T.sqrt(
        T.sum(clipped_input * (1 - clipped_input), axis=2).dimshuffle(0, 1, 'x')
        + eps)
    x_axis = theano.shared(
        np.arange(0, 600, dtype='float32')).dimshuffle('x', 'x', 0)
    x = (x_axis - mu) / sigma
    return (T.erf(x) + 1) / 2
def get_output_for(self, input, **kwargs):
    mu = input[0]
    sigma = input[1]

    if self.sigma_logscale:
        sigma = T.exp(sigma)
    if self.mu_logscale:
        mu = T.exp(mu)

    x_range = T.arange(0, 600).dimshuffle('x', 0)
    mu = T.repeat(mu, 600, axis=1)
    sigma = T.repeat(sigma, 600, axis=1)
    x = (x_range - mu) / (sigma * T.sqrt(2.) + 1e-16)
    cdf = (T.erf(x) + 1.) / 2.
    return cdf
def cdf(sample, location=0, scale=1):
    """Return a theano expression representing the values of the cumulative
    density function of a Gaussian distribution.

    Parameters
    ----------

    sample : Theano variable
        Array of shape ``(n,)`` where ``n`` is the number of samples.

    location : Theano variable
        Scalar representing the mean of the distribution.

    scale : Theano variable
        Scalar representing the standard deviation of the distribution.

    Returns
    -------

    l : Theano variable
        Array of shape ``(n,)`` where each entry represents the cumulative
        density of the corresponding sample.

    Examples
    --------

    >>> import theano
    >>> import theano.tensor as T
    >>> import numpy as np
    >>> from breze.learn.utils import theano_floatx
    >>> sample, mean, std = T.vector(), T.scalar(), T.scalar()
    >>> c = cdf(sample, mean, std)
    >>> f_c = theano.function([sample, mean, std], c)
    >>> X, = theano_floatx(np.array([-1, 0, 1]))
    >>> cs = f_c(X, 0.1, 1.2)
    >>> np.allclose(cs, [0.17965868, 0.46679324, 0.77337265])
    True
    """
    location = T.cast(location, theano.config.floatX)
    scale = T.cast(scale, theano.config.floatX)
    div = T.sqrt(2 * scale ** 2 + epsilon)
    div = T.cast(div, theano.config.floatX)
    erf_arg = (sample - location) / div
    return .5 * (1 + T.erf(erf_arg + epsilon))
def s_expectation_lt_thresh(self, x, thresh):
    """
    return \int_{-inf}^{thresh} (thresh - y) * p(y | x) dy

    p(y | x) = gaussian with center mu(x) and variance sigma(x) ** 2
    """
    mu = self.s_mean(x)
    sigma = tensor.sqrt(
        tensor.maximum(self.s_variance(x), self.min_variance))
    a = 0.5 * (mu - thresh)
    delta = (thresh - mu) / (sqrt(2) * sigma)
    sbar = sigma / sqrt(2 * pi)
    rval = sbar * tensor.exp(-delta ** 2) - a * (1 + tensor.erf(delta))
    rval = tensor.maximum(rval, 1e-7)
    if rval.dtype != self.dtype:
        raise TypeError('rval dtype', rval.dtype)
    return rval
def maxOfGaussians(m1, v1, m2, v2, eps=1e-8):
    # Gaussian approximation of the maximum of two Gaussians
    # Implementation according to:
    # Sinha et al.; Advances in Computation of the Maximum of a Set of
    # Random Variables
    a_sqr = v1 + v2 + eps
    a = T.sqrt(a_sqr)
    alpha = (m1 - m2) / a
    aux_erf = T.erf(alpha * (0.5 ** 0.5))
    cdf_alpha_pos = 0.5 * (1. + aux_erf)
    cdf_alpha_neg = 0.5 * (1. - aux_erf)
    pdf_alpha = float(1. / (2. * np.pi) ** 0.5) * T.exp(-0.5 * T.sqr(alpha))
    a_times_pdf_alpha = a * pdf_alpha
    m_max = m1 * cdf_alpha_pos + m2 * cdf_alpha_neg + a_times_pdf_alpha
    v_max = (v1 + T.sqr(m1)) * cdf_alpha_pos \
        + (v2 + T.sqr(m2)) * cdf_alpha_neg \
        + (m1 + m2) * a_times_pdf_alpha \
        - T.sqr(m_max) + eps
    return m_max, v_max
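# A Monte-Carlo sanity check (assumed usage): for independent Gaussians
# these are Clark's formulas, which give the exact first two moments of
# max(X1, X2), so they should match empirical moments closely.
import numpy as np
import theano
import theano.tensor as T

m1, v1, m2, v2 = T.dscalars('m1', 'v1', 'm2', 'v2')
f = theano.function([m1, v1, m2, v2], maxOfGaussians(m1, v1, m2, v2))

rng = np.random.RandomState(0)
s = np.maximum(rng.randn(500000) * 1.0 + 0.5,
               rng.randn(500000) * 2.0 - 1.0)
m_max, v_max = f(0.5, 1.0, -1.0, 4.0)
print(m_max, s.mean())  # should agree to ~2 decimal places
print(v_max, s.var())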
def __init__(self, mu=0.0, sigma=1.0):
    """Constructor.

    Parameters
    ----------
    * `mu` [float]:
        The distribution mean.

    * `sigma` [float]:
        The distribution standard deviation.
    """
    super(Normal, self).__init__(mu=mu, sigma=sigma)

    # pdf
    self.pdf_ = (
        (1. / np.sqrt(2. * np.pi)) / self.sigma *
        T.exp(-(self.X - self.mu) ** 2 / (2. * self.sigma ** 2))).ravel()
    self._make(self.pdf_, "pdf")

    # -log pdf
    self.nll_ = bound(
        T.log(self.sigma) + T.log(np.sqrt(2. * np.pi)) +
        (self.X - self.mu) ** 2 / (2. * self.sigma ** 2),
        np.inf,
        self.sigma > 0.).ravel()
    self._make(self.nll_, "nll")

    # cdf
    self.cdf_ = 0.5 * (1. + T.erf(
        (self.X - self.mu) / (self.sigma * np.sqrt(2.)))).ravel()
    self._make(self.cdf_, "cdf")

    # ppf
    self.ppf_ = (self.mu +
                 np.sqrt(2.) * self.sigma * T.erfinv(2. * self.p - 1.))
    self._make(self.ppf_, "ppf", args=[self.p])
def norm_cdf(z):
    return 0.5 * (1 + tt.erf(z / np.sqrt(2)))
def cdf(z):
    """Cumulative distribution function via erf (Error function)"""
    return (numpy.float32(1) + T.erf(z)) / numpy.float32(2)
def probit(self, Y):
    # The probit function is actually the CDF of the standard normal
    # distribution N(0, 1)
    mu = 0
    sd = 1
    return 0.5 * (1 + tt.erf((Y - mu) / (sd * tt.sqrt(2))))
def Phi(x):
    erfarg = (x - avg) / (std * SQRT2)
    rval = 0.5 * (1. + T.erf(erfarg))
    return rval.astype(dtype)
def phi(x, mu=0, sd=1):
    return 0.5 * (1 + tsr.erf((x - mu) / (sd * tsr.sqrt(2))))
def cdf(sample, mu=0, sigma=1, eps=1e-6):
    div = T.sqrt(2) * sigma
    erf_arg = (sample - mu) / div
    return .5 * (1 + T.erf(erf_arg))
def get_features(model, max_features=100):
    """
    Form the original features in the model representation.

    Parameters
    ----------
    model: pylearn2 Model
        The model.
    max_features: int
        The maximum number of features to process.

    Returns
    -------
    features, stats
    """
    def make_vec(i, V):
        vec, updates = theano.scan(
            fn=lambda x, j: T.switch(T.eq(i, j), x, 0),
            sequences=[V, theano.tensor.arange(V.shape[0])],
            outputs_info=[None])
        return vec

    if isinstance(model, VAE):
        logger.info("Getting features for VAE model")
        means = model.prior.prior_mu
        sigmas = T.exp(model.prior.log_prior_sigma)
        idx = sigmas.argsort()[:max_features]
        means_matrix, updates = theano.scan(
            fn=lambda x: x,
            non_sequences=[means[idx]],
            n_steps=idx.shape[0])
        sigmas_matrix, updates = theano.scan(
            make_vec, sequences=[idx], non_sequences=[sigmas])
        theta0 = model.decode_theta(means_matrix)
        mu0, log_sigma0 = theta0
        theta1 = model.decode_theta(means_matrix + 2 * sigmas_matrix)
        mu1, log_sigma1 = theta1
        features = 1 - (0.5 * (1 + T.erf(
            (mu0 - mu1) / (T.exp(log_sigma1) * sqrt(2)))))
        stats = dict(m=means[idx], s=sigmas[idx], idx=idx)

    elif isinstance(model, NICE):
        logger.info("Getting features for NICE model")
        top_layer = model.encoder.layers[-1]
        if isinstance(top_layer, nice_mlp.Homothety):
            S = top_layer.D
            sigmas = T.exp(-S)
        elif isinstance(top_layer, nice_mlp.SigmaScaling):
            sigmas = top_layer.S
        idx = sigmas.argsort()[:max_features]
        sigmas_matrix, updates = theano.scan(
            make_vec, sequences=[idx], non_sequences=[sigmas])
        means_matrix = T.zeros_like(sigmas_matrix)
        mean_features = model.encoder.inv_fprop(means_matrix)
        features = (model.encoder.inv_fprop(2 * sigmas_matrix) -
                    mean_features)
        stats = dict(s=sigmas[idx], idx=idx)

    elif isinstance(model, RBM):
        features = model.hidden_layer.transformer.get_params()[0].T
        # if isinstance(model.visible_layer, GaussianVisLayer):
        #     X = T.eye(features.shape[0], model.visible_layer.nvis)
        #     X -= model.visible_layer.mu
        #     features = X.T.dot(features)
        stats = dict()

    elif isinstance(model, DBN):
        features, _ = get_features(model.top_rbm, max_features=max_features)
        stats = dict()

    else:
        raise NotImplementedError(
            "No feature extraction for model %s" % type(model))

    return (features, stats)
def normcdf(nu, sigma, z):
    return 0.5 * (1 + TT.erf((z - nu) / (sigma * (2 ** 0.5))))
def std_cdf(x):
    """
    Calculates the standard normal cumulative distribution function.
    """
    return 0.5 + 0.5 * tt.erf(x / tt.sqrt(2.))
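# A quick numerical check (assumed usage, not from the source) of
# std_cdf against scipy's reference implementation.
import numpy as np
import theano
import theano.tensor as tt
from scipy.stats import norm

x = tt.dvector('x')
f = theano.function([x], std_cdf(x))
grid = np.linspace(-3.0, 3.0, 7)
assert np.allclose(f(grid), norm.cdf(grid))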
def n_cdf(x):
    return 0.5 * (1.0 + T.erf(x / T.sqrt(2.0)))
def get_cumulative(self, z):
    return 0.5 * (1 + T.erf(z / T.sqrt(2)))
def normcdf(X, nu=0, sigma=1):
    return 0.5 * (1 + TT.erf((X - nu) / (sigma * 2 ** 0.5)))
def normal_cdf(theta1, theta2, xj):
    """Compute the cumulative distribution function of a lognormal with
    median ``theta1`` and shape ``theta2``, evaluated at ``xj``."""
    return 0.5 * (1 + tt.erf(
        tt.log(xj / theta1) / (theta2 * tt.sqrt(2))))
def get_output_for(self, input, **kwargs):
    x_axis = theano.shared(np.arange(0, 600, dtype='float32')).dimshuffle('x', 0)
    x = (x_axis - input[:, 0].dimshuffle(0, 'x')) / (
        self.sigma * np.sqrt(2).astype('float32'))
    return (T.erf(x) + 1) / 2
def gelu2(x):
    '''similar to silu; note x * (erf(x) + 1) == 2 * x * Phi(sqrt(2) * x)'''
    return x * (tt.erf(x) + 1)
def probit_phi(x):
    """Probit transformation."""
    mu = 0
    sd = 1
    return 0.5 * (1 + tsr.erf((x - mu) / (sd * tsr.sqrt(2))))
def cdf(x, miu=0.0, variance=1.0):
    return 1.0 / 2 * (1.0 + T.erf((x - miu) / T.sqrt(2 * variance)))