def constant_folding_einsum(formula, *args):
    in_formulas, out_formula = split_einsum_formula(formula)
    const_indices = []
    node_indices = []
    const_letters = set()
    node_letters = set()
    for i, (in_formula, arg) in enumerate(zip(in_formulas, args)):
        if is_constant(arg):
            const_indices.append(i)
            const_letters.update(in_formula)
        else:
            node_indices.append(i)
            node_letters.update(in_formula)

    const_args = []
    const_in_formulas = []
    indices_to_remove = []
    for i in const_indices:
        if not node_letters.intersection(in_formulas[i]):
            const_args.append(args[i])
            const_in_formulas.append(in_formulas[i])
            indices_to_remove.append(i)
        elif node_letters.issuperset(in_formulas[i]) and np.all(args[i] == 1):
            indices_to_remove.append(i)

    if not indices_to_remove:
        return np.einsum(formula, *args)

    folded_constant = 1
    if const_args:
        const_letters = frozenset(''.join(const_in_formulas))
        const_out_formula = ''.join(
            [i for i in out_formula if i in const_letters])
        folded_constant = np.einsum(
            '{}->{}'.format(','.join(const_in_formulas), const_out_formula),
            *const_args)

    if len(indices_to_remove) == len(in_formulas):
        return folded_constant

    retained_in_formulas = ','.join([
        in_formulas[i] for i in range(len(in_formulas))
        if i not in indices_to_remove
    ])
    retained_args = [
        arg for i, arg in enumerate(args) if i not in indices_to_remove
    ]
    if np.isscalar(folded_constant) and folded_constant == 0:
        return 0.
    elif np.isscalar(folded_constant) and folded_constant == 1:
        return np.einsum('{}->{}'.format(retained_in_formulas, out_formula),
                         *retained_args)
    else:
        return np.einsum(
            '{},{}->{}'.format(const_out_formula, retained_in_formulas,
                               out_formula),
            *([folded_constant] + retained_args))
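# A minimal sketch (plain NumPy only) of the identity constant_folding_einsum
# exploits: a constant operand sharing no letters with the other operands
# factors out of the contraction, so it can be pre-reduced ("folded") once
# and applied afterwards.  split_einsum_formula and is_constant are assumed
# to be helpers defined elsewhere in this module.
import numpy as np

X = np.arange(6.0).reshape(2, 3)
c = np.full(4, 2.0)  # constant operand, letters disjoint from X's

direct = np.einsum('i,jk->jk', c, X)  # contract constant and X together
folded = np.einsum('i->', c) * X      # fold the constant first, then apply
assert np.allclose(direct, folded)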
def test_kinetic_energy(self):
    mom_resample_coeff = 1.
    dtype = np.float64
    for n_dim in [10, 100, 1000]:
        mass_matrix = self.prng.normal(size=(n_dim, n_dim))
        mass_matrix = mass_matrix.dot(mass_matrix.T)
        mass_matrix_chol = la.cholesky(mass_matrix, lower=True)
        sampler = uhmc.EuclideanMetricHmcSampler(
            energy_func=energy_func,
            mass_matrix=mass_matrix,
            energy_grad=energy_grad,
            prng=self.prng,
            mom_resample_coeff=mom_resample_coeff,
            dtype=dtype)
        pos, mom = self.prng.normal(size=(2, n_dim,)).astype(dtype)
        k_energy = sampler.kinetic_energy(pos, mom, {})
        assert np.isscalar(k_energy), (
            'kinetic_energy returning non-scalar value.')
        assert np.allclose(
            k_energy,
            0.5 * mom.dot(la.cho_solve((mass_matrix_chol, True), mom))), (
                'kinetic_energy returning incorrect value.')
def adam(grad, init_params, subopt=None, callback=None, break_cond=None,
         num_iters=100, step_size=0.001, b1=0.9, b2=0.999, eps=10**-8):
    """Adam as described in http://arxiv.org/pdf/1412.6980.pdf.
    It's basically RMSprop with momentum and some correction terms."""
    flattened_grad, unflatten, x = flatten_func(grad, init_params)

    # dynamic step sizes
    if np.isscalar(step_size):
        step_size = np.ones(num_iters) * step_size
    assert len(step_size) == num_iters, "step schedule needs to match num iter"

    m = np.zeros(len(x))
    v = np.zeros(len(x))
    for i in range(num_iters):
        g = flattened_grad(x, i)
        if callback:
            callback(unflatten(x), i, unflatten(g))
        m = (1 - b1) * g + b1 * m       # First moment estimate.
        v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
        mhat = m / (1 - b1**(i + 1))    # Bias correction.
        vhat = v / (1 - b2**(i + 1))
        x = x - step_size[i] * mhat / (np.sqrt(vhat) + eps)

        # do line search on last
        if subopt is not None:
            x = subopt(x, g, i)

        if break_cond is not None:
            if break_cond(x, i, g):
                break
    return unflatten(x)
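# A minimal usage sketch for adam above (sgd below shares the same call
# signature).  Assumes flatten_func comes from autograd.misc.flatten, as is
# conventional for this optimizer style; the toy objective is illustrative.
import autograd.numpy as anp
from autograd import grad as agrad

f = lambda w: anp.sum((w - 3.0)**2)  # quadratic objective, minimum at w = 3
df = agrad(f)

# adam expects a gradient with signature grad(params, iteration)
w_opt = adam(lambda w, i: df(w), anp.zeros(5), num_iters=500, step_size=0.1)
print(w_opt)  # close to [3. 3. 3. 3. 3.]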
def condition(args, to_shape=None):
    """Condition n-d args for PyCO2SYS.

    If NumPy can broadcast the args together, they are a valid combination,
    and they will be combined following NumPy broadcasting rules.

    All array-like args will be broadcast into the same shape.
    Any scalar args will be left as scalars.
    """
    try:  # check all args can be broadcast together
        args = {k: v for k, v in args.items() if v is not None}
        args_broadcast = broadcast1024(*args.values())
        if to_shape is not None:
            try:  # check args can be broadcast to to_shape, if provided
                broadcast1024(np.ones(to_shape), np.ones(args_broadcast.shape))
                args_broadcast_shape = to_shape
            except ValueError:
                print("PyCO2SYS error: args are not broadcastable to to_shape.")
                return
        else:
            args_broadcast_shape = args_broadcast.shape
        # Broadcast the non-scalar args to a consistent shape
        args_conditioned = {
            k: np.broadcast_to(v, args_broadcast_shape)
            if not np.isscalar(v) else v
            for k, v in args.items()
        }
        # Convert to float, where needed
        args_conditioned = {
            k: np.float64(v) if k in input_floats else v
            for k, v in args_conditioned.items()
        }
    except ValueError:
        print("PyCO2SYS error: input shapes cannot be broadcast together.")
        return
    return args_conditioned
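# The broadcasting rule condition() relies on can be checked with plain NumPy
# (broadcast1024 and input_floats are module-level PyCO2SYS helpers assumed
# to be defined elsewhere): a (3, 1) array and a length-4 array broadcast to
# a common (3, 4) shape, so they would be a valid argument combination.
import numpy as np

a = np.ones((3, 1))
b = np.ones(4)
print(np.broadcast(a, b).shape)    # (3, 4)
print(np.broadcast_to(b, (3, 4)))  # b repeated along the new leading axis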
def ff(self, x, X, *params):
    x = np.array(x)
    if np.isscalar(X):
        X = np.ones_like(x) * X
    else:
        X = np.array(X)
    return 1 - np.exp(-self.Hf(x, X, *params))
def random(self, size, X, *params):
    dist_params = np.array(params[0:self.k_dist])
    phi_params = np.array(params[self.k_dist:])
    x = []
    X_out = []
    if type(X) == tuple:
        X = np.random.uniform(*X, size)
    for stress in np.unique(X, axis=0):
        life_param_mask = np.array(range(
            0, len(dist_params))) == self.param_map[self.life_parameter]
        dist_params = np.where(
            life_param_mask,
            self.param_transform(self.phi(stress, *phi_params)),
            dist_params)
        U = np.random.uniform(0, 1, size)
        x.append(self.dist.qf(U, *dist_params))
        if np.isscalar(stress):
            cols = 1
        else:
            cols = len(stress)
        X_out.append((np.ones((size, cols)) * stress))
    return np.array(x).flatten(), np.concatenate(X_out)
def sgd(grad, init_params, subopt=None, callback=None, break_cond=None,
        num_iters=200, step_size=0.1, mass=0.9):
    """Stochastic gradient descent with momentum.
    grad() must have signature grad(x, i), where i is the iteration number."""
    flattened_grad, unflatten, x = flatten_func(grad, init_params)

    # dynamic step sizes
    if np.isscalar(step_size):
        step_size = np.ones(num_iters) * step_size
    assert len(step_size) == num_iters, "step schedule needs to match num iter"

    velocity = np.zeros(len(x))
    for i in range(num_iters):
        g = flattened_grad(x, i)
        if callback:
            callback(unflatten(x), i, unflatten(g))
        velocity = mass * velocity - (1.0 - mass) * g
        x = x + step_size[i] * velocity

        if subopt is not None:
            x = subopt(x, g, i)

        if break_cond is not None:
            if break_cond(x, i, g):
                break
    return unflatten(x)
def piecewiseLinear(x, xp, yp):
    if np.isscalar(x):
        if x <= xp[0]:
            y = yp[0]
        elif x >= xp[-1]:
            y = yp[-1]
        else:
            i = 0
            while x >= xp[i + 1]:
                i = i + 1
            y = (x - xp[i + 1]) / (xp[i] - xp[i + 1]) * yp[i] + (x - xp[i]) / (
                xp[i + 1] - xp[i]) * yp[i + 1]
    else:
        n = len(x)
        if yp.ndim > 1:
            y = np.zeros((n, yp.shape[1]))
        else:
            y = np.zeros(n)
        i = 0
        for j in range(n):
            if x[j] <= xp[0]:
                y[j] = yp[0]
            elif x[j] >= xp[-1]:
                y[j] = yp[-1]
            else:
                while x[j] >= xp[i + 1]:
                    i = i + 1
                y[j] = (x[j] - xp[i + 1]) / (xp[i] - xp[i + 1]) * yp[i] + (
                    x[j] - xp[i]) / (xp[i + 1] - xp[i]) * yp[i + 1]
    return y
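# A quick usage sketch for piecewiseLinear.  Note the vectorized branch
# assumes x is sorted ascending, since the bracketing index i never resets.
import numpy as np

xp = np.array([0.0, 1.0, 2.0])
yp = np.array([0.0, 10.0, 20.0])
print(piecewiseLinear(0.5, xp, yp))                   # 5.0
print(piecewiseLinear(np.array([0.5, 1.5]), xp, yp))  # [ 5. 15.]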
def get_nat_params_from_likelihood(y, x, info):
    # get the natural parameters of beta from the likelihood,
    # i.e. get the coefficients of beta and beta.T x beta
    assert np.shape(y)[-1] == np.shape(x)[-2]

    d1 = len(np.shape(y)) - 1
    d2 = len(np.shape(x)) - 2
    assert d1 >= d2  # extra dims
    n_per_dim = np.prod(np.shape(y)[d2:d1])

    homosked = np.isscalar(info)  # whether the errors are homoskedastic
    if homosked:
        # info is just a scalar
        info_x = x * info
    else:
        # info is n_t x n_t
        assert np.shape(info)[-1] == np.shape(info)[-2]
        assert np.shape(info)[-1] == np.shape(x)[-2]
        assert len(np.shape(info)) == len(np.shape(x))
        info_x = mat_mul_last2dims(info, x)

    nat_param1 = matvec_mul_last2dims(info_x, y)
    nat_param2 = -0.5 * mat_mul_last2dims(x, info_x) * n_per_dim

    return nat_param1, nat_param2
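# For reference (standard exponential-family algebra, not specific to this
# module): with a Gaussian likelihood y ~ N(X beta, Lambda^{-1}), where
# `info` plays the role of the precision Lambda, the log-likelihood is
#
#     log p(y | beta) = beta^T (X^T Lambda y)
#                       - (1/2) beta^T (X^T Lambda X) beta + const,
#
# so nat_param1 corresponds to X^T Lambda y and nat_param2 to
# -(1/2) X^T Lambda X, scaled by the number of replicated leading dims
# (up to the broadcasting conventions of the *_mul_last2dims helpers).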
def prepare_param(self, X, name):
    if isinstance(X, Parameter):
        assert X.name == name
    else:
        if np.isscalar(X):
            X = (X,)
        # np.float was removed in NumPy 1.24; the builtin float is equivalent
        X = Parameter(np.array(X, dtype=float), name=name, fixed=True)
    return X
def make_real_nvp(d, D):
    """ This defines a transformation using the Real NVP class of functions.
    Each transformation from D => D is defined conditionally; some set of
    d variables are held constant, and the other set are transformed
    conditioned on the first set (and a self-linear term)
    """
    # first define the conditioning set and non-conditioning set
    mask = np.zeros(D)
    if np.isscalar(d):
        mask[:d] = 1
    else:
        mask[d] = 1

    # make parameter vector
    params = np.concatenate(
        [np.random.randn(D * D + D), np.random.randn(D * D + D)])

    def unpack(params):
        W, b = params[:D * D], params[D * D:]
        return np.reshape(W, (D, D)), b

    def lfun(z, lparams):
        """ function from d => D-d, parameterized by lparams.
        implemented as a full function, and we assume the input and output
        are masked """
        W, b = unpack(lparams)
        return np.tanh(np.dot(z, W) + b)

    def mfun(z, mparams):
        """ function from d => D-d, parameterized by mparams.
        implemented as a full function, and we assume the input and output
        are masked """
        W, b = unpack(mparams)
        return np.tanh(np.dot(z, W) + b)

    def flow(z, params):
        lparams, mparams = np.split(params, 2)
        return mask * z + (1 - mask) * (z * np.exp(lfun(mask * z, lparams)) +
                                        mfun(mask * z, mparams))

    def flow_inv(zprime, params):
        lparams, mparams = np.split(params, 2)
        first_part = mask * zprime
        sec_part = (1 - mask) * (zprime - mfun(mask * zprime, mparams)) * \
            np.exp(-lfun(mask * zprime, lparams))
        return first_part + sec_part

    def flow_det(z, params):
        """ log determinant of this flow """
        lparams, mparams = np.split(params, 2)
        diag = (1 - mask) * lfun(mask * z, lparams)
        if len(z.shape) > 1:
            return np.sum(diag, axis=1)
        else:
            return np.sum(diag)

    return flow, flow_det, flow_inv, params
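# A small sanity check for the flow above: the masked coordinates pass
# through unchanged, so the inverse can undo the scale-and-shift exactly.
# This sketch just verifies that flow_inv(flow(z)) recovers z.
import numpy as np

np.random.seed(0)
flow, flow_det, flow_inv, params = make_real_nvp(d=2, D=4)

z = np.random.randn(4)
assert np.allclose(flow_inv(flow(z, params), params), z)
print(flow_det(z, params))  # log|det J|: only the transformed coords enter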
def axisDerivative(f, x, i, epsilon=0.000001):
    D = x.shape[0]
    epsilon_matrix = np.zeros(D)
    epsilon_matrix[i] += epsilon
    assert (epsilon_matrix.shape == (D,))
    assert (isScalar(epsilon))
    ret = (f(x + epsilon_matrix) - f(x - epsilon_matrix)) / (2 * epsilon)
    assert (np.isscalar(ret))
    return ret
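# A usage sketch for the central finite difference above (isScalar is the
# thin np.isscalar wrapper defined at the end of this file).
import numpy as np

f = lambda v: float(np.sum(v**2))  # gradient of sum(v^2) is 2v
x = np.array([1.0, 2.0, 3.0])
print(axisDerivative(f, x, 1))     # ~4.0, i.e. 2 * x[1]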
def _rr_parcombos(par1type, par2type):
    """Generate all possible valid pairs of parameter type numbers excluding
    the input pair.
    """
    assert isscalar(par1type) & isscalar(
        par2type), "Both inputs must be scalar."
    # Get all possible combinations of parameter type numbers
    allpars = list(_partypes.keys())
    par1s, par2s = meshgrid(allpars, allpars)
    par1s = par1s.ravel()
    par2s = par2s.ravel()
    # Select only valid combinations and cut out input combination
    allIcases = solve.getIcase(par1s, par2s, checks=False)
    inputIcase = solve.getIcase(par1type, par2type, checks=False)
    valid = (par1s != par2s) & ~isin(allIcases, [45, 48, 58, inputIcase])
    par1s = par1s[valid]
    par2s = par2s[valid]
    # Icases = pyco2.engine.getIcase(par1s, par2s, checks=True)  # checks if all valid
    return par1s, par2s
def piecewiseConstant(x, xp, yp):
    if np.isscalar(xp):
        if np.isscalar(x):
            y = yp
        else:
            n = len(x)
            if np.isscalar(yp):
                y = np.ones(n) * yp
            else:
                y = np.tile(yp, (n, 1))
    else:
        if np.isscalar(x):
            if x <= xp[0]:
                y = yp[0]
            elif x >= xp[-1]:
                y = yp[-1]
            else:
                i = 0
                while x >= xp[i + 1]:
                    i = i + 1
                y = yp[i]
        else:
            n = len(x)
            if yp.ndim > 1:
                y = np.zeros((n, yp.shape[1]))
            else:
                y = np.zeros(n)
            i = 0
            for j in range(n):
                if x[j] <= xp[0]:
                    y[j] = yp[0]
                elif x[j] >= xp[-1]:
                    y[j] = yp[-1]
                else:
                    while x[j] >= xp[i + 1]:
                        i = i + 1
                    y[j] = yp[i]
    return y
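# The analogous usage sketch for piecewiseConstant; the same ascending-x
# assumption applies in the vectorized branch.
import numpy as np

xp = np.array([0.0, 1.0, 2.0])
yp = np.array([0.0, 10.0, 20.0])
print(piecewiseConstant(0.5, xp, yp))                   # 0.0 (holds yp[i] until xp[i+1])
print(piecewiseConstant(np.array([0.5, 1.5]), xp, yp))  # [ 0. 10.]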
def fwd_grad_chooser(g, ans, gvs, vs, x, axis=None, keepdims=False):
    if anp.isscalar(x):
        return g
    if not keepdims:
        if isinstance(axis, int):
            ans = anp.expand_dims(ans, axis)
        elif isinstance(axis, tuple):
            for ax in sorted(axis):
                ans = anp.expand_dims(ans, ax)
    chosen_locations = x == ans
    return anp.sum(g * chosen_locations, axis=axis, keepdims=keepdims)
def roundrobin(par1, par2, par1type, par2type, sal, temp, pres, si, phos,
               pHscale, k1k2, kso4, **kwargs):
    """Solve the core marine carbonate system from given input parameters,
    then solve again from the results using every other possible combination
    of input pairs.
    """
    # Check all inputs are scalar
    nonscalar_message = "All inputs must be scalar."
    assert all([isscalar(v) for k, v in locals().items()
                if k != "kwargs"]), nonscalar_message
    if "kwargs" in locals().keys():
        assert all([isscalar(v) for k, v in locals()["kwargs"].items()
                    ]), nonscalar_message
    # Solve the MCS using the initial input pair
    args = (sal, temp, temp, pres, pres, si, phos, pHscale, k1k2, kso4)
    res0 = engine.CO2SYS(par1, par2, par1type, par2type, *args, **kwargs)
    # Extract the core variables
    res0core = hstack([res0[_partypes[i]] for i in range(1, 9)])
    # Generate new inputs, all combinations
    par1types, par2types = _rr_parcombos(0, 0)
    par1s = res0core[par1types - 1]
    par2s = res0core[par2types - 1]
    # Solve the MCS again but from all combinations
    res = engine.CO2SYS(par1s, par2s, par1types, par2types, *args, **kwargs)
    # Calculate differences from original to aid comparisons
    nodiffs = [
        "PAR1TYPE",
        "PAR2TYPE",
        "K1K2CONSTANTS",
        "KSO4CONSTANTS",
        "KSO4CONSTANT",
        "KFCONSTANT",
        "BORON",
        "pHSCALEIN",
        "buffers_mode",
    ]
    diff = {k: v - res0[k] if k not in nodiffs else v for k, v in res.items()}
    return res, diff
def mixture_of_ts_logprob(x, locs, iscales, pis, df):
    xx = np.atleast_2d(x)
    D = xx.shape[1]
    centered = xx[:, :, np.newaxis] - locs.T[np.newaxis, :, :]
    solved = np.einsum('ijk,lji->lki', iscales, centered)
    loglikes = np.reshape(tdist.logpdf(np.reshape(solved, (-1, D)), df=df),
                          solved.shape)
    logprobs = np.sum(loglikes, axis=1) + np.log(pis)
    logprob = scpm.logsumexp(logprobs, axis=1)
    if np.isscalar(x) or len(x.shape) == 1:
        return logprob[0]
    else:
        return logprob
def test_unimplemented_falseyness():
    def remove_grad_definitions(fun):
        grads, zero_grads = fun.grads, fun.zero_grads
        fun.grads, fun.zero_grads = {}, set()
        return grads, zero_grads

    def restore_grad_definitions(fun, grad_defs):
        fun.grads, fun.zero_grads = grad_defs

    grad_defs = remove_grad_definitions(np.isscalar)
    fun = lambda x: x**2 if np.isscalar(x) else np.sum(x)
    check_grads(fun, 5.)
    check_grads(fun, np.array([1., 2.]))
    restore_grad_definitions(np.isscalar, grad_defs)
def mog_logmarglike(x, means, covs, pis, ind=0):
    """ marginal x or y (depending on ind) """
    K = pis.shape[0]
    xx = np.atleast_2d(x)
    centered = xx.T - means[:, ind, np.newaxis].T
    logprobs = []
    for kk in range(K):  # was xrange, which is Python 2 only
        quadterm = centered[:, kk] * centered[:, kk] * (1. / covs[kk, ind, ind])
        logprobsk = -.5 * quadterm - .5 * np.log(2 * np.pi) \
            - .5 * np.log(covs[kk, ind, ind]) + np.log(pis[kk])
        logprobs.append(np.squeeze(logprobsk))
    logprobs = np.array(logprobs)
    logprob = scpm.logsumexp(logprobs, axis=0)
    if np.isscalar(x):
        return logprob[0]
    else:
        return logprob
def _pHfromTAVX(TA, VX, totals, k_constants, initialfunc, deltafunc):
    """Calculate pH from total alkalinity and DIC or one of its components
    using a Newton-Raphson iterative method.

    Although it is coded for H on the total pH scale, for the pH values
    occurring in seawater (pH > 6) it will be equally valid on any pH scale
    (H terms negligible) as long as the K Constants are on that scale.

    Based on the CalculatepHfromTA* functions, version 04.01, Oct 96, by
    Ernie Lewis.
    """
    # First guess inspired by M13/OE15, added v1.3.0:
    pH_guess_args = (
        TA,
        VX,
        totals["TB"],
        k_constants["K1"],
        k_constants["K2"],
        k_constants["KB"],
    )
    if initial_pH_guess is None:
        pH = initialfunc(*pH_guess_args)
    else:
        assert np.isscalar(initial_pH_guess)
        pH = np.full(np.broadcast(*pH_guess_args).shape, initial_pH_guess)
    deltapH = 1.0 + pH_tolerance
    while np.any(np.abs(deltapH) >= pH_tolerance):
        pHdone = np.abs(
            deltapH) < pH_tolerance  # check which rows don't need updating
        deltapH = deltafunc(pH, TA, VX, totals, k_constants)  # the pH jump
        # To keep the jump from being too big:
        abs_deltapH = np.abs(deltapH)
        # Original CO2SYS-MATLAB approach is this only:
        deltapH = np.where(abs_deltapH > 1.0, deltapH / 2, deltapH)
        if not halve_big_jumps:
            # This is the default PyCO2SYS way - jump by 1 instead if
            # `deltapH` > 1
            abs_deltapH = np.abs(deltapH)
            sign_deltapH = np.sign(deltapH)
            deltapH = np.where(abs_deltapH > 1.0, sign_deltapH, deltapH)
        if update_all_pH:
            # Original CO2SYS-MATLAB approach, just here for testing
            pH = pH + deltapH  # update all rows
        else:
            # This is the default PyCO2SYS way - the original is a bug
            pH = np.where(pHdone, pH, pH + deltapH)  # only update rows that need it
    return pH
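# For reference, the update above is a standard Newton-Raphson step (textbook
# math, not PyCO2SYS-specific notation): with the alkalinity residual
# r(pH) = TA_calc(pH) - TA, deltafunc returns the jump
#
#     deltapH = -r(pH) / (dr/dpH),    pH <- pH + deltapH,
#
# and the clipping above keeps |deltapH| <= 1 so that a poor first guess
# cannot overshoot.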
def mog_logprob(x, means, icovs, lndets, pis):
    """ compute the log likelihood according to a mixture of gaussians
        with means  = [mu0, mu1, ... muk]
             icovs  = [C0^-1, ..., CK^-1]
             lndets = ln [|C0|, ..., |CK|]
             pis    = [pi1, ..., piK] (sum to 1)
        at locations given by x = [x1, ..., xN]
    """
    xx = np.atleast_2d(x)
    D = xx.shape[1]
    centered = xx[:, :, np.newaxis] - means.T[np.newaxis, :, :]
    solved = np.einsum('ijk,lji->lki', icovs, centered)
    logprobs = - 0.5 * np.sum(solved * centered, axis=1) \
        - (D / 2.) * np.log(2 * np.pi) - 0.5 * lndets + np.log(pis)
    logprob = scpm.logsumexp(logprobs, axis=1)
    if np.isscalar(x) or len(x.shape) == 1:
        return logprob[0]
    else:
        return logprob
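# A usage sketch for mog_logprob, assuming scpm refers to scipy.special
# (where logsumexp lives in modern SciPy).  With identity covariances the
# precisions are identity matrices and the log-determinants are zero.
import numpy as np
from scipy.stats import multivariate_normal

means = np.array([[0.0, 0.0], [3.0, 3.0]])
icovs = np.array([np.eye(2), np.eye(2)])  # inverse covariances
lndets = np.zeros(2)                      # log|I| = 0
pis = np.array([0.5, 0.5])

x = np.array([[0.0, 0.0], [3.0, 3.0]])
lp = mog_logprob(x, means, icovs, lndets, pis)

# cross-check against the explicit mixture density
ref = np.log(0.5 * multivariate_normal(means[0], np.eye(2)).pdf(x)
             + 0.5 * multivariate_normal(means[1], np.eye(2)).pdf(x))
assert np.allclose(lp, ref)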
def test_kinetic_energy(self):
    mom_resample_coeff = 1.
    dtype = np.float64
    sampler = uhmc.IsotropicHmcSampler(
        energy_func=energy_func,
        energy_grad=energy_grad,
        prng=self.prng,
        mom_resample_coeff=mom_resample_coeff,
        dtype=dtype)
    for n_dim in [10, 100, 1000]:
        pos, mom = self.prng.normal(size=(2, n_dim,)).astype(dtype)
        k_energy = sampler.kinetic_energy(pos, mom, {})
        assert np.isscalar(k_energy), (
            'kinetic_energy returning non-scalar value.')
        assert np.allclose(
            k_energy,
            0.5 * mom.dot(mom)), ('kinetic_energy returning incorrect value.')
def comp_eig_D(Ks, Kt, sig2n):
    """
    Computes eigvecs and diagonal D for inversion of kron(Ks, Kt) + sig2n * I
    :param Ks: spatial covariance
    :param Kt: temporal covariance
    :param sig2n: noise variance
    :return: eigvec(Ks), eigvec(Kt), Dvec
    """
    nx = Ks.shape[0]
    nt = Kt.shape[0]

    if np.isscalar(sig2n):
        sig2n_vec = sig2n * np.ones(nx * nt)
    else:
        sig2n_vec = np.repeat(sig2n, nt)  # sig2n can be nx dimension

    evals_t, evec_t = np.linalg.eigh(Kt)
    evals_s, evec_s = np.linalg.eigh(Ks)
    # import scipy.linalg
    # evals_t, evec_t = scipy.linalg.eigh(Kt)
    # evals_s, evec_s = scipy.linalg.eigh(Ks)

    Dvec = np.repeat(evals_s, nt) * np.tile(evals_t, nx) + sig2n_vec
    return evec_s, evec_t, Dvec
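# A quick check of the identity comp_eig_D relies on: if Ks = Us diag(ls) Us^T
# and Kt = Ut diag(lt) Ut^T, then
#     kron(Ks, Kt) + sig2n * I = (Us kron Ut) diag(Dvec) (Us kron Ut)^T,
# with Dvec exactly as computed above.  A minimal sketch on small matrices:
import numpy as np

rng = np.random.default_rng(0)
A = rng.normal(size=(3, 3)); Ks = A @ A.T  # small SPD spatial covariance
B = rng.normal(size=(4, 4)); Kt = B @ B.T  # small SPD temporal covariance
sig2n = 0.1

evec_s, evec_t, Dvec = comp_eig_D(Ks, Kt, sig2n)
U = np.kron(evec_s, evec_t)
assert np.allclose(U @ np.diag(Dvec) @ U.T,
                   np.kron(Ks, Kt) + sig2n * np.eye(12))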
def hf(self, x, X, *params):
    x = np.array(x)
    if np.isscalar(X):
        X = np.ones_like(x) * X
    else:
        X = np.array(X)
    dist_params = np.array(params[0:self.k_dist])
    phi_params = np.array(params[self.k_dist:])
    hf = np.zeros_like(x)
    for stress in np.unique(X, axis=0):
        life_param_mask = np.array(range(
            0, len(dist_params))) == self.param_map[self.life_parameter]
        params = np.where(
            life_param_mask,
            self.param_transform(self.phi(stress, *phi_params)),
            dist_params)
        mask = (X == stress).all(axis=1)
        hf = np.where(mask, self.hf_dist(x, *params), hf)
    return hf
def is_scalar_literal(x):
    return np.isscalar(x)


def is_scalar_node(x):
def getshape(val):
    val = getval(val)
    assert np.isscalar(val) or isinstance(val, np.ndarray), \
        'Jacobian requires input and output to be scalar- or array-valued'
    return np.shape(val)
def test_falseyness():
    fun = lambda x: x**2 if np.isscalar(x) else np.sum(x)
    check_grads(fun, 5.)
    check_grads(fun, np.array([1., 2.]))
def fit_from_df(self, df, x=None, c=None, n=None,
                xl=None, xr=None, tl=None, tr=None,
                **fit_options):
    r"""
    The central feature of SurPyval's capability. This function aims to
    provide an API that mimics the simplicity of the scipy API. That is,
    to use a simple :code:`fit()` call, with as many or as few parameters
    as is needed.

    Parameters
    ----------

    df : DataFrame
        DataFrame of data to be used to create surpyval model

    x : string, optional
        column name for the column in df containing the variable data.
        If not provided must provide both xl and xr.

    c : string, optional
        column name for the column in df containing the censor flag of x.
        If not provided assumes all values of x are observed.

    n : string, optional
        column name for the column in df containing the counts of x.
        If not provided assumes each x is one observation.

    tl : string or scalar, optional
        If string, column name for the column in df containing the left
        truncation data. If scalar assumes each x is left truncated by
        that value. If not provided assumes x is not left truncated.

    tr : string or scalar, optional
        If string, column name for the column in df containing the right
        truncation data. If scalar assumes each x is right truncated by
        that value. If not provided assumes x is not right truncated.

    xl : string, optional
        column name for the column in df containing the left interval for
        interval censored data. If left interval is -Inf, assumes left
        censored. If xl[i] == xr[i] assumes observed. Cannot be provided
        with x, must be provided with xr.

    xr : string, optional
        column name for the column in df containing the right interval for
        interval censored data. If right interval is Inf, assumes right
        censored. If xl[i] == xr[i] assumes observed. Cannot be provided
        with x, must be provided with xl.

    fit_options : dict, optional
        dictionary of fit options that will be passed to the :code:`fit`
        method, see that method for options.

    Returns
    -------

    model : Parametric
        A parametric model with the fitted parameters and methods for all
        functions of the distribution using the fitted parameters.

    Examples
    --------
    >>> import surpyval as surv
    >>> df = surv.datasets.BoforsSteel.df
    >>> model = surv.Weibull.fit_from_df(df, x='x', n='n', offset=True)
    >>> print(model)
    Parametric SurPyval Model
    =========================
    Distribution      : Weibull
    Fitted by         : MLE
    Offset (gamma)    : 39.76562962867477
    Parameters        :
        alpha: 7.141925216146524
        beta: 2.6204524040137844
    """
    if not type(df) == pd.DataFrame:
        raise ValueError("df must be a pandas DataFrame")

    if (x is not None) and ((xl is not None) or (xr is not None)):
        raise ValueError(
            "Must use either `x` or `xl` and `xr`; cannot use both")

    if x is not None:
        x = df[x].astype(float)
    else:
        xl = df[xl].astype(float)
        xr = df[xr].astype(float)
        x = np.vstack([xl, xr]).T

    if c is not None:
        c = df[c].values.astype(int)

    if n is not None:
        n = df[n].values.astype(int)

    if tl is not None:
        if type(tl) == str:
            tl = df[tl].values.astype(float)
        elif np.isscalar(tl):
            tl = (np.ones(df.shape[0]) * tl).astype(float)
        else:
            raise ValueError('`tl` must be scalar or column label')
    else:
        tl = np.ones(df.shape[0]) * -np.inf

    if tr is not None:
        if type(tr) == str:
            tr = df[tr].values.astype(float)
        elif np.isscalar(tr):
            tr = (np.ones(df.shape[0]) * tr).astype(float)
        else:
            raise ValueError(
                "`tr` must be scalar or a string representing a column label")
    else:
        tr = np.ones(df.shape[0]) * np.inf

    t = np.vstack([tl, tr]).T

    return self.fit(x=x, c=c, n=n, t=t, **fit_options)
def fit(self,
        n_restarts=10,
        method='L-BFGS-B',
        fix_R=False,
        verbose=False,
        profile=False,
        options={
            'maxiter': 500,
            'disp': False,
            'gtol': 1e-5,
            'ftol': 1e7 * np.finfo(float).eps
        }):
    # Store nll values and params over restarts
    nll_values = []
    params = []
    term_msg = []

    # Get bounds from objects
    bounds = []
    bounds.append((np.log(self.R['min'] / 100),
                   np.log(self.R['max'] / 100)))  # Bounds on R
    bounds.append((np.log(self.spatial_cov.params['ell1']['min'] / 100),
                   np.log(self.spatial_cov.params['ell1']['max'] / 100)))
    bounds.append((np.log(self.spatial_cov.params['ell2']['min'] / 100),
                   np.log(self.spatial_cov.params['ell2']['max'] / 100)))
    for i in range(len(self.temporal_cov_list)):
        ell_min = self.temporal_cov_list[i].params['ell']['min']
        ell_max = self.temporal_cov_list[i].params['ell']['max']
        sig2_min = self.temporal_cov_list[i].params['sigma2']['min']
        sig2_max = self.temporal_cov_list[i].params['sigma2']['max']
        bounds.append((np.log(ell_min), np.log(ell_max)))
        bounds.append((np.log(sig2_min), np.log(sig2_max)))
    if np.isscalar(self.sig2n['value']):
        bounds.append((np.log(self.sig2n['min']),
                       np.log(self.sig2n['max'])))  # bounds on log(sig2n)
    else:
        for i in range(len(self.sig2n['value'])):
            bounds.append((np.log(self.sig2n['min'][i]),
                           np.log(self.sig2n['max'][i])))

    def obj_fun(tparams):
        """
        Objective function (likelihood with priors)
        :param tparams: list of log-transformed parameters
        :return: value of negative log likelihood
        """
        # Get parameters
        if not fix_R:
            self.R['value'] = np.exp(tparams[0]) * 100.0
        self.spatial_cov.params['ell1']['value'] = np.exp(tparams[1]) * 100.0
        self.spatial_cov.params['ell2']['value'] = np.exp(tparams[2]) * 100.0
        n_temp_cov = len(self.temporal_cov_list)
        pind = 3
        for i in range(n_temp_cov):
            self.temporal_cov_list[i].params['ell']['value'] = np.exp(
                tparams[pind])
            pind = pind + 1
            self.temporal_cov_list[i].params['sigma2']['value'] = np.exp(
                tparams[pind])
            pind = pind + 1
        if np.isscalar(self.sig2n['value']):
            self.sig2n['value'] = np.exp(tparams[pind])
        else:
            self.sig2n['value'] = np.exp(tparams[pind:])

        # compute log priors
        prior_lpdf = self.R['prior'].lpdf(self.R['value'])
        prior_lpdf = prior_lpdf + self.spatial_cov.params['ell1'][
            'prior'].lpdf(self.spatial_cov.params['ell1']['value'])
        prior_lpdf = prior_lpdf + self.spatial_cov.params['ell2'][
            'prior'].lpdf(self.spatial_cov.params['ell2']['value'])
        for i in range(n_temp_cov):
            prior_lpdf = prior_lpdf + self.temporal_cov_list[i].params[
                'ell']['prior'].lpdf(
                    self.temporal_cov_list[i].params['ell']['value'])
            prior_lpdf = prior_lpdf + self.temporal_cov_list[i].params[
                'sigma2']['prior'].lpdf(
                    self.temporal_cov_list[i].params['sigma2']['value'])
        if np.isscalar(self.sig2n['value']):
            prior_lpdf = prior_lpdf + self.sig2n['prior'].lpdf(
                self.sig2n['value'])
        else:
            for i in range(len(self.sig2n['prior'])):
                prior_lpdf = prior_lpdf + self.sig2n['prior'][i].lpdf(
                    self.sig2n['value'][i])

        # Compute likelihood
        try:
            llik = self.loglik()
        except np.linalg.LinAlgError as e:
            llik = -np.inf
            # print(e)

        nll = -1.0 * (llik + prior_lpdf)  # * 1e-7
        return nll

    for _ in tqdm(range(n_restarts), desc="Restarts"):
        tparams0 = []
        if fix_R:
            tparams0.append(np.log(self.R['value']) - np.log(100))
        else:
            tparams0.append(
                np.log(self.R['prior'].sample()) - np.log(100))  # starting R
        tparams0.append(
            np.log(self.spatial_cov.params['ell1']['prior'].sample()) -
            np.log(100))  # starting spatial lengthscale
        tparams0.append(
            np.log(self.spatial_cov.params['ell2']['prior'].sample()) -
            np.log(100))  # starting spatial lengthscale
        for i in range(len(self.temporal_cov_list)):
            tparams0.append(
                np.log(self.temporal_cov_list[i].params['ell']
                       ['prior'].sample()))
            tparams0.append(
                np.log(self.temporal_cov_list[i].params['sigma2']
                       ['prior'].sample()))
        if np.isscalar(self.sig2n['value']):
            tparams0.append(np.log(
                self.sig2n['prior'].sample()))  # starting sig2n
        else:
            for i in range(len(self.sig2n['value'])):
                tparams0.append(np.log(
                    self.sig2n['prior'][i].sample()))  # starting sig2n
        tparams0 = np.array(tparams0)

        # Profiling
        if profile and _ == 0:
            import cProfile
            cProfile.runctx('obj_fun(tparams0)', None, locals(),
                            filename='objfunstats')
            g_obj_fun = grad(obj_fun)
            cProfile.runctx('g_obj_fun(tparams0)', None, locals(),
                            filename='gradobjfunstats')
            return

        try:
            optrescov = scipy.optimize.minimize(obj_fun,
                                                tparams0,
                                                method=method,
                                                options=options,
                                                bounds=bounds,
                                                jac=grad(obj_fun))
            tparams_fit = optrescov.x
            nllcov = optrescov.fun
            nll_values.append(nllcov)
            params.append(tparams_fit)
            term_msg.append(optrescov.message)
        except (ValueError, np.linalg.LinAlgError) as e:
            print(e)
            print('\nrestarting optimization...')

    nll_values = np.array(nll_values)
    if len(nll_values) < 1:
        print('problem with optimization!')
        return
    # Keep only restarts with a finite objective, so that best_ind indexes
    # nll_values, params and term_msg consistently (the original filtered
    # params only, leaving term_msg misaligned with best_ind).
    finite = np.isfinite(nll_values)
    params = [params[i] for i in range(len(nll_values)) if finite[i]]
    term_msg = [term_msg[i] for i in range(len(nll_values)) if finite[i]]
    nll_values = nll_values[finite]
    best_ind = np.argmin(nll_values)
    if verbose:
        print('\nNeg log lik values across different initializations:')
        print(nll_values)
        print('Best index termination message')
        print(term_msg[best_ind])

    if not fix_R:
        self.R['value'] = np.exp(params[best_ind][0]) * 100
    self.spatial_cov.params['ell1']['value'] = np.exp(
        params[best_ind][1]) * 100
    self.spatial_cov.params['ell2']['value'] = np.exp(
        params[best_ind][2]) * 100
    pind = 3
    for i in range(len(self.temporal_cov_list)):
        self.temporal_cov_list[i].params['ell']['value'] = np.exp(
            params[best_ind][pind])
        pind += 1
        self.temporal_cov_list[i].params['sigma2']['value'] = np.exp(
            params[best_ind][pind])
        pind += 1
    if np.isscalar(self.sig2n['value']):
        self.sig2n['value'] = np.exp(params[best_ind][pind])
    else:
        self.sig2n['value'] = np.exp(params[best_ind][pind:])
def cost(x, array, y):
    c = circuit(0.111, array, 4.5)
    if not np.isscalar(c):
        c = c[0]  # get a scalar
    return c + 0.5 * array[0, 0] + x - 0.4 * y
def allclose(m1, m2):
    if isinstance(m1, np.ndarray):
        return np.allclose(m1, m2)
    elif np.isscalar(m1):
        return np.isclose(m1, m2)
    return len(m1) == len(m2) and all(map(allclose, m1, m2))
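# A quick sketch of the recursive behavior: scalars, arrays and nested lists
# are compared element-wise, and mismatched lengths short-circuit to False.
import numpy as np

a = [1.0, np.array([2.0, 3.0]), [4.0, 5.0]]
b = [1.0 + 1e-9, np.array([2.0, 3.0]), [4.0, 5.0]]
print(allclose(a, b))                            # True
print(allclose(a, [1.0, np.array([2.0, 3.0])]))  # False: lengths differ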
def fit_scalar_glm(Xs, ys,
                   model="bernoulli",
                   mean_function="logistic",
                   model_hypers={},
                   fit_intercept=True,
                   weights=None,
                   X_variances=None,
                   prior=None,
                   proximal_point=None,
                   threshold=1e-6,
                   step_size=1,
                   max_iter=50,
                   verbose=False):
    """
    Fit a GLM with vector inputs X and scalar outputs y.

    The user provides the inputs, outputs, the model type (i.e. the
    conditional distribution of the data), and the mean function that maps
    linear weighted inputs to the expected value of the output.

    The following models are supported:

        - Gaussian
        - Bernoulli
        - Poisson
        - Negative binomial (fixed r)

    Arguments
    ---------

    Xs: array of shape (n, p) or list of arrays with shapes
        [(n_1, p), (n_2, p), ..., (n_M, p)] containing covariates for the GLM.

    ys: array of shape (n,) or list of arrays with shapes
        [(n_1,), (n_2,), ..., (n_M,)] containing the scalar outputs of the GLM.

    model: string specifying the conditional distribution of the data.
        Currently supported values are:
        - "gaussian"
        - "bernoulli"
        - "poisson"
        - "negative binomial"

    mean_function: string or lambda function specifying the mapping from the
        projected data to the mean of the output. Currently supported values
        are:
        - "identity"
        - "logistic"
        - "exp"
        - "softplus"
        It is up to the user to make sure that the chosen mean function has
        the correct range for the corresponding model. For example,
        model="bernoulli" and mean_function="exp" will fail.

    model_hypers: dictionary of hyperparameters for the model. For example,
        the negative binomial requires an extra hyperparameter for the
        "number of failures". For valid values of the `model_hypers`, see
        ssm.regression.model_kwarg_descriptions.

    fit_intercept: bool specifying whether or not to fit an intercept term.
        If True, the output will include the weights (an array of length p),
        and a scalar intercept value.

    weights: array of shape (n,) or list of arrays with shapes
        [(n_1,), (n_2,), ..., (n_M,)] containing non-negative weights
        associated with each data point. For example, these are used when
        fitting mixtures of GLMs with the EM algorithm.

    X_variances: array of shape (n, p, p) or list of arrays with shapes
        [(n_1, p, p), (n_2, p, p), ..., (n_M, p, p)] containing the
        covariance of given covariates. These are used when the data itself
        is uncertain, but where we have distributions q(X) and q(y) on the
        inputs and outputs, respectively. (We assume X and y are
        independent.) In this case, Xs and ys are treated as the marginal
        means E[X] and E[y] respectively. To fit the GLM, we also need the
        marginal covariances of the inputs. These are specified here as an
        array of covariance matrices, or as a list of arrays of covariance
        matrices, one for each data point.

    prior: tuple of (mean, variance) of a Gaussian prior on the weights of
        the GLM. The mean must be a scalar or an array of shape (p,) if
        fit_intercept is False or (p+1,) otherwise. If scalar, it is
        multiplied by a vector of ones. The variance can be a positive scalar
        or a (p, p) or (p+1, p+1) matrix, depending again on whether
        fit_intercept is True.

    proximal_point: tuple of (array, positive scalar) for the proximal point
        algorithm. The array must be of shape (p,) if fit_intercept is False
        or (p+1,) otherwise. It specifies the current value of the parameters
        that we should not deviate too far from. The positive scalar
        specifies the inverse strength of this regularization. As this value
        goes to zero, the fitted value must be exactly the proximal point
        given in the array. Effectively, these specify another Gaussian
        prior, which will be multiplied with the prior above.

    threshold: positive scalar value specifying the mean absolute deviation
        in weights required for convergence.

    step_size: scalar value in (0, 1] specifying the linear combination of
        the next weights and current weights. A step size of 1 means that
        each iteration goes all the way to the mode of the quadratic
        approximation.

    max_iter: int, maximum number of iterations of the Newton-Raphson
        algorithm.

    verbose: bool, whether or not to print diagnostic messages.
    """
    Xs = Xs if isinstance(Xs, (list, tuple)) else [Xs]
    ys = ys if isinstance(ys, (list, tuple)) else [ys]
    assert len(Xs) == len(ys)

    p = Xs[0].shape[1]
    assert all([y.ndim == 1 for y in ys])
    assert all([X.shape[1] == p for X in Xs])
    assert all([y.shape[0] == X.shape[0] for X, y in zip(Xs, ys)])

    # Check the weights.  Default to all ones.
    if weights is not None:
        weights = weights if isinstance(weights, (list, tuple)) else [weights]
        assert all(
            [weight.shape == (X.shape[0],) for X, weight in zip(Xs, weights)])
    else:
        weights = [np.ones(X.shape[0]) for X in Xs]

    # If the inputs are uncertain, the user may specify the marginal variance
    # of the data points. These must be an array of (p, p) covariance matrices.
    if X_variances is not None:
        X_variances = X_variances if isinstance(X_variances, (list, tuple)) \
            else [X_variances]
        assert all([
            X_var.shape == (X.shape[0], p, p)
            for X, X_var in zip(Xs, X_variances)
        ])
    else:
        X_variances = [np.zeros((X.shape[0], p, p)) for X in Xs]

    # Add a column to X if fitting the intercept as well
    # Note: this could be memory intensive, but the code is a lot simpler.
    if fit_intercept:
        Xs = [np.column_stack((X, np.ones(X.shape[0]))) for X in Xs]
        new_X_variances = [np.zeros((X.shape[0], p + 1, p + 1)) for X in Xs]
        for X_var, new_X_var in zip(X_variances, new_X_variances):
            new_X_var[:, :p, :p] = X_var
        X_variances = new_X_variances
        p += 1

    # Check the model specification
    model = model.lower()
    assert model in ("gaussian", "bernoulli", "poisson", "negative_binomial")

    # Initialize the prior
    if prior is None:
        prior_mean = np.zeros(p)
        prior_precision = np.zeros((p, p))
    else:
        assert isinstance(prior, (tuple, list)) and len(prior) == 2
        prior_mean, prior_variance = prior
        if np.isscalar(prior_mean):
            prior_mean = prior_mean * np.ones(p)
        else:
            assert prior_mean.shape == (p,)

        if np.isscalar(prior_variance):
            assert prior_variance > 0
            prior_precision = 1 / prior_variance * np.eye(p)
        else:
            assert prior_variance.shape == (p, p)
            prior_precision = np.linalg.inv(prior_variance)

    # Incorporate the proximal point into the prior, if specified.
    if proximal_point is not None:
        # Make sure the point and the regularization strength are both
        # specified.
        assert isinstance(proximal_point, (tuple, list)) and \
            len(proximal_point) == 2
        point, alpha = proximal_point
        assert point.shape == (p,)
        assert np.isscalar(alpha) and alpha > 0

        # Combine the proximal point regularizer with the Gaussian prior.
        new_precision = prior_precision + 1 / alpha * np.eye(p)
        prior_mean = np.linalg.solve(
            new_precision,
            np.dot(prior_precision, prior_mean) + point / alpha)
        prior_precision = new_precision

    # Get the partition function (A) and mean function (f).
    # These determine the mapping from inputs to natural parameters (g).
    A = lambda eta: partition_functions[model](eta, **model_hypers)
    f = mean_functions[mean_function] \
        if isinstance(mean_function, str) else mean_function
    g = lambda u: canonical_link_functions[model](f(u), **model_hypers)

    # Compute necessary derivatives for IRLS
    # When y is a scalar, these are all R^1 -> R^1 scalar functions
    df = elementwise_grad(f)
    dg = elementwise_grad(g)
    d2g = elementwise_grad(dg)
    dA = elementwise_grad(A)
    d2A = elementwise_grad(dA)

    # (The original sketched a linear approximation of the gradient for
    # uncertain inputs here as `h = lambda x, y, theta: g()`, but that
    # placeholder was never called: `h` is rebuilt inside the loop below.)

    # Initialize the weights, theta
    theta = np.zeros(p)
    dtheta = np.inf
    converged = False
    for itr in range(max_iter):
        if verbose:
            print("Iteration ", itr, "delta theta: ", dtheta)

        # Check convergence
        converged = dtheta < threshold
        if converged:
            print("Converged in ", itr, " iterations.")
            break

        # Compute the negative Hessian (J) and the gradient (h) of the
        # objective
        J = prior_precision.copy()
        h = -np.dot(prior_precision, (theta - prior_mean))

        for X, y, weight, X_var in zip(Xs, ys, weights, X_variances):
            # Project inputs with current parameters and get predicted values
            u = np.dot(X, theta)
            yhat = f(u)

            # Compute the weights G and R
            G = dg(u)
            R = d2g(u) * (yhat - y) + G**2 * d2A(g(u))

            # Linearize the gradient for uncertain data
            H = G * (y - yhat)
            # dH = d2g(u) * (y - yhat) - dg(u) * df(u)
            dH = G * (y - yhat) - G**2 * d2A(g(u))  # nearly the same as R!

            # Update the negative Hessian
            weighted_X = X * R[:, None] * weight[:, None]
            J += np.dot(weighted_X.T, X)
            J += np.einsum('npq,n->pq', X_var, R)

            # Update the gradient
            h += np.dot(weighted_X.T, H / R)
            h += np.einsum('npq,n,q->p', X_var, dH, theta)

        # Solve for the Newton update
        # (current parameters + negative Hessian^{-1} gradient)
        next_theta = theta + np.linalg.solve(J, h)

        # Check for convergence
        dtheta = np.mean(abs(next_theta - theta))
        theta = (1 - step_size) * theta + step_size * next_theta

    # Output warning if terminated without convergence
    if not converged:
        warn("Newton's method failed to converge in {} iterations.".format(
            max_iter))

    # Return the weights and intercept if necessary
    if fit_intercept:
        return theta[:-1], theta[-1]
    else:
        return theta
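# A minimal usage sketch for fit_scalar_glm: logistic regression on synthetic
# data.  Assumes the module-level tables (partition_functions, mean_functions,
# canonical_link_functions) are defined elsewhere in this module; the
# data-generating code is illustrative only.
import numpy as np
import numpy.random as npr

npr.seed(0)
n, p = 1000, 2
true_w = np.array([1.0, -2.0])
true_b = 0.5

X = npr.randn(n, p)
p_y = 1.0 / (1.0 + np.exp(-(X @ true_w + true_b)))  # logistic mean
y = (npr.rand(n) < p_y).astype(float)

# Bernoulli GLM with a logistic mean function; returns (weights, intercept)
w_hat, b_hat = fit_scalar_glm(X, y, model="bernoulli",
                              mean_function="logistic")
print(w_hat, b_hat)  # near [1.0, -2.0] and 0.5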
def isScalar(s):
    return np.isscalar(s)