def corr_spline_grad(D, theta):
    """Gradient of the cubic-spline correlation with respect to ``D``.

    The one-dimensional spline factor, with ``xi = |d| * theta``, is::

        1 - 15*xi**2 + 30*xi**3     for xi <= 0.2
        1.25 * (1 - xi)**3          for 0.2 < xi < 1
        0                           for xi >= 1

    and the correlation of a row of ``D`` is the product of its factors.

    Args:
        D: (m, n) array of coordinate differences.
        theta: scalar or length-n array of correlation parameters.

    Returns:
        (m, n) array; entry ``[i, j]`` is the partial derivative of the
        correlation of row ``i`` with respect to ``D[i, j]``.
    """
    xi = np.abs(D) * theta
    inner = xi <= 0.2
    outer = np.logical_and(xi > 0.2, xi < 1.0)

    # Per-coordinate spline factors (zero where xi >= 1).
    ss = np.zeros(D.shape)
    ss[inner] = 1 - xi[inner]**2 * (15 - 30 * xi[inner])
    ss[outer] = 1.25 * (1 - xi[outer])**3

    # Per-coordinate derivatives; u = d(xi)/d(D).  The regions must be
    # selected on xi (not on u), and the outer branch is the derivative of
    # 1.25 * (1 - xi)**3, i.e. -3.75 * (1 - xi)**2 * u.
    u = np.sign(D) * theta
    dr = np.zeros(D.shape)
    dr[inner] = u[inner] * ((90 * xi[inner] - 30) * xi[inner])
    dr[outer] = -3.75 * u[outer] * (1 - xi[outer])**2

    # Product rule: replace factor j by its derivative, multiply the row.
    # Column j of dr is still the raw derivative when it is read because
    # only columns < j have been overwritten so far.
    _, n = D.shape
    for j in range(n):
        _ss = np.copy(ss)
        _ss[:, j] = dr[:, j]
        dr[:, j] = np.prod(_ss, axis=1)
    return dr
def fit(self,X,T,ranked_pairs,smoothed_pairs,ranked_pair_weights=None,smoothed_pair_weights=None):
    """
    Fit the DSSL loss

    Args:
        X - (n_samples,n_features) ndarray:
            Design matrix
        T - (n_samples,) ndarray:
            Vector of continuous timestamps
        ranked_pairs - (n_ranked_pairs,2) integer ndarray (or None):
            Contains ranked pairs of samples. Model will try to find
            parameters such that
            score(ranked_pairs[i,0]) > score(ranked_pairs[i,1]) for all i.
        smoothed_pairs - (n_smoothed_pairs,2) integer ndarray (or None):
            Contains pairs of samples that are close in time. Model will
            try to find parameters that minimize
            (score(smoothed_pairs[i,0]) - score(smoothed_pairs[i,1]))**2
            / (T(smoothed_pairs[i,0]) - T(smoothed_pairs[i,1]))**2
            for all i.
        ranked_pair_weights - (n_ranked_pairs,) float ndarray:
            Contains sample weights for each of the ranked pairs.
        smoothed_pair_weights - (n_smoothed_pairs,) float ndarray:
            Contains sample weights for each of the smoothed pairs.

    Returns:
        self, with the optimizer result stored in ``self.res`` and the
        fitted parameters passed to ``self.set_params``.

    Raises:
        AssertionError: if the inputs are inconsistent (empty X, wrong T
            shape, non-integer pairs, out-of-range indices, or a pair that
            references the same sample twice).
    """
    n_samples = X.shape[0]
    assert n_samples > 0
    assert T.shape == (n_samples,)
    for pairs in (ranked_pairs, smoothed_pairs):
        if pairs is None:
            continue
        # Any integer dtype is a valid index array, not just the default int.
        assert np.issubdtype(pairs.dtype, np.integer)
        # Valid row indices are 0 .. n_samples-1 (upper bound is exclusive).
        assert np.all(np.logical_and(pairs >= 0, pairs < n_samples))
        assert np.all(pairs[:,0] != pairs[:,1])

    # get obj
    obj = self.get_obj(X,T,ranked_pairs,smoothed_pairs,ranked_pair_weights,smoothed_pair_weights)

    # get the gradient function using autograd
    gfun = grad(obj)

    # init params
    w0 = np.zeros(X.shape[1])

    # optimize objective
    self.res = minimize(obj,w0,method="L-BFGS-B",jac=gfun,options={"gtol":self.gtol,"maxiter":self.maxiter,"disp":self.disp},tol=self.tol)

    self.set_params(self.res.x)
    return self
def _process_sequence(self, state_sequence):
    """Collapse tied states onto the unique state they belong to.

    Parameters
    ----------
    state_sequence : array_like, shape (n,)
        Index of the most likely state for each observation.

    Returns
    -------
    reduced_sequence : array_like, shape (n,)
        ``state_sequence`` itself when ``self.n_tied == 0``. Otherwise an
        integer array in which every state index falling in
        ``[s * n_chain, (s + 1) * n_chain)`` is replaced by ``s`` for
        ``s`` in ``range(n_unique)``; indices outside every such block
        map to 0.
    """
    if self.n_tied == 0:
        return state_sequence

    block_width = self.n_chain
    collapsed = np.zeros(len(state_sequence))
    for unique_idx in range(self.n_unique):
        block_start = unique_idx * block_width
        block_stop = (unique_idx + 1) * block_width
        in_block = np.logical_and(state_sequence >= block_start,
                                  state_sequence < block_stop)
        collapsed[in_block] = unique_idx
    return collapsed.astype(int)
def loglik(ts, t_start=0.0, t_end=None, eval_ts=None, phi_kernel=None, mu_kernel=None, **kwargs):
    """Return ``sum(log(lam)) - big_lam`` for the event times ``ts``.

    ``lam`` comes from ``lam_hawkes`` evaluated at ``eval_ts`` and
    ``big_lam`` from ``big_lam_hawkes`` evaluated at ``t_end``.

    Args:
        ts: array of event times; its last entry is used as ``t_end`` when
            ``t_end`` is None.
        t_start: start of the window; forwarded to ``big_lam_hawkes``.
        t_end: end of the window.
        eval_ts: optional precomputed evaluation times. When given, the
            window is not used to select them (it is still used for
            ``big_lam``).
        phi_kernel: passed through ``influence.as_influence_kernel``.
        mu_kernel: passed through ``background.as_background_kernel``.
        **kwargs: forwarded to both ``lam_hawkes`` and ``big_lam_hawkes``.
    """
    phi_kernel = influence.as_influence_kernel(phi_kernel)
    mu_kernel = background.as_background_kernel(mu_kernel)

    if t_end is None:
        t_end = ts[-1]

    if eval_ts is None:
        if t_end > ts[-1]:
            # Window extends past the last event: keep every event after
            # t_start and append t_end itself.
            eval_ts = np.concatenate((ts[ts > t_start], [t_end]))
        else:
            inside_window = np.logical_and((ts > t_start), (ts < t_end))
            eval_ts = ts[inside_window]

    shared_args = dict(ts=ts, phi_kernel=phi_kernel, mu_kernel=mu_kernel)
    lam = lam_hawkes(eval_ts=eval_ts, **shared_args, **kwargs)
    big_lam = big_lam_hawkes(
        t_start=t_start,
        eval_ts=np.array(t_end),
        **shared_args,
        **kwargs
    )
    log_intensity_total = np.sum(np.log(lam))
    return log_intensity_total - big_lam
def include_aligned(displacement, dcutoff, pval, pcutoff=0.05, less=True):
    """Select entries by squared displacement and p-value.

    The squared norm of ``displacement`` is taken along axis 0 and compared
    with ``dcutoff**2``: strictly smaller when ``less`` is truthy, strictly
    larger otherwise. An entry is selected only if it also satisfies
    ``pval < pcutoff``.

    Returns the element-wise logical AND of the two conditions.
    """
    squared_norm = (displacement**2).sum(0)
    squared_cutoff = dcutoff**2
    if less:
        distance_ok = squared_norm < squared_cutoff
    else:
        distance_ok = squared_norm > squared_cutoff
    significant = pval < pcutoff
    return np.logical_and(distance_ok, significant)
def data_processing(y, var_distrib, cast_types=False):
    """Label-encode the non-continuous, non-binomial columns of ``y``.

    Args:
        y: DataFrame-like object (must expose ``shape`` and ``columns``);
            it is deep-copied, never modified.
        var_distrib: per-column type name, indexed by column position
            ('continuous', 'categorical', 'ordinal', 'bernoulli',
            'binomial').
        cast_types: when truthy, cast every column to the dtype associated
            with its type name.

    Returns:
        (df, le_dict): the processed copy and a dict mapping each encoded
        column name to a copy of the fitted ``LabelEncoder``.
    """
    dtypes_dict = {'continuous': float, 'categorical': str, 'ordinal': float,
                   'bernoulli': int, 'binomial': int}

    n_columns = y.shape[1]
    df = deepcopy(y)
    le_dict = {}

    # Encode every variable that is neither continuous nor binomial.
    for col_idx, colname in enumerate(df.columns):
        kind = var_distrib[col_idx]
        if kind == 'continuous' or kind == 'binomial':
            continue
        encoder = LabelEncoder()
        df[colname] = encoder.fit_transform(df[colname])
        le_dict[colname] = deepcopy(encoder)

    if cast_types:
        column_types = {}
        for j in range(n_columns):
            column_types[df.columns[j]] = dtypes_dict[var_distrib[j]]
        df = df.astype(column_types)

    return df, le_dict
def test_sigmoid():
    """sigmoid maps into [0, 1] and preserves the ordering of its inputs."""
    x = np.linspace(-2, 2, 100)
    y = sigmoid(x)

    # Range check.
    non_negative = np.greater_equal(y, 0)
    at_most_one = np.less_equal(y, 1)
    assert np.all(np.logical_and(non_negative, at_most_one))

    # Ordering check: sorting outputs gives the same permutation as
    # sorting inputs.
    order_of_outputs = np.argsort(y)
    order_of_inputs = np.argsort(x)
    assert np.all(np.equal(order_of_outputs, order_of_inputs))
def test_batch_transform():
    """batch_transform keeps the shape and applies one transform per column."""
    X = np.random.normal(0, 5, size=(10, 2))
    Y = batch_transform(X, [sigmoid, stable_exp])

    assert (np.all(np.equal(X.shape, Y.shape)))
    # Column 0 went through sigmoid: values lie in [0, 1].
    assert (np.all(
        np.logical_and(np.greater_equal(Y[:, 0], 0), np.less_equal(Y[:, 0],
                                                                   1))))
    # Column 1 went through stable_exp: values are non-negative.  (This
    # assertion previously re-checked column 0, leaving column 1 untested.)
    assert (np.all(np.greater_equal(Y[:, 1], 0)))
def corr_spline(D, theta):
    """Cubic-spline correlation of each row of ``D``.

    With ``xi = |D| * theta`` the per-coordinate factor is
    ``1 - xi**2 * (15 - 30*xi)`` for ``xi <= 0.2``,
    ``1.25 * (1 - xi)**3`` for ``0.2 < xi < 1`` and 0 otherwise.

    Returns the product of the factors along axis 1.
    """
    xi = np.abs(D) * theta
    factors = np.zeros(D.shape)

    near = xi <= 0.2
    xi_near = xi[near]
    factors[near] = 1 - xi_near**2 * (15 - 30 * xi_near)

    mid = np.logical_and(xi > 0.2, xi < 1.0)
    factors[mid] = 1.25 * (1 - xi[mid])**3

    return np.prod(factors, axis=1)
def get_likelihood_based_interval(thetas, interval_coverage, model):
    '''
    Evaluate ``model.eval_objective`` on every entry of ``thetas`` and keep
    the objective values lying in the central ``interval_coverage`` percent.

    thetas: iterable of free-parameter settings for the model
    interval_coverage: coverage in [0, 100]; the kept values lie between
        the (100 - coverage)/2 and coverage + (100 - coverage)/2
        percentiles (both bounds inclusive)
    model: object exposing ``eval_objective(theta)``

    Returns the array of retained objective values (not the thetas).
    '''
    objective_values = np.array(
        [model.eval_objective(theta) for theta in thetas])

    tail = (100 - interval_coverage) / 2
    lower_bound = np.percentile(objective_values, tail)
    upper_bound = np.percentile(objective_values, interval_coverage + tail)

    keep = np.logical_and(lower_bound <= objective_values,
                          objective_values <= upper_bound)
    return objective_values[keep]
def getquantile(x, lower=0.025, upper=0.975, return_indices=False):
    """
    Flag the elements of `x` lying in the quantile range [lower, upper)

    Arguments:
        x: `ndarray(nsamples)`
        lower: `float` in [0, 1]. Lower quantile (inclusive bound)
        upper: `float` in [0, 1]. Upper quantile (exclusive bound)
        return_indices: `bool`. If `False`, returns a boolean array.
            If `True`, returns the integer positions of the entries of
            `x` that fall inside the range.

    Returns:
        `ndarray`. Boolean mask or index array, depending on
        `return_indices`
    """
    bounds = np.percentile(x, [lower * 100, upper * 100])
    low_value, high_value = bounds
    in_range = np.logical_and(np.greater_equal(x, low_value),
                              np.less(x, high_value))
    if not return_indices:
        return in_range
    positions = np.arange(x.size)
    return positions[in_range]
def test_softmax():
    """softmax returns a probability vector: entries in [0, 1], sum 1."""
    x = np.arange(10)
    p = softmax(x)
    # Compare with a tolerance: a sum of floating-point exponentials is
    # not guaranteed to equal 1 exactly.
    assert np.isclose(p.sum(), 1)
    assert np.logical_and(np.all(np.greater_equal(p, 0)),
                          np.all(np.less_equal(p, 1)))
def spec(guesses, targets):
    """Specificity: TN / (TN + FP) for labels coded 0 (negative) / 1 (positive)."""
    actually_negative = targets == 0
    true_negatives = np.sum(np.logical_and(guesses == 0, actually_negative))
    false_positives = np.sum(np.logical_and(guesses == 1, actually_negative))
    all_negatives = true_negatives + false_positives
    return np.true_divide(true_negatives, all_negatives)
} lfp = {} x = {} z = {} for probe in ['probeC', 'probeD']: # Load data (saved by extract_data.py) with open('%s/results/neuropixel_viz_%s_m405751.pkl' % (root_path, probe), 'rb') as f: d = pickle.load(f) x[probe] = d['x'] # (69, 2) spatial locations, microns t = d['t'] # (2500, 1) time points in seconds (in 0.4ms increments) t *= 1000. # convert to ms t_ind = np.logical_and(t >= -40.0, t <= 110.0) t = np.expand_dims(t[t_ind], 1) lfp_tmp = d[ 'y'][:, t_ind, :] # (69, 2500, 150) which is (spatial, time, trials) lfp_tmp /= 100. # Scaling # remove evoked LFP lfp_tmp = lfp_tmp - np.mean(lfp_tmp, 2, keepdims=True) lfp[probe] = lfp_tmp # Desired CSD prediction locations z[probe] = np.stack([24. * np.ones(len(csd_loc[probe])), csd_loc[probe]]).T # %% Visualize data, check for outlier trials ol_bool = {} for probe in ['probeC', 'probeD']: trial_sd = np.std(lfp[probe], axis=2, keepdims=True)
def ppv(guesses, targets):
    """Positive predictive value: TP / (TP + FP) for labels coded 0 / 1."""
    predicted_positive = guesses == 1
    true_positives = np.sum(np.logical_and(predicted_positive, targets == 1))
    false_positives = np.sum(np.logical_and(predicted_positive, targets == 0))
    all_predicted_positive = true_positives + false_positives
    return np.true_divide(true_positives, all_predicted_positive)
def MIAMI(y, n_clusters, r, k, init, var_distrib, nj, authorized_ranges,\
          target_nb_pseudo_obs = 500, it = 50, \
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True,\
          dm = [], max_patience = 1): # dm: Hack to remove
    ''' Generates pseudo-observations from a trained M1DGMM

    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int): The number of clusters to look for in the data
        (NOTE: not used here, M1DGMM is called with 'auto')
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y
    nj (p 1darray): For binary/count data: The maximum values that the variable can take.
                    For ordinal data: the number of different existing categories for each variable
    authorized_ranges (ndarray): The ranges in which the observations have to lie in
    target_nb_pseudo_obs (int): The number of pseudo-observations to generate
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increase by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation step for each variable
    seed (int): The random state seed to set (Only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    dm (np array): The distance matrix of the observations. If not given M1DGMM computes it
        (the default is forwarded to M1DGMM untouched and never mutated here)
    max_patience (int): Forwarded to M1DGMM
    ------------------------------------------------------------------------------------------------
    returns (dict): The M1DGMM output, augmented with 'y_all' (the original
                    observations stacked with the kept pseudo-observations) and
                    'share_kept_pseudo_obs' (kept / generated pseudo-observations)
    '''

    out = M1DGMM(y, 'auto', r, k, init, var_distrib, nj, it,\
                 eps, maxstep, seed, perform_selec = perform_selec,\
                 dm = dm, max_patience = max_patience)

    # Unpacking the model from the M1DGMM output
    k = out['best_k']
    r = out['best_r']
    w_s = out['best_w_s']
    lambda_bin = out['lambda_bin']
    lambda_ord = out['lambda_ord']
    lambda_categ = out['lambda_categ']
    lambda_cont = out['lambda_cont']
    mu_s = out['mu']
    sigma_s = out['sigma']

    nj_bin = nj[pd.Series(var_distrib).isin(['bernoulli', 'binomial'])].astype(int)
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nj_categ = nj[var_distrib == 'categorical'].astype(int)

    y_std = y[:, var_distrib == 'continuous'].std(axis=0, keepdims=True)

    M0 = 100 # The number of z to draw
    S0 = np.prod(k)
    MM = 30 # The number of y to draw for each z

    #=======================================================
    # Data augmentation part
    #=======================================================

    # Create pseudo-observations iteratively:
    nb_pseudo_obs = 0
    y_new_all = []
    w_snorm = np.array(w_s) / np.sum(w_s)
    total_nb_obs_generated = 0

    while nb_pseudo_obs <= target_nb_pseudo_obs:

        #===================================================
        # Generate a batch of latent variables
        #===================================================

        # Draw some z^{(1)} | Theta using z^{(1)} | s, Theta
        z0_s = multivariate_normal(size = (M0, 1), \
            mean = mu_s[0].flatten(order = 'C'), cov = block_diag(*sigma_s[0]))
        z0_s = z0_s.reshape(M0, S0, r[0], order='C')

        # For every draw keep the latent vector of its sampled component.
        comp_chosen = np.random.choice(S0, M0, p=w_snorm)
        z = z0_s[np.arange(M0), comp_chosen]

        #===================================================
        # Generate a batch of pseudo-observations
        #===================================================

        y_bin_new = []
        y_categ_new = []
        y_ord_new = []
        y_cont_new = []

        for mm in range(MM):
            y_bin_new.append(draw_new_bin(lambda_bin, z, nj_bin))
            y_categ_new.append(draw_new_categ(lambda_categ, z, nj_categ))
            y_ord_new.append(draw_new_ord(lambda_ord, z, nj_ord))
            y_cont_new.append(draw_new_cont(lambda_cont, z))

        # Stack the quantities
        y_bin_new = np.vstack(y_bin_new)
        y_categ_new = np.vstack(y_categ_new)
        y_ord_new = np.vstack(y_ord_new)
        y_cont_new = np.vstack(y_cont_new)

        # "Destandardize" the continuous data
        y_cont_new = y_cont_new * y_std

        # Put them in the right order and append them to y
        type_counter = {'count': 0, 'ordinal': 0,\
                        'categorical': 0, 'continuous': 0}

        y_new = np.full((M0 * MM, y.shape[1]), np.nan)

        # Column j of y_new is filled from the next unused column of the
        # batch matching its variable type.
        for j, var in enumerate(var_distrib):
            if (var == 'bernoulli') or (var == 'binomial'):
                y_new[:, j] = y_bin_new[:, type_counter['count']]
                type_counter['count'] = type_counter['count'] + 1
            elif var == 'ordinal':
                y_new[:, j] = y_ord_new[:, type_counter[var]]
                type_counter[var] = type_counter[var] + 1
            elif var == 'categorical':
                y_new[:, j] = y_categ_new[:, type_counter[var]]
                type_counter[var] = type_counter[var] + 1
            elif var == 'continuous':
                y_new[:, j] = y_cont_new[:, type_counter[var]]
                type_counter[var] = type_counter[var] + 1
            else:
                raise ValueError(var, 'Type not implemented')

        #===================================================
        # Acceptation rule
        #===================================================

        # Check that each variable is in the good range
        y_new_exp = np.expand_dims(y_new, 1)

        total_nb_obs_generated += len(y_new)

        mask = np.logical_and(y_new_exp >= authorized_ranges[0][np.newaxis],\
                       y_new_exp <= authorized_ranges[1][np.newaxis])

        # Keep an observation if it lies at least into one of the ranges possibility
        mask = np.any(mask.mean(2) == 1, axis=1)

        y_new = y_new[mask]
        y_new_all.append(y_new)
        # Running row count; avoids re-concatenating every batch each time.
        nb_pseudo_obs += len(y_new)

    # Keep target_nb_pseudo_obs pseudo-observations
    y_new_all = np.concatenate(y_new_all)
    y_new_all = y_new_all[:target_nb_pseudo_obs]

    y_all = np.vstack([y, y_new_all])
    share_kept_pseudo_obs = len(y_new_all) / total_nb_obs_generated

    out['y_all'] = y_all
    out['share_kept_pseudo_obs'] = share_kept_pseudo_obs

    return (out)
anp.ravel.defjvp( lambda g, ans, gvs, vs, x, order=None: anp.ravel(g, order=order)) anp.expand_dims.defjvp( lambda g, ans, gvs, vs, x, axis: anp.expand_dims(g, axis)) anp.squeeze.defjvp(lambda g, ans, gvs, vs, x, axis=None: anp.squeeze(g, axis)) anp.diag.defjvp(lambda g, ans, gvs, vs, x, k=0: anp.diag(g, k)) anp.flipud.defjvp(lambda g, ans, gvs, vs, x, : anp.flipud(g)) anp.fliplr.defjvp(lambda g, ans, gvs, vs, x, : anp.fliplr(g)) anp.rot90.defjvp(lambda g, ans, gvs, vs, x, k=1: anp.rot90(g, k)) anp.trace.defjvp(lambda g, ans, gvs, vs, x, offset=0: anp.trace(g, offset)) anp.full.defjvp(lambda g, ans, gvs, vs, shape, fill_value, dtype=None: anp. full(shape, g, dtype), argnum=1) anp.triu.defjvp(lambda g, ans, gvs, vs, x, k=0: anp.triu(g, k=k)) anp.tril.defjvp(lambda g, ans, gvs, vs, x, k=0: anp.tril(g, k=k)) anp.clip.defjvp(lambda g, ans, gvs, vs, x, a_min, a_max: g * anp.logical_and( ans != a_min, ans != a_max)) anp.swapaxes.defjvp( lambda g, ans, gvs, vs, x, axis1, axis2: anp.swapaxes(g, axis1, axis2)) anp.rollaxis.defjvp( lambda g, ans, gvs, vs, a, axis, start=0: anp.rollaxis(g, axis, start)) anp.real_if_close.defjvp(lambda g, ans, gvs, vs, x: npg.match_complex(vs, g)) anp.real.defjvp(lambda g, ans, gvs, vs, x: anp.real(g)) anp.imag.defjvp(lambda g, ans, gvs, vs, x: npg.match_complex(vs, -1j * g)) anp.conj.defjvp(lambda g, ans, gvs, vs, x: anp.conj(g)) anp.angle.defjvp(lambda g, ans, gvs, vs, x: npg.match_complex( vs, g * anp.conj(x * 1j) / anp.abs(x)**2)) anp.where.defjvp(lambda g, ans, gvs, vs, c, x=None, y=None: anp.where( c, g, anp.zeros(anp.shape(g))), argnum=1) anp.where.defjvp(lambda g, ans, gvs, vs, c, x=None, y=None: anp.where(
def boxQP(H, g, lower, upper, x0):
    """Minimize ``0.5 * x'Hx + x'g`` subject to ``lower <= x <= upper``.

    Projected-Newton solver: at every iteration the coordinates sitting on
    a bound with the gradient pushing outward are clamped, a Newton step
    is taken in the remaining (free) coordinates, and a backtracking
    Armijo line search with projection onto the box is applied.

    Args:
        H: (n, n) positive-definite matrix.
        g: (n,) linear term.
        lower, upper: bounds, broadcastable to (n,).
        x0: initial guess. If it is None or its length differs from n,
            the start point is the mean of the finite bounds per
            coordinate (0 where both bounds are non-finite).

    Returns:
        x: (n,) solution.
        result: status code
            -1 Hessian is not positive definite
             0 no descent direction found
             1 maximum number of iterations reached
             2 line-search step became smaller than the minimum step
             4 improvement smaller than tolerance
             5 gradient norm smaller than tolerance
             6 all dimensions are clamped
        Hfree: lower Cholesky factor of ``H[free][:, free]`` from the last
            factorization.
        free: boolean mask of the unclamped coordinates.
    """
    n = H.shape[0]
    clamped = np.zeros(n)
    free = np.ones(n)
    Hfree = np.zeros(n)
    oldvalue = 0
    result = 0

    def clamp(value):
        return np.maximum(lower, np.minimum(upper, value))

    maxIter = 100
    minRelImprove = 1e-8
    minGrad = 1e-8
    stepDec = 0.6
    minStep = 1e-22
    Armijo = 0.1

    if x0 is not None and np.ndim(x0) > 0 and np.shape(x0)[0] == n:
        x = clamp(x0)
    else:
        # Per-coordinate mean of the finite bounds.
        lu = np.stack([
            np.broadcast_to(np.asarray(lower, dtype=float), (n,)),
            np.broadcast_to(np.asarray(upper, dtype=float), (n,)),
        ])
        finite = np.isfinite(lu)
        n_finite = finite.sum(axis=0)
        total = np.where(finite, lu, 0.0).sum(axis=0)
        x = np.where(n_finite > 0, total / np.maximum(n_finite, 1), 0.0)

    # Same objective as the one used in the line search (note the 0.5).
    value = np.dot(x.T, g) + 0.5 * np.dot(x.T, np.dot(H, x))

    for iteration in range(maxIter):
        if result != 0:
            break

        if iteration > 1 and (oldvalue - value) < minRelImprove * abs(oldvalue):
            result = 4
            logging.info("[QP info] Improvement smaller than tolerance")
            break

        oldvalue = value
        grad = g + np.dot(H, x)

        # Clamp coordinates on a bound whose gradient points out of the box.
        old_clamped = clamped
        clamped = np.zeros(n)
        clamped[np.logical_and(x == lower, grad > 0)] = 1
        clamped[np.logical_and(x == upper, grad < 0)] = 1
        free = np.logical_not(clamped)

        if np.all(clamped):
            result = 6
            logging.info("[QP info] All dimensions are clamped")
            break

        # Refactorize only when the active set changed.
        if iteration == 0:
            factorize = True
        else:
            factorize = np.any(old_clamped != clamped)

        if factorize:
            try:
                # NOTE(review): for a non-symmetric H the strict lower
                # triangle is dropped before factorizing; kept as in the
                # original, intent should be confirmed.
                if not np.allclose(H, H.T):
                    H = np.triu(H)
                Hfree = np.linalg.cholesky(H[np.ix_(free, free)])
            except np.linalg.LinAlgError:
                eigs, _ = np.linalg.eig(H[np.ix_(free, free)])
                print(eigs)
                result = -1
                logging.info("[QP info] Hessian is not positive definite")
                break

        gnorm = np.linalg.norm(grad[free])
        if gnorm < minGrad:
            result = 5
            logging.info("[QP info] Gradient norm smaller than tolerance")
            break

        # Newton step in the free subspace.  Hfree is the LOWER factor L
        # (H_ff = L L'), so solve L y = rhs first, then L' z = y.
        grad_clamped = g + np.dot(H, x*clamped)
        search = np.zeros(n)
        y = np.linalg.solve(Hfree, grad_clamped[free])
        search[free] = -np.linalg.solve(Hfree.T, y) - x[free]

        sdotg = np.sum(search*grad)
        if sdotg >= 0:
            print(f"[QP info] No descent direction found. Should not happen. Grad is {grad}")
            break

        # armijo linesearch
        step = 1
        xc = clamp(x + step*search)
        vc = np.dot(xc.T, g) + 0.5*np.dot(xc.T, np.dot(H, xc))
        while (vc - oldvalue) / (step*sdotg) < Armijo:
            step *= stepDec
            xc = clamp(x + step * search)
            vc = np.dot(xc.T, g) + 0.5 * np.dot(xc.T, np.dot(H, xc))

            if step < minStep:
                # Leaves the line search only; the outer loop stops at the
                # top of the next iteration because result is non-zero.
                result = 2
                break

        # accept candidate
        x = xc
        value = vc
    else:
        # The loop ran out of iterations without hitting a stop criterion.
        if result == 0:
            result = 1

    return x, result, Hfree, free
def atomsDistances(positions, cell, cutoff_radius=6.0, self_interaction=False):
    """ Compute the distance of every atom to its neighbors.

    Atoms are sorted into spatial bins, the bins around each atom
    (including periodic images) are scanned, and every (atom, neighbor)
    pair closer than the cutoff radius is reported. Pairs at or beyond the
    cutoff radius are dropped from the output. Periodic boundary
    conditions are assumed along every axis.

    Parameters:
    -----------
    positions: np.ndarray
        Atomic positions. The size of this tensor will be (N_atoms, 3), where N_atoms is the number of atoms
        in the cluster.
    cell: np.ndarray
        Periodic cell, which has the size of (3, 3)
    cutoff_radius: float
        Cutoff Radius, which is a hyper parameters. The default is 6.0 Angstrom.
    self_interaction: boolean
        Default is False, which means that results will not consider the atom itself as its neighbor.

    Returns:
    ----------
    distances: np.ndarray
        Distances of the retained pairs (all smaller than cutoff_radius).
    first_atoms: np.ndarray
        Index of the central atom of each retained pair.
    second_atoms: np.ndarray
        Index of the neighbor atom of each retained pair. distances[p] is the
        distance between first_atoms[p] and second_atoms[p].
    cell_shift_vector: np.ndarray
        The cell shift applied to the neighbor of each retained pair.
    """
    # Compute reciprocal lattice vectors.
    inverse_cell = np.linalg.pinv(cell).T

    # Compute distances of cell faces.
    face_dist_c = 1 / np.linalg.norm(inverse_cell, axis=0)

    # We use a minimum bin size of 3 A
    bin_size = max(cutoff_radius, 3)

    # Compute number of bins, the minimum bin size must be [1., 1., 1.].
    # (x - x % 1 is a floor that keeps the float dtype.)
    nbins_c = np.maximum(
        (face_dist_c / bin_size - (face_dist_c / bin_size) % 1), [1., 1., 1.])
    nbins = np.prod(nbins_c)

    # Compute the number of neighbor cell that need to be search
    neighbor_search_x, neighbor_search_y, neighbor_search_z =\
        np.ceil(bin_size * nbins_c / face_dist_c).astype(int)

    # Sort atoms into bins.
    scaled_positions_ic = np.dot(positions, inverse_cell) % 1
    bin_index_ic = scaled_positions_ic * nbins_c - (scaled_positions_ic * nbins_c) % 1

    # Convert Cartesian bin index to unique scalar bin index.
    bin_index_i = (bin_index_ic[:, 0] + nbins_c[0] *
                   (bin_index_ic[:, 1] + nbins_c[1] * bin_index_ic[:, 2]))

    # atom_i contains atom index in new sort order.
    atom_i = np.argsort(bin_index_i)
    bin_index_i = bin_index_i[atom_i]

    # Compute the maximum number of atoms in a bin
    max_natoms_per_bin = np.bincount(np.int_(bin_index_i)).max()

    # Sort atoms into bins. The atoms_in_bin_ba contains the information about where the atoms located.
    # Unused slots keep the filler value -1.
    atoms_in_bin_ba = -np.ones([np.int_(nbins), max_natoms_per_bin], dtype=int)

    for i in range(max_natoms_per_bin):
        # Create a mask array that identifies the first atom of each bin.
        mask = np.append([True], bin_index_i[:-1] != bin_index_i[1:])
        # Assign all first atoms.
        atoms_in_bin_ba[np.int_(bin_index_i[mask]), i] = atom_i[mask]

        # Remove atoms that we just sorted into atoms_in_bin_ba. The next
        # "first" atom will be the second and so on.
        mask = np.logical_not(mask)
        atom_i = atom_i[mask]
        bin_index_i = bin_index_i[mask]

    # Create the shift list that indicates that where the cell might shift.
    shift = []
    for x in range(-neighbor_search_x, neighbor_search_x + 1):
        for y in range(-neighbor_search_y, neighbor_search_y + 1):
            for z in range(-neighbor_search_z, neighbor_search_z + 1):
                shift += [[x, y, z]]

    # Therefore, the possible positions of neighborhood bin can be computed by the following code.
    # The modulo wraps bin indices around the periodic cell; cell_shift is
    # the number of whole cells crossed by that wrap.
    neighborbin = (bin_index_ic[:, None] + np.array(shift)[None, :]) % nbins_c
    cell_shift = ((bin_index_ic[:, None] + np.array(shift)[None, :]) - neighborbin) / nbins_c
    neighborbin = neighborbin[:, :, 0] + nbins_c[0] * (
        neighborbin[:, :, 1] + nbins_c[1] * neighborbin[:, :, 2])

    distances = []
    first_atoms = []
    second_atoms = []
    cell_shift_vector = []
    for i in range(len(positions)):
        # Create a mask that indicates which neighborhood bin contains atoms.
        # Slots holding the filler -1 are masked out (and atom i itself
        # unless self_interaction is requested).
        if self_interaction:
            mask = (atoms_in_bin_ba[np.int_(neighborbin[i])] != -1)
        else:
            mask = np.logical_and(
                atoms_in_bin_ba[np.int_(neighborbin[i])] != -1,
                atoms_in_bin_ba[np.int_(neighborbin[i])] != i)

        # Filler slots (-1) index the last atom here; those entries are
        # discarded by `mask` below.
        distances_vec = positions[atoms_in_bin_ba[np.int_(
            neighborbin[i])]] - positions[i]

        # the distance should consider the cell shift
        distances_vec = distances_vec + np.dot(cell_shift[i], cell)[:, None]

        # make the cell shift vector for every atom instead of every bin.
        _cell_shift_vector = np.repeat(cell_shift[i][:, None],
                                       max_natoms_per_bin,
                                       axis=1)[mask]
        distances_vec = distances_vec[mask]
        temp_distances = np.sum(distances_vec * distances_vec, axis=1)
        temp_distances = (temp_distances)**0.5

        # Keep only the pairs strictly inside the cutoff radius.
        cutoff_mask = (temp_distances < cutoff_radius)
        _second_atoms = atoms_in_bin_ba[np.int_(
            neighborbin[i])][mask][cutoff_mask]
        _first_atoms = [i] * len(_second_atoms)
        _cell_shift_vector = _cell_shift_vector[cutoff_mask]

        first_atoms.extend(_first_atoms)
        second_atoms.extend(_second_atoms)
        distances.extend(temp_distances[cutoff_mask])
        cell_shift_vector.extend(_cell_shift_vector)

    distances = np.array(distances)
    cell_shift_vector = np.array(cell_shift_vector)
    first_atoms = np.array(first_atoms)
    second_atoms = np.array(second_atoms)

    return distances, first_atoms, second_atoms, cell_shift_vector
def integrate_rkdp5(
        rhs,
        x_eval,
        x_initial,
        y_initial,
        atol=1e-12,
        rtol=0.,
        step_safety_factor=0.9,
        step_update_factor_max=10,
        step_update_factor_min=2e-1,
):
    """
    Integrate using the RKDP5(4) method. For quick intuition, consult [2] and [3].
    See table 5.2 on pp. 178 of [1] or [3] for the Butcher tableau.
    See pp. 167-169 of [1] for automatic step size control and starting step size.
    Scipy's RK45 implementation in python [4] was used as a reference for this implementation.

    References:
    [1] E. Hairer, S.P. Norsett and G. Wanner, Solving Ordinary Differential Equations i.
        Nonstiff Problems. 2nd edition. Springer Series in Computational Mathematics,
        Springer-Verlag (1993)
    [2] https://en.wikipedia.org/wiki/Runge–Kutta_methods
    [3] https://en.wikipedia.org/wiki/Dormand–Prince_method
    [4] https://github.com/scipy/scipy/blob/master/scipy/integrate/_ivp/rk.py
    [5] https://math.stackexchange.com/questions/2947231/how-can-i-derive-the-dense-output-of-ode45/2948244

    Arguments:
    atol :: float or array(N) - the absolute tolerance of the component-wise
        local error, i.e. "Atoli" in e.q. 4.10 on pp. 167 of [1]
    rhs :: (x :: float, y :: array(N)) -> dy_dx :: array(N)
        - the right-hand side of the equation dy_dx = rhs(x, y)
        that defines the first order differential equation
    rtol :: float or array(N) - the relative tolerance of the component-wise
        local error, i.e. "Rtoli" in e.q. 4.10 on pp. 167 of [1]
    step_safety_factor :: float - the safety multiplication factor used in the
        step update rule, i.e. "fac" in e.q. 4.13 on pp. 168 of [1]
    step_update_factor_max :: float - the maximum step multiplication factor used in the
        step update rule, i.e. "facmax" in e.q. 4.13 on pp. 168 of [1]
    step_update_factor_min :: float - the minimum step multiplication factor used in the
        step update rule, i.e. "facmin" in e.q. 4.13 on pp. 168 of [1]
    x_eval :: ndarray (eval_count) - an array of points `x` whose
        corresponding `y` value should be evaluated. It is assumed that this list does not
        contain duplicates, that the values are sorted in increasing order, and that all values
        are greater than `x_initial`. Integration runs up to `x_eval[-1]`.
    x_initial :: float - the initial value of x (inclusive) that
        begins the integration interval
    y_initial :: array(N) - the initial value of y

    Returns:
    y_evald :: ndarray (eval_count x N) - an array of points `y` whose
        corresponding `x` value is specified in x_eval

    Raises:
    ValueError - if `x_eval` is empty
    """
    # Determine how far to integrate to.
    eval_count = len(x_eval)
    if eval_count == 0:
        raise ValueError("No output was specified.")
    x_final = x_eval[-1]

    # Compute initial step size per pp. 169 of [1].
    f0 = rhs(x_initial, y_initial)
    d0 = rms_norm(y_initial)
    d1 = rms_norm(f0)
    if d0 < 1e-5 or d1 < 1e-5:
        h0 = 1e-6
    else:
        h0 = 0.01 * d0 / d1
    y1 = y_initial + h0 * f0
    f1 = rhs(x_initial + h0, y1)
    d2 = rms_norm(f1 - f0) / h0
    if anp.maximum(d1, d2) <= 1e-15:
        h1 = anp.maximum(1e-6, h0 * 1e-3)
    else:
        h1 = anp.power(0.01 / anp.maximum(d1, d2), 1 / (P + 1))
    step_current = anp.minimum(100 * h0, h1)

    # Integrate.
    y_eval_list = list()
    x_current = x_initial
    y_current = y_initial
    k1 = f0
    # Index of the first evaluation point that has not been emitted yet.
    # It guarantees every point is output exactly once even when it lands
    # exactly on a step boundary (both interval ends below are inclusive).
    next_eval_index = 0
    while x_current <= x_final and next_eval_index < eval_count:
        step_rejected = False
        step_accepted = False
        # Repeatedly attempt to move to the next position in the mesh
        # until the step size is adapted such that the local step error
        # is within an acceptable tolerance.
        while not step_accepted:
            # Attempt to step by `step_current`.
            ks, y1, y1h = integrate_rkdp5_step(step_current,
                                               rhs,
                                               x_current,
                                               y_current,
                                               k1=k1)

            # Before the step size is updated for the next step, note where
            # the current attempted step size places us in the mesh.
            x_new = x_current + step_current

            # Compute the local error associated with the attempted step.
            scale = atol + anp.maximum(anp.abs(y1), anp.abs(y1h)) * rtol
            error_norm = rms_norm((y1 - y1h) / scale)

            # If the step is accepted, increase the step size,
            # and move to the next step.
            if error_norm < 1:
                step_accepted = True
                # Avoid division by zero in update.
                if error_norm == 0:
                    step_update_factor = step_update_factor_max
                else:
                    step_update_factor = anp.minimum(
                        step_update_factor_max,
                        step_safety_factor * anp.power(error_norm, ERROR_EXP))
                # Avoid an extraneous update in next step.
                if step_rejected:
                    step_update_factor = anp.minimum(1, step_update_factor)
                step_current = step_current * step_update_factor
            # If the step was rejected, decrease the step size,
            # and reattempt the step.
            else:
                step_rejected = True
                step_update_factor = anp.maximum(
                    step_update_factor_min,
                    step_safety_factor * anp.power(error_norm, ERROR_EXP))
                step_current = step_current * step_update_factor
        #ENDWHILE

        # Interpolate any output points that occurred in the step and that
        # were not already produced by the previous step.
        x_eval_step_indices = anp.nonzero(
            anp.logical_and(x_current <= x_eval, x_eval <= x_new))[0]
        x_eval_step_indices = x_eval_step_indices[
            x_eval_step_indices >= next_eval_index]
        x_eval_step = x_eval[x_eval_step_indices]
        if len(x_eval_step) != 0:
            y_eval_step = rkdp5_dense(ks, x_current, x_new, x_eval_step,
                                      y_current, y1)
            for y_eval_ in y_eval_step:
                y_eval_list.append(y_eval_)
            next_eval_index = int(x_eval_step_indices[-1]) + 1

        # Update the position in the mesh.
        x_current = x_new
        y_current = y1
        k1 = ks[6]  # k[6] = k7
    #ENDWHILE

    return anp.stack(y_eval_list)
def _mark_neuron_types(csv_path, names, words_column, issensory, ismotor,
                       isinterneuron):
    """Set the type flags of every neuron listed in ``csv_path`` (in place)."""
    with open(csv_path, 'r') as handle:
        for row in csv.reader(handle, delimiter=',', skipinitialspace=True):
            try:
                index = names.index(row[0])
                words = row[words_column].lower()
            except (ValueError, IndexError):
                # Unknown neuron name or malformed row: skip it.
                continue
            if 'sensory' in words:
                issensory[index] = 1
            if 'motor' in words:
                ismotor[index] = 1
            if 'interneuron' in words:
                isinterneuron[index] = 1


def load_celegans_network(props=np.ones((3, 4))):
    """Load a (sub-sampled) C. elegans connectome from the CSV files in ``data/``.

    The sub-sample is given by props:
    props[i, j] = proportion of neurons of category (i, j) to include, with
    category i = body position (Head = 0, Middle = 1, Tail = 2) and
    category j = neuron type (Sensory = 0, Motor = 1, Interneuron = 2,
    Poly-type = 3).

    Reads ``data/NeuronType.csv``, ``data/NeuronTypeISM.csv``,
    ``data/NeuronRemainingTypesISM.csv`` and ``data/NeuronConnect.csv``
    relative to the current working directory. Rows that reference an
    unknown neuron or are malformed are skipped. Sub-sampling uses
    ``np.random.choice`` (global NumPy random state).

    Returns:
        As: (3, n, n) integer array of binary adjacency matrices, one per
            connectivity type (0: synapse, 1: electric junction, 2: NMJ).
        names: list of the n retained neuron names.
        xpos: (n,) float array of the retained neurons' positions.
    """
    n_neurons = 279
    relevant_indexes = [0, 1, 2, 14]

    # load relevant information (names, numerical position, anatomical
    # position and type)
    neuron_info_all = [[] for _ in range(4)]
    with open('data/NeuronType.csv', 'r') as handle:
        for row in csv.reader(handle, delimiter=',', skipinitialspace=True):
            for j0, j in enumerate(relevant_indexes):
                neuron_info_all[j0].append(row[j].strip(' \t\n\r'))

    names_with_zeros = list(neuron_info_all[0])

    # erase extra zeros in name (only when a zero occurs before the last
    # character)
    for j in range(n_neurons):
        name = neuron_info_all[0][j]
        ind_zero = name.find('0')
        if 0 <= ind_zero < len(name) - 1:
            neuron_info_all[0][j] = name.replace('0', '')

    names = list(neuron_info_all[0])
    xpos = np.array(neuron_info_all[1])
    location = neuron_info_all[2]

    issensory = np.zeros(n_neurons)
    ismotor = np.zeros(n_neurons)
    isinterneuron = np.zeros(n_neurons)
    _mark_neuron_types('data/NeuronTypeISM.csv', names, 2,
                       issensory, ismotor, isinterneuron)
    _mark_neuron_types('data/NeuronRemainingTypesISM.csv', names, 1,
                       issensory, ismotor, isinterneuron)

    As_weighted = np.zeros((3, n_neurons, n_neurons))
    with open('data/NeuronConnect.csv', 'r') as handle:
        for row in csv.reader(handle, delimiter=',', skipinitialspace=True):
            try:
                index1 = names_with_zeros.index(row[0])
                index2 = names_with_zeros.index(row[1])
                kind = row[2]
                # 'Sp' / 'Rp' are covered by the 'S' / 'R' substring tests.
                if 'S' in kind or 'R' in kind:
                    As_weighted[0, index1, index2] += float(row[3])
                if 'EJ' in kind:
                    As_weighted[1, index1, index2] += float(row[3])
                if 'NMJ' in kind:
                    As_weighted[2, index1, index2] += float(row[3])
            except (ValueError, IndexError):
                # Unknown neuron, short row or non-numeric weight: skip.
                continue

    As = (As_weighted > 0).astype(int)

    # 0 = sensory only, 1 = motor only, 2 = interneuron only, 3 = poly-type
    sensory = issensory.astype(bool)
    motor = ismotor.astype(bool)
    inter = isinterneuron.astype(bool)
    ind_type = [
        np.where(sensory & ~motor & ~inter)[0],
        np.where(~sensory & motor & ~inter)[0],
        np.where(~sensory & ~motor & inter)[0],
        np.where(issensory + ismotor + isinterneuron >= 2)[0],
    ]
    type_members = [set(indices.tolist()) for indices in ind_type]

    # Head, Middle, Tail
    ind_pos = [[i for i, loc in enumerate(location) if loc == code]
               for code in ('H', 'M', 'T')]

    ind_type_pos = [[[val for val in ind_pos[j] if val in type_members[i]]
                     for i in range(4)] for j in range(3)]

    # Draw the sub-sample.  The loop order (type outer, position inner) is
    # kept so that a seeded random state yields the same draws as before.
    ind_neuron_subsampled = [[[] for _ in range(4)] for _ in range(3)]
    for i in range(4):
        for j in range(3):
            pool = ind_type_pos[j][i]
            try:
                n_keep = int(np.floor(len(pool) * props[j, i]))
                ind_neuron_subsampled[j][i] = np.random.choice(
                    pool, n_keep, replace=False)
            except (ValueError, IndexError):
                # e.g. more neurons requested than available.
                ind_neuron_subsampled[j][i] = []

    ind_neuron_subsampled = np.sort(
        np.concatenate([
            np.concatenate(ind_neuron_subsampled[j][:], axis=0)
            for j in range(3)
        ]).astype(int))

    As = As[np.ix_(range(3), ind_neuron_subsampled, ind_neuron_subsampled)]
    xpos = xpos[ind_neuron_subsampled].astype(float)
    kept = set(ind_neuron_subsampled.tolist())
    names = [name for j0, name in enumerate(names) if j0 in kept]
    return As, names, xpos
def MIAMI(y, n_clusters, r, k, init, var_distrib, nj, authorized_ranges,
          target_nb_pseudo_obs = 500, it = 50,
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True,
          dm = [], max_patience = 1): # dm: Hack to remove
    '''
    Fit a M1DGMM on y and generate pseudo-observations lying in given ranges

    The model is trained on y, latent points are then drawn batch by batch from
    the fitted latent Gaussian mixture, decoded into mixed-type observations
    and kept only when they fall inside ``authorized_ranges``.

    y (numobs x p ndarray or DataFrame): The observations containing mixed variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y
    nj (p 1darray): For binary/count data: The maximum values that the variable can take.
                    For ordinal data: the number of different existing categories for each variable
    authorized_ranges (2 x nb_ranges x p ndarray): authorized_ranges[0] holds the
                    lower bounds and authorized_ranges[1] the upper bounds. A
                    pseudo-observation is kept if all of its p coordinates lie
                    inside at least one of the nb_ranges boxes
    target_nb_pseudo_obs (int): The number of pseudo-observations to generate
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increase by less than eps then the algorithm stops
    maxstep (int): The maximum number of optimisation step for each variable
    seed (int): The random state seed to set (Only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    dm (np array): The distance matrix of the observations. If not given M1DGMM computes it
    max_patience (int): Forwarded unchanged to M1DGMM
    ------------------------------------------------------------------------------------------------
    returns (dict): The M1DGMM output enriched with:
                    'zz': the list of accepted latent points (one array per batch)
                    'y_all': the (at most target_nb_pseudo_obs) kept pseudo-observations
                    'share_kept_pseudo_obs': kept pseudo-observations / generated ones
    '''

    # Keep the column names when y is a DataFrame; plain arrays have none.
    cols = y.columns if hasattr(y, 'columns') else None

    # Formatting
    if not isinstance(y, np.ndarray): y = np.asarray(y)

    assert len(k) < 2 # Not implemented for deeper MDGMM for the moment

    out = M1DGMM(y, n_clusters, r, k, init, var_distrib, nj, it,
                 eps, maxstep, seed, perform_selec = perform_selec,
                 dm = dm, max_patience = max_patience, use_silhouette = True)

    # Compute the associations
    vars_contributions(pd.DataFrame(y, columns = cols), out['Ez.y'], assoc_thr = 0.0,
                       title = 'Contribution of the variables to the latent dimensions',
                       storage_path = None)

    # Unpacking the model from the M1DGMM output
    k = out['best_k']
    r = out['best_r']
    mu = out['mu'][0]
    sigma = out['sigma'][0]
    w = out['best_w_s']

    lambda_bin = np.array(out['lambda_bin'])
    lambda_ord = out['lambda_ord']
    lambda_categ = out['lambda_categ']
    lambda_cont = np.array(out['lambda_cont'])

    nj_bin = nj[pd.Series(var_distrib).isin(['bernoulli', 'binomial'])].astype(int)
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nj_categ = nj[var_distrib == 'categorical'].astype(int)

    # Used to "destandardize" the generated continuous variables
    y_std = y[:, var_distrib == 'continuous'].astype(float).std(axis = 0,
                                                                keepdims = True)

    # Size of each batch of latent points
    nb_points = 200

    # NOTE: a disabled (string-quoted) prototype that turned authorized_ranges
    # into linear constraints on the latent space and sampled inside the
    # resulting polytope used to live here. It was never executed and has been
    # removed; constraints are enforced by the rejection step below.

    #=======================================================
    # Data augmentation part
    #=======================================================

    # Create pseudo-observations iteratively:
    nb_pseudo_obs = 0
    y_new_all = []
    zz = []

    total_nb_obs_generated = 0

    # NOTE: with "<=" one extra batch is drawn when the target is hit exactly;
    # kept as-is so that 'zz' and 'share_kept_pseudo_obs' are unchanged.
    while nb_pseudo_obs <= target_nb_pseudo_obs:

        #===================================================
        # Generate a batch of latent variables
        #===================================================

        # Draw some z^{(1)} | Theta using z^{(1)} | s, Theta
        z0_s = multivariate_normal(size = (nb_points, 1),
                                   mean = mu.flatten(order = 'C'), cov = block_diag(*sigma))
        z0_s = z0_s.reshape(nb_points, k[0], r[0], order = 'C')

        comp_chosen = np.random.choice(k[0], nb_points, p = w / w.sum())
        # For each point keep the draw of its chosen mixture component
        z = z0_s[np.arange(nb_points), comp_chosen]

        #===================================================
        # Draw pseudo-observations
        #===================================================

        y_bin_new = np.vstack([draw_new_bin(lambda_bin, z, nj_bin)])
        y_categ_new = np.vstack([draw_new_categ(lambda_categ, z, nj_categ)])
        y_ord_new = np.vstack([draw_new_ord(lambda_ord, z, nj_ord)])
        y_cont_new = np.vstack([draw_new_cont(lambda_cont, z)])

        # "Destandardize" the continuous data
        y_cont_new = y_cont_new * y_std

        # Put them in the right order and append them to y
        type_counter = {'count': 0, 'ordinal': 0,
                        'categorical': 0, 'continuous': 0}

        y_new = np.full((nb_points, y.shape[1]), np.nan)

        # Quite dirty:
        for j, var in enumerate(var_distrib):
            if (var == 'bernoulli') or (var == 'binomial'):
                y_new[:, j] = y_bin_new[:, type_counter['count']]
                type_counter['count'] = type_counter['count'] + 1
            elif var == 'ordinal':
                y_new[:, j] = y_ord_new[:, type_counter[var]]
                type_counter[var] = type_counter[var] + 1
            elif var == 'categorical':
                y_new[:, j] = y_categ_new[:, type_counter[var]]
                type_counter[var] = type_counter[var] + 1
            elif var == 'continuous':
                y_new[:, j] = y_cont_new[:, type_counter[var]]
                type_counter[var] = type_counter[var] + 1
            else:
                raise ValueError(var, 'Type not implemented')

        #===================================================
        # Acceptation rule
        #===================================================

        # Check that each variable is in the good range
        y_new_exp = np.expand_dims(y_new, 1)

        total_nb_obs_generated += len(y_new)

        mask = np.logical_and(y_new_exp >= authorized_ranges[0][np.newaxis],
                              y_new_exp <= authorized_ranges[1][np.newaxis])

        # Keep an observation if it lies at least into one of the ranges possibility
        mask = np.any(mask.mean(2) == 1, axis = 1)

        y_new = y_new[mask]
        y_new_all.append(y_new)
        # Running count instead of re-concatenating every batch each iteration
        nb_pseudo_obs += len(y_new)

        zz.append(z[mask])

    # Keep target_nb_pseudo_obs pseudo-observations
    y_new_all = np.concatenate(y_new_all)
    y_new_all = y_new_all[:target_nb_pseudo_obs]

    share_kept_pseudo_obs = len(y_new_all) / total_nb_obs_generated

    out['zz'] = zz
    out['y_all'] = y_new_all
    out['share_kept_pseudo_obs'] = share_kept_pseudo_obs

    return (out)
def fit(self, x=None, c=None, n=None, t=None, how='MLE',
        offset=False, zi=False, lfp=False, tl=None, tr=None,
        xl=None, xr=None, fixed=None, heuristic='Turnbull',
        init=[], rr='y', on_d_is_0=False,
        turnbull_estimator='Fleming-Harrington'):
    r"""
    The central feature to SurPyval's capability. This function aimed to
    have an API to mimic the simplicity of the scipy API. That is, to use
    a simple :code:`fit()` call, with as many or as few parameters as is
    needed.

    Parameters
    ----------

    x : array like, optional
        Array of observations of the random variables. If x is
        :code:`None`, xl and xr must be provided.

    c : array like, optional
        Array of censoring flag. -1 is left censored, 0 is observed, 1 is
        right censored, and 2 is intervally censored. If not provided
        will assume all values are observed.

    n : array like, optional
        Array of counts for each x. If data is provided as counts, then
        this can be provided. If :code:`None` will assume each
        observation is 1.

    t : 2D-array like, optional
        2D array like of the left and right values at which the
        respective observation was truncated. If not provided it assumes
        that no truncation occurs.

    how : {'MLE', 'MPP', 'MOM', 'MSE', 'MPS'}, optional
        Method to estimate parameters, these are:

            - MLE : Maximum Likelihood Estimation
            - MPP : Method of Probability Plotting
            - MOM : Method of Moments
            - MSE : Mean Square Error
            - MPS : Maximum Product Spacing

    offset : boolean, optional
        If :code:`True` finds the shifted distribution. If not provided
        assumes not a shifted distribution. Only works with distributions
        that are supported on the half-real line.

    zi : boolean, optional
        If :code:`True` fits a zero-inflated model. Only available with
        MLE and for distributions whose support starts at 0.

    lfp : boolean, optional
        If :code:`True` fits a limited failure model. Only available with
        MLE.

    tl : array like or scalar, optional
        Values of left truncation for observations. If it is a scalar
        value assumes each observation is left truncated at the value.
        If an array, it is the respective 'late entry' of the observation

    tr : array like or scalar, optional
        Values of right truncation for observations. If it is a scalar
        value assumes each observation is right truncated at the value.
        If an array, it is the respective right truncation value for each
        observation

    xl : array like, optional
        Array like of the left array for 2-dimensional input of x. This
        is useful for data that is all intervally censored. Must be used
        with the :code:`xr` input.

    xr : array like, optional
        Array like of the right array for 2-dimensional input of x. This
        is useful for data that is all intervally censored. Must be used
        with the :code:`xl` input.

    fixed : dict, optional
        Dictionary of parameters and their values to fix. Fixes parameter
        by name.

    heuristic : {"Blom", "Median", "ECDF", "Modal", "Midpoint", "Mean",
        "Weibull", "Benard", "Beard", "Hazen", "Gringorten", "None",
        "Tukey", "DPW", "Fleming-Harrington", "Kaplan-Meier",
        "Nelson-Aalen", "Filliben", "Larsen", "Turnbull"}
        Plotting method to use, if using the probability plotting, MPP,
        method.

    init : array like, optional
        initial guess of parameters. Useful if method is failing. An
        empty value (the default) lets the distribution compute its own
        initial guess.

    rr : ('y', 'x')
        The dimension on which to minimise the spacing between the line
        and the observation. If 'y' the mean square error between the
        line and vertical distance to each point is minimised. If 'x' the
        mean square error between the line and horizontal distance to
        each point is minimised.

    on_d_is_0 : boolean, optional
        For the case when using MPP and the highest value is right
        censored, you can choose to include this value into the
        regression analysis or not. That is, if :code:`False`, all values
        where there are 0 deaths are excluded from the regression. If
        :code:`True` all values regardless of whether there is a death or
        not are included in the regression.

    turnbull_estimator : ('Nelson-Aalen', 'Kaplan-Meier', or
        'Fleming-Harrington'), str, optional
        If using the Turnbull heuristic, you can elect to use either the
        KM, NA, or FH estimator with the Turnbull estimates of r, and d.
        Defaults to FH.

    Returns
    -------

    model : Parametric
        A parametric model with the fitted parameters and methods for all
        functions of the distribution using the fitted parameters.

    Raises
    ------

    ValueError
        If the requested combination of distribution, method, censoring,
        truncation or model options is not supported, or if observed
        values lie outside the support of the distribution.

    NotImplementedError
        If truncated data is used with the MSE method.

    Examples
    --------
    >>> from surpyval import Weibull
    >>> import numpy as np
    >>> x = Weibull.random(100, 10, 4)
    >>> model = Weibull.fit(x)
    >>> print(model)
    Parametric SurPyval Model
    =========================
    Distribution        : Weibull
    Fitted by           : MLE
    Parameters          :
         alpha: 10.551521182640098
          beta: 3.792549834495306
    >>> Weibull.fit(x, how='MPS', fixed={'alpha' : 10})
    Parametric SurPyval Model
    =========================
    Distribution        : Weibull
    Fitted by           : MPS
    Parameters          :
         alpha: 10.0
          beta: 3.4314657446866836
    >>> Weibull.fit(xl=x-1, xr=x+1, how='MPP')
    Parametric SurPyval Model
    =========================
    Distribution        : Weibull
    Fitted by           : MPP
    Parameters          :
         alpha: 9.943092756713078
          beta: 8.613016934518258
    >>> c = np.zeros_like(x)
    >>> c[x > 13] = 1
    >>> x[x > 13] = 13
    >>> c = c[x > 6]
    >>> x = x[x > 6]
    >>> Weibull.fit(x=x, c=c, tl=6)
    Parametric SurPyval Model
    =========================
    Distribution        : Weibull
    Fitted by           : MLE
    Parameters          :
         alpha: 10.363725328793413
          beta: 4.9886821457305865
    """
    # ------------------------------------------------------------------
    # Argument validation that does not need the data
    # ------------------------------------------------------------------
    if offset and self.name in [
            'Normal', 'Beta', 'Uniform', 'Gumbel', 'Logistic'
    ]:
        raise ValueError(
            '{dist} distribution cannot be offset'.format(dist=self.name))

    if how not in PARA_METHODS:
        raise ValueError('"how" must be one of: ' + str(PARA_METHODS))

    if how == 'MPP' and self.name == 'ExpoWeibull':
        raise ValueError(
            'ExpoWeibull distribution does not work with probability plot fitting'
        )

    if t is not None and how == 'MPS':
        raise ValueError(
            'Maximum product spacing doesn\'t yet support tuncation')

    if t is not None and how == 'MSE':
        raise NotImplementedError(
            'Mean square error doesn\'t yet support tuncation')

    if t is not None and how == 'MOM':
        # This branch is about the method of moments; the message used to
        # wrongly name maximum product spacing.
        raise ValueError(
            'Method of moments doesn\'t support truncation')

    if (lfp or zi) and (how != 'MLE'):
        raise ValueError(
            'Limited failure or zero-inflated models can only be made with MLE'
        )

    if zi and (self.support[0] != 0):
        raise ValueError(
            "zero-inflated models can only work with models starting at 0")

    # Normalise the data into the x, c, n, t representation
    x, c, n, t = surpyval.xcnt_handler(x=x, c=c, n=n, t=t,
                                       tl=tl, tr=tr, xl=xl, xr=xr)

    # ------------------------------------------------------------------
    # Validation that needs the normalised data
    # ------------------------------------------------------------------
    if surpyval.utils.check_no_censoring(c) and (how == 'MOM'):
        raise ValueError('Method of moments doesn\'t support censoring')

    if (surpyval.utils.no_left_or_int(c)) and (how == 'MPP') and (
            not heuristic == 'Turnbull'):
        raise ValueError(
            'Probability plotting estimation with left or interval censoring only works with Turnbull heuristic'
        )

    if (heuristic == 'Turnbull') and (not ((-1 in c) or (2 in c))) and (
            (~np.isfinite(t[:, 1])).any()):
        # The Turnbull method is extremely memory intensive.
        # So if no left or interval censoring and no right-truncation
        # then this is equivalent.
        heuristic = turnbull_estimator

    if (not offset) and (not zi):
        # For interval data the left edge is the one compared to the
        # lower bound of the support.
        x_lower = x[:, 0] if x.ndim == 2 else x
        if ((x_lower <= self.support[0]) & (c == 0)).any():
            raise ValueError(
                "Observed values must be in support of distribution; are some of your observed values 0, -Inf, or Inf?"
            )

    # Passed checks
    data = {'x': x, 'c': c, 'n': n, 't': t}

    model = para.Parametric(self, how, data, offset, lfp, zi)
    fitting_info = {}

    if how != 'MPP':
        transform, inv_trans, funcs, inv_f = bounds_convert(
            x, model.bounds)
        const, fixed_idx, not_fixed = fix_idx_and_function(
            fixed, model.param_map, funcs)

        fitting_info['transform'] = transform
        fitting_info['inv_trans'] = inv_trans
        fitting_info['funcs'] = funcs
        fitting_info['inv_f'] = inv_f

        fitting_info['const'] = const
        fitting_info['fixed_idx'] = fixed_idx
        fitting_info['not_fixed'] = not_fixed

        # Need a better general fitter to include offset
        # `init == []` is ambiguous (or an error) for NumPy arrays, so the
        # "no initial guess supplied" case is detected by size instead.
        if init is None or np.size(init) == 0:
            if self.name in ['Gumbel', 'Beta', 'Normal', 'Uniform']:
                init = np.array(self._parameter_initialiser(x, c, n))
            else:
                # Drop observed values outside the open support before
                # computing the initial guess.
                x_edge = x[:, 0] if x.ndim == 2 else x
                init_mask = np.logical_or(x_edge <= self.support[0],
                                          x_edge >= self.support[1])
                init_mask = ~np.logical_and(init_mask, c == 0)
                if x.ndim == 2:
                    x_init = np.vstack([x[init_mask, 0],
                                        x[init_mask, 1]]).T
                else:
                    x_init = x[init_mask]
                c_init = c[init_mask]
                n_init = n[init_mask]
                init = np.array(
                    self._parameter_initialiser(x_init, c_init, n_init,
                                                offset=offset))
                if offset:
                    # The offset must start below the smallest observation
                    init[0] = x.min() - 1.

            if lfp:
                _, _, _, F = nonp.plotting_positions(
                    x, c, n, heuristic='Nelson-Aalen')
                max_F = np.max(F)
                if max_F > 0.5:
                    init = np.concatenate([init, [0.99]])
                else:
                    init = np.concatenate(
                        [init_from_bounds(self), [max_F]])

            if zi:
                # Initial zero-inflation share: weight of the zeros
                init = np.concatenate(
                    [init, [(n[x == 0]).sum() / n.sum()]])

        init = transform(init)
        init = init[not_fixed]
        fitting_info['init'] = init
    else:
        # Probability plotting method does not need an initial estimate
        fitting_info['rr'] = rr
        fitting_info['heuristic'] = heuristic
        fitting_info['on_d_is_0'] = on_d_is_0
        fitting_info['turnbull_estimator'] = turnbull_estimator
        fitting_info['init'] = None

    model.fitting_info = fitting_info

    results = METHOD_FUNC_DICT[how](model)

    # Expose every fitting result, then each fitted parameter by name
    for k, v in results.items():
        setattr(model, k, v)

    if hasattr(model, 'params'):
        for k, v in zip(self.param_names, model.params):
            setattr(model, k, v)

    return model
def sens(guesses, targets):
    """Return the sensitivity (true-positive rate) of binary predictions.

    Computed as TP / (TP + FN), where a true positive is an entry equal to 1
    in both ``guesses`` and ``targets`` and a false negative is an entry that
    is 0 in ``guesses`` but 1 in ``targets``.
    """
    actual_positive = targets == 1
    hits = np.logical_and(guesses == 1, actual_positive).sum()
    misses = np.logical_and(guesses == 0, actual_positive).sum()
    return np.true_divide(hits, hits + misses)
num_iters=200, callback=callback) # print(variational_params) # # Sample functions from the final posterior. rs = npr.RandomState(0) mean, log_std = unpack_params(variational_params) # rs = npr.RandomState(0) sample_weights = rs.randn(1000, num_weights) * np.exp(log_std) + mean np.savetxt("VIwts.csv", sample_weights, delimiter=',', fmt='%f') plot_inputs = np.linspace(-2, 2, num=400) outputs_final = predictions(sample_weights, np.expand_dims(plot_inputs, 1)) lowerbd = numpy.quantile(outputs_final, 0.05, axis=0) upperbd = numpy.quantile(outputs_final, 0.95, axis=0) inconint = np.logical_and(lowerbd < tot_targets, upperbd > tot_targets).ravel() con_ind = np.zeros(len(lowerbd)) con_ind[inconint] = 1 con_ind = con_ind.reshape(len(con_ind), 1) coverage_df = np.concatenate([coverage_df, con_ind], axis=1) # # Plot data and functions. fig = plt.figure(figsize=(12, 8), facecolor='white') ax = fig.add_subplot(111, frameon=False) ax.plot(tot_inputs.ravel(), tot_targets.ravel(), "bx", label="testing data") ax.plot(inputs.ravel(), targets.ravel(), 'rx', label="training data") ax.plot(tot_inputs.ravel(), lowerbd.ravel(), "k-", label="error bar") ax.plot(tot_inputs.ravel(), upperbd.ravel(), "k-") ax.legend()
# %% Time series plots for medium depth electrode plt.figure(figsize=(12, 4)) plt.subplot(221) plt.plot(t, lfp_baseline[11, :, ::100]) plt.plot(t, np.mean(lfp_baseline[11, :, :], 1), 'k', linewidth=3) plt.title('LFP') plt.xlabel('Time (ms)') plt.subplot(222) plt.plot(t, gpcsd_model.csd_pred[11, :, ::100]) plt.plot(t, np.mean(gpcsd_model.csd_pred[11, :, :], 1), 'k', linewidth=3) plt.title('GPCSD') plt.xlabel('Time (ms)') # %% Prediction during trial - both CSD and LFP at the two timescales t_ind = np.logical_and(time >= 0, time < 500) lfp_trial = lfp[:, t_ind, :] #lfp_trial = lfp_trial - np.mean(lfp_trial, 2, keepdims=True) gpcsd_model.update_lfp(lfp_trial, time[t_ind][:, None]) gpcsd_model.predict(x, time[t_ind][:, None], type="both") tcsd = predictcsd_trad_1d(lfp_trial) # %% trial_plot = 0 vmlfp = 0.9 * np.max(np.abs(lfp_trial[:, :, trial_plot])) vmgpcsd = 0.9 * np.max(np.abs(gpcsd_model.csd_pred[:, :, trial_plot])) vmtcsd = 0.9 * np.max(np.abs(tcsd[:, :, trial_plot])) plt.figure(figsize=(12, 5)) plt.subplot(231) plt.imshow(gpcsd_model.csd_pred_list[0][1:-1, :, trial_plot],
bdd['kappa'] = bdk bdd['mu'] = bdm bdd['ht'] = bdht bdd['b'] = bdb helper_vars['bounds_dict'] = bdd fn_obj = dfre_nd.models_nd.model_from_rate_function(contrast_parametric_ori, sizedict, helper_vars) # - #tr = np.zeros((dtrialwise.shape[0],8)) imax = 50 tr = np.zeros((imax, ns, nc + 4)) for isize in range(ns): runtrial = np.logical_and( np.logical_and(helper_vars['runtrial'], stimulus_id[0] == isize), stimulus_id[3] == 0) for ind in range(imax): print(ind) np.seterr(all='print') data_obj = dfre_nd.data_obj(stimulus_id[:, runtrial], F[ind].T[:, runtrial], F[ind].T[:, runtrial], F[ind].flatten(), nbefore, nafter) fit_obj = dfre_nd.fit_obj(data_obj, pct_spike=97.5) np.seterr(all='warn') prestim = data_obj.F[:, :nbefore].mean() during = data_obj.F[:, nbefore:-nafter].mean() if prestim < during: guessA = 1 guessb = eps
def _setup_graphs(
        self,
        ts,
        phi_kernel=None,
        mu_kernel=None,
        n_phi_bases=1,
        n_mu_bases=0,
        fit_tau=False,
        fit_omega=False,
        fit_kappa=True,
        t_start=0.0,
        t_end=None,
        param_vector=None,
        tol=None,
        weight_count=False,
        **kwargs):
    """Store the observation window, kernels and parameter bounds on self.

    ``ts`` is the array of event times. The window is ``[t_start, t_end)``,
    with ``t_end`` falling back to the last timestamp when it is not given
    (or is falsy). ``phi_kernel`` and ``mu_kernel`` are converted through the
    ``influence`` and ``background`` helpers, the ``fit_*`` flags select which
    parameter groups get bounds, and ``**kwargs`` is forwarded to
    ``mu_kernel.count`` when ``weight_count`` is truthy. ``param_vector`` is
    accepted but not used in the visible part of this method.
    """
    self._t_start = t_start
    if t_end:
        self._t_end = t_end
    else:
        self._t_end = ts[-1]

    if tol is None:
        # Default tolerance scales with the event rate over the window
        tol = 1e-5 * ts.size / (self._t_end - self._t_start)
    self.tol = tol

    self._ts = ts
    in_window = np.logical_and(ts >= self._t_start, ts < self._t_end)
    self._n_ts = np.count_nonzero(in_window)

    # Full data likelihood must evaluate at the t_end also
    if ts[-1] < self._t_end:
        self._eval_ts = np.append(ts[ts > self._t_start], [self._t_end])
    else:
        self._eval_ts = ts[in_window]

    # Influence (phi) kernel: default to a Maxwell kernel when none is given
    if phi_kernel is not None:
        phi_kernel = influence.as_influence_kernel(
            phi_kernel, n_bases=n_phi_bases)
    else:
        phi_kernel = influence.MaxwellKernel(n_bases=n_phi_bases)
    self.phi_kernel = phi_kernel
    self.n_phi_bases = phi_kernel.n_bases

    # Background (mu) kernel
    mu_kernel = background.as_background_kernel(
        mu_kernel,
        n_bases=n_mu_bases,
        t_start=self._t_start,
        t_end=self._t_end,
    )
    self.mu_kernel = mu_kernel
    self.n_mu_bases = mu_kernel.n_bases

    # tau and omega are only fitted when there are background bases
    if self.n_mu_bases > 0:
        self._fit_tau = fit_tau
    else:
        self._fit_tau = False
    if self.n_mu_bases > 0:
        self._fit_omega = fit_omega
    else:
        self._fit_omega = False
    self._fit_kappa = fit_kappa

    # Number of packed parameters per group (0 when the group is not fitted)
    self._n_kappa_pack = self.n_phi_bases * self._fit_kappa
    self._n_omega_pack = self.n_mu_bases * self._fit_omega
    self._n_tau_pack = self.n_phi_bases * self._fit_tau
    self._weight_count = weight_count

    self._mu_bounds = mu_kernel.mu_bounds()
    self._kappa_bounds = phi_kernel.kappa_bounds() if self._fit_kappa else []
    self._omega_bounds = mu_kernel.kappa_bounds() if self._fit_omega else []
    self._tau_bounds = phi_kernel.tau_bounds() if self._fit_tau else []

    self._all_bounds = (
        self._mu_bounds
        + self._kappa_bounds
        + self._omega_bounds
        + self._tau_bounds
    )

    if weight_count is None:
        weight_count = self._weight_count

    self._omega_weight = np.ones(self.n_mu_bases)
    # NB this will break if we ever fit the background rate tau
    if weight_count:
        counts = self.mu_kernel.count(self._eval_ts, **kwargs)
        self._omega_weight += np.array(counts, dtype='float')