def colorize(frames, cmap):
    """Map ``frames`` through a colormap and return RGB values scaled to 0-255.

    The input is shifted so its smallest non-NaN value is 0 and divided by the
    resulting largest non-NaN value, then passed to ``cmap``.

    Args:
        frames: Array-like of numbers. It is never modified; integer input is
            converted to float so the normalisation can be done in place on
            the working copy.
        cmap: Either a colormap name (resolved with
            ``matplotlib.pyplot.get_cmap``) or a callable that takes the
            normalised array and returns an array whose last axis holds at
            least three colour channels.

    Returns:
        Float array with the first three channels of ``cmap``'s output,
        multiplied by 255 and clipped to ``[0, 255]``.
    """
    if isinstance(cmap, str):
        # Imported lazily: a caller that already holds a colormap callable
        # does not need matplotlib to be importable.
        import matplotlib.pyplot as plt
        cmap = plt.get_cmap(cmap)

    scaled = np.asarray(frames)
    # Keep floating dtypes as they are; promote everything else to float so
    # the in-place true division below is legal. ``astype`` always copies.
    target = scaled.dtype if np.issubdtype(scaled.dtype, np.floating) else float
    scaled = scaled.astype(target)

    scaled -= np.nanmin(scaled)
    peak = np.nanmax(scaled)
    # A constant input has peak == 0; dividing would turn every value into
    # NaN. Leave the zeros in place so they map to the low end of the colormap.
    if peak > 0:
        scaled /= peak

    return np.clip(cmap(scaled)[..., :3] * 255, 0, 255)
def colorize(frames, cmap):
    """Normalise ``frames`` to [0, 1] and convert them to 0-255 RGB via ``cmap``.

    Args:
        frames: Numeric array-like. A float working copy is made, so the
            caller's data is left untouched and integer input is accepted.
        cmap: A colormap name, looked up through
            ``matplotlib.pyplot.get_cmap``, or any callable returning an array
            with colour channels on its last axis.

    Returns:
        Float array holding the first three channels produced by ``cmap``,
        scaled by 255 and clipped to the range 0-255.
    """
    if isinstance(cmap, str):
        # matplotlib is only required when a name has to be resolved.
        import matplotlib.pyplot as plt
        cmap = plt.get_cmap(cmap)

    values = np.asarray(frames)
    if not np.issubdtype(values.dtype, np.floating):
        # In-place true division is not allowed on integer arrays.
        values = values.astype(float)
    else:
        values = values.copy()

    values -= np.nanmin(values)
    span = np.nanmax(values)
    if span > 0:
        values /= span
    # else: constant (or all-NaN) input; skip the 0/0 division and keep the
    # shifted values as they are.

    rgb = cmap(values)[..., :3] * 255
    return np.clip(rgb, 0, 255)
def initialize(self, delta0=0):
    """Build the starting parameter vector ``theta0``.

    The first entry is the smallest non-NaN value of ``self.data``; the last
    entry is ``delta0``; every other entry is a fixed constant.

    Returns:
        A 1-D array with 15 entries.
    """
    floor = np.nanmin(self.data)
    # Fixed starting constants, named as in the parameter layout below.
    rd, rs = 100, 100
    sd, ss = 10, 60
    m, n = 1, 1  # n was previously tried as 2

    layout = [
        floor, 0, rd, rs,
        0, rd, rs,
        sd, ss, sd, ss,
        m, n, n,
        delta0,
    ]
    return np.array(layout)
def gen_traces(datafiles, blcutoff=blcutoff, blspan=blspan):  #nbefore=nbefore,nafter=nafter
    """Load traces from each data file and stack the per-file results row-wise.

    For every file the ``'corrected'`` dataset is read and transposed, NaNs
    are replaced by the smallest non-NaN value, a running baseline is
    estimated, and dF/F and its deconvolution are computed row by row.

    Args:
        datafiles: Iterable of HDF5 file paths. For each path, a companion
            file with ``'.rois'`` replaced by ``'.mat'`` is also opened.
        blcutoff: Percentile handed to ``sfi.percentile_filter`` for the
            baseline estimate.
        blspan: Baseline window. ``int(blspan / ds)`` is the filter width on
            the trace subsampled by the module-level ``ds``, and
            ``blspan / 2`` is the sigma of the Gaussian smoothing that follows.

    Returns:
        Tuple ``(trialwise, ctrialwise, strialwise, dfofall, baselineall)``:
        the NaN-filled traces, the two deconvolution outputs, dF/F and the
        baseline, each concatenated over files along axis 0. With no input
        files every element is an empty 1-D array.

    Raises:
        ValueError: If files cannot be concatenated along axis 0 (for example
            different trace lengths). Previously this case silently discarded
            everything accumulated from earlier files.
    """
    raw_parts = []
    c_parts = []
    s_parts = []
    dfof_parts = []
    baseline_parts = []

    for datafile in datafiles:
        # NOTE(review): `frm` is not used below; the load is kept so that a
        # missing or malformed .mat file still fails the same way as before.
        frm = sio.loadmat(datafile.replace('.rois', '.mat'),
                          squeeze_me=True)['info']['frame'][()][1:]
        with h5py.File(datafile, mode='r') as f:
            to_add = f['corrected'][:].T
        to_add[np.isnan(to_add)] = np.nanmin(to_add)

        # Running-percentile baseline on the subsampled trace, expanded back
        # by repeating each sample `ds` times and then smoothed per row.
        baseline = sfi.percentile_filter(to_add[:, ::ds], blcutoff,
                                         (1, int(blspan / ds)))
        baseline = np.repeat(baseline, ds, axis=1)
        for i in range(baseline.shape[0]):
            baseline[i] = sfi.gaussian_filter1d(baseline[i], blspan / 2)
        # The repeat can overshoot when the trace length is not a multiple
        # of `ds`; trim the excess columns.
        if baseline.shape[1] > to_add.shape[1]:
            baseline = baseline[:, :to_add.shape[1]]

        c = np.zeros_like(to_add)
        s = np.zeros_like(to_add)
        dfof = np.zeros_like(to_add)
        for i in range(c.shape[0]):
            dfof[i] = (to_add[i] - baseline[i, :]) / (baseline[i, :])
            c[i], s[i], _, _, _ = deconvolve(dfof[i].astype(np.float64),
                                             penalty=1, sn=5e-3)

        raw_parts.append(to_add)
        c_parts.append(c)
        s_parts.append(s)
        dfof_parts.append(dfof)
        baseline_parts.append(baseline)

    if not raw_parts:
        # Same result as before when there is nothing to load.
        return (np.array(()), np.array(()), np.array(()), np.array(()),
                np.array(()))

    # One concatenation per output instead of re-copying on every file.
    trialwise = np.concatenate(raw_parts, axis=0)
    ctrialwise = np.concatenate(c_parts, axis=0)
    strialwise = np.concatenate(s_parts, axis=0)
    dfofall = np.concatenate(dfof_parts, axis=0)
    baselineall = np.concatenate(baseline_parts, axis=0)
    return trialwise, ctrialwise, strialwise, dfofall, baselineall
def lc_plot(ax, t_orig, y_orig, mu, std, xlim=None):
    """Draw the data points and a +/- 3 ``std`` band around ``mu`` on ``ax``.

    Args:
        ax: Axes-like object providing ``fill_between``, ``plot``,
            ``set_xlim``, ``set_ylim``, ``set_xlabel`` and ``set_ylabel``.
        t_orig: Array of x values.
        y_orig: Array of y values plotted as points.
        mu: Centre of the shaded band, evaluated at ``t_orig``.
        std: Half-width unit of the band; the band spans ``mu -/+ 3 * std``.
        xlim: Optional ``(low, high)`` x-range. Defaults to the first and
            last entries of ``t_orig``.

    The y-range is fitted to the finite points inside ``xlim`` (end points
    included) and padded by 10 % of the magnitude of each extreme, so the data
    stay visible whatever their sign. If no finite point falls inside
    ``xlim``, the y-limits are left untouched.
    """
    color = "#ff7f0e"
    ax.fill_between(t_orig, mu + std * 3, mu - std * 3, alpha=0.7,
                    color=color, zorder=0)
    ax.plot(t_orig, y_orig, '.', zorder=1)

    if xlim is None:
        xlim = [t_orig[0], t_orig[-1]]
    ax.set_xlim(xlim)

    times = np.asarray(t_orig)
    # Inclusive bounds: points sitting exactly on the x-limits are drawn, so
    # they must take part in the y-range.
    in_view = (times >= xlim[0]) & (times <= xlim[1])
    shown = np.asarray(y_orig)[in_view]
    shown = shown[np.isfinite(shown)]
    if shown.size:
        lowest = shown.min()
        highest = shown.max()
        # Pad away from the data. Scaling by 1.1 only does that when the
        # minimum is negative and the maximum positive.
        ylow = lowest - 0.1 * abs(lowest)
        yhigh = highest + 0.1 * abs(highest)
        ax.set_ylim(ylow, yhigh)

    ax.set_xlabel('Time (days)', fontsize=14)
    ax.set_ylabel('Relative Brightness', fontsize=14)
# NOTE(review): `j` and `n` are not defined in this fragment; presumably they
# are loop indices of enclosing code that is outside this view - confirm
# where the per-run part ends and the plotting part begins.

# Run `psislw` on the transposed log importance weights and keep the K-hat
# value it returns for this (j, n) cell of the results table.
print(log_iw.shape)
psis_lw, K_hat_stan = psislw(log_iw.T)
K_hat_stan_advi_list[j, n] = K_hat_stan
print(psis_lw.shape)
print('K hat statistic for Stan ADVI:')
print(K_hat_stan)
###################### Plotting L2 norm here #################################
# NOTE(review): despite the banner, the code below plots the first two
# columns of `stan_vb_w` and then the K-hat summary; no norm is computed here.
plt.figure()
plt.plot(stan_vb_w[:, 0], stan_vb_w[:, 1], 'mo', label='STAN-ADVI')
plt.savefig('vb_w_samples_mf.pdf')
# Persist the K-hat table collected so far.
np.save('K_hat_linear_' + datatype + '_' + algo_name + '_' + str(N) + 'N', K_hat_stan_advi_list)
# Row-wise mean (opaque) and min/max (faded) of K-hat against `K_list`,
# ignoring NaN entries.
plt.figure()
plt.plot(K_list, np.nanmean(K_hat_stan_advi_list, axis=1), 'r-', alpha=1)
plt.plot(K_list, np.nanmin(K_hat_stan_advi_list, axis=1), 'r-', alpha=0.5)
plt.plot(K_list, np.nanmax(K_hat_stan_advi_list, axis=1), 'r-', alpha=0.5)
plt.xlabel('Dimensions')
plt.ylabel('K-hat')
# Same table saved a second time, under a name that also records `gradsamples`.
np.save(
    'K_hat_linear_' + datatype + '_' + algo_name + '_' + str(N) + 'N' +
    '_samples_' + str(gradsamples), K_hat_stan_advi_list)
#plt.ylim((0,5))
# NOTE(review): none of the three lines on this figure has a `label`, so this
# legend has no entries to show.
plt.legend()
plt.savefig('Linear_Regression_K_hat_vs_D_' + datatype + '_' + algo_name + '_' + str(N) + 'N.pdf')
def MI2AMI(y, n_clusters, r, k, init, var_distrib, nj,
           nan_mask, target_nb_pseudo_obs=500, it=50,
           eps=1E-05, maxstep=100, seed=None, perform_selec=True,
           dm=None, max_patience=1):  # dm: Hack to remove
    ''' Complete the missing values using a trained M1DGMM

    A M1DGMM is fitted on the rows of y without missing values. Then, for each
    row with at least one missing value, a latent point is searched with a
    constrained optimisation (started from the closest cluster centre) and the
    row is imputed from it. Only the masked entries of y are replaced.

    y (numobs x p DataFrame-like): The observations containing mixed variables.
        Its ``columns`` attribute is read before it is converted to an ndarray
    n_clusters (int): The number of clusters to look for in the data.
        Currently not used in this function: M1DGMM is called with 'auto'
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
        (must contain fewer than 2 entries)
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y
    nj (p 1darray): For binary/count data: The maximum values that the variable can take.
                    For ordinal data: the number of different existing categories for each variable
    nan_mask (ndarray): A mask array equal to True when the observation value is missing False otherwise
    target_nb_pseudo_obs (int): The number of pseudo-observations to generate.
        Currently not used in this function
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increase by less than eps then the algorithm stops.
        Also used as the tolerance of the per-row optimisation
    maxstep (int): The maximum number of optimisation step for each variable
    seed (int): The random state seed to set (Only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    dm (np array): The distance matrix of the observations. If not given M1DGMM computes it.
        None (the default) is forwarded as an empty list
    max_patience (int): Forwarded unchanged to M1DGMM
    ------------------------------------------------------------------------------------------------
    returns (dict): The M1DGMM output, extended with 'completed_y' (y with the
        masked entries imputed), 'zz' (the optimised latent point of each row
        with missing values, in row order) and 'fun' (the matching objective values)
    '''

    # Avoid sharing one mutable default between calls.
    if dm is None:
        dm = []

    # !!! Hack
    cols = y.columns

    # Formatting
    if not isinstance(nan_mask, np.ndarray):
        nan_mask = np.asarray(nan_mask)
    if not isinstance(y, np.ndarray):
        y = np.asarray(y)

    assert len(k) < 2  # Not implemented for deeper MDGMM for the moment

    # Keep complete observations
    complete_y = y[~np.isnan(y.astype(float)).any(1)]

    out = M1DGMM(complete_y, 'auto', r, k, init, var_distrib, nj, it,
                 eps, maxstep, seed, perform_selec=perform_selec,
                 dm=dm, max_patience=max_patience, use_silhouette=True)

    # Compute the associations
    vc = vars_contributions(pd.DataFrame(complete_y, columns=cols), out['Ez.y'], assoc_thr=0.0,
                            title='Contribution of the variables to the latent dimensions',
                            storage_path=None)

    # Unpacking the model from the M1DGMM output
    #p = y.shape[1]
    k = out['best_k']
    r = out['best_r']
    mu = out['mu'][0]
    lambda_bin = np.array(out['lambda_bin'])
    lambda_ord = out['lambda_ord']
    lambda_categ = out['lambda_categ']
    lambda_cont = np.array(out['lambda_cont'])

    nj_bin = nj[pd.Series(var_distrib).isin(['bernoulli', 'binomial'])].astype(int)
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nj_categ = nj[var_distrib == 'categorical'].astype(int)

    nb_cont = np.sum(var_distrib == 'continuous')
    nb_bin = np.sum(var_distrib == 'binomial')

    y_std = complete_y[:, var_distrib == 'continuous'].astype(float).std(axis=0,
                                                                         keepdims=True)
    # NOTE(review): the mask is True for NON-categorical variables although
    # the name suggests the opposite - confirm against what `error` expects.
    cat_features = var_distrib != 'categorical'

    # Compute the associations between variables and use them as weights for the optimisation
    assoc = cosine_similarity(vc, dense_output=True)
    np.fill_diagonal(assoc, 0.0)
    assoc = np.abs(assoc)
    weights = (assoc / assoc.sum(1, keepdims=True))

    #==============================================
    # Optimisation sandbox
    #==============================================

    # Define the observation generated by the center of each cluster
    cluster_obs = [impute(mu[kk, :, 0], var_distrib, lambda_bin, nj_bin, lambda_categ, nj_categ,
                          lambda_ord, nj_ord, lambda_cont, y_std) for kk in range(k[0])]

    # Use only of the observed variables as references
    types = {'bin': ['bernoulli', 'binomial'], 'categ': ['categorical'],
             'cont': ['continuous'], 'ord': 'ordinal'}

    # Gradient optimisation
    nan_indices = np.where(nan_mask.any(1))[0]
    # Rows without missing values keep their observed values; rows with
    # missing values are overwritten in the loop below.
    imputed_y = y.copy()

    #************************************
    # Linear constraint to stay in the support of continuous variables
    #************************************

    lb = np.array([])
    ub = np.array([])
    A = np.array([[]]).reshape((0, r[0]))

    if nb_bin > 0:
        ## Corrected Binomial bounds (ub is actually +inf)
        bin_indices = var_distrib[np.logical_or(var_distrib == 'bernoulli',
                                                var_distrib == 'binomial')]
        binomial_indices = bin_indices == 'binomial'

        lb_bin = np.nanmin(y[:, var_distrib == 'binomial'], 0)
        lb_bin = logit(lb_bin / nj_bin[binomial_indices]) - lambda_bin[binomial_indices, 0]
        ub_bin = np.nanmax(y[:, var_distrib == 'binomial'], 0)
        ub_bin = logit(ub_bin / nj_bin[binomial_indices]) - lambda_bin[binomial_indices, 0]
        A_bin = lambda_bin[binomial_indices, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_bin])
        ub = np.concatenate([ub, ub_bin])
        A = np.concatenate([A, A_bin], axis=0)

    if nb_cont > 0:
        ## Corrected Gaussian bounds
        lb_cont = np.nanmin(y[:, var_distrib == 'continuous'], 0) / y_std[0] - lambda_cont[:, 0]
        ub_cont = np.nanmax(y[:, var_distrib == 'continuous'], 0) / y_std[0] - lambda_cont[:, 0]
        A_cont = lambda_cont[:, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_cont])
        ub = np.concatenate([ub, ub_cont])
        A = np.concatenate([A, A_cont], axis=0)

    lc = LinearConstraint(A, lb, ub, keep_feasible=True)

    zz = []
    fun = []
    # Visit only the rows that have something to impute, in ascending order.
    for i in nan_indices:
        # Design the nan masks for the optimisation process
        nan_mask_i = nan_mask[i]
        weights_i = weights[nan_mask_i].mean(0)

        # Look for the best starting point
        cluster_dist = [error(y[i, ~nan_mask_i], obs[~nan_mask_i],
                              cat_features[~nan_mask_i], weights_i)
                        for obs in cluster_obs]
        z02 = mu[np.argmin(cluster_dist), :, 0]

        # Formatting
        vars_i = {type_alias: np.where(~nan_mask_i[np.isin(var_distrib, vartype)])[0]
                  for type_alias, vartype in types.items()}

        complete_categ = [l for idx, l in enumerate(lambda_categ) if idx in vars_i['categ']]
        complete_ord = [l for idx, l in enumerate(lambda_ord) if idx in vars_i['ord']]

        opt = minimize(stat_all, z02,
                       args=(y[i, ~nan_mask_i], var_distrib[~nan_mask_i],
                             weights_i[~nan_mask_i],
                             lambda_bin[vars_i['bin']], nj_bin[vars_i['bin']],
                             complete_categ,
                             nj_categ[vars_i['categ']],
                             complete_ord,
                             nj_ord[vars_i['ord']],
                             lambda_cont[vars_i['cont']], y_std[:, vars_i['cont']]),
                       tol=eps, method='trust-constr', jac=grad_stat,
                       constraints=lc,
                       options={'maxiter': 1000})

        z = opt.x
        zz.append(z)
        fun.append(opt.fun)

        imputed_y[i] = impute(z, var_distrib, lambda_bin, nj_bin, lambda_categ, nj_categ,
                              lambda_ord, nj_ord, lambda_cont, y_std)

    # Only the masked entries are taken from the imputation.
    completed_y = np.where(nan_mask, imputed_y, y)

    out['completed_y'] = completed_y
    out['zz'] = zz
    out['fun'] = fun
    return (out)