def setup_sampler(model, Y, monotone=False):
    """Switch on every sampling block of ``model`` and initialise its state.

    All five ``sample_*`` flags are set to ``True`` (i.e. nothing is held
    fixed at a true value), the factors ``model.W`` / ``model.V`` are
    overwritten in place with a ``tensor_nmf`` fit of ``Y``, and the model's
    own ``_init_lam2`` / ``_init_Tau2`` / ``_init_sigma2`` initialisers are
    invoked.

    Parameters
    ----------
    model : object
        Sampler object exposing ``nembeds``, writable ``W`` and ``V`` arrays
        and the ``_init_*`` methods used below.
    Y : array
        Observations handed to ``tensor_nmf``.
    monotone : bool, optional
        Forwarded unchanged to ``tensor_nmf``.
    """
    # Choose which variables are sampled; setting a flag to False would
    # instead keep that variable fixed.
    for flag in ('sample_W', 'sample_V', 'sample_Tau2',
                 'sample_sigma2', 'sample_lam2'):
        setattr(model, flag, True)

    # Start the factors from a nonnegative matrix factorization.
    if model.sample_W and model.sample_V:
        init_W, init_V = tensor_nmf(Y, model.nembeds, monotone=monotone)
        model.W[:] = init_W
        model.V[:] = init_V
        # An EP initialisation via ep_from_mf(Y, model.W, model.V,
        # mode='multiplier', multiplier=3) is intentionally disabled here.

    # Run the remaining initialisers in the order lam2 -> Tau2 -> sigma2.
    for flag, initialiser in (('sample_lam2', '_init_lam2'),
                              ('sample_Tau2', '_init_Tau2'),
                              ('sample_sigma2', '_init_sigma2')):
        if getattr(model, flag):
            getattr(model, initialiser)()
# Get the true mean values Mu = np.einsum('nk,mtk->nmt', W_true, V_true) # Generate the data Y = np.random.poisson(Mu[...,None], size=(nrows, ncols, ndepth, nreplicates)).astype(float) # Hold out some curves Y_missing = Y.copy() Y_missing[:3,:3] = np.nan # for nembeds in nembeds_options: print('Seed {} d={}'.format(seed, nembeds)) models = [] ############### Setup the NMF baseline ############### W_nmf, V_nmf = tensor_nmf(Y_missing, nembeds) Mu_nmf = (W_nmf[:,None,None] * V_nmf[None]).sum(axis=-1) models.append({'name': 'NMF', 'fit': Mu_nmf, 'samples': Mu_nmf[None], 'file': 'nmf.npy'}) ########################################################################### ############### Setup the PGDS baseline ############### print('Fitting PGDS') # try: for tau in [0.25, 0.5, 1]: # If you have the Poisson-gamma dynamical system of Schein et al installed, # add that baseline comparison # sys.path.append('../apf/src/') from functionalmf.pgds import fit_pgds # Fit the PGDS model print('\tk={} tau={}'.format(nembeds, tau)) import warnings
np.isnan(Y_candidate), axis=(1, 2, 3))) | np.any( np.all(np.isnan(Y_candidate), axis=(0, 2, 3))) # Remove the held out data points but keep track of them for evaluation at the end held_out = selected.T Y = Y_candidate print(held_out) # Create the Y in shared memory for parallel processing Y_shared = sa.create(args.sharedprefix + 'Y_obs', Y.shape) Y_shared[:] = Y Y = Y_shared # Get the raw NMF as a baseline print('Fitting NMF') W_nmf, V_nmf = tensor_nmf(Y, args.nembeds, max_entry=0.999, verbose=False) Mu_nmf = (W_nmf[:, None, None] * V_nmf[None]).sum(axis=-1) np.save(os.path.join(args.outdir, 'nmf_w'), W_nmf) np.save(os.path.join(args.outdir, 'nmf_v'), V_nmf) # Get the monotone projected NMF as a baseline print('Fitting Monotone NMF') W_nmf_proj, V_nmf_proj = tensor_nmf(Y, args.nembeds, monotone=True, max_entry=0.999) Mu_nmf_proj = (W_nmf_proj[:, None, None] * V_nmf_proj[None]).sum(axis=-1) print('Initializing model') model, Us, callback = init_model(Y, likelihood, args) Mu_init = (model.W[:, None, None] * model.V[None]).sum(axis=-1)
def _unit_interval_constraints(A):
    """Return the rows ``[A, 0]`` stacked on top of the rows ``[-A, -1]``.

    NOTE(review): presumably each row ``[a, b]`` is read by the sampler as the
    inequality ``a . x >= b``, so the two halves together bound ``A x`` to
    [0, 1] -- confirm against the constrained sampler's convention.
    """
    nrows = A.shape[0]
    lower = np.concatenate([A, np.full((nrows, 1), 0)], axis=1)
    upper = np.concatenate([A * -1, np.full((nrows, 1), -1)], axis=1)
    return np.concatenate([lower, upper], axis=0)


def _dose_response_constraints(ndepth):
    """Build the ``(3 * ndepth - 1, ndepth + 1)`` constraint matrix for one curve.

    The rows are, in order: ``[I, 0]``, ``[-I, -1]`` and, for each adjacent
    pair ``(i, i + 1)``, a row with ``+1`` at ``i``, ``-1`` at ``i + 1`` and
    ``-1e-2`` in the last column (a softened monotonicity constraint that
    allows a small fudge factor for numerical stability).
    """
    C_zero = np.concatenate([np.eye(ndepth), np.zeros((ndepth, 1))], axis=1)
    C_one = np.concatenate(
        [np.eye(ndepth) * -1, np.full((ndepth, 1), -1)], axis=1)

    # Allocate explicitly so that a depth of 1 yields an empty (0, ndepth + 1)
    # block instead of a 1-D empty array that cannot be concatenated.
    npairs = max(ndepth - 1, 0)
    C_mono = np.zeros((npairs, ndepth + 1))
    pair_idx = np.arange(npairs)
    C_mono[pair_idx, pair_idx] = 1
    C_mono[pair_idx, pair_idx + 1] = -1
    C_mono[:, -1] = -1e-2

    return np.concatenate([C_zero, C_one, C_mono], axis=0)


def init_model(Y, likelihood, args):
    """Create the constrained tensor-filtering model, initialised from an NMF fit.

    Parameters
    ----------
    Y : array with three axes
        Observations; ``Y.shape`` supplies the three model dimensions.
    likelihood
        Accepted for interface compatibility; the body never reads it. The
        log-likelihood actually used is ``rowcol_likelihood`` or
        ``rowcol_likelihood_with_X`` depending on ``args.features``.
    args : namespace
        Reads ``features``, ``outdir``, ``nembeds``, ``sharedprefix``,
        ``sample_features``, ``nsamples``, ``nburn``, ``nthin``, ``tf_order``,
        ``lam2`` and ``nthreads``.

    Returns
    -------
    model : ConstrainedNonconjugateBayesianTensorFiltering
        Model whose ``W`` and ``V`` have been set to the NMF factors.
    U_samples : array or None
        ``None`` without features; ``U[None]`` with features that are not
        sampled; otherwise a ``(nsamples, *U.shape)`` buffer that the returned
        callback fills in.
    callback : callable or None
        ``U_step(model, _, step)`` when ``args.sample_features`` is set (and
        features were given), else ``None``.

    Raises
    ------
    AssertionError
        If the initial means ``sum_k W[n, k] * V[m, t, k]`` leave [0, 1].
    """
    # Linear constraints requiring monotonicity and [0,1] means. The depth is
    # taken from Y (the same value handed to the model below) rather than from
    # a module-level global.
    ndepth = Y.shape[2]
    C = _dose_response_constraints(ndepth)

    # If the user provided an optional set of binary row features
    if args.features is not None:
        import pandas as pd
        print('Loading features')
        df = pd.read_csv(args.features, index_col=0, header=0)

        # Filter the features into those with and without dose-response data
        cells = np.load(os.path.join(args.outdir, 'cells.npy'))

        # Print some info on the breakdown of features and dose-response data
        have_both = [c for c in cells if c in df.index]
        doseresponse_only = [c for c in cells if c not in df.index]
        features_only = [c for c in df.index if c not in cells]
        print('Have dose-response and features: {}'.format(len(have_both)))
        print('Dose-response only: {}'.format(len(doseresponse_only)))
        print('Features only: {}'.format(len(features_only)))

        # Create feature matrices for samples with and without dose-response
        # curves; cells lacking features get an all-NaN row.
        X_with = np.array([
            df.loc[c].values if c in df.index
            else np.full(len(df.columns), np.nan)
            for c in cells
        ])
        # Not used yet; kept for the joint-fit TODO below.
        X_without = np.array([df.loc[c].values for c in features_only])

        print(
            'Initializing dose-response embeddings via NMF with row features')
        W, V, U = tensor_nmf(Y,
                             args.nembeds,
                             monotone=True,
                             max_entry=0.999,
                             verbose=False,
                             row_features=X_with)

        # If we have samples that have no dose-response, generate factors for them as well
        # TODO: fitting this jointly is probably marginally better, but let's not do it for now.
        # if X_without.shape[0] > 0:
        #     W_without, _ = tensor_nmf(X_without[:,:,None], args.nembeds, V=U, fit_V=False, max_entry=0.999, verbose=False)
        X = X_with  # Quick and dirty approach that just uses the samples with dose-response for now

        # Create shared arrays
        X_shared = sa.create(args.sharedprefix + 'X', X.shape)
        X_shared[:] = X
        X = X_shared
        U_shared = sa.create(args.sharedprefix + 'U', U.shape)
        U_shared[:] = U
        U = U_shared

        if args.sample_features:
            # Create constraints for WU to be in [0,1]
            Row_constraints = _unit_interval_constraints(U)

            # Posterior samples
            # U_samples = sa.create(args.sharedprefix + 'U_samples', (args.nsamples, U.shape[0], U.shape[1]))
            U_samples = np.zeros((args.nsamples, U.shape[0], U.shape[1]))

            from functionalmf.gass import gass

            def U_step(model, _, step):
                """Resample every row of ``U`` given the current ``model.W``."""
                # TODO: Parallelize this
                # Setup the [0,1] constraints
                U_constraints = _unit_interval_constraints(model.W)
                U_Sigma = np.eye(U.shape[1])

                # Sample each U_i vector
                for i in range(U.shape[0]):

                    def u_loglike(u, xx):
                        # Bernoulli log-likelihood of feature column i with
                        # success probabilities W u; NaN terms are skipped
                        # by nansum. ``xx`` is not used.
                        if len(u.shape) == 2:
                            # A 2-D ``u`` holds one candidate per row.
                            wu = u.dot(model.W.T)
                            return np.nansum(
                                X[None, :, i] * np.log(wu) +
                                (1 - X[None, :, i]) * np.log(1 - wu),
                                axis=1)
                        wu = model.W.dot(u)
                        return np.nansum(X[:, i] * np.log(wu) +
                                         (1 - X[:, i]) * np.log(1 - wu))

                    U[i], _ = gass(U[i], U_Sigma, u_loglike, U_constraints)

                # Update W constraints for WU to be in [0,1]
                model.Row_constraints[:] = _unit_interval_constraints(U)

                # Save the U sample
                if step >= args.nburn and (step - args.nburn) % args.nthin == 0:
                    sidx = (step - args.nburn) // args.nthin
                    U_samples[sidx] = U

            callback = U_step
            loglikelihood = rowcol_likelihood_with_X
        else:
            Row_constraints = None
            callback = None
            U_samples = U[None]
            loglikelihood = rowcol_likelihood_with_X
    else:
        # Initialize the model with a nonnegative matrix factorization on the clipped values
        print('Initializing dose-response embeddings via NMF')
        W, V = tensor_nmf(Y,
                          args.nembeds,
                          monotone=True,
                          max_entry=0.999,
                          verbose=False)
        Row_constraints = None
        callback = None
        U_samples = None
        loglikelihood = rowcol_likelihood

    # Sanity check that we're starting at valid points
    Mu = (W[:, None, None] * V[None]).sum(axis=-1)
    assert Mu.min() >= 0, 'Mu range [{},{}]'.format(Mu.min(), Mu.max())
    assert Mu.max() <= 1, 'Mu range [{},{}]'.format(Mu.min(), Mu.max())

    # Get an EP approximation centered at the mean and with the variance overestimated.
    EP_approx = ep_from_mf(Y, W, V, mode='multiplier', multiplier=3)

    # Do not sample W if we have features that are themselves held fixed.
    fix_W = args.features is not None and not args.sample_features

    # Create the model
    model = ConstrainedNonconjugateBayesianTensorFiltering(
        Y.shape[0],
        Y.shape[1],
        Y.shape[2],
        loglikelihood,
        C,
        nembeds=args.nembeds,
        tf_order=args.tf_order,
        lam2_true=args.lam2,
        ep_approx=EP_approx,
        nthreads=args.nthreads,
        W_true=W if fix_W else None,
        Row_constraints=Row_constraints,  # Row feature constraints to [0,1]
        sharedprefix=args.sharedprefix,
        worker_init=worker_init)

    # Initialize at the NMF fit
    model.W[:], model.V[:] = W, V

    return model, U_samples, callback
verbose = 0) # Constraints requiring positive means C_zero = np.concatenate([np.eye(ndepth), np.zeros((ndepth,1))], axis=1) # Setup the lower bound inequalities model = ConstrainedNonconjugateBayesianTensorFiltering(nrows, ncols, ndepth, rowcol_loglikelihood, C_zero, nembeds=nembeds, tf_order=2, sigma2_init=0.5, nthreads=3, lam2_init=0.1) # Use NMF to initialize the model model.W, model.V = tensor_nmf(Mu_pgds.mean(axis=0), nembeds) model.Mu_ep, model.Sigma_ep = ep_from_nmf(Y_train, model.W, model.V) ''' # Setup the non-conjugate model model = NonconjugateBayesianTensorFiltering(nrows, ncols, ndepth, ess_loglikelihood, nembeds=nembeds, tf_order=2, sigma2_init=1, lam2_init=0.1) # model.W, model.V = tensor_nmf(Y_train, nembeds) ''' print('Running Gibbs sampler') results = model.run_gibbs(Y_train, nburn=nburn, nthin=nthin, nsamples=nsamples, print_freq=10, verbose=True) Ws = results['W'] Vs = results['V'] Tau2s = results['Tau2'] lam2s = results['lam2'] sigma2s = results['sigma2']