Example 1
def setup_sampler(model, Y, monotone=False):
    # Pick which variables to sample and which to fix at the truth
    model.sample_W = True
    model.sample_V = True
    model.sample_Tau2 = True
    model.sample_sigma2 = True
    model.sample_lam2 = True

    # Use nonnegative matrix factorization to initialize
    if model.sample_W and model.sample_V:
        nmf_W, nmf_V = tensor_nmf(Y, model.nembeds, monotone=monotone)
        model.W[:] = nmf_W
        model.V[:] = nmf_V
        # model.Mu_ep, model.Sigma_ep = ep_from_mf(Y, model.W, model.V, mode='multiplier', multiplier=3)

    if model.sample_lam2:
        model._init_lam2()

    if model.sample_Tau2:
        model._init_Tau2()

    if model.sample_sigma2:
        model._init_sigma2()
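
A minimal usage sketch (illustrative only; the constructor call mirrors Example 5 below, and nburn/nthin/nsamples are placeholder values):

model = ConstrainedNonconjugateBayesianTensorFiltering(nrows, ncols, ndepth,
                                                       rowcol_loglikelihood,
                                                       C_zero, nembeds=nembeds,
                                                       tf_order=2, nthreads=3)
setup_sampler(model, Y, monotone=True)  # NMF-initialize W, V and the variance terms
results = model.run_gibbs(Y, nburn=1000, nthin=5, nsamples=500,
                          print_freq=10, verbose=True)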
Example 2
    # Get the true mean values
    Mu = np.einsum('nk,mtk->nmt', W_true, V_true)

    # Generate the data
    Y = np.random.poisson(Mu[...,None], size=(nrows, ncols, ndepth, nreplicates)).astype(float)

    # Hold out some curves
    Y_missing = Y.copy()
    Y_missing[:3,:3] = np.nan
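    # Note: Y has shape (nrows, ncols, ndepth, nreplicates), so NaN-ing the
    # [:3, :3] block holds out every depth and replicate for those nine
    # row-column pairs, i.e. nine entire curves.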

    # for nembeds in nembeds_options:
    print('Seed {} d={}'.format(seed, nembeds))
    models = []

    ############### Setup the NMF baseline ###############
    W_nmf, V_nmf = tensor_nmf(Y_missing, nembeds)
    Mu_nmf = (W_nmf[:,None,None] * V_nmf[None]).sum(axis=-1)
    models.append({'name': 'NMF', 'fit': Mu_nmf, 'samples': Mu_nmf[None], 'file': 'nmf.npy'})
    ###########################################################################

    ############### Setup the PGDS baseline ###############
    print('Fitting PGDS')
    for tau in [0.25, 0.5, 1]:
        # If you have the Poisson-gamma dynamical system (PGDS) of Schein et
        # al. installed, add that baseline comparison
        # sys.path.append('../apf/src/')
        from functionalmf.pgds import fit_pgds
        # Fit the PGDS model
        print('\tk={} tau={}'.format(nembeds, tau))
        import warnings
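
Illustrative only (not part of the original script): each baseline collected in models could be scored on the held-out block along these lines, assuming each 'fit' entry holds an estimated mean tensor of shape (nrows, ncols, ndepth):

mask = np.isnan(Y_missing)  # the held-out entries; Y itself is fully observed
for m in models:
    rmse = np.sqrt(np.mean((m['fit'][..., None] - Y)[mask] ** 2))
    print('{}: held-out RMSE {:.4f}'.format(m['name'], rmse))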
Example 3
                np.isnan(Y_candidate), axis=(1, 2, 3))) | np.any(
                    np.all(np.isnan(Y_candidate), axis=(0, 2, 3)))

        # Remove the held out data points but keep track of them for evaluation at the end
        held_out = selected.T
        Y = Y_candidate
        print(held_out)

    # Create the Y in shared memory for parallel processing
    Y_shared = sa.create(args.sharedprefix + 'Y_obs', Y.shape)
    Y_shared[:] = Y
    Y = Y_shared
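    # Assuming sa is the SharedArray package: buffers made with sa.create live
    # in shared memory so worker processes can sa.attach the same name, and
    # they persist until explicitly freed with sa.delete.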

    # Get the raw NMF as a baseline
    print('Fitting NMF')
    W_nmf, V_nmf = tensor_nmf(Y, args.nembeds, max_entry=0.999, verbose=False)
    Mu_nmf = (W_nmf[:, None, None] * V_nmf[None]).sum(axis=-1)
    np.save(os.path.join(args.outdir, 'nmf_w'), W_nmf)
    np.save(os.path.join(args.outdir, 'nmf_v'), V_nmf)

    # Get the monotone projected NMF as a baseline
    print('Fitting Monotone NMF')
    W_nmf_proj, V_nmf_proj = tensor_nmf(Y,
                                        args.nembeds,
                                        monotone=True,
                                        max_entry=0.999)
    Mu_nmf_proj = (W_nmf_proj[:, None, None] * V_nmf_proj[None]).sum(axis=-1)

    print('Initializing model')
    model, Us, callback = init_model(Y, likelihood, args)
    Mu_init = (model.W[:, None, None] * model.V[None]).sum(axis=-1)
Example 4
def init_model(Y, likelihood, args):
    # Linear constraints requiring monotonicity and [0,1] means.
    # Note that we use a softened monotonicity constraint allowing a small
    # fudge factor for numerical stability.
    C_zero = np.concatenate([np.eye(ndepth), np.zeros((ndepth, 1))], axis=1)
    C_mono = np.array([
        np.concatenate(
            [np.zeros(i), [1, -1],
             np.zeros(ndepth - i - 2), [-1e-2]]) for i in range(ndepth - 1)
    ])
    C_one = np.concatenate(
        [np.eye(ndepth) * -1, np.full((ndepth, 1), -1)], axis=1)
    C = np.concatenate([C_zero, C_one, C_mono], axis=0)
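    # Reading each constraint row as [A | b] with feasibility A @ f >= b for a
    # curve f of length ndepth (inferred from the construction above):
    #   C_zero: f_t >= 0 for every dose t
    #   C_one:  -f_t >= -1, i.e. f_t <= 1
    #   C_mono: f_t - f_{t+1} >= -1e-2, i.e. f may rise by at most 0.01 per
    #           step -- approximately monotone nonincreasing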

    # If the user provided an optional set of binary row features
    if args.features is not None:
        import pandas as pd

        print('Loading features')
        df = pd.read_csv(args.features, index_col=0, header=0)

        # Filter the features into those with and without dose-response data
        cells = np.load(os.path.join(args.outdir, 'cells.npy'))

        # Print some info on the breakdown of features and dose-response data
        have_both = [c for c in cells if c in df.index]
        doseresponse_only = [c for c in cells if c not in df.index]
        features_only = [c for c in df.index if c not in cells]
        print('Have dose-response and features: {}'.format(len(have_both)))
        print('Dose-response only: {}'.format(len(doseresponse_only)))
        print('Features only: {}'.format(len(features_only)))

        # Create feature matrices for samples with and without dose-response curves
        X_with = np.array([
            df.loc[c].values if c in df.index else np.full(
                len(df.columns), np.nan) for c in cells
        ])
        X_without = np.array([df.loc[c].values for c in features_only])

        print(
            'Initializing dose-response embeddings via NMF with row features')
        W, V, U = tensor_nmf(Y,
                             args.nembeds,
                             monotone=True,
                             max_entry=0.999,
                             verbose=False,
                             row_features=X_with)

        # If we have samples that have no dose-response, generate factors for them as well
        # TODO: fitting this jointly is probably marginally better, but let's not do it for now.
        # if X_without.shape[0] > 0:
        #     W_without, _ = tensor_nmf(X_without[:,:,None], args.nembeds, V=U, fit_V=False, max_entry=0.999, verbose=False)
        X = X_with  # Quick and dirty approach that just uses the samples with dose-response for now

        # Create shared arrays
        X_shared = sa.create(args.sharedprefix + 'X', X.shape)
        X_shared[:] = X
        X = X_shared
        U_shared = sa.create(args.sharedprefix + 'U', U.shape)
        U_shared[:] = U
        U = U_shared

        if args.sample_features:
            # Create constraints for WU to be in [0,1]
            Row_zero = np.concatenate([U, np.full((U.shape[0], 1), 0)], axis=1)
            Row_one = np.concatenate(
                [U * -1, np.full((U.shape[0], 1), -1)], axis=1)
            Row_constraints = np.concatenate([Row_zero, Row_one], axis=0)

            # Posterior samples
            # U_samples = sa.create(args.sharedprefix + 'U_samples', (args.nsamples, U.shape[0], U.shape[1]))
            U_samples = np.zeros((args.nsamples, U.shape[0], U.shape[1]))

            from functionalmf.gass import gass

            def U_step(model, _, step):
                # TODO: Parallelize this
                # Setup the [0,1] constraints
                U_zero = np.concatenate(
                    [model.W, np.full((model.W.shape[0], 1), 0)], axis=1)
                U_one = np.concatenate(
                    [model.W * -1,
                     np.full((model.W.shape[0], 1), -1)], axis=1)
                U_constraints = np.concatenate([U_zero, U_one], axis=0)

                U_Sigma = np.eye(U.shape[1])

                # Sample each U_i vector
                for i in range(U.shape[0]):

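                    # Bernoulli log-likelihood for feature column i: wu = W @ u
                    # is the per-row success probability, scored against the
                    # binary X via X*log(wu) + (1-X)*log(1-wu). The 2D branch
                    # evaluates a whole batch of candidate u vectors from gass.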
                    def u_loglike(u, xx):
                        if len(u.shape) == 2:
                            wu = u.dot(model.W.T)
                            return np.nansum(
                                X[None, :, i] * np.log(wu) +
                                (1 - X[None, :, i]) * np.log(1 - wu),
                                axis=1)
                        wu = model.W.dot(u)
                        return np.nansum(X[:, i] * np.log(wu) +
                                         (1 - X[:, i]) * np.log(1 - wu))

                    U[i], _ = gass(U[i], U_Sigma, u_loglike, U_constraints)

                # Update W constraints for WU to be in [0,1]
                Row_zero = np.concatenate([U, np.full((U.shape[0], 1), 0)],
                                          axis=1)
                Row_one = np.concatenate(
                    [U * -1, np.full((U.shape[0], 1), -1)], axis=1)
                Row_constraints = np.concatenate([Row_zero, Row_one], axis=0)
                model.Row_constraints[:] = Row_constraints

                # Save the U sample
                if step >= args.nburn and (step -
                                           args.nburn) % args.nthin == 0:
                    sidx = (step - args.nburn) // args.nthin
                    U_samples[sidx] = U

            callback = U_step
            loglikelihood = rowcol_likelihood_with_X
        else:
            Row_constraints = None
            callback = None
            U_samples = U[None]
            loglikelihood = rowcol_likelihood_with_X
    else:
        # Initialize the model with a nonnegative matrix factorization on the clipped values
        print('Initializing dose-response embeddings via NMF')
        W, V = tensor_nmf(Y,
                          args.nembeds,
                          monotone=True,
                          max_entry=0.999,
                          verbose=False)
        Row_constraints = None
        callback = None
        U_samples = None
        loglikelihood = rowcol_likelihood

    # Sanity check that we're starting at valid points
    Mu = (W[:, None, None] * V[None]).sum(axis=-1)
    assert Mu.min() >= 0, 'Mu range [{},{}]'.format(Mu.min(), Mu.max())
    assert Mu.max() <= 1, 'Mu range [{},{}]'.format(Mu.min(), Mu.max())

    # Get an EP approximation centered at the mean and with the variance overestimated.
    EP_approx = ep_from_mf(Y, W, V, mode='multiplier', multiplier=3)

    # Create the model
    model = ConstrainedNonconjugateBayesianTensorFiltering(
        Y.shape[0],
        Y.shape[1],
        Y.shape[2],
        loglikelihood,
        C,
        nembeds=args.nembeds,
        tf_order=args.tf_order,
        lam2_true=args.lam2,
        ep_approx=EP_approx,
        nthreads=args.nthreads,
        W_true=W if args.features is not None and not args.sample_features else
        None,  # Do not sample W if we have features
        Row_constraints=Row_constraints,  # Row feature constraints to [0,1]
        sharedprefix=args.sharedprefix,
        worker_init=worker_init)

    # Initialize at the NMF fit
    model.W[:], model.V[:] = W, V

    return model, U_samples, callback
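
Illustrative use of the returned triple (not from the original script; whether run_gibbs accepts a per-step callback is an assumption here, though U_step's (model, _, step) signature suggests it is invoked that way):

model, U_samples, callback = init_model(Y, likelihood, args)
results = model.run_gibbs(Y, nburn=args.nburn, nthin=args.nthin,
                          nsamples=args.nsamples, verbose=True,
                          callback=callback)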
Example 5
                                                verbose=0)


# Constraints requiring positive means
C_zero = np.concatenate([np.eye(ndepth), np.zeros((ndepth,1))], axis=1)

# Setup the lower bound inequalities
model = ConstrainedNonconjugateBayesianTensorFiltering(nrows, ncols, ndepth,
                                                      rowcol_loglikelihood,
                                                      C_zero,
                                                      nembeds=nembeds, tf_order=2,
                                                      sigma2_init=0.5, nthreads=3,
                                                      lam2_init=0.1)

# Use NMF to initialize the model; assign in place so any shared views stay valid
model.W[:], model.V[:] = tensor_nmf(Mu_pgds.mean(axis=0), nembeds)
model.Mu_ep, model.Sigma_ep = ep_from_nmf(Y_train, model.W, model.V)

'''
# Setup the non-conjugate model
model = NonconjugateBayesianTensorFiltering(nrows, ncols, ndepth, ess_loglikelihood, nembeds=nembeds, tf_order=2, sigma2_init=1, lam2_init=0.1)
# model.W, model.V = tensor_nmf(Y_train, nembeds)
'''
print('Running Gibbs sampler')
results = model.run_gibbs(Y_train, nburn=nburn, nthin=nthin, nsamples=nsamples, print_freq=10, verbose=True)
Ws = results['W']
Vs = results['V']
Tau2s = results['Tau2']
lam2s = results['lam2']
sigma2s = results['sigma2']
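
To turn the posterior samples into curve estimates, one option (illustrative) is to rebuild the mean tensor per sample with the same einsum used for the data in Example 2, then summarize:

Mu_samples = np.einsum('snk,smtk->snmt', Ws, Vs)  # (nsamples, rows, cols, depth)
Mu_hat = Mu_samples.mean(axis=0)                  # posterior mean curves
Mu_lo, Mu_hi = np.percentile(Mu_samples, [2.5, 97.5], axis=0)  # 95% bands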