def run_factorization(self, N, S, X, K, num_cov, k, n):
		# Smart initialization
		rat = k/n
		nans = np.isnan(rat)
		conc_inits = np.zeros((1, S))
		beta_inits = np.zeros((num_cov, S))
		for index_s in range(S):
			column_rat = rat[:, index_s]
			column_nans = np.isnan(column_rat)
			valid_rat = column_rat[~column_nans]
			conc_init = min(1.0/np.var(valid_rat), 1000.0)
			m_init = min(max(np.mean(valid_rat), 1.0/1000 ), 1.0-(1.0/1000))
			conc_inits[0, index_s] = conc_init
			beta_inits[0, index_s] = np.log(m_init/(1.0-m_init))
		# Run bb-mf
		with pm.Model() as bb_glm:
			CONC = pm.Gamma('CONC', alpha=1e-4, beta=1e-4, shape=(1,S), testval=conc_inits)
			BETA = pm.Normal('BETA', mu=0, tau=(1/1000000.0), shape=(S, num_cov), testval=beta_inits.T)
			U = pm.Normal('U', mu=0, tau=(1/1000.0), shape=(N, K), testval=np.random.randn(N, K))
			V = pm.Normal('V', mu=0, tau=(1/1000.0), shape=(S, K), testval=np.random.randn(S, K))
			# Beta-Binomial mean: logistic of covariate effects plus the low-rank factorization
			p = pm.math.invlogit(pm.math.dot(X, BETA.T) + pm.math.dot(U, V.T))
			# Broadcast the per-site concentration CONC across all N samples
			conc_mat = pm.math.dot(np.ones((N, 1)), CONC)
			# Mean/concentration -> (alpha, beta); NaN entries (missing observations) are masked out
			R = pm.BetaBinomial('like', alpha=(p*conc_mat)[~nans], beta=((1.0-p)*conc_mat)[~nans], n=n[~nans], observed=k[~nans])
			approx = pm.fit(method='advi', n=30000)
		pickle.dump(approx, open(self.output_root + '_model', 'wb'))
		#approx = pickle.load( open(self.output_root + '_model', "rb" ) )
		# params[0] holds the variational means in unconstrained space; bij.rmap maps them back to a name -> array dict
		means_dict = approx.bij.rmap(approx.params[0].eval())
		np.savetxt(self.output_root + '_temper_U.txt', (means_dict['U']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_V.txt', (means_dict['V'].T), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_BETA.txt', (means_dict['BETA'].T), fmt="%s", delimiter='\t')
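For orientation, a minimal sketch of the inputs this method expects, with shapes inferred from the code above (all names and values here are illustrative placeholders, not taken from the source project): k and n are N-by-S count matrices whose NaN entries mark missing observations, and X is an N-by-num_cov covariate matrix.

import numpy as np

# Illustrative shapes only, inferred from the snippet above
N, S, K, num_cov = 50, 10, 3, 2                      # samples, sites, latent factors, covariates
X = np.hstack([np.ones((N, 1)), np.random.randn(N, num_cov - 1)])  # covariates incl. intercept
n = np.random.randint(5, 100, size=(N, S)).astype(float)           # total read counts
k = np.random.binomial(n.astype(int), 0.4).astype(float)           # allelic read counts
missing = np.random.rand(N, S) < 0.05                              # mark a few entries missing
k[missing] = np.nan
n[missing] = np.nan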
Code Example #2
	def run_factorization(self):
		rat = self.allelic_counts/self.total_counts
		nans = np.isnan(rat)
		# Run bb-mf
		with pm.Model() as bb_glm:
			CONC = pm.HalfCauchy('CONC', beta=5, shape=(1,self.S), testval=self.conc_init)
			BETA = pm.Normal('BETA', mu=0, tau=(1/1000000.0), shape=(self.S, self.num_cov), testval=self.beta_init)
			U = pm.Normal('U', mu=0, tau=(1.0/100.0), shape=(self.N, self.K), testval=self.U_init)
			V = pm.Normal('V', mu=0, tau=(1.0/100.0), shape=(self.S, self.K), testval=self.V_init)

			MU_A = pm.Normal("MU_A", mu=0., sd=100**2, shape=(1,self.S), testval=self.mu_a_init)
			SIGMA_A = pm.HalfCauchy("SIGMA_A", beta=5.0, shape=(1,self.S), testval=self.sigma_a_init)
			mu_a_mat = pm.math.dot(np.ones((self.I,1)), MU_A)
			sigma_a_mat = pm.math.dot(np.ones((self.I,1)), SIGMA_A)
			A = pm.Normal('A', mu=mu_a_mat, sigma=sigma_a_mat, shape=(self.I,self.S), testval=self.A_init)

			# Linear predictor adds A[self.Z, :], a per-individual random intercept indexed by assignment vector Z
			p = pm.math.invlogit(pm.math.dot(self.cov, BETA.T) + pm.math.dot(U, V.T) + A[self.Z, :])
			conc_mat = pm.math.dot(np.ones((self.N,1)), CONC)
			R = pm.BetaBinomial('like',alpha=(p*conc_mat)[~nans], beta=((1.0-p)*conc_mat)[~nans], n=self.total_counts[~nans], observed=self.allelic_counts[~nans])
			approx = pm.fit(method='advi', n=1000)
		pickle.dump(approx, open(self.output_root + '_model', 'wb'))
		#approx = pickle.load( open(self.output_root + '_model', "rb" ) )
		means_dict = approx.bij.rmap(approx.params[0].eval())
		np.savetxt(self.output_root + '_temper_U.txt', (means_dict['U']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_V.txt', (means_dict['V'].T), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_BETA.txt', (means_dict['BETA'].T), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_CONC.txt', np.exp(means_dict['CONC_log__']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_A.txt', (means_dict['A']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_MU_A.txt', (means_dict['MU_A']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_SIGMA_A.txt', np.exp(means_dict['SIGMA_A_log__']), fmt="%s", delimiter='\t')
		np.savetxt(self.output_root + '_temper_ELBO.txt', approx.hist, fmt="%s", delimiter='\t')
Code Example #3
import numpy as np
import pymc3 as pm
import theano.tensor as tt
from pymc3.distributions.transforms import stick_breaking


def build_biallelic_model3(g, n, s):
    # EXPERIMENTAL: Observations overdispersed as a BetaBinom w/ concentrations 10.
    a = 2

    with pm.Model() as model:
        # Fraction
        pi = pm.Dirichlet(
            'pi',
            a=np.ones(s),
            shape=(n, s),
            transform=stick_breaking,
        )
        pi_hyper = pm.Data('pi_hyper', value=0.0)
        pm.Potential('heterogeneity_penalty',
                     -(pm.math.sqrt(pi).sum(0).sum()**2) * pi_hyper)

        rho_hyper = pm.Data('rho_hyper', value=0.0)
        pm.Potential('diversity_penalty',
                     -(pm.math.sqrt(pi.sum(0)).sum()**2) * rho_hyper)

        # Genotype
        gamma_ = pm.Uniform('gamma_', 0, 1, shape=(g * s, 1))
        gamma = pm.Deterministic(
            'gamma',
            (pm.math.concatenate([gamma_, 1 - gamma_], axis=1).reshape(
                (g, s, a))))
        gamma_hyper = pm.Data('gamma_hyper', value=0.0)
        pm.Potential(
            'ambiguity_penalty',
            -(pm.math.sqrt(gamma).sum(2)**2).sum(0).sum(0) * gamma_hyper)

        # Product of fraction and genotype
        true_p = pm.Deterministic('true_p', pm.math.dot(pi, gamma))

        # Sequencing error
        epsilon_hyper = pm.Data('epsilon_hyper', value=100)
        epsilon = pm.Beta('epsilon', alpha=2, beta=epsilon_hyper, shape=n)
        epsilon_ = epsilon.reshape((n, 1, 1))
        err_base_prob = tt.ones((n, g, a)) / a
        p_with_error = (true_p * (1 - epsilon_)) + (err_base_prob * epsilon_)

        # Observation
        _p = p_with_error.reshape((-1, a))[:, 0]
        # Overdispersion term
        # alpha = pm.Gamma('alpha', mu=100, sigma=5)
        # TODO: Figure out how to also fit this term.
        # FIXME: Do I want the default to be a valid value?
        #  Realistic or close to asymptotic?
        alpha = pm.Data('alpha', value=1000)

        observed = pm.Data('observed', value=np.empty((g * n, a)))
        pm.BetaBinomial('data',
                        alpha=_p * alpha,
                        beta=(1 - _p) * alpha,
                        n=observed.reshape((-1, a)).sum(1),
                        observed=observed[:, 0])

    return model
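A hedged usage sketch for the model factory above: populate the pm.Data containers and fit with ADVI. The g, n, s values and the counts are synthetic placeholders; this assumes the imports added before the function definition.

g, n, s = 100, 8, 3                                  # sites, samples, strains
model = build_biallelic_model3(g, n, s)
counts = np.random.randint(0, 50, size=(g * n, 2)).astype(float)   # toy biallelic counts
with model:
    pm.set_data({'observed': counts})                # swap in real counts the same way
    approx = pm.fit(method='advi', n=10000)          # short run; increase n for real data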
Code Example #4
File: test_pymc3.py  Project: EJHortala/symbolic-pymc
import numpy as np
import theano
import theano.tensor as tt
import pymc3 as pm

# model_graph, graph_model, and RandomVariable come from the symbolic-pymc
# project; their exact import paths are omitted here since they vary across versions.


def test_pymc3_convert_dists():
    """Just a basic check that all PyMC3 RVs will convert to and from Theano RVs."""
    tt.config.compute_test_value = "ignore"
    theano.config.cxx = ""

    with pm.Model() as model:
        norm_rv = pm.Normal("norm_rv", 0.0, 1.0, observed=1.0)
        mvnorm_rv = pm.MvNormal("mvnorm_rv",
                                np.r_[0.0],
                                np.c_[1.0],
                                shape=1,
                                observed=np.r_[1.0])
        cauchy_rv = pm.Cauchy("cauchy_rv", 0.0, 1.0, observed=1.0)
        halfcauchy_rv = pm.HalfCauchy("halfcauchy_rv", 1.0, observed=1.0)
        uniform_rv = pm.Uniform("uniform_rv", observed=1.0)
        gamma_rv = pm.Gamma("gamma_rv", 1.0, 1.0, observed=1.0)
        invgamma_rv = pm.InverseGamma("invgamma_rv", 1.0, 1.0, observed=1.0)
        exp_rv = pm.Exponential("exp_rv", 1.0, observed=1.0)
        halfnormal_rv = pm.HalfNormal("halfnormal_rv", 1.0, observed=1.0)
        beta_rv = pm.Beta("beta_rv", 2.0, 2.0, observed=1.0)
        binomial_rv = pm.Binomial("binomial_rv", 10, 0.5, observed=5)
        dirichlet_rv = pm.Dirichlet("dirichlet_rv",
                                    np.r_[0.1, 0.1],
                                    observed=np.r_[0.1, 0.1])
        poisson_rv = pm.Poisson("poisson_rv", 10, observed=5)
        bernoulli_rv = pm.Bernoulli("bernoulli_rv", 0.5, observed=0)
        betabinomial_rv = pm.BetaBinomial("betabinomial_rv",
                                          0.1,
                                          0.1,
                                          10,
                                          observed=5)
        categorical_rv = pm.Categorical("categorical_rv",
                                        np.r_[0.5, 0.5],
                                        observed=1)
        multinomial_rv = pm.Multinomial("multinomial_rv",
                                        5,
                                        np.r_[0.5, 0.5],
                                        observed=np.r_[2])

    # Convert to a Theano `FunctionGraph`
    fgraph = model_graph(model)

    rvs_by_name = {
        n.owner.inputs[1].name: n.owner.inputs[1]
        for n in fgraph.outputs
    }

    pymc_rv_names = {n.name for n in model.observed_RVs}
    assert all(
        isinstance(rvs_by_name[n].owner.op, RandomVariable)
        for n in pymc_rv_names)

    # Now, convert back to a PyMC3 model
    pymc_model = graph_model(fgraph)

    new_pymc_rv_names = {n.name for n in pymc_model.observed_RVs}
    assert pymc_rv_names == new_pymc_rv_names
Code Example #5
	def run_ppca_initialization(self):
		print('Starting PPCA initialization')
		rat = self.allelic_counts/self.total_counts
		nans = np.isnan(rat)

		scaled_rat = scale_allelic_ratios(rat)
		scaled_residual_rat = regress_out_cell_line(scaled_rat, self.Z)
		rescaled_residual_rat = scale_allelic_ratios(scaled_residual_rat)
		ppca = PPCA()
		ppca.fit(data=np.transpose(rescaled_residual_rat), d=self.K, verbose=True, tol=1e-6)
		self.U_init = ppca.C/np.std(ppca.C)
		# Run bb-mf
		with pm.Model() as bb_glm_init:
			CONC = pm.HalfCauchy('CONC', beta=5, shape=(1,self.S), testval=self.conc_init)
			BETA = pm.Normal('BETA', mu=0, tau=(1/1000000.0), shape=(self.S, self.num_cov), testval=self.beta_init)
			#U = pm.Normal('U', mu=0, tau=(1.0/1.0), shape=(N, K), testval=self.U_init)
			V = pm.Normal('V', mu=0, tau=(1.0/1.0), shape=(self.S, self.K), testval=np.zeros(self.V_init.shape))

			MU_A = pm.Normal("MU_A", mu=0., sd=100**2, shape=(1,self.S), testval=self.mu_a_init)
			SIGMA_A = pm.HalfCauchy("SIGMA_A", beta=5.0, shape=(1,self.S), testval=self.sigma_a_init)
			mu_a_mat = pm.math.dot(np.ones((self.I,1)), MU_A)
			sigma_a_mat = pm.math.dot(np.ones((self.I,1)), SIGMA_A)
			A = pm.Normal('A', mu=mu_a_mat, sigma=sigma_a_mat, shape=(self.I,self.S), testval=self.A_init)

			# U is held fixed at its PPCA loadings (self.U_init); only V, BETA, A, and CONC are fit here
			p = pm.math.invlogit(pm.math.dot(self.cov, BETA.T) + pm.math.dot(self.U_init, V.T) + A[self.Z, :])
			conc_mat = pm.math.dot(np.ones((self.N,1)), CONC)
			R = pm.BetaBinomial('like',alpha=(p*conc_mat)[~nans], beta=((1.0-p)*conc_mat)[~nans], n=self.total_counts[~nans], observed=self.allelic_counts[~nans])
			approx_init = pm.fit(method='advi', n=2000)
		pickle.dump(approx_init, open(self.output_root + '_model_init', 'wb'))
		init_dict = approx_init.bij.rmap(approx_init.params[0].eval())
		self.beta_init = init_dict['BETA']
		self.A_init = init_dict['A']
		self.sigma_a_init = np.exp(init_dict['SIGMA_A_log__'])
		self.mu_a_init = init_dict['MU_A']
		self.conc_init = np.exp(init_dict['CONC_log__'])
		self.V_init = init_dict['V']
		print('Smart PPCA complete')
Code Example #6
File: inference_methods.py  Project: troycomi/BACIQ
    def mcmc_sample(self, data, bin_width):
        proteins, idx = get_proteins_and_indices(data)
        with pm.Model():
            τ = pm.Gamma('τ', alpha=7.5, beta=1)
            BoundedNormal = pm.Bound(pm.Normal, lower=0, upper=1)
            μ = BoundedNormal('μ', mu=0.5, sigma=1, shape=len(proteins))
            κ = pm.Exponential('κ', τ, shape=len(proteins))
            pm.BetaBinomial('y',
                            alpha=μ[idx] * κ[idx],
                            beta=(1.0 - μ[idx]) * κ[idx],
                            n=data['sum'],
                            observed=data[self.channel])
            db = hist_backend.Histogram(vars=[μ],
                                        bin_width=bin_width,
                                        remove_first=self.tuning)
            pm.sample(draws=self.samples,
                      tune=self.tuning,
                      chains=self.chains,
                      cores=get_num_cores(),
                      progressbar=sys.stdout.isatty(),
                      compute_convergence_checks=False,
                      trace=db)

            return proteins, db.hist['μ']
Code Example #7
def lohhla_clone_model(sample_ids,
                       tree_edges,
                       clonal_prevalence_mat,
                       cellularity,
                       ploidy_values,
                       tumour_sample_reads,
                       normal_sample_reads,
                       integercpn_info,
                       all_genotypes,
                       transition_inputs,
                       stayrate_alpha=0.9,
                       stayrate_beta=0.1,
                       sd=0.5,
                       nb_alpha=0.5,
                       iter_count=20000,
                       tune_iters=20000,
                       anchor_type='nb',
                       anchor_mode='snvcn',
                       nchains=2,
                       njobs=2):
    '''
    stayrate_alpha: Beta prior alpha-parameter on stayrate in clone tree Markov chain
    stayrate_beta: Beta prior beta-parameter on stayrate in clone tree Markov chain
    all_genotypes: Dataframe of genotypes, 0-indexed
    '''
    num_nodes = clonal_prevalence_mat.shape[1]

    valid_transitions = transition_inputs['valid_transitions']
    num_transitions = transition_inputs['num_transitions']
    num_genotypes = transition_inputs['num_genotypes']
    cn_genotype_matrix = transition_inputs['cn_genotype_matrix']

    ## Beta-binomial dispersion (higher = less dispersed)
    dispersion = 200.

    ## Tree edges
    ## Tree edges (.values replaces the deprecated DataFrame.as_matrix())
    edges = tree_edges.values.astype(int) - 1

    with pm.Model() as model:
        BoundedNormal = pm.Bound(pm.Normal, lower=0., upper=1.)
        stay_rate = BoundedNormal('stayrate', mu=0.75, sd=0.4)

        P = np.zeros(shape=(num_genotypes, num_genotypes))
        P = P + tt.eye(num_genotypes) * stay_rate

        fill_values = tt.as_tensor((1. - stay_rate) / num_transitions)
        fill_values = tt.set_subtensor(fill_values[0], 0)

        P = P + valid_transitions * fill_values[:, np.newaxis]
        P = tt.set_subtensor(P[0, 0], 1.)

        A = tt.dmatrix('A')

        PA = tt.ones(shape=(num_genotypes)) / num_genotypes

        states = CloneTreeGenotypes('genotypes',
                                    PA=PA,
                                    P=P,
                                    edges=edges,
                                    k=num_genotypes,
                                    shape=(num_nodes))

        total_cns = theano.shared(np.array(all_genotypes['total_cn'].values))
        alt_cns = theano.shared(np.array(all_genotypes['alt_cn'].values))

        total_cn = pm.Deterministic('total_cn', total_cns[states])
        alt_cn = pm.Deterministic('alt_cn', alt_cns[states])

        sample_alt_copies = tt.dot(clonal_prevalence_mat, alt_cn
                                   ) * cellularity + (1. - cellularity) * 1.

        vafs = sample_alt_copies / (
            tt.dot(clonal_prevalence_mat, total_cn) * cellularity +
            (1. - cellularity) * 2.)
        pm.Deterministic('vafs', vafs)

        alphas = vafs * dispersion
        betas = (1 - vafs) * dispersion

        ## Copy number of tumour cells (aggregated over clones, but not including normal contamination)
        tutotalcn = pm.Deterministic('tutotalcn',
                                     tt.dot(clonal_prevalence_mat, total_cn))

        ## Can't be vectorized further
        for j in range(len(sample_ids)):
            current_sample = sample_ids[j]
            total_counts = integercpn_info['TumorCov_type1'][
                current_sample].values + integercpn_info['TumorCov_type2'][
                    current_sample].values
            alt_counts = integercpn_info['TumorCov_type2'][
                current_sample].values
            alpha_sel = alphas[j]
            beta_sel = betas[j]

            ## Draw alternative allele counts for HLA locus for each polymorphic site
            alt_reads = pm.BetaBinomial('x_' + str(j),
                                        alpha=alpha_sel,
                                        beta=beta_sel,
                                        n=total_counts,
                                        observed=alt_counts)

            mult_factor_mean = (tumour_sample_reads[current_sample] /
                                normal_sample_reads)

            ploidy = ploidy_values[j]
            ploidy_ratio = (tutotalcn[j] * cellularity[j] +
                            (1 - cellularity[j]) * 2) / (
                                cellularity[j] * ploidy +
                                (1 - cellularity[j]) * 2)
            if anchor_mode == 'snvcn':
                mult_factor_computed = pm.Deterministic(
                    'mult_factor_computed_' + str(j), 1. / ploidy_ratio *
                    (integercpn_info['Total_TumorCov'][current_sample].values /
                     integercpn_info['Total_NormalCov'][current_sample].values)
                )
                nloci = len(
                    integercpn_info['Total_TumorCov'][current_sample].values)

                tumour_reads_observed = integercpn_info['Total_TumorCov'][
                    current_sample].values
                normal_reads_observed = integercpn_info['Total_NormalCov'][
                    current_sample].values
            elif anchor_mode == 'binmedian':
                binvar_tumour = 'combinedBinTumor'
                binvar_normal = 'combinedBinNormal'
                ## All within a bin are the same, so this is OK
                duplicated_entries = integercpn_info['binNum'][
                    current_sample].duplicated(keep='first')
                nloci = len(integercpn_info[binvar_tumour][current_sample]
                            [~duplicated_entries].values)

                mult_factor_computed = pm.Deterministic(
                    'mult_factor_computed_' + str(j),
                    (1. / ploidy_ratio *
                     (integercpn_info[binvar_tumour][current_sample]
                      [~duplicated_entries].values /
                      integercpn_info[binvar_normal][current_sample]
                      [~duplicated_entries].values)))

                tumour_reads_observed = integercpn_info[binvar_tumour][
                    current_sample][~duplicated_entries].values
                normal_reads_observed = integercpn_info[binvar_normal][
                    current_sample][~duplicated_entries].values
            else:
                raise Exception("Invalid option specified.")

            ## Draw ploidy-corrected tumour/normal locus coverage ratio for each polymorphic site

            if anchor_type == 'mult_factor':
                mult_factor = pm.Lognormal('mult_factor_' + str(j),
                                           mu=np.log(mult_factor_mean),
                                           sd=sd,
                                           observed=mult_factor_computed,
                                           shape=(nloci))
            elif anchor_type == 'nb':
                tc_nc_ratio = pm.Deterministic(
                    'tc_nc_ratio_' + str(j), (tutotalcn[j] * cellularity[j] +
                                              (1 - cellularity[j]) * 2) /
                    (ploidy * cellularity[j] + (1 - cellularity[j]) * 2))

                tumoursamplecn = pm.Deterministic(
                    'tumoursamplecn_' + str(j),
                    (tutotalcn[j] * cellularity[j] + (1 - cellularity[j]) * 2))

                tumour_reads_mean = pm.Deterministic(
                    'tumour_reads_mean_' + str(j),
                    tc_nc_ratio * mult_factor_mean * normal_reads_observed)

                tumour_reads = pm.NegativeBinomial(
                    'tumour_reads_' + str(j),
                    mu=tumour_reads_mean,
                    alpha=nb_alpha,
                    observed=tumour_reads_observed)
            else:
                raise Exception('Must specify a valid model type.')

        pm.Deterministic('log_prob', model.logpt)

        step1 = pm.CategoricalGibbsMetropolis(vars=[states])
        step2 = pm.Metropolis(vars=[stay_rate])

        trace = pm.sample(iter_count,
                          tune=tune_iters,
                          step=[step1, step2],
                          njobs=njobs,
                          chains=nchains)

        return trace
Code Example #8
plt.ylabel("Density")

# %%
admit_df = pd.read_csv("data/UCBadmit.csv", sep=";")

# %%
with pm.Model() as m11_5:
    a = pm.Normal("a", 0.0, 2.0)
    pbar = pm.Deterministic("pbar", pm.math.sigmoid(a))

    theta = pm.Exponential("theta", 1.0)

    admit_obs = pm.BetaBinomial(
        "admit_obs",
        pbar * theta,
        (1.0 - pbar) * theta,
        admit_df.applications.values,
        observed=admit_df.admit.values,
    )


# %%
with m11_5:
    trace_11_5 = pm.sample(1000, tune=1000)

# %%
pm.summary(trace_11_5).round(2)

# %%
np.percentile(trace_11_5["pbar"], [2.5, 50.0, 97.5])
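In m11_5 above, pm.BetaBinomial receives alpha = pbar * theta and beta = (1 - pbar) * theta, so pbar is the mean admission probability and theta is a concentration: the implied mean alpha / (alpha + beta) equals pbar regardless of theta, while smaller theta means more overdispersion. A quick check of that algebra with illustrative values:

pbar, theta = 0.25, 2.0                              # illustrative values only
alpha, beta = pbar * theta, (1.0 - pbar) * theta
assert abs(alpha / (alpha + beta) - pbar) < 1e-12    # mean is pbar; theta cancels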
Code Example #9
    def run_factorization(self, N, S, X, Z, I, K, num_cov, k, n):
        # Smart initialization
        rat = k / n
        nans = np.isnan(rat)
        conc_inits = np.zeros((1, S))
        beta_inits = np.zeros((num_cov, S))
        for index_s in range(S):
            column_rat = rat[:, index_s]
            column_nans = np.isnan(column_rat)
            valid_rat = column_rat[~column_nans]
            conc_init = min(1.0 / np.var(valid_rat), 1000.0)
            m_init = min(max(np.mean(valid_rat), 1.0 / 1000),
                         1.0 - (1.0 / 1000))
            conc_inits[0, index_s] = conc_init
            beta_inits[0, index_s] = np.log(m_init / (1.0 - m_init))
        U_init = np.random.rand(N, K)
        for n_iter in range(N):
            U_init[n_iter, :] = U_init[n_iter, :] / np.sum(U_init[n_iter, :])
        # Run bb-mf
        with pm.Model() as bb_glm:
            CONC = pm.HalfCauchy('CONC',
                                 beta=5,
                                 shape=(1, S),
                                 testval=conc_inits)
            BETA = pm.Normal('BETA',
                             mu=0,
                             tau=(1 / 1000000.0),
                             shape=(S, num_cov),
                             testval=beta_inits.T)
            #U = pm.Normal('U', mu=0, tau=(1/10000.0), shape=(N, K), testval=np.random.randn(N, K))
            U = pm.Dirichlet('U',
                             a=np.ones(K) * 1.0,
                             shape=(N, K),
                             testval=U_init)
            V = pm.Normal('V',
                          mu=0,
                          tau=(1 / 10000.0),
                          shape=(S, K),
                          testval=np.random.randn(S, K))

            MU_A = pm.Normal("MU_A",
                             mu=0.,
                             sd=100**2,
                             shape=(1, S),
                             testval=np.zeros((1, S)))
            SIGMA_A = pm.HalfCauchy("SIGMA_A",
                                    beta=5.0,
                                    shape=(1, S),
                                    testval=np.ones((1, S)))
            mu_a_mat = pm.math.dot(np.ones((I, 1)), MU_A)
            sigma_a_mat = pm.math.dot(np.ones((I, 1)), SIGMA_A)
            A = pm.Normal('A',
                          mu=mu_a_mat,
                          sigma=sigma_a_mat,
                          shape=(I, S),
                          testval=np.zeros((I, S)))

            p = pm.math.invlogit(
                pm.math.dot(X, BETA.T) + pm.math.dot(U, V.T) + A[Z, :])
            conc_mat = pm.math.dot(np.ones((N, 1)), CONC)
            R = pm.BetaBinomial('like',
                                alpha=(p * conc_mat)[~nans],
                                beta=((1.0 - p) * conc_mat)[~nans],
                                n=n[~nans],
                                observed=k[~nans])
            approx = pm.fit(method='advi', n=30000)
        pickle.dump(approx, open(self.output_root + '_model', 'wb'))
        #approx = pickle.load( open(self.output_root + '_model', "rb" ) )
        means_dict = approx.bij.rmap(approx.params[0].eval())
        # backward_stickbreaking: project helper that maps the unconstrained ADVI
        # means back onto the simplex (inverse of the stick-breaking transform)
        U = backward_stickbreaking(means_dict['U_stickbreaking__'])
        np.savetxt(self.output_root + '_temper_U.txt',
                   U,
                   fmt="%s",
                   delimiter='\t')
        np.savetxt(self.output_root + '_temper_U_init.txt',
                   U_init,
                   fmt="%s",
                   delimiter='\t')
        np.savetxt(self.output_root + '_temper_V.txt', (means_dict['V'].T),
                   fmt="%s",
                   delimiter='\t')
        np.savetxt(self.output_root + '_temper_BETA.txt',
                   (means_dict['BETA'].T),
                   fmt="%s",
                   delimiter='\t')
Code Example #10
y = np.array([  # leading values truncated in the source snippet
    26, 24, 31, 25
])
print('Length of array: ', y.size)
q = 40  # How many total questions
yp = y.astype(float) / q
print('marks: ', y)
print('Rates (yp): ', yp)

# Prior for p: e.g., (alpha1, beta1) = (2.0, 5.0) puts the maximum at p = 0.2
alpha1 = 2.0
beta1 = 5.0
# Prior for k: alpha2 = beta2 puts the maximum at 50% knowledge; the magnitudes determine dispersion
alpha2 = 3.3
beta2 = 3.3
# Number of iterations for MCMC
niter = 50000
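# Quick sanity check of the prior comments above: the mode of Beta(a, b) is
# (a - 1) / (a + b - 2) for a, b > 1, so (alpha1, beta1) = (2.0, 5.0) peaks at
# p = 0.2 and alpha2 = beta2 = 3.3 peaks at 50% knowledge, as stated.
assert abs((alpha1 - 1) / (alpha1 + beta1 - 2) - 0.2) < 1e-12
assert abs((alpha2 - 1) / (alpha2 + beta2 - 2) - 0.5) < 1e-12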

with pm.Model():  # context management

    # define priors
    p = pm.Beta('p', alpha=alpha1, beta=beta1)
    k = pm.BetaBinomial('k', alpha=alpha2, beta=beta2, n=y)

    # Likelihood (sampling distribution) of observations
    obs = pm.Binomial('obs', n=q - k, p=p, observed=y - k)

    # inference
    trace = pm.sample(niter, return_inferencedata=False)

    az.plot_trace(trace)
    az.plot_posterior(trace, hdi_prob=0.95)
Code Example #11
    def run_non_sparse_model_for_initialization(self):
        rat = self.allelic_counts / self.total_counts
        nans = np.isnan(rat)
        # Run bb-mf
        with pm.Model() as bb_glm:
            CONC = pm.HalfCauchy('CONC',
                                 beta=5,
                                 shape=(1, self.S),
                                 testval=self.conc_init)
            BETA = pm.Normal('BETA',
                             mu=0,
                             tau=(1 / 1000000.0),
                             shape=(self.S, self.num_cov),
                             testval=self.beta_init)
            U = pm.Normal('U',
                          mu=0,
                          tau=(1.0 / 1.0),
                          shape=(self.N, self.K),
                          testval=self.U_init)
            V = pm.Normal('V',
                          mu=0,
                          tau=(1.0 / 1.0),
                          shape=(self.S, self.K),
                          testval=self.V_init)

            MU_A = pm.Normal("MU_A",
                             mu=0.,
                             sd=100**2,
                             shape=(1, self.S),
                             testval=self.mu_a_init)
            SIGMA_A = pm.HalfCauchy("SIGMA_A",
                                    beta=5.0,
                                    shape=(1, self.S),
                                    testval=self.sigma_a_init)
            mu_a_mat = pm.math.dot(np.ones((self.I, 1)), MU_A)
            sigma_a_mat = pm.math.dot(np.ones((self.I, 1)), SIGMA_A)
            A = pm.Normal('A',
                          mu=mu_a_mat,
                          sigma=sigma_a_mat,
                          shape=(self.I, self.S),
                          testval=self.A_init)

            p = pm.math.invlogit(
                pm.math.dot(self.cov, BETA.T) + pm.math.dot(U, V.T) +
                A[self.Z, :])
            conc_mat = pm.math.dot(np.ones((self.N, 1)), CONC)
            R = pm.BetaBinomial('like',
                                alpha=(p * conc_mat)[~nans],
                                beta=((1.0 - p) * conc_mat)[~nans],
                                n=self.total_counts[~nans],
                                observed=self.allelic_counts[~nans])
            approx = pm.fit(method='advi', n=10000)
        means_dict = approx.bij.rmap(approx.params[0].eval())
        # Set initializations for sparse model to learned values from this non-sparse model
        self.conc_init = np.exp(means_dict['CONC_log__'])
        self.beta_init = means_dict['BETA']
        self.U_init = means_dict['U']
        self.V_init = means_dict['V']
        self.mu_a_init = means_dict['MU_A']
        self.sigma_a_init = np.exp(means_dict['SIGMA_A_log__'])
        self.A_init = means_dict['A']
Code Example #12
freq_bins = np.logspace(-6, -3, 50, base=10)
tot_bins = np.logspace(4, 6.5, 50, base=10)
w_bins = np.linspace(0, 70, 50)

g = _jointplot('W', 'what', sim, ybins=None, xbins=None)

g = _jointplot('W', 'sfreq', sim, ybins=None, xbins=None)

g = _jointplot('what', 'resid', sim, ybins=None, xbins=None)
g = _jointplot('W', 'resid', sim, ybins=None, xbins=None)

with pm.Model() as model:
    alpha = pm.Exponential('alpha', 1 / sim['W'].sum())
    beta = pm.Exponential('beta', 1 / (sim['M'] - sim['W']).sum())
    obs = pm.BetaBinomial('obs', alpha, beta, sim['M'], observed=sim['W'])
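# Note on the priors above: Exponential(lam) has mean 1 / lam, so alpha is
# centered a priori on sim['W'].sum() and beta on (sim['M'] - sim['W']).sum();
# the implied Beta-Binomial mean alpha / (alpha + beta) then sits near the
# pooled rate sim['W'].sum() / sim['M'].sum().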

with model:
    # draw 500 posterior samples
    trace = pm.sample(5000, return_inferencedata=False)

az.plot_trace(trace)

az.summary(trace, round_to=2)

with pm.Model() as betabinomial:

    predictor = pm.Data('predictor', sim['x'])
    trials = pm.Data('trials', sim['M'])

    intercept = pm.Normal('intercept', mu=np.log(0.1), sd=0.001)
Code Example #13
    def run_factorization(self, N, S, X, Z, I, K, num_cov, k, n):
        # Smart initialization
        print("STARTING")
        rat = k / n
        nans = np.isnan(rat)
        conc_inits = np.zeros((1, S))
        beta_inits = np.zeros((num_cov, S))
        for index_s in range(S):
            column_rat = rat[:, index_s]
            column_nans = np.isnan(column_rat)
            valid_rat = column_rat[~column_nans]
            conc_init = min(1.0 / np.var(valid_rat), 1000.0)
            m_init = min(max(np.mean(valid_rat), 1.0 / 1000),
                         1.0 - (1.0 / 1000))
            conc_inits[0, index_s] = conc_init
            beta_inits[0, index_s] = np.log(m_init / (1.0 - m_init))
        # Run bb-mf
        with pm.Model() as bb_glm:
            CONC = pm.HalfCauchy('CONC',
                                 beta=5,
                                 shape=(1, S),
                                 testval=conc_inits)
            BETA = pm.Normal('BETA',
                             mu=0,
                             tau=(1 / 1000000.0),
                             shape=(S, num_cov),
                             testval=beta_inits.T)
            #U = pm.Normal('U', mu=0, tau=(1/1.0), shape=(N, K), testval=np.random.randn(N, K))
            #U = pm.Exponential('U',lam=10.0, shape=(N, K), testval=np.abs(np.random.randn(N, K)))
            #V = pm.Normal('V', mu=0, tau=(1/100000.0), shape=(S, K), testval=np.random.randn(S, K))

            # Horseshoe-style shrinkage on the factors: local HalfCauchy scales
            # LAMBDA_U and a global scale TAU_U (their squared product is used as
            # the Normal sd); the same construction is repeated for V below.
            LAMBDA_U = pm.HalfCauchy('LAMBDA_U',
                                     beta=1,
                                     shape=(N, K),
                                     testval=np.ones((N, K)))
            TAU_U = pm.HalfCauchy('TAU_U', beta=1, testval=1.0)
            SIGMA_U = pm.Deterministic('SIGMA_U',
                                       TAU_U * TAU_U * LAMBDA_U * LAMBDA_U)
            U = pm.Normal('U',
                          mu=0,
                          sd=SIGMA_U,
                          shape=(N, K),
                          testval=np.random.randn(N, K))

            LAMBDA_V = pm.HalfCauchy('LAMBDA_V',
                                     beta=1,
                                     shape=(S, K),
                                     testval=np.ones((S, K)))
            TAU_V = pm.HalfCauchy('TAU_V', beta=1, testval=1.0)
            SIGMA_V = pm.Deterministic('SIGMA_V',
                                       TAU_V * TAU_V * LAMBDA_V * LAMBDA_V)
            V = pm.Normal('V',
                          mu=0,
                          sd=SIGMA_V,
                          shape=(S, K),
                          testval=np.random.randn(S, K))

            MU_A = pm.Normal("MU_A",
                             mu=0.,
                             sd=100**2,
                             shape=(1, S),
                             testval=np.zeros((1, S)))
            SIGMA_A = pm.HalfCauchy("SIGMA_A",
                                    beta=5.0,
                                    shape=(1, S),
                                    testval=np.ones((1, S)))
            mu_a_mat = pm.math.dot(np.ones((I, 1)), MU_A)
            sigma_a_mat = pm.math.dot(np.ones((I, 1)), SIGMA_A)
            A = pm.Normal('A',
                          mu=mu_a_mat,
                          sigma=sigma_a_mat,
                          shape=(I, S),
                          testval=np.zeros((I, S)))

            p = pm.math.invlogit(
                pm.math.dot(X, BETA.T) + pm.math.dot(U, V.T) + A[Z, :])
            conc_mat = pm.math.dot(np.ones((N, 1)), CONC)
            R = pm.BetaBinomial('like',
                                alpha=(p * conc_mat)[~nans],
                                beta=((1.0 - p) * conc_mat)[~nans],
                                n=n[~nans],
                                observed=k[~nans])
            approx = pm.fit(method='advi', n=30000)
        pickle.dump(approx, open(self.output_root + '_model', 'wb'))
        approx = pickle.load(open(self.output_root + '_model', "rb"))
        means_dict = approx.bij.rmap(approx.params[0].eval())
        np.savetxt(self.output_root + '_temper_U.txt', (means_dict['U']),
                   fmt="%s",
                   delimiter='\t')
        np.savetxt(self.output_root + '_temper_V.txt', (means_dict['V'].T),
                   fmt="%s",
                   delimiter='\t')
        np.savetxt(self.output_root + '_temper_BETA.txt',
                   (means_dict['BETA'].T),
                   fmt="%s",
                   delimiter='\t')
Code Example #14
    def run_factorization(self, N, S, X, Z, I, K, num_cov, k, n):
        # Smart initialization
        rat = k / n
        nans = np.isnan(rat)
        conc_inits = np.zeros((1, S))
        beta_inits = np.zeros((num_cov, S))
        for index_s in range(S):
            column_rat = rat[:, index_s]
            column_nans = np.isnan(column_rat)
            valid_rat = column_rat[~column_nans]
            conc_init = min(1.0 / np.var(valid_rat), 1000.0)
            m_init = min(max(np.mean(valid_rat), 1.0 / 1000),
                         1.0 - (1.0 / 1000))
            conc_inits[0, index_s] = conc_init
            beta_inits[0, index_s] = np.log(m_init / (1.0 - m_init))

        # Run bb-mf with mini batch
        indices = np.asarray(range(N))
        num_mb_indices = 1000
        mb_indices = pm.Minibatch(indices, num_mb_indices)
        mb_X = pm.Minibatch(X, num_mb_indices)
        mb_n = pm.Minibatch(n, num_mb_indices)
        mb_k = pm.Minibatch(k, num_mb_indices)
        mb_Z = pm.Minibatch(Z, num_mb_indices)
        mb_nans = pm.Minibatch(nans, num_mb_indices)
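        # pm.Minibatch draws a fresh random subset of num_mb_indices rows at each
        # ADVI iteration; total_size in the likelihood below rescales the minibatch
        # log-likelihood back to the size of the full dataset.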

        with pm.Model() as bb_glm:
            CONC = pm.HalfCauchy('CONC',
                                 beta=5,
                                 shape=(1, S),
                                 testval=conc_inits)
            BETA = pm.Normal('BETA',
                             mu=0,
                             tau=(1 / 1000000.0),
                             shape=(S, num_cov),
                             testval=beta_inits.T)
            U = pm.Normal('U',
                          mu=0,
                          tau=(1 / 10000.0),
                          shape=(N, K),
                          testval=np.random.randn(N, K))
            V = pm.Normal('V',
                          mu=0,
                          tau=(1 / 10000.0),
                          shape=(S, K),
                          testval=np.random.randn(S, K))

            MU_A = pm.Normal("MU_A",
                             mu=0.,
                             sd=100**2,
                             shape=(1, S),
                             testval=np.zeros((1, S)))
            SIGMA_A = pm.HalfCauchy("SIGMA_A",
                                    beta=5.0,
                                    shape=(1, S),
                                    testval=np.ones((1, S)))
            mu_a_mat = pm.math.dot(np.ones((I, 1)), MU_A)
            sigma_a_mat = pm.math.dot(np.ones((I, 1)), SIGMA_A)
            A = pm.Normal('A',
                          mu=mu_a_mat,
                          sigma=sigma_a_mat,
                          shape=(I, S),
                          testval=np.zeros((I, S)))

            p = pm.math.invlogit(
                pm.math.dot(mb_X, BETA.T) +
                pm.math.dot(U[mb_indices, :], V.T) + A[mb_Z, :])
            conc_mat = pm.math.dot(np.ones((num_mb_indices, 1)), CONC)
            R = pm.BetaBinomial('like',
                                alpha=(p * conc_mat)[~mb_nans],
                                beta=((1.0 - p) * conc_mat)[~mb_nans],
                                n=mb_n[~mb_nans],
                                observed=mb_k[~mb_nans],
                                total_size=(k[~nans]).shape)
            approx = pm.fit(method='advi', n=30000)
        pickle.dump(approx, open(self.output_root + '_model', 'wb'))
        #approx = pickle.load( open(self.output_root + '_model', "rb" ) )
        means_dict = approx.bij.rmap(approx.params[0].eval())
        np.savetxt(self.output_root + '_temper_U.txt', (means_dict['U']),
                   fmt="%s",
                   delimiter='\t')
        np.savetxt(self.output_root + '_temper_V.txt', (means_dict['V'].T),
                   fmt="%s",
                   delimiter='\t')
        np.savetxt(self.output_root + '_temper_BETA.txt',
                   (means_dict['BETA'].T),
                   fmt="%s",
                   delimiter='\t')