def iter_configurations(rbm, batch_units=10, show_progress=False):
    """Iterate over all joint configurations of the hidden units in batches of
    size 2 ** batch_units, yielding each batch along with its exact
    probabilities under the RBM."""
    assert rbm.nhid <= 30
    scores = get_scores(rbm, batch_units=batch_units, show_progress=show_progress).as_numpy_array()
    prefix_len = rbm.nhid - batch_units
    batch_size = 2 ** batch_units
    prefixes = combinations_array(prefix_len)

    # process the batches in order of increasing total score
    batch_scores = np.logaddexp.reduce(scores, axis=1)
    idxs = np.argsort(batch_scores)
    prefixes = prefixes[idxs]
    scores = scores[idxs]

    hid = gnp.zeros((batch_size, rbm.nhid))
    hid[:, prefix_len:] = combinations_array(batch_units)

    # log partition function; summing in ascending order improves numerical stability
    pfn = np.logaddexp.reduce(np.sort(scores.ravel()))
    normalized_scores = scores - pfn
    p = np.exp(normalized_scores)   # exact probability of each configuration

    if show_progress:
        pbar = misc.pbar(prefixes.shape[0])

    for i, prefix in enumerate(prefixes):
        hid[:, :prefix_len] = prefix
        yield hid, p[i, :]

        if show_progress:
            pbar.update(i)
    if show_progress:
        pbar.finish()
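# Illustrative sketch (not part of the original code): since the probabilities
# yielded by iter_configurations sum to one across all batches, exact
# expectations over the hidden units of a small RBM can be accumulated directly.
def _example_exact_expected_hid(rbm):
    """Exact expectation of the hidden units under the RBM's marginal."""
    expect = np.zeros(rbm.nhid)
    for hid, p in iter_configurations(rbm):
        expect += np.dot(p, hid.as_numpy_array())
    return expect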
def init_sparsity(data_matrix, mu_Z_mode, num_iter=200):
    """Initialize the latent Gaussian scale mixture (GSM) variables by Gibbs
    sampling a reduced sparse coding problem."""
    if mu_Z_mode == 'row':
        # the row case reduces to the column case on the transpose
        return init_sparsity(data_matrix.transpose(), 'col', num_iter).transpose()
    elif mu_Z_mode == 'col':
        by_column = True
    elif mu_Z_mode == 'scalar':
        by_column = False

    # currently, data_matrix should always be real-valued with no missing values, so this just
    # passes on data_matrix.observations.values; we may want to replace it with interval observations
    # obtained from slice sampling
    S = data_matrix.sample_latent_values(np.zeros((data_matrix.m, data_matrix.n)),
                                         np.ones((data_matrix.m, data_matrix.n)))
    Z = np.random.normal(-1., 1., size=S.shape)

    # sparse_coding.py wants a full sparse coding problem, so pass in None for the things
    # that aren't relevant here
    state = sparse_coding.SparseCodingState(S, None, Z, None, -1., 1., None)

    pbar = misc.pbar(num_iter)
    for i in range(num_iter):
        sparse_coding.sample_Z(state)
        state.mu_Z = sparse_coding.cond_mu_Z(state, by_column).sample()
        state.sigma_sq_Z = sparse_coding.cond_sigma_sq_Z(state).sample()
        if hasattr(debugger, 'after_init_sparsity_iter'):
            debugger.after_init_sparsity_iter(locals())
        pbar.update(i)
    pbar.finish()

    scale_node = recursive.GaussianNode(state.Z, 'scalar', state.sigma_sq_Z)
    return recursive.GSMNode(state.S, scale_node, mu_Z_mode, state.mu_Z)
def get_scores(rbm, batch_units=10, show_progress=False):
    """Compute the free energies of all joint hidden configurations, split
    into batches of size 2 ** batch_units."""
    nhid = rbm.nhid
    assert nhid <= 30
    prefix_len = nhid - batch_units
    batch_size = 2 ** batch_units
    prefixes = combinations_array(prefix_len)
    num_batches = prefixes.shape[0]

    hid = gnp.zeros((batch_size, nhid))
    hid[:, prefix_len:] = combinations_array(batch_units)
    scores = gnp.zeros((num_batches, batch_size))

    if show_progress:
        pbar = misc.pbar(num_batches)

    for i, prefix in enumerate(prefixes):
        hid[:, :prefix_len] = prefix
        scores[i, :] = rbm.free_energy_hid(hid)

        if show_progress:
            pbar.update(i)
    if show_progress:
        pbar.finish()

    return scores
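# Illustrative sketch (an assumption, not original API): the exact log partition
# function of a small RBM is the log-sum-exp of all the free energy scores; the
# sort improves numerical stability, as in iter_configurations above.
def _example_exact_log_partition(rbm, batch_units=10):
    scores = get_scores(rbm, batch_units=batch_units).as_numpy_array()
    return np.logaddexp.reduce(np.sort(scores.ravel()))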
def score_row_predictive_variational(train_data_matrix, root, test_data_matrix, num_steps_ais=2000):
    """Score each test row's predictive log-likelihood using the variational
    approximation, refining the estimate with AIS when the model contains GSM
    components."""
    N = test_data_matrix.m_orig
    predictive_info_orig = predictive_distributions.compute_predictive_info(train_data_matrix, root, N)
    predictive_info = predictive_distributions.remove_gsm(predictive_info_orig)

    result = np.zeros(test_data_matrix.m)
    pbar = misc.pbar(test_data_matrix.m)
    for i, row in enumerate(test_data_matrix.row_ids):
        idxs = np.where(test_data_matrix.observations.mask[i, :])[0]
        components, mu, Sigma = predictive_info.predictive_for_row(row, idxs)

        estimators = []
        for comp in components:
            if isinstance(comp, predictive_distributions.MultinomialPredictiveDistribution):
                estimators.append(variational.MultinomialEstimator(comp.pi, comp.centers))
            elif isinstance(comp, predictive_distributions.BernoulliPredictiveDistribution):
                estimators.append(variational.BernoulliEstimator(comp.pi, comp.A))
            else:
                raise RuntimeError('Unknown predictive distribution')

        assert isinstance(test_data_matrix.observations, observations.RealObservations)
        problem = variational.VariationalProblem(
            estimators, test_data_matrix.observations.values[i, idxs] - mu, Sigma)
        reps = problem.solve()
        result[i] = problem.objective_function(reps)

        if predictive_distributions.has_gsm(predictive_info_orig):
            components, mu, Sigma = predictive_info_orig.predictive_for_row(row, idxs)
            assert np.allclose(mu, 0.)   # can't do chains yet
            X = test_data_matrix.observations.values[i, idxs]
            X = X[nax, :]
            result[i] = ais_gsm.compute_likelihood(X, components, Sigma, [reps],
                                                   np.array([result[i]]),
                                                   num_steps=num_steps_ais)[0]

        pbar.update(i)
    pbar.finish()

    return result
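# Illustrative usage sketch (hypothetical aggregation, not part of the original
# code): the rows are scored independently, so a single summary statistic is
# just the mean over the test rows.
def _example_mean_predictive_score(train_data_matrix, root, test_data_matrix):
    scores = score_row_predictive_variational(train_data_matrix, root, test_data_matrix)
    return scores.mean()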
def fit_model(data_matrix, isotropic_w=True, isotropic_b=True, num_iter=NUM_ITER):
    """Fit a CRP mixture model by collapsed Gibbs sampling, initialized with
    K-means."""
    X_init = init_X(data_matrix)
    model = CRPModel(1., X_init.shape[1],
                     distributions.InverseGammaDistribution(0.01, 0.01),
                     distributions.InverseGammaDistribution(0.01, 0.01),
                     isotropic_w, isotropic_b)
    N, D = X_init.shape

    k_init = min(N // 4, 40)
    km = sklearn.cluster.KMeans(n_clusters=k_init)
    km.fit(X_init)
    init_assignments = km.labels_

    sigma_sq_f = sigma_sq_n = X_init.var() / 2.
    if not model.isotropic_b:
        sigma_sq_f = X_init.var(0) / 2.

    state = CollapsedCRPState(X_init, init_assignments, sigma_sq_n, sigma_sq_f)
    state.centers = km.cluster_centers_

    fixed_variance = data_matrix.fixed_variance()
    data = data_matrix.observations

    if fixed_variance:
        if isotropic_w:
            state.sigma_sq_w = 1.
        else:
            state.sigma_sq_w = np.ones(D)

    pbar = misc.pbar(num_iter)
    t0 = time.time()
    for it in range(num_iter):
        pred = state.centers[state.assignments, :]
        state.X = data_matrix.sample_latent_values(pred, state.sigma_sq_w)
        gibbs_sweep_collapsed(model, data, state, fixed_variance)

        if time.time() - t0 > 3600.:   # 1 hour
            break
        pbar.update(it)
    pbar.finish()

    # sample the centers
    cache = CollapsedCRPCache.from_state(model, data, state)
    gibbs_step_centers(model, data, state, cache)

    return state
def collect_log_probs(expt, subset='test', ignore_failed=False):
    """Load the results of individual partition function estimation trials, and
    return the averaged estimates along with bootstrap confidence intervals."""
    if isinstance(expt, str):
        expt = get_experiment(expt)
    assert subset in ['train', 'test']
    if subset == 'test':
        vis = expt.dataset.load_test().as_matrix()
    else:
        vis = expt.dataset.load().as_matrix()
    vis = np.random.binomial(1, vis)
    tr_expt = get_training_expt(expt)

    results = {}
    pbar = misc.pbar(len(tr_expt.save_after) * 2)
    count = 0
    for it in tr_expt.save_after:
        for avg in AVG_VALS:
            count += 1
            try:
                rbm = load_rbm(expt, it, avg)
            except Exception:
                continue

            if ignore_failed and not os.path.exists(expt.log_Z_file(it, avg)):
                continue

            if isinstance(expt.annealing, AnnealingParams):
                log_Z = storage.load(expt.log_Z_file(it, avg)).as_numpy_array()
                log_Z_lower, log_Z_upper = misc.bootstrap(log_Z, log_mean)
                train_fev = rbm.free_energy_vis(vis)
                results[it, avg] = Results(log_Z, log_Z_lower, log_Z_upper, train_fev, None)
            elif isinstance(expt.annealing, ExactParams):
                log_Z = storage.load(expt.log_Z_file(it, avg))
                train_fev = rbm.free_energy_vis(vis)
                results[it, avg] = ExactResults(log_Z, train_fev, None)
            else:
                raise RuntimeError('Unknown annealing params')

            pbar.update(count)
    pbar.finish()

    return results
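# Illustrative sketch (an assumption about the role of log_mean above): AIS
# gives unbiased estimates of Z rather than log Z, so the per-trial estimates
# are combined by averaging in the original domain via log-sum-exp.
def _example_log_mean(log_Z):
    log_Z = np.asarray(log_Z)
    return np.logaddexp.reduce(log_Z) - np.log(len(log_Z))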
def train_rbm(vis, nhid, params, init_rbm=None, after_step=None, updater=None,
              rbm_class=None, moments_class=None, weights_std=0.05,
              show_progress=False, trainer_class=None):
    """Train an RBM from scratch."""
    assert isinstance(params, TrainingParams)

    if init_rbm is not None:
        rbm = init_rbm.copy()
    else:
        if rbm_class is None:
            rbm_class = binary_rbms.RBM
        if moments_class is None:
            moments_class = binary_rbms.Moments
        base_rate_moments = moments_class.from_data_base_rates(vis, nhid)
        rbm = rbm_class.from_moments(base_rate_moments, weights_std=weights_std)

    if updater is None:
        updater = get_updater(params.updater, rbm, vis)

    if trainer_class is None:
        trainer = Trainer(rbm, params, data_matrix=vis, updater=updater)
    else:
        trainer = trainer_class(rbm, params, data_matrix=vis, updater=updater)

    if show_progress:
        pbar = misc.pbar(params.num_steps)

    for i in range(params.num_steps):
        trainer.step()
        trainer.update()

        if after_step is not None:
            after_step(rbm, trainer, i)

        if show_progress:
            pbar.update(i + 1)
    if show_progress:
        pbar.finish()

    return rbm, trainer.fantasy_particles
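# Illustrative sketch (hypothetical callback, not part of the original code):
# train_rbm calls after_step(rbm, trainer, i) once per update, which can be
# used, e.g., to snapshot the model at regular intervals; the interval here is
# arbitrary.
def _example_snapshot_callback(snapshots, interval=1000):
    def after_step(rbm, trainer, i):
        if (i + 1) % interval == 0:
            snapshots.append(rbm.copy())
    return after_step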
def fit_model(data_matrix, K=K_INIT, num_iter=NUM_ITER, name=None):
    """Fit a low-rank factorization X = UV by Gibbs sampling, with Givens and
    scaling moves and rank add/delete moves."""
    if SEED_0:
        np.random.seed(0)

    N, D = data_matrix.m, data_matrix.n
    X, state = init_state(data_matrix, K)

    pbar = misc.pbar(num_iter)
    t0 = time.time()
    for it in range(num_iter):
        sample_U_V(state, X, data_matrix.observations.mask)

        # Givens and scaling moves leave the product UV invariant
        old = np.dot(state.U, state.V)
        givens_moves(state)
        assert np.allclose(np.dot(state.U, state.V), old)
        scaling_moves(state)
        assert np.allclose(np.dot(state.U, state.V), old)

        state.ssq_U = sample_variance(state.U, 0)

        pred = np.dot(state.U, state.V)
        if not data_matrix.observations.fixed_variance():
            state.ssq_N = sample_variance(X - pred, None, mask=data_matrix.observations.mask)
        X = data_matrix.sample_latent_values(pred, state.ssq_N)

        for i in range(10):
            state = add_delete_move(state, X, data_matrix.observations.mask)

        if VERBOSE:
            print('K =', state.U.shape[1])
            print('ssq_N =', state.ssq_N)
            print('X.var() =', X.var())

        pbar.update(it)

        if time.time() - t0 > 3600.:   # 1 hour
            break
    pbar.finish()

    return state, X
def sweep(data_matrix, root, num_iter=100, maximize=False):
    """Run dumb Gibbs sampling (or maximization steps) over all samplers for
    the model rooted at root."""
    samplers = get_samplers(data_matrix, root, maximize)

    if num_iter > 1:
        print('Dumb Gibbs sampling on %s...' % grammar.pretty_print(root.structure()))
        pbar = misc.pbar(num_iter)
    else:
        pbar = None

    for it in range(num_iter):
        for sampler in samplers:
            if sampler.preserves_root_value():
                old = root.value()
            sampler.step()
            if sampler.preserves_root_value():
                assert np.allclose(old, root.value())

        if pbar is not None:
            pbar.update(it)
    if pbar is not None:
        pbar.finish()
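# Illustrative sketch (hypothetical sampler, not part of the original code) of
# the contract sweep() relies on: step() must leave root.value() unchanged
# whenever preserves_root_value() returns True.
class _ExampleIdentitySampler:
    def preserves_root_value(self):
        return True

    def step(self):
        pass   # a real sampler would resample its latent variables here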
def run_gibbs(expt, save=True, show_progress=False):
    """Run Gibbs chains starting from the AIS particles (sampled proportionally
    to their weights), and save the final particles."""
    if isinstance(expt, str):
        expt = get_experiment(expt)

    tr_expt = get_training_expt(expt)

    for it in tr_expt.save_after:
        for avg in AVG_VALS:
            print('Iteration', it, avg)

            try:
                rbm = load_rbm(expt, it, avg)
            except Exception:
                continue
            log_Z = storage.load(expt.log_Z_file(it, avg)).as_numpy_array()
            final_states = storage.load(expt.final_states_file(it, avg))

            # sample the states proportionally to the Z estimates
            p = np.exp(log_Z - np.logaddexp.reduce(log_Z))
            p /= p.sum()   # not needed in theory, but numpy complains if it doesn't sum exactly to 1
            idxs = np.random.multinomial(1, p, size=expt.annealing.num_particles).argmax(1)
            states = binary_rbms.RBMState(final_states.v[idxs, :], final_states.h[idxs, :])

            if show_progress:
                pbar = misc.pbar(expt.gibbs_steps)

            for st in range(expt.gibbs_steps):
                states = rbm.step(states)
                if show_progress:
                    pbar.update(st)
            if show_progress:
                pbar.finish()

            if save:
                storage.dump(states, expt.gibbs_states_file(it, avg))
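# Illustrative sketch of the resampling step above, in isolation: given log
# importance weights, draw indices proportionally to the normalized weights.
def _example_resample_indices(log_w, num_samples):
    p = np.exp(log_w - np.logaddexp.reduce(log_w))
    p /= p.sum()   # guard against round-off, as in run_gibbs
    return np.random.multinomial(1, p, size=num_samples).argmax(1)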
def fit_model(data_matrix, num_iter=NUM_ITER):
    """Fit a chain model by Gibbs sampling: each column of the data is modeled
    as a Gaussian random walk over the rows."""
    N_orig, N, D = data_matrix.m_orig, data_matrix.m, data_matrix.n
    X = data_matrix.sample_latent_values(np.zeros((N, D)), 1.)
    sigma_sq_D = sigma_sq_N = 1.
    fixed_variance = data_matrix.fixed_variance()

    row_ids = data_matrix.row_ids
    X_full = np.zeros((N_orig, D))
    X_full[row_ids, :] = X

    states = np.zeros((N_orig, D))
    resid = np.zeros((N, D))
    diff = np.zeros((N_orig - 1, D))

    pbar = misc.pbar(num_iter)
    t0 = time.time()
    for it in range(num_iter):
        lam_N = np.zeros(N_orig)
        lam_N[row_ids] = 1. / sigma_sq_N

        for j in range(D):
            states[:, j] = sample_single_chain(X_full[:, j], 1. / sigma_sq_D, lam_N)

        resid = X - states[row_ids, :]
        diff = states[1:, :] - states[:-1, :]

        sigma_sq_D = sample_variance(diff)
        if not fixed_variance:
            sigma_sq_N = sample_variance(resid)

        X = data_matrix.sample_latent_values(states[row_ids, :], sigma_sq_N)
        X_full[row_ids, :] = X

        if time.time() - t0 > 3600.:   # 1 hour
            break
        pbar.update(it)
    pbar.finish()

    return states, sigma_sq_D, sigma_sq_N
def fit_model(data_matrix, num_iter=NUM_ITER):
    """Fit a linear-Gaussian IBP model by Gibbs sampling."""
    model = IBPModel(1., distributions.InverseGammaDistribution(1., 1.),
                     distributions.InverseGammaDistribution(1., 1.))
    fixed_variance = data_matrix.fixed_variance()
    data = data_matrix.observations
    state = sequential_init(model, data, fixed_variance)

    pbar = misc.pbar(num_iter)
    t0 = time.time()
    for it in range(num_iter):
        gibbs_sweep(model, data, state, True, True, fixed_variance)
        pred = np.dot(state.Z, state.A)
        state.X = data.sample_latent_values(pred, state.sigma_sq_n)

        if time.time() - t0 > TIME_LIMIT:
            break
        pbar.update(it)
    pbar.finish()

    return state
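# Illustrative sketch (an assumption, not original code): the wall-clock cutoff
# that recurs in the fitters above, factored into a reusable generator.
def _example_time_limited_iterations(num_iter, time_limit=3600.):
    """Yield iteration indices until num_iter iterations or time_limit seconds
    elapse, whichever comes first."""
    t0 = time.time()
    for it in range(num_iter):
        yield it
        if time.time() - t0 > time_limit:
            break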