Example 1
def init_level(name, level):
    """Initialize a given level of the search by saving all of the structures which need
    to be evaluated."""
    if not storage.exists(experiment_dir(name)):
        raise RuntimeError('Experiment %s not yet initialized.' % name)

    params = storage.load(params_file(name))
    splits = storage.load(splits_file(name))

    if level == 1:
        init_structures = ['g']
    else:
        init_structures = storage.load(winning_structure_file(name, level - 1))

    structure_pairs = list_structure_pairs(init_structures, params.rules, params.expand_noise)
    data_matrix = storage.load(data_file(name))
    lab = None
    node_mat = np.zeros([params.num_splits * params.num_samples, 200, 200, 2])
    pruned_pairs = []
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # ; config.gpu_options.per_process_gpu_memory_fraction = 0.4
    sess = tf.Session(config=config)
    c = Classifier()
    real = tf.placeholder(shape=[None, 200, 200, 2], dtype=tf.float32)
    c_out = tf.reduce_mean(tf.nn.softmax(c(real), axis=-1), axis=0, keepdims=True)
    top_k_op = tf.nn.top_k(c_out, 3)  # build the op once; creating it inside the loop would grow the graph
    c_params = c.vars
    saver = tf.train.Saver(c_params)
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, "saved_model/d")
    for (init_structure, structure) in structure_pairs:
        for split_id in range(params.num_splits):
            for sample_id in range(params.num_samples):
                train_rows, train_cols, test_rows, test_cols = splits[split_id]
                X_train = data_matrix[train_rows[:, nax], train_cols[nax, :]]
                if level == 1:
                    init = X_train.sample_latent_values(np.zeros((X_train.m, X_train.n)), 1.)
                    prev_model = recursive.GaussianNode(init, 'scalar', 1.)
                else:
                    try:
                        prev_model = storage.load(samples_file(name, level - 1, init_structure, split_id, sample_id))
                    except Exception:
                        print("structure", grammar.pretty_print(init_structure), "has no saved samples")
                        continue
                    if isinstance(prev_model, recursive.Decomp):
                        prev_model = prev_model.root
                node, old_dist, rule = recursive.find_changed_node(prev_model, init_structure, structure)
                lab = labelize(rule)
                node_mat[split_id * params.num_samples + sample_id] = pad(random_shrink(node.value()))

        if_continue = sess.run(top_k_op, feed_dict={real: node_mat})
        if lab in if_continue.indices:
            print("transformation", grammar.pretty_print(init_structure), "->", grammar.pretty_print(structure),
                  "i.e. label", lab, "kept, with top_k", if_continue)
            pruned_pairs.append((init_structure, structure))
        else:
            print("transformation", grammar.pretty_print(init_structure), "->", grammar.pretty_print(structure),
                  "i.e. label", lab, "pruned, with top_k", if_continue)
    structure_pairs = pruned_pairs
    storage.dump(structure_pairs, structures_file(name, level))
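The pruning above keeps a candidate transformation only when its label lands in the classifier's top-k predictions. A minimal NumPy sketch of that decision rule, independent of the project's Classifier and TensorFlow graph (keep_candidate and its arguments are illustrative names):

import numpy as np

def keep_candidate(probs, label, k=3):
    """probs: averaged softmax output over classes; keep if `label` is in the top k."""
    top_k = np.argsort(probs)[::-1][:k]
    return label in top_k

# toy usage: class 2 is among the top 3 of this distribution
probs = np.array([0.05, 0.1, 0.2, 0.4, 0.15, 0.1])
print(keep_candidate(probs, label=2))  # True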
Example 2
def collect_scores(name, level, structure):
    """Collect the held-out predictive log-likelihood scores for all CV splits and
    order them according to the indices of the original data matrix."""
    params = storage.load(params_file(name))
    splits = storage.load(splits_file(name))

    row_loglik_all = []
    col_loglik_all = []
    failed = False

    for split_id, (train_rows, train_cols, test_rows, test_cols) in enumerate(splits):
        row_loglik_curr, col_loglik_curr = [], []
        num_samples = params.num_samples
        for sample_id in range(num_samples):
            try:
                row_loglik_single, col_loglik_single = storage.load(scores_file(name, level, structure, split_id, sample_id))
            except Exception:
                row_loglik_single = np.nan * np.ones(len(test_rows))
                col_loglik_single = np.nan * np.ones(len(test_cols))
                failed = True
            row_loglik_curr.append(row_loglik_single)
            col_loglik_curr.append(col_loglik_single)

        row_loglik_all.append(np.array(row_loglik_curr))
        col_loglik_all.append(np.array(col_loglik_curr))

    if failed:
        print(termcolor.colored('    failed: %s' % grammar.pretty_print(structure), 'red'))

    storage.dump((row_loglik_all, col_loglik_all), collected_scores_file(name, level, structure))
Example 3
def collect_scores(name, level, structure):
    """Collect the held-out predictive log-likelihood scores for all CV splits and
    order them according to the indices of the original data matrix."""
    params = storage.load(params_file(name))
    splits = storage.load(splits_file(name))

    row_loglik_all = []
    col_loglik_all = []
    failed = False

    for split_id, (train_rows, train_cols, test_rows, test_cols) in enumerate(splits):
        row_loglik_curr, col_loglik_curr = [], []
        num_samples = params.num_samples
        for sample_id in range(num_samples):
            try:
                row_loglik_single, col_loglik_single = storage.load(scores_file(name, level, structure, split_id, sample_id))
            except FileNotFoundError:
                return
            except Exception:
                row_loglik_single = np.nan * np.ones(len(test_rows))
                col_loglik_single = np.nan * np.ones(len(test_cols))
                failed = True
            row_loglik_curr.append(row_loglik_single)
            col_loglik_curr.append(col_loglik_single)

        row_loglik_all.append(np.array(row_loglik_curr))
        col_loglik_all.append(np.array(col_loglik_curr))

    if failed:
        print(termcolor.colored('    failed: %s' % grammar.pretty_print(structure), 'red'))

    storage.dump((row_loglik_all, col_loglik_all), collected_scores_file(name, level, structure))
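Failed samples enter the score arrays as NaN rows, so downstream averaging has to skip them. A self-contained sketch of that aggregation pattern with toy numbers (np.nanmean averages over the posterior samples while ignoring the failed one):

import numpy as np

# held-out row scores for 3 posterior samples over 4 test rows; sample 1 failed
row_loglik = np.array([[-1.2, -0.8, -1.0, -0.9],
                       [np.nan, np.nan, np.nan, np.nan],
                       [-1.1, -0.7, -1.3, -1.0]])

per_row = np.nanmean(row_loglik, axis=0)  # one predictive score per held-out row
print(per_row, per_row.mean())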
Example 5
def fit_winning_sequence(name, sample_id):
    """After the sequence of models is identified, sample factorizations from each of the models on the full
    data matrix."""
    data_matrix = storage.load(data_file(name))
    sequence = sequence_of_structures(name)
    params = storage.load(params_file(name))
    decomps = recursive.fit_sequence(sequence, data_matrix, gibbs_steps=params.gibbs_steps)
    storage.dump(decomps, winning_samples_file(name, sample_id))
Example 6
def run(params):
    if isinstance(params, str):
        params = get_params(params)

    v = gnp.garray(datasets.SubsampledMNISTInfo.load().as_matrix())
    v = 0.999 * v + 0.001 * 0.5  # shrink toward 0.5 so no visible probability is exactly 0 or 1

    tparams = rbm_training.TrainingParams.defaults('pcd')
    rbm, _ = rbm_training.train_rbm(v, 20, tparams, show_progress=True)

    storage.dump(rbm, rbm_file(params['name']))
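The 0.999/0.001 line mixes the data with a constant 0.5, pulling every visible probability strictly inside (0, 1) so that log-likelihood terms never hit log 0. A tiny plain-NumPy illustration of the same trick:

import numpy as np

x = np.array([0.0, 0.25, 1.0])  # raw pixel values, including exact 0s and 1s
print(0.999 * x + 0.001 * 0.5)  # [0.0005 0.25025 0.9995], safely inside (0, 1)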
Example 9
    def after_step(self, rbm, trainer, i):
        it = i + 1

        save = it in self.expt.save_after
        display = it in self.expt.show_after

        if save:
            if self.expt.save_particles:
                storage.dump(trainer.fantasy_particles, self.expt.pcd_particles_file(it))
            storage.dump(rbm, self.expt.rbm_file(it))
            if hasattr(trainer, 'avg_rbm'):
                storage.dump(trainer.avg_rbm, self.expt.avg_rbm_file(it))
            storage.dump(time.time() - self.t0, self.expt.time_file(it))

        if 'particles' in self.subset and (save or display):
            fig = rbm_vis.show_particles(rbm, trainer.fantasy_particles, self.expt.dataset, display=display,
                                         figtitle='PCD particles ({} updates)'.format(it))
            if display:
                pylab.gcf().canvas.draw()
            if save:
                misc.save_image(fig, self.expt.pcd_particles_figure_file(it))

        if 'gibbs_chains' in self.subset and (save or display):
            fig = diagnostics.show_chains(rbm, trainer.fantasy_particles, self.expt.dataset, display=display,
                                          figtitle='Gibbs chains (iteration {})'.format(it))
            if save:
                misc.save_image(fig, self.expt.gibbs_chains_figure_file(it))

        if 'objective' in self.subset:
            self.log_prob_tracker.update(rbm, trainer.fantasy_particles)

        if display:
            pylab.gcf().canvas.draw()
Example 10
def compute_init_samples(name, level, structure, split_id, sample_id):
    """For one of the high-performing structures in the previous level, sample from the posterior
    so that it can be used to initialize the current level. This is only needed if
    params.save_samples == False. The log-likelihood scores are saved as well for purposes
    of determining statistical significance of the improvement over the previous level."""
    if level == 1:
        return

    init_structure = init_structure_for(name, level-1, structure)

    root = sample_from_model(name, level-1, init_structure, structure, split_id, sample_id)
    storage.dump(root, init_samples_file(name, level, structure, split_id, sample_id))
    row_loglik, col_loglik = evaluate_decomp(name, level-1, init_structure, split_id, sample_id, root)
    storage.dump((row_loglik, col_loglik), init_scores_file(name, level, structure, split_id, sample_id))
Example 11
def init_level(name, level):
    """Initialize a given level of the search by saving all of the structures which need
    to be evaluated."""
    if not storage.exists(experiment_dir(name)):
        raise RuntimeError('Experiment %s not yet initialized.' % name)

    params = storage.load(params_file(name))
    if level == 1:
        init_structures = ['g']
    else:
        init_structures = storage.load(winning_structure_file(name, level - 1))
    structure_pairs = list_structure_pairs(init_structures, params.rules,
                                           params.expand_noise)
    storage.dump(structure_pairs, structures_file(name, level))
Example 12
def save_exact_log_Z(expt):
    """Compute the exact partition functions for small RBMs."""
    if isinstance(expt, str):
        expt = get_experiment(expt)
    tr_expt = get_training_expt(expt)

    for it in tr_expt.save_after:
        for avg in AVG_VALS:
            print('iteration', it, avg)
            try:
                rbm = load_rbm(expt, it, avg)
            except Exception:
                continue

            log_Z = tractable.exact_partition_function(rbm)
            storage.dump(log_Z, expt.log_Z_file(it, avg))
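For intuition, the quantity saved here can be computed by brute force for a small binary RBM: sum the visible units out analytically and enumerate the hidden configurations. The sketch below assumes the standard energy E(v, h) = -b'v - c'h - v'Wh and is illustrative, not the project's tractable.exact_partition_function:

import itertools
import numpy as np

def exact_log_Z(W, b, c):
    """Brute-force log partition function of a small binary RBM with
    energy E(v, h) = -b'v - c'h - v'Wh. The visible units are summed
    out analytically; the 2**len(c) hidden configurations are enumerated."""
    log_terms = []
    for h_bits in itertools.product([0, 1], repeat=len(c)):
        h = np.array(h_bits, dtype=float)
        # log sum_v exp(-E(v, h)) = c'h + sum_i log(1 + exp(b_i + (W h)_i))
        log_terms.append(c @ h + np.logaddexp(0., b + W @ h).sum())
    return np.logaddexp.reduce(log_terms)

# toy usage on a 3-visible, 2-hidden RBM
rng = np.random.RandomState(0)
print(exact_log_Z(rng.randn(3, 2), rng.randn(3), rng.randn(2)))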
Example 13
def save_exact_samples(expt):
    """Save exact samples from the RBM distribution."""
    if isinstance(expt, str):
        expt = get_experiment(expt)
    tr_expt = get_training_expt(expt)

    for it in tr_expt.save_after:
        for avg in AVG_VALS:
            print('Iteration', it, avg)
            try:
                rbm = load_rbm(expt, it, avg)
            except Exception:
                continue

            states = tractable.exact_samples(rbm, expt.annealing.num_samples)
            storage.dump(states, expt.gibbs_states_file(it, avg))
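Exact sampling from a small RBM can follow the same enumeration: draw h from its exact marginal, then draw v | h from the factorized conditional. Again a hedged sketch under the same energy parameterization, not necessarily how tractable.exact_samples works:

import itertools
import numpy as np

def exact_rbm_samples(W, b, c, num_samples, rng=np.random):
    """Exact samples from a small binary RBM (same parameterization as above)."""
    H = np.array(list(itertools.product([0, 1], repeat=len(c))), dtype=float)
    # unnormalized log p(h), with v summed out analytically
    log_p = H @ c + np.logaddexp(0., b + H @ W.T).sum(axis=1)
    p = np.exp(log_p - np.logaddexp.reduce(log_p))
    p /= p.sum()  # guard against round-off before rng.choice
    h = H[rng.choice(len(H), size=num_samples, p=p)]
    v_prob = 1. / (1. + np.exp(-(b + h @ W.T)))  # p(v_i = 1 | h)
    v = (rng.uniform(size=v_prob.shape) < v_prob).astype(float)
    return v, h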
Example 14
def compute_init_samples(name, level, structure, split_id, sample_id):
    """For one of the high-performing structures in the previous level, sample from the posterior
    so that it can be used to initialize the current level. This is only needed if
    params.save_samples == False. The log-likelihood scores are saved as well for purposes
    of determining statistical significance of the improvement over the previous level."""
    if level == 1:
        return

    init_structure = init_structure_for(name, level-1, structure)
    try:
        root = sample_from_model(name, level-1, init_structure, structure, split_id, sample_id)
    except Exception:
        # the sample from the previous level may be missing if that job failed
        return
    storage.dump(root, init_samples_file(name, level, structure, split_id, sample_id))
    row_loglik, col_loglik = evaluate_decomp(name, level-1, init_structure, split_id, sample_id, root)
    storage.dump((row_loglik, col_loglik), init_scores_file(name, level, structure, split_id, sample_id))
Example 15
def init_experiment(name, data_matrix, params, components=None, clean_data_matrix=None):
    """Initialize the structure search by saving the matrix, and possibly auxiliary
    information, to files, and generating cross-validation splits."""
    check_required_directories()

    if not storage.exists(experiment_dir(name)):
        storage.mkdir(experiment_dir(name))

    storage.dump(params, params_file(name))
    splits = nfold_cv(data_matrix.m, data_matrix.n, params.num_splits)
    storage.dump(splits, splits_file(name))

    if clean_data_matrix is not None:
        storage.dump(clean_data_matrix, clean_data_file(name))

    storage.dump(data_matrix, data_file(name))

    if components is not None:
        storage.dump(components, components_file(name))
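nfold_cv partitions both the rows and the columns of the data matrix, so each split holds out a block of rows and a block of columns for scoring. One plausible self-contained implementation of such splits (the real nfold_cv's return format is the project's own; this is illustrative):

import numpy as np

def nfold_cv_sketch(m, n, num_splits, rng=np.random):
    """Yield (train_rows, train_cols, test_rows, test_cols) index arrays."""
    row_folds = np.array_split(rng.permutation(m), num_splits)
    col_folds = np.array_split(rng.permutation(n), num_splits)
    splits = []
    for test_rows, test_cols in zip(row_folds, col_folds):
        train_rows = np.setdiff1d(np.arange(m), test_rows)
        train_cols = np.setdiff1d(np.arange(n), test_cols)
        splits.append((train_rows, train_cols, test_rows, test_cols))
    return splits

for train_rows, train_cols, test_rows, test_cols in nfold_cv_sketch(10, 8, 4):
    print(len(train_rows), len(train_cols), len(test_rows), len(test_cols))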
Example 17
def run_ais(expt, save=True, show_progress=False):
    """Run AIS for all the RBMs, and save the estimated log partition functions and the final particles."""
    if isinstance(expt, str):
        expt = get_experiment(expt)

    mkl.set_num_threads(1)

    tr_expt = get_training_expt(expt)

    for it in tr_expt.save_after:
        for avg in AVG_VALS:
            print('iteration', it, avg)
            t0 = time.time()
            try:
                rbm = load_rbm(expt, it, avg)
            except Exception:
                continue

            moments = compute_moments(tr_expt, rbm)
            brm = moments.full_base_rate_moments()
            init_rbm = binary_rbms.RBM.from_moments(brm)

            path = ais.GeometricRBMPath(init_rbm, rbm)
            schedule = np.linspace(0., 1., expt.annealing.num_steps)
            state, log_Z, _ = ais.ais(path,
                                      schedule,
                                      expt.annealing.num_particles,
                                      show_progress=show_progress)

            if save:
                storage.dump(log_Z, expt.log_Z_file(it, avg))
                storage.dump(state, expt.final_states_file(it, avg))
                storage.dump(time.time() - t0, expt.time_file(it, avg))
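An AIS estimate of log Z is, at its core, a log-mean-exp over the particles' accumulated importance log-weights, added to the base model's tractable log Z. A self-contained sketch of that final reduction (log_w is a toy array of per-particle log weights):

import numpy as np

def log_mean_exp(log_w):
    """Numerically stable log(mean(exp(log_w)))."""
    return np.logaddexp.reduce(log_w) - np.log(len(log_w))

log_w = np.array([10.2, 9.8, 10.5, 10.1])  # toy per-particle AIS log weights
print(log_mean_exp(log_w))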
Example 18
def run_gibbs(expt, save=True, show_progress=False):
    """Run Gibbs chains starting from the AIS particles (sampled proportionally to their
    weights), and save the final particles."""
    if isinstance(expt, str):
        expt = get_experiment(expt)
    tr_expt = get_training_expt(expt)

    for it in tr_expt.save_after:
        for avg in AVG_VALS:
            print('Iteration', it, avg)
            try:
                rbm = load_rbm(expt, it, avg)
            except Exception:
                continue
            log_Z = storage.load(expt.log_Z_file(it, avg)).as_numpy_array()
            final_states = storage.load(expt.final_states_file(it, avg))

            # sample the states proportionally to the Z estimates
            p = np.exp(log_Z - np.logaddexp.reduce(log_Z))
            p /= p.sum()  # not needed in theory, but numpy complains if it doesn't sum exactly to 1
            idxs = np.random.multinomial(1, p, size=expt.annealing.num_particles).argmax(1)
            states = binary_rbms.RBMState(final_states.v[idxs, :], final_states.h[idxs, :])

            if show_progress:
                pbar = misc.pbar(expt.gibbs_steps)

            for st in range(expt.gibbs_steps):
                states = rbm.step(states)

                if show_progress:
                    pbar.update(st)

            if show_progress:
                pbar.finish()

            if save:
                storage.dump(states, expt.gibbs_states_file(it, avg))
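The resampling step above normalizes the log Z estimates in log space before exponentiating, which avoids overflow when the estimates are large. The same draw can be written more directly with np.random.choice; a minimal sketch with toy numbers:

import numpy as np

log_Z = np.array([500.3, 499.8, 501.1, 500.0])  # toy per-particle log estimates
p = np.exp(log_Z - np.logaddexp.reduce(log_Z))  # softmax in log space; plain exp would overflow
p /= p.sum()                                    # guard against round-off
idxs = np.random.choice(len(p), size=8, p=p)    # resample indices proportionally to weight
print(idxs)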
Example 19
def run_model(name, level, init_structure, structure, split_id, sample_id, save=True, save_sample=False):
    """Sample from the posterior given the training data, and evaluate on heldout rows/columns."""
    params = storage.load(params_file(name))
    t0 = time.time()
    root = sample_from_model(name, level, init_structure, structure, split_id, sample_id)
    if save and (save_sample or params.save_samples):
        storage.dump(root, samples_file(name, level, structure, split_id, sample_id))
        print('Saved.')
    row_loglik, col_loglik = evaluate_decomp(name, level, init_structure, split_id, sample_id, root)
    print('Row:', row_loglik.mean())
    print('Col:', col_loglik.mean())
    if save:
        storage.dump((row_loglik, col_loglik), scores_file(name, level, structure, split_id, sample_id))
        storage.dump(time.time() - t0, running_time_file(name, level, structure, split_id, sample_id))
Example 22
def save_winning_structures(name, level):
    storage.dump(winning_structures(name, level), winning_structure_file(name, level))
Example 23
def sample_matrix(name, sample_id, level, size, output_file):
    decomps = storage.load(winning_samples_file(name, sample_id))
    data = decomps[level].sample_matrix(size)
    storage.dump(data, output_file)
    print('Samples saved at: "{}".'.format(output_file))