Esempio n. 1
0
def init_row_clustering(data_matrix, isotropic, num_iter=200):
    """Initialize a row-clustering decomposition: fit a CRP mixture model to
    the rows, then package the result as multinomial * Gaussian + noise."""
    num_rows, num_cols = data_matrix.m, data_matrix.n
    state = crp.fit_model(data_matrix,
                          isotropic_w=isotropic,
                          isotropic_b=isotropic,
                          num_iter=num_iter)

    # One-hot encoding of the cluster assignments, one row per data row.
    assignments = state.assignments
    one_hot = np.zeros((num_rows, assignments.max() + 1), dtype=int)
    one_hot[np.arange(num_rows), assignments] = 1
    left = recursive.MultinomialNode(one_hot)

    # Isotropic fits use a single scalar variance; otherwise per-column.
    variance_type = 'scalar' if isotropic else 'col'
    right = recursive.GaussianNode(state.centers, variance_type,
                                   state.sigma_sq_b)

    # Residual noise around the per-cluster predictions.
    pred = state.centers[assignments, :]
    X = data_matrix.sample_latent_values(
        pred, state.sigma_sq_w * np.ones((num_rows, num_cols)))
    noise = recursive.GaussianNode(X - pred, variance_type, state.sigma_sq_w)

    return recursive.SumNode([recursive.ProductNode([left, right]), noise])
Esempio n. 2
0
def init_row_binary(data_matrix, num_iter=200):
    """Initialize a binary-feature decomposition: fit an IBP model and wrap
    the result as Bernoulli * Gaussian + noise."""
    state = ibp.fit_model(data_matrix, num_iter=num_iter)

    # Binary feature-ownership matrix Z and Gaussian feature weights A.
    left = recursive.BernoulliNode(state.Z)
    right = recursive.GaussianNode(state.A, 'scalar', state.sigma_sq_f)

    # Residual noise around the low-rank reconstruction Z A.
    pred = np.dot(state.Z, state.A)
    latent = data_matrix.sample_latent_values(pred, state.sigma_sq_n)
    noise = recursive.GaussianNode(latent - pred, 'scalar', state.sigma_sq_n)

    return recursive.SumNode([recursive.ProductNode([left, right]), noise])
Esempio n. 3
0
def init_row_chain(data_matrix, num_iter=200):
    """Initialize a Markov-chain decomposition over rows: fit a chain model
    and express it as integration-matrix * innovations + noise."""
    states, sigma_sq_D, sigma_sq_N = chains.fit_model(data_matrix,
                                                      num_iter=num_iter)

    # Lower-triangular integration matrix restricted to the observed rows.
    integ = chains.integration_matrix(data_matrix.m_orig)[data_matrix.row_ids, :]
    left = recursive.IntegrationNode(integ)

    # Innovations: the first state plus the successive differences, so that
    # integ @ innovations reconstructs the states.
    innovations = np.vstack([states[0, :][nax, :], np.diff(states, axis=0)])
    right = recursive.GaussianNode(innovations, 'scalar', sigma_sq_D)

    # Residual noise around the per-row state predictions.
    pred = states[data_matrix.row_ids, :]
    latent = data_matrix.sample_latent_values(pred, sigma_sq_N)
    noise = recursive.GaussianNode(latent - pred, 'scalar', sigma_sq_N)

    return recursive.SumNode([recursive.ProductNode([left, right]), noise])
Esempio n. 4
0
def init_sparsity(data_matrix, mu_Z_mode, num_iter=200):
    """Initialize a Gaussian scale mixture (sparsity) model.

    mu_Z_mode selects how the scale mean mu_Z is tied: 'row' (handled by
    transposing and recursing with 'col'), 'col' (one mean per column), or
    'scalar' (a single shared mean).

    Raises:
        ValueError: if mu_Z_mode is not one of 'row', 'col', 'scalar'.
          (Previously an unknown mode left `by_column` unbound and failed
          later with a confusing NameError.)
    """
    if mu_Z_mode == 'row':
        return init_sparsity(data_matrix.transpose(), 'col', num_iter).transpose()
    elif mu_Z_mode == 'col':
        by_column = True
    elif mu_Z_mode == 'scalar':
        by_column = False
    else:
        raise ValueError("Unknown mu_Z_mode: %r (expected 'row', 'col', or 'scalar')"
                         % (mu_Z_mode,))

    # currently, data_matrix should always be real-valued with no missing values, so this just
    # passes on data_matrix.observations.values; we may want to replace it with interval observations
    # obtained from slice sampling
    S = data_matrix.sample_latent_values(np.zeros((data_matrix.m, data_matrix.n)),
                                         np.ones((data_matrix.m, data_matrix.n)))

    # Random initialization of the log-scale field Z (mean -1, std 1).
    Z = np.random.normal(-1., 1., size=S.shape)

    # sparse_coding.py wants a full sparse coding problem, so pass in None for the things
    # that aren't relevant here
    state = sparse_coding.SparseCodingState(S, None, Z, None, -1., 1., None)

    # Gibbs sweeps over Z, its mean, and its variance.
    pbar = misc.pbar(num_iter)
    for i in range(num_iter):
        sparse_coding.sample_Z(state)
        state.mu_Z = sparse_coding.cond_mu_Z(state, by_column).sample()
        state.sigma_sq_Z = sparse_coding.cond_sigma_sq_Z(state).sample()

        # Optional debugging hook invoked after each iteration.
        if hasattr(debugger, 'after_init_sparsity_iter'):
            debugger.after_init_sparsity_iter(locals())

        pbar.update(i)
    pbar.finish()

    scale_node = recursive.GaussianNode(state.Z, 'scalar', state.sigma_sq_Z)
    return recursive.GSMNode(state.S, scale_node, mu_Z_mode, state.mu_Z)
Esempio n. 5
0
def init_level(name, level):
    """Initialize a given level of the search by saving all of the structures which need
    to be evaluated.

    Fix: the previous version used a bare `except:` when loading samples,
    which also swallowed KeyboardInterrupt/SystemExit; narrowed to
    `except Exception`.
    """
    if not storage.exists(experiment_dir(name)):
        raise RuntimeError('Experiment %s not yet initialized.' % name)

    params = storage.load(params_file(name))
    splits = storage.load(splits_file(name))

    # Level 1 always starts from the plain Gaussian structure 'g'; deeper
    # levels expand the winners of the previous level.
    if level == 1:
        init_structures = ['g']
    else:
        init_structures = storage.load(winning_structure_file(name, level - 1))

    structure_pairs = list_structure_pairs(init_structures, params.rules, params.expand_noise)
    data_matrix = storage.load(data_file(name))
    X_train = data_matrix
    lab = None
    # One padded 200x200x2 image per (split, sample) for the classifier.
    node_mat = np.zeros([params.num_splits * params.num_samples, 200, 200, 2])
    pruned_pairs = []

    # Set up the TF1 session and restore the pretrained pruning classifier.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # ; config.gpu_options.per_process_gpu_memory_fraction = 0.4
    sess = tf.Session(config=config)
    c = Classifier()
    real = tf.placeholder(shape=[None, 200, 200, 2], dtype=tf.float32)
    # Average the softmax over the batch so top_k acts on one distribution.
    c_out = tf.reduce_mean(tf.nn.softmax(c(real), axis=-1), axis=0, keepdims=True)
    c_params = c.vars
    saver = tf.train.Saver(c_params)
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, "saved_model/d")

    for (init_structure, structure) in structure_pairs:
        for split_id in range(params.num_splits):
            for sample_id in range(params.num_samples):
                train_rows, train_cols, test_rows, test_cols = splits[split_id]
                X_train = data_matrix[train_rows[:, nax], train_cols[nax, :]]
                if level == 1:
                    init = X_train.sample_latent_values(np.zeros((X_train.m, X_train.n)), 1.)
                    prev_model = recursive.GaussianNode(init, 'scalar', 1.)
                else:
                    try:
                        prev_model = storage.load(samples_file(name, level - 1, init_structure, split_id, sample_id))
                    except Exception:
                        # Missing sample file: this candidate was never
                        # produced at the previous level, so skip it.
                        print("structure", grammar.pretty_print(init_structure), "never exists")
                        continue
                    if isinstance(prev_model, recursive.Decomp):
                        prev_model = prev_model.root
                node, old_dist, rule = recursive.find_changed_node(prev_model, init_structure, structure)
                lab = labelize(rule)
                node_mat[split_id * params.num_samples + sample_id] = pad(random_shrink(node.value()))

        # Keep the pair only if its rule label is among the classifier's top 3.
        if_continue = sess.run(tf.nn.top_k(c_out, 3), feed_dict={real: node_mat})
        if lab in if_continue.indices:
            print("transformation structure ", grammar.pretty_print(init_structure), "->", grammar.pretty_print(structure), "i.e. lab ", lab,
                  " included with top_k", if_continue)
            pruned_pairs.append((init_structure, structure))
        else:
            print("transformation structure ", grammar.pretty_print(init_structure), "->", grammar.pretty_print(structure), "i.e. lab ", lab, " emitted, with top_k", if_continue)
    structure_pairs = pruned_pairs
    storage.dump(structure_pairs, structures_file(name, level))
Esempio n. 6
0
def sample_from_model(name, level, init_structure, structure, split_id,
                      sample_id):
    """Run an MCMC sampler to approximately sample from the posterior."""
    params = storage.load(params_file(name))
    data_matrix = storage.load(data_file(name))
    splits = storage.load(splits_file(name))
    train_rows, train_cols, test_rows, test_cols = splits[split_id]

    # Restrict to the training rows/columns of this split.
    X_train = data_matrix[train_rows[:, nax], train_cols[nax, :]]

    if level == 1:
        # At the first level, start from a unit-variance Gaussian model.
        init = X_train.sample_latent_values(np.zeros((X_train.m, X_train.n)),
                                            1.)
        prev_model = recursive.GaussianNode(init, 'scalar', 1.)
    else:
        # Otherwise resume from the previous level's saved model; the file
        # location depends on whether full samples were persisted.
        if params.save_samples:
            model_path = samples_file(name, level - 1, init_structure,
                                      split_id, sample_id)
        else:
            model_path = init_samples_file(name, level, init_structure,
                                           split_id, sample_id)
        prev_model = storage.load(model_path)
        if isinstance(prev_model, recursive.Decomp):
            prev_model = prev_model.root

    return recursive.fit_model(structure,
                               X_train,
                               prev_model,
                               gibbs_steps=params.gibbs_steps)
Esempio n. 7
0
def init_low_rank(data_matrix, num_iter=200):
    """Initialize a rank-2 low-rank decomposition: fit a low-rank Poisson
    model and express it as Gaussian * Gaussian + noise."""
    num_rows, num_cols = data_matrix.m, data_matrix.n
    state, _ = low_rank_poisson.fit_model(data_matrix, 2, num_iter=num_iter)
    U, V, ssq_U, ssq_N = state.U, state.V, state.ssq_U, state.ssq_N

    # Rebalance the factors so the per-dimension scale is split evenly
    # between U and V (the product U V is unchanged).
    scale = ssq_U ** 0.25
    U /= scale[nax, :]
    V *= scale[:, nax]

    # NOTE(review): both factor nodes use np.sqrt(ssq_U) as the variance —
    # presumably intentional after the symmetric rescaling; confirm.
    left = recursive.GaussianNode(U, 'col', np.sqrt(ssq_U))
    right = recursive.GaussianNode(V, 'row', np.sqrt(ssq_U))

    # Residual noise around the low-rank reconstruction.
    pred = np.dot(U, V)
    latent = data_matrix.sample_latent_values(pred, ssq_N)
    noise = recursive.GaussianNode(latent - pred, 'scalar', ssq_N)

    return recursive.SumNode([recursive.ProductNode([left, right]), noise])