Beispiel #1
0
def test_pca_compare_var():
    """
    Check that the variances found by the SGD-based PCA fit agree with the
    variances found by the SVD-based PCA fit to within 10% relative error.

    """
    # problem size
    n_samples, n_dims = 10000, 10
    minibatch = 100
    n_components = 3

    # draw correlated gaussian samples with a random mean and a random
    # covariance of the form A A^T
    mu = np.random.random(n_dims)
    factor = np.random.random((n_dims, n_dims))
    sigma = np.dot(factor, factor.T)
    raw = np.random.multivariate_normal(mu, sigma, size=n_samples)
    samples = be.float_tensor(raw)

    # 0.9 is the split fraction handed to batch.split_tensor
    train, validate = batch.split_tensor(samples, 0.9)
    tables = {
        'train': batch.InMemoryTable(train, minibatch),
        'validate': batch.InMemoryTable(validate, minibatch),
    }
    data = batch.Batch(tables)

    # fit the principal directions in two different ways
    sgd_fit = factorization.PCA.from_batch(
        data,
        n_components,
        epochs=10,
        grad_steps_per_minibatch=1,
        stepsize=0.01,
    )
    svd_fit = factorization.PCA.from_svd(train, n_components)

    # relative difference of the variances, measured against the SGD result
    difference = be.norm(sgd_fit.var - svd_fit.var)
    assert difference / be.norm(sgd_fit.var) < 1e-1
Beispiel #2
0
def pdist(x: be.Tensor, y: be.Tensor) -> be.Tensor:
    """
    Pairwise distances between the rows of x and the rows of y.

    Uses the expansion |a - b|^2 = |a|^2 + |b|^2 - 2 a.b so that the whole
    matrix is obtained from a single matrix product.

    Args:
        x (tensor (num_samples_1, num_units))
        y (tensor (num_samples_2, num_units))

    Returns:
        tensor (num_samples_1, num_samples_2)

    """
    # squared row norms, shaped as a column (for x) and a row (for y)
    x_sq_col = be.unsqueeze(be.norm(x, axis=1)**2, axis=1)
    y_sq_row = be.unsqueeze(be.norm(y, axis=1)**2, axis=0)

    # cross term: inner products between every row of x and every row of y
    cross = be.dot(x, be.transpose(y))

    dist_sq = be.add(y_sq_row, be.add(x_sq_col, -2 * cross))

    # clip at zero so the square root never sees a negative entry
    # (presumably these can arise from floating point round-off)
    return be.sqrt(be.clip(dist_sq, a_min=0))
Beispiel #3
0
def weight_norm_histogram(rbm, show_plot=False, filename=None):
    """
    Plot the distribution of scaled weight norms for each connection of a model.

    For every connection, the norm of its weight matrix is taken along axis 0
    and divided by sqrt(connection.shape[0]). One distribution per connection
    is drawn on a shared axis, labelled by the connection index.

    Args:
        rbm: a model exposing `num_connections` and `connections`
        show_plot (bool; optional): display the figure if True
        filename (str; optional): save the figure to this path if not None

    Returns:
        None

    """
    import matplotlib.pyplot as plt
    import seaborn as sns

    fig, ax = plt.subplots()
    for index in range(rbm.num_connections):
        connection = rbm.connections[index]
        num_inputs = connection.shape[0]
        scaled_norm = be.norm(connection.weights.W(), axis=0) / sqrt(num_inputs)
        # NOTE(review): seaborn has deprecated distplot; kept here so the
        # rendered plot does not change
        sns.distplot(be.to_numpy_array(scaled_norm), ax=ax, label=str(index))
    ax.legend()

    # write the file before showing, so the saved image never depends on what
    # happens to the figure while it is displayed
    if filename is not None:
        fig.savefig(filename)
    if show_plot:
        # a bare `fig` expression statement does nothing inside a function;
        # plt.show() is what actually displays the figure
        plt.show()
    plt.close(fig)
Beispiel #4
0
    cd.train(opt, num_epochs, method=fit.pcd, mcsteps=mc_steps,
             beta_std=beta_std, burn_in=1)

    # evaluate the model
    util.show_metrics(rbm, cd.monitor)
    valid = data.get('validate')
    util.show_reconstructions(rbm, valid, show_plot, n_recon=10, vertical=False)
    util.show_fantasy_particles(rbm, valid, show_plot, n_fantasy=5)
    util.show_weights(rbm, show_plot, n_weights=100)

    # close the HDF5 store
    data.close()
    print("Done")

    return rbm

if __name__ == "__main__":
    # train the model, then visualize the weights of every connection
    rbm = run(show_plot=True)
    import seaborn
    import matplotlib.pyplot as plt
    for conn in rbm.connections:
        weights = conn.weights.W()

        # heatmap of be.corr of the weight matrix with itself
        c = be.corr(weights, weights)
        fig, ax = plt.subplots()
        seaborn.heatmap(be.to_numpy_array(c), vmin=-1, vmax=1, ax=ax)
        # a bare `fig` expression statement is a no-op in a script;
        # plt.show() is needed to actually display the figure
        plt.show()

        # distribution of the weight norms taken along axis 0
        n = be.norm(weights, axis=0)
        fig, ax = plt.subplots()
        # NOTE(review): seaborn has deprecated distplot; kept here so the
        # rendered plot does not change
        seaborn.distplot(be.to_numpy_array(n), ax=ax)
        plt.show()
Beispiel #5
0
def test_l2_normalize():
    """
    Check that every row of an l2-normalized tensor has unit norm.

    For each tensor in `tensors`, the norms along axis 1 of the normalized
    tensor are compared against a vector of ones with one entry per row.

    """
    row_norms = []
    expected = []
    for tensor in tensors:
        normalized = pre.l2_normalize(tensor)
        row_norms.append(be.norm(normalized, axis=1))
        expected.append(be.ones((len(tensor), )))
    assert compare_lists(row_norms, expected)