Example #1
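These snippets are excerpted from a larger project and omit their imports. A plausible set is sketched below for reference; `eigenvalue_allocation`, `copy_from_hdfs`, `copy_to_hdfs`, and `relpath` are helpers defined in the surrounding codebase and are only assumed here, and the tests' `assert_true` is assumed to be nose-style.

import functools
import os
import pickle as pkl
from tempfile import NamedTemporaryFile

import numpy as np
from nose.tools import assert_true  # assumption: the tests use nose-style asserts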
def pca(data):
    """
    A simple PCA implementation that demonstrates how eigenvalue allocation
    is used to permute dimensions in order to balance the variance across
    subvectors. There are plenty of PCA implementations elsewhere. What is
    important is that the eigenvalues can be used to compute a variance-balancing
    dimension permutation.
    """

    # Compute mean
    count, D = data.shape
    mu = data.sum(axis=0) / float(count)

    # Compute covariance
    summed_covar = functools.reduce(lambda acc, x: acc + np.outer(x, x), data,
                                    np.zeros((D, D)))
    A = summed_covar / (count - 1) - np.outer(mu, mu)

    # Compute eigen decomposition
    eigenvalues, P = np.linalg.eigh(A)

    # Compute a permutation of dimensions to balance variance among 2 subvectors
    permuted_inds = eigenvalue_allocation(2, eigenvalues)

    # Build the permutation into the rotation matrix. One could alternatively
    # keep these steps separate, rotating and then permuting, if desired.
    P = P[:, permuted_inds]

    return P, mu
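The `eigenvalue_allocation` helper itself is not shown in these snippets. For orientation, here is a minimal sketch of a greedy variance-balancing allocation in the spirit the docstring describes; the project's actual implementation may differ in ordering and tie-breaking, so it is not expected to reproduce the exact permutations asserted in the tests below, and the name `eigenvalue_allocation_sketch` is only illustrative.

def eigenvalue_allocation_sketch(num_buckets, eigenvalues):
    """Greedily assign dimensions to buckets so that the product of
    eigenvalues (sum of logs) is roughly balanced across buckets.
    Assumes len(eigenvalues) is divisible by num_buckets."""
    D = len(eigenvalues)
    dims_per_bucket = D // num_buckets
    bucket_log_sum = np.zeros(num_buckets)
    bucket_dims = [[] for _ in range(num_buckets)]

    # Visit dimensions from largest to smallest eigenvalue and drop each into
    # the non-full bucket whose accumulated log-variance is currently smallest.
    for i in np.argsort(eigenvalues)[::-1]:
        open_buckets = [b for b in range(num_buckets)
                        if len(bucket_dims[b]) < dims_per_bucket]
        b = min(open_buckets, key=lambda j: bucket_log_sum[j])
        bucket_dims[b].append(i)
        bucket_log_sum[b] += np.log(eigenvalues[i])

    # Concatenated bucket contents form the dimension permutation.
    return np.concatenate([np.asarray(d, dtype=int) for d in bucket_dims])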
def main(args):

    # assume hdfs path in params
    filename = copy_from_hdfs(args.pca_params)
    print('Loading PCA Model locally from {} copied from {}'.format(
        filename, args.pca_params))
    params = pkl.load(open(filename, 'rb'))
    os.remove(filename)

    P = params['P']
    E = params['E']
    mu = params['mu']

    # Reduce dimension - eigenvalues assumed in ascending order
    E = E[-args.D:]
    P = P[:, -args.D:]

    # Balance variance across halves
    permuted_inds = eigenvalue_allocation(2, E)
    P = P[:, permuted_inds]

    # Save new params
    f = NamedTemporaryFile(delete=False)
    pkl.dump({'P': P, 'mu': mu}, open(f.name, 'wb'))
    f.close()
    copy_to_hdfs(f, args.output)
    os.remove(f.name)
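For context, a hypothetical command-line entry point matching the attributes this `main` reads (`pca_params`, `D`, `output`); the original driver is not included in the snippet.

if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Reduce and permute pickled PCA params')
    parser.add_argument('--pca_params', required=True, help='HDFS path to the pickled PCA params')
    parser.add_argument('--D', type=int, required=True, help='number of dimensions to keep')
    parser.add_argument('--output', required=True, help='HDFS path for the rewritten params')
    main(parser.parse_args())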
Example #5
def test_eigenvalue_allocation():
    a = pkl.load(open(relpath('./testdata/test_eigenvalue_allocation_input.pkl'), 'rb'))

    vals, vecs = np.linalg.eigh(a)
    res = eigenvalue_allocation(4, vals)

    expected = np.array([
        63, 56, 52, 48, 44, 40, 36, 30, 26, 22, 18, 14, 10, 6, 3, 0,
        62, 57, 53, 51, 45, 41, 39, 33, 32, 31, 29, 25, 21, 17, 13, 9,
        61, 58, 54, 49, 47, 42, 38, 34, 28, 24, 20, 16, 12, 8, 5, 2,
        60, 59, 55, 50, 46, 43, 37, 35, 27, 23, 19, 15, 11, 7, 4, 1
    ])

    assert_true(np.equal(res, expected).all())
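The expected array is a permutation of the 64 dimensions laid out as 4 buckets of 16, with the highest-variance dimensions spread one per bucket (np.linalg.eigh returns eigenvalues in ascending order, so indices 60-63 are the largest). A purely illustrative structural check along those lines could be appended inside the test:

    # Every dimension appears exactly once across the 4 buckets of 16.
    buckets = expected.reshape(4, 16)
    assert_true(np.array_equal(np.sort(expected), np.arange(64)))
    # The four largest-eigenvalue dimensions land in four different buckets.
    assert_true(len({int(np.where(buckets == i)[0][0]) for i in (60, 61, 62, 63)}) == 4)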
Example #6
def test_eigenvalue_allocation_normalized_features():
    eigenvalues = np.array([
        2.02255824, 1.01940991, 0.01569471, 0.01355569, 0.01264379, 0.01137654,
        0.01108961, 0.01054673, 0.01023358, 0.00989679, 0.00939045, 0.00900322,
        0.00878857, 0.00870027, 0.00850136, 0.00825236, 0.00813437, 0.00800231,
        0.00790201, 0.00782219, 0.00763405, 0.00752334, 0.00739174, 0.00728246,
        0.00701366, 0.00697365, 0.00677283, 0.00669658, 0.00654397, 0.00647679,
        0.00630645, 0.00621057
    ])
    indices = eigenvalue_allocation(2, eigenvalues)

    first_half = eigenvalues[indices[:16]]
    second_half = eigenvalues[indices[16:]]
    diff = np.abs(np.sum(np.log(first_half)) - np.sum(np.log(second_half)))
    assert_true(diff < .1, "eigenvalue_allocation is not working correctly")
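The log-sum threshold above has a simple product interpretation: a log-sum difference below 0.1 means the products of the two halves' eigenvalues differ by less than a factor of exp(0.1), roughly 1.11. For illustration, an equivalent assertion inside the same test would be:

    # Equivalent product-form check on the two halves.
    ratio = np.prod(first_half) / np.prod(second_half)
    assert_true(np.exp(-0.1) < ratio < np.exp(0.1))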
Example #9
def main(args):

    params = pkl.load(open(args.pca_params, 'rb'))
    P = params['P']
    E = params['E']
    mu = params['mu']

    # Reduce dimension - eigenvalues assumed in ascending order
    E = E[-args.D:]
    P = P[:, -args.D:]

    # Balance variance across halves
    permuted_inds = eigenvalue_allocation(2, E)
    P = P[:, permuted_inds]

    # Save new params
    pkl.dump({'P': P, 'mu': mu}, open(args.output, 'wb'))
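For reference, a params pickle with the keys this script expects ('P', 'E', 'mu') could be produced by mirroring the computation in pca() above while keeping the eigenvalues. This is only a sketch, not part of the original code, and the name save_pca_params is hypothetical.

def save_pca_params(data, path):
    # Same steps as pca(), but the eigenvalues E are saved as well,
    # since main() reads 'E' alongside 'P' and 'mu'.
    count, D = data.shape
    mu = data.sum(axis=0) / float(count)
    summed_covar = functools.reduce(lambda acc, x: acc + np.outer(x, x), data,
                                    np.zeros((D, D)))
    A = summed_covar / (count - 1) - np.outer(mu, mu)
    E, P = np.linalg.eigh(A)
    with open(path, 'wb') as fh:
        pkl.dump({'P': P, 'E': E, 'mu': mu}, fh)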