Example #1
def test_shortest_paths_retain_fraction(device):
    pytest.skip("This test is flaky.")

    # cycle graph on 10 nodes; all-pairs shortest paths give 10*9/2 = 45 edges
    n = 10
    edges = torch.tensor([[i, i + 1] for i in range(n - 1)] + [[n - 1, 0]],
                         device=device)
    graph = preprocess.Graph.from_edges(edges)

    # expect roughly 0.2 * 45 = 9 edges on average
    n_edges = 0
    nnz = 0
    n_trials = 100
    for _ in range(n_trials):
        shortest_path_graph = preprocess.graph.shortest_paths(
            graph, retain_fraction=0.2)
        n_edges += shortest_path_graph.n_edges
        nnz += shortest_path_graph.A.nnz
    mean_n_edges = n_edges / n_trials
    mean_nnz = nnz / n_trials

    assert shortest_path_graph.n_items == n
    testing.assert_allclose(mean_n_edges, 9.0, atol=1.5)
    testing.assert_allclose(mean_nnz, 18.0, atol=1.5)

    # the adjacency matrix should be symmetric
    assert not (shortest_path_graph.A !=
                shortest_path_graph.A.T).todense().any()
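A note on the tolerances: if each of the 45 candidate pairs is retained
independently with probability retain_fraction (an assumption about the
sampling scheme, not something the test confirms), the expected edge count is
0.2 * 45 = 9, and the symmetric sparse matrix A stores each edge twice, so
nnz should average about 18. A minimal sketch of that arithmetic:

n = 10
n_pairs = n * (n - 1) // 2                   # 45 shortest-path pairs
retain_fraction = 0.2
expected_edges = retain_fraction * n_pairs   # 9.0
expected_nnz = 2 * expected_edges            # 18.0: A holds (i, j) and (j, i)
print(expected_edges, expected_nnz)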
Example #2
def test_fit_spectral(device):
    # TODO deflake this test
    pytest.skip("This test is flaky on macOS.")
    np.random.seed(0)
    torch.random.manual_seed(0)
    n = 200
    m = 3
    max_iter = 1000

    edges = util.all_edges(n)
    weights = torch.ones(edges.shape[0])
    f = penalties.Quadratic(weights)

    mde = problem.MDE(
        n,
        m,
        edges=edges,
        distortion_function=f,
        constraint=Standardized(),
        device=device,
    )
    X = mde.embed(max_iter=max_iter, eps=1e-10, memory_size=10)

    # embed returns the problem's own embedding matrix, not a copy
    assert id(X) == id(mde.X)
    X_spectral = quadratic.spectral(n,
                                    m,
                                    edges=edges,
                                    weights=weights,
                                    device=device)

    testing.assert_allclose(
        mde.average_distortion(X).detach().cpu().numpy(),
        mde.average_distortion(X_spectral).detach().cpu().numpy(),
        atol=1e-4,
    )
Example #3
def test_differences(device):
    torch.random.manual_seed(0)
    edges = np.array([(0, 1), (0, 2), (1, 2)])
    X = torch.randn((3, 3), dtype=torch.float32, device=device)
    mde = problem.MDE(
        3,
        3,
        edges,
        penalties.Quadratic(torch.ones(3)),
        constraint=Standardized(),
        device=device,
    )
    diff = mde.differences(X)
    testing.assert_allclose(X[edges[:, 0]] - X[edges[:, 1]], diff)
Example #4
def test_k_nearest_neighbors():
    data_matrix = np.array([[0.0], [1.0], [1.5], [1.75]])
    graph = preprocess.data_matrix.k_nearest_neighbors(data_matrix, k=2)
    edges = set(tuple(e) for e in graph.edges.cpu().numpy().tolist())
    # [2, 1], [3, 2], and [3, 1] are omitted because they duplicate
    # edges already listed
    expected = set(
        tuple(e)
        for e in np.array([[0, 1], [0, 2], [1, 2], [1, 3], [2, 3]]).tolist())
    assert edges == expected

    # weight is 2 if i and j are neighbors of each other,
    # and 1 if i is a neighbor of j but not vice versa
    testing.assert_allclose(np.array([1., 1., 2., 2., 2.]), graph.weights)
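The weighting scheme described in the comment can be reproduced by hand. A
minimal sketch (knn_edge_weights is a hypothetical helper written for this
illustration, not part of pymde) that counts, for each undirected edge, how
many of its two endpoints list the other among their k nearest neighbors:

import numpy as np

def knn_edge_weights(data, k):
    """Weight 2 if i and j are k-nearest neighbors of each other, else 1."""
    n = data.shape[0]
    dists = np.linalg.norm(data[:, None, :] - data[None, :, :], axis=2)
    np.fill_diagonal(dists, np.inf)  # an item is not its own neighbor
    neighbors = np.argsort(dists, axis=1)[:, :k]
    weights = {}
    for i in range(n):
        for j in neighbors[i]:
            key = (min(i, int(j)), max(i, int(j)))
            weights[key] = weights.get(key, 0) + 1
    return dict(sorted(weights.items()))

# reproduces the weights asserted above:
# {(0, 1): 1, (0, 2): 1, (1, 2): 2, (1, 3): 2, (2, 3): 2}
print(knn_edge_weights(np.array([[0.0], [1.0], [1.5], [1.75]]), k=2))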
Example #5
def test_norm_grad_zero(device):
    torch.random.manual_seed(0)
    edges = np.array([(0, 1)])
    mde = problem.MDE(
        3,
        3,
        edges,
        penalties.Quadratic(torch.ones(3)),
        constraint=Standardized(),
        device=device,
    )
    # all rows of X coincide, so the single embedding distance is zero;
    # the gradient there should be zero rather than NaN
    X = torch.ones((3, 3), requires_grad=True, device=device)
    norms = mde.distances(X)
    norms.backward()
    testing.assert_allclose(X.grad, 0.0)
Example #6
def test_some_distances_numpy(device):
    del device  # unused: this test exercises the numpy code path
    np.random.seed(0)
    max_distances = 50
    # choose retain_fraction so that max_distances of the
    # 500 * 499 / 2 candidate pairs are retained
    retain_fraction = max_distances / (500 * 499 // 2)
    data_matrix = np.random.randn(500, 2)
    graph = preprocess.data_matrix.distances(data_matrix,
                                             retain_fraction=retain_fraction)

    assert graph.n_items == data_matrix.shape[0]
    assert graph.n_edges == max_distances
    for e, d in zip(graph.edges, graph.distances):
        e = e.cpu().numpy()
        d = d.item()
        true_distance = np.linalg.norm(data_matrix[e[0]] - data_matrix[e[1]])
        testing.assert_allclose(true_distance, d)
Example #7
def test_all_distances_numpy(device):
    del device  # unused: this test exercises the numpy code path
    np.random.seed(0)
    data_matrix = np.random.randn(4, 2)
    graph = preprocess.data_matrix.distances(data_matrix)

    assert graph.n_items == data_matrix.shape[0]
    assert graph.n_edges == 6
    testing.assert_all_equal(
        graph.edges,
        torch.tensor([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]),
    )

    for e, d in zip(graph.edges, graph.distances):
        e = e.cpu().numpy()
        d = d.item()
        true_distance = np.linalg.norm(data_matrix[e[0]] - data_matrix[e[1]])
        testing.assert_allclose(true_distance, d)
Example #8
def test_all_distances_torch(device):
    np.random.seed(0)
    data_matrix = torch.tensor(np.random.randn(4, 2),
                               dtype=torch.float,
                               device=device)
    graph = preprocess.data_matrix.distances(data_matrix)

    assert graph.n_items == data_matrix.shape[0]
    assert graph.n_edges == 6
    testing.assert_all_equal(
        graph.edges,
        torch.tensor([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]),
    )

    for e, d in zip(graph.edges, graph.distances):
        true_distance = (data_matrix[e[0]] - data_matrix[e[1]]).norm()
        testing.assert_allclose(true_distance, d)
Example #9
def test_average_distortion(device):
    torch.random.manual_seed(0)
    edges = np.array([(0, 1), (0, 2), (1, 2)])
    mde = problem.MDE(
        3,
        2,
        edges,
        penalties.Quadratic(torch.tensor([1.0, 2.0, 3.0])),
        constraint=Standardized(),
        device=device,
    )
    X = torch.tensor(
        [[0.0, 0.0], [1.0, 1.0], [3.0, 3.0]],
        dtype=torch.float32,
        device=device,
    )
    average_distortion = mde.average_distortion(X)
    # squared distances are 2, 18, 8 with weights 1, 2, 3:
    # (1*2 + 2*18 + 3*8) / 3 = (2 + 36 + 24) / 3 = 62/3
    testing.assert_allclose(average_distortion.detach().cpu().numpy(),
                            62.0 / 3)
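The arithmetic in the comment can be verified independently; a minimal NumPy
sketch, assuming the quadratic penalty is weight times squared embedding
distance, averaged over the edges:

import numpy as np

X = np.array([[0.0, 0.0], [1.0, 1.0], [3.0, 3.0]])
edges = np.array([(0, 1), (0, 2), (1, 2)])
weights = np.array([1.0, 2.0, 3.0])

sq_dists = ((X[edges[:, 0]] - X[edges[:, 1]]) ** 2).sum(axis=1)  # [2, 18, 8]
print((weights * sq_dists).mean())  # 20.666... == 62/3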
Example #10
def test_average_distortion_grad(device):
    torch.random.manual_seed(0)
    edges = np.array([(0, 1), (0, 2), (1, 2)])
    f = penalties.Quadratic(torch.tensor([1.0, 2.0, 3.0], device=device))
    mde = problem.MDE(3, 2, edges, f, Standardized(), device=device)
    X = torch.randn(
        (3, 2),
        requires_grad=True,
        dtype=torch.float32,
        device=device,
    )
    average_distortion = mde.average_distortion(X)
    average_distortion.backward()
    # signed node-by-edge incidence matrix of the edge list
    A = torch.tensor(
        [[1, 1, 0], [-1, 0, 1], [0, -1, -1]],
        device=device,
    ).float()
    auto_grad = X.grad
    X.grad = None
    util._distortion(X, f, A, mde._lhs, mde._rhs).backward()
    manual_grad = X.grad
    testing.assert_allclose(auto_grad, manual_grad)
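The identity behind the manual gradient: A is the signed node-by-edge
incidence matrix, so A.T @ X stacks the per-edge differences X[i] - X[j].
A quick sketch of that identity on its own:

import numpy as np

edges = np.array([(0, 1), (0, 2), (1, 2)])
A = np.array([[1, 1, 0], [-1, 0, 1], [0, -1, -1]], dtype=float)
X = np.random.randn(3, 2)

# row k of A.T @ X equals X[i] - X[j] for the k-th edge (i, j)
assert np.allclose(A.T @ X, X[edges[:, 0]] - X[edges[:, 1]])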
Example #11
def test_spectral():
    np.random.seed(0)
    torch.random.manual_seed(0)
    n = 5
    m = 3
    # build a random weighted graph Laplacian: nonpositive off-diagonal
    # entries, diagonal set to the negated row sums
    L = -np.abs(np.random.randn(n, n).astype(np.float32))
    L += L.T
    np.fill_diagonal(L, 0.0)
    np.fill_diagonal(L, -L.sum(axis=1))
    offdiag = np.triu_indices(n, 1)
    edges = np.column_stack(offdiag)
    weights = -L[offdiag]
    X = quadratic.spectral(n, m, edges, torch.tensor(weights))
    testing.assert_allclose(1.0 / n * X.T @ X, np.eye(m))
    # rescale so the columns are orthonormal, matching eigsh's normalization
    X *= 1.0 / np.sqrt(n)

    eigenvalues, eigenvectors = scipy.sparse.linalg.eigsh(
        L, k=m + 1, which="SM", return_eigenvectors=True)
    # drop the eigenvector for the zero eigenvalue (the constant vector)
    eigenvectors = eigenvectors[:, 1:]
    for col in range(m):
        testing.assert_allclose(eigenvectors[:, col],
                                X[:, col],
                                up_to_sign=True)
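Going the other way, the Laplacian used above can be rebuilt from the
(edges, weights) pair as L = D - W; a minimal SciPy sketch of that
construction (an illustration, not pymde's internal code):

import numpy as np
import scipy.sparse

def laplacian_from_edges(n, edges, weights):
    i, j = edges[:, 0], edges[:, 1]
    W = scipy.sparse.coo_matrix(
        (np.concatenate([weights, weights]),
         (np.concatenate([i, j]), np.concatenate([j, i]))),
        shape=(n, n)).toarray()
    return np.diag(W.sum(axis=1)) - W  # D - W

# should reproduce the matrix built in the test:
# np.allclose(laplacian_from_edges(n, edges, weights), L)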
Example #12
def test_initialization(device):
    torch.random.manual_seed(0)
    constraint = Standardized()
    X = constraint.initialization(5, 3, device=device)
    testing.assert_allclose(1.0 / 5 * X.T @ X, np.eye(3))
Example #13
def test_proj_standardized(device):
    X = torch.eye(2, dtype=torch.float32, device=device)
    proj = util.proj_standardized(X)
    testing.assert_allclose(1 / 2.0 * proj.T @ proj, np.eye(2))

    n = 10
    m = 3
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    proj = util.proj_standardized(X)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))

    n = 100
    m = 3
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    proj = util.proj_standardized(X)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))

    n = 100
    m = 3
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    X -= X.mean(axis=0)
    proj = util.proj_standardized(X)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))
    testing.assert_allclose(proj.mean(axis=0), np.zeros(m))

    n = 100
    m = 3
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    proj = util.proj_standardized(X, demean=True)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))
    testing.assert_allclose(proj.mean(axis=0), np.zeros(m))

    n = 1000
    m = 2
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    proj = util.proj_standardized(X, demean=True)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))
    testing.assert_allclose(proj.mean(axis=0), np.zeros(m))

    n = 1000
    m = 3
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    proj = util.proj_standardized(X, demean=True)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))
    testing.assert_allclose(proj.mean(axis=0), np.zeros(m))

    n = 1000
    m = 250
    X = torch.randn((n, m), dtype=torch.float32, device=device)
    proj = util.proj_standardized(X, demean=True)
    testing.assert_allclose(1.0 / n * proj.T @ proj, np.eye(m))
    testing.assert_allclose(proj.mean(axis=0), np.zeros(m))
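For reference, a projection onto the standardization constraint
{X : (1/n) X^T X = I} has a closed form via the SVD: replace the singular
values with sqrt(n). A minimal sketch of that construction (not necessarily
how util.proj_standardized is implemented):

import numpy as np

def proj_standardized_sketch(X, demean=False):
    n = X.shape[0]
    if demean:
        X = X - X.mean(axis=0)
    U, _, Vt = np.linalg.svd(X, full_matrices=False)
    return np.sqrt(n) * U @ Vt  # polar factor, rescaled by sqrt(n)

X = np.random.randn(100, 3)
proj = proj_standardized_sketch(X, demean=True)
assert np.allclose(proj.T @ proj / 100, np.eye(3), atol=1e-6)
assert np.allclose(proj.mean(axis=0), 0.0, atol=1e-6)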
Example #14
def test_pca():
    torch.random.manual_seed(0)
    n = 5
    Y = np.random.randn(n, n).astype(np.float32)
    Y -= Y.mean(axis=0)
    for m in range(1, 5):
        X = quadratic.pca(Y, m)
        testing.assert_allclose(1.0 / n * X.T @ X, np.eye(m))
        U, _, _ = np.linalg.svd(Y)
        X_unscaled = 1.0 / np.sqrt(n) * X
        U = U[:, :m]
        U = util.align(source=U, target=X_unscaled)
        for col in range(m):
            testing.assert_allclose(X_unscaled[:, col],
                                    U[:, col],
                                    up_to_sign=True)

    n = 5
    k = 4
    Y = np.random.randn(n, k).astype(np.float32)
    Y -= Y.mean(axis=0)
    for m in range(1, k):
        X = quadratic.pca(Y, m)
        testing.assert_allclose(1.0 / n * X.T @ X, np.eye(m))
        U, _, _ = np.linalg.svd(Y)
        X_unscaled = 1.0 / np.sqrt(n) * X
        U = U[:, :m]
        U = util.align(source=U, target=X_unscaled)
        for col in range(m):
            testing.assert_allclose(X_unscaled[:, col],
                                    U[:, col],
                                    up_to_sign=True)
    with pytest.raises(ValueError,
                       match=r"Embedding dimension must be at most.*"):
        X = quadratic.pca(Y, k + 1)

    n = 4
    k = 5
    Y = np.random.randn(n, k).astype(np.float32)
    Y -= Y.mean(axis=0)
    for m in range(1, n):
        X = quadratic.pca(Y, m)
        testing.assert_allclose(1.0 / n * X.T @ X, np.eye(m))
        U, _, _ = np.linalg.svd(Y)
        X_unscaled = 1.0 / np.sqrt(n) * X
        U = U[:, :m]
        for col in range(m):
            testing.assert_allclose(X_unscaled[:, col],
                                    U[:, col],
                                    up_to_sign=True)
    with pytest.raises(ValueError,
                       match=r"Embedding dimension must be at most.*"):
        X = quadratic.pca(Y, n + 1)
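The relationship exercised throughout this test admits a compact restatement:
on a centered data matrix, a standardized PCA embedding is sqrt(n) times the
top m left singular vectors. A minimal sketch of that reading (an
illustration of what the assertions check, not quadratic.pca itself):

import numpy as np

def pca_sketch(Y, m):
    n = Y.shape[0]
    if m > min(Y.shape):
        raise ValueError("Embedding dimension must be at most min(Y.shape).")
    U, _, _ = np.linalg.svd(Y, full_matrices=False)
    return np.sqrt(n) * U[:, :m]  # satisfies (1/n) X^T X = I

Y = np.random.randn(5, 4)
Y -= Y.mean(axis=0)
X = pca_sketch(Y, 3)
assert np.allclose(X.T @ X / 5, np.eye(3), atol=1e-6)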