def test_output_simple():
    """
    A five-value input with its largest gap after the second entry must
    produce a single elbow located at 2.
    """
    values = np.array([10, 9, 3, 2, 1])
    found, _ = select_dimension(values, n_elbows=1)
    first_elbow = found[0]
    assert_equal(first_elbow, 2)
def test_output_uniform():
    """
    Build synthetic eigenvalues from two uniform distributions, 50 draws
    from [0, 45) and 50 draws from [55, 100), sorted in descending order.
    The single elbow must be found at 50.
    """
    np.random.seed(9)
    # Draw order matters for reproducibility: low group first, then high.
    low_group = np.random.uniform(0, 45, 50)
    high_group = np.random.uniform(55, 100, 50)

    ascending = np.sort(np.hstack([low_group, high_group]))
    descending = ascending[::-1]

    found, _ = select_dimension(descending, n_elbows=1)
    assert_equal(found[0], 50)
Esempio n. 3
0
    def fit(self, Xs):
        """
        Calculates a projection from each view to a latent space such that
        the sum of pairwise latent space correlations is maximized. Each view
        is first passed through ``self.center`` and its singular value
        decomposition is computed. The number of singular vectors kept is
        determined by, in order of precedence, a singular value threshold
        (``sv_tolerance``), a given number of components (``n_components``),
        the fraction of variance explained (``fraction_var``), or otherwise
        by elbow selection (``n_elbows``). The left singular vectors kept for
        each view are concatenated, the SVD of that matrix is taken, and the
        result is used to calculate a projection matrix for each view.

        Parameters
        ----------
        Xs : list of array-likes or numpy.ndarray
             - Xs length: n_views
             - Xs[i] shape: (n_samples, n_features_i)
            The data to fit to. Each view will receive its own embedding.

        Returns
        -------
        self : returns an instance of self.

        Raises
        ------
        TypeError
            If the option in effect (``sv_tolerance``, ``n_components`` or
            ``fraction_var``) has the wrong type.
        ValueError
            If ``sv_tolerance`` or ``n_components`` is not positive, if
            ``n_components`` exceeds ``min(n_samples, min_i n_features_i)``,
            or if ``fraction_var`` is outside (0, 1].

        Notes
        -----
        Sets ``self.projection_mats_`` (one projection matrix per view) and
        ``self.ranks_`` (the number of singular vectors kept per view).
        The rank options are tested by truthiness, so a value of 0 is treated
        the same as "not set" and falls through to the next option.
        """

        Xs = check_Xs(Xs, multiview=True)
        n = Xs[0].shape[0]
        min_m = min(X.shape[1] for X in Xs)

        data = [self.center(x) for x in Xs]

        Uall = []
        Sall = []
        Vall = []
        ranks = []

        for x in data:
            # Preprocess: NaN entries are overwritten with 0 in place.
            x[np.isnan(x)] = 0

            # Compute the thin SVD of the view. With ``self.tall`` the
            # decomposition is taken of the transpose, so the returned
            # factors swap roles (left factor of x.T is the right factor
            # of x and vice versa).
            if self.tall:
                v, s, ut = linalg.svd(x.T, full_matrices=False)
            else:
                u, s, vt = linalg.svd(x, full_matrices=False)
                ut = u.T
                v = vt.T

            Sall.append(s)
            Vall.append(v)

            # Dimensions to reduce to
            if self.sv_tolerance:
                if not isinstance(self.sv_tolerance, (float, int)):
                    raise TypeError("sv_tolerance must be numeric")
                elif self.sv_tolerance <= 0:
                    raise ValueError("sv_tolerance must be greater than 0")

                # Keep every singular value strictly above the threshold.
                rank = sum(s > self.sv_tolerance)
            elif self.n_components:
                if not isinstance(self.n_components, int):
                    raise TypeError("n_components must be an integer")
                elif self.n_components <= 0:
                    raise ValueError("n_components must be greater than 0")
                elif self.n_components > min((n, min_m)):
                    # Adjacent literals instead of a backslash continuation:
                    # a backslash inside the string would embed the next
                    # line's indentation in the message.
                    raise ValueError(
                        "n_components must be less than or equal to the "
                        "minimum input rank")

                rank = self.n_components
            elif self.fraction_var:
                if not isinstance(self.fraction_var, (float, int)):
                    raise TypeError("fraction_var must be an integer or float")
                elif self.fraction_var <= 0 or self.fraction_var > 1:
                    raise ValueError("fraction_var must be in (0,1]")

                # Smallest number of components whose cumulative share of
                # the squared singular values reaches fraction_var.
                s2 = np.square(s)
                rank = sum(np.cumsum(s2 / sum(s2)) < self.fraction_var) + 1
            else:
                # Only the first ceil(log2(min(x.shape))) singular values are
                # searched for elbows. ``Sall`` already holds the full
                # spectrum, so this truncation is local to the search.
                s = s[:int(np.ceil(np.log2(np.min(x.shape))))]
                elbows, _ = select_dimension(s,
                                             n_elbows=self.n_elbows,
                                             threshold=None)
                rank = elbows[-1]

            ranks.append(rank)

            u = ut.T[:, :rank]
            Uall.append(u)

        # The shared latent space has the smallest per-view rank.
        d = min(ranks)

        # Create a concatenated view of Us
        Uall_c = np.concatenate(Uall, axis=1)

        # SVDS the concatenated Us
        _, _, VV = svds(Uall_c, d)
        # NOTE(review): the flip presumably reorders the svds components to
        # descending singular value order - confirm against the SciPy
        # version in use.
        VV = np.flip(VV.T, axis=1)
        VV = VV[:, :min([d, VV.shape[1]])]

        # Each view owns a contiguous block of ``rank`` rows of VV, in the
        # same order in which its U was concatenated above.
        idx_end = 0
        projection_mats = []
        for v, s, rank in zip(Vall, Sall, ranks):
            idx_start = idx_end
            idx_end = idx_start + rank
            VVi = normalize(VV[idx_start:idx_end, :], "l2", axis=0)

            # Compute the canonical projections
            A = np.sqrt(n - 1) * v[:, :rank]
            A = A @ (linalg.solve(np.diag(s[:rank]), VVi))
            projection_mats.append(A)

        self.projection_mats_ = projection_mats
        self.ranks_ = ranks

        return self
def test_output_synthetic():
    """
    With the data returned by ``generate_data(10, 3)``, asking for two elbows
    (and the likelihoods alongside) must yield elbows at 2 and 4.
    """
    synthetic, _ = generate_data(10, 3)
    found, _, _ = select_dimension(
        X=synthetic,
        n_elbows=2,
        return_likelihoods=True,
    )
    expected = [2, 4]
    assert_equal(found, expected)
def test_invalid_inputes():
    """
    Check that select_dimension rejects malformed arguments with the
    expected exception type for each case.
    """
    X, _ = generate_data()

    # Invalid n_elbows, n_components and threshold values, checked one
    # keyword at a time; every entry must be rejected with ValueError.
    rejected_keywords = [
        {"n_elbows": -2},
        {"n_elbows": "string"},
        {"n_components": -1},
        {"n_components": "string"},
        {"threshold": -2},
        {"threshold": "string"},
    ]
    for keywords in rejected_keywords:
        with pytest.raises(ValueError):
            select_dimension(X, **keywords)

    # A very large threshold is expected to raise IndexError instead.
    with pytest.raises(IndexError):
        select_dimension(X, threshold=1000000)

    # Invalid X: a bare scalar.
    with pytest.raises(ValueError):
        select_dimension(X=-2)

    # Invalid X: a three-dimensional tensor.
    tensor_input = np.random.normal(size=(100, 10, 10))
    with pytest.raises(ValueError):
        select_dimension(X=tensor_input)

    # Invalid X: a (100, 1) column.
    column_input = np.random.normal(size=100).reshape(100, -1)
    with pytest.raises(ValueError):
        select_dimension(X=column_input)