Esempio n. 1
0
def ppca(Y, Q, iterations=100):
    """
    EM implementation for probabilistic pca.

    :param array-like Y: Observed Data
    :param int Q: Dimensionality for reduced array
    :param int iterations: number of iterations for EM
    """
    from numpy.ma import dot as madot
    N, D = Y.shape
    # Initialise W randomly
    W = np.random.randn(D, Q) * 1e-3
    Y = np.ma.masked_invalid(Y, copy=0)
    mu = Y.mean(0)
    Ycentered = Y - mu
    try:
        for _ in range(iterations):
            exp_x = np.asarray_chkfinite(
                np.linalg.solve(W.T.dot(W), madot(W.T, Ycentered.T))).T
            W = np.asarray_chkfinite(
                np.linalg.solve(exp_x.T.dot(exp_x), madot(exp_x.T,
                                                          Ycentered))).T
    except np.linalg.linalg.LinAlgError:
        #"converged"
        pass
    return np.asarray_chkfinite(exp_x), np.asarray_chkfinite(W)
Esempio n. 2
0
def ppca(Y, Q, iterations=100):
    """
    EM implementation for probabilistic pca.
    :param array-like Y: Observed Data
    :param int Q: Dimensionality for reduced array
    :param int iterations: number of iterations for EM
    """
    from numpy.ma import dot as madot
    N, D = Y.shape
    # Initialise W randomly
    W = np.random.randn(D, Q) * 1e-3
    Y = np.ma.masked_invalid(Y, copy=0)
    mu = Y.mean(0)
    Ycentered = Y - mu
    try:
        for _ in range(iterations):
            exp_x = np.asarray_chkfinite(np.linalg.solve(W.T.dot(W), madot(W.T, Ycentered.T))).T
            W = np.asarray_chkfinite(np.linalg.solve(exp_x.T.dot(exp_x), madot(exp_x.T, Ycentered))).T
    except np.linalg.linalg.LinAlgError:
        #"converged"
        pass
    return np.asarray_chkfinite(exp_x), np.asarray_chkfinite(W)
Esempio n. 3
0
def ppca_missing_data_at_random(Y, Q, iters=100):
    """
    EM implementation of Probabilistic pca for when there is missing data.
    
    Taken from <SheffieldML, https://github.com/SheffieldML>

    .. math:
        \\mathbf{Y} = \mathbf{XW} + \\epsilon \\text{, where}
        \\epsilon = \\mathcal{N}(0, \\sigma^2 \mathbf{I})
        
    :returns: X, W, sigma^2 
    """
    from numpy.ma import dot as madot
    import diag
    from GPy.util.subarray_and_sorting import common_subarrays
    import time
    debug = 1
    # Initialise W randomly
    N, D = Y.shape
    W = np.random.randn(Q, D) * 1e-3
    Y = np.ma.masked_invalid(Y, copy=1)
    nu = 1.
    #num_obs_i = 1./Y.count()
    Ycentered = Y - Y.mean(0)

    X = np.zeros((N, Q))
    cs = common_subarrays(Y.mask)
    cr = common_subarrays(Y.mask, 1)
    Sigma = np.zeros((N, Q, Q))
    Sigma2 = np.zeros((N, Q, Q))
    mu = np.zeros(D)
    if debug:
        import matplotlib.pyplot as pylab
        fig = pylab.figure("FIT MISSING DATA")
        ax = fig.gca()
        ax.cla()
        lines = pylab.plot(np.zeros((N, Q)).dot(W))
    W2 = np.zeros((Q, D))

    for i in range(iters):
        #         Sigma = np.linalg.solve(diag.add(madot(W,W.T), nu), diag.times(np.eye(Q),nu))
        #         exp_x = madot(madot(Ycentered, W.T),Sigma)/nu
        #         Ycentered = (Y - exp_x.dot(W).mean(0))
        #         #import ipdb;ipdb.set_trace()
        #         #Ycentered = mu
        #         W = np.linalg.solve(madot(exp_x.T,exp_x) + Sigma, madot(exp_x.T, Ycentered))
        #         nu = (((Ycentered - madot(exp_x, W))**2).sum(0) + madot(W.T,madot(Sigma,W)).sum(0)).sum()/N
        for csi, (mask, index) in enumerate(cs.iteritems()):
            mask = ~np.array(mask)
            Sigma2[index, :, :] = nu * np.linalg.inv(
                diag.add(W2[:, mask].dot(W2[:, mask].T), nu))
            #X[index,:] = madot((Sigma[csi]/nu),madot(W,Ycentered[index].T))[:,0]
        X2 = ((Sigma2 / nu) *
              (madot(Ycentered, W2.T).base)[:, :, None]).sum(-1)
        mu2 = (Y - X.dot(W)).mean(0)
        for n in range(N):
            Sigma[n] = nu * np.linalg.inv(
                diag.add(W[:, ~Y.mask[n]].dot(W[:, ~Y.mask[n]].T), nu))
            X[n, :] = (Sigma[n] / nu).dot(W[:, ~Y.mask[n]].dot(
                Ycentered[n, ~Y.mask[n]].T))
        for d in range(D):
            mu[d] = (Y[~Y.mask[:, d], d] -
                     X[~Y.mask[:, d]].dot(W[:, d])).mean()
        Ycentered = (Y - mu)
        nu3 = 0.
        for cri, (mask, index) in enumerate(cr.iteritems()):
            mask = ~np.array(mask)
            W2[:, index] = np.linalg.solve(
                X[mask].T.dot(X[mask]) + Sigma[mask].sum(0),
                madot(X[mask].T, Ycentered[mask, index]))[:, None]
            W2[:, index] = np.linalg.solve(
                X.T.dot(X) + Sigma.sum(0), madot(X.T, Ycentered[:, index]))
            #nu += (((Ycentered[mask,index] - X[mask].dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma[mask].sum(0).dot(W[:,index])).sum(0)).sum()
            nu3 += (
                ((Ycentered[index] - X.dot(W[:, index]))**2).sum(0) +
                W[:, index].T.dot(Sigma.sum(0).dot(W[:, index])).sum(0)).sum()
        nu3 /= N
        nu = 0.
        nu2 = 0.
        W = np.zeros((Q, D))
        for j in range(D):
            W[:, j] = np.linalg.solve(
                X[~Y.mask[:, j]].T.dot(X[~Y.mask[:, j]]) +
                Sigma[~Y.mask[:, j]].sum(0),
                madot(X[~Y.mask[:, j]].T, Ycentered[~Y.mask[:, j], j]))
            nu2f = np.tensordot(W[:, j].T, Sigma[~Y.mask[:, j], :, :],
                                [0, 1]).dot(W[:, j])
            nu2s = W[:, j].T.dot(Sigma[~Y.mask[:, j], :, :].sum(0).dot(W[:,
                                                                         j]))
            nu2 += (((Ycentered[~Y.mask[:, j], j] -
                      X[~Y.mask[:, j], :].dot(W[:, j]))**2) + nu2f).sum()
            for i in range(N):
                if not Y.mask[i, j]:
                    nu += ((Ycentered[i, j] - X[i, :].dot(W[:, j]))**
                           2) + W[:, j].T.dot(Sigma[i, :, :].dot(W[:, j]))
        nu /= N
        nu2 /= N
        nu4 = (((Ycentered - X.dot(W))**2).sum(0) +
               W.T.dot(Sigma.sum(0).dot(W)).sum(0)).sum() / N
        import ipdb
        ipdb.set_trace()
        if debug:
            #print Sigma[0]
            print "nu:", nu, "sum(X):", X.sum()
            pred_y = X.dot(W)
            for x, l in zip(pred_y.T, lines):
                l.set_ydata(x)
            ax.autoscale_view()
            ax.set_ylim(pred_y.min(), pred_y.max())
            fig.canvas.draw()
            time.sleep(.3)
    return np.asarray_chkfinite(X), np.asarray_chkfinite(W), nu
Esempio n. 4
0
def ppca_missing_data_at_random(Y, Q, iters=100):
    """
    EM implementation of Probabilistic pca for when there is missing data.

    Taken from <SheffieldML, https://github.com/SheffieldML>

    .. math:
        \\mathbf{Y} = \mathbf{XW} + \\epsilon \\text{, where}
        \\epsilon = \\mathcal{N}(0, \\sigma^2 \mathbf{I})

    :returns: X, W, sigma^2
    """
    from numpy.ma import dot as madot
    import diag
    from GPy.util.subarray_and_sorting import common_subarrays
    import time
    debug = 1
    # Initialise W randomly
    N, D = Y.shape
    W = np.random.randn(Q, D) * 1e-3
    Y = np.ma.masked_invalid(Y, copy=1)
    nu = 1.
    #num_obs_i = 1./Y.count()
    Ycentered = Y - Y.mean(0)

    X = np.zeros((N,Q))
    cs = common_subarrays(Y.mask)
    cr = common_subarrays(Y.mask, 1)
    Sigma = np.zeros((N, Q, Q))
    Sigma2 = np.zeros((N, Q, Q))
    mu = np.zeros(D)
    """
    if debug:
        import matplotlib.pyplot as pylab
        fig = pylab.figure("FIT MISSING DATA");
        ax = fig.gca()
        ax.cla()
        lines = pylab.plot(np.zeros((N,Q)).dot(W))
    """
    W2 = np.zeros((Q,D))

    for i in range(iters):
#         Sigma = np.linalg.solve(diag.add(madot(W,W.T), nu), diag.times(np.eye(Q),nu))
#         exp_x = madot(madot(Ycentered, W.T),Sigma)/nu
#         Ycentered = (Y - exp_x.dot(W).mean(0))
#         #import ipdb;ipdb.set_trace()
#         #Ycentered = mu
#         W = np.linalg.solve(madot(exp_x.T,exp_x) + Sigma, madot(exp_x.T, Ycentered))
#         nu = (((Ycentered - madot(exp_x, W))**2).sum(0) + madot(W.T,madot(Sigma,W)).sum(0)).sum()/N
        for csi, (mask, index) in enumerate(cs.iteritems()):
            mask = ~np.array(mask)
            Sigma2[index, :, :] = nu * np.linalg.inv(diag.add(W2[:,mask].dot(W2[:,mask].T), nu))
            #X[index,:] = madot((Sigma[csi]/nu),madot(W,Ycentered[index].T))[:,0]
        X2 = ((Sigma2/nu) * (madot(Ycentered,W2.T).base)[:,:,None]).sum(-1)
        mu2 = (Y - X.dot(W)).mean(0)
        for n in range(N):
            Sigma[n] = nu * np.linalg.inv(diag.add(W[:,~Y.mask[n]].dot(W[:,~Y.mask[n]].T), nu))
            X[n, :] = (Sigma[n]/nu).dot(W[:,~Y.mask[n]].dot(Ycentered[n,~Y.mask[n]].T))
        for d in range(D):
            mu[d] = (Y[~Y.mask[:,d], d] - X[~Y.mask[:,d]].dot(W[:, d])).mean()
        Ycentered = (Y - mu)
        nu3 = 0.
        for cri, (mask, index) in enumerate(cr.iteritems()):
            mask = ~np.array(mask)
            W2[:,index] = np.linalg.solve(X[mask].T.dot(X[mask]) + Sigma[mask].sum(0), madot(X[mask].T, Ycentered[mask,index]))[:,None]
            W2[:,index] = np.linalg.solve(X.T.dot(X) + Sigma.sum(0), madot(X.T, Ycentered[:,index]))
            #nu += (((Ycentered[mask,index] - X[mask].dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma[mask].sum(0).dot(W[:,index])).sum(0)).sum()
            nu3 += (((Ycentered[index] - X.dot(W[:,index]))**2).sum(0) + W[:,index].T.dot(Sigma.sum(0).dot(W[:,index])).sum(0)).sum()
        nu3 /= N
        nu = 0.
        nu2 = 0.
        W = np.zeros((Q,D))
        for j in range(D):
            W[:,j] = np.linalg.solve(X[~Y.mask[:,j]].T.dot(X[~Y.mask[:,j]]) + Sigma[~Y.mask[:,j]].sum(0), madot(X[~Y.mask[:,j]].T, Ycentered[~Y.mask[:,j],j]))
            nu2f = np.tensordot(W[:,j].T, Sigma[~Y.mask[:,j],:,:], [0,1]).dot(W[:,j])
            nu2s = W[:,j].T.dot(Sigma[~Y.mask[:,j],:,:].sum(0).dot(W[:,j]))
            nu2 += (((Ycentered[~Y.mask[:,j],j] - X[~Y.mask[:,j],:].dot(W[:,j]))**2) + nu2f).sum()
            for i in range(N):
                if not Y.mask[i,j]:
                    nu += ((Ycentered[i,j] - X[i,:].dot(W[:,j]))**2) + W[:,j].T.dot(Sigma[i,:,:].dot(W[:,j]))
        nu /= N
        nu2 /= N
        nu4 = (((Ycentered - X.dot(W))**2).sum(0) + W.T.dot(Sigma.sum(0).dot(W)).sum(0)).sum()/N
        import ipdb;ipdb.set_trace()
        """
        if debug:
            #print Sigma[0]
            print "nu:", nu, "sum(X):", X.sum()
            pred_y = X.dot(W)
            for x, l in zip(pred_y.T, lines):
                l.set_ydata(x)
            ax.autoscale_view()
            ax.set_ylim(pred_y.min(), pred_y.max())
            fig.canvas.draw()
            time.sleep(.3)
        """
    return np.asarray_chkfinite(X), np.asarray_chkfinite(W), nu