Example #1
File: gllim.py  Project: mxhsnowy/DMLIR
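These method excerpts assume module-level imports along the following lines (a sketch, not the project's verbatim header; `chol_loggausspdf` and `loggausspdf` are helper functions defined elsewhere in the project, and `GMM` is the legacy scikit-learn mixture API):

import pickle
import time
from itertools import compress

import numpy as np
from numpy.linalg import inv
from scipy.special import logsumexp  # older SciPy exposed this as scipy.misc.logsumexp
from sklearn.mixture import GMM  # removed in scikit-learn 0.20; newer versions use GaussianMixture

# chol_loggausspdf / loggausspdf: project helpers that evaluate log N(x; mu, Sigma)
# column-wise, presumably via a Cholesky factorization of Sigma.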
    def get_rnk_batch(self, X, Y):
        '''Get rnk for a batch by considering only the target responsibilities.'''

        logrnk = np.ndarray((len(X), self.K))
        #phiX=network.predict_on_batch(np.asarray(X))
        Y = np.asarray(Y)

        for (k, ck, pik, gammak) in zip(range(self.K), self.ckList,
                                        self.pikList, self.GammakList):
            # Warning: the term depending on the high-dimensional space is dropped here; we found this helps convergence.
            logrnk[:, k] = np.log(pik) + chol_loggausspdf(
                Y.T, ck.reshape((self.L, 1)), gammak)
        lognormrnk = logsumexp(logrnk, axis=1, keepdims=True)
        logrnk -= lognormrnk

        return np.exp(logrnk)
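A minimal usage sketch with hypothetical data, assuming a fitted model instance `gllim` (note that in this method `Y` holds the low-dimensional targets, since it is compared against the L-dimensional centers `ck`). Each row of the result sums to 1 because the log-responsibilities are normalized with `logsumexp` before exponentiation:

import numpy as np

# Hypothetical usage: 100 samples; only len(X) is used for X here.
X_batch = np.random.randn(100, 10)
Y_batch = np.random.randn(100, gllim.L)      # low-dimensional targets
rnk = gllim.get_rnk_batch(X_batch, Y_batch)  # shape (100, gllim.K)
assert np.allclose(rnk.sum(axis=1), 1.0)     # rows are normalized responsibilities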
Example #2
File: gllim.py  Project: mxhsnowy/DMLIR
    def predict_low_high(self, X):
        '''Backward prediction: map low-dimensional inputs X to high-dimensional outputs Y.'''

        N = X.shape[0]

        proj = np.empty((self.D, N, self.K))
        logalpha = np.zeros((N, self.K))

        for (k, pik, Ak, bk, ck, gamk) in zip(range(self.K), self.pikList,
                                              self.AkList, self.bkList,
                                              self.ckList, self.GammakList):

            proj[:, :, k] = Ak.dot(X.T) + np.expand_dims(bk, axis=1)
            logalpha[:, k] = np.log(pik) + chol_loggausspdf(
                X.T, ck.reshape((self.L, 1)), gamk)

        density = logsumexp(logalpha, axis=1, keepdims=True)
        logalpha -= density
        alpha = np.exp(logalpha)

        Ypred = np.sum(alpha.reshape((1, N, self.K)) * proj, axis=2)  # (15)

        return Ypred.T
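The `# (15)` comment refers to an equation number in the underlying GLLiM paper: each component k contributes an affine prediction Ak·x + bk, weighted by the posterior probability alpha_k that x belongs to component k. A self-contained sketch of that weighted sum for a single input, with hypothetical dimensions and parameters:

import numpy as np

# Sketch of the mixture-of-affine-experts prediction (hypothetical values).
K, L, D = 3, 2, 5
A = [np.random.randn(D, L) for _ in range(K)]  # affine maps A_k
b = [np.random.randn(D) for _ in range(K)]     # offsets b_k
alpha = np.random.dirichlet(np.ones(K))        # posterior weights, sum to 1

x = np.random.randn(L)
y_pred = sum(a * (Ak @ x + bk) for a, Ak, bk in zip(alpha, A, b))  # in R^D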
Example #3
File: gllim.py  Project: mxhsnowy/DMLIR
    def predict_high_low(self, Y):
        '''Forward prediction: map high-dimensional inputs Y to low-dimensional outputs X.'''

        N = Y.shape[0]

        proj = np.empty((self.L, N, self.K))
        logalpha = np.zeros((N, self.K))

        for (k, pik, AkS, bkS, ckS,
             gamkS) in zip(range(self.K), self.pikList, self.AkListS,
                           self.bkListS, self.ckListS, self.GammakListS):

            proj[:, :, k] = AkS.dot(Y.T) + np.expand_dims(bkS, axis=1)
            logalpha[:, k] = np.log(pik) + chol_loggausspdf(
                Y.T, ckS.reshape((self.D, 1)), gamkS)

        density = logsumexp(logalpha, axis=1, keepdims=True)
        logalpha -= density
        alpha = np.exp(logalpha)

        Xpred = np.sum(alpha.reshape((1, N, self.K)) * proj, axis=2)  # (16)

        return Xpred.T
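This direction uses the starred parameter lists (`AkListS`, `bkListS`, `ckListS`, `GammakListS`), which in GLLiM parameterize the inverse mapping; their computation is not shown in these excerpts. A usage sketch, assuming a fitted instance `gllim` with those lists populated:

import numpy as np

# Hypothetical round trip between the two spaces.
Y_new = np.random.randn(50, gllim.D)    # high-dimensional observations
X_hat = gllim.predict_high_low(Y_new)   # (50, gllim.L), forward prediction
Y_hat = gllim.predict_low_high(X_hat)   # (50, gllim.D), backward prediction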
Example #4
File: gllim.py  Project: mxhsnowy/DMLIR
    def fit(self, X, Y, maxIter, init, gmm_init=None):
        '''Fit the GLLiM model.
           # Arguments
            X: low-dimensional targets as a NumPy array
            Y: high-dimensional features as a NumPy array
            maxIter: maximum number of EM iterations
            init: boolean, whether to compute a GMM initialization
            gmm_init: optional path to a pickled GMM used as initialization
        '''

        N = X.shape[0]
        LL = np.zeros((maxIter + 1, 1))  # per-iteration log-likelihood (sized to maxIter rather than a fixed 100)
        it = 0
        converged = False

        if init:

            print("Start initialization")
            deltaLL = float('inf')
            print("X : ", X.shape)
            print("Y : ", Y.shape)

            # we initialize the model by running a GMM on the target only
            datas_matrix = X

            # uncomment the following line if you want to initialize the model on the complete data
            # datas_matrix = np.concatenate((X, Y), axis=1) #complete data matrix

            print "datas matrix shape:", datas_matrix.shape

            print "Initialization of posterior with GMM"
            start_time_EMinit = time.time()

            gmm = GMM(n_components=self.K,
                      covariance_type='diag',
                      random_state=None,
                      tol=0.001,
                      min_covar=0.001,
                      n_iter=100,
                      n_init=3,
                      params='wmc',
                      init_params='wmc',
                      verbose=1)

            if gmm_init is None:
                gmm.fit(datas_matrix)
            else:
                gmm = pickle.load(open(gmm_init, 'rb'))  # binary mode for the pickle
                gmm.fit(datas_matrix)

            self.rnk = gmm.predict_proba(datas_matrix)
            rkList = [np.sum(self.rnk[:, k]) for k in range(self.K)]

            print("--- %s seconds for EM initialization---" %
                  (time.time() - start_time_EMinit))

        print "Training with EM"
        start_time_EM = time.time()
        logrnk = np.ndarray((N, self.K))
        rkList = [np.sum(self.rnk[:, k]) for k in range(self.K)]

        while not converged and it < maxIter:

            it += 1

            print "Iteration nb " + str(it)

            #  M-GMM-step:
            print "M-GMM"

            self.pikList = [rk / N for rk in rkList]  # (28)

            self.ckList = [
                np.sum(self.rnk[:, k] * X.T, axis=1) / rk
                for k, rk in enumerate(rkList)
            ]

            self.GammakList = [
                np.dot((np.sqrt(self.rnk[:, k]).reshape(
                    (1, N))) * (X.T - ck.reshape((self.L, 1))),
                       ((np.sqrt(self.rnk[:, k]).reshape(
                           (1, N))) * (X.T - ck.reshape((self.L, 1)))).T) / rk
                for k, ck, rk in zip(range(self.K), self.ckList, rkList)
            ]  # (30)

            # M-mapping-step
            print "M-mapping"
            xk_bar = [
                np.sum(self.rnk[:, k] * X.T, axis=1) / rk
                for k, rk in enumerate(rkList)
            ]  # (35)

            yk_bar = [
                np.sum(self.rnk[:, k] * Y.T, axis=1) / rk
                for k, rk in enumerate(rkList)
            ]  # (36)

            XXt_stark = np.zeros((self.L, self.L))
            YXt_stark = np.zeros((self.D, self.L))

            for k, rk, xk, yk in zip(range(self.K), rkList, xk_bar, yk_bar):

                X_stark = (np.sqrt(self.rnk[:, k])) * (X - xk).T  # (33)
                Y_stark = (np.sqrt(self.rnk[:, k])) * (Y - yk).T  # (34)
                XXt_stark = np.dot(X_stark, X_stark.T)
                YXt_stark = np.dot(Y_stark, X_stark.T)
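                # Ak is the solution of the rnk-weighted least-squares regression of centered Y on centered X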
                self.AkList[k] = np.dot(YXt_stark, inv(XXt_stark))

            self.bkList = [
                np.sum(self.rnk[:, k].T * (Y - (Ak.dot(X.T)).T).T, axis=1) / rk
                for k, Ak, rk in zip(range(self.K), self.AkList, rkList)
            ]  # (37)

            diffSigmakList = [
                np.sqrt(self.rnk[:, k]).T * (Y - (Ak.dot(X.T)).T - bk.reshape(
                    (1, self.D))).T
                for k, Ak, bk in zip(range(self.K), self.AkList, self.bkList)
            ]

            sigma2 = [
                np.sum((diffSigma**2), axis=1) / rk
                for rk, diffSigma in zip(rkList, diffSigmakList)
            ]

            # isotropic sigma
            self.SigmakSquareList = [(np.sum(sig2) / self.D)
                                     for sig2 in sigma2]

            # numerical stability
            self.SigmakSquareList = [
                sig + 1e-08 for sig in self.SigmakSquareList
            ]

            # equal constraints
            self.SigmakSquareList = [
                self.SigmakSquareList[k] * rk for k, rk in enumerate(rkList)
            ]
            self.SigmakSquareList = [(sum(self.SigmakSquareList) / N)] * self.K
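            # all K components now share one isotropic noise variance, the rnk-weighted mean sum_k(rk * sigma_k^2) / N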

            #  E-Z step:
            print "E-Z"
            for (k, Ak, bk, ck, pik, gammak,
                 sigmakSquare) in zip(range(self.K), self.AkList, self.bkList,
                                      self.ckList, self.pikList,
                                      self.GammakList, self.SigmakSquareList):

                y_mean = np.dot(Ak, X.T) + bk.reshape((self.D, 1))
                logrnk[:, k] = np.log(pik) + chol_loggausspdf(
                    X.T, ck.reshape((self.L, 1)), gammak) + loggausspdf(
                        Y, y_mean.T, sigmakSquare)

            lognormrnk = logsumexp(logrnk, axis=1, keepdims=True)
            logrnk -= lognormrnk
            self.rnk = np.exp(logrnk)
            LL[it, 0] = np.sum(lognormrnk)  # must increase at every EM iteration
            print("Log-likelihood = %s at iteration nb %s" % (LL[it, 0], it))
            rkList = [np.sum(self.rnk[:, k]) for k in range(self.K)]

            # Remove empty clusters
            ec = [True] * self.K
            cpt = 0
            for k in range(self.K):
                rk_sum = np.sum(self.rnk[:, k])
                if rk_sum == 0 or np.isinf(rk_sum):
                    cpt += 1
                    ec[k] = False
                    print("class", k, "has been removed")
            self.K -= cpt
            rkList = list(compress(rkList, ec))
            self.AkList = list(compress(self.AkList, ec))
            self.bkList = list(compress(self.bkList, ec))
            self.ckList = list(compress(self.ckList, ec))
            self.SigmakSquareList = list(compress(self.SigmakSquareList, ec))
            self.pikList = list(compress(self.pikList, ec))
            self.GammakList = list(compress(self.GammakList, ec))

            if it >= 3:
                deltaLL_total = np.amax(LL[1:it, 0]) - np.amin(LL[1:it, 0])
                deltaLL = LL[it, 0] - LL[it - 1, 0]
                converged = bool(deltaLL <= 0.001 * deltaLL_total)

        print "Final log-likelihood : " + str(LL[it, 0])

        print(" Converged in %s iterations" % (it))

        print("--- %s seconds for EM ---" % (time.time() - start_time_EM))

        # plt.plot(LL[1:it,0])
        # plt.show()

        return LL[1:it, 0]
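An end-to-end usage sketch on synthetic data; the class name `GLLIM` and its constructor signature are assumptions, since `__init__` is not among these excerpts:

import numpy as np

# Hypothetical training run.
N, L, D = 1000, 2, 50
X = np.random.randn(N, L)    # low-dimensional targets
Y = np.random.randn(N, D)    # high-dimensional features

model = GLLIM(K=10, L=L, D=D)                 # assumed constructor
LL = model.fit(X, Y, maxIter=100, init=True)  # per-iteration log-likelihoods
# LL should increase from one EM iteration to the next.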