Example #1
    def testing(self, x):
        """
        Estimate the outputs corresponding to new input points
        :param x: new input data (nb_data, dim_x)
        :return: corresponding output predictions (nb_data, dim_y)
        """
        nb_data = x.shape[0]

        # Compute gate probabilities
        inProdEst = np.zeros((nb_data, self.nb_class))
        for n in range(0, nb_data):
            for c in range(0, self.nb_class):
                vTmp = khatriRaoProd(self.V[1][c], self.V[0][c])
                for i in range(2, self.nb_dim_x):
                    vTmp = khatriRaoProd(self.V[i][c], vTmp)
                vVec = np.dot(vTmp, np.ones((self.rank_g, 1)))

                inProdEst[n, c] = self.beta[c] + np.dot(
                    vVec[:, None].T, x[n].flatten())

        priors = softmax(inProdEst).T

        ytmp = []
        for c in range(0, self.nb_class):
            # Compute experts predictions
            alpha = self.alpha[c][:]
            # Compute vec(W) for each output dimension
            wVec = []
            for j in range(self.nb_dim):
                wmsTmp = self.W[c][j]
                wTmp = khatriRaoProd(wmsTmp[1], wmsTmp[0])
                for k in range(2, self.nb_dim_x):
                    wTmp = khatriRaoProd(wmsTmp[k], wTmp)

                wVec.append(np.dot(wTmp, np.ones((self.rank_e[c], 1))))

            yhat_tmp = np.zeros((nb_data, self.nb_dim))

            for n in range(0, nb_data):
                for dim in range(0, self.nb_dim):
                    yhat_tmp[n, dim] = alpha[dim] + np.dot(
                        wVec[dim][:, None].T, x[n].flatten())

            # Append expert predictions weighted by the gate
            ytmp.append(yhat_tmp * priors[c][:, None])

        # Compute final predictions
        return np.sum(ytmp, axis=0)
    def func(v):
        # Recover parameters from vector v
        # v is the flattened (nb_dim * rank + 1, nb_class) matrix whose column i stacks [ xhi_i, phi_i, psi_i ]
        V = np.reshape(v, (nb_dim * rank + 1, nb_class))
        xhi = V[0]
        phi = [
            np.reshape(V[1:nb_dim1 * rank + 1, i], (nb_dim1, rank))
            for i in range(nb_class)
        ]
        psi = [
            np.reshape(V[nb_dim1 * rank + 1::, i], (nb_dim2, rank))
            for i in range(nb_class)
        ]

        # Compute probabilities
        inProdEst = np.zeros((nb_data, nb_class))
        for n in range(0, nb_data):
            for dim in range(0, nb_class):
                phipsiVec = np.dot(khatriRaoProd(psi[dim], phi[dim]),
                                   np.ones((rank, 1)))
                inProdEst[n, dim] = xhi[dim] + np.dot(phipsiVec[:, None].T,
                                                      x[n].flatten())

        est = softmax(inProdEst)
        est += 1e-308

        # Regularization term
        reg_term = 0.0
        for dim in range(0, nb_class):
            reg_term += np.linalg.norm(psi[dim]) + np.linalg.norm(phi[dim])

        return -np.sum(y * np.log(est)) + reg_fact * reg_term
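The snippets on this page all rely on a khatriRaoProd helper that is not shown here. A minimal stand-in, assuming it computes the standard column-wise Khatri-Rao product (the Kronecker product of matching columns), could look like the sketch below; the exact ordering of the Kronecker factors is an assumption and has to match the convention used by the repository's own helper.

import numpy as np


def khatri_rao_prod(A, B):
    """Column-wise Khatri-Rao product (hedged stand-in for khatriRaoProd).

    A: (I, R), B: (J, R) -> (I * J, R), where column r is kron(A[:, r], B[:, r]).
    The ordering of the Kronecker factors is an assumption.
    """
    I, R = A.shape
    J, R2 = B.shape
    assert R == R2, "A and B must have the same number of columns"
    out = np.zeros((I * J, R))
    for r in range(R):
        out[:, r] = np.kron(A[:, r], B[:, r])
    return out


# Summing the columns gives the vectorization of the rank-R matrix sum_r a_r b_r^T
# (up to the vec ordering convention), which is how the snippets build vVec, bVec and wVec.
phi = np.random.randn(3, 2)
psi = np.random.randn(4, 2)
print(khatri_rao_prod(psi, phi).dot(np.ones((2, 1))).shape)  # (12, 1)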
Example #3
    def testing(self, x):
        """
        Estimate the outputs corresponding to new input points
        :param x: new input data (nb_data, dim_x)
        :return: corresponding output predictions (nb_data, dim_y)
        """
        nb_data = x.shape[0]

        # Compute gate probabilities
        inProdEst = np.zeros((nb_data, self.nb_class))
        for n in range(0, nb_data):
            for dim in range(0, self.nb_class):
                phipsiVec = np.dot(khatriRaoProd(self.psi[dim], self.phi[dim]),
                                   np.ones((self.rank_g, 1)))
                inProdEst[n, dim] = self.xhi[dim] + np.dot(
                    phipsiVec[:, None].T, x[n].flatten())

        priors = softmax(inProdEst).T

        ytmp = []
        for i in range(0, self.nb_class):
            # Compute experts predictions
            alpha = self.alpha[i][:]
            b1tmp = self.b1[i][:]
            b2tmp = self.b2[i][:]
            bVec = [
                np.dot(khatriRaoProd(b2tmp[j], b1tmp[j]),
                       np.ones((self.rank_e[i], 1)))
                for j in range(self.nb_dim)
            ]

            yhat_tmp = np.zeros((nb_data, self.nb_dim))
            for n in range(0, nb_data):
                for dim in range(0, self.nb_dim):
                    yhat_tmp[n, dim] = alpha[dim] + np.dot(
                        bVec[dim][:, None].T, x[n].flatten())

            # Append expert predictions weighted by the gate
            ytmp.append(yhat_tmp * priors[i][:, None])

        # Compute final predictions
        return np.sum(ytmp, axis=0)
    def func(v):
        # Recover parameters from vector v
        # v is the flattened (nb_dim * rank + 1, nb_class) matrix whose column c stacks [ beta_c, v1_c, v2_c, ... ]
        Vall = np.reshape(v, (nb_dim * rank + 1, nb_class))
        beta = Vall[0]
        V = []
        start = 1
        end = dims[0] * rank + 1
        for m in range(len(dims)):
            Vtmp = [
                np.reshape(Vall[start:end, i], (dims[m], rank))
                for i in range(nb_class)
            ]
            V.append(Vtmp)

            start = end
            if m < len(dims) - 1:
                end += dims[m + 1] * rank

        # Compute probabilities
        inProdEst = np.zeros((nb_data, nb_class))
        for n in range(0, nb_data):
            for c in range(0, nb_class):
                vTmp = khatriRaoProd(V[1][c], V[0][c])
                for i in range(2, len(dims)):
                    vTmp = khatriRaoProd(V[i][c], vTmp)
                vVec = np.dot(vTmp, np.ones((rank, 1)))
                inProdEst[n, c] = beta[c] + np.dot(vVec[:, None].T,
                                                   x[n].flatten())

        est = softmax(inProdEst)
        est += 1e-308

        # Regularization term
        reg_term = 0.0
        for c in range(0, nb_class):
            for m in range(len(dims)):
                reg_term += np.linalg.norm(V[m][c])

        return -np.sum(y * np.log(est)) + reg_fact * reg_term
    def grad(v):
        # Recover parameters from vector v
        V = np.reshape(v, (nb_dim * rank + 1, nb_class))
        xhi = V[0]
        phi = [
            np.reshape(V[1:nb_dim1 * rank + 1, i], (nb_dim1, rank))
            for i in range(nb_class)
        ]
        psi = [
            np.reshape(V[nb_dim1 * rank + 1::, i], (nb_dim2, rank))
            for i in range(nb_class)
        ]

        # Compute probabilities
        inProdEst = np.zeros((nb_data, nb_class))
        for n in range(0, nb_data):
            for dim in range(0, nb_class):
                phipsiVec = np.dot(khatriRaoProd(psi[dim], phi[dim]),
                                   np.ones((rank, 1)))
                inProdEst[n, dim] = xhi[dim] + np.dot(phipsiVec[:, None].T,
                                                      x[n].flatten())

        est = softmax(inProdEst)

        # Compute gradients
        grad_xhi = np.sum(est - y, axis=0).flatten()

        grad_phi = np.zeros((nb_dim1 * rank, nb_class))
        grad_psi = np.zeros((nb_dim2 * rank, nb_class))
        for dim in range(0, nb_class):
            xVecPsi = np.zeros((nb_data, nb_dim1 * rank))
            for n in range(0, nb_data):
                xVecPsi[n] = np.dot(x[n], psi[dim]).flatten()

            xVecPhi = np.zeros((nb_data, nb_dim2 * rank))
            for n in range(0, nb_data):
                xVecPhi[n] = np.dot(x[n].T, phi[dim]).flatten()

            grad_phi[:, dim] = np.dot(xVecPsi.T, (est[:, dim] - y[:, dim]))
            grad_psi[:, dim] = np.dot(xVecPhi.T, (est[:, dim] - y[:, dim]))

            # Regularization term
            grad_phi[:, dim] += 2 * reg_fact * phi[dim].flatten()
            grad_psi[:, dim] += 2 * reg_fact * psi[dim].flatten()

        return np.hstack(
            (grad_xhi.flatten(), grad_phi.flatten(), grad_psi.flatten()))
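The gate probabilities in these examples are obtained through a softmax helper that is also not defined on this page. Given that inProdEst has shape (nb_data, nb_class) and est is used directly in the cross-entropy term, a numerically stable, row-wise version along these lines is presumably what is meant; treat it as a sketch rather than the repository's exact function.

import numpy as np


def softmax(z):
    """Row-wise softmax over the class axis: z has shape (nb_data, nb_class)."""
    z = z - np.max(z, axis=1, keepdims=True)  # shift by the row maximum for stability
    e = np.exp(z)
    return e / np.sum(e, axis=1, keepdims=True)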
Example #6
    def training(self,
                 x,
                 y,
                 y_class,
                 reg_rr=1e-2,
                 reg_lr=1e-2,
                 maxiter=100,
                 max_diff_ll=1e-5,
                 optmethod='BFGS'):
        """
        Train the TME model
        :param x: input data (nb_data, dim_x)
        :param y: output data (nb_data, dim_y)
        :param y_class: class labels of the outputs (nb_data, nb_classes)
        :param reg_rr: regularization term of the experts
        :param reg_lr: regularization term of the gate
        :param maxiter: maximum number of iterations for the EM algorithm
        :param max_diff_ll: log-likelihood difference threshold for EM convergence
        :param optmethod: optimization method for the logistic regression (gate)
        """
        nb_data = x.shape[0]
        dX = x.shape[1:]
        self.nb_dim_x = len(dX)
        self.nb_class = y_class.shape[1]
        self.nb_dim = y.shape[1]

        if type(self.rank_e) is not list:
            self.rank_e = [self.rank_e for i in range(self.nb_class)]

        # Experts initialization
        self.alpha = []
        self.W = []

        for rank_e in self.rank_e:
            tensRR = TensorRidgeRegression(rank=rank_e)
            tensRR.training(x, y)
            self.alpha.append(tensRR.alpha[:])
            self.W.append(tensRR.W)

        self.sigma = [np.eye(self.nb_dim) for i in range(0, self.nb_class)]

        # Gating initialization
        tensRR = TensorRidgeRegression(rank=self.rank_g)
        tensRR.training(x, y_class)
        beta_init = np.array(tensRR.alpha).T
        Vall = beta_init
        for m in range(self.nb_dim_x):
            wTmpRR = []
            for c in range(self.nb_class):
                wTmpRR.append(tensRR.W[c][m])
            Vtmp = np.reshape(np.array(wTmpRR),
                              (self.nb_class, dX[m] * self.rank_g)).T
            Vall = np.vstack((Vall, Vtmp))
        vinit = Vall.flatten()

        self.beta, self.V = optTensLogReg(x,
                                          y_class,
                                          rank=self.rank_g,
                                          v_init=vinit,
                                          optmethod=optmethod,
                                          reg_fact=reg_lr)

        # EM algorithm
        nb_min_steps = 2  # min num iterations
        nb_max_steps = maxiter  # max iterations

        LL = np.zeros(nb_max_steps)

        for it in range(nb_max_steps):
            # E - step
            Ltmp = np.zeros((self.nb_class, nb_data))

            # Compute gate probabilities
            inProdEst = np.zeros((nb_data, self.nb_class))
            for n in range(0, nb_data):
                for c in range(0, self.nb_class):
                    vTmp = khatriRaoProd(self.V[1][c], self.V[0][c])
                    for i in range(2, self.nb_dim_x):
                        vTmp = khatriRaoProd(self.V[i][c], vTmp)

                    vVec = np.dot(vTmp, np.ones((self.rank_g, 1)))

                    inProdEst[n, c] = self.beta[c] + np.dot(
                        vVec[:, None].T, x[n].flatten())

            priors = softmax(inProdEst).T

            # Compute experts distributions weighted by gate probabilities
            for c in range(0, self.nb_class):
                alpha = self.alpha[c][:]
                # Compute vec(W) for each output dimension
                wVec = []
                for j in range(self.nb_dim):
                    wmsTmp = self.W[c][j]
                    wTmp = khatriRaoProd(wmsTmp[1], wmsTmp[0])
                    for k in range(2, self.nb_dim_x):
                        wTmp = khatriRaoProd(wmsTmp[k], wTmp)

                    wVec.append(np.dot(wTmp, np.ones((self.rank_e[c], 1))))

                # Compute predictions
                yhat_tmp = np.zeros((nb_data, self.nb_dim))
                for n in range(0, nb_data):
                    for dim in range(0, self.nb_dim):
                        yhat_tmp[n, dim] = alpha[dim] + np.dot(
                            wVec[dim][:, None].T, x[n].flatten())

                # Likelihood
                Ltmp[c] = priors[c] * multi_variate_normal(
                    y, yhat_tmp, self.sigma[c], log=False)

            # Compute responsibilities
            GAMMA = Ltmp / (np.sum(Ltmp, axis=0) + 1e-100)

            LL[it] = np.sum(np.sum(GAMMA * np.log(Ltmp + 1e-100)))

            # M-step
            # Experts parameters update
            yhat = []
            for c in range(0, self.nb_class):
                r = np.diag(GAMMA[c])
                sqrGAMMA = np.sqrt(GAMMA[c])

                weighted_y = sqrGAMMA[:, None] * y
                for d in range(len(x.shape) - 1):
                    sqrGAMMA = np.expand_dims(sqrGAMMA, axis=-1)
                weighted_x = sqrGAMMA * x

                tensRR = None
                tensRR = TensorRidgeRegression(rank=self.rank_e[c])
                tensRR.training(weighted_x, weighted_y, reg=reg_rr)

                self.alpha[c] = tensRR.alpha[:]
                self.W[c] = tensRR.W

                yhat_tmp = tensRR.testing_multiple(x)
                yhat.append(yhat_tmp * priors[c][:, None])

                self.sigma[c] = np.dot(np.dot(
                    (y - yhat_tmp).T, r), (y - yhat_tmp)) / sum(
                        GAMMA[c]) + 1e-6 * np.eye(self.nb_dim)

            # Gate parameters update
            beta_init = np.array(self.beta).T
            Vall = beta_init
            for m in range(self.nb_dim_x):
                Vtmp = np.reshape(np.array(self.V[m]),
                                  (self.nb_class, dX[m] * self.rank_g)).T
                Vall = np.vstack((Vall, Vtmp))
            vinit = Vall.flatten()

            self.beta, self.V = optTensLogReg(x,
                                              GAMMA.T,
                                              rank=self.rank_g,
                                              v_init=vinit,
                                              optmethod=optmethod,
                                              reg_fact=reg_lr)

            print(it)
            # Check for convergence
            if it > nb_min_steps:
                if LL[it] - LL[it - 1] < max_diff_ll:
                    print('Converged after %d iterations: %.3e' % (it, LL[it]))
                    print(LL)
                    return LL[it]

        print(
            "TME did not converge before reaching the maximum number of iterations. Consider increasing maxiter."
        )
        print(LL)
        return LL[-1]
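The E-step above calls multi_variate_normal(y, yhat_tmp, self.sigma[c], log=False), which is not shown on this page. A minimal stand-in consistent with that call signature (per-sample means, one shared covariance per expert) could be the following sketch.

import numpy as np


def multi_variate_normal(y, mu, sigma, log=False):
    """Hedged stand-in: evaluate N(y[n] | mu[n], sigma) for every sample n.

    y, mu: (nb_data, dim), sigma: (dim, dim). Returns a length-nb_data array of
    densities (or log-densities if log=True).
    """
    dim = y.shape[1]
    diff = y - mu
    sigma_inv = np.linalg.inv(sigma)
    _, logdet = np.linalg.slogdet(sigma)
    maha = np.einsum('nd,de,ne->n', diff, sigma_inv, diff)  # squared Mahalanobis distances
    log_p = -0.5 * (maha + dim * np.log(2.0 * np.pi) + logdet)
    return log_p if log else np.exp(log_p)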
Example #7
    def training(self, x, y, reg=1e-2, maxDiffCrit=1e-4, maxIter=200):
        """
        Train the parameters of the MRR model
        :param x: input matrices (nb_data, dim1, dim2)
        :param y: output vectors (nb_data, dim_y)
        :param reg: regularization term
        :param maxDiffCrit: stopping criterion for the alternating least squares procedure
        :param maxIter: maximum number of iterations for the alternating least squares procedure
        """
        # Dimensions
        N = x.shape[0]
        d1 = x.shape[1]
        d2 = x.shape[2]
        self.dY = y.shape[1]

        for dim in range(0, self.dY):
            # Initialization
            # self.b1.append(np.random.randn(d1, self.rank))
            # self.b2.append(np.random.randn(d2, self.rank))
            # self.alpha.append(np.random.randn(1))
            self.b1.append(np.ones((d1, self.rank)))
            self.b2.append(np.ones((d2, self.rank)))
            self.alpha.append(np.zeros(1))
            self.bVec.append(np.random.randn(d1 * d2, 1))

            # Optimization of parameters (ALS procedure)
            nbIter = 1
            prevRes = 0

            while nbIter < maxIter:
                # Update b1
                zVec1 = np.zeros((N, d1 * self.rank))
                for n in range(0, N):
                    zVec1[n] = np.dot(x[n], self.b2[-1]).flatten()
                b1 = np.linalg.solve(
                    zVec1.T.dot(zVec1) + np.eye(d1 * self.rank) * reg,
                    zVec1.T).dot(y[:, dim] - self.alpha[-1])
                self.b1[-1] = np.reshape(b1, (d1, self.rank))

                # Update b2
                zVec2 = np.zeros((N, d2 * self.rank))
                for n in range(0, N):
                    zVec2[n] = np.dot(x[n].T, self.b1[-1]).flatten()
                b2 = np.linalg.solve(
                    zVec2.T.dot(zVec2) + np.eye(d2 * self.rank) * reg,
                    zVec2.T).dot(y[:, dim] - self.alpha[-1])
                self.b2[-1] = np.reshape(b2, (d2, self.rank))

                # Update alpha
                self.bVec[-1] = np.dot(khatriRaoProd(self.b2[-1], self.b1[-1]),
                                       np.ones((self.rank, 1)))
                alpha = 0
                for n in range(0, N):
                    alpha += y[n, dim] - np.dot(self.bVec[-1][:, None].T,
                                                x[n].flatten())
                self.alpha[-1] = alpha[0] / N

                # Compute residuals
                res = 0
                for n in range(0, N):
                    res += (
                        y[n, dim] - self.alpha[-1] -
                        np.dot(self.bVec[-1][:, None].T, x[n].flatten()))**2

                resDiff = prevRes - res

                # Check convergence
                if resDiff < maxDiffCrit and nbIter > 1:
                    print('MRR converged after %d iterations.' % nbIter)
                    break
                nbIter += 1
                prevRes = res

            if resDiff > maxDiffCrit:
                print('MRR did not converge after %d iterations.' % nbIter)
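A quick way to exercise the ALS procedure above is to plant a rank-1 coefficient matrix and check that training recovers it. The sketch below assumes the surrounding class is MatrixRidgeRegression (as it is instantiated in the mixture examples) and that it exposes the alpha and bVec attributes used in the method; the vec ordering of bVec depends on the khatriRaoProd convention.

import numpy as np

# Hypothetical sanity check, not part of the repository:
# plant y = <B, X> + alpha with a rank-1 B and refit it.
np.random.seed(0)
N, d1, d2 = 200, 5, 4
b1_true = np.random.randn(d1, 1)
b2_true = np.random.randn(d2, 1)
B_true = b1_true.dot(b2_true.T)
X = np.random.randn(N, d1, d2)
y = (X.reshape(N, -1).dot(B_true.flatten()) + 0.5)[:, None]  # intercept 0.5, dim_y = 1

mrr = MatrixRidgeRegression(rank=1)   # assumes the class containing the method above is in scope
mrr.training(X, y, reg=1e-3)
print(mrr.alpha[0])                                              # should be close to the planted intercept 0.5
print(np.linalg.norm(mrr.bVec[0].flatten() - B_true.flatten()))  # coefficient error (ordering-dependent)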
Example #8
    def training(self,
                 x,
                 y,
                 y_class,
                 reg_rr=1e-2,
                 reg_lr=1e-2,
                 maxiter=100,
                 max_diff_ll=1e-5,
                 optmethod='BFGS'):
        """
        Train the MME model
        :param x: input data (nb_data, dim_x)
        :param y: output data (nb_data, dim_y)
        :param y_class: class labels of the outputs (nb_data, nb_classes)
        :param reg_rr: regularization term of the experts
        :param reg_lr: regularization term of the gate
        :param maxiter: maximum number of iterations for the EM algorithm
        :param max_diff_ll: log-likelihood difference threshold for EM convergence
        :param optmethod: optimization method for the logistic regression (gate)
        :return: final log-likelihood of the trained model
        """
        nb_data = x.shape[0]
        d1 = x.shape[1]
        d2 = x.shape[2]
        self.nb_class = y_class.shape[1]
        self.nb_dim = y.shape[1]

        if type(self.rank_e) is not list:
            self.rank_e = [self.rank_e for i in range(self.nb_class)]

        # Initialization with sklearn
        # ysk = np.sum(y.T * np.array(range(0, self.nb_class))[:, None], axis=0)
        # mul_lr = sk.linear_model.LogisticRegression(multi_class='multinomial', solver='newton-cg').fit(xvec, ysk)
        # self.V = mul_lr.coef_.T
        # self.V = optLogReg(xvec.T, y.T)

        # matRR = MatrixRidgeRegression(rank=self.rank_e)
        # matRR.training(x, y)
        #
        # self.alpha = [matRR.alpha[:] for i in range(0, self.nb_class)]
        # self.b1 = [matRR.b1[:] for i in range(0, self.nb_class)]
        # self.b2 = [matRR.b2[:] for i in range(0, self.nb_class)]

        # Experts initialization
        self.alpha = []
        self.b1 = []
        self.b2 = []

        for rank_e in self.rank_e:
            matRR = MatrixRidgeRegression(rank=rank_e)
            matRR.training(x, y)
            self.alpha.append(matRR.alpha[:])
            self.b1.append(matRR.b1[:])
            self.b2.append(matRR.b2[:])

        self.sigma = [np.eye(self.nb_dim) for i in range(0, self.nb_class)]

        # Gating initialization
        matRR = MatrixRidgeRegression(rank=self.rank_g)
        matRR.training(x, y_class)
        xhi_init = np.array(matRR.alpha).T
        phi_init = np.reshape(np.array(matRR.b1),
                              (self.nb_class, d1 * self.rank_g)).T
        psi_init = np.reshape(np.array(matRR.b2),
                              (self.nb_class, d2 * self.rank_g)).T
        vinit = np.vstack((xhi_init, phi_init, psi_init)).flatten()
        self.xhi, self.phi, self.psi = optMatLogReg(x,
                                                    y_class,
                                                    rank=self.rank_g,
                                                    v_init=vinit,
                                                    optmethod=optmethod,
                                                    reg_fact=reg_lr)

        # EM algorithm
        nb_min_steps = 2  # min num iterations
        nb_max_steps = maxiter  # max iterations

        LL = np.zeros(nb_max_steps)

        for it in range(nb_max_steps):
            # E - step
            Ltmp = np.zeros((self.nb_class, nb_data))

            # Compute gate probabilities
            inProdEst = np.zeros((nb_data, self.nb_class))
            for n in range(0, nb_data):
                for dim in range(0, self.nb_class):
                    phipsiVec = np.dot(
                        khatriRaoProd(self.psi[dim], self.phi[dim]),
                        np.ones((self.rank_g, 1)))
                    inProdEst[n, dim] = self.xhi[dim] + np.dot(
                        phipsiVec[:, None].T, x[n].flatten())

            priors = softmax(inProdEst).T

            # Compute experts distributions weighted by gate probabilities
            for i in range(0, self.nb_class):
                alpha = self.alpha[i][:]
                b1tmp = self.b1[i][:]
                b2tmp = self.b2[i][:]
                bVec = [
                    np.dot(khatriRaoProd(b2tmp[j], b1tmp[j]),
                           np.ones((self.rank_e[i], 1)))
                    for j in range(self.nb_dim)
                ]

                yhat_tmp = np.zeros((nb_data, self.nb_dim))
                for n in range(0, nb_data):
                    for dim in range(0, self.nb_dim):
                        yhat_tmp[n, dim] = alpha[dim] + np.dot(
                            bVec[dim][:, None].T, x[n].flatten())

                Ltmp[i] = priors[i] * multi_variate_normal(
                    y, yhat_tmp, self.sigma[i], log=False)

            # Compute responsibilities
            GAMMA = Ltmp / (np.sum(Ltmp, axis=0) + 1e-100)

            LL[it] = np.sum(np.sum(GAMMA * np.log(Ltmp + 1e-100)))

            # M-step
            # Experts parameters update
            yhat = []
            for i in range(0, self.nb_class):
                r = np.diag(GAMMA[i])
                sqrGAMMA = np.sqrt(GAMMA[i])
                weighted_x = sqrGAMMA[:, None, None] * x
                weighted_y = sqrGAMMA[:, None] * y

                matRR = None
                matRR = MatrixRidgeRegression(rank=self.rank_e[i])
                matRR.training(weighted_x, weighted_y, reg=reg_rr)

                self.alpha[i] = matRR.alpha[:]
                self.b1[i] = matRR.b1[:]
                self.b2[i] = matRR.b2[:]

                yhat_tmp = matRR.testing_multiple(x)
                yhat.append(yhat_tmp * priors[i][:, None])

                self.sigma[i] = np.dot(np.dot(
                    (y - yhat_tmp).T, r), (y - yhat_tmp)) / sum(
                        GAMMA[i]) + 1e-6 * np.eye(self.nb_dim)

            # Gate parameters update
            xhi_init = np.array(self.xhi).T
            phi_init = np.reshape(np.array(self.phi),
                                  (self.nb_class, d1 * self.rank_g)).T
            psi_init = np.reshape(np.array(self.psi),
                                  (self.nb_class, d2 * self.rank_g)).T
            vinit = np.vstack((xhi_init, phi_init, psi_init)).flatten()
            self.xhi, self.phi, self.psi = optMatLogReg(x,
                                                        GAMMA.T,
                                                        rank=self.rank_g,
                                                        v_init=vinit,
                                                        optmethod=optmethod,
                                                        reg_fact=reg_lr)

            print(it)
            # Check for convergence
            if it > nb_min_steps:
                if LL[it] - LL[it - 1] < max_diff_ll:
                    print('Converged after %d iterations: %.3e' % (it, LL[it]))
                    print(LL)
                    return LL[it]

        print(
            "MME did not converge before reaching the maximum number of iterations. Consider increasing maxiter."
        )
        print(LL)
        return LL[-1]
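optMatLogReg (and optTensLogReg in the tensor variant) is not shown on this page; it is presumably a thin wrapper that minimizes the func closure with its gradient grad (see the other examples above) via scipy.optimize.minimize and unpacks the optimum. The sketch below takes func and grad as arguments instead of building them internally, so its signature is simplified compared to the call sites; everything else is an assumption about what the wrapper does.

import numpy as np
from scipy.optimize import minimize


def opt_mat_log_reg(func, grad, v_init, nb_dim1, nb_dim2, rank, nb_class,
                    optmethod='BFGS'):
    """Hedged sketch: minimize the negative log-likelihood and unpack the
    solution into the intercepts xhi and the factor matrices phi, psi."""
    res = minimize(func, v_init, method=optmethod, jac=grad,
                   options={'maxiter': 200})

    nb_dim = nb_dim1 + nb_dim2
    V = np.reshape(res.x, (nb_dim * rank + 1, nb_class))
    xhi = V[0]
    phi = [np.reshape(V[1:nb_dim1 * rank + 1, i], (nb_dim1, rank))
           for i in range(nb_class)]
    psi = [np.reshape(V[nb_dim1 * rank + 1:, i], (nb_dim2, rank))
           for i in range(nb_class)]
    return xhi, phi, psi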
Example #9
    def training(self, x, y, reg=1e-2, maxDiffCrit=1e-4, maxIter=200):
        """
        Train the parameters of the TRR model
        :param x: input tensors (nb_data, dim1, dim2, ...)
        :param y: output vectors (nb_data, dim_y)
        :param reg: regularization term
        :param maxDiffCrit: stopping criterion for the alternating least squares procedure
        :param maxIter: maximum number of iterations for the alternating least squares procedure
        """
        # Dimensions
        N = x.shape[0]
        dX = x.shape[1:]
        self.dY = y.shape[1]

        for dim in range(0, self.dY):
            # Initialization
            wms = []
            for m in range(len(dX)):
                wms.append(np.ones((dX[m], self.rank)))

            self.alpha.append(np.zeros(1))
            self.wVec.append(np.reshape(np.zeros(dX), -1))

            # Optimization of parameters (ALS procedure)
            nbIter = 1
            prevRes = 0

            while nbIter < maxIter:
                for m in range(len(dX)):
                    # Compute Wm complement (WM o ... o Wm+1 o Wm-1 o ... o W1)
                    if m == 0:
                        wmComplement = wms[1]
                        for i in range(2, len(dX)):
                            wmComplement = khatriRaoProd(wms[i], wmComplement)
                    else:
                        wmComplement = wms[0]
                        for i in range(1, len(dX)):
                            if i != m:
                                wmComplement = khatriRaoProd(
                                    wms[i], wmComplement)

                    # Update Wm
                    zVec = np.zeros((N, dX[m] * self.rank))
                    for n in range(0, N):
                        zVec[n] = np.dot(tensor2mat(x[n], m),
                                         wmComplement).flatten()
                    wm = np.linalg.solve(
                        zVec.T.dot(zVec) + np.eye(dX[m] * self.rank) * reg,
                        zVec.T).dot(y[:, dim] - self.alpha[-1])
                    wms[m] = np.reshape(wm, (dX[m], self.rank))

                # Update alpha
                wTmp = khatriRaoProd(wms[1], wms[0])
                for i in range(2, len(dX)):
                    wTmp = khatriRaoProd(wms[i], wTmp)

                self.wVec[-1] = np.dot(wTmp, np.ones((self.rank, 1)))
                alpha = 0
                for n in range(0, N):
                    alpha += y[n, dim] - np.dot(self.wVec[-1][:, None].T,
                                                x[n].flatten())
                self.alpha[-1] = alpha[0] / N

                # Compute residuals
                res = 0
                for n in range(0, N):
                    res += (
                        y[n, dim] - self.alpha[-1] -
                        np.dot(self.wVec[-1][:, None].T, x[n].flatten()))**2

                resDiff = prevRes - res

                # Check convergence
                if resDiff < maxDiffCrit and nbIter > 1:
                    print('TRR converged after %d iterations.' % nbIter)
                    break
                nbIter += 1
                prevRes = res

            if resDiff > maxDiffCrit:
                print('TRR did not converge after %d iterations.' % nbIter)

            self.W.append(wms)
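The ALS update above unfolds each input tensor with tensor2mat(x[n], m), which is not defined on this page. One common mode-m matricization is sketched below; the ordering of the remaining modes must agree with how wmComplement is built, so this is an assumption about the repository's helper rather than its exact implementation.

import numpy as np


def tensor2mat(x, mode):
    """Hedged stand-in: mode-`mode` unfolding of the tensor x.

    Rows index the chosen mode, columns index all remaining modes; the column
    ordering (moveaxis followed by a C-order reshape) is one common convention.
    """
    return np.moveaxis(x, mode, 0).reshape(x.shape[mode], -1)


# Example: a (3, 4, 5) tensor unfolds along mode 1 into a (4, 15) matrix.
print(tensor2mat(np.arange(60).reshape(3, 4, 5), 1).shape)  # (4, 15)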
Example #10
	# Tensor-valued mixture of experts
	print('Tensor-valued mixture of experts...')
	tme = TensorMixtureLinearExperts([tme_rank_e, tme_rank_e], tme_rank_g)
	tme_LL = tme.training(X, y, y_class, reg_rr=1e-1, reg_lr=1e-1, maxiter=20, max_diff_ll=5.0, optmethod='CG')

	tme_coeffs = []
	tme_coeffs_lr = []
	for c in range(0, tme.nb_class):
		# TRR part
		alpha = tme.alpha[c][:]
		wmsTmp = tme.W[c][0]
		# Compute vec(W)
		tme_bVec = []
		for j in range(tme.nb_dim):
			wTmp = khatriRaoProd(wmsTmp[1], wmsTmp[0])
			for k in range(2, tme.nb_dim_x):
				wTmp = khatriRaoProd(wmsTmp[k], wTmp)

			tme_bVec.append(np.dot(wTmp, np.ones((tme.rank_e[c], 1))))

		tme_coeffs.append(np.reshape(tme_bVec[0], tuple_dim))

		# TLR part
		vTmp = khatriRaoProd(tme.V[1][c], tme.V[0][c])
		for j in range(2, tme.nb_dim_x):
			vTmp = khatriRaoProd(tme.V[j][c], vTmp)
		tme_vVec = np.dot(vTmp, np.ones((tme.rank_g, 1)))
		tme_coeffs_lr.append(np.reshape(tme_vVec, tuple_dim))

	rmse_tme_coeffs_rr = np.sqrt(np.sum([(np.sum((bVec[i] - tme_coeffs[i].flatten()) ** 2))
Example #11
	tot_rmse = np.sum([(np.sum((bVec[i] - me_coeffs[i].flatten()) ** 2))
					   for i in range(Nclass)]) + np.sum((np.sum((phipsiVec - me_coeffs_lr[0].flatten()) ** 2)))
	tot_rmse /= Nclass * d1 * d2 + d1 * d2
	rmse_me_coeffs = tot_rmse

	# Matrix-valued mixture of experts
	print('Matrix-valued mixture of experts...')
	mme = MaxtrixMixtureLinearExperts([mme_rank_e, mme_rank_e], mme_rank_g)
	mme_LL = mme.training(X, y, y_class, reg_rr=1e-1, reg_lr=1e-1, maxiter=20, max_diff_ll=5.0, optmethod='CG')
	mme_coeffs = []
	mme_coeffs_lr = []
	for i in range(0, mme.nb_class):
		alpha = mme.alpha[i][:]
		b1tmp = mme.b1[i][:]
		b2tmp = mme.b2[i][:]
		mme_bVec = [np.dot(khatriRaoProd(b2tmp[j], b1tmp[j]), np.ones((mme.rank_e[i], 1))) for j in range(mme.nb_dim)]
		mme_coeffs.append(np.reshape(mme_bVec[0], (d1, d2)))
		mme_phipsiVec = np.dot(khatriRaoProd(mme.psi[i], mme.phi[i]), np.ones((mme.rank_g, 1)))
		mme_coeffs_lr.append(np.reshape(mme_phipsiVec, (d1, d2)))

	rmse_mme_coeffs_rr = np.sqrt(np.sum([(np.sum((bVec[i] - mme_coeffs[i].flatten()) ** 2))
										 for i in range(Nclass)]) / (Nclass * d1 * d2))

	rmse_mme_coeffs_lr = np.sqrt(np.sum((np.sum((phipsiVec - mme_coeffs_lr[0].flatten()) ** 2))) / (d1 * d2))

	tot_rmse = np.sum([(np.sum((bVec[i] - mme_coeffs[i].flatten()) ** 2)) for i in range(Nclass)]) + \
			   np.sum((np.sum((phipsiVec - mme_coeffs_lr[0].flatten()) ** 2)))
	tot_rmse /= Nclass * d1 * d2 + d1 * d2
	rmse_mme_coeffs = np.sqrt(tot_rmse)

	# Show recovered coefficients
    def grad(v):
        # Recover parameters from vector v
        Vall = np.reshape(v, (nb_dim * rank + 1, nb_class))
        beta = Vall[0]
        V = []
        start = 1
        end = dims[0] * rank + 1
        for m in range(len(dims)):
            Vtmp = [
                np.reshape(Vall[start:end, i], (dims[m], rank))
                for i in range(nb_class)
            ]
            V.append(Vtmp)

            start = end
            if m < len(dims) - 1:
                end += dims[m + 1] * rank

        # Compute probabilities
        inProdEst = np.zeros((nb_data, nb_class))
        for n in range(0, nb_data):
            for c in range(0, nb_class):
                vTmp = khatriRaoProd(V[1][c], V[0][c])
                for i in range(2, len(dims)):
                    vTmp = khatriRaoProd(V[i][c], vTmp)
                vVec = np.dot(vTmp, np.ones((rank, 1)))
                inProdEst[n, c] = beta[c] + np.dot(vVec[:, None].T,
                                                   x[n].flatten())

        est = softmax(inProdEst)

        # Compute gradients
        grad_beta = np.sum(est - y, axis=0).flatten()

        grad_vec = grad_beta.flatten()

        gradV = [
            np.zeros((dims[m] * rank, nb_class)) for m in range(len(dims))
        ]
        for c in range(nb_class):
            for m in range(len(dims)):
                # Compute Vm complement (VM o ... o Vm+1 o Vm-1 o ... o V1)
                if m == 0:
                    vmComplement = V[1][c]
                    for i in range(2, len(dims)):
                        vmComplement = khatriRaoProd(V[i][c], vmComplement)
                else:
                    vmComplement = V[0][c]
                    for i in range(1, len(dims)):
                        if i != m:
                            vmComplement = khatriRaoProd(V[i][c], vmComplement)

                # Gradient
                zVec = np.zeros((nb_data, dims[m] * rank))
                for n in range(0, nb_data):
                    zVec[n] = np.dot(tensor2mat(x[n], m),
                                     vmComplement).flatten()

                gradV[m][:, c] = np.dot(zVec.T, (est[:, c] - y[:, c]))

                # Regularization term
                gradV[m][:, c] += 2 * reg_fact * V[m][c].flatten()

        for m in range(len(dims)):
            grad_vec = np.hstack((grad_vec, gradV[m].flatten()))

        return grad_vec
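Since func and grad are hand-derived closures that get handed to the optimizer, it can be worth checking them against a finite-difference approximation before training. The sketch below uses scipy.optimize.check_grad on a random parameter vector of the right length; the dimensions are purely illustrative, and func and grad are assumed to be the closures defined above, built over the actual data and in scope.

import numpy as np
from scipy.optimize import check_grad

# Illustrative shapes only: e.g. dims = (5, 4) gives nb_dim = 9.
nb_dim, rank, nb_class = 9, 2, 3
v0 = np.random.randn((nb_dim * rank + 1) * nb_class)

# check_grad returns the 2-norm of the difference between grad(v0) and a
# finite-difference approximation of the gradient of func at v0.
err = check_grad(func, grad, v0)
print('gradient check error: %.3e' % err)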