예제 #1
0
    def sym_mask_logdensity_estimator_intermediate(self, x, mask):
        non_linearity_name = self.parameters["nonlinearity"].get_name()
        assert non_linearity_name == "sigmoid" or non_linearity_name == "RLU"
        x = x.T  # BxD
        mask = mask.T  # BxD
        output_mask = constantX(1) - mask  # BxD
        D = constantX(self.n_visible)
        d = mask.sum(1)  # d is the 1-based index of the dimension whose value to infer (not the size of the context)
        masked_input = x * mask  # BxD
        h = self.nonlinearity(T.dot(masked_input, self.W1) + T.dot(mask, self.Wflags) + self.b1)  # BxH
        for l in xrange(self.n_layers - 1):
            h = self.nonlinearity(T.dot(h, self.Ws[l]) + self.bs[l])  # BxH
        z_alpha = T.tensordot(h, self.V_alpha, [[1], [1]]) + T.shape_padleft(self.b_alpha)
        z_mu = T.tensordot(h, self.V_mu, [[1], [1]]) + T.shape_padleft(self.b_mu)
        z_sigma = T.tensordot(h, self.V_sigma, [[1], [1]]) + T.shape_padleft(self.b_sigma)
        temp = T.exp(z_alpha)  # + 1e-6
        # temp += T.shape_padright(temp.sum(2)/1e-3)
        Alpha = temp / T.shape_padright(temp.sum(2))  # BxDxC
        Mu = z_mu  # BxDxC
        Sigma = T.exp(z_sigma)  # + 1e-6 #BxDxC

        # Alpha = Alpha * T.shape_padright(output_mask) + T.shape_padright(mask)
        # Mu = Mu * T.shape_padright(output_mask)
        # Sigma = Sigma * T.shape_padright(output_mask) + T.shape_padright(mask)
        # Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x*output_mask)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2*np.pi)) #BxDxC

        Phi = (
            -constantX(0.5) * T.sqr((Mu - T.shape_padright(x)) / Sigma)
            - T.log(Sigma)
            - constantX(0.5 * np.log(2 * np.pi))
        )  # BxDxC
        logdensity = (log_sum_exp(Phi + T.log(Alpha), axis=2) * output_mask).sum(1) * D / (D - d)
        return (logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)
예제 #2
0
        def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu,
                                  b_mu, V_sigma, b_sigma, activation_factor,
                                  a_i, lp_accum, dP_da_ip1):
            B = T.cast(x_i.shape[0], theano.config.floatX)
            pot = a_i * activation_factor
            h = self.nonlinearity(pot)  # BxH

            z_alpha = T.dot(h, V_alpha) + T.shape_padleft(b_alpha)
            z_mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)
            z_sigma = T.dot(h, V_sigma) + T.shape_padleft(b_sigma)

            Alpha = T.nnet.softmax(z_alpha)  # BxC
            Mu = z_mu  # BxC
            Sigma = T.exp(z_sigma)  # BxC

            Phi = -T.log(
                2 * Sigma) - T.abs_(Mu - T.shape_padright(x_i, 1)) / Sigma
            wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))

            lp_current = log_sum_exp(wPhi)
            # lp_current_sum = T.sum(lp_current)

            Pi = T.exp(wPhi - T.shape_padright(lp_current, 1))  # #
            dp_dz_alpha = Pi - Alpha  # BxC
            # dp_dz_alpha = T.grad(lp_current_sum, z_alpha)
            gb_alpha = dp_dz_alpha.mean(0, dtype=theano.config.floatX)  # C
            gV_alpha = T.dot(h.T, dp_dz_alpha) / B  # HxC

            # dp_dz_mu = T.grad(lp_current_sum, z_mu)
            dp_dz_mu = Pi * T.sgn(T.shape_padright(x_i, 1) - Mu) / Sigma
            # dp_dz_mu = dp_dz_mu * Sigma
            gb_mu = dp_dz_mu.mean(0, dtype=theano.config.floatX)
            gV_mu = T.dot(h.T, dp_dz_mu) / B

            # dp_dz_sigma = T.grad(lp_current_sum, z_sigma)
            dp_dz_sigma = Pi * (T.abs_(T.shape_padright(x_i, 1) - Mu) / Sigma -
                                1)
            gb_sigma = dp_dz_sigma.mean(0, dtype=theano.config.floatX)
            gV_sigma = T.dot(h.T, dp_dz_sigma) / B

            dp_dh = T.dot(dp_dz_alpha, V_alpha.T) + T.dot(
                dp_dz_mu, V_mu.T) + T.dot(dp_dz_sigma, V_sigma.T)  # BxH
            if non_linearity_name == "sigmoid":
                dp_dpot = dp_dh * h * (1 - h)
            elif non_linearity_name == "RLU":
                dp_dpot = dp_dh * (pot > 0)

            gfact = (dp_dpot * a_i).sum(1).mean(
                0, dtype=theano.config.floatX)  # 1

            dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
            gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

            return (a_i -
                    T.dot(T.shape_padright(x_im1, 1), T.shape_padleft(w_i, 1)),
                    lp_accum + lp_current, dP_da_i, gW, gb_alpha, gV_alpha,
                    gb_mu, gV_mu, gb_sigma, gV_sigma, gfact)
예제 #3
0
        def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
            a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
            h = self.nonlinearity(a * activations_factor)  # BxH

            Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
            Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
            Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
            p = p_prev + log_sum_exp(T.log(Alpha) - T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x, 1)) / Sigma)
            return (p, a, x)
예제 #4
0
        def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activation_factor, a_i, lp_accum, dP_da_ip1):
            B = T.cast(x_i.shape[0], theano.config.floatX)
            pot = a_i * activation_factor
            h = self.nonlinearity(pot)  # BxH

            z_alpha = T.dot(h, V_alpha) + T.shape_padleft(b_alpha)
            z_mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)
            z_sigma = T.dot(h, V_sigma) + T.shape_padleft(b_sigma)

            Alpha = T.nnet.softmax(z_alpha)  # BxC
            Mu = z_mu  # BxC
            Sigma = T.exp(z_sigma)  # BxC

            Phi = -T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x_i, 1)) / Sigma
            wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))

            lp_current = log_sum_exp(wPhi)
            # lp_current_sum = T.sum(lp_current)

            Pi = T.exp(wPhi - T.shape_padright(lp_current, 1))  # #
            dp_dz_alpha = Pi - Alpha  # BxC
            # dp_dz_alpha = T.grad(lp_current_sum, z_alpha)
            gb_alpha = dp_dz_alpha.mean(0, dtype=theano.config.floatX)  # C
            gV_alpha = T.dot(h.T, dp_dz_alpha) / B  # HxC

            # dp_dz_mu = T.grad(lp_current_sum, z_mu)
            dp_dz_mu = Pi * T.sgn(T.shape_padright(x_i, 1) - Mu) / Sigma
            # dp_dz_mu = dp_dz_mu * Sigma
            gb_mu = dp_dz_mu.mean(0, dtype=theano.config.floatX)
            gV_mu = T.dot(h.T, dp_dz_mu) / B

            # dp_dz_sigma = T.grad(lp_current_sum, z_sigma)
            dp_dz_sigma = Pi * (T.abs_(T.shape_padright(x_i, 1) - Mu) / Sigma - 1)
            gb_sigma = dp_dz_sigma.mean(0, dtype=theano.config.floatX)
            gV_sigma = T.dot(h.T, dp_dz_sigma) / B

            dp_dh = T.dot(dp_dz_alpha, V_alpha.T) + T.dot(dp_dz_mu, V_mu.T) + T.dot(dp_dz_sigma, V_sigma.T)  # BxH
            if non_linearity_name == "sigmoid":
                dp_dpot = dp_dh * h * (1 - h)
            elif non_linearity_name == "RLU":
                dp_dpot = dp_dh * (pot > 0)

            gfact = (dp_dpot * a_i).sum(1).mean(0, dtype=theano.config.floatX)  # 1

            dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
            gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

            return (a_i - T.dot(T.shape_padright(x_im1, 1), T.shape_padleft(w_i, 1)),
                    lp_accum + lp_current,
                    dP_da_i,
                    gW, gb_alpha, gV_alpha, gb_mu, gV_mu, gb_sigma, gV_sigma, gfact)
예제 #5
0
        def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu,
                                           V_sigma, b_sigma,
                                           activations_factor, p_prev, a_prev,
                                           x_prev):
            a = a_prev + T.dot(T.shape_padright(x_prev, 1),
                               T.shape_padleft(w, 1))
            h = self.nonlinearity(a * activations_factor)  # BxH

            Alpha = T.nnet.softmax(
                T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
            Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
            Sigma = T.exp(
                (T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
            p = p_prev + log_sum_exp(
                T.log(Alpha) - T.log(2 * Sigma) -
                T.abs_(Mu - T.shape_padright(x, 1)) / Sigma)
            return (p, a, x)
예제 #6
0
    def sym_mask_logdensity_estimator_intermediate(self, x, mask):
        non_linearity_name = self.parameters["nonlinearity"].get_name()
        assert (non_linearity_name == "sigmoid" or non_linearity_name == "RLU")
        x = x.T  # BxD
        mask = mask.T  # BxD
        output_mask = constantX(1) - mask  # BxD
        D = constantX(self.n_visible)
        # d is the 1-based index of the dimension whose value to infer (not the
        # size of the context)
        d = mask.sum(1)
        masked_input = x * mask  # BxD
        h = self.nonlinearity(
            T.dot(masked_input, self.W1) + T.dot(mask, self.Wflags) +
            self.b1)  # BxH
        for l in xrange(self.n_layers - 1):
            h = self.nonlinearity(T.dot(h, self.Ws[l]) + self.bs[l])  # BxH
        z_alpha = T.tensordot(h, self.V_alpha, [[1], [1]]) + T.shape_padleft(
            self.b_alpha)
        z_mu = T.tensordot(h, self.V_mu, [[1], [1]]) + T.shape_padleft(
            self.b_mu)
        z_sigma = T.tensordot(h, self.V_sigma, [[1], [1]]) + T.shape_padleft(
            self.b_sigma)
        temp = T.exp(z_alpha)  # + 1e-6
        # temp += T.shape_padright(temp.sum(2)/1e-3)
        Alpha = temp / T.shape_padright(temp.sum(2))  # BxDxC
        Mu = z_mu  # BxDxC
        Sigma = T.exp(z_sigma)  # + 1e-6 #BxDxC

        # Alpha = Alpha * T.shape_padright(output_mask) + T.shape_padright(mask)
        # Mu = Mu * T.shape_padright(output_mask)
        # Sigma = Sigma * T.shape_padright(output_mask) + T.shape_padright(mask)
        # Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x*output_mask)) /
        # Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2*np.pi)) #BxDxC

        Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x)) / Sigma) - \
            T.log(Sigma) - constantX(0.5 * np.log(2 * np.pi))  # BxDxC
        logdensity = (log_sum_exp(Phi + T.log(Alpha), axis=2) *
                      output_mask).sum(1) * D / (D - d)
        return (logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)