コード例 #1
0
 def dense_layer(layer_in,
                 n,
                 dist_w=init.GlorotNormal,
                 dist_b=init.Normal):
     """Stack a linear dense layer, optional batch norm, and the model nonlinearity.

     ``hid_w``, ``init_w``, ``batchnorm`` and ``self`` are free variables taken
     from the enclosing scope, which is not part of this snippet.

     :param layer_in: Layer feeding the new dense layer.
     :param n: Number of units of the dense layer.
     :param dist_w: Weight initializer class; instantiated with ``hid_w``.
     :param dist_b: Bias initializer class; instantiated with ``init_w``.
     :return: A ``NonlinearityLayer`` applying ``self.transf`` on top of the stack.
     """
     weight_init = dist_w(hid_w)
     bias_init = dist_b(init_w)
     # The dense layer itself stays linear (nonlinearity=None) so that batch
     # normalization, when enabled, sees the pre-activation values.
     layer = DenseLayer(layer_in, n, weight_init, bias_init, None)
     layer = BatchNormLayer(layer) if batchnorm else layer
     return NonlinearityLayer(layer, self.transf)
コード例 #2
0
    def __init__(self, n_x, n_a, n_z, n_y, qa_hid, qz_hid, qy_hid, px_hid, pa_hid, nonlinearity=rectify,
                 px_nonlinearity=None, x_dist='bernoulli', batchnorm=False, seed=1234):
        """
        Initialize a skip deep generative model consisting of
        discriminative classifier q(y|a,x),
        generative model P p(a|z,y) and p(x|a,z,y),
        inference model Q q(a|x) and q(z|a,x,y).
        Hidden weights are initialized with the Glorot-normal scheme; biases and the
        mu/logvar layers use a normal initializer parameterized by 1e-3.
        :param n_x: Number of inputs.
        :param n_a: Number of auxiliary.
        :param n_z: Number of latent.
        :param n_y: Number of classes.
        :param qa_hid: List of number of deterministic hidden q(a|x); may be empty.
        :param qz_hid: List of number of deterministic hidden q(z|a,x,y); needs at least one entry.
        :param qy_hid: List of number of deterministic hidden q(y|a,x); needs at least one entry.
        :param px_hid: List of number of deterministic hidden p(x|a,z,y); needs at least one entry.
        :param pa_hid: List of number of deterministic hidden p(a|z,y); needs at least one entry.
        :param nonlinearity: The transfer function; forwarded to the base class and used to pick the Glorot gain.
        :param px_nonlinearity: Nonlinearity of the mu and logvar layers of p(x|a,z,y) when x_dist is 'gaussian'.
        :param x_dist: The x distribution, 'bernoulli', 'multinomial', or 'gaussian'. Any other value adds no
            output layer on top of the last p(x|a,z,y) hidden layer.
        :param batchnorm: Boolean value for batch normalization.
        :param seed: The random seed.
        """
        super(SDGMSSL, self).__init__(n_x, qz_hid + px_hid, n_a + n_z, nonlinearity)
        self.x_dist = x_dist
        self.n_y = n_y
        self.n_x = n_x
        self.n_a = n_a
        self.n_z = n_z
        self.batchnorm = batchnorm
        self._srng = RandomStreams(seed)

        # Decide Glorot initialization of weights.
        # The Glorot gain must be a number or the string 'relu'; the neutral
        # gain 1.0 is used for every nonlinearity other than rectify/softplus.
        init_w = 1e-3
        hid_w = 1.0
        if nonlinearity == rectify or nonlinearity == softplus:
            hid_w = "relu"

        # Define symbolic variables for theano functions.
        self.sym_beta = T.scalar('beta')  # scaling constant beta
        self.sym_x_l = T.matrix('x')  # labeled inputs
        self.sym_t_l = T.matrix('t')  # labeled targets
        self.sym_x_u = T.matrix('x')  # unlabeled inputs
        self.sym_bs_l = T.iscalar('bs_l')  # number of labeled data
        self.sym_samples = T.iscalar('samples')  # MC samples
        self.sym_z = T.matrix('z')  # latent variable z
        self.sym_a = T.matrix('a')  # auxiliary variable a

        # Assist methods for collecting the layers
        def dense_layer(layer_in, n, dist_w=init.GlorotNormal, dist_b=init.Normal):
            # Linear dense layer -> optional batch norm -> self.transf
            # (self.transf is presumably set by the base class -- confirm).
            dense = DenseLayer(layer_in, n, dist_w(hid_w), dist_b(init_w), None)
            if batchnorm:
                dense = BatchNormLayer(dense)
            return NonlinearityLayer(dense, self.transf)

        def stochastic_layer(layer_in, n, samples, nonlin=None):
            # Two parallel dense heads (mu, logvar) feeding a SampleLayer;
            # returns the sample layer together with both heads.
            mu = DenseLayer(layer_in, n, init.Normal(init_w), init.Normal(init_w), nonlin)
            logvar = DenseLayer(layer_in, n, init.Normal(init_w), init.Normal(init_w), nonlin)
            return SampleLayer(mu, logvar, eq_samples=samples, iw_samples=1), mu, logvar

        # Input layers
        l_x_in = InputLayer((None, n_x))
        l_y_in = InputLayer((None, n_y))

        # Auxiliary q(a|x)
        l_qa_x = l_x_in
        for hid in qa_hid:
            l_qa_x = dense_layer(l_qa_x, hid)
        l_qa_x, l_qa_x_mu, l_qa_x_logvar = stochastic_layer(l_qa_x, n_a, self.sym_samples)

        # Classifier q(y|a,x)
        # Each conditioning input gets its own linear projection; the
        # projections are brought to 4 dimensions, summed, and flattened again.
        l_qa_to_qy = DenseLayer(l_qa_x, qy_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
        l_qa_to_qy = ReshapeLayer(l_qa_to_qy, (-1, self.sym_samples, 1, qy_hid[0]))
        l_x_to_qy = DenseLayer(l_x_in, qy_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
        l_x_to_qy = DimshuffleLayer(l_x_to_qy, (0, 'x', 'x', 1))
        l_qy_xa = ReshapeLayer(ElemwiseSumLayer([l_qa_to_qy, l_x_to_qy]), (-1, qy_hid[0]))
        if batchnorm:
            l_qy_xa = BatchNormLayer(l_qy_xa)
        l_qy_xa = NonlinearityLayer(l_qy_xa, self.transf)
        for hid in qy_hid[1:]:
            l_qy_xa = dense_layer(l_qy_xa, hid)
        l_qy_xa = DenseLayer(l_qy_xa, n_y, init.GlorotNormal(), init.Normal(init_w), softmax)

        # Recognition q(z|x,a,y)
        l_qa_to_qz = DenseLayer(l_qa_x, qz_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
        l_qa_to_qz = ReshapeLayer(l_qa_to_qz, (-1, self.sym_samples, 1, qz_hid[0]))
        l_x_to_qz = DenseLayer(l_x_in, qz_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
        l_x_to_qz = DimshuffleLayer(l_x_to_qz, (0, 'x', 'x', 1))
        l_y_to_qz = DenseLayer(l_y_in, qz_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
        l_y_to_qz = DimshuffleLayer(l_y_to_qz, (0, 'x', 'x', 1))
        l_qz_axy = ReshapeLayer(ElemwiseSumLayer([l_qa_to_qz, l_x_to_qz, l_y_to_qz]), (-1, qz_hid[0]))
        if batchnorm:
            l_qz_axy = BatchNormLayer(l_qz_axy)
        l_qz_axy = NonlinearityLayer(l_qz_axy, self.transf)
        for hid in qz_hid[1:]:
            l_qz_axy = dense_layer(l_qz_axy, hid)
        l_qz_axy, l_qz_axy_mu, l_qz_axy_logvar = stochastic_layer(l_qz_axy, n_z, 1)

        # Generative p(a|z,y)
        l_y_to_pa = DenseLayer(l_y_in, pa_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
        l_y_to_pa = DimshuffleLayer(l_y_to_pa, (0, 'x', 'x', 1))
        l_qz_to_pa = DenseLayer(l_qz_axy, pa_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
        l_qz_to_pa = ReshapeLayer(l_qz_to_pa, (-1, self.sym_samples, 1, pa_hid[0]))
        l_pa_zy = ReshapeLayer(ElemwiseSumLayer([l_qz_to_pa, l_y_to_pa]), [-1, pa_hid[0]])
        if batchnorm:
            l_pa_zy = BatchNormLayer(l_pa_zy)
        l_pa_zy = NonlinearityLayer(l_pa_zy, self.transf)
        for hid in pa_hid[1:]:
            l_pa_zy = dense_layer(l_pa_zy, hid)
        l_pa_zy, l_pa_zy_mu, l_pa_zy_logvar = stochastic_layer(l_pa_zy, n_a, 1)

        # Generative p(x|a,z,y)
        l_qa_to_px = DenseLayer(l_qa_x, px_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
        l_qa_to_px = ReshapeLayer(l_qa_to_px, (-1, self.sym_samples, 1, px_hid[0]))
        l_y_to_px = DenseLayer(l_y_in, px_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
        l_y_to_px = DimshuffleLayer(l_y_to_px, (0, 'x', 'x', 1))
        l_qz_to_px = DenseLayer(l_qz_axy, px_hid[0], init.GlorotNormal(hid_w), init.Normal(init_w), None)
        l_qz_to_px = ReshapeLayer(l_qz_to_px, (-1, self.sym_samples, 1, px_hid[0]))
        l_px_azy = ReshapeLayer(ElemwiseSumLayer([l_qa_to_px, l_qz_to_px, l_y_to_px]), [-1, px_hid[0]])
        if batchnorm:
            l_px_azy = BatchNormLayer(l_px_azy)
        l_px_azy = NonlinearityLayer(l_px_azy, self.transf)
        for hid in px_hid[1:]:
            l_px_azy = dense_layer(l_px_azy, hid)

        # Output layer of p(x|a,z,y); l_px_zy_mu / l_px_zy_logvar exist only
        # in the 'gaussian' case and are guarded accordingly further down.
        if x_dist == 'bernoulli':
            l_px_azy = DenseLayer(l_px_azy, n_x, init.GlorotNormal(), init.Normal(init_w), sigmoid)
        elif x_dist == 'multinomial':
            l_px_azy = DenseLayer(l_px_azy, n_x, init.GlorotNormal(), init.Normal(init_w), softmax)
        elif x_dist == 'gaussian':
            l_px_azy, l_px_zy_mu, l_px_zy_logvar = stochastic_layer(l_px_azy, n_x, 1, px_nonlinearity)

        # Reshape all the model layers to have the same size
        self.l_x_in = l_x_in
        self.l_y_in = l_y_in
        self.l_a_in = l_qa_x

        self.l_qa = ReshapeLayer(l_qa_x, (-1, self.sym_samples, 1, n_a))
        self.l_qa_mu = DimshuffleLayer(l_qa_x_mu, (0, 'x', 'x', 1))
        self.l_qa_logvar = DimshuffleLayer(l_qa_x_logvar, (0, 'x', 'x', 1))

        self.l_qz = ReshapeLayer(l_qz_axy, (-1, self.sym_samples, 1, n_z))
        self.l_qz_mu = ReshapeLayer(l_qz_axy_mu, (-1, self.sym_samples, 1, n_z))
        self.l_qz_logvar = ReshapeLayer(l_qz_axy_logvar, (-1, self.sym_samples, 1, n_z))

        self.l_qy = ReshapeLayer(l_qy_xa, (-1, self.sym_samples, 1, n_y))

        self.l_pa = ReshapeLayer(l_pa_zy, (-1, self.sym_samples, 1, n_a))
        self.l_pa_mu = ReshapeLayer(l_pa_zy_mu, (-1, self.sym_samples, 1, n_a))
        self.l_pa_logvar = ReshapeLayer(l_pa_zy_logvar, (-1, self.sym_samples, 1, n_a))

        self.l_px = ReshapeLayer(l_px_azy, (-1, self.sym_samples, 1, n_x))
        self.l_px_mu = ReshapeLayer(l_px_zy_mu, (-1, self.sym_samples, 1, n_x)) if x_dist == "gaussian" else None
        self.l_px_logvar = ReshapeLayer(l_px_zy_logvar,
                                        (-1, self.sym_samples, 1, n_x)) if x_dist == "gaussian" else None

        # Predefined functions
        # f_qy / f_qa: outputs averaged over axes 1 and 2 of the reshaped layers.
        inputs = [self.sym_x_l, self.sym_samples]
        outputs = get_output(self.l_qy, self.sym_x_l, deterministic=True).mean(axis=(1, 2))
        self.f_qy = theano.function(inputs, outputs)

        inputs = [self.sym_x_l, self.sym_samples]
        outputs = get_output(self.l_qa, self.sym_x_l, deterministic=True).mean(axis=(1, 2))
        self.f_qa = theano.function(inputs, outputs)

        # f_pa / f_px: the sampled layers are replaced by externally supplied
        # z (and a) values through the layer -> expression mapping.
        inputs = {l_qz_axy: self.sym_z, l_y_in: self.sym_t_l}
        outputs = get_output(self.l_pa, inputs, deterministic=True)
        self.f_pa = theano.function([self.sym_z, self.sym_t_l, self.sym_samples], outputs)

        inputs = {l_qa_x: self.sym_a, l_qz_axy: self.sym_z, l_y_in: self.sym_t_l}
        outputs = get_output(self.l_px, inputs, deterministic=True)
        self.f_px = theano.function([self.sym_a, self.sym_z, self.sym_t_l, self.sym_samples], outputs)

        # Define model parameters
        self.model_params = get_all_params([self.l_qy, self.l_pa, self.l_px])
        self.trainable_model_params = get_all_params([self.l_qy, self.l_pa, self.l_px], trainable=True)
コード例 #3
0
    def __init__(self,
                 n_c,
                 n_l,
                 n_a,
                 n_z,
                 n_y,
                 qa_hid,
                 qz_hid,
                 qy_hid,
                 px_hid,
                 pa_hid,
                 filters,
                 nonlinearity=rectify,
                 px_nonlinearity=None,
                 x_dist='bernoulli',
                 batchnorm=False,
                 seed=1234):
        """
        Initialize a convolutional skip deep generative model consisting of
        discriminative classifier q(y|a,x),
        generative model P p(a|z,y) and p(x|a,z,y),
        inference model Q q(a|x) and q(z|a,x,y).
        Hidden weights are initialized with the Glorot-normal scheme; biases and the
        mu/logvar layers use a normal initializer parameterized by 1e-3.
        :param n_c: Number of input channels (last axis of the (batch, n_l, n_c) input).
        :param n_l: Length of the input (second axis of the (batch, n_l, n_c) input).
        :param n_a: Number of auxiliary.
        :param n_z: Number of latent.
        :param n_y: Number of classes.
        :param qa_hid: List of number of deterministic hidden q(a|x); may be empty.
        :param qz_hid: List of number of deterministic hidden q(z|a,x,y); needs at least one entry.
        :param qy_hid: List of number of deterministic hidden q(y|a,x); needs at least one entry.
        :param px_hid: List of number of deterministic hidden p(x|a,z,y); only the first entry is used.
        :param pa_hid: List of number of deterministic hidden p(a|z,y); needs at least one entry.
        :param filters: Sequence of (number of filters, stride, pool size) triples, one per encoder
            convolution; the decoder walks the same sequence in reverse.
        :param nonlinearity: The transfer function; forwarded to the base class and used to pick the Glorot gain.
        :param px_nonlinearity: Nonlinearity of the mu and logvar layers of p(x|a,z,y) when x_dist is 'gaussian'.
        :param x_dist: The x distribution, 'bernoulli', 'multinomial', 'gaussian', or 'linear'. Any other
            value leaves the reshaped decoder output without an extra output layer.
        :param batchnorm: Boolean value for batch normalization.
        :param seed: The random seed.
        """
        super(CSDGM, self).__init__(n_c, qz_hid + px_hid, n_a + n_z,
                                    nonlinearity)
        self.x_dist = x_dist
        self.n_y = n_y
        self.n_c = n_c
        self.n_l = n_l
        self.n_a = n_a
        self.n_z = n_z
        self.batchnorm = batchnorm
        self._srng = RandomStreams(seed)

        # Decide Glorot initialization of weights.
        # The Glorot gain must be a number or the string 'relu'; the neutral
        # gain 1.0 is used for every nonlinearity other than rectify/softplus.
        init_w = 1e-3
        hid_w = 1.0
        if nonlinearity == rectify or nonlinearity == softplus:
            hid_w = "relu"

        # Pooling layers created by the encoder, in creation order; the
        # decoder needs them to build the matching inverse layers.
        pool_layers = []

        # Define symbolic variables for theano functions.
        self.sym_beta = T.scalar('beta')  # scaling constant beta
        self.sym_x_l = T.tensor3('x')  # labeled inputs
        self.sym_t_l = T.matrix('t')  # labeled targets
        self.sym_x_u = T.tensor3('x')  # unlabeled inputs
        self.sym_bs_l = T.iscalar('bs_l')  # number of labeled data
        self.sym_samples = T.iscalar('samples')  # MC samples
        self.sym_z = T.matrix('z')  # latent variable z
        self.sym_a = T.matrix('a')  # auxiliary variable a
        self.sym_warmup = T.fscalar('warmup')  # warmup to scale KL term

        # Assist methods for collecting the layers
        def dense_layer(layer_in,
                        n,
                        dist_w=init.GlorotNormal,
                        dist_b=init.Normal):
            # Linear dense layer -> optional batch norm -> self.transf
            # (self.transf is presumably set by the base class -- confirm).
            dense = DenseLayer(layer_in, n, dist_w(hid_w), dist_b(init_w),
                               None)
            if batchnorm:
                dense = BatchNormLayer(dense)
            return NonlinearityLayer(dense, self.transf)

        def stochastic_layer(layer_in, n, samples, nonlin=None):
            # Two parallel dense heads (mu, logvar) feeding a SampleLayer;
            # returns the sample layer together with both heads.
            mu = DenseLayer(layer_in, n, init.Normal(init_w),
                            init.Normal(init_w), nonlin)
            logvar = DenseLayer(layer_in, n, init.Normal(init_w),
                                init.Normal(init_w), nonlin)
            return SampleLayer(mu, logvar, eq_samples=samples,
                               iw_samples=1), mu, logvar

        def conv_layer(layer_in,
                       num_filters,
                       stride=(1, 1),
                       pool=1,
                       name='conv',
                       dist_w=init.GlorotNormal,
                       dist_b=init.Normal):
            # (3, 1) convolution with 'full' padding, followed by max pooling
            # when pool > 1. Pooling layers are recorded for the decoder.
            l_conv = Conv2DLayer(layer_in,
                                 num_filters=num_filters,
                                 filter_size=(3, 1),
                                 stride=stride,
                                 pad='full',
                                 W=dist_w(hid_w),
                                 b=dist_b(init_w),
                                 name=name)
            if pool > 1:
                l_conv = MaxPool2DLayer(l_conv, pool_size=(pool, 1))
                pool_layers.append(l_conv)
            return l_conv

        # Input layers
        l_y_in = InputLayer((None, n_y))
        l_x_in = InputLayer((None, n_l, n_c), name='Input')

        # Reshape input
        l_x_in_reshp = ReshapeLayer(l_x_in, (-1, 1, n_l, n_c))
        print("l_x_in_reshp", l_x_in_reshp.output_shape)

        # CNN encoder implementation
        l_conv_enc = l_x_in_reshp
        for num_filters, stride, pool in filters:
            l_conv_enc = conv_layer(l_conv_enc, num_filters, stride, pool)
            print("l_conv_enc", l_conv_enc.output_shape)

        # Pool along last 2 axes
        l_global_pool_enc = GlobalPoolLayer(l_conv_enc, pool_function=T.mean)
        l_enc = dense_layer(l_global_pool_enc, n_z)
        print("l_enc", l_enc.output_shape)

        # Auxiliary q(a|x)
        l_qa_x = l_enc
        for hid in qa_hid:
            l_qa_x = dense_layer(l_qa_x, hid)
        l_qa_x, l_qa_x_mu, l_qa_x_logvar = stochastic_layer(
            l_qa_x, n_a, self.sym_samples)

        # Classifier q(y|a,x)
        # Each conditioning input gets its own linear projection; the
        # projections are brought to 4 dimensions, summed, and flattened again.
        l_qa_to_qy = DenseLayer(l_qa_x, qy_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qa_to_qy = ReshapeLayer(l_qa_to_qy,
                                  (-1, self.sym_samples, 1, qy_hid[0]))
        l_x_to_qy = DenseLayer(l_enc, qy_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_x_to_qy = DimshuffleLayer(l_x_to_qy, (0, 'x', 'x', 1))
        l_qy_xa = ReshapeLayer(ElemwiseSumLayer([l_qa_to_qy, l_x_to_qy]),
                               (-1, qy_hid[0]))
        if batchnorm:
            l_qy_xa = BatchNormLayer(l_qy_xa)
        l_qy_xa = NonlinearityLayer(l_qy_xa, self.transf)
        for hid in qy_hid[1:]:
            l_qy_xa = dense_layer(l_qy_xa, hid)
        l_qy_xa = DenseLayer(l_qy_xa, n_y, init.GlorotNormal(),
                             init.Normal(init_w), softmax)

        # Recognition q(z|x,a,y)
        l_qa_to_qz = DenseLayer(l_qa_x, qz_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qa_to_qz = ReshapeLayer(l_qa_to_qz,
                                  (-1, self.sym_samples, 1, qz_hid[0]))
        l_x_to_qz = DenseLayer(l_enc, qz_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_x_to_qz = DimshuffleLayer(l_x_to_qz, (0, 'x', 'x', 1))
        l_y_to_qz = DenseLayer(l_y_in, qz_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_y_to_qz = DimshuffleLayer(l_y_to_qz, (0, 'x', 'x', 1))
        l_qz_axy = ReshapeLayer(
            ElemwiseSumLayer([l_qa_to_qz, l_x_to_qz, l_y_to_qz]),
            (-1, qz_hid[0]))
        if batchnorm:
            l_qz_axy = BatchNormLayer(l_qz_axy)
        l_qz_axy = NonlinearityLayer(l_qz_axy, self.transf)
        for hid in qz_hid[1:]:
            l_qz_axy = dense_layer(l_qz_axy, hid)
        l_qz_axy, l_qz_axy_mu, l_qz_axy_logvar = stochastic_layer(
            l_qz_axy, n_z, 1)

        # Generative p(a|z,y)
        l_y_to_pa = DenseLayer(l_y_in, pa_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_y_to_pa = DimshuffleLayer(l_y_to_pa, (0, 'x', 'x', 1))
        l_qz_to_pa = DenseLayer(l_qz_axy, pa_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qz_to_pa = ReshapeLayer(l_qz_to_pa,
                                  (-1, self.sym_samples, 1, pa_hid[0]))
        l_pa_zy = ReshapeLayer(ElemwiseSumLayer([l_qz_to_pa, l_y_to_pa]),
                               [-1, pa_hid[0]])
        if batchnorm:
            l_pa_zy = BatchNormLayer(l_pa_zy)
        l_pa_zy = NonlinearityLayer(l_pa_zy, self.transf)
        for hid in pa_hid[1:]:
            l_pa_zy = dense_layer(l_pa_zy, hid)
        l_pa_zy, l_pa_zy_mu, l_pa_zy_logvar = stochastic_layer(l_pa_zy, n_a, 1)

        # Generative p(x|a,z,y)
        l_qa_to_px = DenseLayer(l_qa_x, px_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qa_to_px = ReshapeLayer(l_qa_to_px,
                                  (-1, self.sym_samples, 1, px_hid[0]))
        l_y_to_px = DenseLayer(l_y_in, px_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_y_to_px = DimshuffleLayer(l_y_to_px, (0, 'x', 'x', 1))
        l_qz_to_px = DenseLayer(l_qz_axy, px_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qz_to_px = ReshapeLayer(l_qz_to_px,
                                  (-1, self.sym_samples, 1, px_hid[0]))
        l_px_azy = ReshapeLayer(
            ElemwiseSumLayer([l_qa_to_px, l_qz_to_px, l_y_to_px]),
            [-1, px_hid[0]])
        if batchnorm:
            l_px_azy = BatchNormLayer(l_px_azy)
        l_px_azy = NonlinearityLayer(l_px_azy, self.transf)

        # Note that px_hid[0] has to be equal to the number filters in the first convolution. Otherwise add a
        # dense layers here.

        # Inverse pooling
        l_global_depool = InverseLayer(l_px_azy, l_global_pool_enc)
        print("l_global_depool", l_global_depool.output_shape)

        # Reverse pool layer order
        pool_layers = pool_layers[::-1]
        # Only encoder stages with pool > 1 recorded a pooling layer, so the
        # recorded layers are consumed one at a time instead of being indexed
        # by the filter position (which misaligns when some stages have
        # pool == 1).
        remaining_pools = iter(pool_layers)

        # Decode
        # NOTE(review): the encoder passes `stride` to Conv2DLayer unchanged
        # while the decoder wraps it as (stride, 1) -- confirm this asymmetry
        # is intended.
        l_deconv = l_global_depool
        for num_filters, stride, pool in filters[::-1]:
            if pool > 1:
                l_deconv = InverseLayer(l_deconv, next(remaining_pools))
            l_deconv = Conv2DLayer(l_deconv,
                                   num_filters=num_filters,
                                   filter_size=(3, 1),
                                   stride=(stride, 1),
                                   W=init.GlorotNormal('relu'))
            print("l_deconv", l_deconv.output_shape)

        # The last l_conv layer should give us the input shape
        l_px_azy = Conv2DLayer(l_deconv,
                               num_filters=1,
                               filter_size=(3, 1),
                               pad='same',
                               nonlinearity=None)
        print("l_dec", l_px_azy.output_shape)

        # Flatten first two dimensions
        l_px_azy = ReshapeLayer(l_px_azy, (-1, n_c))

        # Output layer of p(x|a,z,y); l_px_zy_mu / l_px_zy_logvar exist only
        # in the 'gaussian' case and are guarded accordingly further down.
        if x_dist == 'bernoulli':
            l_px_azy = DenseLayer(l_px_azy, n_c, init.GlorotNormal(),
                                  init.Normal(init_w), sigmoid)
        elif x_dist == 'multinomial':
            l_px_azy = DenseLayer(l_px_azy, n_c, init.GlorotNormal(),
                                  init.Normal(init_w), softmax)
        elif x_dist == 'gaussian':
            l_px_azy, l_px_zy_mu, l_px_zy_logvar = stochastic_layer(
                l_px_azy, n_c, self.sym_samples, px_nonlinearity)
        elif x_dist == 'linear':
            l_px_azy = DenseLayer(l_px_azy, n_c, nonlinearity=None)

        # Reshape all the model layers to have the same size
        self.l_x_in = l_x_in
        self.l_y_in = l_y_in
        self.l_a_in = l_qa_x

        self.l_qa = ReshapeLayer(l_qa_x, (-1, self.sym_samples, 1, n_a))
        self.l_qa_mu = DimshuffleLayer(l_qa_x_mu, (0, 'x', 'x', 1))
        self.l_qa_logvar = DimshuffleLayer(l_qa_x_logvar, (0, 'x', 'x', 1))

        self.l_qz = ReshapeLayer(l_qz_axy, (-1, self.sym_samples, 1, n_z))
        self.l_qz_mu = ReshapeLayer(l_qz_axy_mu,
                                    (-1, self.sym_samples, 1, n_z))
        self.l_qz_logvar = ReshapeLayer(l_qz_axy_logvar,
                                        (-1, self.sym_samples, 1, n_z))

        self.l_qy = ReshapeLayer(l_qy_xa, (-1, self.sym_samples, 1, n_y))

        self.l_pa = ReshapeLayer(l_pa_zy, (-1, self.sym_samples, 1, n_a))
        self.l_pa_mu = ReshapeLayer(l_pa_zy_mu, (-1, self.sym_samples, 1, n_a))
        self.l_pa_logvar = ReshapeLayer(l_pa_zy_logvar,
                                        (-1, self.sym_samples, 1, n_a))

        # Here we assume that we pass (batch size * segment length, number of features) to the sample layer from
        # which we then get (batch size * segment length, samples, IW samples, features)
        self.l_px = ReshapeLayer(l_px_azy, (-1, n_l, self.sym_samples, 1, n_c))
        self.l_px_mu = ReshapeLayer(l_px_zy_mu, (-1, n_l, self.sym_samples, 1, n_c)) \
            if x_dist == "gaussian" else None
        self.l_px_logvar = ReshapeLayer(l_px_zy_logvar, (-1, n_l, self.sym_samples, 1, n_c)) \
            if x_dist == "gaussian" else None

        # Predefined functions
        inputs = {l_x_in: self.sym_x_l}
        outputs = get_output(self.l_qy, inputs,
                             deterministic=True).mean(axis=(1, 2))
        self.f_qy = theano.function([self.sym_x_l, self.sym_samples], outputs)

        outputs = get_output(l_qa_x, inputs, deterministic=True)
        self.f_qa = theano.function([self.sym_x_l, self.sym_samples], outputs)

        inputs = {l_x_in: self.sym_x_l, l_y_in: self.sym_t_l}
        outputs = get_output(l_qz_axy, inputs, deterministic=True)
        self.f_qz = theano.function(
            [self.sym_x_l, self.sym_t_l, self.sym_samples], outputs)

        inputs = {l_qz_axy: self.sym_z, l_y_in: self.sym_t_l}
        outputs = get_output(self.l_pa, inputs,
                             deterministic=True).mean(axis=(1, 2))
        self.f_pa = theano.function(
            [self.sym_z, self.sym_t_l, self.sym_samples], outputs)

        # The decoder functions replace the sampled a and z layers with
        # externally supplied values; x is still fed because the inverse
        # layers are built on top of the encoder.
        inputs = {
            l_x_in: self.sym_x_l,
            l_qa_x: self.sym_a,
            l_qz_axy: self.sym_z,
            l_y_in: self.sym_t_l
        }
        decoder_args = [
            self.sym_x_l, self.sym_a, self.sym_z, self.sym_t_l,
            self.sym_samples
        ]
        outputs = get_output(self.l_px, inputs,
                             deterministic=True).mean(axis=(2, 3))
        self.f_px = theano.function(decoder_args, outputs)

        # The mu / logvar layers only exist for a gaussian p(x|a,z,y); for
        # every other x_dist the corresponding functions are left as None
        # instead of being compiled from a missing layer.
        if x_dist == "gaussian":
            outputs = get_output(self.l_px_mu, inputs,
                                 deterministic=True).mean(axis=(2, 3))
            self.f_mu = theano.function(decoder_args, outputs)

            outputs = get_output(self.l_px_logvar, inputs,
                                 deterministic=True).mean(axis=(2, 3))
            self.f_var = theano.function(decoder_args, outputs)
        else:
            self.f_mu = None
            self.f_var = None

        # Define model parameters
        self.model_params = get_all_params([self.l_qy, self.l_pa, self.l_px])
        self.trainable_model_params = get_all_params(
            [self.l_qy, self.l_pa, self.l_px], trainable=True)
コード例 #4
0
    def __init__(self,
                 n_l,
                 n_c,
                 n_a,
                 n_z,
                 n_y,
                 qa_hid,
                 qz_hid,
                 qy_hid,
                 px_hid,
                 pa_hid,
                 enc_rnn=256,
                 dec_rnn=256,
                 nonlinearity=rectify,
                 px_nonlinearity=None,
                 x_dist='bernoulli',
                 batchnorm=False,
                 seed=1234):
        """
        Initialize an skip deep generative model consisting of
        discriminative classifier q(y|a,x),
        generative model P p(a|z,y) and p(x|a,z,y),
        inference model Q q(a|x) and q(z|a,x,y).
        Weights are initialized using the Bengio and Glorot (2010) initialization scheme.
        :param n_c: Number of inputs.
        :param n_a: Number of auxiliary.
        :param n_z: Number of latent.
        :param n_y: Number of classes.
        :param qa_hid: List of number of deterministic hidden q(a|x).
        :param qz_hid: List of number of deterministic hidden q(z|a,x,y).
        :param qy_hid: List of number of deterministic hidden q(y|a,x).
        :param px_hid: List of number of deterministic hidden p(a|z,y) & p(x|z,y).
        :param nonlinearity: The transfer function used in the deterministic layers.
        :param x_dist: The x distribution, 'bernoulli', 'multinomial', or 'gaussian'.
        :param batchnorm: Boolean value for batch normalization.
        :param seed: The random seed.
        """
        super(RSDGM, self).__init__(n_c, qz_hid + px_hid, n_a + n_z,
                                    nonlinearity)
        self.x_dist = x_dist
        self.n_y = n_y
        self.n_c = n_c
        self.n_a = n_a
        self.n_z = n_z
        self.n_l = n_l
        self.batchnorm = batchnorm
        self._srng = RandomStreams(seed)

        # Decide Glorot initializaiton of weights.
        init_w = 1e-3
        hid_w = ""
        if nonlinearity == rectify or nonlinearity == softplus:
            hid_w = "relu"

        # Define symbolic variables for theano functions.
        self.sym_beta = T.scalar('beta')  # scaling constant beta
        self.sym_x_l = T.tensor3('x_l')  # labeled inputs
        self.sym_t_l = T.matrix('t')  # labeled targets
        self.sym_x_u = T.tensor3('x_u')  # unlabeled inputs
        self.sym_bs_l = T.iscalar('bs_l')  # number of labeled data
        self.sym_samples = T.iscalar('samples')  # MC samples
        self.sym_z = T.matrix('z')  # latent variable z
        self.sym_a = T.matrix('a')  # auxiliary variable a
        self.sym_warmup = T.fscalar('warmup')  # warmup to dampen KL term

        # Assist methods for collecting the layers
        def dense_layer(layer_in,
                        n,
                        dist_w=init.GlorotNormal,
                        dist_b=init.Normal):
            dense = DenseLayer(layer_in, n, dist_w(hid_w), dist_b(init_w),
                               None)
            if batchnorm:
                dense = BatchNormLayer(dense)
            return NonlinearityLayer(dense, self.transf)

        def stochastic_layer(layer_in, n, samples, nonlin=None):
            mu = DenseLayer(layer_in, n, init.Normal(init_w),
                            init.Normal(init_w), nonlin)
            logvar = DenseLayer(layer_in, n, init.Normal(init_w),
                                init.Normal(init_w), nonlin)
            return SampleLayer(mu, logvar, eq_samples=samples,
                               iw_samples=1), mu, logvar

        def lstm_layer(input,
                       nunits,
                       return_final,
                       backwards=False,
                       name='LSTM'):
            ingate = Gate(W_in=init.Uniform(0.01),
                          W_hid=init.Uniform(0.01),
                          b=init.Constant(0.0))
            forgetgate = Gate(W_in=init.Uniform(0.01),
                              W_hid=init.Uniform(0.01),
                              b=init.Constant(5.0))
            cell = Gate(
                W_cell=None,
                nonlinearity=T.tanh,
                W_in=init.Uniform(0.01),
                W_hid=init.Uniform(0.01),
            )
            outgate = Gate(W_in=init.Uniform(0.01),
                           W_hid=init.Uniform(0.01),
                           b=init.Constant(0.0))

            lstm = LSTMLayer(input,
                             num_units=nunits,
                             backwards=backwards,
                             peepholes=False,
                             ingate=ingate,
                             forgetgate=forgetgate,
                             cell=cell,
                             outgate=outgate,
                             name=name,
                             only_return_final=return_final)

            rec = RecurrentLayer(input,
                                 nunits,
                                 W_in_to_hid=init.GlorotNormal('relu'),
                                 W_hid_to_hid=init.GlorotNormal('relu'),
                                 backwards=backwards,
                                 nonlinearity=rectify,
                                 only_return_final=return_final,
                                 name=name)
            return lstm

        # Input layers
        l_y_in = InputLayer((None, n_y))
        l_x_in = InputLayer((None, n_l, n_c))

        # RNN encoder implementation
        l_enc_forward = lstm_layer(l_x_in,
                                   enc_rnn,
                                   return_final=True,
                                   backwards=False,
                                   name='enc_forward')
        l_enc_backward = lstm_layer(l_x_in,
                                    enc_rnn,
                                    return_final=True,
                                    backwards=True,
                                    name='enc_backward')
        l_enc_concat = ConcatLayer([l_enc_forward, l_enc_backward])
        l_enc = dense_layer(l_enc_concat, enc_rnn)

        # Auxiliary q(a|x)
        l_qa_x = l_enc
        for hid in qa_hid:
            l_qa_x = dense_layer(l_qa_x, hid)
        l_qa_x, l_qa_x_mu, l_qa_x_logvar = stochastic_layer(
            l_qa_x, n_a, self.sym_samples)

        # Classifier q(y|a,x)
        l_qa_to_qy = DenseLayer(l_qa_x, qy_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qa_to_qy = ReshapeLayer(l_qa_to_qy,
                                  (-1, self.sym_samples, 1, qy_hid[0]))
        l_x_to_qy = DenseLayer(l_enc, qy_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_x_to_qy = DimshuffleLayer(l_x_to_qy, (0, 'x', 'x', 1))
        l_qy_xa = ReshapeLayer(ElemwiseSumLayer([l_qa_to_qy, l_x_to_qy]),
                               (-1, qy_hid[0]))
        if batchnorm:
            l_qy_xa = BatchNormLayer(l_qy_xa)
        l_qy_xa = NonlinearityLayer(l_qy_xa, self.transf)
        if len(qy_hid) > 1:
            for hid in qy_hid[1:]:
                l_qy_xa = dense_layer(l_qy_xa, hid)
        l_qy_xa = DenseLayer(l_qy_xa, n_y, init.GlorotNormal(),
                             init.Normal(init_w), softmax)

        # Recognition q(z|x,a,y)
        l_qa_to_qz = DenseLayer(l_qa_x, qz_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qa_to_qz = ReshapeLayer(l_qa_to_qz,
                                  (-1, self.sym_samples, 1, qz_hid[0]))
        l_x_to_qz = DenseLayer(l_enc, qz_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_x_to_qz = DimshuffleLayer(l_x_to_qz, (0, 'x', 'x', 1))
        l_y_to_qz = DenseLayer(l_y_in, qz_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_y_to_qz = DimshuffleLayer(l_y_to_qz, (0, 'x', 'x', 1))
        l_qz_axy = ReshapeLayer(
            ElemwiseSumLayer([l_qa_to_qz, l_x_to_qz, l_y_to_qz]),
            (-1, qz_hid[0]))
        if batchnorm:
            l_qz_axy = BatchNormLayer(l_qz_axy)
        l_qz_axy = NonlinearityLayer(l_qz_axy, self.transf)
        if len(qz_hid) > 1:
            for hid in qz_hid[1:]:
                l_qz_axy = dense_layer(l_qz_axy, hid)
        l_qz_axy, l_qz_axy_mu, l_qz_axy_logvar = stochastic_layer(
            l_qz_axy, n_z, 1)

        # Generative p(a|z,y)
        l_y_to_pa = DenseLayer(l_y_in, pa_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_y_to_pa = DimshuffleLayer(l_y_to_pa, (0, 'x', 'x', 1))
        l_qz_to_pa = DenseLayer(l_qz_axy, pa_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qz_to_pa = ReshapeLayer(l_qz_to_pa,
                                  (-1, self.sym_samples, 1, pa_hid[0]))
        l_pa_zy = ReshapeLayer(ElemwiseSumLayer([l_qz_to_pa, l_y_to_pa]),
                               [-1, pa_hid[0]])
        if batchnorm:
            l_pa_zy = BatchNormLayer(l_pa_zy)
        l_pa_zy = NonlinearityLayer(l_pa_zy, self.transf)
        if len(pa_hid) > 1:
            for hid in pa_hid[1:]:
                l_pa_zy = dense_layer(l_pa_zy, hid)
        l_pa_zy, l_pa_zy_mu, l_pa_zy_logvar = stochastic_layer(l_pa_zy, n_a, 1)

        # Generative p(x|a,z,y)
        l_qa_to_px = DenseLayer(l_qa_x, px_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qa_to_px = ReshapeLayer(l_qa_to_px,
                                  (-1, self.sym_samples, 1, px_hid[0]))
        l_y_to_px = DenseLayer(l_y_in, px_hid[0], init.GlorotNormal(hid_w),
                               init.Normal(init_w), None)
        l_y_to_px = DimshuffleLayer(l_y_to_px, (0, 'x', 'x', 1))
        l_qz_to_px = DenseLayer(l_qz_axy, px_hid[0], init.GlorotNormal(hid_w),
                                init.Normal(init_w), None)
        l_qz_to_px = ReshapeLayer(l_qz_to_px,
                                  (-1, self.sym_samples, 1, px_hid[0]))
        l_px_azy = ReshapeLayer(
            ElemwiseSumLayer([l_qa_to_px, l_qz_to_px, l_y_to_px]),
            [-1, px_hid[0]])
        if batchnorm:
            l_px_azy = BatchNormLayer(l_px_azy)
        l_px_azy = NonlinearityLayer(l_px_azy, self.transf)

        # RNN decoder implementation
        l_px_azy_repeat = RepeatLayer(l_px_azy, n=n_l)
        l_dec_forward = lstm_layer(l_px_azy_repeat,
                                   dec_rnn,
                                   return_final=False,
                                   backwards=False,
                                   name='dec_forward')
        l_dec_backward = lstm_layer(l_px_azy_repeat,
                                    dec_rnn,
                                    return_final=False,
                                    backwards=True,
                                    name='dec_backward')
        l_dec_concat = ConcatLayer([l_dec_forward, l_dec_backward], axis=-1)
        l_dec = ReshapeLayer(l_dec_concat, (-1, 2 * dec_rnn))
        l_dec = dense_layer(l_dec, dec_rnn)

        l_px_azy = l_dec
        if len(px_hid) > 1:
            for hid in px_hid[1:]:
                l_px_azy = dense_layer(l_px_azy, hid)

        if x_dist == 'bernoulli':
            l_px_azy = DenseLayer(l_px_azy, n_c, init.GlorotNormal(),
                                  init.Normal(init_w), sigmoid)
        elif x_dist == 'multinomial':
            l_px_azy = DenseLayer(l_px_azy, n_c, init.GlorotNormal(),
                                  init.Normal(init_w), softmax)
        elif x_dist == 'gaussian':
            l_px_azy, l_px_zy_mu, l_px_zy_logvar = stochastic_layer(
                l_px_azy, n_c, self.sym_samples, px_nonlinearity)

        # Reshape all the model layers to have the same size
        self.l_x_in = l_x_in
        self.l_y_in = l_y_in
        self.l_a_in = l_qa_x

        self.l_qa = ReshapeLayer(l_qa_x, (-1, self.sym_samples, 1, n_a))
        self.l_qa_mu = DimshuffleLayer(l_qa_x_mu, (0, 'x', 'x', 1))
        self.l_qa_logvar = DimshuffleLayer(l_qa_x_logvar, (0, 'x', 'x', 1))

        self.l_qz = ReshapeLayer(l_qz_axy, (-1, self.sym_samples, 1, n_z))
        self.l_qz_mu = ReshapeLayer(l_qz_axy_mu,
                                    (-1, self.sym_samples, 1, n_z))
        self.l_qz_logvar = ReshapeLayer(l_qz_axy_logvar,
                                        (-1, self.sym_samples, 1, n_z))

        self.l_qy = ReshapeLayer(l_qy_xa, (-1, self.sym_samples, 1, n_y))

        self.l_pa = ReshapeLayer(l_pa_zy, (-1, self.sym_samples, 1, n_a))
        self.l_pa_mu = ReshapeLayer(l_pa_zy_mu, (-1, self.sym_samples, 1, n_a))
        self.l_pa_logvar = ReshapeLayer(l_pa_zy_logvar,
                                        (-1, self.sym_samples, 1, n_a))

        self.l_px = ReshapeLayer(l_px_azy, (-1, n_l, self.sym_samples, 1, n_c))
        self.l_px_mu = ReshapeLayer(l_px_zy_mu, (-1, n_l, self.sym_samples, 1, n_c)) \
            if x_dist == "gaussian" else None
        self.l_px_logvar = ReshapeLayer(l_px_zy_logvar, (-1, n_l, self.sym_samples, 1, n_c)) \
            if x_dist == "gaussian" else None

        # Predefined functions
        inputs = [self.sym_x_l, self.sym_samples]
        outputs = get_output(self.l_qy, self.sym_x_l,
                             deterministic=True).mean(axis=(1, 2))
        self.f_qy = theano.function(inputs, outputs)

        inputs = [self.sym_x_l, self.sym_samples]
        outputs = get_output(self.l_qa, self.sym_x_l,
                             deterministic=True).mean(axis=(1, 2))
        self.f_qa = theano.function(inputs, outputs)

        inputs = {l_x_in: self.sym_x_l, l_y_in: self.sym_t_l}
        outputs = get_output(l_qz_axy, inputs, deterministic=True)
        self.f_qz = theano.function(
            [self.sym_x_l, self.sym_t_l, self.sym_samples], outputs)

        inputs = {l_qz_axy: self.sym_z, l_y_in: self.sym_t_l}
        outputs = get_output(self.l_pa, inputs, deterministic=True)
        self.f_pa = theano.function(
            [self.sym_z, self.sym_t_l, self.sym_samples], outputs)

        inputs = {
            l_qa_x: self.sym_a,
            l_qz_axy: self.sym_z,
            l_y_in: self.sym_t_l
        }
        outputs = get_output(self.l_px, inputs,
                             deterministic=True).mean(axis=(2, 3))
        self.f_px = theano.function(
            [self.sym_a, self.sym_z, self.sym_t_l, self.sym_samples], outputs)

        outputs = get_output(self.l_px_mu, inputs,
                             deterministic=True).mean(axis=(2, 3))
        self.f_mu = theano.function(
            [self.sym_a, self.sym_z, self.sym_t_l, self.sym_samples], outputs)

        outputs = get_output(self.l_px_logvar, inputs,
                             deterministic=True).mean(axis=(2, 3))
        self.f_var = theano.function(
            [self.sym_a, self.sym_z, self.sym_t_l, self.sym_samples], outputs)

        # Define model parameters
        self.model_params = get_all_params([self.l_qy, self.l_pa, self.l_px])
        self.trainable_model_params = get_all_params(
            [self.l_qy, self.l_pa, self.l_px], trainable=True)