Example #1
    def __init__(self,
                 nb_filter,
                 nb_row,
                 nb_col,
                 hidden_dim,
                 n_hidden,
                 stack_size,
                 n_flows_q,
                 n_flows_r,
                 use_cuda=True,
                 prior_var=1.0,
                 threshold_var=0.5):
        nn.Module.__init__(self)
        self.nb_filter = nb_filter
        self.nb_row = nb_row
        self.nb_col = nb_col
        self.n_hidden = n_hidden
        self.n_flows_q = n_flows_q
        self.n_flows_r = n_flows_r
        self.prior_var = prior_var
        self.threshold_var = threshold_var
        self.use_cuda = use_cuda
        self.stack_size = stack_size
        self.weight_mu = nn.Parameter(
            torch.Tensor(self.nb_filter, self.stack_size, self.nb_row,
                         self.nb_col))
        self.weight_logstd = nn.Parameter(
            torch.Tensor(self.nb_filter, self.stack_size, self.nb_row,
                         self.nb_col))
        self.bias_mu = nn.Parameter(torch.Tensor(self.nb_filter))
        self.bias_logvar = nn.Parameter(torch.Tensor(self.nb_filter))

        self.qzero_mu = nn.Parameter(torch.Tensor(self.nb_filter))
        self.qzero_logvar = nn.Parameter(torch.Tensor(self.nb_filter))
        self.rzero_c = nn.Parameter(torch.Tensor(self.nb_filter))
        self.rzero_b1 = nn.Parameter(torch.Tensor(self.nb_filter))
        self.rzero_b2 = nn.Parameter(torch.Tensor(self.nb_filter))

        self.flow_q = NAF(flowtype=2, n=1, dim=self.nb_filter)
        self.flow_r = MaskedNVPFlow(self.nb_filter, hidden_dim, n_hidden,
                                    n_flows_r)
        self.register_buffer('epsilon_z', torch.Tensor(self.nb_filter))
        self.reset_parameters()
        self.reset_noise()
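
Note: the constructor above only allocates the variational parameters; the weight posterior itself is sampled elsewhere in the class. As a minimal standalone sketch (not part of this example; modern PyTorch API assumed), this is the reparameterization that weight_mu and weight_logstd support:

import torch

# illustrative shapes matching Example #1's conv filter layout
nb_filter, stack_size, nb_row, nb_col = 8, 3, 3, 3
weight_mu = torch.zeros(nb_filter, stack_size, nb_row, nb_col)
weight_logstd = torch.full((nb_filter, stack_size, nb_row, nb_col), -9.0)

# reparameterization trick: a differentiable sample from N(mu, std^2)
eps = torch.randn_like(weight_mu)
weight_sample = weight_mu + torch.exp(weight_logstd) * eps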
Example #2
    def __init__(self,
                 in_features,
                 out_features,
                 hidden_dim,
                 n_hidden,
                 n_flows_q,
                 n_flows_r,
                 use_cuda=True,
                 prior_var=1.0,
                 threshold_var=0.5):
        nn.Module.__init__(self)
        self.in_features = in_features
        self.out_features = out_features
        self.hidden_dim = hidden_dim
        self.n_hidden = n_hidden
        self.n_flows_q = n_flows_q
        self.n_flows_r = n_flows_r
        self.prior_var = prior_var
        self.threshold_var = threshold_var
        self.use_cuda = use_cuda

        self.weight_mu = nn.Parameter(torch.Tensor(in_features, out_features))
        self.weight_logstd = nn.Parameter(
            torch.Tensor(in_features, out_features))
        self.bias_mu = nn.Parameter(torch.Tensor(out_features))
        self.bias_logvar = nn.Parameter(torch.Tensor(out_features))

        self.qzero_mu = nn.Parameter(torch.Tensor(in_features))
        self.qzero_logvar = nn.Parameter(torch.Tensor(in_features))

        self.rzero_c = nn.Parameter(torch.Tensor(in_features))
        self.rzero_b1 = nn.Parameter(torch.Tensor(in_features))
        self.rzero_b2 = nn.Parameter(torch.Tensor(in_features))
        self.flow_q = NAF(flowtype=0, n=1, dim=self.in_features)
        self.flow_r = MaskedNVPFlow(in_features, hidden_dim, n_hidden,
                                    n_flows_r)

        self.register_buffer('epsilon_z', torch.Tensor(self.in_features))
        self.register_buffer('epsilon_linear', torch.Tensor(self.out_features))
        self.reset_parameters()
        self.reset_noise()
Example #3
    def build(self):
        stack_size = self.input_shape[-1]
        self.W_shape = (self.nb_row, self.nb_col, stack_size, self.nb_filter)
        self.input_dim = self.nb_col * stack_size * self.nb_row
        self.stack_size = stack_size

        with tf.variable_scope(self.name):
            self.mu_W = randmat(self.W_shape, name='mean_W')
            self.logvar_W = randmat(self.W_shape,
                                    mu=-9.,
                                    name='logvar_W',
                                    extra_scale=1e-6)
            self.mu_bias = tf.Variable(tf.zeros((self.nb_filter, )),
                                       name='mean_bias')
            self.logvar_bias = randmat((self.nb_filter, ),
                                       mu=-9.,
                                       name='logvar_bias',
                                       extra_scale=1e-6)

            if self.use_z:
                self.qzero_mean = randmat((self.nb_filter, ),
                                          name='dropout_rates_mean',
                                          mu=1. if self.n_flows_q == 0 else 0.)
                self.qzero = randmat((self.nb_filter, ),
                                     name='dropout_rates',
                                     mu=np.log(0.1),
                                     extra_scale=1e-6)
                self.rsr_M = randmat((self.nb_filter, ), name='var_r_aux')
                self.apvar_M = randmat((self.nb_filter, ), name='apvar_r_aux')
                self.rsri_M = randmat((self.nb_filter, ), name='var_r_auxi')

            self.pvar = randmat((self.input_dim, ),
                                mu=np.log(self.prior_var),
                                name='prior_var_r_p',
                                extra_scale=1e-6,
                                trainable=self.learn_p)
            self.pvar_bias = randmat((1, ),
                                     mu=np.log(self.prior_var_b),
                                     name='prior_var_r_p_bias',
                                     extra_scale=1e-6,
                                     trainable=self.learn_p)

        if self.n_flows_r > 0:
            self.flow_r = MaskedNVPFlow(self.nb_filter,
                                        n_flows=self.n_flows_r,
                                        name=self.name + '_fr',
                                        n_hidden=0,
                                        dim_h=2 * self.flow_dim_h,
                                        scope=self.name)

        if self.n_flows_q > 0:
            self.flow_q = MaskedNVPFlow(self.nb_filter,
                                        n_flows=self.n_flows_q,
                                        name=self.name + '_fq',
                                        n_hidden=0,
                                        dim_h=self.flow_dim_h,
                                        scope=self.name)

        print('Built layer {}, output_dim: {}, input_shape: {}, flows_r: {}, flows_q: {}, use_z: {}, '
              'learn_p: {}, pvar: {}, thres_var: {}'.format(self.name, self.nb_filter, self.input_shape,
                                                            self.n_flows_r, self.n_flows_q, self.use_z,
                                                            self.learn_p, self.prior_var, self.thres_var))
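
The randmat helper is not defined in this example. A hypothetical minimal version, consistent with how it is called above (a shape, a mu offset, an extra_scale noise factor, a variable name, and a trainable flag), might look like the following; the exact initializer and noise scale in the original project may differ:

import tensorflow as tf

def randmat(shape, mu=0.0, extra_scale=1.0, name=None, trainable=True):
    # small Gaussian initialization around `mu`, damped by `extra_scale`
    # (stddev=0.1 is an assumption, not taken from the original code)
    init = mu + extra_scale * tf.random_normal(shape, stddev=0.1)
    return tf.Variable(init, name=name, trainable=trainable)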
Example #4
class Conv2DMNF(Layer):
    """2D convolutional layer with a multiplicative normalizing flow (MNF) aproximate posterior over the weights.
    Prior is a standard normal.
    """
    def __init__(self,
                 nb_filter,
                 nb_row,
                 nb_col,
                 input_shape=(),
                 activation=tf.identity,
                 N=1,
                 name=None,
                 border_mode='SAME',
                 subsample=(1, 1, 1, 1),
                 flows_q=2,
                 flows_r=2,
                 learn_p=False,
                 use_z=True,
                 prior_var=1.,
                 prior_var_b=1.,
                 flow_dim_h=50,
                 logging=False,
                 thres_var=1.,
                 **kwargs):

        if border_mode not in {'VALID', 'SAME'}:
            raise ValueError(
                'Invalid border mode for Convolution2D: {}'.format(border_mode))

        self.nb_filter = nb_filter
        self.nb_row = nb_row
        self.nb_col = nb_col
        self.border_mode = border_mode
        self.subsample = subsample
        self.thres_var = thres_var

        self.N = N
        self.flow_dim_h = flow_dim_h
        self.learn_p = learn_p
        self.input_shape = input_shape

        self.prior_var = prior_var
        self.prior_var_b = prior_var_b
        self.n_flows_q = flows_q
        self.n_flows_r = flows_r
        self.use_z = use_z
        super(Conv2DMNF, self).__init__(N=N,
                                        nonlin=activation,
                                        name=name,
                                        logging=logging)

    def build(self):
        stack_size = self.input_shape[-1]
        self.W_shape = (self.nb_row, self.nb_col, stack_size, self.nb_filter)
        self.input_dim = self.nb_col * stack_size * self.nb_row
        self.stack_size = stack_size

        with tf.variable_scope(self.name):
            self.mu_W = randmat(self.W_shape, name='mean_W')
            self.logvar_W = randmat(self.W_shape,
                                    mu=-9.,
                                    name='logvar_W',
                                    extra_scale=1e-6)
            self.mu_bias = tf.Variable(tf.zeros((self.nb_filter, )),
                                       name='mean_bias')
            self.logvar_bias = randmat((self.nb_filter, ),
                                       mu=-9.,
                                       name='logvar_bias',
                                       extra_scale=1e-6)

            if self.use_z:
                self.qzero_mean = randmat((self.nb_filter, ),
                                          name='dropout_rates_mean',
                                          mu=1. if self.n_flows_q == 0 else 0.)
                self.qzero = randmat((self.nb_filter, ),
                                     name='dropout_rates',
                                     mu=np.log(0.1),
                                     extra_scale=1e-6)
                self.rsr_M = randmat((self.nb_filter, ), name='var_r_aux')
                self.apvar_M = randmat((self.nb_filter, ), name='apvar_r_aux')
                self.rsri_M = randmat((self.nb_filter, ), name='var_r_auxi')

            self.pvar = randmat((self.input_dim, ),
                                mu=np.log(self.prior_var),
                                name='prior_var_r_p',
                                extra_scale=1e-6,
                                trainable=self.learn_p)
            self.pvar_bias = randmat((1, ),
                                     mu=np.log(self.prior_var_b),
                                     name='prior_var_r_p_bias',
                                     extra_scale=1e-6,
                                     trainable=self.learn_p)

        if self.n_flows_r > 0:
            self.flow_r = MaskedNVPFlow(self.nb_filter,
                                        n_flows=self.n_flows_r,
                                        name=self.name + '_fr',
                                        n_hidden=0,
                                        dim_h=2 * self.flow_dim_h,
                                        scope=self.name)

        if self.n_flows_q > 0:
            self.flow_q = MaskedNVPFlow(self.nb_filter,
                                        n_flows=self.n_flows_q,
                                        name=self.name + '_fq',
                                        n_hidden=0,
                                        dim_h=self.flow_dim_h,
                                        scope=self.name)

        print('Built layer {}, output_dim: {}, input_shape: {}, flows_r: {}, flows_q: {}, use_z: {}, '
              'learn_p: {}, pvar: {}, thres_var: {}'.format(self.name, self.nb_filter, self.input_shape,
                                                            self.n_flows_r, self.n_flows_q, self.use_z,
                                                            self.learn_p, self.prior_var, self.thres_var))

    def sample_z(self, size_M=1, sample=True):
        if not self.use_z:
            return ones_d((size_M, self.nb_filter)), zeros_d((size_M, ))
        qm0 = self.get_params_m()
        isample_M = tf.tile(tf.expand_dims(self.qzero_mean, 0), [size_M, 1])
        eps = tf.random_normal(tf.stack((size_M, self.nb_filter)))
        sample_M = isample_M + tf.sqrt(qm0) * eps if sample else isample_M

        logdets = zeros_d((size_M, ))
        if self.n_flows_q > 0:
            sample_M, logdets = self.flow_q.get_output_for(sample_M,
                                                           sample=sample)

        return sample_M, logdets

    def get_params_m(self):
        if not self.use_z:
            return None

        return tf.exp(self.qzero)

    def get_params_W(self):
        return tf.exp(self.logvar_W)

    def get_mean_var(self, x):
        var_w = tf.clip_by_value(self.get_params_W(), 0., self.thres_var)
        var_w = tf.square(var_w)
        var_b = tf.clip_by_value(tf.exp(self.logvar_bias), 0.,
                                 self.thres_var**2)

        # formally we do cross-correlation here
        muout = tf.nn.conv2d(x,
                             self.mu_W,
                             self.subsample,
                             self.border_mode,
                             use_cudnn_on_gpu=True) + self.mu_bias
        varout = tf.nn.conv2d(tf.square(x),
                              var_w,
                              self.subsample,
                              self.border_mode,
                              use_cudnn_on_gpu=True) + var_b

        return muout, varout

    def kldiv(self):
        M, logdets = self.sample_z()
        logdets = logdets[0]
        M = tf.squeeze(M)

        std_w = self.get_params_W()
        mu = tf.reshape(self.mu_W, [-1, self.nb_filter])
        std_w = tf.reshape(std_w, [-1, self.nb_filter])
        Mtilde = mu * tf.expand_dims(M, 0)
        mbias = self.mu_bias * M
        Vtilde = tf.square(std_w)

        iUp = outer(tf.exp(self.pvar), ones_d((self.nb_filter, )))

        qm0 = self.get_params_m()
        logqm = 0.
        if self.use_z:
            logqm = -tf.reduce_sum(.5 * (tf.log(2 * np.pi) + tf.log(qm0) + 1))
            logqm -= logdets

        kldiv_w = tf.reduce_sum(.5 * tf.log(iUp) - .5 * tf.log(Vtilde) +
                                ((Vtilde + tf.square(Mtilde)) /
                                 (2 * iUp)) - .5)
        kldiv_bias = tf.reduce_sum(
            .5 * self.pvar_bias - .5 * self.logvar_bias +
            ((tf.exp(self.logvar_bias) + tf.square(mbias)) /
             (2 * tf.exp(self.pvar_bias))) - .5)

        logrm = 0.
        if self.use_z:
            apvar_M = self.apvar_M
            mw = tf.matmul(Mtilde, tf.expand_dims(apvar_M, 1))
            vw = tf.matmul(Vtilde, tf.expand_dims(tf.square(apvar_M), 1))
            eps = tf.expand_dims(tf.random_normal((self.input_dim, )), 1)
            a = mw + tf.sqrt(vw) * eps
            mb = tf.reduce_sum(mbias * apvar_M)
            vb = tf.reduce_sum(tf.exp(self.logvar_bias) * tf.square(apvar_M))
            a += mb + tf.sqrt(vb) * tf.random_normal(())

            w__ = tf.reduce_mean(outer(tf.squeeze(a), self.rsr_M), axis=0)
            wv__ = tf.reduce_mean(outer(tf.squeeze(a), self.rsri_M), axis=0)

            if self.flow_r is not None:
                M, logrm = self.flow_r.get_output_for(tf.expand_dims(M, 0))
                M = tf.squeeze(M)
                logrm = logrm[0]

            logrm += tf.reduce_sum(-.5 * tf.exp(wv__) * tf.square(M - w__) -
                                   .5 * tf.log(2 * np.pi) + .5 * wv__)

        return -kldiv_w + logrm - logqm - kldiv_bias

    def call(self, x, sample=True, **kwargs):
        sample_M, _ = self.sample_z(size_M=tf.shape(x)[0], sample=sample)
        sample_M = tf.expand_dims(tf.expand_dims(sample_M, 1), 2)
        mean_out, var_out = self.get_mean_var(x)
        mean_gout = mean_out * sample_M
        var_gout = tf.sqrt(var_out) * tf.random_normal(tf.shape(mean_gout))
        out = mean_gout + var_gout

        output = out if sample else mean_gout
        return output
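
The kldiv method above relies on the closed form for the KL divergence between diagonal Gaussians, KL(N(m, v) || N(0, u)) = 0.5*log(u/v) + (v + m^2)/(2u) - 0.5 per weight. A standalone NumPy sanity check of that term (illustrative only, with arbitrary values):

import numpy as np

rng = np.random.default_rng(0)
m, v, u = 0.3, 0.25, 1.0  # posterior mean/variance, prior variance

closed_form = 0.5 * np.log(u / v) + (v + m ** 2) / (2 * u) - 0.5

# Monte Carlo estimate of E_q[log q(w) - log p(w)] for comparison
w = m + np.sqrt(v) * rng.standard_normal(1_000_000)
log_q = -0.5 * (np.log(2 * np.pi * v) + (w - m) ** 2 / v)
log_p = -0.5 * (np.log(2 * np.pi * u) + w ** 2 / u)
print(closed_form, (log_q - log_p).mean())  # the two should agree closely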
Example #5
class DenseMNF(Layer):
    '''Fully connected layer with a multiplicative normalizing flow (MNF) approximate posterior over the weights.
    Prior is a standard normal.
    '''
    def __init__(self,
                 output_dim,
                 activation=tf.identity,
                 N=1,
                 input_dim=None,
                 flows_q=2,
                 flows_r=2,
                 learn_p=False,
                 use_z=True,
                 prior_var=1.,
                 name=None,
                 logging=False,
                 flow_dim_h=50,
                 prior_var_b=1.,
                 thres_var=1.,
                 **kwargs):

        self.output_dim = output_dim
        self.learn_p = learn_p
        self.prior_var = prior_var
        self.prior_var_b = prior_var_b
        self.thres_var = thres_var

        self.n_flows_q = flows_q
        self.n_flows_r = flows_r
        self.use_z = use_z
        self.flow_dim_h = flow_dim_h

        self.input_dim = input_dim
        super(DenseMNF, self).__init__(N=N,
                                       nonlin=activation,
                                       name=name,
                                       logging=logging)

    def build(self):
        dim_in, dim_out = self.input_dim, self.output_dim

        with tf.variable_scope(self.name):
            self.mu_W = randmat((dim_in, dim_out),
                                name='mean_W',
                                extra_scale=1.)
            self.logvar_W = randmat((dim_in, dim_out),
                                    mu=-9.,
                                    name='var_W',
                                    extra_scale=1e-6)
            self.mu_bias = tf.Variable(tf.zeros((dim_out, )), name='mean_bias')
            self.logvar_bias = randmat((dim_out, ),
                                       mu=-9.,
                                       name='var_bias',
                                       extra_scale=1e-6)

            if self.use_z:
                self.qzero_mean = randmat((dim_in, ),
                                          name='dropout_rates_mean',
                                          mu=1. if self.n_flows_q == 0 else 0.)
                self.qzero = randmat((dim_in, ),
                                     mu=np.log(0.1),
                                     name='dropout_rates',
                                     extra_scale=1e-6)
                self.rsr_M = randmat((dim_in, ), name='var_r_aux')
                self.apvar_M = randmat((dim_in, ), name='apvar_r_aux')
                self.rsri_M = randmat((dim_in, ), name='var_r_auxi')

            self.pvar = randmat((dim_in, ),
                                mu=np.log(self.prior_var),
                                name='prior_var_r_p',
                                trainable=self.learn_p,
                                extra_scale=1e-6)
            self.pvar_bias = randmat((1, ),
                                     mu=np.log(self.prior_var_b),
                                     name='prior_var_r_p_bias',
                                     trainable=self.learn_p,
                                     extra_scale=1e-6)

        if self.n_flows_r > 0:
            if dim_in == 1:
                self.flow_r = PlanarFlow(dim_in,
                                         n_flows=self.n_flows_r,
                                         name=self.name + '_fr',
                                         scope=self.name)
            else:
                self.flow_r = MaskedNVPFlow(dim_in,
                                            n_flows=self.n_flows_r,
                                            name=self.name + '_fr',
                                            n_hidden=0,
                                            dim_h=2 * self.flow_dim_h,
                                            scope=self.name)

        if self.n_flows_q > 0:
            if dim_in == 1:
                self.flow_q = PlanarFlow(dim_in,
                                         n_flows=self.n_flows_q,
                                         name=self.name + '_fq',
                                         scope=self.name)
            else:
                self.flow_q = MaskedNVPFlow(dim_in,
                                            n_flows=self.n_flows_q,
                                            name=self.name + '_fq',
                                            n_hidden=0,
                                            dim_h=self.flow_dim_h,
                                            scope=self.name)

        print('Built layer {}, prior_var: {}, flows_q: {}, flows_r: {}, use_z: {}, '
              'learn_p: {}, thres_var: {}'.format(self.name, self.prior_var,
                                                  self.n_flows_q, self.n_flows_r,
                                                  self.use_z, self.learn_p,
                                                  self.thres_var))

    def sample_z(self, size_M=1, sample=True):
        if not self.use_z:
            return ones_d((size_M, self.input_dim)), zeros_d((size_M, ))

        qm0 = self.get_params_m()
        isample_M = tf.tile(tf.expand_dims(self.qzero_mean, 0), [size_M, 1])
        eps = tf.random_normal(tf.stack((size_M, self.input_dim)))
        sample_M = isample_M + tf.sqrt(qm0) * eps if sample else isample_M

        logdets = zeros_d((size_M, ))
        if self.n_flows_q > 0:
            sample_M, logdets = self.flow_q.get_output_for(sample_M,
                                                           sample=sample)

        return sample_M, logdets

    def get_params_m(self):
        if not self.use_z:
            return None

        return tf.exp(self.qzero)

    def get_params_W(self):
        return tf.exp(self.logvar_W)

    def kldiv(self):
        M, logdets = self.sample_z()
        logdets = logdets[0]
        M = tf.squeeze(M)

        std_mg = self.get_params_W()
        qm0 = self.get_params_m()
        if len(M.get_shape()) == 0:
            Mexp = M
        else:
            Mexp = tf.expand_dims(M, 1)

        Mtilde = Mexp * self.mu_W
        Vtilde = tf.square(std_mg)

        iUp = outer(tf.exp(self.pvar), ones_d((self.output_dim, )))

        logqm = 0.
        if self.use_z:
            logqm = -tf.reduce_sum(.5 * (tf.log(2 * np.pi) + tf.log(qm0) + 1))
            logqm -= logdets

        kldiv_w = tf.reduce_sum(.5 * tf.log(iUp) - tf.log(std_mg) +
                                ((Vtilde + tf.square(Mtilde)) /
                                 (2 * iUp)) - .5)
        kldiv_bias = tf.reduce_sum(
            .5 * self.pvar_bias - .5 * self.logvar_bias +
            ((tf.exp(self.logvar_bias) + tf.square(self.mu_bias)) /
             (2 * tf.exp(self.pvar_bias))) - .5)

        if self.use_z:
            apvar_M = self.apvar_M
            # shared network for hidden layer
            mw = tf.matmul(tf.expand_dims(apvar_M, 0), Mtilde)
            eps = tf.expand_dims(tf.random_normal((self.output_dim, )), 0)
            varw = tf.matmul(tf.square(tf.expand_dims(apvar_M, 0)), Vtilde)
            a = tf.nn.tanh(mw + tf.sqrt(varw) * eps)
            # split at output layer
            if len(tf.squeeze(a).get_shape()) != 0:
                w__ = tf.reduce_mean(outer(self.rsr_M, tf.squeeze(a)), axis=1)
                wv__ = tf.reduce_mean(outer(self.rsri_M, tf.squeeze(a)),
                                      axis=1)
            else:
                w__ = self.rsr_M * tf.squeeze(a)
                wv__ = self.rsri_M * tf.squeeze(a)

            logrm = 0.
            if self.flow_r is not None:
                M, logrm = self.flow_r.get_output_for(tf.expand_dims(M, 0))
                M = tf.squeeze(M)
                logrm = logrm[0]

            logrm += tf.reduce_sum(-.5 * tf.exp(wv__) * tf.square(M - w__) -
                                   .5 * tf.log(2 * np.pi) + .5 * wv__)
        else:
            logrm = 0.

        return -kldiv_w + logrm - logqm - kldiv_bias

    def call(self, x, sample=True, **kwargs):
        std_mg = tf.clip_by_value(self.get_params_W(), 0., self.thres_var)
        var_mg = tf.square(std_mg)
        sample_M, _ = self.sample_z(size_M=tf.shape(x)[0], sample=sample)
        xt = x * sample_M

        mu_out = tf.matmul(xt, self.mu_W) + self.mu_bias
        varin = tf.matmul(tf.square(x), var_mg) + tf.clip_by_value(
            tf.exp(self.logvar_bias), 0., self.thres_var**2)
        xin = tf.sqrt(varin)
        sigma_out = xin * tf.random_normal(tf.shape(mu_out))

        output = mu_out + sigma_out if sample else mu_out
        return output
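
DenseMNF.call uses the local reparameterization trick: instead of sampling a full weight matrix, it samples the layer's pre-activation directly from N(x @ W_mu + b_mu, (x*x) @ W_var + b_var), which has the same distribution and typically lower gradient variance per example. A standalone sketch of that idea (NumPy only, omitting the multiplicative z and the variance clipping used above):

import numpy as np

rng = np.random.default_rng(0)
batch, dim_in, dim_out = 4, 5, 3
x = rng.standard_normal((batch, dim_in))
W_mu = rng.standard_normal((dim_in, dim_out))
W_var = np.full((dim_in, dim_out), 1e-2)
b_mu, b_var = np.zeros(dim_out), np.full(dim_out, 1e-2)

# mean and variance of the Gaussian pre-activation, then one sample
mu_out = x @ W_mu + b_mu
var_out = (x * x) @ W_var + b_var
sample = mu_out + np.sqrt(var_out) * rng.standard_normal(mu_out.shape)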
Example #6
    def build(self):
        dim_in, dim_out = self.input_dim, self.output_dim

        with tf.variable_scope(self.name):
            self.mu_W = randmat((dim_in, dim_out),
                                name='mean_W',
                                extra_scale=1.)
            self.logvar_W = randmat((dim_in, dim_out),
                                    mu=-9.,
                                    name='var_W',
                                    extra_scale=1e-6)
            self.mu_bias = tf.Variable(tf.zeros((dim_out, )), name='mean_bias')
            self.logvar_bias = randmat((dim_out, ),
                                       mu=-9.,
                                       name='var_bias',
                                       extra_scale=1e-6)

            if self.use_z:
                self.qzero_mean = randmat((dim_in, ),
                                          name='dropout_rates_mean',
                                          mu=1. if self.n_flows_q == 0 else 0.)
                self.qzero = randmat((dim_in, ),
                                     mu=np.log(0.1),
                                     name='dropout_rates',
                                     extra_scale=1e-6)
                self.rsr_M = randmat((dim_in, ), name='var_r_aux')
                self.apvar_M = randmat((dim_in, ), name='apvar_r_aux')
                self.rsri_M = randmat((dim_in, ), name='var_r_auxi')

            self.pvar = randmat((dim_in, ),
                                mu=np.log(self.prior_var),
                                name='prior_var_r_p',
                                trainable=self.learn_p,
                                extra_scale=1e-6)
            self.pvar_bias = randmat((1, ),
                                     mu=np.log(self.prior_var_b),
                                     name='prior_var_r_p_bias',
                                     trainable=self.learn_p,
                                     extra_scale=1e-6)

        if self.n_flows_r > 0:
            if dim_in == 1:
                self.flow_r = PlanarFlow(dim_in,
                                         n_flows=self.n_flows_r,
                                         name=self.name + '_fr',
                                         scope=self.name)
            else:
                self.flow_r = MaskedNVPFlow(dim_in,
                                            n_flows=self.n_flows_r,
                                            name=self.name + '_fr',
                                            n_hidden=0,
                                            dim_h=2 * self.flow_dim_h,
                                            scope=self.name)

        if self.n_flows_q > 0:
            if dim_in == 1:
                self.flow_q = PlanarFlow(dim_in,
                                         n_flows=self.n_flows_q,
                                         name=self.name + '_fq',
                                         scope=self.name)
            else:
                self.flow_q = MaskedNVPFlow(dim_in,
                                            n_flows=self.n_flows_q,
                                            name=self.name + '_fq',
                                            n_hidden=0,
                                            dim_h=self.flow_dim_h,
                                            scope=self.name)

        print('Built layer {}, prior_var: {}, flows_q: {}, flows_r: {}, use_z: {}, '
              'learn_p: {}, thres_var: {}'.format(self.name, self.prior_var,
                                                  self.n_flows_q, self.n_flows_r,
                                                  self.use_z, self.learn_p,
                                                  self.thres_var))
Example #7
class NIAFLinear(nn.Module):
    def __init__(self,
                 in_features,
                 out_features,
                 hidden_dim,
                 n_hidden,
                 n_flows_q,
                 n_flows_r,
                 use_cuda=True,
                 prior_var=1.0,
                 threshold_var=0.5):
        nn.Module.__init__(self)
        self.in_features = in_features
        self.out_features = out_features
        self.hidden_dim = hidden_dim
        self.n_hidden = n_hidden
        self.n_flows_q = n_flows_q
        self.n_flows_r = n_flows_r
        self.prior_var = prior_var
        self.threshold_var = threshold_var
        self.use_cuda = use_cuda

        self.weight_mu = nn.Parameter(torch.Tensor(in_features, out_features))
        self.weight_logstd = nn.Parameter(
            torch.Tensor(in_features, out_features))
        self.bias_mu = nn.Parameter(torch.Tensor(out_features))
        self.bias_logvar = nn.Parameter(torch.Tensor(out_features))

        self.qzero_mu = nn.Parameter(torch.Tensor(in_features))
        self.qzero_logvar = nn.Parameter(torch.Tensor(in_features))

        self.rzero_c = nn.Parameter(torch.Tensor(in_features))
        self.rzero_b1 = nn.Parameter(torch.Tensor(in_features))
        self.rzero_b2 = nn.Parameter(torch.Tensor(in_features))
        self.flow_q = NAF(flowtype=0, n=1, dim=self.in_features)
        self.flow_r = MaskedNVPFlow(in_features, hidden_dim, n_hidden,
                                    n_flows_r)

        self.register_buffer('epsilon_z', torch.Tensor(self.in_features))
        self.register_buffer('epsilon_linear', torch.Tensor(self.out_features))
        self.reset_parameters()
        self.reset_noise()

    def reset_noise(self):
        epsilon_z = torch.randn(self.in_features)
        epsilon_linear = torch.randn(self.out_features)
        self.epsilon_z.copy_(epsilon_z)
        self.epsilon_linear.copy_(epsilon_linear)
        self.flow_r.reset_noise()

    def reset_parameters(self):

        in_stdv = np.sqrt(4.0 / self.in_features)
        out_stdv = np.sqrt(4.0 / self.out_features)
        stdv2 = np.sqrt(4.0 / (self.in_features + self.out_features))

        self.weight_mu.data.normal_(0, stdv2)
        self.weight_logstd.data.normal_(-9, 1e-3 * stdv2)
        self.bias_mu.data.zero_()
        self.bias_logvar.data.normal_(-9, 1e-3 * out_stdv)

        self.qzero_mu.data.normal_(1 if self.n_flows_q == 0 else 0, in_stdv)
        self.qzero_logvar.data.normal_(np.log(0.1), 1e-3 * in_stdv)
        self.rzero_c.data.normal_(0, in_stdv)
        self.rzero_b1.data.normal_(0, in_stdv)
        self.rzero_b2.data.normal_(0, in_stdv)

    def sample_particle(self, batch_size, kl=True, same_noise=False):
        assert not kl
        z = self.qzero_mu
        z = self.flow_q(z, kl=False)
        return z

    def sample_z(self, batch_size, kl=True, same_noise=False):
        if self.training:
            if batch_size > 1:
                assert not kl
                qzero_std = torch.exp(0.5 * self.qzero_logvar)
                qzero_std = qzero_std.expand(batch_size, self.in_features)
                z_mu = self.qzero_mu.expand(batch_size, self.in_features)
                if same_noise:
                    epsilon_z = self.epsilon_z.expand(batch_size,
                                                      self.in_features)
                else:
                    epsilon_z = Variable(
                        torch.randn(batch_size, self.in_features))
                    if self.use_cuda:
                        epsilon_z = epsilon_z.cuda()
                z = z_mu + qzero_std * epsilon_z
                z = self.flow_q(z, kl=False)
                return z
            if batch_size == 1:
                qzero_std = torch.exp(0.5 * self.qzero_logvar)
                z = self.qzero_mu + qzero_std * self.epsilon_z
                z = z.unsqueeze(0)
                if kl:
                    z, logdets = self.flow_q(z, kl=True)
                    return z, logdets
                else:
                    z = self.flow_q(z, kl=False)
                    return z
        else:
            assert not kl
            z = self.qzero_mu
            z = self.flow_q(z, kl=False)
            return z

    def forward(self, x, same_noise=False):
        batch_size = x.size()[0]
        if self.training:
            z = self.sample_z(batch_size, kl=False, same_noise=same_noise)
            self.z_gen = z
            weight_std = torch.clamp(torch.exp(self.weight_logstd), 0,
                                     self.threshold_var)
            bias_std = torch.clamp(torch.exp(0.5 * self.bias_logvar), 0,
                                   self.threshold_var)
            out_mu = torch.matmul(x * z, self.weight_mu) + self.bias_mu
            # out_var is a variance; the bias term must enter as its
            # variance (bias_std squared), matching the TF versions above
            out_var = torch.matmul(
                x * x, weight_std * weight_std) + bias_std * bias_std
            if batch_size > 1:
                if same_noise:
                    epsilon_linear = self.epsilon_linear.expand(
                        batch_size, self.out_features)
                else:
                    epsilon_linear = Variable(
                        torch.randn(batch_size, self.out_features))
                    if self.use_cuda:
                        epsilon_linear = epsilon_linear.cuda()
            if batch_size == 1:
                epsilon_linear = self.epsilon_linear

            out = out_mu + torch.sqrt(out_var) * epsilon_linear
            return out
        else:
            z = self.sample_z(1, kl=False)
            weight_mu = z.view(-1, 1) * self.weight_mu
            out = torch.matmul(x, weight_mu) + self.bias_mu
            return out

    def kldiv(self):
        z, logdets = self.sample_z(1, kl=True)
        weight_mu = z.view(-1, 1) * self.weight_mu
        kldiv_weight = 0.5 * (-2 * self.weight_logstd + torch.exp(
            2 * self.weight_logstd) + weight_mu * weight_mu - 1).sum()
        kldiv_bias = 0.5 * (-self.bias_logvar + torch.exp(self.bias_logvar) +
                            self.bias_mu * self.bias_mu - 1).sum()

        logq = -0.5 * self.qzero_logvar.sum()
        logq -= logdets[0]

        cw_mu = torch.matmul(self.rzero_c, weight_mu)
        epsilon = Variable(torch.randn(self.out_features))
        if self.use_cuda:
            epsilon = epsilon.cuda()
        cw_var = torch.matmul(self.rzero_c * self.rzero_c,
                              torch.exp(2 * self.weight_logstd))
        cw = F.tanh(cw_mu + torch.sqrt(cw_var) * epsilon)

        mu_tilde = torch.mean(self.rzero_b1.ger(cw), dim=1)
        neg_log_var_tilde = torch.mean(self.rzero_b2.ger(cw), dim=1)

        z, logr = self.flow_r(z, kl=True)

        z_mu_square = (z - mu_tilde) * (z - mu_tilde)
        logr += 0.5 * (-torch.exp(neg_log_var_tilde) * z_mu_square +
                       neg_log_var_tilde).sum()

        kldiv = kldiv_weight + kldiv_bias + logq - logr

        return kldiv
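
In training, kldiv() would typically enter the objective as part of a negative ELBO. A hypothetical sketch (the helper name and the 1/N minibatch scaling are standard conventions assumed here, not shown in the example):

import torch.nn.functional as F

def elbo_loss(model, x, y, n_train):
    # negative ELBO = data NLL + KL, with KL scaled by the dataset size
    logits = model(x)
    nll = F.cross_entropy(logits, y)
    kl = sum(m.kldiv() for m in model.modules() if hasattr(m, 'kldiv'))
    return nll + kl / n_train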