def construct_nets(self):

        self.num_disc_layers = 5
        self.num_gen_layers = 5
        self.d_batch_norm = AttributeDict([
            ("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i))
            for dbn_i in range(self.num_disc_layers)
        ])
        self.sup_d_batch_norm = AttributeDict([
            ("sd_bn%i" % dbn_i, batch_norm(name='sup_d_bn%i' % dbn_i))
            for dbn_i in range(self.num_disc_layers)
        ])
        self.g_batch_norm = AttributeDict([
            ("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i))
            for gbn_i in range(self.num_gen_layers)
        ])

        s_h, s_w = self.x_dim[0], self.x_dim[1]
        s_h2, s_w2 = conv_out_size(s_h, 2), conv_out_size(s_w, 2)
        s_h4, s_w4 = conv_out_size(s_h2, 2), conv_out_size(s_w2, 2)
        s_h8, s_w8 = conv_out_size(s_h4, 2), conv_out_size(s_w4, 2)
        s_h16, s_w16 = conv_out_size(s_h8, 2), conv_out_size(s_w8, 2)

        self.gen_output_dims = OrderedDict([("g_h0_out", (s_h16, s_w16)),
                                            ("g_h1_out", (s_h8, s_w8)),
                                            ("g_h2_out", (s_h4, s_w4)),
                                            ("g_h3_out", (s_h2, s_w2)),
                                            ("g_h4_out", (s_h, s_w))])

        self.gen_weight_dims = OrderedDict([
            ("g_h0_lin_W", (self.z_dim, self.gf_dim * 8 * s_h16 * s_w16)),
            ("g_h0_lin_b", (self.gf_dim * 8 * s_h16 * s_w16, )),
            ("g_h1_W", (5, 5, self.gf_dim * 4, self.gf_dim * 8)),
            ("g_h1_b", (self.gf_dim * 4, )),
            ("g_h2_W", (5, 5, self.gf_dim * 2, self.gf_dim * 4)),
            ("g_h2_b", (self.gf_dim * 2, )),
            ("g_h3_W", (5, 5, self.gf_dim * 1, self.gf_dim * 2)),
            ("g_h3_b", (self.gf_dim * 1, )),
            ("g_h4_W", (5, 5, self.c_dim, self.gf_dim * 1)),
            ("g_h4_b", (self.c_dim, ))
        ])

        self.disc_weight_dims = OrderedDict([
            ("d_h0_W", (5, 5, self.c_dim, self.df_dim)),
            ("d_h0_b", (self.df_dim, )),
            ("d_h1_W", (5, 5, self.df_dim, self.df_dim * 2)),
            ("d_h1_b", (self.df_dim * 2, )),
            ("d_h2_W", (5, 5, self.df_dim * 2, self.df_dim * 4)),
            ("d_h2_b", (self.df_dim * 4, )),
            ("d_h3_W", (5, 5, self.df_dim * 4, self.df_dim * 8)),
            ("d_h3_b", (self.df_dim * 8, )),
            ("d_h_end_lin_W", (self.df_dim * 8 * s_h16 * s_w16,
                               self.df_dim * 4)),
            ("d_h_end_lin_b", (self.df_dim * 4, )),
            ("d_h_out_lin_W", (self.df_dim * 4, self.K)),
            ("d_h_out_lin_b", (self.K, ))
        ])
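These examples call a few helpers that are not shown on this page (conv_out_size, AttributeDict, batch_norm, and later linear/lrelu/dropout). The following is a minimal sketch of what the first three are assumed to do, inferred only from how they are used here; the actual project may define them differently.

import math
import tensorflow as tf

def conv_out_size(size, stride):
    # Assumed: spatial size after a stride-`stride` convolution with SAME padding.
    return int(math.ceil(float(size) / float(stride)))

class AttributeDict(dict):
    # Assumed: a dict whose entries can also be read/written as attributes,
    # e.g. gen_params.g_h0_lin_W.
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__

class batch_norm(object):
    # Assumed: a thin, name-scoped wrapper around tf.contrib.layers.batch_norm (TF 1.x).
    def __init__(self, epsilon=1e-5, momentum=0.9, name="batch_norm"):
        self.epsilon, self.momentum, self.name = epsilon, momentum, name

    def __call__(self, x, train=True):
        return tf.contrib.layers.batch_norm(x,
                                            decay=self.momentum,
                                            epsilon=self.epsilon,
                                            scale=True,
                                            is_training=train,
                                            scope=self.name)
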
Example #2
    def build_bgan_graph(self):
    
        self.inputs = tf.placeholder(tf.float32,
                                     [self.batch_size] + self.x_dim, name='real_images')
        
        self.labeled_inputs = tf.placeholder(tf.float32,
                                             [self.batch_size] + self.x_dim, name='real_images_w_labels')
        
        self.labels = tf.placeholder(tf.float32,
                                     [self.batch_size, self.K+1], name='real_targets')


        self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
        #self.z_sum = histogram_summary("z", self.z) TODO looks cool

        ### Generator
        self.gen_param_list = []
        with tf.variable_scope("generator") as scope:
            gen_params = AttributeDict()
            for name, shape in self.weight_dims.items():
                gen_params[name] = tf.get_variable("%s" % (name),
                                                   shape, initializer=tf.random_normal_initializer(stddev=0.02))
            self.gen_param_list.append(gen_params)

  

        self.generation = {}
        for gen_params in self.gen_param_list:
            self.generation["g_prior"] = self.gen_prior(gen_params)
            self.generation["generators"] = self.generator(self.z, gen_params)
            self.generation["gen_samplers"] = self.sampler(self.z, gen_params)
    def initialize_wgts(self, scope_str):

        if scope_str == "generator":
            weight_dims = self.gen_weight_dims
            numz = self.num_gen
        elif scope_str == "discriminator":
            weight_dims = self.disc_weight_dims
            numz = self.num_disc
        else:
            raise RuntimeError("invalid scope!")

        param_list = []
        with tf.variable_scope(scope_str) as scope:
            for zi in range(numz):
                for m in range(self.num_mcmc):
                    wgts_ = AttributeDict()
                    for name, shape in weight_dims.items():

                        wgts_[name] = tf.get_variable(
                            "%s_%04d_%04d" % (name, zi, m),
                            shape,
                            initializer=tf.random_normal_initializer(
                                stddev=0.02))
                    param_list.append(wgts_)
            return param_list
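The zero-padded "_%04d_%04d" suffix is what later lets the graph-building code carve tf.trainable_variables() into per-(generator, MCMC-sample) groups, as the build_bgan_graph examples below do. A small illustration (gi and m are hypothetical indices):

# Illustration only: collect the variables belonging to generator copy (0, 0).
t_vars = tf.trainable_variables()
gi, m = 0, 0
g_vars_0_0 = [var for var in t_vars
              if 'g_' in var.name and "_%04d_%04d" % (gi, m) in var.name]
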
Example #4
    def initialize_dist_wgts(self, scope_str):

        if scope_str == "distrib_classifier":
            weight_dims = self.distrib_weight_dims
        else:
            raise RuntimeError("invalid scope!")

        param_list = []
        with tf.variable_scope(scope_str) as scope:
            wgts_ = AttributeDict()

            for zi in range(self.num_gen):
                mu_name = "dist_%i_mu" % (zi)
                mu_shape = weight_dims[mu_name]

                var_name = "dist_%i_var" % (zi)
                var_shape = weight_dims[var_name]

                wgts_[mu_name] = tf.get_variable(
                    "%s" % mu_name,
                    mu_shape,
                    initializer=tf.random_normal_initializer(mean=1 /
                                                             self.num_gen,
                                                             stddev=0.02))
                wgts_[var_name] = tf.get_variable(
                    "%s" % var_name,
                    var_shape,
                    initializer=tf.random_normal_initializer(stddev=0.02))

            # for name, shape in weight_dims.items():
            #    wgts_[name] = tf.get_variable("%s" % name, shape, initializer=tf.random_normal_initializer(mean=1/self.num_gen, stddev=0.02))

            param_list.append(wgts_)

            return param_list
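The returned list contains a single AttributeDict holding one mu/var pair per generator. A hedged access sketch (model is a hypothetical instance, and the shapes follow the distrib_weight_dims built by the construct_from_hypers examples further down):

# Illustration only.
dist_params = model.initialize_dist_wgts("distrib_classifier")[0]
mu_0 = dist_params["dist_0_mu"]    # e.g. shape (1, H, W, D, 1), initialised around 1 / num_gen
var_0 = dist_params["dist_0_var"]  # same shape, small random initial values
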
Example #5
    def initialize_wgts(self, scope_str):

        if scope_str == GEN:
            weight_dims = self.gen_weight_dims
            numz = self.num_gen
        elif scope_str == DISC:
            weight_dims = self.disc_weight_dims
            numz = self.num_disc
        elif scope_str == ENC:
            weight_dims = self.enc_weight_dims
            numz = self.num_enc
        else:
            raise RuntimeError("invalid scope!")

        param_list = []
        with tf.variable_scope(scope_str) as scope:
            for zi in range(numz):
                for m in range(self.num_mcmc):
                    wgts_ = AttributeDict()
                    for name, shape in weight_dims.items():
                        wgts_[name] = tf.get_variable(
                            "%s_%04d_%04d" % (name, zi, m),
                            shape,
                            initializer=tf.glorot_uniform_initializer())
                    param_list.append(wgts_)

            return param_list
Example #6
    def construct_gen_from_hypers(self,
                                  gen_kernel_size=5,
                                  gen_strides=[2, 2, 2, 2],
                                  num_gfs=None):

        self.g_batch_norm = AttributeDict(
            [("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i)) \
             for gbn_i in range(len(gen_strides))])
        if num_gfs is None:
            num_gfs = [
                self.gf_dim * 8, self.gf_dim * 4, self.gf_dim * 2, self.gf_dim
            ]

        assert len(gen_strides) == len(num_gfs), "invalid hypers!"

        s_h, s_w = self.x_dim[0], self.x_dim[1]
        ks = gen_kernel_size
        self.gen_output_dims = OrderedDict()
        self.gen_weight_dims = OrderedDict()
        num_gfs = num_gfs + [self.c_dim]
        self.gen_kernel_sizes = [ks]
        for layer in range(len(gen_strides))[::-1]:
            self.gen_output_dims["g_h%i_out" % (layer + 1)] = (s_h, s_w)
            assert gen_strides[layer] <= 2, "invalid stride"
            assert ks % 2 == 1, "invalid kernel size"
            self.gen_weight_dims["g_h%i_W" % (layer + 1)] = \
                (ks, ks, num_gfs[layer + 1], num_gfs[layer])
            self.gen_weight_dims["g_h%i_b" % (layer + 1)] = (num_gfs[layer +
                                                                     1], )
            s_h = conv_out_size(s_h, gen_strides[layer])
            s_w = conv_out_size(s_w, gen_strides[layer])
            ks = kernel_sizer(ks, gen_strides[layer])
            self.gen_kernel_sizes.append(ks)

        self.gen_weight_dims.update(
            OrderedDict([("g_h0_lin_W", (self.z_dim, num_gfs[0] * s_h * s_w)),
                         ("g_h0_lin_b", (num_gfs[0] * s_h * s_w, ))]))
        self.gen_output_dims["g_h0_out"] = (s_h, s_w)

        for k, v in self.gen_output_dims.items():
            print("%s: %s" % (k, v))
        print('****')
        for k, v in self.gen_weight_dims.items():
            print("%s: %s" % (k, v))
        print('****')
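For orientation: with a 64x64 input and the default gen_strides = [2, 2, 2, 2], the reversed loop above records output sizes 64, 32, 16 and 8 for g_h4 down to g_h1, finishes with s_h = s_w = 4, and so gives "g_h0_lin_W" the shape (z_dim, num_gfs[0] * 4 * 4) (assuming conv_out_size ceil-divides by the stride, as sketched earlier). kernel_sizer is not shown on this page either; a hypothetical sketch that is at least consistent with the "ks % 2 == 1" assertions:

import math

def kernel_sizer(ks, stride):
    # Hypothetical: shrink the kernel along with the feature map, keeping it odd.
    ks = int(math.ceil(float(ks) / float(stride)))
    return ks if ks % 2 == 1 else ks + 1
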
Example #7
    def construct_disc_from_hypers(self,
                                   disc_kernel_size=5,
                                   disc_strides=[2, 2, 2, 2],
                                   num_dfs=None):
        self.d_batch_norm = AttributeDict(
            [("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i)) \
             for dbn_i in range(len(disc_strides))])
        if num_dfs is None:
            num_dfs = [
                self.df_dim, self.df_dim * 2, self.df_dim * 4, self.df_dim * 8
            ]

        assert len(disc_strides) == len(num_dfs), "invalid hypers!"

        self.disc_weight_dims = OrderedDict()
        s_h, s_w = self.x_dim[0], self.x_dim[1]
        num_dfs = [self.c_dim] + num_dfs
        ks = disc_kernel_size
        self.disc_kernel_sizes = [ks]
        for layer in range(len(disc_strides)):
            assert disc_strides[layer] <= 2, "invalid stride"
            assert ks % 2 == 1, "invalid kernel size"
            self.disc_weight_dims["d_h%i_W" % layer] = \
                (ks, ks, num_dfs[layer], num_dfs[layer + 1])
            self.disc_weight_dims["d_h%i_b" % layer] = (num_dfs[layer + 1], )
            s_h = conv_out_size(s_h, disc_strides[layer])
            s_w = conv_out_size(s_w, disc_strides[layer])
            ks = kernel_sizer(ks, disc_strides[layer])
            self.disc_kernel_sizes.append(ks)
        self.disc_weight_dims.update(
            OrderedDict([("d_h0_enc_lin_W", (self.z_dim, num_dfs[-1])),
                         ("d_h0_enc_lin_b", (num_dfs[-1])),
                         ("d_h1_enc_lin_W", (num_dfs[-1], num_dfs[-1])),
                         ("d_h1_enc_lin_b", (num_dfs[-1], )),
                         ("d_h2_enc_lin_W", (num_dfs[-1], num_dfs[-1])),
                         ("d_h2_enc_lin_b", (num_dfs[-1], )),
                         ("d_h_lin_W", (num_dfs[-1] * s_h * s_w, num_dfs[-1])),
                         ("d_h_lin_b", (num_dfs[-1], )),
                         ("d_h_out_lin_W", (num_dfs[-1], self.K)),
                         ("d_h_out_lin_b", (self.K, ))]))
        for k, v in self.disc_weight_dims.items():
            print("%s: %s" % (k, v))
        print("*****")
Example #8
    def construct_enc_from_hypers(self,
                                  enc_kernel_size=5,
                                  enc_strides=[2, 2, 2, 2],
                                  num_efs=None):

        self.e_batch_norm = AttributeDict(
            [("e_bn%i" % ebn_i, batch_norm(name="e_bn%i" % ebn_i)) \
             for ebn_i in range(len(enc_strides))])

        if num_efs is None:
            num_efs = [self.ef_dim * (2**i) for i in range(4)]

        assert len(enc_strides) == len(num_efs), "invalid hypers!"

        self.enc_weight_dims = OrderedDict()
        s_h, s_w = self.x_dim[0], self.x_dim[1]
        num_efs = [self.c_dim] + num_efs
        ks = enc_kernel_size
        self.enc_kernel_sizes = [ks]
        for layer in range(len(enc_strides)):
            assert enc_strides[layer] <= 2, "invalid strides"
            assert ks % 2 == 1, "invalid kernel size"
            self.enc_weight_dims["e_h%i_W" % layer] = \
                (ks, ks, num_efs[layer], num_efs[layer + 1])
            self.enc_weight_dims["e_h%i_b" % layer] = (num_efs[layer + 1], )
            s_h = conv_out_size(s_h, enc_strides[layer])
            s_w = conv_out_size(s_w, enc_strides[layer])
            ks = kernel_sizer(ks, enc_strides[layer])
            self.enc_kernel_sizes.append(ks)

        self.enc_weight_dims.update(
            OrderedDict([("e_h_end_lin_W", (num_efs[-1] * s_h * s_w,
                                            num_efs[-1])),
                         ("e_h_end_lin_b", (num_efs[-1], )),
                         ("e_h_out_lin_W", (num_efs[-1], self.z_dim)),
                         ("e_h_out_lin_b", (self.z_dim, ))]))

        for k, v in self.enc_weight_dims.items():
            print("%s: %s" % (k, v))
        print('****')
class BGAN(object):
    def __init__(self,
                 x_dim,
                 z_dim,
                 dataset_size,
                 batch_size=64,
                 alpha=0.001,
                 lr=0.0002,
                 optimizer='adam'):
        self.batch_size = batch_size
        self.dataset_size = dataset_size
        self.x_dim = x_dim
        self.z_dim = z_dim
        self.optimizer = optimizer.lower()
        self.alpha = alpha
        self.lr = lr
        self.weight_dims = OrderedDict([("g_h0_lin_W", (self.z_dim, 100)),
                                        ("g_h0_lin_b", (100, )),
                                        ("g_h1_lin_W", (100, 100)),
                                        ("g_h1_lin_b", (100, )),
                                        ("g_lin_W", (100, self.x_dim[0])),
                                        ("g_lin_b", (self.x_dim[0]))])

        self.K = 1  # 1 means unsupervised, label == 0 always reserved for fake
        self.build_bgan_graph()

    def _get_optimizer(self, lr):
        if self.optimizer == 'adam':
            return tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5)
        elif self.optimizer == 'sgd':
            return tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.5)
        else:
            raise ValueError("Optimizer must be either 'adam' or 'sgd'")

    def build_bgan_graph(self):

        self.inputs = tf.placeholder(tf.float32,
                                     [self.batch_size] + self.x_dim,
                                     name='real_images')
        self.labeled_inputs = tf.placeholder(tf.float32,
                                             [self.batch_size] + self.x_dim,
                                             name='real_images_w_labels')
        self.labels = tf.placeholder(tf.float32, [self.batch_size, self.K + 1],
                                     name='real_targets')
        self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
        self.drop_prob = tf.placeholder(tf.float32, name='keep_prob')
        #self.z_sum = histogram_summary("z", self.z) TODO looks cool

        self.gen_param_list = []

        # Create the generator parameters under the "generator" variable scope
        with tf.variable_scope("generator") as scope:
            self.gen_params = AttributeDict()  # holds the generator parameters
            for name, shape in self.weight_dims.items():
                if ('_W' in name):
                    self.gen_params[name] = tf.get_variable(
                        "%s" % (name),
                        shape,
                        initializer=tf.random_normal_initializer(
                            stddev=1. / tf.sqrt(shape[0] / 2.)))
                else:
                    self.gen_params[name] = tf.get_variable(
                        "%s" % (name),
                        shape,
                        initializer=tf.random_normal_initializer(stddev=0.02))

        self.D, self.D_logits = self.discriminator(self.inputs, self.K)

        self.d_loss_real = -tf.reduce_mean(tf.log(self.D + 1e-8))
        self.generation = defaultdict(list)
        self.generation["generators"].append(
            self.generator(self.z, self.gen_params, self.drop_prob))
        self.D_, D_logits_ = self.discriminator(self.generator(
            self.z, self.gen_params, self.drop_prob),
                                                self.K,
                                                reuse=True)
        self.generation["d_logits"].append(D_logits_)
        self.generation["d_probs"].append(self.D_)

        self.d_loss_fake = -tf.reduce_mean(
            tf.log(1 - self.generation["d_probs"][0] + 1e-8))
        #print(d_loss_fake)

        g_loss_ = -tf.reduce_mean(tf.log(self.generation["d_probs"][0] + 1e-8))

        t_vars = tf.trainable_variables()

        reg_loss_g = 0
        reg_loss_d = 0
        with tf.variable_scope("generator") as scope:
            for name, value in self.gen_params.items():
                if ('_W' in name):
                    reg_loss_g += tf.nn.l2_loss(value)

        self.d_vars = [var for var in t_vars if 'd_' in var.name]

        clip_d = [
            w.assign(tf.clip_by_value(w, -0.01, 0.01)) for w in self.d_vars
        ]
        self.clip_d = clip_d

        self.cnt = 0
        for var in t_vars:
            if ('_W' in var.name and 'd_' in var.name):
                reg_loss_d += tf.nn.l2_loss(var)
                self.cnt += 1
        self.d_loss = tf.reduce_mean(self.d_loss_real + self.d_loss_fake +
                                     self.alpha * reg_loss_d)

        self.g_vars = []
        self.g_vars.append([var for var in t_vars if 'g_' in var.name])

        self.g_loss = tf.reduce_mean(g_loss_ + self.alpha * reg_loss_g)

        self.d_learning_rate = tf.placeholder(tf.float32, shape=[])

        d_opt_adam = tf.train.AdamOptimizer(learning_rate=self.d_learning_rate)
        self.d_optim_adam = d_opt_adam.minimize(self.d_loss,
                                                var_list=self.d_vars)

        self.g_learning_rate = tf.placeholder(tf.float32, shape=[])

        g_opt_adam = tf.train.AdamOptimizer(learning_rate=self.g_learning_rate)
        self.g_optims_adam = g_opt_adam.minimize(self.g_loss,
                                                 var_list=self.g_vars)

    def discriminator(self, x, K, reuse=False):
        with tf.variable_scope("discriminator") as scope:
            if reuse:
                scope.reuse_variables()
            h0 = lrelu(linear(x, 100, 'd_lin_0'))
            drop = dropout(h0,
                           dropout_rate=0.01,
                           name='dropout_layer',
                           training=False)
            h1 = linear(drop, K, 'd_lin_1')
            return tf.nn.sigmoid(h1), h1

    '''def generator3(self, z, gen_params):
        with tf.variable_scope("generator") as scope:
            h0 = lrelu(linear(z, 2000, 'g_h0_lin', matrix=gen_params.g_h0_lin_W, bias=gen_params.g_h0_lin_b))
            drop = dropout(h0, dropout_rate=0.95, name='dropout_layer', training=True)
            h2 = linear(drop, self.x_dim[0], 'g_lin', matrix=gen_params.g_lin_W, bias=gen_params.g_lin_b)
            self.x_ = tanh(h2)
            return self.x_'''

    def generator(self, z, gen_params, drop_prob):
        with tf.variable_scope("generator") as scope:
            self.h0 = lrelu(
                linear(z,
                       100,
                       'g_h0_lin',
                       matrix=gen_params.g_h0_lin_W,
                       bias=gen_params.g_h0_lin_b))
            self.drop = dropout(self.h0,
                                dropout_rate=drop_prob,
                                name='dropout_layer',
                                training=True)
            self.h1 = lrelu(
                linear(self.drop,
                       100,
                       'g_h1_lin',
                       matrix=gen_params.g_h1_lin_W,
                       bias=gen_params.g_h1_lin_b))
            self.drop = dropout(self.h1,
                                dropout_rate=drop_prob,
                                name='dropout_layer',
                                training=True)
            self.x = linear(self.drop,
                            self.x_dim[0],
                            'g_lin',
                            matrix=gen_params.g_lin_W,
                            bias=gen_params.g_lin_b)
            #self.x_ = tanh(h2)
            return self.x
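A hedged usage sketch for the class above (the data here is random filler, not a real dataset, and the project's linear/lrelu/dropout helpers are assumed to be importable): x_dim is one-dimensional because both networks are fully connected, so e.g. flattened 28x28 images give x_dim = [784].

import numpy as np
import tensorflow as tf

tf.reset_default_graph()
model = BGAN(x_dim=[784], z_dim=100, dataset_size=60000,
             batch_size=64, alpha=0.001, lr=0.0002, optimizer='adam')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    real_batch = np.random.rand(64, 784).astype(np.float32)                # stand-in data
    z_batch = np.random.uniform(-1, 1, size=(64, 100)).astype(np.float32)  # noise batch
    feed = {model.inputs: real_batch,
            model.z: z_batch,
            model.drop_prob: 0.5,
            model.d_learning_rate: 2e-4,
            model.g_learning_rate: 2e-4}
    # One alternating update: discriminator (plus weight clipping), then generator.
    sess.run([model.d_optim_adam, model.clip_d], feed_dict=feed)
    sess.run(model.g_optims_adam, feed_dict=feed)
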
Example #11
    def construct_from_hypers(self,
                              gen_kernel_size=3,
                              num_dfs=None,
                              num_gfs=None):

        self.num_disc_layers = 5
        self.num_gen_layers = 19 - 4

        self.d_batch_norm = AttributeDict([
            ("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i))
            for dbn_i in range(self.num_disc_layers)
        ])
        self.g_batch_norm = AttributeDict([
            ("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i))
            for gbn_i in range(self.num_gen_layers)
        ])

        if num_dfs is None:
            num_dfs = [
                self.df_dim, self.df_dim * 2, self.df_dim * 4, self.df_dim * 8,
                self.df_dim
            ]

        if num_gfs is None:

            num_gfs = [
                self.gf_dim,
                self.gf_dim,
                self.gf_dim * 2,
                self.gf_dim * 2,
                self.gf_dim * 4,
                self.gf_dim * 4,
                self.gf_dim * 8,
                self.gf_dim * 8,  # middle layer
                # self.gf_dim * 16, self.gf_dim * 16,
                # self.gf_dim * 8, self.gf_dim * 8,
                self.gf_dim * 4,
                self.gf_dim * 4,
                self.gf_dim * 2,
                self.gf_dim * 2,
                self.gf_dim,
                self.gf_dim,
                self.num_classes,
                self.num_classes
            ]  ## output logits

        s_h, s_w, s_d = self.x_dim[0], self.x_dim[1], self.x_dim[2]
        s_h2, s_w2, s_d2 = conv_out_size(s_h, 2), conv_out_size(
            s_w, 2), conv_out_size(s_d, 2)
        s_h4, s_w4, s_d4 = conv_out_size(s_h2, 2), conv_out_size(
            s_w2, 2), conv_out_size(s_d2, 2)
        s_h8, s_w8, s_d8 = conv_out_size(s_h4, 2), conv_out_size(
            s_w4, 2), conv_out_size(s_d4, 2)
        s_h16, s_w16, s_d16 = conv_out_size(s_h8, 2), conv_out_size(
            s_w8, 2), conv_out_size(s_d8, 2)

        ks = gen_kernel_size
        #        self.gen_output_dims = OrderedDict()
        self.gen_weight_dims = OrderedDict()

        num_gfs = num_gfs + [self.channel]
        self.gen_kernel_sizes = [ks]

        ################ build unet_generator from the one-by-one ####################

        self.gen_weight_dims["g_h%i_W" % 0] = (3, 3, 3, self.channel,
                                               num_gfs[0])  # from the image
        self.gen_weight_dims["g_h%i_b" % 0] = (num_gfs[0], )
        self.gen_weight_dims["g_h%i_W" % 1] = (3, 3, 3, num_gfs[1], num_gfs[1]
                                               )  # conv1
        self.gen_weight_dims["g_h%i_b" % 1] = (num_gfs[1], )

        self.gen_weight_dims["g_h%i_W" % 2] = (3, 3, 3, num_gfs[1], num_gfs[2])
        self.gen_weight_dims["g_h%i_b" % 2] = (num_gfs[2], )
        self.gen_weight_dims["g_h%i_W" % 3] = (3, 3, 3, num_gfs[3], num_gfs[3]
                                               )  # conv2
        self.gen_weight_dims["g_h%i_b" % 3] = (num_gfs[3], )

        self.gen_weight_dims["g_h%i_W" % 4] = (3, 3, 3, num_gfs[3], num_gfs[4])
        self.gen_weight_dims["g_h%i_b" % 4] = (num_gfs[4], )
        self.gen_weight_dims["g_h%i_W" % 5] = (3, 3, 3, num_gfs[5], num_gfs[5]
                                               )  # conv3
        self.gen_weight_dims["g_h%i_b" % 5] = (num_gfs[5], )

        #############################################################################################

        self.gen_weight_dims["g_h%i_W" % 6] = (3, 3, 3, num_gfs[5], num_gfs[6])
        self.gen_weight_dims["g_h%i_b" % 6] = (num_gfs[6], )
        self.gen_weight_dims["g_h%i_W" % 7] = (3, 3, 3, num_gfs[7], num_gfs[7]
                                               )  # conv4
        self.gen_weight_dims["g_h%i_b" % 7] = (num_gfs[7], )

        ##############################################################################################

        self.gen_weight_dims["g_h%i_W" % 8] = (
            3, 3, 3, num_gfs[5] + num_gfs[7], num_gfs[8])  # conv6 concat conv4
        self.gen_weight_dims["g_h%i_b" % 8] = (num_gfs[8], )
        self.gen_weight_dims["g_h%i_W" % 9] = (3, 3, 3, num_gfs[9], num_gfs[9])
        self.gen_weight_dims["g_h%i_b" % 9] = (num_gfs[9], )

        self.gen_weight_dims["g_h%i_W" %
                             10] = (3, 3, 3, num_gfs[9] + num_gfs[3],
                                    num_gfs[10])  # conv7 concat conv3
        self.gen_weight_dims["g_h%i_b" % 10] = (num_gfs[10], )
        self.gen_weight_dims["g_h%i_W" % 11] = (3, 3, 3, num_gfs[11],
                                                num_gfs[11])
        self.gen_weight_dims["g_h%i_b" % 11] = (num_gfs[11], )

        self.gen_weight_dims["g_h%i_W" %
                             12] = (3, 3, 3, num_gfs[11] + num_gfs[1],
                                    num_gfs[12])  # conv8 concat conv2
        self.gen_weight_dims["g_h%i_b" % 12] = (num_gfs[12], )
        self.gen_weight_dims["g_h%i_W" % 13] = (3, 3, 3, num_gfs[13],
                                                num_gfs[13])
        self.gen_weight_dims["g_h%i_b" % 13] = (num_gfs[13], )

        ################### output layer #########################

        self.gen_weight_dims["g_h%i_W" % 14] = (1, 1, 1, num_gfs[13],
                                                num_gfs[14])
        self.gen_weight_dims["g_h%i_b" % 14] = (num_gfs[14], )

        #########################################################################################################

        self.disc_weight_dims = OrderedDict()
        self.disc_weight_dims["d_h%i_W" % 0] = (
            5, 5, 5, self.num_classes + self.channel, num_dfs[0]
        )  # output = ( s_h / 2, s_w / 2 )
        self.disc_weight_dims["d_h%i_b" % 0] = (num_dfs[0], )
        self.disc_weight_dims["d_h%i_W" % 1] = (
            5, 5, 5, num_dfs[0], num_dfs[1])  # output = ( s_h / 4, s_w / 4 )
        self.disc_weight_dims["d_h%i_b" % 1] = (num_dfs[1], )
        self.disc_weight_dims["d_h%i_W" % 2] = (
            5, 5, 5, num_dfs[1], num_dfs[2])  # output = ( s_h / 8, s_w / 8 )
        self.disc_weight_dims["d_h%i_b" % 2] = (num_dfs[2], )
        self.disc_weight_dims["d_h%i_W" % 3] = (
            5, 5, 5, num_dfs[2], num_dfs[3]
        )  # output = ( s_h / 16, s_w / 16 )   # pre: 1, 1,
        self.disc_weight_dims["d_h%i_b" % 3] = (num_dfs[3], )
        # self.disc_weight_dims["d_h%i_W" % 3] = (1, 1, 1, num_dfs[2], 1)  # output = ( s_h / 16, s_w / 16 )   # pre: 1, 1,
        # self.disc_weight_dims["d_h%i_b" % 3] = (1,)
        self.disc_weight_dims["d_h%i_W" % 4] = (1, 1, 1, num_dfs[3], 1)
        self.disc_weight_dims["d_h%i_b" % 4] = (1, )

        # self.disc_weight_dims.update(OrderedDict([("d_h_out_lin_W", (num_dfs[3] * s_h8 * s_w8, 1)),
        #                                           ("d_h_out_lin_b", (1,))]))

        #####################################################################################################################

        self.distrib_weight_dims = OrderedDict()
        for zi in range(self.num_gen):
            self.distrib_weight_dims["dist_%i_mu" % zi] = (
                1, self.x_dim[0], self.x_dim[1], self.x_dim[2], 1
            )  # self.num_classes
            self.distrib_weight_dims["dist_%i_var" % zi] = (1, self.x_dim[0],
                                                            self.x_dim[1],
                                                            self.x_dim[2], 1)
Example #12
    def build_bgan_graph(self):

        self.inputs = tf.placeholder(tf.float32,
                                     [self.batch_size] + self.x_dim,
                                     name='real_images')

        self.labeled_inputs = tf.placeholder(tf.float32,
                                             [self.batch_size] + self.x_dim,
                                             name='real_images_w_labels')

        self.labels = tf.placeholder(tf.float32, [self.batch_size, self.K + 1],
                                     name='real_targets')

        self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
        #self.z_sum = histogram_summary("z", self.z) TODO looks cool

        self.gen_param_list = []
        with tf.variable_scope("generator") as scope:
            for gi in range(self.num_gen):
                for m in range(self.num_mcmc):
                    gen_params = AttributeDict()
                    for name, shape in self.weight_dims.items():
                        gen_params[name] = tf.get_variable(
                            "%s_%04d_%04d" % (name, gi, m),
                            shape,
                            initializer=tf.random_normal_initializer(
                                stddev=0.02))
                    self.gen_param_list.append(gen_params)

        self.D, self.D_logits = self.discriminator(self.inputs, self.K + 1)
        self.Dsup, self.Dsup_logits = self.discriminator(self.labeled_inputs,
                                                         self.K + 1,
                                                         reuse=True)

        if self.K == 1:
            if self.wasserstein:
                self.d_loss_real = tf.reduce_mean(self.D_logits)
            else:
                # regular GAN
                constant_labels = np.zeros((self.batch_size, 2))
                constant_labels[:, 1] = 1.0
                self.d_loss_real = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=self.D_logits,
                        labels=tf.constant(constant_labels)))
        else:
            self.d_loss_sup = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.Dsup_logits, labels=self.labels))
            self.d_loss_real = -tf.reduce_mean(
                tf.log((1.0 - self.D[:, 0]) + 1e-8))

        self.generation = defaultdict(list)
        for gen_params in self.gen_param_list:
            self.generation["g_prior"].append(self.gen_prior(gen_params))
            self.generation["g_noise"].append(self.gen_noise(gen_params))
            self.generation["generators"].append(
                self.generator(self.z, gen_params))
            self.generation["gen_samplers"].append(
                self.sampler(self.z, gen_params))
            D_, D_logits_ = self.discriminator(self.generator(
                self.z, gen_params),
                                               self.K + 1,
                                               reuse=True)
            self.generation["d_logits"].append(D_logits_)
            self.generation["d_probs"].append(D_)

        all_d_logits = tf.concat(self.generation["d_logits"], 0)
        if self.wasserstein:
            self.d_loss_fake = -tf.reduce_mean(all_d_logits)
        else:
            constant_labels = np.zeros(
                (self.batch_size * self.num_gen * self.num_mcmc, self.K + 1))
            constant_labels[:, 0] = 1.0  # class label indicating the sample came from a generator, i.e. fake
            self.d_loss_fake = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    logits=all_d_logits, labels=tf.constant(constant_labels)))

        t_vars = tf.trainable_variables()
        self.d_vars = [var for var in t_vars if 'd_' in var.name]

        self.d_loss = self.d_loss_real + self.d_loss_fake
        if not self.ml:
            self.d_loss += self.disc_prior() + self.disc_noise()
        if self.K > 1:
            self.d_loss_semi = self.d_loss_sup + self.d_loss_real + self.d_loss_fake
            if not self.ml:
                self.d_loss_semi += self.disc_prior() + self.disc_noise()

        self.g_vars = []
        for gi in range(self.num_gen):
            for m in range(self.num_mcmc):
                self.g_vars.append([
                    var for var in t_vars
                    if 'g_' in var.name and "_%04d_%04d" % (gi, m) in var.name
                ])

        self.d_learning_rate = tf.placeholder(tf.float32, shape=[])
        d_opt = self._get_optimizer(self.d_learning_rate)
        self.d_optim = d_opt.minimize(self.d_loss, var_list=self.d_vars)

        d_opt_adam = tf.train.AdamOptimizer(learning_rate=self.d_learning_rate,
                                            beta1=0.5)
        self.d_optim_adam = d_opt_adam.minimize(self.d_loss,
                                                var_list=self.d_vars)

        clip_d = [
            w.assign(tf.clip_by_value(w, -0.01, 0.01)) for w in self.d_vars
        ]
        self.clip_d = clip_d

        if self.K > 1:
            self.d_semi_learning_rate = tf.placeholder(tf.float32, shape=[])
            d_opt_semi = self._get_optimizer(self.d_semi_learning_rate)
            self.d_optim_semi = d_opt_semi.minimize(self.d_loss_semi,
                                                    var_list=self.d_vars)
            d_opt_semi_adam = tf.train.AdamOptimizer(
                learning_rate=self.d_semi_learning_rate, beta1=0.5)
            self.d_optim_semi_adam = d_opt_semi_adam.minimize(
                self.d_loss_semi, var_list=self.d_vars)

        self.g_optims, self.g_optims_adam = [], []
        self.g_learning_rate = tf.placeholder(tf.float32, shape=[])
        for gi in range(self.num_gen * self.num_mcmc):
            if self.wasserstein:
                g_loss = tf.reduce_mean(self.generation["d_logits"][gi])
            else:
                g_loss = -tf.reduce_mean(
                    tf.log((1.0 - self.generation["d_probs"][gi][:, 0]) +
                           1e-8))
            if not self.ml:
                g_loss += self.generation["g_prior"][gi] + self.generation[
                    "g_noise"][gi]
            self.generation["g_losses"].append(g_loss)
            g_opt = self._get_optimizer(self.g_learning_rate)
            self.g_optims.append(
                g_opt.minimize(g_loss, var_list=self.g_vars[gi]))
            g_opt_adam = tf.train.AdamOptimizer(
                learning_rate=self.g_learning_rate, beta1=0.5)
            self.g_optims_adam.append(
                g_opt_adam.minimize(g_loss, var_list=self.g_vars[gi]))
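gen_prior, gen_noise, disc_prior and disc_noise are not shown on this page. In the Bayesian GAN setting they correspond to the Gaussian prior over the network weights and the SGHMC exploration noise; the following is only a rough sketch of the kind of terms they might return, an assumption based on the attributes prior_std, dataset_size and sghmc_noise set up in the next example's __init__, not the project's actual implementation.

def gen_prior(self, gen_params):
    # Sketch: squared-norm penalty of the generator weights under an
    # isotropic Gaussian prior with scale self.prior_std.
    prior_loss = 0.0
    for var in gen_params.values():
        prior_loss += tf.reduce_mean(tf.square(var / self.prior_std))
    return prior_loss / self.dataset_size

def gen_noise(self, gen_params):
    # Sketch: SGHMC noise term built from the per-weight Normal distributions
    # in self.sghmc_noise.
    noise_loss = 0.0
    for name, var in gen_params.items():
        noise_loss += tf.reduce_sum(var * self.sghmc_noise[name].sample())
    return noise_loss / self.dataset_size
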
Example #13
    def __init__(self,
                 x_dim,
                 z_dim,
                 dataset_size,
                 batch_size=64,
                 gf_dim=64,
                 df_dim=64,
                 prior_std=1.0,
                 J=1,
                 M=1,
                 num_classes=1,
                 eta=2e-4,
                 alpha=0.01,
                 lr=0.0002,
                 optimizer='adam',
                 wasserstein=False,
                 ml=False,
                 gen_observed=1000):

        assert len(x_dim) == 3, "invalid image dims"

        c_dim = x_dim[2]
        self.is_grayscale = (c_dim == 1)
        self.optimizer = optimizer.lower()
        self.dataset_size = dataset_size
        self.batch_size = batch_size
        self.gen_observed = gen_observed

        self.x_dim = x_dim
        self.z_dim = z_dim

        self.gf_dim = gf_dim
        self.df_dim = df_dim
        self.c_dim = c_dim
        self.lr = lr

        self.d = None
        self.d1 = None

        # Parallel Tempering
        self.invT = 1
        self.TGap = 1
        self.LRGap = 1
        self.EGap = 1
        self.anneal = 1
        self.lr_anneal = 1

        self.wasserstein = wasserstein

        self.d_bn1 = batch_norm(name='d_bn1')
        self.d_bn2 = batch_norm(name='d_bn2')
        self.d_bn3 = batch_norm(name='d_bn3')

        self.d1_bn1 = batch_norm(name='d1_bn1')
        self.d1_bn2 = batch_norm(name='d1_bn2')
        self.d1_bn3 = batch_norm(name='d1_bn3')

        self.sd_bn1 = batch_norm(name='sd_bn1')
        self.sd_bn2 = batch_norm(name='sd_bn2')
        self.sd_bn3 = batch_norm(name='sd_bn3')

        self.g_bn0 = batch_norm(name='g_bn0')
        self.g_bn1 = batch_norm(name='g_bn1')
        self.g_bn2 = batch_norm(name='g_bn2')
        self.g_bn3 = batch_norm(name='g_bn3')

        self.wasserstein = wasserstein
        self.chain0_params = []
        self.chain1_params = []

        # Bayes
        self.prior_std = prior_std
        self.num_gen = J
        self.num_mcmc = M
        self.eta = eta
        self.alpha = alpha
        # ML
        self.ml = ml
        if self.ml:
            assert self.num_gen == 1, "cannot have >1 generator for ml"

        self.output_height = x_dim[0]
        self.output_width = x_dim[1]

        s_h, s_w = self.output_height, self.output_width
        s_h2, s_w2 = conv_out_size_same(s_h, 2), conv_out_size_same(s_w, 2)
        s_h4, s_w4 = conv_out_size_same(s_h2, 2), conv_out_size_same(s_w2, 2)
        s_h8, s_w8 = conv_out_size_same(s_h4, 2), conv_out_size_same(s_w4, 2)
        s_h16, s_w16 = conv_out_size_same(s_h8, 2), conv_out_size_same(s_w8, 2)

        self.gen_params = AttributeDict()
        self.bgen_params = AttributeDict()
        self.weight_dims = OrderedDict([
            ("g_h0_lin_W", (self.z_dim, self.gf_dim * 8 * s_h16 * s_w16)),
            ("g_h0_lin_b", (self.gf_dim * 8 * s_h16 * s_w16, )),
            ("g_h1_W", (5, 5, self.gf_dim * 4, self.gf_dim * 8)),
            ("g_h1_b", (self.gf_dim * 4, )),
            ("g_h2_W", (5, 5, self.gf_dim * 2, self.gf_dim * 4)),
            ("g_h2_b", (self.gf_dim * 2, )),
            ("g_h3_W", (5, 5, self.gf_dim * 1, self.gf_dim * 2)),
            ("g_h3_b", (self.gf_dim * 1, )),
            ("g_h4_W", (5, 5, self.c_dim, self.gf_dim * 1)),
            ("g_h4_b", (self.c_dim, ))
        ])

        self.sghmc_noise = {}

        self.noise_std = np.sqrt(2 * self.alpha * self.eta)

        for name, dim in self.weight_dims.items():
            self.sghmc_noise[name] = tf.distributions.Normal(
                0., self.noise_std * tf.ones(self.weight_dims[name]))

        self.K = num_classes  # 1 means unsupervised, label == 0 always reserved for fake

        self.build_bgan_graph()

        if self.K > 1:
            print("self.K")
            print(self.K)
            self.build_test_graph()
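conv_out_size_same above is the usual DCGAN helper for SAME-padded strided convolutions; a short sketch of the assumed definition:

import math

def conv_out_size_same(size, stride):
    # Output spatial size of a stride-`stride` convolution with SAME padding.
    return int(math.ceil(float(size) / float(stride)))
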
Example #14
    def build_bgan_graph(self):

        self.inputs = tf.placeholder(tf.float32,
                                     [self.batch_size] + self.x_dim,
                                     name='real_images')

        self.labeled_inputs = tf.placeholder(tf.float32,
                                             [self.batch_size] + self.x_dim,
                                             name='real_images_w_labels')

        self.labels = tf.placeholder(tf.float32, [self.batch_size, self.K + 1],
                                     name='real_targets')

        self.z = tf.placeholder(tf.float32, [None, self.z_dim], name='z')
        #self.z_sum = histogram_summary("z", self.z) TODO looks cool

        self.gen_param_list = []
        with tf.variable_scope("generator") as scope:
            for gi in range(self.num_gen):
                for m in range(self.num_mcmc):
                    gen_params = AttributeDict()
                    for name, shape in self.weight_dims.items():
                        gen_params[name] = tf.get_variable(
                            "%s_%04d_%04d" % (name, gi, m),
                            shape,
                            initializer=tf.random_normal_initializer(
                                stddev=0.02))
                    self.gen_param_list.append(gen_params)

        # Liyao: 11/Nov/2019, adding another Markov chain (discriminator).
        # The chain0 parameters below are the same as in the original Bayesian GAN code.
        #
        ################################## Important Parameters of Chain0 #################################################
        # self.D: discriminator0
        # self.D_logits: final logit layer of discriminator0
        # self.Dsup: discriminator0 applied to the labeled (supervised) inputs
        # self.Dsup_logits: final logit layer of discriminator0 on the labeled inputs
        # self.d_loss_sup: supervised loss of discriminator0
        # self.d_loss_real: loss of discriminator0 on real images
        # self.d_loss_fake: loss of discriminator0 on fake images
        # self.d_vars: trainable variables belonging to discriminator0
        # self.d_loss_semi: semi-supervised loss of discriminator0
        # self.d_semi_learning_rate: semi-supervised learning rate for discriminator0
        ################################## End of Important Parameters of Chain0 ###########################################
        #
        ################################## Important Parameters of Chain1 ##################################################
        # self.D1: discriminator1
        # self.D1_logits: final logit layer of discriminator1
        # self.Dsup1: discriminator1 applied to the labeled (supervised) inputs
        # self.Dsup1_logits: final logit layer of discriminator1 on the labeled inputs
        # self.d1_loss_sup: supervised loss of discriminator1
        # self.d1_loss_real: loss of discriminator1 on real images
        # self.d1_loss_fake: loss of discriminator1 on fake images
        # self.d1_vars: trainable variables belonging to discriminator1
        # self.d1_loss_semi: semi-supervised loss of discriminator1
        # self.d1_semi_learning_rate: semi-supervised learning rate for discriminator1
        ################################## End of Important Parameters of Chain1 ###########################################

        self.D, self.D_logits = self.discriminator(self.inputs, self.K + 1)
        self.D1, self.D1_logits = self.discriminator1(self.inputs, self.K + 1)
        self.Dsup, self.Dsup_logits = self.discriminator(self.labeled_inputs,
                                                         self.K + 1,
                                                         reuse=True)
        self.Dsup1, self.Dsup1_logits = self.discriminator1(
            self.labeled_inputs, self.K + 1, reuse=True)

        self.d_loss_sup = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.Dsup_logits,
                                                       labels=self.labels))
        self.d1_loss_sup = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=self.Dsup1_logits, labels=self.labels))
        self.d_loss_real = -tf.reduce_mean(tf.log((1.0 - self.D[:, 0]) + 1e-8))
        self.d1_loss_real = -tf.reduce_mean(
            tf.log((1.0 - self.D1[:, 0]) + 1e-8))

        self.generation = defaultdict(list)
        for gen_params in self.gen_param_list:
            self.generation["g_prior"].append(self.gen_prior(gen_params))
            self.generation["g_noise"].append(self.gen_noise(gen_params))
            self.generation["generators"].append(
                self.generator(self.z, gen_params))
            self.generation["gen_samplers"].append(
                self.sampler(self.z, gen_params))

            D_, D_logits_ = self.discriminator(self.generator(
                self.z, gen_params),
                                               self.K + 1,
                                               reuse=True)
            D_1, D_logits_1 = self.discriminator1(self.generator(
                self.z, gen_params),
                                                  self.K + 1,
                                                  reuse=True)

            self.generation["d_logits"].append(D_logits_)
            self.generation["d_probs"].append(D_)
            self.generation["d1_logits"].append(D_logits_1)
            self.generation["d1_probs"].append(D_1)

        all_d_logits = tf.concat(self.generation["d_logits"], 0)
        all_d1_logits = tf.concat(self.generation["d1_logits"], 0)

        constant_labels = np.zeros(
            (self.batch_size * self.num_gen * self.num_mcmc, self.K + 1))
        constant_labels[:, 0] = 1.0  # class label indicating the sample came from a generator, i.e. fake
        self.d_loss_fake = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=all_d_logits, labels=tf.constant(constant_labels)))
        self.d1_loss_fake = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=all_d1_logits, labels=tf.constant(constant_labels)))

        t_vars = tf.trainable_variables()
        self.d_vars = [var for var in t_vars if 'd_' in var.name]
        self.d1_vars = [var1 for var1 in t_vars if 'd1_' in var1.name]

        self.d_loss_semi = self.d_loss_sup + self.d_loss_real + self.d_loss_fake
        self.d1_loss_semi = self.d1_loss_sup + self.d1_loss_real + self.d1_loss_fake

        self.d_loss_semi += self.disc_prior() + self.disc_noise()

        # inverse temperature for chain member
        self.d1_loss_semi += self.disc1_prior() + self.disc1_noise(self.TGap)

        self.g_vars = []
        for gi in range(self.num_gen):
            for m in range(self.num_mcmc):
                self.g_vars.append([
                    var for var in t_vars
                    if 'g_' in var.name and "_%04d_%04d" % (gi, m) in var.name
                ])

        self.d_semi_learning_rate = tf.placeholder(tf.float32, shape=[])
        d_opt_semi = self._get_optimizer(self.d_semi_learning_rate)
        self.d_optim_semi = d_opt_semi.minimize(self.d_loss_semi,
                                                var_list=self.d_vars)
        d_opt_semi_adam = tf.train.AdamOptimizer(
            learning_rate=self.d_semi_learning_rate, beta1=0.5)
        self.d_optim_semi_adam = d_opt_semi_adam.minimize(self.d_loss_semi,
                                                          var_list=self.d_vars)

        self.d1_semi_learning_rate = tf.placeholder(tf.float32, shape=[])
        d1_opt_semi = self._get_optimizer(self.d1_semi_learning_rate)
        self.d1_optim_semi = d1_opt_semi.minimize(self.d1_loss_semi,
                                                  var_list=self.d1_vars)
        d1_opt_semi_adam = tf.train.AdamOptimizer(
            learning_rate=self.d1_semi_learning_rate, beta1=0.5)
        self.d1_optim_semi_adam = d1_opt_semi_adam.minimize(
            self.d1_loss_semi, var_list=self.d1_vars)

        self.g_optims, self.g_optims_adam, self.g1_optims, self.g1_optims_adam = [], [], [], []
        self.g_learning_rate = tf.placeholder(tf.float32, shape=[])
        for gi in range(self.num_gen * self.num_mcmc):
            g_loss = -tf.reduce_mean(
                tf.log((1.0 - self.generation["d_probs"][gi][:, 0]) + 1e-8))
            g1_loss = -tf.reduce_mean(
                tf.log((1.0 - self.generation["d1_probs"][gi][:, 0]) + 1e-8))

            g_loss += self.generation["g_prior"][gi] + self.generation[
                "g_noise"][gi]
            g1_loss += self.generation["g_prior"][gi] + self.generation[
                "g_noise"][gi]

            self.generation["g_losses"].append(g_loss)
            self.generation["g1_losses"].append(g1_loss)
            g_opt = self._get_optimizer(self.g_learning_rate)
            self.g_optims.append(
                g_opt.minimize(g_loss, var_list=self.g_vars[gi]))
            g_opt_adam = tf.train.AdamOptimizer(
                learning_rate=self.g_learning_rate, beta1=0.5)
            self.g_optims_adam.append(
                g_opt_adam.minimize(g_loss, var_list=self.g_vars[gi]))

            g1_opt = self._get_optimizer(self.g_learning_rate)
            self.g1_optims.append(
                g1_opt.minimize(g1_loss, var_list=self.g_vars[gi]))
            g1_opt_adam = tf.train.AdamOptimizer(
                learning_rate=self.g_learning_rate, beta1=0.5)
            self.g1_optims_adam.append(
                g1_opt_adam.minimize(g1_loss, var_list=self.g_vars[gi]))
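A minimal sketch of stepping both chains once (sess, real_batch, labeled_batch, label_batch and sample_z are the caller's objects, and the learning rates are illustrative):

feed = {
    model.inputs: real_batch,                          # [batch_size] + x_dim
    model.labeled_inputs: labeled_batch,
    model.labels: label_batch,                         # [batch_size, K + 1]
    model.z: sample_z(model.batch_size, model.z_dim),
    model.d_semi_learning_rate: 2e-4,
    model.d1_semi_learning_rate: 2e-4 * model.LRGap,   # the second chain may run at a gapped rate
    model.g_learning_rate: 2e-4,
}
# Update both discriminator chains, then every generator copy on each chain.
sess.run([model.d_optim_semi_adam, model.d1_optim_semi_adam], feed_dict=feed)
sess.run(model.g_optims_adam + model.g1_optims_adam, feed_dict=feed)
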
Example #15
    def construct_from_hypers(self,
                              gen_kernel_size=3,
                              num_dfs=None,
                              num_gfs=None):

        self.num_disc_layers = 5
        self.num_gen_layers = 19

        self.d_batch_norm = AttributeDict([
            ("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i))
            for dbn_i in range(self.num_disc_layers)
        ])
        self.sup_g_batch_norm = AttributeDict([
            ("sup_g_bn%i" % gbn_i, batch_norm(name='sup_g_bn%i' % gbn_i))
            for gbn_i in range(self.num_gen_layers)
        ])
        self.g_batch_norm = AttributeDict([
            ("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i))
            for gbn_i in range(self.num_gen_layers)
        ])

        if num_dfs is None:
            num_dfs = [
                self.df_dim, self.df_dim * 2, self.df_dim * 4, self.df_dim * 8,
                self.df_dim
            ]

        if num_gfs is None:
            num_gfs = [
                self.gf_dim, self.gf_dim, self.gf_dim * 2, self.gf_dim * 2,
                self.gf_dim * 4, self.gf_dim * 4, self.gf_dim * 8,
                self.gf_dim * 8, self.gf_dim * 16, self.gf_dim * 16,
                self.gf_dim * 8, self.gf_dim * 8, self.gf_dim * 4,
                self.gf_dim * 4, self.gf_dim * 2, self.gf_dim * 2, self.gf_dim,
                self.gf_dim, 1, 1
            ]  ## output logits

        s_h, s_w, s_d = self.x_dim[0], self.x_dim[1], self.x_dim[2]
        s_h2, s_w2, s_d2 = conv_out_size(s_h, 2), conv_out_size(
            s_w, 2), conv_out_size(s_d, 2)
        s_h4, s_w4, s_d4 = conv_out_size(s_h2, 2), conv_out_size(
            s_w2, 2), conv_out_size(s_d2, 2)
        s_h8, s_w8, s_d8 = conv_out_size(s_h4, 2), conv_out_size(
            s_w4, 2), conv_out_size(s_d4, 2)
        s_h16, s_w16, s_d16 = conv_out_size(s_h8, 2), conv_out_size(
            s_w8, 2), conv_out_size(s_d8, 2)

        ks = gen_kernel_size
        #        self.gen_output_dims = OrderedDict()
        self.gen_weight_dims = OrderedDict()

        num_gfs = num_gfs + [0]  # ?? channel = 1
        self.gen_kernel_sizes = [ks]

        #### build unet_generator from the one-by-one

        self.gen_weight_dims["g_h%i_W" % 0] = (3, 3, 3, 1, num_gfs[0]
                                               )  # from the image
        self.gen_weight_dims["g_h%i_b" % 0] = (num_gfs[0], )
        self.gen_weight_dims["g_h%i_W" % 1] = (3, 3, 3, num_gfs[1], num_gfs[1]
                                               )  # conv1
        self.gen_weight_dims["g_h%i_b" % 1] = (num_gfs[1], )

        self.gen_weight_dims["g_h%i_W" % 2] = (3, 3, 3, num_gfs[1], num_gfs[2])
        self.gen_weight_dims["g_h%i_b" % 2] = (num_gfs[2], )
        self.gen_weight_dims["g_h%i_W" % 3] = (3, 3, 3, num_gfs[3], num_gfs[3]
                                               )  # conv2
        self.gen_weight_dims["g_h%i_b" % 3] = (num_gfs[3], )

        self.gen_weight_dims["g_h%i_W" % 4] = (3, 3, 3, num_gfs[3], num_gfs[4])
        self.gen_weight_dims["g_h%i_b" % 4] = (num_gfs[4], )
        self.gen_weight_dims["g_h%i_W" % 5] = (3, 3, 3, num_gfs[5], num_gfs[5]
                                               )  # conv3
        self.gen_weight_dims["g_h%i_b" % 5] = (num_gfs[5], )

        self.gen_weight_dims["g_h%i_W" % 6] = (3, 3, 3, num_gfs[5], num_gfs[6])
        self.gen_weight_dims["g_h%i_b" % 6] = (num_gfs[6], )
        self.gen_weight_dims["g_h%i_W" % 7] = (3, 3, 3, num_gfs[7], num_gfs[7]
                                               )  # conv4
        self.gen_weight_dims["g_h%i_b" % 7] = (num_gfs[7], )

        self.gen_weight_dims["g_h%i_W" % 8] = (3, 3, 3, num_gfs[7], num_gfs[8])
        self.gen_weight_dims["g_h%i_b" % 8] = (num_gfs[8], )
        self.gen_weight_dims["g_h%i_W" % 9] = (3, 3, 3, num_gfs[9], num_gfs[9]
                                               )  # conv5
        self.gen_weight_dims["g_h%i_b" % 9] = (num_gfs[9], )

        self.gen_weight_dims["g_h%i_W" %
                             10] = (3, 3, 3, num_gfs[9] + num_gfs[7],
                                    num_gfs[10])  # conv6 concat conv4
        self.gen_weight_dims["g_h%i_b" % 10] = (num_gfs[10], )
        self.gen_weight_dims["g_h%i_W" % 11] = (3, 3, 3, num_gfs[11],
                                                num_gfs[11])
        self.gen_weight_dims["g_h%i_b" % 11] = (num_gfs[11], )

        self.gen_weight_dims["g_h%i_W" %
                             12] = (3, 3, 3, num_gfs[11] + num_gfs[5],
                                    num_gfs[12])  # conv7 concat conv3
        self.gen_weight_dims["g_h%i_b" % 12] = (num_gfs[12], )
        self.gen_weight_dims["g_h%i_W" % 13] = (3, 3, 3, num_gfs[13],
                                                num_gfs[13])
        self.gen_weight_dims["g_h%i_b" % 13] = (num_gfs[13], )

        self.gen_weight_dims["g_h%i_W" %
                             14] = (3, 3, 3, num_gfs[13] + num_gfs[3],
                                    num_gfs[14])  # conv8 concat conv2
        self.gen_weight_dims["g_h%i_b" % 14] = (num_gfs[14], )
        self.gen_weight_dims["g_h%i_W" % 15] = (3, 3, 3, num_gfs[15],
                                                num_gfs[15])
        self.gen_weight_dims["g_h%i_b" % 15] = (num_gfs[15], )

        self.gen_weight_dims["g_h%i_W" %
                             16] = (3, 3, 3, num_gfs[15] + num_gfs[1],
                                    num_gfs[16])  # conv9 concat conv1
        self.gen_weight_dims["g_h%i_b" % 16] = (num_gfs[16], )
        self.gen_weight_dims["g_h%i_W" % 17] = (3, 3, 3, num_gfs[17],
                                                num_gfs[17])
        self.gen_weight_dims["g_h%i_b" % 17] = (num_gfs[17], )

        ################### output layer #########################

        self.gen_weight_dims["g_h%i_W" % 18] = (1, 1, 1, num_gfs[17],
                                                num_gfs[18])
        self.gen_weight_dims["g_h%i_b" % 18] = (num_gfs[18], )

        #########################################################################################################

        self.disc_weight_dims = OrderedDict()
        self.disc_weight_dims["d_h%i_W" % 0] = (
            5, 5, 5, 1, num_dfs[0])  # output = ( s_h / 2, s_w / 2 )
        self.disc_weight_dims["d_h%i_b" % 0] = (num_dfs[0], )
        self.disc_weight_dims["d_h%i_W" % 1] = (
            5, 5, 5, num_dfs[0], num_dfs[1])  # output = ( s_h / 4, s_w / 4 )
        self.disc_weight_dims["d_h%i_b" % 1] = (num_dfs[1], )
        self.disc_weight_dims["d_h%i_W" % 2] = (
            5, 5, 5, num_dfs[1], num_dfs[2])  # output = ( s_h / 8, s_w / 8 )
        self.disc_weight_dims["d_h%i_b" % 2] = (num_dfs[2], )
        self.disc_weight_dims["d_h%i_W" % 3] = (
            5, 5, 5, num_dfs[2], num_dfs[3]
        )  # output = ( s_h / 16, s_w / 16 )   # pre: 1, 1,
        self.disc_weight_dims["d_h%i_b" % 3] = (num_dfs[3], )
        self.disc_weight_dims["d_h%i_W" % 4] = (1, 1, 1, num_dfs[3], 1)
        self.disc_weight_dims["d_h%i_b" % 4] = (1, )

        #####################################################################################################################

        self.distrib_weight_dims = OrderedDict()
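        # One mean map and one variance map per generator, each with the full
        # spatial shape of the input volume (x_dim[0] x x_dim[1] x x_dim[2]).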
        for zi in range(self.num_gen):
            self.distrib_weight_dims["dist_%i_mu" %
                                     zi] = (1, self.x_dim[0], self.x_dim[1],
                                            self.x_dim[2])  # self.num_classes
            self.distrib_weight_dims["dist_%i_var" % zi] = (1, self.x_dim[0],
                                                            self.x_dim[1],
                                                            self.x_dim[2])

#####################################################################################################################

        for k, v in self.gen_weight_dims.items():  # k is the name, v is the dim
            print("gen_weight_dims - %s: %s" % (k, v))
        print('****')
        for k, v in self.disc_weight_dims.items():
            print("disc_weight_dims - %s: %s" % (k, v))
        print('****')
        for k, v in self.distrib_weight_dims.items():
            print("distrib_weight_dims - %s: %s" % (k, v))
class BDCGAN_Semi(object):
    def __init__(self,
                 x_dim,
                 z_dim,
                 dataset_size,
                 batch_size=64,
                 gf_dim=64,
                 df_dim=64,
                 prior_std=1.0,
                 J=1,
                 M=1,
                 num_classes=1,
                 eta=2e-4,
                 num_layers=4,
                 alpha=0.01,
                 lr=0.0002,
                 optimizer='adam',
                 wasserstein=False,
                 ml=False,
                 J_d=None):

        assert len(x_dim) == 3, "invalid image dims"
        c_dim = x_dim[2]
        self.is_grayscale = (c_dim == 1)
        self.optimizer = optimizer.lower()
        self.dataset_size = dataset_size
        self.batch_size = batch_size

        self.K = num_classes
        self.x_dim = x_dim
        self.z_dim = z_dim

        self.gf_dim = gf_dim
        self.df_dim = df_dim
        self.c_dim = c_dim
        self.lr = lr

        # Bayes
        self.prior_std = prior_std
        self.num_gen = J
        self.num_disc = J_d if J_d is not None else 1
        self.num_mcmc = M
        self.eta = eta
        self.alpha = alpha
        # ML
        self.ml = ml
        if self.ml:
            assert self.num_gen == 1 and self.num_disc == 1 and self.num_mcmc == 1, "invalid settings for ML training"

        self.noise_std = np.sqrt(2 * self.alpha * self.eta)
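        # gradient-noise scale used by gen_noise / disc_noise below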

        def get_strides(num_layers, num_pool):
            interval = int(math.floor(num_layers / float(num_pool)))
            strides = np.array([1] * num_layers)
            strides[0:interval * num_pool:interval] = 2
            return strides
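        # e.g. get_strides(4, 4) -> [2, 2, 2, 2]; get_strides(8, 4) -> [2, 1, 2, 1, 2, 1, 2, 1]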

        self.num_pool = 4
        self.max_num_dfs = 512
        self.gen_strides = get_strides(num_layers, self.num_pool)
        self.disc_strides = self.gen_strides
        num_dfs = np.cumprod(np.array([self.df_dim] +
                                      list(self.disc_strides)))[:-1]
        num_dfs[num_dfs >= self.max_num_dfs] = self.max_num_dfs  # memory
        self.num_dfs = list(num_dfs)
        self.num_gfs = self.num_dfs[::-1]
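        # With the defaults df_dim = 64 and num_layers = 4 this gives
        # num_dfs = [64, 128, 256, 512] (capped at max_num_dfs) and the
        # generator uses the reversed schedule num_gfs = [512, 256, 128, 64].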

        self.construct_from_hypers(gen_strides=self.gen_strides,
                                   disc_strides=self.disc_strides,
                                   num_gfs=self.num_gfs,
                                   num_dfs=self.num_dfs)

        self.build_bgan_graph()
        self.build_test_graph()

    def construct_from_hypers(self,
                              gen_kernel_size=5,
                              gen_strides=[2, 2, 2, 2],
                              disc_kernel_size=5,
                              disc_strides=[2, 2, 2, 2],
                              num_dfs=None,
                              num_gfs=None):

        self.d_batch_norm = AttributeDict([
            ("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i))
            for dbn_i in range(len(disc_strides))
        ])
        self.sup_d_batch_norm = AttributeDict([
            ("sd_bn%i" % dbn_i, batch_norm(name='sup_d_bn%i' % dbn_i))
            for dbn_i in range(5)
        ])
        self.g_batch_norm = AttributeDict([
            ("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i))
            for gbn_i in range(len(gen_strides))
        ])

        if num_dfs is None:
            num_dfs = [
                self.df_dim, self.df_dim * 2, self.df_dim * 4, self.df_dim * 8
            ]

        if num_gfs is None:
            num_gfs = [
                self.gf_dim * 8, self.gf_dim * 4, self.gf_dim * 2, self.gf_dim
            ]

        assert len(gen_strides) == len(num_gfs), "invalid hypers!"
        assert len(disc_strides) == len(num_dfs), "invalid hypers!"

        s_h, s_w = self.x_dim[0], self.x_dim[1]
        ks = gen_kernel_size
        self.gen_output_dims = OrderedDict()
        self.gen_weight_dims = OrderedDict()
        num_gfs = num_gfs + [self.c_dim]
        self.gen_kernel_sizes = [ks]
        for layer in range(len(gen_strides))[::-1]:
            self.gen_output_dims["g_h%i_out" % (layer + 1)] = (s_h, s_w)
            assert gen_strides[layer] <= 2, "invalid stride"
            assert ks % 2 == 1, "invalid kernel size"
            self.gen_weight_dims["g_h%i_W" %
                                 (layer + 1)] = (ks, ks, num_gfs[layer + 1],
                                                 num_gfs[layer])
            self.gen_weight_dims["g_h%i_b" % (layer + 1)] = (num_gfs[layer +
                                                                     1], )
            s_h, s_w = conv_out_size(s_h, gen_strides[layer]), conv_out_size(
                s_w, gen_strides[layer])
            ks = kernel_sizer(ks, gen_strides[layer])
            self.gen_kernel_sizes.append(ks)

        self.gen_weight_dims.update(
            OrderedDict([("g_h0_lin_W", (self.z_dim, num_gfs[0] * s_h * s_w)),
                         ("g_h0_lin_b", (num_gfs[0] * s_h * s_w, ))]))
        self.gen_output_dims["g_h0_out"] = (s_h, s_w)

        self.disc_weight_dims = OrderedDict()
        s_h, s_w = self.x_dim[0], self.x_dim[1]
        num_dfs = [self.c_dim] + num_dfs
        ks = disc_kernel_size
        self.disc_kernel_sizes = [ks]
        for layer in range(len(disc_strides)):
            assert disc_strides[layer] <= 2, "invalid stride"
            assert ks % 2 == 1, "invalid kernel size"
            self.disc_weight_dims["d_h%i_W" % layer] = (ks, ks, num_dfs[layer],
                                                        num_dfs[layer + 1])
            self.disc_weight_dims["d_h%i_b" % layer] = (num_dfs[layer + 1], )
            s_h, s_w = conv_out_size(s_h, disc_strides[layer]), conv_out_size(
                s_w, disc_strides[layer])
            ks = kernel_sizer(ks, disc_strides[layer])
            self.disc_kernel_sizes.append(ks)

        self.disc_weight_dims.update(
            OrderedDict([("d_h_end_lin_W", (num_dfs[-1] * s_h * s_w,
                                            num_dfs[-1])),
                         ("d_h_end_lin_b", (num_dfs[-1], )),
                         ("d_h_out_lin_W", (num_dfs[-1], self.K)),
                         ("d_h_out_lin_b", (self.K, ))]))

        for k, v in self.gen_output_dims.items():
            print("%s: %s" % (k, v))
        print('****')
        for k, v in self.gen_weight_dims.items():
            print("%s: %s" % (k, v))
        print('****')
        for k, v in self.disc_weight_dims.items():
            print("%s: %s" % (k, v))

    def construct_nets(self):

        self.num_disc_layers = 5
        self.num_gen_layers = 5
        self.d_batch_norm = AttributeDict([
            ("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i))
            for dbn_i in range(self.num_disc_layers)
        ])
        self.sup_d_batch_norm = AttributeDict([
            ("sd_bn%i" % dbn_i, batch_norm(name='sup_d_bn%i' % dbn_i))
            for dbn_i in range(self.num_disc_layers)
        ])
        self.g_batch_norm = AttributeDict([
            ("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i))
            for gbn_i in range(self.num_gen_layers)
        ])

        s_h, s_w = self.x_dim[0], self.x_dim[1]
        s_h2, s_w2 = conv_out_size(s_h, 2), conv_out_size(s_w, 2)
        s_h4, s_w4 = conv_out_size(s_h2, 2), conv_out_size(s_w2, 2)
        s_h8, s_w8 = conv_out_size(s_h4, 2), conv_out_size(s_w4, 2)
        s_h16, s_w16 = conv_out_size(s_h8, 2), conv_out_size(s_w8, 2)

        self.gen_output_dims = OrderedDict([("g_h0_out", (s_h16, s_w16)),
                                            ("g_h1_out", (s_h8, s_w8)),
                                            ("g_h2_out", (s_h4, s_w4)),
                                            ("g_h3_out", (s_h2, s_w2)),
                                            ("g_h4_out", (s_h, s_w))])

        self.gen_weight_dims = OrderedDict([
            ("g_h0_lin_W", (self.z_dim, self.gf_dim * 8 * s_h16 * s_w16)),
            ("g_h0_lin_b", (self.gf_dim * 8 * s_h16 * s_w16, )),
            ("g_h1_W", (5, 5, self.gf_dim * 4, self.gf_dim * 8)),
            ("g_h1_b", (self.gf_dim * 4, )),
            ("g_h2_W", (5, 5, self.gf_dim * 2, self.gf_dim * 4)),
            ("g_h2_b", (self.gf_dim * 2, )),
            ("g_h3_W", (5, 5, self.gf_dim * 1, self.gf_dim * 2)),
            ("g_h3_b", (self.gf_dim * 1, )),
            ("g_h4_W", (5, 5, self.c_dim, self.gf_dim * 1)),
            ("g_h4_b", (self.c_dim, ))
        ])

        self.disc_weight_dims = OrderedDict([
            ("d_h0_W", (5, 5, self.c_dim, self.df_dim)),
            ("d_h0_b", (self.df_dim, )),
            ("d_h1_W", (5, 5, self.df_dim, self.df_dim * 2)),
            ("d_h1_b", (self.df_dim * 2, )),
            ("d_h2_W", (5, 5, self.df_dim * 2, self.df_dim * 4)),
            ("d_h2_b", (self.df_dim * 4, )),
            ("d_h3_W", (5, 5, self.df_dim * 4, self.df_dim * 8)),
            ("d_h3_b", (self.df_dim * 8, )),
            ("d_h_end_lin_W", (self.df_dim * 8 * s_h16 * s_w16,
                               self.df_dim * 4)),
            ("d_h_end_lin_b", (self.df_dim * 4, )),
            ("d_h_out_lin_W", (self.df_dim * 4, self.K)),
            ("d_h_out_lin_b", (self.K, ))
        ])

    def _get_optimizer(self, lr):
        if self.optimizer == 'adam':
            return tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5)
        elif self.optimizer == 'sgd':
            return tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.5)
        else:
            raise ValueError("Optimizer must be either 'adam' or 'sgd'")

    def initialize_wgts(self, scope_str):

        if scope_str == "generator":
            weight_dims = self.gen_weight_dims
            numz = self.num_gen
        elif scope_str == "discriminator":
            weight_dims = self.disc_weight_dims
            numz = self.num_disc
        else:
            raise RuntimeError("invalid scope!")

        param_list = []
        with tf.variable_scope(scope_str) as scope:
            for zi in range(numz):
                for m in range(self.num_mcmc):
                    wgts_ = AttributeDict()
                    for name, shape in weight_dims.items():
                        wgts_[name] = tf.get_variable(
                            "%s_%04d_%04d" % (name, zi, m),
                            shape,
                            initializer=tf.random_normal_initializer(
                                stddev=0.02))
                    param_list.append(wgts_)
            return param_list

    def build_bgan_graph(self):

        self.inputs = tf.placeholder(tf.float32,
                                     [self.batch_size] + self.x_dim,
                                     name='real_images')

        self.labeled_inputs = tf.placeholder(tf.float32,
                                             [self.batch_size] + self.x_dim,
                                             name='real_images_w_labels')

        self.labels = tf.placeholder(tf.float32, [self.batch_size, self.K],
                                     name='real_targets')

        self.z = tf.placeholder(tf.float32,
                                [self.batch_size, self.z_dim, self.num_gen],
                                name='z')
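        # one latent slice per generator: z[:, :, gi] feeds generator gi below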
        self.z_sampler = tf.placeholder(tf.float32,
                                        [self.batch_size, self.z_dim],
                                        name='z_sampler')

        # initialize generator weights
        self.gen_param_list = self.initialize_wgts("generator")
        self.disc_param_list = self.initialize_wgts("discriminator")
        ### build discriminative losses and optimizers
        # prep optimizer args
        self.d_semi_learning_rate = tf.placeholder(tf.float32, shape=[])

        # compile all discriminative weights
        t_vars = tf.trainable_variables()
        self.d_vars = []
        for di in range(self.num_disc):
            for m in range(self.num_mcmc):
                self.d_vars.append([
                    var for var in t_vars
                    if 'd_' in var.name and "_%04d_%04d" % (di, m) in var.name
                ])

        ### build disc losses and optimizers
        self.d_losses, self.d_optims_semi, self.d_optims_semi_adam = [], [], []
        for di, disc_params in enumerate(self.disc_param_list):

            d_probs, d_logits, _ = self.discriminator(self.inputs, self.K,
                                                      disc_params)

            d_loss_real = -tf.reduce_mean(tf.reduce_logsumexp(d_logits, 1)) +\
            tf.reduce_mean(tf.nn.softplus(tf.reduce_logsumexp(d_logits, 1)))
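            # K-class semi-supervised GAN loss: logsumexp over the K class
            # logits acts as the unnormalized "real" logit, so
            # -logsumexp + softplus(logsumexp) = -log(Z / (1 + Z)) = -log D(x)
            # with Z = sum_k exp(logit_k).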

            d_loss_fakes = []
            for gi, gen_params in enumerate(self.gen_param_list):
                d_probs_, d_logits_, _ = self.discriminator(
                    self.generator(self.z[:, :, gi % self.num_gen],
                                   gen_params), self.K, disc_params)
                d_loss_fake_ = tf.reduce_mean(
                    tf.nn.softplus(tf.reduce_logsumexp(d_logits_, 1)))
                d_loss_fakes.append(d_loss_fake_)

            d_sup_probs, d_sup_logits, _ = self.discriminator(
                self.labeled_inputs, self.K, disc_params)
            d_loss_sup = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(logits=d_sup_logits,
                                                        labels=self.labels))
            d_losses_semi = []
            for d_loss_fake_ in d_loss_fakes:
                d_loss_semi_ = d_loss_sup + d_loss_real * float(
                    self.num_gen) + d_loss_fake_
                if not self.ml:
                    d_loss_semi_ += self.disc_prior(
                        disc_params) + self.disc_noise(disc_params)
                d_losses_semi.append(tf.reshape(d_loss_semi_, [1]))

            d_loss_semi = tf.reduce_logsumexp(tf.concat(d_losses_semi, 0))
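            # the per-generator semi-supervised losses are combined with
            # logsumexp (a smooth maximum) rather than a plain sum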
            self.d_losses.append(d_loss_semi)
            d_opt_semi = self._get_optimizer(self.d_semi_learning_rate)
            self.d_optims_semi.append(
                d_opt_semi.minimize(d_loss_semi, var_list=self.d_vars[di]))
            d_opt_semi_adam = tf.train.AdamOptimizer(
                learning_rate=self.d_semi_learning_rate, beta1=0.5)
            self.d_optims_semi_adam.append(
                d_opt_semi_adam.minimize(d_loss_semi,
                                         var_list=self.d_vars[di]))

        ### build generative losses and optimizers
        self.g_learning_rate = tf.placeholder(tf.float32, shape=[])
        self.g_vars = []
        for gi in range(self.num_gen):
            for m in range(self.num_mcmc):
                self.g_vars.append([
                    var for var in t_vars
                    if 'g_' in var.name and "_%04d_%04d" % (gi, m) in var.name
                ])

        self.g_losses, self.g_optims_semi, self.g_optims_semi_adam = [], [], []
        for gi, gen_params in enumerate(self.gen_param_list):

            gi_losses = []
            for disc_params in self.disc_param_list:
                d_probs_, d_logits_, d_features_fake = self.discriminator(
                    self.generator(self.z[:, :, gi % self.num_gen],
                                   gen_params), self.K, disc_params)
                _, _, d_features_real = self.discriminator(
                    self.inputs, self.K, disc_params)
                g_loss_ = -tf.reduce_mean(tf.reduce_logsumexp(d_logits_, 1)) +\
                tf.reduce_mean(tf.nn.softplus(tf.reduce_logsumexp(d_logits_, 1))) # not needed?!
                g_loss_ += tf.reduce_mean(
                    huber_loss(d_features_real[-1], d_features_fake[-1]))
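                # feature matching: penalize the Huber distance between the
                # discriminator's last hidden features on real and generated
                # samples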
                if not self.ml:
                    g_loss_ += self.gen_prior(gen_params) + self.gen_noise(
                        gen_params)
                gi_losses.append(tf.reshape(g_loss_, [1]))

            g_loss = tf.reduce_logsumexp(tf.concat(gi_losses, 0))
            self.g_losses.append(g_loss)
            g_opt = self._get_optimizer(self.g_learning_rate)
            self.g_optims_semi.append(
                g_opt.minimize(g_loss, var_list=self.g_vars[gi]))
            g_opt_adam = tf.train.AdamOptimizer(
                learning_rate=self.g_learning_rate, beta1=0.5)
            self.g_optims_semi_adam.append(
                g_opt_adam.minimize(g_loss, var_list=self.g_vars[gi]))

        ### build samplers
        self.gen_samplers = []
        for gi, gen_params in enumerate(self.gen_param_list):
            self.gen_samplers.append(self.generator(self.z_sampler,
                                                    gen_params))

        ### build vanilla supervised loss
        self.lbls = tf.placeholder(tf.float32, [self.batch_size, self.K],
                                   name='real_sup_targets')

        self.S, self.S_logits = self.sup_discriminator(self.inputs, self.K)
        self.s_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=self.S_logits,
                                                    labels=self.lbls))
        t_vars = tf.trainable_variables()
        self.sup_vars = [var for var in t_vars if 'sup_' in var.name]
        supervised_lr = 0.05 * self.lr
        s_opt = self._get_optimizer(supervised_lr)
        self.s_optim = s_opt.minimize(self.s_loss, var_list=self.sup_vars)
        s_opt_adam = tf.train.AdamOptimizer(learning_rate=supervised_lr,
                                            beta1=0.5)
        self.s_optim_adam = s_opt_adam.minimize(self.s_loss,
                                                var_list=self.sup_vars)

    def build_test_graph(self):

        self.test_inputs = tf.placeholder(tf.float32,
                                          [self.batch_size] + self.x_dim,
                                          name='real_test_images')

        self.test_d_probs, self.test_d_logits = [], []
        for disc_params in self.disc_param_list:
            test_d_probs_, test_d_logits_, _ = self.discriminator(
                self.test_inputs, self.K, disc_params, train=False)
            self.test_d_probs.append(test_d_probs_)
            self.test_d_logits.append(test_d_logits_)

        # build standard purely supervised losses and optimizers
        self.test_s_probs, self.test_s_logits = self.sup_discriminator(
            self.test_inputs, self.K, reuse=True)

    def sup_discriminator(self, image, K, reuse=False):
        # TODO collapse this into disc
        with tf.variable_scope("sup_discriminator") as scope:
            if reuse:
                scope.reuse_variables()

            h0 = lrelu(conv2d(image, self.df_dim, name='sup_h0_conv'))
            h1 = lrelu(
                self.sup_d_batch_norm.sd_bn1(
                    conv2d(h0, self.df_dim * 2, name='sup_h1_conv')))
            h2 = lrelu(
                self.sup_d_batch_norm.sd_bn2(
                    conv2d(h1, self.df_dim * 4, name='sup_h2_conv')))
            h3 = lrelu(
                self.sup_d_batch_norm.sd_bn3(
                    conv2d(h2, self.df_dim * 8, name='sup_h3_conv')))
            h4 = linear(tf.reshape(h3, [self.batch_size, -1]), K, 'sup_h3_lin')
            return tf.nn.softmax(h4), h4

    def discriminator(self, image, K, disc_params, train=True):

        with tf.variable_scope("discriminator") as scope:

            h = image
            for layer in range(len(self.disc_strides)):
                if layer == 0:
                    h = lrelu(
                        conv2d(h,
                               self.disc_weight_dims["d_h%i_W" % layer][-1],
                               name='d_h%i_conv' % layer,
                               k_h=self.disc_kernel_sizes[layer],
                               k_w=self.disc_kernel_sizes[layer],
                               d_h=self.disc_strides[layer],
                               d_w=self.disc_strides[layer],
                               w=disc_params["d_h%i_W" % layer],
                               biases=disc_params["d_h%i_b" % layer]))
                else:
                    h = lrelu(self.d_batch_norm["d_bn%i" % layer](conv2d(
                        h,
                        self.disc_weight_dims["d_h%i_W" % layer][-1],
                        name='d_h%i_conv' % layer,
                        k_h=self.disc_kernel_sizes[layer],
                        k_w=self.disc_kernel_sizes[layer],
                        d_h=self.disc_strides[layer],
                        d_w=self.disc_strides[layer],
                        w=disc_params["d_h%i_W" % layer],
                        biases=disc_params["d_h%i_b" % layer]),
                                                                  train=train))

            h_end = lrelu(
                linear(tf.reshape(h, [self.batch_size, -1]),
                       self.df_dim * 4,
                       "d_h_end_lin",
                       matrix=disc_params.d_h_end_lin_W,
                       bias=disc_params.d_h_end_lin_b))  # for feature norm
            h_out = linear(h_end,
                           K,
                           'd_h_out_lin',
                           matrix=disc_params.d_h_out_lin_W,
                           bias=disc_params.d_h_out_lin_b)

            return tf.nn.softmax(h_out), h_out, [h_end]

    def generator(self, z, gen_params):

        with tf.variable_scope("generator") as scope:

            h = linear(z,
                       self.gen_weight_dims["g_h0_lin_W"][-1],
                       'g_h0_lin',
                       matrix=gen_params.g_h0_lin_W,
                       bias=gen_params.g_h0_lin_b)
            h = tf.nn.relu(self.g_batch_norm.g_bn0(h))

            h = tf.reshape(h, [
                self.batch_size, self.gen_output_dims["g_h0_out"][0],
                self.gen_output_dims["g_h0_out"][1], -1
            ])

            for layer in range(1, len(self.gen_strides) + 1):

                out_shape = [
                    self.batch_size,
                    self.gen_output_dims["g_h%i_out" % layer][0],
                    self.gen_output_dims["g_h%i_out" % layer][1],
                    self.gen_weight_dims["g_h%i_W" % layer][-2]
                ]

                h = deconv2d(h,
                             out_shape,
                             k_h=self.gen_kernel_sizes[layer - 1],
                             k_w=self.gen_kernel_sizes[layer - 1],
                             d_h=self.gen_strides[layer - 1],
                             d_w=self.gen_strides[layer - 1],
                             name='g_h%i' % layer,
                             w=gen_params["g_h%i_W" % layer],
                             biases=gen_params["g_h%i_b" % layer])
                if layer < len(self.gen_strides):
                    h = tf.nn.relu(self.g_batch_norm["g_bn%i" % layer](h))

            return tf.nn.tanh(h)

    def gen_prior(self, gen_params):
        with tf.variable_scope("generator") as scope:
            prior_loss = 0.0
            for var in gen_params.values():
                nn = tf.divide(var, self.prior_std)
                prior_loss += tf.reduce_mean(tf.multiply(nn, nn))

        prior_loss /= self.dataset_size

        return prior_loss

    def gen_noise(self, gen_params):
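        # SGHMC-style noise injection: the loss term sum(var * eps) with
        # eps ~ N(0, noise_std^2) has gradient eps w.r.t. the weights, so
        # minimizing it adds Gaussian noise (std = sqrt(2 * alpha * eta))
        # to the parameter updates.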
        with tf.variable_scope("generator") as scope:
            noise_loss = 0.0
            for name, var in gen_params.items():
                noise_ = tf.contrib.distributions.Normal(
                    mu=0., sigma=self.noise_std * tf.ones(var.get_shape()))
                noise_loss += tf.reduce_sum(var * noise_.sample())
        noise_loss /= self.dataset_size
        return noise_loss

    def disc_prior(self, disc_params):
        with tf.variable_scope("discriminator") as scope:
            prior_loss = 0.0
            for var in disc_params.values():
                nn = tf.divide(var, self.prior_std)
                prior_loss += tf.reduce_mean(tf.multiply(nn, nn))

        prior_loss /= self.dataset_size

        return prior_loss

    def disc_noise(self, disc_params):
        with tf.variable_scope("discriminator") as scope:
            noise_loss = 0.0
            for var in disc_params.values():
                noise_ = tf.contrib.distributions.Normal(
                    mu=0., sigma=self.noise_std * tf.ones(var.get_shape()))
                noise_loss += tf.reduce_sum(var * noise_.sample())
        noise_loss /= self.dataset_size
        return noise_loss
Example #18
class BDCGAN_Semi_3d(object):

    def __init__(self, x_dim, z_dim, dataset_size, batch_size=64, gf_dim=64, df_dim=64,
                 prior_std=1.0, J=1, M=1, num_classes=1, eta=1, num_layers=4,
                 alpha=0.01, lr=0.0002, optimizer='adam', wasserstein=False,
                 ml=False, J_d=None):  # eta=2e-4,

        print("ml = ", ml)

        self.optimizer = optimizer.lower()
        self.dataset_size = dataset_size
        self.batch_size = batch_size

        self.K = num_classes
        self.x_dim = x_dim
        self.z_dim = z_dim    # latent (noise) vector dimension

        self.gf_dim = gf_dim  # base number of generator filters (df_dim: base number of discriminator filters)
        self.df_dim = df_dim
        self.c_dim = x_dim[3] # x_dim = [x, y, z, c]
        self.is_grayscale = (self.c_dim == 1)
        self.lr = lr

        # Bayes
        self.prior_std = prior_std
        self.num_gen = J     # number of generators (one weight sample per generator)
        self.num_disc = J_d if J_d is not None else 1
        self.num_mcmc = M
        self.eta = eta       # not required in variational inference and MC dropout
        self.alpha = alpha   # not required in variational inference and MC dropout

        # ML
        self.ml = ml
        if self.ml:
            assert self.num_gen == 1 and self.num_disc == 1 and self.num_mcmc == 1, "invalid settings for ML training"

        self.noise_std = 10  # overrides the default np.sqrt(2 * self.alpha * self.eta)


        def get_strides(num_layers, num_pool):
            interval = int(math.floor(num_layers / float(num_pool)))
            strides = np.array([1] * num_layers)
            strides[0:interval * num_pool:interval] = 2
            return strides

        self.num_pool = 4
        self.max_num_dfs = 1024   # default - 512
        self.gen_strides = get_strides(num_layers, self.num_pool)
        self.disc_strides = self.gen_strides
        num_dfs = np.cumprod(np.array([self.df_dim] + list(self.disc_strides)))[:-1]
        num_dfs[num_dfs >= self.max_num_dfs] = self.max_num_dfs  # memory
        self.num_dfs = list(num_dfs)
        self.num_gfs = self.num_dfs[::-1]

        self.construct_from_hypers(gen_strides=self.gen_strides, disc_strides=self.disc_strides,
                                   num_gfs=self.num_gfs, num_dfs=self.num_dfs)

        self.build_bgan_graph()
        self.build_test_graph()

    def construct_from_hypers(self, gen_kernel_size=5, gen_strides=[2, 2, 2, 2],
                              disc_kernel_size=5, disc_strides=[2, 2, 2, 2],
                              num_dfs=None, num_gfs=None):

        self.d_batch_norm = AttributeDict(
            [("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i)) for dbn_i in range(len(disc_strides))])
        self.sup_d_batch_norm = AttributeDict(
            [("sd_bn%i" % dbn_i, batch_norm(name='sup_d_bn%i' % dbn_i)) for dbn_i in range(5)])
        self.g_batch_norm = AttributeDict(
            [("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i)) for gbn_i in range(len(gen_strides))])

        if num_dfs is None:
            num_dfs = [self.df_dim, self.df_dim * 2, self.df_dim * 4, self.df_dim * 8]

        if num_gfs is None:
            num_gfs = [self.gf_dim * 8, self.gf_dim * 4, self.gf_dim * 2, self.gf_dim]

        assert len(gen_strides) == len(num_gfs), "invalid hypers!"
        assert len(disc_strides) == len(num_dfs), "invalid hypers!"

        s_h, s_w = self.x_dim[0], self.x_dim[1]
        ks = gen_kernel_size
        self.gen_output_dims = OrderedDict()
        self.gen_weight_dims = OrderedDict()
        num_gfs = num_gfs + [self.c_dim]
        self.gen_kernel_sizes = [ks]

        for layer in range(len(gen_strides))[::-1]:
            self.gen_output_dims["g_h%i_out" % (layer + 1)] = (s_h, s_w)
            assert gen_strides[layer] <= 2, "invalid stride"
            assert ks % 2 == 1, "invalid kernel size"

            self.gen_weight_dims["g_h%i_W" % (layer + 1)] = (ks, ks, num_gfs[layer + 1], num_gfs[layer])
            self.gen_weight_dims["g_h%i_b" % (layer + 1)] = (num_gfs[layer + 1],)
            s_h, s_w = conv_out_size(s_h, gen_strides[layer]), conv_out_size(s_w, gen_strides[layer])
            ks = kernel_sizer(ks, gen_strides[layer])
            self.gen_kernel_sizes.append(ks)

        self.gen_weight_dims.update(OrderedDict([("g_h0_lin_W", (self.z_dim, num_gfs[0] * s_h * s_w)),
                                                 ("g_h0_lin_b", (num_gfs[0] * s_h * s_w,))]))
        self.gen_output_dims["g_h0_out"] = (s_h, s_w)

        self.disc_weight_dims = OrderedDict()
        s_h, s_w = self.x_dim[0], self.x_dim[1]
        num_dfs = [self.c_dim] + num_dfs
        ks = disc_kernel_size
        self.disc_kernel_sizes = [ks]

        for layer in range(len(disc_strides)):
            assert disc_strides[layer] <= 2, "invalid stride"
            assert ks % 2 == 1, "invalid kernel size"

            self.disc_weight_dims["d_h%i_W" % layer] = (ks, ks, num_dfs[layer], num_dfs[layer + 1])
            self.disc_weight_dims["d_h%i_b" % layer] = (num_dfs[layer + 1],)
            s_h, s_w = conv_out_size(s_h, disc_strides[layer]), conv_out_size(s_w, disc_strides[layer])
            ks = kernel_sizer(ks, disc_strides[layer])
            self.disc_kernel_sizes.append(ks)

        self.disc_weight_dims.update(OrderedDict([("d_h_end_lin_W", (num_dfs[-1] * s_h * s_w, num_dfs[-1])),
                                                  ("d_h_end_lin_b", (num_dfs[-1],)),
                                                  ("d_h_out_lin_W", (num_dfs[-1], self.K)),
                                                  ("d_h_out_lin_b", (self.K,))]))

        for k, v in self.gen_output_dims.items():
            print("gen_output_dims - %s: %s" % (k, v))
        print('****')
        for k, v in self.gen_weight_dims.items():
            print("gen_weight_dims - %s: %s" % (k, v))
        print('****')
        for k, v in self.disc_weight_dims.items():
            print("dics_weight_dims - %s: %s" % (k, v))

    def construct_nets(self):

        self.num_disc_layers = 5
        self.num_gen_layers = 5

        self.d_batch_norm = AttributeDict(
            [("d_bn%i" % dbn_i, batch_norm(name='d_bn%i' % dbn_i)) for dbn_i in range(self.num_disc_layers)])
        self.sup_d_batch_norm = AttributeDict(
            [("sd_bn%i" % dbn_i, batch_norm(name='sup_d_bn%i' % dbn_i)) for dbn_i in range(self.num_disc_layers)])
        self.g_batch_norm = AttributeDict(
            [("g_bn%i" % gbn_i, batch_norm(name='g_bn%i' % gbn_i)) for gbn_i in range(self.num_gen_layers)])

        s_h, s_w = self.x_dim[0], self.x_dim[1]
        s_h2, s_w2 = conv_out_size(s_h, 2), conv_out_size(s_w, 2)
        s_h4, s_w4 = conv_out_size(s_h2, 2), conv_out_size(s_w2, 2)
        s_h8, s_w8 = conv_out_size(s_h4, 2), conv_out_size(s_w4, 2)
        s_h16, s_w16 = conv_out_size(s_h8, 2), conv_out_size(s_w8, 2)

        self.gen_output_dims = OrderedDict([("g_h0_out", (s_h16, s_w16)),
                                            ("g_h1_out", (s_h8, s_w8)),
                                            ("g_h2_out", (s_h4, s_w4)),
                                            ("g_h3_out", (s_h2, s_w2)),
                                            ("g_h4_out", (s_h, s_w))])

        self.gen_weight_dims = OrderedDict([("g_h0_lin_W", (self.z_dim, self.gf_dim * 8 * s_h16 * s_w16)),
                                            ("g_h0_lin_b", (self.gf_dim * 8 * s_h16 * s_w16,)),
                                            ("g_h1_W", (5, 5, self.gf_dim * 4, self.gf_dim * 8)),
                                            ("g_h1_b", (self.gf_dim * 4,)),
                                            ("g_h2_W", (5, 5, self.gf_dim * 2, self.gf_dim * 4)),
                                            ("g_h2_b", (self.gf_dim * 2,)),
                                            ("g_h3_W", (5, 5, self.gf_dim * 1, self.gf_dim * 2)),
                                            ("g_h3_b", (self.gf_dim * 1,)),
                                            ("g_h4_W", (5, 5, self.c_dim, self.gf_dim * 1)),
                                            ("g_h4_b", (self.c_dim,))])

        self.disc_weight_dims = OrderedDict([("d_h0_W", (5, 5, self.c_dim, self.df_dim)),
                                             ("d_h0_b", (self.df_dim,)),
                                             ("d_h1_W", (5, 5, self.df_dim, self.df_dim * 2)),
                                             ("d_h1_b", (self.df_dim * 2,)),
                                             ("d_h2_W", (5, 5, self.df_dim * 2, self.df_dim * 4)),
                                             ("d_h2_b", (self.df_dim * 4,)),
                                             ("d_h3_W", (5, 5, self.df_dim * 4, self.df_dim * 8)),
                                             ("d_h3_b", (self.df_dim * 8,)),
                                             ("d_h_end_lin_W", (self.df_dim * 8 * s_h16 * s_w16, self.df_dim * 4)),
                                             ("d_h_end_lin_b", (self.df_dim * 4,)),
                                             ("d_h_out_lin_W", (self.df_dim * 4, self.K)),
                                             ("d_h_out_lin_b", (self.K,))])

    def _get_optimizer(self, lr):

        if self.optimizer == 'adam':
            return tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5)
        elif self.optimizer == 'sgd':
            return tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.5)
        else:
            raise ValueError("Optimizer must be either 'adam' or 'sgd'")

    def initialize_wgts(self, scope_str):

        if scope_str == "generator":
            weight_dims = self.gen_weight_dims
            numz = self.num_gen
        elif scope_str == "discriminator":
            weight_dims = self.disc_weight_dims
            numz = self.num_disc
        else:
            raise RuntimeError("invalid scope!")

        param_list = []
        with tf.variable_scope(scope_str) as scope:  # iterated J (numz / num_gen) x num_mcmc = 20
            for zi in range(numz):  # numz: num_gen / num_disc
                for m in range(self.num_mcmc):
                    wgts_ = AttributeDict()
                    for name, shape in weight_dims.items():
                        wgts_[name] = tf.get_variable("%s_%04d_%04d" % (name, zi, m), shape,
                                                      initializer=tf.random_normal_initializer(stddev=0.02))
                    param_list.append(wgts_)

            return param_list

    def build_bgan_graph(self):

        # unsupervised images from data distribution
        self.inputs = tf.placeholder(tf.float32, [self.batch_size] + self.x_dim, name='real_images')

        # for the discriminator: supervised (labeled) batch of images
        self.labeled_inputs = tf.placeholder(tf.float32, [self.batch_size] + self.x_dim, name='real_images_w_labels')
        self.labels = tf.placeholder(tf.float32, [self.batch_size, self.K], name='real_targets')

        # for generator
        self.z = tf.placeholder(tf.float32, [self.batch_size, self.z_dim, self.num_gen], name='z')  # [64, 100, 10]
        self.z_sampler = tf.placeholder(tf.float32, [self.batch_size, self.z_dim], name='z_sampler')

        # initialize generator weights
        self.gen_param_list = self.initialize_wgts("generator")  # num_gen * num_mcmc - list
        self.disc_param_list = self.initialize_wgts("discriminator")  # num_disc * num_mcmc

        ############################ build discriminative losses and optimizers ##########################################

        self.d_semi_learning_rate = tf.placeholder(tf.float32, shape=[])

        t_vars = tf.trainable_variables()  # compile all discriminative weights (returns a list of trainable variables)
        self.d_vars = []
        for di in range(self.num_disc):
            for m in range(self.num_mcmc):
                self.d_vars.append([var for var in t_vars if 'd_' in var.name and "_%04d_%04d" % (di, m) in var.name])

        self.d_losses, self.d_optims_semi, self.d_optims_semi_adam = [], [], []  ### self.d_optims_semi is user specified optimizer

        for di, disc_params in enumerate(self.disc_param_list):  # with len(disc_param_list) > 1, the first discriminator call can create the variables; subsequent calls must reuse them

            # Part I: real ####################
            # d_probs = softmax(d_logits), d_logits = linear(pre-layer)
            d_probs_real, d_logits_real, _ = self.discriminator(self.inputs, self.K, disc_params, reuse=tf.AUTO_REUSE)

            # JT-0228: d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_real, labels=tf.ones_like(d_probs_real)))
            d_loss_real = - tf.reduce_mean(tf.reduce_logsumexp(d_logits_real, 1)) \
                          + tf.reduce_mean(tf.nn.softplus(tf.reduce_logsumexp(d_logits_real, 1)))

            # Part II: fake ####################
            d_loss_fakes = []
            for gi, gen_params in enumerate(self.gen_param_list):  # iterate num_gen * num_mcmc times

                d_probs_fake, d_logits_fake, _ = self.discriminator(
                    self.generator(self.z[:, :, gi % self.num_gen], gen_params), self.K, disc_params, reuse=True)

                # JT-0228: d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.zeros_like(d_probs_fake)))
                d_loss_fake = tf.reduce_mean(tf.nn.softplus(tf.reduce_logsumexp(d_logits_fake, 1)))
                d_loss_fakes.append(d_loss_fake)

            # Part III: sup ####################
            d_sup_probs, d_sup_logits, _ = self.discriminator(self.labeled_inputs, self.K, disc_params, reuse=tf.AUTO_REUSE)

            d_loss_sup = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=d_sup_logits, labels=self.labels))

            ################### total loss for semi-supervised discriminator ######################

            d_losses_semi = []
            for d_loss_fake_ in d_loss_fakes:
                d_loss_semi_ = d_loss_sup + d_loss_real * float(self.num_gen) + d_loss_fake_

                if not self.ml:
                    # bayes term: log( theta_d | alpha_d )

                    d_loss_semi_ += self.disc_prior(disc_params) + self.disc_noise(disc_params)  # 12

                d_losses_semi.append(tf.reshape(d_loss_semi_, [1]))

            d_loss_semi = tf.reduce_logsumexp(tf.concat(d_losses_semi, 0))
            self.d_losses.append(d_loss_semi)

            ################### total optimizer for semi-supervised discriminator ######################

            # after 5000 iterations
            d_opt_semi = self._get_optimizer(self.d_semi_learning_rate)  # user-specified optimizer (used after the initial Adam iterations)
            self.d_optims_semi.append(d_opt_semi.minimize(d_loss_semi, var_list=self.d_vars[di]))

            # default iterations
            d_opt_semi_adam = tf.train.AdamOptimizer(learning_rate=self.d_semi_learning_rate, beta1=0.5)
            self.d_optims_semi_adam.append(d_opt_semi_adam.minimize(d_loss_semi, var_list=self.d_vars[di]))

        ############################ build generator losses and optimizers ##########################################

        self.g_learning_rate = tf.placeholder(tf.float32, shape=[])
        self.g_vars = []
        for gi in range(self.num_gen):
            for m in range(self.num_mcmc):
                self.g_vars.append([var for var in t_vars if 'g_' in var.name and "_%04d_%04d" % (gi, m) in var.name])

        self.g_losses, self.g_optims_semi, self.g_optims_semi_adam = [], [], []

        for gi, gen_params in enumerate(self.gen_param_list):

            gi_losses = []
            for disc_params in self.disc_param_list:

                d_probs_fake, d_logits_fake, d_features_fake = self.discriminator(self.generator(self.z[:, :, gi % self.num_gen], gen_params), self.K, disc_params, reuse=tf.AUTO_REUSE)
                _, _, d_features_real = self.discriminator(self.inputs, self.K, disc_params, reuse=tf.AUTO_REUSE)

                # JT-0228: g_loss_ = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=d_logits_fake, labels=tf.ones_like(d_probs_fake)))
                g_loss_ = -tf.reduce_mean(tf.reduce_logsumexp(d_logits_fake, 1)) + tf.reduce_mean(tf.nn.softplus(tf.reduce_logsumexp(d_logits_fake, 1)))
                g_loss_ += tf.reduce_mean(huber_loss(d_features_real[-1], d_features_fake[-1]))  ## Huber loss is a variation of the squared loss, which is more robust to noise

                if not self.ml:

                    # return the prior_loss + noise_loss
                    g_loss_ += self.gen_prior(gen_params) + self.gen_noise(gen_params)  # 10

                gi_losses.append(tf.reshape(g_loss_, [1]))

            g_loss = tf.reduce_logsumexp(tf.concat(gi_losses, 0))
            self.g_losses.append(g_loss)

            ################### total optimizer for semi-supervised generator ######################

            g_opt = self._get_optimizer(self.g_learning_rate)
            self.g_optims_semi.append(g_opt.minimize(g_loss, var_list=self.g_vars[gi]))

            g_opt_adam = tf.train.AdamOptimizer(learning_rate=self.g_learning_rate, beta1=0.5)
            self.g_optims_semi_adam.append(g_opt_adam.minimize(g_loss, var_list=self.g_vars[gi]))

        self.gen_samplers = []  ### build samplers
        for gi, gen_params in enumerate(self.gen_param_list):
            self.gen_samplers.append(self.generator(self.z_sampler, gen_params))

        ### build vanilla supervised loss
        self.lbls = tf.placeholder(tf.float32, [self.batch_size, self.K], name='real_sup_targets')   # create a place for the variables,and then pass the real numbers
        self.S, self.S_logits = self.sup_discriminator(self.inputs, self.K)
        self.s_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.S_logits, labels=self.lbls))

        ################### optimizer for the vanilla supervised classifier ######################

        t_vars = tf.trainable_variables()
        self.sup_vars = [var for var in t_vars if 'sup_' in var.name]
        supervised_lr = 0.05 * self.lr

        s_opt = self._get_optimizer(supervised_lr)
        self.s_optim = s_opt.minimize(self.s_loss, var_list=self.sup_vars)

        s_opt_adam = tf.train.AdamOptimizer(learning_rate=supervised_lr, beta1=0.5)  # plain Adam variant of the supervised optimizer
        self.s_optim_adam = s_opt_adam.minimize(self.s_loss, var_list=self.sup_vars)

    def build_test_graph(self):

        self.test_inputs = tf.placeholder(tf.float32, [self.batch_size] + self.x_dim, name='real_test_images')

        self.test_d_probs, self.test_d_logits = [], []  # self.test_d_probs : 2 x (64, 10)
        for disc_params in self.disc_param_list:  # no generator, just discriminator

            test_d_probs_, test_d_logits_, _ = self.discriminator(self.test_inputs, self.K, disc_params, train=False, reuse=True)

            self.test_d_probs.append(test_d_probs_)  # test_d_probs_.shape = (64, 10)
            self.test_d_logits.append(test_d_logits_)

        # build standard purely supervised losses and optimizers
        self.test_s_probs, self.test_s_logits = self.sup_discriminator(self.test_inputs, self.K)

    def sup_discriminator(self, image, K):

        # TODO collapse this into disc
        with tf.variable_scope("sup_discriminator", reuse=tf.AUTO_REUSE) as scope:
            h0 = lrelu(conv2d(image, self.df_dim, name='sup_h0_conv'))
            h1 = lrelu(self.sup_d_batch_norm.sd_bn1(conv2d(h0, self.df_dim * 2, name='sup_h1_conv')))
            h2 = lrelu(self.sup_d_batch_norm.sd_bn2(conv2d(h1, self.df_dim * 4, name='sup_h2_conv')))
            h3 = lrelu(self.sup_d_batch_norm.sd_bn3(conv2d(h2, self.df_dim * 8, name='sup_h3_conv')))
            h4 = linear(tf.reshape(h3, [self.batch_size, -1]), K, 'sup_h3_lin')
            return tf.nn.softmax(h4), h4

    def discriminator(self, image, K, disc_params, train=True, reuse=False):

        with tf.variable_scope("discriminator", reuse=reuse) as scope:  # reuse=tf.AUTO_REUSE

            h = image
            for layer in range(len(self.disc_strides)):
                if layer == 0:
                    h = lrelu(conv2d(h, self.disc_weight_dims["d_h%i_W" % layer][-1], name='d_h%i_conv' % layer,
                                     k_h=self.disc_kernel_sizes[layer], k_w=self.disc_kernel_sizes[layer],
                                     d_h=self.disc_strides[layer], d_w=self.disc_strides[layer],
                                     w=disc_params["d_h%i_W" % layer], biases=disc_params["d_h%i_b" % layer]))

                # conv - bn - relu
                else:
                    h = lrelu(self.d_batch_norm["d_bn%i" % layer](
                        conv2d(h, self.disc_weight_dims["d_h%i_W" % layer][-1], name='d_h%i_conv' % layer,
                               k_h=self.disc_kernel_sizes[layer], k_w=self.disc_kernel_sizes[layer],
                               d_h=self.disc_strides[layer], d_w=self.disc_strides[layer],
                               w=disc_params["d_h%i_W" % layer], biases=disc_params["d_h%i_b" % layer]), train=train))

            h_end = lrelu(linear(tf.reshape(h, [self.batch_size, -1]), self.df_dim * 4, "d_h_end_lin",
                                 matrix=disc_params.d_h_end_lin_W, bias=disc_params.d_h_end_lin_b))  # for feature norm
            h_out = linear(h_end, K, 'd_h_out_lin',
                           matrix=disc_params.d_h_out_lin_W, bias=disc_params.d_h_out_lin_b)

            return tf.nn.softmax(h_out), h_out, [h_end]

    def generator(self, z, gen_params):

        with tf.variable_scope("generator", reuse=tf.AUTO_REUSE) as scope:

            h = linear(z, self.gen_weight_dims["g_h0_lin_W"][-1], 'g_h0_lin',
                       matrix=gen_params.g_h0_lin_W, bias=gen_params.g_h0_lin_b)

            h = tf.nn.relu(self.g_batch_norm.g_bn0(h))

            h = tf.reshape(h, [self.batch_size, self.gen_output_dims["g_h0_out"][0],
                               self.gen_output_dims["g_h0_out"][1], -1])

            for layer in range(1, len(self.gen_strides) + 1):

                out_shape = [self.batch_size, self.gen_output_dims["g_h%i_out" % layer][0],
                             self.gen_output_dims["g_h%i_out" % layer][1], self.gen_weight_dims["g_h%i_W" % layer][-2]]

                h = deconv2d(h,
                             out_shape,
                             k_h=self.gen_kernel_sizes[layer - 1], k_w=self.gen_kernel_sizes[layer - 1],
                             d_h=self.gen_strides[layer - 1], d_w=self.gen_strides[layer - 1],
                             name='g_h%i' % layer,
                             w=gen_params["g_h%i_W" % layer], biases=gen_params["g_h%i_b" % layer])
                if layer < len(self.gen_strides):
                    h = tf.nn.relu(self.g_batch_norm["g_bn%i" % layer](h))

            return tf.nn.tanh(h)

    def gen_prior(self, gen_params):

        with tf.variable_scope("generator") as scope:
            prior_loss = 0.0
            for var in gen_params.values():
                nn = tf.divide(var, self.prior_std)
                prior_loss += tf.reduce_mean(tf.multiply(nn, nn))

        prior_loss /= self.dataset_size
        return prior_loss

    def gen_noise(self, gen_params):  # noise_ : gaussian distribution
        with tf.variable_scope("generator") as scope:
            noise_loss = 0.0
            for name, var in gen_params.items():  # .iteritems():

                noise_ = tf.distributions.Normal(loc=0., scale=self.noise_std * tf.ones(var.get_shape()))  # tf.contrib.distributions.Normal(mu=0., sigma=self.noise_std*tf.ones(var.get_shape()))
                noise_loss += tf.reduce_sum(var * noise_.sample())

        noise_loss /= self.dataset_size
        return noise_loss

    def disc_prior(self, disc_params):

        with tf.variable_scope("discriminator") as scope:
            prior_loss = 0.0
            for var in disc_params.values():

                # print("var_disc_prior shape = ", var.get_shape(), var)
                # (5, 5, 3, 96) <tf.Variable 'discriminator/d_h0_W_0000_0000:0' shape=(5, 5, 3, 96) dtype=float32_ref>

                nn = tf.divide(var, self.prior_std)
                prior_loss += tf.reduce_mean(tf.multiply(nn, nn))

        prior_loss /= self.dataset_size
        return prior_loss

    def disc_noise(self, disc_params):

        with tf.variable_scope("discriminator") as scope:
            noise_loss = 0.0
            for var in disc_params.values():
                noise_ = tf.distributions.Normal(loc=0., scale=self.noise_std * tf.ones(var.get_shape()))  # tf.contrib.distributions.Normal(mu=0., sigma=self.noise_std*tf.ones(var.get_shape()))
                noise_loss += tf.reduce_sum(var * noise_.sample())

        noise_loss /= self.dataset_size
        return noise_loss
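A minimal usage sketch (not part of the original example): it assumes the BDCGAN_Semi_3d class above is importable from a module named bgan_semi_3d (a hypothetical name) together with the helper ops it references (conv2d, deconv2d, linear, batch_norm, AttributeDict, conv_out_size, kernel_sizer, huber_loss), and it runs one semi-supervised discriminator and generator step on random data using the placeholders and op lists defined in build_bgan_graph.

import numpy as np
import tensorflow as tf

from bgan_semi_3d import BDCGAN_Semi_3d  # hypothetical module name

# x_dim = [x, y, z, c] as the constructor expects
x_dim = [32, 32, 32, 1]
batch_size, z_dim, K = 4, 100, 2

model = BDCGAN_Semi_3d(x_dim, z_dim, dataset_size=1000,
                       batch_size=batch_size, num_classes=K, J=1)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # dummy unlabeled / labeled batches and per-generator noise
    x_unlab = np.random.rand(batch_size, *x_dim).astype(np.float32)
    x_lab = np.random.rand(batch_size, *x_dim).astype(np.float32)
    y_lab = np.eye(K)[np.random.randint(K, size=batch_size)].astype(np.float32)
    z = np.random.uniform(-1, 1,
                          (batch_size, z_dim, model.num_gen)).astype(np.float32)

    # one discriminator step (Adam variant), then one generator step
    _, d_loss = sess.run([model.d_optims_semi_adam[0], model.d_losses[0]],
                         feed_dict={model.inputs: x_unlab,
                                    model.labeled_inputs: x_lab,
                                    model.labels: y_lab,
                                    model.z: z,
                                    model.d_semi_learning_rate: 2e-4})
    _, g_loss = sess.run([model.g_optims_semi_adam[0], model.g_losses[0]],
                         feed_dict={model.inputs: x_unlab,
                                    model.z: z,
                                    model.g_learning_rate: 2e-4})
    print("d_loss = %.3f, g_loss = %.3f" % (d_loss, g_loss))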