def load_energy_model(model_params_dict):

    # FEATURE LAYER 0 (DECONV)
    print 'SET ENERGY FEATURE CONV LAYER 0'
    conv_w0   = sharedX(model_params_dict[   'feat_conv_w0'], name='feat_conv_w0')
    conv_b0   = sharedX(model_params_dict[   'feat_conv_b0'], name='feat_conv_b0')

    # FEATURE LAYER 1 (DECONV)
    print 'SET ENERGY FEATURE CONV LAYER 1'
    conv_w1   = sharedX(model_params_dict[   'feat_conv_w1'], name='feat_conv_w1')
    conv_b1   = sharedX(model_params_dict[   'feat_conv_b1'], name='feat_conv_b1')

    # FEATURE LAYER 2 (DECONV)
    print 'SET ENERGY FEATURE CONV LAYER 2'
    conv_w2   = sharedX(model_params_dict[   'feat_conv_w2'], name='feat_conv_w2')
    conv_b2   = sharedX(model_params_dict[   'feat_conv_b2'], name='feat_conv_b2')

    print 'SET ENERGY FEATURE EXTRACTOR'
    def energy_feature_function(input_data, is_train=True):
        # layer 0 (conv)
        h0 = relu(dnn_conv(input_data, conv_w0, subsample=(2, 2), border_mode=(2, 2))+conv_b0.dimshuffle('x', 0, 'x', 'x'))
        # layer 1 (conv)
        h1 = relu(dnn_conv(        h0, conv_w1, subsample=(2, 2), border_mode=(2, 2))+conv_b1.dimshuffle('x', 0, 'x', 'x'))
        # layer 2 (conv)
        h2 = tanh(dnn_conv(        h1, conv_w2, subsample=(2, 2), border_mode=(2, 2))+conv_b2.dimshuffle('x', 0, 'x', 'x'))
        feature = T.flatten(h2, 2)
        return feature

    # ENERGY LAYER (LINEAR)
    print 'SET ENERGY FUNCTION LINEAR LAYER 3'

    norm_w = sharedX(model_params_dict[   'gen_norm_w'], name='gen_norm_w')
    norm_b = sharedX(model_params_dict[   'gen_norm_b'], name='gen_norm_b')
    def energy_normalize_function(input_data, is_train=True):
        input_data = T.flatten(input_data, 2)
        return batchnorm(input_data, g=norm_w, b=norm_b, a=0.0)

    expert_w = sharedX(model_params_dict[   'eng_expert_w'], name='eng_expert_w')
    expert_b = sharedX(model_params_dict[   'eng_expert_b'], name='eng_expert_b')

    def energy_expert_function(feature_data, is_train=True):
        e = softplus(T.dot(feature_data, expert_w)+expert_b)
        e = T.sum(-e, axis=1, keepdims=True)
        return e

    def energy_prior_function(input_data, is_train=True):
        e = T.sum(T.sqr(input_data), axis=1, keepdims=True)
        return e

    energy_params = [conv_w0, conv_b0,
                     conv_w1, conv_b1,
                     conv_w2, conv_b2,
                     norm_w, norm_b,
                     expert_w, expert_b]

    return [energy_feature_function,
            energy_normalize_function,
            energy_expert_function,
            energy_prior_function,
            energy_params]
Example 2
def get_params(model_file, n_layers, n_f, nz=100, nc=3):
    print('LOADING...')
    t = time()

    disc_params = init_disc_params(n_f=n_f, n_layers=n_layers, nc=nc)
    gen_params = init_gen_params(nz=nz, n_f=n_f, n_layers=n_layers, nc=nc)
    predict_params = init_predict_params(nz=nz, n_f=n_f, n_layers=n_layers)
    # load the model
    model = utils.PickleLoad(model_file)
    print('load model from %s' % model_file)
    set_model(disc_params, model['disc_params'])
    set_model(gen_params, model['gen_params'])

    if 'predict_params' in model:
        set_model(predict_params, model['predict_params'])
    disc_batchnorm = model['disc_batchnorm']
    gen_batchnorm = model['gen_batchnorm']
    if 'predict_batchnorm' in model:
        predict_batchnorm = model['predict_batchnorm']
        predict_batchnorm = [sharedX(d) for d in predict_batchnorm]
    else:
        predict_batchnorm = None
    disc_batchnorm = [sharedX(d) for d in disc_batchnorm]
    gen_batchnorm = [sharedX(d) for d in gen_batchnorm]

    print('%.2f seconds to load theano models' % (time() - t))
    return disc_params, gen_params, predict_params, disc_batchnorm, gen_batchnorm, predict_batchnorm
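The set_model helper is not reproduced on this page; a plausible sketch (an assumption, not this project's verbatim code) simply copies the loaded arrays into the already-initialized shared variables:

def set_model(params, param_values):
    # overwrite each shared variable with the corresponding loaded array
    assert len(params) == len(param_values)
    for p, v in zip(params, param_values):
        p.set_value(floatX(v))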
Example 3
    def __init__(self,model,
                 dis_updater = updates.Adam(lr=sharedX(0.0002), b1=0.5, regularizer=updates.Regularizer(l2=1e-5)),
                 gen_updater = updates.Adam(lr=sharedX(0.0002), b1=0.5, regularizer=updates.Regularizer(l2=1e-5))):

        X = model.X
        Z = model.Z
        targets = T.matrix()

        genX = model.genX

        disX = model.disX
        disgenX = model.disgenX

        disX_loss = bce(disX, T.ones(disX.shape)).mean()
        disgenX_loss = bce(disgenX, T.zeros(disgenX.shape)).mean()
        genX_loss = bce(disgenX, T.ones(disgenX.shape)).mean()

        dis_loss = disX_loss + disgenX_loss
        gen_loss = genX_loss

        trainable_discrim_params = model.trainable_discrim_params
        trainable_gen_params = model.trainable_gen_params

        dis_updates = dis_updater(trainable_discrim_params, dis_loss) + model.other_discrim_updates
        gen_updates = gen_updater(trainable_gen_params, gen_loss) + model.other_gen_updates

        print 'COMPILING'
        t = time()
        self._train_gen = theano.function([Z], gen_loss, updates=gen_updates)
        self._train_dis = theano.function([X, Z], dis_loss, updates=dis_updates)
        self._gen = theano.function([Z], genX)
        print '%.2f seconds to compile theano functions'%(time()-t)
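The compiled functions above are driven by a standard GAN loop. This is a hypothetical sketch, where trainer is an instance of the class above; the minibatch iterator, batch size, and nz=100 are assumptions, not part of the snippet:

import numpy as np

nz = 100  # latent dimensionality (assumed)
for xmb in iter_minibatches(trX, 128):  # hypothetical minibatch iterator
    zmb = np.random.uniform(-1., 1., size=(len(xmb), nz)).astype('float32')
    d_loss = trainer._train_dis(xmb, zmb)  # one discriminator step
    g_loss = trainer._train_gen(zmb)       # one generator step
samples = trainer._gen(np.random.uniform(-1., 1., size=(64, nz)).astype('float32'))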
Example 4
def get_params(model_file, n_layers, n_f, nz=100, nc=3):
    print('LOADING...')
    t = time()

    disc_params = init_disc_params(n_f=n_f, n_layers=n_layers, nc=nc)
    gen_params = init_gen_params(nz=nz, n_f=n_f, n_layers=n_layers, nc=nc)
    predict_params = init_predict_params(nz=nz, n_f=n_f, n_layers=n_layers)
    # load the model
    model = utils.PickleLoad(model_file)
    print('load model from %s' % model_file)
    set_model(disc_params, model['disc_params'])
    set_model(gen_params, model['gen_params'])

    if 'predict_params' in model:
        set_model(predict_params, model['predict_params'])
    disc_batchnorm = model['disc_batchnorm']
    gen_batchnorm = model['gen_batchnorm']
    if 'predict_batchnorm' in model:
        predict_batchnorm = model['predict_batchnorm']
        predict_batchnorm = [sharedX(d) for d in predict_batchnorm]
    else:
        predict_batchnorm = None
    disc_batchnorm = [sharedX(d) for d in disc_batchnorm]
    gen_batchnorm = [sharedX(d) for d in gen_batchnorm]

    print('%.2f seconds to load theano models' % (time() - t))
    return disc_params, gen_params, predict_params, disc_batchnorm, gen_batchnorm, predict_batchnorm
Example 5
 def __init__(self,
              td_modules,
              bu_modules_gen, im_modules_gen,
              bu_modules_inf, im_modules_inf,
              merge_info):
     # grab the bottom-up, top-down, and info merging modules
     self.td_modules = [m for m in td_modules]
     self.bu_modules_gen = [m for m in bu_modules_gen]
     self.im_modules_gen = [m for m in im_modules_gen]
     self.bu_modules_inf = [m for m in bu_modules_inf]
     self.im_modules_inf = [m for m in im_modules_inf]
     # get dicts for referencing modules by name
     self.td_modules_dict = {m.mod_name: m for m in td_modules}
     self.td_modules_dict[None] = None
     self.bu_modules_gen_dict = {m.mod_name: m for m in bu_modules_gen}
     self.bu_modules_gen_dict[None] = None
     self.bu_modules_inf_dict = {m.mod_name: m for m in bu_modules_inf}
     self.bu_modules_inf_dict[None] = None
     self.im_modules_gen_dict = {m.mod_name: m for m in im_modules_gen}
     self.im_modules_gen_dict[None] = None
     self.im_modules_inf_dict = {m.mod_name: m for m in im_modules_inf}
     self.im_modules_inf_dict[None] = None
     # grab the full set of trainable parameters in these modules
     self.gen_params = []  # modules that aren't just for inference
     self.inf_params = []  # modules that are just for inference
     # get generator params (these only get to adapt to the training set)
     self.generator_modules = self.td_modules + self.bu_modules_gen + \
         self.im_modules_gen
     for mod in self.generator_modules:
         self.gen_params.extend(mod.params)
     # get inferencer params (these can be fine-tuned at test time)
     self.inferencer_modules = self.bu_modules_inf + self.im_modules_inf
     for mod in self.inferencer_modules:
         self.inf_params.extend(mod.params)
     # filter redundant parameters, to allow parameter sharing
     p_dict = {}
     for p in self.gen_params:
         p_dict[p.name] = p
     self.gen_params = p_dict.values()
     p_dict = {}
     for p in self.inf_params:
         p_dict[p.name] = p
     self.inf_params = p_dict.values()
     # add a distribution scaling parameter to the generator
     self.dist_scale = sharedX(floatX([0.2]))
     self.gen_params.append(self.dist_scale)
     # gather a list of all parameters in this network
     self.all_params = self.inf_params + self.gen_params
     # get instructions for how to merge bottom-up and top-down info
     self.merge_info = merge_info
     # make a switch for alternating between generator and inferencer
     # conditionals over the latent variables
     self.sample_switch = sharedX(floatX([1.0]))
     return
Example 6
def transform_im(x, npx=64, nc=3):
    if nc == 3:
        x1 = (x + sharedX(1.0)) * sharedX(127.5)
    else:
        x1 = T.tile(x, [1, 1, 1, 3]) * sharedX(255.0)  #[hack] to-be-tested

    mean_channel = np.load('../lib/ilsvrc_2012_mean.npy').mean(1).mean(1)
    mean_im = mean_channel[np.newaxis, :, np.newaxis, np.newaxis]
    mean_im = floatX(np.tile(mean_im, [1, 1, npx, npx]))
    x2 = x1[:, [2, 1, 0], :, :]
    y = x2 - mean_im
    return y
Example 7
def transform_im(x, npx=64, nc=3):
    if nc == 3:  # default option
        # (-1,1) => (0,255)
        x1 = (x + sharedX(1.0)) * sharedX(127.5)
    else:
        x1 = T.tile(x, [1, 1, 1, 3]) * sharedX(255.0)  #[hack] to-be-tested

    mean_channel = np.load(os.path.join(
        pkg_dir, 'ilsvrc_2012_mean.npy')).mean(1).mean(1)
    mean_im = mean_channel[np.newaxis, :, np.newaxis, np.newaxis]
    mean_im = floatX(np.tile(mean_im, [1, 1, npx, npx]))
    x2 = x1[:, [2, 1, 0], :, :]
    y = x2 - mean_im
    return y
Example 8
    def def_invert(self,
                   model,
                   batch_size=1,
                   beta=0.5,
                   lr=0.1,
                   b1=0.9,
                   nz=100,
                   use_bin=True):
        beta_r = sharedX(beta)
        x_c = T.tensor4()
        m_c = T.tensor4()
        x_e = T.tensor4()
        m_e = T.tensor4()
        z0 = T.matrix()
        z = sharedX(floatX(np_rng.uniform(-1., 1., size=(batch_size, nz))))
        gx = model.model_G(z)

        mm_c = T.tile(m_c, (1, gx.shape[1], 1, 1))
        color_all = T.mean(T.sqr(gx - x_c) * mm_c, axis=(1, 2, 3)) / (
            T.mean(m_c, axis=(1, 2, 3)) + sharedX(1e-5))
        gx_edge = HOGNet.get_hog(gx, use_bin)
        x_edge = HOGNet.get_hog(x_e, use_bin)
        mm_e = T.tile(m_e, (1, gx_edge.shape[1], 1, 1))
        sum_e = T.sum(T.abs_(mm_e))
        sum_x_edge = T.sum(T.abs_(x_edge))
        edge_all = T.mean(T.sqr(x_edge - gx_edge) * mm_e, axis=(1, 2, 3)) / (
            T.mean(m_e, axis=(1, 2, 3)) + sharedX(1e-5))
        rec_all = color_all + edge_all * sharedX(0.2)
        z_const = sharedX(10.0)
        init_all = T.mean(T.sqr(z0 - z)) * z_const

        if beta > 0:
            print('using D')
            p_gen = model.model_D(gx)
            real_all = T.nnet.binary_crossentropy(p_gen, T.ones(
                p_gen.shape)).T  # costs.bce(p_gen, T.ones(p_gen.shape))
            cost_all = rec_all + beta_r * real_all[0] + init_all
        else:
            print('without D')
            cost_all = rec_all + init_all
            real_all = T.zeros(cost_all.shape)

        cost = T.sum(cost_all)
        d_updater = updates.Adam(
            lr=sharedX(lr),
            b1=sharedX(b1))  # ,regularizer=updates.Regularizer(l2=l2))
        output = [
            gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge
        ]

        print('COMPILING...')
        t = time()

        z_updates = d_updater([z], cost)
        _invert = theano.function(inputs=[x_c, m_c, x_e, m_e, z0],
                                  outputs=output,
                                  updates=z_updates)
        print('%.2f seconds to compile _invert function' % (time() - t))
        return [_invert, z_updates, z, beta_r, z_const]
Example 9
def def_bfgs(model_G, layer='conv4', npx=64, alpha=0.002):
    print('COMPILING...')
    t = time()

    # symbolic variable definitions
    x_f = T.tensor4()
    x = T.tensor4()
    z = T.matrix()  # random seed (latent code)
    tanh = activations.Tanh()
    gx = model_G(tanh(z))  # generated image

    if layer == 'hog':
        gx_f = HOGNet.get_hog(gx, use_bin=True, BS=4)
    else:
        # convert the image format for AlexNet
        gx_t = AlexNet.transform_im(gx)
        gx_net = AlexNet.build_model(gx_t,
                                     layer=layer,
                                     shape=(None, 3, npx, npx))
        AlexNet.load_model(gx_net, layer=layer)
        # take the AlexNet output truncated at the chosen layer
        gx_f = lasagne.layers.get_output(gx_net[layer], deterministic=True)

    f_rec = T.mean(T.sqr(x_f - gx_f), axis=(1, 2, 3)) * sharedX(alpha)
    x_rec = T.mean(T.sqr(x - gx), axis=(1, 2, 3))
    cost = T.sum(f_rec) + T.sum(x_rec)
    grad = T.grad(cost, z)
    output = [cost, grad, gx]
    _invert = theano.function(inputs=[z, x, x_f], outputs=output)

    print('%.2f seconds to compile _bfgs function' % (time() - t))
    return _invert, z
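def_bfgs only compiles the cost/gradient evaluation; the actual minimization happens outside Theano. A hypothetical wrapper around scipy's L-BFGS (the function invert_bfgs and its arguments are illustrative assumptions, not part of the snippet):

import numpy as np
from scipy.optimize import fmin_l_bfgs_b

def invert_bfgs(_invert, z_init, x, x_f, maxiter=200):
    z_shape = z_init.shape

    def f(z_flat):
        z = np.float32(z_flat.reshape(z_shape))
        cost, grad, _ = _invert(z, x, x_f)
        # scipy expects a float64 cost and a flat float64 gradient
        return float(cost), np.float64(np.asarray(grad).ravel())

    z_opt, _, _ = fmin_l_bfgs_b(f, np.float64(z_init.ravel()), maxiter=maxiter)
    return np.float32(z_opt.reshape(z_shape))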
Example 10
def get_params(model_file, n_layers, n_f, nz=100, nc=3):
    print 'LOADING...'
    t = time()

    disc_params = init_disc_params(nz=nz, n_f=n_f, n_layers=n_layers, nc=nc)
    gen_params = init_gen_params(nz=nz, n_f=n_f, n_layers=n_layers, nc=nc)
    # load the model
    model = utils.PickleLoad(model_file)
    print 'load model from %s' % model_file
    set_model(disc_params, model['disc_params'])
    set_model(gen_params, model['gen_params'])
    [disc_pl, gen_pl] = model['postlearn_params']
    disc_pl = [sharedX(d) for d in disc_pl]
    gen_pl = [sharedX(d) for d in gen_pl]
    print '%.2f seconds to load theano models' % (time() - t)
    return disc_params, gen_params, disc_pl, gen_pl
Example 11
 def __call__(self, shape, name=None):
     print('called orthogonal init with shape', shape)
     flat_shape = (shape[0], np.prod(shape[1:]))
     a = np_rng.normal(0.0, 1.0, flat_shape)
     u, _, v = np.linalg.svd(a, full_matrices=False)
     q = u if u.shape == flat_shape else v  # pick the one with the correct shape
     q = q.reshape(shape)
     return sharedX(self.scale * q[:shape[0], :shape[1]], name=name)
Example 12
 def __call__(self, shape, name=None):
     if len(shape) == 2:
         scale = np.sqrt(2. / shape[0])
     elif len(shape) == 4:
         scale = np.sqrt(2. / np.prod(shape[1:]))
     else:
         raise NotImplementedError
     return sharedX(np_rng.normal(size=shape, scale=scale), name=name)
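These initializer objects are used like the gifn/difn instances further down the page: call them with a parameter shape (and an optional name) to get back a sharedX weight. The class name and shapes below are illustrative:

init = HeNormal()                    # hypothetical instance of the class above
gw = init((100, 512), name='gw')     # dense weight: (n_in, n_out)
dw = init((64, 3, 5, 5), name='dw')  # conv filters: (n_out, n_in, kh, kw)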
Example 13
    def get_hog(self, x_o):
        use_bin = self.use_bin
        NO = self.NO
        BS = self.BS
        nc = self.nc
        x = (x_o + sharedX(1)) / (sharedX(2))
        Gx = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) / 4.0
        Gy = Gx.T
        f1_w = []
        for i in range(NO):
            t = np.pi / NO * i
            g = np.cos(t) * Gx + np.sin(t) * Gy
            gg = np.tile(g[np.newaxis, np.newaxis, :, :], [1, 1, 1, 1])
            f1_w.append(gg)
        f1_w = np.concatenate(f1_w, axis=0)
        G = np.concatenate([
            Gx[np.newaxis, np.newaxis, :, :], Gy[np.newaxis, np.newaxis, :, :]
        ],
                           axis=0)
        G_f = sharedX(floatX(G))

        a = np.cos(np.pi / NO)
        l1 = sharedX(floatX(1 / (1 - a)))
        l2 = sharedX(floatX(a / (1 - a)))
        eps = sharedX(1e-3)
        if nc == 3:
            x_gray = T.mean(x, axis=1).dimshuffle(0, 'x', 1, 2)
        else:
            x_gray = x
        f1 = sharedX(floatX(f1_w))
        h0 = T.abs_(dnn_conv(x_gray, f1, subsample=(1, 1), border_mode=(1, 1)))
        g = dnn_conv(x_gray, G_f, subsample=(1, 1), border_mode=(1, 1))

        if use_bin:
            gx = g[:, [0], :, :]
            gy = g[:, [1], :, :]
            gg = T.sqrt(gx * gx + gy * gy + eps)
            hk = T.maximum(0, l1 * h0 - l2 * gg)

            bf_w = np.zeros((NO, NO, 2 * BS, 2 * BS))
            b = 1 - np.abs(
                (np.arange(1, 2 * BS + 1) - (2 * BS + 1.0) / 2.0) / BS)
            b = b[np.newaxis, :]
            bb = b.T.dot(b)
            for n in range(NO):
                bf_w[n, n] = bb

            bf = sharedX(floatX(bf_w))
            h_f = dnn_conv(hk,
                           bf,
                           subsample=(BS, BS),
                           border_mode=(BS / 2, BS / 2))
            return h_f
        else:
            return g
Example 14
    def __init__(self,model,
                 dis_updater = updates.Adam(lr=sharedX(0.0002), b1=0.5, regularizer=updates.Regularizer(l2=1e-5)),
                 gen_updater = updates.Adam(lr=sharedX(0.0002), b1=0.5, regularizer=updates.Regularizer(l2=1e-5)),
                 cls_updater = updates.Adam(lr=sharedX(0.0002), b1=0.5, regularizer=updates.Regularizer(l2=1e-5))):

        X = model.X
        Z = model.Z
        y = T.matrix() # zeros or ones depending on whether X is generated or real, respectively
        targets = T.matrix()

        genX = model.genX

        disX = model.disX
        classX = model.classX
        disgenX = model.disgenX
        classgenX = model.classgenX
        classXTest = model.classXTest

        disX_loss = bce(disX, y).mean()
        disgenX_loss = bce(disgenX, T.zeros(disgenX.shape)).mean()
        genX_loss = bce(disgenX, T.ones(disgenX.shape)).mean()
        cls_loss = cce(classX, targets).mean()
        cls_err = T.mean(T.neq(T.argmax(classXTest,axis=1),T.argmax(targets,axis=1)))

        dis_loss = disX_loss + disgenX_loss
        gen_loss = genX_loss

        trainable_discrim_params = model.trainable_discrim_params
        trainable_gen_params = model.trainable_gen_params
        trainable_classif_params = model.trainable_classif_params

        dis_updates = dis_updater(trainable_discrim_params, dis_loss) + model.other_discrim_updates
        gen_updates = gen_updater(trainable_gen_params, gen_loss) + model.other_gen_updates
        cls_updates = cls_updater(trainable_classif_params, cls_loss) + model.other_classif_updates

        print 'COMPILING'
        t = time()
        self._train_gen = theano.function([Z], gen_loss, updates=gen_updates)
        self._train_dis = theano.function([X, y, Z], dis_loss, updates=dis_updates)
        self._train_cls = theano.function([X, targets], cls_loss, updates=cls_updates)
        self._gen = theano.function([Z], genX)
        self._cls_predict = theano.function([X],classXTest)
        self._cls_error = theano.function([X,targets], cls_err)
        print '%.2f seconds to compile theano functions'%(time()-t)
Example 15
 def def_comp_mask(self):
     BS = self.BS
     print('COMPILING')
     t = time()
     m = T.tensor4()
     bf_w = np.ones((1, 1, 2 * BS, 2 * BS))
     bf = sharedX(floatX(bf_w))
     m_b = dnn_conv(m, bf, subsample=(BS, BS), border_mode=(BS / 2, BS / 2))
     _comp_mask = theano.function(inputs=[m], outputs=m_b)
     print('%.2f seconds to compile [compMask] functions' % (time() - t))
     return _comp_mask
Example 16
 def __call__(self, shape, name=None):
     if shape[0] != shape[1]:
         w = np.zeros(shape)
         o_idxs = np.arange(shape[0])
         i_idxs = np.random.permutation(
             np.tile(np.arange(shape[1]),
                     shape[0] / shape[1] + 1))[:shape[0]]
         w[o_idxs, i_idxs] = self.scale
     else:
         w = np.identity(shape[0]) * self.scale
     return sharedX(w, name=name)
Example 17
def get_params(model_file, n_layers, n_f, nz=100, nc=3):
    
    t = time()

    disc_params = init_disc_params(n_f=n_f, n_layers=n_layers, nc=nc)
    gen_params = init_gen_params(nz=nz, n_f=n_f, n_layers=n_layers, nc=nc)
    predict_params = init_predict_params(nz=nz, n_f=n_f, n_layers=n_layers)
    # load the model
    model = utils.PickleLoad(model_file)
    
    set_model(disc_params, model['disc_params'])
    set_model(gen_params, model['gen_params'])
    set_model(predict_params, model['predict_params'])
    disc_batchnorm = model['disc_batchnorm']
    gen_batchnorm = model['gen_batchnorm']
    predict_batchnorm = model['predict_batchnorm']
    disc_batchnorm = [sharedX(d) for d in disc_batchnorm]
    gen_batchnorm = [sharedX(d) for d in gen_batchnorm]
    predict_batchnorm = [sharedX(d) for d in predict_batchnorm]
    
    return disc_params, gen_params, predict_params, disc_batchnorm, gen_batchnorm, predict_batchnorm
Example 18
    def __call__(self, shape, name=None):
        w = np.zeros(shape)
        ycenter = shape[2] // 2
        xcenter = shape[3] // 2

        if shape[0] == shape[1]:
            o_idxs = np.arange(shape[0])
            i_idxs = np.arange(shape[1])
        elif shape[1] < shape[0]:
            o_idxs = np.arange(shape[0])
            i_idxs = np.random.permutation(
                np.tile(np.arange(shape[1]),
                        shape[0] / shape[1] + 1))[:shape[0]]
        w[o_idxs, i_idxs, ycenter, xcenter] = self.scale
        return sharedX(w, name=name)
Example 19
 def _add_param(self, name, value, learnable=True, layer_name='',
                dtype=theano.config.floatX):
     if self.reuse:
         assert name in self.source_params, \
             'param "%s does not exist and self.reuse==True' % name
         param = self.source_params[name][0]
         existing_shape = param.get_value().shape
         if value.shape != existing_shape:
             raise ValueError('Param "%s": incompatible shapes %s vs. %s' %
                              (name, existing_shape, value.shape))
         print '(%s) Reusing param "%s" with shape: %s' % \
             (layer_name, name, value.shape)
     else:
         print '(%s) Adding param "%s" with shape: %s' % \
               (layer_name, name, value.shape)
         param = sharedX(value, dtype=dtype, name=name)
      assert name not in self._params, 'param "%s" already exists' % name
     self._params[name] = (param, bool(learnable))
     return param
Example 20
    def def_invert(self, model, batch_size=1, d_weight=0.5, nc=1, lr=0.1, b1=0.9, nz=100, use_bin=True):
        d_weight_r = sharedX(d_weight)
        x_c = T.tensor4()
        m_c = T.tensor4()
        x_e = T.tensor4()
        m_e = T.tensor4()
        z0 = T.matrix()
        z = sharedX(floatX(np_rng.uniform(-1., 1., size=(batch_size, nz))))
        gx = model.model_G(z)
        # input: im_c: 255: no edge; 0: edge; transform=> 1: no edge, 0: edge

        if nc == 1:  # gx, range [0, 1] => edge, 1
            gx3 = 1.0 - gx  # T.tile(gx, (1, 3, 1, 1))
        else:
            gx3 = gx
        mm_c = T.tile(m_c, (1, gx3.shape[1], 1, 1))
        color_all = T.mean(T.sqr(gx3 - x_c) * mm_c, axis=(1, 2, 3)) / (T.mean(m_c, axis=(1, 2, 3)) + sharedX(1e-5))
        gx_edge = self.hog.get_hog(gx3)
        x_edge = self.hog.get_hog(x_e)
        mm_e = T.tile(m_e, (1, gx_edge.shape[1], 1, 1))
        sum_e = T.sum(T.abs_(mm_e))
        sum_x_edge = T.sum(T.abs_(x_edge))
        edge_all = T.mean(T.sqr(x_edge - gx_edge) * mm_e, axis=(1, 2, 3)) / (T.mean(m_e, axis=(1, 2, 3)) + sharedX(1e-5))
        rec_all = color_all + edge_all * sharedX(0.2)
        z_const = sharedX(5.0)
        init_all = T.mean(T.sqr(z0 - z)) * z_const

        if d_weight > 0:
            print('using D')
            p_gen = model.model_D(gx)
            real_all = T.nnet.binary_crossentropy(p_gen, T.ones(p_gen.shape)).T
            cost_all = rec_all + d_weight_r * real_all[0] + init_all
        else:
            print('without D')
            cost_all = rec_all + init_all
            real_all = T.zeros(cost_all.shape)

        cost = T.sum(cost_all)
        d_updater = updates.Adam(lr=sharedX(lr), b1=sharedX(b1))
        output = [gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge]

        print('COMPILING...')
        t = time()

        z_updates = d_updater([z], cost)
        _invert = theano.function(inputs=[x_c, m_c, x_e, m_e, z0], outputs=output, updates=z_updates)
        print('%.2f seconds to compile _invert function' % (time() - t))
        return [_invert, z_updates, z, d_weight_r, z_const]
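Because the Adam updates on z are attached to the compiled function, inversion is just a matter of calling _invert repeatedly; a hypothetical driver loop (the input arrays and the number of steps are assumptions):

# x_color, m_color, x_edge, m_edge, z0 are user-supplied numpy arrays
for step in range(200):
    outs = _invert(x_color, m_color, x_edge, m_edge, z0)  # each call applies one Adam step to z
    gx, cost = outs[0], outs[1]
z_solution = z.get_value()  # the optimized latent code lives in the shared variable z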
Example 21
    def def_invert(self, model, batch_size=1, d_weight=0.5, nc=1, lr=0.1, b1=0.9, nz=100, use_bin=True):
        d_weight_r = sharedX(d_weight)
        x_c = T.tensor4()
        m_c = T.tensor4()
        x_e = T.tensor4()
        m_e = T.tensor4()
        z0 = T.matrix()
        z = sharedX(floatX(np_rng.uniform(-1., 1., size=(batch_size, nz))))
        gx = model.model_G(z)
        # input: im_c: 255: no edge; 0: edge; transform=> 1: no edge, 0: edge

        if nc == 1: # gx, range [0, 1] => edge, 1
            gx3 = 1.0-gx #T.tile(gx, (1, 3, 1, 1))
        else:
            gx3 = gx
        mm_c = T.tile(m_c, (1, gx3.shape[1], 1, 1))
        color_all = T.mean(T.sqr(gx3 - x_c) * mm_c, axis=(1, 2, 3)) / (T.mean(m_c, axis=(1, 2, 3)) + sharedX(1e-5))
        gx_edge = self.hog.get_hog(gx3)
        x_edge = self.hog.get_hog(x_e)
        mm_e = T.tile(m_e, (1, gx_edge.shape[1], 1, 1))
        sum_e = T.sum(T.abs_(mm_e))
        sum_x_edge = T.sum(T.abs_(x_edge))
        edge_all = T.mean(T.sqr(x_edge - gx_edge) * mm_e, axis=(1, 2, 3)) / (T.mean(m_e, axis=(1, 2, 3)) + sharedX(1e-5))
        rec_all = color_all + edge_all * sharedX(0.2)
        z_const = sharedX(5.0)
        init_all = T.mean(T.sqr(z0 - z)) * z_const

        if d_weight > 0:
            print('using D')
            p_gen = model.model_D(gx)
            real_all = T.nnet.binary_crossentropy(p_gen, T.ones(p_gen.shape)).T
            cost_all = rec_all + d_weight_r * real_all[0] + init_all
        else:
            print('without D')
            cost_all = rec_all + init_all
            real_all = T.zeros(cost_all.shape)

        cost = T.sum(cost_all)
        d_updater = updates.Adam(lr=sharedX(lr), b1=sharedX(b1))
        output = [gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge]

        print('COMPILING...')
        t = time()

        z_updates = d_updater([z], cost)
        _invert = theano.function(inputs=[x_c, m_c, x_e, m_e, z0], outputs=output, updates=z_updates)
        print('%.2f seconds to compile _invert function' % (time() - t))
        return [_invert, z_updates, z, d_weight_r, z_const]
Example 22
def def_bfgs(net, layer='conv4', npx=64, alpha=0.002):
    print('COMPILING...')
    t = time()

    x_f = T.tensor4()
    x = T.tensor4()
    z = T.matrix()

    z = theano.printing.Print('this is z')(z)
    tanh = activations.Tanh()
    tz = tanh(z)
    # tz = printing_op(tz)

    # tz = z_scale * tz
    net.labels_var  = T.TensorType('float32', [False] * 512) ('labels_var')
    gx = net.G.eval(z, net.labels_var, ignore_unused_inputs=True)
    # gx = printing_op(gx)
    # gx = misc.adjust_dynamic_range(gx, [-1,1], [0,1])
    scale_factor = 16
    gx = theano.tensor.signal.pool.pool_2d(gx, ds=(scale_factor, scale_factor), mode='average_exc_pad', ignore_border=True)
    # gx = printing_op(gx)

    if layer == 'hog':
        gx_f = HOGNet.get_hog(gx, use_bin=True, BS=4)
    else:
        gx_t = AlexNet.transform_im(gx)
        gx_net = AlexNet.build_model(gx_t, layer=layer, shape=(None, 3, npx, npx))
        AlexNet.load_model(gx_net, layer=layer)
        gx_f = lasagne.layers.get_output(gx_net[layer], deterministic=True)

    f_rec = T.mean(T.sqr(x_f - gx_f), axis=(1, 2, 3)) * sharedX(alpha)
    x_rec = T.mean(T.sqr(x - gx), axis=(1, 2, 3))
    cost = T.sum(f_rec) + T.sum(x_rec)
    grad = T.grad(cost, z)
    output = [cost, grad, gx]
    _invert = theano.function(inputs=[z, x, x_f], outputs=output)

    print('%.2f seconds to compile _bfgs function' % (time() - t))
    return _invert,z
Example 23
# define pixel loss
pixel_loss = costs.L2Loss(gx, x)

# define feature loss
x_t = AlexNet.transform_im(x, npx=npx, nc=nc)
x_net = AlexNet.build_model(x_t, layer=args.layer, shape=(None, 3, npx, npx))
AlexNet.load_model(x_net, layer=args.layer)
x_f = lasagne.layers.get_output(x_net[args.layer], deterministic=True)
gx_t = AlexNet.transform_im(gx, npx=npx, nc=nc)
gx_net = AlexNet.build_model(gx_t, layer=args.layer, shape=(None, 3, npx, npx))
AlexNet.load_model(gx_net, layer=args.layer)
gx_f = lasagne.layers.get_output(gx_net[args.layer], deterministic=True)
ftr_loss = costs.L2Loss(gx_f, x_f)

# add two losses together
cost = pixel_loss + ftr_loss * sharedX(args.alpha)
output = [cost, z]
lrt = sharedX(args.lr)
b1t = sharedX(args.b1)
p_updater = updates.Adam(lr=lrt,
                         b1=b1t,
                         regularizer=updates.Regularizer(l2=args.weight_decay))
p_updates = p_updater(predict_params, cost)

print('COMPILING')
t = time()
_train_p = theano.function([x], cost, updates=p_updates)
_train_p_cost = theano.function([x], [cost, gx])
_predict_z = theano.function([x], z)
_gen = theano.function([z], gx)
print('%.2f seconds to compile theano functions' % (time() - t))
Example 24
    exec(tmp)

# print conditional,type(batchsize),Channel[-1],kernal

gifn = inits.Normal(scale=0.02)
difn = inits.Normal(scale=0.02)

## filter_shape: (output channels, input channels, filter height, filter width, filter depth)

## load the parameters

# gen_params = [gw1, gw2, gw3, gw4, gw5, gwx]
# discrim_params = [dw1, dw2, dw3, dw4, dw5, dwy]

temp = joblib.load('models%d/50_gen_params.jl' % objectNumber)
gw1 = sharedX(temp[0])
gg1 = sharedX(temp[1])
gb1 = sharedX(temp[2])
gw2 = sharedX(temp[3])
gg2 = sharedX(temp[4])
gb2 = sharedX(temp[5])
gw3 = sharedX(temp[6])
gg3 = sharedX(temp[7])
gb3 = sharedX(temp[8])
gw4 = sharedX(temp[9])
gg4 = sharedX(temp[10])
gb4 = sharedX(temp[11])
gwx = sharedX(temp[12])

gen_params = [gw1, gg1, gb1, gw2, gg2, gb2, gw3, gg3, gb3, gw4, gg4, gb4, gwx]
Example 25
    DlZ = sigmoid(T.dot(Dl4, wz))
    return DlZ


# def gen_Z(dist):
# 	mu = dist[:Nz]
# 	sigma = dist[Nz:]

X = T.tensor5()

encodeZ = encoder(X, *encode_params)
decodeX = decoder(encodeZ, *decode_params)

cost = bce(T.flatten(decodeX, 2), T.flatten(X, 2)).mean()

lrt = sharedX(lrate)
AutoEnc_parameter = encode_params + decode_params

updater = updates.Adam(lr=lrt, b1=0.8, regularizer=updates.Regularizer(l2=l2))
updates = updater(AutoEnc_parameter, cost)

print 'COMPILING'
t = time()
_train_ = theano.function([X], cost, updates=updates)
print '%.2f seconds to compile theano functions' % (time() - t)

mat = scipy.io.loadmat('models_stats.mat')
mat = mat['models']
num = np.array(mat[0][0][1])
names = mat[0][0][0][0]
objname = []
Example 26
 def __init__(self, leak=0.2):
     self.leak = sharedX(leak)
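Only the constructor is shown; a typical __call__ for a leaky rectifier (an assumed completion, not necessarily this project's exact formula) would be:

import theano.tensor as T

class LeakyRectify(object):
    def __init__(self, leak=0.2):
        self.leak = sharedX(leak)

    def __call__(self, x):
        # elementwise leaky ReLU: x for x > 0, leak * x otherwise
        return T.maximum(x, self.leak * x)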
Example 27
X = T.fmatrix()
y = T.fvector()

theta = T.fmatrix()
deltaX = T.fmatrix() # svgd gradient
data_N = T.scalar('data_N')

block = T.fmatrix()


gX_1 = langevin_sampler(X, y, theta, data_N, *net_params)
cost_1 = -1 * T.mean(T.sum(gX_1 * deltaX, axis=1))


lrt = sharedX(lr)
g_updater_1 = updates.Adagrad(lr=lr, regularizer=updates.Regularizer(l2=l2))
g_updates_1 = g_updater_1(net_params, cost_1)


print 'COMPILING'
t = time()
_gen_1 = theano.function([X, y, theta, data_N], gX_1)
_train_g_1 = theano.function([X, y, theta, deltaX, data_N], cost_1, updates=g_updates_1)
_svgd_gradient = theano.function([X, y, theta, data_N], svgd_gradient(X, y, theta, data_N))
_score_bayes_lr = theano.function([X, y, theta, data_N], score_bayes_lr(X, y, theta, data_N))
_evaluate = theano.function([X, y, theta], evaluate(X, y, theta))
print '%.2f seconds to compile theano functions'%(time()-t)


n_iter = 10000
Example 28
        num_filters_list = [128]
        lr_list = [1e-3]
        lambda_eng_list = [1e-5]

        for lr in lr_list:
            for num_filters in num_filters_list:
                for hidden_size in hidden_size_list:
                    for expert_size in expert_size_list:
                        for lambda_eng in lambda_eng_list:
                            model_config_dict["hidden_size"] = hidden_size
                            model_config_dict["expert_size"] = expert_size
                            model_config_dict["min_num_gen_filters"] = num_filters
                            model_config_dict["min_num_eng_filters"] = num_filters

                            # set updates
                            energy_optimizer = Adagrad(lr=sharedX(lr), regularizer=Regularizer(l2=lambda_eng))
                            generator_optimizer = Adagrad(lr=sharedX(2.0 * lr))
                            model_test_name = (
                                model_name
                                + "_f{}".format(int(num_filters))
                                + "_h{}".format(int(hidden_size))
                                + "_e{}".format(int(expert_size))
                                + "_re{}".format(int(-np.log10(lambda_eng)))
                                + "_lr{}".format(int(-np.log10(lr)))
                            )
                            train_model(
                                data_stream=data_stream,
                                energy_optimizer=energy_optimizer,
                                generator_optimizer=generator_optimizer,
                                model_config_dict=model_config_dict,
                                model_test_name=model_test_name,
Example 29
def load_energy_model(num_experts,
                      model_params_dict):

    # FEATURE LAYER 0 (DECONV)
    print 'SET ENERGY FEATURE CONV LAYER 0'
    conv_w0   = sharedX(model_params_dict[   'feat_conv_w0'], name='feat_conv_w0')
    conv_b0   = sharedX(model_params_dict[   'feat_conv_b0'], name='feat_conv_b0')
    # FEATURE LAYER 1 (DECONV)
    print 'SET ENERGY FEATURE CONV LAYER 1'
    conv_w1   = sharedX(model_params_dict[   'feat_conv_w1'], name='feat_conv_w1')
    conv_b1   = sharedX(model_params_dict[   'feat_conv_b1'], name='feat_conv_b1')
    # FEATURE LAYER 2 (DECONV)
    print 'SET ENERGY FEATURE CONV LAYER 2'
    conv_w2   = sharedX(model_params_dict[   'feat_conv_w2'], name='feat_conv_w2')
    conv_b2   = sharedX(model_params_dict[   'feat_conv_b2'], name='feat_conv_b2')
    # FEATURE LAYER 3 (DECONV)
    print 'SET ENERGY FEATURE CONV LAYER 3'
    conv_w3   = sharedX(model_params_dict[   'feat_conv_w3'], name='feat_conv_w3')
    conv_b3   = sharedX(model_params_dict[   'feat_conv_b3'], name='feat_conv_b3')
    print 'SET ENERGY FEATURE EXTRACTOR'
    def energy_feature_function(input_data, is_train=True):
        # layer 0 (conv)
        h0 = relu(dnn_conv(input_data, conv_w0, subsample=(2, 2), border_mode=(2, 2))+conv_b0.dimshuffle('x', 0, 'x', 'x'))
        # layer 1 (conv)
        h1 = relu(dnn_conv(        h0, conv_w1, subsample=(2, 2), border_mode=(2, 2))+conv_b1.dimshuffle('x', 0, 'x', 'x'))
        # layer 2 (conv)
        h2 = relu(dnn_conv(        h1, conv_w2, subsample=(2, 2), border_mode=(2, 2))+conv_b2.dimshuffle('x', 0, 'x', 'x'))
        # layer 3 (conv)
        h3 = tanh(dnn_conv(        h2, conv_w3, subsample=(2, 2), border_mode=(2, 2))+conv_b3.dimshuffle('x', 0, 'x', 'x'))
        # output feature
        feature = T.flatten(h3, 2)
        return feature

    # ENERGY FEATURE NORM LAYER (BN)
    # print 'SET ENERGY FUNCTION FEATURE NORM LAYER'
    # norm_w = sharedX(model_params_dict[   'gen_norm_w'], name='gen_norm_w')
    # norm_b = sharedX(model_params_dict[   'gen_norm_b'], name='gen_norm_b')
    #
    # def energy_normalize_function(feature_data, is_train=True):
    #     return norm_layer(feature_data, g=norm_w, b=norm_b)

    # ENERGY EXPERT LAYER (LINEAR)
    print 'SET ENERGY FUNCTION EXPERT LAYER'

    expert_w = sharedX(model_params_dict[   'eng_expert_w'], name='eng_expert_w')
    expert_b = sharedX(model_params_dict[   'eng_expert_b'], name='eng_expert_b')

    def energy_expert_function(feature_data, is_train=True):
        e = softplus(T.dot(feature_data, expert_w)+expert_b)
        e = T.sum(-e, axis=1, keepdims=True)
        return e

    # def energy_prior_function(input_data, is_train=True):
    #     e = num_experts*T.mean(T.sqr(input_data), axis=1, keepdims=True)
    #     return e

    energy_params = [conv_w0, conv_b0,
                     conv_w1, conv_b1,
                     conv_w2, conv_b2,
                     conv_w3, conv_b3,
                     # norm_w, norm_b,
                     expert_w, expert_b]

    return [energy_feature_function,
            # energy_normalize_function,
            energy_expert_function,
            # energy_prior_function,
            energy_params]
Example 30
ngf = 64  # # of gen filters in first conv layer
ndf = 64  # # of discrim filters in first conv layer
nx = npx * npx * nc  # # of dimensions in X

niter = 3000  # # of iter at starting learning rate
niter_decay = 3000  # # of iter to linearly decay learning rate to zero
temp = npx / 4

relu = activations.Rectify()
sigmoid = activations.Sigmoid()
lrelu = activations.LeakyRectify()
tanh = activations.Tanh()

model_path = 'models/cond_dcgan/'
gen_params = [
    sharedX(p) for p in joblib.load(model_path + '5999_gen_params.jl')
]
discrim_params = [
    sharedX(p) for p in joblib.load(model_path + '5999_discrim_params.jl')
]


def gen(Z, Y, w, w2, w3, wx):
    yb = Y.dimshuffle(0, 1, 'x', 'x')
    Z = T.concatenate([Z, Y], axis=1)
    h = relu(batchnorm(T.dot(Z, w)))
    h = T.concatenate([h, Y], axis=1)
    h2 = relu(batchnorm(T.dot(h, w2)))
    h2 = h2.reshape((h2.shape[0], ngf * 2, temp, temp))
    h2 = conv_cond_concat(h2, yb)
    h3 = relu(batchnorm(deconv(h2, w3, subsample=(2, 2), border_mode=(2, 2))))
Example 31
relu = activations.Rectify()
lrelu = activations.LeakyRectify(leak=0.2)
sigmoid = activations.Sigmoid()

trX, vaX, teX, trY, vaY, teY = svhn_with_valid_set(extra=False)

vaX = floatX(vaX)/127.5-1.
trX = floatX(trX)/127.5-1.
teX = floatX(teX)/127.5-1.

X = T.tensor4()

desc = 'svhn_unsup_all_conv_dcgan_100z_gaussian_lr_0.0005_64mb'
epoch = 200
params = [sharedX(p) for p in joblib.load('../models/%s/%d_discrim_params.jl'%(desc, epoch))]
print desc.upper()
print 'epoch %d'%epoch

def mean_and_var(X):
    u = T.mean(X, axis=[0, 2, 3])
    s = T.mean(T.sqr(X - u.dimshuffle('x', 0, 'x', 'x')), axis=[0, 2, 3])
    return u, s

def bnorm_statistics(X, w, w2, g2, b2, w3, g3, b3, wy):
    h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2)))

    h2 = dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2))
    h2_u, h2_s = mean_and_var(h2)
    h2 = lrelu(batchnorm(h2, g=g2, b=b2))
Example 32
    lambda_eng_list  = [1e-5]
    lambda_gen_list  = [1e-5]

    for lr in lr_list:
        for num_filters in num_filters_list:
            for hidden_size in hidden_size_list:
                for expert_size in expert_size_list:
                    for lambda_eng in lambda_eng_list:
                        for lambda_gen in lambda_gen_list:
                            model_config_dict['hidden_size']         = hidden_size
                            model_config_dict['expert_size']         = expert_size
                            model_config_dict['min_num_gen_filters'] = num_filters
                            model_config_dict['min_num_eng_filters'] = num_filters

                            # set updates
                            energy_optimizer_reg_on  = Adam(lr=sharedX(lr),
                                                            regularizer=Regularizer(l2=lambda_eng))
                            energy_optimizer_reg_off = Adam(lr=sharedX(lr),
                                                            regularizer=Regularizer(l2=0.0))
                            generator_optimizer_reg_on  = Adam(lr=sharedX(lr),
                                                               b1=0.1, b2=0.1,
                                                               regularizer=Regularizer(l2=lambda_eng))
                            generator_optimizer_reg_off = Adam(lr=sharedX(lr),
                                                               b1=0.1, b2=0.1,
                                                               regularizer=Regularizer(l2=0.0))
                            model_test_name = model_name \
                                              + '_f{}'.format(int(num_filters)) \
                                              + '_h{}'.format(int(hidden_size)) \
                                              + '_e{}'.format(int(expert_size)) \
                                              + '_re{}'.format(int(-np.log10(lambda_eng))) \
                                              + '_rg{}'.format(int(-np.log10(lambda_gen))) \
Example 33
File: job.py Project: mehdidc/dcgan
def run(hp, folder):
    trX, trY, nb_classes = load_data()
    k = 1             # # of discrim updates for each gen update
    l2 = 2.5e-5       # l2 weight decay
    b1 = 0.5          # momentum term of adam
    nc = 1            # # of channels in image
    ny = nb_classes   # # of classes
    nbatch = 128      # # of examples in batch
    npx = 28          # # of pixels width/height of images
    nz = 100          # # of dim for Z
    ngfc = 512       # # of gen units for fully connected layers
    ndfc = 512      # # of discrim units for fully connected layers
    ngf = 64          # # of gen filters in first conv layer
    ndf = 64          # # of discrim filters in first conv layer
    nx = npx*npx*nc   # # of dimensions in X
    niter = 200       # # of iter at starting learning rate
    niter_decay = 100 # # of iter to linearly decay learning rate to zero
    lr = 0.0002       # initial learning rate for adam
    scale = 0.02

    k = hp['k']
    l2 = hp['l2']
    #b1 = hp['b1']
    nc = 1
    ny = nb_classes
    nbatch = hp['nbatch']
    npx = 28
    nz = hp['nz']
    ngfc = hp['ngfc']       # # of gen units for fully connected layers
    ndfc = hp['ndfc']      # # of discrim units for fully connected layers
    ngf = hp['ngf']          # # of gen filters in first conv layer
    ndf = hp['ndf']          # # of discrim filters in first conv layer
    nx = npx*npx*nc   # # of dimensions in X
    niter = hp['niter']       # # of iter at starting learning rate
    niter_decay = hp['niter_decay'] # # of iter to linearly decay learning rate to zero
    lr = hp['lr']       # initial learning rate for adam


    scale = hp['scale']

    #k = 1             # # of discrim updates for each gen update
    #l2 = 2.5e-5       # l2 weight decay
    b1 = 0.5          # momentum term of adam
    #nc = 1            # # of channels in image
    #ny = nb_classes   # # of classes
    budget_hours = hp.get('budget_hours', 2)
    budget_secs = budget_hours * 3600

    ntrain = len(trX)
    def transform(X):
        return (floatX(X)).reshape(-1, nc, npx, npx)

    def inverse_transform(X):
        X = X.reshape(-1, npx, npx)
        return X
    
    model_dir = folder
    samples_dir = os.path.join(model_dir, 'samples')
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(samples_dir):
        os.makedirs(samples_dir)

    relu = activations.Rectify()
    sigmoid = activations.Sigmoid()
    lrelu = activations.LeakyRectify()
    bce = T.nnet.binary_crossentropy

    gifn = inits.Normal(scale=scale)
    difn = inits.Normal(scale=scale)

    gw  = gifn((nz, ngfc), 'gw')
    gw2 = gifn((ngfc, ngf*2*7*7), 'gw2')
    gw3 = gifn((ngf*2, ngf, 5, 5), 'gw3')
    gwx = gifn((ngf, nc, 5, 5), 'gwx')

    dw  = difn((ndf, nc, 5, 5), 'dw')
    dw2 = difn((ndf*2, ndf, 5, 5), 'dw2')
    dw3 = difn((ndf*2*7*7, ndfc), 'dw3')
    dwy = difn((ndfc, 1), 'dwy')

    gen_params = [gw, gw2, gw3, gwx]
    discrim_params = [dw, dw2, dw3, dwy]

    def gen(Z, w, w2, w3, wx, use_batchnorm=True):
        if use_batchnorm:
            batchnorm_ = batchnorm
        else:
            batchnorm_ = lambda x:x
        h = relu(batchnorm_(T.dot(Z, w)))
        h2 = relu(batchnorm_(T.dot(h, w2)))
        h2 = h2.reshape((h2.shape[0], ngf*2, 7, 7))
        h3 = relu(batchnorm_(deconv(h2, w3, subsample=(2, 2), border_mode=(2, 2))))
        x = sigmoid(deconv(h3, wx, subsample=(2, 2), border_mode=(2, 2)))
        return x

    def discrim(X, w, w2, w3, wy):
        h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2)))
        h2 = lrelu(batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2))))
        h2 = T.flatten(h2, 2)
        h3 = lrelu(batchnorm(T.dot(h2, w3)))
        y = sigmoid(T.dot(h3, wy))
        return y

    X = T.tensor4()
    Z = T.matrix()

    gX = gen(Z, *gen_params)

    p_real = discrim(X, *discrim_params)
    p_gen = discrim(gX, *discrim_params)

    d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
    d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
    g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()

    d_cost = d_cost_real + d_cost_gen
    g_cost = g_cost_d

    cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]

    lrt = sharedX(lr)
    d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    d_updates = d_updater(discrim_params, d_cost)
    g_updates = g_updater(gen_params, g_cost)
    #updates = d_updates + g_updates

    print('COMPILING')
    t = time()
    _train_g = theano.function([X, Z], cost, updates=g_updates)
    _train_d = theano.function([X, Z], cost, updates=d_updates)
    _gen = theano.function([Z], gX)
    print('%.2f seconds to compile theano functions' % (time() - t))

    tr_idxs = np.arange(len(trX))
    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))

    def gen_samples(n, nbatch=128):
        samples = []
        labels = []
        n_gen = 0
        for i in range(n/nbatch):
            zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
            xmb = _gen(zmb)
            samples.append(xmb)
            n_gen += len(xmb)
        n_left = n-n_gen
        zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
        xmb = _gen(zmb)
        samples.append(xmb)
        return np.concatenate(samples, axis=0)

    s = floatX(np_rng.uniform(-1., 1., size=(10000, nz)))
    n_updates = 0
    n_check = 0
    n_epochs = 0
    n_updates = 0
    n_examples = 0
    t = time()
    begin = datetime.now()
    for epoch in range(1, niter+niter_decay+1): 
        t = time()
        print("Epoch {}".format(epoch))
        trX = shuffle(trX)
        for imb in tqdm(iter_data(trX, size=nbatch), total=ntrain/nbatch):
            imb = transform(imb)
            zmb = floatX(np_rng.uniform(-1., 1., size=(len(imb), nz)))
            if n_updates % (k+1) == 0:
                cost = _train_g(imb, zmb)
            else:
                cost = _train_d(imb, zmb)
            n_updates += 1
            n_examples += len(imb)
        samples = np.asarray(_gen(sample_zmb))
        grayscale_grid_vis(inverse_transform(samples), (10, 20), '{}/{:05d}.png'.format(samples_dir, n_epochs))
        n_epochs += 1
        if n_epochs > niter:
            lrt.set_value(floatX(lrt.get_value() - lr/niter_decay))
        if n_epochs % 50 == 0 or epoch == niter + niter_decay or epoch == 1:
            imgs = []
            for i in range(0, s.shape[0], nbatch):
                imgs.append(_gen(s[i:i+nbatch]))
            img = np.concatenate(imgs, axis=0)
            samples_filename = '{}/{:05d}_gen.npz'.format(model_dir, n_epochs)
            joblib.dump(img, samples_filename, compress=9)
            shutil.copy(samples_filename, '{}/gen.npz'.format(model_dir))
            joblib.dump([p.get_value() for p in gen_params], '{}/d_gen_params.jl'.format(model_dir, n_epochs), compress=9)
            joblib.dump([p.get_value() for p in discrim_params], '{}/discrim_params.jl'.format(model_dir, n_epochs), compress=9)
        print('Elapsed : {}sec'.format(time() - t))

        if (datetime.now() - begin).total_seconds() >= budget_secs:
            print("Budget finished.quit.")
            break
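run pulls every hyperparameter out of the hp dict; a minimal call (values mirror the defaults set inside the function, and the output folder is illustrative) could be:

hp = {
    'k': 1, 'l2': 2.5e-5, 'nbatch': 128, 'nz': 100,
    'ngfc': 512, 'ndfc': 512, 'ngf': 64, 'ndf': 64,
    'niter': 200, 'niter_decay': 100, 'lr': 0.0002,
    'scale': 0.02, 'budget_hours': 2,
}
run(hp, 'results/mnist_dcgan')  # second argument is the output directory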
Example 34
z = T.matrix()

gx = train_dcgan_utils.gen(z, gen_params, n_layers=n_layers, n_f=n_f, nc=nc)
p_real = train_dcgan_utils.discrim(x, disc_params, n_layers=n_layers)
p_gen = train_dcgan_utils.discrim(gx, disc_params, n_layers=n_layers)

d_cost_real = costs.bce(p_real, T.ones(p_real.shape))
d_cost_gen = costs.bce(p_gen, T.zeros(p_gen.shape))
g_cost_d = costs.bce(p_gen, T.ones(p_gen.shape))

d_cost = d_cost_real + d_cost_gen
g_cost = g_cost_d

cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]

lrt = sharedX(args.lr)
d_updater = updates.Adam(lr=lrt,
                         b1=args.b1,
                         regularizer=updates.Regularizer(l2=args.weight_decay))
g_updater = updates.Adam(lr=lrt,
                         b1=args.b1,
                         regularizer=updates.Regularizer(l2=args.weight_decay))
d_updates = d_updater(disc_params, d_cost)
g_updates = g_updater(gen_params, g_cost)
updates = d_updates + g_updates

print('COMPILING')
t = time()
_train_g = theano.function([x, z], cost, updates=g_updates)
_train_d = theano.function([x, z], cost, updates=d_updates)
_gen = theano.function([z], gx)
Example 35
# compute costs based on discriminator output for real/generated data
d_cost_real = sum([bce(p, T.ones(p.shape)).mean() for p in p_real])
d_cost_gen = sum([bce(p, T.zeros(p.shape)).mean() for p in p_gen])
g_cost_d = sum([bce(p, T.ones(p.shape)).mean() for p in p_gen])

#d_cost_real = bce(p_real[-1], T.ones(p_real[-1].shape)).mean()
#d_cost_gen = bce(p_gen[-1], T.zeros(p_gen[-1].shape)).mean()
#g_cost_d = bce(p_gen[-1], T.ones(p_gen[-1].shape)).mean()

d_cost = d_cost_real + d_cost_gen + (
    1e-5 * sum([T.sum(p**2.0) for p in discrim_params]))
g_cost = g_cost_d + (1e-5 * sum([T.sum(p**2.0) for p in gen_params]))

cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]

lrt = sharedX(lr)
d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
d_updates = d_updater(discrim_params, d_cost)
g_updates = g_updater(gen_params, g_cost)
updates = d_updates + g_updates

print 'COMPILING'
t = time()
_train_g = theano.function([X, Z0], cost, updates=g_updates)
_train_d = theano.function([X, Z0], cost, updates=d_updates)
_gen = theano.function([Z0], gX)
print '%.2f seconds to compile theano functions' % (time() - t)

f_log = open("{}/{}.ndjson".format(log_dir, desc), 'wb')
log_fields = [
Example 36
        num_filters_list = [128]
        lr_list          = [1e-3]
        lambda_eng_list  = [1e-5]

        for lr in lr_list:
            for num_filters in num_filters_list:
                for hidden_size in hidden_size_list:
                    for expert_size in expert_size_list:
                        for lambda_eng in lambda_eng_list:
                            model_config_dict['hidden_size']         = hidden_size
                            model_config_dict['expert_size']         = expert_size
                            model_config_dict['min_num_gen_filters'] = num_filters
                            model_config_dict['min_num_eng_filters'] = num_filters

                            # set updates
                            energy_optimizer    = Adagrad(lr=sharedX(lr),
                                                          regularizer=Regularizer(l2=lambda_eng))
                            generator_optimizer = Adagrad(lr=sharedX(2.*lr))
                            model_test_name = model_name \
                                              + '_f{}'.format(int(num_filters)) \
                                              + '_h{}'.format(int(hidden_size)) \
                                              + '_e{}'.format(int(expert_size)) \
                                              + '_re{}'.format(int(-np.log10(lambda_eng))) \
                                              + '_lr{}'.format(int(-np.log10(lr)))

                            if is_continue:
                                continue_train_model(last_batch_idx=last_batch_idx,
                                                     data_stream=data_stream,
                                                     energy_optimizer=energy_optimizer,
                                                     generator_optimizer=generator_optimizer,
                                                     model_config_dict=model_config_dict,
Example 37
def load_model():
    [e_params, g_params, d_params] = pickle.load(open("faces_dcgan.pkl", "rb"))
    gwx = g_params[-1]
    dwy = d_params[-1]
    # inputs
    X = T.tensor4()
    ## encode layer
    e_layer_sizes = [128, 64, 32, 16, 8]
    e_filter_sizes = [3, 256, 256, 512, 1024]
    eX, e_params, e_layers = make_conv_set(X,
                                           e_layer_sizes,
                                           e_filter_sizes,
                                           "e",
                                           weights=e_params)
    ## generative layer
    g_layer_sizes = [8, 16, 32, 64, 128]
    g_num_filters = [1024, 512, 256, 256, 128]
    g_out, g_params, g_layers = make_conv_set(eX,
                                              g_layer_sizes,
                                              g_num_filters,
                                              "g",
                                              weights=g_params)
    g_params += [gwx]
    gX = tanh(deconv(g_out, gwx, subsample=(1, 1), border_mode=(2, 2)))
    ## discrim layer(s)

    df1 = 128
    d_layer_sizes = [128, 64, 32, 16, 8]
    d_filter_sizes = [3, df1, 2 * df1, 4 * df1, 8 * df1]

    def discrim(input, name, weights=None):
        d_out, disc_params, d_layers = make_conv_set(input,
                                                     d_layer_sizes,
                                                     d_filter_sizes,
                                                     name,
                                                     weights=weights)
        d_flat = T.flatten(d_out, 2)

        disc_params += [dwy]
        y = sigmoid(T.dot(d_flat, dwy))

        return y, disc_params, d_layers

    # target outputs
    target = T.tensor4()

    p_real, d_params, d_layers = discrim(target, "d", weights=d_params)
    # we need to make sure the p_gen params are the same as the p_real params
    p_gen, d_params2, d_layers = discrim(gX, "d", weights=d_params)

    ## GAN costs
    d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
    d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
    g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()

    ## MSE encoding cost is done on an (averaged) downscaling of the image
    target_pool = max_pool_2d(target, (4, 4),
                              mode="average_exc_pad",
                              ignore_border=True)
    target_flat = T.flatten(target_pool, 2)
    gX_pool = max_pool_2d(gX, (4, 4),
                          mode="average_exc_pad",
                          ignore_border=True)
    gX_flat = T.flatten(gX_pool, 2)
    enc_cost = mse(gX_flat, target_flat).mean()

    ## generator cost is a linear combination of the discrim cost plus the MSE encoding cost
    d_cost = d_cost_real + d_cost_gen
    g_cost = g_cost_d + enc_cost / 10  ## if the enc_cost is weighted too highly it will take a long time to train

    ## N.B. e_cost and e_updates will only try and minimise MSE loss on the autoencoder (for debugging)
    e_cost = enc_cost

    cost = [g_cost_d, d_cost_real, enc_cost]

    elrt = sharedX(0.002)
    lrt = sharedX(lr)
    d_updater = updates.Adam(lr=lrt,
                             b1=b1,
                             regularizer=updates.Regularizer(l2=l2))
    g_updater = updates.Adam(lr=lrt,
                             b1=b1,
                             regularizer=updates.Regularizer(l2=l2))
    e_updater = updates.Adam(lr=elrt,
                             b1=b1,
                             regularizer=updates.Regularizer(l2=l2))

    d_updates = d_updater(d_params, d_cost)
    g_updates = g_updater(e_params + g_params, g_cost)
    e_updates = e_updater(e_params, e_cost)

    print('COMPILING')
    t = time()
    _train_g = theano.function([X, target], cost, updates=g_updates)
    _train_d = theano.function([X, target], cost, updates=d_updates)
    _train_e = theano.function([X, target], cost, updates=e_updates)
    _get_cost = theano.function([X, target], cost)
    print('%.2f seconds to compile theano functions' % (time() - t))
    img_dir = "gen_images/"
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)
    ae_encode = theano.function([X, target], [gX, target])
    return ae_encode
Example 38
#target_pool = max_pool_2d(target, (4,4), mode="average_exc_pad",ignore_border=True)
target_flat = T.flatten(target, 2)
#gX_pool = max_pool_2d(gX, (4,4), mode="average_exc_pad",ignore_border=True)
gX_flat = T.flatten(gX,2)
enc_cost = mse(gX_flat, target_flat).mean() 

## generator cost is a linear combination of the discrim cost and the MSE encoding cost
d_cost = d_cost_real + d_cost_gen
g_cost = g_cost_d + enc_cost / 100   ## if the enc_cost is weighted too highly it will take a long time to train

## N.B. e_cost and e_updates only minimise the MSE loss on the autoencoder (for debugging)
e_cost = enc_cost

cost = [g_cost_d, d_cost_real, enc_cost]

elrt = sharedX(0.002)
lrt = sharedX(lr)
d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
e_updater = updates.Adam(lr=elrt, b1=b1, regularizer=updates.Regularizer(l2=l2))

d_updates = d_updater(d_params, d_cost)
g_updates = g_updater(e_params + g_params, g_cost)
e_updates = e_updater(e_params, e_cost)

print 'COMPILING'
t = time()
_train_g = theano.function([X, target], cost, updates=g_updates)
_train_d = theano.function([X, target], cost, updates=d_updates)
_train_e = theano.function([X, target], cost, updates=e_updates)
_get_cost = theano.function([X, target], cost)
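
These compiled functions are typically driven by an alternating loop; the sketch below is illustrative, assuming an iter_data-style minibatch helper and variables trX, trTarget, nbatch, and n_epochs that the real script would define.

# hedged training-loop sketch (names are illustrative, not from the original script)
for epoch in range(n_epochs):
    for xmb, targetmb in iter_data(trX, trTarget, size=nbatch):
        _train_d(xmb, targetmb)  # discriminator step
        g_cost_d, d_cost_real, enc_cost = _train_g(xmb, targetmb)  # generator (+ encoder) step
    print('epoch %d  g_cost_d %.4f  d_cost_real %.4f  enc_cost %.4f'
          % (epoch, g_cost_d, d_cost_real, enc_cost))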
Esempio n. 39
0
td_modules = inf_gen_model.td_modules
bu_modules = inf_gen_model.bu_modules
im_modules = inf_gen_model.im_modules
mix_module = inf_gen_model.mix_module

# inf_gen_model.load_params(inf_gen_param_file)


def clip_sigmoid(x):
    output = sigmoid(T.clip(x, -15.0, 15.0))
    return output

####################################
# Setup the optimization objective #
####################################
lam_kld = sharedX(floatX([1.0]))
gen_params = inf_gen_model.gen_params
inf_params = inf_gen_model.inf_params
g_params = gen_params + inf_params

######################################################
# BUILD THE MODEL TRAINING COST AND UPDATE FUNCTIONS #
######################################################

# Setup symbolic vars for the model inputs, outputs, and costs
Xg = T.tensor4()  # symbolic var for inputs to bottom-up inference network
Z0 = T.matrix()   # symbolic var for "noise" inputs to the generative stuff

##########################################################
# CONSTRUCT COST VARIABLES FOR THE VAE PART OF OBJECTIVE #
##########################################################
def load_vgg_feature_extractor():
    vgg_param_dict = h5py.File(vgg_filepath, "r")
    # input_channel x output_channel x filter_size x filter_size
    # conv stage 0 (64x64=>32x32)
    # (3 x 64 x 3 x 3)
    conv_w0_0 = sharedX(vgg_param_dict["layer_1"]["param_0"], name="feat_conv_w0_0")
    conv_b0_0 = sharedX(vgg_param_dict["layer_1"]["param_1"], name="feat_conv_b0_0")

    # (64 x 64 x 3 x 3)
    conv_w0_1 = sharedX(vgg_param_dict["layer_3"]["param_0"], name="feat_conv_w0_1")
    conv_b0_1 = sharedX(vgg_param_dict["layer_3"]["param_1"], name="feat_conv_b0_1")

    # conv stage 1 (32x32=>16x16)
    # (64 x 128 x 3 x 3)
    conv_w1_0 = sharedX(vgg_param_dict["layer_6"]["param_0"], name="feat_conv_w1_0")
    conv_b1_0 = sharedX(vgg_param_dict["layer_6"]["param_1"], name="feat_conv_b1_0")

    # (128 x 128 x 3 x 3)
    conv_w1_1 = sharedX(vgg_param_dict["layer_8"]["param_0"], name="feat_conv_w1_1")
    conv_b1_1 = sharedX(vgg_param_dict["layer_8"]["param_1"], name="feat_conv_b1_1")

    # conv stage 2 (16x16=>8x8)
    # (128 x 256 x 3 x 3)
    conv_w2_0 = sharedX(vgg_param_dict["layer_11"]["param_0"], name="feat_conv_w2_0")
    conv_b2_0 = sharedX(vgg_param_dict["layer_11"]["param_1"], name="feat_conv_b2_0")

    # (256 x 256 x 3 x 3)
    conv_w2_1 = sharedX(vgg_param_dict["layer_13"]["param_0"], name="feat_conv_w2_1")
    conv_b2_1 = sharedX(vgg_param_dict["layer_13"]["param_1"], name="feat_conv_b2_1")

    # (256 x 256 x 3 x 3)
    conv_w2_2 = sharedX(vgg_param_dict["layer_15"]["param_0"], name="feat_conv_w2_2")
    conv_b2_2 = sharedX(vgg_param_dict["layer_15"]["param_1"], name="feat_conv_b2_2")

    # conv stage 3 (8x8=>4x4)
    # (256 x 512 x 3 x 3)
    conv_w3_0 = sharedX(vgg_param_dict["layer_18"]["param_0"], name="feat_conv_w3_0")
    conv_b3_0 = sharedX(vgg_param_dict["layer_18"]["param_1"], name="feat_conv_b3_0")

    # (512 x 512 x 3 x 3)
    conv_w3_1 = sharedX(vgg_param_dict["layer_20"]["param_0"], name="feat_conv_w3_1")
    conv_b3_1 = sharedX(vgg_param_dict["layer_20"]["param_1"], name="feat_conv_b3_1")

    # (512 x 512 x 3 x 3)
    conv_w3_2 = sharedX(vgg_param_dict["layer_22"]["param_0"], name="feat_conv_w3_2")
    conv_b3_2 = sharedX(vgg_param_dict["layer_22"]["param_1"], name="feat_conv_b3_2")

    # conv stage 4 (4x4=>2x2)
    # (512 x 512 x 3 x 3)
    conv_w4_0 = sharedX(vgg_param_dict["layer_25"]["param_0"], name="feat_conv_w4_0")
    conv_b4_0 = sharedX(vgg_param_dict["layer_25"]["param_1"], name="feat_conv_b4_0")

    # (512 x 512 x 3 x 3)
    conv_w4_1 = sharedX(vgg_param_dict["layer_27"]["param_0"], name="feat_conv_w4_1")
    conv_b4_1 = sharedX(vgg_param_dict["layer_27"]["param_1"], name="feat_conv_b4_1")

    # (512 x 512 x 3 x 3)
    conv_w4_2 = sharedX(vgg_param_dict["layer_29"]["param_0"], name="feat_conv_w4_2")
    conv_b4_2 = sharedX(vgg_param_dict["layer_29"]["param_1"], name="feat_conv_b4_2")

    parameter_set = [
        conv_w0_0,
        conv_b0_0,
        conv_w0_1,
        conv_b0_1,
        conv_w1_0,
        conv_b1_0,
        conv_w1_1,
        conv_b1_1,
        conv_w2_0,
        conv_b2_0,
        conv_w2_1,
        conv_b2_1,
        conv_w2_2,
        conv_b2_2,
        conv_w3_0,
        conv_b3_0,
        conv_w3_1,
        conv_b3_1,
        conv_w3_2,
        conv_b3_2,
        conv_w4_0,
        conv_b4_0,
        conv_w4_1,
        conv_b4_1,
        conv_w4_2,
        conv_b4_2,
    ]

    def feature_extractor(input_data):
        # conv stage 0 (64x64=>32x32)
        h0_0 = dnn_conv(input_data, conv_w0_0, border_mode=(1, 1)) + conv_b0_0.dimshuffle("x", 0, "x", "x")
        h0_1 = dnn_conv(relu(h0_0), conv_w0_1, border_mode=(1, 1)) + conv_b0_1.dimshuffle("x", 0, "x", "x")
        h0 = dnn_pool(relu(h0_1), ws=(2, 2), stride=(2, 2))
        # conv stage 1 (32x32=>16x16)
        h1_0 = dnn_conv(h0, conv_w1_0, border_mode=(1, 1)) + conv_b1_0.dimshuffle("x", 0, "x", "x")
        h1_1 = dnn_conv(relu(h1_0), conv_w1_1, border_mode=(1, 1)) + conv_b1_1.dimshuffle("x", 0, "x", "x")
        h1 = dnn_pool(relu(h1_1), ws=(2, 2), stride=(2, 2))
        # conv stage 2 (16x16=>8x8)
        h2_0 = dnn_conv(h1, conv_w2_0, border_mode=(1, 1)) + conv_b2_0.dimshuffle("x", 0, "x", "x")
        h2_1 = dnn_conv(relu(h2_0), conv_w2_1, border_mode=(1, 1)) + conv_b2_1.dimshuffle("x", 0, "x", "x")
        h2_2 = dnn_conv(relu(h2_1), conv_w2_2, border_mode=(1, 1)) + conv_b2_2.dimshuffle("x", 0, "x", "x")
        h2 = dnn_pool(relu(h2_2), ws=(2, 2), stride=(2, 2))
        # conv stage 3 (8x8=>4x4)
        h3_0 = dnn_conv(h2, conv_w3_0, border_mode=(1, 1)) + conv_b3_0.dimshuffle("x", 0, "x", "x")
        h3_1 = dnn_conv(relu(h3_0), conv_w3_1, border_mode=(1, 1)) + conv_b3_1.dimshuffle("x", 0, "x", "x")
        h3_2 = dnn_conv(relu(h3_1), conv_w3_2, border_mode=(1, 1)) + conv_b3_2.dimshuffle("x", 0, "x", "x")
        h3 = dnn_pool(relu(h3_2), ws=(2, 2), stride=(2, 2))
        # conv stage 4 (4x4=>2x2)
        h4_0 = dnn_conv(h3, conv_w4_0, border_mode=(1, 1)) + conv_b4_0.dimshuffle("x", 0, "x", "x")
        h4_1 = dnn_conv(relu(h4_0), conv_w4_1, border_mode=(1, 1)) + conv_b4_1.dimshuffle("x", 0, "x", "x")
        h4_2 = dnn_conv(relu(h4_1), conv_w4_2, border_mode=(1, 1)) + conv_b4_2.dimshuffle("x", 0, "x", "x")
        h4 = dnn_pool(relu(h4_2), ws=(2, 2), stride=(2, 2))

        return T.flatten(h4, 2)

    return feature_extractor, parameter_set
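
The extractor returned above is typically used for a feature-matching (perceptual) term; the sketch below shows one way to wire it up, where Xr and Xg_hat are assumed to be T.tensor4 batches already scaled and laid out the way the VGG weights expect.

# hedged sketch: perceptual cost built from the pretrained VGG features
vgg_feature_fn, vgg_params = load_vgg_feature_extractor()
Xr = T.tensor4()      # real images (illustrative symbolic input)
Xg_hat = T.tensor4()  # generated/reconstructed images (illustrative symbolic input)
feat_real = vgg_feature_fn(Xr)
feat_gen = vgg_feature_fn(Xg_hat)
vgg_match_cost = T.mean(T.sum(T.sqr(feat_gen - feat_real), axis=1))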
Esempio n. 41
0
the file samples.png
"""

nz = 256
nc = 3
npx = 32
ngf = 128
ndf = 128

relu = activations.Rectify()
sigmoid = activations.Sigmoid()
lrelu = activations.LeakyRectify()
tanh = activations.Tanh()
#%%
model_path = 'C:/Users/zhanq/OneDrive - Washington University in St. Louis/GitHub/dcgan_code/models/imagenet_gan_pretrain_128f_relu_lrelu_7l_3x3_256z/'
gen_params = [sharedX(p) for p in joblib.load(model_path + '30_gen_params.jl')]

discrim_params = [
    sharedX(p) for p in joblib.load(model_path + '30_discrim_params.jl')
]


#%%
def gen(Z, w, g, b, w2, g2, b2, w3, g3, b3, w4, g4, b4, w5, g5, b5, w6, g6, b6,
        wx):
    h = relu(batchnorm(T.dot(Z, w), g=g, b=b))
    h = h.reshape((h.shape[0], ngf * 4, 4, 4))
    h2 = relu(
        batchnorm(deconv(h, w2, subsample=(2, 2), border_mode=(1, 1)),
                  g=g2,
                  b=b2))
# construct the "wrapper" object for managing all our modules
inf_gen_model = CondInfGenModel(
    td_modules=td_modules,
    bu_modules_gen=bu_modules_gen,
    im_modules_gen=im_modules_gen,
    bu_modules_inf=bu_modules_inf,
    im_modules_inf=im_modules_inf,
    merge_info=merge_info,
    output_transform=output_noop)

# inf_gen_model.load_params(inf_gen_param_file)

####################################
# Setup the optimization objective #
####################################
lam_kld = sharedX(floatX([1.0]))
X_init = sharedX(floatX(np.zeros((1, nc, npx, npx))))  # default "initial state"
noise = sharedX(floatX([noise_std]))
gen_params = inf_gen_model.gen_params
inf_params = inf_gen_model.inf_params
all_params = inf_gen_model.all_params + [X_init]

######################################################
# BUILD THE MODEL TRAINING COST AND UPDATE FUNCTIONS #
######################################################

# Setup symbolic vars for the model inputs, outputs, and costs
Xg_gen = T.tensor4()  # symbolic var for inputs to the generator network
Xm_gen = T.tensor4()
Xg_inf = T.tensor4()  # symbolic var for inputs to the inference network
Xm_inf = T.tensor4()
Esempio n. 43
0
    for lr in lr_list:
        for num_filters in num_filters_list:
            for hidden_size in hidden_size_list:
                for dropout in dropout_list:
                    for lambda_eng in lambda_eng_list:
                        for lambda_gen in lambda_gen_list:
                            for init_noise in init_noise_list:
                                for noise_decay in noise_decay_list:
                                    model_config_dict['hidden_size']         = hidden_size
                                    model_config_dict['min_num_gen_filters'] = num_filters
                                    model_config_dict['min_num_eng_filters'] = num_filters
                                    model_config_dict['init_noise']          = init_noise
                                    model_config_dict['noise_decay']         = noise_decay

                                    # set updates
                                    energy_optimizer    = RMSprop(lr=sharedX(lr),
                                                                  regularizer=Regularizer(l2=lambda_eng))
                                    generator_optimizer = RMSprop(lr=sharedX(lr*10),
                                                                  regularizer=Regularizer(l2=lambda_gen))
                                    model_test_name = model_name \
                                                      + '_f{}'.format(int(num_filters)) \
                                                      + '_h{}'.format(int(hidden_size)) \
                                                      + '_d{}'.format(int(dropout)) \
                                                      + '_re{}'.format(int(-np.log10(lambda_eng))) \
                                                      + '_rg{}'.format(int(-np.log10(lambda_gen))) \
                                                      + '_n{}'.format(int(-np.log10(init_noise))) \
                                                      + '_d{}'.format(int(1 if noise_decay == 1.0 else 0)) \
                                                      + '_lr{}'.format(int(-np.log10(lr)))

                                    train_model(data_stream=data_stream,
                                                energy_optimizer=energy_optimizer,
Esempio n. 44
0
from lib.rng import py_rng, np_rng
from lib.vis import color_grid_vis
from lib.img_utils import inverse_transform, transform

from sklearn.externals import joblib

import theano
import theano.tensor as T

dcgan_root = "/mnt/disk1/vittal/dcgan_code/visual_concepts/"

desc = "vcgan_orig_multi"
model_dir = dcgan_root + '/models/%s/'%desc
model_number = "35_gen_params.jl"
gen_params_np = joblib.load(model_dir + model_number)
gen_params = [sharedX(element) for element in gen_params_np]
vc_nums = [41, 35, 37, 10, 3, 57, 60]
costs = np.zeros((len(vc_nums), 1))
for ii, vc_num in enumerate(vc_nums):
    Z = T.matrix()
    gX = models.gen(Z, *gen_params)
    X = T.tensor4()
    cost = T.mean(T.sqr(gX - X))

    if 'vc_num' in locals():
        from load import visual_concepts
        from lib.config import data_dir
        import os
        path = os.path.join(data_dir, "vc.hdf5")
        tr_data, tr_stream = visual_concepts(path, ntrain=None)
        tr_handle = tr_data.open()
    lambda_gen_list  = [1e-10]

    for lr in lr_list:
        for num_filters in num_filters_list:
            for hidden_size in hidden_size_list:
                for expert_size in expert_size_list:
                    for dropout in dropout_list:
                        for lambda_eng in lambda_eng_list:
                            for lambda_gen in lambda_gen_list:
                                model_config_dict['hidden_size']         = hidden_size
                                model_config_dict['expert_size']         = expert_size
                                model_config_dict['min_num_gen_filters'] = num_filters
                                model_config_dict['min_num_eng_filters'] = num_filters

                                # set updates
                                energy_optimizer    = Adagrad(lr=sharedX(lr),
                                                              regularizer=Regularizer(l2=lambda_eng))
                                generator_optimizer = Adagrad(lr=sharedX(lr*2),
                                                              regularizer=Regularizer(l2=lambda_gen))
                                generator_bn_optimizer = Adagrad(lr=sharedX(lr*2),
                                                                 regularizer=Regularizer(l2=0.0))
                                model_test_name = model_name \
                                                  + '_f{}'.format(int(num_filters)) \
                                                  + '_h{}'.format(int(hidden_size)) \
                                                  + '_e{}'.format(int(expert_size)) \
                                                  + '_d{}'.format(int(dropout)) \
                                                  + '_re{}'.format(int(-np.log10(lambda_eng))) \
                                                  + '_rg{}'.format(int(-np.log10(lambda_gen))) \
                                                  + '_lr{}'.format(int(-np.log10(lr)))

                                train_model(data_stream=data_stream,
Esempio n. 46
0
d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
d_error_gen = T.mean(p_gen)
g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()

d_cost = d_cost_real + d_cost_gen
if args.onlyclassify:
    d_cost = d_classify
elif args.classify:
    d_cost += d_classify
g_cost = g_cost_d

cost_target = [
    g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen, d_error_real,
    d_error_gen, d_classify, d_classify_error
]
lrg = sharedX(lr)
lrd = sharedX(lr)
l2t = sharedX(l2d)
d_updater = updates.Adam(lr=lrd,
                         b1=b1,
                         regularizer=updates.Regularizer(l2=l2t))
g_updater = updates.Adam(lr=lrg, b1=b1, regularizer=updates.Regularizer(l2=l2))
"""
#old model
if args.onlyclassify:
    d_updates = d_updater(discrim_params[:-2]+discrim_params[-1:], d_cost)
elif args.classify:
    d_updates = d_updater(discrim_params, d_cost)
else:
    d_updates = d_updater(discrim_params[:-1], d_cost)
"""
Esempio n. 47
0
    lr_list = [1e-4]
    dropout_list = [False]
    lambda_eng_list = [1e-10]
    lambda_gen_list = [1e-10]

    for lr in lr_list:
        for num_filters in num_filters_list:
            for hidden_size in hidden_size_list:
                for lambda_eng in lambda_eng_list:
                    for lambda_gen in lambda_gen_list:
                        model_config_dict["hidden_size"] = hidden_size
                        model_config_dict["min_num_gen_filters"] = num_filters
                        model_config_dict["min_num_eng_filters"] = num_filters

                        # set updates
                        model_optimizer = RMSprop(lr=sharedX(lr), regularizer=Regularizer(l2=lambda_eng))
                        model_test_name = (
                            model_name
                            + "_f{}".format(int(num_filters))
                            + "_h{}".format(int(hidden_size))
                            + "_re{}".format(int(-np.log10(lambda_eng)))
                            + "_rg{}".format(int(-np.log10(lambda_gen)))
                            + "_lr{}".format(int(-np.log10(lr)))
                        )
                        train_model(
                            data_stream=data_stream,
                            model_optimizer=model_optimizer,
                            model_config_dict=model_config_dict,
                            model_test_name=model_test_name,
                        )
e_real_n = discrim(X+N, *discrim_params).sum(axis=1, keepdims=True)
e_gen    = discrim(gX, *discrim_params).sum(axis=1, keepdims=True)
e_gen_n  = discrim(gX+N, *discrim_params).sum(axis=1, keepdims=True)

######################################
# SET DISCRIMINATOR & GENERATOR COST #
######################################
e_cost = e_real_n.mean()-e_gen_n.mean()
g_cost = e_gen_n.mean()

cost = [e_cost, g_cost, e_real, e_gen, annealing]

###############
# SET UPDATER #
###############
d_updater = updates.RMSprop(lr=sharedX(0.0001), rho=0.5, regularizer=updates.Regularizer(l2=l2))
g_updater = updates.RMSprop(lr=sharedX(0.0001), rho=0.5, regularizer=updates.Regularizer(l2=l2))
d_updates = d_updater(discrim_params, e_cost)
g_updates = g_updater(gen_params, annealing*g_cost)
updates = d_updates + g_updates

######################################
# RANDOM SELECT INPUT DATA & DISPLAY #
######################################
vis_idxs = py_rng.sample(np.arange(len(vaX)), nvis)
vaX_vis = inverse_transform(vaX[vis_idxs])
color_grid_vis(vaX_vis.transpose([0,2,3,1]), (14, 14), 'samples/%s_etl_test.png'%desc)


####################
# COMPILE FUNCTION #
Esempio n. 49
0
deltaX = T.tensor4()

# random noise
Z = T.matrix()

f_real = discrim(X)  # data
f_gen = discrim(X0)  # vgd particles

cost_data = -1 * f_real.mean()
cost_vgd = -1 * f_gen.mean()

gX = gen(Z, *gen_params)
g_cost = -1 * T.sum(T.sum(T.flatten(gX, 2) * T.flatten(
    deltaX, 2), axis=1))  # update the generator model by minimizing the reconstruction MSE

balance_weight = sharedX(1.)
d_cost = cost_data - balance_weight * cost_vgd  # for discriminative model, minimize cost

lrt = sharedX(lr)
d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))

d_updates = d_updater(rbm_params, d_cost)
g_updates = g_updater(gen_params, g_cost)

print 'COMPILING'
t = time()
_train_d = theano.function([X, X0], d_cost, updates=d_updates)
_train_g = theano.function([Z, deltaX], g_cost, updates=g_updates)
_gen = theano.function([Z], gen(Z, *gen_params))
_logp_rbm = theano.function([X], logp_rbm(X))
# construct the "wrapper" object for managing all our modules
seq_cond_gen_model = \
    DeepSeqCondGenRNN(
        td_modules=td_modules,
        bu_modules_gen=bu_modules_gen,
        im_modules_gen=im_modules_gen,
        bu_modules_inf=bu_modules_inf,
        im_modules_inf=im_modules_inf,
        merge_info=merge_info)

# inf_gen_model.load_params(inf_gen_param_file)

####################################
# Setup the optimization objective #
####################################
lam_kld = sharedX(floatX([1.0]))
c0 = sharedX(floatX(np.zeros((1, nc, npx, npx))))
gen_params = seq_cond_gen_model.gen_params + [c0]
inf_params = seq_cond_gen_model.inf_params
all_params = seq_cond_gen_model.all_params + [c0]


######################################################
# BUILD THE MODEL TRAINING COST AND UPDATE FUNCTIONS #
######################################################

def clip_sigmoid(x):
    output = sigmoid(T.clip(x, -15.0, 15.0))
    return output

    rX = dv1
    mse = T.sqrt(T.sum(T.flatten((X - rX)**2, 2), axis=1))
    return (T.flatten(cv6, 2), rX, mse)


X = T.tensor4()  # data
X0 = T.tensor4()  # vgd samples
X1 = T.tensor4()  # vgd samples
deltaX = T.tensor4()  #vgd gradient
Z = T.matrix()

### define discriminative cost ###
_, rX_data, mse_data = discrim(X)
_, rX_vgd, mse_vgd = discrim(X0)
balance_weight = sharedX(0.3)
d_cost = T.mean(mse_data - balance_weight * mse_vgd)


################################# VGD ################################
def vgd_kernel(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = T.sub(T.add(X2e, X2e.transpose()), 2 * XY)

    V = H.flatten()

    # median distance
    h = T.switch(
        T.eq((V.shape[0] % 2), 0),
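
The kernel above is cut off mid-expression; a standard median-heuristic RBF kernel for SVGD usually continues along the lines of the sketch below. This completion is an assumption for illustration, not the original code.

# hedged completion sketch of the median-heuristic RBF kernel used by SVGD
def vgd_kernel_sketch(X0):
    XY = T.dot(X0, X0.transpose())
    x2 = T.reshape(T.sum(T.square(X0), axis=1), (X0.shape[0], 1))
    X2e = T.repeat(x2, X0.shape[0], axis=1)
    H = X2e + X2e.transpose() - 2.0 * XY      # pairwise squared distances
    V = H.flatten()
    # median of the pairwise squared distances (even / odd element count)
    h = T.switch(T.eq((V.shape[0] % 2), 0),
                 T.mean(T.sort(V)[((V.shape[0] // 2) - 1):((V.shape[0] // 2) + 1)]),
                 T.sort(V)[V.shape[0] // 2])
    h = T.sqrt(0.5 * h / T.log(X0.shape[0].astype('float32') + 1.0))
    Kxy = T.exp(-H / (2.0 * h ** 2))          # RBF kernel matrix
    dxkxy = -T.dot(Kxy, X0) + X0 * T.sum(Kxy, axis=1).dimshuffle(0, 'x')
    dxkxy = dxkxy / (h ** 2)                  # kernel-gradient term of the SVGD update
    return Kxy, dxkxy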
Esempio n. 52
0
from lib.img_utils import transform
from sklearn.externals import joblib

import theano
import theano.tensor as T
from tqdm import tqdm

from load import visual_concepts

dcgan_root = "/mnt/disk1/vittal/dcgan_code/visual_concepts/"

desc = "vcgan_orig_multi"
model_dir = dcgan_root + '/models/%s/'%desc
model_number = "25_discrim_params.jl"
discrim_params_np = joblib.load(model_dir + model_number)
discrim_params = [sharedX(element) for element in discrim_params_np]
X = T.tensor4()
Y = T.matrix()
YMULTI = T.matrix()
YHAT = T.matrix()
YHAT_MULTI = T.matrix()

dX = models.discrim(X, *discrim_params)
print 'COMPILING...'
_dis = theano.function([X], dX)
print 'Done!'

# Data processing
path = os.path.join(data_dir, "vc.hdf5")
tr_data, tr_stream = visual_concepts(path, ntrain=None)
patches_idx = tr_stream.dataset.provides_sources.index('patches')
Esempio n. 53
0
# define pixel loss
pixel_loss = costs.L2Loss(gx, x)

# define feature loss
x_t = AlexNet.transform_im(x, npx=npx, nc=nc)
x_net = AlexNet.build_model(x_t, layer=args.layer, shape=(None, 3, npx, npx))
AlexNet.load_model(x_net, layer=args.layer)
x_f = lasagne.layers.get_output(x_net[args.layer], deterministic=True)
gx_t = AlexNet.transform_im(gx, npx=npx, nc=nc)
gx_net = AlexNet.build_model(gx_t, layer=args.layer, shape=(None, 3, npx, npx))
AlexNet.load_model(gx_net, layer=args.layer)
gx_f = lasagne.layers.get_output(gx_net[args.layer], deterministic=True)
ftr_loss = costs.L2Loss(gx_f, x_f)

# add two losses together
cost = pixel_loss + ftr_loss * sharedX(args.alpha)
output = [cost, z]
lrt = sharedX(args.lr)
b1t = sharedX(args.b1)
p_updater = updates.Adam(lr=lrt, b1=b1t, regularizer=updates.Regularizer(l2=args.weight_decay))
p_updates = p_updater(predict_params, cost)

print('COMPILING')
t = time()
_train_p = theano.function([x], cost, updates=p_updates)
_train_p_cost = theano.function([x], [cost, gx])
_predict_z = theano.function([x], z)
_gen = theano.function([z], gx)
print('%.2f seconds to compile theano functions' % (time() - t))
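
Once compiled, these functions support a simple project-then-regenerate check; x_batch below is an illustrative, already-preprocessed image batch.

# hedged usage sketch: project images to latent codes, then regenerate them
z_pred = _predict_z(x_batch)
x_rec = _gen(z_pred)
rec_cost, gx_rec = _train_p_cost(x_batch)
print('prediction cost: %.4f' % float(rec_cost))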

Esempio n. 54
0
def load_generator_model(min_num_gen_filters,
                         model_params_dict):
    # initial square image size
    init_image_size  = 4

    # set num of filters for each layer
    num_gen_filters0 = min_num_gen_filters*8

    # LAYER 0 (LINEAR W/ BN)
    print 'LOAD GENERATOR LINEAR LAYER 0'
    linear_w0    = sharedX(model_params_dict[   'gen_linear_w0'], name='gen_linear_w0')
    linear_bn_w0 = sharedX(model_params_dict['gen_linear_bn_w0'], name='gen_linear_bn_w0')
    linear_bn_b0 = sharedX(model_params_dict['gen_linear_bn_b0'], name='gen_linear_bn_b0')

    # LAYER 1 (DECONV)
    print 'SET GENERATOR CONV LAYER 1'
    conv_w1    = sharedX(model_params_dict[   'gen_conv_w1'], name='gen_conv_w1')
    conv_bn_w1 = sharedX(model_params_dict['gen_conv_bn_w1'], name='gen_conv_bn_w1')
    conv_bn_b1 = sharedX(model_params_dict['gen_conv_bn_b1'], name='gen_conv_bn_b1')

    # LAYER 2 (DECONV)
    print 'SET GENERATOR CONV LAYER 2'
    conv_w2    = sharedX(model_params_dict[   'gen_conv_w2'], name='gen_conv_w2')
    conv_bn_w2 = sharedX(model_params_dict['gen_conv_bn_w2'], name='gen_conv_bn_w2')
    conv_bn_b2 = sharedX(model_params_dict['gen_conv_bn_b2'], name='gen_conv_bn_b2')

    # LAYER 3 (DECONV)
    print 'SET GENERATOR CONV LAYER 3'
    conv_w3    = sharedX(model_params_dict[   'gen_conv_w3'], name='gen_conv_w3')
    conv_bn_w3 = sharedX(model_params_dict['gen_conv_bn_w3'], name='gen_conv_bn_w3')
    conv_bn_b3 = sharedX(model_params_dict['gen_conv_bn_b3'], name='gen_conv_bn_b3')

    # LAYER 4 (DECONV)
    print 'SET GENERATOR CONV LAYER 4'
    conv_w4 = sharedX(model_params_dict[   'gen_conv_w4'], name='gen_conv_w4')
    conv_b4 = sharedX(model_params_dict[   'gen_conv_b4'], name='gen_conv_b4')

    generator_params = [[linear_w0, linear_bn_b0,
                         conv_w1, conv_bn_b1,
                         conv_w2, conv_bn_b2,
                         conv_w3, conv_bn_b3,
                         conv_w4, conv_b4],
                        [linear_bn_w0,
                         conv_bn_w1,
                         conv_bn_w2,
                         conv_bn_w3]]

    print 'SET GENERATOR FUNCTION'
    def generator_function(hidden_data, is_train=True):
        # layer 0 (linear)
        h0     = T.dot(hidden_data, linear_w0)
        h0     = h0 + t_rng.normal(size=h0.shape, std=0.01, dtype=t_floatX)
        h0     = relu(batchnorm(X=h0, g=linear_bn_w0, b=linear_bn_b0))
        h0     = h0.reshape((h0.shape[0], num_gen_filters0, init_image_size, init_image_size))
        # layer 1 (deconv)
        h1     = deconv(h0, conv_w1, subsample=(2, 2), border_mode=(2, 2))
        h1     = h1 + t_rng.normal(size=h1.shape, std=0.01, dtype=t_floatX)
        h1     = relu(batchnorm(h1, g=conv_bn_w1, b=conv_bn_b1))
        # layer 2 (deconv)
        h2     = deconv(h1, conv_w2, subsample=(2, 2), border_mode=(2, 2))
        h2     = h2 + t_rng.normal(size=h2.shape, std=0.01, dtype=t_floatX)
        h2     = relu(batchnorm(h2, g=conv_bn_w2, b=conv_bn_b2))
        # layer 3 (deconv)
        h3     = deconv(h2, conv_w3, subsample=(2, 2), border_mode=(2, 2))
        h3     = h3 + t_rng.normal(size=h3.shape, std=0.01, dtype=t_floatX)
        h3     = relu(batchnorm(h3, g=conv_bn_w3, b=conv_bn_b3))
        # layer 4 (deconv)
        output = tanh(deconv(h3, conv_w4, subsample=(2, 2), border_mode=(2, 2))+conv_b4.dimshuffle('x', 0, 'x', 'x'))
        return output

    return [generator_function, generator_params]
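
A loaded generator like this is usually wrapped in a compiled sampling function; the sketch below is illustrative, and model_params_dict, hidden_size, min_num_gen_filters=64, and the uniform prior are assumptions rather than values from the original script.

# hedged sampling sketch (helper names floatX / np_rng as used elsewhere in the repo)
generator_function, generator_params = load_generator_model(min_num_gen_filters=64,
                                                             model_params_dict=model_params_dict)
hidden_data = T.matrix()
samples_expr = generator_function(hidden_data, is_train=False)
_sample = theano.function([hidden_data], samples_expr)
samples = _sample(floatX(np_rng.uniform(-1.0, 1.0, size=(100, hidden_size))))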
inf_gen_model = InfGenModel(
    bu_modules=bu_modules,
    td_modules=td_modules,
    im_modules=im_modules,
    sc_modules=[],
    merge_info=merge_info,
    output_transform=output_transform,
    use_sc=False
)

#inf_gen_model.load_params(inf_gen_param_file)

####################################
# Setup the optimization objective #
####################################
lam_vae = sharedX(floatX([1.0]))
lam_kld = sharedX(floatX([1.0]))
noise = sharedX(floatX([noise_std]))
gen_params = inf_gen_model.gen_params
inf_params = inf_gen_model.inf_params
g_params = gen_params + inf_params

######################################################
# BUILD THE MODEL TRAINING COST AND UPDATE FUNCTIONS #
######################################################

# Setup symbolic vars for the model inputs, outputs, and costs
Xg = T.tensor4()  # symbolic var for inputs to bottom-up inference network
Z0 = T.matrix()   # symbolic var for "noise" inputs to the generative stuff

##########################################################
Esempio n. 56
0
def load_batchnorm(model_path):
    bn = utils.PickleLoad(model_path)
    bn_params = [sharedX(b) for b in bn]
    return bn_params
                np.save(file=samples_dir + "/" + model_name + "_MOMENT_COST", arr=np.asarray(moment_cost_list))
                np.save(file=samples_dir + "/" + model_name + "_VAE_COST", arr=np.asarray(vae_cost_list))


if __name__ == "__main__":

    batch_size = 128
    num_epochs = 100
    _, data_stream = faces(batch_size=batch_size)

    num_hiddens = 1024
    learning_rate = 1e-4
    l2_weight = 1e-5

    optimizer = Adagrad(lr=sharedX(learning_rate), regularizer=Regularizer(l2=l2_weight))

    model_test_name = (
        model_name
        + "_HIDDEN{}".format(int(num_hiddens))
        + "_REG{}".format(int(-np.log10(l2_weight)))
        + "_LR{}".format(int(-np.log10(learning_rate)))
    )
    train_model(
        model_name=model_test_name,
        data_stream=data_stream,
        num_hiddens=num_hiddens,
        num_epochs=num_epochs,
        optimizer=optimizer,
    )
Esempio n. 58
0
# compute costs based on discriminator output for real/generated data
d_cost_real = sum([bce(p, T.ones(p.shape)).mean() for p in p_real])
d_cost_gen = sum([bce(p, T.zeros(p.shape)).mean() for p in p_gen])
g_cost_d = sum([bce(p, T.ones(p.shape)).mean() for p in p_gen])

# d_cost_real = bce(p_real[-1], T.ones(p_real[-1].shape)).mean()
# d_cost_gen = bce(p_gen[-1], T.zeros(p_gen[-1].shape)).mean()
# g_cost_d = bce(p_gen[-1], T.ones(p_gen[-1].shape)).mean()

d_cost = d_cost_real + d_cost_gen + (1e-5 * sum([T.sum(p**2.0) for p in discrim_params]))
g_cost = g_cost_d + (1e-5 * sum([T.sum(p**2.0) for p in gen_params]))

cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]

lrt = sharedX(lr)
d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
d_updates = d_updater(discrim_params, d_cost)
g_updates = g_updater(gen_params, g_cost)
updates = d_updates + g_updates

print 'COMPILING'
t = time()
_train_g = theano.function([X, Z0], cost, updates=g_updates)
_train_d = theano.function([X, Z0], cost, updates=d_updates)
_gen = theano.function([Z0], gX)
print "{0:.2f} seconds to compile theano functions".format(time()-t)


f_log = open("{}/{}.ndjson".format(log_dir, desc), 'wb')
        get_masked_data(x_in, im_shape=(nc, npx, npx), drop_prob=0.,
                        occ_shape=(16, 16), occ_count=3,
                        data_mean=Xmu)
    # reshape and process data for use as model input
    xm_gen = 1. - xm_gen  # mask is 1 for unobserved pixels
    xm_inf = xm_gen       # mask is 1 for pixels to predict
    xg_gen = train_transform(xg_gen)
    xm_gen = train_transform(xm_gen, add_fuzz=False)
    xg_inf = train_transform(xg_inf)
    xm_inf = train_transform(xm_inf, add_fuzz=False)
    return xg_gen, xm_gen, xg_inf, xm_inf

####################################
# Setup the optimization objective #
####################################
lam_kld = sharedX(floatX([1.0]))
log_var = sharedX(floatX([1.0]))
X_init = sharedX(floatX(np.zeros((1, nc, npx, npx))))
gen_params = inf_gen_model.gen_params
inf_params = inf_gen_model.inf_params
all_params = inf_gen_model.all_params + [log_var, X_init]

######################################################
# BUILD THE MODEL TRAINING COST AND UPDATE FUNCTIONS #
######################################################

# Setup symbolic vars for the model inputs, outputs, and costs
Xg_gen = T.tensor4()  # input to generator, with some parts masked out
Xm_gen = T.tensor4()  # mask indicating parts that are masked out
Xg_inf = T.tensor4()  # complete observation, for input to inference net
Xm_inf = T.tensor4()  # mask for which bits to predict