def __init__(self,
              name,
              nPass,
              nPassD,
              nGenerators,
              batchSize,
              metric="default"):
     super().__init__(name, nPass, nPassD, nGenerators, batchSize, metric)
     self.generator_builder = models_uncond.build_generator_64  #128
     self.discriminator_builder = models_uncond.build_discriminator_64  #128
     self.imagesize = 64  #128
     self.dataset = bedrooms(self.imagesize)
     self.X_train = shuffle(self.dataset)
     self.X_test = shuffle(self.dataset)[0:1024]
def load_data_file(df_name, va_count=2000):
    # load all data into memory
    print('loading data from {}...'.format(df_name))
    xtr, x_std = load_and_scale_data(df_name)
    # split data into validation and training bits based
    # on placement within the data file.
    xva = xtr[:va_count, :]
    xtr = xtr[va_count:, :]
    # shuffle training/validation data and compute mean
    xtr = shuffle(xtr)
    xva = shuffle(xva)
    xmu = np.mean(xtr, axis=0)
    print('done.')
    return xtr, xva, xmu
def mnist_with_valid_set():
    trX, teX, trY, teY = mnist()

    trX, trY = shuffle(trX, trY)
    vaX = trX[50000:]
    vaY = trY[50000:]
    trX = trX[:50000]
    trY = trY[:50000]

    return trX, vaX, teX, trY, vaY, teY
Beispiel #4
0
def svhn_with_valid_set(extra=False):
    if extra:
        trX, exX, teX, trY, exY, teY = svhn(extra=extra)
    else:
        trX, teX, trY, teY = svhn(extra=extra)
    trX, trY = shuffle(trX, trY)
    vaX = trX[:10000]
    vaY = trY[:10000]
    trX = trX[10000:]
    trY = trY[10000:]
    if extra:
        trS = np.asarray([1 for _ in range(len(trY))] + [0 for _ in range(len(exY))])
        trX = np.concatenate([trX, exX], axis=0)
        trY = np.concatenate([trY, exY], axis=0)
        trX, trY, trS = shuffle(trX, trY, trS)
    if extra:
        return trX, vaX, teX, trY, vaY, teY, trS
    else:
        return trX, vaX, teX, trY, vaY, teY
Beispiel #5
0
def mnist_with_valid_set():
    trX, teX, trY, teY = mnist()

    trX, trY = shuffle(trX, trY)
    vaX = trX[50000:]
    vaY = trY[50000:]
    trX = trX[:50000]
    trY = trY[:50000]

    return trX, vaX, teX, trY, vaY, teY
Beispiel #6
0
def svhn_with_valid_set(extra=False):
    if extra:
        trX, exX, teX, trY, exY, teY = svhn(extra=extra)
    else:
        trX, teX, trY, teY = svhn(extra=extra)
    trX, trY = shuffle(trX, trY)
    vaX = trX[:10000]
    vaY = trY[:10000]
    trX = trX[10000:]
    trY = trY[10000:]
    if extra:
        trS = np.asarray([1 for _ in range(len(trY))] +
                         [0 for _ in range(len(exY))])
        trX = np.concatenate([trX, exX], axis=0)
        trY = np.concatenate([trY, exY], axis=0)
        trX, trY, trS = shuffle(trX, trY, trS)
    if extra:
        return trX, vaX, teX, trY, vaY, teY, trS
    else:
        return trX, vaX, teX, trY, vaY, teY
 def iter_data_discriminator(self, xreals, instances):
     #prepare
     xfake_list = instances[0].img[0:self.miniBatchForD, :, :, :]
     for i in range(1, len(instances)):
         xfake = instances[i].img[0:self.miniBatchForD, :, :, :]
         xfake_list = np.append(xfake_list, xfake, axis=0)
     #iteration
     for xreal, xfake in iter_data(xreals,
                                   shuffle(xfake_list),
                                   size=self.batchSize):
         yield xreal, xfake
Beispiel #8
0
def cal_margin():
    ll = 0
    teX = shuffle(X_test)

    m = 1000
    batch_size = ntest // m
    for i in range(m):
        batch = [t % ntest for t in range(i*batch_size, (i+1)*batch_size)]
        imb = floatX(teX[batch])
        ll += _marginal(imb) * len(batch)

    return ll / ntest
 def __init__(self,
              name,
              nPass,
              nPassD,
              nGenerators,
              batchSize,
              metric="default"):
     super().__init__(name, nPass, nPassD, nGenerators, batchSize, metric)
     self.generator_builder = models_uncond.build_generator_32
     self.discriminator_builder = models_uncond.build_discriminator_32
     self.imagesize = 32
     self.dataset = cifar10()
     self.X_train = shuffle(self.dataset["X_train"])
     self.X_test = self.dataset["X_test"]
Beispiel #10
0
        frontal_face = _face_rotator(eigen_code.reshape(1, -1, 1, 1),
                                     frontal_code.reshape(1, -1, 1, 1))
        frontal_face = inverse_transform(frontal_face).reshape(npx, npx)
        imsave(frontal_path + image_names[image_idx], frontal_face)


if phase == 'TEST':
    #generate_rotated_multipie_setting1()
    test(10000)
if phase == 'TRAIN':
    log = open('logs/log.txt', 'w')
    log.close()
    for epoch in range(1, niter + niter_decay + 1):
        print 'epoch', epoch
        trY_A, trX_B, trY_B, trX_A = one_epoch_traning_data()
        trY_A, trX_B, trY_B, trX_A = shuffle(trY_A, trX_B, trY_B, trX_A)
        mean_vars_array = [[], [], [], [], [], [], [], [], [], [], [], [], [],
                           [], [], [], [], [], [], []]
        for ymb_A, xmb_B, ymb_B, xmb_A in tqdm(iter_data(trY_A,
                                                         trX_B,
                                                         trY_B,
                                                         trX_A,
                                                         size=nbatch),
                                               total=len(trY_A) / nbatch):
            ymb_A = transform(ymb_A)
            xmb_B = transform(xmb_B)
            ymb_B = transform(ymb_B)
            xmb_A = transform(xmb_A)
            if n_updates % (k + 1) == 0:
                output_g = _train_g(ymb_A, xmb_B, ymb_B, xmb_A)
            else:
Beispiel #11
0
def run(hp, folder):
    trX, trY, nb_classes = load_data()
    k = 1             # # of discrim updates for each gen update
    l2 = 2.5e-5       # l2 weight decay
    b1 = 0.5          # momentum term of adam
    nc = 1            # # of channels in image
    ny = nb_classes   # # of classes
    nbatch = 128      # # of examples in batch
    npx = 28          # # of pixels width/height of images
    nz = 100          # # of dim for Z
    ngfc = 512       # # of gen units for fully connected layers
    ndfc = 512      # # of discrim units for fully connected layers
    ngf = 64          # # of gen filters in first conv layer
    ndf = 64          # # of discrim filters in first conv layer
    nx = npx*npx*nc   # # of dimensions in X
    niter = 200       # # of iter at starting learning rate
    niter_decay = 100 # # of iter to linearly decay learning rate to zero
    lr = 0.0002       # initial learning rate for adam
    scale = 0.02

    k = hp['k']
    l2 = hp['l2']
    #b1 = hp['b1']
    nc = 1
    ny = nb_classes
    nbatch = hp['nbatch']
    npx = 28
    nz = hp['nz']
    ngfc = hp['ngfc']       # # of gen units for fully connected layers
    ndfc = hp['ndfc']      # # of discrim units for fully connected layers
    ngf = hp['ngf']          # # of gen filters in first conv layer
    ndf = hp['ndf']          # # of discrim filters in first conv layer
    nx = npx*npx*nc   # # of dimensions in X
    niter = hp['niter']       # # of iter at starting learning rate
    niter_decay = hp['niter_decay'] # # of iter to linearly decay learning rate to zero
    lr = hp['lr']       # initial learning rate for adam


    scale = hp['scale']

    #k = 1             # # of discrim updates for each gen update
    #l2 = 2.5e-5       # l2 weight decay
    b1 = 0.5          # momentum term of adam
    #nc = 1            # # of channels in image
    #ny = nb_classes   # # of classes
    budget_hours = hp.get('budget_hours', 2)
    budget_secs = budget_hours * 3600

    ntrain = len(trX)
    def transform(X):
        return (floatX(X)).reshape(-1, nc, npx, npx)

    def inverse_transform(X):
        X = X.reshape(-1, npx, npx)
        return X
    
    model_dir = folder
    samples_dir = os.path.join(model_dir, 'samples')
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    if not os.path.exists(samples_dir):
        os.makedirs(samples_dir)

    relu = activations.Rectify()
    sigmoid = activations.Sigmoid()
    lrelu = activations.LeakyRectify()
    bce = T.nnet.binary_crossentropy

    gifn = inits.Normal(scale=scale)
    difn = inits.Normal(scale=scale)

    gw  = gifn((nz, ngfc), 'gw')
    gw2 = gifn((ngfc, ngf*2*7*7), 'gw2')
    gw3 = gifn((ngf*2, ngf, 5, 5), 'gw3')
    gwx = gifn((ngf, nc, 5, 5), 'gwx')

    dw  = difn((ndf, nc, 5, 5), 'dw')
    dw2 = difn((ndf*2, ndf, 5, 5), 'dw2')
    dw3 = difn((ndf*2*7*7, ndfc), 'dw3')
    dwy = difn((ndfc, 1), 'dwy')

    gen_params = [gw, gw2, gw3, gwx]
    discrim_params = [dw, dw2, dw3, dwy]

    def gen(Z, w, w2, w3, wx, use_batchnorm=True):
        if use_batchnorm:
            batchnorm_ = batchnorm
        else:
            batchnorm_ = lambda x:x
        h = relu(batchnorm_(T.dot(Z, w)))
        h2 = relu(batchnorm_(T.dot(h, w2)))
        h2 = h2.reshape((h2.shape[0], ngf*2, 7, 7))
        h3 = relu(batchnorm_(deconv(h2, w3, subsample=(2, 2), border_mode=(2, 2))))
        x = sigmoid(deconv(h3, wx, subsample=(2, 2), border_mode=(2, 2)))
        return x

    def discrim(X, w, w2, w3, wy):
        h = lrelu(dnn_conv(X, w, subsample=(2, 2), border_mode=(2, 2)))
        h2 = lrelu(batchnorm(dnn_conv(h, w2, subsample=(2, 2), border_mode=(2, 2))))
        h2 = T.flatten(h2, 2)
        h3 = lrelu(batchnorm(T.dot(h2, w3)))
        y = sigmoid(T.dot(h3, wy))
        return y

    X = T.tensor4()
    Z = T.matrix()

    gX = gen(Z, *gen_params)

    p_real = discrim(X, *discrim_params)
    p_gen = discrim(gX, *discrim_params)

    d_cost_real = bce(p_real, T.ones(p_real.shape)).mean()
    d_cost_gen = bce(p_gen, T.zeros(p_gen.shape)).mean()
    g_cost_d = bce(p_gen, T.ones(p_gen.shape)).mean()

    d_cost = d_cost_real + d_cost_gen
    g_cost = g_cost_d

    cost = [g_cost, d_cost, g_cost_d, d_cost_real, d_cost_gen]

    lrt = sharedX(lr)
    d_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    g_updater = updates.Adam(lr=lrt, b1=b1, regularizer=updates.Regularizer(l2=l2))
    d_updates = d_updater(discrim_params, d_cost)
    g_updates = g_updater(gen_params, g_cost)
    #updates = d_updates + g_updates

    print 'COMPILING'
    t = time()
    _train_g = theano.function([X, Z], cost, updates=g_updates)
    _train_d = theano.function([X, Z], cost, updates=d_updates)
    _gen = theano.function([Z], gX)
    print '%.2f seconds to compile theano functions'%(time()-t)

    tr_idxs = np.arange(len(trX))
    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))

    def gen_samples(n, nbatch=128):
        samples = []
        labels = []
        n_gen = 0
        for i in range(n/nbatch):
            zmb = floatX(np_rng.uniform(-1., 1., size=(nbatch, nz)))
            xmb = _gen(zmb)
            samples.append(xmb)
            n_gen += len(xmb)
        n_left = n-n_gen
        zmb = floatX(np_rng.uniform(-1., 1., size=(n_left, nz)))
        xmb = _gen(zmb)
        samples.append(xmb)
        return np.concatenate(samples, axis=0)

    s = floatX(np_rng.uniform(-1., 1., size=(10000, nz)))
    n_updates = 0
    n_check = 0
    n_epochs = 0
    n_updates = 0
    n_examples = 0
    t = time()
    begin = datetime.now()
    for epoch in range(1, niter+niter_decay+1): 
        t = time()
        print("Epoch {}".format(epoch))
        trX = shuffle(trX)
        for imb in tqdm(iter_data(trX, size=nbatch), total=ntrain/nbatch):
            imb = transform(imb)
            zmb = floatX(np_rng.uniform(-1., 1., size=(len(imb), nz)))
            if n_updates % (k+1) == 0:
                cost = _train_g(imb, zmb)
            else:
                cost = _train_d(imb, zmb)
            n_updates += 1
            n_examples += len(imb)
        samples = np.asarray(_gen(sample_zmb))
        grayscale_grid_vis(inverse_transform(samples), (10, 20), '{}/{:05d}.png'.format(samples_dir, n_epochs))
        n_epochs += 1
        if n_epochs > niter:
            lrt.set_value(floatX(lrt.get_value() - lr/niter_decay))
        if n_epochs % 50 == 0 or epoch == niter + niter_decay or epoch == 1:
            imgs = []
            for i in range(0, s.shape[0], nbatch):
                imgs.append(_gen(s[i:i+nbatch]))
            img = np.concatenate(imgs, axis=0)
            samples_filename = '{}/{:05d}_gen.npz'.format(model_dir, n_epochs)
            joblib.dump(img, samples_filename, compress=9)
            shutil.copy(samples_filename, '{}/gen.npz'.format(model_dir))
            joblib.dump([p.get_value() for p in gen_params], '{}/d_gen_params.jl'.format(model_dir, n_epochs), compress=9)
            joblib.dump([p.get_value() for p in discrim_params], '{}/discrim_params.jl'.format(model_dir, n_epochs), compress=9)
        print('Elapsed : {}sec'.format(time() - t))

        if (datetime.now() - begin).total_seconds() >= budget_secs:
            print("Budget finished.quit.")
            break
Beispiel #12
0
    'n_seconds',
    'g_cost',
    'd_cost',
]

print desc.upper()
n_updates = 0
n_check = 0
n_epochs = 0
n_updates = 0
n_examples = 0
t = time()
sample_z0mb = rand_gen(size=(200,
                             nz0))  # noise samples for top generator module
for epoch in range(1, niter + niter_decay + 1):
    trX = shuffle(trX)
    for imb in tqdm(iter_data(trX, size=nbatch), total=ntrain / nbatch):
        imb = transform(imb)
        z0mb = rand_gen(size=(len(imb), nz0))
        if n_updates % (k + 1) == 0:
            cost = _train_g(imb, z0mb)
        else:
            cost = _train_d(imb, z0mb)
        n_updates += 1
        n_examples += len(imb)
    samples = np.asarray(_gen(sample_z0mb))
    grayscale_grid_vis(inverse_transform(samples), (10, 20),
                       "{}/{}.png".format(sample_dir, n_epochs))
    n_epochs += 1
    if n_epochs > niter:
        lrt.set_value(floatX(lrt.get_value() - lr / niter_decay))
Beispiel #13
0
def main():
    # Parameters
    task = 'toy'
    name = '8G_MOEGAN_MMDu2'  #'8G_MOEGAN_PFq_NFd_t2'

    DIM = 512
    begin_save = 0
    loss_type = ['trickLogD', 'minimax', 'ls']  #['trickLogD', 'minimax', 'ls']
    nloss = 3  #2
    DATASET = '8gaussians'
    batchSize = 64

    ncandi = 8
    kD = 1  # # of discrim updates for each gen update
    kG = 1  # # of discrim updates for each gen update
    ntf = 256
    b1 = 0.5  # momentum term of adam
    nz = 2  # # of dim for Z
    niter = 4  # # of iter at starting learning rate
    lr = 0.0001  # initial learning rate for adam G
    lrd = 0.0001  # initial learning rate for adam D
    N_up = 100000
    save_freq = 10000 / 10
    show_freq = 10000 / 10
    test_deterministic = True
    beta = 1.
    GP_norm = False  # if use gradients penalty on discriminator
    LAMBDA = 2.  # hyperparameter of GP
    NSGA2 = True
    # Load the dataset

    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM,
                                                          GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Gradients penalty norm
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(),
                                wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)

        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(D_loss,
                                     discriminator_params,
                                     learning_rate=lrtd,
                                     beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Fd Socre
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs],
                              discriminator_loss,
                              updates=updates_d)
    # Compile another function generating some data
    dis_fn = theano.function([real_imgs, fake_imgs],
                             [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs], [
        real_out.mean(),
        fake_out.mean(), (real_out > 0.5).mean(),
        (fake_out > 0.5).mean(), Fd_score
    ])

    # Finally, launch the training loop.
    print("Starting training...")
    desc = task + '_' + name
    print(desc)

    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson' % desc, 'wb')
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/' + desc):
        os.mkdir(os.path.join('models/', desc))

    gen_new_params = []

    # We iterate over epochs:
    for n_updates in range(N_up):
        xmb = toy_dataset(DATASET=DATASET, size=batchSize * kD)
        xmb = xmb[0:batchSize * kD]
        # initial G cluster
        if n_updates == 0:
            for can_i in range(0, ncandi):
                train_g, gen_fn, generator = create_G(
                    loss_type=loss_type[can_i % nloss],
                    discriminator=discriminator,
                    lr=lr,
                    b1=b1,
                    DIM=DIM)
                for _ in range(0, kG):
                    zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                    cost = train_g(zmb)
                sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                gen_imgs = gen_fn(sample_zmb)

                gen_new_params.append(
                    lasagne.layers.get_all_param_values(generator))

                if can_i == 0:
                    g_imgs_old = gen_imgs
                    fmb = gen_imgs[0:int(batchSize / ncandi * kD), :]
                else:
                    g_imgs_old = np.append(g_imgs_old, gen_imgs, axis=0)
                    newfmb = gen_imgs[0:int(batchSize / ncandi * kD), :]
                    fmb = np.append(fmb, newfmb, axis=0)
            # print gen_new_params
            # MODEL G
            noise = T.matrix('noise')
            generator = models_uncond.build_generator_toy(noise, nd=DIM)
            Tgimgs = lasagne.layers.get_output(generator)
            Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

            g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out,
                                                                 1).mean()
            g_loss_minimax = - \
                lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
            g_loss_ls = T.mean(T.sqr((Tfake_out - 1)))

            g_params = lasagne.layers.get_all_params(generator, trainable=True)

            up_g_logD = lasagne.updates.adam(g_loss_logD,
                                             g_params,
                                             learning_rate=lrt,
                                             beta1=b1)
            up_g_minimax = lasagne.updates.adam(g_loss_minimax,
                                                g_params,
                                                learning_rate=lrt,
                                                beta1=b1)
            up_g_ls = lasagne.updates.adam(g_loss_ls,
                                           g_params,
                                           learning_rate=lrt,
                                           beta1=b1)

            train_g = theano.function([noise], g_loss_logD, updates=up_g_logD)
            train_g_minimax = theano.function([noise],
                                              g_loss_minimax,
                                              updates=up_g_minimax)
            train_g_ls = theano.function([noise], g_loss_ls, updates=up_g_ls)

            gen_fn = theano.function([noise],
                                     lasagne.layers.get_output(
                                         generator, deterministic=True))
        else:

            class Instance:
                def __init__(self, fq, fd, params, img_values, image_copy):
                    self.fq = fq
                    self.fd = fd
                    self.params = params
                    self.vimg = img_values
                    self.cimg = image_copy

                def f(self):
                    return self.fq - self.fd

            instances = []
            fq_list = np.zeros(ncandi)
            fd_list = np.zeros(ncandi)

            gen_old_params = gen_new_params
            for can_i in range(0, ncandi):
                for type_i in range(0, nloss):
                    lasagne.layers.set_all_param_values(
                        generator, gen_old_params[can_i])
                    if loss_type[type_i] == 'trickLogD':
                        for _ in range(0, kG):
                            zmb = floatX(
                                np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g(zmb)
                    elif loss_type[type_i] == 'minimax':
                        for _ in range(0, kG):
                            zmb = floatX(
                                np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_minimax(zmb)
                    elif loss_type[type_i] == 'ls':
                        for _ in range(0, kG):
                            zmb = floatX(
                                np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_ls(zmb)

                    sample_zmb = floatX(np_rng.uniform(-1., 1.,
                                                       size=(ntf, nz)))
                    gen_imgs = gen_fn(sample_zmb)
                    frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                    instances.append(
                        Instance(
                            frr_score, fd_score,
                            lasagne.layers.get_all_param_values(generator),
                            gen_imgs,
                            gen_imgs[0:int(batchSize / ncandi * kD), :]))
            if ncandi < len(instances):
                if NSGA2 == True:
                    cromos = {
                        idx: [float(inst.fq), -float(inst.fd)]
                        for idx, inst in enumerate(instances)
                    }
                    cromos_idxs = [idx for idx, _ in enumerate(instances)]
                    finalpop = nsga_2_pass(ncandi, cromos, cromos_idxs)

                    for idx, p in enumerate(finalpop):
                        inst = instances[p]
                        gen_new_params[idx] = inst.params
                        fq_list[idx] = inst.fq
                        fd_list[idx] = inst.fd
                        fake_rate[idx] = inst.f()
                        g_imgs_old[idx * ntf:(idx + 1) * ntf, :] = inst.vimg
                        fmb[int(idx * batchSize / ncandi *
                                kD):math.ceil((idx + 1) * batchSize / ncandi *
                                              kD), :] = inst.cimg

                    with open('front/%s.tsv' % desc, 'wb') as ffront:
                        for idx, p in enumerate(finalpop):
                            inst = instances[p]
                            ffront.write(
                                (str(inst.fq) + "\t" + str(inst.fd)).encode())
                            ffront.write("\n".encode())
                else:
                    for idx, inst in enumerate(instances):
                        if idx < ncandi:
                            gen_new_params[idx] = inst.params
                            fake_rate[idx] = inst.f()
                            fq_list[idx] = inst.fq
                            fd_list[idx] = inst.fd
                            g_imgs_old[idx * ntf:(idx + 1) *
                                       ntf, :] = inst.vimg
                            fmb[int(idx * batchSize / ncandi *
                                    kD):math.ceil((idx + 1) * batchSize /
                                                  ncandi * kD), :] = inst.cimg
                        else:
                            fr_com = fake_rate - inst.f()
                            if min(fr_com) < 0:
                                idr = np.where(fr_com == min(fr_com))[0][0]
                                gen_new_params[idr] = inst.params
                                fake_rate[idr] = inst.f()
                                g_imgs_old[idr * ntf:(idr + 1) *
                                           ntf, :] = inst.vimg
                                fmb[int(idr * batchSize / ncandi *
                                        kD):math.ceil((idr + 1) * batchSize /
                                                      ncandi *
                                                      kD), :] = inst.cimg

        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf)
        sample_xmb = sample_xmb[0:ncandi * ntf]
        for i in range(0, ncandi):
            xfake = g_imgs_old[i * ntf:(i + 1) * ntf, :]
            xreal = sample_xmb[i * ntf:(i + 1) * ntf, :]
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            if i == 0:
                fake_rate = np.array([fr])
                real_rate = np.array([tr])
                fake_rate_p = np.array([frp])
                real_rate_p = np.array([trp])
                FDL = np.array([fdscore])
            else:
                fake_rate = np.append(fake_rate, fr)
                real_rate = np.append(real_rate, tr)
                fake_rate_p = np.append(fake_rate_p, frp)
                real_rate_p = np.append(real_rate_p, trp)
                FDL = np.append(FDL, fdscore)

        print(fake_rate, fake_rate_p, FDL)
        print(n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write((str(fake_rate) + ' ' + str(fake_rate_p) + '\n' +
                     str(n_updates) + ' ' + str(real_rate.mean()) + ' ' +
                     str(real_rate_p.mean()) + '\n').encode())
        f_log.flush()

        # train D
        for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
            cost = train_d(xreal, xfake)

        if n_updates % show_freq == 0:
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            params_max = gen_new_params[np.argmax(fake_rate)]
            lasagne.layers.set_all_param_values(generator, params_max)
            g_imgs_max = gen_fn(s_zmb)

        if n_updates % show_freq == 0 and n_updates != 0:
            #metric
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            xmb = toy_dataset(DATASET=DATASET, size=512)
            mmd2_all = []
            for i in range(0, ncandi):
                lasagne.layers.set_all_param_values(generator,
                                                    gen_new_params[i])
                g_imgs_min = gen_fn(s_zmb)
                mmd2_all.append(compute_metric_mmd2(g_imgs_min, xmb))
            mmd2_all = np.array(mmd2_all)
            if NSGA2:
                with open('front/%s_mmd2u.tsv' % desc, 'wb') as ffront:
                    for idx in range(0, ncandi):
                        ffront.write(
                            (str(fq_list[idx]) + "\t" + str(fd_list[idx]) +
                             "\t" + str(mmd2_all[idx])).encode())
                        ffront.write("\n".encode())
            #save best
            params = gen_new_params[np.argmin(mmd2_all)]
            lasagne.layers.set_all_param_values(generator, params)
            g_imgs_min = gen_fn(s_zmb)
            generate_image(xmb,
                           g_imgs_min,
                           n_updates / save_freq,
                           desc,
                           postfix="_mmu2d")
            np.savez('models/%s/gen_%d.npz' % (desc, n_updates / save_freq),
                     *lasagne.layers.get_all_param_values(discriminator))
            np.savez('models/%s/dis_%d.npz' % (desc, n_updates / save_freq),
                     *lasagne.layers.get_all_param_values(generator))
Beispiel #14
0
total_svgd_ll = []

total_langevin_acc = []
total_langevin_ll = []

total_m_acc = []
total_m_ll = []

print "Start testing on separete data set"
for data_i in range(0, 8):
    # For each dataset training on dev and testing on test dataset

    X_dev, y_dev = total_dev[data_i]
    X_test, y_test = total_test[data_i]
    dev_N = X_dev.shape[0]
    X_dev, y_dev = shuffle(X_dev, y_dev)

    X_dev, y_dev = shuffle(X_dev, y_dev)
    X_test, y_test = shuffle(X_test, y_test)
    dev_N = X_dev.shape[0]
    print "data size %d" %(dev_N)


    ### svgd
    x0 = init_theta(n_particle=n_particle)
    x0 = sharedX(x0)
    _svgd_step = _make_svgd_step(x0, lr=1e-6 * (2 ** 13))
    for i in tqdm(range(n_iter)):
        imb = [t % dev_N for t in range(i*nbatch, (i+1)*nbatch)]
        _svgd_step(X_dev[imb], y_dev[imb], dev_N)
Beispiel #15
0
def main():
    # Parameters
    task = 'toy'
    name = '25G'
    
    DIM=512
    begin_save = 0 
    loss_type = ['trickLogD','minimax','ls'] 
    nloss = 3
    DATASET = '25gaussians'
    batchSize = 64 
   
    ncandi = 1 
    kD = 1             # # of discrim updates for each gen update
    kG = 1            # # of discrim updates for each gen update
    ntf = 256 
    b1 = 0.5          # momentum term of adam
    nz = 2          # # of dim for Z
    niter = 4       # # of iter at starting learning rate
    lr = 0.0001       # initial learning rate for adam G
    lrd = 0.0001       # initial learning rate for adam D
    N_up = 100000 
    save_freq = 10000 
    show_freq = 10000 
    test_deterministic = True   
    beta = 1.
    GP_norm = False     # if use gradients penalty on discriminator
    LAMBDA = 2.       # hyperparameter of GP 
   
    # Load the dataset
   
    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM, GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (lasagne.objectives.binary_crossentropy(real_out, 1)
            + lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()
 
    # Gradients penalty norm 
    if GP_norm is True: 
        alpha = t_rng.uniform((batchSize,1), low=0.,high=1.)  
        differences = fake_imgs - real_imgs  
        interpolates = real_imgs + (alpha*differences)
        gradients = theano.grad(lasagne.layers.get_output(discriminator, interpolates).sum(), wrt=interpolates) 
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes-1.)**2)
 
        D_loss = discriminator_loss +  LAMBDA*gradient_penalty 
        b1_d = 0. 
    else:
        D_loss = discriminator_loss 
        b1_d = 0. 
 
    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(
            D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Fd Socre 
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params) 
    Fd_score  = beta*T.log(sum(T.sum(T.sqr(x)) for x in Fd))
     
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs],
                               discriminator_loss,
                               updates=updates_d)
    # Compile another function generating some data
    dis_fn = theano.function([real_imgs,fake_imgs],[(fake_out).mean(),Fd_score])
    disft_fn = theano.function([real_imgs,fake_imgs],
                               [real_out.mean(),
                                fake_out.mean(),
                                (real_out>0.5).mean(),
                                (fake_out>0.5).mean(),
                                Fd_score])
 
    # Finally, launch the training loop.
    print("Starting training...") 
    desc = task + '_' + name 
    print desc
    
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson'%desc, 'wb')
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/'+desc):
        os.mkdir(os.path.join('models/',desc))
     
    gen_new_params = [] 
     
    # We iterate over epochs:
    for n_updates in range(N_up):
        xmb = toy_dataset(DATASET=DATASET, size=batchSize*kD) 
        xmb = xmb[0:batchSize*kD] 
        # initial G cluster 
        if n_updates == 0:
            for can_i in range(0,ncandi): 
                train_g, gen_fn, generator = create_G(
                        loss_type=loss_type[can_i%nloss],
                        discriminator=discriminator, lr=lr, b1=b1, DIM=DIM) 
                for _ in range(0,kG):
                    zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                    cost = train_g(zmb)
                sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                gen_imgs = gen_fn(sample_zmb)
 
                gen_new_params.append(lasagne.layers.get_all_param_values(generator)) 
                    
                if can_i == 0: 
                    g_imgs_old=gen_imgs
                    fmb = gen_imgs[0:batchSize/ncandi*kD,:]
                else: 
                    g_imgs_old = np.append(g_imgs_old,gen_imgs,axis=0)
                    fmb = np.append(fmb,gen_imgs[0:batchSize/ncandi*kD,:],axis=0)
            #print gen_new_params
            # MODEL G
            noise = T.matrix('noise')
            generator = models_uncond.build_generator_toy(noise,nd=DIM)
            Tgimgs = lasagne.layers.get_output(generator)
            Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

            g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
            g_loss_minimax = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
            g_loss_ls = T.mean(T.sqr((Tfake_out - 1)))

            g_params = lasagne.layers.get_all_params(generator, trainable=True)

            up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1)
            up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1)
            up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1)

            train_g = theano.function([noise],g_loss_logD,updates=up_g_logD)
            train_g_minimax = theano.function([noise],g_loss_minimax,updates=up_g_minimax)
            train_g_ls = theano.function([noise],g_loss_ls,updates=up_g_ls)

            gen_fn = theano.function([noise], lasagne.layers.get_output(
                                    generator,deterministic=True))
        else:
            gen_old_params = gen_new_params
            for can_i in range(0,ncandi):
                for type_i in range(0,nloss):
                    lasagne.layers.set_all_param_values(generator, gen_old_params[can_i])
                    if loss_type[type_i] == 'trickLogD':
                        for _ in range(0,kG):
                            zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g(zmb)
                    elif loss_type[type_i] == 'minimax': 
                        for _ in range(0,kG):
                            zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_minimax(zmb)
                    elif loss_type[type_i] == 'ls': 
                        for _ in range(0,kG):
                            zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                            cost = train_g_ls(zmb)

                    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                    gen_imgs = gen_fn(sample_zmb)
                    frr_score, fd_score = dis_fn(xmb[0:ntf],gen_imgs)
                    #frr = frr[0]
                    frr = frr_score - fd_score 
                    if can_i*nloss + type_i < ncandi: 
                        idx = can_i*nloss + type_i
                        gen_new_params[idx]=lasagne.layers.get_all_param_values(generator)
                        fake_rate[idx]=frr
                        g_imgs_old[idx*ntf:(idx+1)*ntf,:]=gen_imgs
                        fmb[idx*batchSize/ncandi*kD:(idx+1)*batchSize/ncandi*kD,:] = \
                            gen_imgs[0:batchSize/ncandi*kD,:]
                    else: 
                        fr_com = fake_rate - frr
                        if min(fr_com) < 0:
                            ids_replace = np.where(fr_com==min(fr_com)) 
                            idr = ids_replace[0][0]
                            fake_rate[idr]=frr

                            gen_new_params[idr] = lasagne.layers.get_all_param_values(generator)

                            g_imgs_old[idr*ntf:(idr+1)*ntf,:]=gen_imgs
                            fmb[idr*batchSize/ncandi*kD:(idr+1)*batchSize/ncandi*kD,:] = \
                                gen_imgs[0:batchSize/ncandi*kD,:]

        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi*ntf) 
        sample_xmb = sample_xmb[0:ncandi*ntf] 
        for i in range(0, ncandi):
            xfake = g_imgs_old[i*ntf:(i+1)*ntf,:]
            xreal = sample_xmb[i*ntf:(i+1)*ntf,:]
            tr, fr, trp, frp, fdscore = disft_fn(xreal,xfake) 
            if i == 0:
                fake_rate = np.array([fr])   
                real_rate = np.array([tr])     
                fake_rate_p = np.array([frp])   
                real_rate_p = np.array([trp])     
                FDL = np.array([fdscore])     
            else:
                fake_rate = np.append(fake_rate,fr)
                real_rate = np.append(real_rate,tr)
                fake_rate_p = np.append(fake_rate_p,frp)
                real_rate_p = np.append(real_rate_p,trp)
                FDL = np.append(FDL,fdscore)
        print fake_rate, fake_rate_p, FDL
        print (n_updates, real_rate.mean(), real_rate_p.mean()) 
        f_log.write(str(fake_rate)+' '+str(fake_rate_p)+'\n'+ str(n_updates) + ' ' + str(real_rate.mean())+ ' ' +str(real_rate_p.mean())+'\n')
        f_log.flush()

        # train D 
	for xreal,xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
            cost = train_d(xreal, xfake)

	if n_updates%show_freq == 0:
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            g_imgs = gen_fn(s_zmb)
            xmb = toy_dataset(DATASET=DATASET, size=512) 
            generate_image(xmb,g_imgs,n_updates/save_freq,desc)
Beispiel #16
0
    'n_seconds',
    'g_cost',
    'd_cost',
]

print desc.upper()
n_updates = 0
n_check = 0
n_epochs = 0
n_updates = 0
n_examples = 0
t = time()
sample_z0mb = rand_gen(size=(200,
                             nz0))  # noise samples for top generator module
for epoch in range(1, niter + niter_decay + 1):
    Xtr = shuffle(Xtr)
    g_cost = 0
    d_cost = 0
    gc_iter = 0
    dc_iter = 0
    for imb in tqdm(iter_data(Xtr, size=nbatch), total=ntrain / nbatch):
        imb = train_transform(imb)
        z0mb = rand_gen(size=(len(imb), nz0))
        if n_updates % (k + 1) == 0:
            g_cost += _train_g(imb, z0mb)[0]
            gc_iter += 1
        else:
            d_cost += _train_d(imb, z0mb)[1]
            dc_iter += 1
        n_updates += 1
        n_examples += len(imb)
Beispiel #17
0
def main(
        problem,
        popsize,
        algorithm,
        save_freq,
        loss_type=['trickLogD', 'minimax', 'ls'],
        postfix=None,
        nPassD=1,  #backpropagation pass for discriminator
        batchSize=64,
        metric="default",
        output_dir="runs",
        gradients_penalty=False):

    if not (problem in problem_table.keys()):
        exit(-1)

    #task
    task_args = problem_table[problem][1]
    task = problem_table[problem][0](nPassD, popsize, batchSize, metric)
    net_otype = task.net_output_type()

    # description
    description_name = '{}_{}_{}_{}'.format(
        str(task),
        algorithm,
        popsize,
        postfix if postfix is not None else "",
    )

    # share params
    nloss = len(loss_type)
    lr = task_args['lr']  # initial learning rate for adam G
    lrd = task_args['lrd']  # initial learning rate for adam D
    b1 = task_args['b1']  # momentum term of adam
    beta = task_args['beta']  # momentum term of adam
    samples = task_args['metric_samples']  # metric samples
    DIM = task_args['dim']  # momentum term of adam
    GP_norm = gradients_penalty  # if use gradients penalty on discriminator
    LAMBDA = 2.  # hyperparameter sudof GP

    # algorithm params
    if algorithm == "egan":
        VARIATION = "all"
        MULTI_OBJECTIVE_SELECTION = False
    elif algorithm == "moegan":
        VARIATION = "all"
        MULTI_OBJECTIVE_SELECTION = True
    elif algorithm == "smoegan":
        VARIATION = "deepqlearning"
        MULTI_OBJECTIVE_SELECTION = True
    else:
        exit(-2)

    # Load the dataset
    def create_generator_trainer(noise=None,
                                 discriminator=None,
                                 lr=0.0002,
                                 b1=0.5,
                                 DIM=64):
        return GeneratorTrainer(noise, task.create_geneator(noise, DIM),
                                discriminator, lr, b1)

    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = net_otype('real_imgs')
    fake_imgs = net_otype('fake_imgs')
    # Create neural network model
    discriminator = task.create_discriminator(DIM, GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Gradients penalty norm
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(),
                                wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)
        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(D_loss,
                                     discriminator_params,
                                     learning_rate=lrtd,
                                     beta1=b1_d)
    #lrt = theano.shared(lasagne.utils.floatX(lr))

    # Fd Socre
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))
    # max is ~7.5 for toy dataset and ~0.025 for real ones (it will be updated after 1 iteration, which is likely the worst one)
    Fd_auto_normalization = AutoNormalization(float(0.1))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs],
                              discriminator_loss,
                              updates=updates_d)

    # Compile another function generating some data
    dis_fn = theano.function([real_imgs, fake_imgs],
                             [fake_out.mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs], [
        real_out.mean(),
        fake_out.mean(), (real_out > 0.5).mean(),
        (fake_out > 0.5).mean(), Fd_score
    ])

    #main MODEL G
    noise = T.matrix('noise')
    generator_trainer = create_generator_trainer(noise, discriminator, lr, b1,
                                                 DIM)

    # Finally, launch the training loop.
    print("Starting training...")
    print(description_name)

    #build dirs
    path_front, path_logs, path_models, path_models_last, path_images = build_output_dirs(
        output_dir, description_name)

    #define a problem instance
    instances = []
    instances_old = []

    #generator of a offspring
    def generate_offsptring(xreal, loss_id, pop_id, inst=None):
        if inst == None:
            newparams = create_generator_trainer(noise=noise,
                                                 discriminator=discriminator,
                                                 lr=lr,
                                                 b1=b1,
                                                 DIM=DIM).get()
            inst = Instance(-float("inf"), float("inf"), newparams, -1, pop_id,
                            None)
        #init gen
        generator_trainer.set(inst.params)
        #train
        generator_trainer.train(loss_type[loss_id], task.noise_batch())
        #score
        xfake = generator_trainer.gen(task.noise_batch())
        frr_score, fd_score = dis_fn(xreal, xfake)
        #new instance
        new_instance = Instance(frr_score, fd_score, generator_trainer.get(),
                                loss_id, pop_id, xfake)
        #save
        instances.append(new_instance)
        #info stuff
        return new_instance

    #init varation
    variation = get_varation(VARIATION)(popsize, nloss, generate_offsptring)

    #reval pop with new D
    def reval_pupulation(in_instances):
        #ret
        out_instances = []
        #generates new batches of images for each generator, and then eval these sets by means (new) D
        for inst in in_instances:
            generator_trainer.set(inst.params)
            xfake = generator_trainer.gen(task.noise_batch())
            frr_score, fd_score = dis_fn(xreal_eval, xfake)
            out_instances.append(
                Instance(frr_score,
                         fd_score,
                         generator_trainer.get(),
                         inst.loss_id,
                         inst.pop_id,
                         xfake,
                         im_parent=True))
        return out_instances

    #log stuff
    LOG_HEADER, LOG_TEMPLATE = build_log_template(popsize, nloss)
    log = Logger(os.path.join(path_logs, 'logs.tsv'),
                 header=LOG_HEADER.encode())
    timer = Timer()
    losses_counter = [0] * nloss

    # We iterate over epochs:
    for n_updates in task.get_range():
        #get batch
        xmb = task.batch()
        #get eval batch
        if xmb.shape[0] == batchSize:
            xreal_eval = xmb
        else:
            xreal_eval = shuffle(xmb)[:batchSize]
        # initial G cluster
        if MULTI_OBJECTIVE_SELECTION:
            instances_old = reval_pupulation(instances)
        else:
            instances_old = instances
        #reset
        instances = []
        variation.update(instances_old, task.is_last())
        for pop_id in range(0, popsize):
            variation.gen(xreal_eval,
                          instances_old[pop_id] if n_updates else None, pop_id)

        if popsize <= (len(instances) + len(instances_old)):
            if MULTI_OBJECTIVE_SELECTION == True:
                #add parents in the pool
                instances = [*instances_old, *instances]
                #from the orginal code, we have to maximize D(G(X)),
                #Since in NSGA2 performences a minimization,
                #We are going to minimize -D(G(X)),
                #also we want maximize the diversity score,
                #So, we are going to minimize -diversity score (also we wanna normalize that value)
                cromos = {
                    idx:
                    [-float(inst.fq), -float(Fd_auto_normalization(inst.fd))]
                    for idx, inst in enumerate(instances)
                }  # S2
                cromos_idxs = [idx for idx, _ in enumerate(instances)]
                finalpop = nsga_2_pass(popsize, cromos, cromos_idxs)
                instances = [instances[p] for p in finalpop]
                with open(os.path.join(path_front, 'last.tsv'),
                          'wb') as ffront:
                    for inst in instances:
                        ffront.write(
                            (str(inst.fq) + "\t" + str(inst.fd)).encode())
                        ffront.write("\n".encode())
            elif nloss > 1:
                #sort new
                instances.sort(key=lambda inst: inst.f()
                               )  #(from the orginal code in github) maximize
                #cut best ones
                instances = instances[len(instances) - popsize:]

        for i in range(0, popsize):
            xreal, xfake = task.statistic_datas(instances[i].img)
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            fake_rate = np.array([fr]) if i == 0 else np.append(fake_rate, fr)
            real_rate = np.array([tr]) if i == 0 else np.append(real_rate, tr)
            fake_rate_p = np.array([frp]) if i == 0 else np.append(
                fake_rate_p, frp)
            real_rate_p = np.array([trp]) if i == 0 else np.append(
                real_rate_p, trp)
            FDL = np.array([fdscore]) if i == 0 else np.append(FDL, fdscore)
            losses_counter[instances[i].loss_id] += 1

        # train D
        for xreal, xfake in task.iter_data_discriminator(xmb, instances):
            train_d(xreal, xfake)

        #show it info
        print(n_updates, real_rate.mean(), real_rate_p.mean())

        #write logs
        log.writeln(
            LOG_TEMPLATE.format(n_updates, str(timer), fake_rate.mean(),
                                real_rate.mean(), *fake_rate, *real_rate, *FDL,
                                *losses_counter).encode())
        #varation logs
        variation.logs(path_logs, n_updates, last_iteration=task.is_last())

        if (n_updates % save_freq == 0
                and n_updates != 0) or n_updates == 1 or task.is_last():
            #it same
            if task.is_last():
                id_name_update = math.ceil(float(n_updates) / save_freq)
            else:
                id_name_update = math.floor(float(n_updates) / save_freq)
            #if is egan, eval only the best one.
            if MULTI_OBJECTIVE_SELECTION == True:
                instances_to_eval = instances
            else:
                instances_to_eval = [instances[-1]]
            #metric
            metric_results = task.compute_metrics(
                instances_to_eval,
                lambda inst, nz: generator_trainer.set(inst.params).gen(nz),
                samples)
            #mmd2 output
            print(n_updates, "metric:", np.min(metric_results), "id:",
                  np.argmin(metric_results))
            #best
            best = np.argmin(metric_results)
            worst = np.argmax(metric_results)
            np.savez(
                os.path.join(path_models, 'dis_%s.npz') % (id_name_update),
                *lasagne.layers.get_all_param_values(discriminator))
            np.savez(
                os.path.join(path_models, 'gen_%s.npz') % (id_name_update),
                *instances_to_eval[best].params)
            #save best
            generator_trainer.set(instances_to_eval[best].params)
            xfake_best = generator_trainer.gen(task.noise_batch(samples))
            #worst_debug
            generator_trainer.set(instances_to_eval[worst].params)
            xfake_worst = generator_trainer.gen(task.noise_batch(samples))
            #save images
            task.save_image(xmb, xfake_best, path_images,
                            "best_%s" % (id_name_update))
            task.save_image(xmb, xfake_worst, path_images,
                            "worst_%s" % (id_name_update))
            #print pareto front
            with open(
                    os.path.join(path_front, '%s.tsv') % (id_name_update),
                    'wb') as ffront:
                for idx in range(len(instances_to_eval)):
                    ffront.write((str(instances_to_eval[idx].fq) + "\t" +
                                  str(instances_to_eval[idx].fd) + "\t" +
                                  str(metric_results[idx])).encode())
                    ffront.write("\n".encode())
            #save all last models:
            if task.is_last():
                for key, inst in enumerate(instances_to_eval):
                    np.savez(
                        os.path.join(path_models_last, 'gen_%s.npz') % (key),
                        *inst.params)
                  'vae_kld_cost']
    g_cost_outputs = g_basic_costs
    # compile function for computing generator costs and updates
    i_train_func = theano.function([Xg], g_cost_outputs, updates=inf_updates)
    print "{0:.2f} seconds to compile theano functions".format(time() - t)

    # make file for recording test progress
    log_name = "{}/FINE-TUNE.txt".format(result_dir)
    out_file = open(log_name, 'wb')

    print("EXPERIMENT: {}".format(desc.upper()))
    n_check = 0
    n_updates = 0
    t = time()
    for epoch in range(1, 200):
        Xva = shuffle(Xva)
        # initialize cost arrays
        g_epoch_costs = [0. for gco in g_cost_outputs]
        g_batch_count = 0.
        if (epoch < 25):
            lrt.set_value(floatX(0.00001))
        elif (epoch < 50):
            lrt.set_value(floatX(0.00003))
        for imb in tqdm(iter_data(Xva, size=100), total=(ntrain / 100)):
            # transform training batch to "image format"
            imb_img = train_transform(imb)
            # train vae on training batch
            g_result = i_train_func(floatX(imb_img))
            g_epoch_costs = [(v1 + v2) for v1, v2 in zip(g_result, g_epoch_costs)]
            g_batch_count += 1
        if (epoch == 75) or (epoch == 150):
Beispiel #19
0
def main(
        problem,
        popsize,
        moegan,
        freq,
        loss_type=['trickLogD', 'minimax', 'ls'],
        postfix=None,
        nPassD=1,  #backpropagation pass for discriminator
        inBatchSize=64):

    # Parameters
    task = 'toy'
    name = '{}_{}_{}MMDu2'.format(
        problem, "MOEGAN" if moegan else "EGAN",
        postfix + "_" if postfix is not None else "")  #'8G_MOEGAN_PFq_NFd_t2'

    DIM = 512
    begin_save = 0
    nloss = len(loss_type)
    batchSize = inBatchSize

    if problem == "8G":
        DATASET = '8gaussians'
    elif problem == "25G":
        DATASET = '25gaussians'
    else:
        exit(-1)

    ncandi = popsize
    kD = nPassD  # # of discrim updates for each gen update
    kG = 1  # # of discrim updates for each gen update
    ntf = 256
    b1 = 0.5  # momentum term of adam
    nz = 2  # # of dim for Z
    niter = 4  # # of iter at starting learning rate
    lr = 0.0001  # initial learning rate for adam G
    lrd = 0.0001  # initial learning rate for adam D
    N_up = 100000
    save_freq = freq
    show_freq = freq
    test_deterministic = True
    beta = 1.
    GP_norm = False  # if use gradients penalty on discriminator
    LAMBDA = 2.  # hyperparameter sudof GP
    NSGA2 = moegan
    # Load the dataset

    # MODEL D
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.matrix('real_imgs')
    fake_imgs = T.matrix('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_toy(nd=DIM,
                                                          GP_norm=GP_norm)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (
        lasagne.objectives.binary_crossentropy(real_out, 1) +
        lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()

    # Gradients penalty norm
    if GP_norm is True:
        alpha = t_rng.uniform((batchSize, 1), low=0., high=1.)
        differences = fake_imgs - real_imgs
        interpolates = real_imgs + (alpha * differences)
        gradients = theano.grad(lasagne.layers.get_output(
            discriminator, interpolates).sum(),
                                wrt=interpolates)
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1)))
        gradient_penalty = T.mean((slopes - 1.)**2)

        D_loss = discriminator_loss + LAMBDA * gradient_penalty
        b1_d = 0.
    else:
        D_loss = discriminator_loss
        b1_d = 0.

    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator,
                                                         trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(D_loss,
                                     discriminator_params,
                                     learning_rate=lrtd,
                                     beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))

    # Fd Socre
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params)
    Fd_score = beta * T.log(sum(T.sum(T.sqr(x)) for x in Fd))

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs],
                              discriminator_loss,
                              updates=updates_d)

    # Compile another function generating some data
    dis_fn = theano.function([real_imgs, fake_imgs],
                             [(fake_out).mean(), Fd_score])
    disft_fn = theano.function([real_imgs, fake_imgs], [
        real_out.mean(),
        fake_out.mean(), (real_out > 0.5).mean(),
        (fake_out > 0.5).mean(), Fd_score
    ])

    #main MODEL G
    noise = T.matrix('noise')
    generator_trainer = create_G(noise=noise,
                                 discriminator=discriminator,
                                 lr=lr,
                                 b1=b1,
                                 DIM=DIM)

    # Finally, launch the training loop.
    print("Starting training...")
    desc = task + '_' + name
    print(desc)

    if not os.path.isdir('front'):
        os.mkdir(os.path.join('front'))
    if not os.path.isdir('front/' + desc):
        os.mkdir(os.path.join('front/', desc))
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson' % desc, 'wb')
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/' + desc):
        os.mkdir(os.path.join('models/', desc))

    instances = []

    class Instance:
        def __init__(self, fq, fd, params, img_values):
            self.fq = fq
            self.fd = fd
            self.params = params
            self.img = img_values

        def f(self):
            return self.fq - self.fd

    # We iterate over epochs:
    for n_updates in range(N_up):
        xmb = toy_dataset(DATASET=DATASET, size=batchSize * kD)
        xmb = xmb[0:batchSize * kD]
        # initial G cluster
        if n_updates == 0:
            for can_i in range(0, ncandi):
                init_generator_trainer = create_G(noise=noise,
                                                  discriminator=discriminator,
                                                  lr=lr,
                                                  b1=b1,
                                                  DIM=DIM)
                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                cost = init_generator_trainer.train(loss_type[can_i % nloss],
                                                    zmb)
                sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                gen_imgs = init_generator_trainer.gen(sample_zmb)
                frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                instances.append(
                    Instance(
                        frr_score, fd_score,
                        lasagne.layers.get_all_param_values(
                            init_generator_trainer.generator), gen_imgs))
        else:
            instances_old = instances
            instances = []
            for can_i in range(0, ncandi):
                for type_i in range(0, nloss):
                    generator_trainer.set(instances_old[can_i].params)
                    #train
                    zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                    generator_trainer.train(loss_type[type_i], zmb)
                    #score
                    sample_zmb = floatX(np_rng.uniform(-1., 1.,
                                                       size=(ntf, nz)))
                    gen_imgs = generator_trainer.gen(sample_zmb)
                    frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                    #save
                    instances.append(
                        Instance(frr_score, fd_score, generator_trainer.get(),
                                 gen_imgs))
            if ncandi <= (len(instances) + len(instances_old)):
                if NSGA2 == True:
                    #add parents in the pool
                    for inst in instances_old:
                        generator_trainer.set(inst.params)
                        sample_zmb = floatX(
                            np_rng.uniform(-1., 1., size=(ntf, nz)))
                        gen_imgs = generator_trainer.gen(sample_zmb)
                        frr_score, fd_score = dis_fn(xmb[0:ntf], gen_imgs)
                        instances.append(
                            Instance(frr_score, fd_score,
                                     generator_trainer.get(), gen_imgs))
                    #cromos = { idx:[float(inst.fq),-0.5*float(inst.fd)] for idx,inst in enumerate(instances) } # S1
                    cromos = {
                        idx: [-float(inst.fq), 0.5 * float(inst.fd)]
                        for idx, inst in enumerate(instances)
                    }  # S2
                    cromos_idxs = [idx for idx, _ in enumerate(instances)]
                    finalpop = nsga_2_pass(ncandi, cromos, cromos_idxs)
                    instances = [instances[p] for p in finalpop]
                    with open('front/%s.tsv' % desc, 'wb') as ffront:
                        for inst in instances:
                            ffront.write(
                                (str(inst.fq) + "\t" + str(inst.fd)).encode())
                            ffront.write("\n".encode())
                elif nloss > 1:
                    #sort new
                    instances.sort(
                        key=lambda inst: -inst.f())  #wrong def in the paper
                    #print([inst.f() for inst in instances])
                    #cut best ones
                    instances = instances[len(instances) - ncandi:]
                    #print([inst.f() for inst in instances])

        sample_xmb = toy_dataset(DATASET=DATASET, size=ncandi * ntf)
        sample_xmb = sample_xmb[0:ncandi * ntf]
        for i in range(0, ncandi):
            xfake = instances[i].img[0:ntf, :]
            xreal = sample_xmb[i * ntf:(i + 1) * ntf, :]
            tr, fr, trp, frp, fdscore = disft_fn(xreal, xfake)
            fake_rate = np.array([fr]) if i == 0 else np.append(fake_rate, fr)
            real_rate = np.array([tr]) if i == 0 else np.append(real_rate, tr)
            fake_rate_p = np.array([frp]) if i == 0 else np.append(
                fake_rate_p, frp)
            real_rate_p = np.array([trp]) if i == 0 else np.append(
                real_rate_p, trp)
            FDL = np.array([fdscore]) if i == 0 else np.append(FDL, fdscore)

        print(fake_rate, fake_rate_p, FDL)
        print(n_updates, real_rate.mean(), real_rate_p.mean())
        f_log.write((str(fake_rate) + ' ' + str(fake_rate_p) + '\n' +
                     str(n_updates) + ' ' + str(real_rate.mean()) + ' ' +
                     str(real_rate_p.mean()) + '\n').encode())
        f_log.flush()

        # train D
        #for xreal, xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
        #    cost = train_d(xreal, xfake)
        imgs_fakes = instances[0].img[0:int(batchSize / ncandi * kD), :]
        for i in range(1, len(instances)):
            img = instances[i].img[0:int(batchSize / ncandi * kD), :]
            imgs_fakes = np.append(imgs_fakes, img, axis=0)
        for xreal, xfake in iter_data(xmb, shuffle(imgs_fakes),
                                      size=batchSize):
            cost = train_d(xreal, xfake)

        if (n_updates % show_freq == 0 and n_updates != 0) or n_updates == 1:
            id_update = int(n_updates / save_freq)
            #metric
            s_zmb = floatX(np_rng.uniform(-1., 1., size=(512, nz)))
            xmb = toy_dataset(DATASET=DATASET, size=512)
            #compue mmd for all points
            mmd2_all = []
            for i in range(0, ncandi):
                generator_trainer.set(instances[i].params)
                g_imgs = generator_trainer.gen(s_zmb)
                mmd2_all.append(abs(compute_metric_mmd2(g_imgs, xmb)))
            mmd2_all = np.array(mmd2_all)
            #print pareto front
            if NSGA2 == True:
                front_path = os.path.join('front/', desc)
                with open('%s/%d_%s_mmd2u.tsv' % (front_path, id_update, desc),
                          'wb') as ffront:
                    for idx in range(0, ncandi):
                        ffront.write((str(instances[idx].fq) + "\t" +
                                      str(instances[idx].fd) + "\t" +
                                      str(mmd2_all[idx])).encode())
                        ffront.write("\n".encode())
            #mmd2 output
            print(n_updates, "mmd2u:", np.min(mmd2_all), "id:",
                  np.argmin(mmd2_all))
            #save best
            params = instances[np.argmin(mmd2_all)].params
            generator_trainer.set(params)
            g_imgs_min = generator_trainer.gen(s_zmb)
            generate_image(xmb,
                           g_imgs_min,
                           id_update,
                           desc,
                           postfix="_mmu2d_best")
            np.savez('models/%s/gen_%d.npz' % (desc, id_update),
                     *lasagne.layers.get_all_param_values(discriminator))
            np.savez('models/%s/dis_%d.npz' % (desc, id_update),
                     *generator_trainer.get())
            #worst_debug
            params = instances[np.argmax(mmd2_all)].params
            generator_trainer.set(params)
            g_imgs_max = generator_trainer.gen(s_zmb)
            generate_image(xmb,
                           g_imgs_max,
                           id_update,
                           desc,
                           postfix="_mmu2d_worst")
print 'COMPILING'
t = time()
_gen = theano.function([Z], gX)
_train_d = theano.function([X, X0], d_cost, updates=d_updates)
_train_g = theano.function([Z, deltaX], g_cost, updates=g_updates)
_vgd_gradient = theano.function([X0, X1], vgd_gradient(X0, X1))
_reconstruction_cost = theano.function([X], T.mean(mse_data))
print '%.2f seconds to compile theano functions' % (time() - t)

sample_zmb = floatX(np_rng.uniform(-1., 1., size=(nvis, nz)))

n_updates = 0
t = time()
for epoch in range(1, niter + 1):
    for filename in npzfiles:
        batch_data = shuffle(
            np.load(filename)['images'].astype(theano.config.floatX))

        for idx in tqdm(xrange(0, batch_data.shape[0] // nbatch)):
            imb = transform(batch_data[idx * nbatch:(idx + 1) * nbatch])

            zmb = floatX(np_rng.uniform(-1., 1., size=(imb.shape[0], nz)))
            # generate samples
            samples = _gen(zmb)

            vgd_grad = _vgd_gradient(samples, samples)
            if n_updates % (k + 1) == 0:
                _train_g(zmb, floatX(vgd_grad))
            else:
                _train_d(imb, samples)
            n_updates += 1
Beispiel #21
0
 def reset_data(self):
     self.data, self.labels = shuffle(self.data, self.labels)
     self.index = 0
iwae_cost_func = theano.function([Xg], [log_p_x, log_p_z, log_q_z])
g_eval_func = theano.function([Xg], g_basic_costs)
print "{0:.2f} seconds to compile theano functions".format(time()-t)

# make file for recording test progress
log_name = "{}/EVAL.txt".format(result_dir)
out_file = open(log_name, 'wb')

print("EXPERIMENT: {}".format(desc.upper()))

Xva_blocks = [Xva] #np.split(Xva, 2, axis=0)
for epoch in range(5):
    epoch_vae_cost = 0.0
    epoch_iwae_cost = 0.0
    for block_num, Xva_block in enumerate(Xva_blocks):
        Xva_block = shuffle(Xva_block)
        obs_count = Xva_block.shape[0]
        g_epoch_costs = [0. for c in g_basic_costs]
        g_batch_count = 0.
        for imb in tqdm(iter_data(Xva_block, size=nbatch), total=obs_count/nbatch):
            # transform validation batch to "image format"
            imb_img = floatX( train_transform(imb) )
            # evaluate costs
            g_result = g_eval_func(imb_img)
            # evaluate costs more thoroughly
            iwae_bounds = iwae_multi_eval(imb_img, 25*25,
                                          cost_func=iwae_cost_func,
                                          iwae_num=iwae_samples)
            g_result[4] = np.mean(iwae_bounds)  # swap in tighter bound
            # accumulate costs
            g_epoch_costs = [(v1 + v2) for v1, v2 in zip(g_result, g_epoch_costs)]
def main():
    # Parameters
    data_path = '../datasets/'
    task = 'face'
    name = '128'

    start = 0
    stop = 202560 
    input_nc = 3 
    loss_type = ['trickLogD','minimax','ls'] 
    nloss = 3
    shuffle_ = True 
    batchSize = 32 
    fineSize = 128 
    flip = True
   
    ncandi = 1         # # of survived childern 
    kD = 3             # # of discrim updates for each gen update
    kG = 1             # # of discrim updates for each gen update
    ntf = batchSize*kD 
    b1 = 0.5           # momentum term of adam
    nz = 100           # # of dim for Z
    ngf = 64           # # of gen filters in first conv layer
    ndf = 64           # # of discrim filters in first conv layer
    niter = 25         # # of iter at starting learning rate
    lr = 0.0002        # initial learning rate for adam G
    lrd = 0.0002       # initial learning rate for adam D
    beta = 0.001       # the hyperparameter that balance fitness score 
    GP_norm = False    # if use gradients penalty on discriminator
    LAMBDA = 2.        # hyperparameter of GP 
    
    save_freq = 5000 
    show_freq = 500 
    begin_save = 0 
    test_deterministic = True   
    
    # Load the dataset
    print("Loading data...")
    f = h5py.File(data_path+'img_align_celeba_128.hdf5','r') 
    trX = f['data'] 
    ids = range(start, stop)
    
    ################## MODEL D ####################### 
    print("Building model and compiling functions...")
    # Prepare Theano variables for inputs and targets
    real_imgs = T.tensor4('real_imgs')
    fake_imgs = T.tensor4('fake_imgs')
    # Create neural network model
    discriminator = models_uncond.build_discriminator_128(ndf=ndf)
    # Create expression for passing real data through the discriminator
    real_out = lasagne.layers.get_output(discriminator, real_imgs)
    # Create expression for passing fake data through the discriminator
    fake_out = lasagne.layers.get_output(discriminator, fake_imgs)
    # Create loss expressions
    discriminator_loss = (lasagne.objectives.binary_crossentropy(real_out, 1)
            + lasagne.objectives.binary_crossentropy(fake_out, 0)).mean()
    
    # Gradients penalty norm 
    if GP_norm is True: 
        alpha = t_rng.uniform((batchSize,1,1,1), low=0.,high=1.)  
        differences = fake_imgs - real_imgs  
        interpolates = real_imgs + (alpha*differences)
        gradients = theano.grad(lasagne.layers.get_output(discriminator, interpolates).sum(), wrt=interpolates) 
        slopes = T.sqrt(T.sum(T.sqr(gradients), axis=(1,2,3)))
        gradient_penalty = T.mean((slopes-1.)**2)
 
        D_loss = discriminator_loss +  LAMBDA*gradient_penalty 
        b1_d = 0. 
    else:
        D_loss = discriminator_loss 
        b1_d = b1 
 
    # Create update expressions for training
    discriminator_params = lasagne.layers.get_all_params(discriminator, trainable=True)
    lrtd = theano.shared(lasagne.utils.floatX(lrd))
    updates_d = lasagne.updates.adam(
            D_loss, discriminator_params, learning_rate=lrtd, beta1=b1_d)
    lrt = theano.shared(lasagne.utils.floatX(lr))
    
    # Diversity fitnees 
    Fd = theano.gradient.grad(discriminator_loss, discriminator_params) 
    Fd_score  = beta*T.log(sum(T.sum(T.sqr(x)) for x in Fd))
    
    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_d = theano.function([real_imgs, fake_imgs],
                               discriminator_loss,
                               updates=updates_d)
    
    # Compile another function generating some data
    disft_fn = theano.function([real_imgs,fake_imgs],
                               [(real_out).mean(),
                                (fake_out).mean(),
                                Fd_score])
 
    # Finally, launch the training loop.
    print("Starting training...") 
    desc = task + '_' + name 
    print desc
    
    if not os.path.isdir('logs'):
        os.mkdir(os.path.join('logs'))
    f_log = open('logs/%s.ndjson'%desc, 'wb')
    if not os.path.isdir('samples'):
        os.mkdir(os.path.join('samples/'))
    if not os.path.isdir('samples/'+desc):
        os.mkdir(os.path.join('samples/',desc))
    if not os.path.isdir('models'):
        os.mkdir(os.path.join('models/'))
    if not os.path.isdir('models/'+desc):
        os.mkdir(os.path.join('models/',desc))
     
    gen_new_params = [] 
    n_updates = 0
     
    # We iterate over epochs:
    for epoch in range(niter):
        t = time()
	if shuffle_ is True:
	    ids = shuffle(ids) 
	for index_ in iter_data(ids, size=batchSize*kD):
            index = sorted(index_) 
            xmb = trX[index,:,:,:]
	    xmb = Batch(xmb,fineSize,input_nc,flip=flip) 
	    xmb = processing_img(xmb, center=True, scale=True, convert=False)
                        
            rand_idx = random.randint(start,stop-ntf-1) 
            rand_ids = ids[rand_idx:rand_idx+ntf] 
            rand_ids = sorted(rand_ids) 
            sample_xmb = trX[rand_ids,:,:,:]
	    sample_xmb = Batch(sample_xmb,fineSize,input_nc,flip=flip) 
	    sample_xmb = processing_img(sample_xmb, center=True, scale=True, convert=False)
     
            # initial G cluster 
            if epoch + n_updates == 0:
                for can_i in range(0,ncandi): 
                    train_g, gen_fn, generator = create_G(
                            loss_type=loss_type[can_i%nloss],
                            discriminator=discriminator, lr=lr, b1=b1, ngf=ngf) 
                    for _ in range(0,kG):
                        zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                        cost = train_g(zmb)
                    sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                    gen_imgs = gen_fn(sample_zmb)
 
                    gen_new_params.append(lasagne.layers.get_all_param_values(generator)) 
                    
                    if can_i == 0: 
                        g_imgs_old=gen_imgs
                        fmb = gen_imgs[0:batchSize/ncandi*kD,:,:,:]
                    else: 
                        g_imgs_old = np.append(g_imgs_old,gen_imgs,axis=0)
                        fmb = np.append(fmb,gen_imgs[0:batchSize/ncandi*kD,:,:,:],axis=0)
                #print gen_new_params
                # MODEL G
                noise = T.matrix('noise')
                generator = models_uncond.build_generator_128(noise,ngf=ngf)
                Tgimgs = lasagne.layers.get_output(generator)
                Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

                g_loss_logD = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
                g_loss_minimax = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
                g_loss_ls = T.mean(T.sqr((Tfake_out - 1)))

                g_params = lasagne.layers.get_all_params(generator, trainable=True)

                up_g_logD = lasagne.updates.adam(g_loss_logD, g_params, learning_rate=lrt, beta1=b1)
                up_g_minimax = lasagne.updates.adam(g_loss_minimax, g_params, learning_rate=lrt, beta1=b1)
                up_g_ls = lasagne.updates.adam(g_loss_ls, g_params, learning_rate=lrt, beta1=b1)

                train_g = theano.function([noise],g_loss_logD,updates=up_g_logD)
                train_g_minimax = theano.function([noise],g_loss_minimax,updates=up_g_minimax)
                train_g_ls = theano.function([noise],g_loss_ls,updates=up_g_ls)

                gen_fn = theano.function([noise], lasagne.layers.get_output(
                                        generator,deterministic=True))
            else:
                gen_old_params = gen_new_params
                for can_i in range(0,ncandi):
                    for type_i in range(0,nloss):
                        
                        lasagne.layers.set_all_param_values(generator, gen_old_params[can_i])
                        if loss_type[type_i] == 'trickLogD':
                            for _ in range(0,kG):
                                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                                cost = train_g(zmb)
                        elif loss_type[type_i] == 'minimax': 
                            for _ in range(0,kG):
                                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                                cost = train_g_minimax(zmb)
                        elif loss_type[type_i] == 'ls': 
                            for _ in range(0,kG):
                                zmb = floatX(np_rng.uniform(-1., 1., size=(batchSize, nz)))
                                cost = train_g_ls(zmb)

                        sample_zmb = floatX(np_rng.uniform(-1., 1., size=(ntf, nz)))
                        gen_imgs = gen_fn(sample_zmb)

                        _, fr_score, fd_score = disft_fn(sample_xmb,gen_imgs)
                        fit = fr_score - fd_score 
                     
                        if can_i*nloss + type_i < ncandi: 
                            idx = can_i*nloss + type_i
                            gen_new_params[idx]=lasagne.layers.get_all_param_values(generator)
                            fitness[idx]=fit
                            fake_rate[idx]=fr_score
                            g_imgs_old[idx*ntf:(idx+1)*ntf,:,:,:]=gen_imgs
                            fmb[idx*batchSize/ncandi*kD:(idx+1)*batchSize/ncandi*kD,:,:,:] = \
                                gen_imgs[0:batchSize/ncandi*kD,:,:,:]
                        else: 
                            fit_com = fitness - fit
                            if min(fit_com) < 0:
                                ids_replace = np.where(fit_com==min(fit_com)) 
                                idr = ids_replace[0][0]
                                fitness[idr]=fit
                                fake_rate[idr]=fr_score

                                gen_new_params[idr] = lasagne.layers.get_all_param_values(generator)

                                g_imgs_old[idr*ntf:(idr+1)*ntf,:,:,:]=gen_imgs
                                fmb[idr*batchSize/ncandi*kD:(idr+1)*batchSize/ncandi*kD,:,:,:] = \
                                    gen_imgs[0:batchSize/ncandi*kD,:,:,:]

                print fake_rate, fitness
                f_log.write(str(fake_rate) + ' '+str(fd_score) +' ' + str(fitness)+ '\n')

            # train D 
	    for xreal,xfake in iter_data(xmb, shuffle(fmb), size=batchSize):
                cost = train_d(xreal, xfake)
            for i in range(0, ncandi):
                xfake = g_imgs_old[i*ntf:(i+1)*ntf,:,:,:]
                xreal = sample_xmb[0:ntf,:,:,:]
                tr, fr, fd = disft_fn(xreal,xfake) 
                if i == 0:
                    fake_rate = np.array([fr])   
                    fitness = np.array([0.])   
                    real_rate = np.array([tr])     
                    FDL = np.array([fd])     
                else:
                    fake_rate = np.append(fake_rate,fr)
                    fitness = np.append(fitness,[0.])
                    real_rate = np.append(real_rate,tr)
                    FDL = np.append(FDL,fd)
            print fake_rate, FDL
            print (n_updates, epoch,real_rate.mean()) 
            n_updates += 1
            f_log.write(str(fake_rate)+' '+str(FDL)+ '\n'+ str(epoch)+' '+str(n_updates)+' '+str(real_rate.mean())+'\n')
            f_log.flush()

	    if n_updates%show_freq == 0:
	        blank_image = Image.new("RGB",(fineSize*8+9,fineSize*8+9))
	        for i in range(8):
                    for ii in range(8):
                        img = g_imgs_old[i*8+ii,:,:,:]
                        img = ImgRescale(img, center=True, scale=True, convert_back=True)
                        blank_image.paste(Image.fromarray(img),(ii*fineSize+ii+1,i*fineSize+i+1)) 
                blank_image.save('samples/%s/%s_%d.png'%(desc,desc,n_updates/save_freq))

            if n_updates%save_freq == 0 and epoch > begin_save - 1:
            # Optionally, you could now dump the network weights to a file like this:
                np.savez('models/%s/gen_%d.npz'%(desc,n_updates/save_freq), *lasagne.layers.get_all_param_values(generator))
                np.savez('models/%s/dis_%d.npz'%(desc,n_updates/save_freq), *lasagne.layers.get_all_param_values(discriminator))
Beispiel #24
0
    'n_examples', 
    'n_seconds',
    'g_cost',
    'd_cost',
]

print desc.upper()
n_updates = 0
n_check = 0
n_epochs = 0
n_updates = 0
n_examples = 0
t = time()
sample_z0mb = rand_gen(size=(200, nz0)) # noise samples for top generator module
for epoch in range(1, niter+niter_decay+1):
    trX = shuffle(trX)
    for imb in tqdm(iter_data(trX, size=nbatch), total=ntrain/nbatch):
        imb = transform(imb)
        z0mb = rand_gen(size=(len(imb), nz0))
        if n_updates % (k+1) == 0:
            cost = _train_g(imb, z0mb)
        else:
            cost = _train_d(imb, z0mb)
        n_updates += 1
        n_examples += len(imb)
    samples = np.asarray(_gen(sample_z0mb))
    grayscale_grid_vis(inverse_transform(samples), (10, 20), "{}/{}.png".format(sample_dir, n_epochs))
    n_epochs += 1
    if n_epochs > niter:
        lrt.set_value(floatX(lrt.get_value() - lr/niter_decay))
    if n_epochs in [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 200]:
Beispiel #25
0

print 'COMPILING'
t = time()
_train = theano.function([X, num_z], decost, updates=gupdates)
_reconstruct = theano.function([X], func_res_x)
_encoder = theano.function([X], func_z)
_decoder = theano.function([func_z], func_res_x)
print '%.2f seconds to compile theano functions'%(time()-t)


n_updates = 0

t = time()
zmb = floatX(np_rng.normal(0, 1, size=(100, nz)))
xmb = floatX(shuffle(X_test)[:100])
number_z = 5

for epoch in range(1, niter+niter_decay+1):
    X_train = shuffle(X_train)

    logpxz = 0
    for imb in tqdm(iter_data(X_train, size=nbatch), total=ntrain/nbatch):
        imb = floatX(imb)

        logpxz += _train(imb, number_z) * len(imb)

        n_updates+=1

    print epoch, 'logpxz', logpxz / ntrain
g_train_func = theano.function([x_in], g_cost_outputs, updates=all_updates)
g_eval_func = theano.function([x_in], g_cost_outputs)
print "{0:.2f} seconds to compile theano functions".format(time() - t)

# make file for recording test progress
log_name = "{}/RESULTS.txt".format(result_dir)
out_file = open(log_name, 'wb')

print("EXPERIMENT: {}".format(desc.upper()))

n_check = 0
n_updates = 0
t = time()
kld_weights = np.linspace(0.0, 1.0, 10)
for epoch in range(1, (niter + niter_decay + 1)):
    Xtr = shuffle(Xtr)
    Xva = shuffle(Xva)
    # mess with the KLd cost
    # if ((epoch-1) < len(kld_weights)):
    #     lam_kld.set_value(floatX([kld_weights[epoch-1]]))
    lam_kld.set_value(floatX([1.0]))
    # initialize cost arrays
    g_epoch_costs = [0. for i in range(5)]
    v_epoch_costs = [0. for i in range(5)]
    i_epoch_costs = [0. for i in range(5)]
    epoch_layer_klds = [0. for i in range(len(vae_layer_names))]
    vae_nlls = []
    vae_klds = []
    g_batch_count = 0.
    i_batch_count = 0.
    v_batch_count = 0.
    "1k_va_nnd",
    "10k_va_nnd",
    "100k_va_nnd",
    "g_cost",
    "d_cost",
]

print desc.upper()
n_updates = 0
n_check = 0
n_epochs = 0
n_updates = 0
n_examples = 0
t = time()
for epoch in range(1, niter + niter_decay + 1):
    trX, trY = shuffle(trX, trY)
    for imb, ymb in tqdm(iter_data(trX, trY, size=nbatch), total=ntrain / nbatch):
        imb = transform(imb)
        ymb = floatX(OneHot(ymb, ny))
        zmb = floatX(np_rng.uniform(-1.0, 1.0, size=(len(imb), nz)))
        if n_updates % (k + 1) == 0:
            cost = _train_g(imb, zmb, ymb)
        else:
            cost = _train_d(imb, zmb, ymb)
        n_updates += 1
        n_examples += len(imb)
    if (epoch - 1) % 5 == 0:
        g_cost = float(cost[0])
        d_cost = float(cost[1])
        gX, gY = gen_samples(100000)
        gX = gX.reshape(len(gX), -1)
Beispiel #28
0
    'n_examples', 
    'n_seconds',
    'g_cost',
    'd_cost',
]

print desc.upper()
n_updates = 0
n_check = 0
n_epochs = 0
n_updates = 0
n_examples = 0
t = time()
sample_z0mb = rand_gen(size=(200, nz0)) # noise samples for top generator module
for epoch in range(1, niter+niter_decay+1):
    Xtr = shuffle(Xtr)
    g_cost = 0
    d_cost = 0
    gc_iter = 0
    dc_iter = 0
    for imb in tqdm(iter_data(Xtr, size=nbatch), total=ntrain/nbatch):
        imb = train_transform(imb)
        z0mb = rand_gen(size=(len(imb), nz0))
        if n_updates % (k+1) == 0:
            g_cost += _train_g(imb, z0mb)[0]
            gc_iter += 1
        else:
            d_cost += _train_d(imb, z0mb)[1]
            dc_iter += 1
        n_updates += 1
        n_examples += len(imb)
_train_g = theano.function([Z, Y, deltaX], g_cost, updates=g_updates)
_vgd_gradient = theano.function([X0, X1, Y], vgd_gradient(X0, X1, Y))
_reconstruction_cost = theano.function([X], T.mean(mse_data))
print '%.2f seconds to compile theano functions' % (time() - t)

sample_zmb = floatX(np_rng.uniform(-1., 1., size=(200, nz)))
sample_ymb = floatX(
    OneHot(
        np.asarray([[i for _ in range(20)] for i in range(10)]).flatten(), ny))

n_updates = 0

t = time()
for epoch in range(niter):
    print 'cifar 10, vgd, %s, iter %d' % (desc, epoch)
    trX, trY = shuffle(trX, trY)
    for imb, ymb in tqdm(iter_data(trX, trY, size=nbatch),
                         total=ntrain / nbatch):
        imb = transform(imb.reshape(imb.shape[0], nc, npx, npx))
        ymb = floatX(OneHot(ymb, ny))
        zmb = floatX(np_rng.uniform(-1., 1., size=(imb.shape[0], nz)))

        # generate samples
        samples = _gen(zmb, ymb)

        vgd_grad = _vgd_gradient(samples, samples, ymb)
        if n_updates % (k + 1) == 0:
            _train_g(zmb, ymb, floatX(vgd_grad))
        else:
            _train_d(imb, samples, ymb)