Example #1
 def Update(self, grads, useSGD=False):
     if useSGD:
         print('Use pure SGD for Label-->Image tasks')
         optimizer = tf.train.GradientDescentOptimizer(
             learning_rate=self.tf_lr)
         apply_op = optimizer.apply_gradients(zip(grads, self.all_params))
         self.update_ops = tf.group(apply_op)
     else:
         self.update_ops = tf.group(
             nn.adam_updates(self.all_params,
                             grads,
                             lr=self.tf_lr,
                             mom1=0.95,
                             mom2=0.9995), self.maintain_averages_op)
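
Example #1 switches between plain gradient descent and the adam_updates helper from the PixelCNN++ nn module (the same helper, with an eps argument, appears in the later examples). As a rough orientation only, a minimal assign-based Adam step could be sketched as below; the function name, default values and variable handling here are assumptions, not the library's actual implementation.

import tensorflow as tf

def adam_updates_sketch(params, grads, lr=0.001, mom1=0.9, mom2=0.999, eps=1e-8):
    # Hypothetical stand-in for a helper like nn.adam_updates: one assign-based
    # Adam step per parameter, returned as a single grouped op.
    updates = []
    t = tf.Variable(0., trainable=False)
    t_new = t.assign_add(1.)
    for p, g in zip(params, grads):
        m = tf.Variable(tf.zeros(p.get_shape()), trainable=False)  # 1st moment
        v = tf.Variable(tf.zeros(p.get_shape()), trainable=False)  # 2nd moment
        m_new = m.assign(mom1 * m + (1. - mom1) * g)
        v_new = v.assign(mom2 * v + (1. - mom2) * tf.square(g))
        m_hat = m_new / (1. - tf.pow(mom1, t_new))                 # bias correction
        v_hat = v_new / (1. - tf.pow(mom2, t_new))
        updates.append(p.assign_sub(lr * m_hat / (tf.sqrt(v_hat) + eps)))
    return tf.group(*updates)
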
Example #2
        else:
            new_x_gen.append(
                nn.sample_from_discretized_mix_logistic(
                    out, args.nr_logistic_mix, args.take_max))

# add losses and gradients together and get training updates
tf_lr = tf.placeholder(tf.float32, shape=[])
with tf.device('/gpu:0'):
    for i in range(1, args.nr_gpu):
        loss_gen[0] += loss_gen[i]
        loss_gen_test[0] += loss_gen_test[i]
        for j in range(len(grads[0])):
            grads[0][j] += grads[i][j]
    # training op
    optimizer = tf.group(
        nn.adam_updates(all_params, grads[0], lr=tf_lr, mom1=0.95,
                        mom2=0.9995), maintain_averages_op)

# convert loss to bits/dim
bits_per_dim = loss_gen[0] / (args.nr_gpu * np.log(2.) * np.prod(obs_shape) *
                              args.batch_size)
bits_per_dim_test = loss_gen_test[0] / (args.nr_gpu * np.log(2.) *
                                        np.prod(obs_shape) * args.batch_size)
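
# Sanity check on the normalization constant, assuming a hypothetical CIFAR-10
# style setup (obs_shape = (32, 32, 3), batch_size = 16, nr_gpu = 4 are
# made-up numbers): the summed negative log-likelihood in nats is divided by
# nr_gpu * log(2) * num_pixels * batch_size, i.e. by
# 4 * 0.6931 * 3072 * 16 ~= 1.36e5, to express the loss in bits per dimension.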


# sample from the model
def sample_from_model(sess):
    x_gen = [
        np.zeros((args.batch_size, ) + obs_shape, dtype=np.float32)
        for i in range(args.nr_gpu)
    ]
    for yi in range(obs_shape[0]):
Example #3
        losses[i] = tf.reduce_mean(MSEs[i] +
                                   FLAGS.beta * tf.maximum(FLAGS.lam, KLDs[i]))
        grads[i] = tf.gradients(losses[i],
                                all_params,
                                colocate_gradients_with_ops=True)
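        # The tf.maximum(FLAGS.lam, KLDs[i]) term acts as a floor on the KL
        # penalty: whenever the per-sample KL divergence drops below lam, the
        # clamp returns the constant lam, so no gradient flows through the KL
        # term and the encoder is not pushed to collapse the posterior further.
        # Above the floor the usual beta-weighted KL gradient applies.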

with tf.device('/gpu:0'):
    for i in range(1, FLAGS.nr_gpu):
        losses[0] += losses[i]
        for j in range(len(grads[0])):
            grads[0][j] += grads[i][j]

    MSE = tf.concat(MSEs, axis=0)
    KLD = tf.concat(KLDs, axis=0)

    train_step = adam_updates(all_params, grads[0], lr=FLAGS.learning_rate)

    loss = losses[0] / FLAGS.nr_gpu

initializer = tf.global_variables_initializer()
saver = tf.train.Saver()


def make_feed_dict(data, mgen=None):
    data = np.cast[np.float32](data / 255.)
    ds = np.split(data, FLAGS.nr_gpu)
    feed_dict = {xs[i]: ds[i] for i in range(FLAGS.nr_gpu)}
    if mgen is not None:
        masks = mgen.gen(data.shape[0])
        masks = np.split(masks, FLAGS.nr_gpu)
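
Assuming the truncated make_feed_dict above ends by returning feed_dict, a single training step with this setup could look like the following sketch; batch_uint8, mask_generator and sess are placeholder names rather than part of the example.

feed_dict = make_feed_dict(batch_uint8)                           # plain uint8 image batch
# feed_dict = make_feed_dict(batch_uint8, mgen=mask_generator)    # optionally with masks
loss_val, _ = sess.run([loss, train_step], feed_dict)
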
Example #4
    with tf.device('/gpu:%d' % i):
        MSEs[i] = tf.reduce_sum(tf.square(flatten(xs[i])-flatten(x_hats[i])), 1)
        KLDs[i] = - 0.5 * tf.reduce_mean(1 + log_vars[i] - tf.square(locs[i]) - tf.exp(log_vars[i]), axis=-1)
        losses[i] = tf.reduce_mean(MSEs[i] + FLAGS.beta * tf.maximum(FLAGS.lam, KLDs[i]))
        grads[i] = tf.gradients(losses[i], all_params, colocate_gradients_with_ops=True)

with tf.device('/gpu:0'):
    for i in range(1, FLAGS.nr_gpu):
        losses[0] += losses[i]
        for j in range(len(grads[0])):
            grads[0][j] += grads[i][j]

    MSE = tf.concat(MSEs, axis=0)
    KLD = tf.concat(KLDs, axis=0)

    train_step = adam_updates(all_params, grads[0], lr=0.0001)

    loss = losses[0] / FLAGS.nr_gpu

initializer = tf.global_variables_initializer()
saver = tf.train.Saver()

def make_feed_dict(data, mgen=None):
    data = np.cast[np.float32](data/255.)
    ds = np.split(data, FLAGS.nr_gpu)
    feed_dict = {xs[i]: ds[i] for i in range(FLAGS.nr_gpu)}
    if mgen is not None:
        masks = mgen.gen(data.shape[0])
        masks = np.split(masks, FLAGS.nr_gpu)
        for i in range(FLAGS.nr_gpu):
Example #5
def main(args):
  import os
  import sys
  import time
  import json
  from mpi4py import MPI
  import numpy as np
  import tensorflow as tf
  from tqdm import trange

  import pixel_cnn_pp.nn as nn
  import pixel_cnn_pp.plotting as plotting
  from pixel_cnn_pp import model as pxpp_models
  import data.cifar10_data as cifar10_data
  import data.imagenet_data as imagenet_data

  import tf_utils as tfu

  comm = MPI.COMM_WORLD
  num_tasks, task_id = comm.Get_size(), comm.Get_rank()
  save_dir = args.save_dir

  if task_id == 0:
    os.makedirs(save_dir)
    f_log = open(os.path.join(save_dir, 'print.log'), 'w')

  def lprint(*a, **kw):
    if task_id == 0:
      print(*a, **kw)
      print(*a, **kw, file=f_log)

  lprint('input args:\n', json.dumps(vars(args), indent=4,
                                     separators=(',', ':')))  # pretty print args
  # -----------------------------------------------------------------------------
  # fix random seed for reproducibility
  rng = np.random.RandomState(args.seed + task_id)
  tf.set_random_seed(args.seed + task_id)

  # initialize data loaders for train/test splits
  if args.data_set == 'imagenet' and args.class_conditional:
    raise("We currently don't have labels for the small imagenet data set")
  DataLoader = {'cifar': cifar10_data.DataLoader,
                'imagenet': imagenet_data.DataLoader}[args.data_set]
  train_data = DataLoader(args.data_dir, 'train', args.batch_size,
                          rng=rng, shuffle=True, return_labels=args.class_conditional)
  test_data = DataLoader(args.data_dir, 'test', args.batch_size,
                         shuffle=False, return_labels=args.class_conditional)
  obs_shape = train_data.get_observation_size()  # e.g. a tuple (32,32,3)
  assert len(obs_shape) == 3, 'assumed right now'

  if args.nr_gpu is None:
    from tensorflow.python.client import device_lib
    args.nr_gpu = len([d for d in device_lib.list_local_devices()
                       if d.device_type == 'GPU'])

  # data place holders
  x_init = tf.placeholder(tf.float32,
                          shape=(args.init_batch_size,) + obs_shape)
  xs = [tf.placeholder(tf.float32, shape=(args.batch_size, ) + obs_shape)
        for _ in range(args.nr_gpu)]

  def _get_batch(is_training):
    if is_training:
      x = train_data.__next__(args.batch_size)
    else:
      x = test_data.__next__(args.batch_size)
    x = np.cast[np.float32]((x - 127.5) / 127.5)
    return dict(x=x)

  batch_def = dict(x=tfu.vdef(args.batch_size, obs_shape))
  qr = tfu.Struct(
      train=tfu.PyfuncRunner(batch_def, 64, 8, True,
                             _get_batch, is_training=True),
      test=tfu.PyfuncRunner(batch_def, 64, 8, True,
                            _get_batch, is_training=False),
  )
  tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, qr.train)
  tf.add_to_collection(tf.GraphKeys.QUEUE_RUNNERS, qr.test)

  sess = tfu.Session(allow_soft_placement=True).__enter__()

  # if the model is class-conditional we'll set up label placeholders +
  # one-hot encodings 'h' to condition on
  if args.class_conditional:
    raise NotImplementedError
    num_labels = train_data.get_num_labels()
    y_init = tf.placeholder(tf.int32, shape=(args.init_batch_size,))
    h_init = tf.one_hot(y_init, num_labels)
    y_sample = np.split(
        np.mod(np.arange(args.batch_size), num_labels), args.nr_gpu)
    h_sample = [tf.one_hot(tf.Variable(y_sample[i], trainable=False), num_labels)
                for i in range(args.nr_gpu)]
    ys = [tf.placeholder(tf.int32, shape=(args.batch_size,))
          for i in range(args.nr_gpu)]
    hs = [tf.one_hot(ys[i], num_labels) for i in range(args.nr_gpu)]
  else:
    h_init = None
    h_sample = [None] * args.nr_gpu
    hs = h_sample

  # create the model
  model_opt = {'nr_resnet': args.nr_resnet, 'nr_filters': args.nr_filters,
               'nr_logistic_mix': args.nr_logistic_mix, 'resnet_nonlinearity': args.resnet_nonlinearity}
  model = tf.make_template('model', getattr(pxpp_models, args.model + "_spec"))

  # run once for data dependent initialization of parameters
  with tf.device('/gpu:0'):
    gen_par = model(x_init, h_init, init=True,
                    dropout_p=args.dropout_p, **model_opt)

  # keep track of moving average
  all_params = tf.trainable_variables()
  lprint('# of Parameters', sum(np.prod(p.get_shape().as_list())
                                for p in all_params))
  ema = tf.train.ExponentialMovingAverage(decay=args.polyak_decay)
  maintain_averages_op = tf.group(ema.apply(all_params))
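  # The exponential moving average keeps a Polyak-averaged copy of every
  # trainable variable; maintain_averages_op is grouped with the Adam update
  # further below so the shadow variables advance once per training step, and
  # the test and sampling graphs are built with ema=ema so they read the
  # averaged weights rather than the raw ones.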

  loss_gen, loss_gen_test, grads = [], [], []
  for i in range(args.nr_gpu):
    with tf.device('/gpu:%d' % i):
      x = qr.train.batch().x
      gen_par = model(x, hs[i], ema=None,
                      dropout_p=args.dropout_p, **model_opt)
      if isinstance(gen_par, tuple) and len(gen_par) == 3:
        loss_gen.append(nn.discretized_mix_logistic_loss_per_chn(x, *gen_par))
      else:
        loss_gen.append(nn.discretized_mix_logistic_loss(x, gen_par))
      grads.append(tf.gradients(loss_gen[i], all_params))

      x = qr.test.batch().x
      gen_par = model(x, hs[i], ema=ema, dropout_p=0., **model_opt)
      if isinstance(gen_par, tuple) and len(gen_par) == 3:
        loss_gen_test.append(
            nn.discretized_mix_logistic_loss_per_chn(x, *gen_par))
      else:
        loss_gen_test.append(nn.discretized_mix_logistic_loss(x, gen_par))

  # add losses and gradients together and get training updates
  tf_lr = tf.placeholder(tf.float32, shape=[])
  with tf.device('/gpu:0'):
    for i in range(1, args.nr_gpu):
      loss_gen[0] += loss_gen[i]
      loss_gen_test[0] += loss_gen_test[i]
      for j in range(len(grads[0])):
        grads[0][j] += grads[i][j]

  if num_tasks > 1:
    lprint('creating mpi optimizer')
    # If we have multiple mpi processes, average across them.
    flat_grad = tf.concat([tf.reshape(g, (-1,)) for g in grads[0]], axis=0)
    shapes = [g.shape.as_list() for g in grads[0]]
    sizes = [int(np.prod(s)) for s in shapes]
    buf = np.zeros(sum(sizes), np.float32)

    def _gather_grads(my_flat_grad):
      comm.Allreduce(my_flat_grad, buf, op=MPI.SUM)
      np.divide(buf, float(num_tasks), out=buf)
      return buf

    avg_flat_grad = tf.py_func(_gather_grads, [flat_grad], tf.float32)
    avg_flat_grad.set_shape(flat_grad.shape)
    avg_grads = tf.split(avg_flat_grad, sizes, axis=0)
    grads[0] = [tf.reshape(g, v.shape) for g, v in zip(avg_grads, grads[0])]
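    # The averaging round-trip above: all per-parameter gradients are flattened
    # and concatenated into one vector, summed across MPI workers with
    # Allreduce inside a tf.py_func, divided by the number of workers, then
    # split by the recorded sizes and reshaped back to each parameter's shape.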

  # training op
  optimizer = tf.group(nn.adam_updates(
      all_params, grads[0], lr=tf_lr, mom1=0.95, mom2=0.9995, eps=1e-6), maintain_averages_op)

  # convert loss to bits/dim
  total_gpus = sum(comm.allgather(args.nr_gpu))
  lprint('using %d gpus across %d machines' % (total_gpus, num_tasks))
  norm_const = np.log(2.) * np.prod(obs_shape) * args.batch_size
  norm_const *= total_gpus / num_tasks
  bits_per_dim = loss_gen[0] / norm_const
  bits_per_dim_test = loss_gen_test[0] / norm_const

  bits_per_dim = tf.check_numerics(bits_per_dim, 'train loss is nan')
  bits_per_dim_test = tf.check_numerics(bits_per_dim_test, 'test loss is nan')

  new_x_gen = []
  for i in range(args.nr_gpu):
    with tf.device('/gpu:%d' % i):
      gen_par = model(xs[i], hs[i], ema=ema, dropout_p=0, **model_opt)
      new_x_gen.append(
          nn.sample_from_discretized_mix_logistic(gen_par, args.nr_logistic_mix))

  def sample_from_model(sess, n_samples=args.nr_gpu * args.batch_size):
    sample_x = np.zeros((0,) + obs_shape, dtype=np.float32)
    while len(sample_x) < n_samples:
      x_gen = [np.zeros((args.batch_size,) + obs_shape, dtype=np.float32)
               for i in range(args.nr_gpu)]
      for yi in range(obs_shape[0]):
        for xi in range(obs_shape[1]):
          new_x_gen_np = sess.run(new_x_gen,
                                  {xs[i]: x_gen[i] for i in range(args.nr_gpu)})
          for i in range(args.nr_gpu):
            x_gen[i][:, yi, xi, :] = new_x_gen_np[i][:, yi, xi, :]

      sample_x = np.concatenate([sample_x] + x_gen, axis=0)

    img_tile = plotting.img_tile(
        sample_x[:int(np.floor(np.sqrt(n_samples))**2)],
        aspect_ratio=1.0, border_color=1.0, stretch=True)
    img = plotting.plot_img(img_tile, title=args.data_set + ' samples')
    plotting.plt.savefig(
        os.path.join(save_dir, '%s_samples.png' % args.data_set))
    np.save(os.path.join(save_dir, '%s_samples.npy' % args.data_set), sample_x)
    plotting.plt.close('all')
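
  # Sampling is autoregressive over pixel positions: each pass of the inner
  # (yi, xi) loops runs the full model once per GPU, but only the pixel at
  # (yi, xi) is copied into x_gen, so generating one batch of samples costs
  # obs_shape[0] * obs_shape[1] sess.run calls.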

  # init & save
  initializer = tf.global_variables_initializer()
  saver = tf.train.Saver()

  # turn numpy inputs into feed_dict for use with tensorflow
  def make_feed_dict(data, init=False):
    if type(data) is tuple:
      x, y = data
    else:
      x = data
      y = None
    # input to pixelCNN is scaled from uint8 [0,255] to float in range [-1,1]
    x = np.cast[np.float32]((x - 127.5) / 127.5)
    if init:
      feed_dict = {x_init: x}
      if y is not None:
        feed_dict.update({y_init: y})
    else:
      x = np.split(x, args.nr_gpu)
      feed_dict = {xs[i]: x[i] for i in range(args.nr_gpu)}
      if y is not None:
        y = np.split(y, args.nr_gpu)
        feed_dict.update({ys[i]: y[i] for i in range(args.nr_gpu)})
    return feed_dict

  # //////////// perform training //////////////
  lprint('dataset size: %d' % len(train_data.data))
  test_bpd = []
  lr = args.learning_rate

  # manually retrieve exactly init_batch_size examples
  feed_dict = make_feed_dict(train_data.next(args.init_batch_size), init=True)
  train_data.reset()  # rewind the iterator back to 0 to do one full epoch
  lprint('initializing the model...')

  sess.run(initializer, feed_dict)
  if args.load_params:
    # ckpt_file = save_dir + '/params_' + args.data_set + '.ckpt'
    ckpt_file = args.load_params
    lprint('restoring parameters from', ckpt_file)
    saver.restore(sess, ckpt_file)

  # Sync params before starting.
  my_vals = sess.run(all_params)
  vals = [np.zeros_like(v) for v in my_vals]
  [comm.Allreduce(mv, v, op=MPI.SUM) for mv, v in zip(my_vals, vals)]
  assign_ops = [var.assign(val / num_tasks)
                for var, val in zip(all_params, vals)]
  sess.run(assign_ops)

  coord = tfu.start_queue_runners(sess)
  batch_size = args.batch_size * total_gpus
  iters_per_train_epoch = len(train_data.data) // batch_size
  iters_per_test_epoch = len(test_data.data) // batch_size

  lprint('starting training')
  for epoch in range(args.max_epochs):
    begin = time.time()
    # train for one epoch
    train_losses = []

    ti = trange(iters_per_train_epoch)
    for itr in ti:
      if coord.should_stop():
        tfu.stop_queue_runners(coord)

      # forward/backward/update model on each gpu
      lr *= args.lr_decay
      l, _ = sess.run([bits_per_dim, optimizer], {tf_lr: lr})
      train_losses.append(l)
      ti.set_postfix(loss=l, lr=lr)

    train_loss_gen = np.mean(train_losses)

    # compute likelihood over test data
    test_losses = []
    for itr in trange(iters_per_test_epoch):
      if coord.should_stop():
        tfu.stop_queue_runners(coord)

      l = sess.run(bits_per_dim_test)
      test_losses.append(l)

    test_loss_gen = np.mean(test_losses)
    test_bpd.append(test_loss_gen)

    # log progress to console
    stats = dict(epoch=epoch, time=time.time() - begin, lr=lr,
                 train_bpd=train_loss_gen,
                 test_bpd=test_loss_gen)
    all_stats = comm.gather(stats)
    if task_id == 0:
      lprint('-' * 16)
      for k in stats:
        lprint('%s:\t%s' % (k, np.mean([s[k] for s in all_stats])))
      if epoch % args.save_interval == 0:
        path = os.path.join(save_dir, str(epoch))
        os.makedirs(path, exist_ok=True)
        saver.save(sess, os.path.join(path, 'params_%s.ckpt' % args.data_set))

    sample_from_model(sess)
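
Note that lr *= args.lr_decay runs once per training iteration, not per epoch, so the learning rate falls geometrically with the global step count. A quick back-of-the-envelope check, using hypothetical numbers rather than values from the example:

lr0, lr_decay = 0.001, 0.999995      # assumed values for illustration
iters = 50000 // 64                  # iterations per epoch for a 50k-image set, batch 64
lr_after_epoch = lr0 * lr_decay ** iters
print(lr_after_epoch)                # ~= 0.000996, roughly a 0.4% drop per epoch
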
Example #6
def main():
    # initialize data loaders for train/test splits
    if args.data_set == 'imagenet' and args.class_conditional:
        raise("We currently don't have labels for the small imagenet data set")
    if args.data_set == 'cifar':
        import data.cifar10_data as cifar10_data
        DataLoader = cifar10_data.DataLoader
    elif args.data_set == 'imagenet':
        import data.imagenet_data as imagenet_data
        DataLoader = imagenet_data.DataLoader
    else:
        raise("unsupported dataset")
    train_data = DataLoader(args.data_dir, 'train', args.batch_size * args.nr_gpu, rng=rng, shuffle=True, return_labels=args.class_conditional)
    test_data = DataLoader(args.data_dir, 'test', args.batch_size * args.nr_gpu, shuffle=False, return_labels=args.class_conditional)
    obs_shape = train_data.get_observation_size() # e.g. a tuple (32,32,3)
    assert len(obs_shape) == 3, 'assumed right now'

    # data place holders
    x_init = tf.placeholder(tf.float32, shape=(args.init_batch_size,) + obs_shape)
    xs = [tf.placeholder(tf.float32, shape=(args.batch_size, ) + obs_shape) for i in range(args.nr_gpu)]

    # if the model is class-conditional we'll set up label placeholders + one-hot encodings 'h' to condition on
    if args.class_conditional:
        num_labels = train_data.get_num_labels()
        y_init = tf.placeholder(tf.int32, shape=(args.init_batch_size,))
        h_init = tf.one_hot(y_init, num_labels)
        y_sample = np.split(np.mod(np.arange(args.batch_size*args.nr_gpu), num_labels), args.nr_gpu)
        h_sample = [tf.one_hot(tf.Variable(y_sample[i], trainable=False), num_labels) for i in range(args.nr_gpu)]
        ys = [tf.placeholder(tf.int32, shape=(args.batch_size,)) for i in range(args.nr_gpu)]
        hs = [tf.one_hot(ys[i], num_labels) for i in range(args.nr_gpu)]
    else:
        h_init = None
        h_sample = [None] * args.nr_gpu
        hs = h_sample

    # create the model
    model_opt = { 'nr_resnet': args.nr_resnet, 'nr_filters': args.nr_filters, 'nr_logistic_mix': args.nr_logistic_mix, 'resnet_nonlinearity': args.resnet_nonlinearity}
    model = tf.make_template('model', model_spec)

    # run once for data dependent initialization of parameters
    data_dependent_init = model(x_init, h_init, init=True, dropout_p=args.dropout_p, **model_opt)

    # keep track of moving average
    all_params = tf.trainable_variables()
    ema = tf.train.ExponentialMovingAverage(decay=args.polyak_decay)
    maintain_averages_op = tf.group(ema.apply(all_params))
    ema_params = [ema.average(p) for p in all_params]

    # get loss gradients over multiple GPUs + sampling
    grads = []
    loss_gen = []
    loss_gen_test = []
    new_x_gen = []
    for i in range(args.nr_gpu):
        with tf.device('/gpu:%d' % i):
            if args.graph_cloning and i>0:
                # already defined the graph once, use it again via template rather than redefining again
                in_ = [xs[i]] + tf.global_variables()
                res = gpu_template.apply(in_)
                loss_train, loss_test, sx = res[:3]
                grad = res[3:]

                loss_gen.append(loss_train)
                loss_gen_test.append(loss_test)
                new_x_gen.append(sx)
                grads.append(grad)

            else:
                # train
                out = model(xs[i], hs[i], ema=None, dropout_p=args.dropout_p, **model_opt)
                loss_gen.append(nn.discretized_mix_logistic_loss(tf.stop_gradient(xs[i]), out))

                # gradients
                grads.append(gradients(loss_gen[i], all_params))

                # test
                out = model(xs[i], hs[i], ema=ema, dropout_p=0., **model_opt)
                loss_gen_test.append(nn.discretized_mix_logistic_loss(xs[i], out))

                # sample
                out = model(xs[i], h_sample[i], ema=ema, dropout_p=0, **model_opt)
                new_x_gen.append(nn.sample_from_discretized_mix_logistic(out, args.nr_logistic_mix))

                if args.graph_cloning:
                    in_ = [xs[0]] + tf.global_variables()
                    out_ = [loss_gen[0], loss_gen_test[0], new_x_gen[0]] + grads[0]
                    gpu_template = GraphTemplate(in_, outputs=out_)

    # add losses and gradients together and get training updates
    tf_lr = tf.placeholder(tf.float32, shape=[])
    with tf.device('/gpu:0'):
        for i in range(1,args.nr_gpu):
            loss_gen[0] += loss_gen[i]
            loss_gen_test[0] += loss_gen_test[i]
            for j in range(len(grads[0])):
                grads[0][j] += grads[i][j]
        # training op
        optimizer = tf.group(nn.adam_updates(all_params, grads[0], lr=tf_lr, mom1=0.95, mom2=0.9995), maintain_averages_op)

    # convert loss to bits/dim
    bits_per_dim = loss_gen[0]/(args.nr_gpu*np.log(2.)*np.prod(obs_shape)*args.batch_size)
    bits_per_dim_test = loss_gen_test[0]/(args.nr_gpu*np.log(2.)*np.prod(obs_shape)*args.batch_size)

    # sample from the model
    def sample_from_model(sess):
        x_gen = [np.zeros((args.batch_size,) + obs_shape, dtype=np.float32) for i in range(args.nr_gpu)]
        for yi in range(obs_shape[0]):
            for xi in range(obs_shape[1]):
                new_x_gen_np = sess.run(new_x_gen, {xs[i]: x_gen[i] for i in range(args.nr_gpu)})
                for i in range(args.nr_gpu):
                    x_gen[i][:,yi,xi,:] = new_x_gen_np[i][:,yi,xi,:]
        return np.concatenate(x_gen, axis=0)

    # turn numpy inputs into feed_dict for use with tensorflow
    def make_feed_dict(data, init=False):
        if type(data) is tuple:
            x,y = data
        else:
            x = data
            y = None
        x = np.cast[np.float32]((x - 127.5) / 127.5) # input to pixelCNN is scaled from uint8 [0,255] to float in range [-1,1]
        if init:
            feed_dict = {x_init: x}
            if y is not None:
                feed_dict.update({y_init: y})
        else:
            x = np.split(x, args.nr_gpu)
            feed_dict = {xs[i]: x[i] for i in range(args.nr_gpu)}
            if y is not None:
                y = np.split(y, args.nr_gpu)
                feed_dict.update({ys[i]: y[i] for i in range(args.nr_gpu)})
        return feed_dict

    # //////////// perform training //////////////
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    print('starting training')
    test_bpd = []
    lr = args.learning_rate
    saver = tf.train.Saver()
    with tf.Session() as sess:
        for epoch in range(args.max_epochs):
            begin = time.time()

            # init
            if epoch == 0:
                feed_dict = make_feed_dict(train_data.next(args.init_batch_size), init=True) # manually retrieve exactly init_batch_size examples
                train_data.reset()  # rewind the iterator back to 0 to do one full epoch
                print('initializing the model...')
                sess.run(tf.global_variables_initializer())
                sess.run(data_dependent_init, feed_dict)

            # train for one epoch
            train_losses = []
            counter = 0
            for d in train_data:
                counter+=1
                feed_dict = make_feed_dict(d)
                # forward/backward/update model on each gpu
                lr *= args.lr_decay
                feed_dict.update({ tf_lr: lr })
                l,_ = sess.run([bits_per_dim, optimizer], feed_dict)
                print(counter, l)
                train_losses.append(l)
                if counter>50:
                    if l>6.5:
                        assert False, "Test failed, expected loss 6.28537 at iteration 50"
                    else:
                        print("Test passed, loss %f (expected %f)"%(l, 6.28537))
                        sys.exit()
            train_loss_gen = np.mean(train_losses)

            # compute likelihood over test data
            test_losses = []
            for d in test_data:
                feed_dict = make_feed_dict(d)
                l = sess.run(bits_per_dim_test, feed_dict)
                test_losses.append(l)
            test_loss_gen = np.mean(test_losses)
            test_bpd.append(test_loss_gen)

            # log progress to console
            print("Iteration %d, time = %ds, train bits_per_dim = %.4f, test bits_per_dim = %.4f" % (epoch, time.time()-begin, train_loss_gen, test_loss_gen))
            sys.stdout.flush()

            if epoch % args.save_interval == 0:

                # generate samples from the model
                sample_x = []
                for i in range(args.num_samples):
                    sample_x.append(sample_from_model(sess))
                sample_x = np.concatenate(sample_x,axis=0)
                #img_tile = plotting.img_tile(sample_x[:100], aspect_ratio=1.0, border_color=1.0, stretch=True)
                #img = plotting.plot_img(img_tile, title=args.data_set + ' samples')
                #plotting.plt.savefig(os.path.join(args.save_dir,'%s_sample%d.png' % (args.data_set, epoch)))
                #plotting.plt.close('all')
                np.savez(os.path.join(args.save_dir,'%s_sample%d.npz' % (args.data_set, epoch)), sample_x)

                # save params
                saver.save(sess, args.save_dir + '/params_' + args.data_set + '.ckpt')
                np.savez(args.save_dir + '/test_bpd_' + args.data_set + '.npz', test_bpd=np.array(test_bpd))
Example #7
        # test
        gen_par = model(xs[i], ema=ema, dropout_p=0., **model_opt)
        loss_gen_test.append(nn.discretized_mix_logistic_loss(xs[i], gen_par))

# add gradients together and get training updates
tf_lr = tf.placeholder(tf.float32, shape=[])
with tf.device('/gpu:0'):
    for i in range(1, args.nr_gpu):
        loss_gen[0] += loss_gen[i]
        loss_gen_test[0] += loss_gen_test[i]
        for j in range(len(grads[0])):
            grads[0][j] += grads[i][j]
    # training op
    optimizer = nn.adam_updates(all_params,
                                grads[0],
                                lr=tf_lr,
                                mom1=0.95,
                                mom2=0.9995)

# convert loss to bits/dim
bits_per_dim = loss_gen[0] / (args.nr_gpu * np.log(2.) * np.prod(obs_shape) *
                              args.batch_size)
bits_per_dim_test = loss_gen_test[0] / (args.nr_gpu * np.log(2.) *
                                        np.prod(obs_shape) * args.batch_size)

# init & save
initializer = tf.global_variables_initializer()
saver = tf.train.Saver()


# input to pixelCNN is scaled from uint8 [0,255] to float in range [-1,1]
Example #8
        # gradients
        grads.append(tf.gradients(loss_gen[i], all_params))
        # test
        gen_par = model(xs[i], hs[i], ema=ema, dropout_p=0., **model_opt)
        loss_gen_test.append(nn.discretized_mix_logistic_loss(xs[i], gen_par))

# add losses and gradients together and get training updates
tf_lr = tf.placeholder(tf.float32, shape=[])
with tf.device('/gpu:0'):
    for i in range(1, args.nr_gpu):
        loss_gen[0] += loss_gen[i]
        loss_gen_test[0] += loss_gen_test[i]
        for j in range(len(grads[0])):
            grads[0][j] += grads[i][j]
    # training op
    optimizer = tf.group(nn.adam_updates(
        all_params, grads[0], lr=tf_lr, mom1=0.95, mom2=0.9995), maintain_averages_op)

# convert loss to bits/dim
bits_per_dim = loss_gen[0] / (args.nr_gpu * np.log(2.) * np.prod(obs_shape) *
                              args.batch_size)
bits_per_dim_test = loss_gen_test[0] / (args.nr_gpu * np.log(2.) *
                                        np.prod(obs_shape) * args.batch_size)

# sample from the model
new_x_gen = []
for i in range(args.nr_gpu):
    with tf.device('/gpu:%d' % i):
        gen_par = model(xs[i], h_sample[i], ema=ema, dropout_p=0, **model_opt)
        new_x_gen.append(nn.sample_from_discretized_mix_logistic(
            gen_par, args.nr_logistic_mix))
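
# As in the earlier examples, these per-GPU sampling ops are meant to be driven
# by an outer pixel-by-pixel loop (see sample_from_model above): feed the
# partially generated images through xs, run new_x_gen, and copy back only the
# pixel currently being sampled.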