def train(self):
    """Train the baseline MNIST classifier.

    Builds the forward graph on a placeholder batch, wires a cross-entropy
    loss and an Adam optimizer, then delegates the training loop to
    sugartensor's ``tf.sg_train``, evaluating accuracy on the validation
    split.
    """
    # train baseline model
    input_ph = tf.placeholder(shape=[batch_size, 28, 28, 1], dtype=tf.float32)
    label_ph = tf.placeholder(shape=[batch_size, ], dtype=tf.int32)

    predict = self.forward(input_ph)

    loss_tensor = tf.reduce_mean(predict.sg_ce(target=label_ph))

    # use to update network parameters
    optim = tf.sg_optim(loss_tensor, optim='Adam', lr=1e-3)

    # use saver to save a new model
    saver = tf.train.Saver()

    sess = tf.Session()
    with tf.sg_queue_context(sess):
        # inital
        tf.sg_init(sess)

        # validation metric: reuse the training graph on the validation split
        acc = (predict.sg_reuse(input=Mnist.valid.image)
               .sg_softmax()
               .sg_accuracy(target=Mnist.valid.label, name='validation'))

        # BUG FIX: the original passed the undefined name ``loss`` here
        # (NameError at runtime); the tensor defined above is ``loss_tensor``.
        tf.sg_train(loss=loss_tensor,
                    eval_metric=[acc],
                    max_ep=max_ep,
                    save_dir=save_dir,
                    ep_size=Mnist.train.num_batch,
                    log_interval=10)
def trainIt():
    """Train the GAN, alternating discriminator and generator steps.

    Builds the generator/discriminator graphs, MSE-based GAN losses, and
    per-category optimizers, then runs sugartensor's training loop via the
    ``@tf.sg_train_func``-decorated ``alt_train``.
    """
    data = prepareData()
    x = data['train'][0]
    # x = data['train']
    z = tf.random_normal((batch_size, rand_dim))
    gen = generator(z)
    disc_real = discriminator(x)
    disc_fake = discriminator(gen)

    # discriminator losses: real samples should match their targets,
    # fake samples should score zero
    loss_d_r = disc_real.sg_mse(target=data['train'][1], name='disc_real')
    # loss_d_r = disc_real.sg_mse(target = tf.ones(batch_size), name = 'disc_real')
    loss_d_f = disc_fake.sg_mse(target=tf.zeros(batch_size), name='disc_fake')
    loss_d = (loss_d_r + loss_d_f) / 2
    # generator loss: make the discriminator score fakes as ones
    loss_g = disc_fake.sg_mse(target=tf.ones(batch_size), name='gen')

    # BUG FIX: the discriminator optimizer minimized only loss_d_r, so the
    # discriminator never saw its fake-sample loss; use the combined loss_d
    # (which was computed above but left unused).
    train_disc = tf.sg_optim(loss_d, lr=0.01, name='train_disc',
                             category='discriminator')  # discriminator train ops
    train_gen = tf.sg_optim(loss_g, lr=0.01,
                            category='generator')  # generator train ops

    @tf.sg_train_func
    def alt_train(sess, opt):
        # BUG FIX: the original gated on `step % 1 == 0` (always true, dead
        # else branch) and had the generator step commented out, so the
        # generator's weights were never updated. Run both steps, matching
        # the alternating pattern used by the other alt_train funcs in this
        # file.
        l_disc = sess.run([loss_d, train_disc])[0]  # training discriminator
        l_gen = sess.run([loss_g, train_gen])[0]  # training generator
        return np.mean(l_disc) + np.mean(l_gen)

    alt_train(log_interval=10, max_ep=25,
              ep_size=(1100 + 690) / batch_size,
              early_stop=False,
              save_dir='asset/train/gan',
              save_interval=10)
# categorical factor loss loss_c_r = cat_real.sg_ce(target=y, name='cat_real') loss_c_d = cat_fake.sg_ce(target=z_cat, name='cat_fake') loss_c = (loss_c_r + loss_c_d) / 2 # continuous factor loss loss_con = con_fake.sg_mse(target=z_con, name='con').sg_mean(dims=1) # # train ops # # discriminator train ops train_disc = tf.sg_optim(loss_d + loss_c + loss_con, lr=0.0001, category='discriminator') # generator train ops train_gen = tf.sg_optim(loss_g + loss_c + loss_con, lr=0.001, category='generator') # # training # # def alternate training func @tf.sg_train_func def alt_train(sess, opt): l_disc = sess.run([loss_d, train_disc])[0] # training discriminator
# NOTE(review): fragment -- `x_real_pair`, `x_fake_pair`, `y_disc`, `y` and
# `np` come from code outside this view.

# Concatenate the real and fake (image, condition) pairs into one batch so a
# single discriminator pass scores both halves at once.
xx = tf.concat([x_real_pair, x_fake_pair], 0)

# All conv layers below share size=4, stride=2, leaky-ReLU via sg_context.
with tf.sg_context(name='discriminator', size=4, stride=2, act='leaky_relu'):
    # discriminator part: conv stack -> flatten -> dense -> scalar logit
    disc = (xx.sg_conv(dim=64)
            .sg_conv(dim=128)
            .sg_flatten()
            .sg_dense(dim=1024)
            .sg_dense(dim=1, act='linear')
            .sg_squeeze())

#
# loss and train ops
#

loss_disc = tf.reduce_mean(disc.sg_bce(target=y_disc))  # discriminator loss
# generator loss: replay the discriminator op chain on the fake pairs only
loss_gen = tf.reduce_mean(
    disc.sg_reuse(input=x_fake_pair).sg_bce(target=y))

train_disc = tf.sg_optim(loss_disc, lr=0.0001,
                         category='discriminator')  # discriminator train ops
train_gen = tf.sg_optim(loss_gen, lr=0.001,
                        category='generator')  # generator train ops

#
# training
#

# def alternate training func: one discriminator step, one generator step,
# report the summed mean losses for console logging
@tf.sg_train_func
def alt_train(sess, opt):
    l_disc = sess.run([loss_disc, train_disc])[0]  # training discriminator
    l_gen = sess.run([loss_gen, train_gen])[0]  # training generator
    return np.mean(l_disc) + np.mean(l_gen)
disc = shared.sg_dense(dim=1, act='linear').sg_squeeze() # categorical recognizer end recog_cat = recog_shared.sg_dense(dim=num_category, act='linear') # continuous recognizer end recog_cont = recog_shared.sg_dense(dim=num_cont, act='sigmoid') # # loss and train ops # loss_disc = tf.reduce_mean(disc.sg_bce(target=y_disc)) # discriminator loss loss_gen = tf.reduce_mean(disc.sg_reuse(input=gen).sg_bce(target=y)) # generator loss loss_recog = tf.reduce_mean(recog_cat.sg_ce(target=z_cat)) \ + tf.reduce_mean(recog_cont.sg_mse(target=z_cont)) # recognizer loss train_disc = tf.sg_optim(loss_disc + loss_recog, lr=0.0001, category='discriminator') # discriminator train ops train_gen = tf.sg_optim(loss_gen + loss_recog, lr=0.001, category='generator') # generator train ops # # training # # def alternate training func @tf.sg_train_func def alt_train(sess, opt): l_disc = sess.run([loss_disc, train_disc])[0] # training discriminator l_gen = sess.run([loss_gen, train_gen])[0] # training generator return np.mean(l_disc) + np.mean(l_gen)
def classifier_train(**kwargs):
    r"""Trains the model.

    Args:
      **kwargs:
        optim: A name for optimizer. 'MaxProp' (default), 'AdaMax', 'Adam',
          'RMSProp' or 'sgd'.
        loss: A 0-D `Tensor` containing the value to minimize.
        lr: A Python Scalar (optional). Learning rate. Default is .001.
        beta1: A Python Scalar (optional). Default is .9.
        beta2: A Python Scalar (optional). Default is .99.
        save_dir: A string. The root path to which checkpoint and log files
          are saved. Default is `asset/train`.
        max_ep: A positive integer. Maximum number of epochs. Default is 1000.
        ep_size: A positive integer. Number of Total batches in an epoch.
          For proper display of log. Default is 1e5.
        save_interval: A Python scalar. The interval of saving checkpoint
          files. By default, for every 600 seconds, a checkpoint file is
          written.
        log_interval: A Python scalar. The interval of recoding logs. By
          default, for every 60 seconds, logging is executed.
        max_keep: A positive integer. Maximum number of recent checkpoints
          to keep. Default is 5.
        keep_interval: A Python scalar. How often to keep checkpoints.
          Default is 1 hour.
        category: Scope name or list to train
        eval_metric: A list of tensors containing the value to evaluate.
          Default is [].
        tqdm: Boolean. If True (Default), progress bars are shown. If False,
          a series of loss will be shown on the console.
    """
    opt = tf.sg_opt(kwargs)
    assert opt.loss is not None, 'loss is mandatory.'

    # default training options
    opt += tf.sg_opt(optim='MaxProp', lr=0.001, beta1=0.9, beta2=0.99,
                     category='', ep_size=100000)

    # get optimizer
    # BUG FIX: lr was hard-coded to 0.001 here, silently ignoring the
    # caller-supplied ``lr`` option documented above; use opt.lr (which
    # still defaults to 0.001, so existing callers are unaffected).
    train_op = tf.sg_optim(opt.loss, optim=opt.optim, lr=opt.lr,
                           beta1=opt.beta1, beta2=opt.beta2,
                           category=opt.category)

    # for console logging
    loss_ = opt.loss

    # use only first loss when multiple GPU case
    if isinstance(opt.loss, (tuple, list)):
        loss_ = opt.loss[0]

    # define train function
    # noinspection PyUnusedLocal
    @sg_train_func
    def train_func(sess, arg):
        return sess.run([loss_, train_op])[0]

    # run train function
    train_func(**opt)
def train_with_GP(self): input_ph = tf.placeholder(shape=[batch_size, 28, 28, 1], dtype=tf.float32) label_ph = tf.placeholder(shape=[ batch_size, ], dtype=tf.int32) predict = self.forward(input_ph) loss_tensor = tf.reduce_mean(predict.sg_ce(target=label_ph)) # use to update network parameters optim = tf.sg_optim(loss_tensor, optim='Adam', lr=1e-3) # use saver to save a new model saver = tf.train.Saver() sess = tf.Session() with tf.sg_queue_context(sess): # inital tf.sg_init(sess) # train by GP guilding for e in range(max_ep): previous_loss = None for i in range(Mnist.train.num_batch): [image_array, label_array ] = sess.run([Mnist.train.image, Mnist.train.label]) if (e == 0 or e == 1 ): # first and second epoch train no noisy image loss = sess.run([loss_tensor, optim], feed_dict={ input_ph: image_array, label_ph: label_array })[0] print 'Baseline loss = ', loss elif ( e == 2 ): # third epoch train with gp image and original image gpIn1 = np.squeeze(image_array) gpIn2 = np.zeros((28, 28)) image_gp = GP(gpIn1, gpIn2, seed=0.8) image_gp2 = image_gp[np.newaxis, ...] image_gp2 = image_gp2[..., np.newaxis] loss = sess.run([loss_tensor, optim], feed_dict={ input_ph: image_array, label_ph: label_array })[0] print 'GP without nosiy loss = ', loss loss = sess.run([loss_tensor, optim], feed_dict={ input_ph: image_gp2, label_ph: label_array })[0] print 'GP loss = ', loss else: # other epoch train with gp evolution gpIn1 = np.squeeze(image_array) gpIn2 = np.zeros((28, 28)) image_gp = GP(gpIn1, gpIn2, seed=random.random()) image_gp2 = image_gp[np.newaxis, ...] 
image_gp2 = image_gp2[..., np.newaxis] loss = sess.run([loss_tensor, optim], feed_dict={ input_ph: image_array, label_ph: label_array })[0] print 'GP without nosiy loss = ', loss loss = sess.run([loss_tensor, optim], feed_dict={ input_ph: image_gp2, label_ph: label_array })[0] print 'GP loss = ', loss if loss < previous_loss: for i in range(5): loss = sess.run([loss_tensor, optim], feed_dict={ input_ph: image_gp2, label_ph: label_array })[0] gpIn1 = image_gp2 image_gp2[0, :, :, 0] = GP(gpIn1[0, :, :, 0], gpIn2, seed=random.random()) print 'GP EV loss = ', loss previous_loss = loss saver.save(sess, os.path.join(save_dir, 'gp_model'), global_step=e) # close session sess.close()
def __init__(self):
    """Build a Gram-matrix (style) reconstruction graph.

    A trainable noise image ``x`` is optimized so that the Gram matrices of
    its conv features match those of a fixed target image ``y``.
    NOTE(review): this matches Gatys-style texture synthesis -- inferred
    from the Gram-matrix loss below; confirm intent.
    """
    with tf.sg_context(name='generator'):
        # trainable noise image -- the only variable the optimizer updates
        self.x = tf.sg_initializer.he_uniform(name="x",
                                              shape=[1, 224, 224, 1])  # noise image
        # true target image, fed at run time
        self.y = tf.placeholder(dtype=tf.float32, shape=[1, 224, 224, 1])  # true target image

        # VGG-like conv stacks; sg_context supplies act='relu' to each conv
        with tf.sg_context(name='conv', act='relu'):
            self.x_conv1 = (self.x
                            .sg_conv(dim=64)
                            .sg_conv()
                            .sg_pool())  # (1, 112, 112, 64)
            self.x_conv2 = (self.x_conv1
                            .sg_conv(dim=128)
                            .sg_conv()
                            .sg_pool())  # (1, 56, 56, 128)
            self.x_conv3 = (self.x_conv2
                            .sg_conv(dim=256)
                            .sg_conv()
                            .sg_conv()
                            .sg_conv()
                            .sg_pool())  # (1, 28, 28, 256)
            self.x_conv4 = (self.x_conv3
                            .sg_conv(dim=512)
                            .sg_conv()
                            .sg_conv()
                            .sg_conv()
                            .sg_pool())  # (1, 14, 14, 512)
            # .sg_conv(dim=512)
            # .sg_conv()
            # .sg_conv()
            # .sg_conv()
            # .sg_pool())

        # replay the same conv chains (shared weights) on the target image
        self.y_conv1 = self.x_conv1.sg_reuse(input=self.y)
        self.y_conv2 = self.x_conv2.sg_reuse(input=self.y)
        self.y_conv3 = self.x_conv3.sg_reuse(input=self.y)
        self.y_conv4 = self.x_conv4.sg_reuse(input=self.y)

        #
        def get_gram_mat(tensor):
            '''Return the Gram matrix of a 4-D feature map.

            Arg:
              tensor: 4-D tensor. The first dimension must be 1.

            Returns:
              gram matrix. Read
              `https://en.wikipedia.org/wiki/Gramian_matrix` for details.
              512 by 512.
            '''
            assert tensor.get_shape().ndims == 4, "The tensor must be 4 dimensions."

            dim0, dim1, dim2, dim3 = tensor.get_shape().as_list()
            # collapse batch and spatial dims so rows are feature vectors
            tensor = tensor.sg_reshape(shape=[dim0 * dim1 * dim2, dim3])  # (1*7*7, 512)

            # normalization: Why? Because the original value of gram mat.
            # would be too huge.
            mean, variance = tf.nn.moments(tensor, [0, 1])
            tensor = (tensor - mean) / tf.sqrt(variance + tf.sg_eps)

            tensor_t = tensor.sg_transpose(perm=[1, 0])  # (512, 1*7*7)
            gram_mat = tf.matmul(tensor_t, tensor)  # (512, 512)
            return gram_mat

        # Loss: Add the loss of each layer (Gram-matrix MSE per conv stage)
        self.mse = tf.squared_difference(get_gram_mat(self.x_conv1),
                                         get_gram_mat(self.y_conv1)).sg_mean() + \
            tf.squared_difference(get_gram_mat(self.x_conv2),
                                  get_gram_mat(self.y_conv2)).sg_mean() + \
            tf.squared_difference(get_gram_mat(self.x_conv3),
                                  get_gram_mat(self.y_conv3)).sg_mean() + \
            tf.squared_difference(get_gram_mat(self.x_conv4),
                                  get_gram_mat(self.y_conv4)).sg_mean()

        self.train_gen = tf.sg_optim(
            self.mse, lr=0.0001,
            category='generator')  # Note that we train only variable x.
# concat merge target source enc = enc.sg_concat(target=z_y) # decode graph ( causal convolution ) dec = decode(enc, voca_size) # cross entropy loss with logit and mask loss = dec.sg_ce(target=y, mask=True) #opt += tf.sg_opt(ep_size=data.num_batch) opt += tf.sg_opt(ep_size=100) # train opt += tf.sg_opt(loss=loss) # get optimizer train_op = sg_optim(opt.loss, optim=opt.optim, lr=lr, beta1=opt.beta1, beta2=opt.beta2, category=opt.category) # checkpoint saver saver = tf.train.Saver(max_to_keep=opt.max_keep, keep_checkpoint_every_n_hours=opt.keep_interval) # create supervisor sv = tf.train.Supervisor(logdir=opt.save_dir, saver=saver, save_model_secs=opt.save_interval, summary_writer=None, save_summaries_secs=opt.log_interval, global_step=tf.sg_global_step(), local_init_op=tf.sg_phase().assign(True))