def train(self):
    # train baseline model
    input_ph = tf.placeholder(shape=[batch_size, 28, 28, 1], dtype=tf.float32)
    label_ph = tf.placeholder(shape=[batch_size, ], dtype=tf.int32)
    predict = self.forward(input_ph)

    # cross entropy loss
    loss_tensor = tf.reduce_mean(predict.sg_ce(target=label_ph))

    # used to update network parameters
    optim = tf.sg_optim(loss_tensor, optim='Adam', lr=1e-3)

    # use saver to save a new model
    saver = tf.train.Saver()

    sess = tf.Session()
    with tf.sg_queue_context(sess):
        # initialize variables
        tf.sg_init(sess)

        # validation
        acc = (predict.sg_reuse(input=Mnist.valid.image)
               .sg_softmax()
               .sg_accuracy(target=Mnist.valid.label, name='validation'))

        tf.sg_train(loss=loss_tensor, eval_metric=[acc], max_ep=max_ep,
                    save_dir=save_dir, ep_size=Mnist.train.num_batch,
                    log_interval=10)

def start_training(self):
    tf.sg_train(optim='Adam', lr=0.0001,
                loss=self.reduced_loss,
                eval_metric=[self.reduced_loss],
                ep_size=self.num_batch,
                save_dir='save/train/small',
                max_ep=self.max_ep,
                early_stop=False)

def train():
    g = ModelGraph()
    print("Graph loaded!")

    tf.sg_train(log_interval=10, loss=g.loss, eval_metric=[g.acc],
                max_ep=5, save_dir='asset/train',
                early_stop=False, max_keep=10)

def main():
    g = Graph()

    tf.sg_train(log_interval=10, lr_reset=True, loss=g.reduced_loss,
                eval_metric=[g.acc_train, g.acc_val],
                ep_size=g.num_batch, save_dir='asset/train',
                max_ep=100, early_stop=False)

def main():
    g = Graph()
    print("Graph Loaded")

    tf.sg_train(optim="Adam", lr=0.0001, lr_reset=True,
                loss=g.reduced_loss, ep_size=g.num_batch,
                save_dir='asset/train', max_ep=10, early_stop=False)

def train():
    g = ModelGraph()
    print("Graph loaded!")

    tf.sg_train(lr=0.00001, lr_reset=True, log_interval=10, loss=g.ce,
                eval_metric=[g.acc], max_ep=5, save_dir='asset/train',
                early_stop=False, max_keep=5)

def train():
    g = ModelGraph()
    print("Graph loaded!")

    tf.sg_train(optim="Adam", lr=0.00001, lr_reset=True,
                loss=g.reduced_loss, eval_metric=[], max_ep=20000,
                save_dir='asset/train', early_stop=False,
                ep_size=g.num_batch)

def train():
    g = ModelGraph()
    print("Graph loaded!")

    if Hyperparams.isqwerty:
        save_dir = 'qwerty/asset/train'
    else:
        save_dir = 'nine/asset/train'

    tf.sg_train(lr=0.0001, lr_reset=True, log_interval=10,
                loss=g.reduced_loss, max_ep=50, save_dir=save_dir,
                early_stop=False, max_keep=5, ep_size=g.num_batch)

#
# inputs
#

# corpus input tensor
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# mfcc feature of audio
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# target sentence label
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# sequence length except zero-padding
seq_len = []
for input_ in inputs:
    seq_len.append(tf.not_equal(input_.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1))


# parallel loss tower
@tf.sg_parallel
def get_loss(opt):
    # encode audio feature
    logit = get_logit(opt.input[opt.gpu_index], voca_size=voca_size)
    # CTC loss
    return logit.sg_ctc(target=opt.target[opt.gpu_index],
                        seq_len=opt.seq_len[opt.gpu_index])

#
# train
#
tf.sg_train(lr=0.0001, loss=get_loss(input=inputs, target=labels, seq_len=seq_len),
            ep_size=data.num_batch, max_ep=50)

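# `get_logit` above is the acoustic-model builder from the surrounding
# project and is not shown here. A minimal sketch with the same call
# signature, assuming a small stack of 1-D convolutions over the mfcc
# frames ( the layer sizes are assumptions ):
def get_logit(x, voca_size):
    with tf.sg_context(size=3, act='relu', bn=True):
        out = x.sg_conv1d(dim=128).sg_conv1d(dim=128)
    # per-frame logits over the vocabulary ( plus CTC blank )
    return out.sg_conv1d(size=1, dim=voca_size, act='linear', bn=False)
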
# encoder network
with tf.sg_context(name='encoder', size=4, stride=2, act='relu'):
    mu = (x
          .sg_conv(dim=64)
          .sg_conv(dim=128)
          .sg_flatten()
          .sg_dense(dim=1024)
          .sg_dense(dim=num_dim, act='linear'))

# re-parameterization trick with random gaussian
z = mu + tf.random_normal(mu.get_shape())

# decoder network
with tf.sg_context(name='decoder', size=4, stride=2, act='relu'):
    xx = (z
          .sg_dense(dim=1024)
          .sg_dense(dim=7 * 7 * 128)
          .sg_reshape(shape=(-1, 7, 7, 128))
          .sg_upconv(dim=64)
          .sg_upconv(dim=1, act='sigmoid'))

# add image summary
tf.sg_summary_image(x, name='origin')
tf.sg_summary_image(xx, name='recon')

# loss
loss_recon = xx.sg_mse(target=x, name='recon').sg_mean(axis=[1, 2, 3])
loss_kld = tf.square(mu).sg_sum(axis=1) / (28 * 28)
tf.sg_summary_loss(loss_kld, name='kld')
loss = loss_recon + loss_kld * 0.5

# do training
tf.sg_train(loss=loss, log_interval=10, ep_size=data.train.num_batch,
            max_ep=30, early_stop=False, save_dir='asset/train/vae')

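# The VAE example above assumes `data`, `x`, and `num_dim` are defined
# earlier in the file. A minimal setup sketch, mirroring the other MNIST
# examples in this listing ( the batch size and latent size are assumptions ):
import sugartensor as tf

# MNIST input tensor ( with QueueRunner )
data = tf.sg_data.Mnist(batch_size=32)

# input images
x = data.train.image

# assumed latent dimension
num_dim = 50
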
# simple wrapping function with decorator for parallel training
@tf.sg_parallel
def get_loss(opt):
    # conv layers
    with tf.sg_context(name='convs', act='relu', bn=True):
        conv = (opt.input[opt.gpu_index]
                .sg_conv(dim=16, name='conv1')
                .sg_pool()
                .sg_conv(dim=32, name='conv2')
                .sg_pool()
                .sg_conv(dim=32, name='conv3')
                .sg_pool())

    # fc layers
    with tf.sg_context(name='fcs', act='relu', bn=True):
        logit = (conv
                 .sg_flatten()
                 .sg_dense(dim=256, name='fc1')
                 .sg_dense(dim=10, act='linear', bn=False, name='fc2'))

    # cross entropy loss with logit
    return logit.sg_ce(target=opt.target[opt.gpu_index])

# parallel training ( same as single GPU training )
tf.sg_train(loss=get_loss(input=inputs, target=labels),
            ep_size=data.train.num_batch, log_interval=10,
            save_dir='asset/train/conv_par')

    # ( tail of res_block(tensor, size, rate); a full sketch follows this example )
    out = out.sg_conv1d(size=1, dim=dim, act='tanh', bn=True)

    # residual and skip output
    return out + tensor, out

# expand dimension
z = x.sg_conv1d(size=1, dim=num_dim, act='tanh', bn=True)

# dilated conv block loop
skip = 0  # skip connections
for i in range(num_blocks):
    for r in [1, 2, 4, 8, 16]:
        z, s = res_block(z, size=7, rate=r)
        skip += s

# final logit layers
logit = (skip
         .sg_conv1d(size=1, act='tanh', bn=True)
         .sg_conv1d(size=1, dim=voca_size))

# CTC loss
loss = logit.sg_ctc(target=y, seq_len=seq_len)

# train
tf.sg_train(log_interval=30, lr=0.0001, loss=loss, ep_size=data.num_batch,
            max_ep=20, early_stop=False)

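# `res_block` is only partially visible above. A minimal full sketch in the
# same style, assuming a gated dilated convolution ( the tanh/sigmoid gating
# is an assumption, not taken from the snippet ):
def res_block(tensor, size, rate):
    # dilated convolution pair with gated activation ( assumed structure )
    conv_filter = tensor.sg_aconv1d(size=size, rate=rate, act='tanh', bn=True)
    conv_gate = tensor.sg_aconv1d(size=size, rate=rate, act='sigmoid', bn=True)
    out = conv_filter * conv_gate
    # 1x1 convolution back to the residual dimension
    out = out.sg_conv1d(size=1, dim=num_dim, act='tanh', bn=True)
    # residual and skip output
    return out + tensor, out
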
import sugartensor as tf

__author__ = '*****@*****.**'

# set log level to debug
tf.sg_verbosity(10)

# MNIST input tensor ( with QueueRunner )
data = tf.sg_data.Mnist()

# inputs
x = data.train.image
y = data.train.label

# create training graph
with tf.sg_context(regularizer='l1'):
    logit = (x
             .sg_flatten()
             .sg_dense(dim=400, act='relu', bn=True)
             .sg_dense(dim=200, act='relu', bn=True)
             .sg_dense(dim=10))

# cross entropy loss with logit ( for training set )
# + L1 regularizer loss ( = sparsity regularizer )
loss = logit.sg_ce(target=y) + tf.sg_regularizer_loss(scale=1.)

# train
tf.sg_train(loss=loss, ep_size=data.train.num_batch, log_interval=10,
            save_dir='asset/train/l1_regul')

import sugartensor as tf

# set log level to debug
tf.sg_verbosity(10)

# MNIST input tensor ( with QueueRunner )
data = tf.sg_data.Mnist()

# inputs
x = data.train.image
y = data.train.label

# create training graph
logit = (x
         .sg_flatten()
         .sg_dense(dim=400, act='relu', bn=True)
         .sg_dense(dim=200, act='relu', bn=True)
         .sg_dense(dim=10))

# cross entropy loss with logit ( for training set )
loss = logit.sg_ce(target=y)

# accuracy evaluation ( for validation set )
acc = (logit
       .sg_reuse(input=data.valid.image)
       .sg_softmax()
       .sg_accuracy(target=data.valid.label, name='val'))

# train
tf.sg_train(loss=loss, eval_metric=[acc], ep_size=data.train.num_batch,
            log_interval=10)

# set log level to debug
tf.sg_verbosity(10)

# MNIST input tensor ( with QueueRunner )
data = tf.sg_data.Mnist()

# inputs
x = data.train.image
y = data.train.label

# create training graph
with tf.sg_context(act='relu', bn=True):
    logit = (x
             .sg_conv(dim=16).sg_pool()
             .sg_conv(dim=32).sg_pool()
             .sg_conv(dim=32).sg_pool()
             .sg_flatten()
             .sg_dense(dim=256)
             .sg_dense(dim=10, act='linear', bn=False))

# cross entropy loss with logit ( for training set )
loss = logit.sg_ce(target=y)

# accuracy evaluation ( for validation set )
acc = (logit
       .sg_reuse(input=data.valid.image)
       .sg_softmax()
       .sg_accuracy(target=data.valid.label, name='val'))

# train
tf.sg_train(loss=loss, eval_metric=[acc], ep_size=data.train.num_batch,
            save_dir='asset/train/conv')

# decoder input: target shifted right with a leading zero ( teacher forcing )
y_in = tf.concat([tf.zeros((batch_size, 1), tf.sg_intx), y[:, :-1]], axis=1)

# vocabulary size
voca_size = data.voca_size

# make embedding matrix for source and target
emb_x = tf.sg_emb(name='emb_x', voca_size=voca_size, dim=latent_dim)
emb_y = tf.sg_emb(name='emb_y', voca_size=voca_size, dim=latent_dim)

# latent from embed table
z_x = x.sg_lookup(emb=emb_x)
z_y = y_in.sg_lookup(emb=emb_y)

# encode graph ( atrous convolution )
enc = encode(z_x)

# concat merge target source
enc = enc.sg_concat(target=z_y)

# decode graph ( causal convolution )
dec = decode(enc, voca_size)

# cross entropy loss with logit and mask
loss = dec.sg_ce(target=y, mask=True)

# train
tf.sg_train(loss=loss, log_interval=30, lr=0.0001, ep_size=data.num_batch,
            max_ep=20)

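# `encode` and `decode` above come from the surrounding model file and are
# not shown. A minimal sketch of graphs with the right signatures, assuming
# atrous convolutions for the encoder and causal atrous convolutions for the
# decoder ( depths and sizes are assumptions ):
def encode(tensor):
    out = tensor
    for rate in [1, 2, 4, 8]:
        out = out.sg_aconv1d(size=3, rate=rate, act='relu', bn=True)
    return out


def decode(tensor, voca_size):
    out = tensor
    for rate in [1, 2, 4, 8]:
        out = out.sg_aconv1d(size=3, rate=rate, causal=True, act='relu', bn=True)
    # per-step logits over the vocabulary
    return out.sg_conv1d(size=1, dim=voca_size, act='linear', bn=False)
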
# -*- coding: utf-8 -*-
import sugartensor as tf

# set log level to debug
tf.sg_verbosity(10)

# MNIST input tensor ( with QueueRunner )
data = tf.sg_data.Mnist()

# inputs
x = data.train.image.sg_squeeze()
y = data.train.label

# create training graph ( GRU + layer normalization )
logit = (x
         .sg_gru(dim=200, ln=True, last_only=True)
         .sg_dense(dim=10))

# cross entropy loss with logit ( for training set )
loss = logit.sg_ce(target=y)

# accuracy evaluation ( for validation set )
acc = (logit
       .sg_reuse(input=data.valid.image)
       .sg_softmax()
       .sg_accuracy(target=data.valid.label, name='val'))

# train
tf.sg_train(log_interval=10, loss=loss, eval_metric=[acc],
            ep_size=data.train.num_batch, save_dir='asset/train/rnn')

model = ResNet()
inference_fn = model.inference
# logit = model.inference(x)


def my_loss(input_, labels, inference_fn, num_gpu=1):
    # build one loss tower per GPU and return the list of tower losses
    assert num_gpu >= 1
    tower_loss = []
    input_batch = tf.split(input_, num_gpu, axis=0)
    label_batch = tf.split(labels, num_gpu, axis=0)
    for i in range(num_gpu):
        with tf.device('/gpu:%d' % i):
            with tf.name_scope('gpu_%d' % i):
                # reuse variables for every tower after the first
                reuse = False if i == 0 else True
                logit = inference_fn(input_batch[i], reuse=reuse)
                loss = tf.reduce_mean(tf.square(logit - label_batch[i]))
                tower_loss.append(loss)
    return tower_loss

# alternative single-tower cross entropy loss ( unused )
# loss = tf.reduce_mean(logit.sg_ce(target=tf.cast(y, tf.int32)))

sg.sg_train(loss=my_loss(input_=input_, labels=label, inference_fn=inference_fn, num_gpu=4),
            lr=0.0001, ep_size=data.num_batch, log_interval=10,
            save_dir='log', optim='sgd')
# tf.sg_train(loss=my_loss(input_=input_, labels=label, inference_fn=inference_fn, num_gpu=4),
#             ep_size=data.num_batch, max_ep=120)

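# `ResNet`, `input_`, `label`, and `data` above come from the surrounding
# project and are not shown. A toy stand-in with the inference signature the
# loss tower expects ( the layer stack is an assumption, not a real residual
# network ):
class ResNet(object):
    def inference(self, x, reuse=False):
        with tf.variable_scope('resnet', reuse=reuse):
            # flatten, one hidden layer, single linear output for regression
            return (x.sg_flatten()
                    .sg_dense(dim=256, act='relu')
                    .sg_dense(dim=1, act='linear'))
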
# loss = logit.sg_ctc(target=y, seq_len=seq_len)

# cross entropy loss with L2 regularization over all trainable variables
reg_lambda = 0.0002
trainable = tf.trainable_variables()
lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in trainable]) * reg_lambda
loss = logit.sg_ce(target=y, one_hot=True) + lossL2

# train
config = tf.ConfigProto(allow_soft_placement=True,
                        inter_op_parallelism_threads=6,
                        intra_op_parallelism_threads=6)
sess = tf.Session(config=config)
tf.sg_init(sess)

# exponentially decaying learning rate
learning_rate = tf.train.exponential_decay(0.00001, tf.sg_global_step(),
                                           100, 0.95, staircase=False)

with tf.name_scope('summaries'):
    tf.summary.scalar('global_step', tf.sg_global_step())
    tf.summary.scalar('real_lr', learning_rate)

tf.sg_train(log_interval=30, lr=learning_rate, loss=loss,
            ep_size=data.num_batch, max_ep=8, early_stop=False,
            lr_reset=True)

# encoder network
with tf.sg_context(name='encoder', size=4, stride=2, act='relu'):
    z = (x
         .sg_conv(dim=64)
         .sg_conv(dim=128)
         .sg_flatten()
         .sg_dense(dim=1024)
         .sg_dense(dim=num_dim, act='linear'))

# decoder network
with tf.sg_context(name='decoder', size=4, stride=2, act='relu'):
    xx = (z
          .sg_dense(dim=1024)
          .sg_dense(dim=7*7*128)
          .sg_reshape(shape=(-1, 7, 7, 128))
          .sg_upconv(dim=64)
          .sg_upconv(dim=1, act='sigmoid'))

# add image summary
tf.sg_summary_image(x, name='origin')
tf.sg_summary_image(xx, name='recon')

# loss
loss = xx.sg_mse(target=x)

# do training
tf.sg_train(loss=loss, log_interval=10, ep_size=data.train.num_batch,
            save_dir='asset/train/sae')

# parallel loss tower
@tf.sg_parallel
def get_loss(opt):
    # encode audio feature
    with tf.variable_scope("model"):
        logit_clean = get_logit(opt.input[opt.gpu_index], voca_size=voca_size)
        loss_clean = logit_clean.sg_ctc(target=opt.target[opt.gpu_index],
                                        seq_len=opt.seq_len[opt.gpu_index])
    with tf.variable_scope("model", reuse=True):
        logit_noise = get_logit(opt.input_noise[opt.gpu_index], voca_size=voca_size)
        loss_noise = logit_noise.sg_ctc(target=opt.target[opt.gpu_index],
                                        seq_len=opt.seq_len[opt.gpu_index])

    # CTC loss plus clean/noise penalty term
    loss_penalize = penalize_loss(opt.gamma, opt.lambd, logit_clean, logit_noise)
    return loss_clean + opt.alpha * loss_noise + loss_penalize

#
# train
#
tf.sg_train(lr=0.0001,
            loss=get_loss(input=inputs, input_noise=inputs_noise, target=labels,
                          seq_len=seq_len, alpha=1, gamma=0.01, lambd=0.01),
            ep_size=data.num_batch, max_ep=50)

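# `penalize_loss` above is project code that is not shown. Given its
# ( gamma, lambd, logit_clean, logit_noise ) arguments, one plausible reading
# is a consistency penalty between the clean and noisy towers plus weight
# decay; this sketch is an assumption, not the original definition:
def penalize_loss(gamma, lambd, logit_clean, logit_noise):
    # gamma scales the L2 distance between the two towers' logits
    consistency = gamma * tf.reduce_mean(tf.square(logit_clean - logit_noise), axis=[1, 2])
    # lambd scales an L2 weight-decay term over trainable variables
    decay = lambd * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
    return consistency + decay
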
import tensorflow as tf
import sugartensor as sg

from input_data import Surv
from model import MLP

sg.sg_verbosity(10)

batch_size = 16

# survival data input tensors
data = Surv()
x = data.data
y = data.label

# build model and regression loss
model = MLP()
logit = model.inference(x)
loss = tf.reduce_mean(logit.sg_mse(target=tf.cast(y, tf.float32)))

# train
sg.sg_train(loss=loss, ep_size=data.num_batch, log_interval=10,
            save_dir='log')

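# `Surv` and `MLP` above are project modules that are not shown. A minimal
# MLP.inference consistent with how it is called ( the layer sizes are
# assumptions ):
class MLP(object):
    def inference(self, x):
        # two hidden layers, then a single linear unit for regression
        return (x.sg_dense(dim=128, act='relu', bn=True)
                .sg_dense(dim=64, act='relu', bn=True)
                .sg_dense(dim=1, act='linear')
                .sg_squeeze())
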