def aux_classifier(self, inputs, labels, input_channels, is_training, scope=None):
    """Build an auxiliary classifier head and return its loss.

    The head is attached to an intermediate feature map of the Inception
    network so that an extra loss term can help propagate gradients
    backward.

    Args:
        inputs: feature map fed to the head.
        labels: targets forwarded unchanged to ``ops.loss``.
        input_channels: channel count of ``inputs``, passed to the 1x1
            convolution.
        is_training: forwarded to the convolution and dense layers.
        scope: variable scope name; variables are created with
            ``tf.AUTO_REUSE``.

    Returns:
        Whatever ``ops.loss`` returns for the head's 10-way output.
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        # 5x5 average pooling, stride 3, no padding
        # (original author's note: result is 4x4xC)
        pooled = tf.nn.avg_pool(inputs, 5, 3, 'VALID', name='pool')

        # 1x1 convolution down to 128 channels, without batch norm
        reduced = ops.convolution(pooled, input_channels, 128, 1, 128,
                                  batch_norm=False, is_training=is_training,
                                  scope='auxconv')

        # flatten; the next layer expects 2048 features (4 * 4 * 128)
        flat = ops.flatten(reduced, scope='flatten')

        # hidden fully connected layer with dropout (2048 -> 1024)
        hidden = ops.dense(flat, 2048, 1024, dropout=True, dropout_rate=0.7,
                           is_training=is_training, scope='fc1')

        # 10-way output layer without activation
        # (original paper uses 1000 outputs for ImageNet)
        logits = ops.dense(hidden, 1024, 10, activation=None,
                           is_training=is_training, scope='fc2')

        # loss of the auxiliary classifier
        return ops.loss(logits, labels, scope='auxloss')
def build_model(self, inputs, labels, is_training):
    """Assemble the AlexNet-style graph with its loss, accuracy and optimizer.

    Sets ``self.network`` (10-way output of the last dense layer),
    ``self.loss``, ``self.accuracy`` and, when ``is_training`` is truthy,
    ``self.optimizer``.

    Args:
        inputs: image batch; axes 1 and 2 are zero-padded, axes 0 and 3 are
            left untouched.
        labels: targets forwarded to ``ops.loss`` and ``ops.accuracy``.
        is_training: forwarded to the layers; also gates creation of the
            optimizer.

    Note:
        ``self.image_size`` is expected to be >= ``self.height``.
    """
    # Zero-pad both padded axes up to image_size. Splitting the total into
    # before/after keeps the result exactly image_size even when the
    # difference is odd (a symmetric int(diff / 2) pad would end up one
    # pixel short). For an even difference this equals the symmetric pad.
    pad_total = int(self.image_size - self.height)
    pad_before = pad_total // 2
    pad_after = pad_total - pad_before
    inputs = tf.pad(inputs, [[0, 0],
                             [pad_before, pad_after],
                             [pad_before, pad_after],
                             [0, 0]])

    # Sizes quoted below are the original author's annotations.
    # convolution with 11x11 kernel and stride 4 (new size: 55x55x96)
    self.network = ops.convolution(inputs, self.channels, 96, 11, 96, stride=4,
                                   padding='VALID', is_training=is_training,
                                   scope='conv1')
    # pooling with 3x3 kernel (new size: 27x27x96)
    self.network = ops.pooling(self.network, k_size=3, scope='pool1')

    # convolution with 5x5 kernel and stride 1 (new size: 27x27x256)
    self.network = ops.convolution(self.network, 96, 256, 5, 256,
                                   is_training=is_training, scope='conv2')
    # pooling with 3x3 kernel (new size: 13x13x256)
    self.network = ops.pooling(self.network, k_size=3, scope='pool2')

    # three 3x3 convolutions without batch norm (13x13x384, 13x13x384, 13x13x256)
    self.network = ops.convolution(self.network, 256, 384, 3, 384, batch_norm=False,
                                   is_training=is_training, scope='conv3')
    self.network = ops.convolution(self.network, 384, 384, 3, 384, batch_norm=False,
                                   is_training=is_training, scope='conv4')
    self.network = ops.convolution(self.network, 384, 256, 3, 256, batch_norm=False,
                                   is_training=is_training, scope='conv5')
    # pooling with 3x3 kernel (new size: 6x6x256)
    self.network = ops.pooling(self.network, k_size=3, scope='pool3')

    # flatten; fc1 expects 9216 features (6 * 6 * 256)
    self.network = ops.flatten(self.network, scope='flatten')

    # fully connected layer (new size: 4096)
    self.network = ops.dense(self.network, 9216, 4096, dropout=True, dropout_rate=0.2,
                             is_training=is_training, scope='fc1')
    # fully connected layer (new size: 1024) -- Original Paper Size: 4096 (for ImageNet)
    self.network = ops.dense(self.network, 4096, 1024, dropout=True, dropout_rate=0.2,
                             is_training=is_training, scope='fc2')
    # output layer (new size: 10) -- Original Paper Size: 1000 (for ImageNet)
    self.network = ops.dense(self.network, 1024, 10, activation=None,
                             is_training=is_training, scope='fc3')

    self.loss = ops.loss(self.network, labels, scope='loss')
    # Expose accuracy with the same ops.accuracy call the other
    # build_model variants in this file use.
    self.accuracy = ops.accuracy(self.network, labels, scope='accuracy')

    if is_training:
        self.optimizer = ops.optimize(self.loss, self.learning_rate, scope='update')
def __init__(self, lr=0.0001, optimizer=tf.train.Optimizer, fine_tuning=True, dropout=False, adaptive_ratio=1.0):
    '''
    Build the placeholders, network, loss, accuracy and training op.

    ----------Hyperparameters -------------
    :param lr: Learning rate.
    :param optimizer: Optimizer class; it is called as
        ``optimizer(learning_rate=lr)`` when ``adaptive_ratio`` is not
        below 1.0, otherwise it is handed to ``ops.train``.
    :param fine_tuning: Forwarded to ``self.build_convnet``. Per the original
        documentation: if True, the parameters of the CNN layers are
        fine-tuned too; otherwise only the FC layers are trained.
    :param dropout: Forwarded to ``self.build_fcnet``. Per the original
        documentation: if True, dropout is applied to all fully connected
        layers except the last one, and ``dropout_keep_prob`` should be fed
        (its default value is 1.0).
    :param adaptive_ratio: Float. When below 1.0 the convolutional layers are
        trained with ``lr * adaptive_ratio`` while the FC layers use ``lr``.
    :return:
    '''
    summary_template = ("Learning rate : {}, optimizer : {}, fine_tuning : {}, "
                        "dropout : {}, adaptive ratio : {}")
    self.desc = summary_template.format(lr, optimizer.__name__, fine_tuning,
                                        dropout, adaptive_ratio)
    print(self.desc)

    self.params = dict(
        lr=lr,
        optimizer=optimizer,
        fine_tuning=fine_tuning,
        dropout=dropout,
        adaptive_ratio=adaptive_ratio,
    )

    # Graph inputs: 32x32 RGB images, integer class ids, optional keep-prob.
    self.xs = tf.placeholder(tf.float32, [None, 32, 32, 3])
    self.ys = tf.placeholder(tf.int32, [None])
    self.dropout_keep_prob = tf.placeholder_with_default(1.0, None)

    conv_features = self.build_convnet(fine_tuning)
    class_scores = self.build_fcnet(conv_features, dropout)
    self.probs = tf.nn.softmax(class_scores, name='softmax')

    # NOTE(review): the softmax output is passed as `logits` -- confirm that
    # ops.loss / ops.accuracy do not apply softmax a second time.
    self.loss = ops.loss(logits=self.probs, labels=self.ys, one_hot=False)
    self.accuracy = ops.accuracy(logits=self.probs, labels=self.ys, one_hot=False)

    if adaptive_ratio < 1.0:
        # separate learning rates for convolutional and FC layers
        self.train = ops.train(self.loss, optimizer=optimizer,
                               conv_lr=lr * adaptive_ratio, fc_lr=lr)
    else:
        trainer = optimizer(learning_rate=lr)
        self.train = trainer.minimize(self.loss)
def build_model(self, inputs, labels, is_training=False):
    """Assemble a small two-convolution network with three dense layers.

    Sets ``self.network`` (10-way output), ``self.loss``, ``self.accuracy``
    and, when ``is_training`` is truthy, ``self.optimizer``.

    Args:
        inputs: image batch fed to the first convolution.
        labels: targets forwarded to ``ops.loss`` and ``ops.accuracy``.
        is_training: forwarded to the convolutions; also gates creation of
            the optimizer.
    """
    # two 5x5 convolution + pooling stages
    net = ops.convolution(inputs, self.channels, 50, 5, 50,
                          is_training=is_training, scope='conv1')
    net = ops.pooling(net, scope='pool1')
    net = ops.convolution(net, 50, 20, 5, 20,
                          is_training=is_training, scope='conv2')
    net = ops.pooling(net, scope='pool2')

    # flatten and read the feature count off the static shape
    net = ops.flatten(net, scope='flatten')
    flat_features = net.get_shape().as_list()[1]

    # dense stack: flat_features -> 200 -> 50 -> 10 (no activation on the output)
    net = ops.dense(net, flat_features, 200, scope='fc1')
    net = ops.dense(net, 200, 50, scope='fc2')
    net = ops.dense(net, 50, 10, activation=None, scope='fc3')
    self.network = net

    self.loss = ops.loss(self.network, labels, scope='loss')
    self.accuracy = ops.accuracy(self.network, labels, scope='accuracy')

    if is_training:
        self.optimizer = ops.optimize(self.loss, self.learning_rate, scope='update')
def train(args, data, params):
    """Train the model and return the final normalized embeddings.

    Builds a fresh graph, optionally seeds it with pre-trained embeddings
    (``args.w2v``) and/or a saved checkpoint (``args.modelfile``), then runs
    ``args.nepochs`` epochs. After each epoch the summed validation loss is
    compared with the previous one: if it got worse the learning rate is
    halved, otherwise the model is saved to ``args.outfile`` (when set).
    From epoch index ``args.decay_after`` on, the learning rate is also
    divided by 1.2 every epoch.

    Args:
        args: options object; reads ``learning_rate``, ``batch_size``,
            ``w2v``, ``modelfile``, ``nepochs``, ``normalize``, ``outfile``
            and ``decay_after``.
        data: mapping with ``'train'`` and ``'valid'`` datasets, each
            exposing ``nbatches``.
        params: model parameters; ``params['gram_size']`` sets the input
            width.

    Returns:
        The value obtained by running ``normalize_op`` once more at the end.
    """
    train_set = data['train']
    valid_set = data['valid']
    learning_rate = args.learning_rate

    with tf.Graph().as_default():
        input_ph = tf.placeholder(tf.int32,
                                  shape=[args.batch_size, params['gram_size']])
        targ_ph = tf.placeholder(tf.int32, shape=[args.batch_size])
        learning_rate_ph = tf.placeholder(tf.float32, shape=[])

        # The third value returned by ops.model (the variable list) and the
        # second returned by ops.train (a debug print op) are not needed.
        if args.w2v:
            with h5py.File(args.w2v, 'r') as datafile:
                embeds = datafile['w2v'][:]
            scores, normalize_op, _ = ops.model(input_ph, params, embeds)
        else:
            scores, normalize_op, _ = ops.model(input_ph, params)

        loss = ops.loss(scores, targ_ph)
        train_op, _ = ops.train(loss, learning_rate_ph, args)

        def total_valid_loss(sess):
            """Sum the batch losses over the whole validation set."""
            total = 0.
            for i in xrange(valid_set.nbatches):
                feed = get_feed_dict(valid_set, i, input_ph, targ_ph,
                                     learning_rate_ph)
                total += sess.run([loss], feed_dict=feed)[0]
            return total

        sess = tf.Session()
        try:
            # initialize variables before they can be used
            init = tf.initialize_all_variables()
            saver = tf.train.Saver()
            sess.run(init)

            if args.modelfile:
                saver.restore(sess, args.modelfile)
                print("Model restored from %s" % args.modelfile)

            last_valid = total_valid_loss(sess)
            print('Initial valid loss: %.3f'
                  % math.exp(last_valid / valid_set.nbatches))

            for epoch in xrange(args.nepochs):
                print("Training epoch %d with learning rate %.3f"
                      % (epoch + 1, learning_rate))
                start_time = time.time()

                train_loss = 0.
                for i in xrange(train_set.nbatches):
                    feed = get_feed_dict(train_set, i, input_ph, targ_ph,
                                         learning_rate_ph, learning_rate)
                    _, batch_loss = sess.run([train_op, loss], feed_dict=feed)
                    train_loss += batch_loss

                valid_loss = total_valid_loss(sess)

                if args.normalize:
                    sess.run(normalize_op)

                duration = time.time() - start_time
                print("\tloss = %.3f, valid ppl = %.3f, %.3f s"
                      % (math.exp(train_loss / train_set.nbatches),
                         math.exp(valid_loss / valid_set.nbatches),
                         duration))

                if last_valid < valid_loss:
                    # validation got worse: back off the learning rate
                    learning_rate /= 2.
                elif args.outfile:
                    saver.save(sess, args.outfile)

                if epoch >= args.decay_after:
                    learning_rate /= 1.2
                last_valid = valid_loss

            # return final normalized embeddings
            return sess.run([normalize_op])[0]
        finally:
            # release the session's resources even if training fails
            sess.close()
# Output-layer parameters: weights drawn from a truncated normal whose
# standard deviation is 1 / sqrt(num_classes), and zero-initialized biases.
weights = tf.Variable(
    tf.truncated_normal(shape=[num_hidden, num_classes],
                        stddev=1.0 / math.sqrt(float(num_classes))))
biases = tf.Variable(tf.zeros([num_classes]))

# Define TensorFlow Operations
# ===========================================================================
# Assigns values for each example across all classes corresponding to
# likelihood that the sequence belongs to that class (logits are unscaled
# values, not a probability distribution)
logit = ops.inference(sequence, weights, biases, num_hidden)

# Initialize tensorflow operations to train network:
# Loss function: cross entropy (per the original note; ops.loss is defined
# elsewhere)
loss = ops.loss(logit, target)

# Add a scalar summary for the snapshot loss.
tf.scalar_summary(loss.op.name, loss)

# Gradient calculations:
# Initialize gradient descent optimizer
optimizer = ops.optimizer(learning_rate)
# Step 1: Calculate gradient
gradients = ops.calc_gradient(optimizer, loss)
# Step 2: Calculate gradient norm for stopping criteria
gradient_norm = ops.gradient_norm(gradients)
# Step 3: Apply gradients and update model
def __init__(self, img_shape, model_path, logdir, sampledir, epochs=200, gen_lr=0.001, dis_lr=0.001, z_shape=100, batch_size=64, beta1=0.5, SampleAfter=100, SaveAfter=1000):
    """Store the settings, load MNIST and build the GAN training graph.

    Args:
        img_shape: ``(height, width, channels)`` triple.
        model_path, logdir, sampledir: paths stored on the instance.
        epochs: stored as ``self.epochs``.
        gen_lr, dis_lr: Adam learning rates for generator / discriminator.
        z_shape: size of the latent vector (a single integer).
        batch_size: stored as ``self.batch_size``.
        beta1: second positional argument given to both Adam optimizers.
        SampleAfter, SaveAfter: stored on the instance.
    """
    # Keep the settings on the instance so other methods can reach them.
    self.height, self.width, self.channels = img_shape
    self.epochs = epochs
    self.gen_lr = gen_lr
    self.dis_lr = dis_lr
    self.z_shape = z_shape  # single integer value
    self.batch_size = batch_size
    self.beta1 = beta1
    self.SampleAfter = SampleAfter
    self.SaveAfter = SaveAfter
    self.model_path = model_path
    self.logdir = logdir
    self.sampledir = sampledir

    # Generator and discriminator objects
    self.genrator = genrator(img_shape, z_shape)
    self.discriminator = discriminator(img_shape)

    # Dataset: train and test images concatenated, labels discarded,
    # pixel values mapped with x / 127.5 - 1.
    (train_images, _), (test_images, _) = tf.keras.datasets.mnist.load_data()
    all_images = np.concatenate([train_images, test_images])
    self.x_train = all_images / 127.5 - 1

    # Input placeholders: real images and latent vectors
    self.in_x = tf.placeholder(tf.float32, [None, self.height, self.width])
    self.in_z = tf.placeholder(tf.float32, [None, z_shape])

    # Generated images
    self.genrated = self.genrator.feed(self.in_z)

    # Discriminator outputs for generated and for real images
    fake_out = self.discriminator.feed(self.genrated)
    real_out = self.discriminator.feed(self.in_x)

    # Discriminator objective: generated -> zeros, real -> ones
    loss_on_fake = loss(tf.zeros_like(fake_out), fake_out)
    loss_on_real = loss(tf.ones_like(real_out), real_out)
    self.DisLoss = tf.add(loss_on_fake, loss_on_real)

    # Generator objective: generated images should be scored as ones
    self.GenLoss = loss(tf.ones_like(fake_out), fake_out)

    # TensorBoard summary (only the discriminator loss is recorded)
    tf.summary.scalar("DisLos", self.DisLoss)

    # Split trainable variables by the tag contained in their names
    trainables = tf.trainable_variables()
    dis_vars = [v for v in trainables if 'DIS' in v.name]
    gen_vars = [v for v in trainables if 'GEN' in v.name]

    dis_adam = tf.train.AdamOptimizer(self.dis_lr, self.beta1)
    self.DisOpt = dis_adam.minimize(self.DisLoss, var_list=dis_vars)
    gen_adam = tf.train.AdamOptimizer(self.gen_lr, self.beta1)
    self.GenOpt = gen_adam.minimize(self.GenLoss, var_list=gen_vars)

    self.SummaryOp = tf.summary.merge_all()
    self.saver = tf.train.Saver()
def build_model(self, inputs, labels, is_training):
    """Assemble the GoogLeNet-style graph with auxiliary classifiers.

    Sets ``self.network`` (10-way output), ``self.accuracy``, ``self.loss``
    and, when ``is_training`` is truthy, ``self.optimizer``. During training
    the loss is the main loss plus the two auxiliary classifier losses.

    Args:
        inputs: image batch; axes 1 and 2 are zero-padded symmetrically by
            ``int((self.image_size - self.height) / 2)``.
        labels: targets forwarded to the loss / accuracy ops and to the
            auxiliary classifiers.
        is_training: forwarded to the layers; also gates the auxiliary
            classifiers and the optimizer.
    """
    pad = int((self.image_size - self.height) / 2)
    inputs = tf.pad(inputs, [[0, 0], [pad, pad], [pad, pad], [0, 0]])

    # Sizes quoted below are the original author's annotations.
    # convolution with 7x7 kernel and stride 2 (new size: 112x112x64)
    self.network = ops.convolution(inputs, self.channels, 64, 7, 64, stride=2,
                                   is_training=is_training, scope='conv1')
    # pooling with 3x3 kernel (new size: 56x56x64)
    self.network = ops.pooling(self.network, k_size=3, scope='pool1')
    # convolution with 1x1 kernel and stride 1 (new size: 56x56x192)
    self.network = ops.convolution(self.network, 64, 192, 1, 192, batch_norm=False,
                                   is_training=is_training, scope='conv2')
    # convolution with 3x3 kernel and stride 1 (new size: 56x56x192)
    self.network = ops.convolution(self.network, 192, 192, 3, 192,
                                   is_training=is_training, scope='conv3')
    # pooling with 3x3 kernel (new size: 28x28x192)
    self.network = ops.pooling(self.network, k_size=3, scope='pool2')

    # Inception modules in order. Each entry is
    #   (filter spec, final_pool, auxiliary head or None)
    # where an auxiliary head is (scope, input channels) and is attached to
    # the output of that module. Scopes are numbered 'incept1'..'incept9'
    # from the position in this list.
    inception_specs = [
        ([[64, 96, 16], [128, 32, 32]], False, None),               # 3a
        ([[128, 128, 32], [192, 96, 64]], True, None),              # 3b
        ([[192, 96, 16], [208, 48, 64]], False, ('auxclass1', 512)),  # 4a
        ([[160, 112, 24], [224, 64, 64]], False, None),             # 4b
        ([[128, 128, 24], [256, 64, 64]], False, None),             # 4c
        ([[112, 144, 32], [288, 64, 64]], False, ('auxclass2', 528)),  # 4d
        ([[256, 160, 32], [320, 128, 128]], True, None),            # 4e
        ([[256, 160, 32], [320, 128, 128]], False, None),           # 5a
        ([[384, 192, 48], [384, 128, 128]], False, None),           # 5b
    ]

    aux_losses = []
    for idx, (filters, final_pool, aux_head) in enumerate(inception_specs, 1):
        # Only pass final_pool where the original call did, so the helper's
        # own default applies everywhere else.
        extra = {'final_pool': True} if final_pool else {}
        self.network = self.inception_module(self.network, filters,
                                             scope='incept' + str(idx), **extra)
        if is_training and aux_head is not None:
            aux_scope, aux_channels = aux_head
            aux_losses.append(self.aux_classifier(self.network, labels, aux_channels,
                                                  is_training, scope=aux_scope))

    # 7x7 average pooling, stride 1 (new size: 1x1x1024). 'VALID' is needed
    # for the map to collapse to 1x1 so that the flattened size matches the
    # 1024 inputs of fc1; tf.nn.avg_pool names its op via `name`.
    with tf.variable_scope('final_pool', reuse=tf.AUTO_REUSE):
        self.network = tf.nn.avg_pool(self.network, 7, 1, 'VALID', name='pool')

    # flatten (new size: 1024)
    self.network = ops.flatten(self.network, scope='flatten')

    # fully connected layer (new size: 1024)
    self.network = ops.dense(self.network, 1024, 1024, dropout=True, dropout_rate=0.4,
                             is_training=is_training, scope='fc1')
    # output layer (new size: 10) -- Original Paper Size: 1000 (for ImageNet)
    self.network = ops.dense(self.network, 1024, 10, activation=None,
                             is_training=is_training, scope='fc2')

    loss = ops.loss(self.network, labels, scope='loss')
    self.accuracy = ops.accuracy(self.network, labels, scope='accuracy')

    if is_training:
        # if training use auxiliary classifiers as well
        aux_loss1, aux_loss2 = aux_losses
        self.loss = loss + aux_loss1 + aux_loss2
        self.optimizer = ops.optimize(self.loss, self.learning_rate, scope='update')
    else:
        self.loss = loss
def build_model(self, inputs, labels, is_training):
    """Assemble the bottleneck ResNet graph with loss, accuracy and optimizer.

    Sets ``self.network`` (10-way output), ``self.loss``, ``self.accuracy``
    and, when ``is_training`` is truthy, ``self.optimizer``.

    Args:
        inputs: image batch; axes 1 and 2 are zero-padded symmetrically by
            ``int((self.image_size - self.height) / 2)``.
        labels: targets forwarded to ``ops.loss`` and ``ops.accuracy``.
        is_training: forwarded to the layers; also gates creation of the
            optimizer.
    """

    def res_block(inputs, in_channels, out_channels, is_training, idx):
        """Identity bottleneck block: 1x1 -> 3x3 -> 1x1, added to the input."""
        net = ops.convolution(inputs, in_channels[0], out_channels[0], 1, out_channels[0],
                              is_training=is_training, scope='res%s_conv1' % idx)
        net = ops.convolution(net, in_channels[1], out_channels[1], 3, out_channels[1],
                              is_training=is_training, scope='res%s_conv2' % idx)
        net = ops.convolution(net, in_channels[2], out_channels[2], 1, out_channels[2],
                              activation=None, is_training=is_training,
                              scope='res%s_conv3' % idx)
        # tf.nn.relu names its op via `name`; it has no `scope` argument
        return tf.nn.relu(inputs + net, name='res%s_relu' % idx)

    def res_conv_block(inputs, in_channels, out_channels, stride, is_training, idx):
        """Projection bottleneck block: the skip path is a strided 1x1 conv."""
        skip = ops.convolution(inputs, in_channels[0], out_channels[2], 1, out_channels[2],
                               stride=stride, activation=None, is_training=is_training,
                               scope='res%s_skip' % idx)
        net = ops.convolution(inputs, in_channels[0], out_channels[0], 1, out_channels[0],
                              is_training=is_training, scope='res%s_conv1' % idx)
        net = ops.convolution(net, in_channels[1], out_channels[1], 3, out_channels[1],
                              is_training=is_training, scope='res%s_conv2' % idx)
        # the stride is applied on the last 1x1 conv so the main path matches
        # the spatial size of the skip path
        net = ops.convolution(net, in_channels[2], out_channels[2], 1, out_channels[2],
                              stride=stride, activation=None, is_training=is_training,
                              scope='res%s_conv3' % idx)
        return tf.nn.relu(skip + net, name='res%s_relu' % idx)

    # pad inputs to size 224x224x3 - NOTE: may change to bilinear upsampling
    pad = int((self.image_size - self.height) / 2)
    inputs = tf.pad(inputs, [[0, 0], [pad, pad], [pad, pad], [0, 0]])

    # convolution with 7x7 kernel and stride 2 (new size: 112x112x64)
    self.network = ops.convolution(inputs, self.channels, 64, 7, 64, stride=2,
                                   is_training=is_training, scope='conv1')
    # pooling with 3x3 kernel (new size: 56x56x64)
    self.network = ops.pooling(self.network, k_size=3, scope='pool1')

    # Residual stages: (stride, input channels, bottleneck output channels,
    # number of identity blocks after the projection block). Blocks are
    # numbered consecutively 1..16 across the stages.
    stages = [
        (1, 64, [64, 64, 256], 2),
        (2, 256, [128, 128, 512], 3),
        (2, 512, [256, 256, 1024], 5),
        (2, 1024, [512, 512, 2048], 2),
    ]
    idx = 1
    for stride, stage_in, out_channels, n_identity in stages:
        mid = out_channels[0]
        self.network = res_conv_block(self.network, [stage_in, mid, mid], out_channels,
                                      stride, is_training, idx)
        idx += 1
        for _ in range(n_identity):
            self.network = res_block(self.network, [out_channels[2], mid, mid],
                                     out_channels, is_training, idx)
            idx += 1

    # 7x7 average pooling, stride 1. 'VALID' collapses the map to 1x1 so
    # that the flattened size matches the 2048 inputs of the dense layer;
    # tf.nn.avg_pool names its op via `name`.
    self.network = tf.nn.avg_pool(self.network, 7, 1, 'VALID', name='avg_pool')
    self.network = ops.flatten(self.network, scope='flatten')

    # fully connected output layer (10 classes, no activation)
    self.network = ops.dense(self.network, 2048, 10, activation=None,
                             is_training=is_training, scope='fc')

    self.loss = ops.loss(self.network, labels, scope='loss')
    self.accuracy = ops.accuracy(self.network, labels, scope='accuracy')

    if is_training:
        self.optimizer = ops.optimize(self.loss, self.learning_rate, scope='update')
def build_model(self, inputs, labels, is_training):
    """Assemble the 16-convolution VGG-style graph with loss and optimizer.

    Sets ``self.network`` (10-way output), ``self.loss``, ``self.accuracy``
    and, when ``is_training`` is truthy, ``self.optimizer``.

    Args:
        inputs: image batch; axes 1 and 2 are zero-padded symmetrically by
            ``int((self.image_size - self.height) / 2)``.
        labels: targets forwarded to ``ops.loss`` and ``ops.accuracy``.
        is_training: forwarded to the layers; also gates creation of the
            optimizer.
    """
    # pad inputs to size 224x224x3 - NOTE: may change to bilinear upsampling
    border = int((self.image_size - self.height) / 2)
    net = tf.pad(inputs, [[0, 0], [border, border], [border, border], [0, 0]])

    # Convolutional stages as (output channels, number of 3x3 convolutions).
    # Every stage ends with a pooling layer; per the original annotations
    # the spatial size goes 224 -> 112 -> 56 -> 28 -> 14 -> 7.
    stages = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))

    in_channels = self.channels
    conv_idx = 0
    for pool_idx, (out_channels, n_convs) in enumerate(stages, 1):
        for _ in range(n_convs):
            conv_idx += 1
            net = ops.convolution(net, in_channels, out_channels, 3, out_channels,
                                  is_training=is_training,
                                  scope='conv' + str(conv_idx))
            in_channels = out_channels
        net = ops.pooling(net, scope='pool' + str(pool_idx))

    # flatten; fc1 expects 25088 features (7 * 7 * 512)
    net = ops.flatten(net, scope='flatten')

    # fully connected layer (new size: 4096)
    net = ops.dense(net, 25088, 4096, dropout=True, dropout_rate=0.2,
                    is_training=is_training, scope='fc1')
    # fully connected layer (new size: 1024) -- Original Paper Size: 4096 (for ImageNet)
    net = ops.dense(net, 4096, 1024, dropout=True, dropout_rate=0.2,
                    is_training=is_training, scope='fc2')
    # output layer (new size: 10) -- Original Paper Size: 1000 (for ImageNet)
    net = ops.dense(net, 1024, 10, activation=None,
                    is_training=is_training, scope='fc3')
    self.network = net

    self.loss = ops.loss(self.network, labels, scope='loss')
    self.accuracy = ops.accuracy(self.network, labels, scope='accuracy')

    if is_training:
        self.optimizer = ops.optimize(self.loss, self.learning_rate, scope='update')
# Placeholder for the ground-truth targets: one 6x1 column per example.
Y = tf.placeholder(tf.float32, shape=[BATCH_SIZE, 6, 1])

# Load one batch of image pairs from the '2001' dataset, and the matching
# ground-truth rows from its results file (a trailing axis is added so the
# array has three dimensions like Y).
img1, img2 = getBatch('2001',ID, BATCH_SIZE, WIDTH, HEIGHT)
output_data = np.genfromtxt('2001/results.csv', delimiter=',')
output = getGT(output_data, ID, BATCH_SIZE)[:,:,np.newaxis]

# Build the network on the two image placeholders (X1, X2 are defined
# outside this fragment) and derive the warp from its first output.
arg = {}
net = KLTNet( arg )
p, dp = net( X1, X2 )
warp = get_warp(p)
# print p.shape

# Disabled alternative: photometric loss between X2 and the warped X1.
# loss_op = tf.reduce_mean( tf.square( X2[:,EPS:(HEIGHT-EPS), EPS:(WIDTH-EPS),:] - spatial_transformer_network(X1, warp)[:,5:67, 5:99,:] ), axis=[1,2,3] )
# Active loss: computed from the predicted parameters p and the targets Y.
loss_op = loss( p, Y, BATCH_SIZE, HEIGHT, WIDTH )
# transform_op = spatial_transformer_network(X1, warp)
# loss2 = tf.reduce_mean(dp)

init = tf.global_variables_initializer()
# device_count {'GPU': 0} makes the session run without GPU devices.
config = tf.ConfigProto( device_count = {'GPU': 0} )
with tf.Session( config=config ) as sess:
    sess.run(init)
    # print( sess.run(p,feed_dict={X1:img1, X2:img2} ) )
def run_training(datasets):
    """Train the autoencoder for a number of steps.

    Builds the graph, restores or initializes the variables, then runs
    ``FLAGS.num_epochs`` epochs over ``datasets.train``, writing summaries
    and checkpoints and periodically evaluating on the train, validation
    and test sets.

    :param datasets: object exposing ``emb_size`` and the ``train``,
        ``validation`` and ``test`` data sets.
    """
    # NOTE(review): the nesting of the checkpoint/evaluation section below
    # was reconstructed from a source without indentation -- confirm it
    # against the original file.
    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        placeholders = placeholder_inputs(FLAGS.batch_size, datasets.emb_size)
        inputs_placeholder, labels_placeholder, mask_placeholder = placeholders

        # Build a Graph that computes predictions from the inference model.
        logits = ops.inference(inputs_placeholder, datasets.emb_size, FLAGS.hidden1, FLAGS.hidden2, FLAGS.dropout_rate)

        # Add to the Graph the Ops for loss calculation.
        loss = ops.loss(logits, labels_placeholder, mask_placeholder)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = ops.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct, eval_total = ops.evaluation(logits, labels_placeholder, mask_placeholder)

        # keep track of the epoch
        # NOTE(review): `increment` is never run in this function, so
        # `count` is only changed by the assign_sub below.
        count = tf.Variable(0)
        increment = tf.count_up_to(count, FLAGS.num_epochs)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Restore from a checkpoint when one exists and retraining was not
        # requested; otherwise run the Op to initialize the variables.
        if os.path.exists(CP_INFO) and not FLAGS.retrain:
            restore_variables(sess)
            # redo epoch that we last quit in the middle of
            sess.run(count.assign_sub(1))
        else:
            init = tf.initialize_all_variables()
            sess.run(init)

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.train.SummaryWriter(FLAGS.summary_dir, graph_def=sess.graph_def)

        def do_eval(data_set):
            """Runs one evaluation against the full epoch of data.

            Accumulates the (correct, total) pairs returned per batch and
            prints the totals together with their ratio.

            :param data_set: The data_set which we will use to retrieve batches
            """
            # And run one epoch of eval.
            steps_per_epoch = data_set.num_examples // FLAGS.batch_size
            counts = np.zeros(2)
            for _ in xrange(steps_per_epoch):
                feed_dict = fill_feed_dict(data_set, placeholders)
                run = sess.run([eval_correct, eval_total], feed_dict=feed_dict)
                counts += run
            correct, total = counts
            print(' Num examples: %d Num correct: %d Precision @ 1: %0.04f' % (int(total), int(correct), correct / total))

        # And then after everything is built, start the training loop.
        steps_per_epoch = datasets.train.num_examples // FLAGS.batch_size
        # start_time is set once, so the printed duration is the time elapsed
        # since training began, not the time of a single epoch.
        start_time = time.time()
        # TODO: make epoch a saved variable so that training picks up where it left off
        for epoch in xrange(FLAGS.num_epochs):
            for step in xrange(steps_per_epoch):
                # Fill a feed dictionary with the actual set of images and labels
                # for this particular training step.
                feed_dict = fill_feed_dict(datasets.train, placeholders)

                # Run one step of the model. The return values are the activations
                # from the `train_op` (which is discarded) and the `loss` Op. To
                # inspect the values of your Ops or variables, you may include them
                # in the list passed to sess.run() and the value tensors will be
                # returned in the tuple from the call.
                _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

                # Only the first step of each epoch reports progress.
                if step == 0:
                    # Write the summaries and print an overview fairly often.
                    duration = time.time() - start_time
                    # Print status to stdout.
                    print('Epoch %d: loss = %.2f (%.3f sec)' % (epoch, loss_value, duration))
                    # Update the events file.
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, epoch)

            # create the save directory if not already there
            if not os.path.isdir(FLAGS.save_dir):
                os.mkdir(FLAGS.save_dir)
            save_dir = os.path.join(FLAGS.save_dir, 'model.ckpt')
            # NOTE(review): global_step is the step index, not the epoch --
            # confirm this is the intended checkpoint suffix.
            saver.save(sess, save_dir, global_step=step)

            # Evaluate every 10th epoch and after the final epoch.
            if epoch % 10 == 0 or (epoch + 1) == FLAGS.num_epochs:
                # Evaluate against the training set.
                print('Training Data Eval:')
                do_eval(datasets.train)
                # Evaluate against the validation set.
                print('Validation Data Eval:')
                do_eval(datasets.validation)
                # Evaluate against the test set.
                print('Test Data Eval:')
                do_eval(datasets.test)