def tower_loss(scope):
    """Build one training tower and return its summed loss.

    Creates the TFRecord reader and the inference network, calls
    ``loss.loss`` on the resulting logits and sums every tensor found in
    the ``'losses'`` collection under ``scope``.

    Args:
        scope: Scope used to filter the ``'losses'`` collection and as the
            prefix of the scalar summary name.

    Returns:
        The ``total_loss`` tensor produced by ``tf.add_n``.
    """
    record_path = FLAGS.buckets + 'wavFile_train_frame_60.tfr'
    window = FLAGS.frequency // FLAGS.frame_count
    reader = read.Reader(path=record_path,
                         batch_size=FLAGS.batch_size,
                         window_size=window,
                         kwidth=FLAGS.kwidth)

    network = inference.Inference(reader.wav_raw, FLAGS.kwidth, 2,
                                  FLAGS.isTrain, scope=scope)
    logits = network.build_model()

    # The return value is ignored; presumably loss.loss registers its
    # result in the 'losses' collection read below (confirm in loss.py).
    loss.loss(logits=logits, labels=reader.label)
    tower_losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(tower_losses, name='total_loss')

    # Only the first collected loss is exported as a summary.
    first_loss_summary = tf.summary.scalar(scope + 'loss', tower_losses[0])
    tf.add_to_collection('summary', first_loss_summary)
    return total_loss
def load(self):
    """Create the model, loss and optimizer, optionally resuming from disk.

    When ``self.args.load`` is ``'.'`` a fresh loss and empty training/test
    logs are created. Otherwise the model weights, loss object, optimizer
    state and both logs are read from files under ``self.dir``.

    Returns:
        tuple: ``(myModel, myLoss, myOptimizer)``.

    Raises:
        ValueError: If ``self.args.optimizer`` is not ``'SGD'``, ``'ADAM'``
            or ``'RMSprop'``. Previously this case left the optimizer
            variable unbound and failed later with ``UnboundLocalError``.
    """
    myModel = model(self.args).getModel()
    # Only parameters that require gradients are handed to the optimizer.
    trainable = filter(lambda x: x.requires_grad, myModel.parameters())

    if self.args.optimizer == 'SGD':
        myOptimizer = optim.SGD(trainable,
                                lr=self.args.lr,
                                momentum=self.args.momentum)
    elif self.args.optimizer == 'ADAM':
        myOptimizer = optim.Adam(trainable,
                                 lr=self.args.lr,
                                 betas=(self.args.beta1, self.args.beta2),
                                 eps=self.args.epsilon)
    elif self.args.optimizer == 'RMSprop':
        myOptimizer = optim.RMSprop(trainable,
                                    lr=self.args.lr,
                                    eps=self.args.epsilon)
    else:
        raise ValueError(
            'Unsupported optimizer: {!r}'.format(self.args.optimizer))

    if self.args.load == '.':
        myLoss = loss(self.args).getLoss()
        self.trainingLog = torch.Tensor()
        self.testLog = torch.Tensor()
    else:
        # NOTE: torch.load unpickles its input; only load trusted files.
        myModel.load_state_dict(
            torch.load(self.dir + '/model/model_lastest.pt'))
        myLoss = torch.load(self.dir + '/loss.pt')
        myOptimizer.load_state_dict(torch.load(self.dir + '/optimizer.pt'))
        self.trainingLog = torch.load(self.dir + '/trainingLog.pt')
        self.testLog = torch.load(self.dir + '/testLog.pt')
        print('Load loss function from checkpoint...')
        # The number of test-log entries is reported as the resume epoch.
        print('Continue from epoch {}...'.format(len(self.testLog)))

    return myModel, myLoss, myOptimizer
def main(args):
    """Assemble the network, optimizer, scheduler and loaders, then train.

    Args:
        args: Options object; this function reads ``lr``, ``lr_milestone``,
            ``batch_size`` and ``num_workers`` and forwards ``args`` to
            ``train``.
    """
    net = model()
    if torch.cuda.is_available():
        net = net.cuda()
    criterion = loss()

    opt = torch.optim.Adam(net.parameters(), lr=args.lr)
    # The learning rate is halved at every milestone in args.lr_milestone.
    sch = torch.optim.lr_scheduler.MultiStepLR(opt, args.lr_milestone,
                                               gamma=0.5)

    train_set = Dataset(train=True)
    test_set = Dataset(train=False)

    # Both loaders use the same settings (the test loader shuffles too).
    loader_options = dict(batch_size=args.batch_size,
                          shuffle=True,
                          num_workers=args.num_workers,
                          pin_memory=True)
    train_loader = DataLoader(train_set, **loader_options)
    test_loader = DataLoader(test_set, **loader_options)

    train(net, train_loader, test_loader, opt, sch, criterion, args)
def main(arg=None):
    """Train the classifier with plain SGD and write summaries/checkpoint.

    Builds the input pipeline, network and loss, runs 5001 gradient-descent
    steps with learning rate 0.1, logs the loss every 100 steps, writes the
    merged summaries every 50 steps and saves the variables at the end.

    Args:
        arg: Unused; kept so the function can be used as an app entry point.
    """
    images, labels = image_loader.read_batch()
    logits = inference.inference(images)
    loss = ls.loss(logits, labels)

    saver = tf.train.Saver()
    summary_opt = tf.summary.merge_all()
    init = tf.global_variables_initializer()

    sess = tf.InteractiveSession()
    sess.run(init)
    tf.train.start_queue_runners(sess=sess)
    summary_writer = tf.summary.FileWriter(flag.log_dir, graph=sess.graph)

    train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

    # Accuracy ops are added to the graph but never evaluated below.
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # range/print() replace the Python 2-only xrange/print statement so the
    # function parses and runs on Python 3 like the rest of the file.
    for i in range(5001):
        if i % 100 == 0:
            print('step {0}, loss: {1}'.format(i, sess.run(ls.get_loss())))
        sess.run(train_step)
        if i % 50 == 0:
            summary_str = sess.run(summary_opt)
            summary_writer.add_summary(summary_str, i)

    # NOTE(review): the original indentation was lost; the checkpoint is
    # assumed to be written once, after the loop. Confirm against history.
    saver.save(sess=sess, save_path=flag.save_dir)
    summary_writer.close()
def load(self):
    """Create model, loss, optimizer and LR scheduler, optionally resuming.

    The optimizer is selected by ``self.args.optimizer`` and the scheduler
    by ``self.args.decay_type``: ``'step'`` gives ``StepLR``; any other
    value containing ``'step'`` is split on ``'_'`` and the parts after the
    first are used as integer milestones for ``MultiStepLR``.

    When ``self.args.load`` is not ``'.'`` the model, loss and optimizer
    state are restored from files under ``self.dir``; the logs are restored
    as well unless ``self.args.test_only`` is set.

    Returns:
        tuple: ``(my_model, my_loss, my_optimizer, my_scheduler)``.

    Raises:
        ValueError: If the optimizer name or decay type is not recognised.
            Previously these cases surfaced later as ``UnboundLocalError``.
    """
    my_model = model(self.args).get_model()
    # Only parameters that require gradients are handed to the optimizer.
    trainable = filter(lambda x: x.requires_grad, my_model.parameters())

    if self.args.optimizer == 'SGD':
        optimizer_function = optim.SGD
        kwargs = {'momentum': self.args.momentum}
    elif self.args.optimizer == 'ADAM':
        optimizer_function = optim.Adam
        kwargs = {
            'betas': (self.args.beta1, self.args.beta2),
            'eps': self.args.epsilon
        }
    elif self.args.optimizer == 'RMSprop':
        optimizer_function = optim.RMSprop
        kwargs = {'eps': self.args.epsilon}
    else:
        raise ValueError(
            'Unsupported optimizer: {!r}'.format(self.args.optimizer))

    kwargs['lr'] = self.args.lr
    kwargs['weight_decay'] = 0
    my_optimizer = optimizer_function(trainable, **kwargs)

    decay_type = self.args.decay_type
    if decay_type == 'step':
        my_scheduler = lrs.StepLR(my_optimizer,
                                  step_size=self.args.lr_decay,
                                  gamma=self.args.gamma)
    elif 'step' in decay_type:
        # e.g. 'step_100_200' -> milestones [100, 200]
        milestones = [int(part) for part in decay_type.split('_')[1:]]
        my_scheduler = lrs.MultiStepLR(my_optimizer,
                                       milestones=milestones,
                                       gamma=self.args.gamma)
    else:
        raise ValueError('Unsupported decay type: {!r}'.format(decay_type))

    self.log_training = torch.Tensor()
    self.log_test = torch.Tensor()
    if self.args.load == '.':
        my_loss = loss(self.args).get_loss()
    else:
        # NOTE: torch.load unpickles its input; only load trusted files.
        if not self.args.test_only:
            self.log_training = torch.load(self.dir + '/log_training.pt')
            self.log_test = torch.load(self.dir + '/log_test.pt')

        resume = self.args.resume
        if resume == -1:
            # -1 selects the latest weights; the epoch shown below is then
            # taken from the length of the test log.
            my_model.load_state_dict(
                torch.load(self.dir + '/model/model_lastest.pt'))
            resume = len(self.log_test)
        else:
            my_model.load_state_dict(
                torch.load(self.dir + '/model/model_{}.pt'.format(resume)))

        my_loss = torch.load(self.dir + '/loss.pt')
        my_optimizer.load_state_dict(torch.load(self.dir + '/optimizer.pt'))
        print('Load loss function from checkpoint...')
        print('Continue from epoch {}...'.format(resume))

    return my_model, my_loss, my_optimizer, my_scheduler
def gradient_descent(A, b, w, sigma, T):
    """Apply ``T`` gradient-descent steps to ``w`` and return the result.

    Every 5000th iteration (starting with the first) the current fraction
    of wrong predictions and the loss are printed.

    Args:
        A, b: Passed unchanged to the step, error and loss helpers.
        w: Starting weights; returned unchanged when ``T`` is 0.
        sigma: Passed unchanged to ``gradient_descent_step``.
        T: Number of iterations.

    Returns:
        The weights after the last step.
    """
    for iteration in range(T):
        w = gradient_descent_step(A, b, w, sigma)
        if iteration % 5000:
            continue
        # Progress report, computed on the freshly updated weights.
        print("Fraction wrong:", fraction_wrong(A, b, w))
        print("Loss:", loss(A, b, w))
    return w
def train_discriminator(discriminator, loss, optimizer, real_data, fake_data):
    """Run one optimizer step on the discriminator.

    Gradients from the real batch (target: ones) and the fake batch
    (target: zeros) are accumulated before a single ``optimizer.step()``.
    Targets are moved to the GPU with ``.cuda()``.

    Args:
        discriminator: Callable applied to ``real_data`` and ``fake_data``.
        loss: Callable ``loss(prediction, target)`` returning a tensor.
        optimizer: Optimizer that is zeroed and stepped here.
        real_data: Batch of real samples; its first dimension sets the
            target size for both batches.
        fake_data: Batch of generated samples.

    Returns:
        Sum of the real-batch and fake-batch errors.
    """
    batch = real_data.size(0)
    optimizer.zero_grad()

    # Real samples are scored against a target of ones.
    real_error = loss(discriminator(real_data), ones_target(batch).cuda())
    real_error.backward()

    # Fake samples are scored against a target of zeros.
    fake_error = loss(discriminator(fake_data), zeros_target(batch).cuda())
    fake_error.backward()

    optimizer.step()
    return real_error + fake_error
def test(data_loader, Hnet, Rnet):
    """Evaluate the hiding (H) and reveal (R) networks on ``data_loader``.

    Each batch is split in half: the first half serves as cover images, the
    second half as secret images. The summed loss is logged and result
    pictures are saved for every batch.

    Args:
        data_loader: Iterable yielding image batches.
        Hnet: Network fed with cover and secret concatenated on dim 1.
        Rnet: Network fed with the output of ``Hnet``.
    """
    print_log("---------- test begin ---------", opt.test_log)
    print_log(time.asctime(time.localtime(time.time())), opt.test_log, False)

    Hnet.eval()
    Rnet.eval()
    for batch_index, all_pics in enumerate(data_loader):
        # Number of cover/secret pairs contained in this batch.
        half = int(all_pics.size()[0] / 2)

        cover_img = all_pics[:half, :, :, :]
        secret_img = all_pics[half:half * 2, :, :, :]
        # Cover and secret are stacked along dim 1 as the H-net input.
        concat_img = torch.cat([cover_img, secret_img], dim=1)

        if opt.use_gpu:
            cover_img = cover_img.cuda()
            secret_img = secret_img.cuda()
            concat_img = concat_img.cuda()

        concat_imgv = Variable(concat_img, requires_grad=False)
        cover_imgv = Variable(cover_img, requires_grad=False)
        secret_imgv = Variable(secret_img, requires_grad=False)

        with torch.no_grad():
            stego = Hnet(concat_imgv)
            secret_rev = Rnet(stego)
            # errH: cover vs. container, errR: secret vs. revealed secret.
            errH = loss(stego, cover_imgv)
            errR = loss(secret_rev, secret_imgv)
            err_sum = errH + opt.beta * errR

        save_pic2('test', cover_img, stego, secret_img, secret_rev,
                  opt.test_pics, opt.batch_size, batch_index)

        log = 'test: loss is %.6f' % (err_sum.item()) + '\n'
        print_log(log, opt.test_log)

    print_log("---------- test end ----------", opt.test_log)
def train_epoch(epoch, model, optimizer, args):
    """Train ``model`` for one pass over the training batches.

    Args:
        epoch: Epoch number, used only in the progress message.
        model: Callable mapping an input batch to predictions.
        optimizer: Optimizer that is zeroed and stepped for every batch.
        args: Options object; ``batch_size`` and ``use_cuda`` are read.

    Returns:
        float: Mean loss over the batches of this epoch.
    """
    losses = 0.0
    # Enumerate from 1 so that `i` equals the number of batches seen; the
    # average below therefore requires at least one batch.
    for i, (x, y) in enumerate(
            data.train_batches(args.batch_size, use_cuda=args.use_cuda), 1):
        optimizer.zero_grad()
        y_pred = model(x)
        l = loss(y_pred, y, use_cuda=args.use_cuda)
        l.backward()
        optimizer.step()
        # .item() works for 0-dim and single-element tensors; the former
        # `l.data[0]` raises on 0-dim tensors in current PyTorch.
        losses += l.item()
    print("Epoch: {}, Ave loss: {}".format(epoch, losses / i))
    return losses / i
def train_generator(discriminator, loss, optimizer, fake_data):
    """Run one optimizer step that pushes fake data towards the "real" label.

    The discriminator's prediction on ``fake_data`` is scored against a
    target of ones (moved to the GPU with ``.cuda()``).

    Args:
        discriminator: Callable applied to ``fake_data``.
        loss: Callable ``loss(prediction, target)`` returning a tensor.
        optimizer: Optimizer that is zeroed and stepped here (presumably
            the generator's; confirm against the caller).
        fake_data: Batch of generated samples.

    Returns:
        The error tensor that was back-propagated.
    """
    batch = fake_data.size(0)

    # Start from clean gradients.
    optimizer.zero_grad()

    generator_error = loss(discriminator(fake_data),
                           ones_target(batch).cuda())
    generator_error.backward()
    optimizer.step()

    return generator_error
def _BuildGraph(self):
    """Build the FCN8-VGG graph and store the network on ``self.vgg``.

    Resets the default graph, creates float placeholders for the images,
    the labels and the loss carried over from previous sub-batches,
    optionally adds brightness/contrast augmentation, builds the network
    and attaches the loss plus an Adam minimisation op.
    """
    putils.Log_and_print("Building computational graph ...")

    # Restrict GPU usage before the graph is (re)created.
    putils.AllocateGPU(self.N_GPUs)
    tf.reset_default_graph()

    net = fcn8_vgg.FCN8VGG()

    # Inputs fed at run time.
    net.images = tf.placeholder("float")
    net.labels = tf.placeholder("float")
    net.cumloss = tf.placeholder("float")  # loss of earlier sub-batches

    training = not self.IS_TESTING

    if training and self.AUGMENT:
        # Random brightness and contrast adjustment. net.images is rebound
        # to the augmented tensor, so the raw placeholder is no longer
        # reachable through this attribute.
        net.images = tf.image.random_brightness(net.images, max_delta=63)
        net.images = tf.image.random_contrast(net.images,
                                              lower=0.2,
                                              upper=1.8)

    with tf.name_scope("content_vgg"):
        net.build(net.images,
                  train=training,
                  num_classes=self.Model.NUM_CLASSES,
                  random_init_fc8=training,
                  debug=False)

    # Loss of the current sub-batch, then the running total to minimise.
    net.cost = loss.loss(net.upscore32,
                         net.labels,
                         num_classes=self.Model.NUM_CLASSES,
                         head=self.Model.CLASSWEIGHTS)
    net.cumLoss = net.cost + net.cumloss
    net.optimizer = tf.train.AdamOptimizer(self.LEARN_RATE).minimize(
        net.cumLoss)

    putils.Log_and_print('Finished building Network.')

    # Expose the assembled network as an instance attribute.
    self.vgg = net
def __init__(self, COMdataset, Model, optimizer, checkpoint_dir, batch_size):
    """Store the training components and move the model to the device.

    Args:
        COMdataset: Dataset object, stored as ``self.COMdataset``.
        Model: Network to train; it must provide ``.to(device)``.
        optimizer: Optimizer used for back-propagation.
        checkpoint_dir: Directory where the model is saved.
        batch_size: Batch size, stored as ``self.batch_size``.
    """
    self.COMdataset = COMdataset
    # Fix: use the ``Model`` argument. The original read ``self.Model``,
    # which this constructor never assigns, and ignored the parameter.
    self.model = Model
    # optimizer to perform backpropagation
    self.optimizer = optimizer
    # directory to save the model
    self.checkpoint_dir = checkpoint_dir
    # first CUDA device when available, CPU otherwise
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    self.model.to(self.device)
    # fractions for the training / validation / test split
    self.train_val_test = [0.7, 0.1, 0.2]
    self.batch_size = batch_size
    # loss
    self.criterion = loss()
    self.save_per_epochs = 1
def test(batch_size, cuda, path):
    """Evaluate a trained model on the test split and print its metrics.

    Args:
        batch_size (int): Testing batch size.
        cuda (bool): Run on GPU when true, on CPU otherwise.
        path (str): Path to the trained model's state dict.

    Returns:
        None. The mean loss and accuracy over all batches are printed.
    """
    # print() calls replace Python 2 print statements, which are syntax
    # errors on Python 3; the printed text is unchanged.
    print("testing...")
    print("cuda: ", cuda)

    test_data = generate_dataset(train="test")
    print("len of image: ", len(test_data[0]))
    test_dataset = SVHNDataset(test_data[0], test_data[1])

    kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              **kwargs)

    model = MultiDigitsNet()
    if cuda:
        model.cuda()
    model.eval()
    model.load_state_dict(torch.load(path))

    batch_losses = []
    batch_accuracy = []
    for batch_idx, (data, target) in enumerate(test_loader):
        if cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        data = data.float()

        output = model(data)
        # .item() replaces `.data[0]`, which raises on 0-dim tensors in
        # current PyTorch.
        batch_loss = loss(output, target, cuda).item()
        batch_losses.append(batch_loss)
        batch_acc = accu(output, target, cuda).item()
        batch_accuracy.append(batch_acc)

        if batch_idx % 100 == 0:
            print("[{} / {}]: testing loss: {}, testing accuracy: {}".format(
                batch_idx * batch_size, len(test_dataset), batch_loss,
                batch_acc))

    test_loss = np.mean(np.array(batch_losses))
    test_accuracy = np.mean(np.array(batch_accuracy))
    print("testing loss: {}, testing accuracy: {}".format(
        test_loss, test_accuracy))
def tower_loss(images, label_maps, training_masks, reuse_variables=None):
    """Build the model for one tower and return its losses.

    Args:
        images: Input image batch.
        label_maps: Ground truth; split along the last axis into a
            1-channel score map and a 5-channel geometry map.
        training_masks: Mask tensor forwarded to ``loss.loss``.
        reuse_variables: ``reuse`` flag for the variable scope. Summaries
            are only created when this is ``None``.

    Returns:
        tuple: ``(total_loss, model_loss)`` where ``total_loss`` adds the
        collected regularization losses to ``model_loss``.
    """
    # Build the inference graph, sharing variables when requested.
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        pred_score, pred_geometry = model.model(images, is_training=True)

    gt_score, gt_geometry = tf.split(label_maps,
                                     num_or_size_splits=[1, 5],
                                     axis=-1)
    model_loss = loss.loss(gt_score, pred_score, gt_geometry, pred_geometry,
                           training_masks)

    regularizers = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = tf.add_n([model_loss] + regularizers)

    if reuse_variables is not None:
        return total_loss, model_loss

    # Summaries are created only for the tower that does not reuse variables.
    tf.summary.image('input', images)
    tf.summary.image('score_map', gt_score)
    tf.summary.image('score_map_pred', pred_score)
    # First four geometry channels, masked by the ground-truth score map.
    tf.summary.image('geo_map_0', gt_geometry[:, :, :, 0:4] * gt_score)
    tf.summary.image('geo_map_0_pred', pred_geometry[:, :, :, 0:4] * gt_score)
    tf.summary.image('training_masks', training_masks)
    tf.summary.scalar('model_loss', model_loss)
    tf.summary.scalar('total_loss', total_loss)

    return total_loss, model_loss
def train(self): thresd = 100 # if self.args.load_model: # print('load model') # self.model.d = util.load_mdoel(self.args.result_dir,'discriminator') # self.model.g = util.load_mdoel(self.args.result_dir,'best_generator') # self.save_d_loss = util.load_loss(self.args.result_dir,'discriminator') # self.save_g_loss = util.load_loss(self.args.result_dir,'generator') for epoch in range(self.args.epochs): start_time = time.time() d_cnt = 0 for n_batch, (lr, hr, _) in enumerate(self.data.loader_train): if not self.args.cpu: lr = lr.cuda() hr = hr.cuda() lr = Variable(lr, requires_grad=True) hr = Variable(hr, requires_grad=True) loss_ = loss(self.args, lr, hr, self.model) # if d_cnt < self.args.d_count: # self.update_d(loss_) # a = [self.d_r_loss.cpu().view(-1),self.d_f_loss.cpu().view(-1),self.d_cost.cpu().view(-1),self.wasserstein.cpu().view(-1)] # a = np.array([[l.detach().numpy()[0] for l in a]]) # self.save_d_loss = util.add_loss(self.save_d_loss,a) # #print( # # 'batch:{}/{}--d_real_loss = {:0.6f}, d_fake_loss = {:0.6f},d_cost = {:0.6f}, wasserstein = {:0.6f}\n' \ # # .format(n_batch, self.args.n_train // self.args.batch_size + 1, self.d_r_loss, # # self.d_f_loss, self.d_cost, self.wasserstein) # #) # else: # d_cnt = 0 self.update_g(loss_) # a=self.g_cost.cpu().view(-1).detach().numpy() # self.save_g_loss = util.add_loss(self.save_g_loss,a) # self.save_g_loss = torch.cat([self.save_g_loss, a], 0) # del(a) print('p_loss={:0.6f}'.format(self.p_loss)) # d_cnt += 1 util.save_mdoel(self.args.result_dir, self.model.g, 'single_generator')
def train_on_epoch(self):
    """Train ``self.model`` for one epoch and report the mean batch loss.

    A fresh shuffled loader is created over ``self.datasets['train']``
    (incomplete final batches are dropped). The running mean of the batch
    losses is shown on the progress bar and printed through
    ``self.print_`` when the epoch ends.
    """
    self.model.train()
    lr = self.adjust_lr()

    train_loader = torch.utils.data.DataLoader(self.datasets['train'],
                                               batch_size=self.args.train_bs,
                                               shuffle=True,
                                               num_workers=1,
                                               drop_last=True)

    description = 'Train (task {}, lr {}, epoch {}, device {})'.format(
        self.task_id, lr, self.epoch, self.device)
    p_bar = tqdm(train_loader, desc=description, ncols=120, leave=True)
    p_bar.L = 0  # running mean of the batch losses

    for bi, (x, y) in enumerate(p_bar):
        x, y = x.to(self.device), y.to(self.device)

        batch_loss = loss(self.model(x), y, self.args.M, self.theta)

        self.optim.zero_grad()
        batch_loss.backward()
        self.optim.step()

        # Incremental mean: the previous mean is weighted by the number of
        # batches already seen.
        p_bar.L = (p_bar.L * bi + batch_loss.item()) / (bi + 1)
        p_bar.set_postfix_str('loss={:.4f}'.format(p_bar.L))

    self.print_(
        'Train (task {}, lr {}, epoch {}, device {}, loss {})'.format(
            self.task_id, lr, self.epoch, self.device, p_bar.L))
def tower_loss(scope, images, labels):
    """Build one FCN8 tower and return the sum of its losses.

    Args:
        scope: Scope used to filter the ``'losses'`` collection.
        images: Input image batch passed to ``fcn8.inference``.
        labels: Ground-truth labels passed to ``cost.loss``.

    Returns:
        The ``total_loss`` tensor for this tower.
    """
    # Class array [0, 1, 0] handed to the cost function.
    labelclasses = np.array([0, 1, 0])

    upscore32_pred = fcn8.inference(rgb=images)
    # The return value is ignored; the losses are read back from the
    # 'losses' collection below.
    cost.loss(upscore32_pred, labels, labelclasses)

    # Losses that belong to the current tower only.
    tower_losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(tower_losses, name='total_loss')

    # Strip the 'tower_N/' prefix so multi-GPU runs share summary names.
    tower_prefix = '%s_[0-9]*/' % TOWER_NAME
    for tensor in tower_losses + [total_loss]:
        summary_name = re.sub(tower_prefix, '', tensor.op.name)
        tf.summary.scalar(summary_name, tensor)

    return total_loss
def testModel(args, inputLoader):
    """Restore a saved FCN8-VGG model and write predictions for test batches.

    The output image directory is wiped and recreated, the graph is rebuilt
    for 500x500 inputs, weights are restored from
    ``args.modelDir + args.modelName`` and the probability maps of the test
    batches are saved through ``inputLoader``.

    NOTE(review): the original indentation was lost. The scoping below
    (placeholders under 'FCN8_VGG', the 'Model' name scope outside it) is
    inferred from the tensor names in the commented-out lookups; confirm.

    Args:
        args: Options object; ``imageHeight``, ``imageWidth`` and
            ``batchSize`` are overwritten here.
        inputLoader: Supplies test batches and annotation classes and
            saves the results.
    """
    print("Testing saved model")

    # Recreate the output directory from scratch.
    os.system("rm -rf " + args.imagesOutDir)
    os.system("mkdir " + args.imagesOutDir)

    # Test-time input resolution is fixed here.
    args.imageHeight = 500
    args.imageWidth = 500

    with tf.Session() as session:
        with tf.variable_scope('FCN8_VGG'):
            batchInputImages = tf.placeholder(dtype=tf.float32, shape=[None, args.imageHeight, args.imageWidth, args.imageChannels], name="batchInputImages")
            batchInputLabels = tf.placeholder(dtype=tf.float32, shape=[None, args.imageHeight, args.imageWidth, 1], name="batchInputLabels")
            keepProb = tf.placeholder(dtype=tf.float32, name="keepProb")

        vgg_fcn = fcn8.FCN8VGG(batchSize=args.batchSize, enableTensorboard=args.tensorboard, vgg16_npy_path=args.pretrained)

        with tf.name_scope('Model'):
            vgg_fcn.build(rgb=batchInputImages, keepProb=keepProb, num_classes=args.numClasses, random_init_fc8=True, debug=(args.verbose > 0))

        with tf.name_scope('Loss'):
            # weights = tf.cast(batchInputLabels != args.ignoreLabel, dtype=tf.float32)
            loss = cost.loss(vgg_fcn.upscore32_pred, batchInputLabels, inputLoader.getAnnotationClasses())

        # The optimizer is built although no training step is run here;
        # presumably so the Saver below sees the same variables that were
        # saved during training. TODO confirm.
        with tf.name_scope('Optimizer'):
            optimizer = tf.train.AdamOptimizer(learning_rate=args.learningRate)
            gradients = tf.gradients(loss, tf.trainable_variables())
            gradients = list(zip(gradients, tf.trainable_variables()))
            applyGradients = optimizer.apply_gradients(grads_and_vars=gradients)

        # saver = tf.train.import_meta_graph(args.modelDir + args.modelName + ".meta")
        saver = tf.train.Saver()
        saver.restore(session, args.modelDir + args.modelName)

        # Get reference to placeholders
        # outputNode = session.graph.get_tensor_by_name("Model/probabilities:0")
        # inputBatchImages = session.graph.get_tensor_by_name("FCN8_VGG/batchInputImages:0")
        # inputKeepProbability = session.graph.get_tensor_by_name("FCN8_VGG/keepProb:0")

        # range(1, numBatch) yields numBatch - 1 = 7 iterations.
        args.batchSize = 1  # 50
        numBatch = 8
        for i in tqdm(range(1, numBatch), desc='Testing'):
            # print("Processing batch # %d" % i)
            batchImagesTest, _ = inputLoader.getTestBatch(readMask=False)  # For testing without GT mask
            # keepProb is fed as 1.0 for inference.
            imagesProbabilityMap = session.run(vgg_fcn.probabilities, feed_dict={batchInputImages: batchImagesTest, keepProb: 1.0})

            # Save image results
            print("Saving images...")
            inputLoader.saveLastBatchResults(imagesProbabilityMap, isTrain=False)

        print("Model tested!")
        return
def trainModel(args, inputLoader):
    """Train the FCN8-VGG model until ``inputLoader`` runs out of batches.

    Builds the graph, optionally clears previous outputs or restores a
    checkpoint, then loops over training batches. Checkpoints, sample
    images and test losses are produced at the ``saveStep``,
    ``displayStep`` and ``evaluateStep`` intervals of ``args``.

    NOTE(review): the original indentation was lost. The scoping of the
    graph-building statements and of the per-step reporting is a
    reconstruction; confirm against history.

    Args:
        args: Options object (image size, learning rate, directories,
            step intervals, ``tensorboard``/``clean`` flags, ...).
        inputLoader: Supplies train/test batches and annotation classes
            and saves result images.
    """
    step = 1
    print('Train mode')

    with tf.variable_scope('FCN8_VGG'):
        batchInputImages = tf.placeholder(
            dtype=tf.float32,
            shape=[None, args.imageHeight, args.imageWidth,
                   args.imageChannels],
            name="batchInputImages")
        batchInputLabels = tf.placeholder(
            dtype=tf.float32,
            shape=[None, args.imageHeight, args.imageWidth, 1],
            name="batchInputLabels")
        keepProb = tf.placeholder(dtype=tf.float32, name="keepProb")

    vgg_fcn = fcn8.FCN8VGG(enableTensorboard=args.tensorboard,
                           vgg16_npy_path=args.pretrained)

    with tf.name_scope('Model'):
        vgg_fcn.build(rgb=batchInputImages,
                      keepProb=keepProb,
                      num_classes=args.numClasses,
                      random_init_fc8=True,
                      debug=(args.verbose > 0))

    with tf.name_scope('Loss'):
        # weights = tf.cast(batchInputLabels != args.ignoreLabel, dtype=tf.float32)
        loss = cost.loss(vgg_fcn.upscore32_pred, batchInputLabels,
                         inputLoader.getAnnotationClasses())

    with tf.name_scope('Optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learningRate)
        gradients = tf.gradients(loss, tf.trainable_variables())
        gradients = list(zip(gradients, tf.trainable_variables()))
        applyGradients = optimizer.apply_gradients(grads_and_vars=gradients)

    init = tf.global_variables_initializer()

    if args.tensorboard:
        tf.summary.scalar("loss", loss)
        for var in tf.trainable_variables():
            tf.summary.histogram(var.name, var)
        for grad, var in gradients:
            tf.summary.histogram(var.name + '/gradient', grad)
        mergedSummaryOp = tf.summary.merge_all()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)

        if args.clean:
            print("Removing previous checkpoints and logs")
            os.system("rm -rf " + args.logsDir)
            os.system("rm -rf " + args.imagesOutDir)
            os.system("rm -rf " + args.modelDir)
            os.system("mkdir " + args.imagesOutDir)
            os.system("mkdir " + args.modelDir)
        else:
            # Restore checkpoint
            print("Restoring from checkpoint")
            # saver = tf.train.import_meta_graph(args.modelDir + args.modelName + ".meta")
            saver.restore(sess, args.modelDir + args.modelName)

        if args.tensorboard:
            # Op for writing logs to Tensorboard
            summaryWriter = tf.summary.FileWriter(
                args.logsDir, graph=tf.get_default_graph())

        print("Starting network training")

        # Keep training until the loader is exhausted.
        while True:
            batchImagesTrain, batchLabelsTrain = inputLoader.getTrainBatch()
            if batchImagesTrain is None:
                print("Training completed!")
                break

            trainFeed = {batchInputImages: batchImagesTrain,
                         batchInputLabels: batchLabelsTrain,
                         keepProb: args.keepProb}

            # Run optimization op (backprop)
            if args.tensorboard:
                # Fix: the loss is fetched here as well. Previously
                # trainLoss was undefined in tensorboard mode when the NaN
                # check below ran.
                [trainLoss, _, summary] = sess.run(
                    [loss, applyGradients, mergedSummaryOp],
                    feed_dict=trainFeed)
                summaryWriter.add_summary(summary, step)
            else:
                [trainLoss, _] = sess.run([loss, applyGradients],
                                          feed_dict=trainFeed)
                print("Iteration: %d, Minibatch Loss: %f" % (step, trainLoss))

            if (np.isnan(trainLoss)):
                print("Nan reached. Terminating training.")
                break

            if step % args.displayStep == 0:
                # Re-evaluate the same batch with keepProb 1.0 to get the
                # probability maps that are written out as images.
                [trainLoss, trainImagesProbabilityMap] = sess.run(
                    [loss, vgg_fcn.probabilities],
                    feed_dict={batchInputImages: batchImagesTrain,
                               batchInputLabels: batchLabelsTrain,
                               keepProb: 1.0})

                # Save image results
                print("Saving images...")
                inputLoader.saveLastBatchResults(trainImagesProbabilityMap,
                                                 isTrain=True)
                print("Saved images.")

            print('step: ' + str(step))
            step += 1

            if step % args.saveStep == 0:
                # Save model weights to disk
                checkpointPath = args.modelDir + args.modelName + str(step)
                saver.save(sess, checkpointPath)
                # Fix: report the path actually written, including the
                # step suffix.
                print("Model saved: %s" % checkpointPath)

            # Check the accuracy on test data
            if step % args.evaluateStep == 0:
                batchImagesTest, batchLabelsTest = inputLoader.getTestBatch()
                testFeed = {batchInputImages: batchImagesTest,
                            batchInputLabels: batchLabelsTest,
                            keepProb: 1.0}

                if args.evaluateStepDontSaveImages:
                    [testLoss] = sess.run([loss], feed_dict=testFeed)
                    print("Test loss: %f" % testLoss)
                else:
                    [testLoss, testImagesProbabilityMap] = sess.run(
                        [loss, vgg_fcn.probabilities], feed_dict=testFeed)
                    print("Test loss: %f" % testLoss)

                    # Save image results
                    print("Saving images")
                    inputLoader.saveLastBatchResults(
                        testImagesProbabilityMap, isTrain=False)

        # Save final model weights to disk
        saver.save(sess, args.modelDir + args.modelName)
        print("Model saved: %s" % (args.modelDir + args.modelName))

        # Report loss on test data
        batchImagesTest, batchLabelsTest = inputLoader.getTestBatch()
        testLoss = sess.run(loss,
                            feed_dict={batchInputImages: batchImagesTest,
                                       batchInputLabels: batchLabelsTest,
                                       keepProb: 1.0})
        print("Test loss (current): %f" % testLoss)

        print("Optimization Finished!")
        return
# Input geometry, embedding width and batch size of the siamese setup.
input_shape = (423, 512, 3)
embedding_size = 100
batch_size = 16

# Whole batches per epoch; a trailing partial batch is not counted.
train_steps_per_epoch = train.shape[0] // batch_size
validation_steps_per_epoch = validation.shape[0] // batch_size

# Only the training generator receives the augmentations.
train_generator = DataGenerator(train,
                                input_shape,
                                embedding_size,
                                train_steps_per_epoch,
                                images_dir,
                                augmentations,
                                batch_size=batch_size)
validation_generator = DataGenerator(validation,
                                     input_shape,
                                     embedding_size,
                                     validation_steps_per_epoch,
                                     images_dir,
                                     batch_size=batch_size)

# The class count comes from the training generator.
num_classes = train_generator.num_classes

model = siamese_model(input_shape, num_classes, embedding_size).siamese
model.compile(loss=loss(num_classes, embedding_size),
              optimizer=optimizers.SGD(lr=0.001))
model.fit(train_generator,
          validation_data=validation_generator,
          callbacks=callbacks_list,
          epochs=100)
def trainModel(args, inputLoader):
    """Train the FCN8-VGG model from queue-based data generators.

    Batches are pulled from ``ImageDataGenerator`` queues as numpy arrays
    and fed back through placeholders. Training and validation losses are
    written as summaries.

    NOTE(review): the original indentation was lost; the nesting below is
    a reconstruction. Open questions for the owner are marked inline.

    Args:
        args: Options object (image size, learning rate, directories,
            step intervals, ``tensorboard``/``clean`` flags, ...).
        inputLoader: Not used by the active code in this function.
    """
    bestLoss = 1e9  # not read by any active code
    step = 1
    print('Train mode')

    train_file_name = args.imagesInDir + 'train.txt'
    val_file_name = args.imagesInDir + 'val.txt'

    with tf.variable_scope('FCN8_VGG'):
        batchInputImages = tf.placeholder(dtype=tf.float32, shape=[
            None, args.imageHeight, args.imageWidth, args.imageChannels
        ], name="batchInputImages")
        batchInputLabels = tf.placeholder(
            dtype=tf.float32,
            shape=[None, args.imageHeight, args.imageWidth, 1],
            name="batchInputLabels")
        # keepProb = tf.placeholder(dtype=tf.float32, name="keepProb")

    trainDataGen = ImageDataGenerator(args, train_file_name, args.numClasses, 'training', args.batchSize, num_preprocess_threads=5, shuffle=True, min_queue_examples=1000)
    validationDataGen = ImageDataGenerator(args, val_file_name, args.numClasses, 'validation', args.batchSize, num_preprocess_threads=5, shuffle=False, min_queue_examples=50)

    train_imgBatch = trainDataGen.img_batch
    train_labelBatch = trainDataGen.label_batch
    val_imgBatch = validationDataGen.img_batch
    val_labelBatch = validationDataGen.label_batch

    # keepProb is a fixed constructor argument here, not a placeholder.
    vgg_fcn = fcn8.FCN8VGG(batchSize=args.batchSize, keepProb=0.5, num_classes=args.numClasses, random_init_fc8=True, debug=(args.verbose > 0), enableTensorboard=args.tensorboard, vgg16_npy_path=args.pretrained)

    # with tf.name_scope('Model'):
    with tf.name_scope('Loss'):
        upscore32_pred = vgg_fcn.inference(rgb=batchInputImages)
        # weights = tf.cast(batchInputLabels != args.ignoreLabel, dtype=tf.float32)
        loss = cost.loss(upscore32_pred, batchInputLabels, trainDataGen.getAnnotationClasses())

    with tf.name_scope('Optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate=args.learningRate)
        gradients = tf.gradients(loss, tf.trainable_variables())
        gradients = list(zip(gradients, tf.trainable_variables()))
        applyGradients = optimizer.apply_gradients(grads_and_vars=gradients)

    init = tf.global_variables_initializer()

    # tf.summary.scalar("loss", loss)
    # Both summaries wrap the same loss tensor; which one is written
    # depends on the batch that is fed.
    training_summary = tf.summary.scalar("training_loss", loss)
    validation_summary = tf.summary.scalar("validation_loss", loss)
    mergedSummaryOp = tf.summary.merge_all()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)

        if args.clean:
            print("Removing previous checkpoints and logs")
            os.system("rm -rf " + args.logsDir)
            os.system("rm -rf " + args.imagesOutDir)
            # os.system("rm -rf " + args.modelDir)
            os.system("mkdir " + args.imagesOutDir)
            # os.system("mkdir " + args.modelDir)
        else:
            # Restore checkpoint
            print("Restoring from checkpoint")
            #saver = tf.train.import_meta_graph(args.modelDir + args.modelName + ".meta")
            saver.restore(sess, args.modelDir + args.modelName)

        # NOTE(review): summaryWriter is only created when
        # args.tensorboard is set but is used unconditionally below.
        if args.tensorboard:
            # Op for writing logs to Tensorboard
            summaryWriter = tf.summary.FileWriter(args.logsDir, graph=tf.get_default_graph())

        # Start the queue-runner threads. NOTE(review): the coordinator is
        # never asked to stop and the threads are never joined here.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        print("Starting network training")

        # Keep training until reach max iterations
        while True:
            # batchImagesTrain, batchLabelsTrain = inputLoader.getTrainBatch()
            imagesNpyBatch, labelsNpyBatch = sess.run(
                [train_imgBatch, train_labelBatch])

            # NOTE(review): this exit relies on sess.run returning None
            # for the image batch. Confirm how the loop is meant to end.
            if imagesNpyBatch is None:
                print("Training completed!")
                break
            # Run optimization op (backprop)
            else:
                [trainLoss, _, train_summ] = sess.run(
                    [loss, applyGradients, training_summary],
                    feed_dict={
                        batchInputImages: imagesNpyBatch,
                        batchInputLabels: labelsNpyBatch
                    })
                summaryWriter.add_summary(train_summ, step)
                print("Iteration: %d, Minibatch Loss: %f" % (step, trainLoss))

            # if (np.isnan(trainLoss)):
            #     print("Nan reached. Terminating training.")
            #     break

            # Check the accuracy on test data
            if step % args.evaluateStep == 0:
                # From here on imagesNpyBatch/labelsNpyBatch hold the
                # validation batch for the rest of this iteration.
                imagesNpyBatch, labelsNpyBatch = sess.run(
                    [val_imgBatch, val_labelBatch])
                [validationLoss, valid_summ] = sess.run(
                    [loss, validation_summary],
                    feed_dict={
                        batchInputImages: imagesNpyBatch,
                        batchInputLabels: labelsNpyBatch
                    })
                summaryWriter.add_summary(valid_summ, step)
                print("Validation loss: %f" % validationLoss)

            if step % args.displayStep == 0:
                # NOTE(review): this runs applyGradients a second time in
                # the same iteration, on the validation batch if the
                # branch above ran. Confirm this is intended.
                _, summary = sess.run(
                    [applyGradients, mergedSummaryOp],
                    feed_dict={
                        batchInputImages: imagesNpyBatch,
                        batchInputLabels: labelsNpyBatch
                    })
                summaryWriter.add_summary(summary, step)

            print('step: ' + str(step))
            step += 1

            # if step % args.saveStep == 0:
            # Save model weights to disk
            # NOTE(review): with the condition above disabled, a
            # checkpoint is assumed to be written on every iteration.
            saver.save(sess, args.modelDir + args.modelName + str(step))
            # The message omits the step suffix used in the path above.
            print("Model saved: %s" % (args.modelDir + args.modelName))

        # Save final model weights to disk
        saver.save(sess, args.modelDir + args.modelName)
        print("Model saved: %s" % (args.modelDir + args.modelName))

        # Report loss on test data
        # batchImagesTest, batchLabelsTest = inputLoader.getTestBatch()
        # The last batch fetched in the loop is reused for this loss.
        testLoss = sess.run(loss, feed_dict={
            batchInputImages: imagesNpyBatch,
            batchInputLabels: labelsNpyBatch
        })
        print("Test loss (current): %f" % testLoss)

        print("Optimization Finished!")
        return
def __init__(self, FLAGS):
    """
    Create the network, the training op and the training/validation readers.

    Input:
        FLAGS:
            gpu_number: int; the gpu number
            positive_size: int; the positive pair size to be random chosen in each batch
            total_batch_number: int; batches to create in reader file
            tolerance_margin: float; how much negative distance should be greater than positive distance
            batch_size: int; training and validation batch size
            feature_size: int; size of feature output from the network
            data_dir: string; data set directory
            training_data_filename: string; training data filename, e.g. CG_train.txt
            validation_data_filename: string; validation data filename, e.g. CG_test.txt

    NOTE(review): the original indentation was lost. The graph is assumed
    to be built inside the ``tf.device`` block and the readers outside it.
    """
    gpu_number = FLAGS.gpu_number
    self.positive_size = FLAGS.positive_size
    total_batch_number = FLAGS.total_batch_number
    tolerance_margin = FLAGS.tolerance_margin
    pick_same_room = True  # when validating, should we pick same room to generate negative pair?
    batch_size = FLAGS.batch_size
    feature_size = FLAGS.feature_size
    data_dir = FLAGS.data_dir
    training_data_filename = FLAGS.training_data_filename
    validation_data_filename = FLAGS.validation_data_filename

    # Distance intervals handed to the triplet readers below.
    self.control_distances = [(0, 0.5), (0.5, 1.0), (1.0, 1.5), (1.5, 2.0), (2.0, 2.5), (2.5, 3.0), (3.0, 4.0), (4.0, float('inf'))]
    self.sigma_R = 30
    self.margin = 1.0

    # imagenet mean
    #images_mean = tf.constant([104./127.5 - 1.0, 117./127.5 -1.0, 123./127.5 - 1.0])
    # place365 mean
    images_mean = tf.constant(
        [104. / 127.5 - 1.0, 113. / 127.5 - 1.0, 117. / 127.5 - 1.0])

    self.images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, 384, 640, 3))
    # Algebraically this is (pixel - mean) / 127.5 for each channel.
    images = self.images_placeholder / 127.5 - 1.0 - images_mean

    # the positions
    self.position_placeholder = tf.placeholder(tf.float32, shape=(batch_size, 2))
    # rotational gauss placeholder
    self.rot_gauss_placeholder = tf.placeholder(tf.float32, shape=(batch_size, batch_size, 20))
    # positive pair index placeholder (chosen randomly using numpy)
    self.pospair_index_placeholder = tf.placeholder(
        tf.int32, shape=(self.positive_size, 2))

    print('Building graph...')
    with tf.device('/gpu:' + str(gpu_number)):
        self.net = CaffeNet({'data': images})

        # before dropout
        conv_features = self.net.get_output()
        B, H, W, C = conv_features.get_shape().as_list()
        F = feature_size  # not read by any later statement
        self.B = B
        self.W = W
        self.C = C

        # Flatten the two middle axes, then cut the result into W
        # equal chunks along the flattened axis.
        conv_features = tf.reshape(conv_features, [B, H * W, C])
        t_list = tf.split(conv_features, num_or_size_splits=W, axis=1)

        # Build W branches; each one concatenates the chunks in an order
        # that is rotated by one position relative to the previous branch.
        indices = np.arange(W)
        self.out_branches = []
        for _ in range(W):
            # form a branch
            br = [t_list[idx] for idx in indices]
            br = tf.concat(br, axis=1)
            # append new branch into branch list
            self.out_branches.append(br)
            # update indices, this uses numpy
            indices = np.roll(indices, -1)

        self.rolling_features_list = self.out_branches
        self.lifted_loss = loss.loss(self.rolling_features_list, self.position_placeholder, self.rot_gauss_placeholder, self.pospair_index_placeholder, self.margin, self.sigma_R, tolerance_margin)

        # L2 penalty over every trainable variable, weighted by 0.0008.
        reg_loss = 0
        for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
            #print var.name
            reg_loss += tf.nn.l2_loss(var)
        self.total_loss = 0.0008 * reg_loss + self.lifted_loss

        # Both summaries wrap the same tensor; which one is written
        # depends on the data fed when it is evaluated.
        self.training_summary = tf.summary.scalar('training_loss', self.total_loss)
        self.validation_summary = tf.summary.scalar(
            'validation_loss', self.total_loss)

        # Accuracies are fed through placeholders and copied into
        # variables so they can be exported as summaries.
        self.validation_accuracy_placeholder = tf.placeholder(tf.float32, shape=())
        self.training_accuracy_placeholder = tf.placeholder(tf.float32, shape=())
        validation_accuracy = tf.get_variable('validation_accuracy', [], trainable=False)
        training_accuracy = tf.get_variable('training_accuracy', [], trainable=False)
        self.validation_assign_op = tf.assign(
            validation_accuracy, self.validation_accuracy_placeholder)
        self.training_assign_op = tf.assign(
            training_accuracy, self.training_accuracy_placeholder)
        self.validation_accuracy_summary = tf.summary.scalar(
            'validation_accuracy', validation_accuracy)
        self.training_accuracy_summary = tf.summary.scalar(
            'training_accuracy', training_accuracy)

        # Momentum optimizer (learning rate 0.00001, momentum 0.9); an
        # earlier comment here said "Adam optimizer".
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        opt = tf.train.MomentumOptimizer(0.00001, 0.9)
        grads_and_vars = opt.compute_gradients(self.total_loss)
        self.train_op = opt.apply_gradients(grads_and_vars, global_step=global_step)
    print('Finish building graph')

    print("start creating reader objects")
    #create training batch reader object
    # An argument-less parser is used only to obtain a namespace object
    # that is then filled in by hand.
    parser = argparse.ArgumentParser()
    train_FLAGS, _ = parser.parse_known_args()
    train_FLAGS.data_dir = data_dir
    train_FLAGS.batch_size = batch_size
    train_FLAGS.total_batch_number = total_batch_number
    train_FLAGS.tolerance_margin = tolerance_margin
    train_FLAGS.pick_same_room = pick_same_room
    train_FLAGS.data_filename = training_data_filename
    self.training_reader = Reader(train_FLAGS)
    self.training_triplet_reader = Triplet_Reader(train_FLAGS, self.control_distances)

    # create validation batch reader object
    parser = argparse.ArgumentParser()
    validation_FLAGS, _ = parser.parse_known_args()
    validation_FLAGS.data_dir = data_dir
    validation_FLAGS.batch_size = batch_size
    validation_FLAGS.total_batch_number = total_batch_number
    validation_FLAGS.tolerance_margin = tolerance_margin
    validation_FLAGS.pick_same_room = pick_same_room
    validation_FLAGS.data_filename = validation_data_filename
    self.validation_reader = Reader(validation_FLAGS)
    self.validation_triplet_reader = Triplet_Reader(
        validation_FLAGS, self.control_distances)
    print("Finish creating reader objects")
# One training step. The enclosing loop/function is not visible here, so
# the indentation of this fragment is left at the column it was found at.

# Pick one of the configured input sizes at random for this mini-batch.
size_index = randint(0, len(cfg.multi_scale_inp_size) - 1)
print('new scale is {}'.format(cfg.multi_scale_inp_size[size_index]))

batch = dataset.fetch_parse(batch_of_index, size_index)
im = batch['images']
gt_boxes = batch['gt_boxes']
gt_classes = batch['gt_classes']
dontcare = batch['dontcare']
# NOTE(review): this binds a one-element list holding the literal string
# 'origin_im', not batch['origin_im']. It looks like a typo; confirm.
origin_im = ['origin_im']

# sending images onto gpu after turning them into torch variable
# assumes `im` is laid out (batch, height, width, channel) so that the
# permute yields (batch, channel, height, width). TODO confirm.
im = net_utils.np_to_variable(im, is_cuda=True, volatile=False).permute(0, 3, 1, 2)

bbox_pred, iou_pred, prob_pred = net(im)
bbox_loss_i, iou_loss_i, cls_loss_i = loss(gt_boxes, gt_classes, dontcare, size_index, bbox_pred, iou_pred, prob_pred)

# accumulating mini-batch loss
# NOTE(review): rebinding `loss` here shadows the callable used on the
# previous statement. If both share a scope, the next call to loss(...)
# will fail. Confirm against the enclosing code.
loss = bbox_loss_i + iou_loss_i + cls_loss_i
bbox_loss += bbox_loss_i.data.cpu().numpy()[0]
iou_loss += iou_loss_i.data.cpu().numpy()[0]
cls_loss += cls_loss_i.data.cpu().numpy()[0]
train_loss += loss.data.cpu().numpy()[0]

# clearing grads before calculating new ones and then updating wts
optimizer.zero_grad()
loss.backward()
optimizer.step()

cnt += 1
step_cnt += 1
j += 1
dtype=tf.bool)

# setting up the network
model = Deeplab_v3(dataset.classes, batch_norm_decay=args.batch_norm_decay, is_training=is_training)
logits = model.forward_pass(train_images)
predicts = tf.argmax(logits, axis=-1, name='predicts')

variables_to_restore = tf.trainable_variables(scope='resnet_v2_50')
# fine-tune the resnet_v2_50 parameters (block1 to block4)
restorer = tf.train.Saver(variables_to_restore)

cross_entropy = loss(logits, train_annotations, dataset.classes, ignore_label=dataset.ignore_label)

# L2 regularization over all trainable variables, scaled by weight_decay
l2_loss = args.weight_decay * tf.add_n(
    [tf.nn.l2_loss(tf.cast(v, tf.float32)) for v in tf.trainable_variables()])

# NOTE(review): from here on `loss` names the total loss tensor and no
# longer the callable used above.
loss = cross_entropy + l2_loss
tf.summary.scalar("loss", loss)

# The learning rate is a non-trainable variable initialised to 0;
# presumably it is assigned elsewhere before training. TODO confirm.
lr = tf.Variable(initial_value=0., trainable=False, name='lr', dtype=tf.float32)
train_op = train(loss, model, lr)

# train benchmark
# Per-pixel argmax over axis 3, flattened to one dimension.
out = tf.reshape(tf.argmax(logits, axis=3), shape=[-1])  # [B,H,W]
def __init__(self, FLAGS):
    """Create the network, its training op, and the train/validation readers.

    Args:
        FLAGS: object exposing the following attributes
            gpu_number: int; the gpu number the graph is pinned to
            positive_size: int; the positive pair size to be random chosen
                in each batch
            total_batch_number: int; batches to create in reader file
            tolerance_margin: float; how much negative distance should be
                greater than positive distance
            batch_size: int; training and validation batch size
            feature_size: int; size of feature output from the network
                (listed for compatibility; not read by this constructor)
            data_dir: string; data set directory
            training_data_filename: string; training data filename,
                e.g. CG_train.txt
            validation_data_filename: string; validation data filename,
                e.g. CG_test.txt
    """
    gpu_number = FLAGS.gpu_number
    self.positive_size = FLAGS.positive_size
    total_batch_number = FLAGS.total_batch_number
    tolerance_margin = FLAGS.tolerance_margin
    # when validating, should we pick same room to generate negative pair?
    pick_same_room = True
    batch_size = FLAGS.batch_size
    data_dir = FLAGS.data_dir
    training_data_filename = FLAGS.training_data_filename
    validation_data_filename = FLAGS.validation_data_filename

    self.control_distances = [(0, 0.5), (0.5, 1.0), (1.0, 1.5), (1.5, 2.0),
                              (2.0, 2.5), (2.5, 3.0), (3.0, 4.0),
                              (4.0, float('inf'))]
    self.sigma_R = 30
    self.margin = 1.0

    # place365 mean (the imagenet alternative would be 104 / 117 / 123)
    images_mean = tf.constant([104./127.5 - 1.0,
                               113./127.5 - 1.0,
                               117./127.5 - 1.0])
    self.images_placeholder = tf.placeholder(
        tf.float32, shape=(batch_size, 384, 640, 3))
    images = self.images_placeholder/127.5 - 1.0 - images_mean

    # the positions
    self.position_placeholder = tf.placeholder(
        tf.float32, shape=(batch_size, 2))
    # rotational gauss placeholder
    self.rot_gauss_placeholder = tf.placeholder(
        tf.float32, shape=(batch_size, batch_size, 20))
    # positive pair index placeholder (chosen randomly using numpy)
    self.pospair_index_placeholder = tf.placeholder(
        tf.int32, shape=(self.positive_size, 2))

    print('Building graph...')
    # NOTE(review): the original indentation was lost; the device scope is
    # assumed to span the whole graph construction up to the train op.
    with tf.device('/gpu:'+str(gpu_number)):
        self.net = CaffeNet({'data': images})

        # before dropout
        conv_features = self.net.get_output()
        B, H, W, C = conv_features.get_shape().as_list()
        self.B = B
        self.W = W
        self.C = C

        conv_features = tf.reshape(conv_features, [B, H*W, C])
        t_list = tf.split(conv_features, num_or_size_splits=W, axis=1)

        # Build W branches; branch k concatenates the W slices starting at
        # slice k and wrapping around (indices are rolled by one each time).
        indices = np.arange(W)
        self.out_branches = []
        for _ in range(W):
            branch = tf.concat([t_list[idx] for idx in indices], axis=1)
            self.out_branches.append(branch)
            # update indices, this uses numpy
            indices = np.roll(indices, -1)
        self.rolling_features_list = self.out_branches

        self.lifted_loss = loss.loss(self.rolling_features_list,
                                     self.position_placeholder,
                                     self.rot_gauss_placeholder,
                                     self.pospair_index_placeholder,
                                     self.margin,
                                     self.sigma_R,
                                     tolerance_margin)

        # L2 penalty over every trainable variable.
        reg_loss = 0
        for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
            reg_loss += tf.nn.l2_loss(var)
        self.total_loss = 0.0008 * reg_loss + self.lifted_loss

        # Both summaries read the same tensor; they differ only in tag.
        self.training_summary = tf.summary.scalar(
            'training_loss', self.total_loss)
        self.validation_summary = tf.summary.scalar(
            'validation_loss', self.total_loss)

        # Accuracies are fed through the placeholders and stored in
        # non-trainable variables via the assign ops.
        self.validation_accuracy_placeholder = tf.placeholder(
            tf.float32, shape=())
        self.training_accuracy_placeholder = tf.placeholder(
            tf.float32, shape=())
        validation_accuracy = tf.get_variable(
            'validation_accuracy', [], trainable=False)
        training_accuracy = tf.get_variable(
            'training_accuracy', [], trainable=False)
        self.validation_assign_op = tf.assign(
            validation_accuracy, self.validation_accuracy_placeholder)
        self.training_assign_op = tf.assign(
            training_accuracy, self.training_accuracy_placeholder)
        self.validation_accuracy_summary = tf.summary.scalar(
            'validation_accuracy', validation_accuracy)
        self.training_accuracy_summary = tf.summary.scalar(
            'training_accuracy', training_accuracy)

        # Momentum optimizer (the old comment said "Adam", which the code
        # never used): learning rate 1e-5, momentum 0.9.
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        opt = tf.train.MomentumOptimizer(0.00001, 0.9)
        grads_and_vars = opt.compute_gradients(self.total_loss)
        self.train_op = opt.apply_gradients(grads_and_vars,
                                            global_step=global_step)
    print('Finish building graph')

    print("start creating reader objects")

    def _reader_flags(data_filename):
        """Bundle the reader settings into a namespace for one data file."""
        # Built directly rather than via ArgumentParser().parse_known_args(),
        # which read sys.argv (and exited on -h) only to get an empty object.
        return argparse.Namespace(
            data_dir=data_dir,
            batch_size=batch_size,
            total_batch_number=total_batch_number,
            tolerance_margin=tolerance_margin,
            pick_same_room=pick_same_room,
            data_filename=data_filename)

    # create training batch reader object
    train_FLAGS = _reader_flags(training_data_filename)
    self.training_reader = Reader(train_FLAGS)
    self.training_triplet_reader = Triplet_Reader(train_FLAGS,
                                                  self.control_distances)

    # create validation batch reader object
    validation_FLAGS = _reader_flags(validation_data_filename)
    self.validation_reader = Reader(validation_FLAGS)
    self.validation_triplet_reader = Triplet_Reader(validation_FLAGS,
                                                    self.control_distances)
    print("Finish creating reader objects")
def main_worker(gpu, ngpus_per_node, cfg):
    """Train a face-recognition backbone + head on one GPU of a DDP job.

    Joins the process group, builds data pipeline / model / optimizer /
    scheduler / loss from ``cfg``, optionally resumes from checkpoints, then
    runs the epoch loop with periodic validation and TorchScript export.

    Args:
        gpu: index of the GPU this process drives; stored in ``cfg['GPU']``.
            Processes with ``gpu != 0`` have ``print`` silenced.
        ngpus_per_node: GPUs per node; used to derive the global rank and the
            per-process batch size.
        cfg: configuration dict. It is mutated: ``'GPU'`` and ``'RANK'`` are
            overwritten, and ``'START_EPOCH'`` is replaced when a head
            checkpoint is loaded.

    Raises:
        ValueError: if ``cfg['LR_SCHEDULER']`` is not ``'step'``,
            ``'multi_step'`` or ``'cosine'`` (previously this surfaced later
            as a NameError on ``scheduler``).
    """
    cfg['GPU'] = gpu
    if gpu != 0:
        # Silence output on every process except local GPU 0. Keyword
        # arguments are accepted so print(..., end='') / file=... still work.
        def print_pass(*args, **kwargs):
            pass
        builtins.print = print_pass
    cfg['RANK'] = cfg['RANK'] * ngpus_per_node + gpu
    dist.init_process_group(backend=cfg['DIST_BACKEND'],
                            init_method=cfg["DIST_URL"],
                            world_size=cfg['WORLD_SIZE'],
                            rank=cfg['RANK'])

    # ------------------------------------------------------------------
    # Data loading code
    # ------------------------------------------------------------------
    batch_size = int(cfg['BATCH_SIZE'])
    per_batch_size = batch_size // ngpus_per_node
    workers = int(cfg['NUM_WORKERS'])  # dataload threads
    DATA_ROOT = cfg['DATA_ROOT']  # the parent root where your train/val/test data are stored
    VAL_DATA_ROOT = cfg['VAL_DATA_ROOT']
    RECORD_DIR = cfg['RECORD_DIR']
    RGB_MEAN = cfg['RGB_MEAN']  # for normalize inputs
    RGB_STD = cfg['RGB_STD']
    DROP_LAST = cfg['DROP_LAST']
    LR_SCHEDULER = cfg['LR_SCHEDULER']
    LR_STEP_SIZE = cfg['LR_STEP_SIZE']
    LR_DECAY_EPOCH = cfg['LR_DECAY_EPOCH']
    LR_DECAT_GAMMA = cfg['LR_DECAT_GAMMA']
    LR_END = cfg['LR_END']
    WARMUP_EPOCH = cfg['WARMUP_EPOCH']
    WARMUP_LR = cfg['WARMUP_LR']
    NUM_EPOCH = cfg['NUM_EPOCH']
    USE_APEX = cfg['USE_APEX']
    EVAL_FREQ = cfg['EVAL_FREQ']
    SYNC_BN = cfg['SYNC_BN']
    print("=" * 60)
    print("Overall Configurations:")
    print(cfg)
    print("=" * 60)

    transform_list = [
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ]
    if cfg['RANDOM_ERASING']:
        transform_list.append(RandomErasing())
    if cfg['CUTOUT']:
        transform_list.append(Cutout())
    train_transform = transforms.Compose(transform_list)
    if cfg['RANDAUGMENT']:
        # RandAugment goes first, ahead of the flip / ToTensor / Normalize.
        train_transform.transforms.insert(
            0, RandAugment(n=cfg['RANDAUGMENT_N'], m=cfg['RANDAUGMENT_M']))

    dataset_train = FaceDataset(DATA_ROOT, RECORD_DIR, train_transform)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        dataset_train)
    train_loader = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=per_batch_size,
        shuffle=(train_sampler is None),
        num_workers=workers,
        pin_memory=True,
        sampler=train_sampler,
        drop_last=DROP_LAST)
    SAMPLE_NUMS = dataset_train.get_sample_num_of_each_class()  # result currently unused
    NUM_CLASS = len(train_loader.dataset.classes)
    print("Number of Training Classes: {}".format(NUM_CLASS))

    (lfw, cfp_fp, agedb_30, vgg2_fp,
     lfw_issame, cfp_fp_issame, agedb_30_issame,
     vgg2_fp_issame) = get_val_data(VAL_DATA_ROOT)

    # ------------------------------------------------------------------
    # model & loss & optimizer
    # ------------------------------------------------------------------
    BACKBONE_DICT = {
        'MobileFaceNet': MobileFaceNet,
        'ResNet_50': ResNet_50,
        'ResNet_101': ResNet_101,
        'ResNet_152': ResNet_152,
        'IR_50': IR_50,
        'IR_100': IR_100,
        'IR_101': IR_101,
        'IR_152': IR_152,
        'IR_185': IR_185,
        'IR_200': IR_200,
        'IR_SE_50': IR_SE_50,
        'IR_SE_100': IR_SE_100,
        'IR_SE_101': IR_SE_101,
        'IR_SE_152': IR_SE_152,
        'IR_SE_185': IR_SE_185,
        'IR_SE_200': IR_SE_200,
        'AttentionNet_IR_56': AttentionNet_IR_56,
        'AttentionNet_IRSE_56': AttentionNet_IRSE_56,
        'AttentionNet_IR_92': AttentionNet_IR_92,
        'AttentionNet_IRSE_92': AttentionNet_IRSE_92,
        'PolyNet': PolyNet,
        'PolyFace': PolyFace,
        'EfficientPolyFace': EfficientPolyFace,
        'ResNeSt_50': resnest50,
        'ResNeSt_101': resnest101,
        'ResNeSt_100': resnest100,
        'GhostNet': GhostNet,
    }
    BACKBONE_NAME = cfg['BACKBONE_NAME']
    INPUT_SIZE = cfg['INPUT_SIZE']
    # The TorchScript export further down traces with a fixed 112x112 input.
    assert INPUT_SIZE == [112, 112]
    backbone = BACKBONE_DICT[BACKBONE_NAME](INPUT_SIZE)
    print("=" * 60)
    print(backbone)
    print("{} Backbone Generated".format(BACKBONE_NAME))
    print("=" * 60)

    HEAD_DICT = {
        'Softmax': Softmax,
        'ArcFace': ArcFace,
        'Combined': Combined,
        'CosFace': CosFace,
        'SphereFace': SphereFace,
        'Am_softmax': Am_softmax,
        'CurricularFace': CurricularFace,
        'ArcNegFace': ArcNegFace,
        'SVX': SVXSoftmax,
        'AirFace': AirFace,
        'QAMFace': QAMFace,
        'CircleLoss': CircleLoss,
    }
    HEAD_NAME = cfg['HEAD_NAME']
    EMBEDDING_SIZE = cfg['EMBEDDING_SIZE']  # feature dimension
    head = HEAD_DICT[HEAD_NAME](in_features=EMBEDDING_SIZE,
                                out_features=NUM_CLASS)
    print("Params: ", count_model_params(backbone))
    print("Flops:", count_model_flops(backbone))
    print("=" * 60)
    print(head)
    print("{} Head Generated".format(HEAD_NAME))
    print("=" * 60)

    # -------------------- optimizer --------------------
    # separate batch_norm parameters from others; do not do weight decay for
    # batch_norm parameters to improve the generalizability
    if "IR" in BACKBONE_NAME:
        backbone_paras_only_bn, backbone_paras_wo_bn = \
            separate_irse_bn_paras(backbone)
    else:
        backbone_paras_only_bn, backbone_paras_wo_bn = \
            separate_resnet_bn_paras(backbone)

    LR = cfg['LR']  # initial LR
    WEIGHT_DECAY = cfg['WEIGHT_DECAY']
    MOMENTUM = cfg['MOMENTUM']
    optimizer = optim.SGD(
        [{'params': backbone_paras_wo_bn + list(head.parameters()),
          'weight_decay': WEIGHT_DECAY},
         {'params': backbone_paras_only_bn}],
        lr=LR, momentum=MOMENTUM)

    if LR_SCHEDULER == 'step':
        scheduler = StepLR(optimizer, step_size=LR_STEP_SIZE,
                           gamma=LR_DECAT_GAMMA)
    elif LR_SCHEDULER == 'multi_step':
        scheduler = MultiStepLR(optimizer, milestones=LR_DECAY_EPOCH,
                                gamma=LR_DECAT_GAMMA)
    elif LR_SCHEDULER == 'cosine':
        scheduler = CosineWarmupLR(optimizer,
                                   batches=len(train_loader),
                                   epochs=NUM_EPOCH,
                                   base_lr=LR,
                                   target_lr=LR_END,
                                   warmup_epochs=WARMUP_EPOCH,
                                   warmup_lr=WARMUP_LR)
    else:
        # Fail here with a clear message instead of a NameError on
        # `scheduler` once the training loop starts.
        raise ValueError(
            "Unsupported LR_SCHEDULER: {!r}".format(LR_SCHEDULER))
    print("=" * 60)
    print(optimizer)
    print("Optimizer Generated")
    print("=" * 60)

    # loss
    LOSS_NAME = cfg['LOSS_NAME']
    LOSS_DICT = {
        'Softmax': nn.CrossEntropyLoss(),
        'LabelSmooth': LabelSmoothCrossEntropyLoss(classes=NUM_CLASS),
        'Focal': FocalLoss(),
        'HM': HardMining(),
        'Softplus': nn.Softplus(),
    }
    loss = LOSS_DICT[LOSS_NAME].cuda(gpu)
    print("=" * 60)
    print(loss)
    print("{} Loss Generated".format(loss))
    print("=" * 60)

    torch.cuda.set_device(cfg['GPU'])
    backbone.cuda(cfg['GPU'])
    head.cuda(cfg['GPU'])

    # ------------------------------------------------------------------
    # optionally resume from a checkpoint
    # ------------------------------------------------------------------
    BACKBONE_RESUME_ROOT = cfg['BACKBONE_RESUME_ROOT']  # the root to resume training from a saved checkpoint
    HEAD_RESUME_ROOT = cfg['HEAD_RESUME_ROOT']  # the root to resume training from a saved checkpoint
    IS_RESUME = cfg['IS_RESUME']
    if IS_RESUME:
        print("=" * 60)
        # Defined up-front so the head branch never sees it unbound.
        loc = 'cuda:{}'.format(cfg['GPU'])
        # NOTE(review): nesting reconstructed -- the head checkpoint is only
        # consulted when the backbone checkpoint exists.
        if os.path.isfile(BACKBONE_RESUME_ROOT):
            print("Loading Backbone Checkpoint '{}'".format(
                BACKBONE_RESUME_ROOT))
            backbone.load_state_dict(
                torch.load(BACKBONE_RESUME_ROOT, map_location=loc))
            if os.path.isfile(HEAD_RESUME_ROOT):
                print("Loading Head Checkpoint '{}'".format(HEAD_RESUME_ROOT))
                checkpoint = torch.load(HEAD_RESUME_ROOT, map_location=loc)
                cfg['START_EPOCH'] = checkpoint['EPOCH']
                head.load_state_dict(checkpoint['HEAD'])
                optimizer.load_state_dict(checkpoint['OPTIMIZER'])
                del checkpoint
        else:
            print(
                "No Checkpoint Found at '{}' and '{}'. Please Have a Check or Continue to Train from Scratch"
                .format(BACKBONE_RESUME_ROOT, HEAD_RESUME_ROOT))
        print("=" * 60)

    # Plain (un-wrapped) copy of the backbone, used only for the TorchScript
    # export during checkpointing.
    ori_backbone = copy.deepcopy(backbone)
    if SYNC_BN:
        backbone = apex.parallel.convert_syncbn_model(backbone)
    if USE_APEX:
        [backbone, head], optimizer = amp.initialize([backbone, head],
                                                     optimizer,
                                                     opt_level='O2')
        backbone = DDP(backbone)
        head = DDP(head)
    else:
        backbone = torch.nn.parallel.DistributedDataParallel(
            backbone, device_ids=[cfg['GPU']])
        head = torch.nn.parallel.DistributedDataParallel(
            head, device_ids=[cfg['GPU']])

    # checkpoint and tensorboard dir
    MODEL_ROOT = cfg['MODEL_ROOT']  # the root to buffer your checkpoints
    LOG_ROOT = cfg['LOG_ROOT']  # the root to log your train/val status
    os.makedirs(MODEL_ROOT, exist_ok=True)
    os.makedirs(LOG_ROOT, exist_ok=True)
    writer = SummaryWriter(LOG_ROOT)  # writer for buffering intermedium results

    # Validation sets in the order they are evaluated and reported.
    val_sets = (
        ("LFW", lfw, lfw_issame),
        ("CFP_FP", cfp_fp, cfp_fp_issame),
        ("AgeDB", agedb_30, agedb_30_issame),
        ("VGGFace2_FP", vgg2_fp, vgg2_fp_issame),
    )
    DISP_FREQ = 100  # 100 batch

    # ------------------------------------------------------------------
    # train
    # ------------------------------------------------------------------
    for epoch in range(cfg['START_EPOCH'], cfg['NUM_EPOCH']):
        train_sampler.set_epoch(epoch)
        # Non-cosine schedulers advance per epoch, cosine one per batch.
        if LR_SCHEDULER != 'cosine':
            scheduler.step()

        # train for one epoch
        backbone.train()  # set to training mode
        head.train()

        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        # tqdm gets the loader itself (not iter(loader)) so it can show a total.
        for batch, (inputs, labels) in enumerate(tqdm(train_loader)):
            if LR_SCHEDULER == 'cosine':
                scheduler.step()

            # compute output
            start_time = time.time()
            inputs = inputs.cuda(cfg['GPU'], non_blocking=True)
            labels = labels.cuda(cfg['GPU'], non_blocking=True)

            if cfg['MIXUP']:
                inputs, labels_a, labels_b, lam = mixup_data(
                    inputs, labels, cfg['GPU'], cfg['MIXUP_PROB'],
                    cfg['MIXUP_ALPHA'])
                inputs, labels_a, labels_b = map(
                    Variable, (inputs, labels_a, labels_b))
            elif cfg['CUTMIX']:
                # NOTE(review): CutMix reuses MIXUP_ALPHA, as in the original.
                inputs, labels_a, labels_b, lam = cutmix_data(
                    inputs, labels, cfg['GPU'], cfg['CUTMIX_PROB'],
                    cfg['MIXUP_ALPHA'])
                inputs, labels_a, labels_b = map(
                    Variable, (inputs, labels_a, labels_b))

            features = backbone(inputs)
            outputs = head(features, labels)

            if cfg['MIXUP'] or cfg['CUTMIX']:
                lossx = mixup_criterion(loss, outputs, labels_a, labels_b,
                                        lam)
            elif HEAD_NAME != 'CircleLoss':
                lossx = loss(outputs, labels)
            else:
                lossx = loss(outputs).mean()

            duration = time.time() - start_time
            if ((batch + 1) % DISP_FREQ == 0) and batch != 0:
                print("batch inference time", duration)

            # compute gradient and do SGD step
            optimizer.zero_grad()
            if USE_APEX:
                with amp.scale_loss(lossx, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                lossx.backward()
            optimizer.step()

            # measure accuracy and record loss
            if HEAD_NAME != 'CircleLoss':
                prec1, prec5 = accuracy(outputs.data, labels, topk=(1, 5))
            else:
                prec1, prec5 = accuracy(features.data, labels, topk=(1, 5))
            losses.update(lossx.data.item(), inputs.size(0))
            top1.update(prec1.data.item(), inputs.size(0))
            top5.update(prec5.data.item(), inputs.size(0))

            # dispaly training loss & acc every DISP_FREQ
            if ((batch + 1) % DISP_FREQ == 0) or batch == 0:
                print("=" * 60)
                print('Epoch {}/{} Batch {}/{}\t'
                      'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          epoch + 1, cfg['NUM_EPOCH'], batch + 1,
                          len(train_loader),
                          loss=losses, top1=top1, top5=top5))
                print("=" * 60)

            # perform validation & save checkpoints every EVAL_FREQ batches
            if (batch + 1) % EVAL_FREQ == 0:
                lr = optimizer.param_groups[0]['lr']
                print("Current lr", lr)
                print("=" * 60)
                print(
                    "Perform Evaluation on LFW, CFP_FP, AgeD and VGG2_FP, and Save Checkpoints..."
                )
                accuracies = []
                for tag, val_data, val_issame in val_sets:
                    acc, best_threshold, roc_curve = perform_val(
                        EMBEDDING_SIZE, per_batch_size, backbone,
                        val_data, val_issame)
                    buffer_val(writer, tag, acc, best_threshold, roc_curve,
                               epoch + 1)
                    accuracies.append(acc)
                print(
                    "Epoch {}/{}, Evaluation: LFW Acc: {}, CFP_FP Acc: {}, AgeDB Acc: {}, VGG2_FP Acc: {}"
                    .format(epoch + 1, NUM_EPOCH, *accuracies))
                print("=" * 60)

                print("=" * 60)
                print("Save Checkpoint...")
                if cfg['RANK'] % ngpus_per_node == 0:
                    # Only a traced TorchScript backbone is written; the
                    # state-dict / head / optimizer checkpoints that the
                    # resume path reads are not produced here.
                    ori_backbone.load_state_dict(backbone.module.state_dict())
                    ori_backbone.eval()
                    x = torch.randn(1, 3, 112, 112).cuda()
                    traced_cell = torch.jit.trace(ori_backbone, (x))
                    torch.jit.save(
                        traced_cell,
                        os.path.join(
                            MODEL_ROOT,
                            "Epoch_{}_Time_{}_checkpoint.pth".format(
                                epoch + 1, get_time())))
                sys.stdout.flush()
                # perform_val is not visible here; re-assert training mode in
                # case it switched the backbone to eval().
                backbone.train()

        epoch_loss = losses.avg
        epoch_acc = top1.avg
        print("=" * 60)
        print('Epoch: {}/{}\t'
              'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
              'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                  epoch + 1, cfg['NUM_EPOCH'],
                  loss=losses, top1=top1, top5=top5))
        sys.stdout.flush()
        print("=" * 60)
        if cfg['RANK'] % ngpus_per_node == 0:
            writer.add_scalar("Training_Loss", epoch_loss, epoch + 1)
            writer.add_scalar("Training_Accuracy", epoch_acc, epoch + 1)
            writer.add_scalar("Top1", top1.avg, epoch + 1)
            writer.add_scalar("Top5", top5.avg, epoch + 1)
def train():
    """Train the FCN8-VGG network on batches from the LIP image reader.

    Builds the queue-based input pipeline, the network and an Adam training
    op, then runs NUM_STEPS steps. Along the way it writes the merged
    summary every step, prints the loss every PRINT_PRED_EVERY steps, dumps
    image / label / prediction PNGs every PREDICT_EVERY steps, and saves a
    checkpoint every SAVE_PRED_EVERY steps.

    All settings come from module-level constants; nothing is returned.
    """
    # Create queue coordinator.
    coord = tf.train.Coordinator()
    sess = tf.Session()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = LIP_reader.ImageReader(TRAIN_IMAGE_PATH, TRAIN_LABEL_PATH,
                                        TRAIN_ID_LIST, INPUT_SIZE, N_CLASSES,
                                        RANDOM_SCALE, RANDOM_MIRROR, SHUFFLE,
                                        coord)
        image_batch, label_batch = reader.dequeue(BATCH_SIZE)

    # Build FCN network
    fcn = fcn8_vgg.FCN8VGG()
    with tf.name_scope("content_vgg"):
        fcn.build(image_batch, num_classes=N_CLASSES,
                  random_init_fc8=False, debug=True)
    print('Finished building Network.')

    # Define loss and optimisation parameters.
    # NOTE(review): indentation was lost; the scalar summary is assumed to
    # live inside the 'loss' name scope.
    with tf.name_scope('loss'):
        logits = fcn.upscore32
        labels = label_batch
        loss_ = loss.loss(logits, labels, num_classes=N_CLASSES)
        tf.summary.scalar("loss", loss_)

    # Summary
    merged = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
    train_op = tf.train.AdamOptimizer(ADAM_OPT_RATE).minimize(loss_)

    # Saver for storing checkpoints of the model.
    saver = tf.train.Saver(max_to_keep=5)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    try:
        # Initialize
        logging.info("Start Initializing Variables.")
        sess.run(tf.global_variables_initializer())

        # Checking demo save path: start from an empty demo directory.
        demo_dir = os.getcwd() + '/train_demo/train_2/'
        # Only remove it when present; rmtree raises on a missing path, which
        # used to abort the very first run.
        if os.path.isdir(demo_dir):
            shutil.rmtree(demo_dir)
        for subdir in ('image', 'label', 'predict'):
            if not os.path.exists(demo_dir + subdir):
                os.makedirs(demo_dir + subdir)

        # Iterate over training steps.
        print ('Start training......')
        fetches = [merged, loss_, train_op, image_batch, label_batch,
                   fcn.pred_up]
        for step in range(NUM_STEPS):
            start_time = time.time()

            # Do training process
            (merged_summary, loss_value, _, origin_image, origin_label,
             pred_up) = sess.run(fetches)
            summary_writer.add_summary(merged_summary, step)

            if step % PRINT_PRED_EVERY == 0:
                duration = time.time() - start_time
                print('step {:d} \t loss = {:.3f}, ({:.3f} sec/step)'.format(
                    step, loss_value, duration))

            if step % PREDICT_EVERY == 0:
                print (' Doing Demo......')
                origin_image = np.array(origin_image, np.int32)
                origin_label = np.array(origin_label, np.int32)
                for im in range(BATCH_SIZE):
                    image_color = convert_RGB_TO_BGR(origin_image[im])
                    label_color = color_label(origin_label[im])
                    pred_result = color_image(pred_up[im])
                    cv2.imwrite('{:s}/image/image_{:d}_{:d}.png'.format(
                        demo_dir, step, im), image_color)
                    cv2.imwrite('{:s}/label/label_{:d}_{:d}.png'.format(
                        demo_dir, step, im), label_color)
                    cv2.imwrite('{:s}/predict/predict_{:d}_{:d}.png'.format(
                        demo_dir, step, im), pred_result)
                duration = time.time() - start_time
                print (' Done. {:.3f} sec'.format(duration))

            if step % SAVE_PRED_EVERY == 0:
                print (' Saving Model......')
                save_path = SNAPSHOT_DIR + '/model.ckpt'
                saver.save(sess, save_path, global_step=step)
                duration = time.time() - start_time
                print (' Done. {:.3f} sec'.format(duration))
    finally:
        # Always stop the queue-runner threads and release the session, even
        # when a step raises, so the process does not hang on live threads.
        coord.request_stop()
        coord.join(threads)
        sess.close()
# Build and start training an FCN16-VGG network inside a single session.
# NOTE(review): this fragment had lost its indentation and is truncated inside
# the training loop; the nesting below is a reconstruction -- confirm it
# against the original file.
with tf.device('/cpu:0'):
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # NOTE(review): the *input* placeholder also uses num_classes as its
        # last dimension -- confirm this is intended rather than the image
        # channel count.
        input_placeholder = tf.placeholder(
            tf.float32, [None, height, width, num_classes])
        output_placeholder = tf.placeholder(
            tf.float32, [None, height, width, num_classes])
        vgg_fcn = fcn16_vgg.FCN16VGG('./vgg16.npy')
        with tf.name_scope('content_vgg'):
            vgg_fcn.build(input_placeholder, train=True,
                          num_classes=num_classes, debug=True)
        with tf.name_scope('loss'):
            # After this line the name `loss` refers to the loss tensor, no
            # longer to the `loss` module it was called through.
            loss = loss.loss(vgg_fcn.upscore32, output_placeholder,
                             num_classes)
            optimizer = tf.train.AdamOptimizer(0.0001).minimize(loss)
        print('Finished building Network.')
        # Initializing the variables.
        init = tf.global_variables_initializer()
        # Run initialized variables.
        sess.run(init)
        print('Running the Network')
        print('Training the Network')
        for step in range(num_steps):
            # Walk through input_set in batch_size strides, wrapping the
            # start offset modulo `size`.
            offset = (step * batch_size) % size
            batch_input = input_set[offset:(offset + batch_size), :]
def main(argv=None):
    """Build the FCN32-VGG graph, then train or visualize per FLAGS.mode.

    In ``"train"`` mode it runs iterations 1..MAX_ITERATION-1, logging the
    training loss every 10 iterations. Every 210 iterations it evaluates 20
    validation batches (loss, F-measure, accuracies, IU), writes summaries,
    saves a checkpoint under ``FLAGS.logs_dir`` and dumps 20 random
    validation predictions as images.

    In ``"visualize"`` mode it predicts every validation record and saves the
    prediction and ground truth under ``<logs_dir>/pred01`` and
    ``<logs_dir>/gt01``.

    Args:
        argv: unused by this function.
    """
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32,
                           shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3],
                           name="input_image")
    annotation = tf.placeholder(tf.int32,
                                shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                                name="annotation")
    # Scalar placeholders used only to push metrics computed in numpy into
    # TensorBoard summaries.
    FM_pl = tf.placeholder(tf.float32, [])
    total_acc_pl = tf.placeholder(tf.float32, [])
    acc_pl = tf.placeholder(tf.float32, [])
    iu_pl = tf.placeholder(tf.float32, [])
    fwavacc_pl = tf.placeholder(tf.float32, [])

    vgg_fcn = fcn32_vgg.FCN32VGG()
    vgg_fcn.build(image,
                  num_classes=num_classes,
                  keep_probability=keep_probability,
                  random_init_fc8=True)
    logits = vgg_fcn.upscore
    pred_annotation = vgg_fcn.pred_up

    loss = LOSS.loss(
        logits,
        tf.one_hot(tf.squeeze(annotation, squeeze_dims=[3]),
                   depth=num_classes))
    regularization_loss = tf.add_n(
        tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
    # NOTE(review): t_loss is built but train_op below minimizes `loss` only,
    # so the regularization term is not optimized -- confirm intent.
    t_loss = loss + regularization_loss

    loss_summary = tf.summary.scalar("entropy", loss)
    FM_summary = tf.summary.scalar('FM', FM_pl)
    acc_total_summary = tf.summary.scalar("total_acc", total_acc_pl)
    acc_summary = tf.summary.scalar("acc", acc_pl)
    iu_summary = tf.summary.scalar("iu", iu_pl)
    fwavacc_summary = tf.summary.scalar("fwavacc", fwavacc_pl)

    train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(loss)
    summary_op = tf.summary.merge_all()  # kept in the graph; not run below

    train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print(len(train_records))
    print(len(valid_records))

    print("Setting up dataset reader")
    image_options = {'resize': False, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records,
                                                   image_options)
    # The validation reader is needed by both modes.
    validation_dataset_reader = dataset.BatchDatset(valid_records,
                                                    image_options)

    sess = tf.Session(config=config)
    saver = tf.train.Saver(max_to_keep=3)

    # create two summary writers to show training loss and validation loss in
    # the same graph; need to create two folders 'train' and 'validation'
    # inside FLAGS.logs_dir
    graph_writer = tf.summary.FileWriter(
        os.path.join(FLAGS.logs_dir, 'graph'), sess.graph)
    train_writer = tf.summary.FileWriter(
        os.path.join(FLAGS.logs_dir, 'train'))
    validation_writer = tf.summary.FileWriter(
        os.path.join(FLAGS.logs_dir, 'validation'))

    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    if FLAGS.mode == "train":
        num_val_batches = 20   # validation batches evaluated per round
        num_dump_images = 20   # random validation samples saved per round

        for itr in range(1, MAX_ITERATION):
            train_images, train_annotations = \
                train_dataset_reader.next_batch(FLAGS.batch_size)
            feed_dict = {
                image: train_images,
                annotation: train_annotations,
                keep_probability: 0.5,
            }
            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                train_loss, summary_str = sess.run([loss, loss_summary],
                                                   feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                train_writer.add_summary(summary_str, itr)

            # NOTE(review): indentation was lost; validation, checkpointing
            # and the image dump are all assumed to run every 210 iterations.
            if itr % 210 == 0:
                valid_images, valid_annotations = \
                    validation_dataset_reader.get_records()
                val_count = 0
                total_loss = 0
                hist = np.zeros((num_classes, num_classes))
                fm = 0
                for _ in range(num_val_batches):
                    val_images = valid_images[
                        val_count:val_count + val_batch_size]
                    val_annotations = valid_annotations[
                        val_count:val_count + val_batch_size]
                    val_loss, val_pred_dense = sess.run(
                        [loss, logits],
                        feed_dict={
                            image: val_images,
                            annotation: val_annotations,
                            keep_probability: 1.0,
                        })
                    total_loss = total_loss + val_loss
                    val_count = val_count + val_batch_size
                    hist += get_hist(val_pred_dense, val_annotations)
                    fm += get_FM(val_pred_dense, val_annotations)

                valid_loss = total_loss / num_val_batches
                FM = fm / (num_val_batches * val_batch_size)
                # Metrics derived from the accumulated `hist` matrix.
                acc_total = np.diag(hist).sum() / hist.sum()
                acc = np.diag(hist) / hist.sum(1)
                iu = np.diag(hist) / (hist.sum(1) + hist.sum(0)
                                      - np.diag(hist))
                freq = hist.sum(1) / hist.sum()
                mean_acc = np.nanmean(acc)
                mean_iu = np.nanmean(iu)
                fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()

                # The averaged loss is fed in place of the `loss` tensor so
                # the same "entropy" summary can be reused for validation.
                summary_sva = sess.run(loss_summary,
                                       feed_dict={loss: valid_loss})
                summary_FM = sess.run(FM_summary, feed_dict={FM_pl: FM})
                summary_acc_total = sess.run(
                    acc_total_summary, feed_dict={total_acc_pl: acc_total})
                summary_acc = sess.run(acc_summary,
                                       feed_dict={acc_pl: mean_acc})
                summary_iu = sess.run(iu_summary,
                                      feed_dict={iu_pl: mean_iu})
                summary_fwavacc = sess.run(fwavacc_summary,
                                           feed_dict={fwavacc_pl: fwavacc})

                print("Step: %d, Valid_loss:%g" % (itr, valid_loss))
                print(" >>> Step: %d, f1_score:%g" % (itr, FM))
                # overall accuracy
                print(" >>> Step: %d, overall accuracy:%g" % (itr, acc_total))
                print(" >>> Step: %d, mean accuracy:%g" % (itr, mean_acc))
                print(" >>> Step: %d, mean IU:%g" % (itr, mean_iu))
                print(" >>> Step: %d, fwavacc:%g" % (itr, fwavacc))

                for summary in (summary_sva, summary_FM, summary_acc_total,
                                summary_acc, summary_iu, summary_fwavacc):
                    validation_writer.add_summary(summary, itr)

                # Joined with os.path.join so the checkpoint lands inside
                # logs_dir (where get_checkpoint_state looks on restart) even
                # when logs_dir has no trailing separator.
                saver.save(sess, os.path.join(FLAGS.logs_dir, "model.ckpt"),
                           itr)

                va_images, va_annotations = \
                    validation_dataset_reader.get_random_batch(
                        num_dump_images)
                pred = sess.run(pred_annotation,
                                feed_dict={
                                    image: va_images,
                                    annotation: va_annotations,
                                    keep_probability: 1.0,
                                })
                va_annotations = np.squeeze(va_annotations, axis=3)
                pred = pred * 255
                va_annotations = va_annotations * 255
                for it in range(num_dump_images):
                    suffix = str(5 + it)
                    utils.save_image(va_images[it].astype(np.uint8),
                                     FLAGS.logs_dir,
                                     name="inp_" + suffix)
                    utils.save_image(va_annotations[it].astype(np.uint8),
                                     FLAGS.logs_dir,
                                     name="gt_" + suffix)
                    utils.save_image(pred[it].astype(np.uint8),
                                     FLAGS.logs_dir,
                                     name="pred_" + suffix)

    elif FLAGS.mode == "visualize":
        _, val_annotation = validation_dataset_reader.get_records()
        val_annotation = np.squeeze(val_annotation, axis=3)
        val_annotation = val_annotation * 255
        pred_dir = os.path.join(FLAGS.logs_dir, 'pred01')
        gt_dir = os.path.join(FLAGS.logs_dir, 'gt01')
        for it, record in enumerate(valid_records):
            val_image = np.array(misc.imread(record['image']))
            # NOTE(review): hard-coded 256x256x3 rather than IMAGE_SIZE.
            val_image = np.reshape(val_image, (1, 256, 256, 3))
            pred = sess.run(pred_annotation,
                            feed_dict={
                                image: val_image,
                                keep_probability: 1.0,
                            })
            pred = pred * 255
            utils.save_image(
                pred[0].astype(np.uint8),
                pred_dir,
                name=os.path.splitext(record['image'].split("/")[-1])[0])
            utils.save_image(
                val_annotation[it].astype(np.uint8),
                gt_dir,
                name=os.path.splitext(
                    record['annotation'].split("/")[-1])[0])
def train():
    """Train ring_net for a number of steps.

    Builds the model selected by ``FLAGS.model`` on a fresh graph and runs
    ``FLAGS.max_step`` training steps on batches from ``b.bounce_vec``.
    Every 2000 steps it writes summaries, prints both loss terms, saves a
    real/generated image pair and updates the stddev plot; every 1000 steps
    it saves a checkpoint under ``train_dir_save``.

    Raises:
        ValueError: if ``FLAGS.model`` is not ``"fully_connected"``,
            ``"conv"`` or ``"all_conv"``.
        AssertionError: if the training loss becomes NaN.
    """
    with tf.Graph().as_default():
        # make inputs
        x = tf.placeholder(tf.float32, [FLAGS.batch_size, 32, 32, 1])

        # possible dropout inside (default is 1.0)
        keep_prob = tf.placeholder("float")

        # make model
        if FLAGS.model == "fully_connected":
            mean, stddev, y_sampled, x_prime = arc.fully_connected_model(x, keep_prob)
        elif FLAGS.model == "conv":
            mean, stddev, y_sampled, x_prime = arc.conv_model(x, keep_prob)
        elif FLAGS.model == "all_conv":
            mean, stddev, y_sampled, x_prime = arc.all_conv_model(x, keep_prob)
        else:
            # Fail here with a clear message instead of a NameError on
            # `mean` a few lines further down.
            raise ValueError("model requested not found: %r" % (FLAGS.model,))

        # calc loss stuff
        loss_vae, loss_reconstruction, loss, train_op = ls.loss(mean, stddev, x, x_prime)

        # Build a saver
        saver = tf.train.Saver(tf.all_variables())

        # Summary op
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session()

        # init if this is the very time training
        print("init network from scratch")
        sess.run(init)

        # Summary writer (also records the graph with shapes)
        graph_def = sess.graph.as_graph_def(add_shapes=True)
        summary_writer = tf.train.SummaryWriter(train_dir_save, graph_def=graph_def)

        for step in xrange(FLAGS.max_step):
            dat = b.bounce_vec(32, FLAGS.num_balls, FLAGS.batch_size)
            feed = {x: dat, keep_prob: FLAGS.keep_prob}

            # Only the optimisation step itself is timed.
            t = time.time()
            _, loss_r = sess.run([train_op, loss], feed_dict=feed)
            elapsed = time.time() - t

            if step % 2000 == 0:
                # Diagnostics only: train_op is deliberately NOT fetched here,
                # otherwise the same batch would get a second, untimed
                # parameter update every 2000 steps.
                loss_vae_r, loss_reconstruction_r, x_prime_r, stddev_r, summary_str = sess.run(
                    [loss_vae, loss_reconstruction, x_prime, stddev, summary_op],
                    feed_dict=feed)
                summary_writer.add_summary(summary_str, step)
                print("loss vae value at " + str(loss_vae_r))
                print("loss reconstruction value at " + str(loss_reconstruction_r))
                print("time per batch is " + str(elapsed))

                # First sample of the batch: input vs. reconstruction.
                cv2.imwrite("real_balls.jpg", np.uint8(dat[0, :, :, :] * 255))
                cv2.imwrite("generated_balls.jpg", np.uint8(x_prime_r[0, :, :, :] * 255))

                # Sorted batch-averaged stddev; one curve is added per
                # diagnostic step to the same figure.
                stddev_r = np.sort(np.sum(stddev_r, axis=0))
                plt.plot(stddev_r / FLAGS.batch_size, label="step " + str(step))
                plt.legend()
                plt.savefig('stddev_num_balls_' + str(FLAGS.num_balls) + '_beta_' + str(FLAGS.beta) + '.png')

            assert not np.isnan(loss_r), 'Model diverged with loss = NaN'

            if step % 1000 == 0:
                checkpoint_path = os.path.join(train_dir_save, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                print("saved to " + train_dir_save)
                print("step " + str(step))
# Data placeholders
inputBatchImages = tf.placeholder(dtype=tf.float32, shape=[None, 512, 640, 3], name="inputBatchImages")
inputBatchLabels = tf.placeholder(dtype=tf.float32, shape=[None, 512, 640, options.numClasses], name="inputBatchLabels")
inputKeepProbability = tf.placeholder(dtype=tf.float32, name="inputKeepProbability")

vgg_fcn = fcn8_vgg_imp.FCN2VGG(batchSize=options.batchSize, statsFile=options.statsFileName, enableTensorboardVisualization=options.tensorboardVisualization)

with tf.name_scope('Model'):
    # Construct model
    vgg_fcn.build(inputBatchImages, inputKeepProbability, num_classes=options.numClasses, random_init_fc8=True, debug=(options.verbose > 0))

with tf.name_scope('Loss'):
    # Per-element mask: 1.0 where the label differs from the ignore label,
    # 0.0 elsewhere. tf.not_equal is required here: on TF 0.x/1.x the Python
    # `!=` operator on a Tensor is a plain object comparison that yields a
    # single True, which would make the mask a scalar 1.0.
    weights = tf.cast(tf.not_equal(inputBatchLabels, options.ignoreLabel), dtype=tf.float32)
    # NOTE(review): this rebinds the name `loss` from the loss module to the
    # loss tensor; the module is no longer reachable under that name afterwards.
    loss = loss.loss(vgg_fcn.softmax, inputBatchLabels, options.numClasses, weights)

with tf.name_scope('Optimizer'):
    # Define Optimizer
    optimizer = tf.train.AdamOptimizer(learning_rate=options.learningRate)

    # Op to calculate every variable gradient. The variable list is read once
    # so gradients and variables are guaranteed to be paired from the same list.
    trainableVariables = tf.trainable_variables()
    gradients = tf.gradients(loss, trainableVariables)
    gradients = list(zip(gradients, trainableVariables))

    # Op to update all variables according to their gradient
    applyGradients = optimizer.apply_gradients(grads_and_vars=gradients)

# Initializing the variables (replaces the older tf.initialize_all_variables())
init = tf.global_variables_initializer()
# Command-line flags for the evaluation script.
tf.flags.DEFINE_integer('batch_size', 1, '批大小')
tf.flags.DEFINE_integer('frame_count', 60, "帧数")
tf.flags.DEFINE_integer('frequency', 16000, "采样率")
tf.flags.DEFINE_integer('kwidth', 18, '窗格大小')
tf.flags.DEFINE_integer('num_train', 1000, "训练次数")
tf.flags.DEFINE_float('learning_rate', 3e-4, "学习速率")
tf.flags.DEFINE_float('beta1', 0.5, "Adam动量")

sess = tf.InteractiveSession()
coord = tf.train.Coordinator()

# Input pipeline over the test file; each window is frequency // frame_count samples.
reader = read.Reader(path=FLAGS.test_file,
                     batch_size=FLAGS.batch_size,
                     window_size=FLAGS.frequency // FLAGS.frame_count,
                     kwidth=FLAGS.kwidth)

# Inference graph in evaluation mode, plus the loss on the reader's labels.
logits = inference.Inference(reader.wav_raw, FLAGS.kwidth, 2, isTrain=False).build_model()
loss_val = loss.loss(logits=logits, labels=reader.label)

# Initialise, then overwrite the variables with the checkpointed values.
saver = tf.train.Saver()
tf.global_variables_initializer().run()
# NOTE(review): the flag is named checkpointDir but is passed to restore()
# as the checkpoint path itself - confirm what callers put in it.
saver.restore(sess, FLAGS.checkpointDir)

# Start the queue runners exactly once, after the whole graph is built and
# the variables are initialised/restored.
tf.train.start_queue_runners(sess=sess, coord=coord)

labels = tf.reshape(reader.label, [-1])
logits_predict, ground_truth = sess.run([logits, labels])

plt.figure(1, [20, 12])
plt.subplot(411)
plt.title('predict')
plt.plot(logits_predict)
# Input pipeline: an initializable iterator over the dataset.
images_iter = dataset.make_initializable_iterator()
images = images_iter.get_next()

# Graph inputs: the damaged images fed to the generator and the real targets.
image_shape = (None, 218, 178, 3)
wrecked_images = tf.placeholder(tf.float32, shape=image_shape)
real_images = tf.placeholder(tf.float32, shape=image_shape)

# Weight of the reconstruction (mean squared error) term in the generator loss.
l1_ratio = 0.3

# Generator and discriminator; the discriminator is applied a second time to
# the real images with its variables reused.
with tf.variable_scope('') as scope:
    fake_images = build_generator(wrecked_images)
    fake_logits = build_discriminator(fake_images)
    scope.reuse_variables()
    real_logits = build_discriminator(real_images)

G_loss, D_loss = loss(fake_logits, real_logits, fake_images, real_images,
                      batch_size, build_discriminator)

# Blend the adversarial generator loss with the reconstruction error.
adversarial_term = (1 - l1_ratio) * G_loss
reconstruction_term = l1_ratio * tf.losses.mean_squared_error(fake_images, real_images)
G_loss = adversarial_term + reconstruction_term

# One Adam optimizer per network, each restricted to its own variables.
D_solver = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1)
G_solver = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1)
trainable_key = tf.GraphKeys.TRAINABLE_VARIABLES
D_vars = tf.get_collection(trainable_key, 'discriminator')
G_vars = tf.get_collection(trainable_key, 'generator')
D_train_step = D_solver.minimize(D_loss, var_list=D_vars)
G_train_step = G_solver.minimize(G_loss, var_list=G_vars)

# train
pointer = 0
def train(self):
    """Run the adversarial training loop for ``self.args.epochs`` epochs.

    For each batch a loss object is built from the low/high resolution pair
    and either the discriminator (while ``d_cnt < self.args.d_count``) or
    the generator is updated. After every epoch both networks are saved,
    the loss histories are saved when ``self.args.save_loss`` is set, and
    after epoch 5 the networks are additionally saved as "best" whenever
    ``|self.wasserstein|`` drops below the best value seen so far.
    """
    thresd = 100  # smallest |wasserstein| accepted as "best" so far

    if self.args.load_loss:
        print('load loss')
        self.save_d_loss = util.load_loss(self.args.result_dir, 'discriminator')
        self.save_g_loss = util.load_loss(self.args.result_dir, 'generator')

    self.model.d.train()
    self.model.g.train()

    for epoch in range(self.args.epochs):
        start_time = time.time()
        d_cnt = 0
        for n_batch, (lr, hr, _) in enumerate(self.data.loader_train):
            if not self.args.cpu:
                lr = lr.cuda()
                hr = hr.cuda()
            lr = Variable(lr, requires_grad=True)
            hr = Variable(hr, requires_grad=True)
            loss_ = loss(self.args, lr, hr, self.model)

            if d_cnt < self.args.d_count:
                self.update_d(loss_)
                if self.args.save_loss:
                    # One history row: [d_real, d_fake, d_cost, wasserstein].
                    terms = [
                        self.d_r_loss.cpu().view(-1),
                        self.d_f_loss.cpu().view(-1),
                        self.d_cost.cpu().view(-1),
                        self.wasserstein.cpu().view(-1),
                    ]
                    row = np.array([[term.detach().numpy()[0] for term in terms]])
                    self.save_d_loss = util.add_loss(self.save_d_loss, row)
                print(
                    'batch:{}/{}--d_real_loss = {:0.6f}, d_fake_loss = {:0.6f},d_cost = {:0.6f}, wasserstein = {:0.6f}\n'
                    .format(n_batch,
                            self.args.n_train // self.args.batch_size + 1,
                            self.d_r_loss, self.d_f_loss, self.d_cost,
                            self.wasserstein))
            else:
                d_cnt = 0
                self.update_g(loss_)
                print('g_loss={:0.6f},p_loss={:0.6f}'.format(
                    self.g_cost, self.p_loss))
                if self.args.save_loss:
                    g_value = self.g_cost.cpu().view(-1).detach().numpy()
                    # Append once through util.add_loss, as the discriminator
                    # branch does. The former extra torch.cat on this NumPy
                    # array raised TypeError.
                    self.save_g_loss = util.add_loss(self.save_g_loss, g_value)
            # NOTE(review): after a generator step the counter is reset to 0
            # and then incremented here, so later cycles run d_count - 1
            # discriminator updates - confirm this is intended.
            d_cnt += 1

        util.save_mdoel(self.args.result_dir, self.model.d, 'discriminator')
        util.save_mdoel(self.args.result_dir, self.model.g, 'generator')
        if self.args.save_loss:
            util.save_loss(self.args.result_dir, self.save_d_loss, 'discriminator')
            util.save_loss(self.args.result_dir, self.save_g_loss, 'generator')

        if self.wasserstein.abs() < thresd and epoch > 5:
            thresd = self.wasserstein.abs()
            util.save_mdoel(self.args.result_dir, self.model.d, 'best_discriminator')
            util.save_mdoel(self.args.result_dir, self.model.g, 'best_generator')
            print('best epoch {}'.format(epoch))

        # Elapsed time is "now minus start"; the reversed subtraction
        # reported a negative duration.
        print('epoch:{:0>4d} takes {:.2f} seconds--d_cost:{:.4f},wasserstein:{:.4f}'
              .format(epoch, time.time() - start_time, self.d_cost, self.wasserstein))