def get_restorer():
    checkpoint_path = tf.train.latest_checkpoint(os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION))

    if checkpoint_path is not None:
        if RESTORE_FROM_RPN:
            print('___restore from rpn___')
            model_variables = slim.get_model_variables()
            restore_variables = [var for var in model_variables
                                 if not var.name.startswith('Fast_Rcnn')] + [slim.get_or_create_global_step()]
            for var in restore_variables:
                print(var.name)
            restorer = tf.train.Saver(restore_variables)
        else:
            restorer = tf.train.Saver()
        print("model restore from :", checkpoint_path)
    else:
        checkpoint_path = cfgs.PRETRAINED_CKPT
        print("model restore from pretrained mode, path is :", checkpoint_path)

        model_variables = slim.get_model_variables()
        restore_variables = [var for var in model_variables
                             if (var.name.startswith(cfgs.NET_NAME)
                                 and not var.name.startswith('{}/logits'.format(cfgs.NET_NAME)))]
        for var in restore_variables:
            print(var.name)
        restorer = tf.train.Saver(restore_variables)

    return restorer, checkpoint_path
def _get_init_fn():
    """Returns a function run by the chief worker to warm-start the training.

    Note that the init_fn is only run when initializing the model during the very
    first global step.

    Returns:
      An init function run by the supervisor.
    """
    if FLAGS.checkpoint_path is None:
        return None

    exclusions = []
    if FLAGS.checkpoint_exclude_scopes:
        exclusions = [scope.strip()
                      for scope in FLAGS.checkpoint_exclude_scopes.split(',')]

    # TODO(sguada) variables.filter_variables()
    variables_to_restore = []
    for var in slim.get_model_variables():
        # print var.op.name
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)

    tf.logging.info('Fine-tuning from %s' % FLAGS.checkpoint_path)

    return slim.assign_from_checkpoint_fn(
        FLAGS.checkpoint_path,
        variables_to_restore,
        ignore_missing_vars=False)
def test_clean_accuracy(self):
    """Check model is accurate on unperturbed images."""
    input_dir = FLAGS.input_image_dir
    metadata_file_path = FLAGS.metadata_file_path
    num_images = 16
    batch_shape = (num_images, 299, 299, 3)
    images, labels = load_images(
        input_dir, metadata_file_path, batch_shape)
    num_classes = 1001

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        y_label = tf.placeholder(tf.int32, shape=(num_images,))
        model = InceptionModel(num_classes)
        logits = model.get_logits(x_input)
        acc = _top_1_accuracy(logits, y_label)

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(session_creator=session_creator) as sess:
            acc_val = sess.run(acc, feed_dict={x_input: images, y_label: labels})
            tf.logging.info('Accuracy: %s', acc_val)
            assert acc_val > 0.8
def load_det_weights(self, path):
    variables = slim.get_model_variables()
    weights = np.load(path)
    for var in variables:
        if var.name in weights.item():
            print var.name
            self.sess.run(var.assign(weights.item()[var.name]))
def save_weights(self, path):
    variables = slim.get_model_variables()
    weights = {}
    for var in variables:
        weights[var.name] = self.sess.run(var)
    np.save(path + '/weights', weights)
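# Usage sketch for the two helpers above (save_weights / load_det_weights).
# Assumption: `net` is a hypothetical object exposing both methods and holding an
# open tf.Session in `net.sess`; the directory path is illustrative only.
# save_weights() dumps all slim model variables into one .npy dict, which
# load_det_weights() reads back and assigns name-by-name.
net.save_weights('./checkpoints')                      # writes ./checkpoints/weights.npy
net.load_det_weights('./checkpoints/weights.npy')      # restores the matching variables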
def _get_init_fn():
    if FLAGS.checkpoint_path is None:
        return None

    exclusions = []
    if FLAGS.checkpoint_exclude_scopes:
        exclusions = [scope.strip()
                      for scope in FLAGS.checkpoint_exclude_scopes.split(',')]

    # TODO(sguada) variables.filter_variables()
    variables_to_restore = []
    for var in slim.get_model_variables():
        # print var.op.name
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)

    tf.logging.info('Fine-tuning from %s' % FLAGS.checkpoint_path)

    return slim.assign_from_checkpoint_fn(
        FLAGS.checkpoint_path,
        variables_to_restore,
        ignore_missing_vars=False)
def load_weights(self, path):
    variables = slim.get_model_variables()
    print 'Loading weights...'
    for var in tqdm(variables):
        if ('conv' in var.name) and ('weights' in var.name):
            self.sess.run(var.assign(
                np.load(path + var.name.split('/')[0] + '/W.npy').transpose((2, 3, 1, 0))))
        elif ('fc' in var.name) and ('weights' in var.name):
            self.sess.run(var.assign(
                np.load(path + var.name.split('/')[0] + '/W.npy').T))
        elif 'biases' in var.name:
            self.sess.run(var.assign(
                np.load(path + var.name.split('/')[0] + '/b.npy')))
    print 'Weights loaded!!'
def _get_init_fn():
    """Returns a function run by the chief worker to warm-start the training.

    Note that the init_fn is only run when initializing the model during the very
    first global step.

    Returns:
      An init function run by the supervisor.
    """
    if FLAGS.checkpoint_path is None:
        return None

    # Warn the user if a checkpoint exists in the train_dir. Then we'll be
    # ignoring the checkpoint anyway.
    if tf.train.latest_checkpoint(FLAGS.train_dir):
        tf.logging.info(
            'Ignoring --checkpoint_path because a checkpoint already exists in %s'
            % FLAGS.train_dir)
        return None

    exclusions = []
    if FLAGS.checkpoint_exclude_scopes:
        exclusions = [scope.strip()
                      for scope in FLAGS.checkpoint_exclude_scopes.split(',')]

    # TODO(sguada) variables.filter_variables()
    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)

    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
        checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    else:
        checkpoint_path = FLAGS.checkpoint_path

    tf.logging.info('Fine-tuning from %s' % checkpoint_path)

    return slim.assign_from_checkpoint_fn(
        checkpoint_path,
        variables_to_restore,
        ignore_missing_vars=FLAGS.ignore_missing_vars)
def test_attack_success(self):
    """Check SPSA creates misclassified images."""
    epsilon = 4. / 255
    input_dir = FLAGS.input_image_dir
    metadata_file_path = FLAGS.metadata_file_path
    num_images = 8
    batch_shape = (num_images, 299, 299, 3)
    images, labels = load_images(
        input_dir, metadata_file_path, batch_shape)
    num_classes = 1001

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=(1,) + batch_shape[1:])
        y_label = tf.placeholder(tf.int32, shape=(1,))
        model = InceptionModel(num_classes)

        attack = SPSA(model)
        x_adv = attack.generate(
            x_input, y=y_label, epsilon=epsilon, num_steps=30,
            early_stop_loss_threshold=-1., batch_size=32, spsa_iters=16,
            is_debug=True)

        logits = model.get_logits(x_adv)
        acc = _top_1_accuracy(logits, y_label)

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        num_correct = 0.
        with tf.train.MonitoredSession(session_creator=session_creator) as sess:
            for i in xrange(num_images):
                acc_val = sess.run(acc, feed_dict={
                    x_input: np.expand_dims(images[i], axis=0),
                    y_label: np.expand_dims(labels[i], axis=0),
                })
                tf.logging.info('Accuracy: %s', acc_val)
                num_correct += acc_val
            assert (num_correct / num_images) < 0.1
def get_init_fn():
    """Returns a function run by the chief worker to warm-start the training."""
    checkpoint_exclude_scopes = ["InceptionV1/Logits"]
    exclusions = [scope.strip() for scope in checkpoint_exclude_scopes]
    checkpoints_dir = "D:\\zero\\work\\models-master\\model\\"

    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)

    return slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'inception_v1.ckpt'),
        variables_to_restore)
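# Usage sketch for get_init_fn() above: the returned callable restores the
# InceptionV1 weights (minus the excluded Logits scope) into a live session before
# training starts. The session setup here is illustrative, not part of the original.
init_fn = get_init_fn()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)  # warm-start all non-excluded model variables from the checkpoint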
def test_attack_bounds(self):
    """Check SPSA respects perturbation limits."""
    epsilon = 4. / 255
    input_dir = FLAGS.input_image_dir
    metadata_file_path = FLAGS.metadata_file_path
    num_images = 8
    batch_shape = (num_images, 299, 299, 3)
    images, labels = load_images(
        input_dir, metadata_file_path, batch_shape)
    num_classes = 1001

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=(1,) + batch_shape[1:])
        y_label = tf.placeholder(tf.int32, shape=(1,))
        model = InceptionModel(num_classes)

        attack = SPSA(model)
        x_adv = attack.generate(
            x_input, y=y_label, epsilon=epsilon, num_steps=10,
            early_stop_loss_threshold=-1., batch_size=32, spsa_iters=1,
            is_debug=True)

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(session_creator=session_creator) as sess:
            for i in xrange(num_images):
                adv_image = sess.run(x_adv, feed_dict={
                    x_input: np.expand_dims(images[i], axis=0),
                    y_label: np.expand_dims(labels[i], axis=0),
                })
                diff = adv_image - images[i]
                assert np.max(np.abs(diff)) < epsilon + 1e-4
                # Bound checks must compare the max/min pixel value, not reduce a
                # boolean array, otherwise a single in-range pixel makes them pass.
                assert np.max(adv_image) < 1. + 1e-4
                assert np.min(adv_image) > -1e-4
def build_prediction_graph(self, serialized_examples):
    video_id, model_input_raw, labels_batch, num_frames = (
        self.reader.prepare_serialized_examples(serialized_examples))

    feature_dim = len(model_input_raw.get_shape()) - 1
    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    with tf.variable_scope("tower"):
        result = self.model.create_model(
            model_input,
            num_frames=num_frames,
            vocab_size=self.reader.num_classes,
            labels=labels_batch,
            is_training=False)

        for variable in slim.get_model_variables():
            tf.summary.histogram(variable.op.name, variable)

        predictions = result["predictions"]

        top_predictions, top_indices = tf.nn.top_k(
            predictions, _TOP_PREDICTIONS_IN_OUTPUT)
    return video_id, top_indices, top_predictions
def main(_):
    print 'reading npy...'
    data = np.load('../1st.npy')
    test_order = np.load('../test.npy')
    # jpg_list = np.load('128bin.npy')
    jpg_list = np.load('../nlcd+vae+image64/input_images_64.npy')
    print 'reading finished'

    sess = tf.Session()
    print 'building network...'
    hg = fc.fc(is_training=True)
    global_step = tf.Variable(0, name='global_step', trainable=False)

    merged_summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(FLAGS.summary_dir, sess.graph)
    saver = tf.train.Saver(max_to_keep=None)
    saver.restore(sess, FLAGS.checkpoint_path)
    print 'restoring from ' + FLAGS.checkpoint_path

    for var in slim.get_model_variables():
        print var.op.name
        if var.op.name.startswith('decoder/logits/weights'):
            a = sess.run(var)
            a = np.transpose(a)
            print np.shape(a)
            np.save('embed_17.npy', a)
def main(parsed):
    np.random.seed(cfg.RNG_SEED)

    # load database
    imdb, roidb, imdb_val, roidb_val, data_layer, data_layer_val = load_database(args)

    global nr_classes
    nr_classes = len(imdb._classes)
    args.nr_classes.append(nr_classes)

    # replaces keywords with function handles in training assignements
    # save_objectness_function_handles(args, imdb)

    # tensorflow session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # input and output tensors
    if "DeepScores" in args.dataset:
        input = tf.placeholder(tf.float32, shape=[None, None, None, 1])
        resnet_dir = cfg.PRETRAINED_DIR + "/DeepScores/"
        refinenet_dir = cfg.PRETRAINED_DIR + "/DeepScores_semseg/"

    num_classes = len(imdb._classes)
    print("Initializing Model:" + args.model)
    # model has all possible output heads (even if unused) to ensure saving and loading goes smoothly
    network, init_fn = build_refinenet(input, preset_model=args.model,
                                       num_classes=len(imdb._classes),
                                       pretrained_dir=resnet_dir, substract_mean=False)

    output = tf.placeholder(tf.float32, shape=[None, None, None, num_classes])
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=network, labels=output))
    opt = tf.train.RMSPropOptimizer(learning_rate=0.0001, decay=0.995).minimize(
        loss, var_list=[var for var in tf.trainable_variables()])

    # init tensorflow session
    saver = tf.train.Saver(max_to_keep=1000)
    sess.run(tf.global_variables_initializer())

    # load model weights
    checkpoint_dir = get_checkpoint_dir(args)
    checkpoint_name = "backbone"
    if args.continue_training == "True":
        print("Loading checkpoint")
        saver.restore(sess, checkpoint_dir + "/" + checkpoint_name)
    else:
        if args.pretrain_lvl == "semseg":
            # load all variables except the ones in scope "deep_watershed"
            pretrained_vars = []
            for var in slim.get_model_variables():
                if not ("deep_watershed" in var.name or "gt_feed_head" in var.name):
                    pretrained_vars.append(var)

            print("Loading network pretrained on semantic segmentation")
            loading_checkpoint_name = refinenet_dir + args.model + ".ckpt"
            init_fn = slim.assign_from_checkpoint_fn(loading_checkpoint_name, pretrained_vars)
            init_fn(sess)
        elif args.pretrain_lvl == "class":
            print("Loading pretrained weights for level: " + args.pretrain_lvl)
            init_fn(sess)
        else:
            print("Not loading a pretrained network")

    # set up tensorboard
    writer = tf.summary.FileWriter(checkpoint_dir, sess.graph)

    assign = {
        'ds_factors': [1],
        'downsample_marker': True,
        'overlap_solution': 'no',
        'stamp_func': 'stamp_class',
        'layer_loss_aggregate': 'avg',
        'mask_zeros': True,
        'stamp_args': {
            'marker_dim': None,
            'size_percentage': 1,
            "shape": "square",
            "class_resolution": "class",
            "loss": "softmax"
        }
    }
    assign["stamp_func"] = [assign["stamp_func"], stamp_class]

    for itr in range(1, 5000):
        batch_not_loaded = True
        while batch_not_loaded:
            blob = data_layer.forward(args, assign, None)
            batch_not_loaded = len(blob["gt_boxes"].shape) != 3

        train_images = blob["data"]
        train_annotations = blob["gt_map0"]

        im_data = np.concatenate([
            train_images[0], train_images[0],
            np.expand_dims(train_annotations[0, :, :, 0], -1) * 255
        ], -1)
        im = Image.fromarray(im_data.astype(np.uint8))
        im.save(sys.argv[0][:-31] + "overlayed_gt.png")

        dat_argmax = np.argmax(train_annotations[0], -1)
        dat_argmax[dat_argmax == 0] = 255
        im_argmax = Image.fromarray(dat_argmax.astype(np.uint8))
        im_argmax.save(sys.argv[0][:-31] + "argmax_gt.png")

        _, current = sess.run([opt, loss],
                              feed_dict={input: train_images, output: train_annotations})

        if itr == 1:
            print("initial loss" + str(current))
        if itr % 21 == 0:
            print("loss of current batch:" + str(current))
        if itr % 2001 == 0:
            print("saving weights")
            # execute tasks

    print("done :)")
checkpoints_dir = '/Users/zhangxin/data_public/goolenet/v4'  # inception_v4.ckpt

with tf.Graph().as_default():
    url = 'https://upload.wikimedia.org/wikipedia/commons/7/70/EnglishCockerSpaniel_simon.jpg'
    image_string = urllib.urlopen(url).read()
    image = tf.image.decode_jpeg(image_string, channels=3)
    processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size,
                                                               is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)

    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(inception.inception_v4_arg_scope()):
        logits, _ = inception.inception_v4(processed_images, num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)

    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
        slim.get_model_variables('InceptionV4'))

    with tf.Session() as sess:
        init_fn(sess)
        np_image, probabilities = sess.run([image, probabilities])
        probabilities = probabilities[0, 0:]
        sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])]

    # plt.figure()
    # plt.imshow(np_image.astype(np.uint8))
    # plt.axis('off')
    # plt.show()

    names = imagenet.create_readable_names_for_imagenet_labels()
    for i in range(5):
        index = sorted_inds[i]
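# Note: the snippet above ends mid-loop. In the standard slim InceptionV4 demo this
# appears to follow, the loop body would continue roughly as follows (assumption):
#     print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index]))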
def main(margin, batch_size, output_size, learning_rate, whichGPU, is_finetuning, pretrained_net):

    def handler(signum, frame):
        print 'Saving checkpoint before closing'
        pretrained_net = os.path.join(ckpt_dir, 'checkpoint-' + param_str)
        saver.save(sess, pretrained_net, global_step=step)
        print 'Checkpoint-', pretrained_net + '-' + str(step), ' saved!'
        sys.exit(0)

    signal.signal(signal.SIGINT, handler)

    ckpt_dir = './output/expedia/ckpts'
    log_dir = './output/expedia/logs'
    train_filename = './input/expedia_train_by_hotel.txt'
    mean_file = './input/meanIm.npy'

    img_size = [256, 256]
    crop_size = [224, 224]
    num_iters = 200000
    summary_iters = 100
    save_iters = 5000
    featLayer = 'resnet_v2_50/logits'

    is_training = True

    margin = float(margin)
    batch_size = int(batch_size)
    output_size = int(output_size)
    learning_rate = float(learning_rate)
    whichGPU = str(whichGPU)

    if batch_size % 30 != 0:
        print 'Batch size must be divisible by 30!'
        sys.exit(0)

    num_pos_examples = batch_size / 30

    # Create data "batcher"
    train_data = CombinatorialTripletSet(train_filename, mean_file, img_size, crop_size,
                                         batch_size, num_pos_examples, isTraining=is_training)
    numClasses = len(train_data.hotels.keys())
    numIms = np.sum([len(train_data.hotels[h]['ims']) for h in train_data.hotels.keys()])

    datestr = datetime.now().strftime("%Y_%m_%d_%H%M")
    param_str = datestr + '_lr' + str(learning_rate).replace('.', 'pt') + \
        '_outputSz' + str(output_size) + '_margin' + str(margin).replace('.', 'pt')
    logfile_path = os.path.join(log_dir, param_str + '_train.txt')
    train_log_file = open(logfile_path, 'a')

    print '------------'
    print ''
    print 'Going to train with the following parameters:'
    print '# Classes: ', numClasses
    train_log_file.write('# Classes: ' + str(numClasses) + '\n')
    print '# Ims: ', numIms
    train_log_file.write('# Ims: ' + str(numIms) + '\n')
    print 'Margin: ', margin
    train_log_file.write('Margin: ' + str(margin) + '\n')
    print 'Output size: ', output_size
    train_log_file.write('Output size: ' + str(output_size) + '\n')
    print 'Learning rate: ', learning_rate
    train_log_file.write('Learning rate: ' + str(learning_rate) + '\n')
    print 'Logging to: ', logfile_path
    train_log_file.write('Param_str: ' + param_str + '\n')
    train_log_file.write('----------------\n')
    print ''
    print '------------'

    # Queuing op loads data into input tensor
    image_batch = tf.placeholder(tf.float32, shape=[batch_size, crop_size[0], crop_size[0], 3])
    noise = tf.random_normal(shape=[batch_size, crop_size[0], crop_size[0], 1],
                             mean=0.0, stddev=0.0025, dtype=tf.float32)
    final_batch = tf.add(image_batch, noise)

    print("Preparing network...")
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, layers = resnet_v2.resnet_v2_50(final_batch, num_classes=output_size, is_training=True)

    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        if is_finetuning.lower() == 'true' and var.op.name.startswith('resnet_v2_50/logits') \
                or 'momentum' in var.op.name.lower():
            excluded = True
        if not excluded:
            variables_to_restore.append(var)

    # feat = tf.squeeze(tf.nn.l2_normalize(layers[featLayer],3))
    feat = tf.squeeze(layers[featLayer])
    expanded_a = tf.expand_dims(feat, 1)
    expanded_b = tf.expand_dims(feat, 0)
    D = tf.reduce_sum(tf.squared_difference(expanded_a, expanded_b), 2)
    # D = 1 - tf.reduce_sum(tf.multiply(expanded_a, expanded_b), 2)

    # if not train_data.isOverfitting:
    #     D_max = tf.reduce_max(D)
    #     D_mean, D_var = tf.nn.moments(D, axes=[0,1])
    #     lowest_nonzero_distance = tf.reduce_max(-D)
    #     bottom_thresh = 1.2*lowest_nonzero_distance
    #     top_thresh = (D_max + D_mean)/2.0
    #     bool_mask = tf.logical_and(D>=bottom_thresh, D<=top_thresh)
    #     D = tf.multiply(D, tf.cast(bool_mask, tf.float32))

    posIdx = np.floor(np.arange(0, batch_size) / num_pos_examples).astype('int')
    posIdx10 = num_pos_examples * posIdx
    posImInds = np.tile(posIdx10, (num_pos_examples, 1)).transpose() + \
        np.tile(np.arange(0, num_pos_examples), (batch_size, 1))
    anchorInds = np.tile(np.arange(0, batch_size), (num_pos_examples, 1)).transpose()

    posImInds_flat = posImInds.ravel()
    anchorInds_flat = anchorInds.ravel()

    posPairInds = zip(posImInds_flat, anchorInds_flat)
    posDists = tf.reshape(tf.gather_nd(D, posPairInds), (batch_size, num_pos_examples))

    shiftPosDists = tf.reshape(posDists, (1, batch_size, num_pos_examples))
    posDistsRep = tf.tile(shiftPosDists, (batch_size, 1, 1))

    allDists = tf.tile(tf.expand_dims(D, 2), (1, 1, num_pos_examples))

    ra, rb, rc = np.meshgrid(np.arange(0, batch_size), np.arange(0, batch_size),
                             np.arange(0, num_pos_examples))

    bad_negatives = np.floor((ra) / num_pos_examples) == np.floor((rb) / num_pos_examples)
    bad_positives = np.mod(rb, num_pos_examples) == np.mod(rc, num_pos_examples)

    mask = ((1 - bad_negatives) * (1 - bad_positives)).astype('float32')

    # loss = tf.reduce_sum(tf.maximum(0., tf.multiply(mask, margin + posDistsRep - allDists))) / batch_size
    loss = tf.reduce_mean(tf.maximum(0., tf.multiply(mask, margin + posDistsRep - allDists)))

    # slightly counterintuitive to not define "init_op" first, but tf vars aren't known until added to graph
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train_op = slim.learning.create_train_op(loss, optimizer)

    summary_op = tf.summary.merge_all()
    init_op = tf.global_variables_initializer()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver(max_to_keep=2000)

    # tf will consume any GPU it finds on the system. Following lines restrict it to specific gpus
    c = tf.ConfigProto()
    c.gpu_options.visible_device_list = whichGPU

    print("Starting session...")
    sess = tf.Session(config=c)
    sess.run(init_op)

    writer = tf.summary.FileWriter(log_dir, sess.graph)

    restore_fn = slim.assign_from_checkpoint_fn(pretrained_net, variables_to_restore)
    restore_fn(sess)

    print("Start training...")
    ctr = 0
    for step in range(num_iters):
        start_time = time.time()
        batch, labels, ims = train_data.getBatch()
        _, loss_val = sess.run([train_op, loss], feed_dict={image_batch: batch})
        end_time = time.time()
        duration = end_time - start_time
        out_str = 'Step %d: loss = %.6f -- (%.3f sec)' % (step, loss_val, duration)
        # print(out_str)
        if step % summary_iters == 0:
            print(out_str)
            train_log_file.write(out_str + '\n')
        # Update the events file.
        # summary_str = sess.run(summary_op)
        # writer.add_summary(summary_str, step)
        # writer.flush()

        # Save a checkpoint
        if (step + 1) % save_iters == 0:
            print('Saving checkpoint at iteration: %d' % (step))
            pretrained_net = os.path.join(ckpt_dir, 'checkpoint-' + param_str)
            saver.save(sess, pretrained_net, global_step=step)
            print 'checkpoint-', pretrained_net + '-' + str(step), ' saved!'
        if (step + 1) == num_iters:
            print('Saving final')
            pretrained_net = os.path.join(ckpt_dir, 'final-' + param_str)
            saver.save(sess, pretrained_net, global_step=step)
            print 'final-', pretrained_net + '-' + str(step), ' saved!'

    sess.close()
    train_log_file.close()
def build_graph(reader,
                model,
                train_data_pattern,
                label_loss_fn=losses.CrossEntropyLoss(),
                batch_size=1000,
                base_learning_rate=0.01,
                learning_rate_decay_examples=1000000,
                learning_rate_decay=0.95,
                optimizer_class=tf.train.AdamOptimizer,
                clip_gradient_norm=1.0,
                regularization_penalty=1,
                num_readers=1,
                num_epochs=None):
    """Creates the Tensorflow graph.

    This will only be called once in the life of a training model, because after
    the graph is created the model will be restored from a meta graph file rather
    than being recreated.

    Args:
      reader: The data file reader. It should inherit from BaseReader.
      model: The core model (e.g. logistic or neural net). It should inherit
        from BaseModel.
      train_data_pattern: glob path to the training data files.
      label_loss_fn: What kind of loss to apply to the model. It should inherit
        from BaseLoss.
      batch_size: How many examples to process at a time.
      base_learning_rate: What learning rate to initialize the optimizer with.
      optimizer_class: Which optimization algorithm to use.
      clip_gradient_norm: Magnitude of the gradient to clip to.
      regularization_penalty: How much weight to give the regularization loss
        compared to the label loss.
      num_readers: How many threads to use for I/O operations.
      num_epochs: How many passes to make over the data. 'None' means an
        unlimited number of passes.
    """
    global_step = tf.Variable(0, trainable=False, name="global_step")

    learning_rate = tf.train.exponential_decay(
        base_learning_rate,
        global_step * batch_size,
        learning_rate_decay_examples,
        learning_rate_decay,
        staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = optimizer_class(learning_rate)
    unused_video_id, model_input_raw, labels_batch, num_frames = (
        get_input_data_tensors(
            reader,
            train_data_pattern,
            batch_size=batch_size,
            num_readers=num_readers,
            num_epochs=num_epochs))
    tf.summary.histogram("model/input_raw", model_input_raw)

    feature_dim = len(model_input_raw.get_shape()) - 1
    model_input = tf.nn.l2_normalize(model_input_raw, feature_dim)

    with tf.name_scope("model"):
        result = model.create_model(
            model_input,
            num_frames=num_frames,
            vocab_size=reader.num_classes,
            labels=labels_batch)

        for variable in slim.get_model_variables():
            tf.summary.histogram(variable.op.name, variable)

        predictions = result["predictions"]
        if "loss" in result.keys():
            label_loss = result["loss"]
        else:
            label_loss = label_loss_fn.calculate_loss(predictions, labels_batch)
        tf.summary.scalar("label_loss", label_loss)

        if "regularization_loss" in result.keys():
            reg_loss = result["regularization_loss"]
        else:
            reg_loss = tf.constant(0.0)
        reg_losses = tf.losses.get_regularization_losses()
        if reg_losses:
            reg_loss += tf.add_n(reg_losses)
        if regularization_penalty != 0:
            tf.summary.scalar("reg_loss", reg_loss)

        # Adds update_ops (e.g., moving average updates in batch normalization) as
        # a dependency to the train_op.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if "update_ops" in result.keys():
            update_ops += result["update_ops"]
        if update_ops:
            with tf.control_dependencies(update_ops):
                barrier = tf.no_op(name="gradient_barrier")
                with tf.control_dependencies([barrier]):
                    label_loss = tf.identity(label_loss)

        # Incorporate the L2 weight penalties etc.
        final_loss = regularization_penalty * reg_loss + label_loss
        train_op = slim.learning.create_train_op(
            final_loss,
            optimizer,
            global_step=global_step,
            clip_gradient_norm=clip_gradient_norm)

        tf.add_to_collection("global_step", global_step)
        tf.add_to_collection("loss", label_loss)
        tf.add_to_collection("predictions", predictions)
        tf.add_to_collection("input_batch_raw", model_input_raw)
        tf.add_to_collection("input_batch", model_input)
        tf.add_to_collection("num_frames", num_frames)
        tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32))
        tf.add_to_collection("train_op", train_op)
def train():
    seed = 8964
    tf.set_random_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    pp = pprint.PrettyPrinter()
    pp.pprint(flags.FLAGS.__flags)

    if not os.path.exists(opt.checkpoint_dir):
        os.makedirs(opt.checkpoint_dir)

    with tf.Graph().as_default():
        # Data Loader
        loader = DataLoader(opt)
        tgt_image, src_image_stack, intrinsics = loader.load_train_batch()

        # Build Model
        model = GeoNetModel(opt, tgt_image, src_image_stack, intrinsics)
        loss = model.total_loss

        # Train Op
        if opt.mode == 'train_flow' and opt.flownet_type == "residual":
            # we pretrain DepthNet & PoseNet, then finetune ResFlowNetS
            train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "flow_net")
            vars_to_restore = slim.get_variables_to_restore(include=["depth_net", "pose_net"])
        else:
            train_vars = [var for var in tf.trainable_variables()]
            vars_to_restore = slim.get_model_variables()

        if opt.init_ckpt_file is not None:
            init_assign_op, init_feed_dict = slim.assign_from_checkpoint(
                opt.init_ckpt_file, vars_to_restore)

        optim = tf.train.AdamOptimizer(opt.learning_rate, 0.9)
        train_op = slim.learning.create_train_op(loss, optim,
                                                 variables_to_train=train_vars)

        # Global Step
        global_step = tf.Variable(0, name='global_step', trainable=False)
        incr_global_step = tf.assign(global_step, global_step + 1)

        # Parameter Count
        parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) for v in train_vars])

        # Saver
        saver = tf.train.Saver([var for var in tf.model_variables()] + [global_step],
                               max_to_keep=opt.max_to_keep)

        # Session
        sv = tf.train.Supervisor(logdir=opt.checkpoint_dir,
                                 save_summaries_secs=0,
                                 saver=None)
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with sv.managed_session(config=config) as sess:
            print('Trainable variables: ')
            for var in train_vars:
                print(var.name)
            print("parameter_count =", sess.run(parameter_count))

            if opt.init_ckpt_file is not None:
                sess.run(init_assign_op, init_feed_dict)

            start_time = time.time()
            for step in range(1, opt.max_steps):
                fetches = {
                    "train": train_op,
                    "global_step": global_step,
                    "incr_global_step": incr_global_step
                }
                if step % 100 == 0:
                    fetches["loss"] = loss
                results = sess.run(fetches)

                if step % 100 == 0:
                    time_per_iter = (time.time() - start_time) / 100
                    start_time = time.time()
                    print('Iteration: [%7d] | Time: %4.4fs/iter | Loss: %.3f'
                          % (step, time_per_iter, results["loss"]))

                if step % opt.save_ckpt_freq == 0:
                    saver.save(sess, os.path.join(opt.checkpoint_dir, 'model'),
                               global_step=step)
def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    # Parameters for random generation
    max_theta, max_dist = map(float, FLAGS.list_param.split(','))

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset # (Modified)
        ######################
        dataset = factory_data.get_dataset(FLAGS.dataset_name, FLAGS.dataset_dir,
                                           'train', FLAGS.list_param.split(','))

        ######################
        # Select the network #
        ######################
        network_fn = factory_nets.get_network_fn(
            FLAGS.model_name,
            num_preds=dataset.num_preds,
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        lidar_pool = [int(l_i) for l_i in FLAGS.lidar_pool.split(',')]

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, points, mat_intrinsic, mat_rect, mat_extrinsic] = provider.get([
                'image', 'points', 'mat_intrinsic', 'mat_rect', 'mat_extrinsic'
            ])
            points = tf.reshape(points, [-1, 4])[:, :3]
            mat_intrinsic = tf.reshape(mat_intrinsic, [3, 4])
            mat_rect = tf.reshape(mat_rect, [4, 4])
            mat_extrinsic = tf.reshape(mat_extrinsic, [4, 4])

            image, lidar, y_true = tf_prepare_train(image, points, mat_intrinsic,
                                                    mat_rect, mat_extrinsic,
                                                    max_theta, max_dist)

            train_image_size = FLAGS.train_image_size or network_fn.default_image_size
            image, lidar = preprocessing_fn(image, lidar,
                                            train_image_size, train_image_size,
                                            pool_size=lidar_pool)

            images, lidars, y_trues = tf.train.batch(
                [image, lidar, y_true],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, lidars, y_trues], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            # with tf.device(deploy_config.inputs_device()):
            images, lidars, y_trues = batch_queue.dequeue()
            y_preds, end_points = network_fn(images, lidars)

            #############################
            # Specify the loss function #
            #############################
            # tf.losses.mean_squared_error(
            #     labels=y_trues,
            #     predictions=y_preds,
            #     weights=1.0)
            if FLAGS.weight_loss:
                weights_preds = np.ones(sum(dataset.num_preds['num_preds']))
                i_reg_start = 0
                for i_reg, is_normalize in enumerate(dataset.num_preds['is_normalize']):
                    num_preds = dataset.num_preds['num_preds'][i_reg]
                    if is_normalize:
                        weights_preds[i_reg_start:i_reg_start + num_preds] = FLAGS.weight_loss
                    i_reg_start += num_preds
                weights_preds = tf.constant(np.tile(weights_preds, (FLAGS.batch_size, 1)))
            else:
                weights_preds = 1.0
            slim.losses.mean_squared_error(y_preds, labels=y_trues, weights=weights_preds)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                            tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables,
                replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
                total_num_replicas=FLAGS.worker_replicas)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        # and returns a train_tensor and summary_op
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        ###########################
        # Kicks off the training. #
        ###########################
        path_log = os.path.join(
            FLAGS.train_dir, FLAGS.model_name,
            'weight_{}'.format(FLAGS.weight_loss if FLAGS.weight_loss else 1))

        slim.learning.train(
            train_tensor,
            logdir=path_log,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
def check_TSNE(self):
    source_images, source_labels = self.load_office(split=self.src_dir)
    target_images, target_labels = self.load_office(split=self.trg_dir)

    # build a graph
    model = self.model
    model.build_model()

    # make directory if not exists
    if tf.gfile.Exists(self.log_dir):
        tf.gfile.DeleteRecursively(self.log_dir)
    tf.gfile.MakeDirs(self.log_dir)

    # ~ self.config = tf.ConfigProto(device_count = {'GPU': 0})
    with tf.Session(config=self.config) as sess:
        # initialize G and D
        tf.global_variables_initializer().run()

        if sys.argv[1] == 'test':
            print('Loading test model.')
            # Do not change next two lines. Necessary because
            # slim.get_model_variables(scope='blablabla') works only for models built with slim.
            variables_to_restore = tf.global_variables()
            variables_to_restore = [
                v for v in variables_to_restore
                if np.all([s not in str(v.name)
                           for s in ['encoder', 'sampler_generator', 'disc_e', 'source_train_op']])
            ]
            restorer = tf.train.Saver(variables_to_restore)
            restorer.restore(sess, self.test_model)
        elif sys.argv[1] == 'pretrain':
            print('Loading pretrained model.')
            # Do not change next two lines. Necessary because
            # slim.get_model_variables(scope='blablabla') works only for models built with slim.
            variables_to_restore = tf.global_variables()
            variables_to_restore = [
                v for v in variables_to_restore
                if np.all([s not in str(v.name)
                           for s in ['encoder', 'sampler_generator', 'disc_e', 'source_train_op']])
            ]
            restorer = tf.train.Saver(variables_to_restore)
            restorer.restore(sess, self.pretrained_model)
        elif sys.argv[1] == 'convdeconv':
            print('Loading convdeconv model.')
            variables_to_restore = slim.get_model_variables(scope='conv_deconv')
            restorer = tf.train.Saver(variables_to_restore)
            restorer.restore(sess, self.convdeconv_model)
        else:
            raise NameError('Unrecognized mode.')

        n_samples = 400
        src_labels = utils.one_hot(source_labels[:n_samples].astype(int), 31)
        trg_labels = utils.one_hot(target_labels[:n_samples].astype(int), 31)
        src_noise = utils.sample_Z(n_samples, 100, 'uniform')

        if sys.argv[1] == 'convdeconv':
            feed_dict = {
                model.src_noise: src_noise,
                model.src_labels: src_labels,
                model.src_images: source_images,
                model.trg_images: target_images[:n_samples]
            }
            h_repr = sess.run(model.h_repr, feed_dict)
        else:
            print('Loading sampler.')
            variables_to_restore = slim.get_model_variables(scope='sampler_generator')
            restorer = tf.train.Saver(variables_to_restore)
            restorer.restore(sess, self.pretrained_sampler)

            feed_dict = {
                model.src_noise: src_noise,
                model.src_labels: src_labels,
                model.src_images: source_images[:n_samples],
                model.trg_images: target_images[:n_samples]
            }
            fzy, fx_src, fx_trg = sess.run([model.fzy, model.fx_src, model.fx_trg], feed_dict)

        src_labels = np.argmax(src_labels, 1)
        trg_labels = np.argmax(trg_labels, 1)

        print 'Computing T-SNE.'
        model = TSNE(n_components=2, random_state=0)

        if sys.argv[2] == '1':
            TSNE_hA = model.fit_transform(np.vstack((fx_src)))
            plt.figure(2)
            plt.scatter(TSNE_hA[:, 0], TSNE_hA[:, 1],
                        c=np.hstack((np.ones((n_samples)))), s=3, cmap=mpl.cm.jet)
            plt.figure(3)
            plt.scatter(TSNE_hA[:, 0], TSNE_hA[:, 1],
                        c=np.hstack((src_labels)), s=3, cmap=mpl.cm.jet)
        elif sys.argv[2] == '2':
            TSNE_hA = model.fit_transform(np.vstack((fzy, fx_src)))
            plt.figure(2)
            plt.scatter(TSNE_hA[:, 0], TSNE_hA[:, 1],
                        c=np.hstack((np.ones((n_samples,)), 2 * np.ones((n_samples,)))),
                        s=3, cmap=mpl.cm.jet)
            plt.figure(3)
            plt.scatter(TSNE_hA[:, 0], TSNE_hA[:, 1],
                        c=np.hstack((src_labels, src_labels)), s=3, cmap=mpl.cm.jet)
        elif sys.argv[2] == '3':
            TSNE_hA = model.fit_transform(np.vstack((fzy, fx_src, fx_trg)))
            plt.figure(2)
            plt.scatter(TSNE_hA[:, 0], TSNE_hA[:, 1],
                        c=np.hstack((src_labels, src_labels, trg_labels,)),
                        s=3, cmap=mpl.cm.jet)
            plt.figure(3)
            plt.scatter(TSNE_hA[:, 0], TSNE_hA[:, 1],
                        c=np.hstack((np.ones((n_samples,)), 2 * np.ones((n_samples,)),
                                     3 * np.ones((n_samples,)))),
                        s=3, cmap=mpl.cm.jet)
        elif sys.argv[2] == '4':
            TSNE_hA = model.fit_transform(h_repr)
            plt.scatter(TSNE_hA[:, 0], TSNE_hA[:, 1],
                        c=np.argmax(trg_labels, 1), s=3, cmap=mpl.cm.jet)

        plt.show()
def main(args=None):
    print(args)
    tf.reset_default_graph()

    """ Read dataset parser """
    flags.network_name = args[0].split('/')[-1].split('.')[0].split('main_')[-1]
    flags.logs_dir = './logs_' + flags.network_name
    dataset_parser = GANParser(flags=flags)

    """ Transform data to TFRecord format (Only do once.) """
    if False:
        dataset_parser.load_paths(is_jpg=True, load_val=True)
        dataset_parser.data2record(name='{}_train.tfrecords'.format(dataset_parser.dataset_name),
                                   set_type='train', test_num=None)
        dataset_parser.data2record(name='{}_val.tfrecords'.format(dataset_parser.dataset_name),
                                   set_type='val', test_num=None)
        # coco_parser.data2record_test(name='coco_stuff2017_test-dev_all_label.tfrecords', is_dev=True, test_num=None)
        # coco_parser.data2record_test(name='coco_stuff2017_test_all_label.tfrecords', is_dev=False, test_num=None)
        return

    """ Build Graph """
    with tf.Graph().as_default():
        """ Input (TFRecord) """
        with tf.name_scope('TFRecord'):
            # DatasetA
            training_a_dataset = dataset_parser.tfrecord_get_dataset(
                name='{}_trainA.tfrecords'.format(dataset_parser.dataset_name),
                batch_size=flags.batch_size,
                shuffle_size=None)
            val_a_dataset = dataset_parser.tfrecord_get_dataset(
                name='{}_valA.tfrecords'.format(dataset_parser.dataset_name),
                batch_size=flags.batch_size,
                need_flip=(flags.mode == 'train'))
            # DatasetB
            training_b_dataset = dataset_parser.tfrecord_get_dataset(
                name='{}_trainB.tfrecords'.format(dataset_parser.dataset_name),
                batch_size=flags.batch_size,
                shuffle_size=None)
            val_b_dataset = dataset_parser.tfrecord_get_dataset(
                name='{}_valB.tfrecords'.format(dataset_parser.dataset_name),
                batch_size=flags.batch_size,
                is_label=True,
                need_flip=(flags.mode == 'train'))

        # A feed-able iterator
        with tf.name_scope('RealA'):
            handle_a = tf.placeholder(tf.string, shape=[])
            iterator_a = tf.contrib.data.Iterator.from_string_handle(
                handle_a, training_a_dataset.output_types, training_a_dataset.output_shapes)
            real_a, real_a_name, real_a_shape = iterator_a.get_next()
        with tf.name_scope('RealB'):
            handle_b = tf.placeholder(tf.string, shape=[])
            iterator_b = tf.contrib.data.Iterator.from_string_handle(
                handle_b, training_b_dataset.output_types, training_b_dataset.output_shapes)
            real_b, real_b_name, real_b_shape = iterator_b.get_next()
        with tf.name_scope('InitialA_op'):
            training_a_iterator = training_a_dataset.make_initializable_iterator()
            validation_a_iterator = val_a_dataset.make_initializable_iterator()
        with tf.name_scope('InitialB_op'):
            training_b_iterator = training_b_dataset.make_initializable_iterator()
            validation_b_iterator = val_b_dataset.make_initializable_iterator()

        """ Network (Computes predictions from the inference model) """
        with tf.name_scope('Network'):
            # Input
            global_step = tf.Variable(0, trainable=False, name='global_step', dtype=tf.int32)
            global_step_update_op = tf.assign_add(global_step, 1, name='global_step_update_op')
            mean_rgb = tf.constant((123.68, 116.78, 103.94), dtype=tf.float32)
            fake_b_pool = tf.placeholder(
                tf.float32,
                shape=[None, flags.image_height, flags.image_width, flags.c_in_dim],
                name='fake_B_pool')
            image_linear_shape = tf.constant(
                flags.image_height * flags.image_width * flags.c_in_dim,
                dtype=tf.int32, name='image_linear_shape')

            # A -> B
            '''
            with tf.name_scope('Generator'):
                with slim.arg_scope(vgg.vgg_arg_scope()):
                    net, end_points = vgg.vgg_16(real_a - mean_rgb, num_classes=1,
                                                 is_training=True, spatial_squeeze=False)
                    print(net)
                    return
                with tf.variable_scope('Generator_A2B'):
                    pred = tf.layers.conv2d(tf.nn.relu(net), 1, 1, 1)
                    pred_upscale = tf.image.resize_bilinear(pred,
                                                            (flags.image_height, flags.image_width),
                                                            name='up_scale')
                    segment_a = tf.nn.sigmoid(pred_upscale, name='segment_a')

            # sigmoid cross entropy Loss
            with tf.name_scope('loss_gen_a2b'):
                loss_gen_a2b = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=pred_upscale, labels=real_b/255.0, name='sigmoid'), name='mean')
            '''

            # A -> B
            with tf.name_scope('Generator'):
                with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                    net, end_points = resnet_v1.resnet_v1_50(real_a - mean_rgb, num_classes=None,
                                                             is_training=True, global_pool=False,
                                                             output_stride=8)
                with tf.variable_scope('Generator_A2B'):
                    d1 = deconv2d(net, 256, 3, 2, name='g_d1_dc')
                    d1 = tf.nn.relu(instance_normalization(d1, 'g_d1_bn'))
                    d2 = deconv2d(d1, 128, 3, 2, name='g_d2_dc')
                    d2 = tf.nn.relu(instance_normalization(d2, 'g_d2_bn'))
                    d3 = deconv2d(d2, 64, 3, 2, name='g_d3_dc')
                    d3 = tf.nn.relu(instance_normalization(d3, 'g_d3_bn'))
                    d3 = tf.pad(d3, [[0, 0], [3, 3], [3, 3], [0, 0]], "REFLECT")
                    logits_a = conv2d(d3, 1, 7, 1, padding='VALID', name='g_pred_c')

            # A -> B
            adjusted_a = high_light(real_a, name='high_light')
            # adjusted_a = tf.zeros_like(real_a, tf.float32, name='mask', optimize=True)
            # logits_a = generator_resnet(real_a, flags, False, name="Generator_A2B")
            # adjusted_a = tf.layers.average_pooling2d(real_a, 11, strides=1, padding='same', name='adjusted_a')
            segment_a = tf.nn.tanh(logits_a, name='segment_a')

            logits_a_ori = tf.image.resize_bilinear(logits_a,
                                                    (real_a_shape[0][0], real_b_shape[0][1]),
                                                    name='logits_a_ori')
            segment_a_ori = tf.nn.tanh(logits_a_ori, name='segment_a_ori')

            with tf.variable_scope('Fake_B'):
                foreground = tf.multiply(real_a, segment_a, name='foreground')
                background = tf.multiply(adjusted_a, (1 - segment_a), name='background')
                fake_b_logits = tf.add(foreground, background, name='fake_b_logits')
                fake_b = tf.clip_by_value(fake_b_logits, 0, 255, name='fake_b')

            fake_b_f = tf.reshape(fake_b, [-1, image_linear_shape], name='fake_b_f')
            fake_b_pool_f = tf.reshape(fake_b_pool, [-1, image_linear_shape], name='fake_b_pool_f')
            real_b_f = tf.reshape(real_b, [-1, image_linear_shape], name='real_b_f')

            dis_fake_b = discriminator_se_wgangp(fake_b_f, flags, reuse=False, name="Discriminator_B")
            dis_fake_b_pool = discriminator_se_wgangp(fake_b_pool_f, flags, reuse=True, name="Discriminator_B")
            dis_real_b = discriminator_se_wgangp(real_b_f, flags, reuse=True, name="Discriminator_B")

            # WGAN Loss
            with tf.name_scope('loss_gen_a2b'):
                loss_gen_a2b = -tf.reduce_mean(dis_fake_b)

            with tf.name_scope('loss_dis_b'):
                loss_dis_b_adv_real = -tf.reduce_mean(dis_real_b)
                loss_dis_b_adv_fake = tf.reduce_mean(dis_fake_b_pool)
                loss_dis_b = tf.reduce_mean(dis_fake_b_pool) - tf.reduce_mean(dis_real_b)
                with tf.name_scope('wgan-gp'):
                    alpha = tf.random_uniform(shape=[flags.batch_size, 1], minval=0., maxval=1.)
                    differences = fake_b_pool_f - real_b_f
                    interpolates = real_b_f + (alpha * differences)
                    gradients = tf.gradients(
                        discriminator_se_wgangp(interpolates, flags, reuse=True,
                                                name="Discriminator_B"),
                        [interpolates])[0]
                    slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
                    gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
                    loss_dis_b += flags.lambda_gp * gradient_penalty

        # Optimizer
        trainable_var_resnet = tf.get_collection(
            key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='resnet_v1_50')
        trainable_var_gen_a2b = tf.get_collection(
            key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator_A2B') + trainable_var_resnet
        # slim.model_analyzer.analyze_vars(trainable_var_gen_a2b, print_info=True)
        # trainable_var_gen_a2b = tf.get_collection(
        #     key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator_A2B')
        trainable_var_dis_b = tf.get_collection(
            key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='Discriminator_B')

        with tf.name_scope('learning_rate_decay'):
            decay = tf.maximum(
                0., 1. - (tf.cast(global_step, tf.float32) / flags.training_iter), name='decay')
            learning_rate = tf.multiply(flags.learning_rate, decay, name='learning_rate')

        train_op_gen_a2b = train_op(loss_gen_a2b, learning_rate, flags,
                                    trainable_var_gen_a2b, name='gen_a2b')
        train_op_dis_b = train_op(loss_dis_b, learning_rate, flags,
                                  trainable_var_dis_b, name='dis_b')

        saver = tf.train.Saver(max_to_keep=2)

        # Graph Logs
        with tf.name_scope('GEN_a2b'):
            tf.summary.scalar("loss/gen_a2b/all", loss_gen_a2b)
        with tf.name_scope('DIS_b'):
            tf.summary.scalar("loss/dis_b/all", loss_dis_b)
            tf.summary.scalar("loss/dis_b/adv_real", loss_dis_b_adv_real)
            tf.summary.scalar("loss/dis_b/adv_fake", loss_dis_b_adv_fake)
        summary_op = tf.summary.merge_all()

        """ Session """
        tfconfig = tf.ConfigProto(allow_soft_placement=True)
        tfconfig.gpu_options.allow_growth = True
        with tf.Session(config=tfconfig) as sess:
            with tf.name_scope('Initial'):
                ckpt = tf.train.get_checkpoint_state(dataset_parser.checkpoint_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    print("Model restored: {}".format(ckpt.model_checkpoint_path))
                    saver.restore(sess, ckpt.model_checkpoint_path)
                else:
                    print("No Model found.")
                    init_op = tf.group(tf.global_variables_initializer(),
                                       tf.local_variables_initializer())
                    sess.run(init_op)
                    init_fn = slim.assign_from_checkpoint_fn(
                        './pretrained/resnet_v1_50.ckpt',
                        slim.get_model_variables('resnet_v1_50'))
                    init_fn(sess)

            summary_writer = tf.summary.FileWriter(dataset_parser.logs_dir, sess.graph)

            """ Training Mode """
            if flags.mode == 'train':
                print('Training mode! Batch size:{:d}'.format(flags.batch_size))
                with tf.variable_scope('Input_port'):
                    training_a_handle = sess.run(training_a_iterator.string_handle())
                    training_b_handle = sess.run(training_b_iterator.string_handle())
                    # val_a_handle = sess.run(validation_a_iterator.string_handle())
                    # val_b_handle = sess.run(validation_b_iterator.string_handle())
                image_pool_a, image_pool_b = ImagePool(flags.pool_size), ImagePool(flags.pool_size)

                print('Start Training!')
                start_time = time.time()
                sess.run([training_a_iterator.initializer, training_b_iterator.initializer])
                feed_dict_train = {handle_a: training_a_handle, handle_b: training_b_handle}
                # feed_dict_valid = {is_training: False}
                global_step_sess = sess.run(global_step)
                while global_step_sess < flags.training_iter:
                    try:
                        # Update gen_A2B, gen_B2A
                        _, fake_b_sess = sess.run([train_op_gen_a2b, fake_b],
                                                  feed_dict=feed_dict_train)
                        # _, loss_gen_a2b_sess = sess.run([train_op_gen_a2b, loss_gen_a2b], feed_dict=feed_dict_train)

                        # Update dis_B, dis_A
                        fake_b_pool_query = image_pool_b.query(fake_b_sess)
                        _ = sess.run(train_op_dis_b,
                                     feed_dict={fake_b_pool: fake_b_pool_query,
                                                handle_b: training_b_handle})

                        sess.run(global_step_update_op)
                        global_step_sess, learning_rate_sess = sess.run([global_step, learning_rate])
                        print('global step:[{:d}/{:d}], learning rate:{:f}, time:{:4.4f}'
                              .format(global_step_sess, flags.training_iter,
                                      learning_rate_sess, time.time() - start_time))

                        # Logging the events
                        if global_step_sess % flags.log_freq == 1:
                            print('Logging the events')
                            summary_op_sess = sess.run(summary_op,
                                                       feed_dict={handle_a: training_a_handle,
                                                                  handle_b: training_b_handle,
                                                                  fake_b_pool: fake_b_pool_query})
                            summary_writer.add_summary(summary_op_sess, global_step_sess)
                            # summary_writer.flush()

                        # Observe training situation (For debugging.)
                        if flags.debug and global_step_sess % flags.observe_freq == 1:
                            real_a_sess, real_b_sess, adjusted_a_sess, segment_a_sess, fake_b_sess, \
                                real_a_name_sess, real_b_name_sess = \
                                sess.run([real_a, real_b, adjusted_a, segment_a, fake_b,
                                          real_a_name, real_b_name],
                                         feed_dict={handle_a: training_a_handle,
                                                    handle_b: training_b_handle})
                            print('Logging training images.')
                            dataset_parser.visualize_data(
                                real_a=real_a_sess, real_b=real_b_sess,
                                adjusted_a=adjusted_a_sess, segment_a=segment_a_sess,
                                fake_b=fake_b_sess, shape=(1, 1),
                                global_step=global_step_sess,
                                logs_dir=dataset_parser.logs_image_train_dir,
                                real_a_name=real_a_name_sess[0].decode(),
                                real_b_name=real_b_name_sess[0].decode())

                        """ Saving the checkpoint """
                        if global_step_sess % flags.save_freq == 0:
                            print('Saving model...')
                            saver.save(sess, dataset_parser.checkpoint_dir + '/model.ckpt',
                                       global_step=global_step_sess)
                    except tf.errors.OutOfRangeError:
                        print('----------------One epochs finished!----------------')
                        sess.run([training_a_iterator.initializer,
                                  training_b_iterator.initializer])

            elif flags.mode == 'test':
                from PIL import Image
                import scipy.ndimage.filters
                import scipy.io as sio
                import numpy as np

                print('Start Testing!')
                '''
                with tf.variable_scope('Input_port'):
                    val_a_handle = sess.run(validation_a_iterator.string_handle())
                    val_b_handle = sess.run(validation_b_iterator.string_handle())
                sess.run([validation_a_iterator.initializer, validation_b_iterator.initializer])
                '''
                with tf.variable_scope('Input_port'):
                    val_a_handle = sess.run(validation_a_iterator.string_handle())
                    val_b_handle = sess.run(validation_b_iterator.string_handle())
                sess.run([validation_a_iterator.initializer, validation_b_iterator.initializer])
                feed_dict_test = {handle_a: val_a_handle, handle_b: val_b_handle}

                image_idx = 0
                while True:
                    try:
                        segment_a_ori_sess, real_a_name_sess, real_b_sess, real_a_sess, fake_b_sess = \
                            sess.run([segment_a_ori, real_a_name, real_b, real_a, fake_b],
                                     feed_dict=feed_dict_test)
                        segment_a_np = (np.squeeze(segment_a_ori_sess) + 1.0) * 127.5
                        binary_a = np.zeros_like(segment_a_np, dtype=np.uint8)
                        # binary_a[segment_a_np > 127.5] = 255
                        binary_mean = np.mean(segment_a_np)
                        binary_a_high = np.mean(segment_a_np[segment_a_np > binary_mean])
                        binary_a_low = np.mean(segment_a_np[segment_a_np < binary_mean])
                        binary_a_ave = (binary_a_high + binary_a_low) / 2.0

                        segment_a_np_blur = scipy.ndimage.filters.gaussian_filter(segment_a_np, sigma=3)
                        binary_a[segment_a_np_blur > binary_a_ave] = 255
                        # sio.savemat('{}/{}.mat'.format(
                        #     dataset_parser.logs_mat_output_dir, real_a_name_sess[0].decode()),
                        #     {'pred': segment_a_np, 'binary': binary_a})
                        # -----------------------------------------------------------------------------
                        if image_idx % 1 == 0:
                            real_a_sess = np.squeeze(real_a_sess)
                            x_png = Image.fromarray(real_a_sess.astype(np.uint8))
                            x_png.save('{}/{}_0_img.png'.format(dataset_parser.logs_image_val_dir,
                                                                real_a_name_sess[0].decode()),
                                       format='PNG')

                            x_png = Image.fromarray(segment_a_np.astype(np.uint8))
                            x_png.save('{}/{}_1_pred.png'.format(dataset_parser.logs_image_val_dir,
                                                                 real_a_name_sess[0].decode()),
                                       format='PNG')

                            x_png = Image.fromarray(binary_a.astype(np.uint8))
                            x_png.save('{}/{}_2_binary.png'.format(dataset_parser.logs_image_val_dir,
                                                                   real_a_name_sess[0].decode()),
                                       format='PNG')

                            fake_b_sess = np.squeeze(fake_b_sess)
                            x_png = Image.fromarray(fake_b_sess.astype(np.uint8))
                            x_png.save('{}/{}_3_fake.png'.format(dataset_parser.logs_image_val_dir,
                                                                 real_a_name_sess[0].decode()),
                                       format='PNG')

                            real_b_sess = np.squeeze(real_b_sess)
                            x_png = Image.fromarray(real_b_sess.astype(np.uint8))
                            x_png.save('{}/{}_4_gt.png'.format(dataset_parser.logs_image_val_dir,
                                                               real_a_name_sess[0].decode()),
                                       format='PNG')

                        print(image_idx)
                        image_idx += 1
                    except tf.errors.OutOfRangeError:
                        print('----------------One epochs finished!----------------')
                        break
def attack_with_mim_aux(images, labels, target_classes, params, save_dir):
    tf.reset_default_graph()
    num_images = images.shape[0]
    batch_size = params.get('batch_size', 50)
    batch_shape = (None, 299, 299, 3)
    num_classes = 1001

    logger.info('Running attack with parameters:')
    pprint.pprint(params)

    # Get save path
    adv_imgs_save_path = get_attack_images_filename_prefix(
        attack_name=ATTACKS.MIM,
        params=params,
        model='inception',
        targeted_prefix='targeted')
    adv_imgs_save_path = os.path.join(save_dir, adv_imgs_save_path)

    # Run inference
    graph = tf.Graph()
    with graph.as_default():
        sess = tf.Session(graph=graph)
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=(batch_size,) + batch_shape[1:])
        y_label = tf.placeholder(tf.int32, shape=(batch_size, num_classes))
        y_target = tf.placeholder(tf.int32, shape=(batch_size, num_classes))
        model = InceptionModel(num_classes)

        attack = attack_name_to_class[ATTACKS.MIM](model=model, sess=sess)
        x_adv = attack.generate(x_input, y_target=y_target, **params)
        logits = model.get_logits(x_adv)
        acc = _top_k_accuracy(logits, tf.argmax(y_label, axis=1), k=1)
        success_rate = _top_k_accuracy(logits, tf.argmax(y_target, axis=1), k=1)

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        saver.restore(sess, save_path=FLAGS.checkpoint_path)

        list_adv_images = []

        if num_images % batch_size == 0:
            num_batches = int(num_images / batch_size)
        else:
            num_batches = int(num_images / batch_size + 1)

        acc_store = []
        succ_store = []
        for i in tqdm.tqdm(range(num_batches)):
            feed_dict_i = {x_input: images[i * batch_size:(i + 1) * batch_size],
                           y_target: target_classes[i * batch_size:(i + 1) * batch_size],
                           y_label: labels[i * batch_size:(i + 1) * batch_size]}
            acc_batch, suc_batch, adv_img = sess.run([acc, success_rate, x_adv],
                                                     feed_dict=feed_dict_i)
            list_adv_images.append(adv_img)
            acc_store.extend(acc_batch)
            succ_store.extend(suc_batch)

        adv_images = np.concatenate(list_adv_images)
        np.save(adv_imgs_save_path, adv_images)

        logger.info('Accuracy is: {:.4f}'.format(np.mean(acc_store)))
        logger.info('Success Rate is: {:.4f}'.format(np.mean(succ_store)))
def get_restorer(self): checkpoint_path = tf.train.latest_checkpoint( os.path.join(cfgs.TRAINED_CKPT, cfgs.VERSION)) #Uncomment for testing specfic checkpoint ONLY #checkpoint_path = '/home/adhitya/Anush-KAIST/FCOS_Tensorflow/output/trained_weights/FCOS_Res50_20190428/coco_400000model.ckpt' if checkpoint_path != None: restorer = tf.train.Saver() print("model restore from :", checkpoint_path) return None, restorer, checkpoint_path else: checkpoint_path = cfgs.PRETRAINED_CKPT print("model restore from pretrained mode, path is :", checkpoint_path) model_variables = slim.get_model_variables() # for var in model_variables: # print(var.name) # print(20*"__++__++__") def name_in_ckpt_rpn(var): return var.op.name def name_in_ckpt_fastrcnn_head(var): ''' Fast-RCNN/resnet_v1_50/block4 -->resnet_v1_50/block4 Fast-RCNN/MobilenetV2/** -- > MobilenetV2 ** :param var: :return: ''' return '/'.join(var.op.name.split('/')[3:]) nameInCkpt_Var_dict = {} rgb_nameInCkpt_Var_dict = {} ir_nameInCkpt_Var_dict = {} multi_nameInCkpt_Var_dict = {} for var in model_variables: if var.name.startswith(self.base_network_name): var_name_in_ckpt = name_in_ckpt_rpn(var) nameInCkpt_Var_dict[var_name_in_ckpt] = var elif var.name.startswith('RGB/resnet_v1_101/RGB/' + self.base_network_name): # +'/block4' var_name_in_ckpt = name_in_ckpt_fastrcnn_head(var) rgb_nameInCkpt_Var_dict[var_name_in_ckpt] = var elif var.name.startswith('IR/resnet_v1_101/IR/' + self.base_network_name): # +'/block4' var_name_in_ckpt = name_in_ckpt_fastrcnn_head(var) ir_nameInCkpt_Var_dict[var_name_in_ckpt] = var ''' elif var.name.startswith('MULTI/resnet_v1_50/MULTI/'+self.base_network_name): # +'/block4' var_name_in_ckpt = name_in_ckpt_fastrcnn_head(var) multi_nameInCkpt_Var_dict[var_name_in_ckpt] = var ''' rgb_restore_variables = rgb_nameInCkpt_Var_dict ir_restore_variables = ir_nameInCkpt_Var_dict #multi_restore_variables = multi_nameInCkpt_Var_dict for key, item in rgb_restore_variables.items(): print("var_in_graph: ", item.name) print("var_in_ckpt: ", key) print(20 * "___") for key, item in ir_restore_variables.items(): print("var_in_graph: ", item.name) print("var_in_ckpt: ", key) print(20 * "___") ''' for key, item in multi_restore_variables.items(): print("var_in_graph: ", item.name) print("var_in_ckpt: ", key) print(20*"___") ''' rgb_restorer = tf.train.Saver(rgb_restore_variables) ir_restorer = tf.train.Saver(ir_restore_variables) #multi_restorer = tf.train.Saver(multi_restore_variables) print(20 * "****") print("restore from pretrained_weighs in IMAGE_NET") return rgb_restorer, ir_restorer, checkpoint_path
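# Hedged sketch: the essential pattern behind the rgb/ir restorers above,
# reduced to a standalone example. tf.train.Saver accepts a
# {name_in_checkpoint: variable_in_graph} dict, so a branch-scoped variable can
# be mapped back onto a plain ImageNet checkpoint by stripping its branch
# prefix. Scope and checkpoint names here are illustrative only.
import tensorflow as tf

with tf.Graph().as_default():
    with tf.variable_scope('RGB'):
        with tf.variable_scope('resnet_v1_101'):
            with tf.variable_scope('conv1'):
                weights = tf.get_variable('weights', shape=[3, 3, 3, 8])

    # 'RGB/resnet_v1_101/conv1/weights' -> 'resnet_v1_101/conv1/weights'
    name_in_ckpt = '/'.join(weights.op.name.split('/')[1:])
    rgb_restorer = tf.train.Saver({name_in_ckpt: weights})
    # rgb_restorer.restore(sess, '/path/to/resnet_v1_101.ckpt')  # placeholder path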
def inference(reader, train_dir, data_pattern, out_file_location, batch_size, top_k): if FLAGS.gpu_only == 0: device = "/cpu:0" else: device = "/gpu:0" # getting a proper number of keep_prob parameters for dropout # max is 10 and we have to pad the vector with 1s tmp_layers = [] if FLAGS.layers_keep_probs is not None: tmp_layers = [ float(x) for x in FLAGS.layers_keep_probs.replace(' ', '').split(',') ] tmp_layers_padded = tmp_layers + [1.0 for x in range(10 - len(tmp_layers))] config = tf.ConfigProto(allow_soft_placement=True, device_count={'GPU': FLAGS.gpu_only}) config.gpu_options.allow_growth = True with tf.device(device): with tf.Session(config=config) as sess, gfile.Open( out_file_location, "w+") as out_file: video_id_batch, video_batch, num_frames_batch = get_input_data_tensors( reader, data_pattern, batch_size) if FLAGS.check_point != '': latest_checkpoint = FLAGS.train_dir + '/model.ckpt-' + FLAGS.check_point else: latest_checkpoint = tf.train.latest_checkpoint(train_dir) if latest_checkpoint is None: raise Exception("unable to find a checkpoint at location: %s" % train_dir) else: meta_graph_location = latest_checkpoint + ".meta" logging.info("loading meta-graph: " + meta_graph_location) saver = tf.train.import_meta_graph(meta_graph_location, clear_devices=True) logging.info("restoring variables from " + latest_checkpoint) saver.restore(sess, latest_checkpoint) input_tensor = tf.get_collection("input_batch_raw")[0] num_frames_tensor = tf.get_collection("num_frames")[0] predictions_tensor = tf.get_collection("predictions")[0] layers_keep_probs = tf.get_collection("layers_keep_probs")[0] layers_op = tf.assign(layers_keep_probs, tmp_layers_padded) #ess.run(tf.variables_initializer([layers_keep_probs])) #ayers_keep_probs_val = sess.run([layers_keep_probs]) if FLAGS.use_ema_var: logging.info( "\n\n ******* \n Using EMA version of the variables!\n") ckpt_reader = pywrap_tensorflow.NewCheckpointReader( latest_checkpoint) #var_to_shape_map = ckpt_reader.get_variable_to_shape_map() for v in slim.get_model_variables(): ema_var_name = 'tower/' + v.op.name + '/ExponentialMovingAverage' sess.run(tf.assign(v, ckpt_reader.get_tensor(ema_var_name))) logging.info("Replaced {} with {}.".format( v.op.name, ema_var_name)) logging.info("\n EMA variable assigned!\n ******* \n") # Workaround for num_epochs issue. 
def set_up_init_ops(variables): init_op_list = [] for variable in list(variables): if "train_input" in variable.name: init_op_list.append(tf.assign(variable, 1)) variables.remove(variable) init_op_list.append(tf.variables_initializer(variables)) return init_op_list sess.run( set_up_init_ops( tf.get_collection_ref(tf.GraphKeys.LOCAL_VARIABLES))) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) num_examples_processed = 0 start_time = time.time() out_file.write("VideoId,LabelConfidencePairs\n") sess.run(layers_op) layers_keep_probs_val = sess.run([layers_keep_probs]) logging.info(layers_keep_probs) try: while not coord.should_stop(): video_id_batch_val, video_batch_val, num_frames_batch_val = sess.run( [video_id_batch, video_batch, num_frames_batch]) predictions_val, = sess.run( [predictions_tensor], feed_dict={ input_tensor: video_batch_val, num_frames_tensor: num_frames_batch_val, layers_keep_probs: layers_keep_probs_val[0] }) now = time.time() num_examples_processed += len(video_batch_val) num_classes = predictions_val.shape[1] logging.info("num examples processed: " + str(num_examples_processed) + " elapsed seconds: " + "{0:.2f}".format(now - start_time)) for line in format_lines(video_id_batch_val, predictions_val, top_k): out_file.write(line) out_file.flush() except tf.errors.OutOfRangeError: logging.info( 'Done with inference. The output file was written to ' + out_file_location) finally: coord.request_stop() coord.join(threads) sess.close()
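# Hedged sketch: the loop above overwrites each model variable with its
# 'ExponentialMovingAverage' shadow value read directly from the checkpoint
# (note the hard-coded 'tower/' prefix there). When the shadow names in the
# checkpoint already match the graph names, the same effect is usually achieved
# with the name map produced by ExponentialMovingAverage.variables_to_restore();
# the decay value and checkpoint path below are placeholders.
import tensorflow as tf


def make_ema_saver(decay=0.9999):
    ema = tf.train.ExponentialMovingAverage(decay)
    # {'<var_name>/ExponentialMovingAverage': <variable>}, falling back to the
    # variable's own name when no shadow value exists in the checkpoint.
    return tf.train.Saver(ema.variables_to_restore())

# saver = make_ema_saver()
# saver.restore(sess, latest_checkpoint)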
def main(_): if not FLAGS.dataset_dir: raise ValueError('You must supply the dataset directory with --dataset_dir') tf.logging.set_verbosity(tf.logging.INFO) with tf.Graph().as_default(): ####################### # Config model_deploy # ####################### deploy_config = model_deploy.DeploymentConfig( num_clones=FLAGS.num_clones, clone_on_cpu=FLAGS.clone_on_cpu, replica_id=FLAGS.task, num_replicas=FLAGS.worker_replicas, num_ps_tasks=FLAGS.num_ps_tasks) # Create global_step with tf.device(deploy_config.variables_device()): global_step = slim.create_global_step() ###################### # Select the dataset # ###################### dataset = dataset_factory.get_dataset( FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir) ###################### # Select the network # ###################### network_fn = nets_factory.get_network_fn( FLAGS.model_name, num_classes=(dataset.num_classes - FLAGS.labels_offset), weight_decay=FLAGS.weight_decay, is_training=True, width_multiplier=FLAGS.width_multiplier) ##################################### # Select the preprocessing function # ##################################### preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name image_preprocessing_fn = preprocessing_factory.get_preprocessing( preprocessing_name, is_training=True) ############################################################## # Create a dataset provider that loads data from the dataset # ############################################################## with tf.device(deploy_config.inputs_device()): provider = slim.dataset_data_provider.DatasetDataProvider( dataset, num_readers=FLAGS.num_readers, common_queue_capacity=20 * FLAGS.batch_size, common_queue_min=10 * FLAGS.batch_size) # gt_bboxes format [ymin, xmin, ymax, xmax] [image, img_shape, gt_labels, gt_bboxes] = provider.get(['image', 'shape', 'object/label', 'object/bbox']) # Preprocesing # gt_bboxes = scale_bboxes(gt_bboxes, img_shape) # bboxes format [0,1) for tf draw image, gt_labels, gt_bboxes = image_preprocessing_fn(image, config.IMG_HEIGHT, config.IMG_WIDTH, labels=gt_labels, bboxes=gt_bboxes, ) ############################################# # Encode annotations for losses computation # ############################################# # anchors format [cx, cy, w, h] anchors = tf.convert_to_tensor(config.ANCHOR_SHAPE, dtype=tf.float32) # encode annos, box_input format [cx, cy, w, h] input_mask, labels_input, box_delta_input, box_input = encode_annos(gt_labels, gt_bboxes, anchors, config.NUM_CLASSES) images, b_input_mask, b_labels_input, b_box_delta_input, b_box_input = tf.train.batch( [image, input_mask, labels_input, box_delta_input, box_input], batch_size=FLAGS.batch_size, num_threads=FLAGS.num_preprocessing_threads, capacity=5 * FLAGS.batch_size) batch_queue = slim.prefetch_queue.prefetch_queue( [images, b_input_mask, b_labels_input, b_box_delta_input, b_box_input], capacity=2 * deploy_config.num_clones) #################### # Define the model # #################### def clone_fn(batch_queue): """Allows data parallelism by creating multiple clones of network_fn.""" images, b_input_mask, b_labels_input, b_box_delta_input, b_box_input = batch_queue.dequeue() anchors = tf.convert_to_tensor(config.ANCHOR_SHAPE, dtype=tf.float32) end_points = network_fn(images) end_points["viz_images"] = images conv_ds_14 = end_points['MobileNet/conv_ds_14/depthwise_conv'] dropout = slim.dropout(conv_ds_14, keep_prob=0.5, is_training=True) num_output = config.NUM_ANCHORS * (config.NUM_CLASSES + 1 + 4) predict = slim.conv2d(dropout, 
num_output, kernel_size=(3, 3), stride=1, padding='SAME', activation_fn=None, weights_initializer=tf.truncated_normal_initializer(stddev=0.0001), scope="MobileNet/conv_predict") with tf.name_scope("Interpre_prediction") as scope: pred_box_delta, pred_class_probs, pred_conf, ious, det_probs, det_boxes, det_class = \ interpre_prediction(predict, b_input_mask, anchors, b_box_input) end_points["viz_det_probs"] = det_probs end_points["viz_det_boxes"] = det_boxes end_points["viz_det_class"] = det_class with tf.name_scope("Losses") as scope: losses(b_input_mask, b_labels_input, ious, b_box_delta_input, pred_class_probs, pred_conf, pred_box_delta) return end_points # Gather initial summaries. summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue]) first_clone_scope = deploy_config.clone_scope(0) # Gather update_ops from the first clone. These contain, for example, # the updates for the batch_norm variables created by network_fn. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope) # Add summaries for end_points. end_points = clones[0].outputs for end_point in end_points: if end_point not in ["viz_images", "viz_det_probs", "viz_det_boxes", "viz_det_class"]: x = end_points[end_point] summaries.add(tf.summary.histogram('activations/' + end_point, x)) summaries.add(tf.summary.scalar('sparsity/' + end_point, tf.nn.zero_fraction(x))) # Add summaries for det result TODO(shizehao): vizulize prediction # Add summaries for losses. for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope): summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss)) # Add summaries for variables. for variable in slim.get_model_variables(): summaries.add(tf.summary.histogram(variable.op.name, variable)) ################################# # Configure the moving averages # ################################# if FLAGS.moving_average_decay: moving_average_variables = slim.get_model_variables() variable_averages = tf.train.ExponentialMovingAverage( FLAGS.moving_average_decay, global_step) else: moving_average_variables, variable_averages = None, None ######################################### # Configure the optimization procedure. # ######################################### with tf.device(deploy_config.optimizer_device()): learning_rate = _configure_learning_rate(dataset.num_samples, global_step) optimizer = _configure_optimizer(learning_rate) summaries.add(tf.summary.scalar('learning_rate', learning_rate)) if FLAGS.sync_replicas: # If sync_replicas is enabled, the averaging will be done in the chief # queue runner. optimizer = tf.train.SyncReplicasOptimizer( opt=optimizer, replicas_to_aggregate=FLAGS.replicas_to_aggregate, variable_averages=variable_averages, variables_to_average=moving_average_variables, replica_id=tf.constant(FLAGS.task, tf.int32, shape=()), total_num_replicas=FLAGS.worker_replicas) elif FLAGS.moving_average_decay: # Update ops executed locally by trainer. update_ops.append(variable_averages.apply(moving_average_variables)) # Variables to train. variables_to_train = _get_variables_to_train() # and returns a train_tensor and summary_op total_loss, clones_gradients = model_deploy.optimize_clones( clones, optimizer, var_list=variables_to_train) # Add total_loss to summary. summaries.add(tf.summary.scalar('total_loss', total_loss)) # Create gradient updates. 
grad_updates = optimizer.apply_gradients(clones_gradients, global_step=global_step) update_ops.append(grad_updates) update_op = tf.group(*update_ops) train_tensor = control_flow_ops.with_dependencies([update_op], total_loss, name='train_op') # Add the summaries from the first clone. These contain the summaries # created by model_fn and either optimize_clones() or _gather_clone_loss(). summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope)) # Merge all summaries together. summary_op = tf.summary.merge(list(summaries), name='summary_op') ########################### # Kicks off the training. # ########################### slim.learning.train( train_tensor, logdir=FLAGS.train_dir, master=FLAGS.master, is_chief=(FLAGS.task == 0), init_fn=_get_init_fn(), summary_op=summary_op, number_of_steps=FLAGS.max_number_of_steps, log_every_n_steps=FLAGS.log_every_n_steps, save_summaries_secs=FLAGS.save_summaries_secs, save_interval_secs=FLAGS.save_interval_secs, sync_optimizer=optimizer if FLAGS.sync_replicas else None)
def train(): seed = 8964 tf.set_random_seed(seed) np.random.seed(seed) random.seed(seed) pp = pprint.PrettyPrinter() pp.pprint(flags.FLAGS.__flags) if not os.path.exists(opt.checkpoint_dir): os.makedirs(opt.checkpoint_dir) with tf.Graph().as_default(): # Data Loader loader = DataLoader(opt) tgt_image, src_image_stack, intrinsics = loader.load_train_batch() # Build Model model = GeoNetModel(opt, tgt_image, src_image_stack, intrinsics) loss = model.total_loss # Train Op if opt.mode == 'train_flow' and opt.flownet_type == "residual": # we pretrain DepthNet & PoseNet, then finetune ResFlowNetS train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "flow_net") vars_to_restore = slim.get_variables_to_restore(include=["depth_net", "pose_net"]) else: train_vars = [var for var in tf.trainable_variables()] vars_to_restore = slim.get_model_variables() if opt.init_ckpt_file != None: init_assign_op, init_feed_dict = slim.assign_from_checkpoint( opt.init_ckpt_file, vars_to_restore) optim = tf.train.AdamOptimizer(opt.learning_rate, 0.9) train_op = slim.learning.create_train_op(loss, optim, variables_to_train=train_vars) # Global Step global_step = tf.Variable(0, name='global_step', trainable=False) incr_global_step = tf.assign(global_step, global_step+1) # Parameter Count parameter_count = tf.reduce_sum([tf.reduce_prod(tf.shape(v)) \ for v in train_vars]) # Saver saver = tf.train.Saver([var for var in tf.model_variables()] + \ [global_step], max_to_keep=opt.max_to_keep) # Session sv = tf.train.Supervisor(logdir=opt.checkpoint_dir, save_summaries_secs=0, saver=None) config = tf.ConfigProto() config.gpu_options.allow_growth = True with sv.managed_session(config=config) as sess: print('Trainable variables: ') for var in train_vars: print(var.name) print("parameter_count =", sess.run(parameter_count)) if opt.init_ckpt_file != None: sess.run(init_assign_op, init_feed_dict) start_time = time.time() for step in range(1, opt.max_steps): fetches = { "train": train_op, "global_step": global_step, "incr_global_step": incr_global_step } if step % 100 == 0: fetches["loss"] = loss results = sess.run(fetches) if step % 100 == 0: time_per_iter = (time.time() - start_time) / 100 start_time = time.time() print('Iteration: [%7d] | Time: %4.4fs/iter | Loss: %.3f' \ % (step, time_per_iter, results["loss"])) if step % opt.save_ckpt_freq == 0: saver.save(sess, os.path.join(opt.checkpoint_dir, 'model'), global_step=step)
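# Hedged note: the GeoNet trainer above uses slim.assign_from_checkpoint(),
# which returns an (assign_op, feed_dict) pair that is run explicitly once the
# session exists, while most other snippets in this file use
# slim.assign_from_checkpoint_fn(), which wraps the same logic in a callable
# that takes a session. Minimal sketch of the two forms (checkpoint path and
# variable list are placeholders):
import tensorflow.contrib.slim as slim


def build_restore_helpers(ckpt_path, variables):
    assign_op, feed_dict = slim.assign_from_checkpoint(ckpt_path, variables)
    init_fn = slim.assign_from_checkpoint_fn(ckpt_path, variables)
    return assign_op, feed_dict, init_fn

# sess.run(assign_op, feed_dict)   # style used in train() above
# init_fn(sess)                    # style used elsewhere in this file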
def _add_variables_summaries(learning_rate): summaries = [] for variable in slim.get_model_variables(): summaries.append(tf.summary.histogram(variable.op.name, variable)) summaries.append(tf.summary.scalar('training/Learning Rate', learning_rate)) return summaries
def build_graph(reader, model, train_data_pattern, label_loss_fn=losses.CrossEntropyLoss(), batch_size=1000, base_learning_rate=0.01, learning_rate_decay_examples=1000000, learning_rate_decay=0.95, optimizer_class=tf.train.AdamOptimizer, clip_gradient_norm=1.0, regularization_penalty=1, num_readers=1, num_epochs=None): """Creates the Tensorflow graph. This will only be called once in the life of a training model, because after the graph is created the model will be restored from a meta graph file rather than being recreated. Args: reader: The data file reader. It should inherit from BaseReader. model: The core model (e.g. logistic or neural net). It should inherit from BaseModel. train_data_pattern: glob path to the training data files. label_loss_fn: What kind of loss to apply to the model. It should inherit from BaseLoss. batch_size: How many examples to process at a time. base_learning_rate: What learning rate to initialize the optimizer with. optimizer_class: Which optimization algorithm to use. clip_gradient_norm: Magnitude of the gradient to clip to. regularization_penalty: How much weight to give the regularization loss compared to the label loss. num_readers: How many threads to use for I/O operations. num_epochs: How many passes to make over the data. 'None' means an unlimited number of passes. """ global_step = tf.Variable(0, trainable=False, name="global_step") local_device_protos = device_lib.list_local_devices() gpus = [x.name for x in local_device_protos if x.device_type == 'GPU'] num_gpus = len(gpus) if num_gpus > 0: logging.info("Using the following GPUs to train: " + str(gpus)) num_towers = num_gpus device_string = '/gpu:%d' else: logging.info("No GPUs found. Training on CPU.") num_towers = 1 device_string = '/cpu:%d' learning_rate = tf.train.exponential_decay(base_learning_rate, global_step * batch_size * num_towers, learning_rate_decay_examples, learning_rate_decay, staircase=True) tf.summary.scalar('learning_rate', learning_rate) optimizer = optimizer_class(learning_rate) unused_video_id, model_input_raw, labels_batch, num_frames = ( get_input_data_tensors(reader, train_data_pattern, batch_size=batch_size * num_towers, num_readers=num_readers, num_epochs=num_epochs)) tf.summary.histogram("model/input_raw", model_input_raw) feature_dim = len(model_input_raw.get_shape()) - 1 model_input = tf.nn.l2_normalize(model_input_raw, feature_dim) tower_inputs = tf.split(model_input, num_towers) tower_labels = tf.split(labels_batch, num_towers) tower_num_frames = tf.split(num_frames, num_towers) tower_gradients = [] tower_predictions = [] tower_label_losses = [] tower_reg_losses = [] for i in range(num_towers): # For some reason these 'with' statements can't be combined onto the same # line. They have to be nested. 
with tf.device(device_string % i): with (tf.variable_scope(("tower"), reuse=True if i > 0 else None)): with (slim.arg_scope( [slim.model_variable, slim.variable], device="/cpu:0" if num_gpus != 1 else "/gpu:0")): result = model.create_model(tower_inputs[i], num_frames=tower_num_frames[i], vocab_size=reader.num_classes, labels=tower_labels[i]) for variable in slim.get_model_variables(): tf.summary.histogram(variable.op.name, variable) predictions = result["predictions"] tower_predictions.append(predictions) if "loss" in result.keys(): label_loss = result["loss"] else: label_loss = label_loss_fn.calculate_loss( predictions, tower_labels[i]) if "regularization_loss" in result.keys(): reg_loss = result["regularization_loss"] else: reg_loss = tf.constant(0.0) reg_losses = tf.losses.get_regularization_losses() if reg_losses: reg_loss += tf.add_n(reg_losses) tower_reg_losses.append(reg_loss) # Adds update_ops (e.g., moving average updates in batch normalization) as # a dependency to the train_op. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) if "update_ops" in result.keys(): update_ops += result["update_ops"] if update_ops: with tf.control_dependencies(update_ops): barrier = tf.no_op(name="gradient_barrier") with tf.control_dependencies([barrier]): label_loss = tf.identity(label_loss) tower_label_losses.append(label_loss) # Incorporate the L2 weight penalties etc. final_loss = regularization_penalty * reg_loss + label_loss gradients = optimizer.compute_gradients( final_loss, colocate_gradients_with_ops=False) tower_gradients.append(gradients) label_loss = tf.reduce_mean(tf.stack(tower_label_losses)) tf.summary.scalar("label_loss", label_loss) if regularization_penalty != 0: reg_loss = tf.reduce_mean(tf.stack(tower_reg_losses)) tf.summary.scalar("reg_loss", reg_loss) merged_gradients = utils.combine_gradients(tower_gradients) if clip_gradient_norm > 0: with tf.name_scope('clip_grads'): merged_gradients = utils.clip_gradient_norms( merged_gradients, clip_gradient_norm) train_op = optimizer.apply_gradients(merged_gradients, global_step=global_step) tf.add_to_collection("global_step", global_step) tf.add_to_collection("loss", label_loss) tf.add_to_collection("predictions", tf.concat(tower_predictions, 0)) tf.add_to_collection("input_batch_raw", model_input_raw) tf.add_to_collection("input_batch", model_input) tf.add_to_collection("num_frames", num_frames) tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32)) tf.add_to_collection("train_op", train_op)
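# Hedged sketch: utils.combine_gradients is called above but not defined in
# this snippet. A typical multi-tower implementation averages each variable's
# gradient across towers; the version below is an assumed stand-in (dense
# gradients only, IndexedSlices not handled).
import tensorflow as tf


def combine_gradients_sketch(tower_gradients):
    """tower_gradients: one list of (gradient, variable) pairs per tower."""
    combined = []
    for pairs in zip(*tower_gradients):
        variable = pairs[0][1]
        grads = [g for g, _ in pairs if g is not None]
        if not grads:
            combined.append((None, variable))
        else:
            combined.append((tf.reduce_mean(tf.stack(grads), axis=0), variable))
    return combined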
# Dummy placeholders for arbitrary number of 1d inputs and outputs inputs = tf.placeholder(tf.float32, shape=(None, 1)) outputs = tf.placeholder(tf.float32, shape=(None, 1)) # Build model predictions, end_points = regression_model(inputs) # Print name and shape of each tensor. print("Layers") for k, v in end_points.items(): print('name = {}, shape = {}'.format(v.name, v.get_shape())) # Print name and shape of parameter nodes (values not yet initialized) print("\n") print("Parameters") for v in slim.get_model_variables(): print('name = {}, shape = {}'.format(v.name, v.get_shape())) def produce_batch(batch_size, noise=0.3): xs = np.random.random(size=[batch_size, 1]) * 10 ys = np.sin(xs) + 5 + np.random.normal(size=[batch_size, 1], scale=noise) return [xs.astype(np.float32), ys.astype(np.float32)] x_train, y_train = produce_batch(200) x_test, y_test = produce_batch(200) plt.scatter(x_train, y_train) def convert_data_to_tensors(x, y): inputs = tf.constant(x) inputs.set_shape([None, 1])
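# Hedged completion: convert_data_to_tensors() above is truncated in this
# snippet. By symmetry with the `inputs` lines, the full helper presumably
# looks like the following (an assumption, not recovered source text):
import tensorflow as tf


def convert_data_to_tensors(x, y):
    inputs = tf.constant(x)
    inputs.set_shape([None, 1])
    outputs = tf.constant(y)
    outputs.set_shape([None, 1])
    return inputs, outputs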
def build_refinenet(inputs, num_classes, preset_model='RefineNet-Res101', weight_decay=1e-5, is_training=True, upscaling_method="bilinear", pretrained_dir="models"): """ Builds the RefineNet model. Arguments: inputs: The input tensor preset_model: Which model you want to use. Select which ResNet model to use for feature extraction num_classes: Number of classes Returns: RefineNet model """ inputs = mean_image_subtraction(inputs) if preset_model == 'RefineNet-Res50': with slim.arg_scope( resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_50( inputs, is_training=is_training, scope='resnet_v1_50') # RefineNet requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn( os.path.join(pretrained_dir, 'resnet_v1_50.ckpt'), slim.get_model_variables('resnet_v1_50')) elif preset_model == 'RefineNet-Res101': with slim.arg_scope( resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_101( inputs, is_training=is_training, scope='resnet_v1_101') # RefineNet requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn( os.path.join(pretrained_dir, 'resnet_v1_101.ckpt'), slim.get_model_variables('resnet_v1_101')) elif preset_model == 'RefineNet-Res152': with slim.arg_scope( resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_152( inputs, is_training=is_training, scope='resnet_v1_152') # RefineNet requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn( os.path.join(pretrained_dir, 'resnet_v1_152.ckpt'), slim.get_model_variables('resnet_v1_152')) else: raise ValueError( "Unsupported ResNet model '%s'. This function only supports ResNet 101 and ResNet 152" % (preset_model)) f = [ end_points['pool5'], end_points['pool4'], end_points['pool3'], end_points['pool2'] ] g = [None, None, None, None] h = [None, None, None, None] for i in range(4): h[i] = slim.conv2d(f[i], 256, 1) g[0] = RefineBlock(high_inputs=None, low_inputs=h[0]) g[1] = RefineBlock(g[0], h[1]) g[2] = RefineBlock(g[1], h[2]) g[3] = RefineBlock(g[2], h[3]) # g[3]=Upsampling(g[3],scale=4) if upscaling_method.lower() == "conv": net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2) net = ConvBlock(net, 256) net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2) net = ConvBlock(net, 128) net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2) net = ConvBlock(net, 64) elif upscaling_method.lower() == "bilinear": net = Upsampling(net, label_size) net = slim.conv2d(g[3], num_classes, [1, 1], activation_fn=None, scope='logits') return net, init_fn
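# Hedged note: in build_refinenet above, the upscaling branch reads `net`
# before it is assigned and `label_size` is not defined; the decoder output at
# that point is g[3]. A corrected tail for the bilinear path could look like
# the lines below (helper names reused from the snippet, scale factor assumed):
#
#     net = g[3]
#     net = Upsampling(net, scale=4)
#     net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')
#     return net, init_fn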
def predict(output_file, model, image_dir, checkpoint_file_or_dir, log_dir="test_logs/", test_image_num=12500): image_path = os.path.join(image_dir, "*.jpg") BATCH_SIZE = 32 BATCH_THREADS = 12 BATCH_CAPACITY = BATCH_SIZE * 2 step = 0 max_step = math.ceil(test_image_num / BATCH_SIZE) count = 0 config_log(log_dir + "/" + model + ".log") config = tf.ConfigProto() config.gpu_options.allow_growth = True with open(output_file, "w", buffering=4096) as fp: writer = csv.writer(fp) writer.writerow(["id", "label"]) with tf.Graph().as_default(): with tf.Session(config=config) as sess: if model == "vgg_16": image_h, image_w = 224, 224 model_fn = nets.vgg.vgg_16 inputs, files = read_test_image( image_path, image_h, image_w, epochs=1, batch_size=BATCH_SIZE, batch_threads=BATCH_THREADS, batch_capacity=BATCH_CAPACITY) predictions, _ = model_fn(inputs, num_classes=2, is_training=False) elif model == "inception_resnet_v2": image_h, image_w = 299, 299 model_fn = inception_resnet_v2.inception_resnet_v2 inputs, files = read_test_image( image_path, image_h, image_w, epochs=1, batch_size=BATCH_SIZE, batch_threads=BATCH_THREADS, batch_capacity=BATCH_CAPACITY) with slim.arg_scope(inception_resnet_v2. inception_resnet_v2_arg_scope()): predictions, _ = model_fn(inputs, num_classes=2, is_training=False) else: raise ValueError("model {} not supported".format(model)) variables_to_restore = slim.get_model_variables() sess.run([ tf.global_variables_initializer(), tf.local_variables_initializer() ]) restorer = tf.train.Saver(variables_to_restore) if os.path.isdir(checkpoint_file_or_dir): ckpt = tf.train.latest_checkpoint(checkpoint_file_or_dir) else: ckpt = checkpoint_file_or_dir tf.logging.info("found ckpt : {}".format(ckpt)) restorer.restore(sess, ckpt) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess=sess, coord=coord) try: while not coord.should_stop(): if count >= test_image_num: break ret = tf.nn.softmax(predictions) ret = tf.clip_by_value(ret, 0.005, 0.995) preds, img_ids = sess.run([ret, files]) tf.logging.info("step = {}".format(step)) for i, pred in enumerate(preds): writer.writerow([int(img_ids[i]), pred[1]]) count += len(img_ids) step += 1 except tf.errors.OutOfRangeError: print('Done training -- epoch limit reached') finally: coord.request_stop() coord.join(threads)
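# Hedged note: in the prediction loop above, tf.nn.softmax and tf.clip_by_value
# are called on every iteration, so new nodes are added to the graph each time
# through the loop. The usual pattern is to build these ops once, before the
# loop, and only sess.run them inside it; a minimal sketch reusing the names
# from the snippet (this is not the original code):
#
#     probs = tf.clip_by_value(tf.nn.softmax(predictions), 0.005, 0.995)
#     ...
#     while not coord.should_stop():
#         preds, img_ids = sess.run([probs, files])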
def train_dsn(self): source_images, source_labels = self.load_office(split=self.src_dir) target_images, target_labels = self.load_office(split=self.trg_dir) # build a graph model = self.model model.build_model() # make directory if not exists if tf.gfile.Exists(self.log_dir): tf.gfile.DeleteRecursively(self.log_dir) tf.gfile.MakeDirs(self.log_dir) with tf.Session(config=tf.ConfigProto( allow_soft_placement=True)) as sess: with tf.device('/gpu:1'): # initialize G and D tf.global_variables_initializer().run() # restore variables of F print('Loading pretrained model.') # Do not change next two lines. Necessary because slim.get_model_variables(scope='blablabla') works only for model built with slim. variables_to_restore = tf.global_variables() #~ variables_to_restore = [v for v in variables_to_restore if 'encoder' in v.name] restorer = tf.train.Saver(variables_to_restore) restorer.restore(sess, self.pretrained_model) #~ print ('Loading pretrained encoder disc.') #~ variables_to_restore = slim.get_model_variables(scope='disc_e') #~ restorer = tf.train.Saver(variables_to_restore) #~ restorer.restore(sess, self.pretrained_sampler) print('Loading sample generator.') variables_to_restore = slim.get_model_variables( scope='sampler_generator') restorer = tf.train.Saver(variables_to_restore) restorer.restore(sess, self.pretrained_sampler) summary_writer = tf.summary.FileWriter( logdir=self.log_dir, graph=tf.get_default_graph()) saver = tf.train.Saver() print('Start training.') trg_count = 0 t = 0 G_loss = 1. DG_loss = 1. noise_dim = 100 for step in range(10000000): trg_count += 1 t += 1 i = step % int(source_images.shape[0] / self.batch_size) j = step % int(target_images.shape[0] / self.batch_size) src_images = source_images[i * self.batch_size:(i + 1) * self.batch_size] src_labels = utils.one_hot( source_labels[i * self.batch_size:(i + 1) * self.batch_size].astype(int), 31) src_noise = utils.sample_Z(self.batch_size, 100, 'uniform') trg_images = target_images[j * self.batch_size:(j + 1) * self.batch_size] feed_dict = { model.src_images: src_images, model.src_noise: src_noise, model.src_labels: src_labels, model.trg_images: trg_images } sess.run(model.E_train_op, feed_dict) sess.run(model.DE_train_op, feed_dict) if (step + 1) % 10 == 0: logits_E_real, logits_E_fake = sess.run( [model.logits_E_real, model.logits_E_fake], feed_dict) summary, E, DE = sess.run( [model.summary_op, model.E_loss, model.DE_loss], feed_dict) summary_writer.add_summary(summary, step) print ('Step: [%d/%d] E: [%.6f] DE: [%.6f] E_real: [%.2f] E_fake: [%.2f]' \ %(step+1, self.train_iter, E, DE,logits_E_real.mean(),logits_E_fake.mean())) if (step + 1) % 20 == 0: saver.save(sess, os.path.join(self.model_save_path, 'dtn'))
def build_gcn(inputs, num_classes, preset_model='GCN-Res101', weight_decay=1e-5, is_training=True, upscaling_method="bilinear", pretrained_dir="models"): """ Builds the GCN model. Arguments: inputs: The input tensor preset_model: Which model you want to use. Select which ResNet model to use for feature extraction num_classes: Number of classes Returns: GCN model """ inputs = mean_image_subtraction(inputs) if preset_model == 'GCN-Res50': with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_50(inputs, is_training=is_training, scope='resnet_v1_50') # GCN requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, 'resnet_v1_50.ckpt'), slim.get_model_variables('resnet_v1_50')) elif preset_model == 'GCN-Res101': with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_101(inputs, is_training=is_training, scope='resnet_v1_101') # GCN requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, 'resnet_v1_101.ckpt'), slim.get_model_variables('resnet_v1_101')) elif preset_model == 'GCN-Res152': with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_152(inputs, is_training=is_training, scope='resnet_v1_152') # GCN requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, 'resnet_v1_152.ckpt'), slim.get_model_variables('resnet_v1_152')) else: raise ValueError("Unsupported ResNet model '%s'. This function only supports ResNet 101 and ResNet 152" % (preset_model)) res = [end_points['pool5'], end_points['pool4'], end_points['pool3'], end_points['pool2']] down_5 = GlobalConvBlock(res[0], n_filters=21, size=3) down_5 = BoundaryRefinementBlock(down_5, n_filters=21, kernel_size=[3, 3]) down_5 = ConvUpscaleBlock(down_5, n_filters=21, kernel_size=[3, 3], scale=2) down_4 = GlobalConvBlock(res[1], n_filters=21, size=3) down_4 = BoundaryRefinementBlock(down_4, n_filters=21, kernel_size=[3, 3]) down_4 = tf.add(down_4, down_5) down_4 = BoundaryRefinementBlock(down_4, n_filters=21, kernel_size=[3, 3]) down_4 = ConvUpscaleBlock(down_4, n_filters=21, kernel_size=[3, 3], scale=2) down_3 = GlobalConvBlock(res[2], n_filters=21, size=3) down_3 = BoundaryRefinementBlock(down_3, n_filters=21, kernel_size=[3, 3]) down_3 = tf.add(down_3, down_4) down_3 = BoundaryRefinementBlock(down_3, n_filters=21, kernel_size=[3, 3]) down_3 = ConvUpscaleBlock(down_3, n_filters=21, kernel_size=[3, 3], scale=2) down_2 = GlobalConvBlock(res[3], n_filters=21, size=3) down_2 = BoundaryRefinementBlock(down_2, n_filters=21, kernel_size=[3, 3]) down_2 = tf.add(down_2, down_3) down_2 = BoundaryRefinementBlock(down_2, n_filters=21, kernel_size=[3, 3]) down_2 = ConvUpscaleBlock(down_2, n_filters=21, kernel_size=[3, 3], scale=2) net = BoundaryRefinementBlock(down_2, n_filters=21, kernel_size=[3, 3]) net = ConvUpscaleBlock(net, n_filters=21, kernel_size=[3, 3], scale=2) net = BoundaryRefinementBlock(net, n_filters=21, kernel_size=[3, 3]) net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits') return net, init_fn
def main(_): with tf.Graph().as_default() as graph: summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) global_summaries = set([]) num_batches_epoch = num_samples // (FLAGS.batch_size * FLAGS.num_clones) print(num_batches_epoch) ####################### # Config model_deploy # ####################### config = model_deploy.DeploymentConfig( num_clones=FLAGS.num_clones, clone_on_cpu=FLAGS.clone_on_cpu, replica_id=FLAGS.task, num_replicas=FLAGS.worker_replicas, num_ps_tasks=FLAGS.ps_tasks) # Create global_step with tf.device(config.variables_device()): global_step = slim.create_global_step() ###################### # Select the dataset # ###################### with tf.device(config.inputs_device()): # Train Process dataset = get_split('train', FLAGS.dataset_dir) provider = slim.dataset_data_provider.DatasetDataProvider( dataset, num_readers=FLAGS.num_readers, common_queue_capacity=FLAGS.batch_size * 20, common_queue_min=FLAGS.batch_size * 10) [image_a, image_b, label] = provider.get(['image_a', 'image_b', 'label']) probe = image_a galleries = tf.unstack(image_b) galleries_process = [] probe = process_image(probe) probe.set_shape([FLAGS.target_height, FLAGS.target_width, 3]) gallery_target = tf.slice(image_b, [label, 0, 0, 0], [1, -1, -1, -1]) gallery_target = tf.squeeze(gallery_target, axis=[0]) gallery = process_image(gallery_target) gallery.set_shape([FLAGS.target_height, FLAGS.target_width, 3]) galleries_process.append(gallery) for Idx in range(FLAGS.top_k - 1): imgIdx = tf.cond(Idx >= label, lambda: Idx + 1, lambda: Idx) gallery_other = tf.slice(image_b, [imgIdx, 0, 0, 0], [1, -1, -1, -1]) gallery_other = tf.squeeze(gallery_other, axis=[0]) gallery = process_image(gallery_other) gallery.set_shape([FLAGS.target_height, FLAGS.target_width, 3]) galleries_process.append(gallery) label_new = 0 galleries_process = tf.stack(galleries_process) probe_batch, galleries_batch, labels = tf.train.batch( [probe, galleries_process, label_new], batch_size=FLAGS.batch_size, num_threads=8, capacity=FLAGS.batch_size * 10) inputs_queue = prefetch_queue( [probe_batch, galleries_batch, labels]) ###################### # Select the network # ###################### def model_fn(inputs_queue): probe_batch, galleries_batch, labels = inputs_queue.dequeue() probe_batch_tile = tf.tile(tf.expand_dims(probe_batch, axis=1), [1, FLAGS.top_k, 1, 1, 1]) shape = probe_batch_tile.get_shape().as_list() probe_batch_reshape = tf.reshape( probe_batch_tile, [-1, shape[2], shape[3], shape[4]]) galleries_batch_reshape = tf.reshape( galleries_batch, [-1, shape[2], shape[3], shape[4]]) images_a = probe_batch_reshape images_b = galleries_batch_reshape model = find_class_by_name(FLAGS.model, [models])() logits = model.create_model(images_a, images_b, reuse=False, is_training=True) logits = tf.reshape(logits, [FLAGS.batch_size, -1]) label_onehot = tf.one_hot(labels, FLAGS.top_k) crossentropy_loss = tf.losses.softmax_cross_entropy( onehot_labels=label_onehot, logits=logits) tf.summary.histogram('images_a', images_a) clones = model_deploy.create_clones(config, model_fn, [inputs_queue]) first_clone_scope = clones[0].scope ################################# # Configure the moving averages # ################################# if FLAGS.moving_average_decay: moving_average_variables = slim.get_model_variables() variable_averages = tf.train.ExponentialMovingAverage( FLAGS.moving_average_decay, global_step) else: moving_average_variables, variable_averages = None, None ######################################### # Configure the optimization 
procedure. # ######################################### with tf.device(config.optimizer_device()): learning_rate_step_boundaries = [ int(num_batches_epoch * num_epoches * 0.60), int(num_batches_epoch * num_epoches * 0.75), int(num_batches_epoch * num_epoches * 0.90) ] learning_rate_sequence = [FLAGS.learning_rate] learning_rate_sequence += [ FLAGS.learning_rate * 0.1, FLAGS.learning_rate * 0.01, FLAGS.learning_rate * 0.001 ] learning_rate = learning_schedules.manual_stepping( global_step, learning_rate_step_boundaries, learning_rate_sequence) # learning_rate = learning_schedules.exponential_decay_with_burnin(global_step, # FLAGS.learning_rate,num_batches_epoch*num_epoches,0.001/FLAGS.learning_rate, # burnin_learning_rate=0.01, # burnin_steps=5000) if FLAGS.optimizer == 'adam': opt = tf.train.AdamOptimizer(learning_rate) if FLAGS.optimizer == 'momentum': opt = tf.train.MomentumOptimizer(learning_rate, momentum=0.9) summaries.add(tf.summary.scalar('learning_rate', learning_rate)) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope) with tf.device(config.optimizer_device()): training_optimizer = opt # Create ops required to initialize the model from a given checkpoint. TODO!! init_fn = None if FLAGS.model == 'DCSL': if FLAGS.weights is None: # if not FLAGS.moving_average_decay: variables = slim.get_model_variables('InceptionResnetV2') init_fn = slim.assign_from_checkpoint_fn( os.path.join(FLAGS.checkpoints_dir, 'inception_resnet_v2.ckpt'), slim.get_model_variables('InceptionResnetV2')) if FLAGS.model == 'DCSL_inception_v1': if FLAGS.weights is None: # if not FLAGS.moving_average_decay: variables = slim.get_model_variables('InceptionV1') init_fn = slim.assign_from_checkpoint_fn( os.path.join(FLAGS.checkpoints_dir, 'inception_v1.ckpt'), slim.get_model_variables('InceptionV1')) if FLAGS.model == 'DCSL_NAS': # if FLAGS.weights is None: # # if not FLAGS.moving_average_decay: # variables = slim.get_model_variables('NAS') # init_fn = slim.assign_from_checkpoint_fn( # os.path.join(FLAGS.checkpoints_dir, 'nasnet-a_large_04_10_2017/model.ckpt'), # slim.get_model_variables('NAS')) def restore_map(): variables_to_restore = {} for variable in tf.global_variables(): for scope_name in ['NAS']: if variable.op.name.startswith(scope_name): var_name = variable.op.name.replace( scope_name + '/', '') # var_name = variable.op.name variables_to_restore[ var_name + '/ExponentialMovingAverage'] = variable # variables_to_restore[var_name] = variable return variables_to_restore var_map = restore_map() # restore_var = [v for v in tf.global_variables() if 'global_step' not in v.name] available_var_map = ( variables_helper.get_variables_available_in_checkpoint( var_map, FLAGS.weights)) init_saver = tf.train.Saver(available_var_map) def initializer_fn(sess): init_saver.restore(sess, FLAGS.weights) init_fn = initializer_fn if FLAGS.model == 'MultiHeadAttentionBaseModel_set': if FLAGS.weights is None: # if not FLAGS.moving_average_decay: variables = slim.get_model_variables('InceptionV1') init_fn = slim.assign_from_checkpoint_fn( os.path.join(FLAGS.checkpoints_dir, 'inception_v1.ckpt'), slim.get_model_variables('InceptionV1')) else: restore_var = [ v for v in slim.get_model_variables() if 'Score' not in v.name ] init_fn = slim.assign_from_checkpoint_fn( FLAGS.weights, restore_var) if FLAGS.model == 'MultiHeadAttentionBaseModel_set_share': if FLAGS.weights is None: # if not FLAGS.moving_average_decay: variables = slim.get_model_variables('InceptionV1') init_fn = slim.assign_from_checkpoint_fn( 
os.path.join(FLAGS.checkpoints_dir, 'inception_v1.ckpt'), slim.get_model_variables('InceptionV1')) else: restore_var = [ v for v in slim.get_model_variables() if 'Score' not in v.name ] init_fn = slim.assign_from_checkpoint_fn( FLAGS.weights, restore_var) if FLAGS.model == 'MultiHeadAttentionBaseModel_set_share_softmatch': if FLAGS.weights is None: # if not FLAGS.moving_average_decay: variables = slim.get_model_variables('InceptionV1') init_fn = slim.assign_from_checkpoint_fn( os.path.join(FLAGS.checkpoints_dir, 'inception_v1.ckpt'), slim.get_model_variables('InceptionV1')) else: restore_var = [ v for v in slim.get_model_variables() if 'Score' not in v.name ] init_fn = slim.assign_from_checkpoint_fn( FLAGS.weights, restore_var) if FLAGS.model == 'MultiHeadAttentionBaseModel_set_share_softmatch_v2': if FLAGS.weights is None: # if not FLAGS.moving_average_decay: variables = slim.get_model_variables('InceptionV1') init_fn = slim.assign_from_checkpoint_fn( os.path.join(FLAGS.checkpoints_dir, 'inception_v1.ckpt'), slim.get_model_variables('InceptionV1')) else: restore_var = [ v for v in slim.get_model_variables() if 'Score' not in v.name ] init_fn = slim.assign_from_checkpoint_fn( FLAGS.weights, restore_var) if FLAGS.model == 'MultiHeadAttentionBaseModel_set_share_res50': if FLAGS.weights is None: # if not FLAGS.moving_average_decay: variables = slim.get_model_variables('resnet_v2_50') init_fn = slim.assign_from_checkpoint_fn( os.path.join(FLAGS.checkpoints_dir, 'resnet_v2_50.ckpt'), slim.get_model_variables('resnet_v2_50')) if FLAGS.model == 'MultiHeadAttentionBaseModel_set_inv3': # if not FLAGS.moving_average_decay: variables = slim.get_model_variables('InceptionV3') init_fn = slim.assign_from_checkpoint_fn( os.path.join(FLAGS.checkpoints_dir, 'inception_v3.ckpt'), slim.get_model_variables('InceptionV3')) # compute and update gradients with tf.device(config.optimizer_device()): if FLAGS.moving_average_decay: update_ops.append( variable_averages.apply(moving_average_variables)) # Variables to train. all_trainable = tf.trainable_variables() # and returns a train_tensor and summary_op total_loss, grads_and_vars = model_deploy.optimize_clones( clones, training_optimizer, regularization_losses=None, var_list=all_trainable) grad_mult = utils.get_model_gradient_multipliers( FLAGS.last_layer_gradient_multiplier) grads_and_vars = slim.learning.multiply_gradients( grads_and_vars, grad_mult) # Optionally clip gradients # with tf.name_scope('clip_grads'): # grads_and_vars = slim.learning.clip_gradient_norms(grads_and_vars, 10) total_loss = tf.check_numerics(total_loss, 'LossTensor is inf or nan.') # Create gradient updates. grad_updates = training_optimizer.apply_gradients( grads_and_vars, global_step=global_step) update_ops.append(grad_updates) update_op = tf.group(*update_ops) with tf.control_dependencies([update_op]): train_tensor = tf.identity(total_loss, name='train_op') # Add summaries. for loss_tensor in tf.losses.get_losses(): global_summaries.add( tf.summary.scalar(loss_tensor.op.name, loss_tensor)) global_summaries.add( tf.summary.scalar('TotalLoss', tf.losses.get_total_loss())) # Add the summaries from the first clone. These contain the summaries summaries |= set( tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope)) summaries |= global_summaries # Merge all summaries together. 
summary_op = tf.summary.merge(list(summaries), name='summary_op') # GPU settings session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False) session_config.gpu_options.allow_growth = False # Save checkpoints regularly. keep_checkpoint_every_n_hours = 2.0 saver = tf.train.Saver( keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours) ########################### # Kicks off the training. # ########################### slim.learning.train(train_tensor, logdir=logdir, master=FLAGS.master, is_chief=(FLAGS.task == 0), session_config=session_config, startup_delay_steps=10, summary_op=summary_op, init_fn=init_fn, number_of_steps=num_batches_epoch * FLAGS.num_epoches, save_summaries_secs=240, sync_optimizer=None, saver=saver)
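# Hedged sketch: learning_schedules.manual_stepping above comes from an
# external helper module. The same stepped decay (x1, x0.1, x0.01, x0.001 of
# the base rate at 60%/75%/90% of training) can be expressed with the stock
# tf.train.piecewise_constant; this is only an assumed equivalent, and
# `boundaries` must contain the same three step indices computed in main().
import tensorflow as tf


def stepped_learning_rate(global_step, base_lr, boundaries):
    """boundaries: list of 3 global-step values at which the rate drops."""
    values = [base_lr, base_lr * 0.1, base_lr * 0.01, base_lr * 0.001]
    return tf.train.piecewise_constant(global_step, boundaries, values)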
def print_variables(self): variables = slim.get_model_variables() print 'Model Variables:' for var in variables: print var.name, ' ', var.get_shape()
def execute(checkpoint_path, model_no): if FLAGS.num_validation == 0: print("FLAGS.num_validation is 0, no need to validation") return None if checkpoint_path == None: checkpoint_path = tf.train.latest_checkpoint(FLAGS.train_dir) print("Begin validation_confusion_matrix %s" % format(datetime.now().isoformat())) t1 = time() preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name image_preprocessing_fn = preprocessing_factory.get_preprocessing( preprocessing_name, is_training=False) labels_to_names = dataset_utils.read_label_file(FLAGS.dataset_dir) num_classes = len(labels_to_names) def decode(serialized_example): feature = { 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpg'), 'image/class/label': tf.FixedLenFeature([], tf.int64, default_value=tf.zeros([], dtype=tf.int64)), } features = tf.parse_single_example(serialized_example, features=feature) # image image_string = features['image/encoded'] image = tf.image.decode_jpeg(image_string, channels=3) image = image_preprocessing_fn(image, FLAGS.train_image_size, FLAGS.train_image_size) # label label = features['image/class/label'] label = tf.one_hot(label, num_classes) return image, label def input_iter(filenames, batch_size, num_epochs): if not num_epochs: num_epochs = 1 dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads=FLAGS.num_readers) dataset = dataset.map(decode) dataset = dataset.repeat(num_epochs) dataset = dataset.batch(batch_size) # dataset = dataset.shuffle(buffer_size=NUM_IMAGES) iterator = dataset.make_one_shot_iterator() return iterator with tf.Graph().as_default() as graph: tf_global_step = slim.get_or_create_global_step() network_fn = nets_factory.get_network_fn( FLAGS.model_name, num_classes=(num_classes - FLAGS.labels_offset), weight_decay=FLAGS.weight_decay, is_training=False) eval_image_size = FLAGS.train_image_size x = tf.placeholder(tf.float32, [None, eval_image_size, eval_image_size, 3]) y_ = tf.placeholder(tf.float32, [None, num_classes]) logits, endpoints = network_fn(x) predictions_key = "Predictions" if FLAGS.model_name.startswith("resnet"): predictions_key = "predictions" y = endpoints[predictions_key] test_labels = tf.argmax(y_, 1, name="label") if FLAGS.moving_average_decay: variable_averages = tf.train.ExponentialMovingAverage( FLAGS.moving_average_decay, tf_global_step) variables_to_restore = variable_averages.variables_to_restore( slim.get_model_variables()) variables_to_restore[tf_global_step.op.name] = tf_global_step else: variables_to_restore = slim.get_variables_to_restore() input_dir = [] for i in range(5): data_file = os.path.join( FLAGS.dataset_dir, "garbage_validation_0000%d-of-00005.tfrecord") % i input_dir.append(data_file) iter = input_iter(input_dir, batch_size, 1) next_batch = iter.get_next() saver = tf.train.Saver( var_list=variables_to_restore) #Same as slim.get_variables() init1 = tf.global_variables_initializer() init2 = tf.local_variables_initializer() with tf.Session() as sess: sess.run(init1) sess.run(init2) saver.restore(sess, checkpoint_path) images, labels = sess.run(next_batch) predictions = sess.run(fetches=y, feed_dict={x: images}) predictions = np.squeeze(predictions) ids = sess.run(test_labels, feed_dict={y_: labels}) errorList = [] v_records = [] for i in range(batch_size): prediction = predictions[i] top_k = prediction.argsort()[-5:][::-1] if ids[i] != top_k[0]: errorList.append(str(ids[i]) + ":" + str(top_k[0])) v_record = str(ids[i]) + " " + labels_to_names[ids[i]] + 
" => " #print(ids[i], labels_to_names[ids[i]], "=> ", end='') for id in top_k: human_string = labels_to_names[id] score = prediction[id] v_record = v_record + str( id) + ":" + human_string + "(P=" + str(score) + "), " #print('%d:%s(P=%.5f), ' % (id, human_string, score), end='') print(v_record) v_records.append(v_record) print(errorList) errorid_filename = os.path.join(FLAGS.inference_dir, model_no + "_error.csv") print("Write file: %s ..." % errorid_filename) with tf.gfile.Open(errorid_filename, 'w') as f: for idmap in errorList: f.write('%s\n' % (idmap)) validation_record_filename = os.path.join( FLAGS.inference_dir, model_no + "_validation_record.txt") print("Write file: %s ..." % validation_record_filename) with tf.gfile.Open(validation_record_filename, 'w') as f: for v_rec in v_records: f.write('%s\n' % (v_rec)) sess.close() t2 = time() print("End validation_confusion_matrix %d s" % (t2 - t1)) sys.stdout.flush()
def restore_model(checkpoint_paths, variables_to_restore, ignore_missing_vars=False, num_streams=1, checkpoint_style=None, special_assign_vars=None): all_ops = [] if len(checkpoint_paths) == 1 and num_streams > 1: logging.info('Provided one checkpoint for multi-stream ' 'network. Will use this as a saved model ' 'with this exact multi stream network.') all_ops.append(slim.assign_from_checkpoint_fn( checkpoint_paths[0], variables_to_restore, ignore_missing_vars=ignore_missing_vars)) else: for sid in range(num_streams): this_checkpoint_style = checkpoint_style.split(',')[sid] if \ checkpoint_style is not None else None checkpoint_path = checkpoint_paths[sid] # assert tf.gfile.Exists(checkpoint_path) this_stream_name = 'stream%d/' % sid this_checkpoint_variables = [var for var in variables_to_restore if var in slim.get_model_variables(this_stream_name)] if checkpoint_path.endswith('.npy'): vars_to_restore_names = [ el.name for el in this_checkpoint_variables] key_name_mapper = var_name_mapper.map() init_weights = np.load(checkpoint_path).item() init_weights_final = {} vars_restored = [] for key in init_weights.keys(): for subkey in init_weights[key].keys(): prefix = this_stream_name if this_checkpoint_style == 'v2_withStream': prefix = 'stream0/' # because any model trained with stream # will have that stream as 0 final_key_name = prefix + key_name_mapper( key + '/' + subkey) if final_key_name not in vars_to_restore_names: logging.error('Not using %s from npy' % final_key_name) continue target_shape = slim.get_model_variables( final_key_name)[0].get_shape().as_list() pretrained_wts = init_weights[key][subkey] target_shape_squeezed = np.delete( target_shape, np.where(np.array(target_shape) == 1)) pretrained_shape_squeezed = np.delete( pretrained_wts.shape, np.where(np.array(pretrained_wts.shape) == 1)) if np.all(target_shape_squeezed != pretrained_shape_squeezed): logging.error('Shape mismatch var: %s from npy [%s vs %s]' % (final_key_name, target_shape, pretrained_wts.shape)) init_weights_final[final_key_name] = \ pretrained_wts vars_restored.append(final_key_name) init_weights = init_weights_final for v in vars_to_restore_names: if v not in vars_restored: logging.fatal('No weights found for %s' % v) all_ops.append(slim.assign_from_values_fn( init_weights)) else: if this_checkpoint_style != 'v2_withStream': all_ops.append(slim.assign_from_checkpoint_fn( checkpoint_path, # stripping the stream name to map variables dict( [('/'.join(el.name.split('/')[1:]).split(':')[0], el) for el in this_checkpoint_variables]), ignore_missing_vars=ignore_missing_vars)) else: all_ops.append(slim.assign_from_checkpoint_fn( checkpoint_path, # stripping the stream name to map variables, to stream0, # as the model is v2_withStream, hence must be trained with # stream0/ prefix dict( [('/'.join(['stream0'] + el.name.split('/')[1:]).split(':')[0], el) for el in this_checkpoint_variables]), ignore_missing_vars=ignore_missing_vars)) if special_assign_vars is not None: all_ops.append(get_special_assigns(special_assign_vars)) def combined(sess): for op in all_ops: op(sess) return combined
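# Hedged note: in restore_model above, the shape check
# `np.all(target_shape_squeezed != pretrained_shape_squeezed)` only fires when
# every dimension differs. To flag any mismatch, the usual check would be:
#
#     if not np.array_equal(target_shape_squeezed, pretrained_shape_squeezed):
#         logging.error('Shape mismatch var: %s from npy [%s vs %s]'
#                       % (final_key_name, target_shape, pretrained_wts.shape))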
def main(_): if not FLAGS.dataset_dir: raise ValueError('You must supply the dataset directory with --dataset_dir') tf.logging.set_verbosity(tf.logging.DEBUG) with tf.Graph().as_default(): # Config model_deploy. Keep TF Slim Models structure. # Useful if want to need multiple GPUs and/or servers in the future. deploy_config = model_deploy.DeploymentConfig( num_clones=FLAGS.num_clones, clone_on_cpu=FLAGS.clone_on_cpu, replica_id=0, num_replicas=1, num_ps_tasks=0) # Create global_step. with tf.device(deploy_config.variables_device()): # 分配设备 global_step = slim.create_global_step() # Select the dataset.#得到数据 dataset = dataset_factory.get_dataset( FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir) # Get the SSD network and its anchors. ssd_class = nets_factory.get_network(FLAGS.model_name) # 返回ssd_vgg_300.SSDNet ssd_params = ssd_class.default_params._replace(num_classes=FLAGS.num_classes) ssd_net = ssd_class(ssd_params) ssd_shape = ssd_net.params.img_shape ssd_anchors = ssd_net.anchors(ssd_shape) # 为每个特征图生成anchors # Select the preprocessing function. preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name # 得到处理数据的程序 image_preprocessing_fn = preprocessing_factory.get_preprocessing( preprocessing_name, is_training=True) tf_utils.print_configuration(FLAGS.__flags, ssd_params, dataset.data_sources, FLAGS.train_dir) # =================================================================== # # Create a dataset provider and batches. # =================================================================== # with tf.device(deploy_config.inputs_device()): with tf.name_scope(FLAGS.dataset_name + '_data_provider'): provider = slim.dataset_data_provider.DatasetDataProvider( dataset, num_readers=FLAGS.num_readers, common_queue_capacity=20 * FLAGS.batch_size, common_queue_min=10 * FLAGS.batch_size, shuffle=True) # Get for SSD network: image, labels, bboxes. [image, glabels, gbboxes] = provider.get(['image', 'object/label', 'object/bbox']) # Pre-processing image, labels and bboxes. # 对图像进行预处理 image, glabels, gbboxes = image_preprocessing_fn(image, glabels, gbboxes, out_shape=ssd_shape, data_format=DATA_FORMAT) # Encode groundtruth labels and bboxes. ###############################################################没看懂 gclasses, glocalisations, gscores = ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors) batch_shape = [1] + [len(ssd_anchors)] * 3 # Training batches and queue. r = tf.train.batch( tf_utils.reshape_list([image, gclasses, glocalisations, gscores]), batch_size=FLAGS.batch_size, num_threads=FLAGS.num_preprocessing_threads, capacity=5 * FLAGS.batch_size) b_image, b_gclasses, b_glocalisations, b_gscores = \ tf_utils.reshape_list(r, batch_shape) # Intermediate queueing: unique batch computation pipeline for all # GPUs running the training. batch_queue = slim.prefetch_queue.prefetch_queue( tf_utils.reshape_list([b_image, b_gclasses, b_glocalisations, b_gscores]), capacity=2 * deploy_config.num_clones) # =================================================================== # # Define the model running on every GPU. # =================================================================== # def clone_fn(batch_queue): """Allows data parallelism by creating multiple clones of network_fn.""" # Dequeue batch. b_image, b_gclasses, b_glocalisations, b_gscores = \ tf_utils.reshape_list(batch_queue.dequeue(), batch_shape) # Construct SSD network. 
arg_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay, data_format=DATA_FORMAT) with slim.arg_scope(arg_scope): predictions, localisations, logits, end_points = \ ssd_net.net(b_image, is_training=True,DSSD_FLAG = FLAGS.DSSD_FLAG) # print( [image, glabels, gbboxes]) # Add loss function. ssd_net.losses(logits, localisations, b_gclasses, b_glocalisations, b_gscores, match_threshold=FLAGS.match_threshold, negative_ratio=FLAGS.negative_ratio, alpha=FLAGS.loss_alpha, label_smoothing=FLAGS.label_smoothing) return end_points # Gather initial summaries. summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) # =================================================================== # # Add summaries from first clone. # =================================================================== # ##########################################没看懂 clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue]) first_clone_scope = deploy_config.clone_scope(0) # Gather update_ops from the first clone. These contain, for example, # the updates for the batch_norm variables created by network_fn. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope) # Add summaries for end_points. end_points = clones[0].outputs for end_point in end_points: x = end_points[end_point] summaries.add(tf.summary.histogram('activations/' + end_point, x)) summaries.add(tf.summary.scalar('sparsity/' + end_point, tf.nn.zero_fraction(x))) # Add summaries for losses and extra losses. for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope): summaries.add(tf.summary.scalar(loss.op.name, loss)) for loss in tf.get_collection('EXTRA_LOSSES', first_clone_scope): summaries.add(tf.summary.scalar(loss.op.name, loss)) # Add summaries for variables. for variable in slim.get_model_variables(): summaries.add(tf.summary.histogram(variable.op.name, variable)) # =================================================================== # # Configure the moving averages. # =================================================================== # if FLAGS.moving_average_decay: moving_average_variables = slim.get_model_variables() variable_averages = tf.train.ExponentialMovingAverage( FLAGS.moving_average_decay, global_step) else: moving_average_variables, variable_averages = None, None # =================================================================== # # Configure the optimization procedure. # =================================================================== # with tf.device(deploy_config.optimizer_device()): learning_rate = tf_utils.configure_learning_rate(FLAGS, dataset.num_samples, global_step) optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate) summaries.add(tf.summary.scalar('learning_rate', learning_rate)) if FLAGS.moving_average_decay: # Update ops executed locally by trainer. update_ops.append(variable_averages.apply(moving_average_variables)) # Variables to train. variables_to_train = tf_utils.get_variables_to_train(FLAGS) # and returns a train_tensor and summary_op total_loss, clones_gradients = model_deploy.optimize_clones( clones, optimizer, var_list=variables_to_train) # Add total_loss to summary. summaries.add(tf.summary.scalar('total_loss', total_loss)) # Create gradient updates. grad_updates = optimizer.apply_gradients(clones_gradients, global_step=global_step) update_ops.append(grad_updates) update_op = tf.group(*update_ops) train_tensor = control_flow_ops.with_dependencies([update_op], total_loss, name='train_op') # Add the summaries from the first clone. 
summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope)) # Merge all summaries together. summary_op = tf.summary.merge(list(summaries), name='summary_op') # =================================================================== # # Kick off the training. # =================================================================== # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction) config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options) saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=1.0, write_version=2, pad_step_number=False) # n = tf.all_variables() if FLAGS.DSSD_FLAG: FLAGS.checkpoint_path = './checkpoints' FLAGS.train_dir = './checkpoints_dssd' ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_path) # reader = tf.train.NewCheckpointReader(ckpt.model_checkpoint_path) variables_to_restore = [var.name for var in tf.all_variables() if var.name.startswith("_box", 18) or var.name.startswith("_box", 19)] # saver_restore = tf.train.Saver(ckpt.model_checkpoint_path, variables_to_restore) variables_to_restore = slim.get_variables_to_restore(exclude=variables_to_restore) init_fn = slim.assign_from_checkpoint_fn(ckpt.model_checkpoint_path, variables_to_restore, ignore_missing_vars=True, reshape_variables=False) else: init_fn = tf_utils.get_init_fn(FLAGS) # with tf.Session() as sess: # init_fn(sess) # saver_restore.restore() # ckpt_filename = './checkpoints_fpn/model.ckpt-87149' # saver.restore(sess, ckpt_filename) # print(".................................") slim.learning.train( train_tensor, logdir=FLAGS.train_dir, master='', is_chief=True, init_fn=init_fn, summary_op=summary_op, number_of_steps=FLAGS.max_number_of_steps, log_every_n_steps=FLAGS.log_every_n_steps, save_summaries_secs=FLAGS.save_summaries_secs, saver=saver, save_interval_secs=FLAGS.save_interval_secs, session_config=config, sync_optimizer=None)
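The DSSD branch above warm-starts from the SSD checkpoint by excluding the newly added box-prediction variables and letting slim fill in the rest. A minimal, self-contained sketch of that restore pattern; the excluded scope name is purely illustrative, not taken from the code above:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def make_init_fn(checkpoint_path, exclude_scopes):
    # Keep every variable except those under an excluded scope.
    variables_to_restore = slim.get_variables_to_restore(exclude=exclude_scopes)
    # ignore_missing_vars=True lets the checkpoint omit the newly added variables.
    return slim.assign_from_checkpoint_fn(checkpoint_path,
                                          variables_to_restore,
                                          ignore_missing_vars=True)

# Hypothetical usage: warm-start everything except a new prediction head.
# init_fn = make_init_fn('./checkpoints/model.ckpt', ['ssd_300_vgg/block11_box'])
# slim.learning.train(train_tensor, logdir=FLAGS.train_dir, init_fn=init_fn, ...)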
def main(argv=None): # load the preprocessed data processed_data = np.load(INPUT_DATA) training_images = processed_data[0] n_training_examples = len(training_images) training_labels = processed_data[1] validation_images = processed_data[2] validation_labels = processed_data[3] testing_images = processed_data[4] testing_labels = processed_data[5] print('%d training, %d validation, %d testing' % (n_training_examples, len(validation_labels), len(testing_labels))) # define the inputs of inception_resnet_v2, default_image_size = 299 images = tf.placeholder(tf.float32, [None, 299, 299, 3], name='input_image') labels = tf.placeholder(tf.int64, [None], name='labels') # build the inception_resnet_v2 model from its definition arg_scope = inception_resnet_v2_arg_scope with slim.arg_scope(arg_scope()): logits, _ = inception_resnet_v2(images, num_classes=N_CLASSES) # with tf.variable_scope('squeeze_logits'): # logits = tf.squeeze(logits, axis=[1, 2]) trainable_var = get_trainable_variables() # loss function tf.losses.softmax_cross_entropy(tf.one_hot(labels, N_CLASSES), logits) # training op train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize( tf.losses.get_total_loss()) # (alternative) train only the last layer # train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(tf.losses.get_total_loss(), # var_list=get_trainable_variables()) # accuracy with tf.variable_scope('evaluation'): correct_prediction = tf.equal(tf.argmax(logits, 1), labels) evaluation_step = tf.reduce_mean( tf.cast(correct_prediction, tf.float32)) ckpt = tf.train.get_checkpoint_state(SAVE_PATH) if ckpt and ckpt.model_checkpoint_path: # load previously trained parameters and continue training variables_to_restore = slim.get_model_variables() print('continue training from %s' % ckpt.model_checkpoint_path) step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1] step = int(step) ckpt = ckpt.model_checkpoint_path else: # no previous checkpoint, so first transfer part of the pretrained weights ckpt = TRAINED_CKPT_FILE variables_to_restore = get_tuned_variable() print('loading tuned variables from %s' % TRAINED_CKPT_FILE) step = 0 load_fn = slim.assign_from_checkpoint_fn(ckpt, variables_to_restore, ignore_missing_vars=True) # start the training session saver = tf.train.Saver() with tf.Session() as sess: # initialize all parameters init = tf.global_variables_initializer() sess.run(init) load_fn(sess) start = 0 end = BATCH for i in range(step + 1, step + 1 + STEPS): start_time = time.time() # run a training step (not all parameters are updated) sess.run(train_step, feed_dict={ images: training_images[start:end], labels: training_labels[start:end] }) duration = time.time() - start_time print('current train step duration %.3f' % duration) # log progress if i % 100 == 0: saver.save(sess, TRAIN_FILE, global_step=i) validation_accuracy = sess.run(evaluation_step, feed_dict={ images: validation_images, labels: validation_labels }) print('Step %d Validation accuracy = %.1f%%' % (i, validation_accuracy * 100.0)) start = end if start == n_training_examples: start = 0 end = start + BATCH if end > n_training_examples: end = n_training_examples # evaluate accuracy on the test set test_accuracy = sess.run(evaluation_step, feed_dict={ images: testing_images, labels: testing_labels }) print('Final test accuracy = %.1f%%' % (test_accuracy * 100.0))
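The script above depends on a get_tuned_variable() helper that is not shown. A plausible sketch, assuming the standard slim scope names for inception_resnet_v2; the excluded scopes are an assumption, not taken from the script:

import tensorflow.contrib.slim as slim

# Scopes whose weights should NOT be copied from the pretrained checkpoint,
# because their shapes depend on the new number of classes.
CHECKPOINT_EXCLUDE_SCOPES = ['InceptionResnetV2/Logits', 'InceptionResnetV2/AuxLogits']

def get_tuned_variable():
    exclusions = [scope.strip() for scope in CHECKPOINT_EXCLUDE_SCOPES]
    variables_to_restore = []
    for var in slim.get_model_variables():
        # Skip any variable that lives under an excluded scope.
        if not any(var.op.name.startswith(exclusion) for exclusion in exclusions):
            variables_to_restore.append(var)
    return variables_to_restore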
def main(_): if not FLAGS.dataset_dir: raise ValueError( 'You must supply the dataset directory with --dataset_dir') tf.logging.set_verbosity(tf.logging.INFO) with tf.Graph().as_default(): tf_global_step = slim.get_or_create_global_step() ###################### # Select the dataset # ###################### dataset = dataset_factory.get_dataset(FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir) #################### # Select the model # #################### network_fn = nets_factory.get_network_fn( FLAGS.model_name, num_classes=(dataset.num_classes - FLAGS.labels_offset), is_training=False) ############################################################## # Create a dataset provider that loads data from the dataset # ############################################################## provider = slim.dataset_data_provider.DatasetDataProvider( dataset, shuffle=False, common_queue_capacity=2 * FLAGS.batch_size, common_queue_min=FLAGS.batch_size) [image, label] = provider.get(['image', 'label']) label -= FLAGS.labels_offset ##################################### # Select the preprocessing function # ##################################### preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name image_preprocessing_fn = preprocessing_factory.get_preprocessing( preprocessing_name, is_training=False) eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size image = image_preprocessing_fn(image, eval_image_size, eval_image_size) images, labels = tf.train.batch( [image, label], batch_size=FLAGS.batch_size, num_threads=FLAGS.num_preprocessing_threads, capacity=5 * FLAGS.batch_size) #################### # Define the model # #################### logits, _ = network_fn(images) if FLAGS.moving_average_decay: variable_averages = tf.train.ExponentialMovingAverage( FLAGS.moving_average_decay, tf_global_step) variables_to_restore = variable_averages.variables_to_restore( slim.get_model_variables()) variables_to_restore[tf_global_step.op.name] = tf_global_step else: variables_to_restore = slim.get_variables_to_restore() predictions = tf.argmax(logits, 1) labels = tf.squeeze(labels) # Define the metrics: names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({ 'Accuracy': slim.metrics.streaming_accuracy(predictions, labels), 'Recall_5': slim.metrics.streaming_recall_at_k(logits, labels, 5), }) # Print the summaries to screen. for name, value in names_to_values.items(): summary_name = 'eval/%s' % name op = tf.summary.scalar(summary_name, value, collections=[]) op = tf.Print(op, [value], summary_name) tf.add_to_collection(tf.GraphKeys.SUMMARIES, op) # TODO(sguada) use num_epochs=1 if FLAGS.max_num_batches: num_batches = FLAGS.max_num_batches else: # This ensures that we make a single pass over all of the data. num_batches = math.ceil(dataset.num_samples / float(FLAGS.batch_size)) if tf.gfile.IsDirectory(FLAGS.checkpoint_path): checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) else: checkpoint_path = FLAGS.checkpoint_path tf.logging.info('Evaluating %s' % checkpoint_path) slim.evaluation.evaluate_once( master=FLAGS.master, checkpoint_path=checkpoint_path, logdir=FLAGS.eval_dir, num_evals=num_batches, eval_op=list(names_to_updates.values()), variables_to_restore=variables_to_restore)
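When --moving_average_decay is set, the evaluation above restores the shadow (averaged) values into the live variables. A small sketch of the name mapping that variables_to_restore() produces, assuming the model graph has already been built in the current graph:

import tensorflow as tf
import tensorflow.contrib.slim as slim

global_step = tf.train.get_or_create_global_step()
ema = tf.train.ExponentialMovingAverage(0.9999, global_step)

# Maps 'some/variable/ExponentialMovingAverage' -> the plain variable, so a
# tf.train.Saver restores the averaged checkpoint values into the live weights.
variables_to_restore = ema.variables_to_restore(slim.get_model_variables())
variables_to_restore[global_step.op.name] = global_step

saver = tf.train.Saver(variables_to_restore)
# saver.restore(sess, checkpoint_path)  # evaluate with the EMA weights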
def build_graph(reader, model, train_data_pattern, label_loss_fn=losses.CrossEntropyLoss(), batch_size=1000, base_learning_rate=0.01, learning_rate_decay_examples=1000000, learning_rate_decay=0.95, optimizer_class=tf.train.AdamOptimizer, clip_gradient_norm=1.0, regularization_penalty=1, num_readers=1, num_epochs=None): """Creates the Tensorflow graph. This will only be called once in the life of a training model, because after the graph is created the model will be restored from a meta graph file rather than being recreated. Args: reader: The data file reader. It should inherit from BaseReader. model: The core model (e.g. logistic or neural net). It should inherit from BaseModel. train_data_pattern: glob path to the training data files. label_loss_fn: What kind of loss to apply to the model. It should inherit from BaseLoss. batch_size: How many examples to process at a time. base_learning_rate: What learning rate to initialize the optimizer with. optimizer_class: Which optimization algorithm to use. clip_gradient_norm: Magnitude of the gradient to clip to. regularization_penalty: How much weight to give the regularization loss compared to the label loss. num_readers: How many threads to use for I/O operations. num_epochs: How many passes to make over the data. 'None' means an unlimited number of passes. """ global_step = tf.Variable(0, trainable=False, name="global_step") learning_rate = tf.train.exponential_decay(base_learning_rate, global_step * batch_size, learning_rate_decay_examples, learning_rate_decay, staircase=True) tf.summary.scalar('learning_rate', learning_rate) optimizer = optimizer_class(learning_rate) unused_video_id, model_input_raw, labels_batch, num_frames = ( get_input_data_tensors(reader, train_data_pattern, batch_size=batch_size, num_readers=num_readers, num_epochs=num_epochs)) tf.summary.histogram("model/input_raw", model_input_raw) feature_dim = len(model_input_raw.get_shape()) - 1 model_input = tf.nn.l2_normalize(model_input_raw, feature_dim) with tf.name_scope("model"): result = model.create_model(model_input, num_frames=num_frames, vocab_size=reader.num_classes, labels=labels_batch) for variable in slim.get_model_variables(): tf.summary.histogram(variable.op.name, variable) predictions = result["predictions"] if "loss" in result.keys(): label_loss = result["loss"] else: label_loss = label_loss_fn.calculate_loss(predictions, labels_batch) tf.summary.scalar("label_loss", label_loss) if "regularization_loss" in result.keys(): reg_loss = result["regularization_loss"] else: reg_loss = tf.constant(0.0) reg_losses = tf.losses.get_regularization_losses() if reg_losses: reg_loss += tf.add_n(reg_losses) if regularization_penalty != 0: tf.summary.scalar("reg_loss", reg_loss) # Adds update_ops (e.g., moving average updates in batch normalization) as # a dependency to the train_op. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) if "update_ops" in result.keys(): update_ops += result["update_ops"] if update_ops: with tf.control_dependencies(update_ops): barrier = tf.no_op(name="gradient_barrier") with tf.control_dependencies([barrier]): label_loss = tf.identity(label_loss) # Incorporate the L2 weight penalties etc. 
final_loss = regularization_penalty * reg_loss + label_loss train_op = slim.learning.create_train_op( final_loss, optimizer, global_step=global_step, clip_gradient_norm=clip_gradient_norm) tf.add_to_collection("global_step", global_step) tf.add_to_collection("loss", label_loss) tf.add_to_collection("predictions", predictions) tf.add_to_collection("input_batch_raw", model_input_raw) tf.add_to_collection("input_batch", model_input) tf.add_to_collection("num_frames", num_frames) tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32)) tf.add_to_collection("train_op", train_op)
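As the docstring notes, this graph is built once and later reloaded from a meta graph; the tf.add_to_collection calls are how the training loop finds the ops again. A hedged sketch of the consumer side (the paths are placeholders):

import tensorflow as tf

with tf.Graph().as_default():
    # Placeholder paths; the real ones come from the training directory.
    saver = tf.train.import_meta_graph('/tmp/model.ckpt.meta')
    train_op = tf.get_collection('train_op')[0]
    loss = tf.get_collection('loss')[0]
    global_step = tf.get_collection('global_step')[0]
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint('/tmp'))
        _, loss_val, step = sess.run([train_op, loss, global_step])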
def iterate_through_cwl2_attacks(): tf.logging.set_verbosity(tf.logging.INFO) input_dir = FLAGS.input_image_dir metadata_file_path = FLAGS.metadata_file_path num_images = len(os.listdir(input_dir)) batch_shape = (num_images, 299, 299, 3) num_classes = 1001 batch_size = attack_name_to_params[ATTACKS.CARLINI_WAGNER]['batch_size'] images, labels, target_classes = load_images(input_dir, metadata_file_path, batch_shape, num_classes) list_param_dict = expand_param_dict( attack_name_to_params[ATTACKS.CARLINI_WAGNER], attack_name_to_configurable_params[ATTACKS.CARLINI_WAGNER] ) save_dir = 'saves' os.makedirs(save_dir, exist_ok=True) for idx, params in enumerate(list_param_dict): tf.reset_default_graph() logger.info('Running attack with parameters: {}'.format(params)) logger.info('Current index of parameters: {}/{}'.format(idx, len(list_param_dict))) # Get save path adv_imgs_save_path = get_attack_images_filename_prefix( attack_name=ATTACKS.CARLINI_WAGNER, params=params, model='inception', targeted_prefix='targeted' ) adv_imgs_save_path = os.path.join(save_dir, adv_imgs_save_path) # Run inference graph = tf.Graph() with graph.as_default(): sess = tf.Session(graph=graph) # Prepare graph x_input = tf.placeholder(tf.float32, shape=(batch_size,) + batch_shape[1:]) y_label = tf.placeholder(tf.int32, shape=(batch_size, num_classes)) y_target = tf.placeholder(tf.int32, shape=(batch_size, num_classes)) model = InceptionModel(num_classes) cwl2 = True if cwl2: attack = CarliniWagnerL2(model=model, sess=sess) x_adv = attack.generate(x_input, y_target=y_target, **params) else: attack = SPSA(model=model) x_adv = attack.generate(x_input, y_target=y_label, epsilon=4. / 255, num_steps=30, early_stop_loss_threshold=-1., batch_size=32, spsa_iters=16, is_debug=True) logits = model.get_logits(x_input) acc = _top_k_accuracy(logits, tf.argmax(y_label, axis=1), k=1) success_rate = _top_k_accuracy(logits, tf.argmax(y_target, axis=1), k=1) # Run computation saver = tf.train.Saver(slim.get_model_variables()) saver.restore(sess, save_path=FLAGS.checkpoint_path) list_adv_images = [] if num_images % batch_size == 0: num_batches = int(num_images / batch_size) else: num_batches = int(num_images / batch_size + 1) for i in tqdm.tqdm(range(num_batches)): feed_dict_i = {x_input: images[i * batch_size:(i + 1) * batch_size], y_target: target_classes[i * batch_size:(i + 1) * batch_size]} adv_img = sess.run(x_adv, feed_dict=feed_dict_i) list_adv_images.append(adv_img) adv_images = np.concatenate((list_adv_images)) np.save(adv_imgs_save_path, adv_images) acc_store = [] succ_store = [] for i in tqdm.tqdm(range(num_batches)): feed_dict_i = {x_input: adv_images[i * batch_size:(i + 1) * batch_size], y_target: target_classes[i * batch_size:(i + 1) * batch_size], y_label: labels[i * batch_size:(i + 1) * batch_size]} succ_batch, acc_batch = sess.run([success_rate, acc], feed_dict=feed_dict_i) acc_store.extend(acc_batch) succ_store.extend(succ_batch) logger.info('Accuracy is: {:.4f}'.format(np.mean(acc_store))) logger.info('Success Rate is: {:.4f}'.format(np.mean(succ_store)))
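_top_k_accuracy is used above for both clean accuracy and the targeted success rate, and its per-example outputs are extended into Python lists. One way such a helper could be written (a sketch, not necessarily the project's definition):

import tensorflow as tf

def _top_k_accuracy(logits, labels, k=1):
    # One 0/1 float per example, so the caller can aggregate across batches.
    in_top_k = tf.nn.in_top_k(predictions=logits, targets=labels, k=k)
    return tf.cast(in_top_k, tf.float32)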
def build_graph(reader, model, train_data_pattern, label_loss_fn=losses.CrossEntropyLoss(), batch_size=1000, base_learning_rate=0.01, learning_rate_decay_examples=1000000, learning_rate_decay=0.95, optimizer_class=tf.train.AdamOptimizer, clip_gradient_norm=1.0, regularization_penalty=1, num_readers=1, num_epochs=None): """Creates the Tensorflow graph. This will only be called once in the life of a training model, because after the graph is created the model will be restored from a meta graph file rather than being recreated. Args: reader: The data file reader. It should inherit from BaseReader. model: The core model (e.g. logistic or neural net). It should inherit from BaseModel. train_data_pattern: glob path to the training data files. label_loss_fn: What kind of loss to apply to the model. It should inherit from BaseLoss. batch_size: How many examples to process at a time. base_learning_rate: What learning rate to initialize the optimizer with. optimizer_class: Which optimization algorithm to use. clip_gradient_norm: Magnitude of the gradient to clip to. regularization_penalty: How much weight to give the regularization loss compared to the label loss. num_readers: How many threads to use for I/O operations. num_epochs: How many passes to make over the data. 'None' means an unlimited number of passes. """ global_step = tf.Variable(0, trainable=False, name="global_step") local_device_protos = device_lib.list_local_devices() gpus = [x.name for x in local_device_protos if x.device_type == 'GPU'] gpus = gpus[:FLAGS.num_gpu] num_gpus = len(gpus) if num_gpus > 0: logging.info("Using the following GPUs to train: " + str(gpus)) num_towers = num_gpus device_string = '/gpu:%d' else: logging.info("No GPUs found. Training on CPU.") num_towers = 1 device_string = '/cpu:%d' learning_rate = tf.train.exponential_decay( base_learning_rate, global_step * batch_size * num_towers, learning_rate_decay_examples, learning_rate_decay, staircase=True) tf.summary.scalar('learning_rate', learning_rate) optimizer = optimizer_class(learning_rate) unused_video_id, model_input_raw, labels_batch, num_frames = ( get_input_data_tensors( reader, train_data_pattern, batch_size=batch_size * num_towers, num_readers=num_readers, num_epochs=num_epochs)) tf.summary.histogram("model/input_raw", model_input_raw) feature_dim = len(model_input_raw.get_shape()) - 1 model_input = tf.nn.l2_normalize(model_input_raw, feature_dim) tower_inputs = tf.split(model_input, num_towers) tower_labels = tf.split(labels_batch, num_towers) tower_num_frames = tf.split(num_frames, num_towers) tower_gradients = [] tower_predictions = [] tower_label_losses = [] tower_reg_losses = [] for i in range(num_towers): # For some reason these 'with' statements can't be combined onto the same # line. They have to be nested. 
with tf.device(device_string % i): with (tf.variable_scope(("tower"), reuse=True if i > 0 else None)): with (slim.arg_scope([slim.model_variable, slim.variable], device="/cpu:0" if num_gpus!=1 else "/gpu:0")): result = model.create_model( tower_inputs[i], num_frames=tower_num_frames[i], vocab_size=reader.num_classes, labels=tower_labels[i]) for variable in slim.get_model_variables(): tf.summary.histogram(variable.op.name, variable) predictions = result["predictions"] tower_predictions.append(predictions) if "loss" in result.keys(): label_loss = result["loss"] else: label_loss = label_loss_fn.calculate_loss(predictions, tower_labels[i]) if "regularization_loss" in result.keys(): reg_loss = result["regularization_loss"] else: reg_loss = tf.constant(0.0) reg_losses = tf.losses.get_regularization_losses() if reg_losses: reg_loss += tf.add_n(reg_losses) tower_reg_losses.append(reg_loss) # Adds update_ops (e.g., moving average updates in batch normalization) as # a dependency to the train_op. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) if "update_ops" in result.keys(): update_ops += result["update_ops"] if update_ops: with tf.control_dependencies(update_ops): barrier = tf.no_op(name="gradient_barrier") with tf.control_dependencies([barrier]): label_loss = tf.identity(label_loss) tower_label_losses.append(label_loss) # Incorporate the L2 weight penalties etc. final_loss = regularization_penalty * reg_loss + label_loss gradients = optimizer.compute_gradients(final_loss, colocate_gradients_with_ops=False) tower_gradients.append(gradients) label_loss = tf.reduce_mean(tf.stack(tower_label_losses)) tf.summary.scalar("label_loss", label_loss) if regularization_penalty != 0: reg_loss = tf.reduce_mean(tf.stack(tower_reg_losses)) tf.summary.scalar("reg_loss", reg_loss) merged_gradients = utils.combine_gradients(tower_gradients) if clip_gradient_norm > 0: with tf.name_scope('clip_grads'): merged_gradients = utils.clip_gradient_norms(merged_gradients, clip_gradient_norm) train_op = optimizer.apply_gradients(merged_gradients, global_step=global_step) tf.add_to_collection("global_step", global_step) tf.add_to_collection("loss", label_loss) tf.add_to_collection("predictions", tf.concat(tower_predictions, 0)) tf.add_to_collection("input_batch_raw", model_input_raw) tf.add_to_collection("input_batch", model_input) tf.add_to_collection("num_frames", num_frames) tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32)) tf.add_to_collection("train_op", train_op)
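utils.combine_gradients merges the per-tower gradient lists before a single apply_gradients call. The repository's implementation is not shown; a common way to do the averaging, assuming every tower returns its (gradient, variable) pairs in the same order:

import tensorflow as tf

def combine_gradients(tower_gradients):
    combined = []
    # zip(*...) groups the (grad, var) pairs of the same variable across towers.
    for grads_and_vars in zip(*tower_gradients):
        grads = [g for g, _ in grads_and_vars if g is not None]
        var = grads_and_vars[0][1]
        if not grads:
            combined.append((None, var))
        else:
            combined.append((tf.add_n(grads) / float(len(grads)), var))
    return combined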
def main(args=None): print(args) tf.reset_default_graph() """ Read dataset parser """ flags.network_name = args[0].split('/')[-1].split('.')[0].split('main_')[-1] flags.logs_dir = './logs_' + flags.network_name dataset_parser = SemanticParser(flags=flags) """ Transform data to TFRecord format (Only do once.) """ if False: dataset_parser.load_paths(is_jpg=True, load_val=True) dataset_parser.data2record(name='{}_train.tfrecords'.format(dataset_parser.dataset_name), set_type='train', test_num=None) dataset_parser.data2record(name='{}_val.tfrecords'.format(dataset_parser.dataset_name), set_type='val', test_num=None) # coco_parser.data2record_test(name='coco_stuff2017_test-dev_all_label.tfrecords', is_dev=True, test_num=None) # coco_parser.data2record_test(name='coco_stuff2017_test_all_label.tfrecords', is_dev=False, test_num=None) return """ Build Graph """ with tf.Graph().as_default(): """ Input (TFRecord) """ with tf.name_scope('TFRecord'): # DatasetA training_a_dataset = dataset_parser.tfrecord_get_dataset( name='{}_trainA.tfrecords'.format(dataset_parser.dataset_name), batch_size=flags.batch_size, shuffle_size=None) val_a_dataset = dataset_parser.tfrecord_get_dataset( name='{}_valA.tfrecords'.format(dataset_parser.dataset_name), batch_size=flags.batch_size) # DatasetB training_b_dataset = dataset_parser.tfrecord_get_dataset( name='{}_trainB.tfrecords'.format(dataset_parser.dataset_name), batch_size=flags.batch_size, is_label=True, shuffle_size=None) val_b_dataset = dataset_parser.tfrecord_get_dataset( name='{}_valB.tfrecords'.format(dataset_parser.dataset_name), batch_size=flags.batch_size, is_label=True) # A feed-able iterator with tf.name_scope('RealA'): handle_a = tf.placeholder(tf.string, shape=[]) iterator_a = tf.contrib.data.Iterator.from_string_handle( handle_a, training_a_dataset.output_types, training_a_dataset.output_shapes) real_a = iterator_a.get_next() with tf.name_scope('RealB'): handle_b = tf.placeholder(tf.string, shape=[]) iterator_b = tf.contrib.data.Iterator.from_string_handle( handle_b, training_b_dataset.output_types, training_b_dataset.output_shapes) real_b = iterator_b.get_next() with tf.name_scope('InitialA_op'): training_a_iterator = training_a_dataset.make_initializable_iterator() validation_a_iterator = val_a_dataset.make_initializable_iterator() with tf.name_scope('InitialB_op'): training_b_iterator = training_b_dataset.make_initializable_iterator() validation_b_iterator = val_b_dataset.make_initializable_iterator() """ Network (Computes predictions from the inference model) """ with tf.name_scope('Network'): # Input global_step = tf.Variable(0, trainable=False, name='global_step', dtype=tf.int32) global_step_update_op = tf.assign_add(global_step, 1, name='global_step_update_op') mean_rgb = tf.constant((123.68, 116.78, 103.94), dtype=tf.float32) ''' fake_b_pool = tf.placeholder(tf.float32, shape=[None, flags.image_height, flags.image_width, flags.c_in_dim], name='fake_B_pool') image_linear_shape = tf.constant(flags.image_height * flags.image_width * flags.c_in_dim, dtype=tf.int32, name='image_linear_shape') real_a_test = tf.placeholder(tf.float32, shape=[None, flags.image_height, flags.image_width, flags.c_in_dim], name='real_a_test') ''' # A -> B with tf.name_scope('Generator'): with slim.arg_scope(resnet_v1.resnet_arg_scope()): net, end_points = resnet_v1.resnet_v1_50(real_a - mean_rgb, num_classes=None, is_training=True, global_pool=False, output_stride=8) with tf.variable_scope('Generator_A2B'): pred = tf.layers.conv2d(tf.nn.relu(net), 1, 1, 1) pred_upscale 
= tf.image.resize_bilinear(pred, (flags.image_height, flags.image_width), name='up_scale') segment_a = tf.nn.sigmoid(pred_upscale, name='segment_a') # sigmoid cross entropy Loss with tf.name_scope('loss_gen_a2b'): loss_gen_a2b = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits( logits=pred_upscale, labels=real_b/255.0, name='sigmoid'), name='mean') # Optimizer trainable_var_resnet = tf.get_collection( key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='resnet_v1_50') # trainable_var_gen_a2b = tf.get_collection( # key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator_A2B') trainable_var_gen_a2b = tf.get_collection( key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='Generator_A2B') + trainable_var_resnet if True: slim.model_analyzer.analyze_vars(trainable_var_gen_a2b, print_info=True) # trainable_var_dis_b = tf.get_collection( # key=tf.GraphKeys.TRAINABLE_VARIABLES, scope='Network/Discriminator_B') with tf.name_scope('learning_rate_decay'): decay = tf.maximum(0., 1. - (tf.cast(global_step, tf.float32) / flags.training_iter), name='decay') learning_rate = tf.multiply(flags.learning_rate, decay, name='learning_rate') train_op_gen_a2b = train_op(loss_gen_a2b, learning_rate, flags, trainable_var_gen_a2b, name='gen_a2b') # train_op_dis_b = train_op(loss_dis_b, learning_rate, flags, trainable_var_dis_b, name='dis_b') saver = tf.train.Saver(max_to_keep=2) # Graph Logs with tf.name_scope('GEN_a2b'): tf.summary.scalar("loss/gen_a2b/all", loss_gen_a2b) ''' with tf.name_scope('DIS_b'): tf.summary.scalar("loss/dis_b/all", loss_dis_b) tf.summary.scalar("loss/dis_b/adv_real", loss_dis_b_adv_real) tf.summary.scalar("loss/dis_b/adv_fake", loss_dis_b_adv_fake) ''' summary_op = tf.summary.merge_all() """ Session """ tfconfig = tf.ConfigProto(allow_soft_placement=True) tfconfig.gpu_options.allow_growth = True with tf.Session(config=tfconfig) as sess: with tf.name_scope('Initial'): ckpt = tf.train.get_checkpoint_state(dataset_parser.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print("Model restored: {}".format(ckpt.model_checkpoint_path)) saver.restore(sess, ckpt.model_checkpoint_path) else: print("No Model found.") init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) sess.run(init_op) init_fn = slim.assign_from_checkpoint_fn('./pretrained/resnet_v1_50.ckpt', slim.get_model_variables('resnet_v1_50')) init_fn(sess) summary_writer = tf.summary.FileWriter(dataset_parser.logs_dir, sess.graph) """ Training Mode """ if flags.mode == 'train': print('Training mode! 
Batch size:{:d}'.format(flags.batch_size)) with tf.variable_scope('Input_port'): training_a_handle = sess.run(training_a_iterator.string_handle()) val_a_handle = sess.run(validation_a_iterator.string_handle()) training_b_handle = sess.run(training_b_iterator.string_handle()) val_b_handle = sess.run(validation_b_iterator.string_handle()) image_pool_a, image_pool_b = ImagePool(flags.pool_size), ImagePool(flags.pool_size) print('Start Training!') start_time = time.time() sess.run([training_a_iterator.initializer, training_b_iterator.initializer]) feed_dict_train = {handle_a: training_a_handle, handle_b: training_b_handle} # feed_dict_valid = {is_training: False} global_step_sess = sess.run(global_step) while global_step_sess < flags.training_iter: try: # Update gen_A2B, gen_B2A # _, fake_b_sess, = sess.run([train_op_gen_a2b, fake_b], feed_dict=feed_dict_train) _, loss_gen_a2b_sess = sess.run([train_op_gen_a2b, loss_gen_a2b], feed_dict=feed_dict_train) # Update dis_B, dis_A #fake_b_pool_query = image_pool_b.query(fake_b_sess) #_ = sess.run(train_op_dis_b, feed_dict={ # fake_b_pool: fake_b_pool_query, handle_b: training_b_handle}) sess.run(global_step_update_op) global_step_sess, learning_rate_sess = sess.run([global_step, learning_rate]) print('global step:[{:d}/{:d}], learning rate:{:f}, time:{:4.4f}'.format( global_step_sess, flags.training_iter, learning_rate_sess, time.time() - start_time)) # Logging the events if global_step_sess % flags.log_freq == 1: print('Logging the events') summary_op_sess = sess.run(summary_op, feed_dict={ handle_a: training_a_handle, handle_b: training_b_handle,}) summary_writer.add_summary(summary_op_sess, global_step_sess) # summary_writer.flush() # Observe training situation (For debugging.) if flags.debug and global_step_sess % flags.observe_freq == 1: real_a_sess, real_b_sess, segment_a_sess = \ sess.run([real_a, real_b, segment_a], feed_dict={handle_a: training_a_handle, handle_b: training_b_handle}) print('Logging training images.') dataset_parser.visualize_data( real_a=real_a_sess, real_b=real_b_sess, segment_a=segment_a_sess, shape=(1, 1), global_step=global_step_sess, logs_dir=dataset_parser.logs_image_train_dir) """ Saving the checkpoint """ if global_step_sess % flags.save_freq == 0: print('Saving model...') saver.save(sess, dataset_parser.checkpoint_dir + '/model.ckpt', global_step=global_step_sess) except tf.errors.OutOfRangeError: print('----------------One epochs finished!----------------') sess.run([training_a_iterator.initializer, training_b_iterator.initializer]) elif flags.mode == 'test': import numpy as np from PIL import Image print('Start Testing!') with tf.variable_scope('Input_port'): val_a_handle = sess.run(validation_a_iterator.string_handle()) val_b_handle = sess.run(validation_b_iterator.string_handle()) sess.run([validation_a_iterator.initializer, validation_b_iterator.initializer]) feed_dict_test = {handle_a: val_a_handle, handle_b: val_b_handle} image_idx = 0 while True: try: segment_a_sess = sess.run(segment_a, feed_dict=feed_dict_test) segment_a_sess = np.squeeze(segment_a_sess) * 255 x_png = Image.fromarray(segment_a_sess.astype(np.uint8)) # x_png = x_png.resize(test_img_size, Image.BILINEAR) x_png.save('{}/{:d}.png'.format(dataset_parser.logs_image_val_dir, image_idx), format='PNG') print(image_idx) image_idx += 1 except tf.errors.OutOfRangeError: print('----------------One epochs finished!----------------') break # feed_dict_valid = {is_training: False} '''
def build_graph(all_readers, all_train_data_patterns, input_reader, input_data_pattern, model, label_loss_fn=losses.CrossEntropyLoss(), batch_size=256, base_learning_rate=0.01, learning_rate_decay_examples=1000000, learning_rate_decay=0.95, optimizer_class=tf.train.AdamOptimizer, clip_gradient_norm=1.0, regularization_penalty=1, num_epochs=None): """Creates the Tensorflow graph. This will only be called once in the life of a training model, because after the graph is created the model will be restored from a meta graph file rather than being recreated. Args: all_readers: The data file readers. Every element in it should inherit from BaseReader. model: The core model (e.g. logistic or neural net). It should inherit from BaseModel. train_data_patterns: glob paths to the training data files. label_loss_fn: What kind of loss to apply to the model. It should inherit from BaseLoss. batch_size: How many examples to process at a time. base_learning_rate: What learning rate to initialize the optimizer with. optimizer_class: Which optimization algorithm to use. clip_gradient_norm: Magnitude of the gradient to clip to. regularization_penalty: How much weight to give the regularization loss compared to the label loss. num_epochs: How many passes to make over the data. 'None' means an unlimited number of passes. """ global_step = tf.Variable(0, trainable=False, name="global_step") learning_rate = tf.train.exponential_decay(base_learning_rate, global_step * batch_size, learning_rate_decay_examples, learning_rate_decay, staircase=True) tf.summary.scalar('learning_rate', learning_rate) original_input = None if input_data_pattern is not None: original_video_id, original_input, unused_labels_batch, unused_num_frames = ( get_input_data_tensors(input_reader, input_data_pattern, batch_size=batch_size, num_epochs=num_epochs)) optimizer = optimizer_class(learning_rate) model_input_raw_tensors = [] labels_batch_tensor = None for reader, data_pattern in zip(all_readers, all_train_data_patterns): video_id, model_input_raw, labels_batch, unused_num_frames = ( get_input_data_tensors(reader, data_pattern, batch_size=batch_size, num_epochs=num_epochs)) if labels_batch_tensor is None: labels_batch_tensor = labels_batch model_input_raw_tensors.append(tf.expand_dims(model_input_raw, axis=2)) if original_input is not None: id_match = tf.ones_like(original_video_id, dtype=tf.float32) id_match = id_match * tf.cast( tf.equal(original_video_id, video_id), dtype=tf.float32) tf.summary.scalar("model/id_match", tf.reduce_mean(id_match)) model_input = tf.concat(model_input_raw_tensors, axis=2) labels_batch = labels_batch_tensor tf.summary.histogram("model/input", model_input) with tf.name_scope("model"): if FLAGS.noise_level > 0: noise_level_tensor = tf.placeholder_with_default( 0.0, shape=[], name="noise_level") else: noise_level_tensor = None if FLAGS.dropout: keep_prob_tensor = tf.placeholder_with_default(1.0, shape=[], name="keep_prob") result = model.create_model(model_input, labels=labels_batch, vocab_size=reader.num_classes, original_input=original_input, dropout=FLAGS.dropout, keep_prob=keep_prob_tensor, noise_level=noise_level_tensor) else: result = model.create_model(model_input, labels=labels_batch, vocab_size=reader.num_classes, original_input=original_input, noise_level=noise_level_tensor) for variable in slim.get_model_variables(): tf.summary.histogram(variable.op.name, variable) predictions = result["predictions"] if "loss" in result.keys(): label_loss = result["loss"] else: video_weights_batch = None
if FLAGS.reweight: video_weights_batch = get_video_weights(video_id) else: video_weights_batch = None if FLAGS.multitask: print "using multitask loss" support_predictions = result["support_predictions"] tf.summary.histogram("model/support_predictions", support_predictions) print "support_predictions", support_predictions label_loss = label_loss_fn.calculate_loss( predictions, support_predictions, labels_batch, weights=video_weights_batch) else: print "using original loss" label_loss = label_loss_fn.calculate_loss( predictions, labels_batch, weights=video_weights_batch) tf.summary.histogram("model/predictions", predictions) tf.summary.scalar("label_loss", label_loss) if "regularization_loss" in result.keys(): reg_loss = result["regularization_loss"] else: reg_loss = tf.constant(0.0) reg_losses = tf.losses.get_regularization_losses() if reg_losses: reg_loss += tf.add_n(reg_losses) if regularization_penalty != 0: tf.summary.scalar("reg_loss", reg_loss) # Adds update_ops (e.g., moving average updates in batch normalization) as # a dependency to the train_op. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) if "update_ops" in result.keys(): update_ops += result["update_ops"] if update_ops: with tf.control_dependencies(update_ops): barrier = tf.no_op(name="gradient_barrier") with tf.control_dependencies([barrier]): label_loss = tf.identity(label_loss) # Incorporate the L2 weight penalties etc. final_loss = regularization_penalty * reg_loss + label_loss if FLAGS.training: gradients = optimizer.compute_gradients( final_loss, colocate_gradients_with_ops=False) if clip_gradient_norm > 0: with tf.name_scope('clip_grads'): gradients = utils.clip_gradient_norms( gradients, clip_gradient_norm) train_op = optimizer.apply_gradients(gradients, global_step=global_step) else: train_op = tf.no_op() tf.add_to_collection("global_step", global_step) tf.add_to_collection("loss", label_loss) tf.add_to_collection("predictions", predictions) tf.add_to_collection("input_batch_raw", model_input) tf.add_to_collection("input_batch", model_input) tf.add_to_collection("labels", tf.cast(labels_batch, tf.float32)) tf.add_to_collection("train_op", train_op) if FLAGS.dropout: tf.add_to_collection("keep_prob", keep_prob_tensor) if FLAGS.noise_level > 0: tf.add_to_collection("noise_level", noise_level_tensor)
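Because keep_prob and noise_level are placeholder_with_default tensors stored in collections (only when FLAGS.dropout and FLAGS.noise_level > 0 at graph-construction time), evaluation can simply omit them while the training loop overrides them. A brief usage sketch with illustrative values:

import tensorflow as tf

train_op = tf.get_collection('train_op')[0]
keep_prob = tf.get_collection('keep_prob')[0]      # defaults to 1.0 if not fed
noise_level = tf.get_collection('noise_level')[0]  # defaults to 0.0 if not fed

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Training step: enable dropout and input noise (values are illustrative).
    sess.run(train_op, feed_dict={keep_prob: 0.5, noise_level: 0.1})
    # Evaluation: run without feeding them and the defaults apply.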
def _train(sess): with tf.name_scope('data_input'): image_batch, label_batch = setup_train_input(sess) with tf.name_scope('Network'): with slim.arg_scope(cifarnet.cifarnet_arg_scope()): logits, end_points = cifarnet.cifarnet(image_batch) with tf.name_scope('X_entropy_loss'): x_entropy_loss = slim.losses.softmax_cross_entropy( label_batch, logits, label_smoothing=FLAGS.label_smoothing, weights=1.0) with tf.name_scope('total_loss'): total_loss = slim.losses.get_total_loss() with tf.name_scope('global_step'): global_step = tf.train.get_or_create_global_step() incr_global_step = tf.assign(global_step, global_step + 1) with tf.name_scope('train'): learning_rate = _configure_learning_rate( 50000, global_step) #to be modified with val optimizer = _configure_optimizer(learning_rate) var_to_train = [var for var in tf.trainable_variables()] gradients = optimizer.compute_gradients(total_loss, var_list=var_to_train) train_op = optimizer.apply_gradients(gradients) init_op = tf.global_variables_initializer() sess.run(init_op) saver = tf.train.Saver(max_to_keep=5) with tf.name_scope('Summary'): # Add summaries for end_points. for end_point in end_points: x = end_points[end_point] tf.summary.histogram('activations/' + end_point, x) tf.summary.scalar('sparsity/' + end_point, tf.nn.zero_fraction(x)) # Add summaries for losses. for loss in tf.get_collection(tf.GraphKeys.LOSSES): tf.summary.scalar('losses/%s' % loss.op.name, loss) tf.summary.scalar('total_loss', total_loss) # Add summaries for variables. for variable in slim.get_model_variables(): tf.summary.histogram(variable.op.name, variable) tf.summary.scalar('learning_rate', learning_rate) summary_writer = tf.summary.FileWriter(FLAGS.logdir, graph=sess.graph) summary_op = tf.summary.merge_all() step = 0 max_step = math.ceil(50000 * FLAGS.num_epoch * 10 / FLAGS.batch_size) start_time = time.time() print('Start training...') print('Batch size: %d, number of epoch: %d' % (FLAGS.batch_size, FLAGS.num_epoch)) while True: try: options = None run_metadata = None if should_log(FLAGS.trace_freq, step, max_step): options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() fetches = { 'train': train_op, 'global_step': global_step, 'incr_global_step': incr_global_step } if should_log(FLAGS.log_freq, step, max_step): fetches['x_entropy_loss'] = x_entropy_loss fetches['total_loss'] = total_loss if should_log(FLAGS.sum_freq, step, max_step): fetches['summary'] = summary_op results = sess.run(fetches, options=options, run_metadata=run_metadata) current_time = time.time() process_time = current_time - start_time remain_time = (max_step - step + 1) * process_time / (step + 1) if should_log(FLAGS.log_freq, step, max_step): print( '-------------------------------------------------------------------------------' ) print( 'Global step: %d, X_loss: %.4f, total loss: %.4f, process time: %d mins, remain time: %d mins' % (results['global_step'], results['x_entropy_loss'], results['total_loss'], process_time / 60, remain_time / 60)) if should_log(FLAGS.sum_freq, step, max_step): summary_writer.add_summary(results['summary']) if should_log(FLAGS.trace_freq, step, max_step): print('Recording trace...') summary_writer.add_run_metadata( run_metadata, 'step_%d' % results['global_step']) if should_log(FLAGS.save_freq, step, max_step): print('Saving model...') saver.save(sess, os.path.join(FLAGS.logdir, 'model'), global_step=results['global_step']) step = step + 1 except tf.errors.OutOfRangeError: print( 
'----------------------------------------------------------------------------------' ) print('Done training!') current_time = time.time() process_time = current_time - start_time print( 'Total training time is %d hours and %d minutes' % (math.floor(process_time / 3600), (process_time - math.floor(process_time / 3600) * 3600) / 60)) break
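The loop above gates logging, summaries, tracing and saving on a should_log helper that is not defined in the snippet. A plausible definition, consistent with how it is called with (frequency, current step, max step), might be:

def should_log(freq, step, max_step):
    # Fire every `freq` steps and always on the final step, so nothing is lost.
    if freq <= 0:
        return False
    return step % freq == 0 or step == max_step - 1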
def freeze_model(model_folder): # We specify the full filename of our frozen graph output_graph = 'model/' + 'frozen_model.pb' # 'output_tensor1,output_tensor2,...' output_node_names = network.evaluate_network_slim() input_graph_def = tf.get_default_graph().as_graph_def() def name_in_checkpoint(var, type): if type in var.op.name: return var.op.name.replace(type, "student") vars_to_restore = slim.get_model_variables() vars_color_to_restore = { name_in_checkpoint(var, 'color'): var for var in vars_to_restore if 'color' in var.op.name } color_restorer = tf.train.Saver(vars_color_to_restore) vars_gray_to_restore = { name_in_checkpoint(var, 'gray'): var for var in vars_to_restore if 'gray' in var.op.name } gray_restorer = tf.train.Saver(vars_gray_to_restore) vars_gradient_to_restore = { name_in_checkpoint(var, 'gradient'): var for var in vars_to_restore if 'gradient' in var.op.name } gradient_restorer = tf.train.Saver(vars_gradient_to_restore) with tf.Session() as sess: # We retrieve our checkpoint fullpath color_ckpt = tf.train.get_checkpoint_state( os.path.join(model_folder, 'color_0')) gray_ckpt = tf.train.get_checkpoint_state( os.path.join(model_folder, 'gray_0')) gradient_ckpt = tf.train.get_checkpoint_state( os.path.join(model_folder, 'gradient_0')) #sess.run(tf.global_variables_initializer()) #train_vars = slim.get_model_variables() #for each in train_vars: # print each.op.name, each.eval() color_restorer.restore(sess, color_ckpt.model_checkpoint_path) gray_restorer.restore(sess, gray_ckpt.model_checkpoint_path) gradient_restorer.restore(sess, gradient_ckpt.model_checkpoint_path) train_vars = slim.get_model_variables() for each in train_vars: #if 'conv1/weights' in each.op.name: print each.op.name, each.eval() output_graph_def = graph_util.convert_variables_to_constants( sess, # The session is used to retrieve the weights input_graph_def, # The graph_def is used to retrieve the nodes output_node_names # The output node names are used to select the useful nodes ) with tf.gfile.FastGFile(output_graph, "wb") as f: f.write(output_graph_def.SerializeToString()) print("%d ops in the final graph." % len(output_graph_def.node))
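Once frozen_model.pb has been written, it can be reloaded without any slim variables or Savers. A minimal sketch of the consumer side (the tensor name below is a placeholder and must match one of the names in output_node_names):

import tensorflow as tf

with tf.gfile.GFile('model/frozen_model.pb', 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name='')
    # Placeholder tensor name; use '<output_node_name>:0' from the frozen graph.
    output = graph.get_tensor_by_name('output_tensor1:0')
    with tf.Session(graph=graph) as sess:
        # result = sess.run(output, feed_dict={...})
        pass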
"\\tensorflow-dataset\\test images\\017_267.jpg") image = tf.image.decode_jpeg(image_input, channels=3) processed_image = inception_preprocessing.preprocess_image( image, image_size, image_size, is_training=False) processed_images = tf.expand_dims(processed_image, 0) with slim.arg_scope(inception.inception_v1_arg_scope()): logits, _ = inception.inception_v1(processed_images, num_classes=17, is_training=False) probabilities = tf.nn.softmax(logits) init_fn = slim.assign_from_checkpoint_fn( os.path.join(classification_checkpoint_dir, 'model.ckpt-500'), slim.get_model_variables('InceptionV1')) config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: init_fn(sess) np_image, probabilities = sess.run([image, probabilities]) probabilities = probabilities[0, 0:] sorted_inds = [ i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1]) ] #plt.figure() #plt.imshow(np_image.astype(np.uint8)) #plt.axis('off') #plt.show()
def train(self): # load svhn dataset svhn_images, _ = self.load_svhn(self.svhn_dir, split='train') mnist_images, _ = self.load_mnist(self.mnist_dir, split='train') # build a graph model = self.model model.build_model() # make log directory if not exists if tf.gfile.Exists(self.log_dir): tf.gfile.DeleteRecursively(self.log_dir) tf.gfile.MakeDirs(self.log_dir) with tf.Session(config=self.config) as sess: # initialize G and D tf.global_variables_initializer().run() # restore variables of F print('loading pretrained model F..') variables_to_restore = slim.get_model_variables( scope='content_extractor') restorer = tf.train.Saver(variables_to_restore) restorer.restore(sess, self.pretrained_model) summary_writer = tf.summary.FileWriter( logdir=self.log_dir, graph=tf.get_default_graph()) saver = tf.train.Saver() print('start training..!') for step in range(self.train_iter + 1): i = step % int(svhn_images.shape[0] / self.batch_size) # train the model for source domain S src_images = svhn_images[i * self.batch_size:(i + 1) * self.batch_size] feed_dict = {model.src_images: src_images} sess.run(model.d_train_op_src, feed_dict) sess.run([model.g_train_op_src], feed_dict) sess.run([model.g_train_op_src], feed_dict) sess.run([model.g_train_op_src], feed_dict) sess.run([model.g_train_op_src], feed_dict) sess.run([model.g_train_op_src], feed_dict) sess.run([model.g_train_op_src], feed_dict) if i % 15 == 0: sess.run(model.f_train_op_src, feed_dict) if (step + 1) % 10 == 0: summary, dl, gl, fl = sess.run([model.summary_op_src, \ model.d_loss_src, model.g_loss_src, model.f_loss_src], feed_dict) summary_writer.add_summary(summary, step) print ('[Source] step: [%d/%d] d_loss: [%.6f] g_loss: [%.6f] f_loss: [%.6f]' \ %(step+1, self.train_iter, dl, gl, fl)) # train the model for target domain T j = step % int(mnist_images.shape[0] / self.batch_size) trg_images = mnist_images[j * self.batch_size:(j + 1) * self.batch_size] feed_dict = { model.src_images: src_images, model.trg_images: trg_images } sess.run(model.d_train_op_trg, feed_dict) sess.run(model.d_train_op_trg, feed_dict) sess.run(model.g_train_op_trg, feed_dict) sess.run(model.g_train_op_trg, feed_dict) sess.run(model.g_train_op_trg, feed_dict) sess.run(model.g_train_op_trg, feed_dict) if (step + 1) % 10 == 0: summary, dl, gl = sess.run([model.summary_op_trg, \ model.d_loss_trg, model.g_loss_trg], feed_dict) summary_writer.add_summary(summary, step) print ('[Target] step: [%d/%d] d_loss: [%.6f] g_loss: [%.6f]' \ %(step+1, self.train_iter, dl, gl)) if (step + 1) % 200 == 0: saver.save(sess, os.path.join(self.model_save_path, 'dtn'), global_step=step + 1) print('model/dtn-%d saved' % (step + 1))
logits, vgg_variables = model.fcn_32s(images, num_classes, is_training=True) valid_labels, valid_logits = utils.remove_ambiguous(labels, logits) loss = tf.nn.softmax_cross_entropy_with_logits(labels=valid_labels, logits=valid_logits) loss = tf.reduce_mean(loss) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss=loss) # Most parameters can be initialized with the pre-trained parameters of the original VGG-16 network vgg_variables_without_fc8 = utils.extract_vgg_variables(vgg_variables) init = slim.assign_from_checkpoint_fn(vgg_16_parameters_dir, vgg_variables_without_fc8) global_vars_init_op = tf.global_variables_initializer() local_vars_init_op = tf.local_variables_initializer() combined_op = tf.group(local_vars_init_op, global_vars_init_op) model_variables = slim.get_model_variables() saver = tf.train.Saver(model_variables) with tf.Session() as sess: sess.run(combined_op) init(sess) # If restarting training after an interruption saver.restore(sess, fcn32s_parameters_dir) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(coord=coord) for i in range(training_rounds): error, _ = sess.run([loss, optimizer]) print("Round %d, Loss = %f" % (i, error)) if i % 199 == 0: saver.save(sess, fcn32s_parameters_dir) coord.request_stop()
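utils.remove_ambiguous is assumed here to drop pixels whose label marks a void/ambiguous class before the cross-entropy is computed. One way such a helper could look; the void value 255, the default number of classes and the one-hot output are assumptions, not the repository's actual code:

import tensorflow as tf

def remove_ambiguous(labels, logits, num_classes=21, void_label=255):
    # Flatten to one row per pixel.
    flat_labels = tf.reshape(labels, [-1])
    flat_logits = tf.reshape(logits, [-1, num_classes])
    # Keep only pixels that carry a real class label.
    valid = tf.not_equal(flat_labels, void_label)
    valid_labels = tf.boolean_mask(flat_labels, valid)
    valid_logits = tf.boolean_mask(flat_logits, valid)
    # Return one-hot labels so they can feed softmax_cross_entropy_with_logits.
    return tf.one_hot(valid_labels, num_classes), valid_logits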
def train(self): # load svhn dataset svhn_images, _ = self.load_svhn(self.svhn_dir, split='train') mnist_images, _ = self.load_mnist(self.mnist_dir, split='train') # build a graph model = self.model model.build_model() # make directory if not exists if tf.gfile.Exists(self.log_dir): tf.gfile.DeleteRecursively(self.log_dir) tf.gfile.MakeDirs(self.log_dir) with tf.Session(config=self.config) as sess: # initialize G and D tf.global_variables_initializer().run() # restore variables of F print ('loading pretrained model F..') variables_to_restore = slim.get_model_variables(scope='content_extractor') restorer = tf.train.Saver(variables_to_restore) restorer.restore(sess, self.pretrained_model) summary_writer = tf.summary.FileWriter(logdir=self.log_dir, graph=tf.get_default_graph()) saver = tf.train.Saver() print ('start training..!') f_interval = 15 for step in range(self.train_iter+1): i = step % int(svhn_images.shape[0] / self.batch_size) # train the model for source domain S src_images = svhn_images[i*self.batch_size:(i+1)*self.batch_size] feed_dict = {model.src_images: src_images} sess.run(model.d_train_op_src, feed_dict) sess.run([model.g_train_op_src], feed_dict) sess.run([model.g_train_op_src], feed_dict) sess.run([model.g_train_op_src], feed_dict) sess.run([model.g_train_op_src], feed_dict) sess.run([model.g_train_op_src], feed_dict) sess.run([model.g_train_op_src], feed_dict) if step > 1600: f_interval = 30 if i % f_interval == 0: sess.run(model.f_train_op_src, feed_dict) if (step+1) % 10 == 0: summary, dl, gl, fl = sess.run([model.summary_op_src, \ model.d_loss_src, model.g_loss_src, model.f_loss_src], feed_dict) summary_writer.add_summary(summary, step) print ('[Source] step: [%d/%d] d_loss: [%.6f] g_loss: [%.6f] f_loss: [%.6f]' \ %(step+1, self.train_iter, dl, gl, fl)) # train the model for target domain T j = step % int(mnist_images.shape[0] / self.batch_size) trg_images = mnist_images[j*self.batch_size:(j+1)*self.batch_size] feed_dict = {model.src_images: src_images, model.trg_images: trg_images} sess.run(model.d_train_op_trg, feed_dict) sess.run(model.d_train_op_trg, feed_dict) sess.run(model.g_train_op_trg, feed_dict) sess.run(model.g_train_op_trg, feed_dict) sess.run(model.g_train_op_trg, feed_dict) sess.run(model.g_train_op_trg, feed_dict) if (step+1) % 10 == 0: summary, dl, gl = sess.run([model.summary_op_trg, \ model.d_loss_trg, model.g_loss_trg], feed_dict) summary_writer.add_summary(summary, step) print ('[Target] step: [%d/%d] d_loss: [%.6f] g_loss: [%.6f]' \ %(step+1, self.train_iter, dl, gl)) if (step+1) % 200 == 0: saver.save(sess, os.path.join(self.model_save_path, 'dtn'), global_step=step+1) print ('model/dtn-%d saved' %(step+1))
def build_deeplabv3_plus(inputs, num_classes, preset_model='DeepLabV3+-Res50', weight_decay=1e-5, is_training=True, pretrained_dir="models"): """ Builds the DeepLabV3 model. Arguments: inputs: The input tensor= preset_model: Which model you want to use. Select which ResNet model to use for feature extraction num_classes: Number of classes Returns: DeepLabV3 model """ if preset_model == 'DeepLabV3_plus-Res50': with slim.arg_scope(resnet_v2.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v2.resnet_v2_50(inputs, is_training=is_training, scope='resnet_v2_50') resnet_scope='resnet_v2_50' # DeepLabV3 requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, 'resnet_v2_50.ckpt'), slim.get_model_variables('resnet_v2_50')) elif preset_model == 'DeepLabV3_plus-Res101': with slim.arg_scope(resnet_v2.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v2.resnet_v2_101(inputs, is_training=is_training, scope='resnet_v2_101') resnet_scope='resnet_v2_101' # DeepLabV3 requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, 'resnet_v2_101.ckpt'), slim.get_model_variables('resnet_v2_101')) elif preset_model == 'DeepLabV3_plus-Res152': with slim.arg_scope(resnet_v2.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v2.resnet_v2_152(inputs, is_training=is_training, scope='resnet_v2_152') resnet_scope='resnet_v2_152' # DeepLabV3 requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, 'resnet_v2_152.ckpt'), slim.get_model_variables('resnet_v2_152')) else: raise ValueError("Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152" % (preset_model)) label_size = tf.shape(inputs)[1:3] encoder_features = end_points['pool2'] net = AtrousSpatialPyramidPoolingModule(end_points['pool4']) net = slim.conv2d(net, 256, [1, 1], scope="conv_1x1_output", activation_fn=None) decoder_features = Upsampling(net, label_size / 4) encoder_features = slim.conv2d(encoder_features, 48, [1, 1], activation_fn=tf.nn.relu, normalizer_fn=None) net = tf.concat((encoder_features, decoder_features), axis=3) net = slim.conv2d(net, 256, [3, 3], activation_fn=tf.nn.relu, normalizer_fn=None) net = slim.conv2d(net, 256, [3, 3], activation_fn=tf.nn.relu, normalizer_fn=None) net = Upsampling(net, label_size) net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits') return net, init_fn
def build_refinenet(inputs, num_classes=None, preset_model='RefineNet-Res101', weight_decay=1e-5, is_training=True, upscaling_method="bilinear", pretrained_dir="models", substract_mean=True, individual_upsamp="False", n_filters=256): """ Builds the RefineNet model. Arguments: inputs: The input tensor preset_model: Which model you want to use. Select which ResNet model to use for feature extraction num_classes: Number of classes Returns: RefineNet model """ # if substract_mean: # inputs = mean_image_subtraction(inputs) if preset_model == 'RefineNet-Res50': with slim.arg_scope( resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_50( inputs, is_training=is_training, scope='downsampling/resnet_v1_50') # RefineNet requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn( os.path.join(pretrained_dir, 'resnet_v1_50.ckpt'), slim.get_model_variables('downsampling/resnet_v1_50')) elif preset_model == 'RefineNet-Res101': with slim.arg_scope( resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_101( inputs, is_training=is_training, scope='downsampling/resnet_v1_101') # RefineNet requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn( os.path.join(pretrained_dir, 'resnet_v1_101.ckpt'), slim.get_model_variables('downsampling/resnet_v1_101')) elif preset_model == 'RefineNet-Res152': with slim.arg_scope( resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_152( inputs, is_training=is_training, scope='downsampling/resnet_v1_152') # RefineNet requires pre-trained ResNet weights init_fn = slim.assign_from_checkpoint_fn( os.path.join(pretrained_dir, 'resnet_v1_152.ckpt'), slim.get_model_variables('downsampling/resnet_v1_152')) else: raise ValueError( "Unsupported ResNet model '%s'. This function only supports ResNet 101 and ResNet 152" % (preset_model)) with tf.variable_scope('upsampling'): f = [ end_points['pool5'], end_points['pool4'], end_points['pool3'], end_points['pool2'] ] g_list = list() for sub_b in individual_upsamp: g = [None, None, None, None] h = [None, None, None, None] for i in range(4): h[i] = slim.conv2d(f[i], n_filters, 1) g[0] = RefineBlock(high_inputs=None, low_inputs=h[0], filters=n_filters) g[1] = RefineBlock(g[0], h[1], filters=n_filters) g[2] = RefineBlock(g[1], h[2], filters=n_filters) g[3] = RefineBlock(g[2], h[3], filters=n_filters) g[3] = Upsampling(g[3], tf.shape(inputs)[1], tf.shape(inputs)[2]) g_list.append(g) return g_list, init_fn