def _preproc_image_batch(self, batch_size, num_threads=1):
    """Build the batched, pre-processed image tensors for the queue input pipeline.

    Only used by the queue-based input pipeline. A filename is read from
    ``self._filename_queue``, the file is decoded as a 3-channel JPEG, passed
    through the selected pre-processing function and finally grouped into
    batches with ``tf.train.batch``.

    :param batch_size: int, batch size
    :param num_threads: int, number of input threads (default=1)
    :return: output of ``tf.train.batch`` for ``[pre-processed image, filename]``
    :raises ValueError: if the network name contains ``"resnet_v2"`` and no
        pre-processing function name has been configured
    """
    uses_resnet_v2 = "resnet_v2" in self._network_name
    if uses_resnet_v2 and self._preproc_func_name is None:
        raise ValueError("When using ResNet, please perform the pre-processing "
                         "function manually. See here for details: "
                         "https://github.com/tensorflow/models/tree/master/slim")

    # Pull one file off the filename queue and decode it as an RGB JPEG.
    file_reader = tf.WholeFileReader()
    image_filename, raw_bytes = file_reader.read(self._filename_queue)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)

    # Fall back to the network name when no explicit pre-processing name is set.
    if self._preproc_func_name is None:
        preproc_name = self._network_name
    else:
        preproc_name = self._preproc_func_name
    preprocess = preprocessing_factory.get_preprocessing(
        preproc_name, is_training=False)
    preprocessed = preprocess(decoded, self.image_size, self.image_size)

    # The filename travels with the image so results can be matched to files.
    return tf.train.batch(
        [preprocessed, image_filename],
        batch_size,
        num_threads=num_threads,
        allow_smaller_final_batch=True)
def imagenet_input(is_training):
    """Data reader for imagenet.

    Reads imagenet examples through a slim ``DatasetDataProvider``, applies the
    ``'mobilenet_v1'`` pre-processing function and batches the result.

    Args:
      is_training: bool; selects the ``'train'`` split (and shuffling) when
        true, the ``'validation'`` split otherwise.

    Returns:
      A batch of images and labels, sized by ``FLAGS.batch_size``.
    """
    split_name = 'train' if is_training else 'validation'
    dataset = dataset_factory.get_dataset(
        'imagenet', split_name, FLAGS.dataset_dir)

    batch_size = FLAGS.batch_size
    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=is_training,
        common_queue_capacity=2 * batch_size,
        common_queue_min=batch_size)
    [raw_image, label] = provider.get(['image', 'label'])

    preprocess = preprocessing_factory.get_preprocessing(
        'mobilenet_v1', is_training=is_training)
    image = preprocess(raw_image, FLAGS.image_size, FLAGS.image_size)

    images, labels = tf.train.batch(
        tensors=[image, label],
        batch_size=FLAGS.batch_size,
        num_threads=4,
        capacity=5 * FLAGS.batch_size)
    return images, labels
def main():
    """Build the slim input pipeline and fetch exactly one batch from it.

    All configuration (``dataset_dir``, ``dataset_name``, ``batch_size``,
    ``num_clones``, ...) is read from names defined outside this function.

    Returns:
        Tuple ``(image_data, label_data, fp_data)``: the evaluated images,
        one-hot labels and file paths of a single batch.

    Raises:
        ValueError: if ``dataset_dir`` is empty.
    """
    with tf.Graph().as_default():
        if not dataset_dir:
            raise ValueError('You must supply the dataset directory with --dataset_dir')

        deploy_config = model_deploy.DeploymentConfig(
            num_clones=num_clones,
            clone_on_cpu=clone_on_cpu,
            replica_id=task,
            num_replicas=worker_replicas,
            num_ps_tasks=num_ps_tasks)

        dataset = dataset_factory.get_dataset(
            dataset_name, dataset_split_name, dataset_dir)

        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        with tf.device(deploy_config.inputs_device()):
            with tf.name_scope('inputs'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=num_readers,
                    common_queue_capacity=20 * batch_size,
                    common_queue_min=10 * batch_size)
                [image, label, fp] = provider.get(['image', 'label', 'filepath'])
                label -= labels_offset

                train_image_size = 224
                image = image_preprocessing_fn(image, train_image_size, train_image_size)

                images, labels, fps = tf.train.batch(
                    [image, label, fp],
                    batch_size=batch_size,
                    num_threads=num_preprocessing_threads,
                    capacity=5 * batch_size)

                # NOTE(review): tf.image_summary is the pre-1.0 spelling; later
                # TensorFlow releases expose it as tf.summary.image. Left as-is
                # because the targeted TensorFlow version is not visible here.
                tf.image_summary('image', images, max_images=5)

                labels = slim.one_hot_encoding(
                    labels, dataset.num_classes - labels_offset)
                batch_queue = slim.prefetch_queue.prefetch_queue(
                    [images, labels, fps], capacity=2 * deploy_config.num_clones)
                images, labels, fps = batch_queue.dequeue()

        # The context manager closes the session even if fetching the batch
        # fails; the finally clause makes sure the queue threads are stopped
        # and joined before that happens.
        with tf.Session() as sess:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess, coord)
            try:
                image_data, label_data, fp_data = sess.run([images, labels, fps])
            finally:
                coord.request_stop()
                coord.join(threads)

        return image_data, label_data, fp_data
def get_style_features(FLAGS):
    """Evaluate the style features of ``FLAGS.style_image``.

    The style image is decoded (PNG when the path ends in ``png``, JPEG
    otherwise), pre-processed for ``FLAGS.loss_model`` at ``FLAGS.image_size``
    and fed through the loss network. According to the original notes the
    pre-processing step is:
    1. Resize the shorter side to FLAGS.image_size
    2. Apply central crop

    Side effect: the un-processed version of the pre-processed style image is
    written to ``generated/target_style_<FLAGS.naming>.jpg``.

    Returns:
        List with one evaluated ``gram(...)`` feature (batch dimension removed)
        per entry of ``FLAGS.style_layers``.
    """
    with tf.Graph().as_default():
        loss_network = nets_factory.get_network_fn(
            FLAGS.loss_model,
            num_classes=1,
            is_training=False)
        preprocess, unprocess = preprocessing_factory.get_preprocessing(
            FLAGS.loss_model,
            is_training=False)

        # Load and decode the style image.
        side = FLAGS.image_size
        raw_bytes = tf.read_file(FLAGS.style_image)
        is_png = FLAGS.style_image.lower().endswith('png')
        decode = tf.image.decode_png if is_png else tf.image.decode_jpeg
        style_image = decode(raw_bytes)

        # Pre-process and prepend the batch dimension.
        images = tf.expand_dims(preprocess(style_image, side, side), 0)
        _, endpoints = loss_network(images, spatial_squeeze=False)

        # One feature per requested layer, with the batch dimension removed.
        features = [
            tf.squeeze(gram(endpoints[layer_name]), [0])
            for layer_name in FLAGS.style_layers
        ]

        with tf.Session() as sess:
            # Restore variables for the loss network.
            restore_loss_network = utils._get_init_fn(FLAGS)
            restore_loss_network(sess)

            # The output directory has to exist before the image is written.
            if not os.path.exists('generated'):
                os.makedirs('generated')

            # Save the cropped style image so it can be inspected later.
            save_file = 'generated/target_style_' + FLAGS.naming + '.jpg'
            with open(save_file, 'wb') as out:
                viewable = unprocess(images[0, :])
                encoded = tf.image.encode_jpeg(tf.cast(viewable, tf.uint8))
                out.write(sess.run(encoded))
                tf.logging.info('Target style pattern is saved to: %s.' % save_file)

            # These are the features the style loss is measured against.
            return sess.run(features)
def main(_):
    """Stylise ``FLAGS.image_file`` with the model in ``FLAGS.model_file``.

    The image is decoded once to learn its height and width, then the
    transformation network is built in a fresh graph at that size, the
    checkpoint is restored and the result is written to ``generated/res.jpg``.

    Both sessions are entered with ``with tf.Session() as sess`` so that they
    are closed on exit; ``tf.Session().as_default()`` only installs the session
    as default and leaves it open.

    Args:
        _: unused positional argument.
    """
    # Get image's height and width.
    with open(FLAGS.image_file, 'rb') as img:
        with tf.Session() as sess:
            if FLAGS.image_file.lower().endswith('png'):
                image = sess.run(tf.image.decode_png(img.read()))
            else:
                image = sess.run(tf.image.decode_jpeg(img.read()))
            height = image.shape[0]
            width = image.shape[1]
    tf.logging.info('Image size: %dx%d' % (width, height))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Read image data.
            image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing(
                FLAGS.loss_model,
                is_training=False)
            image = reader.get_image(FLAGS.image_file, height, width, image_preprocessing_fn)

            # Add batch dimension
            image = tf.expand_dims(image, 0)

            generated = model.net(image, training=False)
            generated = tf.cast(generated, tf.uint8)

            # Remove batch dimension
            generated = tf.squeeze(generated, [0])

            # Restore model variables.
            saver = tf.train.Saver(tf.global_variables(), write_version=tf.train.SaverDef.V1)
            sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
            # Use absolute path
            FLAGS.model_file = os.path.abspath(FLAGS.model_file)
            saver.restore(sess, FLAGS.model_file)

            # Make sure 'generated' directory exists.
            generated_file = 'generated/res.jpg'
            if not os.path.exists('generated'):
                os.makedirs('generated')

            # Generate and write image data to file.
            with open(generated_file, 'wb') as img:
                start_time = time.time()
                img.write(sess.run(tf.image.encode_jpeg(generated)))
                end_time = time.time()
                tf.logging.info('Elapsed time: %fs' % (end_time - start_time))

                tf.logging.info('Done. Please check %s.' % generated_file)
def get_data(dataset, model_name, batch_size=32, shuffle_config=None,
             shuffle=None, is_training=True, height=0, width=0):
    """Return batched input data for the model.

    Args:
      dataset: a slim Dataset object.
      model_name: name of the network; selects the pre-processing function.
      batch_size: number of examples per batch.
      shuffle_config: a namedtuple controlling the shuffle queue, with fields
        {queue_capacity, num_batching_threads, min_after_dequeue}. Falls back
        to DEFAULT_SHUFFLE_CONFIG when not given.
      shuffle: whether the data provider shuffles.
      is_training: if True, pre-process the image for training.
      height: expected resized height.
      width: expected resized width.

    Returns:
      An InputEndpoints holding the batched images, labels and one-hot labels.
    """
    queue_config = shuffle_config if shuffle_config else DEFAULT_SHUFFLE_CONFIG

    provider = slim.dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=shuffle,
        common_queue_capacity=2 * batch_size,
        common_queue_min=batch_size)
    [raw_image, label] = provider.get(['image', 'label'])

    # Summaries of the untouched example, before any pre-processing.
    tf.summary.image('image_org', tf.expand_dims(raw_image, 0))
    tf.summary.scalar('label_orig', label)

    # NOTE(review): the arguments are passed as (width, height, is_training);
    # confirm this matches the parameter order of the pre-processing function.
    preprocess = preprocessing_factory.get_preprocessing(model_name)
    image = preprocess(raw_image, width, height, is_training)

    one_hot_label = slim.one_hot_encoding(label, dataset.num_classes)

    batched = tf.train.shuffle_batch(
        tensors=[image, label, one_hot_label],
        batch_size=batch_size,
        capacity=queue_config.queue_capacity,
        num_threads=queue_config.num_batching_threads,
        min_after_dequeue=queue_config.min_after_dequeue)
    images, labels, labels_one_hot = batched

    return InputEndpoints(
        images=images,
        labels=labels,
        labels_one_hot=labels_one_hot)
def main(_):
    """Stylise ``FLAGS.image_file`` and write the result to ``FLAGS.target_file``.

    The image is decoded once to learn its size, the transformation network is
    built in a fresh graph, the checkpoint at ``FLAGS.model_file`` is restored
    and the generated image is JPEG-encoded to ``FLAGS.target_file``.

    Both sessions are entered with ``with tf.Session() as sess`` so that they
    are closed on exit; ``tf.Session().as_default()`` only installs the session
    as default and leaves it open.

    Args:
        _: unused positional argument.
    """
    with open(FLAGS.image_file, 'rb') as img:
        with tf.Session() as sess:
            if FLAGS.image_file.lower().endswith('png'):
                image = sess.run(tf.image.decode_png(img.read()))
            else:
                image = sess.run(tf.image.decode_jpeg(img.read()))
            height = image.shape[0]
            width = image.shape[1]
    tf.logging.info('Image size: %dx%d' % (width, height))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing(
                FLAGS.loss_model,
                is_training=False)

            # Fetch the pre-processed input picture; it supplies the content
            # that is stylised below.
            image = reader.get_image(FLAGS.image_file, height, width, image_preprocessing_fn)
            image = tf.expand_dims(image, 0)
            generated = model.transform_network(image, training=False)
            generated = tf.squeeze(generated, [0])

            saver = tf.train.Saver(tf.global_variables(), write_version=tf.train.SaverDef.V1)
            sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])

            # Load the already-trained model.
            FLAGS.model_file = os.path.abspath(FLAGS.model_file)
            saver.restore(sess, FLAGS.model_file)

            # Generate the style-transferred image; only the network run is timed.
            start_time = time.time()
            generated = sess.run(generated)
            generated = tf.cast(generated, tf.uint8)
            end_time = time.time()
            tf.logging.info('Elapsed time: %fs' % (end_time - start_time))

            generated_file = FLAGS.target_file
            # NOTE(review): this directory is created unconditionally even
            # though the output path comes from FLAGS.target_file; confirm the
            # two are expected to agree.
            if not os.path.exists('static/img/generated'):
                os.makedirs('static/img/generated')
            with open(generated_file, 'wb') as img:
                img.write(sess.run(tf.image.encode_jpeg(generated)))
                tf.logging.info('Done. Please check %s.' % generated_file)
def get_style_features(FLAGS):
    """Evaluate the style features of ``FLAGS.style_image``.

    For the style image, the pre-processing steps are:
    1. Resize the shorter side to FLAGS.image_size
    2. Apply central crop

    Side effect: the un-processed version of the pre-processed style image is
    written to ``static/img/generated/target_style_<FLAGS.naming>.jpg``.

    Returns:
        List with one evaluated ``gram(...)`` feature (batch dimension removed)
        per entry of ``FLAGS.style_layers``.
    """
    with tf.Graph().as_default():
        loss_network = nets_factory.get_network_fn(
            FLAGS.loss_model,
            num_classes=1,
            is_training=False)
        preprocess, unprocess = preprocessing_factory.get_preprocessing(
            FLAGS.loss_model,
            is_training=False)

        # Load and decode the style image (PNG by extension, JPEG otherwise).
        side = FLAGS.image_size
        raw_bytes = tf.read_file(FLAGS.style_image)
        is_png = FLAGS.style_image.lower().endswith('png')
        decode = tf.image.decode_png if is_png else tf.image.decode_jpeg
        style_image = decode(raw_bytes)

        # A one-element stack provides the batch dimension.
        images = tf.stack([preprocess(style_image, side, side)])
        _, endpoints = loss_network(images, spatial_squeeze=False)

        # One feature per requested layer, with the batch dimension removed.
        features = [
            tf.squeeze(gram(endpoints[layer_name]), [0])
            for layer_name in FLAGS.style_layers
        ]

        with tf.Session() as sess:
            restore_loss_network = utils._get_init_fn(FLAGS)
            restore_loss_network(sess)

            if not os.path.exists('static/img/generated'):
                os.makedirs('static/img/generated')
            save_file = 'static/img/generated/target_style_' + FLAGS.naming + '.jpg'

            with open(save_file, 'wb') as out:
                viewable = unprocess(images[0, :])
                encoded = tf.image.encode_jpeg(tf.cast(viewable, tf.uint8))
                out.write(sess.run(encoded))
                tf.logging.info('Target style pattern is saved to: %s.' % save_file)

            return sess.run(features)
def style_transform(style, model_file, img_file, result_file):
    """Stylise ``img_file`` with the checkpoint ``model_file``.

    The image is decoded once to learn its size, the transformation network is
    built in a fresh graph, the checkpoint is restored and the generated image
    is written to ``static/img/generated/<result_file>``.

    Both sessions are entered with ``with tf.Session() as sess`` so that they
    are closed on exit; ``tf.Session().as_default()`` only installs the session
    as default and leaves it open, so every call used to leak two sessions.

    Args:
        style: not used by this function; kept for interface compatibility.
        model_file: path of the checkpoint to restore. Its absolute form is
            also stored in ``FLAGS.model_file``.
        img_file: path of the input image; decoded as PNG when the path ends
            in ``png``, as JPEG otherwise.
        result_file: file name of the output inside ``static/img/generated/``.
    """
    with open(img_file, 'rb') as img:
        with tf.Session() as sess:
            if img_file.lower().endswith('png'):
                image = sess.run(tf.image.decode_png(img.read()))
            else:
                image = sess.run(tf.image.decode_jpeg(img.read()))
            height = image.shape[0]
            width = image.shape[1]
    print('Image size: %dx%d' % (width, height))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing(
                FLAGS.loss_model,
                is_training=False)
            image = reader.get_image(img_file, height, width, image_preprocessing_fn)
            image = tf.expand_dims(image, 0)
            generated = model.transform_network(image, training=False)
            generated = tf.squeeze(generated, [0])

            saver = tf.train.Saver(tf.global_variables())
            sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
            FLAGS.model_file = os.path.abspath(model_file)
            saver.restore(sess, FLAGS.model_file)

            # Only the network run itself is timed.
            start_time = time.time()
            generated = sess.run(generated)
            generated = tf.cast(generated, tf.uint8)
            end_time = time.time()
            print('Elapsed time: %fs' % (end_time - start_time))

            generated_file = 'static/img/generated/' + result_file
            if not os.path.exists('static/img/generated'):
                os.makedirs('static/img/generated')
            with open(generated_file, 'wb') as img:
                img.write(sess.run(tf.image.encode_jpeg(generated)))
                print('Done. Please check %s.' % generated_file)
def main(_):
    """Stylise ``FLAGS.image_file`` and write the result to ``generated/res.jpg``.

    The image is decoded once to learn its size, the transformation network is
    built in a fresh graph, the checkpoint at ``FLAGS.model_file`` is restored
    and the generated image is JPEG-encoded to disk.

    Both sessions are entered with ``with tf.Session() as sess`` so that they
    are closed on exit; ``tf.Session().as_default()`` only installs the session
    as default and leaves it open.

    Args:
        _: unused positional argument.
    """
    with open(FLAGS.image_file, 'rb') as img:
        with tf.Session() as sess:
            if FLAGS.image_file.lower().endswith('png'):
                image = sess.run(tf.image.decode_png(img.read()))
            else:
                image = sess.run(tf.image.decode_jpeg(img.read()))
            height = image.shape[0]
            width = image.shape[1]
    tf.logging.info('Image size: %dx%d' % (width, height))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing(
                FLAGS.loss_model,
                is_training=False)
            image = reader.get_image(FLAGS.image_file, height, width, image_preprocessing_fn)
            image = tf.expand_dims(image, 0)
            generated = model.net(image, training=False)
            generated = tf.squeeze(generated, [0])

            # The variable helpers below are the older TensorFlow spellings and
            # are kept unchanged so the function keeps running on the release
            # it was written for.
            saver = tf.train.Saver(tf.all_variables())
            sess.run([tf.initialize_all_variables(), tf.initialize_local_variables()])
            FLAGS.model_file = os.path.abspath(FLAGS.model_file)
            saver.restore(sess, FLAGS.model_file)

            # Only the network run itself is timed.
            start_time = time.time()
            generated = sess.run(generated)
            generated = tf.cast(generated, tf.uint8)
            end_time = time.time()
            tf.logging.info('Elapsed time: %fs' % (end_time - start_time))

            generated_file = 'generated/res.jpg'
            if not os.path.exists('generated'):
                os.makedirs('generated')
            with open(generated_file, 'wb') as img:
                img.write(sess.run(tf.image.encode_jpeg(generated)))
                tf.logging.info('Done. Please check %s.' % generated_file)
def main(_):
    """Fine-tune the SSD network named by ``FLAGS.model_name``.

    Builds the input pipeline, the network, its losses and image/scalar/
    histogram summaries, restores the pre-trained weights from
    ``./logs/mobilenet_v1_1.0_224.ckpt`` (excluding the logits, box and
    global-step variables) and runs ``FLAGS.max_number_of_steps`` training
    steps. Summaries are written every 50 iterations and checkpoints every 100
    iterations under ``./logs/``.

    Changes compared with the previous version:
      * the GPU options are now passed to the session; they used to be built
        after the session was created and were never applied, so
        ``--gpu_memory_fraction`` had no effect;
      * the summary writer and the session are closed when training ends or
        fails;
      * the disabled second ('kitti') training branch and the dataset lookup
        that only it used were removed.

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        # Create global_step.
        global_step = slim.create_global_step()

        ckpt_filename = os.path.dirname(
            './logs/') + '/mobilenet_v1_1.0_224.ckpt'

        # Session options have to exist before the session does.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)
        # InteractiveSession installs itself as the default session, which the
        # global_step.eval() calls in the training loop rely on.
        sess = tf.InteractiveSession(config=config)

        # Select the dataset.
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        # Get the SSD network and its anchors.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        ssd_params = ssd_class.default_params._replace(
            num_classes=FLAGS.num_classes)
        ssd_net = ssd_class(ssd_params)
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size,
                shuffle=True)
        # The 'shape' item is fetched as before but not used afterwards.
        [image, _, glabels, gbboxes] = provider.get(
            ['image', 'shape', 'object/label', 'object/bbox'])

        image, glabels, gbboxes = image_preprocessing_fn(
            image, glabels, gbboxes,
            out_shape=ssd_shape,
            data_format=DATA_FORMAT)

        gclasses, glocalisations, gscores = ssd_net.bboxes_encode(
            glabels, gbboxes, ssd_anchors)
        # One image tensor followed by three lists with one entry per anchor layer.
        batch_shape = [1] + [len(ssd_anchors)] * 3

        r = tf.train.batch(
            tf_utils.reshape_list([image, gclasses, glocalisations, gscores]),
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)
        b_image, b_gclasses, b_glocalisations, b_gscores = \
            tf_utils.reshape_list(r, batch_shape)

        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))
        summaries.add(tf.summary.image("imgs", tf.cast(b_image, tf.float32)))

        # Ground-truth score maps, one image summary per feature layer.
        for f_i, gt_map in enumerate(b_gscores):
            gt_features = tf.reduce_max(gt_map, axis=3)
            gt_features = tf.expand_dims(gt_features, -1)
            summaries.add(
                tf.summary.image("gt_map_%d" % f_i,
                                 tf.cast(gt_features, tf.float32)))

        arg_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay,
                                      data_format=DATA_FORMAT)
        with slim.arg_scope(arg_scope):
            predictions, localisations, logits, end_points = \
                ssd_net.net(b_image, is_training=True)

        # Predicted score maps: drop index 0 of the last axis, then take the
        # maximum over the last two remaining axes.
        for f_i, predict_map in enumerate(predictions):
            predict_map = predict_map[:, :, :, :, 1:]
            predict_map = tf.reduce_max(predict_map, axis=4)
            predict_map = tf.reduce_max(predict_map, axis=3)
            predict_map = tf.expand_dims(predict_map, -1)
            summaries.add(
                tf.summary.image("predicte_map_%d" % f_i,
                                 tf.cast(predict_map, tf.float32)))

        ssd_net.losses(logits, localisations,
                       b_gclasses, b_glocalisations, b_gscores, 0,
                       match_threshold=FLAGS.match_threshold,
                       negative_ratio=FLAGS.negative_ratio,
                       alpha=FLAGS.loss_alpha,
                       label_smoothing=FLAGS.label_smoothing)

        total_loss = tf.losses.get_total_loss()
        summaries.add(tf.summary.scalar('loss', total_loss))

        for loss in tf.get_collection(tf.GraphKeys.LOSSES):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

        for variable in tf.trainable_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        learning_rate = tf_utils.configure_learning_rate(
            FLAGS, dataset.num_samples, global_step)
        optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
        summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        # Make the train op depend on everything in the UPDATE_OPS collection.
        extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(extra_update_ops):
            train_op = slim.learning.create_train_op(total_loss, optimizer,
                                                     summarize_gradients=False)

        summary_op = tf.summary.merge(list(summaries), name='summary_op')
        train_writer = tf.summary.FileWriter('./logs/', sess.graph)

        # The excluded scopes and global_step keep their freshly initialised
        # values instead of being read from the checkpoint.
        variables_to_restore = slim.get_variables_to_restore(
            exclude=["MobilenetV1/Logits", "MobilenetV1/Box", "global_step"])
        restorer = tf.train.Saver(variables_to_restore)
        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)

        try:
            sess.run(tf.global_variables_initializer())
            restorer.restore(sess, ckpt_filename)

            i = 0
            with slim.queues.QueueRunners(sess):
                while i < FLAGS.max_number_of_steps:
                    _, summary_str = sess.run([train_op, summary_op])

                    if i % 50 == 0:
                        global_step_str = global_step.eval()
                        print('%diteraton' % (global_step_str))
                        train_writer.add_summary(summary_str, global_step_str)

                    if i % 100 == 0:
                        global_step_str = global_step.eval()
                        saver.save(sess, "./logs/", global_step=global_step_str)

                    i += 1
        finally:
            # Flush pending summaries and release the session even on failure.
            train_writer.close()
            sess.close()
help="Directory containing the dataset") parser.add_argument('--only_use_index_embedding', default="0", help="Directory containing the dataset") if __name__ == '__main__': tf.logging.set_verbosity(tf.logging.INFO) # Load the parameters from json file args = parser.parse_args() args.use_attr = bool(int(args.use_attr)) os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_no print("CUDA Visible device", device_lib.list_local_devices()) image_preprocessing_fn = None if args.preprocessing_name != "": image_preprocessing_fn = preprocessing_factory.get_preprocessing(args.preprocessing_name, is_training=False) args.image_size = int(args.image_size) def train_pre_process(example_proto): features = {"image/encoded": tf.FixedLenFeature((), tf.string, default_value=""), "image/class/label": tf.FixedLenFeature((), tf.int64, default_value=0), 'image/height': tf.FixedLenFeature((), tf.int64, default_value=0), 'image/width': tf.FixedLenFeature((), tf.int64, default_value=0) } if args.use_attr: features["image/attr"] = tf.VarLenFeature(dtype=tf.int64) parsed_features = tf.parse_single_example(example_proto, features) image = tf.image.decode_jpeg(parsed_features["image/encoded"], 3)
def main(_):
    """Train a slim classification network as configured by ``FLAGS``.

    Steps, in order: configure model deployment, create the global step, select
    the dataset, network and pre-processing function, build the batched input
    queue, create the model clones and their losses, collect summaries,
    optionally set up moving averages and quantization, configure the
    optimizer, assemble the train tensor and hand everything to
    ``slim.learning.train``.

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        # Falls back to the model name when no preprocessing name is given.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name,
            is_training=True,
            use_grayscale=FLAGS.use_grayscale)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            # Shift labels by the same offset that was subtracted from
            # num_classes when the network was created.
            label -= FLAGS.labels_offset

            # Falls back to the network's default size when the flag is unset.
            train_image_size = FLAGS.train_image_size or network_fn.default_image_size

            image = image_preprocessing_fn(image, train_image_size, train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()
            logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            # The auxiliary head, when the network exposes one, contributes
            # with weight 0.4; the main logits contribute with weight 1.0.
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'], labels,
                    label_smoothing=FLAGS.label_smoothing, weights=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits, labels, label_smoothing=FLAGS.label_smoothing, weights=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                            tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        # A negative quantize_delay leaves the training graph untouched.
        if FLAGS.quantize_delay >= 0:
            contrib_quantize.create_training_graph(quant_delay=FLAGS.quantize_delay)

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                total_num_replicas=FLAGS.worker_replicas,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        #  and returns a train_tensor and summary_op
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        # Evaluating train_tensor runs every update op (including the gradient
        # step appended above) and yields the total loss.
        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                           first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
self.image_idx = 0 image, bboxes = self.data[self.image_idx] labels = [1] * len(bboxes) labels = np.reshape(labels, [-1, 1]) return image, [bboxes], labels if __name__ == "__main__": from preprocessing.preprocessing_factory import get_preprocessing data_provider = ICDARData() import numpy as np import tensorflow as tf import time import util util.proc.set_proc_name('proc-test') fn = get_preprocessing(True) with tf.Graph().as_default(): sess = tf.Session() sess.as_default() out_shape = [150, 150] images = tf.placeholder("float", name='images', shape=[None, None, 3]) bboxes = tf.placeholder("float", name='bboxes', shape=[1, None, 4]) labels = tf.placeholder('int32', name='labels', shape=[None, 1]) sampled_image, sampled_labels, sampled_bboxes = fn( images, labels, bboxes, out_shape) step = 0 data = [] while step < 10: step += 1 start = time.time()
def main(_):
    """Time the SSD training input pipeline, then dump a few encoded batches.

    Builds the preprocessing function, the anchor creator/encoder and the
    input iterator returned by ``dataset_factory.get_dataset``.  The iterator
    is then run repeatedly, printing the mean wall-clock time per step every
    10 steps, until the coordinator asks to stop or the input raises
    ``OutOfRangeError``.  Afterwards six more batches are fetched and the
    first image of each is written to ``./debug``.

    Args:
        _: Unused.

    Raises:
        ValueError: If ``--data_dir`` was not supplied.
    """
    print(tf.gfile.Glob('./debug/example_01?.jpg'))
    if not FLAGS.data_dir:
        raise ValueError(
            'You must supply the dataset directory with --data_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        global_step = slim.create_global_step()

        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name

        def image_preprocessing_fn(image_, shape_, glabels_, gbboxes_):
            # `shape_` is accepted but not forwarded to the preprocessing
            # function; only image, labels and boxes are passed on.
            preprocess = preprocessing_factory.get_preprocessing(
                preprocessing_name, is_training=True)
            return preprocess(
                image_,
                glabels_,
                gbboxes_,
                out_shape=[FLAGS.train_image_size] * 2,
                data_format=DATA_FORMAT)

        anchor_creator = anchor_manipulator.AnchorCreator(
            [FLAGS.train_image_size] * 2,
            layers_shapes=[(38, 38), (19, 19), (10, 10)],
            anchor_scales=[[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]],
            extra_anchor_scales=[[0.15], [0.35], [0.55]],
            anchor_ratios=[[2, .5], [2, .5, 3, 1. / 3], [2, .5, 3, 1. / 3]],
            layer_steps=[8, 16, 32])
        all_anchors = anchor_creator.get_all_anchors()[0]

        anchor_operator = anchor_manipulator.AnchorEncoder(
            all_anchors,
            num_classes=FLAGS.num_classes,
            ignore_threshold=0.,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        next_iter, _ = dataset_factory.get_dataset(
            FLAGS.dataset_name,
            FLAGS.dataset_split_name,
            FLAGS.data_dir,
            image_preprocessing_fn,
            file_pattern=None,
            reader=None,
            batch_size=FLAGS.batch_size,
            num_readers=FLAGS.num_readers,
            num_preprocessing_threads=FLAGS.num_preprocessing_threads,
            anchor_encoder=anchor_operator.encode_all_anchors)

        sess = tf.Session()
        # The session must stay open for the debug dump further down, so it
        # is closed in a `finally` once all fetches are done.
        try:
            sess.run(
                tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer(),
                         tf.tables_initializer()))

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            count = 0
            start_time = time.time()
            try:
                while not coord.should_stop():
                    count += 1
                    _ = sess.run([next_iter])
                    if count % 10 == 0:
                        # Report the mean time per step over the last 10.
                        time_elapsed = time.time() - start_time
                        print('time: {}'.format(time_elapsed / 10.))
                        start_time = time.time()
            except tf.errors.OutOfRangeError:
                log.info('Queue Done!')

            # Wait for threads to finish.
            coord.join(threads)

            # The fetched list is sliced into three consecutive groups of
            # len(all_anchors) entries, followed by shape and image last.
            num_anchors = len(all_anchors)
            for i in range(6):
                list_from_batch = sess.run(next_iter)
                image = list_from_batch[-1]
                shape = list_from_batch[-2]
                glabels = list_from_batch[:num_anchors]
                gtargets = list_from_batch[num_anchors:2 * num_anchors]
                gscores = list_from_batch[2 * num_anchors:3 * num_anchors]

                imsave('./debug/example_%03d.jpg' % (i, ), image[0])
                print(image.shape, shape.shape, glabels[0].shape,
                      gtargets[0].shape, gscores[0].shape)
        finally:
            sess.close()
def main(_):
    """Train a slim classification network, logging per-step train accuracy.

    Builds the deployment config, dataset, network, input queue, clones,
    summaries, optional moving averages and optimizer, then hands the
    resulting train tensor to ``slim.learning.train`` together with a custom
    ``train_step`` that also fetches the batch accuracy.

    Args:
        _: Unused.

    Raises:
        ValueError: If ``--dataset_dir`` was not supplied.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # --- Deployment configuration -----------------------------------
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # --- Global step -------------------------------------------------
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # --- Dataset -----------------------------------------------------
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
        # Number of output classes once the label offset is removed.
        num_classes = dataset.num_classes - FLAGS.labels_offset

        # --- Network -----------------------------------------------------
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=num_classes,
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        # --- Preprocessing -----------------------------------------------
        # Falls back to the model name when no preprocessing name is given.
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            FLAGS.preprocessing_name or FLAGS.model_name, is_training=True)

        # --- Input pipeline ----------------------------------------------
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            label -= FLAGS.labels_offset

            train_image_size = (FLAGS.train_image_size
                                or network_fn.default_image_size)
            image = image_preprocessing_fn(image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(labels, num_classes)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        # --- Model -------------------------------------------------------
        def clone_fn(batch_queue):
            """Build one network clone: losses plus a batch-accuracy tensor."""
            images, labels = batch_queue.dequeue()
            logits, end_points = network_fn(images)

            # Losses are registered in the graph's loss collection.
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'],
                    labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weights=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits,
                labels,
                label_smoothing=FLAGS.label_smoothing,
                weights=1.0)

            # Accuracy of the current batch (labels are one-hot here).
            predicted_classes = tf.to_int32(tf.argmax(logits, 1))
            true_classes = tf.to_int32(tf.argmax(labels, 1))
            accuracy = slim.metrics.accuracy(predicted_classes, true_classes)
            tf.add_to_collection('accuracy', accuracy)
            end_points['train_accuracy'] = accuracy
            return end_points

        # Summaries that exist before any clone is created.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Update ops of the first clone only (e.g. batch-norm updates
        # created by network_fn).
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Activation summaries for every end point except the accuracy one,
        # which gets its own scalar summary below.
        end_points = clones[0].outputs
        for name, activation in end_points.items():
            if 'accuracy' not in name:
                summaries.add(
                    tf.summary.histogram('activations/' + name, activation))
                summaries.add(
                    tf.summary.scalar('sparsity/' + name,
                                      tf.nn.zero_fraction(activation)))

        train_acc = end_points['train_accuracy']
        summaries.add(tf.summary.scalar('train_accuracy', train_acc))

        # Loss summaries.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES,
                                      first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # TODO(philkuz): a disabled experiment here built a second
        # (validation) batch queue and streaming train/val accuracy
        # summaries; it should only run every n iterations.

        # Variable summaries.
        # TODO: find a way to keep some of these out of the TensorBoard
        # scalars.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # --- Moving averages ---------------------------------------------
        moving_average_variables, variable_averages = None, None
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)

        # --- Optimizer ---------------------------------------------------
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # With sync replicas the averaging is delegated to the
            # SyncReplicasOptimizer.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                total_num_replicas=FLAGS.worker_replicas,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables)
        elif FLAGS.moving_average_decay:
            # Otherwise the trainer applies the averages itself.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        variables_to_train = _get_variables_to_train()

        # Total loss and per-variable gradients across all clones.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # One grouped op: gradient application plus all update ops.
        grad_updates = optimizer.apply_gradients(
            clones_gradients, global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Pick up summaries created inside the first clone scope, then merge.
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # Derive the step budget from --num_epochs when no explicit
        # --max_number_of_steps was given.
        print('FLAGS.num_epochs', FLAGS.num_epochs)
        if (FLAGS.num_epochs is not None
                and FLAGS.max_number_of_steps is None):
            FLAGS.max_number_of_steps = int(
                FLAGS.num_epochs * dataset.num_samples / FLAGS.batch_size)

        # Nest the log directory under a per-experiment sub-directory.
        if FLAGS.experiment_name is not None:
            experiment_dir = 'bs={},lr={},epochs={}/{}'.format(
                FLAGS.batch_size, FLAGS.learning_rate, FLAGS.num_epochs,
                FLAGS.experiment_name)
            print(experiment_dir)
            FLAGS.train_dir = os.path.join(FLAGS.train_dir, experiment_dir)
            print(FLAGS.train_dir)

        def train_step(sess, train_op, global_step, train_step_kwargs):
            """Run one training step and report whether training should stop.

            Unlike a loss-only step, this also fetches the ``train_acc``
            tensor from the enclosing scope and includes it in the log line.

            Args:
              sess: The current session.
              train_op: Op whose evaluation applies the gradients and yields
                the total loss.
              global_step: Tensor holding the global training step.
              train_step_kwargs: Dict of optional control tensors/objects
                (`should_trace`, `logdir`, `summary_writer`, `should_log`,
                `should_stop`).

            Returns:
              A `(total_loss, should_stop)` tuple.

            Raises:
              ValueError: if 'should_trace' is in `train_step_kwargs` but
                'logdir' is not.
            """
            start_time = time.time()

            trace_run_options = None
            run_metadata = None
            should_acc = True  # TODO(philkuz): make this configurable.

            if 'should_trace' in train_step_kwargs:
                if 'logdir' not in train_step_kwargs:
                    raise ValueError(
                        'logdir must be present in train_step_kwargs when '
                        'should_trace is present')
                if sess.run(train_step_kwargs['should_trace']):
                    trace_run_options = config_pb2.RunOptions(
                        trace_level=config_pb2.RunOptions.FULL_TRACE)
                    run_metadata = config_pb2.RunMetadata()

            if should_acc:
                total_loss, acc, np_global_step = sess.run(
                    [train_op, train_acc, global_step],
                    options=trace_run_options,
                    run_metadata=run_metadata)
            else:
                total_loss, np_global_step = sess.run(
                    [train_op, global_step],
                    options=trace_run_options,
                    run_metadata=run_metadata)
            time_elapsed = time.time() - start_time

            # Persist the Chrome trace when this step was traced.
            if run_metadata is not None:
                tl = timeline.Timeline(run_metadata.step_stats)
                trace = tl.generate_chrome_trace_format()
                trace_filename = os.path.join(
                    train_step_kwargs['logdir'],
                    'tf_trace-%d.json' % np_global_step)
                tf.logging.info('Writing trace to %s', trace_filename)
                file_io.write_string_to_file(trace_filename, trace)
                if 'summary_writer' in train_step_kwargs:
                    train_step_kwargs['summary_writer'].add_run_metadata(
                        run_metadata, 'run_metadata-%d' % np_global_step)

            if ('should_log' in train_step_kwargs
                    and sess.run(train_step_kwargs['should_log'])):
                if should_acc:
                    tf.logging.info(
                        'global step %d: loss = %.4f train_acc = %.4f (%.3f sec/step)',
                        np_global_step, total_loss, acc, time_elapsed)
                else:
                    tf.logging.info(
                        'global step %d: loss = %.4f (%.3f sec/step)',
                        np_global_step, total_loss, time_elapsed)

            should_stop = False
            if 'should_stop' in train_step_kwargs:
                should_stop = sess.run(train_step_kwargs['should_stop'])
            return total_loss, should_stop

        # --- Training loop -----------------------------------------------
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            train_step_fn=train_step,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
ret, img = capture.read() shape = img.shape height = shape[0] width = shape[1] with tf.Graph().as_default(): output_graph_path = './models/wave.pb' with tf.gfile.FastGFile(output_graph_path, 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) _ = tf.import_graph_def(graph_def, name='') with tf.Session() as sess: tf.initialize_all_variables().run() image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing( 'vgg_16', is_training=False) input_x = sess.graph.get_tensor_by_name("input:0") print(input_x) output = sess.graph.get_tensor_by_name("output:0") print(output) generated = tf.cast(output, tf.uint8) generated = tf.squeeze(generated, [0]) while True: start_time = time.time() ret, img = capture.read() image_transfer = sess.run(generated, feed_dict={input_x: img}) #print(frame) #image_transfer = cv2.cvtColor(image_transfer, cv2.COLOR_BGR2RGB) cv2.imshow('camera', img) cv2.imshow('transfer', image_transfer)
def main_fun(argv, ctx):
    """Evaluate a slim classification checkpoint once.

    Defines the evaluation flags, starts the cluster server for this node,
    builds the evaluation graph (dataset, network, preprocessing, batching,
    streaming Accuracy / Recall@5 metrics) and runs
    ``slim.evaluation.evaluate_once`` over the requested number of batches.

    Args:
        argv: Command-line arguments; assigned to ``sys.argv`` so the flag
            definitions below parse them.
        ctx: Node context forwarded to ``TFNode.start_cluster_server``.

    Raises:
        ValueError: If ``--dataset_dir`` was not supplied.
    """
    import math
    import tensorflow as tf
    from datasets import dataset_factory
    from nets import nets_factory
    from preprocessing import preprocessing_factory

    sys.argv = argv

    slim = tf.contrib.slim

    tf.app.flags.DEFINE_integer('batch_size', 100,
                                'The number of samples in each batch.')
    tf.app.flags.DEFINE_integer(
        'max_num_batches', None,
        'Max number of batches to evaluate by default use all.')
    tf.app.flags.DEFINE_string('master', '',
                               'The address of the TensorFlow master to use.')
    tf.app.flags.DEFINE_string(
        'checkpoint_path', '/tmp/tfmodel/',
        'The directory where the model was written to or an absolute path to a '
        'checkpoint file.')
    tf.app.flags.DEFINE_string('eval_dir', '/tmp/tfmodel/',
                               'Directory where the results are saved to.')
    tf.app.flags.DEFINE_integer(
        'num_preprocessing_threads', 4,
        'The number of threads used to create the batches.')
    tf.app.flags.DEFINE_string('dataset_name', 'imagenet',
                               'The name of the dataset to load.')
    tf.app.flags.DEFINE_string('dataset_split_name', 'test',
                               'The name of the train/test split.')
    tf.app.flags.DEFINE_string(
        'dataset_dir', None,
        'The directory where the dataset files are stored.')
    tf.app.flags.DEFINE_integer(
        'labels_offset', 0,
        'An offset for the labels in the dataset. This flag is primarily used to '
        'evaluate the VGG and ResNet architectures which do not use a background '
        'class for the ImageNet dataset.')
    tf.app.flags.DEFINE_string('model_name', 'inception_v3',
                               'The name of the architecture to evaluate.')
    tf.app.flags.DEFINE_string(
        'preprocessing_name', None,
        'The name of the preprocessing to use. If left '
        'as `None`, then the model_name flag is used.')
    tf.app.flags.DEFINE_float(
        'moving_average_decay', None,
        'The decay to use for the moving average.'
        'If left as None, then moving averages are not used.')
    tf.app.flags.DEFINE_integer('eval_image_size', None, 'Eval image size')

    FLAGS = tf.app.flags.FLAGS

    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    # The returned handles are not used below; the call is kept for its
    # effect of starting the server for this node.
    cluster_spec, server = TFNode.start_cluster_server(ctx)

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label] = provider.get(['image', 'label'])
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = (FLAGS.eval_image_size
                           or network_fn.default_image_size)

        image = image_preprocessing_fn(image, eval_image_size,
                                       eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        if FLAGS.moving_average_decay:
            # Restore the averaged values of the model variables.
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
            'Recall@5': slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        # `.items()` exists on both Python 2 and 3; `.iteritems()` only on 2.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            # math.ceil returns a float on Python 2, hence the int().
            num_batches = int(
                math.ceil(dataset.num_samples / float(FLAGS.batch_size)))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s', checkpoint_path)

        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            # A concrete list: on Python 3 `.values()` is only a view.
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
def main(_):
    """Train MobileNet-v1 on the 'flowers' training split with RMSProp.

    Uses a fixed single-clone, single-replica deployment, a staircase
    exponentially decaying learning rate, and ``slim.learning.train`` as the
    training loop.

    Args:
        _: Unused.

    Raises:
        ValueError: If ``--dataset_dir`` was not supplied.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # --- Deployment: one clone on one replica, no parameter servers ---
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=1,
            clone_on_cpu=False,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)

        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # --- Dataset and network -----------------------------------------
        dataset = dataset_factory.get_dataset(
            'flowers', 'train', FLAGS.dataset_dir)
        num_classes = dataset.num_classes - FLAGS.labels_offset

        network_fn = nets_factory.get_network_fn(
            'mobilenet_v1',
            num_classes=num_classes,
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            'mobilenet_v1', is_training=True)

        # --- Input pipeline ----------------------------------------------
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=4,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            label -= FLAGS.labels_offset

            train_image_size = network_fn.default_image_size
            image = image_preprocessing_fn(image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=4,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(labels, num_classes)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        # --- Model -------------------------------------------------------
        def clone_fn(batch_queue):
            """Build one network clone and register its cross-entropy loss."""
            images, labels = batch_queue.dequeue()
            logits, end_points = network_fn(images)
            slim.losses.softmax_cross_entropy(
                logits,
                labels,
                label_smoothing=FLAGS.label_smoothing,
                weights=1.0)
            return end_points

        # Summaries that exist before any clone is created.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Update ops of the first clone (e.g. batch-norm updates created by
        # network_fn).
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Activation histogram and sparsity summaries for each end point.
        end_points = clones[0].outputs
        for name, activation in end_points.items():
            summaries.add(
                tf.summary.histogram('activations/' + name, activation))
            summaries.add(
                tf.summary.scalar('sparsity/' + name,
                                  tf.nn.zero_fraction(activation)))

        # Loss summaries.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES,
                                      first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Variable summaries.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # Moving averages are not used by this script.
        moving_average_variables, variable_averages = None, None

        # --- Optimizer ---------------------------------------------------
        with tf.device(deploy_config.optimizer_device()):
            num_epochs_per_decay = 2.5
            steps_per_epoch = dataset.num_samples / FLAGS.batch_size
            decay_steps = int(steps_per_epoch * num_epochs_per_decay)
            learning_rate = tf.train.exponential_decay(
                FLAGS.learning_rate,
                global_step,
                decay_steps,
                _LEARNING_RATE_DECAY_FACTOR,
                staircase=True,
                name='exponential_decay_learning_rate')
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate,
                decay=FLAGS.rmsprop_decay,
                momentum=FLAGS.rmsprop_momentum,
                epsilon=FLAGS.opt_epsilon)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        variables_to_train = _get_variables_to_train()

        # Total loss and per-variable gradients across all clones.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # One grouped op: gradient application plus all update ops.
        grad_updates = optimizer.apply_gradients(
            clones_gradients, global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Pick up summaries created inside the first clone scope, then merge.
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        session_config = tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False)

        # --- Training loop -----------------------------------------------
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=True,
            session_config=session_config,
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=10,
            save_summaries_secs=300,
            save_interval_secs=300,
            # No synchronous-replica optimizer in this single-replica setup.
            sync_optimizer=None)
def main(_):
    """Train a classifier on synthetically noised, Bayer-masked images.

    Each clone dequeues a clean batch, applies the sensor noise model and a
    Bayer mask, and feeds the result to the network together with the noise
    parameters.  Image summaries are added for selected end points, moving
    averages of the model variables are always maintained, and training runs
    through ``slim.learning.train`` with a custom ``Saver``.

    Args:
        _: Unused.

    Raises:
        ValueError: If ``--dataset_dir`` was not supplied.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.device

    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # --- Deployment: single replica, no parameter servers ------------
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=False,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)

        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # --- Dataset and network -----------------------------------------
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=dataset.num_classes,
            weight_decay=FLAGS.weight_decay,
            batch_norm_decay=None,
            is_training=True)

        # Falls back to the model name when no preprocessing name is given.
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            FLAGS.preprocessing_name or FLAGS.model_name, is_training=True)

        # --- Input pipeline ----------------------------------------------
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])

            train_image_size = (FLAGS.train_image_size
                                or network_fn.default_image_size)

            # NOTE(review): `image` is passed twice; presumably this
            # project's preprocessing function takes a second image
            # argument -- confirm against preprocessing_factory.
            image = image_preprocessing_fn(image, image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(labels, dataset.num_classes)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        # --- Model -------------------------------------------------------
        def clone_fn(batch_queue):
            """Build one clone: noise the batch, run the net, add the loss."""
            images, labels = batch_queue.dequeue()

            # Noise up the images - don't do that for models where we are
            # preprocessing the images with an existing ISP.
            # NOTE(review): extent of the CPU device scope reconstructed
            # from flattened source -- confirm.
            with tf.device("/cpu:0"):
                light_level_range = [FLAGS.ll_low, FLAGS.ll_high]
                noisy_batch, a, gauss_std = (
                    sensor_model.sensor_noise_rand_light_level(
                        images, light_level_range, scale=1.0))
                bayer_mask = sensor_model.get_bayer_mask(
                    train_image_size, train_image_size)
                inputs = noisy_batch * bayer_mask

            # These parameters are only relevant for the special ISP
            # functions; other networks (e.g. Mobilenet) are expected to
            # accept and ignore them.
            logits, end_points, _ = network_fn(
                images=inputs,
                num_classes=dataset.num_classes,
                alpha=a,
                sigma=gauss_std,
                bayer_mask=bayer_mask,
                use_anscombe=FLAGS.use_anscombe,
                noise_channel=FLAGS.noise_channel,
                num_iters=FLAGS.num_iters,
                num_layers=FLAGS.num_layers,
                isp_model_name=FLAGS.isp_model_name,
                is_real_data=False)
            # Expose the clean batch so it can be summarised as an image.
            end_points['ground_truth'] = images

            tf.losses.softmax_cross_entropy(
                logits=logits,
                onehot_labels=labels,
                label_smoothing=FLAGS.label_smoothing,
                weights=1.0)
            return end_points

        # Summaries that exist before any clone is created.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Update ops of the first clone (e.g. batch-norm updates created by
        # network_fn).
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Activation histogram and sparsity summaries for each end point.
        end_points = clones[0].outputs
        for name, activation in end_points.items():
            summaries.add(
                tf.summary.histogram('activations/' + name, activation))
            summaries.add(
                tf.summary.scalar('sparsity/' + name,
                                  tf.nn.zero_fraction(activation)))

        # Loss summaries.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES,
                                      first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Variable summaries.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # Image summaries: the first group gets an image only, the second
        # group additionally gets min/max scalars.
        image_only = ['outputs', 'post_anscombe', 'pre_inv_anscombe']
        image_with_bounds = [
            'mobilenet_input', 'noisy', 'inputs', 'ground_truth', 'R', 'G1',
            'G2', 'B'
        ]
        for name in end_points:
            if name in image_only:
                summaries.add(tf.summary.image(name, end_points[name]))
            if name in image_with_bounds:
                tensor = end_points[name]
                summaries.add(tf.summary.image(name, tensor))
                summaries.add(
                    tf.summary.scalar('bounds/%s_min' % name,
                                      tf.reduce_min(tensor)))
                summaries.add(
                    tf.summary.scalar('bounds/%s_max' % name,
                                      tf.reduce_max(tensor)))

        # --- Moving averages (always enabled here) ------------------------
        moving_average_variables = slim.get_model_variables()
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, global_step)

        # --- Optimizer ---------------------------------------------------
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        # The trainer applies the moving averages itself.
        update_ops.append(variable_averages.apply(moving_average_variables))

        variables_to_train = _get_variables_to_train()

        # Total loss and per-variable gradients across all clones.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # One grouped op: gradient application plus all update ops.
        grad_updates = optimizer.apply_gradients(
            clones_gradients, global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies(
            [update_op], total_loss, name='train_op')

        # Pick up summaries created inside the first clone scope, then merge.
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        saver = tf.train.Saver(keep_checkpoint_every_n_hours=2)

        # --- Training loop -----------------------------------------------
        slim.learning.train(
            train_tensor,
            saver=saver,
            logdir=FLAGS.train_dir,
            master='',
            is_chief=True,
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=None)
def train():
    """Fine-tune a pre-trained VGG-16 on ImageNet while pruning its weights.

    Builds the input pipeline, the VGG-16 model, its loss and training op in
    a fresh graph, adds the pruning mask-update op, restores the pre-trained
    weights from ``trained_weights/vgg_16.ckpt`` and then alternates one
    training step with one mask update until a session hook requests a stop.
    """
    # Single source of truth for the batch size: it sizes the queues, the
    # batches and the throughput figure printed by the logger hook.
    batch_size = 32

    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()

        dataset = imagenet.get_split(
            'train', '/data/ramyadML/TF-slim-data/imageNet/processed')

        # Creates a TF-Slim DataProvider which reads the dataset in the
        # background.
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            num_readers=4,
            common_queue_capacity=20 * batch_size,
            common_queue_min=10 * batch_size,
            shuffle=True)

        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            'vgg_16', is_training=True)

        [image, label] = provider.get(['image', 'label'])
        image = image_preprocessing_fn(image, 224, 224)
        # Shift labels down by one (presumably the dataset labels are
        # 1-based / reserve 0 for background -- TODO confirm).
        label -= 1

        # Batch up some training data.
        images, labels = tf.train.batch(
            [image, label],
            batch_size=batch_size,
            num_threads=4,
            capacity=5 * batch_size)
        print (images.shape)
        images = tf.cast(images, tf.float32)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = vgg.inference(images)
        print ("logits shape:", logits.shape)
        print ("label shape", labels.shape)

        # Calculate loss.
        loss = vgg.loss(logits, labels)

        # Names of the VGG-16 variables to restore from the pre-trained
        # checkpoint: biases then weights for every conv layer of the five
        # conv blocks, followed by fc6..fc8.
        conv_layers_per_block = (2, 2, 3, 3, 3)
        layer_scopes = [
            'vgg_16/conv%d/conv%d_%d' % (block, block, layer)
            for block, num_layers in enumerate(conv_layers_per_block, 1)
            for layer in range(1, num_layers + 1)
        ]
        layer_scopes += ['vgg_16/fc%d' % index for index in (6, 7, 8)]
        list_var_names = [
            '%s/%s' % (scope, kind)
            for scope in layer_scopes
            for kind in ('biases', 'weights')
        ]

        # The saver is built before the training op is created, so only
        # variables that exist at this point can be picked up here.
        var_list_to_restore = []
        for name in list_var_names:
            var_list_to_restore.extend(
                tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, name))
        saver = tf.train.Saver(var_list_to_restore)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = vgg.train(loss, global_step)

        # Parse pruning hyperparameters.
        pruning_hparams = pruning.get_pruning_hparams().parse(
            FLAGS.pruning_hparams)

        # Create a pruning object using the pruning hyperparameters.
        pruning_obj = pruning.Pruning(pruning_hparams, global_step=global_step)

        # Use the pruning_obj to add ops to the training graph to update the
        # masks. The conditional_mask_update_op will update the masks only
        # when the training step is in [begin_pruning_step, end_pruning_step]
        # specified in the pruning spec proto.
        mask_update_op = pruning_obj.conditional_mask_update_op()

        # Use the pruning_obj to add summaries to the graph to track the
        # sparsity of each of the layers.
        pruning_obj.add_pruning_summaries()

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1

            def before_run(self, run_context):
                self._step += 1
                self._start_time = time.time()
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                duration = time.time() - self._start_time
                loss_value = run_values.results
                if self._step % 10 == 0:
                    # Must match the batch size fed to tf.train.batch,
                    # otherwise the reported throughput is wrong.
                    num_examples_per_step = batch_size
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)

                    format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                  'sec/batch)')
                    print(format_str % (datetime.datetime.now(), self._step,
                                        loss_value, examples_per_sec,
                                        sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                       tf.train.NanTensorHook(loss),
                       _LoggerHook()],
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement)) as mon_sess:
            # NOTE(review): this runs after the monitored session has been
            # initialised, so it overwrites the listed variables with the
            # pre-trained values on every start -- confirm that is intended
            # when resuming from FLAGS.train_dir.
            saver.restore(mon_sess, "trained_weights/vgg_16.ckpt")
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
                # Update the masks
                mon_sess.run(mask_update_op)
def _build_joint_batch_queue(dataset, preprocessing_fn, height, width,
                             deploy_config):
    """Build the input pipeline for one modality (iris or face).

    Args:
        dataset: slim dataset to read 'image' and 'label' items from.
        preprocessing_fn: callable applied as
            ``preprocessing_fn(image, height, width)``.
        height: target image height passed to ``preprocessing_fn``.
        width: target image width passed to ``preprocessing_fn``.
        deploy_config: ``model_deploy.DeploymentConfig`` supplying the inputs
            device and the number of clones.

    Returns:
        A tuple ``(images, batch_queue)``: the batched image tensor and a
        prefetch queue holding ``[images, one_hot_labels]``.
    """
    with tf.device(deploy_config.inputs_device()):
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            num_readers=FLAGS.num_readers,
            common_queue_capacity=20 * FLAGS.batch_size,
            common_queue_min=10 * FLAGS.batch_size)
        [image, label] = provider.get(['image', 'label'])
        label -= FLAGS.labels_offset

        image = preprocessing_fn(image, height, width)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)
        labels = slim.one_hot_encoding(
            labels, dataset.num_classes - FLAGS.labels_offset)
        batch_queue = slim.prefetch_queue.prefetch_queue(
            [images, labels], capacity=2 * deploy_config.num_clones)
    return images, batch_queue


def main(_):
    """Train the joint iris + face network with TF-Slim's model_deploy.

    Builds one input pipeline per modality, creates the network clones,
    attaches summaries, configures moving averages and the optimizer, and
    hands the resulting train op to ``slim.learning.train``.

    Raises:
        ValueError: if either dataset directory flag is empty.
    """
    if (not FLAGS.dataset_dir_iris) or (not FLAGS.dataset_dir_face):
        raise ValueError(
            'You must supply the dataset directories with '
            '--dataset_dir_iris and --dataset_dir_face')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        ######################
        # Config model_deploy#
        ######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset_iris = dataset_factory.get_dataset(
            FLAGS.dataset_name_iris, FLAGS.dataset_split_name,
            FLAGS.dataset_dir_iris)
        dataset_face = dataset_factory.get_dataset(
            FLAGS.dataset_name_face, FLAGS.dataset_split_name,
            FLAGS.dataset_dir_face)

        ######################
        # Select the network #
        ######################
        # The joint network is sized by the face dataset's class count.
        network_fn_joint = nets_factory.get_network_fn_joint(
            FLAGS.model_name_joint,
            num_classes=(dataset_face.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name_iris = (FLAGS.preprocessing_name_iris or
                                   FLAGS.model_name_iris)
        image_preprocessing_fn_iris = preprocessing_factory.get_preprocessing(
            preprocessing_name_iris, is_training=True)

        preprocessing_name_face = (FLAGS.preprocessing_name_face or
                                   FLAGS.model_name_face)
        image_preprocessing_fn_face = preprocessing_factory.get_preprocessing(
            preprocessing_name_face, is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        # Image sizes fall back to the joint network's default when the
        # corresponding flag is unset (falsy).
        default_size = network_fn_joint.default_image_size
        images_iris, batch_queue_iris = _build_joint_batch_queue(
            dataset_iris,
            image_preprocessing_fn_iris,
            FLAGS.New_Height_Of_Image_iris or default_size,
            FLAGS.New_Width_Of_Image_iris or default_size,
            deploy_config)
        images_face, batch_queue_face = _build_joint_batch_queue(
            dataset_face,
            image_preprocessing_fn_face,
            FLAGS.New_Height_Of_Image_face or default_size,
            FLAGS.New_Width_Of_Image_face or default_size,
            deploy_config)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue_iris, batch_queue_face):
            """Allows data parallelism by creating multiple clones of network_fn."""
            # Iris labels are dequeued but not used: the loss and accuracy
            # below are computed against the face labels only.
            clone_images_iris, _ = batch_queue_iris.dequeue()
            clone_images_face, clone_labels_face = batch_queue_face.dequeue()
            logits, end_points = network_fn_joint(clone_images_face,
                                                  clone_images_iris)

            #############################
            # Specify the loss function #
            #############################
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'], clone_labels_face,
                    label_smoothing=FLAGS.label_smoothing, weight=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits, clone_labels_face,
                label_smoothing=FLAGS.label_smoothing, weight=1.0)

            # Adding the accuracy metric
            with tf.name_scope('accuracy'):
                predictions = tf.argmax(logits, 1)
                label_ids = tf.argmax(clone_labels_face, 1)
                accuracy = tf.reduce_mean(
                    tf.to_float(tf.equal(predictions, label_ids)))
                tf.add_to_collection('accuracy', accuracy)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(
            deploy_config, clone_fn, [batch_queue_iris, batch_queue_face])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.histogram_summary('activations/' + end_point, x))
            summaries.add(
                tf.scalar_summary('sparsity/' + end_point,
                                  tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.scalar_summary('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.histogram_summary(variable.op.name, variable))

        # Add summaries for the input images.
        summaries.add(
            tf.image_summary('face', images_face, max_images=15,
                             name='Face_images'))
        summaries.add(
            tf.image_summary('iris', images_iris, max_images=15,
                             name='Iris_images'))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(
                dataset_face.num_samples, global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(
                tf.scalar_summary('learning_rate', learning_rate,
                                  name='learning_rate'))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the
            # chief queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables,
                replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
                total_num_replicas=FLAGS.worker_replicas)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        # Returns the summed clone loss and the gradients to apply.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones, optimizer, var_list=variables_to_train)

        # Add total_loss and accuracy to summary.
        summaries.add(
            tf.scalar_summary('eval/Total_Loss', total_loss,
                              name='total_loss'))
        accuracy = tf.get_collection('accuracy', first_clone_scope)[0]
        summaries.add(
            tf.scalar_summary('eval/Accuracy', accuracy, name='accuracy'))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies(
            [update_op], total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or
        # _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.merge_summary(list(summaries), name='summary_op')

        init_iris, init_feed = _get_init_op()

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=init_iris,
            init_feed_dict=init_feed,
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
def eval_model(model_name,
               hacone_root='/home/lile/Projects/git_repo/hacone',
               dataset_dir='/home/lile/dataset/cifar10_val',
               batch_size=100):
    """Evaluate a trained candidate network once and time the evaluation.

    Reads the candidate description from ``<hacone_root>/jobs/job<model_name>.txt``,
    builds the corresponding 'cifarnet' network, restores the latest
    checkpoint from ``<hacone_root>/models_trained/<model_name>`` and runs a
    single evaluation pass over the 'val' split of cifar10.

    Args:
        model_name: identifier used for both the job file name and the
            trained-model directory.
        hacone_root: root directory containing ``jobs/`` and
            ``models_trained/``.
        dataset_dir: directory holding the cifar10 validation data.
        batch_size: number of images per evaluation batch.

    Returns:
        Wall-clock duration of the ``evaluate_once`` call, in seconds. The
        final accuracy value is printed, not returned.
    """
    slim = tf.contrib.slim
    print("eval model")

    job_path = os.path.join(hacone_root, 'jobs',
                            'job{}.txt'.format(model_name))
    with open(job_path) as fp:
        data = json.load(fp)
    # 'params' is itself a JSON-encoded string inside the job file.
    params = json.loads(data['params'])

    # Flatten the five blocks' (i1, i2, o1, o2) entries into one list, in
    # block order.
    candidate = []
    for block in range(5):
        for suffix in ('i1', 'i2', 'o1', 'o2'):
            candidate.append(params['b{}_{}'.format(block, suffix)])

    # Forwarded positionally to nets_factory.get_network_fn; their meaning
    # is defined by the project's network factory.
    N = 2
    F = 24

    output_dir = os.path.join(hacone_root, 'models_trained', model_name)

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Called for its side effect of creating the global step.
        slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset('cifar10', 'val', dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            'cifarnet', candidate, N, F,
            num_classes=dataset.num_classes,
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * batch_size,
            common_queue_min=batch_size)
        [image, label] = provider.get(['image', 'label'])

        #####################################
        # Select the preprocessing function #
        #####################################
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            'cifarnet', is_training=False)

        eval_image_size = network_fn.default_image_size
        image = image_preprocessing_fn(image, eval_image_size,
                                       eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=batch_size,
            num_threads=4,
            capacity=5 * batch_size)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5': slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # Enough batches for a single pass over all of the data.
        num_batches = int(math.ceil(dataset.num_samples / float(batch_size)))

        checkpoint_path = output_dir
        if tf.gfile.IsDirectory(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)

        tf.logging.info('Evaluating %s' % checkpoint_path)

        # Top-1 accuracy is fetched once all evaluation batches have run.
        final_op = [names_to_values['Accuracy']]

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        start_time = time.time()
        a = slim.evaluation.evaluate_once(
            master='',
            checkpoint_path=checkpoint_path,
            logdir=output_dir,
            session_config=config,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            final_op=final_op,
            variables_to_restore=variables_to_restore)
        duration = time.time() - start_time

        print('________________________________')
        print('duration :' + str(duration))
        print('________________________________')
        print(a)
        return duration
def eval(checkpoint_path, eval_dir, dataset_dir, logo_name,
         model_name="inception_v4", batch_size=100):
    """Evaluate a checkpoint on the validation split for a single logo.

    Args:
        checkpoint_path: checkpoint file, or a directory in which case its
            latest checkpoint is used.
        eval_dir: directory passed to ``evaluate_once`` as ``logdir``.
        dataset_dir: directory holding the validation data.
        logo_name: key selecting which entry of the network's logits is
            evaluated; also forwarded to the network factory.
        model_name: network (and preprocessing) name.
        batch_size: number of images per evaluation batch.

    Returns:
        The final streaming accuracy value after one pass over the data.

    Raises:
        ValueError: if ``dataset_dir`` is empty.
    """
    if not dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Called for its side effect of creating the global step.
        slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = my_dataset.get_split('validation', dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            model_name,
            is_training=False,
            logo_names=[logo_name])

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * batch_size,
            common_queue_min=batch_size)
        [image, label] = provider.get(['image', 'label'])

        #####################################
        # Select the preprocessing function #
        #####################################
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            model_name, is_training=False)

        eval_image_size = network_fn.default_image_size
        image = image_preprocessing_fn(image, eval_image_size,
                                       eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=batch_size,
            num_threads=4,
            capacity=5 * batch_size)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images, logo_names=[logo_name])

        variables_to_restore = slim.get_variables_to_restore()

        # The network's logits are indexed by logo name.
        logo_logits = logits[logo_name]
        predictions = tf.argmax(logo_logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5': slim.metrics.streaming_recall_at_k(
                logo_logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # This ensures that we make a single pass over all of the data.
        num_batches = math.ceil(dataset.num_samples / float(batch_size))

        if tf.gfile.IsDirectory(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)

        # evaluate_once only returns a value when final_op is given; without
        # it the result is always None. Fetch the accumulated accuracy so the
        # printed / returned number is meaningful.
        accuracy = slim.evaluation.evaluate_once(
            master="",
            checkpoint_path=checkpoint_path,
            logdir=eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            final_op=names_to_values['Accuracy'],
            variables_to_restore=variables_to_restore)
        print(accuracy)
        return accuracy
def main(_):
    """Evaluate a model once and dump details of every misclassified image.

    Besides the usual streaming accuracy / recall metrics, this collects a
    confusion matrix and, for each mislabeled sample, its filename, true
    class, predicted class and top prediction probability. The per-sample
    records are pickled to ``misclassified_images.p`` and optionally printed.

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label, filename] = provider.get(
            ['image', 'label', 'filename'])
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = (FLAGS.eval_image_size or
                           network_fn.default_image_size)
        image = image_preprocessing_fn(image, eval_image_size,
                                       eval_image_size)

        images, labels, filenames = tf.train.batch(
            [image, label, filename],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        ####################
        # Define the model #
        ####################
        logits, end_points = network_fn(images)
        preprobs = end_points['Predictions']

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Per-batch selection of the misclassified samples. Every quantity
        # that is later zipped together must be masked with the same
        # `mislabeled` vector so that the i-th entries line up.
        mislabeled = tf.not_equal(predictions, labels)
        mislabeled_filenames = tf.boolean_mask(filenames, mislabeled)
        original_classes = tf.boolean_mask(labels, mislabeled)
        predicted_classes = tf.boolean_mask(predictions, mislabeled)
        probabilities = tf.boolean_mask(
            tf.reduce_max(preprobs, 1), mislabeled)

        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy':
                slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5':
                slim.metrics.streaming_recall_at_k(logits, labels, 5),
            'Mean_absolute':
                tf.metrics.mean_absolute_error(labels, predictions),
            'Confusion_matrix':
                _get_streaming_metrics(
                    predictions, labels,
                    dataset.num_classes - FLAGS.labels_offset),
            'mislabeled_filenames':
                tf.contrib.metrics.streaming_concat(mislabeled_filenames),
            'original_classes':
                tf.contrib.metrics.streaming_concat(original_classes),
            'predicted_classes':
                tf.contrib.metrics.streaming_concat(predicted_classes),
            'probabilities':
                tf.contrib.metrics.streaming_concat(probabilities),
        })

        # Print the summaries to screen. The non-scalar metrics cannot be
        # written as scalar summaries, so they are skipped here.
        unnames = [
            'Confusion_matrix', 'mislabeled_filenames', 'original_classes',
            'predicted_classes', 'probabilities'
        ]
        for name, value in names_to_values.items():
            if name not in unnames:
                summary_name = 'eval/%s' % name
                op = tf.summary.scalar(summary_name, value, collections=[])
                op = tf.Print(op, [value], summary_name)
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(
                dataset.num_samples / float(FLAGS.batch_size))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(
                FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)

        eval_op = list(names_to_updates.values())

        # NOTE(review): the confusion matrix is fetched through its *update*
        # op while the other entries use their value tensors -- confirm that
        # _get_streaming_metrics intends this and that it does not count one
        # extra batch.
        [
            confusion_matrix,
            mislabeled_filename_values,
            original_class_values,
            predicted_class_values,
            probability_values,
        ] = slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=eval_op,
            variables_to_restore=variables_to_restore,
            final_op=[
                names_to_updates['Confusion_matrix'],
                names_to_values['mislabeled_filenames'],
                names_to_values['original_classes'],
                names_to_values['predicted_classes'],
                names_to_values['probabilities']
            ])

        print(confusion_matrix)

        # One (filename, true class, predicted class, probability) record
        # per misclassified image.
        zipped = list(zip(list(mislabeled_filename_values),
                          list(original_class_values),
                          list(predicted_class_values),
                          list(probability_values)))

        with open('misclassified_images.p', 'wb') as f:
            pickle.dump(zipped, f)

        if FLAGS.print_misclassified_images:
            print(zipped)
def main(_):
    """Evaluate a 14-label chest x-ray model, reporting one AUC per label.

    Reads 'image' plus 'label1'..'label14' from the dataset, runs the network
    on batches, rescales each output column to [0, 1] with a per-batch
    min-max normalisation and accumulates a streaming AUC for each label.

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    # Number of binary labels per image; also the network's output width.
    num_labels = 14

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name, num_classes=num_labels, is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)

        # Items are named 'label1' .. 'label14' in the dataset.
        label_keys = ['label%d' % index
                      for index in range(1, num_labels + 1)]
        items = provider.get(['image'] + label_keys)
        image = items[0]
        single_labels = list(items[1:])
        print(image.shape)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = 'nihxray'
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = (FLAGS.eval_image_size or
                           network_fn.default_image_size)
        image = image_preprocessing_fn(image, eval_image_size,
                                       eval_image_size)

        batched = tf.train.batch(
            [image] + single_labels,
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)
        images = batched[0]
        label_batches = batched[1:]

        # Give every label batch a trailing axis and join them column-wise
        # into one labels tensor with one column per label.
        expanded = [tf.expand_dims(batch, 1) for batch in label_batches]
        labels = tf.concat(expanded, 1)
        print(labels.shape)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = logits

        # One prediction column per label.
        preds = [predictions[:, column] for column in range(num_labels)]

        # Min-max normalise each column over the current batch so the values
        # fall in [0, 1].
        # NOTE(review): the denominator is zero when every value of a column
        # is identical within a batch -- confirm this cannot occur.
        preds = [
            tf.div(tf.subtract(pred, tf.reduce_min(pred)),
                   tf.subtract(tf.reduce_max(pred), tf.reduce_min(pred)))
            for pred in preds
        ]

        label_columns = [labels[:, column] for column in range(num_labels)]

        print(preds[0].shape)
        print(label_columns[0].shape)

        # Define the metrics: 'AUC1' .. 'AUC14', one streaming AUC per label.
        metric_map = {}
        for column in range(num_labels):
            metric_map['AUC%d' % (column + 1)] = slim.metrics.streaming_auc(
                preds[column], label_columns[column])
        names_to_values, names_to_updates = (
            slim.metrics.aggregate_metric_map(metric_map))

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(
                dataset.num_samples / float(FLAGS.batch_size))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(
                FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
def main(_):
    """Evaluate every checkpoint recorded in --checkpoint_dir.

    The evaluation graph is built once. For each path in the checkpoint
    state's `all_model_checkpoint_paths`, `slim.evaluation.evaluate_once` is
    run with a throw-away log directory; the 'eval/Accuracy' and
    'eval/Recall_5' summary values are read back from the event file it wrote
    and re-emitted into --eval_dir, stamped with the wall time and step parsed
    from the checkpoint file name (`model.ckpt-<wall_time>-<step>`).

    Raises:
        ValueError: if --dataset_dir is missing, --checkpoint_dir is not a
            directory, --eval_dir already exists, no checkpoint state is
            found, or a checkpoint name does not follow the expected pattern.
        RuntimeError: if an evaluation run produced no event file or the
            expected metrics are missing from it.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    if not tf.gfile.IsDirectory(FLAGS.checkpoint_dir):
        raise ValueError(
            'You must supply the checkpoint directory with --checkpoint_dir')
    if os.path.exists(FLAGS.eval_dir):
        raise ValueError('eval_dir exists')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label] = provider.get(['image', 'label'])
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = (
            FLAGS.eval_image_size or network_fn.default_image_size)

        image = image_preprocessing_fn(
            image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5': slim.metrics.streaming_recall_at_k(logits, labels, 5),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(
                dataset.num_samples / float(FLAGS.batch_size))

        # Resolve the checkpoint list before creating the writer so that a
        # failure here does not leave an empty --eval_dir behind (which would
        # trip the "eval_dir exists" check on the next run).
        checkpoint_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if checkpoint_state is None:
            raise ValueError(
                'No checkpoint state found in %s' % FLAGS.checkpoint_dir)

        # Raw string with escaped dots: the dots are literal separators.
        prog = re.compile(
            r'.*model\.ckpt-(?P<wall_time>\d+\.\d+)-(?P<step>\d+)')

        writer = tf.summary.FileWriter(FLAGS.eval_dir)
        try:
            writer.add_event(
                event_pb2.Event(wall_time=0, file_version="brain.Event:2"))

            for checkpoint_path in checkpoint_state.all_model_checkpoint_paths:
                # Validate the name first: without wall time and step the
                # result cannot be recorded, so do not waste an evaluation.
                m = prog.match(checkpoint_path)
                if m is None:
                    raise ValueError(
                        'Checkpoint path %s does not match '
                        'model.ckpt-<wall_time>-<step>' % checkpoint_path)
                wall_time = float(m.group('wall_time'))
                step = int(m.group('step'))

                # Reset per checkpoint so values from a previous iteration
                # can never be reported for this one.
                accuracy = None
                recall = None

                # Created outside the try block so the cleanup below never
                # refers to an unbound name.
                temp_eval_dir = tempfile.mkdtemp()
                try:
                    tf.logging.info('Evaluating %s' % checkpoint_path)

                    slim.evaluation.evaluate_once(
                        master=FLAGS.master,
                        checkpoint_path=checkpoint_path,
                        logdir=temp_eval_dir,
                        num_evals=num_batches,
                        eval_op=list(names_to_updates.values()),
                        variables_to_restore=variables_to_restore)

                    event_files = glob.glob(
                        os.path.join(temp_eval_dir, 'events.out*'))
                    if not event_files:
                        raise RuntimeError(
                            'No event file was written while evaluating %s'
                            % checkpoint_path)

                    for event in tf.train.summary_iterator(event_files[0]):
                        for value in event.summary.value:
                            if value.tag == 'eval/Accuracy':
                                accuracy = value
                            if value.tag == 'eval/Recall_5':
                                recall = value
                finally:
                    try:
                        shutil.rmtree(temp_eval_dir)
                    except OSError as e:
                        # A directory that is already gone is fine.
                        if e.errno != errno.ENOENT:
                            raise

                if accuracy is None or recall is None:
                    raise RuntimeError(
                        'Evaluation of %s did not report eval/Accuracy and '
                        'eval/Recall_5' % checkpoint_path)

                summary = summary_pb2.Summary(value=[accuracy, recall])
                writer.add_event(
                    event_pb2.Event(
                        wall_time=wall_time, step=step, summary=summary))
        finally:
            # Flush pending events and release the file handle.
            writer.close()
def main_fun(argv, ctx):
    """Per-node entry point for distributed TF-Slim image-classifier training.

    Defines the command-line flags, overrides the cluster-related ones from
    `ctx`, starts the cluster server through `TFNode.start_cluster_server`,
    and then either blocks serving parameters (job 'ps') or builds the
    training graph and runs `slim.learning.train` (any other job).

    Args:
        argv: argument list assigned to `sys.argv` before the flags are read.
        ctx: node context; this function reads `job_name`, `task_index`,
            `cluster_spec` and `worker_num` from it.

    Raises:
        ValueError: if --dataset_dir is not supplied on a worker, or if the
            learning-rate decay type or optimizer name is not recognized.
        AssertionError: if --num_ps_tasks disagrees with the number of 'ps'
            entries in `ctx.cluster_spec`.
    """
    import tensorflow as tf
    from tensorflow.python.ops import control_flow_ops
    from datasets import dataset_factory
    from deployment import model_deploy
    from nets import nets_factory
    from preprocessing import preprocessing_factory

    sys.argv = argv

    slim = tf.contrib.slim
    flags = tf.app.flags

    # Integer flag: the default is given as an int rather than the string '1'.
    flags.DEFINE_integer(
        'num_gpus', 1, 'The number of GPUs to use per node')
    flags.DEFINE_boolean('rdma', False, 'Whether to use rdma.')

    flags.DEFINE_string(
        'master', '', 'The address of the TensorFlow master to use.')

    flags.DEFINE_string(
        'train_dir', '/tmp/tfmodel/',
        'Directory where checkpoints and event logs are written to.')

    flags.DEFINE_integer('num_clones', 1,
                         'Number of model clones to deploy.')

    flags.DEFINE_boolean('clone_on_cpu', False,
                         'Use CPUs to deploy clones.')

    flags.DEFINE_integer('worker_replicas', 1, 'Number of worker replicas.')

    flags.DEFINE_integer(
        'num_ps_tasks', 0,
        'The number of parameter servers. If the value is 0, then the parameters '
        'are handled locally by the worker.')

    flags.DEFINE_integer(
        'num_readers', 4,
        'The number of parallel readers that read data from the dataset.')

    flags.DEFINE_integer(
        'num_preprocessing_threads', 4,
        'The number of threads used to create the batches.')

    flags.DEFINE_integer(
        'log_every_n_steps', 10,
        'The frequency with which logs are print.')

    flags.DEFINE_integer(
        'save_summaries_secs', 600,
        'The frequency with which summaries are saved, in seconds.')

    flags.DEFINE_integer(
        'save_interval_secs', 600,
        'The frequency with which the model is saved, in seconds.')

    flags.DEFINE_integer(
        'task', 0, 'Task id of the replica running the training.')

    ######################
    # Optimization Flags #
    ######################

    flags.DEFINE_float(
        'weight_decay', 0.00004, 'The weight decay on the model weights.')

    flags.DEFINE_string(
        'optimizer', 'rmsprop',
        'The name of the optimizer, one of "adadelta", "adagrad", "adam",'
        '"ftrl", "momentum", "sgd" or "rmsprop".')

    flags.DEFINE_float(
        'adadelta_rho', 0.95,
        'The decay rate for adadelta.')

    flags.DEFINE_float(
        'adagrad_initial_accumulator_value', 0.1,
        'Starting value for the AdaGrad accumulators.')

    flags.DEFINE_float(
        'adam_beta1', 0.9,
        'The exponential decay rate for the 1st moment estimates.')

    flags.DEFINE_float(
        'adam_beta2', 0.999,
        'The exponential decay rate for the 2nd moment estimates.')

    flags.DEFINE_float('opt_epsilon', 1.0, 'Epsilon term for the optimizer.')

    flags.DEFINE_float('ftrl_learning_rate_power', -0.5,
                       'The learning rate power.')

    flags.DEFINE_float(
        'ftrl_initial_accumulator_value', 0.1,
        'Starting value for the FTRL accumulators.')

    flags.DEFINE_float(
        'ftrl_l1', 0.0, 'The FTRL l1 regularization strength.')

    flags.DEFINE_float(
        'ftrl_l2', 0.0, 'The FTRL l2 regularization strength.')

    flags.DEFINE_float(
        'momentum', 0.9,
        'The momentum for the MomentumOptimizer and RMSPropOptimizer.')

    flags.DEFINE_float('rmsprop_decay', 0.9, 'Decay term for RMSProp.')

    #######################
    # Learning Rate Flags #
    #######################

    flags.DEFINE_string(
        'learning_rate_decay_type',
        'exponential',
        'Specifies how the learning rate is decayed. One of "fixed", "exponential",'
        ' or "polynomial"')

    flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')

    flags.DEFINE_float(
        'end_learning_rate', 0.0001,
        'The minimal end learning rate used by a polynomial decay learning rate.')

    flags.DEFINE_float(
        'label_smoothing', 0.0, 'The amount of label smoothing.')

    flags.DEFINE_float(
        'learning_rate_decay_factor', 0.94, 'Learning rate decay factor.')

    flags.DEFINE_float(
        'num_epochs_per_decay', 2.0,
        'Number of epochs after which learning rate decays.')

    flags.DEFINE_bool(
        'sync_replicas', False,
        'Whether or not to synchronize the replicas during training.')

    flags.DEFINE_integer(
        'replicas_to_aggregate', 1,
        'The Number of gradients to collect before updating params.')

    flags.DEFINE_float(
        'moving_average_decay', None,
        'The decay to use for the moving average.'
        'If left as None, then moving averages are not used.')

    #######################
    # Dataset Flags #
    #######################

    flags.DEFINE_string(
        'dataset_name', 'imagenet', 'The name of the dataset to load.')

    flags.DEFINE_string(
        'dataset_split_name', 'train', 'The name of the train/test split.')

    flags.DEFINE_string(
        'dataset_dir', None,
        'The directory where the dataset files are stored.')

    flags.DEFINE_integer(
        'labels_offset', 0,
        'An offset for the labels in the dataset. This flag is primarily used to '
        'evaluate the VGG and ResNet architectures which do not use a background '
        'class for the ImageNet dataset.')

    flags.DEFINE_string(
        'model_name', 'inception_v3', 'The name of the architecture to train.')

    flags.DEFINE_string(
        'preprocessing_name', None,
        'The name of the preprocessing to use. If left '
        'as `None`, then the model_name flag is used.')

    flags.DEFINE_integer(
        'batch_size', 32, 'The number of samples in each batch.')

    flags.DEFINE_integer(
        'train_image_size', None, 'Train image size')

    flags.DEFINE_integer('max_number_of_steps', None,
                         'The maximum number of training steps.')

    #####################
    # Fine-Tuning Flags #
    #####################

    flags.DEFINE_string(
        'checkpoint_path', None,
        'The path to a checkpoint from which to fine-tune.')

    flags.DEFINE_string(
        'checkpoint_exclude_scopes', None,
        'Comma-separated list of scopes of variables to exclude when restoring '
        'from a checkpoint.')

    flags.DEFINE_string(
        'trainable_scopes', None,
        'Comma-separated list of scopes to filter the set of variables to train.'
        'By default, None would train all the variables.')

    flags.DEFINE_boolean(
        'ignore_missing_vars', False,
        'When restoring a checkpoint would ignore missing variables.')

    FLAGS = tf.app.flags.FLAGS
    # The cluster layout comes from the node context, not the command line.
    FLAGS.job_name = ctx.job_name
    FLAGS.task = ctx.task_index
    FLAGS.num_clones = FLAGS.num_gpus
    FLAGS.worker_replicas = len(ctx.cluster_spec['worker'])
    assert(FLAGS.num_ps_tasks == (
        len(ctx.cluster_spec['ps']) if 'ps' in ctx.cluster_spec else 0))

    def _configure_learning_rate(num_samples_per_epoch, global_step):
        """Configures the learning rate.

        Args:
            num_samples_per_epoch: The number of samples in each epoch of
                training.
            global_step: The global_step tensor.

        Returns:
            A `Tensor` representing the learning rate.

        Raises:
            ValueError: if FLAGS.learning_rate_decay_type is not recognized.
        """
        decay_steps = int(num_samples_per_epoch / FLAGS.batch_size *
                          FLAGS.num_epochs_per_decay)
        if FLAGS.sync_replicas:
            decay_steps /= FLAGS.replicas_to_aggregate

        if FLAGS.learning_rate_decay_type == 'exponential':
            return tf.train.exponential_decay(
                FLAGS.learning_rate,
                global_step,
                decay_steps,
                FLAGS.learning_rate_decay_factor,
                staircase=True,
                name='exponential_decay_learning_rate')
        elif FLAGS.learning_rate_decay_type == 'fixed':
            return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate')
        elif FLAGS.learning_rate_decay_type == 'polynomial':
            return tf.train.polynomial_decay(
                FLAGS.learning_rate,
                global_step,
                decay_steps,
                FLAGS.end_learning_rate,
                power=1.0,
                cycle=False,
                name='polynomial_decay_learning_rate')
        else:
            # Format the message; passing the value as a second argument
            # would leave the '%s' placeholder unfilled.
            raise ValueError(
                'learning_rate_decay_type [%s] was not recognized'
                % FLAGS.learning_rate_decay_type)

    def _configure_optimizer(learning_rate):
        """Configures the optimizer used for training.

        Args:
            learning_rate: A scalar or `Tensor` learning rate.

        Returns:
            An instance of an optimizer.

        Raises:
            ValueError: if FLAGS.optimizer is not recognized.
        """
        if FLAGS.optimizer == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(
                learning_rate,
                rho=FLAGS.adadelta_rho,
                epsilon=FLAGS.opt_epsilon)
        elif FLAGS.optimizer == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(
                learning_rate,
                initial_accumulator_value=(
                    FLAGS.adagrad_initial_accumulator_value))
        elif FLAGS.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate,
                beta1=FLAGS.adam_beta1,
                beta2=FLAGS.adam_beta2,
                epsilon=FLAGS.opt_epsilon)
        elif FLAGS.optimizer == 'ftrl':
            optimizer = tf.train.FtrlOptimizer(
                learning_rate,
                learning_rate_power=FLAGS.ftrl_learning_rate_power,
                initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value,
                l1_regularization_strength=FLAGS.ftrl_l1,
                l2_regularization_strength=FLAGS.ftrl_l2)
        elif FLAGS.optimizer == 'momentum':
            optimizer = tf.train.MomentumOptimizer(
                learning_rate,
                momentum=FLAGS.momentum,
                name='Momentum')
        elif FLAGS.optimizer == 'rmsprop':
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate,
                decay=FLAGS.rmsprop_decay,
                momentum=FLAGS.momentum,
                epsilon=FLAGS.opt_epsilon)
        elif FLAGS.optimizer == 'sgd':
            optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        else:
            raise ValueError(
                'Optimizer [%s] was not recognized' % FLAGS.optimizer)
        return optimizer

    def _add_variables_summaries(learning_rate):
        """Returns histogram summaries for all model variables plus a scalar
        summary for the learning rate."""
        summaries = []
        for variable in slim.get_model_variables():
            summaries.append(tf.summary.histogram(variable.op.name, variable))
        summaries.append(
            tf.summary.scalar('training/Learning Rate', learning_rate))
        return summaries

    def _get_init_fn():
        """Returns a function run by the chief worker to warm-start the training.

        Note that the init_fn is only run when initializing the model during
        the very first global step.

        Returns:
            An init function run by the supervisor, or None when
            --checkpoint_path is unset or --train_dir already holds a
            checkpoint.
        """
        if FLAGS.checkpoint_path is None:
            return None

        # Warn the user if a checkpoint exists in the train_dir. Then we'll be
        # ignoring the checkpoint anyway.
        if tf.train.latest_checkpoint(FLAGS.train_dir):
            tf.logging.info(
                'Ignoring --checkpoint_path because a checkpoint already exists in %s'
                % FLAGS.train_dir)
            return None

        exclusions = []
        if FLAGS.checkpoint_exclude_scopes:
            exclusions = [scope.strip()
                          for scope in FLAGS.checkpoint_exclude_scopes.split(',')]

        # TODO(sguada) variables.filter_variables()
        variables_to_restore = []
        for var in slim.get_model_variables():
            excluded = False
            for exclusion in exclusions:
                if var.op.name.startswith(exclusion):
                    excluded = True
                    break
            if not excluded:
                variables_to_restore.append(var)

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Fine-tuning from %s' % checkpoint_path)

        return slim.assign_from_checkpoint_fn(
            checkpoint_path,
            variables_to_restore,
            ignore_missing_vars=FLAGS.ignore_missing_vars)

    def _get_variables_to_train():
        """Returns a list of variables to train.

        Returns:
            A list of variables to train by the optimizer.
        """
        if FLAGS.trainable_scopes is None:
            return tf.trainable_variables()
        else:
            scopes = [scope.strip()
                      for scope in FLAGS.trainable_scopes.split(',')]

        variables_to_train = []
        for scope in scopes:
            variables = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope)
            variables_to_train.extend(variables)
        return variables_to_train

    # main
    cluster_spec, server = TFNode.start_cluster_server(
        ctx=ctx, num_gpus=FLAGS.num_gpus, rdma=FLAGS.rdma)
    if ctx.job_name == 'ps':
        # `ps` jobs wait for incoming connections from the workers.
        server.join()
    else:
        # `worker` jobs will actually do the work.
        if not FLAGS.dataset_dir:
            raise ValueError(
                'You must supply the dataset directory with --dataset_dir')

        tf.logging.set_verbosity(tf.logging.INFO)
        with tf.Graph().as_default():
            #######################
            # Config model_deploy #
            #######################
            deploy_config = model_deploy.DeploymentConfig(
                num_clones=FLAGS.num_clones,
                clone_on_cpu=FLAGS.clone_on_cpu,
                replica_id=FLAGS.task,
                num_replicas=FLAGS.worker_replicas,
                num_ps_tasks=FLAGS.num_ps_tasks)

            # Create global_step
            #with tf.device(deploy_config.variables_device()):
            #  global_step = slim.create_global_step()
            with tf.device("/job:ps/task:0"):
                global_step = tf.Variable(0, name="global_step")

            ######################
            # Select the dataset #
            ######################
            dataset = dataset_factory.get_dataset(
                FLAGS.dataset_name, FLAGS.dataset_split_name,
                FLAGS.dataset_dir)

            ######################
            # Select the network #
            ######################
            network_fn = nets_factory.get_network_fn(
                FLAGS.model_name,
                num_classes=(dataset.num_classes - FLAGS.labels_offset),
                weight_decay=FLAGS.weight_decay,
                is_training=True)

            #####################################
            # Select the preprocessing function #
            #####################################
            preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
            image_preprocessing_fn = preprocessing_factory.get_preprocessing(
                preprocessing_name,
                is_training=True)

            ##############################################################
            # Create a dataset provider that loads data from the dataset #
            ##############################################################
            with tf.device(deploy_config.inputs_device()):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=FLAGS.num_readers,
                    common_queue_capacity=20 * FLAGS.batch_size,
                    common_queue_min=10 * FLAGS.batch_size)
                [image, label] = provider.get(['image', 'label'])
                label -= FLAGS.labels_offset

                train_image_size = (
                    FLAGS.train_image_size or network_fn.default_image_size)

                image = image_preprocessing_fn(
                    image, train_image_size, train_image_size)

                images, labels = tf.train.batch(
                    [image, label],
                    batch_size=FLAGS.batch_size,
                    num_threads=FLAGS.num_preprocessing_threads,
                    capacity=5 * FLAGS.batch_size)
                labels = slim.one_hot_encoding(
                    labels, dataset.num_classes - FLAGS.labels_offset)
                batch_queue = slim.prefetch_queue.prefetch_queue(
                    [images, labels], capacity=2 * deploy_config.num_clones)

            ####################
            # Define the model #
            ####################
            def clone_fn(batch_queue):
                """Allows data parallelism by creating multiple clones of network_fn."""
                images, labels = batch_queue.dequeue()
                logits, end_points = network_fn(images)

                #############################
                # Specify the loss function #
                #############################
                if 'AuxLogits' in end_points:
                    tf.losses.softmax_cross_entropy(
                        logits=end_points['AuxLogits'],
                        onehot_labels=labels,
                        label_smoothing=FLAGS.label_smoothing,
                        weights=0.4,
                        scope='aux_loss')
                tf.losses.softmax_cross_entropy(
                    logits=logits,
                    onehot_labels=labels,
                    label_smoothing=FLAGS.label_smoothing,
                    weights=1.0)
                return end_points

            # Gather initial summaries.
            summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

            clones = model_deploy.create_clones(
                deploy_config, clone_fn, [batch_queue])
            first_clone_scope = deploy_config.clone_scope(0)
            # Gather update_ops from the first clone. These contain, for example,
            # the updates for the batch_norm variables created by network_fn.
            update_ops = tf.get_collection(
                tf.GraphKeys.UPDATE_OPS, first_clone_scope)

            # Add summaries for end_points.
            end_points = clones[0].outputs
            for end_point in end_points:
                x = end_points[end_point]
                summaries.add(
                    tf.summary.histogram('activations/' + end_point, x))
                summaries.add(
                    tf.summary.scalar('sparsity/' + end_point,
                                      tf.nn.zero_fraction(x)))

            # Add summaries for losses.
            for loss in tf.get_collection(
                    tf.GraphKeys.LOSSES, first_clone_scope):
                summaries.add(
                    tf.summary.scalar('losses/%s' % loss.op.name, loss))

            # Add summaries for variables.
            for variable in slim.get_model_variables():
                summaries.add(
                    tf.summary.histogram(variable.op.name, variable))

            #################################
            # Configure the moving averages #
            #################################
            if FLAGS.moving_average_decay:
                moving_average_variables = slim.get_model_variables()
                variable_averages = tf.train.ExponentialMovingAverage(
                    FLAGS.moving_average_decay, global_step)
            else:
                moving_average_variables, variable_averages = None, None

            #########################################
            # Configure the optimization procedure. #
            #########################################
            with tf.device(deploy_config.optimizer_device()):
                learning_rate = _configure_learning_rate(
                    dataset.num_samples, global_step)
                optimizer = _configure_optimizer(learning_rate)
                summaries.add(
                    tf.summary.scalar('learning_rate', learning_rate))

            if FLAGS.sync_replicas:
                # If sync_replicas is enabled, the averaging will be done in the chief
                # queue runner.
                optimizer = tf.train.SyncReplicasOptimizer(
                    opt=optimizer,
                    replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                    variable_averages=variable_averages,
                    variables_to_average=moving_average_variables,
                    replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
                    total_num_replicas=FLAGS.worker_replicas)
            elif FLAGS.moving_average_decay:
                # Update ops executed locally by trainer.
                update_ops.append(
                    variable_averages.apply(moving_average_variables))

            # Variables to train.
            variables_to_train = _get_variables_to_train()

            #  and returns a train_tensor and summary_op
            total_loss, clones_gradients = model_deploy.optimize_clones(
                clones,
                optimizer,
                var_list=variables_to_train)
            # Add total_loss to summary.
            summaries.add(tf.summary.scalar('total_loss', total_loss))

            # Create gradient updates.
            grad_updates = optimizer.apply_gradients(
                clones_gradients, global_step=global_step)
            update_ops.append(grad_updates)

            update_op = tf.group(*update_ops)
            train_tensor = control_flow_ops.with_dependencies(
                [update_op], total_loss, name='train_op')

            # Add the summaries from the first clone. These contain the summaries
            # created by model_fn and either optimize_clones() or _gather_clone_loss().
            summaries |= set(
                tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

            # Merge all summaries together.
            summary_op = tf.summary.merge(list(summaries), name='summary_op')

            ###########################
            # Kicks off the training. #
            ###########################
            summary_writer = tf.summary.FileWriter(
                "tensorboard_%d" % (ctx.worker_num),
                graph=tf.get_default_graph())
            slim.learning.train(
                train_tensor,
                logdir=FLAGS.train_dir,
                master=server.target,
                is_chief=(FLAGS.task == 0),
                init_fn=_get_init_fn(),
                summary_op=summary_op,
                number_of_steps=FLAGS.max_number_of_steps,
                log_every_n_steps=FLAGS.log_every_n_steps,
                save_summaries_secs=FLAGS.save_summaries_secs,
                save_interval_secs=FLAGS.save_interval_secs,
                summary_writer=summary_writer,
                sync_optimizer=optimizer if FLAGS.sync_replicas else None)
def main(_):
    """Train an image classifier with a hand-written session loop.

    Builds the cloned model, losses, optimizer and summaries through
    `model_deploy`, then runs `FLAGS.max_number_of_steps` training steps in a
    local `tf.Session`, printing the loss every `FLAGS.log_every_n_steps`
    steps, writing summaries every `FLAGS.summary_snapshot_steps` steps and
    saving a checkpoint every `FLAGS.model_snapshot_steps` steps and on the
    final step.

    Raises:
        ValueError: if --dataset_dir or --max_number_of_steps is not supplied.
        AssertionError: if the loss becomes NaN.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')
    # The loop below iterates over range(max_number_of_steps); fail early with
    # a clear message instead of a TypeError after the graph has been built.
    if FLAGS.max_number_of_steps is None:
        raise ValueError(
            'You must supply the number of training steps with '
            '--max_number_of_steps')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name,
            is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            label -= FLAGS.labels_offset

            train_image_size = (
                FLAGS.train_image_size or network_fn.default_image_size)

            image = image_preprocessing_fn(
                image, train_image_size, train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn."""
            images, labels = batch_queue.dequeue()
            logits, end_points = network_fn(images)

            #############################
            # Specify the loss function #
            #############################
            if 'AuxLogits' in end_points:
                slim.losses.softmax_cross_entropy(
                    end_points['AuxLogits'], labels,
                    label_smoothing=FLAGS.label_smoothing, weights=0.4,
                    scope='aux_loss')
            slim.losses.softmax_cross_entropy(
                logits, labels, label_smoothing=FLAGS.label_smoothing,
                weights=1.0)
            return end_points

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(
            deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Add summaries for end_points.
        end_points = clones[0].outputs
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))
            summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                            tf.nn.zero_fraction(x)))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(
                dataset.num_samples, global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                total_num_replicas=FLAGS.worker_replicas,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        #  and returns a train_tensor and summary_op
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(
            clones_gradients, global_step=global_step)
        update_ops.append(grad_updates)

        update_op = tf.group(*update_ops)
        with tf.control_dependencies([update_op]):
            train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        ###########################
        # Kicks off the training. #
        ###########################
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        session_config = tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement)
        # The context manager closes the session even if training fails.
        with tf.Session(config=session_config) as sess:
            sess.run(init)

            if FLAGS.checkpoint_path == FLAGS.train_dir:
                saver.restore(
                    sess, tf.train.latest_checkpoint(FLAGS.train_dir))

            # load pretrained weights
            # NOTE(review): _get_init_fn is defined elsewhere in this file;
            # guarded on the assumption that it can return None when there is
            # nothing to restore - confirm.
            weight_ini_fn = _get_init_fn()
            if weight_ini_fn is not None:
                weight_ini_fn(sess)

            # Start the queue runners under a coordinator so the input
            # threads can be stopped and joined when the loop ends.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
            try:
                for step in range(FLAGS.max_number_of_steps):
                    start_time = time.time()
                    # _, loss_value = sess.run([train_tensor, loss])
                    # _, loss_value = sess.run([train_tensor, total_loss])
                    loss_value = sess.run(train_tensor)
                    duration = time.time() - start_time

                    assert not np.isnan(loss_value), \
                        'Model diverged with loss = NaN'

                    if step % FLAGS.log_every_n_steps == 0:
                        # num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                        num_examples_per_step = FLAGS.batch_size
                        examples_per_sec = num_examples_per_step / duration
                        # sec_per_batch = duration / FLAGS.num_gpus
                        sec_per_batch = duration

                        format_str = ('step %d, loss = %.2f (%.1f examples/sec; %.3f '
                                      'sec/batch)')
                        print(format_str % (step, loss_value,
                                            examples_per_sec, sec_per_batch))

                    if step % FLAGS.summary_snapshot_steps == 0:
                        summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str, step)

                    # Save the model checkpoint periodically.
                    if (step % FLAGS.model_snapshot_steps == 0 or
                            (step + 1) == FLAGS.max_number_of_steps):
                        checkpoint_path = os.path.join(
                            FLAGS.train_dir, 'model.ckpt')
                        saver.save(sess, checkpoint_path, global_step=step)
            finally:
                # Flush summaries first so they survive a failing join.
                summary_writer.close()
                coord.request_stop()
                coord.join(threads)

    print('OK...')
def main(_):
    """Stylize FLAGS.image_file with a restored model and export the graph.

    Steps, in order:
      1. Decode the input image once to obtain its height and width.
      2. Build an inference graph fed by a uint8 placeholder named
         'image_tensor' whose result tensor is named 'output_image'.
      3. Restore the model variables from FLAGS.model_file, write the graph
         to the "logs" directory and pass the graph definition to
         `save_graph_to_file` with the path "models/new_freeze_graph.pb".
      4. Show the input and generated images with matplotlib and write the
         JPEG-encoded result to 'generated/res.jpg'.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # Get image's height and width.
    height = 0
    width = 0
    with tf.gfile.GFile(FLAGS.image_file, 'rb') as f:
        # `with tf.Session()` closes the session on exit; the previous
        # `tf.Session().as_default()` form left it open.
        with tf.Session() as sess:
            if FLAGS.image_file.lower().endswith('png'):
                image = sess.run(tf.image.decode_png(f.read()))
            else:
                image = sess.run(tf.image.decode_jpeg(f.read()))
            height = image.shape[0]
            width = image.shape[1]
    tf.logging.info('Image size: %dx%d' % (width, height))

    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Read image data.
            image_preprocessing_fn, _ = preprocessing_factory.get_preprocessing(
                FLAGS.loss_model,
                is_training=False)
            image = reader.get_image(
                FLAGS.image_file, height, width, image_preprocessing_fn)
            print(image)

            plt.subplot(121)
            np_image = sess.run(image)
            plt.imshow(np_image)

            input_shape = (None, None, 3)
            input_tensor = tf.placeholder(
                dtype=tf.uint8, shape=input_shape, name='image_tensor')
            print(input_tensor)

            with tf.variable_scope("input_process"):
                # Preprocessing image
                processed_image = utils.mean_image_subtraction(
                    input_tensor, [123.68, 116.779, 103.939])
                # Add batch dimension
                batched_image = tf.expand_dims(processed_image, 0)

            generated = model.net(batched_image, training=False)
            generated = tf.cast(generated, tf.uint8)

            # Remove batch dimension
            generated = tf.squeeze(generated, [0], name='output_image')

            # Restore model variables.
            saver = tf.train.Saver(
                tf.global_variables(),
                write_version=tf.train.SaverDef.V1)
            sess.run([tf.global_variables_initializer(),
                      tf.local_variables_initializer()])
            # Use absolute path
            FLAGS.model_file = os.path.abspath(FLAGS.model_file)
            saver.restore(sess, FLAGS.model_file)

            # Write the graph for TensorBoard; closing the writer flushes it
            # to disk.
            summary_writer = tf.summary.FileWriter("logs", sess.graph)
            summary_writer.close()

            save_graph_to_file(
                sess, sess.graph_def, "models/new_freeze_graph.pb")

            # Make sure 'generated' directory exists.
            generated_file = 'generated/res.jpg'
            if not os.path.exists('generated'):
                os.makedirs('generated')

            # Generate and write image data to file.
            with tf.gfile.GFile(generated_file, 'wb') as f:
                feed_dict = {input_tensor: np_image}
                plt.subplot(122)
                plt.imshow(sess.run(generated, feed_dict))
                plt.show()

                start_time = time.time()
                f.write(sess.run(tf.image.encode_jpeg(generated), feed_dict))
                end_time = time.time()

                tf.logging.info(
                    'Elapsed time: %fs' % (end_time - start_time))
                tf.logging.info(
                    'Done. Please check %s.' % generated_file)
def main(_):
    """Run a multi-label model over a dataset split and dump its outputs.

    Builds the slim input pipeline and network, restores the checkpoint given
    by ``FLAGS.checkpoint_path`` (latest checkpoint if it is a directory) and
    runs up to ``num_batches`` batches.  The logits, multi-hot labels and
    image names of every processed batch are concatenated and written to
    ``<checkpoint_path>.<dataset_split_name>-val.npz``.

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if ``--dataset_dir`` was not supplied.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label, image_name] = provider.get(['image', 'label', 'name'])
        print(image, label, image_name)
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name,
            is_training=False)

        eval_image_size = (FLAGS.eval_image_size or
                           network_fn.default_image_size)

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        # One-hot encode every label of the example and sum over axis 0 to
        # obtain a single multi-hot vector per image.
        labels = slim.one_hot_encoding(
            label, dataset.num_classes - FLAGS.labels_offset)
        labels = tf.reduce_sum(labels, axis=0)

        images, labels, image_names = tf.train.batch(
            [image, labels, image_name],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)
        print(images, labels)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)
        print(logits)

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.greater(logits, .7)
        labels = tf.cast(labels, tf.bool)
        # An example counts as correct only if every class decision matches.
        match = tf.reduce_all(tf.equal(predictions, labels), axis=1)
        # Accuracy is the mean of the per-example match indicator.  The
        # previous `streaming_percentage_less(match, 0.5)` measured the
        # fraction of *mismatches*, i.e. the error rate.
        accuracy = slim.metrics.streaming_mean(tf.cast(match, tf.float32))

        # Define the metrics (precision/recall were previously swapped):
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Accuracy': accuracy,
            'Precision': slim.metrics.streaming_precision(predictions, labels),
            'Recall': slim.metrics.streaming_recall(predictions, labels),
        })

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.summary.scalar(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(
                dataset.num_samples / float(FLAGS.batch_size))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)

        init = tf.global_variables_initializer()

        from tensorflow.python.training import saver as tf_saver
        saver = tf_saver.Saver(variables_to_restore)

        all_logits = []
        all_labels = []
        all_names = []
        try:
            with tf.Session() as sess:
                sess.run(init)
                saver.restore(sess, checkpoint_path)
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
                print('RUNNING')
                # The coordinator is stopped and joined while the session is
                # still open, and only once it is known to exist.
                try:
                    i = 0
                    while not coord.should_stop():
                        print(i)
                        logits_val, labels_val, name_val = sess.run(
                            [logits, labels, image_names])
                        all_logits.append(logits_val)
                        all_labels.append(labels_val)
                        all_names.append(name_val)
                        i += 1
                        if i == num_batches:
                            break
                except tf.errors.OutOfRangeError:
                    print('Done')
                finally:
                    coord.request_stop()
                    coord.join(threads)
        finally:
            # Best effort: persist whatever was collected, even after a
            # failure.  Skip when nothing was collected so np.concatenate
            # does not raise and hide the original exception.
            if all_logits:
                import numpy as np
                np.savez('%s.%s' % (checkpoint_path,
                                    '%s-val.npz' % FLAGS.dataset_split_name),
                         logits=np.concatenate(all_logits),
                         labels=np.concatenate(all_labels),
                         names=np.concatenate(all_names))
def main(_):
    """Continuously evaluate a classifier, with per-coarse-label accuracy.

    Builds the slim input pipeline and network, defines streaming loss and
    accuracy metrics plus an accuracy value per entry of
    ``dataset.coarse_labels_to_names``, and hands everything to
    ``slim.evaluation.evaluation_loop``, which re-evaluates checkpoints found
    in ``FLAGS.checkpoint_path`` every ``FLAGS.eval_interval_secs`` seconds.

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if ``--dataset_dir`` was not supplied.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        [image, label, coarse_label] = provider.get(
            ['image', 'label', 'coarse_label'])
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name,
            is_training=False)

        eval_image_size = (FLAGS.eval_image_size or
                           network_fn.default_image_size)

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels, coarse_labels = tf.train.batch(
            [image, label, coarse_label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)
        coarse_labels = tf.cast(coarse_labels, tf.int32)
        tf.image_summary('image', images, max_images=5)

        ####################
        # Define the model #
        ####################
        logits, _ = network_fn(images)

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        # NOTE(review): the one-hot depth is hard-coded to 2 while the network
        # is built with dataset.num_classes - labels_offset classes -- confirm
        # this script is only used for binary problems.
        one_hot_labels = slim.one_hot_encoding(labels, 2)
        loss = slim.losses.softmax_cross_entropy(logits, one_hot_labels)
        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Define the metrics:
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
            'Total_Loss': slim.metrics.streaming_mean(loss),
            'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
        })

        num_coarse_labels = len(dataset.coarse_labels_to_names)
        with tf.variable_scope('coarse_label_accuracy',
                               values=[predictions, labels, coarse_labels]):
            # Running per-coarse-label sums of correct predictions (totals)
            # and of examples seen (counts), kept as local variables.
            totals = tf.Variable(
                initial_value=tf.zeros([num_coarse_labels]),
                trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES],
                dtype=tf.float32,
                name='totals')
            counts = tf.Variable(
                initial_value=tf.zeros([num_coarse_labels]),
                trainable=False,
                collections=[tf.GraphKeys.LOCAL_VARIABLES],
                dtype=tf.float32,
                name='counts')
            correct = tf.cast(tf.equal(predictions, labels), tf.int32)
            accuracy_ops = []
            for index, coarse_key in enumerate(dataset.coarse_labels_to_names):
                # Correctness flags of the batch entries whose coarse label
                # equals this key.
                label_correct = tf.boolean_mask(
                    correct, tf.equal(coarse_key, coarse_labels))
                sum_correct = tf.reduce_sum(label_correct)
                sum_correct = tf.cast(tf.expand_dims(sum_correct, 0),
                                      tf.float32)
                # Sparse deltas that only touch slot `index` of the
                # accumulators.
                delta_totals = tf.SparseTensor([[index]], sum_correct,
                                               totals.get_shape())
                label_count = tf.cast(tf.shape(label_correct), tf.float32)
                delta_counts = tf.SparseTensor([[index]], label_count,
                                               counts.get_shape())
                totals_compute_op = tf.assign_add(
                    totals,
                    tf.sparse_tensor_to_dense(delta_totals),
                    use_locking=True)
                counts_compute_op = tf.assign_add(
                    counts,
                    tf.sparse_tensor_to_dense(delta_counts),
                    use_locking=True)
                accuracy_ops.append(totals_compute_op)
                accuracy_ops.append(counts_compute_op)
            with tf.control_dependencies(accuracy_ops):
                # Accuracy per coarse label; 0 where no example was seen, to
                # avoid dividing by zero.
                update_op = tf.select(tf.equal(counts, 0),
                                      tf.zeros_like(counts, tf.float32),
                                      tf.div(totals, counts))
        names_to_updates['Coarse_Label_Accuracy'] = update_op

        if FLAGS.recall:
            recall_value, recall_update = slim.metrics.streaming_recall_at_k(
                logits, labels, 5)
            names_to_values['Recall@5'] = recall_value
            names_to_updates['Recall@5'] = recall_update

        # Print the summaries to screen.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            op = tf.scalar_summary(summary_name, value, collections=[])
            op = tf.Print(op, [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
        for index, label_name in enumerate(
                dataset.coarse_labels_to_names.values()):
            summary_name = 'eval/%s' % label_name
            op = tf.scalar_summary(summary_name, update_op[index],
                                   collections=[])
            op = tf.Print(op, [update_op[index]], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(
                dataset.num_samples / float(FLAGS.batch_size))

        tf.logging.info('Evaluating %s' % FLAGS.checkpoint_path)

        # Pass the restore map computed above.  Calling
        # slim.get_variables_to_restore() again here would silently drop the
        # moving-average mapping when --moving_average_decay is set.
        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            eval_interval_secs=FLAGS.eval_interval_secs,
            variables_to_restore=variables_to_restore)
# NOTE(review): the next line is the tail of a statement that starts before
# this fragment (presumably a `ds = ds.apply(` call) -- confirm against the
# full file.
tf.contrib.data.parallel_interleave(tf.data.TFRecordDataset, cycle_length=8))
# Decode records with 16 parallel calls.
ds = ds.map(decode, num_parallel_calls=16)
# Shuffle with a fixed seed and repeat, using a buffer of 16 batches.
ds = ds.apply(
    tf.contrib.data.shuffle_and_repeat(buffer_size=batch_size * 16, seed=1234))
# Fixed-size batches only; the trailing partial batch is dropped.
ds = ds.apply(tf.contrib.data.batch_and_drop_remainder(batch_size))
ds = ds.prefetch(buffer_size=batch_size * 16)
# Iterator defined by structure and bound to `ds` through `training_init_op`.
iterator = tf.data.Iterator.from_structure(ds.output_types, ds.output_shapes)
images, labels = iterator.get_next()
training_init_op = iterator.make_initializer(ds)


# In[5]:


# Inception-v1 preprocessing in evaluation mode (is_training=False); the
# height and width arguments are passed as None.
image_prep_fn = preprocessing_factory.get_preprocessing('inception_v1', is_training=False)
images_preped = image_prep_fn(images, None, None)
print images, images_preped
import model
# NOTE(review): the third positional argument (True) is presumably a training
# flag -- confirm against model.build_net.
class_logits = model.build_net(images_preped, num_classes, True, args.model)
# Float one-hot targets for the softmax cross-entropy below.
labels_oh = tf.one_hot(labels, num_classes, on_value=1., off_value=0., dtype=tf.float32)
cls_loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_oh, logits=class_logits)
# Reduce the loss to a scalar by taking the mean.
cls_loss = tf.reduce_mean(cls_loss)
def main(FLAGS):
    """Train the image transformation network for fast style transfer.

    The generator output and the input batch are both pushed through a fixed
    loss network (``FLAGS.loss_model``); content, style and total-variation
    losses are combined with the weights from ``FLAGS`` and minimised with
    Adam over the generator variables only.  Checkpoints are written to
    ``<FLAGS.model_path>/<FLAGS.naming>`` every 50 steps and once more when
    the input queue is exhausted.

    Args:
        FLAGS: configuration object providing the attributes read below.
    """
    style_features_t = losses.get_style_features(FLAGS)

    training_path = os.path.join(FLAGS.model_path, FLAGS.naming)
    if not os.path.exists(training_path):
        os.makedirs(training_path)

    with tf.Graph().as_default(), tf.Session() as sess:
        # Build the loss network.
        network_fn = nets_factory.get_network_fn(
            FLAGS.loss_model,
            num_classes=1,
            is_training=False)

        image_preprocessing_fn, image_unprocessing_fn = (
            preprocessing_factory.get_preprocessing(
                FLAGS.loss_model,
                is_training=False))

        # Preprocess the training images.
        processed_images = reader.batch_image(
            FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size,
            'train2014/', image_preprocessing_fn, epochs=FLAGS.epoch)
        generated = model.transform_network(processed_images, training=True)

        # Apply the loss network's preprocessing to each generated image.
        generated_images = tf.unstack(generated, axis=0,
                                      num=FLAGS.batch_size)
        processed_generated = []
        for single_image in generated_images:
            processed_generated.append(
                image_preprocessing_fn(single_image, FLAGS.image_size,
                                       FLAGS.image_size))
        processed_generated = tf.stack(processed_generated)

        # Generated and original images go through the loss network in one
        # concatenated batch.
        loss_network_input = tf.concat(
            [processed_generated, processed_images], 0)
        _, endpoints_dict = network_fn(loss_network_input,
                                       spatial_squeeze=False)

        tf.logging.info('Loss network layers(You can define them in "content_layers" and "style_layers"):')
        for endpoint_name in endpoints_dict:
            tf.logging.info(endpoint_name)

        # Build the losses.
        content_loss = losses.content_loss(endpoints_dict,
                                           FLAGS.content_layers)
        style_loss, style_loss_summary = losses.style_loss(
            endpoints_dict, style_features_t, FLAGS.style_layers)
        # use the unprocessed image
        tv_loss = losses.total_variation_loss(generated)

        loss = (FLAGS.style_weight * style_loss
                + FLAGS.content_weight * content_loss
                + FLAGS.tv_weight * tv_loss)

        # Prepare for training.
        global_step = tf.Variable(0, name="global_step", trainable=False)

        # Only train and save the variables of the generator network, i.e.
        # everything whose name does not start with the loss model's name.
        variable_to_train = [
            var for var in tf.trainable_variables()
            if not var.name.startswith(FLAGS.loss_model)
        ]

        # Optimisation.
        optimizer = tf.train.AdamOptimizer(1e-3)
        train_op = optimizer.minimize(loss, global_step=global_step,
                                      var_list=variable_to_train)

        variables_to_restore = [
            var for var in tf.global_variables()
            if not var.name.startswith(FLAGS.loss_model)
        ]
        saver = tf.train.Saver(variables_to_restore,
                               write_version=tf.train.SaverDef.V1)

        sess.run([tf.global_variables_initializer(),
                  tf.local_variables_initializer()])

        init_func = utils._get_init_fn(FLAGS)
        init_func(sess)

        # Resume from the most recent checkpoint when one exists.
        last_file = tf.train.latest_checkpoint(training_path)
        if last_file:
            tf.logging.info('Restoring model from {}'.format(last_file))
            saver.restore(sess, last_file)

        # Start training.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        start_time = time.time()
        try:
            while not coord.should_stop():
                _, loss_t, step = sess.run([train_op, loss, global_step])
                now = time.time()
                elapsed_time = now - start_time
                start_time = time.time()

                if step % 10 == 0:
                    tf.logging.info(
                        'step: %d, total Loss %f, secs/step: %f,%s'
                        % (step, loss_t, elapsed_time, time.asctime()))

                # Periodic checkpoint.
                if step % 50 == 0:
                    tf.logging.info('saving check point...')
                    checkpoint_file = os.path.join(training_path,
                                                   FLAGS.naming + '.ckpt')
                    saver.save(sess, checkpoint_file, global_step=step)
        except tf.errors.OutOfRangeError:
            saver.save(sess, os.path.join(training_path,
                                          'fast-style-model.ckpt-done'))
            tf.logging.info('Done training -- epoch limit reached')
        finally:
            coord.request_stop()
            tf.logging.info('coordinator stop')
        coord.join(threads)
def use_tensorflow_get_feature(base_path, save_path,
                               checkpoint_dir='/home/lee/Downloads/logs/'):
    """Extract ResNet-v1-50 ``pool5`` features for a folder-per-class dataset.

    Every sub-directory of ``base_path`` is treated as one class.  For each
    class the ``.jpg`` files are preprocessed, fed through the restored
    network in batches of 64, and the flattened features are stored as
    ``[feature, class_name, image_path]`` rows in
    ``tensorflow-resnet-50_feature.npy`` inside the mirrored directory under
    ``save_path``.  A class is skipped when a feature file with one row per
    image already exists.  Finally all per-class files are merged into
    ``tensorflow-feature.npy``, ``tensorflow-class_name.npy`` and
    ``tensorflow-file_path.npy`` in ``save_path``.

    Args:
        base_path: directory containing one sub-directory per class.
        save_path: directory receiving the feature files; class directories
            are mapped by replacing ``base_path`` with ``save_path``.
        checkpoint_dir: directory holding the checkpoint to restore.
    """
    feature_filename = 'tensorflow-resnet-50_feature.npy'
    batch_size = 64

    def feature_file_for(class_dir):
        # Path of the per-class feature file mirrored under save_path.
        return os.path.join(class_dir.replace(base_path, save_path),
                            feature_filename)

    checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
    input_checkpoint = checkpoint.model_checkpoint_path
    network_fn = nets_factory.get_network_fn('resnet_v1_50',
                                             num_classes=100000,
                                             is_training=False)
    placeholder = tf.placeholder(name='input', dtype=tf.float32,
                                 shape=[None, 224, 224, 3])
    network_fn(placeholder)
    saver = tf.train.Saver()
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    saver.restore(sess, input_checkpoint)

    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        'resnet_v1_50', is_training=False)
    img_pla = tf.placeholder(dtype=tf.float32, shape=[None, None, 3],
                             name='img')
    image_preprocessing = image_preprocessing_fn(img_pla, 224, 224)

    # (class_name, class_dir, [jpg paths]) for every class directory; listed
    # once instead of re-scanning the file system in every pass.
    classes = []
    for folder in os.listdir(base_path):
        class_dir = os.path.join(base_path, folder)
        if not os.path.isdir(class_dir):
            continue
        image_paths = [os.path.join(class_dir, image_file)
                       for image_file in os.listdir(class_dir)
                       if image_file.endswith('.jpg')]
        classes.append((folder, class_dir, image_paths))

    max_num = sum(len(image_paths) for _, _, image_paths in classes)

    # ---- Pass 1: compute (or reuse) the per-class feature files. ----
    now_num = 0
    last_num = 0
    last_time = time.time()
    for class_name, class_dir, image_paths in classes:
        feature_file = feature_file_for(class_dir)
        if os.path.exists(feature_file):
            t_npy = np.load(feature_file)
            # Reuse the cached file only when it has one row per image.  The
            # old check compared len(image_path_list) with itself, so stale
            # or partial files were always accepted.
            if len(t_npy) == len(image_paths):
                now_num = now_num + len(image_paths)
                continue

        feature_list = []
        for start in range(0, len(image_paths), batch_size):
            batch_paths = image_paths[start:start + batch_size]
            batch_images = [
                sess.run(image_preprocessing,
                         feed_dict={'img:0': imutils.opencv2matplotlib(
                             cv2.imread(image_path))})
                for image_path in batch_paths
            ]
            pooled = sess.run("resnet_v1_50/pool5:0",
                              feed_dict={'input:0': batch_images})
            for feature, image_path in zip(pooled, batch_paths):
                feature_list.append(
                    [np.ravel(feature), class_name, image_path])

        now_num = now_num + len(image_paths)
        print('正在提取中...%d/%d (%.2f/sec)' %
              (now_num, max_num,
               (now_num - last_num) / float(time.time() - last_time)))
        last_num = now_num
        last_time = time.time()
        file_tools.check_fold(class_dir.replace(base_path, save_path))
        np.save(feature_file, feature_list)

    # ---- Pass 2: merge the per-class files into three flat arrays. ----
    now_num = 0
    feature_list = []
    class_name_list = []
    file_path_list = []
    for class_name, class_dir, image_paths in classes:
        npy = np.load(feature_file_for(class_dir))
        for t in npy:
            feature_list.append(t[0])
            class_name_list.append(t[1])
            file_path_list.append(t[2])
            now_num = now_num + 1
            print('正在提取中...%d / %d' % (now_num, max_num))
    np.save(os.path.join(save_path, 'tensorflow-feature.npy'), feature_list)
    np.save(os.path.join(save_path, 'tensorflow-class_name.npy'),
            class_name_list)
    np.save(os.path.join(save_path, 'tensorflow-file_path.npy'),
            file_path_list)
def main(_):
    """Evaluate a slim classification model once on a dataset split.

    Builds the input pipeline and network described by ``FLAGS``, defines
    streaming accuracy and recall@5 metrics, and runs
    ``slim.evaluation.evaluate_once`` against the checkpoint in
    ``FLAGS.checkpoint_path`` (the latest one if it is a directory).

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if ``--dataset_dir`` was not supplied.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        global_step = slim.get_or_create_global_step()

        # --- Dataset ---
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # --- Model ---
        num_classes = dataset.num_classes - FLAGS.labels_offset
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name, num_classes=num_classes, is_training=False)

        # --- Provider reading single examples from the dataset ---
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        image, label = provider.get(['image', 'label'])
        label = label - FLAGS.labels_offset

        # --- Preprocessing (falls back to the model name) ---
        preprocess = preprocessing_factory.get_preprocessing(
            FLAGS.preprocessing_name or FLAGS.model_name, is_training=False)

        image_size = FLAGS.eval_image_size or network_fn.default_image_size
        image = preprocess(image, image_size, image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        # --- Forward pass ---
        logits, _ = network_fn(images)

        if not FLAGS.moving_average_decay:
            variables_to_restore = slim.get_variables_to_restore()
        else:
            # Restore the moving-average shadow values of the model
            # variables, plus the global step itself.
            ema = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
            variables_to_restore = ema.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[global_step.op.name] = global_step

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # --- Metrics ---
        metric_map = {
            'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5': slim.metrics.streaming_recall_at_k(logits, labels, 5),
        }
        names_to_values, names_to_updates = (
            slim.metrics.aggregate_metric_map(metric_map))

        # Each metric becomes a scalar summary that is also printed when
        # it is evaluated.
        for metric_name, metric_value in names_to_values.items():
            tag = 'eval/%s' % metric_name
            summary_op = tf.summary.scalar(tag, metric_value, collections=[])
            summary_op = tf.Print(summary_op, [metric_value], tag)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, summary_op)

        # TODO(sguada) use num_epochs=1
        # Without an explicit limit, make a single pass over all the data.
        num_batches = FLAGS.max_num_batches or math.ceil(
            dataset.num_samples / float(FLAGS.batch_size))

        checkpoint_path = FLAGS.checkpoint_path
        if tf.gfile.IsDirectory(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)

        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
def main(_):
    """Run a single evaluation pass of a slim classifier.

    The dataset, network and preprocessing are selected through ``FLAGS``.
    Streaming accuracy and recall@5 are computed over ``num_batches`` batches
    by ``slim.evaluation.evaluate_once`` using the checkpoint referenced by
    ``FLAGS.checkpoint_path``.

    Args:
        _: unused positional argument.

    Raises:
        ValueError: if ``--dataset_dir`` was not supplied.
    """
    dataset_dir_missing = not FLAGS.dataset_dir
    if dataset_dir_missing:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    graph = tf.Graph()
    with graph.as_default():
        step_tensor = slim.get_or_create_global_step()

        # Dataset selection.
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        # Model selection.
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        # Data provider that reads examples from the dataset in order.
        batch_size = FLAGS.batch_size
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * batch_size,
            common_queue_min=batch_size)
        raw_image, raw_label = provider.get(['image', 'label'])
        raw_label -= FLAGS.labels_offset

        # Preprocessing function; defaults to the one named after the model.
        if FLAGS.preprocessing_name:
            preprocessing_name = FLAGS.preprocessing_name
        else:
            preprocessing_name = FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        if FLAGS.eval_image_size:
            side = FLAGS.eval_image_size
        else:
            side = network_fn.default_image_size
        processed_image = image_preprocessing_fn(raw_image, side, side)

        images, labels = tf.train.batch(
            [processed_image, raw_label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        # Model definition.
        logits, _ = network_fn(images)

        if FLAGS.moving_average_decay:
            averager = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, step_tensor)
            variables_to_restore = averager.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[step_tensor.op.name] = step_tensor
        else:
            variables_to_restore = slim.get_variables_to_restore()

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # Streaming metrics.
        accuracy_metric = slim.metrics.streaming_accuracy(predictions, labels)
        recall_metric = slim.metrics.streaming_recall_at_k(logits, labels, 5)
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(
            {'Accuracy': accuracy_metric, 'Recall_5': recall_metric})

        # Register one printing scalar summary per metric.
        for name, value in names_to_values.items():
            summary_name = 'eval/%s' % name
            printed_summary = tf.Print(
                tf.summary.scalar(summary_name, value, collections=[]),
                [value], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, printed_summary)

        # TODO(sguada) use num_epochs=1
        if not FLAGS.max_num_batches:
            # A single pass over all of the data.
            num_batches = math.ceil(
                dataset.num_samples / float(FLAGS.batch_size))
        else:
            num_batches = FLAGS.max_num_batches

        is_directory = tf.gfile.IsDirectory(FLAGS.checkpoint_path)
        if not is_directory:
            checkpoint_path = FLAGS.checkpoint_path
        else:
            checkpoint_path = tf.train.latest_checkpoint(
                FLAGS.checkpoint_path)

        tf.logging.info('Evaluating %s' % checkpoint_path)

        update_ops = list(names_to_updates.values())
        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=update_ops,
            variables_to_restore=variables_to_restore)
def main_fun(argv, ctx):
    """Evaluate a slim classifier from inside a cluster worker.

    Installs ``argv`` as ``sys.argv``, defines the evaluation flags, starts a
    cluster server through ``TFNode.start_cluster_server(ctx)`` and then runs
    one ``slim.evaluation.evaluate_once`` pass with streaming accuracy and
    recall@5 metrics.

    Args:
        argv: command-line argument list installed as ``sys.argv``.
        ctx: node context forwarded to ``TFNode.start_cluster_server``.

    Raises:
        ValueError: if ``--dataset_dir`` was not supplied.
    """
    import math
    import six
    import tensorflow as tf

    from datasets import dataset_factory
    from nets import nets_factory
    from preprocessing import preprocessing_factory

    sys.argv = argv

    slim = tf.contrib.slim
    flags = tf.app.flags

    flags.DEFINE_integer(
        'batch_size', 100, 'The number of samples in each batch.')
    flags.DEFINE_integer(
        'max_num_batches', None,
        'Max number of batches to evaluate by default use all.')
    flags.DEFINE_string(
        'master', '', 'The address of the TensorFlow master to use.')
    flags.DEFINE_string(
        'checkpoint_path', '/tmp/tfmodel/',
        'The directory where the model was written to or an absolute path to a '
        'checkpoint file.')
    flags.DEFINE_string(
        'eval_dir', '/tmp/tfmodel/',
        'Directory where the results are saved to.')
    flags.DEFINE_integer(
        'num_preprocessing_threads', 4,
        'The number of threads used to create the batches.')
    flags.DEFINE_string(
        'dataset_name', 'imagenet', 'The name of the dataset to load.')
    flags.DEFINE_string(
        'dataset_split_name', 'test', 'The name of the train/test split.')
    flags.DEFINE_string(
        'dataset_dir', None,
        'The directory where the dataset files are stored.')
    flags.DEFINE_integer(
        'labels_offset', 0,
        'An offset for the labels in the dataset. This flag is primarily used to '
        'evaluate the VGG and ResNet architectures which do not use a background '
        'class for the ImageNet dataset.')
    flags.DEFINE_string(
        'model_name', 'inception_v3',
        'The name of the architecture to evaluate.')
    flags.DEFINE_string(
        'preprocessing_name', None,
        'The name of the preprocessing to use. If left '
        'as `None`, then the model_name flag is used.')
    flags.DEFINE_float(
        'moving_average_decay', None,
        'The decay to use for the moving average.'
        'If left as None, then moving averages are not used.')
    flags.DEFINE_integer(
        'eval_image_size', None, 'Eval image size')

    FLAGS = flags.FLAGS

    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    cluster_spec, server = TFNode.start_cluster_server(ctx)

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # A plain variable named "global_step" is used here instead of
        # slim.get_or_create_global_step().
        tf_global_step = tf.Variable(0, name="global_step")

        # --- Dataset ---
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # --- Model ---
        class_count = dataset.num_classes - FLAGS.labels_offset
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name, num_classes=class_count, is_training=False)

        # --- Provider reading examples from the dataset in order ---
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        image, label = provider.get(['image', 'label'])
        label = label - FLAGS.labels_offset

        # --- Preprocessing (defaults to the model's own) ---
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        preprocess = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = (FLAGS.eval_image_size
                           or network_fn.default_image_size)
        image = preprocess(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        # --- Forward pass ---
        logits, _ = network_fn(images)

        if not FLAGS.moving_average_decay:
            variables_to_restore = slim.get_variables_to_restore()
        else:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step

        predictions = tf.argmax(logits, 1)
        labels = tf.squeeze(labels)

        # --- Metrics ---
        metrics = {
            'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
            'Recall_5': slim.metrics.streaming_recall_at_k(logits, labels, 5),
        }
        names_to_values, names_to_updates = (
            slim.metrics.aggregate_metric_map(metrics))

        # Print the summaries to screen.
        for metric_name, metric_value in six.iteritems(names_to_values):
            tag = 'eval/%s' % metric_name
            summary_op = tf.summary.scalar(tag, metric_value, collections=[])
            summary_op = tf.Print(summary_op, [metric_value], tag)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, summary_op)

        # TODO(sguada) use num_epochs=1
        # Without an explicit limit, make a single pass over all the data.
        num_batches = FLAGS.max_num_batches or math.ceil(
            dataset.num_samples / float(FLAGS.batch_size))

        checkpoint_path = FLAGS.checkpoint_path
        if tf.gfile.IsDirectory(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)

        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore)
def main(_):
    """Display a few preprocessed dataset images together with their class names.

    Builds the slim input pipeline (dataset provider, preprocessing with
    ``is_training=False``, batching, prefetch queue), instantiates the network
    on the dequeued batch, and then shows the first image of ten consecutive
    batches with matplotlib, titled with the class name read from
    ``labels.txt``.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``--dataset_dir`` is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=False)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)
            [image, label] = provider.get(['image', 'label'])
            # NOTE(review): labels are not shifted by FLAGS.labels_offset here,
            # although the one-hot depth below is reduced by it. Confirm this
            # is intended when labels_offset != 0.

            train_image_size = (FLAGS.train_image_size or
                                network_fn.default_image_size)
            print(train_image_size)
            image = image_preprocessing_fn(image, train_image_size,
                                           train_image_size)

            images, labels = tf.train.batch(
                [image, label],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            labels = slim.one_hot_encoding(
                labels, dataset.num_classes - FLAGS.labels_offset)
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, labels], capacity=2 * deploy_config.num_clones)

        images, labels = batch_queue.dequeue()
        print(images, labels)
        # The network outputs are not used below; the call is kept so the
        # graph (and its variables) is built exactly as before.
        logits, end_points = network_fn(images)

        labels_to_class_names = dataset_utils.read_label_file(
            FLAGS.dataset_dir, filename='labels.txt')
        print(labels_to_class_names)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            try:
                images_np, labels_np = sess.run([images, labels])
                print(images_np.shape, labels_np.shape)
                for _ in range(10):
                    image_np, label_np = sess.run([images, labels])
                    plt.imshow(image_np[0, :, :, :])
                    plt.title('label name:' + str(
                        labels_to_class_names[np.argmax(label_np[0])]))
                    plt.show()
            finally:
                # Always stop the input threads, even when fetching or
                # plotting fails, so the process can exit cleanly.
                coord.request_stop()
                coord.join(threads)
def main(_):
    """Evaluate an SSD detection checkpoint once over the dataset.

    Builds the evaluation graph (data provider, preprocessing, anchor
    encoding, batching, SSD network, box decoding and NMS, TP/FP matching),
    registers streaming loss metrics and VOC12 average-precision summaries,
    and runs ``slim.evaluation.evaluate_once`` on the selected checkpoint.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``--dataset_dir`` is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        ssd_model = ssd_vgg_300.SSDNet()
        ssd_model.set_batch_size(FLAGS.batch_size)
        network_fn = nets_factory.get_network_fn(ssd_model, is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            common_queue_capacity=20 * FLAGS.batch_size,
            common_queue_min=10 * FLAGS.batch_size)
        [image, labels, bboxes] = provider.get(
            ['image', 'object/label', 'object/bbox'])
        labels -= FLAGS.labels_offset

        if FLAGS.remove_difficult:
            # provider.get returns a list; unpack it so difficults_gt is a
            # tensor like in the else-branch (a nested list would be stacked
            # into an extra leading dimension by tf.train.batch).
            [difficults_gt] = provider.get(['object/difficult'])
        else:
            difficults_gt = tf.zeros(tf.shape(labels), dtype=tf.int64)

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name)

        eval_image_size_height = (FLAGS.eval_image_size_height or
                                  ssd_model.ssd_params.image_size[0])
        eval_image_size_width = (FLAGS.eval_image_size_width or
                                 ssd_model.ssd_params.image_size[1])

        image, labels_gt, bboxes_gt = image_preprocessing_fn(
            image, labels, bboxes,
            eval_image_size_height, eval_image_size_width,
            data_format=DATA_FORMAT,
            is_training=False)

        anchors = ssd_model.anchors_for_all_layer()
        labels_en, scores_en, bboxes_en = ssd_model.bboxes_encode(
            anchors, labels_gt, bboxes_gt)

        # dynamic_pad=True: tensors in a batch are padded to a common shape.
        images, labels_gt, bboxes_gt, difficults_gt, labels_en, scores_en, bboxes_en = \
            tf.train.batch(
                [image, labels_gt, bboxes_gt, difficults_gt,
                 labels_en, scores_en, bboxes_en],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size,
                dynamic_pad=True)

        ################################
        # SSD Model + outputs decoding #
        ################################
        logits, locs, endpoints = network_fn(images)
        ssd_model.ssd_class_and_loc_losses(logits, locs, labels_en, bboxes_en,
                                           scores_en)

        # Performing post_processing on CPU: loop-intensive, usually more
        # efficient.
        with tf.device('/device:CPU:0'):
            # Detect objects from SSD Model outputs
            locs_aggr = ssd_model.bboxes_decode(locs, anchors)
            scores_nms, bboxes_nms = ssd_model.detected_bboxes(
                logits, locs_aggr,
                FLAGS.select_threshold, FLAGS.nms_threshold,
                FLAGS.select_top_k, FLAGS.keep_top_k)
            num_bboxes_gt, tp, fp = bboxes_matching_batch(
                scores_nms.keys(), scores_nms, bboxes_nms,
                labels_gt, bboxes_gt, difficults_gt,
                matching_threshold=FLAGS.matching_threshold)

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        # Define the metrics:
        with tf.device('/device:CPU:0'):
            dict_metrics = {}
            # First add all losses.
            for loss in tf.get_collection(tf.GraphKeys.LOSSES):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)
            # Extra losses as well.
            for loss in tf.get_collection('EXTRA_LOSSES'):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)

            # Add metrics to summaries.
            for name, metric in dict_metrics.items():
                summary_name = name
                op = tf.summary.scalar(summary_name, metric[0], collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # FP and TP metrics.
            tp_fp_metric = streaming_tp_fp_arrays(num_bboxes_gt, tp, fp,
                                                  scores_nms)
            for c in tp_fp_metric[0].keys():
                dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c],
                                                tp_fp_metric[1][c])

            # Add to summaries precision/recall values.
            aps_voc12 = {}
            for c in tp_fp_metric[0].keys():
                # Precision and recall values.
                prec, rec = precision_recall(*tp_fp_metric[0][c])

                # Average precision VOC12.
                v = average_precision_voc12(prec, rec)
                summary_name = 'AP_VOC12/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc12[c] = v

            # Mean average precision VOC12 (also printed when evaluated).
            summary_name = 'AP_VOC12/mAP'
            mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # Split into values and updates ops.
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(
            dict_metrics)

        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)

        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = math.ceil(
                dataset.num_samples / float(FLAGS.batch_size))

        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)

        slim.evaluation.evaluate_once(
            master=FLAGS.master,
            checkpoint_path=checkpoint_path,
            logdir=FLAGS.eval_dir,
            num_evals=num_batches,
            eval_op=list(names_to_updates.values()),
            variables_to_restore=variables_to_restore,
            # Pass the session configuration so that --gpu_memory_fraction
            # actually takes effect.
            session_config=config)
def main(FLAGS):
    """Train the style-transfer generator against a fixed loss network.

    Builds the generator (``model.net``) and the loss network selected by
    ``FLAGS.loss_model``, combines content, style and total-variation losses,
    and runs Adam (learning rate 1e-3) on every trainable variable whose name
    does not start with ``FLAGS.loss_model``. Progress is logged every 10
    steps, summaries are written every 25 steps and a checkpoint is saved
    every 1000 steps under ``<model_path>/<naming>``. Training ends when the
    input queue raises ``OutOfRangeError``.

    Args:
        FLAGS: Configuration object; the attributes read here are model_path,
            naming, loss_model, batch_size, image_size, epoch, content_layers,
            style_layers, style_weight, content_weight and tv_weight.
    """
    style_features_t = losses.get_style_features(FLAGS)

    # Make sure the training path exists.
    training_path = os.path.join(FLAGS.model_path, FLAGS.naming)
    if not os.path.exists(training_path):
        os.makedirs(training_path)

    with tf.Graph().as_default():
        with tf.Session() as sess:
            # ---- Build network ----
            network_fn = nets_factory.get_network_fn(
                FLAGS.loss_model,
                num_classes=1,
                is_training=False)

            image_preprocessing_fn, image_unprocessing_fn = \
                preprocessing_factory.get_preprocessing(
                    FLAGS.loss_model,
                    is_training=False)
            processed_images = reader.image(
                FLAGS.batch_size, FLAGS.image_size, FLAGS.image_size,
                'train2014/', image_preprocessing_fn, epochs=FLAGS.epoch)
            generated = model.net(processed_images, training=True)
            # Push each generated image through the loss network's
            # preprocessing before feeding it to that network.
            processed_generated = [
                image_preprocessing_fn(image, FLAGS.image_size,
                                       FLAGS.image_size)
                for image in tf.unstack(generated, axis=0,
                                        num=FLAGS.batch_size)
            ]
            processed_generated = tf.stack(processed_generated)
            # Generated and original images go through the loss network as
            # one concatenated batch (generated first).
            _, endpoints_dict = network_fn(
                tf.concat([processed_generated, processed_images], 0),
                spatial_squeeze=False)

            # Log the structure of loss network
            tf.logging.info('Loss network layers(You can define them in "content_layers" and "style_layers"):')
            for key in endpoints_dict:
                tf.logging.info(key)

            # ---- Build losses ----
            content_loss = losses.content_loss(endpoints_dict,
                                               FLAGS.content_layers)
            style_loss, style_loss_summary = losses.style_loss(
                endpoints_dict, style_features_t, FLAGS.style_layers)
            # use the unprocessed image
            tv_loss = losses.total_variation_loss(generated)

            loss = (FLAGS.style_weight * style_loss +
                    FLAGS.content_weight * content_loss +
                    FLAGS.tv_weight * tv_loss)

            # ---- Summaries for visualization in tensorboard ----
            tf.summary.scalar('losses/content_loss', content_loss)
            tf.summary.scalar('losses/style_loss', style_loss)
            tf.summary.scalar('losses/regularizer_loss', tv_loss)

            tf.summary.scalar('weighted_losses/weighted_content_loss',
                              content_loss * FLAGS.content_weight)
            tf.summary.scalar('weighted_losses/weighted_style_loss',
                              style_loss * FLAGS.style_weight)
            tf.summary.scalar('weighted_losses/weighted_regularizer_loss',
                              tv_loss * FLAGS.tv_weight)
            tf.summary.scalar('total_loss', loss)

            for layer in FLAGS.style_layers:
                tf.summary.scalar('style_losses/' + layer,
                                  style_loss_summary[layer])
            tf.summary.image('generated', generated)
            tf.summary.image('origin', tf.stack([
                image_unprocessing_fn(image)
                for image in tf.unstack(processed_images, axis=0,
                                        num=FLAGS.batch_size)
            ]))
            summary = tf.summary.merge_all()
            writer = tf.summary.FileWriter(training_path)

            # ---- Prepare to train ----
            global_step = tf.Variable(0, name="global_step", trainable=False)

            # Only variables outside the loss network are optimised.
            variable_to_train = [
                variable for variable in tf.trainable_variables()
                if not variable.name.startswith(FLAGS.loss_model)
            ]
            train_op = tf.train.AdamOptimizer(1e-3).minimize(
                loss, global_step=global_step, var_list=variable_to_train)

            # The saver likewise skips the loss network's variables.
            variables_to_save = [
                v for v in tf.global_variables()
                if not v.name.startswith(FLAGS.loss_model)
            ]
            saver = tf.train.Saver(variables_to_save,
                                   write_version=tf.train.SaverDef.V1)

            sess.run([tf.global_variables_initializer(),
                      tf.local_variables_initializer()])

            # Restore variables for loss network.
            init_func = utils._get_init_fn(FLAGS)
            init_func(sess)

            # Restore variables for training model if the checkpoint file
            # exists.
            last_file = tf.train.latest_checkpoint(training_path)
            if last_file:
                tf.logging.info('Restoring model from {}'.format(last_file))
                saver.restore(sess, last_file)

            # ---- Start training ----
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            start_time = time.time()
            try:
                while not coord.should_stop():
                    _, loss_t, step = sess.run([train_op, loss, global_step])
                    elapsed_time = time.time() - start_time
                    start_time = time.time()
                    # logging
                    if step % 10 == 0:
                        tf.logging.info('step: %d,  total Loss %f, secs/step: %f' % (step, loss_t, elapsed_time))
                    # summary
                    if step % 25 == 0:
                        tf.logging.info('adding summary...')
                        summary_str = sess.run(summary)
                        writer.add_summary(summary_str, step)
                        writer.flush()
                    # checkpoint
                    if step % 1000 == 0:
                        saver.save(sess,
                                   os.path.join(training_path,
                                                'fast-style-model.ckpt'),
                                   global_step=step)
            except tf.errors.OutOfRangeError:
                saver.save(sess, os.path.join(training_path,
                                              'fast-style-model.ckpt-done'))
                tf.logging.info('Done training -- epoch limit reached')
            finally:
                coord.request_stop()
                coord.join(threads)
                # Flush and release the event file so no summaries are lost.
                writer.close()
def main(_):
    """Write sensor-noise, Bayer-masked copies of every image in a directory tree.

    Each sub-directory of ``--input_dir`` whose name contains no ``'.'`` is
    treated as one synset. Every image in it is preprocessed, passed through
    the sensor noise model, multiplied by the Bayer mask and written as a PNG
    to the matching sub-directory of ``--output_dir``. Images whose output
    file already exists are skipped; files OpenCV cannot read are skipped
    with a warning.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``--input_dir`` or ``--output_dir`` is not set.
    """
    if not FLAGS.input_dir:
        raise ValueError(
            'You must supply the input directory with --input_dir')
    if not FLAGS.output_dir:
        raise ValueError(
            'You must supply the output directory with --output_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Preprocess the images so that they all have the same size
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        eval_image_size = FLAGS.eval_image_size
        orig_image = tf.placeholder(tf.uint8, shape=(None, None, 3))
        # NOTE(review): the placeholder is passed twice; presumably this
        # preprocessing function takes two image arguments - confirm against
        # its definition.
        image = image_preprocessing_fn(orig_image, orig_image,
                                       eval_image_size, eval_image_size)
        images = tf.expand_dims(image, 0)

        # Add noise.
        noisy_batch, alpha, sigma = sensor_model.sensor_noise_rand_light_level(
            images, [FLAGS.ll_low, FLAGS.ll_high],
            scale=1.0, sensor=FLAGS.sensor)
        bayer_mask = sensor_model.get_bayer_mask(eval_image_size,
                                                 eval_image_size)
        inputs = noisy_batch * bayer_mask

        if not os.path.isdir(FLAGS.output_dir):
            os.mkdir(FLAGS.output_dir)

        with tf.Session() as sess:
            count = 0
            synsets = [
                path for path in os.listdir(FLAGS.input_dir)
                if '.' not in path
            ]
            for synset in synsets:
                path = os.path.join(FLAGS.input_dir, synset)
                image_names = os.listdir(path)
                print("Found %d images in %s" % (len(image_names), synset))
                synset_path = os.path.join(FLAGS.output_dir, synset)
                if not os.path.isdir(synset_path):
                    os.mkdir(synset_path)

                for imagename in image_names:
                    output_imgfn = os.path.join(
                        FLAGS.output_dir, synset,
                        imagename.split('.')[0] + '.png')
                    # Already generated in a previous run.
                    if os.path.isfile(output_imgfn):
                        continue

                    image_path = os.path.join(path, imagename)
                    loaded_image = cv2.imread(image_path)
                    if loaded_image is None:
                        # cv2.imread signals an unreadable or non-image file
                        # by returning None; skip it instead of aborting the
                        # whole run on the slice below.
                        tf.logging.warning('Skipping unreadable image: %s',
                                           image_path)
                        continue

                    # BGR to RGB
                    loaded_image = loaded_image[..., ::-1]
                    noisy_np, _, _ = sess.run(
                        [inputs, alpha, sigma],
                        feed_dict={orig_image: loaded_image})
                    # NOTE(review): assumes values lie in [0, 1]; anything
                    # outside wraps around in the uint8 cast - confirm the
                    # noise model's output range.
                    img = (255.0 * noisy_np[0, :, :, :]).astype(np.uint8)
                    # RGB to BGR
                    img = img[..., ::-1]
                    if count % 1000 == 0:
                        print("%d processed images." % (count))
                    cv2.imwrite(output_imgfn, img)
                    count += 1
            print('Total images processed:', count)
def main(_):
    """Run one batch through the network and print its logits and stage timings.

    Builds the input pipeline and model, restores the checkpoint through a
    ``Supervisor`` managed session, evaluates the logits for a single batch,
    and prints the logits together with a dict of wall-clock durations
    measured around each setup stage.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``--dataset_dir`` is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    # Stage name -> seconds spent in that stage.
    times = {}
    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        start = time.time()
        tf_global_step = slim.get_or_create_global_step()
        times['global_step'] = time.time() - start

        ######################
        # Select the dataset #
        ######################
        start = time.time()
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir,
            suffix=FLAGS.dataset_name_suffix)
        times['get_dataset'] = time.time() - start

        ####################
        # Select the model #
        ####################
        start = time.time()
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)
        times['select_model'] = time.time() - start

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        start = time.time()
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=False,
            common_queue_capacity=2 * FLAGS.batch_size,
            common_queue_min=FLAGS.batch_size)
        times['get_provider'] = time.time() - start

        start = time.time()
        [image] = provider.get(['image'])
        times['get_image'] = time.time() - start

        #####################################
        # Select the preprocessing function #
        #####################################
        start = time.time()
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name,
            is_training=False)
        times['get_preprocessing'] = time.time() - start

        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

        start = time.time()
        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)
        times['preprocessing'] = time.time() - start

        start = time.time()
        images = tf.train.batch(
            [image],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)
        times['get_batch'] = time.time() - start

        start = time.time()
        tf.image_summary('test_images', images, FLAGS.batch_size)
        times['image_summary'] = time.time() - start

        ####################
        # Define the model #
        ####################
        start = time.time()
        logits, _ = network_fn(images)
        times['do_network'] = time.time() - start

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        # Flatten 4-D logits to (batch, features).
        if len(logits.get_shape()) == 4:
            logits = tf.reshape(logits, [int(logits.get_shape()[0]), -1])

        start = time.time()
        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path
        times['load_checkpoint'] = time.time() - start

        tf.logging.info('Evaluating %s' % checkpoint_path)

        # Hand-rolled single-step evaluation: restore, start queues, run once.
        from tensorflow.contrib.framework.python.ops import variables
        from tensorflow.core.protobuf import saver_pb2
        from tensorflow.python.training import saver as tf_saver
        from tensorflow.python.framework import ops
        from tensorflow.python.training import supervisor

        saver = tf_saver.Saver(
            variables_to_restore or variables.get_variables_to_restore(),
            write_version=saver_pb2.SaverDef.V1)
        sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                                   logdir=FLAGS.eval_dir,
                                   summary_op=None,
                                   summary_writer=None,
                                   global_step=None,
                                   saver=None)
        with sv.managed_session(FLAGS.master,
                                start_standard_services=False) as sess:
            saver.restore(sess, checkpoint_path)
            sv.start_queue_runners(sess)

            start = time.time()
            # Only the logits are fetched, so the result is a single array
            # and must not be indexed as if it were a [softmax, logits] pair.
            logits_value = sess.run(logits)
            times['exec'] = time.time() - start

            print(logits_value.shape)
            result_predict = np.reshape(
                logits_value, (FLAGS.batch_size, logits_value.shape[-1]))
            print(result_predict)
            # NOTE(review): this ranks the batch entries by their class-1
            # logit. If the top-5 classes per image were intended it would be
            # np.argsort(result_predict, axis=1)[:, -5:] - confirm.
            print(np.argsort(result_predict[:, 1])[-5:])
        print(times)
def main(_):
    """Build the SSD training graph and run training with TF-Slim.

    The graph consists of a shuffled dataset provider, training-mode
    preprocessing, ground-truth encoding against the SSD anchors, a batching
    queue feeding a prefetch queue, one network clone per ``--num_clones``,
    optional moving averages of the model variables, the configured
    optimizer, and summaries. Training is started with
    ``slim.learning.train`` as a single chief worker.

    Args:
        _: Unused positional argument.

    Raises:
        ValueError: If ``--dataset_dir`` is not set.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.Graph().as_default():
        # Deployment layout in the TF-Slim style, fixed to a single replica
        # without parameter servers.
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=0,
            num_replicas=1,
            num_ps_tasks=0)

        # The global step lives on the variables device.
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # Dataset to train on.
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        # SSD network object, its input shape and the matching anchors.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        ssd_params = ssd_class.default_params._replace(
            num_classes=FLAGS.num_classes)
        ssd_net = ssd_class(ssd_params)
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Training-mode preprocessing.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        tf_utils.print_configuration(FLAGS.__flags, ssd_params,
                                     dataset.data_sources, FLAGS.train_dir)

        # ------------------------------------------------------------------ #
        # Dataset provider and batches.
        # ------------------------------------------------------------------ #
        with tf.device(deploy_config.inputs_device()):
            provider_scope = FLAGS.dataset_name + '_data_provider'
            with tf.name_scope(provider_scope):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    num_readers=FLAGS.num_readers,
                    common_queue_capacity=20 * FLAGS.batch_size,
                    common_queue_min=10 * FLAGS.batch_size,
                    shuffle=True)

            # Raw example: image, its shape, object labels and boxes.
            item_keys = ['image', 'shape', 'object/label', 'object/bbox']
            [image, _shape, glabels, gbboxes] = provider.get(item_keys)

            # Preprocess image, labels and boxes together.
            image, glabels, gbboxes = image_preprocessing_fn(
                image, glabels, gbboxes,
                out_shape=ssd_shape,
                data_format=DATA_FORMAT)

            # Encode the ground truth against the anchors.
            gclasses, glocalisations, gscores = ssd_net.bboxes_encode(
                glabels, gbboxes, ssd_anchors)

            # One image entry followed by three groups of one entry per
            # anchor layer; presumably the group sizes reshape_list uses to
            # re-nest the flat tensor list - confirm against tf_utils.
            num_anchor_layers = len(ssd_anchors)
            batch_shape = [1, num_anchor_layers, num_anchor_layers,
                           num_anchor_layers]

            # Batching queue over the flattened tensor list.
            flat_example = tf_utils.reshape_list(
                [image, gclasses, glocalisations, gscores])
            flat_batch = tf.train.batch(
                flat_example,
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)
            b_image, b_gclasses, b_glocalisations, b_gscores = \
                tf_utils.reshape_list(flat_batch, batch_shape)

            # Prefetch queue: a single batch pipeline shared by all clones.
            flat_prefetch = tf_utils.reshape_list(
                [b_image, b_gclasses, b_glocalisations, b_gscores])
            batch_queue = slim.prefetch_queue.prefetch_queue(
                flat_prefetch,
                capacity=2 * deploy_config.num_clones)

        # ------------------------------------------------------------------ #
        # Model built once per clone.
        # ------------------------------------------------------------------ #
        def clone_fn(queue):
            """Build one clone: dequeue a batch, run the net, add the losses."""
            dequeued = queue.dequeue()
            clone_image, clone_gclasses, clone_glocalisations, clone_gscores = \
                tf_utils.reshape_list(dequeued, batch_shape)

            # SSD network under its argument scope.
            net_arg_scope = ssd_net.arg_scope(weight_decay=FLAGS.weight_decay,
                                              data_format=DATA_FORMAT)
            with slim.arg_scope(net_arg_scope):
                _predictions, localisations, logits, end_points = \
                    ssd_net.net(clone_image, is_training=True)

            # Register the losses for this clone.
            ssd_net.losses(logits, localisations,
                           clone_gclasses, clone_glocalisations, clone_gscores,
                           match_threshold=FLAGS.match_threshold,
                           negative_ratio=FLAGS.negative_ratio,
                           alpha=FLAGS.loss_alpha,
                           label_smoothing=FLAGS.label_smoothing)
            return end_points

        # Summaries that exist before any clone is created.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # ------------------------------------------------------------------ #
        # Clones and first-clone summaries.
        # ------------------------------------------------------------------ #
        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)

        # Update ops of the first clone only.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)

        # Activation histograms and sparsity for every end point.
        end_points = clones[0].outputs
        for end_point_name, activation in end_points.items():
            summaries.add(tf.summary.histogram(
                'activations/' + end_point_name, activation))
            summaries.add(tf.summary.scalar(
                'sparsity/' + end_point_name, tf.nn.zero_fraction(activation)))

        # Losses and extra losses of the first clone.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))
        for loss in tf.get_collection('EXTRA_LOSSES', first_clone_scope):
            summaries.add(tf.summary.scalar(loss.op.name, loss))

        # Histogram of every model variable.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # ------------------------------------------------------------------ #
        # Moving averages (optional).
        # ------------------------------------------------------------------ #
        moving_average_variables = None
        variable_averages = None
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)

        # ------------------------------------------------------------------ #
        # Optimization.
        # ------------------------------------------------------------------ #
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = tf_utils.configure_learning_rate(
                FLAGS, dataset.num_samples, global_step)
            optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.moving_average_decay:
            # The averaging op runs with every training step.
            update_ops.append(
                variable_averages.apply(moving_average_variables))

        # Variables the optimizer is allowed to change.
        variables_to_train = tf_utils.get_variables_to_train(FLAGS)

        # Total loss and gradients summed over the clones.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=variables_to_train)
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Gradient application joins the other update ops; the train tensor
        # yields total_loss once all of them have run.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies(
            [update_op], total_loss, name='train_op')

        # Pull in the summaries registered inside the first clone's scope.
        summaries.update(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # One merged summary op for the training loop.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        # ------------------------------------------------------------------ #
        # Run training.
        # ------------------------------------------------------------------ #
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        session_config = tf.ConfigProto(log_device_placement=False,
                                        gpu_options=gpu_options)
        saver = tf.train.Saver(max_to_keep=5,
                               keep_checkpoint_every_n_hours=1.0,
                               write_version=2,
                               pad_step_number=False)
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master='',
            is_chief=True,
            init_fn=tf_utils.get_init_fn(FLAGS),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            saver=saver,
            save_interval_secs=FLAGS.save_interval_secs,
            session_config=session_config,
            sync_optimizer=None)
def main(_):
    """Build the detection training graph and run the TF-Slim training loop.

    The function reads its whole configuration from the module-level ``FLAGS``
    and ``config`` objects. It creates a ``model_deploy`` configuration, an
    input pipeline that yields pre-processed images together with encoded
    annotations, one model clone per deployment clone, the summaries, the
    optimizer and finally hands the resulting ``train_tensor`` to
    ``slim.learning.train``.

    Args:
        _: unused positional argument (this function never reads it).

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        #######################
        # Config model_deploy #
        #######################
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=FLAGS.num_clones,
            clone_on_cpu=FLAGS.clone_on_cpu,
            replica_id=FLAGS.task,
            num_replicas=FLAGS.worker_replicas,
            num_ps_tasks=FLAGS.num_ps_tasks)

        # Create global_step on the device chosen for variables.
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        ######################
        # Select the network #
        ######################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            weight_decay=FLAGS.weight_decay,
            is_training=True,
            width_multiplier=FLAGS.width_multiplier)

        #####################################
        # Select the preprocessing function #
        #####################################
        # Falls back to the model name when no explicit preprocessing is given.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name,
            is_training=True)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=20 * FLAGS.batch_size,
                common_queue_min=10 * FLAGS.batch_size)

            # gt_bboxes format [ymin, xmin, ymax, xmax]
            # NOTE: img_shape is fetched but not used below (its only consumer
            # is the commented-out scale_bboxes call).
            [image, img_shape, gt_labels, gt_bboxes] = provider.get(['image',
                                                                     'shape',
                                                                     'object/label',
                                                                     'object/bbox'])

            # Preprocessing
            # gt_bboxes = scale_bboxes(gt_bboxes, img_shape)  # bboxes format [0,1) for tf draw
            image, gt_labels, gt_bboxes = image_preprocessing_fn(image,
                                                                 config.IMG_HEIGHT,
                                                                 config.IMG_WIDTH,
                                                                 labels=gt_labels,
                                                                 bboxes=gt_bboxes,
                                                                 )

            #############################################
            # Encode annotations for losses computation #
            #############################################
            # anchors format [cx, cy, w, h]
            anchors = tf.convert_to_tensor(config.ANCHOR_SHAPE, dtype=tf.float32)

            # encode annos, box_input format [cx, cy, w, h]
            input_mask, labels_input, box_delta_input, box_input = encode_annos(gt_labels,
                                                                                 gt_bboxes,
                                                                                 anchors,
                                                                                 config.NUM_CLASSES)

            # Batch the image together with the four encoded annotation tensors.
            images, b_input_mask, b_labels_input, b_box_delta_input, b_box_input = tf.train.batch(
                [image, input_mask, labels_input, box_delta_input, box_input],
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size)

            # Prefetch queue from which every clone dequeues its own batch.
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, b_input_mask, b_labels_input, b_box_delta_input, b_box_input],
                capacity=2 * deploy_config.num_clones)

        ####################
        # Define the model #
        ####################
        def clone_fn(batch_queue):
            """Allows data parallelism by creating multiple clones of network_fn.

            Dequeues one batch, runs ``network_fn`` on the images, adds a 3x3
            prediction convolution on top of the
            ``'MobileNet/conv_ds_14/depthwise_conv'`` end point, decodes the
            prediction with ``interpre_prediction`` and calls ``losses``.

            Returns:
                The ``end_points`` mapping returned by ``network_fn``, extended
                with the ``viz_*`` entries set below.
            """
            images, b_input_mask, b_labels_input, b_box_delta_input, b_box_input = batch_queue.dequeue()
            # Shadows the outer `anchors`; rebuilt here from the same constant.
            anchors = tf.convert_to_tensor(config.ANCHOR_SHAPE, dtype=tf.float32)
            end_points = network_fn(images)
            end_points["viz_images"] = images
            conv_ds_14 = end_points['MobileNet/conv_ds_14/depthwise_conv']
            dropout = slim.dropout(conv_ds_14, keep_prob=0.5, is_training=True)
            # NUM_CLASSES + 1 + 4 output channels per anchor -- presumably class
            # scores, one confidence and four box deltas; confirm against
            # interpre_prediction.
            num_output = config.NUM_ANCHORS * (config.NUM_CLASSES + 1 + 4)
            predict = slim.conv2d(dropout, num_output, kernel_size=(3, 3), stride=1, padding='SAME',
                                  activation_fn=None,
                                  weights_initializer=tf.truncated_normal_initializer(stddev=0.0001),
                                  scope="MobileNet/conv_predict")

            with tf.name_scope("Interpre_prediction") as scope:
                pred_box_delta, pred_class_probs, pred_conf, ious, det_probs, det_boxes, det_class = \
                    interpre_prediction(predict, b_input_mask, anchors, b_box_input)
                end_points["viz_det_probs"] = det_probs
                end_points["viz_det_boxes"] = det_boxes
                end_points["viz_det_class"] = det_class

            with tf.name_scope("Losses") as scope:
                # Return value is ignored; presumably `losses` registers its
                # terms in the tf.GraphKeys.LOSSES collection -- TODO confirm.
                losses(b_input_mask, b_labels_input, ious, b_box_delta_input, pred_class_probs, pred_conf,
                       pred_box_delta)

            return end_points

        # Gather initial summaries (snapshot taken before the clones are built).
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        # Gather update_ops from the first clone. These contain, for example,
        # the updates for the batch_norm variables created by network_fn.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Add summaries for end_points, skipping the "viz_*" entries that
        # clone_fn stored for visualisation.
        end_points = clones[0].outputs
        for end_point in end_points:
            if end_point not in ["viz_images", "viz_det_probs", "viz_det_boxes", "viz_det_class"]:
                x = end_points[end_point]
                summaries.add(tf.summary.histogram('activations/' + end_point, x))
                summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                                tf.nn.zero_fraction(x)))

        # Add summaries for det result
        # TODO(shizehao): visualize prediction

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #################################
        # Configure the moving averages #
        #################################
        if FLAGS.moving_average_decay:
            moving_average_variables = slim.get_model_variables()
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, global_step)
        else:
            moving_average_variables, variable_averages = None, None

        #########################################
        # Configure the optimization procedure. #
        #########################################
        with tf.device(deploy_config.optimizer_device()):
            learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        if FLAGS.sync_replicas:
            # If sync_replicas is enabled, the averaging will be done in the chief
            # queue runner.
            optimizer = tf.train.SyncReplicasOptimizer(
                opt=optimizer,
                replicas_to_aggregate=FLAGS.replicas_to_aggregate,
                variable_averages=variable_averages,
                variables_to_average=moving_average_variables,
                replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
                total_num_replicas=FLAGS.worker_replicas)
        elif FLAGS.moving_average_decay:
            # Update ops executed locally by trainer.
            update_ops.append(variable_averages.apply(moving_average_variables))

        # Variables to train.
        variables_to_train = _get_variables_to_train()

        # optimize_clones returns the total loss and the gradients computed
        # over all clones for the selected variables.
        total_loss, clones_gradients = model_deploy.optimize_clones(
            clones,
            optimizer,
            var_list=variables_to_train)
        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        # Create gradient updates.
        grad_updates = optimizer.apply_gradients(clones_gradients,
                                                 global_step=global_step)
        update_ops.append(grad_updates)

        # train_tensor evaluates to total_loss, but only after every update op
        # (collected update ops, moving averages, gradient step) has run.
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
                                                          name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                           first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries), name='summary_op')

        ###########################
        # Kicks off the training. #
        ###########################
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_dir,
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            init_fn=_get_init_fn(),
            summary_op=summary_op,
            number_of_steps=FLAGS.max_number_of_steps,
            log_every_n_steps=FLAGS.log_every_n_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs,
            sync_optimizer=optimizer if FLAGS.sync_replicas else None)
def eval(eval_op_feed_dict=None, session_config=None, max_num_batches=None,
         sample_percentage=None, masking_variable_value=None,
         compute_delta_cost_per_layer_solution=None,
         compute_delta_cost_per_layer_solution2=None):
    """Evaluate a checkpoint on one tower per visible device and return metrics.

    One model tower is built for every entry of ``CUDA_VISIBLE_DEVICES`` (a
    single CPU tower when that variable is the empty string); the batch is
    split evenly across towers. If the masking variable manager
    (``DnnUtili.mvm``) is non-empty after the first tower is built, a value for
    the masking variables is chosen -- by Gurobi, from ``FLAGS.solution_path``,
    by ``FLAGS.K_heuristic``, from the ``masking_variable_value`` argument, or
    all zeros -- and fed to every tower through the session ``feed_dict``.

    Args:
        eval_op_feed_dict: must be ``None``; merging a caller-supplied feed
            dict with the per-tower masking values is not implemented.
        session_config: accepted but not used; the session is always created
            with ``allow_soft_placement=True``.
        max_num_batches: if not ``None``, overrides ``FLAGS.max_num_batches``
            and enables shuffling of the input.
        sample_percentage: fraction in ``(0, 1]`` of the dataset to evaluate;
            values below 0.99 enable shuffling. Mutually exclusive with
            ``max_num_batches``.
        masking_variable_value: masking variable solution used when none of
            ``FLAGS.call_gurobi``, ``FLAGS.solution_path`` or
            ``FLAGS.K_heuristic`` is set.
        compute_delta_cost_per_layer_solution: if not ``None``, only
            ``mvm.compute_delta_cost_per_layer`` is called with this value and
            ``compute_delta_cost_per_layer_solution2``; the function then
            returns ``None``.
        compute_delta_cost_per_layer_solution2: second argument forwarded to
            ``mvm.compute_delta_cost_per_layer``.

    Returns:
        An ``OrderedDict`` with keys ``accuracy``, ``accuracy_5``, ``loss``,
        ``computation_cost``, ``memory_cost`` (both ``-1`` when no masking
        variables exist) and ``tf_run_time``; or ``None`` when the function
        returns early (delta-cost computation, or
        ``FLAGS.only_compute_mask_variable``).

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is empty, or
            ``FLAGS.solution_path`` has neither a ``.pickle`` nor a ``.mat``
            suffix.
        KeyError: if the ``CUDA_VISIBLE_DEVICES`` environment variable is
            not set.
        AssertionError: on inconsistent arguments or flags.

    Side effects: sets ``FLAGS.is_training`` (and possibly
    ``FLAGS.max_num_batches``), may rewrite the ``.mat`` solution file and
    ``/tmp/sqp_solution.mat``, writes
    ``/tmp/mask_variable_value_dict.pickle`` and deletes ``FLAGS.eval_dir``.
    """
    FLAGS.is_training = False
    if not FLAGS.dataset_dir:
        raise ValueError('You must supply the dataset directory with --dataset_dir')
    assert not ((max_num_batches is not None) and (sample_percentage is not None)), 'argument of eval max_num_batches and sample_percentage cannot be both specified'

    # Shuffle whenever only a subset of the dataset is going to be evaluated.
    shuffle = False
    if sample_percentage is not None:
        if sample_percentage < 0.99:
            shuffle = True
    if max_num_batches is not None:
        shuffle = True

    # Set the number of batches to be evaluated, None for all the samples.
    if max_num_batches is not None:
        FLAGS.max_num_batches = max_num_batches

    tf.logging.set_verbosity(tf.logging.WARN)

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        tf_global_step = tf.train.get_or_create_global_step()

        ######################
        # Select the dataset #
        ######################
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)

        ####################
        # Select the model #
        ####################
        network_fn = nets_factory.get_network_fn(
            FLAGS.model_name,
            num_classes=(dataset.num_classes - FLAGS.labels_offset),
            is_training=False)

        ##############################################################
        # Create a dataset provider that loads data from the dataset #
        ##############################################################
        provider = slim.dataset_data_provider.DatasetDataProvider(
            dataset,
            shuffle=shuffle,
            num_readers=32,
            common_queue_capacity=10 * FLAGS.batch_size,
            common_queue_min=3 * FLAGS.batch_size)
        [image, label] = provider.get(['image', 'label'])
        label -= FLAGS.labels_offset

        #####################################
        # Select the preprocessing function #
        #####################################
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name,
            is_training=False)

        eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

        image = image_preprocessing_fn(image, eval_image_size, eval_image_size)

        images, labels = tf.train.batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * FLAGS.batch_size)

        # One tower per visible GPU; an empty CUDA_VISIBLE_DEVICES means a
        # single CPU tower. Raises KeyError when the variable is not set.
        gpus = os.environ["CUDA_VISIBLE_DEVICES"].strip().split(',')
        if gpus == ['']:
            gpus = ['CPU']
        else:
            assert all([g.isdigit() for g in gpus]), 'invalud gpu string : %s'%os.environ["CUDA_VISIBLE_DEVICES"]
        num_gpus = len(gpus)

        # Split the batch of images and labels for towers. `gpus` always holds
        # at least one entry here, so there is exactly one split per tower.
        num_splits = num_gpus
        assert FLAGS.batch_size % num_splits == 0, 'batch_size %d cannot be divided by num_splits %d'%(FLAGS.batch_size, num_splits)
        images_splits = tf.split(axis=0, num_or_size_splits=num_splits, value=images)
        labels_splits = tf.split(axis=0, num_or_size_splits=num_splits, value=labels)

        def _tower_logit(images, labels, reuse_variables=None):
            """Return the logits of network_fn for one tower (labels is unused)."""
            with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
                logits, _ = network_fn(images)
            return logits

        # Per-tower loss and top-k hit-count tensors.
        tower_loss_list = []
        tower_top_1_op_list = []
        tower_top_5_op_list = []

        # MaskingVariableManager
        mvm = DnnUtili.mvm
        # Each element holds all the masking variables of one tower.
        variables_list = []

        # Defaults reported when no masking variables exist.
        computation_percentage, memory_percentage = -1, -1

        for idx, gpu_id in enumerate(gpus):
            if gpu_id == 'CPU':
                device_string = '/cpu:0'
            else:
                # Addressed by position in the list, not by the id listed in
                # CUDA_VISIBLE_DEVICES.
                device_string = '/gpu:%d' % idx

            with tf.device(device_string):
                # Build the model for this tower; variables are shared with
                # the first tower (reuse is enabled for idx > 0).
                logits = _tower_logit(images_splits[idx], labels_splits[idx],
                                      reuse_variables=True if idx > 0 else None)

                labels = tf.squeeze(labels_splits[idx])

                # Specify the loss function
                loss = tf.losses.sparse_softmax_cross_entropy(
                    logits=logits, labels=labels)

                # Number of top-1 / top-5 hits in this tower's split.
                top_1_op = tf.reduce_sum(tf.to_float(tf.nn.in_top_k(logits, labels, 1)))
                top_5_op = tf.reduce_sum(tf.to_float(tf.nn.in_top_k(logits, labels, 5)))

                tower_loss_list.append(loss)
                tower_top_1_op_list.append(top_1_op)
                tower_top_5_op_list.append(top_5_op)

                # Remember the masking variables created for this tower.
                if not mvm.is_empty():
                    variables = DnnUtili.mvm.get_variables()
                    variables_list.append(variables)

                # Compute the value of the feed_dict for masking variables:
                # only for the first tower, and only if the mvm is not empty.
                if idx == 0 and not mvm.is_empty():
                    if FLAGS.only_compute_mask_variable is False:
                        print_mvm_parameters()

                    # Print the information of the masking variables
                    # (skipped when computing the reduced index).
                    if FLAGS.K_heuristic is None:
                        mvm.print_variable_index()

                    # The message reports the same three options that the
                    # condition tests.
                    assert bool(FLAGS.call_gurobi) + bool(FLAGS.solution_path is not None) + bool(FLAGS.K_heuristic is not None) <= 1, 'no more than one of these options can be true, got %s, %s, %s'%(FLAGS.call_gurobi, FLAGS.solution_path, FLAGS.K_heuristic)

                    # Only call compute_delta_cost_per_layer in mvm (dumps
                    # /tmp/delta_cost_per_layer.pickle per the original note).
                    if compute_delta_cost_per_layer_solution is not None:
                        mvm.compute_delta_cost_per_layer(compute_delta_cost_per_layer_solution, compute_delta_cost_per_layer_solution2)
                        return

                    if FLAGS.call_gurobi:
                        masking_variable_value = mvm.call_gurobi_miqp(
                            hessian_pickle_path=FLAGS.hessian_pickle_path,
                            computation_max=FLAGS.computation_max,
                            memory_max=FLAGS.memory_max,
                            monotonic=False,
                            timelimit=FLAGS.timelimit)
                    elif FLAGS.solution_path is not None:
                        print('---Loading solution from %s'%FLAGS.solution_path)
                        if str(FLAGS.solution_path).endswith('.pickle'):
                            # NOTE(review): pickle.load can execute arbitrary
                            # code; only load solution files from a trusted
                            # source.
                            with open(FLAGS.solution_path, 'rb') as f:
                                masking_variable_value = pickle.load(f)
                        elif str(FLAGS.solution_path).endswith('.mat'):
                            assert int(FLAGS.solution_random_rounding) + int(FLAGS.cross_entropy_rounding) + int(FLAGS.add_and_svd_rounding) <= 1, 'only choose one type of rounding'
                            masking_variable_value = scipy.io.loadmat(FLAGS.solution_path)['x']

                            # The full solution covers all the variables,
                            # including the reduced ones; it is written back
                            # into the same .mat file under 'full_x'.
                            full_solution = mvm.expand_reduced_mask_variables_np(np.squeeze(masking_variable_value), exact_size=True)
                            mat_content = scipy.io.loadmat(FLAGS.solution_path)
                            mat_content['full_x'] = full_solution
                            scipy.io.savemat(FLAGS.solution_path, mat_content, do_compression=True)
                            print('eval_functions_multi: adding full_solution to sqp_solution.mat')

                            if FLAGS.solution_random_rounding:
                                masking_variable_value = DnnUtili.solution_random_rounding(masking_variable_value)
                            elif FLAGS.cross_entropy_rounding:
                                masking_variable_value = mvm.cross_entropy_rounding(masking_variable_value, FLAGS.computation_max, FLAGS.memory_max)
                            elif FLAGS.add_and_svd_rounding:
                                # All the values are calculated in
                                # my_slim_layer.py when the network is being
                                # constructed.
                                masking_variable_value = None
                            else:
                                assert masking_variable_value.shape[1]==1, 'expected a column vector, got %s'%str(masking_variable_value.shape)
                                masking_variable_value = np.reshape(masking_variable_value, (masking_variable_value.shape[0]))
                        else:
                            raise ValueError('invalid solution_path: %s'%FLAGS.solution_path)
                    elif FLAGS.K_heuristic is not None:
                        # Decide which singular values to use with a heuristic.
                        print('---Using heuristic %d in get_mask_variable_value_using_heuristic()'%(FLAGS.K_heuristic))
                        masking_variable_value = mvm.get_mask_variable_value_using_heuristic(
                            FLAGS.K_heuristic,
                            computation_max=FLAGS.computation_max,
                            memory_max=FLAGS.memory_max,
                            monotonic=False,
                            timelimit=FLAGS.timelimit)

                        # If an sqp_solution exists and contains a full_x
                        # entry, convert the full solution according to the
                        # reduced index just computed.
                        if os.path.isfile('/tmp/sqp_solution.mat'):
                            mat_content = scipy.io.loadmat('/tmp/sqp_solution.mat')
                            if 'full_x' in mat_content:
                                reduced_x = mvm.reduce_mask_variables_np(np.squeeze(mat_content['full_x']), exact_size=True)
                                mat_content['reduced_x'] = reduced_x
                                scipy.io.savemat('/tmp/sqp_solution.mat', mat_content, do_compression=True)
                                print('eval_functions_multi: added reduced_x to sqp_solution.mat, based on existing full_x and current reduced_index.')
                    elif masking_variable_value is not None:
                        print('---Using masking variable solution from argument')
                    else:
                        print('---!!! No approximation is specified, all mask are enabled, no approximation to the network')
                        masking_variable_value = np.zeros([mvm.get_num_mask_variables()], dtype=np.float32)

                    # Save the computation and memory cost coefficients to a pickle.
                    mvm.save_coefficients_to_pickle()

                    if FLAGS.only_compute_mask_variable:
                        assert FLAGS.call_gurobi or FLAGS.K_heuristic is not None, 'should be computing a mask variable solution'
                        mvm.save_variable_index_to_pickle()
                        print('---mask_variable solution computed.')
                        return

                    masking_variable_value_dict = mvm.get_variable_to_value_dict(masking_variable_value)
                    if FLAGS.add_and_svd_rounding:
                        # All the values are calculated in my_slim_layer.py
                        # when the network is being constructed.
                        masking_variable_value_dict = dict()

                    if not FLAGS.add_and_svd_rounding:
                        computation_percentage, memory_percentage = mvm.calculate_percentage_computation_memory_cost(masking_variable_value)
                    else:
                        computation_percentage, memory_percentage = -1, -1
                # end MaskingVariableManager

                # Clear the masking variable manager, but not for the last
                # gpu, so we still have a copy of the masking variables.
                if idx != num_gpus-1:
                    DnnUtili.mvm.__init__()

        # Sum the per-tower tensors into single ops.
        with slim.arg_scope([slim.model_variable, slim.variable], device='/cpu:0'):
            loss_op = tf.reduce_sum(tower_loss_list)
            top_1_op = tf.reduce_sum(tower_top_1_op_list)
            top_5_op = tf.reduce_sum(tower_top_5_op_list)

        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        # Find the path of the checkpoint file to restore weights from.
        if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        else:
            checkpoint_path = FLAGS.checkpoint_path

        tf.logging.info('Evaluating %s' % checkpoint_path)

        # Compute the number of iterations. math.ceil returns a float on
        # Python 2, so it is cast to int before being used in range() / %d.
        # TODO(sguada) use num_epochs=1
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        elif sample_percentage:
            assert 0<sample_percentage <= 1, 'invalid sample_percentage %s'%sample_percentage
            num_batches = int(math.ceil(dataset.num_samples*sample_percentage / float(FLAGS.batch_size)))
        else:
            # This ensures that we make a single pass over all of the data.
            num_batches = int(math.ceil(dataset.num_samples / float(FLAGS.batch_size)))

        assert num_batches > 4, 'only evaluate so few batches? num_batches = %d'%num_batches
        # The number of samples that are actually evaluated.
        total_sample_count = num_batches * FLAGS.batch_size

        # Duplicate the value of the masking variables to each tower.
        if not mvm.is_empty():
            # At this point, masking_variable_value_dict is computed.
            duplicated_masking_variable_value_dict = copy.copy(masking_variable_value_dict)
            masking_variable_value_dict_values = list(masking_variable_value_dict.values())
            for i, variables in enumerate(variables_list):
                if i == 0:
                    continue
                # The j-th variable of every other tower gets the j-th value
                # of the first tower.
                for j, var in enumerate(variables):
                    duplicated_masking_variable_value_dict[var] = masking_variable_value_dict_values[j]

            # Save a dict mapping from the name of the variable to its value.
            name_value_dict = OrderedDict()
            for var, value in masking_variable_value_dict.items():
                name_value_dict[var.op.name] = value
            with open('/tmp/mask_variable_value_dict.pickle', 'wb') as f:
                pickle.dump(name_value_dict, f, protocol=-1)

            masking_variable_value_dict = duplicated_masking_variable_value_dict

        print_eval_parameters()

        assert eval_op_feed_dict is None, 'because the feed_dict has to be duplicated for each tower for the masking variables, this is not implemented yet'
        if not mvm.is_empty():
            eval_op_feed_dict = masking_variable_value_dict

        # Start a session; the try/finally closes it even when restoring or
        # running fails.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False))
        try:
            # No initializer op is run: variables are taken from the checkpoint.
            saver = tf.train.Saver(variables_to_restore)
            saver.restore(sess, checkpoint_path)

            loss_sum = np.longdouble(0.0)
            top_1_sum = np.longdouble(0.0)
            top_5_sum = np.longdouble(0.0)

            tf_run_start = time.time()
            # The queue runners must be running around sess.run; per the
            # original note the code otherwise halts and produces no result.
            with slim.queues.QueueRunners(sess):
                for _ in range(num_batches):
                    loss_np, top_1_op_np, top_5_op_np = sess.run(
                        [loss_op, top_1_op, top_5_op],
                        feed_dict=eval_op_feed_dict)
                    loss_sum += np.longdouble(loss_np)
                    top_1_sum += np.longdouble(top_1_op_np)
                    top_5_sum += np.longdouble(top_5_op_np)
            tf_run_time = time.time() - tf_run_start
        finally:
            sess.close()

        # loss_op is a sum over towers, hence the extra division by num_gpus.
        loss = loss_sum/np.longdouble(num_batches)/num_gpus
        top_1 = top_1_sum/np.longdouble(total_sample_count)
        top_5 = top_5_sum/np.longdouble(total_sample_count)

        # Delete the eval directory, so the summary files do not accumulate.
        shutil.rmtree(FLAGS.eval_dir, ignore_errors=True)

        results = OrderedDict()
        results['accuracy'] = top_1
        results['accuracy_5'] = top_5
        results['loss'] = loss

        if FLAGS.add_and_svd_rounding:
            computation_percentage, memory_percentage = DnnUtili.calculate_percentage_add_and_svd(FLAGS.computation_max, FLAGS.memory_max)

        # Both stay at -1 when no masking variables were created.
        results['computation_cost'] = computation_percentage
        results['memory_cost'] = memory_percentage

        results['tf_run_time'] = tf_run_time
        return results
def main(_):
    """Build the SSD evaluation graph and run TF-Slim evaluation.

    Configuration is read from the module-level ``FLAGS``. The function builds
    the input pipeline, the SSD network with its losses, the detection
    post-processing, the streaming metrics (losses, TP/FP arrays, per-class
    average precision in VOC07 and VOC12 flavours, and their means) and then
    either evaluates the checkpoint once or keeps waiting for new checkpoints,
    depending on ``FLAGS.wait_for_checkpoints``.

    Args:
        _: unused positional argument (this function never reads it).

    Raises:
        ValueError: if ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        # =================================================================== #
        # Dataset + SSD model + Pre-processing
        # =================================================================== #
        dataset = dataset_factory.get_dataset(FLAGS.dataset_name,
                                              FLAGS.dataset_split_name,
                                              FLAGS.dataset_dir)

        # Get the SSD network and its anchors.
        ssd_class = nets_factory.get_network(FLAGS.model_name)
        ssd_params = ssd_class.default_params._replace(
            num_classes=FLAGS.num_classes)
        ssd_net = ssd_class(ssd_params)

        # Evaluation shape and associated anchors: eval_image_size
        ssd_shape = ssd_net.params.img_shape
        ssd_anchors = ssd_net.anchors(ssd_shape)

        # Select the preprocessing function.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=False)

        tf_utils.print_configuration(FLAGS.__flags, ssd_params,
                                     dataset.data_sources, FLAGS.eval_dir)

        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.device('/cpu:0'):
            with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
                provider = slim.dataset_data_provider.DatasetDataProvider(
                    dataset,
                    common_queue_capacity=2 * FLAGS.batch_size,
                    common_queue_min=FLAGS.batch_size,
                    shuffle=False)
            # Get for SSD network: image, labels, bboxes.
            [image, shape, glabels, gbboxes] = provider.get(
                ['image', 'shape', 'object/label', 'object/bbox'])
            if FLAGS.remove_difficult:
                [gdifficults] = provider.get(['object/difficult'])
            else:
                # No object is treated as difficult.
                gdifficults = tf.zeros(tf.shape(glabels), dtype=tf.int64)

            # Pre-processing image, labels and bboxes.
            image, glabels, gbboxes, gbbox_img = \
                image_preprocessing_fn(image, glabels, gbboxes,
                                       out_shape=ssd_shape,
                                       data_format=DATA_FORMAT,
                                       resize=FLAGS.eval_resize,
                                       difficults=None)

            # Encode groundtruth labels and bboxes.
            gclasses, glocalisations, gscores = \
                ssd_net.bboxes_encode(glabels, gbboxes, ssd_anchors)
            # Five single tensors followed by three per-anchor-layer lists;
            # used to rebuild the nested structure after batching.
            batch_shape = [1] * 5 + [len(ssd_anchors)] * 3

            # Evaluation batch.
            r = tf.train.batch(
                tf_utils.reshape_list([
                    image, glabels, gbboxes, gdifficults, gbbox_img,
                    gclasses, glocalisations, gscores
                ]),
                batch_size=FLAGS.batch_size,
                num_threads=FLAGS.num_preprocessing_threads,
                capacity=5 * FLAGS.batch_size,
                dynamic_pad=True)
            (b_image, b_glabels, b_gbboxes, b_gdifficults, b_gbbox_img,
             b_gclasses, b_glocalisations, b_gscores) = \
                tf_utils.reshape_list(r, batch_shape)

        # =================================================================== #
        # SSD Network + Outputs decoding.
        # =================================================================== #
        arg_scope = ssd_net.arg_scope(data_format=DATA_FORMAT)
        with slim.arg_scope(arg_scope):
            predictions, localisations, logits, end_points = \
                ssd_net.net(b_image, is_training=False)
            # Add losses functions.
            ssd_net.losses(logits, localisations, b_gclasses,
                           b_glocalisations, b_gscores)

        # Performing post-processing on CPU: loop-intensive, usually more efficient.
        with tf.device('/device:CPU:0'):
            # Detected objects from SSD output.
            localisations = ssd_net.bboxes_decode(localisations, ssd_anchors)
            rscores, rbboxes = \
                ssd_net.detected_bboxes(predictions, localisations,
                                        select_threshold=FLAGS.select_threshold,
                                        nms_threshold=FLAGS.nms_threshold,
                                        clipping_bbox=None,
                                        top_k=FLAGS.select_top_k,
                                        keep_top_k=FLAGS.keep_top_k)
            # Compute TP and FP statistics.
            num_gbboxes, tp, fp, rscores = \
                tfe.bboxes_matching_batch(rscores.keys(), rscores, rbboxes,
                                          b_glabels, b_gbboxes, b_gdifficults,
                                          matching_threshold=FLAGS.matching_threshold)

        # Variables to restore: moving avg. or normal weights.
        if FLAGS.moving_average_decay:
            variable_averages = tf.train.ExponentialMovingAverage(
                FLAGS.moving_average_decay, tf_global_step)
            variables_to_restore = variable_averages.variables_to_restore(
                slim.get_model_variables())
            variables_to_restore[tf_global_step.op.name] = tf_global_step
        else:
            variables_to_restore = slim.get_variables_to_restore()

        # =================================================================== #
        # Evaluation metrics.
        # =================================================================== #
        with tf.device('/device:CPU:0'):
            dict_metrics = {}
            # First add all losses.
            for loss in tf.get_collection(tf.GraphKeys.LOSSES):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)
            # Extra losses as well.
            for loss in tf.get_collection('EXTRA_LOSSES'):
                dict_metrics[loss.op.name] = slim.metrics.streaming_mean(loss)

            # Add metrics to summaries.
            for name, metric in dict_metrics.items():
                summary_name = name
                op = tf.summary.scalar(summary_name, metric[0], collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # FP and TP metrics.
            tp_fp_metric = tfe.streaming_tp_fp_arrays(num_gbboxes, tp, fp, rscores)
            for c in tp_fp_metric[0].keys():
                dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c],
                                                tp_fp_metric[1][c])

            # Add to summaries precision/recall values.
            aps_voc07 = {}
            aps_voc12 = {}
            for c in tp_fp_metric[0].keys():
                # Precision and recall values.
                prec, rec = tfe.precision_recall(*tp_fp_metric[0][c])

                # Average precision VOC07.
                v = tfe.average_precision_voc07(prec, rec)
                summary_name = 'AP_VOC07/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc07[c] = v

                # Average precision VOC12.
                v = tfe.average_precision_voc12(prec, rec)
                summary_name = 'AP_VOC12/%s' % c
                op = tf.summary.scalar(summary_name, v, collections=[])
                tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
                aps_voc12[c] = v

            # Mean average precision VOC07 (also printed via tf.Print).
            summary_name = 'AP_VOC07/mAP'
            mAP = tf.add_n(list(aps_voc07.values())) / len(aps_voc07)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

            # Mean average precision VOC12 (also printed via tf.Print).
            summary_name = 'AP_VOC12/mAP'
            mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
            op = tf.summary.scalar(summary_name, mAP, collections=[])
            op = tf.Print(op, [mAP], summary_name)
            tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

        # Split into values and updates ops.
        names_to_values, names_to_updates = slim.metrics.aggregate_metric_map(
            dict_metrics)

        # The tp_fp_* entries are stored as tuples, so the update ops are
        # flattened into a single list; both evaluation modes below receive
        # the same eval_op.
        eval_op = flatten(list(names_to_updates.values()))

        # =================================================================== #
        # Evaluation loop.
        # =================================================================== #
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory_fraction)
        config = tf.ConfigProto(log_device_placement=False,
                                gpu_options=gpu_options)

        # Number of batches. math.ceil returns a float on Python 2, so the
        # result is cast to int before being used as an evaluation count.
        if FLAGS.max_num_batches:
            num_batches = FLAGS.max_num_batches
        else:
            num_batches = int(math.ceil(
                dataset.num_samples / float(FLAGS.batch_size)))

        if not FLAGS.wait_for_checkpoints:
            if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
                checkpoint_path = tf.train.latest_checkpoint(
                    FLAGS.checkpoint_path)
            else:
                checkpoint_path = FLAGS.checkpoint_path
            tf.logging.info('Evaluating %s' % checkpoint_path)

            # Standard evaluation loop.
            start = time.time()
            slim.evaluation.evaluate_once(
                master=FLAGS.master,
                checkpoint_path=checkpoint_path,
                logdir=FLAGS.eval_dir,
                num_evals=num_batches,
                eval_op=eval_op,
                variables_to_restore=variables_to_restore,
                session_config=config)
            # Log time spent.
            elapsed = time.time() - start
            print('Time spent : %.3f seconds.' % elapsed)
            print('Time spent per BATCH: %.3f seconds.' % (elapsed / num_batches))
        else:
            checkpoint_path = FLAGS.checkpoint_path
            tf.logging.info('Evaluating %s' % checkpoint_path)

            # Waiting loop: re-evaluates every 60 seconds, without limit.
            slim.evaluation.evaluation_loop(
                master=FLAGS.master,
                checkpoint_dir=checkpoint_path,
                logdir=FLAGS.eval_dir,
                num_evals=num_batches,
                eval_op=eval_op,
                variables_to_restore=variables_to_restore,
                eval_interval_secs=60,
                max_number_of_evaluations=np.inf,
                session_config=config,
                timeout=None)
def main(_):
  """Build the multi-clone training graph and hand it to slim.learning.train.

  All configuration is read from FLAGS. The function:
    * restricts the visible GPUs to FLAGS.gpus and, when FLAGS.num_clones is
      -1, sets the clone count to the number of comma-separated GPU ids;
    * creates the dataset, network function, preprocessing function and a
      batched, prefetched input queue;
    * builds one model clone per device through model_deploy and attaches
      softmax cross-entropy losses (plus an auxiliary loss when the network
      exposes 'AuxLogits');
    * collects summaries, optional moving averages and the optimizer;
    * builds either a single train op (FLAGS.iter_size == 1) or a dict of
      per-iteration ops that accumulate gradients over FLAGS.iter_size steps.

  Args:
    _: Unused positional argument (presumably argv from tf.app.run -- TODO
      confirm against the caller).

  Raises:
    ValueError: If FLAGS.dataset_dir is empty.

  Side effects:
    Sets os.environ['CUDA_VISIBLE_DEVICES'], may overwrite FLAGS.num_clones,
    and assigns the module-level global `end_points_debug`.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  # Limit TensorFlow to the requested GPUs; -1 means "one clone per listed GPU".
  os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpus
  if FLAGS.num_clones == -1:
    FLAGS.num_clones = len(FLAGS.gpus.split(','))

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    # Fixed graph-level seed.
    # tf.set_random_seed(42)
    tf.set_random_seed(0)

    ######################
    # Config model_deploy#
    ######################
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Create global_step
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    ######################
    # Select the dataset #
    ######################
    # FLAGS.dataset_dir may list several directories separated by commas.
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name,
        FLAGS.dataset_dir.split(','),
        dataset_list_dir=FLAGS.dataset_list_dir,
        num_samples=FLAGS.frames_per_video,
        modality=FLAGS.modality,
        split_id=FLAGS.split_id)

    ######################
    # Select the network #
    ######################
    # The dropout flags are drop probabilities; the network takes keep
    # probabilities, hence the (1.0 - x) conversions.
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        batch_size=FLAGS.batch_size,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        dropout_keep_prob=(1.0-FLAGS.dropout),
        pooled_dropout_keep_prob=(1.0-FLAGS.pooled_dropout),
        batch_norm=FLAGS.netvlad_batch_norm)

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=True)  # in case of pooling images,
                           # now preprocessing is done video-level

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    with tf.device(deploy_config.inputs_device()):
      provider = dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size,
          bgr_flips=FLAGS.bgr_flip)
      [image, label] = provider.get(['image', 'label'])
      # now note that the above image might be a 23 channel image if you have
      # both RGB and flow streams. It will need to split later, but all the
      # preprocessing will be done consistently for all frames over all streams

      # The provider yields the label as a string; convert it to a scalar int32.
      label = tf.string_to_number(label, tf.int32)
      label.set_shape(())
      label -= FLAGS.labels_offset

      train_image_size = FLAGS.train_image_size or network_fn.default_image_size

      # NOTE(review): the trailing comma makes `scale_ratios` a 1-tuple that
      # wraps the list of floats. Confirm the preprocessing function expects
      # that nesting before removing the comma.
      scale_ratios=[float(el) for el in FLAGS.scale_ratios.split(',')],
      image = image_preprocessing_fn(image, train_image_size,
                                     train_image_size,
                                     scale_ratios=scale_ratios,
                                     out_dim_scale=FLAGS.out_dim_scale,
                                     model_name=FLAGS.model_name)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      if FLAGS.debug:
        # Print the label batch every time an image batch is produced.
        images = tf.Print(images, [labels], 'Read batch')
      labels = slim.one_hot_encoding(
          labels, dataset.num_classes - FLAGS.labels_offset)
      # Prefetch two batches per clone so the clones do not wait on input.
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)
      summarize_images(images, provider.num_channels_stream)

    ####################
    # Define the model #
    ####################
    # Optional keyword arguments forwarded to network_fn.
    kwargs = {}
    if FLAGS.conv_endpoint is not None:
      kwargs['conv_endpoint'] = FLAGS.conv_endpoint

    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      images, labels = batch_queue.dequeue()
      logits, end_points = network_fn(
          images, pool_type=FLAGS.pooling,
          classifier_type=FLAGS.classifier_type,
          num_channels_stream=provider.num_channels_stream,
          netvlad_centers=FLAGS.netvlad_initCenters.split(','),
          stream_pool_type=FLAGS.stream_pool_type,
          **kwargs)

      #############################
      # Specify the loss function #
      #############################
      # The auxiliary head, when present, contributes with weight 0.4.
      if 'AuxLogits' in end_points:
        slim.losses.softmax_cross_entropy(
            end_points['AuxLogits'], labels,
            label_smoothing=FLAGS.label_smoothing, weight=0.4, scope='aux_loss')
      slim.losses.softmax_cross_entropy(
          logits, labels, label_smoothing=FLAGS.label_smoothing, weight=1.0)
      return end_points

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Gather update_ops from the first clone. These contain, for example,
    # the updates for the batch_norm variables created by network_fn.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Add summaries for end_points.
    # `end_points_debug` is a module-level copy of the first clone's end points
    # plus the input batch (presumably read by the custom train step for
    # debugging -- TODO confirm).
    global end_points_debug
    end_points = clones[0].outputs
    end_points_debug = dict(end_points)
    end_points_debug['images'] = images
    end_points_debug['labels'] = labels
    for end_point in end_points:
      x = end_points[end_point]
      summaries.add(tf.histogram_summary('activations/' + end_point, x))
      summaries.add(tf.scalar_summary('sparsity/' + end_point,
                                      tf.nn.zero_fraction(x)))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.scalar_summary('losses/%s' % loss.op.name, loss))

    # Add summaries for variables.
    for variable in slim.get_model_variables():
      summaries.add(tf.histogram_summary(variable.op.name, variable))

    #################################
    # Configure the moving averages #
    #################################
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)
    else:
      moving_average_variables, variable_averages = None, None

    #########################################
    # Configure the optimization procedure. #
    #########################################
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.scalar_summary('learning_rate', learning_rate,
                                      name='learning_rate'))

    if FLAGS.sync_replicas:
      # If sync_replicas is enabled, the averaging will be done in the chief
      # queue runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables,
          replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
          total_num_replicas=FLAGS.worker_replicas)
    elif FLAGS.moving_average_decay:
      # Update ops executed locally by trainer.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Variables to train.
    variables_to_train = _get_variables_to_train()
    logging.info('Training the following variables: %s' % (
        ' '.join([el.name for el in variables_to_train])))

    # optimize_clones returns the summed loss and the (gradient, variable)
    # pairs aggregated over all clones.
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=variables_to_train)

    # clip the gradients if needed
    if FLAGS.clip_gradients > 0:
      logging.info('Clipping gradients by %f' % FLAGS.clip_gradients)
      with tf.name_scope('clip_gradients'):
        clones_gradients = slim.learning.clip_gradient_norms(
            clones_gradients,
            FLAGS.clip_gradients)

    # Add total_loss to summary.
    summaries.add(tf.scalar_summary('total_loss', total_loss,
                                    name='total_loss'))

    # Create gradient updates.
    # NOTE(review): `train_ops` ends up as a single tensor when iter_size == 1
    # and as a dict {iteration index -> op} otherwise; `train_step` (defined
    # elsewhere) presumably handles both shapes -- confirm.
    train_ops = {}
    if FLAGS.iter_size == 1:
      grad_updates = optimizer.apply_gradients(clones_gradients,
                                               global_step=global_step)
      update_ops.append(grad_updates)

      update_op = tf.group(*update_ops)
      train_tensor = control_flow_ops.with_dependencies([update_op], total_loss,
                                                        name='train_op')
      train_ops = train_tensor
    else:
      # Gradient accumulation: keep one local "reference" variable per trained
      # variable, fill it over iter_size runs, then apply the averaged value.
      gvs = [(grad, var) for grad, var in clones_gradients]
      varnames = [var.name for grad, var in gvs]
      # NOTE(review): `varname_to_var` is never read below.
      varname_to_var = {var.name: var for grad, var in gvs}
      varname_to_grad = {var.name: grad for grad, var in gvs}
      varname_to_ref_grad = {}
      for vn in varnames:
        grad = varname_to_grad[vn]
        print("accumulating ... ", (vn, grad.get_shape()))
        with tf.variable_scope("ref_grad"):
          with tf.device(deploy_config.variables_device()):
            # vn[:-2] drops the last two characters of the variable name
            # (presumably the ':0' output suffix -- TODO confirm).
            ref_var = slim.local_variable(
                np.zeros(grad.get_shape(),dtype=np.float32),
                name=vn[:-2])
            varname_to_ref_grad[vn] = ref_var

      # First iteration overwrites the accumulators; later ones add to them.
      all_assign_ref_op = [
          ref.assign(varname_to_grad[vn])
          for vn, ref in varname_to_ref_grad.items()]
      all_assign_add_ref_op = [
          ref.assign_add(varname_to_grad[vn])
          for vn, ref in varname_to_ref_grad.items()]
      assign_gradients_ref_op = tf.group(*all_assign_ref_op)
      accmulate_gradients_op = tf.group(*all_assign_add_ref_op)
      # Ops created in this scope run after one more accumulation, so the
      # final iteration both accumulates and applies the averaged gradients.
      with tf.control_dependencies([accmulate_gradients_op]):
        final_gvs = [
            (varname_to_ref_grad[var.name] / float(FLAGS.iter_size), var)
            for grad, var in gvs]
        apply_gradients_op = optimizer.apply_gradients(final_gvs,
                                                       global_step=global_step)
        update_ops.append(apply_gradients_op)
        update_op = tf.group(*update_ops)
        train_tensor = control_flow_ops.with_dependencies([update_op],
            total_loss, name='train_op')
      # Schedule: step 0 resets, intermediate steps accumulate, last applies.
      for i in range(FLAGS.iter_size):
        if i == 0:
          train_ops[i] = assign_gradients_ref_op
        elif i < FLAGS.iter_size - 1:  # because apply_gradients also computes
                                       # (see control_dependency), so
                                       # no need of running an extra iteration
          train_ops[i] = accmulate_gradients_op
        else:
          train_ops[i] = train_tensor

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES,
                                       first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.merge_summary(list(summaries), name='summary_op')

    # Session configuration: grow GPU memory on demand and cap CPU threads.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.intra_op_parallelism_threads = FLAGS.cpu_threads
    # config.allow_soft_placement = True
    # config.gpu_options.per_process_gpu_memory_fraction=0.7

    ###########################
    # Kicks off the training. #
    ###########################
    logging.info('RUNNING ON SPLIT %d' % FLAGS.split_id)
    slim.learning.train(
        train_ops,
        train_step_fn=train_step,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None,
        session_config=config)
def main(_):
    """Localise objects with CAM heat maps and report the mean IoU.

    Reads the label map from ``<dataset_dir>/labels.txt`` (one ``id:name``
    entry per line), streams the four ``pj_vehicle_validation`` TFRecord
    shards, restores ``FLAGS.checkpoint_path`` and, for the first sample of
    each of ``FLAGS.max_num_batches`` batches:

      * saves the raw image, the heat map(s) and the annotated image under
        ``FLAGS.output_file`` (used as a directory);
      * derives bounding boxes from the heat map via ``cam_utils``;
      * when the top-1 prediction equals the label, records the IoU between
        the first predicted box and the first ground-truth box.

    Finally prints the mean of the recorded IoU values.

    Args:
        _: Unused positional argument (presumably argv from tf.app.run --
            TODO confirm against the caller).

    Raises:
        ValueError: If ``FLAGS.dataset_dir`` is empty.
    """
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    tf.logging.set_verbosity(tf.logging.INFO)

    #####################
    # Get the label map #
    #####################
    labels_path = os.path.join(FLAGS.dataset_dir, 'labels.txt')
    category_index = {}
    # The context manager closes the file; the original leaked the handle.
    with open(labels_path, 'r', encoding='utf-8') as label_map:
        for line in label_map:
            entry = line.strip()
            if not entry:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            fields = entry.split(":")
            class_id = int(fields[0])
            category_index[class_id] = {'id': class_id, 'name': fields[1]}

    ##################
    # Get input data #
    ##################
    filename_queue = tf.train.string_input_producer([
        FLAGS.dataset_dir +
        '/pj_vehicle_validation_0000%d-of-00004.tfrecord' % shard
        for shard in range(0, 4)
    ], )
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image/class/label': tf.FixedLenFeature([], tf.int64),
            'image/encoded': tf.FixedLenFeature([], tf.string),
        })
    image = features['image/encoded']
    label = features['image/class/label']

    ####################
    # Select the model #
    ####################
    network_fn = nets_factory.get_network_fn(FLAGS.model_name,
                                             num_classes=NUM_CLASSES,
                                             is_training=False)
    if hasattr(network_fn, 'default_image_size'):
        image_size = network_fn.default_image_size
    else:
        image_size = FLAGS.default_image_size

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name, is_training=False)
    image_processed = tf.image.decode_jpeg(image, channels=3)
    image_processed = image_preprocessing_fn(image_processed, image_size,
                                             image_size)
    # The encoded JPEG bytes travel with the batch so the original image can
    # be saved and annotated below.
    images_processed, images, labels = tf.train.batch(
        [image_processed, image, label],
        batch_size=FLAGS.batch_size,
        num_threads=FLAGS.num_preprocessing_threads,
        capacity=5 * FLAGS.batch_size)
    labels = tf.squeeze(labels)

    ####################
    # Define the model #
    ####################
    logits, end_points = network_fn(images_processed)

    checkpoint_path = FLAGS.checkpoint_path
    variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)
    sess = tf.Session()
    saver.restore(sess, checkpoint_path)

    n_top = 1          # number of top predictions to localise
    threshold = 0.75   # heat-map threshold handed to cam_utils.bounding_box
    annotations_dir = os.path.join(FLAGS.dataset_dir, 'test_data/annotations')
    sample = 0         # only the first sample of every batch is processed

    iou = []
    with sess:
        # Create a coordinator to manage the input threads and start the
        # queue runners so the filename queue gets filled.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for i in range(FLAGS.max_num_batches):
                images_, labels_, logits_, end_points_ = sess.run(
                    [images, labels, logits, end_points])
                # tf.squeeze turns a batch of one label into a 0-d array;
                # restore the batch axis so indexing works for any batch size.
                labels_ = np.atleast_1d(labels_)
                true_label = labels_[sample]

                image_ = Image.open(BytesIO(images_[sample]), 'r')
                image_.save(
                    os.path.join(
                        FLAGS.output_file,
                        'test_{0}_label_{1}.jpg'.format(i, true_label)))
                image_np = np.array(image_)

                logit_value = logits_[sample]
                feature_maps_A = end_points_['features_A'][sample]
                # NOTE(review): the original read `feature_maps_B` without
                # ever assigning it (NameError). The 'features_B' key is
                # inferred from the 'features_A' naming -- confirm against
                # the network's end points.
                feature_maps_B = end_points_['features_B'][sample]

                # Subtracting the max keeps exp() from overflowing without
                # changing the softmax values.
                exp_logits = np.exp(logit_value - np.max(logit_value))
                softmax = exp_logits / np.sum(exp_logits, axis=0)
                predictions = np.argsort(-logit_value)[:n_top]
                scores = -np.sort(-softmax)[:n_top]
                print(predictions)
                print(scores)
                print(true_label)

                # Generate the heat maps and min-max normalise each channel.
                cam_A = cam_utils.CAMmap(feature_maps_A, logit_value, n_top)
                cam_B = cam_utils.CAMmap(feature_maps_B, logit_value, n_top)
                for k in range(n_top):
                    fm_a = cam_A[:, :, k]
                    cam_A[:, :, k] = (fm_a - fm_a.min()) / (fm_a.max() -
                                                            fm_a.min())
                    fm_b = cam_B[:, :, k]
                    cam_B[:, :, k] = (fm_b - fm_b.min()) / (fm_b.max() -
                                                            fm_b.min())
                cam = np.maximum(cam_A, cam_B)

                im_height = image_np.shape[0]
                im_width = image_np.shape[1]

                # Resize each heat map to the image size and save it.
                cam_resize = np.zeros((im_height, im_width, n_top))
                for k in range(n_top):
                    heatmap_resize = Image.fromarray(cam[:, :, k]).resize(
                        (im_width, im_height), Image.BILINEAR)
                    cam_resize[:, :, k] = np.array(heatmap_resize)
                    heatmap = cam_utils.grey2rainbow(cam_resize[:, :, k] * 255)
                    heatmap = Image.fromarray(heatmap)
                    heatmap.save(
                        os.path.join(
                            FLAGS.output_file,
                            'test_{0}_heatmap_{1}.jpg'.format(i, k)))

                # Derive bounding boxes from the thresholded heat map.
                boxes = cam_utils.bounding_box(cam_resize, threshold)

                # Draw the detections on the image and save it.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    boxes,
                    predictions.astype(np.int32),
                    scores,
                    category_index,
                    use_normalized_coordinates=True,
                    min_score_thresh=0.001,
                    line_thickness=5)
                plt.imsave(
                    os.path.join(FLAGS.output_file,
                                 'test_{0}_output.jpg'.format(i)), image_np)

                # Compute the evaluation metric. Boxes are normalised here;
                # columns 0/2 are scaled by the height and 1/3 by the width
                # (presumably [ymin, xmin, ymax, xmax] -- TODO confirm).
                boxes_, classes_ = cam_utils.get_boxes(annotations_dir, i)
                boxes[:, [0, 2]] = boxes[:, [0, 2]] * im_height
                boxes[:, [1, 3]] = boxes[:, [1, 3]] * im_width
                if predictions[0] == true_label:
                    iou.append(np_box_ops.iou(boxes, boxes_)[0][0])

            # Mean IoU over correctly classified samples; NaN when none matched.
            mean_iou = np.mean(iou) if iou else float('nan')
            print(mean_iou)
        finally:
            # Always stop the input threads, even when a batch fails.
            coord.request_stop()
            coord.join(threads)
def main(_):
  """Assemble the TF-Slim training graph from FLAGS and start training.

  Builds the deployment config, dataset, network, preprocessing and input
  queues, creates one model clone per device with softmax cross-entropy
  losses, wires up summaries, optional moving averages and the optimizer, and
  finally calls slim.learning.train.

  Args:
    _: Unused positional argument.

  Raises:
    ValueError: If FLAGS.dataset_dir is empty.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    # ---- Deployment configuration -------------------------------------
    deploy_config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.worker_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # The global step lives on the variables device.
    with tf.device(deploy_config.variables_device()):
      global_step = slim.create_global_step()

    # ---- Dataset and network ------------------------------------------
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.dataset_dir)
    num_classes = dataset.num_classes - FLAGS.labels_offset

    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=num_classes,
        weight_decay=FLAGS.weight_decay,
        is_training=True)

    # ---- Preprocessing ------------------------------------------------
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        FLAGS.preprocessing_name or FLAGS.model_name,
        is_training=True)

    # ---- Input pipeline -----------------------------------------------
    with tf.device(deploy_config.inputs_device()):
      provider = slim.dataset_data_provider.DatasetDataProvider(
          dataset,
          num_readers=FLAGS.num_readers,
          common_queue_capacity=20 * FLAGS.batch_size,
          common_queue_min=10 * FLAGS.batch_size)
      image, label = provider.get(['image', 'label'])
      label = label - FLAGS.labels_offset

      side = FLAGS.train_image_size or network_fn.default_image_size
      image = image_preprocessing_fn(image, side, side)

      images, labels = tf.train.batch(
          [image, label],
          batch_size=FLAGS.batch_size,
          num_threads=FLAGS.num_preprocessing_threads,
          capacity=5 * FLAGS.batch_size)
      labels = slim.one_hot_encoding(labels, num_classes)
      # Two prefetched batches per clone.
      batch_queue = slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * deploy_config.num_clones)

    # ---- Model --------------------------------------------------------
    def clone_fn(batch_queue):
      """Allows data parallelism by creating multiple clones of network_fn."""
      with tf.device(deploy_config.inputs_device()):
        clone_images, clone_labels = batch_queue.dequeue()
      logits, end_points = network_fn(clone_images)

      # Losses: optional auxiliary head (weight 0.4) plus the main head.
      if 'AuxLogits' in end_points:
        tf.losses.softmax_cross_entropy(
            logits=end_points['AuxLogits'],
            onehot_labels=clone_labels,
            label_smoothing=FLAGS.label_smoothing,
            weights=0.4,
            scope='aux_loss')
      tf.losses.softmax_cross_entropy(
          logits=logits,
          onehot_labels=clone_labels,
          label_smoothing=FLAGS.label_smoothing,
          weights=1.0)
      return end_points

    # Snapshot the summaries that exist before the clones are built.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    clones = model_deploy.create_clones(deploy_config, clone_fn, [batch_queue])
    first_clone_scope = deploy_config.clone_scope(0)
    # Update ops of the first clone (e.g. batch-norm statistics created by
    # network_fn).
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Activation histograms and sparsity for every end point.
    for name, activation in clones[0].outputs.items():
      summaries.add(tf.summary.histogram('activations/' + name, activation))
      summaries.add(tf.summary.scalar('sparsity/' + name,
                                      tf.nn.zero_fraction(activation)))

    # One scalar per loss of the first clone.
    for clone_loss in tf.get_collection(tf.GraphKeys.LOSSES,
                                        first_clone_scope):
      summaries.add(
          tf.summary.scalar('losses/%s' % clone_loss.op.name, clone_loss))

    # One histogram per model variable.
    for model_var in slim.get_model_variables():
      summaries.add(tf.summary.histogram(model_var.op.name, model_var))

    # ---- Moving averages ----------------------------------------------
    moving_average_variables = None
    variable_averages = None
    if FLAGS.moving_average_decay:
      moving_average_variables = slim.get_model_variables()
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, global_step)

    # ---- Optimizer ----------------------------------------------------
    with tf.device(deploy_config.optimizer_device()):
      learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
      optimizer = _configure_optimizer(learning_rate)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    if FLAGS.sync_replicas:
      # With synchronous replicas the averaging happens in the chief queue
      # runner.
      optimizer = tf.train.SyncReplicasOptimizer(
          opt=optimizer,
          replicas_to_aggregate=FLAGS.replicas_to_aggregate,
          variable_averages=variable_averages,
          variables_to_average=moving_average_variables,
          replica_id=tf.constant(FLAGS.task, tf.int32, shape=()),
          total_num_replicas=FLAGS.worker_replicas)
    elif FLAGS.moving_average_decay:
      # Otherwise the trainer applies the averages locally.
      update_ops.append(variable_averages.apply(moving_average_variables))

    # Total loss and aggregated gradients over the trainable subset.
    total_loss, clones_gradients = model_deploy.optimize_clones(
        clones,
        optimizer,
        var_list=_get_variables_to_train())
    summaries.add(tf.summary.scalar('total_loss', total_loss))

    # The train op returns the loss after all updates have run.
    update_ops.append(
        optimizer.apply_gradients(clones_gradients, global_step=global_step))
    with tf.control_dependencies([tf.group(*update_ops)]):
      train_tensor = tf.identity(total_loss, name='train_op')

    # Pick up summaries created inside the first clone (by the model and by
    # optimize_clones() / _gather_clone_loss()).
    summaries.update(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    summary_op = tf.summary.merge(list(summaries), name='summary_op')

    # ---- Training loop ------------------------------------------------
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_dir,
        master=FLAGS.master,
        is_chief=(FLAGS.task == 0),
        init_fn=_get_init_fn(),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs,
        sync_optimizer=optimizer if FLAGS.sync_replicas else None)
def main(_):
  """Evaluate a checkpoint once, or dump selected end points to HDF5.

  The evaluation directory is derived from FLAGS.checkpoint_path (an 'eval'
  sub-directory next to / inside it) and stored in FLAGS.eval_dir. The graph
  is built from FLAGS (dataset, network, preprocessing, batching) and then:

    * if FLAGS.store_feat is set, the comma-separated end points are run for
      every batch and written, concatenated along axis 0, to
      FLAGS.store_feat_path as gzip-compressed HDF5 datasets;
    * otherwise streaming 'Accuracy' and 'Recall@5' are computed with
      slim.evaluation.evaluate_once.

  Args:
    _: Unused positional argument.

  Raises:
    ValueError: If FLAGS.dataset_dir is empty, or FLAGS.store_feat is set
      without FLAGS.store_feat_path.
    KeyError: If a requested end point does not exist in the network.
    OSError: If the evaluation directory cannot be created.
  """
  if not FLAGS.dataset_dir:
    raise ValueError('You must supply the dataset directory with --dataset_dir')

  # checkpoint_path may be a checkpoint file or a directory of checkpoints.
  if not os.path.isfile(FLAGS.checkpoint_path):
    FLAGS.eval_dir = os.path.join(FLAGS.checkpoint_path, 'eval')
  else:
    FLAGS.eval_dir = os.path.join(
        os.path.dirname(FLAGS.checkpoint_path), 'eval')

  try:
    os.makedirs(FLAGS.eval_dir)
  except OSError:
    # An already existing directory is fine; anything else is a real error.
    if not os.path.isdir(FLAGS.eval_dir):
      raise

  tf.logging.set_verbosity(tf.logging.INFO)
  with tf.Graph().as_default():
    tf_global_step = slim.get_or_create_global_step()

    ######################
    # Select the dataset #
    ######################
    dataset = dataset_factory.get_dataset(
        FLAGS.dataset_name, FLAGS.dataset_split_name,
        FLAGS.dataset_dir.split(','),
        FLAGS.dataset_list_dir,
        num_samples=FLAGS.frames_per_video,
        modality=FLAGS.modality,
        split_id=FLAGS.split_id)

    ####################
    # Select the model #
    ####################
    network_fn = nets_factory.get_network_fn(
        FLAGS.model_name,
        num_classes=(dataset.num_classes - FLAGS.labels_offset),
        batch_size=FLAGS.batch_size,
        is_training=False)

    ##############################################################
    # Create a dataset provider that loads data from the dataset #
    ##############################################################
    provider = dataset_data_provider.DatasetDataProvider(
        dataset,
        shuffle=FLAGS.force_random_shuffle,
        common_queue_capacity=2 * FLAGS.batch_size,
        common_queue_min=FLAGS.batch_size,
        bgr_flips=FLAGS.bgr_flip)
    [image, label] = provider.get(['image', 'label'])
    # The provider yields the label as a string; convert it to a scalar int64.
    label = tf.cast(tf.string_to_number(label, tf.int32), tf.int64)
    label.set_shape(())
    label -= FLAGS.labels_offset

    #####################################
    # Select the preprocessing function #
    #####################################
    preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
    image_preprocessing_fn = preprocessing_factory.get_preprocessing(
        preprocessing_name,
        is_training=False)

    eval_image_size = FLAGS.eval_image_size or network_fn.default_image_size

    image = image_preprocessing_fn(image, eval_image_size, eval_image_size,
                                   model_name=FLAGS.model_name,
                                   ncrops=FLAGS.ncrops,
                                   out_dim_scale=FLAGS.out_dim_scale)

    # A single batching thread is used when features are stored (presumably to
    # keep the sample order deterministic -- TODO confirm).
    images, labels = tf.train.batch(
        [image, label],
        batch_size=FLAGS.batch_size,
        num_threads=(1 if FLAGS.store_feat is not None
                     else FLAGS.num_preprocessing_threads),
        capacity=5 * FLAGS.batch_size)

    ####################
    # Define the model #
    ####################
    kwargs = {}
    if FLAGS.conv_endpoint is not None:
      kwargs['conv_endpoint'] = FLAGS.conv_endpoint
    logits, end_points = network_fn(
        images, pool_type=FLAGS.pooling,
        classifier_type=FLAGS.classifier_type,
        num_channels_stream=provider.num_channels_stream,
        netvlad_centers=FLAGS.netvlad_initCenters.split(','),
        stream_pool_type=FLAGS.stream_pool_type,
        **kwargs)
    # Expose the inputs so they can be requested through --store_feat too.
    end_points['images'] = images
    end_points['labels'] = labels

    if FLAGS.moving_average_decay:
      variable_averages = tf.train.ExponentialMovingAverage(
          FLAGS.moving_average_decay, tf_global_step)
      variables_to_restore = variable_averages.variables_to_restore(
          slim.get_model_variables())
      variables_to_restore[tf_global_step.op.name] = tf_global_step
    else:
      variables_to_restore = slim.get_variables_to_restore()

    predictions = tf.argmax(logits, 1)
    # rgirdhar: Because of the following, can't use with batch_size=1
    if FLAGS.batch_size > 1:
      labels = tf.squeeze(labels)

    # Define the metrics:
    names_to_values, names_to_updates = slim.metrics.aggregate_metric_map({
        'Accuracy': slim.metrics.streaming_accuracy(predictions, labels),
        'Recall@5': slim.metrics.streaming_recall_at_k(
            logits, labels, 5),
    })

    # Print the summaries to screen.
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    for name, value in names_to_values.items():
      summary_name = 'eval/%s' % name
      op = tf.scalar_summary(summary_name, value, collections=[])
      op = tf.Print(op, [value], summary_name)
      tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)

    # TODO(sguada) use num_epochs=1
    if FLAGS.max_num_batches:
      num_batches = FLAGS.max_num_batches
    else:
      # This ensures that we make a single pass over all of the data.
      num_batches = int(math.ceil(dataset.num_samples / float(FLAGS.batch_size)))

    if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
      checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    else:
      checkpoint_path = FLAGS.checkpoint_path

    tf.logging.info('Evaluating %s' % checkpoint_path)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True

    if FLAGS.store_feat is not None:
      # Explicit check instead of `assert`, which is stripped under -O.
      if FLAGS.store_feat_path is None:
        raise ValueError(
            'You must supply --store_feat_path when --store_feat is set')
      # Imported lazily: only the feature-dump path needs them.
      from tensorflow.python.training import supervisor
      from tensorflow.python.framework import ops
      import h5py

      saver = tf.train.Saver(variables_to_restore)
      # The supervisor is used only for session and queue-runner management;
      # restoring is done manually with `saver` below.
      sv = supervisor.Supervisor(graph=ops.get_default_graph(),
                                 logdir=None,
                                 summary_op=None,
                                 summary_writer=None,
                                 global_step=None,
                                 saver=None)

      ept_names_to_store = FLAGS.store_feat.split(',')
      missing = [el for el in ept_names_to_store if el not in end_points]
      if missing:
        logging.error('Endpoint not found')
        logging.error('Choose from %s' % ','.join(end_points.keys()))
        raise KeyError(','.join(missing))
      ept_to_store = [end_points[el] for el in ept_names_to_store]

      # One list of per-batch arrays for every requested end point.
      res = {epname: [] for epname in ept_names_to_store}
      with sv.managed_session(
          FLAGS.master, start_standard_services=False,
          config=config) as sess:
        saver.restore(sess, checkpoint_path)
        sv.start_queue_runners(sess)
        for j in range(num_batches):
          if j % 10 == 0:
            logging.info('Doing batch %d/%d' % (j, num_batches))
          feats = sess.run(ept_to_store)
          for epname, feat in zip(ept_names_to_store, feats):
            res[epname].append(feat)

      logging.info('Writing out features to %s' % FLAGS.store_feat_path)
      with h5py.File(FLAGS.store_feat_path, 'w') as fout:
        for epname, batches in res.items():
          fout.create_dataset(
              epname,
              data=np.concatenate(batches, axis=0),
              compression='gzip',
              compression_opts=FLAGS.feat_store_compression_opt)
    else:
      slim.evaluation.evaluate_once(
          master=FLAGS.master,
          checkpoint_path=checkpoint_path,
          logdir=FLAGS.eval_dir,
          num_evals=num_batches,
          # A real list: a Python 3 dict view is not a valid fetch structure.
          eval_op=list(names_to_updates.values()),
          variables_to_restore=variables_to_restore,
          session_config=config)