def __init__(self, model_def_file, class_labels_file):
    """Build the inference graph, restore the checkpoint and load the labels.

    Args:
        model_def_file: Checkpoint path handed to ``tf.train.Saver.restore``.
        class_labels_file: UTF-8 text file with one class label per line.
    """
    # Local import keeps this block self-contained; ``io.open`` decodes the
    # file as UTF-8 on both Python 2 and Python 3, whereas the previous
    # ``f.read().decode("utf-8")`` fails on Python 3 (``str`` has no decode).
    import io

    logging.info('Loading net and associated files...')
    with tf.Graph().as_default(), tf.device('cpu:0'):
        self.sess = tf.Session()

        # A single encoded JPEG is fed through this string placeholder.
        self.image_buffer = tf.placeholder(tf.string)
        image = tf.image.decode_jpeg(self.image_buffer, channels=3)
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        image = self.eval_image(image, 299, 299)
        # Shift and scale the pixel values: (x - 0.5) * 2.
        image = tf.sub(image, 0.5)
        image = tf.mul(image, 2.0)
        # Add the leading batch dimension expected by the model.
        images = tf.expand_dims(image, 0)

        # Run inference.
        _, predictions = inception_model.inference(
            images, NUM_CLASSES + 1)

        # Transform output to topK result.
        self.values, self.indices = tf.nn.top_k(
            predictions, NUM_TOP_CLASSES)

        # Restore the moving-average version of the variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()

        tf.initialize_all_variables().run(session=self.sess)
        tf.initialize_local_variables().run(session=self.sess)
        saver = tf.train.Saver(variables_to_restore)
        saver.restore(self.sess, model_def_file)

    # Required to get the filename matching to run.
    # Index 0 is the placeholder label 'none'; file labels follow in order.
    self.label_names = ['none']
    with io.open(class_labels_file, encoding='utf-8') as f:
        self.label_names.extend(f.read().splitlines())
def _tower_loss(images, labels, num_classes, scope, reuse_variables=None):
    """Build one ImageNet tower and return its total loss.

    The batch is split across GPUs ('batch splitting'): with a batch size of
    32 and two GPUs, each tower operates on 16 images.

    Args:
      images: Images. 4D tensor of size [batch_size, FLAGS.image_size,
        FLAGS.image_size, 3].
      labels: 1-D integer Tensor of [batch_size].
      num_classes: number of classes.
      scope: unique prefix string identifying the ImageNet tower, e.g.
        'tower_0'.
      reuse_variables: forwarded as ``reuse`` to ``tf.variable_scope``.

    Returns:
      Tensor of shape [] containing the total loss for a batch of data.
    """
    # Fine-tuning starts from randomly initialized logits, so the logits are
    # only restored when we are not fine-tuning.
    should_restore_logits = not FLAGS.fine_tune

    # Inference graph for this tower.
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        logits = inception.inference(
            images,
            num_classes,
            for_training=True,
            restore_logits=should_restore_logits,
            scope=scope)

    # Register the losses; the total is assembled by hand below.
    per_tower_batch = images.get_shape().as_list()[0]
    inception.loss(logits, labels, batch_size=per_tower_batch)

    # Losses that belong to this tower, plus all regularization losses.
    tower_losses = tf.get_collection(slim.losses.LOSSES_COLLECTION, scope)
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = tf.add_n(tower_losses + reg_losses, name='total_loss')

    # Moving averages over every individual loss and over the total.
    tracked = tower_losses + [total_loss]
    averager = tf.train.ExponentialMovingAverage(0.9, name='avg')
    update_averages = averager.apply(tracked)

    # Strip the 'tower_N/' prefix so multi-GPU runs share TensorBoard names.
    tower_prefix = '%s_[0-9]*/' % inception.TOWER_NAME
    for tensor in tracked:
        display_name = re.sub(tower_prefix, '', tensor.op.name)
        # The raw value gets the ' (raw)' suffix; the averaged value keeps
        # the plain loss name.
        tf.scalar_summary(display_name +' (raw)', tensor)
        tf.scalar_summary(display_name, averager.average(tensor))

    # Evaluating the returned loss also updates the moving averages.
    with tf.control_dependencies([update_averages]):
        total_loss = tf.identity(total_loss)
    return total_loss
def build_graph(self, for_training=True):
    """Assemble the graph: input ops, Inception logits, train and result ops.

    Args:
        for_training: Forwarded to ``inception.inference``; its negation is
            passed as ``restore_logits``.
    """
    print("Building graph...")

    # Input side of the graph.
    self.add_image_pre_processing()
    self.add_image_distortion()
    meta_features = self.add_meta_nn()

    # Inception network; the meta features are handed to the last layer.
    inference_options = dict(
        extra_to_last_layer=meta_features,
        for_training=for_training,
        restore_logits=not for_training,
    )
    self.logits = inception.inference(
        self.inception_input, self.class_count, **inference_options)

    # Output side of the graph.
    self.add_train_step()
    self.add_result_ops()
def evaluate(dataset):
    """Evaluate model on Dataset for a number of steps.

    Builds the inference graph, then calls ``_eval_once`` repeatedly, sleeping
    ``FLAGS.eval_interval_secs`` between rounds, until ``FLAGS.run_once`` is
    set.

    Args:
        dataset: Dataset object; provides ``num_classes()`` and is passed to
            ``image_processing.inputs``.

    Returns:
        Tuple ``(precision_at_1, current_score)`` from the last
        ``_eval_once`` call.

    Raises:
        ValueError: if ``FLAGS.mode`` is neither '0_softmax' nor '1_sigmoid'.
    """
    with tf.Graph().as_default():
        # Get images and labels from the dataset.
        images, labels, all_filenames, filename_queue = (
            image_processing.inputs(dataset))

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        num_classes = dataset.num_classes() + 1
        print("there are %d classes!" % dataset.num_classes())

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits, _, end_points, net2048, sel_end_points = inception.inference(
            images, num_classes)

        # Per-class prediction scores exposed by the model's end points.
        max_percent = end_points['predictions']

        if FLAGS.mode == '0_softmax':
            top_1_op = tf.nn.in_top_k(logits, labels, 1)
            top_5_op = tf.nn.in_top_k(logits, labels, 5)
        elif FLAGS.mode == '1_sigmoid':
            # No top-k ops are built in sigmoid mode.
            top_1_op = None
            top_5_op = None
        else:
            # Previously an unknown mode left both ops unbound and failed
            # later with an UnboundLocalError at the _eval_once call.
            raise ValueError(
                "Unsupported FLAGS.mode %r; expected '0_softmax' or "
                "'1_sigmoid'" % (FLAGS.mode,))

        # Restore the moving average version of the learned variables for
        # eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of
        # Summaries.
        summary_op = tf.summary.merge_all()

        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir,
                                               graph_def=graph_def)

        while True:
            precision_at_1, current_score = _eval_once(
                saver, summary_writer, top_1_op, top_5_op, summary_op,
                max_percent, all_filenames, filename_queue, net2048,
                sel_end_points, logits, labels)
            print("%s: Precision: %.4f " % (datetime.now(), precision_at_1))
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)

    return precision_at_1, current_score
def export():
    """Restore the latest checkpoint and write the graph definition to disk.

    The input ops are chosen by ``FLAGS.export_type`` ('mobile' or
    'inference'). The graph is written with ``tf.train.write_graph`` to the
    directory and file name taken from ``FLAGS.export_graph``. Prints a
    message and returns without writing when the export type is unknown or no
    checkpoint is found in ``FLAGS.checkpoint_dir``.
    """
    with tf.Graph().as_default(), tf.Session() as sess:
        if FLAGS.export_type == 'mobile':
            input_, image_raw = mobile_input()
        elif FLAGS.export_type == 'inference':
            input_, image_raw = inference_input()
        else:
            print('export_type must be mobile or inference, currently %s'
                  % (FLAGS.export_type))
            return

        logits, _ = inception_model.inference(input_, FLAGS.num_classes + 1)
        # Created for its named 'softmax' node in the exported graph.
        softmax = tf.nn.softmax(logits, name='softmax')

        # Restore the moving-average version of the variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Load checkpoint
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('No checkpoint file found')
            return

        if os.path.isabs(ckpt.model_checkpoint_path):
            # Restores from checkpoint with absolute path.
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            # Restores from checkpoint with relative path.
            saver.restore(sess, os.path.join(FLAGS.checkpoint_dir,
                                             ckpt.model_checkpoint_path))

        # Assuming model_checkpoint_path looks something like:
        #   /my-favorite-path/imagenet_train/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        print('Succesfully loaded model from %s at step=%s.' %
              (ckpt.model_checkpoint_path, global_step))

        # Write out graph def. The definition is serialized once and reused;
        # previously it was built twice and the first copy was discarded.
        graph_def = sess.graph.as_graph_def()
        tf.train.write_graph(graph_def,
                             os.path.dirname(FLAGS.export_graph),
                             os.path.basename(FLAGS.export_graph))
        print('Successfully converted checkpoint:\n %s/%s\n into proto\n %s\n with inputs of size %d' %
              (FLAGS.checkpoint_dir, ckpt.model_checkpoint_path,
               FLAGS.export_graph, FLAGS.image_size))
def evaluate_op(dataset):
    """Build the top-1 and top-5 correctness ops for `dataset`.

    Args:
        dataset: Dataset object; provides ``num_classes()`` and is passed to
            ``image_processing.inputs``.

    Returns:
        Tuple ``(top_1_op, top_5_op)`` of ``tf.nn.in_top_k`` ops.
    """
    # Images and labels from the dataset; the third output is not needed.
    images, labels, _ = image_processing.inputs(dataset)

    # The class count is used as-is here (no +1 for a background class).
    class_count = dataset.num_classes()

    # Logits from the inference model; the second output is discarded.
    logits, _ = inception.inference(images, class_count)

    # Whether the true label is among the 1 / 5 highest-scoring classes.
    top_1_op = tf.nn.in_top_k(logits, labels, 1)
    top_5_op = tf.nn.in_top_k(logits, labels, 5)
    return top_1_op, top_5_op
def predict(dataset):
    """Build the inference graph for `dataset` and run `_predict_once` on it.

    Args:
        dataset: Dataset object; provides ``num_classes()`` and is passed to
            ``image_processing.inputs``.
    """
    with tf.Graph().as_default():
        # Only the images and filenames are used; labels are ignored.
        images, _, filenames = image_processing.inputs(dataset)

        # The class count is used as-is here (no +1 for a background class).
        class_count = dataset.num_classes()

        # Logits from the inference model; the second output is discarded.
        logits, _ = inception.inference(images, class_count)

        # The saver restores the moving-average version of the variables.
        ema = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        _predict_once(saver, filenames, logits)
def test(dataset):
    """Run the model over `dataset` and write a zipped CSV submission.

    The CSV has a header row ``img, c0 .. c9`` followed by one row per image
    (filename plus the softmax output). It is written to ``FLAGS.test_dir`` as
    ``submission-<timestamp>.csv`` and then zipped next to it as
    ``submission-<timestamp>.csv.zip``.

    Args:
        dataset: Dataset object; provides ``num_classes()`` and is passed to
            ``image_processing.inputs``.
    """
    with tf.Graph().as_default():
        # Get images and filenames from the dataset; labels are not used.
        images, _, filenames = image_processing.inputs(dataset)

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        num_classes = dataset.num_classes() + 1

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits, _ = inception.inference(images, num_classes)

        # Drop logit column 0 before applying the softmax.
        output = tf.nn.softmax(tf.slice(logits, [0, 1], [-1, -1]),
                               name='output')

        # Restore the moving average version of the learned variables for
        # eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        results = _test(saver, filenames, output)

        current_time = datetime.now().strftime('%Y-%m-%d-%Hh%Mm%Ss')
        csvfilename = os.path.join(
            FLAGS.test_dir, 'submission-{}.csv'.format(current_time))
        # csvfilename already contains FLAGS.test_dir. Joining the directory
        # a second time produced '<test_dir>/<test_dir>/...' for relative
        # paths, so the zip name is derived from csvfilename alone.
        zipfilename = '{}.zip'.format(csvfilename)

        # NOTE(review): 'wb' is the Python 2 convention for csv files; on
        # Python 3 csv.writer needs a text-mode file (mode 'w', newline='').
        with open(csvfilename, 'wb') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
            writer.writerow(['img', 'c0', 'c1', 'c2', 'c3', 'c4',
                             'c5', 'c6', 'c7', 'c8', 'c9'])
            for batch_result in results:
                for filename, result in batch_result:
                    writer.writerow([filename] + result.tolist())

        with zipfile.ZipFile(zipfilename, 'w') as myzip:
            myzip.write(csvfilename)

        print('Submission available at: %s' % (zipfilename))
def evaluate(dataset):
    """Evaluate model on Dataset for a number of steps.

    Builds the inference graph, then calls ``_eval_once`` repeatedly, sleeping
    ``FLAGS.eval_interval_secs`` between rounds, until ``FLAGS.run_once`` is
    set.

    Args:
        dataset: Dataset object; provides ``num_classes()`` and is passed to
            ``image_processing.inputs``.
    """
    with tf.Graph().as_default():
        # Get images and labels from the dataset.
        images, labels = image_processing.inputs(dataset)

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        num_classes = dataset.num_classes() + 1

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits, _ = inception.inference(images, num_classes)

        # The former ``max_percent = end_points['predictions']`` line was
        # removed: ``end_points`` is never defined in this function and the
        # value was not used afterwards.

        # Restore the moving average version of the learned variables for
        # eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of
        # Summaries.
        summary_op = tf.summary.merge_all()

        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir,
                                               graph_def=graph_def)

        while True:
            _eval_once(saver, summary_writer, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
def export():
    """Export the Inception model with a string-in, top-K-classes-out signature.

    Reads the synset list and the synset metadata, builds the inference graph
    on a placeholder of encoded JPEG strings, restores the latest checkpoint
    from ``FLAGS.checkpoint_dir`` and exports the model to
    ``FLAGS.export_dir``. Prints a message and returns without exporting when
    no checkpoint is found.
    """
    # index -> synset mapping, one synset per line.
    with open(SYNSET_FILE) as synset_file:
        synsets = synset_file.read().splitlines()

    # synset -> metadata mapping, one '<synset>\t<text>' pair per line.
    texts = {}
    with open(METADATA_FILE) as metadata_file:
        for row in metadata_file.read().splitlines():
            fields = row.split('\t')
            assert len(fields) == 2
            texts[fields[0]] = fields[1]

    with tf.Graph().as_default():
        # Build inference model.
        # Please refer to Tensorflow inception model for details.

        # Input transformation: every JPEG string goes through
        # preprocess_image.
        jpegs = tf.placeholder(tf.string)
        images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)

        # Run inference.
        logits, _ = inception_model.inference(images, NUM_CLASSES + 1)

        # Transform output to topK result.
        values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)

        # Constant string Tensor whose i'th element is the human readable
        # description of class i. Index 0 is the unused background class
        # (see inception model definition code).
        class_descriptions = ['unused background']
        class_descriptions.extend(texts[synset] for synset in synsets)
        class_tensor = tf.constant(class_descriptions)

        classes = tf.contrib.lookup.index_to_string(tf.to_int64(indices),
                                                    mapping=class_tensor)

        # The saver restores the moving-average version of the variables.
        ema = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        with tf.Session() as sess:
            # Restore variables from training checkpoints.
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
                return

            saver.restore(sess, ckpt.model_checkpoint_path)
            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/imagenet_train/model.ckpt-0,
            # extract global_step from it.
            checkpoint_name = ckpt.model_checkpoint_path.split('/')[-1]
            global_step = checkpoint_name.split('-')[-1]
            print('Successfully loaded model from %s at step=%s.'
                  % (ckpt.model_checkpoint_path, global_step))

            # Export inference model.
            init_op = tf.group(tf.initialize_all_tables(), name='init_op')
            signatures = {
                'inputs': exporter.generic_signature({'images': jpegs}),
                'outputs': exporter.generic_signature({'classes': classes,
                                                       'scores': values}),
            }
            model_exporter = exporter.Exporter(saver)
            model_exporter.init(init_op=init_op,
                                named_graph_signatures=signatures)
            model_exporter.export(FLAGS.export_dir,
                                  tf.constant(global_step), sess)
            print('Successfully exported model to %s' % FLAGS.export_dir)
def main(img_dir):
    """Score every .png/.jpg image in `img_dir` with the restored model.

    Builds the graph on a single-image uint8 placeholder, restores the latest
    checkpoint from ``FLAGS.ckpt_dir``, prints one score per image and, when
    ``SAVE_MODEL`` is set, saves the model to ``./saved_models``. Prints a
    message and returns when no checkpoint is found.

    Side effect: lowers the module-level ``BATCH_SIZE`` to the number of
    images when fewer images than ``BATCH_SIZE`` are found.

    Args:
        img_dir: Directory to scan (non-recursively) for images.
    """
    global BATCH_SIZE

    # List of all images to process.
    filelist = []
    for entry in os.listdir(img_dir):
        ext = os.path.splitext(entry)[1]
        if ext in ('.png', '.jpg'):
            filelist.append(os.path.join(img_dir, entry))
    print("Running inference on images", filelist)

    if len(filelist) < BATCH_SIZE:
        BATCH_SIZE = len(filelist)

    # build the tensorflow graph.
    with tf.Graph().as_default():
        input_shape = [IMAGE_SIZE, IMAGE_SIZE, 3]
        final_shape = [1, IMAGE_SIZE, IMAGE_SIZE, 3]
        img_placeholder = tf.placeholder(tf.uint8, shape=input_shape)
        print(img_placeholder.shape)

        # reshape to add batch size dimension
        img = tf.reshape(img_placeholder, final_shape)
        # cast to float
        img = tf.dtypes.cast(img, dtype=tf.float32)
        # normalize input to values in range [0,1]
        img = img / 255.0
        print('Image shape {}'.format(img.shape))

        logits, _ = inception.inference(img, NUM_CLASSES)

        saver = tf.train.Saver(tf.all_variables())
        ckpt = tf.train.get_checkpoint_state(FLAGS.ckpt_dir)

        sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
        with sess:
            if not (ckpt and ckpt.model_checkpoint_path):
                # Nothing below can run with uninitialized variables, so
                # stop here instead of failing inside sess.run.
                print('No checkpoint file found')
                return

            # Restores from checkpoint
            print('Restoring trained model from checkpoint')
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Checkpoint loaded')

            for files_batch in chunks(filelist, BATCH_SIZE):
                start_time = time.time()
                image_list = [load_image(path) for path in files_batch]
                image_batch = np.array(image_list)
                print(image_batch.shape)
                # NOTE(review): the placeholder holds exactly one image, so
                # this reshape only succeeds when the batch contains a single
                # image.
                image_batch = np.reshape(
                    image_batch, [IMAGE_SIZE, IMAGE_SIZE, 3])
                score = sess.run(logits,
                                 feed_dict={img_placeholder: image_batch})
                # Two-class softmax probability of class 1.
                pos_score = np.exp(score[:, 1]) / (
                    np.exp(score[:, 1]) + np.exp(score[:, 0]))
                # Pair files with scores directly; indexing by BATCH_SIZE
                # overran the final batch when it was shorter than
                # BATCH_SIZE.
                for path, prob in zip(files_batch, pos_score):
                    print("Score %s : %f" % (path, prob))
                duration = time.time() - start_time
                print("Batch done Duration: " + str(duration))

            if SAVE_MODEL:
                save_dir = './saved_models'
                print('Saving model for deployment in directory {}'.format(
                    save_dir))
                tf.saved_model.simple_save(
                    sess,
                    save_dir,
                    inputs={'image': img_placeholder},
                    outputs={'predictions': logits})
def inference(images):
    """Return the logits of the Inception model for `images`.

    Calls ``inception_model.inference`` with 1001 classes and discards the
    second value it returns.
    """
    num_classes = 1001
    logits, _unused = inception_model.inference(images, num_classes)
    return logits
def evaluate(dataset):
    """Evaluate model on Dataset for a number of steps.

    Builds the inference graph, then calls ``_eval_once`` repeatedly, sleeping
    ``FLAGS.eval_interval_secs`` between rounds, until ``FLAGS.run_once`` is
    set.

    Args:
        dataset: Dataset object; provides ``num_classes()`` and is passed to
            ``image_processing.inputs``.

    Returns:
        Tuple ``(precision_at_1, current_score)`` from the last
        ``_eval_once`` call.
    """
    with tf.Graph().as_default():
        # Get images and labels from the dataset.
        images, labels, all_filenames, filename_queue = (
            image_processing.inputs(dataset))

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        num_classes = dataset.num_classes() + 1

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits, _, end_points, net2048, sel_end_points = inception.inference(
            images, num_classes)

        # Per-class prediction scores exposed by the model's end points.
        max_percent = end_points['predictions']

        # Restore the moving average version of the learned variables for
        # eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of
        # Summaries.
        summary_op = tf.summary.merge_all()

        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir,
                                               graph_def=graph_def)

        # No tower loss is built for evaluation; _eval_once receives False in
        # its place.
        loss = False

        while True:
            precision_at_1, current_score = _eval_once(
                saver, summary_writer, summary_op, max_percent,
                all_filenames, filename_queue, net2048, sel_end_points,
                logits, labels, loss)
            print("%s: Precision: %.4f --------------------" %
                  (datetime.now(), precision_at_1))
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)

    return precision_at_1, current_score
def train(target, dataset, cluster_spec):
    """Train Inception on a dataset for a number of steps.

    Builds the synchronous-replica training graph on this worker, obtains a
    session through a ``tf.train.Supervisor`` and runs the training loop until
    the supervisor asks to stop or ``FLAGS.max_steps`` is exceeded.

    Args:
        target: Session target passed to
            ``Supervisor.prepare_or_wait_for_session``.
        dataset: Dataset object; provides ``num_examples_per_epoch()`` and
            ``num_classes()`` and is passed to
            ``image_processing.distorted_inputs``.
        cluster_spec: Cluster specification whose ``as_dict()`` contains the
            'worker' and 'ps' host lists.

    Raises:
        AssertionError: if there are no workers or parameter servers, if no
            batchnorm update ops were collected, or if the loss becomes NaN.
    """
    # Number of workers and parameter servers are inferred from the workers and ps
    # hosts string.
    num_workers = len(cluster_spec.as_dict()['worker'])
    num_parameter_servers = len(cluster_spec.as_dict()['ps'])
    # If no value is given, num_replicas_to_aggregate defaults to be the number of
    # workers.
    if FLAGS.num_replicas_to_aggregate == -1:
        num_replicas_to_aggregate = num_workers
    else:
        num_replicas_to_aggregate = FLAGS.num_replicas_to_aggregate

    # Both should be greater than 0 in a distributed training.
    assert num_workers > 0 and num_parameter_servers > 0, (' num_workers and '
                                                           'num_parameter_servers'
                                                           ' must be > 0.')

    # Choose worker 0 as the chief. Note that any worker could be the chief
    # but there should be only one chief.
    is_chief = (FLAGS.task_id == 0)

    # Ops are assigned to worker by default.
    with tf.device('/job:worker/task:%d' % FLAGS.task_id):
        # Variables and its related init/assign ops are assigned to ps.
        with slim.scopes.arg_scope(
                [slim.variables.variable, slim.variables.global_step],
                device=slim.variables.VariableDeviceChooser(num_parameter_servers)):
            # Create a variable to count the number of train() calls. This equals the
            # number of updates applied to the variables.
            global_step = slim.variables.global_step()

            # Calculate the learning rate schedule.
            num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                                     FLAGS.batch_size)
            # Decay steps need to be divided by the number of replicas to aggregate.
            decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay /
                              num_replicas_to_aggregate)

            # Decay the learning rate exponentially based on the number of steps.
            lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                            global_step,
                                            decay_steps,
                                            FLAGS.learning_rate_decay_factor,
                                            staircase=True)
            # Add a summary to track the learning rate.
            tf.summary.scalar('learning_rate', lr)

            # Create an optimizer that performs gradient descent.
            opt = tf.train.RMSPropOptimizer(lr,
                                            RMSPROP_DECAY,
                                            momentum=RMSPROP_MOMENTUM,
                                            epsilon=RMSPROP_EPSILON)

            images, labels = image_processing.distorted_inputs(
                dataset,
                batch_size=FLAGS.batch_size,
                num_preprocess_threads=FLAGS.num_preprocess_threads)

            # Number of classes in the Dataset label set plus 1.
            # Label 0 is reserved for an (unused) background class.
            num_classes = dataset.num_classes() + 1
            logits = inception.inference(images, num_classes, for_training=True)
            # Add classification loss.
            inception.loss(logits, labels)

            # Gather all of the losses including regularization losses.
            losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
            losses += tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)

            total_loss = tf.add_n(losses, name='total_loss')

            # Loss averages and loss summaries are only built on the chief.
            if is_chief:
                # Compute the moving average of all individual losses and the
                # total loss.
                loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
                loss_averages_op = loss_averages.apply(losses + [total_loss])

                # Attach a scalar summary to all individual losses and the total loss;
                # do the same for the averaged version of the losses.
                for l in losses + [total_loss]:
                    loss_name = l.op.name
                    # Name each loss as '(raw)' and name the moving average version of the
                    # loss as the original loss name.
                    tf.summary.scalar(loss_name + ' (raw)', l)
                    tf.summary.scalar(loss_name, loss_averages.average(l))

                # Add dependency to compute loss_averages.
                with tf.control_dependencies([loss_averages_op]):
                    total_loss = tf.identity(total_loss)

            # Track the moving averages of all trainable variables.
            # Note that we maintain a 'double-average' of the BatchNormalization
            # global statistics.
            # This is not needed when the number of replicas are small but important
            # for synchronous distributed training with tens of workers/replicas.
            exp_moving_averager = tf.train.ExponentialMovingAverage(
                inception.MOVING_AVERAGE_DECAY, global_step)

            variables_to_average = (
                tf.trainable_variables() + tf.moving_average_variables())

            # Add histograms for model variables.
            for var in variables_to_average:
                tf.summary.histogram(var.op.name, var)

            # Create synchronous replica optimizer.
            opt = tf.train.SyncReplicasOptimizer(
                opt,
                replicas_to_aggregate=num_replicas_to_aggregate,
                total_num_replicas=num_workers,
                variable_averages=exp_moving_averager,
                variables_to_average=variables_to_average)

            batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION)
            assert batchnorm_updates, 'Batchnorm updates are missing'
            batchnorm_updates_op = tf.group(*batchnorm_updates)
            # Add dependency to compute batchnorm_updates.
            with tf.control_dependencies([batchnorm_updates_op]):
                total_loss = tf.identity(total_loss)

            # Compute gradients with respect to the loss.
            grads = opt.compute_gradients(total_loss)

            # Add histograms for gradients.
            for grad, var in grads:
                if grad is not None:
                    tf.summary.histogram(var.op.name + '/gradients', grad)

            apply_gradients_op = opt.apply_gradients(grads, global_step=global_step)

            # Evaluating train_op applies the gradients and yields the loss.
            with tf.control_dependencies([apply_gradients_op]):
                train_op = tf.identity(total_loss, name='train_op')

            # Get chief queue_runners and init_tokens, which is used to synchronize
            # replicas. More details can be found in SyncReplicasOptimizer.
            chief_queue_runners = [opt.get_chief_queue_runner()]
            init_tokens_op = opt.get_init_tokens_op()

            # Create a saver.
            saver = tf.train.Saver()

            # Build the summary operation based on the TF collection of Summaries.
            summary_op = tf.summary.merge_all()

            # Build an initialization operation to run below.
            init_op = tf.global_variables_initializer()

            # We run the summaries in the same thread as the training operations by
            # passing in None for summary_op to avoid a summary_thread being started.
            # Running summaries and training operations in parallel could run out of
            # GPU memory.
            sv = tf.train.Supervisor(is_chief=is_chief,
                                     logdir=FLAGS.train_dir,
                                     init_op=init_op,
                                     summary_op=None,
                                     global_step=global_step,
                                     saver=saver,
                                     save_model_secs=FLAGS.save_interval_secs)

            tf.logging.info('%s Supervisor' % datetime.now())

            sess_config = tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=FLAGS.log_device_placement)

            # Get a session.
            sess = sv.prepare_or_wait_for_session(target, config=sess_config)

            # Start the queue runners.
            queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
            sv.start_queue_runners(sess, queue_runners)
            tf.logging.info('Started %d queues for processing input data.',
                            len(queue_runners))

            # Only the chief starts the sync queue runners and runs the
            # init-tokens op.
            if is_chief:
                sv.start_queue_runners(sess, chief_queue_runners)
                sess.run(init_tokens_op)

            # Train, checking for Nans. Concurrently run the summary operation at a
            # specified interval. Note that the summary_op and train_op never run
            # simultaneously in order to prevent running out of GPU memory.
            next_summary_time = time.time() + FLAGS.save_summaries_secs
            while not sv.should_stop():
                try:
                    # A full trace is requested on every training step.
                    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    start_time = time.time()
                    loss_value, step = sess.run([train_op, global_step],
                                                options=run_options,
                                                run_metadata=run_metadata)
                    assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
                    if step > FLAGS.max_steps:
                        break
                    duration = time.time() - start_time

                    # NOTE(review): the timeline object is built but never
                    # used or written out in this function - presumably a
                    # profiling leftover; confirm before removing.
                    tl = timeline.Timeline(run_metadata.step_stats)

                    examples_per_sec = FLAGS.batch_size / float(duration)
                    format_str = ('Worker %d: %s: step %d, loss = %.2f'
                                  '(%.1f examples/sec; %.3f sec/batch)')
                    tf.logging.info(format_str %
                                    (FLAGS.task_id, datetime.now(), step, loss_value,
                                     examples_per_sec, duration))

                    # Terminate the job on the 100th iteration
                    # NOTE(review): exit() raises SystemExit, which the bare
                    # except below catches and re-raises, so the chief logs an
                    # exception message on this path.
                    if step == 100:
                        exit()

                    # Determine if the summary_op should be run on the chief worker.
                    if is_chief and next_summary_time < time.time():
                        tf.logging.info('Running Summary operation on the chief.')
                        summary_str = sess.run(summary_op)
                        sv.summary_computed(sess, summary_str)
                        tf.logging.info('Finished running Summary operation.')

                        # Determine the next time for running the summary.
                        next_summary_time += FLAGS.save_summaries_secs
                except:
                    # Log on the chief, then propagate whatever was raised.
                    if is_chief:
                        tf.logging.info('Chief got exception while running!')
                    raise

            # NOTE(review): the function returns without stopping the
            # supervisor or saving a final checkpoint - confirm this is
            # intended.
            return
def _tower_loss(images, labels, num_classes, scope, reuse_variables=None):
    """Compute the total loss of a single tower running the ImageNet model.

    A batch is cut up across GPUs ('batch splitting'); e.g. with batch size 32
    and 2 GPUs every tower works on a batch of 16 images.

    Args:
      images: Images. 4D tensor of size [batch_size, FLAGS.image_size,
        FLAGS.image_size, 3].
      labels: 1-D integer Tensor of [batch_size].
      num_classes: number of classes.
      scope: unique prefix string identifying the ImageNet tower, e.g.
        'tower_0'.
      reuse_variables: value handed to ``tf.variable_scope`` as ``reuse``.

    Returns:
      Tensor of shape [] containing the total loss for a batch of data.
    """
    # The logits are randomly initialized (not restored) when fine-tuning;
    # their width is the number of classes of the specified Dataset.
    restore_logits = not FLAGS.fine_tune

    # --- Inference graph -------------------------------------------------
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        tower_logits = inception.inference(images, num_classes,
                                           for_training=True,
                                           restore_logits=restore_logits,
                                           scope=scope)

    # --- Loss graph --------------------------------------------------------
    # inception.loss only registers the losses; the total is assembled here.
    batch_dim = images.get_shape().as_list()[0]
    inception.loss(tower_logits, labels, batch_size=batch_dim)

    # Losses registered under this tower's scope.
    scoped_losses = tf.get_collection(slim.losses.LOSSES_COLLECTION, scope)
    # All regularization losses.
    weight_penalties = tf.get_collection(
        tf.GraphKeys.REGULARIZATION_LOSSES)
    total_loss = tf.add_n(scoped_losses + weight_penalties,
                          name='total_loss')

    # --- Moving averages and summaries -----------------------------------
    ema = tf.train.ExponentialMovingAverage(0.9, name='avg')
    ema_update_op = ema.apply(scoped_losses + [total_loss])

    for loss_tensor in scoped_losses + [total_loss]:
        # Drop 'tower_[0-9]/' from the name so multi-GPU sessions read
        # clearly on TensorBoard.
        summary_name = re.sub('%s_[0-9]*/' % inception.TOWER_NAME, '',
                              loss_tensor.op.name)
        # Raw value is tagged '(raw)'; the moving average keeps the original
        # loss name.
        tf.summary.scalar(summary_name + ' (raw)', loss_tensor)
        tf.summary.scalar(summary_name, ema.average(loss_tensor))

    # Make the returned tensor depend on the moving-average update.
    with tf.control_dependencies([ema_update_op]):
        total_loss = tf.identity(total_loss)

    return total_loss
def train(target, dataset, cluster_spec, ctx):
    """Train Inception on a dataset for a number of steps.

    Builds the synchronous-replica training graph on this worker, starts a
    `tf.train.Supervisor` session and runs the training loop until the
    supervisor asks to stop or the global step exceeds `FLAGS.max_steps`.

    Args:
      target: session target handed to `sv.prepare_or_wait_for_session`.
      dataset: object providing `num_examples_per_epoch()` and
        `num_classes()`.
      cluster_spec: object whose `as_dict()` contains 'worker' and 'ps'
        entries; their lengths give the worker / parameter-server counts.
      ctx: context object; `ctx.worker_num` names the TensorBoard directory
        and `ctx.mgr` backs the `TFNode.DataFeed`.

    Raises:
      AssertionError: if there are no workers or parameter servers, if no
        batchnorm update ops were collected, or if the loss becomes NaN.
    """
    # Number of workers and parameter servers are inferred from the workers
    # and ps hosts string.
    num_workers = len(cluster_spec.as_dict()['worker'])
    num_parameter_servers = len(cluster_spec.as_dict()['ps'])
    # If no value is given, num_replicas_to_aggregate defaults to be the
    # number of workers.
    if FLAGS.num_replicas_to_aggregate == -1:
        num_replicas_to_aggregate = num_workers
    else:
        num_replicas_to_aggregate = FLAGS.num_replicas_to_aggregate

    # Both should be greater than 0 in a distributed training.
    assert num_workers > 0 and num_parameter_servers > 0, (
        ' num_workers and '
        'num_parameter_servers'
        ' must be > 0.')

    # Choose worker 0 as the chief. Note that any worker could be the chief
    # but there should be only one chief.
    is_chief = (FLAGS.task_id == 0)

    # Ops are assigned to worker by default.
    with tf.device('/job:worker/task:%d' % FLAGS.task_id):
        # Variables and its related init/assign ops are assigned to ps.
        with slim.scopes.arg_scope(
                [slim.variables.variable, slim.variables.global_step],
                device=slim.variables.VariableDeviceChooser(
                    num_parameter_servers)):
            # Create a variable to count the number of train() calls. This
            # equals the number of updates applied to the variables.
            global_step = slim.variables.global_step()

            # Calculate the learning rate schedule.
            num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                                     FLAGS.batch_size)
            # Decay steps need to be divided by the number of replicas to
            # aggregate.
            decay_steps = int(num_batches_per_epoch *
                              FLAGS.num_epochs_per_decay /
                              num_replicas_to_aggregate)

            # Decay the learning rate exponentially based on the number of
            # steps.
            lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                            global_step,
                                            decay_steps,
                                            FLAGS.learning_rate_decay_factor,
                                            staircase=True)
            # Add a summary to track the learning rate.
            tf.summary.scalar('learning_rate', lr)

            # Create an optimizer that performs gradient descent.
            opt = tf.train.RMSPropOptimizer(lr,
                                            RMSPROP_DECAY,
                                            momentum=RMSPROP_MOMENTUM,
                                            epsilon=RMSPROP_EPSILON)

            if FLAGS.input_mode == 'spark':
                def feed_dict(feed_batch):
                    """Return the first element of each fed item as a str."""
                    # extract TFRecords, since feed_batch is
                    # [(TFRecord, None)]
                    tfrecords = []
                    for elem in feed_batch:
                        tfrecords.append(str(elem[0]))
                    return tfrecords

                # NOTE(review): `/` yields a float under Python 3; this shape
                # presumably relies on Python 2 integer division - confirm.
                batch = tf.placeholder(
                    tf.string,
                    [FLAGS.batch_size / FLAGS.num_preprocess_threads])

                # The following is adapted from image_processing.py to remove
                # Readers/QueueRunners.
                # Note: this removes the RandomShuffledQueue, so the incoming
                # data is not shuffled. Presumably, this could be done on the
                # Spark side or done in additional TF code.
                examples = tf.unstack(batch)
                images, labels = [], []
                for example_serialized in examples:
                    # Each serialized example is preprocessed once per
                    # thread id, so the lists end up with
                    # len(examples) * num_preprocess_threads entries.
                    for thread_id in range(FLAGS.num_preprocess_threads):
                        # Parse a serialized Example proto to extract the
                        # image and metadata.
                        image_buffer, label_index, bbox, _ = image_processing.parse_example_proto(
                            example_serialized)
                        # NOTE(review): the third argument is this `train`
                        # function object, not a boolean; presumably it is
                        # only used as a truthy flag - confirm against
                        # image_preprocessing.
                        image = image_processing.image_preprocessing(
                            image_buffer, bbox, train, thread_id)
                        images.append(image)
                        labels.append(label_index)
                height = FLAGS.image_size
                width = FLAGS.image_size
                depth = 3
                images = tf.cast(images, tf.float32)
                images = tf.reshape(
                    images, shape=[FLAGS.batch_size, height, width, depth])
                tf.summary.image('images', images)
                labels = tf.reshape(labels, [FLAGS.batch_size])
            else:
                images, labels = image_processing.distorted_inputs(
                    dataset,
                    batch_size=FLAGS.batch_size,
                    num_preprocess_threads=FLAGS.num_preprocess_threads)

            # Number of classes in the Dataset label set plus 1.
            # Label 0 is reserved for an (unused) background class.
            num_classes = dataset.num_classes() + 1
            logits = inception.inference(images, num_classes,
                                         for_training=True)
            # Add classification loss.
            inception.loss(logits, labels)

            # Gather all of the losses including regularization losses.
            losses = tf.get_collection(slim.losses.LOSSES_COLLECTION)
            losses += tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)

            total_loss = tf.add_n(losses, name='total_loss')

            if is_chief:
                # Compute the moving average of all individual losses and
                # the total loss.
                loss_averages = tf.train.ExponentialMovingAverage(
                    0.9, name='avg')
                loss_averages_op = loss_averages.apply(losses + [total_loss])

                # Attach a scalar summary to all individual losses and the
                # total loss; do the same for the averaged version of the
                # losses.
                for l in losses + [total_loss]:
                    loss_name = l.op.name
                    # Name each loss as '(raw)' and name the moving average
                    # version of the loss as the original loss name.
                    tf.summary.scalar(loss_name + ' (raw)', l)
                    tf.summary.scalar(loss_name, loss_averages.average(l))

                # Add dependency to compute loss_averages.
                with tf.control_dependencies([loss_averages_op]):
                    total_loss = tf.identity(total_loss)

            # Track the moving averages of all trainable variables.
            # Note that we maintain a 'double-average' of the
            # BatchNormalization global statistics.
            # This is not needed when the number of replicas are small but
            # important for synchronous distributed training with tens of
            # workers/replicas.
            exp_moving_averager = tf.train.ExponentialMovingAverage(
                inception.MOVING_AVERAGE_DECAY, global_step)

            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())

            # Add histograms for model variables.
            for var in variables_to_average:
                tf.summary.histogram(var.op.name, var)

            # Create synchronous replica optimizer (wraps the RMSProp
            # optimizer created above).
            opt = tf.train.SyncReplicasOptimizer(
                opt,
                replicas_to_aggregate=num_replicas_to_aggregate,
                total_num_replicas=num_workers,
                variable_averages=exp_moving_averager,
                variables_to_average=variables_to_average)

            batchnorm_updates = tf.get_collection(
                slim.ops.UPDATE_OPS_COLLECTION)
            assert batchnorm_updates, 'Batchnorm updates are missing'
            batchnorm_updates_op = tf.group(*batchnorm_updates)
            # Add dependency to compute batchnorm_updates.
            with tf.control_dependencies([batchnorm_updates_op]):
                total_loss = tf.identity(total_loss)

            # Compute gradients with respect to the loss.
            grads = opt.compute_gradients(total_loss)

            # Add histograms for gradients.
            for grad, var in grads:
                if grad is not None:
                    tf.summary.histogram(var.op.name + '/gradients', grad)

            apply_gradients_op = opt.apply_gradients(
                grads, global_step=global_step)

            # Fetching train_op yields the loss value and forces the
            # gradient application to run first.
            with tf.control_dependencies([apply_gradients_op]):
                train_op = tf.identity(total_loss, name='train_op')

            # Get the chief queue runner and the init-tokens op, which are
            # used to synchronize replicas.
            # More details can be found in sync_replicas_optimizer.
            chief_queue_runners = [opt.get_chief_queue_runner()]
            init_tokens_op = opt.get_init_tokens_op()

            # Create a saver.
            saver = tf.train.Saver()

            # Build the summary operation based on the TF collection of
            # Summaries.
            summary_op = tf.summary.merge_all()

            # Build an initialization operation to run below.
            init_op = tf.global_variables_initializer()

            # We run the summaries in the same thread as the training
            # operations by passing in None for summary_op to avoid a
            # summary_thread being started. Running summaries and training
            # operations in parallel could run out of GPU memory.
            summary_writer = tf.summary.FileWriter(
                "tensorboard_%d" % ctx.worker_num,
                graph=tf.get_default_graph())
            sv = tf.train.Supervisor(is_chief=is_chief,
                                     logdir=FLAGS.train_dir,
                                     init_op=init_op,
                                     summary_op=None,
                                     global_step=global_step,
                                     summary_writer=summary_writer,
                                     saver=saver,
                                     save_model_secs=FLAGS.save_interval_secs)

            tf.logging.info('%s Supervisor' % datetime.now())

            sess_config = tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=FLAGS.log_device_placement)

            # Get a session.
            sess = sv.prepare_or_wait_for_session(target, config=sess_config)

            # Start the queue runners.
            queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
            sv.start_queue_runners(sess, queue_runners)
            tf.logging.info('Started %d queues for processing input data.',
                            len(queue_runners))

            if is_chief:
                sv.start_queue_runners(sess, chief_queue_runners)
                sess.run(init_tokens_op)

            # Train, checking for Nans. Concurrently run the summary
            # operation at a specified interval. Note that the summary_op
            # and train_op never run simultaneously in order to prevent
            # running out of GPU memory.
            next_summary_time = time.time() + FLAGS.save_summaries_secs
            # The data feed is created for every input mode but only read
            # from (and terminated) when input_mode == 'spark'.
            tf_feed = TFNode.DataFeed(ctx.mgr)
            while not sv.should_stop():
                try:
                    start_time = time.time()
                    if FLAGS.input_mode == 'spark':
                        tmp = feed_dict(
                            tf_feed.next_batch(FLAGS.batch_size /
                                               FLAGS.num_preprocess_threads))
                        feed = {batch: tmp}
                        loss_value, step = sess.run([train_op, global_step],
                                                    feed_dict=feed)
                    else:
                        loss_value, step = sess.run([train_op, global_step])

                    assert not np.isnan(
                        loss_value), 'Model diverged with loss = NaN'
                    if step > FLAGS.max_steps:
                        break
                    duration = time.time() - start_time

                    # Log throughput every 30th global step.
                    if step % 30 == 0:
                        examples_per_sec = FLAGS.batch_size / float(duration)
                        format_str = ('Worker %d: %s: step %d, loss = %.2f'
                                      '(%.1f examples/sec; %.3f sec/batch)')
                        tf.logging.info(
                            format_str %
                            (FLAGS.task_id, datetime.now(), step, loss_value,
                             examples_per_sec, duration))

                    # Determine if the summary_op should be run on the chief
                    # worker. Summaries are only run when input_mode is 'tf'.
                    if FLAGS.input_mode == 'tf' and is_chief and next_summary_time < time.time(
                    ):
                        tf.logging.info(
                            'Running Summary operation on the chief.')
                        summary_str = sess.run(summary_op)
                        sv.summary_computed(sess, summary_str)
                        tf.logging.info('Finished running Summary operation.')

                        # Determine the next time for running the summary.
                        next_summary_time += FLAGS.save_summaries_secs
                # Any error is logged on the chief and then re-raised, so
                # the cleanup below only runs on a normal loop exit.
                # NOTE(review): the log text mentions sync_clean_up_op, but
                # no such op is created in this function - confirm whether
                # the message is stale.
                except:
                    if is_chief:
                        tf.logging.info('About to execute sync_clean_up_op!')
                    raise

            # Stop the TFNode data feed
            if FLAGS.input_mode == 'spark':
                tf_feed.terminate()

            # Stop the supervisor. This also waits for service threads to
            # finish.
            sv.stop()

            # Save after the training ends.
            if is_chief:
                saver.save(sess,
                           os.path.join(FLAGS.train_dir, 'model.ckpt'),
                           global_step=global_step)
def retrieve(dataset):
    """Run a checkpointed model over `dataset` and collect a feature tensor.

    The moving-average variables are restored from the latest checkpoint in
    `FLAGS.checkpoint_dir`, then the tensor named `FLAGS.features_tensor_name`
    is fetched batch by batch together with the matching filenames.

    Args:
      dataset: object providing `num_classes()`; also handed to
        `image_processing.inputs`.

    Returns:
      A `(features, filenames)` tuple, both truncated to at most
      `FLAGS.num_examples` entries, where `features` is the `np.vstack` of
      the fetched batches. Returns None when no checkpoint is found.
    """
    with tf.Graph().as_default(), tf.Session() as sess:
        # Get images and filenames from the dataset (labels are not needed).
        images, _, filenames_tensor = image_processing.inputs(
            dataset, return_filenames=True)

        # Build a Graph that computes the features.
        num_classes = dataset.num_classes() + 1
        _, _ = inception.inference(images, num_classes, restore_logits=False)

        # Restore the moving average version of the learned variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Restore checkpoint.
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('No checkpoint file found')
            return

        if os.path.isabs(ckpt.model_checkpoint_path):
            # Restores from checkpoint with absolute path.
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            # Restores from checkpoint with relative path.
            saver.restore(sess, os.path.join(FLAGS.checkpoint_dir,
                                             ckpt.model_checkpoint_path))

        # Assuming model_checkpoint_path looks something like:
        #   /my-favorite-path/imagenet_train/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        print('Succesfully loaded model from %s at step=%s.' %
              (ckpt.model_checkpoint_path, global_step))

        # Start the queue runners.
        coord = tf.train.Coordinator()
        threads = []
        features = []
        filenames = []
        try:
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(qr.create_threads(sess, coord=coord,
                                                 daemon=True, start=True))

            # float() keeps the ceiling correct under Python 2, where `/` on
            # two ints truncates before math.ceil ever sees the value.
            num_iter = int(math.ceil(
                FLAGS.num_examples / float(FLAGS.batch_size)))

            print('%s: starting evaluation on (%s).' %
                  (datetime.now(), FLAGS.subset))
            start_time = time.time()

            features_tensor = tf.get_default_graph().get_tensor_by_name(
                FLAGS.features_tensor_name)

            step = 0
            while step < num_iter and not coord.should_stop():
                features_batch, filenames_batch = sess.run(
                    [features_tensor, filenames_tensor])
                features.append(features_batch)
                filenames.extend(filenames_batch)
                step += 1
                if step % 20 == 0:
                    duration = time.time() - start_time
                    sec_per_batch = duration / 20.0
                    examples_per_sec = FLAGS.batch_size / sec_per_batch
                    print('%s: [%d batches out of %d] (%.1f examples/sec; %.3f'
                          'sec/batch)' % (datetime.now(), step, num_iter,
                                          examples_per_sec, sec_per_batch))
                    start_time = time.time()
        except Exception as e:  # pylint: disable=broad-except
            # Hand the error to the coordinator; presumably coord.join()
            # re-raises it below - confirm against the Coordinator docs.
            coord.request_stop(e)

        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)

        # `features` is a list of per-batch arrays, so it must be stacked
        # BEFORE truncating; slicing the list itself would drop whole batches
        # (or nothing) and leave features and filenames with different
        # lengths. Assumes axis 0 of each batch indexes examples, as the
        # vstack implies.
        feature_matrix = np.vstack(features)[:FLAGS.num_examples]
        return feature_matrix, filenames[:FLAGS.num_examples]
image = tf.subtract(image, 0.5)
image = tf.multiply(image, 2.0)
return image
# NOTE(review): the three statements above are the tail of a function whose
# header lies outside this chunk; they shift and scale `image` by -0.5 and
# x2.0 before returning it.

#################### Load the already-trained model ####################
sess = tf.Session()
# NOTE(review): this first lookup is overwritten by the next statement, so
# only the flower_model checkpoint state is actually used.
ckpt = tf.train.get_checkpoint_state(
    '/home/recsys/hzwangjian1/tensorflow/models/inception/darthvader_model')
ckpt = tf.train.get_checkpoint_state(
    '/home/recsys/hzwangjian1/tensorflow/models/inception/inception/flower_model'
)
print(ckpt.model_checkpoint_path)

# Single-image input placeholder and a 6-class inference graph.
images_input = tf.placeholder(tf.float32, shape=(1, 299, 299, 3))
logits, _ = inception.inference(images_input, 6)

# Restore the moving average version of the learned variables for eval.
variable_averages = tf.train.ExponentialMovingAverage(
    inception.MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
saver.restore(sess, ckpt.model_checkpoint_path)

#################### Load one image to test the model ####################
img = misc.imread(
    "/home/recsys/hzwangjian1/tensorflow/models/inception/inception/data/raw-data/validation/dandelion/2465442759_d4532a57a3.jpg"
)
import matplotlib.pyplot as plt

# Show the raw image in the left panel of a 1x2 figure.
fig = plt.figure()
plt.subplot(1, 2, 1)
plt.imshow(img)
def build_graph(self, filenames, labels, subset, feed_hypes=None):
    """Build the input pipeline, Inception graph, loss and (for training) the train op.

    Args:
      filenames: passed with `labels` to `tf.train.slice_input_producer`;
        each sliced filename is read with `tf.read_file` and JPEG-decoded.
      labels: labels sliced and batched alongside `filenames`.
      subset: when equal to "train" the input is shuffled, images are
        randomly flipped, inference runs in training mode and the optimizer /
        moving-average / summary ops are added.
      feed_hypes: optional iterable of hyper-parameter names. Each named
        entry of the local copy of `self.hypes` is replaced by a scalar
        float32 placeholder of the same name.

    Returns:
      A dict of fetches with keys "loss", "filenames" and "logits"; when
      `subset == "train"` it also has "reg_loss", "train_op" and
      "global_step". Note that "loss" is the classification loss captured
      BEFORE the regularization term is added.

    Side effects:
      Writes the type and name of every op in the default graph to
      /tmp/graph_ops.txt.
    """
    # Work on a copy so that placeholders never leak into self.hypes.
    hypes = self.hypes.copy()
    if feed_hypes:
        with tf.name_scope(None):
            for i in feed_hypes:
                hypes[i] = tf.placeholder("float32", name=i)
                hypes[i].set_shape([])

    with tf.name_scope("inputs"):
        filenames, labels = tf.train.slice_input_producer(
            tensor_list=[filenames, labels],
            capacity=hypes["batch_size"] * 2,
            shuffle=(subset == "train"),
        )
        filenames, labels = tf.train.batch(
            tensor_list=[filenames, labels],
            capacity=hypes["batch_size"] * 2,
            batch_size=hypes["batch_size"],
        )
        # One decode op per example in the batch.
        images0 = [
            tf.image.decode_jpeg(tf.read_file(i[0]), channels=3)
            for i in tf.split(0, hypes["batch_size"], filenames)
        ]
        images0 = [skin.util.square_pad(i) for i in images0]
        if subset == "train":
            images0 = [tf.image.random_flip_left_right(i) for i in images0]
            images0 = [tf.image.random_flip_up_down(i) for i in images0]
        if hypes["spatial_transformer"]:
            images = skin.util.spatial_tranform(
                images0,
                hypes["batch_size"],
                subset,
                hypes["loc_net"],
                hypes["xform_reg"],
            )
        else:
            images = tf.pack(
                [tf.image.resize_images(i, 299, 299) for i in images0])

    # name_scope(None) resets to the root scope so the tensor is addressable
    # as plain "input".
    with tf.name_scope(None):
        images = tf.identity(images, name="input")

    logits, logits_aux = inception_model.inference(
        images=(images - 128) / 128.0,
        num_classes=len(self.labels),
        for_training=(subset == "train"),
        restore_logits=(subset != "train"),
    )

    with tf.name_scope(None):
        logits = tf.identity(logits, name="logits")
    tf.histogram_summary("logits", logits)

    with tf.name_scope("loss"):
        batch_size, num_classes = logits.get_shape().as_list()
        # One-hot encode the labels by scattering ones at (row, label).
        labels_sparse = tf.sparse_to_dense(
            sparse_indices=tf.transpose(
                tf.pack([tf.range(batch_size), labels])),
            output_shape=[batch_size, num_classes],
            sparse_values=np.ones(batch_size, dtype="float32"),
        )
        loss = tf.nn.softmax_cross_entropy_with_logits(logits, labels_sparse)
        loss = tf.reduce_mean(loss, name="loss")
        loss_aux = tf.nn.softmax_cross_entropy_with_logits(
            logits_aux, labels_sparse)
        loss_aux = tf.reduce_mean(loss_aux, name="loss_aux")
        # Weighted sum of the main and auxiliary head losses.
        loss = 0.7 * loss + 0.3 * loss_aux
    tf.scalar_summary("loss", loss)

    # Captured here on purpose: `loss` is rebound below when the
    # regularization term is added, so this entry stays un-regularized.
    fetches = {"loss": loss, "filenames": filenames, "logits": logits}

    def print_graph_ops():
        """Dump '<op type>\\t<op name>' for every op of the default graph."""
        with open("/tmp/graph_ops.txt", "w") as f:
            for op in tf.get_default_graph().get_operations():
                f.write(op.type.ljust(35) + "\t" + op.name + "\n")

    if subset != "train":
        print_graph_ops()
        return fetches

    reg_losses = tf.get_collection("regularization_losses")
    # Scale only the regularizers whose name contains "loc_net".
    for i, j in enumerate(reg_losses):
        if "loc_net" in j.name:
            reg_losses[i] *= hypes["loc_net_reg"]
    reg_loss = tf.add_n(reg_losses)
    tf.scalar_summary("reg_loss", reg_loss)
    with tf.variable_scope("reg_loss"):
        loss += reg_loss

    print_graph_ops()

    global_step = tf.Variable(0, name="global_step", trainable=False)
    # Direct attribute access instead of eval() on a constant string.
    opt = tf.train.AdamOptimizer(
        learning_rate=hypes["learning_rate"],
        epsilon=hypes["epsilon"],
        beta1=hypes["beta1"],
        beta2=hypes["beta2"],
    )
    grads = opt.compute_gradients(loss)
    apply_grads = opt.apply_gradients(grads, global_step)

    variable_averages = tf.train.ExponentialMovingAverage(
        hypes["variable_averages_decay"], global_step)
    variables_to_average = (
        tf.trainable_variables() + tf.moving_average_variables())
    variables_averages_op = variable_averages.apply(variables_to_average)

    batchnorm_updates_op = tf.group(*tf.get_collection("_update_ops_"))
    train_op = tf.group(
        apply_grads, variables_averages_op, batchnorm_updates_op)

    for grad, var in grads:
        tf.histogram_summary(var.op.name, var)
        if grad is None:
            # No gradient flows to this variable: report its name instead of
            # hiding every possible error behind a bare `except:`.
            print(var.op.name)
        else:
            tf.histogram_summary(var.op.name + "/gradients", grad)

    fetches.update({
        "reg_loss": reg_loss,
        "train_op": train_op,
        "global_step": global_step,
    })
    return fetches
def export(args):
    """Restore the latest checkpoint and export the model as a saved_model.

    Builds a graph that decodes JPEG strings, runs Inception inference and
    computes top-1 / top-5 membership, restores the moving-average variables
    from the latest checkpoint in `FLAGS.train_dir`, and exports it through
    `TFNode.export_saved_model`.

    Args:
      args: object whose `export_dir` attribute is the export destination.

    Raises:
      Exception: if no checkpoint is found in `FLAGS.train_dir`.
    """
    FLAGS = tf.app.flags.FLAGS

    tf.reset_default_graph()

    def preprocess_image(image_buffer):
        """Preprocess JPEG encoded bytes to 3D float Tensor."""
        # Decode the string as an RGB JPEG.
        # Note that the resulting image contains an unknown height and width
        # that is set dynamically by decode_jpeg. In other words, the height
        # and width of image is unknown at compile-time.
        image = tf.image.decode_jpeg(image_buffer, channels=3)
        # After this point, all image pixels reside in [0,1)
        # until the very end, when they're rescaled to (-1, 1). The various
        # adjust_* ops all require this range for dtype float.
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        # Crop the central region of the image with an area containing 87.5%
        # of the original image.
        image = tf.image.central_crop(image, central_fraction=0.875)
        # Resize the image to FLAGS.image_size x FLAGS.image_size;
        # resize_bilinear is applied to a batch, hence the expand/squeeze.
        image = tf.expand_dims(image, 0)
        image = tf.image.resize_bilinear(
            image, [FLAGS.image_size, FLAGS.image_size], align_corners=False)
        image = tf.squeeze(image, [0])
        # Finally, rescale to [-1,1] instead of [0, 1)
        image = tf.subtract(image, 0.5)
        image = tf.multiply(image, 2.0)
        return image

    # Inputs of the exported graph: JPEG strings and integer labels.
    jpegs = tf.placeholder(tf.string, [None], name='jpegs')
    images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)
    labels = tf.placeholder(tf.int32, [None], name='labels')

    # Number of classes in the Dataset label set plus 1.
    # Label 0 is reserved for an (unused) background class.
    dataset = ImagenetData(subset=FLAGS.subset)
    num_classes = dataset.num_classes() + 1

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits, _ = inception.inference(images, num_classes)

    # Calculate predictions. Only top_5_op is exposed in the signature;
    # top_1_op is kept so the constructed graph stays as it was.
    top_1_op = tf.nn.in_top_k(logits, labels, 1)
    top_5_op = tf.nn.in_top_k(logits, labels, 5)

    # Restore the moving average version of the learned variables for eval.
    variable_averages = tf.train.ExponentialMovingAverage(
        inception.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if not ckpt or not ckpt.model_checkpoint_path:
            raise Exception("No checkpoint file found at: {}".format(
                FLAGS.train_dir))
        print("ckpt.model_checkpoint_path: {0}".format(
            ckpt.model_checkpoint_path))

        saver.restore(sess, ckpt.model_checkpoint_path)

        # Assuming model_checkpoint_path looks something like:
        #   /my-favorite-path/imagenet_train/model.ckpt-0,
        # extract global_step from it.
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        print('Successfully loaded model from %s at step=%s.' %
              (ckpt.model_checkpoint_path, global_step))

        print("Exporting saved_model to: {}".format(args.export_dir))
        # exported signatures defined in code
        signatures = {
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: {
                'inputs': {
                    'jpegs': jpegs,
                    'labels': labels
                },
                'outputs': {
                    'top_5_acc': top_5_op
                },
                'method_name':
                    tf.saved_model.signature_constants.PREDICT_METHOD_NAME
            }
        }
        TFNode.export_saved_model(sess, args.export_dir,
                                  tf.saved_model.tag_constants.SERVING,
                                  signatures)
        print("Exported saved_model")
def softmax(x):
    """Compute softmax values for each set of scores in x.

    Normalization runs along axis 0. The per-column maximum is subtracted
    before exponentiating: this leaves the result mathematically unchanged
    but stops `np.exp` from overflowing to inf (and the ratio from becoming
    nan) for large scores.

    Args:
      x: array-like of scores.

    Returns:
      Array of the same shape as `x` whose entries sum to 1 along axis 0.
    """
    scores = np.asarray(x)
    if scores.size:
        scores = scores - np.max(scores, axis=0)
    exps = np.exp(scores)
    return exps / np.sum(exps, axis=0)


NUM_CLASSES = 7
NUM_TOP_CLASSES = 8
MODEL_CHECKPOINT_PATH = 'dk-finetune/model.ckpt-45000'

# Input: JPEG-encoded strings, preprocessed one by one.
jpegs = tf.placeholder(tf.string)
images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)

# Run inference.
logits, _ = inception_model.inference(images, NUM_CLASSES + 1)

# Transform output to topK result.
values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)

# Restore the moving average version of the learned variables for eval.
variable_averages = tf.train.ExponentialMovingAverage(
    inception_model.MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)

sess = tf.Session()
# Restore variables from training checkpoints.
saver.restore(sess, MODEL_CHECKPOINT_PATH)

# Assuming model_checkpoint_path looks something like:
#   /my-favorite-path/imagenet_train/model.ckpt-0,
# extract global_step from it.
global_step = MODEL_CHECKPOINT_PATH.split('/')[-1].split('-')[-1]
def test():
    """Evaluate classification and CAM-based area estimation on the eval set.

    For every sample returned by `generate_eval_set()` the classifier logits
    are computed; samples predicted positive get a class activation map
    (CAM) whose thresholded pixel count is the estimated area. True/false
    positives are saved as images under RESULT_DIR, precision / recall /
    mean absolute area error are printed per region type ('r' and 'd'), and
    per-region totals are written to region_level_area_estimation.csv.

    Averages over an empty sample list and relative differences against a
    zero true area are reported as nan instead of raising
    ZeroDivisionError.
    """
    eval_set_queue = generate_eval_set()

    with tf.Graph().as_default():
        img_placeholder = tf.placeholder(
            tf.float32, shape=[1, IMAGE_SIZE, IMAGE_SIZE, 3])
        logits, _, feature_map = inception.inference(
            img_placeholder, NUM_CLASSES)

        with tf.name_scope('conv_aux_1') as scope:
            kernel1 = tf.Variable(
                tf.truncated_normal([3, 3, 288, 512], dtype=tf.float32,
                                    stddev=1e-4),
                name='weights')
            conv = tf.nn.conv2d(feature_map, kernel1, [1, 1, 1, 1],
                                padding='SAME')
            biases1 = tf.Variable(
                tf.constant(0.1, shape=[512], dtype=tf.float32),
                trainable=True, name='biases')
            bias = tf.nn.bias_add(conv, biases1)
            conv_aux = tf.nn.relu(bias, name=scope)

        with tf.name_scope('conv_aux_2') as scope:
            kernel2 = tf.Variable(
                tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
                                    stddev=1e-4),
                name='weights')
            conv = tf.nn.conv2d(conv_aux, kernel2, [1, 1, 1, 1],
                                padding='SAME')
            biases2 = tf.Variable(
                tf.constant(0.1, shape=[512], dtype=tf.float32),
                trainable=True, name='biases')
            bias = tf.nn.bias_add(conv, biases2)
            conv_aux = tf.nn.relu(bias, name=scope)

        # Not used below; kept so the constructed graph stays as it was.
        GAP = tf.reduce_mean(conv_aux, [1, 2])

        W = tf.get_variable(
            name='W', shape=[512, 2],
            initializer=tf.random_normal_initializer(0., 0.01))

        conv_map_resized = tf.image.resize_bilinear(conv_aux, [100, 100])

        # get weights connected to definite class (column 1 of W).
        W_c = tf.gather(tf.transpose(W), 1)
        W_c = tf.reshape(W_c, [-1, 512, 1])
        conv_map_resized = tf.reshape(conv_map_resized,
                                      [-1, 100 * 100, 512])
        # tf.matmul is used here in place of the deprecated tf.batch_matmul.
        CAM = tf.matmul(conv_map_resized, W_c)
        CAM = tf.reshape(CAM, [-1, 100, 100])

        # Construct savers: one for the classification backbone, one for the
        # auxiliary segmentation layers defined above.
        variables_to_restore = tf.get_collection(
            slim.variables.VARIABLES_TO_RESTORE)
        print(variables_to_restore)
        saver1 = tf.train.Saver(variables_to_restore)
        saver2 = tf.train.Saver(
            var_list=[W, kernel2, biases2, kernel1, biases1])

        def save_sample(outcome, region_index, img_index, img, cam):
            """Save the original image and its CAM under RESULT_DIR/outcome."""
            prefix = str(region_index) + '_' + str(img_index)
            skimage.io.imsave(
                os.path.join(RESULT_DIR, outcome, prefix + '_original.png'),
                img)
            skimage.io.imsave(
                os.path.join(RESULT_DIR, outcome, prefix + '_CAM.png'), cam)

        def read_true_pixel_area(region_index, img_index):
            """Return the ground-truth area rescaled from 320x320 to 100x100."""
            true_seg_img = skimage.io.imread(
                os.path.join(FLAGS.eval_set_dir, str(region_index),
                             str(img_index) + '_true_seg.png'))
            # Out-of-place division: an in-place `/=` cannot store a float
            # result in an integer image array (it raises on current NumPy
            # and truncates on old releases).
            true_seg_img = true_seg_img / 255.0
            return np.sum(true_seg_img) * (100 * 100) / (320 * 320)

        def mean_abs(errors):
            """Mean absolute value of `errors`; nan when there are none."""
            if not errors:
                return float('nan')
            return float(sum(abs(e) for e in errors)) / float(len(errors))

        # [TP, FP, FN] counters for residential ('r') and
        # downtown/commercial ('d').
        stats = {'r': [0, 0, 0], 'd': [0, 0, 0]}
        area_error = {'r': [], 'd': []}

        # store both true and estimate total pixel areas for each region
        true_total_area = {}
        estimiate_total_area = {}
        for i in range(1, 66):
            true_total_area[i] = 0.0
            estimiate_total_area[i] = 0.0

        with tf.Session() as sess:
            # restore model parameters.
            checkpoint1 = tf.train.get_checkpoint_state(
                FLAGS.classification_ckpt_restore_dir)
            if checkpoint1 and checkpoint1.model_checkpoint_path:
                saver1.restore(sess, checkpoint1.model_checkpoint_path)
                print("Successfully loaded:",
                      checkpoint1.model_checkpoint_path)
            else:
                print("Could not find old network weights")

            checkpoint2 = tf.train.get_checkpoint_state(
                FLAGS.segmentation_ckpt_restore_dir)
            if checkpoint2 and checkpoint2.model_checkpoint_path:
                saver2.restore(sess, checkpoint2.model_checkpoint_path)
                print("Successfully loaded:",
                      checkpoint2.model_checkpoint_path)
            else:
                print("Could not find old network weights")

            # Capture the size once: the queue shrinks with every pop(), so
            # reading len() inside the loop would report a moving total.
            total = len(eval_set_queue)
            for step in range(1, total + 1):
                print('Processing ' + str(step) + '/' + str(total) + '...')
                (img_path, label, region_index, img_index,
                 region_type) = eval_set_queue.pop()
                img = load_image(img_path)
                img_batch = np.reshape(img, [1, IMAGE_SIZE, IMAGE_SIZE, 3])
                score = sess.run(logits,
                                 feed_dict={img_placeholder: img_batch})
                # Two-class softmax probability of the positive class.
                pos_prob = np.exp(score[0, 1]) / (
                    np.exp(score[0, 1]) + np.exp(score[0, 0]))

                if pos_prob >= 0.5:
                    # generate CAM for that sample
                    CAM_val = sess.run(
                        CAM, feed_dict={img_placeholder: img_batch})
                    CAM_val = rescale_CAM(CAM_val)
                    # predicted or estimated pixel area
                    pred_pixel_area = np.sum(CAM_val > SEGMENTATION_THRES)
                    estimiate_total_area[region_index] += pred_pixel_area
                    if label == [0]:  # FP
                        stats[region_type][1] += 1
                        save_sample('FP', region_index, img_index, img,
                                    CAM_val)
                    else:  # TP
                        stats[region_type][0] += 1
                        save_sample('TP', region_index, img_index, img,
                                    CAM_val)
                        # compare with ground truth segmentation.
                        true_pixel_area = read_true_pixel_area(
                            region_index, img_index)
                        true_total_area[region_index] += true_pixel_area
                        area_error[region_type].append(
                            true_pixel_area - pred_pixel_area)
                elif label == [1]:  # FN
                    stats[region_type][2] += 1
                    true_pixel_area = read_true_pixel_area(
                        region_index, img_index)
                    true_total_area[region_index] += true_pixel_area

        # report precision and recall and absolute error rate.
        abs_error_rate_r = mean_abs(area_error['r'])
        abs_error_rate_d = mean_abs(area_error['d'])
        # The small epsilon keeps the ratios defined when a count is zero.
        precision_r = float(stats['r'][0]) / float(
            stats['r'][0] + stats['r'][1] + 0.00000001)
        recall_r = float(stats['r'][0]) / float(
            stats['r'][0] + stats['r'][2] + 0.00000001)
        precision_d = float(stats['d'][0]) / float(
            stats['d'][0] + stats['d'][1] + 0.00000001)
        recall_d = float(stats['d'][0]) / float(
            stats['d'][0] + stats['d'][2] + 0.00000001)
        print('############ RESULTS ############')
        print('Residential: precision: ' + str(precision_r) + ' recall: ' +
              str(recall_r) + ' average absolute error rate: ' +
              str(abs_error_rate_r))
        print('Commercial: precision: ' + str(precision_d) + ' recall: ' +
              str(recall_d) + ' average absolute error rate: ' +
              str(abs_error_rate_d))

        # save csv for region-level comparison of true total area and
        # estimated total area.
        result_list = []
        for i in range(1, 66):
            if true_total_area[i]:
                relative_difference = float(
                    estimiate_total_area[i] - true_total_area[i]) / float(
                        true_total_area[i])
            else:
                # Undefined when the region has no true area.
                relative_difference = float('nan')
            result_list.append([
                i, true_total_area[i], estimiate_total_area[i],
                relative_difference
            ])
        with open("region_level_area_estimation.csv", 'wb') as f:
            writer = csv.writer(f)
            writer.writerow([
                'region', 'true pixel area', 'estimiated pixel area',
                'relative difference'
            ])
            writer.writerows(result_list)
def export():
    """Export the checkpointed Inception model with a classification signature.

    Reads the index->synset list from SYNSET_FILE and the tab-separated
    synset->description table from METADATA_FILE, builds the inference graph
    on JPEG string input, restores the moving-average variables from the
    latest checkpoint in FLAGS.checkpoint_dir and exports the model to
    FLAGS.export_dir. The signature's scores tensor is the
    'mixed_8x8x2048b' endpoint averaged twice over axis 1.

    Prints a message and returns without exporting when no checkpoint is
    found.
    """
    # index -> synset
    with open(SYNSET_FILE) as synset_file:
        synsets = synset_file.read().splitlines()

    # synset -> human readable text; every line must be '<synset>\t<text>'.
    texts = {}
    with open(METADATA_FILE) as metadata_file:
        for row in metadata_file.read().splitlines():
            fields = row.split('\t')
            assert len(fields) == 2
            texts[fields[0]] = fields[1]

    # The i'th description belongs to class index i; index 0 is the unused
    # background class (see inception model definition code).
    class_descriptions = ['unused background'] + [texts[s] for s in synsets]

    with tf.Graph().as_default():
        # Build inference model.
        # Please refer to Tensorflow inception model for details.

        # Input transformation.
        jpegs = tf.placeholder(tf.string)
        images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)

        # Run inference.
        logits, _, endpoints = inception_model.inference(
            images, NUM_CLASSES + 1)

        # Transform output to topK result; only the indices are used.
        _top_values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)

        # (?,8,8,2048) -> (?,8,2048) -> (?,2048)
        features = tf.reduce_mean(endpoints['mixed_8x8x2048b'], 1)
        features = tf.reduce_mean(features, 1)

        # Constant string Tensor mapping class index -> description.
        class_tensor = tf.constant(class_descriptions)
        classes = tf.contrib.lookup.index_to_string(
            tf.to_int64(indices), mapping=class_tensor)

        # Restore variables from training checkpoint.
        averages = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(averages.variables_to_restore())

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
                return

            saver.restore(sess, ckpt.model_checkpoint_path)
            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/imagenet_train/model.ckpt-0,
            # extract global_step from it.
            checkpoint_name = ckpt.model_checkpoint_path.split('/')[-1]
            global_step = checkpoint_name.split('-')[-1]
            print('Successfully loaded model from %s at step=%s.' %
                  (ckpt.model_checkpoint_path, global_step))

            # Export inference model; the pooled features take the place of
            # the scores in the classification signature.
            init_op = tf.group(tf.initialize_all_tables(), name='init_op')
            model_exporter = exporter.Exporter(saver)
            signature = exporter.classification_signature(
                input_tensor=jpegs,
                classes_tensor=classes,
                scores_tensor=features)
            model_exporter.init(default_graph_signature=signature,
                                init_op=init_op)
            model_exporter.export(FLAGS.export_dir,
                                  tf.constant(global_step), sess)
            print('Successfully exported model to %s' % FLAGS.export_dir)
def train(target, dataset, cluster_spec):
    """Train Inception on a dataset with synchronous replicas.

    Args:
      target: session target passed to the supervisor when it prepares or
        waits for the session.
      dataset: object providing num_examples_per_epoch() and num_classes();
        also handed to image_processing.distorted_inputs.
      cluster_spec: cluster description whose as_dict() holds the 'worker'
        and 'ps' host lists.

    The worker with FLAGS.task_id == 0 acts as chief: it tracks loss
    averages, runs the summaries, starts the chief queue runners and writes
    the final checkpoint into FLAGS.train_dir.
    """
    # The number of workers and parameter servers follow from the host lists.
    cluster = cluster_spec.as_dict()
    num_workers = len(cluster['worker'])
    num_parameter_servers = len(cluster['ps'])

    # -1 means "aggregate over every worker".
    num_replicas_to_aggregate = (
        num_workers if FLAGS.num_replicas_to_aggregate == -1
        else FLAGS.num_replicas_to_aggregate)

    # Both must be positive in distributed training.
    assert num_workers > 0 and num_parameter_servers > 0, (
        ' num_workers and '
        'num_parameter_servers'
        ' must be > 0.')

    # Worker 0 is the chief; there must be exactly one.
    is_chief = (FLAGS.task_id == 0)

    # Ops are placed on this worker by default.
    with tf.device('/job:worker/task:%d' % FLAGS.task_id):
        # Variables and their init/assign ops are placed on the ps tasks.
        with slim.scopes.arg_scope(
                [slim.variables.variable, slim.variables.global_step],
                device=slim.variables.VariableDeviceChooser(
                    num_parameter_servers)):
            # Counts the updates applied to the variables.
            global_step = slim.variables.global_step()

            # Learning rate schedule; the decay steps are divided by the
            # number of replicas to aggregate.
            num_batches_per_epoch = (dataset.num_examples_per_epoch() /
                                     FLAGS.batch_size)
            decay_steps = int(num_batches_per_epoch *
                              FLAGS.num_epochs_per_decay /
                              num_replicas_to_aggregate)
            lr = tf.train.exponential_decay(
                FLAGS.initial_learning_rate,
                global_step,
                decay_steps,
                FLAGS.learning_rate_decay_factor,
                staircase=True)
            tf.scalar_summary('learning_rate', lr)

            # Plain RMSProp; wrapped by the synchronous optimizer below.
            rmsprop = tf.train.RMSPropOptimizer(
                lr,
                RMSPROP_DECAY,
                momentum=RMSPROP_MOMENTUM,
                epsilon=RMSPROP_EPSILON)

            images, labels = image_processing.distorted_inputs(
                dataset,
                batch_size=FLAGS.batch_size,
                num_preprocess_threads=FLAGS.num_preprocess_threads)

            # Dataset labels plus one: label 0 is the (unused) background.
            num_classes = dataset.num_classes() + 1
            logits = inception.inference(images, num_classes,
                                         for_training=True)
            # Registers the classification loss in the losses collection.
            inception.loss(logits, labels)

            # Classification losses followed by regularization losses.
            losses = (tf.get_collection(slim.losses.LOSSES_COLLECTION) +
                      tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
            total_loss = tf.add_n(losses, name='total_loss')

            if is_chief:
                # Moving average of each individual loss and of the total.
                loss_averages = tf.train.ExponentialMovingAverage(
                    0.9, name='avg')
                tracked_losses = losses + [total_loss]
                loss_averages_op = loss_averages.apply(tracked_losses)

                # The raw value gets the ' (raw)' suffix; the averaged value
                # is reported under the original loss name.
                for loss_tensor in tracked_losses:
                    loss_name = loss_tensor.op.name
                    tf.scalar_summary(loss_name + ' (raw)', loss_tensor)
                    tf.scalar_summary(loss_name,
                                      loss_averages.average(loss_tensor))

                # Make the total loss depend on the averaging op.
                with tf.control_dependencies([loss_averages_op]):
                    total_loss = tf.identity(total_loss)

            # Moving averages of the trainable variables plus the existing
            # moving-average variables (a 'double average' of the batch-norm
            # statistics, which matters with many synchronous replicas).
            exp_moving_averager = tf.train.ExponentialMovingAverage(
                inception.MOVING_AVERAGE_DECAY, global_step)
            variables_to_average = (
                tf.trainable_variables() + tf.moving_average_variables())

            # Histograms for the model variables.
            for variable in variables_to_average:
                tf.histogram_summary(variable.op.name, variable)

            # Synchronous replica optimizer around RMSProp.
            opt = tf.train.SyncReplicasOptimizer(
                rmsprop,
                replicas_to_aggregate=num_replicas_to_aggregate,
                replica_id=FLAGS.task_id,
                total_num_replicas=num_workers,
                variable_averages=exp_moving_averager,
                variables_to_average=variables_to_average)

            batchnorm_updates = tf.get_collection(
                slim.ops.UPDATE_OPS_COLLECTION)
            assert batchnorm_updates, 'Batchnorm updates are missing'
            batchnorm_updates_op = tf.group(*batchnorm_updates)
            # Make the total loss depend on the batch-norm updates.
            with tf.control_dependencies([batchnorm_updates_op]):
                total_loss = tf.identity(total_loss)

            # Gradients of the loss, with one histogram per gradient.
            grads = opt.compute_gradients(total_loss)
            for gradient, variable in grads:
                if gradient is not None:
                    tf.histogram_summary(variable.op.name + '/gradients',
                                         gradient)

            apply_gradients_op = opt.apply_gradients(
                grads, global_step=global_step)
            with tf.control_dependencies([apply_gradients_op]):
                train_op = tf.identity(total_loss, name='train_op')

            # Ops the chief uses to synchronize the replicas.
            chief_queue_runners = [opt.get_chief_queue_runner()]
            init_tokens_op = opt.get_init_tokens_op()
            clean_up_op = opt.get_clean_up_op()

            saver = tf.train.Saver()
            summary_op = tf.merge_all_summaries()
            init_op = tf.initialize_all_variables()

            # summary_op=None keeps the supervisor from starting a summary
            # thread: summaries run in the training thread so that they
            # never overlap a training step (which could exhaust GPU memory).
            supervisor = tf.train.Supervisor(
                is_chief=is_chief,
                logdir=FLAGS.train_dir,
                init_op=init_op,
                summary_op=None,
                global_step=global_step,
                saver=saver,
                save_model_secs=FLAGS.save_interval_secs)

            tf.logging.info('%s Supervisor' % datetime.now())

            sess_config = tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=FLAGS.log_device_placement)
            sess = supervisor.prepare_or_wait_for_session(
                target, config=sess_config)

            # Start the input queue runners.
            queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
            supervisor.start_queue_runners(sess, queue_runners)
            tf.logging.info('Started %d queues for processing input data.',
                            len(queue_runners))

            if is_chief:
                supervisor.start_queue_runners(sess, chief_queue_runners)
                sess.run(init_tokens_op)

            # Training loop: checks for NaN and, on the chief, runs the
            # summary op between steps at the configured interval.
            next_summary_time = time.time() + FLAGS.save_summaries_secs
            while not supervisor.should_stop():
                try:
                    start_time = time.time()
                    loss_value, step = sess.run([train_op, global_step])
                    assert not np.isnan(loss_value), (
                        'Model diverged with loss = NaN')
                    if step > FLAGS.max_steps:
                        break
                    duration = time.time() - start_time

                    if step % 30 == 0:
                        examples_per_sec = FLAGS.batch_size / float(duration)
                        format_str = ('Worker %d: %s: step %d, loss = %.2f'
                                      '(%.1f examples/sec; %.3f sec/batch)')
                        tf.logging.info(
                            format_str %
                            (FLAGS.task_id, datetime.now(), step, loss_value,
                             examples_per_sec, duration))

                    # Only the chief writes summaries.
                    if is_chief and next_summary_time < time.time():
                        tf.logging.info(
                            'Running Summary operation on the chief.')
                        summary_str = sess.run(summary_op)
                        supervisor.summary_computed(sess, summary_str)
                        tf.logging.info(
                            'Finished running Summary operation.')
                        next_summary_time += FLAGS.save_summaries_secs
                except:  # Deliberately broad: clean up, then re-raise.
                    if is_chief:
                        tf.logging.info('About to execute sync_clean_up_op!')
                        sess.run(clean_up_op)
                    raise

            # Stop the supervisor; this also waits for its service threads.
            supervisor.stop()

            # Final checkpoint, written by the chief only.
            if is_chief:
                saver.save(sess,
                           os.path.join(FLAGS.train_dir, 'model.ckpt'),
                           global_step=global_step)
def train():
    """Train Inception from an in-memory list of (image path, label) pairs.

    The list is unpickled from 'train_set_list.pickle', shuffled and kept
    in a deque that is rotated to build each minibatch. The graph is fed
    through placeholders of BATCH_SIZE images and labels.

    With FLAGS.fine_tune set, all variables are restored from the latest
    checkpoint in FLAGS.ckpt_restore_dir (if any). Otherwise the variables
    in the VARIABLES_TO_RESTORE collection are restored from
    FLAGS.pretrained_model_checkpoint_path.

    Summaries and checkpoints are written to FLAGS.ckpt_save_dir.

    Raises:
      EnvironmentError: if the pickled data list cannot be loaded.
    """
    # NOTE: pickle executes arbitrary code while loading. The file is
    # expected to come from generate_data_list.py; never point this at
    # untrusted data.
    try:
        # Pickle streams are binary, so the file is opened in 'rb' mode.
        with open('train_set_list.pickle', 'rb') as f:
            train_set_list = pickle.load(f)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not reported as a missing data list.
        raise EnvironmentError(
            'Data list not existed. Please run generate_data_list.py first.')

    random.shuffle(train_set_list)
    train_set_queue = deque(train_set_list)
    train_set_size = len(train_set_list)
    del train_set_list
    print('Training set built. Size: ' + str(train_set_size))

    # Build the TensorFlow graph.
    with tf.Graph().as_default():
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        num_batches_per_epoch = train_set_size / BATCH_SIZE
        decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay)

        # Decay the learning rate exponentially based on the step count.
        lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                        global_step,
                                        decay_steps,
                                        FLAGS.learning_rate_decay_factor,
                                        staircase=True)
        tf.summary.scalar('learning_rate', lr)

        # Create an optimizer that performs gradient descent.
        opt = tf.train.RMSPropOptimizer(lr,
                                        RMSPROP_DECAY,
                                        momentum=RMSPROP_MOMENTUM,
                                        epsilon=RMSPROP_EPSILON)

        images = tf.placeholder(
            tf.float32, shape=[BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3])
        labels = tf.placeholder(tf.int32, shape=[BATCH_SIZE])

        logits = inception.inference(images,
                                     NUM_CLASSES,
                                     for_training=True,
                                     restore_logits=FLAGS.fine_tune,
                                     scope=None)
        inception.loss(logits, labels, batch_size=BATCH_SIZE)

        # Classification losses plus regularization losses.
        losses = tf.get_collection(slim.losses.LOSSES_COLLECTION, scope=None)
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n(losses + regularization_losses,
                              name='total_loss')

        # Moving average of all individual losses and of the total loss.
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        loss_averages_op = loss_averages.apply(losses + [total_loss])

        # Report each loss raw (' (raw)' suffix) and averaged (plain name).
        for l in losses + [total_loss]:
            tf.summary.scalar(l.op.name + ' (raw)', l)
            tf.summary.scalar(l.op.name, loss_averages.average(l))

        with tf.control_dependencies([loss_averages_op]):
            total_loss = tf.identity(total_loss)

        batchnorm_updates = tf.get_collection(slim.ops.UPDATE_OPS_COLLECTION,
                                              scope=None)

        # Compute and apply the gradients for the batch.
        grads = opt.compute_gradients(total_loss)
        apply_gradient_op = opt.apply_gradients(grads,
                                                global_step=global_step)

        # Histograms for trainable variables and for their gradients.
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)
        for grad, var in grads:
            if grad is not None:
                tf.summary.histogram(var.op.name + '/gradients', grad)

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY, global_step)
        variables_to_average = (tf.trainable_variables() +
                                tf.moving_average_variables())
        variables_averages_op = variable_averages.apply(variables_to_average)

        # Group all updates into a single train op.
        batchnorm_updates_op = tf.group(*batchnorm_updates)
        train_op = tf.group(apply_gradient_op, variables_averages_op,
                            batchnorm_updates_op)

        saver = tf.train.Saver(tf.all_variables())
        summary_op = tf.summary.merge_all()
        init = tf.global_variables_initializer()

        # Open the session and initialize.
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
        sess.run(init)

        # Restore previous weights.
        if FLAGS.fine_tune:
            checkpoint = tf.train.get_checkpoint_state(FLAGS.ckpt_restore_dir)
            if checkpoint and checkpoint.model_checkpoint_path:
                saver.restore(sess, checkpoint.model_checkpoint_path)
                print("Successfully loaded:", checkpoint.model_checkpoint_path)
            else:
                print("Could not find old network weights")
        else:
            variables_to_restore = tf.get_collection(
                slim.variables.VARIABLES_TO_RESTORE)
            restorer = tf.train.Saver(variables_to_restore)
            restorer.restore(sess, FLAGS.pretrained_model_checkpoint_path)
            print('%s: Pre-trained model restored from %s' %
                  (datetime.now(), FLAGS.pretrained_model_checkpoint_path))

        summary_writer = tf.summary.FileWriter(
            FLAGS.ckpt_save_dir,
            graph_def=sess.graph.as_graph_def(add_shapes=True))

        # try/finally so that pending summaries are flushed and the session
        # is released even when training stops on an error.
        try:
            step = 1
            while step <= FLAGS.max_steps:
                start_time = time.time()

                # Build one minibatch by rotating the queue: each element
                # taken from the right is put back on the left.
                minibatch = []
                for _ in xrange(0, BATCH_SIZE):
                    element = train_set_queue.pop()
                    minibatch.append(element)
                    train_set_queue.appendleft(element)

                image_list = [load_image(d[0]) for d in minibatch]
                label_list = [d[1] for d in minibatch]
                image_batch = np.reshape(
                    np.array(image_list),
                    [BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, 3])
                label_batch = np.reshape(np.array(label_list), [BATCH_SIZE])
                feed = {images: image_batch, labels: label_batch}

                _, loss_value = sess.run([train_op, total_loss],
                                         feed_dict=feed)
                duration = time.time() - start_time

                assert not np.isnan(loss_value), (
                    'Model diverged with loss = NaN')

                if step == 1 or step % 10 == 0:
                    num_examples_per_step = BATCH_SIZE
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)
                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))

                # Reshuffle the image list once per epoch.
                if step % num_batches_per_epoch == 0:
                    random.shuffle(train_set_queue)

                # Write a summary periodically (re-runs the same batch).
                if step == 1 or step % 100 == 0:
                    summary_str = sess.run(summary_op, feed_dict=feed)
                    summary_writer.add_summary(summary_str, step)

                # Save the model checkpoint periodically.
                if step % 1000 == 0:
                    checkpoint_path = os.path.join(FLAGS.ckpt_save_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)

                step += 1
        finally:
            summary_writer.close()
            sess.close()
def export():
    """Export the Inception model as a SavedModel for serving.

    Loads the synset list (SYNSET_FILE) and the synset descriptions
    (METADATA_FILE), builds the inference graph on serialized tf.Example
    protos carrying an 'image/encoded' JPEG string, restores the
    moving-average variables from the latest checkpoint in
    FLAGS.checkpoint_dir and writes the SavedModel to
    FLAGS.output_dir/FLAGS.model_version.

    Two signatures are exported: 'predict_images' (JPEG strings in, classes
    and scores out) and the default serving signature (classification on
    the serialized examples).

    Returns without exporting when no checkpoint is found.

    Messages use single-argument print(...) calls, which produce the same
    output as Python 2 print statements and also parse on Python 3.
    """
    # Create index->synset mapping.
    with open(SYNSET_FILE) as f:
        synsets = f.read().splitlines()

    # Create synset->metadata mapping from tab-separated lines.
    texts = {}
    with open(METADATA_FILE) as f:
        for line in f.read().splitlines():
            parts = line.split('\t')
            assert len(parts) == 2
            texts[parts[0]] = parts[1]

    with tf.Graph().as_default():
        # Input transformation: parse the examples, then preprocess each
        # encoded JPEG.
        serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
        feature_configs = {
            'image/encoded': tf.FixedLenFeature(shape=[], dtype=tf.string),
        }
        tf_example = tf.parse_example(serialized_tf_example, feature_configs)
        jpegs = tf_example['image/encoded']
        images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)

        # Run inference.
        logits, _ = inception_model.inference(images, NUM_CLASSES + 1)

        # Transform output to topK result.
        values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)

        # Constant string tensor whose i'th element is the readable
        # description of class i. Index 0 is the unused background class.
        class_descriptions = ['unused background']
        for s in synsets:
            class_descriptions.append(texts[s])
        class_tensor = tf.constant(class_descriptions)

        table = tf.contrib.lookup.index_to_string_table_from_tensor(
            class_tensor)
        classes = table.lookup(tf.to_int64(indices))

        # Restore the moving-average version of the variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                # Assuming model_checkpoint_path looks something like
                # /my-favorite-path/imagenet_train/model.ckpt-0,
                # extract global_step from it.
                global_step = ckpt.model_checkpoint_path.split(
                    '/')[-1].split('-')[-1]
                print('Successfully loaded model from %s at step=%s.' %
                      (ckpt.model_checkpoint_path, global_step))
            else:
                print('No checkpoint file found at %s' %
                      FLAGS.checkpoint_dir)
                return

            # Export inference model.
            output_path = os.path.join(
                tf.compat.as_bytes(FLAGS.output_dir),
                tf.compat.as_bytes(str(FLAGS.model_version)))
            # One formatted argument: a two-argument print(...) would
            # print a tuple on Python 2 without print_function.
            print('Exporting trained model to %s' % output_path)
            builder = tf.saved_model.builder.SavedModelBuilder(output_path)

            # Build the signature_def_map.
            classify_inputs_tensor_info = (
                tf.saved_model.utils.build_tensor_info(serialized_tf_example))
            classes_output_tensor_info = (
                tf.saved_model.utils.build_tensor_info(classes))
            scores_output_tensor_info = (
                tf.saved_model.utils.build_tensor_info(values))

            signature_constants = tf.saved_model.signature_constants
            classification_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={
                        signature_constants.CLASSIFY_INPUTS:
                            classify_inputs_tensor_info
                    },
                    outputs={
                        signature_constants.CLASSIFY_OUTPUT_CLASSES:
                            classes_output_tensor_info,
                        signature_constants.CLASSIFY_OUTPUT_SCORES:
                            scores_output_tensor_info
                    },
                    method_name=signature_constants.CLASSIFY_METHOD_NAME))

            predict_inputs_tensor_info = (
                tf.saved_model.utils.build_tensor_info(jpegs))
            prediction_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={'images': predict_inputs_tensor_info},
                    outputs={
                        'classes': classes_output_tensor_info,
                        'scores': scores_output_tensor_info
                    },
                    method_name=signature_constants.PREDICT_METHOD_NAME))

            # The lookup table must be initialized when the model loads.
            legacy_init_op = tf.group(
                tf.tables_initializer(), name='legacy_init_op')
            builder.add_meta_graph_and_variables(
                sess, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={
                    'predict_images':
                        prediction_signature,
                    signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                        classification_signature,
                },
                legacy_init_op=legacy_init_op)

            builder.save()
            print('Successfully exported model to %s' % FLAGS.output_dir)
def evaluate(dataset):
    """Evaluate model on Dataset for a number of steps.

    Restores the moving-average variables from the latest checkpoint in
    FLAGS.checkpoint_dir, runs ceil(FLAGS.num_examples / FLAGS.batch_size)
    batches, prints the summed confusion matrix and the test accuracy, and
    writes a 'Precision @ 1' summary to FLAGS.eval_dir.

    Args:
      dataset: object providing num_classes(); also handed to
        image_processing.inputs.

    Returns without evaluating when no checkpoint is found.
    """
    with tf.Graph().as_default():
        # Get images and labels from the dataset.
        images, labels = image_processing.inputs(dataset)

        # Number of classes in the Dataset label set plus 1.
        # Label 0 is reserved for an (unused) background class.
        num_classes = dataset.num_classes() + 1

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits, _ = inception.inference(images, num_classes)
        pred = tf.nn.softmax(logits)
        top_1_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation from the collected summaries.
        summary_op = tf.merge_all_summaries()

        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
                                                graph_def=graph_def)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                if os.path.isabs(ckpt.model_checkpoint_path):
                    # Restores from checkpoint with absolute path.
                    saver.restore(sess, ckpt.model_checkpoint_path)
                else:
                    # Restores from checkpoint with relative path.
                    saver.restore(
                        sess,
                        os.path.join(FLAGS.checkpoint_dir,
                                     ckpt.model_checkpoint_path))

                # Assuming model_checkpoint_path looks something like
                # /my-favorite-path/imagenet_train/model.ckpt-0,
                # extract global_step from it.
                global_step = ckpt.model_checkpoint_path.split(
                    '/')[-1].split('-')[-1]
                print('Succesfully loaded model from %s at step=%s.' %
                      (ckpt.model_checkpoint_path, global_step))
            else:
                print('No checkpoint file found')
                return

            # Start the queue runners.
            coord = tf.train.Coordinator()
            threads = []
            try:
                for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                    threads.extend(
                        qr.create_threads(sess, coord=coord, daemon=True,
                                          start=True))

                # float() keeps the ceil meaningful under Python 2 integer
                # division.
                num_iter = int(
                    math.ceil(FLAGS.num_examples / float(FLAGS.batch_size)))

                # Counts of correct predictions.
                test_acc = 0.0
                count_top_1 = 0
                confusion_m_all = []
                total_sample_count = num_iter * FLAGS.batch_size
                step = 0
                # Fixed label set so that every per-batch confusion matrix
                # has the same shape and the matrices can be summed.
                # Assumes confusion_matrix is scikit-learn's
                # (y_true, y_pred, labels=...) function - TODO confirm.
                class_ids = list(range(num_classes))

                print('%s: starting evaluation on (%s).' %
                      (datetime.now(), FLAGS.subset))

                while step < num_iter and not coord.should_stop():
                    # Separate names for the fetched arrays: rebinding
                    # `pred`/`labels` would replace the tensors and break
                    # the next sess.run call.
                    pred_value, labels_value, top_1 = sess.run(
                        [pred, labels, top_1_op])
                    predicted = np.argmax(pred_value, 1)

                    correct_pred = np.equal(predicted, labels_value)
                    test_acc += np.sum(correct_pred.astype(float))

                    confusion_m_all.append(
                        confusion_matrix(labels_value, predicted,
                                         labels=class_ids))

                    count_top_1 += np.sum(top_1)
                    step += 1

                # Compute precision @ 1.
                precision_at_1 = count_top_1 / float(total_sample_count)

                confusion_m_average = np.sum(confusion_m_all, axis=0)
                print(confusion_m_average)

                test_acc = float(test_acc) / float(total_sample_count)
                print("Test Accuracy: {} \n".format(test_acc))

                summary = tf.Summary()
                summary.ParseFromString(sess.run(summary_op))
                summary.value.add(tag='Precision @ 1',
                                  simple_value=precision_at_1)
                summary_writer.add_summary(summary, global_step)

            except Exception as e:  # pylint: disable=broad-except
                # Hand the error to the coordinator; join() re-raises it.
                coord.request_stop(e)

            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)
def export():
    """Export the Inception model for single-image classification serving.

    Loads the synset list (SYNSET_FILE) and the synset descriptions
    (METADATA_FILE), builds the inference graph on a placeholder holding
    one JPEG string, restores the moving-average variables from the latest
    checkpoint in FLAGS.checkpoint_dir and exports to FLAGS.export_dir.

    The class-name table starts with an 'unused background' entry: the
    model is built with NUM_CLASSES + 1 outputs, so without that entry
    every reported class name would be shifted by one position.

    Returns without exporting when no checkpoint is found.
    """
    # Create index->synset mapping.
    with open(SYNSET_FILE) as f:
        synsets = f.read().splitlines()

    # Create synset->metadata mapping from tab-separated lines.
    texts = {}
    with open(METADATA_FILE) as f:
        for line in f.read().splitlines():
            parts = line.split('\t')
            assert len(parts) == 2
            texts[parts[0]] = parts[1]

    with tf.Graph().as_default():
        # Build inference model.
        # Please refer to Tensorflow inception model for details.

        # Input transformation.
        # TODO(b/27776734): Add batching support.
        jpegs = tf.placeholder(tf.string, shape=(1))
        image_buffer = tf.squeeze(jpegs, [0])
        # Decode the string as an RGB JPEG. Height and width are only
        # known at run time.
        image = tf.image.decode_jpeg(image_buffer, channels=3)
        # After this point all pixels reside in [0, 1) until the final
        # rescale to [-1, 1].
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        # Crop the central region containing 87.5% of the original image.
        image = tf.image.central_crop(image, central_fraction=0.875)
        # Resize to the model's input size; resize_bilinear expects a batch.
        image = tf.expand_dims(image, 0)
        image = tf.image.resize_bilinear(
            image, [FLAGS.image_size, FLAGS.image_size],
            align_corners=False)
        image = tf.squeeze(image, [0])
        # Finally, rescale to [-1, 1] instead of [0, 1).
        image = tf.sub(image, 0.5)
        image = tf.mul(image, 2.0)
        images = tf.expand_dims(image, 0)

        # Run inference.
        logits, _ = inception_model.inference(images, NUM_CLASSES + 1)

        # Transform output to topK result.
        values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)

        # Constant string tensor whose i'th element is the readable
        # description of class i. Index 0 is the unused background class,
        # so the synset descriptions start at index 1.
        class_descriptions = ['unused background']
        class_descriptions.extend(texts[s] for s in synsets)
        class_tensor = tf.constant(class_descriptions)

        classes = tf.contrib.lookup.index_to_string(
            tf.to_int64(indices), mapping=class_tensor)

        # Restore the moving-average version of the variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                # Assuming model_checkpoint_path looks something like
                # /my-favorite-path/imagenet_train/model.ckpt-0,
                # extract global_step from it.
                global_step = ckpt.model_checkpoint_path.split(
                    '/')[-1].split('-')[-1]
                print('Successfully loaded model from %s at step=%s.' %
                      (ckpt.model_checkpoint_path, global_step))
            else:
                print('No checkpoint file found at %s' %
                      FLAGS.checkpoint_dir)
                return

            # Export inference model.
            init_op = tf.group(tf.initialize_all_tables(), name='init_op')
            model_exporter = exporter.Exporter(saver)
            signature = exporter.classification_signature(
                input_tensor=jpegs,
                classes_tensor=classes,
                scores_tensor=values)
            model_exporter.init(default_graph_signature=signature,
                                init_op=init_op)
            model_exporter.export(FLAGS.export_dir,
                                  tf.constant(global_step), sess)
            print('Successfully exported model to %s' % FLAGS.export_dir)
def export():
    """Export the Inception model with default and named signatures.

    Loads the synset list (SYNSET_FILE) and the synset descriptions
    (METADATA_FILE), builds the inference graph on serialized tf.Example
    protos carrying an 'image/encoded' JPEG string, restores the
    moving-average variables from the latest checkpoint in
    FLAGS.checkpoint_dir and exports to FLAGS.export_dir.

    The default signature classifies serialized examples. The named
    signatures map 'images' (the JPEG strings) to 'classes' and 'scores'.

    Returns without exporting when no checkpoint is found.
    """
    # Index -> synset id, one id per line.
    with open(SYNSET_FILE) as synset_file:
        synsets = synset_file.read().splitlines()

    # Synset id -> description, one tab-separated pair per line.
    texts = {}
    with open(METADATA_FILE) as metadata_file:
        for row in metadata_file.read().splitlines():
            fields = row.split('\t')
            assert len(fields) == 2
            synset_id, description = fields
            texts[synset_id] = description

    with tf.Graph().as_default():
        # Input transformation: parse the examples, then preprocess each
        # encoded JPEG.
        serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
        feature_configs = {
            'image/encoded': tf.FixedLenFeature(shape=[], dtype=tf.string),
        }
        tf_example = tf.parse_example(serialized_tf_example, feature_configs)
        jpegs = tf_example['image/encoded']
        images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)

        # Run inference and keep the top-k logits with their indices.
        logits, _ = inception_model.inference(images, NUM_CLASSES + 1)
        values, indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)

        # Index 0 is the unused background class, so the readable names
        # start at index 1.
        class_descriptions = ['unused background']
        class_descriptions.extend(texts[s] for s in synsets)
        class_tensor = tf.constant(class_descriptions)

        table = tf.contrib.lookup.index_to_string_table_from_tensor(
            class_tensor)
        classes = table.lookup(tf.to_int64(indices))

        # Restore the moving-average version of the variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
                return

            checkpoint_path = ckpt.model_checkpoint_path
            saver.restore(sess, checkpoint_path)
            # A path such as /some/dir/model.ckpt-0 yields global step '0':
            # the text after the last '-' of the last path component.
            file_name = checkpoint_path.rsplit('/', 1)[-1]
            global_step = file_name.rsplit('-', 1)[-1]
            print('Successfully loaded model from %s at step=%s.' %
                  (checkpoint_path, global_step))

            # Export inference model.
            init_op = tf.group(tf.tables_initializer(), name='init_op')
            classification_signature = exporter.classification_signature(
                input_tensor=serialized_tf_example,
                classes_tensor=classes,
                scores_tensor=values)
            generic_inputs = exporter.generic_signature({'images': jpegs})
            generic_outputs = exporter.generic_signature({
                'classes': classes,
                'scores': values
            })
            named_graph_signature = {
                'inputs': generic_inputs,
                'outputs': generic_outputs,
            }
            model_exporter = exporter.Exporter(saver)
            model_exporter.init(
                init_op=init_op,
                default_graph_signature=classification_signature,
                named_graph_signatures=named_graph_signature)
            model_exporter.export(FLAGS.export_dir,
                                  tf.constant(global_step), sess)
            print('Successfully exported model to %s' % FLAGS.export_dir)
# spatial_tranform: apply a per-image zoom/pan transform predicted by a
# localization network and return the transformed images.
#
# What the code below does:
#   * Resizes every element of `images0` to 299x299 and stacks the results
#     (`images1`, exposed in the graph as 'input_stn').
#   * Inside variable scope 'loc_net', `loc_net` selects the network that
#     produces `theta`: 'fc' -> loc_net_fc, 'conv' -> loc_net_conv,
#     'inception' -> inception_model.inference on (images1-128)/128. with
#     num_classes=3.
#   * theta[:, 0], theta[:, 1] and theta[:, 2] are summarized as zoom,
#     horizontal pan and vertical pan.
#   * When subset == 'train', xform_reg * l2_loss(theta) is added to the
#     'regularization_losses' collection.
#   * For each i in range(batch_size) the six values [s, 0, dx, 0, s, dy],
#     with s = (theta[i, 0] + 1) / 2, are passed to
#     spatial_transformer.transformer together with images0[i]; the result
#     is resized to 299x299 and its static shape set to [299, 299, 3].
#   * An image summary 'xform_pairs' shows `images1` and the transformed
#     images concatenated along axis 2, with a white bar concatenated
#     before and after along axis 1.
#   * Returns the stacked transformed images (`images2`).
#
# NOTE(review): the line structure of this block has been lost, so the
# nesting cannot be confirmed from here - in particular whether
# tf.nn.tanh(theta) applies to every `loc_net` choice or only to the
# 'inception' branch. Verify against the original file before reformatting.
# NOTE(review): no visible branch assigns `theta` for any other `loc_net`
# value; `blkbar` is created but never used; the print statements are
# Python 2 syntax, and `299/2` presumably relies on integer division.
def spatial_tranform(images0, batch_size, subset, loc_net, xform_reg): images1 = tf.pack([ tf.image.resize_images(i, 299, 299) for i in images0 ]) with tf.name_scope(None): images1 = tf.identity(images1, name='input_stn') with tf.variable_scope('loc_net') as scope: if loc_net == 'fc': print 'using fully connected localization network' theta = loc_net_fc(images1, batch_size) if loc_net == 'conv': print 'using convolutional localization network' theta = loc_net_conv(images1, batch_size) if loc_net == 'inception': print 'using inception localization network' theta, _ = inception_model.inference( images = (images1-128)/128., num_classes = 3, for_training = (subset == 'train'), restore_logits = (subset != 'train') ) theta = tf.nn.tanh(theta) with tf.name_scope(None): theta = tf.identity(theta, name='theta') tf.histogram_summary('theta/zoom', theta[:,0]) tf.histogram_summary('theta/pan_horizontal', theta[:,1]) tf.histogram_summary('theta/pan_vertical', theta[:,2]) if subset == 'train': with tf.name_scope(None): theta_loss = tf.nn.l2_loss(theta, name='theta_loss') tf.scalar_summary('theta_loss', theta_loss) tf.add_to_collection('regularization_losses', xform_reg*theta_loss) images2 = [] for i in range(batch_size): s, dx, dy = (theta[i,0]+1)/2, theta[i,1], theta[i,2] th = tf.pack([s, 0, dx, 0, s, dy]) u = images0[i] u, th = tf.expand_dims(u, 0), tf.expand_dims(th, 0) dsf = tf.cast(tf.shape(u)[1], 'float32') / 299 v = spatial_transformer.transformer(u, th, dsf) v = tf.image.resize_images(v[0,:,:,:], 299, 299) v.set_shape([299, 299, 3]) images2.append(v) images2 = tf.pack(images2) images12 = tf.concat(2, [images1, images2]) blkbar = tf.zeros([batch_size, 299/2, 299*2, 3]) whtbar = 255*tf.ones([batch_size, 299/2, 299*2, 3]) images12 = tf.concat(1, [whtbar, images12, whtbar]) images12 = tf.clip_by_value(images12, 0, 255) tf.image_summary('xform_pairs', images12, max_images=batch_size) return images2
def export():
    """Export the latest Inception training checkpoint as a SavedModel.

    Builds an inference graph that accepts serialized tf.Example protos
    carrying an 'image/encoded' string feature, runs the Inception model and
    returns the top NUM_TOP_CLASSES class descriptions with their scores.
    Variables are restored from the newest checkpoint in
    FLAGS.checkpoint_dir, and the model is written to
    FLAGS.output_dir/FLAGS.model_version with two signatures:
    'predict_images' and the default serving (classification) signature.

    If no checkpoint is found a message is printed and the function returns
    without exporting anything.

    NOTE(review): `synsets` and `texts` are read below but never assigned in
    this function. Presumably they are module-level (index->synset list and
    synset->description mapping, e.g. loaded from SYNSET_FILE and
    METADATA_FILE) -- confirm they are populated before export() is called.
    """
    with tf.Graph().as_default():
        # Input transformation: serialized tf.Example -> JPEG bytes -> images.
        # Please refer to the Tensorflow inception model for details.
        serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
        feature_spec = {
            'image/encoded': tf.FixedLenFeature(shape=[], dtype=tf.string),
        }
        parsed_example = tf.parse_example(serialized_tf_example, feature_spec)
        jpegs = parsed_example['image/encoded']
        images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)

        # Run inference; the extra class is the background class at index 0.
        logits, _ = inception_model.inference(images, NUM_CLASSES + 1)

        # Reduce the output to the top-K scores and their class indices.
        top_scores, top_indices = tf.nn.top_k(logits, NUM_TOP_CLASSES)

        # Constant string tensor whose i'th element is the human readable
        # description of class i. Index 0 is the unused background class
        # (see the inception model definition code).
        descriptions = ['unused background'] + [texts[s] for s in synsets]
        description_tensor = tf.constant(descriptions)
        index_to_description = (
            tf.contrib.lookup.index_to_string_table_from_tensor(
                description_tensor))
        top_classes = index_to_description.lookup(tf.to_int64(top_indices))

        # Saver restoring variables through the moving-average name map.
        ema = tf.train.ExponentialMovingAverage(
            inception_model.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        with tf.Session() as sess:
            # Restore variables from the training checkpoint, or bail out.
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if not ckpt or not ckpt.model_checkpoint_path:
                print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
                return

            checkpoint_path = ckpt.model_checkpoint_path
            saver.restore(sess, checkpoint_path)
            # Assuming the path looks something like
            #   /my-favorite-path/imagenet_train/model.ckpt-0,
            # the global step is whatever follows the last '-' of the
            # final path component.
            checkpoint_name = checkpoint_path.split('/')[-1]
            global_step = checkpoint_name.split('-')[-1]
            print('Successfully loaded model from %s at step=%s.' %
                  (checkpoint_path, global_step))

            # Export the inference model to <output_dir>/<model_version>.
            version_dir = tf.compat.as_bytes(str(FLAGS.model_version))
            output_path = os.path.join(
                tf.compat.as_bytes(FLAGS.output_dir), version_dir)
            print('Exporting trained model to', output_path)
            builder = tf.saved_model.builder.SavedModelBuilder(output_path)

            # Short local aliases for the long SavedModel helper names.
            tensor_info = tf.saved_model.utils.build_tensor_info
            signature_def = (
                tf.saved_model.signature_def_utils.build_signature_def)
            constants = tf.saved_model.signature_constants

            # Tensor infos shared by both signatures.
            examples_info = tensor_info(serialized_tf_example)
            classes_info = tensor_info(top_classes)
            scores_info = tensor_info(top_scores)

            # Classification signature: serialized examples in,
            # classes and scores out.
            classification_signature = signature_def(
                inputs={constants.CLASSIFY_INPUTS: examples_info},
                outputs={
                    constants.CLASSIFY_OUTPUT_CLASSES: classes_info,
                    constants.CLASSIFY_OUTPUT_SCORES: scores_info,
                },
                method_name=constants.CLASSIFY_METHOD_NAME)

            # Prediction signature: raw JPEG strings in, classes and
            # scores out.
            jpegs_info = tensor_info(jpegs)
            prediction_signature = signature_def(
                inputs={'images': jpegs_info},
                outputs={
                    'classes': classes_info,
                    'scores': scores_info,
                },
                method_name=constants.PREDICT_METHOD_NAME)

            # The lookup table must be initialized when the model is loaded.
            legacy_init_op = tf.group(
                tf.tables_initializer(), name='legacy_init_op')
            signature_def_map = {
                'predict_images': prediction_signature,
                constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    classification_signature,
            }
            builder.add_meta_graph_and_variables(
                sess,
                [tf.saved_model.tag_constants.SERVING],
                signature_def_map=signature_def_map,
                legacy_init_op=legacy_init_op)
            builder.save()
            print('Successfully exported model to %s' % FLAGS.output_dir)