Example #1
def _get_init_fn():

	if FLAGS.checkpoint_path is None:
		return None

	exclusions = []
	
	if FLAGS.checkpoint_exclude_scopes:
		exclusions = [scope.strip() for scope in FLAGS.checkpoint_exclude_scopes.split(',')]

	# TODO(sguada) variables.filter_variables()
	variables_to_restore = []

	for var in slim.get_model_variables():
		
		#print var.op.name
		excluded = False
		for exclusion in exclusions:
			if var.op.name.startswith(exclusion):
				excluded = True
				break
		if not excluded:
			variables_to_restore.append(var)

	tf.logging.info('Fine-tuning from %s' % FLAGS.checkpoint_path)

	return slim.assign_from_checkpoint_fn(FLAGS.checkpoint_path,variables_to_restore,ignore_missing_vars=False)
Example #2
def _get_init_fn():
	"""Returns a function run by the chief worker to warm-start the training.
	Note that the init_fn is only run when initializing the model during the very
	first global step.
	Returns:
	An init function run by the supervisor.
	"""
	if FLAGS.checkpoint_path is None:
		return None

	exclusions = []
	
	if FLAGS.checkpoint_exclude_scopes:
		exclusions = [scope.strip() for scope in FLAGS.checkpoint_exclude_scopes.split(',')]

	# TODO(sguada) variables.filter_variables()
	variables_to_restore = []

	for var in slim.get_model_variables():
		
		#print var.op.name
		excluded = False
		for exclusion in exclusions:
			if var.op.name.startswith(exclusion):
				excluded = True
				break
		if not excluded:
			variables_to_restore.append(var)

	tf.logging.info('Fine-tuning from %s' % FLAGS.checkpoint_path)

	return slim.assign_from_checkpoint_fn(FLAGS.checkpoint_path,variables_to_restore,ignore_missing_vars=False)
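In the training scripts these two functions come from, the callable returned by slim.assign_from_checkpoint_fn is usually not invoked directly; it is handed to slim.learning.train, which runs it once on the chief worker before the first global step. A minimal sketch of that wiring, assuming a train_op and the usual flags (train_dir, max_number_of_steps, log_every_n_steps) are defined elsewhere in the same script:

slim.learning.train(
    train_op,                                   # training op built elsewhere in the script
    logdir=FLAGS.train_dir,                     # checkpoints and summaries are written here
    init_fn=_get_init_fn(),                     # warm-start from FLAGS.checkpoint_path
    number_of_steps=FLAGS.max_number_of_steps,  # assumed flag: stop after this many steps
    log_every_n_steps=FLAGS.log_every_n_steps)  # assumed flag: console logging frequency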
Example #3
def main():
    model = config.get('config', 'model')
    cachedir = utils.get_cachedir(config)
    with open(os.path.join(cachedir, 'names'), 'r') as f:
        names = [line.strip() for line in f]
    width = config.getint(model, 'width')
    height = config.getint(model, 'height')
    yolo = importlib.import_module('model.' + model)
    cell_width, cell_height = utils.calc_cell_width_height(config, width, height)
    tf.logging.info('(width, height)=(%d, %d), (cell_width, cell_height)=(%d, %d)' % (width, height, cell_width, cell_height))
    with tf.Session() as sess:
        paths = [os.path.join(cachedir, profile + '.tfrecord') for profile in args.profile]
        num_examples = sum(sum(1 for _ in tf.python_io.tf_record_iterator(path)) for path in paths)
        tf.logging.warn('num_examples=%d' % num_examples)
        image_rgb, labels = utils.data.load_image_labels(paths, len(names), width, height, cell_width, cell_height, config)
        image_std = tf.image.per_image_standardization(image_rgb)
        image_rgb = tf.cast(image_rgb, tf.uint8)
        ph_image = tf.placeholder(image_std.dtype, [1] + image_std.get_shape().as_list(), name='ph_image')
        global_step = tf.contrib.framework.get_or_create_global_step()
        builder = yolo.Builder(args, config)
        builder(ph_image)
        variables_to_restore = slim.get_variables_to_restore()
        ph_labels = [tf.placeholder(l.dtype, [1] + l.get_shape().as_list(), name='ph_' + l.op.name) for l in labels]
        with tf.name_scope('total_loss') as name:
            builder.create_objectives(ph_labels)
            total_loss = tf.losses.get_total_loss(name=name)
        tf.global_variables_initializer().run()
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)
        _image_rgb, _image_std, _labels = sess.run([image_rgb, image_std, labels])
        coord.request_stop()
        coord.join(threads)
        feed_dict = dict([(ph, np.expand_dims(d, 0)) for ph, d in zip(ph_labels, _labels)])
        feed_dict[ph_image] = np.expand_dims(_image_std, 0)
        logdir = utils.get_logdir(config)
        assert os.path.exists(logdir)
        model_path = tf.train.latest_checkpoint(logdir)
        tf.logging.info('load ' + model_path)
        slim.assign_from_checkpoint_fn(model_path, variables_to_restore)(sess)
        tf.logging.info('global_step=%d' % sess.run(global_step))
        tf.logging.info('total_loss=%f' % sess.run(total_loss, feed_dict))
        _ = Drawer(sess, names, builder.model.cell_width, builder.model.cell_height, _image_rgb, _labels, builder.model, feed_dict)
        plt.show()
Example #4
def main():
    model = config.get('config', 'model')
    yolo = importlib.import_module('model.' + model)
    width = config.getint(model, 'width')
    height = config.getint(model, 'height')
    preprocess = getattr(importlib.import_module('detect'), args.preprocess)
    with tf.Session() as sess:
        ph_image = tf.placeholder(tf.float32, [1, height, width, 3], name='ph_image')
        builder = yolo.Builder(args, config)
        builder(ph_image)
        global_step = tf.contrib.framework.get_or_create_global_step()
        model_path = tf.train.latest_checkpoint(utils.get_logdir(config))
        tf.logging.info('load ' + model_path)
        slim.assign_from_checkpoint_fn(model_path, tf.global_variables())(sess)
        tf.logging.info('global_step=%d' % sess.run(global_step))
        tensors = [builder.model.conf, builder.model.xy_min, builder.model.xy_max]
        tensors = [tf.check_numerics(t, t.op.name) for t in tensors]
        cap = cv2.VideoCapture(0)
        try:
            while True:
                ret, image_bgr = cap.read()
                assert ret
                image_height, image_width, _ = image_bgr.shape
                scale = [image_width / builder.model.cell_width, image_height / builder.model.cell_height]
                image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
                image_std = np.expand_dims(preprocess(cv2.resize(image_rgb, (width, height))).astype(np.float32), 0)
                feed_dict = {ph_image: image_std}
                conf, xy_min, xy_max = sess.run(tensors, feed_dict)
                boxes = utils.postprocess.non_max_suppress(conf[0], xy_min[0], xy_max[0], args.threshold, args.threshold_iou)
                for _conf, _xy_min, _xy_max in boxes:
                    index = np.argmax(_conf)
                    if _conf[index] > args.threshold:
                        _xy_min = (_xy_min * scale).astype(int)  # plain int: np.int is removed in newer NumPy releases
                        _xy_max = (_xy_max * scale).astype(int)
                        cv2.rectangle(image_bgr, tuple(_xy_min), tuple(_xy_max), (255, 0, 255), 3)
                        cv2.putText(image_bgr, builder.names[index] + ' (%.1f%%)' % (_conf[index] * 100), tuple(_xy_min), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
                cv2.imshow('detection', image_bgr)
                cv2.waitKey(1)
        finally:
            cv2.destroyAllWindows()
            cap.release()
Example #5
def _get_init_fn():
  """Returns a function run by the chief worker to warm-start the training.

  Note that the init_fn is only run when initializing the model during the very
  first global step.

  Returns:
    An init function run by the supervisor.
  """
  if FLAGS.checkpoint_path is None:
    return None

  # Warn the user if a checkpoint exists in the train_dir. Then we'll be
  # ignoring the checkpoint anyway.
  if tf.train.latest_checkpoint(FLAGS.train_dir):
    tf.logging.info(
      'Ignoring --checkpoint_path because a checkpoint already exists in %s'
      % FLAGS.train_dir)
    return None

  exclusions = []
  if FLAGS.checkpoint_exclude_scopes:
    exclusions = [scope.strip()
                  for scope in FLAGS.checkpoint_exclude_scopes.split(',')]

  # TODO(sguada) variables.filter_variables()
  variables_to_restore = []
  for var in slim.get_model_variables():
    excluded = False
    for exclusion in exclusions:
      if var.op.name.startswith(exclusion):
        excluded = True
        break
    if not excluded:
      variables_to_restore.append(var)

  if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
    checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
  else:
    checkpoint_path = FLAGS.checkpoint_path

  tf.logging.info('Fine-tuning from %s' % checkpoint_path)

  return slim.assign_from_checkpoint_fn(
    checkpoint_path,
    variables_to_restore,
    ignore_missing_vars=FLAGS.ignore_missing_vars)
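This variant also reads FLAGS.train_dir and FLAGS.ignore_missing_vars. A minimal sketch of the flag definitions it expects, with names taken from the code above (the defaults and help strings here are assumptions, not the originals):

tf.app.flags.DEFINE_string(
    'train_dir', '/tmp/train',
    'Directory where checkpoints and event logs are written.')
tf.app.flags.DEFINE_string(
    'checkpoint_path', None,
    'Path to a checkpoint file, or a directory containing one, to fine-tune from.')
tf.app.flags.DEFINE_string(
    'checkpoint_exclude_scopes', None,
    'Comma-separated list of variable scopes to exclude when restoring.')
tf.app.flags.DEFINE_boolean(
    'ignore_missing_vars', False,
    'When restoring, silently skip variables that are missing from the checkpoint.')
FLAGS = tf.app.flags.FLAGS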
Example #6
def get_init_fn():
    """Returns a function run by the chief worker to warm-start the training."""
    checkpoint_exclude_scopes = ["InceptionV1/Logits"]

    exclusions = [scope.strip() for scope in checkpoint_exclude_scopes]
    checkpoints_dir = "D:\\zero\\work\\models-master\\model\\"
    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        for exclusion in exclusions:
            if var.op.name.startswith(exclusion):
                excluded = True
                break
        if not excluded:
            variables_to_restore.append(var)

    return slim.assign_from_checkpoint_fn(
      os.path.join(checkpoints_dir, 'inception_v1.ckpt'),
      variables_to_restore)
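When no supervisor is involved, the returned function can simply be called with a live session once the InceptionV1 graph has been built. A minimal sketch, assuming the model variables already exist in the default graph:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # initialize everything, including the new Logits layer
    get_init_fn()(sess)                           # then overwrite all non-excluded variables from inception_v1.ckpt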
Example #7
def ing_models(extracted_img_list):
    # VARIABLES
    # CKPT_PATH = "cosmetic-300/cosmetic-300"
    CKPT_PATH = os.path.join(settings.BASE_DIR, 'cosmetic-300/cosmetic-300')
    MEAN_PIXEL = [123.68, 116.78, 103.94]
    NCLASS = 12

    inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])
    is_training = tf.placeholder(tf.bool)

    # MODEL PREPARATION
    with slim.arg_scope(resnet.resnet_arg_scope()):
        logit, model = resnet.resnet_v1_50(inputs,
                                           num_classes=NCLASS,
                                           is_training=is_training)

    init_fn = slim.assign_from_checkpoint_fn(CKPT_PATH,
                                             slim.get_variables_to_restore(),
                                             ignore_missing_vars=True)

    # CREATE SESSION
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    init_fn(sess)

    predict_list = []
    if extracted_img_list:
        for extracted_img in extracted_img_list:
            x = Pil_image.open(extracted_img.image.path)
            x = x.convert('RGB')
            x = x.resize((224, 224))
            x = np.array(x)
            x = x.astype(np.float32) - MEAN_PIXEL
            predict_images = []
            predict_images.append(x)
            predict_images = np.array(predict_images)
            predict = sess.run(model["predictions"],
                               feed_dict={
                                   inputs: predict_images,
                                   is_training: False
                               })
            predict = np.argmax(predict, 1)
            predict_list.append(predict)

    category_list = []
    if predict_list:
        for predict in predict_list:
            category = Category.objects.get(id=(int(predict)))
            category_list.append(category)

    nickname_id_list = []
    if category_list:
        for category in category_list:
            nickname_queryset = Nickname.objects.filter(category=category)
            if nickname_queryset:
                nickname_id = nickname_queryset[random.randrange(
                    0, len(nickname_queryset))].id
                nickname_id_list.append(nickname_id)

    nickname_list = Nickname.objects.filter(id__in=nickname_id_list)
    if nickname_list:
        for extracted_img, assigned_nickname in zip(extracted_img_list,
                                                    nickname_list):
            extracted_img.nickname = assigned_nickname
            extracted_img.save()

    return nickname_list
Example #8
def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode):
  assert(args.arch.multi_scale), 'removed support for old single scale code.'
  # Set up the model.
  tf.set_random_seed(args.solver.seed)
  task_params = args.navtask.task_params

  batch_norm_is_training_op = \
      tf.placeholder_with_default(batch_norm_is_training, shape=[],
                                  name='batch_norm_is_training_op') 

  # Setup the inputs
  m.input_tensors = {}
  m.train_ops = {}
  m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train'] = \
      _inputs(task_params)

  m.init_fn = None

  if task_params.input_type == 'vision':
    m.vision_ops = get_map_from_images(
        m.input_tensors['step']['imgs'], args.mapper_arch,
        task_params, args.solver.freeze_conv,
        args.solver.wt_decay, is_training, batch_norm_is_training_op,
        num_maps=len(task_params.map_crop_sizes))

    # Load variables from snapshot if needed.
    if args.solver.pretrained_path is not None:
      m.init_fn = slim.assign_from_checkpoint_fn(args.solver.pretrained_path,
                                                 m.vision_ops.vars_to_restore)

    # Set up caching of vision features if needed.
    if args.solver.freeze_conv:
      m.train_ops['step_data_cache'] = [m.vision_ops.encoder_output]
    else:
      m.train_ops['step_data_cache'] = []

    # Set up blobs that are needed for the computation in rest of the graph.
    m.ego_map_ops = m.vision_ops.fss_logits
    m.coverage_ops = m.vision_ops.confs_probs
    
    # Zero pad these to make them same size as what the planner expects.
    for i in range(len(m.ego_map_ops)):
      if args.mapper_arch.pad_map_with_zeros_each[i] > 0:
        paddings = np.zeros((5,2), dtype=np.int32)
        paddings[2:4,:] = args.mapper_arch.pad_map_with_zeros_each[i]
        paddings_op = tf.constant(paddings, dtype=tf.int32)
        m.ego_map_ops[i] = tf.pad(m.ego_map_ops[i], paddings=paddings_op)
        m.coverage_ops[i] = tf.pad(m.coverage_ops[i], paddings=paddings_op)
  
  elif task_params.input_type == 'analytical_counts':
    m.ego_map_ops = []; m.coverage_ops = []
    for i in range(len(task_params.map_crop_sizes)):
      ego_map_op = m.input_tensors['step']['analytical_counts_{:d}'.format(i)]
      coverage_op = tf.cast(tf.greater_equal(
          tf.reduce_max(ego_map_op, reduction_indices=[4],
                        keep_dims=True), 1), tf.float32)
      coverage_op = tf.ones_like(ego_map_op) * coverage_op
      m.ego_map_ops.append(ego_map_op)
      m.coverage_ops.append(coverage_op)
      m.train_ops['step_data_cache'] = []
  
  num_steps = task_params.num_steps
  num_goals = task_params.num_goals

  map_crop_size_ops = []
  for map_crop_size in task_params.map_crop_sizes:
    map_crop_size_ops.append(tf.constant(map_crop_size, dtype=tf.int32, shape=(2,)))

  with tf.name_scope('check_size'):
    is_single_step = tf.equal(tf.unstack(tf.shape(m.ego_map_ops[0]), num=5)[1], 1)

  fr_ops = []; value_ops = [];
  fr_intermediate_ops = []; value_intermediate_ops = [];
  crop_value_ops = [];
  resize_crop_value_ops = [];
  confs = []; occupancys = [];

  previous_value_op = None
  updated_state = []; state_names = [];

  for i in range(len(task_params.map_crop_sizes)):
    map_crop_size = task_params.map_crop_sizes[i]
    with tf.variable_scope('scale_{:d}'.format(i)): 
      # Accumulate the map.
      fn = lambda ns: running_combine(
             m.ego_map_ops[i],
             m.coverage_ops[i],
             m.input_tensors['step']['incremental_locs'] * task_params.map_scales[i],
             m.input_tensors['step']['incremental_thetas'],
             m.input_tensors['step']['running_sum_num_{:d}'.format(i)],
             m.input_tensors['step']['running_sum_denom_{:d}'.format(i)],
             m.input_tensors['step']['running_max_denom_{:d}'.format(i)],
             map_crop_size, ns)

      running_sum_num, running_sum_denom, running_max_denom = \
          tf.cond(is_single_step, lambda: fn(1), lambda: fn(num_steps*num_goals))
      updated_state += [running_sum_num, running_sum_denom, running_max_denom]
      state_names += ['running_sum_num_{:d}'.format(i),
                      'running_sum_denom_{:d}'.format(i),
                      'running_max_denom_{:d}'.format(i)]

      # Concat the accumulated map and goal
      occupancy = running_sum_num / tf.maximum(running_sum_denom, 0.001)
      conf = running_max_denom
      # print occupancy.get_shape().as_list()

      # Concat occupancy, how much occupied and goal.
      with tf.name_scope('concat'):
        sh = [-1, map_crop_size, map_crop_size, task_params.map_channels]
        occupancy = tf.reshape(occupancy, shape=sh)
        conf = tf.reshape(conf, shape=sh)

        sh = [-1, map_crop_size, map_crop_size, task_params.goal_channels]
        goal = tf.reshape(m.input_tensors['step']['ego_goal_imgs_{:d}'.format(i)], shape=sh)
        to_concat = [occupancy, conf, goal]

        if previous_value_op is not None:
          to_concat.append(previous_value_op)

        x = tf.concat(to_concat, 3)

      # Pass the map, previous rewards and the goal through a few convolutional
      # layers to get fR.
      fr_op, fr_intermediate_op = fr_v2(
         x, output_neurons=args.arch.fr_neurons,
         inside_neurons=args.arch.fr_inside_neurons,
         is_training=batch_norm_is_training_op, name='fr',
         wt_decay=args.solver.wt_decay, stride=args.arch.fr_stride)

      # Do Value Iteration on the fR
      if args.arch.vin_num_iters > 0:
        value_op, value_intermediate_op = value_iteration_network(
            fr_op, num_iters=args.arch.vin_num_iters,
            val_neurons=args.arch.vin_val_neurons,
            action_neurons=args.arch.vin_action_neurons,
            kernel_size=args.arch.vin_ks, share_wts=args.arch.vin_share_wts,
            name='vin', wt_decay=args.solver.wt_decay)
      else:
        value_op = fr_op
        value_intermediate_op = []

      # Crop out and upsample the previous value map.
      remove = args.arch.crop_remove_each
      if remove > 0:
        crop_value_op = value_op[:, remove:-remove, remove:-remove,:]
      else:
        crop_value_op = value_op
      crop_value_op = tf.reshape(crop_value_op, shape=[-1, args.arch.value_crop_size,
                                                       args.arch.value_crop_size,
                                                       args.arch.vin_val_neurons])
      if i < len(task_params.map_crop_sizes)-1:
        # Reshape it to shape of the next scale.
        previous_value_op = tf.image.resize_bilinear(crop_value_op,
                                                     map_crop_size_ops[i+1],
                                                     align_corners=True)
        resize_crop_value_ops.append(previous_value_op)
      
      occupancys.append(occupancy)
      confs.append(conf)
      value_ops.append(value_op)
      crop_value_ops.append(crop_value_op)
      fr_ops.append(fr_op)
      fr_intermediate_ops.append(fr_intermediate_op)
  
  m.value_ops = value_ops
  m.value_intermediate_ops = value_intermediate_ops
  m.fr_ops = fr_ops
  m.fr_intermediate_ops = fr_intermediate_ops
  m.final_value_op = crop_value_op
  m.crop_value_ops = crop_value_ops
  m.resize_crop_value_ops = resize_crop_value_ops
  m.confs = confs
  m.occupancys = occupancys

  sh = [-1, args.arch.vin_val_neurons*((args.arch.value_crop_size)**2)]
  m.value_features_op = tf.reshape(m.final_value_op, sh, name='reshape_value_op')
  
  # Determine what action to take.
  with tf.variable_scope('action_pred'):
    batch_norm_param = args.arch.pred_batch_norm_param
    if batch_norm_param is not None:
      batch_norm_param['is_training'] = batch_norm_is_training_op
    m.action_logits_op, _ = tf_utils.fc_network(
        m.value_features_op, neurons=args.arch.pred_neurons,
        wt_decay=args.solver.wt_decay, name='pred', offset=0,
        num_pred=task_params.num_actions,
        batch_norm_param=batch_norm_param) 
    m.action_prob_op = tf.nn.softmax(m.action_logits_op)

  init_state = tf.constant(0., dtype=tf.float32, shape=[
      task_params.batch_size, 1, map_crop_size, map_crop_size,
      task_params.map_channels])

  m.train_ops['state_names'] = state_names
  m.train_ops['updated_state'] = updated_state
  m.train_ops['init_state'] = [init_state for _ in updated_state]

  m.train_ops['step'] = m.action_prob_op
  m.train_ops['common'] = [m.input_tensors['common']['orig_maps'],
                           m.input_tensors['common']['goal_loc']]
  m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op
  m.loss_ops = []; m.loss_ops_names = [];

  if args.arch.readout_maps:
    with tf.name_scope('readout_maps'):
      all_occupancys = tf.concat(m.occupancys + m.confs, 3)
      readout_maps, probs = readout_general(
          all_occupancys, num_neurons=args.arch.rom_arch.num_neurons,
          strides=args.arch.rom_arch.strides, 
          layers_per_block=args.arch.rom_arch.layers_per_block, 
          kernel_size=args.arch.rom_arch.kernel_size,
          batch_norm_is_training_op=batch_norm_is_training_op,
          wt_decay=args.solver.wt_decay)

      gt_ego_maps = [m.input_tensors['step']['readout_maps_{:d}'.format(i)]
                     for i in range(len(task_params.readout_maps_crop_sizes))]
      m.readout_maps_gt = tf.concat(gt_ego_maps, 4)
      gt_shape = tf.shape(m.readout_maps_gt)
      m.readout_maps_logits = tf.reshape(readout_maps, gt_shape)
      m.readout_maps_probs = tf.reshape(probs, gt_shape)

      # Add a loss op
      m.readout_maps_loss_op = tf.losses.sigmoid_cross_entropy(
          tf.reshape(m.readout_maps_gt, [-1, len(task_params.readout_maps_crop_sizes)]), 
          tf.reshape(readout_maps, [-1, len(task_params.readout_maps_crop_sizes)]),
          scope='loss')
      m.readout_maps_loss_op = 10.*m.readout_maps_loss_op

  ewma_decay = 0.99 if is_training else 0.0
  weight = tf.ones_like(m.input_tensors['train']['action'], dtype=tf.float32,
                        name='weight')
  m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \
    compute_losses_multi_or(m.action_logits_op,
                            m.input_tensors['train']['action'], weights=weight,
                            num_actions=task_params.num_actions,
                            data_loss_wt=args.solver.data_loss_wt,
                            reg_loss_wt=args.solver.reg_loss_wt,
                            ewma_decay=ewma_decay)
  
  if args.arch.readout_maps:
    m.total_loss_op = m.total_loss_op + m.readout_maps_loss_op
    m.loss_ops += [m.readout_maps_loss_op]
    m.loss_ops_names += ['readout_maps_loss']

  m.loss_ops += [m.reg_loss_op, m.data_loss_op, m.total_loss_op]
  m.loss_ops_names += ['reg_loss', 'data_loss', 'total_loss']

  if args.solver.freeze_conv:
    vars_to_optimize = list(set(tf.trainable_variables()) -
                            set(m.vision_ops.vars_to_restore))
  else:
    vars_to_optimize = None

  m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \
  m.sync_optimizer = tf_utils.setup_training(
      m.total_loss_op, 
      args.solver.initial_learning_rate, 
      args.solver.steps_per_decay,
      args.solver.learning_rate_decay, 
      args.solver.momentum,
      args.solver.max_steps, 
      args.solver.sync, 
      args.solver.adjust_lr_sync,
      args.solver.num_workers, 
      args.solver.task,
      vars_to_optimize=vars_to_optimize,
      clip_gradient_norm=args.solver.clip_gradient_norm,
      typ=args.solver.typ, momentum2=args.solver.momentum2,
      adam_eps=args.solver.adam_eps)

  if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay':
    m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(args.arch.isd_k,
                                                         m.global_step_op)
  elif args.arch.sample_gt_prob_type == 'zero':
    m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32)

  elif args.arch.sample_gt_prob_type.split('_')[0] == 'step':
    step = int(args.arch.sample_gt_prob_type.split('_')[1])
    m.sample_gt_prob_op = tf_utils.step_gt_prob(
        step, m.input_tensors['step']['step_number'][0,0,0])

  m.sample_action_type = args.arch.action_sample_type
  m.sample_action_combine_type = args.arch.action_sample_combine_type

  m.summary_ops = {
      summary_mode: _add_summaries(m, args, summary_mode,
                                   args.summary.arop_full_summary_iters)}

  m.init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
  m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4,
                              write_version=tf.train.SaverDef.V2)
  return m
Example #9
def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode):
    # Set up the model.
    tf.set_random_seed(args.solver.seed)
    task_params = args.navtask.task_params
    num_steps = task_params.num_steps
    num_goals = task_params.num_goals
    num_actions = task_params.num_actions
    num_actions_ = num_actions

    n_views = task_params.n_views

    batch_norm_is_training_op = \
        tf.placeholder_with_default(batch_norm_is_training, shape=[],
                                    name='batch_norm_is_training_op')
    # Setup the inputs
    m.input_tensors = {}
    lstm_states = []
    lstm_state_dims = []
    state_names = []
    updated_state_ops = []
    init_state_ops = []
    if args.arch.lstm_output:
        lstm_states += ['lstm_output']
        lstm_state_dims += [
            args.arch.lstm_output_dim + task_params.num_actions
        ]
    if args.arch.lstm_ego:
        lstm_states += ['lstm_ego']
        lstm_state_dims += [args.arch.lstm_ego_dim + args.arch.lstm_ego_out]
        lstm_states += ['lstm_img']
        lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out]
    elif args.arch.lstm_img:
        # An LSTM only on the image
        lstm_states += ['lstm_img']
        lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out]
    else:
        # No LSTMs involved here.
        pass

    m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train_bkp'] = \
        _inputs(task_params, lstm_states, lstm_state_dims)

    with tf.name_scope('check_size'):
        is_single_step = tf.equal(
            tf.unstack(tf.shape(m.input_tensors['step']['imgs']), num=6)[1], 1)

    images_reshaped = tf.reshape(m.input_tensors['step']['imgs'],
                                 shape=[
                                     -1, task_params.img_height,
                                     task_params.img_width,
                                     task_params.img_channels
                                 ],
                                 name='re_image')

    rel_goal_loc_reshaped = tf.reshape(
        m.input_tensors['step']['rel_goal_loc'],
        shape=[-1, task_params.rel_goal_loc_dim],
        name='re_rel_goal_loc')

    x, vars_ = get_repr_from_image(images_reshaped, task_params.modalities,
                                   task_params.data_augment, args.arch.encoder,
                                   args.solver.freeze_conv,
                                   args.solver.wt_decay, is_training)

    # Reshape into nice things so that these can be accumulated over time steps
    # for faster backprop.
    sh_before = x.get_shape().as_list()
    m.encoder_output = tf.reshape(x,
                                  shape=[task_params.batch_size, -1, n_views] +
                                  sh_before[1:])
    x = tf.reshape(m.encoder_output, shape=[-1] + sh_before[1:])

    # Add a layer to reduce dimensions for a fc layer.
    if args.arch.dim_reduce_neurons > 0:
        ks = 1
        neurons = args.arch.dim_reduce_neurons
        init_var = np.sqrt(2.0 / (ks**2) / neurons)
        batch_norm_param = args.arch.batch_norm_param
        batch_norm_param['is_training'] = batch_norm_is_training_op
        m.conv_feat = slim.conv2d(
            x,
            neurons,
            kernel_size=ks,
            stride=1,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_param,
            padding='SAME',
            scope='dim_reduce',
            weights_regularizer=slim.l2_regularizer(args.solver.wt_decay),
            weights_initializer=tf.random_normal_initializer(stddev=init_var))
        reshape_conv_feat = slim.flatten(m.conv_feat)
        sh = reshape_conv_feat.get_shape().as_list()
        m.reshape_conv_feat = tf.reshape(reshape_conv_feat,
                                         shape=[-1, sh[1] * n_views])

    # Restore these from a checkpoint.
    if args.solver.pretrained_path is not None:
        m.init_fn = slim.assign_from_checkpoint_fn(args.solver.pretrained_path,
                                                   vars_)
    else:
        m.init_fn = None

    # Hit the goal_location with a bunch of fully connected layers, to embed it
    # into some space.
    with tf.variable_scope('embed_goal'):
        batch_norm_param = args.arch.batch_norm_param
        batch_norm_param['is_training'] = batch_norm_is_training_op
        m.embed_goal, _ = tf_utils.fc_network(
            rel_goal_loc_reshaped,
            neurons=args.arch.goal_embed_neurons,
            wt_decay=args.solver.wt_decay,
            name='goal_embed',
            offset=0,
            batch_norm_param=batch_norm_param,
            dropout_ratio=args.arch.fc_dropout,
            is_training=is_training)

    if args.arch.embed_goal_for_state:
        with tf.variable_scope('embed_goal_for_state'):
            batch_norm_param = args.arch.batch_norm_param
            batch_norm_param['is_training'] = batch_norm_is_training_op
            m.embed_goal_for_state, _ = tf_utils.fc_network(
                m.input_tensors['common']['rel_goal_loc_at_start'][:, 0, :],
                neurons=args.arch.goal_embed_neurons,
                wt_decay=args.solver.wt_decay,
                name='goal_embed',
                offset=0,
                batch_norm_param=batch_norm_param,
                dropout_ratio=args.arch.fc_dropout,
                is_training=is_training)

    # Hit the goal_location with a bunch of fully connected layers, to embed it
    # into some space.
    with tf.variable_scope('embed_img'):
        batch_norm_param = args.arch.batch_norm_param
        batch_norm_param['is_training'] = batch_norm_is_training_op
        m.embed_img, _ = tf_utils.fc_network(
            m.reshape_conv_feat,
            neurons=args.arch.img_embed_neurons,
            wt_decay=args.solver.wt_decay,
            name='img_embed',
            offset=0,
            batch_norm_param=batch_norm_param,
            dropout_ratio=args.arch.fc_dropout,
            is_training=is_training)

    # For lstm_ego, and lstm_image, embed the ego motion, accumulate it into an
    # LSTM, combine with image features and accumulate those in an LSTM. Finally
    # combine what you get from the image LSTM with the goal to output an action.
    if args.arch.lstm_ego:
        ego_reshaped = preprocess_egomotion(
            m.input_tensors['step']['incremental_locs'],
            m.input_tensors['step']['incremental_thetas'])
        with tf.variable_scope('embed_ego'):
            batch_norm_param = args.arch.batch_norm_param
            batch_norm_param['is_training'] = batch_norm_is_training_op
            m.embed_ego, _ = tf_utils.fc_network(
                ego_reshaped,
                neurons=args.arch.ego_embed_neurons,
                wt_decay=args.solver.wt_decay,
                name='ego_embed',
                offset=0,
                batch_norm_param=batch_norm_param,
                dropout_ratio=args.arch.fc_dropout,
                is_training=is_training)

        state_name, state_init_op, updated_state_op, out_op = lstm_setup(
            'lstm_ego', m.embed_ego, task_params.batch_size, is_single_step,
            args.arch.lstm_ego_dim, args.arch.lstm_ego_out,
            num_steps * num_goals, m.input_tensors['step']['lstm_ego'])
        state_names += [state_name]
        init_state_ops += [state_init_op]
        updated_state_ops += [updated_state_op]

        # Combine the output with the vision features.
        m.img_ego_op = combine_setup('img_ego', args.arch.combine_type_ego,
                                     m.embed_img, out_op,
                                     args.arch.img_embed_neurons[-1],
                                     args.arch.lstm_ego_out)

        # LSTM on these vision features.
        state_name, state_init_op, updated_state_op, out_op = lstm_setup(
            'lstm_img', m.img_ego_op, task_params.batch_size, is_single_step,
            args.arch.lstm_img_dim, args.arch.lstm_img_out,
            num_steps * num_goals, m.input_tensors['step']['lstm_img'])
        state_names += [state_name]
        init_state_ops += [state_init_op]
        updated_state_ops += [updated_state_op]

        m.img_for_goal = out_op
        num_img_for_goal_neurons = args.arch.lstm_img_out

    elif args.arch.lstm_img:
        # LSTM on just the image features.
        state_name, state_init_op, updated_state_op, out_op = lstm_setup(
            'lstm_img', m.embed_img, task_params.batch_size, is_single_step,
            args.arch.lstm_img_dim, args.arch.lstm_img_out,
            num_steps * num_goals, m.input_tensors['step']['lstm_img'])
        state_names += [state_name]
        init_state_ops += [state_init_op]
        updated_state_ops += [updated_state_op]
        m.img_for_goal = out_op
        num_img_for_goal_neurons = args.arch.lstm_img_out

    else:
        m.img_for_goal = m.embed_img
        num_img_for_goal_neurons = args.arch.img_embed_neurons[-1]

    if args.arch.use_visit_count:
        m.embed_visit_count = visit_count_fc(
            m.input_tensors['step']['visit_count'],
            m.input_tensors['step']['last_visit'],
            args.arch.goal_embed_neurons,
            args.solver.wt_decay,
            args.arch.fc_dropout,
            is_training=is_training)
        m.embed_goal = m.embed_goal + m.embed_visit_count

    m.combined_f = combine_setup('img_goal', args.arch.combine_type,
                                 m.img_for_goal, m.embed_goal,
                                 num_img_for_goal_neurons,
                                 args.arch.goal_embed_neurons[-1])

    # LSTM on the combined representation.
    if args.arch.lstm_output:
        name = 'lstm_output'
        # A few fully connected layers here.
        with tf.variable_scope('action_pred'):
            batch_norm_param = args.arch.batch_norm_param
            batch_norm_param['is_training'] = batch_norm_is_training_op
            x, _ = tf_utils.fc_network(m.combined_f,
                                       neurons=args.arch.pred_neurons,
                                       wt_decay=args.solver.wt_decay,
                                       name='pred',
                                       offset=0,
                                       batch_norm_param=batch_norm_param,
                                       dropout_ratio=args.arch.fc_dropout)

        if args.arch.lstm_output_init_state_from_goal:
            # Use the goal embedding to initialize the LSTM state.
            # UGLY CLUGGY HACK: if this is doing computation for a single time step
            # then this will not involve back prop, so we can use the state input from
            # the feed dict, otherwise we compute the state representation from the
            # goal and feed that in. Necessary for using goal location to generate the
            # state representation.
            m.embed_goal_for_state = tf.expand_dims(m.embed_goal_for_state,
                                                    dim=1)
            state_op = tf.cond(is_single_step,
                               lambda: m.input_tensors['step'][name],
                               lambda: m.embed_goal_for_state)
            state_name, state_init_op, updated_state_op, out_op = lstm_setup(
                name, x, task_params.batch_size, is_single_step,
                args.arch.lstm_output_dim, num_actions_, num_steps * num_goals,
                state_op)
            init_state_ops += [m.embed_goal_for_state]
        else:
            state_op = m.input_tensors['step'][name]
            state_name, state_init_op, updated_state_op, out_op = lstm_setup(
                name, x, task_params.batch_size, is_single_step,
                args.arch.lstm_output_dim, num_actions_, num_steps * num_goals,
                state_op)
            init_state_ops += [state_init_op]

        state_names += [state_name]
        updated_state_ops += [updated_state_op]

        out_op = tf.reshape(out_op, shape=[-1, num_actions_])
        if num_actions_ > num_actions:
            m.action_logits_op = out_op[:, :num_actions]
            m.baseline_op = out_op[:, num_actions:]
        else:
            m.action_logits_op = out_op
            m.baseline_op = None
        m.action_prob_op = tf.nn.softmax(m.action_logits_op)

    else:
        # A few fully connected layers here.
        with tf.variable_scope('action_pred'):
            batch_norm_param = args.arch.batch_norm_param
            batch_norm_param['is_training'] = batch_norm_is_training_op
            out_op, _ = tf_utils.fc_network(m.combined_f,
                                            neurons=args.arch.pred_neurons,
                                            wt_decay=args.solver.wt_decay,
                                            name='pred',
                                            offset=0,
                                            num_pred=num_actions_,
                                            batch_norm_param=batch_norm_param,
                                            dropout_ratio=args.arch.fc_dropout,
                                            is_training=is_training)
            if num_actions_ > num_actions:
                m.action_logits_op = out_op[:, :num_actions]
                m.baseline_op = out_op[:, num_actions:]
            else:
                m.action_logits_op = out_op
                m.baseline_op = None
            m.action_prob_op = tf.nn.softmax(m.action_logits_op)

    m.train_ops = {}
    m.train_ops['step'] = m.action_prob_op
    m.train_ops['common'] = [
        m.input_tensors['common']['orig_maps'],
        m.input_tensors['common']['goal_loc'],
        m.input_tensors['common']['rel_goal_loc_at_start']
    ]
    m.train_ops['state_names'] = state_names
    m.train_ops['init_state'] = init_state_ops
    m.train_ops['updated_state'] = updated_state_ops
    m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op

    # Flat list of ops which cache the step data.
    m.train_ops['step_data_cache'] = [tf.no_op()]

    if args.solver.freeze_conv:
        m.train_ops['step_data_cache'] = [m.encoder_output]
    else:
        m.train_ops['step_data_cache'] = []

    ewma_decay = 0.99 if is_training else 0.0
    weight = tf.ones_like(m.input_tensors['train_bkp']['action'],
                          dtype=tf.float32,
                          name='weight')

    m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \
      compute_losses_multi_or(
          m.action_logits_op, m.input_tensors['train_bkp']['action'],
          weights=weight, num_actions=num_actions,
          data_loss_wt=args.solver.data_loss_wt,
          reg_loss_wt=args.solver.reg_loss_wt, ewma_decay=ewma_decay)

    if args.solver.freeze_conv:
        vars_to_optimize = list(set(tf.trainable_variables()) - set(vars_))
    else:
        vars_to_optimize = None

    m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \
    m.sync_optimizer = tf_utils.setup_training(
        m.total_loss_op,
        args.solver.initial_learning_rate,
        args.solver.steps_per_decay,
        args.solver.learning_rate_decay,
        args.solver.momentum,
        args.solver.max_steps,
        args.solver.sync,
        args.solver.adjust_lr_sync,
        args.solver.num_workers,
        args.solver.task,
        vars_to_optimize=vars_to_optimize,
        clip_gradient_norm=args.solver.clip_gradient_norm,
        typ=args.solver.typ, momentum2=args.solver.momentum2,
        adam_eps=args.solver.adam_eps)

    if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay':
        m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(
            args.arch.isd_k, m.global_step_op)
    elif args.arch.sample_gt_prob_type == 'zero':
        m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32)
    elif args.arch.sample_gt_prob_type.split('_')[0] == 'step':
        step = int(args.arch.sample_gt_prob_type.split('_')[1])
        m.sample_gt_prob_op = tf_utils.step_gt_prob(
            step, m.input_tensors['step']['step_number'][0, 0, 0])

    m.sample_action_type = args.arch.action_sample_type
    m.sample_action_combine_type = args.arch.action_sample_combine_type
    _add_summaries(m, summary_mode, args.summary.arop_full_summary_iters)

    m.init_op = tf.group(tf.global_variables_initializer(),
                         tf.local_variables_initializer())
    m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4,
                                write_version=tf.train.SaverDef.V2)

    return m
Example #10
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    input_seg_maps = tf.placeholder(tf.float32,
                                    shape=[None, None, None, 6],
                                    name='input_score_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    # opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_seg_maps_split = tf.split(input_seg_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isegs = input_seg_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isegs, itms,
                                                    reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True

                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies(
        [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)
    gpu_options = tf.GPUOptions(allow_growth=True)
    #gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.75)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                          allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            logger.info('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            logger.debug(ckpt)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = data_provider.get_batch(
            num_workers=FLAGS.num_readers,
            input_size=FLAGS.input_size,
            batch_size=FLAGS.batch_size_per_gpu * len(gpus))

        start = time.time()
        for step in range(FLAGS.max_steps):
            data = next(data_generator)
            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_seg_maps: data[2],
                    input_training_masks: data[3]
                })
            if np.isnan(tl):
                logger.error('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                logger.info(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess,
                           os.path.join(FLAGS.checkpoint_path, 'model.ckpt'),
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_images: data[0],
                        input_seg_maps: data[2],
                        input_training_masks: data[3]
                    })
                summary_writer.add_summary(summary_str, global_step=step)
Example #11
    def __init__(self, flags, is_training=True):
        self.is_training = is_training
        self.preprocessing_name = (flags.preprocessing_name
                                   or flags.model_name)

        network_fn = nets_factory.get_network_fn(
            flags.model_name,
            num_classes=config.num_label,
            weight_decay=flags.weight_decay,
            is_training=is_training)
        self.image_size = network_fn.default_image_size

        self.image_ph = tf.placeholder(tf.float32,
                                       shape=(None, self.image_size,
                                              self.image_size,
                                              config.channels))
        self.label_ph = tf.placeholder(tf.float32,
                                       shape=(None, config.num_label))

        self.logits, end_points = network_fn(self.image_ph)

        if not is_training:
            return

        # global_step = tf.train.create_global_step()
        global_step = tf.train.get_global_step()
        decay_steps = int(config.train_data_size / config.train_batch_size *
                          flags.num_epochs_per_decay)
        learning_rate = tf.train.exponential_decay(
            flags.init_learning_rate,
            global_step,
            decay_steps,
            flags.learning_rate_decay_factor,
            staircase=True,
            name='exponential_decay_learning_rate')

        tf.losses.sigmoid_cross_entropy(self.label_ph, self.logits)
        losses = tf.get_collection(tf.GraphKeys.LOSSES)
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        losses.extend(regularization_losses)
        loss = tf.add_n(losses, name='loss')
        total_loss = tf.losses.get_total_loss(name='total_loss')
        diff = tf.subtract(loss, total_loss)

        tf.summary.scalar('learning_rate', learning_rate)
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('diff', diff)
        self.summary_op = tf.summary.merge_all()

        exclusions = [
            scope.strip()
            for scope in flags.checkpoint_exclude_scopes.split(',')
        ]

        variables_to_restore = []
        for variable in slim.get_model_variables():
            excluded = False
            for exclusion in exclusions:
                if variable.op.name.startswith(exclusion):
                    excluded = True
                    break
            if not excluded:
                variables_to_restore.append(variable)
            else:
                num_params = 1
                for dim in variable.shape:
                    num_params *= dim.value
                print('randinit {}\t({} params)'.format(
                    variable.name, num_params))

        scopes = [scope.strip() for scope in flags.trainable_scopes.split(',')]
        variables_to_train = []
        for scope in scopes:
            variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                          scope)
            variables_to_train.extend(variables)
        self.save_dict = {}
        for variable in variables_to_train:
            if not variable.name.startswith('vgg_16'):
                continue
            num_params = 1
            for dim in variable.shape:
                num_params *= dim.value
            print('trainable {}\t({} params)'.format(variable.name,
                                                     num_params))
            self.save_dict[variable.name] = variable
        self.saver = tf.train.Saver(self.save_dict)

        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        self.train_op = optimizer.minimize(loss,
                                           var_list=variables_to_train,
                                           global_step=global_step)

        self.init_fn = slim.assign_from_checkpoint_fn(flags.checkpoint_path,
                                                      variables_to_restore)
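A rough sketch of how such a model object might be driven; the class name, the feed data and the session handling below are assumptions, only init_fn, train_op, summary_op and the placeholders come from the constructor above:

model = Model(flags, is_training=True)             # hypothetical name for the class defined above
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    model.init_fn(sess)                            # warm-start from flags.checkpoint_path
    for images, labels in batches:                 # `batches` is an assumed data iterator
        _, summary = sess.run([model.train_op, model.summary_op],
                              feed_dict={model.image_ph: images,
                                         model.label_ph: labels})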
Example #12
def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    gpus = list(range(len(FLAGS.gpu_list.split(','))))

    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    input_score_maps = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 1],
                                      name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 5],
                                        name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 8],
                                        name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)

    # Variables for recall, precision and F1
    v_recall = tf.Variable(0.001, trainable=False)
    v_precision = tf.Variable(0.001, trainable=False)
    v_f1 = tf.Variable(0.001, trainable=False)
    tf.summary.scalar("Recall", v_recall)
    tf.summary.scalar("Precision", v_precision)
    tf.summary.scalar("F1", v_f1)

    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]

                # Model definition!!!
                total_loss, model_loss, f_score, f_geometry = tower_loss(
                    iis, isms, igms, itms, reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True

                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies(
        [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = create_summary_writer()

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        # pretrained_model_path is actually a pretrained ResNet-50 model
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)
        logger.debug("成功加载resnet预训练模型:%s", FLAGS.pretrained_model_path)

    early_stop = EarlyStop(FLAGS.early_stop)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            logger.debug('Trying to restore a partially trained model from [%s]', FLAGS.model_path)
            # This is the previous checkpoint, so training can resume from where it stopped
            ckpt = tf.train.latest_checkpoint(FLAGS.model_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)
            logger.debug("从头开始训练...")

        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size,
                                         type="train")

        validate_data_generator = icdar.get_batch(
            num_workers=FLAGS.num_readers,
            input_size=FLAGS.input_size,
            batch_size=FLAGS.batch_size,
            type="validate")

        # Start training!
        for step in range(FLAGS.max_steps):

            # Fetch one batch of data
            start = time.time()
            data = next(data_generator)
            logger.debug("[训练] 第%d步,加载了一批(%d)图片(%f秒),准备训练...", step,
                         FLAGS.batch_size, (time.time() - start))

            # Train on them
            run_start = time.time()
            ml, tl, _, summary_str = sess.run(
                [model_loss, total_loss, train_op, summary_op],
                feed_dict={
                    input_images: data[0],
                    input_score_maps: data[2],
                    input_geo_maps: data[3],
                    input_training_masks: data[4]
                })
            if np.isnan(tl):
                logger.debug('Loss diverged, stop training')
                break

            logger.debug("[训练] 跑完批次的梯度下降,耗时:%f", time.time() - run_start)

            # if step % FLAGS.validate_steps == 0:
            #     logger.debug("Saving checkpoint: %s", FLAGS.model_path + 'model.ckpt')
            #     saver.save(sess, FLAGS.model_path + 'model.ckpt', global_step=global_step)
            # Run validation every FLAGS.validate_steps steps (1000 by default)
            if step != 0 and step % FLAGS.validate_steps == 0:
                precision, recall, f1 = evaluator.validate(
                    sess, FLAGS.validate_batch_num, FLAGS.batch_size,
                    validate_data_generator, f_score, f_geometry, input_images)
                # Update the three scalar tensors
                sess.run([
                    tf.assign(v_f1, f1),
                    tf.assign(v_recall, recall),
                    tf.assign(v_precision, precision)
                ])

                logger.debug("评估完毕:在第%d步,F1:%f,Recall:%f,Precision:%f", step,
                             f1, recall, precision)
                if is_need_early_stop(early_stop, f1, saver, sess, step):
                    break  # the early-stop criterion uses the negative edit distance

            if step != 0 and step % FLAGS.save_summary_steps == 0:
                logger.debug("写入summary文件,第%d步", step)
                summary_writer.add_summary(summary_str, global_step=step)
                avg_time_per_step = (time.time() -
                                     start) / FLAGS.save_summary_steps
                avg_examples_per_second = (FLAGS.save_summary_steps *
                                           FLAGS.batch_size *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                logger.debug(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            logger.debug("[训练] 第%d步结束,整体耗时(包括加载数据):%f", step,
                         (time.time() - start))
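# The call `grads = average_gradients(tower_grads)` above relies on a helper that is not
# included in this snippet. A minimal sketch of such a helper, assuming the conventional
# per-tower list of (gradient, variable) pairs returned by opt.compute_gradients
# (an illustration only, not necessarily the original implementation):
import tensorflow as tf

def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars holds the gradient of one variable from every tower.
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars if g is not None]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # Variables are shared across towers, so keep the variable from the first tower.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads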
Exemple #13
0
 def train(self):
     img_size = [self.image_height, self.image_width, self.image_depth]
     train_batch = tf.train.shuffle_batch(
         [read_tfrecord(self.train_file, img_size)],
         batch_size=self.train_batch_size,
         capacity=3000,
         num_threads=2,
         min_after_dequeue=1000)
     test_batch = tf.train.shuffle_batch(
         [read_tfrecord(self.test_file, img_size)],
         batch_size=self.test_batch_size,
         capacity=500,
         num_threads=2,
         min_after_dequeue=300)
     init = tf.global_variables_initializer()
     init_fn = slim.assign_from_checkpoint_fn(
         "resnet_v2_50.ckpt", slim.get_model_variables('resnet_v2'))
     saver = tf.train.Saver()
     with tf.Session() as sess:
         sess.run(init)
         init_fn(sess)
         train_writer = tf.summary.FileWriter(self.log_dir + "/train",
                                              sess.graph)
         test_writer = tf.summary.FileWriter(self.log_dir + "/test",
                                             sess.graph)
         coord = tf.train.Coordinator()
         threads = tf.train.start_queue_runners(sess=sess, coord=coord)
         inputs_test, outputs_gt_test = build_img_pair(sess.run(test_batch))
         for iter in range(self.max_iteration):
             inputs_train, outputs_gt_train = build_img_pair(
                 sess.run(train_batch))
             # train with dynamic learning rate
             if iter <= 500:
                 self.train_step.run({
                     self.input_data: inputs_train,
                     self.gt: outputs_gt_train,
                     self.learning_rate: 1e-3,
                     self.batch_size: self.train_batch_size
                 })
             elif iter <= self.max_iteration - 1000:
                 self.train_step.run({
                     self.input_data: inputs_train,
                     self.gt: outputs_gt_train,
                     self.learning_rate: 0.5e-3,
                     self.batch_size: self.train_batch_size
                 })
             else:
                 self.train_step.run({
                     self.input_data: inputs_train,
                     self.gt: outputs_gt_train,
                     self.learning_rate: 1e-4,
                     self.batch_size: self.train_batch_size
                 })
             # print training loss and test loss
             if iter % 10 == 0:
                 summary_train = sess.run(
                     self.summary, {
                         self.input_data: inputs_train,
                         self.gt: outputs_gt_train,
                         self.batch_size: self.train_batch_size
                     })
                 train_writer.add_summary(summary_train, iter)
                 train_writer.flush()
                 summary_test = sess.run(
                     self.summary, {
                         self.input_data: inputs_test,
                         self.gt: outputs_gt_test,
                         self.batch_size: self.test_batch_size
                     })
                 test_writer.add_summary(summary_test, iter)
                 test_writer.flush()
             # record training loss and test loss
             if iter % 10 == 0:
                 train_loss = self.cross_entropy.eval({
                     self.input_data:
                     inputs_train,
                     self.gt:
                     outputs_gt_train,
                     self.batch_size:
                     self.train_batch_size
                 })
                 test_loss = self.cross_entropy.eval({
                     self.input_data:
                     inputs_test,
                     self.gt:
                     outputs_gt_test,
                     self.batch_size:
                     self.test_batch_size
                 })
                 print("iter step %d trainning batch loss %f" %
                       (iter, train_loss))
                 print("iter step %d test loss %f\n" % (iter, test_loss))
             # record model
             if iter % 100 == 0:
                 saver.save(sess,
                            self.log_dir + "/model.ckpt",
                            global_step=iter)
         coord.request_stop()
         coord.join(threads)
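# The shuffle-batch pipeline above calls read_tfrecord(self.train_file, img_size), which is
# defined elsewhere in that project. A minimal sketch of what such a reader could look like
# with the TF 1.x queue API; the feature key 'image_raw' and the raw uint8 encoding are
# assumptions for illustration only:
import tensorflow as tf

def read_tfrecord(tfrecord_file, img_size):
    filename_queue = tf.train.string_input_producer([tfrecord_file])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={'image_raw': tf.FixedLenFeature([], tf.string)})
    # Decode the raw bytes and restore the [height, width, depth] shape.
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.reshape(image, img_size)
    return tf.cast(image, tf.float32)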
Exemple #14
0
def train(H, test_images):
    '''
    Set up the computation graph, run 2 prefetch data threads, and then run the main loop
    '''

    if not os.path.exists(H['save_dir']): os.makedirs(H['save_dir'])

    ckpt_file = H['save_dir'] + '/save.ckpt'
    with open(H['save_dir'] + '/hypes.json', 'w') as f:
        json.dump(H, f, indent=4)

    x_in = tf.placeholder(tf.float32)
    confs_in = tf.placeholder(tf.float32)
    boxes_in = tf.placeholder(tf.float32)
    q = {}
    enqueue_op = {}
    for phase in ['train', 'test']:
        dtypes = [tf.float32, tf.float32, tf.float32]
        grid_size = H['grid_width'] * H['grid_height']
        channels = H.get('image_channels', 3)
        print('Image channels: %d' % channels)
        shapes = (
            [H['image_height'], H['image_width'], channels],
            [grid_size, H['rnn_len'], H['num_classes']],
            [grid_size, H['rnn_len'], 4],
        )
        q[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes)
        enqueue_op[phase] = q[phase].enqueue((x_in, confs_in, boxes_in))

    def make_feed(d):
        return {
            x_in: d['image'],
            confs_in: d['confs'],
            boxes_in: d['boxes'],
            learning_rate: H['solver']['learning_rate']
        }

    def thread_loop(sess, enqueue_op, phase, gen):
        for d in gen:
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))

    (config, loss, accuracy, summary_op, train_op, smooth_op, global_step,
     learning_rate) = build(H, q)

    saver = tf.train.Saver(max_to_keep=None)
    writer = tf.summary.FileWriter(logdir=H['save_dir'], flush_secs=10)

    with tf.Session(config=config) as sess:
        tf.train.start_queue_runners(sess=sess)
        for phase in ['train', 'test']:
            # enqueue once manually to avoid thread start delay
            gen = train_utils.load_data_gen(H,
                                            phase,
                                            jitter=H['solver']['use_jitter'])
            d = next(gen)
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))
            t = threading.Thread(target=thread_loop,
                                 args=(sess, enqueue_op, phase, gen))
            t.daemon = True
            t.start()

        tf.set_random_seed(H['solver']['rnd_seed'])
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)
        weights_str = H['solver']['weights']
        if len(weights_str) > 0:
            print('Restoring from: %s' % weights_str)
            saver.restore(sess, weights_str)
        elif H['slim_ckpt'] == '':
            sess.run(
                tf.variables_initializer([
                    x for x in tf.global_variables()
                    if x.name.startswith(H['slim_basename'])
                    and H['solver']['opt'] not in x.name
                ]))
        else:
            init_fn = slim.assign_from_checkpoint_fn(
                '%s/data/%s' %
                (os.path.dirname(os.path.realpath(__file__)), H['slim_ckpt']),
                [
                    x for x in tf.global_variables()
                    if x.name.startswith(H['slim_basename'])
                    and H['solver']['opt'] not in x.name
                ])
            init_fn(sess)

        # train model for N iterations
        start = time.time()
        max_iter = H['solver'].get('max_iter', 10000000)
        for i in range(max_iter):
            display_iter = H['logging']['display_iter']
            adjusted_lr = (
                H['solver']['learning_rate'] *
                0.5**max(0, (i / H['solver']['learning_rate_step']) - 2))
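            # Note on this schedule: with S = learning_rate_step, the factor
            # 0.5 ** max(0, i / S - 2) leaves the base rate unchanged until 2 * S
            # iterations and then decays it smoothly, reaching one half at 3 * S,
            # one quarter at 4 * S, and so on.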
            lr_feed = {learning_rate: adjusted_lr}

            if i % display_iter != 0:
                # train network
                batch_loss_train, _ = sess.run([loss['train'], train_op],
                                               feed_dict=lr_feed)
            else:
                # test network every N iterations; log additional info
                if i > 0:
                    dt = (time.time() - start) / (H['batch_size'] *
                                                  display_iter)
                start = time.time()
                (train_loss, test_accuracy, summary_str, _,
                 _) = sess.run([
                     loss['train'],
                     accuracy['test'],
                     summary_op,
                     train_op,
                     smooth_op,
                 ],
                               feed_dict=lr_feed)
                writer.add_summary(summary_str, global_step=global_step.eval())
                print_str = ', '.join([
                    'Step: %d',
                    'lr: %f',
                    'Train Loss: %.2f',
                    'Softmax Test Accuracy: %.1f%%',
                    'Time/image (ms): %.1f',
                ])
                print(print_str % (i, adjusted_lr, train_loss, test_accuracy *
                                   100, dt * 1000 if i > 0 else 0))

            if global_step.eval() % H['logging'][
                    'save_iter'] == 0 or global_step.eval() == max_iter - 1:
                saver.save(sess, ckpt_file, global_step=global_step)
Exemple #15
0
def train():
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        lr = FLAGS.learning_rate
        opt = tf.train.RMSPropOptimizer(lr, decay=0.9, momentum=0.9, epsilon=1)

        # Get images and labels
        # for train
        with tf.name_scope('train_images'):
            images, labels, boxes, num_objects = input.distorted_inputs(
                FLAGS.batch_size)

        batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
            [images, labels, boxes, num_objects], capacity=2 * FLAGS.num_gpus)

        tower_grads = []
        tower_losses = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(FLAGS.num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % ('tower', i)) as scope:

                        image_batch, label_batch, box_batch, num_objects_batch = batch_queue.dequeue(
                        )

                        cls_loss, loc_loss = ssd.loss(image_batch, label_batch,
                                                      box_batch,
                                                      num_objects_batch)

                        loss = cls_loss + loc_loss
                        regularization_loss = tf.add_n(
                            tf.get_collection(
                                tf.GraphKeys.REGULARIZATION_LOSSES))

                        loss = loss + regularization_loss

                        tf.get_variable_scope().reuse_variables()

                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                      scope)

                        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                                       scope)

                        grads = opt.compute_gradients(loss)

                        tower_grads.append(grads)
                        tower_losses.append(loss)

        grads = average_gradients(tower_grads)

        #validation
        val_images, val_labels, val_boxes, val_num_objects = input.inputs(1)
        with tf.device('/gpu:0'):
            with tf.name_scope('eval_images'):
                cls_pred, loc_pred = ssd.inference(val_images)

        summaries.extend(
            tf.get_collection(tf.GraphKeys.SUMMARIES, 'train_images'))
        summaries.extend(
            tf.get_collection(tf.GraphKeys.SUMMARIES, 'eval_images'))

        # Add a summary to track the learning rate.
        summaries.append(tf.summary.scalar('learning_rate', lr))

        for grad, var in grads:
            if grad is not None:
                summaries.append(
                    tf.summary.histogram(var.op.name + '/gradients', grad))

        with tf.control_dependencies(update_ops):
            train_op = opt.apply_gradients(grads, global_step=global_step)

        for var in tf.trainable_variables():
            print(var.name)
            summaries.append(tf.summary.histogram(var.op.name, var))

        saver = tf.train.Saver(max_to_keep=20)

        summary_op = tf.summary.merge(summaries)

        pretrained_ckpt_path = FLAGS.pretrained_ckpt_path

        if not tf.train.latest_checkpoint(FLAGS.ckpt_save_path):
            print('pretrained ckpt')
            exclude_layers = ['global_step']
            restore_variables = slim.get_variables_to_restore(
                exclude=exclude_layers)
            init_fn = slim.assign_from_checkpoint_fn(pretrained_ckpt_path,
                                                     restore_variables,
                                                     ignore_missing_vars=True)

        else:
            print('training ckpt')
            init_fn = None

        sv = tf.train.Supervisor(logdir=FLAGS.ckpt_save_path,
                                 summary_op=None,
                                 saver=saver,
                                 save_model_secs=0,
                                 init_fn=init_fn)
        config_ = tf.ConfigProto(allow_soft_placement=True)
        config_.gpu_options.per_process_gpu_memory_fraction = 0.4

        # sess=sv.managed_session(config=config_)
        with sv.managed_session(config=config_) as sess:
            # Start the queue runners.
            sv.start_queue_runners(sess=sess)

            for step in range(FLAGS.max_steps):
                start_time = time.time()
                sess.run(train_op)
                loss_value, cls_loss_value, loc_loss_value = sess.run(
                    [loss, cls_loss, loc_loss])
                duration = time.time() - start_time

                assert not np.isnan(
                    loss_value), 'Model diverged with loss = NaN'

                if step % 100 == 0:
                    num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = duration / FLAGS.num_gpus

                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str % (datetime.now(), step, loss_value,
                                        examples_per_sec, sec_per_batch))
                    print(cls_loss_value, loc_loss_value)

                if step % 100 == 0:
                    summary_str = sess.run(summary_op)

                if step % (int(FLAGS.num_train / FLAGS.batch_size) *
                           4) == 0 and step != 0:

                    print('start validation')
                    entire_TF = []
                    entire_score = []
                    entire_numGT = []
                    for val_step in range(FLAGS.num_validation):

                        if val_step % 500 == 0:
                            print(val_step, ' / ', FLAGS.num_validation)
                        val_GT_boxes, val_GT_cls, val_loc_pred, val_cls_pred, num_objects = sess.run(
                            [
                                val_boxes, val_labels, loc_pred, cls_pred,
                                val_num_objects
                            ])

                        TF_array, TF_score, num_GT = validation.one_image_validation(
                            val_GT_boxes, val_GT_cls, val_loc_pred,
                            val_cls_pred, num_objects)

                        if len(entire_TF) == 0:
                            entire_TF = TF_array
                            entire_score = TF_score
                            entire_numGT = num_GT
                        else:
                            for k_cls in range(FLAGS.num_classes - 1):
                                entire_TF[k_cls] = np.concatenate(
                                    [entire_TF[k_cls], TF_array[k_cls]],
                                    axis=0)
                                entire_score[k_cls] = np.concatenate(
                                    [entire_score[k_cls], TF_score[k_cls]],
                                    axis=0)
                                entire_numGT[k_cls] += num_GT[k_cls]

                    entire_AP_sum = validation.compute_AP(
                        entire_score, entire_TF, entire_numGT)

                    mAP = np.sum(np.array(entire_AP_sum)) / np.sum(
                        np.array(entire_AP_sum) != 0)

                    print('class AP : ', entire_AP_sum)
                    print('mAP : ', mAP)

                    checkpoint_path = os.path.join(FLAGS.ckpt_save_path,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
def main(argv=None):
    # The training process consists of the following steps: 1. load data, 2. define the network model,
    # 3. define the loss function, 4. define the optimizer, 5. define the evaluation metric

    # Load the preprocessed image data
    processed_data = np.load(DATA_FILE)
    training_images = processed_data[0]
    n_training_example = len(training_images)
    training_labels = processed_data[1]
    validation_images = processed_data[2]
    n_validation_example = len(validation_images)
    validation_labels = processed_data[3]
    testing_images = processed_data[4]
    n_testing_example = len(testing_images)
    testing_labels = processed_data[5]
    logger.info(
        '%d training examples, %d validation examples, %d testing examples.' %
        (n_training_example, n_validation_example, n_testing_example))

    # Define the input data and labels
    images = tf.placeholder(tf.float32, [None, 299, 299, 3],
                            name='input_images')
    labels = tf.placeholder(tf.int64, [None], name='labels')

    # Define the Inception-V3 model. The Google-provided inception_v3 checkpoint only contains
    # parameter values, so the inception_v3 model structure has to be defined here. The
    # batch-normalization statistics in the pretrained inception_v3 model differ from the new data
    # and would make training results poor, so a single model is used directly here, without
    # distinguishing between a training model and a testing model.
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits, _ = inception_v3.inception_v3(images, num_classes=N_CLASSES)

    # Get the variables that need to be trained
    trainable_variables = get_trainable_variables()

    # Define the cross-entropy loss; the parameter regularization losses were already added when the model was defined
    tf.losses.softmax_cross_entropy(tf.one_hot(labels, N_CLASSES),
                                    logits,
                                    weights=1.0)
    # Define the optimizer
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(
        tf.losses.get_total_loss())

    # Compute accuracy to evaluate the model
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evalution_step = tf.reduce_mean(tf.cast(correct_prediction,
                                                tf.float32))

    # Define the function that loads the pretrained model
    load_fn = slim.assign_from_checkpoint_fn(CKPT_FILE,
                                             get_tuned_variables(),
                                             ignore_missing_vars=True)

    # Define the saver used to save the trained model
    saver = tf.train.Saver()

    # Start training
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Load Google's pretrained model
        logger.info('Loading tuned variables from %s' % CKPT_FILE)
        load_fn(sess)

        start = 0
        end = BATCH_SIZE
        for i in range(STEPS):
            logger.info('Step %d-%d is training....' % (i, STEPS))
            try:
                sess.run(train_step,
                         feed_dict={
                             images: training_images[start:end],
                             labels: training_labels[start:end]
                         })
            except Exception:
                logger.error('training failed', exc_info=True)

            # Log progress
            if i % display_steps == 0 or i + 1 == STEPS:
                validation_acc = sess.run(evalution_step,
                                          feed_dict={
                                              images: validation_images,
                                              labels: validation_labels
                                          })
                logger.info('Step %d-%d:validation acc = %.1f%%' %
                            (i, STEPS, validation_acc * 100.0))
                # Persist the model
                if i % save_steps == 0 or i + 1 == STEPS:
                    saver.save(sess, TRAIN_FILE_SAVE_PATH, global_step=i)
            # The data was already shuffled during preprocessing, so the training data can simply be consumed in order here
            start = end
            if start == n_training_example:
                start = 0
            end = end + BATCH_SIZE
            if end > n_training_example:
                end = n_training_example
        # Finally, evaluate accuracy on the test set
        test_acc = sess.run(evalution_step,
                            feed_dict={
                                images: testing_images,
                                labels: testing_labels
                            })
        logger.info('Final test acc = %.1f%%' % (test_acc * 100.0))
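# get_trainable_variables() and get_tuned_variables() above are defined elsewhere in that
# project: the former picks the layers to train, the latter picks the pretrained variables to
# restore (typically everything except the final logits layers). A minimal sketch of
# get_trainable_variables(), assuming only the Inception-V3 logits layers are fine-tuned
# (the scope names below are assumptions for illustration):
import tensorflow as tf

TRAINABLE_SCOPES = ['InceptionV3/Logits', 'InceptionV3/AuxLogits']

def get_trainable_variables():
    variables_to_train = []
    for scope in TRAINABLE_SCOPES:
        # Collect every trainable variable that lives under this scope.
        variables_to_train.extend(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope))
    return variables_to_train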
def main(margin, batch_size, output_size, learning_rate, whichGPU,
         is_finetuning, pretrained_net):
    def handler(signum, frame):
        print('Saving checkpoint before closing')
        pretrained_net = os.path.join(ckpt_dir, 'checkpoint-' + param_str)
        saver.save(sess, pretrained_net, global_step=step)
        print('Checkpoint-', pretrained_net + '-' + str(step), ' saved!')
        sys.exit(0)

    signal.signal(signal.SIGINT, handler)

    ckpt_dir = './output/traffickcam/ckpts/finetuning'
    log_dir = './output/traffickcam/logs/finetuning'
    train_filename = './input/traffickcam/train.txt'
    mean_file = './input/traffickcam/meanIm.npy'

    img_size = [256, 256]
    crop_size = [224, 224]
    num_iters = 200000
    summary_iters = 100
    save_iters = 5000
    featLayer = 'resnet_v2_50/logits'

    is_training = True

    margin = float(margin)
    batch_size = int(batch_size)
    output_size = int(output_size)
    learning_rate = float(learning_rate)
    whichGPU = str(whichGPU)

    if batch_size % 30 != 0:
        print('Batch size must be divisible by 30!')
        sys.exit(0)

    num_pos_examples = batch_size // 30  # integer division; batch_size is a multiple of 30

    # Create data "batcher"
    train_data = CombinatorialTripletSet(train_filename,
                                         mean_file,
                                         img_size,
                                         crop_size,
                                         batch_size,
                                         num_pos_examples,
                                         isTraining=is_training)
    numClasses = len(train_data.files)
    numIms = np.sum(
        [len(train_data.files[idx]) for idx in range(0, numClasses)])
    datestr = datetime.now().strftime("%Y_%m_%d_%H%M")
    param_str = datestr + '_lr' + str(learning_rate).replace(
        '.', 'pt') + '_outputSz' + str(output_size) + '_margin' + str(
            margin).replace('.', 'pt')
    logfile_path = os.path.join(log_dir, param_str + '_train.txt')
    train_log_file = open(logfile_path, 'a')
    print('------------')
    print('')
    print('Going to train with the following parameters:')
    print('# Classes: ', numClasses)
    train_log_file.write('# Classes: ' + str(numClasses) + '\n')
    print('# Ims: ', numIms)
    train_log_file.write('# Ims: ' + str(numIms) + '\n')
    print('Margin: ', margin)
    train_log_file.write('Margin: ' + str(margin) + '\n')
    print('Output size: ', output_size)
    train_log_file.write('Output size: ' + str(output_size) + '\n')
    print('Learning rate: ', learning_rate)
    train_log_file.write('Learning rate: ' + str(learning_rate) + '\n')
    print('Logging to: ', logfile_path)
    train_log_file.write('Param_str: ' + param_str + '\n')
    train_log_file.write('----------------\n')
    print('')
    print('------------')

    # Queuing op loads data into input tensor
    image_batch = tf.placeholder(
        tf.float32, shape=[batch_size, crop_size[0], crop_size[0], 3])
    label_batch = tf.placeholder(tf.int32, shape=(batch_size))

    # doctor image params
    percent_crop = .5
    percent_rotate = .2
    percent_filters = .4
    percent_text = .1

    # # richard's argument: since the data is randomly loaded, we don't need to change the indices that we perform operations on every time; i am on board with this, but had already implemented the random crops, so will leave that for now
    # # apply random rotations
    num_rotate = int(batch_size * percent_rotate)
    rotate_inds = np.random.choice(np.arange(0, batch_size),
                                   num_rotate,
                                   replace=False)
    rotate_vals = np.random.randint(-65, 65,
                                    num_rotate).astype('float32') / float(100)
    rotate_angles = np.zeros((batch_size))
    rotate_angles[rotate_inds] = rotate_vals
    rotated_batch = tf.contrib.image.rotate(image_batch,
                                            rotate_angles,
                                            interpolation='BILINEAR')

    # do random crops
    num_to_crop = int(batch_size * percent_crop)
    num_to_not_crop = batch_size - num_to_crop

    shuffled_inds = tf.random_shuffle(np.arange(0, batch_size, dtype='int32'))
    # shuffled_inds = np.arange(0,batch_size,dtype='int32')
    # np.random.shuffle(shuffled_inds)
    crop_inds = tf.slice(shuffled_inds, [0], [num_to_crop])
    uncropped_inds = tf.slice(shuffled_inds, [num_to_crop], [num_to_not_crop])

    # crop_ratio = float(3)/float(5)
    # crop_yx = tf.random_uniform([num_to_crop,2], 0,1-crop_ratio, dtype=tf.float32, seed=0)
    # crop_sz = tf.add(crop_yx,np.tile([crop_ratio,crop_ratio],[num_to_crop, 1]))
    # crop_boxes = tf.concat([crop_yx,crop_sz],axis=1)

    # randomly select a crop between 3/5 of the image and the entire image
    crop_ratio = tf.random_uniform([num_to_crop, 1],
                                   float(3) / float(5),
                                   1,
                                   dtype=tf.float32,
                                   seed=0)
    # randomly select a starting location between 0 and the max valid x position
    crop_yx = tf.random_uniform([1, 2],
                                0.,
                                1. - crop_ratio,
                                dtype=tf.float32,
                                seed=0)
    crop_sz = tf.add(crop_yx, tf.concat([crop_ratio, crop_ratio], axis=1))
    crop_boxes = tf.concat([crop_yx, crop_sz], axis=1)

    uncropped_boxes = np.tile([0, 0, 1, 1], [num_to_not_crop, 1])

    all_inds = tf.concat([crop_inds, uncropped_inds], axis=0)
    all_boxes = tf.concat([crop_boxes, uncropped_boxes], axis=0)

    sorted_inds = tf.nn.top_k(-shuffled_inds, sorted=True,
                              k=batch_size).indices
    cropped_batch = tf.gather(
        tf.image.crop_and_resize(rotated_batch, all_boxes, all_inds,
                                 crop_size), sorted_inds)
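    # Note: sorted_inds above is the inverse permutation of shuffled_inds, so the final
    # gather puts the cropped and uncropped images back into their original batch order.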

    # apply different filters
    flt_image = convert_image_dtype(cropped_batch, dtypes.float32)

    num_to_filter = int(batch_size * percent_filters)

    filter_inds = np.random.choice(np.arange(0, batch_size),
                                   num_to_filter,
                                   replace=False)
    filter_mask = np.zeros(batch_size)
    filter_mask[filter_inds] = 1
    filter_mask = filter_mask.astype('float32')
    inv_filter_mask = np.ones(batch_size)
    inv_filter_mask[filter_inds] = 0
    inv_filter_mask = inv_filter_mask.astype('float32')

    #
    hsv = gen_image_ops.rgb_to_hsv(flt_image)
    hue = array_ops.slice(hsv, [0, 0, 0, 0], [batch_size, -1, -1, 1])
    saturation = array_ops.slice(hsv, [0, 0, 0, 1], [batch_size, -1, -1, 1])
    value = array_ops.slice(hsv, [0, 0, 0, 2], [batch_size, -1, -1, 1])

    # hue
    delta_vals = random_ops.random_uniform([batch_size], -.15, .15)
    hue_deltas = tf.multiply(filter_mask, delta_vals)
    hue_deltas2 = tf.expand_dims(
        tf.transpose(
            tf.tile(tf.reshape(hue_deltas, [1, 1, batch_size]),
                    (crop_size[0], crop_size[1], 1)), (2, 0, 1)), 3)
    # hue = math_ops.mod(hue + (hue_deltas2 + 1.), 1.)
    hue_mod = tf.add(hue, hue_deltas2)
    hue = clip_ops.clip_by_value(hue_mod, 0.0, 1.0)

    # saturation
    saturation_factor = random_ops.random_uniform([batch_size], -.05, .05)
    saturation_factor2 = tf.multiply(filter_mask, saturation_factor)
    saturation_factor3 = tf.expand_dims(
        tf.transpose(
            tf.tile(tf.reshape(saturation_factor2, [1, 1, batch_size]),
                    (crop_size[0], crop_size[1], 1)), (2, 0, 1)), 3)
    saturation_mod = tf.add(saturation, saturation_factor3)
    saturation = clip_ops.clip_by_value(saturation_mod, 0.0, 1.0)

    hsv_altered = array_ops.concat([hue, saturation, value], 3)
    rgb_altered = gen_image_ops.hsv_to_rgb(hsv_altered)

    # brightness
    brightness_factor = random_ops.random_uniform([batch_size], -.25, .25)
    brightness_factor2 = tf.multiply(filter_mask, brightness_factor)
    brightness_factor3 = tf.expand_dims(
        tf.transpose(
            tf.tile(tf.reshape(brightness_factor2, [1, 1, batch_size]),
                    (crop_size[0], crop_size[1], 1)), (2, 0, 1)), 3)
    adjusted = math_ops.add(rgb_altered,
                            math_ops.cast(brightness_factor3, dtypes.float32))

    filtered_batch = clip_ops.clip_by_value(adjusted, 0.0, 255.0)

    # after we've doctored everything, we need to remember to subtract off the mean
    repMeanIm = np.tile(np.expand_dims(train_data.meanImage, 0),
                        [batch_size, 1, 1, 1])
    noise = tf.random_normal(shape=[batch_size, crop_size[0], crop_size[0], 1],
                             mean=0.0,
                             stddev=0.0025,
                             dtype=tf.float32)
    final_batch = tf.add(tf.subtract(filtered_batch, repMeanIm), noise)

    print("Preparing network...")
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, layers = resnet_v2.resnet_v2_50(final_batch,
                                           num_classes=output_size,
                                           is_training=True)

    variables_to_restore = []
    for var in slim.get_model_variables():
        excluded = False
        if is_finetuning.lower() == 'true' and var.op.name.startswith(
                'resnet_v2_50/logits') or 'momentum' in var.op.name.lower():
            excluded = True
        if not excluded:
            variables_to_restore.append(var)

    feat = tf.squeeze(tf.nn.l2_normalize(layers[featLayer], 3))
    expanded_a = tf.expand_dims(feat, 1)
    expanded_b = tf.expand_dims(feat, 0)
    D = tf.reduce_sum(tf.squared_difference(expanded_a, expanded_b), 2)

    # if not train_data.isOverfitting:
    #     D_max = tf.reduce_max(D)
    #     D_mean, D_var = tf.nn.moments(D, axes=[0,1])
    #     lowest_nonzero_distance = tf.reduce_max(-D)
    #     bottom_thresh = 1.2*lowest_nonzero_distance
    #     top_thresh = (D_max + D_mean)/2.0
    #     bool_mask = tf.logical_and(D>=bottom_thresh,D<=top_thresh)
    #     D = tf.multiply(D,tf.cast(bool_mask,tf.float32))

    posIdx = np.floor(np.arange(0, batch_size) /
                      num_pos_examples).astype('int')
    posIdx10 = num_pos_examples * posIdx
    posImInds = np.tile(posIdx10, (num_pos_examples, 1)).transpose() + np.tile(
        np.arange(0, num_pos_examples), (batch_size, 1))
    anchorInds = np.tile(np.arange(0, batch_size),
                         (num_pos_examples, 1)).transpose()

    posImInds_flat = posImInds.ravel()
    anchorInds_flat = anchorInds.ravel()

    posPairInds = list(zip(posImInds_flat, anchorInds_flat))
    posDists = tf.reshape(tf.gather_nd(D, posPairInds),
                          (batch_size, num_pos_examples))

    shiftPosDists = tf.reshape(posDists, (1, batch_size, num_pos_examples))
    posDistsRep = tf.tile(shiftPosDists, (batch_size, 1, 1))

    allDists = tf.tile(tf.expand_dims(D, 2), (1, 1, num_pos_examples))

    ra, rb, rc = np.meshgrid(np.arange(0, batch_size),
                             np.arange(0, batch_size),
                             np.arange(0, num_pos_examples))

    bad_negatives = np.floor((ra) / num_pos_examples) == np.floor(
        (rb) / num_pos_examples)
    bad_positives = np.mod(rb,
                           num_pos_examples) == np.mod(rc, num_pos_examples)

    mask = ((1 - bad_negatives) * (1 - bad_positives)).astype('float32')

    # loss = tf.reduce_sum(tf.maximum(0.,tf.multiply(mask,margin + posDistsRep - allDists)))/batch_size
    loss = tf.reduce_mean(
        tf.maximum(0., tf.multiply(mask, margin + posDistsRep - allDists)))

    # slightly counterintuitive to not define "init_op" first, but tf vars aren't known until added to graph
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train_op = slim.learning.create_train_op(loss, optimizer)

    summary_op = tf.summary.merge_all()
    init_op = tf.global_variables_initializer()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver(max_to_keep=2000)

    # tf will consume any GPU it finds on the system. Following lines restrict it to specific gpus
    c = tf.ConfigProto()
    c.gpu_options.visible_device_list = whichGPU

    print("Starting session...")
    sess = tf.Session(config=c)
    sess.run(init_op)

    writer = tf.summary.FileWriter(log_dir, sess.graph)

    restore_fn = slim.assign_from_checkpoint_fn(pretrained_net,
                                                variables_to_restore)
    restore_fn(sess)

    print("Start training...")
    ctr = 0
    for step in range(num_iters):
        start_time = time.time()
        batch, labels, ims = train_data.getBatch()
        _, loss_val = sess.run([train_op, loss],
                               feed_dict={
                                   image_batch: batch,
                                   label_batch: labels
                               })
        end_time = time.time()
        duration = end_time - start_time
        out_str = 'Step %d: loss = %.6f -- (%.3f sec)' % (step, loss_val,
                                                          duration)
        # print(out_str)
        if step % summary_iters == 0:
            print(out_str)
            train_log_file.write(out_str + '\n')
        # Update the events file.
        # summary_str = sess.run(summary_op)
        # writer.add_summary(summary_str, step)
        # writer.flush()
        #
        # Save a checkpoint
        if (step + 1) % save_iters == 0:
            print('Saving checkpoint at iteration: %d' % (step))
            pretrained_net = os.path.join(ckpt_dir, 'checkpoint-' + param_str)
            saver.save(sess, pretrained_net, global_step=step)
            print('checkpoint-', pretrained_net + '-' + str(step), ' saved!')
        if (step + 1) == num_iters:
            print('Saving final')
            pretrained_net = os.path.join(ckpt_dir, 'final-' + param_str)
            saver.save(sess, pretrained_net, global_step=step)
            print('final-', pretrained_net + '-' + str(step), ' saved!')

    sess.close()
    train_log_file.close()
Exemple #18
0
def main(_):

    if FLAGS.csv_file_path:
        if os.path.exists(FLAGS.csv_file_path) is False:
            csv_dir = FLAGS.csv_file_path.rsplit('/', 1)[0]
            if os.path.exists(csv_dir) is False:
                os.makedirs(csv_dir)

            if FLAGS.task_name == 'chalearn/age':
                with open(FLAGS.csv_file_path, 'w') as f:
                    writer = csv.writer(f)
                    writer.writerow([
                        'Pruned rate', 'MAE', 'Acc', 'Epoch No.',
                        'Model size through inference (MB) (Shared part + task-specific part)',
                        'Shared part (MB)', 'Task specific part (MB)',
                        'Whole masks (MB)', 'Task specific masks (MB)',
                        'Task specific batch norm vars (MB)',
                        'Task specific biases (MB)'
                    ])
            else:
                with open(FLAGS.csv_file_path, 'w') as f:
                    writer = csv.writer(f)
                    writer.writerow([
                        'Pruned rate', 'Acc', 'Epoch No.',
                        'Model size through inference (MB) (Shared part + task-specific part)',
                        'Shared part (MB)', 'Task specific part (MB)',
                        'Whole masks (MB)', 'Task specific masks (MB)',
                        'Task specific batch norm vars (MB)',
                        'Task specific biases (MB)'
                    ])

    args, unparsed = parse_arguments(sys.argv[1:])
    FLAGS.filters_expand_ratio = math.sqrt(FLAGS.filters_expand_ratio)
    FLAGS.history_filters_expand_ratios = [
        math.sqrt(float(ratio))
        for ratio in FLAGS.history_filters_expand_ratios
    ]

    with tf.Graph().as_default():

        with tf.Session() as sess:
            if 'emotion' in FLAGS.task_name or 'chalearn' in FLAGS.task_name:
                test_data_path = os.path.join(args.data_dir, 'val')
            else:
                test_data_path = os.path.join(args.data_dir, 'test')

            test_set = utils.get_dataset(test_data_path)

            # Get the paths for the corresponding images
            image_list, label_list = facenet.get_image_paths_and_labels(
                test_set)

            image_paths_placeholder = tf.placeholder(tf.string,
                                                     shape=(None, 1),
                                                     name='image_paths')
            labels_placeholder = tf.placeholder(tf.int32,
                                                shape=(None, 1),
                                                name='labels')
            batch_size_placeholder = tf.placeholder(tf.int32,
                                                    name='batch_size')
            control_placeholder = tf.placeholder(tf.int32,
                                                 shape=(None, 1),
                                                 name='control')
            phase_train_placeholder = tf.placeholder(tf.bool,
                                                     name='phase_train')

            nrof_preprocess_threads = 4
            image_size = (args.image_size, args.image_size)
            eval_input_queue = data_flow_ops.FIFOQueue(
                capacity=2000000,
                dtypes=[tf.string, tf.int32, tf.int32],
                shapes=[(1, ), (1, ), (1, )],
                shared_name=None,
                name=None)
            eval_enqueue_op = eval_input_queue.enqueue_many(
                [
                    image_paths_placeholder, labels_placeholder,
                    control_placeholder
                ],
                name='eval_enqueue_op')
            image_batch, label_batch = facenet.create_input_pipeline(
                eval_input_queue, image_size, nrof_preprocess_threads,
                batch_size_placeholder)
            coord = tf.train.Coordinator()
            tf.train.start_queue_runners(coord=coord, sess=sess)

            # Load the model
            if os.path.isdir(args.model):
                temp_record_file = os.path.join(args.model, 'temp_record.txt')
                checkpoint_file = os.path.join(args.model, 'checkpoint')

                if os.path.exists(temp_record_file) and os.path.exists(
                        checkpoint_file):
                    with open(temp_record_file) as json_file:
                        data = json.load(json_file)
                        max_acc = max(data, key=float)
                        epoch_no = data[max_acc]
                        ckpt_file = args.model + '/model-.ckpt-' + epoch_no

                    with open(checkpoint_file) as f:
                        context = f.read()
                    original_epoch = re.search("(\d)+", context).group()
                    context = context.replace(original_epoch, epoch_no)
                    with open(checkpoint_file, 'w') as f:
                        f.write(context)
                    if os.path.exists(os.path.join(args.model,
                                                   'copied')) is False:
                        os.makedirs(os.path.join(args.model, 'copied'))
                    copyfile(
                        temp_record_file,
                        os.path.join(args.model, 'copied', 'temp_record.txt'))
                    os.remove(temp_record_file)

                elif os.path.exists(checkpoint_file):
                    ckpt = tf.train.get_checkpoint_state(args.model)
                    ckpt_file = ckpt.model_checkpoint_path
                    epoch_no = ckpt_file.rsplit('-', 1)[-1]
                else:
                    print(
                        'No `temp_record.txt` or `checkpoint` in `{}`, you should pass args.model the file path, not the directory'
                        .format(args.model))
                    sys.exit(1)
            else:
                ckpt_file = args.model
                epoch_no = ckpt_file.rsplit('-')[-1]

            prelogits, _ = network.inference(
                image_batch,
                1.0,
                phase_train=phase_train_placeholder,
                bottleneck_layer_size=args.embedding_size,
                weight_decay=0.0)

            with tf.variable_scope('task_{}'.format(FLAGS.task_id)):
                if FLAGS.task_name == 'chalearn/age':
                    logits = slim.fully_connected(prelogits,
                                                  100,
                                                  activation_fn=None,
                                                  scope='Logits',
                                                  reuse=False)
                else:
                    logits = slim.fully_connected(prelogits,
                                                  len(test_set),
                                                  activation_fn=None,
                                                  scope='Logits',
                                                  reuse=False)

            # Get output tensor
            if FLAGS.task_name == 'chalearn/age':
                softmax = tf.nn.softmax(logits=logits)
                labels_range = tf.range(1.0, 101.0)  # [1.0, ..., 100.0]
                labels_matrix = tf.broadcast_to(
                    labels_range,
                    [args.test_batch_size, labels_range.shape[0]])
                result_vector = tf.reduce_sum(softmax * labels_matrix, axis=1)
                MAE_error_vector = tf.abs(result_vector -
                                          tf.cast(label_batch, tf.float32))
                MAE_avg_error = tf.reduce_mean(MAE_error_vector)

                correct_prediction = tf.cast(
                    tf.equal(tf.argmax(logits, 1),
                             tf.cast(label_batch, tf.int64)), tf.float32)
                accuracy = tf.reduce_mean(correct_prediction)
                regularization_losses = tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES)
                total_loss = tf.add_n([MAE_avg_error] + regularization_losses)

                criterion = MAE_avg_error
            else:
                cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=label_batch,
                    logits=logits,
                    name='cross_entropy_per_example')
                cross_entropy_mean = tf.reduce_mean(cross_entropy)

                correct_prediction = tf.cast(
                    tf.equal(tf.argmax(logits, 1),
                             tf.cast(label_batch, tf.int64)), tf.float32)
                accuracy = tf.reduce_mean(correct_prediction)
                regularization_losses = tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES)
                total_loss = tf.add_n([cross_entropy_mean] +
                                      regularization_losses)

                criterion = cross_entropy_mean

            init_fn = slim.assign_from_checkpoint_fn(ckpt_file,
                                                     tf.global_variables())
            init_fn(sess)

            pruned_ratio_relative_to_curr_task = 0.0
            model_size = 0.0
            if FLAGS.print_mem or FLAGS.print_mask_info:
                masks = tf.get_collection('masks')

                if FLAGS.print_mask_info:

                    if masks:
                        num_elems_in_each_task_op = {}
                        num_elems_in_tasks_in_masks_op = {
                        }  # two-dimensional dictionary
                        num_elems_in_masks_op = []
                        num_remain_elems_in_masks_op = []

                        for task_id in range(1, FLAGS.task_id + 1):
                            num_elems_in_each_task_op[task_id] = tf.constant(
                                0, dtype=tf.int32)
                            num_elems_in_tasks_in_masks_op[task_id] = {}

                        # Define graph
                        for i, mask in enumerate(masks):
                            num_elems_in_masks_op.append(tf.size(mask))
                            num_remain_elems_in_curr_mask = tf.size(mask)
                            for task_id in range(1, FLAGS.task_id + 1):
                                cnt = tf_count(mask, task_id)
                                num_elems_in_tasks_in_masks_op[task_id][
                                    i] = cnt
                                num_elems_in_each_task_op[task_id] = tf.add(
                                    num_elems_in_each_task_op[task_id], cnt)
                                num_remain_elems_in_curr_mask -= cnt

                            num_remain_elems_in_masks_op.append(
                                num_remain_elems_in_curr_mask)

                        num_elems_in_network_op = tf.add_n(
                            num_elems_in_masks_op)

                        print('Calculate pruning status ...')

                        # Doing operation
                        num_elems_in_masks = sess.run(num_elems_in_masks_op)
                        num_elems_in_each_task = sess.run(
                            num_elems_in_each_task_op)
                        num_elems_in_tasks_in_masks = sess.run(
                            num_elems_in_tasks_in_masks_op)
                        num_elems_in_network = sess.run(
                            num_elems_in_network_op)
                        num_remain_elems_in_masks = sess.run(
                            num_remain_elems_in_masks_op)

                        # Print out the result
                        print('Showing pruning status ...')

                        if FLAGS.verbose:
                            for i, mask in enumerate(masks):
                                print('Layer %s: ' % mask.op.name, end='')
                                for task_id in range(1, FLAGS.task_id + 1):
                                    cnt = num_elems_in_tasks_in_masks[task_id][
                                        i]
                                    print('task_%d -> %d/%d (%.2f%%), ' %
                                          (task_id, cnt, num_elems_in_masks[i],
                                           100 * cnt / num_elems_in_masks[i]),
                                          end='')
                                print('remain -> {:.2f}%'.format(
                                    100 * num_remain_elems_in_masks[i] /
                                    num_elems_in_masks[i]))

                        print('Num elems in network: {}'.format(
                            num_elems_in_network))
                        num_elems_of_usued_weights = num_elems_in_network
                        for task_id in range(1, FLAGS.task_id + 1):
                            print('Num elems in task_{}: {}'.format(
                                task_id, num_elems_in_each_task[task_id]))
                            print('Ratio of task_{} to all: {}'.format(
                                task_id, num_elems_in_each_task[task_id] /
                                num_elems_in_network))
                            num_elems_of_usued_weights -= num_elems_in_each_task[
                                task_id]
                        print('Num unused elems in all masks: {}'.format(
                            num_elems_of_usued_weights))

                        pruned_ratio_relative_to_all_elems = num_elems_of_usued_weights / num_elems_in_network
                        print('Ratio of unused elems to all: {}'.format(
                            pruned_ratio_relative_to_all_elems))
                        pruned_ratio_relative_to_curr_task = num_elems_of_usued_weights / (
                            num_elems_of_usued_weights +
                            num_elems_in_each_task[FLAGS.task_id])
                        print('Pruning degree relative to task_{}: {:.3f}'.
                              format(FLAGS.task_id,
                                     pruned_ratio_relative_to_curr_task))

                if FLAGS.print_mem:
                    # Analyze param
                    start_time = time.time()
                    (MB_of_model_through_inference, MB_of_shared_variables,
                     MB_of_task_specific_variables, MB_of_whole_masks,
                     MB_of_task_specific_masks,
                     MB_of_task_specific_batch_norm_variables,
                     MB_of_task_specific_biases
                     ) = model_analyzer.analyze_vars_for_current_task(
                         tf.model_variables(),
                         sess=sess,
                         task_id=FLAGS.task_id,
                         verbose=False)
                    duration = time.time() - start_time
                    print('duration time: {}'.format(duration))
            if FLAGS.eval_once:
                validate(
                    args, sess, image_list, label_list, eval_enqueue_op,
                    image_paths_placeholder, labels_placeholder,
                    control_placeholder, phase_train_placeholder,
                    batch_size_placeholder, total_loss, regularization_losses,
                    criterion, accuracy, args.use_fixed_image_standardization,
                    FLAGS.csv_file_path, pruned_ratio_relative_to_curr_task,
                    epoch_no, MB_of_model_through_inference,
                    MB_of_shared_variables, MB_of_task_specific_variables,
                    MB_of_whole_masks, MB_of_task_specific_masks,
                    MB_of_task_specific_batch_norm_variables,
                    MB_of_task_specific_biases)

            return
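# tf_count(mask, task_id) above is a small helper that is not part of this snippet. A minimal
# sketch of what it presumably does (count how many entries of a tensor equal a given value);
# this is an illustration, not the author's original code:
import tensorflow as tf

def tf_count(tensor, value):
    # Compare element-wise, cast the boolean result to int32, and sum it up.
    return tf.reduce_sum(tf.cast(tf.equal(tensor, value), tf.int32))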
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default() as graph:
        num_channels = FLAGS.input_channels
        num_channels = max(3, num_channels)
        # simulate num_channels channel input
        data = imageutils.resize(imageutils.read(
            'deeplab/Oxford.street.london.arp.jpg'), (128, 192)).reshape(1, 128, 192, 3)
        inputs = tf.to_float(numpy.concatenate([data[...,0:1]]*num_channels, axis=3))

        # Create the global step on the device storing the variables.
        global_step = tf.train.get_or_create_global_step()

        # Define the model and create clones.
        model_fn = _build_deeplab_inputs

        num_classes = FLAGS.num_classes
        model_args = (inputs,
                      {common.OUTPUT_TYPE: num_classes})
        model_fn(*model_args)

        # Soft placement allows placing on CPU ops without GPU implementation.
        session_config = tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False)

        ### Adapted from code by Paul Upchurch. ###
        model_vars = slim.get_model_variables()  # debug
        #print("##2##")
        #print(model_vars)

        if FLAGS.model_variant == 'xception_65':
            input_kernel_name = 'xception_65/entry_flow/conv1_1/weights'
        elif FLAGS.model_variant == 'mobilenet_v2':
            input_kernel_name = 'MobilenetV2/Conv/weights'
        else:
            raise Exception("{} is not supported. Modify the code.".format(FLAGS.model_variant))
        variables_to_restore = slim.get_variables_to_restore(
            exclude=['global_step', input_kernel_name])

        #### Deeplab ####
        checkpoint_dir = FLAGS.source_checkpoint_dir
        checkpoint_name = FLAGS.source_checkpoint_name
        loader = slim.assign_from_checkpoint_fn(
            checkpoint_dir+'/' + checkpoint_name, variables_to_restore, ignore_missing_vars=False)
        ################

        init_op = tf.global_variables_initializer()
        #print ('##3## init_op...')
        #print(init_op)

        saver = tf.train.Saver(tf.global_variables())
        with tf.Session(config=session_config) as sess:
            sess.run(init_op)
            loader(sess)
            f = assign_conv2d_from_checkpoint_fn(input_kernel_name+':0',
                                                checkpoint_dir+'/' + checkpoint_name,
                                                input_kernel_name)

            f(sess)

            print('== Expanded kernel, first output feature ==')
            print(get_tensor_value(model_vars[0], sess).shape)
            print(get_tensor_value(model_vars[0], sess)[:, :, :, 0])
            print('== Original kernel, first output feature ==')
            print(tf.contrib.framework.load_variable(checkpoint_dir+'/' + checkpoint_name,
                                                    input_kernel_name).shape)
            print(tf.contrib.framework.load_variable(checkpoint_dir+'/' + checkpoint_name,
                                                    input_kernel_name)[:, :, :, 0])

            output_dir = FLAGS.output_checkpoint_dir
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            print(' == == ==')
            print('Saving to {}'.format(output_dir))
            saver.save(sess, os.path.join(output_dir, "model.ckpt"))
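# assign_conv2d_from_checkpoint_fn(...) above comes from the same project and is not shown
# here. Based on how it is used (filling an input kernel that was expanded from 3 channels to
# num_channels), a minimal sketch could look like the following; the channel-tiling strategy
# is an assumption for illustration only:
import numpy as np
import tensorflow as tf

def assign_conv2d_from_checkpoint_fn(var_name, checkpoint_path, ckpt_var_name):
    # Load the original [kh, kw, 3, out] kernel from the source checkpoint.
    ckpt_value = tf.contrib.framework.load_variable(checkpoint_path, ckpt_var_name)
    # Find the expanded variable in the current graph by name, e.g. '.../weights:0'.
    var = [v for v in tf.global_variables() if v.name == var_name][0]
    num_in_channels = var.get_shape().as_list()[2]
    # Tile the original input channels to cover the expanded depth, then trim.
    reps = int(np.ceil(float(num_in_channels) / ckpt_value.shape[2]))
    expanded = np.tile(ckpt_value, (1, 1, reps, 1))[:, :, :num_in_channels, :]
    assign_op = tf.assign(var, expanded)

    def _assign(sess):
        sess.run(assign_op)
    return _assign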
    logits, _ = mobilenet_v1.mobilenet_v1(processed_images,
                                          is_training=False,
                                          depth_multiplier=1.0,
                                          num_classes=1001,
                                          id_act_layer_input=layer_id,
                                          act_quant_delta_input=quant_delta,
                                          act_quant_levels_input=quant_level)
    #probabilities = tf.nn.softmax(logits)

variables_to_restore = slim.get_variables_to_restore()

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())
init_fn = slim.assign_from_checkpoint_fn(checkpoint_file, variables_to_restore)
init_fn(sess)

######################## compute output error ############################
final_output_layer_quantized = [0] * n_images

for i in range(n_images):
    img_path = IMAGENET_VAL_PATH + 'ILSVRC2012_val_%08d.JPEG' % (i + 1)
    final_output_layer_quantized[i] = sess.run(
        logits, feed_dict={input_string: img_path})

output_error = 0.0
for i in range(n_images):
    output_error = output_error + np.mean(
        (final_output_layer_original[i] - final_output_layer_quantized[i])**2)
output_error = output_error / n_images
Exemple #21
0
def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    now = datetime.datetime.now()
    StyleTime = now.strftime("%Y-%m-%d-%H-%M-%S")
    os.makedirs(FLAGS.logs_path + StyleTime)
    if not os.path.exists(FLAGS.checkpoint_path):
        os.makedirs(FLAGS.checkpoint_path)

    input_image = tf.placeholder(tf.float32,
                                 shape=[None, None, None, 3],
                                 name='input_image')
    input_bbox = tf.placeholder(tf.float32, shape=[None, 5], name='input_bbox')
    input_im_info = tf.placeholder(tf.float32,
                                   shape=[None, 3],
                                   name='input_im_info')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.Variable(FLAGS.learning_rate, trainable=False)
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    gpu_id = int(FLAGS.gpu)
    with tf.device('/gpu:%d' % gpu_id):
        with tf.name_scope('model_%d' % gpu_id) as scope:
            bbox_pred, cls_pred, cls_prob = model.model(input_image)
            total_loss, model_loss, rpn_cross_entropy, rpn_loss_box = model.loss(
                bbox_pred, cls_pred, input_bbox, input_im_info)
            batch_norm_updates_op = tf.group(
                *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
            grads = opt.compute_gradients(total_loss)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies(
        [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
    summary_writer = tf.summary.FileWriter(FLAGS.logs_path + StyleTime,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.allow_soft_placement = True
    with tf.Session(config=config) as sess:
        if FLAGS.restore:
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            restore_step = int(ckpt.split('.')[0].split('_')[-1])
            print("continue training from previous checkpoint {}".format(
                restore_step))
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            restore_step = 0
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = data_provider.get_batch(num_workers=FLAGS.num_readers)
        start = time.time()
        for step in range(restore_step, FLAGS.max_steps):
            data = next(data_generator)
            ml, tl, _, summary_str = sess.run(
                [model_loss, total_loss, train_op, summary_op],
                feed_dict={
                    input_image: data[0],
                    input_bbox: data[1],
                    input_im_info: data[2]
                })

            summary_writer.add_summary(summary_str, global_step=step)

            if step != 0 and step % FLAGS.decay_steps == 0:
                sess.run(
                    tf.assign(learning_rate,
                              learning_rate.eval() * FLAGS.decay_rate))

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, LR: {:.6f}'
                    .format(step, ml, tl, avg_time_per_step,
                            learning_rate.eval()))

            if (step + 1) % FLAGS.save_checkpoint_steps == 0:
                filename = ('ctpn_{:d}'.format(step + 1) + '.ckpt')
                filename = os.path.join(FLAGS.checkpoint_path, filename)
                saver.save(sess, filename)
                print('Write model to: {:s}'.format(filename))
Exemple #22
0

if __name__ == '__main__':
    # test fpn class output
    import os
    import numpy as np
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    test_input = tf.Variable(initial_value=tf.ones((5, 384, 384, 3)),
                             dtype=tf.float32)

    fpn_model = FPN('resnet_v1_101', test_input, is_training=True)
    # output = fpn_model.model()
    output = fpn_model.pre_seg_maps
    init_op = tf.global_variables_initializer()
    restore = slim.assign_from_checkpoint_fn(
        'libs\\nets\\resnet_v1_101\\resnet_v1_101.ckpt',
        slim.get_trainable_variables(),
        ignore_missing_vars=True)

    logits, share_net = fpn_model.get_logits_and_share_net()

    feature_maps = fpn_model.get_feature_maps()

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                          log_device_placement=False)) as sess:
        sess.run(init_op)
        restore(sess)
        # out  = sess.run([output])
        # print(len(out[0]))
        logit_print = sess.run([logits])
        feature_maps_print = sess.run([feature_maps])
        print('***************logits*****************')
def main():
    # Load the preprocessed data.
    processed_data = np.load(INPUT_DATA)
    training_images = processed_data[0]
    n_training_example = len(training_images)
    training_labels = processed_data[1]

    validation_images = processed_data[2]
    validation_labels = processed_data[3]

    testing_images = processed_data[4]
    testing_labels = processed_data[5]
    print(
        "%d training examples, %d validation examples and %d testing examples."
        % (n_training_example, len(validation_labels), len(testing_labels)))

    # Define the inputs for Inception-v3: images holds the input images and
    # labels holds the label of each image.
    images = tf.placeholder(tf.float32, [None, 299, 299, 3],
                            name='input_images')
    labels = tf.placeholder(tf.int64, [None], name='labels')

    # Define the Inception-v3 model. Google only provides the model parameter
    # values, so the Inception-v3 model structure has to be defined in this
    # code. In theory the models used for training and testing should be
    # distinguished, i.e. is_training=False should be used at test time, but
    # because the batch normalization parameters in the pre-trained
    # Inception-v3 model differ from the new data, the same model is used
    # here for testing as well.
    with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
        logits, _ = inception_v3.inception_v3(images,
                                              num_classes=N_CLASSES,
                                              is_training=True)
    print(logits)
    trainable_variables = get_trainable_variables()
    # Define the loss function and the training step.
    tf.losses.softmax_cross_entropy(tf.one_hot(labels, N_CLASSES),
                                    logits,
                                    weights=1.0)
    total_loss = tf.losses.get_total_loss()
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(total_loss)

    # Compute the accuracy.
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))

    # Define the function that loads the Inception-v3 model pre-trained by Google.
    load_fn = slim.assign_from_checkpoint_fn(CKPT_FILE,
                                             get_tuned_variables(),
                                             ignore_missing_vars=True)

    # Define the Saver used to save the new model.
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Initialize the variables that are not loaded from the checkpoint.
        init = tf.global_variables_initializer()
        sess.run(init)

        # Load the model pre-trained by Google.
        print('Loading tuned variables from %s' % CKPT_FILE)
        load_fn(sess)

        start = 0
        end = BATCH
        for i in range(STEPS):
            _, loss = sess.run(
                [train_step, total_loss],
                feed_dict={
                    images: training_images[start:end],
                    labels: training_labels[start:end]
                })

            if i % 30 == 0 or i + 1 == STEPS:
                saver.save(sess, TRAIN_FILE, global_step=i)

                validation_accuracy = sess.run(evaluation_step,
                                               feed_dict={
                                                   images: validation_images,
                                                   labels: validation_labels
                                               })
                print(
                    'Step %d: Training loss is %.1f Validation accuracy = %.1f%%'
                    % (i, loss, validation_accuracy * 100.0))

            start = end
            if start == n_training_example:
                start = 0

            end = start + BATCH
            if end > n_training_example:
                end = n_training_example

        # Finally, evaluate the accuracy on the test data.
        test_accuracy = sess.run(evaluation_step,
                                 feed_dict={
                                     images: testing_images,
                                     labels: testing_labels
                                 })
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100))
Exemple #24
0
# Experiment initialization and running
with tf.Session() as sess:
    sess.run(init)
    train_saver = tf.train.Saver()
    val_saver = tf.train.Saver()
    if continue_from_epoch != -1: #load checkpoint if needed
        checkpoint = "saved_models/{}_{}.ckpt".format(experiment_name, continue_from_epoch)
        variables_to_restore = []
        for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
            print(var)
            variables_to_restore.append(var)

        tf.logging.info('Fine-tuning from %s' % checkpoint)

        fine_tune = slim.assign_from_checkpoint_fn(
            checkpoint,
            variables_to_restore,
            ignore_missing_vars=True)
        fine_tune(sess)

    best_val_acc_mean = 0.
    best_val_epoch = 6
    with tqdm.tqdm(total=epochs) as pbar_e:
        for e in range(0, epochs):
            total_train_c_loss_mean, total_train_c_loss_std, total_train_accuracy_mean, total_train_accuracy_std = \
                experiment.run_training_epoch(total_train_batches=total_train_batches,
                                              sess=sess)
            print("Epoch {}: train_loss_mean: {}, train_loss_std: {}, train_accuracy_mean: {}, train_accuracy_std: {}"
                  .format(e, total_train_c_loss_mean, total_train_c_loss_std, total_train_accuracy_mean, total_train_accuracy_std))

            total_val_c_loss_mean, total_val_c_loss_std, total_val_accuracy_mean, total_val_accuracy_std = \
                experiment.run_validation_epoch(total_val_batches=total_val_batches,
Exemple #25
0
def build_pspnet(inputs,
                 label_size,
                 num_classes,
                 preset_model='PSPNet-Res50',
                 pooling_type="MAX",
                 weight_decay=1e-5,
                 upscaling_method="bilinear",
                 is_training=True,
                 pretrained_dir="models"):
    """
    Builds the PSPNet model.

    Arguments:
      inputs: The input tensor
      label_size: Size of the final label tensor. We need to know this for proper upscaling
      num_classes: Number of classes
      preset_model: Which preset model to use. Selects the ResNet backbone used for feature extraction
      pooling_type: Max or Average pooling

    Returns:
      PSPNet model
    """

    inputs = mean_image_subtraction(inputs)

    if preset_model == 'PSPNet-Res50':
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v2.resnet_v2_50(
                inputs, is_training=is_training, scope='resnet_v2_50')
            resnet_scope = 'resnet_v2_50'
            # PSPNet requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(pretrained_dir, 'resnet_v2_50.ckpt'),
                slim.get_model_variables('resnet_v2_50'))
    elif preset_model == 'PSPNet-Res101':
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v2.resnet_v2_101(
                inputs, is_training=is_training, scope='resnet_v2_101')
            resnet_scope = 'resnet_v2_101'
            # PSPNet requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(pretrained_dir, 'resnet_v2_101.ckpt'),
                slim.get_model_variables('resnet_v2_101'))
    elif preset_model == 'PSPNet-Res152':
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v2.resnet_v2_152(
                inputs, is_training=is_training, scope='resnet_v2_152')
            resnet_scope = 'resnet_v2_152'
            # PSPNet requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(pretrained_dir, 'resnet_v2_152.ckpt'),
                slim.get_model_variables('resnet_v2_152'))
    else:
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152"
            % (preset_model))

    feature_map_shape = [int(x / 8.0) for x in label_size]
    print(feature_map_shape)
    psp = PyramidPoolingModule(end_points['pool3'],
                               feature_map_shape=feature_map_shape,
                               pooling_type=pooling_type)

    net = slim.conv2d(psp, 512, [3, 3], activation_fn=None)
    net = slim.batch_norm(net, fused=True)
    net = tf.nn.relu(net)

    if upscaling_method.lower() == "conv":
        net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 256)
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif upscaling_method.lower() == "bilinear":
        net = Upsampling(net, label_size)

    net = slim.dropout(net, keep_prob=(0.9))

    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')

    return net, init_fn
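
A short usage sketch for the function above (the input size, class count, and dummy batch are illustrative assumptions; init_fn restores only the pre-trained ResNet backbone, so the remaining PSPNet layers still need the regular initializer):

import numpy as np
import tensorflow as tf

inputs = tf.placeholder(tf.float32, [None, 384, 384, 3], name='inputs')
net, init_fn = build_pspnet(inputs,
                            label_size=[384, 384],
                            num_classes=21,
                            preset_model='PSPNet-Res50',
                            is_training=False,
                            pretrained_dir='models')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)  # load the pre-trained resnet_v2_50 weights
    dummy_batch = np.zeros((1, 384, 384, 3), np.float32)
    logits = sess.run(net, feed_dict={inputs: dummy_batch})
    print(logits.shape)  # (1, 384, 384, num_classes)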
Exemple #26
0
    image_np = cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR)
    image = tf.convert_to_tensor(image_np)
    processed_image = inception_preprocessing.preprocess_image(
        image, image_size, image_size, is_training=False)
    processed_images = tf.expand_dims(processed_image, 0)

    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
        logits, _ = inception.inception_resnet_v2(processed_images,
                                                  num_classes=11,
                                                  is_training=False)
    probabilities = tf.nn.softmax(logits)

    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(
            "/home/jade/Models/Image_Classif/dfgoods_inception_resnet_v2_use_checkpoitns_2019-04-29",
            'model.ckpt-196478'),
        slim.get_model_variables('InceptionResnetV2'))

    with tf.Session() as sess:
        init_fn(sess)
        np_image, probabilities = sess.run([image, probabilities])
        probabilities = probabilities[0, 0:]
        sorted_inds = [
            i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1])
        ]

    names = imagenet.create_readable_names_for_imagenet_labels()
    for i in range(5):
        index = sorted_inds[i]
        print('Probability %0.2f%% => [%s]' %
Exemple #27
0
def init_fn_part():  # Restore the network weights from the checkpoint
    variables_to_restore = slim.get_variables_to_restore(
        exclude=["vgg_16/fc8"])
    return slim.assign_from_checkpoint_fn(checkpoint_path,
                                          variables_to_restore)
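
A minimal sketch of how init_fn_part might be used, assuming checkpoint_path points at a standard vgg_16 checkpoint and the model is rebuilt with a new class count (the placeholder shape, checkpoint file name, and num_classes are illustrative assumptions):

import tensorflow as tf
from tensorflow.contrib.slim.nets import vgg

checkpoint_path = 'vgg_16.ckpt'  # assumed location of the pretrained weights
images = tf.placeholder(tf.float32, [None, 224, 224, 3], name='images')
# Rebuild VGG-16 with a new head; vgg_16/fc8 stays randomly initialized.
logits, _ = vgg.vgg_16(images, num_classes=10, is_training=True)

init_fn = init_fn_part()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)  # restores every variable except vgg_16/fc8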
#image_size = inception.inception_v1.default_image_size

with tf.Graph().as_default():
    url = 'https://upload.wikimedia.org/wikipedia/commons/7/70/EnglishCockerSpaniel_simon.jpg'
    image_string = urllib.urlopen(url).read()
    image = tf.image.decode_jpeg(image_string, channels=3)
    processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)
    processed_images  = tf.expand_dims(processed_image, 0)
    
    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(inception.inception_v4_arg_scope()):
        logits, _ = inception.inception_v4(processed_images, num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)
    
    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
        slim.get_model_variables('InceptionV4'))
    
    with tf.Session() as sess:
        init_fn(sess)
        np_image, probabilities = sess.run([image, probabilities])
        probabilities = probabilities[0, 0:]
        sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x:x[1])]
        
    plt.figure()
    plt.imshow(np_image.astype(np.uint8))
    plt.axis('off')
    plt.show()

    names = imagenet.create_readable_names_for_imagenet_labels()
    for i in range(5):
Exemple #29
0
def main(argv=None):
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    input_score_maps = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 1],
                                      name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 5],
                                        name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 8],
                                        name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')
    input_labels = tf.placeholder(tf.float32,
                                  shape=[None, None, 4, 2],
                                  name='input_labels')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.01,
                                               staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))
    input_labels_split = tf.split(input_labels, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                il = input_labels_split[i]
                total_loss, model_loss, f_score, f_geometry, _ = tower_loss(
                    iis, isms, igms, itms, il, reuse_variables)
                #f_score, f_geometry = i_am_testing(iis)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                #print "below..."
                #batch_norm_updates_op = tf.group(*[op for op in tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope) if 'resnet_v1_50/block4' in op.name or 'resnet_v1_50/block3' in op.name or 'feature_fusion' in op.name])
                #print "above..."
                reuse_variables = True
                #print "below.."
                #train_var = [var for var in tf.trainable_variables() if 'resnet_v1_50/block1' in var.name]
                #train_var = [var for var in tf.trainable_variables() if 'resnet_v1_50/block4' in var.name]
                #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_7' in var.name]
                #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_8' in var.name]
                #train_var += [var for var in tf.trainable_variables() if 'feature_fusion/Conv_9' in var.name]
                #print train_var
                #print "above..."
                train_var = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                              scope='feature_fusion')
                grads = opt.compute_gradients(total_loss, var_list=train_var)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    #train_var = [var for var in tf.trainable_variables() if ('resnet_v1_50/block3' in var.name or 'resnet_v1_50/block4' in var.name or 'feature_fusion' in var.name)]
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    with tf.control_dependencies(
        [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)
    my_char_l = "5"
    my_char_U = ""
    data_size = 0
    train_data_indices = []
    list_of_img_pos = []
    with open(
            'Data/cropped_annotations_new/cropped_annotations' + my_char_l +
            '.txt', 'r') as f:
        annotation_file = f.readlines()
    #with open('Data/cropped_annotations_new/cropped_annotations' + my_char_U + '.txt', 'r') as f:
    #    annotation_file += f.readlines()
    idx = 0
    for line in annotation_file:
        if len(line) > 1 and line[:13] == './cropped_img' and str(
                line[14:27]) in training_list:
            data_size += 1
            train_data_indices.append(idx)
            list_of_img_pos.append(line[14:].split(".")[0] + ".tiff")
        idx += 1
    list_of_img_all = os.listdir('Data/cropped_img')
    list_of_img_neg = np.array(
        list(set(list_of_img_all) - set(list_of_img_pos)))
    print "Char model: " + my_char_U + my_char_l
    print "Data size: " + str(data_size)
    epoche_size = data_size / (16 * 2)
    #print epoche_size
    print "This many steps per epoche: " + str(epoche_size)

    list_of_img_neg_char = os.listdir('Data/j')

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)
        #print "below:"
        #tvars = tf.trainable_variables()
        #g_vars = [var for var in tvars if 'resnet_v1_50/block4' in var.name]
        #print g_vars
        #print tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='resnet_v1_50')
        #return
        print FLAGS.learning_rate
        print reg_constant
        for step in range(24 * epoche_size):
            ### Generate Data ###
            data = [], [], [], [], []
            np.random.shuffle(train_data_indices)
            num_im = 0
            actual_num_im = 0
            while len(data[0]) < 32:
                prob = np.random.random(1)[0]
                if prob > 0.49:
                    i = train_data_indices[num_im]
                    im_fn = "Data/cropped_img/" + annotation_file[i][
                        14:].split(".tiff", 1)[0] + ".tiff"
                    im = cv2.imread(im_fn)
                    if im is not None:
                        r, c, _ = im.shape
                        text_polys = []
                        text_tags = []
                        if int(annotation_file[i + 1]) > 0:
                            for idx in range(
                                    i + 2,
                                    i + 2 + int(annotation_file[i + 1])):
                                annotation_data = annotation_file[idx]
                                annotation_data = annotation_data.split(" ")
                                x, y = float(annotation_data[0]), float(
                                    annotation_data[1])
                                w, h = float(annotation_data[2]), float(
                                    annotation_data[3])
                                text_polys.append([
                                    list([int(x), int(y - h)]),
                                    list([int(x + w), int(y - h)]),
                                    list([int(x + w), int(y)]),
                                    list([int(x), int(y)])
                                ])
                                text_tags.append(False)
                        score_map, geo_map, training_mask = icdar.generate_rbox(
                            (int(r), int(c)), np.array(text_polys),
                            np.array(text_tags))
                        data[0].append(im[:, :, ::-1].astype(np.float32))
                        data[1].append(im_fn)
                        data[2].append(score_map[::4, ::4, np.newaxis].astype(
                            np.float32))
                        data[3].append(geo_map[::4, ::4, :].astype(np.float32))
                        data[4].append(training_mask[::4, ::4,
                                                     np.newaxis].astype(
                                                         np.float32))
                        actual_num_im += 1
                    num_im += 1

                else:
                    im_fn = np.random.choice(list_of_img_neg)
                    im = cv2.imread("Data/cropped_img/" + im_fn)
                    #if prob > 0.25:
                    #    im_fn = np.random.choice(list_of_img_neg_char)
                    #    im_mini = cv2.imread("Data/j/" + im_fn)
                    # 	r0, c0, _ = im_mini.shape
                    #     im = np.zeros((512, 512, 3), dtype=np.uint8)
                    #	ra, rb, ca, cb = 256-r0/2, 256+(r0+1)/2, 256-c0/2, 256+(c0+1)/2
                    #    im[ra:rb, ca:cb, :] = im_mini.copy()
                    if im is not None:
                        r, c, _ = im.shape
                        score_map, geo_map, training_mask = icdar.generate_rbox(
                            (int(r), int(c)), np.array([]), np.array([]))
                        data[0].append(im[:, :, ::-1].astype(np.float32))
                        data[1].append(im_fn)
                        data[2].append(score_map[::4, ::4, np.newaxis].astype(
                            np.float32))
                        data[3].append(geo_map[::4, ::4, :].astype(np.float32))
                        data[4].append(training_mask[::4, ::4,
                                                     np.newaxis].astype(
                                                         np.float32))

            ### Run model ###
            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_score_maps: data[2],
                    input_geo_maps: data[3],
                    input_training_masks: data[4]
                })
            epoch = step / epoche_size
            batch_num = step % epoche_size
            if step % (epoche_size / 3) == 0:
                print "Epoch no.: " + str(epoch) + " batch no.: " + str(
                    batch_num) + " loss: " + str(ml)
                print "Epoch no.: " + str(epoch) + " batch no.: " + str(
                    batch_num) + " loss: " + str(tl)
            if step % (epoche_size / 2) == 0:
                #print "Epoche: " + str(step / (epoche_size/2))
                saver.save(sess,
                           FLAGS.checkpoint_path + 'model.ckpt',
                           global_step=global_step)
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_images: data[0],
                        input_score_maps: data[2],
                        input_geo_maps: data[3],
                        input_training_masks: data[4]
                    })
                summary_writer.add_summary(summary_str, global_step=step)
            if False:
                count_right = 0
                count_wrong = 0
                count_posNotDetected = 0
                im0 = cv2.imread("Data/maps/D0117-5755036.tiff")[:, :, ::-1]
                w, h, _ = im0.shape
                slide_window = 300
                crop_size = 512
                crop_center = (256, 256)
                num_rows, num_cols = int(np.ceil(w / slide_window)), int(
                    np.ceil(h / slide_window))
                print num_cols
                for rot in [-90.0, -60.0, -30.0, 0.0, 30.0, 60.0, 90.0]:
                    im = cv2.imread("Data/maps/D0117-5755036.tiff")[:, :, ::-1]
                    boxes_one_rot = []
                    count = 0
                    while count < num_rows * num_cols:
                        images, data2, data3, data4 = [], [], [], []
                        for k in range(16):
                            i = (count + k) / num_rows
                            j = (count + k) % num_cols

                            temp = im[slide_window*i:slide_window*i+crop_size, \
                                      slide_window*j:slide_window*j+crop_size, ::-1]
                            w2, h2, _ = temp.shape
                            if w2 < crop_size or h2 < crop_size:
                                result = np.zeros((crop_size, crop_size, 3))
                                result[:w2, :h2] = temp
                                temp = result
                            M = cv2.getRotationMatrix2D(crop_center, rot, 1.0)
                            temp = cv2.warpAffine(temp, M,
                                                  (crop_size, crop_size))
                            images.append(temp)
                            score_map, geo_map, training_mask = icdar.generate_rbox(
                                (int(crop_size), int(crop_size)), np.array([]),
                                np.array([]))
                            data2.append(score_map[::4, ::4,
                                                   np.newaxis].astype(
                                                       np.float32))
                            data3.append(geo_map[::4, ::4, :].astype(
                                np.float32))
                            data4.append(training_mask[::4, ::4,
                                                       np.newaxis].astype(
                                                           np.float32))
                        score, geometry = sess.run(
                            [f_score, f_geometry],
                            feed_dict={
                                input_images: images,
                                input_score_maps: data2,
                                input_geo_maps: data3,
                                input_training_masks: data4
                            })
                        for k in range(16):
                            i = (count + k) / num_rows
                            j = (count + k) % num_cols
                            boxes = detect(score_map=score[j],
                                           geo_map=geometry[j],
                                           score_map_thresh=0.01,
                                           box_thresh=0.01,
                                           nms_thres=0.01)
                            if boxes is not None:
                                boxes = boxes[:, :8].reshape((-1, 4, 2))
                                for box in boxes:
                                    M_inv = cv2.getRotationMatrix2D(
                                        crop_center, -1 * rot, 1)
                                    box[0] = M_inv.dot(
                                        np.array((box[0, 0], box[0, 1]) +
                                                 (1, )))
                                    box[1] = M_inv.dot(
                                        np.array((box[1, 0], box[1, 1]) +
                                                 (1, )))
                                    box[2] = M_inv.dot(
                                        np.array((box[2, 0], box[2, 1]) +
                                                 (1, )))
                                    box[3] = M_inv.dot(
                                        np.array((box[3, 0], box[3, 1]) +
                                                 (1, )))
                                    box = sort_poly(box.astype(np.int32))
                                    box[0, 0] = box[0, 0] + j * slide_window
                                    box[0, 1] = box[0, 1] + i * slide_window
                                    box[1, 0] = box[1, 0] + j * slide_window
                                    box[1, 1] = box[1, 1] + i * slide_window
                                    box[2, 0] = box[2, 0] + j * slide_window
                                    box[2, 1] = box[2, 1] + i * slide_window
                                    box[3, 0] = box[3, 0] + j * slide_window
                                    box[3, 1] = box[3, 1] + i * slide_window
                    boxes_one_rot.append(box)
                    boxes_single_rot = np.zeros((len(boxes_one_rot), 9))
                    boxes_single_rot[:, :8] = np.array(boxes_one_rot).reshape(
                        (-1, 8))
                    boxes_single_rot[:, 8] = 1
                    labels += boxes_single_rot.tolist()
                boxes = lanms.merge_quadrangle_n9(np.array(labels), nms_thres)
                annotation = np.load(
                    "/mnt/nfs/work1/elm/ray/new_char_anots_ncs/" + "j" + "/" +
                    "D0117-5755036" + ".npy").item()
                ### Compute the TP, FP, FN info for each image
                count_right_cache = 0
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                num_true_pos = len(annotation)
                for box in boxes:
                    box = sort_poly(box.astype(np.int32))
                    if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(
                            box[3] - box[0]) < 5:
                        continue
                    k = 0
                    idx = 0
                    count_wrong += 1
                    while (idx < num_true_pos):
                        if k in annotation:
                            proposed_label = annotation[k]['vertices']
                            if len(proposed_label) == 4:
                                x3, y3, x2, y2, x1, y1, x0, y0 = proposed_label[0][0], proposed_label[0][1], proposed_label[1][0], proposed_label[1][1], \
                                                     proposed_label[2][0], proposed_label[2][1], proposed_label[3][0], proposed_label[3][1]
                                if (checkIOU(box,
                                             [[x0, y0], [x1, y1], [x2, y2],
                                              [x3, y3]]) == True):
                                    count_right_cache += 1
                                    count_wrong -= 1
                                    break
                            idx += 1
                        k += 1
                count_posNotDetected += num_true_pos - count_right_cache
                count_right += count_right_cache
                precision = (float)(count_right) / (float)(
                    count_right + count_wrong)  # TP / TP + FP
                recall = (float)(count_right) / (float)(
                    count_right + count_posNotDetected)  # TP / TP + FN
                fscore = 2 * (precision * recall) / (precision + recall)
                print "Precision, recall, fscore: " + str(
                    precision) + ", " + str(recall) + ", " + str(fscore)
Exemple #30
0
def main():
    parser = argparse.ArgumentParser(
        description='Preprocess imagenet dataset for qvis')
    parser.add_argument('--datapath',
                        type=str,
                        help='location of imagenet dataset')
    parser.add_argument('--modelpath',
                        type=str,
                        help='location of tensorflow-slim model')
    parser.add_argument('--batch_size',
                        type=int,
                        help='batch size',
                        default=32)
    parser.add_argument('--pca', action='store_true')
    parser.add_argument('--fvec', action='store_true')
    args = parser.parse_args()

    if args.fvec:
        transfer_to_fvecs()
        return

    if args.pca:
        do_pca()
        return

    # infer resnet
    config = tf.ConfigProto()
    # config.operation_timeout_in_ms = 6000
    dataset = get_dataset(args.datapath)

    # from tensorflow.python.training import input as tf_input
    # from tensorflow.contrib.slim.python.slim.data import parallel_reader
    # data_files = parallel_reader.get_data_files(args.datapath)
    # print(len(data_files), 'files.')
    # filename_queue = tf_input.string_input_producer(data_files, num_epochs=1, shuffle=False, name='filenames')
    # reader = tf.TFRecordReader()
    # key, value = reader.read(filename_queue)
    # dvalue = dataset.decoder.decode(value)

    # with tf.Session(config=config) as sess:
    #     ini_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    #     sess.run(ini_op)
    #     coord = tf.train.Coordinator()
    #     threads = tf.train.start_queue_runners(coord=coord)

    #     counter = 0
    #     while True:
    #         k, v = sess.run([key, dvalue])
    #         counter += 1
    #         # print(k, v)
    #         print(k, counter)

    # return

    provider = slim.dataset_data_provider.DatasetDataProvider(dataset,
                                                              shuffle=False,
                                                              num_epochs=1)
    images, labels = provider.get(['image', 'label'])

    # import urllib
    # url = 'https://upload.wikimedia.org/wikipedia/commons/7/70/EnglishCockerSpaniel_simon.jpg'
    # image_string = urllib.request.urlopen(url).read()
    # image = tf.image.decode_jpeg(image_string, channels=3)

    processed_images = preprocess_for_eval(images, 224, 224)
    # processed_images = tf.expand_dims(processed_images, 0)

    # Batch up
    processed_images, labels = tf.train.batch([processed_images, labels],
                                              batch_size=args.batch_size,
                                              num_threads=8,
                                              capacity=2 * args.batch_size,
                                              allow_smaller_final_batch=True)

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        logits, endpoints = resnet_v1.resnet_v1_50(processed_images,
                                                   num_classes=1000,
                                                   scope='resnet_v1_50',
                                                   is_training=False)
        pool5 = math_ops.reduce_mean(endpoints['resnet_v1_50/block4'], [1, 2],
                                     name='pool5',
                                     keep_dims=True)
        vectors = tf.squeeze(pool5, axis=[1, 2])

    init_fn = slim.assign_from_checkpoint_fn(args.modelpath,
                                             slim.get_model_variables())

    vectors_to_save = []
    labels_to_save = []
    with tf.Session(config=config) as sess:
        ini_op = tf.group(tf.global_variables_initializer(),
                          tf.local_variables_initializer())
        sess.run(ini_op)
        coord = tf.train.Coordinator()
        thread = tf.train.start_queue_runners(sess=sess, coord=coord)
        init_fn(sess)
        # prob = tf.squeeze(logits, axis=[1, 2])
        # probabilities = tf.nn.softmax(prob, dim=-1)
        counter = 0
        while True:
            try:
                vector, label = sess.run([vectors, labels])
            except OutOfRangeError as e:
                break
            print(vector.shape)
            vectors_to_save.append(vector)
            labels_to_save.append(label)
            counter += vector.shape[0]
            print(counter)
            # results, gtlabel = sess.run([probabilities, labels])
            # print(sorted(enumerate(results[0]), key=lambda x: -x[1])[:5], gtlabel)
        np.save("imagenet_resnet_v1_50_vectors.npy",
                np.concatenate(vectors_to_save))
        np.save("imagenet_resnet_v1_50_lables.npy",
                np.concatenate(labels_to_save))
        # convolutional manner
        logits, _ = vgg.vgg_16(input_image,
                               num_classes=1000,
                               is_training=False,
                               spatial_squeeze=False)

    # For each pixel we get predictions for each class
    # out of 1000. We need to pick the one with the highest
    # probability. To be more precise, these are not probabilities,
    # because we didn't apply softmax. But if we pick a class
    # with the highest value it will be equivalent to picking
    # the highest value after applying softmax
    pred = tf.argmax(logits, dimension=3)

    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'vgg_16.ckpt'),
        slim.get_model_variables('vgg_16'))

    with tf.Session() as sess:
        init_fn(sess)
        segmentation, np_image = sess.run([pred, image])

# Remove the first empty dimension
segmentation = np.squeeze(segmentation)

# Let's get unique predicted classes (from 0 to 1000) and
# relable the original predictions so that classes are
# numerated starting from zero
unique_classes, relabeled_image = np.unique(segmentation, return_inverse=True)

segmentation_size = segmentation.shape
Exemple #32
0
image_size = inception.inception_v4.default_image_size
checkpoints_dir = '/Users/zhangxin/data_public/goolenet/v4' # inception_v4.ckpt
with tf.Graph().as_default():
    url = 'https://upload.wikimedia.org/wikipedia/commons/7/70/EnglishCockerSpaniel_simon.jpg'
    image_string = urllib.urlopen(url).read()
    image = tf.image.decode_jpeg(image_string, channels=3)
    processed_image = inception_preprocessing.preprocess_image(image, image_size, image_size, is_training=False)
    processed_images  = tf.expand_dims(processed_image, 0)
    
    # Create the model, use the default arg scope to configure the batch norm parameters.
    with slim.arg_scope(inception.inception_v4_arg_scope()):
        logits, _ = inception.inception_v4(processed_images, num_classes=1001, is_training=False)
    probabilities = tf.nn.softmax(logits)
    
    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
        slim.get_model_variables('InceptionV4'))
    
    with tf.Session() as sess:
        init_fn(sess)
        np_image, probabilities = sess.run([image, probabilities])
        probabilities = probabilities[0, 0:]
        sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x:x[1])]
        
    # plt.figure()
    # plt.imshow(np_image.astype(np.uint8))
    # plt.axis('off')
    # plt.show()

    names = imagenet.create_readable_names_for_imagenet_labels()
    for i in range(5):
def setup_to_run(m, args, is_training, batch_norm_is_training, summary_mode):
  # Set up the model.
  tf.set_random_seed(args.solver.seed)
  task_params = args.navtask.task_params
  num_steps = task_params.num_steps
  num_goals = task_params.num_goals
  num_actions = task_params.num_actions
  num_actions_ = num_actions

  n_views = task_params.n_views

  batch_norm_is_training_op = \
      tf.placeholder_with_default(batch_norm_is_training, shape=[],
                                  name='batch_norm_is_training_op') 
  # Setup the inputs
  m.input_tensors = {}
  lstm_states = []; lstm_state_dims = [];
  state_names = []; updated_state_ops = []; init_state_ops = [];
  if args.arch.lstm_output:
    lstm_states += ['lstm_output']
    lstm_state_dims += [args.arch.lstm_output_dim+task_params.num_actions]
  if args.arch.lstm_ego:
    lstm_states += ['lstm_ego']
    lstm_state_dims += [args.arch.lstm_ego_dim + args.arch.lstm_ego_out]
    lstm_states += ['lstm_img']
    lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out]
  elif args.arch.lstm_img:
    # An LSTM only on the image
    lstm_states += ['lstm_img']
    lstm_state_dims += [args.arch.lstm_img_dim + args.arch.lstm_img_out]
  else:
    # No LSTMs involved here.
    None

  m.input_tensors['common'], m.input_tensors['step'], m.input_tensors['train'] = \
      _inputs(task_params, lstm_states, lstm_state_dims)

  with tf.name_scope('check_size'):
    is_single_step = tf.equal(tf.unstack(tf.shape(m.input_tensors['step']['imgs']), 
                                        num=6)[1], 1)

  images_reshaped = tf.reshape(m.input_tensors['step']['imgs'], 
      shape=[-1, task_params.img_height, task_params.img_width,
             task_params.img_channels], name='re_image')

  rel_goal_loc_reshaped = tf.reshape(m.input_tensors['step']['rel_goal_loc'], 
      shape=[-1, task_params.rel_goal_loc_dim], name='re_rel_goal_loc')

  x, vars_ = get_repr_from_image(
      images_reshaped, task_params.modalities, task_params.data_augment,
      args.arch.encoder, args.solver.freeze_conv, args.solver.wt_decay,
      is_training)

  # Reshape into nice things so that these can be accumulated over time steps
  # for faster backprop.
  sh_before = x.get_shape().as_list()
  m.encoder_output = tf.reshape(
      x, shape=[task_params.batch_size, -1, n_views] + sh_before[1:])
  x = tf.reshape(m.encoder_output, shape=[-1] + sh_before[1:])

  # Add a layer to reduce dimensions for a fc layer.
  if args.arch.dim_reduce_neurons > 0:
    ks = 1; neurons = args.arch.dim_reduce_neurons;
    init_var = np.sqrt(2.0/(ks**2)/neurons)
    batch_norm_param = args.arch.batch_norm_param
    batch_norm_param['is_training'] = batch_norm_is_training_op
    m.conv_feat = slim.conv2d(
        x, neurons, kernel_size=ks, stride=1, normalizer_fn=slim.batch_norm,
        normalizer_params=batch_norm_param, padding='SAME', scope='dim_reduce',
        weights_regularizer=slim.l2_regularizer(args.solver.wt_decay),
        weights_initializer=tf.random_normal_initializer(stddev=init_var))
    reshape_conv_feat = slim.flatten(m.conv_feat)
    sh = reshape_conv_feat.get_shape().as_list()
    m.reshape_conv_feat = tf.reshape(reshape_conv_feat, 
                                     shape=[-1, sh[1]*n_views])

  # Restore these from a checkpoint.
  if args.solver.pretrained_path is not None:
    m.init_fn = slim.assign_from_checkpoint_fn(args.solver.pretrained_path,
                                               vars_)
  else:
    m.init_fn = None

  # Hit the goal_location with a bunch of fully connected layers, to embed it
  # into some space.
  with tf.variable_scope('embed_goal'):
    batch_norm_param = args.arch.batch_norm_param
    batch_norm_param['is_training'] = batch_norm_is_training_op
    m.embed_goal, _ = tf_utils.fc_network(
        rel_goal_loc_reshaped, neurons=args.arch.goal_embed_neurons,
        wt_decay=args.solver.wt_decay, name='goal_embed', offset=0,
        batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout,
        is_training=is_training)
  
  if args.arch.embed_goal_for_state:
    with tf.variable_scope('embed_goal_for_state'):
      batch_norm_param = args.arch.batch_norm_param
      batch_norm_param['is_training'] = batch_norm_is_training_op
      m.embed_goal_for_state, _ = tf_utils.fc_network(
          m.input_tensors['common']['rel_goal_loc_at_start'][:,0,:],
          neurons=args.arch.goal_embed_neurons, wt_decay=args.solver.wt_decay,
          name='goal_embed', offset=0, batch_norm_param=batch_norm_param,
          dropout_ratio=args.arch.fc_dropout, is_training=is_training)

  # Hit the goal_location with a bunch of fully connected layers, to embed it
  # into some space.
  with tf.variable_scope('embed_img'):
    batch_norm_param = args.arch.batch_norm_param
    batch_norm_param['is_training'] = batch_norm_is_training_op
    m.embed_img, _ = tf_utils.fc_network(
        m.reshape_conv_feat, neurons=args.arch.img_embed_neurons,
        wt_decay=args.solver.wt_decay, name='img_embed', offset=0,
        batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout,
        is_training=is_training)

  # For lstm_ego, and lstm_image, embed the ego motion, accumulate it into an
  # LSTM, combine with image features and accumulate those in an LSTM. Finally
  # combine what you get from the image LSTM with the goal to output an action.
  if args.arch.lstm_ego:
    ego_reshaped = preprocess_egomotion(m.input_tensors['step']['incremental_locs'], 
                                        m.input_tensors['step']['incremental_thetas'])
    with tf.variable_scope('embed_ego'):
      batch_norm_param = args.arch.batch_norm_param
      batch_norm_param['is_training'] = batch_norm_is_training_op
      m.embed_ego, _ = tf_utils.fc_network(
          ego_reshaped, neurons=args.arch.ego_embed_neurons,
          wt_decay=args.solver.wt_decay, name='ego_embed', offset=0,
          batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout,
          is_training=is_training)

    state_name, state_init_op, updated_state_op, out_op = lstm_setup(
        'lstm_ego', m.embed_ego, task_params.batch_size, is_single_step, 
        args.arch.lstm_ego_dim, args.arch.lstm_ego_out, num_steps*num_goals,
        m.input_tensors['step']['lstm_ego'])
    state_names += [state_name]
    init_state_ops += [state_init_op]
    updated_state_ops += [updated_state_op]

    # Combine the output with the vision features.
    m.img_ego_op = combine_setup('img_ego', args.arch.combine_type_ego,
                                 m.embed_img, out_op,
                                 args.arch.img_embed_neurons[-1],
                                 args.arch.lstm_ego_out)

    # LSTM on these vision features.
    state_name, state_init_op, updated_state_op, out_op = lstm_setup(
        'lstm_img', m.img_ego_op, task_params.batch_size, is_single_step, 
        args.arch.lstm_img_dim, args.arch.lstm_img_out, num_steps*num_goals,
        m.input_tensors['step']['lstm_img'])
    state_names += [state_name]
    init_state_ops += [state_init_op]
    updated_state_ops += [updated_state_op]

    m.img_for_goal = out_op
    num_img_for_goal_neurons = args.arch.lstm_img_out

  elif args.arch.lstm_img:
    # LSTM on just the image features.
    state_name, state_init_op, updated_state_op, out_op = lstm_setup(
        'lstm_img', m.embed_img, task_params.batch_size, is_single_step,
        args.arch.lstm_img_dim, args.arch.lstm_img_out, num_steps*num_goals,
        m.input_tensors['step']['lstm_img'])
    state_names += [state_name]
    init_state_ops += [state_init_op]
    updated_state_ops += [updated_state_op]
    m.img_for_goal = out_op
    num_img_for_goal_neurons = args.arch.lstm_img_out

  else:
    m.img_for_goal = m.embed_img
    num_img_for_goal_neurons = args.arch.img_embed_neurons[-1]


  if args.arch.use_visit_count:
    m.embed_visit_count = visit_count_fc(
        m.input_tensors['step']['visit_count'],
        m.input_tensors['step']['last_visit'], args.arch.goal_embed_neurons,
        args.solver.wt_decay, args.arch.fc_dropout, is_training=is_training)
    m.embed_goal = m.embed_goal + m.embed_visit_count
  
  m.combined_f = combine_setup('img_goal', args.arch.combine_type,
                               m.img_for_goal, m.embed_goal,
                               num_img_for_goal_neurons,
                               args.arch.goal_embed_neurons[-1])

  # LSTM on the combined representation.
  if args.arch.lstm_output:
    name = 'lstm_output'
    # A few fully connected layers here.
    with tf.variable_scope('action_pred'):
      batch_norm_param = args.arch.batch_norm_param
      batch_norm_param['is_training'] = batch_norm_is_training_op
      x, _ = tf_utils.fc_network(
          m.combined_f, neurons=args.arch.pred_neurons,
          wt_decay=args.solver.wt_decay, name='pred', offset=0,
          batch_norm_param=batch_norm_param, dropout_ratio=args.arch.fc_dropout)

    if args.arch.lstm_output_init_state_from_goal:
      # Use the goal embedding to initialize the LSTM state.
      # UGLY CLUGGY HACK: if this is doing computation for a single time step
      # then this will not involve back prop, so we can use the state input from
      # the feed dict, otherwise we compute the state representation from the
      # goal and feed that in. Necessary for using goal location to generate the
      # state representation.
      m.embed_goal_for_state = tf.expand_dims(m.embed_goal_for_state, dim=1)
      state_op = tf.cond(is_single_step, lambda: m.input_tensors['step'][name],
                         lambda: m.embed_goal_for_state)
      state_name, state_init_op, updated_state_op, out_op = lstm_setup(
          name, x, task_params.batch_size, is_single_step,
          args.arch.lstm_output_dim,
          num_actions_,
          num_steps*num_goals, state_op)
      init_state_ops += [m.embed_goal_for_state]
    else:
      state_op = m.input_tensors['step'][name]
      state_name, state_init_op, updated_state_op, out_op = lstm_setup(
          name, x, task_params.batch_size, is_single_step,
          args.arch.lstm_output_dim,
          num_actions_, num_steps*num_goals, state_op)
      init_state_ops += [state_init_op]

    state_names += [state_name]
    updated_state_ops += [updated_state_op]

    out_op = tf.reshape(out_op, shape=[-1, num_actions_])
    if num_actions_ > num_actions:
      m.action_logits_op = out_op[:,:num_actions]
      m.baseline_op = out_op[:,num_actions:]
    else:
      m.action_logits_op = out_op
      m.baseline_op = None
    m.action_prob_op = tf.nn.softmax(m.action_logits_op)

  else:
    # A few fully connected layers here.
    with tf.variable_scope('action_pred'):
      batch_norm_param = args.arch.batch_norm_param
      batch_norm_param['is_training'] = batch_norm_is_training_op
      out_op, _ = tf_utils.fc_network(
          m.combined_f, neurons=args.arch.pred_neurons,
          wt_decay=args.solver.wt_decay, name='pred', offset=0,
          num_pred=num_actions_,
          batch_norm_param=batch_norm_param,
          dropout_ratio=args.arch.fc_dropout, is_training=is_training)
      if num_actions_ > num_actions:
        m.action_logits_op = out_op[:,:num_actions]
        m.baseline_op = out_op[:,num_actions:]
      else:
        m.action_logits_op = out_op 
        m.baseline_op = None
      m.action_prob_op = tf.nn.softmax(m.action_logits_op)

  m.train_ops = {}
  m.train_ops['step'] = m.action_prob_op
  m.train_ops['common'] = [m.input_tensors['common']['orig_maps'],
                           m.input_tensors['common']['goal_loc'],
                           m.input_tensors['common']['rel_goal_loc_at_start']]
  m.train_ops['state_names'] = state_names
  m.train_ops['init_state'] = init_state_ops
  m.train_ops['updated_state'] = updated_state_ops
  m.train_ops['batch_norm_is_training_op'] = batch_norm_is_training_op

  # Flat list of ops which cache the step data. Only the frozen convolutional
  # features need to be cached; when the encoder is trained end-to-end there
  # is nothing to cache.
  if args.solver.freeze_conv:
    m.train_ops['step_data_cache'] = [m.encoder_output]
  else:
    m.train_ops['step_data_cache'] = []

  ewma_decay = 0.99 if is_training else 0.0
  weight = tf.ones_like(m.input_tensors['train']['action'], dtype=tf.float32,
                        name='weight')

  m.reg_loss_op, m.data_loss_op, m.total_loss_op, m.acc_ops = \
    compute_losses_multi_or(
        m.action_logits_op, m.input_tensors['train']['action'],
        weights=weight, num_actions=num_actions,
        data_loss_wt=args.solver.data_loss_wt,
        reg_loss_wt=args.solver.reg_loss_wt, ewma_decay=ewma_decay)


  if args.solver.freeze_conv:
    vars_to_optimize = list(set(tf.trainable_variables()) - set(vars_))
  else:
    vars_to_optimize = None

  m.lr_op, m.global_step_op, m.train_op, m.should_stop_op, m.optimizer, \
  m.sync_optimizer = tf_utils.setup_training(
      m.total_loss_op, 
      args.solver.initial_learning_rate, 
      args.solver.steps_per_decay,
      args.solver.learning_rate_decay, 
      args.solver.momentum,
      args.solver.max_steps, 
      args.solver.sync, 
      args.solver.adjust_lr_sync,
      args.solver.num_workers, 
      args.solver.task,
      vars_to_optimize=vars_to_optimize,
      clip_gradient_norm=args.solver.clip_gradient_norm,
      typ=args.solver.typ, momentum2=args.solver.momentum2,
      adam_eps=args.solver.adam_eps)
  
  
  if args.arch.sample_gt_prob_type == 'inverse_sigmoid_decay':
    m.sample_gt_prob_op = tf_utils.inverse_sigmoid_decay(args.arch.isd_k,
                                                         m.global_step_op)
  elif args.arch.sample_gt_prob_type == 'zero':
    m.sample_gt_prob_op = tf.constant(-1.0, dtype=tf.float32)
  elif args.arch.sample_gt_prob_type.split('_')[0] == 'step':
    step = int(args.arch.sample_gt_prob_type.split('_')[1])
    m.sample_gt_prob_op = tf_utils.step_gt_prob(
        step, m.input_tensors['step']['step_number'][0,0,0])
  
  m.sample_action_type = args.arch.action_sample_type
  m.sample_action_combine_type = args.arch.action_sample_combine_type
  _add_summaries(m, summary_mode, args.summary.arop_full_summary_iters)
  
  m.init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
  m.saver_op = tf.train.Saver(keep_checkpoint_every_n_hours=4,
                              write_version=tf.train.SaverDef.V2)
  
  return m
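
Note on the lstm_output_init_state_from_goal branch above: the interesting bit is the run-time switch between a fed-in state and a computed one. Below is a minimal, self-contained sketch of that tf.cond pattern; the names (ph_state, embed_goal_for_state) and sizes are illustrative, not the model's own tensors.

import tensorflow as tf

batch_size, state_dim = 4, 16
# True when the graph is run one step at a time (no back-prop through the state).
is_single_step = tf.placeholder(tf.bool, [], name='is_single_step')
# State provided through the feed dict at single-step time.
ph_state = tf.placeholder_with_default(tf.zeros([batch_size, state_dim]),
                                       [batch_size, state_dim], name='ph_state')
# State computed from the goal embedding during full rollouts.
embed_goal_for_state = tf.random_normal([batch_size, state_dim])

# Both branches are built, but only the selected one is evaluated per run.
state_op = tf.cond(is_single_step,
                   lambda: ph_state,
                   lambda: embed_goal_for_state)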
Exemple #34
0
def train(H, test_images):
    '''
    Setup computation graph, run 2 prefetch data threads, and then run the main loop
    '''

    if not os.path.exists(H['save_dir']): os.makedirs(H['save_dir'])

    ckpt_file = H['save_dir'] + '/save.ckpt'
    with open(H['save_dir'] + '/hypes.json', 'w') as f:
        json.dump(H, f, indent=4)

    x_in = tf.placeholder(tf.float32)
    confs_in = tf.placeholder(tf.float32)
    boxes_in = tf.placeholder(tf.float32)
    q = {}
    enqueue_op = {}
    for phase in ['train', 'test']:
        dtypes = [tf.float32, tf.float32, tf.float32]
        grid_size = H['grid_width'] * H['grid_height']
        shapes = (
            [H['image_height'], H['image_width'], 3],
            [grid_size, H['rnn_len'], H['num_classes']],
            [grid_size, H['rnn_len'], 4],
            )
        q[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes)
        enqueue_op[phase] = q[phase].enqueue((x_in, confs_in, boxes_in))

    def make_feed(d):
        return {x_in: d['image'], confs_in: d['confs'], boxes_in: d['boxes'],
                learning_rate: H['solver']['learning_rate']}

    def thread_loop(sess, enqueue_op, phase, gen):
        for d in gen:
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))

    (config, loss, accuracy, summary_op, train_op,
     smooth_op, global_step, learning_rate) = build(H, q)

    saver = tf.train.Saver(max_to_keep=None)
    writer = tf.summary.FileWriter(
        logdir=H['save_dir'],
        flush_secs=10
    )

    with tf.Session(config=config) as sess:
        tf.train.start_queue_runners(sess=sess)
        for phase in ['train', 'test']:
            # enqueue once manually to avoid thread start delay
            gen = train_utils.load_data_gen(H, phase, jitter=H['solver']['use_jitter'])
            d = next(gen)
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))
            t = threading.Thread(target=thread_loop,
                                 args=(sess, enqueue_op, phase, gen))
            t.daemon = True
            t.start()

        tf.set_random_seed(H['solver']['rnd_seed'])
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)
        weights_str = H['solver']['weights']
        if len(weights_str) > 0:
            print('Restoring from: %s' % weights_str)
            saver.restore(sess, weights_str)
        else:
            init_fn = slim.assign_from_checkpoint_fn(
                  '%s/data/inception_v1.ckpt' % os.path.dirname(os.path.realpath(__file__)),
                  [x for x in tf.global_variables() if x.name.startswith('InceptionV1') and H['solver']['opt'] not in x.name])
            init_fn(sess)

        # train model for N iterations
        start = time.time()
        max_iter = H['solver'].get('max_iter', 10000000)
        for i in range(max_iter):
            display_iter = H['logging']['display_iter']
            adjusted_lr = (H['solver']['learning_rate'] *
                           0.5 ** max(0, (i // H['solver']['learning_rate_step']) - 2))
            lr_feed = {learning_rate: adjusted_lr}

            if i % display_iter != 0:
                # train network
                batch_loss_train, _ = sess.run([loss['train'], train_op], feed_dict=lr_feed)
            else:
                # test network every N iterations; log additional info
                if i > 0:
                    dt = (time.time() - start) / (H['batch_size'] * display_iter)
                start = time.time()
                (train_loss, test_accuracy, summary_str,
                    _, _) = sess.run([loss['train'], accuracy['test'],
                                      summary_op, train_op, smooth_op,
                                     ], feed_dict=lr_feed)
                writer.add_summary(summary_str, global_step=global_step.eval())
                print_str = ', '.join([
                    'Step: %d',
                    'lr: %f',
                    'Train Loss: %.2f',
                    'Softmax Test Accuracy: %.1f%%',
                    'Time/image (ms): %.1f'
                ])
                print(print_str %
                      (i, adjusted_lr, train_loss,
                       test_accuracy * 100, dt * 1000 if i > 0 else 0))

            if global_step.eval() % H['logging']['save_iter'] == 0 or global_step.eval() == max_iter - 1:
                saver.save(sess, ckpt_file, global_step=global_step)
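
The queue-based prefetching in Exemple #34 is worth isolating. A hedged, minimal sketch of the same FIFOQueue-plus-feeder-thread pattern follows (synthetic data and shapes invented for the sketch; this is not the example's code):

import threading
import numpy as np
import tensorflow as tf

x_in = tf.placeholder(tf.float32, [8, 8])
q = tf.FIFOQueue(capacity=10, dtypes=[tf.float32], shapes=[[8, 8]])
enqueue_op = q.enqueue([x_in])
x_batch = q.dequeue()  # training ops would consume this tensor

def feeder(sess, n=100):
    # Runs in a background thread and keeps the queue topped up.
    for _ in range(n):
        sess.run(enqueue_op, feed_dict={x_in: np.random.rand(8, 8)})

with tf.Session() as sess:
    t = threading.Thread(target=feeder, args=(sess,), daemon=True)
    t.start()
    print(sess.run(x_batch).shape)  # (8, 8), dequeued from the prefetch queue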
Exemple #35
0
def train(dataset, epochs, batch_size, weight_path):
    with slim.arg_scope(
            mobilenet_v1.mobilenet_v1_arg_scope(is_training=True,
                                                batch_norm_decay=0.99)):
        im_inputs = tf.placeholder(
            tf.float32,
            [None, dataset.input_shape[0], dataset.input_shape[1], 3],
            name="inputs")
        # images_arg = data_augmentation(im_inputs)
        y_true = tf.placeholder(tf.float32, [None, dataset.num_classes],
                                name="labels")
        logits, endpoints = mobilenet_v1.mobilenet_v1(
            im_inputs,
            num_classes=dataset.num_classes,
            is_training=True,
            global_pool=True)
    net_out_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,
                                                   labels=y_true))
    weight_loss = tf.losses.get_regularization_losses()
    # net_out_loss = tf.losses.get_losses()
    variable_summaries(net_out_loss, "net_loss")
    all_loss = weight_loss
    cost = tf.add_n(all_loss) + net_out_loss
    variable_summaries(cost, "total_loss")
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    global_step = tf.Variable(0, trainable=False)
    with tf.control_dependencies(update_ops):
        Adam_optim = tf.train.AdamOptimizer(learning_rate=0.0001)
        Momentum_optim = tf.train.MomentumOptimizer(momentum=0.9,
                                                    learning_rate=0.0001)
        optim = slim.learning.create_train_op(cost,
                                              Momentum_optim,
                                              global_step=global_step)
        # Momentum_optim = tf.train.MomentumOptimizer(momentum=0.9, learning_rate=0.001).minimize(cost, global_step=global_step)
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1),
                                      tf.argmax(y_true, 1))
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))
        variable_summaries(evaluation_step, "accuracy")
    train_writer = tf.summary.FileWriter("log", tf.get_default_graph())
    merge_summary = tf.summary.merge_all()
    vars = slim.get_model_variables()
    saver = tf.train.Saver(tf.global_variables())
    load_fn = slim.assign_from_checkpoint_fn(weight_path,
                                             tf.global_variables(),
                                             ignore_missing_vars=True)
    with tf.Session() as sess:
        print("load:", weight_path)
        saver.restore(sess, weight_path)
        for epoch in range(epochs):
            startTime = time.time()
            for iter_ in range(dataset.num_data // batch_size):
                x, y = dataset.read_data_label(batch_size)
                if iter_ % 50 == 0:
                    loss, _, train_summary, step = sess.run(
                        [cost, optim, merge_summary, global_step],
                        feed_dict={
                            im_inputs: x,
                            y_true: y
                        })
                    val_loss, validation_accuracy = sess.run(
                        [cost, evaluation_step],
                        feed_dict={
                            im_inputs: x,
                            y_true: y
                        })
                    train_writer.add_summary(train_summary, step)
                    print(
                        "epoch:{};iter:{};train_loss:{};val_loss:{};val_acc:{};step:{}"
                        .format(epoch, iter_, loss, val_loss,
                                validation_accuracy, step))
                else:
                    _ = sess.run([optim], feed_dict={im_inputs: x, y_true: y})
            endTime = time.time()
            print("epoch_time:{}".format(endTime - startTime))
        saver.save(sess, "model/416_tree_mobilev1.ckpt")
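
Exemple #35 relies on the batch-norm statistics being updated alongside the gradient step. A minimal sketch of that pattern on a toy two-layer network (sizes are made up): slim.learning.create_train_op picks up tf.GraphKeys.UPDATE_OPS by default, and the explicit control_dependencies form below makes the same dependency visible.

import tensorflow as tf
import tensorflow.contrib.slim as slim

x = tf.placeholder(tf.float32, [None, 4])
y = tf.placeholder(tf.float32, [None, 2])
h = slim.fully_connected(x, 8, normalizer_fn=slim.batch_norm,
                         normalizer_params={'is_training': True})
logits = slim.fully_connected(h, 2, activation_fn=None)
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))

# Batch-norm moving means/variances live in UPDATE_OPS; tying the train op to
# them keeps the inference-time statistics in sync with training.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.MomentumOptimizer(learning_rate=1e-4,
                                          momentum=0.9).minimize(loss)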
Exemple #36
0
#loss = tf.reduce_mean(loss)

with tf.Session() as sess:
    if FLAGS.train:
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])
        var_list = []
        for x in slim.get_model_variables():
            if not ("MobilenetV1/AuxLogits" in x.op.name
                    or "MobilenetV1/Logits" in x.op.name
                    or "MobilenetV1" not in x.op.name):
                var_list.append(x)
        mobilenet_restore = slim.assign_from_checkpoint_fn(
            'checkpoints/mobilenet_v1_1.0_224.ckpt',
            var_list,
            ignore_missing_vars=True)
        mobilenet_restore(sess)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        #saver = tf.train.Saver()
        #saver.restore(sess, './checkpoints/model.ckpt')
        #images = input_pipeline(filenames, 32, num_epochs=10000)

        try:
            step = 0
            while not coord.should_stop():
                result = sess.run([op1, op2, update_k])
                result = sess.run([incr, global_step])
                step = result[-1]
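
The fragment above dequeues batches through queue runners but is cut off before the shutdown logic. For reference, the standard TF 1.x Coordinator loop looks like this sketch (the break stands in for a real stopping condition):

import tensorflow as tf

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            # sess.run(train_op) would go here
            break  # placeholder so the sketch terminates
    except tf.errors.OutOfRangeError:
        pass  # raised when the input queues run dry
    finally:
        coord.request_stop()
    coord.join(threads)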
Exemple #37
0
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    input_score_maps = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 1],
                                      name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 5],
                                        name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32,
                                        shape=[None, None, None, 8],
                                        name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32,
                                          shape=[None, None, None, 1],
                                          name='input_training_masks')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate,
                                               global_step,
                                               decay_steps=10000,
                                               decay_rate=0.94,
                                               staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)

    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms,
                                                    reuse_variables)
                batch_norm_updates_op = tf.group(
                    *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True

                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies(
        [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    step = 0
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is not None:
                print('continue training from previous checkpoint')
                model_path = os.path.join(
                    FLAGS.checkpoint_path,
                    os.path.basename(ckpt_state.model_checkpoint_path))
                print('Restore from {}'.format(model_path))
                saver.restore(sess, model_path)
                print(sess.run(global_step))
                step = int(model_path.split('-')[-1]) - 1

            #else :
            #    print('Load the backbone, Name {}'.format(FLAGS.backbone))
            #    load_layers = tf.global_variables(scope=FLAGS.backbone)
            #    print(load_layers)
            #    saver = tf.train.Saver(load_layers)
            #    saver.restore(sess,  FLAGS.backbone_ckpt)
            #    step = 0
            else:
                sess.run(init)
                #for layer in tf.global_variables(scope='Mobilenet')[:2]:
                #    print("layer name : {} mean : {}".format(layer.name, sess.run(tf.reduce_mean(layer.eval(session=sess)))))
                if FLAGS.pretrained_model_path is not None:
                    print("--------------------------------")
                    print("---Load the Pretrained-Weight---")
                    print("--------------------------------")

                    variable_restore_op(sess)
                #for layer in tf.global_variables(scope='Mobilenet')[:2]:
                #    print("layer name : {} mean : {}".format(layer.name, sess.run(tf.reduce_mean(layer.eval(session=sess)))))
        else:
            sess.run(init)

        total_parameters = 0
        for variable in tf.trainable_variables():
            local_parameters = 1
            shape = variable.get_shape()  #getting shape of a variable
            for i in shape:
                local_parameters *= i.value  #mutiplying dimension values
            total_parameters += local_parameters
        print("-----params-----", total_parameters)
        if os.name == 'nt':
            workers = 0
        else:
            workers = multiprocessing.cpu_count()
        print(" num of worker : ", workers)
        data_generator = icdar.get_batch(num_workers=workers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size_per_gpu *
                                         len(gpus))

        start = time.time()

        while step < FLAGS.max_steps:
            data = next(data_generator)
            ml, tl, _ = sess.run(
                [model_loss, total_loss, train_op],
                feed_dict={
                    input_images: data[0],
                    input_score_maps: data[2],
                    input_geo_maps: data[3],
                    input_training_masks: data[4]
                })
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                avg_examples_per_second = (10 * FLAGS.batch_size_per_gpu *
                                           len(gpus)) / (time.time() - start)
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'
                    .format(step, ml, tl, avg_time_per_step,
                            avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess,
                           FLAGS.checkpoint_path + 'model.ckpt',
                           global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run(
                    [train_op, total_loss, summary_op],
                    feed_dict={
                        input_images: data[0],
                        input_score_maps: data[2],
                        input_geo_maps: data[3],
                        input_training_masks: data[4]
                    })
                summary_writer.add_summary(summary_str, global_step=step)
            step += 1
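
average_gradients is called in Exemple #37 but never defined there. A common implementation, following the classic multi-GPU tower pattern, is sketched below; it assumes every tower produced a non-None gradient for every variable and is not necessarily the example's own version.

import tensorflow as tf

def average_gradients(tower_grads):
    """tower_grads: one list of (grad, var) pairs per GPU tower."""
    average_grads = []
    for grad_and_vars in zip(*tower_grads):  # same variable across all towers
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        var = grad_and_vars[0][1]  # the variable is shared between towers
        average_grads.append((grad, var))
    return average_grads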
Exemple #38
0
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    if not tf.gfile.Exists(FLAGS.checkpoint_path):
        tf.gfile.MkDir(FLAGS.checkpoint_path)
    else:
        if not FLAGS.restore:
            tf.gfile.DeleteRecursively(FLAGS.checkpoint_path)
            tf.gfile.MkDir(FLAGS.checkpoint_path)

    input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
    input_score_maps = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_score_maps')
    if FLAGS.geometry == 'RBOX':
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 5], name='input_geo_maps')
    else:
        input_geo_maps = tf.placeholder(tf.float32, shape=[None, None, None, 8], name='input_geo_maps')
    input_training_masks = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='input_training_masks')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)
    learning_rate = tf.train.exponential_decay(FLAGS.learning_rate, global_step, decay_steps=10000, decay_rate=0.94, staircase=True)
    # add summary
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.MomentumOptimizer(learning_rate, 0.9)


    # split
    input_images_split = tf.split(input_images, len(gpus))
    input_score_maps_split = tf.split(input_score_maps, len(gpus))
    input_geo_maps_split = tf.split(input_geo_maps, len(gpus))
    input_training_masks_split = tf.split(input_training_masks, len(gpus))

    tower_grads = []
    reuse_variables = None
    for i, gpu_id in enumerate(gpus):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('model_%d' % gpu_id) as scope:
                iis = input_images_split[i]
                isms = input_score_maps_split[i]
                igms = input_geo_maps_split[i]
                itms = input_training_masks_split[i]
                total_loss, model_loss = tower_loss(iis, isms, igms, itms, reuse_variables)
                batch_norm_updates_op = tf.group(*tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
                reuse_variables = True

                grads = opt.compute_gradients(total_loss)
                tower_grads.append(grads)

    grads = average_gradients(tower_grads)
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    # save moving average
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())
    # batch norm updates
    with tf.control_dependencies([variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(FLAGS.checkpoint_path, tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(FLAGS.pretrained_model_path, slim.get_trainable_variables(),
                                                             ignore_missing_vars=True)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        if FLAGS.restore:
            print('continue training from previous checkpoint')
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = icdar.get_batch(num_workers=FLAGS.num_readers,
                                         input_size=FLAGS.input_size,
                                         batch_size=FLAGS.batch_size * len(gpus))

        start = time.time()
        for step in range(FLAGS.max_steps):
            data = next(data_generator)
            ml, tl, _ = sess.run([model_loss, total_loss, train_op], feed_dict={input_images: data[0],
                                                                                input_score_maps: data[2],
                                                                                input_geo_maps: data[3],
                                                                                input_training_masks: data[4]})
            if np.isnan(tl):
                print('Loss diverged, stop training')
                break

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start)/10
                avg_examples_per_second = (10 * FLAGS.batch_size * len(gpus))/(time.time() - start)
                start = time.time()
                print('Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, {:.2f} examples/second'.format(
                    step, ml, tl, avg_time_per_step, avg_examples_per_second))

            if step % FLAGS.save_checkpoint_steps == 0:
                saver.save(sess, FLAGS.checkpoint_path + 'model.ckpt', global_step=global_step)

            if step % FLAGS.save_summary_steps == 0:
                _, tl, summary_str = sess.run([train_op, total_loss, summary_op], feed_dict={input_images: data[0],
                                                                                             input_score_maps: data[2],
                                                                                             input_geo_maps: data[3],
                                                                                             input_training_masks: data[4]})
                summary_writer.add_summary(summary_str, global_step=step)
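
Both EAST-style examples build their final train_op as a tf.no_op behind control dependencies. A compact sketch of that moving-average idiom on a toy loss (variable and decay values invented for the sketch):

import tensorflow as tf

w = tf.get_variable('w', [4])
loss = tf.reduce_mean(tf.square(w - 1.0))
global_step = tf.train.get_or_create_global_step()

apply_gradient_op = tf.train.AdamOptimizer(1e-3).minimize(
    loss, global_step=global_step)

# Shadow copies of the trainable variables, useful for evaluation/export.
variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
variables_averages_op = variable_averages.apply(tf.trainable_variables())

# The exposed train_op does nothing itself; it just forces both updates to run.
with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
    train_op = tf.no_op(name='train_op')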
Exemple #39
0
    def run_experiment(self):
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            sess.run(self.init)
            self.train_writer = tf.summary.FileWriter(
                "{}/train_logs/".format(self.log_path),
                graph=tf.get_default_graph())
            self.validation_writer = tf.summary.FileWriter(
                "{}/validation_logs/".format(self.log_path),
                graph=tf.get_default_graph())
            self.train_saver = tf.train.Saver()
            self.val_saver = tf.train.Saver()

            start_from_epoch = 0
            if self.continue_from_epoch != -1:
                start_from_epoch = self.continue_from_epoch
                checkpoint = "{}train_saved_model_{}_{}.ckpt".format(
                    self.saved_models_filepath, self.experiment_name,
                    self.continue_from_epoch)
                variables_to_restore = []
                for var in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
                    print(var)
                    variables_to_restore.append(var)

                tf.logging.info('Fine-tuning from %s' % checkpoint)

                fine_tune = slim.assign_from_checkpoint_fn(
                    checkpoint, variables_to_restore, ignore_missing_vars=True)
                fine_tune(sess)

            self.iter_done = 0
            self.disc_iter = 5
            self.gen_iter = 1
            best_d_val_loss = np.inf

            if self.spherical_interpolation:
                dim = int(np.sqrt(self.num_generations) * 2)
                self.z_2d_vectors = interpolations.create_mine_grid(
                    rows=dim,
                    cols=dim,
                    dim=self.z_dim,
                    space=3,
                    anchors=None,
                    spherical=True,
                    gaussian=True)
                self.z_vectors = interpolations.create_mine_grid(
                    rows=1,
                    cols=self.num_generations,
                    dim=self.z_dim,
                    space=3,
                    anchors=None,
                    spherical=True,
                    gaussian=True)
            else:
                self.z_vectors = np.random.normal(size=(self.num_generations,
                                                        self.z_dim))
                self.z_2d_vectors = np.random.normal(
                    size=(self.num_generations, self.z_dim))

            with tqdm.tqdm(total=self.total_epochs -
                           start_from_epoch) as pbar_e:
                for e in range(start_from_epoch, self.total_epochs):

                    train_g_loss = []
                    val_g_loss = []
                    train_d_loss = []
                    val_d_loss = []

                    with tqdm.tqdm(
                            total=self.total_train_batches) as pbar_train:
                        for iter in range(self.total_train_batches):

                            cur_sample = 0

                            for n in range(self.disc_iter):
                                x_train_i, x_train_j = self.data.get_train_batch(
                                )
                                x_val_i, x_val_j = self.data.get_val_batch()

                                _, d_train_loss_value = sess.run(
                                    [
                                        self.graph_ops["d_opt_op"],
                                        self.losses["d_losses"]
                                    ],
                                    feed_dict={
                                        self.input_x_i: x_train_i,
                                        self.input_x_j: x_train_j,
                                        self.dropout_rate:
                                        self.dropout_rate_value,
                                        self.training_phase: True,
                                        self.random_rotate: True
                                    })

                                d_val_loss_value = sess.run(
                                    self.losses["d_losses"],
                                    feed_dict={
                                        self.input_x_i: x_val_i,
                                        self.input_x_j: x_val_j,
                                        self.dropout_rate:
                                        self.dropout_rate_value,
                                        self.training_phase: False,
                                        self.random_rotate: False
                                    })

                                cur_sample += 1
                                train_d_loss.append(d_train_loss_value)
                                val_d_loss.append(d_val_loss_value)

                            for n in range(self.gen_iter):
                                x_train_i, x_train_j = self.data.get_train_batch(
                                )
                                x_val_i, x_val_j = self.data.get_val_batch()
                                _, g_train_loss_value, train_summaries = sess.run(
                                    [
                                        self.graph_ops["g_opt_op"],
                                        self.losses["g_losses"], self.summary
                                    ],
                                    feed_dict={
                                        self.input_x_i: x_train_i,
                                        self.input_x_j: x_train_j,
                                        self.dropout_rate:
                                        self.dropout_rate_value,
                                        self.training_phase: True,
                                        self.random_rotate: True
                                    })

                                g_val_loss_value, val_summaries = sess.run(
                                    [self.losses["g_losses"], self.summary],
                                    feed_dict={
                                        self.input_x_i: x_val_i,
                                        self.input_x_j: x_val_j,
                                        self.dropout_rate:
                                        self.dropout_rate_value,
                                        self.training_phase: False,
                                        self.random_rotate: False
                                    })

                                cur_sample += 1
                                train_g_loss.append(g_train_loss_value)
                                val_g_loss.append(g_val_loss_value)

                                if iter % (
                                        self.tensorboard_update_interval) == 0:
                                    self.train_writer.add_summary(
                                        train_summaries,
                                        global_step=self.iter_done)
                                    self.validation_writer.add_summary(
                                        val_summaries,
                                        global_step=self.iter_done)

                            self.iter_done = self.iter_done + 1
                            iter_out = "{}_train_d_loss: {}, train_g_loss: {}, " \
                                       "val_d_loss: {}, val_g_loss: {}".format(self.iter_done,
                                                                               d_train_loss_value, g_train_loss_value,
                                                                               d_val_loss_value,
                                                                               g_val_loss_value)
                            pbar_train.set_description(iter_out)
                            pbar_train.update(1)

                    total_d_train_loss_mean = np.mean(train_d_loss)
                    total_d_train_loss_std = np.std(train_d_loss)
                    total_g_train_loss_mean = np.mean(train_g_loss)
                    total_g_train_loss_std = np.std(train_g_loss)

                    print(
                        "Epoch {}: d_train_loss_mean: {}, d_train_loss_std: {},"
                        "g_train_loss_mean: {}, g_train_loss_std: {}".format(
                            e, total_d_train_loss_mean, total_d_train_loss_std,
                            total_g_train_loss_mean, total_g_train_loss_std))

                    total_d_val_loss_mean = np.mean(val_d_loss)
                    total_d_val_loss_std = np.std(val_d_loss)
                    total_g_val_loss_mean = np.mean(val_g_loss)
                    total_g_val_loss_std = np.std(val_g_loss)

                    print("Epoch {}: d_val_loss_mean: {}, d_val_loss_std: {},"
                          "g_val_loss_mean: {}, g_val_loss_std: {}, ".format(
                              e, total_d_val_loss_mean, total_d_val_loss_std,
                              total_g_val_loss_mean, total_g_val_loss_std))

                    sample_generator(
                        num_generations=self.num_generations,
                        sess=sess,
                        same_images=self.same_images,
                        inputs=x_train_i,
                        data=self.data,
                        batch_size=self.batch_size,
                        z_input=self.z_input,
                        file_name="{}/train_z_variations_{}_{}.png".format(
                            self.save_image_path, self.experiment_name, e),
                        input_a=self.input_x_i,
                        training_phase=self.training_phase,
                        z_vectors=self.z_vectors,
                        dropout_rate=self.dropout_rate,
                        dropout_rate_value=self.dropout_rate_value)

                    sample_two_dimensions_generator(
                        sess=sess,
                        same_images=self.same_images,
                        inputs=x_train_i,
                        data=self.data,
                        batch_size=self.batch_size,
                        z_input=self.z_input,
                        file_name="{}/train_z_spherical_{}_{}".format(
                            self.save_image_path, self.experiment_name, e),
                        input_a=self.input_x_i,
                        training_phase=self.training_phase,
                        dropout_rate=self.dropout_rate,
                        dropout_rate_value=self.dropout_rate_value,
                        z_vectors=self.z_2d_vectors)

                    with tqdm.tqdm(total=self.total_gen_batches) as pbar_samp:
                        for i in range(self.total_gen_batches):
                            x_gen_a = self.data.get_gen_batch()
                            sample_generator(
                                num_generations=self.num_generations,
                                sess=sess,
                                same_images=self.same_images,
                                inputs=x_gen_a,
                                data=self.data,
                                batch_size=self.batch_size,
                                z_input=self.z_input,
                                file_name="{}/test_z_variations_{}_{}_{}.png".
                                format(self.save_image_path,
                                       self.experiment_name, e, i),
                                input_a=self.input_x_i,
                                training_phase=self.training_phase,
                                z_vectors=self.z_vectors,
                                dropout_rate=self.dropout_rate,
                                dropout_rate_value=self.dropout_rate_value)

                            sample_two_dimensions_generator(
                                sess=sess,
                                same_images=self.same_images,
                                inputs=x_gen_a,
                                data=self.data,
                                batch_size=self.batch_size,
                                z_input=self.z_input,
                                file_name="{}/val_z_spherical_{}_{}_{}".format(
                                    self.save_image_path, self.experiment_name,
                                    e, i),
                                input_a=self.input_x_i,
                                training_phase=self.training_phase,
                                dropout_rate=self.dropout_rate,
                                dropout_rate_value=self.dropout_rate_value,
                                z_vectors=self.z_2d_vectors)

                            pbar_samp.update(1)

                    train_save_path = self.train_saver.save(
                        sess, "{}/train_saved_model_{}_{}.ckpt".format(
                            self.saved_models_filepath, self.experiment_name,
                            e))

                    if total_d_val_loss_mean < best_d_val_loss:
                        best_d_val_loss = total_d_val_loss_mean
                        val_save_path = self.train_saver.save(
                            sess, "{}/val_saved_model_{}_{}.ckpt".format(
                                self.saved_models_filepath,
                                self.experiment_name, e))
                        print("Saved current best val model at", val_save_path)

                    #save_statistics(self.log_path, [e, total_d_train_loss_mean, total_d_val_loss_mean,
                    #                            total_d_train_loss_std, total_d_val_loss_std,
                    #                            total_g_train_loss_mean, total_g_val_loss_mean,
                    #                            total_g_train_loss_std, total_g_val_loss_std])

                    pbar_e.update(1)
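
The experiment loop above logs training and validation summaries through two separate writers. A stripped-down sketch of that pattern ('logs/train' and 'logs/validation' are placeholder directories, and the loss values are synthetic):

import tensorflow as tf

loss = tf.placeholder(tf.float32, [], name='loss')
loss_summary = tf.summary.scalar('loss', loss)

with tf.Session() as sess:
    train_writer = tf.summary.FileWriter('logs/train', graph=sess.graph)
    val_writer = tf.summary.FileWriter('logs/validation', graph=sess.graph)
    for step in range(3):
        # Same global step for both writers so TensorBoard overlays the curves.
        train_writer.add_summary(sess.run(loss_summary, {loss: 1.0 / (step + 1)}), step)
        val_writer.add_summary(sess.run(loss_summary, {loss: 1.5 / (step + 1)}), step)
    train_writer.flush()
    val_writer.flush()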
Exemple #40
0
def restore_model(checkpoint_paths,
                  variables_to_restore,
                  ignore_missing_vars=False,
                  num_streams=1,
                  checkpoint_style=None,
                  special_assign_vars=None):
    all_ops = []
    if len(checkpoint_paths) == 1 and num_streams > 1:
      logging.info('Provided one checkpoint for multi-stream '
                   'network. Will use this as a saved model '
                   'with this exact multi stream network.')
      all_ops.append(slim.assign_from_checkpoint_fn(
        checkpoint_paths[0],
        variables_to_restore,
        ignore_missing_vars=ignore_missing_vars))
    else:
      for sid in range(num_streams):
        this_checkpoint_style = checkpoint_style.split(',')[sid] if \
                                checkpoint_style is not None else None
        checkpoint_path = checkpoint_paths[sid]
        # assert tf.gfile.Exists(checkpoint_path)
        this_stream_name = 'stream%d/' % sid
        this_checkpoint_variables = [var for var in variables_to_restore
                                     if var in
                                     slim.get_model_variables(this_stream_name)]
        if checkpoint_path.endswith('.npy'):
          vars_to_restore_names = [
              el.name for el in this_checkpoint_variables]
          key_name_mapper = var_name_mapper.map()
          init_weights = np.load(checkpoint_path).item()
          init_weights_final = {}
          vars_restored = []
          for key in init_weights.keys():
            for subkey in init_weights[key].keys():
              prefix = this_stream_name
              if this_checkpoint_style == 'v2_withStream':
                prefix = 'stream0/'  # because any model trained with stream
                                     # will have that stream as 0
              final_key_name = prefix + key_name_mapper(
                  key + '/' + subkey)
              if final_key_name not in vars_to_restore_names:
                logging.error('Not using %s from npy' % final_key_name)
                continue
              
              target_shape = slim.get_model_variables(
                final_key_name)[0].get_shape().as_list()
              pretrained_wts = init_weights[key][subkey]
              target_shape_squeezed = np.delete(
                target_shape, np.where(np.array(target_shape) == 1))
              pretrained_shape_squeezed = np.delete(
                pretrained_wts.shape, np.where(np.array(pretrained_wts.shape) == 1))
              if np.any(target_shape_squeezed !=
                        pretrained_shape_squeezed):
                logging.error('Shape mismatch var: %s from npy [%s vs %s]' 
                              % (final_key_name, target_shape,
                                 pretrained_wts.shape))

              init_weights_final[final_key_name] = \
                  pretrained_wts
              vars_restored.append(final_key_name)
          init_weights = init_weights_final
          for v in vars_to_restore_names:
            if v not in vars_restored:
              logging.fatal('No weights found for %s' % v)
          all_ops.append(slim.assign_from_values_fn(
              init_weights))
        else:
          if this_checkpoint_style != 'v2_withStream':
            all_ops.append(slim.assign_from_checkpoint_fn(
                checkpoint_path,
                # stripping the stream name to map variables
                dict(
                  [('/'.join(el.name.split('/')[1:]).split(':')[0], el) for
                      el in this_checkpoint_variables]),
                ignore_missing_vars=ignore_missing_vars))
          else:
            all_ops.append(slim.assign_from_checkpoint_fn(
                checkpoint_path,
                # stripping the stream name to map variables, to stream0,
                # as the model is v2_withStream, hence must be trained with
                # stream0/ prefix
                dict(
                  [('/'.join(['stream0'] + el.name.split('/')[1:]).split(':')[0], el) for
                      el in this_checkpoint_variables]),
                ignore_missing_vars=ignore_missing_vars))
    if special_assign_vars is not None:
      all_ops.append(get_special_assigns(special_assign_vars))
    def combined(sess):
      for op in all_ops:
        op(sess)
    return combined
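
The .npy branch of restore_model ends up calling slim.assign_from_values_fn with a name-to-array dict. A minimal, hedged sketch of that call (the scope name, shapes, and zero weights are invented for the sketch):

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim

with tf.variable_scope('stream0/conv1'):
    w = tf.get_variable('weights', shape=[3, 3, 3, 8])

# Maps full variable names (without the ':0' suffix) to numpy arrays.
pretrained = {'stream0/conv1/weights': np.zeros([3, 3, 3, 8], dtype=np.float32)}
init_fn = slim.assign_from_values_fn(pretrained)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)  # overwrites w with the numpy weights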
Exemple #41
0
    def train(self):
        def get_optimizer(loss,
                          global_step=None,
                          var_list=None,
                          is_gradient_clip=False):
            train_op = tf.train.AdamOptimizer(self.lr)
            if is_gradient_clip:
                grads_and_vars = train_op.compute_gradients(loss,
                                                            var_list=var_list)
                unchanged_gvs = [(grad, var) for grad, var in grads_and_vars
                                 if not 'LSTM' in var.name]
                rnn_grad = [
                    grad for grad, var in grads_and_vars if 'LSTM' in var.name
                ]
                rnn_var = [
                    var for grad, var in grads_and_vars if 'LSTM' in var.name
                ]
                capped_grad, _ = tf.clip_by_global_norm(rnn_grad, clip_norm=3)
                capped_gvs = list(zip(capped_grad, rnn_var))
                train_op = train_op.apply_gradients(grads_and_vars=capped_gvs +
                                                    unchanged_gvs,
                                                    global_step=global_step)
            else:
                train_op = train_op.minimize(loss, global_step, var_list)
            return train_op

        global_step = tf.Variable(initial_value=0,
                                  dtype=tf.int32,
                                  trainable=False)
        self.global_step = global_step

        # build model
        self.build_model()

        # learning rate decay
        self.lr = tf.train.polynomial_decay(self.learning_rate,
                                            global_step,
                                            self.max_steps,
                                            end_learning_rate=1e-6,
                                            power=0.3)
        tf.summary.scalar('learning_rate', self.lr)

        # training operators
        train_gnet = get_optimizer(self.loss_total, global_step, self.all_vars)

        # session and thread
        gpu_options = tf.GPUOptions(allow_growth=True)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        self.sess = sess
        sess.run(
            tf.group(tf.global_variables_initializer(),
                     tf.local_variables_initializer()))
        # load vgg model
        vgg_model_path = '/home/chenli/Downloads/vgg_16.ckpt'
        exclude = [
            'vgg_16/fc6', 'vgg_16/pool4', 'vgg_16/conv5', 'vgg_16/pool5',
            'vgg_16/fc7', 'vgg_16/global_pool', 'vgg_16/fc8/squeezed',
            'vgg_16/fc8'
        ]
        vgg_vars = slim.get_variables_to_restore(include=['vgg_16'],
                                                 exclude=exclude)
        # vgg_init_var = slim.get_variables_to_restore(include=['vgg_16/fc6'])
        init_fn = slim.assign_from_checkpoint_fn(vgg_model_path, vgg_vars)
        init_fn(self.sess)
        # tf.initialize_variables(var_list=vgg_init_var)
        print('vgg weights loaded')
        self.saver = tf.train.Saver(max_to_keep=50,
                                    keep_checkpoint_every_n_hours=1)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # training summary
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(self.train_dir,
                                               sess.graph,
                                               flush_secs=30)

        # self.load(sess, self.restore_dir, step=self.restore_step)

        for step in range(sess.run(global_step), self.max_steps + 1):

            start_time = time.time()

            # update G network
            _, loss_total_val = sess.run([train_gnet, self.loss_total])

            duration = time.time() - start_time
            # print loss_value
            assert not np.isnan(
                loss_total_val), 'Model diverged with loss = NaN'

            if step % 5 == 0:
                num_examples_per_step = self.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = (
                    '%s: step %d, loss = (%.5f; %.5f, %.5f)(%.1f data/s; %.3f s/bch)'
                )
                print(format_str %
                      (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), step,
                       loss_total_val, 0.0, 0.0, examples_per_sec,
                       sec_per_batch))

            if step % 20 == 0:
                # summary_str = sess.run(summary_op, feed_dict={inputs:batch_input, gt:batch_gt})
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, global_step=step)

            # Save the model checkpoint periodically.
            if step > self.max_steps * 0.5:
                if step % 1000 == 0 or step == self.max_steps:
                    checkpoint_path = os.path.join(self.train_dir,
                                                   'checkpoints')
                    self.save(sess, checkpoint_path, step)
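
get_optimizer above clips only the LSTM gradients by global norm. The same selective-clipping idea in isolation, with toy variables standing in for the real LSTM/FC weights:

import tensorflow as tf

with tf.variable_scope('LSTM'):
    w_rnn = tf.get_variable('w', [4])
w_fc = tf.get_variable('w_fc', [4])
loss = tf.reduce_sum(tf.square(w_rnn)) + tf.reduce_sum(tf.square(w_fc))

opt = tf.train.AdamOptimizer(1e-4)
grads_and_vars = opt.compute_gradients(loss)

rnn_gvs = [(g, v) for g, v in grads_and_vars if 'LSTM' in v.name]
other_gvs = [(g, v) for g, v in grads_and_vars if 'LSTM' not in v.name]

# Clip only the recurrent gradients, leave everything else untouched.
clipped, _ = tf.clip_by_global_norm([g for g, _ in rnn_gvs], clip_norm=3.0)
train_op = opt.apply_gradients(
    list(zip(clipped, [v for _, v in rnn_gvs])) + other_gvs)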
Exemple #42
0
                                         np.float32)
            processed_image = vgg_preprocessing.preprocess_image(
                tfimg, image_size, image_size, is_training=False)
            processed_images = tf.expand_dims(processed_image, 0)

            # Create the model, use the default arg scope to configure the batch norm parameters.
            with slim.arg_scope(vgg.vgg_arg_scope()):
                # 1000 classes instead of 1001.
                logits, end_points = vgg.vgg_16(processed_images,
                                                num_classes=1000,
                                                is_training=False)

            probabilities = tf.nn.softmax(logits)

            init_fn = slim.assign_from_checkpoint_fn(
                'C:/Users/falindrith/Dropbox/Documents/research/sliders_project/vgg_16/vgg_16.ckpt',
                slim.get_model_variables('vgg_16'))

            #print (slim.get_model_variables('vgg_16'))
            feature_conv_5_3 = end_points['vgg_16/conv4/conv4_2']

            with tf.Session() as sess:
                tf.train.start_queue_runners(sess=sess)
                init_fn(sess)
                probabilities, feats = sess.run(
                    [probabilities, feature_conv_5_3])
                #probabilities = probabilities[0, 0:]
                #sorted_inds = [i[0] for i in sorted(enumerate(-probabilities), key=lambda x:x[1])]

            np.save(outFolder + '/' + f, feats)
            tf.get_variable_scope().reuse_variables()
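
Exemple #42 grabs an intermediate activation through the end_points dict returned by the slim model function. A sketch of just that lookup, assuming the stock tensorflow.contrib.slim.nets VGG (the exact key names depend on the enclosing variable scope; here there is none):

import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.nets import vgg

images = tf.placeholder(tf.float32, [1, 224, 224, 3])
with slim.arg_scope(vgg.vgg_arg_scope()):
    logits, end_points = vgg.vgg_16(images, num_classes=1000, is_training=False)

# end_points maps layer names to tensors; pick the activation you need.
feat = end_points['vgg_16/conv4/conv4_2']
print(sorted(end_points.keys())[:5])  # inspect the available layer names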
import inception_v1
import input_data
import tensorflow.contrib.slim as slim
import tensorflow as tf 


x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3])
keep_prob = tf.placeholder(dtype=tf.float32)
logits = inception_v1.inception_v1(x, keep_prob, 5)
logits = tf.reshape(logits, [-1, 5])

exclusions = ['InceptionV1/Logits']
inception_except_logits = slim.get_variables_to_restore(exclude=exclusions)
CKPT_FILE = 'inception_v1.ckpt'
init_fn = slim.assign_from_checkpoint_fn(
	CKPT_FILE,
	inception_except_logits, ignore_missing_vars=True)


y = tf.nn.softmax(logits)
y_ = tf.placeholder(dtype=tf.float32,shape=[None, 5])
output_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='InceptionV1/Logits')
cross_entropy = -tf.reduce_sum(y_*tf.log(y))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy, var_list=output_vars)

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

flower_photos = input_data.read_data_sets('flower_photos/')

with tf.Session() as sess: