def __init__(self, data=None, n_gpus=1, data_shape=None, **config):
    """Store the inputs, resolve the configuration and build the graph.

    Arguments:
        data: A collection (by default an empty dictionary) whose elements,
            or keys for a dictionary, are dataset names. All of them must
            belong to `self.dataset_names`.
        n_gpus: Number of GPUs, must be strictly positive.
        data_shape: Stored unchanged in `self.data_shape`.
        **config: Entries overriding the default configuration.

    Raises:
        AssertionError: if a required configuration entry is missing, if a
            dataset name is unknown, or if `n_gpus` is not positive.
    """
    # A `{}` default in the signature is created once and would be shared by
    # every instance built without `data`; create a fresh one per instance.
    self.datasets = {} if data is None else data
    self.data_shape = data_shape
    self.n_gpus = n_gpus
    self.graph = tf.get_default_graph()
    self.name = self.__class__.__name__.lower()  # get child name
    self.trainable = getattr(self, 'trainable', True)

    # Update config: base defaults, then the child defaults, then the
    # keyword arguments of this call.
    self.config = dict_update(self._default_config,
                              getattr(self, 'default_config', {}))
    self.config = dict_update(self.config, config)

    required = self.required_baseconfig + getattr(
        self, 'required_config_keys', [])
    for r in required:
        assert r in self.config, \
            'Required configuration entry: \'{}\''.format(r)
    assert set(self.datasets) <= self.dataset_names, \
        'Unknown dataset name: {}'.format(
            set(self.datasets) - self.dataset_names)
    assert n_gpus > 0, 'TODO: CPU-only training is currently not supported.'

    # The graph is built under a variable scope named after the child class.
    with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
        self._build_graph()
def __init__(self, **config):
    """Resolve the configuration and create one iterator per data split.

    Arguments:
        **config: Entries overriding the optional `default_config` attribute.
            The merged dictionary is forwarded as keyword arguments to
            `_init_dataset` and `_get_data`.
    """
    # Update config
    defaults = getattr(self, 'default_config', {})
    self.config = dict_update(defaults, config)

    self.dataset = self._init_dataset(**self.config)

    self.tf_splits = {}
    self.tf_next = {}
    # The input pipelines of all splits are placed on the CPU.
    with tf.device('/cpu:0'):
        for split in self.split_names:
            pipeline = self._get_data(self.dataset, split, **self.config)
            self.tf_splits[split] = pipeline
            iterator = pipeline.make_one_shot_iterator()
            self.tf_next[split] = iterator.get_next()

    # Exception type stored for consumers of the one-shot iterators.
    self.end_set = tf.errors.OutOfRangeError
    self.sess = tf.Session()
def homography_adaptation(image, net, config, approximate_inverse=True):
    """Performs homography adaptation.

    Runs `net` on `image` and on `config['num'] - 1` randomly warped copies of
    it (each resized to half resolution before inference), maps every
    prediction back onto the original frame and aggregates them per pixel.

    Arguments:
        image: A `Tensor` with shape `[H, W, 1]`.
        net: A function that takes an image as input, performs inference, and
            outputs the prediction dictionary; only its `'prob'` entry is read.
        config: A configuration dictionary merged over
            `homography_adaptation_default_config`. The entries read here are
            `'num'`, `'aggregation'` (`'max'` or `'sum'`), `'filter_counts'`
            and `'homographies'` (keyword arguments of `sample_homography`).
        approximate_inverse: If true, the probability map is warped back with
            the inverse homography. Otherwise the points with a probability of
            at least 0.01 are projected individually and the maximum is kept
            for each target pixel.

    Returns:
        A dictionary with the aggregated detection probabilities `'prob'`,
        the per-pixel `'counts'`, `'mean_prob'` (sum of the probabilities
        divided by the counts) and, for debugging, the stacked
        `'input_images'` and `'H_probs'`.

    Raises:
        ValueError: if `config['aggregation']` is neither `'max'` nor `'sum'`.
    """
    # Prediction on the unwarped image: first slice of every stack.
    base_prob = net(image)['prob']
    base_count = tf.ones_like(base_prob)
    init_probs = tf.expand_dims(base_prob, axis=0)
    init_counts = tf.expand_dims(base_count, axis=0)
    init_images = tf.expand_dims(image, axis=0)

    shape = tf.shape(image)[:2]
    config = dict_update(homography_adaptation_default_config, config)

    def project_exact(prob, homography):
        """Project the confident points one by one, keeping the max per pixel."""
        # Select the points to be mapped back to the original image
        kept = tf.where(tf.greater_equal(prob, 0.01))
        kept_prob = tf.gather_nd(prob, kept)

        # Compute the projected coordinates: swap the two index columns and
        # append a column of ones before applying the homography matrix.
        ones_col = tf.ones(tf.stack([tf.shape(kept)[0], tf.constant(1)]))
        swapped = tf.reverse(tf.to_float(kept), axis=[1])
        homogeneous = tf.concat([swapped, ones_col], 1)
        projected = tf.matmul(
            homogeneous, tf.transpose(flat2mat(homography)[0]))
        projected = projected[:, :2] / tf.expand_dims(projected[:, 2], axis=1)
        projected = tf.to_int32(tf.round(tf.reverse(projected, axis=[1])))

        # Hack: convert 2D coordinates to 1D indices in order to use tf.unique
        flat_idx = projected[:, 0] * shape[1] + projected[:, 1]
        unique_idx, segment_ids = tf.unique(flat_idx)

        # Keep maximum corresponding probability for each projected point
        # Hack: tf.segment_max requires sorted indices
        segment_ids, order = tf.nn.top_k(
            segment_ids, k=tf.shape(segment_ids)[0])
        segment_ids = tf.reverse(segment_ids, axis=[0])
        order = tf.reverse(order, axis=[0])
        kept_prob = tf.gather(kept_prob, order)
        with tf.device('/cpu:0'):
            best_prob = tf.segment_max(kept_prob, segment_ids)

        # Create final probability map
        rows = tf.floordiv(unique_idx, shape[1])
        cols = tf.floormod(unique_idx, shape[1])
        return tf.scatter_nd(tf.stack([rows, cols], axis=1), best_prob, shape)

    def step(i, prob_stack, count_stack, image_stack):
        # Sample image patch
        homography = sample_homography(shape, **config['homographies'])
        inverse = invert_homography(homography)
        warped = H_transform(image, homography, interpolation='BILINEAR')
        ones = tf.ones(shape)
        count = H_transform(ones, inverse, interpolation='NEAREST')

        # Predict detection probabilities
        half_shape = tf.floordiv(shape, 2)
        net_input = tf.image.resize_images(warped, half_shape)
        prob = net(net_input)['prob']
        prob = tf.expand_dims(prob, axis=-1)
        prob = tf.image.resize_images(prob, shape)[..., 0]

        # In theory, directly inverting the probability map tends to discard
        # many points with high probability. However experiments show that
        # this is not an issue for a large number of homographies, and is
        # 3 times faster than an exact inverse.
        if not approximate_inverse:
            prob_proj = project_exact(prob, homography)
        else:
            prob_proj = H_transform(prob, inverse, interpolation='BILINEAR')

        prob_stack = tf.concat(
            [prob_stack, tf.expand_dims(prob_proj, 0)], axis=0)
        count_stack = tf.concat(
            [count_stack, tf.expand_dims(count, 0)], axis=0)
        image_stack = tf.concat(
            [image_stack, tf.expand_dims(warped, 0)], axis=0)
        return i + 1, prob_stack, count_stack, image_stack

    def keep_going(i, *_stacks):
        return tf.less(i, config['num'] - 1)

    # The stacks grow along axis 0, hence the relaxed shape invariants.
    invariants = [
        tf.TensorShape([]),
        tf.TensorShape([None, None, None]),
        tf.TensorShape([None, None, None]),
        tf.TensorShape([None, None, None, 1]),
    ]
    _, probs, counts, images = tf.while_loop(
        keep_going, step, [0, init_probs, init_counts, init_images],
        parallel_iterations=1, shape_invariants=invariants)

    counts = tf.reduce_sum(counts, axis=0)
    max_prob = tf.reduce_max(probs, axis=0)
    mean_prob = tf.reduce_sum(probs, axis=0) / counts

    method = config['aggregation']
    if method not in ('max', 'sum'):
        raise ValueError('Unkown aggregation method: {}'.format(method))
    prob = max_prob if method == 'max' else mean_prob

    # Zero out the pixels covered by fewer than `filter_counts` predictions.
    min_count = config['filter_counts']
    if min_count:
        enough = tf.greater_equal(counts, min_count)
        prob = tf.where(enough, prob, tf.zeros_like(prob))

    return {
        'prob': prob,
        'counts': counts,
        'mean_prob': mean_prob,
        'input_images': images,  # debug
        'H_probs': probs,  # debug
    }
def homography_adaptation(image, net, config):
    """Performs homography adaptation.

    Inference using multiple random warped patches of the same input image
    for robust predictions. Each warped copy is resized by
    `config['homographies']['patch_ratio']` before inference, and the
    prediction is resized back and warped onto the original frame.

    Arguments:
        image: A `Tensor` with shape `[N, H, W, 1]`.
        net: A function that takes an image as input, performs inference, and
            outputs the prediction dictionary (its `'prob'` entry is used).
            A sequence whose first item is the probability map is accepted
            as well.
        config: A configuration dictionary merged over
            `homography_adaptation_default_config`. The entries read here are
            `'num'`, `'aggregation'` (`'max'` or `'sum'`), `'filter_counts'`
            and `'homographies'` (keyword arguments of `sample_homography`,
            which must include `'patch_ratio'`).

    Returns:
        A dictionary which contains the aggregated detection probabilities
        `'prob'`, the per-pixel `'counts'`, `'mean_prob'` (sum of the
        probabilities divided by the counts) and, for debugging, the stacked
        `'input_images'` and `'H_probs'`.

    Raises:
        ValueError: if `config['aggregation']` is neither `'max'` nor `'sum'`.
    """
    def detection_prob(prediction):
        # Both calls to `net` must read its output the same way: tuple
        # unpacking for the first call and `['prob']` for the others cannot
        # both work for any single return convention.
        if isinstance(prediction, dict):
            return prediction['prob']
        return prediction[0]

    probs = detection_prob(net(image))
    counts = tf.ones_like(probs)
    images = image

    # The predictions are stacked along a new trailing axis.
    probs = tf.expand_dims(probs, axis=-1)
    counts = tf.expand_dims(counts, axis=-1)
    images = tf.expand_dims(images, axis=-1)

    shape = tf.shape(image)[1:3]
    config = dict_update(homography_adaptation_default_config, config)

    def step(i, probs, counts, images):
        # Sample image patch
        H = sample_homography(shape, **config['homographies'])
        H_inv = invert_homography(H)
        warped = H_transform(image, H, interpolation='BILINEAR')
        # Pixels of the original frame covered by the warped patch.
        count = H_transform(
            tf.expand_dims(tf.ones(tf.shape(image)[:3]), -1),
            H_inv, interpolation='NEAREST')[..., 0]

        # Predict detection probabilities
        warped_shape = tf.to_int32(
            tf.to_float(shape) * config['homographies']['patch_ratio'])
        input_warped = tf.image.resize_images(warped, warped_shape)
        prob = detection_prob(net(input_warped))
        prob = tf.image.resize_images(
            tf.expand_dims(prob, axis=-1), shape)[..., 0]
        prob_proj = H_transform(
            tf.expand_dims(prob, -1), H_inv,
            interpolation='BILINEAR')[..., 0]

        probs = tf.concat([probs, tf.expand_dims(prob_proj, -1)], axis=-1)
        counts = tf.concat([counts, tf.expand_dims(count, -1)], axis=-1)
        images = tf.concat([images, tf.expand_dims(warped, -1)], axis=-1)
        return i + 1, probs, counts, images

    # The stacks grow along their last axis, hence the relaxed invariants.
    _, probs, counts, images = tf.while_loop(
        lambda i, p, c, im: tf.less(i, config['num'] - 1),
        step,
        [0, probs, counts, images],
        parallel_iterations=1,
        back_prop=False,
        shape_invariants=[
            tf.TensorShape([]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, 1, None]),
        ])

    counts = tf.reduce_sum(counts, axis=-1)
    max_prob = tf.reduce_max(probs, axis=-1)
    mean_prob = tf.reduce_sum(probs, axis=-1) / counts

    if config['aggregation'] == 'max':
        prob = max_prob
    elif config['aggregation'] == 'sum':
        prob = mean_prob
    else:
        raise ValueError('Unkown aggregation method: {}'.format(
            config['aggregation']))

    # Zero out the pixels covered by fewer than `filter_counts` predictions.
    if config['filter_counts']:
        prob = tf.where(tf.greater_equal(counts, config['filter_counts']),
                        prob, tf.zeros_like(prob))

    return {
        'prob': prob,
        'counts': counts,
        'mean_prob': mean_prob,
        'input_images': images,  # debug
        'H_probs': probs,  # debug
    }
def homography_adaptation(image, net, config):
    """Performs homography adaptation.

    Inference using multiple random warped patches of the same input image
    for robust predictions. Detections close to the border of each warped
    patch can be discarded through `config['valid_border_margin']`.

    Arguments:
        image: A `Tensor` with shape `[N, H, W, 1]`.
        net: A function that takes an image as input, performs inference, and
            outputs the prediction dictionary; only its `'prob'` entry is read.
        config: A configuration dictionary merged over
            `homography_adaptation_default_config`. The entries read here are
            `'num'`, `'aggregation'` (`'max'` or `'sum'`), `'filter_counts'`,
            `'valid_border_margin'` and `'homographies'` (keyword arguments
            of `sample_homography`).

    Returns:
        A dictionary which contains the aggregated detection probabilities
        `'prob'`, the per-pixel `'counts'`, `'mean_prob'` (sum of the
        probabilities divided by the counts) and, for debugging, the stacked
        `'input_images'` and `'H_probs'`.

    Raises:
        ValueError: if `config['aggregation']` is neither `'max'` nor `'sum'`.
    """
    probs = net(image)['prob']
    counts = tf.ones_like(probs)
    images = image

    # The predictions are stacked along a new trailing axis.
    probs = tf.expand_dims(probs, axis=-1)
    counts = tf.expand_dims(counts, axis=-1)
    images = tf.expand_dims(images, axis=-1)

    shape = tf.shape(image)[1:3]
    config = dict_update(homography_adaptation_default_config, config)

    def step(i, probs, counts, images):
        # Sample image patch
        H = sample_homography(shape, **config['homographies'])
        H_inv = invert_homography(H)
        warped = H_transform(image, H, interpolation='BILINEAR')
        # `count` marks the valid pixels in the original frame and `mask`
        # the valid pixels in the warped frame; both still carry a trailing
        # channel axis at this point.
        count = H_transform(
            tf.expand_dims(tf.ones(tf.shape(image)[:3]), -1),
            H_inv, interpolation='NEAREST')
        mask = H_transform(
            tf.expand_dims(tf.ones(tf.shape(image)[:3]), -1),
            H, interpolation='NEAREST')

        # Ignore the detections too close to the border to avoid artifacts
        if config['valid_border_margin']:
            kernel = cv.getStructuringElement(
                cv.MORPH_ELLIPSE, (config['valid_border_margin'] * 2,) * 2)
            with tf.device('/cpu:0'):
                kernel = tf.to_float(tf.constant(kernel)[..., tf.newaxis])
                # NOTE(review): the `+ 1.` presumably compensates for the
                # kernel value subtracted by the erosion - confirm against
                # the `tf.nn.erosion2d` documentation.
                count = tf.nn.erosion2d(
                    count, kernel,
                    [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')[..., 0] + 1.
                mask = tf.nn.erosion2d(
                    mask, kernel,
                    [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')[..., 0] + 1.
        else:
            # Without erosion the channel axis still has to be dropped,
            # otherwise `prob * mask` and the concatenation to the rank-4
            # `counts` stack below get operands of mismatched rank.
            count = count[..., 0]
            mask = mask[..., 0]

        # Predict detection probabilities
        prob = net(warped)['prob']
        prob = prob * mask
        prob_proj = H_transform(
            tf.expand_dims(prob, -1), H_inv,
            interpolation='BILINEAR')[..., 0]
        prob_proj = prob_proj * count

        probs = tf.concat([probs, tf.expand_dims(prob_proj, -1)], axis=-1)
        counts = tf.concat([counts, tf.expand_dims(count, -1)], axis=-1)
        images = tf.concat([images, tf.expand_dims(warped, -1)], axis=-1)
        return i + 1, probs, counts, images

    # The stacks grow along their last axis, hence the relaxed invariants.
    _, probs, counts, images = tf.while_loop(
        lambda i, p, c, im: tf.less(i, config['num'] - 1),
        step,
        [0, probs, counts, images],
        parallel_iterations=1,
        back_prop=False,
        shape_invariants=[
            tf.TensorShape([]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, 1, None]),
        ])

    counts = tf.reduce_sum(counts, axis=-1)
    max_prob = tf.reduce_max(probs, axis=-1)
    mean_prob = tf.reduce_sum(probs, axis=-1) / counts

    if config['aggregation'] == 'max':
        prob = max_prob
    elif config['aggregation'] == 'sum':
        prob = mean_prob
    else:
        raise ValueError('Unkown aggregation method: {}'.format(
            config['aggregation']))

    # Zero out the pixels covered by fewer than `filter_counts` predictions.
    if config['filter_counts']:
        prob = tf.where(tf.greater_equal(counts, config['filter_counts']),
                        prob, tf.zeros_like(prob))

    return {
        'prob': prob,
        'counts': counts,
        'mean_prob': mean_prob,
        'input_images': images,  # debug
        'H_probs': probs,  # debug
    }
def distortion_homography_adaptation(image, net, config):
    """Performs radial distortion and homography adaptation.

    Each sampled view is a random homography warp of `image` followed by a
    radial distortion around a randomly drawn center. The predictions are
    undistorted and warped back before being aggregated.

    Arguments:
        image: A `Tensor` with shape `[N, H, W, 1]`.
        net: A function that takes an image as input, performs inference, and
            outputs the prediction dictionary; only its `'prob'` entry is read.
        config: A configuration dictionary merged over
            `homography_adaptation_default_config`. The entries read here are
            `'num'`, `'aggregation'` (`'max'` or `'sum'`), `'filter_counts'`,
            `'valid_border_margin'` and `'homographies'` (keyword arguments
            of `sample_homography`).
            NOTE(review): no `'dist_fact'` entry is read; the distortion
            factor is the constant `0.000006` set inside `step`.

    Returns:
        A dictionary which contains the aggregated detection probabilities
        `'prob'`, the per-pixel `'counts'`, `'mean_prob'` (sum of the
        probabilities divided by the counts) and, for debugging, the stacked
        `'input_images'` and `'H_probs'`.

    Raises:
        ValueError: if `config['aggregation']` is neither `'max'` nor `'sum'`.
    """
    probs = net(image)['prob']
    counts = tf.ones_like(probs)
    images = image

    # The predictions are stacked along a new trailing axis.
    probs = tf.expand_dims(probs, axis=-1)
    counts = tf.expand_dims(counts, axis=-1)
    images = tf.expand_dims(images, axis=-1)

    shape = tf.shape(image)[1:3]
    config = dict_update(homography_adaptation_default_config, config)

    def step(i, probs, counts, images):
        # Sample image patch
        H = sample_homography(shape, **config['homographies'])
        H_inv = invert_homography(H)

        # Draw the distortion center uniformly inside the image.
        H_ = shape[0]
        W = shape[1]
        row_c = tf.random_uniform(shape=[], minval=0,
                                  maxval=tf.cast(H_, tf.float32),
                                  dtype=tf.float32)
        col_c = tf.random_uniform(shape=[], minval=0,
                                  maxval=tf.cast(W, tf.float32),
                                  dtype=tf.float32)
        lambda_ = tf.constant(0.000006)

        # Apply the homography, then the radial distortion
        warped = H_transform(image, H, interpolation='BILINEAR')
        warped = distort(warped, lambda_, (row_c, col_c))

        # `count` follows the inverse path (undistort, then inverse
        # homography) and `mask` the forward path; both still carry a
        # trailing channel axis at this point.
        count = undistort(tf.expand_dims(tf.ones(tf.shape(image)[:3]), -1),
                          lambda_, (row_c, col_c))
        count = H_transform(count, H_inv, interpolation='NEAREST')
        mask = H_transform(
            tf.expand_dims(tf.ones(tf.shape(image)[:3]), -1),
            H, interpolation='NEAREST')
        mask = distort(mask, lambda_, (row_c, col_c))

        # Ignore the detections too close to the border to avoid artifacts
        if config['valid_border_margin']:
            kernel = cv.getStructuringElement(
                cv.MORPH_ELLIPSE, (config['valid_border_margin'] * 2,) * 2)
            with tf.device('/cpu:0'):
                kernel = tf.to_float(tf.constant(kernel)[..., tf.newaxis])
                # NOTE(review): the `+ 1.` presumably compensates for the
                # kernel value subtracted by the erosion - confirm against
                # the `tf.nn.erosion2d` documentation.
                count = tf.nn.erosion2d(
                    count, kernel,
                    [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')[..., 0] + 1.
                mask = tf.nn.erosion2d(
                    mask, kernel,
                    [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')[..., 0] + 1.
        else:
            # Without erosion the channel axis still has to be dropped,
            # otherwise `prob * mask` and the concatenation to the rank-4
            # `counts` stack below get operands of mismatched rank.
            count = count[..., 0]
            mask = mask[..., 0]

        # Predict detection probabilities
        prob = net(warped)['prob']
        prob = prob * mask
        prob_proj = undistort(tf.expand_dims(prob, -1),
                              lambda_, (row_c, col_c))
        prob_proj = H_transform(prob_proj, H_inv,
                                interpolation='BILINEAR')[..., 0]
        prob_proj = prob_proj * count

        probs = tf.concat([probs, tf.expand_dims(prob_proj, -1)], axis=-1)
        counts = tf.concat([counts, tf.expand_dims(count, -1)], axis=-1)
        images = tf.concat([images, tf.expand_dims(warped, -1)], axis=-1)
        return i + 1, probs, counts, images

    # The stacks grow along their last axis, hence the relaxed invariants.
    _, probs, counts, images = tf.while_loop(
        lambda i, p, c, im: tf.less(i, config['num'] - 1),
        step,
        [0, probs, counts, images],
        parallel_iterations=1,
        back_prop=False,
        shape_invariants=[
            tf.TensorShape([]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, None]),
            tf.TensorShape([None, None, None, 1, None]),
        ])

    counts = tf.reduce_sum(counts, axis=-1)
    max_prob = tf.reduce_max(probs, axis=-1)
    mean_prob = tf.reduce_sum(probs, axis=-1) / counts

    if config['aggregation'] == 'max':
        prob = max_prob
    elif config['aggregation'] == 'sum':
        prob = mean_prob
    else:
        raise ValueError('Unkown aggregation method: {}'.format(
            config['aggregation']))

    # Zero out the pixels covered by fewer than `filter_counts` predictions.
    if config['filter_counts']:
        prob = tf.where(tf.greater_equal(counts, config['filter_counts']),
                        prob, tf.zeros_like(prob))

    return {
        'prob': prob,
        'counts': counts,
        'mean_prob': mean_prob,
        'input_images': images,  # debug
        'H_probs': probs,  # debug
    }