def build_patch_extraction(config, det_endpoints, photos=None, name='PatchExtract'):
    with tf.name_scope(name):
        batch_inds = det_endpoints['batch_inds']
        kpts = det_endpoints['kpts']
        kpts_scale = det_endpoints['kpts_scale']
        kpts_ori = det_endpoints['kpts_ori']

        if config.desc_inputs == 'det_feats':
            feat_maps = tf.identity(det_endpoints['feat_maps'])
        elif config.desc_inputs == 'photos':
            feat_maps = tf.identity(photos)
        elif config.desc_inputs == 'concat':
            feat_maps = tf.concat([photos, det_endpoints['feat_maps']],
                                  axis=-1)
        else:
            raise ValueError('Unknown desc_inputs: {}'.format(
                config.desc_inputs))

        patches = transformer_crop(feat_maps, config.patch_size, batch_inds,
                                   kpts, kpts_scale=kpts_scale,
                                   kpts_ori=kpts_ori)

        return patches
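
# Illustrative only (not the project's real config class): a minimal stub
# showing the three `desc_inputs` modes accepted above. The attribute names
# match exactly what build_patch_extraction reads; everything else about the
# actual config object is an assumption.
class _ExamplePatchConfig(object):
    desc_inputs = 'concat'  # one of 'det_feats' | 'photos' | 'concat'
    patch_size = 32         # output patch resolution

# Hypothetical call site, assuming `det_endpoints` and `photos` come from the
# detector graph built below:
#   patches = build_patch_extraction(_ExamplePatchConfig(), det_endpoints,
#                                    photos=photos)
#   # -> [num_kpts, 32, 32, C] patches, rectified by keypoint scale/orientation.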
def euclidean_augmentation(next_batch, out_size, rot_aug, scale_aug):
    # Apply data augmentation with a Euclidean (scale + rotation) transformation.
    photos1, photos2, depths1, depths2, valid_masks1, valid_masks2, \
        c2Tc1s, c1Tc2s, c1Tws, c2Tws, Ks1, Ks2, theta_params, use_aug = next_batch

    batch_size = tf.shape(photos1)[0]
    data_height, data_width = photos1.get_shape().as_list()[1:3]
    out_height, out_width = out_size

    theta_params4 = theta_params[:, :4]
    scales1_log, scales2_log, oris1, oris2 = tf.split(theta_params4, 4, axis=1)
    scales1 = tf.exp(scales1_log)
    scales2 = tf.exp(scales2_log)

    thetas1 = make_thetas(batch_size, None, None)  # identity: never augment image 1
    if not scale_aug:
        scales2 = None
    else:
        tf.summary.histogram('scale_aug', scales2_log)
    if not rot_aug:
        oris2 = None
    else:
        tf.summary.histogram('rot_aug', oris2)
    thetas2 = make_thetas(batch_size, scales2, oris2)

    inv_thetas1 = tf.matrix_inverse(thetas1)
    inv_thetas2 = tf.matrix_inverse(thetas2)

    center_xy = tf.tile(
        tf.stack([data_width / 2, data_height / 2])[None], [batch_size, 1])

    # Warp photo, depth, and validity mask together so they stay aligned.
    rgbdv1 = tf.concat([photos1, depths1, valid_masks1], axis=-1)
    rgbdv2 = tf.concat([photos2, depths2, valid_masks2], axis=-1)

    rgbdv1_t = tf.cond(
        use_aug[0],
        lambda: transformer_crop(rgbdv1, (out_width, out_height),
                                 tf.range(batch_size), kpts_xy=center_xy,
                                 thetas=inv_thetas1),
        lambda: tf.identity(rgbdv1))
    rgbdv2_t = tf.cond(
        use_aug[0],
        lambda: transformer_crop(rgbdv2, (out_width, out_height),
                                 tf.range(batch_size), kpts_xy=center_xy,
                                 thetas=inv_thetas2),
        lambda: tf.identity(rgbdv2))

    photos1 = tf.slice(rgbdv1_t, [0, 0, 0, 0], [-1, -1, -1, 1])
    depths1 = tf.slice(rgbdv1_t, [0, 0, 0, 1], [-1, -1, -1, 1])
    valid_masks1 = tf.slice(rgbdv1_t, [0, 0, 0, 2], [-1, -1, -1, 1])
    # Keep only pixels whose mask is exactly 1.0 to eliminate interpolated pixels.
    valid_masks1 = tf.cast(tf.equal(valid_masks1, 1.0), tf.float32)
    photos2 = tf.slice(rgbdv2_t, [0, 0, 0, 0], [-1, -1, -1, 1])
    depths2 = tf.slice(rgbdv2_t, [0, 0, 0, 1], [-1, -1, -1, 1])
    valid_masks2 = tf.slice(rgbdv2_t, [0, 0, 0, 2], [-1, -1, -1, 1])
    valid_masks2 = tf.cast(tf.equal(valid_masks2, 1.0), tf.float32)

    # The crop re-centers the image, so the intrinsics must be re-centered too.
    Ks1 = tf.cond(
        use_aug[0],
        lambda: fix_intrinsic_center(Ks1, out_width / 2, out_height / 2),
        lambda: tf.identity(Ks1))
    Ks2 = tf.cond(
        use_aug[0],
        lambda: fix_intrinsic_center(Ks2, out_width / 2, out_height / 2),
        lambda: tf.identity(Ks2))

    next_batch = [
        photos1, photos2, depths1, depths2, valid_masks1, valid_masks2,
        c2Tc1s, c1Tc2s, c1Tws, c2Tws, Ks1, Ks2, thetas1, thetas2,
        inv_thetas1, inv_thetas2, theta_params
    ]

    return next_batch
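
# For intuition only: `make_thetas` is defined elsewhere in this repo; the
# sketch below is an *assumption* about its contract inferred from usage
# above (per-example square homogeneous transforms, since the result is fed
# to tf.matrix_inverse), not the repo's implementation. It builds
# theta = scale * R(ori) in homogeneous [B,3,3] form.
def _example_similarity_thetas(batch_size, scales=None, oris=None):
    ones = tf.ones([batch_size, 1])
    zeros = tf.zeros([batch_size, 1])
    s = ones if scales is None else scales        # [B,1] isotropic scale
    c = ones if oris is None else tf.cos(oris)    # [B,1]
    sn = zeros if oris is None else tf.sin(oris)  # [B,1]
    row1 = tf.concat([s * c, -s * sn, zeros], axis=1)
    row2 = tf.concat([s * sn, s * c, zeros], axis=1)
    row3 = tf.concat([zeros, zeros, ones], axis=1)
    return tf.stack([row1, row2, row3], axis=1)   # [B,3,3]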
def build_multi_scale_deep_detector(config, detector, photos, reuse=False, name='MSDeepDet'):
    with tf.name_scope(name):
        batch_size = tf.shape(photos)[0]
        height = tf.shape(photos)[1]
        width = tf.shape(photos)[2]

        # Detector
        score_maps_list, det_endpoints = detector.build_model(photos,
                                                              reuse=reuse)

        if isinstance(score_maps_list, list):
            scale_factors = det_endpoints['scale_factors']
        else:
            score_maps_list = [score_maps_list]
            scale_factors = [1.]
        scale_factors_tensor = tf.constant(scale_factors, dtype=tf.float32)
        num_scale = len(score_maps_list)

        multi_scale_heatmaps = [None] * num_scale
        for i in range(num_scale):
            logits = instance_normalization(score_maps_list[i])
            _heatmaps = spatial_softmax(logits, config.sm_ksize,
                                        config.com_strength)
            _heatmaps = tf.image.resize_images(
                _heatmaps, (height, width))  # back to original resolution
            multi_scale_heatmaps[i] = _heatmaps
        multi_scale_heatmaps = tf.concat(multi_scale_heatmaps,
                                         axis=-1)  # [B,H,W,num_scales]

        if config.soft_scale:
            # Soft (differentiable) max and argmax over the scale axis; the
            # softmax may also act as a scale-space NMS.
            max_heatmaps, max_scales = soft_max_and_argmax_1d(
                multi_scale_heatmaps,
                axis=-1,
                inputs_index=scale_factors_tensor,
                keep_dims=False,
                com_strength1=config.score_com_strength,
                com_strength2=config.scale_com_strength)  # both outputs: [B,H,W]
            max_heatmaps = max_heatmaps[..., None]  # restore channel dim: [B,H,W,1]
            tf.summary.histogram('max_scales', max_scales)
        else:
            max_heatmaps = tf.reduce_max(multi_scale_heatmaps, axis=-1,
                                         keep_dims=True)  # [B,H,W,1]
            max_scale_inds = tf.argmax(multi_scale_heatmaps, axis=-1,
                                       output_type=tf.int32)  # [B,H,W]
            max_scales = tf.gather(scale_factors_tensor,
                                   max_scale_inds)  # [B,H,W]

        eof_masks_pad = end_of_frame_masks(height, width,
                                           det_endpoints['pad_size'])
        max_heatmaps = max_heatmaps * eof_masks_pad

        # Extract top-K keypoints
        eof_masks_crop = end_of_frame_masks(height, width, config.crop_radius)
        nms_maps = non_max_suppression(max_heatmaps, config.nms_thresh,
                                       config.nms_ksize)
        nms_scores = max_heatmaps * nms_maps * eof_masks_crop
        top_ks = make_top_k_sparse_tensor(nms_scores, k=config.top_k)
        top_ks = top_ks * nms_maps
        top_ks = tf.stop_gradient(top_ks)

        ori_maps = det_endpoints['ori_maps']

        kpts, batch_inds, num_kpts = extract_keypoints(top_ks)
        kpts_scale = batch_gather_keypoints(max_scales, batch_inds, kpts)
        kpts_ori = batch_gather_keypoints(ori_maps, batch_inds, kpts)

        if config.soft_kpts:
            # Keypoint refinement: use transformer_crop to get a fixed-size
            # patch of the score map around each keypoint.
            kp_local_max_scores = transformer_crop(
                max_heatmaps, config.kp_loc_size, batch_inds, kpts,
                kpts_scale=kpts_scale)  # orientation omitted: [N, loc_size, loc_size, 1]
            # Then run a 2D soft-argmax. `do_softmax=True` because
            # `max_heatmaps` was built from per-scale softmaxes; it may be
            # worth checking which setting works better.
            dxdy = soft_argmax_2d(
                kp_local_max_scores, config.kp_loc_size,
                do_softmax=config.do_softmax_kp_refine,
                com_strength=config.kp_com_strength)  # [N,2]
            tf.summary.histogram('dxdy', dxdy)
            # Apply the sub-pixel offset, mapped back to image coordinates,
            # to the keypoint locations.
            kpts = tf.to_float(kpts) + \
                dxdy * kpts_scale[:, None] * config.kp_loc_size / 2

        det_endpoints['score_maps_list'] = score_maps_list
        det_endpoints['top_ks'] = top_ks
        det_endpoints['kpts'] = kpts  # float
        det_endpoints['kpts_scale'] = kpts_scale
        det_endpoints['kpts_ori'] = kpts_ori
        det_endpoints['batch_inds'] = batch_inds
        det_endpoints['num_kpts'] = num_kpts
        det_endpoints['scale_maps'] = max_scales

        # Debug endpoints
        det_endpoints['db_max_heatmaps'] = max_heatmaps
        det_endpoints['db_max_scales'] = max_scales
        det_endpoints['db_scale_factors_tensor'] = scale_factors_tensor
        det_endpoints['db_max_heatmaps_org'] = tf.reduce_max(
            multi_scale_heatmaps, axis=-1, keep_dims=True)
        max_scale_inds = tf.argmax(multi_scale_heatmaps, axis=-1,
                                   output_type=tf.int32)
        det_endpoints['db_max_scale_inds'] = max_scale_inds
        det_endpoints['db_max_scales2'] = tf.gather(scale_factors_tensor,
                                                    max_scale_inds)

        return max_heatmaps, det_endpoints
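
# Illustrative wiring (an assumption about the surrounding training script,
# not code taken from it): run the multi-scale detector, then crop descriptor
# patches around the detected keypoints with build_patch_extraction above.
# `config` is assumed to carry both the detector and descriptor settings.
def _example_detector_pipeline(config, detector, photos):
    heatmaps, det_endpoints = build_multi_scale_deep_detector(
        config, detector, photos, reuse=False)
    patches = build_patch_extraction(config, det_endpoints, photos=photos)
    return heatmaps, det_endpoints, patches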