def shift(i, x=0, y=0):
    """Shift an image tensor by (x, y) via zero padding.

    Useful for aligning a conv2d filter when the input and/or the filter
    have even dimensions.
    """
    return image.pad_to_bounding_box(
        i,
        max(0, y),
        max(0, x),
        i.shape.as_list()[1] + abs(y),
        i.shape.as_list()[2] + abs(x),
    )
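# A minimal usage sketch for shift(), assuming `image` is `tf.image` (matching
# the bare `image.pad_to_bounding_box` call above) and a batched NHWC input;
# the shapes and offsets here are illustrative, not from the original code.
import tensorflow as tf
from tensorflow import image

batch = tf.zeros([1, 8, 8, 3])    # NHWC input with even spatial dims
shifted = shift(batch, x=1, y=1)  # pad 1 px at the top and left edges
print(shifted.shape)              # (1, 9, 9, 3)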
def decode_img_enhanced(leye_img, reye_img, region, label):
    precision_type = tf.float16
    region = tf.cast(region, tf.int32)
    leye_im = tf.io.decode_jpeg(leye_img)
    reye_im = tf.io.decode_jpeg(reye_img)

    '''Convert to float16/32 in the [0,1] range'''
    leye_im = convert_image_dtype(leye_im, precision_type)
    reye_im = convert_image_dtype(reye_im, precision_type)

    '''Resize'''
    leye_im = resize(leye_im, [config.eyeIm_size, config.eyeIm_size])
    reye_im = resize(reye_im, [config.eyeIm_size, config.eyeIm_size])

    '''Normalize'''
    # leye_im = tf.image.per_image_standardization(leye_im)
    # reye_im = tf.image.per_image_standardization(reye_im)

    orientation = tf.cast(tf.one_hot(region[24], depth=3), precision_type)
    eyelandmark = tf.cast(tf.concat([region[8:11], region[13:16]], 0),
                          tf.float32) / 640.0

    '''Create heatmap label'''
    if config.heatmap:
        hmFocus_size = 17 if config.mobile else 9  # tablet focus_size=9
        HM_FOCUS_IM = np.zeros((5, hmFocus_size, hmFocus_size, 1))
        stdv_list = [0.2, 0.25, 0.3, 0.35, 0.4]
        for level in range(5):  # 5 levels of std to construct heatmap
            stdv = stdv_list[level]  # 3/(12-level)
            for i in range(hmFocus_size):
                for j in range(hmFocus_size):
                    distanceFromCenter = 2 * \
                        np.linalg.norm(np.array([i - int(hmFocus_size / 2),
                                                 j - int(hmFocus_size / 2)])) / (hmFocus_size / 2)
                    gauss_prob = gauss(distanceFromCenter, stdv)
                    HM_FOCUS_IM[level, i, j, 0] = gauss_prob
            HM_FOCUS_IM[level, :, :, 0] /= np.sum(HM_FOCUS_IM[level, :, :, 0])
        heatmap_im = convert_image_dtype(HM_FOCUS_IM[0, :, :, :], tf.float32)
        heatmap_im = pad_to_bounding_box(
            heatmap_im,
            int(label[0] * config.scale + config.hm_size / 2 - hmFocus_size / 2),
            int(label[1] * config.scale + config.hm_size / 2 - hmFocus_size / 2),
            config.hm_size, config.hm_size)
        label = heatmap_im
    return (orientation, eyelandmark, leye_im, reye_im, label)
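# `gauss` is called above but not defined in this snippet. A plausible
# reconstruction is a zero-mean Gaussian density over the normalized
# distance-from-center with standard deviation `stdv`; this is an assumption,
# not the original implementation (the per-level normalization above divides
# by the sum, so any constant factor cancels out).
import numpy as np

def gauss(x, stdv):
    # Gaussian pdf at x for mean 0 and standard deviation stdv (assumed form)
    return np.exp(-(x ** 2) / (2 * stdv ** 2)) / (stdv * np.sqrt(2 * np.pi))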
def call(self, img, param, t=1):
    # Scale crop parameters, padding, and target size by the multiplier t
    param = [p * t for p in param]
    h, w, c = img.shape
    padding = tuple([p * t for p in self.padding])
    size = tuple([s * t for s in self.size])
    # NOTE: this unpack shadows the multiplier t with the top padding;
    # all uses of the multiplier happen above this line.
    l, t, r, b = padding
    _h = max(h + t + b, size[0])
    _w = max(w + l + r, size[1])
    img = tfimg.pad_to_bounding_box(img, t, l, _h, _w)
    if self.area_ratio == self.aspect_ratio == (1, 1):
        # Fixed-size crop at offset (x, y)
        x, y = param
        img = img[y:y + size[0], x:x + size[1]]
    else:
        # Variable-size crop, then resize back to the target size
        x, y, cols, rows = param
        img = tfimg.resize(img[y:y + rows, x:x + cols], size,
                           TF_INTERP[self.interp])
    return img
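# A minimal usage sketch, assuming `call` is a method of a pad-and-crop
# transform (here a hypothetical `PadCrop` class) whose constructor sets
# `padding=(l, t, r, b)`, `size=(h, w)`, `area_ratio`, `aspect_ratio`, and
# `interp`; the class name and all values are illustrative.
import tensorflow as tf

crop = PadCrop(size=(32, 32), padding=(4, 4, 4, 4),
               area_ratio=(1, 1), aspect_ratio=(1, 1), interp="nearest")
img = tf.zeros([32, 32, 3])
out = crop.call(img, param=[4, 4])  # (x, y) offset into the padded image
print(out.shape)                    # (32, 32, 3)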
def _parse_sequence_example_fn(sequence_example_proto):
    """
    Parse the input `tf.SequenceExample` proto using the features_spec

    Parameters
    ----------
    sequence_example_proto : string
        serialized tfrecord SequenceExample protobuf message

    Returns
    -------
    features : dict
        parsed features as `tf.Tensor` objects extracted from the protobuf
    labels : `tf.Tensor`
        parsed label as a `tf.Tensor` object extracted from the protobuf
    """
    context_features, sequence_features = io.parse_single_sequence_example(
        serialized=sequence_example_proto,
        context_features=context_features_spec,
        sequence_features=sequence_features_spec,
    )

    features_dict = dict()

    # Handle context features
    for feature_info in feature_config.get_context_features():
        feature_node_name = feature_info.get("node_name", feature_info["name"])
        default_tensor = tf.constant(
            value=feature_config.get_default_value(feature_info),
            dtype=feature_info["dtype"],
        )
        feature_tensor = context_features.get(feature_info["name"], default_tensor)
        feature_tensor = tf.expand_dims(feature_tensor, axis=0)

        # Preprocess features
        feature_tensor = preprocess_feature(feature_tensor, feature_info, preprocessing_map)

        features_dict[feature_node_name] = feature_tensor

    # Define mask to identify padded sequence records if required
    if required_fields_only and not feature_config.get_rank("serving_info")["required"]:
        """
        Define a dummy mask if the rank field is not a required field for serving

        NOTE: This masks all max_sequence_size records as 1, since there is no
        real way to know the number of sequence records in the query. There is
        no predefined required field, so we would need a full pass over all
        features to find the record shape, and that approach can be unstable
        if different features have different shapes. Hence we simply mask the
        whole sequence.
        """
        features_dict["mask"] = tf.constant(
            value=1, shape=[max_sequence_size], dtype=feature_config.get_rank("dtype")
        )
        sequence_size = tf.constant(max_sequence_size, dtype=tf.int64)
    else:
        # Typically used at training time to pad/clip to a fixed number of records per query
        # Use the rank feature as a reference tensor to infer shape/sequence_size in the query
        reference_tensor = sequence_features.get(feature_config.get_rank(key="node_name"))

        # Add mask for identifying padded sequence records
        mask = tf.ones_like(sparse.to_dense(sparse.reset_shape(reference_tensor)))
        sequence_size = tf.cast(tf.reduce_sum(mask), tf.int64)

        if pad_sequence:
            mask = tf.expand_dims(mask, axis=-1)

            def crop_fn():
                tf.print("\n[WARN] Bad query found. Number of sequence records : ",
                         tf.shape(mask)[1])
                return image.crop_to_bounding_box(
                    mask,
                    offset_height=0,
                    offset_width=0,
                    target_height=1,
                    target_width=max_sequence_size,
                )

            mask = tf.cond(
                tf.shape(mask)[1] <= max_sequence_size,
                # Pad if there are missing sequence records
                lambda: image.pad_to_bounding_box(
                    mask,
                    offset_height=0,
                    offset_width=0,
                    target_height=1,
                    target_width=max_sequence_size,
                ),
                # Crop if there are extra sequence records
                crop_fn,
            )
            mask = tf.squeeze(mask)
        else:
            mask = tf.squeeze(mask, axis=0)

        # Check validity of mask
        tf.debugging.assert_greater(sequence_size, tf.constant(0, dtype=tf.int64))

        features_dict["mask"] = mask
        sequence_size = max_sequence_size if pad_sequence else sequence_size

    # Pad sequence features to max_sequence_size
    for feature_info in feature_config.get_sequence_features():
        feature_node_name = feature_info.get("node_name", feature_info["name"])
        default_tensor = tf.fill(
            value=tf.constant(
                value=feature_config.get_default_value(feature_info),
                dtype=feature_info["dtype"],
            ),
            dims=[max_sequence_size if pad_sequence else sequence_size],
        )
        feature_tensor = sequence_features.get(feature_info["name"], default_tensor)

        if isinstance(feature_tensor, sparse.SparseTensor):
            feature_tensor = sparse.reset_shape(
                feature_tensor,
                new_shape=[1, max_sequence_size if pad_sequence else sequence_size],
            )
            feature_tensor = sparse.to_dense(feature_tensor)
            feature_tensor = tf.squeeze(feature_tensor, axis=0)

        # Preprocess features
        feature_tensor = preprocess_feature(feature_tensor, feature_info, preprocessing_map)

        features_dict[feature_node_name] = feature_tensor

    labels = features_dict.pop(feature_config.get_label(key="name"))

    return features_dict, labels
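# A minimal sketch of wiring the parse function into a tf.data pipeline,
# assuming the closure variables used above (feature_config, the *_spec dicts,
# max_sequence_size, pad_sequence, required_fields_only, preprocessing_map)
# are already bound; the file path and batch size are illustrative.
import tensorflow as tf

dataset = (
    tf.data.TFRecordDataset("queries.tfrecord")
    .map(_parse_sequence_example_fn, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(32)
)
for features, labels in dataset.take(1):
    print(features["mask"].shape, labels.shape)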
def decode_img(img, region, label):
    precision_type = tf.float16
    region = tf.cast(region, tf.int32)
    face_im = tf.io.decode_and_crop_jpeg(
        img, [region[5], region[4], region[3], region[2]],
        channels=config.channel)

    if config.regions == 'default':  # only for the default GazeCapture facial regions
        '''Crop eye regions from face region'''
        leye_im = face_im[region[10]:(region[10] + region[7]),
                          region[9]:(region[9] + region[8]), :]
        reye_im = face_im[region[15]:(region[15] + region[12]),
                          region[14]:(region[14] + region[13]), :]
    else:
        '''Decode and crop eye regions directly from raw image'''
        leye_im = tf.io.decode_and_crop_jpeg(
            img, [region[10], region[9], region[8], region[7]],
            channels=config.channel)
        reye_im = tf.io.decode_and_crop_jpeg(
            img, [region[15], region[14], region[13], region[12]],
            channels=config.channel)

    '''Convert to float16/32 in the [0,1] range'''
    leye_im = convert_image_dtype(leye_im, precision_type)
    reye_im = convert_image_dtype(reye_im, precision_type)

    '''Resize'''
    leye_im = resize(leye_im, [config.eyeIm_size, config.eyeIm_size])
    reye_im = resize(reye_im, [config.eyeIm_size, config.eyeIm_size])

    '''Normalize'''
    # leye_im = tf.image.per_image_standardization(leye_im)
    # reye_im = tf.image.per_image_standardization(reye_im)

    orientation = tf.cast(tf.one_hot(region[24], depth=3), precision_type)

    if config.arc == 'iTracker':
        face_im = convert_image_dtype(face_im, precision_type)
        face_im = resize(face_im, [config.faceIm_size, config.faceIm_size])
        '''Create face grid'''
        face_grid_im = convert_image_dtype(
            tf.ones((region[19], region[19], 1)), precision_type)
        face_grid_im = pad_to_bounding_box(face_grid_im, region[18], region[17],
                                           config.faceGrid_size,
                                           config.faceGrid_size)
    elif config.arc == 'SAGE':
        eyelandmark = tf.cast(tf.concat([region[8:11], region[13:16]], 0),
                              tf.float32) / 640.0
        # SAGE mode: mirror the left eye
        leye_im = tf.image.flip_left_right(leye_im)

    '''Create heatmap label'''
    if config.heatmap:
        hmFocus_size = 17  # if (config.mobile) else 9  # in pixel unit
        HM_FOCUS_IM = np.zeros((5, hmFocus_size, hmFocus_size, 1))
        stdv_list = [0.2, 0.25, 0.3, 0.35, 0.4]
        for level in range(5):  # 5 levels of std to construct heatmap
            stdv = stdv_list[level]  # 3/(12-level)
            for i in range(hmFocus_size):
                for j in range(hmFocus_size):
                    distanceFromCenter = 2 * \
                        np.linalg.norm(np.array([i - int(hmFocus_size / 2),
                                                 j - int(hmFocus_size / 2)])) / (hmFocus_size / 2)
                    gauss_prob = gauss(distanceFromCenter, stdv)
                    HM_FOCUS_IM[level, i, j, 0] = gauss_prob
            HM_FOCUS_IM[level, :, :, 0] /= np.sum(HM_FOCUS_IM[level, :, :, 0])
        heatmap_im = convert_image_dtype(HM_FOCUS_IM[0, :, :, :], tf.float32)
        heatmap_im = pad_to_bounding_box(
            heatmap_im,
            int(label[0] * config.scale + config.hm_size / 2 - hmFocus_size / 2),
            int(label[1] * config.scale + config.hm_size / 2 - hmFocus_size / 2),
            config.hm_size, config.hm_size)
        label = heatmap_im

    if config.arc == 'SAGE':
        return (orientation, eyelandmark, leye_im, reye_im, label)
    else:
        return (orientation, face_grid_im, face_im, leye_im, reye_im, label)
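# A minimal sketch of applying decode_img inside a tf.data pipeline, assuming
# a source dataset of (jpeg_bytes, region, label) tuples; the variable names
# and batch size here are illustrative, not from the original code.
import tensorflow as tf

ds = tf.data.Dataset.from_tensor_slices((jpeg_bytes, regions, labels))
ds = ds.map(decode_img, num_parallel_calls=tf.data.AUTOTUNE).batch(64)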
def _parse_sequence_example_fn(sequence_example_proto):
    """
    Parse the input `tf.SequenceExample` proto using the features_spec

    Args:
        sequence_example_proto: tfrecord SequenceExample protobuf data

    Returns:
        features: parsed features extracted from the protobuf
        labels: parsed label extracted from the protobuf
    """
    context_features, sequence_features = io.parse_single_sequence_example(
        serialized=sequence_example_proto,
        context_features=context_features_spec,
        sequence_features=sequence_features_spec,
    )

    features_dict = dict()

    # Explode context features into all records
    for feature_info in feature_config.get_context_features():
        feature_node_name = feature_info.get("node_name", feature_info["name"])
        feature_layer_info = feature_info.get("feature_layer_info")

        feature_tensor = context_features.get(feature_node_name)
        feature_tensor = tf.expand_dims(feature_tensor, axis=0)
        feature_tensor = tf.tile(feature_tensor, multiples=[max_num_records])

        # If feature is a string, then decode into numbers
        if feature_layer_info["type"] == FeatureTypeKey.STRING:
            feature_tensor = io.decode_raw(
                feature_tensor,
                out_type=tf.uint8,
                fixed_length=feature_layer_info["max_length"],
            )
            feature_tensor = tf.cast(feature_tensor, tf.float32)

        features_dict[feature_node_name] = feature_tensor

    # Pad sequence features to max_num_records
    for feature_info in feature_config.get_sequence_features():
        feature_node_name = feature_info.get("node_name", feature_info["name"])
        feature_layer_info = feature_info["feature_layer_info"]

        feature_tensor = sequence_features.get(feature_node_name)

        if isinstance(feature_tensor, sparse.SparseTensor):
            if feature_node_name == feature_config.get_rank(key="node_name"):
                # Add mask for identifying padded records
                mask = tf.ones_like(
                    sparse.to_dense(sparse.reset_shape(feature_tensor)))
                mask = tf.expand_dims(mask, axis=2)

                def crop_fn():
                    tf.print("\n[WARN] Bad query found. Number of records : ",
                             tf.shape(mask)[1])
                    return image.crop_to_bounding_box(
                        mask,
                        offset_height=0,
                        offset_width=0,
                        target_height=1,
                        target_width=max_num_records,
                    )

                mask = tf.cond(
                    tf.shape(mask)[1] < max_num_records,
                    # Pad if there are missing records
                    lambda: image.pad_to_bounding_box(
                        mask,
                        offset_height=0,
                        offset_width=0,
                        target_height=1,
                        target_width=max_num_records,
                    ),
                    # Crop if there are extra records
                    crop_fn,
                )
                mask = tf.squeeze(mask)

                # Check validity of mask
                tf.debugging.assert_greater(
                    tf.cast(tf.reduce_sum(mask), tf.float32), tf.constant(0.0))

                features_dict["mask"] = mask

            feature_tensor = sparse.reset_shape(
                feature_tensor, new_shape=[1, max_num_records])
            feature_tensor = sparse.to_dense(feature_tensor)
            feature_tensor = tf.squeeze(feature_tensor)

            # If feature is a string, then decode into numbers
            if feature_layer_info["type"] == FeatureTypeKey.STRING:
                feature_tensor = io.decode_raw(
                    feature_tensor,
                    out_type=tf.uint8,
                    fixed_length=feature_layer_info["max_length"],
                )
                feature_tensor = tf.cast(feature_tensor, tf.float32)
        else:
            # was an undefined `feature_name`; use the resolved node name
            raise ValueError("Invalid input : {}".format(feature_node_name))

        features_dict[feature_node_name] = feature_tensor

    labels = features_dict.pop(feature_config.get_label(key="name"))

    # Check if label is one-hot and correctly masked
    tf.debugging.assert_equal(
        tf.cast(tf.reduce_sum(labels), tf.float32), tf.constant(1.0))

    return features_dict, labels
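# A small illustration of the string-decode path above: tf.io.decode_raw with
# fixed_length converts each string into a fixed-width uint8 tensor, zero
# padding or truncating the raw bytes; the input values are illustrative.
import tensorflow as tf

t = tf.constant(["abc", "hello"])
decoded = tf.io.decode_raw(t, out_type=tf.uint8, fixed_length=4)
print(decoded.numpy())  # [[97 98 99 0], [104 101 108 108]]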
def _parse_sequence_example_fn(sequence_example_proto):
    """
    Parse the input `tf.SequenceExample` proto using the features_spec

    Args:
        sequence_example_proto: tfrecord SequenceExample protobuf data

    Returns:
        TODO(ashish): note - "features" is not a Features object. It's a
        {feat_name: tf.Tensor} mapping (so perhaps a bad name?)
        features: parsed features extracted from the protobuf
        labels: parsed label extracted from the protobuf
    """
    context, examples = io.parse_single_sequence_example(
        serialized=sequence_example_proto,
        context_features=context_features_spec,
        sequence_features=sequence_features_spec,
    )

    features = dict()

    # Explode context features into all records
    for feat, t in context.items():
        t = tf.expand_dims(t, axis=0)
        t = tf.tile(t, multiples=[max_num_records])

        # If feature is a string, then decode into numbers
        if feature_config.get_dict()[feat]["type"] == FeatureTypeKey.STRING:
            t = io.decode_raw(
                t,
                out_type=tf.uint8,
                fixed_length=feature_config.get_dict()[feat]["max_length"],
            )
            t = tf.cast(t, tf.float32)

        features[feat] = t

    # Pad sequence features to max_num_records
    for feat, t in examples.items():
        if isinstance(t, sparse.SparseTensor):
            if feat == "pos":
                # Add mask for identifying padded records
                mask = tf.ones_like(sparse.to_dense(sparse.reset_shape(t)))
                mask = tf.expand_dims(mask, axis=2)
                mask = image.pad_to_bounding_box(
                    mask,
                    offset_height=0,
                    offset_width=0,
                    target_height=1,
                    target_width=max_num_records,
                )
                features["mask"] = tf.squeeze(mask)

            t = sparse.reset_shape(t, new_shape=[1, max_num_records])
            t = sparse.to_dense(t)
            t = tf.squeeze(t)

            # If feature is a string, then decode into numbers
            if feature_config.get_dict()[feat]["type"] == FeatureTypeKey.STRING:
                t = io.decode_raw(
                    t,
                    out_type=tf.uint8,
                    fixed_length=feature_config.get_dict()[feat]["max_length"],
                )
                t = tf.cast(t, tf.float32)
        else:
            # Dense tensors are not supported; a possible handling was
            # sketched here before being disabled:
            # if len(t.shape) == 1:
            #     t = tf.expand_dims(t, axis=0)
            # if len(t.shape) == 2:
            #     t = tf.pad(t, paddings=[[0, 0], [0, max_num_records]])
            #     t = tf.squeeze(t)
            # else:
            #     raise Exception('Invalid input : {}'.format(feat))
            raise ValueError("Invalid input : {}".format(feat))

        features[feat] = t

    labels = features.pop(feature_config.label)

    return features, labels
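# A small illustration of the sparse padding idiom shared by the parsers
# above: reset_shape widens the dense_shape to [1, max_num_records] and
# to_dense fills the new positions with the default value (0); the sparse
# tensor contents here are illustrative.
import tensorflow as tf
from tensorflow import sparse

t = sparse.SparseTensor(indices=[[0, 0], [0, 1]], values=[3.0, 5.0],
                        dense_shape=[1, 2])
padded = sparse.to_dense(sparse.reset_shape(t, new_shape=[1, 4]))
print(padded.numpy())  # [[3. 5. 0. 0.]]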