def getLandmarksFromFrame(self, image, detected_faces):
    """Predict landmark coordinates for every detected face in one frame.

    Each face box is converted to a (center, scale) pair, the frame is
    cropped around it with ``crop``, and all crops are run through
    ``self.forward`` as a single batch.

    Args:
        image: Frame handed to ``crop``; the crop result is transposed with
            ``(2, 0, 1)``, so it is presumably an HxWxC numpy array -- confirm.
        detected_faces: Sequence of boxes indexed as ``d[0]..d[3]``;
            presumably ``(x1, y1, x2, y2)`` -- confirm against the detector.

    Returns:
        ``pts_img`` from ``get_preds_fromhm`` converted to nested Python
        lists, or ``[]`` when ``detected_faces`` is empty.
    """
    # Keep the explicit len() check: `not detected_faces` would be ambiguous
    # if the detector returns a numpy array.
    if len(detected_faces) == 0:
        return []

    centers = []
    scales = []
    crops = []
    for d in detected_faces:
        center = torch.FloatTensor(
            [d[2] - (d[2] - d[0]) / 2.0, d[3] - (d[3] - d[1]) / 2.0])
        # Lower the y-coordinate of the centre by 12% of the box height.
        center[1] = center[1] - (d[3] - d[1]) * 0.12
        # 195 is the reference box size used to normalise the scale
        # (presumably tied to how the network was trained -- confirm).
        scale = (d[2] - d[0] + d[3] - d[1]) / 195
        centers.append(center)
        scales.append(scale)

        inp = crop(image, center, scale)
        inp = torch.from_numpy(inp.transpose((2, 0, 1))).float()
        inp = inp.to(self.device)
        # Scale to [0, 1] and add the leading batch dimension in place.
        inp.div_(255.0).unsqueeze_(0)
        crops.append(inp)

    # Concatenate once instead of growing the batch with torch.cat on every
    # iteration, which re-copies the whole batch each time.
    imgs = torch.cat(crops, dim=0)

    out = self.forward(imgs)
    # Only the last element of the forward output is decoded.
    out = out[-1].cpu()
    _, pts_img = get_preds_fromhm(out, centers, scales)
    return pts_img.numpy().tolist()
def get_one_example(all_coords, input_size, get_offsets, whole_images,
                    whole_labels, eval_tracker):
    """Endlessly generate ``(seed_crop, image_crop, label_crop)`` triples.

    For every example a fresh seed is built, then one triple is yielded per
    offset returned by ``get_offsets(seed)``.  Once the offsets are
    exhausted the (possibly caller-modified) seed is handed to
    ``eval_tracker.eval_one_patch`` together with the full label patch.

    Args:
        all_coords: Indexable collection of coordinates; the selected entry
            is reversed (``[::-1]``) before being passed to ``get_one_input``.
        input_size: Forwarded to ``get_one_input`` and ``utils.initial_seed``.
        get_offsets: Callable taking the seed and returning an iterable of
            offsets.
        whole_images: Forwarded to ``get_one_input``.
        whole_labels: Forwarded to ``get_one_input``.
        eval_tracker: Object exposing ``eval_one_patch(labels, seed)``.

    Yields:
        Tuples ``(seed_crop, image_crop, label_crop)``, each cropped to
        ``FLAGS.fov_size`` at the current offset.
    """
    while True:
        # NOTE(review): `index` is not bound anywhere in this function; it is
        # presumably a module-level name -- confirm how it is selected.
        coord = all_coords[index][::-1]
        image_patch, label_patch = get_one_input(
            coord, input_size, whole_images, whole_labels)
        seed = logit(utils.initial_seed(input_size))

        for offset in get_offsets(seed):
            seed_crop = utils.crop(seed, offset, FLAGS.fov_size)
            image_crop = utils.crop(image_patch, offset, FLAGS.fov_size)
            label_crop = utils.crop(label_patch, offset, FLAGS.fov_size)
            # The seed crop must be a view onto `seed`, not a copy
            # (presumably so the consumer's in-place writes reach `seed`).
            assert seed_crop.base is seed
            yield seed_crop, image_crop, label_crop

        eval_tracker.eval_one_patch(label_patch, seed)
def _loss_mask(self, proto_output, pred_mask_coef, gt_bbox_norm, gt_masks,
               positiveness, max_id_for_anchors, max_masks_for_train):
    """Compute the mask loss averaged over all positive anchors in the batch.

    For each sample the predicted masks of the positive anchors are
    assembled as ``proto x coef^T``, compared with the matched ground-truth
    masks via sigmoid cross-entropy, restricted to the ground-truth box with
    ``utils.crop`` and normalised by the box area.

    Args:
        proto_output: Prototype masks, indexed per sample; per the original
            comments each sample is ``[80, 80, num_mask]``.
        pred_mask_coef: Per-anchor mask coefficients, indexed per sample.
        gt_bbox_norm: Ground-truth boxes, indexed per sample and then by
            object id (presumably normalised coordinates -- confirm).
        gt_masks: Ground-truth masks, indexed per sample and then by object
            id.
        positiveness: Per-anchor flags; anchors equal to ``1`` are positive.
        max_id_for_anchors: For each anchor, the id of its matched
            ground-truth object.
        max_masks_for_train: Currently unused.
            TODO: limit the number of positives per sample to this value.

    Returns:
        Scalar float32 tensor: the summed per-anchor loss divided by the
        number of positive anchors, or ``0`` when there are none.
    """
    num_batch = tf.shape(proto_output)[0]
    loss_mask = tf.constant(0., dtype=tf.float32)
    total_pos = 0
    for idx in tf.range(num_batch):
        proto = proto_output[idx]
        mask_coef = pred_mask_coef[idx]
        mask_gt = gt_masks[idx]
        bbox_norm = gt_bbox_norm[idx]
        pos = positiveness[idx]
        max_id = max_id_for_anchors[idx]

        # Squeeze only the trailing axis of tf.where's [num_pos, 1] result so
        # pos_indices is always rank 1, even with exactly one positive.  An
        # unrestricted squeeze collapses that case to a scalar and forces a
        # rank-changing special case further down.
        pos_indices = tf.squeeze(tf.where(pos == 1), axis=-1)

        pos_mask_coef = tf.gather(mask_coef, pos_indices)  # [num_pos, num_mask]
        pos_max_id = tf.gather(max_id, pos_indices)  # [num_pos]
        total_pos += tf.size(pos_indices)

        # [80, 80, num_mask] x [num_pos, num_mask]^T -> [80, 80, num_pos],
        # then move the anchor axis first -> [num_pos, 80, 80].
        pred_mask = tf.linalg.matmul(proto, pos_mask_coef,
                                     transpose_a=False, transpose_b=True)
        pred_mask = tf.transpose(pred_mask, perm=(2, 0, 1))

        # Ground truth matched to each positive anchor.
        gt = tf.gather(mask_gt, pos_max_id)  # [num_pos, 80, 80]
        bbox = tf.gather(bbox_norm, pos_max_id)  # [num_pos, 4]
        bbox_center = utils.map_to_center_form(bbox)  # [num_pos, 4]
        # Product of the last two center-form components (width * height).
        area = bbox_center[:, -1] * bbox_center[:, -2]

        s = tf.nn.sigmoid_cross_entropy_with_logits(labels=gt,
                                                    logits=pred_mask)
        # Not a real crop: zeroes the loss outside the ground-truth box.
        s = utils.crop(s, bbox, origin_w=80, origin_h=80)
        loss = tf.reduce_sum(s, axis=[1, 2]) / area  # [num_pos]
        loss_mask += tf.reduce_sum(loss)

    # divide_no_nan yields 0 instead of NaN for a batch with no positives.
    return tf.math.divide_no_nan(loss_mask, tf.cast(total_pos, tf.float32))
def _loss_mask(self, protonet_output, pred_mask_coef, gt_bbox_norm, gt_masks,
               positiveness, max_id_for_anchors, max_masks_for_train):
    """Return the area-normalised mask loss per positive anchor.

    Per sample, masks for the positive anchors are built as a linear
    combination of the prototype masks, scored against the matched
    ground-truth masks with sigmoid cross-entropy, zeroed outside the
    ground-truth box by ``utils.crop`` and divided by the box area.

    Args:
        protonet_output: Prototype masks, indexed per sample (presumably
            ``[height, width, num_mask]`` per sample -- confirm).
        pred_mask_coef: Per-anchor mask coefficients, indexed per sample.
        gt_bbox_norm: Ground-truth boxes, indexed per sample and then by
            object id.
        gt_masks: Ground-truth masks, indexed per sample and then by object
            id.
        positiveness: Per-anchor flags; anchors equal to ``1`` are positive.
        max_id_for_anchors: For each anchor, the id of its matched
            ground-truth object.
        max_masks_for_train: Currently unused.
            TODO: limit the number of positives to at most this value.

    Returns:
        Scalar float32 tensor: total loss divided by the number of positive
        anchors in the batch, or ``0`` when the batch has no positives.
    """
    batch_size = tf.shape(protonet_output)[0]
    loss_mask = tf.constant(0., dtype=tf.float32)
    total_pos = 0
    for idx in tf.range(batch_size):
        proto = protonet_output[idx]
        mask_coef = pred_mask_coef[idx]
        mask_gt = gt_masks[idx]
        bbox_norm = gt_bbox_norm[idx]
        pos = positiveness[idx]
        max_id = max_id_for_anchors[idx]

        # tf.where returns [num_pos, 1]; dropping only the last axis keeps a
        # rank-1 index vector for any num_pos (0, 1 or many), so the gathers
        # below need no special case for a single positive.
        pos_indices = tf.squeeze(tf.where(pos == 1), axis=-1)

        pos_mask_coef = tf.gather(mask_coef, pos_indices)
        pos_max_id = tf.gather(max_id, pos_indices)
        total_pos += tf.size(pos_indices)

        # Linear combination of prototypes, then anchor axis moved first.
        pred_mask = tf.linalg.matmul(proto, pos_mask_coef,
                                     transpose_a=False, transpose_b=True)
        pred_mask = tf.transpose(pred_mask, perm=(2, 0, 1))

        # Ground-truth mask and box matched to each positive anchor.
        gt = tf.gather(mask_gt, pos_max_id)
        bbox = tf.gather(bbox_norm, pos_max_id)
        bbox_center = utils.map_to_center_form(bbox)
        # Product of the last two center-form components (width * height).
        area = bbox_center[:, -1] * bbox_center[:, -2]

        s = tf.nn.sigmoid_cross_entropy_with_logits(labels=gt,
                                                    logits=pred_mask)
        # Not a real crop: zeroes the loss outside the ground-truth box.
        s = utils.crop(s, bbox)
        loss = tf.reduce_sum(s, axis=[1, 2]) / area
        loss_mask += tf.reduce_sum(loss)

    # Avoid NaN when no anchor in the whole batch is positive.
    return tf.math.divide_no_nan(loss_mask, tf.cast(total_pos, tf.float32))
def generate_sample_face(image, landmarks, detector, input_resolution=256,
                         output_resolution=64):
    """Build one training sample per face detected in ``image``.

    Args:
        image: Image passed to ``utils.get_face_bbox`` and ``utils.crop``.
        landmarks: Indexable collection of landmark points; each entry has
            ``1`` added before being transformed (so entries presumably
            support arithmetic, e.g. numpy arrays -- confirm).
        detector: Face detector forwarded to ``utils.get_face_bbox``.
        input_resolution: Resolution passed to ``utils.crop``.
        output_resolution: Side length of the heatmaps and resolution passed
            to ``utils.transform``.

    Returns:
        A list with one dict per detected face, holding the keys ``image``
        (crop divided by 255), ``heatmaps``, ``center``, ``scale`` and
        ``pts``.  The list is empty when no face is detected.
    """
    landmark_count = len(landmarks)
    faces = utils.get_face_bbox(image, detector)
    samples = []
    if len(faces) == 0:
        return samples

    for face in faces:
        mid_x = (face.left() + face.right()) / 2
        mid_y = (face.top() + face.bottom()) / 2
        # Lower the y-coordinate of the centre by 12% of the box height.
        mid_y = mid_y - (face.bottom() - face.top()) * 0.12
        center = [mid_x, mid_y]
        # Fixed scale; the box-derived alternative,
        # (width + height) / 195.0, is disabled in this version.
        scale = 2.0

        face_crop = utils.crop(image, center, scale,
                               resolution=input_resolution)

        landmark_maps = np.zeros(
            (output_resolution, output_resolution, landmark_count))
        moved_points = []
        for k in range(landmark_count):
            point = utils.transform(landmarks[k] + 1, center, scale,
                                    resolution=output_resolution)
            moved_points.append(point)
            # One Gaussian blob per landmark, each in its own channel.
            landmark_maps[:, :, k] = utils.draw_gaussian(
                landmark_maps[:, :, k], point, 1)

        samples.append({
            'image': face_crop / 255,
            'heatmaps': landmark_maps,
            'center': center,
            'scale': scale,
            'pts': moved_points
        })

    return samples
def get_full_image(image_data):
    """Assemble a multi-channel crop for one sample.

    Reads ``cropped_img_size``, ``channels``, ``image_time_offset_idx`` and
    ``station_pixel_coords`` from the enclosing scope; none of them is
    defined in this block.

    Args:
        image_data: Source handle forwarded to ``utils.fetch_hdf5_sample``.

    Returns:
        A float64 array of shape
        ``(cropped_img_size, cropped_img_size, len(channels))``, or ``None``
        when any channel is missing, does not have shape ``(650, 1500)``, or
        cannot be cropped.
    """
    all_channels = np.empty(
        [cropped_img_size, cropped_img_size, len(channels)])
    for ch_idx, channel in enumerate(channels):
        raw_img = utils.fetch_hdf5_sample(channel, image_data,
                                          image_time_offset_idx)
        if raw_img is None or raw_img.shape != (650, 1500):
            return None
        try:
            array_cropped = utils.crop(copy.deepcopy(raw_img),
                                       station_pixel_coords,
                                       cropped_img_size)
        except Exception:
            # Best effort: a failed crop discards the whole sample.  Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            return None
        # TODO: an earlier revision flipped the crop vertically
        # (cv.flip(array_cropped, 0)); it is unclear why.
        # TODO: decide whether to min/max-normalise to uint8 here.
        all_channels[:, :, ch_idx] = array_cropped.astype(np.float64)
    return all_channels
def _loss_mask(self, prior_max_index, coef_p, proto_p, mask_gt, prior_max_box, conf_gt, use_weight_sum=False, use_cropped_mask=False):
    """Compute the mask loss over the positive priors of a batch.

    For every sample, the priors with ``conf_gt > 0`` are selected, at most
    ``self._max_masks_for_train`` of them are kept (random subset), their
    masks are assembled as ``proto x coef^T`` and compared with the matched
    ground-truth masks using (optionally weighted) sigmoid cross-entropy.

    Args:
        prior_max_index: Per sample, the ground-truth object index matched to
            each prior.
        coef_p: Per sample, the predicted mask coefficients of each prior.
        proto_p: Prototype masks; axes 1 and 2 are read as height and width.
        mask_gt: Ground-truth masks; transposed with ``(0, 2, 3, 1)`` below,
            so presumably ``[batch, num_object, height, width]`` on entry.
        prior_max_box: Per sample, the ground-truth box matched to each prior.
        conf_gt: Per sample, per-prior labels; ``> 0`` marks a positive.
        use_weight_sum: If true, use the per-pixel weighted cross-entropy.
        use_cropped_mask: If true, pass prediction and ground truth through
            ``utils.crop`` with the matched boxes first.

    Returns:
        Summed loss divided by ``proto_h * proto_w`` and by the batch size.
    """
    shape_proto = tf.shape(proto_p)
    proto_h = shape_proto[1]
    proto_w = shape_proto[2]
    num_batch = shape_proto[0]
    loss_m = 0.0

    mask_gt = tf.transpose(
        mask_gt, (0, 2, 3, 1))  # [batch, height, width, num_object]

    for i in tf.range(num_batch):
        pos_indices = tf.where(conf_gt[i] > 0)
        _pos_prior_index = tf.gather_nd(
            prior_max_index[i], pos_indices)  # shape: [num_positives]
        _pos_prior_box = tf.gather_nd(prior_max_box[i],
                                      pos_indices)  # presumably [num_positives, 4]
        _pos_coef = tf.gather_nd(coef_p[i],
                                 pos_indices)  # presumably [num_positives, num_coef]
        _mask_gt = mask_gt[i]

        if tf.shape(_pos_prior_index)[0] == 0:  # no positives in this sample
            continue

        # If exceeds the number of masks for training, select a random subset
        old_num_pos = tf.shape(_pos_coef)[0]
        if old_num_pos > self._max_masks_for_train:
            perm = tf.random.shuffle(tf.range(tf.shape(_pos_coef)[0]))
            select = perm[:self._max_masks_for_train]
            _pos_coef = tf.gather(_pos_coef, select)
            _pos_prior_index = tf.gather(_pos_prior_index, select)
            _pos_prior_box = tf.gather(_pos_prior_box, select)

        num_pos = tf.shape(_pos_coef)[0]
        # Pick the ground-truth mask of the object matched to each positive.
        _pos_mask_gt = tf.gather(_mask_gt, _pos_prior_index, axis=-1)

        # mask assembly by linear combination
        mask_p = tf.linalg.matmul(
            proto_p[i], _pos_coef, transpose_a=False,
            transpose_b=True)  # [proto_height, proto_width, num_pos]

        # crop the pred (not real crop, zero out the area outside the gt box)
        # NOTE(review): both crop calls are assumed to sit inside this `if`;
        # the original indentation was lost -- confirm.
        if use_cropped_mask:
            mask_p = utils.crop(
                mask_p, _pos_prior_box)  # _pos_prior_box.shape: (num_pos, 4)
            _pos_mask_gt = utils.crop(_pos_mask_gt, _pos_prior_box)

        if use_weight_sum:
            # The idea was borrowed from the UNET weighted loss function
            # https://jaidevd.github.io/posts/weighted-loss-functions-for-instance-segmentation/
            # It was modified to handle instance segmentation.

            # Normalize the mask loss to emulate roi pooling's effect on loss.
            # (disabled)
            # pos_get_csize = utils.map_to_center_form(_pos_prior_box)
            # gt_box_width = pos_get_csize[:, 2] * tf.cast(proto_w, tf.float32)
            # gt_box_height = pos_get_csize[:, 3] * tf.cast(proto_h, tf.float32)
            # _area_pos = gt_box_width * gt_box_height
            # _area_neg = (tf.cast(proto_h, tf.float32) * tf.cast(proto_w, tf.float32)) - _area_pos

            weight_ip = tf.zeros(tf.shape(_pos_mask_gt), dtype=tf.float32)
            # Per mask: w_1 = 1 - mean(gt) over the two spatial axes, and
            # w_0 = mean(gt).  For binary masks these are the background and
            # foreground pixel fractions respectively.
            w_1 = 1 - tf.reduce_sum(_pos_mask_gt, [0, 1]) / tf.cast(
                tf.shape(weight_ip)[0] * tf.shape(weight_ip)[1], tf.float32)
            w_0 = 1 - w_1
            # Pixels with gt == 1 receive weight w_1 (broadcast over the
            # last axis) ...
            w_1_index = tf.where(_pos_mask_gt == 1)
            weight_sum = tf.tensor_scatter_nd_update(
                weight_ip, w_1_index, tf.ones(tf.shape(w_1_index)[0])) * (
                    w_1)  # *_area_neg/_area_pos)
            # ... and pixels with gt == 0 receive weight w_0.
            w_0_index = tf.where(_pos_mask_gt == 0)
            weight_ip = tf.tensor_scatter_nd_update(
                weight_ip, w_0_index, tf.ones(
                    tf.shape(w_0_index)[0])) * w_0
            weight_sum += weight_ip

            # The combined per-pixel weight map is passed as the third
            # positional argument (pos_weight in the TF2 signature).
            mask_loss = tf.nn.weighted_cross_entropy_with_logits(
                _pos_mask_gt, mask_p, weight_sum)
        else:
            mask_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                _pos_mask_gt, mask_p)

        # Disabled alternative using the Keras loss:
        # Adding extra dimension as i/p and o/p shapes are different with "reduction" is set to None.
        # https://github.com/tensorflow/tensorflow/issues/27190
        # _pos_mask_gt = tf.transpose(_pos_mask_gt, (2,0,1))
        # mask_p = tf.transpose(mask_p, (2,0,1))
        # _pos_mask_gt = tf.reshape(_pos_mask_gt, [ -1, proto_h * proto_w])
        # mask_p = tf.reshape(mask_p, [ -1, proto_h * proto_w])
        # mask_loss = tf.keras.losses.binary_crossentropy(_pos_mask_gt, mask_p)

        # Rescale so a subsampled sample contributes as if all of its
        # positives had been used.
        if old_num_pos > num_pos:
            mask_loss *= tf.cast(old_num_pos / num_pos, tf.float32)

        loss_m += tf.reduce_sum(mask_loss)

    # NOTE(review): this normalisation is assumed to run once, after the
    # loop; the original indentation was lost -- confirm.
    loss_m /= (tf.cast(proto_h, tf.float32) * tf.cast(proto_w, tf.float32))

    return loss_m / tf.cast(num_batch, tf.float32)
def _loss_mask(self, proto_output, pred_mask_coef, gt_bbox_norm, gt_masks,
               positiveness, max_id_for_anchors, max_masks_for_train):
    """Compute the mask loss, training on at most N positives per sample.

    For each sample the positive anchors are selected; if there are more
    than ``max_masks_for_train`` a random subset of that size is kept and
    its loss is scaled up by ``old_num_pos / num_pos``.  Predicted masks are
    assembled as ``proto x coef^T``, scored with sigmoid cross-entropy,
    zeroed outside the ground-truth box by ``utils.crop`` and normalised by
    the box area.

    Args:
        proto_output: Prototype masks, indexed per sample; per the original
            comments ``proto x coef^T`` is ``[138, 138, num_pos]``.
        pred_mask_coef: Per-anchor mask coefficients, indexed per sample.
        gt_bbox_norm: Ground-truth boxes, indexed per sample and then by
            object id; cast to float32 before the area is computed.
        gt_masks: Ground-truth masks, indexed per sample and then by object
            id.
        positiveness: Per-anchor flags; anchors equal to ``1`` are positive.
        max_id_for_anchors: For each anchor, the id of its matched
            ground-truth object.
        max_masks_for_train: Maximum number of positive anchors used per
            sample.

    Returns:
        Scalar float32 tensor: summed loss divided by the number of positive
        anchors actually used, or ``0`` when there are none.
    """
    num_batch = tf.shape(proto_output)[0]
    # A tensor (not a Python float), so `loss_mask.dtype` below is valid even
    # when every sample is skipped in eager mode.
    loss_mask = tf.constant(0., dtype=tf.float32)
    total_pos = 0
    for idx in tf.range(num_batch):
        proto = proto_output[idx]
        mask_coef = pred_mask_coef[idx]
        mask_gt = gt_masks[idx]
        bbox_norm = gt_bbox_norm[idx]  # [100, 4] -> [num_obj, 4]
        pos = positiveness[idx]
        max_id = max_id_for_anchors[idx]

        # Squeeze only the trailing axis of tf.where's [num_pos, 1] result so
        # pos_indices stays rank 1 even with a single positive; no
        # expand_dims special case is needed afterwards.
        pos_indices = tf.squeeze(tf.where(pos == 1), axis=-1)

        # If exceeds the number of masks for training, select a random subset
        old_num_pos = tf.size(pos_indices)
        if old_num_pos > max_masks_for_train:
            pos_indices = tf.random.shuffle(pos_indices)[:max_masks_for_train]

        num_pos = tf.size(pos_indices)
        if num_pos == 0:
            continue
        total_pos += num_pos

        pos_mask_coef = tf.gather(mask_coef, pos_indices)  # [num_pos, k]
        pos_max_id = tf.gather(max_id, pos_indices)  # [num_pos]

        # Ground truth matched to each positive anchor.
        gt = tf.gather(mask_gt, pos_max_id)
        bbox = tf.gather(bbox_norm, pos_max_id)

        # proto x coef^T has the anchor axis last; move it first.
        pred_mask = tf.linalg.matmul(proto, pos_mask_coef,
                                     transpose_a=False, transpose_b=True)
        pred_mask = tf.transpose(pred_mask, perm=(2, 0, 1))

        s = tf.nn.sigmoid_cross_entropy_with_logits(labels=gt,
                                                    logits=pred_mask)
        s = utils.crop(s, bbox)

        # Normalise each anchor's loss by its box area (product of the last
        # two center-form components).
        bbox_center = utils.map_to_center_form(tf.cast(bbox, tf.float32))
        area = bbox_center[:, -1] * bbox_center[:, -2]
        mask_loss = tf.reduce_sum(s, axis=[1, 2]) / area

        # Compensate for the positives dropped by the random subsampling.
        if old_num_pos > num_pos:
            mask_loss = mask_loss * tf.cast(
                (old_num_pos / num_pos), mask_loss.dtype)
        loss_mask += tf.reduce_sum(mask_loss)

    # divide_no_nan yields 0 instead of NaN for a batch with no positives.
    return tf.math.divide_no_nan(loss_mask,
                                 tf.cast(total_pos, loss_mask.dtype))