def _getLabels(self, coords, im):
    """Build the lip mask label for a single image.

    The lip landmarks picked out by ``helenUtils.getLipCoords`` are scaled
    by the number of image rows, truncated to integers and passed to
    ``utils.getMask`` together with the image's (rows, cols) size, which is
    used for both size arguments.

    NOTE(review): both coordinate axes are scaled by ``len(im)``, so this
    presumably expects normalized coords on a square image -- confirm.

    Returns the mask with one extra trailing axis appended.
    """
    lip_points = np.array(helenUtils.getLipCoords(coords))

    n_rows = len(im)
    n_cols = len(im[0])
    im_size = (n_rows, n_cols)

    # utils.getMask needs the lips in pixel-coordinate units
    lip_pixels = (n_rows * lip_points).astype(int)

    lip_mask = utils.getMask([lip_pixels], im_size, im_size)
    return np.expand_dims(lip_mask, axis=-1)
def _getLabels(self, coords, im):
    """Build the bounding-box and mask labels for the lips in one image.

    Parameters
    ----------
    coords:
        Landmark coordinates forwarded to ``helenUtils.getLipCoords``.
        Presumably normalized to [0, 1] (they are multiplied by the image
        size to obtain pixels) -- confirm against the caller.
    im:
        Image the labels belong to.  Only ``len(im)`` and ``len(im[0])``
        are read.

    Returns
    -------
    list
        ``[bbox, mask]``: ``bbox`` is the lip bounding box after
        ``utils.getExpandedBbox(bbox, 0.5, 0.5)``; ``mask`` is the result of
        ``utils.getMask`` with one trailing axis appended.
    """
    # Extract the lips once, with this instance's ibug_version setting, so
    # that the bbox and the mask are derived from the very same points.
    # (Previously the bbox used getLipCoords' default ibug_version.)
    lip_coords_normalized = helenUtils.getLipCoords(
        coords, ibug_version=self.ibug_version)

    # need lip_coords in pixel-coordinate units for generating masks
    # NOTE(review): both axes are scaled by len(im); assumes a square image.
    im_size = (len(im), len(im[0]))
    lip_coords = (len(im) * np.array(lip_coords_normalized)).astype(int)
    mask = utils.getMask([lip_coords], im_size, im_size)
    mask = np.expand_dims(mask, axis=-1)

    # bbox coords stay in the same units as the incoming coords
    bbox = utils.getBbox(lip_coords_normalized)
    bbox = utils.getExpandedBbox(bbox, 0.5, 0.5)
    return [bbox, mask]
def _getLabels(self, coords, im):
    """Build one stack of lip heatmaps per entry of ``self.pdfs``.

    Parameters
    ----------
    coords:
        Flat or nested landmark coordinates; reshaped to
        ``(self.num_coords, 2)`` before the lips are extracted.
    im:
        Unused here; kept so the signature matches the other label builders.

    Returns
    -------
    list
        One array per entry of ``self.pdfs``.  Entry ``i`` holds the
        heatmaps from ``utils.coordsToHeatmapsFast`` with the first axis
        moved last, each channel divided by its own maximum, resized to a
        square of side ``self.mask_side_len // 2**i``.
    """
    coords = np.reshape(coords, (self.num_coords, 2))
    lip_coords = helenUtils.getLipCoords(coords)

    labels = []
    for level, level_pdfs in enumerate(self.pdfs):
        masks = utils.coordsToHeatmapsFast(lip_coords, level_pdfs)
        masks = np.moveaxis(masks, 0, -1)
        # scale every channel so that its peak value is 1
        masks /= np.max(masks, axis=(0, 1))

        # Each level halves the side length.  Floor-divide explicitly:
        # cv2.resize needs integer sizes, and plain `/` only yields an int
        # under Python 2 classic division.
        side_len = int(self.mask_side_len // 2**level)
        masks = cv2.resize(masks, (side_len, side_len),
                           interpolation=cv2.INTER_LINEAR)
        labels.append(masks)
    return labels
def getTrainingPair(self, im, coords, augment=False):
    """Turn one image and its landmarks into a network input/label pair.

    Steps: optional random rotation, crop to the (optionally randomly
    scaled and shifted) square around the landmarks, optional horizontal
    flip, resize of the crop to ``self.targ_im_len``, then generation of
    two sets of lip heatmaps from ``self.pdfs`` and ``self.hd_pdfs``.

    Parameters
    ----------
    im:
        Source image.
    coords:
        Landmark coordinates in pixel units of ``im``, indexed as
        ``coords[:, 0]`` / ``coords[:, 1]``.  The caller's array is left
        untouched.
    augment:
        When true, apply the random rotation, scale/shift and flip.

    Returns
    -------
    tuple
        ``([im], [masks, hd_masks])``.  Both heatmap stacks are resized to
        ``self.hd_mask_side_len`` on each side.
    """
    # Work on a private copy: the crop below shifts the coordinates in
    # place, which would otherwise corrupt the caller's stored landmarks
    # and make repeated calls on the same array drift.
    coords = np.array(coords, copy=True)

    if augment:
        # random rotation
        # NOTE(review): rand() is in [0, 1), so the angle is always in
        # [0, max_rot_deg) -- one direction only.  Confirm this is intended.
        width = len(im[0])
        height = len(im)
        center = [(height - 1) / 2.0, (width - 1) / 2.0]
        max_rot_deg = 10
        rot_deg = max_rot_deg * np.random.rand(1)
        rot_rad = np.deg2rad(rot_deg)
        im = scipy.misc.imrotate(im, rot_deg)
        coords = utils.getRotatedPoints(coords, center, rot_rad)

    bbox = utils.getBbox(coords)
    square = utils.getSquareFromRect(bbox)
    if augment:
        # random scale and shift
        rect = utils.getRandomlyExpandedBbox(square, -0.2, 0.3)
        max_shift = 0.1 * (square[2] - square[0])
        shifts = max_shift * np.random.rand(2)
        rect = np.array(utils.getShiftedBbox(rect, shifts)).astype(int)
    else:
        rect = utils.getExpandedBbox(square, 0.0, 0.0)

    # make sure that rect does not go beyond image borders
    rect = utils.getClippedBbox(im, rect)

    # crop: express the landmarks relative to the crop origin
    coords[:, 0] -= rect[0]
    coords[:, 1] -= rect[1]
    im = utils.getCropped(im, rect)

    # NOTE: brightness / saturation augmentation is currently disabled.

    # just the lip coords for now; the flip is applied to the coords by
    # getLipCoords and to the image by fliplr
    flipped = bool(np.random.randint(0, 2)) if augment else False
    lip_coords = helenUtils.getLipCoords(
        coords, len(im[0]), flip_x=flipped, ibug_version=self.ibug_version)
    im = np.fliplr(im) if flipped else im

    # normalize the coords and image
    im = cv2.resize(im, (self.targ_im_len, self.targ_im_len))
    crop_width = rect[3] - rect[1]
    crop_height = rect[2] - rect[0]
    normalized_lip_coords = helenUtils.normalizeCoords(
        lip_coords, crop_width, crop_height)

    # try using hd masks for everything: both stacks get the hd side length
    hd_l = self.hd_mask_side_len

    def _unitPeakHeatmaps(pdfs):
        # Heatmaps for the lips, channels last, each channel scaled to a
        # peak of 1, resized to the hd side length.
        heatmaps = utils.coordsToHeatmapsFast(normalized_lip_coords, pdfs)
        heatmaps = np.moveaxis(heatmaps, 0, -1)
        heatmaps /= np.max(heatmaps, axis=(0, 1))
        return cv2.resize(heatmaps, (hd_l, hd_l),
                          interpolation=cv2.INTER_LINEAR)

    masks = _unitPeakHeatmaps(self.pdfs)
    hd_masks = _unitPeakHeatmaps(self.hd_pdfs)
    return [im], [masks, hd_masks]
def testNormalizedDistanceError(model, batch_generator, ibug_version=True):
    """Print the eye-distance-normalized lip landmark error of a model.

    For every image, lip coordinates are predicted with the cascaded model
    (base masks plus residual masks), divided by the image size, and
    compared point by point with the labelled lip coordinates.  Each
    point's Euclidean error is divided by the value of
    ``helenUtils.getEyeDistance`` for that image and averaged per image.
    The median and mean of the per-image averages and the timing are
    printed.

    Parameters
    ----------
    model:
        Model handed to ``getNormalizedCascadedMasksFromImage``.
    batch_generator:
        Should be class PointsBatchGenerator.  Only ``getAllData()`` is
        used; images are taken from ``X[0]`` and labels from ``Y[0]``.
    ibug_version:
        Forwarded to the ``helenUtils`` landmark helpers.

    Returns
    -------
    None
        Results are printed only.  Nothing but the header line is printed
        when the test set is empty.
    """
    X, Y = batch_generator.getAllData()
    ims, labels = X[0], Y[0]
    num_ims = len(ims)

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('Processing test set of images, counted ' + str(num_ims) + ': ')
    if num_ims == 0:
        # Nothing to average; avoids dividing by zero / indexing an empty
        # list further down.
        return

    # Side lengths handed to getCoordsFromPointMasks for the two stages.
    base_side = 224
    residual_side = 28

    overall_avg = 0.0
    all_avgs = []
    t1 = time.time()
    for i, im in enumerate(ims):
        eye_dist = helenUtils.getEyeDistance(labels[i],
                                             ibug_version=ibug_version)
        lip_labels = helenUtils.getLipCoords(labels[i], 1.0,
                                             ibug_version=ibug_version)

        # cascaded version: base prediction plus a residual that is
        # centred by subtracting half the residual side length
        base_masks, residual_masks = getNormalizedCascadedMasksFromImage(
            model, im)
        base_coords = utils.getCoordsFromPointMasks(
            base_masks, base_side, base_side, 'mean')
        residual_coords = utils.getCoordsFromPointMasks(
            residual_masks, residual_side, residual_side, 'mean')
        lip_preds = np.add(base_coords, residual_coords) - residual_side / 2.0

        # bring the predictions to the same normalized units as the labels
        lip_preds[:, 0] /= float(len(im))
        lip_preds[:, 1] /= float(len(im[0]))

        cur_avg = 0.0
        for j in range(len(lip_preds)):
            diff = lip_preds[j] - lip_labels[j]
            cur_avg += np.sqrt(diff.dot(diff)) / eye_dist
        cur_avg /= len(lip_preds)

        overall_avg += cur_avg
        all_avgs.append(cur_avg)
        utils.informProgress(i, num_ims)
    processing_time = time.time() - t1

    all_avgs = sorted(all_avgs)
    overall_avg /= num_ims
    # Floor division keeps the index an int under true division as well;
    # for an even count this picks the upper of the two middle values.
    print('\nMedian normalized landmark error: ' + str(
        all_avgs[len(all_avgs) // 2]))
    print('Average normalized landmark error: ' + str(overall_avg))
    print('Total processing time: ' + str(processing_time))
    print('Per-image processing time: ' + str(
        processing_time / float(num_ims)))
def _getLabels(self, coords, im):
    """Build the lip line mask label for a single image.

    The lips are extracted from ``coords`` with ``helenUtils.getLipCoords``
    and passed to ``helenUtils.getLipLineMask`` along with the shape of
    ``im`` and a square target size of ``self.mask_side_len``.

    Returns a one-element list holding the line mask.
    """
    lips = helenUtils.getLipCoords(coords)
    image_shape = np.shape(im)
    side = self.mask_side_len
    target_size = (side, side)
    return [helenUtils.getLipLineMask(lips, image_shape, target_size)]