def store_prediction(self, sess, batch_x, batch_y, name):
    prediction = sess.run(self.net.predicter,
                          feed_dict={self.net.x: batch_x,
                                     self.net.y: batch_y,
                                     self.net.keep_prob: 1.})
    pred_shape = prediction.shape

    loss = sess.run(self.net.cost,
                    feed_dict={self.net.x: batch_x,
                               self.net.y: util.crop_to_shape(batch_y, pred_shape),
                               self.net.keep_prob: 1.})

    logging.info("Verification error= {:.1f}%, loss= {:.4f}".format(
        error_rate(prediction, util.crop_to_shape(batch_y, prediction.shape)),
        loss))

    batch_y_img = util.crop_to_shape(batch_y, prediction.shape)
    for idx in range(batch_y.shape[0]):
        img_true = util.to_rgb(batch_y_img[idx])
        img_pred = util.to_rgb(np.asarray(prediction[idx], 'float'))
        util.save_image(
            img_true, "%s/%s_true_%s0.jpg" % (self.prediction_path, name, idx))
        util.save_image(
            img_pred, "%s/%s_pred_%s0.jpg" % (self.prediction_path, name, idx))
    return pred_shape
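# Note: store_prediction() above runs the forward pass twice (once for the
# prediction, once for the loss). If the network's output shape is known in
# advance (e.g., from the model definition), both tensors can be fetched in a
# single sess.run; a hedged sketch, assuming pred_shape is available up front:
#
#   prediction, loss = sess.run(
#       [self.net.predicter, self.net.cost],
#       feed_dict={self.net.x: batch_x,
#                  self.net.y: util.crop_to_shape(batch_y, pred_shape),
#                  self.net.keep_prob: 1.})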
def value_pipeline(value, phase):
    _MAX_DELTA = 63
    _CONTRAST_LOWER = 0.5
    _CONTRAST_UPPER = 1.5

    value = util.to_rgb(value)
    value = util.random_resize(value, size_range=_SIZE_RANGE)
    value = util.random_crop(value, size=_NET_SIZE)
    value = util.random_flip(value)
    if phase == data._TRAIN:
        value = util.random_adjust(value,
                                   max_delta=_MAX_DELTA,
                                   contrast_lower=_CONTRAST_LOWER,
                                   contrast_upper=_CONTRAST_UPPER)
    value = util.remove_mean(value, mean=_MEAN)
    return value
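# Usage sketch for value_pipeline (illustrative, assumption-laden). The
# module-level constants _SIZE_RANGE, _NET_SIZE, and _MEAN, and the phase
# flags on `data`, are assumed to be defined elsewhere in this repo; only
# data._TRAIN is referenced by the code above. Note that random resize, crop,
# and flip run in every phase here, while brightness/contrast jitter is
# train-only.
def preprocess_batch(images, phase):
    # Apply the per-image augmentation pipeline to a list of raw images.
    return [value_pipeline(img, phase) for img in images]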
def output_minibatch_stats(self, sess, summary_writer, step, batch_x, batch_y, epoch):
    # Calculate batch loss and accuracy
    summary_str, loss, acc, predictions = sess.run(
        [self.summary_op, self.net.cost, self.net.accuracy, self.net.predicter],
        feed_dict={self.net.x: batch_x,
                   self.net.y: batch_y,
                   self.net.keep_prob: 1.})

    img_pred = util.to_rgb(np.asarray(predictions[0], 'float'))
    util.save_image(img_pred, "test_mini/%s_%s_pred.jpg" % (epoch, step))

    summary_writer.add_summary(summary_str, step)
    summary_writer.flush()
    logging.info(
        "Iter {:}, Minibatch Loss= {:.4f}, Training Accuracy= {:.4f}%, Minibatch error= {:.1f}%"
        .format(step, loss, acc * 100.0, error_rate(predictions, batch_y)))
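# Note (assumption): util.save_image above writes into a relative "test_mini/"
# directory; creating it once before training avoids a failure on the first
# minibatch, e.g.:
#
#   os.makedirs("test_mini", exist_ok=True)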
def main(args):
    sleep(random.random())
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    util.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = util.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')
    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps' threshold
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(
        output_dir, 'bounding_boxes_%05d.txt' % random_key)

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if args.random_order:
            random.shuffle(dataset)
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir, filename + '.png')
                print(image_path)
                if not os.path.exists(output_filename):
                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim < 2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = util.to_rgb(img)
                        img = img[:, :, 0:3]

                        bounding_boxes, _ = align.detect_face.detect_face(
                            img, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces > 0:
                            det = bounding_boxes[:, 0:4]
                            det_arr = []
                            img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces > 1:
                                if args.detect_multiple_faces:
                                    for i in range(nrof_faces):
                                        det_arr.append(np.squeeze(det[i]))
                                else:
                                    # Keep the face that is large and close to the
                                    # image center: score = area - 2 * squared
                                    # offset from center.
                                    bounding_box_size = (
                                        det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
                                    img_center = img_size / 2
                                    offsets = np.vstack([
                                        (det[:, 0] + det[:, 2]) / 2 - img_center[1],
                                        (det[:, 1] + det[:, 3]) / 2 - img_center[0]
                                    ])
                                    offset_dist_squared = np.sum(
                                        np.power(offsets, 2.0), 0)
                                    index = np.argmax(
                                        bounding_box_size - offset_dist_squared * 2.0
                                    )  # some extra weight on the centering
                                    det_arr.append(det[index, :])
                            else:
                                det_arr.append(np.squeeze(det))

                            for i, det in enumerate(det_arr):
                                det = np.squeeze(det)
                                bb = np.zeros(4, dtype=np.int32)
                                bb[0] = np.maximum(det[0] - args.margin / 2, 0)
                                bb[1] = np.maximum(det[1] - args.margin / 2, 0)
                                bb[2] = np.minimum(det[2] + args.margin / 2, img_size[1])
                                bb[3] = np.minimum(det[3] + args.margin / 2, img_size[0])
                                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                                scaled = misc.imresize(
                                    cropped, (args.image_size, args.image_size),
                                    interp='bilinear')
                                nrof_successfully_aligned += 1
                                filename_base, file_extension = os.path.splitext(
                                    output_filename)
                                if args.detect_multiple_faces:
                                    output_filename_n = "{}_{}{}".format(
                                        filename_base, i, file_extension)
                                else:
                                    output_filename_n = "{}{}".format(
                                        filename_base, file_extension)
                                misc.imsave(output_filename_n, scaled)
                                text_file.write('%s %d %d %d %d\n' % (
                                    output_filename_n, bb[0], bb[1], bb[2], bb[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' % nrof_successfully_aligned)
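# main() reads args.input_dir, args.output_dir, args.image_size, args.margin,
# args.random_order, args.gpu_memory_fraction, and args.detect_multiple_faces.
# A minimal argparse sketch covering exactly those flags; the defaults and
# help strings below are assumptions, not taken from the original script.
import argparse

def parse_arguments(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('input_dir', type=str,
                        help='Directory with unaligned images, one subdirectory per class.')
    parser.add_argument('output_dir', type=str,
                        help='Directory where aligned face crops are written.')
    parser.add_argument('--image_size', type=int, default=182,
                        help='Output image size (height = width) in pixels.')
    parser.add_argument('--margin', type=int, default=44,
                        help='Margin added around the detected bounding box, in pixels.')
    parser.add_argument('--random_order', action='store_true',
                        help='Shuffle image order so multiple processes can align in parallel.')
    parser.add_argument('--gpu_memory_fraction', type=float, default=1.0,
                        help='Upper bound on the fraction of GPU memory the process may use.')
    parser.add_argument('--detect_multiple_faces', action='store_true',
                        help='Save a crop for every detected face instead of only the most central one.')
    return parser.parse_args(argv)


if __name__ == '__main__':
    main(parse_arguments(sys.argv[1:]))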
def estimate_planar_homography(I, line1, line2, K, win1, win2, lane_width):
    """Estimates the planar homography H between the camera image plane and
    the World (ground) plane.

    The World reference frame is directly below the camera, with:
        - Z-axis parallel to the road
        - X-axis on the road plane
        - Y-axis pointing upward from the road plane
        - origin on the road plane (Y=0), halfway between the two lane
          boundaries (center of the road).

    Input:
        nparray I
        nparray line1, line2: [a, b, c]
        nparray K: the 3x3 camera intrinsic matrix.
        tuple win1, win2: (float x, float y, float w, float h)
        float lane_width
    Output:
        nparray H, a 3x3 homography.
    """
    h, w = I.shape[0:2]
    x_win1 = intrnd(w * win1[0])
    y_win1 = intrnd(h * win1[1])
    x_win2 = intrnd(w * win2[0])
    y_win2 = intrnd(h * win2[1])

    # Sample NUM point pairs along the two lane-boundary lines, spread
    # vertically across the window height.
    pts = []
    NUM = 10
    h_win = intrnd(h * win1[3])  # Assume window heights same btwn left/right
    for i in range(NUM):
        frac = i / float(NUM)
        y_cur = intrnd(y_win1 - (h_win / 2) + frac * h_win)
        pt_i = (compute_x(line1, y_cur), y_cur)
        pt_j = (compute_x(line2, y_cur), y_cur)
        pts.append((pt_i, pt_j))

    r1 = solve_for_r1(pts, K, lane_width)
    vanishing_pt = np.cross(line1, line2)
    vanishing_pt = vanishing_pt / vanishing_pt[2]

    ## DEBUG Plot points on image, save to file for visual verification
    Irgb = util.to_rgb(I)
    COLOURS = [(255, 0, 0), (0, 255, 0)]
    for i, (pt_i, pt_j) in enumerate(pts):
        clr = COLOURS[i % 2]
        cv2.circle(Irgb, tuple(map(intrnd, pt_i)), 5, clr)
        cv2.circle(Irgb, tuple(map(intrnd, pt_j)), 5, clr)
    cv2.circle(Irgb, (intrnd(vanishing_pt[0]), intrnd(vanishing_pt[1])), 5,
               (0, 0, 255))
    cv2.imwrite("_Irgb.png", Irgb)

    r3 = solve_for_r3(vanishing_pt, line1, line2, K)
    T = solve_for_t(pts, K, r1, r3, lane_width)
    print("T_pre:", T)
    T = T * (2.1798 / T[1])  # Height of camera is 2.1798 meters
    T[2] = 1  # We want the ref. frame to be directly below camera (why 1?!)
    #T = T / np.linalg.norm(T)
    print("T_post:", T)

    H = np.zeros([3, 3])
    H[:, 0] = r1
    H[:, 1] = r3
    H[:, 2] = T
    return np.dot(K, H)
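# Example (hedged): since H = K [r1 r3 T] maps ground-plane coordinates
# (X, Z, 1) to homogeneous pixel coordinates, a pixel known to lie on the
# road plane can be back-projected by inverting H and dehomogenizing. This
# helper is ours, not part of the source.
def image_pt_to_ground(H, pt):
    # pt: (x, y) pixel coordinates of a point on the road plane.
    p = np.dot(np.linalg.inv(H), np.array([pt[0], pt[1], 1.0]))
    return p[0] / p[2], p[1] / p[2]  # (X, Z) in the World (ground) frame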