Exemple #1
0
    def store_prediction(self, sess, batch_x, batch_y, name):
        prediction = sess.run(self.net.predicter,
                              feed_dict={
                                  self.net.x: batch_x,
                                  self.net.y: batch_y,
                                  self.net.keep_prob: 1.
                              })

        pred_shape = prediction.shape
        loss = sess.run(self.net.cost,
                        feed_dict={
                            self.net.x: batch_x,
                            self.net.y:
                            util.crop_to_shape(batch_y, pred_shape),
                            self.net.keep_prob: 1.
                        })

        logging.info("Verification error= {:.1f}%, loss= {:.4f}".format(
            error_rate(prediction,
                       util.crop_to_shape(batch_y, prediction.shape)), loss))

        batch_y_img = util.crop_to_shape(batch_y, prediction.shape)
        for idx in range(batch_y.shape[0]):
            img_true = util.to_rgb(batch_y_img[idx])
            img_pred = util.to_rgb(np.asarray(prediction[idx], 'float'))
            util.save_image(
                img_true,
                "%s/%s_true_%s0.jpg" % (self.prediction_path, name, idx))
            util.save_image(
                img_pred,
                "%s/%s_pred_%s0.jpg" % (self.prediction_path, name, idx))
        return pred_shape
Exemple #2
0
def value_pipeline(value, phase):
    _MAX_DELTA = 63
    _CONTRAST_LOWER = 0.5
    _CONTRAST_UPPER = 1.5

    value = util.to_rgb(value)
    value = util.random_resize(value, size_range=_SIZE_RANGE)
    value = util.random_crop(value, size=_NET_SIZE)
    value = util.random_flip(value)
    if phase == data._TRAIN:
        value = util.random_adjust(value, max_delta=_MAX_DELTA, contrast_lower=_CONTRAST_LOWER, contrast_upper=_CONTRAST_UPPER)
    value = util.remove_mean(value, mean=_MEAN)
    return value
Exemple #3
0
    def output_minibatch_stats(self, sess, summary_writer, step, batch_x,
                               batch_y, epoch):
        # Calculate batch loss and accuracy
        summary_str, loss, acc, predictions = sess.run([
            self.summary_op, self.net.cost, self.net.accuracy,
            self.net.predicter
        ],
                                                       feed_dict={
                                                           self.net.x: batch_x,
                                                           self.net.y: batch_y,
                                                           self.net.keep_prob:
                                                           1.
                                                       })

        img_pred = util.to_rgb(np.asarray(predictions[0], 'float'))
        util.save_image(img_pred, "test_mini/%s_%s_pred.jpg" % (epoch, step))
        summary_writer.add_summary(summary_str, step)
        summary_writer.flush()
        logging.info(
            "Iter {:}, Minibatch Loss= {:.4f}, Training Accuracy= {:.4f}%, Minibatch error= {:.1f}%"
            .format(step, loss, acc * 100.0, error_rate(predictions, batch_y)))
def main(args):
    sleep(random.random())
    output_dir = os.path.expanduser(args.output_dir)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    util.store_revision_info(src_path, output_dir, ' '.join(sys.argv))
    dataset = util.get_dataset(args.input_dir)

    print('Creating networks and loading parameters')

    with tf.Graph().as_default():
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                                log_device_placement=False))
        with sess.as_default():
            pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None)

    minsize = 20  # minimum size of face
    threshold = [0.6, 0.7, 0.7]  # three steps's threshold
    factor = 0.709  # scale factor

    # Add a random key to the filename to allow alignment using multiple processes
    random_key = np.random.randint(0, high=99999)
    bounding_boxes_filename = os.path.join(
        output_dir, 'bounding_boxes_%05d.txt' % random_key)

    with open(bounding_boxes_filename, "w") as text_file:
        nrof_images_total = 0
        nrof_successfully_aligned = 0
        if args.random_order:
            random.shuffle(dataset)
        for cls in dataset:
            output_class_dir = os.path.join(output_dir, cls.name)
            if not os.path.exists(output_class_dir):
                os.makedirs(output_class_dir)
                if args.random_order:
                    random.shuffle(cls.image_paths)
            for image_path in cls.image_paths:
                nrof_images_total += 1
                filename = os.path.splitext(os.path.split(image_path)[1])[0]
                output_filename = os.path.join(output_class_dir,
                                               filename + '.png')
                print(image_path)
                if not os.path.exists(output_filename):
                    try:
                        img = misc.imread(image_path)
                    except (IOError, ValueError, IndexError) as e:
                        errorMessage = '{}: {}'.format(image_path, e)
                        print(errorMessage)
                    else:
                        if img.ndim < 2:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))
                            continue
                        if img.ndim == 2:
                            img = util.to_rgb(img)
                        img = img[:, :, 0:3]

                        bounding_boxes, _ = align.detect_face.detect_face(
                            img, minsize, pnet, rnet, onet, threshold, factor)
                        nrof_faces = bounding_boxes.shape[0]
                        if nrof_faces > 0:
                            det = bounding_boxes[:, 0:4]
                            det_arr = []
                            img_size = np.asarray(img.shape)[0:2]
                            if nrof_faces > 1:
                                if args.detect_multiple_faces:
                                    for i in range(nrof_faces):
                                        det_arr.append(np.squeeze(det[i]))
                                else:
                                    bounding_box_size = (
                                        det[:, 2] - det[:, 0]) * (det[:, 3] -
                                                                  det[:, 1])
                                    img_center = img_size / 2
                                    offsets = np.vstack([
                                        (det[:, 0] + det[:, 2]) / 2 -
                                        img_center[1],
                                        (det[:, 1] + det[:, 3]) / 2 -
                                        img_center[0]
                                    ])
                                    offset_dist_squared = np.sum(
                                        np.power(offsets, 2.0), 0)
                                    index = np.argmax(
                                        bounding_box_size -
                                        offset_dist_squared * 2.0
                                    )  # some extra weight on the centering
                                    det_arr.append(det[index, :])
                            else:
                                det_arr.append(np.squeeze(det))

                            for i, det in enumerate(det_arr):
                                det = np.squeeze(det)
                                bb = np.zeros(4, dtype=np.int32)
                                bb[0] = np.maximum(det[0] - args.margin / 2, 0)
                                bb[1] = np.maximum(det[1] - args.margin / 2, 0)
                                bb[2] = np.minimum(det[2] + args.margin / 2,
                                                   img_size[1])
                                bb[3] = np.minimum(det[3] + args.margin / 2,
                                                   img_size[0])
                                cropped = img[bb[1]:bb[3], bb[0]:bb[2], :]
                                scaled = misc.imresize(
                                    cropped,
                                    (args.image_size, args.image_size),
                                    interp='bilinear')
                                nrof_successfully_aligned += 1
                                filename_base, file_extension = os.path.splitext(
                                    output_filename)
                                if args.detect_multiple_faces:
                                    output_filename_n = "{}_{}{}".format(
                                        filename_base, i, file_extension)
                                else:
                                    output_filename_n = "{}{}".format(
                                        filename_base, file_extension)
                                misc.imsave(output_filename_n, scaled)
                                text_file.write('%s %d %d %d %d\n' %
                                                (output_filename_n, bb[0],
                                                 bb[1], bb[2], bb[3]))
                        else:
                            print('Unable to align "%s"' % image_path)
                            text_file.write('%s\n' % (output_filename))

    print('Total number of images: %d' % nrof_images_total)
    print('Number of successfully aligned images: %d' %
          nrof_successfully_aligned)
def estimate_planar_homography(I, line1, line2, K, win1, win2, lane_width):
    """ Estimates the planar homography H between the camera image
    plane, and the World (ground) plane.
    The World reference frame is directly below the camera, with:
        - Z-axis parallel to the road
        - X-axis on the road plane
        - Y-axis pointing upward from the road plane
        - origin is on the road plane (Y=0), and halfway between the
          two lanes boundaries (center of road).
    Input:
        nparray I
        nparray line1, line2: [a, b, c]
        nparray K
            The 3x3 camera intrinsic matrix.
        tuple win1, win2: (float x, float y, float w, float h)
        float lane_width
    Output:
        nparray H
    where H is a 3x3 homography.
    """
    h, w = I.shape[0:2]
    x_win1 = intrnd(w * win1[0])
    y_win1 = intrnd(h * win1[1])
    x_win2 = intrnd(w * win2[0])
    y_win2 = intrnd(h * win2[1])

    pts = []
    NUM = 10
    h_win = intrnd(h * win1[3])  # Assume window heights same btwn left/right
    for i in xrange(NUM):
        frac = i / float(NUM)
        y_cur = intrnd((y_win1 - (h_win / 2) + frac * h_win))
        pt_i = (compute_x(line1, y_cur), y_cur)
        pt_j = (compute_x(line2, y_cur), y_cur)
        pts.append((pt_i, pt_j))

    r1 = solve_for_r1(pts, K, lane_width)
    vanishing_pt = np.cross(line1, line2)
    vanishing_pt = vanishing_pt / vanishing_pt[2]
    vanishing_pt[1] = vanishing_pt[1]
    ## DEBUG Plot points on image, save to file for visual verification
    Irgb = util.to_rgb(I)
    COLOURS = [(255, 0, 0), (0, 255, 0)]
    for i, (pt_i, pt_j) in enumerate(pts):
        clr = COLOURS[i % 2]
        cv2.circle(Irgb, tuple(map(intrnd, pt_i)), 5, clr)
        cv2.circle(Irgb, tuple(map(intrnd, pt_j)), 5, clr)
    cv2.circle(Irgb, (intrnd(vanishing_pt[0]), intrnd(vanishing_pt[1])), 5,
               (0, 0, 255))
    cv2.imwrite("_Irgb.png", Irgb)

    r3 = solve_for_r3(vanishing_pt, line1, line2, K)
    T = solve_for_t(pts, K, r1, r3, lane_width)
    print "T_pre:", T
    T = T * (2.1798 / T[1])  # Height of camera is 2.1798 meters
    T[2] = 1  # We want the ref. frame to be directly below camera (why 1?!)
    #T = T / np.linalg.norm(T)
    print "T_post:", T

    H = np.zeros([3, 3])
    H[:, 0] = r1
    H[:, 1] = r3
    H[:, 2] = T
    return np.dot(K, H)
def estimate_planar_homography(I, line1, line2, K, win1, win2, lane_width):
    """ Estimates the planar homography H between the camera image
    plane, and the World (ground) plane.
    The World reference frame is directly below the camera, with:
        - Z-axis parallel to the road
        - X-axis on the road plane
        - Y-axis pointing upward from the road plane
        - origin is on the road plane (Y=0), and halfway between the
          two lanes boundaries (center of road).
    Input:
        nparray I
        nparray line1, line2: [a, b, c]
        nparray K
            The 3x3 camera intrinsic matrix.
        tuple win1, win2: (float x, float y, float w, float h)
        float lane_width
    Output:
        nparray H
    where H is a 3x3 homography.
    """
    h, w = I.shape[0:2]
    x_win1 = intrnd(w * win1[0])
    y_win1 = intrnd(h * win1[1])
    x_win2 = intrnd(w * win2[0])
    y_win2 = intrnd(h * win2[1])
    
    pts = []
    NUM = 10
    h_win = intrnd(h*win1[3]) # Assume window heights same btwn left/right
    for i in xrange(NUM):
        frac = i / float(NUM)
        y_cur = intrnd((y_win1-(h_win/2) + frac*h_win))
        pt_i = (compute_x(line1, y_cur), y_cur)
        pt_j = (compute_x(line2, y_cur), y_cur)
        pts.append((pt_i, pt_j))
        
    r1 = solve_for_r1(pts,
                      K,
                      lane_width)
    vanishing_pt = np.cross(line1, line2)
    vanishing_pt = vanishing_pt / vanishing_pt[2]
    vanishing_pt[1] = vanishing_pt[1]
    ## DEBUG Plot points on image, save to file for visual verification
    Irgb = util.to_rgb(I)
    COLOURS = [(255, 0, 0), (0, 255, 0)]
    for i, (pt_i, pt_j) in enumerate(pts):
        clr = COLOURS[i % 2]
        cv2.circle(Irgb, tuple(map(intrnd, pt_i)), 5, clr)
        cv2.circle(Irgb, tuple(map(intrnd, pt_j)), 5, clr)
    cv2.circle(Irgb, (intrnd(vanishing_pt[0]), intrnd(vanishing_pt[1])), 5, (0, 0, 255))
    cv2.imwrite("_Irgb.png", Irgb)

    r3 = solve_for_r3(vanishing_pt, line1, line2, K)
    T = solve_for_t(pts, K, r1, r3, lane_width)
    print "T_pre:", T
    T = T * (2.1798 / T[1])    # Height of camera is 2.1798 meters
    T[2] = 1 # We want the ref. frame to be directly below camera (why 1?!)
    #T = T / np.linalg.norm(T)
    print "T_post:", T
    
    H = np.zeros([3,3])
    H[:, 0] = r1
    H[:, 1] = r3
    H[:, 2] = T
    return np.dot(K, H)