コード例 #1
0
    def __init__(self, weight_file_path, session, prob_threshold=0.5, nms_threshold=0.1):
        """Build the tiny-face detection graph and cache the template data.

        Args:
          weight_file_path:
              Path of the pretrained weight file handed to
              ``tiny_face_model.Model``.
          session:
              TensorFlow session used to run the variable initialiser; kept
              on the instance as ``self.sess``.
          prob_threshold:
              Detection confidence threshold, kept as ``self.prob_thresh``.
          nms_threshold:
              Overlap threshold of non maximum suppression, kept as
              ``self.nms_thresh``.
        """
        self.sess = session
        self.prob_thresh = prob_threshold
        self.nms_thresh = nms_threshold

        # Create the tiny face model whose weights are loaded from a
        # pretrained model.
        self.model = tiny_face_model.Model(weight_file_path)

        # Placeholder of input images. Currently only a batch size of one is
        # supported.
        self.x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c

        self.score_final = self.model.tiny_face(self.x)

        # Initialise variables.
        self.sess.run(tf.global_variables_initializer())

        # The average image and the clusters (reference boxes of templates)
        # are read through the model, so the weight file does not have to be
        # unpickled a second time here.
        self.average_image = self.model.get_data_by_key("average_image")
        self.clusters = self.model.get_data_by_key("clusters")
        # Columns 0..3 hold the box corners; the "+ 1" makes the extent
        # inclusive of both edges.
        self.clusters_h = self.clusters[:, 3] - self.clusters[:, 1] + 1
        self.clusters_w = self.clusters[:, 2] - self.clusters[:, 0] + 1
        # Indices of the clusters whose fifth column equals 1.
        self.normal_idx = np.where(self.clusters[:, 4] == 1)
コード例 #2
0
def init(sess, weight_file_path):
    """Build the tiny-face graph and collect everything inference needs.

    Args:
      sess:
          TensorFlow session used to run the variable initialiser.
      weight_file_path:
          Path of the pretrained weight file handed to
          ``tiny_face_model.Model``.
    Returns:
      The tuple ``(model, average_image, clusters, clusters_h, clusters_w,
      normal_idx, score_final, x)``.
    """
    # Input image batch; only a batch size of one is supported.  (n, h, w, c)
    image_input = tf.placeholder(tf.float32, [1, None, None, 3])

    # The network takes its weights from the pretrained file.
    net = tiny_face_model.Model(weight_file_path)
    scores = net.tiny_face(image_input)
    sess.run(tf.global_variables_initializer())

    mean_image = net.get_data_by_key("average_image")
    templates = net.get_data_by_key("clusters")

    # Split out the four corner columns once, then derive the inclusive
    # extents of every template box.
    col0, col1, col2, col3 = (templates[:, col] for col in range(4))
    template_h = col3 - col1 + 1
    template_w = col2 - col0 + 1

    # Rows whose fifth column equals 1.
    regular = np.where(templates[:, 4] == 1)

    return (net, mean_image, templates, template_h, template_w, regular,
            scores, image_input)
コード例 #3
0
# Paths of a frozen graph and its label file, loaded through load_graph
# (defined outside this snippet).
model_file = 'data/models/smoking-faces-output-graph.pb'
label_file = "data/models/smoking-faces-output-graph.txt"
graph = load_graph(model_file)
"""
initialize tensorflow face detection
"""

weight_file_path = 'data/models/hr_res101'
# NOTE(review): as_default() returns a context manager that is never entered
# here, so the ops built below presumably still land in the process-wide
# default graph — confirm whether a `with` block was intended.
face_detection_graph = tf.Graph().as_default()

# Placeholder of input images. Currently only a batch size of one is supported.
face_detection_x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c

# Create the tiny face model whose weights are loaded from a pretrained model.
model = tiny_face_model.Model(weight_file_path)
score_final = model.tiny_face(face_detection_x)

# Unpickle the weight file a second time.
# NOTE(review): mat_params_dict is not read anywhere in this snippet; the
# average image and the clusters below come from the model instead. Confirm
# no other part of the file relies on it before removing this load.
with open(weight_file_path, "rb") as f:
    _, mat_params_dict = pickle.load(f)

# Average image and clusters (reference boxes of templates).
average_image = model.get_data_by_key("average_image")
clusters = model.get_data_by_key("clusters")
# Inclusive height/width of every cluster box (columns 0..3 are the corners).
clusters_h = clusters[:, 3] - clusters[:, 1] + 1
clusters_w = clusters[:, 2] - clusters[:, 0] + 1
# Indices of the clusters whose fifth column equals 1.
normal_idx = np.where(clusters[:, 4] == 1)

# Session used for face detection; variables are initialised once at import.
face_detection_sess = tf.Session()
face_detection_sess.run(tf.global_variables_initializer())
コード例 #4
0
def evaluate(weight_file_path,
             data_dir,
             output_dir,
             prob_thresh=0.5,
             nms_thresh=0.1,
             lw=3,
             display=False):
    """Detect faces in images, save the images and crop face/non-face patches.

  Args:
    prob_thresh:
        The threshold of detection confidence.
    nms_thresh:
        The overlap threshold of non maximum suppression
    weight_file_path:
        A pretrained weight file in the pickle format
        generated by matconvnet_hr101_to_tf.py.
    data_dir:
        An iterable of image file paths. Despite its name it is iterated
        directly; no directory listing is performed.
    output_dir:
        A directory into which the processed images are written.
    lw:
        Line width of bounding boxes, forwarded to overlay_bounding_boxes.
        If zero specified, this is determined based on confidence of each
        detection.
    display:
        Display tiny face images on window.
  Returns:
    None.
  """

    # Placeholder of input images. Currently batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    # Create the tiny face model whose weights are loaded from a pretrained
    # model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    # Build the non maximum suppression op once and feed it per image.
    # Creating it inside the loop would add new nodes to the graph for every
    # image and make the graph grow without bound.
    nms_boxes = tf.placeholder(tf.float32, [None, 4])
    nms_scores = tf.placeholder(tf.float32, [None])
    nms_max_boxes = tf.placeholder(tf.int32, [])
    nms_indices = tf.image.non_max_suppression(
        nms_boxes,
        nms_scores,
        max_output_size=nms_max_boxes,
        iou_threshold=nms_thresh)

    filenames = data_dir

    # Average image and clusters (reference boxes of templates) come from
    # the model.
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)
    num_templates = clusters.shape[0]

    # We don't run every template on every scale. Templates 4..11 are always
    # kept; templates 18..24 are kept only for scales above 1.0. Everything
    # else is ignored.
    all_tids = set(range(num_templates))
    ignored_up_to_1 = sorted(all_tids - set(range(4, 12)))
    ignored_above_1 = sorted(all_tids - set(range(4, 12)) - set(range(18, 25)))

    def _calc_scales(raw_h, raw_w):
        """Return the scale factors (powers of two) to run the net at."""
        min_scale = min(
            np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
            np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
        max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
        # NOTE(review): pylab.frange is believed to have been removed in
        # matplotlib 3.1 — confirm against the pinned matplotlib version.
        scales_down = pl.frange(min_scale, 0, 1.)
        scales_up = pl.frange(0.5, max_scale, 0.5)
        return np.power(2.0, np.hstack((scales_down, scales_up)))

    def _calc_bounding_boxes(prob_cls, score_cls, score_reg, scale):
        """Turn one scale's heatmaps into rows of (x1, y1, x2, y2, score)."""
        # threshold for detection
        _, fy, fx, fc = np.where(prob_cls > prob_thresh)

        # interpret heatmap into bounding boxes
        cy = fy * 8 - 1
        cx = fx * 8 - 1
        ch = clusters_h[fc]
        cw = clusters_w[fc]

        # extract bounding box refinement
        tx = score_reg[0, :, :, 0:num_templates]
        ty = score_reg[0, :, :, num_templates:2 * num_templates]
        tw = score_reg[0, :, :, 2 * num_templates:3 * num_templates]
        th = score_reg[0, :, :, 3 * num_templates:4 * num_templates]

        # refine bounding boxes
        rcx = cx + cw * tx[fy, fx, fc]
        rcy = cy + ch * ty[fy, fx, fc]
        rcw = cw * np.exp(tw[fy, fx, fc])
        rch = ch * np.exp(th[fy, fx, fc])

        scores = score_cls[0, fy, fx, fc]
        corners = np.vstack((rcx - rcw / 2, rcy - rch / 2,
                             rcx + rcw / 2, rcy + rch / 2))
        # Dividing by the scale maps the corners back to the input image.
        return np.vstack((corners / scale, scores)).transpose()

    def _detect(sess, raw_img):
        """Run the net on an RGB image at every scale and apply NMS."""
        raw_img_f = raw_img.astype(np.float32)
        bboxes = np.empty(shape=(0, 5))

        # process input at different scales
        for s in _calc_scales(raw_img.shape[0], raw_img.shape[1]):
            img = cv2.resize(raw_img_f, (0, 0),
                             fx=s,
                             fy=s,
                             interpolation=cv2.INTER_LINEAR)
            img = img - average_image
            img = img[np.newaxis, :]

            # run through the net
            score_final_tf = sess.run(score_final, feed_dict={x: img})

            # The first 25 channels are read as classification scores, the
            # following 100 as box regression outputs.
            score_cls_tf = score_final_tf[:, :, :, :25]
            score_reg_tf = score_final_tf[:, :, :, 25:125]
            prob_cls_tf = expit(score_cls_tf)
            ignored_tids = ignored_above_1 if s > 1.0 else ignored_up_to_1
            prob_cls_tf[0, :, :, ignored_tids] = 0.0

            bboxes = np.vstack(
                (bboxes,
                 _calc_bounding_boxes(prob_cls_tf, score_cls_tf, score_reg_tf,
                                      s)))

        # non maximum suppression
        keep = sess.run(nms_indices,
                        feed_dict={
                            nms_boxes: bboxes[:, :4],
                            nms_scores: bboxes[:, 4],
                            nms_max_boxes: bboxes.shape[0]
                        })
        return bboxes[keep]

    # main
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for filename in filenames:
            start = time.time()
            fname = os.path.basename(filename)
            raw_img = cv2.imread(filename)
            if raw_img is None:
                # cv2.imread returns None instead of raising when the file
                # is missing or cannot be decoded.
                print("Skipping unreadable image {}".format(filename))
                continue

            # Keep the image as loaded (BGR) for drawing and cropping.
            img_xsize = raw_img
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)

            refined_bboxes = _detect(sess, raw_img)
            face_list, Lavg, Wavg = overlay_bounding_boxes(
                img_xsize, refined_bboxes, lw)

            if display:
                plt.imshow(raw_img)
                plt.show()

            # save image
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
            cv2.imwrite(os.path.join(output_dir, fname), raw_img)
            main_img_name = fname.split('.')[0]
            num_faces_taken = crop_faces_save(img_xsize, face_list,
                                              main_img_name)
            crop_nonfaces_save(img_xsize, face_list, Lavg, Wavg, main_img_name,
                               num_faces_taken)
            print("time {:.2f} secs for {}".format(time.time() - start, fname))
コード例 #5
0
def evaluate(weight_file_path,
             data_dir,
             output_dir,
             prob_thresh=0.5,
             nms_thresh=0.1,
             lw=3,
             display=False):
    """Detect faces in sampled frames of videos and write annotated videos.

    Args:
      prob_thresh:
          The threshold of detection confidence.
      nms_thresh:
          The overlap threshold of non maximum suppression
      weight_file_path:
          A pretrained weight file in the pickle format
          generated by matconvnet_hr101_to_tf.py.
      data_dir:
          A directory which contains videos (*.mp4, *.avi, *.ts).
      output_dir:
          A directory into which one '<name>.avi' per input video is
          written; it is created when missing.
      lw:
          Line width of bounding boxes, forwarded to
          overlay_bounding_boxes. If zero specified, this is determined
          based on confidence of each detection.
      display:
          Display tiny face images on window.
    Returns:
      None.
    """

    # Placeholder of input images. Currently batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c

    # Create the tiny face model whose weights are loaded from a pretrained
    # model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    # Build the non maximum suppression op once and feed it per frame.
    # Creating it inside the loop would add new nodes to the graph for every
    # processed frame and make the graph grow without bound.
    nms_boxes = tf.placeholder(tf.float32, [None, 4])
    nms_scores = tf.placeholder(tf.float32, [None])
    nms_max_boxes = tf.placeholder(tf.int32, [])
    nms_indices = tf.image.non_max_suppression(
        nms_boxes,
        nms_scores,
        max_output_size=nms_max_boxes,
        iou_threshold=nms_thresh)

    # Find video files in data_dir.
    filenames = []
    for ext in ('*.mp4', '*.avi', '*.ts'):
        filenames.extend(glob.glob(os.path.join(data_dir, ext)))

    # Average image and clusters (reference boxes of templates) come from
    # the model.
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)
    num_templates = clusters.shape[0]

    # We don't run every template on every scale. Templates 4..11 are always
    # kept; templates 18..24 are kept only for scales above 1.0. Everything
    # else is ignored.
    all_tids = set(range(num_templates))
    ignored_up_to_1 = sorted(all_tids - set(range(4, 12)))
    ignored_above_1 = sorted(all_tids - set(range(4, 12)) - set(range(18, 25)))

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.9

    def _calc_scales(raw_h, raw_w):
        """Return the scale factors (powers of two) to run the net at."""
        min_scale = min(
            np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
            np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
        max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
        # NOTE(review): pylab.frange is believed to have been removed in
        # matplotlib 3.1 — confirm against the pinned matplotlib version.
        scales_down = pl.frange(min_scale, 0, 1.)
        scales_up = pl.frange(0.5, max_scale, 0.5)
        return np.power(2.0, np.hstack((scales_down, scales_up)))

    def _calc_bounding_boxes(prob_cls, score_cls, score_reg, scale):
        """Turn one scale's heatmaps into rows of (x1, y1, x2, y2, score)."""
        # threshold for detection
        _, fy, fx, fc = np.where(prob_cls > prob_thresh)

        # interpret heatmap into bounding boxes
        cy = fy * 8 - 1
        cx = fx * 8 - 1
        ch = clusters_h[fc]
        cw = clusters_w[fc]

        # extract bounding box refinement
        tx = score_reg[0, :, :, 0:num_templates]
        ty = score_reg[0, :, :, num_templates:2 * num_templates]
        tw = score_reg[0, :, :, 2 * num_templates:3 * num_templates]
        th = score_reg[0, :, :, 3 * num_templates:4 * num_templates]

        # refine bounding boxes
        rcx = cx + cw * tx[fy, fx, fc]
        rcy = cy + ch * ty[fy, fx, fc]
        rcw = cw * np.exp(tw[fy, fx, fc])
        rch = ch * np.exp(th[fy, fx, fc])

        scores = score_cls[0, fy, fx, fc]
        corners = np.vstack((rcx - rcw / 2, rcy - rch / 2,
                             rcx + rcw / 2, rcy + rch / 2))
        # Dividing by the scale maps the corners back to the input frame.
        return np.vstack((corners / scale, scores)).transpose()

    def _detect(sess, raw_img):
        """Run the net on an RGB frame at every scale and apply NMS."""
        raw_img_f = raw_img.astype(np.float32)
        bboxes = np.empty(shape=(0, 5))

        # process input at different scales
        for s in _calc_scales(raw_img.shape[0], raw_img.shape[1]):
            img = cv2.resize(raw_img_f, (0, 0),
                             fx=s,
                             fy=s,
                             interpolation=cv2.INTER_LINEAR)
            img = img - average_image
            img = img[np.newaxis, :]

            # run through the net
            score_final_tf = sess.run(score_final, feed_dict={x: img})

            # The first 25 channels are read as classification scores, the
            # following 100 as box regression outputs.
            score_cls_tf = score_final_tf[:, :, :, :25]
            score_reg_tf = score_final_tf[:, :, :, 25:125]
            prob_cls_tf = expit(score_cls_tf)
            ignored_tids = ignored_above_1 if s > 1.0 else ignored_up_to_1
            prob_cls_tf[0, :, :, ignored_tids] = 0.0

            bboxes = np.vstack(
                (bboxes,
                 _calc_bounding_boxes(prob_cls_tf, score_cls_tf, score_reg_tf,
                                      s)))

        # non maximum suppression
        keep = sess.run(nms_indices,
                        feed_dict={
                            nms_boxes: bboxes[:, :4],
                            nms_scores: bboxes[:, 4],
                            nms_max_boxes: bboxes.shape[0]
                        })
        return bboxes[keep]

    # main
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        for filename in filenames:
            fname = os.path.basename(filename)
            videoCapture = cv2.VideoCapture(filename)
            video_writer = None
            try:
                frame_numbers = videoCapture.get(cv2.CAP_PROP_FRAME_COUNT)
                fps = videoCapture.get(cv2.CAP_PROP_FPS)
                success, frame = videoCapture.read()
                out_video_size = (
                    int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                    int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
                print("Processing {} ".format(fname))
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                out_path = output_dir + '/' + fname.split('.')[0] + '.avi'
                video_writer = cv2.VideoWriter(
                    out_path, cv2.VideoWriter_fourcc('M', 'P', 'E', 'G'), fps,
                    out_video_size)
                test_bar = tqdm(
                    range(int(frame_numbers)),
                    desc='[processing video and saving result videos]')
                # Process one frame out of every (fps // 2). Clamp to 1 so
                # a reported frame rate below 2 cannot make the modulo
                # below divide by zero.
                per_fps = max(1.0, fps // 2)
                start = time.time()
                for index in test_bar:
                    if success and index % per_fps == 0:
                        raw_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                        refined_bboxes = _detect(sess, raw_img)

                        # Crop from a clean BGR copy before boxes are drawn.
                        cut_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
                        cut_bounding_boxes(
                            cut_img, refined_bboxes, output_dir + '/' +
                            fname.split('.')[0] + '/' + str(index * per_fps))
                        overlay_bounding_boxes(raw_img, refined_bboxes, lw)

                        if display:
                            plt.imshow(raw_img)
                            plt.show()

                        # save frame with bounding boxes
                        raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
                        video_writer.write(raw_img)
                    # Always advance, so skipped frames are consumed too.
                    success, frame = videoCapture.read()
            finally:
                # Release both handles even when a frame fails, so the
                # output container is closed and the input is not leaked.
                videoCapture.release()
                if video_writer is not None:
                    video_writer.release()
            print("time {:.2f} secs for {}".format(time.time() - start, fname))
コード例 #6
0
def evaluate(weight_file_path,
             data_dir,
             output_dir,
             fps,
             prob_thresh=0.5,
             nms_thresh=0.1,
             lw=3,
             display=True):
    """Detect and track faces in videos and write annotated videos.

  For every video found in data_dir each frame is run through the detector,
  annotated, and written to output_dir. One summary line per video is
  written to "output_file.txt" in the current working directory.

  Args:
    prob_thresh:
        The threshold of detection confidence.
    nms_thresh:
        The overlap threshold of non maximum suppression
    weight_file_path:
        A pretrained weight file in the pickle format
        generated by matconvnet_hr101_to_tf.py.
    data_dir:
        A directory which contains videos (*.avi, *.gif, *.mp4, *.wmv).
    output_dir:
        A directory into which the annotated videos are output.
    fps:
        Frame rate given to the output video writer.
    lw:
        Line width of bounding boxes, forwarded to overlay_bounding_boxes.
        If zero specified, this is determined based on confidence of each
        detection.
    display:
        Not used by this function.
  Returns:
    None.
  """
    # Placeholder of input images. Currently batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c

    # Create the tiny face model whose weights are loaded from a pretrained
    # model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    # A second network is loaded and truncated at layer 'dense_15'. Neither
    # object is read again in this function; the loading is kept so that its
    # side effects and failure behaviour stay as they were.
    saved_model = os.path.normpath('networks\\cifar100.h5')
    model_vgg = load_model(saved_model)

    my_layer = 'dense_15'

    intermediate_layer_model = Model(
        inputs=model_vgg.input, outputs=model_vgg.get_layer(my_layer).output)

    # Build the non maximum suppression op once and feed it per frame.
    # Creating it inside the loop would add new nodes to the graph for every
    # frame and make the graph grow without bound.
    nms_boxes = tf.placeholder(tf.float32, [None, 4])
    nms_scores = tf.placeholder(tf.float32, [None])
    nms_max_boxes = tf.placeholder(tf.int32, [])
    nms_indices = tf.image.non_max_suppression(
        nms_boxes,
        nms_scores,
        max_output_size=nms_max_boxes,
        iou_threshold=nms_thresh)

    # Find video files in data_dir.
    filenames = []
    for ext in ('*.avi', '*.gif', '*.mp4', '*.wmv'):
        filenames.extend(glob.glob(os.path.join(data_dir, ext)))

    # Average image and clusters (reference boxes of templates) come from
    # the model; they do not change between videos.
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)
    num_templates = clusters.shape[0]

    # We don't run every template on every scale. Templates 4..11 are always
    # kept; templates 18..24 are kept only for scales above 1.0. Everything
    # else is ignored.
    all_tids = set(range(num_templates))
    ignored_up_to_1 = sorted(all_tids - set(range(4, 12)))
    ignored_above_1 = sorted(all_tids - set(range(4, 12)) - set(range(18, 25)))

    def _calc_scales(raw_h, raw_w):
        """Return the scale factors (powers of two) to run the net at."""
        min_scale = min(
            np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
            np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
        max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
        # NOTE(review): pylab.frange is believed to have been removed in
        # matplotlib 3.1 — confirm against the pinned matplotlib version.
        scales_down = pl.frange(min_scale, 0, 1.)
        scales_up = pl.frange(0.5, max_scale, 0.5)
        return np.power(2.0, np.hstack((scales_down, scales_up)))

    def _calc_bounding_boxes(prob_cls, score_cls, score_reg, scale):
        """Turn one scale's heatmaps into rows of (x1, y1, x2, y2, score)."""
        # threshold for detection
        _, fy, fx, fc = np.where(prob_cls > prob_thresh)

        # interpret heatmap into bounding boxes
        cy = fy * 8 - 1
        cx = fx * 8 - 1
        ch = clusters_h[fc]
        cw = clusters_w[fc]

        # extract bounding box refinement
        tx = score_reg[0, :, :, 0:num_templates]
        ty = score_reg[0, :, :, num_templates:2 * num_templates]
        tw = score_reg[0, :, :, 2 * num_templates:3 * num_templates]
        th = score_reg[0, :, :, 3 * num_templates:4 * num_templates]

        # refine bounding boxes
        rcx = cx + cw * tx[fy, fx, fc]
        rcy = cy + ch * ty[fy, fx, fc]
        rcw = cw * np.exp(tw[fy, fx, fc])
        rch = ch * np.exp(th[fy, fx, fc])

        scores = score_cls[0, fy, fx, fc]
        corners = np.vstack((rcx - rcw / 2, rcy - rch / 2,
                             rcx + rcw / 2, rcy + rch / 2))
        # Dividing by the scale maps the corners back to the input frame.
        return np.vstack((corners / scale, scores)).transpose()

    def _detect(sess, raw_img):
        """Run the net on an RGB frame at every scale and apply NMS."""
        raw_img_f = raw_img.astype(np.float32)
        bboxes = np.empty(shape=(0, 5))

        # process input at different scales
        for s in _calc_scales(raw_img.shape[0], raw_img.shape[1]):
            img = cv2.resize(raw_img_f, (0, 0),
                             fx=s,
                             fy=s,
                             interpolation=cv2.INTER_LINEAR)
            img = img - average_image
            img = img[np.newaxis, :]

            # run through the net
            score_final_tf = sess.run(score_final, feed_dict={x: img})

            # The first 25 channels are read as classification scores, the
            # following 100 as box regression outputs.
            score_cls_tf = score_final_tf[:, :, :, :25]
            score_reg_tf = score_final_tf[:, :, :, 25:125]
            prob_cls_tf = expit(score_cls_tf)
            ignored_tids = ignored_above_1 if s > 1.0 else ignored_up_to_1
            prob_cls_tf[0, :, :, ignored_tids] = 0.0

            bboxes = np.vstack(
                (bboxes,
                 _calc_bounding_boxes(prob_cls_tf, score_cls_tf, score_reg_tf,
                                      s)))

        # non maximum suppression
        keep = sess.run(nms_indices,
                        feed_dict={
                            nms_boxes: bboxes[:, :4],
                            nms_scores: bboxes[:, 4],
                            nms_max_boxes: bboxes.shape[0]
                        })
        return bboxes[keep]

    # The summary file is closed even when a video fails; a single session
    # serves all videos because the graph is the same for each of them.
    with open("output_file.txt", "w+") as output_file, tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for video_path in filenames:
            # Only a '.gif' extension is swapped for '.avi'; a "gif"
            # elsewhere in the file name is left alone.
            root, ext = os.path.splitext(os.path.basename(video_path))
            video_out_name = os.path.join(
                output_dir, root + ('.avi' if ext == '.gif' else ext))
            print(video_out_name)

            # Load the video.
            capture = cv2.VideoCapture(video_path)

            # Buffers for tracking faces across frames.
            refined_bboxes_anterior = []
            faces = []
            n_frame = 0
            video_out = None

            try:
                # The writer only accepts frames of the size it was opened
                # with, so take the size from the input and fall back to
                # 352x240 when the capture does not report one.
                frame_width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) or 352
                frame_height = int(
                    capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 240
                video_out = cv2.VideoWriter(video_out_name,
                                            cv2.VideoWriter_fourcc(*'XVID'),
                                            fps, (frame_width, frame_height))

                while capture.isOpened():
                    ok, frame = capture.read()
                    if not ok:
                        # End of stream (or an unreadable frame): stop
                        # cleanly instead of failing on a None frame.
                        break

                    raw_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    start = time.time()

                    refined_bboxes = _detect(sess, raw_img)
                    overlay_bounding_boxes(raw_img, refined_bboxes, lw)

                    # Per the original author's note this computes the
                    # distance between the faces of the current and the
                    # previous frame; its return value is not used here.
                    get_distance_points(refined_bboxes,
                                        refined_bboxes_anterior)

                    get_faces_distances(refined_bboxes, faces, n_frame)

                    draw_distance_labels_counter(faces, raw_img)

                    # The current frame becomes the previous one.
                    refined_bboxes_anterior = refined_bboxes

                    # save frame with bounding boxes
                    video_out.write(cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR))
                    n_frame = n_frame + 1

                    print("time {:.2f} secs for {}_{}".format(
                        time.time() - start, 'frame', n_frame))
            except Exception:
                # Best effort: report the failure and go on with the next
                # video instead of aborting the whole run.
                traceback.print_exc()
            finally:
                capture.release()
                if video_out is not None:
                    video_out.release()

            # One record per line so consecutive videos do not run together.
            output_file.write(video_out_name + " " + "Esperado: " + " " +
                              "Contado: " + str(len(faces)) + "\n")
コード例 #7
0
ファイル: tiny_face_det.py プロジェクト: firebitsbr/MLTx2020
def evaluate(img_path, prob_thresh=0.5, nms_thresh=0.1, lw=3, display=False):
    """Detect faces in a single image with the tiny face model.

    The image is run through the network at four fixed scales
    (0.5, 1, 1.5 and 2.0). Detections of all scales are merged and then
    filtered with non maximum suppression.

    Args:
      img_path:
          Path of the image file to process.
      prob_thresh:
          The threshold of detection confidence.
      nms_thresh:
          The overlap threshold of non maximum suppression.
      lw:
          Not used by this function; kept for interface compatibility.
      display:
          Not used by this function; kept for interface compatibility.
    Returns:
      Array with one row per kept detection:
      [xmin, ymin, xmax, ymax, score], with coordinates expressed in the
      frame of the input image and `score` being the raw (pre-sigmoid)
      classification score.
    Raises:
      IOError: if the image at `img_path` cannot be read.
    """
    # cv2.imread signals failure by returning None instead of raising, so
    # validate up front, before any graph is built.
    raw_img = cv2.imread(img_path)
    if raw_img is None:
        raise IOError("Could not read image: {}".format(img_path))
    fname = img_path
    raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)
    raw_img_f = raw_img.astype(np.float32)

    # placeholder of input images. Currently batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    # NOTE(review): the weight file path is a literal stand-in inherited from
    # the original code; it has to point at a real pickle file to work.
    model = tiny_face_model.Model('/path/to/pkl/file/')
    score_final = model.tiny_face(x)

    # Average image (subtracted from the input) and clusters (reference
    # boxes of the templates).
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")

    # main
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        scales = [0.5, 1, 1.5, 2.0]
        start = time.time()

        # initialize output: rows of [xmin, ymin, xmax, ymax, score]
        bboxes = np.empty(shape=(0, 5))

        # process input at different scales
        for s in scales:
            print("Processing {} at scale {:.4f}".format(fname, s))
            img = cv2.resize(raw_img_f, (0, 0),
                             fx=s,
                             fy=s,
                             interpolation=cv2.INTER_LINEAR)
            img = img - average_image
            img = img[np.newaxis, :]

            # We don't run every template on every scale: templates 4-11
            # are always kept, templates 18-24 only when up-scaling.
            # Everything else is ignored.
            tids = list(range(4,
                              12)) + ([] if s <= 1.0 else list(range(18, 25)))
            ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

            # run through the net
            score_final_tf = sess.run(score_final, feed_dict={x: img})

            # collect scores: the first 25 channels are classification
            # scores, the following 100 are box regression outputs.
            score_cls_tf = score_final_tf[:, :, :, :25]
            score_reg_tf = score_final_tf[:, :, :, 25:125]
            prob_cls_tf = expit(score_cls_tf)
            prob_cls_tf[0, :, :, ignoredTids] = 0.0

            # threshold for detection
            _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

            # interpret heatmap into bounding boxes (a heatmap cell index is
            # mapped to input pixels with a factor of 8)
            cy = fy * 8 - 1
            cx = fx * 8 - 1
            ch = clusters[fc, 3] - clusters[fc, 1] + 1
            cw = clusters[fc, 2] - clusters[fc, 0] + 1

            # extract bounding box refinement; the regression channels are
            # laid out as four consecutive groups of Nt: tx, ty, tw, th
            Nt = clusters.shape[0]
            tx = score_reg_tf[0, :, :, 0:Nt]
            ty = score_reg_tf[0, :, :, Nt:2 * Nt]
            tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
            th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

            # refine bounding boxes
            dcx = cw * tx[fy, fx, fc]
            dcy = ch * ty[fy, fx, fc]
            rcx = cx + dcx
            rcy = cy + dcy
            rcw = cw * np.exp(tw[fy, fx, fc])
            rch = ch * np.exp(th[fy, fx, fc])

            scores = score_cls_tf[0, fy, fx, fc]
            tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2,
                                    rcx + rcw / 2, rcy + rch / 2))
            # dividing by s maps the boxes back to the input image frame
            tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
            tmp_bboxes = tmp_bboxes.transpose()

            bboxes = np.vstack((bboxes, tmp_bboxes))

        print("time {:.2f} secs for {}".format(time.time() - start, fname))

        # non maximum suppression
        refind_idx = tf.image.non_max_suppression(
            tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
            tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
            max_output_size=bboxes.shape[0],
            iou_threshold=nms_thresh)
        refind_idx = sess.run(refind_idx)
        refined_bboxes = bboxes[refind_idx]
        return refined_bboxes
コード例 #8
0
def faceDetection_TinyFaces(weight_file_path,
                            videoFile,
                            sampling_interval,
                            detectionFrames,
                            prob_thresh=0.5,
                            nms_thresh=0.1,
                            lw=3,
                            display=False,
                            newScale=360):
    """Detect faces in sampled frames of a video.

    A frame is processed when its index is a multiple of the sampling rate
    (FPS * sampling_interval, at least 1) or when the index two frames
    earlier is listed in `detectionFrames`.

    Input:
    weight_file_path:
        A pretrained weight file in the pickle format
    videoFile:
        path to video
    sampling_interval (float):
        interval in seconds at which face detection is performed
    detectionFrames (list of int):
        frames number for shot boundaries
    prob_thresh:
        The threshold of detection confidence.
    nms_thresh:
        The overlap threshold of non maximum suppression
    lw:
        Line width of bounding boxes. Not used by this function; kept for
        interface compatibility.
    display:
        Display tiny face images on window. Not used by this function;
        kept for interface compatibility.
    newScale:
        Height which images are rescaled to before performing detection.
        Smaller sizes save processing time at cost of accuracy.

    Output :
    detections (nested list of int):
        one list per processed frame, holding the bounding boxes of the
        detected faces in format [xmin, ymin, xmax, ymax], expressed in the
        coordinates of the original (not resized) frame
    indices (list of int):
        frame number corresponding to bounding boxes in 'detections'

    """

    RESIZED_IMAGE_HEIGHT = newScale

    print('Resizing images to height ', RESIZED_IMAGE_HEIGHT)

    vid = cv2.VideoCapture(videoFile)
    try:
        total_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
        FPS = vid.get(cv2.CAP_PROP_FPS)

        # Both values are used as modulo divisors below. Clamp them to 1 so
        # that a short sampling interval, a reported FPS of 0, or a video
        # with fewer than 10 frames cannot raise ZeroDivisionError.
        sampling_rate = max(1, int(FPS * sampling_interval))
        # TODO : may set adaptive sampling rate based on shot length
        progress_step = max(1, total_frames // 10)

        print(videoFile, " : total_frames:", total_frames, ", FPS:", FPS,
              ' SR:', sampling_rate)

        detections = []
        # face detection bounding boxes
        indices = []
        # frame number for detection

        # placeholder of input images. Currently batch size of one is
        # supported.
        x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c

        # Create the tiny face model which weights are loaded from a
        # pretrained model.
        model = tiny_face_model.Model(weight_file_path)
        score_final = model.tiny_face(x)

        # Average image and clusters (reference boxes of templates).
        average_image = model.get_data_by_key("average_image")
        clusters = model.get_data_by_key("clusters")
        clusters_h = clusters[:, 3] - clusters[:, 1] + 1
        clusters_w = clusters[:, 2] - clusters[:, 0] + 1
        normal_idx = np.where(clusters[:, 4] == 1)

        # Build the non maximum suppression op once and feed it per frame.
        # Calling tf.image.non_max_suppression inside the frame loop would
        # add new nodes to the graph for every processed frame.
        nms_boxes = tf.placeholder(tf.float32, [None, 4])
        nms_scores = tf.placeholder(tf.float32, [None])
        nms_max_out = tf.placeholder(tf.int32, [])
        nms_op = tf.image.non_max_suppression(nms_boxes,
                                              nms_scores,
                                              max_output_size=nms_max_out,
                                              iou_threshold=nms_thresh)

        # main
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            for frameInd in range(0, total_frames):

                if frameInd % progress_step == 0:
                    print(str(frameInd // progress_step) + '0 %')

                ret, frame = vid.read()

                if not ret:
                    continue

                # subsample frames
                if (frameInd % sampling_rate != 0
                        and (frameInd - 2) not in detectionFrames):
                    continue

                raw_img = frame

                # factor that brings the frame to the requested height
                myScale = (float(RESIZED_IMAGE_HEIGHT) / raw_img.shape[0])

                raw_img = cv2.resize(
                    raw_img,
                    (int(raw_img.shape[1] * myScale), RESIZED_IMAGE_HEIGHT),
                    interpolation=cv2.INTER_CUBIC)

                raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)
                raw_img_f = raw_img.astype(np.float32)

                # Scales (powers of two) at which the frame is processed,
                # derived from the template sizes and the frame size.
                raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
                min_scale = min(
                    np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                    np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
                max_scale = min(
                    1.0,
                    -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM_TINYFACES))
                scales_down = pl.frange(min_scale, 0, 1.)
                scales_up = pl.frange(0.5, max_scale, 0.5)
                scales_pow = np.hstack((scales_down, scales_up))
                scales = np.power(2.0, scales_pow)

                start = time.time()

                # initialize output: rows of [xmin, ymin, xmax, ymax, score]
                bboxes = np.empty(shape=(0, 5))

                # process input at different scales
                for s in scales:
                    img = cv2.resize(raw_img_f, (0, 0),
                                     fx=s,
                                     fy=s,
                                     interpolation=cv2.INTER_LINEAR)
                    img = img - average_image
                    img = img[np.newaxis, :]

                    # We don't run every template on every scale:
                    # templates 4-11 are always kept, 18-24 only when
                    # up-scaling. Everything else is ignored.
                    tids = list(range(
                        4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
                    ignoredTids = list(
                        set(range(0, clusters.shape[0])) - set(tids))

                    # run through the net
                    score_final_tf = sess.run(score_final,
                                              feed_dict={x: img})

                    # collect scores: first 25 channels are classification
                    # scores, the following 100 are regression outputs.
                    score_cls_tf = score_final_tf[:, :, :, :25]
                    score_reg_tf = score_final_tf[:, :, :, 25:125]
                    prob_cls_tf = expit(score_cls_tf)
                    prob_cls_tf[0, :, :, ignoredTids] = 0.0

                    # threshold for detection
                    _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

                    # interpret heatmap into bounding boxes
                    cy = fy * 8 - 1
                    cx = fx * 8 - 1
                    ch = clusters[fc, 3] - clusters[fc, 1] + 1
                    cw = clusters[fc, 2] - clusters[fc, 0] + 1

                    # extract bounding box refinement
                    Nt = clusters.shape[0]
                    tx = score_reg_tf[0, :, :, 0:Nt]
                    ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                    tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                    th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                    # refine bounding boxes
                    dcx = cw * tx[fy, fx, fc]
                    dcy = ch * ty[fy, fx, fc]
                    rcx = cx + dcx
                    rcy = cy + dcy
                    rcw = cw * np.exp(tw[fy, fx, fc])
                    rch = ch * np.exp(th[fy, fx, fc])

                    scores = score_cls_tf[0, fy, fx, fc]
                    tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2,
                                            rcx + rcw / 2, rcy + rch / 2))
                    # dividing by s maps boxes back to the resized frame
                    tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                    tmp_bboxes = tmp_bboxes.transpose()

                    bboxes = np.vstack((bboxes, tmp_bboxes))

                print("Took {:.2f} secs for Frame {}".format(
                    time.time() - start, str(frameInd)))

                # non maximum suppression
                refind_idx = sess.run(nms_op,
                                      feed_dict={
                                          nms_boxes: bboxes[:, :4],
                                          nms_scores: bboxes[:, 4],
                                          nms_max_out: bboxes.shape[0]
                                      })
                refined_bboxes = bboxes[refind_idx]

                # Undo the initial resize so the boxes refer to the
                # original frame, then truncate to integer pixels.
                scaled_boxes = (refined_bboxes[:, :4] / myScale).astype(int)
                finalBboxes = [list(box) for box in scaled_boxes]

                detections.append(finalBboxes)
                indices.append(frameInd)
    finally:
        # Always give the capture handle back, also when detection fails.
        vid.release()

    return detections, indices
コード例 #9
0
def get_faceboxes(image, threshold=0.5, nms_thresh=0.1, lw=3,
                  weight_file_path='assets/tiny_faces.pkl'):
    """Detect faces in one BGR image and return their bounding boxes.

    A fresh model and session are created for each call and the default
    TensorFlow graph is reset before returning, also when detection fails.

    Args:
      image:
          Image array in BGR channel order (it is converted with
          cv2.COLOR_BGR2RGB before detection).
      threshold:
          The threshold of detection confidence.
      nms_thresh:
          The overlap threshold of non maximum suppression.
      lw:
          Not used by this function; kept for interface compatibility.
      weight_file_path:
          A pretrained weight file in the pickle format.
    Returns:
      Integer array with one row [xmin, ymin, xmax, ymax] per detected
      face, in the coordinates of `image`.
    """
    try:
        # placeholder of input images. Currently batch size of one is
        # supported.
        x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c

        # Create the tiny face model which weights are loaded from a
        # pretrained model.
        model = tiny_face_model.Model(weight_file_path)
        score_final = model.tiny_face(x)

        # Average image and clusters (reference boxes of templates).
        average_image = model.get_data_by_key("average_image")
        clusters = model.get_data_by_key("clusters")
        clusters_h = clusters[:, 3] - clusters[:, 1] + 1
        clusters_w = clusters[:, 2] - clusters[:, 0] + 1
        normal_idx = np.where(clusters[:, 4] == 1)

        # main
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            raw_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            raw_img_f = raw_img.astype(np.float32)

            # Scales (powers of two) at which the image is processed,
            # derived from the template sizes and the image size.
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
            min_scale = min(
                np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
            max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
            scales_down = pl.frange(min_scale, 0, 1.)
            scales_up = pl.frange(0.5, max_scale, 0.5)
            scales_pow = np.hstack((scales_down, scales_up))
            scales = np.power(2.0, scales_pow)

            # initialize output: rows of [xmin, ymin, xmax, ymax, score]
            bboxes = np.empty(shape=(0, 5))

            # process input at different scales
            for s in scales:
                print("Processing image at scale {:.4f}".format(s))
                img = cv2.resize(raw_img_f, (0, 0),
                                 fx=s,
                                 fy=s,
                                 interpolation=cv2.INTER_LINEAR)
                img = img - average_image
                img = img[np.newaxis, :]

                # We don't run every template on every scale: templates
                # 4-11 are always kept, 18-24 only when up-scaling.
                # Everything else is ignored.
                tids = list(range(
                    4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
                ignoredTids = list(
                    set(range(0, clusters.shape[0])) - set(tids))

                # run through the net
                score_final_tf = sess.run(score_final, feed_dict={x: img})

                # collect scores: first 25 channels are classification
                # scores, the following 100 are regression outputs.
                score_cls_tf = score_final_tf[:, :, :, :25]
                score_reg_tf = score_final_tf[:, :, :, 25:125]
                prob_cls_tf = expit(score_cls_tf)
                prob_cls_tf[0, :, :, ignoredTids] = 0.0

                # threshold for detection
                _, fy, fx, fc = np.where(prob_cls_tf > threshold)

                # interpret heatmap into bounding boxes
                cy = fy * 8 - 1
                cx = fx * 8 - 1
                ch = clusters[fc, 3] - clusters[fc, 1] + 1
                cw = clusters[fc, 2] - clusters[fc, 0] + 1

                # extract bounding box refinement
                Nt = clusters.shape[0]
                tx = score_reg_tf[0, :, :, 0:Nt]
                ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                # refine bounding boxes
                dcx = cw * tx[fy, fx, fc]
                dcy = ch * ty[fy, fx, fc]
                rcx = cx + dcx
                rcy = cy + dcy
                rcw = cw * np.exp(tw[fy, fx, fc])
                rch = ch * np.exp(th[fy, fx, fc])

                scores = score_cls_tf[0, fy, fx, fc]
                tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2,
                                        rcx + rcw / 2, rcy + rch / 2))
                # dividing by s maps boxes back to the input image frame
                tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                tmp_bboxes = tmp_bboxes.transpose()

                bboxes = np.vstack((bboxes, tmp_bboxes))

            # non maximum suppression
            refind_idx = tf.image.non_max_suppression(
                tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
                tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
                max_output_size=bboxes.shape[0],
                iou_threshold=nms_thresh)
            refind_idx = sess.run(refind_idx)
            refined_bboxes = bboxes[refind_idx]
    finally:
        # Reset default graph, so the next call starts from an empty graph
        # even if this one raised.
        tf.reset_default_graph()

    # `np.int` was only an alias of the builtin `int` and has been removed
    # from NumPy; the builtin yields the same dtype.
    return refined_bboxes[:, :4].astype(int)
コード例 #10
0
def evaluate(weight_file_path,
             prob_thresh=0.1,
             nms_thresh=0.1,
             lw=3,
             display=False,
             video_path='/home/sidhu/Desktop/Crowd_Count/input/video/poh.mp4'):
    """Detect faces in every second frame of a video and plot the result.

    Each processed frame is shown in a window named 'Video'; pressing 'q'
    stops processing early. When the video ends (or 'q' is pressed) a plot
    of the collected per-frame values over time is saved to
    'output/analytics/fig.png'.

  Args:
    weight_file_path:
        A pretrained weight file in the pickle format
        generated by matconvnet_hr101_to_tf.py.
    prob_thresh:
        The threshold of detection confidence.
    nms_thresh:
        The overlap threshold of non maximum suppression
    lw:
        Line width of bounding boxes, forwarded to
        overlay_bounding_boxes. If zero specified,
        this is determined based on confidence of each detection.
    display:
        Not used by this function; kept for interface compatibility.
    video_path:
        Path of the video file to process.
  Returns:
    None.
  """

    # placeholder of input images. Currently batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c

    # Create the tiny face model which weights are loaded from a pretrained model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    # Average image and clusters (reference boxes of templates).
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)

    # Build the non maximum suppression op once and feed it per frame.
    # Calling tf.image.non_max_suppression inside the frame loop would add
    # new nodes to the graph for every processed frame.
    nms_boxes = tf.placeholder(tf.float32, [None, 4])
    nms_scores = tf.placeholder(tf.float32, [None])
    nms_max_out = tf.placeholder(tf.int32, [])
    nms_op = tf.image.non_max_suppression(nms_boxes,
                                          nms_scores,
                                          max_output_size=nms_max_out,
                                          iou_threshold=nms_thresh)

    # main
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        video_capture = cv2.VideoCapture(video_path)
        skip_frame = True
        f_no = 0
        x_arr = []  # time stamps of the processed frames
        y_arr = []  # handed to overlay_bounding_boxes, which is expected
        # to append one value per frame -- TODO confirm against the helper
        try:
            while True:
                # Capture frame-by-frame
                ret, frame = video_capture.read()
                if not ret:
                    # End of the video (or a read error): `frame` is not
                    # usable, so stop instead of failing in cvtColor.
                    break
                # only every second frame is processed
                skip_frame = not skip_frame
                if not skip_frame:
                    continue
                f_no = f_no + 1
                # NOTE(review): assumes 24 processed frames per second;
                # the real frame rate of the video is not consulted.
                fsec = f_no / 24
                x_arr.append(fsec)
                raw_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                raw_img_f = raw_img.astype(np.float32)

                # Scales (powers of two) at which the frame is processed,
                # derived from the template sizes and the frame size.
                raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
                min_scale = min(
                    np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                    np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
                max_scale = min(1.0,
                                -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
                scales_down = pl.frange(min_scale, 0, 1.)
                scales_up = pl.frange(0.5, max_scale, 0.5)
                scales_pow = np.hstack((scales_down, scales_up))
                scales = np.power(2.0, scales_pow)

                # initialize output: rows of [xmin, ymin, xmax, ymax, score]
                bboxes = np.empty(shape=(0, 5))

                # process input at different scales
                for s in scales:
                    img = cv2.resize(raw_img_f, (0, 0),
                                     fx=s,
                                     fy=s,
                                     interpolation=cv2.INTER_LINEAR)
                    img = img - average_image
                    img = img[np.newaxis, :]

                    # We don't run every template on every scale:
                    # templates 4-11 are always kept, 18-24 only when
                    # up-scaling. Everything else is ignored.
                    tids = list(range(
                        4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
                    ignoredTids = list(
                        set(range(0, clusters.shape[0])) - set(tids))

                    # run through the net
                    score_final_tf = sess.run(score_final,
                                              feed_dict={x: img})

                    # collect scores: first 25 channels are classification
                    # scores, the following 100 are regression outputs.
                    score_cls_tf = score_final_tf[:, :, :, :25]
                    score_reg_tf = score_final_tf[:, :, :, 25:125]
                    prob_cls_tf = expit(score_cls_tf)
                    prob_cls_tf[0, :, :, ignoredTids] = 0.0

                    # threshold for detection
                    _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

                    # interpret heatmap into bounding boxes
                    cy = fy * 8 - 1
                    cx = fx * 8 - 1
                    ch = clusters[fc, 3] - clusters[fc, 1] + 1
                    cw = clusters[fc, 2] - clusters[fc, 0] + 1

                    # extract bounding box refinement
                    Nt = clusters.shape[0]
                    tx = score_reg_tf[0, :, :, 0:Nt]
                    ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                    tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                    th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                    # refine bounding boxes
                    dcx = cw * tx[fy, fx, fc]
                    dcy = ch * ty[fy, fx, fc]
                    rcx = cx + dcx
                    rcy = cy + dcy
                    rcw = cw * np.exp(tw[fy, fx, fc])
                    rch = ch * np.exp(th[fy, fx, fc])

                    scores = score_cls_tf[0, fy, fx, fc]
                    tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2,
                                            rcx + rcw / 2, rcy + rch / 2))
                    # dividing by s maps boxes back to the frame
                    tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                    tmp_bboxes = tmp_bboxes.transpose()

                    bboxes = np.vstack((bboxes, tmp_bboxes))

                # non maximum suppression
                refind_idx = sess.run(nms_op,
                                      feed_dict={
                                          nms_boxes: bboxes[:, :4],
                                          nms_scores: bboxes[:, 4],
                                          nms_max_out: bboxes.shape[0]
                                      })
                refined_bboxes = bboxes[refind_idx]
                overlay_bounding_boxes(raw_img, refined_bboxes, lw, y_arr)

                # convert back to BGR for display
                raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
                # Display the resulting frame
                cv2.imshow('Video', raw_img)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # When everything is done (or failed), release the capture
            video_capture.release()
            cv2.destroyAllWindows()

        plt.plot(x_arr, y_arr)
        plt.xlabel('Time (sec)')
        plt.ylabel('Count')
        plt.savefig('output/analytics/fig.png')
コード例 #11
0
def evaluate_and_crop(weight_file_path,
                      data_dir,
                      output_dir,
                      sample_ratio=0.1,
                      prob_thresh=0.5,
                      nms_thresh=0.1,
                      lw=3,
                      display=False):
    """Detect faces in a random sample of images and save the face crops.

    Every sub-directory found while walking `data_dir` is listed; files
    that sit directly in `data_dir` are not considered. Files whose
    extension is png, jpg or jpeg are kept, and a random fraction
    (`sample_ratio`) of them is processed. Each detected face is cropped
    with `crop_image` and written to `output_dir/<sub-directory name>/`
    as `<index>-<file name>`. One line per crop is appended to
    `output_dir/bboxes.csv`: the crop path followed by the five detection
    values (four box coordinates and the score) truncated to integers.

    Args:
      weight_file_path:
          A pretrained weight file in the pickle format
          generated by matconvnet_hr101_to_tf.py.
      data_dir:
          A directory whose sub-directories contain images.
      output_dir:
          A directory into which cropped faces and bboxes.csv are output.
          It is created when missing.
      sample_ratio:
          Fraction of the found images that is processed.
      prob_thresh:
          The threshold of detection confidence.
      nms_thresh:
          The overlap threshold of non maximum suppression
      lw:
          Not used by this function; kept for interface compatibility.
      display:
          Display each processed image on window.
    Returns:
      None.
    """

    # placeholder of input images. Currently batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c

    # Create the tiny face model which weights are loaded from a pretrained model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    # Accepted image file extensions.
    exts = ['png', 'jpg', 'jpeg']

    # Average image and clusters (reference boxes of templates).
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)

    # Build the non maximum suppression op once and feed it per image.
    # Calling tf.image.non_max_suppression inside the image loop would add
    # new nodes to the graph for every processed image.
    nms_boxes = tf.placeholder(tf.float32, [None, 4])
    nms_scores = tf.placeholder(tf.float32, [None])
    nms_max_out = tf.placeholder(tf.int32, [])
    nms_op = tf.image.non_max_suppression(nms_boxes,
                                          nms_scores,
                                          max_output_size=nms_max_out,
                                          iou_threshold=nms_thresh)

    # The output directory has to exist before bboxes.csv is opened in it.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    boxes_csv_path = os.path.join(output_dir, "bboxes.csv")

    # main
    with open(boxes_csv_path, "w") as file_boxes, tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Collect (input file, output directory) pairs.
        in_out = []
        for dirname, dirnames, _ in os.walk(data_dir):
            for movie_dir in dirnames:
                current_dir = os.path.join(dirname, movie_dir)
                current_out_dir = os.path.join(output_dir, movie_dir)
                # Tolerate directories left over from a previous run and
                # sub-directories that share a name.
                if not os.path.isdir(current_out_dir):
                    os.makedirs(current_out_dir)
                for filename in os.listdir(current_dir):
                    file_path = os.path.join(current_dir, filename)
                    in_out.append((file_path, current_out_dir))

        print(in_out)
        in_out = [(src, dst) for (src, dst) in in_out
                  if src.split('.')[-1] in exts]
        in_out = random.sample(in_out, int(sample_ratio * len(in_out)))
        print(in_out)

        for (filename, out_dir) in in_out:
            print("Processing... ", filename)
            fname = filename.split(os.sep)[-1]
            raw_img = cv2.imread(filename)
            # Skip unreadable files and images that are tiny or not
            # three-channel.
            if not (isinstance(raw_img, np.ndarray) and raw_img.ndim == 3
                    and raw_img.shape[0] > 10 and raw_img.shape[1] > 10
                    and raw_img.shape[2] == 3):
                continue
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)
            raw_img_f = raw_img.astype(np.float32)

            # Scales (powers of two) at which the image is processed,
            # derived from the template sizes and the image size.
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
            min_scale = min(
                np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
            max_scale = min(1.0,
                            -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
            scales_down = pl.frange(min_scale, 0, 1.)
            scales_up = pl.frange(0.5, max_scale, 0.5)
            scales_pow = np.hstack((scales_down, scales_up))
            scales = np.power(2.0, scales_pow)

            start = time.time()

            # initialize output: rows of [xmin, ymin, xmax, ymax, score]
            bboxes = np.empty(shape=(0, 5))

            # process input at different scales
            for s in scales:
                print("Processing {} at scale {:.4f}".format(fname, s))
                img = cv2.resize(raw_img_f, (0, 0),
                                 fx=s,
                                 fy=s,
                                 interpolation=cv2.INTER_LINEAR)
                img = img - average_image
                img = img[np.newaxis, :]

                # We don't run every template on every scale: templates
                # 4-11 are always kept, 18-24 only when up-scaling.
                # Everything else is ignored.
                tids = list(range(
                    4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
                ignoredTids = list(
                    set(range(0, clusters.shape[0])) - set(tids))

                # run through the net
                score_final_tf = sess.run(score_final, feed_dict={x: img})

                # collect scores: first 25 channels are classification
                # scores, the following 100 are regression outputs.
                score_cls_tf = score_final_tf[:, :, :, :25]
                score_reg_tf = score_final_tf[:, :, :, 25:125]
                prob_cls_tf = expit(score_cls_tf)
                prob_cls_tf[0, :, :, ignoredTids] = 0.0

                # threshold for detection
                _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

                # interpret heatmap into bounding boxes
                cy = fy * 8 - 1
                cx = fx * 8 - 1
                ch = clusters[fc, 3] - clusters[fc, 1] + 1
                cw = clusters[fc, 2] - clusters[fc, 0] + 1

                # extract bounding box refinement
                Nt = clusters.shape[0]
                tx = score_reg_tf[0, :, :, 0:Nt]
                ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                # refine bounding boxes
                dcx = cw * tx[fy, fx, fc]
                dcy = ch * ty[fy, fx, fc]
                rcx = cx + dcx
                rcy = cy + dcy
                rcw = cw * np.exp(tw[fy, fx, fc])
                rch = ch * np.exp(th[fy, fx, fc])

                scores = score_cls_tf[0, fy, fx, fc]
                tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2,
                                        rcx + rcw / 2, rcy + rch / 2))
                # dividing by s maps boxes back to the input image frame
                tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                tmp_bboxes = tmp_bboxes.transpose()

                bboxes = np.vstack((bboxes, tmp_bboxes))

            print("time {:.2f} secs for {}".format(time.time() - start, fname))

            # non maximum suppression
            refind_idx = sess.run(nms_op,
                                  feed_dict={
                                      nms_boxes: bboxes[:, :4],
                                      nms_scores: bboxes[:, 4],
                                      nms_max_out: bboxes.shape[0]
                                  })
            refined_bboxes = bboxes[refind_idx]
            print("bboxes", refined_bboxes)
            cropped = [(crop_image(raw_img, bbox), bbox)
                       for bbox in refined_bboxes]

            if display:
                plt.imshow(raw_img)
                plt.show()

            for i, (face_img, bbox) in enumerate(cropped):
                crop_path = os.path.join(out_dir, str(i) + "-" + fname)
                # crops are RGB at this point; imwrite expects BGR
                cv2.imwrite(crop_path,
                            cv2.cvtColor(face_img, cv2.COLOR_RGB2BGR))
                file_boxes.write(crop_path + "," +
                                 ",".join([str(int(v)) for v in bbox]) +
                                 "\n")
コード例 #12
0
def evaluate(weight_file_path,
             data_dir,
             output_dir,
             prob_thresh=0.5,
             nms_thresh=0.1,
             lw=3,
             display=False):
    """Detect faces in every image of ``data_dir`` and save annotated copies.

    Each image is run through the tiny-face network at several scales, the
    per-scale detections are merged with non-maximum suppression, the
    surviving boxes are drawn on the image, and the result is written to
    ``output_dir`` under the same file name.

    Args:
        weight_file_path: Path handed to ``tiny_face_model.Model``.
        data_dir: Directory searched (non-recursively) for
            ``*.png``, ``*.gif``, ``*.jpg`` and ``*.jpeg`` files.
        output_dir: Directory the annotated images are written to. It is
            created if it does not exist.
        prob_thresh: Minimum sigmoid class probability for a detection.
        nms_thresh: IoU threshold passed to non-maximum suppression.
        lw: Forwarded to ``overlay_bounding_boxes`` (presumably the line
            width of the drawn boxes -- confirm against that helper).
        display: When true, show each annotated image with matplotlib.

    Returns:
        A list with one entry per discovered file, in discovery order. Each
        entry is a list of ``[x1, y1, x2, y2, score]`` where the coordinates
        are integers in original-image pixels and ``score`` is the sigmoid of
        the raw class score. Files that OpenCV cannot decode contribute an
        empty list.
    """
    # placeholder of input images. Currently batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c

    # Create the tiny face model which weights are loaded from a pretrained model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    # Build the NMS op once and feed it per image. Creating it inside the
    # image loop would add new nodes to the graph for every file processed.
    nms_boxes = tf.placeholder(tf.float32, [None, 4])
    nms_scores = tf.placeholder(tf.float32, [None])
    nms_max_out = tf.placeholder(tf.int32, [])
    nms_op = tf.image.non_max_suppression(
        nms_boxes,
        nms_scores,
        max_output_size=nms_max_out,
        iou_threshold=nms_thresh)

    # Find image files in data_dir.
    filenames = []
    for ext in ('*.png', '*.gif', '*.jpg', '*.jpeg'):
        filenames.extend(glob.glob(os.path.join(data_dir, ext)))

    # cv2.imwrite fails silently when the target directory is missing.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Average image and clusters (reference boxes of templates) come from
    # the model itself; no second read of the weight file is needed.
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)
    num_templates = clusters.shape[0]

    def _calc_scales(raw_h, raw_w):
        """Return the image scale factors (powers of two) to evaluate."""
        min_scale = min(
            np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
            np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
        max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
        scales_down = pl.frange(min_scale, 0, 1.)
        scales_up = pl.frange(0.5, max_scale, 0.5)
        return np.power(2.0, np.hstack((scales_down, scales_up)))

    def _calc_bounding_boxes(prob_cls, score_cls, score_reg, scale):
        """Turn one scale's heatmaps into an (N, 5) array of boxes + score."""
        # threshold for detection
        _, fy, fx, fc = np.where(prob_cls > prob_thresh)

        # interpret heatmap into bounding boxes
        cy = fy * 8 - 1
        cx = fx * 8 - 1
        ch = clusters_h[fc]
        cw = clusters_w[fc]

        # extract bounding box refinement: four consecutive groups of
        # num_templates channels (x offset, y offset, width, height)
        tx = score_reg[0, :, :, 0:num_templates]
        ty = score_reg[0, :, :, num_templates:2 * num_templates]
        tw = score_reg[0, :, :, 2 * num_templates:3 * num_templates]
        th = score_reg[0, :, :, 3 * num_templates:4 * num_templates]

        # refine bounding boxes
        rcx = cx + cw * tx[fy, fx, fc]
        rcy = cy + ch * ty[fy, fx, fc]
        rcw = cw * np.exp(tw[fy, fx, fc])
        rch = ch * np.exp(th[fy, fx, fc])

        scores = score_cls[0, fy, fx, fc]
        corners = np.vstack((rcx - rcw / 2, rcy - rch / 2,
                             rcx + rcw / 2, rcy + rch / 2))
        # Dividing by the scale maps the corners back to original pixels.
        return np.vstack((corners / scale, scores)).transpose()

    # main
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        _results = []

        for filename in filenames:
            fname = os.path.basename(filename)
            raw_img = cv2.imread(filename)
            if raw_img is None:
                # cv2.imread returns None instead of raising when it cannot
                # decode a file; keep the results index-aligned with the
                # discovered files.
                print("Skipping unreadable image {}".format(filename))
                _results.append([])
                continue
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)
            raw_img_f = raw_img.astype(np.float32)

            scales = _calc_scales(raw_img.shape[0], raw_img.shape[1])
            start = time.time()

            # initialize output
            bboxes = np.empty(shape=(0, 5))

            # process input at different scales
            for s in scales:
                print("Processing {} at scale {:.4f}".format(fname, s))
                img = cv2.resize(raw_img_f, (0, 0),
                                 fx=s,
                                 fy=s,
                                 interpolation=cv2.INTER_LINEAR)
                img = img - average_image
                img = img[np.newaxis, :]

                # We don't run every template on every scale: templates
                # 4..11 are always kept, 18..24 only when upscaling.
                tids = list(range(4, 12))
                if s > 1.0:
                    tids += list(range(18, 25))
                ignoredTids = list(set(range(num_templates)) - set(tids))

                # run through the net
                score_final_tf = sess.run(score_final, feed_dict={x: img})

                # collect scores: first 25 channels are class scores, the
                # next 100 are the box regression outputs
                score_cls_tf = score_final_tf[:, :, :, :25]
                score_reg_tf = score_final_tf[:, :, :, 25:125]
                prob_cls_tf = expit(score_cls_tf)
                prob_cls_tf[0, :, :, ignoredTids] = 0.0

                tmp_bboxes = _calc_bounding_boxes(prob_cls_tf, score_cls_tf,
                                                  score_reg_tf, s)
                bboxes = np.vstack((bboxes, tmp_bboxes))

            print("time {:.2f} secs for {}".format(time.time() - start, fname))

            # non maximum suppression
            refind_idx = sess.run(nms_op,
                                  feed_dict={
                                      nms_boxes: bboxes[:, :4],
                                      nms_scores: bboxes[:, 4],
                                      nms_max_out: bboxes.shape[0],
                                  })
            refined_bboxes = bboxes[refind_idx]

            _result = []
            for r in refined_bboxes:
                _score = expit(r[4])
                _r = [int(v) for v in r[:4]]
                print("{} {} {} {} {}".format(_score, _r[0], _r[1], _r[2],
                                              _r[3]))
                _result.append([_r[0], _r[1], _r[2], _r[3], _score])

            overlay_bounding_boxes(raw_img, refined_bboxes, lw)

            if display:
                plt.imshow(raw_img)
                plt.show()

            # save image with bounding boxes
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
            out_path = os.path.join(output_dir, fname)
            if not cv2.imwrite(out_path, raw_img):
                print("Failed to write {}".format(out_path))
            _results.append(_result)

        return _results