def __init__(self, weight_file_path, session, prob_threshold=0.5, nms_threshold=0.1):
    self.sess = session
    self.prob_thresh = prob_threshold
    self.nms_thresh = nms_threshold
    # Create the tiny face model whose weights are loaded from a pretrained model.
    self.model = tiny_face_model.Model(weight_file_path)
    # Placeholder for input images. Currently a batch size of one is supported.
    self.x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    self.score_final = self.model.tiny_face(self.x)
    # Initialise variables.
    self.sess.run(tf.global_variables_initializer())
    # Load the average image and clusters (reference boxes of templates).
    with open(weight_file_path, "rb") as f:
        _, mat_params_dict = pickle.load(f)
    self.average_image = self.model.get_data_by_key("average_image")
    self.clusters = self.model.get_data_by_key("clusters")
    self.clusters_h = self.clusters[:, 3] - self.clusters[:, 1] + 1
    self.clusters_w = self.clusters[:, 2] - self.clusters[:, 0] + 1
    self.normal_idx = np.where(self.clusters[:, 4] == 1)
def init(sess, weight_file_path):
    # placeholder of input images. Currently batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    # Create the tiny face model which weights are loaded from a pretrained model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)
    sess.run(tf.global_variables_initializer())
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)
    return model, average_image, clusters, clusters_h, clusters_w, normal_idx, score_final, x
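# A minimal usage sketch for the init() helper above. The weight file name
# "hr_res101.pkl" is an assumption for illustration, not part of the original snippet.
with tf.Session() as sess:
    (model, average_image, clusters, clusters_h,
     clusters_w, normal_idx, score_final, x) = init(sess, "hr_res101.pkl")
    # score_final can now be evaluated on a preprocessed (1, h, w, 3) float batch:
    # scores = sess.run(score_final, feed_dict={x: img_batch})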
model_file = 'data/models/smoking-faces-output-graph.pb'
label_file = "data/models/smoking-faces-output-graph.txt"
graph = load_graph(model_file)

""" initialize tensorflow face detection """
weight_file_path = 'data/models/hr_res101'
# Build the face detector in its own graph (tf.Graph().as_default() returns a
# context manager, so it must be entered for the ops to land on this graph).
face_detection_graph = tf.Graph()
with face_detection_graph.as_default():
    # Placeholder for input images. Currently a batch size of one is supported.
    face_detection_x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    # Create the tiny face model whose weights are loaded from a pretrained model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(face_detection_x)
    # Load the average image and clusters (reference boxes of templates).
    with open(weight_file_path, "rb") as f:
        _, mat_params_dict = pickle.load(f)
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)
    face_detection_sess = tf.Session(graph=face_detection_graph)
    face_detection_sess.run(tf.global_variables_initializer())
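# Hedged sketch of how the module-level detector session above might be driven on a
# single BGR frame. The helper name and the single-scale preprocessing are assumptions
# for illustration; they mirror the per-scale loop used in the evaluate() variants below.
def detect_scores_on_frame(frame):
    """Run the tiny-face head once on a BGR frame at its native scale (sketch)."""
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB).astype(np.float32)
    inp = (rgb - average_image)[np.newaxis, :]
    return face_detection_sess.run(score_final, feed_dict={face_detection_x: inp})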
def evaluate(weight_file_path, data_dir, output_dir, prob_thresh=0.5, nms_thresh=0.1, lw=3, display=False):
    """Detect faces in images.

    Args:
      prob_thresh: The threshold of detection confidence.
      nms_thresh: The overlap threshold of non maximum suppression.
      weight_file_path: A pretrained weight file in the pickle format generated by matconvnet_hr101_to_tf.py.
      data_dir: A list of image paths to process.
      output_dir: A directory into which images with detected faces are output.
      lw: Line width of bounding boxes. If zero is specified, it is determined based on the confidence of each detection.
      display: Display tiny face images in a window.
    Returns:
      None.
    """
    # Placeholder for input images. Currently a batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    # Create the tiny face model whose weights are loaded from a pretrained model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    # Find image files in data_dir.
    # filenames = []
    # for ext in ('*.png', '*.gif', '*.jpg', '*.jpeg'):
    #     filenames.extend(glob.glob(os.path.join(data_dir, ext)))
    filenames = data_dir

    # Load the average image and clusters (reference boxes of templates).
    with open(weight_file_path, "rb") as f:
        _, mat_params_dict = pickle.load(f)
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)

    # main
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for filename in filenames:
            start = time.time()
            fname = filename.split(os.sep)[-1]
            raw_img = cv2.imread(filename)
            img_xsize = raw_img
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)
            raw_img_f = raw_img.astype(np.float32)

            def _calc_scales():
                raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
                min_scale = min(np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                                np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
                max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
                scales_down = pl.frange(min_scale, 0, 1.)
                scales_up = pl.frange(0.5, max_scale, 0.5)
                scales_pow = np.hstack((scales_down, scales_up))
                scales = np.power(2.0, scales_pow)
                return scales

            scales = _calc_scales()

            # initialize output
            bboxes = np.empty(shape=(0, 5))

            # process the input at different scales
            for s in scales:
                img = cv2.resize(raw_img_f, (0, 0), fx=s, fy=s, interpolation=cv2.INTER_LINEAR)
                img = img - average_image
                img = img[np.newaxis, :]

                # we don't run every template on every scale; ids of templates to ignore
                tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
                ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

                # run through the net
                score_final_tf = sess.run(score_final, feed_dict={x: img})

                # collect scores
                score_cls_tf, score_reg_tf = score_final_tf[:, :, :, :25], score_final_tf[:, :, :, 25:125]
                prob_cls_tf = expit(score_cls_tf)
                prob_cls_tf[0, :, :, ignoredTids] = 0.0

                def _calc_bounding_boxes():
                    # threshold for detection
                    _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

                    # interpret the heatmap as bounding boxes
                    cy = fy * 8 - 1
                    cx = fx * 8 - 1
                    ch = clusters[fc, 3] - clusters[fc, 1] + 1
                    cw = clusters[fc, 2] - clusters[fc, 0] + 1

                    # extract bounding box refinement
                    Nt = clusters.shape[0]
                    tx = score_reg_tf[0, :, :, 0:Nt]
                    ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                    tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                    th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                    # refine bounding boxes
                    dcx = cw * tx[fy, fx, fc]
                    dcy = ch * ty[fy, fx, fc]
                    rcx = cx + dcx
                    rcy = cy + dcy
                    rcw = cw * np.exp(tw[fy, fx, fc])
                    rch = ch * np.exp(th[fy, fx, fc])

                    scores = score_cls_tf[0, fy, fx, fc]
                    tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2, rcx + rcw / 2, rcy + rch / 2))
                    tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                    tmp_bboxes = tmp_bboxes.transpose()
                    return tmp_bboxes

                tmp_bboxes = _calc_bounding_boxes()
                bboxes = np.vstack((bboxes, tmp_bboxes))  # <class 'tuple'>: (5265, 5)

            # non maximum suppression
            # refind_idx = util.nms(bboxes, nms_thresh)
            refind_idx = tf.image.non_max_suppression(
                tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
                tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
                max_output_size=bboxes.shape[0],
                iou_threshold=nms_thresh)
            refind_idx = sess.run(refind_idx)
            refined_bboxes = bboxes[refind_idx]

            # if not refined_bboxes.any():
            #     print("No Faces!")
            #     return
            face_list, Lavg, Wavg = overlay_bounding_boxes(img_xsize, refined_bboxes, lw)

            if display:
                # plt.axis('off')
                plt.imshow(raw_img)
                plt.show()

            # save the image with bounding boxes
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
            cv2.imwrite(os.path.join(output_dir, fname), raw_img)

            main_img_name = fname.split('.')[0]
            num_faces_taken = crop_faces_save(img_xsize, face_list, main_img_name)
            crop_nonfaces_save(img_xsize, face_list, Lavg, Wavg, main_img_name, num_faces_taken)
            print("time {:.2f} secs for {}".format(time.time() - start, fname))
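# Hedged usage sketch for the evaluate() variant above. Note that this variant is
# passed a list of image paths directly (filenames = data_dir), not a directory;
# the weight-file and image paths below are illustrative assumptions.
evaluate('weights/hr_res101.pkl',
         ['input/img_0001.jpg', 'input/img_0002.jpg'],
         'output/')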
def evaluate(weight_file_path, data_dir, output_dir, prob_thresh=0.5, nms_thresh=0.1, lw=3, display=False):
    """Detect faces in videos.

    Args:
      prob_thresh: The threshold of detection confidence.
      nms_thresh: The overlap threshold of non maximum suppression.
      weight_file_path: A pretrained weight file in the pickle format generated by matconvnet_hr101_to_tf.py.
      data_dir: A directory which contains videos.
      output_dir: A directory into which videos with detected faces are output.
      lw: Line width of bounding boxes. If zero is specified, it is determined based on the confidence of each detection.
      display: Display tiny face images in a window.
    Returns:
      None.
    """
    # Placeholder for input images. Currently a batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    # Create the tiny face model whose weights are loaded from a pretrained model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    # Find video files in data_dir.
    filenames = []
    for ext in ('*.mp4', '*.avi', '*.ts'):
        filenames.extend(glob.glob(os.path.join(data_dir, ext)))

    # Load the average image and clusters (reference boxes of templates).
    with open(weight_file_path, "rb") as f:
        _, mat_params_dict = pickle.load(f)
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.9
    # config.gpu_options.allow_growth = True

    # main
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        for filename in filenames:
            videoCapture = cv2.VideoCapture(filename)
            frame_numbers = videoCapture.get(cv2.CAP_PROP_FRAME_COUNT)
            fps = videoCapture.get(cv2.CAP_PROP_FPS)
            success, frame = videoCapture.read()
            out_video_size = (int(videoCapture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                              int(videoCapture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            fname = filename.split(os.sep)[-1]
            print("Processing {} ".format(fname))
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            out_path = output_dir + '/' + fname.split('.')[0] + '.avi'
            video_writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc('M', 'P', 'E', 'G'), fps, out_video_size)
            test_bar = tqdm(range(int(frame_numbers)), desc='[processing video and saving result videos]')
            per_fps = fps // 2
            start = time.time()
            for index in test_bar:
                if success and index % per_fps == 0:
                    raw_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    raw_img_f = raw_img.astype(np.float32)

                    def _calc_scales():
                        raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
                        min_scale = min(np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                                        np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
                        max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
                        scales_down = pl.frange(min_scale, 0, 1.)
                        scales_up = pl.frange(0.5, max_scale, 0.5)
                        scales_pow = np.hstack((scales_down, scales_up))
                        scales = np.power(2.0, scales_pow)
                        return scales

                    scales = _calc_scales()

                    # initialize output
                    bboxes = np.empty(shape=(0, 5))

                    # process the input at different scales
                    for s in scales:
                        img = cv2.resize(raw_img_f, (0, 0), fx=s, fy=s, interpolation=cv2.INTER_LINEAR)
                        img = img - average_image
                        img = img[np.newaxis, :]

                        # we don't run every template on every scale; ids of templates to ignore
                        tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
                        ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

                        # run through the net
                        score_final_tf = sess.run(score_final, feed_dict={x: img})

                        # collect scores
                        score_cls_tf, score_reg_tf = score_final_tf[:, :, :, :25], score_final_tf[:, :, :, 25:125]
                        prob_cls_tf = expit(score_cls_tf)
                        prob_cls_tf[0, :, :, ignoredTids] = 0.0

                        def _calc_bounding_boxes():
                            # threshold for detection
                            _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

                            # interpret the heatmap as bounding boxes
                            cy = fy * 8 - 1
                            cx = fx * 8 - 1
                            ch = clusters[fc, 3] - clusters[fc, 1] + 1
                            cw = clusters[fc, 2] - clusters[fc, 0] + 1

                            # extract bounding box refinement
                            Nt = clusters.shape[0]
                            tx = score_reg_tf[0, :, :, 0:Nt]
                            ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                            tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                            th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                            # refine bounding boxes
                            dcx = cw * tx[fy, fx, fc]
                            dcy = ch * ty[fy, fx, fc]
                            rcx = cx + dcx
                            rcy = cy + dcy
                            rcw = cw * np.exp(tw[fy, fx, fc])
                            rch = ch * np.exp(th[fy, fx, fc])

                            scores = score_cls_tf[0, fy, fx, fc]
                            tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2, rcx + rcw / 2, rcy + rch / 2))
                            tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                            tmp_bboxes = tmp_bboxes.transpose()
                            return tmp_bboxes

                        tmp_bboxes = _calc_bounding_boxes()
                        bboxes = np.vstack((bboxes, tmp_bboxes))  # <class 'tuple'>: (5265, 5)

                    # non maximum suppression
                    # refind_idx = util.nms(bboxes, nms_thresh)
                    refind_idx = tf.image.non_max_suppression(
                        tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
                        tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
                        max_output_size=bboxes.shape[0],
                        iou_threshold=nms_thresh)
                    refind_idx = sess.run(refind_idx)
                    refined_bboxes = bboxes[refind_idx]

                    cut_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
                    cut_bounding_boxes(cut_img, refined_bboxes,
                                       output_dir + '/' + fname.split('.')[0] + '/' + str(index * per_fps))
                    overlay_bounding_boxes(raw_img, refined_bboxes, lw)

                    if display:
                        # plt.axis('off')
                        plt.imshow(raw_img)
                        plt.show()

                    # save the frame with bounding boxes
                    raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
                    video_writer.write(raw_img)
                success, frame = videoCapture.read()
            print("time {:.2f} secs for {}".format(time.time() - start, fname))
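# Worked example of the frame-subsampling arithmetic used above (illustrative
# numbers): for a 30 fps input, per_fps = fps // 2 = 15, so frames 0, 15, 30, ...
# are run through the detector, i.e. roughly two detector passes per second of video.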
def evaluate(weight_file_path, data_dir, output_dir, fps, prob_thresh=0.5, nms_thresh=0.1, lw=3, display=True):
    """Detect faces in videos and count them.

    Args:
      prob_thresh: The threshold of detection confidence.
      nms_thresh: The overlap threshold of non maximum suppression.
      weight_file_path: A pretrained weight file in the pickle format generated by matconvnet_hr101_to_tf.py.
      data_dir: A directory which contains videos.
      output_dir: A directory into which videos with detected faces are output.
      fps: Frame rate of the output videos.
      lw: Line width of bounding boxes. If zero is specified, it is determined based on the confidence of each detection.
      display: Display tiny face images in a window.
    Returns:
      None.
    """
    # Placeholder for input images. Currently a batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    # Create the tiny face model whose weights are loaded from a pretrained model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    saved_model = os.path.normpath('networks\\cifar100.h5')
    model_vgg = load_model(saved_model)
    my_layer = 'dense_15'
    intermediate_layer_model = Model(inputs=model_vgg.input, outputs=model_vgg.get_layer(my_layer).output)

    # Find video files in data_dir.
    filenames = []
    for ext in ('*.avi', '*.gif', '*.mp4', '*.wmv'):
        filenames.extend(glob.glob(os.path.join(data_dir, ext)))

    output_file = open("output_file.txt", "w+")
    for video in filenames:
        video_out_name = os.path.basename(video).replace('gif', 'avi', 1)
        video_out_name = os.path.join(output_dir, video_out_name)
        print(video_out_name)

        # Load the video.
        video = cv2.VideoCapture(video)

        # Buffers for tracking faces.
        distancias = []
        refined_bboxes_anterior = []
        faces = []

        # Write video.
        frame_width = 352
        frame_height = 240
        # Define the codec and create a VideoWriter object; the output is stored in an '.avi' file.
        video_out = cv2.VideoWriter(video_out_name, cv2.VideoWriter_fourcc(*'XVID'), fps, (frame_width, frame_height))

        # Load the average image and clusters (reference boxes of templates).
        with open(weight_file_path, "rb") as f:
            _, mat_params_dict = pickle.load(f)
        average_image = model.get_data_by_key("average_image")
        clusters = model.get_data_by_key("clusters")
        clusters_h = clusters[:, 3] - clusters[:, 1] + 1
        clusters_w = clusters[:, 2] - clusters[:, 0] + 1
        normal_idx = np.where(clusters[:, 4] == 1)

        n_frame = 0
        # main
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            try:
                while video.isOpened():
                    _, frame = video.read()
                    raw_img = frame
                    raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)
                    raw_img_f = raw_img.astype(np.float32)

                    def _calc_scales():
                        raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
                        min_scale = min(np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                                        np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
                        max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
                        scales_down = pl.frange(min_scale, 0, 1.)
                        scales_up = pl.frange(0.5, max_scale, 0.5)
                        scales_pow = np.hstack((scales_down, scales_up))
                        scales = np.power(2.0, scales_pow)
                        return scales

                    scales = _calc_scales()
                    start = time.time()

                    # initialize output
                    bboxes = np.empty(shape=(0, 5))

                    # process the input at different scales
                    for s in scales:
                        img = cv2.resize(raw_img_f, (0, 0), fx=s, fy=s, interpolation=cv2.INTER_LINEAR)
                        img = img - average_image
                        img = img[np.newaxis, :]

                        # we don't run every template on every scale; ids of templates to ignore
                        tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
                        ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

                        # run through the net
                        score_final_tf = sess.run(score_final, feed_dict={x: img})

                        # collect scores
                        score_cls_tf, score_reg_tf = score_final_tf[:, :, :, :25], score_final_tf[:, :, :, 25:125]
                        prob_cls_tf = expit(score_cls_tf)
                        prob_cls_tf[0, :, :, ignoredTids] = 0.0

                        def _calc_bounding_boxes():
                            # threshold for detection
                            _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

                            # interpret the heatmap as bounding boxes
                            cy = fy * 8 - 1
                            cx = fx * 8 - 1
                            ch = clusters[fc, 3] - clusters[fc, 1] + 1
                            cw = clusters[fc, 2] - clusters[fc, 0] + 1

                            # extract bounding box refinement
                            Nt = clusters.shape[0]
                            tx = score_reg_tf[0, :, :, 0:Nt]
                            ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                            tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                            th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                            # refine bounding boxes
                            dcx = cw * tx[fy, fx, fc]
                            dcy = ch * ty[fy, fx, fc]
                            rcx = cx + dcx
                            rcy = cy + dcy
                            rcw = cw * np.exp(tw[fy, fx, fc])
                            rch = ch * np.exp(th[fy, fx, fc])

                            scores = score_cls_tf[0, fy, fx, fc]
                            tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2, rcx + rcw / 2, rcy + rch / 2))
                            tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                            tmp_bboxes = tmp_bboxes.transpose()
                            return tmp_bboxes

                        tmp_bboxes = _calc_bounding_boxes()
                        bboxes = np.vstack((bboxes, tmp_bboxes))  # <class 'tuple'>: (5265, 5)

                    # non maximum suppression
                    # refind_idx = util.nms(bboxes, nms_thresh)
                    refind_idx = tf.image.non_max_suppression(
                        tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
                        tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
                        max_output_size=bboxes.shape[0],
                        iou_threshold=nms_thresh)
                    refind_idx = sess.run(refind_idx)
                    refined_bboxes = bboxes[refind_idx]

                    overlay_bounding_boxes(raw_img, refined_bboxes, lw)

                    # Compute the distance between faces in the current frame and the previous frame.
                    # Returns a matrix with two columns - centroid 1 and centroid 2;
                    # the distance between these two points is how far the person moved.
                    something = get_distance_points(refined_bboxes, refined_bboxes_anterior)
                    get_faces_distances(refined_bboxes, faces, n_frame)
                    draw_distance_labels_counter(faces, raw_img)
                    # Append the distances to a vector of all distances computed so far.
                    # distancias.append(something)
                    # Draw the distance between faces, based on the centroids of the faces found earlier.
                    # draw_distance(raw_img, distancias)
                    # The current frame becomes the previous one.
                    refined_bboxes_anterior = refined_bboxes

                    # save the frame with bounding boxes
                    raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
                    video_out.write(raw_img)
                    n_frame = n_frame + 1
                    try:
                        print("time {:.2f} secs for {}_{}".format(time.time() - start, 'frame', n_frame))
                    except Exception:
                        traceback.print_exc()
            except Exception:
                video.release()
                video_out.release()
                traceback.print_exc()

        video.release()
        video_out.release()
        output_file.write(video_out_name + " " + "Esperado: " + " " + "Contado: " + str(len(faces)))
    output_file.close()
def evaluate(img_path, prob_thresh=0.5, nms_thresh=0.1, lw=3, display=False):
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    model = tiny_face_model.Model('/path/to/pkl/file/')
    score_final = model.tiny_face(x)
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")

    # main
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        fname = img_path
        raw_img = cv2.imread(img_path)
        raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)
        raw_img_f = raw_img.astype(np.float32)

        scales = [0.5, 1, 1.5, 2.0]
        start = time.time()

        # initialize output
        bboxes = np.empty(shape=(0, 5))

        # process input at different scales
        for s in scales:
            print("Processing {} at scale {:.4f}".format(fname, s))
            img = cv2.resize(raw_img_f, (0, 0), fx=s, fy=s, interpolation=cv2.INTER_LINEAR)
            img = img - average_image
            img = img[np.newaxis, :]

            # we don't run every template on every scale; ids of templates to ignore
            tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
            ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

            # run through the net
            score_final_tf = sess.run(score_final, feed_dict={x: img})

            # collect scores
            score_cls_tf, score_reg_tf = score_final_tf[:, :, :, :25], score_final_tf[:, :, :, 25:125]
            prob_cls_tf = expit(score_cls_tf)
            prob_cls_tf[0, :, :, ignoredTids] = 0.0

            def _calc_bounding_boxes():
                # threshold for detection
                _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

                # interpret heatmap into bounding boxes
                cy = fy * 8 - 1
                cx = fx * 8 - 1
                ch = clusters[fc, 3] - clusters[fc, 1] + 1
                cw = clusters[fc, 2] - clusters[fc, 0] + 1

                # extract bounding box refinement
                Nt = clusters.shape[0]
                tx = score_reg_tf[0, :, :, 0:Nt]
                ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                # refine bounding boxes
                dcx = cw * tx[fy, fx, fc]
                dcy = ch * ty[fy, fx, fc]
                rcx = cx + dcx
                rcy = cy + dcy
                rcw = cw * np.exp(tw[fy, fx, fc])
                rch = ch * np.exp(th[fy, fx, fc])

                scores = score_cls_tf[0, fy, fx, fc]
                tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2, rcx + rcw / 2, rcy + rch / 2))
                tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                tmp_bboxes = tmp_bboxes.transpose()
                return tmp_bboxes

            tmp_bboxes = _calc_bounding_boxes()
            bboxes = np.vstack((bboxes, tmp_bboxes))  # <class 'tuple'>: (5265, 5)

        print("time {:.2f} secs for {}".format(time.time() - start, fname))

        # non maximum suppression
        # refind_idx = util.nms(bboxes, nms_thresh)
        refind_idx = tf.image.non_max_suppression(
            tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
            tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
            max_output_size=bboxes.shape[0],
            iou_threshold=nms_thresh)
        refind_idx = sess.run(refind_idx)
        refined_bboxes = bboxes[refind_idx]
        return refined_bboxes
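# Hedged usage sketch for the single-image evaluate() above; the image path is an
# assumption. Each returned row is [x1, y1, x2, y2, score], where the score is still
# a raw classification logit, so expit() maps it back to a probability.
boxes = evaluate('input/group_photo.jpg', prob_thresh=0.5)
for x1, y1, x2, y2, score in boxes:
    print("face at ({:.0f}, {:.0f})-({:.0f}, {:.0f}), p={:.2f}".format(x1, y1, x2, y2, expit(score)))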
def faceDetection_TinyFaces(weight_file_path, videoFile, sampling_interval, detectionFrames,
                            prob_thresh=0.5, nms_thresh=0.1, lw=3, display=False, newScale=360):
    """Detect faces in a video.

    Input:
      weight_file_path: A pretrained weight file in the pickle format.
      videoFile: Path to the video.
      sampling_interval (float): Interval in seconds at which face detection is performed.
      detectionFrames (list of int): Frame numbers for shot boundaries.
      prob_thresh: The threshold of detection confidence.
      nms_thresh: The overlap threshold of non maximum suppression.
      lw: Line width of bounding boxes.
      display: Display tiny face images in a window.
      newScale: Height to which images are rescaled before performing detection.
        Smaller sizes save processing time at the cost of accuracy.
    Output:
      detections (nested list of int): List of bounding boxes for detected faces in the format [xmin, ymin, xmax, ymax].
      indices (list of int): Frame numbers corresponding to the bounding boxes in 'detections'.
    """
    RESIZED_IMAGE_HEIGHT = newScale
    print('Resizing images to height ', RESIZED_IMAGE_HEIGHT)

    vid = cv2.VideoCapture(videoFile)
    frame_width = int(vid.get(3))   # CAP_PROP_FRAME_WIDTH
    frame_height = int(vid.get(4))  # CAP_PROP_FRAME_HEIGHT
    total_frames = int(vid.get(7))  # CAP_PROP_FRAME_COUNT
    FPS = vid.get(cv2.CAP_PROP_FPS)
    sampling_rate = int(FPS * sampling_interval)
    # TODO: may set an adaptive sampling rate based on shot length
    print(videoFile, " : total_frames:", total_frames, ", FPS:", FPS, ' SR:', sampling_rate)

    detections = []  # face detection bounding boxes
    indices = []     # frame number for each detection

    # Placeholder for input images. Currently a batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    # Create the tiny face model whose weights are loaded from a pretrained model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    # Load the average image and clusters (reference boxes of templates).
    with open(weight_file_path, "rb") as f:
        _, mat_params_dict = pickle.load(f)
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)

    # main
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for frameInd in range(0, total_frames):
            if frameInd % (int(total_frames / 10)) == 0:
                print(str(int(frameInd / int(total_frames / 10))) + '0 %')
            ret, frame = vid.read()
            if ret == False:
                continue
            # subsample frames
            if frameInd % sampling_rate != 0 and (frameInd - 2) not in detectionFrames:
                continue
            raw_img = frame
            org_raw_img = frame[:]
            myScale = float(RESIZED_IMAGE_HEIGHT) / raw_img.shape[0]
            raw_img = cv2.resize(raw_img, (int(raw_img.shape[1] * myScale), RESIZED_IMAGE_HEIGHT),
                                 interpolation=cv2.INTER_CUBIC)
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)
            raw_img_f = raw_img.astype(np.float32)

            def _calc_scales():
                raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
                min_scale = min(np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                                np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
                max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM_TINYFACES))
                scales_down = pl.frange(min_scale, 0, 1.)
                scales_up = pl.frange(0.5, max_scale, 0.5)
                scales_pow = np.hstack((scales_down, scales_up))
                scales = np.power(2.0, scales_pow)
                return scales

            scales = _calc_scales()
            start = time.time()

            # initialize output
            bboxes = np.empty(shape=(0, 5))

            # process the input at different scales
            for s in scales:
                img = cv2.resize(raw_img_f, (0, 0), fx=s, fy=s, interpolation=cv2.INTER_LINEAR)
                img = img - average_image
                img = img[np.newaxis, :]

                # we don't run every template on every scale; ids of templates to ignore
                tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
                ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

                # run through the net
                score_final_tf = sess.run(score_final, feed_dict={x: img})

                # collect scores
                score_cls_tf, score_reg_tf = score_final_tf[:, :, :, :25], score_final_tf[:, :, :, 25:125]
                prob_cls_tf = expit(score_cls_tf)
                prob_cls_tf[0, :, :, ignoredTids] = 0.0

                def _calc_bounding_boxes():
                    # threshold for detection
                    _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

                    # interpret the heatmap as bounding boxes
                    cy = fy * 8 - 1
                    cx = fx * 8 - 1
                    ch = clusters[fc, 3] - clusters[fc, 1] + 1
                    cw = clusters[fc, 2] - clusters[fc, 0] + 1

                    # extract bounding box refinement
                    Nt = clusters.shape[0]
                    tx = score_reg_tf[0, :, :, 0:Nt]
                    ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                    tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                    th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                    # refine bounding boxes
                    dcx = cw * tx[fy, fx, fc]
                    dcy = ch * ty[fy, fx, fc]
                    rcx = cx + dcx
                    rcy = cy + dcy
                    rcw = cw * np.exp(tw[fy, fx, fc])
                    rch = ch * np.exp(th[fy, fx, fc])

                    scores = score_cls_tf[0, fy, fx, fc]
                    tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2, rcx + rcw / 2, rcy + rch / 2))
                    tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                    tmp_bboxes = tmp_bboxes.transpose()
                    return tmp_bboxes

                tmp_bboxes = _calc_bounding_boxes()
                bboxes = np.vstack((bboxes, tmp_bboxes))  # <class 'tuple'>: (5265, 5)

            print("Took {:.2f} secs for Frame {}".format(time.time() - start, str(frameInd)))

            # non maximum suppression
            # refind_idx = util.nms(bboxes, nms_thresh)
            refind_idx = tf.image.non_max_suppression(
                tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
                tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
                max_output_size=bboxes.shape[0],
                iou_threshold=nms_thresh)
            refind_idx = sess.run(refind_idx)
            refined_bboxes = bboxes[refind_idx]

            # Map boxes back to the original resolution and keep integer corners.
            finalBboxes = []
            for ind_bb in range(len(refined_bboxes)):
                refined_bboxes[ind_bb][0] /= myScale
                refined_bboxes[ind_bb][1] /= myScale
                refined_bboxes[ind_bb][2] /= myScale
                refined_bboxes[ind_bb][3] /= myScale
                nbbox = list(refined_bboxes[ind_bb][0:4].astype(int))
                finalBboxes.append(nbbox)

            # overlay_bounding_boxes(org_raw_img, refined_bboxes, lw)
            # if display:
            #     plt.axis('off')
            #     plt.imshow(org_raw_img)
            #     plt.show()
            # showImg(org_raw_img, 50)
            # save image with bounding boxes
            # raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
            # cv2.imwrite(os.path.join(output_dir, str(ind)), raw_img)
            # faces = getFaceBoundingBoxesMTCNN(frame)

            detections.append(finalBboxes)
            indices.append(frameInd)

    return detections, indices
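# Hedged usage sketch for faceDetection_TinyFaces(); the file paths, the 0.5 s
# sampling interval, and the empty shot-boundary list are illustrative assumptions.
dets, frame_ids = faceDetection_TinyFaces('hr_res101_weights.pkl', 'clip.mp4',
                                          sampling_interval=0.5, detectionFrames=[])
for f_id, boxes in zip(frame_ids, dets):
    print("frame {}: {} face(s)".format(f_id, len(boxes)))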
def get_faceboxes(image, threshold=0.5, nms_thresh=0.1, lw=3):
    # placeholder of input images. Currently batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    # Create the tiny face model which weights are loaded from a pretrained model.
    model = tiny_face_model.Model('assets/tiny_faces.pkl')
    score_final = model.tiny_face(x)

    # Load an average image and clusters (reference boxes of templates).
    with open('assets/tiny_faces.pkl', "rb") as f:
        _, mat_params_dict = pickle.load(f)
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)

    # main
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        raw_img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        raw_img_f = raw_img.astype(np.float32)

        def _calc_scales():
            raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
            min_scale = min(np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                            np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
            max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
            scales_down = pl.frange(min_scale, 0, 1.)
            scales_up = pl.frange(0.5, max_scale, 0.5)
            scales_pow = np.hstack((scales_down, scales_up))
            scales = np.power(2.0, scales_pow)
            return scales

        scales = _calc_scales()
        start = time.time()

        # initialize output
        bboxes = np.empty(shape=(0, 5))

        # process input at different scales
        for s in scales:
            print("Processing image at scale {:.4f}".format(s))
            img = cv2.resize(raw_img_f, (0, 0), fx=s, fy=s, interpolation=cv2.INTER_LINEAR)
            img = img - average_image
            img = img[np.newaxis, :]

            # we don't run every template on every scale; ids of templates to ignore
            tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
            ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

            # run through the net
            score_final_tf = sess.run(score_final, feed_dict={x: img})

            # collect scores
            score_cls_tf, score_reg_tf = score_final_tf[:, :, :, :25], score_final_tf[:, :, :, 25:125]
            prob_cls_tf = expit(score_cls_tf)
            prob_cls_tf[0, :, :, ignoredTids] = 0.0

            def _calc_bounding_boxes():
                # threshold for detection
                _, fy, fx, fc = np.where(prob_cls_tf > threshold)

                # interpret heatmap into bounding boxes
                cy = fy * 8 - 1
                cx = fx * 8 - 1
                ch = clusters[fc, 3] - clusters[fc, 1] + 1
                cw = clusters[fc, 2] - clusters[fc, 0] + 1

                # extract bounding box refinement
                Nt = clusters.shape[0]
                tx = score_reg_tf[0, :, :, 0:Nt]
                ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                # refine bounding boxes
                dcx = cw * tx[fy, fx, fc]
                dcy = ch * ty[fy, fx, fc]
                rcx = cx + dcx
                rcy = cy + dcy
                rcw = cw * np.exp(tw[fy, fx, fc])
                rch = ch * np.exp(th[fy, fx, fc])

                scores = score_cls_tf[0, fy, fx, fc]
                tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2, rcx + rcw / 2, rcy + rch / 2))
                tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                tmp_bboxes = tmp_bboxes.transpose()
                return tmp_bboxes

            tmp_bboxes = _calc_bounding_boxes()  # <class 'tuple'>: (5265, 5)
            bboxes = np.vstack((bboxes, tmp_bboxes))

        refind_idx = tf.image.non_max_suppression(
            tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
            tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
            max_output_size=bboxes.shape[0],
            iou_threshold=nms_thresh)
        refind_idx = sess.run(refind_idx)
        refined_bboxes = bboxes[refind_idx]

    # Reset default graph
    tf.reset_default_graph()
    return refined_bboxes[:, :4].astype(np.int)
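# Hedged usage sketch for get_faceboxes(); the input image path is an assumption.
# The function returns integer [x1, y1, x2, y2] rows in the input image's coordinates.
image = cv2.imread('sample.jpg')
for (x1, y1, x2, y2) in get_faceboxes(image, threshold=0.5):
    print("face box:", x1, y1, x2, y2)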
def evaluate(weight_file_path, prob_thresh=0.1, nms_thresh=0.1, lw=3, display=False):
    """Detect faces in a video and count them over time.

    Args:
      prob_thresh: The threshold of detection confidence.
      nms_thresh: The overlap threshold of non maximum suppression.
      weight_file_path: A pretrained weight file in the pickle format generated by matconvnet_hr101_to_tf.py.
      lw: Line width of bounding boxes. If zero is specified, it is determined based on the confidence of each detection.
      display: Display tiny face images in a window.
    Returns:
      None.
    """
    # Placeholder for input images. Currently a batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    # Create the tiny face model whose weights are loaded from a pretrained model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    # Load the average image and clusters (reference boxes of templates).
    with open(weight_file_path, "rb") as f:
        _, mat_params_dict = pickle.load(f)
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)

    # main
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        video_capture = cv2.VideoCapture('/home/sidhu/Desktop/Crowd_Count/input/video/poh.mp4')
        skip_frame = True
        f_no = 0
        x_arr = []
        y_arr = []
        while True:
            # Capture frame-by-frame; process every other frame.
            ret, frame = video_capture.read()
            skip_frame = not skip_frame
            if not skip_frame:
                continue
            f_no = f_no + 1
            fsec = f_no / 24
            x_arr.append(fsec)
            raw_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            raw_img_f = raw_img.astype(np.float32)

            def _calc_scales():
                raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
                min_scale = min(np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                                np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
                max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
                scales_down = pl.frange(min_scale, 0, 1.)
                scales_up = pl.frange(0.5, max_scale, 0.5)
                scales_pow = np.hstack((scales_down, scales_up))
                scales = np.power(2.0, scales_pow)
                return scales

            scales = _calc_scales()
            start = time.time()

            # initialize output
            bboxes = np.empty(shape=(0, 5))

            # process the input at different scales
            for s in scales:
                img = cv2.resize(raw_img_f, (0, 0), fx=s, fy=s, interpolation=cv2.INTER_LINEAR)
                img = img - average_image
                img = img[np.newaxis, :]

                # we don't run every template on every scale; ids of templates to ignore
                tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
                ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

                # run through the net
                score_final_tf = sess.run(score_final, feed_dict={x: img})

                # collect scores
                score_cls_tf, score_reg_tf = score_final_tf[:, :, :, :25], score_final_tf[:, :, :, 25:125]
                prob_cls_tf = expit(score_cls_tf)
                prob_cls_tf[0, :, :, ignoredTids] = 0.0

                def _calc_bounding_boxes():
                    # threshold for detection
                    _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

                    # interpret the heatmap as bounding boxes
                    cy = fy * 8 - 1
                    cx = fx * 8 - 1
                    ch = clusters[fc, 3] - clusters[fc, 1] + 1
                    cw = clusters[fc, 2] - clusters[fc, 0] + 1

                    # extract bounding box refinement
                    Nt = clusters.shape[0]
                    tx = score_reg_tf[0, :, :, 0:Nt]
                    ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                    tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                    th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                    # refine bounding boxes
                    dcx = cw * tx[fy, fx, fc]
                    dcy = ch * ty[fy, fx, fc]
                    rcx = cx + dcx
                    rcy = cy + dcy
                    rcw = cw * np.exp(tw[fy, fx, fc])
                    rch = ch * np.exp(th[fy, fx, fc])

                    scores = score_cls_tf[0, fy, fx, fc]
                    tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2, rcx + rcw / 2, rcy + rch / 2))
                    tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                    tmp_bboxes = tmp_bboxes.transpose()
                    return tmp_bboxes

                tmp_bboxes = _calc_bounding_boxes()
                bboxes = np.vstack((bboxes, tmp_bboxes))  # <class 'tuple'>: (5265, 5)

            # non maximum suppression
            refind_idx = tf.image.non_max_suppression(
                tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
                tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
                max_output_size=bboxes.shape[0],
                iou_threshold=nms_thresh)
            refind_idx = sess.run(refind_idx)
            refined_bboxes = bboxes[refind_idx]

            overlay_bounding_boxes(raw_img, refined_bboxes, lw, y_arr)

            # Display the resulting frame with bounding boxes.
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
            cv2.imshow('Video', raw_img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        # When everything is done, release the capture.
        video_capture.release()
        cv2.destroyAllWindows()

        plt.plot(x_arr, y_arr)
        plt.xlabel('Time (sec)')
        plt.ylabel('Count')
        plt.savefig('output/analytics/fig.png')
def evaluate_and_crop(weight_file_path, data_dir, output_dir, sample_ratio=0.1,
                      prob_thresh=0.5, nms_thresh=0.1, lw=3, display=False):
    """Detect faces in images and crop them.

    Args:
      prob_thresh: The threshold of detection confidence.
      nms_thresh: The overlap threshold of non maximum suppression.
      weight_file_path: A pretrained weight file in the pickle format generated by matconvnet_hr101_to_tf.py.
      data_dir: A directory which contains images, grouped into per-movie subdirectories.
      output_dir: A directory into which cropped faces and a bboxes.csv file are output.
      sample_ratio: Fraction of the found images that is randomly sampled for processing.
      lw: Line width of bounding boxes. If zero is specified, it is determined based on the confidence of each detection.
      display: Display tiny face images in a window.
    Returns:
      None.
    """
    # Placeholder for input images. Currently a batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    # Create the tiny face model whose weights are loaded from a pretrained model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    # Find image files in data_dir.
    exts = ['png', 'jpg', 'jpeg']
    file_boxes = open(os.path.join(output_dir, "bboxes.csv"), "w")

    # Load the average image and clusters (reference boxes of templates).
    with open(weight_file_path, "rb") as f:
        _, mat_params_dict = pickle.load(f)
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)

    # main
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        in_out = []
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)
        for dirname, dirnames, _ in os.walk(data_dir):
            for movie_dir in dirnames:
                current_dir = os.path.join(dirname, movie_dir)
                vid_paths = os.listdir(current_dir)
                current_out_dir = os.path.join(output_dir, movie_dir)
                os.mkdir(current_out_dir)
                for filename in vid_paths:
                    file_path = os.path.join(current_dir, filename)
                    in_out.append((file_path, current_out_dir))
        print(in_out)
        in_out = [(x, y) for (x, y) in in_out if x.split('.')[-1] in exts]
        in_out = random.sample(in_out, int(sample_ratio * len(in_out)))
        print(in_out)

        for (filename, out_dir) in in_out:
            print("Processing... ", filename)
            fname = filename.split(os.sep)[-1]
            raw_img = cv2.imread(filename)
            # Skip unreadable or degenerate images.
            if not (type(raw_img) is np.ndarray and raw_img.shape[0] > 10
                    and raw_img.shape[1] > 10 and raw_img.shape[2] == 3):
                continue
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)
            raw_img_f = raw_img.astype(np.float32)

            def _calc_scales():
                raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
                min_scale = min(np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                                np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
                max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
                scales_down = pl.frange(min_scale, 0, 1.)
                scales_up = pl.frange(0.5, max_scale, 0.5)
                scales_pow = np.hstack((scales_down, scales_up))
                scales = np.power(2.0, scales_pow)
                return scales

            scales = _calc_scales()
            start = time.time()

            # initialize output
            bboxes = np.empty(shape=(0, 5))

            # process the input at different scales
            for s in scales:
                print("Processing {} at scale {:.4f}".format(fname, s))
                img = cv2.resize(raw_img_f, (0, 0), fx=s, fy=s, interpolation=cv2.INTER_LINEAR)
                img = img - average_image
                img = img[np.newaxis, :]

                # we don't run every template on every scale; ids of templates to ignore
                tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
                ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

                # run through the net
                score_final_tf = sess.run(score_final, feed_dict={x: img})

                # collect scores
                score_cls_tf, score_reg_tf = score_final_tf[:, :, :, :25], score_final_tf[:, :, :, 25:125]
                prob_cls_tf = expit(score_cls_tf)
                prob_cls_tf[0, :, :, ignoredTids] = 0.0

                def _calc_bounding_boxes():
                    # threshold for detection
                    _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

                    # interpret the heatmap as bounding boxes
                    cy = fy * 8 - 1
                    cx = fx * 8 - 1
                    ch = clusters[fc, 3] - clusters[fc, 1] + 1
                    cw = clusters[fc, 2] - clusters[fc, 0] + 1

                    # extract bounding box refinement
                    Nt = clusters.shape[0]
                    tx = score_reg_tf[0, :, :, 0:Nt]
                    ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                    tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                    th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                    # refine bounding boxes
                    dcx = cw * tx[fy, fx, fc]
                    dcy = ch * ty[fy, fx, fc]
                    rcx = cx + dcx
                    rcy = cy + dcy
                    rcw = cw * np.exp(tw[fy, fx, fc])
                    rch = ch * np.exp(th[fy, fx, fc])

                    scores = score_cls_tf[0, fy, fx, fc]
                    tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2, rcx + rcw / 2, rcy + rch / 2))
                    tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                    tmp_bboxes = tmp_bboxes.transpose()
                    return tmp_bboxes

                tmp_bboxes = _calc_bounding_boxes()  # <class 'tuple'>: (5265, 5)
                bboxes = np.vstack((bboxes, tmp_bboxes))

            print("time {:.2f} secs for {}".format(time.time() - start, fname))

            # non maximum suppression
            # refind_idx = util.nms(bboxes, nms_thresh)
            refind_idx = tf.image.non_max_suppression(
                tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
                tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
                max_output_size=bboxes.shape[0],
                iou_threshold=nms_thresh)
            refind_idx = sess.run(refind_idx)
            refined_bboxes = bboxes[refind_idx]
            print("bboxes", refined_bboxes)

            cropped = [(crop_image(raw_img, bbox), bbox) for bbox in refined_bboxes]
            # overlay_bounding_boxes(raw_img, refined_bboxes, lw)

            if display:
                # plt.axis('off')
                plt.imshow(raw_img)
                plt.show()

            for i, (img, bbox) in zip(range(len(cropped)), cropped):
                cimig = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                cv2.imwrite(os.path.join(out_dir, str(i) + "-" + fname), cimig)
                file_boxes.write(os.path.join(out_dir, str(i) + "-" + fname) + "," +
                                 ",".join([str(int(x)) for x in bbox]) + "\n")

    file_boxes.close()
def evaluate(weight_file_path, data_dir, output_dir, prob_thresh=0.5, nms_thresh=0.1, lw=3, display=False):
    # placeholder of input images. Currently batch size of one is supported.
    x = tf.placeholder(tf.float32, [1, None, None, 3])  # n, h, w, c
    # Create the tiny face model which weights are loaded from a pretrained model.
    model = tiny_face_model.Model(weight_file_path)
    score_final = model.tiny_face(x)

    # Find image files in data_dir.
    filenames = []
    for ext in ('*.png', '*.gif', '*.jpg', '*.jpeg'):
        filenames.extend(glob.glob(os.path.join(data_dir, ext)))

    # Load an average image and clusters (reference boxes of templates).
    with open(weight_file_path, "rb") as f:
        _, mat_params_dict = pickle.load(f)
    average_image = model.get_data_by_key("average_image")
    clusters = model.get_data_by_key("clusters")
    clusters_h = clusters[:, 3] - clusters[:, 1] + 1
    clusters_w = clusters[:, 2] - clusters[:, 0] + 1
    normal_idx = np.where(clusters[:, 4] == 1)

    # main
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _results = []
        for filename in filenames:
            fname = filename.split(os.sep)[-1]
            raw_img = cv2.imread(filename)
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB)
            raw_img_f = raw_img.astype(np.float32)

            def _calc_scales():
                raw_h, raw_w = raw_img.shape[0], raw_img.shape[1]
                min_scale = min(np.floor(np.log2(np.max(clusters_w[normal_idx] / raw_w))),
                                np.floor(np.log2(np.max(clusters_h[normal_idx] / raw_h))))
                max_scale = min(1.0, -np.log2(max(raw_h, raw_w) / MAX_INPUT_DIM))
                scales_down = pl.frange(min_scale, 0, 1.)
                scales_up = pl.frange(0.5, max_scale, 0.5)
                scales_pow = np.hstack((scales_down, scales_up))
                scales = np.power(2.0, scales_pow)
                return scales

            scales = _calc_scales()
            start = time.time()

            # initialize output
            bboxes = np.empty(shape=(0, 5))

            # process input at different scales
            for s in scales:
                print("Processing {} at scale {:.4f}".format(fname, s))
                img = cv2.resize(raw_img_f, (0, 0), fx=s, fy=s, interpolation=cv2.INTER_LINEAR)
                img = img - average_image
                img = img[np.newaxis, :]

                # we don't run every template on every scale; ids of templates to ignore
                tids = list(range(4, 12)) + ([] if s <= 1.0 else list(range(18, 25)))
                ignoredTids = list(set(range(0, clusters.shape[0])) - set(tids))

                # run through the net
                score_final_tf = sess.run(score_final, feed_dict={x: img})

                # collect scores
                score_cls_tf, score_reg_tf = score_final_tf[:, :, :, :25], score_final_tf[:, :, :, 25:125]
                prob_cls_tf = expit(score_cls_tf)
                prob_cls_tf[0, :, :, ignoredTids] = 0.0

                def _calc_bounding_boxes():
                    # threshold for detection
                    _, fy, fx, fc = np.where(prob_cls_tf > prob_thresh)

                    # interpret heatmap into bounding boxes
                    cy = fy * 8 - 1
                    cx = fx * 8 - 1
                    ch = clusters[fc, 3] - clusters[fc, 1] + 1
                    cw = clusters[fc, 2] - clusters[fc, 0] + 1

                    # extract bounding box refinement
                    Nt = clusters.shape[0]
                    tx = score_reg_tf[0, :, :, 0:Nt]
                    ty = score_reg_tf[0, :, :, Nt:2 * Nt]
                    tw = score_reg_tf[0, :, :, 2 * Nt:3 * Nt]
                    th = score_reg_tf[0, :, :, 3 * Nt:4 * Nt]

                    # refine bounding boxes
                    dcx = cw * tx[fy, fx, fc]
                    dcy = ch * ty[fy, fx, fc]
                    rcx = cx + dcx
                    rcy = cy + dcy
                    rcw = cw * np.exp(tw[fy, fx, fc])
                    rch = ch * np.exp(th[fy, fx, fc])

                    scores = score_cls_tf[0, fy, fx, fc]
                    tmp_bboxes = np.vstack((rcx - rcw / 2, rcy - rch / 2, rcx + rcw / 2, rcy + rch / 2))
                    tmp_bboxes = np.vstack((tmp_bboxes / s, scores))
                    tmp_bboxes = tmp_bboxes.transpose()
                    return tmp_bboxes

                tmp_bboxes = _calc_bounding_boxes()
                bboxes = np.vstack((bboxes, tmp_bboxes))  # <class 'tuple'>: (5265, 5)

            print("time {:.2f} secs for {}".format(time.time() - start, fname))

            # non maximum suppression
            # refind_idx = util.nms(bboxes, nms_thresh)
            refind_idx = tf.image.non_max_suppression(
                tf.convert_to_tensor(bboxes[:, :4], dtype=tf.float32),
                tf.convert_to_tensor(bboxes[:, 4], dtype=tf.float32),
                max_output_size=bboxes.shape[0],
                iou_threshold=nms_thresh)
            refind_idx = sess.run(refind_idx)
            refined_bboxes = bboxes[refind_idx]

            _result = []
            for r in refined_bboxes:
                _score = expit(r[4])
                _r = [int(x) for x in r[:4]]
                print("{} {} {} {} {}".format(_score, _r[0], _r[1], _r[2], _r[3]))
                _result.append([_r[0], _r[1], _r[2], _r[3], _score])

            overlay_bounding_boxes(raw_img, refined_bboxes, lw)

            if display:
                # plt.axis('off')
                plt.imshow(raw_img)
                plt.show()

            # save image with bounding boxes
            raw_img = cv2.cvtColor(raw_img, cv2.COLOR_RGB2BGR)
            cv2.imwrite(os.path.join(output_dir, fname), raw_img)
            _results.append(_result)

    return _results
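# Hedged usage sketch for the evaluate() variant above, which both writes annotated
# images to output_dir and returns per-image [x1, y1, x2, y2, probability] lists;
# the directory and weight-file paths are illustrative assumptions.
results = evaluate('weights/hr_res101.pkl', 'input_images/', 'output_images/')
for image_boxes in results:
    print("{} face(s) detected".format(len(image_boxes)))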