def execute(self, columns):
    [img, bboxes] = columns
    [h, w] = img.shape[:2]
    # TODO(wcrichto): make this batched
    out_size = 160
    bboxes = readers.bboxes(bboxes, self.protobufs)
    outputs = b''
    for bbox in bboxes:
        # NOTE: mtcnn output is not normalized, so the de-normalization
        # factors have been removed here.
        face_img = img[int(bbox.y1):int(bbox.y2), int(bbox.x1):int(bbox.x2)]
        [fh, fw] = face_img.shape[:2]
        if fh == 0 or fw == 0:
            outputs += np.zeros(128, dtype=np.float32).tobytes()
        else:
            face_img = cv2.resize(face_img, (out_size, out_size))
            face_img = facenet.prewhiten(face_img)
            embs = self.sess.run(
                self.embeddings,
                feed_dict={
                    self.images_placeholder: [face_img],
                    self.phase_train_placeholder: False
                })
            outputs += embs[0].tobytes()
    return [outputs]

def execute(self, *input_columns) -> bytes:
    bboxes_list = []
    for c in input_columns:
        bboxes_list += readers.bboxes(c, self._config.protobufs)
    nmsed_bboxes = bboxes.nms(bboxes_list, self._threshold)
    return writers.bboxes(nmsed_bboxes, self._config.protobufs)

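# A minimal sketch of the non-maximum suppression step that bboxes.nms is
# assumed to perform above: greedily keep the highest-scoring box and drop
# any box whose overlap with an already-kept box exceeds the threshold. The
# actual scannertools implementation may differ in overlap metric and
# tie-breaking; nms_sketch and its arguments are illustrative names only.
def nms_sketch(boxes, iou_threshold):
    def iou(a, b):
        # Intersection-over-union of two boxes in (x1, y1, x2, y2) form.
        ix1, iy1 = max(a.x1, b.x1), max(a.y1, b.y1)
        ix2, iy2 = min(a.x2, b.x2), min(a.y2, b.y2)
        inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
        area_a = (a.x2 - a.x1) * (a.y2 - a.y1)
        area_b = (b.x2 - b.x1) * (b.y2 - b.y1)
        return inter / (area_a + area_b - inter + 1e-9)

    kept = []
    for box in sorted(boxes, key=lambda b: b.score, reverse=True):
        if all(iou(box, k) <= iou_threshold for k in kept):
            kept.append(box)
    return kept
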
def execute(self, frame: Sequence[FrameType], bboxes: Sequence[bytes]) -> Sequence[bytes]:
    from PIL import Image
    from torch.autograd import Variable
    import torch

    H, W = frame[0].shape[:2]
    counts = []
    images = []
    for i, (fram, bbs) in enumerate(zip(frame, bboxes)):
        bbs = readers.bboxes(bbs, self.config.protobufs)
        # counts[i] = (offset of this frame's faces in `images`, face count).
        counts.append((counts[i - 1][0] + counts[i - 1][1] if i > 0 else 0,
                       len(bbs)))
        if len(bbs) == 0:
            raise Exception("No bounding boxes")
        for bbox in bbs:
            x1 = int(bbox.x1 * W)
            y1 = int(bbox.y1 * H)
            x2 = int(bbox.x2 * W)
            y2 = int(bbox.y2 * H)
            # Expand to a square crop around the box center, clamped to the
            # frame boundary.
            w = max(y2 - y1, x2 - x1) * 3 // 4
            cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
            x1 = cx - w if cx - w > 0 else 0
            x2 = cx + w if cx + w < W else W
            y1 = cy - w if cy - w > 0 else 0
            y2 = cy + w if cy + w < H else H
            cropped = fram[y1:y2, x1:x2, :]
            images.append(cropped)

    all_scores = []
    for i in range(0, len(images), BATCH_SIZE):
        tensor = self.images_to_tensor([
            self.transform(Image.fromarray(img))
            for img in images[i:i + BATCH_SIZE]
        ])
        var = Variable(tensor if self.cpu_only else tensor.cuda(),
                       requires_grad=False)
        all_scores.append(self.model(var))

    # Concatenate per-batch outputs so each attribute head has one tensor
    # covering all faces.
    scores = [
        torch.cat([batch_scores[i] for batch_scores in all_scores], dim=0)
        for i in range(len(all_scores[0]))
    ]

    all_att = []
    for k in range(len(frame)):
        (idx, n) = counts[k]
        predicted_attributes = np.zeros((n, len(scores)), dtype=np.int32)
        for i, attrib_score in enumerate(scores):
            _, predicted = torch.max(attrib_score[idx:idx + n, :], 1)
            predicted_attributes[:, i] = predicted.cpu().data.numpy().astype(np.int32)
        all_att.append(pickle.dumps(predicted_attributes))
    return all_att

def execute(self, frame: FrameType, bboxes: bytes) -> bytes:
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    [h, w] = frame.shape[:2]
    bboxes = readers.bboxes(bboxes, self.config.protobufs)
    frames = [
        frame[int(bbox.y1 * h):int(bbox.y2 * h),
              int(bbox.x1 * w):int(bbox.x2 * w)] for bbox in bboxes
    ]
    genders = self.rc.get_gender_batch(frames)
    return pickle.dumps(genders)

def handle(self, *args, **options):
    with open(options['path']) as f:
        paths = [s.strip() for s in f.readlines()]

    with Database() as db:
        labeler, _ = Labeler.objects.get_or_create(name=options['bbox_labeler'])
        # Skip videos that are missing or already labeled.
        filtered = []
        for path in paths:
            try:
                video = Video.objects.get(path=path)
            except Video.DoesNotExist:
                continue
            if len(Face.objects.filter(person__frame__video=video,
                                       labeler=labeler)) > 0:
                continue
            filtered.append(path)

        stride = 24

        # Run the detector via Scanner
        faces_c = pipelines.detect_faces(
            db, [db.table(path).column('frame') for path in filtered],
            db.sampler.strided(stride), 'tmp_faces')

        for path, video_faces_table in zip(filtered, faces_c):
            video = Video.objects.filter(path=path).get()
            table = db.table(path)
            imgs = table.load(['frame'],
                              rows=list(range(0, table.num_rows(), stride)))
            video_faces = video_faces_table.load(
                ['bboxes'], lambda lst, db: readers.bboxes(lst[0], db.protobufs))
            for (i, frame_faces), (_, img) in zip(video_faces, imgs):
                frame = Frame.objects.get(video=video, number=i * stride)
                for bbox in frame_faces:
                    if labeler.name == 'dummy' and random.randint(0, 10) == 1:
                        # Generate dummy labels, sometimes.
                        # TODO: add boundary checks; shouldn't matter much though.
                        bbox.x1 += 50
                        bbox.x2 += 50
                        bbox.y1 += 50
                        bbox.y2 += 50
                    p = Person(frame=frame)
                    p.save()
                    f = Face(person=p)
                    f.bbox_x1 = bbox.x1 / video.width
                    f.bbox_x2 = bbox.x2 / video.width
                    f.bbox_y1 = bbox.y1 / video.height
                    f.bbox_y2 = bbox.y2 / video.height
                    f.bbox_score = bbox.score
                    f.labeler = labeler
                    f.save()

def bbox_to_json(config, bboxes: bytes, frame_id: bytes) -> bytes:
    bboxes = readers.bboxes(bboxes, config.protobufs)
    frame_id = json.loads(frame_id.decode('utf-8'))[0]['id']
    return json.dumps([{
        'bbox_x1': bb.x1,
        'bbox_x2': bb.x2,
        'bbox_y1': bb.y1,
        'bbox_y2': bb.y2,
        'probability': bb.score,
        'label': bb.label,
        'frame_id': frame_id
    } for bb in bboxes]).encode('utf-8')

def faces_to_json(config, bboxes: bytes, frame_ids: bytes) -> bytes:
    faces = readers.bboxes(bboxes, config.protobufs)
    frame_id = json.loads(frame_ids.decode('utf-8'))[0]['id']
    return json.dumps([{
        'frame_id': frame_id,
        'bbox_x1': f.x1,
        'bbox_x2': f.x2,
        'bbox_y1': f.y1,
        'bbox_y2': f.y2,
        'probability': f.score,
        'labeler_id': labeler_id,
        **face_defaults
    } for f in faces]).encode('utf-8')

def execute(self, frame: FrameType, bboxes: bytes) -> bytes:
    import facenet
    import cv2
    import tensorflow as tf

    if self.images_placeholder is None:
        print('Loading model...')
        with self.g.as_default():
            with self.sess.as_default():
                model_path = self.config.args['model_dir']
                meta_file, ckpt_file = facenet.get_model_filenames(model_path)
                saver = tf.train.import_meta_graph(
                    os.path.join(model_path, meta_file))
                saver.restore(self.sess, os.path.join(model_path, ckpt_file))
                self.images_placeholder = tf.get_default_graph() \
                    .get_tensor_by_name('input:0')
                self.embeddings = tf.get_default_graph() \
                    .get_tensor_by_name('embeddings:0')
                self.phase_train_placeholder = tf.get_default_graph() \
                    .get_tensor_by_name('phase_train:0')
        print('Model loaded!')

    [h, w] = frame.shape[:2]
    out_size = 160
    bboxes = readers.bboxes(bboxes, self.config.protobufs)
    outputs = b''
    for bbox in bboxes:
        # NOTE: if using the output of mtcnn, the boxes are not normalized,
        # so remove these de-normalization factors.
        face_img = frame[int(bbox.y1 * h):int(bbox.y2 * h),
                         int(bbox.x1 * w):int(bbox.x2 * w)]
        [fh, fw] = face_img.shape[:2]
        if fh == 0 or fw == 0:
            outputs += np.zeros(128, dtype=np.float32).tobytes()
        else:
            face_img = cv2.resize(face_img, (out_size, out_size))
            face_img = facenet.prewhiten(face_img)
            embs = self.sess.run(
                self.embeddings,
                feed_dict={
                    self.images_placeholder: [face_img],
                    self.phase_train_placeholder: False
                })
            outputs += embs[0].tobytes()
    # Return a one-byte sentinel instead of an empty string when no faces
    # were found.
    return b' ' if outputs == b'' else outputs

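# A minimal sketch of decoding the embedding bytes produced by the kernel
# above: each face contributes 128 float32 values, so the buffer reshapes to
# (num_faces, 128). `output` is a hypothetical non-sentinel result of
# execute().
import numpy as np

embs = np.frombuffer(output, dtype=np.float32).reshape(-1, 128)
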
def execute(self, columns):
    [frame, bboxes] = columns
    bboxes = readers.bboxes(bboxes, self.protobufs)
    if len(bboxes) == 0:
        return []
    results = []
    for bbox in bboxes:
        img = frame[int(bbox.y1):int(bbox.y2), int(bbox.x1):int(bbox.x2), :]
        img = cv2.resize(img, (200, 200))
        # Variance of the Laplacian as a simple sharpness/blur measure.
        results.append(cv2.Laplacian(img, cv2.CV_64F).var())
    return [json.dumps(results)]

def execute(self, frame: Sequence[FrameType], bboxes: Sequence[bytes]) -> Sequence[bytes]:
    from PIL import Image
    from torch.autograd import Variable
    import torch

    h, w = frame[0].shape[:2]
    counts = []
    images = []
    for i, (fram, bbs) in enumerate(zip(frame, bboxes)):
        bbs = readers.bboxes(bbs, self.config.protobufs)
        # counts[i] = (offset of this frame's faces in `images`, face count).
        counts.append((counts[i - 1][0] + counts[i - 1][1] if i > 0 else 0,
                       len(bbs)))
        if len(bbs) == 0:
            raise Exception("No bounding boxes")
        images.extend([
            fram[int(bbox.y1 * h):int(bbox.y2 * h),
                 int(bbox.x1 * w):int(bbox.x2 * w)] for bbox in bbs
        ])

    tensor = self.images_to_tensor(
        [self.transform(Image.fromarray(img)) for img in images])
    var = Variable(tensor if self.cpu_only else tensor.cuda(),
                   requires_grad=False)
    scores, features = self.model(var)

    all_att = []
    for k in range(len(frame)):
        (idx, n) = counts[k]
        predicted_attributes = np.zeros((n, len(scores)), dtype=np.int32)
        for i, attrib_score in enumerate(scores):
            _, predicted = torch.max(attrib_score[idx:idx + n, :], 1)
            predicted_attributes[:, i] = predicted.cpu().data.numpy().astype(np.int32)
        all_att.append(pickle.dumps(predicted_attributes))
    return all_att

def execute(self, frame: FrameType, bboxes: bytes) -> FrameType:
    bboxes = readers.bboxes(bboxes, self._config.protobufs)
    if len(bboxes) == 0:
        return frame
    bboxes = proto_to_np(bboxes)
    # Swap (x1, y1, x2, y2) into the (ymin, xmin, ymax, xmax) order that the
    # TensorFlow visualization utilities expect.
    bboxes[:, [0, 1]] = bboxes[:, [1, 0]]
    bboxes[:, [2, 3]] = bboxes[:, [3, 2]]
    return tf_vis_utils.visualize_boxes_and_labels_on_image_array(
        frame,
        bboxes[:, :4],
        bboxes[:, 5].astype(np.int32),
        bboxes[:, 4],
        self._category_index,
        use_normalized_coordinates=True,
        line_thickness=8,
        min_score_thresh=0.5)

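# A minimal sketch of the proto_to_np helper assumed by the kernel above.
# The column indexing in execute() ([:, :4] for coordinates, [:, 4] for
# scores, [:, 5] for labels) implies one row per box laid out as
# [x1, y1, x2, y2, score, label]; the actual helper may differ.
import numpy as np

def proto_to_np(bboxes):
    return np.array([[bb.x1, bb.y1, bb.x2, bb.y2, bb.score, bb.label]
                     for bb in bboxes], dtype=np.float32)
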
def execute(self, frame: FrameType, bboxes: bytes) -> bytes:
    [h, w] = frame.shape[:2]
    bboxes = readers.bboxes(bboxes, self.config.protobufs)
    if len(bboxes) == 0:
        return pickle.dumps([])
    # This returns a numpy array of shape (68, 2) for every bbox.
    predictions = self.fa.get_landmarks_from_image(
        frame,
        detected_faces=[(bbox.x1 * w, bbox.y1 * h, bbox.x2 * w, bbox.y2 * h)
                        for bbox in bboxes])
    # Normalize the landmark coordinates back into [0, 1].
    predictions = [
        np.array([[x / w, y / h] for [x, y] in prediction])
        for prediction in predictions
    ]
    return pickle.dumps(predictions)

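# Sketch of recovering pixel coordinates from the normalized landmarks
# pickled above, assuming the same frame dimensions (w, h) used at encode
# time; `landmark_bytes` is a hypothetical output of execute().
import pickle
import numpy as np

landmarks = pickle.loads(landmark_bytes)
pixel_landmarks = [lm * np.array([w, h]) for lm in landmarks]  # (68, 2) each
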
def execute(self, columns):
    global i
    [img, bboxes] = columns
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    [h, w] = img.shape[:2]
    bboxes = readers.bboxes(bboxes, self.protobufs)
    imgs = [
        img[int(h * bbox.y1):int(h * bbox.y2),
            int(w * bbox.x1):int(w * bbox.x2)] for bbox in bboxes
    ]
    for face_img in imgs:
        cv2.imwrite('/app/tmp/{:05d}.jpg'.format(i), face_img)
        i += 1
    genders = self.rc.get_gender_batch(imgs)
    # Pack each (label, score) pair as a one-byte label plus a float32 score.
    outputs = [
        struct.pack('=cf', label, score) for [label, score] in genders
    ]
    assert len(outputs) == len(imgs)
    return [b''.join(outputs)]

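# Sketch of decoding the packed gender records emitted above: each record is
# a 5-byte '=cf' struct (one-byte label, float32 score). `data` is a
# hypothetical output of execute().
import struct

genders = [(label, score) for label, score in struct.iter_unpack('=cf', data)]
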
def execute(self, columns):
    [img, bboxes] = columns
    [h, w] = img.shape[:2]
    out_size = 160
    bboxes = readers.bboxes(bboxes, self.protobufs)
    outputs = b''
    for bbox in bboxes:
        face_img = img[int(h * bbox.y1):int(h * bbox.y2),
                       int(w * bbox.x1):int(w * bbox.x2)]
        face_img = cv2.resize(face_img, (out_size, out_size))
        face_img = facenet.prewhiten(face_img)
        embs = self.sess.run(
            self.embeddings,
            feed_dict={
                self.images_placeholder: [face_img],
                self.phase_train_placeholder: False
            })
        outputs += embs[0].tobytes()
    return [outputs]

def execute(self, *inputs) -> bytes:
    bboxes_list = []
    for c in inputs:
        bboxes_list += readers.bboxes(c, self.protobufs)
    nmsed_bboxes = bboxes.nms(bboxes_list, 0.1)
    return writers.bboxes(nmsed_bboxes, self.protobufs)

def execute(self, frame: Sequence[FrameType], bboxes: Sequence[bytes]) -> Sequence[bytes]:
    h, w = frame[0].shape[:2]
    all_new_bbs = []
    for fram, bbs in zip(frame, bboxes):
        bbs = readers.bboxes(bbs, self.protobufs)
        new_bbs = []
        for i, bbox in enumerate(bbs):
            x1 = int(bbox.x1 * w)
            y1 = int(bbox.y1 * h)
            x2 = int(bbox.x2 * w)
            y2 = int(bbox.y2 * h)

            ## set crop window
            crop_w = (x2 - x1) * 2
            crop_h = crop_w * 2
            X1 = int((x1 + x2) / 2 - crop_w / 2)
            X2 = X1 + crop_w
            Y1 = int((y1 + y2) / 2 - crop_h / 3)
            Y2 = Y1 + crop_h

            ## adjust box size by image boundary
            crop_x1 = max(0, X1)
            crop_x2 = min(w - 1, X2)
            crop_y1 = max(0, Y1)
            crop_y2 = min(h - 1, Y2)
            cropped = fram[crop_y1:crop_y2 + 1, crop_x1:crop_x2 + 1, :]

            ## compute body bound
            body_bound = 1.0
            for j, other_bbox in enumerate(bbs):
                if i == j:
                    continue
                if bbox.y2 < other_bbox.y1:
                    center = (bbox.x1 + bbox.x2) / 2
                    crop_x1 = (center - bbox.x2 + bbox.x1)
                    crop_x2 = (center + bbox.x2 - bbox.x1)
                    if other_bbox.x1 < crop_x2 or other_bbox.x2 > crop_x1:
                        body_bound = other_bbox.y1

            ## detect edge and text
            neck_line = y2 - crop_y1
            body_bound = int(body_bound * h) - crop_y1
            crop_y = self.detect_edge_text(cropped, neck_line)
            crop_y = min(crop_y, body_bound)

            def inbound(coord, limit):
                return 0 <= int(coord) < limit

            # If we produce a malformed bbox then just use the original bbox.
            if abs(crop_x1 - crop_x2) < 20 or abs(crop_y1 - crop_y) < 20 \
               or crop_x1 >= crop_x2 or crop_y1 >= crop_y \
               or not inbound(crop_x1, w) or not inbound(crop_x2, w) \
               or not inbound(crop_y1, h) or not inbound(crop_y, h):
                new_bbs.append(bbox)
            else:
                new_bbs.append(
                    self.protobufs.BoundingBox(x1=crop_x1 / w,
                                               x2=crop_x2 / w,
                                               y1=crop_y1 / h,
                                               y2=crop_y / h))
        all_new_bbs.append(writers.bboxes(new_bbs, self.protobufs))
    return all_new_bbs

def execute(self, frame: FrameType, bboxes: bytes) -> bytes:
    from PIL import Image
    from torch.autograd import Variable
    import torch

    h, w = frame.shape[:2]
    bboxes = readers.bboxes(bboxes, self.config.protobufs)
    if len(bboxes) == 0:
        return pickle.dumps([])

    if self.config.args['adjust_bboxes']:
        images = []
        for i, bbox in enumerate(bboxes):
            x1 = int(bbox.x1 * w)
            y1 = int(bbox.y1 * h)
            x2 = int(bbox.x2 * w)
            y2 = int(bbox.y2 * h)

            ## set crop window
            crop_w = (x2 - x1) * 2
            crop_h = crop_w * 2
            X1 = int((x1 + x2) / 2 - crop_w / 2)
            X2 = X1 + crop_w
            Y1 = int((y1 + y2) / 2 - crop_h / 3)
            Y2 = Y1 + crop_h

            ## adjust box size by image boundary
            crop_x1 = max(0, X1)
            crop_x2 = min(w - 1, X2)
            crop_y1 = max(0, Y1)
            crop_y2 = min(h - 1, Y2)
            cropped = frame[crop_y1:crop_y2 + 1, crop_x1:crop_x2 + 1, :]

            ## compute body bound
            body_bound = 1.0
            for j, other_bbox in enumerate(bboxes):
                if i == j:
                    continue
                if bbox.y2 < other_bbox.y1:
                    center = (bbox.x1 + bbox.x2) / 2
                    crop_x1 = (center - bbox.x2 + bbox.x1)
                    crop_x2 = (center + bbox.x2 - bbox.x1)
                    if other_bbox.x1 < crop_x2 or other_bbox.x2 > crop_x1:
                        body_bound = other_bbox.y1

            ## detect edge and text
            neck_line = y2 - crop_y1
            body_bound = int(body_bound * h) - crop_y1
            crop_y = self.detect_edge_text(cropped, neck_line)
            crop_y = min(crop_y, body_bound)
            cropped = cropped[:crop_y, :, :]
            images.append(cropped)
    else:
        images = [
            frame[int(bbox.y1 * h):int(bbox.y2 * h),
                  int(bbox.x1 * w):int(bbox.x2 * w)] for bbox in bboxes
        ]

    tensor = self.images_to_tensor(
        [self.transform(Image.fromarray(img)) for img in images])
    var = Variable(tensor if self.cpu_only else tensor.cuda(),
                   requires_grad=False)
    scores, features = self.model(var)

    predicted_attributes = np.zeros((len(images), len(scores)), dtype=np.int32)
    for i, attrib_score in enumerate(scores):
        _, predicted = torch.max(attrib_score, 1)
        predicted_attributes[:, i] = predicted.cpu().data.numpy().astype(np.int32)
    return pickle.dumps(predicted_attributes)

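# Sketch of consuming the kernel's output downstream: for non-empty bbox
# input, the pickled payload is the (num_faces, num_attribute_heads) int32
# matrix built above. `output_bytes` is a hypothetical result of execute().
import pickle

attrs = pickle.loads(output_bytes)
num_faces, num_attribute_heads = attrs.shape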