def getFramesFromBlock(video_name, frames_block):
    """
    return:
        names: list(str),
        frames: list(PILImage),
        bboxes: list(Bbox)
    """
    names = []
    frames = []
    bboxes = []
    for frame_info in frames_block:
        frame_name = str(frame_info[0])
        occluded = int(frame_info[1])
        frame_path = os.path.join(video_name, frame_name)
        names.append(frame_path)
        image = Image.open(frame_path)
        frames.append(image)
        bbox = BoundingBox(Point(float(frame_info[constants.IDX_XMIN]),
                                 float(frame_info[constants.IDX_YMIN])),
                           Point(float(frame_info[constants.IDX_XMAX]),
                                 float(frame_info[constants.IDX_YMAX])),
                           occluded=occluded)
        bboxes.append(bbox)
    return names, frames, bboxes
def __init__(self, pmin, pmax, abnormal_area=0, occluded=0):
    self._pmin = pmin
    self._pmax = pmax
    self._occluded = occluded
    self._abnormal_area = abnormal_area
    # Box center, derived once from the two corner points.
    pcenter = Point(-1, -1)
    pcenter.x = self._pmin.x + int((self._pmax.x - self._pmin.x) / 2)
    pcenter.y = self._pmin.y + int((self._pmax.y - self._pmin.y) / 2)
    self._pcenter = pcenter
def personDetectionInFrameMaskRCNN(model, ioImage, threshold):
    masks, pred_boxes, pred_class, detection_time = get_prediction(
        model, ioImage, threshold)
    persons = []
    if masks is not None:
        for idx, clase in enumerate(pred_class):
            if clase == 'person':
                bbox = BoundingBox(
                    Point(pred_boxes[idx][0][0], pred_boxes[idx][0][1]),
                    Point(pred_boxes[idx][1][0], pred_boxes[idx][1][1]))
                persons.append(bbox)
    return persons, detection_time
def getSegmentBBox(lbboxes):
    # Tightest box enclosing every non-occluded box in the segment.
    xmin = 10000
    ymin = 10000
    xmax = 0
    ymax = 0
    for bbox in lbboxes:
        if bbox.occluded == 0:
            xmin = min(xmin, bbox.pmin.x)
            ymin = min(ymin, bbox.pmin.y)
            xmax = max(xmax, bbox.pmax.x)
            ymax = max(ymax, bbox.pmax.y)
    return BoundingBox(Point(xmin, ymin), Point(xmax, ymax))
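# Illustrative usage sketch (not part of the original module): enclose the
# per-frame annotation boxes of a segment in a single box. Assumes the Point
# and BoundingBox classes defined in this file; the coordinates are made up.
def _example_segment_bbox():
    boxes = [BoundingBox(Point(10, 20), Point(60, 120)),
             BoundingBox(Point(15, 25), Point(80, 140))]
    # Expected result: a box spanning (10, 20) to (80, 140).
    return getSegmentBBox(boxes)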
def randomBBox(h, w):
    # Sample a random box inside an h x w frame, roughly 20 px per side or larger.
    # Note: the +20 adjustments below may push xmax/ymax slightly past the frame edge.
    xmax = np.random.randint(0, w)
    if xmax > 20:
        xmin = np.random.randint(0, xmax - 20)
    else:
        # Guard against np.random.randint(0, 0), which raises ValueError.
        xmin = np.random.randint(0, xmax) if xmax > 0 else 0
    if xmax - xmin < 20:
        xmax = xmax + 20
    ymax = np.random.randint(0, h)
    if ymax > 20:
        ymin = np.random.randint(0, ymax - 20)
    else:
        ymin = np.random.randint(0, ymax) if ymax > 0 else 0
    if ymax - ymin < 20:
        ymax = ymax + 20
    return BoundingBox(Point(xmin, ymin), Point(xmax, ymax))
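# Illustrative usage sketch (not part of the original module): draw a random
# box inside a hypothetical 240x320 frame, e.g. as a background proposal.
def _example_random_bbox():
    h, w = 240, 320  # hypothetical frame size
    return randomBBox(h, w)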
def joinBBoxes(bbox1, bbox2, saliency_regions=None):
    # Union of the two boxes; saliency_regions is accepted but currently unused.
    xmin = min(bbox1.pmin.x, bbox2.pmin.x)
    ymin = min(bbox1.pmin.y, bbox2.pmin.y)
    xmax = max(bbox1.pmax.x, bbox2.pmax.x)
    ymax = max(bbox1.pmax.y, bbox2.pmax.y)
    bbox = BoundingBox(Point(xmin, ymin), Point(xmax, ymax))
    return bbox
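# Illustrative usage sketch (not part of the original module): merge two
# person detections into one region of interest; the coordinates are made up.
def _example_join_bboxes():
    person_a = BoundingBox(Point(30, 40), Point(90, 200))
    person_b = BoundingBox(Point(70, 50), Point(150, 210))
    # Expected result: a box spanning (30, 40) to (150, 210).
    return joinBBoxes(person_a, person_b)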
def __init__(self, pmin, pmax, iou=0, occluded=0, score=0):
    self._pmin = pmin
    self._pmax = pmax
    if self._pmax.x < self._pmin.x or self._pmax.y < self._pmin.y:
        print('Warning: BoundingBox created with pmax < pmin (degenerate box)')
    self._score = score
    self._occluded = occluded
    self._iou = iou
    # Box center, derived once from the two corner points.
    pcenter = Point(-1, -1)
    pcenter.x = self._pmin.x + int((self._pmax.x - self._pmin.x) / 2)
    pcenter.y = self._pmin.y + int((self._pmax.y - self._pmin.y) / 2)
    self._pcenter = pcenter
    # Box area in pixels.
    dy = int(self._pmax.y - self._pmin.y)
    dx = int(self._pmax.x - self._pmin.x)
    self._area = dx * dy
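# Illustrative usage sketch (not part of the original module): the constructor
# above derives the center and area from the two corner points; the coordinates
# and score here are made up.
def _example_bounding_box_fields():
    bb = BoundingBox(Point(10, 20), Point(110, 220), score=0.9)
    # Center: (60, 120); area: 100 * 200 = 20000 pixels.
    return bb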
def personDetectionInFrameYolo(model, img_size, conf_thres, nms_thres, classes,
                               ioImage, device):
    # YOLOv3 inference on a single frame; keep only boxes classified as 'person'.
    img = yolo_inference.preProcessImage(ioImage, img_size)
    img = img.to(device)
    detections, detection_time = yolo_inference.inference(
        model, img, conf_thres, nms_thres)
    ioImage = np.array(ioImage)
    bbox_persons = []
    if detections is not None:
        # Rescale boxes from the network input size back to the original frame size.
        detections = yolo_inference.rescale_boxes(detections, img_size,
                                                  ioImage.shape[:2])
        for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
            if classes[int(cls_pred)] == 'person':
                bbox_persons.append(BoundingBox(Point(x1, y1), Point(x2, y2)))
    return bbox_persons, detection_time
def load_localization_ground_truth(paths):
    pth, _ = os.path.split(paths[0][0])
    _, video_name = os.path.split(pth)
    video_name = video_name[:-8]
    bdx_file_path = os.path.join(constants.PATH_UCFCRIME2LOCAL_Txt_ANNOTATIONS,
                                 video_name + '.txt')
    # Each annotation row is whitespace-separated; columns 1-4 hold
    # xmin, ymin, xmax, ymax and column 5 holds the frame number.
    data = []
    with open(bdx_file_path, 'r') as file:
        for row in file:
            data.append(row.split())
    gt_bboxes = []
    for i, frame_path in enumerate(paths):
        pth, frame_name = os.path.split(frame_path[0])
        splits = re.split(r'(\d+)', frame_name)
        frame_number = int(splits[1])
        frame_data = data[frame_number]
        if frame_number != int(frame_data[5]):
            print('Error: ground-truth frame number mismatch for', frame_name)
            break
        bb = BoundingBox(Point(int(frame_data[1]), int(frame_data[2])),
                         Point(int(frame_data[3]), int(frame_data[4])))
        gt_bboxes.append(bb)
    # Merge all per-frame ground-truth boxes into a single enclosing box.
    one_box = None
    for gtb in gt_bboxes:
        if one_box is None:
            one_box = gtb
        else:
            xmin = min(one_box.pmin.x, gtb.pmin.x)
            ymin = min(one_box.pmin.y, gtb.pmin.y)
            xmax = max(one_box.pmax.x, gtb.pmax.x)
            ymax = max(one_box.pmax.y, gtb.pmax.y)
            one_box = BoundingBox(Point(xmin, ymin), Point(xmax, ymax))
    return gt_bboxes, one_box
def cvRect2BoundingBox(cvRect):
    # Convert an OpenCV-style (x, y, w, h) rectangle to a BoundingBox.
    pmin = Point(cvRect[0], cvRect[1])
    pmax = Point(cvRect[0] + cvRect[2], cvRect[1] + cvRect[3])
    return BoundingBox(pmin, pmax)
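# Illustrative usage sketch (not part of the original module): an OpenCV rect
# is (x, y, w, h), e.g. as returned by cv2.boundingRect; the values are made up.
def _example_cv_rect():
    rect = (30, 40, 60, 160)
    # Expected result: a box spanning (30, 40) to (90, 200).
    return cvRect2BoundingBox(rect)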
def getFramesFromSegment(video_name, frames_segment, num_frames):
    """
    return:
        names: list(str),
        frames: list(PILImage),
        bboxes: list(Bbox)
    """
    names = []
    frames = []
    bboxes = []
    if num_frames == 'all':
        for frame_info in frames_segment:
            frame_name = str(frame_info[0][0])
            occluded = int(frame_info[1].cpu().item())
            frame_path = os.path.join(video_name, frame_name)
            names.append(frame_path)
            image = Image.open(frame_path)
            frames.append(image)
            bbox = BoundingBox(Point(frame_info[constants.IDX_XMIN].float(),
                                     frame_info[constants.IDX_YMIN].float()),
                               Point(frame_info[constants.IDX_XMAX].float(),
                                     frame_info[constants.IDX_YMAX].float()),
                               occluded=occluded)
            bboxes.append(bbox)
    elif num_frames == 'first':
        frame_info = frames_segment[0]
        frame_name = str(frame_info[0][0])
        frame_path = os.path.join(video_name, frame_name)
        names.append(frame_path)
        image = Image.open(frame_path)
        frames.append(image)
        bbox = BoundingBox(
            Point(frame_info[constants.IDX_XMIN], frame_info[constants.IDX_YMIN]),
            Point(frame_info[constants.IDX_XMAX], frame_info[constants.IDX_YMAX]))
        bboxes.append(bbox)
    elif num_frames == 'extremes':
        # First frame of the segment.
        frame_info_first = frames_segment[0]
        frame_name_first = str(frame_info_first[0][0])
        frame_path_first = os.path.join(video_name, frame_name_first)
        names.append(frame_path_first)
        image = Image.open(frame_path_first)
        frames.append(image)
        bbox = BoundingBox(
            Point(frame_info_first[constants.IDX_XMIN],
                  frame_info_first[constants.IDX_YMIN]),
            Point(frame_info_first[constants.IDX_XMAX],
                  frame_info_first[constants.IDX_YMAX]))
        bboxes.append(bbox)
        # Last frame of the segment.
        frame_info_end = frames_segment[-1]
        frame_name_end = str(frame_info_end[0][0])
        frame_path_end = os.path.join(video_name, frame_name_end)
        names.append(frame_path_end)
        image = Image.open(frame_path_end)
        frames.append(image)
        bbox = BoundingBox(
            Point(frame_info_end[constants.IDX_XMIN],
                  frame_info_end[constants.IDX_YMIN]),
            Point(frame_info_end[constants.IDX_XMAX],
                  frame_info_end[constants.IDX_YMAX]))
        bboxes.append(bbox)
    return names, frames, bboxes