def read_file_to_dic(self, filename, dic):
    with open(filename, 'r') as f:
        data = f.readlines()
    for line in data:
        content = line.split(',')
        key = content[0] + "/" + str(int(content[1]))
        img_h = int(self.data_size[content[0]][0])
        img_w = int(self.data_size[content[0]][1])
        record = AVA_video.info(
            content[6],
            BBox(  # scale normalized [0, 1] coordinates to pixel values
                left=float(content[2]) * img_w,
                top=float(content[3]) * img_h,
                right=float(content[4]) * img_w,
                bottom=float(content[5]) * img_h),
            img_h, img_w, key)
        # group all annotations that share the same video/timestamp key
        dic.setdefault(key, []).append(record)
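For context, each line of the AVA-style CSV that read_file_to_dic consumes looks like video_id,timestamp,x1,y1,x2,y2,action_id,... with box coordinates normalized to [0, 1]; the exact column meaning is inferred from the indexing above, and the row below is illustrative only. A standalone sketch of the key/box arithmetic on one such line:

line = '-5KQ66BBWC4,902,0.077,0.151,0.283,0.811,80,1'  # illustrative AVA-style row
content = line.split(',')
img_h, img_w = 360, 640  # hypothetical keyframe size
key = content[0] + "/" + str(int(content[1]))  # '-5KQ66BBWC4/902'
box = (float(content[2]) * img_w, float(content[3]) * img_h,
       float(content[4]) * img_w, float(content[5]) * img_h)
print(key, box)  # pixel-space (left, top, right, bottom)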
def edit_target_class(self, target_idx=-1):
    # find out which target is selected first
    if target_idx < 0:
        target_idx = self.targetList.currentRow()
    # if a yaml class map is provided (e.g. in a task), offer a drop-down list
    if len(self.cls_map) > 0:
        class_list = [f"{cls_idx}-{cls_name}"
                      for cls_idx, cls_name in self.cls_map.items()]
        dialog = QInputDialog()
        label_text = "Input the correct class number.\n" \
                     "Please note your input will not be checked for validity"
        item, okPressed = QInputDialog.getItem(dialog, "Edit class",
                                               label_text, class_list,
                                               0, False)  # current=0, editable=False
        if okPressed and item:
            cur_bbox = label_table[self.data_name][target_idx]
            old_bbox = BBox(cur_bbox.xywh, cur_bbox.imgSizeWH, cur_bbox.cls)
            class_idx = item.split('-')[0]
            label_table[self.data_name][target_idx].cls = int(class_idx)
            self.last_cls = int(class_idx)
            # log the change so it can be undone
            new_data = label_table[self.data_name][target_idx].to_label_str()
            mod = [self.data_name, target_idx, new_data, old_bbox]
            modification_list.append(mod)
            self.ui_form.check_undoable()
            self.show()
    # otherwise fall back to free-form text input
    else:
        dialog = QInputDialog()
        label_text = "Input the correct class number.\n" \
                     "Please note your input will not be checked for validity"
        text, okPressed = QInputDialog.getText(dialog, "Edit class",
                                               label_text, QLineEdit.Normal)
        if okPressed and text != '':
            cur_bbox = label_table[self.data_name][target_idx]
            old_bbox = BBox(cur_bbox.xywh, cur_bbox.imgSizeWH, cur_bbox.cls)
            label_table[self.data_name][target_idx].cls = int(text)
            self.last_cls = int(text)
            # log the change so it can be undone
            new_data = label_table[self.data_name][target_idx].to_label_str()
            mod = [self.data_name, target_idx, new_data, old_bbox]
            modification_list.append(mod)
            self.ui_form.check_undoable()
            self.show()
def read_label_file(label_file, with_landmark=True):
    """
    read data from given label file
    :param label_file: str, absolute path of label file
    :param with_landmark: bool, whether the file also contains 5 facial landmarks
    :return: list of 3-element tuples (img_path, BBox, landmark_array)
    """
    result = []
    with open(label_file, 'r') as lf:
        for line in lf:
            data_units = line.strip().split()
            # read path of image, normalizing separators
            img_path = data_units[0].replace('\\', '/')
            # the file stores (x1, x2, y1, y2); reorder to (x1, y1, x2, y2)
            bbox = [data_units[1], data_units[3], data_units[2], data_units[4]]
            bbox = [int(float(x)) for x in bbox]
            # read the five landmarks (x1, y1), ..., (x5, y5)
            landmarks = None
            if with_landmark:
                landmarks = np.zeros((5, 2))
                for i in range(5):
                    landmarks[i] = (float(data_units[5 + 2 * i]),
                                    float(data_units[6 + 2 * i]))
            result.append((img_path, BBox(bbox), landmarks))
    return result
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str,
           dataset_name: str, backbone_name: str, prob_thresh: float):
    image = transforms.Image.open(path_to_input_image)
    dataset_class = DatasetBase.from_name(dataset_name)
    image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooling_mode=Config.POOLING_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    forward_input = Model.ForwardInput.Eval(image_tensor.cuda())
    forward_output: Model.ForwardOutput.Eval = model.eval().forward(forward_input)

    # rescale detections back to the original image size
    detection_bboxes = forward_output.detection_bboxes / scale
    detection_classes = forward_output.detection_classes
    detection_probs = forward_output.detection_probs

    # keep only detections above the probability threshold
    kept_indices = detection_probs > prob_thresh
    detection_bboxes = detection_bboxes[kept_indices]
    detection_classes = detection_classes[kept_indices]
    detection_probs = detection_probs[kept_indices]

    draw = ImageDraw.Draw(image)
    for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
        color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
        bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
        category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]
        draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
        draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

    image.save(path_to_output_image)
    print(f'Output image is saved to {path_to_output_image}')
def detect_face(image, net, crop_coordinates=None, threshold=0.4):
    # the SSD face detector expects a 300x300 image with mean subtraction
    blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                                 (300, 300), (104.0, 177.0, 123.0))
    net.setInput(blob)
    detected = net.forward()[0, 0, ...]
    # column 2 holds the confidence; columns 3:7 hold the normalized box
    conf = detected[:, 2]
    detected = detected[conf > threshold, :]
    detected[:, 3:] = np.clip(detected[:, 3:], a_min=0., a_max=1.)
    # scale normalized coordinates to absolute pixel values
    detected[:, (3, 5)] *= image.shape[1]
    detected[:, (4, 6)] *= image.shape[0]
    # if the input was a crop, shift boxes back into the full image frame
    if crop_coordinates is not None:
        detected[:, (3, 5)] += crop_coordinates[0]
        detected[:, (4, 6)] += crop_coordinates[1]
    faces = []
    for f in detected:
        coor = f[3:].astype(int)
        # skip degenerate boxes
        if coor[0] >= coor[2] or coor[1] >= coor[3]:
            continue
        faces.append(
            AnnotationInstance(bbox=BBox(xmin=coor[0], ymin=coor[1],
                                         xmax=coor[2], ymax=coor[3],
                                         label='face', score=f[2],
                                         coordinate_mode='absolute')))
    return faces
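The blob parameters above match OpenCV's bundled ResNet-10 SSD face model. A minimal usage sketch, assuming the standard deploy.prototxt / res10_300x300_ssd_iter_140000.caffemodel files are available locally (the file paths and the `face.bbox` attribute access are assumptions):

import cv2

# load the Caffe SSD face detector (paths are placeholders; adjust as needed)
net = cv2.dnn.readNetFromCaffe('deploy.prototxt',
                               'res10_300x300_ssd_iter_140000.caffemodel')
image = cv2.imread('group_photo.jpg')  # BGR, as detect_face expects
faces = detect_face(image, net, threshold=0.5)
for face in faces:
    # assuming AnnotationInstance exposes the BBox it was built with
    print(face.bbox.xmin, face.bbox.ymin, face.bbox.xmax, face.bbox.ymax)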
def bbox(self):
    """ returns the bounding box of the line """
    bbox = BBox()
    for pt in self.pts:
        bbox.update(pt)
    return bbox
def to_box(self) -> BBox:
    # convert a center-based (x, y, w, h) box to corner coordinates
    x0 = self.x - self.w * 0.5
    x1 = self.x + self.w * 0.5
    y0 = self.y - self.h * 0.5
    y1 = self.y + self.h * 0.5
    return BBox(x0, y0, x1, y1)
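The center-to-corner arithmetic is easy to sanity-check in isolation. A self-contained sketch, with plain tuples standing in for the BBox class above:

def center_to_corners(x, y, w, h):
    # (center_x, center_y, width, height) -> (x0, y0, x1, y1)
    return (x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5)

assert center_to_corners(10.0, 10.0, 4.0, 2.0) == (8.0, 9.0, 12.0, 11.0)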
def load_dataList(self, nameList, showThumbnail=True, progressBar=True):
    self.dataList.clear()
    if progressBar:
        progress = QProgressDialog("Loading data...", "Abort",
                                   0, len(nameList), self.window)
        progress.setWindowModality(Qt.WindowModal)
    for i, dataName in enumerate(nameList):
        newItem = QtWidgets.QListWidgetItem(dataName)
        # mark finished data
        if self.in_task and (dataName in self.current_task.finished_data):
            newItem.setBackground(QBrush(QColor("#b3b3b3")))
        # the image size is needed below to de-normalize the labels,
        # so read it even when no thumbnail is requested
        img = Image.open(self.current_data_dir + IMG_FOLDER
                         + '/' + dataName + '.' + IMG_EXT)
        w, h = img.size
        if showThumbnail:
            # boring image down-sizing and image format converting
            img = img.resize((128, int(128 * h / w)))
            img = img.convert("RGBA")
            qimg = QImage(img.tobytes('raw', 'RGBA'), img.size[0],
                          img.size[1], QImage.Format_RGBA8888)
            thumbnail = QIcon()
            thumbnail.addPixmap(QtGui.QPixmap.fromImage(qimg))
            newItem.setIcon(thumbnail)
        # preload all the labels
        label_dir = self.current_data_dir + LEBEL_FOLDER \
                    + '/' + dataName + '.txt'
        if os.path.exists(label_dir):
            with open(label_dir, 'r') as label_file:
                bboxs = []
                for line in label_file:
                    bbox_l = line.split()
                    class_num = int(bbox_l[0])
                    # YOLO labels are normalized (cx, cy, w, h); scale to pixels
                    centerX = int(float(bbox_l[1]) * w)
                    centerY = int(float(bbox_l[2]) * h)
                    width = int(float(bbox_l[3]) * w)
                    height = int(float(bbox_l[4]) * h)
                    new_bbox = BBox([centerX, centerY, width, height],
                                    [w, h], class_num)
                    bboxs.append(new_bbox)
                label_table[dataName] = bboxs
        else:
            # if the label file does not exist, create an empty bbox list
            bboxs = []
            label_table[dataName] = bboxs
        self.dataList.addItem(newItem)
        if progressBar:
            progress.setValue(i)
            if progress.wasCanceled():
                break
    if progressBar:
        progress.setValue(len(nameList))
def _infer_stream(path_to_input_stream_endpoint: str, period_of_inference: int, path_to_checkpoint: str,
                  dataset_name: str, backbone_name: str, prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    # a purely numeric endpoint is a local camera index, otherwise a stream URL
    if path_to_input_stream_endpoint.isdigit():
        path_to_input_stream_endpoint = int(path_to_input_stream_endpoint)
    video_capture = cv2.VideoCapture(path_to_input_stream_endpoint)

    with torch.no_grad():
        for sn in itertools.count(start=1):
            _, frame = video_capture.read()
            # only run inference on every `period_of_inference`-th frame
            if sn % period_of_inference != 0:
                continue

            timestamp = time.time()

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(image)
            image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_classes, detection_probs, _ = \
                model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
            detection_bboxes /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]

            draw = ImageDraw.Draw(image)
            for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]
                draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
                draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

            image = np.array(image)
            frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            elapse = time.time() - timestamp
            fps = 1 / elapse
            cv2.putText(frame, f'FPS = {fps:.1f}', (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

            cv2.imshow('easy-faster-rcnn.pytorch', frame)
            if cv2.waitKey(10) == 27:  # ESC to quit
                break

    video_capture.release()
    cv2.destroyAllWindows()
def _infer(path_to_input_dir: str, path_to_output_dir: str, path_to_checkpoint: str,
           dataset_name: str, backbone_name: str, prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    images = glob.glob(path_to_input_dir + '/*.jpg')
    with torch.no_grad():
        for image_path in tqdm(images):
            name = os.path.basename(image_path)
            image = transforms.Image.open(image_path).convert("RGB")
            image_tensor, scale = dataset_class.preprocess(
                image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_classes, detection_probs, _ = \
                model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
            detection_bboxes /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]

            draw = ImageDraw.Draw(image)
            for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]
                draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
                draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

            image.save(os.path.join(path_to_output_dir, name))
    print(f'Output images are saved to {path_to_output_dir}')
def write_label(img_dir, img_id, label, bboxes):
    global _min_x_w, _min_y_h, _max_x_w, _max_y_h
    global _max_w, _max_h, _max_w_h, _max_h_w
    img = None
    # locate the (unique) image file that matches img_id
    for f in glob.glob(os.path.join(img_dir, img_id + '.*')):
        if f.endswith('.txt'):
            continue
        if img:
            raise FileExistsError('{} and\n{}'.format(img, f))
        img = f
    if img:
        img = cv2.imread(img)  # was cv2.imread(f): `f` is the last glob hit, not the match
    if img is None:
        raise FileNotFoundError(os.path.join(img_dir, img_id))
    h, w = img.shape[:2]
    # track global extremes across the dataset
    if _max_w < w:
        _max_w = w
        print('New max width: {}'.format(w))
    if _max_h < h:
        _max_h = h
        print('New max height: {}'.format(h))
    if h < w:
        w_h = w / h
        if _max_w_h < w_h:
            _max_w_h = w_h
            print('New width / height: {}'.format(w_h))
    else:
        h_w = h / w
        if _max_h_w < h_w:
            _max_h_w = h_w
            print('New height / width: {}'.format(h_w))
    with open(
            find_replace(os.path.join(img_dir, img_id + '.txt.' + label),
                         'images', 'labels'), 'w') as flabel:
        for bbox_tuple in bboxes:
            bbox = BBox(hw=(h, w), type_=BBox.VOC, bbox=bbox_tuple)
            xmin, ymin, xmax, ymax = bbox.get(type_=BBox.OPEN_IMAGES)
            if _min_x_w > xmin:
                _min_x_w = xmin
                print('New min xmin / (width-1): {}'.format(_min_x_w))
            if _min_y_h > ymin:
                _min_y_h = ymin
                print('New min ymin / (height-1): {}'.format(_min_y_h))
            if _max_x_w < xmax:
                _max_x_w = xmax
                print('New max xmax / (width-1): {}'.format(_max_x_w))
            if _max_y_h < ymax:
                _max_y_h = ymax
                print('New max ymax / (height-1): {}'.format(_max_y_h))
            flabel.write('{:1.15f} {:1.15f} {:1.15f} {:1.15f}\n'.format(
                *bbox.get(type_=BBox.YOLO)))
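write_label leans on a BBox class with named conversion modes; the VOC-to-YOLO arithmetic it hides is just corner-to-normalized-center math. A self-contained sketch of that conversion (pure Python, independent of the BBox class above; whether BBox.get(type_=BBox.YOLO) uses exactly this convention, e.g. width vs. width-1 denominators, is an assumption):

def voc_to_yolo(xmin, ymin, xmax, ymax, img_w, img_h):
    # VOC corners (pixels) -> YOLO (cx, cy, w, h), all normalized to [0, 1]
    cx = (xmin + xmax) / 2.0 / img_w
    cy = (ymin + ymax) / 2.0 / img_h
    bw = (xmax - xmin) / img_w
    bh = (ymax - ymin) / img_h
    return cx, cy, bw, bh

assert voc_to_yolo(100, 50, 300, 250, 400, 500) == (0.5, 0.3, 0.5, 0.4)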
def __init__(self, path_to_data_dir: str, mode: Mode):
    super().__init__()
    self._mode = mode

    path_to_voc2007_dir = os.path.join(path_to_data_dir, 'VOCdevkit', 'VOC2007')
    path_to_imagesets_main_dir = os.path.join(path_to_voc2007_dir, 'ImageSets', 'Main')
    path_to_annotations_dir = os.path.join(path_to_voc2007_dir, 'Annotations')
    self._path_to_jpeg_images_dir = os.path.join(path_to_voc2007_dir, 'JPEGImages')

    if self._mode == Dataset.Mode.TRAIN:
        path_to_image_ids_txt = os.path.join(path_to_imagesets_main_dir, 'trainval.txt')
    elif self._mode == Dataset.Mode.TEST:
        path_to_image_ids_txt = os.path.join(path_to_imagesets_main_dir, 'test.txt')
    else:
        raise ValueError('invalid mode')

    with open(path_to_image_ids_txt, 'r') as f:
        lines = f.readlines()
        self._image_ids = [line.rstrip() for line in lines]

    self._image_id_to_annotation_dict = {}
    for image_id in self._image_ids:
        path_to_annotation_xml = os.path.join(path_to_annotations_dir, f'{image_id}.xml')
        tree = ET.ElementTree(file=path_to_annotation_xml)
        root = tree.getroot()
        self._image_id_to_annotation_dict[image_id] = Dataset.Annotation(
            filename=next(root.iterfind('filename')).text,
            objects=[
                Dataset.Annotation.Object(
                    name=next(tag_object.iterfind('name')).text,
                    difficult=next(tag_object.iterfind('difficult')).text == '1',
                    bbox=BBox(
                        left=float(next(tag_object.iterfind('bndbox/xmin')).text),
                        top=float(next(tag_object.iterfind('bndbox/ymin')).text),
                        right=float(next(tag_object.iterfind('bndbox/xmax')).text),
                        bottom=float(next(tag_object.iterfind('bndbox/ymax')).text)))
                for tag_object in root.iterfind('object')
            ])
def __init__(self, path_to_data_dir: str, mode: Base.Mode, image_min_side: float, image_max_side: float):
    super().__init__(path_to_data_dir, mode, image_min_side, image_max_side)

    path_to_voc2007_dir = os.path.join(self._path_to_data_dir, 'VOCdevkit', 'VOC2007')
    path_to_imagesets_main_dir = os.path.join(path_to_voc2007_dir, 'ImageSets', 'Main')
    path_to_annotations_dir = os.path.join(path_to_voc2007_dir, 'Annotations')
    self._path_to_jpeg_images_dir = os.path.join(path_to_voc2007_dir, 'JPEGImages')

    if self._mode == VOC2007Person.Mode.TRAIN:
        path_to_image_ids_txt = os.path.join(path_to_imagesets_main_dir, 'trainval.txt')
    elif self._mode == VOC2007Person.Mode.EVAL:
        path_to_image_ids_txt = os.path.join(path_to_imagesets_main_dir, 'test.txt')
    else:
        raise ValueError('invalid mode')

    with open(path_to_image_ids_txt, 'r') as f:
        lines = f.readlines()
        image_ids = [line.rstrip() for line in lines]

    self._image_id_to_annotation_dict = {}
    self._image_ratios = []

    for image_id in image_ids:
        path_to_annotation_xml = os.path.join(path_to_annotations_dir, f'{image_id}.xml')
        tree = ET.ElementTree(file=path_to_annotation_xml)
        root = tree.getroot()

        annotation = VOC2007Person.Annotation(
            filename=root.find('filename').text,
            objects=[VOC2007Person.Annotation.Object(
                name=next(tag_object.iterfind('name')).text,
                difficult=next(tag_object.iterfind('difficult')).text == '1',
                bbox=BBox(  # convert to 0-based pixel index
                    left=float(next(tag_object.iterfind('bndbox/xmin')).text) - 1,
                    top=float(next(tag_object.iterfind('bndbox/ymin')).text) - 1,
                    right=float(next(tag_object.iterfind('bndbox/xmax')).text) - 1,
                    bottom=float(next(tag_object.iterfind('bndbox/ymax')).text) - 1
                )
            ) for tag_object in root.iterfind('object')]
        )
        # keep only non-difficult person objects
        annotation.objects = [obj for obj in annotation.objects
                              if obj.name in ['person'] and not obj.difficult]

        if len(annotation.objects) > 0:
            self._image_id_to_annotation_dict[image_id] = annotation

            width = int(root.find('size/width').text)
            height = int(root.find('size/height').text)
            ratio = float(width / height)
            self._image_ratios.append(ratio)

    self._image_ids = list(self._image_id_to_annotation_dict.keys())
def main(args):
    global predefined_labels
    global _min_x_w, _min_y_h, _max_x_w, _max_y_h
    global _max_w, _max_h, _max_w_h, _max_h_w
    LabelNames.init(predefined_labels)
    print('Reading: {}'.format(args.test_list))
    with open(args.test_list, 'r') as ftest, open(args.output_file, 'w') as fo:
        fo.write('ImageId,PredictionString\n')
        for line in ftest:
            id_ = pathlib.PurePath(line).stem
            pred = ''
            summary = os.path.join(args.summary_dir, id_ + '.csv')
            if not os.path.exists(summary):
                print('***** CAUTION *****: no summary for {}'.format(id_))
            else:
                df = pd.read_csv(summary, header=None)
                df.columns = ['confidence', 'voc_xmin', 'voc_ymin',
                              'voc_xmax', 'voc_ymax', 'label']
                df.sort_values(by='confidence', ascending=False, inplace=True)
                df = df.head(5)  # at most five predictions per image
                bboxes = []
                for index, row in df.iterrows():
                    # always keep at least one box, plus any above the threshold
                    if 1 > len(bboxes) or float(row[0]) >= args.threshold:
                        bboxes.append(
                            BBox(type_=BBox.VOC,
                                 bbox=tuple(map(float, row[1:5])),
                                 label=row[5]))
                for b in bboxes:
                    bbox = b.get(type_=BBox.ILSVRC)
                    if pred:
                        pred += ' '
                    # LOC_synset_mapping.txt maps the 1000 synset ids to their
                    # descriptions; e.g. line 1, "n01440764 tench, Tinca tinca",
                    # means class 1 has synset id n01440764 (the fish tench).
                    # Class ids are therefore 1-based, hence the +1 below.
                    pred += '{} {} {} {} {}'.format(
                        1 + LabelNames.label_index(b.label),
                        math.floor(bbox[0]), math.floor(bbox[1]),
                        math.ceil(bbox[2]), math.ceil(bbox[3]))
            fo.write('{},{}\n'.format(id_, pred))
async def handler(websocket, path):
    print('Connection established:', path)
    with torch.no_grad():
        while True:
            # each message is one raw 480x640x3 frame
            frame = await websocket.recv()
            frame = np.frombuffer(frame, dtype=np.uint8).reshape(480, 640, 3)
            image = Image.fromarray(frame)
            image_tensor, scale = dataset_class.preprocess(
                image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_classes, detection_probs, _ = \
                model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
            detection_bboxes /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]

            # reply with one JSON object per detection
            message = []
            for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]
                message.append({'left': int(bbox.left), 'top': int(bbox.top),
                                'right': int(bbox.right), 'bottom': int(bbox.bottom),
                                'category': category})
            message = json.dumps(message)
            await websocket.send(message)
def blend_boxes(group, label, coordinate_mode):
    # fuse overlapping detections into one box: each coordinate is the
    # score-weighted average, and the fused score is the group maximum
    scores = np.array([i.score for i in group])
    xmins = np.array([i.xmin for i in group])
    ymins = np.array([i.ymin for i in group])
    xmaxs = np.array([i.xmax for i in group])
    ymaxs = np.array([i.ymax for i in group])
    xmin = np.sum(xmins * scores) / np.sum(scores)
    ymin = np.sum(ymins * scores) / np.sum(scores)
    xmax = np.sum(xmaxs * scores) / np.sum(scores)
    ymax = np.sum(ymaxs * scores) / np.sum(scores)
    score = scores.max()  # alternative: np.sum(scores * scores) / np.sum(scores)
    return AnnotationInstance(bbox=BBox(xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax,
                                        label=label, score=score,
                                        coordinate_mode=coordinate_mode))
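The weighting is worth seeing with concrete numbers. A minimal, self-contained sketch of the same score-weighted average (numpy only; the AnnotationInstance/BBox wrapper is left out, and the boxes are made up for illustration):

import numpy as np

# two detections of the same object: (xmin, ymin, xmax, ymax, score)
boxes = np.array([[100., 100., 200., 200., 0.9],
                  [110., 105., 210., 205., 0.3]])
scores = boxes[:, 4]
fused = (boxes[:, :4] * scores[:, None]).sum(axis=0) / scores.sum()
print(fused)         # [102.5, 101.25, 202.5, 201.25] - pulled toward the 0.9 box
print(scores.max())  # fused score: 0.9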
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str, backbone_name: str):
    image = transforms.Image.open(path_to_input_image)
    image_tensor, scale = Dataset.preprocess(image)

    backbone = Interface.from_name(backbone_name)(pretrained=False)
    model = Model(backbone).cuda()
    model.load(path_to_checkpoint)

    forward_input = Model.ForwardInput.Eval(image_tensor.cuda())
    forward_output: Model.ForwardOutput.Eval = model.eval().forward(forward_input)

    detection_bboxes = forward_output.detection_bboxes / scale
    detection_labels = forward_output.detection_labels
    detection_probs = forward_output.detection_probs

    draw = ImageDraw.Draw(image)
    for bbox, label, prob in zip(detection_bboxes.tolist(), detection_labels.tolist(), detection_probs.tolist()):
        if prob < 0.6:
            continue
        color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
        bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
        category = Dataset.LABEL_TO_CATEGORY_DICT[label]
        draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
        draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

    image.save(path_to_output_image)
def bbox(self, min_area=0):
    """ smart bounding box """
    bb = []
    bbox = BBox()
    if min_area == 0:
        bb.append(self.poly.boundingBox())
    else:
        # only consider contours whose area is at least min_area times
        # that of the largest contour; holes are skipped
        areas = self.areas()
        max_a = max(areas)
        for i in range(len(self.poly)):
            if self.poly.isHole(i):
                continue
            a = areas[i]
            if a < max_a * min_area:
                continue
            bb.append(self.poly.boundingBox(i))
    # boundingBox() returns (xmin, xmax, ymin, ymax); feed all four corners
    for b in bb:
        bbox.update((b[0], b[2]))
        bbox.update((b[1], b[2]))
        bbox.update((b[0], b[3]))
        bbox.update((b[1], b[3]))
    return bbox
def mouseMoveEvent(self, event):
    super().mouseMoveEvent(event)
    x = event.scenePos().x()
    y = event.scenePos().y()
    # at the beginning of a click & drag: create a new bbox
    if self.mouseDown and (not self.targetCreated):
        # if the drag started outside the image, don't create anything
        if (0 > x) or (0 > y) or (x > self.dscene.backgroundSize[0]) \
                or (y > self.dscene.backgroundSize[1]):
            self.mouseDown = False
            return
        newBbox = BBox([x, y, 0, 0], self.dscene.backgroundSize,
                       self.dscene.last_cls)
        newBbox.drew_in_scene(self, self.dscene, -1)
        newBbox.br.mouseMoveEvent(event, passed_by_scene=True)
        self.newBboxes.append(newBbox)
        self.targetCreated = True
    # a new bbox was already created for this click & drag action;
    # pass the mouse event to the bottom-right anchor so the bbox
    # can be dragged out
    elif self.mouseDown:
        self.newBboxes[-1].br.mouseMoveEvent(event, passed_by_scene=True)
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str,
           dataset_name: str, backbone_name: str, prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)
    '''
    Default options:
        pooler_mode = Config.POOLER_MODE = Pooler.Mode.ALIGN
        anchor_ratios = Config.ANCHOR_RATIOS = [(1, 2), (1, 1), (2, 1)]
        anchor_sizes: for infer, a 64 is added by default, giving [64, 128, 256, 512]
    RPN NMS settings used for eval:
        RPN_PRE_NMS_TOP_N: int = 6000
        RPN_POST_NMS_TOP_N: int = 300
    '''
    with torch.no_grad():
        # preprocess so at least one side of the image satisfies min_side/max_side;
        # unlike YOLO, no fixed input size is required here
        image = transforms.Image.open(path_to_input_image)
        image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

        # add a batch dimension, then run forward in eval mode;
        # shapes: (gd_n, 4), (gd_n,), (gd_n,)
        detection_bboxes, detection_classes, detection_probs, _ = \
            model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
        # the input image was multiplied by `scale`, so divide the boxes by it
        detection_bboxes /= scale

        kept_indices = detection_probs > prob_thresh  # e.g. 0.6
        detection_bboxes = detection_bboxes[kept_indices]    # (gd_thresh_n, 4)
        detection_classes = detection_classes[kept_indices]  # (gd_thresh_n,)
        detection_probs = detection_probs[kept_indices]      # (gd_thresh_n,)

        draw = ImageDraw.Draw(image)
        for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
            color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
            bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
            category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]
            draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
            draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

        image.save(path_to_output_image)
        print(f'Output image is saved to {path_to_output_image}')
def __init__(self, path_to_data_dir: str, mode: Base.Mode, image_min_side: float, image_max_side: float):
    super().__init__(path_to_data_dir, mode, image_min_side, image_max_side)

    path_to_coco_dir = os.path.join(self._path_to_data_dir, 'COCO')
    path_to_annotations_dir = os.path.join(path_to_coco_dir, 'annotations')
    path_to_caches_dir = os.path.join('caches', 'coco2017-person', f'{self._mode.value}')
    path_to_image_ids_pickle = os.path.join(path_to_caches_dir, 'image-ids.pkl')
    path_to_image_id_dict_pickle = os.path.join(path_to_caches_dir, 'image-id-dict.pkl')

    if self._mode == COCO2017Person.Mode.TRAIN:
        path_to_jpeg_images_dir = os.path.join(path_to_coco_dir, 'train2017')
        path_to_annotation = os.path.join(path_to_annotations_dir, 'instances_train2017.json')
    elif self._mode == COCO2017Person.Mode.EVAL:
        path_to_jpeg_images_dir = os.path.join(path_to_coco_dir, 'val2017')
        path_to_annotation = os.path.join(path_to_annotations_dir, 'instances_val2017.json')
    else:
        raise ValueError('invalid mode')

    coco_dataset = CocoDetection(root=path_to_jpeg_images_dir, annFile=path_to_annotation)

    if os.path.exists(path_to_image_ids_pickle) and os.path.exists(path_to_image_id_dict_pickle):
        print('loading cache files...')
        with open(path_to_image_ids_pickle, 'rb') as f:
            self._image_ids = pickle.load(f)
        with open(path_to_image_id_dict_pickle, 'rb') as f:
            self._image_id_to_annotation_dict = pickle.load(f)
    else:
        print('generating cache files...')
        os.makedirs(path_to_caches_dir, exist_ok=True)
        self._image_id_to_annotation_dict: Dict[str, COCO2017Person.Annotation] = {}
        for idx, (image, annotation) in enumerate(tqdm(coco_dataset)):
            if len(annotation) > 0:
                image_id = str(annotation[0]['image_id'])  # all image_id in annotation are the same
                annotation = COCO2017Person.Annotation(
                    filename=os.path.join(path_to_jpeg_images_dir, '{:012d}.jpg'.format(int(image_id))),
                    objects=[COCO2017Person.Annotation.Object(
                        bbox=BBox(  # `ann['bbox']` is in the format [left, top, width, height]
                            left=ann['bbox'][0],
                            top=ann['bbox'][1],
                            right=ann['bbox'][0] + ann['bbox'][2],
                            bottom=ann['bbox'][1] + ann['bbox'][3]
                        ),
                        label=ann['category_id'])
                        for ann in annotation]
                )
                # filtering label should refer to the original `COCO2017` dataset
                annotation.objects = [obj for obj in annotation.objects
                                      if obj.label in [COCO2017.CATEGORY_TO_LABEL_DICT['person']]]
                if len(annotation.objects) > 0:
                    self._image_id_to_annotation_dict[image_id] = annotation

        self._image_ids = list(self._image_id_to_annotation_dict.keys())

        with open(path_to_image_ids_pickle, 'wb') as f:
            pickle.dump(self._image_ids, f)
        with open(path_to_image_id_dict_pickle, 'wb') as f:
            pickle.dump(self._image_id_to_annotation_dict, f)
def __init__(self):
    self.i2c_dic = self.index2class()
    self.bboxes = []
    self.labels = []
    self.probs = []
    self.image_ratios = []
    self.image_position = []
    self.widths = []
    self.heights = []
    self.data_dic = {}
    self.data_dic_real = {}
    self.data_size = {}
    self.data_format = {}
    self.path_to_data_dir = '/home/aiuser/'
    path_to_AVA_dir = os.path.join(self.path_to_data_dir, 'ava_v2.2', 'preproc', 'train_clips')
    self.path_to_videos = os.path.join(path_to_AVA_dir, 'clips')
    self.path_to_keyframe = os.path.join(path_to_AVA_dir, 'keyframes')
    # path_to_video_ids_txt = os.path.join(path_to_AVA_dir, 'trainval.txt')
    path_to_video_ids_txt = '/home/aiuser/ava_v2.2/result.txt'
    path_to_real_ids_txt = '/home/aiuser/ava_v2.2/preproc/train_clips/trainval.txt'

    # determine each video's frame size by reading its first keyframe
    for frame in sorted(os.listdir(self.path_to_keyframe)):
        img = os.listdir(os.path.join(self.path_to_keyframe, frame))[0]
        img = cv2.imread(os.path.join(self.path_to_keyframe, frame, img))
        img_shape = img.shape
        self.data_size[frame] = (img_shape[0], img_shape[1])

    # determine each video's container format
    for video in sorted(os.listdir(self.path_to_videos)):
        video_0 = os.listdir(os.path.join(self.path_to_videos, video))[0]
        self.data_format[video] = '.' + video_0.split('.')[1]

    def read_annotations(path, dic):
        # read an annotation file; the key is '<video>/<timestamp>' (e.g. aa/0930)
        with open(path, 'r') as f:
            for line in f.readlines():
                content = line.split(',')
                key = content[0] + "/" + str(int(content[1]))
                img_h = int(self.data_size[content[0]][0])
                img_w = int(self.data_size[content[0]][1])
                record = imshow_result.info(
                    content[6], content[7].replace("\n", ""),
                    BBox(  # convert to 0-based pixel index
                        left=float(content[2]) * img_w - 1,
                        top=float(content[3]) * img_h - 1,
                        right=float(content[4]) * img_w - 1,
                        bottom=float(content[5]) * img_h - 1),
                    img_h, img_w, key)
                dic.setdefault(key, []).append(record)

    read_annotations(path_to_video_ids_txt, self.data_dic)
    read_annotations(path_to_real_ids_txt, self.data_dic_real)

    # flatten the dict entries into parallel lists
    for key in self.data_dic:
        self.bboxes.append([item.bbox.tolist() for item in self.data_dic[key]])
        self.labels.append([item.img_class for item in self.data_dic[key]])
        self.probs.append([item.prob for item in self.data_dic[key]])
        width = int(self.data_dic[key][0].weight)  # `weight` (sic) holds the image width
        self.widths.append(width)
        height = int(self.data_dic[key][0].height)
        self.heights.append(height)
        ratio = float(width / height)
        self.image_ratios.append(ratio)
        self.image_position.append(self.data_dic[key][0].img_position)