Example 1
    def read_file_to_dic(self, filename, dic):
        with open(filename, 'r') as f:
            for line in f:
                content = line.split(',')
                key = content[0] + "/" + str(int(content[1]))
                img_h = int(self.data_size[content[0]][0])
                img_w = int(self.data_size[content[0]][1])
                info = AVA_video.info(
                    content[6],
                    BBox(  # scale normalized coordinates to pixel units
                        left=float(content[2]) * img_w,
                        top=float(content[3]) * img_h,
                        right=float(content[4]) * img_w,
                        bottom=float(content[5]) * img_h),
                    img_h,
                    img_w,
                    key)
                dic.setdefault(key, []).append(info)
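For reference, a hedged sketch of the comma-separated line layout the parser above expects, inferred purely from the indexing (field names and values are illustrative, not taken from a real annotation file):

# inferred layout: video_id, timestamp, x1, y1, x2, y2, action_label
# with the four box coordinates normalized to [0, 1]
#
#   line = "vidA,0902,0.10,0.20,0.50,0.80,12"
#
# assuming self.data_size["vidA"] == (360, 640), i.e. (height, width):
#   key    -> "vidA/902"
#   left   -> 0.10 * 640 = 64.0
#   top    -> 0.20 * 360 = 72.0
#   right  -> 0.50 * 640 = 320.0
#   bottom -> 0.80 * 360 = 288.0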
Example 2
    def edit_target_class(self, target_idx=-1):
        # find out which target is currently selected
        if target_idx < 0:
            target_idx = self.targetList.currentRow()
        
        # if a class-map yaml is provided (e.g. in a task)
        if len(self.cls_map) > 0:
            class_list = []
            for cls_idx, cls_name in self.cls_map.items():
                class_list.append(f"{cls_idx}-{cls_name}")
            # show a dialog
            dialog = QInputDialog()
            label_text = "Input the correct class number.\n"\
                "Please note your input will not be checked for legality"
            item, okPressed = QInputDialog.getItem(
                dialog, "Edit class", label_text,
                class_list, 0, False)  # start at index 0, non-editable item list
            # print(text, okPressed)
            if okPressed and item:
                cur_bbox = label_table[self.data_name][target_idx]
                old_bbox = BBox(cur_bbox.xywh, cur_bbox.imgSizeWH, cur_bbox.cls)
                class_idx = item.split('-')[0] 
                label_table[self.data_name][target_idx].cls = int(class_idx)
                self.last_cls = int(class_idx)
                # log the change
                new_data = label_table[self.data_name][target_idx].to_label_str()
                # print(new_data)
                mod = [self.data_name, target_idx, new_data, old_bbox]
                modification_list.append(mod)
                self.ui_form.check_undoable()
                self.show()
        else:
            dialog = QInputDialog()
            label_text = "Input the correct class number.\n"\
                "Please note your input will not be checked for legality"
            text, okPressed = QInputDialog.getText(
                dialog, "Edit class", label_text, QLineEdit.Normal)

            if okPressed and text != '':
                cur_bbox = label_table[self.data_name][target_idx]
                old_bbox = BBox(cur_bbox.xywh, cur_bbox.imgSizeWH, cur_bbox.cls)
                label_table[self.data_name][target_idx].cls = int(text)
                self.last_cls = int(text)
                # log the change
                new_data = label_table[self.data_name][target_idx].to_label_str()
                # print(new_data)
                mod = [self.data_name, target_idx, new_data, old_bbox]
                modification_list.append(mod)
                self.ui_form.check_undoable()
                self.show()
Example 3
def read_label_file(label_file, with_landmark=True):
    """
    read data from given label file
    :param img_dir: str, directory shared by label files and images
    :param label_file: str, absolute path of label file
    :param with_landmark:
    :return: List of 3-element-tuple, (img_path, bbox_tuple, landmark_tuple)
    """
    result = []

    with open(label_file, 'r') as lf:
        for line in lf:
            data_units = line.strip().split()
            # read absolute path of image
            img_path = data_units[0].replace('\\', '/')
            # bounding box: the file stores (x1, x2, y1, y2); reorder to (x1, y1, x2, y2)
            bbox = [data_units[1], data_units[3], data_units[2], data_units[4]]
            bbox = [int(float(x)) for x in bbox]
            # read the five facial landmarks as (x, y) pairs
            if with_landmark:
                landmarks = np.zeros((5, 2))
                for i in range(5):
                    landmarks[i] = (float(data_units[5 + 2 * i]),
                                    float(data_units[6 + 2 * i]))
                result.append((img_path, BBox(bbox), landmarks))
            else:
                # keep the 3-tuple shape even when landmarks are not requested
                result.append((img_path, BBox(bbox), None))

    return result
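A hedged illustration of the whitespace-separated line layout this function assumes, reconstructed from the indexing above (the concrete values are made up):

#   <img_path> <x1> <x2> <y1> <y2> <lm1_x> <lm1_y> ... <lm5_x> <lm5_y>
#
#   line = r"imgs\001.jpg 84 161 92 169 106.2 107.8 146.4 109.1 126.9 127.6 107.3 139.0 142.8 141.2"
#
# parses to:
#   img_path  -> 'imgs/001.jpg'
#   bbox      -> [84, 92, 161, 169]        # reordered to (x1, y1, x2, y2)
#   landmarks -> a 5x2 array of (x, y) pairs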
Example 4
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str, dataset_name: str, backbone_name: str, prob_thresh: float):
    image = transforms.Image.open(path_to_input_image)
    dataset_class = DatasetBase.from_name(dataset_name)
    image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooling_mode=Config.POOLING_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    forward_input = Model.ForwardInput.Eval(image_tensor.cuda())
    forward_output: Model.ForwardOutput.Eval = model.eval().forward(forward_input)

    detection_bboxes = forward_output.detection_bboxes / scale
    detection_classes = forward_output.detection_classes
    detection_probs = forward_output.detection_probs

    kept_indices = detection_probs > prob_thresh
    detection_bboxes = detection_bboxes[kept_indices]
    detection_classes = detection_classes[kept_indices]
    detection_probs = detection_probs[kept_indices]

    draw = ImageDraw.Draw(image)

    for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
        color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
        bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
        category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

        draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
        draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

    image.save(path_to_output_image)
    print(f'Output image is saved to {path_to_output_image}')
Example 5
def detect_face(image, net, crop_coordinates=None, threshold=0.4):
    blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                                 (300, 300), (104.0, 177.0, 123.0))
    net.setInput(blob)
    # each detection row: [image_id, class_id, confidence, x1, y1, x2, y2],
    # with the box coordinates normalized to [0, 1]
    detected = net.forward()[0, 0, ...]

    conf = detected[:, 2]
    detected = detected[conf > threshold, :]
    detected[:, 3:] = np.clip(detected[:, 3:], a_min=0., a_max=1.)
    detected[:, (3, 5)] *= image.shape[1]  # x coordinates -> pixels
    detected[:, (4, 6)] *= image.shape[0]  # y coordinates -> pixels
    if crop_coordinates is not None:
        detected[:, (3, 5)] += crop_coordinates[0]
        detected[:, (4, 6)] += crop_coordinates[1]

    faces = []
    for f in detected:
        coor = f[3:].astype(int)
        if coor[0] >= coor[2] or coor[1] >= coor[3]:
            continue
        faces.append(
            AnnotationInstance(bbox=BBox(xmin=coor[0],
                                         ymin=coor[1],
                                         xmax=coor[2],
                                         ymax=coor[3],
                                         label='face',
                                         score=f[2],
                                         coordinate_mode='absolute')))

    return faces
Example 6
    def bbox(self):
        """
        returns the bounding box of the line
        """
        bbox = BBox()
        for pt in self.pts:
            bbox.update(pt)
        return bbox
Example 7
    def to_box(self) -> BBox:
        x0 = (self.x - self.w * 0.5)
        x1 = (self.x + self.w * 0.5)
        y0 = (self.y - self.h * 0.5)
        y1 = (self.y + self.h * 0.5)

        box = BBox(x0, y0, x1, y1)

        return box
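The method above maps a center-format box (x, y, w, h) to corner coordinates. Below is a minimal sketch of the inverse mapping with a worked round trip; corners_to_center is a hypothetical helper, not part of the original class:

def corners_to_center(x0, y0, x1, y1):
    # corner coordinates back to (center_x, center_y, width, height)
    w = x1 - x0
    h = y1 - y0
    return x0 + 0.5 * w, y0 + 0.5 * h, w, h

# a center box (x=50, y=40, w=20, h=10) maps to corners (40.0, 35.0, 60.0, 45.0)
# via to_box, and corners_to_center(40, 35, 60, 45) recovers (50.0, 40.0, 20, 10)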
Example 8
    def load_dataList(self, nameList, showThumbnail=True, progressBar=True):
        self.dataList.clear()
        if progressBar:
            progress = QProgressDialog("Loading data...", "Abort", \
                0, len(nameList), self.window)
            progress.setWindowModality(Qt.WindowModal)
        for i, dataName in enumerate(nameList):
            newItem = QtWidgets.QListWidgetItem(dataName)
            # Mark finished data
            if self.in_task and (dataName in self.current_task.finished_data):
                newItem.setBackground(QBrush(QColor("#b3b3b3")))

            # the image size is needed both for the thumbnail and for
            # converting the normalized labels to pixel coordinates below
            img = Image.open(self.current_data_dir + IMG_FOLDER
                             + '/' + dataName + '.' + IMG_EXT)
            w, h = img.size
            if showThumbnail:
                # downsize the image and convert its format for use as a thumbnail
                img = img.resize((128, int(128 * h / w)))
                img = img.convert("RGBA")
                qimg = QImage(img.tobytes('raw', 'RGBA'), img.size[0],
                              img.size[1], QImage.Format_RGBA8888)
                thumbnail = QIcon()
                thumbnail.addPixmap(QtGui.QPixmap.fromImage(qimg))
                newItem.setIcon(thumbnail)

            # pre load all the labels
            label_dir = self.current_data_dir + LEBEL_FOLDER \
                + '/' + dataName + '.txt'
            if os.path.exists(label_dir):
                with open(label_dir, 'r') as label_file:
                    bboxs = []
                    for line in label_file:
                        bbox_l = line.split()
                        class_num = int(bbox_l[0])
                        centerX = int(float(bbox_l[1]) * w)
                        centerY = int(float(bbox_l[2]) * h)
                        width = int(float(bbox_l[3]) * w)
                        height = int(float(bbox_l[4]) * h)
                        new_bbox = BBox([centerX, centerY, width, height],\
                                [w, h], class_num)
                        bboxs.append(new_bbox)

                    label_table[dataName] = bboxs
            else:
                # self.error_msg("Cannot find label: " + \
                #     label_dir)
                # if the label file does not exist, create an empty bbox list
                bboxs = []
                label_table[dataName] = bboxs

            self.dataList.addItem(newItem)
            if progressBar:
                progress.setValue(i)
                if progress.wasCanceled():
                    break
        if progressBar:
            progress.setValue(len(nameList))
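A hedged worked example of the label parsing above, assuming the usual normalized "class cx cy w h" layout implied by the code (the numbers are illustrative):

#   line = "2 0.500 0.250 0.100 0.200"    on a 640x480 image (w=640, h=480)
#   centerX = int(0.500 * 640) = 320      centerY = int(0.250 * 480) = 120
#   width   = int(0.100 * 640) = 64       height  = int(0.200 * 480) = 96
#   -> BBox([320, 120, 64, 96], [640, 480], 2)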
Example 9
def _infer_stream(path_to_input_stream_endpoint: str, period_of_inference: int, path_to_checkpoint: str, dataset_name: str, backbone_name: str, prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    if path_to_input_stream_endpoint.isdigit():
        path_to_input_stream_endpoint = int(path_to_input_stream_endpoint)
    video_capture = cv2.VideoCapture(path_to_input_stream_endpoint)

    with torch.no_grad():
        for sn in itertools.count(start=1):
            _, frame = video_capture.read()

            if sn % period_of_inference != 0:
                continue

            timestamp = time.time()

            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(image)
            image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_classes, detection_probs, _ = \
                model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
            detection_bboxes /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]

            draw = ImageDraw.Draw(image)

            for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
                draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

            image = np.array(image)
            frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            elapse = time.time() - timestamp
            fps = 1 / elapse
            cv2.putText(frame, f'FPS = {fps:.1f}', (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

            cv2.imshow('easy-faster-rcnn.pytorch', frame)
            if cv2.waitKey(10) == 27:
                break

    video_capture.release()
    cv2.destroyAllWindows()
Example 10
def _infer(path_to_input_dir: str, path_to_output_dir: str,
           path_to_checkpoint: str, dataset_name: str, backbone_name: str,
           prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone,
                  dataset_class.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)
    images = glob.glob(path_to_input_dir + '/*.jpg')

    with torch.no_grad():
        for image in tqdm(images):
            name = image.split("/")[-1]

            image = transforms.Image.open(image).convert("RGB")
            image_tensor, scale = dataset_class.preprocess(
                image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_classes, detection_probs, _ = \
                model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
            detection_bboxes /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]

            draw = ImageDraw.Draw(image)

            for bbox, cls, prob in zip(detection_bboxes.tolist(),
                                       detection_classes.tolist(),
                                       detection_probs.tolist()):
                color = random.choice(
                    ['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                bbox = BBox(left=bbox[0],
                            top=bbox[1],
                            right=bbox[2],
                            bottom=bbox[3])
                category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                draw.rectangle(
                    ((bbox.left, bbox.top), (bbox.right, bbox.bottom)),
                    outline=color)
                draw.text((bbox.left, bbox.top),
                          text=f'{category:s} {prob:.3f}',
                          fill=color)

            image.save(path_to_output_dir + name)
        print(f'Output images are saved to {path_to_output_dir}')
Example 11
def write_label(img_dir, img_id, label, bboxes):
    global _min_x_w, _min_y_h, _max_x_w, _max_y_h
    global _max_w, _max_h, _max_w_h, _max_h_w
    img = None
    for f in glob.glob(os.path.join(img_dir, img_id + '.*')):
        if f.endswith('.txt'):
            continue
        if img:
            raise FileExistsError('{} and\n{}'.format(img, f))
        img = f
    if img:
        img = cv2.imread(img)  # read the image file found above
    if img is None:
        raise FileNotFoundError(os.path.join(img_dir, img_id))

    h, w = img.shape[:2]
    if _max_w < w:
        _max_w = w
        print('New max width: {}'.format(w))
    if _max_h < h:
        _max_h = h
        print('New max height: {}'.format(h))
    if h < w:
        w_h = w / h
        if _max_w_h < w_h:
            _max_w_h = w_h
            print('New width / height: {}'.format(w_h))
    else:
        h_w = h / w
        if _max_h_w < h_w:
            _max_h_w = h_w
            print('New height / width: {}'.format(h_w))

    with open(
            find_replace(os.path.join(img_dir, img_id + '.txt.' + label),
                         'images', 'labels'), 'w') as flabel:
        for bbox_tuple in bboxes:
            bbox = BBox(hw=(h, w), type_=BBox.VOC, bbox=bbox_tuple)
            xmin, ymin, xmax, ymax = bbox.get(type_=BBox.OPEN_IMAGES)
            if _min_x_w > xmin:
                _min_x_w = xmin
                print('New min xmin / (width-1): {}'.format(_min_x_w))
            if _min_y_h > ymin:
                _min_y_h = ymin
                print('New min ymin / (height-1): {}'.format(_min_y_h))
            if _max_x_w < xmax:
                _max_x_w = xmax
                print('New max xmax / (width-1): {}'.format(_max_x_w))
            if _max_y_h < ymax:
                _max_y_h = ymax
                print('New max ymax / (height-1): {}'.format(_max_y_h))
            flabel.write('{:1.15f} {:1.15f} {:1.15f} {:1.15f}\n'.format(
                *bbox.get(type_=BBox.YOLO)))
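For clarity, the underlying VOC-to-YOLO arithmetic as a generic sketch, independent of the BBox helper used above (this assumes BBox.get(type_=BBox.YOLO) produces a normalized center/size box; it is not a quote of that implementation):

def voc_to_yolo(xmin, ymin, xmax, ymax, img_w, img_h):
    # absolute corner coordinates -> normalized (center_x, center_y, width, height)
    cx = (xmin + xmax) / 2.0 / img_w
    cy = (ymin + ymax) / 2.0 / img_h
    bw = (xmax - xmin) / img_w
    bh = (ymax - ymin) / img_h
    return cx, cy, bw, bh

# voc_to_yolo(100, 50, 300, 250, img_w=640, img_h=480) -> (0.3125, 0.3125, 0.3125, ~0.4167)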
Example 12
    def __init__(self, path_to_data_dir: str, mode: Mode):
        super().__init__()

        self._mode = mode

        path_to_voc2007_dir = os.path.join(path_to_data_dir, 'VOCdevkit',
                                           'VOC2007')
        path_to_imagesets_main_dir = os.path.join(path_to_voc2007_dir,
                                                  'ImageSets', 'Main')
        path_to_annotations_dir = os.path.join(path_to_voc2007_dir,
                                               'Annotations')
        self._path_to_jpeg_images_dir = os.path.join(path_to_voc2007_dir,
                                                     'JPEGImages')

        if self._mode == Dataset.Mode.TRAIN:
            path_to_image_ids_txt = os.path.join(path_to_imagesets_main_dir,
                                                 'trainval.txt')
        elif self._mode == Dataset.Mode.TEST:
            path_to_image_ids_txt = os.path.join(path_to_imagesets_main_dir,
                                                 'test.txt')
        else:
            raise ValueError('invalid mode')

        with open(path_to_image_ids_txt, 'r') as f:
            lines = f.readlines()
            self._image_ids = [line.rstrip() for line in lines]

        self._image_id_to_annotation_dict = {}
        for image_id in self._image_ids:
            path_to_annotation_xml = os.path.join(path_to_annotations_dir,
                                                  f'{image_id}.xml')
            tree = ET.ElementTree(file=path_to_annotation_xml)
            root = tree.getroot()

            self._image_id_to_annotation_dict[image_id] = Dataset.Annotation(
                filename=next(root.iterfind('filename')).text,
                objects=[
                    Dataset.Annotation.Object(
                        name=next(tag_object.iterfind('name')).text,
                        difficult=next(
                            tag_object.iterfind('difficult')).text == '1',
                        bbox=BBox(
                            left=float(
                                next(tag_object.iterfind('bndbox/xmin')).text),
                            top=float(
                                next(tag_object.iterfind('bndbox/ymin')).text),
                            right=float(
                                next(tag_object.iterfind('bndbox/xmax')).text),
                            bottom=float(
                                next(
                                    tag_object.iterfind('bndbox/ymax')).text)))
                    for tag_object in root.iterfind('object')
                ])
Example 13
    def __init__(self, path_to_data_dir: str, mode: Base.Mode, image_min_side: float, image_max_side: float):
        super().__init__(path_to_data_dir, mode, image_min_side, image_max_side)

        path_to_voc2007_dir = os.path.join(self._path_to_data_dir, 'VOCdevkit', 'VOC2007')
        path_to_imagesets_main_dir = os.path.join(path_to_voc2007_dir, 'ImageSets', 'Main')
        path_to_annotations_dir = os.path.join(path_to_voc2007_dir, 'Annotations')
        self._path_to_jpeg_images_dir = os.path.join(path_to_voc2007_dir, 'JPEGImages')

        if self._mode == VOC2007Person.Mode.TRAIN:
            path_to_image_ids_txt = os.path.join(path_to_imagesets_main_dir, 'trainval.txt')
        elif self._mode == VOC2007Person.Mode.EVAL:
            path_to_image_ids_txt = os.path.join(path_to_imagesets_main_dir, 'test.txt')
        else:
            raise ValueError('invalid mode')

        with open(path_to_image_ids_txt, 'r') as f:
            lines = f.readlines()
            image_ids = [line.rstrip() for line in lines]

        self._image_id_to_annotation_dict = {}
        self._image_ratios = []

        for image_id in image_ids:
            path_to_annotation_xml = os.path.join(path_to_annotations_dir, f'{image_id}.xml')
            tree = ET.ElementTree(file=path_to_annotation_xml)
            root = tree.getroot()

            annotation = VOC2007Person.Annotation(
                filename=root.find('filename').text,
                objects=[VOC2007Person.Annotation.Object(
                    name=next(tag_object.iterfind('name')).text,
                    difficult=next(tag_object.iterfind('difficult')).text == '1',
                    bbox=BBox(  # convert to 0-based pixel index
                        left=float(next(tag_object.iterfind('bndbox/xmin')).text) - 1,
                        top=float(next(tag_object.iterfind('bndbox/ymin')).text) - 1,
                        right=float(next(tag_object.iterfind('bndbox/xmax')).text) - 1,
                        bottom=float(next(tag_object.iterfind('bndbox/ymax')).text) - 1
                    )
                ) for tag_object in root.iterfind('object')]
            )
            annotation.objects = [obj for obj in annotation.objects if obj.name in ['person'] and not obj.difficult]

            if len(annotation.objects) > 0:
                self._image_id_to_annotation_dict[image_id] = annotation

                width = int(root.find('size/width').text)
                height = int(root.find('size/height').text)
                ratio = float(width / height)
                self._image_ratios.append(ratio)

        self._image_ids = list(self._image_id_to_annotation_dict.keys())
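VOC annotation files use 1-based pixel coordinates, which is why the parser above subtracts 1 from every corner; a minimal sketch of that shift:

# an annotated box (xmin=1, ymin=1, xmax=353, ymax=500) in 1-based VOC coordinates
# becomes the 0-based box below
bbox_0_based = BBox(left=1.0 - 1, top=1.0 - 1,
                    right=353.0 - 1, bottom=500.0 - 1)  # (0, 0, 352, 499)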
Example 14
def main(args):
    global predefined_labels
    global _min_x_w, _min_y_h, _max_x_w, _max_y_h
    global _max_w, _max_h, _max_w_h, _max_h_w

    LabelNames.init(predefined_labels)

    print('Reading: {}'.format(args.test_list))
    with open(args.test_list, 'r') as ftest, open(args.output_file, 'w') as fo:
        fo.write('ImageId,PredictionString\n')
        for line in ftest:
            id_ = pathlib.PurePath(line).stem
            pred = ''
            summary = os.path.join(args.summary_dir, id_ + '.csv')
            if not os.path.exists(summary):
                print('***** CAUTION *****: no summary for {}'.format(id_))
            else:
                df = pd.read_csv(os.path.join(args.summary_dir, id_ + '.csv'),
                                 header=None)
                df.columns = [
                    'confidence', 'voc_xmin', 'voc_ymin', 'voc_xmax',
                    'voc_ymax', 'label'
                ]
                df.sort_values(by='confidence', ascending=False, inplace=True)
                df = df.head(5)
                bboxes = []
                for index, row in df.iterrows():
                    # always keep at least one box; keep more only above the confidence threshold
                    if len(bboxes) < 1 or float(row[0]) >= args.threshold:
                        bboxes.append(
                            BBox(type_=BBox.VOC,
                                 bbox=(tuple(map(float, row[1:5]))),
                                 label=row[5]))
                for b in bboxes:
                    bbox = b.get(type_=BBox.ILSVRC)
                    #bbox = b.get(type_=BBox.VOC)
                    if pred:
                        pred += ' '
                    # OC_synset_mapping.txt: the mapping between the 1000 synset ids and their descriptions.
                    # For example, line 1 reads "n01440764 tench, Tinca tinca", i.e. class 1 has the synset
                    # id n01440764 and contains the fish tench.
                    pred += '{} {} {} {} {}'.format(
                        1 + LabelNames.label_index(b.label),
                        #LabelNames.label_index(b.label),
                        math.floor(bbox[0]),
                        math.floor(bbox[1]),
                        math.ceil(bbox[2]),
                        math.ceil(bbox[3]))
            fo.write('{},{}\n'.format(id_, pred))
Example 15
    async def handler(websocket, path):
        print('Connection established:', path)

        with torch.no_grad():
            while True:
                frame = await websocket.recv()
                frame = np.frombuffer(frame,
                                      dtype=np.uint8).reshape(480, 640, 3)

                image = Image.fromarray(frame)
                image_tensor, scale = dataset_class.preprocess(
                    image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

                detection_bboxes, detection_classes, detection_probs, _ = \
                    model.eval().forward(image_tensor.unsqueeze(dim=0).cuda())
                detection_bboxes /= scale

                kept_indices = detection_probs > prob_thresh
                detection_bboxes = detection_bboxes[kept_indices]
                detection_classes = detection_classes[kept_indices]
                detection_probs = detection_probs[kept_indices]

                message = []

                for bbox, cls, prob in zip(detection_bboxes.tolist(),
                                           detection_classes.tolist(),
                                           detection_probs.tolist()):
                    bbox = BBox(left=bbox[0],
                                top=bbox[1],
                                right=bbox[2],
                                bottom=bbox[3])
                    category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                    message.append({
                        'left': int(bbox.left),
                        'top': int(bbox.top),
                        'right': int(bbox.right),
                        'bottom': int(bbox.bottom),
                        'category': category
                    })

                message = json.dumps(message)
                await websocket.send(message)
Example 16
    def blend_boxes(group, label, coordinate_mode):
        scores = np.array([i.score for i in group])
        xmins = np.array([i.xmin for i in group])
        ymins = np.array([i.ymin for i in group])
        xmaxs = np.array([i.xmax for i in group])
        ymaxs = np.array([i.ymax for i in group])

        xmin = np.sum(xmins * scores) / np.sum(scores)
        ymin = np.sum(ymins * scores) / np.sum(scores)
        xmax = np.sum(xmaxs * scores) / np.sum(scores)
        ymax = np.sum(ymaxs * scores) / np.sum(scores)
        score = scores.max()  # np.sum(scores * scores) / np.sum(scores)

        return AnnotationInstance(bbox=BBox(xmin=xmin,
                                            ymin=ymin,
                                            xmax=xmax,
                                            ymax=ymax,
                                            label=label,
                                            score=score,
                                            coordinate_mode=coordinate_mode))
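A small worked example of the score-weighted blend above (the numbers are illustrative):

#   box A: (xmin=10, ymin=10, xmax=50, ymax=50), score 0.9
#   box B: (xmin=20, ymin=20, xmax=60, ymax=60), score 0.3
#   blended xmin = (10 * 0.9 + 20 * 0.3) / (0.9 + 0.3) = 12.5
#   likewise ymin = 12.5, xmax = 52.5, ymax = 52.5, and the blended score is max(0.9, 0.3) = 0.9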
Example 17
def _infer(path_to_input_image: str, path_to_output_image: str,
           path_to_checkpoint: str, backbone_name: str):
    image = transforms.Image.open(path_to_input_image)
    image_tensor, scale = Dataset.preprocess(image)

    backbone = Interface.from_name(backbone_name)(pretrained=False)
    model = Model(backbone).cuda()
    model.load(path_to_checkpoint)

    forward_input = Model.ForwardInput.Eval(image_tensor.cuda())
    forward_output: Model.ForwardOutput.Eval = model.eval().forward(
        forward_input)

    detection_bboxes = forward_output.detection_bboxes / scale
    detection_labels = forward_output.detection_labels
    detection_probs = forward_output.detection_probs

    draw = ImageDraw.Draw(image)

    for bbox, label, prob in zip(detection_bboxes.tolist(),
                                 detection_labels.tolist(),
                                 detection_probs.tolist()):
        if prob < 0.6:
            continue

        color = random.choice(
            ['red', 'green', 'blue', 'yellow', 'purple', 'white'])
        bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
        category = Dataset.LABEL_TO_CATEGORY_DICT[label]

        draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)),
                       outline=color)
        draw.text((bbox.left, bbox.top),
                  text=f'{category:s} {prob:.3f}',
                  fill=color)

    image.save(path_to_output_image)
Example 18
    def bbox(self, min_area=0):
        """
        Smart bounding box: when min_area > 0, holes are ignored and polygons
        whose area is below min_area times the largest contour's area are
        skipped; otherwise the full polygon's bounding box is used.
        """
        bb = []
        bbox = BBox()
        if min_area == 0:
            bb.append(self.poly.boundingBox())
        else:
            areas = self.areas()
            max_a = max(areas)
            for i in range(len(self.poly)):
                if self.poly.isHole(i):
                    continue
                a = areas[i]
                if a < max_a * min_area:
                    continue
                bb.append(self.poly.boundingBox(i))
        # each boundingBox() gives x extents in b[0], b[1] and y extents in
        # b[2], b[3]; feed all four corners to BBox
        for b in bb:
            bbox.update((b[0], b[2]))
            bbox.update((b[1], b[2]))
            bbox.update((b[0], b[3]))
            bbox.update((b[1], b[3]))
        return bbox
Example 19
    def mouseMoveEvent(self, event):
        super().mouseMoveEvent(event)
        x = event.scenePos().x()
        y = event.scenePos().y()
        # at the beginning of a click & drag: create a new bbox
        if self.mouseDown and (not self.targetCreated):
            # if the drag started outside the image, don't create anything
            if (0 > x) or (0 > y) or (x > self.dscene.backgroundSize[0]) \
                    or (y > self.dscene.backgroundSize[1]):
                self.mouseDown = False
                return
            newBbox = BBox([x, y, 0, 0], self.dscene.backgroundSize,
                           self.dscene.last_cls)
            newBbox.drew_in_scene(self, self.dscene, -1)
            newBbox.br.mouseMoveEvent(event, passed_by_scene=True)
            self.newBboxes.append(newBbox)
            self.targetCreated = True
        # a new bbox has already been created for this click & drag action;
        # pass the mouse event to the bottom-right anchor so the bbox
        # can be dragged out
        elif self.mouseDown:
            self.newBboxes[-1].br.mouseMoveEvent(event, passed_by_scene=True)
Example 20
def _infer(path_to_input_image: str, path_to_output_image: str,
           path_to_checkpoint: str, dataset_name: str, backbone_name: str,
           prob_thresh: float):
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone,
                  dataset_class.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)
    '''
    Default options:
    pooler_mode=Config.POOLER_MODE = Pooler.Mode.ALIGN
    anchor_ratios=Config.ANCHOR_RATIOS = [(1, 2), (1, 1), (2, 1)]
    anchor_sizes: for inference a 64 is added to the defaults, so the final list is [64, 128, 256, 512]

    RPN NMS settings used for Eval:
        RPN_PRE_NMS_TOP_N: int = 6000
        RPN_POST_NMS_TOP_N: int = 300
    '''

    with torch.no_grad():
        # preprocess so that at least one side of the input image satisfies min_side / max_side
        # (unlike YOLO, no fixed input size is required here)
        image = transforms.Image.open(path_to_input_image)
        image_tensor, scale = dataset_class.preprocess(image,
                                                       Config.IMAGE_MIN_SIDE,
                                                       Config.IMAGE_MAX_SIDE)

        # add a batch dimension first, then run forward in eval mode
        # result shapes: (gd_n, 4), (gd_n,), (gd_n,)
        detection_bboxes, detection_classes, detection_probs, _ = model.eval(
        ).forward(image_tensor.unsqueeze(dim=0).cuda())
        detection_bboxes /= scale  # the input was scaled by `scale`, so divide the boxes by it to map back to the original image

        kept_indices = detection_probs > prob_thresh  #0.6
        detection_bboxes = detection_bboxes[kept_indices]  #(gd_thresh_n,4)
        detection_classes = detection_classes[kept_indices]  #(gd_thresh_n,)
        detection_probs = detection_probs[kept_indices]  #(gd_thresh_n,)

        draw = ImageDraw.Draw(image)

        for bbox, cls, prob in zip(detection_bboxes.tolist(),
                                   detection_classes.tolist(),
                                   detection_probs.tolist()):
            color = random.choice(
                ['red', 'green', 'blue', 'yellow', 'purple', 'white'])
            bbox = BBox(left=bbox[0],
                        top=bbox[1],
                        right=bbox[2],
                        bottom=bbox[3])
            category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

            draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)),
                           outline=color)
            draw.text((bbox.left, bbox.top),
                      text=f'{category:s} {prob:.3f}',
                      fill=color)

        image.save(path_to_output_image)
        print(f'Output image is saved to {path_to_output_image}')
Example 21
    def __init__(self, path_to_data_dir: str, mode: Base.Mode, image_min_side: float, image_max_side: float):
        super().__init__(path_to_data_dir, mode, image_min_side, image_max_side)

        path_to_coco_dir = os.path.join(self._path_to_data_dir, 'COCO')
        path_to_annotations_dir = os.path.join(path_to_coco_dir, 'annotations')
        path_to_caches_dir = os.path.join('caches', 'coco2017-person', f'{self._mode.value}')
        path_to_image_ids_pickle = os.path.join(path_to_caches_dir, 'image-ids.pkl')
        path_to_image_id_dict_pickle = os.path.join(path_to_caches_dir, 'image-id-dict.pkl')

        if self._mode == COCO2017Person.Mode.TRAIN:
            path_to_jpeg_images_dir = os.path.join(path_to_coco_dir, 'train2017')
            path_to_annotation = os.path.join(path_to_annotations_dir, 'instances_train2017.json')
        elif self._mode == COCO2017Person.Mode.EVAL:
            path_to_jpeg_images_dir = os.path.join(path_to_coco_dir, 'val2017')
            path_to_annotation = os.path.join(path_to_annotations_dir, 'instances_val2017.json')
        else:
            raise ValueError('invalid mode')

        coco_dataset = CocoDetection(root=path_to_jpeg_images_dir, annFile=path_to_annotation)

        if os.path.exists(path_to_image_ids_pickle) and os.path.exists(path_to_image_id_dict_pickle):
            print('loading cache files...')

            with open(path_to_image_ids_pickle, 'rb') as f:
                self._image_ids = pickle.load(f)

            with open(path_to_image_id_dict_pickle, 'rb') as f:
                self._image_id_to_annotation_dict = pickle.load(f)
        else:
            print('generating cache files...')

            os.makedirs(path_to_caches_dir, exist_ok=True)

            self._image_id_to_annotation_dict: Dict[str, COCO2017Person.Annotation] = {}
            for idx, (image, annotation) in enumerate(tqdm(coco_dataset)):
                if len(annotation) > 0:
                    image_id = str(annotation[0]['image_id'])  # all image_id in annotation are the same
                    annotation = COCO2017Person.Annotation(
                        filename=os.path.join(path_to_jpeg_images_dir, '{:012d}.jpg'.format(int(image_id))),
                        objects=[COCO2017Person.Annotation.Object(
                            bbox=BBox(  # `ann['bbox']` is in the format [left, top, width, height]
                                left=ann['bbox'][0],
                                top=ann['bbox'][1],
                                right=ann['bbox'][0] + ann['bbox'][2],
                                bottom=ann['bbox'][1] + ann['bbox'][3]
                            ),
                            label=ann['category_id'])
                            for ann in annotation]
                    )
                    annotation.objects = [obj for obj in annotation.objects
                                          if obj.label in [COCO2017.CATEGORY_TO_LABEL_DICT['person']]]  # filtering label should refer to original `COCO2017` dataset

                    if len(annotation.objects) > 0:
                        self._image_id_to_annotation_dict[image_id] = annotation

            self._image_ids = list(self._image_id_to_annotation_dict.keys())

            with open(path_to_image_ids_pickle, 'wb') as f:
                pickle.dump(self._image_ids, f)

            with open(path_to_image_id_dict_pickle, 'wb') as f:
                pickle.dump(self._image_id_to_annotation_dict, f)
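For reference, a worked instance of the COCO box conversion performed above, from [left, top, width, height] to corner coordinates (the numbers are illustrative):

#   ann['bbox'] = [100.0, 50.0, 40.0, 80.0]
#   -> BBox(left=100.0, top=50.0, right=100.0 + 40.0, bottom=50.0 + 80.0)
#      i.e. (left=100.0, top=50.0, right=140.0, bottom=130.0)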
Example 22
    def __init__(self):
        self.i2c_dic = self.index2class()

        self.bboxes = []
        self.labels = []
        self.probs=[]
        self.image_ratios = []
        self.image_position = []
        self.widths = []
        self.heights = []

        self.data_dic = {}
        self.data_dic_real = {}

        self.data_size = {}
        self.data_format = {}
        self.path_to_data_dir = '/home/aiuser/'
        path_to_AVA_dir = os.path.join(self.path_to_data_dir, 'ava_v2.2', 'preproc', 'train_clips')
        self.path_to_videos = os.path.join(path_to_AVA_dir, 'clips')
        self.path_to_keyframe = os.path.join(path_to_AVA_dir, 'keyframes')
        #path_to_video_ids_txt = os.path.join(path_to_AVA_dir, 'trainval.txt')
        path_to_video_ids_txt = '/home/aiuser/ava_v2.2/result.txt'
        path_to_real_ids_txt = '/home/aiuser/ava_v2.2/preproc/train_clips/trainval.txt'
        # get each video's frame size by reading its first keyframe
        for frame in sorted(os.listdir(self.path_to_keyframe)):
            img = os.listdir(os.path.join(self.path_to_keyframe, frame))[0]
            img = cv2.imread(os.path.join(self.path_to_keyframe, frame, img))
            img_shape = img.shape
            self.data_size[frame] = (img_shape[0], img_shape[1])
        # get each video's file format (extension)
        for video in sorted(os.listdir(self.path_to_videos)):
            video_0 = os.listdir(os.path.join(self.path_to_videos, video))[0]
            self.data_format[video] = '.' + video_0.split('.')[1]
        # read the annotation file; the key is the clip name (e.g. aa/0930)
        with open(path_to_video_ids_txt, 'r') as f:
            data = f.readlines()
            for line in data:
                content = line.split(',')
                key = content[0] + "/" + str(int(content[1]))
                img_h = int(self.data_size[content[0]][0])
                img_w = int(self.data_size[content[0]][1])
                if key not in self.data_dic:
                    self.data_dic[key] = [imshow_result.info(content[6],content[7].replace("\n", ""), BBox(  # convert to 0-based pixel index
                        left=float(content[2]) * img_w - 1,
                        top=float(content[3]) * img_h - 1,
                        right=float(content[4]) * img_w - 1,
                        bottom=float(content[5]) * img_h - 1), img_h, img_w, key)]
                else:
                    self.data_dic[key].append(imshow_result.info(content[6],content[7].replace("\n", ""), BBox(  # convert to 0-based pixel index
                        left=float(content[2]) * img_w - 1,
                        top=float(content[3]) * img_h - 1,
                        right=float(content[4]) * img_w - 1,
                        bottom=float(content[5]) * img_h - 1), img_h, img_w, key))
        with open(path_to_real_ids_txt, 'r') as f:
            data = f.readlines()
            for line in data:
                content = line.split(',')
                key = content[0] + "/" + str(int(content[1]))
                img_h = int(self.data_size[content[0]][0])
                img_w = int(self.data_size[content[0]][1])
                if key not in self.data_dic_real:
                    self.data_dic_real[key] = [imshow_result.info(content[6], content[7].replace("\n", ""),
                                                                BBox(  # convert to 0-based pixel index
                                                                left=float(content[2]) * img_w - 1,
                                                                top=float(content[3]) * img_h - 1,
                                                                right=float(content[4]) * img_w - 1,
                                                                bottom=float(content[5]) * img_h - 1), img_h,img_w, key)]
                else:
                    self.data_dic_real[key].append(imshow_result.info(content[6], content[7].replace("\n", ""),
                                                                BBox(  # convert to 0-based pixel index
                                                                    left=float(content[2]) * img_w - 1,
                                                                    top=float(content[3]) * img_h - 1,
                                                                    right=float(content[4]) * img_w - 1,
                                                                    bottom=float(content[5]) * img_h - 1), img_h,img_w, key))
            # print('data_dic:',self.data_dic)
        # reorganize the dictionary entries into parallel lists
        for key in self.data_dic:
            self.bboxes.append([item.bbox.tolist() for item in self.data_dic[key]])
            self.labels.append([item.img_class for item in self.data_dic[key]])
            self.probs.append([item.prob for item in self.data_dic[key]])
            width = int(self.data_dic[key][0].weight)
            self.widths.append(width)
            height = int(self.data_dic[key][0].height)
            self.heights.append(height)
            ratio = float(width / height)
            self.image_ratios.append(ratio)
            self.image_position.append(self.data_dic[key][0].img_position)