Ejemplo n.º 1
0
def _infer(path_to_input_image: str, path_to_output_image: str, path_to_checkpoint: str, dataset_name: str, backbone_name: str, prob_thresh: float):
    """Detect objects in one image and save a copy with the detections drawn.

    Args:
        path_to_input_image: Image file to run detection on.
        path_to_output_image: Where the annotated image is written.
        path_to_checkpoint: Checkpoint passed to ``model.load``.
        dataset_name: Name resolved through ``DatasetBase.from_name``; supplies
            preprocessing, the class count and the label-to-category mapping.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections whose probability is not strictly greater than
            this value are discarded.
    """
    # Function-scope import so the block does not rely on a module-level one.
    import torch

    image = transforms.Image.open(path_to_input_image)
    dataset_class = DatasetBase.from_name(dataset_name)
    image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooling_mode=Config.POOLING_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)

    # Pure inference: disable autograd so no graph is recorded for the forward pass.
    with torch.no_grad():
        forward_input = Model.ForwardInput.Eval(image_tensor.cuda())
        forward_output: Model.ForwardOutput.Eval = model.eval().forward(forward_input)

        # Undo the preprocessing scale (presumably the resize factor - confirm
        # against DatasetBase.preprocess) before drawing on the original image.
        detection_bboxes = forward_output.detection_bboxes / scale
        detection_classes = forward_output.detection_classes
        detection_probs = forward_output.detection_probs

        kept_indices = detection_probs > prob_thresh
        detection_bboxes = detection_bboxes[kept_indices]
        detection_classes = detection_classes[kept_indices]
        detection_probs = detection_probs[kept_indices]

    draw = ImageDraw.Draw(image)

    for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
        # Colour is picked at random per box, so output differs between runs.
        color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
        bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
        category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

        draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
        draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

    image.save(path_to_output_image)
    print(f'Output image is saved to {path_to_output_image}')
Ejemplo n.º 2
0
def _eval(path_to_checkpoint: str, dataset_name: str, backbone_name: str,
          path_to_data_dir: str, path_to_results_dir: str):
    """Evaluate a checkpoint on the EVAL split of a dataset and log the result.

    Builds the dataset and an ``Evaluator`` over it, constructs the model on
    the GPU, loads ``path_to_checkpoint`` and logs the mean AP followed by the
    detail string returned by ``evaluator.evaluate``.
    """
    dataset_cls = DatasetBase.from_name(dataset_name)
    dataset = dataset_cls(path_to_data_dir, DatasetBase.Mode.EVAL,
                          Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    evaluator = Evaluator(dataset, path_to_data_dir, path_to_results_dir)

    sample_count = len(dataset)
    Log.i('Found {:d} samples'.format(sample_count))

    backbone_cls = BackboneBase.from_name(backbone_name)
    backbone = backbone_cls(pretrained=False)
    num_classes = dataset.num_classes()
    model_options = dict(
        pooler_mode=Config.POOLER_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS,
        anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
    )
    model = Model(backbone, num_classes, **model_options).cuda()
    model.load(path_to_checkpoint)

    Log.i('Start evaluating with 1 GPU (1 batch per GPU)')
    result = evaluator.evaluate(model)
    mean_ap, detail = result
    Log.i('Done')

    Log.i('mean AP = {:.4f}'.format(mean_ap))
    Log.i('\n' + detail)
def _infer_stream(path_to_input_stream_endpoint: str, period_of_inference: int, path_to_checkpoint: str, dataset_name: str, backbone_name: str, prob_thresh: float):
    """Run detection on a video stream and show annotated frames in a window.

    Args:
        path_to_input_stream_endpoint: Source handed to ``cv2.VideoCapture``.
            An all-digit string is converted to ``int`` first, which OpenCV
            treats as a device index.
        period_of_inference: Only every N-th frame read is processed; the
            others are read and dropped.
        path_to_checkpoint: Checkpoint passed to ``model.load``.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections whose probability is not strictly greater than
            this value are discarded.

    The loop ends when the stream stops delivering frames or when ESC
    (key code 27) is pressed in the preview window.
    """
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone, dataset_class.num_classes(), pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)
    # Switch to evaluation mode once instead of on every processed frame.
    model.eval()

    if path_to_input_stream_endpoint.isdigit():
        path_to_input_stream_endpoint = int(path_to_input_stream_endpoint)
    video_capture = cv2.VideoCapture(path_to_input_stream_endpoint)

    # try/finally guarantees the capture device and the window are released
    # even if inference raises mid-stream.
    try:
        with torch.no_grad():
            for sn in itertools.count(start=1):
                grabbed, frame = video_capture.read()
                if not grabbed:
                    # No frame was delivered (end of file, camera unplugged, ...).
                    # Without this check `frame` is None and cvtColor fails.
                    break

                if sn % period_of_inference != 0:
                    continue

                timestamp = time.time()

                # OpenCV delivers BGR; the PIL-based preprocessing gets RGB.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(image)
                image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

                detection_bboxes, detection_classes, detection_probs, _ = \
                    model.forward(image_tensor.unsqueeze(dim=0).cuda())
                detection_bboxes /= scale

                kept_indices = detection_probs > prob_thresh
                detection_bboxes = detection_bboxes[kept_indices]
                detection_classes = detection_classes[kept_indices]
                detection_probs = detection_probs[kept_indices]

                draw = ImageDraw.Draw(image)

                for bbox, cls, prob in zip(detection_bboxes.tolist(), detection_classes.tolist(), detection_probs.tolist()):
                    color = random.choice(['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                    bbox = BBox(left=bbox[0], top=bbox[1], right=bbox[2], bottom=bbox[3])
                    category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                    draw.rectangle(((bbox.left, bbox.top), (bbox.right, bbox.bottom)), outline=color)
                    draw.text((bbox.left, bbox.top), text=f'{category:s} {prob:.3f}', fill=color)

                # Back to a BGR array for OpenCV display.
                image = np.array(image)
                frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                elapse = time.time() - timestamp
                # Coarse clocks can report a zero interval; avoid ZeroDivisionError.
                fps = 1 / elapse if elapse > 0 else float('inf')
                cv2.putText(frame, f'FPS = {fps:.1f}', (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)

                cv2.imshow('easy-faster-rcnn.pytorch', frame)
                if cv2.waitKey(10) == 27:
                    break
    finally:
        video_capture.release()
        cv2.destroyAllWindows()
Ejemplo n.º 4
0
def _infer(path_to_input_dir: str, path_to_output_dir: str,
           path_to_checkpoint: str, dataset_name: str, backbone_name: str,
           prob_thresh: float):
    """Run detection on every ``*.jpg`` in a directory and save annotated copies.

    Args:
        path_to_input_dir: Directory searched (non-recursively) for ``*.jpg``.
        path_to_output_dir: Directory the annotated images are written to,
            under their original file names. A trailing separator is optional.
        path_to_checkpoint: Checkpoint passed to ``model.load``.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections whose probability is not strictly greater than
            this value are discarded.
    """
    # Function-scope import so the block does not rely on a module-level one.
    import os

    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone,
                  dataset_class.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)
    # Switch to evaluation mode once instead of once per image.
    model.eval()
    image_paths = glob.glob(os.path.join(path_to_input_dir, '*.jpg'))

    with torch.no_grad():
        for image_path in tqdm(image_paths):
            # basename handles whichever separator glob returned.
            name = os.path.basename(image_path)

            image = transforms.Image.open(image_path).convert("RGB")
            image_tensor, scale = dataset_class.preprocess(
                image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_classes, detection_probs, _ = \
                model.forward(image_tensor.unsqueeze(dim=0).cuda())
            detection_bboxes /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_classes = detection_classes[kept_indices]
            detection_probs = detection_probs[kept_indices]

            draw = ImageDraw.Draw(image)

            for bbox, cls, prob in zip(detection_bboxes.tolist(),
                                       detection_classes.tolist(),
                                       detection_probs.tolist()):
                color = random.choice(
                    ['red', 'green', 'blue', 'yellow', 'purple', 'white'])
                bbox = BBox(left=bbox[0],
                            top=bbox[1],
                            right=bbox[2],
                            bottom=bbox[3])
                category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                draw.rectangle(
                    ((bbox.left, bbox.top), (bbox.right, bbox.bottom)),
                    outline=color)
                draw.text((bbox.left, bbox.top),
                          text=f'{category:s} {prob:.3f}',
                          fill=color)

            # Plain concatenation produced e.g. "outimg.jpg" when the output
            # directory was given without a trailing slash; join is correct
            # in both cases.
            image.save(os.path.join(path_to_output_dir, name))
        print(f'Output image is saved to {path_to_output_dir}')
Ejemplo n.º 5
0
def _infer_websocket(path_to_checkpoint: str, dataset_name: str, backbone_name: str, prob_thresh: float):
    """Serve detections over a websocket on port 8765.

    Each incoming message is interpreted as a raw uint8 buffer and reshaped to
    (480, 640, 3). The reply is a JSON list with one object per kept detection
    holding integer ``left``/``top``/``right``/``bottom`` and ``category``.
    This call blocks forever in the asyncio event loop.
    """
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone_factory = BackboneBase.from_name(backbone_name)
    model = Model(
        backbone_factory(pretrained=False),
        dataset_class.num_classes(),
        pooler_mode=Config.POOLER_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS,
        anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
    ).cuda()
    model.load(path_to_checkpoint)

    def describe(coords, label):
        # One JSON-serialisable entry for a single detection.
        box = BBox(left=coords[0], top=coords[1], right=coords[2], bottom=coords[3])
        category = dataset_class.LABEL_TO_CATEGORY_DICT[label]
        return {
            'left': int(box.left),
            'top': int(box.top),
            'right': int(box.right),
            'bottom': int(box.bottom),
            'category': category
        }

    async def handler(websocket, path):
        print('Connection established:', path)

        with torch.no_grad():
            while True:
                payload = await websocket.recv()
                pixels = np.frombuffer(payload, dtype=np.uint8).reshape(480, 640, 3)

                image = Image.fromarray(pixels)
                image_tensor, scale = dataset_class.preprocess(image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

                batch = image_tensor.unsqueeze(dim=0).cuda()
                boxes, labels, probs, _ = model.eval().forward(batch)
                boxes /= scale

                keep = probs > prob_thresh
                boxes, labels, probs = boxes[keep], labels[keep], probs[keep]

                # Probabilities are used for filtering only; they are not sent.
                entries = [
                    describe(coords, label)
                    for coords, label, _prob in zip(boxes.tolist(), labels.tolist(), probs.tolist())
                ]

                await websocket.send(json.dumps(entries))

    loop = asyncio.get_event_loop()
    server = websockets.serve(handler, host='*', port=8765, max_size=2 ** 32, compression=None)
    loop.run_until_complete(server)
    print('Service is ready. Please navigate to http://127.0.0.1:8000/')
    loop.run_forever()
Ejemplo n.º 6
0
    def __init__(self, filename):
        """Set up logging, build a VOC2007 / ResNet-101 model on the CPU and
        load its weights from ``filename`` via ``self.load``."""
        log = logging.getLogger(__name__)
        log.setLevel(logging.DEBUG)
        self.logger = log

        self.dataset_class = DatasetBase.from_name('voc2007')
        backbone_cls = BackboneBase.from_name('resnet101')
        self.backbone = backbone_cls(pretrained=False)

        # Fixed hyper-parameters for this wrapper (not read from Config).
        model_settings = {
            'pooler_mode': Pooler.Mode.ALIGN,
            'anchor_ratios': [(1, 2), (1, 1), (2, 1)],
            'anchor_sizes': [128, 256, 512],
            'rpn_pre_nms_top_n': 6000,
            'rpn_post_nms_top_n': 300,
        }
        network = Model(self.backbone,
                        self.dataset_class.num_classes(),
                        **model_settings)
        self.model = network.cpu()

        self.load(filename)
Ejemplo n.º 7
0
def evaluate(model, path_to_images_dir: str, path_to_annotation: str,
             iou_thres, conf_thres, nms_thres, img_size, batch_size,
             num_workers):
    """Run ``model`` over the tiny-person dataset and compute per-class metrics.

    Args:
        model: Detector called as ``model(imgs)``; it is put in eval mode.
        path_to_images_dir: Currently unused (see note below).
        path_to_annotation: Currently unused (see note below).
        iou_thres: IoU threshold forwarded to ``get_batch_statistics``.
        conf_thres: Confidence threshold forwarded to ``non_max_suppression``.
        nms_thres: NMS threshold forwarded to ``non_max_suppression``.
        img_size: Currently unused.
        batch_size: Batch size of the data loader.
        num_workers: Worker count of the data loader.

    Returns:
        ``(precision, recall, AP, f1, ap_class, dataset)`` where the first five
        values come from ``ap_per_class``.

    Raises:
        ValueError: If the data loader yields no batches, so there are no
            statistics to aggregate.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()

    # NOTE(review): the dataset location and mode are hard-coded here;
    # `path_to_images_dir` and `path_to_annotation` are ignored and the TRAIN
    # split is evaluated. Left unchanged because callers may rely on it.
    dataset = DatasetBase.from_name(
        'tiny-person'
    )('data/tiny_set/train',
      'data/tiny_set/erase_with_uncertain_dataset/annotations/corner/task/tiny_set_train_sw640_sh512_all.json',
      DatasetBase.Mode.TRAIN)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             collate_fn=dataset.collate_fn)

    # NOTE(review): `labels` is never filled in before it is handed to
    # ap_per_class - looks like ground-truth class collection is missing;
    # confirm against the target format produced by dataset.collate_fn.
    labels = []
    sample_metrics = []  # List of tuples (TP, confs, pred)
    for imgs, targets in tqdm.tqdm(dataloader, desc="Detecting objects"):
        imgs = imgs.to(device)
        targets = targets.to(device)

        with torch.no_grad():
            outputs = model(imgs)
            outputs = non_max_suppression(outputs,
                                          conf_thres=conf_thres,
                                          nms_thres=nms_thres)

        sample_metrics += get_batch_statistics(outputs,
                                               targets,
                                               iou_threshold=iou_thres)

    if not sample_metrics:
        # The unpacking below would otherwise fail with an opaque
        # "not enough values to unpack" ValueError.
        raise ValueError('No detection statistics were collected; '
                         'the data loader produced no samples.')

    # Concatenate sample statistics
    true_positives, pred_scores, pred_labels = [
        torch.cat(x, 0) for x in zip(*sample_metrics)
    ]
    precision, recall, AP, f1, ap_class = ap_per_class(true_positives,
                                                       pred_scores,
                                                       pred_labels, labels)

    return precision, recall, AP, f1, ap_class, dataset
Ejemplo n.º 8
0
def draw(path_to_input_image, dataset_name):
    """Draw the ground-truth cuboids of one image and save the result.

    The annotation JSON is keyed by image file name; every object in the entry
    carries ``vertices`` where ``vertices[0]`` holds the x coordinates and
    ``vertices[1]`` the y coordinates of eight corners. Each corner is
    labelled with its index and the six faces are outlined. The image is
    written to ``images/container_gt/<file name>`` only when the entry holds
    at least one object.

    Args:
        path_to_input_image: Image to annotate.
        dataset_name: Not used; kept so existing callers keep working.

    Raises:
        KeyError: If the annotation file has no entry for the image name.
    """
    # NOTE(review): annotation file and output directory are hard-coded.
    annotation_path = 'data/container/vertices/MVI_3015.MP4.json'
    output_dir = 'images/container_gt'
    # Corner indices of the six faces; edges connect consecutive corners and
    # the last corner back to the first.
    faces = ((0, 1, 3, 2), (0, 4, 5, 1), (0, 4, 6, 2),
             (1, 5, 7, 3), (4, 5, 7, 6), (2, 3, 7, 6))
    line_color = (255, 255, 0)

    image_name = os.path.basename(path_to_input_image)
    image = transforms.Image.open(path_to_input_image).convert('RGB')

    with open(annotation_path) as f:
        annotations = json.load(f)
    gt_vertices = [obj['vertices'] for obj in annotations[image_name]]

    # PIL yields RGB while OpenCV drawing and imwrite expect BGR.
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    for vert in gt_vertices:
        xs, ys = vert[0], vert[1]
        corners = [(int(xs[i]), int(ys[i])) for i in range(8)]

        for index, corner in enumerate(corners):
            cv2.putText(image, str(index), corner, cv2.FONT_HERSHEY_SIMPLEX, 1, line_color, 1)

        for face in faces:
            for start, end in zip(face, face[1:] + face[:1]):
                image = cv2.line(image, corners[start], corners[end], line_color, 1)

    if gt_vertices:
        # Make sure the target directory exists before cv2.imwrite is called.
        os.makedirs(output_dir, exist_ok=True)
        cv2.imwrite(os.path.join(output_dir, image_name), image)
Ejemplo n.º 9
0
    def __init__(self,
                 path_to_checkpoint,
                 dataset_name='obstacle',
                 backbone_name='resnet101',
                 prob_thresh=0.6):
        """Store the settings, build the detector on the GPU, load the
        checkpoint and leave the model in evaluation mode."""
        self.path_to_checkpoint, self.dataset_name = path_to_checkpoint, dataset_name
        self.backbone_name, self.prob_thresh = backbone_name, prob_thresh

        self.dataset_class = DatasetBase.from_name(dataset_name)
        backbone_cls = BackboneBase.from_name(backbone_name)
        self.backbone = backbone_cls(pretrained=False)

        # Hyper-parameters come from the project-wide Config.
        hyper_params = dict(
            pooler_mode=Config.POOLER_MODE,
            anchor_ratios=Config.ANCHOR_RATIOS,
            anchor_sizes=Config.ANCHOR_SIZES,
            rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
            rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
        )
        detector = Model(self.backbone, self.dataset_class.num_classes(), **hyper_params)
        self.model = detector.cuda()

        self.model.load(path_to_checkpoint)
        # Inference only: switch to evaluation mode.
        self.model.eval()
Ejemplo n.º 10
0
def _infer_stream(path_to_input_stream_endpoint: str, path_to_output_dir: str,
                  period_of_inference: int, path_to_checkpoint: str,
                  dataset_name: str, backbone_name: str, prob_thresh: float):
    """Run detection on a video stream and dump one JSON file per processed frame.

    Args:
        path_to_input_stream_endpoint: Source handed to ``cv2.VideoCapture``.
            An all-digit string is converted to ``int`` first, which OpenCV
            treats as a device index.
        path_to_output_dir: Directory the JSON files are written to.
        period_of_inference: Only every N-th frame read is processed.
        path_to_checkpoint: Checkpoint passed to ``model.load``.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections whose probability is not strictly greater than
            this value are discarded.

    Each JSON file holds the stream name, a running ``frame_num`` (counting
    processed frames from 1) and, per detection, the category, score and an
    ``x``/``y``/``w``/``h`` box. Processing stops when the stream stops
    delivering frames.
    """
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone,
                  dataset_class.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    model.load(path_to_checkpoint)
    # Switch to evaluation mode once instead of on every processed frame.
    model.eval()

    # Take the name while the endpoint is still a string: for a camera index
    # it becomes an int below, and calling .split on that raised AttributeError.
    image_name = path_to_input_stream_endpoint.split('/')[-1]

    if path_to_input_stream_endpoint.isdigit():
        path_to_input_stream_endpoint = int(path_to_input_stream_endpoint)
    video_capture = cv2.VideoCapture(path_to_input_stream_endpoint)

    frame_num = 1
    # try/finally guarantees the capture is released even if a frame fails.
    try:
        with torch.no_grad():
            for sn in itertools.count(start=1):
                grabbed, frame = video_capture.read()
                if not grabbed:
                    # No frame was delivered (end of file, camera unplugged, ...).
                    # Without this check `frame` is None and cvtColor fails.
                    break

                if sn % period_of_inference != 0:
                    continue

                # OpenCV delivers BGR; the PIL-based preprocessing gets RGB.
                image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                image = Image.fromarray(image)
                image_tensor, scale = dataset_class.preprocess(
                    image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

                detection_bboxes, detection_classes, detection_probs, _ = \
                    model.forward(image_tensor.unsqueeze(dim=0).cuda())
                detection_bboxes /= scale

                kept_indices = detection_probs > prob_thresh
                detection_bboxes = detection_bboxes[kept_indices]
                detection_classes = detection_classes[kept_indices]
                detection_probs = detection_probs[kept_indices]

                jsonData = OrderedDict()
                resultData = OrderedDict()
                detectionResultDataList = []

                jsonData["image_path"] = image_name
                jsonData["modules"] = "Faster_R-CNN_ResNet101"

                jsonData["cam_id"] = "0"
                jsonData["frame_num"] = frame_num

                for bbox, cls, prob in zip(detection_bboxes.tolist(),
                                           detection_classes.tolist(),
                                           detection_probs.tolist()):
                    bbox = BBox(left=bbox[0],
                                top=bbox[1],
                                right=bbox[2],
                                bottom=bbox[3])
                    category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                    detectionResultData = OrderedDict()
                    detectionResultData["label"] = [{
                        'description': category,
                        'score': prob
                    }]
                    # Boxes are exported as top-left corner plus width/height.
                    detectionResultData["position"] = {
                        'x': bbox.left,
                        'y': bbox.top,
                        'w': (bbox.right - bbox.left),
                        'h': (bbox.bottom - bbox.top)
                    }
                    detectionResultDataList.append(detectionResultData)

                resultData["module_name"] = "Faster_R-CNN_ResNet101"
                resultData["detection_result"] = detectionResultDataList
                jsonData["results"] = [resultData]

                # The microsecond timestamp keeps file names unique per frame.
                output_file_path = path_to_output_dir + "/" + datetime.now(
                ).strftime("%Y-%m-%d__%H:%M:%S.%f__") + image_name.split(
                    '.')[0] + ".json"
                with open(output_file_path, 'w',
                          encoding="utf-8") as make_file:
                    json.dump(jsonData, make_file, ensure_ascii=False, indent="\t")
                print(f'Saved JSON File : [NAME] {output_file_path}')
                frame_num += 1
    finally:
        video_capture.release()
        cv2.destroyAllWindows()
Ejemplo n.º 11
0
def _infer(path_to_input_image: str, path_to_output_dir: str,
           path_to_checkpoint: str, dataset_name: str, backbone_name: str,
           prob_thresh: float):
    """Detect objects in one image and write the detections to a JSON file.

    The file is created in ``path_to_output_dir`` and named after the current
    time plus the image name up to its first dot. ``frame_num`` is the text
    after the last underscore of that stem. Every kept detection is stored
    with its category, score and an ``x``/``y``/``w``/``h`` box.
    """
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone_cls = BackboneBase.from_name(backbone_name)
    model = Model(
        backbone_cls(pretrained=False),
        dataset_class.num_classes(),
        pooler_mode=Config.POOLER_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS,
        anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
    ).cuda()
    model.load(path_to_checkpoint)

    with torch.no_grad():
        image = transforms.Image.open(path_to_input_image)
        image_tensor, scale = dataset_class.preprocess(
            image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

        batch = image_tensor.unsqueeze(dim=0).cuda()
        boxes, labels, probs, _ = model.eval().forward(batch)
        boxes /= scale

        keep = probs > prob_thresh
        boxes, labels, probs = boxes[keep], labels[keep], probs[keep]

    def to_record(coords, label, score):
        # One detection as an ordered label/position record.
        box = BBox(left=coords[0], top=coords[1],
                   right=coords[2], bottom=coords[3])
        category = dataset_class.LABEL_TO_CATEGORY_DICT[label]
        return OrderedDict([
            ("label", [{'description': category, 'score': score}]),
            ("position", {
                'x': box.left,
                'y': box.top,
                'w': (box.right - box.left),
                'h': (box.bottom - box.top)
            }),
        ])

    image_name = path_to_input_image.split('/')[-1]
    stem = image_name.split('.')[0]

    detections = [
        to_record(coords, label, score)
        for coords, label, score in zip(boxes.tolist(), labels.tolist(),
                                        probs.tolist())
    ]

    module_result = OrderedDict([
        ("module_name", "Faster_R-CNN_ResNet101"),
        ("detection_result", detections),
    ])
    report = OrderedDict([
        ("image_path", image_name),
        ("modules", "Faster_R-CNN_ResNet101"),
        ("cam_id", "0"),
        ("frame_num", stem.split('_')[-1]),
        ("results", [module_result]),
    ])

    stamp = datetime.now().strftime("%Y-%m-%d__%H:%M:%S__")
    output_file_path = path_to_output_dir + "/" + stamp + stem + ".json"
    with open(output_file_path, 'w', encoding="utf-8") as out_file:
        json.dump(report, out_file, ensure_ascii=False, indent="\t")
Ejemplo n.º 12
0
def _train(dataset_name: str,
           backbone_name: str,
           path_to_data_dir: str,
           path_to_checkpoints_dir: str,
           path_to_resuming_checkpoint: Optional[str]):
    """Train the detector under ``nn.DataParallel`` until ``Config.NUM_STEPS_TO_FINISH``.

    Builds the dataset, data loader, model, SGD optimizer and warm-up
    multi-step scheduler from ``Config``, optionally resumes from a
    checkpoint, then iterates over the data loader, logging the losses and
    saving periodic snapshots.

    Args:
        dataset_name: Name resolved to a dataset class via ``DatasetBase.from_name``.
        backbone_name: Name resolved to a backbone class via ``BackboneBase.from_name``.
        path_to_data_dir: Data directory handed to the dataset constructor.
        path_to_checkpoints_dir: Directory passed to ``model.module.save``;
            summary logs are written to its ``summaries`` sub-directory.
        path_to_resuming_checkpoint: When not ``None``, passed to
            ``model.module.load`` together with the optimizer and scheduler;
            the value it returns becomes the starting step.

    Raises:
        AssertionError: If ``Config.BATCH_SIZE`` is not divisible by the number
            of CUDA devices reported by ``torch.cuda.device_count()``.
    """
    dataset = DatasetBase.from_name(dataset_name)(path_to_data_dir, DatasetBase.Mode.TRAIN,
                                                  Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    dataloader = DataLoader(dataset,
                            batch_size=Config.BATCH_SIZE,
                            sampler=DatasetBase.NearestRatioRandomSampler(dataset.image_ratios,
                                                                          num_neighbors=Config.BATCH_SIZE),
                            num_workers=8,
                            collate_fn=DatasetBase.padding_collate_fn,
                            pin_memory=True)

    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone = BackboneBase.from_name(backbone_name)(pretrained=True)
    model = nn.DataParallel(
        Model(backbone, dataset.num_classes(),
              pooler_mode=Config.POOLER_MODE,
              anchor_ratios=Config.ANCHOR_RATIOS,
              anchor_sizes=Config.ANCHOR_SIZES,
              rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
              rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
              anchor_smooth_l1_loss_beta=Config.ANCHOR_SMOOTH_L1_LOSS_BETA,
              proposal_smooth_l1_loss_beta=Config.PROPOSAL_SMOOTH_L1_LOSS_BETA).cuda()
    )
    optimizer = optim.SGD(model.parameters(),
                          lr=Config.LEARNING_RATE,
                          momentum=Config.MOMENTUM,
                          weight_decay=Config.WEIGHT_DECAY)
    scheduler = WarmUpMultiStepLR(optimizer,
                                  milestones=Config.STEP_LR_SIZES,
                                  gamma=Config.STEP_LR_GAMMA,
                                  factor=Config.WARM_UP_FACTOR,
                                  num_iters=Config.WARM_UP_NUM_ITERS)

    step = 0
    time_checkpoint = time.time()
    losses = deque(maxlen=100)  # sliding window for the displayed average loss

    num_steps_to_display = Config.NUM_STEPS_TO_DISPLAY
    num_steps_to_snapshot = Config.NUM_STEPS_TO_SNAPSHOT
    num_steps_to_finish = Config.NUM_STEPS_TO_FINISH

    if path_to_resuming_checkpoint is not None:
        step = model.module.load(path_to_resuming_checkpoint, optimizer, scheduler)
        Log.i(f'Model has been restored from file: {path_to_resuming_checkpoint}')

    device_count = torch.cuda.device_count()
    assert Config.BATCH_SIZE % device_count == 0, 'The batch size is not divisible by the device count'
    Log.i('Start training with {:d} GPUs ({:d} batches per GPU)'.format(device_count,
                                                                        Config.BATCH_SIZE // device_count))

    # A resumed checkpoint may already be at (or past) the target step; with an
    # equality-only test inside the loop that case would never terminate.
    should_stop = step >= num_steps_to_finish

    summary_writer = SummaryWriter(os.path.join(path_to_checkpoints_dir, 'summaries'))
    try:
        while not should_stop:
            for _, image_batch, _, bboxes_batch, labels_batch in dataloader:
                batch_size = image_batch.shape[0]
                image_batch = image_batch.cuda()
                bboxes_batch = bboxes_batch.cuda()
                labels_batch = labels_batch.cuda()

                anchor_objectness_losses, anchor_transformer_losses, proposal_class_losses, proposal_transformer_losses = \
                    model.train().forward(image_batch, bboxes_batch, labels_batch)
                # Reduce every loss tensor to a scalar before summing.
                anchor_objectness_loss = anchor_objectness_losses.mean()
                anchor_transformer_loss = anchor_transformer_losses.mean()
                proposal_class_loss = proposal_class_losses.mean()
                proposal_transformer_loss = proposal_transformer_losses.mean()
                loss = anchor_objectness_loss + anchor_transformer_loss + proposal_class_loss + proposal_transformer_loss

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()

                losses.append(loss.item())
                summary_writer.add_scalar('train/anchor_objectness_loss', anchor_objectness_loss.item(), step)
                summary_writer.add_scalar('train/anchor_transformer_loss', anchor_transformer_loss.item(), step)
                summary_writer.add_scalar('train/proposal_class_loss', proposal_class_loss.item(), step)
                summary_writer.add_scalar('train/proposal_transformer_loss', proposal_transformer_loss.item(), step)
                summary_writer.add_scalar('train/loss', loss.item(), step)
                step += 1

                if step >= num_steps_to_finish:
                    should_stop = True

                if step % num_steps_to_display == 0:
                    elapsed_time = time.time() - time_checkpoint
                    time_checkpoint = time.time()
                    steps_per_sec = num_steps_to_display / elapsed_time
                    samples_per_sec = batch_size * steps_per_sec
                    eta = (num_steps_to_finish - step) / steps_per_sec / 3600  # hours
                    avg_loss = sum(losses) / len(losses)
                    # NOTE(review): recent PyTorch releases recommend get_last_lr()
                    # over get_lr(); confirm against the installed version.
                    lr = scheduler.get_lr()[0]
                    Log.i(f'[Step {step}] Avg. Loss = {avg_loss:.6f}, Learning Rate = {lr:.8f} ({samples_per_sec:.2f} samples/sec; ETA {eta:.1f} hrs)')

                # Always snapshot on the final step so the finished model is kept.
                if step % num_steps_to_snapshot == 0 or should_stop:
                    path_to_checkpoint = model.module.save(path_to_checkpoints_dir, step, optimizer, scheduler)
                    Log.i(f'Model has been saved to {path_to_checkpoint}')

                if should_stop:
                    break
    finally:
        # Release the writer even when training is interrupted by an error.
        summary_writer.close()

    Log.i('Done')
Ejemplo n.º 13
0
                   args.model_def,
                   image_size=config.GLOBAL.IMAGE_SIZE[0])

    if torch.cuda.is_available():
        model = model.cuda()

    # If specified we start from checkpoint
    # if config.TRAIN.PRETRAINED_WEIGHTS is not None and config.TRAIN.PRETRAINED_WEIGHTS != '':
    #     if config.TRAIN.PRETRAINED_WEIGHTS.endswith(".pth"):
    #         backbone.load_state_dict(torch.load(config.TRAIN.PRETRAINED_WEIGHTS))
    #     else:
    #         backbone.load_darknet_weights(config.TRAIN.PRETRAINED_WEIGHTS)

    # Get dataloader
    dataset = DatasetBase.from_name('tiny-person')(
        config.TRAIN.PATH_TO_IMAGES_DIR, config.TRAIN.PATH_TO_ANNOTATIONS,
        DatasetBase.Mode.TRAIN)

    dataloader = DataLoader(dataset,
                            batch_size=config.TRAIN.BATCH_SIZE,
                            sampler=DatasetBase.NearestRatioRandomSampler(
                                dataset.image_ratios,
                                num_neighbors=config.TRAIN.BATCH_SIZE),
                            num_workers=config.TRAIN.NUM_WORKERS,
                            collate_fn=dataset.collate_fn,
                            pin_memory=True)
    optimizer = torch.optim.Adam(model.parameters())

    metrics = [
        "grid_size",
        "loss",
Ejemplo n.º 14
0
    def main():
        """Annotate every frame below a hard-coded root with the model's detections.

        The command line only carries ``Config`` overrides; the input root,
        checkpoint, dataset name, backbone name and probability threshold are
        fixed in the body. For each sub-directory of the input root, every
        entry accepted by ``is_image`` is run through the model, the kept
        detections are drawn onto the image, and the result is saved under the
        matching sub-directory of ``<input_root>_output``.
        """
        cli = argparse.ArgumentParser()
        option_table = (
            ('--image_min_side', {'type': float,
                                  'help': 'default: {:g}'.format(Config.IMAGE_MIN_SIDE)}),
            ('--image_max_side', {'type': float,
                                  'help': 'default: {:g}'.format(Config.IMAGE_MAX_SIDE)}),
            ('--anchor_ratios', {'type': str,
                                 'help': 'default: "{!s}"'.format(Config.ANCHOR_RATIOS)}),
            ('--anchor_sizes', {'type': str,
                                'help': 'default: "{!s}"'.format(Config.ANCHOR_SIZES)}),
            ('--pooler_mode', {'type': str,
                               'choices': Pooler.OPTIONS,
                               'help': 'default: {.value:s}'.format(Config.POOLER_MODE)}),
            ('--rpn_pre_nms_top_n', {'type': int,
                                     'help': 'default: {:d}'.format(Config.RPN_PRE_NMS_TOP_N)}),
            ('--rpn_post_nms_top_n', {'type': int,
                                      'help': 'default: {:d}'.format(Config.RPN_POST_NMS_TOP_N)}),
        )
        for flag, spec in option_table:
            cli.add_argument(flag, **spec)

        args = cli.parse_args()

        # Fixed run settings (not exposed on the command line).
        frames_root = '/home/mmlab/CCTV_Server/models/detectors/FasterRCNN/frames'
        annotated_root = frames_root + '_output'
        checkpoint_file = '/home/mmlab/CCTV_Server/models/detectors/FasterRCNN/checkpoints/obstacle/model-90000.pth'
        score_floor = 0.6
        palette = ['red', 'green', 'blue', 'yellow', 'purple', 'white']

        Config.setup(image_min_side=args.image_min_side,
                     image_max_side=args.image_max_side,
                     anchor_ratios=args.anchor_ratios,
                     anchor_sizes=args.anchor_sizes,
                     pooler_mode=args.pooler_mode,
                     rpn_pre_nms_top_n=args.rpn_pre_nms_top_n,
                     rpn_post_nms_top_n=args.rpn_post_nms_top_n)

        print('Arguments:')
        for option, value in vars(args).items():
            print(f'\t{option} = {value}')
        print(Config.describe())

        os.makedirs(annotated_root, exist_ok=True)

        # Only directories directly below the input root are processed.
        sub_dirs = []
        for entry in os.listdir(frames_root):
            if os.path.isdir(os.path.join(frames_root, entry)):
                sub_dirs.append(entry)

        dataset_class = DatasetBase.from_name('obstacle')
        backbone = BackboneBase.from_name('resnet101')(pretrained=False)
        model = Model(backbone,
                      dataset_class.num_classes(),
                      pooler_mode=Config.POOLER_MODE,
                      anchor_ratios=Config.ANCHOR_RATIOS,
                      anchor_sizes=Config.ANCHOR_SIZES,
                      rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                      rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
        model.load(checkpoint_file)

        for sub_dir in sub_dirs:
            src_dir = os.path.join(frames_root, sub_dir)
            dst_dir = os.path.join(annotated_root, sub_dir)

            stems = [image_basename(name) for name in os.listdir(src_dir)
                     if is_image(name)]

            for stem in stems:
                # Both paths are built with a '.jpg' extension.
                src_image = image_path(src_dir, stem, '.jpg')
                dst_image = image_path(dst_dir, stem, '.jpg')

                os.makedirs(os.path.join(os.path.curdir, os.path.dirname(dst_image)),
                            exist_ok=True)

                with torch.no_grad():
                    image = transforms.Image.open(src_image)
                    image_tensor, scale = dataset_class.preprocess(
                        image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

                    batch = image_tensor.unsqueeze(dim=0).cuda()
                    detection_bboxes, detection_classes, detection_probs, _, _ = \
                        model.eval().forward(batch)
                    # Undo the preprocessing scale so boxes match the original image.
                    detection_bboxes /= scale

                    confident = detection_probs > score_floor
                    detection_bboxes = detection_bboxes[confident]
                    detection_classes = detection_classes[confident]
                    detection_probs = detection_probs[confident]

                    canvas = ImageDraw.Draw(image)
                    detections = zip(detection_bboxes.tolist(),
                                     detection_classes.tolist(),
                                     detection_probs.tolist())

                    for coords, cls, prob in detections:
                        color = random.choice(palette)
                        box = BBox(left=coords[0],
                                   top=coords[1],
                                   right=coords[2],
                                   bottom=coords[3])
                        category = dataset_class.LABEL_TO_CATEGORY_DICT[cls]

                        top_left = (box.left, box.top)
                        bottom_right = (box.right, box.bottom)
                        canvas.rectangle((top_left, bottom_right), outline=color)
                        canvas.text(top_left,
                                    text=f'{category:s} {prob:.3f}',
                                    fill=color)

                    image.save(dst_image)
                    print(f'Output image is saved to {dst_image}')
Ejemplo n.º 15
0
def _train(dataset_name: str, backbone_name: str, path_to_data_dir: str, path_to_checkpoints_dir: str, path_to_resuming_checkpoint: Optional[str]):
    """Train the detector one image at a time until ``Config.NUM_STEPS_TO_FINISH``.

    Builds the dataset, a batch-size-1 shuffled data loader, the model, an SGD
    optimizer and a ``MultiStepLR`` scheduler from ``Config``, optionally
    resumes from a checkpoint, then loops over the data loader, logging the
    losses and saving periodic snapshots.

    Args:
        dataset_name: Name resolved to a dataset class via ``DatasetBase.from_name``.
        backbone_name: Name resolved to a backbone class via ``BackboneBase.from_name``.
        path_to_data_dir: Data directory handed to the dataset constructor.
        path_to_checkpoints_dir: Directory passed to ``model.save``; summary logs
            are written to its ``summaries`` sub-directory.
        path_to_resuming_checkpoint: When not ``None``, passed to ``model.load``
            together with the optimizer and scheduler; the value it returns
            becomes the starting step.

    Raises:
        AssertionError: If the data loader yields a batch whose first dimension
            is not 1.
    """
    dataset = DatasetBase.from_name(dataset_name)(path_to_data_dir, DatasetBase.Mode.TRAIN, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=8, pin_memory=True)

    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone = BackboneBase.from_name(backbone_name)(pretrained=True)
    model = Model(backbone, dataset.num_classes(), pooling_mode=Config.POOLING_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS, anchor_scales=Config.ANCHOR_SCALES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N, rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).cuda()
    optimizer = optim.SGD(model.parameters(), lr=Config.LEARNING_RATE,
                          momentum=Config.MOMENTUM, weight_decay=Config.WEIGHT_DECAY)
    scheduler = MultiStepLR(optimizer, milestones=Config.STEP_LR_SIZES, gamma=Config.STEP_LR_GAMMA)

    step = 0
    time_checkpoint = time.time()
    losses = deque(maxlen=100)  # sliding window for the displayed average loss

    num_steps_to_display = Config.NUM_STEPS_TO_DISPLAY
    num_steps_to_snapshot = Config.NUM_STEPS_TO_SNAPSHOT
    num_steps_to_finish = Config.NUM_STEPS_TO_FINISH

    if path_to_resuming_checkpoint is not None:
        step = model.load(path_to_resuming_checkpoint, optimizer, scheduler)
        Log.i(f'Model has been restored from file: {path_to_resuming_checkpoint}')

    Log.i('Start training')

    # A resumed checkpoint may already be at (or past) the target step; with an
    # equality-only test inside the loop that case would never terminate.
    should_stop = step >= num_steps_to_finish

    summary_writer = SummaryWriter(os.path.join(path_to_checkpoints_dir, 'summaries'))
    try:
        while not should_stop:
            for _, image_batch, _, bboxes_batch, labels_batch in dataloader:
                assert image_batch.shape[0] == 1, 'only batch size of 1 is supported'

                # Drop the batch dimension: the model is fed a single sample.
                image = image_batch[0].cuda()
                bboxes = bboxes_batch[0].cuda()
                labels = labels_batch[0].cuda()

                forward_input = Model.ForwardInput.Train(image, gt_classes=labels, gt_bboxes=bboxes)
                forward_output: Model.ForwardOutput.Train = model.train().forward(forward_input)

                anchor_objectness_loss, anchor_transformer_loss, proposal_class_loss, proposal_transformer_loss = forward_output
                loss = anchor_objectness_loss + anchor_transformer_loss + proposal_class_loss + proposal_transformer_loss

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()

                losses.append(loss.item())
                summary_writer.add_scalar('train/anchor_objectness_loss', anchor_objectness_loss.item(), step)
                summary_writer.add_scalar('train/anchor_transformer_loss', anchor_transformer_loss.item(), step)
                summary_writer.add_scalar('train/proposal_class_loss', proposal_class_loss.item(), step)
                summary_writer.add_scalar('train/proposal_transformer_loss', proposal_transformer_loss.item(), step)
                summary_writer.add_scalar('train/loss', loss.item(), step)
                step += 1

                if step >= num_steps_to_finish:
                    should_stop = True

                if step % num_steps_to_display == 0:
                    elapsed_time = time.time() - time_checkpoint
                    time_checkpoint = time.time()
                    steps_per_sec = num_steps_to_display / elapsed_time
                    samples_per_sec = dataloader.batch_size * steps_per_sec
                    eta = (num_steps_to_finish - step) / steps_per_sec / 3600  # hours
                    avg_loss = sum(losses) / len(losses)
                    # NOTE(review): recent PyTorch releases recommend get_last_lr()
                    # over get_lr(); confirm against the installed version.
                    lr = scheduler.get_lr()[0]
                    Log.i(f'[Step {step}] Avg. Loss = {avg_loss:.6f}, Learning Rate = {lr:.6f} ({samples_per_sec:.2f} samples/sec; ETA {eta:.1f} hrs)')

                # Always snapshot on the final step so the finished model is kept.
                if step % num_steps_to_snapshot == 0 or should_stop:
                    path_to_checkpoint = model.save(path_to_checkpoints_dir, step, optimizer, scheduler)
                    Log.i(f'Model has been saved to {path_to_checkpoint}')

                if should_stop:
                    break
    finally:
        # Release the writer even when training is interrupted by an error.
        summary_writer.close()

    Log.i('Done')
Ejemplo n.º 16
0
def _infer(path_to_input_image: str, path_to_output_image: str,
           path_to_checkpoint: str, dataset_name: str, backbone_name: str,
           prob_thresh: float):
    """Detect cuboids in one image, or in every entry of a directory.

    For each input the kept detections are drawn (bounding box, score label and
    the six cuboid face outlines) and the image is saved under
    ``path_to_output_image`` with the input's file name. The vertices of the
    highest-scoring kept detection are additionally saved as
    ``<file name>.npy``; when nothing passes the threshold the empty filtered
    vertex tensor is saved instead.

    Args:
        path_to_input_image: An image file, or a directory whose entries are
            all opened as images.
        path_to_output_image: Output *directory* (despite the name); created if
            it does not exist.
        path_to_checkpoint: Checkpoint passed to ``model.load``.
        dataset_name: Name resolved to a dataset class via ``DatasetBase.from_name``.
        backbone_name: Name resolved to a backbone class via ``BackboneBase.from_name``.
        prob_thresh: Detections with a probability not greater than this are dropped.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone = BackboneBase.from_name(backbone_name)(pretrained=False)
    model = Model(backbone,
                  dataset_class.num_classes(),
                  pooler_mode=Config.POOLER_MODE,
                  anchor_ratios=Config.ANCHOR_RATIOS,
                  anchor_sizes=Config.ANCHOR_SIZES,
                  rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
                  rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N).to(device)
    model.load(path_to_checkpoint)

    if os.path.isfile(path_to_input_image):
        # Split into directory + file name. Joining the full path onto itself
        # (as was done before) breaks relative paths and, for absolute paths,
        # makes the output path equal to the input path.
        input_dir, single_name = os.path.split(path_to_input_image)
        files = [single_name]
    else:
        input_dir = path_to_input_image
        files = os.listdir(input_dir)
        print('Running inference on folder:', path_to_input_image)

    if path_to_output_image:
        os.makedirs(path_to_output_image, exist_ok=True)

    # Vertex indices of the six closed face outlines (first index repeated at
    # the end to close the polygon). Vertices are read as x = vert[0][i],
    # y = vert[1][i].
    face_outlines = (
        (0, 1, 3, 2, 0),
        (0, 4, 5, 1, 0),
        (0, 4, 6, 2, 0),
        (1, 5, 7, 3, 1),
        (4, 5, 7, 6, 4),
        (2, 3, 7, 6, 2),
    )
    palette = ['red', 'green', 'blue', 'yellow', 'purple', 'white']
    category = "cuboid"

    model.eval()
    with torch.no_grad():
        for file in tqdm(files):
            image = transforms.Image.open(os.path.join(input_dir, file))
            image_tensor, scale = dataset_class.preprocess(
                image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

            detection_bboxes, detection_probs, detection_vertices, _ = \
                model.forward(image_tensor.unsqueeze(dim=0).to(device))
            # Undo the preprocessing scale so coordinates match the original image.
            detection_bboxes /= scale
            detection_vertices /= scale

            kept_indices = detection_probs > prob_thresh
            detection_bboxes = detection_bboxes[kept_indices]
            detection_probs = detection_probs[kept_indices]
            detection_vertices = detection_vertices[kept_indices]

            draw = ImageDraw.Draw(image)

            for bbox, prob, vert in zip(detection_bboxes.tolist(),
                                        detection_probs.tolist(),
                                        detection_vertices.tolist()):
                color = random.choice(palette)
                bbox = BBox(left=bbox[0],
                            top=bbox[1],
                            right=bbox[2],
                            bottom=bbox[3])

                draw.rectangle(
                    ((bbox.left, bbox.top), (bbox.right, bbox.bottom)),
                    outline=color)
                draw.text((bbox.left, bbox.top),
                          text=f'{category:s} {prob:.3f}',
                          fill=color)

                xs, ys = vert[0], vert[1]
                for outline in face_outlines:
                    points = tuple((int(xs[i]), int(ys[i])) for i in outline)
                    draw.line(points, fill=color)

            output_path = os.path.join(path_to_output_image, file)
            image.save(output_path)

            # Keep only the best detection's vertices when there is one.
            if detection_probs.size()[0] > 0:
                max_index = torch.argmax(detection_probs)
                detection_vertices = detection_vertices[max_index]
            detection_vertices = detection_vertices.cpu().numpy()
            with open(os.path.join(path_to_output_image, file + '.npy'),
                      'wb') as f:
                np.save(f, detection_vertices)
Ejemplo n.º 17
0
def _train(dataset_name: str, backbone_name: str, path_to_data_dir: str,
           path_to_checkpoints_dir: str,
           path_to_resuming_checkpoint: Optional[str]):
    """Train the detector under ``nn.DataParallel`` until ``Config.NUM_STEPS_TO_FINISH``.

    Builds the dataset, data loader, model, SGD optimizer and warm-up
    multi-step scheduler from ``Config``, optionally resumes from a
    checkpoint, then iterates over the data loader, logging the losses and
    saving snapshots (periodically, at step 10, and on the final step).

    Args:
        dataset_name: Name resolved to a dataset class via ``DatasetBase.from_name``.
        backbone_name: Name resolved to a backbone class via ``BackboneBase.from_name``.
        path_to_data_dir: Data directory handed to the dataset constructor.
        path_to_checkpoints_dir: Directory passed to ``model.module.save``;
            summary logs are written to its ``summaries`` sub-directory.
        path_to_resuming_checkpoint: When not ``None``, passed to
            ``model.module.load`` together with the optimizer and scheduler;
            the value it returns becomes the starting step.

    Raises:
        AssertionError: If ``Config.BATCH_SIZE`` is not divisible by the number
            of CUDA devices reported by ``torch.cuda.device_count()``.
    """
    dataset = DatasetBase.from_name(dataset_name)(path_to_data_dir,
                                                  DatasetBase.Mode.TRAIN,
                                                  Config.IMAGE_MIN_SIDE,
                                                  Config.IMAGE_MAX_SIDE)
    # num_workers is set to 0 to make debugging easier.
    dataloader = DataLoader(dataset,
                            batch_size=Config.BATCH_SIZE,
                            sampler=DatasetBase.NearestRatioRandomSampler(
                                dataset.image_ratios,
                                num_neighbors=Config.BATCH_SIZE),
                            num_workers=0,
                            collate_fn=DatasetBase.padding_collate_fn,
                            pin_memory=True)

    Log.i('Found {:d} samples'.format(len(dataset)))

    backbone = BackboneBase.from_name(backbone_name)(pretrained=True)

    # NOTE: for debugging, the original author alternatively used a plain
    # `Model(...).cuda()` with the same arguments, without the DataParallel wrapper.
    model = nn.DataParallel(
        Model(backbone,
              dataset.num_classes(),
              pooler_mode=Config.POOLER_MODE,
              anchor_ratios=Config.ANCHOR_RATIOS,
              anchor_sizes=Config.ANCHOR_SIZES,
              rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
              rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
              anchor_smooth_l1_loss_beta=Config.ANCHOR_SMOOTH_L1_LOSS_BETA,
              proposal_smooth_l1_loss_beta=Config.PROPOSAL_SMOOTH_L1_LOSS_BETA
              ).cuda())

    # Training parameters as recorded by the original author (reference only;
    # not verified against Config here):
    #
    #   IMAGE_MIN_SIDE: float = 600.0
    #   IMAGE_MAX_SIDE: float = 1000.0
    #
    #   ANCHOR_RATIOS: List[Tuple[int, int]] = [(1, 2), (1, 1), (2, 1)]
    #   ANCHOR_SIZES: List[int] = [128, 256, 512]
    #   POOLER_MODE: Pooler.Mode = Pooler.Mode.ALIGN
    #
    #   RPN_PRE_NMS_TOP_N: int = 12000
    #   RPN_POST_NMS_TOP_N: int = 2000
    #
    #   ANCHOR_SMOOTH_L1_LOSS_BETA: float = 1.0
    #   PROPOSAL_SMOOTH_L1_LOSS_BETA: float = 1.0
    #
    #   BATCH_SIZE: int = 1
    #   LEARNING_RATE: float = 0.001
    #   MOMENTUM: float = 0.9
    #   WEIGHT_DECAY: float = 0.0005
    #   STEP_LR_SIZES: List[int] = [50000, 70000]
    #   STEP_LR_GAMMA: float = 0.1
    #   WARM_UP_FACTOR: float = 0.3333
    #   WARM_UP_NUM_ITERS: int = 500
    #
    #   NUM_STEPS_TO_DISPLAY: int = 20
    #   NUM_STEPS_TO_SNAPSHOT: int = 10000
    #   NUM_STEPS_TO_FINISH: int = 90000
    #
    # Why momentum:
    #   1. it damps the oscillation caused by an ill-conditioned problem;
    #   2. it reduces the variance of stochastic gradients (weight decay also
    #      helps with this).
    #
    # Two kinds of decay in the optimisation:
    #   1. weight decay: adds the L2 norm of the weights to the total loss;
    #   2. learning-rate decay: the scheduler lowers the learning rate
    #      according to its policy as the step count grows.
    optimizer = optim.SGD(model.parameters(),
                          lr=Config.LEARNING_RATE,
                          momentum=Config.MOMENTUM,
                          weight_decay=Config.WEIGHT_DECAY)
    scheduler = WarmUpMultiStepLR(optimizer,
                                  milestones=Config.STEP_LR_SIZES,
                                  gamma=Config.STEP_LR_GAMMA,
                                  factor=Config.WARM_UP_FACTOR,
                                  num_iters=Config.WARM_UP_NUM_ITERS)

    step = 0
    time_checkpoint = time.time()
    losses = deque(maxlen=100)  # sliding window for the displayed average loss

    num_steps_to_display = Config.NUM_STEPS_TO_DISPLAY
    num_steps_to_snapshot = Config.NUM_STEPS_TO_SNAPSHOT
    num_steps_to_finish = Config.NUM_STEPS_TO_FINISH

    if path_to_resuming_checkpoint is not None:
        step = model.module.load(path_to_resuming_checkpoint, optimizer,
                                 scheduler)
        Log.i(
            f'Model has been restored from file: {path_to_resuming_checkpoint}'
        )

    device_count = torch.cuda.device_count()
    # BATCH_SIZE defaults to 1 (per the original author's note).
    assert Config.BATCH_SIZE % device_count == 0, 'The batch size is not divisible by the device count'
    Log.i('Start training with {:d} GPUs ({:d} batches per GPU)'.format(
        device_count, Config.BATCH_SIZE // device_count))

    # A resumed checkpoint may already be at (or past) the target step; with an
    # equality-only test inside the loop that case would never terminate.
    should_stop = step >= num_steps_to_finish

    summary_writer = SummaryWriter(
        os.path.join(path_to_checkpoints_dir, 'summaries'))
    try:
        while not should_stop:
            # The original author notes that training uses the VOC2007 dataset.
            for _, image_batch, _, bboxes_batch, labels_batch in dataloader:
                # Shape notes from the original author, for batch size 1:
                # images (1, 3, h, w), boxes (1, gt_n, 4), labels (1, gt_n).
                batch_size = image_batch.shape[0]
                image_batch = image_batch.cuda()
                bboxes_batch = bboxes_batch.cuda()
                labels_batch = labels_batch.cuda()

                anchor_objectness_losses, anchor_transformer_losses, proposal_class_losses, proposal_transformer_losses = \
                    model.train().forward(image_batch, bboxes_batch, labels_batch)
                # RPN losses.
                anchor_objectness_loss = anchor_objectness_losses.mean()
                anchor_transformer_loss = anchor_transformer_losses.mean()
                # Detection losses.
                proposal_class_loss = proposal_class_losses.mean()
                proposal_transformer_loss = proposal_transformer_losses.mean()
                loss = anchor_objectness_loss + anchor_transformer_loss + proposal_class_loss + proposal_transformer_loss

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                scheduler.step()

                losses.append(loss.item())
                summary_writer.add_scalar('train/anchor_objectness_loss',
                                          anchor_objectness_loss.item(), step)
                summary_writer.add_scalar('train/anchor_transformer_loss',
                                          anchor_transformer_loss.item(), step)
                summary_writer.add_scalar('train/proposal_class_loss',
                                          proposal_class_loss.item(), step)
                summary_writer.add_scalar('train/proposal_transformer_loss',
                                          proposal_transformer_loss.item(), step)
                summary_writer.add_scalar('train/loss', loss.item(), step)
                step += 1

                if step >= num_steps_to_finish:
                    should_stop = True

                if step % num_steps_to_display == 0:
                    elapsed_time = time.time() - time_checkpoint
                    time_checkpoint = time.time()
                    steps_per_sec = num_steps_to_display / elapsed_time
                    samples_per_sec = batch_size * steps_per_sec
                    eta = (num_steps_to_finish - step) / steps_per_sec / 3600  # hours
                    avg_loss = sum(losses) / len(losses)
                    # NOTE(review): recent PyTorch releases recommend get_last_lr()
                    # over get_lr(); the author also considered reading
                    # optimizer.param_groups[0]['lr']. Confirm which is right
                    # for the installed version.
                    lr = scheduler.get_lr()[0]
                    Log.i(
                        f'[Step {step}] Avg. Loss = {avg_loss:.6f}, Learning Rate = {lr} ({samples_per_sec:.2f} samples/sec; ETA {eta:.1f} hrs)'
                    )

                # Snapshot periodically and on the final step. The extra snapshot
                # at step 10 was marked "test" by the original author.
                # NOTE(review): consider removing it once no longer needed.
                if step == 10 or step % num_steps_to_snapshot == 0 or should_stop:
                    path_to_checkpoint = model.module.save(path_to_checkpoints_dir,
                                                           step, optimizer,
                                                           scheduler)
                    Log.i(f'Model has been saved to {path_to_checkpoint}')

                if should_stop:
                    break
    finally:
        # Release the writer even when training is interrupted by an error.
        summary_writer.close()

    Log.i('Done')
Ejemplo n.º 18
0
def _infer(path_to_input_image: str, path_to_output_image: str,
           path_to_checkpoint: str, dataset_name: str, backbone_name: str,
           prob_thresh: float):
    """Detect objects in one image and save an annotated copy of it.

    The model is assembled from the named dataset and backbone, moved to the
    GPU (``.cuda()`` is called unconditionally, so a CUDA device is required)
    and restored from the checkpoint. The image is preprocessed, pushed
    through ``forward`` in eval mode, and every detection whose probability
    is strictly greater than ``prob_thresh`` is drawn as a randomly coloured
    rectangle with a ``category prob`` caption.

    Args:
        path_to_input_image: Image file opened with ``transforms.Image.open``.
        path_to_output_image: Destination handed to ``image.save``.
        path_to_checkpoint: Checkpoint handed to ``model.load``.
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        backbone_name: Name resolved through ``BackboneBase.from_name``.
        prob_thresh: Detections with probability at or below this are dropped.
    """
    dataset_class = DatasetBase.from_name(dataset_name)
    backbone_class = BackboneBase.from_name(backbone_name)

    model = Model(
        backbone_class(pretrained=False),
        dataset_class.num_classes(),
        pooler_mode=Config.POOLER_MODE,
        anchor_ratios=Config.ANCHOR_RATIOS,
        anchor_sizes=Config.ANCHOR_SIZES,
        rpn_pre_nms_top_n=Config.RPN_PRE_NMS_TOP_N,
        rpn_post_nms_top_n=Config.RPN_POST_NMS_TOP_N,
    )
    model = model.cuda()
    model.load(path_to_checkpoint)

    # Notes carried over from the original author (translated; the values
    # live in Config and are not verifiable from this function alone):
    #   Default options:
    #     pooler_mode   = Config.POOLER_MODE   = Pooler.Mode.ALIGN
    #     anchor_ratios = Config.ANCHOR_RATIOS = [(1, 2), (1, 1), (2, 1)]
    #     anchor_sizes  = for inference an extra 64 is added by default,
    #                     so the final list is [64, 128, 256, 512]
    #   RPN NMS settings used for eval:
    #     RPN_PRE_NMS_TOP_N:  int = 6000
    #     RPN_POST_NMS_TOP_N: int = 300

    # One colour is picked at random from this list for every detection.
    palette = ['red', 'green', 'blue', 'yellow', 'purple', 'white']

    with torch.no_grad():
        # Preprocess so that at least one side of the input image meets
        # min_side or max_side. YOLO needs a fixed image size; that is not
        # required here (original author's note).
        image = transforms.Image.open(path_to_input_image)
        image_tensor, scale = dataset_class.preprocess(
            image, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)

        # Switch to eval mode, add a batch dimension, then run forward.
        model.eval()
        batch = image_tensor.unsqueeze(dim=0).cuda()
        outputs = model.forward(batch)
        # Per the original note: shapes are (gd_n, 4), (gd_n,), (gd_n,).
        detection_bboxes, detection_classes, detection_probs, _ = outputs

        # The image was multiplied by `scale` during preprocessing, so the
        # boxes are divided by it to map them back onto the original image.
        detection_bboxes /= scale

        # Boolean mask of the detections to keep (original note: e.g. 0.6).
        keep = detection_probs > prob_thresh
        kept_bboxes = detection_bboxes[keep]    # (gd_thresh_n, 4)
        kept_classes = detection_classes[keep]  # (gd_thresh_n,)
        kept_probs = detection_probs[keep]      # (gd_thresh_n,)

        canvas = ImageDraw.Draw(image)
        detections = zip(kept_bboxes.tolist(),
                         kept_classes.tolist(),
                         kept_probs.tolist())

        for coords, label, prob in detections:
            color = random.choice(palette)
            box = BBox(left=coords[0], top=coords[1],
                       right=coords[2], bottom=coords[3])
            category = dataset_class.LABEL_TO_CATEGORY_DICT[label]

            canvas.rectangle(((box.left, box.top), (box.right, box.bottom)),
                             outline=color)
            # The caption is anchored at the box's top-left corner.
            canvas.text((box.left, box.top),
                        text=f'{category:s} {prob:.3f}',
                        fill=color)

        image.save(path_to_output_image)
        print(f'Output image is saved to {path_to_output_image}')
Ejemplo n.º 19
0
def val(model, dataset_name, path_to_data_dir, device):
    """Evaluate ``model`` on the named dataset and print the result.

    Instantiates the dataset class resolved from ``dataset_name`` in
    ``DatasetBase.Mode.EVAL`` with the configured min/max image sides, wraps
    it in an ``Evaluator`` and calls ``evaluate_pck`` on ``model.module``
    with ``device``. The two values that call returns are printed after a
    ``'VALIDATION'`` tag.

    Args:
        model: Wrapper exposing the underlying network as ``.module``
            (presumably a DataParallel-style wrapper -- TODO confirm).
        dataset_name: Name resolved through ``DatasetBase.from_name``.
        path_to_data_dir: Passed to both the dataset and the ``Evaluator``.
        device: Forwarded unchanged to ``evaluator.evaluate_pck``.
    """
    # Build the evaluation-mode dataset for the requested dataset name.
    dataset = DatasetBase.from_name(dataset_name)(path_to_data_dir, DatasetBase.Mode.EVAL, Config.IMAGE_MIN_SIDE, Config.IMAGE_MAX_SIDE)
    evaluator = Evaluator(dataset, path_to_data_dir)
    # The evaluator is handed the unwrapped model (model.module), not the wrapper.
    # NOTE(review): the first result is named mean_ap but comes from
    # evaluate_pck -- confirm which metric it actually is.
    mean_ap, detail = evaluator.evaluate_pck(model.module, device)
    print('VALIDATION', detail, mean_ap)