Example #1
# Imports for a standalone run; the src.* module paths are assumed from the
# repository's package layout and may need adjusting.
import os
import shutil

import cv2
import numpy as np
import torch

from src.dataset import CocoDataset
from src.model import SSD, ResNet
from src.transform import SSDTransformer
from src.utils import Encoder, colors, generate_dboxes


def test(opt):
    model = SSD(backbone=ResNet())
    checkpoint = torch.load(opt.pretrained_model)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
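    # The default (prior) boxes parameterize both the val transform and box decoding.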
    dboxes = generate_dboxes()
    test_set = CocoDataset(opt.data_path, 2017, "val",
                           SSDTransformer(dboxes, (300, 300), val=True))
    encoder = Encoder(dboxes)

    if os.path.isdir(opt.output):
        shutil.rmtree(opt.output)
    os.makedirs(opt.output)

    for img, img_id, img_size, _, _ in test_set:
        if img is None:
            continue
        if torch.cuda.is_available():
            img = img.cuda()
        with torch.no_grad():
            ploc, plabel = model(img.unsqueeze(dim=0))
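            # decode_batch applies score decoding plus NMS; the trailing 20
            # caps how many detections are kept per image.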
            result = encoder.decode_batch(ploc, plabel, opt.nms_threshold,
                                          20)[0]
            loc, label, prob = [r.cpu().numpy() for r in result]
            best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
            loc = loc[best]
            label = label[best]
            prob = prob[best]
            if len(loc) > 0:
                path = test_set.coco.loadImgs(img_id)[0]["file_name"]
                output_img = cv2.imread(
                    os.path.join(opt.data_path, "val2017", path))
                height, width, _ = output_img.shape
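                # Predicted boxes are normalized to [0, 1]; scale to pixel coordinates.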
                loc[:, 0::2] *= width
                loc[:, 1::2] *= height
                loc = loc.astype(np.int32)
                for box, lb, pr in zip(loc, label, prob):
                    category = test_set.label_info[lb]
                    color = colors[lb]
                    xmin, ymin, xmax, ymax = box
                    cv2.rectangle(output_img, (xmin, ymin), (xmax, ymax),
                                  color, 2)
                    text_size = cv2.getTextSize(category + " : %.2f" % pr,
                                                cv2.FONT_HERSHEY_PLAIN, 1,
                                                1)[0]
                    cv2.rectangle(
                        output_img, (xmin, ymin),
                        (xmin + text_size[0] + 3, ymin + text_size[1] + 4),
                        color, -1)
                    cv2.putText(output_img, category + " : %.2f" % pr,
                                (xmin, ymin + text_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
                # Write once per image, after all boxes have been drawn.
                cv2.imwrite(
                    "{}/{}_prediction.jpg".format(opt.output, path[:-4]),
                    output_img)
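
The opt namespace above is typically built with argparse; a minimal sketch matching the fields this example reads (flag names and defaults here are illustrative, not taken from the original project):

import argparse


def get_args():
    parser = argparse.ArgumentParser("SSD inference on the COCO val set")
    parser.add_argument("--pretrained-model", type=str,
                        help="checkpoint containing a model_state_dict entry")
    parser.add_argument("--data-path", type=str,
                        help="COCO root containing val2017/ and annotations")
    parser.add_argument("--output", type=str,
                        help="directory for the rendered predictions")
    parser.add_argument("--nms-threshold", type=float, default=0.5)
    parser.add_argument("--cls-threshold", type=float, default=0.3)
    return parser.parse_args()


if __name__ == "__main__":
    test(get_args())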
Example #2
# Imports for a standalone run; the src.* module paths are assumed from the
# repository's package layout and may need adjusting.
import cv2
import numpy as np
import torch
from PIL import Image

from src.model import SSD, ResNet
from src.transform import SSDTransformer
from src.utils import Encoder, coco_classes, colors, generate_dboxes


def test(opt):
    model = SSD(backbone=ResNet())
    checkpoint = torch.load(opt.pretrained_model)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    dboxes = generate_dboxes()
    transformer = SSDTransformer(dboxes, (300, 300), val=True)
    img = Image.open(opt.input).convert("RGB")
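    # The dummy box/label tensors only satisfy the transformer's call
    # signature; in val mode just the image is transformed.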
    img, _, _, _ = transformer(img, None, torch.zeros(1, 4), torch.zeros(1))
    encoder = Encoder(dboxes)

    if torch.cuda.is_available():
        img = img.cuda()
    with torch.no_grad():
        ploc, plabel = model(img.unsqueeze(dim=0))
        result = encoder.decode_batch(ploc, plabel, opt.nms_threshold, 20)[0]
        loc, label, prob = [r.cpu().numpy() for r in result]
        best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
        loc = loc[best]
        label = label[best]
        prob = prob[best]
        output_img = cv2.imread(opt.input)
        if len(loc) > 0:
            height, width, _ = output_img.shape
            loc[:, 0::2] *= width
            loc[:, 1::2] *= height
            loc = loc.astype(np.int32)
            for box, lb, pr in zip(loc, label, prob):
                category = coco_classes[lb]
                color = colors[lb]
                xmin, ymin, xmax, ymax = box
                cv2.rectangle(output_img, (xmin, ymin), (xmax, ymax), color, 2)
                text_size = cv2.getTextSize(category + " : %.2f" % pr,
                                            cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
                cv2.rectangle(
                    output_img, (xmin, ymin),
                    (xmin + text_size[0] + 3, ymin + text_size[1] + 4),
                    color, -1)
                cv2.putText(
                    output_img, category + " : %.2f" % pr,
                    (xmin, ymin + text_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1,
                    (255, 255, 255), 1)
        if opt.output is None:
            output = "{}_prediction.jpg".format(opt.input[:-4])
        else:
            output = opt.output
        cv2.imwrite(output, output_img)
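
For quick experiments, this single-image variant can also be called without a CLI; the field names below are exactly the ones the function reads, while the paths and thresholds are placeholders:

from types import SimpleNamespace

opt = SimpleNamespace(
    pretrained_model="trained_models/SSD.pth",  # placeholder checkpoint path
    input="sample.jpg",                         # placeholder input image
    output=None,           # None -> writes "<input stem>_prediction.jpg"
    nms_threshold=0.5,     # illustrative default
    cls_threshold=0.3,     # illustrative default
)
test(opt)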
Example #3

# Standard imports for a standalone run. SSD, OIDataset, SimpleTransformer,
# generate_dboxes, Encoder, collate_fn, evaluate, draw_prediction_one,
# model_path and input_folder are assumed to come from the surrounding project.
import os

import torch
from torch.utils.data import DataLoader
from tqdm import tqdm


def evaluate_test_dataset():
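    # DataLoader settings; the custom collate_fn handles variable-length
    # detection targets (an illustrative sketch follows this example).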
    test_params = {
        "batch_size": 4,
        "shuffle": True,
        "drop_last": False,
        "num_workers": 4,
        "collate_fn": collate_fn
    }
    test_set = OIDataset(transformer, test=True)
    test_loader = DataLoader(test_set, **test_params)
    evaluate(model, test_loader, encoder, 0.45)


if __name__ == "__main__":
    model = SSD()
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    dboxes = generate_dboxes()
    transformer = SimpleTransformer(dboxes, eval=True)
    encoder = Encoder(dboxes)

    # evaluate_test_dataset()

    for image in tqdm(os.listdir(input_folder)):
        draw_prediction_one(os.path.join(input_folder, image))
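
This example depends on a project-specific collate_fn. Purely as an illustration (the assumed batch structure of (image, boxes, labels) tuples may not match what OIDataset actually yields), a detection-style collate usually stacks the fixed-size images and leaves the variable-length targets as lists:

import torch


def collate_fn(batch):
    # Stack fixed-size image tensors; keep variable-length targets as lists.
    images, boxes, labels = zip(*batch)
    return torch.stack(images, dim=0), list(boxes), list(labels)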
Example #4
# Imports for a standalone run; the src.* module paths are assumed from the
# repository's package layout and may need adjusting.
import cv2
import numpy as np
import torch
from PIL import Image

from src.model import SSD, ResNet
from src.transform import SSDTransformer
from src.utils import Encoder, coco_classes, colors, generate_dboxes


def test(opt):
    model = SSD(backbone=ResNet())
    checkpoint = torch.load(opt.pretrained_model)
    model.load_state_dict(checkpoint["model_state_dict"])
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    dboxes = generate_dboxes()
    transformer = SSDTransformer(dboxes, (300, 300), val=True)
    cap = cv2.VideoCapture(opt.input)
    if opt.output is None:
        output = "{}_prediction.mp4".format(opt.input[:-4])
    else:
        output = opt.output
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    # Note: if the MJPG fourcc is rejected for an .mp4 container on your
    # OpenCV build, "mp4v" is a common alternative.
    out = cv2.VideoWriter(output, cv2.VideoWriter_fourcc(*"MJPG"),
                          int(cap.get(cv2.CAP_PROP_FPS)), (width, height))
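    # Added guard: VideoWriter can fail silently (producing an empty file)
    # when the fourcc/container pair is unsupported.
    if not out.isOpened():
        raise RuntimeError("could not open VideoWriter for " + output)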
    encoder = Encoder(dboxes)
    while cap.isOpened():
        flag, frame = cap.read()
        if not flag:
            break
        # Keep an unmodified BGR copy for drawing; the model input is RGB.
        output_frame = np.copy(frame)
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = Image.fromarray(frame)
        frame, _, _, _ = transformer(frame, None, torch.zeros(1, 4),
                                     torch.zeros(1))

        if torch.cuda.is_available():
            frame = frame.cuda()
        with torch.no_grad():
            ploc, plabel = model(frame.unsqueeze(dim=0))
            result = encoder.decode_batch(ploc, plabel, opt.nms_threshold,
                                          20)[0]
            loc, label, prob = [r.cpu().numpy() for r in result]
            best = np.argwhere(prob > opt.cls_threshold).squeeze(axis=1)
            loc = loc[best]
            label = label[best]
            prob = prob[best]
            if len(loc) > 0:
                loc[:, 0::2] *= width
                loc[:, 1::2] *= height
                loc = loc.astype(np.int32)
                for box, lb, pr in zip(loc, label, prob):
                    category = coco_classes[lb]
                    color = colors[lb]
                    xmin, ymin, xmax, ymax = box
                    cv2.rectangle(output_frame, (xmin, ymin), (xmax, ymax),
                                  color, 2)
                    text_size = cv2.getTextSize(category + " : %.2f" % pr,
                                                cv2.FONT_HERSHEY_PLAIN, 1,
                                                1)[0]
                    cv2.rectangle(
                        output_frame, (xmin, ymin),
                        (xmin + text_size[0] + 3, ymin + text_size[1] + 4),
                        color, -1)
                    cv2.putText(output_frame, category + " : %.2f" % pr,
                                (xmin, ymin + text_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

        out.write(output_frame)
    cap.release()
    out.release()