Beispiel #1
0
    def _load_model(self, model_path):
        """Build the two-class segmentation model and load its weights on CPU.

        Args:
            model_path: Path of the checkpoint file to load. The checkpoint
                must be a mapping whose ``'model'`` entry is the state dict.
                When falsy, the previously hard-coded file
                ``'maskrcnn_resnet_50.pt'`` is used.

        Returns:
            The model with the checkpoint weights loaded.
        """
        # Two classes: background and foreground.
        model = get_instance_segmentation_model(num_classes=2)
        # The original ignored `model_path`; honor it, keeping the old file
        # name as the fallback so existing callers keep working.
        weights_file = model_path or 'maskrcnn_resnet_50.pt'
        # map_location remaps all tensors to the CPU, so a checkpoint saved
        # on a GPU also loads on a CPU-only machine.
        checkpoint = torch.load(weights_file,
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint['model'], strict=True)

        return model
def predict(img):
    """Run the segmentation model on one image and return the raw output.

    Args:
        img: Image array. It is divided by 255 and transposed with
            ``(2, 0, 1)``, so it is presumably an HWC array with values in
            0-255 -- TODO confirm against callers.

    Returns:
        The model output for a one-image batch, computed without gradients.
    """
    # NOTE(review): the model is rebuilt and its weights re-read on every
    # call, and `device` is not defined here -- it must exist at module level.
    model = get_instance_segmentation_model(3)
    state_dict = torch.load("generator_14.pth",
                            map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    model.to(device)
    # Switch to inference mode once (the original did this twice).
    model.eval()

    scaled = img / 255
    im = torch.tensor(scaled.transpose(2, 0, 1), dtype=torch.float32)
    with torch.no_grad():
        prediction = model([im.to(device)])
    return prediction
def main(args):
    """Evaluate every ``*.tar`` checkpoint found in ``args.ckp_dir``.

    Reads ``args.gpu``, ``args.config``, ``args.mode``, ``args.ckp_dir``,
    ``args.write_excel`` and ``args.excel_save_dir``. ``args.config`` is
    updated in place when the ``.yaml`` extension has to be appended.
    """
    # get device (GPU or CPU)
    if torch.cuda.is_available():
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
        torch.backends.cudnn.benchmark = True
        torch.backends.cudnn.enabled = True
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # fix seed for reproducibility
    torch.manual_seed(7777)

    # load config; append the extension only when it is really missing
    # (the old `[-4:] != '.yaml'` test compared 4 chars with 5 and therefore
    # always appended, turning "cfg.yaml" into "cfg.yaml.yaml")
    if not args.config.endswith('.yaml'):
        args.config += '.yaml'
    with open(args.config) as cfg_file:
        config = yaml.safe_load(cfg_file)
    print(config)

    # load dataset
    val_dataset = get_dataset(config, mode=args.mode)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             num_workers=config["num_workers"],
                                             batch_size=1,
                                             shuffle=False,
                                             collate_fn=collate_fn)
    print("... Get COCO Dataloader for evaluation")
    coco = get_coco_api_from_dataset(val_loader.dataset)

    # glob returns files in arbitrary order; sort for a reproducible run
    ckp_paths = sorted(glob.glob(os.path.join(args.ckp_dir, "*.tar")))
    for ckp_idx, ckp_path in enumerate(ckp_paths):
        print("[CKP {} / {}]".format(ckp_idx, len(ckp_paths)), "-----" * 10)
        # load model; map_location lets GPU-saved weights load on CPU hosts
        model = get_instance_segmentation_model(num_classes=2)
        model.load_state_dict(torch.load(ckp_path, map_location=device))
        model.to(device)

        coco_evaluator = evaluate(coco, model, val_loader, device)

        if args.write_excel:
            os.makedirs(args.excel_save_dir, exist_ok=True)
            # Drops the first 6 and last 4 characters of the file name;
            # presumably names look like "epoch_<N>.tar" -- TODO confirm.
            epoch = int(os.path.basename(ckp_path)[6:-4])
            coco_to_excel(
                coco_evaluator, epoch, args.excel_save_dir,
                "{}_{}".format(config["dataset"], config["label_type"]))
Beispiel #4
0
def main(args):
    """Evaluate a single trained checkpoint on the configured dataset.

    Reads ``args.config``, ``args.mode`` and ``args.trained_ckp``.
    ``args.config`` is updated in place when the ``.yaml`` extension has to
    be appended.
    """
    # NOTE(review): `device` is not assigned in this function (the selection
    # code was commented out in the original), so it has to be defined at
    # module level -- confirm, otherwise this raises NameError below.

    # fix seed for reproducibility
    torch.manual_seed(7777)

    # load config; append the extension only when it is really missing
    # (the old `[-4:] != '.yaml'` test compared 4 chars with 5 and therefore
    # always appended, turning "cfg.yaml" into "cfg.yaml.yaml")
    if not args.config.endswith('.yaml'):
        args.config += '.yaml'
    with open(args.config) as cfg_file:
        config = yaml.safe_load(cfg_file)
    print(config)

    # load dataset
    val_dataset = get_dataset(config, mode=args.mode)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             num_workers=config["num_workers"],
                                             batch_size=1,
                                             shuffle=False,
                                             collate_fn=collate_fn)
    print("... Get COCO Dataloader for evaluation")
    coco = get_coco_api_from_dataset(val_loader.dataset)

    # load model; map_location lets GPU-saved weights load on CPU hosts
    model = get_instance_segmentation_model(num_classes=2)
    model.load_state_dict(torch.load(args.trained_ckp, map_location=device))
    model.to(device)

    coco_evaluator = evaluate(coco, model, val_loader, device)
Beispiel #5
0
def main(args):
    """Run Mask R-CNN on every frame of a video and save the visualization.

    Reads ``args.gpu``, ``args.ckp_path``, ``args.input_dir`` (handed to
    ``cv2.VideoCapture``), ``args.output_dir`` and ``args.thres``. Writes
    ``demo.mp4`` and one ``<frame index>.png`` per processed frame into
    ``args.output_dir``.
    """
    # get device (GPU or CPU)
    if torch.cuda.is_available():
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
        torch.backends.cudnn.benchmark = True
        torch.backends.cudnn.enabled = True
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # load model (MASKRCNN); map_location lets GPU-saved weights load on CPU
    print("... loading model")
    model = get_instance_segmentation_model(num_classes=2)
    model.to(device)
    model.load_state_dict(torch.load(args.ckp_path, map_location=device))
    model.eval()

    # load transform
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    print("... loading", end=' ')
    cap = cv2.VideoCapture(args.input_dir)
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    video_w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    video_h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print("{} images".format(num_frames))

    # visualization setting
    os.makedirs(args.output_dir, exist_ok=True)
    thres = float(args.thres)

    # save as video
    fourcc = cv2.VideoWriter_fourcc(*'DIVX')
    out = cv2.VideoWriter(os.path.join(args.output_dir, "demo.mp4"), fourcc,
                          30.0, (video_w, video_h))

    # inference
    print("+++ Start inference !")
    try:
        for fr_idx in range(num_frames):
            print("... inference ({}/{})".format(fr_idx + 1, num_frames))
            # load and transform image
            ret, img_arr = cap.read()
            if not ret:
                # The reported frame count can exceed what is decodable;
                # stop cleanly instead of crashing on a None frame.
                break
            frame_h, frame_w = img_arr.shape[:2]
            # NOTE(review): OpenCV frames are BGR and convert("RGB") does not
            # reorder channels -- confirm this matches how the model was
            # trained.
            img_data = Image.fromarray(img_arr).convert("RGB")
            img_tensor = transform(img_data).unsqueeze(0).to(device)

            # forward (no autograd graph needed) and post-process results
            with torch.no_grad():
                pred_result = model(img_tensor, None)[0]
            pred_mask = pred_result['masks'].cpu().detach().numpy().transpose(
                0, 2, 3, 1)
            pred_mask[pred_mask >= 0.5] = 1
            pred_mask[pred_mask < 0.5] = 0
            # replicate the single mask channel to 3 channels for blending
            pred_mask = np.repeat(pred_mask, 3, 3)
            pred_scores = pred_result['scores'].cpu().detach().numpy()
            pred_boxes = pred_result['boxes'].cpu().detach().numpy()

            # draw predictions above the score threshold
            ids = np.where(pred_scores > thres)[0]
            for pred_i in ids:
                # color encodes box center (x, y) and aspect ratio; it is
                # looked up by prediction index so it always belongs to the
                # box being drawn
                bx1, by1, bx2, by2 = pred_boxes[pred_i]
                box_w = abs(bx2 - bx1)
                box_h = abs(by2 - by1)
                ratio_x = (bx1 + bx2) / 2 / frame_w
                ratio_y = (by1 + by2) / 2 / frame_h
                longer = max(box_w, box_h)
                # degenerate (zero-size) boxes would divide by zero
                ratio_s = min(box_w, box_h) / longer if longer > 0 else 0.0
                color = (int(ratio_x * 255), int(ratio_y * 255),
                         int(ratio_s * 255))

                # draw segmentation
                mask = pred_mask[pred_i] * color
                img_arr = cv2.addWeighted(img_arr, 1, mask.astype(np.uint8),
                                          0.5, 0)
                # draw bbox
                x1, y1, x2, y2 = map(int, pred_boxes[pred_i])
                cv2.rectangle(img_arr, (x1, y1), (x2, y2), color, 2)

            # save visualized image
            out.write(img_arr)
            save_name = os.path.join(args.output_dir, "{}.png".format(fr_idx))
            cv2.imwrite(save_name, img_arr)
    finally:
        # release the capture and writer even when inference fails
        cap.release()
        out.release()
        cv2.destroyAllWindows()
Beispiel #6
0
def main(args):
    """Run Mask R-CNN on every file in a folder and save the visualization.

    Reads ``args.gpu``, ``args.ckp_path``, ``args.input_dir``,
    ``args.output_dir`` and ``args.thres``. For each entry of
    ``args.input_dir`` an annotated image with the same file name is written
    to ``args.output_dir``.
    """
    # get device (GPU or CPU)
    if torch.cuda.is_available():
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
        torch.backends.cudnn.benchmark = True
        torch.backends.cudnn.enabled = True
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # load model (MASKRCNN); map_location lets GPU-saved weights load on CPU
    print("... loading model")
    model = get_instance_segmentation_model(num_classes=2)
    model.to(device)
    model.load_state_dict(torch.load(args.ckp_path, map_location=device))
    model.eval()

    # load images and transform
    print("... loading", end=' ')
    img_list = sorted(os.listdir(args.input_dir))
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    print("{} images".format(len(img_list)))

    # visualization setting
    os.makedirs(args.output_dir, exist_ok=True)
    thres = float(args.thres)

    # inference
    print("+++ Start inference !")
    for i, img_name in enumerate(img_list):
        print("... inference ({}/{}) _ {}".format(i + 1, len(img_list),
                                                  img_name))
        # load and transform image
        img_file = os.path.join(args.input_dir, img_name)
        img_data = Image.open(img_file).convert("RGB")
        img_tensor = transform(img_data).unsqueeze(0).to(device)
        img_arr = np.array(img_data).astype(np.uint8)

        # forward (no autograd graph needed) and post-process results
        with torch.no_grad():
            pred_result = model(img_tensor, None)[0]
        pred_mask = pred_result['masks'].cpu().detach().numpy().transpose(
            0, 2, 3, 1)
        pred_mask[pred_mask >= 0.5] = 1
        pred_mask[pred_mask < 0.5] = 0
        # replicate the single mask channel to 3 channels for blending
        pred_mask = np.repeat(pred_mask, 3, 3)
        pred_scores = pred_result['scores'].cpu().detach().numpy()
        pred_boxes = pred_result['boxes'].cpu().detach().numpy()

        # draw predictions above the score threshold, one random color each
        ids = np.where(pred_scores > thres)[0]
        colors = np.random.randint(0, 255, (len(ids), 3))
        for color_i, pred_i in enumerate(ids):
            color = tuple(map(int, colors[color_i]))
            # draw segmentation
            mask = pred_mask[pred_i] * color
            img_arr = cv2.addWeighted(img_arr, 1, mask.astype(np.uint8), 0.5,
                                      0)
            # draw bbox and text (white outline first, colored text on top)
            x1, y1, x2, y2 = map(int, pred_boxes[pred_i])
            cv2.rectangle(img_arr, (x1, y1), (x2, y2), color, 2)
            vis_text = "FOOD({:.2f})".format(pred_scores[pred_i])
            cv2.putText(img_arr, vis_text, (x1 + 5, y1 + 15),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, [255, 255, 255], 2)
            cv2.putText(img_arr, vis_text, (x1 + 5, y1 + 15),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

        # save visualized image: the array came from PIL (RGB) while
        # cv2.imwrite expects BGR, so swap the channels before writing
        img_arr = cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR)
        save_name = os.path.join(args.output_dir, img_name)
        cv2.imwrite(save_name, img_arr)
 def test_simple(self):
     """The model factory must hand back a torchvision ``MaskRCNN``."""
     model = models.get_instance_segmentation_model(pretrained=False,
                                                    num_classes=2)
     expected_type = torchvision.models.detection.mask_rcnn.MaskRCNN
     self.assertIsInstance(model, expected_type)
        default='../experiments/20200711_14-19-46/config.yml'
    )
    args = parser.parse_args()

    with open('logger_conf.yaml', 'r') as f:
        log_config: Dict[str, Any] = yaml.safe_load(f.read())
        logging.config.dictConfig(log_config)

    logger = getLogger(__name__)
    cfg_dict: Dict[str, Any] = utils.load_yaml(args.config_path)
    cfg: utils.DotDict = utils.DotDict(cfg_dict)

    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'

    model = get_instance_segmentation_model(
        cfg.num_classes, cfg.pretrained
    )
    model.load_state_dict(torch.load(args.weights_path, map_location=device))
    model.to(device)
    model.eval()

    scores: Dict[str, List[float]] = {
        'bbox_iou': [],
        'mask_iou': [],
        'mAP': []
    }
    image_paths, mask_paths = load_images_and_masks()
    image_paths, mask_paths = image_paths[-170:], mask_paths[-170:]
    total: int = len(image_paths)
    for img_path, mask_path in tqdm(
        zip(image_paths, mask_paths), total=total
def main(args):
    """Train the two-class Mask R-CNN, evaluating after every epoch.

    Reads ``args.gpu``, ``args.config``, ``args.resume``,
    ``args.resume_ckp``, ``args.resume_epoch``, ``args.max_epoch`` and
    ``args.write_excel``. ``args.config`` is updated in place when the
    ``.yaml`` extension has to be appended. Weights are saved to
    ``config["save_dir"]`` as ``epoch_<N>.tar``.

    Raises:
        ValueError: If resuming is requested but neither ``args.resume_ckp``
            nor ``config["resume_ckp"]`` names a checkpoint.
    """
    # get device (GPU or CPU)
    if torch.cuda.is_available():
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
        torch.backends.cudnn.benchmark = True
        torch.backends.cudnn.enabled = True
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # fix seed for reproducibility
    torch.manual_seed(7777)

    # load config; append the extension only when it is really missing
    # (the old `[-4:] != '.yaml'` test compared 4 chars with 5 and therefore
    # always appended, turning "cfg.yaml" into "cfg.yaml.yaml")
    if not args.config.endswith('.yaml'):
        args.config += '.yaml'
    with open(args.config) as cfg_file:
        config = yaml.safe_load(cfg_file)
    print(config)

    # load dataset
    train_dataset = get_dataset(config)
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        num_workers=config["num_workers"],
        batch_size=config["batch_size"],
        shuffle=True,
        collate_fn=collate_fn)
    val_dataset = get_dataset(config, mode="val")
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                             num_workers=config["num_workers"],
                                             batch_size=1,
                                             shuffle=False,
                                             collate_fn=collate_fn)
    print("... Get COCO Dataloader for evaluation")
    coco = get_coco_api_from_dataset(val_loader.dataset)

    # load model
    model = get_instance_segmentation_model(num_classes=2)
    if args.resume:
        # command-line checkpoint takes precedence over the config entry
        if args.resume_ckp:
            resume_ckp = args.resume_ckp
        elif "resume_ckp" in config:
            resume_ckp = config["resume_ckp"]
        else:
            raise ValueError(
                "Wrong resume setting, there's no trainied weight in config and args"
            )
        # map_location lets GPU-saved weights load on CPU-only hosts
        model.load_state_dict(torch.load(resume_ckp, map_location=device))
    model.to(device)

    # construct an optimizer over the trainable parameters only
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params,
                                 lr=config["lr"],
                                 weight_decay=config["wd"])
    # NOTE(review): lr_update is read from the "save_interval" key, exactly
    # as in the original -- looks like a copy-paste slip, confirm the
    # intended config key before changing it.
    lr_update = config.get("save_interval")

    # set training epoch
    start_epoch = args.resume_epoch or 0
    max_epoch = args.max_epoch or config.get("max_epoch", 100)
    assert start_epoch < max_epoch
    save_interval = config.get("save_interval", 1)

    # logging
    output_folder = config["save_dir"]
    os.makedirs(output_folder, exist_ok=True)

    print("+++ Start Training  @start:{} @max: {}".format(
        start_epoch, max_epoch))
    for epoch in range(start_epoch, max_epoch):
        # train
        train_one_epoch(epoch, model, train_loader, optimizer, device,
                        lr_update)
        # validate and write results
        coco_evaluator = evaluate(coco, model, val_loader, device)
        # save weight (and optionally the metrics) every save_interval epochs
        if epoch % save_interval == 0:
            torch.save(model.state_dict(),
                       '{}/epoch_{}.tar'.format(output_folder, epoch))
            if args.write_excel:
                coco_to_excel(
                    coco_evaluator, epoch, output_folder,
                    "{}_{}".format(config["dataset"], config["label_type"]))
Beispiel #10
0
# Checkpoint to visualize and the folder that receives the results.
# NOTE(review): `device`, `cfg_name` and `mode` are used below but are not
# assigned in this section -- they must be defined earlier in the script.
ckp_path = "/data/joo/food/maskrcnn/210327_real_easy_finetuning/epoch_18.tar"
save_dir = "./tmp/Inference/OurReal_MediHard_FineTune"
# ckp_path = "/data/joo/food/maskrcnn/210326_UNIMIB_FineTuning/epoch_49.tar"
# save_dir = "./tmp/Inference/UNIMIB_Testset_FineTune"

# # # from-scratch
# ckp_path = "/data/joo/food/maskrcnn/210327_real_easy/epoch_22.tar"
# save_dir = "./tmp/Inference/OurReal_MediHard_FromScr"
# ckp_path = "/data/joo/food/maskrcnn/210326_UNIMIB_FronScratch/epoch_44.tar"
# save_dir = "./tmp/Inference/UNIMIB_Testset_FromScr"

os.makedirs(save_dir, exist_ok=True)

# load trained model; map_location lets GPU-saved weights load on CPU hosts
print("... loading model")
model = get_instance_segmentation_model(num_classes=2)
model.to(device)
model.load_state_dict(torch.load(ckp_path, map_location=device))
model.eval()

# Append the extension only when it is really missing (the old
# `[-4:] != '.yaml'` test compared 4 chars with 5 and always appended).
if not cfg_name.endswith('.yaml'):
    cfg_name += '.yaml'
with open(cfg_name) as cfg_file:
    config = yaml.safe_load(cfg_file)
print(config)

dataset = get_dataset(config, mode=mode)
dataloader = torch.utils.data.DataLoader(dataset=dataset,
                                         num_workers=config["num_workers"],
                                         batch_size=1,
                                         shuffle=False)
# Inverse of the normalization with these mean/std values (presumably used
# to turn network inputs back into displayable images -- TODO confirm).
unorm = UnNormalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
Beispiel #11
0
        '-i', '--img_path', type=str
    )
    args = parser.parse_args()

    log_config: Dict[str, Any] = utils.load_yaml('logger_conf.yaml')
    logging.config.dictConfig(log_config)
    logger = getLogger(__name__)

    cfg_dict: Dict[str, Any] = utils.load_yaml(args.config_path)
    cfg: utils.DotDict = utils.DotDict(cfg_dict)

    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Load model and weights.
    net = get_instance_segmentation_model(
        num_classes=cfg.num_classes, pretrained=False
    )
    net.load_state_dict(torch.load(args.weights_path, map_location=device))

    # Prepare input image.
    orig_img: Image.Image = Image.open(args.img_path).convert('RGB')
    albu_cfg: Dict[str, Any] = cfg.albumentations.eval.todict()
    transforms: Callable = albu.core.serialization.from_dict(albu_cfg)
    img = transforms(image=np.array(orig_img))['image']
    img = torch.as_tensor(img).permute(2, 0, 1)
    c, h, w = img.size()
    img = img.reshape(1, c, h, w)

    # Prediction.
    net.eval()
    out = net(img)[0]
Beispiel #12
0
    dataset, batch_size=2, shuffle=True, num_workers=0,
    collate_fn=utils.collate_fn
)

# Loader for the held-out split: fixed order, two samples per batch.
data_loader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=2,
    shuffle=False,
    num_workers=0,
    collate_fn=utils.collate_fn,
)

# %%
# Prefer the GPU when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

num_classes = 2

model = models.get_instance_segmentation_model(num_classes)
model.to(device)

# Hand only the parameters that require gradients to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Scale the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

# %%
num_epochs = 10
Beispiel #13
0
def main(args):
    """Run Mask R-CNN live on a camera stream and display the result.

    Reads ``args.gpu``, ``args.ckp_path``, ``args.thres`` and
    ``args.cam_id`` (handed to ``cv2.VideoCapture``). Shows each annotated
    frame in a window until ESC is pressed or no frame can be read.
    """
    # get device (GPU or CPU)
    if torch.cuda.is_available():
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
        torch.backends.cudnn.benchmark = True
        torch.backends.cudnn.enabled = True
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # load model (MASKRCNN)
    print("... loading model")
    model = get_instance_segmentation_model(num_classes=2)
    model.to(device)
    model.load_state_dict(
        torch.load(args.ckp_path, map_location=torch.device('cpu')))
    model.eval()
    thres = float(args.thres)

    # load transform
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    print("... loading", end=' ')

    # load camera
    cam_id = args.cam_id
    cam = cv2.VideoCapture(cam_id)
    assert cam.isOpened(), 'Cannot capture source'

    # inference
    print("+++ Start inference !")
    try:
        while cam.isOpened():
            start_time = time.time()
            # load and transform image
            ret, img_arr = cam.read()
            if not ret:
                # No frame delivered (camera unplugged / stream ended);
                # stop instead of crashing on a None frame.
                break
            frame_h, frame_w = img_arr.shape[:2]
            # NOTE(review): OpenCV frames are BGR and convert("RGB") does not
            # reorder channels -- confirm this matches how the model was
            # trained.
            img_data = Image.fromarray(img_arr).convert("RGB")
            img_tensor = transform(img_data).unsqueeze(0).to(device)

            # forward (no autograd graph needed) and post-process results
            with torch.no_grad():
                pred_result = model(img_tensor, None)[0]
            pred_mask = pred_result['masks'].cpu().detach().numpy().transpose(
                0, 2, 3, 1)
            pred_mask[pred_mask >= 0.5] = 1
            pred_mask[pred_mask < 0.5] = 0
            # replicate the single mask channel to 3 channels for blending
            pred_mask = np.repeat(pred_mask, 3, 3)
            pred_scores = pred_result['scores'].cpu().detach().numpy()
            pred_boxes = pred_result['boxes'].cpu().detach().numpy()

            # draw predictions above the score threshold
            ids = np.where(pred_scores > thres)[0]
            for pred_i in ids:
                # color encodes box center (x, y) and aspect ratio; it is
                # looked up by prediction index so it always belongs to the
                # box being drawn
                bx1, by1, bx2, by2 = pred_boxes[pred_i]
                box_w = abs(bx2 - bx1)
                box_h = abs(by2 - by1)
                ratio_x = (bx1 + bx2) / 2 / frame_w
                ratio_y = (by1 + by2) / 2 / frame_h
                longer = max(box_w, box_h)
                # degenerate (zero-size) boxes would divide by zero
                ratio_s = min(box_w, box_h) / longer if longer > 0 else 0.0
                color = (int(ratio_x * 255), int(ratio_y * 255),
                         int(ratio_s * 255))

                # draw segmentation
                mask = pred_mask[pred_i] * color
                img_arr = cv2.addWeighted(img_arr, 1, mask.astype(np.uint8),
                                          0.5, 0)
                # draw bbox
                x1, y1, x2, y2 = map(int, pred_boxes[pred_i])
                cv2.rectangle(img_arr, (x1, y1), (x2, y2), color, 2)
                # put text (white outline first, colored text on top)
                vis_text = "FOOD({:.2f})".format(pred_scores[pred_i])
                cv2.putText(img_arr, vis_text, (x1 + 5, y1 + 15),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, [255, 255, 255], 2)
                cv2.putText(img_arr, vis_text, (x1 + 5, y1 + 15),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

            cv2.imshow('frame', img_arr)
            key = cv2.waitKey(1)
            if key == 27:  # ESC
                break

            print("FPS is {:.2f} | Image Size:{}\t\t".format(
                1 / (time.time() - start_time), img_arr.shape), end='\r')
    finally:
        # release the camera and close the window even when inference fails
        cam.release()
        cv2.destroyAllWindows()