Ejemplo n.º 1
0
def inicializar_segsem():
    """Load the HarDNet ADE20K segmentation model into module-level globals.

    Sets three globals that the rest of the module can use afterwards:

    * ``loader`` -- the ADE20K loader object built in test mode,
    * ``device`` -- the torch device the model lives on,
    * ``model``  -- the network in eval mode with the checkpoint loaded.

    Progress and the elapsed loading time are printed to stdout.
    """
    global loader
    global device
    global model

    print("Loading Semantic Segmentation Model:")
    start = time.time()

    # Use the GPU when one is available, otherwise fall back to the CPU
    # instead of failing on hosts without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_name = "hardnet"
    data_loader = get_loader("ade20k")
    loader = data_loader(root=None,
                         is_transform=True,
                         img_norm=True,
                         test_mode=True)
    n_classes = loader.n_classes

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version="ade20k")
    # map_location lets a checkpoint saved on a GPU be restored on whichever
    # device was selected above.
    checkpoint = torch.load(
        "/home/socialab/FCHarDNet/runs/config./cur/hardnet_ade20k_best_model.pkl",
        map_location=device,
    )
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    end = time.time()
    print("  (time): " + str(end - start))
Ejemplo n.º 2
0
def test(args):
    """Segment a single image and save the raw class-index mask.

    Args:
        args: namespace providing ``model_path``, ``img_path``, ``dataset``,
            ``img_norm`` and ``out_path``. The architecture name is taken
            from the checkpoint file name (the text before the first ``_``).

    The predicted class indices are written to ``args.out_path`` as uint8.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]
    # These architectures get an odd-sized input and a resize back afterwards.
    needs_odd_size = model_name in ["pspnet", "icnet", "icnetBN"]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    loader = data_loader(root=None,
                         is_transform=True,
                         img_norm=args.img_norm,
                         test_mode=True)
    n_classes = loader.n_classes

    orig_size = img.shape[:-1]
    if needs_odd_size:
        # uint8 with RGB mode, resize width and height which are odd numbers
        img = misc.imresize(
            img, (orig_size[0] // 2 * 2 + 1, orig_size[1] // 2 * 2 + 1))
    else:
        img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))

    # Reverse the channel axis, subtract the loader's mean, optionally scale.
    img = img[:, :, ::-1]
    img = img.astype(np.float64)
    img -= loader.mean
    if args.img_norm:
        img = img / 255.0

    # HWC -> CHW, then add a leading batch axis.
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    # map_location keeps the CPU fallback above usable with checkpoints that
    # were saved from a GPU.
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    images = img.to(device)
    # Inference only: do not build an autograd graph.
    with torch.no_grad():
        outputs = model(images)

    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    if needs_odd_size:
        pred = pred.astype(np.float32)
        # float32 with F mode, resize back to orig_size
        pred = misc.imresize(pred, orig_size, "nearest", mode="F")

    print("Classes found: ", np.unique(pred))
    misc.imsave(args.out_path, pred.astype('uint8'))
    print("Segmentation Mask Saved at: {}".format(args.out_path))
Ejemplo n.º 3
0
def train(args):
    """Train and/or evaluate the classifier on top of a frozen FCN8s model.

    Args:
        args: namespace providing ``feature_model_path``,
            ``classifier_model_path`` (optional, may be ``None``),
            ``train_csv_path`` / ``test_csv_path`` (each optional),
            ``batch_size``, ``num_epoch`` and ``output_model_path``.

    When ``train_csv_path`` is given the classifier is trained and its
    state dict is saved to ``output_model_path``. When ``test_csv_path`` is
    given the classifier is evaluated on the first batch read from it.
    """
    device = "cpu"

    # Setup model (feature extractor, kept in eval mode)
    model = get_model({"arch": "fcn8s"}, N_CLASSES,
                      version="mit_sceneparsing_benchmark")
    checkpoint = torch.load(args.feature_model_path, map_location='cpu')
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # Setup classifier
    classifier = Classifier()
    if args.classifier_model_path is not None:
        classifier.load_state_dict(
            torch.load(args.classifier_model_path, map_location='cpu'))

    classifier.to(device)
    # ``momentum`` is a float coefficient. The previous ``momentum=True`` was
    # taken as 1.0, so the velocity term never decayed. 0.9 is the
    # conventional value.
    optimizer = optim.SGD(classifier.parameters(), lr=0.001, momentum=0.9)

    if args.train_csv_path is not None:
        print("Read training csv file from : {}".format(args.train_csv_path))
        train_data = read_samples(args.train_csv_path, args.batch_size)
        for _ in range(args.num_epoch):
            for img, label in train_data:
                train_step(model, classifier, optimizer, img, label)
        torch.save(classifier.state_dict(), args.output_model_path)

    if args.test_csv_path is not None:
        classifier.eval()
        print("Read testing csv file from : {}".format(args.test_csv_path))
        # 999 is passed as the batch size; presumably large enough to put the
        # whole test set into the first batch -- TODO confirm.
        test_data = read_samples(args.test_csv_path, 999)
        # NOTE: called with four arguments, so ``eval`` here must be a
        # project-level function shadowing the builtin.
        eval(model, classifier, test_data[0][0], test_data[0][1])
Ejemplo n.º 4
0
def validate(args):
    """Run a checkpoint over a dataset split and print segmentation metrics.

    Args:
        args: namespace providing ``dataset``, ``split``, ``img_rows``,
            ``img_cols``, ``batch_size`` and ``model_path``. The architecture
            name is the part of ``model_path`` before its first ``_``.

    Prints every overall score followed by the per-class IoU values.
    """
    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path,
                         split=args.split,
                         is_transform=True,
                         img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader,
                                batch_size=args.batch_size,
                                num_workers=4)
    running_metrics = runningScore(n_classes)

    # Setup Model
    # NOTE(review): the name is sliced from the full path, so a directory
    # component in ``model_path`` ends up in the name -- confirm callers pass
    # a bare file name.
    model = get_model(args.model_path[:args.model_path.find('_')], n_classes)
    state = convert_state_dict(torch.load(args.model_path)['model_state'])
    model.load_state_dict(state)
    model.eval()
    # Move the model once instead of on every iteration.
    model.cuda()

    # ``volatile=True`` no longer disables autograd; ``torch.no_grad()`` is
    # the supported way to run inference without building a graph.
    with torch.no_grad():
        for i, (images, labels) in tqdm(enumerate(valloader)):
            outputs = model(images.cuda())
            pred = outputs.data.max(1)[1].cpu().numpy()
            # Labels are only needed on the host for the metric update.
            gt = labels.numpy()

            running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, class_iou[i])
def test(args, cfg):
    """Segment every PNG in a directory and report a confidence score.

    Args:
        args: namespace providing ``model_path``, ``img_path`` (a directory),
            ``dataset`` and ``img_norm``.
        cfg: configuration mapping; ``cfg['model']`` selects the architecture
            and the whole mapping is passed to ``get_data_path``.

    For each image the mean of the per-pixel maximum softmax probability is
    printed, and the colour-decoded prediction is written to
    ``test_out/test_confidence/out/`` under the image's own file name.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Collect the input images in natural (human) sort order.
    img_dir = Path(args.img_path)
    img_files = natsort.natsorted(list(img_dir.glob("*.png")),
                                  alg=natsort.PATH)
    img_paths = [str(p) for p in img_files]
    # Setup image
    print("Read Input Image from : {}".format(args.img_path))

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset, config_file=cfg)
    loader = data_loader(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    # Setup Model
    model = get_model(cfg['model'], n_classes)
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # cv.imwrite does not create directories and fails without raising, so
    # make sure the target directory exists first.
    out_dir = "test_out/test_confidence/out/"
    os.makedirs(out_dir, exist_ok=True)

    with torch.no_grad():
        for img_path in tqdm(img_paths):
            img = misc.imread(img_path)
            img = img.astype(np.float64)
            if args.img_norm:
                img = img / 255.0

            # HWC -> CHW, then add a leading batch axis.
            img = img.transpose(2, 0, 1)
            img = np.expand_dims(img, 0)
            img = torch.from_numpy(img).float()
            images = img.to(device)
            outputs = model(images)

            # Softmax over axis 1, the same axis the class argmax below uses.
            probabilities = F.softmax(outputs, dim=1)
            max_prob = probabilities.data.max(1)[0]
            prob_map = np.squeeze(max_prob.cpu().numpy(), axis=0)
            avg_prob = np.mean(prob_map)
            print("Confidence Score for %s: \n%f" % (img_path, avg_prob))
            pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)

            decoded = loader.decode_segmap(pred)
            out_path = out_dir + Path(img_path).name
            decoded_bgr = cv.cvtColor(decoded, cv.COLOR_RGB2BGR)
            cv.imwrite(out_path, decoded_bgr)
Ejemplo n.º 6
0
def get_sem_mask(model_file_name):
    """Predict a Cityscapes class-index mask for the built-in sample image.

    Args:
        model_file_name: path of the checkpoint; the architecture name is
            the text before its first ``_``.

    Returns:
        tuple: ``(pred, img_orig)`` where ``pred`` is the array of predicted
        class indices and ``img_orig`` is the image as read by OpenCV.

    Raises:
        Exception: if the image path does not end in ``png`` or ``jpg``.
        IOError: if the image cannot be read.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    default_img_path = 'results/munich_000009_000019_leftImg8bit.png'
    #img_path = input('Image path: ')
    img_path = default_img_path

    if not img_path:
        # An empty answer falls back to the bundled sample.
        img_path = default_img_path
    elif img_path.endswith(('png', 'jpg')):
        print("Read Input Image from : %s" % (img_path))
    else:
        raise Exception('Non PNG or JPG image!')

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread returns None on failure rather than raising.
        raise IOError("Could not read image: %s" % img_path)

    img_orig = img

    model_name = model_file_name[:model_file_name.find("_")]

    data_loader = get_loader('cityscapes')
    loader = data_loader(root=None, is_transform=True, test_mode=True)
    n_classes = loader.n_classes

    img = image_preproc(img, loader.img_size)

    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version='cityscapes')

    # Map the checkpoint onto the selected device directly. This replaces a
    # bare ``except`` that retried on the CPU after any kind of failure.
    checkpoint = torch.load(model_file_name, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])

    model.load_state_dict(state)
    model.eval()
    model.to(device)

    images = img.to(device)
    # Inference only: do not build an autograd graph.
    with torch.no_grad():
        outputs = model(images)
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)

    return pred, img_orig
Ejemplo n.º 7
0
def test(args, cfg):
    """Predict the validation split and write colour-decoded masks as PNGs.

    Args:
        args: namespace providing ``model_path``, ``img_path``, ``dataset``
            and ``img_norm``.
        cfg: configuration mapping with ``model``, ``data`` (``val_split``,
            ``img_rows``, ``img_cols``) and ``training`` (``batch_size``,
            ``n_workers``) entries.

    One PNG per sample is written to ``test_out/CAN_res50_4band_data07/``,
    named after the stem of the sample's path.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset, config_file=cfg)
    # This loader instance is only used for n_classes and decode_segmap.
    loader = data_loader(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    v_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['val_split'],
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
    )

    valloader = data.DataLoader(v_loader,
                                batch_size=cfg['training']['batch_size'],
                                num_workers=cfg['training']['n_workers'])

    # Setup Model
    model = get_model(cfg['model'], n_classes)
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # cv.imwrite does not create directories and fails without raising.
    out_dir = "test_out/CAN_res50_4band_data07/"
    os.makedirs(out_dir, exist_ok=True)

    with torch.no_grad():
        for i_val, (img_path, images_val,
                    labels_val) in tqdm(enumerate(valloader)):
            images_val = images_val.to(device)
            outputs = model(images_val)

            # Class indices for the whole batch. Handling every item lets the
            # configured batch size be larger than one.
            preds = outputs.data.max(1)[1].cpu().numpy()
            # assumes img_path holds one path string per batch item --
            # TODO confirm against the dataset's __getitem__.
            for img_name, pred in zip(img_path, preds):
                decoded = loader.decode_segmap(pred)
                out_path = out_dir + Path(img_name).stem + ".png"
                decoded_bgr = cv.cvtColor(decoded, cv.COLOR_RGB2BGR)
                cv.imwrite(out_path, decoded_bgr)
Ejemplo n.º 8
0
def validate(cfg, args):
    """Evaluate a checkpoint on the validation split with flip averaging.

    Args:
        cfg: configuration mapping with ``data`` (``dataset``, ``path``,
            ``val_split``, ``img_rows``, ``img_cols``), ``training``
            (``batch_size``) and ``model`` entries.
        args: namespace providing ``model_path``.

    Each batch is run twice, as-is and flipped along axis 3. The flipped
    output is flipped back and the two are averaged before the argmax.
    Overall scores and per-class IoU values are printed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]

    loader = data_loader(
        data_path,
        split=cfg["data"]["val_split"],
        is_transform=True,
        img_size=(cfg["data"]["img_rows"], cfg["data"]["img_cols"]),
    )

    n_classes = loader.n_classes

    valloader = data.DataLoader(loader,
                                batch_size=cfg["training"]["batch_size"],
                                num_workers=8)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg["model"], n_classes).to(device)
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()

    # Evaluation only: skip autograd bookkeeping for both forward passes.
    with torch.no_grad():
        for i, (images, labels) in enumerate(valloader):

            images = images.to(device)
            gt = labels.numpy()

            outputs = model(images).data.cpu().numpy()

            flipped_images = torch.flip(images, dims=(3, ))
            outputs_flipped = model(flipped_images)
            # Undo the flip so both outputs are aligned before averaging.
            outputs_flipped = torch.flip(outputs_flipped,
                                         dims=(3, )).data.cpu().numpy()

            outputs = (outputs + outputs_flipped) / 2.0
            pred = np.argmax(outputs, axis=1)

            running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, class_iou[i])
Ejemplo n.º 9
0
 def load_model(self, config, modelfile):
     """Build the network, optionally restore weights, wrap in DataParallel.

     Args:
         config: mapping with ``'backbone'`` (architecture name) and
             ``'classes'`` (passed to ``get_model`` as the class count).
         modelfile: checkpoint path; weights are loaded only if this is an
             existing file.

     Returns:
         The model wrapped in ``torch.nn.DataParallel`` over all visible
         CUDA devices. Also resets ``self.dummy_input`` and
         ``self.graph_exported``.
     """
     arch_spec = {'arch': config['backbone']}
     net = get_model(arch_spec, config['classes'])
     net = net.to(self.device)

     checkpoint_exists = os.path.isfile(modelfile)
     if checkpoint_exists:
         print('loaded model from:', modelfile)
         checkpoint = torch.load(modelfile)
         weights = convert_state_dict(checkpoint["model_state"])
         net.load_state_dict(weights)

     gpu_ids = range(torch.cuda.device_count())
     net = torch.nn.DataParallel(net, device_ids=gpu_ids)

     self.dummy_input = None
     self.graph_exported = False
     return net
Ejemplo n.º 10
0
def test(args):
    """Segment a single image and save the colour-decoded mask.

    Args:
        args: namespace providing ``model_path``, ``img_path``, ``dataset``,
            ``img_norm``, ``mask_path`` (optional) and ``out_path``. The
            architecture name is the text before the first ``_`` in the
            checkpoint file name.

    When ``args.mask_path`` is set, the tensor stored there is loaded and the
    network output is passed through ``to_super_to_pixels`` with it before
    the per-pixel argmax.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    # Reverse the channel axis, subtract the loader's mean, optionally scale.
    img = img[:, :, ::-1]
    img = img.astype(np.float64)
    img -= loader.mean
    if args.img_norm:
        img = img / 255.0

    # HWC -> CHW, then add a leading batch axis.
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    images = img.to(device)
    # Inference only: do not build an autograd graph.
    with torch.no_grad():
        outputs = model(images)
        if args.mask_path:
            print("Read Image Mask from : {}".format(args.mask_path))
            mask = torch.load(args.mask_path)
            mask = mask.to(device)
            outputs = to_super_to_pixels(outputs, mask)
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)

    decoded = loader.decode_segmap(pred)
    print("Classes found: ", np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
Ejemplo n.º 11
0
 def _load_model(self, cfg):
     """Create the icnetBN model for the 'vistas' loader and load weights.

     Args:
         cfg: mapping with ``'device'`` and a ``'testing'`` sub-mapping
             holding ``'config_path'`` and ``'model_path'``.

     Returns:
         The model in eval mode on ``self.device``. ``self.device`` and
         ``self.loader`` are set as a side effect.
     """
     self.device = torch.device(cfg['device'])
     data_loader = get_loader('vistas')
     self.loader = data_loader(root=cfg['testing']['config_path'],
                               is_transform=True,
                               test_mode=True)
     n_classes = self.loader.n_classes
     # Setup Model
     model_dict = {"arch": 'icnetBN'}
     model = get_model(model_dict, n_classes)
     # Restore onto the configured device so a checkpoint saved on a GPU
     # still loads when cfg['device'] is a CPU (or a different GPU).
     checkpoint = torch.load(cfg['testing']['model_path'],
                             map_location=self.device)
     state = convert_state_dict(checkpoint["model_state"])
     model.load_state_dict(state)
     model.eval()
     model.to(self.device)
     return model
Ejemplo n.º 12
0
def infer(args):
    """Segment one image on the CPU, print the classifier output, save mask.

    Args:
        args: namespace providing ``img_path``, ``model_path`` and
            ``out_path``.

    The image is resized to 240x240, channel-reversed, mean-subtracted and
    scaled by 1/255 before being fed to the FCN8s model. The class-index map
    (divided by ``N_CLASSES``) is passed to the classifier and the result is
    printed. The colour-decoded mask is written to ``args.out_path``.
    """
    device = "cpu"

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    img = misc.imresize(img, (240, 240))
    img = img[:, :, ::-1]
    img = img.astype(np.float64)
    img -= np.array([104.00699, 116.66877, 122.67892])
    img = img / 255.0

    # HWC -> CHW, then add a leading batch axis.
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup model
    model = get_model({"arch": "fcn8s"},
                      N_CLASSES,
                      version="mit_sceneparsing_benchmark")
    checkpoint = torch.load(args.model_path, map_location='cpu')
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # Setup classifier
    # NOTE(review): no classifier weights are loaded here, so the printed
    # output comes from a freshly constructed Classifier -- confirm intended.
    classifier = Classifier()
    classifier.eval()
    classifier.to(device)

    images = img.to(device)
    # Inference only: do not build an autograd graph.
    with torch.no_grad():
        outputs = model(images)
        # outputs = F.avg_pool2d(outputs, 8) # Uncomment to see the real feature map being used.
        pred_raw = outputs.data.max(1)[1]
        turn_logit = classifier(pred_raw.type(torch.FloatTensor) / N_CLASSES)
    pred = np.squeeze(pred_raw.cpu().numpy(), axis=0)

    print(turn_logit.detach().cpu().numpy())

    decoded = decode_segmap(pred)
    print("Classes found: ", np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
Ejemplo n.º 13
0
def init_model(args):
    """Prepare the device, HarDNet model and 'icboard' loader for inference.

    Args:
        args: namespace providing ``size`` (a string evaluated into the
            image size) and ``model_path`` (checkpoint path).

    Returns:
        tuple: ``(device, model, loader)`` with the model in eval mode on
        ``device``.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    loader_factory = get_loader("icboard")
    # NOTE(review): eval() executes whatever expression args.size contains;
    # only safe while that value comes from a trusted source.
    img_size = eval(args.size)
    loader = loader_factory(root=None,
                            is_transform=True,
                            img_size=img_size,
                            test_mode=True)
    n_classes = loader.n_classes

    # Build the network and restore the weights directly onto ``device``.
    arch_spec = {"arch": "hardnet"}
    model = get_model(arch_spec, n_classes)
    checkpoint = torch.load(args.model_path, map_location=device)
    weights = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(weights)
    model.eval()
    model.to(device)

    return device, model, loader
Ejemplo n.º 14
0
def test(cfg):
    """Segment every file in a folder with icnetBN and save decoded masks.

    Args:
        cfg: mapping with ``'device'`` and a ``'testing'`` sub-mapping holding
            ``config_path``, ``model_path``, ``img_fold``, ``img_rows``,
            ``img_cols`` and ``output_fold``.

    For each entry of ``img_fold`` the prediction is resized back to the
    original image size and written to ``output_fold`` as
    ``mask_<name>.png``, where ``<name>`` is the file name without its
    extension.
    """
    device = torch.device(cfg['device'])
    data_loader = get_loader('vistas')
    loader = data_loader(root=cfg['testing']['config_path'],
                         is_transform=True,
                         test_mode=True)
    n_classes = loader.n_classes
    # Setup Model
    model_dict = {"arch": 'icnetBN'}
    model = get_model(model_dict, n_classes)
    # Restore onto the configured device so GPU-saved checkpoints also load
    # when cfg['device'] is a CPU.
    checkpoint = torch.load(cfg['testing']['model_path'], map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    input_size = (cfg['testing']['img_rows'], cfg['testing']['img_cols'])

    for img_name in os.listdir(cfg['testing']['img_fold']):
        img_path = os.path.join(cfg['testing']['img_fold'], img_name)
        img = misc.imread(img_path)
        orig_size = img.shape[:-1]

        img = misc.imresize(img, input_size)
        img = img.astype(np.float64)
        img = img / 255.0
        # HWC -> CHW, then add a leading batch axis.
        img = img.transpose(2, 0, 1)
        img = np.expand_dims(img, 0)
        img = torch.from_numpy(img).float()

        img = img.to(device)
        # Inference only: do not build an autograd graph.
        with torch.no_grad():
            outputs = model(img)

        outputs = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
        outputs = outputs.astype(np.float32)
        # float32 with F mode, resize back to orig_size
        outputs = misc.imresize(outputs, orig_size, "nearest", mode="F")

        decoded = loader.decode_segmap(outputs)
        # splitext strips only the final extension, so names containing
        # extra dots keep their full stem and do not collide.
        base_name = os.path.splitext(img_name)[0]
        output_path = os.path.join(cfg['testing']['output_fold'],
                                   'mask_%s.png' % base_name)
        misc.imsave(output_path, decoded)
Ejemplo n.º 15
0
def validate(args):
    """Run a checkpoint over a dataset split and print segmentation metrics.

    Args:
        args: namespace providing ``gpus`` (value for
            ``CUDA_VISIBLE_DEVICES``), ``dataset``, ``split``, ``img_rows``,
            ``img_cols``, ``batch_size``, ``arch`` and ``model_path``.

    Prints the overall scores, one line per class (index, name from the
    module-level ``classes`` and IoU), and finally all IoU values joined by
    tabs on one line.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpus

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path,
                         split=args.split,
                         is_transform=True,
                         img_size=(args.img_rows, args.img_cols))
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader,
                                batch_size=args.batch_size,
                                num_workers=4)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(args.arch, n_classes)
    checkpoint = torch.load(args.model_path)
    state = convert_state_dict(checkpoint['model_state'])
    model.load_state_dict(state)
    print("Loaded checkpoint '{}' (epoch {})".format(args.model_path,
                                                     checkpoint['epoch']))
    model.eval()
    # Move the model once instead of on every iteration.
    model.cuda()

    # ``volatile=True`` no longer disables autograd; ``torch.no_grad()`` is
    # the supported way to run inference without building a graph.
    with torch.no_grad():
        for i, (images, labels) in tqdm(enumerate(valloader)):
            outputs = model(images.cuda())
            pred = outputs.data.max(1)[1].cpu().numpy()
            # Labels are only needed on the host for the metric update.
            gt = labels.numpy()

            running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, classes[i], class_iou[i])
    print('\t'.join([str(class_iou[i]) for i in range(n_classes)]))
Ejemplo n.º 16
0
def test(args, cfg):
    """Run the two-input model on the 'test' split and save decoded masks.

    Args:
        args: namespace providing ``model_path`` and ``img_norm``.
        cfg: configuration mapping with ``model``, ``data`` (``dataset``,
            ``img_rows``, ``img_cols``) and ``training`` (``n_workers``)
            entries.

    Each sample yields a path and two image tensors which are both passed to
    the model. The colour-decoded prediction is written to
    ``test_out/changenet_change_det/`` as ``<stem of path>.png``.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    data_loader = get_loader(cfg['data']['dataset'])
    data_path = get_data_path(cfg['data']['dataset'], config_file=cfg)
    # This loader instance is only used for n_classes and decode_segmap.
    loader = data_loader(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    t_loader = data_loader(
        data_path,
        is_transform=True,
        split='test',
        img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
    )

    # batch_size=1 is relied on below (img_path[0], squeeze on axis 0).
    testloader = data.DataLoader(t_loader,
                                 batch_size=1,
                                 num_workers=cfg['training']['n_workers'])

    # Setup Model
    model = get_model(cfg['model'], n_classes)
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(args.model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # cv.imwrite does not create directories and fails without raising.
    out_dir = "test_out/changenet_change_det/"
    os.makedirs(out_dir, exist_ok=True)

    with torch.no_grad():
        for i_val, (img_path, image_src,
                    image_dst) in tqdm(enumerate(testloader)):
            img_name = img_path[0]
            image_src = image_src.to(device)
            image_dst = image_dst.to(device)
            outputs = model(image_src, image_dst)

            pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
            decoded = loader.decode_segmap(pred)
            out_path = out_dir + Path(img_name).stem + ".png"
            decoded_bgr = cv.cvtColor(decoded, cv.COLOR_RGB2BGR)
            cv.imwrite(out_path, decoded_bgr)
Ejemplo n.º 17
0
def run():
    """Segment a fixed crop of every frame and show/save the overlay.

    Frames come from the project ``DataLoader`` built on hard-coded input
    and output directories. For each frame a 304x1085 crop at (x=550, y=680)
    is segmented with a two-class FCN8s model, the decoded mask is overlaid
    on the crop, post-processed by ``foo`` and pasted back into the frame.
    The frame is then saved and the crop displayed. Pressing ``q`` releases
    the loader and stops.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    mean = np.array([104.00699, 116.66877, 122.67892])

    dataloader = DataLoader('E:/Autopilot/input/vc', 'E:/Autopilot/output/vc')
    model_path = "E:/Autopilot/pytorch-semseg-master/runs/39060/fcn8s_camvid_best_model.pkl"

    model_file_name = os.path.split(model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]

    model_dict = {"arch": model_name}
    model = get_model(model_dict, 2, version='camvid')
    # map_location keeps the CPU fallback usable with GPU-saved checkpoints.
    checkpoint = torch.load(model_path, map_location=device)
    state = convert_state_dict(checkpoint["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # Top-left corner and size of the processed crop (same for every frame).
    # x = 520
    # y = 770
    x = 550
    y = 680
    crop_h = 304
    crop_w = 1085

    buffer = []
    for img0, _, _, _, frame in dataloader:
        if frame == 1:
            # Frame number 1 resets the state carried between frames.
            buffer = []
        crop = img0[y:y + crop_h, x:x + crop_w]
        img = preproc_img(crop, mean)
        img = img.to(device)
        # Inference only: do not build an autograd graph per frame.
        with torch.no_grad():
            outputs = model(img)

        pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
        decoded = decode_segmap(pred)

        res = overlay_mask(crop, decoded)
        res, buffer = foo(crop, res, decoded, buffer, x_l=455, y_l=180)

        img0[y:y + crop_h, x:x + crop_w] = res
        dataloader.save_results(img0)
        cv2.imshow('123', res)
        if cv2.waitKey(1) == ord('q'):
            dataloader.release()
            break
Ejemplo n.º 18
0
def load_model_and_preprocess(cfg, args, n_classes, device):
    """Build the configured model and load its checkpoint for evaluation.

    Args:
        cfg: configuration mapping with ``model``, ``logdir`` and
            ``training['resume']`` entries.
        args: namespace providing ``model_path``; ``args.steps`` is set from
            the config when the architecture name contains ``NoParamShare``.
        n_classes: class count passed to ``get_model``.
        device: device the model is moved to.

    Returns:
        tuple: ``(model, model_path)`` where ``model_path`` is the checkpoint
        that was actually loaded.
    """
    model_cfg = cfg['model']
    if 'NoParamShare' in model_cfg['arch']:
        args.steps = model_cfg['steps']

    net = get_model(model_cfg, n_classes, args).to(device)

    # Use the explicit path when it exists; otherwise fall back to the
    # resume checkpoint inside the log directory.
    if not os.path.exists(args.model_path):
        model_path = pjoin(cfg['logdir'], cfg['training']['resume'])
    else:
        model_path = args.model_path

    def _same_storage(storage, loc):
        # Hand every storage back unchanged.
        return storage

    checkpoint = torch.load(model_path, map_location=_same_storage)
    weights = convert_state_dict(checkpoint["model_state"])

    net.load_state_dict(weights)
    net.eval()
    net.to(device)

    return net, model_path
    def __init__(self):
        """Set up the PSPNet segmentation model and related constants.

        Stores the image size, the torch device, the class count, the model
        (in eval mode on the device), the network input size, the mean
        array and the colour map on the instance. The dataset, model name
        and checkpoint path are hard-coded below.

        Raises:
            ValueError: if ``dataset`` is changed to an unsupported value.
        """
        self.img_width, self.img_height = 640, 480
        print('Setting up CNN model...')
        # Set device
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else
                                   "cpu")  # GPU: device=cuda
        dataset = 'ade20k'
        model_name = 'pspnet'
        model_path = '/home/yubao/data/Dataset/semantic_slam/pspnet_50_ade20k.pth'

        if dataset == 'sunrgbd':  # If use version fine tuned on sunrgbd dataset
            self.n_classes = 38  # Semantic class number
            self.model = get_model(model_name,
                                   self.n_classes,
                                   version='sunrgbd_res50')
            # Load onto the selected device rather than a fixed 'cuda:0', so
            # the CPU fallback above keeps working.
            state = torch.load(model_path, map_location=self.device)
            self.model.load_state_dict(state)
            self.cnn_input_size = (321, 321)
            self.mean = np.array([104.00699, 116.66877,
                                  122.67892])  # Mean value of dataset
        elif dataset == 'ade20k':
            self.n_classes = 150  # Semantic class number
            self.model = get_model(model_name,
                                   self.n_classes,
                                   version='ade20k')
            state = torch.load(model_path, map_location=self.device)
            self.model.load_state_dict(convert_state_dict(
                state['model_state']))  # Remove 'module' from dictionary keys
            self.cnn_input_size = (473, 473)
            self.mean = np.array([104.00699, 116.66877,
                                  122.67892])  # Mean value of dataset
        else:
            # Fail early instead of hitting an AttributeError further down.
            raise ValueError("Unsupported dataset: %s" % dataset)
        self.model = self.model.to(self.device)
        self.model.eval()
        self.cmap = color_map(
            N=self.n_classes,
            normalized=False)  # Color map for semantic classes
Ejemplo n.º 20
0
def _load_teacher_model(cfg, n_classes, device):
    """Load the frozen teacher network named by ``pretrained_model``.

    The architecture name is the part of the checkpoint file name before the
    first underscore; the weights come from the checkpoint's "model_state".
    """
    checkpoint_path = cfg['training'].get('pretrained_model', None)
    if checkpoint_path is None:
        raise ValueError(
            "cfg['training']['pretrained_model'] must be set when "
            "'fed_by_teacher' or 'fed_by_intermediate' is enabled")
    arch = {'arch': checkpoint_path.split('/')[-1].split('_')[0]}
    teacher_model = get_model(arch, n_classes)
    pretrained = torch.load(checkpoint_path)
    teacher_model.load_state_dict(
        convert_state_dict(pretrained["model_state"]))
    teacher_model.eval()
    teacher_model.to(device)
    return teacher_model


def _mean_feature_loss(criterion, student_feats, teacher_feats):
    """Log and average ``criterion`` over paired student/teacher features."""
    losses = [
        criterion(student, teacher)
        for student, teacher in zip(student_feats, teacher_feats)
    ]
    log('mediate1_loss: {}, mediate2_loss: {}, mediate3_loss: {}, mediate4_loss: {}'
        .format(*losses))
    return sum(losses) / len(losses)


def train(cfg, writer, logger):
    """Train the model described by ``cfg`` and keep the best checkpoint.

    Args:
        cfg: nested configuration mapping; the 'data', 'training' and 'model'
            sections plus 'id' (and optionally 'seed') are read.
        writer: summary writer; ``add_scalar`` and
            ``file_writer.get_logdir()`` are used.
        logger: logger used for the optimizer / loss announcements.

    Each outer iteration makes one full pass over the training loader. When a
    validation check is due, the mean *training* loss of that pass is compared
    with the best one seen so far and, if not worse, a checkpoint is written
    into the writer's log directory. The path of the last saved checkpoint is
    finally appended to a hard-coded experiment log file.

    Optional distillation ('fed_by_teacher' / 'fed_by_intermediate') adds L1
    terms against a teacher network loaded once from 'pretrained_model'.

    Raises:
        ValueError: if distillation is requested without 'pretrained_model',
            or intermediate distillation is requested for an architecture
            whose forward pass returns only the prediction.
    """
    train_cfg = cfg['training']

    # Setup dataset split before setting up the seed for random
    data_split_info = init_data_split(
        cfg['data']['path'],
        cfg['data'].get('split_ratio', 0),
        cfg['data'].get('compound', False))  # fly jenelia dataset

    # Setup seeds
    seed = cfg.get('seed', 1337)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Cross Entropy Weight
    if train_cfg['loss']['name'] != 'regression_l1':
        weight = prep_class_val_weights(train_cfg['cross_entropy_ratio'])
    else:
        weight = None
    log('Using loss : {}'.format(train_cfg['loss']['name']))

    # Setup Augmentations (no augmentation => None)
    augmentations = train_cfg.get('augmentations', None)
    data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = cfg['data']['path']
    patch_size = list(train_cfg['patch_size'].values())

    t_loader = data_loader(data_path,
                           split=cfg['data']['train_split'],
                           augmentations=data_aug,
                           data_split_info=data_split_info,
                           patch_size=patch_size,
                           allow_empty_patch=train_cfg.get(
                               'allow_empty_patch', True),
                           n_classes=train_cfg.get('n_classes', 1))

    n_classes = t_loader.n_classes
    log('n_classes is: {}'.format(n_classes))
    trainloader = data.DataLoader(t_loader,
                                  batch_size=train_cfg['batch_size'],
                                  num_workers=train_cfg['n_workers'],
                                  shuffle=False)

    # Setup Model
    model = get_model(cfg['model'], n_classes).to(device)
    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))

    # Setup optimizer, lr_scheduler and loss function
    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {
        k: v
        for k, v in train_cfg['optimizer'].items() if k != 'name'
    }

    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    logger.info("Using optimizer {}".format(optimizer))

    scheduler = get_scheduler(optimizer, train_cfg['lr_schedule'])

    loss_fn = get_loss_function(cfg)
    logger.info("Using loss {}".format(loss_fn))

    # One shared L1 criterion for the regression loss and every distillation
    # term. It is kept under its own name so that it can never be overwritten
    # by the per-batch loss tensor.
    l1_criterion = nn.L1Loss()

    min_loss = None
    start_iter = 0
    if train_cfg['resume'] is not None:
        log('resume saved model')
        if os.path.isfile(train_cfg['resume']):
            display("Loading model and optimizer from checkpoint '{}'".format(
                train_cfg['resume']))
            checkpoint = torch.load(train_cfg['resume'])
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            start_iter = checkpoint["epoch"]
            min_loss = checkpoint["min_loss"]
            display("Loaded checkpoint '{}' (iter {})".format(
                train_cfg['resume'], checkpoint["epoch"]))
        else:
            display("No checkpoint found at '{}'".format(train_cfg['resume']))
            log('no saved model found')

    # Distillation setup: the teacher never changes, so load it once here
    # instead of re-reading the checkpoint for every batch.
    use_teacher = train_cfg.get('fed_by_teacher', False)
    use_intermediate = train_cfg.get('fed_by_intermediate', False)
    prediction_only = cfg['model']['arch'] in ('unet3dreg', 'unet3d')
    if use_intermediate and prediction_only:
        raise ValueError(
            "'fed_by_intermediate' needs intermediate feature maps, which "
            "the '{}' architecture does not return".format(
                cfg['model']['arch']))
    teacher_model = None
    if use_teacher or use_intermediate:
        teacher_model = _load_teacher_model(cfg, n_classes, device)

    time_meter = averageMeter()

    i_train_iter = start_iter
    save_path = None  # stays None when no checkpoint is written in this run

    display('Training from {}th iteration\n'.format(i_train_iter))
    while i_train_iter < train_cfg['train_iters']:
        i_batch_idx = 0
        train_iter_start_time = time.time()
        average_loss = 0.0

        # training
        for (images, labels) in trainloader:
            start_ts = time.time()
            # NOTE(review): the scheduler is stepped before optimizer.step();
            # recent PyTorch versions warn about this order. Left as-is to
            # keep the learning-rate schedule unchanged -- confirm intent.
            scheduler.step()
            model.train()
            images = images.to(device)
            labels = labels.to(device)

            soft_loss = -1
            mediate_average_loss = -1
            optimizer.zero_grad()
            if prediction_only:
                outputs = model(images)
                student_feats = None
            else:
                (outputs, myconv1_copy, myconv3_copy, myup2_copy,
                 myup1_copy) = model(images)
                student_feats = (myconv1_copy, myconv3_copy, myup2_copy,
                                 myup1_copy)

            if train_cfg.get('task', 'regression') == 'regression':
                hard_loss = l1_criterion(outputs, labels)
            else:
                hard_loss = loss_fn(
                    input=outputs,
                    target=labels,
                    weight=weight,
                    size_average=train_cfg['loss']['size_average'])

            if teacher_model is not None:
                # Teacher outputs are fixed targets: no graph is needed.
                with torch.no_grad():
                    (outputs_teacher, conv1_copy, conv3_copy, up2_copy,
                     up1_copy) = teacher_model(images)
                teacher_feats = (conv1_copy, conv3_copy, up2_copy, up1_copy)

            if use_teacher:
                soft_loss = l1_criterion(outputs, outputs_teacher)
                if use_intermediate:
                    mediate_average_loss = _mean_feature_loss(
                        l1_criterion, student_feats, teacher_feats)
                    loss = (hard_loss + 0.1 * soft_loss +
                            0.1 * mediate_average_loss)
                else:
                    loss = 0.9 * hard_loss + 0.1 * soft_loss
            elif use_intermediate:
                mediate_average_loss = _mean_feature_loss(
                    l1_criterion, student_feats, teacher_feats)
                loss = 0.9 * hard_loss + 0.1 * mediate_average_loss
            else:
                loss = hard_loss

            log('==> hard loss: {} soft loss: {} mediate loss: {}'.format(
                hard_loss, soft_loss, mediate_average_loss))
            # Accumulate a plain float: adding the tensor itself would keep
            # every batch's autograd graph alive until the end of the pass.
            average_loss += loss.item()
            loss.backward()
            optimizer.step()

            time_meter.update(time.time() - start_ts)
            print_per_batch_check = True if train_cfg[
                'print_interval_per_batch'] else i_batch_idx + 1 == len(
                    trainloader)
            if (i_train_iter + 1) % train_cfg[
                    'print_interval'] == 0 and print_per_batch_check:
                fmt_str = "Iter [{:d}/{:d}]  Loss: {:.4f}  Time/Image: {:.4f}"
                print_str = fmt_str.format(
                    i_train_iter + 1, train_cfg['train_iters'], loss.item(),
                    time_meter.avg / train_cfg['batch_size'])

                display(print_str)
                writer.add_scalar('loss/train_loss', loss.item(),
                                  i_train_iter + 1)
                time_meter.reset()
            i_batch_idx += 1
        time_for_one_iteration = time.time() - train_iter_start_time

        display(
            'EntireTime for {}th training iteration: {}  EntireTime/Image: {}'.
            format(
                i_train_iter + 1, time_converter(time_for_one_iteration),
                time_converter(
                    time_for_one_iteration /
                    (len(trainloader) * train_cfg['batch_size']))))
        average_loss /= (len(trainloader) * train_cfg['batch_size'])

        # validation
        validation_check = (
            (i_train_iter + 1) % train_cfg['val_interval'] == 0
            or (i_train_iter + 1) == train_cfg['train_iters'])
        if not validation_check:
            print('no validation check')
        else:
            # This check is used to update the best model; the criterion is
            # the mean training loss of the pass that just finished.
            log('Validation: average loss for current iteration is: {}'.format(
                average_loss))
            if min_loss is None:
                min_loss = average_loss

            if average_loss <= min_loss:
                min_loss = average_loss
                state = {
                    "epoch": i_train_iter + 1,
                    "model_state": model.state_dict(),
                    "optimizer_state": optimizer.state_dict(),
                    "scheduler_state": scheduler.state_dict(),
                    "min_loss": min_loss
                }

                save_path = os.path.join(
                    os.getcwd(), writer.file_writer.get_logdir(),
                    "{}_{}_model_best.pkl".format(cfg['model']['arch'],
                                                  cfg['data']['dataset']))
                print('save_path is: ' + save_path)
                torch.save(state, save_path)

        i_train_iter += 1

    if save_path is None:
        # Nothing was saved (e.g. resumed past 'train_iters' or never beat the
        # resumed best loss), so there is no path to record.
        log('no best model was saved in this run')
        return
    with open('/home/heng/Research/isbi/log_final_experiment_flyJanelia.txt',
              'a') as f:  # to change!!!!!
        run_id = cfg['id']
        f.write(str(run_id) + ':' + save_path + '\n')
Ejemplo n.º 21
0
def test(args):
    """Segment the image at ``args.img_path`` and save the decoded mask.

    Args:
        args: namespace providing ``model_path``, ``img_path``, ``dataset``,
            ``img_norm``, ``dcrf`` and ``out_path``.

    The architecture name is taken from the checkpoint file name (the text
    before the first underscore). The decoded prediction is written to
    ``args.out_path``; when ``args.dcrf`` is set, a DenseCRF-refined mask is
    additionally written next to it with a ``_drf.png`` suffix.

    The previous revision wrapped this body in an unfinished loop over an
    undefined ``dataloader`` (which did not even parse); since none of the
    loop variables were used, the single-image flow is restored here.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[: model_file_name.find("_")]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    loader = data_loader(root=None, is_transform=True, img_norm=args.img_norm, test_mode=True)
    n_classes = loader.n_classes

    # Copy at loader resolution, used only as the RGB guide of the CRF.
    resized_img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]), interp="bicubic")

    orig_size = img.shape[:-1]
    if model_name in ["pspnet", "icnet", "icnetBN"]:
        # uint8 with RGB mode, resize width and height which are odd numbers
        img = misc.imresize(img, (orig_size[0] // 2 * 2 + 1, orig_size[1] // 2 * 2 + 1))
    else:
        img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))

    img = img[:, :, ::-1]  # reverse the channel axis
    img = img.astype(np.float64)
    img -= loader.mean
    if args.img_norm:
        img = img.astype(float) / 255.0

    # HWC -> CHW, then add the batch axis
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    # map_location keeps GPU-saved checkpoints loadable on a CPU-only machine.
    state = convert_state_dict(torch.load(args.model_path, map_location=device)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    images = img.to(device)
    # Pure inference: do not build an autograd graph.
    with torch.no_grad():
        outputs = model(images)

    if args.dcrf:
        unary = outputs.data.cpu().numpy()
        unary = np.squeeze(unary, 0)
        # NOTE(review): the log is taken of the raw network output; if that is
        # not a probability map this can yield NaNs -- confirm upstream.
        unary = -np.log(unary)
        unary = unary.transpose(2, 1, 0)
        w, h, c = unary.shape
        unary = unary.transpose(2, 0, 1).reshape(loader.n_classes, -1)
        unary = np.ascontiguousarray(unary)

        resized_img = np.ascontiguousarray(resized_img)

        d = dcrf.DenseCRF2D(w, h, loader.n_classes)
        d.setUnaryEnergy(unary)
        d.addPairwiseBilateral(sxy=5, srgb=3, rgbim=resized_img, compat=1)

        q = d.inference(50)
        mask = np.argmax(q, axis=0).reshape(w, h).transpose(1, 0)
        decoded_crf = loader.decode_segmap(np.array(mask, dtype=np.uint8))
        dcrf_path = args.out_path[:-4] + "_drf.png"
        # Save to the path that is reported below (the previous revision
        # wrote to the literal, unformatted name "{}/{}.jpg").
        misc.imsave(dcrf_path, decoded_crf)
        print("Dense CRF Processed Mask Saved at: {}".format(dcrf_path))

    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    if model_name in ["pspnet", "icnet", "icnetBN"]:
        pred = pred.astype(np.float32)
        # float32 with F mode, resize back to orig_size
        pred = misc.imresize(pred, orig_size, "nearest", mode="F")

    decoded = loader.decode_segmap(pred)
    print("Classes found: ", np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
Ejemplo n.º 22
0
def validate(cfg, args):
    """Evaluate a checkpoint on the validation split and print its scores.

    Args:
        cfg: configuration mapping; 'data' (dataset, val_split, img_rows,
            optional img_cols) and 'training' (batch_size) are read.
        args: namespace providing ``model_path``, ``eval_flip`` and
            ``measure_time``.

    The architecture name is the part of the checkpoint file name before the
    first underscore. With ``args.eval_flip`` the prediction is the average
    of the outputs for the batch and for its copy flipped along the last
    axis. Overall scores and per-class values are printed at the end.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = get_data_path(cfg['data']['dataset'])

    # The size used to be (img_rows, img_rows), silently squashing
    # non-square inputs. Fall back to img_rows only when img_cols is absent
    # so that existing square configs keep working.
    img_rows = cfg['data']['img_rows']
    img_cols = cfg['data'].get('img_cols', img_rows)

    loader = data_loader(
        data_path,
        split=cfg['data']['val_split'],
        is_transform=True,
        img_size=(img_rows, img_cols),
    )

    n_classes = loader.n_classes

    valloader = data.DataLoader(loader,
                                batch_size=cfg['training']['batch_size'],
                                num_workers=8)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(model_name, n_classes, version=cfg['data']['dataset'])
    # map_location keeps GPU-saved checkpoints loadable on a CPU-only machine.
    state = convert_state_dict(
        torch.load(args.model_path, map_location=device)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    for i, (images, labels) in enumerate(valloader):
        start_time = timeit.default_timer()

        images = images.to(device)

        # Evaluation only: skip autograd bookkeeping for every forward pass.
        with torch.no_grad():
            outputs = model(images)

            if args.eval_flip:
                # Flip images in numpy (not support in tensor)
                outputs = outputs.data.cpu().numpy()
                flipped_images = np.copy(
                    images.data.cpu().numpy()[:, :, :, ::-1])
                flipped_images = torch.from_numpy(flipped_images).float().to(
                    device)
                outputs_flipped = model(flipped_images)
                outputs_flipped = outputs_flipped.data.cpu().numpy()
                # Undo the flip on the second output before averaging.
                outputs = (outputs + outputs_flipped[:, :, :, ::-1]) / 2.0

                pred = np.argmax(outputs, axis=1)
            else:
                pred = outputs.data.max(1)[1].cpu().numpy()

        gt = labels.numpy()

        if args.measure_time:
            elapsed_time = timeit.default_timer() - start_time
            print("Inference time \
                  (iter {0:5d}): {1:3.5f} fps".format(
                i + 1, pred.shape[0] / elapsed_time))
        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, class_iou[i])
Ejemplo n.º 23
0
def test(args):
    """Segment the image at ``args.img_path`` and save the decoded mask.

    Args:
        args: namespace providing ``model_path``, ``img_path``, ``dataset``,
            ``img_norm``, ``dcrf`` and ``out_path``.

    The architecture name is the part of the checkpoint file name before the
    first underscore. The decoded prediction is written to ``args.out_path``;
    when ``args.dcrf`` is set, a DenseCRF-refined mask is additionally written
    next to it with a ``_drf.png`` suffix.
    """
    # First GPU when available (as before), otherwise the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model_file_name = os.path.split(args.model_path)[1]
    model_name = model_file_name[:model_file_name.find('_')]

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    # Copy at loader resolution, used only as the RGB guide of the CRF.
    resized_img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]), interp='bicubic')

    orig_size = img.shape[:-1]
    if model_name in ['pspnet', 'icnet', 'icnetBN']:
        # uint8 with RGB mode, resize width and height which are odd numbers
        img = misc.imresize(img, (orig_size[0] // 2 * 2 + 1, orig_size[1] // 2 * 2 + 1))
    else:
        img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))
    img = img[:, :, ::-1]  # reverse the channel axis
    img = img.astype(np.float64)
    img -= loader.mean
    if args.img_norm:
        img = img.astype(float) / 255.0
    # HWC -> CHW, then add the batch axis
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    model = get_model(model_name, n_classes, version=args.dataset)
    # map_location keeps GPU-saved checkpoints loadable on a CPU-only machine.
    state = convert_state_dict(
        torch.load(args.model_path, map_location=device)['model_state'])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # `Variable(..., volatile=True)` has no effect since PyTorch 0.4, so the
    # forward pass was silently building a graph; no_grad is its replacement.
    with torch.no_grad():
        outputs = model(img.to(device))

    if args.dcrf:
        unary = outputs.data.cpu().numpy()
        unary = np.squeeze(unary, 0)
        # NOTE(review): the log is taken of the raw network output; if that is
        # not a probability map this can yield NaNs -- confirm upstream.
        unary = -np.log(unary)
        unary = unary.transpose(2, 1, 0)
        w, h, c = unary.shape
        unary = unary.transpose(2, 0, 1).reshape(loader.n_classes, -1)
        unary = np.ascontiguousarray(unary)

        resized_img = np.ascontiguousarray(resized_img)

        d = dcrf.DenseCRF2D(w, h, loader.n_classes)
        d.setUnaryEnergy(unary)
        d.addPairwiseBilateral(sxy=5, srgb=3, rgbim=resized_img, compat=1)

        q = d.inference(50)
        mask = np.argmax(q, axis=0).reshape(w, h).transpose(1, 0)
        decoded_crf = loader.decode_segmap(np.array(mask, dtype=np.uint8))
        dcrf_path = args.out_path[:-4] + '_drf.png'
        misc.imsave(dcrf_path, decoded_crf)
        print("Dense CRF Processed Mask Saved at: {}".format(dcrf_path))

    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    if model_name in ['pspnet', 'icnet', 'icnetBN']:
        pred = pred.astype(np.float32)
        # float32 with F mode, resize back to orig_size
        pred = misc.imresize(pred, orig_size, 'nearest', mode='F')
    decoded = loader.decode_segmap(pred)
    print('Classes found: ', np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
Ejemplo n.º 24
0
def validate(cfg, args):
    """Run a checkpoint over one dataset split and log per-image and overall metrics.

    Args:
        cfg: configuration mapping; 'data', 'model', 'training' and the
            optional 'validation' section are read.
        args: namespace providing ``model_path``, ``dataset_split``,
            ``bn_fusion``, ``update_bn``, ``save_image``, ``output_path``,
            ``measure_time``, ``output_csv_path`` and ``miou_logs_path``.

    One line per image is appended to ``args.output_csv_path``; the overall
    scores, class-wise scores and confusion matrix of every metric are
    printed and appended to ``args.miou_logs_path``. With ``args.update_bn``
    the BatchNorm layers are reset, the model is run in train mode, and the
    resulting weights are saved to 'hardnet_cityscapes_mod.pth'.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    use_cuda = device.type == "cuda"

    # Setup Augmentations
    data_aug = None
    if "validation" in cfg:
        augmentations = cfg["validation"].get("augmentations", None)
        if cfg["data"]["dataset"] == "softmax_cityscapes_convention":
            data_aug = get_composed_augmentations_softmax(augmentations)
        else:
            data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]

    loader = data_loader(
        data_path,
        config=cfg["data"],
        is_transform=True,
        split=cfg["data"][args.dataset_split],
        img_size=(cfg["data"]["img_rows"], cfg["data"]["img_cols"]),
        augmentations=data_aug,
    )
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader, batch_size=1, num_workers=1)

    # Setup Metrics: one for segmentation plus one per configured classifier
    running_metrics_val = {"seg": runningScoreSeg(n_classes)}
    if "classifiers" in cfg["data"]:
        for name, classes in cfg["data"]["classifiers"].items():
            running_metrics_val[name] = runningScoreClassifier(len(classes))
    if "bin_classifiers" in cfg["data"]:
        for name in cfg["data"]["bin_classifiers"]:
            running_metrics_val[name] = runningScoreClassifier(2)

    # Setup Model
    model = get_model(cfg["model"], n_classes).to(device)
    # Map onto the selected device: a hard-coded "cuda:0" made the CPU
    # fallback above unusable.
    state = torch.load(args.model_path, map_location=device)["model_state"]
    state = convert_state_dict(state)  # converts from dataParallel module to normal module
    model.load_state_dict(state, strict=False)

    if args.bn_fusion:
        model = fuse_bn_recursively(model)

    if args.update_bn:
        print("Reset BatchNorm and recalculate mean/var")
        model.apply(reset_batchnorm)
        model.train()
    else:
        model.eval()  # set batchnorm and dropouts to work in eval mode
    model.to(device)
    total_time = 0
    n_frames = 0

    total_params = sum(p.numel() for p in model.parameters())
    print('Parameters: ', total_params)

    torch.backends.cudnn.benchmark = True

    with open(args.output_csv_path, 'a') as output_csv:

        output_csv.write(create_overall_logs_header(running_metrics_val))

        for i, (images, label_dict, fname) in enumerate(valloader):
            images = images.to(device)

            # Synchronize around the forward pass so the timer measures the
            # GPU work itself; there is nothing to wait for on the CPU.
            if use_cuda:
                torch.cuda.synchronize()
            start_time = time.perf_counter()
            with torch.no_grad():  # deactivates autograd engine, less mem usage
                output_dict = model(images)
            if use_cuda:
                torch.cuda.synchronize()
            elapsed_time = time.perf_counter() - start_time
            n_frames += 1

            if args.save_image:
                save_image(images, output_dict, fname, args.output_path, loader=loader)

            image_score = []

            # update running metrics and record imagewise metrics
            for name, metrics in running_metrics_val.items():
                gt_array = label_dict[name].data.cpu().numpy()
                loss_key = name + '_loss'
                if loss_key in cfg['training'] and cfg['training'][loss_key]['name'] == 'l1':
                    # for binary classification: sign of the output, with -1
                    # mapped to class 0 in both prediction and label
                    pred_array = output_dict[name].data.cpu().numpy()
                    pred_array = np.sign(pred_array)
                    pred_array[pred_array == -1] = 0
                    gt_array[gt_array == -1] = 0
                else:
                    pred_array = output_dict[name].data.max(1)[1].cpu().numpy()

                if name == "seg" or name == "softmax":
                    image_score.append("%.3f" % metrics.get_image_score(gt_array, pred_array))
                else:
                    imagewise_score = softmax(np.squeeze(
                        output_dict[name].data.cpu().numpy()
                    )).round(3)
                    image_score.append("%.3f" % (imagewise_score[gt_array[0]]))
                    # append raw probability results for non-segmentation task
                    image_score.append(str(imagewise_score))
                    image_score.append("pred %s label %s" % (np.argmax(imagewise_score), gt_array[0]))

                metrics.update(gt_array, pred_array)

            # record imagewise metrics
            output_csv.write('%s, %.4f, %s\n' % (fname[0], 1 / elapsed_time, ",".join(image_score)))

            if args.measure_time:
                total_time += elapsed_time
                print(
                    "Iter {0:5d}: {1:3.5f} fps {2}".format(
                        i + 1, 1 / elapsed_time, " ".join(image_score)
                    )
                )

    # total_time is only accumulated under --measure_time; dividing
    # unconditionally raised ZeroDivisionError otherwise. The numerator is the
    # real frame count rather than the last (zero-based) loop index.
    if total_time > 0:
        print("Total Frame Rate = %.2f fps" % (n_frames / total_time))

    if args.update_bn:
        model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
        state2 = {"model_state": model.state_dict()}
        torch.save(state2, 'hardnet_cityscapes_mod.pth')

    with open(args.miou_logs_path, 'a') as main_output_csv:  # record overall metrics
        main_output_csv.write('%s\n' % args.output_csv_path)

        for name, metrics in running_metrics_val.items():
            overall, classwise = metrics.get_scores()

            for k, v in overall.items():
                print("{}_{}: {}".format(name, k, v))
                main_output_csv.write("%s,%s,%s\n" % (name, k, v))

            for metric_name, metric in classwise.items():
                for k, v in metric.items():
                    print("{}_{}_{}: {}".format(name, metric_name, k, v))
                    main_output_csv.write("%s,%s,%s,%s\n" % (name, metric_name, k, v))

            confusion_matrix = np.round(metrics.confusion_matrix, 3)
            print("confusion matrix:\n%s" % confusion_matrix)
            main_output_csv.write("%s\n" % (
                "\n".join(str(row) for row in confusion_matrix)
            ))
Ejemplo n.º 25
0
def validate(cfg, args):
    """Evaluate a checkpoint on the validation split and print its scores.

    Args:
        cfg: configuration mapping; 'data' (dataset, path, val_split,
            img_rows, img_cols, fold, n_classes), 'model' and 'training'
            (batch_size) are read.
        args: namespace providing ``model_path`` and ``measure_time``.

    A forward pass that raises ``RuntimeError`` is retried after emptying the
    CUDA cache; after a few failed attempts the error is re-raised instead of
    retrying forever.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = cfg['data']['path']

    loader = data_loader(
        data_path,
        split=cfg['data']['val_split'],
        is_transform=True,
        img_size=(cfg['data']['img_rows'],
                  cfg['data']['img_cols']),
        fold=cfg['data']['fold'],
        n_classes=cfg['data']['n_classes']
    )

    n_classes = loader.n_classes

    valloader = data.DataLoader(loader,
                                batch_size=cfg['training']['batch_size'],
                                num_workers=1)
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg['model'], n_classes).to(device)
    # map_location keeps GPU-saved checkpoints loadable on a CPU-only machine.
    state = convert_state_dict(
        torch.load(args.model_path, map_location=device)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    max_attempts = 3  # bounded, so a persistent failure cannot loop forever

    for i, (images, labels) in enumerate(valloader):
        start_time = timeit.default_timer()

        images = images.to(device)
        for attempt in range(1, max_attempts + 1):
            try:
                # Evaluation only: skip autograd bookkeeping.
                with torch.no_grad():
                    outputs = model(images)
                break
            except RuntimeError:
                # RuntimeError covers CUDA out-of-memory; unlike the former
                # bare `except`, Ctrl-C and programming errors now propagate.
                print('Caught an exception with image ', i)
                torch.cuda.empty_cache()
                if attempt == max_attempts:
                    raise

        pred = outputs.data.max(1)[1].cpu().numpy()

        gt = labels.numpy()
        if args.measure_time:
            elapsed_time = timeit.default_timer() - start_time
            print(
                "Inference time \
                  (iter {0:5d}): {1:3.5f} fps".format(
                    i + 1, pred.shape[0] / elapsed_time
                )
            )
        running_metrics.update(gt, pred)

    score, class_iou = running_metrics.get_scores()

    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, class_iou[i])
Ejemplo n.º 26
0
def train(cfg, writer, logger):
    """Train a segmentation model, validating periodically and saving the best checkpoint.

    Seeds the RNGs, builds the train/validation data loaders, the model, the
    optimizer, the LR scheduler and the loss from ``cfg``, optionally resumes
    from ``cfg["training"]["resume"]``, then iterates over the training data
    until ``cfg["training"]["train_iters"]`` is reached. Every
    ``val_interval`` iterations the validation set is evaluated and, when the
    mean IoU is at least as good as the best seen so far, a checkpoint is
    written into the writer's log directory.

    Args:
        cfg: Nested configuration mapping; reads ``"seed"`` (optional),
            ``cfg["training"]``, ``cfg["data"]`` and ``cfg["model"]``.
        writer: Summary writer; ``add_scalar`` and
            ``file_writer.get_logdir()`` are used.
        logger: Logger; only ``info`` is used.
    """

    # Setup seeds
    torch.manual_seed(cfg.get("seed", 1337))
    torch.cuda.manual_seed(cfg.get("seed", 1337))
    np.random.seed(cfg.get("seed", 1337))
    random.seed(cfg.get("seed", 1337))

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Setup Augmentations
    augmentations = cfg["training"].get("augmentations", None)
    data_aug = get_composed_augmentations(augmentations)

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]

    # Training split, with augmentations applied.
    t_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg["data"]["train_split"],
        img_size=(cfg["data"]["img_rows"], cfg["data"]["img_cols"]),
        augmentations=data_aug,
    )

    # Validation split, no augmentations passed.
    v_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg["data"]["val_split"],
        img_size=(cfg["data"]["img_rows"], cfg["data"]["img_cols"]),
    )

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(
        t_loader,
        batch_size=cfg["training"]["batch_size"],
        num_workers=cfg["training"]["n_workers"],
        shuffle=True,
    )

    valloader = data.DataLoader(v_loader,
                                batch_size=cfg["training"]["batch_size"],
                                num_workers=cfg["training"]["n_workers"])

    # Setup Metrics
    running_metrics_val = runningScore(n_classes)

    # Setup Model
    model = get_model(cfg["model"], n_classes).to(device)
    # NOTE(review): `args` is not a parameter of this function, so it must be
    # resolvable from an enclosing/module scope or this line raises NameError.
    # The loaded object is passed to convert_state_dict directly (no
    # ["model_state"] lookup) -- confirm this matches the checkpoint layout.
    state = convert_state_dict(torch.load(args.model_path))
    model.load_state_dict(state)
    # eval() here is overridden by model.train() at every training iteration.
    model.eval()
    model.to(device)

    model = torch.nn.DataParallel(model,
                                  device_ids=range(torch.cuda.device_count()))

    # Setup optimizer, lr_scheduler and loss function
    optimizer_cls = get_optimizer(cfg)
    # Every optimizer option except "name" is forwarded as a keyword argument.
    optimizer_params = {
        k: v
        for k, v in cfg["training"]["optimizer"].items() if k != "name"
    }

    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    logger.info("Using optimizer {}".format(optimizer))

    scheduler = get_scheduler(optimizer, cfg["training"]["lr_schedule"])

    loss_fn = get_loss_function(cfg)
    logger.info("Using loss {}".format(loss_fn))

    # Optionally resume model, optimizer and scheduler state from a checkpoint.
    start_iter = 0
    if cfg["training"]["resume"] is not None:
        if os.path.isfile(cfg["training"]["resume"]):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    cfg["training"]["resume"]))
            checkpoint = torch.load(cfg["training"]["resume"])
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            # The "epoch" entry holds the iteration counter saved below.
            start_iter = checkpoint["epoch"]
            logger.info("Loaded checkpoint '{}' (iter {})".format(
                cfg["training"]["resume"], checkpoint["epoch"]))
        else:
            logger.info("No checkpoint found at '{}'".format(
                cfg["training"]["resume"]))

    val_loss_meter = averageMeter()
    time_meter = averageMeter()

    best_iou = -100.0
    i = start_iter
    # Cleared once the target iteration count is reached, to leave the outer loop.
    flag = True

    # The outer loop re-iterates the training loader until train_iters is hit.
    while i <= cfg["training"]["train_iters"] and flag:
        for (images, labels) in trainloader:
            i += 1
            start_ts = time.time()
            # NOTE(review): the scheduler is stepped before optimizer.step();
            # recent PyTorch releases recommend the opposite order -- confirm
            # against the PyTorch version in use.
            scheduler.step()
            model.train()
            # Diagnostic only: print the label range when it looks out of bounds.
            if torch.max(labels) > n_classes or torch.min(labels) < 0:
                print(torch.min(labels), torch.max(labels))
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)

            loss = loss_fn(input=outputs, target=labels)

            loss.backward()
            optimizer.step()

            time_meter.update(time.time() - start_ts)

            # Periodic progress report to stdout, the logger and the writer.
            if (i + 1) % cfg["training"]["print_interval"] == 0:
                fmt_str = "Iter [{:d}/{:d}]  Loss: {:.4f}  Time/Image: {:.4f}"
                print_str = fmt_str.format(
                    i + 1,
                    cfg["training"]["train_iters"],
                    loss.item(),
                    time_meter.avg / cfg["training"]["batch_size"],
                )

                print(print_str)
                logger.info(print_str)
                writer.add_scalar("loss/train_loss", loss.item(), i + 1)
                time_meter.reset()

            # Validate every val_interval iterations and at the final iteration.
            if (i + 1) % cfg["training"]["val_interval"] == 0 or (
                    i + 1) == cfg["training"]["train_iters"]:
                model.eval()
                with torch.no_grad():
                    for i_val, (images_val,
                                labels_val) in tqdm(enumerate(valloader)):
                        images_val = images_val.to(device)
                        labels_val = labels_val.to(device)

                        outputs = model(images_val)
                        val_loss = loss_fn(input=outputs, target=labels_val)

                        # Per-pixel index of the highest-scoring class (dim 1).
                        pred = outputs.data.max(1)[1].cpu().numpy()
                        gt = labels_val.data.cpu().numpy()

                        running_metrics_val.update(gt, pred)
                        val_loss_meter.update(val_loss.item())

                writer.add_scalar("loss/val_loss", val_loss_meter.avg, i + 1)
                logger.info("Iter %d Loss: %.4f" % (i + 1, val_loss_meter.avg))

                score, class_iou = running_metrics_val.get_scores()
                for k, v in score.items():
                    print(k, v)
                    logger.info("{}: {}".format(k, v))
                    writer.add_scalar("val_metrics/{}".format(k), v, i + 1)

                for k, v in class_iou.items():
                    logger.info("{}: {}".format(k, v))
                    writer.add_scalar("val_metrics/cls_{}".format(k), v, i + 1)

                # Start the next validation round from clean accumulators.
                val_loss_meter.reset()
                running_metrics_val.reset()

                # Keep only the best checkpoint (ties overwrite the previous one).
                if score["Mean IoU : \t"] >= best_iou:
                    best_iou = score["Mean IoU : \t"]
                    state = {
                        "epoch": i + 1,
                        "model_state": model.state_dict(),
                        "optimizer_state": optimizer.state_dict(),
                        "scheduler_state": scheduler.state_dict(),
                        "best_iou": best_iou,
                    }
                    save_path = os.path.join(
                        writer.file_writer.get_logdir(),
                        "{}_{}_best_model.pkl".format(cfg["model"]["arch"],
                                                      cfg["data"]["dataset"]),
                    )
                    torch.save(state, save_path)

            # Stop both loops once the configured iteration count is reached.
            if (i + 1) == cfg["training"]["train_iters"]:
                flag = False
                break
Ejemplo n.º 27
0
    def __init__(self, gen_pcl=True):
        """
        Set up the segmentation network, the ROS publishers and the image subscribers.

        All settings are read from the ROS parameter server
        ('/semantic_pcl/...' and '/camera/...').

        Args:
            gen_pcl (bool): whether to generate a point cloud. If True the
                node subscribes to approximately time-synchronised color and
                depth images and creates a point cloud publisher; otherwise
                it subscribes to the color image only.

        Raises:
            ValueError: if '/semantic_pcl/dataset' is neither 'sunrgbd' nor
                'ade20k'.
        """
        # Get point type
        point_type = rospy.get_param('/semantic_pcl/point_type')
        if point_type == 0:
            self.point_type = PointType.COLOR
            print('Generate color point cloud.')
        elif point_type == 1:
            self.point_type = PointType.SEMANTICS_MAX
            print('Generate semantic point cloud [max fusion].')
        elif point_type == 2:
            self.point_type = PointType.SEMANTICS_BAYESIAN
            print('Generate semantic point cloud [bayesian fusion].')
        else:
            print("Invalid point type.")
            return
        # Image size is fixed rather than read from '/camera/width' and '/camera/height'
        self.img_width, self.img_height = 640, 480

        # Set up the CNN. The network is built for every point type: the
        # previous code had one copy of this setup for PointType.COLOR and an
        # equivalent copy for all other types.
        print('Setting up CNN model...')
        # Set device
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        # Get dataset
        dataset = rospy.get_param('/semantic_pcl/dataset')
        # Setup model
        model_name = 'pspnet'
        model_path = rospy.get_param('/semantic_pcl/model_path')
        if dataset == 'sunrgbd':  # Version fine tuned on the sunrgbd dataset
            self.n_classes = 38  # Semantic class number
            version = 'sunrgbd_res50'
            self.cnn_input_size = (321, 321)
        elif dataset == 'ade20k':
            self.n_classes = 150  # Semantic class number
            version = 'ade20k'
            self.cnn_input_size = (473, 473)
        else:
            # Fail here with a clear message instead of later on a missing
            # self.model attribute.
            raise ValueError(
                "Unsupported dataset '{}': expected 'sunrgbd' or 'ade20k'.".
                format(dataset))
        self.model = get_model(model_name, self.n_classes, version=version)
        # map_location follows the selected device so that the CPU fallback
        # can load checkpoints that were saved from a GPU.
        state = torch.load(model_path, map_location=self.device)
        if dataset == 'ade20k':
            # Remove 'module' from dictionary keys
            state = convert_state_dict(state['model_state'])
        self.model.load_state_dict(state)
        self.mean = np.array([104.00699, 116.66877,
                              122.67892])  # Mean value of dataset
        self.model = self.model.to(self.device)
        self.model.eval()
        self.cmap = color_map(
            N=self.n_classes,
            normalized=False)  # Color map for semantic classes

        # Declare array containers
        if self.point_type is PointType.SEMANTICS_BAYESIAN:
            # 3 decoded semantic images with the highest confidences
            self.semantic_colors = np.zeros(
                (3, self.img_height, self.img_width, 3), dtype=np.uint8)
            # Top 3 class confidences
            self.confidences = np.zeros(
                (3, self.img_height, self.img_width), dtype=np.float32)
        # Set up ROS
        print('Setting up ROS...')
        # CvBridge to transform ROS Image message to OpenCV image
        self.bridge = CvBridge()
        # Semantic image publisher
        self.sem_img_pub = rospy.Publisher("/semantic_pcl/semantic_image",
                                           Image,
                                           queue_size=1)
        # Set up ros image subscriber
        # Set buff_size to average msg size to avoid accumulating delay
        if gen_pcl:
            # Point cloud frame id
            frame_id = rospy.get_param('/semantic_pcl/frame_id')
            # Camera intrinsic matrix
            fx = rospy.get_param('/camera/fx')
            fy = rospy.get_param('/camera/fy')
            cx = rospy.get_param('/camera/cx')
            cy = rospy.get_param('/camera/cy')
            intrinsic = np.matrix([[fx, 0, cx], [0, fy, cy], [0, 0, 1]],
                                  dtype=np.float32)
            self.pcl_pub = rospy.Publisher("/semantic_pcl/semantic_pcl",
                                           PointCloud2,
                                           queue_size=1)
            self.color_sub = message_filters.Subscriber(
                rospy.get_param('/semantic_pcl/color_image_topic'),
                Image,
                queue_size=1,
                buff_size=30 * 480 * 640)
            # Increase buffer size to avoid delay (despite queue_size = 1)
            self.depth_sub = message_filters.Subscriber(
                rospy.get_param('/semantic_pcl/depth_image_topic'),
                Image,
                queue_size=1,
                buff_size=40 * 480 * 640)
            # Take in one color image and one depth image with a limited time
            # gap between message time stamps
            self.ts = message_filters.ApproximateTimeSynchronizer(
                [self.color_sub, self.depth_sub], queue_size=1, slop=0.3)
            self.ts.registerCallback(self.color_depth_callback)
            self.cloud_generator = ColorPclGenerator(intrinsic, self.img_width,
                                                     self.img_height, frame_id,
                                                     self.point_type)
        else:
            self.image_sub = rospy.Subscriber(
                rospy.get_param('/semantic_pcl/color_image_topic'),
                Image,
                self.color_callback,
                queue_size=1,
                buff_size=30 * 480 * 640)

        print('Ready.')
Ejemplo n.º 28
0
def test(args):
    """Run instance segmentation over the Cityscapes validation images.

    For every image the network produces semantic scores and a per-pixel
    instance embedding. For each class in ``has_inst_class`` the embeddings
    of the pixels predicted as that class are clustered (MeanShift) into
    instances. Each sufficiently large instance is written as a binary mask
    PNG, and one line ``"<mask file> <class id> <confidence>"`` is appended
    to the image's ``.txt`` result file. An overlay of the instance map on
    the input image is saved per class.

    Args:
        args: Namespace providing ``dataset``, ``img_norm``, ``model_path``,
            ``origianl_icnet_semantic_pred`` (use a separate pretrained ICNet
            for the semantic prediction) and ``use_gt_sem_map`` (use the
            ground-truth label map instead of the predicted one).
    """
    imgList = glob.glob('datasets/cityscapes/leftImg8bit/val/*/*_leftImg8bit.png')
    outputDir = 'datasets/cityscapes/results'
    overlayedDir = 'datasets/cityscapes/overlayed_results'
    gtSemDir = 'datasets/cityscapes/gtFine/val'

    data_loader = get_loader(args.dataset)
    loader = data_loader(root=None, is_transform=True, img_norm=args.img_norm, test_mode=True)
    n_classes = loader.n_classes

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_name = 'icnet_is'

    if args.origianl_icnet_semantic_pred:
        model_dict = {"arch": "icnet"}
        model_icnet = get_model(model_dict, n_classes, version=args.dataset)
        # map_location keeps checkpoint loading working on the CPU fallback.
        state = convert_state_dict(
            torch.load("pretrained_models/icnetBN_cityscapes_trainval_90k.pth",
                       map_location=device)["model_state"])
        model_icnet.load_state_dict(state)
        model_icnet.eval()
        model_icnet.to(device)

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    model = FullModel(model, None)

    state = convert_state_dict(torch.load(args.model_path, map_location=device)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # Clustering parameters.
    min_inst_size = 500      # instances with fewer pixels are discarded
    single_obj_dist = 1.5    # below this embedding spread a class is treated as one instance
    bd_decay_rate = 0.9      # bandwidth shrink factor applied when MeanShift fails

    # The result directory must exist before the per-image files are opened.
    os.makedirs(outputDir, exist_ok=True)

    for img_processed, imgPath in enumerate(imgList, start=1):
        imgId = os.path.split(imgPath)[1].split('.')[0]

        # Setup image
        print("Read Input Image from : {} ({}/{})".format(imgPath, img_processed, len(imgList)))
        img = imageio.imread(imgPath)
        original_img = Image.fromarray(img).convert('RGBA')

        img = pad_one_more(img)

        # Reverse the channel order and subtract the dataset mean.
        img = img[:, :, ::-1]
        img = img.astype(np.float64)
        img -= loader.mean

        # HWC -> CHW, then add the batch dimension
        img = img.transpose(2, 0, 1)
        img = np.expand_dims(img, 0)
        img = torch.from_numpy(img).float()
        images = img.to(device)

        # Inference only: no gradients needed.
        with torch.no_grad():
            if args.origianl_icnet_semantic_pred:
                outputs = model_icnet(images)
                _, outputs_inst = model.model(images)
            else:
                outputs, outputs_inst = model.model(images)

        # Per-pixel index of the highest-scoring class, padding removed.
        pred = remove_pad_one_more(np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0))

        # CHW -> HWC for both output maps, padding removed.
        outputs_inst = outputs_inst.cpu().detach().numpy()[0, ...]
        outputs_inst = remove_pad_one_more(outputs_inst.transpose((1, 2, 0)))

        outputs_sem = outputs.cpu().detach().numpy()[0, ...]
        outputs_sem = remove_pad_one_more(outputs_sem.transpose((1, 2, 0)))

        h, w, c = outputs_inst.shape

        pred_flattened = pred.reshape((h * w))
        outputs_inst_flattened = np.copy(outputs_inst.reshape((h * w, c)))
        inst_num = 0

        if args.use_gt_sem_map:
            imgId_np = '_'.join(imgId.split('_')[:-1])
            gtImgDir = os.path.join(gtSemDir, imgId.split('_')[0], imgId_np + '_gtFine_labelTrainIds.png')
            pred = imageio.imread(gtImgDir)
            # Label maps must be resized with nearest-neighbour sampling: the
            # default bilinear interpolation blends neighbouring class IDs
            # into IDs of unrelated classes.
            pred_flattened = misc.imresize(
                pred, (outputs_sem.shape[0], outputs_sem.shape[1]),
                interp='nearest').reshape((h * w))

        # The context manager closes the result file even if clustering or
        # image saving raises.
        with open(os.path.join(outputDir, imgId + '.txt'), 'w') as output_txt:
            for inst_class in has_inst_class:
                interested_semantic_class_train_id = inst_class['trainID']
                predID = inst_class['id']

                class_mask = pred_flattened == interested_semantic_class_train_id
                if not class_mask.any():
                    continue
                class_embeddings = outputs_inst_flattened[class_mask, :]

                inst_segment_map = np.zeros((h * w), dtype=np.uint16)

                avg_dist = estimate_bandwidth(class_embeddings, quantile=1.0, n_samples=1000, n_jobs=12)
                if avg_dist > single_obj_dist:
                    bandwidth = inst_class['bandwidth']
                    while True:
                        try:
                            ms = MeanShift(bandwidth=bandwidth, bin_seeding=True, n_jobs=12)
                            ms.fit(class_embeddings)
                            clustering_label = ms.labels_
                            break
                        except Exception:
                            # Retry with a smaller bandwidth. `Exception`
                            # (not a bare except) keeps Ctrl-C working.
                            bandwidth *= bd_decay_rate
                            print(bandwidth)
                    # Label 0 is reserved for "not this class".
                    inst_segment_map[class_mask] = clustering_label + 1
                else:
                    inst_segment_map[class_mask] = 1

                for lbl in range(1, inst_segment_map.max() + 1):
                    inst_pixels = inst_segment_map == lbl
                    if np.sum(inst_pixels) < min_inst_size:
                        continue
                    inst_num += 1
                    mask_file_name = imgId + '_inst_{:03d}.png'.format(inst_num)
                    mask_dir = os.path.join(outputDir, mask_file_name)
                    mask_img = np.zeros((h * w), dtype=np.uint8)
                    mask_img[inst_pixels] = 255
                    mask_img = mask_img.reshape((h, w))
                    imageio.imsave(mask_dir, mask_img)
                    sem_lbl_pred = predID
                    # Mean class score inside the mask, shifted by the global
                    # minimum score of the image.
                    conf = np.mean(outputs_sem[..., interested_semantic_class_train_id][mask_img > 0]) - outputs_sem.min()
                    output_txt.write(mask_file_name + ' ' + str(sem_lbl_pred) + ' {:.4f}\n'.format(conf))

                # NOTE(review): inst_num is cumulative over classes, so once
                # one instance was kept an overlay is written for every
                # following class as well -- confirm this is intended.
                if inst_num > 0:
                    inst_segment_map = inst_segment_map.reshape(h, w)
                    cmap = plt.cm.jet
                    norm = plt.Normalize(vmin=inst_segment_map.min(), vmax=inst_segment_map.max())
                    # Map the normalized instance ids to RGBA colors;
                    # background (id 0) becomes opaque black.
                    inst_segment_map_single_image = cmap(norm(inst_segment_map))
                    inst_segment_map_single_image[inst_segment_map == 0] = [0, 0, 0, 1]
                    inst_segment_map_single_image = Image.fromarray((inst_segment_map_single_image * 255).astype(np.uint8))

                    original_img = original_img.resize(inst_segment_map_single_image.size)
                    inst_segment_map_single_image.putalpha(128)

                    overlayed_image = Image.alpha_composite(original_img, inst_segment_map_single_image)
                    overlayed_image_path = os.path.join(overlayedDir, str(interested_semantic_class_train_id), imgId + '.png')
                    print(overlayed_image_path)
                    os.makedirs(os.path.dirname(overlayed_image_path), exist_ok=True)
                    overlayed_image.save(overlayed_image_path)
Ejemplo n.º 29
0
def test(args):
    """Segment a single image and save the colour-decoded mask.

    Reads ``args.img_path``, preprocesses it to the loader's image size, runs
    the model stored at ``args.model_path`` and writes the decoded
    segmentation to ``args.out_path``. When ``args.dcrf == "True"`` a
    DenseCRF-refined mask is additionally written next to it with a
    ``_drf.png`` suffix.

    Args:
        args: Namespace providing ``img_path``, ``dataset``, ``model_path``,
            ``dcrf`` (the string ``"True"`` enables CRF refinement) and
            ``out_path``.
    """
    import os

    # Select the device before inference so the CPU fallback is real; the
    # previous code moved everything to CUDA unconditionally and only checked
    # availability after the forward pass.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = misc.imread(args.img_path)

    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, is_transform=True)
    n_classes = loader.n_classes

    # Bicubic-resized copy of the raw image, used only by the CRF below.
    resized_img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]), interp='bicubic')

    # Reverse the channel order and subtract the dataset mean.
    img = img[:, :, ::-1]
    img = img.astype(np.float64)
    img -= loader.mean
    # NOTE(review): the resize is applied after the mean subtraction --
    # confirm imresize handles the resulting float image as intended.
    img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))
    img = img.astype(float) / 255.0
    # HWC -> CHW, then add the batch dimension
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()

    # Setup Model
    # The architecture name is the part of the checkpoint *file name* before
    # the first underscore; using the base name keeps directory components
    # of the path from leaking into it.
    model_file_name = os.path.basename(args.model_path)
    model = get_model(model_file_name[:model_file_name.find('_')], n_classes)
    state = convert_state_dict(
        torch.load(args.model_path, map_location=device)['model_state'])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    # Inference only: no_grad replaces the deprecated Variable(volatile=True).
    with torch.no_grad():
        outputs = F.softmax(model(img.to(device)), dim=1)

    if args.dcrf == "True":
        # Unary potentials are the negative log class probabilities.
        unary = outputs.data.cpu().numpy()
        unary = np.squeeze(unary, 0)
        unary = -np.log(unary)
        unary = unary.transpose(2, 1, 0)
        w, h, c = unary.shape
        unary = unary.transpose(2, 0, 1).reshape(loader.n_classes, -1)
        unary = np.ascontiguousarray(unary)

        resized_img = np.ascontiguousarray(resized_img)

        d = dcrf.DenseCRF2D(w, h, loader.n_classes)
        d.setUnaryEnergy(unary)
        d.addPairwiseBilateral(sxy=5, srgb=3, rgbim=resized_img, compat=1)

        q = d.inference(50)
        mask = np.argmax(q, axis=0).reshape(w, h).transpose(1, 0)
        decoded_crf = loader.decode_segmap(np.array(mask, dtype=np.uint8))
        dcrf_path = args.out_path[:-4] + '_drf.png'
        misc.imsave(dcrf_path, decoded_crf)
        print("Dense CRF Processed Mask Saved at: {}".format(dcrf_path))

    # Per-pixel index of the most probable class.
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    decoded = loader.decode_segmap(pred)
    print('Classes found: ', np.unique(pred))
    misc.imsave(args.out_path, decoded)
    print("Segmentation Mask Saved at: {}".format(args.out_path))
Ejemplo n.º 30
0
def test(args):
    """Segment one image and visualise its instance embedding.

    Runs the ``icnet_is_wp`` model on ``args.img_path`` and writes, next to
    ``args.out_path`` (same directory, prefixed file names):

    * ``<out>``: colour-decoded semantic segmentation;
    * ``inst_<out>``: 3-component PCA projection of the instance embedding;
    * ``inst_single_<out>``: the same projection fitted on, and restricted
      to, the pixels of one class (train id 17);
    * ``inst_seg_map_<out>``: MeanShift clustering of that class's pixels;
    * ``inst_seg_map_overlayed_<out>``: the clustering blended over the input.

    Args:
        args: Namespace providing ``img_path``, ``dataset``, ``img_norm``,
            ``model_path``, ``origianl_icnet_semantic_pred``, ``dcrf`` and
            ``out_path``.
    """

    def _out_path(prefix):
        # Prefix the file name only; prepending to the whole path would
        # corrupt it whenever out_path contains a directory component.
        head, tail = os.path.split(args.out_path)
        return os.path.join(head, prefix + tail)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_name = 'icnet_is_wp'

    # Setup image
    print("Read Input Image from : {}".format(args.img_path))
    img = imageio.imread(args.img_path)
    original_img = Image.fromarray(img).convert('RGBA')

    data_loader = get_loader(args.dataset)
    loader = data_loader(root=None,
                         is_transform=True,
                         img_norm=args.img_norm,
                         test_mode=True)
    n_classes = loader.n_classes

    # Bicubic-resized copy of the raw image, used only by the CRF below.
    resized_img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]),
                                interp="bicubic")

    orig_size = img.shape[:-1]
    if model_name in ["pspnet", "icnet", "icnetBN", "icnet_is", "icnet_is_wp"]:
        # uint8 with RGB mode, resize width and height which are odd numbers
        img = misc.imresize(
            img, (orig_size[0] // 2 * 2 + 1, orig_size[1] // 2 * 2 + 1))
    else:
        img = misc.imresize(img, (loader.img_size[0], loader.img_size[1]))

    # Reverse the channel order and subtract the dataset mean.
    img = img[:, :, ::-1]
    img = img.astype(np.float64)
    img -= loader.mean

    # HWC -> CHW, then add the batch dimension
    img = img.transpose(2, 0, 1)
    img = np.expand_dims(img, 0)
    img = torch.from_numpy(img).float()
    images = img.to(device)

    if args.origianl_icnet_semantic_pred:
        # Semantic prediction from a separate pretrained ICNet.
        model_dict = {"arch": "icnet"}
        model = get_model(model_dict, n_classes, version=args.dataset)
        # map_location keeps checkpoint loading working on the CPU fallback.
        state = convert_state_dict(
            torch.load("pretrained_models/icnetBN_cityscapes_trainval_90k.pth",
                       map_location=device)["model_state"])
        model.load_state_dict(state)
        model.eval()
        model.to(device)
        with torch.no_grad():
            outputs = model(images)

    # Setup Model
    model_dict = {"arch": model_name}
    model = get_model(model_dict, n_classes, version=args.dataset)
    model = FullModel(model, None)

    state = convert_state_dict(
        torch.load(args.model_path, map_location=device)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)
    # Inference only: no gradients needed.
    with torch.no_grad():
        if args.origianl_icnet_semantic_pred:
            _, outputs_inst = model.model(images)
        else:
            outputs, outputs_inst = model.model(images)

    if args.dcrf:
        unary = outputs.data.cpu().numpy()
        unary = np.squeeze(unary, 0)
        unary = -np.log(unary)
        unary = unary.transpose(2, 1, 0)
        w, h, c = unary.shape
        unary = unary.transpose(2, 0, 1).reshape(loader.n_classes, -1)
        unary = np.ascontiguousarray(unary)

        resized_img = np.ascontiguousarray(resized_img)

        d = dcrf.DenseCRF2D(w, h, loader.n_classes)
        d.setUnaryEnergy(unary)
        d.addPairwiseBilateral(sxy=5, srgb=3, rgbim=resized_img, compat=1)

        q = d.inference(50)
        mask = np.argmax(q, axis=0).reshape(w, h).transpose(1, 0)
        decoded_crf = loader.decode_segmap(np.array(mask, dtype=np.uint8))
        dcrf_path = args.out_path[:-4] + "_drf.png"
        misc.imsave(dcrf_path, decoded_crf)
        print("Dense CRF Processed Mask Saved at: {}".format(dcrf_path))

    # Per-pixel index of the highest-scoring class.
    pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
    # Copy at network resolution; it is matched pixel-for-pixel with the
    # instance embedding below.
    pred_original = np.copy(pred)
    # NOTE(review): "icnet_is_wp" is in the input-resize list above but not
    # in this one, so with the current model_name the prediction is not
    # resized back to orig_size -- confirm whether that is intended.
    if model_name in ["pspnet", "icnet", "icnetBN", "icnet_is"]:
        pred = pred.astype(np.float32)
        # float32 with F mode, resize back to orig_size
        pred = misc.imresize(pred, orig_size, "nearest", mode="F")

    interested_semantic_class_train_id = 17

    # CHW -> HWC embedding map.
    outputs_inst = outputs_inst.cpu().detach().numpy()

    outputs_inst = outputs_inst[0, ...]
    outputs_inst = outputs_inst.transpose((1, 2, 0))
    h, w, c = outputs_inst.shape

    # --- PCA visualisation of the whole embedding -------------------------
    outputs_inst_transformed = np.copy(outputs_inst.reshape((h * w, c)))
    pca = sklearnPCA(n_components=3)

    pca.fit(outputs_inst_transformed)
    outputs_inst_transformed = pca.transform(outputs_inst_transformed)
    # Rescale every component to [0, 1] so it can be shown as a colour.
    outputs_inst_transformed -= outputs_inst_transformed.min(axis=0)
    outputs_inst_transformed /= outputs_inst_transformed.max(axis=0)
    outputs_inst_img = outputs_inst_transformed.reshape((h, w, 3))
    # uint8 is what the image writer expects, and it matches the other
    # images saved by this function.
    outputs_inst_img = (outputs_inst_img * 255).astype(np.uint8)

    decoded = loader.decode_segmap(pred)
    print("Classes found: ", np.unique(pred))
    imageio.imsave(args.out_path, decoded)
    imageio.imsave(_out_path("inst_"), outputs_inst_img)
    print("Segmentation Mask Saved at: {}".format(args.out_path))

    # --- PCA fitted on the pixels of the class of interest only -----------
    outputs_inst_transformed_single = np.copy(outputs_inst.reshape((h * w, c)))
    pred_transformed = pred_original.reshape((h * w))
    class_mask = pred_transformed == interested_semantic_class_train_id
    pca.fit(outputs_inst_transformed_single[class_mask, :])
    outputs_inst_transformed_single = pca.transform(
        outputs_inst_transformed_single)
    outputs_inst_transformed_single -= outputs_inst_transformed_single.min(
        axis=0)
    outputs_inst_transformed_single /= outputs_inst_transformed_single.max(
        axis=0)
    # Black out every pixel that is not of the class of interest.
    outputs_inst_transformed_single[~class_mask, :] = 0
    outputs_inst_single_img = outputs_inst_transformed_single.reshape(
        (h, w, 3))
    outputs_inst_single_img = Image.fromarray(
        (outputs_inst_single_img * 255).astype(np.uint8))
    outputs_inst_single_img.save(_out_path("inst_single_"))

    # --- MeanShift clustering of the class of interest --------------------
    outputs_inst_transformed_single = np.copy(outputs_inst.reshape((h * w, c)))
    class_embeddings = outputs_inst_transformed_single[class_mask, :]
    bandwidth = estimate_bandwidth(class_embeddings,
                                   quantile=0.1,
                                   n_samples=1000,
                                   n_jobs=12)
    print(bandwidth)
    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True, n_jobs=12)
    ms.fit(class_embeddings)
    clustering_label = ms.labels_
    inst_segment_map_single = np.zeros((h * w))
    # Label 0 is reserved for "not this class".
    inst_segment_map_single[class_mask] = clustering_label + 1
    inst_segment_map_single = inst_segment_map_single.reshape(h, w)
    cmap = plt.cm.jet
    norm = plt.Normalize(vmin=inst_segment_map_single.min(),
                         vmax=inst_segment_map_single.max())
    # Map the normalized instance ids to RGBA colors; background (id 0)
    # becomes opaque black.
    inst_segment_map_single_image = cmap(norm(inst_segment_map_single))
    inst_segment_map_single_image[inst_segment_map_single == 0] = [0, 0, 0, 1]
    inst_segment_map_single_image = Image.fromarray(
        (inst_segment_map_single_image * 255).astype(np.uint8))

    # save the image
    inst_segment_map_single_image.save(_out_path('inst_seg_map_'))

    # Blend the instance map over the input image at 50% opacity.
    original_img = original_img.resize(inst_segment_map_single_image.size)
    inst_segment_map_single_image.putalpha(128)

    overlayed_image = Image.alpha_composite(original_img,
                                            inst_segment_map_single_image)
    overlayed_image.save(_out_path('inst_seg_map_overlayed_'))
Ejemplo n.º 31
0
def test(args, cfg):
    """Run multi-scale inference on every PNG in a directory and save the result.

    Each ``*.png`` file directly under ``args.img_path`` (visited in natural
    path order) is resized by every factor in ``scale_list`` and fed through
    the model. Every prediction is resized back to the original image
    height/width and the per-scale predictions are summed. The sum (it is
    deliberately not divided by the number of scales) is written with
    ``torch.save`` to ``<out_dir>/<image stem>_S4_not_1.pt``.

    Args:
        args: Namespace providing ``model_path`` (checkpoint containing a
            ``"model_state"`` entry), ``img_path`` (directory with the PNG
            images), ``dataset`` (name passed to ``get_loader`` and
            ``get_data_path``) and ``img_norm`` (when truthy, pixel values
            are divided by 255 before inference).
        cfg: Configuration object; ``cfg['model']`` is handed to
            ``get_model`` and the whole object to ``get_data_path``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    image_files = natsort.natsorted(Path(args.img_path).glob("*.png"),
                                    alg=natsort.PATH)
    image_paths = [str(image_file) for image_file in image_files]
    print("Read Input Image from : {}".format(args.img_path))

    # The dataset loader is only needed for the number of classes.
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset, config_file=cfg)
    loader = data_loader(data_path, is_transform=True, img_norm=args.img_norm)
    n_classes = loader.n_classes

    # Setup Model
    model = get_model(cfg['model'], n_classes)
    # map_location keeps the CPU fallback above usable for checkpoints that
    # were saved from a GPU.
    checkpoint = torch.load(args.model_path, map_location=device)
    model.load_state_dict(convert_state_dict(checkpoint["model_state"]))
    model.eval()
    model.to(device)

    # Scale 1 is intentionally absent from this list (hence "_not_1" in the
    # output file name); the `scale != 1` branches below still support it.
    scale_list = [1.5, 1.25, 0.75, 0.5]

    out_dir = "test_out/mv3_1_true_2_res50_data10_MS/mv3_1_true_2_res50_data10_MS_7/"
    # torch.save does not create missing directories.
    Path(out_dir).mkdir(parents=True, exist_ok=True)

    for img_path in tqdm(image_paths):
        img_input = misc.imread(img_path)
        # The array is indexed as height * width * channel below.
        ori_size = tuple(img_input.shape[:2])

        # Running sum over scales. It is created from the first prediction so
        # that its shape follows the model output and the image size instead
        # of a hard-coded (1, 6, 512, 512).
        multi_avg = None

        # Pure inference: do not record an autograd graph for every scale.
        with torch.no_grad():
            for scale in scale_list:
                if scale != 1:
                    img = cv.resize(img_input,
                                    dsize=(0, 0),
                                    fx=scale,
                                    fy=scale,
                                    interpolation=cv.INTER_LINEAR)
                else:
                    img = img_input
                img = img.astype(np.float64)
                if args.img_norm:
                    img = img / 255.0

                # HWC -> CHW, then add the batch axis -> NCHW
                img = np.expand_dims(img.transpose(2, 0, 1), 0)
                images = torch.from_numpy(img).float().to(device)
                scaled_outputs = model(images)

                # bilinear is ok for both upsample and downsample
                if scale != 1:
                    scaled_outputs = F.interpolate(scaled_outputs,
                                                   ori_size,
                                                   mode='bilinear',
                                                   align_corners=False)
                if multi_avg is None:
                    multi_avg = scaled_outputs
                else:
                    multi_avg = multi_avg + scaled_outputs

        outputs = multi_avg
        out_path = out_dir + Path(img_path).stem + "_S4_not_1.pt"

        torch.save(outputs, out_path)