def uncertainty_test(model, input_var, heat_thresh, ax, num_samples=100):
    """Visualise keypoint uncertainty using Monte Carlo dropout sampling.

    The model is run ``num_samples`` times on the same input with dropout
    active. For every pass the peaks of the last output's heatmap are
    extracted with ``parseHeatmap``; the peak coordinates of all passes are
    pooled and a Dirichlet-process Gaussian mixture is fitted to them. The
    fitted mixture and the pooled samples are handed to ``plot_gmm`` to be
    drawn on ``ax``.

    Args:
        model: network whose output is indexable; ``output[-1]`` is used as
            the heatmap tensor.
        input_var: input passed unchanged to ``model`` on every pass.
        heat_thresh: threshold forwarded to ``parseHeatmap``.
        ax: axes object forwarded to ``plot_gmm``.
        num_samples: number of stochastic forward passes (default 100).

    Returns:
        None. If fewer than two keypoint samples were collected a warning is
        issued and nothing is plotted, because the mixture cannot be fitted.

    Note:
        The model is switched to training mode and is left in that mode when
        the function returns.
    """
    import warnings

    # Dropout must be active for the passes to differ from each other.
    model.train()
    model.apply(set_dropout_to_train)

    sampled_kps = []
    gmm_component_num = 0

    for _ in range(num_samples):
        output = model(input_var)
        hm = output[-1].data.cpu().numpy()

        ps = parseHeatmap(hm[0], heat_thresh)
        kp_num = len(ps[0])
        # The mixture gets as many components as the largest number of
        # peaks seen in any single pass.
        gmm_component_num = max(gmm_component_num, kp_num)

        # Stored as [ps[1], ps[0]] scaled by 4 -- presumably (x, y) in input
        # pixels given a 4x smaller heatmap; confirm against parseHeatmap.
        sampled_kps.extend(
            [ps[1][k] * 4, ps[0][k] * 4] for k in range(kp_num))

    if len(sampled_kps) < 2:
        # BayesianGaussianMixture needs at least two samples and at least
        # one component; bail out instead of failing inside scikit-learn.
        warnings.warn(
            'uncertainty_test: only {} keypoint sample(s) above threshold {}; '
            'skipping mixture fit'.format(len(sampled_kps), heat_thresh))
        return

    # Build the (n_samples, 2) array once instead of re-stacking per sample.
    all_kps = np.array(sampled_kps)

    # Fit a Dirichlet process Gaussian mixture with at most
    # gmm_component_num components.
    dpgmm = mixture.BayesianGaussianMixture(
        n_components=gmm_component_num,
        covariance_type='full',
        weight_concentration_prior_type="dirichlet_process",
        init_params='kmeans',
        mean_precision_prior=1,
        weight_concentration_prior=None).fit(all_kps)
    plot_gmm(dpgmm, all_kps, ax)
# Example #2
# 0
def main():
    """Run the single-image keypoint demo and display the results.

    Loads the model from ``opt.loadModel`` and the image from ``opt.demo``,
    crops the image to ``ref.inputRes``, runs one forward pass and parses the
    peaks of the last output's heatmap. For every peak the values of
    channels 1-3 (called ``canonical`` here) and channel 4 (``dep``) are read
    at the peak location; ``horn87`` is then used to align the canonical
    points with the predicted ones. Images and 3D points are shown through
    ``Debugger``.

    Raises:
        IOError: if ``opt.demo`` cannot be read as an image.
    """
    opt = opts().parse()
    model = torch.load(opt.loadModel)

    img = cv2.imread(opt.demo)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('Failed to read demo image: {}'.format(opt.demo))

    # Crop around the image centre using the longer side as the scale.
    s = max(img.shape[0], img.shape[1]) * 1.0
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.])
    img = Crop(img, c, s, 0, ref.inputRes) / 256.

    input_tensor = torch.from_numpy(img.copy()).float()
    # Add a leading batch dimension of size 1.
    input_tensor = input_tensor.view(1, input_tensor.size(0),
                                     input_tensor.size(1),
                                     input_tensor.size(2))
    input_var = torch.autograd.Variable(input_tensor).float()
    if opt.GPU > -1:
        model = model.cuda(opt.GPU)
        input_var = input_var.cuda(opt.GPU)

    output = model(input_var)
    hm = output[-1].data.cpu().numpy()

    debugger = Debugger()
    # Back to an HxWxC uint8 image for drawing.
    img = (input_tensor[0].numpy().transpose(1, 2, 0) * 256).astype(
        np.uint8).copy()
    inp = img.copy()

    # Blend the up-sampled channel-0 heatmap over the input image.
    star = cv2.resize(hm[0, 0], (ref.inputRes, ref.inputRes)) * 255
    star = np.clip(star, 0, 255)
    star = np.tile(star, (3, 1, 1)).transpose(1, 2, 0)
    trans = 0.8
    star = (trans * star + (1. - trans) * img).astype(np.uint8)

    ps = parseHeatmap(hm[0], thresh=0.1)
    canonical, pred, color, score = [], [], [], []
    for row, col in zip(ps[0], ps[1]):
        x, y, z = ((hm[0, 1:4, row, col] + 0.5) *
                   ref.outputRes).astype(np.int32)
        dep = ((hm[0, 4, row, col] + 0.5) * ref.outputRes).astype(np.int32)
        canonical.append([x, y, z])
        pred.append([col, ref.outputRes - dep, ref.outputRes - row])
        score.append(hm[0, 0, row, col])
        color.append((1.0 * x / ref.outputRes, 1.0 * y / ref.outputRes,
                      1.0 * z / ref.outputRes))
        # Peak indices are scaled by 4 before drawing -- presumably the
        # input/heatmap resolution ratio; confirm against ref.
        center = (col * 4, row * 4)
        cv2.circle(img, center, 4, (255, 255, 255), -1)
        cv2.circle(img, center, 2, (int(z * 4), int(y * 4), int(x * 4)), -1)

    pred = np.array(pred).astype(np.float32)
    canonical = np.array(canonical).astype(np.float32)

    pointS = canonical * 1.0 / ref.outputRes
    pointT = pred * 1.0 / ref.outputRes
    R, t, s = horn87(pointS.transpose(), pointT.transpose(), score)

    # Map the canonical points with the transform returned by horn87.
    rotated_pred = s * np.dot(
        R, canonical.transpose()).transpose() + t * ref.outputRes

    debugger.addImg(inp, 'inp')
    debugger.addImg(star, 'star')
    debugger.addImg(img, 'nms')
    debugger.addPoint3D(canonical / ref.outputRes - 0.5, c=color, marker='^')
    debugger.addPoint3D(pred / ref.outputRes - 0.5, c=color, marker='x')
    debugger.addPoint3D(rotated_pred / ref.outputRes - 0.5,
                        c=color,
                        marker='*')

    debugger.showAllImg(pause=True)
    debugger.show3D()
# Example #3
# 0
    class_name = ref.ObjectNet3DClassName[class_id]

    v = np.array([
        dataset.annot['viewpoint_azimuth'][index],
        dataset.annot['viewpoint_elevation'][index],
        dataset.annot['viewpoint_theta'][index]
    ]) / 180.

    valid = dataset.annot['valid'][index]
    gt_model = np.array(dataset.annot['space_embedding'][index])[valid > 0]
    gt_view = v * PI
    anchors = np.array(dataset.annot['anchors_3d'][index][valid > 0])
    gt_point = Rotate(gt_model, gt_view)

    hm = preds[index]['map']
    ps = parseHeatmap(hm[0], thresh=0.05)
    if len(ps[0]) == 0:
        num[class_name] += 1
        continue

    canonical = []
    pred = []
    color = []
    score = []
    for k in range(len(ps[0])):
        x, y, z = ((hm[0, 1:4, ps[0][k], ps[1][k]] + 0.5) *
                   ref.outputRes).astype(np.int32)
        dep = ((hm[0, 4, ps[0][k], ps[1][k]] + 0.5) * ref.outputRes).astype(
            np.int32)
        score.append(hm[0, 0, ps[0][k], ps[1][k]])
        canonical.append([x, y, z])
def main(model_path='/home/erl/moshan/orcvio_gamma/orcvio_gamma/pytorch_models/starmap/trained_models/with_dropout/model_cpu.pth',
         img_path='./images/car.png',
         heat_thresh=0.25):
    """Run the keypoint demo with a Monte Carlo dropout uncertainty plot.

    Loads a model (expected to have been trained with dropout enabled) and
    an image, runs one forward pass, parses heatmap peaks and draws them on
    the image. ``uncertainty_test`` is called to overlay the sampled
    keypoint distribution on a matplotlib figure, which is shown and saved
    to ``kp_cov.png``. Finally ``horn87`` is used to align the canonical
    points with the predicted ones.

    Args:
        model_path: path passed to ``torch.load``.
        img_path: path of the image to process.
        heat_thresh: threshold passed to ``parseHeatmap`` and
            ``uncertainty_test``.

    Raises:
        IOError: if ``img_path`` cannot be read as an image.
    """
    # NOTE(review): det_name, inp, star and rotated_pred are computed but not
    # consumed within this function as it stands.
    det_name = './det/car.png'

    # by default img size is 256
    inputRes = 256
    outputRes = 64
    CUDA = torch.cuda.is_available()

    model = torch.load(model_path)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('Failed to read image: {}'.format(img_path))

    s = max(img.shape[0], img.shape[1]) * 1.0
    c = np.array([img.shape[1] / 2., img.shape[0] / 2.])

    # Crop to inputRes, then reorder h, w, c -> c, h, w and scale by 1/256.
    img = Crop(img, c, s, 0, inputRes).astype(np.float32).transpose(2, 0,
                                                                    1) / 256.
    input_tensor = torch.from_numpy(img.copy()).float()

    # change to b, c, h, w
    input_tensor = input_tensor.view(1, input_tensor.size(0),
                                     input_tensor.size(1),
                                     input_tensor.size(2))
    input_var = torch.autograd.Variable(input_tensor).float()

    if CUDA:
        model.cuda()
        input_var = input_var.cuda()

    output = model(input_var)
    hm = output[-1].data.cpu().numpy()

    # back to h, w, c and uint8 for display
    img = (input_tensor[0].numpy().transpose(1, 2, 0) * 256).astype(
        np.uint8).copy()
    inp = img.copy()

    # hm[0, 0] is a single-channel map, since the 1st dim is batch
    star = cv2.resize(hm[0, 0], (inputRes, inputRes)) * 255

    # clip the values to 0-255
    star = np.clip(star, 0, 255)

    # repeat the map 3 times to get a 3-channel image, then blend with img
    star = np.tile(star, (3, 1, 1)).transpose(1, 2, 0)
    trans = 0.8
    star = (trans * star + (1. - trans) * img).astype(np.uint8)

    # select peaks above the threshold
    ps = parseHeatmap(hm[0], heat_thresh)
    canonical, pred, color, score = [], [], [], []

    # mc dropout
    f1 = plt.figure()
    ax1 = f1.add_subplot(111)
    ax1.imshow(img)
    uncertainty_test(model, input_var, heat_thresh, ax1)

    for row, col in zip(ps[0], ps[1]):
        # channels 1-3 at the peak location
        x, y, z = ((hm[0, 1:4, row, col] + 0.5) * outputRes).astype(np.int32)
        dep = ((hm[0, 4, row, col] + 0.5) * outputRes).astype(np.int32)
        canonical.append([x, y, z])

        pred.append([col, outputRes - dep, outputRes - row])
        # kp confidence score
        score.append(hm[0, 0, row, col])

        color.append(
            (1.0 * x / outputRes, 1.0 * y / outputRes, 1.0 * z / outputRes))

        # Peak indices are scaled by 4 (inputRes / outputRes here) to get
        # image coordinates; thickness -1 draws a filled circle.
        center = (col * 4, row * 4)
        cv2.circle(img, center, 6, (0, 0, 255), -1)
        cv2.circle(img, center, 2, (int(z * 4), int(y * 4), int(x * 4)), -1)

    plt.axis('off')
    ax1.get_xaxis().set_visible(False)
    ax1.get_yaxis().set_visible(False)
    plt.show()
    f1.savefig('kp_cov.png', bbox_inches='tight', pad_inches=0)

    pred = np.array(pred).astype(np.float32)
    canonical = np.array(canonical).astype(np.float32)

    pointS = canonical * 1.0 / outputRes
    pointT = pred * 1.0 / outputRes

    # calculate viewpoint
    R, t, s = horn87(pointS.transpose(), pointT.transpose(), score)

    rotated_pred = s * np.dot(
        R, canonical.transpose()).transpose() + t * outputRes
def _to_float_var(tensor, gpu):
    """Wrap ``tensor`` as a float Variable, moved to GPU ``gpu`` if ``gpu > -1``."""
    if gpu > -1:
        # ``async`` is a reserved word since Python 3.7; PyTorch's
        # replacement keyword is ``non_blocking``.
        tensor = tensor.cuda(gpu, non_blocking=True)
    return torch.autograd.Variable(tensor).float()


def step(split, epoch, opt, dataLoader, model, criterion, optimizer=None):
    """Run one epoch of training or evaluation over ``dataLoader``.

    Args:
        split: ``'train'`` puts the model in training mode and performs an
            optimizer step per batch; any other value evaluates.
        epoch: epoch number, used only in the progress-bar text.
        opt: options object; this function reads ``expID``, ``regWeight``,
            ``GPU``, ``nStack``, ``test``, ``task``, ``DEBUG`` and
            ``debugPath``.
        dataLoader: iterable yielding ``(input, target, mask)`` batches.
        model: network returning an indexable sequence of ``opt.nStack``
            outputs.
        criterion: loss applied to each masked stack output.
        optimizer: required when ``split == 'train'``.

    Returns:
        A tuple ``(stats, preds)`` where ``stats`` is a dict with the average
        ``'Loss'`` and ``'LossStar'`` and ``preds`` is a list of
        ``{'map': ndarray}`` entries, filled only when evaluating with
        ``opt.test`` set.
    """
    if split == 'train':
        model.train()
    else:
        model.eval()
    preds = []
    Loss, LossStar = AverageMeter(), AverageMeter()

    nIters = len(dataLoader)
    bar = Bar('{}'.format(opt.expID), max=nIters)
    last = opt.nStack - 1
    # Dump debug images roughly 200 times per epoch. Integer division with a
    # floor of 1 avoids float modulo and a zero divisor when nIters < 200.
    debug_every = max(1, nIters // 200)

    for i, (inputs, target, mask) in enumerate(dataLoader):
        if mask.size(1) > 1:
            # Re-weight every mask channel except the first.
            mask[:, 1:, :, :] *= ref.outputRes * (opt.regWeight**0.5)
        input_var = _to_float_var(inputs, opt.GPU)
        target_var = _to_float_var(target, opt.GPU)
        mask_var = _to_float_var(mask, opt.GPU)
        output = model(input_var)

        # Keep an unmasked copy of the final prediction before masking.
        output_pred = output[last].data.cpu().numpy().copy()
        for k in range(opt.nStack):
            output[k] = mask_var * output[k]
        target_var = mask_var * target_var

        loss = 0
        for k in range(opt.nStack):
            loss += criterion(output[k], target_var)

        LossStar.update((
            (target.float()[:, 0, :, :] -
             output[last].cpu().data.float()[:, 0, :, :])**2).mean())
        # ``loss.data[0]`` fails on 0-dim tensors; ``item()`` is the
        # supported way to read a scalar loss.
        Loss.update(loss.item(), inputs.size(0))

        if split == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        elif opt.test:
            # Flip test: average the prediction with the prediction on the
            # flipped input. The view below fixes the batch size to 1.
            out = {}
            # Copy so that flipping does not overwrite the batch in place
            # (a CPU tensor's numpy() shares memory with the tensor).
            input_ = inputs.cpu().numpy().copy()
            input_[0] = Flip(input_[0]).copy()
            inputFlip_var = torch.autograd.Variable(
                torch.from_numpy(input_).view(
                    1, input_.shape[1], ref.inputRes,
                    ref.inputRes)).float()
            if opt.GPU > -1:
                # Only move to GPU when one is configured.
                inputFlip_var = inputFlip_var.cuda(opt.GPU)
            outputFlip = model(inputFlip_var)
            output_flip = outputFlip[last].data.cpu().numpy()
            output_flip[0] = Flip(output_flip[0])
            if not (opt.task == 'star'):
                output_flip[0, 1, :, :] = -output_flip[0, 1, :, :]
            output_pred = (output_pred + output_flip) / 2.0
            out['map'] = output_pred
            preds.append(out)

        # Set the suffix on this bar only, not on the Bar class.
        bar.suffix = '{split:5} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | LossStar {lossStar.avg:.6f}'.format(
            epoch,
            i,
            nIters,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=Loss,
            lossStar=LossStar,
            split=split)
        bar.next()

        if opt.DEBUG > 1 or (opt.DEBUG == 1 and i % debug_every == 0):
            for j in range(inputs.size(0)):
                debugger = Debugger()
                img = (inputs[j].numpy()[:3].transpose(1, 2, 0) * 256).astype(
                    np.uint8).copy()
                img2 = img.copy().astype(np.float32)
                img3 = img.copy().astype(np.float32)
                imgMNS = img.copy()
                out = (cv2.resize(
                    ((output[last][j, 0].data).cpu().numpy()).copy(),
                    (ref.inputRes, ref.inputRes)) * 256)
                gtmap = (cv2.resize((target[j, 0].cpu().numpy()).copy(),
                                    (ref.inputRes, ref.inputRes)) * 256)
                out[out < 0] = 0
                out[out > 255] = 255
                # Add the prediction to channel 0 and the ground truth to
                # channel 2 of separate copies of the image.
                img2[:, :, 0] = (img2[:, :, 0] + out)
                img2[img2 > 255] = 255
                img3[:, :, 2] = (img3[:, :, 2] + gtmap)
                img3[img3 > 255] = 255
                gtmap[gtmap > 255] = 255
                idx = i * inputs.size(0) + j if opt.DEBUG == 1 else 0
                img2, out, gtmap, img3 = img2.astype(np.uint8), out.astype(
                    np.uint8), gtmap.astype(np.uint8), img3.astype(np.uint8)

                if 'emb' in opt.task:
                    gt = []
                    ps = parseHeatmap(target[j].numpy())
                    print('ps', ps)
                    for k in range(len(ps[0])):
                        print('target', k, target[j, 1:4, ps[0][k],
                                                  ps[1][k]].numpy())
                        x, y, z = (
                            (target[j, 1:4, ps[0][k], ps[1][k]].numpy() + 0.5)
                            * 255).astype(np.int32)
                        gt.append(target[j, 1:4, ps[0][k], ps[1][k]].numpy())
                        cv2.circle(imgMNS, (ps[1][k] * 4, ps[0][k] * 4), 6,
                                   (int(x), int(y), int(z)), -1)

                    ps = parseHeatmap(output_pred[j])
                    for k in range(len(ps[0])):
                        print('pred', k, output_pred[j, 1:4, ps[0][k],
                                                     ps[1][k]])
                        x, y, z = (
                            (output_pred[j, 1:4, ps[0][k], ps[1][k]] + 0.5) *
                            255).astype(np.int32)
                        cv2.circle(imgMNS, (ps[1][k] * 4, ps[0][k] * 4), 4,
                                   (255, 255, 255), -1)
                        cv2.circle(imgMNS, (ps[1][k] * 4, ps[0][k] * 4), 2,
                                   (int(x), int(y), int(z)), -1)
                    debugger.addPoint3D(np.array(gt), c='auto', marker='o')
                debugger.addImg(imgMNS, '{}_mns'.format(idx))
                debugger.addImg(out, '{}_out'.format(idx))
                debugger.addImg(gtmap, '{}_gt'.format(idx))
                debugger.addImg(img, '{}_img'.format(idx))
                debugger.addImg(img2, '{}_img2'.format(idx))
                debugger.addImg(img3, '{}_img3'.format(idx))
                if opt.DEBUG == 1:
                    debugger.saveAllImg(path=opt.debugPath)
                else:
                    debugger.showAllImg(pause=not ('emb' in opt.task))
                if 'emb' in opt.task:
                    debugger.show3D()

    bar.finish()
    return {'Loss': Loss.avg, 'LossStar': LossStar.avg}, preds