コード例 #1
0
ファイル: dataset.py プロジェクト: cbiras/3DHPose
def Preprocessing(d, stage='train'):
    """Crop, resize and normalise a single annotated sample.

    The image is padded on every side by its largest dimension, a crop
    window centred on the bounding box is cut out with the aspect ratio of
    ``cfg.data_shape``, resized to ``cfg.data_shape`` and mean-subtracted.

    Args:
        d: Sample dict. Keys read here: ``'data'`` (the image array, or
            ``None``), ``'imgpath'`` (only read when ``'data'`` is
            ``None``), ``'bbox'`` (4 values; indices 0/1 are used as the
            top-left corner and 2/3 as width/height), and optionally
            ``'joints'`` (``cfg.nr_skeleton * 3`` values) and
            ``'segmentation'``.
        stage: ``'train'`` enlarges the crop by 25% and returns augmented
            images plus heatmaps; any other value returns the crop together
            with its coordinates in the original image.

    Returns:
        For ``stage == 'train'``: ``[imgs, heatmaps15, heatmaps11,
        heatmaps9, heatmaps7, valids]`` as float32, with images and
        heatmaps transposed by ``(0, 2, 3, 1)``. This assumes
        ``data_augmentation`` and ``joints_heatmap_gen`` return numpy
        arrays -- TODO confirm against their definitions.
        Otherwise: ``[imgs, details]`` where ``details`` is
        ``[min_x, min_y, max_x, max_y]`` of the crop with the padding
        offset removed.
    """
    height, width = cfg.data_shape
    imgs = []
    labels = []
    valids = []
    # Original author's note: this flag is set to False in cfg.
    if cfg.use_seg:
        segms = []

    vis = False
    # 'data' is expected to already hold the decoded image; the commented
    # call shows how it would be read from disk otherwise.
    img = d['data']  #cv2.imread(os.path.join(cfg.img_path, d['imgpath']))
    # hack(multiprocessing data provider): retry reading from disk while no
    # image is available.
    # NOTE(review): pdb.set_trace() is kept as in the original; it stops at
    # a debugger prompt before each retry -- confirm this is still wanted.
    while img is None:
        import pdb
        pdb.set_trace()
        print('read none image')
        time.sleep(np.random.rand() * 5)
        img = cv2.imread(os.path.join(cfg.img_path, d['imgpath']))

    # Keep a handle on the unpadded input: `img` is rebound to the resized
    # crop below, while the segmentation branch needs the original size.
    # (The original code read `ori_img` without ever assigning it.)
    ori_img = img

    # shape[0] is the height, shape[1] the width.
    add = max(img.shape[0], img.shape[1])
    # Pad every side by the largest image dimension, filled with the pixel
    # means, so the crop window can extend past the original image.
    bimg = cv2.copyMakeBorder(img,
                              add,
                              add,
                              add,
                              add,
                              borderType=cv2.BORDER_CONSTANT,
                              value=cfg.pixel_means.reshape(-1))

    bbox = np.array(d['bbox']).reshape(4, ).astype(np.float32)
    # Shift the box origin into padded-image coordinates.
    bbox[:2] += add

    if 'joints' in d:
        # One row per joint: (x, y, flag). The third column is returned as
        # `valid` further down -- presumably a visibility flag, TODO confirm.
        joints = np.array(d['joints']).reshape(cfg.nr_skeleton,
                                               3).astype(np.float32)
        # Shift joint coordinates into padded-image coordinates.
        joints[:, :2] += add
        # Joints whose flag is 0 get their coordinates pushed far outside
        # the image.
        inds = np.where(joints[:, -1] == 0)
        joints[inds, :2] = -1000000

    # Crop extent: the bounding box enlarged by the configured border.
    crop_width = bbox[2] * (1 + cfg.imgExtXBorder * 2)
    crop_height = bbox[3] * (1 + cfg.imgExtYBorder * 2)
    # Centre of the bounding box in padded-image coordinates.
    objcenter = np.array([bbox[0] + bbox[2] / 2., bbox[1] + bbox[3] / 2.])

    if stage == 'train':
        crop_width = crop_width * (1 + 0.25)
        crop_height = crop_height * (1 + 0.25)

    # Choose the side that limits the crop once the target aspect ratio is
    # imposed.
    if crop_height / height > crop_width / width:
        crop_size = crop_height
        min_shape = height
    else:
        crop_size = crop_width
        min_shape = width
    # Limit the crop so it does not extend past any edge of the padded image.
    crop_size = min(crop_size, objcenter[0] / width * min_shape * 2. - 1.)
    crop_size = min(
        crop_size, (bimg.shape[1] - objcenter[0]) / width * min_shape * 2. - 1)
    crop_size = min(crop_size, objcenter[1] / height * min_shape * 2. - 1.)
    crop_size = min(crop_size,
                    (bimg.shape[0] - objcenter[1]) / height * min_shape * 2. -
                    1)

    min_x = int(objcenter[0] - crop_size / 2. / min_shape * width)
    max_x = int(objcenter[0] + crop_size / 2. / min_shape * width)
    min_y = int(objcenter[1] - crop_size / 2. / min_shape * height)
    max_y = int(objcenter[1] + crop_size / 2. / min_shape * height)

    x_ratio = float(width) / (max_x - min_x)
    y_ratio = float(height) / (max_y - min_y)

    # Map joint coordinates into the resized crop.
    if 'joints' in d:
        joints[:, 0] = joints[:, 0] - min_x
        joints[:, 1] = joints[:, 1] - min_y

        joints[:, 0] *= x_ratio
        joints[:, 1] *= y_ratio
        label = joints[:, :2].copy()
        valid = joints[:, 2].copy()

    # From here on `img` is the crop of the padded image, resized to
    # (width, height).
    img = cv2.resize(bimg[min_y:max_y, min_x:max_x, :], (width, height))

    if stage != 'train':
        # Crop corners expressed in original (unpadded) image coordinates.
        details = np.asarray(
            [min_x - add, min_y - add, max_x - add, max_y - add])

    if cfg.use_seg is True and 'segmentation' in d:
        seg = get_seg(ori_img.shape[0], ori_img.shape[1], d['segmentation'])
        # Pad and crop the mask the same way as the image.
        # NOTE(review): the crop indices only line up if get_seg returns a
        # mask of the original image size -- confirm.
        add = max(seg.shape[0], seg.shape[1])
        bimg = cv2.copyMakeBorder(seg,
                                  add,
                                  add,
                                  add,
                                  add,
                                  borderType=cv2.BORDER_CONSTANT,
                                  value=(0, 0, 0))
        seg = cv2.resize(bimg[min_y:max_y, min_x:max_x], (width, height))
        segms.append(seg)

    # Debug visualisation; `vis` is hard-coded to False above.
    if vis:
        tmpimg = img.copy()
        from utils.visualize import draw_skeleton
        draw_skeleton(tmpimg, label.astype(int))
        cv2.imwrite('vis.jpg', tmpimg)
        from IPython import embed
        embed()

    # Subtract the pixel means (the same values used as border colour above).
    img = img - cfg.pixel_means
    if cfg.pixel_norm:
        img = img / 255.
    # Reorder axes (0, 1, 2) -> (2, 0, 1).
    img = img.transpose(2, 0, 1)
    imgs.append(img)
    if 'joints' in d:
        labels.append(label.reshape(-1))
        valids.append(valid.reshape(-1))

    if stage == 'train':
        imgs, labels, valids = data_augmentation(imgs, labels, valids)
        # One heatmap set per Gaussian kernel, in the order 15, 11, 9, 7.
        heatmaps = [
            joints_heatmap_gen(imgs,
                               labels,
                               cfg.output_shape,
                               cfg.data_shape,
                               return_valid=False,
                               gaussian_kernel=kernel)
            for kernel in (cfg.gk15, cfg.gk11, cfg.gk9, cfg.gk7)
        ]

        return (
            [imgs.astype(np.float32).transpose(0, 2, 3, 1)] +
            [hm.astype(np.float32).transpose(0, 2, 3, 1) for hm in heatmaps] +
            [valids.astype(np.float32)])
    else:
        # The normalised crop(s) together with the crop coordinates.
        return [np.asarray(imgs).astype(np.float32), details]
コード例 #2
0
ファイル: dataset.py プロジェクト: zju3dv/mvpose
def Preprocessing(d, stage='train'):
    """Crop, resize and normalise a single annotated sample.

    The image is padded on every side by its largest dimension, a crop
    window centred on the bounding box is cut out with the aspect ratio of
    ``cfg.data_shape``, resized to ``cfg.data_shape`` and mean-subtracted.

    Args:
        d: Sample dict. Keys read here: ``'data'`` (the image array, or
            ``None``), ``'imgpath'`` (only read when ``'data'`` is
            ``None``), ``'bbox'`` (4 values; indices 0/1 are used as the
            top-left corner and 2/3 as width/height), and optionally
            ``'joints'`` (``cfg.nr_skeleton * 3`` values) and
            ``'segmentation'``.
        stage: ``'train'`` enlarges the crop by 25% and returns augmented
            images plus heatmaps; any other value returns the crop together
            with its coordinates in the original image.

    Returns:
        For ``stage == 'train'``: ``[imgs, heatmaps15, heatmaps11,
        heatmaps9, heatmaps7, valids]`` as float32, with images and
        heatmaps transposed by ``(0, 2, 3, 1)``. This assumes
        ``data_augmentation`` and ``joints_heatmap_gen`` return numpy
        arrays -- TODO confirm against their definitions.
        Otherwise: ``[imgs, details]`` where ``details`` is
        ``[min_x, min_y, max_x, max_y]`` of the crop with the padding
        offset removed.
    """
    height, width = cfg.data_shape
    imgs = []
    labels = []
    valids = []
    if cfg.use_seg:
        segms = []

    vis = False
    img = d['data']#cv2.imread(os.path.join(cfg.img_path, d['imgpath']))
    # hack(multiprocessing data provider): retry reading from disk while no
    # image is available.
    # NOTE(review): pdb.set_trace() is kept as in the original; it stops at
    # a debugger prompt before each retry -- confirm this is still wanted.
    while img is None:
        import pdb
        pdb.set_trace()
        print('read none image')
        time.sleep(np.random.rand() * 5)
        img = cv2.imread(os.path.join(cfg.img_path, d['imgpath']))

    # Keep a handle on the unpadded input: `img` is rebound to the resized
    # crop below, while the segmentation branch needs the original size.
    # (The original code read `ori_img` without ever assigning it.)
    ori_img = img

    # Pad every side by the largest image dimension, filled with the pixel
    # means, so the crop window can extend past the original image.
    add = max(img.shape[0], img.shape[1])
    bimg = cv2.copyMakeBorder(img, add, add, add, add, borderType=cv2.BORDER_CONSTANT,
                              value=cfg.pixel_means.reshape(-1))

    # Shift the box origin into padded-image coordinates.
    bbox = np.array(d['bbox']).reshape(4, ).astype(np.float32)
    bbox[:2] += add

    if 'joints' in d:
        # One row per joint: (x, y, flag). Rows whose flag is 0 get their
        # coordinates pushed far outside the image.
        joints = np.array(d['joints']).reshape(cfg.nr_skeleton, 3).astype(np.float32)
        joints[:, :2] += add
        inds = np.where(joints[:, -1] == 0)
        joints[inds, :2] = -1000000

    # Crop extent: the bounding box enlarged by the configured border,
    # centred on the box centre.
    crop_width = bbox[2] * (1 + cfg.imgExtXBorder * 2)
    crop_height = bbox[3] * (1 + cfg.imgExtYBorder * 2)
    objcenter = np.array([bbox[0] + bbox[2] / 2., bbox[1] + bbox[3] / 2.])

    if stage == 'train':
        crop_width = crop_width * (1 + 0.25)
        crop_height = crop_height * (1 + 0.25)

    # Choose the side that limits the crop once the target aspect ratio is
    # imposed.
    if crop_height / height > crop_width / width:
        crop_size = crop_height
        min_shape = height
    else:
        crop_size = crop_width
        min_shape = width
    # Limit the crop so it does not extend past any edge of the padded image.
    crop_size = min(crop_size, objcenter[0] / width * min_shape * 2. - 1.)
    crop_size = min(crop_size, (bimg.shape[1] - objcenter[0]) / width * min_shape * 2. - 1)
    crop_size = min(crop_size, objcenter[1] / height * min_shape * 2. - 1.)
    crop_size = min(crop_size, (bimg.shape[0] - objcenter[1]) / height * min_shape * 2. - 1)

    min_x = int(objcenter[0] - crop_size / 2. / min_shape * width)
    max_x = int(objcenter[0] + crop_size / 2. / min_shape * width)
    min_y = int(objcenter[1] - crop_size / 2. / min_shape * height)
    max_y = int(objcenter[1] + crop_size / 2. / min_shape * height)

    x_ratio = float(width) / (max_x - min_x)
    y_ratio = float(height) / (max_y - min_y)

    # Map joint coordinates into the resized crop.
    if 'joints' in d:
        joints[:, 0] = joints[:, 0] - min_x
        joints[:, 1] = joints[:, 1] - min_y

        joints[:, 0] *= x_ratio
        joints[:, 1] *= y_ratio
        label = joints[:, :2].copy()
        valid = joints[:, 2].copy()

    # From here on `img` is the crop of the padded image, resized to
    # (width, height).
    img = cv2.resize(bimg[min_y:max_y, min_x:max_x, :], (width, height))

    if stage != 'train':
        # Crop corners expressed in original (unpadded) image coordinates.
        details = np.asarray([min_x - add, min_y - add, max_x - add, max_y - add])

    if cfg.use_seg is True and 'segmentation' in d:
        seg = get_seg(ori_img.shape[0], ori_img.shape[1], d['segmentation'])
        # Pad and crop the mask the same way as the image.
        # NOTE(review): the crop indices only line up if get_seg returns a
        # mask of the original image size -- confirm.
        add = max(seg.shape[0], seg.shape[1])
        bimg = cv2.copyMakeBorder(seg, add, add, add, add, borderType=cv2.BORDER_CONSTANT, value=(0, 0, 0))
        seg = cv2.resize(bimg[min_y:max_y, min_x:max_x], (width, height))
        segms.append(seg)

    # Debug visualisation; `vis` is hard-coded to False above.
    if vis:
        tmpimg = img.copy()
        from utils.visualize import draw_skeleton
        draw_skeleton(tmpimg, label.astype(int))
        cv2.imwrite('vis.jpg', tmpimg)
        from IPython import embed; embed()

    # Mean-subtract, optionally scale, and reorder axes (0, 1, 2) -> (2, 0, 1).
    img = img - cfg.pixel_means
    if cfg.pixel_norm:
        img = img / 255.
    img = img.transpose(2, 0, 1)
    imgs.append(img)
    if 'joints' in d:
        labels.append(label.reshape(-1))
        valids.append(valid.reshape(-1))

    if stage == 'train':
        imgs, labels, valids = data_augmentation(imgs, labels, valids)
        # One heatmap set per Gaussian kernel, in the order 15, 11, 9, 7.
        heatmaps = [
            joints_heatmap_gen(imgs, labels, cfg.output_shape, cfg.data_shape, return_valid=False,
                               gaussian_kernel=kernel)
            for kernel in (cfg.gk15, cfg.gk11, cfg.gk9, cfg.gk7)
        ]

        return ([imgs.astype(np.float32).transpose(0, 2, 3, 1)] +
                [hm.astype(np.float32).transpose(0, 2, 3, 1) for hm in heatmaps] +
                [valids.astype(np.float32)])
    else:
        return [np.asarray(imgs).astype(np.float32), details]