Example #1
    def loadSequence(self, objName):
        '''
        :returns: NamedImgSequence
        '''

        w = 64
        img = Image.new('RGB', size=(w, w), color=(0, 0, 0))
        draw = ImageDraw.Draw(img)

        b = math.floor(w / 5)

        if objName == 'circle':
            draw.ellipse([(b, b), (w - b, w - b)], fill=(255, 0, 0))

        elif objName == 'square':
            draw.rectangle([(b, b), (w - b, w - b)], fill=(255, 0, 0))

        elif objName == 'triangle':
            draw.polygon([(w / 2, b), (b, w - b), (w - b, w - b)],
                         fill=(255, 0, 0))

        del draw

        data = []
        data.append(Frame(img, None, None, None, objName, ""))
        return NamedImgSequence(objName, data)
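A minimal standalone sketch of the drawing logic used above, assuming only Pillow is installed (the surrounding importer class and the Frame/NamedImgSequence types are omitted):

    import math
    from PIL import Image, ImageDraw

    w = 64
    b = math.floor(w / 5)  # margin around the shape, as above
    img = Image.new('RGB', size=(w, w), color=(0, 0, 0))
    draw = ImageDraw.Draw(img)
    # the 'circle' branch: a red ellipse inscribed in the inner square
    draw.ellipse([(b, b), (w - b, w - b)], fill=(255, 0, 0))
    img.save('circle.png')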
Example #2
    def jitterImgSeqs(self, N, maxRot=30.0, maxTranslate=2.0, maxScale=0.1):
        imj = transformations.ImageJitterer()
        seqs = []
        for imgSeq in self._imgSeqs:
            jitteredImgs = imj.getJitteredImgSeq(imgSeq.data,
                                                 N,
                                                 maxRot=maxRot,
                                                 maxTranslate=maxTranslate,
                                                 maxScale=maxScale)
            seqs.append(NamedImgSequence(imgSeq.name, jitteredImgs))
        self._imgSeqs = seqs
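ImageJitterer comes from the project's transformations module; as a rough, hypothetical stand-in, a single jittered copy can be produced with Pillow alone, sampling rotation, translation, and scale from the same ranges the method exposes:

    import random
    from PIL import Image

    def jitter_once(img, maxRot=30.0, maxTranslate=2.0, maxScale=0.1, rng=random):
        # hypothetical helper: one random similarity transform per call
        rot = rng.uniform(-maxRot, maxRot)
        tx = int(rng.uniform(-maxTranslate, maxTranslate))
        ty = int(rng.uniform(-maxTranslate, maxTranslate))
        scale = 1.0 + rng.uniform(-maxScale, maxScale)
        out = img.rotate(rot, resample=Image.BILINEAR, translate=(tx, ty))
        w, h = out.size
        return out.resize((int(w * scale), int(h * scale)), Image.BILINEAR)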
Example #3
    def loadSequence(self, seqName, Nmax=float('inf'), shuffle=False, rng=None, docom=False, cube=None, hand=None):
        """
        Load an image sequence from the dataset
        :param seqName: sequence name, e.g. train
        :param Nmax: maximum number of samples to load
        :return: returns named image sequence
        """
        if cube is None:
            config = {'cube': self.default_cubes[seqName]}
        else:
            assert isinstance(cube, tuple)
            assert len(cube) == 3
            config = {'cube': cube}

        pickleCache = '{}/{}_{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, self.allJoints, HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
        if self.useCache:
            if os.path.isfile(pickleCache):
                print("Loading cache data from {}".format(pickleCache))
                f = open(pickleCache, 'rb')
                (seqName, data, config) = cPickle.load(f)
                f.close()

                # shuffle data
                if shuffle and rng is not None:
                    print("Shuffling")
                    rng.shuffle(data)
                if not np.isinf(Nmax):
                    return NamedImgSequence(seqName, data[0:int(Nmax)], config)
                else:
                    return NamedImgSequence(seqName, data, config)

        self.loadRefineNetLazy(self.refineNet)

        # Load the dataset
        objdir = '{}/{}/'.format(self.basepath, seqName)
        trainlabels = '{}/{}/joint_data.mat'.format(self.basepath, seqName)

        mat = scipy.io.loadmat(trainlabels)
        names = mat['joint_names'][0]
        joints3D = mat['joint_xyz'][0]
        joints2D = mat['joint_uvd'][0]
        if self.allJoints:
            eval_idxs = np.arange(36)
        else:
            eval_idxs = self.restrictedJointsEval

        self.numJoints = len(eval_idxs)

        txt = 'Loading {}'.format(seqName)
        pbar = pb.ProgressBar(maxval=joints3D.shape[0], widgets=[txt, pb.Percentage(), pb.Bar()])
        pbar.start()

        data = []
        i = 0
        for line in range(joints3D.shape[0]):
            dptFileName = '{0:s}/depth_1_{1:07d}.png'.format(objdir, line+1)

            if not os.path.isfile(dptFileName):
                print("File {} does not exist!".format(dptFileName))
                i += 1
                continue
            dpt = self.loadDepthMap(dptFileName)
            if hand is not None:
                raise NotImplementedError()

            # joints in image coordinates
            gtorig = np.zeros((self.numJoints, 3), np.float32)
            jt = 0
            for ii in range(joints2D.shape[1]):
                if ii not in eval_idxs:
                    continue
                gtorig[jt, 0] = joints2D[line, ii, 0]
                gtorig[jt, 1] = joints2D[line, ii, 1]
                gtorig[jt, 2] = joints2D[line, ii, 2]
                jt += 1

            # normalized joints in 3D coordinates
            gt3Dorig = np.zeros((self.numJoints, 3), np.float32)
            jt = 0
            for jj in range(joints3D.shape[1]):
                if jj not in eval_idxs:
                    continue
                gt3Dorig[jt, 0] = joints3D[line, jj, 0]
                gt3Dorig[jt, 1] = joints3D[line, jj, 1]
                gt3Dorig[jt, 2] = joints3D[line, jj, 2]
                jt += 1
            # print gt3D
            # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtorig,0,gt3Dorig,gt3Dorig,0,dptFileName,'',''))

            # Detect hand
            hd = HandDetector(dpt, self.fx, self.fy, refineNet=self.refineNet, importer=self)
            if not hd.checkImage(1):
                print("Skipping image {}, no content".format(dptFileName))
                i += 1
                continue
            try:
                dpt, M, com = hd.cropArea3D(com=gtorig[self.crop_joint_idx], size=config['cube'], docom=docom)
            except UserWarning:
                print("Skipping image {}, no hand detected".format(dptFileName))
                continue

            com3D = self.jointImgTo3D(com)
            gt3Dcrop = gt3Dorig - com3D  # normalize to com
            gtcrop = transformPoints2D(gtorig, M)

            # print("{}".format(gt3Dorig))
            # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtcrop,M,gt3Dorig,gt3Dcrop,com3D,dptFileName,'',''))

            data.append(DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D, dptFileName,
                                   '', self.sides[seqName], {}))
            pbar.update(i)
            i += 1

            # early stop
            if len(data) >= Nmax:
                break

        pbar.finish()
        print("Loaded {} samples.".format(len(data)))

        if self.useCache:
            print("Save cache data to {}".format(pickleCache))
            f = open(pickleCache, 'wb')
            cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()

        # shuffle data
        if shuffle and rng is not None:
            print("Shuffling")
            rng.shuffle(data)
        return NamedImgSequence(seqName, data, config)
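The cache handling above follows a common check/load/compute/store pattern; a minimal sketch of it in isolation, using Python 3's pickle in place of cPickle:

    import os
    import pickle

    def load_with_cache(cache_path, compute_fn):
        # return cached results if present, otherwise compute and store them
        if os.path.isfile(cache_path):
            with open(cache_path, 'rb') as f:
                return pickle.load(f)
        result = compute_fn()
        with open(cache_path, 'wb') as f:
            pickle.dump(result, f, protocol=pickle.HIGHEST_PROTOCOL)
        return result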
Example #4
    def loadSequence(self, seqName, subSeq=None, Nmax=float('inf'), shuffle=False, rng=None, docom=False, cube=None, hand=None):
        """
        Load an image sequence from the dataset
        :param seqName: sequence name, e.g. subject1
        :param Nmax: maximum number of samples to load
        :return: returns named image sequence
        """

        if (subSeq is not None) and (not isinstance(subSeq, list)):
            raise TypeError("subSeq must be None or list")

        if cube is None:
            config = {'cube': self.default_cubes[seqName]}
        else:
            assert isinstance(cube, tuple)
            assert len(cube) == 3
            config = {'cube': cube}

        if subSeq is None:
            pickleCache = '{}/{}_{}_None_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
        else:
            pickleCache = '{}/{}_{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, ''.join(subSeq), HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
        if self.useCache and os.path.isfile(pickleCache):
            print("Loading cache data from {}".format(pickleCache))
            f = open(pickleCache, 'rb')
            (seqName, data, config) = cPickle.load(f)
            f.close()
            # shuffle data
            if shuffle and rng is not None:
                print("Shuffling")
                rng.shuffle(data)
            if not np.isinf(Nmax):
                return NamedImgSequence(seqName, data[0:int(Nmax)], config)
            else:
                return NamedImgSequence(seqName, data, config)

        self.loadRefineNetLazy(self.refineNet)

        # Load the dataset
        objdir = '{}/{}/'.format(self.basepath, seqName)
        subdirs = sorted([name for name in os.listdir(objdir) if os.path.isdir(os.path.join(objdir, name))])

        txt = 'Loading {}'.format(seqName)
        nImgs = sum([len(files) for r, d, files in os.walk(objdir)]) // 2
        pbar = pb.ProgressBar(maxval=nImgs, widgets=[txt, pb.Percentage(), pb.Bar()])
        pbar.start()

        data = []
        pi = 0
        for subdir in subdirs:
            # check for subsequences and skip them if necessary
            subSeqName = ''
            if subSeq is not None:
                if subdir not in subSeq:
                    continue

                subSeqName = subdir

            # iterate all subdirectories
            trainlabels = '{}/{}/joint.txt'.format(objdir, subdir)

            inputfile = open(trainlabels)
            # read number of samples
            nImgs = int(inputfile.readline())

            for i in range(nImgs):
                # early stop
                if len(data) >= Nmax:
                    break

                line = inputfile.readline()
                part = line.split(' ')

                dptFileName = '{}/{}/{}_depth.bin'.format(objdir, subdir, str(i).zfill(6))

                if not os.path.isfile(dptFileName):
                    print("File {} does not exist!".format(dptFileName))
                    continue
                dpt = self.loadDepthMap(dptFileName)
                if hand is not None:
                    raise NotImplementedError()

                # joints in image coordinates
                gt3Dorig = np.zeros((self.numJoints, 3), np.float32)
                for joint in range(gt3Dorig.shape[0]):
                    for xyz in range(0, 3):
                        gt3Dorig[joint, xyz] = part[joint*3+xyz]

                # invert axis
                # gt3Dorig[:, 0] *= (-1.)
                # gt3Dorig[:, 1] *= (-1.)
                gt3Dorig[:, 2] *= (-1.)

                # normalized joints in 3D coordinates
                gtorig = self.joints3DToImg(gt3Dorig)
                # print gt3D
                # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtorig,0,gt3Dorig,gt3Dcrop,com3D,dptFileName,'',''))
                # Detect hand
                hd = HandDetector(dpt, self.fx, self.fy, refineNet=self.refineNet, importer=self)
                if not hd.checkImage(1.):
                    print("Skipping image {}, no content".format(dptFileName))
                    continue

                try:
                    dpt, M, com = hd.cropArea3D(com=gtorig[self.crop_joint_idx], size=config['cube'], docom=docom)
                except UserWarning:
                    print("Skipping image {}, no hand detected".format(dptFileName))
                    continue

                com3D = self.jointImgTo3D(com)
                gt3Dcrop = gt3Dorig - com3D  # normalize to com

                gtcrop = transformPoints2D(gtorig, M)

                # print("{}".format(gt3Dorig))
                # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtcrop,M,gt3Dorig,gt3Dcrop,com3D,dptFileName,'','',{}))

                data.append(DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D,
                                       dptFileName, subSeqName, self.sides[seqName], {}))
                pbar.update(pi)
                pi += 1

            inputfile.close()

        pbar.finish()
        print("Loaded {} samples.".format(len(data)))

        if self.useCache:
            print("Save cache data to {}".format(pickleCache))
            f = open(pickleCache, 'wb')
            cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()

        # shuffle data
        if shuffle and rng is not None:
            print("Shuffling")
            rng.shuffle(data)
        return NamedImgSequence(seqName, data, config)
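The joint.txt files read above start with a sample count followed by one line of numJoints * 3 space-separated coordinates per image; a hedged sketch of that parsing on its own (numJoints is dataset-specific):

    import numpy as np

    def read_joint_file(path, numJoints):
        # returns an (nImgs, numJoints, 3) float32 array of 3D joints
        with open(path) as f:
            nImgs = int(f.readline())
            joints = np.empty((nImgs, numJoints, 3), np.float32)
            for i in range(nImgs):
                part = f.readline().split(' ')
                joints[i] = np.asarray(part[:numJoints * 3],
                                       np.float32).reshape(numJoints, 3)
        return joints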
Example #5
    def loadSequence(self, seqName, subSeq=None, Nmax=float('inf'), shuffle=False, rng=None, docom=False, cube=None,
                     hand=None):
        """
        Load an image sequence from the dataset
        :param seqName: sequence name, e.g. train
        :param subSeq: list of subsequence names, e.g. 0, 45, 122-5
        :param Nmax: maximum number of samples to load
        :return: returns named image sequence
        """

        if (subSeq is not None) and (not isinstance(subSeq, list)):
            raise TypeError("subSeq must be None or list")

        if cube is None:
            config = {'cube': self.default_cubes[seqName]}
        else:
            assert isinstance(cube, tuple)
            assert len(cube) == 3
            config = {'cube': cube}

        if subSeq is None:
            pickleCache = '{}/{}_{}_None_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
        else:
            pickleCache = '{}/{}_{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, ''.join(subSeq), HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
        if self.useCache:
            if os.path.isfile(pickleCache):
                print("Loading cache data from {}".format(pickleCache))
                f = open(pickleCache, 'rb')
                (seqName, data, config) = cPickle.load(f)
                f.close()

                # shuffle data
                if shuffle and rng is not None:
                    print("Shuffling")
                    rng.shuffle(data)
                if not np.isinf(Nmax):
                    return NamedImgSequence(seqName, data[0:int(Nmax)], config)
                else:
                    return NamedImgSequence(seqName, data, config)

            # check for multiple subsequences
            if subSeq is not None:
                if len(subSeq) > 1:
                    missing = False
                    for i in range(len(subSeq)):
                        if not os.path.isfile('{}/{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, subSeq[i], HandDetector.detectionModeToString(docom, self.refineNet is not None))):
                            missing = True
                            print("missing: {}".format(subSeq[i]))
                            break

                    if not missing:
                        # load first data
                        pickleCache = '{}/{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, subSeq[0], HandDetector.detectionModeToString(docom, self.refineNet is not None))
                        print("Loading cache data from {}".format(pickleCache))
                        f = open(pickleCache, 'rb')
                        (seqName, fullData, config) = cPickle.load(f)
                        f.close()
                        # load rest of data
                        for i in range(1, len(subSeq)):
                            pickleCache = '{}/{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, subSeq[i], HandDetector.detectionModeToString(docom, self.refineNet is not None))
                            print("Loading cache data from {}".format(pickleCache))
                            f = open(pickleCache, 'rb')
                            (seqName, data, config) = cPickle.load(f)
                            fullData.extend(data)
                            f.close()

                        # shuffle data
                        if shuffle and rng is not None:
                            print("Shuffling")
                            rng.shuffle(fullData)
                        if not np.isinf(Nmax):
                            return NamedImgSequence(seqName, fullData[0:int(Nmax)], config)
                        else:
                            return NamedImgSequence(seqName, fullData, config)

        self.loadRefineNetLazy(self.refineNet)

        # Load the dataset
        objdir = '{}/Depth/'.format(self.basepath)
        trainlabels = '{}/{}.txt'.format(self.basepath, seqName)

        inputfile = open(trainlabels)
        
        txt = 'Loading {}'.format(seqName)
        pbar = pb.ProgressBar(maxval=len(inputfile.readlines()), widgets=[txt, pb.Percentage(), pb.Bar()])
        pbar.start()
        inputfile.seek(0)
        
        data = []
        i = 0
        for line in inputfile:
            # early stop
            if len(data) >= Nmax:
                break

            part = line.split(' ')
            # check for subsequences and skip them if necessary
            subSeqName = ''
            if subSeq is not None:
                p = part[0].split('/')
                # handle original data (unrotated '0') separately
                if ('0' in subSeq) and len(p[0]) > 6:
                    pass
                elif not('0' in subSeq) and len(p[0]) > 6:
                    i += 1
                    continue
                elif (p[0] in subSeq) and len(p[0]) <= 6:
                    pass
                elif not(p[0] in subSeq) and len(p[0]) <= 6:
                    i += 1
                    continue

                if len(p[0]) <= 6:
                    subSeqName = p[0]
                else:
                    subSeqName = '0'

            dptFileName = '{}/{}'.format(objdir, part[0])

            if not os.path.isfile(dptFileName):
                print("File {} does not exist!".format(dptFileName))
                i += 1
                continue
            dpt = self.loadDepthMap(dptFileName)
            if hand is not None:
                raise NotImplementedError()

            # joints in image coordinates
            gtorig = np.zeros((self.numJoints, 3), np.float32)
            for joint in range(self.numJoints):
                for xyz in range(0, 3):
                    gtorig[joint, xyz] = part[joint*3+xyz+1]

            # normalized joints in 3D coordinates
            gt3Dorig = self.jointsImgTo3D(gtorig)
            # print gt3D
            # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtorig,0,gt3Dorig,gt3Dcrop,0,dptFileName,subSeqName,''))

            # Detect hand
            hd = HandDetector(dpt, self.fx, self.fy, refineNet=self.refineNet, importer=self)
            if not hd.checkImage(1):
                print("Skipping image {}, no content".format(dptFileName))
                i += 1
                continue
            try:
                dpt, M, com = hd.cropArea3D(com=gtorig[self.crop_joint_idx], size=config['cube'], docom=docom)
            except UserWarning:
                print("Skipping image {}, no hand detected".format(dptFileName))
                continue

            com3D = self.jointImgTo3D(com)
            gt3Dcrop = gt3Dorig - com3D  # normalize to com
            gtcrop = transformPoints2D(gtorig, M)

            # print("{}".format(gt3Dorig))
            # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtcrop,M,gt3Dorig,gt3Dcrop,com3D,dptFileName,subSeqName,''))

            data.append(DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D, dptFileName,
                                   subSeqName, 'left', {}))
            pbar.update(i)
            i += 1
                   
        inputfile.close()
        pbar.finish()
        print("Loaded {} samples.".format(len(data)))

        if self.useCache:
            print("Save cache data to {}".format(pickleCache))
            f = open(pickleCache, 'wb')
            cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()

        # shuffle data
        if shuffle and rng is not None:
            print("Shuffling")
            rng.shuffle(data)
        return NamedImgSequence(seqName, data, config)
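The subsequence filter above hinges on a filename-length heuristic: path components of at most six characters name real subsequences, while longer components belong to the unrotated original data, labelled '0'. Extracted into a hypothetical helper:

    def sub_seq_name(first_path_part):
        # <= 6 chars: a real subsequence name; longer: original data ('0')
        return first_path_part if len(first_path_part) <= 6 else '0'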
Example #6
    def loadSequence(self,
                     seqName,
                     camera=None,
                     Nmax=float('inf'),
                     shuffle=False,
                     rng=None):
        """
        Load an image sequence from the dataset
        :param seqName: sequence name, e.g. subject1
        :param Nmax: maximum number of samples to load
        :return: returns named image sequence
        """

        config = {'cube': (220, 220, 220)}

        pickleCache = '{}/{}_{}_{}_cache.pkl'.format(self.cacheDir,
                                                     self.__class__.__name__,
                                                     seqName, camera)
        if self.useCache and os.path.isfile(pickleCache):
            print("Loading cache data from {}".format(pickleCache))
            f = open(pickleCache, 'rb')
            (seqName, data, config) = cPickle.load(f)
            f.close()
            # shuffle data
            if shuffle and rng is not None:
                print("Shuffling")
                rng.shuffle(data)
            if not np.isinf(Nmax):
                return NamedImgSequence(seqName, data[0:int(Nmax)], config)
            else:
                return NamedImgSequence(seqName, data, config)

        # Load the dataset
        objdir = os.path.join(self.basepath, seqName)
        if camera is not None:
            dflt = "c{0:02d}_*.npy".format(camera)
        else:
            dflt = '*.npy'
        dptFiles = sorted([
            os.path.join(dirpath, f)
            for dirpath, dirnames, files in os.walk(objdir)
            for f in fnmatch.filter(files, dflt)
        ])
        # dptFiles = sorted(glob.glob(os.path.join(objdir, '*exr')))

        if camera is not None:
            lflt = "c{0:02d}_*anno_blender.txt".format(camera)
        else:
            lflt = '*anno_blender.txt'
        labelFiles = sorted([
            os.path.join(dirpath, f)
            for dirpath, dirnames, files in os.walk(objdir)
            for f in fnmatch.filter(files, lflt)
        ])
        #labelFiles = sorted(glob.glob(os.path.join(objdir, '*txt')))

        # sync lists
        newdpt = []
        newlbl = []
        if camera is not None:
            number_idx = 1
        else:
            number_idx = 0
        labelIdx = [
            ''.join(os.path.basename(lbl).split('_')[number_idx])
            for lbl in labelFiles
        ]
        for dpt in dptFiles:
            if ''.join(
                    os.path.basename(dpt).split('_')[number_idx]) in labelIdx:
                newdpt.append(dpt)
                newlbl.append(labelFiles[labelIdx.index(''.join(
                    os.path.basename(dpt).split('_')[number_idx]))])

        dptFiles = newdpt
        labelFiles = newlbl

        assert len(dptFiles) == len(labelFiles)

        txt = 'Loading {}'.format(seqName)
        nImgs = len(dptFiles)
        pbar = pb.ProgressBar(maxval=nImgs,
                              widgets=[txt, pb.Percentage(),
                                       pb.Bar()])
        pbar.start()

        data = []
        for i in range(nImgs):
            labelFileName = labelFiles[i]
            dptFileName = dptFiles[i]

            if not os.path.isfile(dptFileName):
                print("File {} does not exist!".format(dptFileName))
                continue
            dpt = self.loadDepthMap(dptFileName)

            if not os.path.isfile(labelFileName):
                print("File {} does not exist!".format(labelFileName))
                continue

            # joints in image coordinates
            gtorig = np.genfromtxt(labelFileName)
            gtorig = gtorig.reshape((gtorig.shape[0] // 3, 3))
            gtorig[:, 2] *= 1000.

            # normalized joints in 3D coordinates
            gt3Dorig = self.jointsImgTo3D(gtorig)

            ### Shortening the tip bone, BUGFIX
            tips = [3, 8, 13, 18, 23]
            shrink_factor = 0.85
            for joint_idx in tips:
                t = gt3Dorig[joint_idx, :] - gt3Dorig[joint_idx - 1, :]
                gt3Dorig[joint_idx, :] = gt3Dorig[joint_idx - 1, :] + shrink_factor * t
            gtorig = self.joints3DToImg(gt3Dorig)
            ###

            # print gtorig, gt3Dorig
            # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtorig,0,gt3Dorig,0,0,dptFileName,'','',{}))
            # Detect hand
            hd = HandDetector(dpt, self.fx, self.fy, importer=self)
            if not hd.checkImage(1.):
                print("Skipping image {}, no content".format(dptFileName))
                continue

            try:
                dpt, M, com = hd.cropArea3D(gtorig[10], size=config['cube'])
            except UserWarning:
                print(
                    "Skipping image {}, no hand detected".format(dptFileName))
                continue

            com3D = self.jointImgTo3D(com)
            gt3Dcrop = gt3Dorig - com3D  # normalize to com

            gtcrop = np.zeros((gt3Dorig.shape[0], 3), np.float32)
            for joint in range(gtcrop.shape[0]):
                t = transformPoint2D(gtorig[joint], M)
                gtcrop[joint, 0] = t[0]
                gtcrop[joint, 1] = t[1]
                gtcrop[joint, 2] = gtorig[joint, 2]

            # print("{}".format(gt3Dorig))
            # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtcrop,M,gt3Dorig,gt3Dcrop,com3D,dptFileName,'',''))

            data.append(
                DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig,
                           gt3Dcrop, com3D, dptFileName, '',
                           self.sides[seqName], {}))
            pbar.update(i)

            # early stop
            if len(data) >= Nmax:
                break

        pbar.finish()
        print("Loaded {} samples.".format(len(data)))

        if self.useCache:
            print("Save cache data to {}".format(pickleCache))
            f = open(pickleCache, 'wb')
            cPickle.dump((seqName, data, config),
                         f,
                         protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()

        # shuffle data
        if shuffle and rng is not None:
            print("Shuffling")
            rng.shuffle(data)
        return NamedImgSequence(seqName, data, config)
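The tip-bone 'BUGFIX' above pulls each fingertip toward its parent joint along the bone vector; the same operation as a standalone numpy sketch:

    import numpy as np

    def shorten_tips(joints3D, tips=(3, 8, 13, 18, 23), shrink_factor=0.85):
        # move each tip to parent + shrink_factor * (tip - parent)
        out = joints3D.copy()
        for t in tips:
            bone = out[t] - out[t - 1]
            out[t] = out[t - 1] + shrink_factor * bone
        return out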
Example #7
    def loadSequence(self,
                     seqName,
                     zRotInv=0,
                     inputMode=2,
                     cropAtGtPos=False,
                     cropSize=None,
                     targetSize=None,
                     imgNums=None):

        objdir = '{}{}/data/'.format(self.basepath, seqName)
        s = '{}color*.jpg'.format(objdir)

        imgFileNames = glob.glob(s)
        imgFileNames = sorted(imgFileNames)

        assert len(imgFileNames) > 0, "No images under '{}'".format(s)

        # tmp
        #imgFileNames = imgFileNames[0:100]
        #imgFileNames = imgFileNames[0:19]
        #imgFileNames = imgFileNames[0:800]
        if imgNums is not None:
            imgFileNames = [imgFileNames[i] for i in imgNums]

        print("inputMode {}".format(inputMode))

        txt = 'Loading {}'.format(seqName)
        pbar = pb.ProgressBar(maxval=len(imgFileNames),
                              widgets=[txt, pb.Percentage(),
                                       pb.Bar()])
        pbar.start()

        data = []
        for i in range(0, len(imgFileNames)):
            imgFileName = imgFileNames[i]
            d, fn = os.path.split(imgFileName)  # @UnusedVariable
            numStr = fn[5:-4]
            #dptFileName = '{}depth{}.dpt'.format(objdir,numStr)
            #dptFileName = '{}inp/depth{}.dpt'.format(objdir,numStr)  # inpainted
            dptFileName = '{}inp/depth{}.png'.format(objdir,
                                                     numStr)  # inpainted
            rotFileName = '{}rot{}.rot'.format(objdir, numStr)
            traFileName = '{}tra{}.tra'.format(objdir, numStr)

            if (inputMode > 0):
                img = self.loadImage(imgFileName)
            else:
                img = None
            if (inputMode != 1):
                dpt = self.loadDepthMap(dptFileName)
            else:
                dpt = None
            mask = None  # or numpy.ones with the size of img?
            rot = self.loadMatFromTxt(rotFileName)
            tra = self.loadMatFromTxt(traFileName)

            #data.append({img:img,dpt:dpt,rot:rot,tra:tra})
            pose = LinemodObjPose(rot=rot, tra=tra, zRotInv=zRotInv)
            frame = LinemodTestFrame(img=img,
                                     dpt=dpt,
                                     mask=mask,
                                     pose=pose,
                                     className=seqName,
                                     filename=dptFileName)
            if cropAtGtPos:
                patch = self.cropPatchAtGTPos(frame, cropSize, targetSize,
                                              inputMode)
                #print("patch mn/mx {},{}".format(numpy.min(patch),numpy.max(patch)))
                data.append(patch)
                # keep only the first few full frames for debugging
                if i > 10:
                    frame.img = None
                    frame.dpt = None
                    frame.mask = None
            else:
                data.append(frame)
            pbar.update(i)

        return NamedImgSequence(seqName, data)
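loadMatFromTxt is project-specific; assuming the .rot and .tra files are plain whitespace-separated matrices, numpy could read them directly (file names below are placeholders):

    import numpy as np

    rot = np.loadtxt('rot0.rot')  # hypothetical 3x3 rotation matrix file
    tra = np.loadtxt('tra0.tra')  # hypothetical translation vector file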
Example #8
    def loadSequence(self,
                     objName,
                     imgNums=None,
                     cropSize=None,
                     targetSize=None,
                     zRotInv=0):

        floatX = theano.config.floatX  # @UndefinedVariable

        if cropSize is None:
            cropSize = 20.0  # 28.
        if targetSize is None:
            targetSize = 64

        seqData = []

        #cam = BlenderCam()

        camPositionsElAz = numpy.loadtxt(self.basePath +
                                         'camPositionsElAz.txt')
        camOrientations = numpy.loadtxt(self.basePath + 'camOrientations.txt')
        if camOrientations.ndim < 2:
            camOrientations = camOrientations.reshape(
                (len(camOrientations) // 2, 2))

        numCams = len(camPositionsElAz)
        camElAz = numpy.repeat(camPositionsElAz,
                               camOrientations.shape[0],
                               axis=0)
        camOrientations = numpy.tile(camOrientations, (numCams, 1))
        numCams = len(camElAz)
        camDist = 40.  # cm, TODO adjust for other objects ?

        if zRotInv == 1:
            # fully rotationally symmetric: the azimuth is always 0
            camElAz[:, 1] = 0.0

        path = "{}{}/train/".format(self.basePath, objName)
        #path = "{}{}/".format(self.basePath,objName)

        #raise ValueError("path {}".format(path))

        pat = "{}img*.png".format(path)
        print("looking for " + pat)
        d = sorted(glob.glob(pat))

        assert numCams == len(d), "number of camera poses ({}) != number of images ({})".format(numCams, len(d))

        if imgNums is not None:
            d = [d[i] for i in imgNums]
            #camPoses = [camPoses[i,:] for i in imgNums]
            camElAz = [camElAz[i, :] for i in imgNums]
            camOrientations = [camOrientations[i, :] for i in imgNums]

        txt = "loading {}".format(objName)
        pbar = pb.ProgressBar(maxval=len(d),
                              widgets=[txt, pb.Percentage(),
                                       pb.Bar()])
        pbar.start()

        for frameNum in range(len(d)):
            pbar.update(frameNum)
            fileName = d[frameNum]
            fn = os.path.split(fileName)[1]
            fnroot = fn[3:]

            imgFileName = fileName
            dptFileName = "{}dpt{}".format(path, fnroot)
            maskFileName = "{}mask{}".format(path, fnroot)

            #print("files: {}, {}".format(imgFileName,dptFileName))

            #dptRendered = Image.open(dptFileName)
            #dptrData = numpy.asarray(dptRendered,numpy.float32)
            dptrData = cv2.imread(dptFileName,
                                  cv2.IMREAD_UNCHANGED).astype(numpy.float32)
            if len(dptrData.shape) > 2:
                dptrData = dptrData[:, :, 0]

            # Blender renders in dm (1 unit = 10 cm): depth 0..20 was mapped
            # to 0..1 and then filled into the 16-bit range (0..2^16),
            # so 200 / 2**16 converts the stored values back to cm
            dptrData *= 200. / 2**16.

            maskData = cv2.imread(maskFileName,
                                  cv2.IMREAD_UNCHANGED).astype(numpy.float32)
            #print("maskData mn/mx {},{}".format(numpy.min(maskData),numpy.max(maskData)))
            #cv2.imshow("maskData",maskData/(2**16.))
            #raise ValueError("ok")
            maskData = (maskData < (2**16. - 5)).astype(numpy.uint8)
            #cv2.imshow("maskData2",maskData)
            #cv2.waitKey(0)
            #raise ValueError("ok")

            # DEBUG print value range
            #print("dptrData min {}, max {}".format(dptrData.min(),dptrData.max()))

            # DEBUG print value of center pixel
            #h,w = dptrData.shape
            #print dptrData[h/2,w/2]

            # DEBUG show
            #dptRendered = Image.fromarray(dptrData*255./8.) # to show it we scale from 0..8 to 0..255, everything beyond 8 is cut away (saturated)
            #dptRendered.show('rendered')

            #self.cropPatchFromDptAndImg(dptrData,dptrData,(320,240),3.)

            imgData = cv2.imread(imgFileName)
            imgData = imgData.astype(floatX) / 255. - 0.5

            #tra = numpy.array([0,0,0])
            #pose = LinemodObjPose(relCamPos=rot[0:3],tra=tra)
            elAz = camElAz[frameNum]
            pose = LinemodObjPose(zRotInv=zRotInv)
            pose.setAzEl(az=elAz[1],
                         el=elAz[0],
                         dist=camDist,
                         lookAt=numpy.array([0., 0., -5.]))

            #--------------------
            # DEBUG - consistency check
            # check if this relCamPos calculated from the rotation matrix, set by az,el is still the same as calculated directly
            rcp = pose.relCamPos
            camEl = -elAz[0]
            camAz = elAz[1]
            camX = numpy.sin(camAz) * numpy.cos(camEl)  # camera coordinates in blender
            camY = numpy.cos(camAz) * numpy.cos(camEl)
            camZ = numpy.sin(camEl)
            camP = numpy.array([camX, camY, camZ]) * camDist
            camP += numpy.array([0., 0., -5.])  # offset from ground
            #print("camP {}, rcp {}".format(camP,rcp))
            if not numpy.allclose(rcp, camP):
                raise ValueError(
                    "Inconsistent camera position: rcp={} != camP={}".format(rcp, camP))

            frame = Frame(imgData, dptrData, maskData, pose, objName,
                          imgFileName)
            #dptPatch,imgPatch = self.cropPatchFromDptAndImg(dptrData,imgData,(320,240),cropSize,targetSize,fixedD=4.0)
            #seqData.append(Frame(imgPatch,dptPatch,pose,objName,imgFileName))
            patch = self.cropPatchFromFrame(frame, (320, 240),
                                            cropSize,
                                            targetSize,
                                            fixedD=camDist)
            seqData.append(patch)

            # keep only the first few full frames for debugging
            if frameNum > 10:
                frame.img = None
                frame.dpt = None
                frame.mask = None

        return NamedImgSequence(objName, seqData)
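The consistency check near the end re-derives the camera position from elevation/azimuth; the same conversion in isolation, matching the conventions used above (negated elevation, fixed look-at offset):

    import numpy as np

    def cam_position(el, az, dist, look_at=np.array([0., 0., -5.])):
        el = -el  # the check above negates the stored elevation
        direction = np.array([np.sin(az) * np.cos(el),
                              np.cos(az) * np.cos(el),
                              np.sin(el)])
        return direction * dist + look_at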
Example #9
def loadSingleSampleNyu(basepath, seqName, index, rng,
                        doLoadRealSample=True,
                        camId=1, fx=588.03, fy=587.07, ux=320., uy=240.,
                        allJoints=False,
                        config={'cube': (250,250,250)},
                        cropSize=(128,128),
                        doJitterCom=False,
                        sigmaCom=20.,
                        doAddWhiteNoise=False,
                        sigmaNoise=10.,
                        doJitterCubesize=False,
                        sigmaCubesize=20.,
                        doJitterRotation=False,
                        rotationAngleRange=[-45.0, 45.0],
                        useCache=True,
                        cacheDir="./cache/single_samples",
                        labelMat=None,
                        doUseLabel=True,
                        minRatioInside=0.3,
                        jitter=None):
    """
    Load an image sequence from the NYU hand pose dataset

    Arguments:
        basepath (string): base path, containing sub-folders "train" and "test"
        seqName: sequence name, e.g. train
        index (int): index of image to be loaded
        rng (random number generator): as returned by numpy.random.RandomState
        doLoadRealSample (boolean, optional): whether to load the real
            sample, i.e., captured by the camera (True; default) or
            the synthetic sample, rendered using a hand model (False)
        camId (int, optional): camera ID, as used in filename; one of {1, 2, 3};
            default: 1
        fx, fy (float, optional): camera focal length; default for cam 1
        ux, uy (float, optional): camera principal point; default for cam 1
        allJoints (boolean): use all 36 joints or just the eval.-subset
        config (dictionary, optional): need to have a key 'cube' whose value
            is a 3-tuple specifying the cube-size (x,y,z in mm)
            which is extracted around the found hand center location
        cropSize (2-tuple, optional): size of cropped patches in pixels;
            default: 128x128
        doJitterCom (boolean, optional): default: False
        sigmaCom (float, optional): sigma for center of mass samples
            (in millimeters); only relevant if doJitterCom is True
        sigmaNoise (float, optional): sigma for additive noise;
            only relevant if doAddWhiteNoise is True
        doAddWhiteNoise (boolean, optional): add normal distributed noise
            to the depth image; default: False
        useCache (boolean, optional): True (default): store in/load from
            pickle file
        cacheDir (string, optional): path to store/load pickle
        labelMat (optional): loaded mat file; (the full file needs to be
            loaded for each sample if not given)
        doUseLabel (bool, optional): specify if the label should be used;
            default: True

    Returns:
        named image sequence
    """

    idComGT = 13
    if allJoints:
        idComGT = 34

    if jitter is None:
        doUseFixedJitter = False
        jitter = Jitter()
    else:
        doUseFixedJitter = True

    # Load the dataset
    objdir = '{}/{}/'.format(basepath, seqName)

    if labelMat is None:
        trainlabels = '{}/{}/joint_data.mat'.format(basepath, seqName)
        labelMat = scipy.io.loadmat(trainlabels)

    joints3D = labelMat['joint_xyz'][camId-1]
    joints2D = labelMat['joint_uvd'][camId-1]
    if allJoints:
        eval_idxs = np.arange(36)
    else:
        eval_idxs = nyuRestrictedJointsEval

    numJoints = len(eval_idxs)

    data = []
    line = index

    # Assemble original filename
    prefix = "depth" if doLoadRealSample else "synthdepth"
    dptFileName = '{0:s}/{1:s}_{2:1d}_{3:07d}.png'.format(objdir, prefix, camId, line+1)
    # Assemble pickled filename
    cacheFilename = "frame_{}_all{}.pgz".format(index, allJoints)
    pickleCacheFile = os.path.join(cacheDir, cacheFilename)

    # Load image
    if useCache and os.path.isfile(pickleCacheFile):
        # Load from pickle file
        with gzip.open(pickleCacheFile, 'rb') as f:
            try:
                dpt = cPickle.load(f)
            except Exception:
                print("Data file exists but failed to load. File: {}".format(pickleCacheFile))
                raise

    else:
        # Load from original file
        if not os.path.isfile(dptFileName):
            raise UserWarning("Desired image file from NYU dataset does not exist \
                (Filename: {}) \
                use cache? {}, cache file: {}".format(dptFileName, useCache, pickleCacheFile))
        dpt = loadDepthMap(dptFileName)

        # Write to pickle file
        if useCache:
            with gzip.GzipFile(pickleCacheFile, 'wb') as f:
                cPickle.dump(dpt, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # Add noise?
    if doAddWhiteNoise:
        jitter.img_white_noise_scale = rng.randn(dpt.shape[0], dpt.shape[1])
        dpt = dpt + sigmaNoise * jitter.img_white_noise_scale
    else:
        jitter.img_white_noise_scale = np.zeros((dpt.shape[0], dpt.shape[1]), dtype=np.float32)

    # joints in image coordinates
    gtorig = np.zeros((numJoints, 3), np.float32)
    jt = 0
    for ii in range(joints2D.shape[1]):
        if ii not in eval_idxs:
            continue
        gtorig[jt, 0] = joints2D[line, ii, 0]
        gtorig[jt, 1] = joints2D[line, ii, 1]
        gtorig[jt, 2] = joints2D[line, ii, 2]
        jt += 1

    # normalized joints in 3D coordinates
    gt3Dorig = np.zeros((numJoints, 3), np.float32)
    jt = 0
    for jj in range(joints3D.shape[1]):
        if jj not in eval_idxs:
            continue
        gt3Dorig[jt, 0] = joints3D[line, jj, 0]
        gt3Dorig[jt, 1] = joints3D[line, jj, 1]
        gt3Dorig[jt, 2] = joints3D[line, jj, 2]
        jt += 1

    if doJitterRotation:
        if not doUseFixedJitter:
            jitter.rotation_angle_scale = rng.rand(1)
        rot = jitter.rotation_angle_scale * (rotationAngleRange[1] - rotationAngleRange[0]) + rotationAngleRange[0]
        dpt, gtorig, gt3Dorig = rotateImageAndGt(dpt, gtorig, gt3Dorig, rot,
                                                 fx, fy, ux, uy,
                                                 jointIdRotCenter=idComGT,
                                                 pointsImgTo3DFunction=pointsImgTo3D,
                                                 bgValue=10000)
    else:
        # Compute scale factor corresponding to no rotation
        jitter.rotation_angle_scale = -rotationAngleRange[0] / (rotationAngleRange[1] - rotationAngleRange[0])

    # Detect hand
    hdOwn = HandDetectorICG()
    comGT = copy.deepcopy(gtorig[idComGT])  # use GT position for comparison

    # Jitter com?
    comOffset = np.zeros((3), np.float32)
    if doJitterCom:
        if not doUseFixedJitter:
            jitter.detection_offset_scale = rng.randn(3)
        comOffset = sigmaCom * jitter.detection_offset_scale
        # Transform x/y to pixel coords (since com is in uvd coords)
        comOffset[0] = (fx * comOffset[0]) / (comGT[2] + comOffset[2])
        comOffset[1] = (fy * comOffset[1]) / (comGT[2] + comOffset[2])
    else:
        jitter.detection_offset_scale = np.zeros((3), dtype=np.float32)
    comGT = comGT + comOffset

    # Jitter scale (cube size)?
    cubesize = np.array((config['cube'][0], config['cube'][1], config['cube'][2]), dtype=np.float64)
    if doJitterCubesize:
        if not doUseFixedJitter:
            jitter.crop_scale = rng.rand(1)
        # sigma defines range for uniform distr.
        cubesizeChange = 2 * sigmaCubesize * jitter.crop_scale - sigmaCubesize
        cubesize += cubesizeChange
    else:
        jitter.crop_scale = 0.5

    dpt, M, com = hdOwn.cropArea3D(imgDepth=dpt, com=comGT, fx=fx, fy=fy,
                                   minRatioInside=minRatioInside,
                                   size=cubesize, dsize=cropSize)

    com3D = jointImgTo3D(com, fx, fy, ux, uy)
    gt3Dcrop = gt3Dorig - com3D     # normalize to com
    gtcrop = np.zeros((gtorig.shape[0], 3), np.float32)
    for joint in range(gtorig.shape[0]):
        t = transformPoint2D(gtorig[joint], M)
        gtcrop[joint, 0] = t[0]
        gtcrop[joint, 1] = t[1]
        gtcrop[joint, 2] = gtorig[joint, 2]

    if not doUseLabel:
        gtorig = np.zeros(gtorig.shape, gtorig.dtype)
        gtcrop = np.zeros(gtcrop.shape, gtcrop.dtype)
        gt3Dorig = np.zeros(gt3Dorig.shape, gt3Dorig.dtype)
        gt3Dcrop = np.zeros(gt3Dcrop.shape, gt3Dcrop.dtype)

    sampleConfig = copy.deepcopy(config)
    sampleConfig['cube'] = (cubesize[0], cubesize[1], cubesize[2])
    data.append(ICVLFrame(
        dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig,
        gt3Dcrop, com3D, dptFileName, '', sampleConfig))

    return NamedImgSequence(seqName, data, sampleConfig), jitter
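The com jitter above converts a metric offset into pixel coordinates with the pinhole model: the u and v offsets scale with focal length over depth, while the depth offset stays metric. As a minimal sketch:

    import numpy as np

    def com_offset_to_pixels(offset, com_uvd, fx, fy):
        # offset: metric (x, y, z); com_uvd: hand center in (u, v, depth)
        out = np.asarray(offset, np.float32).copy()
        z = com_uvd[2] + out[2]
        out[0] = fx * out[0] / z
        out[1] = fy * out[1] / z
        return out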
Example #10
    def loadSequence(self,
                     seqName,
                     allJoints=False,
                     Nmax=float('inf'),
                     shuffle=False,
                     rng=None,
                     docom=False,
                     rotation=False,
                     dsize=(128, 128)):
        """
        Load an image sequence from the dataset
        :param seqName: sequence name, e.g. train
        :param Nmax: maximum number of samples to load
        :return: returns named image sequence
        """

        config = {'cube': (300, 300, 300)}
        config['cube'] = [s * self.scales[seqName] for s in config['cube']]
        refineNet = None

        pickleCache = '{}/{}_{}_{}_{}_cache.pkl'.format(
            self.cacheDir, self.__class__.__name__, seqName, allJoints, docom)
        if self.useCache:
            if os.path.isfile(pickleCache):
                print("Loading cache data from {}".format(pickleCache))
                f = open(pickleCache, 'rb')
                (seqName, data, config) = cPickle.load(f)
                f.close()

                # shuffle data
                if shuffle and rng is not None:
                    print("Shuffling")
                    rng.shuffle(data)
                if not np.isinf(Nmax):
                    return NamedImgSequence(seqName, data[0:int(Nmax)], config)
                else:
                    return NamedImgSequence(seqName, data, config)

        # Load the dataset
        objdir = '{}/{}/'.format(self.basepath, seqName)
        trainlabels = '{}/{}/joint_data.mat'.format(self.basepath, seqName)

        mat = scipy.io.loadmat(trainlabels)
        names = mat['joint_names'][0]
        joints3D = mat['joint_xyz'][0]
        joints2D = mat['joint_uvd'][0]
        if allJoints:
            eval_idxs = np.arange(36)
        else:
            eval_idxs = self.restrictedJointsEval

        self.numJoints = len(eval_idxs)

        txt = 'Loading {}'.format(seqName)
        pbar = pb.ProgressBar(maxval=joints3D.shape[0],
                              widgets=[txt, pb.Percentage(),
                                       pb.Bar()])
        pbar.start()

        data = []
        if rotation:
            data_rotation = []
        i = 0

        for line in range(joints3D.shape[0]):
            dptFileName = '{0:s}/depth_1_{1:07d}.png'.format(objdir, line + 1)
            print(dptFileName)
            # augment the training dataset
            if rotation:
                assert seqName == 'train', 'we only rotate the training data'
                # rotate by 90, 180, or 270 degrees
                rotation_degree = np.random.randint(1, 4) * 90

            if not os.path.isfile(dptFileName):
                if seqName == 'test_1' and line <= 2439 and line >= 0:
                    print("File {} does not exist!".format(dptFileName))
                    i += 1
                    continue
                elif seqName == 'test_2' and line >= 2440 and line <= 8251:
                    print("File {} does not exist!".format(dptFileName))
                    i += 1
                    continue
                else:
                    i += 1
                    continue

            dpt = self.loadDepthMap(dptFileName)
            h, w = dpt.shape
            if rotation:
                import math

                def rotate_about_center(src, angle, scale=1.):
                    w = src.shape[1]
                    h = src.shape[0]
                    rangle = np.deg2rad(angle)  # angle in radians
                    # now calculate new image width and height
                    nw = (abs(np.sin(rangle) * h) +
                          abs(np.cos(rangle) * w)) * scale
                    nh = (abs(np.cos(rangle) * h) +
                          abs(np.sin(rangle) * w)) * scale
                    # ask OpenCV for the rotation matrix
                    rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5),
                                                      angle, scale)
                    # calculate the move from the old center to the new center combined
                    # with the rotation
                    rot_move = np.dot(
                        rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
                    # the move only affects the translation, so update the translation
                    # part of the transform
                    rot_mat[0, 2] += rot_move[0]
                    rot_mat[1, 2] += rot_move[1]
                    return cv2.warpAffine(
                        src,
                        rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
                        flags=cv2.INTER_LANCZOS4), rot_mat

                # get the rotation matrix for warpAffine
                dpt_rotation, rotMat = rotate_about_center(
                    dpt, rotation_degree)

            # joints in image coordinates
            gtorig = np.zeros((self.numJoints, 3), np.float32)
            jt = 0
            for ii in range(joints2D.shape[1]):
                if ii not in eval_idxs:
                    continue
                gtorig[jt, 0] = joints2D[line, ii, 0]
                gtorig[jt, 1] = joints2D[line, ii, 1]
                gtorig[jt, 2] = joints2D[line, ii, 2]
                jt += 1

            if rotation:
                gtorig_rotation = np.zeros((self.numJoints, 3), np.float32)
                # use a local index here; 'i' is the running sample counter
                for jt in range(gtorig.shape[0]):
                    m11, m12, m13 = rotMat[0, 0], rotMat[0, 1], rotMat[0, 2]
                    m21, m22, m23 = rotMat[1, 0], rotMat[1, 1], rotMat[1, 2]
                    gtorig_rotation[jt, 0] = gtorig[jt, 0] * m11 + gtorig[jt, 1] * m12 + m13
                    gtorig_rotation[jt, 1] = gtorig[jt, 0] * m21 + gtorig[jt, 1] * m22 + m23
                    gtorig_rotation[jt, 2] = gtorig[jt, 2]

            # normalized joints in 3D coordinates
            gt3Dorig = np.zeros((self.numJoints, 3), np.float32)
            jt = 0
            for jj in range(joints3D.shape[1]):
                if jj not in eval_idxs:
                    continue
                gt3Dorig[jt, 0] = joints3D[line, jj, 0]
                gt3Dorig[jt, 1] = joints3D[line, jj, 1]
                gt3Dorig[jt, 2] = joints3D[line, jj, 2]
                jt += 1

            # transform from gtorign
            gt3Dorig_rotation = np.zeros((self.numJoints, 3), np.float32)
            if rotation:
                gt3Dorig_rotation = self.jointsImgTo3D(gtorig_rotation)

            #print gt3D
            #print("{}".format(gtorig))
            #self.showAnnotatedDepth(ICVLFrame(dpt,gtorig,gtorig,0,gt3Dorig,gt3Dorig,0,dptFileName,''))
            #print("{}".format(gtorig_rotation))
            #self.showAnnotatedDepth(ICVLFrame(dpt_rotation,gtorig_rotation,gtorig_rotation,0,gt3Dorig_rotation,gt3Dorig_rotation,0,dptFileName,''))

            # Detect hand
            hd = HandDetector(dpt,
                              self.fx,
                              self.fy,
                              refineNet=refineNet,
                              importer=self)
            if not hd.checkImage(1):
                print("Skipping image {}, no content".format(dptFileName))
                i += 1
                continue
            try:
                if allJoints:
                    dpt, M, com = hd.cropArea3D(gtorig[34],
                                                size=config['cube'],
                                                docom=docom,
                                                dsize=dsize)
                else:
                    dpt, M, com = hd.cropArea3D(gtorig[13],
                                                size=config['cube'],
                                                docom=docom,
                                                dsize=dsize)
            except UserWarning:
                print(
                    "Skipping image {}, no hand detected".format(dptFileName))
                continue

            if rotation:
                # Detect hand
                hd_rotation = HandDetector(dpt_rotation,
                                           self.fx,
                                           self.fy,
                                           refineNet=refineNet,
                                           importer=self)
                try:
                    if allJoints:
                        dpt_rotation, M_rotation, com_rotation = hd_rotation.cropArea3D(
                            gtorig_rotation[34],
                            size=config['cube'],
                            docom=docom,
                            dsize=dsize)
                    else:
                        dpt_rotation, M_rotation, com_rotation = hd_rotation.cropArea3D(
                            gtorig_rotation[13],
                            size=config['cube'],
                            docom=docom,
                            dsize=dsize)
                except UserWarning:
                    print("Skipping image {}, no hand detected".format(
                        dptFileName))
                    continue

            com3D = self.jointImgTo3D(com)
            gt3Dcrop = gt3Dorig - com3D  # normalize to com
            gtcrop = np.zeros((gtorig.shape[0], 3), np.float32)
            for joint in range(gtorig.shape[0]):
                t = transformPoint2D(gtorig[joint], M)
                gtcrop[joint, 0] = t[0]
                gtcrop[joint, 1] = t[1]
                gtcrop[joint, 2] = gtorig[joint, 2]

            # create 3D voxel
            dpt3D = np.zeros((8, dpt.shape[0], dpt.shape[1]), dtype=np.uint8)
            sorted_dpt = np.sort(dpt, axis=None)
            iii = np.where(sorted_dpt != 0)[0][0]
            min_d = sorted_dpt[iii]
            max_d = np.max(dpt)
            slice_step = (max_d - min_d) / 8
            # build the 9 slice boundaries; use 'k' so the sample counter 'i' is untouched
            slice_range = np.array([min_d + slice_step * k for k in range(9)])
            lh, lw = np.where(dpt != 0)
            for ii in range(lh.shape[0]):
                ih, iw = lh[ii], lw[ii]
                dptValue = dpt[ih, iw]
                slice_layer = np.where(dptValue >= slice_range)[0][-1]
                if slice_layer == 8:
                    slice_layer = 7
                dpt3D[slice_layer, ih, iw] = 1

            if rotation:
                com3D_rotation = self.jointImgTo3D(com_rotation)
                gt3Dcrop_rotation = gt3Dorig_rotation - com3D_rotation  # normalize to com
                gtcrop_rotation = np.zeros((gtorig_rotation.shape[0], 3),
                                           np.float32)
                for joint in range(gtorig_rotation.shape[0]):
                    t = transformPoint2D(gtorig_rotation[joint], M_rotation)
                    gtcrop_rotation[joint, 0] = t[0]
                    gtcrop_rotation[joint, 1] = t[1]
                    gtcrop_rotation[joint, 2] = gtorig_rotation[joint, 2]
                dpt3D_rotation = np.zeros((8, dpt.shape[0], dpt.shape[1]),
                                          dtype=np.uint8)
                sorted_dpt_rotation = np.sort(dpt_rotation, axis=None)
                iii = np.where(sorted_dpt_rotation != 0)[0][0]
                min_d = sorted_dpt_rotation[iii]
                max_d = np.max(dpt_rotation)
                slice_step = (max_d - min_d) / 8
                slice_range = np.array([min_d + slice_step * k for k in range(9)])
                lh, lw = np.where(dpt_rotation != 0)
                for ii in range(lh.shape[0]):
                    ih, iw = lh[ii], lw[ii]
                    dptValue = dpt_rotation[ih, iw]
                    slice_layer = np.where(dptValue >= slice_range)[0][-1]
                    if slice_layer == 8:
                        slice_layer = 7
                    dpt3D_rotation[slice_layer, ih, iw] = 1

            # print("{}".format(gt3Dorig))
            # self.showAnnotatedDepth(ICVLFrame(dpt,dpt3D,gtorig,gtcrop,M,gt3Dorig,gt3Dcrop,com3D,dptFileName,''))
            # if rotation:
            #    print("{}".format(gt3Dorig_rotation))
            #    self.showAnnotatedDepth(ICVLFrame(dpt_rotation,dpt3D_rotation,gtorig_rotation,gtcrop_rotation,M_rotation,gt3Dorig_rotation,gt3Dcrop_rotation,com3D_rotation,dptFileName,''))
            # visualize 3D
            # from mpl_toolkits.mplot3d import Axes3D
            # fig = plt.figure()
            # ax = fig.add_subplot(111,projection='3d')
            # d,x,y = np.where(dpt3D == 1)
            # ax.scatter(x,y,8 - d)
            # # ax.view_init(0,0)
            # ax.set_xlabel('x')
            # ax.set_ylabel('y')
            # ax.set_zlabel('d')
            # plt.show()

            data.append(
                ICVLFrame(dpt.astype(np.float32), dpt3D, gtorig, gtcrop, M,
                          gt3Dorig, gt3Dcrop, com3D, dptFileName, ''))
            if rotation:
                data_rotation.append(
                    ICVLFrame(dpt_rotation.astype(np.float32), dpt3D_rotation,
                              gtorig_rotation, gtcrop_rotation, M_rotation,
                              gt3Dorig_rotation, gt3Dcrop_rotation,
                              com3D_rotation, dptFileName, ''))

            pbar.update(i)
            i += 1

            # early stop
            if len(data) >= Nmax:
                break

        pbar.finish()
        # augmentation
        if rotation:
            data.extend(data_rotation)

        print("Loaded {} samples.".format(len(data)))

        if self.useCache:
            print("Save cache data to {}".format(pickleCache))
            f = open(pickleCache, 'wb')
            cPickle.dump((seqName, data, config),
                         f,
                         protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()

        # shuffle data
        if shuffle and rng is not None:
            print("Shuffling")
            rng.shuffle(data)
        return NamedImgSequence(seqName, data, config)
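The per-pixel depth-slicing loops above can also be vectorized; a sketch of an equivalent 8-layer occupancy volume using np.digitize (assuming, as above, a depth map where 0 marks background):

    import numpy as np

    def depth_to_voxels(dpt, n_layers=8):
        nz = dpt != 0
        edges = np.linspace(dpt[nz].min(), dpt.max(), n_layers + 1)
        # digitize returns 1..n_layers+1; shift and clamp to 0..n_layers-1
        layers = np.clip(np.digitize(dpt, edges) - 1, 0, n_layers - 1)
        vox = np.zeros((n_layers,) + dpt.shape, np.uint8)
        hh, ww = np.nonzero(nz)
        vox[layers[hh, ww], hh, ww] = 1
        return vox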