def loadSequence(self, objName):
    """Render one synthetic shape image and wrap it as a named sequence.

    :param objName: shape selector: 'circle', 'square' or 'triangle';
        any other name yields an all-black image
    :returns: NamedImgSequence containing a single Frame with the image
    """
    side = 64
    margin = math.floor(side / 5)
    canvas = Image.new('RGB', size=(side, side), color=(0, 0, 0))
    painter = ImageDraw.Draw(canvas)
    # bounding box of the shape, inset by the margin on all sides
    bbox = [(margin, margin), (side - margin, side - margin)]
    if objName == 'circle':
        painter.ellipse(bbox, fill=(255, 0, 0))
    elif objName == 'square':
        painter.rectangle(bbox, fill=(255, 0, 0))
    elif objName == 'triangle':
        corners = [(side / 2, margin), (margin, side - margin), (side - margin, side - margin)]
        painter.polygon(corners, fill=(255, 0, 0))
    del painter
    frames = [Frame(canvas, None, None, None, objName, "")]
    return NamedImgSequence(objName, frames)
def jitterImgSeqs(self, N, maxRot=30.0, maxTranslate=2.0, maxScale=0.1):
    """Replace every stored image sequence with N jittered variants.

    Each sequence in self._imgSeqs is passed through the ImageJitterer
    with the given rotation/translation/scale bounds; the jittered frames
    are stored back under the original sequence name.
    """
    jitterer = transformations.ImageJitterer()
    self._imgSeqs = [
        NamedImgSequence(
            seq.name,
            jitterer.getJitteredImgSeq(seq.data, N, maxRot=maxRot,
                                       maxTranslate=maxTranslate,
                                       maxScale=maxScale))
        for seq in self._imgSeqs
    ]
def loadSequence(self, seqName, Nmax=float('inf'), shuffle=False, rng=None, docom=False, cube=None, hand=None):
    """
    Load an image sequence from the dataset
    :param seqName: sequence name, e.g. train
    :param Nmax: maximum number of samples to load
    :param shuffle: shuffle the loaded frames (requires rng)
    :param rng: random number generator used for shuffling
    :param docom: refine the crop location to the center of mass
    :param cube: crop cube size (x, y, z), or None for the sequence default
    :param hand: not supported here; must be None
    :return: returns named image sequence
    """
    # Crop cube size: per-sequence default unless explicitly overridden
    if cube is None:
        config = {'cube': self.default_cubes[seqName]}
    else:
        assert isinstance(cube, tuple)
        assert len(cube) == 3
        config = {'cube': cube}

    # Cache filename encodes every option that changes the loaded data
    pickleCache = '{}/{}_{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, self.allJoints, HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
    if self.useCache:
        if os.path.isfile(pickleCache):
            # Fast path: return preprocessed frames from the pickle cache
            print("Loading cache data from {}".format(pickleCache))
            f = open(pickleCache, 'rb')
            (seqName, data, config) = cPickle.load(f)
            f.close()

            # shuffle data
            if shuffle and rng is not None:
                print("Shuffling")
                rng.shuffle(data)
            if not(np.isinf(Nmax)):
                return NamedImgSequence(seqName, data[0:Nmax], config)
            else:
                return NamedImgSequence(seqName, data, config)

    self.loadRefineNetLazy(self.refineNet)

    # Load the dataset
    objdir = '{}/{}/'.format(self.basepath, seqName)
    trainlabels = '{}/{}/joint_data.mat'.format(self.basepath, seqName)

    mat = scipy.io.loadmat(trainlabels)
    names = mat['joint_names'][0]
    joints3D = mat['joint_xyz'][0]  # joint positions in 3D
    joints2D = mat['joint_uvd'][0]  # joint positions in image (u, v, depth)

    # Use either all 36 annotated joints or the restricted evaluation subset
    if self.allJoints:
        eval_idxs = np.arange(36)
    else:
        eval_idxs = self.restrictedJointsEval

    self.numJoints = len(eval_idxs)

    txt = 'Loading {}'.format(seqName)
    pbar = pb.ProgressBar(maxval=joints3D.shape[0], widgets=[txt, pb.Percentage(), pb.Bar()])
    pbar.start()

    data = []
    i=0
    for line in range(joints3D.shape[0]):
        # depth images are named by 1-based frame index
        dptFileName = '{0:s}/depth_1_{1:07d}.png'.format(objdir, line+1)

        if not os.path.isfile(dptFileName):
            print("File {} does not exist!".format(dptFileName))
            i += 1
            continue
        dpt = self.loadDepthMap(dptFileName)

        if hand is not None:
            # side-specific loading is not implemented for this dataset
            raise NotImplementedError()

        # joints in image coordinates
        gtorig = np.zeros((self.numJoints, 3), np.float32)
        jt = 0
        for ii in range(joints2D.shape[1]):
            if ii not in eval_idxs:
                continue
            gtorig[jt, 0] = joints2D[line, ii, 0]
            gtorig[jt, 1] = joints2D[line, ii, 1]
            gtorig[jt, 2] = joints2D[line, ii, 2]
            jt += 1

        # normalized joints in 3D coordinates
        gt3Dorig = np.zeros((self.numJoints, 3), np.float32)
        jt = 0
        for jj in range(joints3D.shape[1]):
            if jj not in eval_idxs:
                continue
            gt3Dorig[jt, 0] = joints3D[line, jj, 0]
            gt3Dorig[jt, 1] = joints3D[line, jj, 1]
            gt3Dorig[jt, 2] = joints3D[line, jj, 2]
            jt += 1
        # print gt3D
        # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtorig,0,gt3Dorig,gt3Dorig,0,dptFileName,'',''))

        # Detect hand
        hd = HandDetector(dpt, self.fx, self.fy, refineNet=self.refineNet, importer=self)
        if not hd.checkImage(1):
            print("Skipping image {}, no content".format(dptFileName))
            i += 1
            continue
        try:
            dpt, M, com = hd.cropArea3D(com=gtorig[self.crop_joint_idx], size=config['cube'], docom=docom)
        except UserWarning:
            # NOTE(review): unlike the other skip paths, this one does not
            # advance i, so the progress bar lags on detection failures
            print("Skipping image {}, no hand detected".format(dptFileName))
            continue

        com3D = self.jointImgTo3D(com)
        gt3Dcrop = gt3Dorig - com3D  # normalize to com
        gtcrop = transformPoints2D(gtorig, M)

        # print("{}".format(gt3Dorig))
        # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtcrop,M,gt3Dorig,gt3Dcrop,com3D,dptFileName,'',''))

        data.append(DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D, dptFileName, '', self.sides[seqName], {}))
        pbar.update(i)
        i += 1

        # early stop
        if len(data) >= Nmax:
            break

    pbar.finish()
    print("Loaded {} samples.".format(len(data)))

    if self.useCache:
        print("Save cache data to {}".format(pickleCache))
        f = open(pickleCache, 'wb')
        cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()

    # shuffle data
    if shuffle and rng is not None:
        print("Shuffling")
        rng.shuffle(data)
    return NamedImgSequence(seqName, data, config)
def loadSequence(self, seqName, subSeq=None, Nmax=float('inf'), shuffle=False, rng=None, docom=False, cube=None, hand=None):
    """
    Load an image sequence from the dataset
    :param seqName: sequence name, e.g. subject1
    :param subSeq: list of subsequence (subdirectory) names to load, or None for all
    :param Nmax: maximum number of samples to load
    :param shuffle: shuffle the loaded frames (requires rng)
    :param rng: random number generator used for shuffling
    :param docom: refine the crop location to the center of mass
    :param cube: crop cube size (x, y, z), or None for the sequence default
    :param hand: not supported here; must be None
    :return: returns named image sequence
    """
    if (subSeq is not None) and (not isinstance(subSeq, list)):
        raise TypeError("subSeq must be None or list")

    # Crop cube size: per-sequence default unless explicitly overridden
    if cube is None:
        config = {'cube': self.default_cubes[seqName]}
    else:
        assert isinstance(cube, tuple)
        assert len(cube) == 3
        config = {'cube': cube}

    # Cache filename encodes every option that changes the loaded data
    if subSeq is None:
        pickleCache = '{}/{}_{}_None_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
    else:
        pickleCache = '{}/{}_{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, ''.join(subSeq), HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
    # BUGFIX: use short-circuiting 'and' instead of bitwise '&' so that the
    # isfile() stat is skipped entirely when caching is disabled
    if self.useCache and os.path.isfile(pickleCache):
        # Fast path: return preprocessed frames from the pickle cache
        print("Loading cache data from {}".format(pickleCache))
        with open(pickleCache, 'rb') as f:
            (seqName, data, config) = cPickle.load(f)

        # shuffle data
        if shuffle and rng is not None:
            print("Shuffling")
            rng.shuffle(data)
        if not(np.isinf(Nmax)):
            return NamedImgSequence(seqName, data[0:Nmax], config)
        else:
            return NamedImgSequence(seqName, data, config)

    self.loadRefineNetLazy(self.refineNet)

    # Load the dataset
    objdir = '{}/{}/'.format(self.basepath, seqName)
    subdirs = sorted([name for name in os.listdir(objdir) if os.path.isdir(os.path.join(objdir, name))])

    txt = 'Loading {}'.format(seqName)
    # half the file count: each sample pairs a depth file with another file
    nImgs = sum([len(files) for r, d, files in os.walk(objdir)]) // 2
    pbar = pb.ProgressBar(maxval=nImgs, widgets=[txt, pb.Percentage(), pb.Bar()])
    pbar.start()

    data = []
    pi = 0
    for subdir in subdirs:
        # check for subsequences and skip them if necessary
        subSeqName = ''
        if subSeq is not None:
            if subdir not in subSeq:
                continue
            subSeqName = subdir

        # iterate all subdirectories
        trainlabels = '{}/{}/joint.txt'.format(objdir, subdir)
        with open(trainlabels) as inputfile:
            # first line holds the number of samples in this subdirectory
            nImgs = int(inputfile.readline())
            for i in range(nImgs):
                # early stop
                if len(data) >= Nmax:
                    break

                line = inputfile.readline()
                part = line.split(' ')
                # depth files are named by 0-based, zero-padded frame index
                dptFileName = '{}/{}/{}_depth.bin'.format(objdir, subdir, str(i).zfill(6))

                if not os.path.isfile(dptFileName):
                    print("File {} does not exist!".format(dptFileName))
                    continue
                dpt = self.loadDepthMap(dptFileName)

                if hand is not None:
                    # side-specific loading is not implemented for this dataset
                    raise NotImplementedError()

                # joints in image coordinates
                gt3Dorig = np.zeros((self.numJoints, 3), np.float32)
                for joint in range(gt3Dorig.shape[0]):
                    for xyz in range(0, 3):
                        gt3Dorig[joint, xyz] = part[joint*3+xyz]

                # invert axis
                # gt3Dorig[:, 0] *= (-1.)
                # gt3Dorig[:, 1] *= (-1.)
                gt3Dorig[:, 2] *= (-1.)

                # normalized joints in 3D coordinates
                gtorig = self.joints3DToImg(gt3Dorig)

                # print gt3D
                # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtorig,0,gt3Dorig,gt3Dcrop,com3D,dptFileName,'',''))

                # Detect hand
                hd = HandDetector(dpt, self.fx, self.fy, refineNet=self.refineNet, importer=self)
                if not hd.checkImage(1.):
                    print("Skipping image {}, no content".format(dptFileName))
                    continue
                try:
                    dpt, M, com = hd.cropArea3D(com=gtorig[self.crop_joint_idx], size=config['cube'], docom=docom)
                except UserWarning:
                    print("Skipping image {}, no hand detected".format(dptFileName))
                    continue

                com3D = self.jointImgTo3D(com)
                gt3Dcrop = gt3Dorig - com3D  # normalize to com
                gtcrop = transformPoints2D(gtorig, M)

                # print("{}".format(gt3Dorig))
                # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtcrop,M,gt3Dorig,gt3Dcrop,com3D,dptFileName,'','',{}))

                data.append(DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D, dptFileName, subSeqName, self.sides[seqName], {}))
                pbar.update(pi)
                pi += 1

    pbar.finish()
    print("Loaded {} samples.".format(len(data)))

    if self.useCache:
        print("Save cache data to {}".format(pickleCache))
        with open(pickleCache, 'wb') as f:
            cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)

    # shuffle data
    if shuffle and rng is not None:
        print("Shuffling")
        rng.shuffle(data)
    return NamedImgSequence(seqName, data, config)
def loadSequence(self, seqName, subSeq=None, Nmax=float('inf'), shuffle=False, rng=None, docom=False, cube=None, hand=None):
    """
    Load an image sequence from the dataset
    :param seqName: sequence name, e.g. train
    :param subSeq: list of subsequence names, e.g. 0, 45, 122-5
    :param Nmax: maximum number of samples to load
    :param shuffle: shuffle the loaded frames (requires rng)
    :param rng: random number generator used for shuffling
    :param docom: refine the crop location to the center of mass
    :param cube: crop cube size (x, y, z), or None for the sequence default
    :param hand: not supported here; must be None
    :return: returns named image sequence
    """
    if (subSeq is not None) and (not isinstance(subSeq, list)):
        raise TypeError("subSeq must be None or list")

    # Crop cube size: per-sequence default unless explicitly overridden
    if cube is None:
        config = {'cube': self.default_cubes[seqName]}
    else:
        assert isinstance(cube, tuple)
        assert len(cube) == 3
        config = {'cube': cube}

    # Cache filename encodes every option that changes the loaded data
    if subSeq is None:
        pickleCache = '{}/{}_{}_None_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
    else:
        pickleCache = '{}/{}_{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, ''.join(subSeq), HandDetector.detectionModeToString(docom, self.refineNet is not None), config['cube'][0])
    if self.useCache:
        if os.path.isfile(pickleCache):
            # Fast path: return preprocessed frames from the combined cache
            print("Loading cache data from {}".format(pickleCache))
            f = open(pickleCache, 'rb')
            (seqName, data, config) = cPickle.load(f)
            f.close()

            # shuffle data
            if shuffle and rng is not None:
                print("Shuffling")
                rng.shuffle(data)
            if not(np.isinf(Nmax)):
                return NamedImgSequence(seqName, data[0:Nmax], config)
            else:
                return NamedImgSequence(seqName, data, config)

        # check for multiple subsequences
        if subSeq is not None:
            if len(subSeq) > 1:
                # if every requested subsequence has its own per-subSeq cache
                # file, merge those instead of re-reading the raw data
                missing = False
                for i in range(len(subSeq)):
                    if not os.path.isfile('{}/{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, subSeq[i], HandDetector.detectionModeToString(docom, self.refineNet is not None))):
                        missing = True
                        print("missing: {}".format(subSeq[i]))
                        break

                if not missing:
                    # load first data
                    pickleCache = '{}/{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, subSeq[0], HandDetector.detectionModeToString(docom, self.refineNet is not None))
                    print("Loading cache data from {}".format(pickleCache))
                    f = open(pickleCache, 'rb')
                    (seqName, fullData, config) = cPickle.load(f)
                    f.close()
                    # load rest of data
                    for i in range(1, len(subSeq)):
                        pickleCache = '{}/{}_{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, subSeq[i], HandDetector.detectionModeToString(docom, self.refineNet is not None))
                        print("Loading cache data from {}".format(pickleCache))
                        f = open(pickleCache, 'rb')
                        (seqName, data, config) = cPickle.load(f)
                        fullData.extend(data)
                        f.close()

                    # shuffle data
                    if shuffle and rng is not None:
                        print("Shuffling")
                        rng.shuffle(fullData)
                    if not(np.isinf(Nmax)):
                        return NamedImgSequence(seqName, fullData[0:Nmax], config)
                    else:
                        return NamedImgSequence(seqName, fullData, config)

    self.loadRefineNetLazy(self.refineNet)

    # Load the dataset
    objdir = '{}/Depth/'.format(self.basepath)
    trainlabels = '{}/{}.txt'.format(self.basepath, seqName)

    inputfile = open(trainlabels)

    txt = 'Loading {}'.format(seqName)
    # one label line per sample; rewind after counting lines
    pbar = pb.ProgressBar(maxval=len(inputfile.readlines()), widgets=[txt, pb.Percentage(), pb.Bar()])
    pbar.start()
    inputfile.seek(0)

    data = []
    i = 0
    for line in inputfile:
        # early stop
        if len(data) >= Nmax:
            break

        part = line.split(' ')
        # check for subsequences and skip them if necessary
        subSeqName = ''
        if subSeq is not None:
            p = part[0].split('/')
            # handle original data (unrotated '0') separately
            # NOTE(review): directory names longer than 6 chars appear to
            # denote the unrotated originals ('0'); shorter names are the
            # rotated subsequences — confirm against the dataset layout
            if ('0' in subSeq) and len(p[0]) > 6:
                pass
            elif not('0' in subSeq) and len(p[0]) > 6:
                i += 1
                continue
            elif (p[0] in subSeq) and len(p[0]) <= 6:
                pass
            elif not(p[0] in subSeq) and len(p[0]) <= 6:
                i += 1
                continue

            if len(p[0]) <= 6:
                subSeqName = p[0]
            else:
                subSeqName = '0'

        dptFileName = '{}/{}'.format(objdir, part[0])

        if not os.path.isfile(dptFileName):
            print("File {} does not exist!".format(dptFileName))
            i += 1
            continue
        dpt = self.loadDepthMap(dptFileName)

        if hand is not None:
            # side-specific loading is not implemented for this dataset
            raise NotImplementedError()

        # joints in image coordinates
        gtorig = np.zeros((self.numJoints, 3), np.float32)
        for joint in range(self.numJoints):
            for xyz in range(0, 3):
                gtorig[joint, xyz] = part[joint*3+xyz+1]

        # normalized joints in 3D coordinates
        gt3Dorig = self.jointsImgTo3D(gtorig)

        # print gt3D
        # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtorig,0,gt3Dorig,gt3Dcrop,0,dptFileName,subSeqName,''))

        # Detect hand
        hd = HandDetector(dpt, self.fx, self.fy, refineNet=self.refineNet, importer=self)
        if not hd.checkImage(1):
            print("Skipping image {}, no content".format(dptFileName))
            i += 1
            continue
        try:
            dpt, M, com = hd.cropArea3D(com=gtorig[self.crop_joint_idx], size=config['cube'], docom=docom)
        except UserWarning:
            # NOTE(review): this skip path does not advance i, unlike the
            # others, so the progress bar lags on detection failures
            print("Skipping image {}, no hand detected".format(dptFileName))
            continue

        com3D = self.jointImgTo3D(com)
        gt3Dcrop = gt3Dorig - com3D  # normalize to com
        gtcrop = transformPoints2D(gtorig, M)

        # print("{}".format(gt3Dorig))
        # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtcrop,M,gt3Dorig,gt3Dcrop,com3D,dptFileName,subSeqName,''))

        data.append(DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D, dptFileName, subSeqName, 'left', {}))
        pbar.update(i)
        i += 1

    inputfile.close()
    pbar.finish()
    print("Loaded {} samples.".format(len(data)))

    if self.useCache:
        print("Save cache data to {}".format(pickleCache))
        f = open(pickleCache, 'wb')
        cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()

    # shuffle data
    if shuffle and rng is not None:
        print("Shuffling")
        rng.shuffle(data)
    return NamedImgSequence(seqName, data, config)
def loadSequence(self, seqName, camera=None, Nmax=float('inf'), shuffle=False, rng=None):
    """
    Load an image sequence from the dataset
    :param seqName: sequence name, e.g. subject1
    :param camera: camera id to filter files by ("cNN_" prefix), or None for all
    :param Nmax: maximum number of samples to load
    :param shuffle: shuffle the loaded frames (requires rng)
    :param rng: random number generator used for shuffling
    :return: returns named image sequence
    """
    # fixed crop cube size for this dataset
    config = {'cube': (220, 220, 220)}

    pickleCache = '{}/{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, camera)
    # NOTE(review): bitwise '&' on bools behaves like 'and' here but does not
    # short-circuit — 'and' would be the idiomatic choice
    if self.useCache & os.path.isfile(pickleCache):
        # Fast path: return preprocessed frames from the pickle cache
        print("Loading cache data from {}".format(pickleCache))
        f = open(pickleCache, 'rb')
        (seqName, data, config) = cPickle.load(f)
        f.close()

        # shuffle data
        if shuffle and rng is not None:
            print("Shuffling")
            rng.shuffle(data)
        if not (np.isinf(Nmax)):
            return NamedImgSequence(seqName, data[0:Nmax], config)
        else:
            return NamedImgSequence(seqName, data, config)

    # Load the dataset
    objdir = os.path.join(self.basepath, seqName)
    # depth maps are .npy files, optionally restricted to one camera prefix
    if camera is not None:
        dflt = "c{0:02d}_*.npy".format(camera)
    else:
        dflt = '*.npy'
    dptFiles = sorted([
        os.path.join(dirpath, f) for dirpath, dirnames, files in os.walk(objdir)
        for f in fnmatch.filter(files, dflt)
    ])
    # dptFiles = sorted(glob.glob(os.path.join(objdir, '*exr')))
    if camera is not None:
        lflt = "c{0:02d}_*anno_blender.txt".format(camera)
    else:
        lflt = '*anno_blender.txt'
    labelFiles = sorted([
        os.path.join(dirpath, f) for dirpath, dirnames, files in os.walk(objdir)
        for f in fnmatch.filter(files, lflt)
    ])
    #labelFiles = sorted(glob.glob(os.path.join(objdir, '*txt')))

    # sync lists
    # keep only depth files whose frame-number token matches a label file
    newdpt = []
    newlbl = []
    if camera is not None:
        # with a camera prefix, the frame number is the 2nd '_'-token
        number_idx = 1
    else:
        number_idx = 0
    labelIdx = [
        ''.join(os.path.basename(lbl).split('_')[number_idx]) for lbl in labelFiles
    ]
    for dpt in dptFiles:
        if ''.join(
                os.path.basename(dpt).split('_')[number_idx]) in labelIdx:
            newdpt.append(dpt)
            newlbl.append(labelFiles[labelIdx.index(''.join(
                os.path.basename(dpt).split('_')[number_idx]))])

    dptFiles = newdpt
    labelFiles = newlbl
    assert len(dptFiles) == len(labelFiles)

    txt = 'Loading {}'.format(seqName)
    nImgs = len(dptFiles)
    pbar = pb.ProgressBar(maxval=nImgs, widgets=[txt, pb.Percentage(), pb.Bar()])
    pbar.start()

    data = []
    # NOTE(review): xrange and the integer '/' below are Python-2-only
    for i in xrange(nImgs):
        labelFileName = labelFiles[i]
        dptFileName = dptFiles[i]

        if not os.path.isfile(dptFileName):
            print("File {} does not exist!".format(dptFileName))
            continue
        dpt = self.loadDepthMap(dptFileName)

        if not os.path.isfile(labelFileName):
            print("File {} does not exist!".format(labelFileName))
            continue

        # joints in image coordinates
        gtorig = np.genfromtxt(labelFileName)
        gtorig = gtorig.reshape((gtorig.shape[0] / 3, 3))
        gtorig[:, 2] *= 1000.  # presumably meters -> millimeters; verify units

        # normalized joints in 3D coordinates
        gt3Dorig = self.jointsImgTo3D(gtorig)

        ### Shorting the tip bone, BUGFIX
        # pull each fingertip towards its parent joint by the shrink factor
        tips = [3, 8, 13, 18, 23]
        shrink_factor = 0.85
        for joint_idx in tips:
            t = gt3Dorig[joint_idx, :] - gt3Dorig[joint_idx - 1, :]
            gt3Dorig[joint_idx, :] = gt3Dorig[joint_idx - 1, :] + shrink_factor * t
        gtorig = self.joints3DToImg(gt3Dorig)
        ###

        # print gtorig, gt3Dorig
        # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtorig,0,gt3Dorig,0,0,dptFileName,'','',{}))

        # Detect hand
        hd = HandDetector(dpt, self.fx, self.fy, importer=self)
        if not hd.checkImage(1.):
            print("Skipping image {}, no content".format(dptFileName))
            continue
        try:
            # crop around joint 10 with the fixed cube size
            dpt, M, com = hd.cropArea3D(gtorig[10], size=config['cube'])
        except UserWarning:
            print(
                "Skipping image {}, no hand detected".format(dptFileName))
            continue

        com3D = self.jointImgTo3D(com)
        gt3Dcrop = gt3Dorig - com3D  # normalize to com
        gtcrop = np.zeros((gt3Dorig.shape[0], 3), np.float32)
        for joint in range(gtcrop.shape[0]):
            t = transformPoint2D(gtorig[joint], M)
            gtcrop[joint, 0] = t[0]
            gtcrop[joint, 1] = t[1]
            gtcrop[joint, 2] = gtorig[joint, 2]

        # print("{}".format(gt3Dorig))
        # self.showAnnotatedDepth(DepthFrame(dpt,gtorig,gtcrop,M,gt3Dorig,gt3Dcrop,com3D,dptFileName,'',''))

        data.append(
            DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig,
                       gt3Dcrop, com3D, dptFileName, '', self.sides[seqName], {}))
        pbar.update(i)

        # early stop
        if len(data) >= Nmax:
            break

    pbar.finish()
    print("Loaded {} samples.".format(len(data)))

    if self.useCache:
        print("Save cache data to {}".format(pickleCache))
        f = open(pickleCache, 'wb')
        cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()

    # shuffle data
    if shuffle and rng is not None:
        print("Shuffling")
        rng.shuffle(data)
    return NamedImgSequence(seqName, data, config)
def loadSequence(self, seqName, zRotInv=0, inputMode=2, cropAtGtPos=False, cropSize=None, targetSize=None, imgNums=None):
    """
    Load a Linemod-style sequence of color/depth frames with pose annotations.

    :param seqName: object/sequence directory name under basepath
    :param zRotInv: passed through to LinemodObjPose (z-rotation invariance flag)
    :param inputMode: 0 = depth only, 1 = color only, otherwise both
    :param cropAtGtPos: crop a patch at the ground-truth pose instead of
        keeping full frames
    :param cropSize: patch crop size, forwarded to cropPatchAtGTPos
    :param targetSize: patch target size, forwarded to cropPatchAtGTPos
    :param imgNums: optional list of image indices to restrict loading to
    :return: NamedImgSequence of LinemodTestFrame objects (or patches)
    """
    objdir = '{}{}/data/'.format(self.basepath, seqName)

    # NOTE(review): format() receives three args but the pattern only uses
    # the first; the extra args are harmless but dead
    s = '{}color*.jpg'.format(objdir, self.basepath, seqName)
    imgFileNames = glob.glob(s)
    imgFileNames = sorted(imgFileNames)
    assert len(imgFileNames) > 0, "No images under '{}'".format(s)

    # tmp
    #imgFileNames = imgFileNames[0:100]
    #imgFileNames = imgFileNames[0:19]
    #imgFileNames = imgFileNames[0:800]
    if imgNums is not None:
        imgFileNames = [imgFileNames[i] for i in imgNums]

    print("inputMode {}".format(inputMode))

    txt = 'Loading {}'.format(seqName)
    pbar = pb.ProgressBar(maxval=len(imgFileNames), widgets=[txt, pb.Percentage(), pb.Bar()])
    pbar.start()

    data = []
    for i in range(0, len(imgFileNames)):
        imgFileName = imgFileNames[i]
        d, fn = os.path.split(imgFileName)  # @UnusedVariable
        # strip the 'color' prefix and '.jpg' suffix to get the frame number
        numStr = fn[5:-4]
        #dptFileName = '{}depth{}.dpt'.format(objdir,numStr)
        #dptFileName = '{}inp/depth{}.dpt'.format(objdir,numStr) # inpainted
        dptFileName = '{}inp/depth{}.png'.format(objdir, numStr)  # inpainted
        rotFileName = '{}rot{}.rot'.format(objdir, numStr)
        traFileName = '{}tra{}.tra'.format(objdir, numStr)

        # load color unless depth-only mode, depth unless color-only mode
        if (inputMode > 0):
            img = self.loadImage(imgFileName)
        else:
            img = None
        if (inputMode != 1):
            dpt = self.loadDepthMap(dptFileName)
        else:
            dpt = None
        mask = None  # or numpy.ones with the size of img?

        rot = self.loadMatFromTxt(rotFileName)
        tra = self.loadMatFromTxt(traFileName)

        #data.append({img:img,dpt:dpt,rot:rot,tra:tra})
        pose = LinemodObjPose(rot=rot, tra=tra, zRotInv=zRotInv)
        frame = LinemodTestFrame(img=img, dpt=dpt, mask=mask, pose=pose, className=seqName, filename=dptFileName)
        if cropAtGtPos:
            patch = self.cropPatchAtGTPos(frame, cropSize, targetSize, inputMode)
            #print("patch mn/mx {},{}".format(numpy.min(patch),numpy.max(patch)))
            data.append(patch)

            # keep only the first few full frames for debugging
            if i > 10:
                frame.img = None
                frame.dpt = None
                frame.mask = None
        else:
            data.append(frame)
        pbar.update(i)

    return NamedImgSequence(seqName, data)
def loadSequence(self, objName, imgNums=None, cropSize=None, targetSize=None, zRotInv=0):
    """
    Load Blender-rendered training views (color, depth, mask) of an object
    and return cropped patches as a named sequence.

    :param objName: object directory name under basePath
    :param imgNums: optional list of view indices to restrict loading to
    :param cropSize: crop size (default 20.0), forwarded to cropPatchFromFrame
    :param targetSize: patch target size (default 64)
    :param zRotInv: 1 if the object is fully rotationally symmetric about z
    :return: NamedImgSequence of cropped patches
    """
    floatX = theano.config.floatX  # @UndefinedVariable

    if cropSize is None:
        cropSize = 20.0  # 28.
    if targetSize is None:
        targetSize = 64
    seqData = []
    #cam = BlenderCam()

    # camera setup: one rendered view per (position, in-plane orientation) pair
    camPositionsElAz = numpy.loadtxt(self.basePath + 'camPositionsElAz.txt')
    camOrientations = numpy.loadtxt(self.basePath + 'camOrientations.txt')
    if camOrientations.ndim < 2:
        # single column read as flat array -> reshape to (n, 2) pairs
        camOrientations = camOrientations.reshape(
            (len(camOrientations) / 2, 2))
    numCams = len(camPositionsElAz)
    camElAz = numpy.repeat(camPositionsElAz, camOrientations.shape[0], axis=0)
    camOrientations = numpy.tile(camOrientations, (numCams, 1))
    numCams = len(camElAz)
    camDist = 40.  # cm, TODO adjust for other objects ?

    if zRotInv == 1:
        camElAz[:, 1] = 0.0  # if it is fully rotationally symmetric, the azimuth is always 0

    path = "{}{}/train/".format(self.basePath, objName)
    #path = "{}{}/".format(self.basePath,objName)
    #raise ValueError("path {}".format(path))
    pat = "{}img*.png".format(path)
    print("looking for " + pat)
    d = sorted(glob.glob(pat))
    # every camera view must have exactly one rendered image
    assert numCams == len(d), "hey! {} != {}".format(numCams, len(d))

    if imgNums is not None:
        # restrict images and the matching camera parameters to the subset
        d = [d[i] for i in imgNums]
        #camPoses = [camPoses[i,:] for i in imgNums]
        camElAz = [camElAz[i, :] for i in imgNums]
        camOrientations = [camOrientations[i, :] for i in imgNums]

    txt = "loading {}".format(objName)
    pbar = pb.ProgressBar(maxval=len(d), widgets=[txt, pb.Percentage(), pb.Bar()])
    pbar.start()

    for frameNum in xrange(len(d)):
        pbar.update(frameNum)

        fileName = d[frameNum]
        fn = os.path.split(fileName)[1]
        # companion depth/mask files share the numeric suffix after 'img'
        fnroot = fn[3:]
        imgFileName = fileName
        dptFileName = "{}dpt{}".format(path, fnroot)
        maskFileName = "{}mask{}".format(path, fnroot)
        #print("files: {}, {}".format(imgFileName,dptFileName))

        #dptRendered = Image.open(dptFileName)
        #dptrData = numpy.asarray(dptRendered,numpy.float32)
        dptrData = cv2.imread(dptFileName, cv2.IMREAD_UNCHANGED).astype(numpy.float32)
        if len(dptrData.shape) > 2:
            dptrData = dptrData[:, :, 0]
        dptrData *= 200. / 2**16.  # first in blender we expressed everything in dm (ie 1unit = 10cm), then we mapped 0..20 -> 0..1, then blender fills the the 16bit up. image is scaled to 0..1 -> 0..2^16

        maskData = cv2.imread(maskFileName, cv2.IMREAD_UNCHANGED).astype(numpy.float32)
        #print("maskData mn/mx {},{}".format(numpy.min(maskData),numpy.max(maskData)))
        #cv2.imshow("maskData",maskData/(2**16.))
        #raise ValueError("ok")
        # binarize: anything below (near-)white is foreground
        maskData = (maskData < (2**16. - 5)).astype(numpy.uint8)
        #cv2.imshow("maskData2",maskData)
        #cv2.waitKey(0)
        #raise ValueError("ok")

        # DEBUG print value range
        #print("dptrData min {}, max {}".format(dptrData.min(),dptrData.max()))
        # DEBUG print value of center pixel
        #h,w = dptrData.shape
        #print dptrData[h/2,w/2]
        # DEBUG show
        #dptRendered = Image.fromarray(dptrData*255./8.)  # to show it we scale from 0..8 to 0..255, everything beyond 8 is cut away (saturated)
        #dptRendered.show('rendered')

        #self.cropPatchFromDptAndImg(dptrData,dptrData,(320,240),3.)

        imgData = cv2.imread(imgFileName)
        # normalize color to roughly [-0.5, 0.5]
        imgData = imgData.astype(floatX) / 255. - 0.5

        #tra = numpy.array([0,0,0])
        #pose = LinemodObjPose(relCamPos=rot[0:3],tra=tra)
        elAz = camElAz[frameNum]
        pose = LinemodObjPose(zRotInv=zRotInv)
        pose.setAzEl(az=elAz[1], el=elAz[0], dist=camDist, lookAt=numpy.array([0., 0., -5.]))

        #--------------------
        # DEBUG - consistency check
        # check if this relCamPos calculated from the rotation matrix, set by az,el is still the same as calculated directly
        rcp = pose.relCamPos
        camEl = -elAz[0]
        camAz = elAz[1]
        camX = numpy.sin(camAz) * numpy.cos(
            camEl)  # coordinates of cameras in blender ()
        camY = numpy.cos(camAz) * numpy.cos(camEl)
        camZ = numpy.sin(camEl)
        camP = numpy.array([camX, camY, camZ]) * camDist
        camP += numpy.array([0., 0., -5.])  # offset from ground
        #print("camP {}, rcp {}".format(camP,rcp))
        if not numpy.allclose(rcp, camP):
            raise ValueError(
                "Shit just got real: rcp={} != camP={}".format(rcp, camP))

        frame = Frame(imgData, dptrData, maskData, pose, objName, imgFileName)
        #dptPatch,imgPatch = self.cropPatchFromDptAndImg(dptrData,imgData,(320,240),cropSize,targetSize,fixedD=4.0)
        #seqData.append(Frame(imgPatch,dptPatch,pose,objName,imgFileName))
        patch = self.cropPatchFromFrame(frame, (320, 240), cropSize, targetSize, fixedD=camDist)
        seqData.append(patch)

        # keep only the first few full frames for debugging
        if frameNum > 10:
            frame.img = None
            frame.dpt = None
            frame.mask = None

    return NamedImgSequence(objName, seqData)
def loadSingleSampleNyu(basepath, seqName, index, rng, doLoadRealSample=True, camId=1, fx=588.03, fy=587.07, ux=320., uy=240., allJoints=False, config={'cube': (250,250,250)}, cropSize=(128,128), doJitterCom=False, sigmaCom=20., doAddWhiteNoise=False, sigmaNoise=10., doJitterCubesize=False, sigmaCubesize=20., doJitterRotation=False, rotationAngleRange=[-45.0, 45.0], useCache=True, cacheDir="./cache/single_samples", labelMat=None, doUseLabel=True, minRatioInside=0.3, jitter=None):
    """
    Load an image sequence from the NYU hand pose dataset

    NOTE(review): 'config' and 'rotationAngleRange' are mutable default
    arguments; both are only read here, but a None-sentinel default would
    be safer against accidental mutation by callers.

    Arguments:
        basepath (string): base path, containing sub-folders "train" and "test"
        seqName: sequence name, e.g. train
        index (int): index of image to be loaded
        rng (random number generator): as returned by numpy.random.RandomState
        doLoadRealSample (boolean, optional): whether to load the real sample,
            i.e., captured by the camera (True; default) or the synthetic
            sample, rendered using a hand model (False)
        camId (int, optional): camera ID, as used in filename; \in {1,2,3};
            default: 1
        fx, fy (float, optional): camera focal length; default for cam 1
        ux, uy (float, optional): camera principal point; default for cam 1
        allJoints (boolean): use all 36 joints or just the eval.-subset
        config (dictionary, optional): need to have a key 'cube' whose value
            is a 3-tuple specifying the cube-size (x,y,z in mm) which is
            extracted around the found hand center location
        cropSize (2-tuple, optional): size of cropped patches in pixels;
            default: 128x128
        doJitterCom (boolean, optional): default: False
        sigmaCom (float, optional): sigma for center of mass samples
            (in millimeters); only relevant if doJitterCom is True
        sigmaNoise (float, optional): sigma for additive noise;
            only relevant if doAddWhiteNoise is True
        doAddWhiteNoise (boolean, optional): add normal distributed noise
            to the depth image; default: False
        useCache (boolean, optional): True (default): store in/load from
            pickle file
        cacheDir (string, optional): path to store/load pickle
        labelMat (optional): loaded mat file; (full file need to be loaded
            for each sample if not given)
        doUseLabel (bool, optional): specify if the label should be used;
            default: True

    Returns:
        named image sequence
    """
    # joint id used as hand center (center of mass ground truth)
    idComGT = 13
    if allJoints:
        idComGT = 34

    # if no jitter object is given, fresh jitter values are sampled from rng;
    # otherwise the provided (fixed) jitter values are reused
    if jitter == None:
        doUseFixedJitter = False
        jitter = Jitter()
    else:
        doUseFixedJitter = True

    # Load the dataset
    objdir = '{}/{}/'.format(basepath,seqName)

    if labelMat == None:
        trainlabels = '{}/{}/joint_data.mat'.format(basepath, seqName)
        labelMat = scipy.io.loadmat(trainlabels)

    joints3D = labelMat['joint_xyz'][camId-1]
    joints2D = labelMat['joint_uvd'][camId-1]

    if allJoints:
        eval_idxs = np.arange(36)
    else:
        eval_idxs = nyuRestrictedJointsEval

    numJoints = len(eval_idxs)

    data = []
    line = index

    # Assemble original filename
    prefix = "depth" if doLoadRealSample else "synthdepth"
    dptFileName = '{0:s}/{1:s}_{2:1d}_{3:07d}.png'.format(objdir, prefix, camId, line+1)

    # Assemble pickled filename
    cacheFilename = "frame_{}_all{}.pgz".format(index, allJoints)
    pickleCacheFile = os.path.join(cacheDir, cacheFilename)

    # Load image
    if useCache and os.path.isfile(pickleCacheFile):
        # Load from pickle file
        with gzip.open(pickleCacheFile, 'rb') as f:
            try:
                dpt = cPickle.load(f)
            except:
                print("Data file exists but failed to load. File: {}".format(pickleCacheFile))
                raise

    else:
        # Load from original file
        if not os.path.isfile(dptFileName):
            raise UserWarning("Desired image file from NYU dataset does not exist \
                (Filename: {}) \
                use cache? {}, cache file: {}".format(dptFileName, useCache, pickleCacheFile))
        dpt = loadDepthMap(dptFileName)

        # Write to pickle file
        if useCache:
            with gzip.GzipFile(pickleCacheFile, 'wb') as f:
                cPickle.dump(dpt, f, protocol=cPickle.HIGHEST_PROTOCOL)

    # Add noise?
    if doAddWhiteNoise:
        jitter.img_white_noise_scale = rng.randn(dpt.shape[0], dpt.shape[1])
        dpt = dpt + sigmaNoise * jitter.img_white_noise_scale
    else:
        jitter.img_white_noise_scale = np.zeros((dpt.shape[0], dpt.shape[1]), dtype=np.float32)

    # joints in image coordinates
    gtorig = np.zeros((numJoints, 3), np.float32)
    jt = 0
    for ii in range(joints2D.shape[1]):
        if ii not in eval_idxs:
            continue
        gtorig[jt,0] = joints2D[line,ii,0]
        gtorig[jt,1] = joints2D[line,ii,1]
        gtorig[jt,2] = joints2D[line,ii,2]
        jt += 1

    # normalized joints in 3D coordinates
    gt3Dorig = np.zeros((numJoints,3),np.float32)
    jt = 0
    for jj in range(joints3D.shape[1]):
        if jj not in eval_idxs:
            continue
        gt3Dorig[jt,0] = joints3D[line,jj,0]
        gt3Dorig[jt,1] = joints3D[line,jj,1]
        gt3Dorig[jt,2] = joints3D[line,jj,2]
        jt += 1

    if doJitterRotation:
        # rotate image and labels by a (possibly fixed) random angle in range
        if not doUseFixedJitter:
            jitter.rotation_angle_scale = rng.rand(1)
        rot = jitter.rotation_angle_scale * (rotationAngleRange[1] - rotationAngleRange[0]) + rotationAngleRange[0]
        dpt, gtorig, gt3Dorig = rotateImageAndGt(dpt, gtorig, gt3Dorig, rot, fx, fy, ux, uy, jointIdRotCenter=idComGT, pointsImgTo3DFunction=pointsImgTo3D, bgValue=10000)
    else:
        # Compute scale factor corresponding to no rotation
        jitter.rotation_angle_scale = -rotationAngleRange[0] / (rotationAngleRange[1] - rotationAngleRange[0])

    # Detect hand
    hdOwn = HandDetectorICG()

    comGT = copy.deepcopy(gtorig[idComGT])  # use GT position for comparison

    # Jitter com?
    comOffset = np.zeros((3), np.float32)
    if doJitterCom:
        if not doUseFixedJitter:
            jitter.detection_offset_scale = rng.randn(3)
        comOffset = sigmaCom * jitter.detection_offset_scale
        # Transform x/y to pixel coords (since com is in uvd coords)
        comOffset[0] = (fx * comOffset[0]) / (comGT[2] + comOffset[2])
        comOffset[1] = (fy * comOffset[1]) / (comGT[2] + comOffset[2])
    else:
        jitter.detection_offset_scale = np.zeros((3), dtype=np.float32)
    comGT = comGT + comOffset

    # Jitter scale (cube size)?
    cubesize = np.array((config['cube'][0], config['cube'][1], config['cube'][2]), dtype=np.float64)
    if doJitterCubesize:
        if not doUseFixedJitter:
            jitter.crop_scale = rng.rand(1)
        # sigma defines range for uniform distr.
        cubesizeChange = 2 * sigmaCubesize * jitter.crop_scale - sigmaCubesize
        cubesize += cubesizeChange
    else:
        jitter.crop_scale = 0.5

    dpt, M, com = hdOwn.cropArea3D(imgDepth=dpt, com=comGT, fx=fx, fy=fy, minRatioInside=minRatioInside, \
        size=cubesize, dsize=cropSize)

    com3D = jointImgTo3D(com, fx, fy, ux, uy)
    gt3Dcrop = gt3Dorig - com3D  # normalize to com
    gtcrop = np.zeros((gtorig.shape[0], 3), np.float32)
    for joint in range(gtorig.shape[0]):
        t=transformPoint2D(gtorig[joint], M)
        gtcrop[joint, 0] = t[0]
        gtcrop[joint, 1] = t[1]
        gtcrop[joint, 2] = gtorig[joint, 2]

    if not doUseLabel:
        # blank out all labels if they must not be used
        gtorig = np.zeros(gtorig.shape, gtorig.dtype)
        gtcrop = np.zeros(gtcrop.shape, gtcrop.dtype)
        gt3Dorig = np.zeros(gt3Dorig.shape, gt3Dorig.dtype)
        gt3Dcrop = np.zeros(gt3Dcrop.shape, gt3Dcrop.dtype)

    # per-sample config carries the (possibly jittered) cube size
    sampleConfig = copy.deepcopy(config)
    sampleConfig['cube'] = (cubesize[0], cubesize[1], cubesize[2])
    data.append(ICVLFrame(
        dpt.astype(np.float32),gtorig,gtcrop,M,gt3Dorig,
        gt3Dcrop,com3D,dptFileName,'',sampleConfig) )

    return NamedImgSequence(seqName,data,sampleConfig), jitter
def loadSequence(self, seqName, allJoints=False, Nmax=float('inf'), shuffle=False, rng=None, docom=False, rotation=False, dsize=(128, 128)): """ Load an image sequence from the dataset :param seqName: sequence name, e.g. train :param subSeq: list of subsequence names, e.g. 0, 45, 122-5 :param Nmax: maximum number of samples to load :return: returns named image sequence """ config = {'cube': (300, 300, 300)} config['cube'] = [s * self.scales[seqName] for s in config['cube']] refineNet = None pickleCache = '{}/{}_{}_{}_{}_cache.pkl'.format( self.cacheDir, self.__class__.__name__, seqName, allJoints, docom) if self.useCache: if os.path.isfile(pickleCache): print("Loading cache data from {}".format(pickleCache)) f = open(pickleCache, 'rb') (seqName, data, config) = cPickle.load(f) f.close() # shuffle data if shuffle and rng is not None: print("Shuffling") rng.shuffle(data) if not (np.isinf(Nmax)): return NamedImgSequence(seqName, data[0:Nmax], config) else: return NamedImgSequence(seqName, data, config) # Load the dataset objdir = '{}/{}/'.format(self.basepath, seqName) trainlabels = '{}/{}/joint_data.mat'.format(self.basepath, seqName) mat = scipy.io.loadmat(trainlabels) names = mat['joint_names'][0] joints3D = mat['joint_xyz'][0] joints2D = mat['joint_uvd'][0] if allJoints: eval_idxs = np.arange(36) else: eval_idxs = self.restrictedJointsEval self.numJoints = len(eval_idxs) txt = 'Loading {}'.format(seqName) pbar = pb.ProgressBar(maxval=joints3D.shape[0], widgets=[txt, pb.Percentage(), pb.Bar()]) pbar.start() data = [] if rotation: data_rotation = [] i = 0 for line in range(joints3D.shape[0]): dptFileName = '{0:s}/depth_1_{1:07d}.png'.format(objdir, line + 1) print dptFileName # augment the training dataset if rotation: assert seqName == 'train', 'we only rotate the training data' # 90', 180', or 270' rotation degree rotation_degree = np.random.randint(1, 4) * 90 if not os.path.isfile(dptFileName): if seqName == 'test_1' and line <= 2439 and line >= 0: print("File {} 
does not exist!".format(dptFileName)) i += 1 continue elif seqName == 'test_2' and line >= 2440 and line <= 8251: print("File {} does not exist!".format(dptFileName)) i += 1 continue else: i += 1 continue dpt = self.loadDepthMap(dptFileName) h, w = dpt.shape if rotation: import math def rotate_about_center(src, angle, scale=1.): w = src.shape[1] h = src.shape[0] rangle = np.deg2rad(angle) # angle in radians # now calculate new image width and height nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale # ask OpenCV for the rotation matrix rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale) # calculate the move from the old center to the new center combined # with the rotation rot_move = np.dot( rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0])) # the move only affects the translation, so update the translation # part of the transform rot_mat[0, 2] += rot_move[0] rot_mat[1, 2] += rot_move[1] return cv2.warpAffine( src, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))), flags=cv2.INTER_LANCZOS4), rot_mat # get the rotation matrix for warpAffine dpt_rotation, rotMat = rotate_about_center( dpt, rotation_degree) # joints in image coordinates gtorig = np.zeros((self.numJoints, 3), np.float32) jt = 0 for ii in range(joints2D.shape[1]): if ii not in eval_idxs: continue gtorig[jt, 0] = joints2D[line, ii, 0] gtorig[jt, 1] = joints2D[line, ii, 1] gtorig[jt, 2] = joints2D[line, ii, 2] jt += 1 if rotation: gtorig_rotation = np.zeros((self.numJoints, 3), np.float32) for i, joint in enumerate(gtorig): m11 = rotMat[0, 0] m12 = rotMat[0, 1] m13 = rotMat[0, 2] m21 = rotMat[1, 0] m22 = rotMat[1, 1] m23 = rotMat[1, 2] gtorig_rotation[ i, 0] = gtorig[i, 0] * m11 + gtorig[i, 1] * m12 + m13 gtorig_rotation[ i, 1] = gtorig[i, 0] * m21 + gtorig[i, 1] * m22 + m23 gtorig_rotation[i, 2] = gtorig[i, 2] # normalized joints in 3D coordinates gt3Dorig = np.zeros((self.numJoints, 3), np.float32) jt = 
0 for jj in range(joints3D.shape[1]): if jj not in eval_idxs: continue gt3Dorig[jt, 0] = joints3D[line, jj, 0] gt3Dorig[jt, 1] = joints3D[line, jj, 1] gt3Dorig[jt, 2] = joints3D[line, jj, 2] jt += 1 # transform from gtorign gt3Dorig_rotation = np.zeros((self.numJoints, 3), np.float32) if rotation: gt3Dorig_rotation = self.jointsImgTo3D(gtorig_rotation) #print gt3D #print("{}".format(gtorig)) #self.showAnnotatedDepth(ICVLFrame(dpt,gtorig,gtorig,0,gt3Dorig,gt3Dorig,0,dptFileName,'')) #print("{}".format(gtorig_rotation)) #self.showAnnotatedDepth(ICVLFrame(dpt_rotation,gtorig_rotation,gtorig_rotation,0,gt3Dorig_rotation,gt3Dorig_rotation,0,dptFileName,'')) # Detect hand hd = HandDetector(dpt, self.fx, self.fy, refineNet=refineNet, importer=self) if not hd.checkImage(1): print("Skipping image {}, no content".format(dptFileName)) i += 1 continue try: if allJoints: dpt, M, com = hd.cropArea3D(gtorig[34], size=config['cube'], docom=docom, dsize=dsize) else: dpt, M, com = hd.cropArea3D(gtorig[13], size=config['cube'], docom=docom, dsize=dsize) except UserWarning: print( "Skipping image {}, no hand detected".format(dptFileName)) continue if rotation: # Detect hand hd_rotation = HandDetector(dpt_rotation, self.fx, self.fy, refineNet=refineNet, importer=self) try: if allJoints: dpt_rotation, M_rotation, com_rotation = hd_rotation.cropArea3D( gtorig_rotation[34], size=config['cube'], docom=docom, dsize=dsize) else: dpt_rotation, M_rotation, com_rotation = hd_rotation.cropArea3D( gtorig_rotation[13], size=config['cube'], docom=docom, dsize=dsize) except UserWarning: print("Skipping image {}, no hand detected".format( dptFileName)) continue com3D = self.jointImgTo3D(com) gt3Dcrop = gt3Dorig - com3D # normalize to com gtcrop = np.zeros((gtorig.shape[0], 3), np.float32) for joint in range(gtorig.shape[0]): t = transformPoint2D(gtorig[joint], M) gtcrop[joint, 0] = t[0] gtcrop[joint, 1] = t[1] gtcrop[joint, 2] = gtorig[joint, 2] # create 3D voxel dpt3D = np.zeros((8, dpt.shape[0], 
dpt.shape[1]), dtype=np.uint8) sorted_dpt = np.sort(dpt, axis=None) iii = np.where(sorted_dpt != 0)[0][0] min_d = sorted_dpt[iii] max_d = np.max(dpt) slice_range = [] slice_step = (max_d - min_d) / 8 for i in range(9): slice_range.append(min_d + slice_step * i) slice_range = np.array(slice_range) lh, lw = np.where(dpt != 0) for ii in xrange(lh.shape[0]): ih, iw = lh[ii], lw[ii] dptValue = dpt[ih, iw] slice_layer = np.where(dptValue >= slice_range)[0][-1] if slice_layer == 8: slice_layer = 7 dpt3D[slice_layer, ih, iw] = 1 if rotation: com3D_rotation = self.jointImgTo3D(com_rotation) gt3Dcrop_rotation = gt3Dorig_rotation - com3D_rotation # normalize to com gtcrop_rotation = np.zeros((gtorig_rotation.shape[0], 3), np.float32) for joint in range(gtorig_rotation.shape[0]): t = transformPoint2D(gtorig_rotation[joint], M_rotation) gtcrop_rotation[joint, 0] = t[0] gtcrop_rotation[joint, 1] = t[1] gtcrop_rotation[joint, 2] = gtorig_rotation[joint, 2] dpt3D_rotation = np.zeros((8, dpt.shape[0], dpt.shape[1]), dtype=np.uint8) sorted_dpt_rotation = np.sort(dpt_rotation, axis=None) iii = np.where(sorted_dpt_rotation != 0)[0][0] min_d = sorted_dpt_rotation[iii] max_d = np.max(dpt_rotation) slice_range = [] slice_step = (max_d - min_d) / 8 for i in range(9): slice_range.append(min_d + slice_step * i) slice_range = np.array(slice_range) lh, lw = np.where(dpt_rotation != 0) for ii in xrange(lh.shape[0]): ih, iw = lh[ii], lw[ii] dptValue = dpt_rotation[ih, iw] slice_layer = np.where(dptValue >= slice_range)[0][-1] if slice_layer == 8: slice_layer = 7 dpt3D_rotation[slice_layer, ih, iw] = 1 # print("{}".format(gt3Dorig)) # self.showAnnotatedDepth(ICVLFrame(dpt,dpt3D,gtorig,gtcrop,M,gt3Dorig,gt3Dcrop,com3D,dptFileName,'')) # if rotation: # print("{}".format(gt3Dorig_rotation)) # self.showAnnotatedDepth(ICVLFrame(dpt_rotation,dpt3D_rotation,gtorig_rotation,gtcrop_rotation,M_rotation,gt3Dorig_rotation,gt3Dcrop_rotation,com3D_rotation,dptFileName,'')) # visualize 3D # from 
mpl_toolkits.mplot3d import Axes3D # fig = plt.figure() # ax = fig.add_subplot(111,projection='3d') # d,x,y = np.where(dpt3D == 1) # ax.scatter(x,y,8 - d) # # ax.view_init(0,0) # ax.set_xlabel('x') # ax.set_ylabel('y') # ax.set_zlabel('d') # plt.show() data.append( ICVLFrame(dpt.astype(np.float32), dpt3D, gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D, dptFileName, '')) if rotation: data_rotation.append( ICVLFrame(dpt_rotation.astype(np.float32), dpt3D_rotation, gtorig_rotation, gtcrop_rotation, M_rotation, gt3Dorig_rotation, gt3Dcrop_rotation, com3D_rotation, dptFileName, '')) pbar.update(i) i += 1 # early stop if len(data) >= Nmax: break pbar.finish() # augmentation if rotation: data.extend(data_rotation) print("Loaded {} samples.".format(len(data))) if self.useCache: print("Save cache data to {}".format(pickleCache)) f = open(pickleCache, 'wb') cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL) f.close() # shuffle data if shuffle and rng is not None: print("Shuffling") rng.shuffle(data) return NamedImgSequence(seqName, data, config)