def detect(self, frame):
    """
    Detect the hand in a frame and return a depth-normalized crop around it.

    While the state is STATE_INIT the hand sizes reported by the detector are
    collected; once ``numinitframes`` of them are available, their median
    becomes the new ``cube`` of the shared config and the state switches to
    STATE_RUN.

    :param frame: image frame
    :return: cropped image, transformation, center. If the detected location
             is (close to) all zero, an all-zero image of the pose network's
             input size, the 3x3 identity and that location are returned.
    """
    hd = HandDetector(frame, self.sync['config']['fx'], self.sync['config']['fy'],
                      importer=self.importer, refineNet=self.comrefNet)
    doHS = (self.state.value == self.STATE_INIT)
    # continue from the previous center only if tracking is on and a center exists
    if self.tracking.value and not numpy.allclose(self.lastcom, 0):
        loc, handsz = hd.track(self.lastcom, self.sync['config']['cube'], doHandSize=doHS)
    else:
        loc, handsz = hd.detect(size=self.sync['config']['cube'], doHandSize=doHS)

    self.lastcom = loc

    if self.state.value == self.STATE_INIT:
        self.handsizes.append(handsz)
        if self.verbose is True:
            print(numpy.median(numpy.asarray(self.handsizes), axis=0))
    else:
        self.handsizes = []

    if self.state.value == self.STATE_INIT and len(self.handsizes) >= self.numinitframes:
        # write the whole config back so the shared mapping sees the new cube
        cfg = self.sync['config']
        cfg['cube'] = tuple(numpy.median(numpy.asarray(self.handsizes), axis=0).astype('int'))
        self.sync.update(config=cfg)
        self.state.value = self.STATE_RUN
        self.handsizes = []

    in_shape = self.poseNet.input_images.get_shape()
    dsize = (in_shape[1].value, in_shape[2].value)

    if numpy.allclose(loc, 0):
        return numpy.zeros(dsize, dtype='float32'), numpy.eye(3), loc

    crop, M, com = hd.cropArea3D(com=loc, size=self.sync['config']['cube'], dsize=dsize)
    com3D = self.importer.jointImgTo3D(com)
    sc = (self.sync['config']['cube'][2] / 2.)
    # zero-valued pixels are pushed to the far side of the cube
    crop[crop == 0] = com3D[2] + sc
    # ndarray.clip returns a new array; clip in place so the limits take effect
    numpy.clip(crop, com3D[2] - sc, com3D[2] + sc, out=crop)
    # center on the hand depth and scale by half the cube depth -> [-1, 1]
    crop -= com3D[2]
    crop /= sc
    return crop, M, com3D
def detect(self, frame):
    """
    Locate the hand in a frame and return its depth-normalized crop.

    During STATE_INIT the detector's hand sizes are accumulated (and their
    running median printed); after ``numinitframes`` samples the median is
    stored as ``config['cube']`` and the state becomes STATE_RUN.

    :param frame: image frame
    :return: cropped image, transformation, center
    """
    detector = HandDetector(frame, self.config['fx'], self.config['fy'],
                            importer=self.importer, refineNet=self.comrefNet)
    calibrating = (self.state == self.STATE_INIT)

    use_tracking = self.tracking and not numpy.allclose(self.lastcom, 0)
    if use_tracking:
        loc, handsz = detector.track(self.lastcom, self.config['cube'], doHandSize=calibrating)
    else:
        loc, handsz = detector.detect(size=self.config['cube'], doHandSize=calibrating)
    self.lastcom = loc

    if not calibrating:
        self.handsizes = []
    else:
        self.handsizes.append(handsz)
        print(numpy.median(numpy.asarray(self.handsizes), axis=0))
        if len(self.handsizes) >= self.numinitframes:
            median_size = numpy.median(numpy.asarray(self.handsizes), axis=0)
            self.config['cube'] = tuple(median_size.astype('int'))
            self.state = self.STATE_RUN
            self.handsizes = []

    # nothing found: blank image, identity transform, raw location
    if numpy.allclose(loc, 0):
        net_dim = self.poseNet.cfgParams.inputDim
        blank = numpy.zeros((net_dim[2], net_dim[3]), dtype='float32')
        return blank, numpy.eye(3), loc

    layer_dim = self.poseNet.layers[0].cfgParams.inputDim
    crop, M, com = detector.cropArea3D(loc, size=self.config['cube'],
                                       dsize=(layer_dim[2], layer_dim[3]))
    com3D = self.importer.jointImgTo3D(com)

    half_depth = self.config['cube'][2] / 2.
    far = com3D[2] + half_depth
    near = com3D[2] - half_depth
    # zero-valued pixels go to the far plane, everything else is limited to the cube
    crop[crop == 0] = far
    crop[crop >= far] = far
    crop[crop <= near] = near
    # center on the hand depth and scale by half the cube depth
    crop -= com3D[2]
    crop /= half_depth
    return crop, M, com3D
def pointcloud(self):
    """
    Crop the current frame around the first entry of ``curData`` and pass the
    crop to the pose estimator's ``plotResult3D``.

    The same 3D point is passed for both point arguments of the plot, with
    ``showGT`` and ``niceColors`` switched off.
    """
    frame_path = self.path_prefix + self._seq.data[self.curFrame].fileName
    depth = self.importer.loadDepthMap(frame_path)

    anchor = self.curData[0]
    anchor3D = self.importer.jointImgTo3D(anchor).reshape((1, 3))

    detector = HandDetector(depth, self.importer.fx, self.importer.fy,
                            importer=self.importer)
    # docom=False: crop at the given point as is
    cropped, M, _ = detector.cropArea3D(anchor.reshape((3, )),
                                        size=self._seq.config['cube'],
                                        docom=False)
    self.hpe.plotResult3D(cropped, M, anchor3D, anchor3D,
                          showGT=False, niceColors=False)
def getCurrentStatus(self, filename_detections):
    """
    Restore the labeling position from a detections file.

    Frames are visited in order. For each one the cached center is looked up
    via ``HandDetector.detectFromCache``; the first frame whose cached center
    has a depth component close to zero becomes ``self.curFrame`` and ends the
    scan. For every frame before that, ``com`` of the sequence entry is
    replaced by the cached center converted with ``importer.jointImgTo3D``.
    Finally ``curFrame`` is moved one step back (one entry back within
    ``subset_idxs`` if a subset is active) so the last pose is redone.

    :param filename_detections: path of the detections file
    :return: None
    """
    pbar = pb.ProgressBar(maxval=len(self._seq.data),
                          widgets=['Loading last status', pb.Percentage(), pb.Bar()])
    pbar.start()

    with open(filename_detections, "r") as inputfile:
        cache_str = inputfile.readlines()

    # dummy object, only needed for its cache lookup; one instance serves all frames
    hd = HandDetector(numpy.zeros((1, 1)), 0., 0.)

    for i in range(len(self._seq.data)):
        pbar.update(i)
        # NOTE(review): scanning stops at the first index outside the subset
        if len(self.subset_idxs) > 0 and i not in self.subset_idxs:
            break

        com = numpy.asarray(hd.detectFromCache(filename_detections,
                                               self._seq.data[i].fileName,
                                               cache_str))
        if numpy.allclose(com[2], 0.):
            self.curFrame = i
            break
        self._seq.data[i] = self._seq.data[i]._replace(
            com=self.importer.jointImgTo3D(com.reshape((3, ))))
    pbar.finish()

    # redo last pose, it might be set to default and saved
    if self.curFrame > 0:
        if len(self.subset_idxs) > 0:
            pos = self.subset_idxs.index(self.curFrame)
            if pos - 1 >= 0:
                self.curFrame = self.subset_idxs[pos - 1]
        else:
            self.curFrame -= 1
import matplotlib.pyplot as plt
# Visual check of every 10th test sample: show the cropped image and overlay
# the ground-truth joints (crop-relative 3D joints + center, projected to the
# image and mapped through the sample's crop transformation).
for i in range(0, test_num, 10):
    plt.imshow(imgs_test[i], cmap='gray')
    gt3D = gt3Dcrops_test[i] + coms_test[i]
    jtI = transformPoints2D(di.joints3DToImg(gt3D), Ms_test[i])
    plt.scatter(jtI[:, 0], jtI[:, 1])
    # connect the joint pairs listed in hd_edges with red lines
    for edge in hd_edges:
        plt.plot(jtI[:, 0][edge], jtI[:, 1][edge], c='r')
    # short pause so the figure is drawn, then clear the axes for the next sample
    plt.pause(0.001)
    plt.cla()
    print cubes_test[i]
# detector built on a copy of the first training image
# NOTE(review): presumably only used by augmentation code further down - confirm
hd = HandDetector(imgs_train[0].copy(), abs(di.fx), abs(di.fy), importer=di, refineNet=None)
# Graph inputs; the leading None dimension leaves the batch size open.
inputs = tf.placeholder(dtype=tf.float32, shape=(None, 96, 96, 1))
label = tf.placeholder(dtype=tf.float32, shape=(None, 21 * 3))
gt_ht = tf.placeholder(dtype=tf.float32, shape=(None, 24, 24, 21))
is_train = tf.placeholder(dtype=tf.bool, shape=None)
coms = tf.placeholder(dtype=tf.float32, shape=(None, 3))
Ms = tf.placeholder(dtype=tf.float32, shape=(None, 3, 3))
Kernels = tf.placeholder(dtype=tf.float32, shape=(None, 3, 3))
kp = tf.placeholder(dtype=tf.float32, shape=None)
batch_size = 8
last_e = 100
# NOTE(review): meaning of the three entries is not visible here; the first
# matches the 21 joints used in the placeholders above - confirm
outdims = (21, 6, 15)
def loadSequence(self, seqName, Nmax=float('inf'), shuffle=False, rng=None, docom=False, cube=None, hand=None):
    """
    Load an image sequence from the dataset

    Depth frames are read from ``<basepath>/<seqName>/depth_1_NNNNNNN.png``
    and the labels from ``joint_data.mat`` in the same directory. If caching
    is enabled and a matching cache file exists, it is returned instead;
    otherwise the freshly loaded data is written to the cache.

    :param seqName: sequence name, e.g. train
    :param Nmax: maximum number of samples to load
    :param shuffle: shuffle the samples (only done when rng is given)
    :param rng: random number generator providing ``shuffle``
    :param docom: forwarded to ``HandDetector.cropArea3D``
    :param cube: crop cube as 3-tuple, defaults to ``self.default_cubes[seqName]``
    :param hand: must be None, other values raise NotImplementedError when
                 the first frame is processed
    :return: returns named image sequence
    """
    if cube is None:
        config = {'cube': self.default_cubes[seqName]}
    else:
        assert isinstance(cube, tuple)
        assert len(cube) == 3
        config = {'cube': cube}

    pickleCache = '{}/{}_{}_{}_{}_{}_cache.pkl'.format(
        self.cacheDir, self.__class__.__name__, seqName, self.allJoints,
        HandDetector.detectionModeToString(docom, self.refineNet is not None),
        config['cube'][0])
    if self.useCache and os.path.isfile(pickleCache):
        print("Loading cache data from {}".format(pickleCache))
        # NOTE: unpickling can execute arbitrary code, cacheDir must be trusted
        with open(pickleCache, 'rb') as f:
            (seqName, data, config) = cPickle.load(f)

        # shuffle data
        if shuffle and rng is not None:
            print("Shuffling")
            rng.shuffle(data)
        if not np.isinf(Nmax):
            return NamedImgSequence(seqName, data[0:Nmax], config)
        return NamedImgSequence(seqName, data, config)

    self.loadRefineNetLazy(self.refineNet)

    # Load the dataset
    objdir = '{}/{}/'.format(self.basepath, seqName)
    trainlabels = '{}/{}/joint_data.mat'.format(self.basepath, seqName)

    mat = scipy.io.loadmat(trainlabels)
    joints3D = mat['joint_xyz'][0]
    joints2D = mat['joint_uvd'][0]
    if self.allJoints:
        eval_idxs = np.arange(36)
    else:
        eval_idxs = self.restrictedJointsEval

    self.numJoints = len(eval_idxs)

    # Label columns to keep, in ascending order. Computed once instead of
    # re-testing the membership for every joint of every frame.
    sel2D = [j for j in range(joints2D.shape[1]) if j in eval_idxs]
    sel3D = [j for j in range(joints3D.shape[1]) if j in eval_idxs]

    txt = 'Loading {}'.format(seqName)
    pbar = pb.ProgressBar(maxval=joints3D.shape[0], widgets=[txt, pb.Percentage(), pb.Bar()])
    pbar.start()

    data = []
    for line in range(joints3D.shape[0]):
        dptFileName = '{0:s}/depth_1_{1:07d}.png'.format(objdir, line+1)

        if not os.path.isfile(dptFileName):
            print("File {} does not exist!".format(dptFileName))
            continue
        dpt = self.loadDepthMap(dptFileName)
        if hand is not None:
            raise NotImplementedError()

        # joints in image coordinates
        gtorig = np.zeros((self.numJoints, 3), np.float32)
        gtorig[:len(sel2D)] = joints2D[line, sel2D, 0:3]

        # normalized joints in 3D coordinates
        gt3Dorig = np.zeros((self.numJoints, 3), np.float32)
        gt3Dorig[:len(sel3D)] = joints3D[line, sel3D, 0:3]

        # Detect hand
        hd = HandDetector(dpt, self.fx, self.fy, refineNet=self.refineNet, importer=self)
        if not hd.checkImage(1):
            print("Skipping image {}, no content".format(dptFileName))
            continue
        try:
            dpt, M, com = hd.cropArea3D(com=gtorig[self.crop_joint_idx], size=config['cube'], docom=docom)
        except UserWarning:
            print("Skipping image {}, no hand detected".format(dptFileName))
            continue

        com3D = self.jointImgTo3D(com)
        gt3Dcrop = gt3Dorig - com3D  # normalize to com
        gtcrop = transformPoints2D(gtorig, M)

        data.append(DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D,
                               dptFileName, '', self.sides[seqName], {}))
        # report the frame index so the bar does not lag behind skipped frames
        pbar.update(line)

        # early stop
        if len(data) >= Nmax:
            break

    pbar.finish()
    print("Loaded {} samples.".format(len(data)))

    if self.useCache:
        print("Save cache data to {}".format(pickleCache))
        with open(pickleCache, 'wb') as f:
            cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)

    # shuffle data
    if shuffle and rng is not None:
        print("Shuffling")
        rng.shuffle(data)
    return NamedImgSequence(seqName, data, config)
def loadSequence(self, seqName, subSeq=None, Nmax=float('inf'), shuffle=False, rng=None, docom=False, cube=None, hand=None):
    """
    Load an image sequence from the dataset

    Every sub-directory of ``<basepath>/<seqName>/`` is expected to hold a
    ``joint.txt`` (first line: number of samples, then one label line per
    sample) and depth files named ``NNNNNN_depth.bin``. If caching is enabled
    and a matching cache file exists, it is returned instead; otherwise the
    freshly loaded data is written to the cache.

    :param seqName: sequence name, e.g. subject1
    :param subSeq: None or list of sub-directory names to load
    :param Nmax: maximum number of samples to load
    :param shuffle: shuffle the samples (only done when rng is given)
    :param rng: random number generator providing ``shuffle``
    :param docom: forwarded to ``HandDetector.cropArea3D``
    :param cube: crop cube as 3-tuple, defaults to ``self.default_cubes[seqName]``
    :param hand: must be None, other values raise NotImplementedError when
                 the first frame is processed
    :return: returns named image sequence
    :raises TypeError: if subSeq is neither None nor a list
    """
    if (subSeq is not None) and (not isinstance(subSeq, list)):
        raise TypeError("subSeq must be None or list")

    if cube is None:
        config = {'cube': self.default_cubes[seqName]}
    else:
        assert isinstance(cube, tuple)
        assert len(cube) == 3
        config = {'cube': cube}

    mode = HandDetector.detectionModeToString(docom, self.refineNet is not None)
    if subSeq is None:
        pickleCache = '{}/{}_{}_None_{}_{}_cache.pkl'.format(
            self.cacheDir, self.__class__.__name__, seqName, mode, config['cube'][0])
    else:
        pickleCache = '{}/{}_{}_{}_{}_{}_cache.pkl'.format(
            self.cacheDir, self.__class__.__name__, seqName, ''.join(subSeq), mode, config['cube'][0])

    # logical 'and' (not bitwise '&'): skips the file check when caching is off
    if self.useCache and os.path.isfile(pickleCache):
        print("Loading cache data from {}".format(pickleCache))
        # NOTE: unpickling can execute arbitrary code, cacheDir must be trusted
        with open(pickleCache, 'rb') as f:
            (seqName, data, config) = cPickle.load(f)

        # shuffle data
        if shuffle and rng is not None:
            print("Shuffling")
            rng.shuffle(data)
        if not np.isinf(Nmax):
            return NamedImgSequence(seqName, data[0:Nmax], config)
        return NamedImgSequence(seqName, data, config)

    self.loadRefineNetLazy(self.refineNet)

    # Load the dataset
    objdir = '{}/{}/'.format(self.basepath, seqName)
    subdirs = sorted([name for name in os.listdir(objdir) if os.path.isdir(os.path.join(objdir, name))])

    txt = 'Loading {}'.format(seqName)
    # progress bar maximum: half of all files below objdir
    nImgs = sum([len(files) for r, d, files in os.walk(objdir)]) // 2
    pbar = pb.ProgressBar(maxval=nImgs, widgets=[txt, pb.Percentage(), pb.Bar()])
    pbar.start()

    data = []
    pi = 0
    for subdir in subdirs:
        # sample limit reached, no need to open further label files
        if len(data) >= Nmax:
            break

        # check for subsequences and skip them if necessary
        subSeqName = ''
        if subSeq is not None:
            if subdir not in subSeq:
                continue
            subSeqName = subdir

        # iterate all subdirectories
        trainlabels = '{}/{}/joint.txt'.format(objdir, subdir)
        # 'with' closes the label file even if a frame raises
        with open(trainlabels) as inputfile:
            # read number of samples
            nSamples = int(inputfile.readline())

            for i in range(nSamples):
                # early stop
                if len(data) >= Nmax:
                    break

                # the label line is consumed even if the frame is skipped below
                line = inputfile.readline()
                part = line.split(' ')

                dptFileName = '{}/{}/{}_depth.bin'.format(objdir, subdir, str(i).zfill(6))

                if not os.path.isfile(dptFileName):
                    print("File {} does not exist!".format(dptFileName))
                    continue
                dpt = self.loadDepthMap(dptFileName)
                if hand is not None:
                    raise NotImplementedError()

                # 3D joints, three consecutive values per joint
                gt3Dorig = np.zeros((self.numJoints, 3), np.float32)
                for joint in range(gt3Dorig.shape[0]):
                    for xyz in range(0, 3):
                        gt3Dorig[joint, xyz] = part[joint*3+xyz]

                # invert axis
                gt3Dorig[:, 2] *= (-1.)

                # joints in image coordinates
                gtorig = self.joints3DToImg(gt3Dorig)

                # Detect hand
                hd = HandDetector(dpt, self.fx, self.fy, refineNet=self.refineNet, importer=self)
                if not hd.checkImage(1.):
                    print("Skipping image {}, no content".format(dptFileName))
                    continue

                try:
                    dpt, M, com = hd.cropArea3D(com=gtorig[self.crop_joint_idx], size=config['cube'], docom=docom)
                except UserWarning:
                    print("Skipping image {}, no hand detected".format(dptFileName))
                    continue

                com3D = self.jointImgTo3D(com)
                gt3Dcrop = gt3Dorig - com3D  # normalize to com
                gtcrop = transformPoints2D(gtorig, M)

                data.append(DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D,
                                       dptFileName, subSeqName, self.sides[seqName], {}))
                pbar.update(pi)
                pi += 1

    pbar.finish()
    print("Loaded {} samples.".format(len(data)))

    if self.useCache:
        print("Save cache data to {}".format(pickleCache))
        with open(pickleCache, 'wb') as f:
            cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)

    # shuffle data
    if shuffle and rng is not None:
        print("Shuffling")
        rng.shuffle(data)
    return NamedImgSequence(seqName, data, config)
def loadSequence(self, seqName, subSeq=None, Nmax=float('inf'), shuffle=False, rng=None, docom=False, cube=None, hand=None):
    """
    Load an image sequence from the dataset

    Labels are read from ``<basepath>/<seqName>.txt`` (one line per frame:
    file name relative to ``<basepath>/Depth/`` followed by the joint values).
    With caching enabled, a cache for exactly this request is returned if it
    exists; if several subsequences are requested and each has its own cache,
    those are merged and returned. Otherwise the data is loaded and cached.

    :param seqName: sequence name, e.g. train
    :param subSeq: list of subsequence names, e.g. 0, 45, 122-5
    :param Nmax: maximum number of samples to load
    :param shuffle: shuffle the samples (only done when rng is given)
    :param rng: random number generator providing ``shuffle``
    :param docom: forwarded to ``HandDetector.cropArea3D``
    :param cube: crop cube as 3-tuple, defaults to ``self.default_cubes[seqName]``
    :param hand: must be None, other values raise NotImplementedError when
                 the first frame is processed
    :return: returns named image sequence
    :raises TypeError: if subSeq is neither None nor a list
    """
    if (subSeq is not None) and (not isinstance(subSeq, list)):
        raise TypeError("subSeq must be None or list")

    if cube is None:
        config = {'cube': self.default_cubes[seqName]}
    else:
        assert isinstance(cube, tuple)
        assert len(cube) == 3
        config = {'cube': cube}

    # All cache names share '<dir>/<class>_<seq>_<tag>_<mode>_<cube>_cache.pkl'.
    # The per-subsequence lookup used to leave out the cube size and therefore
    # never matched the files written at the end of this method.
    mode = HandDetector.detectionModeToString(docom, self.refineNet is not None)
    cachePrefix = '{}/{}_{}'.format(self.cacheDir, self.__class__.__name__, seqName)
    cacheSuffix = '{}_{}_cache.pkl'.format(mode, config['cube'][0])
    cacheTag = 'None' if subSeq is None else ''.join(subSeq)
    pickleCache = '{}_{}_{}'.format(cachePrefix, cacheTag, cacheSuffix)

    if self.useCache:
        if os.path.isfile(pickleCache):
            print("Loading cache data from {}".format(pickleCache))
            # NOTE: unpickling can execute arbitrary code, cacheDir must be trusted
            with open(pickleCache, 'rb') as f:
                (seqName, data, config) = cPickle.load(f)

            # shuffle data
            if shuffle and rng is not None:
                print("Shuffling")
                rng.shuffle(data)
            if not np.isinf(Nmax):
                return NamedImgSequence(seqName, data[0:Nmax], config)
            return NamedImgSequence(seqName, data, config)

        # check for multiple subsequences
        if subSeq is not None and len(subSeq) > 1:
            # paths are fixed before loading, since loading rebinds seqName/config
            subCaches = ['{}_{}_{}'.format(cachePrefix, s, cacheSuffix) for s in subSeq]
            firstMissing = next((s for s, c in zip(subSeq, subCaches) if not os.path.isfile(c)), None)
            if firstMissing is not None:
                print("missing: {}".format(firstMissing))
            else:
                # concatenate the per-subsequence caches in the requested order
                fullData = []
                for subCache in subCaches:
                    print("Loading cache data from {}".format(subCache))
                    with open(subCache, 'rb') as f:
                        (seqName, data, config) = cPickle.load(f)
                    fullData.extend(data)

                # shuffle data
                if shuffle and rng is not None:
                    print("Shuffling")
                    rng.shuffle(fullData)
                if not np.isinf(Nmax):
                    return NamedImgSequence(seqName, fullData[0:Nmax], config)
                return NamedImgSequence(seqName, fullData, config)

    self.loadRefineNetLazy(self.refineNet)

    # Load the dataset
    objdir = '{}/Depth/'.format(self.basepath)
    trainlabels = '{}/{}.txt'.format(self.basepath, seqName)

    # read the label file once; the handle is closed right away
    with open(trainlabels) as inputfile:
        labelLines = inputfile.readlines()

    txt = 'Loading {}'.format(seqName)
    pbar = pb.ProgressBar(maxval=len(labelLines), widgets=[txt, pb.Percentage(), pb.Bar()])
    pbar.start()

    data = []
    for i, line in enumerate(labelLines):
        # early stop
        if len(data) >= Nmax:
            break

        part = line.split(' ')
        # check for subsequences and skip them if necessary
        subSeqName = ''
        if subSeq is not None:
            p = part[0].split('/')
            # a leading path component longer than 6 characters marks original
            # (unrotated) data, which is addressed as subsequence '0'
            candidate = p[0] if len(p[0]) <= 6 else '0'
            if candidate not in subSeq:
                continue
            subSeqName = candidate

        dptFileName = '{}/{}'.format(objdir, part[0])

        if not os.path.isfile(dptFileName):
            print("File {} does not exist!".format(dptFileName))
            continue
        dpt = self.loadDepthMap(dptFileName)
        if hand is not None:
            raise NotImplementedError()

        # joints in image coordinates, three values per joint after the file name
        gtorig = np.zeros((self.numJoints, 3), np.float32)
        for joint in range(self.numJoints):
            for xyz in range(0, 3):
                gtorig[joint, xyz] = part[joint*3+xyz+1]

        # normalized joints in 3D coordinates
        gt3Dorig = self.jointsImgTo3D(gtorig)

        # Detect hand
        hd = HandDetector(dpt, self.fx, self.fy, refineNet=self.refineNet, importer=self)
        if not hd.checkImage(1):
            print("Skipping image {}, no content".format(dptFileName))
            continue
        try:
            dpt, M, com = hd.cropArea3D(com=gtorig[self.crop_joint_idx], size=config['cube'], docom=docom)
        except UserWarning:
            print("Skipping image {}, no hand detected".format(dptFileName))
            continue

        com3D = self.jointImgTo3D(com)
        gt3Dcrop = gt3Dorig - com3D  # normalize to com
        gtcrop = transformPoints2D(gtorig, M)

        data.append(DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D,
                               dptFileName, subSeqName, 'left', {}))
        pbar.update(i)

    pbar.finish()
    print("Loaded {} samples.".format(len(data)))

    if self.useCache:
        print("Save cache data to {}".format(pickleCache))
        with open(pickleCache, 'wb') as f:
            cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)

    # shuffle data
    if shuffle and rng is not None:
        print("Shuffling")
        rng.shuffle(data)
    return NamedImgSequence(seqName, data, config)
def loadSequence(self, seqName, camera=None, Nmax=float('inf'), shuffle=False, rng=None):
    """
    Load an image sequence from the dataset

    Depth maps (``*.npy``) and label files (``*anno_blender.txt``) are
    collected recursively below ``<basepath>/<seqName>`` and paired by the
    underscore-separated field of the file name that carries the frame id.
    If caching is enabled and a matching cache file exists, it is returned
    instead; otherwise the freshly loaded data is written to the cache.

    :param seqName: sequence name, e.g. subject1
    :param camera: None for all files, or an int selecting files named
                   ``c<camera, two digits>_*``
    :param Nmax: maximum number of samples to load
    :param shuffle: shuffle the samples (only done when rng is given)
    :param rng: random number generator providing ``shuffle``
    :return: returns named image sequence
    """
    config = {'cube': (220, 220, 220)}

    pickleCache = '{}/{}_{}_{}_cache.pkl'.format(self.cacheDir, self.__class__.__name__, seqName, camera)
    # logical 'and' (not bitwise '&'): skips the file check when caching is off
    if self.useCache and os.path.isfile(pickleCache):
        print("Loading cache data from {}".format(pickleCache))
        # NOTE: unpickling can execute arbitrary code, cacheDir must be trusted
        with open(pickleCache, 'rb') as f:
            (seqName, data, config) = cPickle.load(f)

        # shuffle data
        if shuffle and rng is not None:
            print("Shuffling")
            rng.shuffle(data)
        if not np.isinf(Nmax):
            return NamedImgSequence(seqName, data[0:Nmax], config)
        return NamedImgSequence(seqName, data, config)

    # Load the dataset
    objdir = os.path.join(self.basepath, seqName)
    if camera is not None:
        dflt = "c{0:02d}_*.npy".format(camera)
        lflt = "c{0:02d}_*anno_blender.txt".format(camera)
        # with a camera prefix the frame id is the second '_'-separated field
        number_idx = 1
    else:
        dflt = '*.npy'
        lflt = '*anno_blender.txt'
        number_idx = 0

    def collect(pattern):
        # all files below objdir matching the pattern, sorted by full path
        return sorted([os.path.join(dirpath, f)
                       for dirpath, dirnames, files in os.walk(objdir)
                       for f in fnmatch.filter(files, pattern)])

    def frameId(path):
        return os.path.basename(path).split('_')[number_idx]

    dptFiles = collect(dflt)
    labelFiles = collect(lflt)

    # sync lists: keep depth files that have a label; if several labels share
    # a frame id the first one (in sorted order) wins
    labelById = {}
    for lbl in labelFiles:
        labelById.setdefault(frameId(lbl), lbl)
    pairs = [(dpt, labelById[frameId(dpt)]) for dpt in dptFiles if frameId(dpt) in labelById]

    txt = 'Loading {}'.format(seqName)
    nImgs = len(pairs)
    pbar = pb.ProgressBar(maxval=nImgs, widgets=[txt, pb.Percentage(), pb.Bar()])
    pbar.start()

    data = []
    for i, (dptFileName, labelFileName) in enumerate(pairs):
        # check both files before paying for the depth map load
        if not os.path.isfile(dptFileName):
            print("File {} does not exist!".format(dptFileName))
            continue
        if not os.path.isfile(labelFileName):
            print("File {} does not exist!".format(labelFileName))
            continue
        dpt = self.loadDepthMap(dptFileName)

        # joints in image coordinates, flat list of values -> one row per joint
        gtorig = np.genfromtxt(labelFileName)
        # -1 lets numpy derive the row count ('/' yields a float on Python 3)
        gtorig = gtorig.reshape((-1, 3))
        gtorig[:, 2] *= 1000.

        # normalized joints in 3D coordinates
        gt3Dorig = self.jointsImgTo3D(gtorig)

        ### Shorting the tip bone, BUGFIX
        tips = [3, 8, 13, 18, 23]
        shrink_factor = 0.85
        for joint_idx in tips:
            # move each tip towards its preceding joint along the bone
            t = gt3Dorig[joint_idx, :] - gt3Dorig[joint_idx - 1, :]
            gt3Dorig[joint_idx, :] = gt3Dorig[joint_idx - 1, :] + shrink_factor * t
        gtorig = self.joints3DToImg(gt3Dorig)
        ###

        # Detect hand
        hd = HandDetector(dpt, self.fx, self.fy, importer=self)
        if not hd.checkImage(1.):
            print("Skipping image {}, no content".format(dptFileName))
            continue

        try:
            # crop around joint 10
            dpt, M, com = hd.cropArea3D(gtorig[10], size=config['cube'])
        except UserWarning:
            print("Skipping image {}, no hand detected".format(dptFileName))
            continue

        com3D = self.jointImgTo3D(com)
        gt3Dcrop = gt3Dorig - com3D  # normalize to com
        # joints in crop coordinates: x/y transformed by M, depth unchanged
        gtcrop = np.zeros((gt3Dorig.shape[0], 3), np.float32)
        for joint in range(gtcrop.shape[0]):
            t = transformPoint2D(gtorig[joint], M)
            gtcrop[joint, 0] = t[0]
            gtcrop[joint, 1] = t[1]
            gtcrop[joint, 2] = gtorig[joint, 2]

        data.append(DepthFrame(dpt.astype(np.float32), gtorig, gtcrop, M, gt3Dorig, gt3Dcrop, com3D,
                               dptFileName, '', self.sides[seqName], {}))
        pbar.update(i)

        # early stop
        if len(data) >= Nmax:
            break

    pbar.finish()
    print("Loaded {} samples.".format(len(data)))

    if self.useCache:
        print("Save cache data to {}".format(pickleCache))
        with open(pickleCache, 'wb') as f:
            cPickle.dump((seqName, data, config), f, protocol=cPickle.HIGHEST_PROTOCOL)

    # shuffle data
    if shuffle and rng is not None:
        print("Shuffling")
        rng.shuffle(data)
    return NamedImgSequence(seqName, data, config)
# Network setup: one output joint with three dimensions.
# NOTE(review): presumably the network regresses the crop joint / hand center,
# since setData below passes gt3D[:, di.crop_joint_idx, :] - confirm
batchSize = 64
poseNetParams = ScaleNetParams(type=1, nChan=nChannels, wIn=imgSizeW, hIn=imgSizeH, batchSize=batchSize, resizeFactor=2, numJoints=1, nDims=3)
poseNet = ScaleNet(rng, cfgParams=poseNetParams)

# Trainer configuration; the batch size is kept equal to the network's.
poseNetTrainerParams = ScaleNetTrainerParams()
poseNetTrainerParams.use_early_stopping = False
poseNetTrainerParams.batch_size = batchSize
poseNetTrainerParams.learning_rate = 0.0005
poseNetTrainerParams.weightreg_factor = 0.0001
poseNetTrainerParams.force_macrobatch_reload = True
poseNetTrainerParams.para_augment = True
# name of the augmentation function and its arguments; the detector is built
# on a copy of the first training image
poseNetTrainerParams.augment_fun_params = {'fun': 'augment_poses', 'args': {'normZeroOne': False, 'di': di, 'aug_modes': aug_modes, 'hd': HandDetector(train_data[0, 0].copy(), abs(di.fx), abs(di.fy), importer=di)}}

print("setup trainer")
poseNetTrainer = ScaleNetTrainer(poseNet, poseNetTrainerParams, rng, './eval/'+eval_prefix)
# targets: only the joint at di.crop_joint_idx of every sample
poseNetTrainer.setData(train_data, train_gt3D[:, di.crop_joint_idx, :], val_data, val_gt3D[:, di.crop_joint_idx, :])
# NOTE(review): the *2 / *4 arrays look like additional input scales for the
# x1/x2 branches - confirm where they are built
poseNetTrainer.addStaticData({'val_data_x1': val_data2, 'val_data_x2': val_data4})
poseNetTrainer.addManagedData({'train_data_x1': train_data2, 'train_data_x2': train_data4})
# per-sample center, cube, crop transformation and full ground truth
# NOTE(review): presumably consumed by the augmentation function - confirm
poseNetTrainer.addManagedData({'train_data_com': train_data_com, 'train_data_cube': train_data_cube, 'train_data_M': train_data_M, 'train_gt3D': train_gt3D})
poseNetTrainer.compileFunctions()

###################################################################
# TRAIN
train_res = poseNetTrainer.train(n_epochs=100)
print train_gt3D.max(), test_gt3D1.max(), train_gt3D.min(), test_gt3D1.min( ) print train_data.max(), test_data1.max(), train_data.min(), test_data1.min( ) imgSizeW = train_data.shape[3] imgSizeH = train_data.shape[2] nChannels = train_data.shape[1] #################################### # convert data to embedding pca = PCA(n_components=30) pca.fit( HandDetector.sampleRandomPoses(di, rng, train_gt3Dcrop, train_data_com, train_data_cube, 1e6, aug_modes).reshape( (-1, train_gt3D.shape[1] * 3))) train_gt3D_embed = pca.transform( train_gt3D.reshape((train_gt3D.shape[0], train_gt3D.shape[1] * 3))) test_gt3D_embed1 = pca.transform( test_gt3D1.reshape((test_gt3D1.shape[0], test_gt3D1.shape[1] * 3))) test_gt3D_embed2 = pca.transform( test_gt3D2.reshape((test_gt3D2.shape[0], test_gt3D2.shape[1] * 3))) val_gt3D_embed = pca.transform( val_gt3D.reshape((val_gt3D.shape[0], val_gt3D.shape[1] * 3))) ############################################################################ print("create network") batchSize = 128 poseNetParams = PoseRegNetParams(type=0, nChan=nChannels,
test_label[it]=gt3Dcrops_test[it]/(cubes_test[it][0]/2.) test_data=np.expand_dims(test_data,3) test_label=np.reshape(test_label,(-1,42)) print "training data {}".format(imgs_train.shape[0]) print "testing data {}".format(imgs_test.shape[0]) print "done" visual_aug=False hand_edges=[[0,1],[2,3],[4,5],[6,7],[8,9],[9,10],[13,10],[13,1],[13,3],[13,5],[13,7],[13,11],[13,12]] hd=HandDetector(imgs_train[0].copy(),abs(588.03),abs(587.07),di_train,refineNet=None) inputs=tf.placeholder(dtype=tf.float32,shape=(None,96,96,1)) label=tf.placeholder(dtype=tf.float32,shape=(None,42)) gt_ht=tf.placeholder(dtype=tf.float32,shape=(None,24,24,14)) is_train=tf.placeholder(dtype=tf.bool,shape=None) kp=tf.placeholder(dtype=tf.float32,shape=None) batch_size=128 last_e=100 outdims=(14,9,5) ################################################################# import tensorflow.contrib.slim as slim import tensorflow.contrib.layers as layers
# y_data = {'train': train_gt3D_embed, 'val': val_gt3D_embed, 'test': test_gt3D_embed} y_data = { 'train': train_gt3D.reshape(-1, train_gt3D.shape[1] * 3), 'val': val_gt3D.reshape(-1, val_gt3D.shape[1] * 3), 'test': test_gt3D.reshape(-1, test_gt3D.shape[1] * 3) } Cubes = { 'train': train_data_cube, 'val': val_data_cube, 'test': test_data_cube } gt3Dcrops = {'train': train_gt3Dcrop} coms = {'train': train_data_com} solver = HandRegSolver(Xdata=X_data, Ydata=y_data, params=params, Cubes=Cubes, pca=None, gt3Dcrops=gt3Dcrops, hd=HandDetector(train_data[0, :, :, 0].copy(), abs(di.fx), abs(di.fy), importer=di), coms=coms) # model = PoseRegModel() model = ResEncoderModel(embedSize=20) solver.train(model, draw=False, cacheModel=True) # print('sleep 20s') # time.sleep(20)
def loadSequence(self, seqName, allJoints=False, Nmax=float('inf'),
                 shuffle=False, rng=None, docom=False, rotation=False,
                 dsize=(128, 128)):
    """
    Load an image sequence from the dataset
    :param seqName: sequence name, e.g. train
    :param allJoints: if True use all 36 annotated joints, otherwise only
                      self.restrictedJointsEval
    :param Nmax: maximum number of samples to load
    :param shuffle: shuffle the loaded frames (only done if rng is given)
    :param rng: random generator providing shuffle()
    :param docom: forwarded to HandDetector.cropArea3D
    :param rotation: additionally create, for every frame, one copy rotated
                     by a random multiple of 90 degrees (train only); the
                     rotated frames are appended after the regular ones
    :param dsize: forwarded to HandDetector.cropArea3D as crop size
    :return: returns named image sequence
    """
    import math  # function-local import, as in the original implementation

    def rotate_about_center(src, angle, scale=1.):
        """Rotate src by angle (degrees) on an enlarged canvas; return (image, 2x3 matrix)."""
        w = src.shape[1]
        h = src.shape[0]
        rangle = np.deg2rad(angle)  # angle in radians
        # now calculate new image width and height
        nw = (abs(np.sin(rangle) * h) + abs(np.cos(rangle) * w)) * scale
        nh = (abs(np.cos(rangle) * h) + abs(np.sin(rangle) * w)) * scale
        # ask OpenCV for the rotation matrix
        rot_mat = cv2.getRotationMatrix2D((nw * 0.5, nh * 0.5), angle, scale)
        # calculate the move from the old center to the new center combined
        # with the rotation
        rot_move = np.dot(
            rot_mat, np.array([(nw - w) * 0.5, (nh - h) * 0.5, 0]))
        # the move only affects the translation, so update the translation
        # part of the transform
        rot_mat[0, 2] += rot_move[0]
        rot_mat[1, 2] += rot_move[1]
        warped = cv2.warpAffine(
            src, rot_mat, (int(math.ceil(nw)), int(math.ceil(nh))),
            flags=cv2.INTER_LANCZOS4)
        return warped, rot_mat

    def select_joints(frame_joints):
        """Copy the evaluated joints of one frame, in ascending joint index order."""
        picked = np.zeros((self.numJoints, 3), np.float32)
        jt = 0
        for jj in range(frame_joints.shape[0]):
            if jj not in eval_idxs:
                continue
            picked[jt] = frame_joints[jj, 0:3]
            jt += 1
        return picked

    def to_crop_coords(joints_uvd, transform):
        """Map joint image coordinates into the crop with transformPoint2D; depth is kept."""
        cropped = np.zeros((joints_uvd.shape[0], 3), np.float32)
        for joint in range(joints_uvd.shape[0]):
            t = transformPoint2D(joints_uvd[joint], transform)
            cropped[joint, 0] = t[0]
            cropped[joint, 1] = t[1]
            cropped[joint, 2] = joints_uvd[joint, 2]
        return cropped

    def voxelize(depth):
        """Bin the non-zero pixels of a depth crop into 8 equally spaced depth layers."""
        voxels = np.zeros((8, depth.shape[0], depth.shape[1]),
                          dtype=np.uint8)
        ordered = np.sort(depth, axis=None)
        min_d = ordered[np.where(ordered != 0)[0][0]]  # smallest non-zero depth
        max_d = np.max(depth)
        slice_step = (max_d - min_d) / 8
        slice_range = np.array([min_d + slice_step * k for k in range(9)])
        rows, cols = np.where(depth != 0)
        # Index of the last boundary that is <= the pixel depth; a pixel at
        # max_d hits boundary 8 and is folded into the last layer.
        layers = np.searchsorted(slice_range, depth[rows, cols],
                                 side='right') - 1
        voxels[np.minimum(layers, 7), rows, cols] = 1
        return voxels

    config = {'cube': [s * self.scales[seqName] for s in (300, 300, 300)]}
    refineNet = None

    # NOTE(review): the cache name encodes neither `rotation` nor `dsize`, so
    # a cache written with other settings is returned unchanged.
    pickleCache = '{}/{}_{}_{}_{}_cache.pkl'.format(
        self.cacheDir, self.__class__.__name__, seqName, allJoints, docom)
    if self.useCache and os.path.isfile(pickleCache):
        print("Loading cache data from {}".format(pickleCache))
        # NOTE: unpickling can execute arbitrary code; only point cacheDir at
        # trusted locations.
        with open(pickleCache, 'rb') as f:
            (seqName, data, config) = cPickle.load(f)

        # shuffle data
        if shuffle and rng is not None:
            print("Shuffling")
            rng.shuffle(data)
        if not np.isinf(Nmax):
            return NamedImgSequence(seqName, data[0:Nmax], config)
        return NamedImgSequence(seqName, data, config)

    # Load the dataset
    objdir = '{}/{}/'.format(self.basepath, seqName)
    trainlabels = '{}/{}/joint_data.mat'.format(self.basepath, seqName)

    mat = scipy.io.loadmat(trainlabels)
    joints3D = mat['joint_xyz'][0]
    joints2D = mat['joint_uvd'][0]
    if allJoints:
        eval_idxs = np.arange(36)
    else:
        eval_idxs = self.restrictedJointsEval

    self.numJoints = len(eval_idxs)
    # index (within the selected joints) of the joint the crop is centered on
    center_joint = 34 if allJoints else 13

    txt = 'Loading {}'.format(seqName)
    pbar = pb.ProgressBar(maxval=joints3D.shape[0],
                          widgets=[txt, pb.Percentage(), pb.Bar()])
    pbar.start()

    data = []
    data_rotation = []
    for line in range(joints3D.shape[0]):
        dptFileName = '{0:s}/depth_1_{1:07d}.png'.format(objdir, line + 1)
        print(dptFileName)

        # augment the training dataset
        if rotation:
            assert seqName == 'train', 'we only rotate the training data'
            # 90', 180', or 270' rotation degree
            rotation_degree = np.random.randint(1, 4) * 90

        if not os.path.isfile(dptFileName):
            # Missing files are only reported inside these index ranges of
            # the two test sequences; every missing file is skipped.
            if ((seqName == 'test_1' and 0 <= line <= 2439) or
                    (seqName == 'test_2' and 2440 <= line <= 8251)):
                print("File {} does not exist!".format(dptFileName))
            continue

        dpt = self.loadDepthMap(dptFileName)
        if rotation:
            dpt_rotation, rotMat = rotate_about_center(dpt, rotation_degree)

        # joints in image coordinates
        gtorig = select_joints(joints2D[line])
        if rotation:
            # apply the 2x3 affine rotation to (u, v); depth is unchanged
            gtorig_rotation = np.zeros((self.numJoints, 3), np.float32)
            for idx in range(gtorig.shape[0]):
                u = gtorig[idx, 0]
                v = gtorig[idx, 1]
                gtorig_rotation[idx, 0] = (u * rotMat[0, 0] +
                                           v * rotMat[0, 1] + rotMat[0, 2])
                gtorig_rotation[idx, 1] = (u * rotMat[1, 0] +
                                           v * rotMat[1, 1] + rotMat[1, 2])
                gtorig_rotation[idx, 2] = gtorig[idx, 2]

        # normalized joints in 3D coordinates
        gt3Dorig = select_joints(joints3D[line])
        if rotation:
            # rotated 3D joints are re-derived from the rotated image joints
            gt3Dorig_rotation = self.jointsImgTo3D(gtorig_rotation)

        # Detect hand
        hd = HandDetector(dpt, self.fx, self.fy, refineNet=refineNet,
                          importer=self)
        if not hd.checkImage(1):
            print("Skipping image {}, no content".format(dptFileName))
            continue
        try:
            dpt, M, com = hd.cropArea3D(gtorig[center_joint],
                                        size=config['cube'],
                                        docom=docom,
                                        dsize=dsize)
        except UserWarning:
            print("Skipping image {}, no hand detected".format(dptFileName))
            continue

        if rotation:
            # Detect hand
            hd_rotation = HandDetector(dpt_rotation, self.fx, self.fy,
                                       refineNet=refineNet, importer=self)
            try:
                dpt_rotation, M_rotation, com_rotation = \
                    hd_rotation.cropArea3D(gtorig_rotation[center_joint],
                                           size=config['cube'],
                                           docom=docom,
                                           dsize=dsize)
            except UserWarning:
                # as before, a failed rotated crop drops the whole frame
                print("Skipping image {}, no hand detected".format(
                    dptFileName))
                continue

        com3D = self.jointImgTo3D(com)
        gt3Dcrop = gt3Dorig - com3D  # normalize to com
        gtcrop = to_crop_coords(gtorig, M)

        # create 3D voxel
        dpt3D = voxelize(dpt)

        if rotation:
            com3D_rotation = self.jointImgTo3D(com_rotation)
            gt3Dcrop_rotation = gt3Dorig_rotation - com3D_rotation  # normalize to com
            gtcrop_rotation = to_crop_coords(gtorig_rotation, M_rotation)
            dpt3D_rotation = voxelize(dpt_rotation)

        data.append(
            ICVLFrame(dpt.astype(np.float32), dpt3D, gtorig, gtcrop, M,
                      gt3Dorig, gt3Dcrop, com3D, dptFileName, ''))
        if rotation:
            data_rotation.append(
                ICVLFrame(dpt_rotation.astype(np.float32), dpt3D_rotation,
                          gtorig_rotation, gtcrop_rotation, M_rotation,
                          gt3Dorig_rotation, gt3Dcrop_rotation,
                          com3D_rotation, dptFileName, ''))

        # report the frame index; the former counter was overwritten by the
        # inner loops and always reported the same value
        pbar.update(line)

        # early stop
        if len(data) >= Nmax:
            break

    pbar.finish()

    # augmentation
    if rotation:
        data.extend(data_rotation)

    print("Loaded {} samples.".format(len(data)))

    if self.useCache:
        print("Save cache data to {}".format(pickleCache))
        with open(pickleCache, 'wb') as f:
            cPickle.dump((seqName, data, config), f,
                         protocol=cPickle.HIGHEST_PROTOCOL)

    # shuffle data
    if shuffle and rng is not None:
        print("Shuffling")
        rng.shuffle(data)
    return NamedImgSequence(seqName, data, config)