def dump_amass2pytroch(datasets, amass_dir, out_posepath, logger=None, rnd_seed=100, keep_rate=0.01):
    '''Select a random subset of frames from the central 80 percent of each mocap sequence
    and save per-frame features (pose, dmpl, betas, trans, gender) as pytorch .pt files.

    The test set will have the extra field for original markers.

    :param datasets: list of dataset names to process
    :param amass_dir: directory of downloaded amass npz files. should be in this structure: path/datasets/subjects/*_poses.npz
    :param out_posepath: the path for the final pose.pt file; sibling .pt paths are derived from it
    :param logger: an instance of human_body_prior.tools.omni_tools.log2file
    :param rnd_seed: random seed so the frame sub-sampling is reproducible
    :param keep_rate: fraction of each sequence's (central) frames to keep
    :return: Number of datapoints dumped using out_posepath address pattern
    '''
    import glob

    np.random.seed(rnd_seed)

    makepath(out_posepath, isfile=True)

    if logger is None:
        starttime = datetime.now().replace(microsecond=0)
        log_name = datetime.strftime(starttime, '%Y%m%d_%H%M')
        logger = log2file(out_posepath.replace('pose.pt', '%s.log' % (log_name)))
    logger('Creating pytorch dataset at %s' % out_posepath)

    data_pose = []
    data_dmpl = []
    data_betas = []
    data_gender = []
    data_trans = []

    for ds_name in datasets:
        npz_fnames = glob.glob(os.path.join(amass_dir, ds_name, '*/*_poses.npz'))
        logger('randomly selecting data points from %s.' % (ds_name))
        for npz_fname in tqdm(npz_fnames):
            try:
                cdata = np.load(npz_fname)
            # was a bare `except:`, which also swallows KeyboardInterrupt/SystemExit
            except Exception:
                logger('Could not read %s! skipping..' % npz_fname)
                continue
            N = len(cdata['poses'])

            # removing first and last 10% of the data to avoid repetitive initial poses
            cdata_ids = np.random.choice(list(range(int(0.1 * N), int(0.9 * N), 1)),
                                         int(keep_rate * 0.8 * N), replace=False)
            if len(cdata_ids) < 1: continue

            data_pose.extend(cdata['poses'][cdata_ids].astype(np.float32))
            data_dmpl.extend(cdata['dmpls'][cdata_ids].astype(np.float32))
            data_trans.extend(cdata['trans'][cdata_ids].astype(np.float32))
            # one (shared) betas vector per selected frame
            data_betas.extend(np.repeat(cdata['betas'][np.newaxis].astype(np.float32), repeats=len(cdata_ids), axis=0))
            # np.str was a deprecated alias removed in NumPy 1.20+/1.24; builtin str is equivalent
            data_gender.extend([gdr2num[str(cdata['gender'].astype(str))] for _ in cdata_ids])

    assert len(data_pose) != 0

    torch.save(torch.tensor(np.asarray(data_pose, np.float32)), out_posepath)
    torch.save(torch.tensor(np.asarray(data_dmpl, np.float32)), out_posepath.replace('pose.pt', 'dmpl.pt'))
    torch.save(torch.tensor(np.asarray(data_betas, np.float32)), out_posepath.replace('pose.pt', 'betas.pt'))
    torch.save(torch.tensor(np.asarray(data_trans, np.float32)), out_posepath.replace('pose.pt', 'trans.pt'))
    torch.save(torch.tensor(np.asarray(data_gender, np.int32)), out_posepath.replace('pose.pt', 'gender.pt'))

    return len(data_pose)
def prepare_data(self):
    """Run the standard AMASS dataset preparation pipeline.

    Expects the body-data npz files from https://amass.is.tue.mpg.de/ to have
    already been downloaded and placed under amass_dir.
    """
    log_fname = makepath(self.work_dir, '{}.log'.format(self.expr_id), isfile=True)
    self.text_logger = log2file(log_fname, prefix=self._log_prefix)

    data_parms = self.vp_ps.data_parms
    prepare_vposer_datasets(self.dataset_dir,
                            data_parms.amass_splits,
                            data_parms.amass_dir,
                            logger=self.text_logger)
def __init__(self,
             vposer_expr_dir: str,
             data_loss,
             optimizer_args: dict = None,
             stepwise_weights: List[Dict] = None,
             display_rc: tuple = (2, 1),
             verbosity: int = 1,
             logger=None,
             ):
    '''
    :param vposer_expr_dir: The vposer directory that holds the settings and model snapshot
    :param data_loss: should be a pytorch callable (source, target) that returns the accumulated loss
    :param optimizer_args: arguments for optimizers; defaults to {'type': 'ADAM'}
    :param stepwise_weights: list of dictionaries. each list element defines weights for one full step of optimization
                             if a weight value is left out, its respective object item will be removed as well.
                             imagine optimizing without data term!
    :param display_rc: number of row and columns in case verbosity > 1
    :param verbosity: 0: silent, 1: text, 2: text/visual. running 2 over ssh would need extra work
    :param logger: an instance of human_body_prior.tools.omni_tools.log2file
    '''
    super(IK_Engine, self).__init__()

    # Fix: the previous signature used mutable default arguments
    # (dict / list literals), which are shared across all calls and could be
    # silently mutated. None sentinels preserve the documented defaults.
    if optimizer_args is None:
        optimizer_args = {'type': 'ADAM'}
    if stepwise_weights is None:
        stepwise_weights = [{'data': 10., 'poZ_body': .01, 'betas': .5}]

    assert isinstance(stepwise_weights, list), ValueError('stepwise_weights should be a list of dictionaries.')
    assert np.all(['data' in l for l in stepwise_weights]), ValueError(
        'The term data should be available in every weight of anealed optimization step: {}'.format(stepwise_weights))

    # fall back to a robust regression loss when the caller provides none
    self.data_loss = torch.nn.SmoothL1Loss(reduction='mean') if data_loss is None else data_loss

    self.stepwise_weights = stepwise_weights
    self.verbosity = verbosity
    self.optimizer_args = optimizer_args

    self.logger = log2file() if logger is None else logger

    if verbosity > 1:
        # interactive mesh viewers are only created when visual output is requested
        mvs = MeshViewers(display_rc, keepalive=True)
        self.mvs = flatten_list(mvs)
        self.mvs[0].set_background_color(colors['white'])
    else:
        self.mvs = None

    # load the frozen VPoser body-pose prior used by the IK optimization
    self.vp_model, _ = load_model(vposer_expr_dir,
                                  model_code=VPoser,
                                  remove_words_in_model_weights='vp_model.',
                                  disable_grad=True)
def __init__(self, _config):
    """Set up a VPoser training experiment from a configuration dict.

    Resolves work/dataset directories, makes runs deterministic, and builds
    the VPoser model plus the SMPL-family body model used for rendering.
    """
    super(VPoserTrainer, self).__init__()

    # NOTE(review): result is unused below — presumably kept for its side
    # effects or future use; confirm before removing.
    _support_data_dir = get_support_data_dir()

    vp_ps = load_config(**_config)

    # seed RNGs so training runs are reproducible
    make_deterministic(vp_ps.general.rnd_seed)

    self.expr_id = vp_ps.general.expr_id
    self.dataset_id = vp_ps.general.dataset_id

    # makepath creates the directory if missing; the resolved paths are also
    # written back into the config so they get logged/persisted with it
    self.work_dir = vp_ps.logging.work_dir = makepath(
        vp_ps.general.work_basedir, self.expr_id)
    self.dataset_dir = vp_ps.logging.dataset_dir = osp.join(
        vp_ps.general.dataset_basedir, vp_ps.general.dataset_id)

    self._log_prefix = '[{}]'.format(self.expr_id)
    self.text_logger = log2file(prefix=self._log_prefix)

    self.seq_len = vp_ps.data_parms.num_timeseq_frames

    self.vp_model = VPoser(vp_ps)

    # the body model is used only for producing vertices/previews; no grads
    with torch.no_grad():
        self.bm_train = BodyModel(vp_ps.body_model.bm_fname)

    if vp_ps.logging.render_during_training:
        self.renderer = vposer_trainer_renderer(
            self.bm_train, vp_ps.logging.num_bodies_to_display)
    else:
        self.renderer = None

    # example input for graph logging/tracing; 63 = 21 body joints * 3 (axis-angle)
    self.example_input_array = {
        'pose_body': torch.ones(vp_ps.train_parms.batch_size, 63),
    }
    self.vp_ps = vp_ps
def visualize(points, bm_f, mvs, kpts_colors, verbosity=2, logger=None):
    """Build a per-iteration visualization callback for the IK optimizer.

    :param points: target keypoints/markers tensor indexed per frame — assumes
        shape (batch, n_points, 3); TODO confirm against caller
    :param bm_f: body-model faces used to build display meshes
    :param mvs: flat list of mesh-viewer panes to draw into
    :param kpts_colors: per-keypoint colors for the sphere/cube markers
    :param verbosity: 0: silent, 1: log text, >1: also update mesh viewers
    :param logger: an instance of human_body_prior.tools.omni_tools.log2file
    :return: a `view(opt_objs, body_v, virtual_markers, opt_it)` closure
    """
    from human_body_prior.tools.omni_tools import log2file

    if logger is None: logger = log2file()

    def view(opt_objs, body_v, virtual_markers, opt_it):
        if verbosity <= 0: return
        # move each loss term to cpu and aggregate for the log line
        opt_objs_cpu = {k: c2c(v) for k, v in opt_objs.items()}

        total_loss = np.sum([np.sum(v) for k, v in opt_objs_cpu.items()])
        message = 'it {} -- [total loss = {:.2e}] - {}'.format(opt_it, total_loss, ' | '.join(['%s = %2.2e' % (k, np.sum(v)) for k, v in opt_objs_cpu.items()]))
        logger(message)
        if verbosity > 1:
            bs = body_v.shape[0]
            np.random.seed(100)  # fixed seed: same frames are displayed every iteration
            # check for the number of frames in mvs and show a randomly picked
            # number of frames in body if there is more to show than row*cols available
            frame_ids = list(range(bs)) if bs <= len(mvs) else np.random.choice(bs, size=len(mvs), replace=False).tolist()
            if bs > len(mvs): message += ' -- [frame_ids: {}]'.format(frame_ids)
            for dispId, fId in enumerate(frame_ids):
                # rotate -90 deg about x so the y-up body is shown upright in the viewer
                new_body_v = rotateXYZ(body_v[fId], [-90, 0, 0])

                # original targets as spheres, model's virtual markers as cubes
                orig_mrk_mesh = points_to_spheres(rotateXYZ(c2c(points[fId]), [-90, 0, 0]), radius=0.01, color=kpts_colors)
                virtual_markers_mesh = points_to_cubes(rotateXYZ(virtual_markers[fId], [-90, 0, 0]), radius=0.01, color=kpts_colors)
                new_body_mesh = Mesh(new_body_v, bm_f, vc=colors['grey'])

                # linev = rotateXYZ(np.hstack((c2c(points[fId]), virtual_markers[fId])).reshape((-1, 3)), [-90,0,0])
                # linee = np.arange(len(linev)).reshape((-1, 2))
                # ll = Lines(v=linev, e=linee)
                # ll.vc = (ll.v * 0. + 1) * np.array([0.00, 0.00, 1.00])
                # mvs[dispId].set_dynamic_lines([ll])

                # orig_mrk_mesh = points_to_spheres(data_pc, radius=0.01, vc=colors['blue'])
                mvs[dispId].set_dynamic_meshes([orig_mrk_mesh, virtual_markers_mesh])
                mvs[dispId].set_static_meshes([new_body_mesh])

            mvs[0].set_titlebar(message)
            # if out_dir is not None: mv.save_snapshot(os.path.join(out_dir, '%05d_it_%.5d.png' %(frame_id, opt_it)))
    return view
def __init__(self, work_dir, ps):
    """Legacy VPoser trainer setup.

    Builds logging, device selection, the three dataloaders, the model,
    optimizer, optional checkpoint restore, and a fixed visualization batch.

    :param work_dir: experiment output directory (logs, summaries, script copy)
    :param ps: parameter/config object — assumes attributes like fp_precision,
        seed, expr_code, cuda_id, dataset_dir, batch_size, ...; TODO confirm
    """
    from tensorboardX import SummaryWriter
    from human_body_prior.data.dataloader import VPoserDS

    self.pt_dtype = torch.float64 if ps.fp_precision == '64' else torch.float32

    torch.manual_seed(ps.seed)

    ps.work_dir = makepath(work_dir, isfile=False)

    logger = log2file(os.path.join(work_dir, '%s.log' % ps.expr_code))

    summary_logdir = os.path.join(work_dir, 'summaries')
    self.swriter = SummaryWriter(log_dir=summary_logdir)
    logger('tensorboard --logdir=%s' % summary_logdir)
    logger('Torch Version: %s\n' % torch.__version__)

    # snapshot the running script next to the results for reproducibility
    shutil.copy2(os.path.realpath(__file__), work_dir)

    use_cuda = torch.cuda.is_available()
    if use_cuda: torch.cuda.empty_cache()
    self.comp_device = torch.device(
        "cuda:%d" % ps.cuda_id if torch.cuda.is_available() else "cpu")

    logger('%d CUDAs available!' % torch.cuda.device_count())

    gpu_brand = torch.cuda.get_device_name(
        ps.cuda_id) if use_cuda else None
    logger('Training with %s [%s]' %
           (self.comp_device, gpu_brand) if use_cuda else 'Training on CPU!!!')
    logger('Base dataset_dir is %s' % ps.dataset_dir)

    kwargs = {'num_workers': ps.n_workers}
    # train/vald/test splits are expected as subdirectories of ps.dataset_dir
    ds_train = VPoserDS(dataset_dir=os.path.join(ps.dataset_dir, 'train'))
    self.ds_train = DataLoader(ds_train, batch_size=ps.batch_size, shuffle=True, drop_last=True, **kwargs)
    ds_val = VPoserDS(dataset_dir=os.path.join(ps.dataset_dir, 'vald'))
    self.ds_val = DataLoader(ds_val, batch_size=ps.batch_size, shuffle=True, drop_last=True, **kwargs)
    ds_test = VPoserDS(dataset_dir=os.path.join(ps.dataset_dir, 'test'))
    self.ds_test = DataLoader(ds_test, batch_size=ps.batch_size, shuffle=True, drop_last=True, **kwargs)
    logger('Train dataset size %.2f M' % (len(self.ds_train.dataset) * 1e-6))
    logger('Validation dataset size %d' % len(self.ds_val.dataset))
    logger('Test dataset size %d' % len(self.ds_test.dataset))

    # infer the per-sample pose shape from one validation item
    ps.data_shape = list(ds_val[0]['pose_aa'].shape)
    self.vposer_model = VPoser(num_neurons=ps.num_neurons, latentD=ps.latentD, data_shape=ps.data_shape, use_cont_repr=ps.use_cont_repr)
    if ps.use_multigpu:
        self.vposer_model = nn.DataParallel(self.vposer_model)

    self.vposer_model.to(self.comp_device)

    varlist = [var[1] for var in self.vposer_model.named_parameters()]

    params_count = sum(p.numel() for p in varlist if p.requires_grad)
    logger('Total Trainable Parameters Count is %2.2f M.' % ((params_count) * 1e-6))

    self.optimizer = optim.Adam(varlist, lr=ps.base_lr, weight_decay=ps.reg_coef)

    self.logger = logger
    self.best_loss_total = np.inf
    self.try_num = ps.try_num
    self.epochs_completed = 0
    self.ps = ps

    if ps.best_model_fname is not None:
        # DataParallel wraps the model, so the state dict lives on .module
        if isinstance(self.vposer_model, torch.nn.DataParallel):
            self.vposer_model.module.load_state_dict(
                torch.load(ps.best_model_fname, map_location=self.comp_device))
        else:
            self.vposer_model.load_state_dict(
                torch.load(ps.best_model_fname, map_location=self.comp_device))
        logger('Restored model from %s' % ps.best_model_fname)

    # pick a fixed set of validation samples to visualize during training
    chose_ids = np.random.choice(list(range(len(ds_val))),
                                 size=ps.num_bodies_to_display,
                                 replace=False,
                                 p=None)
    data_all = {}
    for id in chose_ids:
        for k, v in ds_val[id].items():
            if k in data_all.keys():
                data_all[k] = torch.cat([data_all[k], v[np.newaxis]], dim=0)
            else:
                data_all[k] = v[np.newaxis]

    self.vis_dorig = {
        k: data_all[k].to(self.comp_device)
        for k in data_all.keys()
    }

    self.bm = BodyModel(self.ps.bm_path, 'smplh', batch_size=self.ps.batch_size, use_posedirs=True).to(self.comp_device)
def dump_amass2pytroch(datasets, amass_dir, out_dir, split_name, logger=None, rnd_seed=100):
    '''Select random frames from the central 80 percent of each mocap sequence
    and dump per-frame pose/betas/trans/gender as pytorch .pt files.

    :param datasets: list of dataset names under amass_dir
    :param amass_dir: root directory of the downloaded amass npz files
    :param out_dir: output root; files are written to out_dir/split_name/
    :param split_name: one of 'train', 'vald', 'test'
    :param logger: an instance of human_body_prior.tools.omni_tools.log2file
    :param rnd_seed: random seed so the frame sub-sampling is reproducible
    :return: None; writes pose.pt / betas.pt / trans.pt / gender.pt and logs the split size
    '''
    import glob
    from tqdm import tqdm

    assert split_name in ['train', 'vald', 'test']
    np.random.seed(rnd_seed)

    makepath(out_dir, isfile=False)

    if logger is None:
        starttime = datetime.now().replace(microsecond=0)
        log_name = datetime.strftime(starttime, '%Y%m%d_%H%M')
        logger = log2file(os.path.join(out_dir, '%s.log' % (log_name)))
    logger('Creating pytorch dataset at %s' % out_dir)

    if split_name in ['vald', 'test']:
        keep_rate = 0.3  # this should be fixed for vald and test datasets
    elif split_name == 'train':
        keep_rate = 0.3  # 30 percent, which would give you around 3.5 M training data points

    data_pose = []
    data_betas = []
    data_gender = []
    data_trans = []
    data_markers = []

    for ds_name in datasets:
        npz_fnames = glob.glob(os.path.join(amass_dir, ds_name, '*/*.npz'))
        logger('randomly selecting data points from %s.' % (ds_name))
        for npz_fname in tqdm(npz_fnames):
            cdata = np.load(npz_fname)
            N = len(cdata['poses'])

            # skip first and last frames to avoid initial standard poses, e.g. T pose
            cdata_ids = np.random.choice(list(
                range(int(0.1 * N), int(0.9 * N), 1)),
                                         int(keep_rate * 0.8 * N),
                                         replace=False)
            if len(cdata_ids) < 1: continue

            data_pose.extend(cdata['poses'][cdata_ids].astype(np.float32))
            data_trans.extend(cdata['trans'][cdata_ids].astype(np.float32))
            data_betas.extend(
                np.repeat(cdata['betas'][np.newaxis].astype(np.float32),
                          repeats=len(cdata_ids),
                          axis=0))
            # np.str was a deprecated alias removed in NumPy 1.20+/1.24; builtin str is equivalent
            data_gender.extend([{
                'male': -1,
                'neutral': 0,
                'female': 1
            }[str(cdata['gender'].astype(str))] for _ in cdata_ids])

            if split_name == 'test':
                # NOTE(review): this duplicates the betas block above and
                # data_markers is never saved below — looks like a copy-paste
                # placeholder for real marker data; confirm intended source.
                data_markers.extend(
                    np.repeat(cdata['betas'][np.newaxis].astype(np.float32),
                              repeats=len(cdata_ids),
                              axis=0))

    outdir = makepath(os.path.join(out_dir, split_name))

    assert len(data_pose) != 0

    outpath = os.path.join(outdir, 'pose.pt')
    torch.save(torch.tensor(np.asarray(data_pose, np.float32)), outpath)

    outpath = os.path.join(outdir, 'betas.pt')
    torch.save(torch.tensor(np.asarray(data_betas, np.float32)), outpath)

    outpath = os.path.join(outdir, 'trans.pt')
    torch.save(torch.tensor(np.asarray(data_trans, np.float32)), outpath)

    outpath = os.path.join(outdir, 'gender.pt')
    torch.save(torch.tensor(np.asarray(data_gender, np.int32)), outpath)

    logger('Len. split %s %d' % (split_name, len(data_pose)))
def prepare_vposer_datasets(amass_splits, amass_dir, vposer_datadir, logger=None):
    """Three-stage VPoser dataset build.

    Stage I pulls raw frames from AMASS npz into per-split .pt files,
    Stage II augments them into h5 tables, Stage III re-dumps every h5
    column as a final per-field .pt file.

    :param amass_splits: dict mapping split name -> list of dataset names
    :param amass_dir: root directory of the downloaded AMASS npz files
    :param vposer_datadir: output root for all three stages
    :param logger: an instance of human_body_prior.tools.omni_tools.log2file
    """
    if logger is None:
        starttime = datetime.now().replace(microsecond=0)
        log_name = datetime.strftime(starttime, '%Y%m%d_%H%M')
        logger = log2file(os.path.join(vposer_datadir, '%s.log' % (log_name)))
        logger('Creating pytorch dataset at %s' % vposer_datadir)

    stageI_outdir = os.path.join(vposer_datadir, 'stage_I')

    # snapshot the driving script next to the data for reproducibility
    shutil.copy2(sys.argv[0], os.path.join(vposer_datadir, os.path.basename(sys.argv[0])))

    logger('Stage I: Fetch data from AMASS npz files')

    for split_name, datasets in amass_splits.items():
        # pose.pt acts as the "stage done" marker for a split
        if os.path.exists(os.path.join(stageI_outdir, split_name, 'pose.pt')): continue
        dump_amass2pytroch(datasets, amass_dir, stageI_outdir, split_name=split_name, logger=logger)

    logger(
        'Stage II: augment data by noise and save into h5 files to be used in a cross framework scenario.'
    )
    ## Writing to h5 files is also convinient since appending to files is possible
    from torch.utils.data import DataLoader
    import tables as pytables
    from tqdm import tqdm

    class AMASS_ROW(pytables.IsDescription):
        # fixed-size pytables row schema for one augmented AMASS frame
        gender = pytables.Int16Col(1)  # 1-character String
        pose = pytables.Float32Col(52 * 3)  # float (single-precision)
        pose_matrot = pytables.Float32Col(52 * 9)  # float (single-precision)
        betas = pytables.Float32Col(16)  # float (single-precision)
        trans = pytables.Float32Col(3)  # float (single-precision)

    stageII_outdir = makepath(os.path.join(vposer_datadir, 'stage_II'))

    batch_size = 256
    max_num_epochs = 1  # how much augmentation we would get

    for split_name in amass_splits.keys():
        h5_outpath = os.path.join(stageII_outdir, '%s.h5' % split_name)
        if os.path.exists(h5_outpath): continue

        ds = AMASS_Augment(dataset_dir=os.path.join(stageI_outdir, split_name))
        logger('%s has %d data points!' % (split_name, len(ds)))
        dataloader = DataLoader(ds,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=32,
                                drop_last=False)
        with pytables.open_file(h5_outpath, mode="w") as h5file:
            table = h5file.create_table('/', 'data', AMASS_ROW)

            for epoch_num in range(max_num_epochs):
                for bId, bData in tqdm(enumerate(dataloader)):
                    # append one table row per sample in the batch
                    for i in range(len(bData['trans'])):
                        for k in bData.keys():
                            table.row[k] = c2c(bData[k][i])
                        table.row.append()
                    table.flush()

    logger('Stage III: dump every thing as a final thing to pt files')
    # we would like to use pt files because their interface could run in multiple threads
    stageIII_outdir = makepath(os.path.join(vposer_datadir, 'stage_III'))

    for split_name in amass_splits.keys():
        h5_filepath = os.path.join(stageII_outdir, '%s.h5' % split_name)
        if not os.path.exists(h5_filepath): continue

        with pytables.open_file(h5_filepath, mode="r") as h5file:
            data = h5file.get_node('/data')
            data_dict = {k: [] for k in data.colnames}
            # materialize every column into python lists before saving
            for id in range(len(data)):
                cdata = data[id]
                for k in data_dict.keys():
                    data_dict[k].append(cdata[k])

            for k, v in data_dict.items():
                outfname = makepath(os.path.join(stageIII_outdir, split_name, '%s.pt' % k), isfile=True)
                if os.path.exists(outfname): continue
                torch.save(torch.from_numpy(np.asarray(v)), outfname)

    logger('Dumped final pytorch dataset at %s' % stageIII_outdir)
# Driver script: prepare the AMASS dataset splits for VPoser training.
# NOTE: running this module triggers the full data preparation at import time.
import sys, os

import torch
import numpy as np

from human_body_prior.tools.omni_tools import makepath, log2file
from human_body_prior.tools.omni_tools import copy2cpu as c2c

expr_code = 'poses'

msg = ''' Initial use of standard AMASS dataset preparation pipeline '''

# NOTE(review): amass_dir holds a glob pattern rather than a plain directory,
# while prepare_amass joins it with dataset names — confirm the expected form.
amass_dir = '/data/BDSC/datasets/DFaust_67/*/*_poses.npz'

work_dir = '/data/BDSC/datasets/AMASS/'

logger = log2file(os.path.join(work_dir, '%s.log' % (expr_code)))
logger('[%s] AMASS Data Preparation Began.' % expr_code)
logger(msg)

#Note: should download all data of https://amass.is.tue.mpg.de/dataset
amass_splits = {
    'vald': ['HumanEva', 'MPI_HDM05', 'SFU', 'MPI_mosh'],
    'test': ['Transitions_mocap', 'SSM_synced'],
    'train': [
        'CMU', 'MPI_Limits', 'TotalCapture', 'Eyes_Japan_Dataset', 'KIT',
        'BML', 'EKUT', 'TCD_handMocap', 'ACCAD'
    ]
}
# ensure no train dataset overlaps with the test/vald datasets
amass_splits['train'] = list(
    set(amass_splits['train']).difference(
        set(amass_splits['test'] + amass_splits['vald'])))

from amass.prepare_data import prepare_amass
prepare_amass(amass_splits, amass_dir, work_dir, logger=logger)
def dump_amass2pytroch(datasets,
                       amass_dir,
                       out_posepath,
                       logger=None,
                       rnd_seed=100,
                       keep_rate=0.01):
    '''Shuffle all frames of each sequence (read from info.mat files) and save
    per-frame features — pose, shape, gender, frame index, sequence id,
    tightness and outfit encoding — as pytorch .pt files.

    :param datasets: the name of the dataset
    :param amass_dir: directory of downloaded data; sequences live at path/datasets/<idx>/info.mat
    :param out_posepath: the path for the final pose.pt file; sibling .pt paths are derived from it
    :param logger: an instance of human_body_prior.tools.omni_tools.log2file
    :param rnd_seed: random seed so the frame shuffling is reproducible
    :param keep_rate: unused here (all frames are kept); retained for interface compatibility
    :return: Number of datapoints dumped using out_posepath address pattern
    '''
    import glob

    np.random.seed(rnd_seed)

    makepath(out_posepath, isfile=True)

    if logger is None:
        starttime = datetime.now().replace(microsecond=0)
        log_name = datetime.strftime(starttime, '%Y%m%d_%H%M')
        logger = log2file(
            out_posepath.replace('pose.pt', '%s.log' % (log_name)))
    logger('Creating pytorch dataset at %s' % out_posepath)

    data_pose = []
    data_betas = []
    data_gender = []
    data_trans = []
    data_idx = []
    data_frame = []
    data_tightness = []
    data_outfit = []

    for ds_name in datasets:
        npz_fnames = glob.glob(os.path.join(amass_dir, ds_name, '*/info.mat'))
        logger('randomly selecting data points from %s.' % (ds_name))

        for npz_fname in tqdm(npz_fnames):
            try:
                cdata = loadInfo(npz_fname)
                # sequence id is the name of the parent directory
                cdata['idx'] = int(npz_fname.split("/")[-2])
            # was a bare `except:`, which also swallows KeyboardInterrupt/SystemExit
            except Exception:
                logger('Could not read %s! skipping..' % npz_fname)
                continue

            # stored transposed relative to the (frames, dims) layout used below
            cdata['poses'] = cdata['poses'].T
            cdata['trans'] = cdata['trans'].T

            # encode the outfit as one fabric index (+1, 0 = absent) per garment slot
            outfit_arr = np.zeros(len(outfit_types))
            for key in cdata['outfit'].keys():
                outfit_arr[outfit_types.index(key)] = fabric_types.index(
                    cdata['outfit'][key]['fabric']) + 1

            # single-frame sequences collapse to 1-D and are skipped
            if len(cdata['poses'].shape) < 2: continue
            N = len(cdata['poses'])

            # keep every frame, in random order
            cdata_ids = np.arange(N)
            np.random.shuffle(cdata_ids)
            if len(cdata_ids) < 1: continue

            data_frame.extend(np.array(cdata_ids).astype(np.int32))
            data_idx.extend(np.array([cdata['idx'] for _ in cdata_ids]))
            data_pose.extend(cdata['poses'][cdata_ids].astype(np.float32))
            data_trans.extend(cdata['trans'][cdata_ids].astype(np.float32))
            data_betas.extend(
                np.repeat(cdata['shape'][np.newaxis].astype(np.float32),
                          repeats=len(cdata_ids),
                          axis=0))
            data_gender.extend(np.array([cdata['gender'] for _ in cdata_ids]))
            data_tightness.extend(
                np.repeat(cdata['tightness'][np.newaxis].astype(np.float32),
                          repeats=len(cdata_ids),
                          axis=0))
            data_outfit.extend(
                np.repeat(outfit_arr[np.newaxis].astype(np.int32),
                          repeats=len(cdata_ids),
                          axis=0))

    assert len(data_pose) != 0

    torch.save(torch.tensor(np.asarray(data_pose, np.float32)), out_posepath)
    torch.save(torch.tensor(np.asarray(data_betas, np.float32)),
               out_posepath.replace('pose.pt', 'betas.pt'))
    torch.save(torch.tensor(np.asarray(data_trans, np.float32)),
               out_posepath.replace('pose.pt', 'trans.pt'))
    torch.save(torch.tensor(np.asarray(data_gender, np.int32)),
               out_posepath.replace('pose.pt', 'gender.pt'))
    torch.save(torch.tensor(np.asarray(data_frame, np.int32)),
               out_posepath.replace('pose.pt', 'frame.pt'))
    torch.save(torch.tensor(np.asarray(data_idx, np.int32)),
               out_posepath.replace('pose.pt', 'idx.pt'))
    torch.save(torch.tensor(np.asarray(data_tightness, np.float32)),
               out_posepath.replace('pose.pt', 'tightness.pt'))
    torch.save(torch.tensor(np.asarray(data_outfit, np.int32)),
               out_posepath.replace('pose.pt', 'outfit.pt'))

    return len(data_pose)
def dump_amass2pytroch(datasets,
                       amass_dir,
                       out_posepath,
                       logger=None,
                       betas_range=None,
                       betas_limit=None,
                       splits=None,
                       rnd_seed=100,
                       keep_rate=0.01,
                       max_len=None):
    '''
    Select random number of frames from central 80 percent of each mocap sequence
    Save individual data features like pose and shape per frame in pytorch pt files
    test set will have the extra field for original markers

    :param datasets: the name of the dataset
    :param amass_dir: directory of downloaded amass npz files. should be in this structure: path/datasets/subjects/*_poses.npz
    :param out_posepath: the path for final pose.pt file
    :param logger: an instance of human_body_prior.tools.omni_tools.log2file
    :param betas_range: variance of each beta
    :param betas_limit: betas variance ranging from -betas_limit to betas_limit. only works with integer betas_range
    :param splits: (splits_start, splits_end), e.g. (.85, .90) means splits 5% of the dataset starts from 85%
    :param rnd_seed: random seed
    :param max_len: max frame allowed (NOTE(review): not referenced in this body — confirm whether it should cap cdata_ids)
    :return: Number of datapoints dumped using out_poseth address pattern
    '''
    import glob

    np.random.seed(rnd_seed)

    makepath(out_posepath, isfile=True)

    if logger is None:
        starttime = datetime.now().replace(microsecond=0)
        log_name = datetime.strftime(starttime, '%Y%m%d_%H%M')
        logger = log2file(
            out_posepath.replace('pose.pt', '%s.log' % (log_name)))
    logger('Creating pytorch dataset at %s' % out_posepath)

    data_pose = []
    data_dmpl = []
    data_betas = []
    data_gender = []
    data_trans = []
    # per-frame provenance: synthetic file id and frame index within the selection
    data_fname = []
    data_fid = []

    for ds_name in datasets:
        npz_fnames = glob.glob(
            os.path.join(amass_dir, ds_name, '*/*_poses.npz'))
        if splits:
            logger(
                f'randomly selecting {"%.1f" % ((splits[1] - splits[0]) * 100)}% data points from {ds_name}.'
            )
        else:
            logger(f'randomly selecting data points from {ds_name}.')
        for dir_id, npz_fname in enumerate(tqdm(npz_fnames)):
            try:
                cdata = np.load(npz_fname)
            # NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit
            except:
                logger('Could not read %s! skipping..' % npz_fname)
                continue
            N = len(cdata['poses'])

            # fname = abs(hash(npz_fname.split('/')[-1].split('.')[0])) % (10 ** 8) # hash filename to a unique positive 8-digits integer
            # sequence id; the *1000 stride leaves room for per-beta-sample offsets below
            fname = dir_id * 1000

            cdata_ids = np.random.choice(
                list(range(int(0.1 * N), int(0.9 * N), 1)),
                int(keep_rate * 0.8 * N),
                replace=False
            )  # removing first and last 10% of the data to avoid repetitive initial poses
            if len(cdata_ids) < 1: continue

            # betas-augmentation path: betas_range may be an int (sample count)
            # or an ndarray of per-sample beta offsets
            if 'int' in str(type(betas_range)) or 'numpy.ndarray' in str(
                    type(betas_range)):
                if 'int' in str(type(betas_range)):
                    if betas_range == 0:
                        # no augmentation: dump the sequence as-is
                        # NOTE(review): after this branch, control appears to
                        # reach enumerate(betas_range) below with an int 0,
                        # which would raise TypeError — confirm this path is
                        # ever exercised.
                        data_pose.extend(cdata['poses'][cdata_ids].astype(
                            np.float32))
                        data_dmpl.extend(cdata['dmpls'][cdata_ids].astype(
                            np.float32))
                        data_trans.extend(cdata['trans'][cdata_ids].astype(
                            np.float32))
                        data_betas.extend(
                            np.repeat(cdata['betas'][np.newaxis].astype(
                                np.float32),
                                      repeats=len(cdata_ids),
                                      axis=0))
                        # NOTE(review): np.str is a deprecated alias removed in
                        # NumPy 1.20+/1.24 — this line fails on modern NumPy.
                        data_gender.extend([
                            gdr2num[str(cdata['gender'].astype(np.str))]
                            for _ in cdata_ids
                        ])
                        data_fname.extend([fname for _ in cdata_ids])
                        data_fid.extend([i for i, _ in enumerate(cdata_ids)])
                    else:
                        assert betas_range % 2 == 0, ValueError(
                            'betas_range should be multiple to 2')
                        if betas_limit is None:
                            betas_limit = 2.
                        # if `betas_range` is an integer,
                        # sample the number of betas1 and betas2
                        # that varience from -2. to 2. as follows:
                        beta1, beta2 = cdata['betas'][0], cdata['betas'][1]
                        # left range, right range
                        beta1_lr, beta1_rr = max(0., betas_limit + beta1), max(
                            0., betas_limit - beta1)
                        beta2_lr, beta2_rr = max(0., betas_limit + beta2), max(
                            0., betas_limit - beta2)
                        # left range percentage, right range percentage
                        beta1_lp, beta1_rp = beta1_lr / (
                            beta1_lr + beta1_rr), beta1_rr / (beta1_lr + beta1_rr)
                        beta2_lp, beta2_rp = beta2_lr / (
                            beta2_lr + beta2_rr), beta2_rr / (beta2_lr + beta2_rr)
                        # left range sample number
                        beta1_ln, beta2_ln = int(betas_range * beta1_lp), int(
                            betas_range * beta2_lp)
                        # do sampling for beta1 range
                        beta1_range = betas_range_sample(
                            betas_range, beta1, beta1_ln, betas_limit)
                        beta2_range = betas_range_sample(
                            betas_range, beta2, beta2_ln, betas_limit)
                        # reconstruct beatas_range as numpy.ndarray
                        # NOTE(review): this rebinding clobbers the integer
                        # parameter, so offsets are sampled only for the first
                        # sequence and reused for all later files — confirm
                        # this is intended.
                        betas_range = np.zeros(
                            (betas_range, len(cdata['betas'])))
                        betas_range[:, 0] = beta1_range
                        betas_range[:, 1] = beta2_range

                # one full copy of the selected frames per beta offset sample
                for i, beta_delta in enumerate(betas_range):
                    cdata_betas = np.array(cdata['betas']).astype(np.float32)
                    data_pose.extend(cdata['poses'][cdata_ids].astype(
                        np.float32))
                    data_dmpl.extend(cdata['dmpls'][cdata_ids].astype(
                        np.float32))
                    data_trans.extend(cdata['trans'][cdata_ids].astype(
                        np.float32))
                    data_betas.extend(
                        np.repeat((cdata_betas + beta_delta)[np.newaxis].astype(np.float32),
                                  repeats=len(cdata_ids),
                                  axis=0))
                    data_gender.extend([
                        gdr2num[str(cdata['gender'].astype(np.str))]
                        for _ in cdata_ids
                    ])
                    # offset the file id so each beta sample gets a distinct fname
                    data_fname.extend([fname + i for _ in cdata_ids])
                    data_fid.extend([ii for ii, _ in enumerate(cdata_ids)])
            else:
                # no betas augmentation requested (betas_range is None)
                data_pose.extend(cdata['poses'][cdata_ids].astype(np.float32))
                data_dmpl.extend(cdata['dmpls'][cdata_ids].astype(np.float32))
                data_trans.extend(cdata['trans'][cdata_ids].astype(np.float32))
                data_betas.extend(
                    np.repeat(cdata['betas'][np.newaxis].astype(np.float32),
                              repeats=len(cdata_ids),
                              axis=0))
                data_gender.extend([
                    gdr2num[str(cdata['gender'].astype(np.str))]
                    for _ in cdata_ids
                ])
                data_fname.extend([fname for _ in cdata_ids])
                data_fid.extend([i for i, _ in enumerate(cdata_ids)])

    assert len(data_pose) != 0
    # all per-frame feature lists must stay aligned
    assert len(data_pose) == len(data_dmpl) == len(data_betas) == len(
        data_trans) == len(data_gender) == len(data_fname) == len(data_fid)

    if splits:
        import math
        # split data
        split_start = math.floor(len(data_pose) * splits[0])
        split_end = math.floor(len(data_pose) * splits[1])
        data_pose = data_pose[split_start:split_end]
        data_dmpl = data_dmpl[split_start:split_end]
        data_betas = data_betas[split_start:split_end]
        data_trans = data_trans[split_start:split_end]
        data_gender = data_gender[split_start:split_end]
        data_fname = data_fname[split_start:split_end]
        data_fid = data_fid[split_start:split_end]
        assert len(data_pose) > 0
        logger(
            f'data length: {len(data_pose)}, parsing from proportion ({"%.1f" % splits[0]}, {"%.1f" % splits[1]}) to index ({split_start}, {split_end})\n\n'
        )

    torch.save(torch.tensor(np.asarray(data_pose, np.float32)), out_posepath)
    torch.save(torch.tensor(np.asarray(data_dmpl, np.float32)),
               out_posepath.replace('pose.pt', 'dmpl.pt'))
    torch.save(torch.tensor(np.asarray(data_betas, np.float32)),
               out_posepath.replace('pose.pt', 'betas.pt'))
    torch.save(torch.tensor(np.asarray(data_trans, np.float32)),
               out_posepath.replace('pose.pt', 'trans.pt'))
    torch.save(torch.tensor(np.asarray(data_gender, np.int32)),
               out_posepath.replace('pose.pt', 'gender.pt'))
    torch.save(torch.tensor(np.asarray(data_fname, np.int32)),
               out_posepath.replace('pose.pt', 'fname.pt'))
    torch.save(torch.tensor(np.asarray(data_fid, np.int32)),
               out_posepath.replace('pose.pt', 'fid.pt'))

    return len(data_pose)
def prepare_amass(amass_splits,
                  amass_dir,
                  work_dir,
                  logger=None,
                  betas_range=None,
                  betas_limit=None,
                  frame_len=None,
                  max_len=None,
                  downsample_rate=None):
    '''Three-stage AMASS preparation: raw npz -> per-split .pt (stage I),
    augmentation into h5 tables (stage II), final per-field .pt dumps (stage III).

    :param amass_splits: either {'train': [...], 'vald': [...], 'test': [...]} (normal mode)
                         or {'dataset': <name>, 'splits': (train, vald, test)} (split mode)
    :param amass_dir: root directory of the downloaded AMASS npz files
    :param work_dir: output root for all three stages
    :param logger: an instance of human_body_prior.tools.omni_tools.log2file
    :param betas_range, betas_limit: forwarded to the stage-I dump for betas augmentation
    :param frame_len, max_len, downsample_rate: forwarded to the sequence downsampling dump
    '''
    if logger is None:
        starttime = datetime.now().replace(microsecond=0)
        log_name = datetime.strftime(starttime, '%Y%m%d_%H%M')
        logger = log2file(os.path.join(work_dir, '%s.log' % (log_name)))
    logger('Creating pytorch dataset at %s' % work_dir)

    stageI_outdir = os.path.join(work_dir, 'stage_I')

    # snapshot the driving script next to the results for reproducibility
    shutil.copy2(sys.argv[0], os.path.join(work_dir, os.path.basename(sys.argv[0])))

    logger('Stage I: Fetch data from AMASS npz files')

    # split mode - split a single dataset into train/vald/test with specified proportions
    # e.g.
    # amass_splits = {
    #     'dataset': 'HumanEva',
    #     'splits': (.85, .05, .1)  # train, vald, test
    # }
    if 'splits' in amass_splits.keys():
        import numbers
        from functools import reduce
        splits = amass_splits['splits']
        _amass_splits = {}
        assert [isinstance(s, numbers.Number) for s in splits] == [
            True, True, True
        ], "amass_splits['splits'] must be (number, number, number)"
        assert reduce(
            lambda x, y: x + y, splits
        ) <= 1., "sum of amass_splits['splits'] must equal or less than 1.0"

        for split_idx, split_name in enumerate(('train', 'vald', 'test')):
            # if there is a zero-split, skip through the dataset creation
            if split_idx > 0 and splits[split_idx] == 0: continue
            final_splits = (0., 1.)
            outpath = makepath(os.path.join(stageI_outdir, split_name, 'pose.pt'), isfile=True)
            # reconstruct amass_splits as normal mode for stage II and III
            _amass_splits[split_name] = amass_splits['dataset']
            if os.path.exists(outpath): continue
            # Fix: the original used `is` to compare string literals, which
            # tests identity and is implementation-dependent; `==` is correct.
            if split_name == 'train':
                final_splits = (0., splits[0])
            elif split_name == 'vald':
                final_splits = (splits[0], splits[0] + splits[1])
            else:
                final_splits = (splits[0] + splits[1],
                                splits[0] + splits[1] + splits[2])

            if frame_len:
                downsample_amass2pytroch(amass_splits['dataset'],
                                         amass_dir,
                                         outpath,
                                         logger=logger,
                                         betas_range=betas_range,
                                         betas_limit=betas_limit,
                                         splits=final_splits,
                                         frame_len=frame_len,
                                         max_len=max_len,
                                         downsample_rate=downsample_rate)
            else:
                dump_amass2pytroch(amass_splits['dataset'],
                                   amass_dir,
                                   outpath,
                                   logger=logger,
                                   betas_range=betas_range,
                                   betas_limit=betas_limit,
                                   splits=final_splits,
                                   max_len=max_len)
        # assign the reconstructed amass_splits back after stage I completion
        amass_splits = _amass_splits

    # normal mode - using different datasets as train/vald/test
    # e.g.
    # amass_splits = {
    #     'vald': ['HumanEva'],
    #     'test': ['SSM_synced'],
    #     'train': ['CMU']
    # }
    else:
        for split_name, datasets in amass_splits.items():
            outpath = makepath(os.path.join(stageI_outdir, split_name, 'pose.pt'), isfile=True)
            if os.path.exists(outpath): continue
            if frame_len:
                downsample_amass2pytroch(datasets,
                                         amass_dir,
                                         outpath,
                                         logger=logger,
                                         betas_range=betas_range,
                                         betas_limit=betas_limit,
                                         frame_len=frame_len,
                                         max_len=max_len,
                                         downsample_rate=downsample_rate)
            else:
                dump_amass2pytroch(datasets,
                                   amass_dir,
                                   outpath,
                                   logger=logger,
                                   betas_range=betas_range,
                                   betas_limit=betas_limit,
                                   max_len=max_len)

    logger(
        'Stage II: augment the data and save into h5 files to be used in a cross framework scenario.'
    )

    class AMASS_ROW(pytables.IsDescription):
        # fixed-size pytables row schema for one augmented AMASS frame
        fid = pytables.Int16Col(1)  # 1-character String
        fname = pytables.Int32Col(1)  # 1-character String
        gender = pytables.Int16Col(1)  # 1-character String
        pose = pytables.Float32Col(52 * 3)  # float (single-precision)
        dmpl = pytables.Float32Col(8)  # float (single-precision)
        pose_matrot = pytables.Float32Col(52 * 9)  # float (single-precision)
        betas = pytables.Float32Col(16)  # float (single-precision)
        trans = pytables.Float32Col(3)  # float (single-precision)

    stageII_outdir = makepath(os.path.join(work_dir, 'stage_II'))

    batch_size = 256
    max_num_epochs = 1  # how much augmentation we would get

    for split_name in amass_splits.keys():
        h5_outpath = os.path.join(stageII_outdir, '%s.h5' % split_name)
        if os.path.exists(h5_outpath): continue

        ds = AMASS_Augment(dataset_dir=os.path.join(stageI_outdir, split_name))
        logger('%s has %d data points!' % (split_name, len(ds)))
        dataloader = DataLoader(ds,
                                batch_size=batch_size,
                                shuffle=False,
                                num_workers=32,
                                drop_last=False)
        with pytables.open_file(h5_outpath, mode="w") as h5file:
            table = h5file.create_table('/', 'data', AMASS_ROW)

            for epoch_num in range(max_num_epochs):
                for bId, bData in tqdm(enumerate(dataloader)):
                    # append one table row per sample in the batch
                    for i in range(len(bData['trans'])):
                        for k in bData.keys():
                            table.row[k] = c2c(bData[k][i])
                        table.row.append()
                    table.flush()

    logger(
        '\nStage III: dump every data field for all the splits as final pytorch pt files'
    )
    # we would like to use pt files because their interface could run in multiple threads
    stageIII_outdir = makepath(os.path.join(work_dir, 'stage_III'))

    for split_name in amass_splits.keys():
        h5_filepath = os.path.join(stageII_outdir, '%s.h5' % split_name)
        if not os.path.exists(h5_filepath): continue

        with pytables.open_file(h5_filepath, mode="r") as h5file:
            data = h5file.get_node('/data')
            data_dict = {k: [] for k in data.colnames}
            # materialize every column into python lists before saving
            for id in range(len(data)):
                cdata = data[id]
                for k in data_dict.keys():
                    data_dict[k].append(cdata[k])

            for k, v in data_dict.items():
                outfname = makepath(os.path.join(stageIII_outdir, split_name, '%s.pt' % k), isfile=True)
                if os.path.exists(outfname): continue
                torch.save(torch.from_numpy(np.asarray(v)), outfname)

    logger('Dumped final pytorch dataset at %s' % stageIII_outdir)
def prepare_vposer_datasets(vposer_dataset_dir, amass_splits, amass_dir, logger=None):
    """Dump AMASS body-pose parameters as pytorch pt files for VPoser training.

    For every split in ``amass_splits``, randomly keeps a fraction
    (``keep_rate`` of the central 80%) of the frames of each mocap sequence,
    skipping the first and last 10% of every sequence to avoid repetitive
    initial poses (e.g. T pose).

    :param vposer_dataset_dir: output directory for the final pt files
    :param amass_splits: mapping of split name ('train'/'vald'/'test') to a
        list of AMASS dataset names
    :param amass_dir: directory of downloaded AMASS npz files, laid out as
        amass_dir/dataset/subject/*_poses.npz
    :param logger: optional log2file instance; a dataset.log file is always
        written and chained with this logger when provided
    """
    if dataset_exists(vposer_dataset_dir):
        if logger is not None:
            logger('VPoser dataset already exists at {}'.format(vposer_dataset_dir))
        return

    ds_logger = log2file(makepath(vposer_dataset_dir, 'dataset.log', isfile=True), write2file_only=True)
    logger = ds_logger if logger is None else logger_sequencer([ds_logger, logger])

    logger('Creating pytorch dataset at %s' % vposer_dataset_dir)
    logger('Using AMASS body parameters from {}'.format(amass_dir))

    # Keep a copy of this script next to the data it produced, for provenance.
    shutil.copy2(__file__, vposer_dataset_dir)

    def fetch_from_amass(ds_names):
        """Yield per-sequence dicts of float32 pose arrays sampled from AMASS."""
        keep_rate = 0.3  # fraction of the central 80% of each sequence to keep

        npz_fnames = []
        for ds_name in ds_names:
            mosh_stageII_fnames = glob.glob(osp.join(amass_dir, ds_name, '*/*_poses.npz'))
            npz_fnames.extend(mosh_stageII_fnames)
            logger('Found {} sequences from {}.'.format(len(mosh_stageII_fnames), ds_name))

        for npz_fname in npz_fnames:
            try:
                cdata = np.load(npz_fname)
            except Exception:
                # Consistent with dump_amass2pytroch: a corrupt/unreadable npz
                # must not abort the whole dump.
                logger('Could not read %s! skipping..' % npz_fname)
                continue

            N = len(cdata['poses'])
            # Skip first and last 10% of frames to avoid initial standard
            # poses, e.g. T pose.
            frame_pool = list(range(int(0.1 * N), int(0.9 * N), 1))
            if not frame_pool:
                continue  # sequence too short to sample from safely
            # Guard against requesting more samples than the population;
            # np.random.choice(replace=False) raises otherwise.
            n_keep = min(int(keep_rate * 0.8 * N), len(frame_pool))
            if n_keep < 1:
                continue
            cdata_ids = np.random.choice(frame_pool, n_keep, replace=False)

            fullpose = cdata['poses'][cdata_ids].astype(np.float32)
            # SMPL-H layout: [:3] root orientation, [3:66] 21 body joints.
            yield {'pose_body': fullpose[:, 3:66], 'root_orient': fullpose[:, :3]}

    for split_name, ds_names in amass_splits.items():
        if dataset_exists(vposer_dataset_dir, split_names=[split_name]):
            continue
        logger('Preparing VPoser data for split {}'.format(split_name))

        data_fields = {}
        for data in fetch_from_amass(ds_names):
            for k in data.keys():
                if k not in data_fields:
                    data_fields[k] = []
                data_fields[k].append(data[k])

        if not data_fields:
            # Original code would raise NameError on `v` below for an empty
            # split; log and move on instead.
            logger('No data points found for split {}; nothing dumped.'.format(split_name))
            continue

        for k, v in data_fields.items():
            outpath = makepath(vposer_dataset_dir, split_name, '{}.pt'.format(k), isfile=True)
            v = np.concatenate(v)
            torch.save(torch.tensor(v), outpath)

        logger('{} datapoints dumped for split {}. ds_meta_pklpath: {}'.format(
            len(v), split_name, osp.join(vposer_dataset_dir, split_name)))

    Configer(**{
        'amass_splits': amass_splits.toDict(),
        'amass_dir': amass_dir,
    }).dump_settings(makepath(vposer_dataset_dir, 'settings.ini', isfile=True))

    logger('Dumped final pytorch dataset at %s' % vposer_dataset_dir)
import os from human_body_prior.tools.omni_tools import makepath, log2file from human_body_prior.data.prepare_data import prepare_vposer_datasets from datetime import datetime expr_code = datetime.now().strftime("%d %m %Y %H:%M:%S") amass_dir = r'/content/drive/My Drive/LAZAR/AMASS' vposer_datadir = makepath('prepared/%s' % (expr_code)) logger = log2file(os.path.join(vposer_datadir, '%s.log' % (expr_code))) logger('[%s] Preparing data for training VPoser.'%expr_code) amass_splits = { 'vald': ['HumanEva', 'MPIHDM05', 'SFU', 'MPImosh'], 'test': ['Transitions_mocap', 'SSMsynced'], 'train': ['CMU', 'MPILimits', 'TotalCapture', 'EyesJapanDataset', 'KIT', 'BMLrub', 'EKUT', 'TCDhandMocap', 'ACCAD'] } amass_splits['train'] = list(set(amass_splits['train']).difference(set(amass_splits['test'] + amass_splits['vald']))) prepare_vposer_datasets(amass_splits, amass_dir, vposer_datadir, logger=logger)