def save_training_state(self):
    """Persist the current training bookkeeping (LR, step counters, best-model
    steps) to training_state.json inside the logs directory."""
    state_path = os.path.join(self.logs_dir, "training_state.json")
    write_json(state_path, {
        "steps_since_lr_change": self.steps_since_lr_change,
        "best_self_model_step": self.best_self_model_step,
        "best_rule_model_step": self.best_rule_model_step,
        "learning_rate": self.lr_tracker,
        "current_step": self.current_step,
    })
def __init__(self, config):
    """Set up a self-play training run.

    Builds the game, replay buffer, log directory (timestamped per run),
    network, SGD optimizer, MSE loss, and TensorBoard writer from the given
    raw config dict.

    Arguments:
        config {dict} -- raw configuration; also parsed into a structured
            TrainingConfig stored as self.p
    """
    self.config = config
    self.p = TrainingConfig.from_dict(config)
    self.game = get_gameplay(self.config)
    self.repr = RepresentationGenerator()
    self.replay_buffer = ReplayBuffer(self.config)
    # one log dir per run, keyed by network type and start time
    self.logs_dir = "learn/logs/self_play_{}_{}".format(
        self.p.network_type, time.strftime("%Y-%m-%d_%H-%M"))
    # template for numbered checkpoint paths, filled with the step number
    self.logs_base_str = os.path.join(self.logs_dir, "ckpt-{}.pth")
    make_dir_if_not_exists(self.logs_dir)
    make_dir_if_not_exists(os.path.join(self.logs_dir, "tensorboard"))
    # snapshot the effective config next to the logs for reproducibility
    write_json(os.path.join(self.logs_dir, "config.json"), self.p.to_dict())
    self.best_self_ckpt_path = os.path.join(self.logs_dir, "best_self.pth")
    self.best_rule_ckpt_path = os.path.join(self.logs_dir, "best_rule.pth")
    self.latest_ckpt_path = os.path.join(self.logs_dir, "latest.pth")
    # resume bookkeeping from a previous run, or start counters at zero
    if self.p.restore_ckpt_dir is not None:
        self.load_training_state(self.p.restore_ckpt_dir)
    else:
        self.steps_since_lr_change = 0
        self.current_step = 0
        self.best_self_model_step = 0
        self.best_rule_model_step = 0
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    print("Training using device:", self.device)
    self.net = self.get_new_network()
    # self.net = get_network(self.config).to(self.device)
    # set_model_to_half(self.net)
    self.optimizer = optim.SGD(
        self.net.parameters(),
        lr=self.p.initial_learning_rate,
        momentum=0.9,
        weight_decay=self.p.weight_decay,
    )
    # lr_tracker mirrors the optimizer's current learning rate for logging
    # and for save_training_state
    self.lr_tracker = self.p.initial_learning_rate
    self.loss_criterion = nn.MSELoss(reduction='mean')
    # gradient accumulation: number of sub-batches needed to reach the
    # effective batch size (ceil division, written as an explicit branch)
    if self.p.effective_batch_size % self.p.max_train_batch_size == 0:
        self.accumulate_loss_n_times = self.p.effective_batch_size // self.p.max_train_batch_size
    else:
        self.accumulate_loss_n_times = self.p.effective_batch_size // self.p.max_train_batch_size + 1
    self.strategy_types = [
        "random", "max", "increase_min", "reduce_deficit", "mixed"
    ]
    self.writer = SummaryWriter(os.path.join(self.logs_dir, "tensorboard"))
    print(f"Writing logs to: {self.logs_dir}")
def _dump_summary_info(self):
    """Load the training summary from INFO.json, creating the file first
    when it does not exist yet; the result is kept on self.training_info."""
    path = os.path.join(self.save_dir, self.training_name, 'INFO.json')
    if io.file_exists(path):
        # reuse the previously written summary
        summary = io.read_from_json(path)
    else:
        # first run: build the summary and persist it
        summary = self._summary_info()
        io.write_json(path, summary)
    self.training_info = summary
def main():
    """Export a trained model as a frozen TensorFlow graph (.pb) together
    with a JSON file listing its input/output tensor names."""
    args = _parse_args()

    # select device
    os.environ['CUDA_VISIBLE_DEVICES'] = args.devices

    # inference mode
    K.set_learning_phase(0)

    # pick data format; tensorflow supports b01c pooling on cpu only
    channels_last = args.devices == '' or args.model == 'mobilenet_v2'
    K.set_image_data_format('channels_last' if channels_last
                            else 'channels_first')

    # set dtype
    K.set_floatx(args.dtype)

    # build the model from the module named like the model argument
    model_module = globals()[args.model]
    model_kwargs = {}
    if args.model == 'mobilenet_v2':
        model_kwargs['alpha'] = args.mobilenet_v2_alpha
    model = model_module.get_model(
        input_type=args.input_type,
        input_shape=(args.input_height, args.input_width),
        output_type=args.output_type,
        n_classes=args.n_classes,
        sampling=False,
        **model_kwargs)

    # freeze the current session graph with the model outputs pinned
    sess = K.get_session()
    output_names = [out.op.name for out in model.outputs]
    frozen_graph = _freeze_session(sess, output_names=output_names)

    # write the frozen graph; the target must be a .pb file
    dirname = os.path.dirname(args.output_filepath)
    filename = os.path.basename(args.output_filepath)
    assert os.path.splitext(filename)[1] == '.pb'
    tf.train.write_graph(frozen_graph, dirname, filename, as_text=False)

    # store input and output names as json file next to the graph
    write_json(
        args.output_filepath + '.json',
        {
            'input_names': [inp.op.name for inp in model.inputs],
            'output_names': [outp.op.name for outp in model.outputs]
        })
def generate_data(config):
    """Create the configured dataset generator and write the requested number
    of samples (plus the config itself) into the output directory."""
    attributes = config['attributes']
    target_dir = attributes['output_dir']
    sample_count = attributes['num_samples']

    # build the generator for the requested dataset type
    generator = get_generator(attributes['dataset'])(config)

    # prepare output directory and persist the config alongside the data
    io.makedirs(target_dir)
    io.write_json(config, os.path.join(target_dir, 'config.json'))

    # generate
    io.generate_data(generator, target_dir, sample_count)
def _update_summary(self, global_step, loss):
    """Update training summary details and persist them to INFO.json.

    Fix: the previous docstring documented a `metrics {Metric}` argument
    that the function does not take.

    Arguments:
        global_step {int} -- global step in the training process
        loss {float} -- validation loss value stored under 'val_loss'
    """
    self.training_info['global_step'] = global_step
    self.training_info['val_loss'] = loss
    # write the refreshed summary back to the run's INFO.json
    info_file_path = os.path.join(self.save_dir, self.training_name,
                                  'INFO.json')
    io.write_json(info_file_path, self.training_info)
def main():
    """Evaluate every training run under a base path on one dataset split and
    write the aggregated results to a JSON file."""
    args = _parse_args()

    # resolve output directory; default is <repo_root>/eval_outputs
    if args.output_path is not None:
        output_path = args.output_path
    else:
        repo_root = os.path.dirname(
            os.path.dirname(os.path.abspath(__file__)))
        output_path = os.path.join(repo_root, 'eval_outputs')
    create_directory_if_not_exists(output_path)

    # collect all training runs to evaluate
    runs = _parse_training_basepath(args.training_basepath)

    # metric that selects the best epoch per run
    best_epoch_metric = ('valid_loss'
                         if args.selection_set == ds.VALID_SET
                         else args.selection_set + '_loss')

    # load dataset to get access to samples (same order in outputs of all runs)
    dataset = ds.load_set(dataset_basepath=args.dataset_basepath,
                          set_name=args.set,
                          default_size=args.dataset_size)

    # run the evaluation and persist its results
    res = _evaluate(runs,
                    output_type=args.output_type,
                    dataset_name=args.set,
                    dataset=dataset,
                    best_epoch_metric=best_epoch_metric)
    result_file = os.path.join(
        output_path, f'results_{args.set}_{args.output_type}.json')
    write_json(result_file, res)
def main():
    """Train a pose-estimation model end to end: build the experiment
    directory, TensorFlow datasets, model, optimizer, loss, and callbacks,
    then run model.fit."""
    # parse args --------------------------------------------------------------
    args = _parse_args()

    # set device --------------------------------------------------------------
    os.environ['CUDA_VISIBLE_DEVICES'] = args.devices

    # output path -------------------------------------------------------------
    # encode model / input / output / lr / run id into a unique directory name
    lr_str = f'{args.learning_rate:0.6f}'.replace('.', '_')
    model_str = args.model
    if args.model == 'mobilenet_v2':
        model_str += f"_{args.mobilenet_v2_alpha:0.2f}".replace('.', '_')
    exp_identifier = (f'{model_str}__'
                      f'{args.input_type}__'
                      f'{args.input_height}x{args.input_width}__'
                      f'{args.output_type}__'
                      f'{lr_str}__'
                      f'{args.run_id}')
    output_path = os.path.join(args.output_basepath, exp_identifier)
    create_directory_if_not_exists(output_path)

    # dump args ---------------------------------------------------------------
    write_json(os.path.join(output_path, 'config.json'), vars(args))

    # data --------------------------------------------------------------------
    # samples for training
    train_set = dataset.load_set(dataset_basepath=args.dataset_basepath,
                                 set_name=args.training_set,
                                 default_size=args.dataset_size)
    # limit training samples to multiple of batch size
    train_set.strip_to_multiple_of_batch_size(args.batch_size)
    train_steps_per_epoch = len(train_set) // args.batch_size

    # samples for validation
    valid_sets = [
        dataset.load_set(dataset_basepath=args.dataset_basepath,
                         set_name=sn,
                         default_size=args.dataset_size)
        for sn in args.validation_sets
    ]
    # ceil division so a partial final batch still counts as a step
    # NOTE(review): the comprehension variable 'set' shadows the builtin here
    valid_steps_per_epoch = \
        [(len(set)+args.validation_batch_size-1) // args.validation_batch_size
         for set in valid_sets]

    # create tensorflow datasets
    tf_dataset_train = _create_tensorflow_dataset(
        dataset=train_set,
        input_type=args.input_type,
        input_shape=(args.input_height, args.input_width),
        output_type=args.output_type,
        batch_size=args.batch_size,
        n_prefetch_batches=5,
        n_classes=args.n_classes,
        shuffle=True,
        flip=not args.no_augmentation,
        scale01=args.input_preprocessing == 'scale01',
        standardize=args.input_preprocessing == 'standardize',
        zero_mean=True,
        unit_variance=True)
    # validation datasets: no shuffling, no augmentation
    tf_datasets_valid = \
        [_create_tensorflow_dataset(
            dataset=set_,
            input_type=args.input_type,
            input_shape=(args.input_height, args.input_width),
            output_type=args.output_type,
            batch_size=args.validation_batch_size,
            n_prefetch_batches=5,
            n_classes=args.n_classes,
            shuffle=False,
            flip=False,
            scale01=args.input_preprocessing == 'scale01',
            standardize=args.input_preprocessing == 'standardize',
            zero_mean=True,
            unit_variance=True) for set_ in valid_sets]

    # model -------------------------------------------------------------------
    # the model module is looked up by name among this module's globals
    model_module = globals()[args.model]
    model_kwargs = {}
    if args.model == 'mobilenet_v2':
        model_kwargs['alpha'] = args.mobilenet_v2_alpha
    model = model_module.get_model(input_type=args.input_type,
                                   input_shape=(args.input_height,
                                                args.input_width),
                                   output_type=args.output_type,
                                   n_classes=args.n_classes,
                                   sampling=False,
                                   **model_kwargs)

    # optimizer is selected by name; SGD is the fallback
    if args.optimizer == 'adam':
        # adam
        opt = Adam(lr=args.learning_rate,
                   beta_1=0.9,
                   beta_2=0.999,
                   epsilon=None,
                   decay=0.0,
                   amsgrad=False)
    elif args.optimizer == 'rmsprop':
        opt = RMSprop(lr=args.learning_rate, rho=0.9, epsilon=None, decay=0.0)
    else:
        # sgd
        opt = SGD(lr=args.learning_rate, momentum=args.momentum, decay=0.0)

    # loss depends on the output parameterization; kappa has per-loss defaults
    if args.output_type == OUTPUT_BITERNION:
        kappa = args.kappa or 1.0
        loss = losses.vonmisses_loss_biternion(kappa)
    elif args.output_type == OUTPUT_REGRESSION:
        kappa = args.kappa or 0.5
        loss = losses.vonmisses_loss(kappa)
    else:
        loss = 'categorical_crossentropy'
    model.compile(optimizer=opt, loss=loss)

    # callbacks ---------------------------------------------------------------
    cbs = []
    # Validation callbacks
    # map 'validation' (= dataset.VALID_SET) to 'valid' (keras default for
    # validation set)
    dataset_names = [
        n.replace(dataset.VALID_SET, 'valid') for n in args.validation_sets
    ]
    for tf_ds, ds_name, steps in zip(tf_datasets_valid, dataset_names,
                                     valid_steps_per_epoch):
        # note: epoch is in range [0, args.n_epochs-1]
        filepath = os.path.join(output_path,
                                f'outputs_{ds_name}' + '_{epoch:04d}.npy')
        cbs.append(
            callbacks.ValidationCallback(tf_dataset=tf_ds,
                                         dataset_name=ds_name,
                                         output_filepath=filepath,
                                         validation_steps=steps,
                                         verbose=int(args.verbose)))

    # early stopping (only when a positive patience is configured)
    if args.early_stopping > 0:
        cbs.append(
            EarlyStopping(monitor='valid_loss',
                          patience=args.early_stopping,
                          mode='min',
                          verbose=int(args.verbose)))

    # learning rate poly decay over the full training schedule
    max_iter = train_steps_per_epoch * args.n_epochs
    cbs.append(
        callbacks.LRPolyDecay(lr_init=args.learning_rate,
                              power=0.9,
                              max_iter=max_iter,
                              lr_min=1e-6,
                              verbose=int(args.verbose)))

    # model checkpoints: best-by-valid-loss and best-by-test-loss separately
    # note: due to keras implementation 'epoch' is in range [1, args.n_epochs]
    filepath = os.path.join(output_path, 'weights_valid_{epoch:04d}.hdf5')
    cbs.append(
        ModelCheckpoint(filepath=filepath,
                        monitor='valid_loss',
                        mode='min',
                        verbose=int(args.verbose),
                        save_best_only=True,
                        save_weights_only=True))
    filepath = os.path.join(output_path, 'weights_test_{epoch:04d}.hdf5')
    cbs.append(
        ModelCheckpoint(filepath=filepath,
                        monitor='test_loss',
                        mode='min',
                        verbose=int(args.verbose),
                        save_best_only=True,
                        save_weights_only=True))

    # CSV logger
    cbs.append(CSVLogger(filename=os.path.join(output_path, 'log.csv')))

    # Tensorboard
    cbs.append(
        TensorBoard(log_dir=output_path,
                    histogram_freq=0,
                    batch_size=32,
                    write_graph=False,
                    write_grads=False,
                    write_images=False,
                    embeddings_freq=0,
                    embeddings_layer_names=None,
                    embeddings_metadata=None,
                    embeddings_data=None))

    # TerminateOnNaN
    cbs.append(TerminateOnNaN())

    # training ----------------------------------------------------------------
    model.fit(tf_dataset_train,
              epochs=args.n_epochs,
              steps_per_epoch=train_steps_per_epoch,
              callbacks=cbs)
def train(self):
    """Alternate between (a) rendering a synthetic dataset with the current
    scene-graph model and training the task network on it, and (b) updating
    the model with an MMD distribution-matching loss plus an optional
    REINFORCE-style task loss against a moving-average baseline.

    Fix: the DataLoader batch size previously read `opts['batch_size']`
    (undefined name -> NameError); it now reads `self.opts['batch_size']`.
    """
    if self.opts['train_reconstruction']:
        self.train_reconstruction()
    if self.opts['freeze_encoder']:
        self.model.freeze_encoder()
    loader = torch.utils.data.DataLoader(
        self.scene_graph_dataset,
        self.opts['batch_size'],  # was opts['batch_size']: undefined name
        num_workers=0,
        collate_fn=self.scene_graph_dataset.collate_fn)

    # baseline for moving average
    baseline = 0.
    alpha = self.opts['moving_avg_alpha']
    for e in range(self.opts['max_epochs']):
        # Set seeds for epoch
        rn = utils.set_seeds(e)

        with torch.no_grad():
            # Generate this epoch's data for the task net
            i = 0
            # datadir
            out_dir = os.path.join(self.logdir, 'datagen')
            io.makedirs(out_dir)
            for idx, (g, x, m, adj) in tqdm(enumerate(loader),
                                            desc='Generating Data'):
                x, adj = x.float().to(self.device), adj.float().to(
                    self.device)
                # no sampling here
                dec, dec_act = self.model(x, adj)
                f = dec_act.cpu().numpy()
                m = m.cpu().numpy()
                g = self.generator.update(g, f, m)
                r = self.generator.render(g)
                # write each rendered image with its label json
                for k in range(len(g)):
                    img, lbl = r[k]
                    out_img = os.path.join(out_dir, f'{str(i).zfill(6)}.jpg')
                    out_lbl = os.path.join(out_dir, f'{str(i).zfill(6)}.json')
                    io.write_img(img, out_img)
                    io.write_json(lbl, out_lbl)
                    i += 1

        # task accuracy on the freshly generated data
        acc = self.tasknet.train_from_dir(out_dir)

        # compute moving average
        if e > 0:
            baseline = alpha * acc + (1 - alpha) * baseline
        else:
            # initialize baseline to acc
            baseline = acc

        # Reset seeds to get exact same outputs as during data generation
        rn2 = utils.set_seeds(e)
        for i in range(len(rn)):
            assert rn[i] == rn2[
                i], 'Random numbers generated are different'

        # zero out gradients for first step
        self.optimizer.zero_grad()

        # Train dist matching and task loss
        for idx, (g, x, m, adj) in enumerate(loader):
            x, m, adj = (x.float().to(self.device),
                         m.float().to(self.device),
                         adj.float().to(self.device))
            dec, dec_act, log_probs = self.model(x, adj, m,
                                                 sample=True)  # sample here

            # get real images
            im_real = torch.from_numpy(
                self.target_dataset.get_bunch_images(
                    self.opts['num_real_images'])).to(self.device)

            # get fake images; different from generator.render, this renderer
            # has a backward pass implemented and calls generator.render in
            # its forward pass
            im = self.renderer.render(g, dec_act, m)

            if self.opts['dataset'] == 'mnist':
                # add channel dimension and repeat 3 times for MNIST
                im = im.unsqueeze(1).repeat(1, 3, 1, 1) / 255.
                im_real = im_real.permute(0, 3, 1,
                                          2).repeat(1, 3, 1, 1) / 255.

            mmd = self.mmd(im_real, im) * self.opts['weight']['dist_mmd']
            if self.opts['use_task_loss']:
                # REINFORCE: advantage (acc - baseline) weights the log-probs
                task_loss = -1 * torch.mean((acc - baseline) * log_probs)
                loss = mmd + task_loss  # weighting is already done
                loss.backward()
            else:
                mmd.backward()
            self.optimizer.step()
            self.optimizer.zero_grad()

            if idx % self.opts['print_freq'] == 0:
                print(f'[Dist] Step: {idx} MMD: {mmd.item()}')
                if self.opts['use_task_loss']:
                    print(f'[Task] Reward: {acc}, Baseline: {baseline}')
                # debug information
                print(
                    f'[Feat] Step: {idx} {dec_act[0, 2, 15:].tolist()} {x[0, 2, 15:].tolist()}'
                )
                # To debug, this index is the loc_x, loc_y, yaw of the
                # digit in MNIST

        if self.opts['use_task_loss']:
            self.optimizer.step()
            self.optimizer.zero_grad()

        # LR scheduler step
        self.lr_sched.step()
def main(): """Main""" LOGGER.info('Starting demo...') # ----------------------------------------------------------- # ----------------------------------------------------------- # --------------------- Training Phase ---------------------- # ----------------------------------------------------------- # ----------------------------------------------------------- LOGGER.info('Training Lifting...') # ------------------- Data loader ------------------- train_data_transform = transforms.Compose( [trsf.ImageTrsf(), trsf.Joints3DTrsf(), trsf.ToTensor()]) # let's load data from validation set as example train_data = Mocap(config_lifting_singleBranch.dataset.train, SetType.TRAIN, transform=train_data_transform) train_data_loader = DataLoader( train_data, batch_size=config_lifting_singleBranch.train_data_loader.batch_size, shuffle=config_lifting_singleBranch.train_data_loader.shuffle, num_workers=config_lifting_singleBranch.train_data_loader.workers) # ------------------- Build Model ------------------- # backbone = resnet101() encoder = HeatmapEncoder() decoder = PoseDecoder() # reconstructer = HeatmapReconstructer() device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if torch.cuda.device_count() > 1: LOGGER.info( str("Let's use " + str(torch.cuda.device_count()) + " GPUs!")) # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] 
on 3 GPUs # backbone = nn.DataParallel(backbone) encoder = nn.DataParallel(encoder) decoder = nn.DataParallel(decoder) # reconstructer = nn.DataParallel(reconstructer) # backbone = backbone.cuda() encoder = encoder.cuda() decoder = decoder.cuda() # reconstructer = reconstructer.cuda() # Load or Init Model Weights # if config_lifting_singleBranch.train_setting.backbone_path: # backbone.load_state_dict(torch.load(config_lifting_singleBranch.train_setting.backbone_path)) # else: # backbone.apply(init_weights) if config_lifting_singleBranch.train_setting.encoder_path: encoder.load_state_dict( torch.load(config_lifting_singleBranch.train_setting.encoder_path)) # encoder = torch.load(config_lifting_singleBranch.train_setting.encoder_path) LOGGER.info('Encoder Weight Loaded!') else: encoder.apply(init_weights) LOGGER.info('Encoder Weight Initialized!') if config_lifting_singleBranch.train_setting.decoder_path: decoder.load_state_dict( torch.load(config_lifting_singleBranch.train_setting.decoder_path)) # decoder = torch.load(config_lifting_singleBranch.train_setting.decoder_path) LOGGER.info('Decoder Weight Loaded!') else: decoder.apply(init_weights) LOGGER.info('Decoder Weight Initialized!') # if config_lifting_singleBranch.train_setting.reconstructer_path: # reconstructer.load_state_dict(torch.load(config_lifting_singleBranch.train_setting.reconstructer_path)) # # reconstructer = torch.load(config_lifting_singleBranch.train_setting.reconstructer_path) # LOGGER.info('Reconstructer Weight Loaded!') # else: # reconstructer.apply(init_weights) # LOGGER.info('Reconstructer Weight Initialized!') # ------------------- Build Loss & Optimizer ------------------- # Build Loss pose_prediction_cosine_similarity_loss_func = PosePredictionCosineSimilarityPerJointLoss( ) pose_prediction_l1_loss_func = PosePredictionDistancePerJointLoss() pose_prediction_l2_loss_func = PosePredictionMSELoss() # heatmap_reconstruction_loss_func = HeatmapReconstructionMSELoss() 
pose_prediction_cosine_similarity_loss_func = pose_prediction_cosine_similarity_loss_func.cuda( ) pose_prediction_l1_loss_func = pose_prediction_l1_loss_func.cuda() pose_prediction_l2_loss_func = pose_prediction_l2_loss_func.cuda() # heatmap_reconstruction_loss_func = heatmap_reconstruction_loss_func.cuda() # Build Optimizer optimizer = optim.Adam( [ # {"params": backbone.parameters()}, { "params": encoder.parameters() }, { "params": decoder.parameters() }, # {"params": reconstructer.parameters()} ], lr=0.001) scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1) # Variable for Final Model Selection # errorMin = 100 # errorMinIsUpdatedInThisEpoch = False # ------------------- Read dataset frames ------------------- for ep in range(config_lifting_singleBranch.train_setting.epoch): # ------------------- Evaluation ------------------- eval_body_train = evaluate.EvalBody() # eval_upper_train = evaluate.EvalUpperBody() # eval_lower_train = evaluate.EvalLowerBody() # eval_neck_train = evaluate.EvalNeck() # eval_head_train = evaluate.EvalHead() # eval_left_arm_train = evaluate.EvalLeftArm() # eval_left_elbow_train = evaluate.EvalLeftElbow() # eval_left_hand_train = evaluate.EvalLeftHand() # eval_right_arm_train = evaluate.EvalRightArm() # eval_right_elbow_train = evaluate.EvalRightElbow() # eval_right_hand_train = evaluate.EvalRightHand() # eval_left_leg_train = evaluate.EvalLeftLeg() # eval_left_knee_train = evaluate.EvalLeftKnee() # eval_left_foot_train = evaluate.EvalLeftFoot() # eval_left_toe_train = evaluate.EvalLeftToe() # eval_right_leg_train = evaluate.EvalRightLeg() # eval_right_knee_train = evaluate.EvalRightKnee() # eval_right_foot_train = evaluate.EvalRightFoot() # eval_right_toe_train = evaluate.EvalRightToe() # backbone.train() encoder.train() decoder.train() # reconstructer.train() # Averagemeter for Epoch lossAverageMeter = AverageMeter() # fullBodyErrorAverageMeter = AverageMeter() # upperBodyErrorAverageMeter = AverageMeter() # 
lowerBodyErrorAverageMeter = AverageMeter() # heatmapPredictionErrorAverageMeter = AverageMeter() PosePredictionCosineSimilarityPerJointErrorAverageMeter = AverageMeter( ) PosePredictionDistancePerJointErrorAverageMeter = AverageMeter() PosePredictionMSEErrorAverageMeter = AverageMeter() # heatmapReconstructionErrorAverageMeter = AverageMeter() # neckErrorAverageMeter = AverageMeter() # headErrorAverageMeter = AverageMeter() # leftArmErrorAverageMeter = AverageMeter() # leftElbowErrorAverageMeter = AverageMeter() # leftHandErrorAverageMeter = AverageMeter() # rightArmErrorAverageMeter = AverageMeter() # rightElbowErrorAverageMeter = AverageMeter() # rightHandErrorAverageMeter = AverageMeter() # leftLegErrorAverageMeter = AverageMeter() # leftKneeErrorAverageMeter = AverageMeter() # leftFootErrorAverageMeter = AverageMeter() # leftToeErrorAverageMeter = AverageMeter() # rightLegErrorAverageMeter = AverageMeter() # rightKneeErrorAverageMeter = AverageMeter() # rightFootErrorAverageMeter = AverageMeter() # rightToeErrorAverageMeter = AverageMeter() lossAverageMeterTrain = AverageMeter() # fullBodyErrorAverageMeterTrain = AverageMeter() # upperBodyErrorAverageMeterTrain = AverageMeter() # lowerBodyErrorAverageMeterTrain = AverageMeter() # heatmapPredictionErrorAverageMeterTrain = AverageMeter() PosePredictionCosineSimilarityPerJointErrorAverageMeterTrain = AverageMeter( ) PosePredictionDistancePerJointErrorAverageMeterTrain = AverageMeter() PosePredictionMSEErrorAverageMeterTrain = AverageMeter() # heatmapReconstructionErrorAverageMeterTrain = AverageMeter() # neckErrorAverageMeterTrain = AverageMeter() # headErrorAverageMeterTrain = AverageMeter() # leftArmErrorAverageMeterTrain = AverageMeter() # leftElbowErrorAverageMeterTrain = AverageMeter() # leftHandErrorAverageMeterTrain = AverageMeter() # rightArmErrorAverageMeterTrain = AverageMeter() # rightElbowErrorAverageMeterTrain = AverageMeter() # rightHandErrorAverageMeterTrain = AverageMeter() # 
leftLegErrorAverageMeterTrain = AverageMeter() # leftKneeErrorAverageMeterTrain = AverageMeter() # leftFootErrorAverageMeterTrain = AverageMeter() # leftToeErrorAverageMeterTrain = AverageMeter() # rightLegErrorAverageMeterTrain = AverageMeter() # rightKneeErrorAverageMeterTrain = AverageMeter() # rightFootErrorAverageMeterTrain = AverageMeter() # rightToeErrorAverageMeterTrain = AverageMeter() for it, (img, p2d, p3d, action, heatmap) in tqdm(enumerate(train_data_loader), total=len(train_data_loader)): #################### p2d는 각 Joint별 (x,y) 좌표를 나타낸듯. Image의 좌측상단이 (0,0)이다. #################### p3d는 Neck의 좌표를 (0,0,0)으로 생각했을 때의 각 Joint별 (^x,^y,^z) 좌표를 나타낸듯. #################### Joint 순서는 config_lifting_singleBranch.py에 있다. # LOGGER.info('Iteration: {}'.format(it)) # LOGGER.info('Images: {}'.format(img.shape)) # (Batch, Channel, Height(y), Width(x)) # LOGGER.info('p2dShapes: {}'.format(p2d.shape)) # (Width, Height) # # LOGGER.info('p2ds: {}'.format(p2d)) # LOGGER.info('p3dShapes: {}'.format(p3d.shape)) # (^x, ^y, ^z) # # LOGGER.info('p3ds: {}'.format(p3d)) # LOGGER.info('Actions: {}'.format(action)) # LOGGER.info('heatmapShapes: {}'.format(heatmap.shape)) # ----------------------------------------------------------- # ------------------- Run your model here ------------------- # ----------------------------------------------------------- optimizer.zero_grad() # Move Tensors to GPUs # img = img.cuda() p3d = p3d.cuda() heatmap = heatmap.cuda() # Forward # predicted_heatmap = backbone(img) latent = encoder(heatmap) predicted_pose = decoder(latent) # reconstructed_heatmap = reconstructer(latent) # Loss Calculation # heatmap_prediction_loss = heatmap_prediction_loss_func(predicted_heatmap, heatmap) p3d_for_loss = torch.cat( (p3d[:, 4:6, :], p3d[:, 7:10, :], p3d[:, 11:, :]), dim=1) # 13까지가 Upper Body p3d_for_loss = torch.reshape(p3d_for_loss, (-1, 48)) pose_prediction_cosine_similarity_loss = pose_prediction_cosine_similarity_loss_func( predicted_pose, p3d_for_loss) 
pose_prediction_l1_loss = pose_prediction_l1_loss_func( predicted_pose, p3d_for_loss) pose_prediction_l2_loss = pose_prediction_l2_loss_func( predicted_pose, p3d_for_loss) pose_prediction_loss = pose_prediction_l2_loss - 0.01 * pose_prediction_cosine_similarity_loss + 0.5 * pose_prediction_l1_loss # heatmap_reconstruction_loss = heatmap_reconstruction_loss_func(reconstructed_heatmap, heatmap) # Backpropagating Loss with Weighting Factors # backbone_loss = heatmap_prediction_loss lifting_loss = 0.1 * pose_prediction_loss # + 0.001*heatmap_reconstruction_loss # loss = backbone_loss + lifting_loss loss = lifting_loss # print(0.1*(-0.01)*pose_prediction_cosine_similarity_loss) # print(0.1*0.5*pose_prediction_l1_loss) # print(0.1*pose_prediction_l2_loss) # print(0.001*heatmap_reconstruction_loss) # Backward & Update loss.backward() optimizer.step() # Evaluate results using different evaluation metrices predicted_pose = torch.reshape(predicted_pose, (-1, 16, 3)) y_output = predicted_pose.data.cpu().numpy() p3d_for_loss = torch.cat( (p3d[:, 4:6, :], p3d[:, 7:10, :], p3d[:, 11:, :]), dim=1) # 13까지가 Upper Body p3d_for_loss = torch.reshape(p3d_for_loss, (-1, 16, 3)) y_target = p3d_for_loss.data.cpu().numpy() eval_body_train.eval(y_output, y_target, action) # eval_upper_train.eval(y_output, y_target, action) # eval_lower_train.eval(y_output, y_target, action) # eval_neck_train.eval(y_output, y_target, action) # eval_head_train.eval(y_output, y_target, action) # eval_left_arm_train.eval(y_output, y_target, action) # eval_left_elbow_train.eval(y_output, y_target, action) # eval_left_hand_train.eval(y_output, y_target, action) # eval_right_arm_train.eval(y_output, y_target, action) # eval_right_elbow_train.eval(y_output, y_target, action) # eval_right_hand_train.eval(y_output, y_target, action) # eval_left_leg_train.eval(y_output, y_target, action) # eval_left_knee_train.eval(y_output, y_target, action) # eval_left_foot_train.eval(y_output, y_target, action) # 
eval_left_toe_train.eval(y_output, y_target, action) # eval_right_leg_train.eval(y_output, y_target, action) # eval_right_knee_train.eval(y_output, y_target, action) # eval_right_foot_train.eval(y_output, y_target, action) # eval_right_toe_train.eval(y_output, y_target, action) # heatmap_prediction_loss = heatmap_prediction_loss_func(predicted_heatmap, heatmap) # heatmap_reconstruction_loss = heatmap_reconstruction_loss_func(reconstructed_heatmap, heatmap) # AverageMeter Update # fullBodyErrorAverageMeterTrain.update(eval_body_train.get_results()["All"]) # upperBodyErrorAverageMeterTrain.update(eval_upper_train.get_results()["All"]) # lowerBodyErrorAverageMeterTrain.update(eval_lower_train.get_results()["All"]) # heatmapPredictionErrorAverageMeterTrain.update(heatmap_prediction_loss.data.cpu().numpy()) PosePredictionCosineSimilarityPerJointErrorAverageMeterTrain.update( -0.001 * pose_prediction_cosine_similarity_loss.data.cpu().numpy()) PosePredictionDistancePerJointErrorAverageMeterTrain.update( 0.05 * pose_prediction_l1_loss.data.cpu().numpy()) PosePredictionMSEErrorAverageMeterTrain.update( 0.1 * pose_prediction_l2_loss.data.cpu().numpy()) # heatmapReconstructionErrorAverageMeterTrain.update(0.001 * heatmap_reconstruction_loss.data.cpu().numpy()) # neckErrorAverageMeterTrain.update(eval_neck_train.get_results()["All"]) # headErrorAverageMeterTrain.update(eval_head_train.get_results()["All"]) # leftArmErrorAverageMeterTrain.update(eval_left_arm_train.get_results()["All"]) # leftElbowErrorAverageMeterTrain.update(eval_left_elbow_train.get_results()["All"]) # leftHandErrorAverageMeterTrain.update(eval_left_hand_train.get_results()["All"]) # rightArmErrorAverageMeterTrain.update(eval_right_arm_train.get_results()["All"]) # rightElbowErrorAverageMeterTrain.update(eval_right_elbow_train.get_results()["All"]) # rightHandErrorAverageMeterTrain.update(eval_right_hand_train.get_results()["All"]) # 
leftLegErrorAverageMeterTrain.update(eval_left_leg_train.get_results()["All"]) # leftKneeErrorAverageMeterTrain.update(eval_left_knee_train.get_results()["All"]) # leftFootErrorAverageMeterTrain.update(eval_left_foot_train.get_results()["All"]) # leftToeErrorAverageMeterTrain.update(eval_left_toe_train.get_results()["All"]) # rightLegErrorAverageMeterTrain.update(eval_right_leg_train.get_results()["All"]) # rightKneeErrorAverageMeterTrain.update(eval_right_knee_train.get_results()["All"]) # rightFootErrorAverageMeterTrain.update(eval_right_foot_train.get_results()["All"]) # rightToeErrorAverageMeterTrain.update(eval_right_toe_train.get_results()["All"]) # AverageMeter Update lossAverageMeterTrain.update(loss.data.cpu().numpy()) LOGGER.info( str("Training Loss in Epoch " + str(ep) + " : " + str(lossAverageMeterTrain.avg))) LOGGER.info( str("Training PosePredictionCosineSimilarityPerJointErrorAverageMeter in Epoch " + str(ep) + " : " + str(PosePredictionCosineSimilarityPerJointErrorAverageMeterTrain .avg))) LOGGER.info( str("Training PosePredictionDistancePerJointErrorAverageMeter in Epoch " + str(ep) + " : " + str(PosePredictionDistancePerJointErrorAverageMeterTrain.avg))) LOGGER.info( str("Training PosePredictionMSEErrorAverageMeter in Epoch " + str(ep) + " : " + str(PosePredictionMSEErrorAverageMeterTrain.avg))) # LOGGER.info(str("Training heatmapReconstructionErrorAverageMeter in Epoch " + str(ep) + " : " + str(heatmapReconstructionErrorAverageMeterTrain.avg))) LOGGER.info( str("Training fullBodyErrorAverageMeter in Epoch " + str(ep) + " : " + str(eval_body_train.get_results()["All"]))) LOGGER.info( str("Training upperBodyErrorAverageMeter in Epoch " + str(ep) + " : " + str(eval_body_train.get_results()["UpperBody"]))) LOGGER.info( str("Training lowerBodyErrorAverageMeter in Epoch " + str(ep) + " : " + str(eval_body_train.get_results()["LowerBody"]))) # LOGGER.info(str("Training heatmapPredictionErrorAverageMeter in Epoch " + str(ep) + " : " + 
str(heatmapPredictionErrorAverageMeterTrain.avg))) # if ep+1 == config_lifting_singleBranch.train_setting.epoch: # Test only in Final Epoch because of Training Time Issue if True: # ----------------------------------------------------------- # ----------------------------------------------------------- # -------------------- Validation Phase --------------------- # ----------------------------------------------------------- # ----------------------------------------------------------- LOGGER.info('Validation...') # ------------------- Data loader ------------------- test_data_transform = transforms.Compose( [trsf.ImageTrsf(), trsf.Joints3DTrsf(), trsf.ToTensor()]) # let's load data from validation set as example test_data = Mocap(config_lifting_singleBranch.dataset.test, SetType.TEST, transform=test_data_transform) test_data_loader = DataLoader( test_data, batch_size=config_lifting_singleBranch.test_data_loader. batch_size, shuffle=config_lifting_singleBranch.test_data_loader.shuffle, num_workers=config_lifting_singleBranch.test_data_loader. 
workers) # ------------------- Evaluation ------------------- eval_body = evaluate.EvalBody() # eval_upper = evaluate.EvalUpperBody() # eval_lower = evaluate.EvalLowerBody() # eval_neck = evaluate.EvalNeck() # eval_head = evaluate.EvalHead() # eval_left_arm = evaluate.EvalLeftArm() # eval_left_elbow = evaluate.EvalLeftElbow() # eval_left_hand = evaluate.EvalLeftHand() # eval_right_arm = evaluate.EvalRightArm() # eval_right_elbow = evaluate.EvalRightElbow() # eval_right_hand = evaluate.EvalRightHand() # eval_left_leg = evaluate.EvalLeftLeg() # eval_left_knee = evaluate.EvalLeftKnee() # eval_left_foot = evaluate.EvalLeftFoot() # eval_left_toe = evaluate.EvalLeftToe() # eval_right_leg = evaluate.EvalRightLeg() # eval_right_knee = evaluate.EvalRightKnee() # eval_right_foot = evaluate.EvalRightFoot() # eval_right_toe = evaluate.EvalRightToe() # ------------------- Read dataset frames ------------------- # backbone.eval() encoder.eval() decoder.eval() # reconstructer.eval() for it, (img, p2d, p3d, action, heatmap) in tqdm(enumerate(test_data_loader), total=len(test_data_loader)): #################### p2d는 각 Joint별 (x,y) 좌표를 나타낸듯. Image의 좌측상단이 (0,0)이다. #################### p3d는 Neck의 좌표를 (0,0,0)으로 생각했을 때의 각 Joint별 (^x,^y,^z) 좌표를 나타낸듯. #################### Joint 순서는 config_lifting_singleBranch.py에 있다. 
# LOGGER.info('Iteration: {}'.format(it)) # LOGGER.info('Images: {}'.format(img.shape)) # (Batch, Channel, Height(y), Width(x)) # LOGGER.info('p2dShapes: {}'.format(p2d.shape)) # (Width, Height) # # LOGGER.info('p2ds: {}'.format(p2d)) # LOGGER.info('p3dShapes: {}'.format(p3d.shape)) # (^x, ^y, ^z) # # LOGGER.info('p3ds: {}'.format(p3d)) # LOGGER.info('Actions: {}'.format(action)) # LOGGER.info('heatmapShapes: {}'.format(heatmap.shape)) # ------------------- Evaluate ------------------- # TODO: replace p3d_hat with model preditions # p3d_hat = torch.ones_like(p3d) # Move Tensors to GPUs # img = img.cuda() p3d = p3d.cuda() heatmap = heatmap.cuda() # Forward # predicted_heatmap = backbone(img) latent = encoder(heatmap) predicted_pose = decoder(latent) # reconstructed_heatmap = reconstructer(latent) # Loss Calculation # heatmap_prediction_loss = heatmap_prediction_loss_func(predicted_heatmap, heatmap) p3d_for_loss = torch.cat( (p3d[:, 4:6, :], p3d[:, 7:10, :], p3d[:, 11:, :]), dim=1) # 13까지가 Upper Body p3d_for_loss = torch.reshape(p3d_for_loss, (-1, 48)) pose_prediction_cosine_similarity_loss = pose_prediction_cosine_similarity_loss_func( predicted_pose, p3d_for_loss) pose_prediction_l1_loss = pose_prediction_l1_loss_func( predicted_pose, p3d_for_loss) pose_prediction_l2_loss = pose_prediction_l2_loss_func( predicted_pose, p3d_for_loss) pose_prediction_loss = pose_prediction_l2_loss - 0.01 * pose_prediction_cosine_similarity_loss + 0.5 * pose_prediction_l1_loss # heatmap_reconstruction_loss = heatmap_reconstruction_loss_func(reconstructed_heatmap, heatmap) # Backpropagating Loss with Weighting Factors # backbone_loss = heatmap_prediction_loss lifting_loss = 0.1 * pose_prediction_loss # + 0.001*heatmap_reconstruction_loss # loss = backbone_loss + lifting_loss loss = lifting_loss # print(0.1*(-0.01)*pose_prediction_cosine_similarity_loss) # print(0.1*0.5*pose_prediction_l1_loss) # print(0.1*pose_prediction_l2_loss) # print(0.001*heatmap_reconstruction_loss) # Evaluate 
results using different evaluation metrices predicted_pose = torch.reshape(predicted_pose, (-1, 16, 3)) y_output = predicted_pose.data.cpu().numpy() p3d_for_loss = torch.cat( (p3d[:, 4:6, :], p3d[:, 7:10, :], p3d[:, 11:, :]), dim=1) # 13까지가 Upper Body p3d_for_loss = torch.reshape(p3d_for_loss, (-1, 16, 3)) y_target = p3d_for_loss.data.cpu().numpy() eval_body.eval(y_output, y_target, action) # eval_upper.eval(y_output, y_target, action) # eval_lower.eval(y_output, y_target, action) # eval_neck.eval(y_output, y_target, action) # eval_head.eval(y_output, y_target, action) # eval_left_arm.eval(y_output, y_target, action) # eval_left_elbow.eval(y_output, y_target, action) # eval_left_hand.eval(y_output, y_target, action) # eval_right_arm.eval(y_output, y_target, action) # eval_right_elbow.eval(y_output, y_target, action) # eval_right_hand.eval(y_output, y_target, action) # eval_left_leg.eval(y_output, y_target, action) # eval_left_knee.eval(y_output, y_target, action) # eval_left_foot.eval(y_output, y_target, action) # eval_left_toe.eval(y_output, y_target, action) # eval_right_leg.eval(y_output, y_target, action) # eval_right_knee.eval(y_output, y_target, action) # eval_right_foot.eval(y_output, y_target, action) # eval_right_toe.eval(y_output, y_target, action) # heatmap_reconstruction_loss = heatmap_reconstruction_loss_func(reconstructed_heatmap, heatmap) # AverageMeter Update # fullBodyErrorAverageMeter.update(eval_body.get_results()["All"]) # upperBodyErrorAverageMeter.update(eval_upper.get_results()["All"]) # lowerBodyErrorAverageMeter.update(eval_lower.get_results()["All"]) # heatmapPredictionErrorAverageMeter.update(heatmap_prediction_loss.data.cpu().numpy()) PosePredictionCosineSimilarityPerJointErrorAverageMeter.update( -0.001 * pose_prediction_cosine_similarity_loss.data.cpu().numpy()) PosePredictionDistancePerJointErrorAverageMeter.update( 0.05 * pose_prediction_l1_loss.data.cpu().numpy()) PosePredictionMSEErrorAverageMeter.update( 0.1 * 
pose_prediction_l2_loss.data.cpu().numpy()) # heatmapReconstructionErrorAverageMeter.update(0.001 * heatmap_reconstruction_loss.data.cpu().numpy()) # neckErrorAverageMeter.update(eval_neck.get_results()["All"]) # headErrorAverageMeter.update(eval_head.get_results()["All"]) # leftArmErrorAverageMeter.update(eval_left_arm.get_results()["All"]) # leftElbowErrorAverageMeter.update(eval_left_elbow.get_results()["All"]) # leftHandErrorAverageMeter.update(eval_left_hand.get_results()["All"]) # rightArmErrorAverageMeter.update(eval_right_arm.get_results()["All"]) # rightElbowErrorAverageMeter.update(eval_right_elbow.get_results()["All"]) # rightHandErrorAverageMeter.update(eval_right_hand.get_results()["All"]) # leftLegErrorAverageMeter.update(eval_left_leg.get_results()["All"]) # leftKneeErrorAverageMeter.update(eval_left_knee.get_results()["All"]) # leftFootErrorAverageMeter.update(eval_left_foot.get_results()["All"]) # leftToeErrorAverageMeter.update(eval_left_toe.get_results()["All"]) # rightLegErrorAverageMeter.update(eval_right_leg.get_results()["All"]) # rightKneeErrorAverageMeter.update(eval_right_knee.get_results()["All"]) # rightFootErrorAverageMeter.update(eval_right_foot.get_results()["All"]) # rightToeErrorAverageMeter.update(eval_right_toe.get_results()["All"]) # AverageMeter Update lossAverageMeter.update(loss.data.cpu().numpy()) LOGGER.info( str("Validation Loss in Epoch " + str(ep) + " : " + str(lossAverageMeter.avg))) LOGGER.info( str("Validation PosePredictionCosineSimilarityPerJointErrorAverageMeter in Epoch " + str(ep) + " : " + str(PosePredictionCosineSimilarityPerJointErrorAverageMeter .avg))) LOGGER.info( str("Validation PosePredictionDistancePerJointErrorAverageMeter in Epoch " + str(ep) + " : " + str(PosePredictionDistancePerJointErrorAverageMeter.avg))) LOGGER.info( str("Validation PosePredictionMSEErrorAverageMeter in Epoch " + str(ep) + " : " + str(PosePredictionMSEErrorAverageMeter.avg))) # LOGGER.info(str("Validation 
heatmapReconstructionErrorAverageMeter in Epoch " + str(ep) + " : " + str(heatmapReconstructionErrorAverageMeter.avg))) LOGGER.info( str("Validation fullBodyErrorAverageMeter in Epoch " + str(ep) + " : " + str(eval_body.get_results()["All"]))) LOGGER.info( str("Validation upperBodyErrorAverageMeter in Epoch " + str(ep) + " : " + str(eval_body.get_results()["UpperBody"]))) LOGGER.info( str("Validation lowerBodyErrorAverageMeter in Epoch " + str(ep) + " : " + str(eval_body.get_results()["LowerBody"]))) # LOGGER.info(str("Validation heatmapPredictionErrorAverageMeter in Epoch " + str(ep) + " : " + str(heatmapPredictionErrorAverageMeter.avg))) # ----------------------------------------------------------- # ----------------------------------------------------------- # ----------------------- Save Phase ------------------------ # ----------------------------------------------------------- # ----------------------------------------------------------- LOGGER.info('Save...') # mkdir for this experiment if not os.path.exists( os.path.join( os.getcwd(), config_lifting_singleBranch.eval.experiment_folder)): os.mkdir( os.path.join( os.getcwd(), config_lifting_singleBranch.eval.experiment_folder)) # mkdir for this epoch if not os.path.exists( os.path.join( os.getcwd(), config_lifting_singleBranch.eval.experiment_folder, str("epoch_" + str(ep)))): os.mkdir( os.path.join( os.getcwd(), config_lifting_singleBranch.eval.experiment_folder, str("epoch_" + str(ep)))) # Variable for Final Model Selection # if errorAverageMeter.avg <= errorMin: # errorMin = ErrorAverageMeter.avg # errorMinIsUpdatedInThisEpoch = True # ------------------- Save results ------------------- LOGGER.info('Saving evaluation results...') # Training Result Saving res_train = { 'Loss': lossAverageMeterTrain.avg, # 'HeatmapPrediction': heatmapPredictionErrorAverageMeterTrain.avg, 'PosePredictionCosineSimilarityPerJoint': PosePredictionCosineSimilarityPerJointErrorAverageMeterTrain. 
avg, 'PosePredictionDistancePerJoint': PosePredictionDistancePerJointErrorAverageMeterTrain.avg, 'PosePredictionMSE': PosePredictionMSEErrorAverageMeterTrain.avg, # 'HeatmapReconstruction': heatmapReconstructionErrorAverageMeterTrain.avg, 'FullBody': eval_body_train.get_results()["All"], 'UpperBody': eval_body_train.get_results()["UpperBody"], 'LowerBody': eval_body_train.get_results()["LowerBody"], 'Neck': eval_body_train.get_results()["Neck"], 'Head': eval_body_train.get_results()["Head"], 'LeftArm': eval_body_train.get_results()["LeftArm"], 'LeftElbow': eval_body_train.get_results()["LeftElbow"], 'LeftHand': eval_body_train.get_results()["LeftHand"], 'RightArm': eval_body_train.get_results()["RightArm"], 'RightElbow': eval_body_train.get_results()["RightElbow"], 'RightHand': eval_body_train.get_results()["RightHand"], 'LeftLeg': eval_body_train.get_results()["LeftLeg"], 'LeftKnee': eval_body_train.get_results()["LeftKnee"], 'LeftFoot': eval_body_train.get_results()["LeftFoot"], 'LeftToe': eval_body_train.get_results()["LeftToe"], 'RightLeg': eval_body_train.get_results()["RightLeg"], 'RightKnee': eval_body_train.get_results()["RightKnee"], 'RightFoot': eval_body_train.get_results()["RightFoot"], 'RightToe': eval_body_train.get_results()["RightToe"] } io.write_json( os.path.join( os.getcwd(), config_lifting_singleBranch.eval.experiment_folder, str("epoch_" + str(ep)), config_lifting_singleBranch.eval.training_result_file), res_train) # Evaluation Result Saving res = { 'Loss': lossAverageMeter.avg, # 'HeatmapPrediction': heatmapPredictionErrorAverageMeter.avg, 'PosePredictionCosineSimilarityPerJoint': PosePredictionCosineSimilarityPerJointErrorAverageMeter.avg, 'PosePredictionDistancePerJoint': PosePredictionDistancePerJointErrorAverageMeter.avg, 'PosePredictionMSE': PosePredictionMSEErrorAverageMeter.avg, # 'HeatmapReconstruction': heatmapReconstructionErrorAverageMeter.avg, 'FullBody': eval_body.get_results()["All"], 'UpperBody': 
eval_body.get_results()["UpperBody"], 'LowerBody': eval_body.get_results()["LowerBody"], 'Neck': eval_body.get_results()["Neck"], 'Head': eval_body.get_results()["Head"], 'LeftArm': eval_body.get_results()["LeftArm"], 'LeftElbow': eval_body.get_results()["LeftElbow"], 'LeftHand': eval_body.get_results()["LeftHand"], 'RightArm': eval_body.get_results()["RightArm"], 'RightElbow': eval_body.get_results()["RightElbow"], 'RightHand': eval_body.get_results()["RightHand"], 'LeftLeg': eval_body.get_results()["LeftLeg"], 'LeftKnee': eval_body.get_results()["LeftKnee"], 'LeftFoot': eval_body.get_results()["LeftFoot"], 'LeftToe': eval_body.get_results()["LeftToe"], 'RightLeg': eval_body.get_results()["RightLeg"], 'RightKnee': eval_body.get_results()["RightKnee"], 'RightFoot': eval_body.get_results()["RightFoot"], 'RightToe': eval_body.get_results()["RightToe"] } io.write_json( os.path.join( os.getcwd(), config_lifting_singleBranch.eval.experiment_folder, str("epoch_" + str(ep)), config_lifting_singleBranch.eval.evaluation_result_file), res) # Experiement config_lifting_singleBranchuration Saving copyfile( "data/config_lifting_singleBranch.yml", os.path.join( os.getcwd(), config_lifting_singleBranch.eval.experiment_folder, str("epoch_" + str(ep)), config_lifting_singleBranch.eval. 
experiment_configuration_file)) # Model Weights Saving # torch.save(backbone, os.path.join(os.getcwd(), config_lifting_singleBranch.eval.experiment_folder, str("epoch_" + ep), config_lifting_singleBranch.eval.backbone_weight_file)) torch.save( encoder.state_dict(), os.path.join( os.getcwd(), config_lifting_singleBranch.eval.experiment_folder, str("epoch_" + str(ep)), config_lifting_singleBranch.eval.encoder_weight_file)) torch.save( decoder.state_dict(), os.path.join( os.getcwd(), config_lifting_singleBranch.eval.experiment_folder, str("epoch_" + str(ep)), config_lifting_singleBranch.eval.decoder_weight_file)) # torch.save(reconstructer.state_dict(), os.path.join(os.getcwd(), config_lifting_singleBranch.eval.experiment_folder, str("epoch_" + str(ep)), config_lifting_singleBranch.eval.reconstructer_weight_file)) # Variable for Final Model Selection # errorMinIsUpdatedInThisEpoch = False scheduler.step() LOGGER.info('Done.')
def main():
    """Train the heatmap-prediction backbone, then validate and save per epoch.

    Reads every setting from the module-level ``config_backbone`` object,
    trains the configured ResNet backbone to regress joint heatmaps from
    images with an MSE loss, evaluates on the test split after each epoch,
    and writes results, the config copy, and model weights into a per-epoch
    experiment folder.
    """
    LOGGER.info('Starting demo...')

    # ------------------- Training data loader -------------------
    LOGGER.info('Training Backbone...')
    train_data_transform = transforms.Compose(
        [trsf.ImageTrsf(), trsf.Joints3DTrsf(), trsf.ToTensor()])
    train_data = Mocap(config_backbone.dataset.train,
                       SetType.TRAIN,
                       transform=train_data_transform)
    train_data_loader = DataLoader(
        train_data,
        batch_size=config_backbone.train_data_loader.batch_size,
        shuffle=config_backbone.train_data_loader.shuffle,
        num_workers=config_backbone.train_data_loader.workers)

    # ------------------- Build model -------------------
    backbone = _build_backbone(config_backbone.train_setting.backbone_type)

    if torch.cuda.device_count() > 1:
        LOGGER.info(
            str("Let's use " + str(torch.cuda.device_count()) + " GPUs!"))
        backbone = nn.DataParallel(backbone)
    backbone = backbone.cuda()

    # Load pre-trained weights when a path is configured, otherwise initialize.
    # NOTE(review): the state dict is loaded AFTER the optional DataParallel
    # wrap, so on multi-GPU hosts the checkpoint keys must carry the
    # "module." prefix — confirm against how the checkpoints were saved.
    if config_backbone.train_setting.backbone_path:
        backbone.load_state_dict(
            torch.load(config_backbone.train_setting.backbone_path))
        LOGGER.info('Backbone Weight Loaded!')
    else:
        backbone.apply(init_weights)
        LOGGER.info('Backbone Weight Initialized!')

    # ------------------- Loss, optimizer, schedule -------------------
    heatmap_prediction_loss_func = nn.MSELoss(reduction='mean')
    optimizer = optim.Adam([{"params": backbone.parameters()}], lr=0.001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

    # ------------------- Validation data loader -------------------
    # Hoisted out of the epoch loop: the original rebuilt an identical
    # dataset + DataLoader every single epoch.
    test_data_transform = transforms.Compose(
        [trsf.ImageTrsf(), trsf.Joints3DTrsf(), trsf.ToTensor()])
    test_data = Mocap(config_backbone.dataset.test,
                      SetType.TEST,
                      transform=test_data_transform)
    test_data_loader = DataLoader(
        test_data,
        batch_size=config_backbone.test_data_loader.batch_size,
        shuffle=config_backbone.test_data_loader.shuffle,
        num_workers=config_backbone.test_data_loader.workers)

    for ep in range(config_backbone.train_setting.epoch):
        # ------------------- Training phase -------------------
        backbone.train()
        # Per-epoch running averages.
        lossAverageMeter = AverageMeter()
        heatmapPredictionErrorAverageMeter = AverageMeter()
        for it, (img, p2d, p3d, action, heatmap) in tqdm(
                enumerate(train_data_loader), total=len(train_data_loader)):
            optimizer.zero_grad()
            img = img.cuda()
            heatmap = heatmap.cuda()
            predicted_heatmap = backbone(img)
            loss = heatmap_prediction_loss_func(predicted_heatmap, heatmap)
            loss.backward()
            optimizer.step()
            lossAverageMeter.update(loss.data.cpu().numpy())
        LOGGER.info(
            str("Training Loss in Epoch " + str(ep) + " : " +
                str(lossAverageMeter.avg)))

        # ------------------- Validation phase -------------------
        LOGGER.info('Validation...')
        backbone.eval()
        # no_grad: evaluation needs no autograd graph (saves memory/time).
        with torch.no_grad():
            for it, (img, p2d, p3d, action, heatmap) in tqdm(
                    enumerate(test_data_loader), total=len(test_data_loader)):
                img = img.cuda()
                heatmap = heatmap.cuda()
                predicted_heatmap = backbone(img)
                heatmap_prediction_loss = heatmap_prediction_loss_func(
                    predicted_heatmap, heatmap)
                heatmapPredictionErrorAverageMeter.update(
                    heatmap_prediction_loss.data.cpu().numpy())
        LOGGER.info(
            str("Validation heatmapPredictionErrorAverageMeter in Epoch " +
                str(ep) + " : " +
                str(heatmapPredictionErrorAverageMeter.avg)))

        # ------------------- Save phase -------------------
        LOGGER.info('Save...')
        # makedirs(exist_ok=True) replaces the racy exists()+mkdir() pair and
        # also creates the experiment folder itself if missing.
        epoch_dir = os.path.join(os.getcwd(),
                                 config_backbone.eval.experiment_folder,
                                 str("epoch_" + str(ep)))
        os.makedirs(epoch_dir, exist_ok=True)

        LOGGER.info('Saving evaluation results...')
        # Training result: average training loss of this epoch.
        res_train = {'HeatmapPrediction': lossAverageMeter.avg}
        io.write_json(
            os.path.join(epoch_dir,
                         config_backbone.eval.training_result_file),
            res_train)
        # Evaluation result: average validation heatmap error of this epoch.
        res = {'HeatmapPrediction': heatmapPredictionErrorAverageMeter.avg}
        io.write_json(
            os.path.join(epoch_dir,
                         config_backbone.eval.evaluation_result_file),
            res)
        # Snapshot the experiment configuration alongside the results.
        copyfile(
            "data/config_backbone.yml",
            os.path.join(epoch_dir,
                         config_backbone.eval.experiment_configuration_file))
        # Model weights for this epoch.
        torch.save(
            backbone.state_dict(),
            os.path.join(epoch_dir,
                         config_backbone.eval.backbone_weight_file))

        scheduler.step()
    LOGGER.info('Done.')


def _build_backbone(backbone_type):
    """Instantiate the ResNet backbone named by ``backbone_type``.

    Raises:
        ValueError: for an unknown backbone name.  The original ``if/elif``
        chain had no ``else`` branch, so an unknown name crashed later with
        an unrelated ``NameError``.
    """
    factories = {
        "resnet18": resnet18,
        "resnet34": resnet34,
        "resnet50": resnet50,
        "resnet101": resnet101,
        "resnet152": resnet152,
    }
    try:
        factory = factories[backbone_type]
    except KeyError:
        raise ValueError(
            "Unknown backbone_type: {!r}".format(backbone_type))
    LOGGER.info('Using {} Backbone!'.format(
        backbone_type.replace("resnet", "ResNet")))
    return factory()
def main(): """Main""" LOGGER.info('Starting demo...') # ------------------- Data loader ------------------- data_transform = transforms.Compose( [trsf.ImageTrsf(), trsf.Joints3DTrsf(), trsf.ToTensor()]) # let's load data from validation set as example data = Mocap(config.dataset.val, SetType.VAL, transform=data_transform) data_loader = DataLoader(data, batch_size=config.data_loader.batch_size, shuffle=config.data_loader.shuffle) # ------------------- Evaluation ------------------- eval_body = evaluate.EvalBody() eval_upper = evaluate.EvalUpperBody() eval_lower = evaluate.EvalUpperBody() # ------------------- Read dataset frames ------------------- for it, (img, p2d, p3d, action) in enumerate(data_loader): LOGGER.info('Iteration: {}'.format(it)) LOGGER.info('Images: {}'.format(img.shape)) LOGGER.info('p2ds: {}'.format(p2d.shape)) LOGGER.info('p3ds: {}'.format(p3d.shape)) LOGGER.info('Actions: {}'.format(action)) # ----------------------------------------------------------- # ------------------- Run your model here ------------------- # ----------------------------------------------------------- # TODO: replace p3d_hat with model preditions p3d_hat = torch.ones_like(p3d) # Evaluate results using different evaluation metrices y_output = p3d_hat.data.cpu().numpy() y_target = p3d.data.cpu().numpy() eval_body.eval(y_output, y_target, action) eval_upper.eval(y_output, y_target, action) eval_lower.eval(y_output, y_target, action) # TODO: remove break break # ------------------- Save results ------------------- LOGGER.info('Saving evaluation results...') res = { 'FullBody': eval_body.get_results(), 'UpperBody': eval_upper.get_results(), 'LowerBody': eval_lower.get_results() } io.write_json(config.eval.output_file, res) LOGGER.info('Done.')
def process_data(config):
    """Import and preprocess the raw datasets.

    Builds word vocabularies and counters from the raw train/dev/ref/asr
    text files, optionally filters the vocabulary against a GloVe vocabulary
    and exports the matching embedding matrix, converts every split into an
    indexed dataset, and writes all artifacts as JSON for downstream jobs or
    exploratory data analysis.

    Args:
        config: a dictionary containing dataset parameters (paths, vocab
            limits, GloVe settings, and output-file keys).

    Returns:
        None.
    """
    splits = ("train", "dev", "ref", "asr")
    # Raw input file per split, e.g. <raw_path>/train.txt
    raw_files = {name: os.path.join(config["raw_path"], name + ".txt")
                 for name in splits}

    if not os.path.exists(config["save_path"]):
        os.makedirs(config["save_path"])

    # ---------------- word vocabularies / counters ----------------
    word_counters = {}
    for name in splits:
        vocab_list = build_vocab_list([raw_files[name]],
                                      config["min_word_count"],
                                      config["max_vocab_size"])
        word_counters[name] = get_word_counter(vocab_list)
    train_word_vocab = list(word_counters["train"].keys())

    if not config["use_pretrained"]:
        word_dict = build_vocabulary(train_word_vocab)
    else:
        # Keep only training words that also appear in the (lower-cased)
        # GloVe vocabulary, then export the matching embedding vectors.
        glove_path = config["glove_path"].format(config["glove_name"],
                                                 config["emb_dim"])
        glove_vocab = load_glove_vocab(glove_path, config["glove_name"])
        glove_vocab = glove_vocab & {word.lower() for word in glove_vocab}
        word_dict = build_vocabulary(
            [word for word in train_word_vocab if word in glove_vocab])
        emb_word_dict = word_dict.copy()
        # Special tokens carry no pretrained vector.
        del emb_word_dict[UNK], emb_word_dict[NUM], emb_word_dict[END]
        vectors = filter_glove_emb(emb_word_dict, glove_path,
                                   config["glove_name"], config["emb_dim"])
        np.savez_compressed(config["pretrained_emb"], embeddings=vectors)

    # ---------------- indexed datasets + punctuation counters -------------
    punct_dict = {punct: idx
                  for idx, punct in enumerate(PUNCTUATION_VOCABULARY)}
    datasets, punct_counters = {}, {}
    for name in splits:
        datasets[name], punct_counters[name] = build_dataset(
            [raw_files[name]], word_dict, punct_dict,
            config["max_sequence_len"])

    # ---------------- write artifacts (same files, same order) ------------
    write_json(config["vocab"],
               {"word_dict": word_dict, "tag_dict": punct_dict})
    for name in splits:
        write_json(config[name + "_word_counter"], word_counters[name])
    for name in splits:
        write_json(config[name + "_punct_counter"], punct_counters[name])
    for name in splits:
        write_json(config[name + "_set"], datasets[name])
print("Initialized Network ... ") train_GPU = True device = torch.device("cuda" if (torch.cuda.is_available() and train_GPU) else "cpu") print(device) net.to(device) print("Loaded Network to GPU ... ") # load already trained model net.load_state_dict(checkpoint['model_state_dict']) print("Loaded existing model check point ...") start_t = time.time() # evaluate gt, pred = eval(net, testloader, device) # compute the performance metrics result = metrics.classification_report(gt, pred, digits=3) print(result) # save the loss and accuracy of train and validation into numpy array npz file. io.write_json(result_file, result) end_t = time.time() print("Time for training {:.03f} hrs.".format((end_t - start_t)/3600)) print('Finished Testing')