def build_dataset(self, mode):
    """Build the train or val dataset together with its iterating loader.

    Args:
        mode: Dataset split to build, either `train` or `val`
            (case-insensitive).

    Raises:
        ValueError: If `mode` is not `train` or `val`.

    Returns silently when the config has no `data` section.
    """
    if not hasattr(self.config, 'data'):
        return
    assert isinstance(mode, str)
    mode = mode.lower()
    self.logger.info(f'Building `{mode}` dataset ...')
    if mode not in ['train', 'val']:
        raise ValueError(f'Invalid dataset mode `{mode}`!')
    dataset = BaseDataset(**self.config.data[mode])
    if mode == 'train':
        # Training loader resumes from the current iteration and may
        # repeat the data according to the config.
        self.train_loader = IterDataLoader(
            dataset=dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.config.data.get('num_workers', 2),
            current_iter=self.iter,
            repeat=1 if False else self.config.data.get('repeat', 1))
    else:
        # mode == 'val' is guaranteed here: the guard above already
        # rejected anything other than 'train'/'val', so the original
        # trailing `raise NotImplementedError` branch was unreachable
        # and has been removed.
        self.val_loader = IterDataLoader(
            dataset=dataset,
            batch_size=self.val_batch_size,
            shuffle=False,
            num_workers=self.config.data.get('num_workers', 2),
            current_iter=0,
            repeat=1)
    self.logger.info(f'Finish building `{mode}` dataset.')
def cmapss_hyperopt(fd, window_size, batch_size=1, shuffle=True,
                    percent_fail_runs=None, percent_broken=None,
                    normalization='minmax'):
    """Build a dev and eval set from the training data to do hyperopt on."""
    train_data = CMAPSSDataset(fd, 'train', window_size=window_size,
                               normalization=normalization,
                               percent_fail_runs=percent_fail_runs,
                               percent_broken=percent_broken)
    # Indices of samples whose target equals 1.
    # NOTE(review): np.argwhere returns a 2-D (n, 1) array, so
    # split_idx[num_train] is a length-1 array, not a scalar; np.arange
    # accepts it only because it has a single element — confirm this is
    # the intended split-point semantics.
    split_idx = np.argwhere(train_data.targets == 1)
    # 80/20 split over the target==1 positions: first 80% -> dev.
    num_train = int(0.8 * len(split_idx))
    dev_data = torch.utils.data.Subset(train_data,
                                       np.arange(split_idx[num_train]))
    # Remaining indices up to (and including) the last target==1 position
    # form the eval split.
    eval_data = torch.utils.data.Subset(
        train_data, np.arange(split_idx[num_train], split_idx[-1] + 1))
    return BaseDataset(dev_data, eval_data, batch_size, shuffle,
                       dvc_file=os.path.join(DATA_ROOT, 'CMAPSS.dvc'))
def cmapss(fd, window_size, batch_size=1, shuffle=True,
           percent_fail_runs=None, percent_broken=None,
           normalization='minmax'):
    """CMAPSS construction function to get a BaseDataset."""
    # Both splits share every construction option except the split name.
    shared_opts = dict(window_size=window_size,
                       normalization=normalization,
                       percent_fail_runs=percent_fail_runs,
                       percent_broken=percent_broken)
    splits = {name: CMAPSSDataset(fd, name, **shared_opts)
              for name in ('train', 'test')}
    return BaseDataset(splits['train'], splits['test'], batch_size, shuffle,
                       dvc_file=os.path.join(DATA_ROOT, 'CMAPSS.dvc'))
def no_adaption_dataset(source_dataset, target_dataset, **kwargs):
    """Build a dataset with training data from source and eval data from target dataset."""
    both_are_specs = (isinstance(source_dataset, dict)
                      and isinstance(target_dataset, dict))
    if both_are_specs:
        # Config dicts are first materialized into dataset objects.
        # NOTE(review): kwargs is passed positionally, not expanded —
        # presumably _build_datasets takes a plain dict; verify.
        source_dataset, target_dataset = _build_datasets(
            source_dataset, target_dataset, kwargs)
    dvc_files = (source_dataset.dvc_file, target_dataset.dvc_file)
    return BaseDataset(source_dataset.train_data,
                       target_dataset.eval_data,
                       dvc_file=dvc_files,
                       **kwargs)
def sample_batch() -> torch.Tensor:
    """Fetch one validation image batch and move it onto the GPU."""
    dataset = BaseDataset(root_dir='data/demo.zip',
                          data_format='zip',
                          resolution=64)
    loader = LocalIterDataloader(dataset=dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 current_iter=0,
                                 repeat=1)
    batch = next(loader)
    return batch['image'].cuda()
def office_dslr_dataset(batch_size, shuffle):
    """Create a dataset from the dslr part of Office-31.

    Args:
        batch_size: Batch size forwarded to BaseDataset.
        shuffle: Whether BaseDataset should shuffle the data.
    """
    # NOTE(review): 244 is an unusual crop size — ImageNet-style pipelines
    # normally use 224; confirm whether 244 is intentional or a typo.
    trans = transforms.Compose([
        transforms.CenterCrop(244),
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
    ])
    data_path = os.path.join(DATA_ROOT, 'Office-31', 'dslr', 'images')
    data = torchvision.datasets.ImageFolder(data_path, transform=trans)
    # The same folder serves as both the train and eval split here.
    return BaseDataset(data, data, batch_size, shuffle,
                       dvc_file=os.path.join(DATA_ROOT, 'Office-31.dvc'))
def main(args):
    """Evaluate a ResNet-18 checkpoint on the val split of `args.data`."""
    transform = getTransforms()
    data_path = os.path.join('data', args.data)
    if not os.path.exists(data_path):
        print('ERROR: No dataset named {}'.format(args.data))
        exit(1)

    val_list = os.path.join(data_path, 'val.lst')
    testset = BaseDataset(list_path=val_list, transform=transform)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=1,
                                             shuffle=True,
                                             num_workers=1)

    class_list = getClassList(data_path)
    model = ResNet(num_layers=18, num_classes=len(class_list)).to(DEVICE)
    model.eval()

    # Fall back to the training checkpoint when no explicit model file given.
    output_dir = os.path.join('outputs', args.data)
    model_state_file = os.path.join(output_dir, 'checkpoint.pth.tar')
    model_file = args.model_file
    if len(model_file) == 0:
        model_file = model_state_file
    if not os.path.exists(model_file):
        print('model_file "{}" does not exists.'.format(model_file))
        exit(1)

    checkpoint = torch.load(model_file)
    # Checkpoints may store the weights under 'state_dict' or be a raw dict.
    weights = checkpoint['state_dict'] if 'state_dict' in checkpoint.keys() else checkpoint
    model.load_state_dict(weights, strict=False)
    print('=> loaded {}'.format(model_file))

    accuracy = test(model=model, dataloader=testloader,
                    device=DEVICE, classes=class_list)
    print('Accuracy: {:.2f}%'.format(100 * accuracy))
# --- NOTE(review): the print block below is the tail of an evaluation
# --- routine whose `def` line lies outside this chunk; the metric
# --- variables (accuracy, f1, eval_parts, ...) are defined there.
    print('Accuracy: ', accuracy / pixel_count)
    print('F1: ', f1.mean())
    print()
    if eval_parts:
        # Per-part segmentation metrics, averaged over the listed part ids.
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()


if __name__ == '__main__':
    args = parser.parse_args()
    # Options come from a JSON config, exposed attribute-style via namedtuple.
    with open(args.config, 'r') as f:
        options = json.load(f)
    options = namedtuple('options', options.keys())(**options)
    # Load model
    mesh = Mesh()
    model = CMR(mesh,
                options.num_layers,
                options.num_channels,
                pretrained_checkpoint=args.checkpoint)
    # Setup evaluation dataset
    dataset = BaseDataset(options, args.dataset, is_train=False)
    # Run evaluation
    run_evaluation(model,
                   args.dataset,
                   dataset,
                   mesh,
                   batch_size=args.batch_size,
                   shuffle=args.shuffle,
                   log_freq=args.log_freq)
# params +=['--num_workers',0] args = parser.parse_args(params) args.batch_size =128 args.num_workers =0 model = hmr(config.SMPL_MEAN_PARAMS) checkpoint = torch.load(args.checkpoint) model.load_state_dict(checkpoint['model'], strict=False) model.cuda() model.eval() # Setup evaluation dataset # dataset = BaseDataset(None, '3dpw', is_train=False, bMiniTest=False) dataset = BaseDataset(None, '3dpw', is_train=False, bMiniTest=False, bEnforceUpperOnly=False) # Run evaluation # result_file_name = '/run/media/hjoo/disk/data/cocoPose3D_amt/0_SPIN/result_3dpw_urs_11_04_59961_4030.pkl' result_file_name = '/run/media/hjoo/disk/data/cocoPose3D_amt/0_SPIN/spin_11-06-42861-upper0_2_ours_lc3d_all-8935.pkl' run_evaluation(model, '3dpw',dataset , result_file_name, batch_size=args.batch_size, shuffle=args.shuffle, log_freq=args.log_freq, num_workers=args.num_workers) # # Setup evaluation dataset # dataset = BaseDataset(None, 'h36m-p1', is_train=False, bMiniTest=False) # # Run evaluation # args.result_file = '/run/media/hjoo/disk/data/cocoPose3D_amt/0_SPIN/result_h36m_spin' # run_evaluation(model, 'h36m-p1', dataset, args.result_file,
# Print final results during evaluation print('*** Final Results ***') print() if eval_pose: print('MPJPE: ' + str(1000 * mpjpe.mean())) print('Reconstruction Error: ' + str(1000 * recon_err.mean())) print() if eval_masks: print('Accuracy: ', accuracy / pixel_count) print('F1: ', f1.mean()) print() if eval_parts: print('Parts Accuracy: ', parts_accuracy / parts_pixel_count) print('Parts F1 (BG): ', parts_f1[[0,1,2,3,4,5,6]].mean()) print() if __name__ == '__main__': args = parser.parse_args() model = hmr(config.SMPL_MEAN_PARAMS) checkpoint = torch.load(args.checkpoint) model.load_state_dict(checkpoint['model'], strict=False) model.eval() # Setup evaluation dataset dataset = BaseDataset(None, args.dataset, is_train=False) # Run evaluation run_evaluation(model, args.dataset, dataset, args.result_file, batch_size=args.batch_size, shuffle=args.shuffle, log_freq=args.log_freq)
# Command-line parsing via docopt; the module docstring holds the usage text.
args = docopt(__doc__)
dataset_file = args['--dataset']

VERBOSE = True
# Encoder/decoder embedding and hidden sizes.
ENC_EMB_DIM = 256 #768
DEC_EMB_DIM = 256
ENC_HID_DIM = 512
DEC_HID_DIM = 512
DROPOUT = 0
# presumably the teacher-forcing ratio for seq2seq training — confirm.
FORCING_RATIO = 1
BATCH_SIZE = 8

#dataset = BaseDataset([dataset_file])
#dataloader = SequenceLoader(dataset, BATCH_SIZE, 'train')
dataset = BaseDataset([dataset_file])
dataset.convert()
dataloader = SequenceLoader(dataset, BATCH_SIZE, 'train')
#embedding_model = ContextualEmbeddingModel('bert', dataset.max_num_variables, dataset.max_num_constants)
'''
print(dataset.questions)
questions = [q for q in dataset.src_vocab.indices2words(dataset.questions[0].tolist())]
equations = [e for e in dataset.tgt_vocab.indices2words(dataset.equations[0].tolist())]
print(questions)
print(equations)
print(dataset.alignments)
print(dataset.solutions)
'''
#criterion = nn.CrossEntropyLoss() #TODO: add ignore index for pad token?
# NOTE(review): this chunk is truncated — the RNNModel(...) call continues
# outside the visible source.
model = RNNModel(len(dataset.src_vocab),
if __name__ == '__main__': args = parser.parse_args() # load danet configures cfg_from_file(args.danet_cfg_file) cfg.DANET.REFINEMENT = EasyDict(cfg.DANET.REFINEMENT) cfg.MSRES_MODEL.EXTRA = EasyDict(cfg.MSRES_MODEL.EXTRA) if args.regressor == 'hmr': model = hmr(path_config.SMPL_MEAN_PARAMS) elif args.regressor == 'danet': model = DaNet(args, path_config.SMPL_MEAN_PARAMS, pretrained=False) checkpoint = torch.load(args.checkpoint) model.load_state_dict(checkpoint['model'], strict=False) model.eval() # Setup evaluation dataset dataset = BaseDataset(args, args.dataset, is_train=False) # Run evaluation run_evaluation(model, args.dataset, dataset, args.result_file, batch_size=args.batch_size, shuffle=args.shuffle, log_freq=args.log_freq, options=args)
def main(cli_args):
    """Train and/or evaluate a transformer classifier per the JSON config.

    Args:
        cli_args: Parsed CLI namespace carrying config paths, dataset dir,
            model/transformer mode, margin, and GPU index.
    """
    # Read from config file and make args
    with open(os.path.join(cli_args.config_dir, cli_args.config_file)) as f:
        args = AttrDict(json.load(f))
    logger.info("Training/evaluation parameters {}".format(args))
    logger.info("cliargs parameters {}".format(cli_args))
    args.output_dir = os.path.join(args.ckpt_dir, cli_args.result_dir)
    args.model_mode = cli_args.model_mode
    args.margin = cli_args.margin
    init_logger()
    set_seed(args)

    # Map the transformer mode onto a Hugging Face model identifier.
    model_link = None
    if cli_args.transformer_mode.upper() == "T5":
        model_link = "t5-base"
    elif cli_args.transformer_mode.upper() == "ELECTRA":
        model_link = "google/electra-base-discriminator"
    elif cli_args.transformer_mode.upper() == "ALBERT":
        model_link = "albert-base-v2"
    elif cli_args.transformer_mode.upper() == "ROBERTA":
        model_link = "roberta-base"
    elif cli_args.transformer_mode.upper() == "BERT":
        model_link = "bert-base-uncased"
    print(model_link)
    tokenizer = AutoTokenizer.from_pretrained(model_link)

    # Resolve split files relative to the chosen dataset directory.
    args.test_file = os.path.join(cli_args.dataset, args.test_file)
    args.dev_file = os.path.join(cli_args.dataset, args.dev_file)
    args.train_file = os.path.join(cli_args.dataset, args.train_file)

    # Load dataset
    train_dataset = BaseDataset(args, tokenizer, mode="train") if args.train_file else None
    dev_dataset = BaseDataset(args, tokenizer, mode="dev") if args.dev_file else None
    test_dataset = BaseDataset(args, tokenizer, mode="test") if args.test_file else None
    # Fixed: compare to the None singleton with `is`, not `==` (PEP 8).
    if dev_dataset is None:
        args.evaluate_test_during_training = True  # If there is no dev dataset, only use testset

    # Log/save once per epoch.
    args.logging_steps = int(len(train_dataset) / args.train_batch_size) + 1
    args.save_steps = args.logging_steps
    labelNumber = train_dataset.getLabelNumber()
    labels = [str(i) for i in range(labelNumber)]
    config = AutoConfig.from_pretrained(model_link)

    # GPU or CPU
    args.device = "cuda:{}".format(
        cli_args.gpu
    ) if torch.cuda.is_available() and not args.no_cuda else "cpu"
    config.device = args.device
    args.model_mode = cli_args.model_mode

    model = MODEL_LIST[cli_args.model_mode](model_link, args.model_type,
                                            args.model_name_or_path, config,
                                            labelNumber, args.margin)
    model.to(args.device)

    if args.do_train:
        global_step, tr_loss = train(args, model, train_dataset, dev_dataset,
                                     test_dataset)
        logger.info(" global_step = {}, average loss = {}".format(
            global_step, tr_loss))

    results = {}
    if args.do_eval:
        checkpoints = list(
            os.path.dirname(c) for c in sorted(
                glob.glob(args.output_dir + "/**/" + "pytorch_model.bin",
                          recursive=True)))
        if not args.eval_all_checkpoints:
            checkpoints = checkpoints[-1:]
        else:
            logging.getLogger("transformers.configuration_utils").setLevel(
                logging.WARN)  # Reduce logging
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1]
            # NOTE(review): MODEL_LIST is indexed by model_mode above but by
            # model_type here — confirm both keys exist in MODEL_LIST.
            model = MODEL_LIST[args.model_type].from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args, model, test_dataset, mode="test",
                              global_step=global_step)
            # Suffix metric names with the checkpoint step for uniqueness.
            result = dict(
                (k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as f_w:
            for key in sorted(results.keys()):
                f_w.write("{} = {}\n".format(key, str(results[key])))
def main(args):
    """Train a ResNet-18 classifier with resumable checkpointing and plots.

    Args:
        args: CLI namespace with data, train_batch, val_batch, num_classes,
            lr, and max_epoch attributes.
    """
    transform = getTransforms()
    data_path = os.path.join('data', args.data)
    if not os.path.exists(data_path):
        print('ERROR: No dataset named {}'.format(args.data))
        exit(1)
    trainset = BaseDataset(list_path=os.path.join(data_path, 'train.lst'),
                           transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=args.train_batch,
                                              shuffle=True,
                                              num_workers=1)
    # NOTE(review): the val loader uses shuffle=True — harmless for the
    # metrics computed here, but unusual; confirm it is intentional.
    testset = BaseDataset(list_path=os.path.join(data_path, 'val.lst'),
                          transform=transform)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=args.val_batch,
                                             shuffle=True,
                                             num_workers=1)
    model = ResNet(num_layers=18, num_classes=args.num_classes,
                   pretrained=True).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
    summary(model, input_size=(3, 32, 32))
    max_epoch = args.max_epoch
    last_epoch = 0
    best_val_loss = None
    best_accuracy = None
    train_losses = []
    val_losses = []
    accuracies = []
    output_dir = os.path.join('outputs', args.data)
    model_state_file = os.path.join(output_dir, 'checkpoint.pth.tar')
    os.makedirs(output_dir, exist_ok=True)
    # Resume training state (epoch, metrics history, weights, optimizer)
    # when a checkpoint already exists.
    if os.path.exists(model_state_file):
        checkpoint = torch.load(model_state_file)
        last_epoch = checkpoint['epoch']
        best_val_loss = checkpoint['best_val_loss']
        best_accuracy = checkpoint['best_accuracy']
        train_losses = checkpoint['train_losses']
        val_losses = checkpoint['val_losses']
        accuracies = checkpoint['accuracies']
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('=> loaded checkpoint (epoch {})'.format(last_epoch))
    for epoch in range(last_epoch, max_epoch):
        print('Epoch {}'.format(epoch))
        train_loss = train(model=model, dataloader=trainloader,
                           criterion=criterion, optimizer=optimizer,
                           device=DEVICE)
        val_loss = val(model=model, dataloader=testloader,
                       criterion=criterion, device=DEVICE)
        accuracy = test(model=model, dataloader=testloader, device=DEVICE)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        accuracies.append(accuracy)
        print('Loss: train = {}, val = {}, acc. = {}'.format(
            train_loss, val_loss, accuracy))
        # if best_val_loss is None or val_loss < best_val_loss:
        #     best_val_loss = val_loss
        #     torch.save(
        #         model.state_dict(),
        #         os.path.join(output_dir, 'best.pth')
        #     )
        # Keep the weights with the best validation accuracy.
        if best_accuracy is None or accuracy > best_accuracy:
            best_accuracy = accuracy
            torch.save(model.state_dict(),
                       os.path.join(output_dir, 'best.pth'))
        # Full resume checkpoint is rewritten every epoch.
        print('=> saving checkpoint to {}'.format(model_state_file))
        torch.save(
            {
                'epoch': epoch + 1,
                'best_val_loss': best_val_loss,
                'best_accuracy': best_accuracy,
                'train_losses': train_losses,
                'val_losses': val_losses,
                'accuracies': accuracies,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict()
            }, model_state_file)
        # Refresh the loss/accuracy plot every 100 epochs.
        if (epoch + 1) % 100 == 0:
            # plt.plot(range(epoch+1), train_losses, label="train")
            # plt.plot(range(epoch+1), val_losses, label="val")
            # plt.yscale('log')
            # plt.legend()
            # plt.savefig(os.path.join(output_dir, 'losses.png'))
            # plt.clf()
            fig, ax1 = plt.subplots()
            ax2 = ax1.twinx()  # accuracy shares the x-axis on a second scale
            ax1.plot(range(epoch + 1), train_losses, label='train')
            ax1.plot(range(epoch + 1), val_losses, label='val')
            ax1.set_xscale('log')
            ax1.set_yscale('log')
            ax2.plot(range(epoch + 1), accuracies, color='red',
                     label='accuracy')
            ax2.set_xscale('log')
            # Merge the legends of both axes into one box.
            handler1, label1 = ax1.get_legend_handles_labels()
            handler2, label2 = ax2.get_legend_handles_labels()
            ax1.legend(handler1 + handler2, label1 + label2, loc=3,
                       borderaxespad=0.)
            plt.savefig(os.path.join(output_dir, 'losses.png'))
            plt.clf()
def main(cli_args):
    """Run the best checkpoint on the test set and dump results to Excel.

    Args:
        cli_args: CLI namespace with result_dir, config paths, test_file,
            model_mode, and gpu attributes.
    """
    # Read from config file and make args
    max_checkpoint = "checkpoint-best"
    args = torch.load(os.path.join("ckpt", cli_args.result_dir,
                                   max_checkpoint, "training_args.bin"))
    args.test_file = cli_args.test_file
    with open(os.path.join(cli_args.config_dir, cli_args.config_file)) as f:
        config = json.load(f)
    args.data_dir = config["data_dir"]
    # CLI test file takes priority; fall back to the config's test file.
    # NOTE(review): `== None` should be `is None` (PEP 8) — left as-is here.
    if args.test_file == None:
        args.test_file = config["test_file"]
    logger.info("Testing parameters {}".format(args))
    args.model_mode = cli_args.model_mode
    # This device string is recomputed (with a CPU fallback) further below.
    args.device = "cuda:"+str(cli_args.gpu)
    init_logger()
    labels = ["0", "1"]  # binary classification
    # `config` is rebound from the JSON dict to the transformer config here.
    config = CONFIG_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
        num_labels=2,
        id2label={str(i): label for i, label in enumerate(labels)},
        label2id={label: i for i, label in enumerate(labels)},
    )
    tokenizer = TOKENIZER_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
        do_lower_case=args.do_lower_case
    )
    args.device = "cuda:{}".format(cli_args.gpu) if torch.cuda.is_available() and not args.no_cuda else "cpu"
    config.device = args.device
    print(args.test_file)
    # Load dataset
    test_dataset = BaseDataset(args, tokenizer, mode="test") if args.test_file else None
    logger.info("Testing model checkpoint to {}".format(max_checkpoint))
    global_step = max_checkpoint.split("-")[-1]
    model = MODEL_LIST[cli_args.model_mode](args.model_type,
                                            args.model_name_or_path, config)
    model.load_state_dict(torch.load(os.path.join("ckpt",
                                                  cli_args.result_dir,
                                                  max_checkpoint,
                                                  "training_model.bin")))
    model.to(args.device)
    # KOSAC variants additionally return polarity/intensity id sequences.
    if "KOSAC" in args.model_mode:
        preds, labels, result, txt_all, polarity_ids, intensity_ids = evaluate(
            args, model, test_dataset, mode="test", global_step=global_step)
    else:
        preds, labels, result, txt_all= evaluate(
            args, model, test_dataset, mode="test", global_step=global_step)
    # Collect per-sample predictions into a table for inspection.
    pred_and_labels = pd.DataFrame([])
    pred_and_labels["data"] = txt_all
    pred_and_labels["pred"] = preds
    pred_and_labels["label"] = labels
    pred_and_labels["result"] = preds == labels
    decode_result = list(
        pred_and_labels["data"].apply(
            lambda x: tokenizer.convert_ids_to_tokens(tokenizer(x)["input_ids"])))
    pred_and_labels["tokenizer"] = decode_result
    if "KOSAC" in args.model_mode:
        # Pair each token with its decoded polarity/intensity annotation.
        tok_an = [list(zip(x,
                           test_dataset.convert_ids_to_polarity(y)[:len(x) + 1],
                           test_dataset.convert_ids_to_intensity(z)[:len(x) + 1]))
                  for x, y, z in zip(decode_result, polarity_ids, intensity_ids)]
        pred_and_labels["tokenizer_analysis(token,polarity,intensitiy)"] = tok_an
    pred_and_labels.to_excel(os.path.join("ckpt", cli_args.result_dir,
                                          "test_result_" + max_checkpoint + ".xlsx"),
                             encoding="cp949")
def init_fn(self):
    """Set up model, losses, optimizer, datasets, loaders, and renderers.

    Builds the regressor (HMR/SPIN or PyMAF), wraps it for distributed
    training when requested, creates the loss criteria and Adam optimizer,
    constructs train/val datasets and loaders, and initializes auxiliary
    components (fits dictionary, renderer, IUV maker).
    """
    if self.options.rank == 0:
        self.summary_writer.add_text('command_args', print_args())

    if self.options.regressor == 'hmr':
        # HMR/SPIN model
        self.model = hmr(path_config.SMPL_MEAN_PARAMS, pretrained=True)
        self.smpl = SMPL(path_config.SMPL_MODEL_DIR,
                         batch_size=cfg.TRAIN.BATCH_SIZE,
                         create_transl=False).to(self.device)
    elif self.options.regressor == 'pymaf_net':
        # PyMAF model
        self.model = pymaf_net(path_config.SMPL_MEAN_PARAMS, pretrained=True)
        self.smpl = self.model.regressor[0].smpl

    if self.options.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if self.options.gpu is not None:
            torch.cuda.set_device(self.options.gpu)
            self.model.cuda(self.options.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            self.options.batch_size = int(self.options.batch_size /
                                          self.options.ngpus_per_node)
            self.options.workers = int(
                (self.options.workers + self.options.ngpus_per_node - 1) /
                self.options.ngpus_per_node)
            self.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                self.model)
            self.model = torch.nn.parallel.DistributedDataParallel(
                self.model,
                device_ids=[self.options.gpu],
                output_device=self.options.gpu,
                find_unused_parameters=True)
        else:
            self.model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            self.model = torch.nn.parallel.DistributedDataParallel(
                self.model, find_unused_parameters=True)
        self.models_dict = {'model': self.model.module}
    else:
        self.model = self.model.to(self.device)
        self.models_dict = {'model': self.model}

    cudnn.benchmark = True
    # Per-vertex loss on the shape
    self.criterion_shape = nn.L1Loss().to(self.device)
    # Keypoint (2D and 3D) loss
    # No reduction because confidence weighting needs to be applied
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    # Loss for SMPL parameter regression
    self.criterion_regr = nn.MSELoss().to(self.device)
    self.focal_length = constants.FOCAL_LENGTH

    if self.options.pretrained_checkpoint is not None:
        self.load_pretrained(
            checkpoint_file=self.options.pretrained_checkpoint)

    self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                      lr=cfg.SOLVER.BASE_LR,
                                      weight_decay=0)
    self.optimizers_dict = {'optimizer': self.optimizer}

    if self.options.single_dataset:
        self.train_ds = BaseDataset(self.options,
                                    self.options.single_dataname,
                                    is_train=True)
    else:
        self.train_ds = MixedDataset(self.options, is_train=True)
    self.valid_ds = BaseDataset(self.options,
                                self.options.eval_dataset,
                                is_train=False)

    if self.options.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            self.train_ds)
        val_sampler = None
    else:
        train_sampler = None
        val_sampler = None

    self.train_data_loader = DataLoader(self.train_ds,
                                        batch_size=self.options.batch_size,
                                        num_workers=self.options.workers,
                                        pin_memory=cfg.TRAIN.PIN_MEMORY,
                                        shuffle=(train_sampler is None),
                                        sampler=train_sampler)
    self.valid_loader = DataLoader(dataset=self.valid_ds,
                                   batch_size=cfg.TEST.BATCH_SIZE,
                                   shuffle=False,
                                   num_workers=cfg.TRAIN.NUM_WORKERS,
                                   pin_memory=cfg.TRAIN.PIN_MEMORY,
                                   sampler=val_sampler)

    # Load dictionary of fits
    self.fits_dict = FitsDict(self.options, self.train_ds)
    self.evaluation_accumulators = dict.fromkeys([
        'pred_j3d', 'target_j3d', 'target_theta', 'pred_verts',
        'target_verts'
    ])

    # Create renderer
    # Fixed: was a bare `except:` that also swallowed KeyboardInterrupt and
    # SystemExit; narrowed to Exception while keeping the best-effort
    # fallback behavior.
    try:
        self.renderer = OpenDRenderer()
    except Exception:
        print('No renderer for visualization.')
        self.renderer = None
    if cfg.MODEL.PyMAF.AUX_SUPV_ON:
        self.iuv_maker = IUV_Renderer(
            output_size=cfg.MODEL.PyMAF.DP_HEATMAP_SIZE)

    self.decay_steps_ind = 1
    self.decay_epochs_ind = 1
def main(cli_args):
    """Evaluate the best checkpoint on the test split and write a CSV report.

    Args:
        cli_args: CLI namespace with result_dir, config paths, dataset dir,
            transformer_mode, model_mode, and gpu attributes.
    """
    # Read from config file and make args
    max_checkpoint = "checkpoint-best"
    # NOTE(review): the saved training_args.bin is loaded but immediately
    # replaced by the JSON config below — confirm whether the load is needed.
    args = torch.load(
        os.path.join("ckpt", cli_args.result_dir, max_checkpoint,
                     "training_args.bin"))
    with open(os.path.join(cli_args.config_dir, cli_args.config_file)) as f:
        args = AttrDict(json.load(f))
    logger.info("Training/evaluation parameters {}".format(args))
    logger.info("cliargs parameters {}".format(cli_args))
    args.output_dir = os.path.join(args.ckpt_dir, cli_args.result_dir)
    args.model_mode = cli_args.model_mode
    # GPU or CPU (the original repeated this assignment three times; once
    # is sufficient since nothing changes it in between).
    args.device = "cuda:{}".format(
        cli_args.gpu
    ) if torch.cuda.is_available() and not args.no_cuda else "cpu"
    init_logger()
    set_seed(args)

    # Map the transformer mode onto a Hugging Face model identifier.
    model_link = None
    if cli_args.transformer_mode.upper() == "T5":
        model_link = "t5-base"
    elif cli_args.transformer_mode.upper() == "ELECTRA":
        model_link = "google/electra-base-discriminator"
    elif cli_args.transformer_mode.upper() == "ALBERT":
        model_link = "albert-base-v2"
    elif cli_args.transformer_mode.upper() == "ROBERTA":
        model_link = "roberta-base"
    elif cli_args.transformer_mode.upper() == "BERT":
        model_link = "bert-base-uncased"
    tokenizer = AutoTokenizer.from_pretrained(model_link)

    args.test_file = os.path.join(cli_args.dataset, args.test_file)
    # Bug fix: dev_file was previously joined from args.train_file (copy-paste
    # slip), silently making the dev split read the training file.
    args.dev_file = os.path.join(cli_args.dataset, args.dev_file)
    args.train_file = os.path.join(cli_args.dataset, args.train_file)

    # Load dataset
    train_dataset = BaseDataset(args, tokenizer, mode="train") if args.train_file else None
    dev_dataset = BaseDataset(args, tokenizer, mode="dev") if args.dev_file else None
    test_dataset = BaseDataset(args, tokenizer, mode="test") if args.test_file else None
    # Fixed: compare to the None singleton with `is`, not `==` (PEP 8).
    if dev_dataset is None:
        args.evaluate_test_during_training = True  # If there is no dev dataset, only use testset

    args.logging_steps = int(len(train_dataset) / args.train_batch_size) + 1
    args.save_steps = args.logging_steps
    labelNumber = train_dataset.getLabelNumber()
    labels = [str(i) for i in range(labelNumber)]
    config = AutoConfig.from_pretrained(model_link)
    config.device = args.device

    logger.info("Testing model checkpoint to {}".format(max_checkpoint))
    global_step = max_checkpoint.split("-")[-1]
    # NOTE(review): margin is hardcoded to -0.75 here — confirm it matches
    # the margin the checkpoint was trained with.
    model = MODEL_LIST[cli_args.model_mode](model_link, args.model_type,
                                            args.model_name_or_path, config,
                                            labelNumber, -0.75)
    model.load_state_dict(
        torch.load(
            os.path.join("ckpt", cli_args.result_dir, max_checkpoint,
                         "training_model.bin")))
    model.to(args.device)
    preds, labels, result, txt_all = evaluate(args,
                                              model,
                                              test_dataset,
                                              mode="test",
                                              global_step=global_step)
    # Collect per-sample predictions into a table for inspection.
    pred_and_labels = pd.DataFrame([])
    pred_and_labels["data"] = txt_all
    pred_and_labels["pred"] = preds
    pred_and_labels["label"] = labels
    pred_and_labels["result"] = preds == labels
    decode_result = list(pred_and_labels["data"].apply(
        lambda x: tokenizer.convert_ids_to_tokens(tokenizer(x)["input_ids"])))
    pred_and_labels["tokenizer"] = decode_result
    pred_and_labels.to_csv(os.path.join(
        "ckpt", cli_args.result_dir,
        "test_result_" + max_checkpoint + ".csv"),
                           encoding="utf-8")
def main(cli_args):
    """Train and/or evaluate a classifier configured from a JSON file.

    Args:
        cli_args: CLI namespace with config paths, result_dir, model_mode,
            and gpu attributes.
    """
    # Read from config file and make args
    with open(os.path.join(cli_args.config_dir, cli_args.config_file)) as f:
        args = AttrDict(json.load(f))
    logger.info("Training/evaluation parameters {}".format(args))
    logger.info("cliargs parameters {}".format(cli_args))
    args.output_dir = os.path.join(args.ckpt_dir, cli_args.result_dir)
    args.model_mode = cli_args.model_mode
    init_logger()
    set_seed(args)

    labels = ["0", "1"]  # binary classification
    config = CONFIG_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
        num_labels=2,
        id2label={str(i): label for i, label in enumerate(labels)},
        label2id={label: i for i, label in enumerate(labels)},
    )
    tokenizer = TOKENIZER_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path, do_lower_case=args.do_lower_case)

    # GPU or CPU
    args.device = "cuda:{}".format(
        cli_args.gpu
    ) if torch.cuda.is_available() and not args.no_cuda else "cpu"
    config.device = args.device
    args.model_mode = cli_args.model_mode
    model = MODEL_LIST[cli_args.model_mode](args.model_type,
                                            args.model_name_or_path, config)
    model.to(args.device)

    # Load dataset
    train_dataset = DATASET_LIST[cli_args.model_mode](
        args, tokenizer, mode="train") if args.train_file else None
    dev_dataset = BaseDataset(args, tokenizer, mode="dev") if args.dev_file else None
    test_dataset = BaseDataset(args, tokenizer, mode="test") if args.test_file else None
    args.logging_steps = int(len(train_dataset) / args.train_batch_size) + 2
    args.save_steps = args.logging_steps
    # Fixed: compare to the None singleton with `is`, not `==` (PEP 8).
    if dev_dataset is None:
        args.evaluate_test_during_training = True  # If there is no dev dataset, only use testset

    if args.do_train:
        global_step, tr_loss = train(args, model, train_dataset, dev_dataset,
                                     test_dataset)
        logger.info(" global_step = {}, average loss = {}".format(
            global_step, tr_loss))

    results = {}
    if args.do_eval:
        checkpoints = list(
            os.path.dirname(c) for c in sorted(
                glob.glob(args.output_dir + "/**/" + "pytorch_model.bin",
                          recursive=True)))
        if not args.eval_all_checkpoints:
            checkpoints = checkpoints[-1:]
        else:
            logging.getLogger("transformers.configuration_utils").setLevel(
                logging.WARN)  # Reduce logging
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1]
            # NOTE(review): MODEL_LIST is indexed by model_mode above but by
            # model_type here — confirm both keys exist in MODEL_LIST.
            model = MODEL_LIST[args.model_type].from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args, model, test_dataset, mode="test",
                              global_step=global_step)
            # Suffix metric names with the checkpoint step for uniqueness.
            result = dict(
                (k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as f_w:
            for key in sorted(results.keys()):
                f_w.write("{} = {}\n".format(key, str(results[key])))
def __init__(self, base_path):
    """Initialize by delegating directly to BaseDataset.

    Args:
        base_path: Root directory handed to BaseDataset unchanged.
    """
    # Explicit base-class call; NOTE(review): if this class ever takes part
    # in multiple inheritance, super().__init__(base_path) would be safer.
    BaseDataset.__init__(self, base_path)
# --- NOTE(review): this chunk is the tail of a PCK-evaluation helper; the
# --- first line closes an os.path.join(...) call that starts outside the
# --- visible source (presumably building gt_kp_path).
                               args.crop_setting, 'keypoints.pkl')
    log_dir = os.path.join(cfg.BASE_DATA_DIR, 'cmr_pck_results.txt')
    # Ground-truth keypoints are stored as a pickle.
    with open(gt_kp_path, 'rb') as f:
        gt = pkl.load(f)
    calc = CalcPCK(
        all_kps,
        gt,
        num_imgs=cfg.DATASET_SIZES[args.dataset][args.crop_setting],
        log_dir=log_dir,
        dataset=args.dataset,
        crop_setting=args.crop_setting,
        pck_eval_threshold=args.pck_eval_threshold)
    calc.eval()


if __name__ == '__main__':
    args = parser.parse_args()
    # Load model
    mesh = Mesh()
    model = CMR(mesh,
                args.num_layers,
                args.num_channels,
                pretrained_checkpoint=args.pretrained_checkpoint)
    # Setup evaluation dataset
    dataset = BaseDataset(args, args.dataset)
    # Run evaluation
    run_evaluation(model, args, dataset, mesh)