def train(self):
    # Script arguments
    args = self._parse_args()

    # Set custom seed before doing anything
    common.set_custom_seed(args.seed)

    # Create directory to save plots, models, results, etc
    log_path = common.create_log_dir(args.exp_id, args.task, args.loss)
    print(f"Logging to {log_path}")

    # Load dataset
    train, nclass = self._load_dataset(args.loss, args.batch_size)

    # Create model
    config = common.get_config(args.loss, self.nfeat, nclass, self.task, args.margin)
    model = self._create_model(config)

    # Train and evaluation plugins
    test_plugins = []
    train_plugins = []

    # Logging configuration
    self._logging(args.log_interval, log_path, train, train_plugins, test_plugins)

    # Model saving configuration
    self._model_saving(args.save, train_plugins, test_plugins)

    # Plotting configuration
    self._plotting(args.plot, train_plugins, test_plugins)

    # Other useful plugins
    self._on_all_plugins_added(train_plugins, test_plugins)

    # Evaluation configuration
    train_plugins.append(
        self._create_evaluator(args.loss, args.batch_size, config, test_plugins))

    # Training configuration
    trainer = Trainer(args.loss, model, config.loss, train,
                      config.optimizer(model, self.task, args.lr),
                      model_loader=ModelLoader(args.recover) if args.recover is not None else None,
                      callbacks=train_plugins)
    print(f"[LR: {args.lr}]")
    print(f"[Batch Size: {args.batch_size}]")
    print(f"[Epochs: {args.epochs}]")
    print()

    # Start training
    trainer.train(args.epochs)
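
# The Trainer above is driven entirely through plugins/callbacks appended to
# train_plugins and test_plugins. As a rough illustration only (the actual plugin
# interface of this Trainer is not shown here, so the hook names below are
# assumptions), a minimal plugin might look like this:
class EpochLoggerPlugin:
    """Hypothetical plugin that just reports epoch boundaries."""

    def on_epoch_start(self, epoch):
        print(f"Starting epoch {epoch}")

    def on_epoch_end(self, epoch):
        print(f"Finished epoch {epoch}")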
# Model saving configuration
print(f"[Model Saving: {common.enabled_str(args.save)}]")
if args.save:
    test_callbacks.append(
        BestModelSaver(task, args.loss, log_path, args.exp_id))

# Evaluation configuration
metric = KNNAccuracyMetric(config.test_distance)
train_callbacks.append(
    ClassAccuracyEvaluator(common.DEVICE, dev, metric, test_callbacks))

# Training configuration
trainer = Trainer(args.loss, model, config.loss, train,
                  config.optimizer(model, task, args.lr),
                  model_loader=ModelLoader(args.recover) if args.recover is not None else None,
                  callbacks=train_callbacks)
print(f"[LR: {args.lr}]")
print(f"[Batch Size: {args.batch_size}]")
print(f"[Epochs: {args.epochs}]")
print()

# Start training
trainer.train(
    args.epochs, log_path,
    common.get_basic_plots(args.lr, args.batch_size, 'Accuracy', 'green'))
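
# KNNAccuracyMetric above presumably scores embeddings with a k-nearest-neighbour
# classifier. A minimal sketch of that idea, assuming scikit-learn and plain array
# inputs (not the repo's actual API or distance handling):
from sklearn.neighbors import KNeighborsClassifier

def knn_accuracy(ref_embeddings, ref_labels, query_embeddings, query_labels, k=1):
    """Fit a k-NN classifier on reference embeddings and score the queries."""
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(ref_embeddings, ref_labels)
    return knn.score(query_embeddings, query_labels)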
dev_evaluator = Evaluator(dev, metric, 'dev', test_callbacks)
test_evaluator = Evaluator(
    test, metric, 'test',
    callbacks=[MetricFileLogger(log_path=join(log_path, 'test-metric.log'))])
train_callbacks.extend([dev_evaluator, test_evaluator])

# Training configuration
optim = config.optimizer(model, lr=args.lr)
model_loader = ModelLoader(
    args.recover, args.recover_optim) if args.recover is not None else None
trainer = Trainer(args.loss, model, config.loss, train, optim,
                  model_loader=model_loader,
                  callbacks=train_callbacks,
                  last_metric_fn=lambda: dev_evaluator.last_metric)
print()

# Start training
plots = builders.build_essential_plots(args.lr, args.batch_size, 'Macro F1', 'green')
trainer.train(args.epochs, log_path, plots)

# Dump and print results
best_epoch = dev_evaluator.best_epoch
best_dev = dev_evaluator.best_metric
best_test = test_evaluator.results[dev_evaluator.best_epoch - 1]
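
# The selection rule above reports the test metric at the epoch where the dev metric
# peaked (note the 1-based best_epoch indexing into results). A standalone sketch of
# that rule, with hypothetical per-epoch metric lists as input and assuming
# higher-is-better (as for Macro F1):
def select_by_dev(dev_metrics, test_metrics):
    """Return (best_epoch, best_dev, test_at_best_dev) using 1-based epochs."""
    best_epoch = max(range(len(dev_metrics)), key=lambda i: dev_metrics[i]) + 1
    return best_epoch, dev_metrics[best_epoch - 1], test_metrics[best_epoch - 1]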
parser.add_argument('--gpu', type=str, default='0,1', help='assign multi-gpus by comma concat')
parser.add_argument('--cfg', type=str, help='experiment configure file name')
args = parser.parse_args()

if args.cfg:
    update_config(args.cfg)

torch.manual_seed(args.seed)
os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
print("Work on GPU: ", os.environ['CUDA_VISIBLE_DEVICES'])

from core.base import Trainer, Tester, LiftTrainer, LiftTester

if cfg.MODEL.name == 'pose2mesh_net':
    trainer = Trainer(args, load_dir='')
    tester = Tester(args)  # if not args.debug else None
elif cfg.MODEL.name == 'posenet':
    trainer = LiftTrainer(args, load_dir='')
    tester = LiftTester(args)  # if not args.debug else None

print("===> Start training...")
for epoch in range(cfg.TRAIN.begin_epoch, cfg.TRAIN.end_epoch + 1):
    trainer.train(epoch)
    trainer.lr_scheduler.step()

    tester.test(epoch, current_model=trainer.model)

    if epoch > 1:
        is_best = tester.joint_error < min(trainer.error_history['joint'])
    else:
    loss = focal_loss()
else:
    loss = nn.BCELoss()

# Lists for plotting the training curves
train_loss_list = []
train_acc_list = []
val_loss_list = []
val_acc_list = []
epoch_list = []

# Build the trainer and tester, then train
if args.train == "train":
    trainer = Trainer(model=model, optimizer=optimizer, scheduler=exp_lr_scheduler,
                      batch_size=args.batch_size, empochs=args.epoch,
                      train_loader=train_loader, use_cuda=args.use_gpu)
    tester = Tester(val_loader=val_loader, model=model, batch_size=args.batch_size,
                    use_cuda=args.use_gpu)

    # Train and validate epoch by epoch
    for epoch in range(start_epoch, args.epoch):
        train_loss, train_acc = trainer.train(epoch=epoch, criterion_ce=loss,
                                              numclass=num_class)
        val_loss, val_acc = tester.val_test(epoch=epoch, current_model=trainer.model,
                                            criterion_ce=loss,
        config.test_distance, args.eval_interval, dataset.config, test_callbacks),
    SpeakerVerificationEvaluator(
        'test', args.batch_size, config.test_distance, args.eval_interval, dataset.config,
        callbacks=[
            MetricFileLogger(log_path=join(log_path, 'test-metric.log'))
        ])
]
train_callbacks.extend(evaluators)

# Training configuration
trainer = Trainer(args.loss, model, config.loss, train,
                  config.optimizer(model, task, args.lr),
                  model_loader=ModelLoader(args.recover) if args.recover is not None else None,
                  callbacks=train_callbacks)
print(f"[LR: {args.lr}]")
print(f"[Batch Size: {args.batch_size}]")
print(f"[Epochs: {args.epochs}]")
print()

# Start training
trainer.train(args.epochs, log_path, plots)
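
# MetricFileLogger above writes the evaluation metric to a per-split log file. A
# minimal file logger along those lines (an assumption; the actual class and its
# call signature are not shown here) could be:
class SimpleMetricFileLogger:
    """Hypothetical logger that appends one metric value per line."""

    def __init__(self, log_path):
        self.log_path = log_path

    def log_metric(self, epoch, value):
        with open(self.log_path, 'a') as f:
            f.write(f"{epoch}\t{value}\n")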