def train(dataloaders, model, opt, num_epochs=10):
    # `device` is expected to be defined at module scope in the original script
    trainer = BaseTrainer(opt, device)
    board = TensorboardCallback(opt)
    trainer.run_train(model, dataloaders, callbacks=[board], num_epochs=num_epochs)
    return model
def train(args):
    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)
    shutil.copy(args.config_file, cfg.OUTPUT_DIR)

    num_gpus = torch.cuda.device_count()
    logger = setup_logger('reid_baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info(args)
    logger.info('Running with config:\n{}'.format(cfg))

    train_dl, val_dl, num_query, num_classes = make_dataloader(cfg, num_gpus)
    model = build_model(cfg, num_classes)
    loss_func = make_loss(cfg, num_classes)

    trainer = BaseTrainer(cfg, model, train_dl, val_dl, loss_func, num_query, num_gpus)
    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
def worker(gpu, ngpus_per_node, args):
    print("running base training...")
    model = BaseTrainer(args)
    model.make_model_env(gpu, ngpus_per_node)
    model.make_run_env()
    model.evaluate_model(60001)  # change the output name by changing the number
def setup_teacher(t_name, params):
    # Teacher model
    num_classes = params["num_classes"]
    t_net = create_model(t_name, num_classes, params["device"])

    teacher_config = params.copy()
    teacher_config["test_name"] = t_name + "_teacher"

    if params["t_checkpoint"]:
        # Just validate the performance
        print("---------- Loading Teacher -------")
        best_teacher = params["t_checkpoint"]
    else:
        # Teacher training
        print("---------- Training Teacher -------")
        teacher_trainer = BaseTrainer(t_net, config=teacher_config)
        teacher_trainer.train()
        best_teacher = teacher_trainer.best_model_file

    # Reload and get the best model
    t_net = util.load_checkpoint(t_net, best_teacher)
    teacher_trainer = BaseTrainer(t_net, config=teacher_config)
    best_t_acc = teacher_trainer.validate()

    # Also save this information in a csv file for plotting
    name = teacher_config["test_name"] + "_val"
    acc_file_name = params["results_dir"].joinpath(f"{name}.csv")
    with acc_file_name.open("w+") as acc_file:
        acc_file.write("Training Loss,Validation Loss\n")
        for _ in range(params["epochs"]):
            acc_file.write(f"0.0,{best_t_acc}\n")

    return t_net, best_teacher, best_t_acc
def worker(gpu, ngpus_per_node, args):
    if args.adv:
        model = AdvTrainer(args)
    else:
        model = BaseTrainer(args)
    model.make_model_env(gpu, ngpus_per_node)
    model.make_run_env()
    model.train()
def worker(gpu, ngpus_per_node, args):
    if args.adv:
        print("running adv training...")
        model = AdvTrainer(args)
    else:
        print("running base training...")
        model = BaseTrainer(args)
    model.make_model_env(gpu, ngpus_per_node)
    model.make_run_env()
    model.train()
def worker(gpu, ngpus_per_node, args):
    if args.adv:
        print("running adv training...")
        model = AdvTrainer(args)
    else:
        print("running base training...")
        model = BaseTrainer(args)
    model.make_model_env(gpu, ngpus_per_node)
    model.make_run_env()
    if args.only_test:
        # model.test()
        model.get_embeddings()
    else:
        model.train()
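# ---------------------------------------------------------------------------
# NOTE: illustrative sketch, not part of the original snippets. The
# worker(gpu, ngpus_per_node, args) signature used above matches the usual
# torch.multiprocessing.spawn entry point, so a launcher for these trainers
# would plausibly look like this; `main` and the spawn call are assumptions.
# ---------------------------------------------------------------------------
import torch
import torch.multiprocessing as mp


def main(args):
    ngpus_per_node = torch.cuda.device_count()
    # Spawn one process per GPU; each process receives its GPU index as `gpu`.
    mp.spawn(worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))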
def main():
    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    num_gpus = torch.cuda.device_count()
    logger = setup_logger('reid_baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info('Running with config:\n{}'.format(cfg))

    train_dl, val_dl, num_query, num_classes = make_dataloader(cfg, num_gpus)
    model = build_model(cfg, num_classes)
    loss = make_loss(cfg, num_classes)

    trainer = BaseTrainer(cfg, model, train_dl, val_dl, loss, num_query, num_gpus)
    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
def main():
    parser = argparse.ArgumentParser(description="Baseline Training")
    parser.add_argument("--config_file", default="", help="path to config file", type=str)
    parser.add_argument("opts", help="Modify config options using the command-line",
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    num_gpus = 0
    device = torch.device("cpu")
    if cfg.MODEL.DEVICE == 'cuda' and torch.cuda.is_available():
        num_gpus = len(cfg.MODEL.DEVICE_IDS) - 1
        device_ids = cfg.MODEL.DEVICE_IDS.strip("d")
        print(device_ids)
        device = torch.device("cuda:{0}".format(device_ids))

    logger = setup_logger('baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info('Running with config:\n{}'.format(cfg))

    train_dl, val_dl = make_dataloader(cfg, num_gpus)
    model = build_model(cfg)
    loss = make_loss(cfg, device)

    trainer = BaseTrainer(cfg, model, train_dl, val_dl, loss, num_gpus, device)
    logger.info(type(model))
    logger.info(loss)
    logger.info(trainer)

    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
def build_trainer(self, args: ClassifierArgs, dataset: Dataset, data_loader: DataLoader) -> BaseTrainer:
    # get optimizer
    optimizer = self.build_optimizer(args)
    # get learning rate decay
    lr_scheduler = CosineAnnealingLR(optimizer, len(dataset) // args.batch_size * args.epochs)
    # get tensorboard writer
    writer = self.build_writer(args)

    # default trainer; replaced below when a specialised training type is requested
    trainer = BaseTrainer(data_loader, self.model, self.loss_function, optimizer, lr_scheduler, writer)
    if args.training_type == 'freelb':
        trainer = FreeLBTrainer(data_loader, self.model, self.loss_function, optimizer, lr_scheduler, writer)
    elif args.training_type == 'pgd':
        trainer = PGDTrainer(data_loader, self.model, self.loss_function, optimizer, lr_scheduler, writer)
    elif args.training_type == 'advhotflip':
        trainer = HotflipTrainer(args, self.tokenizer, data_loader, self.model, self.loss_function, optimizer, lr_scheduler, writer)
    elif args.training_type == 'metric':
        trainer = EmbeddingLevelMetricTrainer(data_loader, self.model, self.loss_function, optimizer, lr_scheduler, writer)
    elif args.training_type == 'metric_token':
        trainer = TokenLevelMetricTrainer(args, self.tokenizer, data_loader, self.model, self.loss_function, optimizer, lr_scheduler, writer)
    elif args.training_type == 'sparse':
        trainer = MaskTrainer(args, self.data_processor, data_loader, self.model, self.loss_function, optimizer, lr_scheduler, writer)
    elif args.training_type == 'safer':
        trainer = SAFERTrainer(args, self.data_processor, data_loader, self.model, self.loss_function, optimizer, lr_scheduler, writer)
    return trainer
def main():
    parser = argparse.ArgumentParser(description="ReID Baseline Training")
    parser.add_argument("--config_file", default="", help="path to config file", type=str)
    parser.add_argument("opts", help="Modify config options using the command-line",
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    num_gpus = torch.cuda.device_count()
    logger = setup_logger('reid_baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info('Running with config:\n{}'.format(cfg))

    if cfg.INPUT.SEPNORM.USE:
        train_dl, val_dl, num_query, num_classes = make_sepnorm_dataloader(cfg, num_gpus)
    elif cfg.DATASETS.EXEMPLAR.USE:
        train_dl, val_dl, num_query, num_classes, exemplar_dl = make_dataloader(cfg, num_gpus)
    else:
        train_dl, val_dl, num_query, num_classes = make_dataloader(cfg, num_gpus)

    model = build_model(cfg, num_classes)
    loss = make_loss(cfg, num_classes)

    if cfg.SOLVER.CENTER_LOSS.USE:
        trainer = CenterTrainer(cfg, model, train_dl, val_dl, loss, num_query, num_gpus)
    elif cfg.SOLVER.MIXUP.USE:
        trainer = NegMixupTrainer(cfg, model, train_dl, val_dl, loss, num_query, num_gpus)
    elif cfg.DATASETS.EXEMPLAR.USE:
        if cfg.DATASETS.EXEMPLAR.MEMORY.USE:
            trainer = ExemplarMemoryTrainer(cfg, model, train_dl, val_dl, exemplar_dl, loss, num_query, num_gpus)
        else:
            trainer = UIRLTrainer(cfg, model, train_dl, val_dl, exemplar_dl, loss, num_query, num_gpus)
    elif cfg.DATASETS.HIST_LABEL.USE:
        trainer = HistLabelTrainer(cfg, model, train_dl, val_dl, loss, num_query, num_gpus)
    else:
        trainer = BaseTrainer(cfg, model, train_dl, val_dl, loss, num_query, num_gpus)

    if cfg.INPUT.SEPNORM.USE:
        logger.info('train transform0: \n{}'.format(train_dl.dataset.transform0))
        logger.info('train transform1: \n{}'.format(train_dl.dataset.transform1))
        logger.info('valid transform0: \n{}'.format(val_dl.dataset.transform0))
        logger.info('valid transform1: \n{}'.format(val_dl.dataset.transform1))
    else:
        logger.info('train transform: \n{}'.format(train_dl.dataset.transform))
        logger.info('valid transform: \n{}'.format(val_dl.dataset.transform))
    logger.info(type(model))
    logger.info(loss)
    logger.info(trainer)

    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
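# ---------------------------------------------------------------------------
# NOTE: illustrative sketch, not taken from any of the repositories above.
# Several of the training scripts drive their trainer with the same loop
# (step / handle_new_batch / handle_new_epoch over trainer.train_dl), which
# implies roughly the minimal interface below; the class name, the config key
# and the method bodies are assumptions, not the actual BaseTrainer code.
# ---------------------------------------------------------------------------
class MinimalBaseTrainer:
    def __init__(self, cfg, model, train_dl, val_dl, loss_func, num_query, num_gpus):
        self.cfg = cfg
        self.model = model
        self.train_dl = train_dl
        self.val_dl = val_dl
        self.loss_func = loss_func
        self.epochs = cfg.SOLVER.MAX_EPOCHS  # assumed config key
        self.batch_cnt = 0
        self.current_epoch = 0

    def step(self, batch):
        # One optimisation step on a single batch (forward, loss, backward, update).
        raise NotImplementedError

    def handle_new_batch(self):
        # Per-batch bookkeeping, e.g. logging and advancing the batch counter.
        self.batch_cnt += 1

    def handle_new_epoch(self):
        # Per-epoch bookkeeping, e.g. LR scheduling, evaluation, checkpointing.
        self.batch_cnt = 0
        self.current_epoch += 1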
def test_nokd(s_net, t_net, params):
    # Plain training of the student network, without knowledge distillation
    print("---------- Training NOKD -------")
    nokd_config = params.copy()
    nokd_trainer = BaseTrainer(s_net, config=nokd_config)
    best_acc = nokd_trainer.train()
    return best_acc
train_dataset, test_dataset = random_split(
    HAM10000('datasets/archive/'), [8015, 2000],
    generator=torch.Generator().manual_seed(42))

model = AttUNet().to(device)
if pretrained_path is not None:
    load(model=model, name=pretrained_path)

if what == 'train':
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # Trainer and Training
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    trainer = BaseTrainer(model, train_dataset, loss=FocalLoss()).to(device)
    trainer.supervise(lr=lr, epochs=epochs, batch_size=batch_size,
                      name='store/' + name)

if what in ['train', 'validate']:
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # Validate
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    test(model, test_dataset, BinaryIOU())

if what in ['train', 'validate', 'draw']:
import yaml
import argparse

from trainer import BaseTrainer


def arg_parser():
    parser = argparse.ArgumentParser(description="config")
    parser.add_argument("--config", type=str, default="config/test.yaml",
                        help="Path of the configuration file to be used.")
    return parser.parse_args()


if __name__ == '__main__':
    import torch
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True

    arg = arg_parser()
    config = arg.config
    with open(config) as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)

    trainer = BaseTrainer(config, stage="Test")
    trainer.test()
import yaml
import argparse

from trainer import BaseTrainer


def arg_parser():
    parser = argparse.ArgumentParser(description="config")
    parser.add_argument(
        "--config", type=str, default="config/train.yaml",
        help="Path of the configuration file to be used.")
    return parser.parse_args()


if __name__ == '__main__':
    import torch
    torch.backends.cudnn.benchmark = True
    torch.backends.cudnn.enabled = True

    arg = arg_parser()
    config = arg.config
    with open(config) as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)

    trainer = BaseTrainer(config, stage="Train")
    trainer.train()