def train(args):
    if args.ckpt_path:
        model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
        args.start_epoch = ckpt_info['epoch'] + 1
    else:
        model_fn = models.__dict__[args.model]
        model = model_fn(**vars(args))
        model = nn.DataParallel(model, args.gpu_ids)
    model = model.to(args.device)
    model.train()

    # Get optimizer and scheduler
    optimizer = optim.get_optimizer(
        filter(lambda p: p.requires_grad, model.parameters()), args)
    lr_scheduler = optim.get_scheduler(optimizer, args)
    if args.ckpt_path:
        ModelSaver.load_optimizer(args.ckpt_path, optimizer, lr_scheduler)

    # Get logger, evaluator, saver
    loss_fn = nn.CrossEntropyLoss()
    train_loader = CIFARLoader('train', args.batch_size, args.num_workers)
    logger = TrainLogger(args, len(train_loader.dataset))
    eval_loaders = [CIFARLoader('val', args.batch_size, args.num_workers)]
    evaluator = ModelEvaluator(eval_loaders, logger, args.max_eval, args.epochs_per_eval)
    saver = ModelSaver(**vars(args))

    # Train model
    while not logger.is_finished_training():
        logger.start_epoch()

        for inputs, targets in train_loader:
            logger.start_iter()

            with torch.set_grad_enabled(True):
                logits = model.forward(inputs.to(args.device))
                loss = loss_fn(logits, targets.to(args.device))

                logger.log_iter(loss)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            logger.end_iter()

        metrics = evaluator.evaluate(model, args.device, logger.epoch)
        saver.save(logger.epoch, model, optimizer, lr_scheduler, args.device,
                   metric_val=metrics.get(args.metric_name, None))
        logger.end_epoch(metrics)
        optim.step_scheduler(lr_scheduler, metrics, logger.epoch)
def train(args):
    train_loader = get_loader(args=args)

    if args.ckpt_path:
        model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
        args.start_epoch = ckpt_info['epoch'] + 1
    else:
        model_fn = models.__dict__[args.model]
        args.D_in = train_loader.D_in
        model = model_fn(**vars(args))
    model = model.to(args.device)
    model.train()

    # Get optimizer and scheduler
    optimizer = optim.get_optimizer(
        filter(lambda p: p.requires_grad, model.parameters()), args)
    lr_scheduler = optim.get_scheduler(optimizer, args)
    if args.ckpt_path:
        ModelSaver.load_optimizer(args.ckpt_path, optimizer, lr_scheduler)

    # Get logger, evaluator, saver
    loss_fn = optim.get_loss_fn(args.loss_fn, args)
    logger = TrainLogger(args, len(train_loader.dataset))
    eval_loaders = [
        get_loader(args, phase='train', is_training=False),
        get_loader(args, phase='valid', is_training=False),
    ]
    evaluator = ModelEvaluator(args, eval_loaders, logger, args.max_eval, args.epochs_per_eval)
    saver = ModelSaver(**vars(args))

    # Train model
    while not logger.is_finished_training():
        logger.start_epoch()

        for src, tgt in train_loader:
            logger.start_iter()

            with torch.set_grad_enabled(True):
                pred_params = model.forward(src.to(args.device))
                ages = src[:, 1]
                loss = loss_fn(pred_params, tgt.to(args.device), ages.to(args.device), args.use_intvl)
                # loss = loss_fn(pred_params, tgt.to(args.device), src.to(args.device), args.use_intvl)

                logger.log_iter(src, pred_params, tgt, loss)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            logger.end_iter()

        metrics = evaluator.evaluate(model, args.device, logger.epoch)
        saver.save(logger.epoch, model, optimizer, lr_scheduler, args.device,
                   metric_val=metrics.get(args.metric_name, None))
        logger.end_epoch(metrics=metrics)
def train(args):
    if args.ckpt_path:
        model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
        args.start_epoch = ckpt_info['epoch'] + 1
    else:
        model_fn = models.__dict__[args.model]
        model = model_fn(**vars(args))
        model = nn.DataParallel(model, args.gpu_ids)
    model = model.to(args.device)
    model.train()

    # Set up population-based training client
    pbt_client = PBTClient(args.pbt_server_url, args.pbt_server_port,
                           args.pbt_server_key, args.pbt_config_path)

    # Get optimizer and scheduler
    parameters = model.module.parameters()
    optimizer = optim.get_optimizer(parameters, args, pbt_client)
    ModelSaver.load_optimizer(args.ckpt_path, args.gpu_ids, optimizer)

    # Get logger, evaluator, saver
    train_loader = DataLoader(args, 'train', is_training_set=True)
    eval_loaders = [DataLoader(args, 'valid', is_training_set=False)]
    evaluator = ModelEvaluator(eval_loaders, args.epochs_per_eval, args.max_eval,
                               args.num_visuals, use_ten_crop=args.use_ten_crop)
    saver = ModelSaver(**vars(args))

    for _ in range(args.num_epochs):
        optim.update_hyperparameters(model.module, optimizer, pbt_client.hyperparameters())

        for inputs, targets in train_loader:
            with torch.set_grad_enabled(True):
                logits = model.forward(inputs.to(args.device))
                loss = F.binary_cross_entropy_with_logits(logits, targets.to(args.device))

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        metrics = evaluator.evaluate(model, args.device)
        metric_val = metrics.get(args.metric_name, None)
        ckpt_path = saver.save(model, args.model, optimizer, args.device, metric_val)

        pbt_client.save(ckpt_path, metric_val)
        if pbt_client.should_exploit():
            # Exploit
            pbt_client.exploit()

            # Load model and optimizer parameters from exploited network
            model, ckpt_info = ModelSaver.load_model(pbt_client.parameters_path(), args.gpu_ids)
            model = model.to(args.device)
            model.train()
            ModelSaver.load_optimizer(pbt_client.parameters_path(), args.gpu_ids, optimizer)

            # Explore
            pbt_client.explore()
def test(args):
    model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
    args.start_epoch = ckpt_info['epoch'] + 1
    model = model.to(args.device)
    model.eval()

    # Run a single evaluation
    eval_loader = WhiteboardLoader(args.data_dir, args.phase, args.batch_size,
                                   shuffle=False, do_augment=False,
                                   num_workers=args.num_workers)
    logger = TestLogger(args, len(eval_loader.dataset))
    logger.start_epoch()
    evaluator = ModelEvaluator([eval_loader], logger,
                               num_visuals=args.num_visuals,
                               prob_threshold=args.prob_threshold)
    metrics = evaluator.evaluate(model, args.device, logger.epoch)
    logger.end_epoch(metrics)
]))

trainloader = torch.utils.data.DataLoader(trainset, batch_size=opt.batch_size,
                                          shuffle=True, num_workers=opt.workers)
testloader = torch.utils.data.DataLoader(testset, batch_size=opt.batch_size,
                                         shuffle=False, num_workers=opt.workers)

nc = 1
model = SweatyNet1(nc)
if opt.use_gpu:
    model = model.cuda()

epoch = 20  # Checkpoint model to load
model_name = opt.model_root + 'Model_lr_{}_opt_{}_epoch_{}'.format(
    opt.lr, opt.optimizer, epoch)
model.load_state_dict(load_model(model_name, key='state_dict_model'))

nc = 10  # Number of sequence maps
map_size = [120, 160]
model.layer18 = ConvLSTM(nc, map_size)

opt.lr = 0.00001
modeleval = ModelEvaluator(model)
modeleval.evaluator(seq_trainloader, seq_testloader)
modeleval.plot_loss()
def train(args):
    if args.ckpt_path and not args.use_pretrained:
        model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
        args.start_epoch = ckpt_info['epoch'] + 1
    else:
        model_fn = models.__dict__[args.model]
        model = model_fn(**vars(args))
        if args.use_pretrained:
            model.load_pretrained(args.ckpt_path, args.gpu_ids)
        model = nn.DataParallel(model, args.gpu_ids)
    model = model.to(args.device)
    model.train()

    # Get optimizer and scheduler
    if args.use_pretrained or args.fine_tune:
        parameters = model.module.fine_tuning_parameters(args.fine_tuning_boundary, args.fine_tuning_lr)
    else:
        parameters = model.parameters()
    optimizer = util.get_optimizer(parameters, args)
    lr_scheduler = util.get_scheduler(optimizer, args)
    if args.ckpt_path and not args.use_pretrained and not args.fine_tune:
        ModelSaver.load_optimizer(args.ckpt_path, optimizer, lr_scheduler)

    # Get logger, evaluator, saver
    cls_loss_fn = util.get_loss_fn(is_classification=True, dataset=args.dataset, size_average=False)
    data_loader_fn = data_loader.__dict__[args.data_loader]
    train_loader = data_loader_fn(args, phase='train', is_training=True)
    logger = TrainLogger(args, len(train_loader.dataset), train_loader.dataset.pixel_dict)
    eval_loaders = [data_loader_fn(args, phase='val', is_training=False)]
    evaluator = ModelEvaluator(args.do_classify, args.dataset, eval_loaders, logger,
                               args.agg_method, args.num_visuals, args.max_eval,
                               args.epochs_per_eval)
    saver = ModelSaver(args.save_dir, args.epochs_per_save, args.max_ckpts,
                       args.best_ckpt_metric, args.maximize_metric)

    # Train model
    while not logger.is_finished_training():
        logger.start_epoch()

        for inputs, target_dict in train_loader:
            logger.start_iter()

            with torch.set_grad_enabled(True):
                # tensor.to() is not in-place, so the result must be reassigned
                # before the forward pass.
                inputs = inputs.to(args.device)
                cls_logits = model.forward(inputs)
                cls_targets = target_dict['is_abnormal']
                cls_loss = cls_loss_fn(cls_logits, cls_targets.to(args.device))
                loss = cls_loss.mean()

                logger.log_iter(inputs, cls_logits, target_dict, cls_loss.mean(), optimizer)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            logger.end_iter()
            util.step_scheduler(lr_scheduler, global_step=logger.global_step)

        metrics, curves = evaluator.evaluate(model, args.device, logger.epoch)
        saver.save(logger.epoch, model, optimizer, lr_scheduler, args.device,
                   metric_val=metrics.get(args.best_ckpt_metric, None))
        logger.end_epoch(metrics, curves)
        util.step_scheduler(lr_scheduler, metrics, epoch=logger.epoch,
                            best_ckpt_metric=args.best_ckpt_metric)
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]))
    nc = 1
elif opt.dataset == 'fake':
    dataset = dset.FakeData(image_size=(3, opt.imageSize, opt.imageSize),
                            transform=transforms.ToTensor())
    nc = 3
assert dataset

dataloader = torch.utils.data.DataLoader(dataset, batch_size=opt.batch_size,
                                         shuffle=True, num_workers=opt.num_workers)

assert_path(opt.model_path)
assert_path(opt.result_path)

gan_ = GAN(opt.nz, opt.ngf, opt.ndf, opt.nc, opt.filters, opt.strides, opt.padding,
           opt.lrg, opt.lrd, opt.batch_size, opt.beta1, out_path=opt.model_path,
           use_gpu=opt.use_gpu, resume=opt.resume)
netG, netD = gan_.model()

evaluator = ModelEvaluator(opt.nz, opt.lrg, opt.lrd, opt.beta1, opt.batch_size,
                           opt.nm_epochs, model_path=opt.model_path,
                           out_path=opt.result_path, use_gpu=opt.use_gpu)
evaluator.train(dataloader, netG, netD)
class ModelTrainer:
    """Class for training a classifier."""

    def __init__(self, args, model, logger):
        self.args = args
        self.model = model
        self.logger = logger
        self.dataloader: DataLoader
        self.num_train_steps: int
        self.optimizer: AdamW
        self.scheduler: LambdaLR
        if args["do_eval"]:
            self.evaluator = ModelEvaluator(args, model, logger)

    def load_optimizer(self):
        """Loads the AdamW optimizer used during training."""
        param_optimizer = list(self.model.named_parameters())
        # Excluded to reproduce the behaviour of the original optimizer.
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        apply = [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)]
        no_apply = [p for n, p in param_optimizer if any(nd in n for nd in no_decay)]
        grouped_params = [
            {"params": apply, "weight_decay": 0.01},
            {"params": no_apply, "weight_decay": 0.0},
        ]
        self.optimizer = AdamW(
            grouped_params,
            lr=self.args["learning_rate"],
            correct_bias=False,
        )

    def load_scheduler(self):
        """Loads the scheduler controlling the learning rate during training."""
        self.scheduler = get_linear_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=self.args["warmup_proportion"] * self.num_train_steps,
            num_training_steps=self.num_train_steps,
        )

    def train(self):
        """Performs model training using labeled data."""
        self.model.to(self.args["device"])
        self.load_optimizer()
        self.load_scheduler()

        for i in range(self.args["num_train_epochs"]):
            train_loss = 0
            self.model.train()
            for step, batch in enumerate(tqdm(self.dataloader, desc="Batch")):
                batch = tuple(t.to(self.args["device"]) for t in batch)
                if self.args["use_parents"]:
                    input_ids, input_mask, segment_ids, label_ids, parent_labels = batch
                    outputs = self.model(
                        input_ids,
                        segment_ids,
                        input_mask,
                        label_ids,
                        parent_labels=parent_labels,
                    )
                else:
                    input_ids, input_mask, segment_ids, label_ids = batch
                    # Forward pass, compute loss for prediction
                    outputs = self.model(input_ids, segment_ids, input_mask, label_ids)
                loss = outputs[0]

                # Backward pass, compute gradient of loss w.r.t. model parameters
                loss.backward()
                train_loss += loss.item()

                self.optimizer.step()       # Update model parameters
                self.scheduler.step()       # Update learning rate schedule
                self.optimizer.zero_grad()  # Set gradients of model parameters to zero

            self.logger.info(colored(f"TRAINING EPOCH {i + 1} COMPLETE", "green"))
            self.logger.info(f"Training loss = {train_loss / len(self.dataloader)}")
            self.logger.info(f"Learning rate = {self.scheduler.get_last_lr()[0]}")

            if self.args["do_eval"]:
                self.evaluator.evaluate(i + 1)
            if self.args["save_checkpoints"] and 0 < i < 3:
                self._save_model(i + 1)

        self._save_model()

    def _save_model(self, epoch=None):
        """Saves a checkpoint or complete model."""
        if epoch:
            state = {
                "epoch": epoch,
                "model": self.model.state_dict(),
                "optimizer": self.optimizer.state_dict(),
            }
            output_model_file = join(
                self.args["DATA_PATH"],
                f'model_files/{self.args["session_num"]}_epoch_{epoch}.ckpt',
            )
        else:
            state = self.model.state_dict()
            output_model_file = join(
                self.args["DATA_PATH"],
                f'model_files/{self.args["session_num"]}_finetuned_pytorch_model.bin',
            )
        torch.save(state, output_model_file)
if args["use_parents"]:
    model = create_experimental(args, len(processor.labels))
else:
    model = create_baseline(args, len(processor.labels))

model_state_dict = torch.load(
    join(args["DATA_PATH"], "model_files/13_finetuned_pytorch_model.bin"),
    map_location="cpu",
)
model.load_state_dict(model_state_dict)

if args["do_train"]:
    trainer = ModelTrainer(args, model, logger)

    logger.info("Loading data…")
    trainer.dataloader, trainer.num_train_steps = prepare_data(
        args, processor, "train_ext.pkl", "train")
    if args["do_eval"]:
        trainer.evaluator.dataloader, _ = prepare_data(
            args, processor, "dev_raw.pkl", "dev")

    logger.info("Training…")
    trainer.train()
else:
    evaluator = ModelEvaluator(args, model, logger)

    logger.info("Loading data…")
    evaluator.dataloader, _ = prepare_data(args, processor, "test_raw.pkl", "dev")

    logger.info("Evaluating…")
    result = evaluator.evaluate()
                                       train=False, download=True,
                                       transform=transforms.ToTensor())

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# Parameters
n_out = len(classes)
batch_size = 128

# Hyperparameters
lr = 0.001
epochs = 15

# Data Loader
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=3)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=3)

# Model
l2 = 0.0
pool = 'max'
optim = 'adam'

# Pytorch Cross Entropy Loss
model = CNN(pool)
modeleval = ModelEvaluator(model, epochs, lr, l2=l2, use_gpu=True, optim=optim)
modeleval.evaluator(trainloader, testloader, print_every=100, validation=False)
trainset = dsets.MNIST(root='./data', train=True,
                       transform=transforms.ToTensor(), download=True)
testset = dsets.MNIST(root='./data', train=False,
                      transform=transforms.ToTensor())

batch_size = 100
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(dataset=testset, batch_size=batch_size, shuffle=False)

n_in = 28
n_hidden = 100
n_out = 10
seq_dim = 28
use_gpu = True

# model = LSTMModel(n_in, n_hidden, n_out, batch_size, use_gpu)
model = GRUModel(n_in, n_hidden, n_out, batch_size, use_gpu)
if use_gpu:
    model.cuda()

l2 = 0.0
lr = 0.001
epochs = 10
optim = 'adam'

modeleval = ModelEvaluator(model, epochs, lr, batch_size, l2, use_gpu, optim)
acc_ = modeleval.evaluator(trainloader, testloader, seq_dim, n_in)
# Data Loader
trainloader = torch.utils.data.DataLoader(trainset, batch_size=opt.batch_size,
                                          shuffle=True, num_workers=opt.workers)
testloader = torch.utils.data.DataLoader(testset, batch_size=opt.batch_size,
                                         shuffle=False, num_workers=opt.workers)

nc = 1
threshold = trainset.threshold
min_radius = trainset.min_radius

if opt.net == 'net1':
    model = SweatyNet1(nc, opt.drop_p)
    print('SweatyNet1')
elif opt.net == 'net2':
    model = SweatyNet2(nc, opt.drop_p)
    print('SweatyNet2')
elif opt.net == 'net3':
    model = SweatyNet3(nc, opt.drop_p)
    print('SweatyNet3')
else:
    raise ValueError('Model not supported')

modeleval = ModelEvaluator(model, min_radius, threshold)
modeleval.evaluator(trainloader, testloader)
modeleval.save_output()
modeleval.plot_loss()
def train(args):
    if args.ckpt_path:
        model, ckpt_info = ModelSaver.load_model(args.ckpt_path, args.gpu_ids)
        args.start_epoch = ckpt_info['epoch'] + 1
    else:
        model_fn = models.__dict__[args.model]
        model = model_fn(pretrained=args.pretrained)
        if args.pretrained:
            model.fc = nn.Linear(model.fc.in_features, args.num_classes)
        model = nn.DataParallel(model, args.gpu_ids)
    model = model.to(args.device)
    model.train()

    # Get optimizer and scheduler
    parameters = optim.get_parameters(model.module, args)
    optimizer = optim.get_optimizer(parameters, args)
    lr_scheduler = optim.get_scheduler(optimizer, args)
    if args.ckpt_path:
        ModelSaver.load_optimizer(args.ckpt_path, optimizer, lr_scheduler)

    # Get logger, evaluator, saver
    loss_fn = nn.CrossEntropyLoss()
    train_loader = WhiteboardLoader(args.data_dir, 'train', args.batch_size,
                                    shuffle=True, do_augment=True,
                                    num_workers=args.num_workers)
    logger = TrainLogger(args, len(train_loader.dataset))
    eval_loaders = [WhiteboardLoader(args.data_dir, 'val', args.batch_size,
                                     shuffle=False, do_augment=False,
                                     num_workers=args.num_workers)]
    evaluator = ModelEvaluator(eval_loaders, logger, args.epochs_per_eval,
                               args.max_eval, args.num_visuals)
    saver = ModelSaver(**vars(args))

    # Train model
    while not logger.is_finished_training():
        logger.start_epoch()

        for inputs, targets, paths in train_loader:
            logger.start_iter()

            with torch.set_grad_enabled(True):
                logits = model.forward(inputs.to(args.device))
                loss = loss_fn(logits, targets.to(args.device))

                logger.log_iter(inputs, logits, targets, paths, loss)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            optim.step_scheduler(lr_scheduler, global_step=logger.global_step)
            logger.end_iter()

        metrics = evaluator.evaluate(model, args.device, logger.epoch)
        saver.save(logger.epoch, model, args.model, optimizer, lr_scheduler, args.device,
                   metric_val=metrics.get(args.metric_name, None))
        logger.end_epoch(metrics)
        optim.step_scheduler(lr_scheduler, metrics, logger.epoch)