def create_models(emb_dim, dropout=0.0):
    """Build a ResNet-18 feature trunk and an L2-normalized embedding head.

    Args:
        emb_dim: dimensionality of the output embedding.
        dropout: dropout probability applied before the projection;
            a value of 0.0 disables dropout entirely.

    Returns:
        Tuple of (trunk, embedder) modules.
    """
    backbone = models.resnet18(pretrained=True)
    feat_dim = backbone.fc.in_features
    # Strip the classification layer so the trunk emits raw features.
    backbone.fc = Identity()

    head_layers = []
    if dropout > 0.0:
        head_layers.append(nn.Dropout(p=dropout))
    else:
        head_layers.append(Identity())
    head_layers.append(nn.Linear(feat_dim, emb_dim))
    head_layers.append(Normalize())
    embedder = nn.Sequential(*head_layers)

    return backbone, embedder
def create_models(emb_dim, dropout=0.0):
    """Build a bounding-box trunk and its embedding head.

    Args:
        emb_dim: dimensionality of the output embedding.
        dropout: dropout probability applied after normalization;
            0.0 disables dropout.

    Returns:
        Tuple of (trunk, model) where model wraps the head in a
        BoundingBoxEmbedder.
    """
    trunk = BoundingBoxTrunkModel()
    # NOTE: dropout is applied *after* Normalize here, unlike the
    # ResNet variant elsewhere, which drops out before the projection.
    tail = nn.Dropout(p=dropout) if dropout > 0.0 else Identity()
    head = nn.Sequential(
        nn.Linear(trunk.output_size, emb_dim),
        Normalize(),
        tail)
    return trunk, BoundingBoxEmbedder(head)
def __init__(self, device):
    """Restore a trained ResNet-18 trunk + embedding head from disk.

    Loads weights from ``res18_0222.mdl`` (stored next to this file),
    composes trunk and embedder into a single sequential model, and
    moves the composite onto ``device``.
    """
    super().__init__(device)
    dropout = 0.0
    emb_dim = 500
    weights_path = os.path.join(os.path.dirname(__file__), "res18_0222.mdl")

    backbone = models.resnet18(pretrained=True)
    feat_dim = backbone.fc.in_features
    # Replace the classifier so the trunk outputs raw features.
    backbone.fc = Identity()

    head = nn.Sequential(
        nn.Dropout(p=dropout) if dropout > 0.0 else Identity(),
        nn.Linear(feat_dim, emb_dim),
        Normalize())

    # Always load onto CPU first; the composite is moved to `device` below.
    checkpoint = torch.load(weights_path, map_location="cpu")
    backbone.load_state_dict(checkpoint["trunk"])
    head.load_state_dict(checkpoint["embedder"])

    self.trunk = backbone
    self.model = head
    self.comp_model = nn.Sequential(self.trunk, self.model)
    self.comp_model.to(device)
# Recover the starting iteration from the checkpoint filename (or the
# stored 'iter' field for "latest" checkpoints); non-numeric names are
# ignored and the default start_iter is kept.
try:
    ckpt_name = os.path.basename(args.ckpt)
    if 'latest' in ckpt_name and 'iter' in ckpt:
        args.start_iter = ckpt["iter"]
    else:
        # Filenames are assumed to be "<iteration>.<ext>".
        args.start_iter = int(os.path.splitext(ckpt_name)[0])
except ValueError:
    pass
# Restore encoder weights and its optimizer state.
encoder.load_state_dict(ckpt["e"])
e_optim.load_state_dict(ckpt["e_optim"])
# Optional latent-MLP mapping network (g1) with an EMA copy.
g1 = g1_ema = g1_optim = None
if args.train_latent_mlp:
    if args.which_encoder == 'debug':
        # Debug mode: identity mapping, no learned transform.
        from model import Identity
        g1 = Identity()
        g1_ema = Identity()
    else:
        from model import LatentMLP
        g1 = LatentMLP(
            args.latent,
            use_residual=args.use_residual_latent_mlp).to(device)
        g1_ema = LatentMLP(
            args.latent,
            use_residual=args.use_residual_latent_mlp).to(device)
    # EMA copy is evaluation-only; initialize it to match g1 exactly
    # (decay=0 copies g1's parameters into g1_ema).
    g1_ema.eval()
    accumulate(g1_ema, g1, 0)
    # NOTE(review): `lr * 1` and betas=(0**1, 0.99**1) look like the
    # StyleGAN2 regularization-ratio formula with the ratio hard-coded
    # to 1 — confirm whether a reg_ratio variable was intended here.
    g1_optim = optim.Adam(
        g1.parameters(),
        lr=args.lr * 1,
        betas=(0**1, 0.99**1),
def __init__(self):
    """Wrap a pretrained ResNet-18 as a headless feature trunk.

    Exposes ``output_size`` (the dimensionality of the trunk's output
    features) and replaces the final classifier with an identity so the
    module emits raw features.
    """
    super().__init__()
    backbone = models.resnet18(pretrained=True)
    self.output_size = backbone.fc.in_features
    backbone.fc = Identity()
    self.trunk = backbone
def train_eval(args, train_data, dev_data):
    """Train a ResNet-18 metric-learning model and track the best dev EER.

    Builds the train dataloader with a metric batch sampler, constructs a
    pretrained ResNet-18 trunk plus a normalized linear embedder, trains
    with a triplet-margin loss via pytorch-metric-learning's
    MetricLossOnly trainer, and evaluates every ``args.eval_freq`` epochs.

    Args:
        args: parsed command-line arguments (lr, margin, epochs, etc.).
        train_data: training split passed to ``create_dataset``.
        dev_data: development split used for periodic evaluation.

    Returns:
        The best (lowest) dev-set EER observed during training.
    """
    logger = logging.getLogger("main")

    # Create dataset & dataloader (standard ImageNet preprocessing).
    trans = [
        transforms.Resize((224, 224)),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]
    trans = transforms.Compose(trans)
    train_dataset, train_char_idx = \
        create_dataset(args.root, train_data, trans)
    train_sampler = MetricBatchSampler(train_dataset, train_char_idx,
                                       n_max_per_char=args.n_max_per_char,
                                       n_batch_size=args.n_batch_size,
                                       n_random=args.n_random)
    train_dataloader = DataLoader(train_dataset,
                                  batch_sampler=train_sampler,
                                  collate_fn=collate_fn)
    # number of batches given to trainer
    n_batch = int(len(train_dataloader))

    eval_train_dataloaders = \
        prepare_evaluation_dataloaders(args, args.eval_split*3, train_data,
                                       trans)
    eval_dev_dataloaders = \
        prepare_evaluation_dataloaders(args, args.eval_split, dev_data, trans)

    # Construct model & optimizer
    device = "cpu" if args.gpu < 0 else "cuda:{}".format(args.gpu)
    trunk = models.resnet18(pretrained=True)
    trunk_output_size = trunk.fc.in_features
    # Strip the classifier; the trunk emits raw features for the embedder.
    trunk.fc = Identity()
    trunk.to(device)
    model = nn.Sequential(nn.Linear(trunk_output_size, args.emb_dim),
                          Normalize())
    model.to(device)

    if args.optimizer == "SGD":
        trunk_optimizer = torch.optim.SGD(trunk.parameters(),
                                          lr=args.lr,
                                          momentum=args.momentum,
                                          weight_decay=args.decay)
        model_optimizer = torch.optim.SGD(model.parameters(),
                                          lr=args.lr,
                                          momentum=args.momentum,
                                          weight_decay=args.decay)
    else:
        raise NotImplementedError

    loss_func = losses.TripletMarginLoss(margin=args.margin,
                                         normalize_embeddings=args.normalize)

    best_dev_eer = 1.0
    i_epoch = 0

    def end_of_epoch_hook(trainer):
        # Evaluate on train/dev splits every eval_freq epochs and track
        # the best dev EER seen so far.
        nonlocal i_epoch, best_dev_eer
        logger.info(f"EPOCH\t{i_epoch}")
        if i_epoch % args.eval_freq == 0:
            train_eer, train_eer_std = evaluate(args,
                                                trainer.models["trunk"],
                                                trainer.models["embedder"],
                                                eval_train_dataloaders)
            dev_eer, dev_eer_std = evaluate(args,
                                            trainer.models["trunk"],
                                            trainer.models["embedder"],
                                            eval_dev_dataloaders)
            # BUGFIX: both messages previously used the identical label
            # "Eval EER", making train/dev results indistinguishable in logs.
            logger.info("Train EER (mean, std):\t{}\t{}".format(
                train_eer, train_eer_std))
            logger.info("Dev EER (mean, std):\t{}\t{}".format(
                dev_eer, dev_eer_std))
            if dev_eer < best_dev_eer:
                logger.info("New best model!")
                best_dev_eer = dev_eer
        i_epoch += 1

    trainer = trainers.MetricLossOnly(
        models={
            "trunk": trunk,
            "embedder": model
        },
        optimizers={
            "trunk_optimizer": trunk_optimizer,
            "embedder_optimizer": model_optimizer
        },
        batch_size=None,
        loss_funcs={"metric_loss": loss_func},
        mining_funcs={},
        iterations_per_epoch=n_batch,
        dataset=train_dataset,
        data_device=None,
        loss_weights=None,
        sampler=train_sampler,
        collate_fn=collate_fn,
        lr_schedulers=None,  #TODO: use warm-up
        end_of_epoch_hook=end_of_epoch_hook,
        dataloader_num_workers=1)
    trainer.train(num_epochs=args.epoch)

    if args.save_model:
        torch.save(trainer.models, f"model/{args.suffix}.mdl")

    return best_dev_eer
# empty cuda cache logger.info('clearing torch cache...') torch.cuda.empty_cache() logger.info('torch cache cleared.') # clearing the python cache logger.info('clearing python cache...') gc.collect() logger.info('python cache cleared.') # ~~~~~~~~~~~~~~~~~~~~~ start training ~~~~~~~~~~~~~~~~~~~~~ # data_dir = opt['data dir'] lr = config['lr'] alpha = opt['alpha'] model = Identity() data = TrafficDataSet(data_dir, model) optim = Adam(model.parameters(), lr) # loss = TripletLoss(alpha) training(model, config, opt, data, optim, criteria, device) # clearing the cache of cuda if device.type != 'cpu': # empty cuda cache logger.info('clearing torch cache...') torch.cuda.empty_cache() logger.info('torch cache cleared.') # clearing the python cache logger.info('clearing python cache...')
# Convert per-sample length percentages into absolute input sizes
# (in-place multiply by the time dimension, inputs.size(3)).
input_sizes = input_percentages.mul_(int(inputs.size(3))).int()
tensorboard_logger.add_image(inputs, input_sizes, targets, network=model)
# add graph doesn't work if model is in gpu
# Optionally freeze the convolutional front-end, but keep its batch-norm
# layers trainable so running statistics continue to adapt.
if freeze_conv:
    model.conv.requires_grad_(requires_grad=False)
    # Free batch norm layer to learn running average
    model.conv.seq_module[1].requires_grad_(requires_grad=True)
    model.conv.seq_module[4].requires_grad_(requires_grad=True)
# Same for the recurrent stack: freeze everything except each layer's
# batch norm (layer 0 has no batch_norm, hence range starts at 1).
if freeze_rnns:
    model.rnns.requires_grad_(requires_grad=False)
    for i in range(1, len(model.rnns)):
        model.rnns[i].batch_norm.requires_grad_(requires_grad=True)
# Optionally strip batch-norm modules entirely by replacing them with
# Identity — indices 1 and 4 are assumed to be the BN layers of the
# conv stack (TODO confirm against the model definition).
if remove_bn_conv:
    model.conv.seq_module[1] = Identity()
    model.conv.seq_module[4] = Identity()
if remove_bn_rnns:
    for i in range(1, len(model.rnns)):
        model.rnns[i].batch_norm = Identity()
if remove_bn_fc:
    model.fc[0] = Identity()
model = model.to(device)
parameters = model.parameters()