def setup_model(self):
    self.model = NLVR2Model()

    # Load pre-trained weights
    if args.load_lxmert is not None:
        self.model.lxrt_encoder.load(args.load_lxmert)

    # GPU options
    if args.multiGPU:
        self.model.lxrt_encoder.multi_gpu()
    self.model = self.model.cuda()
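# For reference, a sketch of what an LXMERT-style weight-loading helper such
# as `lxrt_encoder.load` typically does: read a checkpoint (the '_LXRT.pth'
# suffix convention appears below) and copy matching tensors into the encoder.
# This is an assumed illustration, not the repo's actual implementation.
def load_encoder_weights(encoder, path):
    state_dict = torch.load(f"{path}_LXRT.pth", map_location='cpu')
    # strict=False tolerates task-specific heads missing from the checkpoint
    encoder.load_state_dict(state_dict, strict=False)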
def __init__(self):
    self.train_tuple = get_tuple(
        args.train, bs=args.batch_size, shuffle=True, drop_last=True
    )
    if args.valid != "":
        valid_bsize = 2048 if args.multiGPU else 512
        self.valid_tuple = get_tuple(
            args.valid, bs=valid_bsize, shuffle=False, drop_last=False
        )
    else:
        self.valid_tuple = None

    self.momentum = 0.9995
    self.model = NLVR2Model()
    self.siam_model = copy.deepcopy(self.model)

    # Load pre-trained weights
    if args.load_lxmert is not None:
        self.model.lxrt_encoder.load(args.load_lxmert)
        self.siam_model.lxrt_encoder.load(args.load_lxmert)
    if args.load_lxmert_qa is not None:
        load_lxmert_qa(args.load_lxmert_qa, self.model,
                       label2ans=self.train_tuple.dataset.label2ans)
        load_lxmert_qa(args.load_lxmert_qa, self.siam_model,
                       label2ans=self.train_tuple.dataset.label2ans)

    # GPU options
    if args.multiGPU:
        self.model.lxrt_encoder.multi_gpu()
        self.siam_model.lxrt_encoder.multi_gpu()
    self.model = self.model.cuda()
    self.siam_model = self.siam_model.cuda()

    # Losses and optimizer
    self.mce_loss = nn.CrossEntropyLoss(ignore_index=-1)
    if 'bert' in args.optim:
        batch_per_epoch = len(self.train_tuple.loader)
        t_total = int(batch_per_epoch * args.epochs)
        print("Total Iters: %d" % t_total)
        from lxrt.optimization import BertAdam
        self.optim = BertAdam(list(self.model.parameters()),
                              lr=args.lr,
                              warmup=0.1,
                              t_total=t_total)
    else:
        self.optim = args.optimizer(list(self.model.parameters()), args.lr)

    self.output = args.output
    os.makedirs(self.output, exist_ok=True)
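# The 0.9995 momentum constant and the deep-copied `siam_model` suggest the
# siamese branch is maintained as an exponential moving average (EMA) of the
# online model. A minimal sketch of that update, assuming it runs after each
# optimizer step (the method name is hypothetical; `torch` is assumed to be
# imported at module level):
def momentum_update(self):
    """EMA update: siam_param <- m * siam_param + (1 - m) * param."""
    with torch.no_grad():
        for param, siam_param in zip(self.model.parameters(),
                                     self.siam_model.parameters()):
            siam_param.data.mul_(self.momentum).add_(
                param.data, alpha=1.0 - self.momentum)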
def __init__(self, args, train_loader=None, val_loader=None, logger=None, train=True):
    self.args = args
    self.max_text_length = args.max_text_length
    self.train_loader = train_loader
    self.val_loader = val_loader
    self.logger = logger

    self.model = NLVR2Model.from_pretrained(
        "bert-base-uncased",
        args=args,
    )

    self.verbose = True
    if self.args.distributed:
        if self.args.gpu != 0:
            self.verbose = False

    # GPU Options
    print(f'Model Launching at GPU {self.args.gpu}')
    from time import time
    start = time()
    self.model.cuda(args.gpu)

    # Load Checkpoint
    self.start_epoch = None
    if args.load is not None:
        path = args.load + '.pth'
        self.load(path, verbose=self.verbose)
    elif args.load_lxmert is not None:
        path = args.load_lxmert + '_LXRT.pth'
        self.load(path, verbose=self.verbose)

    # Optimizer
    if train:
        self.optim, self.lr_scheduler = self.create_optimizer_and_scheduler()
        self.mce_loss = nn.CrossEntropyLoss()

    if args.multiGPU:
        assert args.distributed
        self.model = DDP(self.model, device_ids=[args.gpu],
                         find_unused_parameters=True)

    if args.gpu == 0:
        print(f'It took {time() - start:.1f}s')

    self.output = args.output
    os.makedirs(self.output, exist_ok=True)
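# `create_optimizer_and_scheduler` is called above but not shown in this
# excerpt. A plausible sketch, assuming the common AdamW + linear-warmup
# recipe from Hugging Face `transformers` (the warmup ratio and the use of
# `args.epochs` are assumptions; the repo's real implementation may differ):
def create_optimizer_and_scheduler(self):
    from transformers import AdamW, get_linear_schedule_with_warmup
    # Total number of optimizer steps across all epochs
    t_total = len(self.train_loader) * self.args.epochs
    optim = AdamW(self.model.parameters(), lr=self.args.lr)
    lr_scheduler = get_linear_schedule_with_warmup(
        optim,
        num_warmup_steps=int(0.1 * t_total),  # assumed 10% warmup
        num_training_steps=t_total)
    return optim, lr_scheduler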