def load_model(config, num_train_steps, label_list):
    device = torch.device("cuda")
    n_gpu = config.n_gpu
    model = BertQueryNER(config)
    if config.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    model.to(device)

    # prepare optimizer: no weight decay for bias and LayerNorm parameters
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {"params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         "weight_decay": 0.01},
        {"params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         "weight_decay": 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=config.learning_rate, eps=10e-8)
    scheduler = None
    return model, optimizer, scheduler, device, n_gpu
def load_model(config, num_train_steps, label_list, logger):
    device = torch.device("cuda")
    n_gpu = config.n_gpu
    model = BertQueryNER(config, train_steps=num_train_steps)
    model.to(device)
    if config.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # prepare optimizer: exclude bias and LayerNorm weights from weight decay
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {"params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         "weight_decay": config.weight_decay},
        {"params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         "weight_decay": 0.0},
    ]
    optimizer = build_fp32_optimizer(config, optimizer_grouped_parameters)
    scheduler = build_lr_scheduler(config, optimizer, num_train_steps)

    if config.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=config.amp_level)

    # Distributed training (should be after apex fp16 initialization)
    if config.local_rank != -1 and config.data_parallel == "ddp":
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[config.local_rank],
            output_device=config.local_rank,
            find_unused_parameters=True,
        )

    return model, optimizer, scheduler, device, n_gpu
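# build_fp32_optimizer and build_lr_scheduler are referenced above but not defined in this
# section. The sketch below is only an assumption of what they might look like, built on
# torch.optim.AdamW and transformers.get_linear_schedule_with_warmup; the actual helpers
# in the repository may differ. config.warmup_proportion mirrors the BertAdam-style
# warmup used elsewhere in this section.
from torch.optim import AdamW as TorchAdamW
from transformers import get_linear_schedule_with_warmup


def build_fp32_optimizer(config, optimizer_grouped_parameters):
    # Plain FP32 AdamW over the already-grouped parameters (decay vs. no-decay groups).
    return TorchAdamW(optimizer_grouped_parameters, lr=config.learning_rate, eps=1e-8)


def build_lr_scheduler(config, optimizer, num_train_steps):
    # Linear warmup followed by linear decay over the total number of training steps.
    num_warmup_steps = int(num_train_steps * config.warmup_proportion)
    return get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_train_steps,
    )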
def load_model(config, num_train_steps, label_list): device = torch.device("cuda") n_gpu = config.n_gpu model = BertQueryNER(config, ) model.to(device) if config.n_gpu > 1: model = torch.nn.DataParallel(model) # prepare optimzier param_optimizer = list(model.named_parameters()) no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight", 'gamma', 'beta'] optimizer_grouped_parameters = [ {"params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], "weight_decay": 0.01}, {"params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], "weight_decay": 0.0}] optimizer = AdamW(optimizer_grouped_parameters, lr=config.learning_rate, betas=(0.9, 0.98), eps=1e-6, weight_decay=0.01) # optimizer = BertAdam(optimizer_grouped_parameters, lr=config.learning_rate, warmup=config.warmup_proportion, # t_total=num_train_steps, max_grad_norm=config.clip_grad) sheduler = None if config.fp16: try: from apex import amp except ImportError: raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.") model, optimizer = amp.initialize(model, optimizer, opt_level=config.amp_level) # Distributed training (should be after apex fp16 initialization) if config.local_rank != -1: model = torch.nn.parallel.DistributedDataParallel( model, device_ids=[config.local_rank], output_device=config.local_rank, find_unused_parameters=True ) return model, optimizer, sheduler, device, n_gpu
def load_model(config, label_list):
    device = torch.device("cuda")
    n_gpu = config.n_gpu
    model = BertQueryNER(config)
    # restore weights from a trained checkpoint for evaluation / inference
    checkpoint = torch.load(config.saved_model)
    model.load_state_dict(checkpoint)
    model.to(device)
    if config.n_gpu > 1:
        model = torch.nn.DataParallel(model)
    return model, device, n_gpu
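# Usage sketch (an assumption, not taken from the source): how the training-time loader
# above might be wired into a script. `get_config` and the placeholder values for
# num_train_steps / label_list are hypothetical; the real config object is expected to
# expose fields such as n_gpu, learning_rate, weight_decay, fp16, amp_level, local_rank,
# and saved_model.
if __name__ == "__main__":
    config = get_config()                     # hypothetical helper that parses args into a config
    label_list = ["O", "B-PER", "I-PER"]      # placeholder label set for illustration
    num_train_steps = 10000                   # placeholder; normally derived from dataset size and epochs

    # training-time variant: also returns the optimizer and (optional) scheduler
    model, optimizer, scheduler, device, n_gpu = load_model(config, num_train_steps, label_list)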