def _init_model(self, saved_model=None):
    # Load pre-trained BERT
    if saved_model:
        print("Loading the pre-trained model from: " + saved_model)
        # if loading on a cpu:
        model_state_dict = torch.load(saved_model, map_location='cpu')
        # model_state_dict = torch.load(saved_model)
        self._model = BertForSequenceClassification.from_pretrained(
            self._config["bert_model"], state_dict=model_state_dict,
            num_labels=2)
    else:
        cache_dir = os.path.join(
            PYTORCH_PRETRAINED_BERT_CACHE,
            'distributed_{}'.format(self._config["local_rank"]))
        self._model = BertForSequenceClassification.from_pretrained(
            self._config["bert_model"],
            cache_dir=cache_dir,
            num_labels=len(self._processor.get_labels()))
    if self._config["fp16"]:
        self._model.half()
    self._model.to(self._device)
    if self._config["local_rank"] != -1:
        self._model = torch.nn.parallel.DistributedDataParallel(
            self._model,
            device_ids=[self._config["local_rank"]],
            output_device=self._config["local_rank"])
    elif self._n_gpu > 1:
        self._model = torch.nn.DataParallel(self._model)
def get_model():
    if model_state_dict:
        model = BertForSequenceClassification.from_pretrained(
            args['bert_model'],
            num_labels=num_labels,
            state_dict=model_state_dict)
    else:
        model = BertForSequenceClassification.from_pretrained(
            args['bert_model'], num_labels=num_labels)
    return model
def load_model(self, model_dir, model_config: str = "model_config.json"):
    model_config = os.path.join(model_dir, model_config)
    model_config = json.load(open(model_config))
    output_config_file = os.path.join(model_dir, CONFIG_NAME)
    output_model_file = os.path.join(model_dir, WEIGHTS_NAME)
    config = BertConfig(output_config_file)
    model = BertForSequenceClassification(
        config, num_labels=model_config["num_labels"])
    model.load_state_dict(torch.load(output_model_file, map_location='cpu'))
    tokenizer = BertTokenizer.from_pretrained(
        model_config["bert_model"], do_lower_case=model_config["do_lower"])
    return model, tokenizer, model_config
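# Hedged usage sketch for load_model above (not from the original source).
# `predictor` stands in for a hypothetical instance of the class that defines
# load_model, and "output/" is an illustrative directory assumed to contain
# the CONFIG_NAME / WEIGHTS_NAME files plus a model_config.json with
# "num_labels", "bert_model", and "do_lower" keys.
model, tokenizer, model_config = predictor.load_model("output/")
model.eval()
tokens = ["[CLS]"] + tokenizer.tokenize("an example sentence") + ["[SEP]"]
input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])
with torch.no_grad():
    logits = model(input_ids)  # pytorch-pretrained-bert returns raw logits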
def load_model_classification(
        model_path: Path,
        model_name: str,
        num_labels: int = 2) -> BertForSequenceClassification:
    model_path = Path(model_path)
    config = BertConfig(str(model_path / f"{model_name}-config.json"))
    model = BertForSequenceClassification(config, num_labels=num_labels)
    model.load_state_dict(
        torch.load(str(model_path / f"{model_name}-model.pt"),
                   map_location=device))
    return model
def get_pretrained_model(model_path, num_labels, cache_dir,
                         model_state_dict=None):
    if model_state_dict:
        model = BertForSequenceClassification.from_pretrained(
            model_path, num_labels=num_labels, state_dict=model_state_dict)
    else:
        model = BertForSequenceClassification.from_pretrained(
            model_path, cache_dir=cache_dir, num_labels=num_labels)
    return model
def __init__(self):
    self.label_list = [
        "xuexi", "huodong", "xunwu", "chushou", "qiugou", "huzhu", "zhaopin"
    ]
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-chinese',
                                                   do_lower_case=True)
    filepath = pkg_resources.resource_filename(
        __name__, "../.FILE/checkpoints/bert_classification.pth")
    state_dict = torch.load(filepath, map_location=torch.device('cpu'))
    # 21128 is the vocabulary size of bert-base-chinese; 7 matches label_list.
    self.model = BertForSequenceClassification(BertConfig(21128), 7)
    self.model.load_state_dict(state_dict['state_dict'])
def load_model_and_tokenizer(args, best_path='best'):
    # Load a trained model and vocabulary that you have fine-tuned
    if best_path and os.path.exists(os.path.join(args.output_dir, best_path)):
        best_output_path = os.path.join(args.output_dir, best_path)
        model = BertForSequenceClassification.from_pretrained(
            best_output_path, num_labels=2)
        tokenizer = BertTokenizer.from_pretrained(
            best_output_path, do_lower_case=args.do_lower_case)
    else:
        model = BertForSequenceClassification.from_pretrained(
            args.output_dir, num_labels=2)
        tokenizer = BertTokenizer.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
    return model, tokenizer
def init_model(config):
    cfg, cfg_data, cfg_model, cfg_optim = read_config(config)
    device, n_gpu = utils.get_device()
    utils.set_seeds(cfg.seed, n_gpu)
    train_batch_size = int(cfg_optim.train_batch_size /
                           cfg_optim.gradient_accumulation_steps)
    processor = get_class(cfg.task.lower())
    processor.get_train_examples(cfg.data_dir)
    label_list = processor.get_labels()
    tokenizer = BertTokenizer.from_pretrained(cfg.bert_model,
                                              do_lower_case=cfg.do_lower_case)
    # Prepare model
    model = BertForSequenceClassification.from_pretrained(
        cfg.bert_model,
        cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(-1),
        num_labels=len(label_list))
    model.to(device)
    if not torch.cuda.is_available():
        model.load_state_dict(
            torch.load(cfg.model_save_pth, map_location='cpu')['state_dict'])
    else:
        model.load_state_dict(torch.load(cfg.model_save_pth)['state_dict'])
    return model, processor, cfg_optim, label_list, tokenizer, device
def get(cls,
        num_labels,
        dir='./data/datasets/yelp_review_polarity_csv/',
        model_name='bert-base-uncased'):
    model_weights = BertForSequenceClassification.from_pretrained(
        model_name, num_labels=num_labels)
    param_optimizer = list(model_weights.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    optimizer = BertAdam(optimizer_grouped_parameters, lr=1e-5)
    model_weights, _, _, _, _ = load_model_optimizer(model_weights, optimizer,
                                                     dir)
    model = BertForMaskedLM.from_pretrained(
        pretrained_model_name_or_path=dir + 'finetuned_lm/')
    model.bert = model_weights.bert
    model.classifier = model_weights.classifier
    model.cuda()
    model.eval()
    return model
def init_weights(self):
    MODEL_DIR = "/models/intents"
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    print(device_name)
    self.device = torch.device(device_name)
    self.labelencoder = preprocessing.LabelEncoder()
    self.labelencoder.classes_ = np.load(os.path.join(MODEL_DIR, 'classes.npy'))
    config = BertConfig(os.path.join(MODEL_DIR, 'bert_config.json'))
    self.model = BertForSequenceClassification(
        config, num_labels=len(self.labelencoder.classes_))
    self.model.load_state_dict(
        torch.load(os.path.join(MODEL_DIR, 'pytorch_model.bin'),
                   map_location="cpu"))
    self.model.to(self.device)
    self.model.eval()
    tokenizer_class, pretrained_weights = BertTokenizer, 'bert-base-uncased'
    self.tokenizer = tokenizer_class.from_pretrained(pretrained_weights)
    self.batch_size = 30
    self.dataloader_num_workers = 0
def get_test_model(output_model_file) -> BertForSequenceClassification:
    # output_model_file = os.path.join('save', "finetuned_pytorch_model.bin")
    device = torch.device("cuda" if torch.cuda.is_available()
                          and not args["no_cuda"] else "cpu")
    model_state_dict = torch.load(output_model_file)
    test_model = BertForSequenceClassification.from_pretrained(
        args['bert_model'], num_labels=40, state_dict=model_state_dict)
    return test_model.to(device).eval()
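# Illustrative call to get_test_model; the path reuses the commented-out
# default above, and the module-level `args` dict is assumed to exist:
test_model = get_test_model(os.path.join('save', "finetuned_pytorch_model.bin"))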
def __init__(self, pretrained_dir, model="spanbert-base-cased"):
    assert os.path.exists(
        pretrained_dir
    ), "Pre-trained model folder does not exist: {}".format(pretrained_dir)
    self.seed = 42
    self.max_seq_length = 128
    self.batch_size = 32
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.n_gpu = torch.cuda.device_count()
    self.fp16 = self.n_gpu > 0
    self._set_seed()
    self.label2id = {label: i for i, label in enumerate(label_list)}
    self.id2label = {i: label for i, label in enumerate(label_list)}
    self.num_labels = len(label_list)
    # self.tokenizer = AutoTokenizer.from_pretrained(
    #     "SpanBERT/spanbert-base-cased", do_lower_case=False)
    self.tokenizer = BertTokenizer.from_pretrained(model, do_lower_case=False)
    print("Loading pre-trained SpanBERT from {}".format(pretrained_dir))
    self.classifier = BertForSequenceClassification.from_pretrained(
        pretrained_dir, num_labels=self.num_labels)
    if self.fp16:
        self.classifier.half()
    self.classifier.to(self.device)
def __init__(self, label_list, ren, norm_fn, device):
    self._label_list = label_list
    self._ren = ren
    self._device = device
    self._tokenizer = BertTokenizer.from_pretrained(BERT_MODEL,
                                                    do_lower_case=True)
    self._model = BertForSequenceClassification.from_pretrained(
        BERT_MODEL, num_labels=len(label_list)).to(device)
    self._optimizer = None
    self._dataset = {}
    self._data_loader = {}
    self._weights = None
    self._w_decay = None
    if norm_fn == 'linear':
        self._norm_fn = _linear_normalize
    elif norm_fn == 'softmax':
        self._norm_fn = _softmax_normalize
    if ren:
        assert norm_fn == 'linear'
def prep_for_training(num_train_optimization_steps, _config):
    tokenizer = BertTokenizer.from_pretrained(
        _config["bert_model"], do_lower_case=_config["do_lower_case"])
    # TODO: Change model here
    model = BertForSequenceClassification.from_pretrained(
        _config["bert_model"],
        cache_dir=_config["cache_dir"],
        num_labels=_config["num_labels"])
    model.to(_config["device"])
    # Prepare optimizer: no weight decay on biases and LayerNorm parameters.
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=_config["learning_rate"],
                         warmup=_config["warmup_proportion"],
                         t_total=num_train_optimization_steps)
    return model, optimizer, tokenizer
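# Hedged sketch of one training step with the objects returned by
# prep_for_training; `_config` and the batch tensors below are illustrative
# placeholders, not part of the original snippet:
model, optimizer, tokenizer = prep_for_training(
    num_train_optimization_steps=1000, _config=_config)
model.train()
input_ids = torch.randint(0, 100, (8, 128)).to(_config["device"])
labels = torch.randint(0, _config["num_labels"], (8,)).to(_config["device"])
loss = model(input_ids, labels=labels)  # old API returns the loss with labels
loss.backward()
optimizer.step()
optimizer.zero_grad()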
def define_model(self):
    """Defines the model, device and tokenizer."""
    max_len = 64
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    def tokenize(sentence):
        return tokenizer.encode_plus(
            sentence,
            add_special_tokens=True,    # add [CLS], [SEP]
            max_length=max_len,         # max length of the text that can go to BERT
            pad_to_max_length=True,     # add [PAD] tokens
            return_attention_mask=True  # add attention mask to not focus on pad tokens
        )

    model = BertForSequenceClassification.from_pretrained(self.model_path,
                                                          num_labels=3,
                                                          cache_dir=None)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(self.device)
    model.eval()
    self.model = model
    self.tokenize = tokenize
def __init__(self, no_cuda, local_rank, task_name, output_dir, data_dir,
             bert_model, do_lower_case, max_seq_length, eval_batch_size,
             fp16):
    self.no_cuda = no_cuda
    self.local_rank = local_rank
    self.task_name = task_name
    self.output_dir = output_dir
    self.data_dir = data_dir
    self.bert_model = bert_model
    self.do_lower_case = do_lower_case
    self.max_seq_length = max_seq_length
    self.eval_batch_size = eval_batch_size
    self.fp16 = fp16
    self.processor = processors.processor_for_task(self.task_name)
    self.label_list = self.processor.get_labels()
    self.num_labels = len(self.label_list)
    model_file = os.path.join(self.output_dir, "pytorch_model.bin")
    self.tokenizer = BertTokenizer.from_pretrained(
        self.bert_model, do_lower_case=self.do_lower_case)
    self.device = torch.device("cuda" if torch.cuda.is_available()
                               and not self.no_cuda else "cpu")
    self.n_gpu = torch.cuda.device_count()
    model_state_dict = (torch.load(model_file, map_location='cpu')
                        if self.no_cuda else torch.load(model_file))
    self.model = BertForSequenceClassification.from_pretrained(
        self.bert_model, state_dict=model_state_dict,
        num_labels=self.num_labels)
    self.model.to(self.device)
def load_bert_adapter(task_type, bert_model_name, bert_load_mode,
                      bert_load_args, all_state, num_labels,
                      bert_config_json_path):
    if bert_config_json_path is None:
        bert_config_json_path = os.path.join(
            get_bert_config_path(bert_model_name), "bert_config.json")
    if bert_load_mode in ["model_only_adapter"]:
        adapter_state = all_state
    elif bert_load_mode in ["state_adapter"]:
        adapter_state = all_state["model"]
    else:
        raise KeyError(bert_load_mode)
    # Format: "bert_model_path:{path}"
    # Very hackish
    bert_state = torch.load(bert_load_args.replace("bert_model_path:", ""))
    config = BertConfig.from_json_file(bert_config_json_path)
    if task_type == TaskType.CLASSIFICATION:
        model = BertForSequenceClassification(config, num_labels=num_labels)
    elif task_type == TaskType.REGRESSION:
        assert num_labels == 1
        model = BertForSequenceRegression(config)
    else:
        raise KeyError(task_type)
    load_from_adapter(
        model=model,
        bert_state=bert_state,
        adapter_state=adapter_state,
    )
    return model
def prepare_model(self):
    model_dir = (self.args.resume_dir
                 if self.args.resume_dir else self.args.bert_model)
    cache_dir = self.args.cache_dir if self.args.cache_dir else os.path.join(
        str(PYTORCH_PRETRAINED_BERT_CACHE),
        'distributed_{}'.format(self.args.local_rank))
    self.model = BertForSequenceClassification.from_pretrained(
        model_dir, cache_dir=cache_dir, num_labels=self.num_labels)
    self.tokenizer = BertTokenizer.from_pretrained(
        model_dir, do_lower_case=self.args.do_lower_case)
    if self.args.fp16:
        self.model.half()
    print(self.device)
    self.model.to(self.device)
    if self.args.local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex "
                "to use distributed and fp16 training.")
        self.model = DDP(self.model)
    elif self.n_gpu > 1:
        self.model = torch.nn.DataParallel(self.model)
def train(logger, args):
    os.makedirs(args.output_dir, exist_ok=True)
    processor = NLIProcessor()
    output_mode = "classification"

    # Prepare inputs
    label_list = processor.get_labels()
    num_labels = len(label_list)
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=False)
    train_examples = processor.get_train_examples(args.data_dir,
                                                  args.train_rte_file)
    train_examples = train_examples[0:10]  # debugging
    num_eg = len(train_examples)
    train_features = convert_examples_to_features(train_examples, label_list,
                                                  args.max_seq_length,
                                                  tokenizer, output_mode)

    # Prepare model
    cache_dir = (args.cache_dir if args.cache_dir else os.path.join(
        str(PYTORCH_PRETRAINED_BERT_CACHE),
        f"distributed_{args.local_rank}"))
    model = BertForSequenceClassification.from_pretrained(
        args.bert_model, cache_dir=cache_dir, num_labels=num_labels)

    # Train
    trainer = Trainer(module="nli", model=model, args=args,
                      tokenizer=tokenizer)
    trainer.train(train_features, num_labels, num_eg)
def _init_evidence_classifier(self, weight_path):
    self.logger.warning('Loading StanceClassifier models...')
    # Load a trained model that you have fine-tuned
    model_dir = Path('./model')
    # model_dir = Path('./../fever-irnlp-master/src/pytorch-pretrained-BERT-master/examples/tmp/DEMO_stance')
    output_model_file = str(model_dir / weight_path)  # pretrained weight from training
    num_labels = 2
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        model_state_dict = torch.load(output_model_file)
    else:
        model_state_dict = torch.load(output_model_file, map_location='cpu')
    model = BertForSequenceClassification.from_pretrained(
        'bert-base-uncased', state_dict=model_state_dict,
        num_labels=num_labels)
    model.to(device)
    model.eval()
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              do_lower_case=True)
    return model, tokenizer, device
def save_and_reload(self, path, model_name):
    torch.cuda.empty_cache()
    self.model.to('cpu')
    # Save a trained model
    model_to_save = (self.model.module if hasattr(self.model, 'module')
                     else self.model)  # Only save the model itself
    output_model_file = os.path.join(path, "{}.bin".format(model_name))
    torch.save(model_to_save.state_dict(), output_model_file)
    # Load a trained model that you have fine-tuned
    model_state_dict = torch.load(output_model_file)
    if self.multi_label:
        self.model = BertForMultiLabelSequenceClassification.from_pretrained(
            self.pretrained_model_path,
            num_labels=len(self.data.labels),
            state_dict=model_state_dict)
    else:
        self.model = BertForSequenceClassification.from_pretrained(
            self.pretrained_model_path,
            num_labels=len(self.data.labels),
            state_dict=model_state_dict)
    if self.is_fp16:
        self.model.half()
    torch.cuda.empty_cache()
    self.model.to(self.device)
    if not self.multi_gpu:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex "
                "to use distributed and fp16 training.")
        self.model = DDP(self.model)
    else:
        self.model = torch.nn.DataParallel(self.model)
def __init__(self, output_dir, model, topk=50, bert_model="bert-large-cased",
             do_lower_case=False, eval_batch_size=64, max_seq_length=128,
             num_labels=2, entail_label=1):
    print("Loading BERT NLIIR Model")
    self.name = "BertNLIIR"
    self.topk = topk
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    tokenizer = BertTokenizer.from_pretrained(bert_model,
                                              do_lower_case=do_lower_case)
    output_model_file = os.path.join(output_dir, model)
    # Load a trained model that you have fine-tuned
    model_state_dict = torch.load(output_model_file)
    model = BertForSequenceClassification.from_pretrained(
        bert_model, state_dict=model_state_dict, num_labels=num_labels)
    model.half()
    model.to(device)
    model = torch.nn.DataParallel(model)
    self.model = model
    self.device = device
    self.tokenizer = tokenizer
    self.max_seq_length = max_seq_length
    self.eval_batch_size = eval_batch_size
    self.num_labels = num_labels
    self.entail_label = entail_label
def _create_net_and_optim(self, word_vocab, char_vocab, net_cfg, optim_cfg,
                          train=True):
    # net = BCN(word_vocab, char_vocab, **net_cfg)
    # BertConfig
    net = BertForSequenceClassification.from_pretrained('bert-large-cased',
                                                        num_labels=5)
    if train:
        net.to(device=self._device)
    optim = getattr(torch.optim, optim_cfg.algo)
    optim = optim(filter(lambda p: p.requires_grad, net.parameters()),
                  **optim_cfg.kwargs)
    """
    param_optimizer = list(net.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}]
    optimizer = BertAdam(optimizer_grouped_parameters, lr=1.0e-6)
    """
    return net, optim
def __init__(self, bert_interface, model='bert-base-uncased', num_labels=2):
    # num_labels must be passed as a keyword: from_pretrained's second
    # positional parameter is state_dict in pytorch-pretrained-bert.
    self.model = BertForSequenceClassification.from_pretrained(
        model, num_labels=num_labels)
    super().__init__(bert_interface, Path('.'))
def get_bert_binary_model() -> BertForSequenceClassification:
    bert_model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased", num_labels=2)
    if n_gpu > 1:
        bert_model = torch.nn.DataParallel(bert_model)
    return bert_model
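# Minimal sketch of calling the binary classifier above; assumes the
# module-level `n_gpu` it references, and the token ids are illustrative:
bert_model = get_bert_binary_model()
bert_model.eval()
input_ids = torch.tensor([[101, 7592, 2088, 102]])  # [CLS] hello world [SEP]
with torch.no_grad():
    logits = bert_model(input_ids)  # returns logits when no labels are given
print(logits.shape)  # torch.Size([1, 2]) for the binary head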
def __init__(self, archive_file, model_file=None, use_cuda=False):
    if not os.path.isfile(archive_file):
        if not model_file:
            raise Exception("No model for DA-predictor is specified!")
        archive_file = cached_path(model_file)
    model_dir = os.path.dirname(os.path.abspath(__file__))
    if not os.path.exists(os.path.join(model_dir, 'checkpoints')):
        archive = zipfile.ZipFile(archive_file, 'r')
        archive.extractall(model_dir)
    load_dir = os.path.join(model_dir,
                            "checkpoints/predictor/save_step_15120")
    self.db = Database()
    if not os.path.exists(load_dir):
        archive = zipfile.ZipFile(f'{load_dir}.zip', 'r')
        archive.extractall(os.path.dirname(load_dir))
    self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased",
                                                   do_lower_case=True)
    self.max_seq_length = 256
    self.domain = 'restaurant'
    self.model = BertForSequenceClassification.from_pretrained(
        load_dir,
        cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE),
                               'distributed_{}'.format(-1)),
        num_labels=44)
    self.device = 'cuda' if use_cuda else 'cpu'
    self.model.to(self.device)
def create_model(args, dataset, train=True):
    print("[*] Create model.")
    global model
    if train:
        model = BertForSequenceClassification.from_pretrained(BERT,
                                                              num_labels=5)
    else:
        if BERT == 'bert-large-uncased':
            config = BertConfig.from_json_file("uncase_model")
        else:
            config = BertConfig.from_json_file("case_model")
        model = BertForSequenceClassification(config, num_labels=5)
    # for i in model.bert.named_parameters():
    #     i[1].requires_grad = False
    model = model.to(device)
    # print(model)
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    if train:
        num_train_optimization_steps = int(
            len(dataset["train"]) / args.batch_size /
            args.gradient_accumulation_steps) * args.epochs
        global optimizer
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.lr_rate,
                             warmup=0.1,
                             t_total=num_train_optimization_steps)
        # optimizer = optim.Adam(model.parameters(),
        #     lr=args.lr_rate)  # , betas=(0.9, 0.999), weight_decay=1e-3)
    return
def init_model(args):
    # Processors that prepare the model inputs; most of the ones on GitHub
    # target English data.
    processors = {'mypro': MyPro, 'classify': ClassificationProcessor}
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        torch.distributed.init_process_group(backend='nccl')
        if args.fp16:
            logger.info("16-bits training currently not supported in "
                        "distributed training")
            # (see https://github.com/pytorch/pytorch/pull/13496)
            args.fp16 = False
    logger.info("device %s n_gpu %d distributed training %r", device, n_gpu,
                bool(args.local_rank != -1))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)
    task_name = args.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % task_name)
    processor = processors[task_name]()
    processor.get_train_examples(args.data_dir)
    label_list = processor.get_labels()
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)
    # Prepare model
    model = BertForSequenceClassification.from_pretrained(
        args.bert_model,
        cache_dir=PYTORCH_PRETRAINED_BERT_CACHE /
        'distributed_{}'.format(args.local_rank),
        num_labels=len(label_list))
    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)
    if not torch.cuda.is_available():
        model.load_state_dict(
            torch.load(args.model_save_pth, map_location='cpu')['state_dict'])
    else:
        model.load_state_dict(torch.load(args.model_save_pth)['state_dict'])
    return model, processor, args, label_list, tokenizer, device
def load_raw_model_and_tokenizer(args):
    vocab_file_path = '{}/bert-large-uncased-vocab.txt'.format(args.cache_dir)
    tokenizer = BertTokenizer.from_pretrained(
        vocab_file_path, do_lower_case=args.do_lower_case)
    model_file_path = '{}/{}.tar.gz'.format(args.cache_dir, args.bert_model)
    model = BertForSequenceClassification.from_pretrained(model_file_path,
                                                          num_labels=2)
    return model, tokenizer
def load_bert(task_type, bert_model_name, bert_load_mode, all_state,
              num_labels, bert_config_json_path=None):
    if bert_config_json_path is None:
        bert_config_json_path = os.path.join(
            get_bert_config_path(bert_model_name), "bert_config.json")
    if bert_load_mode in ("model_only", "full_model_only"):
        state_dict = all_state
    elif bert_load_mode in ["state_model_only", "state_all",
                            "state_full_model"]:
        state_dict = all_state["model"]
    else:
        raise KeyError(bert_load_mode)
    if task_type == TaskType.CLASSIFICATION:
        if bert_load_mode in ("state_full_model", "full_model_only"):
            model = BertForSequenceClassification.from_state_dict_full(
                config_file=bert_config_json_path,
                state_dict=state_dict,
                num_labels=num_labels,
            )
        else:
            model = BertForSequenceClassification.from_state_dict(
                config_file=bert_config_json_path,
                state_dict=state_dict,
                num_labels=num_labels,
            )
    elif task_type == TaskType.REGRESSION:
        assert num_labels == 1
        if bert_load_mode in ("state_full_model", "full_model_only"):
            model = BertForSequenceRegression.from_state_dict_full(
                config_file=bert_config_json_path,
                state_dict=state_dict,
            )
        else:
            model = BertForSequenceRegression.from_state_dict(
                config_file=bert_config_json_path,
                state_dict=state_dict,
            )
    else:
        raise KeyError(task_type)
    return model