def save_and_reload(self, path, model_name):
    torch.cuda.empty_cache()
    self.model.to('cpu')

    # Save a trained model
    model_to_save = self.model.module if hasattr(self.model, 'module') else self.model  # Only save the model itself
    output_model_file = os.path.join(path, "{}.bin".format(model_name))
    torch.save(model_to_save.state_dict(), output_model_file)

    # Load a trained model that you have fine-tuned
    model_state_dict = torch.load(output_model_file)
    if self.multi_label:
        self.model = BertForMultiLabelSequenceClassification.from_pretrained(
            self.pretrained_model_path,
            num_labels=len(self.data.labels),
            state_dict=model_state_dict)
    else:
        self.model = BertForSequenceClassification.from_pretrained(
            self.pretrained_model_path,
            num_labels=len(self.data.labels),
            state_dict=model_state_dict)

    if self.is_fp16:
        self.model.half()
    torch.cuda.empty_cache()
    self.model.to(self.device)

    if not self.multi_gpu:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError("Please install apex to use distributed and fp16 training.")
        self.model = DDP(self.model)
    else:
        self.model = torch.nn.DataParallel(self.model)
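
# Usage sketch for save_and_reload, assuming a fitted learner object that
# exposes the method above; `learner` and `MODEL_DIR` are hypothetical names.
learner.save_and_reload(MODEL_DIR, 'bert_classifier')
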
def prep_for_training(num_train_optimization_steps, _config):
    tokenizer = BertTokenizer.from_pretrained(
        _config["bert_model"], do_lower_case=_config["do_lower_case"])

    # TODO: change model here
    model = BertForSequenceClassification.from_pretrained(
        _config["bert_model"],
        cache_dir=_config["cache_dir"],
        num_labels=_config["num_labels"])
    model.to(_config["device"])

    # Prepare optimizer: no weight decay on bias and LayerNorm parameters
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0},
    ]
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=_config["learning_rate"],
                         warmup=_config["warmup_proportion"],
                         t_total=num_train_optimization_steps)

    return model, optimizer, tokenizer
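
# A sketch of deriving num_train_optimization_steps before calling
# prep_for_training; the _config keys used here (train_batch_size,
# gradient_accumulation_steps, num_train_epochs) are assumptions, not
# defined in the snippet above.
num_train_optimization_steps = int(
    len(train_examples) / _config["train_batch_size"]
    / _config["gradient_accumulation_steps"]) * _config["num_train_epochs"]
model, optimizer, tokenizer = prep_for_training(num_train_optimization_steps, _config)
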
def define_model(self):
    """Defines the model, device and tokenizer."""
    max_len = 64
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    def tokenize(sentence):
        return tokenizer.encode_plus(
            sentence,
            add_special_tokens=True,     # add [CLS], [SEP]
            max_length=max_len,          # max length of the text that can go to BERT
            pad_to_max_length=True,      # add [PAD] tokens
            return_attention_mask=True,  # add attention mask to not focus on pad tokens
        )

    model = BertForSequenceClassification.from_pretrained(
        self.model_path, num_labels=3, cache_dir=None)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(self.device)
    model.eval()

    self.model = model
    self.tokenize = tokenize
def __init__(self, archive_file, model_file=None, use_cuda=False):
    if not os.path.isfile(archive_file):
        if not model_file:
            raise Exception("No model for DA-predictor is specified!")
        archive_file = cached_path(model_file)

    model_dir = os.path.dirname(os.path.abspath(__file__))
    if not os.path.exists(os.path.join(model_dir, 'checkpoints')):
        archive = zipfile.ZipFile(archive_file, 'r')
        archive.extractall(model_dir)

    load_dir = os.path.join(model_dir, "checkpoints/predictor/save_step_23926")
    self.db = Database()
    if not os.path.exists(load_dir):
        archive = zipfile.ZipFile('{}.zip'.format(load_dir), 'r')
        archive.extractall(os.path.dirname(load_dir))

    self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", do_lower_case=True)
    self.max_seq_length = 256
    self.domain = 'restaurant'
    self.model = BertForSequenceClassification.from_pretrained(
        load_dir,
        cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(-1)),
        num_labels=44)
    self.device = 'cuda' if use_cuda else 'cpu'
    self.model.to(self.device)
def train(logger, args):
    os.makedirs(args.output_dir, exist_ok=True)
    processor = NLIProcessor()
    output_mode = "classification"

    # Prepare inputs
    label_list = processor.get_labels()
    num_labels = len(label_list)
    tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=False)

    train_examples = processor.get_train_examples(args.data_dir, args.train_rte_file)
    train_examples = train_examples[0:10]  # debugging
    num_eg = len(train_examples)
    train_features = convert_examples_to_features(
        train_examples, label_list, args.max_seq_length, tokenizer, output_mode)

    # Prepare model
    cache_dir = (args.cache_dir if args.cache_dir else os.path.join(
        str(PYTORCH_PRETRAINED_BERT_CACHE), f"distributed_{args.local_rank}"))
    model = BertForSequenceClassification.from_pretrained(
        args.bert_model, cache_dir=cache_dir, num_labels=num_labels)

    # Train
    trainer = Trainer(module="nli", model=model, args=args, tokenizer=tokenizer)
    trainer.train(train_features, num_labels, num_eg)
def get_test_model(output_model_file) -> BertForSequenceClassification:
    # output_model_file = os.path.join('save', "finetuned_pytorch_model.bin")
    device = torch.device(
        "cuda" if torch.cuda.is_available() and not args["no_cuda"] else "cpu")
    model_state_dict = torch.load(output_model_file)
    test_model = BertForSequenceClassification.from_pretrained(
        args['bert_model'], num_labels=40, state_dict=model_state_dict)
    return test_model.to(device).eval()
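
# Usage sketch, reusing the path from the commented-out line above:
test_model = get_test_model(os.path.join('save', "finetuned_pytorch_model.bin"))
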
def __init__(self, label_list, ren, norm_fn, device):
    self._label_list = label_list
    self._ren = ren
    self._device = device

    self._tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=True)
    self._model = BertForSequenceClassification.from_pretrained(
        BERT_MODEL, num_labels=len(label_list)).to(device)

    self._optimizer = None
    self._dataset = {}
    self._data_loader = {}
    self._weights = None
    self._w_decay = None

    if norm_fn == 'linear':
        self._norm_fn = _linear_normalize
    elif norm_fn == 'softmax':
        self._norm_fn = _softmax_normalize

    if ren:
        assert norm_fn == 'linear'
def get_bert_binary_model() -> BertForSequenceClassification:
    bert_model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased", num_labels=2)
    if n_gpu > 1:
        bert_model = torch.nn.DataParallel(bert_model)
    return bert_model
def init_model(args):
    # Processors that preprocess the model inputs; the ones found on GitHub
    # mostly target English data
    processors = {'mypro': MyPro, 'classify': ClassificationProcessor}

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device(
            "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        torch.distributed.init_process_group(backend='nccl')
        if args.fp16:
            logger.info("16-bit training is currently not supported in distributed training")
            args.fp16 = False  # (see https://github.com/pytorch/pytorch/pull/13496)
    logger.info("device %s n_gpu %d distributed training %r",
                device, n_gpu, bool(args.local_rank != -1))

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    task_name = args.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % task_name)
    processor = processors[task_name]()
    processor.get_train_examples(args.data_dir)
    label_list = processor.get_labels()
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    # Prepare model
    model = BertForSequenceClassification.from_pretrained(
        args.bert_model,
        cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(args.local_rank),
        num_labels=len(label_list))
    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Restore fine-tuned weights
    if not torch.cuda.is_available():
        model.load_state_dict(
            torch.load(args.model_save_pth, map_location='cpu')['state_dict'])
    else:
        model.load_state_dict(torch.load(args.model_save_pth)['state_dict'])

    return model, processor, args, label_list, tokenizer, device
def load_saved_model(experiment_name, model_output_dir, num_labels):
    saved_model_path = os.path.join(model_output_dir, experiment_name)
    model_state_dict = torch.load(os.path.join(saved_model_path, WEIGHTS_NAME))
    model = BertForSequenceClassification.from_pretrained(
        saved_model_path, num_labels=num_labels, state_dict=model_state_dict)
    return model
def load_raw_model_and_tokenizer(args):
    vocab_file_path = '{}/bert-large-uncased-vocab.txt'.format(args.cache_dir)
    tokenizer = BertTokenizer.from_pretrained(vocab_file_path,
                                              do_lower_case=args.do_lower_case)
    model_file_path = '{}/{}.tar.gz'.format(args.cache_dir, args.bert_model)
    model = BertForSequenceClassification.from_pretrained(model_file_path, num_labels=2)
    return model, tokenizer
def __init__(self):
    # Configuration
    self.ROOT_FOLDER = os.path.dirname(__file__)
    print('ROOT_FOLDER', self.ROOT_FOLDER)
    DIRECTORIES = {
        'ml_hate_speech_path': os.path.join(self.ROOT_FOLDER,
                                            'models/ml_hate_speech_classifier')
    }

    # Load a trained model that you have fine-tuned
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.bert_model = 'bert-base-multilingual-uncased'
    self.model_file = os.path.join(DIRECTORIES['ml_hate_speech_path'],
                                   'pytorch_model_epoch_20_seqlen_256.bin')
    print('model_file', self.model_file)
    if not os.path.isfile(self.model_file):
        print('Please download the model ...')
        exit(0)

    if torch.cuda.is_available():
        model_state_dict = torch.load(self.model_file)
    else:
        print('Loading model ...', self.model_file)
        model_state_dict = torch.load(self.model_file, map_location='cpu')

    base_dir = os.path.join(DIRECTORIES['ml_hate_speech_path'], self.bert_model)
    tokenizer_file = os.path.join(base_dir, 'vocab.txt')
    config_file = os.path.join(base_dir, 'bert_config.json')
    bert_model_file = base_dir + '/'

    self.tokenizer = BertTokenizer.from_pretrained(tokenizer_file)
    config = BertConfig.from_json_file(config_file)
    self.model = BertForSequenceClassification.from_pretrained(
        bert_model_file, state_dict=model_state_dict, num_labels=2)
    self.model.to(self.device)
def get_model(num_labels, device, n_gpu):
    model = BertForSequenceClassification.from_pretrained(
        config['bert_model'], cache_dir=config['cache_dir'], num_labels=num_labels)
    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)
    return model
def __init__(self):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    c_bert_model = "./tmp_chinese/mrpc_output/"
    raw_bert_model = "./models/chinese_L-12_H-768_A-12"
    num_labels = 2
    self.tokenizer = BertTokenizer.from_pretrained(raw_bert_model)
    self.model = BertForSequenceClassification.from_pretrained(
        c_bert_model, num_labels=num_labels)
    self.model.to(device)
def boot_model():
    processor = QqpProcessor()
    label_list = processor.get_labels()
    num_labels = len(label_list)
    model = BertForSequenceClassification.from_pretrained(
        load_dir, cache_dir=cache_dir, num_labels=num_labels)
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)
    return model, tokenizer, label_list, label_list
def get_argument_labels(topics=[], sentences=[]):
    tokenizer = BertTokenizer.from_pretrained(model_path, do_lower_case=True)

    if len(topics) == 0 or len(sentences) == 0 or len(topics) != len(sentences):
        input_examples = input_examples_init[:]
    else:
        input_examples = create_examples(topics, sentences)

    eval_features = convert_examples_to_features(
        input_examples, label_list, max_seq_length, tokenizer)
    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)

    eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids)
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                 batch_size=eval_batch_size)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = BertForSequenceClassification.from_pretrained(
        model_path, num_labels=num_labels)
    model.to(device)
    model.eval()

    predicted_labels = []
    with torch.no_grad():
        for input_ids, input_mask, segment_ids in eval_dataloader:
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            logits = model(input_ids, segment_ids, input_mask)
            logits = logits.detach().cpu().numpy()
            for prediction in np.argmax(logits, axis=1):
                predicted_labels.append(label_list[prediction])

    # Split example indices by whether an argument was predicted
    args_idx, no_args_idx = [], []
    for idx in range(len(input_examples)):
        if predicted_labels[idx] != 'NoArgument':
            args_idx.append(idx)
        else:
            no_args_idx.append(idx)
    return args_idx, no_args_idx
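
# Usage sketch for get_argument_labels: topics and sentences must be
# equal-length lists, otherwise the preloaded default examples are used.
# The topic/sentence strings below are illustrative only.
args_idx, no_args_idx = get_argument_labels(
    topics=["nuclear energy"],
    sentences=["Nuclear power plants produce no CO2 during operation."])
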
def predict(new):
    model = BertForSequenceClassification.from_pretrained("./", num_labels=2)
    tokenizer = BertTokenizer.from_pretrained("./", do_lower_case=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    label_list = ["0", "1"]
    if sys.version_info[0] == 2:
        new = list(unicode(cell, 'utf-8') for cell in new)

    example = InputExample(guid="test-0", text_a=new, text_b=None, label="1")
    eval_examples = [example]
    eval_features = convert_examples_to_features(
        eval_examples, label_list, 128, tokenizer, "classification")

    all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
    all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
    all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
    all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_input_mask,
                              all_segment_ids, all_label_ids)

    # Run prediction for the full data
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=8)

    model.eval()
    preds = []
    for input_ids, input_mask, segment_ids, label_ids in tqdm(eval_dataloader,
                                                              desc="Evaluating"):
        input_ids = input_ids.to(device)
        input_mask = input_mask.to(device)
        segment_ids = segment_ids.to(device)
        with torch.no_grad():
            logits = model(input_ids, segment_ids, input_mask, labels=None)
        if len(preds) == 0:
            preds.append(logits.detach().cpu().numpy())
        else:
            preds[0] = np.append(preds[0], logits.detach().cpu().numpy(), axis=0)

    preds = preds[0]
    neg, pos = preds[0]
    # Softmax over the two logits gives the positive-class probability
    prob_pos = np.exp(pos) / (np.exp(neg) + np.exp(pos))
    return prob_pos
def __init__(
    self,
    language=Language.ENGLISH,
    num_labels=2,
    cache_dir=".",
    use_distributed=False,
):
    """
    Args:
        language: Language passed to pre-trained BERT model to pick the
            appropriate model
        num_labels: Number of unique labels in the train dataset
        cache_dir: Cache directory used to load the pre-trained BERT model.
            Defaults to "."
        use_distributed: Whether to train with Horovod-based distributed
            data parallelism
    """
    if num_labels < 2:
        raise ValueError("Number of labels should be at least 2.")

    self.language = language
    self.num_labels = num_labels
    self.cache_dir = cache_dir
    self.use_distributed = use_distributed

    # Create the classifier
    self.model = BertForSequenceClassification.from_pretrained(
        language.value, cache_dir=cache_dir, num_labels=num_labels)

    # Define optimizer parameter groups: no weight decay on bias and LayerNorm
    param_optimizer = list(self.model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in param_optimizer
                       if not any(nd in n for nd in no_decay)],
            "weight_decay": 0.01,
        },
        {
            "params": [p for n, p in param_optimizer
                       if any(nd in n for nd in no_decay)]
        },
    ]
    self.optimizer_params = optimizer_grouped_parameters
    self.name_parameters = self.model.named_parameters()
    self.state_dict = self.model.state_dict()

    if use_distributed:
        hvd.init()
        if torch.cuda.is_available():
            torch.cuda.set_device(hvd.local_rank())
        else:
            warnings.warn("No GPU available! Using CPU.")
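
# Minimal construction sketch; the enclosing class name is not shown above,
# so "BERTSequenceClassifier" here is a hypothetical stand-in.
classifier = BERTSequenceClassifier(language=Language.ENGLISH, num_labels=2,
                                    cache_dir="./bert_cache")
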
def gcn_bert0(num_classes, t, pretrained=True, adj_file=None, in_channel=300):
    model = BertForSequenceClassification.from_pretrained(
        './bert-base-uncased',
        cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(-1),
        num_labels=2)
    return GCNBert(model, num_classes, t=t, adj_file=adj_file, in_channel=in_channel)
def train_bert():
    project_dir = Path.cwd() / "finbert" / "finBERT"
    pd.set_option('max_colwidth', -1)
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.ERROR)

    lm_path = project_dir / 'models' / 'language_model' / 'finbertTRC2'
    cl_path = project_dir / 'models' / 'classifier_model' / 'finbert-sentiment'
    cl_data_path = project_dir / 'data' / 'sentiment_data'

    # Clean the classifier output directory
    try:
        shutil.rmtree(cl_path)
    except OSError:
        pass

    bertmodel = BertForSequenceClassification.from_pretrained(
        lm_path, cache_dir=None, num_labels=3)

    config = Config(data_dir=cl_data_path,
                    bert_model=bertmodel,
                    num_train_epochs=4,
                    model_dir=cl_path,
                    max_seq_length=48,
                    train_batch_size=32,
                    learning_rate=2e-5,
                    output_mode='classification',
                    warm_up_proportion=0.2,
                    local_rank=-1,
                    discriminate=True,
                    gradual_unfreeze=True)

    print("Fine tuning BERT model to the financial domain!\n")
    finbert = FinBert(config)
    finbert.prepare_model(label_list=['positive', 'negative', 'neutral'])

    # Get the training examples and train
    train_data = finbert.get_data('train')
    model = finbert.create_the_model()
    trained_model = finbert.train(train_examples=train_data, model=model)

    # Evaluate on the test set
    test_data = finbert.get_data('test')
    results = finbert.evaluate(examples=test_data, model=trained_model)
    results['prediction'] = results.predictions.apply(
        lambda x: pd.np.argmax(x, axis=0))
    report(results, cols=['labels', 'prediction', 'predictions'])
def __init__(self, args):
    self.model = BertForSequenceClassification.from_pretrained(
        args.output_dir, num_labels=args.num_labels)
    self.tokenizer = BertTokenizer.from_pretrained(
        args.output_dir, do_lower_case=args.do_lower_case)
    self.max_sentence_length = args.max_sentence_length
    print('Loaded model with fine-tuned weights')

    # Set to eval mode
    self.model.cuda()
    self.model.eval()
def bertForSequenceClassification(*args, **kwargs):
    """
    BertForSequenceClassification is a fine-tuning model that includes
    BertModel and a sequence-level (sequence or pair of sequences)
    classifier on top of the BertModel.

    The sequence-level classifier is a linear layer that takes as input the
    last hidden state of the first token ([CLS]) in the input sequence
    (see Figures 3a and 3b in the BERT paper).
    """
    model = BertForSequenceClassification.from_pretrained(*args, **kwargs)
    return model
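
# Usage sketch for the hub-style entry point above; "bert-base-uncased" and
# num_labels=2 are illustrative choices, not fixed by the wrapper itself.
model = bertForSequenceClassification('bert-base-uncased', num_labels=2)
model.eval()
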
def __init__(self, model_dir, num_classes, seed=None):
    super().__init__()
    if seed is None:
        seed = torch.initial_seed() & ((1 << 63) - 1)
    self.logger.info("Using seed {}".format(seed))
    torch.manual_seed(seed)
    self.model = BertForSequenceClassification.from_pretrained(
        model_dir, num_labels=num_classes)
    print(self.model)
def __init__(self, opt, emb_matrix=None):
    super().__init__()
    self.gcn_model = GCNRelationModel(opt, emb_matrix=emb_matrix)
    in_dim = opt['hidden_dim']
    labelNum = constant.LABEL_TO_ID
    self.bert = BertForSequenceClassification.from_pretrained(
        opt["bert_model_file"],
        cache_dir=str(PYTORCH_PRETRAINED_BERT_CACHE),
        num_labels=opt['num_class'])
    self.classifier = nn.Linear(in_dim + 768, opt['num_class'])
    self.opt = opt
    self.init_embeddings()
def load_bert_model(model_path):
    data_root = '../data/toxic_comment/cleaned/'
    bert_tok = BertTokenizer.from_pretrained("bert-base-uncased")

    train_df = pd.read_csv(data_root + 'train.csv')
    test_df = pd.read_csv(data_root + 'test.csv')
    train_df.fillna('no comment', inplace=True)
    test_df.fillna('no comment', inplace=True)

    # Split into train/validation with an 8:2 ratio
    train, val = train_test_split(train_df, shuffle=True,
                                  test_size=0.2, random_state=42)

    fastai_bert_vocab = Vocab(list(bert_tok.vocab.keys()))
    label_columns = ["toxic", "severe_toxic", "obscene",
                     "threat", "insult", "identity_hate"]
    fastai_tokenizer = Tokenizer(
        tok_func=FastAiBertTokenizer(bert_tok, max_seq_len=256),
        pre_rules=[],
        post_rules=[])

    data_bunch_train = TextClasDataBunch.from_df(
        model_path,
        train,
        val,
        tokenizer=fastai_tokenizer,
        vocab=fastai_bert_vocab,
        include_bos=False,
        include_eos=False,
        text_cols="comment_text",
        label_cols=label_columns,
        bs=12,
        collate_fn=partial(pad_collate, pad_first=False, pad_idx=0),
    )

    model = BertForSequenceClassification.from_pretrained(
        'bert-base-uncased', num_labels=6)

    loss_func = nn.BCEWithLogitsLoss()
    acc_02 = partial(accuracy_thresh, thresh=0.25)
    learner = Learner(
        data_bunch_train,
        model,
        loss_func=loss_func,
        model_dir='model/',
        metrics=acc_02)
    return learner
def _prepare_model(self) -> BertPreTrainedModel:
    if self.args.cache_dir:
        cache_dir = self.args.cache_dir
    else:
        cache_dir = os.path.join(
            str(PYTORCH_PRETRAINED_BERT_CACHE),
            f"distributed_{self.args.local_rank}",
        )
    model = BertForSequenceClassification.from_pretrained(
        self.args.bert_model, cache_dir=cache_dir, num_labels=self.num_labels)
    model.to(self.device)
    return model
def __init__(self, cfg: Namespace, data: Dataset):
    """
    Args:
        cfg: configuration
        data: train dataset
    """
    self.cfg = cfg
    self.train, self.valid = data.split(0.8)
    RATING_FIELD.build_vocab(self.train)

    self.device = torch.device('cuda') if torch.cuda.is_available() \
        else torch.device('cpu')  # pylint: disable=no-member
    self.batch_size = cfg.batch_size
    if torch.cuda.is_available():
        self.batch_size *= torch.cuda.device_count()

    self.trn_itr = BucketIterator(
        self.train,
        device=self.device,
        batch_size=self.batch_size,
        shuffle=True,
        train=True,
        sort_within_batch=True,
        sort_key=lambda exam: -len(exam.comment_text))
    self.vld_itr = BucketIterator(
        self.valid,
        device=self.device,
        batch_size=self.batch_size,
        shuffle=False,
        train=False,
        sort_within_batch=True,
        sort_key=lambda exam: -len(exam.comment_text))

    self.log_step = 1000
    if len(self.vld_itr) < 100:
        self.log_step = 10
    elif len(self.vld_itr) < 1000:
        self.log_step = 100

    bert_path = cfg.bert_path if cfg.bert_path else 'bert-base-cased'
    self.model = BertForSequenceClassification.from_pretrained(
        bert_path, num_labels=2)

    # Weight the positive class by the negative/positive example ratio
    pos_weight = (
        len([exam for exam in self.train.examples if exam.target < 0.5])
        / len([exam for exam in self.train.examples if exam.target >= 0.5]))
    pos_wgt_tensor = torch.tensor([1.0, pos_weight], device=self.device)  # pylint: disable=not-callable
    self.criterion = nn.CrossEntropyLoss(weight=pos_wgt_tensor)

    if torch.cuda.is_available():
        self.model = DataParallelModel(self.model.cuda())
        self.criterion = DataParallelCriterion(self.criterion)
    self.optimizer = optim.Adam(self.model.parameters(), cfg.learning_rate)
def __init__(self, label_list, device):
    self._label_list = label_list
    self._device = device
    self._tokenizer = BertTokenizer.from_pretrained(BERT_MODEL, do_lower_case=True)
    self._model = BertForSequenceClassification.from_pretrained(
        BERT_MODEL, num_labels=len(label_list)).to(device)
    self._optimizer = None
    self._dataset = {}
    self._data_loader = {}
def save_train(model, tokenizer):
    # Save a trained model, its configuration and the tokenizer
    model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model itself

    # If we save using the predefined names, we can load using `from_pretrained`
    output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
    output_config_file = os.path.join(args.output_dir, CONFIG_NAME)

    torch.save(model_to_save.state_dict(), output_model_file)
    model_to_save.config.to_json_file(output_config_file)
    tokenizer.save_vocabulary(args.output_dir)

    # Load a trained model and vocabulary that you have fine-tuned
    model = BertForSequenceClassification.from_pretrained(
        args.output_dir, num_labels=num_labels)
    tokenizer = BertTokenizer.from_pretrained(
        args.output_dir, do_lower_case=args.do_lower_case)
    return model, tokenizer
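
# A minimal sketch of the save-then-reload round trip, assuming `args`,
# `num_labels`, and a fine-tuned `model`/`tokenizer` are in scope as above:
model, tokenizer = save_train(model, tokenizer)
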
def Bowen_load_model(self, model_path, use_cuda):
    """
    Earlier implementation, kept for reference:

        model = torch.load(model_path)
        metadata = json.load(open(model_path[:-3] + ".meta", "r", encoding="utf-8"))
        if use_cuda is not False:
            torch.cuda.set_device(use_cuda)
            model.cuda()
        metadata["device"] = use_cuda  # record which GPU the model lives on
        return model, metadata
    """
    model = BertForSequenceClassification.from_pretrained(
        model_path, num_labels=2).cuda(use_cuda)
    metadata = {}
    tokenizer = GongwenTokenizer(model_path)
    return model, tokenizer, metadata