class BERT_PyTorch_SUT():
    """LoadGen system-under-test that answers queries with a PyTorch BERT QA model.

    The constructor loads the BERT configuration and weights, moves the model
    to CUDA, registers the query callbacks with LoadGen via
    ``lg.ConstructSUT`` and finally obtains the query sample library from
    ``get_squad_QSL()``.

    Args:
        config_path: path of the JSON file holding the BERT hyper-parameters.
        model_path: path of the serialized PyTorch ``state_dict``.
    """

    # Keys copied verbatim from the JSON config file into ``BertConfig``.
    _CONFIG_KEYS = (
        "attention_probs_dropout_prob",
        "hidden_act",
        "hidden_dropout_prob",
        "hidden_size",
        "initializer_range",
        "intermediate_size",
        "max_position_embeddings",
        "num_attention_heads",
        "num_hidden_layers",
        "type_vocab_size",
        "vocab_size",
    )

    def __init__(self,
                 config_path="bert_config.json",
                 model_path="build/data/bert_tf_v1_1_large_fp32_384_v2/model.pytorch"):
        print("Loading BERT configs...")
        with open(config_path) as f:
            config_json = json.load(f)

        # Only the whitelisted keys are forwarded; a missing key raises KeyError.
        config = BertConfig(
            **{key: config_json[key] for key in self._CONFIG_KEYS})

        print("Loading PyTorch model...")
        self.model = BertForQuestionAnswering(config)
        self.model.eval()
        self.model.cuda()
        self.model.load_state_dict(torch.load(model_path))

        print("Constructing SUT...")
        self.sut = lg.ConstructSUT(self.issue_queries, self.flush_queries,
                                   self.process_latencies)
        print("Finished constructing SUT.")

        self.qsl = get_squad_QSL()

    @staticmethod
    def _to_batch(values):
        """Turn a flat sequence of ints into a ``(1, len)`` LongTensor on CUDA."""
        return torch.LongTensor(values).unsqueeze(0).cuda()

    def issue_queries(self, query_samples):
        """Run inference for every query sample and report each result to LoadGen.

        Args:
            query_samples: iterable of LoadGen query samples; ``.index`` selects
                the features in the QSL and ``.id`` identifies the response.
        """
        with torch.no_grad():
            for sample in query_samples:
                eval_features = self.qsl.get_features(sample.index)
                # Call the module itself rather than ``forward`` so that any
                # registered module hooks are honoured.
                start_scores, end_scores = self.model(
                    input_ids=self._to_batch(eval_features.input_ids),
                    attention_mask=self._to_batch(eval_features.input_mask),
                    token_type_ids=self._to_batch(eval_features.segment_ids))
                output = torch.stack(
                    [start_scores, end_scores], dim=-1).squeeze(0).cpu().numpy()

                # ``response_array`` owns the bytes whose address is handed to
                # LoadGen, so it must stay referenced until
                # ``QuerySamplesComplete`` has returned.
                response_array = array.array("B", output.tobytes())
                bi = response_array.buffer_info()
                response = lg.QuerySampleResponse(sample.id, bi[0], bi[1])
                lg.QuerySamplesComplete([response])

    def flush_queries(self):
        """LoadGen flush callback; nothing is buffered, so this is a no-op."""
        pass

    def process_latencies(self, latencies_ns):
        """LoadGen latency callback; the reported latencies are ignored."""
        pass

    def __del__(self):
        print("Finished destroying SUT.")
def model_fn(model_dir):
    """Create a ``BertForQuestionAnswering`` model from the files in ``model_dir``.

    The configuration is read from ``<model_dir>/config_file.json`` and the
    weights from ``<model_dir>/pytorch_model.bin``.

    Args:
        model_dir: directory path (string) containing both files.

    Returns:
        The model with the stored state dict loaded.
    """
    config = BertConfig.from_json_file(model_dir + '/config_file.json')
    model = BertForQuestionAnswering(config)

    # Deserialize the checkpoint onto the GPU when one is available,
    # otherwise onto the CPU.
    if torch.cuda.is_available():
        device_name = 'cuda'
    else:
        device_name = 'cpu'
    state_dict = torch.load(model_dir + '/pytorch_model.bin',
                            map_location=torch.device(device_name))
    model.load_state_dict(state_dict)
    return model
def interaction(args):
    """Run an interactive question-answering prompt until the user types ``exit``.

    Every entered line must be a JSON object of the form
    ``{"sentence": "<context passage>", "question": "<question about it>"}``.
    The parsed object is passed, together with the module-level ``tokenizer``,
    to ``BertQATrainer.interaction``.

    The model and trainer are created once, on the first valid query, instead
    of being reloaded from disk for every question. A line that is not valid
    JSON is reported and the prompt is shown again rather than aborting the
    session.

    Args:
        args: parsed command-line arguments; ``args.load_model_path`` is the
            directory that contains ``model/best_model.bin``. ``args`` is also
            forwarded unchanged to ``BertQATrainer``.
    """
    trainer = None  # built lazily so that an immediate 'exit' loads nothing
    while True:
        qa_text = input("------------------\nPlease Enter :\n")
        if qa_text == 'exit':
            break

        try:
            qa_text = json.loads(qa_text)
        except json.JSONDecodeError as err:
            print("Invalid JSON input ({}); please try again.".format(err))
            continue

        if trainer is None:
            config = BertConfig.from_pretrained('bert-base-chinese')
            model = BertForQuestionAnswering(config)
            model.load_state_dict(
                torch.load('{}/model/best_model.bin'.format(
                    args.load_model_path)))
            trainer = BertQATrainer(args, model, None, None, None)

        trainer.interaction(tokenizer, qa_text)
type=str, required=True, help="model para after pretrained") args = parser.parse_args() args.n_gpu = torch.cuda.device_count() args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu) device = torch.device( "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") args.device = device tokenizer = BertTokenizer.from_pretrained('bert-base-chinese', do_lower_case=False) config = BertConfig.from_pretrained('bert-base-chinese') model = BertForQuestionAnswering(config) model_state_dict = args.state_dict model.load_state_dict(torch.load(model_state_dict)) model.to(args.device) model.eval() input_file = args.predict_file def handle_file(input_file, context, question): orig_data = {"data": [{"paragraphs": [{"context": context, "qas": []}]}]} for i in range(len(question)): orig_data["data"][0]['paragraphs'][0]['qas'].append({ 'question': question[i], 'id': str(i) }) with open(input_file, "w", encoding='utf-8') as writer:
class TorchBertSQuADModel(TorchModel):
    """Bert-based on PyTorch model for SQuAD-like problem setting:
    It predicts start and end position of answer for given question and context.

    [CLS] token is used as no_answer. If model selects [CLS] token as most probable
    answer, it means that there is no answer in given context.

    Start and end position of answer are predicted by linear transformation
    of Bert outputs.

    Args:
        pretrained_bert: pretrained Bert checkpoint path or key title (e.g. "bert-base-uncased")
        attention_probs_keep_prob: keep_prob for Bert self-attention layers
        hidden_keep_prob: keep_prob for Bert hidden layers
        optimizer: optimizer name from `torch.optim`
        optimizer_parameters: dictionary with optimizer's parameters,
            e.g. {'lr': 0.1, 'weight_decay': 0.001, 'momentum': 0.9};
            when omitted, a fresh copy of ``_DEFAULT_OPTIMIZER_PARAMETERS`` is used
        bert_config_file: path to Bert configuration file, or None, if `pretrained_bert` is a string name
        learning_rate_drop_patience: how many validations with no improvements to wait
        learning_rate_drop_div: the divider of the learning rate after `learning_rate_drop_patience` unsuccessful
            validations
        load_before_drop: whether to load best model before dropping learning rate or not
        clip_norm: clip gradients by norm
        min_learning_rate: min value of learning rate if learning rate decay is used
    """

    # Defaults for ``optimizer_parameters``. Kept out of the signature so that
    # instances never share (and cannot accidentally mutate) one dict object.
    _DEFAULT_OPTIMIZER_PARAMETERS = {
        "lr": 0.01,
        "weight_decay": 0.01,
        "betas": (0.9, 0.999),
        "eps": 1e-6
    }

    def __init__(self,
                 pretrained_bert: str,
                 attention_probs_keep_prob: Optional[float] = None,
                 hidden_keep_prob: Optional[float] = None,
                 optimizer: str = "AdamW",
                 optimizer_parameters: Optional[dict] = None,
                 bert_config_file: Optional[str] = None,
                 learning_rate_drop_patience: int = 20,
                 learning_rate_drop_div: float = 2.0,
                 load_before_drop: bool = True,
                 clip_norm: Optional[float] = None,
                 min_learning_rate: float = 1e-06,
                 **kwargs) -> None:

        if optimizer_parameters is None:
            optimizer_parameters = dict(self._DEFAULT_OPTIMIZER_PARAMETERS)

        self.attention_probs_keep_prob = attention_probs_keep_prob
        self.hidden_keep_prob = hidden_keep_prob
        self.clip_norm = clip_norm

        self.pretrained_bert = pretrained_bert
        self.bert_config_file = bert_config_file

        super().__init__(
            optimizer=optimizer,
            optimizer_parameters=optimizer_parameters,
            learning_rate_drop_patience=learning_rate_drop_patience,
            learning_rate_drop_div=learning_rate_drop_div,
            load_before_drop=load_before_drop,
            min_learning_rate=min_learning_rate,
            **kwargs)

    def train_on_batch(self, features: List[InputFeatures],
                       y_st: List[List[int]],
                       y_end: List[List[int]]) -> Dict:
        """Train model on given batch.
        This method runs one optimization step using features and labels from y_st and y_end

        Args:
            features: batch of InputFeatures instances
            y_st: batch of lists of ground truth answer start positions
            y_end: batch of lists of ground truth answer end positions

        Returns:
            dict with the loss value under the key ``'loss'``

        """

        input_ids = [f.input_ids for f in features]
        input_masks = [f.attention_mask for f in features]
        input_type_ids = [f.token_type_ids for f in features]

        b_input_ids = torch.cat(input_ids, dim=0).to(self.device)
        b_input_masks = torch.cat(input_masks, dim=0).to(self.device)
        b_input_type_ids = torch.cat(input_type_ids, dim=0).to(self.device)

        # Only the first ground-truth answer of every sample is used.
        y_st = [x[0] for x in y_st]
        y_end = [x[0] for x in y_end]
        b_y_st = torch.from_numpy(np.array(y_st)).to(self.device)
        b_y_end = torch.from_numpy(np.array(y_end)).to(self.device)

        self.optimizer.zero_grad()

        outputs = self.model(input_ids=b_input_ids,
                             attention_mask=b_input_masks,
                             token_type_ids=b_input_type_ids,
                             start_positions=b_y_st,
                             end_positions=b_y_end)
        loss = outputs[0]
        loss.backward()
        # Clip the norm of the gradients to `clip_norm` (skipped when it is
        # unset). This is to help prevent the "exploding gradients" problem.
        if self.clip_norm:
            torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                           self.clip_norm)

        self.optimizer.step()
        if self.lr_scheduler is not None:
            self.lr_scheduler.step()

        return {'loss': loss.item()}

    def __call__(
        self, features: List[InputFeatures]
    ) -> Tuple[List[int], List[int], List[float], List[float]]:
        """get predictions using features as input

        Args:
            features: batch of InputFeatures instances

        Returns:
            predictions: start positions and end positions (numpy arrays),
            followed by lists with the best span logit and the answer score
            ``1 - P(start=[CLS]) * P(end=[CLS])`` for every sample

        """

        input_ids = [f.input_ids for f in features]
        input_masks = [f.attention_mask for f in features]
        input_type_ids = [f.token_type_ids for f in features]

        b_input_ids = torch.cat(input_ids, dim=0).to(self.device)
        b_input_masks = torch.cat(input_masks, dim=0).to(self.device)
        b_input_type_ids = torch.cat(input_type_ids, dim=0).to(self.device)

        with torch.no_grad():
            # Forward pass, calculate logit predictions
            outputs = self.model(input_ids=b_input_ids,
                                 attention_mask=b_input_masks,
                                 token_type_ids=b_input_type_ids)
            logits_st, logits_end = outputs[:2]

            bs = b_input_ids.size()[0]
            seq_len = b_input_ids.size()[-1]
            # One at position 0 (the [CLS] / no-answer slot), zero elsewhere.
            mask = torch.cat([
                torch.ones(bs, 1, dtype=torch.int32),
                torch.zeros(bs, seq_len - 1, dtype=torch.int32)
            ], dim=-1).to(self.device)
            # Position 0 plus every position with a non-zero token type id.
            # NOTE(review): presumably type id 1 marks context tokens and
            # `softmax_mask` suppresses logits where the mask is 0 - confirm
            # against the helper's definition.
            logit_mask = b_input_type_ids + mask
            logits_st = softmax_mask(logits_st, logit_mask)
            logits_end = softmax_mask(logits_end, logit_mask)

            start_probs = torch.nn.functional.softmax(logits_st, dim=-1)
            end_probs = torch.nn.functional.softmax(logits_end, dim=-1)
            # Probability that the answer is not the [CLS] (no-answer) token.
            scores = torch.tensor(1) - start_probs[:, 0] * end_probs[:, 0]

            # outer[b, i, j] = P(start=i) * P(end=j)
            outer = torch.matmul(
                start_probs.view(*start_probs.size(), 1),
                end_probs.view(end_probs.size()[0], 1, end_probs.size()[1]))
            # outer_logits[b, i, j] = exp(logit_start[i] + logit_end[j])
            outer_logits = torch.exp(
                logits_st.view(*logits_st.size(), 1) +
                logits_end.view(logits_end.size()[0], 1, logits_end.size()[1]))

            context_max_len = torch.max(torch.sum(b_input_type_ids,
                                                  dim=1)).to(torch.int64)

            max_ans_length = torch.min(
                torch.tensor(20).to(self.device),
                context_max_len).to(torch.int64).item()

            # Keep only spans with end >= start and drop the far-off diagonals.
            # NOTE(review): the second `triu` starts at diagonal
            # `seq_len - max_ans_length`, so spans of up to
            # `seq_len - max_ans_length` tokens survive; if the intent is to
            # cap the answer length at `max_ans_length`, the diagonal should
            # be `max_ans_length` - confirm before changing, as it alters
            # predictions.
            outer = torch.triu(outer, diagonal=0) - torch.triu(
                outer, diagonal=outer.size()[1] - max_ans_length)
            outer_logits = torch.triu(outer_logits, diagonal=0) - torch.triu(
                outer_logits, diagonal=outer_logits.size()[1] - max_ans_length)

            # Best start row / end column of the span-probability matrix.
            start_pred = torch.argmax(torch.max(outer, dim=2)[0], dim=1)
            end_pred = torch.argmax(torch.max(outer, dim=1)[0], dim=1)
            logits = torch.max(torch.max(outer_logits, dim=2)[0], dim=1)[0]

        # Move predictions to CPU and to numpy arrays / python lists
        start_pred = start_pred.detach().cpu().numpy()
        end_pred = end_pred.detach().cpu().numpy()
        logits = logits.detach().cpu().numpy().tolist()
        scores = scores.detach().cpu().numpy().tolist()

        return start_pred, end_pred, logits, scores

    @overrides
    def load(self, fname=None):
        """Build the model, optimizer and scheduler, then restore saved weights if present.

        Args:
            fname: optional load path that overrides ``self.load_path``

        Raises:
            ConfigError: when neither a pretrained model nor an existing config
                file is given, or when the parent directory of a ``Path`` load
                path does not exist
        """
        if fname is not None:
            self.load_path = fname

        if self.pretrained_bert and not Path(self.pretrained_bert).is_file():
            self.model = BertForQuestionAnswering.from_pretrained(
                self.pretrained_bert,
                output_attentions=False,
                output_hidden_states=False)
        elif self.bert_config_file and Path(self.bert_config_file).is_file():
            self.bert_config = BertConfig.from_json_file(
                str(expand_path(self.bert_config_file)))

            # keep_prob -> dropout probability
            if self.attention_probs_keep_prob is not None:
                self.bert_config.attention_probs_dropout_prob = 1.0 - self.attention_probs_keep_prob
            if self.hidden_keep_prob is not None:
                self.bert_config.hidden_dropout_prob = 1.0 - self.hidden_keep_prob
            self.model = BertForQuestionAnswering(config=self.bert_config)
        else:
            raise ConfigError("No pre-trained BERT model is given.")

        self.model.to(self.device)

        self.optimizer = getattr(torch.optim, self.optimizer_name)(
            self.model.parameters(), **self.optimizer_parameters)
        if self.lr_scheduler_name is not None:
            self.lr_scheduler = getattr(torch.optim.lr_scheduler,
                                        self.lr_scheduler_name)(
                                            self.optimizer,
                                            **self.lr_scheduler_parameters)

        if self.load_path:
            logger.info(f"Load path {self.load_path} is given.")
            if isinstance(self.load_path,
                          Path) and not self.load_path.parent.is_dir():
                raise ConfigError("Provided load path is incorrect!")

            # Wrap before resolving so that a plain string path works too.
            weights_path = Path(self.load_path).resolve()
            weights_path = weights_path.with_suffix(".pth.tar")
            if weights_path.exists():
                logger.info(f"Load path {weights_path} exists.")
                logger.info(
                    f"Initializing `{self.__class__.__name__}` from saved.")

                # now load the weights, optimizer from saved
                logger.info(f"Loading weights from {weights_path}.")
                checkpoint = torch.load(weights_path,
                                        map_location=self.device)
                self.model.load_state_dict(checkpoint["model_state_dict"])
                self.optimizer.load_state_dict(
                    checkpoint["optimizer_state_dict"])
                self.epochs_done = checkpoint.get("epochs_done", 0)
            else:
                logger.info(
                    f"Init from scratch. Load path {weights_path} does not exist."
                )
model = BertForQuestionAnswering.from_pretrained(BERT_MODEL_HUB_NAME) bert_config_obj = model.config model.eval() model.to(TORCH_DEVICE) else: print("Loading BERT config from {} ...".format(BERT_MODEL_CONFIG_PATH)) with open(BERT_MODEL_CONFIG_PATH) as bert_config_file: bert_config_dict = json.load(bert_config_file) bert_config_obj = BertConfig(**bert_config_dict) model = BertForQuestionAnswering(bert_config_obj) model.eval() model.to(TORCH_DEVICE) print("Loading BERT model weights from {} ...".format( BERT_MODEL_WEIGHTS_PATH)) model.load_state_dict(torch.load(BERT_MODEL_WEIGHTS_PATH)) print("Vocabulary size: {}".format(bert_config_obj.vocab_size)) print("Loading tokenized SQuAD dataset as features from {} ...".format( SQUAD_DATASET_TOKENIZED_PATH)) with open(SQUAD_DATASET_TOKENIZED_PATH, 'rb') as tokenized_features_file: eval_features = pickle.load(tokenized_features_file) print("Example width: {}".format(len(eval_features[0].input_ids))) TOTAL_EXAMPLES = len(eval_features) print("Total examples available: {}".format(TOTAL_EXAMPLES)) ## Processing by batches: #