def __init__(self):
    self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    self.model = BertForQuestionAnswering.from_pretrained("bert-base-uncased")

    train_dir = os.path.join("./save", "qa")
    self.save_dir = os.path.join(train_dir, "train_%d" % int(time.strftime("%m%d%H%M%S")))
    if not os.path.exists(self.save_dir):
        os.makedirs(self.save_dir)

    # read the dataset and prepare iterators
    self.train_loader = self.get_data_loader("./squad/train-v1.1.json")
    self.dev_loader = self.get_data_loader("./squad/new_dev-v1.1.json")

    num_train_optimization_steps = len(self.train_loader) * config.num_epochs

    # optimizer
    param_optimizer = list(self.model.named_parameters())

    # hack to remove the pooler, which is not used;
    # otherwise it produces a None grad that breaks apex
    param_optimizer = [n for n in param_optimizer if "pooler" not in n[0]]

    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]
    self.qa_opt = BertAdam(optimizer_grouped_parameters,
                           lr=config.qa_lr,
                           warmup=config.warmup_proportion,
                           t_total=num_train_optimization_steps)
    # self.qg_lr = config.lr

    # assign model to device
    self.model = self.model.to(config.device)
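The constructor above only builds the model, loaders, and BertAdam optimizer. A minimal training-step sketch for it is shown below; the `train` method and the batch layout (input ids, segment ids, attention mask, answer start/end positions) are assumptions, not part of the original code.

# Hypothetical training loop for the trainer above (assumed batch layout).
def train(self):
    self.model.train()
    for epoch in range(config.num_epochs):
        for batch in self.train_loader:
            batch = tuple(t.to(config.device) for t in batch)
            input_ids, segment_ids, input_mask, start_positions, end_positions = batch

            # pytorch_pretrained_bert's BertForQuestionAnswering returns the combined
            # start/end cross-entropy loss when the gold positions are supplied.
            loss = self.model(input_ids, segment_ids, input_mask,
                              start_positions, end_positions)

            loss.backward()
            self.qa_opt.step()
            self.qa_opt.zero_grad()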
def forward(self,  # type: ignore
            input_ids: torch.Tensor,
            token_type_ids: torch.Tensor,
            attention_mask: torch.Tensor,
            tokens: List[str],
            document_tokens: List[str],
            token_to_original_map: Dict[int, int],
            token_is_max_context: Dict[int, bool]) -> Dict[str, torch.Tensor]:
    # pylint: disable=arguments-differ
    # Lazily load the pretrained QA weights the first time forward runs in training mode.
    if not self._loaded_qa_weights and self.training:
        self.bert_qa_model = HuggingFaceBertQA.from_pretrained(self._pretrained_archive_path)
        self._loaded_qa_weights = True

    start_logits, end_logits = self.bert_qa_model(torch.stack(input_ids),
                                                  torch.stack(token_type_ids),
                                                  torch.stack(attention_mask))
    output_dict = {"start_logits": start_logits,
                   "end_logits": end_logits,
                   "tokens": tokens,
                   "document_tokens": document_tokens,
                   "token_to_original_map": token_to_original_map,
                   "token_is_max_context": token_is_max_context}

    if self.training:
        # Zero-valued loss so the trainer has something to backpropagate;
        # the QA model itself is not fine-tuned here.
        loss = torch.sum(start_logits) * 0.0
        output_dict["loss"] = loss

    return output_dict
def make_model_env(self, gpu, ngpus_per_node):
    if self.args.distributed:
        self.args.gpu = self.args.devices[gpu]
    else:
        self.args.gpu = 0

    if self.args.use_cuda and self.args.distributed:
        # For multiprocessing distributed training, rank needs to be the
        # global rank among all the processes.
        self.args.rank = self.args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=self.args.dist_backend,
                                init_method=self.args.dist_url,
                                world_size=self.args.world_size,
                                rank=self.args.rank)

    # Load the baseline model.
    self.model = BertForQuestionAnswering.from_pretrained(self.args.bert_model)

    if self.args.load_model is not None:
        print("Loading model from ", self.args.load_model)
        self.model.load_state_dict(
            torch.load(self.args.load_model, map_location=lambda storage, loc: storage))

    # Maximum data size among all the train datasets.
    max_len = max([len(f) for f in self.features_lst])

    # Maximum number of optimization steps.
    num_train_optimization_steps = math.ceil(
        max_len / self.args.batch_size) * self.args.epochs * len(self.features_lst)

    # Optionally freeze the BERT encoder.
    if self.args.freeze_bert:
        for param in self.model.bert.parameters():
            param.requires_grad = False

    self.optimizer = get_opt(list(self.model.named_parameters()),
                             num_train_optimization_steps, self.args)

    if self.args.use_cuda:
        if self.args.distributed:
            torch.cuda.set_device(self.args.gpu)
            self.model.cuda(self.args.gpu)
            # Split the batch size and data-loading workers across the processes on this node.
            self.args.batch_size = int(self.args.batch_size / ngpus_per_node)
            self.args.workers = int((self.args.workers + ngpus_per_node - 1) / ngpus_per_node)
            self.model = DistributedDataParallel(self.model,
                                                 device_ids=[self.args.gpu],
                                                 find_unused_parameters=True)
        else:
            self.model.cuda()
            self.model = DataParallel(self.model, device_ids=self.args.devices)

    cudnn.benchmark = True
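make_model_env is written to be called once per GPU process. A minimal, hypothetical launcher using torch.multiprocessing is sketched below; the `launch` helper and the assumption that `args.world_size`, `args.devices`, and `args.distributed` are already populated are not part of the original code.

# Hypothetical launcher for the per-process setup above.
import torch
import torch.multiprocessing as mp

def launch(trainer):
    ngpus_per_node = torch.cuda.device_count()
    if trainer.args.distributed:
        # Spawn one process per GPU; each process runs make_model_env(gpu, ngpus_per_node).
        mp.spawn(trainer.make_model_env, nprocs=ngpus_per_node, args=(ngpus_per_node,))
    else:
        trainer.make_model_env(0, ngpus_per_node)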
def __init__(self, dir_path, max_seq_length=100):
    self.max_seq_length = max_seq_length
    self.processor = MRCProcessor()
    self.processor.log = False
    self.tokenizer = BertTokenizer.from_pretrained(dir_path)
    self.model = BertForQuestionAnswering.from_pretrained(dir_path)
    self.model.eval()

    global debug_message
    debug_message = False
def __init__(self, qa_model_path, ca2q_model_path, c2q_model_path, c2a_model_path):
    super(DualNet, self).__init__()
    self.qa_model = BertForQuestionAnswering.from_pretrained(qa_model_path)
    self.ca2q_model = Seq2seq(dropout=0.0, embedding=None, use_tag=True, model_path=ca2q_model_path)
    self.c2q_model = Seq2seq(dropout=0.0, embedding=None, use_tag=False, model_path=c2q_model_path)
    self.c2a_model = AnswerSelector(dropout=0.0, embedding=None, model_path=c2a_model_path)
    # freeze the pre-trained c2q and c2a models; requires_grad has to be set on the
    # individual parameters, not on the module objects themselves
    for param in self.c2q_model.parameters():
        param.requires_grad = False
    for param in self.c2a_model.parameters():
        param.requires_grad = False
import os
import torch

os.chdir('/Users/davidbressler/pythonstuff/pytorch-pretrained-BERT/examples')

from pytorch_pretrained_bert import BertTokenizer, BertForQuestionAnswering
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import run_squad

# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pre-trained model weights fine-tuned on SQuAD
model_state_dict = torch.load('/data/squad/pytorch_model.bin')
model = BertForQuestionAnswering.from_pretrained('bert-base-uncased', state_dict=model_state_dict)
model.to(device)  # .to() moves the module's parameters in place, so re-assignment is not required
model.eval()

# Inputs
# document = 'Born in Seattle, Washington, Hendrix began playing guitar at the age of 15.'
# query = 'What did Hendrix play?'
# query = 'Where was Hendrix born?'
# query = 'How old was Hendrix when he began playing guitar?'
# query = 'How old was Hendrix when he began playing music?'
# query = 'Where is the birthplace of Hendrix?'
document = 'The University of Chicago (UChicago, Chicago, or U of C) is a private research university in Chicago. The university, established in 1890, consists of The College, various graduate programs, interdisciplinary committees organized into four academic research divisions and seven professional schools. Beyond the arts and sciences, Chicago is also well known for its professional schools, which include the Pritzker School of Medicine, the University of Chicago Booth School of Business, the Law School, the School of Social Service Administration, the Harris School of Public Policy Studies, the Graham School of Continuing Liberal and Professional Studies and the Divinity School. The university currently enrolls approximately 5,000 students in the College and around 15,000 students overall.'
query = 'What kind of university is the University of Chicago?'
# document = 'Nikola Tesla (Serbian Cyrillic: Никола Тесла; 10 July 1856 – 7 January 1943) was a Serbian American inventor, electrical engineer, mechanical engineer, physicist, and futurist best known for his contributions to the design of the modern alternating current (AC) electricity supply system.'
# query = 'In what year was Nikola Tesla born?'
# query = "What was Nikola Tesla's ethnicity?"
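With the model and inputs above in place, a minimal way to run one question through the fine-tuned model is sketched below. It skips the full run_squad pre/post-processing (doc striding, answer cleanup) and assumes the question plus document fit in a single BERT sequence.

# Minimal inference sketch for the model loaded above (simplified; run_squad
# handles doc striding and token-to-text mapping for the real pipeline).
query_tokens = tokenizer.tokenize(query)
doc_tokens = tokenizer.tokenize(document)

# Standard [CLS] question [SEP] document [SEP] packing with segment ids.
tokens = ['[CLS]'] + query_tokens + ['[SEP]'] + doc_tokens + ['[SEP]']
segment_ids = [0] * (len(query_tokens) + 2) + [1] * (len(doc_tokens) + 1)
input_ids = tokenizer.convert_tokens_to_ids(tokens)

input_ids_t = torch.tensor([input_ids], device=device)
segment_ids_t = torch.tensor([segment_ids], device=device)

with torch.no_grad():
    # Without start/end positions the model returns the raw span logits.
    start_logits, end_logits = model(input_ids_t, segment_ids_t)

start = int(torch.argmax(start_logits[0]))
end = int(torch.argmax(end_logits[0][start:])) + start  # constrain end >= start
answer = ' '.join(tokens[start:end + 1]).replace(' ##', '')
print(answer)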
eval_features = convert_examples_to_features(eval_examples,
                                             tokenizer=tokenizer,
                                             max_seq_length=config.max_seq_len,
                                             max_query_length=config.max_query_len,
                                             doc_stride=128,
                                             is_training=False)

all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
all_example_index = torch.arange(all_input_ids.size(0))

eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index)
eval_sampler = SequentialSampler(eval_data)
eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=8)

model = BertForQuestionAnswering.from_pretrained("./save/dual/train_507200353/bert_1_2.958")
# Keep the model and the batches on the same device.
device = config.device
# device = "cuda:2"
model = model.to(device)
model.eval()

all_results = []
for data in eval_dataloader:
    input_ids, input_mask, segment_ids, example_indices = data
    input_ids = input_ids.to(device)
    input_mask = input_mask.to(device)
    segment_ids = segment_ids.to(device)

    with torch.no_grad():
        batch_start_logits, batch_end_logits = model(input_ids, segment_ids, input_mask)

    for i, example_index in enumerate(example_indices):
        start_logits = batch_start_logits[i].detach().cpu().tolist()
        end_logits = batch_end_logits[i].detach().cpu().tolist()
        eval_feature = eval_features[example_index.item()]
# Load the is-in-WHERE-clause binary classification model
isinwhere_model = BertForSequenceClassification.from_pretrained("bert-large-uncased", num_labels=2)
# load fine-tuned test parameters
isinwhere_path = "/content/drive/My Drive/HydraNet/RetrainModels/IsInWhereClause/retrain_isinwhereclause_classifier_epoch_3.pt"
isinwhere_model.load_state_dict(torch.load(isinwhere_path))
# isinwhere_model.to(test_device)

# Load the WHERE-operator multi-class classification model
whereoperator_model = BertForSequenceClassification.from_pretrained("bert-large-uncased", num_labels=3)
# load fine-tuned test parameters
whereoperator_path = "/content/drive/My Drive/HydraNet/RetrainModels/ConditionOperator/retrain_condition_operator_classifier_epoch_3.pt"
whereoperator_model.load_state_dict(torch.load(whereoperator_path))
# whereoperator_model.to(test_device)

# Load the WHERE-value question-answering model
wherevalue_model = BertForQuestionAnswering.from_pretrained("bert-large-uncased")
# load fine-tuned test parameters
wherevalue_path = "/content/drive/My Drive/HydraNet/RetrainModels/WhereValue/retrain_where_value_model_epoch_3.pt"
wherevalue_model.load_state_dict(torch.load(wherevalue_path))
# wherevalue_model.to(test_device)

# Input question, table headers, and column types
input_question = 'what is the total revenue for apple in canada when the profit is more than 300'
input_columns = ['Region', 'Fruit', 'Amount (kilo) weight', 'Salesperson', 'Customer Type', 'Revenue (dollar)', 'Profit (dollar)']
input_types = ['text', 'text', 'real', 'text', 'text', 'real', 'real']

MAX_LEN = 64
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased', do_lower_case=True)
aggregation_operator_list = ["NAN", "max", "min", "nunique", "sum", "mean"]
condition_operator_list = ["=", ">", "<"]
table_name = 'Fruits'
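The snippet above only loads the three models and defines the inputs. Below is a minimal, hypothetical sketch of scoring a single column with the is-in-WHERE classifier; pairing the question and the column name as two BERT segments is an assumption and may differ from the project's actual feature encoding.

# Hypothetical per-column scoring with the isinwhere classifier (assumed encoding).
import torch

def score_column(question, column):
    q_tokens = tokenizer.tokenize(question)
    c_tokens = tokenizer.tokenize(column)
    tokens = ['[CLS]'] + q_tokens + ['[SEP]'] + c_tokens + ['[SEP]']
    segment_ids = [0] * (len(q_tokens) + 2) + [1] * (len(c_tokens) + 1)
    input_ids = tokenizer.convert_tokens_to_ids(tokens)

    # Pad to the fixed MAX_LEN defined above.
    padding = [0] * (MAX_LEN - len(input_ids))
    input_mask = [1] * len(input_ids) + padding
    input_ids = input_ids + padding
    segment_ids = segment_ids + padding

    isinwhere_model.eval()
    with torch.no_grad():
        logits = isinwhere_model(torch.tensor([input_ids]),
                                 token_type_ids=torch.tensor([segment_ids]),
                                 attention_mask=torch.tensor([input_mask]))
    # Depending on the BERT library version, the model may return a tuple of outputs.
    if isinstance(logits, tuple):
        logits = logits[0]
    return int(torch.argmax(logits, dim=-1))

# Example: decide whether the 'Profit (dollar)' column participates in the WHERE clause.
print(score_column(input_question, 'Profit (dollar)'))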
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", default=None, type=str, required=True,
                        help="The input data dir.")
    parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
                        help="Path to pretrained model or model identifier from huggingface.co/models.")
    parser.add_argument("--model_type", default=None, type=str, required=True,
                        help="Type of model to train.")
    parser.add_argument("--model_save_name", default=None, type=str, required=True,
                        help="Name under which to save the trained model and its results.")
    parser.add_argument("--train_setting", default='relaxed', type=str, required=False,
                        help="Whether to train in the strict or relaxed setting. Options: strict or relaxed.")
    parser.add_argument("--do_lower_case", action="store_true",
                        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--do_train", action="store_true",
                        help="Whether to run training.")
    parser.add_argument("--do_eval", action="store_true",
                        help="Whether to run the model on the dev set.")
    parser.add_argument("--do_test", action="store_true",
                        help="Whether to run the model on the test set.")
    parser.add_argument("--evaluate_during_training", action="store_true",
                        help="Whether to evaluate during training.")
    parser.add_argument("--multi_task", action="store_true",
                        help="Multi-task learning flag.")
    parser.add_argument("--train_batch_size", default=20, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--train_epochs", default=5, type=int,
                        help="Training epochs.")
    parser.add_argument("--GRAD_ACC", default=1, type=int,
                        help="Gradient accumulation steps.")
    parser.add_argument("--eval_batch_size", default=20, type=int,
                        help="Batch size per GPU/CPU for evaluation/testing.")
    parser.add_argument("--lr", default=2e-5, type=float,
                        help="Learning rate.")
    parser.add_argument("--auxiliary_task_wt", default=0.3, type=float,
                        help="Weight for the auxiliary task.")
    parser.add_argument("--weight_decay", default=1e-4, type=float,
                        help="Weight decay.")
    parser.add_argument("--warmup_proportion", default=0.1, type=float,
                        help="Warmup proportion.")
    parser.add_argument("--gpu", default=0, type=int,
                        help="Which GPU to use for training.")
    args = parser.parse_args()

    data = pickle.load(open(args.data_dir, 'rb'))
    selected_sem_types = pickle.load(open('../data/selected_ents.pkl', 'rb'))
    print('Selected semantic types: ', selected_sem_types)

    if args.train_setting == 'strict':
        data = data['strict_split']
    else:
        data = data['split']

    entity2id = utils.prepare_entities_to_ix(selected_sem_types)
    logical2ix = utils.prepare_logical_forms_to_ix(data['train'])

    shuffle(data['train'])
    shuffle(data['dev'])
    shuffle(data['test'])

    print(entity2id)

    model_config = {
        'label_size': 2,
        'num_entities': len(selected_sem_types) + 1,
        'entity_dim': 100,
        'lr': args.lr,
        'weight_decay': args.weight_decay,
        'batch_size': args.train_batch_size,
        'data_path': args.data_dir,
        'model_name': args.model_save_name,
        'bert_model': args.model_name_or_path,
        'do_lower_case': True,
        'gradient_accumulation_steps': args.GRAD_ACC
    }

    if args.model_type == 'ernie':
        from knowledge_bert import modeling
        from knowledge_bert import BertTokenizer
        from knowledge_bert.optimization import BertAdam

        tokenizer = BertTokenizer.from_pretrained(model_config['bert_model'],
                                                  do_lower_case=model_config['do_lower_case'])
        model, _ = modeling.BertForQuestionAnsweringEmrQA.from_pretrained(
            model_config['bert_model'], num_entities=model_config['num_entities'])
    elif args.model_type == 'bert':
        from pytorch_pretrained_bert import BertTokenizer, BertForQuestionAnswering
        from pytorch_pretrained_bert.optimization import BertAdam

        tokenizer = BertTokenizer.from_pretrained(model_config['bert_model'],
                                                  do_lower_case=model_config['do_lower_case'])
        model = BertForQuestionAnswering.from_pretrained(model_config['bert_model'])

    num_train_optimization_steps = (len(data['train'])
                                    // model_config['gradient_accumulation_steps']) * args.train_epochs

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay': 0.01
    }, {
        'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=model_config['lr'],
                         warmup=args.warmup_proportion,
                         t_total=num_train_optimization_steps)

    if args.do_train:
        model_trained = train(args,
                              model=model,
                              optimizer=optimizer,
                              tokenizer=tokenizer,
                              model_config=model_config,
                              data=data,
                              entity2id=entity2id,
                              logical2ix=logical2ix)

    # The start and end accuracies are only proxies; the actual accuracy should be
    # computed from the pickle dump with the official SQuAD evaluation script:
    # https://rajpurkar.github.io/SQuAD-explorer/

    ##### Evaluate the model if the do_eval flag is on
    if args.do_eval:
        if args.model_type == 'ernie':
            if args.multi_task:
                device = torch.device("cuda:" + str(args.gpu))
                dev_vals = eval_plot.evaluate_bert_emrqa_ernie_multitask(
                    model_trained, data['dev'], args.eval_batch_size,
                    tokenizer, entity2id, logical2ix, device)
            else:
                dev_vals = eval_plot.evaluate_bert_emrqa_ernie(
                    model_trained, data['dev'], args.eval_batch_size,
                    tokenizer, entity2id, logical2ix)
        elif args.model_type == 'bert':
            dev_vals = eval_plot.evaluate_bert_emrqa(model_trained, data['dev'],
                                                     args.eval_batch_size, tokenizer)
        dict_ = {
            'start_accuracy': dev_vals[0],
            'end_accuracy': dev_vals[1],
            'actual_and_predicted_values': dev_vals[2]
        }
        file_name = '../results/' + model_config['model_name'] + '_dev_results.pkl'
        pickle.dump(dict_, open(file_name, 'wb'))

    ##### Test the model if the do_test flag is on
    if args.do_test:
        if args.model_type == 'ernie':
            if args.multi_task:
                device = torch.device("cuda:" + str(args.gpu))
                test_vals = eval_plot.evaluate_bert_emrqa_ernie_multitask(
                    model_trained, data['test'], args.eval_batch_size,
                    tokenizer, entity2id, logical2ix, device)
            else:
                test_vals = eval_plot.evaluate_bert_emrqa_ernie(
                    model_trained, data['test'], args.eval_batch_size,
                    tokenizer, entity2id, logical2ix)
        elif args.model_type == 'bert':
            test_vals = eval_plot.evaluate_bert_emrqa(model_trained, data['test'],
                                                      args.eval_batch_size, tokenizer)
        dict_ = {
            'start_accuracy': test_vals[0],
            'end_accuracy': test_vals[1],
            'actual_and_predicted_values': test_vals[2]
        }
        file_name = '../results/' + model_config['model_name'] + '_test_results.pkl'
        pickle.dump(dict_, open(file_name, 'wb'))
    for i, example_index in enumerate(example_indices):
        start_logits = batch_start_logits[i].detach().cpu().tolist()
        end_logits = batch_end_logits[i].detach().cpu().tolist()
        eval_feature = eval_features[example_index.item()]
        unique_id = int(eval_feature.unique_id)
        all_results.append(RawResult(unique_id=unique_id,
                                     start_logits=start_logits,
                                     end_logits=end_logits))

    preds = write_predictions(eval_examples, eval_features, all_results, 5, 100, 0.0)
    return preds


if __name__ == '__main__':
    model = BertForQuestionAnswering.from_pretrained('squader')
    model.eval()
    tokenizer = BertTokenizer.from_pretrained('squader')
    context = """
 Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend "Venite Ad Me Omnes". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.
    """
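The __main__ block above stops after loading the model and defining the context. A hypothetical continuation is sketched below; the function name `answer(question, context)` is an assumed wrapper around the prediction routine whose tail (write_predictions / return preds) appears earlier, not a name confirmed by the original code.

    # Hypothetical continuation of the __main__ block above; 'answer' is an assumed
    # name and signature for the prediction function shown earlier.
    question = "To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?"
    predictions = answer(question, context)
    print(predictions)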