Example #1
 def create_bert_for_question_answering(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
     model = BertForQuestionAnswering(config=config)
     model.eval()
     # with start/end positions supplied, the model returns the total loss
     loss = model(input_ids, token_type_ids, input_mask, sequence_labels, sequence_labels)
     # without positions, it returns the start/end logits instead
     start_logits, end_logits = model(input_ids, token_type_ids, input_mask)
     outputs = {
         "loss": loss,
         "start_logits": start_logits,
         "end_logits": end_logits,
     }
     return outputs
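A minimal sketch of how such a test helper might be driven, assuming a tiny random config and inputs (all sizes here are illustrative, not from the source; `tester` stands for the enclosing test-case instance):

import torch
from pytorch_pretrained_bert import BertConfig

config = BertConfig(vocab_size_or_config_json_file=99, hidden_size=32,
                    num_hidden_layers=2, num_attention_heads=4,
                    intermediate_size=64, max_position_embeddings=64)
batch_size, seq_length = 2, 7
input_ids = torch.randint(0, 99, (batch_size, seq_length))
token_type_ids = torch.zeros_like(input_ids)
input_mask = torch.ones_like(input_ids)
# start/end positions for the loss; token/choice labels are unused above
sequence_labels = torch.randint(0, seq_length, (batch_size,))
outputs = tester.create_bert_for_question_answering(
    config, input_ids, token_type_ids, input_mask,
    sequence_labels, token_labels=None, choice_labels=None)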
Example #2
 def forward(self,  # type: ignore
             input_ids: torch.Tensor,
             token_type_ids: torch.Tensor,
             attention_mask: torch.Tensor,
             tokens: List[str],
             document_tokens: List[str],
             token_to_original_map: Dict[int, int],
             token_is_max_context: Dict[int, bool]) -> Dict[str, torch.Tensor]:
     # pylint: disable=arguments-differ
     if not self._loaded_qa_weights and self.training:
         self.bert_qa_model = HuggingFaceBertQA.from_pretrained(self._pretrained_archive_path)
         self._loaded_qa_weights = True
     start_logits, end_logits = self.bert_qa_model(torch.stack(input_ids),
                                                   torch.stack(token_type_ids),
                                                   torch.stack(attention_mask))
     output_dict = {"start_logits": start_logits,
                    "end_logits": end_logits,
                    "tokens": tokens,
                    "document_tokens": document_tokens,
                    "token_to_original_map": token_to_original_map,
                    "token_is_max_context": token_is_max_context}
     if self.training:
         # dummy zero-valued loss that stays connected to the graph, so
         # trainers that require a "loss" key can run without actually
         # updating the pretrained QA weights
         loss = torch.sum(start_logits) * 0.0
         output_dict["loss"] = loss
     return output_dict
Example #3
    def __init__(self):
        self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
        self.model = BertForQuestionAnswering.from_pretrained("bert-base-uncased")
        train_dir = os.path.join("./save", "qa")
        self.save_dir = os.path.join(train_dir, "train_%d" % int(time.strftime("%m%d%H%M%S")))
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
        # read data-set and prepare iterator
        self.train_loader = self.get_data_loader("./squad/train-v1.1.json")
        self.dev_loader = self.get_data_loader("./squad/new_dev-v1.1.json")

        num_train_optimization_steps = len(self.train_loader) * config.num_epochs
        # optimizer
        param_optimizer = list(self.model.named_parameters())
        # hack to remove the pooler, which is not used and would otherwise
        # produce None grads that break apex
        param_optimizer = [n for n in param_optimizer if "pooler" not in n[0]]
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ]
        self.qa_opt = BertAdam(optimizer_grouped_parameters,
                               lr=config.qa_lr,
                               warmup=config.warmup_proportion,
                               t_total=num_train_optimization_steps)

        # self.qg_lr = config.lr

        # assign model to device
        self.model = self.model.to(config.device)
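A minimal training-step sketch for the trainer above, assuming each batch from get_data_loader yields (input_ids, input_mask, segment_ids, start_positions, end_positions); the actual batch layout is not shown in the source:

    def train(self):
        self.model.train()
        for epoch in range(config.num_epochs):
            for batch in self.train_loader:
                batch = tuple(t.to(config.device) for t in batch)
                input_ids, input_mask, segment_ids, start_pos, end_pos = batch
                # with positions supplied, the model returns the loss directly
                loss = self.model(input_ids, segment_ids, input_mask,
                                  start_pos, end_pos)
                loss.backward()
                self.qa_opt.step()
                self.qa_opt.zero_grad()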
Example #4
File: trainer.py  Project: ankit-bagde/mrqa
    def make_model_env(self, gpu, ngpus_per_node):
        if self.args.distributed:
            self.args.gpu = self.args.devices[gpu]
        else:
            self.args.gpu = 0

        if self.args.use_cuda and self.args.distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            self.args.rank = self.args.rank * ngpus_per_node + gpu
            dist.init_process_group(backend=self.args.dist_backend,
                                    init_method=self.args.dist_url,
                                    world_size=self.args.world_size,
                                    rank=self.args.rank)

        # Load baseline model
        self.model = BertForQuestionAnswering.from_pretrained(
            self.args.bert_model)

        if self.args.load_model is not None:
            print("Loading model from ", self.args.load_model)
            self.model.load_state_dict(
                torch.load(self.args.load_model,
                           map_location=lambda storage, loc: storage))

        # max data size among all the train datasets
        max_len = max([len(f) for f in self.features_lst])

        # max steps
        num_train_optimization_steps = math.ceil(
            max_len / self.args.batch_size) * self.args.epochs * len(
                self.features_lst)

        # freeze the BERT encoder parameters (the QA head stays trainable)
        if self.args.freeze_bert:
            for param in self.model.bert.parameters():
                param.requires_grad = False

        self.optimizer = get_opt(list(self.model.named_parameters()),
                                 num_train_optimization_steps, self.args)

        if self.args.use_cuda:
            if self.args.distributed:
                torch.cuda.set_device(self.args.gpu)
                self.model.cuda(self.args.gpu)
                # DDP runs one process per GPU, so split the global batch
                # size evenly across processes
                self.args.batch_size = int(self.args.batch_size /
                                           ngpus_per_node)
                self.args.workers = int(
                    (self.args.workers + ngpus_per_node - 1) / ngpus_per_node)
                self.model = DistributedDataParallel(
                    self.model,
                    device_ids=[self.args.gpu],
                    find_unused_parameters=True)
            else:
                self.model.cuda()
                self.model = DataParallel(self.model,
                                          device_ids=self.args.devices)

        cudnn.benchmark = True
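The (gpu, ngpus_per_node) signature matches the convention used by torch.multiprocessing.spawn; a sketch of how such a method is typically launched (trainer construction is assumed, not shown in the source):

import torch
import torch.multiprocessing as mp

ngpus_per_node = torch.cuda.device_count()
if trainer.args.distributed:
    # one process per GPU; each calls make_model_env with its gpu index
    mp.spawn(trainer.make_model_env, nprocs=ngpus_per_node,
             args=(ngpus_per_node,))
else:
    trainer.make_model_env(0, ngpus_per_node)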
Example #5
File: mrc.py  Project: skywindy/carqabot
    def __init__(self, dir_path, max_seq_length=100):
        self.max_seq_length = max_seq_length
        self.processor = MRCProcessor()
        self.processor.log = False
        self.tokenizer = BertTokenizer.from_pretrained(dir_path)
        self.model = BertForQuestionAnswering.from_pretrained(dir_path)
        self.model.eval()

        global debug_message
        debug_message = False
Example #6
    def __init__(self, qa_model_path, ca2q_model_path, c2q_model_path, c2a_model_path):
        super(DualNet, self).__init__()

        self.qa_model = BertForQuestionAnswering.from_pretrained(qa_model_path)

        self.ca2q_model = Seq2seq(dropout=0.0, embedding=None, use_tag=True,
                                  model_path=ca2q_model_path)
        self.c2q_model = Seq2seq(dropout=0.0, embedding=None, use_tag=False,
                                 model_path=c2q_model_path)
        self.c2a_model = AnswerSelector(dropout=0.0,
                                        embedding=None,
                                        model_path=c2a_model_path)

        # freeze pre-trained c2q and c2a models; setting requires_grad on an
        # nn.Module itself is a no-op, so freeze the parameters directly
        for param in self.c2q_model.parameters():
            param.requires_grad = False
        for param in self.c2a_model.parameters():
            param.requires_grad = False
Example #7
import os
import torch

os.chdir('/Users/davidbressler/pythonstuff/pytorch-pretrained-BERT/examples')

from pytorch_pretrained_bert import BertTokenizer, BertForQuestionAnswering
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import run_squad

# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

#set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load pre-trained model (weights)
model_state_dict = torch.load('/data/squad/pytorch_model.bin')
model = BertForQuestionAnswering.from_pretrained('bert-base-uncased', state_dict=model_state_dict)
model.to(device)  # nn.Module.to moves parameters in place; no reassignment needed
model.eval()

#inputs
#document='Born in Seattle, Washington, Hendrix began playing guitar at the age of 15.'
#query='What did Hendrix play?'
#query='Where was Hendrix born?'
#query='How old was Hendrix when he began playing guitar?'
#query='How old was Hendrix when he began playing music?'
#query='Where is the birthplace of Hendrix?'
document='The University of Chicago (UChicago, Chicago, or U of C) is a private research university in Chicago. The university, established in 1890, consists of The College, various graduate programs, interdisciplinary committees organized into four academic research divisions and seven professional schools. Beyond the arts and sciences, Chicago is also well known for its professional schools, which include the Pritzker School of Medicine, the University of Chicago Booth School of Business, the Law School, the School of Social Service Administration, the Harris School of Public Policy Studies, the Graham School of Continuing Liberal and Professional Studies and the Divinity School. The university currently enrolls approximately 5,000 students in the College and around 15,000 students overall.'
query='What kind of university is the University of Chicago?'
#document='Nikola Tesla (Serbian Cyrillic: Никола Тесла; 10 July 1856 – 7 January 1943) was a Serbian American inventor, electrical engineer, mechanical engineer, physicist, and futurist best known for his contributions to the design of the modern alternating current (AC) electricity supply system.'
#query='In what year was Nikola Tesla born?'
#query='What was Nikola Tesla s ethnicity?'
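The snippet ends before the actual inference. A minimal continuation sketch using only the objects defined above (span decoding is simplified; run_squad's full post-processing handles sub-token cleanup and n-best selection):

query_tokens = tokenizer.tokenize(query)
doc_tokens = tokenizer.tokenize(document)
tokens = ['[CLS]'] + query_tokens + ['[SEP]'] + doc_tokens + ['[SEP]']
input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)], device=device)
segment_ids = torch.tensor([[0] * (len(query_tokens) + 2) +
                            [1] * (len(doc_tokens) + 1)], device=device)
with torch.no_grad():
    start_logits, end_logits = model(input_ids, segment_ids)
start, end = int(start_logits.argmax()), int(end_logits.argmax())
print(' '.join(tokens[start:end + 1]))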
Example #8
from pytorch_pretrained_bert import BertTokenizer, BertForQuestionAnswering, BertConfig

config_file = "../config/bert_base_config.json"
vocab_file = "../config/vocab.txt"
config = BertConfig(config_file)
model = BertForQuestionAnswering(config)

tokenizer = BertTokenizer(vocab_file)
print(tokenizer.vocab["i"])

for name in model.state_dict():
    print(name)
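Note that BertForQuestionAnswering(config) builds a randomly initialized model. A sketch of loading trained weights into it afterwards (the checkpoint path is illustrative):

import torch
state_dict = torch.load("../checkpoints/qa_model.bin", map_location="cpu")
model.load_state_dict(state_dict)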
Example #9
eval_features = convert_examples_to_features(eval_examples,
                                             tokenizer=tokenizer,
                                             max_seq_length=config.max_seq_len,
                                             max_query_length=config.max_query_len,
                                             doc_stride=128,
                                             is_training=False)

all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
all_example_index = torch.arange(all_input_ids.size(0))
eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index)
eval_sampler = SequentialSampler(eval_data)
eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=8)

model = BertForQuestionAnswering.from_pretrained("./save/dual/train_507200353/bert_1_2.958")
device = config.device  # keep the model and its inputs on the same device
model = model.to(device)
model.eval()
all_results = []
for data in eval_dataloader:
    input_ids, input_mask, segment_ids, example_indices = data
    input_ids = input_ids.to(device)
    input_mask = input_mask.to(device)
    segment_ids = segment_ids.to(device)
    with torch.no_grad():
        batch_start_logits, batch_end_logits = model(input_ids, segment_ids, input_mask)
    for i, example_index in enumerate(example_indices):
        start_logits = batch_start_logits[i].detach().cpu().tolist()
        end_logits = batch_end_logits[i].detach().cpu().tolist()
        eval_feature = eval_features[example_index.item()]
Example #10
# load the binary isin_where classifier
isinwhere_model = BertForSequenceClassification.from_pretrained("bert-large-uncased", num_labels=2)
# load test model parameters
isinwhere_path = F"/content/drive/My Drive/HydraNet/RetrainModels/IsInWhereClause/retrain_isinwhereclause_classifier_epoch_3.pt"
isinwhere_model.load_state_dict(torch.load(isinwhere_path))
# isinwhere_model.to(test_device)

# load the multi-class where-operator classifier
whereoperator_model = BertForSequenceClassification.from_pretrained("bert-large-uncased", num_labels=3)
# load test model parameters
whereoperator_path = F"/content/drive/My Drive/HydraNet/RetrainModels/ConditionOperator/retrain_condition_operator_classifier_epoch_3.pt"
whereoperator_model.load_state_dict(torch.load(whereoperator_path))
# whereoperator_model.to(test_device)

# load the where-value question-answering model
wherevalue_model = BertForQuestionAnswering.from_pretrained("bert-large-uncased")
# load test model parameters
wherevalue_path = F"/content/drive/My Drive/HydraNet/RetrainModels/WhereValue/retrain_where_value_model_epoch_3.pt"
wherevalue_model.load_state_dict(torch.load(wherevalue_path))
# wherevalue_model.to(test_device)

# Input questions and table headers as well as table type
input_question = 'what is the total revenue for apple in canada when the profit is more than 300'
input_columns = ['Region', 'Fruit', 'Amount (kilo) weight', 'Salesperson', 'Customer Type', 'Revenue (dollar)', 'Profit (dollar)']
input_types = ['text', 'text', 'real', 'text', 'text', 'real', 'real']
MAX_LEN = 64
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased', do_lower_case=True)
aggregation_operator_list = ["NAN", "max", "min", "nunique", "sum", "mean"]
condition_operator_list = ["=", ">", "<"]
table_name = 'Fruits'
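The snippet ends before the inputs are encoded. A minimal sketch of encoding one (question, column) pair for the classifiers above; the exact pairing scheme is an assumption, not shown in the source:

column = input_columns[0]
tokens = (['[CLS]'] + tokenizer.tokenize(input_question) + ['[SEP]'] +
          tokenizer.tokenize(column) + ['[SEP]'])
input_ids = tokenizer.convert_tokens_to_ids(tokens)[:MAX_LEN]
input_ids += [0] * (MAX_LEN - len(input_ids))             # pad to MAX_LEN
attention_mask = [1 if i != 0 else 0 for i in input_ids]  # mask out padding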
Example #11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help="The input data dir.",
    )
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help=
        "Path to pretrained model or model identifier from huggingface.co/models",
    )
    parser.add_argument(
        "--model_type",
        default=None,
        type=str,
        required=True,
        help="Type of model to train.",
    )
    parser.add_argument(
        "--model_save_name",
        default=None,
        type=str,
        required=True,
        help="Name under which to save the trained model and its results.",
    )
    parser.add_argument(
        "--train_setting",
        default='relaxed',
        type=str,
        required=False,
        help=
        "Whether to train in strict setting or relaxed setting. Options: strict or relaxed",
    )
    parser.add_argument(
        "--do_lower_case",
        action="store_true",
        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--do_train",
                        action="store_true",
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action="store_true",
                        help="Whether to run the model on the dev set.")
    parser.add_argument("--do_test",
                        action="store_true",
                        help="Whether to run the model on the test set.")
    parser.add_argument("--evaluate_during_training",
                        action="store_true",
                        help="Whether to evaluate during training.")
    parser.add_argument("--multi_task",
                        action="store_true",
                        help="Multi-task learning flag.")

    parser.add_argument("--train_batch_size",
                        default=20,
                        type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--train_epochs",
                        default=5,
                        type=int,
                        help="Training epochs.")
    parser.add_argument("--GRAD_ACC",
                        default=1,
                        type=int,
                        help="Gradient accumulation steps.")
    parser.add_argument("--eval_batch_size",
                        default=20,
                        type=int,
                        help="Batch size per GPU/CPU for evaluation/testing.")
    parser.add_argument("--lr",
                        default=2e-5,
                        type=float,
                        help="Learning rate.")
    parser.add_argument("--auxiliary_task_wt",
                        default=0.3,
                        type=float,
                        help="Weight for the auxiliary task.")
    parser.add_argument("--weight_decay",
                        default=1e-4,
                        type=float,
                        help="Weight decay.")
    parser.add_argument("--warmup_proportion",
                        default=0.1,
                        type=float,
                        help="Warmup proportion.")
    parser.add_argument("--gpu",
                        default=0,
                        type=int,
                        help="which GPU is to be used for training.")

    args = parser.parse_args()

    data = pickle.load(open(args.data_dir, 'rb'))
    selected_sem_types = pickle.load(open('../data/selected_ents.pkl', 'rb'))
    print('Selected semantic types: ', selected_sem_types)

    if args.train_setting == 'strict':
        data = data['strict_split']
    else:
        data = data['split']

    entity2id = utils.prepare_entities_to_ix(selected_sem_types)
    logical2ix = utils.prepare_logical_forms_to_ix(data['train'])

    shuffle(data['train'])
    shuffle(data['dev'])
    shuffle(data['test'])
    print(entity2id)

    model_config = {
        'label_size': 2,
        'num_entities': len(selected_sem_types) + 1,
        'entity_dim': 100,
        'lr': args.lr,
        'weight_decay': args.weight_decay,
        'batch_size': args.train_batch_size,
        'data_path': args.data_dir,
        'model_name': args.model_save_name,
        'bert_model': args.model_name_or_path,
        'do_lower_case': True,
        'gradient_accumulation_steps': args.GRAD_ACC
    }

    if args.model_type == 'ernie':
        from knowledge_bert import modeling
        from knowledge_bert import BertTokenizer
        from knowledge_bert.optimization import BertAdam

        tokenizer = BertTokenizer.from_pretrained(
            model_config['bert_model'],
            do_lower_case=model_config['do_lower_case'])
        model, _ = modeling.BertForQuestionAnsweringEmrQA.from_pretrained(
            model_config['bert_model'],
            num_entities=model_config['num_entities'])
    elif args.model_type == 'bert':
        from pytorch_pretrained_bert import BertTokenizer, BertForQuestionAnswering
        from pytorch_pretrained_bert.optimization import BertAdam
        tokenizer = BertTokenizer.from_pretrained(
            model_config['bert_model'],
            do_lower_case=model_config['do_lower_case'])
        model = BertForQuestionAnswering.from_pretrained(
            model_config['bert_model'])

    num_train_optimization_steps = len(
        data['train']
    ) // model_config['gradient_accumulation_steps'] * args.train_epochs

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]

    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=model_config['lr'],
                         warmup=args.warmup_proportion,
                         t_total=num_train_optimization_steps)

    if args.do_train:
        model_trained = train(args,
                              model=model,
                              optimizer=optimizer,
                              tokenizer=tokenizer,
                              model_config=model_config,
                              data=data,
                              entity2id=entity2id,
                              logical2ix=logical2ix)

    # The start/end accuracies are only proxies; the real accuracy is computed
    # from the pickle dump with the official SQuAD evaluation script:
    # https://rajpurkar.github.io/SQuAD-explorer/
    ##### Evaluate the model if the do_eval flag is on
    if args.do_eval:
        if args.model_type == 'ernie':
            if args.multi_task:
                device = torch.device("cuda:" + str(args.gpu))
                dev_vals = eval_plot.evaluate_bert_emrqa_ernie_multitask(
                    model_trained, data['dev'], args.eval_batch_size,
                    tokenizer, entity2id, logical2ix, device)
            else:
                dev_vals = eval_plot.evaluate_bert_emrqa_ernie(
                    model_trained, data['dev'], args.eval_batch_size,
                    tokenizer, entity2id, logical2ix)
        elif args.model_type == 'bert':
            dev_vals = eval_plot.evaluate_bert_emrqa(model_trained,
                                                     data['dev'],
                                                     args.eval_batch_size,
                                                     tokenizer)
        dict_ = {
            'start_accuracy': dev_vals[0],
            'end_accuracy': dev_vals[1],
            'actual_and_predicted_values': dev_vals[2]
        }
        file_name = '../results/' + model_config['model_name'] + '_dev_results.pkl'
        pickle.dump(dict_, open(file_name, 'wb'))

    ##### Test the model
    if args.do_test:
        if args.model_type == 'ernie':
            if args.multi_task:
                device = torch.device("cuda:" + str(args.gpu))
                test_vals = eval_plot.evaluate_bert_emrqa_ernie_multitask(
                    model_trained, data['test'], args.eval_batch_size,
                    tokenizer, entity2id, logical2ix, device)
            else:
                test_vals = eval_plot.evaluate_bert_emrqa_ernie(
                    model_trained, data['test'], args.eval_batch_size,
                    tokenizer, entity2id, logical2ix)
        elif args.model_type == 'bert':
            test_vals = eval_plot.evaluate_bert_emrqa(model_trained,
                                                      data['test'],
                                                      args.eval_batch_size,
                                                      tokenizer)
        dict_ = {
            'start_accuracy': test_vals[0],
            'end_accuracy': test_vals[1],
            'actual_and_predicted_values': test_vals[2]
        }
        file_name = '../results/' + model_config['model_name'] + '_test_results.pkl'
        pickle.dump(dict_, open(file_name, 'wb'))
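A hypothetical invocation of the script above (script name and paths are illustrative):

python train_emrqa.py --data_dir ../data/emrqa_split.pkl \
    --model_name_or_path bert-base-uncased --model_type bert \
    --model_save_name bert_emrqa --do_train --do_eval --gpu 0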
Example #12
        for i, example_index in enumerate(example_indices):
            start_logits = batch_start_logits[i].detach().cpu().tolist()
            end_logits = batch_end_logits[i].detach().cpu().tolist()
            eval_feature = eval_features[example_index.item()]
            unique_id = int(eval_feature.unique_id)
            all_results.append(RawResult(unique_id=unique_id,
                                         start_logits=start_logits,
                                         end_logits=end_logits))
    preds = write_predictions(eval_examples, eval_features, all_results,
                              5, 100, 0.0)
    return preds



if __name__ == '__main__':
    model = BertForQuestionAnswering.from_pretrained('squader')
    model.eval()
    tokenizer = BertTokenizer.from_pretrained('squader')
    context = """
        Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is 
        a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a 
        copper statue of Christ with arms upraised with 
        the legend "Venite Ad Me Omnes". Next to the Main 
        Building is the Basilica of the Sacred Heart. 
        Immediately behind the basilica is the Grotto, a 
        Marian place of prayer and reflection. It is a 
        replica of the grotto at Lourdes, France where 
        the Virgin Mary reputedly appeared to Saint 
        Bernadette Soubirous in 1858. At the end of the 
        main drive (and in a direct line that connects 
        through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.
        """
Example #13
        device = torch.device("cpu")

    config = BertConfig(args.config_file)
    if "large" in args.config_file:
        print("large model")
        hidden_size = 1024
    else:
        print("base model")
        hidden_size = 768

    if args.use_adv:
        model = DomainQA(config,
                         hidden_size=hidden_size,
                         use_conv=args.use_conv)
    else:
        model = BertForQuestionAnswering(config)

    # load the checkpoint, mapping it to CPU when no GPU is available
    if torch.cuda.is_available():
        state_dict = torch.load(args.model_path)
    else:
        state_dict = torch.load(args.model_path, map_location='cpu')

    model.load_state_dict(state_dict)
    # move the model to the selected device
    model = model.to(device)
    model.eval()
    tokenizer = BertTokenizer(args.vocab_file)
    app = flask.Flask(__name__)

    @app.route('/', methods=['POST'])
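    The source snippet ends at the decorator above. A minimal handler sketch, assuming the request carries JSON with "question" and "context" fields (the field names and the simplified span decoding are illustrative; DomainQA may expect different inputs):

    def answer():
        payload = flask.request.get_json()
        question_tokens = tokenizer.tokenize(payload["question"])
        context_tokens = tokenizer.tokenize(payload["context"])
        tokens = ["[CLS]"] + question_tokens + ["[SEP]"] + context_tokens + ["[SEP]"]
        input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)], device=device)
        segment_ids = torch.tensor([[0] * (len(question_tokens) + 2) +
                                    [1] * (len(context_tokens) + 1)], device=device)
        with torch.no_grad():
            start_logits, end_logits = model(input_ids, segment_ids)
        start, end = int(start_logits.argmax()), int(end_logits.argmax())
        return flask.jsonify({"answer": " ".join(tokens[start:end + 1])})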