    def __init__(self, job_config, use_pretrain, tokenizer, cache_dir, device,
                 write_log, summary_writer):
        self.job_config = job_config

        if not use_pretrain:
            model_config = self.job_config.get_model_config()
            bert_config = BertConfig(**model_config)
            bert_config.vocab_size = len(tokenizer.vocab)

            self.bert_encoder = BertModel(bert_config)
        # Use pretrained bert weights
        else:
            self.bert_encoder = BertModel.from_pretrained(
                self.job_config.get_model_file_type(), cache_dir=cache_dir)
            bert_config = self.bert_encoder.config

        self.network = MTLRouting(self.bert_encoder,
                                  write_log=write_log,
                                  summary_writer=summary_writer)

        #config_data=self.config['data']

        # Pretrain Dataset
        self.network.register_batch(BatchType.PRETRAIN_BATCH,
                                    "pretrain_dataset",
                                    loss_calculation=BertPretrainingLoss(
                                        self.bert_encoder, bert_config))

        self.device = device
Example #2
    def __init__(
        self,
        bert_model,
        output_dim,
        add_transformer_layer=False,
        layer_pulled=-1,
        aggregation="first",
    ):
        super(BertWrapper, self).__init__()
        self.layer_pulled = layer_pulled
        self.aggregation = aggregation
        self.add_transformer_layer = add_transformer_layer
        # deduce bert output dim from the size of embeddings
        bert_output_dim = bert_model.embeddings.word_embeddings.weight.size(1)

        if add_transformer_layer:
            config_for_one_layer = BertConfig(
                0,
                hidden_size=bert_output_dim,
                num_attention_heads=int(bert_output_dim / 64),
                intermediate_size=3072,
                hidden_act='gelu',
            )
            self.additional_transformer_layer = BertLayer(config_for_one_layer)
        self.additional_linear_layer = torch.nn.Linear(bert_output_dim, output_dim)
        self.bert_model = bert_model
Example #3
def load_model2(is_train, device, output_model_file, output_config_file,
                output_vocab_file, max_seq_length, chooser):
    output_config_file = output_config_file + str(chooser) + ".bin"
    output_model_file = output_model_file + str(chooser) + ".bin"
    output_vocab_file = output_vocab_file + str(chooser) + ".bin"
    try:
        config = BertConfig.from_json_file(output_config_file)
        model = BertBoosting(config, 768)
        state_dict = torch.load(output_model_file)
        model.load_state_dict(state_dict)
        tokenizer = BertTokenizer(output_vocab_file,
                                  do_lower_case=False,
                                  max_len=max_seq_length)
        model.cuda()
    except Exception:
        print("could not load the saved checkpoint, falling back to pretrained bert-base-cased")
        model = BertBoosting.from_pretrained("bert-base-cased")
        tokenizer = BertTokenizer.from_pretrained('bert-base-cased',
                                                  do_lower_case=False,
                                                  max_len=max_seq_length)
    # Put the model in the requested mode.
    if is_train:
        model.train()
    else:
        model.eval()
    return model, tokenizer
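For context, a minimal call of load_model2 above might look as follows; the path prefixes, sequence length, and chooser index are hypothetical placeholders, not values from the original project.

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Hypothetical file prefixes; load_model2 appends str(chooser) + ".bin" itself.
model, tokenizer = load_model2(is_train=1,
                               device=device,
                               output_model_file="checkpoints/model_",
                               output_config_file="checkpoints/config_",
                               output_vocab_file="checkpoints/vocab_",
                               max_seq_length=128,
                               chooser=0)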
Example #4
    def __init__(self,
                 copy_attn,
                 vocab_size,
                 pad_idx,
                 init_context=False,
                 token_type='A',
                 opt=None):
        super(TransformerDecoder, self).__init__()

        # Basic attributes.
        self.decoder_type = 'bert'
        self.pad_idx = pad_idx
        self.token_type = token_type
        self.init_context = init_context
        self.opt = opt
        # Decoder State
        self.state = {}

        self._copy = copy_attn

        self.config = BertConfig(vocab_size)
        bert = BertModel(self.config)

        self.embeddings = MyBertEmbeddings(bert.embeddings, token_type, opt)

        self.transformer_layers = nn.ModuleList([
            BERTDecoderLayer(bert_layer, init_context)
            for bert_layer in bert.encoder.layer
        ])
Example #5
def eval(config, filename):
    logger = config.get_logger('test')
    # setup data_loader instances
    processor = config.initialize('processor', module_processor, logger,
                                  config)
    processor.get_eval(filename)
    test_data_loader = config.initialize('data_loader',
                                         module_data,
                                         processor.data_dir,
                                         mode="eval",
                                         debug=config.debug_mode)

    # build model architecture, then print to console
    if config.bert_config_path:
        bert_config = BertConfig(config.bert_config_path)
        model = config.initialize('arch',
                                  module_arch,
                                  config=bert_config,
                                  num_labels=processor.nums_label())
    else:
        model = config.initialize_bert_model('arch',
                                             module_arch,
                                             num_labels=processor.nums_label())
    logger.info(model)
    agent = Agent(model, config=config, test_data_loader=test_data_loader)
    return agent.test(detail=True), processor
Example #6
    def __init__(self, opt, bert_config=None):
        super(SANBertNetwork, self).__init__()
        self.dropout_list = nn.ModuleList()
        self.bert_config = BertConfig.from_dict(opt)
        self.bert = BertModel(self.bert_config)
        if opt.get('dump_feature', False):
            self.opt = opt
            return
        if opt['update_bert_opt'] > 0:
            for p in self.bert.parameters():
                p.requires_grad = False
        mem_size = self.bert_config.hidden_size
        self.decoder_opt = opt['answer_opt']
        self.scoring_list = nn.ModuleList()
        labels = [int(ls) for ls in opt['label_size'].split(',')]
        task_dropout_p = opt['tasks_dropout_p']
        self.bert_pooler = None

        for task, lab in enumerate(labels):
            decoder_opt = self.decoder_opt[task]
            dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
            self.dropout_list.append(dropout)
            if decoder_opt == 1:
                out_proj = SANClassifier(mem_size, mem_size, lab, opt, prefix='answer', dropout=dropout)
                self.scoring_list.append(out_proj)
            else:
                out_proj = nn.Linear(self.bert_config.hidden_size, lab)
                self.scoring_list.append(out_proj)

        self.opt = opt
        self._my_init()
        self.set_embed(opt)
Example #7
def load_bert_adapter(task_type, bert_model_name, bert_load_mode,
                      bert_load_args, all_state, num_labels,
                      bert_config_json_path):
    if bert_config_json_path is None:
        bert_config_json_path = os.path.join(
            get_bert_config_path(bert_model_name), "bert_config.json")

    if bert_load_mode in ["model_only_adapter"]:
        adapter_state = all_state
    elif bert_load_mode in ["state_adapter"]:
        adapter_state = all_state["model"]
    else:
        raise KeyError(bert_load_mode)

    # Format: "bert_model_path:{path}"
    #  Very hackish
    bert_state = torch.load(bert_load_args.replace("bert_model_path:", ""))

    config = BertConfig.from_json_file(bert_config_json_path)
    if task_type == TaskType.CLASSIFICATION:
        model = BertForSequenceClassification(config, num_labels=num_labels)
    elif task_type == TaskType.REGRESSION:
        assert num_labels == 1
        model = BertForSequenceRegression(config)
    else:
        raise KeyError(task_type)

    load_from_adapter(
        model=model,
        bert_state=bert_state,
        adapter_state=adapter_state,
    )

    return model
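As a rough illustration only, load_bert_adapter above could be driven like this; every path, the adapter state layout, and the label count are assumptions made for the sketch.

import torch

# Hypothetical checkpoint saved with a {"model": ...} layout for "state_adapter" mode.
all_state = torch.load("checkpoints/adapter_run.p", map_location="cpu")
model = load_bert_adapter(
    task_type=TaskType.CLASSIFICATION,
    bert_model_name="bert-base-uncased",
    bert_load_mode="state_adapter",
    bert_load_args="bert_model_path:checkpoints/bert_encoder.p",  # hypothetical path
    all_state=all_state,
    num_labels=2,
    bert_config_json_path=None,
)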
Example #8
    def test_sliding_window_with_batch(self):
        tokenizer = WordTokenizer(word_splitter=BertBasicWordSplitter())

        sentence = "the quickest quick brown fox jumped over the lazy dog"
        tokens = tokenizer.tokenize(sentence)

        vocab = Vocabulary()

        vocab_path = self.FIXTURES_ROOT / 'bert' / 'vocab.txt'
        token_indexer = PretrainedBertIndexer(str(vocab_path), truncate_long_sequences=False, max_pieces=8)

        config_path = self.FIXTURES_ROOT / 'bert' / 'config.json'
        config = BertConfig(str(config_path))
        bert_model = BertModel(config)
        token_embedder = BertEmbedder(bert_model, max_pieces=8)

        instance = Instance({"tokens": TextField(tokens, {"bert": token_indexer})})
        instance2 = Instance({"tokens": TextField(tokens + tokens + tokens, {"bert": token_indexer})})

        batch = Batch([instance, instance2])
        batch.index_instances(vocab)

        padding_lengths = batch.get_padding_lengths()
        tensor_dict = batch.as_tensor_dict(padding_lengths)
        tokens = tensor_dict["tokens"]
        bert_vectors = token_embedder(tokens["bert"], offsets=tokens["bert-offsets"])
        assert bert_vectors is not None
Example #9
    def __init__(self, config):
        self.config = config
        self.align_layer_idx = -1

        cout_word = f'{self.description}: building    '
        sys.stdout.write(cout_word)
        sys.stdout.flush()

        bert_config = BertConfig(
            vocab_size_or_config_json_file=config.phn_size,
            hidden_size=768,
            num_hidden_layers=12,
            num_attention_heads=12,
            intermediate_size=3072,
        )

        self.bert_model = BertModel(
            bert_config,
            config.feat_dim,
            config.phn_size,
            (config.batch_size * config.repeat) // 2,
        )
        self.optimizer = BertAdam(
            params=self.bert_model.parameters(),
            lr=config.sup_lr,
            warmup=0.1,
            t_total=config.pretrain_step + config.finetune_step,
        )
        if torch.cuda.is_available():
            self.bert_model.cuda()

        sys.stdout.write('\b' * len(cout_word))
        cout_word = f'{self.description}: finish     '
        sys.stdout.write(cout_word + '\n')
        sys.stdout.flush()
Example #10
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file,
                                     pytorch_dump_path):
    config_path = os.path.abspath(bert_config_file)
    tf_path = os.path.abspath(tf_checkpoint_path)
    print("Converting TensorFlow checkpoint from {} with config at {}".format(
        tf_path, config_path))
    # Load weights from TF model
    init_vars = tf.train.list_variables(tf_path)
    names = []
    arrays = []
    for name, shape in init_vars:
        print("Loading TF weight {} with shape {}".format(name, shape))
        array = tf.train.load_variable(tf_path, name)
        names.append(name)
        arrays.append(array)

    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)

    for name, array in zip(names, arrays):
        name = name.split('/')
        # adam_v and adam_m are variables used in AdamWeightDecayOptimizer to compute m and v,
        # which are not required for using the pretrained model
        if any(n in ["adam_v", "adam_m"] for n in name):
            print("Skipping {}".format("/".join(name)))
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel' or l[0] == 'gamma':
                pointer = getattr(pointer, 'weight')
            elif l[0] == 'output_bias' or l[0] == 'beta':
                pointer = getattr(pointer, 'bias')
            elif l[0] == 'output_weights':
                pointer = getattr(pointer, 'weight')
            else:
                pointer = getattr(pointer, l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        print("Initialize PyTorch weight {}".format(name))
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
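A small command-line wrapper for the converter above might look like the sketch below; the flag names are assumptions, not necessarily those of the original script.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--tf_checkpoint_path", required=True)
    parser.add_argument("--bert_config_file", required=True)
    parser.add_argument("--pytorch_dump_path", required=True)
    cli_args = parser.parse_args()
    convert_tf_checkpoint_to_pytorch(cli_args.tf_checkpoint_path,
                                     cli_args.bert_config_file,
                                     cli_args.pytorch_dump_path)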
Example #11
def get_config(config_path_or_type, logger):
    if config_path_or_type in PRETRAINED_MODEL_ARCHIVE_MAP:
        archive_file = PRETRAINED_MODEL_ARCHIVE_MAP[config_path_or_type]
    else:
        archive_file = config_path_or_type
    # redirect to the cache, if necessary
    try:
        resolved_archive_file = cached_path(archive_file)
    except EnvironmentError:
        logger.error(
            "Model name '{}' was not found in model name list ({}). "
            "We assumed '{}' was a path or url but couldn't find any file "
            "associated to this path or url.".format(
                config_path_or_type,
                ', '.join(PRETRAINED_MODEL_ARCHIVE_MAP.keys()), archive_file))
        return None
    if resolved_archive_file == archive_file:
        logger.info("loading archive file {}".format(archive_file))
    else:
        logger.info("loading archive file {} from cache at {}".format(
            archive_file, resolved_archive_file))
    if os.path.isdir(resolved_archive_file):
        serialization_dir = resolved_archive_file
    else:
        # Extract archive to temp dir
        tempdir = tempfile.mkdtemp()
        logger.info("extracting archive file {} to temp dir {}".format(
            resolved_archive_file, tempdir))
        with tarfile.open(resolved_archive_file, 'r:gz') as archive:
            archive.extractall(tempdir)
        serialization_dir = tempdir
    # Load config
    config_file = os.path.join(serialization_dir, CONFIG_NAME)
    config = BertConfig.from_json_file(config_file)
    return config
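A possible use of get_config above, assuming the usual pytorch_pretrained_bert shortcut names are present in PRETRAINED_MODEL_ARCHIVE_MAP; the model name here is purely illustrative.

import logging

logger = logging.getLogger(__name__)
config = get_config("bert-base-uncased", logger)
if config is not None:
    # BertConfig exposes the usual architecture fields once loaded.
    print(config.hidden_size, config.num_hidden_layers, config.vocab_size)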
Example #12
def create_model(args, device, config_file='', weights_file=''):
    ''' create squad model from args '''
    ModelClass = None
    if args.squad_model == 'bert_base':
        print('creating bert base model')
        ModelClass = SquadModel
    elif args.squad_model == 'bert_linear':
        print('creating bert linear model')
        ModelClass = SquadLinearModel
    elif args.squad_model == 'bert_deep':
        print('creating bert deep model')
        ModelClass = SquadDeepModel
    elif args.squad_model == 'bert_qanet':
        print('creating bert qanet model')
        ModelClass = SquadModelQANet
    if ModelClass is None:
        raise ValueError('unknown squad_model: {}'.format(args.squad_model))

    if config_file == '' and weights_file == '':
        print('creating an untrained model')
        return ModelClass.from_pretrained(args.bert_model,
                cache_dir=os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(args.local_rank)))
    else:
        print('loading a trained model')
        config = BertConfig(config_file)
        model = ModelClass(config)
        model.load_state_dict(torch.load(weights_file, map_location=device))
        return model
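The trained-model branch of create_model above could be exercised roughly as follows; the Namespace fields and file names are invented for the sketch.

from argparse import Namespace
import torch

args = Namespace(squad_model='bert_base',
                 bert_model='bert-base-uncased',
                 local_rank=-1)
device = torch.device('cpu')
model = create_model(args, device,
                     config_file='out/bert_config.json',    # hypothetical files
                     weights_file='out/pytorch_model.bin')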
Example #13
def create_model(args, dataset, train=True):
    print("[*] Create model.")

    global model
    if train:
        model = BertForSequenceClassification.from_pretrained(BERT,
                                                              num_labels=5)
    else:
        if BERT == 'bert-large-uncased':
            config = BertConfig.from_json_file("uncase_model")
        else:
            config = BertConfig.from_json_file("case_model")
        model = BertForSequenceClassification(config, num_labels=5)
    # for i in model.bert.named_parameters():
    #     i[1].requires_grad=False

    model = model.to(device)
    # print(model)

    param_optimizer = list(model.named_parameters())

    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    if train:
        num_train_optimization_steps = int(
            len(dataset["train"]) / args.batch_size /
            args.gradient_accumulation_steps) * args.epochs
        global optimizer
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.lr_rate,
                             warmup=0.1,
                             t_total=num_train_optimization_steps)

    # optimizer = optim.Adam(model.parameters(),
    #                        lr=args.lr_rate) # , betas=(0.9, 0.999), weight_decay=1e-3)
    return
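Since create_model above fills in the module-level model and optimizer globals instead of returning them, a call might look like the sketch below; the Namespace fields and the stand-in dataset are assumptions, and the snippet's BERT and device globals are taken as already defined.

from argparse import Namespace

args = Namespace(batch_size=32,
                 gradient_accumulation_steps=1,
                 epochs=3,
                 lr_rate=2e-5)
dataset = {"train": list(range(1000))}  # stand-in; only len(dataset["train"]) is used
create_model(args, dataset, train=True)
# `model` and `optimizer` are now available as globals in this module.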
Example #14
def train(model, args, n_gpu, optimizer, num_train_optimization_steps,
          num_labels, train_dataloader, device):
    '''
        train model
    '''
    model.train()
    global global_step
    global nb_tr_steps
    global tr_loss
    for _ in trange(int(args.num_train_epochs), desc="Epoch"):
        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids = batch
            #print("input_ids_shape", input_ids.shape)
            #print("label_ids_shape", label_ids.shape)
            loss = model(input_ids, segment_ids, input_mask, label_ids)
            if n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu.
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                optimizer.backward(loss)
            else:
                loss.backward()

            tr_loss += loss.item()
            nb_tr_examples += input_ids.size(0)
            nb_tr_steps += 1
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    # modify learning rate with the special warm-up BERT uses
                    # (when args.fp16 is False, BertAdam handles this automatically)
                    lr_this_step = args.learning_rate * warmup_linear(
                        global_step / num_train_optimization_steps,
                        args.warmup_proportion)
                    for param_group in optimizer.param_groups:
                        param_group['lr'] = lr_this_step
                optimizer.step()
                optimizer.zero_grad()
                global_step += 1

    # Save the fine-tuned model
    model_to_save = model.module if hasattr(
        model, 'module') else model  # Only save the model itself, not a DataParallel wrapper
    output_model_file = os.path.join(args.output_dir, WEIGHTS_NAME)
    torch.save(model_to_save.state_dict(), output_model_file)
    output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
    with open(output_config_file, 'w') as f:
        f.write(model_to_save.config.to_json_string())

    # Load a trained model and config that you have fine-tuned
    config = BertConfig(output_config_file)
    model = BertForDocMultiClassification(config, num_labels=num_labels)
    model.load_state_dict(torch.load(output_model_file))

    return model
Example #15
    def test_squad_with_unwordpieceable_passage(self):

        tokenizer = SpacyTokenizer()

        token_indexer = PretrainedBertIndexer("bert-base-uncased")

        passage1 = (
            "There were four major HDTV systems tested by SMPTE in the late 1970s, "
            "and in 1979 an SMPTE study group released A Study of High Definition Television Systems:"
        )
        question1 = "Who released A Study of High Definition Television Systems?"

        passage2 = (
            "Broca, being what today would be called a neurosurgeon, "
            "had taken an interest in the pathology of speech. He wanted "
            "to localize the difference between man and the other animals, "
            "which appeared to reside in speech. He discovered the speech "
            "center of the human brain, today called Broca's area after him. "
            "His interest was mainly in Biological anthropology, but a German "
            "philosopher specializing in psychology, Theodor Waitz, took up the "
            "theme of general and social anthropology in his six-volume work, "
            "entitled Die Anthropologie der Naturvölker, 1859–1864. The title was "
            """soon translated as "The Anthropology of Primitive Peoples". """
            "The last two volumes were published posthumously.")
        question2 = "What did Broca discover in the human brain?"

        from allennlp.data.dataset_readers.reading_comprehension.util import (
            make_reading_comprehension_instance, )

        instance1 = make_reading_comprehension_instance(
            tokenizer.tokenize(question1),
            tokenizer.tokenize(passage1),
            {"bert": token_indexer},
            passage1,
        )

        instance2 = make_reading_comprehension_instance(
            tokenizer.tokenize(question2),
            tokenizer.tokenize(passage2),
            {"bert": token_indexer},
            passage2,
        )

        vocab = Vocabulary()

        batch = Batch([instance1, instance2])
        batch.index_instances(vocab)

        padding_lengths = batch.get_padding_lengths()
        tensor_dict = batch.as_tensor_dict(padding_lengths)
        qtokens = tensor_dict["question"]
        ptokens = tensor_dict["passage"]

        config = BertConfig(len(token_indexer.vocab))
        model = BertModel(config)
        embedder = BertEmbedder(model)

        _ = embedder(ptokens["bert"], offsets=ptokens["bert-offsets"])
        _ = embedder(qtokens["bert"], offsets=qtokens["bert-offsets"])
Example #16
    def __init__(self, opt, bert_config=None):
        super(SANBertNetwork, self).__init__()
        self.dropout_list = nn.ModuleList()
        self.encoder_type = opt['encoder_type']
        if opt['encoder_type'] == EncoderModelType.ROBERTA:
            from fairseq.models.roberta import RobertaModel
            self.bert = RobertaModel.from_pretrained(opt['init_checkpoint'])
            hidden_size = self.bert.args.encoder_embed_dim
            self.pooler = LinearPooler(hidden_size)
        else:
            self.bert_config = BertConfig.from_dict(opt)
            self.bert = BertModel(self.bert_config)
            hidden_size = self.bert_config.hidden_size

        if opt.get('dump_feature', False):
            self.opt = opt
            return
        if opt['update_bert_opt'] > 0:
            for p in self.bert.parameters():
                p.requires_grad = False
        self.decoder_opt = opt['answer_opt']
        self.task_types = opt["task_types"]
        self.scoring_list = nn.ModuleList()
        labels = [int(ls) for ls in opt['label_size'].split(',')]
        task_dropout_p = opt['tasks_dropout_p']

        for task, lab in enumerate(labels):
            decoder_opt = self.decoder_opt[task]
            task_type = self.task_types[task]
            dropout = DropoutWrapper(task_dropout_p[task], opt['vb_dropout'])
            self.dropout_list.append(dropout)
            if task_type == TaskType.Span:
                assert decoder_opt != 1
                out_proj = nn.Linear(hidden_size, 2)
            elif task_type == TaskType.SeqenceLabeling:
                out_proj = nn.Linear(hidden_size, lab)
            elif task_type == TaskType.MaskLM:
                if opt['encoder_type'] == EncoderModelType.ROBERTA:
                    # TODO: xiaodl
                    out_proj = MaskLmHeader(
                        self.bert.embeddings.word_embeddings.weight)
                else:
                    out_proj = MaskLmHeader(
                        self.bert.embeddings.word_embeddings.weight)
            else:
                if decoder_opt == 1:
                    out_proj = SANClassifier(hidden_size,
                                             hidden_size,
                                             lab,
                                             opt,
                                             prefix='answer',
                                             dropout=dropout)
                else:
                    out_proj = nn.Linear(hidden_size, lab)
            self.scoring_list.append(out_proj)

        self.opt = opt
        self._my_init()
Example #17
    def __init__(self, vocab_size, pad_idx):
        super(BERTEncoder, self).__init__()
        self.config = BertConfig(vocab_size)
        bert = BertModel(self.config)
        self.embeddings = \
            MyEncoderBertEmbeddings(bert.embeddings)
        self.encoder = bert.encoder

        self.pad_idx = pad_idx
Example #18
    def __init__(self, num_choices, bert_config_file):
        self.num_choices = num_choices
        bert_config = BertConfig.from_json_file(bert_config_file)
        BertPreTrainedModel.__init__(self, bert_config)
        self.bert = BertModel(bert_config)
        self.dropout = nn.Dropout(bert_config.hidden_dropout_prob)
        self.classifier = nn.Linear(bert_config.hidden_size, 1)
        self.activation = nn.Sigmoid()
        self.apply(self.init_bert_weights)
Example #19
    def __init__(self, vocab_size, original_hidden_size, num_layers, tau=1):
        super().__init__()
        self.bert_layer = BertLayer(BertConfig(
            vocab_size_or_config_json_file=vocab_size,
            hidden_size=original_hidden_size * num_layers,
        ))
        self.linear_layer = nn.Linear(original_hidden_size * num_layers, 1)
        self.log_sigmoid = nn.LogSigmoid()
        self.tau = tau
Example #20
    def setUp(self):
        super().setUp()
        self.monkeypatch = MonkeyPatch()

        # monkeypatch the PretrainedBertModel to return the tiny test fixture model
        config_path = self.FIXTURES_ROOT / "bert" / "config.json"
        config = BertConfig(str(config_path))
        self.monkeypatch.setattr(BertModel, "from_pretrained",
                                 lambda _: BertModel(config))
Example #21
def load_bert(model_path="bert/model/pytorch_model.bin",
              config_file="bert/config_parameters/config.json"):
    print("Loading BERT-model...")
    config = BertConfig(config_file)
    model = BertForQuestionAnswering(config)
    model.load_state_dict(
        torch.load(model_path, map_location=torch.device("cpu")))
    print("Model loaded.\n\n")
    return model
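Calling load_bert above with its default paths is straightforward; this is only a sketch and the checkpoint locations come from the function's own defaults.

qa_model = load_bert()
qa_model.eval()  # weights were mapped to CPU, so run inference in eval mode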
Example #22
def main(args):

    logging = config.get_logging(args.log_name)
    logging.info(args)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    n_gpu = torch.cuda.device_count()
    tokenizer = BertTokenizer.build_tokenizer(args)
    # train_data_iter = MSmarco_iterator(args, tokenizer, batch_size=args.train_batch_size, world_size=n_gpu, accumulation_steps=args.gradient_accumulation_steps, name="msmarco_train.pk")
    dev_data_iter = MSmarco_iterator(args, tokenizer, batch_size=args.valid_batch_size, world_size=n_gpu, name="msmarco_dev.pk")

    logging.info("| dev batch data size {}".format(len(dev_data_iter)))


    # num_train_steps = (96032//2//2)+(data_size-96032)//2
    missing_keys = []
    unexpected_keys = []
    error_msgs = []

    pre_dir = args.pre_dir
    config_file = os.path.join(pre_dir, CONFIG_NAME)
    bert_config = BertConfig.from_json_file(config_file)
    model = MSmarco(bert_config)
    logging.info("| load model from {}".format(args.path))
    state_dict = torch.load(args.path, map_location=torch.device('cpu'))
    metadata = getattr(state_dict, '_metadata', None)
    # state_dict = state_dict.copy()
    # if metadata is not None:
    #     state_dict._metadata = metadata

    def load(module, prefix=''):
        local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
        module._load_from_state_dict(
            state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs)
        for name, child in module._modules.items():
            if child is not None:
                load(child, prefix + name + '.')

    load(model, prefix='module.')

    if len(missing_keys) > 0:
        # logger.info("Weights of {} not initialized from pretrained model: {}".format(
        #     model.__class__.__name__, missing_keys))
        print("| Weights of {} not initialized from pretrained model: {}".format(
            model.__class__.__name__, missing_keys))
    if len(unexpected_keys) > 0:
        # logger.info("Weights from pretrained model not used in {}: {}".format(
            # model.__class__.__name__, unexpected_keys))
        print("Weights from pretrained model not used in {}: {}".format(
            model.__class__.__name__, unexpected_keys))

    # model._load_from_state_dict(state_dict, prefix="module.")
    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

        # save_checkpoint(args, model, epochs)
    validation(args, model, dev_data_iter, n_gpu, 0, 0, logging)
Example #23
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
    # Initialise PyTorch model
    config = BertConfig.from_json_file(bert_config_file)
    print("Building PyTorch model from configuration: {}".format(str(config)))
    model = BertForPreTraining(config)
    # Load weights from tf checkpoint
    load_tf_weights_in_bert(model, tf_checkpoint_path)
    # Save pytorch-model
    print("Save PyTorch model to {}".format(pytorch_dump_path))
    torch.save(model.state_dict(), pytorch_dump_path)
Example #24
    @classmethod
    def from_fine_tuned(cls, model_path, map_location='default_map_location',
                        *inputs, **kwargs):
        config = BertConfig(os.path.join(model_path, CONFIG_NAME))
        model = cls(config, *inputs, **kwargs)
        saved_kwargs = MultiPredictionHead.load_kwargs(model_path)
        if map_location == 'default_map_location':
            map_location = 'cpu' if not torch.cuda.is_available() else None
        state_dict = torch.load(os.path.join(model_path, WEIGHTS_NAME),
                                map_location=map_location)
        model.prediction_head.update_state_dict('prediction_head.', state_dict,
                                                saved_kwargs)
        model.load_state_dict(state_dict, strict=False)
        return model
Example #25
    def setUp(self):
        super().setUp()

        vocab_path = self.FIXTURES_ROOT / 'bert' / 'vocab.txt'
        self.token_indexer = PretrainedBertIndexer(str(vocab_path))

        config_path = self.FIXTURES_ROOT / 'bert' / 'config.json'
        config = BertConfig(str(config_path))
        self.bert_model = BertModel(config)
        self.token_embedder = BertEmbedder(self.bert_model)
Example #26
    def load_model(self, model_dir, model_config: str = "model_config.json"):
        model_config = os.path.join(model_dir, model_config)
        model_config = json.load(open(model_config))
        output_config_file = os.path.join(model_dir, CONFIG_NAME)
        output_model_file = os.path.join(model_dir, WEIGHTS_NAME)
        config = BertConfig(output_config_file)
        model = BertForSequenceClassification(config, num_labels=model_config["num_labels"])
        model.load_state_dict(torch.load(output_model_file, map_location='cpu'))
        tokenizer = BertTokenizer.from_pretrained(model_config["bert_model"],
                                                  do_lower_case=model_config["do_lower"])
        return model, tokenizer, model_config
Example #27
    def __init__(self, bert_model_path, decoder_config, device):
        super().__init__()

        self.bert_encoder = BertModel.from_pretrained(bert_model_path)
        bert_config_file = os.path.join(bert_model_path, CONFIG_NAME)
        bert_config = BertConfig.from_json_file(bert_config_file)
        self.device = device
        self.bert_emb = BertEmbeddings(bert_config)
        self.decoder = BertDecoder(decoder_config, self.bert_emb, device) 
        self.teacher_forcing = 0.5
Example #28
    def load_model(self, model_dir: str, model_config: str = "model_config.json"):
        model_config = os.path.join(model_dir, model_config)
        model_config = json.load(open(model_config))
        output_config_file = os.path.join(model_dir, CONFIG_NAME)
        output_model_file = os.path.join(model_dir, WEIGHTS_NAME)
        config = BertConfig(output_config_file)
        model = BertForTokenClassification(config, num_labels=model_config["num_labels"])
        model.load_state_dict(torch.load(output_model_file))
        tokenizer = FullTokenizer(model_file='cased_bert_base_pytorch/mn_cased.model',
                                  vocab_file='cased_bert_base_pytorch/mn_cased.vocab',
                                  do_lower_case=False)
        return model, tokenizer, model_config
Example #29
def load_model_multilabel(
    model_path: Path, model_name: str, num_labels: int = 2
) -> BertForMultiLabelSequenceClassification:
    model_path = Path(model_path)
    config = BertConfig(str(model_path / f"{model_name}-config.json"))
    model = BertForMultiLabelSequenceClassification(config, num_labels=num_labels)
    model.load_state_dict(
        torch.load(str(model_path / f"{model_name}-model.pt"), map_location=device)
    )
    return model
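A hypothetical call of load_model_multilabel above; note that the helper reads a module-level device, and the directory, model name, and label count below are assumptions.

import torch
from pathlib import Path

device = torch.device("cpu")  # used by the helper's torch.load(..., map_location=device)
clf = load_model_multilabel(Path("models"), "multilabel-bert", num_labels=6)
clf.eval()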
Example #30
    def __init__(self, config: Config, output_encoded_layers: bool, *args,
                 **kwargs) -> None:
        super().__init__(config, output_encoded_layers=output_encoded_layers)
        # Load config
        config_file = os.path.join(config.bert_cpt_dir, "bert_config.json")
        bert_config = BertConfig.from_json_file(config_file)
        print("Bert model config {}".format(bert_config))
        # Instantiate model.
        model = BertModel(bert_config)
        weights_path = os.path.join(config.bert_cpt_dir, "pytorch_model.bin")
        # load pre-trained weights if weights_path exists
        if config.load_weights and PathManager.isfile(weights_path):
            state_dict = torch.load(weights_path)

            missing_keys: List[str] = []
            unexpected_keys: List[str] = []
            error_msgs: List[str] = []
            # copy state_dict so _load_from_state_dict can modify it
            metadata = getattr(state_dict, "_metadata", None)
            state_dict = state_dict.copy()
            if metadata is not None:
                state_dict._metadata = metadata

            def load(module, prefix=""):
                local_metadata = ({} if metadata is None else metadata.get(
                    prefix[:-1], {}))
                module._load_from_state_dict(
                    state_dict,
                    prefix,
                    local_metadata,
                    True,
                    missing_keys,
                    unexpected_keys,
                    error_msgs,
                )
                for name, child in module._modules.items():
                    if child is not None:
                        load(child, prefix + name + ".")

            load(model, prefix="" if hasattr(model, "bert") else "bert.")
            if len(missing_keys) > 0:
                print(
                    "Weights of {} not initialized from pretrained model: {}".
                    format(model.__class__.__name__, missing_keys))
            if len(unexpected_keys) > 0:
                print(
                    "Weights from pretrained model not used in {}: {}".format(
                        model.__class__.__name__, unexpected_keys))

        self.bert = model
        self.projection = (torch.nn.Linear(model.config.hidden_size,
                                           config.projection_dim)
                           if config.projection_dim > 0 else None)
        log_class_usage(__class__)