Example #1
 def __init__(self):
     """ Init the labeler module
     @param p (float): p to use for dropout in the linear heads, 0.1 by default is consistent with
                       transformers.BertForSequenceClassification
     @param clinical (boolean): True if Bio_Clinical BERT desired, False otherwise. Ignored if
                                pretrain_path is not None
     @param freeze_embeddings (boolean): True to freeze BERT embeddings during training
     @param pretrain_path (string): path to load checkpoint from
     """
     super(bert_labeler, self).__init__()
     config = AutoConfig.from_pretrained('bert-base-uncased')
     self.bert = AutoModel.from_config(config)
     self.hidden_size = self.bert.pooler.dense.in_features
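# A minimal sketch (not part of the original snippet) of the distinction these
# examples illustrate: AutoModel.from_config builds the architecture with randomly
# initialized weights and only fetches config.json, while AutoModel.from_pretrained
# also downloads and loads the trained weights.
from transformers import AutoConfig, AutoModel

config = AutoConfig.from_pretrained('bert-base-uncased')        # config only
model_random = AutoModel.from_config(config)                    # random init
model_trained = AutoModel.from_pretrained('bert-base-uncased')  # trained weights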
Example #2
def load_bert_model(bert_hidden_size):
    bert_config = AutoConfig.from_pretrained("model/",
                                             hidden_size=bert_hidden_size)
    bert_tokenizer = AutoTokenizer.from_pretrained('model/')
    bert_model = AutoModel.from_config(
        bert_config)  # turns out it should be from_config, not from_pretrained :)
    print("loaded bert model config: ", bert_model.config)  # ok, finally :)

    for param in bert_model.parameters(
    ):  # freeze bert model params (not bert training)
        param.requires_grad = False

    return bert_tokenizer, bert_model
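# Hedged usage sketch for load_bert_model, assuming "model/" is a local directory
# containing config.json and the tokenizer files. The hidden size 384 is only
# illustrative; it must stay divisible by the config's number of attention heads.
bert_tokenizer, bert_model = load_bert_model(bert_hidden_size=384)
inputs = bert_tokenizer("hello world", return_tensors="pt")
outputs = bert_model(**inputs)
print(outputs.last_hidden_state.shape)  # torch.Size([1, seq_len, 384])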
Example #3
    def __init__(self,
                 model_name_or_path,
                 mapping_path,
                 config_name=None,
                 tokenizer_name=None,
                 cache_dir=None,
                 from_pretrained=False,
                 freeze_base_model=False,
                 fusion="max_pooling",
                 lambdas=[1, 1, 1]):
        super(BertForDBpediaDocumentClassification, self).__init__()
        # Initialize config, tokenizer and model (feature extractor)
        self.base_model_config = AutoConfig.from_pretrained(
            config_name if config_name is not None else model_name_or_path,
            cache_dir=cache_dir,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_name if tokenizer_name else model_name_or_path,
            cache_dir=cache_dir,
            use_fast=True,
        )
        if from_pretrained:
            self.base_model = AutoModel.from_pretrained(
                model_name_or_path,
                from_tf=bool(".ckpt" in model_name_or_path),
                config=self.base_model_config,
                cache_dir=cache_dir,
            )
        else:
            self.base_model = AutoModel.from_config(
                config=self.base_model_config)

        # Fusion
        if fusion not in ["max_pooling", "average_pooling", "sum"]:
            raise ValueError(
                f"Invalid fusion value. Expected one of ['max_pooling', 'average_pooling', 'sum'], got "
                f"'{fusion}' instead.")
        self.fusion = fusion

        assert len(lambdas) == 3
        self.lambdas = lambdas

        # Freeze
        if freeze_base_model:
            for p in self.base_model.parameters():
                p.requires_grad = False

        # Initialize layers
        with open(mapping_path, "r") as fin:
            self.mapping = json.load(fin)
        self._initialize_layers()
Example #4
    def test_from_pretrained_with_tuple_values(self):
        # For the auto model mapping, FunnelConfig has two models: FunnelModel and FunnelBaseModel
        model = AutoModel.from_pretrained("sgugger/funnel-random-tiny")
        self.assertIsInstance(model, FunnelModel)

        config = copy.deepcopy(model.config)
        config.architectures = ["FunnelBaseModel"]
        model = AutoModel.from_config(config)
        self.assertIsInstance(model, FunnelBaseModel)

        with tempfile.TemporaryDirectory() as tmp_dir:
            model.save_pretrained(tmp_dir)
            model = AutoModel.from_pretrained(tmp_dir)
            self.assertIsInstance(model, FunnelBaseModel)
Example #5
    def test_add_adapter_fusion_different_config(self):
        model = AutoModel.from_config(self.config())
        model.eval()

        # fusion between a and b should be possible whereas fusion between a and c should fail
        model.add_adapter("a", config=PfeifferConfig(reduction_factor=16))
        model.add_adapter("b", config=PfeifferConfig(reduction_factor=2))
        model.add_adapter("c", config="houlsby")

        # correct fusion
        model.add_fusion(["a", "b"])
        self.assertIn("a,b", model.config.adapter_fusion_models)
        # failing fusion
        self.assertRaises(ValueError, lambda: model.add_fusion(["a", "c"]))
Example #6
    def convert_to_transformers(self):
        from transformers import DPRContextEncoder, DPRQuestionEncoder, AutoModel
        if len(self.prediction_heads) != 1:
            raise ValueError(
                f"Currently conversion only works for models with a SINGLE prediction head. "
                f"Your model has {len(self.prediction_heads)}")

        if self.prediction_heads[0].model_type == "text_similarity":
            # init model
            if "dpr" in self.language_model1.model.config.model_type:
                transformers_model1 = DPRQuestionEncoder(
                    config=self.language_model1.model.config)
            else:
                transformers_model1 = AutoModel.from_config(
                    config=self.language_model1.model.config)
            if "dpr" in self.language_model2.model.config.model_type:
                transformers_model2 = DPRContextEncoder(
                    config=self.language_model2.model.config)
            else:
                transformers_model2 = AutoModel.from_config(
                    config=self.language_model2.model.config)

            # transfer weights for language model + prediction head
            setattr(transformers_model1, transformers_model1.base_model_prefix,
                    self.language_model1.model)
            setattr(transformers_model2, transformers_model2.base_model_prefix,
                    self.language_model2.model)
            logger.warning("No prediction head weights are required for DPR")

        else:
            raise NotImplementedError(
                f"FARM -> Transformers conversion is not supported yet for"
                f" prediction heads of type {self.prediction_heads[0].model_type}"
            )

        return transformers_model1, transformers_model2
Example #7
    def load(model_path: str):
        """ Load the model from a file.
        @param model_path (str): path to model
        @return model (nn.Module): model with saved parameters
        """
        params = torch.load(model_path,
                            map_location=lambda storage, loc: storage)
        args = params['args']
        bert_base_config = args['bert_config']
        bert_base_model = AutoModel.from_config(bert_base_config)
        model = BasicBertForClassification(bert_base_model, args['n_class'],
                                           args['dropout_rate'])
        model.load_state_dict(params['state_dict'])

        return model
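# load() above expects a specific checkpoint layout; a hypothetical save-side
# counterpart (variable names are stand-ins inferred from the keys load() reads):
params = {
    'args': {
        'bert_config': bert_base_model.config,  # a transformers PretrainedConfig
        'n_class': n_class,                     # hypothetical variables
        'dropout_rate': dropout_rate,
    },
    'state_dict': model.state_dict(),
}
torch.save(params, model_path)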
Example #8
 def __init__(self, hparams, config=None):
     super().__init__()
     self.save_hyperparameters(hparams)
     if config is None:
         self.transformer = AutoModel.from_pretrained(
             self.hparams.transformer)
     else:
         self.transformer = AutoModel.from_config(config)
     self.dropout = nn.Dropout(self.hparams.dropout)
     hidden_size = self.transformer.config.hidden_size
     self.classifier = BiaffineCRFClassifier(
         input_size=hidden_size,
         label_num=self.hparams.num_labels,
         dropout=self.hparams.dropout,
         hidden_size=self.hparams.hidden_size)
Example #9
    def test_add_adapter_multiple_reduction_factors(self):
        model = AutoModel.from_config(self.config())
        model.eval()
        reduction_factor = {"1": 1, "default": 2}
        for adapter_config in [
                PfeifferConfig(reduction_factor=reduction_factor),
                HoulsbyConfig(reduction_factor=reduction_factor),
        ]:
            with self.subTest(model_class=model.__class__.__name__,
                              config=adapter_config.__class__.__name__):
                name = adapter_config.__class__.__name__
                model.add_adapter(name, config=adapter_config)
                model.set_active_adapters([name])

                # adapter is correctly added to config
                self.assertTrue(name in model.config.adapters)
                self.assertEqual(adapter_config,
                                 model.config.adapters.get(name))

                # TODO: Add this method to model classes.
                def get_adapter_layer(idx):
                    if isinstance(model, RobertaModel):
                        adapter = model.encoder.layer[idx].output.adapters
                    elif isinstance(model, DistilBertModel):
                        adapter = model.transformer.layer[
                            idx].output_adapters.adapters
                    elif isinstance(model, BartModel) or isinstance(
                            model, MBartModel):
                        adapter = model.encoder.layers[
                            idx].output_adapters.adapters
                    elif isinstance(model, GPT2Model):
                        adapter = model.h[idx].output_adapters.adapters
                    else:
                        adapter = model.encoder.layer[idx].output.adapters
                    return (adapter.PfeifferConfig if isinstance(
                        adapter_config, PfeifferConfig) else
                            adapter.HoulsbyConfig)

                self.assertEqual(
                    get_adapter_layer(0).adapter_down[0].in_features /
                    get_adapter_layer(0).adapter_down[0].out_features,
                    reduction_factor["default"],
                )
                self.assertEqual(
                    get_adapter_layer(1).adapter_down[0].in_features /
                    get_adapter_layer(1).adapter_down[0].out_features,
                    reduction_factor["1"],
                )
Example #10
    def load(self) -> None:
        if self.pretrained_bert:
            log.info(f"From pretrained {self.pretrained_bert}.")
            self.pretrained_bert = str(expand_path(self.pretrained_bert))
            self.config = AutoConfig.from_pretrained(self.pretrained_bert,
                                                     output_hidden_states=True)
            self.encoder = AutoModel.from_pretrained(self.pretrained_bert,
                                                     config=self.config)

        elif self.bert_config_file and Path(self.bert_config_file).is_file():
            self.config = AutoConfig.from_json_file(
                str(expand_path(self.bert_config_file)))
            self.encoder = AutoModel.from_config(config=self.config)
        else:
            raise ConfigError("No pre-trained BERT model is given.")
        self.encoder.to(self.device)
Example #11
    def from_bytes(self, bytes_data):
        msg = srsly.msgpack_loads(bytes_data)
        config_dict = msg["config"]
        tok_dict = msg["tokenizer"]
        if config_dict:
            with make_tempdir() as temp_dir:
                config_file = temp_dir / "config.json"
                srsly.write_json(config_file, config_dict)
                config = AutoConfig.from_pretrained(config_file)
                for x, x_bytes in tok_dict.items():
                    Path(temp_dir / x).write_bytes(x_bytes)
                tokenizer = AutoTokenizer.from_pretrained(str(temp_dir.absolute()))
                vocab_file_contents = None
                if hasattr(tokenizer, "vocab_file"):
                    vocab_file_name = tokenizer.vocab_files_names["vocab_file"]
                    vocab_file_path = str((temp_dir / vocab_file_name).absolute())
                    with open(vocab_file_path, "rb") as fileh:
                        vocab_file_contents = fileh.read()

            transformer = AutoModel.from_config(config)
            self._hfmodel = HFObjects(
                tokenizer,
                transformer,
                vocab_file_contents,
                SimpleFrozenDict(),
                SimpleFrozenDict(),
            )
            self._model = transformer
            filelike = BytesIO(msg["state"])
            filelike.seek(0)
            ops = get_current_ops()
            if ops.device_type == "cpu":
                map_location = "cpu"
            else:  # pragma: no cover
                device_id = torch.cuda.current_device()
                map_location = f"cuda:{device_id}"
            self._model.load_state_dict(torch.load(filelike, map_location=map_location))
            self._model.to(map_location)
        else:
            self._hfmodel = HFObjects(
                None,
                None,
                None,
                msg["_init_tokenizer_config"],
                msg["_init_transformer_config"],
            )
        return self
Example #12
 def __init__(self, bert, opt):
     super().__init__()
     self.opt = opt
     if opt.debug:
         conf = AutoConfig.from_pretrained(opt.pretrained_bert_name)
         self.bert = AutoModel.from_config(conf)
     else:
         self.bert = AutoModel.from_pretrained(
             opt.pretrained_bert_name)
     if not opt.no_gnn:
         self.gcn = GNN_RELU_DIFF(opt.bert_dim, opt.bert_dim,
                                  step=opt.gnn_step, drop=opt.dropout)
         if not opt.no_short_cut:
             self.layernorm = LayerNorm(opt.bert_dim)
     if not opt.no_sa:
         self.sa = SelfAttention(self.bert.config, opt)
     self.drop = nn.Dropout(opt.dropout)
     self.fc = nn.Linear(opt.bert_dim, opt.polarities_dim)
Example #13
 def __init__(self, hparams, loss_func=dep_loss, config=None):
     super().__init__()
     self.save_hyperparameters(hparams)
     if config is None:
         self.transformer = AutoModel.from_pretrained(
             self.hparams.transformer)
     else:
         self.transformer = AutoModel.from_config(config)
     self.dropout = nn.Dropout(self.hparams.dropout)
     hidden_size = self.transformer.config.hidden_size
     self.classifier = BiaffineClassifier(
         hidden_size,
         label_num=self.hparams.num_labels,
         dropout=self.hparams.dropout,
         arc_hidden_size=self.hparams.arc_hidden_size,
         rel_hidden_size=self.hparams.rel_hidden_size,
         loss_interpolation=self.hparams.loss_interpolation,
         loss_func=loss_func)
Example #14
    def __init__(self, config: dict, do_not_download_weights=False):
        super().__init__()
        if do_not_download_weights:
            self.lrm = AutoModel.from_config(
                AutoConfig.from_pretrained(
                    config["model_type"], cache_dir=config["model_cache_dir"]))
        else:
            self.lrm = AutoModel.from_pretrained(
                config["model_type"], cache_dir=config["model_cache_dir"])
        output_shape = self.lrm.config.hidden_size
        projection_bias = config.get("use_projection_bias", True)
        self.linear = nn.Linear(
            output_shape, config["emb_dim"], bias=projection_bias
        ) if config["use_projection"] else nn.Identity()
        self.dropout = nn.Dropout(config["dropout_rate"]) if config.get(
            "dropout_rate", 0.) > 1e-6 else nn.Identity()
        self.config = config

        self.init_weights(type(self.lrm))
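# Hypothetical instantiation for the module above; the keys are inferred from the
# config lookups in __init__, and EncoderModule is a stand-in for the unnamed class.
encoder = EncoderModule(
    config={
        "model_type": "bert-base-uncased",
        "model_cache_dir": "./cache",
        "emb_dim": 256,
        "use_projection": True,
        "use_projection_bias": True,
        "dropout_rate": 0.1,
    },
    do_not_download_weights=True,  # build via AutoModel.from_config, skip weights
)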
Example #15
    def test_from_pretrained_avoids_weights_download_if_override_weights(self):
        # only download config because downloading pretrained weights in addition takes too long
        transformer = AutoModel.from_config(
            AutoConfig.from_pretrained("epwalsh/bert-xsmall-dummy",
                                       cache_dir=self.TEST_DIR))
        # clear cache directory
        for f in os.listdir(str(self.TEST_DIR)):
            os.remove(str(self.TEST_DIR) + "/" + f)
        assert len(os.listdir(str(self.TEST_DIR))) == 0

        save_weights_path = str(self.TEST_DIR) + "/bert_weights.pth"
        torch.save(transformer.state_dict(), save_weights_path)

        override_transformer = cached_transformers.get(
            "epwalsh/bert-xsmall-dummy",
            False,
            override_weights_file=save_weights_path,
            cache_dir=self.TEST_DIR,
        )
        # check that only three files were downloaded (filename.json, filename, filename.lock), for config.json
        # if more than three files were downloaded, then model weights were also (incorrectly) downloaded
        # NOTE: downloaded files are not explicitly detailed in Huggingface's public API,
        # so this assertion could fail in the future
        json_fnames = [
            fname for fname in os.listdir(str(self.TEST_DIR))
            if fname.endswith(".json")
        ]
        assert len(json_fnames) == 1
        json_data = json.load(open(str(self.TEST_DIR) + "/" + json_fnames[0]))
        assert (
            json_data["url"] ==
            "https://huggingface.co/epwalsh/bert-xsmall-dummy/resolve/main/config.json"
        )
        resource_id = os.path.splitext(json_fnames[0])[0]
        assert set(os.listdir(str(self.TEST_DIR))) == set([
            json_fnames[0], resource_id, resource_id + ".lock",
            "bert_weights.pth"
        ])

        # check that override weights were loaded correctly
        for p1, p2 in zip(transformer.parameters(),
                          override_transformer.parameters()):
            assert p1.data.ne(p2.data).sum() == 0
Example #16
 def __init__(self, hparams, config=None):
     super().__init__()
     self.save_hyperparameters(hparams)
     if config is None:
         self.transformer = AutoModel.from_pretrained(self.hparams.transformer)
     else:
         self.transformer = AutoModel.from_config(config)
     self.dropout = nn.Dropout(self.hparams.dropout)
     hidden_size = self.transformer.config.hidden_size
     max_length = self.transformer.config.max_position_embeddings
     self.classifier = RelativeTransformerLinearClassifier(
         input_size=hidden_size,
         hidden_size=self.hparams.hidden_size,
         num_layers=self.hparams.num_layers,
         num_heads=self.hparams.num_heads,
         dropout=self.hparams.dropout,
         max_length=max_length,
         num_labels=self.hparams.num_labels
     )
Example #17
    def __init__(self, pretrained_model: str,
                 requires_grad: bool = False,
                 dropout: float = 0.1,
                 layer_dropout: float = 0.1,
                 combine_layers: str = "mix") -> None:
        config = AutoConfig.from_pretrained(pretrained_model)
        config.output_hidden_states = True
        model = AutoModel.from_config(config)

        for param in model.parameters():
            param.requires_grad = requires_grad

        super().__init__(bert_model=model,
                         layer_dropout=layer_dropout,
                         combine_layers=combine_layers)

        self.model = model
        self.dropout = dropout
        self.set_dropout(dropout)
Example #18
    def __init__(
        self,
        arch: str,
        temperature: float,
        criterion: str,
        optcfg: DictConfig,
        num_negatives: int,
        num_positives: Optional[int] = None,
        schcfg: Optional[DictConfig] = None,
        **kwargs,
    ):
        super().__init__()

        # this line ensures params passed to LightningModule will be saved to ckpt
        # it also allows to access params with 'self.hparams' attribute
        self.num_negatives = num_negatives
        self.num_positives = num_positives
        self.schcfg = schcfg
        self.optcfg = optcfg
        self.save_hyperparameters()

        config = AutoConfig.from_pretrained(arch)
        self.transformer = AutoModel.from_config(config)
        # TODO
        # custom pooler? like SimCSE
        # config, add_pooling_layer=True
        # )
        pool_size = self.transformer.config.hidden_size
        self.projection = nn.Linear(pool_size, pool_size)

        # loss function
        self.cos_sim = torch.nn.CosineSimilarity(dim=-1)
        self.temperature = temperature
        if criterion == "InfoNCE":
            self.criterion = nn.CrossEntropyLoss()
        elif criterion == "rankloss":
            self.criterion = nn.MarginRankingLoss(0.02)
        else:
            raise NotImplementedError
Example #19
    def __init__(self,
                 config: Dict[str, Any],
                 initPretrainedWeights: bool = True):
        """
        Initialization of new extractive_reader with config.

        :param config: Configuration used for the initialization
            transformer_type: used type of model
            cache: used cache dir
        :type config: Dict[str, Any]
        :param initPretrainedWeights: Uses pretrained weights for transformer part. If False uses random initialization.
        :type initPretrainedWeights: bool
        """
        super().__init__()

        if initPretrainedWeights:
            self.transformer = AutoModel.from_pretrained(
                config["transformer_type"], cache_dir=config["cache"])
        else:
            self.transformer = AutoModel.from_config(
                AutoConfig.from_pretrained(config["transformer_type"],
                                           cache_dir=config["cache"]))

        self.startEndProjection = torch.nn.Linear(
            self.transformer.config.hidden_size, 2, bias=False)

        self.selectedProjection = torch.nn.Linear(
            self.transformer.config.hidden_size, 1, bias=False)

        # For the joint projection we apply a linear transformation to the start
        # representation, because otherwise the dot product of a token with itself
        # would always be maximal (i.e. one-token spans would always win).

        self.jointStartProjection = torch.nn.Linear(
            self.transformer.config.hidden_size,
            self.transformer.config.hidden_size)

        self.config = config

        self.init_weights()
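# Per the docstring, config only needs 'transformer_type' and 'cache'; a hedged
# instantiation (Reader is a stand-in for the unnamed class above):
reader = Reader(
    config={"transformer_type": "bert-base-uncased", "cache": "./cache"},
    initPretrainedWeights=False,  # random init via AutoModel.from_config
)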
Example #20
def huggingface_from_pretrained(source: Union[Path, str], tok_config: Dict,
                                trf_config: Dict):
    """Create a Huggingface transformer model from a pretrained config. Will
    download the tokenizer and config if they are not already cached; the model
    weights themselves start out randomly initialized via `AutoModel.from_config`.

    source (Union[str, Path]): The name of the model or a path to it, such as
        'bert-base-cased'.
    tok_config (dict): Settings to pass to the tokenizer.
    trf_config (dict): Settings to pass to the transformer.
    """
    if hasattr(source, "absolute"):
        str_path = str(source.absolute())
    else:
        str_path = source
    tokenizer = AutoTokenizer.from_pretrained(str_path, **tok_config)
    trf_config["return_dict"] = True
    config = AutoConfig.from_pretrained(str_path, **trf_config)
    transformer = AutoModel.from_config(config)
    ops = get_current_ops()
    if isinstance(ops, CupyOps):
        transformer.cuda()
    return tokenizer, transformer
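# Usage sketch: only the tokenizer and config are fetched, and the returned
# transformer starts from random weights because it is built with from_config.
tokenizer, transformer = huggingface_from_pretrained(
    "bert-base-cased", tok_config={"use_fast": True}, trf_config={})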
Example #21
    def test_load_full_model(self):
        model1 = AutoModel.from_config(self.config())
        model1.eval()

        name = "dummy"
        model1.add_adapter(name)
        model1.set_active_adapters([name])
        with tempfile.TemporaryDirectory() as temp_dir:
            model1.save_pretrained(temp_dir)

            model2 = AutoModel.from_pretrained(temp_dir)
            model2.set_active_adapters([name])

        # check if adapter was correctly loaded
        self.assertTrue(name in model2.config.adapters)

        # check equal output
        input_ids = self.get_input_samples((1, 128), config=model1.config)
        output1 = model1(input_ids)
        output2 = model2(input_ids)
        self.assertEqual(len(output1), len(output2))
        self.assertTrue(torch.equal(output1[0], output2[0]))
Example #22
 def load_model(self, checkpoint):
     config = self.config
     opt = config['opt']
     labels = load_label(opt.label_path)
     label_size = len(labels)
     config['labels'] = labels
     self.labels = labels
     if config['emb_class'] == 'glove':
         if config['enc_class'] == 'gnb':
             model = TextGloveGNB(config, opt.embedding_path, label_size)
         elif config['enc_class'] == 'cnn':
             model = TextGloveCNN(config,
                                  opt.embedding_path,
                                  label_size,
                                  emb_non_trainable=True)
         elif config['enc_class'] == 'densenet-cnn':
             model = TextGloveDensenetCNN(config,
                                          opt.embedding_path,
                                          label_size,
                                          emb_non_trainable=True)
         elif config['enc_class'] == 'densenet-dsa':
             model = TextGloveDensenetDSA(config,
                                          opt.embedding_path,
                                          label_size,
                                          emb_non_trainable=True)
     else:
         from transformers import AutoTokenizer, AutoConfig, AutoModel
         bert_config = AutoConfig.from_pretrained(opt.bert_output_dir)
         bert_tokenizer = AutoTokenizer.from_pretrained(opt.bert_output_dir)
         bert_model = AutoModel.from_config(bert_config)
         ModelClass = TextBertCNN
         if config['enc_class'] == 'cls': ModelClass = TextBertCLS
         model = ModelClass(config, bert_config, bert_model, bert_tokenizer,
                            label_size)
     model.load_state_dict(checkpoint)
     model = model.to(opt.device)
     logger.info("[Model loaded]")
     return model
Example #23
 def __init__(self, hparams, config=None):
     super().__init__()
     self.save_hyperparameters(hparams)
     if config is None:
         self.transformer = AutoModel.from_pretrained(
             self.hparams.transformer)
     else:
         self.transformer = AutoModel.from_config(config)
     self.dropout = nn.Dropout(self.hparams.dropout)
     hidden_size = self.transformer.config.hidden_size
     self.classifier = ViClassifier(
         input_size=hidden_size,
         label_num=self.hparams.num_labels,
         dropout=self.hparams.dropout,
         lstm_num_layers=self.hparams.lstm_num_layers,
         lstm_hidden_size=self.hparams.lstm_hidden_size,
         bin_hidden_size=self.hparams.bin_hidden_size,
         arc_hidden_size=self.hparams.arc_hidden_size,
         rel_hidden_size=self.hparams.rel_hidden_size,
         loss_interpolation=self.hparams.loss_interpolation,
         inference=self.hparams.inference,
         max_iter=self.hparams.max_iter,
     )
Example #24
    def __init__(self, hparams):
        super(LMFineTuner, self).__init__()

        self.hparams = hparams

        print('----------------------')
        pprint(self.hparams)
        print('----------------------')

        self.target = "age" if self.hparams.regression else "gender"

        #self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.config = AutoConfig.from_pretrained("roberta-base")
        self.tokenizer = AutoTokenizer.from_pretrained('roberta-base')
        self.contextualModel = AutoModel.from_config(self.config)
        #self.contextualModel = BertModel.from_pretrained('bert-base-uncased')
        self.ftLayer = nn.Linear(
            768, 1) if self.hparams.regression else nn.Linear(
                768, self.hparams.num_classes)
        self.activation = torch.nn.ReLU()
        self.lossFunc = nn.MSELoss(
            reduction='mean') if self.hparams.regression else nn.CrossEntropyLoss()
Example #25
    def test_add_adapter_with_invertible(self):
        model = AutoModel.from_config(self.config())
        model.eval()

        for adapter_config in [PfeifferInvConfig(), HoulsbyInvConfig()]:
            with self.subTest(model_class=model.__class__.__name__,
                              config=adapter_config.__class__.__name__):
                name = adapter_config.__class__.__name__
                model.add_adapter(name, config=adapter_config)
                model.set_active_adapters([name])

                # adapter is correctly added to config
                self.assertTrue(name in model.config.adapters)
                self.assertEqual(adapter_config,
                                 model.config.adapters.get(name))

                # invertible adapter is correctly added and returned
                self.assertTrue(name in model.invertible_adapters)
                self.assertEqual(model.invertible_adapters[name],
                                 model.get_invertible_adapter())

                # all invertible adapter weights should be activated for training
                for param in model.invertible_adapters[name].parameters():
                    self.assertTrue(param.requires_grad)

                # check forward pass
                input_ids = self.get_input_samples((1, 128),
                                                   config=model.config)
                input_data = {"input_ids": input_ids}
                adapter_output = model(**input_data)
                # make sure the output is different without invertible adapter
                del model.invertible_adapters[name]
                adapter_output_no_inv = model(**input_data)
                self.assertEqual(len(adapter_output),
                                 len(adapter_output_no_inv))
                self.assertFalse(
                    torch.equal(adapter_output[0], adapter_output_no_inv[0]))
Example #26
 def init_bert(self):
     attr = self.attr
     pretrained_name_or_path = attr.pretrained_name_or_path
     bert_config = AutoConfig.from_pretrained(pretrained_name_or_path)
     if attr.input_representation == "subwords":
         # tag informed modeling
         if attr.n_token_type_ids and attr.n_token_type_ids > bert_config.type_vocab_size:
             # load from config, then map embeddings manually from directory `pretrained_name_or_path`
             if not os.path.exists(pretrained_name_or_path):
                 raise Exception(
                     "when using tag informed modeling by inputting `n_token_type_ids`, "
                     "you must specify a directory of downloaded model weights, not a model name"
                 )
             print(
                 f"upping type_vocab_size of BERT to {attr.n_token_type_ids} from {bert_config.type_vocab_size}"
             )
             bert_config.type_vocab_size = attr.n_token_type_ids
             bert_model = AutoModel.from_config(config=bert_config)
             bert_model = load_bert_pretrained_weights(
                 bert_model, pretrained_name_or_path, attr.device)
         else:
             bert_model = AutoModel.from_pretrained(pretrained_name_or_path)
     elif attr.input_representation == "charcnn":
         # bert_model = CharacterBertModel.from_pretrained(pretrained_name_or_path, config=config)
         raise NotImplementedError
     else:
         raise ValueError
     if not attr.finetune_bert:
         for param in bert_model.parameters():
             param.requires_grad = False
     bert_model.to(attr.device)
     self.outdim = bert_config.hidden_size
     self.config = bert_config
     self.model = bert_model
     self.requires_bert_optimizer = True
     return
Example #27
def load_model(config, checkpoint):
    opt = config['opt']
    labels = load_label(opt.label_path)
    label_size = len(labels)
    config['labels'] = labels
    if config['emb_class'] == 'glove':
        if config['enc_class'] == 'gnb':
            model = TextGloveGNB(config, opt.embedding_path, label_size)
        elif config['enc_class'] == 'cnn':
            model = TextGloveCNN(config,
                                 opt.embedding_path,
                                 label_size,
                                 emb_non_trainable=True)
        elif config['enc_class'] == 'densenet-cnn':
            model = TextGloveDensenetCNN(config,
                                         opt.embedding_path,
                                         label_size,
                                         emb_non_trainable=True)
        elif config['enc_class'] == 'densenet-dsa':
            model = TextGloveDensenetDSA(config,
                                         opt.embedding_path,
                                         label_size,
                                         emb_non_trainable=True)
    else:
        from transformers import AutoTokenizer, AutoConfig, AutoModel
        bert_config = AutoConfig.from_pretrained(opt.bert_output_dir)
        bert_tokenizer = AutoTokenizer.from_pretrained(opt.bert_output_dir)
        bert_model = AutoModel.from_config(bert_config)
        ModelClass = TextBertCNN
        if config['enc_class'] == 'cls': ModelClass = TextBertCLS
        model = ModelClass(config, bert_config, bert_model, bert_tokenizer,
                           label_size)
    if opt.enable_qat:
        assert opt.device == 'cpu'
        model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
        '''
        # fuse if applicable
        # model = torch.quantization.fuse_modules(model, [['']])
        '''
        model = torch.quantization.prepare_qat(model)
        model.eval()
        model.to('cpu')
        logger.info("[Convert to quantized model with device=cpu]")
        model = torch.quantization.convert(model)
    if opt.enable_qat_fx:
        import torch.quantization.quantize_fx as quantize_fx
        qconfig_dict = {
            "": torch.quantization.get_default_qat_qconfig('fbgemm')
        }
        model = quantize_fx.prepare_qat_fx(model, qconfig_dict)
        logger.info("[Convert to quantized model]")
        model = quantize_fx.convert_fx(model)

    model.load_state_dict(checkpoint)
    model = model.to(opt.device)
    '''
    for name, param in model.named_parameters():
        print(name, param.data, param.device, param.requires_grad)
    '''
    logger.info("[model] :\n{}".format(model.__str__()))
    logger.info("[Model loaded]")
    return model
Example #28
 def __init__(self):
     super(BERTClass, self).__init__()
     config = AutoConfig.from_pretrained('bert-base-uncased')
     self.l1 = AutoModel.from_config(config)
     self.l2 = torch.nn.Dropout(0.3)
     self.l3 = torch.nn.Linear(768, 6)
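 # A hedged sketch of the forward pass this class implies (not part of the
 # original snippet): BERT pooler output -> dropout -> 6-way linear head.
 def forward(self, input_ids, attention_mask, token_type_ids):
     output = self.l1(input_ids=input_ids,
                      attention_mask=attention_mask,
                      token_type_ids=token_type_ids)
     pooled = output.pooler_output  # (batch, 768) for bert-base-uncased
     return self.l3(self.l2(pooled))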
Example #29
    def __init__(
        self,
        model: str = "bert-base-uncased",
        fine_tune: bool = True,
        layers: str = "-1",
        layer_mean: bool = True,
        subtoken_pooling: str = "first",
        cls_pooling: str = "cls",
        is_token_embedding: bool = True,
        is_document_embedding: bool = True,
        allow_long_sentences: bool = False,
        use_context: Union[bool, int] = False,
        respect_document_boundaries: bool = True,
        context_dropout: float = 0.5,
        saved_config: Optional[PretrainedConfig] = None,
        tokenizer_data: Optional[BytesIO] = None,
        name: Optional[str] = None,
        **kwargs,
    ):
        self.instance_parameters = self.get_instance_parameters(locals=locals())
        del self.instance_parameters["saved_config"]
        del self.instance_parameters["tokenizer_data"]
        super().__init__()
        # temporary fix to disable tokenizer parallelism warning
        # (see https://stackoverflow.com/questions/62691279/how-to-disable-tokenizers-parallelism-true-false-warning)
        os.environ["TOKENIZERS_PARALLELISM"] = "false"

        # do not print transformer warnings as these are confusing in this case
        from transformers import logging

        logging.set_verbosity_error()

        if tokenizer_data is None:
            # load tokenizer and transformer model
            self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(model, **kwargs)
        else:
            # load tokenizer from inmemory zip-file
            self.tokenizer = self._tokenizer_from_bytes(tokenizer_data)

        if saved_config is None:
            config = AutoConfig.from_pretrained(model, output_hidden_states=True, **kwargs)
            self.model = AutoModel.from_pretrained(model, config=config)
        else:
            self.model = AutoModel.from_config(saved_config, **kwargs)

        self.truncate = True

        if self.tokenizer.model_max_length > LARGE_INTEGER:
            allow_long_sentences = False
            self.truncate = False

        self.stride = self.tokenizer.model_max_length // 2 if allow_long_sentences else 0
        self.allow_long_sentences = allow_long_sentences
        self.use_lang_emb = hasattr(self.model, "use_lang_emb") and self.model.use_lang_emb

        # model name
        if name is None:
            self.name = "transformer-" + str(model)
        else:
            self.name = name
        self.base_model_name = str(model)

        self.token_embedding = is_token_embedding
        self.document_embedding = is_document_embedding

        if not self.token_embedding and not self.document_embedding:
            raise ValueError("either 'is_token_embedding' or 'is_document_embedding' needs to be set.")

        if self.document_embedding and cls_pooling not in ["cls", "max", "mean"]:
            raise ValueError(f"Document Pooling operation `{cls_pooling}` is not defined for TransformerEmbedding")

        if self.token_embedding and subtoken_pooling not in ["first", "last", "first_last", "mean"]:
            raise ValueError(f"Subtoken Pooling operation `{subtoken_pooling}` is not defined for TransformerEmbedding")

        if self.document_embedding and cls_pooling == "cls" and allow_long_sentences:
            log.warning(
                "Using long sentences for Document embeddings is only beneficial for cls_pooling types 'mean' and 'max'."
            )

        if isinstance(use_context, bool):
            self.context_length: int = 64 if use_context else 0
        else:
            self.context_length = use_context

        self.context_dropout = context_dropout
        self.respect_document_boundaries = respect_document_boundaries

        self.to(flair.device)

        # embedding parameters
        if layers == "all":
            # send mini-token through to check how many layers the model has
            hidden_states = self.model(torch.tensor([1], device=flair.device).unsqueeze(0))[-1]
            self.layer_indexes = list(range(len(hidden_states)))
        else:
            self.layer_indexes = list(map(int, layers.split(",")))

        self.cls_pooling = cls_pooling
        self.subtoken_pooling = subtoken_pooling
        self.layer_mean = layer_mean
        self.fine_tune = fine_tune
        self.static_embeddings = not self.fine_tune

        # return length
        self.embedding_length_internal = self._calculate_embedding_length()

        self.special_tokens = []
        # check if special tokens exist to circumvent error message
        if self.tokenizer._bos_token:
            self.special_tokens.append(self.tokenizer.bos_token)
        if self.tokenizer._cls_token:
            self.special_tokens.append(self.tokenizer.cls_token)

        # most models have an initial BOS token, except for XLNet, T5 and GPT2
        self.begin_offset = self._get_begin_offset_of_tokenizer()
        self.initial_cls_token: bool = self._has_initial_cls_token()

        # when initializing, embeddings are in eval mode by default
        self.eval()
Example #30
def load_model(config, checkpoint):
    args = config['args']
    labels = load_dict(args.label_path)
    label_size = len(labels)
    config['labels'] = labels
    config['label_size'] = label_size
    glabels = load_dict(args.glabel_path)
    glabel_size = len(glabels)
    config['glabels'] = glabels
    config['glabel_size'] = glabel_size
    poss = load_dict(args.pos_path)
    pos_size = len(poss)
    config['poss'] = poss
    config['pos_size'] = pos_size
    if config['emb_class'] == 'glove':
        if config['enc_class'] == 'bilstm':
            model = GloveLSTMCRF(config,
                                 args.embedding_path,
                                 label_size,
                                 pos_size,
                                 emb_non_trainable=True,
                                 use_crf=args.use_crf,
                                 use_ncrf=args.use_ncrf,
                                 use_char_cnn=args.use_char_cnn,
                                 use_mha=args.use_mha)
        if config['enc_class'] == 'densenet':
            model = GloveDensenetCRF(config,
                                     args.embedding_path,
                                     label_size,
                                     pos_size,
                                     emb_non_trainable=True,
                                     use_crf=args.use_crf,
                                     use_ncrf=args.use_ncrf,
                                     use_char_cnn=args.use_char_cnn,
                                     use_mha=args.use_mha)
    elif config['emb_class'] == 'elmo':
        from allennlp.modules.elmo import Elmo
        elmo_model = Elmo(args.elmo_options_file,
                          args.elmo_weights_file,
                          2,
                          dropout=0)
        model = ElmoLSTMCRF(config,
                            elmo_model,
                            args.embedding_path,
                            label_size,
                            pos_size,
                            emb_non_trainable=True,
                            use_crf=args.use_crf,
                            use_ncrf=args.use_ncrf,
                            use_char_cnn=args.use_char_cnn,
                            use_mha=args.use_mha)
    else:
        bert_config = AutoConfig.from_pretrained(args.bert_output_dir)
        bert_tokenizer = AutoTokenizer.from_pretrained(args.bert_output_dir)
        bert_model = AutoModel.from_config(bert_config)
        ModelClass = BertLSTMCRF
        model = ModelClass(config,
                           bert_config,
                           bert_model,
                           bert_tokenizer,
                           label_size,
                           glabel_size,
                           pos_size,
                           use_crf=args.use_crf,
                           use_ncrf=args.use_ncrf,
                           use_pos=args.bert_use_pos,
                           use_char_cnn=args.use_char_cnn,
                           use_mha=args.use_mha,
                           use_subword_pooling=args.bert_use_subword_pooling,
                           use_word_embedding=args.bert_use_word_embedding,
                           embedding_path=args.embedding_path,
                           emb_non_trainable=True,
                           use_doc_context=args.bert_use_doc_context,
                           disable_lstm=args.bert_disable_lstm,
                           feature_based=args.bert_use_feature_based,
                           use_mtl=args.bert_use_mtl)
    model.load_state_dict(checkpoint)
    model = model.to(args.device)
    logger.info("[Loaded]")
    return model