def __init__(self): """ Init the labeler module @param p (float): p to use for dropout in the linear heads, 0.1 by default is consistant with transformers.BertForSequenceClassification @param clinical (boolean): True if Bio_Clinical BERT desired, False otherwise. Ignored if pretrain_path is not None @param freeze_embeddings (boolean): true to freeze bert embeddings during training @param pretrain_path (string): path to load checkpoint from """ super(bert_labeler, self).__init__() config = AutoConfig.from_pretrained('bert-base-uncased') self.bert = AutoModel.from_config(config) self.hidden_size = self.bert.pooler.dense.in_features
def load_bert_model(bert_hidden_size):
    bert_config = AutoConfig.from_pretrained("model/", hidden_size=bert_hidden_size)
    bert_tokenizer = AutoTokenizer.from_pretrained('model/')
    bert_model = AutoModel.from_config(bert_config)  # turns out it should be from_config, not from_pretrained :)
    print("loaded bert model config: ", bert_model.config)  # ok, finally :)
    for param in bert_model.parameters():  # freeze bert model params (not bert training)
        param.requires_grad = False
    return bert_tokenizer, bert_model
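# Hypothetical usage sketch for load_bert_model above (not part of the original
# snippet). It assumes a local "model/" directory with config.json and tokenizer
# files and a hidden size of 768; weights remain random because from_config()
# never downloads or loads them.
import torch

tokenizer, frozen_bert = load_bert_model(bert_hidden_size=768)
batch = tokenizer("a short example sentence", return_tensors="pt")
with torch.no_grad():
    out = frozen_bert(**batch)
print(out.last_hidden_state.shape)  # (1, seq_len, 768)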
def __init__(self,
             model_name_or_path,
             mapping_path,
             config_name=None,
             tokenizer_name=None,
             cache_dir=None,
             from_pretrained=False,
             freeze_base_model=False,
             fusion="max_pooling",
             lambdas=[1, 1, 1]):
    super(BertForDBpediaDocumentClassification, self).__init__()

    # Initialize config, tokenizer and model (feature extractor)
    self.base_model_config = AutoConfig.from_pretrained(
        config_name if config_name is not None else model_name_or_path,
        cache_dir=cache_dir,
    )
    self.tokenizer = AutoTokenizer.from_pretrained(
        tokenizer_name if tokenizer_name else model_name_or_path,
        cache_dir=cache_dir,
        use_fast=True,
    )
    if from_pretrained:
        self.base_model = AutoModel.from_config(config=self.base_model_config)
    else:
        self.base_model = AutoModel.from_pretrained(
            model_name_or_path,
            from_tf=bool(".ckpt" in model_name_or_path),
            config=self.base_model_config,
            cache_dir=cache_dir,
        )

    # Fusion
    if fusion not in ["max_pooling", "average_pooling", "sum"]:
        raise ValueError(
            f"Invalid fusion value. Expected one of ['max_pooling', 'average_pooling', 'sum'], got "
            f"'{fusion}' instead.")
    self.fusion = fusion
    assert len(lambdas) == 3
    self.lambdas = lambdas

    # Freeze
    if freeze_base_model:
        for p in self.base_model.parameters():
            p.requires_grad = False

    # Initialize layers
    with open(mapping_path, "r") as fin:
        self.mapping = json.load(fin)
    self._initialize_layers()
def test_from_pretrained_with_tuple_values(self):
    # For the auto model mapping, FunnelConfig has two models: FunnelModel and FunnelBaseModel
    model = AutoModel.from_pretrained("sgugger/funnel-random-tiny")
    self.assertIsInstance(model, FunnelModel)

    config = copy.deepcopy(model.config)
    config.architectures = ["FunnelBaseModel"]
    model = AutoModel.from_config(config)
    self.assertIsInstance(model, FunnelBaseModel)

    with tempfile.TemporaryDirectory() as tmp_dir:
        model.save_pretrained(tmp_dir)
        model = AutoModel.from_pretrained(tmp_dir)
        self.assertIsInstance(model, FunnelBaseModel)
def test_add_adapter_fusion_different_config(self):
    model = AutoModel.from_config(self.config())
    model.eval()

    # fusion between a and b should be possible whereas fusion between a and c should fail
    model.add_adapter("a", config=PfeifferConfig(reduction_factor=16))
    model.add_adapter("b", config=PfeifferConfig(reduction_factor=2))
    model.add_adapter("c", config="houlsby")

    # correct fusion
    model.add_fusion(["a", "b"])
    self.assertIn("a,b", model.config.adapter_fusion_models)
    # failing fusion
    self.assertRaises(ValueError, lambda: model.add_fusion(["a", "c"]))
def convert_to_transformers(self):
    from transformers import DPRContextEncoder, DPRQuestionEncoder, AutoModel

    if len(self.prediction_heads) != 1:
        raise ValueError(
            f"Currently conversion only works for models with a SINGLE prediction head. "
            f"Your model has {len(self.prediction_heads)}")

    if self.prediction_heads[0].model_type == "text_similarity":
        # init model
        if "dpr" in self.language_model1.model.config.model_type:
            transformers_model1 = DPRQuestionEncoder(
                config=self.language_model1.model.config)
        else:
            transformers_model1 = AutoModel.from_config(
                config=self.language_model1.model.config)
        if "dpr" in self.language_model2.model.config.model_type:
            transformers_model2 = DPRContextEncoder(
                config=self.language_model2.model.config)
        else:
            transformers_model2 = AutoModel.from_config(
                config=self.language_model2.model.config)

        # transfer weights for language model + prediction head
        setattr(transformers_model1, transformers_model1.base_model_prefix,
                self.language_model1.model)
        setattr(transformers_model2, transformers_model2.base_model_prefix,
                self.language_model2.model)
        logger.warning("No prediction head weights are required for DPR")
    else:
        raise NotImplementedError(
            f"FARM -> Transformers conversion is not supported yet for"
            f" prediction heads of type {self.prediction_heads[0].model_type}")

    return transformers_model1, transformers_model2
def load(model_path: str):
    """ Load the model from a file.
    @param model_path (str): path to model
    @return model (nn.Module): model with saved parameters
    """
    params = torch.load(model_path, map_location=lambda storage, loc: storage)
    args = params['args']
    bert_base_config = args['bert_config']
    bert_base_model = AutoModel.from_config(bert_base_config)
    model = BasicBertForClassification(bert_base_model, args['n_class'],
                                       args['dropout_rate'])
    model.load_state_dict(params['state_dict'])
    return model
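# Hypothetical usage sketch for load() above, assuming it is exposed as a
# @staticmethod of BasicBertForClassification and that "model.bin" was saved with
# the same {'args': ..., 'state_dict': ...} layout the loader expects.
classifier = BasicBertForClassification.load("model.bin")
classifier.eval()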
def __init__(self, hparams, config=None):
    super().__init__()
    self.save_hyperparameters(hparams)
    if config is None:
        self.transformer = AutoModel.from_pretrained(self.hparams.transformer)
    else:
        self.transformer = AutoModel.from_config(config)
    self.dropout = nn.Dropout(self.hparams.dropout)
    hidden_size = self.transformer.config.hidden_size
    self.classifier = BiaffineCRFClassifier(
        hidden_size,
        label_num=self.hparams.num_labels,
        dropout=self.hparams.dropout,
        hidden_size=self.hparams.hidden_size)
def test_add_adapter_multiple_reduction_factors(self):
    model = AutoModel.from_config(self.config())
    model.eval()
    reduction_factor = {"1": 1, "default": 2}
    for adapter_config in [
            PfeifferConfig(reduction_factor=reduction_factor),
            HoulsbyConfig(reduction_factor=reduction_factor),
    ]:
        with self.subTest(model_class=model.__class__.__name__,
                          config=adapter_config.__class__.__name__):
            name = adapter_config.__class__.__name__
            model.add_adapter(name, config=adapter_config)
            model.set_active_adapters([name])

            # adapter is correctly added to config
            self.assertTrue(name in model.config.adapters)
            self.assertEqual(adapter_config, model.config.adapters.get(name))

            # TODO: Add this method to model classes.
            def get_adapter_layer(idx):
                if isinstance(model, RobertaModel):
                    adapter = model.encoder.layer[idx].output.adapters
                elif isinstance(model, DistilBertModel):
                    adapter = model.transformer.layer[idx].output_adapters.adapters
                elif isinstance(model, BartModel) or isinstance(model, MBartModel):
                    adapter = model.encoder.layers[idx].output_adapters.adapters
                elif isinstance(model, GPT2Model):
                    adapter = model.h[idx].output_adapters.adapters
                else:
                    adapter = model.encoder.layer[idx].output.adapters
                return (adapter.PfeifferConfig if isinstance(
                    adapter_config, PfeifferConfig) else adapter.HoulsbyConfig)

            self.assertEqual(
                get_adapter_layer(0).adapter_down[0].in_features /
                get_adapter_layer(0).adapter_down[0].out_features,
                reduction_factor["default"],
            )
            self.assertEqual(
                get_adapter_layer(1).adapter_down[0].in_features /
                get_adapter_layer(1).adapter_down[0].out_features,
                reduction_factor["1"],
            )
def load(self) -> None:
    if self.pretrained_bert:
        log.info(f"From pretrained {self.pretrained_bert}.")
        self.pretrained_bert = str(expand_path(self.pretrained_bert))
        self.config = AutoConfig.from_pretrained(self.pretrained_bert,
                                                 output_hidden_states=True)
        self.encoder = AutoModel.from_pretrained(self.pretrained_bert,
                                                 config=self.config)
    elif self.bert_config_file and Path(self.bert_config_file).is_file():
        self.config = AutoConfig.from_json_file(
            str(expand_path(self.bert_config_file)))
        self.encoder = AutoModel.from_config(config=self.config)
    else:
        raise ConfigError("No pre-trained BERT model is given.")
    self.encoder.to(self.device)
def from_bytes(self, bytes_data):
    msg = srsly.msgpack_loads(bytes_data)
    config_dict = msg["config"]
    tok_dict = msg["tokenizer"]
    if config_dict:
        with make_tempdir() as temp_dir:
            config_file = temp_dir / "config.json"
            srsly.write_json(config_file, config_dict)
            config = AutoConfig.from_pretrained(config_file)
            for x, x_bytes in tok_dict.items():
                Path(temp_dir / x).write_bytes(x_bytes)
            tokenizer = AutoTokenizer.from_pretrained(str(temp_dir.absolute()))
            vocab_file_contents = None
            if hasattr(tokenizer, "vocab_file"):
                vocab_file_name = tokenizer.vocab_files_names["vocab_file"]
                vocab_file_path = str((temp_dir / vocab_file_name).absolute())
                with open(vocab_file_path, "rb") as fileh:
                    vocab_file_contents = fileh.read()

        transformer = AutoModel.from_config(config)
        self._hfmodel = HFObjects(
            tokenizer,
            transformer,
            vocab_file_contents,
            SimpleFrozenDict(),
            SimpleFrozenDict(),
        )
        self._model = transformer
        filelike = BytesIO(msg["state"])
        filelike.seek(0)
        ops = get_current_ops()
        if ops.device_type == "cpu":
            map_location = "cpu"
        else:  # pragma: no cover
            device_id = torch.cuda.current_device()
            map_location = f"cuda:{device_id}"
        self._model.load_state_dict(torch.load(filelike, map_location=map_location))
        self._model.to(map_location)
    else:
        self._hfmodel = HFObjects(
            None,
            None,
            None,
            msg["_init_tokenizer_config"],
            msg["_init_transformer_config"],
        )
    return self
def __init__(self, bert, opt):
    super().__init__()
    self.opt = opt
    if opt.debug:
        conf = AutoConfig.from_pretrained(opt.pretrained_bert_name)
        self.bert = AutoModel.from_config(conf)
    else:
        self.bert = AutoModel.from_pretrained(opt.pretrained_bert_name)
    if not opt.no_gnn:
        self.gcn = GNN_RELU_DIFF(opt.bert_dim,
                                 opt.bert_dim,
                                 step=opt.gnn_step,
                                 drop=opt.dropout)
    if not opt.no_short_cut:
        self.layernorm = LayerNorm(opt.bert_dim)
    if not opt.no_sa:
        self.sa = SelfAttention(self.bert.config, opt)
    self.drop = nn.Dropout(opt.dropout)
    self.fc = nn.Linear(opt.bert_dim, opt.polarities_dim)
def __init__(self, hparams, loss_func=dep_loss, config=None):
    super().__init__()
    self.save_hyperparameters(hparams)
    if config is None:
        self.transformer = AutoModel.from_pretrained(self.hparams.transformer)
    else:
        self.transformer = AutoModel.from_config(config)
    self.dropout = nn.Dropout(self.hparams.dropout)
    hidden_size = self.transformer.config.hidden_size
    self.classifier = BiaffineClassifier(
        hidden_size,
        label_num=self.hparams.num_labels,
        dropout=self.hparams.dropout,
        arc_hidden_size=self.hparams.arc_hidden_size,
        rel_hidden_size=self.hparams.rel_hidden_size,
        loss_interpolation=self.hparams.loss_interpolation,
        loss_func=loss_func)
def __init__(self, config: dict, do_not_download_weights=False):
    super().__init__()
    if do_not_download_weights:
        self.lrm = AutoModel.from_config(
            AutoConfig.from_pretrained(config["model_type"],
                                       cache_dir=config["model_cache_dir"]))
    else:
        self.lrm = AutoModel.from_pretrained(config["model_type"],
                                             cache_dir=config["model_cache_dir"])
    output_shape = self.lrm.config.hidden_size
    projection_bias = config.get("use_projection_bias", True)
    self.linear = nn.Linear(
        output_shape, config["emb_dim"],
        bias=projection_bias) if config["use_projection"] else nn.Identity()
    self.dropout = nn.Dropout(config["dropout_rate"]) if config.get(
        "dropout_rate", 0.) > 1e-6 else nn.Identity()
    self.config = config
    self.init_weights(type(self.lrm))
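# Hypothetical config dict for the encoder above (the class name is not shown in
# the snippet, so no instantiation is attempted); keys are inferred from the
# constructor, values are illustrative only.
encoder_config = {
    "model_type": "bert-base-uncased",  # any Hugging Face model identifier
    "model_cache_dir": "./cache",
    "emb_dim": 256,                     # projection output size
    "use_projection": True,
    "use_projection_bias": True,
    "dropout_rate": 0.1,
}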
def test_from_pretrained_avoids_weights_download_if_override_weights(self):
    # only download config because downloading pretrained weights in addition takes too long
    transformer = AutoModel.from_config(
        AutoConfig.from_pretrained("epwalsh/bert-xsmall-dummy",
                                   cache_dir=self.TEST_DIR))
    # clear cache directory
    for f in os.listdir(str(self.TEST_DIR)):
        os.remove(str(self.TEST_DIR) + "/" + f)
    assert len(os.listdir(str(self.TEST_DIR))) == 0

    save_weights_path = str(self.TEST_DIR) + "/bert_weights.pth"
    torch.save(transformer.state_dict(), save_weights_path)

    override_transformer = cached_transformers.get(
        "epwalsh/bert-xsmall-dummy",
        False,
        override_weights_file=save_weights_path,
        cache_dir=self.TEST_DIR,
    )
    # check that only three files were downloaded (filename.json, filename, filename.lock), for config.json
    # if more than three files were downloaded, then model weights were also (incorrectly) downloaded
    # NOTE: downloaded files are not explicitly detailed in Huggingface's public API,
    # so this assertion could fail in the future
    json_fnames = [
        fname for fname in os.listdir(str(self.TEST_DIR)) if fname.endswith(".json")
    ]
    assert len(json_fnames) == 1
    json_data = json.load(open(str(self.TEST_DIR) + "/" + json_fnames[0]))
    assert (
        json_data["url"] ==
        "https://huggingface.co/epwalsh/bert-xsmall-dummy/resolve/main/config.json"
    )
    resource_id = os.path.splitext(json_fnames[0])[0]
    assert set(os.listdir(str(self.TEST_DIR))) == set([
        json_fnames[0], resource_id, resource_id + ".lock", "bert_weights.pth"
    ])

    # check that override weights were loaded correctly
    for p1, p2 in zip(transformer.parameters(), override_transformer.parameters()):
        assert p1.data.ne(p2.data).sum() == 0
def __init__(self, hparams, config=None):
    super().__init__()
    self.save_hyperparameters(hparams)
    if config is None:
        self.transformer = AutoModel.from_pretrained(self.hparams.transformer)
    else:
        self.transformer = AutoModel.from_config(config)
    self.dropout = nn.Dropout(self.hparams.dropout)
    hidden_size = self.transformer.config.hidden_size
    max_length = self.transformer.config.max_position_embeddings
    self.classifier = RelativeTransformerLinearClassifier(
        input_size=hidden_size,
        hidden_size=self.hparams.hidden_size,
        num_layers=self.hparams.num_layers,
        num_heads=self.hparams.num_heads,
        dropout=self.hparams.dropout,
        max_length=max_length,
        num_labels=self.hparams.num_labels)
def __init__(self,
             pretrained_model: str,
             requires_grad: bool = False,
             dropout: float = 0.1,
             layer_dropout: float = 0.1,
             combine_layers: str = "mix") -> None:
    config = AutoConfig.from_pretrained(pretrained_model)
    config.output_hidden_states = True
    model = AutoModel.from_config(config)

    for param in model.parameters():
        param.requires_grad = requires_grad

    super().__init__(bert_model=model,
                     layer_dropout=layer_dropout,
                     combine_layers=combine_layers)

    self.model = model
    self.dropout = dropout
    self.set_dropout(dropout)
def __init__(
    self,
    arch: str,
    temperature: float,
    criterion: str,
    optcfg: DictConfig,
    num_negatives: int,
    num_positives: Optional[int] = None,
    schcfg: Optional[DictConfig] = None,
    **kwargs,
):
    super().__init__()
    self.num_negatives = num_negatives
    self.num_positives = num_positives
    self.schcfg = schcfg
    self.optcfg = optcfg
    # this line ensures params passed to LightningModule will be saved to ckpt
    # it also allows to access params with 'self.hparams' attribute
    self.save_hyperparameters()

    config = AutoConfig.from_pretrained(arch)
    self.transformer = AutoModel.from_config(config)
    # TODO
    # custom pooler? like SimCSE
    # config, add_pooling_layer=True
    # )
    pool_size = self.transformer.config.hidden_size
    self.projection = nn.Linear(pool_size, pool_size)

    # loss function
    self.cos_sim = torch.nn.CosineSimilarity(dim=-1)
    self.temperature = temperature
    if criterion == "InfoNCE":
        self.criterion = nn.CrossEntropyLoss()
    elif criterion == "rankloss":
        self.criterion = nn.MarginRankingLoss(0.02)
    else:
        raise NotImplementedError
def __init__(self, config: Dict[str, Any], initPretrainedWeights: bool = True):
    """
    Initialization of new extractive_reader with config.

    :param config: Configuration used for the initialization
        transformer_type: used type of model
        cache: used cache dir
    :type config: Dict[str, Any]
    :param initPretrainedWeights: Uses pretrained weights for transformer part.
        If False uses random initialization.
    :type initPretrainedWeights: bool
    """
    super().__init__()

    if initPretrainedWeights:
        self.transformer = AutoModel.from_pretrained(config["transformer_type"],
                                                     cache_dir=config["cache"])
    else:
        self.transformer = AutoModel.from_config(
            AutoConfig.from_pretrained(config["transformer_type"],
                                       cache_dir=config["cache"]))

    self.startEndProjection = torch.nn.Linear(
        self.transformer.config.hidden_size, 2, bias=False)
    self.selectedProjection = torch.nn.Linear(
        self.transformer.config.hidden_size, 1, bias=False)

    # For the joint we use the linear transformation for the start, because otherwise the dot product
    # will always be maximal for the dot product with itself (one token spans).
    self.jointStartProjection = torch.nn.Linear(
        self.transformer.config.hidden_size, self.transformer.config.hidden_size)

    self.config = config
    self.init_weights()
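# Hypothetical config for the reader above (keys taken from its docstring);
# values are illustrative only.
reader_config = {
    "transformer_type": "bert-base-uncased",
    "cache": "./cache",
}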
def huggingface_from_pretrained(source: Union[Path, str], tok_config: Dict,
                                trf_config: Dict):
    """Create a Huggingface transformer model from pretrained weights. Will
    download the model if it is not already downloaded.

    source (Union[str, Path]): The name of the model or a path to it, such as
        'bert-base-cased'.
    tok_config (dict): Settings to pass to the tokenizer.
    trf_config (dict): Settings to pass to the transformer.
    """
    if hasattr(source, "absolute"):
        str_path = str(source.absolute())
    else:
        str_path = source
    tokenizer = AutoTokenizer.from_pretrained(str_path, **tok_config)
    trf_config["return_dict"] = True
    config = AutoConfig.from_pretrained(str_path, **trf_config)
    transformer = AutoModel.from_config(config)
    ops = get_current_ops()
    if isinstance(ops, CupyOps):
        transformer.cuda()
    return tokenizer, transformer
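# Minimal usage sketch for huggingface_from_pretrained above (not from the
# original module). Note that the transformer is built with
# AutoModel.from_config(), so only the tokenizer and config are fetched and the
# model weights are randomly initialized.
tokenizer, transformer = huggingface_from_pretrained(
    "bert-base-cased", tok_config={"use_fast": True}, trf_config={}
)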
def test_load_full_model(self):
    model1 = AutoModel.from_config(self.config())
    model1.eval()

    name = "dummy"
    model1.add_adapter(name)
    model1.set_active_adapters([name])
    with tempfile.TemporaryDirectory() as temp_dir:
        model1.save_pretrained(temp_dir)

        model2 = AutoModel.from_pretrained(temp_dir)
        model2.set_active_adapters([name])

    # check if adapter was correctly loaded
    self.assertTrue(name in model2.config.adapters)

    # check equal output
    input_ids = self.get_input_samples((1, 128), config=model1.config)
    output1 = model1(input_ids)
    output2 = model2(input_ids)
    self.assertEqual(len(output1), len(output2))
    self.assertTrue(torch.equal(output1[0], output2[0]))
def load_model(self, checkpoint):
    config = self.config
    opt = config['opt']

    labels = load_label(opt.label_path)
    label_size = len(labels)
    config['labels'] = labels
    self.labels = labels
    if config['emb_class'] == 'glove':
        if config['enc_class'] == 'gnb':
            model = TextGloveGNB(config, opt.embedding_path, label_size)
        if config['enc_class'] == 'cnn':
            model = TextGloveCNN(config, opt.embedding_path, label_size,
                                 emb_non_trainable=True)
        if config['enc_class'] == 'densenet-cnn':
            model = TextGloveDensenetCNN(config, opt.embedding_path, label_size,
                                         emb_non_trainable=True)
        if config['enc_class'] == 'densenet-dsa':
            model = TextGloveDensenetDSA(config, opt.embedding_path, label_size,
                                         emb_non_trainable=True)
    else:
        from transformers import AutoTokenizer, AutoConfig, AutoModel
        bert_config = AutoConfig.from_pretrained(opt.bert_output_dir)
        bert_tokenizer = AutoTokenizer.from_pretrained(opt.bert_output_dir)
        bert_model = AutoModel.from_config(bert_config)
        ModelClass = TextBertCNN
        if config['enc_class'] == 'cls':
            ModelClass = TextBertCLS
        model = ModelClass(config, bert_config, bert_model, bert_tokenizer, label_size)
    model.load_state_dict(checkpoint)
    model = model.to(opt.device)
    logger.info("[Model loaded]")
    return model
def __init__(self, hparams, config=None):
    super().__init__()
    self.save_hyperparameters(hparams)
    if config is None:
        self.transformer = AutoModel.from_pretrained(self.hparams.transformer)
    else:
        self.transformer = AutoModel.from_config(config)
    self.dropout = nn.Dropout(self.hparams.dropout)
    hidden_size = self.transformer.config.hidden_size
    self.classifier = ViClassifier(
        input_size=hidden_size,
        label_num=self.hparams.num_labels,
        dropout=self.hparams.dropout,
        lstm_num_layers=self.hparams.lstm_num_layers,
        lstm_hidden_size=self.hparams.lstm_hidden_size,
        bin_hidden_size=self.hparams.bin_hidden_size,
        arc_hidden_size=self.hparams.arc_hidden_size,
        rel_hidden_size=self.hparams.rel_hidden_size,
        loss_interpolation=self.hparams.loss_interpolation,
        inference=self.hparams.inference,
        max_iter=self.hparams.max_iter,
    )
def __init__(self, hparams):
    super(LMFineTuner, self).__init__()
    self.hparams = hparams
    print('----------------------')
    pprint(self.hparams)
    print('----------------------')
    self.target = "age" if self.hparams.regression == True else "gender"
    # self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    self.config = AutoConfig.from_pretrained("roberta-base")
    self.tokenizer = AutoTokenizer.from_pretrained('roberta-base')
    self.contextualModel = AutoModel.from_config(self.config)
    # self.contextualModel = BertModel.from_pretrained('bert-base-uncased')
    self.ftLayer = nn.Linear(768, 1) if self.hparams.regression == True else nn.Linear(
        768, self.hparams.num_classes)
    self.activation = torch.nn.ReLU()
    self.lossFunc = nn.MSELoss(
        reduction='mean') if self.hparams.regression == True else nn.CrossEntropyLoss()
def test_add_adapter_with_invertible(self):
    model = AutoModel.from_config(self.config())
    model.eval()

    for adapter_config in [PfeifferInvConfig(), HoulsbyInvConfig()]:
        with self.subTest(model_class=model.__class__.__name__,
                          config=adapter_config.__class__.__name__):
            name = adapter_config.__class__.__name__
            model.add_adapter(name, config=adapter_config)
            model.set_active_adapters([name])

            # adapter is correctly added to config
            self.assertTrue(name in model.config.adapters)
            self.assertEqual(adapter_config, model.config.adapters.get(name))

            # invertible adapter is correctly added and returned
            self.assertTrue(name in model.invertible_adapters)
            self.assertEqual(model.invertible_adapters[name],
                             model.get_invertible_adapter())

            # all invertible adapter weights should be activated for training
            for param in model.invertible_adapters[name].parameters():
                self.assertTrue(param.requires_grad)

            # check forward pass
            input_ids = self.get_input_samples((1, 128), config=model.config)
            input_data = {"input_ids": input_ids}
            adapter_output = model(**input_data)
            # make sure the output is different without invertible adapter
            del model.invertible_adapters[name]
            adapter_output_no_inv = model(**input_data)
            self.assertEqual(len(adapter_output), len(adapter_output_no_inv))
            self.assertFalse(
                torch.equal(adapter_output[0], adapter_output_no_inv[0]))
def init_bert(self):
    attr = self.attr
    pretrained_name_or_path = attr.pretrained_name_or_path
    bert_config = AutoConfig.from_pretrained(pretrained_name_or_path)
    if attr.input_representation == "subwords":
        # tag informed modeling
        if attr.n_token_type_ids and attr.n_token_type_ids > bert_config.type_vocab_size:
            # load from config and then map embeddings manually from directory `pretrained_name_or_path`
            if not os.path.exists(pretrained_name_or_path):
                raise Exception(
                    "when using tag informed modeling by inputting `n_token_type_ids`, "
                    "you must specify a directory of downloaded model weights and not a name"
                )
            print(
                f"upping type_vocab_size of BERT to {attr.n_token_type_ids} from {bert_config.type_vocab_size}"
            )
            bert_config.type_vocab_size = attr.n_token_type_ids
            bert_model = AutoModel.from_config(config=bert_config)
            bert_model = load_bert_pretrained_weights(bert_model,
                                                      pretrained_name_or_path,
                                                      attr.device)
        else:
            bert_model = AutoModel.from_pretrained(pretrained_name_or_path)
    elif attr.input_representation == "charcnn":
        # bert_model = CharacterBertModel.from_pretrained(pretrained_name_or_path, config=config)
        raise NotImplementedError
    else:
        raise ValueError
    if not attr.finetune_bert:
        for param in bert_model.parameters():
            param.requires_grad = False
    bert_model.to(attr.device)

    self.outdim = bert_config.hidden_size
    self.config = bert_config
    self.model = bert_model
    self.requires_bert_optimizer = True
    return
def load_model(config, checkpoint):
    opt = config['opt']

    labels = load_label(opt.label_path)
    label_size = len(labels)
    config['labels'] = labels
    if config['emb_class'] == 'glove':
        if config['enc_class'] == 'gnb':
            model = TextGloveGNB(config, opt.embedding_path, label_size)
        if config['enc_class'] == 'cnn':
            model = TextGloveCNN(config, opt.embedding_path, label_size,
                                 emb_non_trainable=True)
        if config['enc_class'] == 'densenet-cnn':
            model = TextGloveDensenetCNN(config, opt.embedding_path, label_size,
                                         emb_non_trainable=True)
        if config['enc_class'] == 'densenet-dsa':
            model = TextGloveDensenetDSA(config, opt.embedding_path, label_size,
                                         emb_non_trainable=True)
    else:
        from transformers import AutoTokenizer, AutoConfig, AutoModel
        bert_config = AutoConfig.from_pretrained(opt.bert_output_dir)
        bert_tokenizer = AutoTokenizer.from_pretrained(opt.bert_output_dir)
        bert_model = AutoModel.from_config(bert_config)
        ModelClass = TextBertCNN
        if config['enc_class'] == 'cls':
            ModelClass = TextBertCLS
        model = ModelClass(config, bert_config, bert_model, bert_tokenizer, label_size)

    if opt.enable_qat:
        assert opt.device == 'cpu'
        model.qconfig = torch.quantization.get_default_qat_qconfig('fbgemm')
        '''
        # fuse if applicable
        # model = torch.quantization.fuse_modules(model, [['']])
        '''
        model = torch.quantization.prepare_qat(model)
        model.eval()
        model.to('cpu')
        logger.info("[Convert to quantized model with device=cpu]")
        model = torch.quantization.convert(model)
    if opt.enable_qat_fx:
        import torch.quantization.quantize_fx as quantize_fx
        qconfig_dict = {
            "": torch.quantization.get_default_qat_qconfig('fbgemm')
        }
        model = quantize_fx.prepare_qat_fx(model, qconfig_dict)
        logger.info("[Convert to quantized model]")
        model = quantize_fx.convert_fx(model)

    model.load_state_dict(checkpoint)
    model = model.to(opt.device)
    '''
    for name, param in model.named_parameters():
        print(name, param.data, param.device, param.requires_grad)
    '''
    logger.info("[model] :\n{}".format(model.__str__()))
    logger.info("[Model loaded]")
    return model
def __init__(self) -> None:
    super(BERTClass, self).__init__()
    config = AutoConfig.from_pretrained('bert-base-uncased')
    self.l1 = AutoModel.from_config(config)
    self.l2 = torch.nn.Dropout(0.3)
    self.l3 = torch.nn.Linear(768, 6)
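# Hypothetical forward-pass sketch for BERTClass above (the snippet does not show
# its forward() method); a matching 'bert-base-uncased' tokenizer is assumed.
import torch
from transformers import AutoTokenizer

model = BERTClass()
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
batch = tokenizer(["an example sentence"], return_tensors="pt", padding=True)
with torch.no_grad():
    cls_repr = model.l1(**batch).last_hidden_state[:, 0]  # [CLS] vector, (1, 768)
    logits = model.l3(model.l2(cls_repr))                 # (1, 6)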
def __init__(
    self,
    model: str = "bert-base-uncased",
    fine_tune: bool = True,
    layers: str = "-1",
    layer_mean: bool = True,
    subtoken_pooling: str = "first",
    cls_pooling: str = "cls",
    is_token_embedding: bool = True,
    is_document_embedding: bool = True,
    allow_long_sentences: bool = False,
    use_context: Union[bool, int] = False,
    respect_document_boundaries: bool = True,
    context_dropout: float = 0.5,
    saved_config: Optional[PretrainedConfig] = None,
    tokenizer_data: Optional[BytesIO] = None,
    name: Optional[str] = None,
    **kwargs,
):
    self.instance_parameters = self.get_instance_parameters(locals=locals())
    del self.instance_parameters["saved_config"]
    del self.instance_parameters["tokenizer_data"]
    super().__init__()

    # temporary fix to disable tokenizer parallelism warning
    # (see https://stackoverflow.com/questions/62691279/how-to-disable-tokenizers-parallelism-true-false-warning)
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    # do not print transformer warnings as these are confusing in this case
    from transformers import logging

    logging.set_verbosity_error()

    if tokenizer_data is None:
        # load tokenizer and transformer model
        self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(model, **kwargs)
    else:
        # load tokenizer from inmemory zip-file
        self.tokenizer = self._tokenizer_from_bytes(tokenizer_data)

    if saved_config is None:
        config = AutoConfig.from_pretrained(model, output_hidden_states=True, **kwargs)
        self.model = AutoModel.from_pretrained(model, config=config)
    else:
        self.model = AutoModel.from_config(saved_config, **kwargs)

    self.truncate = True
    if self.tokenizer.model_max_length > LARGE_INTEGER:
        allow_long_sentences = False
        self.truncate = False

    self.stride = self.tokenizer.model_max_length // 2 if allow_long_sentences else 0
    self.allow_long_sentences = allow_long_sentences
    self.use_lang_emb = hasattr(self.model, "use_lang_emb") and self.model.use_lang_emb

    # model name
    if name is None:
        self.name = "transformer-" + str(model)
    else:
        self.name = name
    self.base_model_name = str(model)

    self.token_embedding = is_token_embedding
    self.document_embedding = is_document_embedding

    if not self.token_embedding and not self.document_embedding:
        raise ValueError("either 'is_token_embedding' or 'is_document_embedding' needs to be set.")

    if self.document_embedding and cls_pooling not in ["cls", "max", "mean"]:
        raise ValueError(f"Document Pooling operation `{cls_pooling}` is not defined for TransformerEmbedding")

    if self.token_embedding and subtoken_pooling not in ["first", "last", "first_last", "mean"]:
        raise ValueError(f"Subtoken Pooling operation `{subtoken_pooling}` is not defined for TransformerEmbedding")

    if self.document_embedding and cls_pooling == "cls" and allow_long_sentences:
        log.warning(
            "Using long sentences for Document embeddings is only beneficial for cls_pooling types 'mean' and 'max'."
        )

    if isinstance(use_context, bool):
        self.context_length: int = 64 if use_context else 0
    else:
        self.context_length = use_context

    self.context_dropout = context_dropout
    self.respect_document_boundaries = respect_document_boundaries

    self.to(flair.device)

    # embedding parameters
    if layers == "all":
        # send mini-token through to check how many layers the model has
        hidden_states = self.model(torch.tensor([1], device=flair.device).unsqueeze(0))[-1]
        self.layer_indexes = list(range(len(hidden_states)))
    else:
        self.layer_indexes = list(map(int, layers.split(",")))

    self.cls_pooling = cls_pooling
    self.subtoken_pooling = subtoken_pooling
    self.layer_mean = layer_mean
    self.fine_tune = fine_tune
    self.static_embeddings = not self.fine_tune

    # return length
    self.embedding_length_internal = self._calculate_embedding_length()

    self.special_tokens = []
    # check if special tokens exist to circumvent error message
    if self.tokenizer._bos_token:
        self.special_tokens.append(self.tokenizer.bos_token)
    if self.tokenizer._cls_token:
        self.special_tokens.append(self.tokenizer.cls_token)

    # most models have an initial BOS token, except for XLNet, T5 and GPT2
    self.begin_offset = self._get_begin_offset_of_tokenizer()
    self.initial_cls_token: bool = self._has_initial_cls_token()

    # when initializing, embeddings are in eval mode by default
    self.eval()
def load_model(config, checkpoint):
    args = config['args']

    labels = load_dict(args.label_path)
    label_size = len(labels)
    config['labels'] = labels
    config['label_size'] = label_size
    glabels = load_dict(args.glabel_path)
    glabel_size = len(glabels)
    config['glabels'] = glabels
    config['glabel_size'] = glabel_size
    poss = load_dict(args.pos_path)
    pos_size = len(poss)
    config['poss'] = poss
    config['pos_size'] = pos_size

    if config['emb_class'] == 'glove':
        if config['enc_class'] == 'bilstm':
            model = GloveLSTMCRF(config, args.embedding_path, label_size, pos_size,
                                 emb_non_trainable=True,
                                 use_crf=args.use_crf,
                                 use_ncrf=args.use_ncrf,
                                 use_char_cnn=args.use_char_cnn,
                                 use_mha=args.use_mha)
        if config['enc_class'] == 'densenet':
            model = GloveDensenetCRF(config, args.embedding_path, label_size, pos_size,
                                     emb_non_trainable=True,
                                     use_crf=args.use_crf,
                                     use_ncrf=args.use_ncrf,
                                     use_char_cnn=args.use_char_cnn,
                                     use_mha=args.use_mha)
    elif config['emb_class'] == 'elmo':
        from allennlp.modules.elmo import Elmo
        elmo_model = Elmo(args.elmo_options_file, args.elmo_weights_file, 2, dropout=0)
        model = ElmoLSTMCRF(config, elmo_model, args.embedding_path, label_size, pos_size,
                            emb_non_trainable=True,
                            use_crf=args.use_crf,
                            use_ncrf=args.use_ncrf,
                            use_char_cnn=args.use_char_cnn,
                            use_mha=args.use_mha)
    else:
        bert_config = AutoConfig.from_pretrained(args.bert_output_dir)
        bert_tokenizer = AutoTokenizer.from_pretrained(args.bert_output_dir)
        bert_model = AutoModel.from_config(bert_config)
        ModelClass = BertLSTMCRF
        model = ModelClass(config, bert_config, bert_model, bert_tokenizer,
                           label_size, glabel_size, pos_size,
                           use_crf=args.use_crf,
                           use_ncrf=args.use_ncrf,
                           use_pos=args.bert_use_pos,
                           use_char_cnn=args.use_char_cnn,
                           use_mha=args.use_mha,
                           use_subword_pooling=args.bert_use_subword_pooling,
                           use_word_embedding=args.bert_use_word_embedding,
                           embedding_path=args.embedding_path,
                           emb_non_trainable=True,
                           use_doc_context=args.bert_use_doc_context,
                           disable_lstm=args.bert_disable_lstm,
                           feature_based=args.bert_use_feature_based,
                           use_mtl=args.bert_use_mtl)

    model.load_state_dict(checkpoint)
    model = model.to(args.device)
    logger.info("[Loaded]")
    return model