def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
    """
    Load a pretrained model by supplying

    * the name of a remote model on s3 ("bert-base-cased" ...)
    * OR a local path of a model trained via transformers ("some_dir/huggingface_model")
    * OR a local path of a model trained via FARM ("some_dir/farm_model")

    :param pretrained_model_name_or_path: The path of the saved pretrained model or its name.
    :type pretrained_model_name_or_path: str
    """
    bert = cls()
    if "farm_lm_name" in kwargs:
        bert.name = kwargs["farm_lm_name"]
    else:
        bert.name = pretrained_model_name_or_path
    # Distinguish between a model saved in FARM format and one in the Pytorch-Transformers format.
    farm_lm_config = Path(pretrained_model_name_or_path) / "language_model_config.json"
    if os.path.exists(farm_lm_config):
        # FARM style
        bert_config = BertConfig.from_pretrained(farm_lm_config)
        farm_lm_model = Path(pretrained_model_name_or_path) / "language_model.bin"
        bert.model = BertModel.from_pretrained(farm_lm_model, config=bert_config, **kwargs)
        bert.language = bert.model.config.language
    else:
        # Pytorch-Transformers style
        bert.model = BertModel.from_pretrained(str(pretrained_model_name_or_path), **kwargs)
        bert.language = cls._get_or_infer_language_from_name(language, pretrained_model_name_or_path)
    return bert
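# Usage sketch for the loader above (hedged: assumes it is a classmethod on a
# FARM-style language-model wrapper, here called `Bert`, and that a FARM
# checkpoint directory contains language_model_config.json and language_model.bin):
#
#   lm = Bert.load("bert-base-cased")             # remote model by name
#   lm = Bert.load("some_dir/farm_model")         # local FARM-format checkpoint
#   lm = Bert.load("some_dir/huggingface_model")  # local transformers checkpoint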
def __init__(
    self,
    vocab: Vocabulary,
    bert_model: Union[str, BertModel],
    embedding_dropout: float = 0.0,
    initializer: InitializerApplicator = InitializerApplicator(),
    label_smoothing: float = None,
    ignore_span_metric: bool = False,
    srl_eval_path: str = DEFAULT_SRL_EVAL_PATH,
    **kwargs,
) -> None:
    super().__init__(vocab, **kwargs)

    if isinstance(bert_model, str):
        self.bert_model = BertModel.from_pretrained(bert_model)
    else:
        self.bert_model = bert_model

    self.num_classes = self.vocab.get_vocab_size("labels")
    if srl_eval_path is not None:
        # For the span based evaluation, we don't want to consider labels
        # for verb, because the verb index is provided to the model.
        self.span_metric = SrlEvalScorer(srl_eval_path, ignore_classes=["V"])
    else:
        self.span_metric = None
    self.tag_projection_layer = Linear(self.bert_model.config.hidden_size, self.num_classes)

    self.embedding_dropout = Dropout(p=embedding_dropout)
    self._label_smoothing = label_smoothing
    self.ignore_span_metric = ignore_span_metric
    initializer(self)
def __init__(self, config):
    """Initialize the model with a config dict.

    Args:
        config: a python object that must contain the attributes below:
            config.bert_model_path: pretrained model path or model type,
                e.g. 'bert-base-chinese'
            config.fc_hidden: hidden size of the final projection layer
            config.num_classes: int, e.g. 2
            config.dropout: float between 0 and 1
    """
    super().__init__()
    self.bert = BertModel.from_pretrained(config.bert_model_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    hidden_size = config.fc_hidden
    target_class = config.num_classes
    # self.resnet = resnet18(num_classes=hidden_size)
    # self.resnet = ResNet(block=BasicBlock, layers=[1, 1, 1, 1], num_classes=hidden_size)
    # self.resnet = ResNet(config.in_channels, 18)
    self.fpn = FPN([256] * 4, 4)
    self.fpn_seq = FPN([128, 128, 128, 70], 4)
    # The CNN feature maps have 228 dimensions in total.
    self.dropout = nn.Dropout(config.dropout)
    self.fc1 = nn.Linear(hidden_size, target_class)
    self.num_classes = config.num_classes
def from_pretrained(model_id_or_path: str, device: Optional[torch.device] = None):
    torch_model = TorchBertModel.from_pretrained(model_id_or_path)
    model = BertModelNoPooler.from_torch(torch_model, device)
    model.config = torch_model.config
    model._torch_model = torch_model  # keep a reference so the torch model is not destroyed
    return model
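# Usage sketch for the wrapper above (hedged: assumes BertModelNoPooler is a
# re-implementation whose from_torch() copies weights from the loaded torch
# model; the checkpoint name is only an example):
#
#   model = from_pretrained("bert-base-uncased", device=torch.device("cpu"))
#   # `model` shares the torch model's config and keeps it alive via _torch_model.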
def bertModel(*args, **kwargs):
    """
    BertModel is the basic BERT Transformer model with a layer of summed token,
    position and sequence embeddings followed by a series of identical
    self-attention blocks (12 for BERT-base, 24 for BERT-large).

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)

        # Prepare tokenized input
        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        >>> tokenized_text = tokenizer.tokenize(text)
        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> tokens_tensor = torch.tensor([indexed_tokens])
        >>> segments_tensors = torch.tensor([segments_ids])

        # Load bertModel
        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
        >>> model.eval()

        # Predict hidden states features for each layer
        >>> with torch.no_grad():
        ...     encoded_layers, _ = model(tokens_tensor, segments_tensors)
    """
    model = BertModel.from_pretrained(*args, **kwargs)
    return model
def __init__(self,
             input_path: str = None,
             model: str = None,
             tokenizer: Any = None,
             num_classes: int = 2,
             cuda_device: int = 0,
             batch_size: int = 4,
             num_workers: int = 0,
             lr: float = 2e-5,
             weight_decay: float = 0.1,
             warm_up: int = 500):
    super(BertClassificationModel, self).__init__()
    self.num_classes = num_classes
    self.cuda_device = cuda_device
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.lr = lr
    self.weight_decay = weight_decay
    self.warm_up = warm_up
    self.save_hyperparameters()
    self.dataset = BertDataset(input_path, tokenizer)
    self.text_embedding = BertModel.from_pretrained(
        model, output_attentions=False, output_hidden_states=True)
    self.classifier_hidden_size = self.text_embedding.config.hidden_size
    self.classifier = nn.Linear(self.classifier_hidden_size, self.num_classes)
def __init__(self, config):
    """Initialize the model with a config dict.

    Args:
        config: a python object that must contain the attributes below:
            config.bert_model_path: pretrained model path or model type,
                e.g. 'bert-base-chinese'
            config.resnet_type: key into resnet_pool selecting the ResNet variant
            config.num_fc_hidden_size: hidden size of the ResNet head
            config.num_classes: int, e.g. 2
            config.dropout: float between 0 and 1
    """
    super().__init__()
    self.bert = BertModel.from_pretrained(config.bert_model_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    hidden_size = config.num_fc_hidden_size
    target_class = config.num_classes
    # self.resnet = resnet18(num_classes=hidden_size)
    self.resnet = resnet_pool[config.resnet_type](num_classes=hidden_size)
    # The CNN feature maps have 228 dimensions in total.
    self.dropout = nn.Dropout(config.dropout)
    self.fc1 = nn.Linear(hidden_size, target_class)
    self.num_classes = config.num_classes
def __init__(self, decoder, src_pad_idx, trg_pad_idx, bert_config, device):
    super().__init__()
    self.bert_encoder = BertModel.from_pretrained('bert-base-uncased')
    self.decoder = decoder
    self.src_pad_idx = src_pad_idx
    self.trg_pad_idx = trg_pad_idx
    self.device = device
def load(cls, model_name: str, cache_model: bool = True) -> BertModel:
    if model_name in cls._cache:
        return PretrainedBertModel._cache[model_name]

    model = BertModel.from_pretrained(model_name)
    if cache_model:
        cls._cache[model_name] = model

    return model
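# Usage sketch for the cached loader above (hedged: assumes `load` is a
# classmethod of PretrainedBertModel with `_cache` as a class-level dict):
#
#   model = PretrainedBertModel.load("bert-base-uncased")   # loads and caches
#   same = PretrainedBertModel.load("bert-base-uncased")    # returned from _cache
#   fresh = PretrainedBertModel.load("bert-base-cased", cache_model=False)  # never cached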
def main():
    if len(sys.argv) != 3:
        print("Usage: \n"
              "    convert_huggingface_bert_to_npz model_name (bert-base-uncased) output_file")
        exit(0)
    torch.set_grad_enabled(False)

    model_name = sys.argv[1]
    model = BertModel.from_pretrained(model_name)
    arrays = {k: v.detach() for k, v in model.named_parameters()}

    q_weight_key = 'self.query.weight'
    k_weight_key = 'self.key.weight'
    v_weight_key = 'self.value.weight'

    q_bias_key = 'self.query.bias'
    k_bias_key = 'self.key.bias'
    v_bias_key = 'self.value.bias'

    numpy_dict = {}
    for k in arrays.keys():
        if k.endswith(q_weight_key):
            # Fuse the Q, K and V projection weights of this layer into a single
            # transposed "qkv.weight" matrix.
            v = torch.clone(
                torch.t(
                    torch.cat([
                        arrays[k],
                        arrays[k[:-len(q_weight_key)] + k_weight_key],
                        arrays[k[:-len(q_weight_key)] + v_weight_key]
                    ], 0).contiguous()).contiguous())
            numpy_dict[k[:-len(q_weight_key)] + "qkv.weight"] = v.numpy()
        elif k.endswith(q_bias_key):
            v = torch.cat([
                arrays[k],
                arrays[k[:-len(q_bias_key)] + k_bias_key],
                arrays[k[:-len(q_bias_key)] + v_bias_key]
            ], 0).numpy()
            numpy_dict[k[:-len(q_bias_key)] + 'qkv.bias'] = v
        elif any(k.endswith(suffix) for suffix in
                 (k_weight_key, v_weight_key, k_bias_key, v_bias_key)):
            # K and V tensors were already fused when the matching Q key was seen.
            continue
        elif (k.endswith("attention.output.dense.weight")
              or k.endswith("pooler.dense.weight")
              or k.endswith("output.dense.weight")
              or k.endswith("intermediate.dense.weight")):
            # Store dense weights transposed.
            numpy_dict[k] = torch.clone(torch.t(arrays[k]).contiguous()).numpy()
        else:
            numpy_dict[k] = arrays[k].numpy()
    del arrays
    del model
    numpy.savez_compressed(sys.argv[2], **numpy_dict)
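# Usage sketch for the converter above (hedged: assumes it lives in a script
# named convert_huggingface_bert_to_npz.py with `import sys`, `import numpy`,
# `import torch` and `from transformers import BertModel` at the top):
#
#   python convert_huggingface_bert_to_npz.py bert-base-uncased bert.npz
#
# Q, K and V weights are concatenated along dim 0 and then transposed, so each
# fused "qkv.weight" entry in the .npz has shape (hidden_size, 3 * hidden_size).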
def __init__(self, pretrained_bert_model_dir: str = 'bert-base-uncased'):
    super().__init__()
    self.num_target = 3
    self.target = 'target'
    self.class_map = {'negative': 0, 'neutral': 1, 'positive': 2}
    self.bert = BertModel.from_pretrained(pretrained_bert_model_dir,
                                          output_hidden_states=True,
                                          output_attentions=False)
    self.hidden_size = self.bert.config.hidden_size
    self.batch_norm = nn.BatchNorm1d(num_features=3 * self.hidden_size, momentum=0.1)
    self.linear = nn.Linear(self.hidden_size, self.num_target)
def __init__(self, config, num_classes):
    super().__init__()
    self.num_classes = num_classes
    self.bert = BertModel.from_pretrained(config.bert_model_dir)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.classifier = nn.Linear(config.hidden_size, num_classes)
def initialize_bertgraph(BERT_NAME_OR_PATH,
                         layernorm_key=False,
                         layernorm_value=False,
                         input_label_graph=False,
                         input_unlabel_graph=False,
                         label_size=None):
    bertgconfig = BertGraphConfig.from_pretrained(BERT_NAME_OR_PATH)
    init_bert = BertModel.from_pretrained(BERT_NAME_OR_PATH)
    bertgconfig.add_graph_par(layernorm_key, layernorm_value, input_label_graph,
                              input_unlabel_graph, label_size)
    model = BertGraphModel(bertgconfig)
    model.load_state_dict(init_bert.state_dict(), strict=False)
    return model
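# Usage sketch (hedged: the flag combination and label_size are illustrative;
# BertGraphModel and BertGraphConfig are project-specific classes):
#
#   model = initialize_bertgraph('bert-base-cased',
#                                layernorm_key=True,
#                                input_label_graph=True,
#                                label_size=40)
#
# Loading with strict=False copies the overlapping pretrained BERT weights and
# leaves the newly added graph-attention parameters at their random initialization.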
def __init__(self,
             bert_model: str,
             label_size: int,
             hidden_size: int = 256,
             layers: int = 1,
             lstm_dropout: float = 0.50,
             fine_tune: bool = False) -> None:
    super(BiRecurrentConvCRF4NestedNER, self).__init__()

    self.bert: BertModel = BertModel.from_pretrained(bert_model)
    self.bert.embeddings.dropout = VarDropout(self.bert.embeddings.dropout.p)
    for l in range(len(self.bert.encoder.layer)):
        self.bert.encoder.layer[l].attention.output.dropout \
            = VarDropout(self.bert.encoder.layer[l].attention.output.dropout.p)
        self.bert.encoder.layer[l].output.dropout \
            = VarDropout(self.bert.encoder.layer[l].output.dropout.p)
    self.fine_tune: bool = fine_tune
    if fine_tune:
        self.bert.embeddings.word_embeddings.weight.requires_grad = False
        self.bert.embeddings.position_embeddings.weight.requires_grad = False
        self.bert.embeddings.token_type_embeddings.weight.requires_grad = False
    else:
        for name, parameter in self.bert.named_parameters():
            parameter.requires_grad = False
        self.bert.encoder.output_hidden_states = True

    # standard dropout
    self.dropout_out: nn.Dropout2d = nn.Dropout2d(p=lstm_dropout)

    if fine_tune:
        self.rnn: VarMaskedFastLSTM = VarMaskedFastLSTM(self.bert.config.hidden_size,
                                                        hidden_size,
                                                        num_layers=layers,
                                                        batch_first=True,
                                                        bidirectional=True,
                                                        dropout=(lstm_dropout, lstm_dropout))
    else:
        self.bert_layers: int = 8
        self.rnn: VarMaskedFastLSTM = VarMaskedFastLSTM(self.bert.config.hidden_size * self.bert_layers,
                                                        hidden_size,
                                                        num_layers=layers,
                                                        batch_first=True,
                                                        bidirectional=True,
                                                        dropout=(lstm_dropout, lstm_dropout))

    self.reset_parameters()

    self.all_crfs: List[ChainCRF4NestedNER] = []
    for label in range(label_size):
        crf = ChainCRF4NestedNER(hidden_size * 2, 1)
        self.all_crfs.append(crf)
        self.add_module('crf%d' % label, crf)

    self.b_id: int = 0
    self.i_id: int = 1
    self.e_id: int = 2
    self.s_id: int = 3
    self.o_id: int = 4
    self.eos_id: int = 5
def __init__(self, config, bert_type_or_path, vocab_size):
    super().__init__(config)
    self.config = config
    self.vocab_size = vocab_size
    self.main_encoder = BertModel.from_pretrained(
        bert_type_or_path)  # out: last_HS, pooled_out, all_HS, attention (opt)
    self.mlp_input_size = config.hidden_size  # CR
    self.mlp = MLPWithLayerNorm(config, self.mlp_input_size)
    self.decoder = nn.Linear(self.config.hidden_size, self.config.vocab_size, bias=False)
    # Initialize the decoder with the encoder's input embedding matrix (weight tying).
    self.decoder.weight = self.main_encoder.get_input_embeddings().weight
    self.init_weights()
def test_model_from_pretrained(self):
    logging.basicConfig(level=logging.INFO)
    for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        config = BertConfig.from_pretrained(model_name)
        self.assertIsNotNone(config)
        self.assertIsInstance(config, PretrainedConfig)

        model = BertModel.from_pretrained(model_name)
        model, loading_info = BertModel.from_pretrained(model_name, output_loading_info=True)
        self.assertIsNotNone(model)
        self.assertIsInstance(model, PreTrainedModel)
        for value in loading_info.values():
            self.assertEqual(len(value), 0)

        config = BertConfig.from_pretrained(model_name,
                                            output_attentions=True,
                                            output_hidden_states=True)
        model = BertModel.from_pretrained(model_name,
                                          output_attentions=True,
                                          output_hidden_states=True)
        self.assertEqual(model.config.output_attentions, True)
        self.assertEqual(model.config.output_hidden_states, True)
        self.assertEqual(model.config, config)
def __init__(self, config):
    """Initialize the model with a config dict.

    Args:
        config: a python object that must contain the attributes below:
            config.bert_model_path: pretrained model path or model type,
                e.g. 'bert-base-chinese'
            config.num_classes: int, e.g. 2
            config.dropout: float between 0 and 1
            config.max_seq_len: input sequence length fed to the Conv1d stack
            config.dim_capsule: dimensionality of each capsule
            config.num_compressed_capsule: number of compressed capsules
    """
    super().__init__()
    self.bert = BertModel.from_pretrained(config.bert_model_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.dropout = nn.Dropout(config.dropout)
    self.linear = nn.Linear(4, config.num_classes)
    self.num_classes = config.num_classes

    self.dim_capsule = config.dim_capsule
    self.num_compressed_capsule = config.num_compressed_capsule
    self.ngram_size = [2, 4, 8]
    self.convs_doc = nn.ModuleList([
        nn.Conv1d(config.max_seq_len, 32, K, stride=2) for K in self.ngram_size
    ])
    torch.nn.init.xavier_uniform_(self.convs_doc[0].weight)
    torch.nn.init.xavier_uniform_(self.convs_doc[1].weight)
    torch.nn.init.xavier_uniform_(self.convs_doc[2].weight)

    self.primary_capsules_doc = PrimaryCaps(num_capsules=self.dim_capsule,
                                            in_channels=32,
                                            out_channels=32,
                                            kernel_size=1,
                                            stride=1)
    self.flatten_capsules = FlattenCaps()
    self.W_doc = nn.Parameter(torch.FloatTensor(147328, self.num_compressed_capsule))
    torch.nn.init.xavier_uniform_(self.W_doc)
    self.fc_capsules_doc_child = FCCaps(config,
                                        output_capsule_num=config.num_classes,
                                        input_capsule_num=self.num_compressed_capsule,
                                        in_channels=self.dim_capsule,
                                        out_channels=self.dim_capsule)
def __init__(self, batch_size=256, num_workers=8):
    super().__init__()
    self.model = BertModel.from_pretrained(
        'cl-tohoku/bert-base-japanese-whole-word-masking')
    self.linear = nn.Linear(768, 9)
    self.batch_size = batch_size
    self.num_workers = num_workers

    # Freeze BERT, then unfreeze only the last encoder layer and the classification head.
    for param in self.model.parameters():
        param.requires_grad = False
    for param in self.model.encoder.layer[-1].parameters():
        param.requires_grad = True
    for param in self.linear.parameters():
        param.requires_grad = True
def __init__(self, param_path='bert-base-uncased', aggregation: Union[Callable, str] = 'cls'):
    super(BasicBertEncoder, self).__init__()
    self._encoder = BertModel.from_pretrained(param_path)
    if isinstance(aggregation, str):
        if aggregation == 'cls':
            self.aggregation_layer = lambda x: x[:, 0]
        elif aggregation == 'mean':
            self.aggregation_layer = lambda x: torch.mean(x, dim=1)
        else:
            raise Exception("Aggregation Layer doesn't support %s!" % aggregation)
    elif callable(aggregation):
        self.aggregation_layer = aggregation
    else:
        raise Exception("Aggregation Layer doesn't support this!")
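# Usage sketch (hedged: assumes the aggregation layer receives token-level
# hidden states of shape (batch, seq_len, hidden)):
#
#   encoder = BasicBertEncoder('bert-base-uncased', aggregation='cls')
#   encoder = BasicBertEncoder(aggregation=lambda x: x.max(dim=1).values)  # custom pooling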
def test_from_pytorch(self):
    with torch.no_grad():
        with self.subTest("bert-base-cased"):
            tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")
            fx_model = FlaxBertModel.from_pretrained("bert-base-cased")
            pt_model = BertModel.from_pretrained("bert-base-cased")

            # Check for simple input
            pt_inputs = tokenizer.encode_plus("This is a simple input",
                                              return_tensors=TensorType.PYTORCH)
            fx_inputs = tokenizer.encode_plus("This is a simple input",
                                              return_tensors=TensorType.JAX)
            pt_outputs = pt_model(**pt_inputs).to_tuple()
            fx_outputs = fx_model(**fx_inputs)

            self.assertEqual(len(fx_outputs), len(pt_outputs),
                             "Output lengths differ between Flax and PyTorch")
            for fx_output, pt_output in zip(fx_outputs, pt_outputs):
                self.assert_almost_equals(fx_output, pt_output.numpy(), 5e-4)
def __init__(self, config):
    """Initialize the model with a config dict.

    Args:
        config: a python object that must contain the attributes below:
            config.bert_model_path: pretrained model path or model type,
                e.g. 'bert-base-chinese'
            config.hidden_size: the same as the BERT model, usually 768
            config.num_classes: int, e.g. 2
            config.dropout: float between 0 and 1
    """
    super().__init__()
    self.bert = BertModel.from_pretrained(config.bert_model_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    self.linear = nn.Linear(config.hidden_size, config.num_classes)
    self.dropout = nn.Dropout(config.dropout)
    self.num_classes = config.num_classes
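# A minimal sketch of the config object these constructors expect (hedged: any
# attribute container works, SimpleNamespace is just one convenient choice, and
# the class name BertClassifier is hypothetical):
from types import SimpleNamespace

config = SimpleNamespace(
    bert_model_path='bert-base-chinese',  # pretrained model path or model type
    hidden_size=768,                      # must match the BERT hidden size
    num_classes=2,
    dropout=0.1,
)
model = BertClassifier(config)  # hypothetical class wrapping the __init__ above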
def __init__(self, vocab_size, embed_dim, enc_hid_dim, dec_hid_dim, dropout,
             embedding_matrix):
    super().__init__()
    self.vocab_size = vocab_size
    self.dec_hid_dim = dec_hid_dim
    self.dropout = nn.Dropout(dropout)
    self.word_embedding = nn.Embedding(vocab_size, embed_dim)
    self.word_embedding.weight.data.copy_(torch.from_numpy(embedding_matrix))
    # model_name_or_path is assumed to be defined at module scope
    # (e.g. a pretrained checkpoint name such as 'bert-base-uncased').
    self.encoder = BertModel.from_pretrained(model_name_or_path)
    for param in self.encoder.parameters():
        param.requires_grad = True
    # self.transform = nn.Linear()
    self.decoder = Decoder(vocab_size, embed_dim, enc_hid_dim, dec_hid_dim)
def __init__(self, config, gpu_list, *args, **params):
    super(BertXQA, self).__init__()
    self.bert = BertModel.from_pretrained(config.get("model", "bert_path"))
    self.dropout = nn.Dropout(0.2)
    self.criterion = nn.CrossEntropyLoss()
    # self.multi = config.getboolean("data", "multi_choice")
    # self.multi_module = nn.Linear(4, 15)
    # self.softmax = nn.Softmax(dim=-1)

    # (b, 4, 768) -> conv(b, 4, 768) -> mp(b, 3, 6)
    self.conv_module = nn.Sequential(
        nn.Conv2d(1, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
        nn.BatchNorm2d(1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=(2, 32), stride=(2, 32), padding=(1, 1)))
    self.linear = nn.Linear(18, config.getint("model", "num_classes"))
    self.accuracy_function = single_label_top1_accuracy
    self.bn = nn.BatchNorm1d(config.getint("model", "num_classes"))
    self.num_classes = config.getint("model", "num_classes")
def __init__(self, config):
    """Initialize the model with a config dict.

    Args:
        config: a python object that must contain the attributes below:
            config.bert_model_path: pretrained model path or model type,
                e.g. 'bert-base-chinese'
            config.hidden_size: the same as the BERT model, usually 768
            config.num_conv_filters: number of filters in the Conv1d stack
            config.output_channel: output channels of the last Conv1d
            config.num_fc_hidden_size: hidden size of the fully connected head
            config.num_classes: int, e.g. 2
            config.dropout: float between 0 and 1
    """
    super().__init__()
    self.bert = BertModel.from_pretrained(config.bert_model_path)
    for param in self.bert.parameters():
        param.requires_grad = True
    num_conv_filters = config.num_conv_filters
    output_channel = config.output_channel
    hidden_size = config.num_fc_hidden_size
    target_class = config.num_classes
    input_channel = config.hidden_size

    # data(b, 512, 768) -> conv(b, 511, 767) -> bn -> mp(b, 4, 6)
    self.conv1 = nn.Conv1d(input_channel, num_conv_filters, kernel_size=7)
    self.conv2 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=7)
    self.conv3 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=5)
    self.conv4 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=5)
    self.conv5 = nn.Conv1d(num_conv_filters, num_conv_filters, kernel_size=3)
    self.conv6 = nn.Conv1d(num_conv_filters, output_channel, kernel_size=3)

    # The CNN feature maps have 228 dimensions in total.
    self.dropout = nn.Dropout(config.dropout)
    self.fc1 = nn.Linear(output_channel, hidden_size)
    self.fc2 = nn.Linear(hidden_size, hidden_size)
    self.fc3 = nn.Linear(hidden_size, target_class)
    self.num_classes = config.num_classes
def __init__(
    self,
    vocab: Vocabulary,
    bert_model: Union[str, BertModel],
    feedforward: Optional[FeedForward] = None,
    dropout: float = None,
    num_labels: int = None,
    label_namespace: str = "labels",
    initializer: InitializerApplicator = InitializerApplicator(),
) -> None:
    super().__init__(vocab)

    if isinstance(bert_model, str):
        self.bert_model = BertModel.from_pretrained(bert_model)
    else:
        self.bert_model = bert_model

    self._feedforward = feedforward
    if feedforward is not None:
        self._classifier_input_dim = self._feedforward.get_output_dim()
    else:
        self._classifier_input_dim = self.bert_model.config.hidden_size

    if dropout:
        self._dropout = torch.nn.Dropout(dropout)
    else:
        self._dropout = None

    self._label_namespace = label_namespace
    if num_labels:
        self._num_labels = num_labels
    else:
        self._num_labels = vocab.get_vocab_size(namespace=self._label_namespace)

    # Classifier
    self._classification_layer = torch.nn.Linear(self._classifier_input_dim, self._num_labels)
    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    initializer(self)
def __init__(self, config, args):
    super().__init__(config)
    self.args = args

    if args.bert_model == "albert-base-v2":
        bert = AlbertModel.from_pretrained(args.bert_model)
    elif args.bert_model == "emilyalsentzer/Bio_ClinicalBERT":
        bert = AutoModel.from_pretrained(args.bert_model)
    elif args.bert_model == "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12":
        bert = AutoModel.from_pretrained(args.bert_model)
    elif args.bert_model == "bert-small-scratch":
        config = BertConfig.from_pretrained("google/bert_uncased_L-4_H-512_A-8")
        bert = BertModel(config)
    elif args.bert_model == "bert-base-scratch":
        config = BertConfig.from_pretrained("bert-base-uncased")
        bert = BertModel(config)
    else:
        bert = BertModel.from_pretrained(args.bert_model)  # bert-base-uncased, small, tiny

    self.txt_embeddings = bert.embeddings
    self.img_embeddings = ImageBertEmbeddings(args, self.txt_embeddings)

    if args.img_encoder == 'ViT':
        img_size = args.img_size
        patch_sz = 32 if img_size == 512 else 16
        self.img_encoder = Img_patch_embedding(image_size=img_size,
                                               patch_size=patch_sz,
                                               dim=2048)
    else:
        self.img_encoder = ImageEncoder_cnn(args)
        for p in self.img_encoder.parameters():
            p.requires_grad = False
        for c in list(self.img_encoder.children())[5:]:
            for p in c.parameters():
                p.requires_grad = True

    self.encoder = bert.encoder
    self.pooler = bert.pooler
def __init__(self, args, tok=None):
    super().__init__()
    cfg = BertConfig.from_json_file(args.config_path)
    cfg.hidden_size = args.hidden_dim
    cfg.vocab_size = 3  # [SEP], [CLS], [PAD]
    cfg.type_vocab_size = 3  # seq 0 vid, seq 1 vid, text
    self.video_transformer = VideoTransformer(cfg, args, tok)
    self.clip_prediction = VideoTransformerHead(d_in=args.hidden_dim,
                                                d_out=3,
                                                hidden_act=cfg.hidden_act)
    self.next_seq_prediction = VideoTransformerHead(d_in=args.hidden_dim,
                                                    d_out=2,
                                                    hidden_act=cfg.hidden_act,
                                                    pool='first')
    self.args = args
    if self.args.svo:
        self.svo_decoder_head = nn.Sequential(
            nn.GELU(), nn.Linear(args.hidden_dim, args.svo_dim * 3))
        self.svo_decoder_embs = nn.Linear(args.svo_dim, args.svo_vocab_size, bias=False)
        if self.args.svo_pretrained_embs:
            # Initialize each SVO vocabulary row with the mean of its BERT
            # wordpiece embeddings.
            tok = BertTokenizer.from_pretrained('bert-base-uncased')
            bert = BertModel.from_pretrained('bert-base-uncased')
            self.svo_decoder_embs.weight.data = torch.stack([
                bert.embeddings.word_embeddings(
                    torch.tensor(tok.encode(_, add_special_tokens=False) if _ else [0])
                ).mean(dim=0)
                for _ in args.svo_vocab
            ])
            del bert
            del tok
def __init__(self, data):
    super(BertNER, self).__init__()
    self.gpu = data.HP_gpu
    self.use_bert = data.use_bert
    self.bertpath = data.bertpath

    char_feature_dim = 768
    print('total char_feature_dim is {}'.format(char_feature_dim))

    self.bert_encoder = BertModel.from_pretrained(self.bertpath)
    self.hidden2tag = nn.Linear(char_feature_dim, data.label_alphabet_size + 2)
    self.drop = nn.Dropout(p=data.HP_dropout)
    self.crf = CRF(data.label_alphabet_size, self.gpu)

    if self.gpu:
        self.bert_encoder = self.bert_encoder.cuda()
        self.hidden2tag = self.hidden2tag.cuda()
        self.crf = self.crf.cuda()
def __init__(self, config, args):
    super().__init__(config)
    self.args = args

    if args.bert_model == "emilyalsentzer/Bio_ClinicalBERT":
        bert = AutoModel.from_pretrained(args.bert_model)
    elif args.bert_model == "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12":
        bert = AutoModel.from_pretrained(args.bert_model)
    elif args.bert_model == "bert-small-scratch":
        config = BertConfig.from_pretrained("google/bert_uncased_L-4_H-512_A-8")
        bert = BertModel(config)
    elif args.bert_model == "bert-base-scratch":
        config = BertConfig.from_pretrained("bert-base-uncased")
        bert = BertModel(config)
    else:
        bert = BertModel.from_pretrained(args.bert_model)  # bert-base-uncased, small, tiny

    self.txt_embeddings = bert.embeddings
    self.encoder = bert.encoder
    self.pooler = bert.pooler
def __init__(self, hidden_size, dropout, device):
    super(BERTEncoder, self).__init__()
    self.device = device

    # Load config and pre-trained model
    pre_trained_model = BertModel.from_pretrained(
        args['bert_model'],
        cache_dir=PYTORCH_PRETRAINED_BERT_CACHE / 'distributed_{}'.format(-1))
    bert_config = pre_trained_model.config

    # modify config if you want
    bert_config.num_hidden_layers = args['num_bert_layers']
    self.bert = BertModel(bert_config)
    # load desired layers from pre-trained model
    self.bert.load_state_dict(pre_trained_model.state_dict(), strict=False)

    self.proj = nn.Linear(bert_config.hidden_size, hidden_size)
    self.dropout = dropout
    self.dropout_layer = nn.Dropout(dropout)
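# A minimal sketch of the same layer-truncation trick in isolation (hedged:
# a standalone illustration, not part of the original module):
import copy

from transformers import BertModel

full = BertModel.from_pretrained('bert-base-uncased')  # 12-layer checkpoint
cfg = copy.deepcopy(full.config)
cfg.num_hidden_layers = 4                              # keep only the first 4 layers
small = BertModel(cfg)
# strict=False copies the keys both models share and ignores the checkpoint's
# deeper layers as unexpected keys.
missing, unexpected = small.load_state_dict(full.state_dict(), strict=False)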