def __init__(self, config: BertConfig):
    super().__init__()
    self.embeddings = BertEmbeddings(config)
    self.encoder = SanEncoder(config.hidden_size, config.num_hidden_layers, True,
                              config.hidden_dropout_prob)
    self.pooler = SanPooler(config.hidden_size, config.hidden_dropout_prob)
    self.config = config

def __init__(self, config):
    super(BertModel, self).__init__(config)
    self.embeddings = BertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config)
    print("BertModel init bert weights")
    self.apply(self.init_bert_weights)

def __init__(self, config, gen_attention_mask):
    super(BertWithCustomAttentionMask, self).__init__(config)
    self.embeddings = BertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)
    self.gen_attention_mask = gen_attention_mask

def __init__(self, bert_model_path, decoder_config, device):
    super().__init__()
    self.bert_encoder = BertModel.from_pretrained(bert_model_path)
    bert_config_file = os.path.join(bert_model_path, CONFIG_NAME)
    bert_config = BertConfig.from_json_file(bert_config_file)
    self.device = device
    self.bert_emb = BertEmbeddings(bert_config)
    self.decoder = BertDecoder(decoder_config, self.bert_emb, device)
    self.teacher_forcing = 0.5

def __init__(self, config):
    """
    :param config: a BertConfig class instance with the configuration to build a new model
    :type config: BertConfig
    """
    super(BertModel, self).__init__(config)
    self.embeddings = BertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)

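# Hedged usage sketch (not from the original snippets): assuming the
# pytorch_pretrained_bert-style BertModel built above, a forward pass takes
# input_ids, token_type_ids and an attention mask, and returns the encoded
# layers together with the pooled [CLS] output. The example_* function name
# and the tensor values are illustrative only, mirroring the test helpers below.
def example_bert_model_forward():
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
    config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
                        num_hidden_layers=12, num_attention_heads=12,
                        intermediate_size=3072)
    model = BertModel(config)
    # output_all_encoded_layers=False returns only the final layer's hidden states
    encoded_layers, pooled_output = model(input_ids, token_type_ids, input_mask,
                                          output_all_encoded_layers=False)
    print(encoded_layers.shape, pooled_output.shape)
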
def test_BertEmbeddings():
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
    config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
                        num_hidden_layers=12, num_attention_heads=12,
                        intermediate_size=3072)
    model = BertEmbeddings(config)
    print(model(input_ids, token_type_ids))

def __init__(self, bert_cfg: BertConfig, cfg: Optional[Dict[str, Any]] = None):
    self.bert_cfg = bert_cfg
    super().__init__(bert_cfg)
    self.cfg = cfg
    self.embeddings = BertEmbeddings(bert_cfg)
    self.encoder = BertEncoder(bert_cfg)
    self.pooler = BertPooler(bert_cfg)
    self.num_choices = 4
    self.classifier = nn.Linear(bert_cfg.hidden_size, 1)
    self.apply(self.init_bert_weights)

def test_BertPreTrainingHeads():
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
    config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
                        num_hidden_layers=12, num_attention_heads=12,
                        intermediate_size=3072)
    embeddings = BertEmbeddings(config)
    model = BertPreTrainingHeads(config, embeddings.word_embeddings.weight)
    embedding_output = embeddings(input_ids, token_type_ids)
    print(model(embedding_output, embedding_output))

def test_BertAttention():
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
    config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
                        num_hidden_layers=12, num_attention_heads=12,
                        intermediate_size=3072)
    embeddings = BertEmbeddings(config)
    model = BertAttention(config)
    embedding_output = embeddings(input_ids, token_type_ids)
    input_mask = input_mask.view([-1, 1, 1, input_mask.size()[-1]]).float()
    print(model(embedding_output, input_mask))

def __init__(self, config, num_labels):
    super(BertForSequenceTaggingACTA, self).__init__(config)
    self.num_labels = num_labels
    self.embeddings = BertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.gru = nn.GRU(config.hidden_size, config.hidden_size,
                      batch_first=True, bidirectional=True)
    self.crf = CRF(num_labels, batch_first=True)
    self.clf = nn.Linear(2 * config.hidden_size, num_labels)
    # nn.init.xavier_uniform_(self.clf.weight)
    # self.clf.bias.data.fill_(0.01)
    # self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)

class PrettrBertModel(BertPreTrainedModel):
    """
    Based on pytorch_pretrained_bert.BertModel, but with some extra goodies:
     - join_layer: layer to begin attention between query and document (0 for cross-attention in all layers)
     - compress_size: size of compression layer at join layer (0 for no compression)
     - compress_fp16: reduce size of floats in compression layer?
    """

    def __init__(self, config, join_layer=0, compress_size=0, compress_fp16=False):
        super(PrettrBertModel, self).__init__(config)
        self.embeddings = BertEmbeddings(config)
        self.join_layer = join_layer
        self.encoder = BertEncoder(config, join_layer, compress_size, compress_fp16)
        self.apply(self.init_bert_weights)

    def forward(self, input_ids, token_type_ids, attention_mask):
        """
        Based on pytorch_pretrained_bert.BertModel
        """
        if self.join_layer > 0:
            BAT, SEQ = attention_mask.shape
            join_mask = token_type_ids.reshape(BAT, 1, SEQ, 1) != token_type_ids.reshape(BAT, 1, 1, SEQ)
            join_mask = join_mask.float() * -10000.0
            join_mask = join_mask.to(dtype=next(self.parameters()).dtype)  # fp16 compatibility
        else:
            join_mask = None
        embedding_output = self.embeddings(input_ids, token_type_ids)
        encoded_layers = self.forward_from_layer(embedding_output,
                                                 attention_mask,
                                                 from_layer=0,
                                                 join_mask=join_mask)
        return [embedding_output] + encoded_layers

    def forward_from_layer(self, embedding_output, attention_mask, from_layer, join_mask=None):
        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
        extended_attention_mask = extended_attention_mask.to(
            dtype=next(self.parameters()).dtype)  # fp16 compatibility
        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
        encoded_layers = self.encoder(embedding_output,
                                      extended_attention_mask,
                                      output_all_encoded_layers=True,
                                      join_mask=join_mask,
                                      from_layer=from_layer)
        return encoded_layers

    def set_trainable(self, trainable, train_min_layer=0):
        if trainable:
            for param in self.parameters():
                param.requires_grad = trainable
            if train_min_layer > 0:
                for param in self.embeddings.parameters():
                    param.requires_grad = False
                for layer in self.encoder.layer[:train_min_layer - 1]:
                    for param in layer.parameters():
                        param.requires_grad = False

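# Hedged usage sketch (not from the original code): constructing PrettrBertModel so
# that query/document cross-attention only begins partway up the stack, as the class
# docstring above describes. join_layer=6 and the tensor values are illustrative
# assumptions; forward returns the embedding output followed by one tensor per
# encoder layer.
def example_prettr_forward():
    config = BertConfig(vocab_size_or_config_json_file=32000, hidden_size=768,
                        num_hidden_layers=12, num_attention_heads=12,
                        intermediate_size=3072)
    model = PrettrBertModel(config, join_layer=6, compress_size=0, compress_fp16=False)
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])  # 0 = query segment, 1 = document segment
    attention_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    layers = model(input_ids, token_type_ids, attention_mask)
    print(len(layers))  # num_hidden_layers + 1 entries (embeddings + each encoder layer)
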
def __init__(self, config):
    super(CustomBertModel, self).__init__(config)
    self.embeddings = BertEmbeddings(config)
    self.encoder = BertEncoder(config)
    self.pooler = BertPooler(config)
    self.apply(self.init_bert_weights)