def __init__(self, config, q_dim):
    super(TransformerPredictionLayer, self).__init__()
    self.config = config
    h_dim = config.input_dim
    self.hidden = h_dim

    self.position_encoder = PositionalEncoder(h_dim, config)

    # Cascade Network
    bert_config = BertConfig(config.hidden_dim, config.trans_heads, config.trans_drop)

    self.sp_transformer = BertLayer(bert_config)
    self.sp_linear = nn.Linear(h_dim * 2, 1)

    self.start_input_linear = nn.Linear(h_dim + 1, h_dim)
    self.start_transformer = BertLayer(bert_config)
    self.start_linear = nn.Linear(h_dim, 1)

    self.end_input_linear = nn.Linear(2 * h_dim + 1, h_dim)
    self.end_transformer = BertLayer(bert_config)
    self.end_linear = nn.Linear(h_dim, 1)

    self.type_input_linear = nn.Linear(2 * h_dim + 1, h_dim)
    self.type_transformer = BertLayer(bert_config)
    self.type_linear = nn.Linear(h_dim, 3)

    self.cache_S = 0
    self.cache_mask = None
def __init__(
    self,
    bert_model,
    output_dim,
    add_transformer_layer=False,
    layer_pulled=-1,
    aggregation="first",
):
    super(BertWrapper, self).__init__()
    self.layer_pulled = layer_pulled
    self.aggregation = aggregation
    self.add_transformer_layer = add_transformer_layer
    # deduce bert output dim from the size of embeddings
    bert_output_dim = bert_model.embeddings.word_embeddings.weight.size(1)
    if add_transformer_layer:
        config_for_one_layer = BertConfig(
            0,
            hidden_size=bert_output_dim,
            num_attention_heads=int(bert_output_dim / 64),
            intermediate_size=3072,
            hidden_act='gelu',
        )
        self.additional_transformer_layer = BertLayer(config_for_one_layer)
    self.additional_linear_layer = torch.nn.Linear(bert_output_dim, output_dim)
    self.bert_model = bert_model
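# Quick illustration (an assumption, not from the source) of the embedding-size
# deduction used above, assuming the pytorch-pretrained-bert BertModel API and a
# locally available checkpoint; the model name is only illustrative.
from pytorch_pretrained_bert import BertModel
bert = BertModel.from_pretrained('bert-base-uncased')
print(bert.embeddings.word_embeddings.weight.size(1))  # 768 for BERT-base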
def __init__(
    self,
    config,
    feat_dim,
    phn_size,
    sep_size,
    mask_prob=0.15,
    mask_but_no_prob=0.1,
):
    super(BertModel, self).__init__(config)
    self.mask_prob = mask_prob
    self.mask_but_no_prob = mask_but_no_prob
    self.sep_size = sep_size

    self.feat_embeddings = nn.Linear(feat_dim, config.hidden_size)
    self.feat_mask_vec = nn.Parameter(torch.zeros(feat_dim), requires_grad=True)
    self.positional_encoding = PositionalEncoding(config.hidden_size)
    self.model = BertModel

    layer = BertLayer(config)
    self.encoder = nn.ModuleList(
        [copy.deepcopy(layer) for _ in range(config.num_hidden_layers)])

    self.feat_out_layer = nn.Linear(config.hidden_size, feat_dim)
    self.target_out_layer = nn.Linear(config.hidden_size, phn_size)
    self.apply(self.init_bert_weights)
def __init__(self, vocab_size, original_hidden_size, num_layers, tau=1):
    super().__init__()
    self.bert_layer = BertLayer(BertConfig(
        vocab_size_or_config_json_file=vocab_size,
        hidden_size=original_hidden_size * num_layers,
    ))
    self.linear_layer = nn.Linear(original_hidden_size * num_layers, 1)
    self.log_sigmoid = nn.LogSigmoid()
    self.tau = tau
def __init__(self, config, my_dropout_p):
    super(BertSplitPreTrainedModel, self).__init__(config)
    logger.info(f'Model {__class__.__name__} is loading...')
    layers.set_seq_dropout(True)
    layers.set_my_dropout_prob(my_dropout_p)

    self.bert = BertModel(config)
    self.query_self_attn = layers.MultiHeadPooling(config.hidden_size, 6)
    self.value_self_attn = layers.MultiHeadPooling(config.hidden_size, 6)
    self.sentence_input = layers.BertSentInput(config)
    self.sentence_encoder = BertLayer(config)
    self.attention_score = layers.AttentionScore(config.hidden_size, 256)
def __init__(self, config):
    super(BertSentencePreTrainedModel2, self).__init__(config)
    logger.info(f'Model {__class__.__name__} is loading...')
    # layers.set_seq_dropout(True)
    # layers.set_my_dropout_prob(my_dropout_p)

    self.bert = BertModel(config)
    self.bert_sent_input = layers.BertSentInput(config)
    config.intermediate_size = 1024
    config.num_attention_heads = 6
    self.bert_layer = BertLayer(config)
    self.sent_label_predictor = nn.Linear(config.hidden_size, 1)
def __init__(self, input_dim, out_dim, config):
    super(InteractionLayer, self).__init__()
    self.config = config
    self.use_trans = config.basicblock_trans

    if config.basicblock_trans:
        bert_config = BertConfig(input_dim, config.trans_heads, config.trans_drop)
        self.transformer = BertLayer(bert_config)
        self.transformer_linear = nn.Linear(input_dim, out_dim)
    else:
        self.lstm = LSTMWrapper(input_dim, out_dim // 2, 1)
def __init__(self, config):
    super(BertEncoder, self).__init__()
    layer = BertLayer(config)
    print("BertEncoder layer init...", layer)
    # Replicate the encoder layer as many times as config.num_hidden_layers specifies
    # self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(config.num_hidden_layers)])
    encoders = []
    for _ in range(config.num_hidden_layers):
        encoders.append(copy.deepcopy(layer))
    # Append one extra (13th) encoder layer
    encoders.append(copy.deepcopy(layer))
    self.layer = nn.ModuleList(encoders)
def __init__(self, input_size, causal=True, bidirectional=False,
             num_layers=3, num_heads=4, dropout=0.2, max_seq_len=32):
    super().__init__()
    self.pos_embedding = nn.Embedding(max_seq_len, input_size)
    self.causal = causal
    self.bidirectional = bidirectional

    bert_config = self.BertConfig(input_size, num_heads, dropout)
    self.forward_transformer = nn.ModuleList(
        [BertLayer(bert_config) for _ in range(num_layers)])
    self.c_size = input_size
    if bidirectional and causal:
        self.backward_transformer = nn.ModuleList(
            [BertLayer(bert_config) for _ in range(num_layers)])
        self.c_size *= 2
    else:
        self.backward_transformer = None
def test_BertLayer():
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
    config = BertConfig(vocab_size_or_config_json_file=32000,
                        hidden_size=768,
                        num_hidden_layers=12,
                        num_attention_heads=12,
                        intermediate_size=3072)
    embeddings = BertEmbeddings(config)
    model = BertLayer(config)
    embedding_output = embeddings(input_ids, token_type_ids)
    # BertLayer expects an additive mask broadcastable to
    # (batch, num_heads, seq_len, seq_len): 0 for real tokens,
    # a large negative value for padding positions.
    input_mask = input_mask.view([-1, 1, 1, input_mask.size()[-1]]).float()
    input_mask = (1.0 - input_mask) * -10000.0
    print(model(embedding_output, input_mask))
def get_pretrained_bert(modelname, num_hidden_layers=None):
    bert = BertModel.from_pretrained(modelname)
    if num_hidden_layers is None:
        return bert
    old_num_hidden_layers = bert.config.num_hidden_layers
    if num_hidden_layers < old_num_hidden_layers:
        # Only use the bottom n layers
        del bert.encoder.layer[num_hidden_layers:]
    elif num_hidden_layers > old_num_hidden_layers:
        # Add freshly initialized BertLayer(s) on top
        for i in range(old_num_hidden_layers, num_hidden_layers):
            bert.encoder.layer.add_module(str(i), BertLayer(bert.config))
    if num_hidden_layers != old_num_hidden_layers:
        bert.config.num_hidden_layers = num_hidden_layers
    bert.init_bert_weights(bert.pooler.dense)
    return bert
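# Usage sketch for get_pretrained_bert above. The checkpoint name is only
# illustrative, and this assumes the pytorch-pretrained-bert style BertModel
# whose encoder exposes a .encoder.layer ModuleList.
bert6 = get_pretrained_bert('bert-base-uncased', num_hidden_layers=6)
assert len(bert6.encoder.layer) == 6    # bottom 6 layers kept

bert16 = get_pretrained_bert('bert-base-uncased', num_hidden_layers=16)
assert len(bert16.encoder.layer) == 16  # 4 freshly added BertLayer modules on top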
def __init__(self, bert_model, mode, add_transformer_layer=False,
             layer_pulled=-1, aggregation="first"):
    super().__init__()
    self.layer_pulled = layer_pulled
    self.aggregation = aggregation
    self.add_transformer_layer = add_transformer_layer
    self.bert_model = bert_model

    bert_output_dim = bert_model.embeddings.word_embeddings.weight.size(1)
    if add_transformer_layer:
        config_for_one_layer = BertConfig(
            0,
            hidden_size=bert_output_dim,
            num_attention_heads=int(bert_output_dim / 64),
            intermediate_size=3072,
            hidden_act='gelu')
        self.additional_transformer_layer = BertLayer(config_for_one_layer)
    # Possibly add final linear layer
    self.output_linear = None
    assert mode in ['bi_encoder', 'cross_encoder']
    if mode == 'cross_encoder':
        print('BertWrapper (cross_encoder): adding linear output_linear')
        self.output_linear = torch.nn.Linear(bert_output_dim, 1)
def __init__(self, hidden_size, config):
    super().__init__()
    self.layer1 = BertLayer(config)
    self.layer2 = BertLayer(config)
    self.xlayer = BertLayer(config)
def __init__(self, config):
    super(Interaction_2layer, self).__init__()
    layer = BertLayer(config)
    self.layer = nn.ModuleList([copy.deepcopy(layer) for _ in range(2)])
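# A minimal sketch (not from the source) of how a stack like Interaction_2layer's
# self.layer is typically driven, assuming the pytorch-pretrained-bert BertLayer
# signature where each layer takes (hidden_states, extended_attention_mask) and
# returns a tensor of the same shape.
def run_bert_layer_stack(layers, hidden_states, attention_mask):
    # attention_mask: (batch, seq_len) with 1 for real tokens, 0 for padding.
    # Convert it to the additive (batch, 1, 1, seq_len) form BertLayer expects.
    extended_mask = attention_mask[:, None, None, :].to(hidden_states.dtype)
    extended_mask = (1.0 - extended_mask) * -10000.0
    for layer in layers:
        hidden_states = layer(hidden_states, extended_mask)
    return hidden_states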