Code example #1
    def __init__(self, h, d_model, dropout=0.1):
        super(MultiHeadedAttention, self).__init__()
        assert d_model % h == 0
        self.d_k = d_model // h  # We assume d_v always equals d_k
        self.h = h
        self.linears = clones(nn.Linear(d_model, d_model), 4)
        self.dropout = nn.Dropout(dropout)
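Nearly every example on this page relies on a clones helper (or utils.clones) that is not shown. The sketch below is a common minimal definition of such a helper; the body is an assumption and is not taken from any of these projects:

import copy
import torch.nn as nn

def clones(module, N):
    # Return N independent deep copies of module, wrapped in a ModuleList
    # so that the copies' parameters are registered with the parent module.
    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])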
Code example #2
    def __init__(self, size, attention, feed_forward, keep_prob):
        super(EncoderLayerGoogle, self).__init__()
        self.size = size
        self.attention = attention
        self.feed_forward = feed_forward
        # Each encoder layer has two sublayers
        self.sublayer = clones(ResidualConnectionGoogle(size, keep_prob), 2)
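Example #2 only shows construction. A minimal sketch of a matching forward pass, under the assumption that each ResidualConnectionGoogle instance wraps a callable sublayer with a residual connection (the method body is assumed, not taken from the project):

    def forward(self, x, mask):
        # Sublayer 1: self-attention over the input, wrapped in a residual connection.
        x = self.sublayer[0](x, lambda x: self.attention(x, x, x, mask))
        # Sublayer 2: position-wise feed-forward network, also wrapped.
        return self.sublayer[1](x, self.feed_forward)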
Code example #3
    def __init__(self, size, self_attn, src_attn, feed_forward, dropout):
        super(DecoderLayer, self).__init__()
        self.self_attn = self_attn
        self.src_attn = src_attn
        self.feed_forward = feed_forward
        # Each decoder layer has three sublayers: self-attention,
        # encoder-decoder (source) attention, and feed-forward
        self.sublayers = clones(SublayerConnection(size, dropout), 3)
        self.size = size
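Likewise, a sketch of the forward pass that usually accompanies a decoder layer like example #3, assuming each SublayerConnection applies residual + normalization around a callable sublayer and that memory is the encoder output (the method body is assumed):

    def forward(self, x, memory, src_mask, tgt_mask):
        m = memory
        # Sublayer 1: masked self-attention over the decoder input.
        x = self.sublayers[0](x, lambda x: self.self_attn(x, x, x, tgt_mask))
        # Sublayer 2: attention over the encoder memory.
        x = self.sublayers[1](x, lambda x: self.src_attn(x, m, m, src_mask))
        # Sublayer 3: position-wise feed-forward network.
        return self.sublayers[2](x, self.feed_forward)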
Code example #4
    def __init__(self, heads=8, d_model=512, keep_prob=0.1):
        super(MultiHeadedAttentionGoogle, self).__init__()
        assert d_model % heads == 0
        self.d_k = d_model // heads
        self.heads = heads
        self.linears = clones(nn.Linear(d_model, d_model), 4)
        self.attn = None
        self.dropout = nn.Dropout(keep_prob)
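Examples #4 and #5 keep an attn attribute that their forward passes fill with the attention weights; they typically delegate to a scaled dot-product attention function along the lines of the sketch below (the function name and signature are assumptions):

import math
import torch
import torch.nn.functional as F

def attention(query, key, value, mask=None, dropout=None):
    # Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V.
    d_k = query.size(-1)
    scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(d_k)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e9)
    p_attn = F.softmax(scores, dim=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return torch.matmul(p_attn, value), p_attn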
Code example #5
File: attention.py  Project: JinJackson/GNN4GAT
    def __init__(self, head_nums, d_model, dropout=0.1):
        super(MultiHeadAttention, self).__init__()

        # Make sure the model dimension d_model is divisible by the number of heads head_nums
        assert d_model % head_nums == 0

        self.d_k = d_model // head_nums
        self.head_nums = head_nums
        # Four linear layers: the first three project Q, K and V; the last is
        # the output projection applied at the end of multi-head attention
        self.linears = clones(nn.Linear(d_model, d_model), 4)
        self.attn = None
        self.dropout = nn.Dropout(p=dropout)
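To show how the four linear layers, head_nums, and d_k from example #5 are actually used, here is a minimal sketch of a matching forward pass; it reuses the attention function sketched under example #4, and the method body is assumed rather than taken from the project:

    def forward(self, query, key, value, mask=None):
        if mask is not None:
            mask = mask.unsqueeze(1)  # broadcast the same mask over all heads
        n_batches = query.size(0)
        # Project Q, K, V with the first three linears and split into heads:
        # (batch, seq, d_model) -> (batch, head_nums, seq, d_k)
        query, key, value = [
            linear(x).view(n_batches, -1, self.head_nums, self.d_k).transpose(1, 2)
            for linear, x in zip(self.linears, (query, key, value))
        ]
        # Scaled dot-product attention per head.
        x, self.attn = attention(query, key, value, mask=mask, dropout=self.dropout)
        # Concatenate the heads and apply the final output projection.
        x = x.transpose(1, 2).contiguous().view(n_batches, -1, self.head_nums * self.d_k)
        return self.linears[-1](x)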
Code example #6
    def __init__(self, config):
        super(MultiHeadedAttentiveModule, self).__init__()
        assert "x_dim" in config and "y_dim" in config and "head_num" in config
        # Attention layer
        attention_config = deepcopy(config)
        attention_config["name"] = "Attention"
        self.attention = Attention(attention_config)

        self.input_dim = config["x_dim"]
        self.output_dim = config["y_dim"]
        self.head_num = config["head_num"]
        assert self.input_dim % self.head_num == 0
        self.sub_input_dim = self.input_dim // self.head_num

        self.input_linears = utils.clones(
            nn.Linear(self.input_dim, self.output_dim), 3)
        self.output_linear = nn.Linear(self.output_dim, self.output_dim)

        self.is_layer_norm = config.get("is_layer_norm", True)
        if self.is_layer_norm:
            # Attention layer norm
            self.attention_layer_norm = nn.LayerNorm([self.output_dim],
                                                     eps=1e-6)
            # FFN layer norm
            self.ffn_layer_norm = nn.LayerNorm([self.output_dim], eps=1e-6)

        self.ffn = FFN({
            "name": "FFN",
            "input_dim": self.output_dim,
            "out_dim_0": self.output_dim,
            "out_dim_1": self.output_dim
        })

        self.name = config.get("name", "MultiHeadAttentiveModule")
        logger.info(
            utils.generate_module_info(self.name, "head_num", self.head_num,
                                       "input_dim", self.input_dim,
                                       "output_dim", self.output_dim,
                                       "is_layer_norm", self.is_layer_norm))
Code example #7
    def __init__(self, layer, N):
        super(Encoder, self).__init__()
        self.layers = clones(layer, N)
        self.norm = LayerNorm(layer.size)
Code example #8
    def __init__(self, layer, num_layers):
        super(EncoderBlockGoogle, self).__init__()
        self.layers = clones(layer, num_layers)
        self.norm = LayerNormGoogle(layer.size)
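Examples #7 and #8 only build the N-layer stack. A sketch of the forward pass that usually goes with this pattern, assuming each cloned layer takes the running representation plus a mask and that the final output is layer-normalized (the method body is assumed):

    def forward(self, x, mask):
        # Pass the input (and mask) through each cloned encoder layer in turn.
        for layer in self.layers:
            x = layer(x, mask)
        # Normalize the output of the final layer.
        return self.norm(x)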
Code example #9
    def __init__(self, config):
        super(BERTSMNModel, self).__init__()
        # hyperparameters
        self.bert_hidden_size = config.get("bert_hidden_size", 768)
        self.hidden_size = config.get("hidden_size", 200)
        self.rnn_units = config.get("rnn_units", 200)
        self.bert_layers = config.get("bert_layers", [11])
        self.feature_maps = config.get("feature_maps", 8)
        self.dense_out_dim = config.get("dense_out_dim", 50)
        self.drop_prob = config.get("drop_prob", 0.0)
        self.max_num_utterance = config.get("max_num_utterance", 10)
        self.max_sentence_len = config.get("max_sentence_len", 50)
        self.final_out_features = config.get("final_out_features", 2)
        self.device = config["device"]
        assert "bert_model_dir" in config
        self.bert_model_dir = config["bert_model_dir"]
        self.bert_trainable = config["bert_trainable"]

        # build model
        # network
        self.bert_config = BertConfig.from_json_file(os.path.join(self.bert_model_dir, 'bert_config.json'))
        # self.output_layernorm = BertLayerNorm(self.bert_config)
        self.activation = gelu

        self.dropout = nn.Dropout(self.drop_prob)
        ## Sentence GRU: default batch_first is False
        self.sentence_gru = nn.GRU(self.bert_hidden_size, self.hidden_size, batch_first=True)
        ## Linear Transformation
        self.a_matrix = nn.Linear(in_features=self.hidden_size, out_features=self.hidden_size, bias=False)
        self.a_matrixs = utils.clones(
            nn.Linear(in_features=self.hidden_size, out_features=self.hidden_size, bias=False), len(self.bert_layers))

        ## Convolution layer
        ## "valid" (no) padding, in_channels = 1 + len(bert_layers) (2 by default),
        ## feature_maps out_channels (8 by default), 3x3 kernel,
        ## ReLU activation and 2d max pooling
        in_channels = 1 + len(self.bert_layers)
        self.conv1 = nn.Sequential(OrderedDict([
            ("conv1", nn.Conv2d(in_channels=in_channels, out_channels=self.feature_maps, kernel_size=(3, 3))),
            ("batchnorm", nn.BatchNorm2d(self.feature_maps)),
            ("relu1", nn.ReLU()),
            ("pool1", nn.MaxPool2d(kernel_size=(3, 3), stride=(3, 3)))
        ]))

        ## Dense: fully connected layer
        in_features = op.calculate_dim_with_initialDim_conv((self.max_sentence_len, self.max_sentence_len),
                                                               self.conv1, in_channels=in_channels)
        self.dense = nn.Sequential(OrderedDict([
            ("linear1", nn.Linear(in_features=in_features, out_features=self.dense_out_dim)),
            ("tanh1", nn.Tanh())
        ]))

        ## Final GRU: time major
        self.final_gru = nn.GRU(self.dense_out_dim, self.rnn_units)
        ## SMN Last: Linear Transformation
        self.smn_last_linear = nn.Linear(self.rnn_units, self.final_out_features)

        self.apply(self.init_weights)

        ## Bert pretrained model
        self.bert = BertModelWrapper.from_pretrained(self.bert_model_dir, cache_dir=None)
        # self.emb_linear = nn.Linear(self.bert_hidden_size, self.hidden_size)
        # self.ctxemb_linear = nn.Linear(self.bert_hidden_size, self.hidden_size)
        # self.dense_linear = nn.Linear(self.max_sentence_len * self.max_sentence_len, self.final_out_features)

        self.hidden1 = None
        self.hidden2 = None
        self.hidden3 = None
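Finally, a construction sketch for example #9. Only bert_model_dir, bert_trainable, and device have no defaults and must be supplied; every other key falls back to the defaults read in __init__. The path and values below are placeholders:

import torch

config = {
    "bert_model_dir": "/path/to/bert-base-uncased",  # must contain bert_config.json
    "bert_trainable": False,
    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    # optional overrides; omitted keys use the defaults shown above
    "hidden_size": 200,
    "max_num_utterance": 10,
    "max_sentence_len": 50,
}
model = BERTSMNModel(config)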