Code example #1
    def __init__(self, args, pretrained):
        super(BiDAF, self).__init__()
        self.args = args

        # 1. Character Embedding Layer
        self.char_emb = nn.Embedding(args.char_vocab_size,
                                     args.char_dim,
                                     padding_idx=1)
        nn.init.uniform_(self.char_emb.weight, -0.001, 0.001)

        self.char_conv = nn.Conv2d(1, args.char_channel_size,
                                   (args.char_dim, args.char_channel_width))

        # 2. Word Embedding Layer
        # initialize word embedding with GloVe
        self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)

        # highway network
        assert self.args.hidden_size * 2 == (self.args.char_channel_size +
                                             self.args.word_dim)
        for i in range(2):
            setattr(
                self, 'highway_linear{}'.format(i),
                nn.Sequential(
                    Linear(args.hidden_size * 2, args.hidden_size * 2),
                    nn.ReLU()))
            setattr(
                self, 'highway_gate{}'.format(i),
                nn.Sequential(
                    Linear(args.hidden_size * 2, args.hidden_size * 2),
                    nn.Sigmoid()))

        # 3. Contextual Embedding Layer
        self.context_LSTM = LSTM(input_size=args.hidden_size * 2,
                                 hidden_size=args.hidden_size,
                                 bidirectional=True,
                                 batch_first=True,
                                 dropout=args.dropout)

        # 4. Attention Flow Layer
        self.att_weight_c = Linear(args.hidden_size * 2, 1)
        self.att_weight_q = Linear(args.hidden_size * 2, 1)
        self.att_weight_cq = Linear(args.hidden_size * 2, 1)

        # 5. Modeling Layer
        self.modeling_LSTM1 = LSTM(input_size=args.hidden_size * 8,
                                   hidden_size=args.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=args.dropout)

        self.modeling_LSTM2 = LSTM(input_size=args.hidden_size * 2,
                                   hidden_size=args.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=args.dropout)

        # 6. Output Layer
        self.p1_weight_g = Linear(args.hidden_size * 8,
                                  1,
                                  dropout=args.dropout)
        self.p1_weight_m = Linear(args.hidden_size * 2,
                                  1,
                                  dropout=args.dropout)
        self.p2_weight_g = Linear(args.hidden_size * 8,
                                  1,
                                  dropout=args.dropout)
        self.p2_weight_m = Linear(args.hidden_size * 2,
                                  1,
                                  dropout=args.dropout)

        self.output_LSTM = LSTM(input_size=args.hidden_size * 2,
                                hidden_size=args.hidden_size,
                                bidirectional=True,
                                batch_first=True,
                                dropout=args.dropout)

        self.dropout = nn.Dropout(p=args.dropout)
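
All four snippets call `Linear` and `LSTM` rather than `nn.Linear` and `nn.LSTM`; `torch.nn.Linear` accepts no `dropout` argument, so the projects evidently rely on small project-local wrappers whose definitions are not shown here. A minimal sketch of what such wrappers could look like, assuming the common pattern of applying dropout to the layer input (an assumption, not the projects' actual code):

import torch.nn as nn


class Linear(nn.Module):
    # Hypothetical wrapper: a fully connected layer with optional input dropout.
    def __init__(self, in_features, out_features, dropout=0.0):
        super(Linear, self).__init__()
        self.linear = nn.Linear(in_features, out_features)
        self.dropout = nn.Dropout(p=dropout) if dropout > 0 else None

    def forward(self, x):
        if self.dropout is not None:
            x = self.dropout(x)
        return self.linear(x)


class LSTM(nn.Module):
    # Hypothetical wrapper: an nn.LSTM with dropout applied to its input.
    def __init__(self, input_size, hidden_size, bidirectional=False,
                 batch_first=True, dropout=0.0):
        super(LSTM, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.rnn = nn.LSTM(input_size=input_size,
                           hidden_size=hidden_size,
                           bidirectional=bidirectional,
                           batch_first=batch_first)

    def forward(self, x):
        # returns the usual (output, (h_n, c_n)) tuple from nn.LSTM
        return self.rnn(self.dropout(x))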
Code example #2
    def __init__(self, hps):
        super(BiDAF, self).__init__()
        self.hps = hps
        # 1. Character Embedding Layer
        self.char_emb = nn.Embedding(hps["char_vocab_size"],
                                     hps["char_dim"],
                                     padding_idx=1)
        nn.init.uniform_(self.char_emb.weight, -0.001, 0.001)
        hps['char_channel_size'] = hps["hidden_size"] * 2 - hps["word_dim"]
        assert hps['char_channel_size'] > 0

        self.char_conv = nn.Conv2d(
            1, hps["char_channel_size"],
            (hps["char_dim"], hps["char_channel_width"]))

        # 2. Word Embedding Layer
        # word embedding table (vocab 50,000, dim 50); unlike example #1,
        # no pretrained GloVe vectors are loaded at construction time
        self.word_emb = nn.Embedding(50000, 50)

        # highway network
        # assert self.hps["hidden_size"] * 2 == (self.hps["char_channel_size"] + self.hps["word_dim"])
        for i in range(2):
            setattr(
                self, f'highway_linear{i}',
                nn.Sequential(
                    Linear(hps["hidden_size"] * 2, hps["hidden_size"] * 2),
                    nn.ReLU()))
            setattr(
                self, f'highway_gate{i}',
                nn.Sequential(
                    Linear(hps["hidden_size"] * 2, hps["hidden_size"] * 2),
                    nn.Sigmoid()))

        # 3. Contextual Embedding Layer
        self.context_LSTM = LSTM(input_size=hps["hidden_size"] * 2,
                                 hidden_size=hps["hidden_size"],
                                 bidirectional=True,
                                 batch_first=True,
                                 dropout=hps["dropout"])

        # 4. Attention Flow Layer
        self.att_weight_c = Linear(hps["hidden_size"] * 2, 1)
        self.att_weight_q = Linear(hps["hidden_size"] * 2, 1)
        self.att_weight_cq = Linear(hps["hidden_size"] * 2, 1)

        # 5. Modeling Layer
        self.modeling_LSTM1 = LSTM(input_size=hps["hidden_size"] * 8,
                                   hidden_size=hps["hidden_size"],
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=hps["dropout"])

        self.modeling_LSTM2 = LSTM(input_size=hps["hidden_size"] * 2,
                                   hidden_size=hps["hidden_size"],
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=hps["dropout"])

        # 6. Output Layer
        self.p1_weight_g = Linear(hps["hidden_size"] * 8,
                                  1,
                                  dropout=hps["dropout"])
        self.p1_weight_m = Linear(hps["hidden_size"] * 2,
                                  1,
                                  dropout=hps["dropout"])
        self.p2_weight_g = Linear(hps["hidden_size"] * 8,
                                  1,
                                  dropout=hps["dropout"])
        self.p2_weight_m = Linear(hps["hidden_size"] * 2,
                                  1,
                                  dropout=hps["dropout"])

        self.output_LSTM = LSTM(input_size=hps["hidden_size"] * 2,
                                hidden_size=hps["hidden_size"],
                                bidirectional=True,
                                batch_first=True,
                                dropout=hps["dropout"])

        self.dropout = nn.Dropout(p=hps["dropout"])
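
Unlike example #1, which asserts that char_channel_size + word_dim equals hidden_size * 2, this variant derives char_channel_size from the other two hyperparameters, so the concatenated character and word representations always match the highway network's input width. A quick check with illustrative numbers (hidden_size = 100 is an assumption, word_dim = 50 follows from the embedding above):

hps = {"hidden_size": 100, "word_dim": 50}   # hypothetical values
hps["char_channel_size"] = hps["hidden_size"] * 2 - hps["word_dim"]
assert hps["char_channel_size"] == 150
# char CNN output (150) + word embedding (50) = 200 = hidden_size * 2,
# which is exactly the input size of the highway network and context_LSTM.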
Code example #3
File: model.py  Project: colinsongf/baidu-mrc-1
    def __init__(self, args):
        super(BiDAF, self).__init__()
        self.args = args

        assert self.args.hidden_size * 2 == self.args.char_channel_size

        # highway network
        for i in range(2):
            setattr(
                self, f'highway_linear{i}',
                nn.Sequential(
                    Linear(args.hidden_size * 2, args.hidden_size * 2),
                    nn.ReLU()))
            setattr(
                self, f'highway_gate{i}',
                nn.Sequential(
                    Linear(args.hidden_size * 2, args.hidden_size * 2),
                    nn.Sigmoid()))

        # 3. Contextual Embedding Layer
        self.context_LSTM = LSTM(input_size=args.hidden_size * 2,
                                 hidden_size=args.hidden_size,
                                 bidirectional=True,
                                 batch_first=True,
                                 dropout=args.dropout)

        # 4. Attention Flow Layer
        self.att_weight_c = Linear(args.hidden_size * 2, 1)
        self.att_weight_q = Linear(args.hidden_size * 2, 1)
        self.att_weight_cq = Linear(args.hidden_size * 2, 1)

        # 5. Modeling Layer
        self.modeling_LSTM1 = LSTM(input_size=args.hidden_size * 8,
                                   hidden_size=args.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=args.dropout)

        self.modeling_LSTM2 = LSTM(input_size=args.hidden_size * 2,
                                   hidden_size=args.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=args.dropout)

        # 6. Output Layer
        self.p1_weight_g = Linear(args.hidden_size * 8,
                                  1,
                                  dropout=args.dropout)
        self.p1_weight_m = Linear(args.hidden_size * 2,
                                  1,
                                  dropout=args.dropout)
        self.p2_weight_g = Linear(args.hidden_size * 8,
                                  1,
                                  dropout=args.dropout)
        self.p2_weight_m = Linear(args.hidden_size * 2,
                                  1,
                                  dropout=args.dropout)

        self.output_LSTM = LSTM(input_size=args.hidden_size * 2,
                                hidden_size=args.hidden_size,
                                bidirectional=True,
                                batch_first=True,
                                dropout=args.dropout)

        # Prevent from over-fitting
        self.dropout = nn.Dropout(p=args.dropout)
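
None of the snippets show the corresponding forward pass, but the highway_linear{i} / highway_gate{i} pairs registered with setattr are conventionally combined as a gated mixture of the transformed and untransformed input. A sketch of that step, assuming the standard highway-network formulation (the actual forward methods are not shown in these examples):

    def highway_network(self, x):
        # x: (batch, seq_len, hidden_size * 2) -- concatenated char + word features
        for i in range(2):
            h = getattr(self, f'highway_linear{i}')(x)   # transformed input
            g = getattr(self, f'highway_gate{i}')(x)     # gate values in (0, 1)
            x = g * h + (1 - g) * x                      # gated mixture
        return x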
Code example #4
    def __init__(self,
                 char_vocab_size,
                 word_vocab_size,
                 pretrained,
                 word_dim=100,
                 char_dim=8,
                 char_channel_width=5,
                 char_channel_size=100,
                 dropout_rate=0.2,
                 hidden_size=100):

        super(BiDAF, self).__init__()

        self.word_dim = word_dim
        self.char_dim = char_dim
        self.char_channel_width = char_channel_width
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.char_vocab_size = char_vocab_size
        self.char_channel_size = char_channel_size
        self.word_vocab_size = word_vocab_size

        # 1. Character Embedding Layer
        self.char_emb = nn.Embedding(self.char_vocab_size,
                                     self.char_dim,
                                     padding_idx=1)
        nn.init.uniform_(self.char_emb.weight, -0.001, 0.001)

        self.char_conv = nn.Conv2d(1, self.char_channel_size,
                                   (self.char_dim, self.char_channel_width))

        # 2. Word Embedding Layer
        # initialize word embedding with GloVe
        # Freeze layer to prevent gradient update
        self.word_emb = nn.Embedding.from_pretrained(pretrained, freeze=True)

        # highway network
        assert (self.hidden_size * 2) == (self.char_channel_size +
                                          self.word_dim)
        # Create 2 hidden layers
        for i in range(2):
            setattr(
                self, 'highway_linear' + str(i),
                nn.Sequential(
                    Linear(self.hidden_size * 2, self.hidden_size * 2),
                    nn.ReLU()))
            setattr(
                self, 'highway_gate' + str(i),
                nn.Sequential(
                    Linear(self.hidden_size * 2, self.hidden_size * 2),
                    nn.Sigmoid()))

        # 3. Contextual Embedding Layer
        self.context_LSTM = LSTM(input_size=self.hidden_size * 2,
                                 hidden_size=self.hidden_size,
                                 bidirectional=True,
                                 batch_first=True,
                                 dropout=self.dropout_rate)

        # 4. Attention Flow Layer
        self.att_weight_c = Linear(self.hidden_size * 2, 1)
        self.att_weight_q = Linear(self.hidden_size * 2, 1)
        self.att_weight_cq = Linear(self.hidden_size * 2, 1)

        # 5. Modeling Layer
        self.modeling_LSTM1 = LSTM(input_size=self.hidden_size * 8,
                                   hidden_size=self.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=self.dropout_rate)

        self.modeling_LSTM2 = LSTM(input_size=self.hidden_size * 2,
                                   hidden_size=self.hidden_size,
                                   bidirectional=True,
                                   batch_first=True,
                                   dropout=self.dropout_rate)

        # 6. Output Layer
        # No softmax applied here reason: https://stackoverflow.com/questions/57516027/does-pytorch-apply-softmax-automatically-in-nn-linear
        self.p1_weight_g = Linear(self.hidden_size * 8,
                                  1,
                                  dropout=self.dropout_rate)
        self.p1_weight_m = Linear(self.hidden_size * 2,
                                  1,
                                  dropout=self.dropout_rate)
        self.p2_weight_g = Linear(self.hidden_size * 8,
                                  1,
                                  dropout=self.dropout_rate)
        self.p2_weight_m = Linear(self.hidden_size * 2,
                                  1,
                                  dropout=self.dropout_rate)

        self.output_LSTM = LSTM(input_size=self.hidden_size * 2,
                                hidden_size=self.hidden_size,
                                bidirectional=True,
                                batch_first=True,
                                dropout=self.dropout_rate)

        self.dropout = nn.Dropout(p=self.dropout_rate)
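
Example #4 exposes the hyperparameters as explicit keyword arguments, which makes the dimensional constraint visible at the call site: char_channel_size + word_dim must equal hidden_size * 2. A hypothetical instantiation (the vocabulary sizes and the random GloVe stand-in below are illustrative, not taken from the example):

import torch

# Stand-in for a real GloVe matrix: 50,000 words, 100-dimensional vectors
pretrained = torch.randn(50000, 100)

model = BiDAF(char_vocab_size=262,        # hypothetical character vocabulary
              word_vocab_size=50000,
              pretrained=pretrained,
              word_dim=100,
              char_dim=8,
              char_channel_width=5,
              char_channel_size=100,      # 100 + 100 == hidden_size * 2
              dropout_rate=0.2,
              hidden_size=100)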