Example #1
class Encoder(nn.Module):
    """
  Encoder that just runs awd

  Inputs: input, seq_len
    - **input** of shape:
  Outputs: output
    - **output** is a tuple: (all_hid, last_hidden_states, emb) - NOT batch first
  """
    def __init__(self, args):
        super().__init__()

        self.bi_awd = BiAWDEmbedding(ntoken=21,
                                     ninp=320,
                                     nhid=1280,
                                     nlayers=3,
                                     tie_weights=True)
        self.bi_awd.load_pretrained()

    def forward(self, inp, seq_lengths):

        with torch.no_grad():
            (outputs, outputs_rev), (hidden, hidden_rev), emb = self.bi_awd(
                input=inp, seq_lengths=seq_lengths)

        return (outputs, outputs_rev), (hidden, hidden_rev), emb
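
A minimal usage sketch for this encoder, assuming (not stated above) that bi_awd takes a (seq_len, batch) LongTensor of token ids over its 21-symbol vocabulary plus the true sequence lengths, and that BiAWDEmbedding and its pretrained weights are importable:

# Hypothetical usage sketch; the (seq_len, batch) layout of `inp` is an assumption.
import torch

encoder = Encoder(args=None)                       # `args` is not used by this encoder
inp = torch.randint(0, 21, (100, 8))               # assumed (seq_len, batch) token ids
seq_lengths = torch.full((8,), 100)                # true length of each sequence
(all_hid, all_hid_rev), (hid, hid_rev), emb = encoder(inp, seq_lengths)
# all_hid / all_hid_rev hold the per-layer hidden states, NOT batch first.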
Example #2
    def __init__(self, args):
        super().__init__()

        self.bi_awd = BiAWDEmbedding(ntoken=21,
                                     ninp=320,
                                     nhid=1280,
                                     nlayers=3,
                                     tie_weights=True)
        self.bi_awd.load_pretrained()
Example #3
    def __init__(self, args, bi_awd_layer, architecture):
        super().__init__()
        self.args = args
        self.bi_awd_layer = bi_awd_layer
        self.architecture = architecture
        self.densel1 = nn.Linear(self.args.n_features2, self.args.n_hid2)
        self.densel2 = nn.Linear(self.args.n_hid2, self.args.n_hid2)
        self.bi_rnn = nn.LSTM(input_size=self.args.n_hid2 +
                              self.args.n_features2,
                              hidden_size=self.args.n_hid2,
                              num_layers=3,
                              bidirectional=True,
                              batch_first=True)
        self.drop = nn.Dropout(p=0.5)
        self.relu = nn.ReLU()

        if bi_awd_layer in ["second"]:
            self.project = nn.Linear(2560, 300, bias=False)
        elif bi_awd_layer in ["last"]:
            self.project = nn.Linear(320 * 2, 300, bias=False)

        if self.architecture in ["before", "both"]:
            self.bi_rnn = nn.LSTM(input_size=self.args.n_hid2 +
                                  self.args.n_features2 + 300,
                                  hidden_size=self.args.n_hid2,
                                  num_layers=3,
                                  bidirectional=True,
                                  batch_first=True)

        init_weights(self)
        self.init_weights()

        self.bi_awd = BiAWDEmbedding(ntoken=21,
                                     ninp=320,
                                     nhid=1280,
                                     nlayers=3,
                                     tie_weights=True)
        self.bi_awd.load_pretrained()
Example #4
    def __init__(self, args, bi_awd_layer, project_size=None):
        super().__init__()
        self.args = args
        self.bi_awd_layer = bi_awd_layer
        self.project_size = project_size
        self.drop = nn.Dropout(args.hid_dropout)

        if project_size is not None and bi_awd_layer in ["first", "second"]:
            self.project = nn.Linear(2 * 1280, project_size, bias=False)
        elif project_size is not None and bi_awd_layer in ["last"]:
            self.project = nn.Linear(2 * 320, project_size, bias=False)

        if project_size is not None:
            self.lstm = nn.LSTM(project_size,
                                args.n_hid,
                                bidirectional=True,
                                batch_first=True)
        elif bi_awd_layer in ["first", "second"]:
            self.lstm = nn.LSTM(2 * 1280,
                                args.n_hid,
                                bidirectional=True,
                                batch_first=True)
        elif bi_awd_layer in ["last"]:
            self.lstm = nn.LSTM(2 * 320,
                                args.n_hid,
                                bidirectional=True,
                                batch_first=True)

        init_weights(self)

        self.bi_awd = BiAWDEmbedding(ntoken=21,
                                     ninp=320,
                                     nhid=1280,
                                     nlayers=3,
                                     tie_weights=True)
        self.bi_awd.load_pretrained()
Example #5
    def __init__(self, args, bi_awd_layer, architecture):
        super().__init__(args)
        self.architecture = architecture
        self.bi_awd_layer = bi_awd_layer

        if bi_awd_layer in ["first", "second"]:
            self.project = nn.Linear(2560, 300, bias=False)
        elif bi_awd_layer in ["last"]:
            self.project = nn.Linear(320 * 2, 300, bias=False)

        if self.architecture in ["before", "both"]:
            self.lstm = nn.LSTM(128 + 300,
                                args.n_hid,
                                bidirectional=True,
                                batch_first=True)

        init_weights(self)

        self.bi_awd = BiAWDEmbedding(ntoken=21,
                                     ninp=320,
                                     nhid=1280,
                                     nlayers=3,
                                     tie_weights=True)
        self.bi_awd.load_pretrained()
Example #6
class Encoder(nn.Module):
    """
  Encoder with bi_awd concatenated to the LSTM input

  Parameters:
    -- bi_awd_layer: first, second or last
    -- project_size: size of projection layer from bi_awd to lstm
  Inputs: input, seq_len
    - **input** of shape
  Outputs: output
    - **output** of shape (batch_size, seq_len, hidden_size*2)
  """
    def __init__(self, args, bi_awd_layer, project_size=None):
        super().__init__()
        self.args = args
        self.bi_awd_layer = bi_awd_layer
        self.project_size = project_size
        self.drop = nn.Dropout(args.hid_dropout)

        if project_size is not None and bi_awd_layer in ["first", "second"]:
            self.project = nn.Linear(2 * 1280, project_size, bias=False)
        elif project_size is not None and bi_awd_layer in ["last"]:
            self.project = nn.Linear(2 * 320, project_size, bias=False)

        if project_size is not None:
            self.lstm = nn.LSTM(project_size,
                                args.n_hid,
                                bidirectional=True,
                                batch_first=True)
        elif bi_awd_layer in ["first", "second"]:
            self.lstm = nn.LSTM(2 * 1280,
                                args.n_hid,
                                bidirectional=True,
                                batch_first=True)
        elif bi_awd_layer in ["last"]:
            self.lstm = nn.LSTM(2 * 320,
                                args.n_hid,
                                bidirectional=True,
                                batch_first=True)

        init_weights(self)

        self.bi_awd = BiAWDEmbedding(ntoken=21,
                                     ninp=320,
                                     nhid=1280,
                                     nlayers=3,
                                     tie_weights=True)
        self.bi_awd.load_pretrained()

    def forward(self, inp, seq_lengths):
        with torch.no_grad():
            (all_hid, all_hid_rev), _, _ = self.bi_awd(
                inp, seq_lengths)  # all_hid, last_hidden_states, emb

        if self.bi_awd_layer == "first":
            bi_awd_hid = all_hid[0]
            bi_awd_hid_rev = all_hid_rev[0]

            bi_awd_hid = bi_awd_hid.permute(1, 0, 2)  # (bs, seq_len, 1280)
            bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0,
                                                    2)  # (bs, seq_len, 1280)

        elif self.bi_awd_layer == "second":
            bi_awd_hid = all_hid[1]
            bi_awd_hid_rev = all_hid_rev[1]

            bi_awd_hid = bi_awd_hid.permute(1, 0, 2)  # (bs, seq_len, 1280)
            bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0,
                                                    2)  # (bs, seq_len, 1280)

        elif self.bi_awd_layer == "last":
            bi_awd_hid = all_hid[2]
            bi_awd_hid_rev = all_hid_rev[2]

            bi_awd_hid = bi_awd_hid.permute(1, 0, 2)  # (bs, seq_len, 320)
            bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0,
                                                    2)  # (bs, seq_len, 320)

        bi_awd_hid = torch.cat((bi_awd_hid, bi_awd_hid_rev),
                               dim=2)  # (bs, seq_len, 640 or 2560)

        if self.project_size is not None:
            bi_awd_hid = self.project(
                bi_awd_hid)  # (bs, seq_len, project_size)
        del bi_awd_hid_rev
        ### End BiAWDEmbedding

        bi_awd_hid = self.drop(
            bi_awd_hid)  # (batch_size, seq_len, project_size if set, else 2560 or 640)

        pack = nn.utils.rnn.pack_padded_sequence(bi_awd_hid,
                                                 seq_lengths,
                                                 batch_first=True)
        packed_output, _ = self.lstm(pack)  #h = (2, batch_size, hidden_size)
        output, _ = nn.utils.rnn.pad_packed_sequence(
            packed_output,
            batch_first=True)  #(batch_size, seq_len, hidden_size*2)

        return output
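
A minimal usage sketch for this encoder; `Args` is a hypothetical container holding only the fields read above, and the input layout is an assumption:

# Hypothetical usage sketch; `Args` and the (seq_len, batch) layout of `inp` are assumptions.
import torch

class Args:
    n_hid = 256          # BiLSTM hidden size
    hid_dropout = 0.5    # dropout applied to the bi_awd features

encoder = Encoder(Args(), bi_awd_layer="last", project_size=300)
inp = torch.randint(0, 21, (100, 4))               # assumed (seq_len, batch) token ids
seq_lengths = torch.tensor([100, 90, 80, 70])      # pack_padded_sequence expects descending lengths
output = encoder(inp, seq_lengths)                 # (batch_size, seq_len, 2 * n_hid)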
Example #7
class Encoder(BaseEncoder):
    """
  Encoder with bi_awd concatenated to the LSTM output

  Parameters:
    -- bi_awd_layer: first, second or last
    -- architecture: before, after or both

  Inputs: input, seq_len
    - **input** of shape
  Outputs: output
    - **output** of shape (batch_size, seq_len, hidden_size*2 + 300) if arch is after/both else 
      (batch_size, seq_len, hidden_size*2)
  """
    def __init__(self, args, bi_awd_layer, architecture):
        super().__init__(args)
        self.architecture = architecture
        self.bi_awd_layer = bi_awd_layer

        if bi_awd_layer in ["first", "second"]:
            self.project = nn.Linear(2560, 300, bias=False)
        elif bi_awd_layer in ["last"]:
            self.project = nn.Linear(320 * 2, 300, bias=False)

        if self.architecture in ["before", "both"]:
            self.lstm = nn.LSTM(128 + 300,
                                args.n_hid,
                                bidirectional=True,
                                batch_first=True)

        init_weights(self)

        self.bi_awd = BiAWDEmbedding(ntoken=21,
                                     ninp=320,
                                     nhid=1280,
                                     nlayers=3,
                                     tie_weights=True)
        self.bi_awd.load_pretrained()

    def forward(self, inp, seq_lengths):
        with torch.no_grad():
            (all_hid, all_hid_rev), _, _ = self.bi_awd(
                inp, seq_lengths)  # all_hid, last_hidden_states, emb

        if self.bi_awd_layer == "first":
            bi_awd_hid = all_hid[0]
            bi_awd_hid_rev = all_hid_rev[0]

            bi_awd_hid = bi_awd_hid.permute(1, 0, 2)  # (bs, seq_len, 1280)
            bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0,
                                                    2)  # (bs, seq_len, 1280)

        elif self.bi_awd_layer == "second":
            bi_awd_hid = all_hid[1]
            bi_awd_hid_rev = all_hid_rev[1]

            bi_awd_hid = bi_awd_hid.permute(1, 0, 2)  # (bs, seq_len, 1280)
            bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0,
                                                    2)  # (bs, seq_len, 1280)

        elif self.bi_awd_layer == "last":
            bi_awd_hid = all_hid[2]
            bi_awd_hid_rev = all_hid_rev[2]

            bi_awd_hid = bi_awd_hid.permute(1, 0, 2)  # (bs, seq_len, 320)
            bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0,
                                                    2)  # (bs, seq_len, 320)

        bi_awd_hid = torch.cat((bi_awd_hid, bi_awd_hid_rev),
                               dim=2)  # (bs, seq_len, 640 or 2560)
        bi_awd_hid = self.project(bi_awd_hid)  # (bs, seq_len, 300)
        del bi_awd_hid_rev
        ### End BiAWDEmbedding

        inp = self.embed(inp)  # (batch_size, seq_len, emb_size)

        inp = self.in_drop1d(inp)  # feature dropout
        inp = self.in_drop2d(
            inp)  # (batch_size, seq_len, emb_size) - 2d dropout

        inp = inp.permute(0, 2, 1)  # (batch_size, emb_size, seq_len)
        conv_cat = torch.cat(
            [self.relu(conv(inp)) for conv in self.convs],
            dim=1)  # (batch_size, emb_size*len(convs), seq_len)
        inp = self.relu(
            self.cnn_final(conv_cat))  #(batch_size, out_channels=128, seq_len)

        inp = inp.permute(0, 2, 1)  #(batch_size, seq_len, out_channels=128)
        if self.architecture in ["before", "both"]:
            inp = torch.cat((inp, bi_awd_hid), dim=2)

        inp = self.drop(inp)  #( batch_size, seq_len, lstm_input_size)

        pack = nn.utils.rnn.pack_padded_sequence(inp,
                                                 seq_lengths,
                                                 batch_first=True)
        packed_output, _ = self.lstm(pack)  #h = (2, batch_size, hidden_size)
        output, _ = nn.utils.rnn.pad_packed_sequence(
            packed_output,
            batch_first=True)  #(batch_size, seq_len, hidden_size*2)

        if self.architecture in ["after", "both"]:
            output = torch.cat(
                (output, bi_awd_hid),
                dim=2)  # (batch_size, seq_len, hidden_size*2+300)

        return output
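
For orientation, a standalone shape sketch of the `before`/`after` wiring above; the CNN and BiLSTM outputs are stand-in tensors and the sizes are illustrative:

# Standalone shape sketch of the "before"/"after" concatenation; sizes are illustrative.
import torch

bs, seq_len, n_hid = 4, 50, 256
cnn_out = torch.randn(bs, seq_len, 128)        # stands in for the cnn_final output
bi_awd_proj = torch.randn(bs, seq_len, 300)    # stands in for the projected bi_awd features

lstm_in = torch.cat((cnn_out, bi_awd_proj), dim=2)     # "before"/"both": (bs, seq_len, 128 + 300)
lstm_out = torch.randn(bs, seq_len, 2 * n_hid)         # stands in for the BiLSTM output
final = torch.cat((lstm_out, bi_awd_proj), dim=2)      # "after"/"both": (bs, seq_len, 2*n_hid + 300)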
Example #8
class Encoder(BaseEncoder):
    """
  Encoder with bi_awd concatenated to the LSTM output

  Parameters:
    -- bi_awd_layer: last or second
    -- architecture: before, after or both

  Inputs: input, seq_len
    - **input** of shape
  Outputs: output
    - **output** of shape (batch_size, seq_len, hidden_size*2 + 300) if arch is after/both else 
      (batch_size, seq_len, hidden_size*2)
  """
    def __init__(self, args, direction):
        super().__init__(args)
        self.direction = direction

        self.bi_awd = BiAWDEmbedding(ntoken=21,
                                     ninp=320,
                                     nhid=1280,
                                     nlayers=3,
                                     tie_weights=True)
        self.bi_awd.load_pretrained()

    def forward(self, inp, seq_lengths):
        with torch.no_grad():
            (all_hid, all_hid_rev), _, _ = self.bi_awd(
                inp, seq_lengths)  # all_hid, last_hidden_states, emb

        if self.direction == "forward":
            bi_awd_hid = all_hid[2].permute(1, 0, 2)  # (bs, seq_len, 320)
        elif self.direction == "backward":
            bi_awd_hid = all_hid_rev[2].permute(1, 0, 2)  # (bs, seq_len, 320)
        ### End BiAWDEmbedding

        inp = self.embed(inp)  # (batch_size, seq_len, emb_size)

        inp = self.in_drop1d(inp)  # feature dropout
        inp = self.in_drop2d(
            inp)  # (batch_size, seq_len, emb_size) - 2d dropout

        inp = inp.permute(0, 2, 1)  # (batch_size, emb_size, seq_len)
        conv_cat = torch.cat(
            [self.relu(conv(inp)) for conv in self.convs],
            dim=1)  # (batch_size, emb_size*len(convs), seq_len)
        inp = self.relu(
            self.cnn_final(conv_cat))  #(batch_size, out_channels=128, seq_len)

        inp = inp.permute(0, 2, 1)  #(batch_size, seq_len, out_channels=128)
        inp = self.drop(inp)  #( batch_size, seq_len, 128)

        pack = nn.utils.rnn.pack_padded_sequence(inp,
                                                 seq_lengths,
                                                 batch_first=True)
        packed_output, _ = self.lstm(pack)  #h = (2, batch_size, hidden_size)
        output, _ = nn.utils.rnn.pad_packed_sequence(
            packed_output,
            batch_first=True)  #(batch_size, seq_len, hidden_size*2)

        output = torch.cat((output, bi_awd_hid),
                           dim=2)  # (batch_size, seq_len, hidden_size*2+320)

        return output
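
A standalone shape sketch of what this encoder appends; the bi_awd and BiLSTM tensors are stand-ins with illustrative sizes:

# Standalone shape sketch; only the last bi_awd layer (320 units) of one direction is appended.
import torch

bs, seq_len, n_hid = 4, 50, 256
all_hid_last = torch.randn(seq_len, bs, 320)              # all_hid[2] or all_hid_rev[2], not batch first
bi_awd_hid = all_hid_last.permute(1, 0, 2)                # (bs, seq_len, 320)
lstm_out = torch.randn(bs, seq_len, 2 * n_hid)            # stands in for the BiLSTM output
output = torch.cat((lstm_out, bi_awd_hid), dim=2)         # (bs, seq_len, 2*n_hid + 320)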
Example #9
class Encoder(nn.Module):
    def __init__(self, args, bi_awd_layer, architecture):
        super().__init__()
        self.args = args
        self.bi_awd_layer = bi_awd_layer
        self.architecture = architecture
        self.densel1 = nn.Linear(self.args.n_features2, self.args.n_hid2)
        self.densel2 = nn.Linear(self.args.n_hid2, self.args.n_hid2)
        self.bi_rnn = nn.LSTM(input_size=self.args.n_hid2 +
                              self.args.n_features2,
                              hidden_size=self.args.n_hid2,
                              num_layers=3,
                              bidirectional=True,
                              batch_first=True)
        self.drop = nn.Dropout(p=0.5)
        self.relu = nn.ReLU()

        if bi_awd_layer in ["second"]:
            self.project = nn.Linear(2560, 300, bias=False)
        elif bi_awd_layer in ["last"]:
            self.project = nn.Linear(320 * 2, 300, bias=False)

        if self.architecture in ["before", "both"]:
            self.bi_rnn = nn.LSTM(input_size=self.args.n_hid2 +
                                  self.args.n_features2 + 300,
                                  hidden_size=self.args.n_hid2,
                                  num_layers=3,
                                  bidirectional=True,
                                  batch_first=True)

        init_weights(self)
        self.init_weights()

        self.bi_awd = BiAWDEmbedding(ntoken=21,
                                     ninp=320,
                                     nhid=1280,
                                     nlayers=3,
                                     tie_weights=True)
        self.bi_awd.load_pretrained()

    def init_weights(self):
        self.densel1.bias.data.zero_()
        torch.nn.init.xavier_uniform_(tensor=self.densel1.weight.data,
                                      gain=1.0)

        self.densel2.bias.data.zero_()
        torch.nn.init.xavier_uniform_(tensor=self.densel2.weight.data,
                                      gain=1.0)

    def forward(self, inp, raw, seq_lengths):
        # Something like this. Look into it when needed.
        with torch.no_grad():
            (all_hid, all_hid_rev), _, _ = self.bi_awd(
                raw, seq_lengths)  # all_hid, last_hidden_states, emb

        if self.bi_awd_layer == "last":
            bi_awd_hid = all_hid[2]
            bi_awd_hid_rev = all_hid_rev[2]

            bi_awd_hid = bi_awd_hid.permute(1, 0, 2)  # (bs, seq_len, 320)
            bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0,
                                                    2)  # (bs, seq_len, 320)

        elif self.bi_awd_layer == "second":
            bi_awd_hid = all_hid[1]
            bi_awd_hid_rev = all_hid_rev[1]

            bi_awd_hid = bi_awd_hid.permute(1, 0, 2)  # (bs, seq_len, 1280)
            bi_awd_hid_rev = bi_awd_hid_rev.permute(1, 0,
                                                    2)  # (bs, seq_len, 1280)

        bi_awd_hid = torch.cat((bi_awd_hid, bi_awd_hid_rev),
                               dim=2)  # (bs, seq_len, 640 or 2560)
        bi_awd_hid = self.project(bi_awd_hid)  # (bs, seq_len, 300)
        del bi_awd_hid_rev
        ### End BiAWDEmbedding

        x = self.relu(self.densel2(self.relu(self.densel1(inp))))
        inp = torch.cat((inp, x), dim=2)
        if self.architecture in ["before", "both"]:
            inp = torch.cat((inp, bi_awd_hid), dim=2)
        pack = nn.utils.rnn.pack_padded_sequence(inp,
                                                 seq_lengths,
                                                 batch_first=True)
        packed_output, _ = self.bi_rnn(pack)
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output,
                                                     batch_first=True)

        if self.architecture in ["after", "both"]:
            output = torch.cat(
                (output, bi_awd_hid),
                dim=2)  # (batch_size, seq_len, hidden_size*2+300)

        return output
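
A minimal usage sketch for this encoder; `Args`, the feature sizes and the layout of `raw` are hypothetical, with `inp` carrying per-position features and `raw` the token ids consumed by bi_awd:

# Hypothetical usage sketch; `Args`, the feature sizes and the `raw` layout are assumptions.
import torch

class Args:
    n_features2 = 20     # per-position input features
    n_hid2 = 256         # dense / BiLSTM hidden size

encoder = Encoder(Args(), bi_awd_layer="last", architecture="both")
bs, seq_len = 4, 100
inp = torch.randn(bs, seq_len, Args.n_features2)   # per-position features, batch first
raw = torch.randint(0, 21, (seq_len, bs))          # assumed (seq_len, batch) token ids
seq_lengths = torch.tensor([100, 100, 100, 100])
output = encoder(inp, raw, seq_lengths)            # (bs, seq_len, 2 * n_hid2 + 300)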