Example #1
    def __init__(self,
                 args,
                 vocab,
                 pad=False,
                 bidirectional=False,
                 attention=True):
        super().__init__()
        self.args = args
        self.pad = pad
        self.num_dir = 2 if bidirectional else 1
        self.attn = attention

        # char embeddings
        self.char_emb = nn.Embedding(len(vocab['char']),
                                     self.args['char_emb_dim'],
                                     padding_idx=0)
        if self.attn:
            self.char_attn = nn.Linear(self.num_dir *
                                       self.args['char_hidden_dim'],
                                       1,
                                       bias=False)
            self.char_attn.weight.data.zero_()

        # modules
        self.charlstm = PackedLSTM(self.args['char_emb_dim'],
                                   self.args['char_hidden_dim'],
                                   self.args['char_num_layers'],
                                   batch_first=True,
                                   dropout=0 if self.args['char_num_layers'] == 1 else args['dropout'],
                                   rec_dropout=self.args['char_rec_dropout'],
                                   bidirectional=bidirectional)
        self.charlstm_h_init = nn.Parameter(
            torch.zeros(self.num_dir * self.args['char_num_layers'], 1,
                        self.args['char_hidden_dim']))
        self.charlstm_c_init = nn.Parameter(
            torch.zeros(self.num_dir * self.args['char_num_layers'], 1,
                        self.args['char_hidden_dim']))

        self.dropout = nn.Dropout(args['dropout'])
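Below is a minimal usage sketch for the constructor in Example #1. The class name CharacterModel is inferred from the call in Example #4, and the concrete values for the args keys, as well as the char_vocab placeholder, are illustrative assumptions rather than values taken from the source.

# Hypothetical usage sketch; CharacterModel and all concrete values below
# are assumptions inferred from the keys read in the __init__ above.
args = {
    'char_emb_dim': 100,       # size of each character embedding
    'char_hidden_dim': 400,    # hidden size of the character LSTM
    'char_num_layers': 1,      # one layer, so inter-layer dropout is disabled
    'char_rec_dropout': 0.0,   # recurrent dropout forwarded to PackedLSTM
    'dropout': 0.5,            # feed-forward dropout rate
}
vocab = {'char': char_vocab}   # placeholder: any vocab whose len() is the character inventory size
model = CharacterModel(args, vocab, bidirectional=True, attention=False)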
Example #2
    def __init__(self, args, vocab, pad=False, is_forward_lm=True):
        super().__init__()
        self.args = args
        self.vocab = vocab
        self.is_forward_lm = is_forward_lm
        self.pad = pad
        self.finetune = True  # always finetune unless otherwise specified

        # char embeddings
        self.char_emb = nn.Embedding(
            len(self.vocab['char']),
            self.args['char_emb_dim'],
            padding_idx=None
        )  # we use space as padding, so padding_idx is not necessary

        # modules
        self.charlstm = PackedLSTM(self.args['char_emb_dim'],
                                   self.args['char_hidden_dim'],
                                   self.args['char_num_layers'],
                                   batch_first=True,
                                   dropout=0 if self.args['char_num_layers'] == 1 else args['char_dropout'],
                                   rec_dropout=self.args['char_rec_dropout'],
                                   bidirectional=False)
        self.charlstm_h_init = nn.Parameter(
            torch.zeros(self.args['char_num_layers'], 1,
                        self.args['char_hidden_dim']))
        self.charlstm_c_init = nn.Parameter(
            torch.zeros(self.args['char_num_layers'], 1,
                        self.args['char_hidden_dim']))

        # decoder
        self.decoder = nn.Linear(self.args['char_hidden_dim'],
                                 len(self.vocab['char']))
        self.dropout = nn.Dropout(args['char_dropout'])
        self.char_dropout = SequenceUnitDropout(
            args.get('char_unit_dropout', 0), UNK_ID)
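A similar hedged sketch for Example #2. The class name CharacterLanguageModel is inferred from the CharacterLanguageModel.load calls in Example #4; the args values and the char_vocab placeholder are illustrative assumptions.

# Hypothetical usage sketch; the class name and concrete values are assumptions.
args = {
    'char_emb_dim': 100,
    'char_hidden_dim': 1024,
    'char_num_layers': 1,
    'char_rec_dropout': 0.0,
    'char_dropout': 0.05,       # feed-forward dropout; also the LSTM inter-layer dropout when char_num_layers > 1
    'char_unit_dropout': 0.0,   # probability of replacing a character with UNK_ID
}
vocab = {'char': char_vocab}    # placeholder character vocabulary
forward_lm = CharacterLanguageModel(args, vocab, is_forward_lm=True)  # unidirectional by design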
Example #3
    def __init__(self,
                 input_size,
                 hidden_size,
                 num_layers=1,
                 bias=True,
                 batch_first=False,
                 dropout=0,
                 bidirectional=False,
                 rec_dropout=0,
                 highway_func=None,
                 pad=False):
        super(HighwayLSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.dropout = dropout
        self.dropout_state = {}
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        self.highway_func = highway_func
        self.pad = pad

        self.lstm = nn.ModuleList()
        self.highway = nn.ModuleList()
        self.gate = nn.ModuleList()
        self.drop = nn.Dropout(dropout, inplace=True)

        in_size = input_size
        for l in range(num_layers):
            self.lstm.append(
                PackedLSTM(in_size,
                           hidden_size,
                           num_layers=1,
                           bias=bias,
                           batch_first=batch_first,
                           dropout=0,
                           bidirectional=bidirectional,
                           rec_dropout=rec_dropout))
            self.highway.append(
                nn.Linear(in_size, hidden_size * self.num_directions))
            self.gate.append(
                nn.Linear(in_size, hidden_size * self.num_directions))
            self.highway[-1].bias.data.zero_()
            self.gate[-1].bias.data.zero_()
            in_size = hidden_size * self.num_directions
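HighwayLSTM is the one class named explicitly in these snippets (via the super() call). The sketch below only constructs it; the sizes and the choice of torch.tanh as highway_func are assumptions for illustration.

# Construction-only sketch; sizes and highway_func are illustrative assumptions.
hlstm = HighwayLSTM(input_size=200,
                    hidden_size=400,
                    num_layers=2,
                    batch_first=True,
                    dropout=0.5,
                    bidirectional=True,
                    rec_dropout=0.3,
                    highway_func=torch.tanh)
# Layer 0 maps 200 -> 2 * 400 features and layer 1 maps 800 -> 2 * 400, so the
# per-timestep output size is hidden_size * num_directions = 800.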
Example #4
    def __init__(self, args, vocab, emb_matrix=None):
        super().__init__()

        self.vocab = vocab
        self.args = args
        self.unsaved_modules = []

        def add_unsaved_module(name, module):
            self.unsaved_modules += [name]
            setattr(self, name, module)

        # input layers
        input_size = 0
        if self.args['word_emb_dim'] > 0:
            self.word_emb = nn.Embedding(len(self.vocab['word']),
                                         self.args['word_emb_dim'], PAD_ID)
            # load pretrained embeddings if specified
            if emb_matrix is not None:
                self.init_emb(emb_matrix)
            if not self.args.get('emb_finetune', True):
                self.word_emb.weight.detach_()
            input_size += self.args['word_emb_dim']

        if self.args['char'] and self.args['char_emb_dim'] > 0:
            if self.args['charlm']:
                add_unsaved_module(
                    'charmodel_forward',
                    CharacterLanguageModel.load(args['charlm_forward_file'],
                                                finetune=False))
                add_unsaved_module(
                    'charmodel_backward',
                    CharacterLanguageModel.load(args['charlm_backward_file'],
                                                finetune=False))
            else:
                self.charmodel = CharacterModel(args,
                                                vocab,
                                                bidirectional=True,
                                                attention=False)
            input_size += self.args['char_hidden_dim'] * 2

        # optionally add an input transformation layer
        if self.args.get('input_transform', False):
            self.input_transform = nn.Linear(input_size, input_size)
        else:
            self.input_transform = None

        # recurrent layers
        self.taggerlstm = PackedLSTM(input_size,
                                     self.args['hidden_dim'],
                                     self.args['num_layers'],
                                     batch_first=True,
                                     bidirectional=True,
                                     dropout=0 if self.args['num_layers'] == 1 else self.args['dropout'])
        # self.drop_replacement = nn.Parameter(torch.randn(input_size) / np.sqrt(input_size))
        self.drop_replacement = None
        self.taggerlstm_h_init = nn.Parameter(torch.zeros(
            2 * self.args['num_layers'], 1, self.args['hidden_dim']),
                                              requires_grad=False)
        self.taggerlstm_c_init = nn.Parameter(torch.zeros(
            2 * self.args['num_layers'], 1, self.args['hidden_dim']),
                                              requires_grad=False)

        # tag classifier
        num_tag = len(self.vocab['tag'])
        self.tag_clf = nn.Linear(self.args['hidden_dim'] * 2, num_tag)
        self.tag_clf.bias.data.zero_()

        # criterion
        self.crit = CRFLoss(num_tag)

        self.drop = nn.Dropout(args['dropout'])
        self.worddrop = WordDropout(args['word_dropout'])
        self.lockeddrop = LockedDropout(args['locked_dropout'])
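Finally, a hedged sketch for the tagger constructor in Example #4. The class name Tagger, the args values, and the vocab placeholders are assumptions based only on the keys this __init__ reads; emb_matrix can be left as None to skip pretrained-embedding initialization.

# Hypothetical usage sketch; Tagger and all concrete values are assumptions.
args = {
    'word_emb_dim': 100,
    'char': True,               # enable the character branch
    'charlm': False,            # use the trainable CharacterModel instead of pretrained char LMs
    'char_emb_dim': 100,
    'char_hidden_dim': 100,
    'char_num_layers': 1,       # forwarded to CharacterModel (Example #1)
    'char_rec_dropout': 0.0,    # forwarded to CharacterModel (Example #1)
    'hidden_dim': 256,
    'num_layers': 1,
    'dropout': 0.5,
    'word_dropout': 0.05,
    'locked_dropout': 0.0,
}
vocab = {'word': word_vocab, 'char': char_vocab, 'tag': tag_vocab}  # placeholder vocab objects
model = Tagger(args, vocab, emb_matrix=None)  # pass a pretrained matrix to initialize word_emb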