Example #1
    def __init__(self, word_vec_size,
                 word_vocab_size,
                 word_padding_idx,
                 position_encoding=False,
                 feat_merge="concat",
                 feat_vec_exponent=0.7, feat_vec_size=-1,
                 feat_padding_idx=[],
                 feat_vocab_sizes=[],
                 dropout=0,
                 sparse=False):

        if feat_padding_idx is None:
            feat_padding_idx = []
        self.word_padding_idx = word_padding_idx

        self.word_vec_size = word_vec_size

        # Dimensions and padding for constructing the word embedding matrix
        vocab_sizes = [word_vocab_size]
        emb_dims = [word_vec_size]
        pad_indices = [word_padding_idx]

        # Dimensions and padding for feature embedding matrices
        # (these have no effect if feat_vocab_sizes is empty)
        if feat_merge == 'sum':
            feat_dims = [word_vec_size] * len(feat_vocab_sizes)
        elif feat_vec_size > 0:
            feat_dims = [feat_vec_size] * len(feat_vocab_sizes)
        else:
            feat_dims = [int(vocab ** feat_vec_exponent)
                         for vocab in feat_vocab_sizes]
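        # For example, with the default feat_vec_exponent of 0.7, feature
        # vocabularies of sizes [40, 12] give
        # feat_dims == [int(40 ** 0.7), int(12 ** 0.7)] == [13, 5].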
        vocab_sizes.extend(feat_vocab_sizes)
        emb_dims.extend(feat_dims)
        pad_indices.extend(feat_padding_idx)

        # The embedding matrix look-up tables. The first look-up table
        # is for words. Subsequent ones are for features, if any exist.
        emb_params = zip(vocab_sizes, emb_dims, pad_indices)
        embeddings = [nn.Embedding(vocab, dim, padding_idx=pad, sparse=sparse)
                      for vocab, dim, pad in emb_params]
        emb_luts = Elementwise(feat_merge, embeddings)

        # The final output size of word + feature vectors. This can vary
        # from the word vector size if and only if features are defined.
        # This is the attribute you should access if you need to know
        # how big your embeddings are going to be.
        self.embedding_size = (sum(emb_dims) if feat_merge == 'concat'
                               else word_vec_size)
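        # E.g. word_vec_size=500 with feat_dims=[13] gives embedding_size=513
        # under 'concat'; any other merge mode leaves it at word_vec_size (500).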

        # The sequence of operations that converts the input sequence
        # into a sequence of embeddings. At minimum this consists of
        # looking up the embeddings for each word and feature in the
        # input. Model parameters may require the sequence to contain
        # additional operations as well.
        super(Embeddings, self).__init__()
        self.make_embedding = nn.Sequential()
        self.make_embedding.add_module('emb_luts', emb_luts)

        if feat_merge == 'mlp' and len(feat_vocab_sizes) > 0:
            in_dim = sum(emb_dims)
            out_dim = word_vec_size
            mlp = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU())
            self.make_embedding.add_module('mlp', mlp)

        self.position_encoding = position_encoding

        if self.position_encoding:
            pe = PositionalEncoding(dropout, self.embedding_size)
            self.make_embedding.add_module('pe', pe)
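
A minimal usage sketch for the constructor above, assuming it belongs to
OpenNMT-py's onmt.modules.Embeddings (the surrounding class and the
Elementwise/PositionalEncoding helpers are not shown here); the vocabulary
sizes and padding indices below are made-up illustrative values:

    from onmt.modules import Embeddings  # assumed import path (OpenNMT-py)

    # Hypothetical sizes: a 10k-word vocabulary plus one 40-entry feature vocab,
    # merged through the MLP branch rather than plain concatenation.
    emb = Embeddings(word_vec_size=500,
                     word_vocab_size=10000,
                     word_padding_idx=1,
                     feat_merge='mlp',
                     feat_padding_idx=[1],
                     feat_vocab_sizes=[40])

    # The MLP projects the 500 + 13 concatenated dims back to word_vec_size,
    # so emb.embedding_size == 500.
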
Example #2
    def __init__(self,
                 word_vec_size,
                 word_vocab_size,
                 word_padding_idx,
                 position_encoding=False,
                 feat_merge="concat",
                 feat_vec_exponent=0.7,
                 feat_vec_size=-1,
                 feat_padding_idx=[],
                 feat_vocab_sizes=[],
                 dropout=0,
                 sparse=False,
                 emb_type=None,
                 gcn_vec_size=0,
                 gcn_dropout=0,
                 gcn_edge_dropout=0,
                 n_gcn_layers=0,
                 activation='',
                 highway='',
                 treelstm_vec_size=0):

        if feat_padding_idx is None:
            feat_padding_idx = []
        self.word_padding_idx = word_padding_idx

        self.word_vec_size = word_vec_size

        # Dimensions and padding for constructing the word embedding matrix
        vocab_sizes = [word_vocab_size]
        emb_dims = [word_vec_size]
        pad_indices = [word_padding_idx]

        # Dimensions and padding for feature embedding matrices
        # (these have no effect if feat_vocab_sizes is empty)
        if feat_merge == 'sum':
            feat_dims = [word_vec_size] * len(feat_vocab_sizes)
        elif feat_vec_size > 0:
            feat_dims = [feat_vec_size] * len(feat_vocab_sizes)
        else:
            feat_dims = [
                int(vocab**feat_vec_exponent) for vocab in feat_vocab_sizes
            ]
        vocab_sizes.extend(feat_vocab_sizes)
        emb_dims.extend(feat_dims)
        pad_indices.extend(feat_padding_idx)

        # The embedding matrix look-up tables. The first look-up table
        # is for words. Subsequent ones are for features, if any exist.
        emb_params = zip(vocab_sizes, emb_dims, pad_indices)
        embeddings = [
            nn.Embedding(vocab, dim, padding_idx=pad, sparse=sparse)
            for vocab, dim, pad in emb_params
        ]
        emb_luts = Elementwise(feat_merge, embeddings)

        # The final output size of word + feature vectors. This can vary
        # from the word vector size if and only if features are defined.
        # This is the attribute you should access if you need to know
        # how big your embeddings are going to be.
        self.embedding_size = (sum(emb_dims)
                               if feat_merge == 'concat' else word_vec_size)

        # The sequence of operations that converts the input sequence
        # into a sequence of embeddings. At minimum this consists of
        # looking up the embeddings for each word and feature in the
        # input. Model parameters may require the sequence to contain
        # additional operations as well.
        super(Embeddings, self).__init__()
        self.make_embedding = nn.Sequential()
        self.make_embedding.add_module('emb_luts', emb_luts)

        self.emb_type = emb_type

        assert (self.emb_type in [
            None, 'gcn', 'treelstm', 'bi_treelstm', 'gcn_and_bi_treelstm',
            'gcn_and_treelstm'
        ])

        if self.emb_type == 'gcn':
            self.gcn = GraphConvolution(word_vec_size, gcn_vec_size,
                                        gcn_dropout, gcn_edge_dropout,
                                        n_gcn_layers, activation, highway)

        elif self.emb_type == 'treelstm':
            self.treelstm = ChildSumTreeLSTM(word_vec_size, treelstm_vec_size)

        elif self.emb_type == 'bi_treelstm':
            self.treelstm = ChildSumTreeLSTM(word_vec_size,
                                             treelstm_vec_size // 2)
            self.topdown_treelstm = TopDownTreeLSTM(treelstm_vec_size // 2,
                                                    treelstm_vec_size // 2)
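            # The bidirectional variant splits treelstm_vec_size evenly between
            # the bottom-up (ChildSum) and top-down tree-LSTM passes.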

        elif self.emb_type == "gcn_and_treelstm":
            self.gcn = GraphConvolution(word_vec_size, gcn_vec_size,
                                        gcn_dropout, gcn_edge_dropout,
                                        n_gcn_layers, activation, highway)
            self.treelstm = ChildSumTreeLSTM(word_vec_size, treelstm_vec_size)

        elif self.emb_type == "gcn_and_bi_treelstm":
            self.gcn = GraphConvolution(word_vec_size, gcn_vec_size,
                                        gcn_dropout, gcn_edge_dropout,
                                        n_gcn_layers, activation, highway)
            self.treelstm = ChildSumTreeLSTM(word_vec_size,
                                             treelstm_vec_size // 2)
            self.topdown_treelstm = TopDownTreeLSTM(treelstm_vec_size // 2,
                                                    treelstm_vec_size // 2)

        if self.emb_type is not None:
            if 'gcn' in self.emb_type and 'treelstm' not in self.emb_type:
                # GCN only: word_vec_size is subtracted, i.e. the GCN output
                # (of width gcn_vec_size) effectively takes the place of the
                # word vector; treelstm_vec_size is typically 0 here.
                self.embedding_size += gcn_vec_size + treelstm_vec_size - word_vec_size
            else:
                # Tree-LSTM variants (with or without GCN): the extra
                # representations are added on top of the existing width.
                self.embedding_size += gcn_vec_size + treelstm_vec_size
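        # Illustrative (hypothetical) sizes: with word_vec_size=500, no extra
        # word features, and emb_type='gcn_and_bi_treelstm' with
        # gcn_vec_size=256 and treelstm_vec_size=256, embedding_size grows from
        # 500 to 500 + 256 + 256 = 1012; with emb_type='gcn' alone
        # (treelstm_vec_size=0) it becomes 500 + 256 + 0 - 500 = 256.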

        if feat_merge == 'mlp' and len(feat_vocab_sizes) > 0:
            in_dim = sum(emb_dims)
            out_dim = word_vec_size
            mlp = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU())
            self.make_embedding.add_module('mlp', mlp)

        self.position_encoding = position_encoding

        if self.position_encoding:
            pe = PositionalEncoding(dropout, self.embedding_size)
            self.make_embedding.add_module('pe', pe)
Example #3
    def __init__(self,
                 word_vec_size,
                 word_vocab_size,
                 word_padding_idx,
                 position_encoding=False,
                 position_encoding_learned=False,
                 position_encoding_ctxsize=1024,
                 feat_merge="concat",
                 feat_vec_exponent=0.7,
                 feat_vec_size=-1,
                 feat_padding_idx=[],
                 feat_vocab_sizes=[],
                 dropout=0,
                 sparse=False,
                 fix_word_vecs=False,
                 GPT_representation_mode='none',
                 GPT_representation_tgt=False):
        self._validate_args(feat_merge, feat_vocab_sizes, feat_vec_exponent,
                            feat_vec_size, feat_padding_idx)

        if feat_padding_idx is None:
            feat_padding_idx = []
        self.word_padding_idx = word_padding_idx

        self.word_vec_size = word_vec_size

        # Dimensions and padding for constructing the word embedding matrix
        vocab_sizes = [word_vocab_size]
        emb_dims = [word_vec_size]
        pad_indices = [word_padding_idx]

        # Dimensions and padding for feature embedding matrices
        # (these have no effect if feat_vocab_sizes is empty)
        if feat_merge == 'sum':
            feat_dims = [word_vec_size] * len(feat_vocab_sizes)
        elif feat_vec_size > 0:
            feat_dims = [feat_vec_size] * len(feat_vocab_sizes)
        else:
            feat_dims = [
                int(vocab**feat_vec_exponent) for vocab in feat_vocab_sizes
            ]
        vocab_sizes.extend(feat_vocab_sizes)
        emb_dims.extend(feat_dims)
        pad_indices.extend(feat_padding_idx)

        # The embedding matrix look-up tables. The first look-up table
        # is for words. Subsequent ones are for features, if any exist.
        emb_params = zip(vocab_sizes, emb_dims, pad_indices)
        embeddings = [
            nn.Embedding(vocab, dim, padding_idx=pad, sparse=sparse)
            for vocab, dim, pad in emb_params
        ]

        emb_luts = Elementwise(feat_merge, embeddings)

        # The final output size of word + feature vectors. This can vary
        # from the word vector size if and only if features are defined.
        # This is the attribute you should access if you need to know
        # how big your embeddings are going to be.
        self.embedding_size = (sum(emb_dims)
                               if feat_merge == 'concat' else word_vec_size)

        # The sequence of operations that converts the input sequence
        # into a sequence of embeddings. At minimum this consists of
        # looking up the embeddings for each word and feature in the
        # input. Model parameters may require the sequence to contain
        # additional operations as well.
        super(Embeddings, self).__init__()
        self.make_embedding = nn.Sequential()
        self.make_embedding.add_module('emb_luts', emb_luts)

        if feat_merge == 'mlp' and len(feat_vocab_sizes) > 0:
            in_dim = sum(emb_dims)
            mlp = nn.Sequential(nn.Linear(in_dim, word_vec_size), nn.ReLU())
            self.make_embedding.add_module('mlp', mlp)

        self.position_encoding = position_encoding

        if self.position_encoding:
            if position_encoding_learned:
                pe = LearnedPositionalEncoding(position_encoding_ctxsize,
                                               self.embedding_size,
                                               dropout=dropout)
                if fix_word_vecs:
                    pe.pe.weight.requires_grad = False
            else:
                pe = PositionalEncoding(dropout, self.embedding_size)
            self.make_embedding.add_module('pe', pe)

        if fix_word_vecs:
            self.word_lut.weight.requires_grad = False

        self.GPT_representation_mode = GPT_representation_mode
        self.GPT_representation_tgt = GPT_representation_tgt
        if self.GPT_representation_mode != 'none':
            gpt_dropout = 0 if self.GPT_representation_mode == 'elmo' else dropout
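            # Both branches below appear to build a GPT-sized transformer
            # (12 layers, 768-dim model, 12 attention heads, 3072-dim
            # feed-forward), judging by the positional arguments; the remaining
            # flags depend on this fork's TransformerDecoder/TransformerEncoder
            # signatures.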
            if self.GPT_representation_tgt:
                self.gpt_model = onmt.decoders.TransformerDecoder(
                    12, 768, 12, 3072, False, 'scaled-dot', gpt_dropout,
                    gpt_dropout, None, 0, False, True, False, False)
            else:
                self.gpt_model = onmt.encoders.TransformerEncoder(
                    12, 768, 12, 3072, gpt_dropout, gpt_dropout, None, 0, True)
            if self.GPT_representation_mode == 'elmo':
                for p in self.gpt_model.parameters():
                    p.requires_grad = False
                self.elmo_scale_params = nn.Parameter(torch.ones(13))
                self.elmo_gamma_param = nn.Parameter(torch.full((1, ), 1.0))
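
The frozen GPT model, the 13 scale parameters (12 layer outputs plus the
embedding layer), and the gamma parameter suggest an ELMo-style scalar mix in
the forward pass, which is not shown here. A minimal sketch of that standard
mix, with hypothetical per-layer hidden states:

    import torch
    import torch.nn.functional as F

    def elmo_mix(layer_states, scale_params, gamma):
        """ELMo-style weighted sum of layer representations.

        layer_states: list of 13 tensors, each (batch, seq_len, 768).
        scale_params: tensor of shape (13,); gamma: tensor of shape (1,).
        """
        weights = F.softmax(scale_params, dim=0)        # (13,)
        stacked = torch.stack(layer_states, dim=0)      # (13, batch, seq, 768)
        mixed = (weights.view(-1, 1, 1, 1) * stacked).sum(dim=0)
        return gamma * mixed
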
Example #4
    def __init__(self,
                 word_vec_size,
                 word_vocab_size,
                 word_padding_idx,
                 position_encoding=False,
                 feat_merge="concat",
                 feat_vec_exponent=0.7,
                 feat_vec_size=-1,
                 feat_padding_idx=[],
                 feat_vocab_sizes=[],
                 dropout=0,
                 sparse=False):

        if feat_padding_idx is None:
            feat_padding_idx = []
        self.word_padding_idx = word_padding_idx
        # latt: store merge mode and feature vocab sizes on the module
        self.feat_merge = feat_merge
        self.feat_vocab_sizes = feat_vocab_sizes
        # Dimensions and padding for constructing the word embedding matrix
        vocab_sizes = [word_vocab_size]
        emb_dims = [word_vec_size]
        pad_indices = [word_padding_idx]

        # Dimensions and padding for feature embedding matrices
        # (these have no effect if feat_vocab_sizes is empty)
        if feat_merge == 'sum':
            feat_dims = [word_vec_size] * len(feat_vocab_sizes)
        elif feat_merge == 'latt':  #latt
            feat_dims = [feat_vec_size] * 1
        elif feat_vec_size > 0:
            feat_dims = [feat_vec_size] * len(feat_vocab_sizes)
        else:
            feat_dims = [
                int(vocab**feat_vec_exponent) for vocab in feat_vocab_sizes
            ]
        vocab_sizes.extend(feat_vocab_sizes)
        emb_dims.extend(feat_dims)
        pad_indices.extend(feat_padding_idx)

        # latt: adaptable number of features
        self.num_features = len(pad_indices) - 1
        # Sample debug values observed during development:
        #   source: vocab_sizes=[17398, 26997], emb_dims=[256, 256],
        #           pad_indices=[1, 1, 1, 1, 1, 1], feat_vocab_sizes=[26997]
        #   target: vocab_sizes=[17398], emb_dims=[256, 256],
        #           pad_indices=[1], feat_vocab_sizes=[]

        # The embedding matrix look-up tables. The first look-up table
        # is for words. Subsequent ones are for features, if any exist.
        emb_params = zip(vocab_sizes, emb_dims, pad_indices)

        # latt
        if feat_merge == 'latt' and len(vocab_sizes) > 1:
            embeddings = [
                nn.Embedding(vocab_sizes[0],
                             emb_dims[0],
                             padding_idx=pad_indices[0],
                             sparse=sparse)
            ]
            # Sense ("latt") embeddings are built from the lists directly rather
            # than from emb_params: the zip iterator can only be consumed once.
            embeddings_senses = [
                nn.Embedding(vocab_sizes[1],
                             emb_dims[1],
                             padding_idx=1,
                             sparse=sparse)
            ]
        else:
            embeddings = [
                nn.Embedding(vocab, dim, padding_idx=pad, sparse=sparse)
                for vocab, dim, pad in emb_params
            ]

        # latt
        if feat_merge == 'latt' and len(vocab_sizes) > 1:
            emb_luts_senses = Elementwise('latt_senses', embeddings_senses)

        emb_luts = Elementwise(feat_merge, embeddings)

        # The final output size of word + feature vectors. This can vary
        # from the word vector size if and only if features are defined.
        # This is the attribute you should access if you need to know
        # how big your embeddings are going to be.
        self.embedding_size = (sum(emb_dims)
                               if feat_merge == 'concat' else word_vec_size)

        # The sequence of operations that converts the input sequence
        # into a sequence of embeddings. At minimum this consists of
        # looking up the embeddings for each word and feature in the
        # input. Model parameters may require the sequence to contain
        # additional operations as well.
        super(Embeddings, self).__init__()
        self.make_embedding = nn.Sequential()
        self.make_embedding.add_module('emb_luts', emb_luts)

        # latt: initialize the sense-embedding pipeline 'make_embedding_latt'
        if feat_merge == 'latt' and len(vocab_sizes) > 1:
            self.make_embedding_latt = nn.Sequential()
            self.make_embedding_latt.add_module('emb_luts', emb_luts_senses)

        if feat_merge == 'mlp' and len(feat_vocab_sizes) > 0:
            in_dim = sum(emb_dims)
            out_dim = word_vec_size
            mlp = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU())
            self.make_embedding.add_module('mlp', mlp)

        self.position_encoding = position_encoding

        if self.position_encoding:
            pe = PositionalEncoding(dropout, self.embedding_size)
            self.make_embedding.add_module('pe', pe)
            # latt: share the positional encoding with 'make_embedding_latt'
            if feat_merge == 'latt' and len(vocab_sizes) > 1:
                self.make_embedding_latt.add_module('pe', pe)
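
A usage sketch for the 'latt' variant above, with sizes borrowed from the
debug sample in the code; this assumes the fork's Elementwise accepts the
'latt' and 'latt_senses' merge names (the stock OpenNMT-py Elementwise does
not) and that the modified class is still exposed as Embeddings:

    # Hypothetical instantiation: a 17398-word vocabulary with one 26997-entry
    # sense vocabulary handled by the 'latt' pipeline.
    emb = Embeddings(word_vec_size=256,
                     word_vocab_size=17398,
                     word_padding_idx=1,
                     feat_merge='latt',
                     feat_vec_size=256,
                     feat_padding_idx=[1],
                     feat_vocab_sizes=[26997])

    # Two parallel pipelines are built: emb.make_embedding for words and
    # emb.make_embedding_latt for the sense embeddings; with feat_merge='latt'
    # the reported emb.embedding_size stays at word_vec_size (256).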