def __init__(self, word_vec_size, word_vocab_size, word_padding_idx,
             position_encoding=False, feat_merge="concat",
             feat_vec_exponent=0.7, feat_vec_size=-1,
             feat_padding_idx=[], feat_vocab_sizes=[],
             dropout=0, sparse=False):
    if feat_padding_idx is None:
        feat_padding_idx = []
    self.word_padding_idx = word_padding_idx
    self.word_vec_size = word_vec_size

    # Dimensions and padding for constructing the word embedding matrix
    vocab_sizes = [word_vocab_size]
    emb_dims = [word_vec_size]
    pad_indices = [word_padding_idx]

    # Dimensions and padding for feature embedding matrices
    # (these have no effect if feat_vocab_sizes is empty)
    if feat_merge == 'sum':
        feat_dims = [word_vec_size] * len(feat_vocab_sizes)
    elif feat_vec_size > 0:
        feat_dims = [feat_vec_size] * len(feat_vocab_sizes)
    else:
        feat_dims = [int(vocab ** feat_vec_exponent)
                     for vocab in feat_vocab_sizes]
    vocab_sizes.extend(feat_vocab_sizes)
    emb_dims.extend(feat_dims)
    pad_indices.extend(feat_padding_idx)

    # The embedding matrix look-up tables. The first look-up table
    # is for words. Subsequent ones are for features, if any exist.
    emb_params = zip(vocab_sizes, emb_dims, pad_indices)
    embeddings = [nn.Embedding(vocab, dim, padding_idx=pad, sparse=sparse)
                  for vocab, dim, pad in emb_params]
    emb_luts = Elementwise(feat_merge, embeddings)

    # The final output size of word + feature vectors. This can vary
    # from the word vector size if and only if features are defined.
    # This is the attribute you should access if you need to know
    # how big your embeddings are going to be.
    self.embedding_size = (sum(emb_dims) if feat_merge == 'concat'
                           else word_vec_size)

    # The sequence of operations that converts the input sequence
    # into a sequence of embeddings. At minimum this consists of
    # looking up the embeddings for each word and feature in the
    # input. Model parameters may require the sequence to contain
    # additional operations as well.
    super(Embeddings, self).__init__()
    self.make_embedding = nn.Sequential()
    self.make_embedding.add_module('emb_luts', emb_luts)

    if feat_merge == 'mlp' and len(feat_vocab_sizes) > 0:
        in_dim = sum(emb_dims)
        out_dim = word_vec_size
        mlp = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU())
        self.make_embedding.add_module('mlp', mlp)

    self.position_encoding = position_encoding
    if self.position_encoding:
        pe = PositionalEncoding(dropout, self.embedding_size)
        self.make_embedding.add_module('pe', pe)
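# --- Illustrative usage (a sketch, not part of the original file; all
# sizes and pad indices below are made up). `Embeddings` is the class this
# constructor belongs to, per the super(Embeddings, self).__init__() call;
# it requires torch.nn as nn plus the project-local Elementwise and
# PositionalEncoding modules.
emb = Embeddings(word_vec_size=512,
                 word_vocab_size=10000,
                 word_padding_idx=1,
                 feat_merge='concat',
                 feat_vocab_sizes=[20],
                 feat_padding_idx=[1])
# With the default feat_vec_exponent=0.7 the feature dim is
# int(20 ** 0.7) == 8, so emb.embedding_size == 512 + 8 == 520.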
def __init__(self, word_vec_size, word_vocab_size, word_padding_idx,
             position_encoding=False, feat_merge="concat",
             feat_vec_exponent=0.7, feat_vec_size=-1,
             feat_padding_idx=[], feat_vocab_sizes=[],
             dropout=0, sparse=False, emb_type=None,
             gcn_vec_size=0, gcn_dropout=0, gcn_edge_dropout=0,
             n_gcn_layers=0, activation='', highway='',
             treelstm_vec_size=0):
    if feat_padding_idx is None:
        feat_padding_idx = []
    self.word_padding_idx = word_padding_idx
    self.word_vec_size = word_vec_size

    # Dimensions and padding for constructing the word embedding matrix
    vocab_sizes = [word_vocab_size]
    emb_dims = [word_vec_size]
    pad_indices = [word_padding_idx]

    # Dimensions and padding for feature embedding matrices
    # (these have no effect if feat_vocab_sizes is empty)
    if feat_merge == 'sum':
        feat_dims = [word_vec_size] * len(feat_vocab_sizes)
    elif feat_vec_size > 0:
        feat_dims = [feat_vec_size] * len(feat_vocab_sizes)
    else:
        feat_dims = [int(vocab ** feat_vec_exponent)
                     for vocab in feat_vocab_sizes]
    vocab_sizes.extend(feat_vocab_sizes)
    emb_dims.extend(feat_dims)
    pad_indices.extend(feat_padding_idx)

    # The embedding matrix look-up tables. The first look-up table
    # is for words. Subsequent ones are for features, if any exist.
    emb_params = zip(vocab_sizes, emb_dims, pad_indices)
    embeddings = [nn.Embedding(vocab, dim, padding_idx=pad, sparse=sparse)
                  for vocab, dim, pad in emb_params]
    emb_luts = Elementwise(feat_merge, embeddings)

    # The final output size of word + feature vectors. This can vary
    # from the word vector size if and only if features are defined.
    # This is the attribute you should access if you need to know
    # how big your embeddings are going to be.
    self.embedding_size = (sum(emb_dims) if feat_merge == 'concat'
                           else word_vec_size)

    # The sequence of operations that converts the input sequence
    # into a sequence of embeddings. At minimum this consists of
    # looking up the embeddings for each word and feature in the
    # input. Model parameters may require the sequence to contain
    # additional operations as well.
    super(Embeddings, self).__init__()
    self.make_embedding = nn.Sequential()
    self.make_embedding.add_module('emb_luts', emb_luts)

    # Optional structural encoders applied on top of the word embeddings.
    self.emb_type = emb_type
    assert self.emb_type in [None, 'gcn', 'treelstm', 'bi_treelstm',
                             'gcn_and_bi_treelstm', 'gcn_and_treelstm']
    if self.emb_type == 'gcn':
        self.gcn = GraphConvolution(word_vec_size, gcn_vec_size,
                                    gcn_dropout, gcn_edge_dropout,
                                    n_gcn_layers, activation, highway)
    elif self.emb_type == 'treelstm':
        self.treelstm = ChildSumTreeLSTM(word_vec_size, treelstm_vec_size)
    elif self.emb_type == 'bi_treelstm':
        # Each direction gets half of the requested TreeLSTM size.
        self.treelstm = ChildSumTreeLSTM(word_vec_size,
                                         treelstm_vec_size // 2)
        self.topdown_treelstm = TopDownTreeLSTM(treelstm_vec_size // 2,
                                                treelstm_vec_size // 2)
    elif self.emb_type == 'gcn_and_treelstm':
        self.gcn = GraphConvolution(word_vec_size, gcn_vec_size,
                                    gcn_dropout, gcn_edge_dropout,
                                    n_gcn_layers, activation, highway)
        self.treelstm = ChildSumTreeLSTM(word_vec_size, treelstm_vec_size)
    elif self.emb_type == 'gcn_and_bi_treelstm':
        self.gcn = GraphConvolution(word_vec_size, gcn_vec_size,
                                    gcn_dropout, gcn_edge_dropout,
                                    n_gcn_layers, activation, highway)
        self.treelstm = ChildSumTreeLSTM(word_vec_size,
                                         treelstm_vec_size // 2)
        self.topdown_treelstm = TopDownTreeLSTM(treelstm_vec_size // 2,
                                                treelstm_vec_size // 2)

    # Adjust the reported embedding size for the extra encoder outputs.
    if self.emb_type is not None:
        if 'gcn' in self.emb_type and 'treelstm' not in self.emb_type:
            # Pure GCN: subtracting word_vec_size means the GCN output
            # replaces, rather than extends, the word vectors.
            # word_vec_size += gcn_vec_size + treelstm_vec_size
            self.embedding_size += (gcn_vec_size + treelstm_vec_size
                                    - word_vec_size)
        else:
            # word_vec_size += gcn_vec_size + treelstm_vec_size
            self.embedding_size += gcn_vec_size + treelstm_vec_size  # - word_vec_size

    if feat_merge == 'mlp' and len(feat_vocab_sizes) > 0:
        in_dim = sum(emb_dims)
        out_dim = word_vec_size
        mlp = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU())
        self.make_embedding.add_module('mlp', mlp)

    self.position_encoding = position_encoding
    if self.position_encoding:
        pe = PositionalEncoding(dropout, self.embedding_size)
        self.make_embedding.add_module('pe', pe)
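# --- Illustrative size bookkeeping for the variant above (a sketch; all
# numbers are made up, and GraphConvolution / ChildSumTreeLSTM /
# TopDownTreeLSTM are assumed to be importable from this project).
emb = Embeddings(word_vec_size=300, word_vocab_size=10000,
                 word_padding_idx=1, emb_type='gcn_and_bi_treelstm',
                 gcn_vec_size=100, n_gcn_layers=2,
                 treelstm_vec_size=200)
# 'treelstm' occurs in emb_type, so the else branch applies:
# emb.embedding_size == 300 + 100 + 200 == 600, i.e. the GCN output and
# both TreeLSTM directions (2 x 100) are concatenated onto the word
# vectors (the concatenation itself happens in the forward pass, which is
# not shown in this section).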
def __init__(self, word_vec_size, word_vocab_size, word_padding_idx,
             position_encoding=False, position_encoding_learned=False,
             position_encoding_ctxsize=1024, feat_merge="concat",
             feat_vec_exponent=0.7, feat_vec_size=-1,
             feat_padding_idx=[], feat_vocab_sizes=[],
             dropout=0, sparse=False, fix_word_vecs=False,
             GPT_representation_mode='none', GPT_representation_tgt=False):
    self._validate_args(feat_merge, feat_vocab_sizes, feat_vec_exponent,
                        feat_vec_size, feat_padding_idx)
    if feat_padding_idx is None:
        feat_padding_idx = []
    self.word_padding_idx = word_padding_idx
    self.word_vec_size = word_vec_size

    # Dimensions and padding for constructing the word embedding matrix
    vocab_sizes = [word_vocab_size]
    emb_dims = [word_vec_size]
    pad_indices = [word_padding_idx]

    # Dimensions and padding for feature embedding matrices
    # (these have no effect if feat_vocab_sizes is empty)
    if feat_merge == 'sum':
        feat_dims = [word_vec_size] * len(feat_vocab_sizes)
    elif feat_vec_size > 0:
        feat_dims = [feat_vec_size] * len(feat_vocab_sizes)
    else:
        feat_dims = [int(vocab ** feat_vec_exponent)
                     for vocab in feat_vocab_sizes]
    vocab_sizes.extend(feat_vocab_sizes)
    emb_dims.extend(feat_dims)
    pad_indices.extend(feat_padding_idx)

    # The embedding matrix look-up tables. The first look-up table
    # is for words. Subsequent ones are for features, if any exist.
    emb_params = zip(vocab_sizes, emb_dims, pad_indices)
    embeddings = [nn.Embedding(vocab, dim, padding_idx=pad, sparse=sparse)
                  for vocab, dim, pad in emb_params]
    emb_luts = Elementwise(feat_merge, embeddings)

    # The final output size of word + feature vectors. This can vary
    # from the word vector size if and only if features are defined.
    # This is the attribute you should access if you need to know
    # how big your embeddings are going to be.
    self.embedding_size = (sum(emb_dims) if feat_merge == 'concat'
                           else word_vec_size)

    # The sequence of operations that converts the input sequence
    # into a sequence of embeddings. At minimum this consists of
    # looking up the embeddings for each word and feature in the
    # input. Model parameters may require the sequence to contain
    # additional operations as well.
    super(Embeddings, self).__init__()
    self.make_embedding = nn.Sequential()
    self.make_embedding.add_module('emb_luts', emb_luts)

    if feat_merge == 'mlp' and len(feat_vocab_sizes) > 0:
        in_dim = sum(emb_dims)
        mlp = nn.Sequential(nn.Linear(in_dim, word_vec_size), nn.ReLU())
        self.make_embedding.add_module('mlp', mlp)

    self.position_encoding = position_encoding
    if self.position_encoding:
        if position_encoding_learned:
            pe = LearnedPositionalEncoding(position_encoding_ctxsize,
                                           self.embedding_size,
                                           dropout=dropout)
            if fix_word_vecs:
                pe.pe.weight.requires_grad = False
        else:
            pe = PositionalEncoding(dropout, self.embedding_size)
        self.make_embedding.add_module('pe', pe)

    if fix_word_vecs:
        self.word_lut.weight.requires_grad = False

    self.GPT_representation_mode = GPT_representation_mode
    self.GPT_representation_tgt = GPT_representation_tgt
    if self.GPT_representation_mode != 'none':
        gpt_dropout = 0 if self.GPT_representation_mode == 'elmo' else dropout
        if self.GPT_representation_tgt:
            self.gpt_model = onmt.decoders.TransformerDecoder(
                12, 768, 12, 3072, False, 'scaled-dot', gpt_dropout,
                gpt_dropout, None, 0, False, True, False, False)
        else:
            self.gpt_model = onmt.encoders.TransformerEncoder(
                12, 768, 12, 3072, gpt_dropout, gpt_dropout, None, 0, True)
        if self.GPT_representation_mode == 'elmo':
            for p in self.gpt_model.parameters():
                p.requires_grad = False
            self.elmo_scale_params = nn.Parameter(torch.ones(13))
            self.elmo_gamma_param = nn.Parameter(torch.full((1,), 1.0))
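# --- ELMo-style scalar mix (a sketch of how the two parameters created in
# the 'elmo' branch above are typically consumed, following the original
# ELMo recipe; the actual forward pass is not part of this section, so the
# function below is an assumption, not this repo's code).
import torch
import torch.nn.functional as F

def scalar_mix(layer_outputs, scale_params, gamma_param):
    """Combine 13 frozen-model representations (the input embedding plus
    the 12 transformer layers, each of shape [seq_len, batch, 768]) into
    one tensor using learned mixing weights and a global scale."""
    weights = F.softmax(scale_params, dim=0)       # 13 normalized weights
    stacked = torch.stack(layer_outputs, dim=0)    # [13, seq, batch, 768]
    mixed = (weights.view(-1, 1, 1, 1) * stacked).sum(dim=0)
    return gamma_param * mixed                     # [seq, batch, 768]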
def __init__(self, word_vec_size, word_vocab_size, word_padding_idx,
             position_encoding=False, feat_merge="concat",
             feat_vec_exponent=0.7, feat_vec_size=-1,
             feat_padding_idx=[], feat_vocab_sizes=[],
             dropout=0, sparse=False):
    if feat_padding_idx is None:
        feat_padding_idx = []
    self.word_padding_idx = word_padding_idx
    # latt: remember the merge mode and feature vocabularies
    self.feat_merge = feat_merge
    self.feat_vocab_sizes = feat_vocab_sizes

    # Dimensions and padding for constructing the word embedding matrix
    vocab_sizes = [word_vocab_size]
    emb_dims = [word_vec_size]
    pad_indices = [word_padding_idx]

    # Dimensions and padding for feature embedding matrices
    # (these have no effect if feat_vocab_sizes is empty)
    if feat_merge == 'sum':
        feat_dims = [word_vec_size] * len(feat_vocab_sizes)
    elif feat_merge == 'latt':
        # latt: a single sense-lattice feature of fixed size
        feat_dims = [feat_vec_size] * 1
    elif feat_vec_size > 0:
        feat_dims = [feat_vec_size] * len(feat_vocab_sizes)
    else:
        feat_dims = [int(vocab ** feat_vec_exponent)
                     for vocab in feat_vocab_sizes]
    vocab_sizes.extend(feat_vocab_sizes)
    emb_dims.extend(feat_dims)
    pad_indices.extend(feat_padding_idx)

    # latt: adaptable number of features
    self.num_features = len(pad_indices) - 1
    # Sample values with one lattice feature (source side):
    #   vocab_sizes=[17398, 26997], emb_dims=[256, 256],
    #   pad_indices=[1, 1, 1, 1, 1, 1], feat_vocab_sizes=[26997]
    # and without features (target side):
    #   vocab_sizes=[17398], emb_dims=[256, 256], pad_indices=[1]

    # The embedding matrix look-up tables. The first look-up table
    # is for words. Subsequent ones are for features, if any exist.
    emb_params = zip(vocab_sizes, emb_dims, pad_indices)
    if feat_merge == 'latt' and len(vocab_sizes) > 1:
        # latt: keep the word table and the sense table separate.
        # NB: the padding index of the sense table is hard-coded to 1.
        embeddings = [nn.Embedding(vocab_sizes[0], emb_dims[0],
                                   padding_idx=pad_indices[0],
                                   sparse=sparse)]
        embeddings_senses = [nn.Embedding(vocab_sizes[1], emb_dims[1],
                                          padding_idx=1, sparse=sparse)]
    else:
        # NB: emb_params is a zip iterator and can be consumed only once,
        # so it must not be iterated (e.g. for debugging) before this
        # comprehension.
        embeddings = [nn.Embedding(vocab, dim, padding_idx=pad,
                                   sparse=sparse)
                      for vocab, dim, pad in emb_params]

    if feat_merge == 'latt' and len(vocab_sizes) > 1:
        emb_luts_senses = Elementwise('latt_senses', embeddings_senses)
    emb_luts = Elementwise(feat_merge, embeddings)

    # The final output size of word + feature vectors. This can vary
    # from the word vector size if and only if features are defined.
    # This is the attribute you should access if you need to know
    # how big your embeddings are going to be.
    self.embedding_size = (sum(emb_dims) if feat_merge == 'concat'
                           else word_vec_size)

    # The sequence of operations that converts the input sequence
    # into a sequence of embeddings. At minimum this consists of
    # looking up the embeddings for each word and feature in the
    # input. Model parameters may require the sequence to contain
    # additional operations as well.
    super(Embeddings, self).__init__()
    self.make_embedding = nn.Sequential()
    self.make_embedding.add_module('emb_luts', emb_luts)

    # latt: a second pipeline, 'make_embedding_latt', for the sense table
    if feat_merge == 'latt' and len(vocab_sizes) > 1:
        self.make_embedding_latt = nn.Sequential()
        self.make_embedding_latt.add_module('emb_luts', emb_luts_senses)

    if feat_merge == 'mlp' and len(feat_vocab_sizes) > 0:
        in_dim = sum(emb_dims)
        out_dim = word_vec_size
        mlp = nn.Sequential(nn.Linear(in_dim, out_dim), nn.ReLU())
        self.make_embedding.add_module('mlp', mlp)

    self.position_encoding = position_encoding
    if self.position_encoding:
        pe = PositionalEncoding(dropout, self.embedding_size)
        self.make_embedding.add_module('pe', pe)
        # latt: share the same positional encoding with the sense pipeline
        if feat_merge == 'latt' and len(vocab_sizes) > 1:
            self.make_embedding_latt.add_module('pe', pe)
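# --- Illustrative 'latt' construction (a sketch; the sizes are taken from
# the sample values noted above, and how make_embedding_latt is consumed
# in the forward pass is an assumption, since that code is not shown here).
emb = Embeddings(word_vec_size=256, word_vocab_size=17398,
                 word_padding_idx=1, feat_merge='latt', feat_vec_size=256,
                 feat_vocab_sizes=[26997], feat_padding_idx=[1],
                 position_encoding=True)
# Two parallel pipelines now exist: emb.make_embedding looks up word
# embeddings and emb.make_embedding_latt looks up the sense feature; both
# end in the same shared PositionalEncoding module, and
# emb.embedding_size == 256, since 'latt' is not 'concat'.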