def __init__(self, cl_logits_input_dim=None):
    self.layers = {}
    self.initialize_vocab()
    self.layers['embedding'] = layers_lib.Embedding(
        self.vocab_size, FLAGS.embedding_dims, FLAGS.normalize_embeddings,
        self.vocab_freqs, FLAGS.keep_prob_emb, name='embedding')
    self.layers['embedding_1'] = layers_lib.Embedding(
        self.vocab_size, FLAGS.embedding_dims, FLAGS.normalize_embeddings,
        self.vocab_freqs, FLAGS.keep_prob_emb, name='embedding_1')
    self.layers['cnn'] = layers_lib.CNN(FLAGS.embedding_dims, FLAGS.keep_prob_emb)
    self.layers['lstm_1'] = layers_lib.BiLSTM(FLAGS.rnn_cell_size,
                                              FLAGS.rnn_num_layers,
                                              name="Bilstm")
    action_type = 5 if FLAGS.action == 'all' else 4
    self.layers['action_select'] = layers_lib.Actionselect(
        action_type, FLAGS.keep_prob_dense, name='action_output')
    self.layers['cl_logits'] = layers_lib.Project_layer(
        FLAGS.num_classes, FLAGS.keep_prob_dense, name='project_layer')

def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.hidden_size = 2 * hidden_size  # doubled because word vectors and char vectors are concatenated
    self.drop_prob = drop_prob  # store so the encoder blocks below can use it
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = Encoder(dim=self.hidden_size,
                       depth=1,
                       heads=3,
                       ff_glu=True,
                       ff_dropout=self.drop_prob,
                       attn_dropout=self.drop_prob,
                       use_scalenorm=True,
                       position_infused_attn=True)
    self.att = layers.TBiDAFAttention(hidden_size=self.hidden_size,
                                      drop_prob=drop_prob)
    self.mod = Encoder(dim=2 * self.hidden_size,
                       depth=3,
                       heads=6,
                       ff_glu=True,
                       ff_dropout=self.drop_prob,
                       attn_dropout=self.drop_prob,
                       use_scalenorm=True,
                       position_infused_attn=True)
    self.out = layers.BiDAFOutput(hidden_size=self.hidden_size,
                                  drop_prob=self.drop_prob)

def __init__(self, model_name, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF2, self).__init__()
    self.hidden_size = hidden_size * 2  # char embeddings are added, so the hidden size doubles
    self.emb = layers.Embedding(model_name=model_name,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    # input_size=self.hidden_size + 2 because two extra features (avg_attention) are appended to both
    # the char embedding and the word embedding to boost performance. avg_attention uses an attention
    # mechanism so the model learns a weighted average over the vectors by itself.
    self.enc = layers.RNNEncoder(input_size=self.hidden_size + 2,
                                 hidden_size=self.hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.highway = layers.HighwayEncoder(2, 4 * hidden_size)
    self.mod = layers.RNNEncoder(input_size=2 * self.hidden_size,
                                 hidden_size=self.hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    # self.sim = nn.CosineSimilarity(dim=1, eps=1e-6)
    self.qa_outputs = nn.Linear(2 * self.hidden_size, 2)

def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob,
                                char_vectors=char_vectors)  # added char_vectors
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    ### start our code:
    self.selfattention = layers.SelfAttention(input_size=8 * hidden_size,
                                              hidden_size=hidden_size,
                                              dropout=0.2)
    ### end our code
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)

def __init__(self, word_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
    self.proj_bert_down = nn.Linear(in_features=768, out_features=hidden_size, bias=True)
    nn.init.xavier_uniform_(self.proj_bert_down.weight, gain=1)
    self.proj_glove_down = nn.Linear(in_features=300, out_features=hidden_size, bias=True)
    nn.init.xavier_uniform_(self.proj_glove_down.weight, gain=1)

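# Most of the constructors in this file wire a layers.BiDAFAttention module between the encoder and
# the modeling layer. For reference, below is a minimal, self-contained sketch of the standard BiDAF
# bidirectional attention (context-to-query and query-to-context). The class name, shapes, and the
# masking convention are illustrative assumptions, not the project's layers.BiDAFAttention API.
# With input dimension h = 2 * hidden_size (the RNNEncoder output), the concatenated output is
# 4h = 8 * hidden_size, which matches the modeling-layer input sizes used above.
import torch
import torch.nn as nn
import torch.nn.functional as F

class MiniBiDAFAttention(nn.Module):
    def __init__(self, hidden_size):
        super().__init__()
        # Trainable weights for the trilinear similarity  s(c, q) = w_c.c + w_q.q + w_cq.(c * q)
        self.c_weight = nn.Parameter(torch.zeros(hidden_size, 1))
        self.q_weight = nn.Parameter(torch.zeros(hidden_size, 1))
        self.cq_weight = nn.Parameter(torch.zeros(1, 1, hidden_size))
        for w in (self.c_weight, self.q_weight, self.cq_weight):
            nn.init.xavier_uniform_(w)

    def forward(self, c, q, c_mask, q_mask):
        # c: (batch, c_len, h), q: (batch, q_len, h); masks are 1 for real tokens, 0 for padding
        s0 = torch.matmul(c, self.c_weight)                       # (batch, c_len, 1)
        s1 = torch.matmul(q, self.q_weight).transpose(1, 2)       # (batch, 1, q_len)
        s2 = torch.matmul(c * self.cq_weight, q.transpose(1, 2))  # (batch, c_len, q_len)
        s = s0 + s1 + s2
        s = s.masked_fill(~q_mask.bool().unsqueeze(1), -1e30)     # mask padded query columns
        a = F.softmax(s, dim=2)                                   # context-to-query weights
        c2q = torch.bmm(a, q)                                     # (batch, c_len, h)
        s_row = s.masked_fill(~c_mask.bool().unsqueeze(2), -1e30) # also mask padded context rows
        b = F.softmax(s_row.max(dim=2).values, dim=1)             # (batch, c_len) query-to-context weights
        q2c = torch.bmm(b.unsqueeze(1), c).expand(-1, c.size(1), -1)
        return torch.cat([c, c2q, c * c2q, c * q2c], dim=2)       # (batch, c_len, 4h)
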
def __init__(self, word_vectors, word_vectors_char, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                word_vectors_char=word_vectors_char,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)

def __init__(self, word_vectors, hidden_size, use_pos, use_ner, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.use_pos = use_pos
    self.use_ner = use_ner
    rnn_input_size = hidden_size
    if use_pos:
        rnn_input_size += 1
    if use_ner:
        rnn_input_size += 1
    self.enc = layers.RNNEncoder(input_size=rnn_input_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)

def __init__(self, word_vectors, hidden_size, drop_prob=0.):
    super(Paraphraser, self).__init__()
    # We load embeddings from a GloVe vector file.
    # Embedding, dropout, projection (linear), highway layer - TODO: do we want all of these or just the embedding?
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)

def __init__(self, word_vectors, char_vectors, hidden_size, num_heads=8, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    hidden_size *= 2  # update hidden size for the other layers due to char embeddings
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)

def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.1):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    # self.enc = layers.RNNEncoder(input_size=hidden_size,
    #                              hidden_size=hidden_size,
    #                              num_layers=1,
    #                              drop_prob=drop_prob)
    self.emb_encoder = layers.EmbeddingEncoder(d_model=hidden_size,
                                               drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=4 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    # self.model_encoder = layers.ModelEncoder(d_model=hidden_size, drop_prob=drop_prob)
    # self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)

def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF_charCNN, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.char_emb = layers.CharEmbedding(char_vectors=char_vectors,
                                         hidden_size=hidden_size,
                                         drop_prob=drop_prob)
    self.hwy = layers.HighwayEncoder(2, 2 * hidden_size)
    self.enc = layers.RNNEncoder(input_size=2 * hidden_size,
                                 hidden_size=2 * hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * 2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * 2 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)

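# layers.HighwayEncoder(2, 2*hidden_size) above mixes the concatenated word and char embeddings.
# Below is a minimal, self-contained sketch of a two-layer highway network (Srivastava et al., 2015),
# where a learned gate interpolates between a nonlinear transform and the identity. The class and
# argument names are illustrative, not the project's own HighwayEncoder implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F

class MiniHighwayEncoder(nn.Module):
    def __init__(self, num_layers, hidden_size):
        super().__init__()
        self.transforms = nn.ModuleList(
            [nn.Linear(hidden_size, hidden_size) for _ in range(num_layers)])
        self.gates = nn.ModuleList(
            [nn.Linear(hidden_size, hidden_size) for _ in range(num_layers)])

    def forward(self, x):
        for transform, gate in zip(self.transforms, self.gates):
            g = torch.sigmoid(gate(x))   # per-dimension gate in [0, 1]
            t = F.relu(transform(x))     # candidate transformation
            x = g * t + (1 - g) * x      # gated mix of transform and identity
        return x

# e.g. MiniHighwayEncoder(2, 2 * hidden_size) mirrors the HighwayEncoder(2, 2*hidden_size) call above.
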
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    # print("vectors: ", word_vectors)
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.self_att = layers.SelfAttention(hidden_size=8 * hidden_size,
                                         drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
    self.batch_size = 64
    self.hidden_size = hidden_size

def __init__(self, cl_logits_input_dim=None):
    self.global_step = tf.train.get_or_create_global_step()
    self.vocab_freqs = _get_vocab_freqs()

    # Cache VatxtInput objects
    self.cl_inputs = None
    self.lm_inputs = None

    # Cache intermediate Tensors that are reused
    self.tensors = {}

    # Construct layers which are reused in constructing the LM and
    # Classification graphs. Instantiating them all once here ensures that
    # variable reuse works correctly.
    self.layers = {}
    self.layers['embedding'] = layers_lib.Embedding(
        FLAGS.vocab_size, FLAGS.embedding_dims, FLAGS.normalize_embeddings,
        self.vocab_freqs, FLAGS.keep_prob_emb)
    self.layers['lstm'] = layers_lib.LSTM(FLAGS.rnn_cell_size,
                                          FLAGS.rnn_num_layers,
                                          FLAGS.keep_prob_lstm_out)
    self.layers['lm_loss'] = layers_lib.SoftmaxLoss(
        FLAGS.vocab_size,
        FLAGS.num_candidate_samples,
        self.vocab_freqs,
        name='LM_loss')

    cl_logits_input_dim = cl_logits_input_dim or FLAGS.rnn_cell_size
    self.layers['cl_logits'] = layers_lib.cl_logits_subgraph(
        [FLAGS.cl_hidden_size] * FLAGS.cl_num_layers, cl_logits_input_dim,
        FLAGS.num_classes, FLAGS.keep_prob_cl_hidden)

def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.hidden_size = hidden_size
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob,
                                char_vectors=char_vectors)  # added character vectors
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    ### start our code:
    self.selfattention = layers.SelfAttention(input_size=8 * hidden_size,
                                              hidden_size=hidden_size,
                                              dropout=0.2)
    ### end our code
    self.linear = nn.Linear(in_features=8 * self.hidden_size,
                            out_features=2 * self.hidden_size,
                            bias=True)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=4,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)

def __init__(self, word_vectors, char_vectors, hidden_size, kernel_size, filters, drop_prob=0.):
    super(QANet, self).__init__()
    # Input embedding layer
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob,
                                char_vectors=char_vectors)  # added character vectors
    # Resize the input embedding layer output to fit the embedding encoder layer input size
    self.resize_emb_pe = nn.Linear(in_features=hidden_size, out_features=filters, bias=False)
    # Embedding encoder layer
    self.emb_enc = qa.EncoderBlock(input_size=filters,
                                   kernel_size=kernel_size,
                                   filters=filters,
                                   num_conv_layers=4,
                                   drop_prob=drop_prob)
    # Context-query attention layer
    self.att = layers.BiDAFAttention(hidden_size=filters, drop_prob=drop_prob)
    # Model encoder layer. Note that [mod_enc] * 7 registers the same EncoderBlock instance
    # seven times, so the seven blocks share parameters.
    mod_enc = qa.EncoderBlock(input_size=4 * filters,
                              kernel_size=5,
                              filters=filters,
                              num_conv_layers=2,
                              drop_prob=drop_prob)
    self.mod_enc = nn.ModuleList([mod_enc] * 7)  # 7 blocks
    # QANet output layer
    self.output = qa.QANetOutput(input_size=4 * filters, drop_prob=drop_prob)

def __init__(self, word_vectors, hidden_size, output_size, device, drop_prob=0., num_layers=1):
    super(Seq2Seq, self).__init__()
    self.hidden_size = hidden_size
    self.device = device
    self.word_vectors = word_vectors
    self.model_type = 'seq2seq'
    # self.emb = nn.Embedding(num_embeddings=output_size, embedding_dim=hidden_size)
    # self.dropout = nn.Dropout(p=drop_prob)
    self.emb = layers.Embedding(word_vectors, hidden_size, drop_prob=drop_prob)
    self.encoder = layers.EncoderRNN(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=num_layers,
                                     drop_prob=drop_prob)
    self.decoder = layers.DecoderRNN(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=num_layers,
                                     drop_prob=drop_prob)
    self.generator = layers.Generator(hidden_size, output_size)

def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    # self.enc = layers.RNNEncoder(input_size=hidden_size,
    #                              hidden_size=hidden_size,
    #                              num_layers=1,
    #                              drop_prob=drop_prob)
    # self.transformer = make_model(word_vectors, drop_prob, hidden_size)
    self.emb_enc = EncoderBlock(conv_num=4, ch_num=64, k=7)
    self.att = layers.BiDAFAttention(hidden_size=hidden_size,
                                     drop_prob=drop_prob)
    # TODO
    self.mod = layers.RNNEncoder(input_size=4 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)

def __init__(self, word_vectors, char_vectors, hidden_size, num_heads,
             char_embed_drop_prob, drop_prob=0.1):
    super(SketchyReader, self).__init__()
    '''class QANet(nn.Module):
        def __init__(self, word_vectors, char_vectors, hidden_size, device, drop_prob=0.):
            super(QANet, self).__init__()
            self.device = device'''
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                char_embed_drop_prob=char_embed_drop_prob,
                                word_embed_drop_prob=drop_prob)
    hidden_size *= 2  # update hidden size for the other layers due to char embeddings
    self.c_resizer = layers.Initialized_Conv1d(hidden_size, 128)
    self.q_resizer = layers.Initialized_Conv1d(hidden_size, 128)
    self.model_resizer = layers.Initialized_Conv1d(512, 128)
    self.enc = layers.StackedEncoder(num_conv_blocks=4,
                                     kernel_size=7,
                                     num_heads=num_heads,
                                     dropout=drop_prob)  # embedding encoder layer
    self.att = layers.BiDAFAttention(hidden_size=128,
                                     drop_prob=drop_prob)  # context-query attention layer
    # self.mod1 = layers.StackedEncoder(num_conv_blocks=2, kernel_size=7, dropout=drop_prob)  # model layer
    # self.mod2 = layers.StackedEncoder(num_conv_blocks=2, kernel_size=7, dropout=drop_prob)  # model layer
    # self.mod3 = layers.StackedEncoder(num_conv_blocks=2, kernel_size=7, dropout=drop_prob)  # model layer
    self.model_encoder_layers = nn.ModuleList([
        layers.StackedEncoder(num_conv_blocks=2,
                              kernel_size=7,
                              dropout=drop_prob)
        for _ in range(7)
    ])
    self.out = layers.SketchyOutput(hidden_size=128)  # output layer

def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0., twist_embeddings=False):
    super(BiDAF_charCNN_BERTEnc_BERTMod, self).__init__()
    ###
    self.twist_embeddings = twist_embeddings
    idx_list = []
    for i in range(hidden_size):
        idx_list.append(i)
        idx_list.append(hidden_size + i)
    self.register_buffer('idx_twist', torch.tensor(idx_list))
    ###
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.char_emb = layers.CharEmbedding(char_vectors=char_vectors,
                                         hidden_size=hidden_size,
                                         drop_prob=drop_prob)
    self.hwy = layers.HighwayEncoder(2, 2 * hidden_size)
    self.enc = bert_layers.BertEncoder(n_layers=3,  # n_layers=4
                                       d_feature=2 * hidden_size,
                                       n_heads=8,
                                       out_size=2 * hidden_size,
                                       d_ff=2 * hidden_size,  # d_ff=2048
                                       dropout_prob=0.1,  # dropout_prob=drop_prob
                                       ff_activation=F.relu)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = bert_layers.BertEncoder(n_layers=3,  # n_layers=3
                                       d_feature=8 * hidden_size,
                                       n_heads=8,
                                       out_size=2 * hidden_size,
                                       d_ff=2 * hidden_size,  # d_ff=2048
                                       dropout_prob=0.1,  # dropout_prob=drop_prob
                                       ff_activation=F.relu)
    # self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
    #                              hidden_size=hidden_size,
    #                              num_layers=2,
    #                              drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)

def __init__(self, vectors, hidden_size, char_limit, use_transformer, use_GRU, drop_prob=.1, **kwargs):
    super(BiDAF, self).__init__()
    self.use_transformer = use_transformer
    self.use_GRU = use_GRU
    self.hidden_size = hidden_size
    self.emb = layers.Embedding(vectors=vectors,
                                c2w_size=kwargs['c2w_size'],
                                hidden_size=hidden_size,
                                drop_prob=drop_prob,
                                char_limit=char_limit)
    if not use_transformer:
        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,  # output = 2*hidden_size
                                     num_layers=1,
                                     drop_prob=drop_prob,
                                     use_GRU=use_GRU)
        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,  # output = 2*hidden_size
                                     num_layers=2,
                                     drop_prob=drop_prob,
                                     use_GRU=use_GRU)
        self.out = layers.BiDAFOutput(hidden_size=2 * hidden_size,
                                      drop_prob=drop_prob,
                                      use_transformer=use_transformer)
    else:
        self.heads = kwargs['heads']
        self.inter_size = kwargs['inter_size']
        self.enc = layers.TransformerEncoderStack(N=kwargs['enc_blocks'],
                                                  heads=self.heads,
                                                  input_size=hidden_size,
                                                  output_size=hidden_size,
                                                  inter_size=self.inter_size,
                                                  num_conv=kwargs['enc_convs'],
                                                  drop_prob=drop_prob,
                                                  p_sdd=kwargs['p_sdd'])
        self.squeeze = layers.InitializedLayer(4 * hidden_size, hidden_size, bias=False)
        self.mod = layers.TransformerEncoderStack(N=kwargs['mod_blocks'],
                                                  heads=self.heads,
                                                  input_size=hidden_size,
                                                  output_size=hidden_size,
                                                  inter_size=self.inter_size,
                                                  num_conv=kwargs['mod_convs'],
                                                  drop_prob=drop_prob,
                                                  p_sdd=kwargs['p_sdd'])
        self.out = layers.QAOutput(2 * hidden_size)
    self.att = layers.BiDAFAttention(hidden_size=(1 if self.use_transformer else 2) * hidden_size,
                                     drop_prob=drop_prob)  # (batch_size, seq_len, 4*input_hidden_size)

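# The p_sdd argument passed to the TransformerEncoderStack above presumably controls stochastic depth
# ("layer drop"): during training, each residual block is skipped with some probability, which helps
# regularize deep encoder stacks (Huang et al., 2016). That reading is an assumption; below is a
# minimal, self-contained sketch of the pattern, not the project's TransformerEncoderStack code.
# (Some variants additionally rescale the block output at evaluation time.)
import torch
import torch.nn as nn

class StochasticResidual(nn.Module):
    def __init__(self, block, p_drop):
        super().__init__()
        self.block = block
        self.p_drop = p_drop

    def forward(self, x):
        if self.training and torch.rand(1).item() < self.p_drop:
            return x                  # skip the block entirely for this forward pass
        return x + self.block(x)      # ordinary residual connection otherwise
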
def __init__(self, word_vectors, char_vectors, context_max_len, query_max_len, d_model,
             train_cemb=False, pad=0, dropout=0.1, num_head=8):
    super(QANet, self).__init__()
    if train_cemb:
        self.char_emb = nn.Embedding.from_pretrained(char_vectors, freeze=False)
        print("Training char_embeddings")
    else:
        self.char_emb = nn.Embedding.from_pretrained(char_vectors)
    self.word_emb = nn.Embedding.from_pretrained(word_vectors)
    self.LC = context_max_len
    self.LQ = query_max_len
    self.num_head = num_head
    self.pad = pad
    self.dropout = dropout
    wemb_dim = word_vectors.size()[1]
    cemb_dim = char_vectors.size()[1]
    # print("Word vector dim-%d, Char vector dim-%d" % (wemb_dim, cemb_dim))

    # Layer declarations
    self.emb = layers.Embedding(wemb_dim, cemb_dim, d_model)
    self.emb_enc = layers.Encoder(num_conv=4,
                                  d_model=d_model,
                                  num_head=num_head,
                                  k=7,
                                  dropout=0.1)
    self.cq_att = layers.CQAttention(d_model=d_model)
    # Forward layer to reduce the dimension of the cq_att output back to d_model
    self.cq_resizer = layers.Initialized_Conv1d(d_model * 4, d_model)
    self.model_enc_blks = nn.ModuleList([
        layers.Encoder(num_conv=2, d_model=d_model, num_head=num_head, k=5, dropout=0.1)
        for _ in range(7)
    ])
    self.out = layers.QAOutput(d_model)

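# The Encoder blocks used above (num_conv=4 for the embedding encoder, num_conv=2 for the model
# encoder) follow QANet, whose convolutions are depthwise-separable 1-D convolutions. Below is a
# minimal, self-contained sketch of that building block; the names are illustrative and not the
# project's layers module.
import torch
import torch.nn as nn

class DepthwiseSeparableConv1d(nn.Module):
    def __init__(self, channels, kernel_size):
        super().__init__()
        # Depthwise: one filter per channel; padding keeps the sequence length unchanged.
        self.depthwise = nn.Conv1d(channels, channels, kernel_size,
                                   padding=kernel_size // 2, groups=channels)
        # Pointwise: 1x1 convolution mixes information across channels.
        self.pointwise = nn.Conv1d(channels, channels, kernel_size=1)

    def forward(self, x):
        # x: (batch, channels, seq_len)
        return self.pointwise(self.depthwise(x))
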
def __init__(self, embeddings: InputEmbeddings, hidden_size, drop_prob=0.):
    super(SLQA, self).__init__()
    word_vectors = embeddings.word_vectors
    char_vectors = embeddings.char_vectors
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = AlignedAttention(hidden_size=2 * hidden_size,
                                drop_prob=drop_prob)
    self.p_fusion1 = FusionLayer(2 * hidden_size)
    self.q_fusion1 = FusionLayer(2 * hidden_size)
    self.p_enc_eq_13 = layers.RNNEncoder(input_size=2 * hidden_size,
                                         hidden_size=hidden_size,
                                         num_layers=1,
                                         drop_prob=drop_prob)
    self.q_enc_eq_13 = layers.RNNEncoder(input_size=2 * hidden_size,
                                         hidden_size=hidden_size,
                                         num_layers=1,
                                         drop_prob=drop_prob)
    self.self_attention = FusedSelfAttention(2 * hidden_size)
    self.p_enc_eq_17 = layers.RNNEncoder(input_size=2 * hidden_size,
                                         hidden_size=hidden_size,
                                         num_layers=1,
                                         drop_prob=drop_prob)
    self.q_enc_eq_17 = layers.RNNEncoder(input_size=2 * hidden_size,
                                         hidden_size=hidden_size,
                                         num_layers=1,
                                         drop_prob=drop_prob)
    self.q_linear_align_18 = LinearAlign(2 * hidden_size)
    self.bilinear_start = BilinearSeqAtt(2 * hidden_size, 2 * hidden_size)
    self.bilinear_end = BilinearSeqAtt(2 * hidden_size, 2 * hidden_size)

def __init__(self, weights_matrix, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(weights_matrix=weights_matrix,
                                hidden_size=hidden_size)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size)

def __init__(self, word_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.TPRRNN(word_emb_size=(8 * hidden_size),
                             n_symbols=100,
                             d_symbols=10,
                             n_roles=20,
                             d_roles=10,
                             hidden_size=hidden_size)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)

def __init__(self, word_vectors, hidden_size, drop_prob=0.):
    super(BERT, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=300,
                                drop_prob=drop_prob)
    self.bert_start = nn.Linear(in_features=300, out_features=1, bias=True)
    nn.init.xavier_uniform_(self.bert_start.weight, gain=1)
    self.bert_end = nn.Linear(in_features=300, out_features=1, bias=True)
    nn.init.xavier_uniform_(self.bert_end.weight, gain=1)
    self.proj_up = nn.Linear(in_features=300, out_features=hidden_size, bias=True)
    self.proj_down = nn.Linear(in_features=hidden_size, out_features=300, bias=True)
    nn.init.xavier_uniform_(self.proj_down.weight, gain=1)

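# bert_start / bert_end above map each token's feature vector to a single score, which is the usual
# SQuAD-style span head: one distribution over context positions for the start index and one for the
# end index. Below is a minimal, self-contained sketch of that step; the function name, shapes, and
# mask handling are assumptions, not code from the model above.
import torch
import torch.nn.functional as F

def span_scores_to_log_probs(start_scores, end_scores, c_mask):
    # start_scores, end_scores: (batch, c_len, 1) from the two nn.Linear(..., out_features=1) heads
    # c_mask: (batch, c_len), 1 for real tokens, 0 for padding
    start = start_scores.squeeze(-1).masked_fill(~c_mask.bool(), -1e30)
    end = end_scores.squeeze(-1).masked_fill(~c_mask.bool(), -1e30)
    # Log-probabilities over positions, suitable for an NLL loss on the gold start/end indices.
    return F.log_softmax(start, dim=-1), F.log_softmax(end, dim=-1)
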
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.,
             enable_EM=True, enable_posner=True, enable_selfatt=True):
    super(BiDAF, self).__init__()
    self.embd_size = hidden_size
    self.d = self.embd_size * 2  # word_embedding + char_embedding
    self.enable_EM = enable_EM
    if enable_EM:
        self.d += 2  # word_feature
    if enable_posner:
        self.d += 10  # word_feature
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=self.embd_size,
                                drop_prob=drop_prob,
                                enable_posner=enable_posner)
    self.enc = layers.RNNEncoder(input_size=self.d,
                                 hidden_size=self.d,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * self.d,
                                     drop_prob=drop_prob)
    self.enable_selfatt = enable_selfatt
    if enable_selfatt:
        # self.selfMatch = layers.SelfMatcher(in_size=8 * self.d, drop_prob=drop_prob)
        self.selfMatch = layers.StaticDotAttention(memory_size=2 * self.d,
                                                   input_size=2 * self.d,
                                                   attention_size=2 * self.d,
                                                   drop_prob=drop_prob)
        self.mod = layers.RNNEncoder(input_size=4 * self.d,
                                     hidden_size=self.d,
                                     num_layers=2,
                                     drop_prob=drop_prob)
    else:
        self.mod = layers.RNNEncoder(input_size=2 * self.d,
                                     hidden_size=self.d,
                                     num_layers=2,
                                     drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=self.d,
                                  drop_prob=drop_prob)

def __init__(self, word_vectors, hidden_size, drop_prob=0.):
    super(BiDAF_attDCA, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = DoubleCrossAttention(hidden_size=2 * hidden_size,
                                    drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=6 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = BiDAFOutput_att(hidden_size=hidden_size,
                               att_put_h_size=6 * hidden_size,
                               drop_prob=drop_prob)

def __init__(self, word_vectors, hidden_size, output_size, device, drop_prob=0., num_layers=1):
    super(Seq2SeqAttn, self).__init__()
    self.hidden_size = hidden_size
    self.word_vectors = word_vectors
    self.device = device
    self.enc_hiddens = None
    self.enc_masks = None
    self.model_type = 'seq2seq_attn'
    # self.emb = nn.Embedding(num_embeddings=output_size, embedding_dim=hidden_size)
    self.emb = layers.Embedding(word_vectors, hidden_size)
    self.encoder = layers.EncoderRNN(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=num_layers,
                                     drop_prob=drop_prob)
    self.decoder = layers.DecoderRNN(input_size=2 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=num_layers,
                                     drop_prob=drop_prob)
    self.att_projection = nn.Linear(in_features=2 * hidden_size,
                                    out_features=hidden_size,
                                    bias=False)
    self.combined_output_projection = nn.Linear(in_features=3 * hidden_size,
                                                out_features=hidden_size,
                                                bias=False)
    self.generator = layers.Generator(hidden_size, output_size)
    self.dropout = nn.Dropout(p=drop_prob)

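# The att_projection (2h -> h) and combined_output_projection (3h -> h) layers above suggest the
# usual attention step for a seq2seq decoder over a bidirectional encoder: project the 2h encoder
# states to the decoder size, score them against the current decoder state, take a weighted sum,
# then squash [context; decoder state] back to hidden_size. A minimal, self-contained sketch under
# those assumptions (not the project's actual forward code); here enc_masks marks padded positions.
import torch
import torch.nn.functional as F

def attention_step(dec_hidden, enc_hiddens, enc_hiddens_proj, enc_masks,
                   combined_output_projection, dropout):
    # dec_hidden: (batch, h); enc_hiddens: (batch, src_len, 2h); enc_hiddens_proj: (batch, src_len, h)
    e = torch.bmm(enc_hiddens_proj, dec_hidden.unsqueeze(2)).squeeze(2)  # (batch, src_len) scores
    e = e.masked_fill(enc_masks.bool(), -1e30)                           # ignore padded source tokens
    alpha = F.softmax(e, dim=1)                                          # attention distribution
    a = torch.bmm(alpha.unsqueeze(1), enc_hiddens).squeeze(1)            # (batch, 2h) context vector
    u = torch.cat([a, dec_hidden], dim=1)                                # (batch, 3h)
    return dropout(torch.tanh(combined_output_projection(u)))           # (batch, h) combined output
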
def __init__(self, word_mat, w_embedding_size, c_embedding_size, c_vocab_size, hidden_size,
             num_head=1, drop_prob=0.2):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_mat, w_embedding_size, c_embedding_size,
                                c_vocab_size, hidden_size, drop_prob)
    self.enc = layers.RNNEncoder(input_size=w_embedding_size + hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.var_dropout = layers.VariationalDropout(drop_prob, batch_first=True)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.linear_trans = nn.Sequential(
        nn.Linear(8 * hidden_size, 2 * hidden_size),
        nn.ReLU())
    self.attn_mod = layers.RNNEncoder(hidden_size * 2,
                                      hidden_size,
                                      num_layers=1,
                                      drop_prob=drop_prob)
    self.self_attn = layers.BiDAFSelfAttention(num_head, 2 * hidden_size)
    self.linear_attn = nn.Sequential(
        nn.Linear(2 * hidden_size, 2 * hidden_size),
        nn.ReLU())
    self.mod = layers.RNNEncoder(input_size=2 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)

def __init__(self, word_vectors, ch_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    torch.cuda.empty_cache()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                ch_vectors=ch_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.chunk = layers.ChunkLayer(hidden_size=hidden_size, max_ans_len=10)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, max_ans_len=10)