def __init__(self, word_vectors, char_vectors, pos_vectors, ner_vectors, hidden_size, drop_prob=0., freeze_tag=True):
    """Assemble BiDAF_tag_ext: BiDAF whose embeddings also carry POS/NER tags."""
    super(BiDAF_tag_ext, self).__init__()
    # Embedding layer combining word, char, POS and NER vectors; tag
    # embeddings can be kept frozen via `freeze_tag`.
    self.emb = layers.Embedding_Tag_Ext(word_vectors=word_vectors,
                                        char_vectors=char_vectors,
                                        pos_vectors=pos_vectors,
                                        ner_vectors=ner_vectors,
                                        hidden_size=hidden_size,
                                        drop_prob=drop_prob,
                                        freeze_tag=freeze_tag)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    # Attention operates on the 2*hidden_size encoder features.
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    """Assemble BiDAF with character-aware embeddings and a self-attention layer."""
    super(BiDAF, self).__init__()
    # Embedding now also receives character vectors.
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob,
                                char_vectors=char_vectors)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    # Self-attention over the 8*hidden_size attention output.
    # NOTE(review): dropout is hard-coded to 0.2 here rather than drop_prob.
    self.selfattention = layers.SelfAttention(input_size=8 * hidden_size,
                                              hidden_size=hidden_size,
                                              dropout=0.2)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, pos_vectors, ner_vectors, iob_vectors, hidden_size, drop_prob=0.):
    """Assemble BiDAF_CharTag: char embeddings plus POS/NER/IOB tag embeddings."""
    super(BiDAF_CharTag, self).__init__()
    self.hidden_size = hidden_size
    self.emb = layers.EmbeddingCharTag(word_vectors=word_vectors,
                                       char_vectors=char_vectors,
                                       pos_vectors=pos_vectors,
                                       ner_vectors=ner_vectors,
                                       iob_vectors=iob_vectors,
                                       hidden_size=hidden_size,
                                       drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=self.hidden_size,
                                 hidden_size=self.hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * self.hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * self.hidden_size,
                                 hidden_size=self.hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=self.hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, num_heads=8, drop_prob=0.):
    """Assemble BiDAF with char embeddings; later layers see a doubled hidden size."""
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    # Char embeddings double the embedding width, so every downstream layer
    # is built against 2x the requested hidden size.
    hidden_size *= 2
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, args):
    """Assemble BiDAFExtra: separate context/question embeddings, with the
    context embedding widened by auxiliary features.

    Fix: the expression `args.drop_prob if hasattr(args, 'drop_prob') else 0.`
    was repeated five times; it is hoisted once via `getattr`, which is
    semantically identical.
    """
    super(BiDAFExtra, self).__init__()
    drop_prob = getattr(args, 'drop_prob', 0.)
    self.c_emb = layers.EmbeddingExtra(word_vectors=word_vectors, args=args, aux_feat=True)
    self.q_emb = layers.EmbeddingExtra(word_vectors=word_vectors, args=args, aux_feat=False)
    # Context encoder input is widened by the auxiliary feature count.
    self.c_enc = layers.RNNEncoder(input_size=args.hidden_size + args.num_features,
                                   hidden_size=args.hidden_size,
                                   num_layers=1,
                                   drop_prob=drop_prob)
    self.q_enc = layers.RNNEncoder(input_size=args.hidden_size,
                                   hidden_size=args.hidden_size,
                                   num_layers=1,
                                   drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * args.hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * args.hidden_size,
                                 hidden_size=args.hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=args.hidden_size,
                                  drop_prob=drop_prob)
    self.args = args
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    """Assemble BiDAF_charCNN: separate word and char-CNN embeddings merged
    through a highway network, doubling the effective hidden width."""
    super(BiDAF_charCNN, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.char_emb = layers.CharEmbedding(char_vectors=char_vectors,
                                         hidden_size=hidden_size,
                                         drop_prob=drop_prob)
    # Highway over the concatenated word+char features (2*hidden_size wide).
    self.hwy = layers.HighwayEncoder(2, 2 * hidden_size)
    self.enc = layers.RNNEncoder(input_size=2 * hidden_size,
                                 hidden_size=2 * hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * 2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * 2 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, char_vec, word_len, hidden_size, emb_size=500, drop_prob=0.):
    """Assemble BiDAFChar2: rich word+char embedding at `emb_size`, linearly
    resized down to `hidden_size` for the rest of the pipeline."""
    super(BiDAFChar2, self).__init__()
    self.emb = layers.EmbeddingWithChar(word_vectors=word_vectors,
                                        hidden_size=emb_size,
                                        char_vec=char_vec,
                                        word_len=word_len,
                                        drop_prob=drop_prob,
                                        char_prop=0.4,
                                        hwy_drop=drop_prob,
                                        char_dim=200,
                                        act='gelu')
    # Project the emb_size-wide embedding down to hidden_size.
    self.resize = nn.Linear(emb_size, hidden_size)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, drop_prob=0.):
    """Assemble a baseline BiDAF plus projections that map BERT (768-d) and
    GloVe (300-d) vectors down to `hidden_size`."""
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
    # Down-projections for pretrained embeddings, Xavier-initialized.
    self.proj_bert_down = nn.Linear(in_features=768, out_features=hidden_size, bias=True)
    nn.init.xavier_uniform_(self.proj_bert_down.weight, gain=1)
    self.proj_glove_down = nn.Linear(in_features=300, out_features=hidden_size, bias=True)
    nn.init.xavier_uniform_(self.proj_glove_down.weight, gain=1)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.1):
    """Assemble a BiDAF variant that replaces the RNN encoder stage with an
    embedding-encoder block (QANet style)."""
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    # Replaces the RNNEncoder stage of the baseline model.
    self.emb_encoder = layers.EmbeddingEncoder(d_model=hidden_size,
                                               drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=4 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    # NOTE(review): no model-encoder or output layer is constructed here (they
    # were commented out in the original) — presumably handled elsewhere; confirm.
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    """Assemble a BiDAF variant whose encoder stage is a convolutional
    EncoderBlock instead of an RNN."""
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    # Convolutional embedding encoder (replaces the baseline RNNEncoder).
    self.emb_enc = EncoderBlock(conv_num=4, ch_num=64, k=7)
    self.att = layers.BiDAFAttention(hidden_size=hidden_size,
                                     drop_prob=drop_prob)  # TODO
    self.mod = layers.RNNEncoder(input_size=4 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, word_vectors_char, hidden_size, drop_prob=0.):
    """Assemble BiDAF whose embedding layer takes word- and char-level vectors."""
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                word_vectors_char=word_vectors_char,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, use_pos, use_ner, drop_prob=0.):
    """Assemble BiDAF where the encoder input is widened by one scalar feature
    for each enabled tag type (POS and/or NER)."""
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.use_pos = use_pos
    self.use_ner = use_ner
    # One extra input feature per enabled tag type.
    rnn_input_size = hidden_size
    if use_pos:
        rnn_input_size += 1
    if use_ner:
        rnn_input_size += 1
    self.enc = layers.RNNEncoder(input_size=rnn_input_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    """Assemble BiDAF with char embeddings and an extra self-attention layer."""
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    # Self-attention over the 8*hidden_size bidirectional-attention output.
    self.self_att = layers.SelfAttention(hidden_size=8 * hidden_size,
                                         drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
    # NOTE(review): batch_size is hard-coded; confirm it matches the loader.
    self.batch_size = 64
    self.hidden_size = hidden_size
def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    """Assemble BiDAF with char embeddings, self-attention, a projection from
    8h to 2h, and a deeper (4-layer) modeling encoder."""
    super(BiDAF, self).__init__()
    self.hidden_size = hidden_size
    # Embedding also receives character vectors.
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob,
                                char_vectors=char_vectors)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    # NOTE(review): dropout is hard-coded to 0.2 here rather than drop_prob.
    self.selfattention = layers.SelfAttention(input_size=8 * hidden_size,
                                              hidden_size=hidden_size,
                                              dropout=0.2)
    # Projects the 8h attention features down to 2h.
    self.linear = nn.Linear(in_features=8 * self.hidden_size,
                            out_features=2 * self.hidden_size,
                            bias=True)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=4,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, char_dict_size, char_emb_size,
             conv_kernel_size, conv_depth1,
             conv_output_hidden_size, drop_prob=0.):
    """Assemble BiDAF_CBE: BiDAF with a character-level convolutional embedding.

    Fix: the original body began with a bare tuple expression that merely
    listed the parameter names — a dead no-op statement; it has been removed.
    """
    super(BiDAF_CBE, self).__init__()
    self.emb = layers.EmbeddingWithCharLevel(word_vectors=word_vectors,
                                             hidden_size=hidden_size,
                                             drop_prob=drop_prob,
                                             char_dict_size=char_dict_size,
                                             char_emb_size=char_emb_size,
                                             conv_kernel_size=conv_kernel_size,
                                             conv_depth1=conv_depth1,
                                             conv_output_hidden_size=conv_output_hidden_size)
    # Word + char embedding is 2*hidden_size wide at the encoder input.
    self.enc = layers.RNNEncoder(input_size=2 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    """Assemble SelfAttention_and_global: BiDAF attention followed by
    self-attention, with a wider (16h) modeling/output path."""
    super(SelfAttention_and_global, self).__init__()
    self.hidden_size = hidden_size
    self.emb = layers.Char_Embedding(word_vectors=word_vectors,
                                     char_vectors=char_vectors,
                                     hidden_size=hidden_size,
                                     drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    # Self-attention over the 8h attention output; its output is concatenated
    # with the attention output, hence the 16h widths below.
    self.self_att = layers.SelfAttention(hidden_size=8 * hidden_size,
                                         drop_prob=drop_prob)
    self.second_mod = layers.RNNEncoder(input_size=16 * hidden_size,
                                        hidden_size=hidden_size,
                                        num_layers=2,
                                        drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  att_size=16 * hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, kernel_size, filters, drop_prob=0.):
    """Assemble QANet: embedding, embedding encoder, context-query attention,
    a stack of 7 model-encoder blocks, and the QANet output layer.

    Fix: `nn.ModuleList([mod_enc] * 7)` registered the SAME EncoderBlock
    instance seven times, so all seven "blocks" shared one set of weights.
    Seven independent blocks are now constructed (matching the comprehension
    style used by the other QANet-family model in this file).
    """
    super(QANet, self).__init__()
    # Input embedding layer (character-aware).
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob,
                                char_vectors=char_vectors)
    # Resize embedding output to the encoder's channel count.
    self.resize_emb_pe = nn.Linear(in_features=hidden_size,
                                   out_features=filters,
                                   bias=False)
    # Embedding encoder layer.
    self.emb_enc = qa.EncoderBlock(input_size=filters,
                                   kernel_size=kernel_size,
                                   filters=filters,
                                   num_conv_layers=4,
                                   drop_prob=drop_prob)
    # Context-query attention layer.
    self.att = layers.BiDAFAttention(hidden_size=filters,
                                     drop_prob=drop_prob)
    # Model encoder layer: 7 independent blocks.
    self.mod_enc = nn.ModuleList([
        qa.EncoderBlock(input_size=4 * filters,
                        kernel_size=5,
                        filters=filters,
                        num_conv_layers=2,
                        drop_prob=drop_prob)
        for _ in range(7)
    ])
    # QANet output layer.
    self.output = qa.QANetOutput(input_size=4 * filters, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, num_heads, char_embed_drop_prob, drop_prob=0.1):
    """Assemble SketchyReader: char-aware embedding, conv-based stacked
    encoders, BiDAF attention, 7 model-encoder stacks, and a sketchy output."""
    super(SketchyReader, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                char_embed_drop_prob=char_embed_drop_prob,
                                word_embed_drop_prob=drop_prob)
    # Char embeddings double the embedding width for the resizers below.
    hidden_size *= 2
    # 1-D conv resizers map everything onto the 128-channel encoder width.
    self.c_resizer = layers.Initialized_Conv1d(hidden_size, 128)
    self.q_resizer = layers.Initialized_Conv1d(hidden_size, 128)
    self.model_resizer = layers.Initialized_Conv1d(512, 128)
    # Embedding encoder layer.
    self.enc = layers.StackedEncoder(num_conv_blocks=4,
                                     kernel_size=7,
                                     num_heads=num_heads,
                                     dropout=drop_prob)
    # Context-query attention layer.
    self.att = layers.BiDAFAttention(hidden_size=128, drop_prob=drop_prob)
    # Seven independent model-encoder stacks.
    self.model_encoder_layers = nn.ModuleList([
        layers.StackedEncoder(num_conv_blocks=2,
                              kernel_size=7,
                              dropout=drop_prob)
        for _ in range(7)
    ])
    # Output layer.
    self.out = layers.SketchyOutput(hidden_size=128)
def test2():
    """Smoke-test BiDAFAttention.forward on random context/query tensors."""
    batch, c_len, q_len, hidden_size = 4, 10, 6, 8
    # Inputs are 2*hidden_size wide, matching the attention's expected width.
    c = torch.randn(batch, c_len, hidden_size * 2)
    q = torch.randn(batch, q_len, hidden_size * 2)
    attention = layers.BiDAFAttention(hidden_size * 2)
    return attention.forward(c, q, None, None)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0., twist_embeddings=False):
    """Assemble BiDAF_charCNN_BERTEnc_BERTMod: char-CNN embeddings with BERT-style
    transformer encoders replacing both RNN stages."""
    super(BiDAF_charCNN_BERTEnc_BERTMod, self).__init__()
    self.twist_embeddings = twist_embeddings
    # Interleaving index: [0, h, 1, h+1, ..., h-1, 2h-1], stored as a buffer
    # so it moves with the module across devices.
    idx_list = [j for i in range(hidden_size) for j in (i, hidden_size + i)]
    self.register_buffer('idx_twist', torch.tensor(idx_list))
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.char_emb = layers.CharEmbedding(char_vectors=char_vectors,
                                         hidden_size=hidden_size,
                                         drop_prob=drop_prob)
    self.hwy = layers.HighwayEncoder(2, 2 * hidden_size)
    # Transformer encoder in place of the contextual RNN encoder.
    self.enc = bert_layers.BertEncoder(n_layers=3,
                                       d_feature=2 * hidden_size,
                                       n_heads=8,
                                       out_size=2 * hidden_size,
                                       d_ff=2 * hidden_size,
                                       dropout_prob=0.1,
                                       ff_activation=F.relu)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    # Transformer encoder in place of the modeling RNN.
    self.mod = bert_layers.BertEncoder(n_layers=3,
                                       d_feature=8 * hidden_size,
                                       n_heads=8,
                                       out_size=2 * hidden_size,
                                       d_ff=2 * hidden_size,
                                       dropout_prob=0.1,
                                       ff_activation=F.relu)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, vectors, hidden_size, char_limit, use_transformer, use_GRU, drop_prob=.1, **kwargs):
    """Assemble a BiDAF that can run with either RNN (LSTM/GRU) stages or
    transformer encoder stacks, selected by `use_transformer`."""
    super(BiDAF, self).__init__()
    self.use_transformer = use_transformer
    self.use_GRU = use_GRU
    self.hidden_size = hidden_size
    self.emb = layers.Embedding(vectors=vectors,
                                c2w_size=kwargs['c2w_size'],
                                hidden_size=hidden_size,
                                drop_prob=drop_prob,
                                char_limit=char_limit)
    if not use_transformer:
        # RNN path: encoders emit 2*hidden_size features.
        self.enc = layers.RNNEncoder(input_size=hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=1,
                                     drop_prob=drop_prob,
                                     use_GRU=use_GRU)
        self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                     hidden_size=hidden_size,
                                     num_layers=2,
                                     drop_prob=drop_prob,
                                     use_GRU=use_GRU)
        self.out = layers.BiDAFOutput(hidden_size=2 * hidden_size,
                                      drop_prob=drop_prob,
                                      use_transformer=use_transformer)
    else:
        # Transformer path: encoder/modeling stacks configured from kwargs.
        self.heads = kwargs['heads']
        self.inter_size = kwargs['inter_size']
        self.enc = layers.TransformerEncoderStack(N=kwargs['enc_blocks'],
                                                  heads=self.heads,
                                                  input_size=hidden_size,
                                                  output_size=hidden_size,
                                                  inter_size=self.inter_size,
                                                  num_conv=kwargs['enc_convs'],
                                                  drop_prob=drop_prob,
                                                  p_sdd=kwargs['p_sdd'])
        # Squeeze the 4h attention output back to h for the modeling stack.
        self.squeeze = layers.InitializedLayer(4 * hidden_size, hidden_size, bias=False)
        self.mod = layers.TransformerEncoderStack(N=kwargs['mod_blocks'],
                                                  heads=self.heads,
                                                  input_size=hidden_size,
                                                  output_size=hidden_size,
                                                  inter_size=self.inter_size,
                                                  num_conv=kwargs['mod_convs'],
                                                  drop_prob=drop_prob,
                                                  p_sdd=kwargs['p_sdd'])
        self.out = layers.QAOutput(2 * hidden_size)
    # Attention width depends on the encoder path: h (transformer) vs 2h (RNN);
    # output is (batch_size, seq_len, 4 * input_hidden_size).
    self.att = layers.BiDAFAttention(hidden_size=(1 if self.use_transformer else 2) * hidden_size,
                                     drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, char_vocab_size, drop_prob=0., bidaf_layers=2):
    """Assemble BiDAF with `bidaf_layers` stacked attention passes, each with
    its own encoder, attention, and gating linear layer."""
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                char_vocab_size=char_vocab_size,
                                char_embedding_size=100,
                                kernel_size=5,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=2 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    # One encoder + attention + gate per extra BiDAF pass.
    self.encs_att = nn.ModuleList([layers.RNNEncoder(input_size=8 * hidden_size,
                                                     hidden_size=hidden_size,
                                                     num_layers=1,
                                                     drop_prob=drop_prob)
                                   for _ in range(bidaf_layers)])
    self.atts = nn.ModuleList([layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                                     drop_prob=drop_prob)
                               for _ in range(bidaf_layers)])
    self.gates = nn.ModuleList([nn.Linear(8 * hidden_size, 8 * hidden_size)
                                for _ in range(bidaf_layers)])
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
    self.drop_out = nn.Dropout(drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0., args=None):
    """Assemble a hybrid BiDAF/QANet: both RNN and conv EncoderBlock stages are
    constructed, configured from `args`."""
    super(BiDAF, self).__init__()
    self.emb = layers.QAEmbedding(word_vectors=word_vectors,
                                  char_vectors=char_vectors,
                                  hidden_size=hidden_size,
                                  drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    # Convolutional encoder blocks for context and question.
    self.context_enc = layers.EncoderBlock(conv_layers=args.num_conv_layers,
                                           k=args.encoder_k,
                                           hidden_dim=hidden_size,
                                           dropout_prob=drop_prob,
                                           attention_heads=args.attention_heads)
    self.question_enc = layers.EncoderBlock(conv_layers=args.num_conv_layers,
                                            k=args.encoder_k,
                                            hidden_dim=hidden_size,
                                            dropout_prob=drop_prob,
                                            attention_heads=args.attention_heads)
    # Depthwise-separable convs resize the 500-d inputs to hidden_size.
    self.context_conv = DepthwiseSeparableConv2(500, hidden_size, 5)
    self.question_conv = DepthwiseSeparableConv2(500, hidden_size, 5)
    self.att = layers.BiDAFAttention(hidden_size=hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.mod_enc = layers.EncoderBlock(conv_layers=2,
                                       k=5,
                                       hidden_dim=hidden_size,
                                       dropout_prob=drop_prob,
                                       attention_heads=args.attention_heads)
    # Resize the 4h attention output back down to hidden_size.
    self.cq_resizer = DepthwiseSeparableConv2(hidden_size * 4, hidden_size, 5)
    self.out = layers.BiDAFOutput2(hidden_size=hidden_size,
                                   drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, drop_prob=0.):
    """Assemble BiDAFBERTEmbeddings — a BiDAF skeleton whose embedding layer is
    still to be replaced with BERT embeddings.

    Fix: the output layer was constructed via `layer.BiDAFOutput`, a typo for
    the `layers` module used by every other model in this file; as written it
    raised NameError at construction time.
    """
    super(BiDAFBERTEmbeddings, self).__init__()
    self.emb = None  # TODO: BertEmbeddings.
    # input_size is left as None until the BERT embedding width is wired in.
    self.enc = layers.RNNEncoder(input_size=None,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, args):
    """Assemble QANetRevised: QA embedding, per-side encoder blocks, BiDAF
    attention, three stacked model-encoder blocks, and conv resizers."""
    super(QANetRevised, self).__init__()
    self.hidden_size = args.hidden_size
    self.emb = layers.QAEmbedding(word_vectors=word_vectors,
                                  char_vectors=char_vectors,
                                  hidden_size=self.hidden_size,
                                  drop_prob=args.drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=self.hidden_size,
                                     drop_prob=args.drop_prob)
    # Encoder blocks for the context and query sides.
    self.context_encoder = layers.EncoderBlock(conv_layers=args.num_conv_layers,
                                               k=args.encoder_k,
                                               hidden_dim=self.hidden_size,
                                               dropout_prob=args.drop_prob,
                                               attention_heads=args.attention_heads)
    self.query_encoder = layers.EncoderBlock(conv_layers=args.num_conv_layers,
                                             k=args.encoder_k,
                                             hidden_dim=self.hidden_size,
                                             dropout_prob=args.drop_prob,
                                             attention_heads=args.attention_heads)
    # Three distinct model-encoder blocks, also kept as named attributes.
    self.encoder_block_1 = layers.EncoderBlock(conv_layers=2,
                                               k=5,
                                               hidden_dim=self.hidden_size,
                                               dropout_prob=args.drop_prob,
                                               attention_heads=args.attention_heads)
    self.encoder_block_2 = layers.EncoderBlock(conv_layers=2,
                                               k=5,
                                               hidden_dim=self.hidden_size,
                                               dropout_prob=args.drop_prob,
                                               attention_heads=args.attention_heads)
    self.encoder_block_3 = layers.EncoderBlock(conv_layers=2,
                                               k=5,
                                               hidden_dim=self.hidden_size,
                                               dropout_prob=args.drop_prob,
                                               attention_heads=args.attention_heads)
    self.stacked_encoder_blocks = nn.ModuleList([self.encoder_block_1,
                                                 self.encoder_block_2,
                                                 self.encoder_block_3])
    self.output = layers.QANetOutput(hidden_size=self.hidden_size,
                                     dropout_prob=args.drop_prob)
    # Conv resizers: 4h attention output -> h, and 500-d inputs -> h.
    self.resize_attn = layers.DepthwiseSeparableConv(self.hidden_size * 4,
                                                     self.hidden_size, 5)
    self.resize_context = DepthwiseSeparableConv2(500, self.hidden_size, 7)
    self.resize_query = DepthwiseSeparableConv2(500, self.hidden_size, 7)
def __init__(self, weights_matrix, hidden_size, drop_prob=0.):
    """Assemble a baseline BiDAF from a pre-built embedding weights matrix."""
    super(BiDAF, self).__init__()
    # NOTE(review): drop_prob is not forwarded to emb or out — confirm intended.
    self.emb = layers.Embedding(weights_matrix=weights_matrix,
                                hidden_size=hidden_size)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size)
def __init__(self, word_vectors, hidden_size, drop_prob=0.):
    """Assemble BiDAF whose modeling stage is a tensor-product-representation
    RNN (TPRRNN) instead of the usual RNN encoder."""
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    # TPR modeling layer over the 8h attention output.
    self.mod = layers.TPRRNN(word_emb_size=(8 * hidden_size),
                             n_symbols=100,
                             d_symbols=10,
                             n_roles=20,
                             d_roles=10,
                             hidden_size=hidden_size)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0., enable_EM=True, enable_posner=True, enable_selfatt=True):
    """Assemble a BiDAF variant with optional exact-match features, POS/NER
    features, and a self-matching attention stage."""
    super(BiDAF, self).__init__()
    self.embd_size = hidden_size
    # Effective feature width: word + char embeddings...
    self.d = self.embd_size * 2
    self.enable_EM = enable_EM
    if enable_EM:
        self.d += 2   # ...plus exact-match word features...
    if enable_posner:
        self.d += 10  # ...plus POS/NER word features.
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=self.embd_size,
                                drop_prob=drop_prob,
                                enable_posner=enable_posner)
    self.enc = layers.RNNEncoder(input_size=self.d,
                                 hidden_size=self.d,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * self.d,
                                     drop_prob=drop_prob)
    self.enable_selfatt = enable_selfatt
    if enable_selfatt:
        # Self-matching stage; its output doubles the modeling input width.
        self.selfMatch = layers.StaticDotAttention(memory_size=2 * self.d,
                                                   input_size=2 * self.d,
                                                   attention_size=2 * self.d,
                                                   drop_prob=drop_prob)
        self.mod = layers.RNNEncoder(input_size=4 * self.d,
                                     hidden_size=self.d,
                                     num_layers=2,
                                     drop_prob=drop_prob)
    else:
        self.mod = layers.RNNEncoder(input_size=2 * self.d,
                                     hidden_size=self.d,
                                     num_layers=2,
                                     drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=self.d,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    """Assemble BiDAF with separate word/char embedding modules fused through
    a highway network (2*hidden_size wide) before the encoder."""
    super(BiDAF, self).__init__()
    self.hidden_size = hidden_size
    self.word_emb = layers.WordEmbedding(word_vectors, hidden_size)
    self.char_emb = layers.CharEmbedding(char_vectors, hidden_size)
    # Highway network over the concatenated word+char features.
    self.hwy = layers.HighwayEncoder(2, hidden_size * 2)
    self.enc = layers.RNNEncoder(input_size=hidden_size * 2,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, bert_base_model, hidden_size, drop_prob=0.):
    """Assemble BertBidaf: a pretrained BERT-base model feeding a BiDAF head."""
    super(BertBidaf, self).__init__()
    # BERT-base emits 768-dimensional token representations.
    BERT_BASE_HIDDEN_SIZE = 768
    self.bert = bert_base_model
    self.enc = layers.RNNEncoder(input_size=BERT_BASE_HIDDEN_SIZE,
                                 hidden_size=hidden_size,
                                 num_layers=1,
                                 drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)
    # NOTE(review): modeling input is 2h, not the usual 8h attention width —
    # confirm against this model's forward().
    self.mod = layers.RNNEncoder(input_size=2 * hidden_size,
                                 hidden_size=hidden_size,
                                 num_layers=2,
                                 drop_prob=drop_prob)
    self.out = BertBidafOutput(hidden_size=hidden_size,
                               drop_prob=drop_prob)