def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                drop_prob=drop_prob, char_vectors=char_vectors)  # added char_vectors
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    ### start our code:
    self.selfattention = layers.SelfAttention(input_size=8 * hidden_size,
                                              hidden_size=hidden_size, dropout=0.2)
    ### end our code
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, pos_vectors, ner_vectors, iob_vectors,
             hidden_size, drop_prob=0.):
    super(BiDAF_CharTag, self).__init__()
    self.hidden_size = hidden_size
    self.emb = layers.EmbeddingCharTag(word_vectors=word_vectors, char_vectors=char_vectors,
                                       pos_vectors=pos_vectors, ner_vectors=ner_vectors,
                                       iob_vectors=iob_vectors, hidden_size=hidden_size,
                                       drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=self.hidden_size, hidden_size=self.hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * self.hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * self.hidden_size, hidden_size=self.hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=self.hidden_size, drop_prob=drop_prob)
def __init__(self, config):
    super(BertBidafForQuestionAnswering, self).__init__(config)
    self.num_labels = config.num_labels
    BERT_BASE_HIDDEN_SIZE = 768
    self.hidden_size = 100
    self.drop_prob = 0.2
    # self.bert = BertModel(config)
    self.bert = BertModel.from_pretrained('bert-base-cased')
    self.enc = layers.RNNEncoder(input_size=BERT_BASE_HIDDEN_SIZE, hidden_size=self.hidden_size,
                                 num_layers=1, drop_prob=self.drop_prob)
    self.att = BertBidafAttention(hidden_size=2 * self.hidden_size, drop_prob=self.drop_prob)
    self.mod = layers.RNNEncoder(input_size=2 * self.hidden_size, hidden_size=self.hidden_size,
                                 num_layers=2, drop_prob=self.drop_prob)
    self.out = BertBidafOutput(hidden_size=self.hidden_size, drop_prob=self.drop_prob)
    self.init_weights()
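
# A possible forward pass for the constructor above; this is a hedged sketch, not the original
# code. It assumes a packed question+context input with `input_ids`/`attention_mask`, that the
# starter-style RNNEncoder takes (x, lengths), and that BertBidafAttention / BertBidafOutput
# follow the usual (hidden_states, mask) BiDAF calling conventions.
def forward(self, input_ids, attention_mask):
    bert_states = self.bert(input_ids, attention_mask=attention_mask)[0]  # (batch, seq_len, 768)
    mask = attention_mask.bool()
    lengths = mask.sum(-1)
    enc = self.enc(bert_states, lengths)  # (batch, seq_len, 2 * hidden_size)
    att = self.att(enc, mask)             # BiDAF-style attention over the joint sequence
    mod = self.mod(att, lengths)          # (batch, seq_len, 2 * hidden_size)
    return self.out(att, mod, mask)       # start/end logits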
def __init__(self, word_vectors, char_vectors, pos_vectors, ner_vectors, hidden_size,
             drop_prob=0., freeze_tag=True):
    super(BiDAF_tag_ext, self).__init__()
    self.emb = layers.Embedding_Tag_Ext(word_vectors=word_vectors, char_vectors=char_vectors,
                                        pos_vectors=pos_vectors, ner_vectors=ner_vectors,
                                        hidden_size=hidden_size, drop_prob=drop_prob,
                                        freeze_tag=freeze_tag)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vec, word_len, hidden_size, emb_size=500, drop_prob=0.):
    super(BiDAFChar2, self).__init__()
    self.emb = layers.EmbeddingWithChar(word_vectors=word_vectors, hidden_size=emb_size,
                                        char_vec=char_vec, word_len=word_len, drop_prob=drop_prob,
                                        char_prop=0.4, hwy_drop=drop_prob, char_dim=200, act='gelu')
    self.resize = nn.Linear(emb_size, hidden_size)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, model_name, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF2, self).__init__()
    # With the char embedding added, the effective hidden size doubles.
    self.hidden_size = hidden_size * 2
    self.emb = layers.Embedding(model_name=model_name, char_vectors=char_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    # input_size = self.hidden_size + 2 because two extra features (avg_attention) are appended
    # to both the char and word embeddings to boost performance. avg_attention uses an attention
    # mechanism so the model itself learns a weighted average over the vectors.
    self.enc = layers.RNNEncoder(input_size=self.hidden_size + 2, hidden_size=self.hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.highway = layers.HighwayEncoder(2, 4 * hidden_size)
    self.mod = layers.RNNEncoder(input_size=2 * self.hidden_size, hidden_size=self.hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    # self.sim = nn.CosineSimilarity(dim=1, eps=1e-6)
    self.qa_outputs = nn.Linear(2 * self.hidden_size, 2)
def __init__(self, word_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
    self.proj_bert_down = nn.Linear(in_features=768, out_features=hidden_size, bias=True)
    nn.init.xavier_uniform_(self.proj_bert_down.weight, gain=1)
    self.proj_glove_down = nn.Linear(in_features=300, out_features=hidden_size, bias=True)
    nn.init.xavier_uniform_(self.proj_glove_down.weight, gain=1)
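
# The constructor above follows the standard BiDAF layering (embed -> encode -> attend ->
# model -> output) plus two projection heads for pretrained 768-dim BERT and 300-dim GloVe
# vectors. A minimal forward sketch under the usual starter-code conventions (RNNEncoder takes
# (x, lengths), BiDAFAttention takes (c_enc, q_enc, c_mask, q_mask), index 0 is padding);
# where proj_bert_down / proj_glove_down are applied is not visible here, so they are omitted.
def forward(self, cw_idxs, qw_idxs):
    c_mask, q_mask = cw_idxs != 0, qw_idxs != 0
    c_len, q_len = c_mask.sum(-1), q_mask.sum(-1)
    c_emb, q_emb = self.emb(cw_idxs), self.emb(qw_idxs)            # (batch, len, hidden_size)
    c_enc, q_enc = self.enc(c_emb, c_len), self.enc(q_emb, q_len)  # (batch, len, 2 * hidden_size)
    att = self.att(c_enc, q_enc, c_mask, q_mask)                   # (batch, c_len, 8 * hidden_size)
    mod = self.mod(att, c_len)                                     # (batch, c_len, 2 * hidden_size)
    return self.out(att, mod, c_mask)                              # start/end log-probabilities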
def __init__(self, word_vectors, word_vectors_char, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, word_vectors_char=word_vectors_char,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, use_pos, use_ner, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.use_pos = use_pos
    self.use_ner = use_ner
    rnn_input_size = hidden_size
    if use_pos:
        rnn_input_size += 1
    if use_ner:
        rnn_input_size += 1
    self.enc = layers.RNNEncoder(input_size=rnn_input_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
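
# A sketch (an assumption, not the original code) of how scalar POS/NER features could be
# appended per token so the encoder input width matches rnn_input_size above. `c_pos` and
# `c_ner` are hypothetical (batch, c_len) feature tensors.
def _add_tag_features(self, c_emb, c_pos=None, c_ner=None):
    feats = [c_emb]                                # (batch, c_len, hidden_size)
    if self.use_pos:
        feats.append(c_pos.unsqueeze(-1).float())  # one extra channel per token
    if self.use_ner:
        feats.append(c_ner.unsqueeze(-1).float())  # one extra channel per token
    return torch.cat(feats, dim=-1)                # (batch, c_len, rnn_input_size)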
def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.hidden_size = hidden_size
    self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                drop_prob=drop_prob, char_vectors=char_vectors)  # added character vectors
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    ### start our code:
    self.selfattention = layers.SelfAttention(input_size=8 * hidden_size,
                                              hidden_size=hidden_size, dropout=0.2)
    ### end our code
    self.linear = nn.Linear(in_features=8 * self.hidden_size, out_features=2 * self.hidden_size,
                            bias=True)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=4, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    # print("vectors: ", word_vectors)
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.self_att = layers.SelfAttention(hidden_size=8 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
    self.batch_size = 64
    self.hidden_size = hidden_size
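
# Relative to the plain BiDAF forward, this variant also consumes character indices in the
# embedding layer and refines the attention output with self-attention before modeling.
# A sketch of that middle section (layer signatures assumed, as elsewhere):
def _attend(self, c_enc, q_enc, c_mask, q_mask, c_len):
    att = self.att(c_enc, q_enc, c_mask, q_mask)  # (batch, c_len, 8 * hidden_size)
    att = self.self_att(att, c_mask)              # output width assumed unchanged (8 * hidden_size)
    return self.mod(att, c_len)                   # (batch, c_len, 2 * hidden_size)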
def __init__(self, word_vectors, hidden_size, char_dict_size, char_emb_size,
             conv_kernel_size, conv_depth1, conv_output_hidden_size, drop_prob=0.):
    super(BiDAF_CBE, self).__init__()
    self.emb = layers.EmbeddingWithCharLevel(word_vectors=word_vectors, hidden_size=hidden_size,
                                             drop_prob=drop_prob, char_dict_size=char_dict_size,
                                             char_emb_size=char_emb_size,
                                             conv_kernel_size=conv_kernel_size,
                                             conv_depth1=conv_depth1,
                                             conv_output_hidden_size=conv_output_hidden_size)
    self.enc = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, args):
    super(BiDAFExtra, self).__init__()
    drop_prob = getattr(args, 'drop_prob', 0.)
    self.c_emb = layers.EmbeddingExtra(word_vectors=word_vectors, args=args, aux_feat=True)
    self.q_emb = layers.EmbeddingExtra(word_vectors=word_vectors, args=args, aux_feat=False)
    self.c_enc = layers.RNNEncoder(input_size=args.hidden_size + args.num_features,
                                   hidden_size=args.hidden_size, num_layers=1, drop_prob=drop_prob)
    self.q_enc = layers.RNNEncoder(input_size=args.hidden_size, hidden_size=args.hidden_size,
                                   num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * args.hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * args.hidden_size, hidden_size=args.hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=args.hidden_size, drop_prob=drop_prob)
    self.args = args
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF_charCNN, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.char_emb = layers.CharEmbedding(char_vectors=char_vectors, hidden_size=hidden_size,
                                         drop_prob=drop_prob)
    self.hwy = layers.HighwayEncoder(2, 2 * hidden_size)
    self.enc = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=2 * hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * 2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * 2 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
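
# A sketch of the embedding path implied by the modules above (calling conventions assumed):
# word and character embeddings are built separately, concatenated to width 2 * hidden_size,
# and passed through the highway encoder before the contextual RNN.
def _embed(self, w_idxs, c_idxs):
    w_emb = self.emb(w_idxs)                  # (batch, seq_len, hidden_size)
    c_emb = self.char_emb(c_idxs)             # (batch, seq_len, hidden_size)
    emb = torch.cat([w_emb, c_emb], dim=-1)   # (batch, seq_len, 2 * hidden_size)
    return self.hwy(emb)                      # (batch, seq_len, 2 * hidden_size)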
def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    super(SelfAttention_and_global, self).__init__()
    self.hidden_size = hidden_size
    self.emb = layers.Char_Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                     hidden_size=hidden_size, drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
                                     drop_prob=drop_prob)  # replace this with yours
    self.self_att = layers.SelfAttention(hidden_size=8 * hidden_size, drop_prob=drop_prob)
    self.second_mod = layers.RNNEncoder(input_size=16 * hidden_size, hidden_size=hidden_size,
                                        num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, att_size=16 * hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, num_heads=8, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    hidden_size *= 2  # update hidden size for other layers due to char embeddings
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, vectors, hidden_size, char_limit, use_transformer, use_GRU,
             drop_prob=.1, **kwargs):
    super(BiDAF, self).__init__()
    self.use_transformer = use_transformer
    self.use_GRU = use_GRU
    self.hidden_size = hidden_size
    self.emb = layers.Embedding(vectors=vectors, c2w_size=kwargs['c2w_size'],
                                hidden_size=hidden_size, drop_prob=drop_prob,
                                char_limit=char_limit)
    if not use_transformer:
        self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,  # output = 2*hidden_size
                                     num_layers=1, drop_prob=drop_prob, use_GRU=use_GRU)
        self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,  # output = 2*hidden_size
                                     num_layers=2, drop_prob=drop_prob, use_GRU=use_GRU)
        self.out = layers.BiDAFOutput(hidden_size=2 * hidden_size, drop_prob=drop_prob,
                                      use_transformer=use_transformer)
    else:
        self.heads = kwargs['heads']
        self.inter_size = kwargs['inter_size']
        self.enc = layers.TransformerEncoderStack(N=kwargs['enc_blocks'], heads=self.heads,
                                                  input_size=hidden_size, output_size=hidden_size,
                                                  inter_size=self.inter_size,
                                                  num_conv=kwargs['enc_convs'],
                                                  drop_prob=drop_prob, p_sdd=kwargs['p_sdd'])
        self.squeeze = layers.InitializedLayer(4 * hidden_size, hidden_size, bias=False)
        self.mod = layers.TransformerEncoderStack(N=kwargs['mod_blocks'], heads=self.heads,
                                                  input_size=hidden_size, output_size=hidden_size,
                                                  inter_size=self.inter_size,
                                                  num_conv=kwargs['mod_convs'],
                                                  drop_prob=drop_prob, p_sdd=kwargs['p_sdd'])
        self.out = layers.QAOutput(2 * hidden_size)
    self.att = layers.BiDAFAttention(hidden_size=(1 if self.use_transformer else 2) * hidden_size,
                                     drop_prob=drop_prob)  # output: (batch_size, seq_len, 4 * input_hidden_size)
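
# A sketch of how the transformer branch above could be wired, QANet-style. This is an
# assumption: the three modeling passes, the layer signatures, and the pairing fed to QAOutput
# are not shown in the constructor. The attention output is 4 * hidden_size here, which
# `squeeze` projects back down to hidden_size before the modeling stack.
def _transformer_branch(self, c_emb, q_emb, c_mask, q_mask):
    c_enc, q_enc = self.enc(c_emb, c_mask), self.enc(q_emb, q_mask)  # (batch, len, hidden_size)
    att = self.att(c_enc, q_enc, c_mask, q_mask)                     # (batch, c_len, 4 * hidden_size)
    m0 = self.mod(self.squeeze(att), c_mask)                         # (batch, c_len, hidden_size)
    m1 = self.mod(m0, c_mask)
    m2 = self.mod(m1, c_mask)
    return self.out(torch.cat([m0, m1], -1), torch.cat([m0, m2], -1), c_mask)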
def __init__(self, model_name, hidden_size, drop_prob=0.):
    super(LSTM_highway, self).__init__()
    self.albert = AlbertModel.from_pretrained(model_name)
    input_size = size_map[model_name]
    self.enc = layers.RNNEncoder(input_size=input_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.dec = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.highway = layers.HighwayEncoder(2, 2 * hidden_size)
    self.qa_outputs = nn.Linear(2 * hidden_size, 2)
def __init__(self, word_vectors, hidden_size, char_vectors, drop_prob=0.):
    super(Final_Model, self).__init__()
    self.hidden_size = hidden_size
    self.emb = layers.Char_Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                     hidden_size=hidden_size, drop_prob=drop_prob)
    self.pointnetGlobal = layers.PointNet(hidden_size=hidden_size, kernel_size=1)
    self.WordCNN = layers.WordCNN(hidden_size=hidden_size, kernel_size=5, padding=2)
    self.enc_global = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                        num_layers=1, drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    # self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size,
    #                                  drop_prob=drop_prob)  # replace this with yours
    self.global_att = layers.GlobalBiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=10 * hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.self_att = layers.SelfAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.second_mod = layers.RNNEncoder(input_size=4 * hidden_size, hidden_size=hidden_size,
                                        num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, att_size=4 * hidden_size,
                                  drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0., args=None):
    super(BiDAF, self).__init__()
    self.emb = layers.QAEmbedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                  hidden_size=hidden_size, drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.context_enc = layers.EncoderBlock(conv_layers=args.num_conv_layers, k=args.encoder_k,
                                           hidden_dim=hidden_size, dropout_prob=drop_prob,
                                           attention_heads=args.attention_heads)
    self.question_enc = layers.EncoderBlock(conv_layers=args.num_conv_layers, k=args.encoder_k,
                                            hidden_dim=hidden_size, dropout_prob=drop_prob,
                                            attention_heads=args.attention_heads)
    self.context_conv = DepthwiseSeparableConv2(500, hidden_size, 5)
    self.question_conv = DepthwiseSeparableConv2(500, hidden_size, 5)
    self.att = layers.BiDAFAttention(hidden_size=hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.mod_enc = layers.EncoderBlock(conv_layers=2, k=5, hidden_dim=hidden_size,
                                       dropout_prob=drop_prob,
                                       attention_heads=args.attention_heads)
    self.cq_resizer = DepthwiseSeparableConv2(hidden_size * 4, hidden_size, 5)
    self.out = layers.BiDAFOutput2(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_to_idx, seq_max_len):
    self.use_cuda = False
    self.embedding_size = 300
    self.encoder_hsz = self.embedding_size
    self.num_rnn_layers = 2
    self.learning_rate = 0.01
    self.rnn_dropout = 0.1
    self.seq_max_len = seq_max_len
    self.dict = word_to_idx
    self.NULL_IDX = 0
    self.embeds = nn.Embedding(len(self.dict), self.embedding_size, padding_idx=self.NULL_IDX)
    # self.embeds = nn.Linear(constants.MAX_LEN, self.embedding_size)
    self.encoder = layers.RNNEncoder(self.embedding_size, self.encoder_hsz,
                                     self.num_rnn_layers, self.rnn_dropout)
    self.mlp = layers.FeatureExtractor(self.encoder_hsz * self.seq_max_len, 2)
    self.optims = {
        'encoder': optim.Adam(self.encoder.parameters(), lr=1e-5),
        'mlp': optim.Adam(self.mlp.parameters(), lr=1e-5),
    }
    self.criterion = nn.NLLLoss()
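
# A sketch of a single training step with the optimizers and criterion above (assumed usage:
# shapes are illustrative, RNNEncoder is taken to return per-token states of width encoder_hsz,
# and FeatureExtractor is taken to end in log-probabilities, since NLLLoss expects them).
def train_step(self, xs, lengths, labels):
    for opt in self.optims.values():
        opt.zero_grad()
    emb = self.embeds(xs)                               # (batch, seq_max_len, embedding_size)
    enc = self.encoder(emb, lengths)                    # (batch, seq_max_len, encoder_hsz)
    log_probs = self.mlp(enc.reshape(enc.size(0), -1))  # (batch, 2)
    loss = self.criterion(log_probs, labels)
    loss.backward()
    for opt in self.optims.values():
        opt.step()
    return loss.item()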
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    # self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
    #                              num_layers=1, drop_prob=drop_prob)
    # self.transformer = make_model(word_vectors, drop_prob, hidden_size)
    self.emb_enc = EncoderBlock(conv_num=4, ch_num=64, k=7)
    self.att = layers.BiDAFAttention(hidden_size=hidden_size, drop_prob=drop_prob)  # TODO
    self.mod = layers.RNNEncoder(input_size=4 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, hidden_size, drop_prob=0.):
    super(BiDAFBERTEmbeddings, self).__init__()
    self.emb = None  # TODO: BertEmbeddings.
    self.enc = layers.RNNEncoder(input_size=None, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.1):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    # self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
    #                              num_layers=1, drop_prob=drop_prob)
    self.emb_encoder = layers.EmbeddingEncoder(d_model=hidden_size, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=4 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    # self.model_encoder = layers.ModelEncoder(d_model=hidden_size, drop_prob=drop_prob)
    # self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)
def __init__(self, embeddings: InputEmbeddings, hidden_size, drop_prob=0.):
    super(SLQA, self).__init__()
    word_vectors = embeddings.word_vectors
    char_vectors = embeddings.char_vectors
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=hidden_size, drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = AlignedAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.p_fusion1 = FusionLayer(2 * hidden_size)
    self.q_fusion1 = FusionLayer(2 * hidden_size)
    self.p_enc_eq_13 = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                         num_layers=1, drop_prob=drop_prob)
    self.q_enc_eq_13 = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                         num_layers=1, drop_prob=drop_prob)
    self.self_attention = FusedSelfAttention(2 * hidden_size)
    self.p_enc_eq_17 = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                         num_layers=1, drop_prob=drop_prob)
    self.q_enc_eq_17 = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                         num_layers=1, drop_prob=drop_prob)
    self.q_linear_align_18 = LinearAlign(2 * hidden_size)
    self.bilinear_start = BilinearSeqAtt(2 * hidden_size, 2 * hidden_size)
    self.bilinear_end = BilinearSeqAtt(2 * hidden_size, 2 * hidden_size)
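
# FusionLayer is not defined in this snippet. SLQA-style models commonly fuse a representation x
# with its aligned evidence y through a gated combination; a minimal, self-contained sketch of
# that idea (an assumption about the intended behavior, not the authors' implementation):
class GatedFusion(nn.Module):
    def __init__(self, hidden_size):
        super(GatedFusion, self).__init__()
        self.fuse = nn.Linear(4 * hidden_size, hidden_size)
        self.gate = nn.Linear(4 * hidden_size, hidden_size)

    def forward(self, x, y):
        z = torch.cat([x, y, x * y, x - y], dim=-1)  # (batch, len, 4 * hidden_size)
        m = torch.tanh(self.fuse(z))                 # fused candidate representation
        g = torch.sigmoid(self.gate(z))              # per-dimension gate
        return g * m + (1 - g) * x                   # fall back to x where the gate is closed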
def __init__(self, hidden_size, drop_prob=0.):
    super(FlatResnet, self).__init__()
    hidden_size = 3 * hidden_size
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, is_bidirectional=True, drop_prob=drop_prob)
def __init__(self, weights_matrix, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.emb = layers.Embedding(weights_matrix=weights_matrix, hidden_size=hidden_size)
    self.enc = layers.RNNEncoder(input_size=hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size)
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.,
             enable_EM=True, enable_posner=True, enable_selfatt=True):
    super(BiDAF, self).__init__()
    self.embd_size = hidden_size
    self.d = self.embd_size * 2  # word_embedding + char_embedding
    self.enable_EM = enable_EM
    if enable_EM:
        self.d += 2  # word_feature
    if enable_posner:
        self.d += 10  # word_feature
    self.emb = layers.Embedding(word_vectors=word_vectors, char_vectors=char_vectors,
                                hidden_size=self.embd_size, drop_prob=drop_prob,
                                enable_posner=enable_posner)
    self.enc = layers.RNNEncoder(input_size=self.d, hidden_size=self.d,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * self.d, drop_prob=drop_prob)
    self.enable_selfatt = enable_selfatt
    if enable_selfatt:
        # self.selfMatch = layers.SelfMatcher(in_size=8 * self.d, drop_prob=drop_prob)
        self.selfMatch = layers.StaticDotAttention(memory_size=2 * self.d, input_size=2 * self.d,
                                                   attention_size=2 * self.d, drop_prob=drop_prob)
        self.mod = layers.RNNEncoder(input_size=4 * self.d, hidden_size=self.d,
                                     num_layers=2, drop_prob=drop_prob)
    else:
        self.mod = layers.RNNEncoder(input_size=2 * self.d, hidden_size=self.d,
                                     num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=self.d, drop_prob=drop_prob)
def __init__(self, hidden_size, att_put_h_size, drop_prob):
    super(BiDAFOutput_att, self).__init__()
    self.att_linear_1 = nn.Linear(att_put_h_size, 1)
    self.mod_linear_1 = nn.Linear(2 * hidden_size, 1)
    self.rnn = layers.RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att_linear_2 = nn.Linear(att_put_h_size, 1)
    self.mod_linear_2 = nn.Linear(2 * hidden_size, 1)
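
# A sketch of how an output head with these submodules is usually wired in BiDAF-style models
# (assumed here; `masked_softmax` is a hypothetical helper that normalizes logits over the
# sequence while ignoring padding): the start pointer reads the attention and modeling outputs
# directly, and the end pointer reads a re-encoded modeling output.
def forward(self, att, mod, mask):
    logits_1 = self.att_linear_1(att) + self.mod_linear_1(mod)     # (batch, seq_len, 1)
    mod_2 = self.rnn(mod, mask.sum(-1))                            # (batch, seq_len, 2 * hidden_size)
    logits_2 = self.att_linear_2(att) + self.mod_linear_2(mod_2)   # (batch, seq_len, 1)
    log_p1 = masked_softmax(logits_1.squeeze(-1), mask, log_softmax=True)
    log_p2 = masked_softmax(logits_2.squeeze(-1), mask, log_softmax=True)
    return log_p1, log_p2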
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.hidden_size = hidden_size
    self.word_emb = layers.WordEmbedding(word_vectors, hidden_size)
    self.char_emb = layers.CharEmbedding(char_vectors, hidden_size)
    # assert hidden_size * 2 == (char_channel_size + word_dim)
    # highway network
    self.hwy = layers.HighwayEncoder(2, hidden_size * 2)
    # highway network
    # for i in range(2):
    #     setattr(self, f'highway_linear{i}', nn.Sequential(
    #         nn.Linear(hidden_size * 2, hidden_size * 2), nn.ReLU()))
    #     setattr(self, f'hightway_gate{i}', nn.Sequential(
    #         nn.Linear(hidden_size * 2, hidden_size * 2), nn.Sigmoid()))
    # self.emb = layers.Embedding(word_vectors=word_vectors, hidden_size=hidden_size,
    #                             drop_prob=drop_prob)
    self.enc = layers.RNNEncoder(input_size=hidden_size * 2, hidden_size=hidden_size,
                                 num_layers=1, drop_prob=drop_prob)
    self.att = layers.BiDAFAttention(hidden_size=2 * hidden_size, drop_prob=drop_prob)
    self.mod = layers.RNNEncoder(input_size=8 * hidden_size, hidden_size=hidden_size,
                                 num_layers=2, drop_prob=drop_prob)
    self.out = layers.BiDAFOutput(hidden_size=hidden_size, drop_prob=drop_prob)