def __init__(self, hidden_size, min_window_size=5, n_windows=4):
    super(KnowledgeSelector, self).__init__()
    self.min_window_size = min_window_size
    self.n_windows = n_windows
    self.b_highway = Highway(hidden_size * 2, hidden_size * 2, num_layers=2)
    self.c_highway = Highway(hidden_size * 2, hidden_size * 2, num_layers=2)
    self.match_attn = BilinearAttention(
        query_size=hidden_size * 2, key_size=hidden_size * 2, hidden_size=hidden_size * 2)
    self.area_attn = BilinearAttention(
        query_size=hidden_size, key_size=hidden_size, hidden_size=hidden_size)
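The constructor above assumes a `Highway` module defined elsewhere in that repository. The sketch below is a minimal gating-based version consistent with the call `Highway(hidden_size * 2, hidden_size * 2, num_layers=2)`; the class layout and parameter names are assumptions for illustration, not the repository's actual implementation.

import torch
import torch.nn as nn
import torch.nn.functional as F

class Highway(nn.Module):
    """Hypothetical highway layer: y = g * relu(W x + b) + (1 - g) * x, stacked num_layers times."""
    def __init__(self, input_size, output_size, num_layers=1):
        super(Highway, self).__init__()
        # Highway layers keep the feature size; this check is an assumption.
        assert input_size == output_size, "highway layers preserve the feature dimension"
        self.transforms = nn.ModuleList(
            [nn.Linear(input_size, input_size) for _ in range(num_layers)])
        self.gates = nn.ModuleList(
            [nn.Linear(input_size, input_size) for _ in range(num_layers)])

    def forward(self, x):
        for transform, gate in zip(self.transforms, self.gates):
            g = torch.sigmoid(gate(x))          # transform gate in [0, 1]
            x = g * F.relu(transform(x)) + (1.0 - g) * x  # blend transformed and carried input
        return x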
def __init__(self):
    super(Network, self).__init__()
    self.fcm = nn.Linear(1881, 256)
    self.fcc = nn.Linear(7, 256)
    self.fcg = nn.Linear(60483, 256)
    self.highway = Highway(256, 10, f=F.relu)
    self.fc2 = nn.Linear(256, 2)
    self.fcd = nn.Linear(256, 1)
    self.bn1 = nn.BatchNorm1d(256)
    self.bn2 = nn.BatchNorm1d(256)
    self.bn3 = nn.BatchNorm1d(1, affine=True)
def __init__(self):
    super(Network, self).__init__()
    self.fcm = nn.Linear(1881, 256)
    # ==== OLD ====
    # self.fcc = nn.Linear(7, 256)
    # ==== NEW ====
    # the input dimension = number of clinical variables
    self.fcc = nn.Linear(4, 256)
    # == END NEW ==
    self.fcg = nn.Linear(60483, 256)
    self.highway = Highway(256, 10, f=F.relu)
    self.fc2 = nn.Linear(256, 2)
    self.fcd = nn.Linear(256, 1)
    self.bn1 = nn.BatchNorm1d(256)
    self.bn2 = nn.BatchNorm1d(256)
    self.bn3 = nn.BatchNorm1d(1, affine=True)
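The change above only narrows the declared input of `self.fcc` to the number of clinical variables (4 instead of 7). The network's `forward` is not part of these snippets; the sketch below is a hypothetical fusion of the mutation (`fcm`), clinical (`fcc`) and gene-expression (`fcg`) branches through the shared highway stack, with `fc2` read as a 2-way classification head and `fcd` as a scalar output. The routing, the use of the batch-norm layers, and the head interpretation are all assumptions, not the repository's actual forward pass.

def forward(self, xm, xc, xg):
    # Hypothetical branch fusion; the real forward() may differ.
    hm = F.relu(self.bn1(self.fcm(xm)))   # mutation features:   (batch, 1881)  -> (batch, 256)
    hc = F.relu(self.bn2(self.fcc(xc)))   # clinical features:   (batch, 4)     -> (batch, 256)
    hg = F.relu(self.fcg(xg))             # gene expression:     (batch, 60483) -> (batch, 256)
    h = self.highway(hm + hc + hg)        # shared highway stack over the fused representation
    cls_logits = self.fc2(h)              # assumed 2-way classification head
    scalar_out = self.bn3(self.fcd(h))    # assumed scalar head (e.g. regression/risk score)
    return cls_logits, scalar_out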
def build_graph(self):
    """Builds the main part of the graph for the model, starting from the
    input embeddings to the final distributions for the answer span.

    Defines:
      self.logits_start, self.logits_end: Both tensors shape (batch_size, context_len).
        These are the logits (i.e. the values that are fed into the softmax function)
        for the start and end distributions. Important: these are -large in the pad
        locations. Necessary for when we feed them into the cross entropy function.
      self.probdist_start, self.probdist_end: Both shape (batch_size, context_len).
        Each row sums to 1. These are the result of taking the (masked) softmax of
        logits_start and logits_end.
    """
    # The original baseline graph is kept below, disabled inside a string literal.
    """
    # Use a RNN to get hidden states for the context and the question
    # Note: here the RNNEncoder is shared (i.e. the weights are the same)
    # between the context and the question.
    encoder = RNNEncoder(self.FLAGS.hidden_size, self.keep_prob)
    context_hiddens = encoder.build_graph(self.context_embs, self.context_mask)  # (batch_size, context_len, hidden_size*2)
    question_hiddens = encoder.build_graph(self.qn_embs, self.qn_mask)  # (batch_size, question_len, hidden_size*2)

    # Use context hidden states to attend to question hidden states
    attn_layer = BasicAttn(self.keep_prob, self.FLAGS.hidden_size*2, self.FLAGS.hidden_size*2)
    _, attn_output = attn_layer.build_graph(question_hiddens, self.qn_mask, context_hiddens)  # attn_output is shape (batch_size, context_len, hidden_size*2)

    # Concat attn_output to context_hiddens to get blended_reps
    blended_reps = tf.concat([context_hiddens, attn_output], axis=2)  # (batch_size, context_len, hidden_size*4)

    # Apply fully connected layer to each blended representation
    # Note, blended_reps_final corresponds to b' in the handout
    # Note, tf.contrib.layers.fully_connected applies a ReLU non-linearity here by default
    blended_reps_final = tf.contrib.layers.fully_connected(blended_reps, num_outputs=self.FLAGS.hidden_size)  # blended_reps_final is shape (batch_size, context_len, hidden_size)

    # Use softmax layer to compute probability distribution for start location
    # Note this produces self.logits_start and self.probdist_start, both of which have shape (batch_size, context_len)
    with vs.variable_scope("StartDist"):
        softmax_layer_start = SimpleSoftmaxLayer()
        self.logits_start, self.probdist_start = softmax_layer_start.build_graph(blended_reps_final, self.context_mask)

    # Use softmax layer to compute probability distribution for end location
    # Note this produces self.logits_end and self.probdist_end, both of which have shape (batch_size, context_len)
    with vs.variable_scope("EndDist"):
        softmax_layer_end = SimpleSoftmaxLayer()
        self.logits_end, self.probdist_end = softmax_layer_end.build_graph(blended_reps_final, self.context_mask)
    """

    # Highway network for word embeddings.
    # We do not apply masking here because the output is passed to an encoder
    # that applies masking itself.
    highway_cn = Highway(self.FLAGS.embedding_size)
    self.context_embs = highway_cn.build_graph(self.context_embs)
    highway_qn = Highway(self.FLAGS.embedding_size)
    self.qn_embs = highway_qn.build_graph(self.qn_embs)

    # Use a RNN to get hidden states for the context and the question
    # Note: here the RNNEncoder is shared (i.e. the weights are the same)
    # between the context and the question.
    encoder = RNNEncoder(self.FLAGS.hidden_size, self.keep_prob)
    context_hiddens = encoder.build_graph(
        self.context_embs, self.context_mask)  # (batch_size, context_len, hidden_size*2)
    question_hiddens = encoder.build_graph(
        self.qn_embs, self.qn_mask)  # (batch_size, question_len, hidden_size*2)

    # Use context hidden states to attend to question hidden states
    attn_layer = BiDAFAttn(self.FLAGS.hidden_size, self.FLAGS.context_len, self.FLAGS.question_len)
    attn_output = attn_layer.build_graph(
        context_hiddens, question_hiddens, self.context_mask, self.qn_mask)  # attn_output is shape (batch_size, context_len, hidden_size*2)

    # Modeling layer
    modeling_layer = Modeling(self.FLAGS.hidden_size)
    modeling_output = modeling_layer.build_graph(attn_output, self.context_mask)

    # RNNDecoder
    decoder = RNNDecoder(self.FLAGS.hidden_size)
    start_features, end_features = decoder.build_graph(
        modeling_output, attn_output, self.context_mask)

    with vs.variable_scope("StartDist"):
        softmax_layer_start = SimpleSoftmaxLayer()
        self.logits_start, self.probdist_start = softmax_layer_start.build_graph(
            start_features, self.context_mask)

    with vs.variable_scope("EndDist"):
        softmax_layer_end = SimpleSoftmaxLayer()
        self.logits_end, self.probdist_end = softmax_layer_end.build_graph(
            end_features, self.context_mask)
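The `Highway(self.FLAGS.embedding_size)` objects applied to the word embeddings in build_graph are TF-1.x modules defined elsewhere in that project. A minimal sketch consistent with the `build_graph(inputs)` call is given below; the transform/gate layout and variable names are assumptions for illustration, not the project's actual code.

import tensorflow as tf

class Highway(object):
    """Hypothetical TF-1.x highway module matching Highway(size).build_graph(inputs)."""
    def __init__(self, size, scope="Highway"):
        self.size = size
        self.scope = scope

    def build_graph(self, inputs):
        # inputs: (batch, seq_len, size); returns a tensor of the same shape
        with tf.variable_scope(self.scope):
            transform = tf.layers.dense(inputs, self.size, activation=tf.nn.relu, name="transform")
            gate = tf.layers.dense(inputs, self.size, activation=tf.nn.sigmoid, name="gate")
            return gate * transform + (1.0 - gate) * inputs

Likewise, the docstring states that `SimpleSoftmaxLayer.build_graph` returns logits that are -large at pad positions together with their masked softmax. A hedged sketch of such a helper (the name `masked_softmax` and the -1e30 constant are assumptions) is:

def masked_softmax(logits, mask, dim):
    """Add a very large negative number where mask == 0 so pad positions get ~0 probability."""
    exp_mask = (1.0 - tf.cast(mask, tf.float32)) * (-1e30)  # -large in pad locations
    masked_logits = tf.add(logits, exp_mask)
    prob_dist = tf.nn.softmax(masked_logits, dim)
    return masked_logits, prob_dist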