Example #1
    def __init__(self, hidden_size, min_window_size=5, n_windows=4):
        super(KnowledgeSelector, self).__init__()
        self.min_window_size = min_window_size
        self.n_windows = n_windows

        self.b_highway = Highway(hidden_size * 2, hidden_size*2, num_layers=2)
        self.c_highway = Highway(hidden_size * 2, hidden_size*2, num_layers=2)
        self.match_attn = BilinearAttention(query_size=hidden_size*2, key_size=hidden_size*2, hidden_size=hidden_size*2)
        self.area_attn = BilinearAttention(query_size=hidden_size, key_size=hidden_size, hidden_size=hidden_size)
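For reference, a minimal sketch of a highway layer consistent with the call signature above (this follows the standard formulation of Srivastava et al., 2015, and is not necessarily the implementation this snippet imports; both calls pass equal input and output sizes, which the residual connection requires):

import torch
import torch.nn as nn
import torch.nn.functional as F

class Highway(nn.Module):
    # Standard highway transform: y = g * relu(W_h x) + (1 - g) * x,
    # with gate g = sigmoid(W_t x), stacked num_layers times.
    def __init__(self, input_size, output_size, num_layers=1):
        super(Highway, self).__init__()
        self.transform = nn.ModuleList(
            [nn.Linear(input_size, output_size) for _ in range(num_layers)])
        self.gate = nn.ModuleList(
            [nn.Linear(input_size, output_size) for _ in range(num_layers)])

    def forward(self, x):
        for transform, gate in zip(self.transform, self.gate):
            g = torch.sigmoid(gate(x))  # per-feature gate in (0, 1)
            x = g * F.relu(transform(x)) + (1 - g) * x
        return x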
Example #2
    def __init__(self):
        super(Network, self).__init__()

        self.fcm = nn.Linear(1881, 256)
        self.fcc = nn.Linear(7, 256)
        self.fcg = nn.Linear(60483, 256)
        self.highway = Highway(256, 10, f=F.relu)
        self.fc2 = nn.Linear(256, 2)
        self.fcd = nn.Linear(256, 1)
        self.bn1 = nn.BatchNorm1d(256)
        self.bn2 = nn.BatchNorm1d(256)
        self.bn3 = nn.BatchNorm1d(1, affine=True)
Example #3
    def __init__(self):
        super(Network, self).__init__()

        self.fcm = nn.Linear(1881, 256)
        # ==== OLD ====
        # self.fcc = nn.Linear(7, 256)
        # ==== NEW ====
        # input dimension = number of clinical variables
        self.fcc = nn.Linear(4, 256)
        # == END NEW ==
        self.fcg = nn.Linear(60483, 256)
        self.highway = Highway(256, 10, f=F.relu)
        self.fc2 = nn.Linear(256, 2)
        self.fcd = nn.Linear(256, 1)
        self.bn1 = nn.BatchNorm1d(256)
        self.bn2 = nn.BatchNorm1d(256)
        self.bn3 = nn.BatchNorm1d(1, affine=True)
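Examples #2 and #3 call Highway(256, 10, f=F.relu), i.e. (size, num_layers, activation); the diff in Example #3 only shrinks fcc's in_features from 7 to 4, which must match the number of clinical variables in the input tensor. A widely copied PyTorch implementation matching that three-argument signature looks roughly like this (a hedged reconstruction, not necessarily the module these snippets import):

import torch
import torch.nn as nn

class Highway(nn.Module):
    # Matches the call Highway(256, 10, f=F.relu): `size`-dim features,
    # `num_layers` stacked highway layers, activation `f`.
    def __init__(self, size, num_layers, f):
        super(Highway, self).__init__()
        self.num_layers = num_layers
        self.nonlinear = nn.ModuleList([nn.Linear(size, size) for _ in range(num_layers)])
        self.linear = nn.ModuleList([nn.Linear(size, size) for _ in range(num_layers)])
        self.gate = nn.ModuleList([nn.Linear(size, size) for _ in range(num_layers)])
        self.f = f

    def forward(self, x):
        for layer in range(self.num_layers):
            g = torch.sigmoid(self.gate[layer](x))        # per-feature gate
            nonlinear = self.f(self.nonlinear[layer](x))  # transformed path
            linear = self.linear[layer](x)                # carry path
            x = g * nonlinear + (1 - g) * linear
        return x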
Example #4
    def build_graph(self):
        """Builds the main part of the graph for the model, starting from the input embeddings to the final distributions for the answer span.

        Defines:
          self.logits_start, self.logits_end: Both tensors shape (batch_size, context_len).
            These are the logits (i.e. values that are fed into the softmax function) for the start and end distribution.
            Important: these are set to a large negative value in the pad locations,
            which is necessary when we feed them into the cross-entropy function.
          self.probdist_start, self.probdist_end: Both shape (batch_size, context_len). Each row sums to 1.
            These are the result of taking (masked) softmax of logits_start and logits_end.
        """
        """
        # Use a RNN to get hidden states for the context and the question
        # Note: here the RNNEncoder is shared (i.e. the weights are the same)
        # between the context and the question.
        encoder = RNNEncoder(self.FLAGS.hidden_size, self.keep_prob)
        context_hiddens = encoder.build_graph(self.context_embs, self.context_mask) # (batch_size, context_len, hidden_size*2)
        question_hiddens = encoder.build_graph(self.qn_embs, self.qn_mask) # (batch_size, question_len, hidden_size*2)

        # Use context hidden states to attend to question hidden states
        attn_layer = BasicAttn(self.keep_prob, self.FLAGS.hidden_size*2, self.FLAGS.hidden_size*2)
        _, attn_output = attn_layer.build_graph(question_hiddens, self.qn_mask, context_hiddens) # attn_output is shape (batch_size, context_len, hidden_size*2)

        # Concat attn_output to context_hiddens to get blended_reps
        blended_reps = tf.concat([context_hiddens, attn_output], axis=2) # (batch_size, context_len, hidden_size*4)

        # Apply fully connected layer to each blended representation
        # Note, blended_reps_final corresponds to b' in the handout
        # Note, tf.contrib.layers.fully_connected applies a ReLU non-linearity here by default
        blended_reps_final = tf.contrib.layers.fully_connected(blended_reps, num_outputs=self.FLAGS.hidden_size) # blended_reps_final is shape (batch_size, context_len, hidden_size)

        # Use softmax layer to compute probability distribution for start location
        # Note this produces self.logits_start and self.probdist_start, both of which have shape (batch_size, context_len)
        with vs.variable_scope("StartDist"):
            softmax_layer_start = SimpleSoftmaxLayer()
            self.logits_start, self.probdist_start = softmax_layer_start.build_graph(blended_reps_final, self.context_mask)

        # Use softmax layer to compute probability distribution for end location
        # Note this produces self.logits_end and self.probdist_end, both of which have shape (batch_size, context_len)
        with vs.variable_scope("EndDist"):
            softmax_layer_end = SimpleSoftmaxLayer()
            self.logits_end, self.probdist_end = softmax_layer_end.build_graph(blended_reps_final, self.context_mask)
        """

        # Highway network for word embeddings
        highway_cn = Highway(self.FLAGS.embedding_size)
        self.context_embs = highway_cn.build_graph(
            self.context_embs
        )  # no masking here because the output is passed to an encoder that applies masking itself

        highway_qn = Highway(self.FLAGS.embedding_size)
        self.qn_embs = highway_qn.build_graph(self.qn_embs)

        # Use a RNN to get hidden states for the context and the question
        # Note: here the RNNEncoder is shared (i.e. the weights are the same)
        # between the context and the question.
        encoder = RNNEncoder(self.FLAGS.hidden_size, self.keep_prob)
        context_hiddens = encoder.build_graph(
            self.context_embs,
            self.context_mask)  # (batch_size, context_len, hidden_size*2)
        question_hiddens = encoder.build_graph(
            self.qn_embs,
            self.qn_mask)  # (batch_size, question_len, hidden_size*2)

        # Use context hidden states to attend to question hidden states
        attn_layer = BiDAFAttn(self.FLAGS.hidden_size, self.FLAGS.context_len,
                               self.FLAGS.question_len)
        attn_output = attn_layer.build_graph(
            context_hiddens, question_hiddens, self.context_mask, self.qn_mask
        )  # attn_output is shape (batch_size, context_len, hidden_size*2)

        # Modeling layer
        modeling_layer = Modeling(self.FLAGS.hidden_size)
        modeling_output = modeling_layer.build_graph(attn_output,
                                                     self.context_mask)

        # RNNDecoder
        decoder = RNNDecoder(self.FLAGS.hidden_size)
        start_features, end_features = decoder.build_graph(
            modeling_output, attn_output, self.context_mask)

        with vs.variable_scope("StartDist"):
            softmax_layer_start = SimpleSoftmaxLayer()
            self.logits_start, self.probdist_start = softmax_layer_start.build_graph(
                start_features, self.context_mask)

        with vs.variable_scope("EndDist"):
            softmax_layer_end = SimpleSoftmaxLayer()
            self.logits_end, self.probdist_end = softmax_layer_end.build_graph(
                end_features, self.context_mask)
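The docstring notes that logits_start and logits_end are set to a large negative value in the pad locations. That is the usual masking trick: add a huge negative number wherever the mask is 0 before the softmax, so padded positions receive effectively zero probability and the cross-entropy loss ignores them. A sketch of what SimpleSoftmaxLayer presumably does internally (illustrative; the helper name and signature are assumptions):

import tensorflow as tf  # TF 1.x, matching the tf.contrib / variable_scope usage above

def masked_softmax(logits, mask, dim):
    # mask is 1 for real tokens and 0 for padding, with the same shape as logits.
    exp_mask = (1.0 - tf.cast(mask, tf.float32)) * -1e30  # 0 where real, -1e30 where pad
    masked_logits = logits + exp_mask  # "-large in the pad locations"
    prob_dist = tf.nn.softmax(masked_logits, dim)  # each row sums to 1 over real tokens
    return masked_logits, prob_dist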