예제 #1
0
def build_pair_sentence_module(task, d_inp, model, params):
    """ Build a pair classifier, shared if necessary """

    def build_pair_attn(d_in, d_hid_attn):
        """ Build the pair model """
        d_inp_model = 2 * d_in
        modeling_layer = s2s_e.by_name("lstm").from_params(
            Params(
                {
                    "input_size": d_inp_model,
                    "hidden_size": d_hid_attn,
                    "num_layers": 1,
                    "bidirectional": True,
                }
            )
        )
        pair_attn = AttnPairEncoder(model.vocab, modeling_layer, dropout=params["dropout"])
        return pair_attn

    # Build the "pooler", which does pools a variable length sequence
    #   possibly with a projection layer beforehand
    if params["attn"] and not model.use_bert:
        pooler = Pooler(project=False, d_inp=params["d_hid_attn"], d_proj=params["d_hid_attn"])
        d_out = params["d_hid_attn"] * 2
    else:
        pooler = Pooler(
            project=not model.use_bert,
            d_inp=d_inp,
            d_proj=params["d_proj"],
            pool_type=params["pool_type"],
        )
        d_out = d_inp if model.use_bert else params["d_proj"]

    # Build an attention module if necessary
    if params["shared_pair_attn"] and params["attn"] and not model.use_bert:  # shared attn
        if not hasattr(model, "pair_attn"):
            pair_attn = build_pair_attn(d_inp, params["d_hid_attn"])
            model.pair_attn = pair_attn
        else:
            pair_attn = model.pair_attn
    elif params["attn"] and not model.use_bert:  # non-shared attn
        pair_attn = build_pair_attn(d_inp, params["d_hid_attn"])
    else:  # no attn
        pair_attn = None

    # Build the classifier
    n_classes = task.n_classes if hasattr(task, "n_classes") else 1
    if model.use_bert:
        # BERT handles pair tasks by concatenating the inputs and classifying the joined
        # sequence, so we use a single sentence classifier
        if isinstance(task, WiCTask):
            d_out *= 3  # also pass the two contextual word representations
        classifier = Classifier.from_params(d_out, n_classes, params)
        module = SingleClassifier(pooler, classifier)
    else:
        d_out = d_out + d_inp if isinstance(task, WiCTask) else d_out
        classifier = Classifier.from_params(4 * d_out, n_classes, params)
        module = PairClassifier(pooler, classifier, pair_attn)
    return module
예제 #2
0
def build_multiple_choice_module(task, d_sent, use_bert, params):
    """ Basic parts for MC task: reduce a vector representation for each model into a scalar. """
    pooler = Pooler(
        project=not use_bert, d_inp=d_sent, d_proj=params["d_proj"], pool_type=params["pool_type"]
    )
    d_out = d_sent if use_bert else params["d_proj"]
    choice2scalar = Classifier(d_out, n_classes=1, cls_type=params["cls_type"])
    return SingleClassifier(pooler, choice2scalar)
예제 #3
0
def build_qa_module(task, d_inp, use_bert, params):
    """ Build a simple QA module that
    1) pools representations (either of the joint (context, question, answer) or individually
    2) projects down to two logits
    3) classifier

    This module models each question-answer pair _individually_ """
    pooler = Pooler(
        project=not use_bert, d_inp=d_inp, d_proj=params["d_proj"], pool_type=params["pool_type"]
    )
    d_out = d_inp if use_bert else params["d_proj"]
    classifier = Classifier.from_params(d_out, 2, params)
    return SingleClassifier(pooler, classifier)
예제 #4
0
def build_single_sentence_module(task, d_inp: int, use_bert: bool, params: Params):
    """ Build a single sentence classifier

    args:
        - task (Task): task object, used to get the number of output classes
        - d_inp (int): input dimension to the module, needed for optional linear projection
        - use_bert (bool): if using BERT, skip projection before pooling.
        - params (Params): Params object with task-specific parameters

    returns:
        - SingleClassifier (nn.Module): single-sentence classifier consisting of
            (optional) a linear projection, pooling, and an MLP classifier
    """
    pooler = Pooler(
        project=not use_bert, d_inp=d_inp, d_proj=params["d_proj"], pool_type=params["pool_type"]
    )
    d_out = d_inp if use_bert else params["d_proj"]
    classifier = Classifier.from_params(d_out, task.n_classes, params)
    module = SingleClassifier(pooler, classifier)
    return module
예제 #5
0
    def __init__(
        self,
        vocab: Vocabulary,
        input_dim: int,
        decoder_hidden_size: int,
        max_decoding_steps: int,
        output_proj_input_dim: int,
        target_namespace: str = "targets",
        target_embedding_dim: int = None,
        attention: str = "none",
        dropout: float = 0.0,
        scheduled_sampling_ratio: float = 0.0,
    ) -> None:
        super(Seq2SeqDecoder, self).__init__(vocab)

        self._max_decoding_steps = max_decoding_steps
        self._target_namespace = target_namespace

        # We need the start symbol to provide as the input at the first timestep of decoding, and
        # end symbol as a way to indicate the end of the decoded sequence.
        self._start_index = self.vocab.get_token_index(START_SYMBOL,
                                                       self._target_namespace)
        self._end_index = self.vocab.get_token_index(END_SYMBOL,
                                                     self._target_namespace)
        self._unk_index = self.vocab.get_token_index("@@UNKNOWN@@",
                                                     self._target_namespace)
        num_classes = self.vocab.get_vocab_size(self._target_namespace)

        # Decoder output dim needs to be the same as the encoder output dim since we initialize the
        # hidden state of the decoder with that of the final hidden states of the encoder. Also, if
        # we're using attention with ``DotProductSimilarity``, this is needed.
        self._encoder_output_dim = input_dim
        self._decoder_hidden_dim = decoder_hidden_size
        if self._encoder_output_dim != self._decoder_hidden_dim:
            self._projection_encoder_out = Linear(self._encoder_output_dim,
                                                  self._decoder_hidden_dim)
        else:
            self._projection_encoder_out = lambda x: x
        self._decoder_output_dim = self._decoder_hidden_dim
        self._output_proj_input_dim = output_proj_input_dim
        self._target_embedding_dim = target_embedding_dim
        self._target_embedder = Embedding(num_classes,
                                          self._target_embedding_dim)

        # Used to get an initial hidden state from the encoder states
        self._sent_pooler = Pooler(project=True,
                                   d_inp=input_dim,
                                   d_proj=decoder_hidden_size)

        if attention == "Bahdanau":
            self._decoder_attention = BahdanauAttention(
                decoder_hidden_size + target_embedding_dim, input_dim)
            # The output of attention, a weighted average over encoder outputs, will be
            # concatenated to the input vector of the decoder at each time
            # step.
            self._decoder_input_dim = input_dim + target_embedding_dim
        elif attention == "bilinear":
            self._decoder_attention = BilinearAttention(
                decoder_hidden_size + target_embedding_dim, input_dim)
            # The output of attention, a weighted average over encoder outputs, will be
            # concatenated to the input vector of the decoder at each time
            # step.
            self._decoder_input_dim = input_dim + target_embedding_dim
        elif attention == "none":
            self._decoder_attention = None
            self._decoder_input_dim = target_embedding_dim
        else:
            raise Exception("attention not implemented {}".format(attention))

        self._decoder_cell = LSTMCell(self._decoder_input_dim,
                                      self._decoder_hidden_dim)
        # Allow for a bottleneck layer between encoder outputs and distribution over vocab
        # The bottleneck layer consists of a linear transform and helps to reduce
        # number of parameters
        if self._output_proj_input_dim != self._decoder_output_dim:
            self._projection_bottleneck = Linear(self._decoder_output_dim,
                                                 self._output_proj_input_dim)
        else:
            self._projection_bottleneck = lambda x: x
        self._output_projection_layer = Linear(self._output_proj_input_dim,
                                               num_classes)
        self._dropout = torch.nn.Dropout(p=dropout)
예제 #6
0
def build_image_sent_module(task, d_inp, params):
    pooler = Pooler(project=True, d_inp=d_inp, d_proj=params["d_proj"])
    return pooler