Example #1
def build_pair_sentence_module(task, d_inp, model, params):
    """ Build a pair classifier, shared if necessary """

    def build_pair_attn(d_in, d_hid_attn):
        """ Build the pair model """
        d_inp_model = 2 * d_in
        modeling_layer = s2s_e.by_name("lstm").from_params(
            Params(
                {
                    "input_size": d_inp_model,
                    "hidden_size": d_hid_attn,
                    "num_layers": 1,
                    "bidirectional": True,
                }
            )
        )
        pair_attn = AttnPairEncoder(model.vocab, modeling_layer, dropout=params["dropout"])
        return pair_attn

    # Build the "pooler", which does pools a variable length sequence
    #   possibly with a projection layer beforehand
    if params["attn"] and not model.use_bert:
        pooler = Pooler(project=False, d_inp=params["d_hid_attn"], d_proj=params["d_hid_attn"])
        d_out = params["d_hid_attn"] * 2
    else:
        pooler = Pooler(
            project=not model.use_bert,
            d_inp=d_inp,
            d_proj=params["d_proj"],
            pool_type=params["pool_type"],
        )
        d_out = d_inp if model.use_bert else params["d_proj"]

    # Build an attention module if necessary
    if params["shared_pair_attn"] and params["attn"] and not model.use_bert:  # shared attn
        if not hasattr(model, "pair_attn"):
            pair_attn = build_pair_attn(d_inp, params["d_hid_attn"])
            model.pair_attn = pair_attn
        else:
            pair_attn = model.pair_attn
    elif params["attn"] and not model.use_bert:  # non-shared attn
        pair_attn = build_pair_attn(d_inp, params["d_hid_attn"])
    else:  # no attn
        pair_attn = None

    # Build the classifier
    n_classes = task.n_classes if hasattr(task, "n_classes") else 1
    if model.use_bert:
        # BERT handles pair tasks by concatenating the inputs and classifying the joined
        # sequence, so we use a single sentence classifier
        if isinstance(task, WiCTask):
            d_out *= 3  # also pass the two contextual word representations
        classifier = Classifier.from_params(d_out, n_classes, params)
        module = SingleClassifier(pooler, classifier)
    else:
        d_out = d_out + d_inp if isinstance(task, WiCTask) else d_out
        classifier = Classifier.from_params(4 * d_out, n_classes, params)
        module = PairClassifier(pooler, classifier, pair_attn)
    return module
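A minimal usage sketch, assuming an AllenNLP-style Params object; the key names mirror the lookups in the function above, while the values, task, and model objects are illustrative only:

params = Params(
    {
        "attn": True,  # use AttnPairEncoder between the two sentences
        "shared_pair_attn": False,  # build one attention module per task
        "d_hid_attn": 256,
        "d_proj": 512,
        "pool_type": "max",
        "dropout": 0.2,
        # ... plus whatever Classifier.from_params reads (e.g. "cls_type", "d_hid")
    }
)
pair_module = build_pair_sentence_module(task, d_inp=1024, model=model, params=params)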
Example #2
    def __init__(self, task, d_inp: int, task_params, num_spans=2):
        assert num_spans > 0, "num_spans must be at least 1"
        super(SpanClassifierModule, self).__init__()
        # Set config options needed for forward pass.
        self.loss_type = task_params["cls_loss_fn"]
        self.span_pooling = task_params["cls_span_pooling"]
        self.cnn_context = task_params.get("cnn_context", 0)
        self.num_spans = num_spans
        self.proj_dim = task_params["d_hid"]
        self.projs = torch.nn.ModuleList()

        for i in range(num_spans):
            # create a per-span word-level projection (CNN) layer
            proj = self._make_cnn_layer(d_inp)
            self.projs.append(proj)
        self.span_extractors = torch.nn.ModuleList()

        # Lee's self-pooling operator (https://arxiv.org/abs/1812.10860)
        for i in range(num_spans):
            span_extractor = self._make_span_extractor()
            self.span_extractors.append(span_extractor)

        # Classifier gets concatenated projections of spans.
        clf_input_dim = self.span_extractors[0].get_output_dim() * num_spans
        self.classifier = Classifier.from_params(clf_input_dim, task.n_classes, task_params)
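The helpers _make_cnn_layer and _make_span_extractor are not shown in this example; a minimal sketch of what they might look like, assuming AllenNLP's span extractors and a Conv1d whose receptive field grows with cnn_context (the details here are assumptions, not the source implementation):

import torch
from allennlp.modules.span_extractors import (
    EndpointSpanExtractor,
    SelfAttentiveSpanExtractor,
)

def _make_cnn_layer(self, d_inp):
    # Assumed sketch: kernel of width 1 + 2 * cnn_context, so cnn_context=0
    # behaves as a per-word linear projection down to proj_dim.
    k = 1 + 2 * self.cnn_context
    return torch.nn.Conv1d(d_inp, self.proj_dim, kernel_size=k, padding=self.cnn_context)

def _make_span_extractor(self):
    # Assumed sketch: self-attentive pooling over the span, or endpoint
    # combinations (e.g. "x,y") as supported by EndpointSpanExtractor.
    if self.span_pooling == "attn":
        return SelfAttentiveSpanExtractor(self.proj_dim)
    return EndpointSpanExtractor(self.proj_dim, combination=self.span_pooling)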
Example #3
def build_multiple_choice_module(task, d_sent, use_bert, params):
    """ Basic parts for MC task: reduce a vector representation for each model into a scalar. """
    pooler = Pooler(
        project=not use_bert, d_inp=d_sent, d_proj=params["d_proj"], pool_type=params["pool_type"]
    )
    d_out = d_sent if use_bert else params["d_proj"]
    choice2scalar = Classifier(d_out, n_classes=1, cls_type=params["cls_type"])
    return SingleClassifier(pooler, choice2scalar)
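Downstream, the returned module is applied once per answer choice and the scalars are compared across choices; a hedged sketch of how that might look (mc_module, encoded_choices, and gold_choice_idx are illustrative names, not from the source):

# Score each (context, choice) encoding independently, then softmax over choices.
choice_scores = [mc_module(emb, mask) for emb, mask in encoded_choices]  # each [batch, 1]
logits = torch.cat(choice_scores, dim=-1)  # [batch, n_choices]
loss = torch.nn.functional.cross_entropy(logits, gold_choice_idx)
pred = logits.argmax(dim=-1)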
Example #4
    def __init__(self, task, d_inp: int, task_params):
        super(EdgeClassifierModule, self).__init__()
        # Set config options needed for forward pass.
        self.loss_type = task_params["cls_loss_fn"]
        self.span_pooling = task_params["cls_span_pooling"]
        self.cnn_context = task_params["edgeprobe_cnn_context"]
        self.is_symmetric = task_params["edgeprobe_symmetric"]
        self.single_sided = task.single_sided

        self.proj_dim = task_params["d_hid"]
        # Separate projection for span1, span2.
        # Convolution allows using local context outside the span, with
        # cnn_context = 0 behaving as a per-word linear layer.
        # Use these to reduce dimensionality in case we're enumerating a lot of
        # spans - we want to do this *before* extracting spans for greatest
        # efficiency.
        self.proj1 = self._make_cnn_layer(d_inp)
        if self.is_symmetric or self.single_sided:
            # Use None as dummy padding for readability,
            # so that we can index projs[1] and projs[2]
            self.projs = [None, self.proj1, self.proj1]
        else:
            # Separate params for span2
            self.proj2 = self._make_cnn_layer(d_inp)
            self.projs = [None, self.proj1, self.proj2]

        # Span extractor, shared for both span1 and span2.
        self.span_extractor1 = self._make_span_extractor()
        if self.is_symmetric or self.single_sided:
            self.span_extractors = [None, self.span_extractor1, self.span_extractor1]
        else:
            self.span_extractor2 = self._make_span_extractor()
            self.span_extractors = [None, self.span_extractor1, self.span_extractor2]

        # Classifier gets concatenated projections of span1, span2
        clf_input_dim = self.span_extractors[1].get_output_dim()
        if not self.single_sided:  # nonlinear classifier over two spans
            clf_input_dim += self.span_extractors[2].get_output_dim()
        else:  # linear classifier for a single span
            task_params["cls_type"] = "log_reg"
        self.classifier = Classifier.from_params(clf_input_dim, task.n_classes, task_params)
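The forward pass lives elsewhere in the class; a simplified, assumed sketch of the flow this constructor sets up, using the 1-indexed projs/span_extractors above (argument names are illustrative; span extractor signatures follow AllenNLP):

def _forward_sketch(self, sent_embs, span1_idxs, span2_idxs, span_mask):
    # Project first so span extraction runs in the lower proj_dim space;
    # Conv1d expects [batch, features, time], hence the transposes.
    h1 = self.projs[1](sent_embs.transpose(1, 2)).transpose(1, 2)
    s1 = self.span_extractors[1](h1, span1_idxs, span_indices_mask=span_mask)
    if self.single_sided:
        return self.classifier(s1)
    h2 = self.projs[2](sent_embs.transpose(1, 2)).transpose(1, 2)
    s2 = self.span_extractors[2](h2, span2_idxs, span_indices_mask=span_mask)
    return self.classifier(torch.cat([s1, s2], dim=-1))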
Example #5
def build_qa_module(task, d_inp, use_bert, params):
    """ Build a simple QA module that
    1) pools representations (either of the joint (context, question, answer) or individually
    2) projects down to two logits
    3) classifier

    This module models each question-answer pair _individually_ """
    pooler = Pooler(
        project=not use_bert, d_inp=d_inp, d_proj=params["d_proj"], pool_type=params["pool_type"]
    )
    d_out = d_inp if use_bert else params["d_proj"]
    classifier = Classifier.from_params(d_out, 2, params)
    return SingleClassifier(pooler, classifier)
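The two logits make this a binary correct/incorrect decision per question-answer pair; a hypothetical call, where pair_emb and pair_mask are illustrative names for the encoded pair:

qa_module = build_qa_module(task, d_inp=1024, use_bert=True, params=params)
logits = qa_module(pair_emb, pair_mask)  # [batch_size, 2], one logit per class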
Example #6
def build_single_sentence_module(task, d_inp: int, use_bert: bool, params: Params):
    """ Build a single sentence classifier

    args:
        - task (Task): task object, used to get the number of output classes
        - d_inp (int): input dimension to the module, needed for optional linear projection
        - use_bert (bool): if using BERT, skip projection before pooling.
        - params (Params): Params object with task-specific parameters

    returns:
        - SingleClassifier (nn.Module): single-sentence classifier consisting of
            (optional) a linear projection, pooling, and an MLP classifier
    """
    pooler = Pooler(
        project=not use_bert, d_inp=d_inp, d_proj=params["d_proj"], pool_type=params["pool_type"]
    )
    d_out = d_inp if use_bert else params["d_proj"]
    classifier = Classifier.from_params(d_out, task.n_classes, params)
    module = SingleClassifier(pooler, classifier)
    return module
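A minimal usage sketch; the Params keys match the lookups above, assuming Classifier.from_params reads "cls_type", "d_hid", and "dropout", with the remaining names illustrative:

params = Params(
    {"d_proj": 512, "pool_type": "max", "cls_type": "mlp", "d_hid": 512, "dropout": 0.2}
)
clf = build_single_sentence_module(task, d_inp=1024, use_bert=False, params=params)
logits = clf(sent_emb, sent_mask)  # [batch_size, task.n_classes]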