def build_pair_sentence_module(task, d_inp, model, params):
    """ Build a pair classifier, shared if necessary """

    def build_pair_attn(d_in, d_hid_attn):
        """ Build the pair model """
        d_inp_model = 2 * d_in
        modeling_layer = s2s_e.by_name("lstm").from_params(
            Params(
                {
                    "input_size": d_inp_model,
                    "hidden_size": d_hid_attn,
                    "num_layers": 1,
                    "bidirectional": True,
                }
            )
        )
        pair_attn = AttnPairEncoder(model.vocab, modeling_layer, dropout=params["dropout"])
        return pair_attn

    # Build the "pooler", which pools a variable-length sequence,
    # possibly with a projection layer beforehand
    if params["attn"] and not model.use_bert:
        pooler = Pooler(project=False, d_inp=params["d_hid_attn"], d_proj=params["d_hid_attn"])
        d_out = params["d_hid_attn"] * 2
    else:
        pooler = Pooler(
            project=not model.use_bert,
            d_inp=d_inp,
            d_proj=params["d_proj"],
            pool_type=params["pool_type"],
        )
        d_out = d_inp if model.use_bert else params["d_proj"]

    # Build an attention module if necessary
    if params["shared_pair_attn"] and params["attn"] and not model.use_bert:  # shared attn
        if not hasattr(model, "pair_attn"):
            pair_attn = build_pair_attn(d_inp, params["d_hid_attn"])
            model.pair_attn = pair_attn
        else:
            pair_attn = model.pair_attn
    elif params["attn"] and not model.use_bert:  # non-shared attn
        pair_attn = build_pair_attn(d_inp, params["d_hid_attn"])
    else:  # no attn
        pair_attn = None

    # Build the classifier
    n_classes = task.n_classes if hasattr(task, "n_classes") else 1
    if model.use_bert:
        # BERT handles pair tasks by concatenating the inputs and classifying the joined
        # sequence, so we use a single-sentence classifier
        if isinstance(task, WiCTask):
            d_out *= 3  # also pass the two contextual word representations
        classifier = Classifier.from_params(d_out, n_classes, params)
        module = SingleClassifier(pooler, classifier)
    else:
        d_out = d_out + d_inp if isinstance(task, WiCTask) else d_out
        classifier = Classifier.from_params(4 * d_out, n_classes, params)
        module = PairClassifier(pooler, classifier, pair_attn)
    return module
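# A minimal usage sketch for build_pair_sentence_module (illustrative, not part
# of the library). The SimpleNamespace stand-ins for `task` and `model`, and
# the specific parameter values, are assumptions; real callers pass Task and
# model objects. Classifier.from_params is assumed to read "cls_type", "d_hid",
# and "dropout", as elsewhere in this module.
def _demo_build_pair_module():
    from types import SimpleNamespace

    demo_task = SimpleNamespace(n_classes=3)  # e.g. an NLI-style pair task
    demo_model = SimpleNamespace(use_bert=True, vocab=None)
    demo_params = {
        "attn": False,  # the attention path is skipped when use_bert is True
        "shared_pair_attn": False,
        "d_hid_attn": 256,
        "d_proj": 256,
        "pool_type": "max",
        "cls_type": "mlp",
        "d_hid": 512,
        "dropout": 0.2,
    }
    # With use_bert=True this returns a SingleClassifier over the joint encoding.
    return build_pair_sentence_module(demo_task, d_inp=768, model=demo_model, params=demo_params)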
def __init__(self, task, d_inp: int, task_params, num_spans=2):
    assert num_spans > 0, "Please set num_spans to be greater than 0"
    super(SpanClassifierModule, self).__init__()
    # Set config options needed for forward pass.
    self.loss_type = task_params["cls_loss_fn"]
    self.span_pooling = task_params["cls_span_pooling"]
    self.cnn_context = task_params.get("cnn_context", 0)
    self.num_spans = num_spans
    self.proj_dim = task_params["d_hid"]

    # Create a word-level CNN projection layer for each span.
    self.projs = torch.nn.ModuleList()
    for i in range(num_spans):
        proj = self._make_cnn_layer(d_inp)
        self.projs.append(proj)

    # Lee's self-pooling operator (https://arxiv.org/abs/1812.10860)
    self.span_extractors = torch.nn.ModuleList()
    for i in range(num_spans):
        span_extractor = self._make_span_extractor()
        self.span_extractors.append(span_extractor)

    # Classifier gets concatenated projections of spans. All extractors share
    # an output dim, so index 0 is always valid (indexing [1] would fail when
    # num_spans == 1).
    clf_input_dim = self.span_extractors[0].get_output_dim() * num_spans
    self.classifier = Classifier.from_params(clf_input_dim, task.n_classes, task_params)
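# A shape-only sketch (hypothetical tensors) of what the classifier above
# receives: one fixed-size vector per span from the extractors, concatenated
# along the feature axis, giving extractor_out_dim * num_spans inputs.
def _demo_span_concat(batch: int = 4, extractor_out_dim: int = 256, num_spans: int = 2):
    import torch

    # Random stand-ins for the per-span extractor outputs.
    span_vecs = [torch.randn(batch, extractor_out_dim) for _ in range(num_spans)]
    clf_input = torch.cat(span_vecs, dim=-1)  # shape: [batch, num_spans * extractor_out_dim]
    assert clf_input.shape == (batch, num_spans * extractor_out_dim)
    return clf_input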
def build_multiple_choice_module(task, d_sent, use_bert, params):
    """ Basic parts for an MC task: reduce the vector representation for each choice to a scalar. """
    pooler = Pooler(
        project=not use_bert, d_inp=d_sent, d_proj=params["d_proj"], pool_type=params["pool_type"]
    )
    d_out = d_sent if use_bert else params["d_proj"]
    choice2scalar = Classifier(d_out, n_classes=1, cls_type=params["cls_type"])
    return SingleClassifier(pooler, choice2scalar)
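# A sketch of how the per-choice scalars are typically consumed downstream
# (the actual forward pass lives in the task/model code, not here): the
# SingleClassifier above emits one logit per (context, choice) encoding, and
# the highest-scoring choice wins. The random tensor is a hypothetical
# stand-in for those logits.
def _demo_choice_scoring(batch: int = 2, n_choices: int = 4):
    import torch

    per_choice_logits = torch.randn(batch, n_choices)  # one scalar per choice
    prediction = per_choice_logits.argmax(dim=-1)  # index of the best-scoring choice
    return prediction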
def __init__(self, task, d_inp: int, task_params):
    super(EdgeClassifierModule, self).__init__()
    # Set config options needed for forward pass.
    self.loss_type = task_params["cls_loss_fn"]
    self.span_pooling = task_params["cls_span_pooling"]
    self.cnn_context = task_params["edgeprobe_cnn_context"]
    self.is_symmetric = task_params["edgeprobe_symmetric"]
    self.single_sided = task.single_sided
    self.proj_dim = task_params["d_hid"]

    # Separate projections for span1, span2.
    # Convolution allows using local context outside the span, with
    # cnn_context = 0 behaving as a per-word linear layer.
    # Use these to reduce dimensionality in case we're enumerating a lot of
    # spans; we want to do this *before* extracting spans for greatest
    # efficiency.
    self.proj1 = self._make_cnn_layer(d_inp)
    if self.is_symmetric or self.single_sided:
        # Use None as dummy padding for readability,
        # so that we can index projs[1] and projs[2]
        self.projs = [None, self.proj1, self.proj1]
    else:
        # Separate params for span2
        self.proj2 = self._make_cnn_layer(d_inp)
        self.projs = [None, self.proj1, self.proj2]

    # Span extractor, shared for span1 and span2 in the symmetric and
    # single-sided cases.
    self.span_extractor1 = self._make_span_extractor()
    if self.is_symmetric or self.single_sided:
        self.span_extractors = [None, self.span_extractor1, self.span_extractor1]
    else:
        self.span_extractor2 = self._make_span_extractor()
        self.span_extractors = [None, self.span_extractor1, self.span_extractor2]

    # Classifier gets concatenated projections of span1, span2.
    clf_input_dim = self.span_extractors[1].get_output_dim()
    if not self.single_sided:
        # Nonlinear (MLP) classifier over the two concatenated spans.
        clf_input_dim += self.span_extractors[2].get_output_dim()
    else:
        # Linear (logistic regression) classifier over a single span.
        # NB: this mutates the shared task_params dict.
        task_params["cls_type"] = "log_reg"
    self.classifier = Classifier.from_params(clf_input_dim, task.n_classes, task_params)
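# A minimal illustration of the weight sharing used above: in the symmetric /
# single-sided cases both list slots hold the *same* module object, so span1
# and span2 are projected with identical weights and gradients accumulate in
# one parameter set. torch.nn.Linear is a hypothetical stand-in for the CNN
# projection layer.
def _demo_shared_projection():
    import torch

    shared = torch.nn.Linear(8, 8)
    projs = [None, shared, shared]  # mirrors the [None, proj1, proj1] layout
    assert projs[1] is projs[2]  # one set of weights, referenced twice
    return projs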
def build_qa_module(task, d_inp, use_bert, params):
    """ Build a simple QA module that
    1) pools representations (either of the joint (context, question, answer)
       sequence or of each individually),
    2) projects down to two logits, and
    3) classifies.

    This module models each question-answer pair _individually_ """
    pooler = Pooler(
        project=not use_bert, d_inp=d_inp, d_proj=params["d_proj"], pool_type=params["pool_type"]
    )
    d_out = d_inp if use_bert else params["d_proj"]
    classifier = Classifier.from_params(d_out, 2, params)
    return SingleClassifier(pooler, classifier)
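# A shape-only sketch (hypothetical tensors) of the QA head above: each pooled
# (context, question, answer) representation is mapped to two logits,
# interpreted as answer-incorrect vs. answer-correct. torch.nn.Linear is a
# hypothetical stand-in for the Classifier head.
def _demo_qa_logits(batch: int = 8, d_out: int = 768):
    import torch

    pooled = torch.randn(batch, d_out)  # stand-in for the pooler output
    logits = torch.nn.Linear(d_out, 2)(pooled)
    assert logits.shape == (batch, 2)
    return logits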
def build_single_sentence_module(task, d_inp: int, use_bert: bool, params: Params):
    """ Build a single-sentence classifier

    args:
        - task (Task): task object, used to get the number of output classes
        - d_inp (int): input dimension to the module, needed for the optional linear projection
        - use_bert (bool): if using BERT, skip the projection before pooling
        - params (Params): Params object with task-specific parameters

    returns:
        - SingleClassifier (nn.Module): single-sentence classifier consisting of
            an (optional) linear projection, pooling, and an MLP classifier
    """
    pooler = Pooler(
        project=not use_bert, d_inp=d_inp, d_proj=params["d_proj"], pool_type=params["pool_type"]
    )
    d_out = d_inp if use_bert else params["d_proj"]
    classifier = Classifier.from_params(d_out, task.n_classes, params)
    module = SingleClassifier(pooler, classifier)
    return module
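# A minimal usage sketch for build_single_sentence_module (illustrative, not
# part of the library). The SimpleNamespace task is a stand-in for a real Task
# object, and the parameter values are assumptions; a plain dict suffices here
# because params is only subscripted.
def _demo_build_single_sentence_module():
    from types import SimpleNamespace

    demo_task = SimpleNamespace(n_classes=2)  # e.g. a binary sentiment task
    demo_params = {
        "d_proj": 256,
        "pool_type": "max",
        "cls_type": "mlp",
        "d_hid": 512,
        "dropout": 0.2,
    }
    # With use_bert=True the projection is skipped and d_out == d_inp.
    return build_single_sentence_module(demo_task, d_inp=768, use_bert=True, params=demo_params)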