Example #1
    def __init__(self,
                 vocab,
                 feature_size: int,
                 max_span_width: int,
                 keep_rate: int,
                 mlp_dropout: float = 0.4,
                 embedder_type=None,
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(PrePruner, self).__init__(vocab, regularizer)
        self.keep_rate = keep_rate
        self.embedder = get_embeddings(embedder_type, self.vocab)
        self.ffn = FeedForward(300, 2, 300, F.relu, 0.5)
        embedding_dim = self.embedder.get_output_dim()

        self._span_extractor = PoolingSpanExtractor(
            embedding_dim,
            num_width_embeddings=max_span_width,
            span_width_embedding_dim=feature_size,
            bucket_widths=False)
        entity_feedforward = FeedForward(self._span_extractor.get_output_dim(),
                                         2, 150, F.relu, mlp_dropout)

        self.feedforward_scorer = torch.nn.Sequential(
            TimeDistributed(entity_feedforward),
            TimeDistributed(
                torch.nn.Linear(entity_feedforward.get_output_dim(), 1)),
        )
        self._lexical_dropout = torch.nn.Dropout(p=0.1)

        self.loss = torch.nn.BCELoss()
        self._metric_f1 = FBetaMeasure()
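The two TimeDistributed wrappers in feedforward_scorer above are what let a per-span MLP and a per-span linear scorer run over a (batch, num_spans, dim) tensor. Below is a minimal plain-PyTorch sketch of that idea with made-up dimensions; TimeDistributedSketch is a stand-in for AllenNLP's TimeDistributed, not the library class itself.

import torch


class TimeDistributedSketch(torch.nn.Module):
    """Fold the second dimension into the batch, apply the module, restore the shape."""

    def __init__(self, module: torch.nn.Module) -> None:
        super().__init__()
        self._module = module

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        batch_size, num_steps = inputs.shape[0], inputs.shape[1]
        flat = inputs.reshape(batch_size * num_steps, *inputs.shape[2:])
        outputs = self._module(flat)
        return outputs.reshape(batch_size, num_steps, *outputs.shape[1:])


# Toy span scorer mirroring feedforward_scorer: a hidden layer, then one score per span.
scorer = torch.nn.Sequential(
    TimeDistributedSketch(torch.nn.Sequential(torch.nn.Linear(64, 150), torch.nn.ReLU())),
    TimeDistributedSketch(torch.nn.Linear(150, 1)),
)
span_embeddings = torch.randn(2, 7, 64)  # (batch_size, num_spans, embedding_dim)
span_scores = scorer(span_embeddings)    # (batch_size, num_spans, 1)
print(span_scores.shape)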
Example #2
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 phrase_probability: FeedForward,
                 edge_probability: FeedForward,
                 premise_encoder: Seq2SeqEncoder,
                 edge_embedding: Embedding,
                 use_encoding_for_node: bool,
                 ignore_edges: bool,
                 attention_similarity: SimilarityFunction,
                 initializer: InitializerApplicator = InitializerApplicator()) -> None:
        super(TreeAttention, self).__init__(vocab)

        self._text_field_embedder = text_field_embedder
        self._premise_encoder = premise_encoder
        self._nodes_attention = SingleTimeDistributed(MatrixAttention(attention_similarity), 0)
        self._num_labels = vocab.get_vocab_size(namespace="labels")
        self._phrase_probability = TimeDistributed(phrase_probability)
        self._ignore_edges = ignore_edges
        if not self._ignore_edges:
            self._num_edges = vocab.get_vocab_size(namespace="edges")
            self._edge_probability = TimeDistributed(edge_probability)
            self._edge_embedding = edge_embedding
        self._use_encoding_for_node = use_encoding_for_node
        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #3
    def forward(
        self, inputs: torch.Tensor
    ) -> Union[torch.Tensor, List[torch.Tensor]]:  # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        inputs: ``torch.Tensor``
            Shape ``(batch_size, timesteps, 50)`` of character ids representing the current batch.

        Returns
        -------
        The ELMo representations for the input sequence, shape
        ``(batch_size, timesteps, embedding_dim)``
        """
        elmo_output = self._elmo(inputs)

        if self._num_output_representations == 1:
            elmo_representations = elmo_output['elmo_representations'][0]
            if self._projection:
                projection = self._projection
                for _ in range(elmo_representations.dim() - 2):
                    projection = TimeDistributed(projection)
                elmo_representations = projection(elmo_representations)
            return elmo_representations
        else:
            multi_elmo_representations = []
            for elmo_representations in elmo_output['elmo_representations']:
                if self._projection:
                    projection = self._projection
                    for _ in range(elmo_representations.dim() - 2):
                        projection = TimeDistributed(projection)
                    elmo_representations = projection(elmo_representations)
                multi_elmo_representations.append(elmo_representations)
            return multi_elmo_representations
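Example #4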
    def forward(self, tokens, num_wrapping_dims: int = 0) -> torch.Tensor:
        embedded_representations = []
        keys = sorted(self._token_embedders.keys())
        for key in keys:
            # Note: need to use getattr here so that the pytorch voodoo
            # with submodules works with multiple GPUs.
            if key in ['tokens', 'elmo']:
                continue
            embedder = getattr(self, 'token_embedder_{}'.format(key))
            for _ in range(num_wrapping_dims):
                embedder = TimeDistributed(embedder)
            token_vectors = embedder(tokens)
            #token_vectors = self.linear_layers[key](embedder(tokens))
            embedded_representations.append(token_vectors)

        concatenated_emb = torch.cat(embedded_representations, dim=-1)
        combined_emb = self.linear_layer(concatenated_emb)
        combined_emb = self.gamma * combined_emb
        #combined_emb = self.scalar_mix(embedded_representations)

        if self.use_glove:
            embedder = getattr(self, 'token_embedder_tokens')
            for _ in range(num_wrapping_dims):
                embedder = TimeDistributed(self.glove_embedder)
            glove_emb = embedder(tokens['tokens'])
            combined_emb = torch.cat([combined_emb, glove_emb], dim=-1)

        if self.use_elmo:
            embedder = getattr(self, 'token_embedder_elmo')
            for _ in range(num_wrapping_dims):
                embedder = TimeDistributed(self.elmo_embedder)
            elmo_emb = embedder(tokens['elmo'])
            combined_emb = torch.cat([combined_emb, elmo_emb], dim=-1)

        return combined_emb
Example #5
 def __init__(self, embedding: Embedding, encoder: Seq2VecEncoder, dropout: float = 0.0) -> None:
     super(TokenBPEEncoder, self).__init__()
     self._embedding = TimeDistributed(embedding)
     self._encoder = TimeDistributed(encoder)
     if dropout > 0:
         self._dropout = torch.nn.Dropout(p=dropout)
     else:
         self._dropout = lambda x: x
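Example #6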
 def __init__(self, embedding, encoder, dropout=0.0):
     super(TokenCharactersEncoder, self).__init__()
     self._embedding = TimeDistributed(embedding)
     self._encoder = TimeDistributed(encoder)
     if dropout > 0:
         self._dropout = torch.nn.Dropout(p=dropout)
     else:
         self._dropout = lambda x: x
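Both __init__ variants above wrap the character embedding and the Seq2Vec encoder in TimeDistributed so they run once per token. A minimal sketch of that data flow in plain PyTorch, with assumed shapes and a mean-pool standing in for the wrapped encoder:

import torch

batch, num_tokens, num_chars, char_vocab, char_dim = 2, 5, 8, 30, 16
char_ids = torch.randint(0, char_vocab, (batch, num_tokens, num_chars))

embedding = torch.nn.Embedding(char_vocab, char_dim)

# nn.Embedding already broadcasts over extra leading dims, so the "time
# distribution" is only really needed for the encoder/pooling step.
char_embs = embedding(char_ids)                          # (batch, tokens, chars, char_dim)
flat = char_embs.view(batch * num_tokens, num_chars, char_dim)
pooled = flat.mean(dim=1)                                # stand-in for a Seq2VecEncoder
token_vectors = pooled.view(batch, num_tokens, char_dim)
print(token_vectors.shape)                               # torch.Size([2, 5, 16])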
Example #7
    def __init__(
        self,
        highway_encoder: Seq2SeqEncoder,
        transform_gate_encoder: Seq2SeqEncoder,
        carry_gate_encoder: Optional[Seq2SeqEncoder] = None,
        projection: bool = True,
    ) -> None:
        stateful = highway_encoder.stateful or transform_gate_encoder.stateful
        check_dimensions_match(
            highway_encoder.get_input_dim(),
            transform_gate_encoder.get_input_dim(),
            "highway_encoder input dim",
            "transform_gate_encoder input dim",
        )
        if carry_gate_encoder is not None:
            stateful = stateful or carry_gate_encoder.stateful
            check_dimensions_match(
                highway_encoder.get_input_dim(),
                carry_gate_encoder.get_input_dim(),
                "highway_encoder input dim",
                "carry_gate_encoder input dim",
            )

        super().__init__(stateful=stateful)

        self._input_dim = highway_encoder.get_input_dim()
        self._highway_encoder = highway_encoder
        self._transform_gate_encoder = transform_gate_encoder
        self._carry_gate_encoder = carry_gate_encoder
        self._highway_projection: Optional[torch.nn.Module] = None
        self._transform_gate_projection: Optional[torch.nn.Module] = None
        self._carry_gate_projection: Optional[torch.nn.Module] = None
        if projection:
            self._highway_projection = TimeDistributed(  # type: ignore
                torch.nn.Linear(
                    highway_encoder.get_output_dim(),
                    highway_encoder.get_input_dim(),
                ))
            self._transform_gate_projection = TimeDistributed(  # type: ignore
                torch.nn.Linear(
                    transform_gate_encoder.get_output_dim(),
                    transform_gate_encoder.get_input_dim(),
                ), )
            if carry_gate_encoder is not None:
                self._carry_gate_projection = TimeDistributed(  # type: ignore
                    torch.nn.Linear(
                        carry_gate_encoder.get_output_dim(),
                        carry_gate_encoder.get_input_dim(),
                    ), )
        else:
            assert highway_encoder.get_output_dim() in (self._input_dim, 1)
            assert transform_gate_encoder.get_output_dim() in (self._input_dim,
                                                               1)
            if carry_gate_encoder is not None:
                assert carry_gate_encoder.get_output_dim() in (self._input_dim,
                                                               1)
Example #8
 def project(self, embedded, input_type):
     if input_type == 'entity':
         if self._projection_entity:
             projection = self._projection_entity
             for _ in range(embedded.dim() - 2):
                 projection = TimeDistributed(projection)
             embedded = projection(embedded)
     elif input_type == 'predicate':
         if self._projection_predicate:
             projection = self._projection_predicate
             for _ in range(embedded.dim() - 2):
                 projection = TimeDistributed(projection)
             embedded = projection(embedded)
     return embedded
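The project helper above wraps the projection once per extra dimension before applying it. A minimal sketch of the same idiom, using a hypothetical distribute helper instead of AllenNLP's TimeDistributed:

import torch


def distribute(module: torch.nn.Module, inputs: torch.Tensor) -> torch.Tensor:
    # Flatten every dimension except the last into one "batch" dimension,
    # apply the module, then restore the leading dimensions.
    flat = inputs.reshape(-1, inputs.size(-1))
    out = module(flat)
    return out.reshape(*inputs.shape[:-1], out.size(-1))


projection = torch.nn.Linear(300, 128)
embedded = torch.randn(4, 6, 9, 300)        # e.g. (batch, groups, steps, dim)
projected = distribute(projection, embedded)
print(projected.shape)                      # torch.Size([4, 6, 9, 128])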
Example #9
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 contextualizer: Seq2SeqEncoder,
                 forward_segmental_contextualizer: Seq2SeqEncoder,
                 backward_segmental_contextualizer: Seq2SeqEncoder,
                 label_feature_dim: int,
                 softmax_projection_dim: int,
                 label_namespace: str = "labels",
                 dropout: float = None,
                 num_samples: int = None,
                 sparse_embeddings: bool = False,
                 bidirectional: bool = True,
                 initializer: InitializerApplicator = None) -> None:
        super().__init__(vocab=vocab,
                         text_field_embedder=text_field_embedder,
                         contextualizer=contextualizer,
                         dropout=dropout,
                         num_samples=num_samples,
                         sparse_embeddings=sparse_embeddings,
                         bidirectional=bidirectional,
                         initializer=initializer)
        self._forward_segmental_contextualizer = forward_segmental_contextualizer
        self._backward_segmental_contextualizer = backward_segmental_contextualizer

        if num_samples is not None:
            self._softmax_loss = SampledSoftmaxLoss(
                num_words=vocab.get_vocab_size(),
                embedding_dim=softmax_projection_dim,
                num_samples=num_samples,
                sparse=sparse_embeddings)
        else:
            self._softmax_loss = _SoftmaxLoss(
                num_words=vocab.get_vocab_size(),
                embedding_dim=softmax_projection_dim)

        self.num_classes = self.vocab.get_vocab_size(label_namespace)
        self.label_feature_embedding = Embedding(self.num_classes,
                                                 label_feature_dim)

        base_dim = contextualizer.get_output_dim() // 2
        seg_dim = base_dim + label_feature_dim
        self._forward_dim = softmax_projection_dim

        self.pre_segmental_layer = TimeDistributed(
            Linear(seg_dim, softmax_projection_dim))
        self.projection_layer = TimeDistributed(
            Linear(base_dim * 2, softmax_projection_dim))
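Example #10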
    def forward(
            self,  # pylint: disable=arguments-differ
            inputs: torch.Tensor,
            word_inputs: torch.Tensor = None) -> torch.Tensor:
        """
        Parameters
        ----------
        inputs: ``torch.Tensor``
            Shape ``(batch_size, timesteps, 50)`` of character ids representing the current batch.
        word_inputs : ``torch.Tensor``, optional.
            If you passed a cached vocab, you can in addition pass a tensor of shape
            ``(batch_size, timesteps)``, which represent word ids which have been pre-cached.

        Returns
        -------
        The ELMo representations for the input sequence, shape
        ``(batch_size, timesteps, embedding_dim)``
        """
        elmo_output = self._elmo(inputs, word_inputs)
        elmo_representations = elmo_output['elmo_representations'][0]
        elmo_lstm_output = elmo_output['elmo_lstm_output']
        if self._projection:
            projection = self._projection
            for _ in range(elmo_representations.dim() - 2):
                projection = TimeDistributed(projection)
            elmo_representations = projection(elmo_representations)
        return elmo_representations, elmo_lstm_output
Example #11
    def forward(self, tokens: torch.Tensor) -> torch.Tensor:
        original_size = tokens.size()
        tokens = util.combine_initial_dims(tokens)

        embedded = embedding(
            tokens,
            self.weight,
            padding_idx=self.padding_index,
            max_norm=self.max_norm,
            norm_type=self.norm_type,
            scale_grad_by_freq=self.scale_grad_by_freq,
            sparse=self.sparse,
        )

        # Now (if necessary) add back in the extra dimensions.
        embedded = util.uncombine_initial_dims(embedded, original_size)

        if self._projection:
            projection = self._projection
            for _ in range(embedded.dim() - 2):
                projection = TimeDistributed(projection)
            embedded = projection(embedded)

        # if adv_utils.is_adv_mode():
        #     info = adv_utils.get_gradient_info()
        #     grad_norm = torch.norm(info.last_bw, dim=-1, keepdim=True) + 1e-6
        #     delta = info.last_bw / grad_norm
        #     embedded += info.grd_step * delta
        return embedded
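The forward above flattens any extra leading dimensions before the embedding lookup and restores them afterwards. A minimal sketch of that reshape round-trip with illustrative shapes; the view calls stand in for util.combine_initial_dims / util.uncombine_initial_dims:

import torch
import torch.nn.functional as F

weight = torch.randn(100, 32)                      # (vocab_size, embedding_dim)
tokens = torch.randint(0, 100, (2, 3, 7))          # (batch_size, d1, sequence_length)

flat_tokens = tokens.view(-1, tokens.size(-1))     # (batch_size * d1, sequence_length)
flat_embedded = F.embedding(flat_tokens, weight)   # (batch_size * d1, sequence_length, 32)
embedded = flat_embedded.view(*tokens.size(), 32)  # (batch_size, d1, sequence_length, 32)
print(embedded.shape)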
Example #12
    def forward(
        self, text_field_input: TextFieldTensors, num_wrapping_dims: int = 0, **kwargs
    ) -> torch.Tensor:
        if self._token_embedders.keys() != text_field_input.keys():
            message = "Mismatched token keys: %s and %s" % (
                str(self._token_embedders.keys()),
                str(text_field_input.keys()),
            )
            raise ConfigurationError(message)

        embedded_representations = []
        for key in self._ordered_embedder_keys:
            embedder = getattr(self, "token_embedder_{}".format(key))
            forward_params = inspect.signature(embedder.forward).parameters
            forward_params_values = {}
            missing_tensor_args = set()
            for param in forward_params.keys():
                if param in kwargs:
                    forward_params_values[param] = kwargs[param]
                else:
                    missing_tensor_args.add(param)

            for _ in range(num_wrapping_dims):
                embedder = TimeDistributed(embedder)

            tensors: Dict[str, torch.Tensor] = text_field_input[key]
            if len(tensors) == 1 and len(missing_tensor_args) == 1:
                token_vectors = embedder(list(tensors.values())[0], **forward_params_values)
            else:
                token_vectors = embedder(**tensors, **forward_params_values)
            if token_vectors is not None:
                # The embedder may return None in rare use cases; skip it then.
                embedded_representations.append(token_vectors)
        return torch.cat(embedded_representations, dim=-1)
Example #13
    def forward(self,
                elmo_tokens: torch.Tensor,
                word_inputs: torch.Tensor = None) -> torch.Tensor:
        """
        # Parameters

        elmo_tokens : `torch.Tensor`
            Shape `(batch_size, timesteps, 50)` of character ids representing the current batch.
        word_inputs : `torch.Tensor`, optional.
            If you passed a cached vocab, you can in addition pass a tensor of shape
            `(batch_size, timesteps)`, which represent word ids which have been pre-cached.

        # Returns

        `torch.Tensor`
            The ELMo representations for the input sequence, shape
            `(batch_size, timesteps, embedding_dim)`
        """
        elmo_output = self._elmo(elmo_tokens, word_inputs)
        elmo_representations = elmo_output["elmo_representations"][0]
        if self._projection:
            projection = self._projection
            for _ in range(elmo_representations.dim() - 2):
                projection = TimeDistributed(projection)
            elmo_representations = projection(elmo_representations)
        return elmo_representations
Example #14
    def forward(self, tokens: torch.Tensor) -> torch.Tensor:
        # tokens may have extra dimensions (batch_size, d1, ..., dn, sequence_length),
        # but embedding expects (batch_size, sequence_length), so pass tokens to
        # util.combine_initial_dims (which is a no-op if there are no extra dimensions).
        # Remember the original size.
        original_size = tokens.size()
        tokens = util.combine_initial_dims(tokens)

        embedded = embedding(
            tokens,
            self.weight,
            padding_idx=self.padding_index,
            max_norm=self.max_norm,
            norm_type=self.norm_type,
            scale_grad_by_freq=self.scale_grad_by_freq,
            sparse=self.sparse,
        )

        # Now (if necessary) add back in the extra dimensions.
        embedded = util.uncombine_initial_dims(embedded, original_size)

        if self._projection:
            projection = self._projection
            for _ in range(embedded.dim() - 2):
                projection = TimeDistributed(projection)
            embedded = projection(embedded)
        return embedded
Example #15
    def forward(self, inputs):  # pylint: disable=arguments-differ
        # inputs may have extra dimensions (batch_size, d1, ..., dn, sequence_length),
        # but embedding expects (batch_size, sequence_length), so pass inputs to
        # util.combine_initial_dims (which is a no-op if there are no extra dimensions).
        # Remember the original size.
        original_size = inputs.size()
        inputs = util.combine_initial_dims(inputs)

        embedded = embedding(inputs,
                             self.weight,
                             padding_idx=self.padding_index,
                             max_norm=self.max_norm,
                             norm_type=self.norm_type,
                             scale_grad_by_freq=self.scale_grad_by_freq,
                             sparse=self.sparse)

        # Now (if necessary) add back in the extra dimensions.
        embedded = util.uncombine_initial_dims(embedded, original_size)

        if self._projection:
            projection = self._projection
            for _ in range(embedded.dim() - 2):
                projection = TimeDistributed(projection)
            embedded = projection(embedded)
        return embedded
Example #16
 def __init__(self, vocab: Vocabulary, glyph_config, encoder) -> None:
     super(GlyphEmbeddingWrapper, self).__init__()
     self.glyph_config = glyph_config
     self.glyph_config.idx2char = vocab._index_to_token['token_characters']
     self.glyph_embedding = CharGlyphEmbedding(self.glyph_config)
     self._encoder = TimeDistributed(encoder)
     self.using_glyph = True
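Example #17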
 def forward(self, text_field_input: Dict[str, torch.Tensor], num_wrapping_dims: int = 0) -> torch.Tensor:
     if self._token_embedders.keys() != text_field_input.keys():
         if not self._allow_unmatched_keys:
             message = "Mismatched token keys: %s and %s" % (str(self._token_embedders.keys()),
                                                             str(text_field_input.keys()))
             raise ConfigurationError(message)
     embedded_representations = []
     keys = sorted(self._token_embedders.keys())
     for key in keys:
          # If we pre-specified a mapping explicitly, use that.
         if self._embedder_to_indexer_map is not None:
             tensors = [text_field_input[indexer_key] for
                        indexer_key in self._embedder_to_indexer_map[key]]
         else:
             # otherwise, we assume the mapping between indexers and embedders
             # is bijective and just use the key directly.
             tensors = [text_field_input[key]]
         # Note: need to use getattr here so that the pytorch voodoo
         # with submodules works with multiple GPUs.
         embedder = getattr(self, 'token_embedder_{}'.format(key))
         for _ in range(num_wrapping_dims):
             embedder = TimeDistributed(embedder)
         token_vectors = embedder(*tensors)
         embedded_representations.append(token_vectors)
     return torch.cat(embedded_representations, dim=-1)
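The text-field-embedder forwards above all follow the same pattern: look up one embedder per indexer key, embed that key's tensor, and concatenate the results along the last dimension. A minimal sketch with made-up keys and dimensions:

import torch

batch, seq_len = 2, 6
text_field_input = {
    "tokens": torch.randint(0, 50, (batch, seq_len)),
    "pos_tags": torch.randint(0, 10, (batch, seq_len)),
}
token_embedders = {
    "tokens": torch.nn.Embedding(50, 32),
    "pos_tags": torch.nn.Embedding(10, 8),
}

# One embedded tensor per key, concatenated on the feature dimension.
embedded = [token_embedders[key](text_field_input[key]) for key in sorted(token_embedders)]
combined = torch.cat(embedded, dim=-1)  # (batch, seq_len, 32 + 8)
print(combined.shape)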
Example #18
    def forward(self, text_field_input: Dict[str, torch.Tensor],
                classifier_name: str = "@pretrain@", num_wrapping_dims: int = 0) -> torch.Tensor:
        if self._token_embedders.keys() != text_field_input.keys():
            message = "Mismatched token keys: %s and %s" % (str(self._token_embedders.keys()),
                                                            str(text_field_input.keys()))
            raise ConfigurationError(message)
        embedded_representations = []
        keys = sorted(text_field_input.keys())
        for key in keys:
            tensor = text_field_input[key]
            # Note: need to use getattr here so that the pytorch voodoo
            # with submodules works with multiple GPUs.
            embedder = getattr(self, 'token_embedder_{}'.format(key))
            for _ in range(num_wrapping_dims):
                embedder = TimeDistributed(embedder)
            token_vectors = embedder(tensor)

            # Changed vs original:
            # If we want separate scalars/task, figure out which representation to use, since
            # embedder create a representation for _all_ sets of scalars. This can be optimized
            # with more wrapper classes but we compute all of them for now.
            # The shared ELMo scalar weights version all use the @pretrain@ embeddings.
            # There must be at least as many ELMo representations as the highest index in
            # self.task_map, otherwise indexing will fail.
            if key == "elmo" and not self.elmo_chars_only:
                if self.sep_embs_for_skip:
                    token_vectors = token_vectors['elmo_representations'][self.task_map[classifier_name]]
                else:
                    token_vectors = token_vectors['elmo_representations'][self.task_map["@pretrain@"]]

            # optional projection step that we are ignoring.
            embedded_representations.append(token_vectors)
        return torch.cat(embedded_representations, dim=-1)
Example #19
    def forward(
            self,  # pylint: disable=arguments-differ
            inputs: torch.Tensor,
            lang: str,
            word_inputs: torch.Tensor = None) -> torch.Tensor:
        """
        Parameters
        ----------
        inputs: ``torch.Tensor``
            Shape ``(batch_size, timesteps, 50)`` of character ids representing the current batch.
        lang : ``str``, required.
            The language of the ELMo embedder to use.
        word_inputs : ``torch.Tensor``, optional.
            If you passed a cached vocab, you can in addition pass a tensor of shape
            ``(batch_size, timesteps)``, which represent word ids which have been pre-cached.

        Returns
        -------
        The ELMo representations for the given language for the input sequence, shape
        ``(batch_size, timesteps, embedding_dim)``
        """
        elmo = getattr(self, 'elmo_{}'.format(lang))
        elmo_output = elmo(inputs, word_inputs)
        elmo_representations = elmo_output['elmo_representations'][0]
        aligning = getattr(self, 'aligning_{}'.format(lang))
        elmo_representations = aligning(elmo_representations)
        if self._projection:
            projection = self._projection
            for _ in range(elmo_representations.dim() - 2):
                projection = TimeDistributed(projection)
            elmo_representations = projection(elmo_representations)
        return elmo_representations
Example #20
 def forward(
     self,  # pylint: disable=arguments-differ
     inputs: torch.Tensor) -> torch.Tensor:
     """
     Parameters
     ----------
     inputs: ``torch.Tensor``
         Shape ``(batch_size, timesteps)`` of character ids representing the current batch.
     Returns
     -------
     The VAMPIRE representations for the input sequence, shape
     ``(batch_size, timesteps, embedding_dim)`` or ``(batch_size, timesteps)``
     depending on whether expand_dim is set to True.
     """
     vae_output = self._vae(inputs)
     embedded = vae_output['vae_representation']
     self._layers = vae_output['layers']
     if self._expand_dim:
         embedded = (embedded.unsqueeze(0).expand(
             inputs.shape[1], inputs.shape[0], -1).permute(1, 0,
                                                           2).contiguous())
     if self._projection:
         projection = self._projection
         for _ in range(embedded.dim() - 2):
             projection = TimeDistributed(projection)
         embedded = projection(embedded)
     return embedded
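Example #21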
    def forward(self, tokens, num_wrapping_dims: int = 0) -> torch.Tensor:
        embedded_representations = []
        keys = sorted(self._token_embedders.keys())
        for key in keys:
            # Note: need to use getattr here so that the pytorch voodoo
            # with submodules works with multiple GPUs.
            if key in self.separate_embedder_keys:
                continue
            embedder = getattr(self, 'token_embedder_{}'.format(key))
            for _ in range(num_wrapping_dims):
                embedder = TimeDistributed(embedder)
            token_vectors = self.linear_layers[key](embedder(tokens))
            embedded_representations.append(token_vectors)

        mask = util.get_text_field_mask(tokens)
        embedded_representations = torch.stack(embedded_representations,
                                               dim=-2)
        query_emb = self.rnn_encoder(tokens, mask)
        similarities = torch.matmul(embedded_representations,
                                    query_emb.unsqueeze(-1)).squeeze(-1)
        similarities = util.masked_softmax(similarities, mask, dim=-1)
        combined_emb = torch.matmul(embedded_representations.transpose(2, 3),
                                    similarities.unsqueeze(-1)).squeeze(-1)

        if self.use_glove:
            embedder = getattr(self, 'token_embedder_tokens')
            for _ in range(num_wrapping_dims):
                embedder = TimeDistributed(self.glove_embedder)
            glove_emb = embedder(tokens['tokens'])
            combined_emb = torch.cat([combined_emb, glove_emb], dim=-1)

        if self.use_elmo:
            embedder = getattr(self, 'token_embedder_elmo')
            for _ in range(num_wrapping_dims):
                embedder = TimeDistributed(self.elmo_embedder)
            elmo_emb = embedder(tokens['elmo'])
            combined_emb = torch.cat([combined_emb, elmo_emb], dim=-1)

        if self.use_char:
            embedder = getattr(self, 'token_embedder_token_characters')
            for _ in range(num_wrapping_dims):
                embedder = TimeDistributed(self.char_embeddder)
            token_vectors = embedder(tokens['token_characters'])
            combined_emb = torch.cat([combined_emb, token_vectors], dim=-1)

        return combined_emb
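Example #22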
 def __init__(
         self,
         # input_dim: int,
         pooler: Seq2VecEncoder):
     super().__init__()
     self._input_dim = pooler.get_output_dim()
     # we distribute the pooler across _spans_, not actual time
     self._pooler = TimeDistributed(pooler)
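As the comment above notes, the pooler is distributed across spans rather than time steps. A minimal sketch of what that wrapping does, with assumed shapes and max-pooling standing in for the wrapped Seq2VecEncoder:

import torch

batch, num_spans, span_width, dim = 2, 4, 5, 32
span_tokens = torch.randn(batch, num_spans, span_width, dim)

# Fold the span dimension into the batch, pool over the span width, then unfold.
flat = span_tokens.view(batch * num_spans, span_width, dim)
pooled = flat.max(dim=1).values             # stand-in for the wrapped pooler
span_reprs = pooled.view(batch, num_spans, dim)
print(span_reprs.shape)                     # torch.Size([2, 4, 32])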
Example #23
    def __init__(self,
                 vocab: Vocabulary,
                 user_feature_embedder: TextFieldEmbedder,
                 format_feature_embedder: TextFieldEmbedder,
                 embedder: TextFieldEmbedder,
                 context_encoder: Seq2SeqEncoder,
                 user_encoder: FeedForward,
                 format_encoder: FeedForward,
                 global_encoder: FeedForward,
                 local_encoder: FeedForward,
                 classifier: FeedForward,
                 linguistic_encoder: Seq2SeqEncoder = None,
                 alpha: float = 0.7,
                 dropout: float = 0.5) -> None:
        super().__init__(vocab)

        # The idea behind the CLUF model is to separately encode the:
        #  (C)ontext, (L)inguistic Features, (U)ser, and (F)ormat
        # which is what the following parameters do.

        # (C)ontext Encoder
        self._context_encoder = context_encoder
        # (L)inguistic Encoder
        self._linguistic_encoder = linguistic_encoder
        # (U)ser Encoder
        self._user_encoder = user_encoder
        # (F)ormat Encoder
        self._format_encoder = format_encoder

        # projection for global/local information
        self._global_encoder = global_encoder
        self._local_encoder = TimeDistributed(local_encoder)

        self._classifier = TimeDistributed(classifier)
        self._embedder = embedder
        self._user_feature_embedder = user_feature_embedder
        self._format_feature_embedder = format_feature_embedder

        self._dropout = nn.Dropout(dropout)
        self._auc = Auc()
        self._f1 = F1Measure(1)
        self._alpha = alpha

        self._user_features = ['user', 'countries']
        self._format_features = ['client', 'session', 'format']
Example #24
    def forward(self,
                text_field_input: TextFieldTensors,
                num_wrapping_dims: int = 0,
                **kwargs) -> torch.Tensor:
        if sorted(self._token_embedders.keys()) != sorted(
                text_field_input.keys()):
            message = "Mismatched token keys: %s and %s" % (
                str(self._token_embedders.keys()),
                str(text_field_input.keys()),
            )
            embedder_keys = set(self._token_embedders.keys())
            input_keys = set(text_field_input.keys())
            if embedder_keys > input_keys and all(
                    isinstance(embedder, EmptyEmbedder)
                    for name, embedder in self._token_embedders.items()
                    if name in embedder_keys - input_keys):
                # Allow extra embedders that are only in the token embedders (but not input) and are empty to pass
                # config check
                pass
            else:
                raise ConfigurationError(message)

        embedded_representations = []
        for key in self._ordered_embedder_keys:
            # Note: need to use getattr here so that the pytorch voodoo
            # with submodules works with multiple GPUs.
            embedder = getattr(self, "token_embedder_{}".format(key))
            if isinstance(embedder, EmptyEmbedder):
                # Skip empty embedders
                continue
            forward_params = inspect.signature(embedder.forward).parameters
            forward_params_values = {}
            missing_tensor_args = set()
            for param in forward_params.keys():
                if param in kwargs:
                    forward_params_values[param] = kwargs[param]
                else:
                    missing_tensor_args.add(param)

            for _ in range(num_wrapping_dims):
                embedder = TimeDistributed(embedder)

            tensors: Dict[str, torch.Tensor] = text_field_input[key]
            if len(tensors) == 1 and len(missing_tensor_args) == 1:
                # If there's only one tensor argument to the embedder, and we just have one tensor to
                # embed, we can just pass in that tensor, without requiring a name match.
                token_vectors = embedder(
                    list(tensors.values())[0], **forward_params_values)
            else:
                # If there are multiple tensor arguments, we have to require matching names from the
                # TokenIndexer.  I don't think there's an easy way around that.
                token_vectors = embedder(**tensors, **forward_params_values)
            if token_vectors is not None:
                # To handle some very rare use cases, we allow the return value of the embedder to
                # be None; we just skip it in that case.
                embedded_representations.append(token_vectors)
        return torch.cat(embedded_representations, dim=-1)
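Example #25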
    def __init__(self, input_dim: int,
                 span_self_attentive_encoder: Seq2SeqEncoder) -> None:
        super().__init__()
        self._input_dim = input_dim
        self._global_attention = TimeDistributed(torch.nn.Linear(input_dim, 1))
        self._span_self_attentive_encoder = span_self_attentive_encoder

        self.modeled_gate = nn.Sequential(nn.Linear(input_dim, 100), nn.ReLU(),
                                          nn.Linear(100, 2))
Example #26
    def __init__(self, vocab: Vocabulary, encoder: Seq2SeqEncoder,
                 classifier: FeedForward, feature_embedder: TextFieldEmbedder,
                 embedder: TextFieldEmbedder) -> None:
        super().__init__(vocab)

        self._classifier = TimeDistributed(classifier)
        self._embedder = embedder
        self._feature_embedder = feature_embedder
        self._encoder = encoder
        self._auc = Auc()
Example #27
 def __init__(self,
              embedding: Embedding,
              encoder: Seq2VecEncoder,
              projection_dim: int = None,
              dropout: float = 0.0) -> None:
     super(TokenCharactersEncoder, self).__init__()
     self._embedding = TimeDistributed(embedding)
     self._encoder = TimeDistributed(encoder)
     self.output_dim = projection_dim or self._encoder._module.get_output_dim()
     if projection_dim:
         self._projection = torch.nn.Linear(
             self._encoder._module.get_output_dim(), projection_dim)
     else:
         self._projection = lambda x: x
     if dropout > 0:
         self._dropout = torch.nn.Dropout(p=dropout)
     else:
         self._dropout = lambda x: x
Example #28
    def __init__(self, input_dim, hidden_dim, num_tags, \
            activation = 'relu',
            dropout = 0.0,
            loss_reduction = 'sum',
            name = None,
            class_weights = None):
        super(SpanScorer, self).__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_tags = num_tags
        self.name = name

        self.activation = activation
        self.activation_fn = get_activation(activation)
        self.dropout = dropout
        self.loss_reduction = loss_reduction
        self.num_layers = 1
        '''
        Create classifier
        '''

        # Feedforward neural network for predicting span labels
        self.FF = FeedForward( \
                    input_dim = self.input_dim,
                    num_layers = self.num_layers,
                    hidden_dims = self.hidden_dim,
                    activations = self.activation_fn,
                    dropout = self.dropout)

        # Span classifier
        self.scorer = torch.nn.Sequential(
            TimeDistributed(self.FF),
            TimeDistributed(torch.nn.Linear(self.hidden_dim, self.num_tags)))

        if class_weights is None:
            self.class_weights = None
        else:
            self.class_weights = torch.tensor(class_weights,
                                              requires_grad=False)
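A minimal usage sketch for a scorer of this shape, with hypothetical dimensions and labels: the TimeDistributed layers (equivalently, a Linear applied to the last dimension) produce one logit vector per span, and the optional class weights feed a weighted cross-entropy over the flattened spans.

import torch

batch, num_spans, input_dim, num_tags = 2, 10, 128, 4
span_reprs = torch.randn(batch, num_spans, input_dim)

# nn.Linear acts on the last dimension, so it scores every span in one call.
scorer = torch.nn.Sequential(
    torch.nn.Linear(input_dim, 64), torch.nn.ReLU(), torch.nn.Linear(64, num_tags)
)
logits = scorer(span_reprs)                           # (batch, num_spans, num_tags)

class_weights = torch.tensor([0.1, 1.0, 1.0, 1.0])    # e.g. down-weight a null label
labels = torch.randint(0, num_tags, (batch, num_spans))
loss = torch.nn.functional.cross_entropy(
    logits.view(-1, num_tags), labels.view(-1), weight=class_weights, reduction="sum"
)
print(loss.item())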
Example #29
    def forward(self, text_field_input: Dict[str, torch.Tensor], num_wrapping_dims: int = 0) -> torch.Tensor:
        embedder_keys = self._token_embedders.keys()
        input_keys = text_field_input.keys()

        # Check for unmatched keys
        if not self._allow_unmatched_keys:
            if embedder_keys < input_keys:
                # token embedder keys are a strict subset of text field input keys.
                message = (f"Your text field is generating more keys ({list(input_keys)}) "
                           f"than you have token embedders ({list(embedder_keys)}. "
                           f"If you are using a token embedder that requires multiple keys "
                           f"(for example, the OpenAI Transformer embedder or the BERT embedder) "
                           f"you need to add allow_unmatched_keys = True "
                           f"(and likely an embedder_to_indexer_map) to your "
                           f"BasicTextFieldEmbedder configuration. "
                           f"Otherwise, you should check that there is a 1:1 embedding "
                           f"between your token indexers and token embedders.")
                raise ConfigurationError(message)

            elif self._token_embedders.keys() != text_field_input.keys():
                # some other mismatch
                message = "Mismatched token keys: %s and %s" % (str(self._token_embedders.keys()),
                                                                str(text_field_input.keys()))
                raise ConfigurationError(message)

        embedded_representations = []
        keys = sorted(embedder_keys)
        for key in keys:
            # If we pre-specified a mapping explicitly, use that.
            if self._embedder_to_indexer_map is not None:
                tensors = [text_field_input[indexer_key] for
                           indexer_key in self._embedder_to_indexer_map[key]]
            else:
                # otherwise, we assume the mapping between indexers and embedders
                # is bijective and just use the key directly.
                tensors = [text_field_input[key]]
            # Note: need to use getattr here so that the pytorch voodoo
            # with submodules works with multiple GPUs.
            embedder = getattr(self, 'token_embedder_{}'.format(key))
            for _ in range(num_wrapping_dims):
                embedder = TimeDistributed(embedder)
            if key == 'token_characters' and \
                    'using_glyph' in dir(self._token_embedders['token_characters']):
                token_vectors, loss = embedder(*tensors)
            else:
                token_vectors = embedder(*tensors)
            embedded_representations.append(token_vectors)
        if 'token_characters' in keys and \
                'using_glyph' in dir(self._token_embedders['token_characters']):
            return torch.cat(embedded_representations, dim=-1), loss
        else:
            return torch.cat(embedded_representations, dim=-1)
Example #30
    def forward(self, inputs):  # pylint: disable=arguments-differ
        # inputs may have extra dimensions (batch_size, d1, ..., dn, sequence_length),
        # but embedding expects (batch_size, sequence_length), so pass inputs to
        # util.combine_initial_dims (which is a no-op if there are no extra dimensions).
        # Remember the original size.
        original_size = inputs.size()

        #         padding_ = torch.LongTensor([self.padding_index] * self.context_window).expand(original_size[0],self.context_window)
        #         padding_ = inputs.new(padding_.numpy())
        #         inputs_pad = torch.cat((padding_, inputs, padding_), dim=1)
        inputs_pad = torch.nn.functional.pad(inputs,
                                             pad=(self.context_window,
                                                  self.context_window))

        original_inputs = inputs_pad
        if original_inputs.dim() > 2:
            inputs_pad = inputs_pad.view(-1, inputs_pad.size(-1))

        embedded = []
        for i, w in enumerate(self.weights):
            e = embedding(inputs_pad[:, i:(i + original_size[-1])],
                          w,
                          max_norm=self.max_norm,
                          norm_type=self.norm_type,
                          scale_grad_by_freq=self.scale_grad_by_freq,
                          sparse=self.sparse)
            embedded.append(e)
        x_embed = torch.stack(embedded, dim=3)
        multiple = torch.stack([
            x_embed[:, :, :, 1] * x_embed[:, :, :, i]
            for i in range(2 * self.context_window + 1)
        ],
                               dim=3)

        if self.mode == "sum":
            local_context = torch.sum(multiple, dim=3)
        elif self.mode == "max":
            local_context = torch.max(multiple, dim=3)[0]
        else:
            local_context = torch.mean(multiple, dim=3)

        if original_inputs.dim() > 2:
            view_args = list(original_inputs.size()) + [local_context.size(-1)]
            local_context = local_context.view(*view_args)

        if self._projection:
            projection = self._projection
            for _ in range(local_context.dim() - 2):
                projection = TimeDistributed(projection)
            local_context = projection(local_context)

        return local_context