Example No. 1
    def __init__(self,
                 sentence_encoder: Seq2VecEncoder,
                 doc_encoder: Seq2VecEncoder,
                 query_encoder: Seq2VecEncoder,
                 use_encoded: bool = False,
                 scorer: Optional[FeedForward] = None,
                 sentence_attention: Optional[Attention] = None,
                 document_attention: Optional[Attention] = None) -> None:

        super(Seq2VecSentenceScorer, self).__init__()

        self.sentence_encoder = sentence_encoder
        self.doc_encoder = doc_encoder
        self.query_encoder = query_encoder
        self.use_encoded = use_encoded
        self.sentence_attention = sentence_attention
        self.document_attention = document_attention
        # get the dimensions for the scorer and for sanity checking
        q_dim = self.query_encoder.get_output_dim()
        d_dim = self.doc_encoder.get_output_dim()

        input_dim = (q_dim + d_dim)
        if use_encoded: input_dim *= 2
        # set up the scorer
        if scorer is None:
            scorer = FeedForward(
                        input_dim=input_dim, num_layers=1,
                        hidden_dims=1, activations=Activation.by_name('linear')(), dropout=0.)
        self.query_transformer = FeedForward(
            input_dim=q_dim, num_layers=1, hidden_dims=q_dim, activations=Activation.by_name('tanh')(), dropout=0.2)
        self.scorer = scorer
        # assertions to ensure our shapes match our assumptions
        assert q_dim == d_dim
        assert self.scorer.get_output_dim() == 1
        assert self.scorer.get_input_dim() == input_dim
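A minimal sketch (not taken from the example above; dimensions are illustrative) of the default scorer it builds, assuming a standard AllenNLP install: a one-layer linear FeedForward that maps the concatenated query/document representation to a single score.

import torch
from allennlp.modules import FeedForward
from allennlp.nn import Activation

q_dim = d_dim = 8                      # illustrative; the real dims come from the encoders
scorer = FeedForward(input_dim=q_dim + d_dim, num_layers=1, hidden_dims=1,
                     activations=Activation.by_name('linear')(), dropout=0.)
pair = torch.randn(3, q_dim + d_dim)   # (batch, q_dim + d_dim)
print(scorer(pair).shape)              # torch.Size([3, 1])
print(scorer.get_output_dim())         # 1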
Example No. 2
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 projection_dim: int,
                 feedforward_hidden_dim: int,
                 num_layers: int,
                 num_attention_heads: int,
                 use_positional_encoding: bool = True,
                 dropout_prob: float = 0.1,
                 residual_dropout_prob: float = 0.2,
                 attention_dropout_prob: float = 0.1) -> None:
        super(StackedSelfAttentionEncoder, self).__init__()

        self._use_positional_encoding = use_positional_encoding
        self._attention_layers: List[MultiHeadSelfAttention] = []
        self._feedfoward_layers: List[FeedForward] = []
        self._layer_norm_layers: List[LayerNorm] = []
        self._feed_forward_layer_norm_layers: List[LayerNorm] = []

        feedfoward_input_dim = input_dim
        for i in range(num_layers):
            feedfoward = FeedForward(
                feedfoward_input_dim,
                activations=[
                    Activation.by_name('relu')(),
                    Activation.by_name('linear')()
                ],
                hidden_dims=[feedforward_hidden_dim, hidden_dim],
                num_layers=2,
                dropout=dropout_prob)

            # Note: Please use `ModuleList` in new code. It provides better
            # support for running on multiple GPUs. We've kept `add_module` here
            # solely for backwards compatibility with existing serialized models.
            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
            self.add_module(f"feedforward_layer_norm_{i}",
                            feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            self_attention = MultiHeadSelfAttention(
                num_heads=num_attention_heads,
                input_dim=hidden_dim,
                attention_dim=projection_dim,
                values_dim=projection_dim,
                attention_dropout_prob=attention_dropout_prob)
            self.add_module(f"self_attention_{i}", self_attention)
            self._attention_layers.append(self_attention)

            layer_norm = LayerNorm(self_attention.get_output_dim())
            self.add_module(f"layer_norm_{i}", layer_norm)
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = Dropout(residual_dropout_prob)
        self._input_dim = input_dim
        self._output_dim = self._attention_layers[-1].get_output_dim()
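The note inside Example No. 2 recommends ModuleList over manual add_module calls. A hedged sketch of that alternative (the class below is illustrative, not part of AllenNLP): storing per-layer modules in torch.nn.ModuleList registers their parameters automatically.

import torch
from torch import nn

class TinyStack(nn.Module):
    """Illustrative stand-in for the per-layer bookkeeping in the encoder above."""

    def __init__(self, dim: int, num_layers: int):
        super().__init__()
        # ModuleList registers each layer's parameters without add_module.
        self._layers = nn.ModuleList(nn.Linear(dim, dim) for _ in range(num_layers))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for layer in self._layers:
            x = torch.relu(layer(x))
        return x

print(sum(p.numel() for p in TinyStack(4, 3).parameters()))  # 60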
Example No. 3
    def __init__(self, word_embeddings: TextFieldEmbedder, bin_count: int):

        super(DRMM, self).__init__()

        self.word_embeddings = word_embeddings
        self.cosine_module = CosineMatrixAttention()

        self.bin_count = bin_count
        self.matching_classifier = FeedForward(
            input_dim=bin_count,
            num_layers=2,
            hidden_dims=[bin_count, 1],
            activations=[
                Activation.by_name('tanh')(),
                Activation.by_name('tanh')()
            ])
        self.query_gate = FeedForward(
            input_dim=self.word_embeddings.get_output_dim(),
            num_layers=2,
            hidden_dims=[self.word_embeddings.get_output_dim(), 1],
            activations=[
                Activation.by_name('tanh')(),
                Activation.by_name('tanh')()
            ])
        self.query_softmax = MaskedSoftmax()
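A hedged sketch (shapes are illustrative) of the cosine similarity matrix that DRMM bins into matching histograms: CosineMatrixAttention scores every query term against every document term.

import torch
from allennlp.modules.matrix_attention import CosineMatrixAttention

cosine = CosineMatrixAttention()
query = torch.randn(2, 5, 16)    # (batch, query_len, embedding_dim)
doc = torch.randn(2, 40, 16)     # (batch, doc_len, embedding_dim)
print(cosine(query, doc).shape)  # torch.Size([2, 5, 40]), one score per term pair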
Example No. 4
    def __init__(
        self,
        input_dim,
        hidden_dim,
        projection_dim,
        feedforward_hidden_dim,
        num_layers,
        num_attention_heads,
        use_positional_encoding=True,
        dropout_prob=0.2,
    ):
        super(MaskedStackedSelfAttentionEncoder, self).__init__()

        self._use_positional_encoding = use_positional_encoding
        self._attention_layers = []
        self._feedfoward_layers = []
        self._layer_norm_layers = []
        self._feed_forward_layer_norm_layers = []

        feedfoward_input_dim = input_dim
        for i in range(num_layers):
            feedfoward = FeedForward(
                feedfoward_input_dim,
                activations=[
                    Activation.by_name("relu")(),
                    Activation.by_name("linear")()
                ],
                hidden_dims=[feedforward_hidden_dim, hidden_dim],
                num_layers=2,
                dropout=dropout_prob,
            )

            self.add_module("feedforward_{i}".format(feedfoward))
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_input_dim())
            self.add_module(
                "feedforward_layer_norm_{}".format(i), feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            self_attention = MaskedMultiHeadSelfAttention(
                num_heads=num_attention_heads,
                input_dim=hidden_dim,
                attention_dim=projection_dim,
                values_dim=projection_dim,
            )
            self.add_module("self_attention_{i}".format(self_attention))
            self._attention_layers.append(self_attention)

            layer_norm = LayerNorm(self_attention.get_input_dim())
            self.add_module("layer_norm_{i}".format(layer_norm))
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = torch.nn.Dropout(dropout_prob)
        self._input_dim = input_dim
        self._output_dim = self._attention_layers[-1].get_output_dim()
        self._output_layer_norm = LayerNorm(self._output_dim)
Example No. 5
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 projection_dim: int,
                 feedforward_hidden_dim: int,
                 num_layers: int,
                 num_attention_heads: int,
                 use_positional_encoding: bool = True,
                 dropout_prob: float = 0.1,
                 residual_dropout_prob: float = 0.2,
                 attention_dropout_prob: float = 0.1) -> None:
        super(StackedSelfAttentionEncoder, self).__init__()

        self._use_positional_encoding = use_positional_encoding
        self._attention_layers: List[MultiHeadSelfAttention] = []
        self._feedfoward_layers: List[FeedForward] = []
        self._layer_norm_layers: List[LayerNorm] = []
        self._feed_forward_layer_norm_layers: List[LayerNorm] = []

        feedfoward_input_dim = input_dim
        for i in range(num_layers):
            feedfoward = FeedForward(
                feedfoward_input_dim,
                activations=[
                    Activation.by_name('relu')(),
                    Activation.by_name('linear')()
                ],
                hidden_dims=[feedforward_hidden_dim, hidden_dim],
                num_layers=2,
                dropout=dropout_prob)

            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
            self.add_module(f"feedforward_layer_norm_{i}",
                            feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            self_attention = MultiHeadSelfAttention(
                num_heads=num_attention_heads,
                input_dim=hidden_dim,
                attention_dim=projection_dim,
                values_dim=projection_dim,
                attention_dropout_prob=attention_dropout_prob)
            self.add_module(f"self_attention_{i}", self_attention)
            self._attention_layers.append(self_attention)

            layer_norm = LayerNorm(self_attention.get_output_dim())
            self.add_module(f"layer_norm_{i}", layer_norm)
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = Dropout(residual_dropout_prob)
        self._input_dim = input_dim
        self._output_dim = self._attention_layers[-1].get_output_dim()
Example No. 6
    def __init__(self, input_dim: int, code_dim: int):
        super().__init__()
        self._code_dim = code_dim

        self._mu_linear = FeedForward(input_dim=input_dim,
                                      num_layers=1,
                                      hidden_dims=code_dim,
                                      activations=lambda x: x)
        self._logvar_linear = FeedForward(input_dim=input_dim,
                                          num_layers=1,
                                          hidden_dims=code_dim,
                                          activations=lambda x: x)
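A hedged sketch of how a mu/logvar pair like the one produced above is usually turned into a sampled latent code via the reparameterization trick; the helper name is ours, not from the original code.

import torch

def reparameterize(mu: torch.Tensor, logvar: torch.Tensor) -> torch.Tensor:
    std = torch.exp(0.5 * logvar)   # log-variance -> standard deviation
    eps = torch.randn_like(std)     # noise with the same shape as std
    return mu + eps * std           # differentiable sample from N(mu, std**2)

code = reparameterize(torch.zeros(3, 10), torch.zeros(3, 10))
print(code.shape)                   # torch.Size([3, 10])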
Example No. 7
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 projection_dim: int,
                 feedforward_hidden_dim: int,
                 num_layers: int,
                 num_attention_heads: int,
                 use_positional_encoding: bool = True,
                 dropout_prob: float = 0.1,
                 residual_dropout_prob: float = 0.2,
                 attention_dropout_prob: float = 0.1) -> None:
        super(StackedSelfAttentionEncoder, self).__init__()

        self._use_positional_encoding = use_positional_encoding
        self._attention_layers: List[MultiHeadSelfAttention] = []
        self._feedfoward_layers: List[FeedForward] = []
        self._layer_norm_layers: List[LayerNorm] = []
        self._feed_forward_layer_norm_layers: List[LayerNorm] = []

        feedfoward_input_dim = input_dim
        for i in range(num_layers):
            feedfoward = FeedForward(feedfoward_input_dim,
                                     activations=[Activation.by_name('relu')(),
                                                  Activation.by_name('linear')()],
                                     hidden_dims=[feedforward_hidden_dim, hidden_dim],
                                     num_layers=2,
                                     dropout=dropout_prob)

            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
            self.add_module(f"feedforward_layer_norm_{i}", feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            self_attention = MultiHeadSelfAttention(num_heads=num_attention_heads,
                                                    input_dim=hidden_dim,
                                                    attention_dim=projection_dim,
                                                    values_dim=projection_dim,
                                                    attention_dropout_prob=attention_dropout_prob)
            self.add_module(f"self_attention_{i}", self_attention)
            self._attention_layers.append(self_attention)

            layer_norm = LayerNorm(self_attention.get_output_dim())
            self.add_module(f"layer_norm_{i}", layer_norm)
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = Dropout(residual_dropout_prob)
        self._input_dim = input_dim
        self._output_dim = self._attention_layers[-1].get_output_dim()
Example No. 8
    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        attention_projection_dim: int,
        feedforward_hidden_dim: int,
        num_convs: int,
        conv_kernel_size: int,
        num_attention_heads: int,
        use_positional_encoding: bool = True,
        dropout_prob: float = 0.1,
        layer_dropout_undecayed_prob: float = 0.1,
        attention_dropout_prob: float = 0,
    ) -> None:
        super().__init__()

        check_dimensions_match(input_dim, hidden_dim, "input_dim", "hidden_dim")

        self._use_positional_encoding = use_positional_encoding

        self._conv_norm_layers = torch.nn.ModuleList(
            [LayerNorm(hidden_dim) for _ in range(num_convs)]
        )
        self._conv_layers = torch.nn.ModuleList()
        for _ in range(num_convs):
            padding = torch.nn.ConstantPad1d(
                (conv_kernel_size // 2, (conv_kernel_size - 1) // 2), 0
            )
            depthwise_conv = torch.nn.Conv1d(
                hidden_dim, hidden_dim, conv_kernel_size, groups=hidden_dim
            )
            pointwise_conv = torch.nn.Conv1d(hidden_dim, hidden_dim, 1)
            self._conv_layers.append(
                torch.nn.Sequential(
                    padding, depthwise_conv, pointwise_conv, Activation.by_name("relu")()
                )
            )

        self.attention_norm_layer = LayerNorm(hidden_dim)
        self.attention_layer = MultiHeadSelfAttention(
            num_heads=num_attention_heads,
            input_dim=hidden_dim,
            attention_dim=attention_projection_dim,
            values_dim=attention_projection_dim,
            attention_dropout_prob=attention_dropout_prob,
        )
        self.feedforward_norm_layer = LayerNorm(hidden_dim)
        self.feedforward = FeedForward(
            hidden_dim,
            activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
            hidden_dims=[feedforward_hidden_dim, hidden_dim],
            num_layers=2,
            dropout=dropout_prob,
        )

        self.dropout = Dropout(dropout_prob)
        self.residual_with_layer_dropout = ResidualWithLayerDropout(layer_dropout_undecayed_prob)
        self._input_dim = input_dim
        self._output_dim = hidden_dim
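A hedged check (numbers are illustrative) of the convolution padding used above: the asymmetric ConstantPad1d adds kernel_size - 1 positions in total, so the depthwise/pointwise pair preserves the sequence length for any kernel size.

import torch

hidden_dim, kernel_size = 8, 5
padding = torch.nn.ConstantPad1d((kernel_size // 2, (kernel_size - 1) // 2), 0)
depthwise = torch.nn.Conv1d(hidden_dim, hidden_dim, kernel_size, groups=hidden_dim)
pointwise = torch.nn.Conv1d(hidden_dim, hidden_dim, 1)
x = torch.randn(2, hidden_dim, 30)             # (batch, channels, seq_len)
print(pointwise(depthwise(padding(x))).shape)  # torch.Size([2, 8, 30])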
Example No. 9
    def __init__(self, input_dim: int, summary_dim: int,
                 feedforward: FeedForward):
        super().__init__()
        self.input_dim = input_dim
        self.summary_dim = summary_dim
        self.feedforward = feedforward

        # Make sure that the input dimension matches the input/stack.
        assert input_dim + summary_dim == feedforward.get_input_dim()
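A hedged sketch (dimensions are made up) of building a FeedForward that satisfies the assertion above: its input dimension must equal input_dim + summary_dim.

from allennlp.modules import FeedForward
from allennlp.nn import Activation

input_dim, summary_dim = 10, 6
feedforward = FeedForward(input_dim=input_dim + summary_dim, num_layers=1,
                          hidden_dims=summary_dim,
                          activations=Activation.by_name('tanh')())
assert input_dim + summary_dim == feedforward.get_input_dim()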
Example No. 10
    def __init__(self, input_dim: int, code_dim: int, kappa: int):
        super().__init__()
        self._code_dim = code_dim
        self._kappa = kappa

        self._mu_linear = FeedForward(
            input_dim=input_dim,
            num_layers=1,
            hidden_dims=code_dim,
            activations=lambda x: x / x.norm(dim=-1, keepdim=True))
Example No. 11
    def initialize_network(self, n_tags: int, sense_dim: int, rep_dim: int):
        self.n_tags = n_tags

        self._arc_tag_arg_enc = Linear(rep_dim, self.hidden_dim)

        if self.use_predicate_rep:
            self._arc_tag_pred_enc = Linear(rep_dim, self.hidden_dim)

        if self.graph_type != 2:
            self._arc_tag_sense_enc = Linear(sense_dim, self.hidden_dim)

        if self.graph_type == 1:
            self._arc_tag_tags_enc = Linear(n_tags + 1, self.hidden_dim)
        elif self.graph_type == 2:
            self._arc_tag_tags_enc = Linear(n_tags + 1, self.hidden_dim)
        else:
            self._arc_tag_tags_enc = Linear(2 * n_tags + 1, self.hidden_dim)

        if self.weight_tie:
            self.arc_tag_refiner = lambda x: x.matmul(self._arc_tag_tags_enc.
                                                      weight[:, :n_tags + 1])

            if self.graph_type != 2:
                self.predicate_linear = Linear(rep_dim + n_tags + sense_dim,
                                               self.hidden_dim)
            else:
                self.predicate_linear = Linear(rep_dim + sense_dim,
                                               self.hidden_dim)

            self.predicte_refiner = lambda x: self._dropout(self.activation(self.predicate_linear(x)))\
                    .matmul(self.predicate_linear.weight[:,:sense_dim])
        else:
            self.arc_tag_refiner = FeedForward(self.hidden_dim,
                                               1,
                                               n_tags + 1,
                                               Activation.by_name("linear")(),
                                               dropout=self.dropout)

            self.predicte_refiner = FeedForward(
                rep_dim + n_tags + sense_dim,
                2, [self.hidden_dim] + [sense_dim],
                [self.activation] + [Activation.by_name("linear")()],
                dropout=self.dropout)
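A hedged sketch (sizes are illustrative) of the weight-tying trick behind arc_tag_refiner above: Linear(in_features, out_features) stores its weight with shape (out_features, in_features), so multiplying a hidden vector by that weight reuses the encoder parameters to project back to the tag space.

import torch
from torch.nn import Linear

n_tags, hidden_dim = 12, 32
arc_tag_tags_enc = Linear(n_tags + 1, hidden_dim)      # weight shape: (hidden_dim, n_tags + 1)
h = torch.randn(4, hidden_dim)
scores = h.matmul(arc_tag_tags_enc.weight[:, :n_tags + 1])
print(scores.shape)                                    # torch.Size([4, 13])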
Example No. 12
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 attention_projection_dim: int,
                 feedforward_hidden_dim: int,
                 num_convs: int,
                 conv_kernel_size: int,
                 num_attention_heads: int,
                 use_positional_encoding: bool = True,
                 dropout_prob: float = 0.1,
                 layer_dropout_undecayed_prob: float = 0.1,
                 attention_dropout_prob: float = 0) -> None:
        super().__init__()

        check_dimensions_match(input_dim, hidden_dim, 'input_dim',
                               'hidden_dim')

        self._use_positional_encoding = use_positional_encoding

        self._conv_norm_layers = torch.nn.ModuleList(
            [LayerNorm(hidden_dim) for _ in range(num_convs)])
        self._conv_layers = torch.nn.ModuleList([
            DepthwiseSeparableConv(hidden_dim,
                                   hidden_dim,
                                   conv_kernel_size,
                                   activation="relu",
                                   dim=1) for _ in range(num_convs)
        ])

        self.attention_norm_layer = LayerNorm(hidden_dim)
        self.attention_layer = MemoryEfficientMultiHeadSelfAttention(
            num_heads=num_attention_heads,
            input_dim=hidden_dim,
            attention_dim=attention_projection_dim,
            values_dim=attention_projection_dim,
            attention_dropout_prob=attention_dropout_prob)
        self.feedforward_norm_layer = LayerNorm(hidden_dim)
        self.feedforward = FeedForward(
            hidden_dim,
            activations=[
                Activation.by_name('relu')(),
                Activation.by_name('linear')()
            ],
            hidden_dims=[feedforward_hidden_dim, hidden_dim],
            num_layers=2,
            dropout=dropout_prob)

        self.dropout = Dropout(dropout_prob)
        self.residual_with_layer_dropout = ResidualWithLayerDropout(
            layer_dropout_undecayed_prob)
        self._input_dim = input_dim
        self._output_dim = hidden_dim
Example No. 13
    def __init__(self, hdim: int = 768, nlayers: int = 2, dropout_prob: float = 0.1):
        super(GCNNet, self).__init__()
        # self.gcns = nn.ModuleList([GCN(hdim, hdim, F.relu) for i in range(nlayers)])
        self._gcn_layers = []
        self._feedfoward_layers: List[FeedForward] = []
        self._layer_norm_layers: List[LayerNorm] = []
        self._feed_forward_layer_norm_layers: List[LayerNorm] = []
        feedfoward_input_dim, feedforward_hidden_dim, hidden_dim = hdim, hdim, hdim
        for i in range(nlayers):
            feedfoward = FeedForward(feedfoward_input_dim,
                                     activations=[Activation.by_name('relu')(),
                                                  Activation.by_name('linear')()],
                                     hidden_dims=[feedforward_hidden_dim, hidden_dim],
                                     num_layers=2,
                                     dropout=dropout_prob)

            # Note: Please use `ModuleList` in new code. It provides better
            # support for running on multiple GPUs. We've kept `add_module` here
            # solely for backwards compatibility with existing serialized models.
            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
            self.add_module(f"feedforward_layer_norm_{i}", feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            gcn = GCN(hdim, hdim, F.relu)
            self.add_module(f"gcn_{i}", gcn)
            self._gcn_layers.append(gcn)

            layer_norm = LayerNorm(hdim)
            self.add_module(f"layer_norm_{i}", layer_norm)
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = Dropout(dropout_prob)
        self._input_dim = hdim
        self._output_dim = hdim
Example No. 14
    def __init__(self,
                 vocab: Vocabulary,
                 title_embedder: TextFieldEmbedder,
                 abstract_embedder: TextFieldEmbedder,
                 dense_dim=75) -> None:

        super().__init__(vocab)

        self.title_embedder = title_embedder
        self.abstract_embedder = abstract_embedder
        self.intermediate_dim = 6
        self.n_layers = 3
        self.layer_dims = [dense_dim for i in range(self.n_layers - 1)]
        self.layer_dims.append(1)

        self.activations = [
            Activation.by_name("elu")(),
            Activation.by_name("elu")(),
            Activation.by_name("sigmoid")()
        ]
        self.layers = FeedForward(input_dim=self.intermediate_dim,
                                  num_layers=self.n_layers,
                                  hidden_dims=self.layer_dims,
                                  activations=self.activations)
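A hedged sketch (batch is illustrative) of what the three-layer head above computes end to end: 6 input features pass through two ELU layers of width dense_dim and a final sigmoid unit, giving one score in (0, 1) per example.

import torch
from allennlp.modules import FeedForward
from allennlp.nn import Activation

layers = FeedForward(input_dim=6, num_layers=3, hidden_dims=[75, 75, 1],
                     activations=[Activation.by_name("elu")(),
                                  Activation.by_name("elu")(),
                                  Activation.by_name("sigmoid")()])
features = torch.randn(4, 6)
print(layers(features).shape)   # torch.Size([4, 1]), values in (0, 1)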
Example No. 15
    def __init__(self, vocab, embed_dim: int,
                 word_encoder: Seq2SeqEncoder,
                 sent_encoder: Seq2SeqEncoder,
                 word_attn: Attention,
                 sent_attn: Attention):
        super().__init__(vocab)

        self._vocab = vocab
        self._embed = Embedding(self._vocab.get_vocab_size('tokens'), embed_dim)
        self._word_rnn = word_encoder
        self._sent_rnn = sent_encoder

        word_output_dim = self._word_rnn.get_output_dim()
        sent_output_dim = self._sent_rnn.get_output_dim()
        self._word_proj = FeedForward(word_output_dim, 1, word_output_dim, nn.Tanh())
        self._word_rand = nn.Parameter(torch.rand(word_output_dim))
        self._word_attn = word_attn
        self._sent_proj = FeedForward(sent_output_dim, 1, sent_output_dim, nn.Tanh())
        self._sent_rand = nn.Parameter(torch.rand(sent_output_dim))
        self._sent_attn = sent_attn

        self._doc_project = nn.Linear(sent_output_dim, self._vocab.get_vocab_size('labels'))
        self._crit = nn.CrossEntropyLoss()
        self._acc = CategoricalAccuracy()
Example No. 16
    def __init__(self,
                 indexer: DocumentIndexer,
                 embedding_matrix: torch.Tensor,
                 dims=None):
        super(SampleEncoder, self).__init__()
        if dims is None:
            dims = default_dims
        self.dims = dims
        words_emb_size = embedding_matrix.size(1)
        self.word_embedder = nn.Embedding.from_pretrained(embedding_matrix)
        self.word_dropout = nn.Dropout(dims['dropout_input'])

        self.char_embedder = nn.Embedding(len(indexer.char_vocab),
                                          dims['char_emb_size'])
        self.case_embedder = nn.Embedding(len(indexer.case_vocab),
                                          dims['case_emb_size'])
        self.pos_embedder = nn.Embedding(len(indexer.pos_vocab),
                                         dims['pos_emb_size'])
        self.ner_embedder = nn.Embedding(len(indexer.ner_vocab),
                                         dims['ner_emb_size'])
        self.char_encoder = PytorchSeq2VecWrapper(
            nn.LSTM(dims['char_emb_size'],
                    dims['chars_hidden'],
                    batch_first=True,
                    bidirectional=True))

        total_emb_size = words_emb_size + dims['case_emb_size'] + 2 * dims['chars_hidden'] \
                         + dims['pos_emb_size'] + dims['ner_emb_size']

        self.encoder = PytorchSeq2SeqWrapper(
            nn.LSTM(total_emb_size,
                    dims['hidden'],
                    batch_first=True,
                    bidirectional=True,
                    num_layers=2))
        self.sent_dropout = nn.Dropout(dims['dropout_lstm'])

        self.feedforward = FeedForward(2 * dims['hidden'],
                                       1,
                                       dims['feedforward'],
                                       activations=nn.Tanh())
        self.attention = nn.Linear(2 * dims['hidden'], dims['attention_dim'])
        self.scores = nn.Linear(dims['attention_dim'], 1)
        self.hidden2tag = nn.Linear(2 * dims['hidden'],
                                    len(indexer.relation_type_vocab))
        self.out_dropout = nn.Dropout(dims['dropout_lstm'])
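A hedged sketch (sizes are illustrative, and the mask is omitted, which is only safe when there is no padding) of the character-encoder pattern above: wrapping a bidirectional LSTM in PytorchSeq2VecWrapper yields one vector of size 2 * hidden per sequence.

import torch
from torch import nn
from allennlp.modules.seq2vec_encoders import PytorchSeq2VecWrapper

char_encoder = PytorchSeq2VecWrapper(
    nn.LSTM(16, 25, batch_first=True, bidirectional=True))
chars = torch.randn(2, 7, 16)                 # (batch, num_chars, char_emb_size)
print(char_encoder(chars, mask=None).shape)   # torch.Size([2, 50])
print(char_encoder.get_output_dim())          # 50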
Example No. 17
    def __init__(self, input_dims: List[int],
                 num_layers: int,
                 hidden_dims: Union[int, List[int]],
                 activations='relu'):
        super(GCN_layers, self).__init__()
        if not isinstance(hidden_dims, list):
            hidden_dims = [hidden_dims] * num_layers
        # TODO: remove the hard-coded activation below; it overrides the
        # `activations` argument and uses tanh for every layer.
        activations = [torch.nn.functional.tanh] * num_layers
        assert len(input_dims) == len(hidden_dims) == len(activations) == num_layers
        gcn_layers = []
        for layer_input_dim, layer_output_dim, activate in zip(input_dims, hidden_dims, activations):
            gcn_layers.append(GCN(layer_input_dim, layer_output_dim, activate))
        self.layers = nn.ModuleList(gcn_layers)
        self._output_dim = hidden_dims[-1]
        self.input_dim = input_dims[0]
        self.ln = LayerNorm(hidden_dims[0])
        self._mlp = FeedForward(hidden_dims[0], 1, hidden_dims[0], torch.nn.functional.sigmoid)
Example No. 18
    def __init__(self,
                 vocab: Vocabulary,
                 query_field_embedder: TextFieldEmbedder,
                 doc_field_embedder: TextFieldEmbedder,
                 scorer: Scorer,
                 validation_metrics: Dict[str, Metric],
                 temperature: float = 15.0,
                 alpha: float = 0.8,
                 ranking_loss: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 idf_embedder: Optional[TextFieldEmbedder] = None,
                 dropout: float = 0.) -> None:
        super(LeToRWrapper, self).__init__(vocab, regularizer)

        self.embedder = doc_field_embedder
        self.idf_embedder = idf_embedder
        self.final_scorer = FeedForward(2, 1, 1, lambda x: x)

        self.scorer = scorer

        self.initializer = initializer
        self.regularizer = regularizer

        self.metrics = copy.deepcopy(validation_metrics)
        self.metrics.update({'accuracy': CategoricalAccuracy()})

        self.training_metrics = {
            True: ['accuracy'],
            False: validation_metrics.keys()
        }

        self.temperature = temperature
        self.kd_alpha = alpha

        # self.ranking_loss = ranking_loss
        # if self.ranking_loss:
        #self.loss = nn.MarginRankingLoss(margin=1.0)
        # else:
        self.loss = nn.CrossEntropyLoss()
        initializer(self)
Example No. 19
    def __init__(self, params: Params, vocab: Vocabulary) -> None:
        super().__init__(vocab=vocab)

        enc_hidden_dim = params.pop_int('enc_hidden_dim', 300)
        disc_hidden_dim = params.pop_int('disc_hidden_dim', 1200)
        disc_num_layers = params.pop_int('disc_num_layers', 1)

        emb_dropout = params.pop_float('emb_dropout', 0.0)
        disc_dropout = params.pop_float('disc_dropout', 0.0)
        l2_weight = params.pop_float('l2_weight', 0.0)

        self.emb_dropout = nn.Dropout(emb_dropout)
        self.disc_dropout = nn.Dropout(disc_dropout)
        self._l2_weight = l2_weight

        self._token_embedder = Embedding.from_params(
            vocab=vocab, params=params.pop('token_embedder'))
        self._discriminator_encoder = PytorchSeq2VecWrapper(
            nn.LSTM(input_size=self._token_embedder.get_output_dim(),
                    hidden_size=enc_hidden_dim,
                    batch_first=True))
        self._discriminator = FeedForward(
            input_dim=4 * self._discriminator_encoder.get_output_dim(),
            hidden_dims=[disc_hidden_dim] * disc_num_layers +
            [self._NUM_LABELS],
            num_layers=disc_num_layers + 1,
            activations=[Activation.by_name('relu')()] * disc_num_layers +
            [Activation.by_name('linear')()])

        # Metrics
        self._metrics = {
            'labeled': {
                'discriminator_entropy': ScalarMetric(),
                'discriminator_accuracy': CategoricalAccuracy(),
                'loss': ScalarMetric()
            }
        }
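A hedged sketch (values are illustrative) of the list arithmetic that shapes the discriminator head above: disc_num_layers ReLU layers of width disc_hidden_dim followed by one linear projection down to the label count.

disc_hidden_dim, disc_num_layers, num_labels = 1200, 2, 3
hidden_dims = [disc_hidden_dim] * disc_num_layers + [num_labels]
activations = ['relu'] * disc_num_layers + ['linear']
print(hidden_dims)   # [1200, 1200, 3]
print(activations)   # ['relu', 'relu', 'linear']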
Example No. 20
def load_decomposable_attention_elmo_softmax_model():
    NEGATIVE_PERCENTAGE = 100
    # EMBEDDING_TYPE = ""
    # LOSS_TYPE = ""				# NLL
    # LOSS_TYPE = "_nll"				# NLL
    LOSS_TYPE = "_mse"  # MSE
    # EMBEDDING_TYPE = ""
    # EMBEDDING_TYPE = "_glove"
    # EMBEDDING_TYPE = "_bert"
    EMBEDDING_TYPE = "_elmo"
    # EMBEDDING_TYPE = "_elmo_retrained"
    # EMBEDDING_TYPE = "_elmo_retrained_2"
    token_indexers = None
    if EMBEDDING_TYPE == "_elmo" or EMBEDDING_TYPE == "_elmo_retrained" or EMBEDDING_TYPE == "_elmo_retrained_2":
        token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
    MAX_BATCH_SIZE = 0
    # MAX_BATCH_SIZE = 150 # for bert and elmo
    reader = QuestionResponseSoftmaxReader(token_indexers=token_indexers,
                                           max_batch_size=MAX_BATCH_SIZE)
    model_file = os.path.join(
        "saved_softmax_models",
        "decomposable_attention{}{}_model_{}.th".format(
            LOSS_TYPE, EMBEDDING_TYPE, NEGATIVE_PERCENTAGE))

    vocabulary_filepath = os.path.join(
        "saved_softmax_models",
        "vocabulary{}{}_{}".format(LOSS_TYPE, EMBEDDING_TYPE,
                                   NEGATIVE_PERCENTAGE))
    print("LOADING VOCABULARY")
    # Load vocabulary
    vocab = Vocabulary.from_files(vocabulary_filepath)

    EMBEDDING_DIM = 300
    PROJECT_DIM = 200
    DROPOUT = 0.2
    NUM_LAYERS = 2
    if EMBEDDING_TYPE == "":
        token_embedding = Embedding(
            num_embeddings=vocab.get_vocab_size('tokens'),
            embedding_dim=EMBEDDING_DIM,
            projection_dim=PROJECT_DIM)
    elif EMBEDDING_TYPE == "_glove":
        token_embedding = Embedding.from_params(vocab=vocab,
                                                params=Params({
                                                    'pretrained_file':
                                                    glove_embeddings_file,
                                                    'embedding_dim':
                                                    EMBEDDING_DIM,
                                                    'projection_dim':
                                                    PROJECT_DIM,
                                                    'trainable':
                                                    False
                                                }))
    elif EMBEDDING_TYPE == "_elmo":
        # options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_options.json"
        # weights_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5"
        options_file = os.path.join(
            "data", "elmo", "elmo_2x2048_256_2048cnn_1xhighway_options.json")
        weights_file = os.path.join(
            "data", "elmo", "elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5")
        # NOTE: using Small size as medium size gave CUDA out of memory error
        # options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json"
        # weights_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5"
        # options_file = os.path.join("data", "elmo", "elmo_2x1024_128_2048cnn_1xhighway_options.json")
        # weights_file = os.path.join("data", "elmo", "elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5")
        token_embedding = ElmoTokenEmbedder(options_file,
                                            weights_file,
                                            dropout=DROPOUT,
                                            projection_dim=PROJECT_DIM)
    elif EMBEDDING_TYPE == "_elmo_retrained":
        options_file = os.path.join("data", "bilm-tf", "elmo_retrained",
                                    "options.json")
        weights_file = os.path.join("data", "bilm-tf", "elmo_retrained",
                                    "weights.hdf5")
        token_embedding = ElmoTokenEmbedder(options_file,
                                            weights_file,
                                            dropout=DROPOUT,
                                            projection_dim=PROJECT_DIM)
    elif EMBEDDING_TYPE == "_elmo_retrained_2":
        options_file = os.path.join("data", "bilm-tf", "elmo_retrained",
                                    "options_2.json")
        weights_file = os.path.join("data", "bilm-tf", "elmo_retrained",
                                    "weights_2.hdf5")
        token_embedding = ElmoTokenEmbedder(options_file,
                                            weights_file,
                                            dropout=DROPOUT,
                                            projection_dim=PROJECT_DIM)
    elif EMBEDDING_TYPE == "_bert":
        print("Loading bert model")
        model = BertModel.from_pretrained('bert-base-uncased')
        token_embedding = BertEmbedder(model)
        PROJECT_DIM = 768
    else:
        print("Error: Some weird Embedding type", EMBEDDING_TYPE)
        exit()
    word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
    HIDDEN_DIM = 200
    params = Params({
        'input_dim': PROJECT_DIM,
        'hidden_dims': HIDDEN_DIM,
        'activations': 'relu',
        'num_layers': NUM_LAYERS,
        'dropout': DROPOUT
    })
    attend_feedforward = FeedForward.from_params(params)
    similarity_function = DotProductSimilarity()
    params = Params({
        'input_dim': 2 * PROJECT_DIM,
        'hidden_dims': HIDDEN_DIM,
        'activations': 'relu',
        'num_layers': NUM_LAYERS,
        'dropout': DROPOUT
    })
    compare_feedforward = FeedForward.from_params(params)
    params = Params({
        'input_dim': 2 * HIDDEN_DIM,
        'hidden_dims': 1,
        'activations': 'linear',
        'num_layers': 1
    })
    aggregate_feedforward = FeedForward.from_params(params)
    model = DecomposableAttentionSoftmax(vocab, word_embeddings,
                                         attend_feedforward,
                                         similarity_function,
                                         compare_feedforward,
                                         aggregate_feedforward)
    print("MODEL CREATED")
    # Load model state
    with open(model_file, 'rb') as f:
        model.load_state_dict(torch.load(f, map_location='cuda:0'))
    print("MODEL LOADED!")
    if torch.cuda.is_available():
        # cuda_device = 3
        # model = model.cuda(cuda_device)
        cuda_device = -1
    else:
        cuda_device = -1

    predictor = DecomposableAttentionSoftmaxPredictor(model,
                                                      dataset_reader=reader)
    return model, predictor
Example No. 21
seq2seq_encoder = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))

# In[1152]:

classifier_params = Params({
    "input_dim": HIDDEN_DIM * 2,
    "num_layers": 2,
    "hidden_dims": [50, 3],
    "activations": ["sigmoid", "linear"],
    "dropout": [0.2, 0.0]
})

# In[1153]:

classifier_feedforward = FeedForward.from_params(classifier_params)

# In[1154]:

parse_label = {
    'word': torch.LongTensor([[1, 0, 3, 7, 2, 9, 4], [0, 0, 5, 0, 0, 0, 4]])
}
embedded_parse_label = field_type2embedder['word'](parse_label)

# In[1155]:

feature_mask = util.get_text_field_mask(parse_label)

# In[1156]:

encoded_feature = encoder(embedded_parse_label, feature_mask)
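A hedged, self-contained sketch of the mask computed above, assuming the older flat-dict form of get_text_field_mask that this example relies on: positions whose token id is 0 are treated as padding.

import torch
from allennlp.nn import util

parse_label = {'word': torch.LongTensor([[1, 0, 3, 7, 2, 9, 4],
                                         [0, 0, 5, 0, 0, 0, 4]])}
print(util.get_text_field_mask(parse_label))
# tensor([[1, 0, 1, 1, 1, 1, 1],
#         [0, 0, 1, 0, 0, 0, 1]])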
Example No. 22
    def __init__(self, params: Params, vocab: Vocabulary) -> None:
        super().__init__(vocab=vocab)

        disc_hidden_dim = params.pop_int('disc_hidden_dim', 1200)
        disc_num_layers = params.pop_int('disc_num_layers', 1)
        code_dist_type = params.pop_choice('code_dist_type',
                                           ['gaussian', 'vmf'],
                                           default_to_first_choice=True)
        code_dim = params.pop_int('code_dim', 500)

        emb_dropout = params.pop_float('emb_dropout', 0.0)
        disc_dropout = params.pop_float('disc_dropout', 0.0)
        latent_dropout = params.pop_float('latent_dropout', 0.0)
        l2_weight = params.pop_float('l2_weight', 0.0)

        self.emb_dropout = nn.Dropout(emb_dropout)
        self.disc_dropout = nn.Dropout(disc_dropout)
        self.latent_dropout = nn.Dropout(latent_dropout)
        self._l2_weight = l2_weight

        self._token_embedder = Embedding.from_params(
            vocab=vocab, params=params.pop('token_embedder'))
        self._encoder = nn.Sequential(
            nn.Conv1d(in_channels=300,
                      out_channels=300,
                      kernel_size=5,
                      stride=2),
            nn.Conv1d(in_channels=300,
                      out_channels=600,
                      kernel_size=5,
                      stride=2),
            nn.Conv1d(in_channels=600,
                      out_channels=500,
                      kernel_size=5,
                      stride=2))
        self._generator = nn.Sequential(
            nn.ConvTranspose1d(in_channels=500,
                               out_channels=600,
                               kernel_size=5,
                               stride=2), nn.ReLU(),
            nn.ConvTranspose1d(in_channels=600,
                               out_channels=300,
                               kernel_size=5,
                               stride=2), nn.ReLU(),
            nn.ConvTranspose1d(in_channels=300,
                               out_channels=300,
                               kernel_size=5,
                               stride=2), nn.ReLU())
        self._generator_projector = nn.Linear(
            in_features=300, out_features=vocab.get_vocab_size(), bias=False)
        self._generator_projector.weight = self._token_embedder.weight

        if code_dist_type == 'vmf':
            vmf_kappa = params.pop_int('vmf_kappa', 150)
            self._code_generator = VmfCodeGenerator(input_dim=500,
                                                    code_dim=code_dim,
                                                    kappa=vmf_kappa)
        elif code_dist_type == 'gaussian':
            self._code_generator = GaussianCodeGenerator(input_dim=500,
                                                         code_dim=code_dim)
        else:
            raise ValueError('Unknown code_dist_type')

        self._discriminator = FeedForward(
            input_dim=4 * self._code_generator.get_output_dim(),
            hidden_dims=[disc_hidden_dim] * disc_num_layers +
            [self._NUM_LABELS],
            num_layers=disc_num_layers + 1,
            activations=[Activation.by_name('relu')()] * disc_num_layers +
            [Activation.by_name('linear')()],
            dropout=disc_dropout)

        self._kl_weight = 1.0
        self._discriminator_weight = params.pop_float('discriminator_weight',
                                                      0.1)
        self._gumbel_temperature = 1.0

        # Metrics
        self._metrics = {
            'generator_loss': ScalarMetric(),
            'kl_divergence': ScalarMetric(),
            'discriminator_accuracy': CategoricalAccuracy(),
            'discriminator_loss': ScalarMetric(),
            'loss': ScalarMetric()
        }
Example No. 23
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoding_in_dim:int,
                 encoding_out_dim:int,
                 modeling_in_dim:int,
                 modeling_out_dim:int,
                 dropout_prob: float = 0.1,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 external_number: List[int] = None,
                 answering_abilities: List[str] = None) -> None:
        super().__init__(vocab, regularizer)


        #print (vocab)

        if answering_abilities is None:
            self.answering_abilities = ["span_extraction",
                                        "addition_subtraction", "counting"]
        else:
            self.answering_abilities = answering_abilities


        self.W = torch.nn.Linear(768*2,768)
        
        
        text_embed_dim = text_field_embedder.get_output_dim()
        
        self._text_field_embedder = text_field_embedder

        #self._embedding_proj_layer = torch.nn.Linear(text_embed_dim, encoding_in_dim)

        """
            为了用于self attention
        """

        if len(self.answering_abilities) > 1:
            self._answer_ability_predictor = FeedForward(text_embed_dim,
                                                         activations=[Activation.by_name('relu')(inplace=True),
                                                                      Activation.by_name('linear')()],
                                                         hidden_dims=[encoding_out_dim,
                                                                      len(self.answering_abilities)],
                                                         num_layers=2,
                                                         dropout=dropout_prob)

        if "span_extraction" in self.answering_abilities:
            self._span_extraction_index = self.answering_abilities.index("span_extraction")
            self._span_start_predictor = FeedForward(text_embed_dim,
                                                      activations=[Activation.by_name('relu')(inplace=True),
                                                                   Activation.by_name('linear')()],
                                                      hidden_dims=[encoding_out_dim,1],
                                                      num_layers=2,
                                                      dropout=dropout_prob)
            self._span_end_predictor = FeedForward(text_embed_dim ,
                                                      activations=[Activation.by_name('relu')(inplace=True),
                                                                   Activation.by_name('linear')()],
                                                      hidden_dims=[encoding_out_dim,1],
                                                      num_layers=2,
                                                      dropout=dropout_prob)


        if "addition_subtraction" in self.answering_abilities:
            self._addition_subtraction_index = self.answering_abilities.index("addition_subtraction")
            self._number_sign_predictor = FeedForward(text_embed_dim*2,
                                                      activations=[Activation.by_name('relu')(inplace=True),
                                                                   Activation.by_name('linear')()],
                                                      hidden_dims=[encoding_out_dim,3],
                                                      num_layers=2,
                                                      dropout=dropout_prob)

        if "counting" in self.answering_abilities:
            self._counting_index = self.answering_abilities.index("counting")
            self._count_number_predictor = FeedForward(text_embed_dim,
                                                       activations=[Activation.by_name('relu')(inplace=True),
                                                                    Activation.by_name('linear')()],
                                                       hidden_dims=[encoding_out_dim, 10],
                                                       num_layers=2,
                                                       dropout=dropout_prob)
        



        self._drop_metrics = DropEmAndF1()
        self._dropout = torch.nn.Dropout(p=dropout_prob)

        initializer(self)
Example No. 24
def save_top_results(process_no, start_index, end_index):
    print("Starting process {} with start at {} and end at {}".format(
        process_no, start_index, end_index))
    DATA_FOLDER = "train_data"
    # EMBEDDING_TYPE = ""
    LOSS_TYPE = ""  # NLL
    LOSS_TYPE = "_mse"  # MSE
    # EMBEDDING_TYPE = ""
    # EMBEDDING_TYPE = "_glove"
    # EMBEDDING_TYPE = "_bert"
    EMBEDDING_TYPE = "_elmo"
    # EMBEDDING_TYPE = "_elmo_retrained"
    # EMBEDDING_TYPE = "_elmo_retrained_2"
    token_indexers = None
    if EMBEDDING_TYPE == "_elmo" or EMBEDDING_TYPE == "_elmo_retrained" or EMBEDDING_TYPE == "_elmo_retrained_2":
        token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
    MAX_BATCH_SIZE = 0
    # MAX_BATCH_SIZE = 150 # for bert and elmo
    # q_file = os.path.join("squad_seq2seq_train", "rule_based_system_squad_seq2seq_train_case_sensitive_saved_questions_lexparser_sh.txt")
    # r_file = os.path.join("squad_seq2seq_train", "rule_based_system_squad_seq2seq_train_case_sensitive_generated_answers_lexparser_sh.txt")
    # rules_file = os.path.join("squad_seq2seq_train", "rule_based_system_squad_seq2seq_train_case_sensitive_generated_answer_rules_lexparser_sh.txt")

    #NOTE: Squad dev test set
    q_file = os.path.join(
        "squad_seq2seq_dev_moses_tokenized",
        "rule_based_system_squad_seq2seq_dev_test_saved_questions.txt")
    r_file = os.path.join(
        "squad_seq2seq_dev_moses_tokenized",
        "rule_based_system_squad_seq2seq_dev_test_generated_answers.txt")
    rules_file = os.path.join(
        "squad_seq2seq_dev_moses_tokenized",
        "rule_based_system_squad_seq2seq_dev_test_generated_answer_rules.txt")
    reader = QuestionResponseSoftmaxReader(q_file,
                                           r_file,
                                           token_indexers=token_indexers,
                                           max_batch_size=MAX_BATCH_SIZE)
    glove_embeddings_file = os.path.join("data", "glove",
                                         "glove.840B.300d.txt")
    # RESULTS_DIR = "squad_seq2seq_train2"
    #NOTE: All other experiments
    # RESULTS_DIR = "squad_seq2seq_train_moses_tokenized"
    # make_dir_if_not_exists(RESULTS_DIR)
    # all_results_save_file = os.path.join(RESULTS_DIR, "squad_seq2seq_train_predictions_start_{}_end_{}.txt".format(start_index, end_index))

    #NOTE: Squad dev test set
    RESULTS_DIR = "squad_seq2seq_dev_moses_tokenized"
    make_dir_if_not_exists(RESULTS_DIR)
    all_results_save_file = os.path.join(
        RESULTS_DIR,
        "squad_seq2seq_dev_test_predictions_start_{}_end_{}.txt".format(
            start_index, end_index))

    with open(all_results_save_file, "w") as all_writer:
        print("Testing out model with", EMBEDDING_TYPE, "embeddings")
        print("Testing out model with", LOSS_TYPE, "loss")
        # for NEGATIVE_PERCENTAGE in [100,50,20,10,5,1]:
        for NEGATIVE_PERCENTAGE in [100]:
            model_file = os.path.join(
                "saved_softmax_models",
                "decomposable_attention{}{}_model_{}.th".format(
                    LOSS_TYPE, EMBEDDING_TYPE, NEGATIVE_PERCENTAGE))

            vocabulary_filepath = os.path.join(
                "saved_softmax_models",
                "vocabulary{}{}_{}".format(LOSS_TYPE, EMBEDDING_TYPE,
                                           NEGATIVE_PERCENTAGE))
            print("LOADING VOCABULARY")
            # Load vocabulary
            vocab = Vocabulary.from_files(vocabulary_filepath)

            EMBEDDING_DIM = 300
            PROJECT_DIM = 200
            DROPOUT = 0.2
            NUM_LAYERS = 2
            if EMBEDDING_TYPE == "":
                token_embedding = Embedding(
                    num_embeddings=vocab.get_vocab_size('tokens'),
                    embedding_dim=EMBEDDING_DIM,
                    projection_dim=PROJECT_DIM)
            elif EMBEDDING_TYPE == "_glove":
                token_embedding = Embedding.from_params(
                    vocab=vocab,
                    params=Params({
                        'pretrained_file': glove_embeddings_file,
                        'embedding_dim': EMBEDDING_DIM,
                        'projection_dim': PROJECT_DIM,
                        'trainable': False
                    }))
            elif EMBEDDING_TYPE == "_elmo":
                # options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_options.json"
                # weights_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x2048_256_2048cnn_1xhighway/elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5"
                options_file = os.path.join(
                    "data", "elmo",
                    "elmo_2x2048_256_2048cnn_1xhighway_options.json")
                weights_file = os.path.join(
                    "data", "elmo",
                    "elmo_2x2048_256_2048cnn_1xhighway_weights.hdf5")
                # NOTE: using Small size as medium size gave CUDA out of memory error
                # options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json"
                # weights_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5"
                # options_file = os.path.join("data", "elmo", "elmo_2x1024_128_2048cnn_1xhighway_options.json")
                # weights_file = os.path.join("data", "elmo", "elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5")
                token_embedding = ElmoTokenEmbedder(options_file,
                                                    weights_file,
                                                    dropout=DROPOUT,
                                                    projection_dim=PROJECT_DIM)
            elif EMBEDDING_TYPE == "_elmo_retrained":
                options_file = os.path.join("data", "bilm-tf",
                                            "elmo_retrained", "options.json")
                weights_file = os.path.join("data", "bilm-tf",
                                            "elmo_retrained", "weights.hdf5")
                token_embedding = ElmoTokenEmbedder(options_file,
                                                    weights_file,
                                                    dropout=DROPOUT,
                                                    projection_dim=PROJECT_DIM)
            elif EMBEDDING_TYPE == "_elmo_retrained_2":
                options_file = os.path.join("data", "bilm-tf",
                                            "elmo_retrained", "options_2.json")
                weights_file = os.path.join("data", "bilm-tf",
                                            "elmo_retrained", "weights_2.hdf5")
                token_embedding = ElmoTokenEmbedder(options_file,
                                                    weights_file,
                                                    dropout=DROPOUT,
                                                    projection_dim=PROJECT_DIM)
            elif EMBEDDING_TYPE == "_bert":
                print("Loading bert model")
                model = BertModel.from_pretrained('bert-base-uncased')
                token_embedding = BertEmbedder(model)
                PROJECT_DIM = 768
            else:
                print("Error: Some weird Embedding type", EMBEDDING_TYPE)
                exit()
            word_embeddings = BasicTextFieldEmbedder(
                {"tokens": token_embedding})
            HIDDEN_DIM = 200
            params = Params({
                'input_dim': PROJECT_DIM,
                'hidden_dims': HIDDEN_DIM,
                'activations': 'relu',
                'num_layers': NUM_LAYERS,
                'dropout': DROPOUT
            })
            attend_feedforward = FeedForward.from_params(params)
            similarity_function = DotProductSimilarity()
            params = Params({
                'input_dim': 2 * PROJECT_DIM,
                'hidden_dims': HIDDEN_DIM,
                'activations': 'relu',
                'num_layers': NUM_LAYERS,
                'dropout': DROPOUT
            })
            compare_feedforward = FeedForward.from_params(params)
            params = Params({
                'input_dim': 2 * HIDDEN_DIM,
                'hidden_dims': 1,
                'activations': 'linear',
                'num_layers': 1
            })
            aggregate_feedforward = FeedForward.from_params(params)
            model = DecomposableAttentionSoftmax(vocab, word_embeddings,
                                                 attend_feedforward,
                                                 similarity_function,
                                                 compare_feedforward,
                                                 aggregate_feedforward)
            print("MODEL CREATED")
            # Load model state
            with open(model_file, 'rb') as f:
                device = torch.device('cpu')
                model.load_state_dict(torch.load(f, map_location=device))
            print("MODEL LOADED!")
            if torch.cuda.is_available():
                # cuda_device = 3
                # model = model.cuda(cuda_device)
                cuda_device = -1
            else:
                cuda_device = -1

            predictor = DecomposableAttentionSoftmaxPredictor(
                model, dataset_reader=reader)
            # Read test file and get predictions
            gold = list()
            predicted_labels = list()
            probs = list()
            total_time = avg_time = 0.0
            print("Started Testing:", NEGATIVE_PERCENTAGE)
            # before working on anything just save all the questions and responses in a list
            all_data = list()
            examples_count = processed_examples_count = 0
            with open(q_file,
                      'r') as q_reader, open(r_file, "r") as r_reader, open(
                          rules_file, "r") as rule_reader:
                logger.info("Reading questions from : %s", q_file)
                logger.info("Reading responses from : %s", r_file)
                q = next(q_reader).lower().strip()
                q = mt.tokenize(q, return_str=True, escape=False)
                current_qa = (q, "")
                current_rules_and_responses = list()
                for i, (response,
                        rule) in enumerate(zip(r_reader, rule_reader)):
                    response = response.strip()
                    rule = rule.strip()
                    if response and rule:
                        # get current_answer from response
                        a = get_answer_from_response(response)
                        if not current_qa[1]:
                            current_qa = (q, a)
                        else:
                            # verify that the answer is the same as the one in current_qa
                            if a != current_qa[1]:
                                # print("answer phrase mismatch!!", current_qa, ":::", a, ":::", response)
                                current_qa = (current_qa[0], a)
                                # print(current_rules_and_responses)
                                # exit()
                        # Add it to the current responses
                        current_rules_and_responses.append((response, rule))
                    elif len(current_rules_and_responses) > 0:
                        # Create an instance
                        # print(current_qa)
                        # print(current_rules_and_responses)
                        # exit()
                        if rule or response:
                            print("Rule Response mismatch")
                            print(current_qa)
                            print(response)
                            print(rule)
                            print(examples_count)
                            print(i)
                            exit()

                        if examples_count < start_index:
                            examples_count += 1
                            q = next(q_reader).lower().strip()
                            q = mt.tokenize(q, return_str=True, escape=False)
                            current_qa = (q, "")
                            current_rules_and_responses = list()
                            continue
                        elif examples_count > end_index:
                            break

                        all_data.append(
                            (current_qa, current_rules_and_responses))
                        try:
                            q = next(q_reader).lower().strip()
                            q = mt.tokenize(q, return_str=True, escape=False)
                        except StopIteration:
                            # previous one was the last question
                            q = ""
                        current_qa = (q, "")
                        current_rules_and_responses = list()
                        examples_count += 1
                        # if(examples_count%100 == 0):
                        # 	print(examples_count)
                    else:
                        # Unexpected state: a blank response/rule line arrived
                        # before any responses were accumulated for this question
                        print("Serious BUG!!")
                        print(current_qa)
                        print(response)
                        print(rule)
                        print(examples_count)
                        print(i)
                        exit()
            print("{}:\tFINISHED IO".format(process_no))
            examples_count = start_index
            processed_examples_count = 0
            for current_qa, responses_and_rules in all_data:
                start_time = time.time()
                # Tokenize and preprocess the responses
                preprocessed_responses = [
                    mt.tokenize(remove_answer_brackets(response),
                                return_str=True,
                                escape=False)
                    for response, rule in responses_and_rules
                ]
                # predictions = predictor.predict(current_qa[0], [remove_answer_brackets(response) for response, rule in responses_and_rules])
                predictions = predictor.predict(current_qa[0],
                                                preprocessed_responses)
                label_probs = predictions["label_probs"]
                tuples = zip(responses_and_rules, label_probs)
                sorted_by_score = sorted(tuples,
                                         key=lambda tup: tup[1],
                                         reverse=True)
                count = 0
                all_writer.write("{}\n".format(current_qa[0]))
                all_writer.write("{}\n".format(current_qa[1]))
                for index, ((response, rule),
                            label_prob) in enumerate(sorted_by_score):
                    if index == 3:
                        break
                    all_writer.write("{}\t{}\t{}\t{}\n".format(
                        response,
                        mt.tokenize(remove_answer_brackets(response),
                                    return_str=True,
                                    escape=False), rule, label_prob))
                all_writer.write("\n")
                all_writer.flush()
                end_time = time.time()
                processed_examples_count += 1
                examples_count += 1
                total_time += end_time - start_time
                avg_time = total_time / float(processed_examples_count)
                print(
                    "{}:\ttime to write example {} with {} responses: {} secs ({} secs avg)"
                    .format(process_no, examples_count,
                            len(responses_and_rules), end_time - start_time,
                            avg_time))
Ejemplo n.º 25
0
    def __init__(
            self,
            input_dim: int,
            hidden_dim: int,
            attention_projection_dim: int,
            feedforward_hidden_dim: int,
            num_convs: int,
            conv_kernel_size: int,
            num_attention_heads: int,
            num_semantic_labels: int,
            replace_zero_semantic_labels_with_per_head_labels: bool = True,
            use_positional_encoding: bool = True,
            dropout_prob: float = 0.1,
            layer_dropout_undecayed_prob: float = 0.1,
            attention_dropout_prob: float = 0,
            semantic_integration_mode: str = "projection",
            semantic_emb_dim: int = 0,
            use_semantic_views: bool = True,
            multi_head_attention_batch_computation: bool = False,
            use_separate_label_embeddings_for_q_and_k: bool = True) -> None:
        super().__init__()

        self.return_output_meta_is_supported = True

        check_dimensions_match(input_dim, hidden_dim, 'input_dim',
                               'hidden_dim')

        self._use_positional_encoding = use_positional_encoding
        self._replace_zero_semantic_labels_with_per_head_labels = replace_zero_semantic_labels_with_per_head_labels
        self._conv_norm_layers = torch.nn.ModuleList(
            [LayerNorm(hidden_dim) for _ in range(num_convs)])
        self._conv_layers = torch.nn.ModuleList()

        if semantic_integration_mode not in semantic_integration_mode_supported:
            raise Exception(
                "semantic_integration_mode must be in [{0}] but is `{1}`".
                format(", ".join(semantic_integration_mode_supported),
                       semantic_integration_mode))
        self._semantic_integration_mode = semantic_integration_mode

        self._use_separate_label_embeddings_for_q_and_k = use_separate_label_embeddings_for_q_and_k
        for _ in range(num_convs):
            padding = torch.nn.ConstantPad1d(
                (conv_kernel_size // 2, (conv_kernel_size - 1) // 2), 0)
            depthwise_conv = torch.nn.Conv1d(hidden_dim,
                                             hidden_dim,
                                             conv_kernel_size,
                                             groups=hidden_dim)
            pointwise_conv = torch.nn.Conv1d(hidden_dim, hidden_dim, 1)
            self._conv_layers.append(
                torch.nn.Sequential(padding, depthwise_conv, pointwise_conv,
                                    Activation.by_name("relu")()))

        self.attention_norm_layer = LayerNorm(hidden_dim)
        self.num_semantic_labels = num_semantic_labels
        self.num_attention_heads = num_attention_heads
        self.attention_layer = MultiHeadSemanticFlatConcatSelfAttention(
            num_heads=num_attention_heads,
            num_semantic_labels=num_semantic_labels,
            input_dim=hidden_dim,
            attention_dim=attention_projection_dim,
            values_dim=attention_projection_dim,
            attention_dropout_prob=attention_dropout_prob,
            semantic_integration_mode=semantic_integration_mode,
            semantic_emb_dim=semantic_emb_dim,
            use_semantic_views=use_semantic_views,
            multi_head_attention_batch_computation=
            multi_head_attention_batch_computation,
            use_separate_label_embeddings_for_q_and_k=
            use_separate_label_embeddings_for_q_and_k)

        self.feedforward_norm_layer = LayerNorm(hidden_dim)
        self.feedforward = FeedForward(
            hidden_dim,
            activations=[
                Activation.by_name('relu')(),
                Activation.by_name('linear')()
            ],
            hidden_dims=[feedforward_hidden_dim, hidden_dim],
            num_layers=2,
            dropout=dropout_prob)

        self.dropout = Dropout(dropout_prob)
        self.residual_with_layer_dropout = ResidualWithLayerDropout(
            layer_dropout_undecayed_prob)
        self._input_dim = input_dim
        self._output_dim = hidden_dim
Ejemplo n.º 26
0
    def __init__(self,
                 context_dim,
                 dec_state_dim,
                 enc_hid_dim,
                 text_field_embedder,
                 aggressive_compression: int = -1,
                 keep_threshold: float = 0.5,
                 abs_board_file="/home/cc/exComp/board.txt",
                 gather='mean',
                 dropout=0.5,
                 dropout_emb=0.2,
                 valid_tmp_path='/scratch/cluster/jcxu/exComp',
                 serilization_name: str = "",
                 vocab=None,
                 elmo: bool = False,
                 elmo_weight: str = "elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5"):
        super().__init__()
        self.use_elmo = elmo
        self.serilization_name = serilization_name
        if elmo:
            from allennlp.modules.elmo import Elmo, batch_to_ids
            from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder, PytorchSeq2SeqWrapper
            self.vocab = vocab

            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json"
            weight_file = elmo_weight
            self.elmo = Elmo(options_file, weight_file, 1, dropout=dropout_emb)
            # print(self.elmo.get_output_dim())
            # self.word_emb_dim = text_field_embedder.get_output_dim()
            # self._context_layer = PytorchSeq2SeqWrapper(
            #     torch.nn.LSTM(self.word_emb_dim + self.elmo.get_output_dim(), self.word_emb_dim,
            #                   batch_first=True, bidirectional=True))
            self.word_emb_dim = self.elmo.get_output_dim()
        else:
            self._text_field_embedder = text_field_embedder
            self.word_emb_dim = text_field_embedder.get_output_dim()

        self.XEloss = torch.nn.CrossEntropyLoss(reduction='none')
        self.device = get_device()

        # self.rouge_metrics_compression = RougeStrEvaluation(name='cp', path_to_valid=valid_tmp_path,
        #                                                     writting_address=valid_tmp_path,
        #                                                     serilization_name=serilization_name)
        # self.rouge_metrics_compression_best_possible = RougeStrEvaluation(name='cp_ub', path_to_valid=valid_tmp_path,
        #                                                                   writting_address=valid_tmp_path,
        #                                                                   serilization_name=serilization_name)
        self.enc = EncCompression(inp_dim=self.word_emb_dim, hid_dim=enc_hid_dim, gather=gather)  # TODO dropout

        self.aggressive_compression = aggressive_compression
        self.relu = torch.nn.ReLU()

        self.attn = NewAttention(enc_dim=self.enc.get_output_dim(),
                                 dec_dim=self.enc.get_output_dim_unit() * 2 + dec_state_dim)

        self.concat_size = self.enc.get_output_dim() + self.enc.get_output_dim_unit() * 2 + dec_state_dim
        self.valid_tmp_path = valid_tmp_path
        if self.aggressive_compression < 0:
            self.XELoss = torch.nn.CrossEntropyLoss(reduction='none', ignore_index=-1)
            # self.nn_lin = torch.nn.Linear(self.concat_size, self.concat_size)
            # self.nn_lin2 = torch.nn.Linear(self.concat_size, 2)

            self.ff = FeedForward(input_dim=self.concat_size, num_layers=3,
                                  hidden_dims=[self.concat_size, self.concat_size, 2],
                                  activations=[torch.nn.Tanh(), torch.nn.Tanh(), lambda x: x],
                                  dropout=dropout
                                  )
            # Keep threshold

            # self.keep_thres = list(np.arange(start=0.2, stop=0.6, step=0.075))
            self.keep_thres = [0.0, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 1.0]
            self.rouge_metrics_compression_dict = OrderedDict()
            for thres in self.keep_thres:
                self.rouge_metrics_compression_dict["{}".format(thres)] = RougeStrEvaluation(name='cp_{}'.format(thres),
                                                                                             path_to_valid=valid_tmp_path,
                                                                                             writting_address=valid_tmp_path,
                                                                                             serilization_name=serilization_name)
Ejemplo n.º 27
0
class CompressDecoder(torch.nn.Module):
    def __init__(self,
                 context_dim,
                 dec_state_dim,
                 enc_hid_dim,
                 text_field_embedder,
                 aggressive_compression: int = -1,
                 keep_threshold: float = 0.5,
                 abs_board_file="/home/cc/exComp/board.txt",
                 gather='mean',
                 dropout=0.5,
                 dropout_emb=0.2,
                 valid_tmp_path='/scratch/cluster/jcxu/exComp',
                 serilization_name: str = "",
                 vocab=None,
                 elmo: bool = False,
                 elmo_weight: str = "elmo_2x1024_128_2048cnn_1xhighway_weights.hdf5"):
        super().__init__()
        self.use_elmo = elmo
        self.serilization_name = serilization_name
        if elmo:
            from allennlp.modules.elmo import Elmo, batch_to_ids
            from allennlp.modules.seq2seq_encoders import Seq2SeqEncoder, PytorchSeq2SeqWrapper
            self.vocab = vocab

            options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x1024_128_2048cnn_1xhighway/elmo_2x1024_128_2048cnn_1xhighway_options.json"
            weight_file = elmo_weight
            self.elmo = Elmo(options_file, weight_file, 1, dropout=dropout_emb)
            # print(self.elmo.get_output_dim())
            # self.word_emb_dim = text_field_embedder.get_output_dim()
            # self._context_layer = PytorchSeq2SeqWrapper(
            #     torch.nn.LSTM(self.word_emb_dim + self.elmo.get_output_dim(), self.word_emb_dim,
            #                   batch_first=True, bidirectional=True))
            self.word_emb_dim = self.elmo.get_output_dim()
        else:
            self._text_field_embedder = text_field_embedder
            self.word_emb_dim = text_field_embedder.get_output_dim()

        self.XEloss = torch.nn.CrossEntropyLoss(reduction='none')
        self.device = get_device()

        # self.rouge_metrics_compression = RougeStrEvaluation(name='cp', path_to_valid=valid_tmp_path,
        #                                                     writting_address=valid_tmp_path,
        #                                                     serilization_name=serilization_name)
        # self.rouge_metrics_compression_best_possible = RougeStrEvaluation(name='cp_ub', path_to_valid=valid_tmp_path,
        #                                                                   writting_address=valid_tmp_path,
        #                                                                   serilization_name=serilization_name)
        self.enc = EncCompression(inp_dim=self.word_emb_dim, hid_dim=enc_hid_dim, gather=gather)  # TODO dropout

        self.aggressive_compression = aggressive_compression
        self.relu = torch.nn.ReLU()

        self.attn = NewAttention(enc_dim=self.enc.get_output_dim(),
                                 dec_dim=self.enc.get_output_dim_unit() * 2 + dec_state_dim)

        self.concat_size = self.enc.get_output_dim() + self.enc.get_output_dim_unit() * 2 + dec_state_dim
        self.valid_tmp_path = valid_tmp_path
        if self.aggressive_compression < 0:
            self.XELoss = torch.nn.CrossEntropyLoss(reduction='none', ignore_index=-1)
            # self.nn_lin = torch.nn.Linear(self.concat_size, self.concat_size)
            # self.nn_lin2 = torch.nn.Linear(self.concat_size, 2)

            self.ff = FeedForward(input_dim=self.concat_size, num_layers=3,
                                  hidden_dims=[self.concat_size, self.concat_size, 2],
                                  activations=[torch.nn.Tanh(), torch.nn.Tanh(), lambda x: x],
                                  dropout=dropout
                                  )
            # Keep threshold

            # self.keep_thres = list(np.arange(start=0.2, stop=0.6, step=0.075))
            self.keep_thres = [0.0, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 1.0]
            self.rouge_metrics_compression_dict = OrderedDict()
            for thres in self.keep_thres:
                self.rouge_metrics_compression_dict["{}".format(thres)] = RougeStrEvaluation(name='cp_{}'.format(thres),
                                                                                             path_to_valid=valid_tmp_path,
                                                                                             writting_address=valid_tmp_path,
                                                                                             serilization_name=serilization_name)

    def encode_sent_and_span_paral(self, text,  # batch, max_sent, max_word
                                   text_msk,  # batch, max_sent, max_word
                                   span,  # batch, max_sent_num, max_span_num, max_word
                                   sent_idx  # batch size
                                   ):
        this_text = two_dim_index_select(text['tokens'], sent_idx)  # batch, max_word
        from allennlp.modules.elmo import batch_to_ids
        if self.use_elmo:
            this_text_list: List = this_text.tolist()
            text_str_list = []
            for sample in this_text_list:
                s = [self.vocab.get_token_from_index(x) for x in sample]
                text_str_list.append(s)
            character_ids = batch_to_ids(text_str_list).to(self.device)
            this_context = self.elmo(character_ids)
            # print(this_context['elmo_representations'][0].size())
            this_context = this_context['elmo_representations'][0]
        else:
            this_text = {'tokens': this_text}
            this_context = self._text_field_embedder(this_text)

        num_doc, max_word, inp_dim = this_context.size()
        batch_size = sent_idx.size()[0]
        assert batch_size == num_doc

        # text is the original text of the selected sentence.
        # this_context = two_dim_index_select(context, sent_idx)  # batch, max_word, hdim
        this_context_mask = two_dim_index_select(text_msk, sent_idx)  # batch, max_word
        this_span = two_dim_index_select(span, sent_idx)  # batch , nspan, max_word

        concat_rep_of_compression, \
        span_msk, original_sent_rep = self.enc.forward(word_emb=this_context,
                                                       word_emb_msk=this_context_mask,
                                                       span=this_span)
        return concat_rep_of_compression, span_msk, original_sent_rep

    def encode_sent_and_span(self, text, text_msk, span, batch_idx, sent_idx):
        context = self._text_field_embedder(text)
        num_doc, max_sent, max_word, inp_dim = context.size()
        num_doc_, max_sent_, nspan = span.size()[0:-1]
        assert num_doc == num_doc_
        assert max_sent == max_sent_
        this_context = context[batch_idx, sent_idx, :, :].unsqueeze(0)
        this_span = span[batch_idx, sent_idx, :, :].unsqueeze(0)
        this_context_mask = text_msk[batch_idx, sent_idx, :].unsqueeze(0)
        flattened_enc, attn_dist, \
        spans_rep, span_msk, score \
            = self.enc.forward(word_emb=this_context,
                               word_emb_msk=this_context_mask,
                               span=this_span)
        return flattened_enc, spans_rep, span_msk
        # 1, hid*2      1, span num, hid        1, span num

    def indep_compression_judger(self, reps):
        # t, batch_size_, max_span_num,self.concat_size
        timestep, batch_size, max_span_num, dim = reps.size()
        score = self.ff.forward(reps)
        # lin_out = self.nn_lin(reps)
        # activated = torch.sigmoid(lin_out)
        # score = self.nn_lin2(activated)
        if random.random() < 0.005:
            print("score: {}".format(score[0]))
        return score

    def get_out_dim(self):
        return self.concat_size

    def forward_parallel(self, sent_decoder_states,  # t, batch, hdim
                         sent_decoder_outputs_logit,  # t, batch
                         document_rep,  # batch, hdim
                         text,  # batch, max_sent, max_word
                         text_msk,  # batch, max_sent, max_word
                         span):  # batch, max_sent_num, max_span_num, max_word
        # Encode compression options given sent emission.
        # output scores, attn dist, ...
        t, batch_size, hdim = sent_decoder_states.size()
        t_, batch_size_ = sent_decoder_outputs_logit.size()  # invalid bits are -1
        batch, max_sent, max_span_num, max_word = span.size()
        # assert t == t_
        t = min(t, t_)
        assert batch_size == batch == batch_size_
        if self.aggressive_compression > 0:
            all_attn_dist = torch.zeros((t, batch_size, max_span_num)).to(self.device)
            all_scores = torch.ones((t, batch_size, max_span_num)).to(self.device) * -100
        else:
            all_attn_dist = None
            all_scores = None
        all_reps = torch.zeros((t, batch_size_, max_span_num, self.concat_size), device=self.device)
        for timestep in range(t):
            dec_state = sent_decoder_states[timestep]  # batch, dim
            logit = sent_decoder_outputs_logit[timestep]  # batch

            # valid_mask = (logit > 0)
            positive_logit = self.relu(logit.float()).long()  # turn -1 to 0

            span_t, span_msk_t, sent_t = self.encode_sent_and_span_paral(text=text,
                                                                         text_msk=text_msk,
                                                                         span=span,
                                                                         sent_idx=positive_logit)
            # sent_t : batch, sent_dim
            # span_t: batch, span_num, span_dim
            # span_msk_t: batch, span_num [[1,1,1,0,0,0],

            concated_rep_high_level = torch.cat([dec_state, document_rep, sent_t], dim=1)
            # batch, DIM
            if self.aggressive_compression > 0:
                attn_dist, score = self.attn.forward_one_step(enc_state=span_t,
                                                              dec_state=concated_rep_high_level,
                                                              enc_mask=span_msk_t.float())
            # attn_dist: batch, span num
            # score:    batch, span num

            # concated_rep: batch, dim ==> batch, 1, dim ==> batch, max_span_num, dim
            expanded_concated_rep = concated_rep_high_level.unsqueeze(1).expand((batch, max_span_num, -1))
            all_reps[timestep, :, :, :] = torch.cat([expanded_concated_rep, span_t], dim=2)
            if self.aggressive_compression > 0:
                all_attn_dist[timestep, :, :] = attn_dist
                all_scores[timestep, :, :] = score

        return all_attn_dist, all_scores, all_reps

    def comp_loss_inf_deletion(self,
                               decoder_outputs_logit,  # gold label!!!!
                               # span_seq_label,  # batch, max sent num
                               span_rouge,  # batch, max sent num, max compression num
                               scores,
                               comp_rouge_ratio,
                               loss_thres=1
                               ):
        """

        :param decoder_outputs_logit:
        :param span_rouge: [batch, max_sent, max_compression]
        :param scores: [timestep, batch, max_compression, 2]
        :param comp_rouge_ratio: [batch_size, max_sent, max_compression]
        :return:
        """
        tim, bat = decoder_outputs_logit.size()
        time, batch, max_span, _ = scores.size()
        batch_, sent_len, max_sp = span_rouge.size()
        assert batch_ == batch == bat
        assert time == tim
        assert max_sp == max_span
        goal_rouge_label = torch.ones((tim, batch, max_span), device=self.device, dtype=torch.long,
                                      ) * (-1)
        weights = torch.ones((tim, batch, max_span), device=self.device, dtype=torch.float)
        decoder_outputs_logit_mask = (decoder_outputs_logit >= 0).unsqueeze(2).expand(
            (time, batch, max_span)).float().view(-1)
        decoder_outputs_logit = torch.nn.functional.relu(decoder_outputs_logit).long()
        z = torch.zeros((1), device=self.device)
        for tt in range(tim):
            decoder_outputs_logit_t = decoder_outputs_logit[tt]
            out = two_dim_index_select(inp=comp_rouge_ratio, index=decoder_outputs_logit_t)
            label = torch.gt(out, loss_thres).long()

            mini_mask = torch.gt(out, 0.01).float()

            # baseline_mask = 1 - torch.lt(torch.abs(out - 0.99), 0.01).float()  # baseline will be 0

            # weight = torch.max(input=-out + 0.5, other=z) + 1
            # weights[tt] = mini_mask * baseline_mask
            weights[tt] = mini_mask
            goal_rouge_label[tt] = label
        probs = scores.view(-1, 2)
        goal_rouge_label = goal_rouge_label.view(-1)
        weights = weights.view(-1)
        loss = self.XELoss(input=probs, target=goal_rouge_label)
        loss = loss * decoder_outputs_logit_mask * weights
        return torch.mean(loss)
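    # A toy illustration (dummy shapes assumed) of the masked loss above:
    # entries whose gold label is -1 are ignored by
    # CrossEntropyLoss(ignore_index=-1), and the surviving per-span losses are
    # further scaled by `decoder_outputs_logit_mask * weights` before the mean:
    #   probs  = torch.randn(6, 2)                   # flattened (t * batch * max_span, 2)
    #   labels = torch.tensor([1, 0, -1, 1, -1, 0])  # -1 marks padded spans
    #   raw    = torch.nn.CrossEntropyLoss(reduction='none', ignore_index=-1)(probs, labels)
    #   masked = raw * torch.tensor([1., 1., 0., 1., 0., 1.])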

    def comp_loss(self, decoder_outputs_logit,  # gold label!!!!
                  scores,
                  span_seq_label,  # batch, max sent num
                  span_rouge,  # batch, max sent num, max compression num
                  comp_rouge_ratio
                  ):
        t, batch = decoder_outputs_logit.size()
        t_, batch_, comp_num = scores.size()
        b, max_sent = span_seq_label.size()
        # b_, max_sen, max_comp_, _ = span.size()
        _b, max_sent_, max_comp = span_rouge.size()
        assert batch == batch_ == b == _b
        assert max_sent_ == max_sent
        assert comp_num == max_comp
        span_seq_label = span_seq_label.long()
        total_loss = torch.zeros((t, b)).to(self.device)
        # print(decoder_outputs_logit)
        # print(span_seq_label)
        for timestep in range(t):

            # this is the sent idx
            for batch_idx in range(b):
                logit = decoder_outputs_logit[timestep][batch_idx]
                # print(logit)
                # decoder_outputs_logit should be the gold label for sentence emission.
                # if it's 0 or -1, then we skip supervision.
                if logit < 0:
                    continue
                ref_rouge_score = comp_rouge_ratio[batch_idx][logit]
                num_of_compression = ref_rouge_score.size()[0]

                _supervision_label_msk = (ref_rouge_score > 0.98).float()
                label = torch.from_numpy(np.arange(num_of_compression)).to(self.device).long()
                score_t = scores[timestep][batch_idx].unsqueeze(0)  # comp num
                score_t = score_t.expand(num_of_compression, -1)
                # label = span_seq_label[batch_idx][logit].unsqueeze(0)

                loss = self.XEloss(score_t, label)
                # print(loss)
                loss = _supervision_label_msk * loss
                total_loss[timestep][batch_idx] = torch.sum(loss)
                # sent_msk_t = two_dim_index_select(sent_mask, logit)

        return torch.mean(total_loss)

    def _dec_compression_one_step(self, predict_compression,
                                  sp_meta,
                                  word_sent: List[str], keep_threshold: List[float],
                                  context: List[List[str]] = None):
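        # For each keep-threshold, start from the full token-index set of the
        # sentence and drop the tokens of every compression span whose predicted
        # probability (column 1 of `predict_compression`) exceeds that threshold,
        # or whose tokens are already fully covered by the decoded context.
        # Returns the spans ordered by probability plus, per threshold, the
        # visualised sentence, the surviving words, and the keep ratio.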

        full_set_len = set(range(len(word_sent)))
        # max_comp, _ = predict_compression.size

        preds = [full_set_len.copy() for _ in range(len(keep_threshold))]

        # Show all of the compression spans
        stat_compression = {}
        for comp_idx, comp_meta in enumerate(sp_meta):
            p = predict_compression[comp_idx][1]
            node_type, sel_idx, rouge, ratio = comp_meta
            if node_type != "BASELINE":
                selected_words = [x for idx, x in enumerate(word_sent) if idx in sel_idx]
                selected_words_str = "_".join(selected_words)
                stat_compression["{}".format(selected_words_str)] = {
                    "prob": float("{0:.2f}".format(p)),  # float("{0:.2f}".format())
                    "type": node_type,
                    "rouge": float("{0:.2f}".format(rouge)),
                    "ratio": float("{0:.2f}".format(ratio)),
                    "sel_idx": sel_idx,
                    "len": len(sel_idx)
                }
        stat_compression_order = OrderedDict(
            sorted(stat_compression.items(), key=lambda item: item[1]["prob"], reverse=True))  # Python 3
        for idx, _keep_thres in enumerate(keep_threshold):
            history: List[str] = context[idx]
            his_set = set((" ".join(history)).split(" "))
            for key, value in stat_compression_order.items():
                p = value['prob']
                sel_idx = value['sel_idx']
                sel_txt = set([word_sent[x] for x in sel_idx])
                if sel_txt - his_set == set():
                    # print("Save big!")
                    # print("Context: {}\tCandidate: {}".format(his_set, sel_txt))
                    preds[idx] = preds[idx] - set(value['sel_idx'])
                    continue
                if p > _keep_thres:
                    preds[idx] = preds[idx] - set(value['sel_idx'])

        preds = [list(x) for x in preds]
        for pred in preds:
            pred.sort()
        # Visual output
        visual_outputs: List[str] = []
        words_for_evaluation: List[str] = []
        meta_keep_ratio_word = []

        for idx, compression in enumerate(preds):
            output = [word_sent[jdx] if (jdx in compression) else '_' + word_sent[jdx] + '_' for jdx in
                      range(len(word_sent))]
            visual_outputs.append(" ".join(output))

            words = [word_sent[x] for x in compression]
            meta_keep_ratio_word.append(float(len(words) / len(word_sent)))
            # meta_kepp_ratio_span.append(1 - float(len(survery['type'][idx]) / len(sp_meta)))
            words = " ".join(words)
            words = easy_post_processing(words)
            # print(words)
            words_for_evaluation.append(words)
        d: List[List] = []
        for kep_th, vis, words_eva, keep_word_ratio in zip(keep_threshold, visual_outputs, words_for_evaluation,
                                                           meta_keep_ratio_word):
            d.append([kep_th, vis, words_eva, keep_word_ratio])
        return stat_compression_order, d

    def decode_inf_deletion(self,
                            sent_decoder_outputs_logit,  # time, batch
                            span_prob,  # time, batch, max_comp, 2
                            metadata: List,
                            span_meta: List,
                            span_rouge,  # batch, sent, max_comp
                            keep_threshold: List[float]
                            ):
        batch_size, max_sent_num, max_comp_num = span_rouge.size()
        t, batsz, max_comp, _ = span_prob.size()
        span_score = torch.nn.functional.softmax(span_prob, dim=3).cpu().numpy()
        timestep, batch = sent_decoder_outputs_logit.size()
        sent_decoder_outputs_logit = sent_decoder_outputs_logit.cpu().data

        for idx, m in enumerate(metadata):
            abs_s = [" ".join(s) for s in m["abs_list"]]
            comp_exe = CompExecutor(span_meta=span_meta[idx],
                                    sent_idxs=sent_decoder_outputs_logit[:, idx],
                                    prediction_score=span_score[:, idx, :, :],
                                    abs_str=abs_s,
                                    name=m['name'],
                                    doc_list=m["doc_list"],
                                    keep_threshold=keep_threshold,
                                    part=m['name'], ser_dir=self.valid_tmp_path,
                                    ser_fname=self.serilization_name
                                    )
            # processed_words, del_record, \
            # compressions, full_sents, \
            bag_pred_eval = comp_exe.run()
            full_sents: List[List[str]] = comp_exe.full_sents
            # assemble full sents
            full_sents = [" ".join(x) for x in full_sents]

            # visual to console
            for idx in range(len(keep_threshold)):
                self.rouge_metrics_compression_dict["{}".format(keep_threshold[idx])](pred=bag_pred_eval[idx],
                                                                                      ref=[abs_s], origin=full_sents
                                                                                      )
Ejemplo n.º 28
0
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 num_highway_layers: int,
                 phrase_layer: Seq2SeqEncoder,
                 matrix_attention_layer: MatrixAttention,
                 modeling_layer: Seq2SeqEncoder,
                 dropout_prob: float = 0.1,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None,
                 answering_abilities: Iterable[str] = ("passage_span_extraction",
                                                       "question_span_extraction",
                                                       "addition_subtraction",
                                                       "counting")) -> None:
        super().__init__(vocab, regularizer)

        # The answering abilities to include in this model
        self.answering_abilities = list(answering_abilities)

        text_embed_dim = text_field_embedder.get_output_dim()
        encoding_in_dim = phrase_layer.get_input_dim()
        encoding_out_dim = phrase_layer.get_output_dim()
        modeling_in_dim = modeling_layer.get_input_dim()
        modeling_out_dim = modeling_layer.get_output_dim()

        self._text_field_embedder = text_field_embedder

        self._embedding_proj_layer = torch.nn.Linear(text_embed_dim, encoding_in_dim)
        self._highway_layer = Highway(encoding_in_dim, num_highway_layers)

        self._encoding_proj_layer = torch.nn.Linear(encoding_in_dim, encoding_in_dim)
        self._phrase_layer = phrase_layer

        self._matrix_attention = matrix_attention_layer

        self._modeling_proj_layer = torch.nn.Linear(encoding_out_dim * 4, modeling_in_dim)
        self._modeling_layer = modeling_layer

        self._passage_weights_predictor = torch.nn.Linear(modeling_out_dim, 1)
        self._question_weights_predictor = torch.nn.Linear(encoding_out_dim, 1)

        if len(self.answering_abilities) > 1:
            self._answer_ability_predictor = FeedForward(modeling_out_dim + encoding_out_dim,
                                                         activations=[Activation.by_name('relu')(),
                                                                      Activation.by_name('linear')()],
                                                         hidden_dims=[modeling_out_dim,
                                                                      len(self.answering_abilities)],
                                                         num_layers=2,
                                                         dropout=dropout_prob)

        if "passage_span_extraction" in self.answering_abilities:
            self._passage_span_extraction_index = self.answering_abilities.index("passage_span_extraction")
            self._passage_span_start_predictor = FeedForward(modeling_out_dim * 2,
                                                             activations=[Activation.by_name('relu')(),
                                                                          Activation.by_name('linear')()],
                                                             hidden_dims=[modeling_out_dim, 1],
                                                             num_layers=2)
            self._passage_span_end_predictor = FeedForward(modeling_out_dim * 2,
                                                           activations=[Activation.by_name('relu')(),
                                                                        Activation.by_name('linear')()],
                                                           hidden_dims=[modeling_out_dim, 1],
                                                           num_layers=2)

        if "question_span_extraction" in answering_abilities:
            self._question_span_extraction_index = self.answering_abilities.index("question_span_extraction")
            self._question_span_start_predictor = FeedForward(modeling_out_dim * 2,
                                                              activations=[Activation.by_name('relu')(),
                                                                           Activation.by_name('linear')()],
                                                              hidden_dims=[modeling_out_dim, 1],
                                                              num_layers=2)
            self._question_span_end_predictor = FeedForward(modeling_out_dim * 2,
                                                            activations=[Activation.by_name('relu')(),
                                                                         Activation.by_name('linear')()],
                                                            hidden_dims=[modeling_out_dim, 1],
                                                            num_layers=2)

        if "addition_subtraction" in answering_abilities:
            self._addition_subtraction_index = self.answering_abilities.index("addition_subtraction")
            self._number_sign_predictor = FeedForward(modeling_out_dim * 3,
                                                      activations=[Activation.by_name('relu')(),
                                                                   Activation.by_name('linear')()],
                                                      hidden_dims=[modeling_out_dim, 3],
                                                      num_layers=2)

        if "counting" in answering_abilities:
            self._counting_index = self.answering_abilities.index("counting")
            self._count_number_predictor = FeedForward(modeling_out_dim,
                                                       activations=[Activation.by_name('relu')(),
                                                                    Activation.by_name('linear')()],
                                                       hidden_dims=[modeling_out_dim, 10],
                                                       num_layers=2)
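            # Each ability-specific head above is a two-layer relu -> linear
            # FeedForward over the modeling representation; the count head
            # emits 10 logits, presumably one per supported count value (0-9).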

        self._drop_metrics = DropEmAndF1()
        self._dropout = torch.nn.Dropout(p=dropout_prob)

        initializer(self)
Ejemplo n.º 29
0
        initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
            Used to initialize the model parameters.
        regularizer : ``RegularizerApplicator``, optional (default=``None``)
            If provided, will be used to calculate the regularization penalty during training.
    """

    lstm = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
    inference = PytorchSeq2SeqWrapper(
        torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
    # esim = PytorchSeq2SeqWrapper(torch.nn.ESIM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))

    encoder_dim = word_embeddings.get_output_dim()

    projection_feedforward = FeedForward(encoder_dim * 4, 1,
                                         inference.get_input_dim(),
                                         Activation.by_name("elu")())

    # (batch_size, model_dim * 2 * 4)
    output_feedforward = FeedForward(lstm.get_output_dim() * 4, 1, 2,
                                     Activation.by_name("elu")())

    output_logit = torch.nn.Linear(in_features=2, out_features=2)

    simfunc = BilinearSimilarity(encoder_dim, encoder_dim)

    model = ESIM(vocab=vocab,
                 text_field_embedder=word_embeddings,
                 encoder=lstm,
                 inference_encoder=inference,
                 similarity_function=simfunc,
                 # the remaining arguments are assumed from the components
                 # constructed above (standard AllenNLP ESIM signature)
                 projection_feedforward=projection_feedforward,
                 output_feedforward=output_feedforward,
                 output_logit=output_logit)
Ejemplo n.º 31
0
    def __init__(
        self,
        vocab: Vocabulary,
        source_embedder: TextFieldEmbedder,  # just Embedding layer
        encoder1: Seq2SeqEncoder,  # user encoder
        encoder2: Seq2SeqEncoder,  # system encoder
        attention: Attention,  # decoding attention
        max_decoding_steps: int = 200,  # max timesteps of decoder
        beam_size: int = 3,  # beam search parameter
        target_namespace: str = "target_tokens",  # two separate vocabulary
        target_embedding_dim: int = None,  # target word embedding dimension
        scheduled_sampling_ratio: float = 0.,  # maybe unnecessary
        projection_dim: int = None,  #
        use_coverage: bool = False,  # coverage penalty, optional
        coverage_loss_weight: float = None,
        domain_lambda: float = 0.5,  # penalty weight in the final loss function; needs tuning
        initializer: InitializerApplicator = InitializerApplicator()
    ) -> None:

        super(SPNet, self).__init__(vocab)

        # General variables
        # target_namespace: target_tokens; source_namespace: tokens;
        self._target_namespace = target_namespace
        self._start_index = self.vocab.get_token_index(START_SYMBOL,
                                                       self._target_namespace)
        self._end_index = self.vocab.get_token_index(END_SYMBOL,
                                                     self._target_namespace)
        self._source_unk_index = self.vocab.get_token_index(DEFAULT_OOV_TOKEN)
        self._target_unk_index = self.vocab.get_token_index(
            DEFAULT_OOV_TOKEN, self._target_namespace)
        self._source_vocab_size = self.vocab.get_vocab_size()
        self._target_vocab_size = self.vocab.get_vocab_size(
            self._target_namespace)

        # Encoder setting
        self._source_embedder = source_embedder
        self._encoder1 = encoder1
        self._encoder2 = encoder2
        # We assume that the 2 encoders have the same hidden state size
        self._encoder_output_dim = self._encoder1.get_output_dim()

        # Decoder setting
        self._target_embedding_dim = target_embedding_dim or source_embedder.get_output_dim()
        self._num_classes = self.vocab.get_vocab_size(self._target_namespace)
        self._target_embedder = Embedding(self._num_classes,
                                          self._target_embedding_dim)
        self._decoder_input_dim = self._encoder_output_dim * 2  # default as the decoder_output_dim
        # input projection of decoder: [context_attn, target_emb] -> [decoder_input_dim]
        self._input_projection_layer = Linear(
            self._target_embedding_dim + self._encoder_output_dim * 2,
            self._decoder_input_dim)
        self._decoder_output_dim = self._encoder_output_dim * 2
        self._decoder_cell = LSTMCell(self._decoder_input_dim,
                                      self._decoder_output_dim)
        self._projection_dim = projection_dim or self._source_embedder.get_output_dim()
        self._output_projection_layer = Linear(self._decoder_output_dim,
                                               self._num_classes)
        self._p_gen_layer = Linear(
            self._encoder_output_dim * 2 + self._decoder_output_dim * 2 +
            self._decoder_input_dim, 1)
        self._attention = attention

        # coverage penalty setting
        self._use_coverage = use_coverage
        self._coverage_loss_weight = coverage_loss_weight
        self._eps = 1e-45

        # Decoding strategy setting
        self._scheduled_sampling_ratio = scheduled_sampling_ratio
        self._max_decoding_steps = max_decoding_steps
        self._beam_search = BeamSearch(self._end_index,
                                       max_steps=max_decoding_steps,
                                       beam_size=beam_size)
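        # At decoding time this is typically driven through AllenNLP's
        # BeamSearch.search(start_predictions, start_state, step) API, which
        # returns the top-k predictions and their log-probabilities; the step
        # function is whatever single-step decoder this model defines elsewhere.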

        # multitasking of domain classification
        self._domain_penalty = domain_lambda  # penalty term = 0.5 as default
        self._classifier_params = Params({
            "input_dim": self._decoder_output_dim,
            "hidden_dims": [128, 7],
            "activations": ["relu", "linear"],
            "dropout": [0.2, 0.0],
            "num_layers": 2
        })
        self._domain_classifier = FeedForward.from_params(
            self._classifier_params)

        initializer(self)