Example #1
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder, vocab: Vocabulary) -> None:

        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.encoder = encoder
        self.vocab = vocab
        self.label_vocab = vocab.get_index_to_token_vocabulary(
            namespace='labels')

        # One running average vector per class, initialised to -inf to mark
        # classes whose average has not been computed yet.
        inf_vec = torch.Tensor([float('-inf')] * encoder.get_input_dim())
        self.class_avgs = [
            inf_vec.clone() for _ in range(len(self.label_vocab))
        ]

        self.accuracy = CategoricalAccuracy()
        self.debug = False

        if self.debug:
            print("===MODEL DEBUG===")
            print(
                "Number of embeddings:",
                self.word_embeddings._token_embedders['tokens'].num_embeddings)
            # print("Token embedders:", self.word_embeddings._token_embedders)
            # print("Embedding weights", self.word_embeddings._token_embedders['tokens'].weight)
            print("vocab:", vocab)
            print("===MODEL DEBUG===")
Example #2
    def __init__(
        self,
        encoder: Seq2SeqEncoder,
        projection: bool = True,
    ) -> None:
        super().__init__(stateful=encoder.stateful)

        self._input_dim = encoder.get_input_dim()
        self._encoder = encoder
        self._projection: Optional[torch.nn.Module] = None
        if projection:
            self._projection = TimeDistributed(  # type: ignore
                torch.nn.Linear(
                    encoder.get_output_dim(),
                    encoder.get_input_dim(),
                ))
        else:
            check_dimensions_match(
                self._encoder.get_input_dim(),
                self._encoder.get_output_dim(),
                "encoder input dim",
                "encoder output dim",
            )
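The projection maps the encoder output back to the input width, and without it the input and output dimensions are required to match, which points at a residual combination in forward. A minimal sketch of that presumed forward pass (not part of the snippet):

    def forward(self, inputs, mask=None):
        # Encode, optionally project back to the input width, then add
        # the residual (assumed; the actual forward is not shown here).
        outputs = self._encoder(inputs, mask)
        if self._projection is not None:
            outputs = self._projection(outputs)
        return outputs + inputs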
Example #3
    def __init__(
        self,
        vocab: Vocabulary,
        embedder: TextFieldEmbedder,
        encoder: Seq2SeqEncoder,
        emb_to_enc_proj: Optional[FeedForward] = None,
        feedforward: Optional[FeedForward] = None,
        dropout: float = 0.0,
        num_tags: int = 2,
        use_crf: bool = False,
    ) -> None:
        super().__init__(vocab)
        self.embedder = embedder
        self.emb_to_enc_proj = emb_to_enc_proj
        self.encoder = encoder
        # Either the embedder feeds the encoder directly, or the projection
        # bridges the two dimensions.
        assert (embedder.get_output_dim() == encoder.get_input_dim()
                or (emb_to_enc_proj is not None
                    and emb_to_enc_proj.get_output_dim() == encoder.get_input_dim()))
        self.feedforward = None
        pre_output_dim = encoder.get_output_dim()
        if feedforward is not None:
            assert feedforward.get_input_dim() == encoder.get_output_dim()
            self.feedforward = feedforward
            pre_output_dim = self.feedforward.get_output_dim()

        self.hidden2tag = torch.nn.Linear(in_features=pre_output_dim,
                                          out_features=num_tags)
        self.dropout = torch.nn.Dropout(dropout)
        self.accuracy = CategoricalAccuracy()
        self.f1 = F1Measure(1)
        self.use_crf = use_crf
        if use_crf:
            self.crf = ConditionalRandomField(
                num_tags, include_start_end_transitions=True)
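A sketch of how this tagger's loss is presumably computed given the use_crf flag (the forward method is not shown; encoded, tags and mask are assumed local names, and sequence_cross_entropy_with_logits is AllenNLP's standard per-token loss):

    from allennlp.nn.util import sequence_cross_entropy_with_logits

    logits = self.hidden2tag(self.dropout(encoded))   # (batch, seq_len, num_tags)
    if self.use_crf:
        loss = -self.crf(logits, tags, mask)          # CRF negative log-likelihood
    else:
        loss = sequence_cross_entropy_with_logits(logits, tags, mask)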
Example #4
    def __init__(
        self,
        highway_encoder: Seq2SeqEncoder,
        transform_gate_encoder: Seq2SeqEncoder,
        carry_gate_encoder: Optional[Seq2SeqEncoder] = None,
        projection: bool = True,
    ) -> None:
        stateful = highway_encoder.stateful or transform_gate_encoder.stateful
        check_dimensions_match(
            highway_encoder.get_input_dim(),
            transform_gate_encoder.get_input_dim(),
            "highway_encoder input dim",
            "transform_gate_encoder input dim",
        )
        if carry_gate_encoder is not None:
            stateful = stateful or carry_gate_encoder.stateful
            check_dimensions_match(
                highway_encoder.get_input_dim(),
                carry_gate_encoder.get_input_dim(),
                "highway_encoder input dim",
                "carry_gate_encoder input dim",
            )

        super().__init__(stateful=stateful)

        self._input_dim = highway_encoder.get_input_dim()
        self._highway_encoder = highway_encoder
        self._transform_gate_encoder = transform_gate_encoder
        self._carry_gate_encoder = carry_gate_encoder
        self._highway_projection: Optional[torch.nn.Module] = None
        self._transform_gate_projection: Optional[torch.nn.Module] = None
        self._carry_gate_projection: Optional[torch.nn.Module] = None
        if projection:
            self._highway_projection = TimeDistributed(  # type: ignore
                torch.nn.Linear(
                    highway_encoder.get_output_dim(),
                    highway_encoder.get_input_dim(),
                ))
            self._transform_gate_projection = TimeDistributed(  # type: ignore
                torch.nn.Linear(
                    transform_gate_encoder.get_output_dim(),
                    transform_gate_encoder.get_input_dim(),
                ))
            if carry_gate_encoder is not None:
                self._carry_gate_projection = TimeDistributed(  # type: ignore
                    torch.nn.Linear(
                        carry_gate_encoder.get_output_dim(),
                        carry_gate_encoder.get_input_dim(),
                    ))
        else:
            assert highway_encoder.get_output_dim() in (self._input_dim, 1)
            assert transform_gate_encoder.get_output_dim() in (self._input_dim,
                                                               1)
            if carry_gate_encoder is not None:
                assert carry_gate_encoder.get_output_dim() in (self._input_dim,
                                                               1)
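The gate structure follows the highway-network formulation: with transform gate t and carry gate c (c = 1 - t when no carry encoder is given), the output is y = t * H(x) + c * x. A sketch of the presumed combination in forward, with the optional projections omitted for brevity:

    h = self._highway_encoder(inputs, mask)
    t = torch.sigmoid(self._transform_gate_encoder(inputs, mask))
    if self._carry_gate_encoder is not None:
        c = torch.sigmoid(self._carry_gate_encoder(inputs, mask))
    else:
        c = 1 - t  # classic highway coupling of the two gates
    outputs = t * h + c * inputs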
Example #5
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder, vocab: Vocabulary) -> None:

        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.encoder = encoder
        self.vocab = vocab
        self.label_vocab = vocab.get_index_to_token_vocabulary(
            namespace='labels')

        # One running average vector per class, initialised to -inf to mark
        # classes whose average has not been computed yet.
        inf_vec = torch.Tensor([float('-inf')] * encoder.get_input_dim())
        self.class_avgs = [
            inf_vec.clone() for _ in range(len(self.label_vocab))
        ]

        self.accuracy = CategoricalAccuracy()
        self.f_beta = FBetaMeasure(1.0, None, [0, 1, 2])
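How these metrics would typically be fed and read out in an AllenNLP forward/get_metrics pair (illustrative; logits and labels are assumed names, and with average=None and labels=[0, 1, 2] FBetaMeasure reports per-class values):

    # Inside forward(): accumulate statistics batch by batch.
    self.accuracy(logits, labels)
    self.f_beta(logits, labels)

    # Inside get_metrics(): read them out, resetting once per epoch.
    scores = self.f_beta.get_metric(reset)
    # -> {'precision': [...], 'recall': [...], 'fscore': [...]} for ids 0, 1, 2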
Example #6
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder, dropout_p: float,
                 vocab: Vocabulary) -> None:
        super().__init__(vocab)

        self.word_embeddings = word_embeddings

        self.embedding2input = FeedForward(
            input_dim=word_embeddings.get_output_dim(),
            num_layers=1,
            hidden_dims=encoder.get_input_dim(),
            activations=Activation.by_name('relu')(),
            dropout=dropout_p)

        self.encoder = encoder

        self.hidden2intermediate = FeedForward(
            input_dim=encoder.get_output_dim(),
            num_layers=1,
            hidden_dims=encoder.get_output_dim() // 2,
            activations=Activation.by_name('relu')(),
            dropout=dropout_p)

        self.intermediate2tag = nn.Linear(
            in_features=encoder.get_output_dim() // 2,
            out_features=vocab.get_vocab_size('labels'))

        # self.accuracy = CategoricalAccuracy()

        label_vocab = vocab.get_token_to_index_vocabulary('labels').copy()
        # Drop the 'O' and 'OR' tags so the metric only scores entity labels.
        for tag in ('O', 'OR'):
            label_vocab.pop(tag)
        labels_for_metric = list(label_vocab.values())
        self.accuracy = CustomFBetaMeasure(beta=1.0,
                                           average='micro',
                                           labels=labels_for_metric)
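For reference, the shape flow through this pipeline, writing E for the embedding dim, I for the encoder input dim, H for the encoder output dim and L for the label count:

    # tokens -> word_embeddings      : (batch, seq, E)
    #        -> embedding2input      : (batch, seq, I)
    #        -> encoder              : (batch, seq, H)
    #        -> hidden2intermediate  : (batch, seq, H // 2)
    #        -> intermediate2tag     : (batch, seq, L)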
Example #7
    def __init__(
        self,
        vocab: Vocabulary,
        utterance_embedder: TextFieldEmbedder,
        utterance_embedder2: TextFieldEmbedder,
        slot_embedder: TextFieldEmbedder,
        utterance_encoder: Seq2SeqEncoder,
        utterance_encoder2: Seq2SeqEncoder,
        slot_encoder: Seq2SeqEncoder,
        matrix_attention: MatrixAttention,
        modeling_layer: Seq2SeqEncoder,
        fc_ff_layer: FeedForward,
        label_namespace: str = "labels",
        s1_label_namespace: str = "s1_labels",
        feedforward: Optional[FeedForward] = None,
        label_encoding: Optional[str] = "BIO",
        include_start_end_transitions: bool = True,
        constrain_crf_decoding: bool = True,
        calculate_span_f1: bool = True,
        dropout: Optional[float] = 0.3,
        mask_lstms: bool = True,
        verbose_metrics: bool = False,
        initializer: InitializerApplicator = InitializerApplicator(),
        top_k: int = 1,
        **kwargs,
    ) -> None:
        super().__init__(vocab, **kwargs)

        self.label_namespace = label_namespace
        self.s1_label_namespace = s1_label_namespace

        self.utterance_embedder = utterance_embedder
        self.utterance_embedder2 = utterance_embedder2
        self.slot_embedder = slot_embedder

        self.num_tags = self.vocab.get_vocab_size(label_namespace)
        self.s1_num_tags = self.vocab.get_vocab_size(s1_label_namespace)

        self.utterance_encoder = utterance_encoder
        self.utterance_encoder2 = utterance_encoder2
        self.slot_encoder = slot_encoder

        self._matrix_attention = matrix_attention
        self._modeling_layer = modeling_layer
        self.fc_ff_layer = fc_ff_layer
        self.top_k = top_k

        self._verbose_metrics = verbose_metrics

        self._mask_lstms = mask_lstms

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
        else:
            self.dropout = None
        self._feedforward = feedforward

        if feedforward is not None:
            output_dim = feedforward.get_output_dim()
            s1_output_dim = feedforward.get_output_dim()
        else:
            output_dim = self.fc_ff_layer.get_output_dim()
            s1_output_dim = self.utterance_encoder.get_output_dim()

        self.tag_projection_layer = TimeDistributed(
            Linear(output_dim, self.num_tags))
        self.s1_tag_projection_layer = TimeDistributed(
            Linear(s1_output_dim, self.s1_num_tags))

        if constrain_crf_decoding is None:
            constrain_crf_decoding = label_encoding is not None
        if calculate_span_f1 is None:
            calculate_span_f1 = label_encoding is not None

        self.label_encoding = label_encoding
        if constrain_crf_decoding:
            if not label_encoding:
                raise ConfigurationError(
                    "constrain_crf_decoding is True, but no label_encoding was specified."
                )
            labels = self.vocab.get_index_to_token_vocabulary(label_namespace)
            s1_labels = self.vocab.get_index_to_token_vocabulary(
                s1_label_namespace)

            constraints = allowed_transitions(label_encoding, labels)
            s1_constraints = allowed_transitions(label_encoding, s1_labels)
        else:
            constraints = None
            s1_constraints = None

        self.include_start_end_transitions = include_start_end_transitions
        self.crf = ConditionalRandomField(
            self.num_tags,
            constraints,
            include_start_end_transitions=include_start_end_transitions)

        # The s1 tag set gets its own CRF with its own transition constraints.
        self.s1_crf = ConditionalRandomField(
            self.s1_num_tags,
            s1_constraints,
            include_start_end_transitions=include_start_end_transitions)

        self.calculate_span_f1 = calculate_span_f1
        self.metrics = {"accuracy": CategoricalAccuracy()}
        if calculate_span_f1:
            if not label_encoding:
                raise ConfigurationError(
                    "calculate_span_f1 is True, but no label_encoding was specified."
                )
            self._f1_metric = SpanBasedF1Measure(vocab,
                                                 tag_namespace=label_namespace,
                                                 label_encoding=label_encoding)
            # Only register the span-F1 metric when it is actually computed.
            self.metrics["f1"] = self._f1_metric

        check_dimensions_match(
            utterance_embedder.get_output_dim(),
            utterance_encoder.get_input_dim(),
            "utterance field embedding dim",
            "utterance encoder input dim",
        )
        if feedforward is not None:
            check_dimensions_match(
                modeling_layer.get_output_dim(),
                feedforward.get_input_dim(),
                "encoder output dim",
                "feedforward input dim",
            )
        initializer(self)
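A sketch of how the two CRFs are presumably used at decode time (the forward method is not shown; logits, s1_logits and mask are assumed names, and viterbi_tags accepts a top_k argument in recent AllenNLP versions, matching the top_k stored above):

    # Main tag set: keep the top_k Viterbi paths per instance.
    best_paths = self.crf.viterbi_tags(logits, mask, top_k=self.top_k)
    # Secondary (s1) tag set: single best path.
    s1_best_paths = self.s1_crf.viterbi_tags(s1_logits, mask)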
Example #8
    def __init__(self,
                 encoder: Seq2SeqEncoder,
                 metrics_dict_seq: dict,
                 metrics_dict_reg: dict,
                 vocab: Vocabulary,
                 attention: Attention = DotProductAttention(),
                 seq_weight_loss: float = 1.0,
                 reg_weight_loss: float = 1.0,
                 reg_seq_weight_loss: float = 1.0,
                 predict_seq: bool = True,
                 predict_avg_total_payoff: bool = True,
                 batch_size: int = 10,
                 linear_dim: Optional[int] = None,
                 dropout: Optional[float] = None,
                 use_last_hidden_vec: bool = False,
                 use_transformer_encode: bool = False,
                 input_dim: int = 0,
                 use_raisha_attention: bool = False,
                 raisha_num_features: int = 0,
                 linear_layers_activation: str = 'relu',
                 use_raisha_LSTM: bool = False) -> None:
        super(LSTMBasedModel, self).__init__(vocab)
        self.encoder = encoder
        self.use_raisha_LSTM = use_raisha_LSTM
        if use_transformer_encode:
            encoder_output_dim = input_dim
        else:
            encoder_output_dim = encoder.get_output_dim()

        if use_raisha_attention and raisha_num_features > 0:  # add attention layer to create raisha representation
            self.raisha_attention_layer = attention
            # A Parameter, so the query vector is trained and moved to the
            # model's device automatically (calling .cuda() on a
            # requires_grad tensor yields a non-leaf that is never updated).
            self.raisha_attention_vector = torch.nn.Parameter(
                torch.randn(batch_size, raisha_num_features))
            # linear layer: raisha num features -> saifa num features (encoder.get_input_dim())
            self.linear_after_raisha_attention_layer = LinearLayer(input_size=raisha_num_features,
                                                                   output_size=encoder.get_input_dim(),
                                                                   activation=linear_layers_activation)
        else:
            self.raisha_attention_layer = None
            self.raisha_attention_vector = None

        if predict_seq:  # need hidden2tag layer
            if linear_dim is not None:  # add linear layer before hidden2tag
                self.linear_layer = LinearLayer(input_size=encoder_output_dim, output_size=linear_dim, dropout=dropout,
                                                activation=linear_layers_activation)
                hidden2tag_input_size = linear_dim
            else:
                self.linear_layer = None
                hidden2tag_input_size = encoder_output_dim
            self.hidden2tag = LinearLayer(input_size=hidden2tag_input_size, output_size=vocab.get_vocab_size('labels'),
                                          dropout=dropout, activation=linear_layers_activation)

        if predict_avg_total_payoff:  # need attention and regression layer
            self.attention = attention
            self.linear_after_attention_layer = LinearLayer(input_size=encoder_output_dim, output_size=batch_size,
                                                            activation=linear_layers_activation)
            self.regressor = LinearLayer(input_size=batch_size, output_size=1, activation=linear_layers_activation)
            # Same device/training fix as above for the payoff attention query.
            self.attention_vector = torch.nn.Parameter(
                torch.randn(batch_size, encoder_output_dim))
            self.mse_loss = nn.MSELoss()

        if predict_avg_total_payoff and predict_seq:  # for avg_turn models
            self.seq_reg_mse_loss = nn.MSELoss()

        if use_last_hidden_vec:
            if linear_dim is not None:  # add linear layer before last_hidden_reg
                self.linear_layer = LinearLayer(input_size=encoder_output_dim, output_size=linear_dim, dropout=dropout)
                self.last_hidden_reg = LinearLayer(input_size=linear_dim, output_size=1, dropout=dropout)
            else:
                self.linear_layer = None
                self.last_hidden_reg = LinearLayer(input_size=encoder_output_dim, output_size=1, dropout=dropout)

        self.metrics_dict_seq = metrics_dict_seq
        self.metrics_dict_reg = metrics_dict_reg
        self.seq_predictions = defaultdict(dict)
        self.reg_predictions = pd.DataFrame()
        self._epoch = 0
        self._first_pair = None
        self.seq_weight_loss = seq_weight_loss
        self.reg_weight_loss = reg_weight_loss
        self.reg_seq_weight_loss = reg_seq_weight_loss
        self.predict_seq = predict_seq
        self.predict_avg_total_payoff = predict_avg_total_payoff
        self.use_last_hidden_vec = use_last_hidden_vec
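A sketch of the presumed avg-total-payoff path (the forward method is not shown; encoder_out, mask and gold_avg_total_payoff are assumed names): attend over the encoder outputs with the learned query vector, project, and regress to a scalar. weighted_sum is AllenNLP's helper for pooling a matrix with attention weights.

    from allennlp.nn.util import weighted_sum

    attention_weights = self.attention(self.attention_vector, encoder_out, mask)
    pooled = weighted_sum(encoder_out, attention_weights)    # (batch, hidden)
    payoff = self.regressor(self.linear_after_attention_layer(pooled))
    loss = self.mse_loss(payoff.squeeze(-1), gold_avg_total_payoff)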