Example #1
    def __init__(self, 
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        # raise ValueError(self.vocab.get_vocab_size("tokens"))
        # raise ValueError(text_field_embedder.get_output_dim())

        if text_field_embedder.get_output_dim() != encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the title_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            encoder.get_input_dim()))

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.encoder = encoder
        self.classifier_feedforward = classifier_feedforward

        self.metrics = {
                "multilabel-f1": MultiLabelF1Measure(),
                'accuracy': BooleanAccuracy()
        }
        self.pearson_r = PearsonCorrelation()
        self.loss = nn.MultiLabelSoftMarginLoss()  # alternative: nn.BCEWithLogitsLoss()
        
        self._threshold = 0.5

        initializer(self)
Example #2
class WS353(Metric):
    def __init__(self, sim_file_path: str) -> None:
        self._sim_data = []
        self._sim_gold = []
        self._data_reader = KoWikiReader()
        self._pearson = PearsonCorrelation()

        with open(sim_file_path, 'r', encoding='utf-8') as f:
            f.readline()
            for line in f:
                w1, w2, score = line.strip().split('\t')
                self._sim_data.append((w1, w2))
                self._sim_gold.append(float(score))
        self._sim_gold = torch.tensor(self._sim_gold)

    @overrides
    def __call__(self,
                 vocab: Vocabulary,
                 embedder: SyllableEmbedder,
                 cuda_device: torch.device,
                 print_mode: bool = False) -> None:
        preds = []
        for i in range(len(self._sim_data)):
            w1, w2 = self._sim_data[i]
            w1 = self._data_reader.text_to_instance(source=Token(w1))['source']
            w2 = self._data_reader.text_to_instance(source=Token(w2))['source']

            w1.index(vocab)
            w2.index(vocab)

            w1 = w1.as_tensor(w1.get_padding_lengths())['syllables'].to(cuda_device)
            w2 = w2.as_tensor(w2.get_padding_lengths())['syllables'].to(cuda_device)
            e1, e2 = embedder(w1), embedder(w2)

            preds.append(F.cosine_similarity(e1, e2))

        self._pearson(torch.tensor(preds), self._sim_gold)

        if print_mode:
            print('w1\tw2\tgold\tpred')
            for ((w1, w2), gold, pred) in zip(self._sim_data, self._sim_gold, preds):
                print(f'{w1}\t{w2}\t{gold.item():.2f}\t{pred.item():.2f}')
            print(f'pscore: {self.get_metric():.3f}')

    @overrides
    def get_metric(self, reset: bool = False):
        score = self._pearson.get_metric(reset)
        if reset:
            self.reset()
        return score

    @overrides
    def reset(self):
        self._pearson.reset()
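
The WS353 class above is essentially a thin wrapper that feeds accumulated cosine similarities and gold
similarity scores into a PearsonCorrelation. A minimal sketch of that accumulate-then-read pattern follows;
the scores below are made-up placeholders, and only the metric API usage mirrors the example.

import torch
from allennlp.training.metrics import PearsonCorrelation

pearson = PearsonCorrelation()

# Hypothetical gold similarity judgements and predicted cosine similarities.
gold_scores = torch.tensor([3.5, 7.2, 1.0, 9.8])
predicted_sims = torch.tensor([0.41, 0.83, 0.12, 0.95])

pearson(predictions=predicted_sims, gold_labels=gold_scores)  # accumulate statistics
print(pearson.get_metric())  # Pearson r over everything accumulated so far
pearson.reset()              # clear the accumulated statistics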
Example #3
    def forward(self,
                sentence: Dict[str, torch.Tensor],
                labels: torch.Tensor = None) -> Dict[str, torch.Tensor]:
        # Next we need to implement forward, which is where the actual computation happens. Each Instance in the
        # dataset gets batched together with other Instances and fed into forward. The inputs to forward are
        # tensors, and their names should be the names of the fields in the Instances. In this case we have a
        # sentence field and (possibly) a labels field, so we will build our forward accordingly:

        mask = get_text_field_mask(sentence)
        # AllenNLP is designed to operate on batched inputs, but different input sequences have different lengths.
        # AllenNLP therefore pads the shorter inputs so that the batch has a uniform shape, which means our
        # computation needs to use a mask to exclude the padding. Here we just use the utility function
        # get_text_field_mask, which returns a tensor of 0s and 1s corresponding to padded and unpadded positions.

        embeddings = self.word_embeddings(sentence)
        # We first pass the sentence tensor (each sentence a sequence of token IDs) to the word_embeddings
        # module, which converts each sentence into a sequence of embedded tensors.

        encoder_out = self.encoder(embeddings, mask)
        # Next we pass the embedded tensors (and the mask) to the LSTM, which produces a sequence of encoded outputs.

        tag_logits = self.hidden2tag(encoder_out)
        output = {"tag_logits": tag_logits}
        # Finally, we pass each encoded output tensor to the feedforward layer to produce logits corresponding
        # to the various tags.

        if labels is not None:
            self.accuracy(tag_logits, labels, mask)
            output["loss"] = sequence_cross_entropy_with_logits(
                tag_logits, labels, mask)

            logits_flat = tag_logits.view(-1, tag_logits.size(-1))
            # shape : (batch * sequence_length, num_classes)
            log_probs_flat = torch.nn.functional.log_softmax(logits_flat,
                                                             dim=-1)
            # shape : (batch * max_len, 1)
            targets_flat = labels.view(-1, 1).long()

            negative_log_likelihood_flat = -torch.gather(
                log_probs_flat, dim=1, index=targets_flat)
            # shape : (batch, sequence_length)
            negative_log_likelihood = negative_log_likelihood_flat.view(
                *labels.size())
            # shape : (batch, sequence_length)
            negative_log_likelihood = negative_log_likelihood * mask.float()

            from allennlp.training.metrics import PearsonCorrelation
            self.m = PearsonCorrelation()
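            # Note: the metric is re-created on every forward call that has labels, so at any point it only
            # reflects the statistics of the most recent batch.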

            self.m(predictions=negative_log_likelihood,
                   gold_labels=labels.float())

    # As before, the labels are optional, since we might want to run this model to make predictions on unlabeled
    # data. If we do have labels, then we use them to update our accuracy metric and compute the "loss" in the output.

        return output
Example #4
    def __init__(self, sim_file_path: str) -> None:
        self._sim_data = []
        self._sim_gold = []
        self._data_reader = KoWikiReader()
        self._pearson = PearsonCorrelation()

        with open(sim_file_path, 'r', encoding='utf-8') as f:
            f.readline()
            for line in f:
                w1, w2, score = line.strip().split('\t')
                self._sim_data.append((w1, w2))
                self._sim_gold.append(float(score))
        self._sim_gold = torch.tensor(self._sim_gold)
Example #5
    def test_pearson_correlation_unmasked_computation(self):
        pearson_correlation = PearsonCorrelation()
        batch_size = 100
        num_labels = 10
        predictions_1 = np.random.randn(batch_size, num_labels).astype("float32")
        labels_1 = 0.5 * predictions_1 + np.random.randn(batch_size, num_labels).astype("float32")

        predictions_2 = np.random.randn(1).repeat(num_labels).astype("float32")
        predictions_2 = predictions_2[np.newaxis, :].repeat(batch_size, axis=0)
        labels_2 = np.random.randn(1).repeat(num_labels).astype("float32")
        labels_2 = 0.5 * predictions_2 + labels_2[np.newaxis, :].repeat(batch_size, axis=0)

        # In most cases the data is constructed like predictions_1, where the values within a batch differ.
        # In a few cases, however, such as predictions_2, the values within a batch are all exactly the same.
        predictions_labels = [(predictions_1, labels_1), (predictions_2, labels_2)]

        stride = 10

        for predictions, labels in predictions_labels:
            pearson_correlation.reset()
            for i in range(batch_size // stride):
                timestep_predictions = torch.FloatTensor(predictions[stride * i:stride * (i + 1), :])
                timestep_labels = torch.FloatTensor(labels[stride * i:stride * (i + 1), :])
                expected_pearson_correlation = pearson_corrcoef(predictions[:stride * (i + 1), :].reshape(-1),
                                                                labels[:stride * (i + 1), :].reshape(-1))
                pearson_correlation(timestep_predictions, timestep_labels)
                assert_allclose(expected_pearson_correlation, pearson_correlation.get_metric(), rtol=1e-5)
            # Test reset
            pearson_correlation.reset()
            pearson_correlation(torch.FloatTensor(predictions), torch.FloatTensor(labels))
            assert_allclose(pearson_corrcoef(predictions.reshape(-1), labels.reshape(-1)),
                            pearson_correlation.get_metric(), rtol=1e-5)
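
The tests above and below compare against a pearson_corrcoef helper that is not shown in this listing.
Judging from how Example #10 and Example #16 compute the expected value inline with np.corrcoef and np.cov,
the helper presumably looks roughly like the following sketch (an assumption, not the actual test utility):

import numpy as np

def pearson_corrcoef(x, y, fweights=None):
    # Pearson r between two flat arrays, optionally weighted by 0/1 frequency weights (the mask).
    covariance_matrix = np.cov(x, y, fweights=fweights)
    return covariance_matrix[0, 1] / np.sqrt(covariance_matrix[0, 0] * covariance_matrix[1, 1])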
Example #6
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 seq2vec_encoder: Seq2VecEncoder,
                 seq2seq_encoder: Seq2SeqEncoder = None,
                 dropout: float = None,
                 scale: float = 1,
                 label_namespace: str = "labels",
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super().__init__(vocab, regularizer)
        self._text_field_embedder = text_field_embedder

        if seq2seq_encoder:
            self._seq2seq_encoder = seq2seq_encoder
        else:
            self._seq2seq_encoder = None

        self._seq2vec_encoder = seq2vec_encoder

        # Run the encoder separately on each input and concatenate the results.
        self._classifier_input_dim = self._seq2vec_encoder.get_output_dim() * 2

        if dropout:
            self._dropout = torch.nn.Dropout(dropout)
            self._dropout_a = torch.nn.Dropout(dropout)
            self._dropout_b = torch.nn.Dropout(dropout)
        else:
            self._dropout = None

        self._label_namespace = label_namespace

        self._num_labels = 1  # because we're running a regression task
        self._scale = scale
        self.__first = True

        self._mlp_dims = [self._classifier_input_dim] * 3
        self._mlp_layers = torch.nn.ModuleList()
        for i, j in zip(self._mlp_dims, self._mlp_dims[1:]):
            self._mlp_layers.append(torch.nn.Linear(i, j))
            self._mlp_layers.append(torch.nn.ReLU())
            if dropout:
                self._mlp_layers.append(torch.nn.Dropout(dropout))
        self._classification_layer = torch.nn.Linear(
            self._classifier_input_dim, self._num_labels)
        self._metric = PearsonCorrelation()
        self._similarity = torch.nn.CosineSimilarity()
        self._loss = torch.nn.MSELoss()
        initializer(self)
Example #7
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 projection_feedforward: FeedForward,
                 inference_encoder: Seq2SeqEncoder,
                 output_feedforward: FeedForward,
                 output_logit: FeedForward,
                 dropout: float = 0.5,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder

        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._projection_feedforward = projection_feedforward

        self._inference_encoder = inference_encoder

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
            self.rnn_input_dropout = InputVariationalDropout(dropout)
        else:
            self.dropout = None
            self.rnn_input_dropout = None

        self._output_feedforward = output_feedforward

        self._num_labels = 1

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        check_dimensions_match(encoder.get_output_dim() * 4,
                               projection_feedforward.get_input_dim(),
                               "encoder output dim",
                               "projection feedforward input")
        check_dimensions_match(projection_feedforward.get_output_dim(),
                               inference_encoder.get_input_dim(),
                               "proj feedforward output dim",
                               "inference lstm input dim")

        self._metric = PearsonCorrelation()
        self._loss = torch.nn.MSELoss()

        initializer(self)
Example #8
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 encoder: Seq2VecEncoder, vocab: Vocabulary) -> None:
        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.encoder = encoder

        hidden_dim = 128
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(in_features=encoder.get_output_dim() * 4,
                            out_features=hidden_dim), torch.nn.Tanh(),
            torch.nn.Linear(in_features=hidden_dim, out_features=hidden_dim),
            torch.nn.Tanh(),
            torch.nn.Linear(in_features=hidden_dim, out_features=1))
        self.covar = Covariance()
        self.pearson = PearsonCorrelation()
Example #9
    def test_pearson_correlation_masked_computation(self, device: str):
        pearson_correlation = PearsonCorrelation()
        batch_size = 100
        num_labels = 10
        predictions_1 = torch.randn(batch_size, num_labels, device=device)
        labels_1 = 0.5 * predictions_1 + torch.randn(
            batch_size, num_labels, device=device)

        predictions_2 = torch.randn(1, device=device).expand(num_labels)
        predictions_2 = predictions_2.unsqueeze(0).expand(batch_size, -1)
        labels_2 = torch.randn(1, device=device).expand(num_labels)
        labels_2 = 0.5 * predictions_2 + labels_2.unsqueeze(0).expand(
            batch_size, -1)

        predictions_labels = [(predictions_1, labels_1),
                              (predictions_2, labels_2)]

        # Random binary mask
        mask = torch.randint(0,
                             2,
                             size=(batch_size, num_labels),
                             device=device).bool()
        stride = 10

        for predictions, labels in predictions_labels:
            pearson_correlation.reset()
            for i in range(batch_size // stride):
                timestep_predictions = predictions[stride * i:stride *
                                                   (i + 1), :]
                timestep_labels = labels[stride * i:stride * (i + 1), :]
                timestep_mask = mask[stride * i:stride * (i + 1), :]
                expected_pearson_correlation = pearson_corrcoef(
                    predictions[:stride * (i + 1), :].view(-1).cpu().numpy(),
                    labels[:stride * (i + 1), :].view(-1).cpu().numpy(),
                    fweights=mask[:stride * (i + 1), :].view(-1).cpu().numpy(),
                )

                pearson_correlation(timestep_predictions, timestep_labels,
                                    timestep_mask)
                assert_allclose(expected_pearson_correlation,
                                pearson_correlation.get_metric())
            # Test reset
            pearson_correlation.reset()
            pearson_correlation(predictions, labels, mask)
            expected_pearson_correlation = pearson_corrcoef(
                predictions.view(-1).cpu().numpy(),
                labels.view(-1).cpu().numpy(),
                fweights=mask.view(-1).cpu().numpy(),
            )

            assert_allclose(expected_pearson_correlation,
                            pearson_correlation.get_metric())
Example #10
    def test_pearson_correlation_unmasked_computation(self):
        pearson_correlation = PearsonCorrelation()
        batch_size = 100
        num_labels = 10
        predictions = np.random.randn(batch_size, num_labels).astype("float32")
        labels = 0.5 * predictions + np.random.randn(
            batch_size, num_labels).astype("float32")

        stride = 10

        for i in range(batch_size // stride):
            timestep_predictions = torch.FloatTensor(
                predictions[stride * i:stride * (i + 1), :])
            timestep_labels = torch.FloatTensor(labels[stride * i:stride *
                                                       (i + 1), :])
            expected_pearson_correlation = np.corrcoef(
                predictions[:stride * (i + 1), :].reshape(-1),
                labels[:stride * (i + 1), :].reshape(-1))[0, 1]
            pearson_correlation(timestep_predictions, timestep_labels)
            assert_allclose(expected_pearson_correlation,
                            pearson_correlation.get_metric(),
                            rtol=1e-5)
        # Test reset
        pearson_correlation.reset()
        pearson_correlation(torch.FloatTensor(predictions),
                            torch.FloatTensor(labels))
        assert_allclose(np.corrcoef(predictions.reshape(-1),
                                    labels.reshape(-1))[0, 1],
                        pearson_correlation.get_metric(),
                        rtol=1e-5)
Example #11
class RuseModel(Model):
    def __init__(self, word_embeddings: TextFieldEmbedder,
                 encoder: Seq2VecEncoder, vocab: Vocabulary) -> None:
        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.encoder = encoder

        hidden_dim = 128
        self.mlp = torch.nn.Sequential(
            torch.nn.Linear(in_features=encoder.get_output_dim() * 4,
                            out_features=hidden_dim), torch.nn.Tanh(),
            torch.nn.Linear(in_features=hidden_dim, out_features=hidden_dim),
            torch.nn.Tanh(),
            torch.nn.Linear(in_features=hidden_dim, out_features=1))
        self.covar = Covariance()
        self.pearson = PearsonCorrelation()

    def forward(self, mt_sent: Dict[str, torch.Tensor],
                ref_sent: Dict[str, torch.Tensor], human_score: np.ndarray,
                origin: str) -> Dict[str, torch.Tensor]:
        mt_mask = get_text_field_mask(mt_sent)
        ref_mask = get_text_field_mask(ref_sent)

        mt_embeddings = self.word_embeddings(mt_sent)
        ref_embeddings = self.word_embeddings(ref_sent)

        mt_encoder_out = self.encoder(mt_embeddings, mt_mask)
        ref_encoder_out = self.encoder(ref_embeddings, ref_mask)

        input = torch.cat((mt_encoder_out, ref_encoder_out,
                           torch.mul(mt_encoder_out, ref_encoder_out),
                           torch.abs(mt_encoder_out - ref_encoder_out)), 1)
        reg = self.mlp(input)
        output = {"reg": reg}

        if human_score is not None:
            # run metric calculation
            self.covar(reg, human_score)
            self.pearson(reg, human_score)

            # squared-error loss: sum of squared differences between prediction and human score
            delta = reg - human_score
            output["loss"] = torch.mul(delta, delta).sum()

        return output

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        return {
            "covar": self.covar.get_metric(reset),
            "pearson": self.pearson.get_metric(reset)
        }
Example #12
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 seq2vec_encoder: Seq2VecEncoder,
                 seq2seq_encoder: Seq2SeqEncoder = None,
                 dropout: float = None,
                 scale: float = 1,
                 label_namespace: str = "labels",
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super().__init__(vocab, regularizer)
        self._text_field_embedder = text_field_embedder

        if seq2seq_encoder:
            self._seq2seq_encoder = seq2seq_encoder
        else:
            self._seq2seq_encoder = None

        self._seq2vec_encoder = seq2vec_encoder
        self._classifier_input_dim = self._seq2vec_encoder.get_output_dim()

        if dropout:
            self._dropout = torch.nn.Dropout(dropout)
        else:
            self._dropout = None

        self._label_namespace = label_namespace

        self._num_labels = 1  # because we're running a regression task
        self._scale = scale

        self._classification_layer = torch.nn.Linear(
            self._classifier_input_dim, self._num_labels)
        self._metric = PearsonCorrelation()
        self._loss = torch.nn.MSELoss()
        initializer(self)
Example #13
    def test_pearson_correlation_unmasked_computation(self, device: str):
        pearson_correlation = PearsonCorrelation()
        batch_size = 100
        num_labels = 10
        predictions_1 = torch.randn(batch_size, num_labels, device=device)
        labels_1 = 0.5 * predictions_1 + torch.randn(
            batch_size, num_labels, device=device)

        predictions_2 = torch.randn(1, device=device).expand(num_labels)
        predictions_2 = predictions_2.unsqueeze(0).expand(batch_size, -1)
        labels_2 = torch.randn(1, device=device).expand(num_labels)
        labels_2 = 0.5 * predictions_2 + labels_2.unsqueeze(0).expand(
            batch_size, -1)

        # In most cases the data is constructed like predictions_1, where the values within a batch differ.
        # In a few cases, however, such as predictions_2, the values within a batch are all exactly the same.
        predictions_labels = [(predictions_1, labels_1),
                              (predictions_2, labels_2)]

        stride = 10

        for predictions, labels in predictions_labels:
            pearson_correlation.reset()
            for i in range(batch_size // stride):
                timestep_predictions = predictions[stride * i:stride *
                                                   (i + 1), :]
                timestep_labels = labels[stride * i:stride * (i + 1), :]
                expected_pearson_correlation = pearson_corrcoef(
                    predictions[:stride * (i + 1), :].view(-1).cpu().numpy(),
                    labels[:stride * (i + 1), :].view(-1).cpu().numpy(),
                )
                pearson_correlation(timestep_predictions, timestep_labels)
                assert_allclose(expected_pearson_correlation,
                                pearson_correlation.get_metric())
            # Test reset
            pearson_correlation.reset()
            pearson_correlation(predictions, labels)
            assert_allclose(
                pearson_corrcoef(
                    predictions.view(-1).cpu().numpy(),
                    labels.view(-1).cpu().numpy()),
                pearson_correlation.get_metric(),
            )
Example #14
    def test_pearson_correlation_masked_computation(self):
        pearson_correlation = PearsonCorrelation()
        batch_size = 100
        num_labels = 10
        predictions_1 = np.random.randn(batch_size, num_labels).astype("float32")
        labels_1 = 0.5 * predictions_1 + np.random.randn(batch_size, num_labels).astype("float32")

        predictions_2 = np.random.randn(1).repeat(num_labels).astype("float32")
        predictions_2 = predictions_2[np.newaxis, :].repeat(batch_size, axis=0)
        labels_2 = np.random.randn(1).repeat(num_labels).astype("float32")
        labels_2 = 0.5 * predictions_2 + labels_2[np.newaxis, :].repeat(batch_size, axis=0)

        predictions_labels = [(predictions_1, labels_1), (predictions_2, labels_2)]

        # Random binary mask
        mask = np.random.randint(0, 2, size=(batch_size, num_labels)).astype("float32")
        stride = 10

        for predictions, labels in predictions_labels:
            pearson_correlation.reset()
            for i in range(batch_size // stride):
                timestep_predictions = torch.FloatTensor(predictions[stride * i:stride * (i + 1), :])
                timestep_labels = torch.FloatTensor(labels[stride * i:stride * (i + 1), :])
                timestep_mask = torch.FloatTensor(mask[stride * i:stride * (i + 1), :])
                expected_pearson_correlation = pearson_corrcoef(predictions[:stride * (i + 1), :].reshape(-1),
                                                                labels[:stride * (i + 1), :].reshape(-1),
                                                                fweights=mask[:stride * (i + 1), :].reshape(-1))

                pearson_correlation(timestep_predictions, timestep_labels, timestep_mask)
                assert_allclose(expected_pearson_correlation, pearson_correlation.get_metric(), rtol=1e-5)
            # Test reset
            pearson_correlation.reset()
            pearson_correlation(torch.FloatTensor(predictions),
                                torch.FloatTensor(labels), torch.FloatTensor(mask))
            expected_pearson_correlation = pearson_corrcoef(predictions.reshape(-1), labels.reshape(-1),
                                                            fweights=mask.reshape(-1))

            assert_allclose(expected_pearson_correlation, pearson_correlation.get_metric(), rtol=1e-5)
Example #15
    def test_distributed_pearson(self):
        batch_size = 10
        num_labels = 10
        predictions = torch.randn(batch_size, num_labels)
        labels = 0.5 * predictions + torch.randn(batch_size, num_labels)

        expected_pearson_correlation = pearson_corrcoef(
            predictions.view(-1).cpu().numpy(),
            labels.view(-1).cpu().numpy(),
        )
        predictions = [predictions[:5], predictions[5:]]
        labels = [labels[:5], labels[5:]]
        metric_kwargs = {"predictions": predictions, "gold_labels": labels}
        run_distributed_test(
            [-1, -1],
            global_distributed_metric,
            PearsonCorrelation(),
            metric_kwargs,
            expected_pearson_correlation,
            exact=(0.0001, 1e-01),
        )
Example #16
    def test_pearson_correlation_masked_computation(self):
        pearson_correlation = PearsonCorrelation()
        batch_size = 100
        num_labels = 10
        predictions = np.random.randn(batch_size, num_labels).astype("float32")
        labels = 0.5 * predictions + np.random.randn(
            batch_size, num_labels).astype("float32")
        # Random binary mask
        mask = np.random.randint(0, 2, size=(batch_size,
                                             num_labels)).astype("float32")
        stride = 10

        for i in range(batch_size // stride):
            timestep_predictions = torch.FloatTensor(
                predictions[stride * i:stride * (i + 1), :])
            timestep_labels = torch.FloatTensor(labels[stride * i:stride *
                                                       (i + 1), :])
            timestep_mask = torch.FloatTensor(mask[stride * i:stride *
                                                   (i + 1), :])
            covariance_matrices = np.cov(
                predictions[:stride * (i + 1), :].reshape(-1),
                labels[:stride * (i + 1), :].reshape(-1),
                fweights=mask[:stride * (i + 1), :].reshape(-1))
            expected_pearson_correlation = covariance_matrices[0, 1] / np.sqrt(
                covariance_matrices[0, 0] * covariance_matrices[1, 1])
            pearson_correlation(timestep_predictions, timestep_labels,
                                timestep_mask)
            assert_allclose(expected_pearson_correlation,
                            pearson_correlation.get_metric(),
                            rtol=1e-5)
        # Test reset
        pearson_correlation.reset()
        pearson_correlation(torch.FloatTensor(predictions),
                            torch.FloatTensor(labels), torch.FloatTensor(mask))
        covariance_matrices = np.cov(predictions.reshape(-1),
                                     labels.reshape(-1),
                                     fweights=mask.reshape(-1))
        expected_pearson_correlation = covariance_matrices[0, 1] / np.sqrt(
            covariance_matrices[0, 0] * covariance_matrices[1, 1])
        assert_allclose(expected_pearson_correlation,
                        pearson_correlation.get_metric(),
                        rtol=1e-5)
Example #17
class AttMT(Model):
    """
    This ``Model`` implements the baseline model with attention.

    Parameters
    ----------
    vocab : ``Vocabulary``
    text_field_embedder : ``TextFieldEmbedder``
        Used to embed the ``mtref`` and ``mtsys`` ``TextFields`` we get as input to the
        model.
    encoder : ``Seq2SeqEncoder``
        Used to encode the mtref and mtsys.
    similarity_function : ``SimilarityFunction``
        This is the similarity function used when computing the similarity matrix between encoded
        words in the mtref and words in the mtsys.
    output_feedforward : ``FeedForward``
        Used to prepare the concatenated mtref and mtsys for prediction.
    output_logit : ``FeedForward``
        Legacy input that does nothing.
    dropout : ``float``, optional (default=0.5)
        Dropout percentage to use.
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 output_feedforward: FeedForward,
                 output_logit: FeedForward,
                 dropout: float = 0.5,
                 aggr_type: str = "both",
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder

        self._matrix_attention = LegacyMatrixAttention(similarity_function)

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
            self.rnn_input_dropout = InputVariationalDropout(dropout)
        else:
            self.dropout = None
            self.rnn_input_dropout = None

        self._output_feedforward = output_feedforward
        self._output_logit = output_logit
        self._num_labels = 1

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        # check_dimensions_match(encoder.get_output_dim() * 4, projection_feedforward.get_input_dim(),
        #                        "encoder output dim", "projection feedforward input")
        # check_dimensions_match(projection_feedforward.get_output_dim(), inference_encoder.get_input_dim(),
        #                        "proj feedforward output dim", "inference lstm input dim")
        self._aggr_type = aggr_type
        self._metric = PearsonCorrelation()
        self._loss = torch.nn.MSELoss()

        initializer(self)

    def forward(
        self,  # type: ignore
        ref: Dict[str, torch.LongTensor],
        mt: Dict[str, torch.LongTensor],
        score: torch.IntTensor = None,
        # pylint:disable=unused-argument
    ) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        ref : Dict[str, torch.LongTensor]
            From a ``TextField``
        mt : Dict[str, torch.LongTensor]
            From a ``TextField``
        score : torch.IntTensor, optional (default = None)
            From a ``NumericField``
        Returns
        -------
        An output dictionary consisting of:

        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        embedded_mtref = self._text_field_embedder(ref)
        embedded_mtsys = self._text_field_embedder(mt)
        mtref_mask = get_text_field_mask(ref).float()
        mtsys_mask = get_text_field_mask(mt).float()

        # apply dropout for LSTM
        if self.rnn_input_dropout:
            embedded_mtref = self.rnn_input_dropout(embedded_mtref)
            embedded_mtsys = self.rnn_input_dropout(embedded_mtsys)

        # encode mtref and mtsys

        # Shape: (batch_size, mtref/sys_length, modeldim*bi? =600)
        encoded_mtref = self._encoder(embedded_mtref, mtref_mask)
        encoded_mtsys = self._encoder(embedded_mtsys, mtsys_mask)

        # Shape: (batch_size, mtref_length, mtsys_length)
        similarity_matrix = self._matrix_attention(encoded_mtref,
                                                   encoded_mtsys)

        # Shape: (batch_size, mtref_length, mtsys_length)
        p2h_attention = masked_softmax(similarity_matrix, mtsys_mask)
        # Shape: (batch_size, mtref_length, modeldim*2)
        attended_mtref = weighted_sum(encoded_mtsys, p2h_attention)

        # Shape: (batch_size, mtsys_length, mtref_length)
        h2p_attention = masked_softmax(
            similarity_matrix.transpose(1, 2).contiguous(), mtref_mask)
        # Shape: (batch_size, mtsys_length, modeldim*2)
        attended_mtsys = weighted_sum(encoded_mtref, h2p_attention)

        # The pooling layer -- max and avg pooling.
        # (batch_size, model_dim *2 = 600)
        v_a_max, _ = replace_masked_values(attended_mtref,
                                           mtref_mask.unsqueeze(-1),
                                           -1e7).max(dim=1)
        # (batch_size, model_dim *2  = 600)
        v_b_max, _ = replace_masked_values(attended_mtsys,
                                           mtsys_mask.unsqueeze(-1),
                                           -1e7).max(dim=1)

        v_a_avg = torch.sum(attended_mtref * mtref_mask.unsqueeze(-1),
                            dim=1) / torch.sum(mtref_mask, 1, keepdim=True)
        v_b_avg = torch.sum(attended_mtsys * mtsys_mask.unsqueeze(-1),
                            dim=1) / torch.sum(mtsys_mask, 1, keepdim=True)

        if self._aggr_type == 'both':
            # Now concat
            # (batch_size, model_dim *2* 2 * 4)
            v_all = torch.cat([
                v_a_avg, v_b_avg, v_a_avg - v_b_avg, v_a_avg * v_b_avg,
                v_a_max, v_b_max, v_a_max - v_b_max, v_a_max * v_b_max
            ],
                              dim=1)
        elif self._aggr_type == 'max':
            # (batch_size, model_dim *2* 4)

            v_all = torch.cat(
                [v_a_max, v_b_max, v_a_max - v_b_max, v_a_max * v_b_max],
                dim=1)

        elif self._aggr_type == 'avg':

            v_all = torch.cat(
                [v_a_avg, v_b_avg, v_a_avg - v_b_avg, v_a_avg * v_b_avg],
                dim=1)

        # the final MLP -- apply dropout to input, and MLP applies to output & hidden
        if self.dropout:
            v_all = self.dropout(v_all)

        pred = self._output_feedforward(v_all)

        output_dict = {'pred': pred}
        if score is not None:
            loss = self._loss(pred, score)
            self._metric(pred, score)
            output_dict["loss"] = loss

        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        return {'pearson': self._metric.get_metric(reset)}
Example #18
    def __init__(self,
                 vocab: Vocabulary,
                 token_representation_dim: int,
                 encoder: Optional[Seq2SeqEncoder] = None,
                 decoder: Optional[Union[FeedForward, str]] = None,
                 contextualizer: Optional[Contextualizer] = None,
                 pretrained_file: Optional[str] = None,
                 transfer_contextualizer_from_pretrained_file: bool = False,
                 transfer_encoder_from_pretrained_file: bool = False,
                 freeze_encoder: bool = False,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(SelectiveRegressor, self).__init__(vocab, regularizer)

        self._token_representation_dim = token_representation_dim
        self._contextualizer = contextualizer
        if encoder is None:
            encoder = PassThroughEncoder(
                input_dim=self._token_representation_dim)
        self._encoder = encoder

        # Load the contextualizer and encoder weights from the
        # pretrained_file if applicable
        if pretrained_file:
            archive = None
            if self._contextualizer and transfer_contextualizer_from_pretrained_file:
                logger.info("Attempting to load contextualizer weights from "
                            "pretrained_file at {}".format(pretrained_file))
                archive = load_archive(cached_path(pretrained_file))
                contextualizer_state = archive.model._contextualizer.state_dict(
                )
                contextualizer_layer_num = self._contextualizer._layer_num
                self._contextualizer.load_state_dict(contextualizer_state)
                if contextualizer_layer_num is not None:
                    logger.info("Setting layer num to {}".format(
                        contextualizer_layer_num))
                    self._contextualizer.set_layer_num(
                        contextualizer_layer_num)
                else:
                    self._contextualizer.reset_layer_num()
                logger.info("Successfully loaded contextualizer weights!")
            if transfer_encoder_from_pretrained_file:
                logger.info("Attempting to load encoder weights from "
                            "pretrained_file at {}".format(pretrained_file))
                if archive is None:
                    archive = load_archive(cached_path(pretrained_file))
                encoder_state = archive.model._encoder.state_dict()
                self._encoder.load_state_dict(encoder_state)
                logger.info("Successfully loaded encoder weights!")

        self._freeze_encoder = freeze_encoder
        for parameter in self._encoder.parameters():
            # If freeze is true, requires_grad should be false and vice versa.
            parameter.requires_grad_(not self._freeze_encoder)

        if decoder is None or decoder == "linear":
            # Create the default decoder (logistic regression) if it is not provided.
            decoder = FeedForward.from_params(
                Params({
                    "input_dim": self._encoder.get_output_dim(),
                    "num_layers": 1,
                    "hidden_dims": 1,
                    "activations": "linear"
                }))
            logger.info("No decoder provided to model, using default "
                        "decoder: {}".format(decoder))
        elif decoder == "mlp":
            # Create the MLP decoder
            decoder = FeedForward.from_params(
                Params({
                    "input_dim": self._encoder.get_output_dim(),
                    "num_layers": 2,
                    "hidden_dims": [1024, 1],
                    "activations": ["relu", "linear"]
                }))
            logger.info("Using MLP decoder: {}".format(decoder))
        self._decoder = decoder

        check_dimensions_match(self._token_representation_dim,
                               self._encoder.get_input_dim(),
                               "token representation dim", "encoder input dim")
        check_dimensions_match(self._encoder.get_output_dim(),
                               self._decoder.get_input_dim(),
                               "encoder output dim", "decoder input dim")
        check_dimensions_match(self._decoder.get_output_dim(), 1,
                               "decoder output dim",
                               "1, since we're predicting a real value")
        # SmoothL1Loss as described in "Neural Models of Factuality" (NAACL 2018)
        self.loss = torch.nn.SmoothL1Loss(reduction="none")
        self.metrics = {
            "mae": MeanAbsoluteError(),
            "pearson_r": PearsonCorrelation()
        }

        # Whether to run in error analysis mode or not, see commands.error_analysis
        self.error_analysis = False
        logger.info("Applying initializer...")
        initializer(self)
Example #19
class BERTMoji(Model):
    """
    This ``Model`` performs text classification for an academic paper.  We assume we're given a
    title and an abstract, and we predict some output label.

    The basic model structure: we'll embed the title and the abstract, and encode each of them with
    separate Seq2VecEncoders, getting a single vector representing the content of each.  We'll then
    concatenate those two vectors, and pass the result through a feedforward network, the output of
    which we'll use as our scores for each label.

    Parameters
    ----------
    vocab : ``Vocabulary``, required
        A Vocabulary, required in order to compute sizes for input/output projections.
    text_field_embedder : ``TextFieldEmbedder``, required
        Used to embed the ``tokens`` ``TextField`` we get as input to the model.
    encoder : ``Seq2VecEncoder``
        The encoder that we will use to convert the text to a vector.
    classifier_feedforward : ``FeedForward``
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """
    def __init__(self, 
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        # raise ValueError(self.vocab.get_vocab_size("tokens"))
        # raise ValueError(text_field_embedder.get_output_dim())

        if text_field_embedder.get_output_dim() != encoder.get_input_dim():
            raise ConfigurationError("The output dimension of the text_field_embedder must match the "
                                     "input dimension of the title_encoder. Found {} and {}, "
                                     "respectively.".format(text_field_embedder.get_output_dim(),
                                                            encoder.get_input_dim()))

        self.text_field_embedder = text_field_embedder
        self.num_classes = self.vocab.get_vocab_size("labels")
        self.encoder = encoder
        self.classifier_feedforward = classifier_feedforward

        self.metrics = {
                "multilabel-f1": MultiLabelF1Measure(),
                'accuracy': BooleanAccuracy()
        }
        self.pearson_r = PearsonCorrelation()
        self.loss = nn.MultiLabelSoftMarginLoss()  # alternative: nn.BCEWithLogitsLoss()
        
        self._threshold = 0.5

        initializer(self)

    @overrides
    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                label: torch.LongTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, Variable], required
            The output of ``TextField.as_array()``.
        label : Variable, optional (default = None)
            A variable representing the label for each instance in the batch.

        Returns
        -------
        An output dictionary consisting of:
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_classes)`` representing a distribution over the
            label classes for each instance.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        # print(tokens)
        embedded = self.text_field_embedder(tokens)
        mask = util.get_text_field_mask(tokens)
        encoded = self.encoder(embedded, mask)

        logits = self.classifier_feedforward(encoded)
        output_dict = {'logits': torch.sigmoid(logits)}
        
        if label is None: # inference
            decoded = self.decode(output_dict)
            output_dict['decoded'] = decoded
        else:
            loss = self.loss(logits, label.float())
            loss = loss + (1-rsq_loss(logits, label.float()))
            
            self.pearson_r(logits, label.float())
            preds = (logits > self._threshold).long()
            for metric in self.metrics.values():
                metric(preds, label)
            output_dict["loss"] = loss

        return output_dict

    # @overrides
    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Does a simple argmax over the class probabilities, converts indices to string labels, and
        adds a ``"label"`` key to the dictionary with the result.
        """
        def get_scores(row):
            scores = ((self.vocab.get_token_from_index(i, namespace="labels"), s) for (i, s) in enumerate(row))
            return sorted(scores, key=lambda x: x[1], reverse=True)
        
        class_probabilities = output_dict['logits']
        predictions = class_probabilities.cpu().data.numpy()
        
        output_dict['scores'] = list(map(get_scores, predictions))
        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        def unpack(m):
            if isinstance(m, tuple):
                return m[-1]
            return m
        metrics = {metric_name: unpack(metric.get_metric(reset)) for metric_name, metric in self.metrics.items()}
        metrics['pearson_r'] = self.pearson_r.get_metric(reset)
        return metrics
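
The forward method above adds (1 - rsq_loss(logits, label.float())) to the loss, but the rsq_loss helper is
not part of this listing. Its exact definition is an assumption; a hypothetical R^2-style agreement score
might look like this:

import torch

def rsq_loss(predictions: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
    # Hypothetical coefficient of determination: 1 - SS_res / SS_tot.
    ss_res = ((targets - predictions) ** 2).sum()
    ss_tot = ((targets - targets.mean()) ** 2).sum()
    return 1.0 - ss_res / ss_tot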
Example #20
class ESIM(Model):
    """
    This ``Model`` implements the ESIM sequence model described in `"Enhanced LSTM for Natural Language Inference"
    <https://www.semanticscholar.org/paper/Enhanced-LSTM-for-Natural-Language-Inference-Chen-Zhu/83e7654d545fbbaaf2328df365a781fb67b841b4>`_
    by Chen et al., 2017.
    This code was taken from the AllenNLP repo, and modified for predicting (continuous) scores for MT system outputs

    Parameters
    ----------
    vocab : ``Vocabulary``
    text_field_embedder : ``TextFieldEmbedder``
        Used to embed the ``mtref`` and ``mtsys`` ``TextFields`` we get as input to the
        model.
    encoder : ``Seq2SeqEncoder``
        Used to encode the mtref and mtsys.
    similarity_function : ``SimilarityFunction``
        This is the similarity function used when computing the similarity matrix between encoded
        words in the mtref and words in the mtsys.
    projection_feedforward : ``FeedForward``
        The feedforward network used to project down the encoded and enhanced mtref and mtsys.
    inference_encoder : ``Seq2SeqEncoder``
        Used to encode the projected mtref and mtsys for prediction.
    output_feedforward : ``FeedForward``
        Used to prepare the concatenated mtref and mtsys for prediction.
    output_logit : ``FeedForward``
        Legacy input that does nothing.
    dropout : ``float``, optional (default=0.5)
        Dropout percentage to use.
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        Used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 encoder: Seq2SeqEncoder,
                 similarity_function: SimilarityFunction,
                 projection_feedforward: FeedForward,
                 inference_encoder: Seq2SeqEncoder,
                 output_feedforward: FeedForward,
                 output_logit: FeedForward,
                 dropout: float = 0.5,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self._text_field_embedder = text_field_embedder
        self._encoder = encoder

        self._matrix_attention = LegacyMatrixAttention(similarity_function)
        self._projection_feedforward = projection_feedforward

        self._inference_encoder = inference_encoder

        if dropout:
            self.dropout = torch.nn.Dropout(dropout)
            self.rnn_input_dropout = InputVariationalDropout(dropout)
        else:
            self.dropout = None
            self.rnn_input_dropout = None

        self._output_feedforward = output_feedforward

        self._num_labels = 1

        check_dimensions_match(text_field_embedder.get_output_dim(),
                               encoder.get_input_dim(),
                               "text field embedding dim", "encoder input dim")
        check_dimensions_match(encoder.get_output_dim() * 4,
                               projection_feedforward.get_input_dim(),
                               "encoder output dim",
                               "projection feedforward input")
        check_dimensions_match(projection_feedforward.get_output_dim(),
                               inference_encoder.get_input_dim(),
                               "proj feedforward output dim",
                               "inference lstm input dim")

        self._metric = PearsonCorrelation()
        self._loss = torch.nn.MSELoss()

        initializer(self)

    def forward(
        self,  # type: ignore
        ref: Dict[str, torch.LongTensor],
        mt: Dict[str, torch.LongTensor],
        score: torch.IntTensor = None  # pylint:disable=unused-argument
    ) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        ref : Dict[str, torch.LongTensor]
            From a ``TextField``
        mt : Dict[str, torch.LongTensor]
            From a ``TextField``
        score : torch.IntTensor, optional (default = None)
            From a ``NumericField`` 

        Returns
        -------
        An output dictionary consisting of:

        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        # print(worker)
        embedded_mtref = self._text_field_embedder(ref)
        embedded_mtsys = self._text_field_embedder(mt)
        mtref_mask = get_text_field_mask(ref).float()
        mtsys_mask = get_text_field_mask(mt).float()

        # apply dropout for LSTM
        if self.rnn_input_dropout:
            embedded_mtref = self.rnn_input_dropout(embedded_mtref)
            embedded_mtsys = self.rnn_input_dropout(embedded_mtsys)

        # encode mtref and mtsys

        # Shape: (batch_size, mtref/sys_length, modeldim*2 =600)
        encoded_mtref = self._encoder(embedded_mtref, mtref_mask)
        encoded_mtsys = self._encoder(embedded_mtsys, mtsys_mask)

        # Shape: (batch_size, mtref_length, mtsys_length)
        similarity_matrix = self._matrix_attention(encoded_mtref,
                                                   encoded_mtsys)

        # Shape: (batch_size, mtref_length, mtsys_length)
        p2h_attention = masked_softmax(similarity_matrix, mtsys_mask)
        # Shape: (batch_size, mtref_length, embedding_dim)
        attended_mtsys = weighted_sum(encoded_mtsys, p2h_attention)

        # Shape: (batch_size, mtsys_length, mtref_length)
        h2p_attention = masked_softmax(
            similarity_matrix.transpose(1, 2).contiguous(), mtref_mask)
        # Shape: (batch_size, mtsys_length, embedding_dim)
        attended_mtref = weighted_sum(encoded_mtref, h2p_attention)

        # the "enhancement" layer
        # Shape: (batch_size, mtref/sys_length, modeldim *2 * 4=2400)
        mtref_enhanced = torch.cat([
            encoded_mtref, attended_mtsys, encoded_mtref - attended_mtsys,
            encoded_mtref * attended_mtsys
        ],
                                   dim=-1)
        mtsys_enhanced = torch.cat([
            encoded_mtsys, attended_mtref, encoded_mtsys - attended_mtref,
            encoded_mtsys * attended_mtref
        ],
                                   dim=-1)

        # The projection layer down to the model dimension.  Dropout is not applied before
        # projection.

        # Shape: (batch_size, mtref/sys_length, modeldim =300)
        projected_enhanced_mtref = self._projection_feedforward(mtref_enhanced)
        projected_enhanced_mtsys = self._projection_feedforward(mtsys_enhanced)

        # Run the inference layer
        if self.rnn_input_dropout:
            projected_enhanced_mtref = self.rnn_input_dropout(
                projected_enhanced_mtref)
            projected_enhanced_mtsys = self.rnn_input_dropout(
                projected_enhanced_mtsys)
        # Shape: (batch_size, mtref/sys_length, modeldim*2 =600)
        v_ai = self._inference_encoder(projected_enhanced_mtref, mtref_mask)
        v_bi = self._inference_encoder(projected_enhanced_mtsys, mtsys_mask)

        # The pooling layer -- max and avg pooling.
        # (batch_size, model_dim*2 = 600)
        v_a_max, _ = replace_masked_values(v_ai, mtref_mask.unsqueeze(-1),
                                           -1e7).max(dim=1)
        # (batch_size, model_dim * 2 = 600)
        v_b_max, _ = replace_masked_values(v_bi, mtsys_mask.unsqueeze(-1),
                                           -1e7).max(dim=1)
        # (batch_size, model_dim * 2 = 600)
        v_a_avg = torch.sum(v_ai * mtref_mask.unsqueeze(-1),
                            dim=1) / torch.sum(mtref_mask, 1, keepdim=True)
        # (batch_size, model_dim * 2 = 600)
        v_b_avg = torch.sum(v_bi * mtsys_mask.unsqueeze(-1),
                            dim=1) / torch.sum(mtsys_mask, 1, keepdim=True)

        # Now concat
        # (batch_size, model_dim * 2 * 4)
        v_all = torch.cat([v_a_avg, v_a_max, v_b_avg, v_b_max], dim=1)

        # the final MLP -- apply dropout to input, and MLP applies to output & hidden
        if self.dropout:
            v_all = self.dropout(v_all)

        pred = self._output_feedforward(v_all)

        output_dict = {'pred': pred}

        if score is not None:
            loss = self._loss(pred, score)
            self._metric(pred, score)
            output_dict["loss"] = loss

        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        return {'pearson': self._metric.get_metric(reset)}
Example #21
class LstmTagger(Model):
    '''
    The other class you basically have to implement is Model, which is a subclass of torch.nn.Module.
    How it works is largely up to you; it mostly just needs a forward method that takes tensor inputs and
    produces a dictionary of tensor outputs, including the loss you will use to train the model.
    As mentioned above, our model will consist of an embedding layer, a sequence encoder, and a feedforward network.
    '''
    '''
    One thing that may seem unusual is that we pass the embedder and the sequence encoder in as constructor
    parameters. This lets us experiment with different embedders and encoders without having to change the
    model code.
    '''
    def __init__(
        self,
        word_embeddings: TextFieldEmbedder,
        # The embedding layer is specified as an AllenNLP TextFieldEmbedder, which represents a general way of
        # turning tokens into tensors. (Here we know we want to represent each unique word with a learned tensor,
        # but the general class lets us easily experiment with different kinds of embeddings, e.g. ELMo.)
        encoder: Seq2SeqEncoder,
        # Similarly, the encoder is specified as a generic Seq2SeqEncoder even though we know we want to use an
        # LSTM. Again, this makes it easy to try other sequence encoders, for example a Transformer.
        vocab: Vocabulary
    ) -> None:
        # Every AllenNLP model also needs a Vocabulary, which holds the namespaced mappings of tokens to indices
        # and labels to indices.

        super().__init__(vocab)
        self.word_embeddings = word_embeddings
        self.encoder = encoder
        # Note that we have to pass the vocab to the base class constructor.

        self.hidden2tag = torch.nn.Linear(
            in_features=encoder.get_output_dim(),
            out_features=vocab.get_vocab_size('labels'))
        # The feedforward layer is not passed in as a parameter; we construct it ourselves. Note that it looks at
        # the encoder to find the correct input dimension and at the vocabulary (specifically the label->index
        # mapping) to find the correct output dimension.

        self.accuracy = CategoricalAccuracy()
        # Finally, note that we also instantiate a CategoricalAccuracy metric, which we will use to track
        # accuracy over each training and validation epoch.

    def forward(self,
                sentence: Dict[str, torch.Tensor],
                labels: torch.Tensor = None) -> Dict[str, torch.Tensor]:
        # Next we need to implement forward, which is where the actual computation happens.
        # Each Instance in your dataset gets fed into forward (batched together with other
        # instances). The forward method takes tensors as input, and their names should match
        # the names of the fields in your Instances. In this case we have a sentence field
        # and (possibly) a labels field, so we build forward accordingly:

        mask = get_text_field_mask(sentence)
        # AllenNLP is designed to operate on batched inputs, but different input sequences
        # have different lengths. AllenNLP therefore pads the shorter inputs so that the batch
        # has a uniform shape, which means our computations need a mask to exclude the
        # padding. Here we just use the utility function get_text_field_mask, which returns a
        # tensor of 0s and 1s corresponding to the padded and unpadded positions.
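        # For example, a batch with two sentences of lengths 3 and 5 would (conceptually)
        # get the mask [[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]].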

        embeddings = self.word_embeddings(sentence)
        # We start by passing the sentence tensor (each sentence is a sequence of token IDs)
        # to the word_embeddings module, which converts each sentence into a sequence of
        # embedded tensors.

        encoder_out = self.encoder(embeddings, mask)
        # Next, we pass the embedded tensors (and the mask) to the LSTM, which produces a
        # sequence of encoded outputs.

        tag_logits = self.hidden2tag(encoder_out)
        output = {"tag_logits": tag_logits}
        # Finally, we pass each encoded output tensor to the feedforward layer to produce
        # logits corresponding to the various tags.

        if labels is not None:
            self.accuracy(tag_logits, labels, mask)
            output["loss"] = sequence_cross_entropy_with_logits(
                tag_logits, labels, mask)

            # The block below re-derives the per-token negative log likelihood by hand; it is
            # the same quantity that sequence_cross_entropy_with_logits masks and averages.
            # shape : (batch * sequence_length, num_classes)
            logits_flat = tag_logits.view(-1, tag_logits.size(-1))
            log_probs_flat = torch.nn.functional.log_softmax(logits_flat,
                                                             dim=-1)
            # shape : (batch * sequence_length, 1)
            targets_flat = labels.view(-1, 1).long()

            negative_log_likelihood_flat = -torch.gather(
                log_probs_flat, dim=1, index=targets_flat)
            # shape : (batch, sequence_length)
            negative_log_likelihood = negative_log_likelihood_flat.view(
                *labels.size())
            # shape : (batch, sequence_length)
            negative_log_likelihood = negative_log_likelihood * mask.float()

            # Create the Pearson metric once so it accumulates statistics across batches
            # instead of being re-instantiated (and therefore reset) on every forward pass.
            if not hasattr(self, "m"):
                from allennlp.training.metrics import PearsonCorrelation
                self.m = PearsonCorrelation()

            self.m(predictions=negative_log_likelihood,
                   gold_labels=labels.float())

    # As before, the labels are optional, since we might want to run this model to make
    # predictions on unlabeled data. If we do have labels, we use them to update our accuracy
    # metric and to compute the "loss" in the output.

        return output

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        return {
            "accuracy": self.accuracy.get_metric(reset),
            "pearson_correlation": self.m.get_metric(reset)
        }
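
How this LstmTagger is typically wired together -- a sketch only, assuming a vocab has already been built (e.g. with Vocabulary.from_instances over your training data); the embedding and hidden sizes here are arbitrary:

import torch
from allennlp.modules.text_field_embedders import BasicTextFieldEmbedder
from allennlp.modules.token_embedders import Embedding
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper

EMBEDDING_DIM = 64
HIDDEN_DIM = 64

# `vocab` is assumed to exist already, built from the training instances.
token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                            embedding_dim=EMBEDDING_DIM)
word_embeddings = BasicTextFieldEmbedder({"tokens": token_embedding})
lstm = PytorchSeq2SeqWrapper(
    torch.nn.LSTM(EMBEDDING_DIM, HIDDEN_DIM, batch_first=True))
model = LstmTagger(word_embeddings, lstm, vocab)
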
Ejemplo n.º 22
0
class STSBRegressor(Model):
    """
    This ``Model`` implements a basic text regressor. After embedding the text into
    a text field, we will optionally encode the embeddings with a ``Seq2SeqEncoder``. The
    resulting sequence is pooled using a ``Seq2VecEncoder`` and then passed to
    a linear regression layer, which projects into a single value. If a
    ``Seq2SeqEncoder`` is not provided, we will pass the embedded text directly to the
    ``Seq2VecEncoder``.

    Parameters
    ----------
    vocab : ``Vocabulary``
    text_field_embedder : ``TextFieldEmbedder``
        Used to embed the tokens in the input ``TextField``s.
    seq2seq_encoder : ``Seq2SeqEncoder``, optional (default=``None``)
        Optional Seq2Seq encoder layer for the input text.
    seq2vec_encoder : ``Seq2VecEncoder``
        Required Seq2Vec encoder layer. If `seq2seq_encoder` is provided, this encoder
        will pool its output. Otherwise, this encoder will operate directly on the output
        of the `text_field_embedder`.
    dropout : ``float``, optional (default = ``None``)
        Dropout percentage to use.
    scale : ``float``, optional (default = 1)
        The regression output is scaled to lie between 0 and ``scale``.
    label_namespace: ``str``, optional (default = "labels")
        Vocabulary namespace corresponding to labels. By default, we use the "labels" namespace.
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        If provided, will be used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """
    def __init__(self,
                 vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 seq2vec_encoder: Seq2VecEncoder,
                 seq2seq_encoder: Seq2SeqEncoder = None,
                 dropout: float = None,
                 scale: float = 1,
                 label_namespace: str = "labels",
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:

        super().__init__(vocab, regularizer)
        self._text_field_embedder = text_field_embedder

        if seq2seq_encoder:
            self._seq2seq_encoder = seq2seq_encoder
        else:
            self._seq2seq_encoder = None

        self._seq2vec_encoder = seq2vec_encoder

        # Run the seq2vec encoder on each sentence separately and concatenate the results.
        self._classifier_input_dim = self._seq2vec_encoder.get_output_dim() * 2

        if dropout:
            self._dropout = torch.nn.Dropout(dropout)
            self._dropout_a = torch.nn.Dropout(dropout)
            self._dropout_b = torch.nn.Dropout(dropout)
        else:
            self._dropout = None

        self._label_namespace = label_namespace

        self._num_labels = 1  # because we're running a regression task
        self._scale = scale
        self.__first = True

        self._mlp_dims = [self._classifier_input_dim] * 3
        self._mlp_layers = torch.nn.ModuleList()
        for i, j in zip(self._mlp_dims, self._mlp_dims[1:]):
            self._mlp_layers.append(torch.nn.Linear(i, j))
            self._mlp_layers.append(torch.nn.ReLU())
            if dropout:
                self._mlp_layers.append(torch.nn.Dropout(dropout))
        self._classification_layer = torch.nn.Linear(
            self._classifier_input_dim, self._num_labels)
        self._metric = PearsonCorrelation()
        self._similarity = torch.nn.CosineSimilarity()
        self._loss = torch.nn.MSELoss()
        initializer(self)

    def forward(
            self,  # type: ignore
            tokens_a: Dict[str, torch.LongTensor],
            tokens_b: Dict[str, torch.LongTensor],
            label: torch.IntTensor = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens_a : Dict[str, torch.LongTensor]
            From a ``TextField`` containing the first sentence.
        tokens_b : Dict[str, torch.LongTensor]
            From a ``TextField`` containing the second sentence.
        label : torch.IntTensor, optional (default = None)
            From a ``LabelField``

        Returns
        -------
        An output dictionary consisting of:

        logits : torch.FloatTensor
            A tensor of shape ``(batch_size,)`` containing the predicted similarity
            score for each sentence pair.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        tokens = {
            "tokens_a": tokens_a["tokens_a"],
            "tokens_b": tokens_b["tokens_b"]
        }
        if self.__first:
            self.__first = False
            print("tokens: \n")
            print(tokens)
        # For some reason (unclear why), tokens_a and tokens_b each also include the other's
        # key, so strip them back down to their own key before computing the masks.
        tokens_a = {"tokens_a": tokens_a["tokens_a"]}
        tokens_b = {"tokens_b": tokens_b["tokens_b"]}
        embedded_text = self._text_field_embedder(tokens)
        embedded_text_a = embedded_text[
            "tokens_a"]  # TODO: check the shape for this
        mask_a = get_text_field_mask(tokens_a).float()
        embedded_text_b = embedded_text["tokens_b"]
        mask_b = get_text_field_mask(tokens_b).float()

        if self._seq2seq_encoder:
            embedded_text_a = self._seq2seq_encoder(embedded_text_a,
                                                    mask=mask_a)
            embedded_text_b = self._seq2seq_encoder(embedded_text_b,
                                                    mask=mask_b)

        embedded_text_a = self._seq2vec_encoder(embedded_text_a, mask=mask_a)
        embedded_text_b = self._seq2vec_encoder(embedded_text_b, mask=mask_b)
        # embedded_text = torch.cat([embedded_text_a, embedded_text_b], dim=-1)

        if self._dropout:
            embedded_text_a = self._dropout_a(embedded_text_a)
            embedded_text_b = self._dropout_b(embedded_text_b)
        '''
        if self._mlp_layers:
            for l in self._mlp_layers:
                embedded_text = l(embedded_text)
        logits = self._classification_layer(embedded_text)
        '''
        # Cosine similarity lies in [-1, 1]; multiplying by 5 (hard-coded here rather than
        # using self._scale) maps it roughly onto the 0-5 STS-B score range.
        logits = self._similarity(embedded_text_a, embedded_text_b) * 5
        output_dict = {"logits": logits}

        if label is not None:  # convert the label into a float number and update the metric
            label_to_str = lambda l: self.vocab.get_index_to_token_vocabulary(
                self._label_namespace).get(l)
            label_tensor = torch.tensor(
                [
                    float(label_to_str(int(label[i])))
                    for i in range(label.shape[0])
                ],
                device=logits.device,
                requires_grad=True
            )  # make sure loss.backward() has something to update
            loss = self._loss(logits.view(-1), label_tensor)
            output_dict["loss"] = loss
            self._metric(logits, label_tensor)

        return output_dict

    @overrides
    def decode(
            self, output_dict: Dict[str,
                                    torch.Tensor]) -> Dict[str, torch.Tensor]:
        """
        Does a simple argmax over the probabilities, converts index to string label, and
        add ``"label"`` key to the dictionary with the result.
        """
        # update this part to generate a float number result as similarity score
        predictions = output_dict["logits"]
        if predictions.dim() >= 1:
            # Split the batch into individual scalar predictions.
            predictions_list = [
                predictions[i] for i in range(predictions.shape[0])
            ]
        else:
            predictions_list = [predictions]
        classes = []
        for prediction in predictions_list:
            # Format the predicted score to one decimal place. (The earlier
            # ``prediction.long()`` truncated the score and cannot be formatted
            # with ``{:.1f}``.)
            label_idx = "{:.1f}".format(prediction.item())
            label_str = (self.vocab.get_index_to_token_vocabulary(
                self._label_namespace).get(label_idx, str(label_idx)))
            classes.append(label_str)
        output_dict["label"] = classes
        return output_dict

    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        metrics = {'PearsonCorrelation': self._metric.get_metric(reset)}
        return metrics
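
A minimal sketch of the regression head used in forward above, with invented pooled sentence embeddings, showing how the scaled cosine similarity, the MSE loss, and the Pearson metric fit together:

import torch
from allennlp.training.metrics import PearsonCorrelation

similarity = torch.nn.CosineSimilarity()
loss_fn = torch.nn.MSELoss()
metric = PearsonCorrelation()

emb_a = torch.randn(4, 300)                   # pooled embeddings for sentence A, (batch, dim)
emb_b = torch.randn(4, 300)                   # pooled embeddings for sentence B
gold = torch.tensor([0.0, 2.5, 4.0, 5.0])     # STS-B style gold scores in [0, 5]

pred = similarity(emb_a, emb_b) * 5           # cosine in [-1, 1], scaled toward the 0-5 range
loss = loss_fn(pred.view(-1), gold)
metric(pred, gold)
print(loss.item(), metric.get_metric())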