Exemple #1
0
    def test_pearson_correlation_unmasked_computation(self):
        """Compare the streaming metric with ``np.corrcoef`` when no mask is given.

        Random predictions and noisy labels are fed to ``PearsonCorrelation``
        ``stride`` rows at a time; after each chunk the accumulated value must
        match the correlation NumPy computes over every row seen so far.  The
        metric is then reset and checked once more on the whole batch.
        """
        metric = PearsonCorrelation()
        batch_size = 100
        num_labels = 10
        predictions = np.random.randn(batch_size, num_labels).astype("float32")
        noise = np.random.randn(batch_size, num_labels).astype("float32")
        labels = 0.5 * predictions + noise

        stride = 10

        for start in range(0, stride * (batch_size // stride), stride):
            stop = start + stride

            # Reference value over everything accumulated up to this chunk.
            seen_predictions = predictions[:stop, :].reshape(-1)
            seen_labels = labels[:stop, :].reshape(-1)
            expected = np.corrcoef(seen_predictions, seen_labels)[0, 1]

            # Only the newest chunk is passed to the metric.
            chunk_predictions = torch.FloatTensor(predictions[start:stop, :])
            chunk_labels = torch.FloatTensor(labels[start:stop, :])
            metric(chunk_predictions, chunk_labels)

            assert_allclose(expected, metric.get_metric(), rtol=1e-5)

        # After a reset, a single full-batch update must agree as well.
        metric.reset()
        metric(torch.FloatTensor(predictions), torch.FloatTensor(labels))
        expected_full = np.corrcoef(predictions.reshape(-1),
                                    labels.reshape(-1))[0, 1]
        assert_allclose(expected_full, metric.get_metric(), rtol=1e-5)
    def test_pearson_correlation_unmasked_computation(self):
        """Validate unmasked streaming Pearson correlation on two kinds of data.

        The metric is updated ``stride`` rows at a time and, after every
        update, compared with ``pearson_corrcoef`` evaluated on all rows seen
        so far.  A reset followed by one full-batch update is checked too.
        """
        metric = PearsonCorrelation()
        batch_size = 100
        num_labels = 10

        # Typical data: independent random entries, labels = 0.5 * predictions + noise.
        predictions_1 = np.random.randn(batch_size, num_labels).astype("float32")
        noise_1 = np.random.randn(batch_size, num_labels).astype("float32")
        labels_1 = 0.5 * predictions_1 + noise_1

        # Rare data: one random value copied into every entry, so the whole
        # batch of predictions (and likewise of labels) is constant.
        prediction_row = np.random.randn(1).repeat(num_labels).astype("float32")
        predictions_2 = prediction_row[np.newaxis, :].repeat(batch_size, axis=0)
        offset_row = np.random.randn(1).repeat(num_labels).astype("float32")
        labels_2 = 0.5 * predictions_2 + offset_row[np.newaxis, :].repeat(batch_size, axis=0)

        stride = 10
        chunk_starts = range(0, stride * (batch_size // stride), stride)

        for predictions, labels in ((predictions_1, labels_1), (predictions_2, labels_2)):
            metric.reset()
            for start in chunk_starts:
                stop = start + stride

                # Reference correlation over every row accumulated so far.
                expected = pearson_corrcoef(predictions[:stop, :].reshape(-1),
                                            labels[:stop, :].reshape(-1))

                metric(torch.FloatTensor(predictions[start:stop, :]),
                       torch.FloatTensor(labels[start:stop, :]))
                assert_allclose(expected, metric.get_metric(), rtol=1e-5)

            # Reset, then feed the full batch in one call.
            metric.reset()
            metric(torch.FloatTensor(predictions), torch.FloatTensor(labels))
            expected_full = pearson_corrcoef(predictions.reshape(-1), labels.reshape(-1))
            assert_allclose(expected_full, metric.get_metric(), rtol=1e-5)
Exemple #3
0
class WS353(Metric):
    """Pearson correlation between embedding similarities and gold word-pair scores.

    Word pairs and their gold similarity scores are loaded once from a
    tab-separated file.  Calling the metric embeds both words of every pair,
    takes the cosine similarity of the two embeddings and accumulates the
    Pearson correlation between those similarities and the gold scores.

    NOTE(review): the class name suggests the WordSim-353 benchmark format;
    confirm against the data file actually used.
    """

    def __init__(self, sim_file_path: str) -> None:
        """Load the word pairs and gold scores.

        Args:
            sim_file_path: Path to a UTF-8 file.  The first line is discarded
                (presumably a header); every other non-blank line must hold
                ``word1<TAB>word2<TAB>score``.

        Raises:
            ValueError: If a non-blank line does not have exactly three
                tab-separated fields or the score is not a float.
        """
        self._sim_data = []
        gold_scores = []
        self._data_reader = KoWikiReader()
        self._pearson = PearsonCorrelation()

        with open(sim_file_path, 'r', encoding='utf-8') as f:
            f.readline()  # discard the first line
            for line in f:
                stripped = line.strip()
                if not stripped:
                    # A blank line (e.g. a trailing newline at end of file)
                    # cannot be unpacked into three fields; skip it.
                    continue
                w1, w2, score = stripped.split('\t')
                self._sim_data.append((w1, w2))
                gold_scores.append(float(score))
        self._sim_gold = torch.tensor(gold_scores)

    def _word_to_tensor(self, word: str, vocab: Vocabulary, cuda_device: torch.device):
        """Turn one word into its indexed ``'syllables'`` tensor on ``cuda_device``."""
        field = self._data_reader.text_to_instance(source=Token(word))['source']
        field.index(vocab)
        return field.as_tensor(field.get_padding_lengths())['syllables'].to(cuda_device)

    @overrides
    def __call__(self,
                 vocab: Vocabulary,
                 embedder: SyllableEmbedder,
                 cuda_device: torch.device,
                 print_mode: bool = False) -> None:
        """Score every stored word pair and update the Pearson accumulator.

        Args:
            vocab: Vocabulary used to index the word fields.
            embedder: Module mapping a syllable tensor to an embedding.
            cuda_device: Device the input tensors are moved to.
            print_mode: When true, print each pair with its gold and predicted
                score, followed by the current correlation.
        """
        preds = []
        for first_word, second_word in self._sim_data:
            first_tensor = self._word_to_tensor(first_word, vocab, cuda_device)
            second_tensor = self._word_to_tensor(second_word, vocab, cuda_device)
            e1, e2 = embedder(first_tensor), embedder(second_tensor)

            preds.append(F.cosine_similarity(e1, e2))

        self._pearson(torch.tensor(preds), self._sim_gold)

        if print_mode:
            print('w1\tw2\tgold\tpred')
            for ((w1, w2), gold, pred) in zip(self._sim_data, self._sim_gold, preds):
                print(f'{w1}\t{w2}\t{gold.item():.2f}\t{pred.item():.2f}')
            print(f'pscore: {self.get_metric():.3f}')

    @overrides
    def get_metric(self, reset: bool = False):
        """Return the accumulated Pearson correlation.

        Args:
            reset: Forwarded to the wrapped metric; when true this metric is
                also reset after the score has been read.
        """
        score = self._pearson.get_metric(reset)
        if reset:
            self.reset()
        return score

    @overrides
    def reset(self):
        """Clear the statistics accumulated by the wrapped Pearson metric."""
        self._pearson.reset()
    def test_pearson_correlation_masked_computation(self, device: str):
        """Check masked streaming Pearson correlation on the given device.

        Two datasets are used: ordinary random data and a degenerate one in
        which every prediction (and every label) holds the same value.  The
        metric receives ``stride`` rows per update together with the matching
        slice of a random boolean mask; after each update it is compared with
        ``pearson_corrcoef`` evaluated on all rows seen so far, using the mask
        as ``fweights``.  A reset plus a single full-batch update is verified
        at the end.
        """

        def flat_numpy(tensor):
            # Flatten and move to host memory for the NumPy-side reference.
            return tensor.view(-1).cpu().numpy()

        metric = PearsonCorrelation()
        batch_size = 100
        num_labels = 10

        predictions_1 = torch.randn(batch_size, num_labels, device=device)
        noise_1 = torch.randn(batch_size, num_labels, device=device)
        labels_1 = 0.5 * predictions_1 + noise_1

        # One random scalar broadcast to the full (batch_size, num_labels) shape.
        constant_prediction = torch.randn(1, device=device)
        predictions_2 = (constant_prediction.expand(num_labels)
                         .unsqueeze(0)
                         .expand(batch_size, -1))
        constant_offset = torch.randn(1, device=device)
        broadcast_offset = (constant_offset.expand(num_labels)
                            .unsqueeze(0)
                            .expand(batch_size, -1))
        labels_2 = 0.5 * predictions_2 + broadcast_offset

        # Random binary mask
        mask = torch.randint(0, 2, size=(batch_size, num_labels),
                             device=device).bool()
        stride = 10
        chunk_starts = range(0, stride * (batch_size // stride), stride)

        for predictions, labels in ((predictions_1, labels_1),
                                    (predictions_2, labels_2)):
            metric.reset()
            for start in chunk_starts:
                stop = start + stride

                # Reference over every row accumulated so far.
                expected = pearson_corrcoef(
                    flat_numpy(predictions[:stop, :]),
                    flat_numpy(labels[:stop, :]),
                    fweights=flat_numpy(mask[:stop, :]),
                )

                metric(predictions[start:stop, :],
                       labels[start:stop, :],
                       mask[start:stop, :])
                assert_allclose(expected, metric.get_metric())

            # Reset, then feed the full batch in one call.
            metric.reset()
            metric(predictions, labels, mask)
            expected_full = pearson_corrcoef(
                flat_numpy(predictions),
                flat_numpy(labels),
                fweights=flat_numpy(mask),
            )

            assert_allclose(expected_full, metric.get_metric())
    def test_pearson_correlation_unmasked_computation(self, device: str):
        """Check unmasked streaming Pearson correlation on the given device.

        The metric is updated ``stride`` rows at a time and compared, after
        every update, with ``pearson_corrcoef`` evaluated on all rows seen so
        far.  A reset followed by one full-batch update is verified as well.
        """

        def flat_numpy(tensor):
            # Flatten and move to host memory for the NumPy-side reference.
            return tensor.view(-1).cpu().numpy()

        metric = PearsonCorrelation()
        batch_size = 100
        num_labels = 10

        # Usual case: the entries of a batch all differ.
        predictions_1 = torch.randn(batch_size, num_labels, device=device)
        noise_1 = torch.randn(batch_size, num_labels, device=device)
        labels_1 = 0.5 * predictions_1 + noise_1

        # Uncommon case: every entry of the batch is exactly the same value,
        # obtained by broadcasting a single random scalar.
        constant_prediction = torch.randn(1, device=device)
        predictions_2 = (constant_prediction.expand(num_labels)
                         .unsqueeze(0)
                         .expand(batch_size, -1))
        constant_offset = torch.randn(1, device=device)
        broadcast_offset = (constant_offset.expand(num_labels)
                            .unsqueeze(0)
                            .expand(batch_size, -1))
        labels_2 = 0.5 * predictions_2 + broadcast_offset

        stride = 10
        chunk_starts = range(0, stride * (batch_size // stride), stride)

        for predictions, labels in ((predictions_1, labels_1),
                                    (predictions_2, labels_2)):
            metric.reset()
            for start in chunk_starts:
                stop = start + stride

                # Reference over every row accumulated so far.
                expected = pearson_corrcoef(
                    flat_numpy(predictions[:stop, :]),
                    flat_numpy(labels[:stop, :]),
                )

                metric(predictions[start:stop, :], labels[start:stop, :])
                assert_allclose(expected, metric.get_metric())

            # Reset, then feed the full batch in one call.
            metric.reset()
            metric(predictions, labels)
            expected_full = pearson_corrcoef(flat_numpy(predictions),
                                             flat_numpy(labels))
            assert_allclose(expected_full, metric.get_metric())
Exemple #6
0
    def test_pearson_correlation_masked_computation(self):
        """Compare the masked streaming metric with a ``np.cov``-based reference.

        Predictions, labels and a random 0/1 mask are fed to
        ``PearsonCorrelation`` ``stride`` rows at a time.  After each update
        the result must match the correlation derived from ``np.cov`` over
        all rows seen so far, with the mask supplied as ``fweights``.  The
        metric is then reset and checked on the whole batch in one call.
        """

        def weighted_pearson(flat_predictions, flat_labels, flat_mask):
            # Pearson correlation = cov(x, y) / sqrt(var(x) * var(y)).
            cov = np.cov(flat_predictions, flat_labels, fweights=flat_mask)
            return cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])

        metric = PearsonCorrelation()
        batch_size = 100
        num_labels = 10
        predictions = np.random.randn(batch_size, num_labels).astype("float32")
        noise = np.random.randn(batch_size, num_labels).astype("float32")
        labels = 0.5 * predictions + noise
        # Random binary mask
        mask = np.random.randint(
            0, 2, size=(batch_size, num_labels)).astype("float32")
        stride = 10

        for start in range(0, stride * (batch_size // stride), stride):
            stop = start + stride

            # Reference over every row accumulated so far.
            expected = weighted_pearson(predictions[:stop, :].reshape(-1),
                                        labels[:stop, :].reshape(-1),
                                        mask[:stop, :].reshape(-1))

            metric(torch.FloatTensor(predictions[start:stop, :]),
                   torch.FloatTensor(labels[start:stop, :]),
                   torch.FloatTensor(mask[start:stop, :]))
            assert_allclose(expected, metric.get_metric(), rtol=1e-5)

        # After a reset, a single full-batch update must agree as well.
        metric.reset()
        metric(torch.FloatTensor(predictions),
               torch.FloatTensor(labels),
               torch.FloatTensor(mask))
        expected_full = weighted_pearson(predictions.reshape(-1),
                                         labels.reshape(-1),
                                         mask.reshape(-1))
        assert_allclose(expected_full, metric.get_metric(), rtol=1e-5)
    def test_pearson_correlation_masked_computation(self):
        """Validate masked streaming Pearson correlation on two kinds of data.

        Ordinary random data and a constant-valued batch are each fed to the
        metric ``stride`` rows at a time along with the matching slice of a
        random 0/1 mask.  After every update the result is compared with
        ``pearson_corrcoef`` over all rows seen so far, using the mask as
        ``fweights``; a reset plus one full-batch update is checked last.
        """
        metric = PearsonCorrelation()
        batch_size = 100
        num_labels = 10

        predictions_1 = np.random.randn(batch_size, num_labels).astype("float32")
        noise_1 = np.random.randn(batch_size, num_labels).astype("float32")
        labels_1 = 0.5 * predictions_1 + noise_1

        # One random value copied into every entry of the batch.
        prediction_row = np.random.randn(1).repeat(num_labels).astype("float32")
        predictions_2 = prediction_row[np.newaxis, :].repeat(batch_size, axis=0)
        offset_row = np.random.randn(1).repeat(num_labels).astype("float32")
        labels_2 = 0.5 * predictions_2 + offset_row[np.newaxis, :].repeat(batch_size, axis=0)

        # Random binary mask
        mask = np.random.randint(0, 2, size=(batch_size, num_labels)).astype("float32")
        stride = 10
        chunk_starts = range(0, stride * (batch_size // stride), stride)

        for predictions, labels in ((predictions_1, labels_1), (predictions_2, labels_2)):
            metric.reset()
            for start in chunk_starts:
                stop = start + stride

                # Reference over every row accumulated so far.
                expected = pearson_corrcoef(predictions[:stop, :].reshape(-1),
                                            labels[:stop, :].reshape(-1),
                                            fweights=mask[:stop, :].reshape(-1))

                metric(torch.FloatTensor(predictions[start:stop, :]),
                       torch.FloatTensor(labels[start:stop, :]),
                       torch.FloatTensor(mask[start:stop, :]))
                assert_allclose(expected, metric.get_metric(), rtol=1e-5)

            # Reset, then feed the full batch in one call.
            metric.reset()
            metric(torch.FloatTensor(predictions),
                   torch.FloatTensor(labels),
                   torch.FloatTensor(mask))
            expected_full = pearson_corrcoef(predictions.reshape(-1),
                                             labels.reshape(-1),
                                             fweights=mask.reshape(-1))

            assert_allclose(expected_full, metric.get_metric(), rtol=1e-5)