Example #1
0
 def test_compute_class_weights(self):
     """Regression test for issue #181.

     Draws 1000 labels from {0, 1, 2} with probabilities 0.3 / 0.6 / 0.1
     under a fixed seed and checks that 'log' weighting assigns class 1
     (the class drawn with the highest probability) a weight of exactly 1.0.
     """
     np.random.seed(0)  # fixed seed keeps the sampled label counts reproducible
     labels = np.random.choice(a=[0, 1, 2], size=1000, p=[0.3, 0.6, 0.1])
     computed = compute_class_weights('log', class_counts=Counter(labels))
     self.assertEqual(computed[1], 1.0)
Example #2
0
    def _dataset_with_targets(self, Xs, Y, train, context=None):
        """Build a generator-backed Dataset of encoded examples with targets.

        Xs and Y must either both be plain iterables or both be zero-argument
        callables returning iterables. When ``config.use_auxiliary_info`` is
        truthy, ``context`` is zipped in alongside them; it is not checked
        separately because it is handed in together with Xs and is therefore
        expected to take the same form (callable or not).

        When Y is not callable and ``train`` is truthy, the encoded examples
        are materialised once to record ``config.dataset_size`` and, if
        ``config.class_weights`` is set, to replace it with the result of
        ``compute_class_weights``.

        Returns the result of ``Dataset.from_generator`` over the lazily
        encoded examples (wrapped by ``wrap_tqdm``).

        Raises:
            ValueError: if exactly one of Xs and Y is callable.
        """
        lazy_inputs = callable(Xs)
        if lazy_inputs != callable(Y):
            raise ValueError(
                "Either neither or both of Xs and Y should be callable, not a mixture"
            )
        with_context = bool(self.config.use_auxiliary_info)

        def dataset():
            # Re-created on every call so the data can be iterated repeatedly.
            if lazy_inputs:
                # encode one sample at a time.
                if with_context:
                    return zip(Xs(), Y(), context())
                return zip(Xs(), Y())
            if with_context:
                return zip(Xs, Y, context)
            return zip(Xs, Y)

        def dataset_encoded():
            # Each raw sample is unpacked into text_to_tokens_mask; the
            # iterables it returns are flattened into one stream.
            return itertools.chain.from_iterable(
                itertools.starmap(self.text_to_tokens_mask, dataset()))

        if not lazy_inputs and train:
            # This list feeds only the statistics below; the Dataset returned
            # at the end re-runs the encoder lazily.
            encoded_examples = self._filter_empty_examples(
                list(dataset_encoded()))
            class_counts = self._compute_class_counts(encoded_examples)
            self.config.dataset_size = len(encoded_examples)
            if self.config.class_weights is not None:
                self.config.class_weights = compute_class_weights(
                    class_weights=self.config.class_weights,
                    class_counts=class_counts)

        shape_def = self.feed_shape_type_def()

        def generator():
            return self.wrap_tqdm(dataset_encoded(), train)

        return Dataset.from_generator(generator, *shape_def)
Example #3
0
    def _dataset_with_targets(self, Xs, Y, train):
        """Build a generator-backed Dataset of encoded examples with targets.

        Xs and Y must either both be plain iterables or both be zero-argument
        callables returning iterables.

        When Y is not callable and ``train`` is truthy, the encoded examples
        are materialised once to record ``config.dataset_size`` and, if
        ``config.class_weights`` is set, to replace it with the result of
        ``compute_class_weights``.

        Returns the result of ``Dataset.from_generator`` over the lazily
        encoded examples (wrapped by ``wrap_tqdm``).

        Raises:
            ValueError: if exactly one of Xs and Y is callable.
        """
        lazy_inputs = callable(Xs)
        if lazy_inputs != callable(Y):
            raise ValueError(
                "Either neither or both of Xs and Y should be callable, not a mixture"
            )

        def dataset():
            # Re-created on every call so the data can be iterated repeatedly.
            if lazy_inputs:
                return zip(Xs(), Y())  # encode one sample at a time.
            return zip(Xs, Y)

        def dataset_encoded():
            # Each (x, y) pair is unpacked into text_to_tokens_mask; the
            # iterables it returns are flattened into one stream.
            return itertools.chain.from_iterable(
                itertools.starmap(self.text_to_tokens_mask, dataset()))

        shape_def = self.feed_shape_type_def()

        if not lazy_inputs and train:
            # This list feeds only the statistics below; the Dataset returned
            # at the end re-runs the encoder lazily.
            encoded_examples = list(dataset_encoded())
            class_counts = self._compute_class_counts(encoded_examples)
            self.config.dataset_size = len(encoded_examples)
            if self.config.class_weights is not None:
                self.config.class_weights = compute_class_weights(
                    class_weights=self.config.class_weights,
                    class_counts=class_counts)

        def generator():
            return self.wrap_tqdm(dataset_encoded(), train)

        return Dataset.from_generator(generator, *shape_def)
Example #4
0
 def _compute_class_weights(self, class_weights, class_counts):
     """Delegate to ``compute_class_weights`` in multilabel mode.

     ``class_weights`` and ``class_counts`` are forwarded unchanged, together
     with ``config.dataset_size`` as ``n_total`` and ``multilabel=True``.
     Returns whatever ``compute_class_weights`` returns.
     """
     dataset_size = self.config.dataset_size
     return compute_class_weights(
         class_weights=class_weights,
         class_counts=class_counts,
         n_total=dataset_size,
         multilabel=True,
     )
Example #5
0
    def _post_data_initialization(self, Y):
        """Fit the label encoder on Y and derive target-dependent settings.

        Y may be a collection of targets or a zero-argument callable that
        returns an iterable of targets; in the callable case only the first
        10000 items are drawn and used for fitting.

        Sets ``label_encoder``, ``lm_loss_coef`` and ``target_dim`` on self,
        copies ``self.pad_idx`` onto the config and, unless the targets used
        for fitting are None, replaces ``config.class_weights`` with the
        result of ``compute_class_weights``.
        """
        self.label_encoder = self._target_encoder()
        # A callable Y is sampled rather than consumed in full.
        Y_fit = list(itertools.islice(Y(), 10000)) if callable(Y) else Y
        self.label_encoder.fit(Y_fit)
        self.config.pad_idx = self.pad_idx

        encoder_dim = self.label_encoder.target_dim
        if encoder_dim is None:
            # Without a target dimension the configured coefficient is
            # ignored and the LM loss coefficient is fixed at 1.0.
            self.lm_loss_coef = 1.0
        else:
            self.lm_loss_coef = self.config.lm_loss_coef
        self.target_dim = encoder_dim

        if Y_fit is None:
            return
        self.config.class_weights = compute_class_weights(
            class_weights=self.config.class_weights, Y=Y_fit)
Example #6
0
 def _compute_class_weights(self, class_weights, class_counts):
     """Delegate to ``compute_class_weights`` with both arguments unchanged.

     Returns whatever ``compute_class_weights`` returns for the given
     ``class_weights`` and ``class_counts``.
     """
     resolved = compute_class_weights(
         class_weights=class_weights,
         class_counts=class_counts,
     )
     return resolved