예제 #1
0
    def fit_before_convert(self, dataset):
        """Fit the label encoder on raw (not yet encoded) target data.

        No encoder is fitted when `dataset` is a `tf.data.Dataset` or when it
        holds more than one value per sample; both cases are treated as
        already-encoded targets.

        Args:
            dataset: The target data. A `tf.data.Dataset`, `pd.DataFrame` or
                `pd.Series`; anything else must already provide `flatten()`,
                `shape` and `len()` (presumably an np.ndarray - confirm
                against callers).

        Raises:
            ValueError: If the data contains fewer than two distinct labels.
        """
        # If in tf.data.Dataset, must be encoded already.
        if isinstance(dataset, tf.data.Dataset):
            return

        # Unwrap pandas containers; a Series becomes a single column.
        if isinstance(dataset, pd.DataFrame):
            dataset = dataset.values
        if isinstance(dataset, pd.Series):
            dataset = dataset.values.reshape(-1, 1)

        # More elements than samples means several values per sample, which
        # is treated as already encoded.
        # TODO: support raw string labels for multi-label.
        flat_values = dataset.flatten()
        if len(flat_values) != len(dataset):
            if self.num_classes:
                self._check_data_shape(dataset.shape[1:])
            return

        # Fit encoder.
        labels = set(flat_values)
        if len(labels) < 2:
            # Report the number of labels actually found in the data;
            # self.num_classes is not derived from the data in this method.
            raise ValueError(
                'Expect the target data for {name} to have '
                'at least 2 classes, but got {num_classes}.'.format(
                    name=self.name, num_classes=len(labels)))
        if len(labels) == 2 and not self.multi_label:
            self.label_encoder = encoders.LabelEncoder()
        else:
            self.label_encoder = encoders.OneHotEncoder()
        self.label_encoder.fit(dataset)
예제 #2
0
 def fit_before_convert(self, dataset):
     """Work out `num_classes` and fit a label encoder on the target data.

     For a `tf.data.Dataset`, or data with more than one value per row, only
     `self.num_classes` is updated and no encoder is fitted.

     Args:
         dataset: The target data. A `tf.data.Dataset`, `pd.DataFrame` or
             `pd.Series`; anything else must already provide `flatten()`,
             `shape` and `len()`.

     Raises:
         ValueError: If `self.num_classes` ends up smaller than 2.
     """
     # A tf.data.Dataset is taken to be encoded already.
     if isinstance(dataset, tf.data.Dataset):
         if not self.num_classes:
             width = utils.dataset_shape(dataset)[0]
             # A single column is taken to hold 0s and 1s, i.e. two classes.
             self.num_classes = 2 if width == 1 else width
         return

     # Unwrap pandas containers; a Series becomes a single column.
     if isinstance(dataset, pd.DataFrame):
         dataset = dataset.values
     if isinstance(dataset, pd.Series):
         dataset = dataset.values.reshape(-1, 1)

     # More than one value per row: not raw labels.
     flat_values = dataset.flatten()
     if len(flat_values) != len(dataset):
         self.num_classes = dataset.shape[1]
         return

     distinct_labels = set(flat_values)
     if self.num_classes is None:
         self.num_classes = len(distinct_labels)

     class_count = self.num_classes
     if class_count == 2:
         self.label_encoder = encoders.LabelEncoder()
     elif class_count > 2:
         self.label_encoder = encoders.OneHotEncoder()
     elif class_count < 2:
         message = (
             'Expect the target data for {name} to have '
             'at least 2 classes, but got {num_classes}.'
         )
         raise ValueError(
             message.format(name=self.name, num_classes=class_count))
     self.label_encoder.fit(dataset)