Beispiel #1
0
 def fit_transform(self, features: Features) -> Features:
     """Record the values of every categorical feature, then transform.

     For each column whose type equals ``FeatureType.CATEGORIAL`` the
     distinct values found in the data are stored in
     ``self.feature_values`` under the column name.

     Args:
         features: Feature set to learn the category values from.

     Returns:
         Whatever ``self._transform_df`` returns for the same data.
     """
     df = features.as_pandas(copy=False)
     cat_columns = [
         features.names[i] for i, tf in enumerate(features.types)
         if tf == FeatureType.CATEGORIAL
     ]
     for name in cat_columns:
         # De-duplicate while keeping first-appearance order. Iterating a
         # set of strings yields a different order from one interpreter run
         # to the next (hash randomisation), which made the stored category
         # order non-reproducible between fits on identical data.
         values = list(dict.fromkeys(df[name].tolist()))
         logging.debug("Got values %s for feature %s: ", values, name)
         self.feature_values[name] = values
     return self._transform_df(df, features)
Beispiel #2
0
 def transform(self, features: Features) -> Features:
     """Return a feature set restricted to the names in ``self.features``.

     Columns whose name is not contained in ``self.features`` are dropped
     from the pandas representation, and the type list is filtered to the
     positions that were kept.

     Args:
         features: Feature set to filter.

     Returns:
         A ``Features`` object built from the remaining columns and types.
     """
     # One flag per column position: True when the column is retained.
     keep_mask = [name in self.features for name in features.names]
     dropped_positions = [
         pos for pos, keep in enumerate(keep_mask) if not keep
     ]

     frame = features.as_pandas()
     frame.drop(columns=frame.columns[dropped_positions], inplace=True)

     # zip stops at the shorter sequence, so only positions that exist in
     # both the type list and the name list can be selected.
     kept_types = [
         f_type for f_type, keep in zip(features.types, keep_mask) if keep
     ]
     return Features.from_pandas(df=frame, types=kept_types)
Beispiel #3
0
 def transform(self, features: Features) -> Features:
     """Transform ``features`` by delegating to ``self._transform_df``.

     Args:
         features: Feature set to transform.

     Returns:
         The value returned by ``self._transform_df``.
     """
     # The frame is requested with copy=False and handed on together with
     # the original feature set.
     return self._transform_df(features.as_pandas(copy=False), features)