def reverse_transform(self, data):
    """Reverse the transformed data to the original format.

    The hyper transformer is reversed first, then every constraint is
    undone, and finally each field listed in ``self._fields_metadata`` is
    rebuilt: missing ``id`` columns are generated with ``self._make_ids``,
    ``pii`` fields are filled with values produced by the faker returned by
    ``self._get_faker``, and every other field is taken from the reversed
    data. Non-null values are cast to the dtype stored in ``self._dtypes``.

    Args:
        data (pandas.DataFrame):
            Data to be reverse transformed.

    Returns:
        pandas.DataFrame:
            The reversed data, restricted to ``self._field_names``.

    Raises:
        MetadataNotFittedError:
            If ``self.fitted`` is falsy.
    """
    if not self.fitted:
        raise MetadataNotFittedError()

    reversed_data = self._hyper_transformer.reverse_transform(data)

    # Constraints stack on top of each other, so they have to be undone
    # starting from the last one in the list and ending with the first.
    for constraint in reversed(self._constraints):
        reversed_data = constraint.reverse_transform(reversed_data)

    num_rows = len(reversed_data)
    for name, field_metadata in self._fields_metadata.items():
        if field_metadata['type'] == 'id' and name not in reversed_data:
            field_data = self._make_ids(name, field_metadata, num_rows)
        elif field_metadata.get('pii', False):
            faker = self._get_faker(field_metadata['pii_category'])
            # Column assignment aligns on the index: reuse the table index so
            # the fake values are not turned into NaN when it is not 0..n-1.
            field_data = pd.Series(
                [faker() for _ in range(num_rows)],
                index=reversed_data.index,
            )
        else:
            field_data = reversed_data[name]

        # Nulls are dropped before casting so that dtypes which cannot hold
        # NaN (e.g. integers) do not make ``astype`` fail.
        not_null = field_data[field_data.notnull()]
        reversed_data[name] = not_null.astype(self._dtypes[name])

    return reversed_data[self._field_names]
def reverse_transform(self, data):
    """Reverse the transformed data to the original format.

    The hyper transformer is reversed first, then every constraint is
    undone, and finally each field returned by ``self.get_dtypes(ids=True)``
    is rebuilt: non-``id`` fields are taken from the reversed data, ``id``
    fields flagged as ``pii`` are filled with values produced by the faker
    returned by ``self._get_faker``, and the remaining ``id`` fields become
    a ``0..n-1`` counter. Non-null values are cast to the field dtype.

    Args:
        data (pandas.DataFrame):
            Data to be reverse transformed.

    Returns:
        pandas.DataFrame:
            The reversed data, restricted to ``self._field_names``.

    Raises:
        MetadataNotFittedError:
            If ``self.fitted`` is falsy.
    """
    if not self.fitted:
        raise MetadataNotFittedError()

    reversed_data = self._hyper_transformer.reverse_transform(data)

    # Constraints stack on top of each other, so they have to be undone
    # starting from the last one in the list and ending with the first.
    for constraint in reversed(self._constraints):
        reversed_data = constraint.reverse_transform(reversed_data)

    fields = self._fields_metadata
    num_rows = len(reversed_data)
    # Column assignment aligns on the index: generated series reuse the table
    # index so their values are not turned into NaN when it is not 0..n-1.
    index = reversed_data.index
    for name, dtype in self.get_dtypes(ids=True).items():
        field_metadata = fields[name]
        if field_metadata['type'] != 'id':
            field_data = reversed_data[name]
        elif field_metadata.get('pii', False):
            faker = self._get_faker(field_metadata['pii_category'])
            field_data = pd.Series(
                [faker() for _ in range(num_rows)], index=index)
        else:
            field_data = pd.Series(np.arange(num_rows), index=index)

        # Nulls are dropped before casting so that dtypes which cannot hold
        # NaN (e.g. integers) do not make ``astype`` fail.
        reversed_data[name] = field_data.dropna().astype(dtype)

    return reversed_data[self._field_names]
def transform(self, data):
    """Transform the given data.

    Only the non-id fields that are present in ``data`` are kept. They are
    anonymized, passed through every constraint in order and finally through
    the hyper transformer.

    Args:
        data (pandas.DataFrame):
            Table data.

    Returns:
        pandas.DataFrame:
            Transformed data.

    Raises:
        MetadataNotFittedError:
            If ``self.fitted`` is falsy.
    """
    if not self.fitted:
        raise MetadataNotFittedError()

    # Keep the order given by the dtypes, skipping columns absent from data.
    present = []
    for column in self.get_dtypes(ids=False):
        if column in data.columns:
            present.append(column)

    LOGGER.debug('Anonymizing table %s', self.name)
    table = self._anonymize(data[present])

    LOGGER.debug('Transforming constraints for table %s', self.name)
    for step in self._constraints:
        table = step.transform(table)

    LOGGER.debug('Transforming table %s', self.name)
    transformed = self._hyper_transformer.transform(table)
    return transformed
def reverse_transform(self, data):
    """Reverse the transformed data to the original format.

    The hyper transformer is reversed first (or skipped if it raises
    ``rdt.errors.NotFittedError``), then the constraints in
    ``self._constraints_to_reverse`` are undone from last to first, and
    finally each field listed in ``self._fields_metadata`` is rebuilt:
    missing ``id`` columns are generated with ``self._make_ids``, ``pii``
    fields are filled with ``Table._get_fake_values``, and every other field
    is taken from the reversed data. Non-null values are cast to the dtype
    stored in ``self._dtypes``.

    Args:
        data (pandas.DataFrame):
            Data to be reverse transformed. It is not modified.

    Returns:
        pandas.DataFrame:
            The reversed data, restricted to ``self._field_names``.

    Raises:
        MetadataNotFittedError:
            If ``self.fitted`` is falsy.
    """
    if not self.fitted:
        raise MetadataNotFittedError()

    try:
        reversed_data = self._hyper_transformer.reverse_transform(data)
    except rdt.errors.NotFittedError:
        # Work on a copy: the column assignments below would otherwise
        # write into the caller's DataFrame.
        reversed_data = data.copy()

    for constraint in reversed(self._constraints_to_reverse):
        reversed_data = constraint.reverse_transform(reversed_data)

    num_rows = len(reversed_data)
    for name, field_metadata in self._fields_metadata.items():
        if field_metadata['type'] == 'id' and name not in reversed_data:
            field_data = self._make_ids(field_metadata, num_rows)
        elif field_metadata.get('pii', False):
            # Column assignment aligns on the index: reuse the table index so
            # the fake values are not turned into NaN when it is not 0..n-1.
            field_data = pd.Series(
                Table._get_fake_values(field_metadata, num_rows),
                index=reversed_data.index,
            )
        else:
            field_data = reversed_data[name]

        # Nulls are dropped before casting so that dtypes which cannot hold
        # NaN (e.g. integers) do not make ``astype`` fail.
        not_null = field_data[field_data.notnull()]
        reversed_data[name] = not_null.astype(self._dtypes[name])

    return reversed_data[self._field_names]
def transform(self, data):
    """Transform the given data.

    The non-id fields are selected from ``data``, anonymized, passed through
    every constraint in order and finally through the hyper transformer.

    Args:
        data (pandas.DataFrame):
            Table data. It must contain every field returned by
            ``self.get_dtypes(ids=False)``.

    Returns:
        pandas.DataFrame:
            Transformed data.

    Raises:
        MetadataNotFittedError:
            If ``self.fitted`` is falsy.
    """
    if not self.fitted:
        raise MetadataNotFittedError()

    # ``get_dtypes`` returns a mapping; pandas no longer accepts a dict as a
    # column indexer, so the field names are passed as an explicit list.
    fields = list(self.get_dtypes(ids=False))
    data = self._anonymize(data[fields])

    for constraint in self._constraints:
        data = constraint.transform(data)

    return self._hyper_transformer.transform(data)
def transform(self, data, on_missing_column='error'):
    """Transform the given data.

    Only the non-id fields that are present in ``data`` are kept. They are
    anonymized, validated against the constraints, transformed by the
    constraints and finally passed through the hyper transformer.

    Args:
        data (pandas.DataFrame):
            Table data.
        on_missing_column (str):
            If the value is error, then a `MissingConstraintColumnError` is
            raised. If the value is drop, then the columns involved in the
            constraint that are present in data will be dropped.

    Returns:
        pandas.DataFrame:
            Transformed data.

    Raises:
        MetadataNotFittedError:
            If ``self.fitted`` is falsy.
        ConstraintsNotMetError:
            If the table data is not valid for the provided constraints.
    """
    if not self.fitted:
        raise MetadataNotFittedError()

    # Keep the order given by the dtypes, skipping columns absent from data.
    present = []
    for column in self.get_dtypes(ids=False):
        if column in data.columns:
            present.append(column)

    LOGGER.debug('Anonymizing table %s', self.name)
    table = self._anonymize(data[present])
    self._validate_data_on_constraints(table)

    LOGGER.debug('Transforming constraints for table %s', self.name)
    table = self._transform_constraints(table, on_missing_column)

    LOGGER.debug('Transforming table %s', self.name)
    transformed = self._hyper_transformer.transform(table)
    return transformed