def reverse_transform(self, col, col_meta=None, missing=None):
    """Convert transformed values back through ``self.get_category``.

    Args:
        col(pandas.DataFrame): Data to revert. Must contain the
            ``self.col_name`` column and, when ``missing`` is enabled,
            the ``'?' + self.col_name`` column as well.
        col_meta(dict): Meta information of the column. ``self.col_meta``
            is used when this is not given (or is falsy).
        missing(bool): Whether or not to handle missing values using
            NullTransformer. ``self.missing`` is used when this is None.

    Returns:
        pandas.DataFrame: Frame holding the ``self.col_name`` column.
    """
    col_meta = col_meta or self.col_meta
    if missing is None:
        missing = self.missing

    self.check_data_type(col_meta)

    name = self.col_name
    categories = self.get_category(col[name])
    result = pd.DataFrame()

    if not missing:
        result[name] = categories
        return result

    # NullTransformer is handed the reverted values side by side with the
    # '?'-prefixed column taken from the input.
    paired = pd.concat([categories.rename(name), col['?' + name]], axis=1)
    result[name] = NullTransformer().reverse_transform(paired, col_meta)
    return result
def transform(self, col, col_meta=None, missing=None):
    """Apply ``self.get_val`` to every value of the column and return the result.

    Args:
        col(pandas.DataFrame): Data to transform; must contain the
            ``self.col_name`` column.
        col_meta(dict): Meta information of the column. ``self.col_meta``
            is used when this is not given (or is falsy).
        missing(bool): Whether or not to handle missing values using
            NullTransformer. ``self.missing`` is used when this is None.

    Returns:
        pandas.DataFrame: The transformed column, or whatever
        ``NullTransformer.fit_transform`` returns when ``missing`` is enabled.
    """
    col_meta = col_meta or self.col_meta
    if missing is None:
        missing = self.missing

    self.check_data_type(col_meta)

    name = self.col_name

    # Replace NaN with None so all missing entries reach get_val in one form.
    normalized = col[name].replace({np.nan: None})

    encoded = pd.DataFrame()
    encoded[name] = normalized.apply(self.get_val)

    if not missing:
        return encoded

    # Handle missing values
    return NullTransformer().fit_transform(encoded, col_meta)
def transform(self, col, col_meta=None, missing=None):
    """Cast the column with ``safe_datetime_cast`` and convert it with ``to_timestamp``.

    Args:
        col(pandas.DataFrame): Data to transform.
        col_meta(dict): Meta information of the column. ``self.col_meta``
            is used when this is not given (or is falsy).
        missing(bool): Whether or not to handle missing values using
            NullTransformer. ``self.missing`` is used when this is None.

    Returns:
        pandas.DataFrame: The transformed column, or whatever
        ``NullTransformer.fit_transform`` returns when ``missing`` is enabled.
    """
    col_meta = col_meta or self.col_meta
    if missing is None:
        missing = self.missing

    self.check_data_type(col_meta)

    name = self.col_name
    converted = pd.DataFrame()

    # Two passes over the same column: the cast result is stored first
    # because to_timestamp receives the whole intermediate frame.
    converted[name] = self.safe_datetime_cast(col, col_meta)
    converted[name] = self.to_timestamp(converted)

    if not missing:
        return converted

    # Handle missing values
    return NullTransformer().fit_transform(converted, col_meta)
def transform(self, col, col_meta=None, missing=None):
    """Apply ``self.get_val`` row by row to the column and return the result.

    Args:
        col(pandas.DataFrame): Data to transform; must contain the
            ``self.col_name`` column.
        col_meta(dict): Meta information of the column. ``self.col_meta``
            is used when this is not given (or is falsy).
        missing(bool): Whether or not to handle missing values using
            NullTransformer. ``self.missing`` is used when this is None.

    Returns:
        pandas.DataFrame: The transformed data. When ``missing`` is enabled
        this is the frame returned by ``NullTransformer.fit_transform`` with
        the ``self.col_name`` column replaced by the ``get_val`` results.
    """
    col_meta = col_meta or self.col_meta
    if missing is None:
        missing = self.missing

    self.check_data_type(col_meta)

    name = self.col_name
    frame = pd.DataFrame()

    # if are just processing child rows, then the name is already known
    frame[name] = col[name]

    if missing:
        # Handle missing values before get_val sees the rows.
        frame = NullTransformer().fit_transform(frame, col_meta)

    frame[name] = frame.apply(self.get_val, axis=1)

    # The integer cast is only applied on the path without null handling.
    if not missing and self.subtype == 'int':
        frame[name] = frame[name].astype(int)

    return frame
def test___init__(self):
    """A new NullTransformer exposes ``type`` as datetime and number."""
    # Setup
    expected_types = ['datetime', 'number']

    # Run
    instance = NullTransformer()

    # Check
    assert instance.type == expected_types
def test_reverse_transform(self):
    """Rows flagged 0 in '?age' are expected to come back as NaN in 'age'."""
    # Setup
    metadata = {'name': 'age', 'type': 'number'}
    null_transformer = NullTransformer()
    transformed = pd.DataFrame({
        'age': [62, 35, 0, 24, 27],
        '?age': [1, 1, 0, 1, 1]
    })
    expected = pd.Series([62, 35, np.nan, 24, 27], name='age')

    # Run
    restored = null_transformer.reverse_transform(transformed, metadata)

    # Check
    assert restored.age.equals(expected)
def test_fit_transform_notnull(self):
    """NaN is expected to be filled with the mean of the other values.

    The expected frame also carries an '?age' column holding 1 for the
    original values and 0 for the filled one.
    """
    # Setup
    metadata = {'name': 'age', 'type': 'number'}
    values = pd.Series([62, 53, 53, 45, np.nan])
    null_transformer = NullTransformer()

    # 53.25 is the mean of 62, 53, 53 and 45.
    expected = pd.DataFrame({
        'age': [62.0, 53.0, 53.0, 45.0, 53.25],
        '?age': [1, 1, 1, 1, 0]
    })

    # Run
    transformed = null_transformer.fit_transform(values, metadata)

    # Check
    assert transformed.equals(expected)
def test_fit_transform_isnull(self):
    """NaN values are expected to be filled with 62.0, the only non-null value.

    The expected frame also carries an '?age' column holding 1 for the
    original value and 0 for every filled one.
    """
    # Setup
    metadata = {'name': 'age', 'type': 'number'}
    values = pd.Series([62, np.nan, np.nan, np.nan, np.nan], name='age')
    null_transformer = NullTransformer()

    expected = pd.DataFrame({
        'age': [62.0, 62.0, 62.0, 62.0, 62.0],
        '?age': [1, 0, 0, 0, 0]
    })

    # Run
    transformed = null_transformer.fit_transform(values, metadata)

    # Check
    assert transformed.equals(expected)
def reverse_transform(self, col, col_meta=None, missing=None):
    """Convert the data back row by row using the column's date converter.

    Args:
        col(pandas.DataFrame): Data to revert. A ``pandas.Series`` is also
            accepted and is turned into a one-column frame first.
        col_meta(dict): Meta information of the column; its ``'format'``
            entry is passed to ``self.get_date_converter``.
            ``self.col_meta`` is used when this is not given (or is falsy).
        missing(bool): Whether or not to handle missing values using
            NullTransformer. ``self.missing`` is used when this is None.

    Returns:
        pandas.DataFrame: Frame holding the ``self.col_name`` column.
    """
    col_meta = col_meta or self.col_meta
    if missing is None:
        missing = self.missing

    # The converter is applied with axis=1, which needs a DataFrame.
    frame = col.to_frame() if isinstance(col, pd.Series) else col

    self.check_data_type(col_meta)

    name = self.col_name
    converter = self.get_date_converter(name, col_meta['format'])
    dates = frame.apply(converter, axis=1)

    result = pd.DataFrame()

    if not missing:
        result[name] = dates
        return result

    # NullTransformer is handed the reverted values side by side with the
    # '?'-prefixed column taken from the input.
    paired = pd.concat([dates.rename(name), frame['?' + name]], axis=1)
    result[name] = NullTransformer().reverse_transform(paired, col_meta)
    return result
def reverse_transform(self, col, col_meta=None, missing=None):
    """Convert the data back row by row using the column's number converter.

    Args:
        col(pandas.DataFrame): Data to revert. When ``missing`` is enabled
            it must also contain the ``'?' + col_meta['name']`` column.
        col_meta(dict): Meta information of the column; its ``'name'`` and
            ``'subtype'`` entries drive the conversion. ``self.col_meta``
            is used when this is not given (or is falsy).
        missing(bool): Whether or not to handle missing values using
            NullTransformer. ``self.missing`` is used when this is None.

    Returns:
        pandas.DataFrame: Frame holding the ``col_meta['name']`` column.
        For the ``'int'`` subtype the column is cast to ``int`` unless it
        contains null values, in which case it is left uncast.
    """
    col_meta = col_meta or self.col_meta
    missing = missing if missing is not None else self.missing

    self.check_data_type(col_meta)

    output = pd.DataFrame(columns=[])
    subtype = col_meta['subtype']
    col_name = col_meta['name']
    fn = self.get_number_converter(col_name, subtype)

    new_col = col.apply(fn, axis=1)

    if missing:
        new_col = new_col.rename(col_name)
        data = pd.concat([new_col, col['?' + col_name]], axis=1)
        nt = NullTransformer()
        output[col_name] = nt.reverse_transform(data, col_meta)
    else:
        output[col_name] = new_col

    # Use the same name/subtype that built the column above; reading
    # self.col_name here raised KeyError whenever col_meta named another column.
    if subtype == 'int':
        values = output[col_name]

        # NaN cannot be represented by an integer dtype and astype(int)
        # raises on it, so a column with nulls is returned uncast.
        if not values.isnull().any():
            output[col_name] = values.astype(int)

    return output