コード例 #1
0
ファイル: CatTransformer.py プロジェクト: ManuelAlvarezC/RDT
    def reverse_transform(self, col, col_meta=None, missing=None):
        """Revert transformed data to its original format.

        Args:
            col(pandas.DataFrame): Data to transform.
            col_meta(dict): Meta information of the column. When falsy,
                ``self.col_meta`` is used instead.
            missing(bool): Whether or not to handle missing values using
                NullTransformer. When ``None``, ``self.missing`` is used instead.

        Returns:
            pandas.DataFrame
        """
        if not col_meta:
            col_meta = self.col_meta

        if missing is None:
            missing = self.missing

        self.check_data_type(col_meta)

        name = self.col_name
        restored = self.get_category(col[name])

        if missing:
            # NullTransformer is given the restored values next to the '?<name>' column.
            paired = pd.concat([restored.rename(name), col['?' + name]], axis=1)
            restored = NullTransformer().reverse_transform(paired, col_meta)

        result = pd.DataFrame()
        result[name] = restored
        return result
コード例 #2
0
ファイル: CatTransformer.py プロジェクト: ManuelAlvarezC/RDT
    def transform(self, col, col_meta=None, missing=None):
        """Apply ``self.get_val`` to the column and return the processed table.

        Args:
            col(pandas.DataFrame): Data to transform.
            col_meta(dict): Meta information of the column. When falsy,
                ``self.col_meta`` is used instead.
            missing(bool): Whether or not to handle missing values using
                NullTransformer. When ``None``, ``self.missing`` is used instead.

        Returns:
            pandas.DataFrame
        """
        if not col_meta:
            col_meta = self.col_meta

        if missing is None:
            missing = self.missing

        self.check_data_type(col_meta)

        # Replace every NaN with None so all nulls are handled the same way.
        normalized = col[self.col_name].replace({np.nan: None})

        out = pd.DataFrame()
        out[self.col_name] = normalized.apply(self.get_val)

        if not missing:
            return out

        # Handle missing values.
        return NullTransformer().fit_transform(out, col_meta)
コード例 #3
0
ファイル: DTTransformer.py プロジェクト: ManuelAlvarezC/RDT
    def transform(self, col, col_meta=None, missing=None):
        """Cast the column with the datetime helpers and return the processed table.

        The column is first passed through ``self.safe_datetime_cast`` and the
        resulting frame through ``self.to_timestamp``.

        Args:
            col(pandas.DataFrame): Data to transform.
            col_meta(dict): Meta information of the column. When falsy,
                ``self.col_meta`` is used instead.
            missing(bool): Whether or not to handle missing values using
                NullTransformer. When ``None``, ``self.missing`` is used instead.

        Returns:
            pandas.DataFrame
        """
        if not col_meta:
            col_meta = self.col_meta

        if missing is None:
            missing = self.missing

        self.check_data_type(col_meta)

        frame = pd.DataFrame()
        frame[self.col_name] = self.safe_datetime_cast(col, col_meta)
        # to_timestamp receives the whole frame, not just the column.
        frame[self.col_name] = self.to_timestamp(frame)

        if not missing:
            return frame

        # Handle missing values.
        return NullTransformer().fit_transform(frame, col_meta)
コード例 #4
0
    def transform(self, col, col_meta=None, missing=None):
        """Apply ``self.get_val`` row by row and return the processed table.

        Args:
            col(pandas.DataFrame): Data to transform.
            col_meta(dict): Meta information of the column. When falsy,
                ``self.col_meta`` is used instead.
            missing(bool): Whether or not to handle missing values using
                NullTransformer. When ``None``, ``self.missing`` is used instead.

        Returns:
            pandas.DataFrame
        """
        if not col_meta:
            col_meta = self.col_meta

        if missing is None:
            missing = self.missing

        self.check_data_type(col_meta)

        # Work on a frame that holds only the target column.
        frame = pd.DataFrame()
        frame[self.col_name] = col[self.col_name]

        if missing:
            # Handle missing values before the row-wise conversion.
            frame = NullTransformer().fit_transform(frame, col_meta)

        frame[self.col_name] = frame.apply(self.get_val, axis=1)

        # The integer cast is only applied when missing values are not handled.
        if not missing and self.subtype == 'int':
            frame[self.col_name] = frame[self.col_name].astype(int)

        return frame
コード例 #5
0
    def test___init__(self):
        """A new NullTransformer reports 'datetime' and 'number' as its type."""

        # Run
        instance = NullTransformer()

        # Check
        supported = ['datetime', 'number']
        assert instance.type == supported
コード例 #6
0
    def test_reverse_transform(self):
        """reverse_transform puts NaN back where the '?age' column is 0."""

        # Setup
        meta = {'name': 'age', 'type': 'number'}
        transformed = pd.DataFrame({
            'age': [62, 35, 0, 24, 27],
            '?age': [1, 1, 0, 1, 1]
        })
        expected = pd.Series([62, 35, np.nan, 24, 27], name='age')

        # Run
        restored = NullTransformer().reverse_transform(transformed, meta)

        # Check
        assert restored.age.equals(expected)
コード例 #7
0
    def test_fit_transform_notnull(self):
        """fit_transform fills the NaN entry and adds a '?age' column.

        The expected fill value, 53.25, is the mean of the four non-null inputs.
        """

        # Setup
        meta = {'name': 'age', 'type': 'number'}
        values = pd.Series([62, 53, 53, 45, np.nan])
        expected = pd.DataFrame({
            'age': [62.0, 53.0, 53.0, 45.0, 53.25],
            '?age': [1, 1, 1, 1, 0]
        })

        # Run
        outcome = NullTransformer().fit_transform(values, meta)

        # Check
        assert outcome.equals(expected)
コード例 #8
0
    def test_fit_transform_isnull(self):
        """fit_transform fills NaN entries and adds a '?age' column.

        With a single non-null input (62), every NaN is expected to become 62.0
        and the '?age' column is expected to be 0 on the filled rows.
        """

        # Setup
        meta = {'name': 'age', 'type': 'number'}
        values = pd.Series([62, np.nan, np.nan, np.nan, np.nan], name='age')
        expected = pd.DataFrame({
            'age': [62.0, 62.0, 62.0, 62.0, 62.0],
            '?age': [1, 0, 0, 0, 0]
        })

        # Run
        outcome = NullTransformer().fit_transform(values, meta)

        # Check
        assert outcome.equals(expected)
コード例 #9
0
ファイル: DTTransformer.py プロジェクト: ManuelAlvarezC/RDT
    def reverse_transform(self, col, col_meta=None, missing=None):
        """Revert transformed data to its original format.

        Each row is passed through the converter returned by
        ``self.get_date_converter`` for the column's ``format``.

        Args:
            col(pandas.DataFrame): Data to transform. A ``pandas.Series`` is
                also accepted and is converted to a frame first.
            col_meta(dict): Meta information of the column; must contain a
                ``'format'`` key. When falsy, ``self.col_meta`` is used instead.
            missing(bool): Whether or not to handle missing values using
                NullTransformer. When ``None``, ``self.missing`` is used instead.

        Returns:
            pandas.DataFrame
        """
        if not col_meta:
            col_meta = self.col_meta

        if missing is None:
            missing = self.missing

        if isinstance(col, pd.Series):
            col = col.to_frame()

        self.check_data_type(col_meta)

        date_format = col_meta['format']
        name = self.col_name
        converter = self.get_date_converter(name, date_format)
        restored = col.apply(converter, axis=1)

        if missing:
            # NullTransformer is given the restored values next to the '?<name>' column.
            paired = pd.concat([restored.rename(name), col['?' + name]], axis=1)
            restored = NullTransformer().reverse_transform(paired, col_meta)

        result = pd.DataFrame()
        result[name] = restored
        return result
コード例 #10
0
    def reverse_transform(self, col, col_meta=None, missing=None):
        """Converts data back into original format.

        Each row is passed through the converter returned by
        ``self.get_number_converter`` for the column's name and subtype.

        Args:
            col(pandas.DataFrame): Data to transform.
            col_meta(dict): Meta information of the column; must contain the
                ``'name'`` and ``'subtype'`` keys. When falsy, ``self.col_meta``
                is used instead.
            missing(bool): Whether or not to handle missing values using
                NullTransformer. When ``None``, ``self.missing`` is used instead.

        Returns:
            pandas.DataFrame: A single column named ``col_meta['name']``. For
            the ``'int'`` subtype it is cast to ``int`` only when it holds no
            null values; otherwise it is returned without the cast.
        """

        col_meta = col_meta or self.col_meta
        missing = missing if missing is not None else self.missing

        self.check_data_type(col_meta)

        output = pd.DataFrame(columns=[])
        subtype = col_meta['subtype']
        col_name = col_meta['name']
        fn = self.get_number_converter(col_name, subtype)

        # The row-wise conversion is the same whether or not nulls are handled.
        new_col = col.apply(fn, axis=1)

        if missing:
            new_col = new_col.rename(col_name)
            data = pd.concat([new_col, col['?' + col_name]], axis=1)
            new_col = NullTransformer().reverse_transform(data, col_meta)

        output[col_name] = new_col

        # Use the same name/subtype the converter was built with, so a col_meta
        # override stays consistent through the whole method. NaN cannot be
        # represented in an integer column, so the cast is skipped when any
        # null value is present instead of raising.
        if subtype == 'int' and not output[col_name].isnull().any():
            output[col_name] = output[col_name].astype(int)

        return output