コード例 #1
0
    def prepare(self, data):
        """Build the ETL feature frame for the forest-fires data.

        Runs the parent class's ``prepare`` on ``data``, checks the result
        with ``self.input_validator``, appends log-transformed and indicator
        columns, expands ``month`` and ``day`` through ``dummify``, and
        finally checks the frame with ``self.output_validator``.

        Args:
            data: Raw input, forwarded unchanged to the parent ``prepare``.

        Returns:
            The frame produced by the last ``dummify`` call, carrying the
            added columns ``area_log``, ``FFMC_log``, ``ISI_log``,
            ``rain_log`` and ``rain_cat``.

        Note:
            How the validators report an invalid frame is defined outside
            this method; any error they raise propagates to the caller.
        """
        df = super(ForestFiresPreparerETL, self).prepare(data)

        self.input_validator.validate(df)

        # Target transformation: log(1 + area).
        df['area_log'] = np.log1p(df['area'])

        # Feature transformations: log(1 + x) on selected numeric columns.
        df['FFMC_log'] = np.log1p(df['FFMC'])
        df['ISI_log'] = np.log1p(df['ISI'])
        df['rain_log'] = np.log1p(df['rain'])
        # Indicator column: 1 where rain is strictly positive, else 0.
        df['rain_cat'] = (df['rain'] > 0).astype(np.uint8)

        # Categories are listed explicitly rather than derived from the data.
        df = dummify(df, 'month', [
            'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep',
            'oct', 'nov', 'dec'
        ])
        df = dummify(df, 'day',
                     ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'])

        self.output_validator.validate(df)

        return df
コード例 #2
0
    def prepare(self, data):
        """Build the ETL frame for the abalone data.

        Delegates to the parent ``prepare``, validates the result with
        ``self.input_validator``, adds an ``age`` column and expands the
        ``sex`` column through ``dummify``.

        Args:
            data: Raw input, forwarded unchanged to the parent ``prepare``.

        Returns:
            The frame returned by ``dummify`` for the ``sex`` column.
        """
        frame = super(AbalonePreparerETL, self).prepare(data)
        self.input_validator.validate(frame)

        # age is defined here as the ring count shifted by 1.5.
        ring_counts = frame['rings']
        frame['age'] = ring_counts + 1.5

        sex_levels = ['M', 'F', 'I']
        return dummify(frame, 'sex', sex_levels)
コード例 #3
0
    def prepare(self, data):
        """Return the model-ready feature subset for the forest-fires data.

        Delegates to the parent ``prepare``, derives ``rain_cat`` and
        ``ISI_log``, expands ``month`` and ``day`` through ``dummify`` and
        returns a copy restricted to a fixed, ordered list of columns.

        Args:
            data: Raw input, forwarded unchanged to the parent ``prepare``.

        Returns:
            A copy of the frame containing only the selected feature
            columns, in the order listed below.
        """
        frame = super(ForestFiresPreparer, self).prepare(data)

        # Indicator column: 1 where rain is strictly positive, else 0.
        has_rain = frame['rain'] > 0
        frame['rain_cat'] = has_rain.astype(np.uint8)
        # log(1 + ISI)
        frame['ISI_log'] = np.log1p(frame['ISI'])

        month_levels = [
            'jan', 'feb', 'mar', 'apr', 'may', 'jun',
            'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
        ]
        weekday_levels = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
        frame = dummify(frame, 'month', month_levels)
        frame = dummify(frame, 'day', weekday_levels)

        base_columns = [
            'X', 'Y', 'FFMC', 'DMC', 'DC', 'ISI_log', 'temp', 'RH', 'wind',
            'rain_cat',
        ]
        # 'jul' is not selected (presumably a reference level; confirm).
        month_columns = [
            'apr', 'aug', 'dec', 'feb', 'jan', 'jun', 'mar', 'may', 'nov',
            'oct', 'sep',
        ]
        # 'tue' and 'wed' are not selected (reason not visible here).
        weekday_columns = ['fri', 'mon', 'sat', 'sun', 'thu']

        chosen = base_columns + month_columns + weekday_columns
        return frame[chosen].copy()
コード例 #4
0
 def test_includes_dummy_na(self):
     """With ``dummy_na=True`` the result has a column per category and for None."""
     raw_values = ['a', 'b', 'a', None]
     frame = pd.DataFrame(raw_values, columns=['category'])

     result = dummify(
         frame,
         'category',
         categories=['a', 'b', 'c'],
         dummy_na=True,
     )

     # 'c' never occurs in the data but is requested explicitly; None is the
     # label checked for the missing-value column.
     for expected_column in ('a', 'b', 'c', None):
         assert expected_column in result, "New column should be added"
コード例 #5
0
    def prepare(self, data):
        """Return the model-ready feature subset for the abalone data.

        Delegates to the parent ``prepare``, expands the ``sex`` column
        through ``dummify`` and returns a copy restricted to a fixed,
        ordered list of columns.

        Args:
            data: Raw input, forwarded unchanged to the parent ``prepare``.

        Returns:
            A copy of the frame containing only the selected feature columns.
        """
        frame = super(AbalonePreparer, self).prepare(data)
        frame = dummify(frame, 'sex', ['M', 'F', 'I'])

        measurement_columns = [
            'length',
            'diameter',
            'height',
            'whole_weight',
            'shucked_weight',
            'viscera_weight',
            'shell_weight',
        ]
        # Only 'M' and 'F' are kept; the 'I' column is not selected.
        sex_columns = ['M', 'F']

        return frame[measurement_columns + sex_columns].copy()
コード例 #6
0
 def test_includes_columns_explicitly(self):
     """Every explicitly requested category shows up in the result."""
     # `df` is not defined in this method; it comes from the enclosing scope.
     requested = ['a', 'b', 'c']
     result = dummify(df, 'category', categories=requested)

     for expected_column in ('a', 'b', 'c'):
         assert expected_column in result, "New column should be added"
コード例 #7
0
 def test_dummy_values(self):
     """The result rows hold the original value followed by its 0/1 flags."""
     # `df` is not defined in this method; it comes from the enclosing scope.
     result = dummify(df, 'category', categories=['a', 'b'])

     expected_rows = [['a', 1, 0], ['b', 0, 1], ['a', 1, 0]]
     actual_rows = result.values.tolist()
     assert np.array_equal(actual_rows, expected_rows)
コード例 #8
0
 def test_new_number_of_columns(self):
     """Dummifying with two categories yields a three-column result."""
     # `df` is not defined in this method; it comes from the enclosing scope.
     result = dummify(df, 'category', categories=['a', 'b'])

     column_count = len(result.columns)
     assert column_count == 3, "New columns should be concatenated"
コード例 #9
0
 def test_raise_datatype_error(self):
     """Passing ``None`` instead of a frame must raise ``TypeError``."""
     expected_message = "df must be a DataFrame"
     requested = ['a', 'b']

     with pytest.raises(TypeError, match=expected_message):
         dummify(None, 'category', categories=requested)