Esempio n. 1
0
class StopWordScrubberTest(unittest.TestCase):
    """Checks that StopWordScrubber drops stop words from token lists."""

    # Tokens every scrubbed row is expected to be free of.
    _STOPWORDS = ('this', 'has', 'too', 'than', 'can')

    def setUp(self):
        token_rows = [
            ['this', 'sentence', 'has', 'multiple', 'stopwords'],
            ['this', 'sentence', 'one', 'multiple', 'too'],
            ['verb', 'noun'],
            ['too', 'than', 'can'],
        ]

        self.df = pd.DataFrame({'test': token_rows})
        self.data_model = DataModel(self.df)
        self.data_model.metadata = MetaData()

    def _scrub(self, **scrubber_kwargs):
        """Apply a StopWordScrubber built from the kwargs; return the frame."""
        StopWordScrubber(**scrubber_kwargs).scrub(self.data_model)
        return self.data_model.get_dataframe()

    def _assert_no_stopwords(self, rows):
        """Assert that none of the known stop words is left in any row."""
        for tokens in rows:
            for stopword in self._STOPWORDS:
                self.assertFalse(stopword in tokens)

    def test_scrubbing_new_column(self):
        frame = self._scrub(column='test', new_column='test2')
        self._assert_no_stopwords(frame['test2'])

        # The source column must still hold its original token counts.
        for index, expected_length in enumerate((5, 5, 2, 3)):
            self.assertEqual(expected_length, len(frame['test'][index]))

    def test_scrubbing(self):
        frame = self._scrub(column='test')
        self._assert_no_stopwords(frame['test'])

    def test_verbose_scrubbing(self):
        frame = self._scrub(column='test', verbosity=1)
        self._assert_no_stopwords(frame['test'])
Esempio n. 2
0
class testHTMLScrubber(unittest.TestCase):
    """Checks that CodeScrubber removes HTML markup and inline script code.

    Every scrubbed row must be free of markup fragments, and the plain-text
    fixture row ('... (dont remove this)') must keep its parenthesised text.
    """

    # Text that only the plain-text fixture row contains and that the
    # scrubber is expected to leave in place.
    _PRESERVED_TEXT = 'dont remove this'

    def setUp(self):
        data = {
            'text': [
                '<p>this is some text</p>',
                '<h1 id="bla", class="blabla", style="transform: translatyeY(-50%)">this is some more text</h1>',
                '<p>and even <b>more</b> text, damn</p>',
                'this is my text (dont remove this)',
                # Backslashes of the embedded JavaScript regexes are doubled
                # so the literal has no invalid escape sequences; the runtime
                # value of the string is unchanged.
                "this is some text,  $('span#TrackingJobBody a').each(function (i, v) { if ($(v).attr('href')) { var href = $(v).attr('href').toLowerCase(); if (href.match(\"^http\")) { switch (true) { case /facebook/.test(href): $(v).attr('mns_rt', 'NonJob-Facebook'); break; case /linkedin/.test(href): $(v).attr('mns_rt', 'NonJob-Linkedin'); break; case /twitter\\.com/.test(href): $(v).attr('mns_rt', 'NonJob-Twitter'); break; case /plus\\.google\\.com/.test(href): $(v).attr('mns_rt', 'NonJob-GooglePlus'); break; case /youtube/.test(href): $(v).attr('data-track', 'Client-Social-Youtube'); break; case /http[s]?\\:\\/\\/([a-z0-9\\-\\.]{1,}\\.[a-z]{2,})[\\/]?$/.test(href): $(v).attr('data-track', 'Client-Link-Homepage'); break; default: $(v).attr('mns_rt', 'jobcustomapplyonline'); break; } } } });"
            ]
        }

        metadata = MetaData()
        self.df = pd.DataFrame(data)
        self.data_model = DataModel(self.df)
        self.data_model.metadata = metadata

    def _validate_column(self, column_name):
        """Validate every row of the column and the preserved plain text."""
        texts = list(self.data_model.get_dataframe()[column_name])
        for text in texts:
            self.validate_string(text)

        # Only one fixture row contains this text, so it is checked once per
        # column rather than on every row (which could never pass).
        self.assertTrue(
            any(self._PRESERVED_TEXT in text for text in texts),
            'plain text in parentheses was removed by the scrubber')

    def testHTMLRemoval(self):
        scrubber = CodeScrubber('text')
        scrubber.scrub(self.data_model)

        self._validate_column('text')

    def validate_string(self, text):
        """Assert that no markup or script fragment is left in ``text``."""
        for fragment in ('>', '<', '{', 'bla', 'transform', '50%'):
            self.assertNotIn(fragment, text)

    def testHTMLRemoval_verbose(self):
        scrubber = CodeScrubber('text', verbosity=1)
        scrubber.scrub(self.data_model)

        self._validate_column('text')

    def testHTMLRemoval_new_col(self):
        scrubber = CodeScrubber(text_column='text', new_text_column='new')
        scrubber.scrub(self.data_model)

        self._validate_column('new')
Esempio n. 3
0
class testPunctuationScrubber(unittest.TestCase):
    """Checks that PunctuationScrubber strips punctuation marks from text."""

    def setUp(self):
        sentences = [
            'This is, some text.',
            'Is this, (some) text!?',
            'Would you like: ham, spam and eggs; spam, ham and eggs or eggs, ham and spam?',
        ]

        self.df = pd.DataFrame({'text': sentences})
        self.data_model = DataModel(self.df)
        self.data_model.metadata = MetaData()

    def _scrub_and_validate(self, scrubber, column_name):
        """Run the scrubber, then validate every row of ``column_name``."""
        scrubber.scrub(self.data_model)

        frame = self.data_model.get_dataframe()
        for text in frame[column_name]:
            self.validate_string(text)

    def test_punc_removal(self):
        self._scrub_and_validate(PunctuationScrubber('text'), 'text')

    def validate_string(self, text):
        """Assert that ``text`` contains none of the checked punctuation."""
        for mark in (',', '.', ';', ':', '!', '?', '(', ')'):
            self.assertFalse(mark in text)

    def test_punc_removal_verbose(self):
        self._scrub_and_validate(PunctuationScrubber('text', verbosity=1),
                                 'text')

    def test_punc_removal_new_col(self):
        self._scrub_and_validate(
            PunctuationScrubber(text_column='text', new_text_column='new'),
            'new')
Esempio n. 4
0
class WordStemmerTest(unittest.TestCase):
    """Checks the token lists produced by WordStemmer."""

    # Fixture rows and the stems the tests expect for them.
    _ORIGINAL = (['is', 'this', 'a', 'stemmable', 'sentence'],
                 ['cats', 'are', 'smarter', 'than', 'dogs'])
    _STEMMED = (['is', 'this', 'a', 'stemmabl', 'sentenc'],
                ['cat', 'are', 'smarter', 'than', 'dog'])

    def setUp(self):
        # Copy the rows so the class-level fixtures are never shared with
        # the frame handed to the scrubber.
        rows = [list(tokens) for tokens in self._ORIGINAL]

        self.df = pd.DataFrame({'test': rows})
        self.data_model = DataModel(self.df)
        self.data_model.metadata = MetaData()

    def _assert_rows(self, frame, column_name, expected_rows):
        """Compare the first rows of ``column_name`` with ``expected_rows``."""
        for index, expected in enumerate(expected_rows):
            self.assertEqual(frame[column_name][index], expected)

    def test_scrubbing(self):
        WordStemmer(column='test').scrub(self.data_model)

        frame = self.data_model.get_dataframe()
        self._assert_rows(frame, 'test', self._STEMMED)

    def test_scrubbing_verbose(self):
        WordStemmer(column='test', verbosity=1).scrub(self.data_model)

        frame = self.data_model.get_dataframe()
        self._assert_rows(frame, 'test', self._STEMMED)

    def test_scrubbing_new_col(self):
        WordStemmer(column='test', new_column='new').scrub(self.data_model)

        frame = self.data_model.get_dataframe()
        self._assert_rows(frame, 'new', self._STEMMED)
        # The source column must be left untouched.
        self._assert_rows(frame, 'test', self._ORIGINAL)
Esempio n. 5
0
    def separate_by_target_categories(data_model: DataModel,
                                      target_column_name):
        """Split the frame into the rows of its first two target categories.

        Categories are taken in the order ``unique()`` reports them for the
        target column. Rows belonging to any further category are not part
        of either result, and an ``IndexError`` is raised when the column
        holds fewer than two distinct values.

        :param data_model: object exposing ``get_dataframe()``.
        :param target_column_name: name of the column to split on.
        :return: tuple ``(rows_of_first_category, rows_of_second_category)``.
        """
        frame = data_model.get_dataframe()
        labels = frame[target_column_name].unique()

        first_mask = frame[target_column_name] == labels[0]
        first_stack = frame.loc[first_mask]

        second_mask = frame[target_column_name] == labels[1]
        second_stack = frame.loc[second_mask]

        return first_stack, second_stack
Esempio n. 6
0
class TestAverageColumnScrubber(unittest.TestCase):
    """Covers validation and averaging done by AverageColumnScrubber."""

    # Name of the output column used by every test.
    _AVERAGE_COLUMN = 'average'

    def setUp(self):
        self._data = {
            'column_1': [0, 2, 3],
            'column_2': [3, 2, 1],
            'column_3': [3, 2, 2],
        }

        self._dataframe = pd.DataFrame(data=self._data)
        self._data_model = DataModel(self._dataframe)
        self._data_model.metadata.define_numerical_columns(
            ['column_1', 'column_2', 'column_3'])

    def _scrubber_for(self, *input_columns):
        """Build a scrubber averaging ``input_columns`` into 'average'."""
        return AverageColumnScrubber(input_columns, self._AVERAGE_COLUMN)

    def test_invalid_column(self):
        scrubber = self._scrubber_for('column_1', 'column_2', 'invalid_1')

        with self.assertRaises(RuntimeError):
            scrubber.validate(self._data_model)

    def test_valid_metadata(self):
        scrubber = self._scrubber_for('column_1', 'column_2', 'column_3')

        outcome = scrubber.validate_metadata(self._data_model.metadata)
        self.assertIsNone(outcome)

    def test_invalid_metadata(self):
        # Re-declare one input as categorical before building the scrubber.
        self._data_model.metadata.define_categorical_columns(['column_3'])
        scrubber = self._scrubber_for('column_1', 'column_2', 'column_3')

        with self.assertRaises(RuntimeError):
            scrubber.validate_metadata(self._data_model.metadata)

    def test_averaging_3_columns(self):
        scrubber = self._scrubber_for('column_1', 'column_2', 'column_3')
        scrubber.validate(self._data_model)
        scrubber.scrub(self._data_model)

        averages = self._data_model.get_dataframe()[self._AVERAGE_COLUMN]
        self.assertListEqual([2, 2, 2], averages.tolist())
Esempio n. 7
0
class testLowerTextScrubber(unittest.TestCase):
    """Checks that LowerTextScrubber leaves only lower-case text behind."""

    def setUp(self):
        sentences = [
            'This is some text.',
            'Get some text ASAP?',
            'This is some text for John and Joan',
        ]

        self.df = pd.DataFrame({'text': sentences})
        self.data_model = DataModel(self.df)
        self.data_model.metadata = MetaData()

    def _scrub_and_validate(self, scrubber, column_name):
        """Run the scrubber, then validate every row of ``column_name``."""
        scrubber.scrub(self.data_model)

        frame = self.data_model.get_dataframe()
        for text in frame[column_name]:
            self.validate_string(text)

    def test_lower(self):
        self._scrub_and_validate(LowerTextScrubber('text'), 'text')

    def validate_string(self, text: str):
        """Assert that ``text`` equals its own lower-cased form."""
        lowered = text.lower()
        self.assertEqual(text, lowered)

    def test_lower_verbose(self):
        self._scrub_and_validate(LowerTextScrubber('text', verbosity=1),
                                 'text')

    def test_lower_new_col(self):
        self._scrub_and_validate(
            LowerTextScrubber(text_column='text', new_text_column='new'),
            'new')
Esempio n. 8
0
class TestMultipleCatListToMultipleHot(unittest.TestCase):
    """Checks MultipleCatListToMultipleHotScrubber on a list-valued column.

    The scrubber is expected to add one 'list_1_<value>' column per distinct
    list value and to register those columns as binary in the metadata.
    """

    def testScrubbing(self):
        data = {
            'num_1': [0, 1, 2, 3, 4, 5, 6, 7, 8, 50],
            'list_1': [['EUR', 'USD'], ['USD', 'JPY', 'AUD'], ['EUR'],
                       ['EUR', 'GBP', 'AUD'], ['USD'], ['EUR', 'JPY'],
                       ['EUR', 'GBP'], ['USD', 'JPY'], ['EUR', 'GBP'],
                       ['USD']],
        }

        metadata = MetaData()
        metadata.define_numerical_columns(['num_1'])
        metadata.define_multiple_cat_columns(['list_1'])

        self.df = pd.DataFrame(data)
        self.data_model = DataModel(self.df)
        self.data_model.metadata = metadata

        scrubber = MultipleCatListToMultipleHotScrubber(col_name='list_1')
        scrubber.validate(self.data_model)
        scrubber.scrub(self.data_model)

        new_df = self.data_model.get_dataframe()
        columns = list(new_df.columns.values)
        expected_hot_columns = [
            'list_1_EUR', 'list_1_GBP', 'list_1_USD', 'list_1_JPY',
            'list_1_AUD'
        ]

        # test new columns: the two original ones plus one per list value
        self.assertEqual(len(columns), 7)
        for hot_column in expected_hot_columns:
            self.assertIn(hot_column, columns)

        # check column contents
        has_EUR_series = new_df['list_1_EUR']
        self.assertEqual(list(has_EUR_series.to_dict().values()),
                         [1, 0, 1, 1, 0, 1, 1, 0, 1, 0])

        # test metadata: membership is checked against the metadata itself,
        # not against the frame's columns again.
        # NOTE(review): assumes `binary_columns` supports `in` on column
        # names (e.g. a list of names) -- confirm against MetaData.
        meta_data_categorical_cols = self.data_model.metadata.binary_columns
        self.assertEqual(len(meta_data_categorical_cols), 5)
        for hot_column in expected_hot_columns:
            self.assertIn(hot_column, meta_data_categorical_cols)
Esempio n. 9
0
    def balance_data(self, data_model: DataModel,
                     target_column_name: str) -> DataModel:
        """Attach a per-row weights column to the data model's frame.

        Weights per label come from ``map_weights_by_cat_label`` and are
        expanded to one value per row by ``get_weights_list``. A warning is
        emitted because the new column still has to be passed on by the
        caller.

        :param data_model: model whose dataframe receives the column.
        :param target_column_name: column holding the class labels.
        :return: the same ``data_model`` instance, with the updated frame set.
        """
        label_weights = self.map_weights_by_cat_label(data_model,
                                                      target_column_name)

        frame = data_model.get_dataframe()
        row_weights = self.get_weights_list(frame, target_column_name,
                                            label_weights)

        warnings.warn(
            'note: adding weights column ({}), make sure it is passed to '
            'the estimator- and data builder!'.format(WEIGHTS_COLUMN))

        frame[WEIGHTS_COLUMN] = row_weights
        data_model.set_dataframe(frame)

        return data_model
Esempio n. 10
0
class TestMissingDataReplacer(unittest.TestCase):
    """Checks how MissingDataReplacer fills gaps in the scrubbed columns."""

    def setUp(self):
        self._data = {
            'numerical_1': [2, None, 3, 4],
            'numerical_3': [None, 2, 3, 4],
            'categorical_1': ['one', None, 'two', 'three'],
            'categorical_2': ['apple', 'pie', None, 'three'],
            'categorical_3': ['apple', 'pie', None, 'three'],
            'unknown_1': [9, 10, 11, 12]
        }

        self._dataframe = pd.DataFrame(data=self._data)
        self._data_model = DataModel(self._dataframe)

    def _validate_and_scrub(self, scrubber):
        """Validate the scrubber against the model, then apply it."""
        scrubber.validate(self._data_model)
        scrubber.scrub(self._data_model)

    def _cell(self, column_name, row):
        """Return one cell of the model's current dataframe."""
        return self._data_model.get_dataframe()[column_name][row]

    def test_categorize_columns(self):
        self._data_model.metadata.define_categorical_columns(
            ['categorical_1', 'categorical_2', 'categorical_3'])
        self._data_model.metadata.define_numerical_columns(
            ['numerical_1', 'numerical_3'])

        unknown_category = 'unknown'
        self._validate_and_scrub(
            MissingDataReplacer(
                missing_category_name=unknown_category,
                missing_numerical_value='average',
                scrub_columns=['categorical_1', 'categorical_2',
                               'numerical_1']))

        self._validate_and_scrub(
            MissingDataReplacer(missing_numerical_value=1,
                                scrub_columns=['numerical_3']))

        # (expected value, column, row); 'categorical_3' and 'unknown_1'
        # were not listed in any scrub_columns.
        expectations = (
            (unknown_category, 'categorical_1', 1),
            (unknown_category, 'categorical_2', 2),
            (None, 'categorical_3', 2),
            (2, 'numerical_1', 0),
            (3, 'numerical_1', 1),
            (1, 'numerical_3', 0),
            (2, 'numerical_3', 1),
            (9, 'unknown_1', 0),
        )
        for expected, column_name, row in expectations:
            self.assertEqual(expected, self._cell(column_name, row))
Esempio n. 11
0
class TestDateScrubber(unittest.TestCase):
    """Checks StringToDateScrubber on ISO-like timestamp strings."""

    # Format handed to the scrubber and used to build the expected value.
    _DATE_FORMAT = '%Y-%m-%d'

    def setUp(self):
        timestamps = [
            '2017-03-26T05:04:46.539Z',
            '2017-12-01T23:04:46.539Z',
            '2017-02-08T07:38:48.129Z',
        ]
        self._data = {'date': timestamps}

        self._dataframe = pd.DataFrame(data=self._data)
        self.data_model = DataModel(self._dataframe)

    def _make_scrubber(self):
        """Build the scrubber for the 'date' column."""
        return StringToDateScrubber(date_columns={'date': self._DATE_FORMAT})

    def test_validate(self):
        # Passes as long as validation does not raise.
        self._make_scrubber().validate(self.data_model)

    def test_scrub(self):
        self._make_scrubber().scrub(self.data_model)

        expected = datetime.strptime('2017-12-01', self._DATE_FORMAT)
        scrubbed = self.data_model.get_dataframe()['date'][1]
        self.assertEqual(expected, scrubbed)
Esempio n. 12
0
class TestMultipleCatToListColumnScrubber(unittest.TestCase):
    """Checks that MultipleCatToListScrubber turns joined strings into lists."""

    def setUp(self):
        currency_strings = [
            'EUR,USD', 'USD,JPY,AUD', 'EUR', 'EUR,GBP,AUD', 'USD',
            'EUR,JPY', 'EUR,GBP', 'USD,JPY', 'EUR,GBP', 'USD'
        ]
        data = {
            'num_1': [0, 1, 2, 3, 4, 5, 6, 7, 8, 50],
            'mh_1': currency_strings,
        }

        metadata = MetaData()
        metadata.define_numerical_columns(['num_1'])
        metadata.define_multiple_cat_columns(['mh_1'])

        self.df = pd.DataFrame(data)
        self.data_model = DataModel(self.df)
        self.data_model.metadata = metadata

    def testScrubbing(self):
        # 'sepperator' is the keyword spelling the scrubber is called with.
        MultipleCatToListScrubber(sepperator=',').scrub(self.data_model)

        scrubbed_column = self.data_model.get_dataframe()['mh_1']
        self.assertIsInstance(scrubbed_column[0], list)
Esempio n. 13
0
 def randomize_data(data: DataModel, seed: int):
     """Shuffle all rows of the model's dataframe reproducibly.

     The frame is resampled with ``frac=1`` and ``random_state=seed`` and the
     result is stored back through ``set_dataframe``; nothing is returned.
     """
     shuffled = data.get_dataframe().sample(frac=1, random_state=seed)
     data.set_dataframe(shuffled)
Esempio n. 14
0
 def validate_data_frame(self, data_frame: DataModel, columns: list):
     """Assert the model's target column name and its set of columns.

     The target column name must be 'target_1'; the frame's columns must
     match ``columns`` regardless of order.
     """
     self.assertEqual(data_frame.target_column_name, 'target_1')

     actual_columns = data_frame.get_dataframe().columns.tolist()
     self.assertCountEqual(actual_columns, columns)
Esempio n. 15
0
class TestOutlierScrubbing(unittest.TestCase):
    """Covers construction, validation and scrubbing of OutlierScrubber."""

    def setUp(self):
        # Both numerical columns hold the same values; 50 is the outlier.
        numbers = [0, 1, 2, 3, 4, 5, 6, 7, 8, 50]
        data = {
            'num_1': list(numbers),
            'num_2': list(numbers),
            'cat_1': [
                'EUR', 'USD', 'EUR', 'EUR', 'USD', 'EUR', 'EUR', 'USD', 'EUR',
                'USD'
            ],
        }

        metadata = MetaData()
        metadata.define_numerical_columns(['num_1', 'num_2'])
        metadata.define_categorical_columns(['cat_1'])
        self.df = pd.DataFrame(data)
        self.data_model = DataModel(self.df)
        self.data_model.metadata = metadata

    def _assert_rows_after_scrub(self, scrubber, expected_rows):
        """Scrub the model and check how many rows are left."""
        scrubber.scrub(self.data_model)
        remaining = len(self.data_model.get_dataframe())
        self.assertEqual(remaining, expected_rows)

    def testOutlierScrubberCreate1(self):
        OutlierScrubber(col_z={'num_1': 2, 'num_2': 3})

    def testOutlierScrubberCreate2(self):
        OutlierScrubber(all_z=2)

    def testOutlierScrubberCreate3(self):
        # Passing both col_z and all_z is expected to be rejected.
        with self.assertRaises(AssertionError):
            OutlierScrubber(col_z={'num_1': 2, 'num_2': 3}, all_z=2)

    def testOutlierScrubberCreate4(self):
        # Passing neither col_z nor all_z is expected to be rejected.
        with self.assertRaises(AssertionError):
            OutlierScrubber()

    def testOutlierScrubberConfig(self):
        scrubber = OutlierScrubber(col_z={'num_1': 2, 'num_2': 3})
        config = scrubber.scrubber_config_list
        for column_name in ('num_1', 'num_2'):
            self.assertEqual(config[column_name],
                             MetaData.NUMERICAL_DATA_TYPE)

    def testOutlierValidateMetaData(self):
        scrubber = OutlierScrubber(col_z={'num_1': 2, 'num_2': 3})
        scrubber.validate_metadata(self.data_model.metadata)

    def testOutlierValidateMetaDataFalse(self):
        scrubber = OutlierScrubber(col_z={'num_1': 2, 'num_3': 3})

        with self.assertRaises(RuntimeError):
            scrubber.validate_metadata(self.data_model.metadata)

    def testOutlierValidate(self):
        scrubber = OutlierScrubber(col_z={'num_1': 2, 'num_2': 3})
        scrubber.validate(self.data_model)

    def testOutlierValidateFalse(self):
        scrubber = OutlierScrubber(col_z={'num_1': 2, 'num_3': 3})

        with self.assertRaises(AssertionError):
            scrubber.validate(self.data_model)

    # NOTE(review): this method lacks the 'test' prefix, so the default
    # unittest loader never collects it. It looks like it was meant to be
    # 'testOutlierScrubber1' -- confirm the expected outcome before renaming.
    def setOutlierScrubber1(self):
        with self.assertRaises(AssertionError):
            scrubber = OutlierScrubber(col_z={'num_1': 2, 'cat_1': 3})
            scrubber.scrub(self.data_model)

    def testOutlierScrubber2(self):
        self._assert_rows_after_scrub(
            OutlierScrubber(col_z={'num_1': 2, 'num_2': 3}), 9)

    def testOutlierScrubber3a(self):
        self._assert_rows_after_scrub(OutlierScrubber(all_z=2), 9)

    def testOutlierScrubber3b(self):
        self._assert_rows_after_scrub(OutlierScrubber(all_z=1), 5)
Esempio n. 16
0
class TestAndScrubber(unittest.TestCase):
    """Checks AndScrubber chaining an average and a currency scrubber.

    ConvertCurrencyScrubber is called throughout with five positional
    arguments, in the order used by the passing tests of this class:
    value column, new value column, currency column, target currency,
    date column.
    """

    def setUp(self):
        date_1 = datetime.strptime('01-01-2018', '%d-%m-%Y')
        date_2 = datetime.strptime('01-01-2017', '%d-%m-%Y')
        date_3 = datetime.now()

        self._data = {
            'value_1': [0, 1, 50],
            'value_2': [0, 3, 150],
            'currency': ['EUR', 'USD', 'EUR'],
            'date': [date_3, date_1, date_2],
        }

        self._df = pd.DataFrame(self._data)
        self.data_model = DataModel(self._df)
        self.data_model.metadata.define_numerical_columns(
            ['value_1', 'value_2'])

        self.data_model.metadata.define_categorical_columns(['currency'])

    def test_multiple_validation(self):
        average_column_scrubber = AverageColumnScrubber(('value_1', 'value_2'),
                                                        'value')
        convert_currency_scrubber = ConvertCurrencyScrubber(
            'value', 'new_value', 'currency', 'USD', 'date')
        self.and_scrubber = AndScrubber(average_column_scrubber,
                                        convert_currency_scrubber)

        self.and_scrubber.validate_metadata(self.data_model.metadata)

    def test_first_invalid(self):
        # Only the first scrubber is invalid: 'value_3' is not a column of
        # the fixture. (Previously 'value_2, value_3' was a single misquoted
        # string and the currency scrubber was missing an argument.)
        average_column_scrubber = AverageColumnScrubber(
            ('value_1', 'value_2', 'value_3'), 'value')
        convert_currency_scrubber = ConvertCurrencyScrubber(
            'value', 'new_value', 'currency', 'USD', 'date')
        self.and_scrubber = AndScrubber(average_column_scrubber,
                                        convert_currency_scrubber)

        with self.assertRaises(RuntimeError):
            self.and_scrubber.validate_metadata(self.data_model.metadata)

    def test_second_invalid(self):
        average_column_scrubber = AverageColumnScrubber(('value_1', 'value_2'),
                                                        'value')
        # Only the second scrubber is invalid: 'invalid' is passed as the
        # currency column, which the fixture metadata does not define.
        convert_currency_scrubber = ConvertCurrencyScrubber(
            'value', 'new_value', 'invalid', 'USD', 'date')
        self.and_scrubber = AndScrubber(average_column_scrubber,
                                        convert_currency_scrubber)

        with self.assertRaises(RuntimeError):
            self.and_scrubber.validate_metadata(self.data_model.metadata)

    def test_multiple_scrubbing(self):
        average_column_scrubber = AverageColumnScrubber(('value_1', 'value_2'),
                                                        'value')
        convert_currency_scrubber = ConvertCurrencyScrubber(
            'value', 'new_value', 'currency', 'USD', 'date')
        self.and_scrubber = AndScrubber(average_column_scrubber,
                                        convert_currency_scrubber)

        self.and_scrubber.scrub(self.data_model)
        result = self.data_model.get_dataframe()['new_value']
        self.assertEqual(0.0, result[0])
        self.assertEqual(2.0, result[1])
        self.assertEqual(105, round(result[2]))
Esempio n. 17
0
class TestConvertCurrencyScrubber(unittest.TestCase):
    """Covers validation and conversion done by ConvertCurrencyScrubber.

    The passing tests of this class call the scrubber with the positional
    order: value column, new value column, currency column, target currency
    and (optionally) date column. The invalid-* tests use that same order so
    that exactly the argument under test is wrong.
    """

    def setUp(self):
        date_1 = datetime.strptime('01-01-2018', '%d-%m-%Y')
        date_2 = datetime.strptime('01-01-2017', '%d-%m-%Y')
        date_3 = datetime.now()

        self._data = {
            'value': [0, 2, 100],
            'currency': ['EUR', 'USD', 'EUR'],
            'date': [date_3, date_1, date_2],
        }

        self._dataframe = pd.DataFrame(data=self._data)
        self._data_model = DataModel(self._dataframe)

    def test_valid_columns(self):
        # NOTE(review): despite its name this test expects a RuntimeError
        # and passes only four positional arguments, unlike the other
        # "valid" tests here. It may be raising because the arguments are
        # shifted -- confirm the intended behaviour before changing it.
        currency_converter = ConvertCurrencyScrubber('value', 'currency',
                                                     'USD', 'date')

        with self.assertRaises(RuntimeError):
            currency_converter.validate(self._data_model)

    def test_valid_metadata(self):
        self._data_model.metadata.define_numerical_columns(['value'])
        self._data_model.metadata.define_categorical_columns(['currency'])

        currency_converter = ConvertCurrencyScrubber('value', 'value',
                                                     'currency', 'USD', 'date')
        currency_converter.validate_metadata(self._data_model.metadata)

    def test_valid_columns_without_date(self):
        currency_converter = ConvertCurrencyScrubber('value', 'value',
                                                     'currency', 'USD')
        currency_converter.validate(self._data_model)

    def test_invalid_column(self):
        # 'invalid' is passed as the currency column; the fixture has no
        # such column.
        currency_converter = ConvertCurrencyScrubber('value', 'value',
                                                     'invalid', 'USD', 'date')

        with self.assertRaises(RuntimeError):
            currency_converter.validate(self._data_model)

    def test_invalid_metadata(self):
        # 'currency' is declared numerical here, unlike in
        # test_valid_metadata where it is categorical.
        self._data_model.metadata.define_numerical_columns(
            ['value', 'currency'])

        currency_converter = ConvertCurrencyScrubber('value', 'value',
                                                     'currency', 'USD', 'date')
        with self.assertRaises(RuntimeError):
            currency_converter.validate_metadata(self._data_model.metadata)

    def test_invalid_date_column(self):
        # Only the date column is wrong; the fixture has no 'invalid_date'.
        # NOTE(review): presumes validate() checks the date column when one
        # is given -- confirm against ConvertCurrencyScrubber.
        currency_converter = ConvertCurrencyScrubber(
            'value', 'value', 'currency', 'USD', 'invalid_date')

        with self.assertRaises(RuntimeError):
            currency_converter.validate(self._data_model)

    def test_converting_with_date_column(self):
        currency_converter = ConvertCurrencyScrubber('value', 'value',
                                                     'currency', 'USD', 'date')

        currency_converter.scrub(self._data_model)

        df = self._data_model.get_dataframe()
        result = df['value']
        self.assertEqual(0.0, result[0])
        self.assertEqual(2.0, result[1])
        self.assertEqual(105, round(result[2]))

    def test_converting_without_date_column(self):
        currency_converter = ConvertCurrencyScrubber('value', 'value',
                                                     'currency', 'USD')

        currency_converter.scrub(self._data_model)

        df = self._data_model.get_dataframe()
        result = df['value']
        self.assertEqual(0.0, result[0])
        self.assertEqual(2.0, result[1])
        self.assertEqual(116, round(result[2]))