# Shared imports assumed by the test examples below.
import os
import urllib.error
from tempfile import gettempdir
from unittest import TestCase
from unittest.mock import patch

import pandas as pd

from serenata_toolbox.federal_senate.dataset import Dataset


class TestJourneyFederalSenateDataset(TestCase):
    def setUp(self):
        self.path = gettempdir()
        self.subject = Dataset(self.path)

    def test_journey_federal_senate_dataset(self):
        # fetch_saves_raw_files
        self.subject.fetch()
        federal_senate_csv_files = [
            'federal-senate-{}.csv'.format(year)
            for year in self.subject.year_range
        ]
        for federal_senate_csv_file in federal_senate_csv_files:
            file_path = os.path.join(self.path, federal_senate_csv_file)
            self.assertTrue(os.path.exists(file_path), 'fetch_saves_raw_files')

        # translate_creates_english_versions_for_every_csv
        self.subject.translate()
        federal_senate_xz_files = [
            'federal-senate-{}.xz'.format(year)
            for year in self.subject.year_range
        ]
        for federal_senate_xz_file in federal_senate_xz_files:
            file_path = os.path.join(self.path, federal_senate_xz_file)
            self.assertTrue(
                os.path.exists(file_path),
                'translate_creates_english_versions_for_every_csv')

        # clean_creates_a_reimbursements_file
        self.subject.clean()
        file_path = os.path.join(self.path, 'federal-senate-reimbursements.xz')
        self.assertTrue(os.path.exists(file_path),
                        'clean_creates_a_reimbursements_file')
Example #2
    def update_datasets(self):
        os.makedirs(self.path, exist_ok=True)
        federal_senate = Dataset(self.path)
        federal_senate.fetch()
        federal_senate.translate()
        federal_senate_reimbursements_path = federal_senate.clean()

        return federal_senate_reimbursements_path
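
Example #2 is a bare method that expects a self.path attribute on its host object. A minimal sketch of how it could be wired up, assuming a hypothetical wrapper class (the class name, path, and usage below are illustrative, not part of the original):

import os
from serenata_toolbox.federal_senate.dataset import Dataset


class FederalSenateUpdater:
    """Hypothetical host class for the update_datasets method above."""

    def __init__(self, path):
        self.path = path  # directory where the federal senate files are written

    def update_datasets(self):
        os.makedirs(self.path, exist_ok=True)
        federal_senate = Dataset(self.path)
        federal_senate.fetch()      # download the raw CSV files
        federal_senate.translate()  # write the English .xz versions
        return federal_senate.clean()  # merge into the reimbursements file


# Illustrative usage:
# updater = FederalSenateUpdater('/tmp/serenata-data')  # hypothetical path
# reimbursements_path = updater.update_datasets()
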
class TestJourneyFederalSenateDataset(TestCase):
    def setUp(self):
        self.path = gettempdir()
        self.subject = Dataset(self.path)

    def test_journey_federal_senate_dataset(self):
        # fetch_saves_raw_files
        self.subject.fetch()
        federal_senate_csv_files = [
            'federal-senate-{}.csv'.format(year)
            for year in self.subject.years
        ]
        for federal_senate_csv_file in federal_senate_csv_files:
            file_path = os.path.join(self.path, federal_senate_csv_file)
            self.assertTrue(os.path.exists(file_path), 'fetch_saves_raw_files')

        # translate_creates_english_versions_for_every_csv
        self.subject.translate()
        federal_senate_xz_files = [
            'federal-senate-{}.xz'.format(year)
            for year in self.subject.years
        ]
        for federal_senate_xz_file in federal_senate_xz_files:
            file_path = os.path.join(self.path, federal_senate_xz_file)
            self.assertTrue(os.path.exists(file_path), 'translate_creates_english_versions_for_every_csv')

        # clean_creates_a_reimbursements_file
        self.subject.clean()
        file_path = os.path.join(self.path, 'federal-senate-reimbursements.xz')
        self.assertTrue(os.path.exists(file_path), 'clean_creates_a_reimbursements_file')
Example #4
class TestFederalSenateDataset(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.expected_files = [
            'federal-senate-2008.csv', 'federal-senate-2009.csv'
        ]

    @patch('serenata_toolbox.federal_senate.dataset.urlretrieve')
    def test_fetch_files_from_S3(self, mocked_url_retrieve):
        self.path = gettempdir()
        self.subject = Dataset(self.path)

        retrieved_files, not_found_files = self.subject.fetch()

        self.assertTrue(mocked_url_retrieve.called)
        self.assertEqual(mocked_url_retrieve.call_count,
                         len(self.subject.year_range))
        for retrieved_file, expected_file in zip(retrieved_files,
                                                 self.expected_files):

            self.assertIn(expected_file, retrieved_file)

    def test_fetch_not_found_files_from_S3(self):
        self.path = gettempdir()
        self.subject = Dataset(self.path, 2007, 2008)

        retrieved_files, not_found_files = self.subject.fetch()

        for not_found_file, expected_file in zip(not_found_files,
                                                 self.expected_files):

            self.assertIn('federal-senate-2007.csv', not_found_file)

    def test_dataset_translation(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'), 2008,
                               2009)

        expected_files = ['federal-senate-2008.csv']

        translated_files, not_found_files = self.subject.translate()

        for translated_file, expected_file in zip(translated_files,
                                                  expected_files):

            self.assertIn(expected_file, translated_file)

    def test_if_translation_happened_as_expected(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'), 2008,
                               2009)

        file_path = os.path.join(self.subject.path, 'federal-senate-2008.csv')
        federal_senate_2008 = pd.read_csv(file_path,
                                          sep=';',
                                          encoding='ISO-8859-1',
                                          skiprows=1)
        self.assertIsNotNone(federal_senate_2008['ANO'],
                             'expects \'ANO\' as column in this dataset')

        self.subject.translate()

        translated_file_path = os.path.join(self.subject.path,
                                            'federal-senate-2008.xz')
        translated_federal_senate_2008 = pd.read_csv(translated_file_path,
                                                     encoding='utf-8')

        self.assertIsNotNone(translated_federal_senate_2008['year'],
                             'expects \'year\' as column in this dataset')

        os.remove(os.path.join(self.subject.path, 'federal-senate-2008.xz'))

    def test_dataset_translation_failing_to_find_file(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'), 2007,
                               2008)

        expected_files = ['federal-senate-2007.csv']

        translated_files, not_found_files = self.subject.translate()

        for not_found_file, expected_file in zip(not_found_files,
                                                 expected_files):

            self.assertIn(expected_file, not_found_file)

    def test_dataset_cleanup(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'xz'), 2009,
                               2010)

        reimbursement_path = self.subject.clean()

        expected_path = os.path.join('tests', 'fixtures', 'xz',
                                     'federal-senate-reimbursements.xz')
        self.assertEqual(reimbursement_path, expected_path)

        os.remove(expected_path)
class TestFederalSenateDataset(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.expected_files = ['federal-senate-2008.csv',
                              'federal-senate-2009.csv']

    @patch('serenata_toolbox.federal_senate.dataset.urlretrieve')
    def test_fetch_files_from_S3(self, mocked_url_retrieve):
        path = gettempdir()
        self.subject = Dataset(path)

        retrieved_files, _ = self.subject.fetch()

        self.assertTrue(mocked_url_retrieve.called)
        self.assertEqual(mocked_url_retrieve.call_count, len(self.subject.years))
        for retrieved_file, expected_file in zip(
                retrieved_files, self.expected_files):

            self.assertIn(expected_file, retrieved_file)

    @patch('serenata_toolbox.federal_senate.dataset.urlretrieve')
    def test_fetch_raises_HTTPError(self, mocked_url_retrieve):
        mocked_url_retrieve.side_effect = urllib.error.HTTPError(None, None, None, None, None)
        self.path = gettempdir()
        self.subject = Dataset(self.path, [2007])

        with self.assertRaises(urllib.error.HTTPError) as context:
            self.subject.fetch()

        self.assertTrue(isinstance(context.exception, urllib.error.HTTPError))

    @patch('serenata_toolbox.federal_senate.dataset.urlretrieve')
    def test_fetch_raises_URLError(self, mocked_url_retrieve):
        mocked_url_retrieve.side_effect = urllib.error.URLError('tests reason')
        path = gettempdir()
        subject = Dataset(path, [2007])

        with self.assertRaises(urllib.error.URLError) as context:
            subject.fetch()

        self.assertTrue(isinstance(context.exception, urllib.error.URLError))

    def test_dataset_translation(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'),
                               [2008])

        expected_files = ['federal-senate-2008.csv']

        translated_files, _ = self.subject.translate()

        for translated_file, expected_file in zip(
                translated_files, expected_files):

            self.assertIn(expected_file, translated_file)

    def test_if_translation_happened_as_expected(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'),
                               [2008])

        file_path = os.path.join(self.subject.path, 'federal-senate-2008.csv')
        federal_senate_2008 = pd.read_csv(file_path,
                                          sep=';',
                                          encoding='ISO-8859-1',
                                          skiprows=1)
        self.assertIsNotNone(federal_senate_2008['ANO'],
                             "expects 'ANO' as column in this dataset")

        self.subject.translate()

        translated_file_path = os.path.join(self.subject.path, 'federal-senate-2008.xz')
        translated_federal_senate_2008 = pd.read_csv(translated_file_path,
                                                     encoding='utf-8')

        self.assertIsNotNone(translated_federal_senate_2008['year'],
                             "expects 'year' as column in this dataset")

        os.remove(os.path.join(self.subject.path, 'federal-senate-2008.xz'))

    def test_dataset_translation_failing_to_find_file(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'),
                               [2007])

        with self.assertRaises(FileNotFoundError) as context:
            self.subject.translate()

        self.assertTrue(isinstance(context.exception, FileNotFoundError))

    def test_dataset_cleanup(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'xz'),
                               [2009])

        reimbursement_path = self.subject.clean()

        expected_path = os.path.join('tests',
                                     'fixtures',
                                     'xz',
                                     'federal-senate-reimbursements.xz')
        self.assertEqual(
            reimbursement_path,
            expected_path
        )

        os.remove(expected_path)
Example #6
class TestFederalSenateDataset(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.expected_files = [
            'federal-senate-2008.csv', 'federal-senate-2009.csv'
        ]

    @patch('serenata_toolbox.federal_senate.dataset.urlretrieve')
    def test_fetch_files_from_S3(self, mocked_url_retrieve):
        path = gettempdir()
        self.subject = Dataset(path)

        retrieved_files, _ = self.subject.fetch()

        self.assertTrue(mocked_url_retrieve.called)
        self.assertEqual(mocked_url_retrieve.call_count,
                         len(self.subject.years))
        for retrieved_file, expected_file in zip(retrieved_files,
                                                 self.expected_files):

            self.assertIn(expected_file, retrieved_file)

    @patch('serenata_toolbox.federal_senate.dataset.urlretrieve')
    def test_fetch_raises_HTTPError(self, mocked_url_retrieve):
        mocked_url_retrieve.side_effect = urllib.error.HTTPError(
            None, None, None, None, None)
        self.path = gettempdir()
        self.subject = Dataset(self.path, [2007])

        with self.assertRaises(urllib.error.HTTPError) as context:
            self.subject.fetch()

        self.assertTrue(isinstance(context.exception, urllib.error.HTTPError))

    @patch('serenata_toolbox.federal_senate.dataset.urlretrieve')
    def test_fetch_raises_URLError(self, mocked_url_retrieve):
        mocked_url_retrieve.side_effect = urllib.error.URLError('tests reason')
        path = gettempdir()
        subject = Dataset(path, [2007])

        with self.assertRaises(urllib.error.URLError) as context:
            subject.fetch()

        self.assertTrue(isinstance(context.exception, urllib.error.URLError))

    def test_dataset_translation(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'),
                               [2008])

        expected_files = ['federal-senate-2008.csv']

        translated_files, _ = self.subject.translate()

        for translated_file, expected_file in zip(translated_files,
                                                  expected_files):

            self.assertIn(expected_file, translated_file)

    def test_if_translation_happened_as_expected(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'),
                               [2008])

        file_path = os.path.join(self.subject.path, 'federal-senate-2008.csv')
        federal_senate_2008 = pd.read_csv(file_path,
                                          sep=';',
                                          encoding='ISO-8859-1',
                                          skiprows=1)
        self.assertIsNotNone(federal_senate_2008['ANO'],
                             "expects 'ANO' as column in this dataset")

        self.subject.translate()

        translated_file_path = os.path.join(self.subject.path,
                                            'federal-senate-2008.xz')
        translated_federal_senate_2008 = pd.read_csv(translated_file_path,
                                                     encoding='utf-8')

        self.assertIsNotNone(translated_federal_senate_2008['year'],
                             "expects 'year' as column in this dataset")

        os.remove(os.path.join(self.subject.path, 'federal-senate-2008.xz'))

    def test_dataset_translation_failing_to_find_file(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'),
                               [2007])

        with self.assertRaises(FileNotFoundError) as context:
            self.subject.translate()

        self.assertTrue(isinstance(context.exception, FileNotFoundError))

    def test_dataset_cleanup(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'xz'), [2009])

        reimbursement_path = self.subject.clean()

        expected_path = os.path.join('tests', 'fixtures', 'xz',
                                     'federal-senate-reimbursements.xz')
        self.assertEqual(reimbursement_path, expected_path)

        os.remove(expected_path)
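
For reference, a minimal end-to-end sketch of the Dataset pipeline these tests exercise, assuming the newer constructor that takes a list of years (the earlier examples pass a start and end year, or read self.subject.year_range, instead):

import os
from tempfile import gettempdir
from serenata_toolbox.federal_senate.dataset import Dataset

path = gettempdir()
dataset = Dataset(path, [2008, 2009])

# fetch() downloads the raw CSVs and reports the years that were not found
retrieved_files, not_found_files = dataset.fetch()

# translate() writes an English, .xz-compressed version of each CSV
translated_files, _ = dataset.translate()

# clean() merges everything into federal-senate-reimbursements.xz and returns its path
reimbursements_path = dataset.clean()
print(os.path.exists(reimbursements_path))

Note that, like the journey tests above, this sketch hits the real data source: urlretrieve is not mocked here.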