import os
import urllib.error
from tempfile import gettempdir
from unittest import TestCase
from unittest.mock import patch

import pandas as pd

from serenata_toolbox.federal_senate.dataset import Dataset


class TestJourneyFederalSenateDataset(TestCase):

    def setUp(self):
        self.path = gettempdir()
        self.subject = Dataset(self.path)

    def test_journey_federal_senate_dataset(self):
        # fetch_saves_raw_files
        self.subject.fetch()
        federal_senate_csv_files = [
            'federal-senate-{}.csv'.format(year)
            for year in self.subject.year_range
        ]
        for federal_senate_csv_file in federal_senate_csv_files:
            file_path = os.path.join(self.path, federal_senate_csv_file)
            self.assertTrue(os.path.exists(file_path), 'fetch_saves_raw_files')

        # translate_creates_english_versions_for_every_csv
        self.subject.translate()
        federal_senate_xz_files = [
            'federal-senate-{}.xz'.format(year)
            for year in self.subject.year_range
        ]
        for federal_senate_xz_file in federal_senate_xz_files:
            file_path = os.path.join(self.path, federal_senate_xz_file)
            self.assertTrue(
                os.path.exists(file_path),
                'translate_creates_english_versions_for_every_csv')

        # clean_creates_a_reimbursements_file
        self.subject.clean()
        file_path = os.path.join(self.path,
                                 'federal-senate-reimbursements.xz')
        self.assertTrue(os.path.exists(file_path),
                        'clean_creates_a_reimbursements_file')


def update_datasets(self):
    os.makedirs(self.path, exist_ok=True)
    federal_senate = Dataset(self.path)
    federal_senate.fetch()
    federal_senate.translate()
    federal_senate_reimbursements_path = federal_senate.clean()
    return federal_senate_reimbursements_path


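# Step-by-step usage of the Dataset API that update_datasets wraps, targeting
# a temporary directory. Only fetch(), translate() and clean() come from the
# snippets in this file; the function name and directory layout below are
# illustrative assumptions.
def update_federal_senate_example():
    target = os.path.join(gettempdir(), 'federal_senate')
    os.makedirs(target, exist_ok=True)

    dataset = Dataset(target)
    dataset.fetch()      # downloads the raw federal-senate-<year>.csv files
    dataset.translate()  # writes translated federal-senate-<year>.xz files
    return dataset.clean()  # path to federal-senate-reimbursements.xz

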
class TestJourneyFederalSenateDataset(TestCase):

    def setUp(self):
        self.path = gettempdir()
        self.subject = Dataset(self.path)

    def test_journey_federal_senate_dataset(self):
        # fetch_saves_raw_files
        self.subject.fetch()
        federal_senate_csv_files = ['federal-senate-{}.csv'.format(year)
                                    for year in self.subject.years]
        for federal_senate_csv_file in federal_senate_csv_files:
            file_path = os.path.join(self.path, federal_senate_csv_file)
            self.assertTrue(os.path.exists(file_path), 'fetch_saves_raw_files')

        # translate_creates_english_versions_for_every_csv
        self.subject.translate()
        federal_senate_xz_files = ['federal-senate-{}.xz'.format(year)
                                   for year in self.subject.years]
        for federal_senate_xz_file in federal_senate_xz_files:
            file_path = os.path.join(self.path, federal_senate_xz_file)
            self.assertTrue(
                os.path.exists(file_path),
                'translate_creates_english_versions_for_every_csv')

        # clean_creates_a_reimbursements_file
        self.subject.clean()
        file_path = os.path.join(self.path,
                                 'federal-senate-reimbursements.xz')
        self.assertTrue(os.path.exists(file_path),
                        'clean_creates_a_reimbursements_file')


class TestFederalSenateDataset(TestCase):

    @classmethod
    def setUpClass(cls):
        cls.expected_files = [
            'federal-senate-2008.csv',
            'federal-senate-2009.csv'
        ]

    @patch('serenata_toolbox.federal_senate.dataset.urlretrieve')
    def test_fetch_files_from_S3(self, mocked_url_retrieve):
        self.path = gettempdir()
        self.subject = Dataset(self.path)

        retrieved_files, not_found_files = self.subject.fetch()

        self.assertTrue(mocked_url_retrieve.called)
        self.assertEqual(mocked_url_retrieve.call_count,
                         len(self.subject.year_range))
        for retrieved_file, expected_file in zip(retrieved_files,
                                                 self.expected_files):
            self.assertIn(expected_file, retrieved_file)

    def test_fetch_not_found_files_from_S3(self):
        self.path = gettempdir()
        self.subject = Dataset(self.path, 2007, 2008)

        retrieved_files, not_found_files = self.subject.fetch()

        for not_found_file, expected_file in zip(not_found_files,
                                                 self.expected_files):
            self.assertIn('federal-senate-2007.csv', not_found_file)

    def test_dataset_translation(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'),
                               2008, 2009)
        expected_files = ['federal-senate-2008.csv']

        translated_files, not_found_files = self.subject.translate()

        for translated_file, expected_file in zip(translated_files,
                                                  expected_files):
            self.assertIn(expected_file, translated_file)

    def test_if_translation_happened_as_expected(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'),
                               2008, 2009)
        file_path = os.path.join(self.subject.path, 'federal-senate-2008.csv')
        federal_senate_2008 = pd.read_csv(file_path,
                                          sep=';',
                                          encoding='ISO-8859-1',
                                          skiprows=1)

        self.assertIsNotNone(federal_senate_2008['ANO'],
                             "expects 'ANO' as column in this dataset")

        self.subject.translate()

        translated_file_path = os.path.join(self.subject.path,
                                            'federal-senate-2008.xz')
        translated_federal_senate_2008 = pd.read_csv(translated_file_path,
                                                     encoding='utf-8')

        self.assertIsNotNone(translated_federal_senate_2008['year'],
                             "expects 'year' as column in this dataset")

        os.remove(os.path.join(self.subject.path, 'federal-senate-2008.xz'))

    def test_dataset_translation_failing_to_find_file(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'),
                               2007, 2008)
        expected_files = ['federal-senate-2007.csv']

        translated_files, not_found_files = self.subject.translate()

        for not_found_file, expected_file in zip(not_found_files,
                                                 expected_files):
            self.assertIn(expected_file, not_found_file)

    def test_dataset_cleanup(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'xz'),
                               2009, 2010)

        reimbursement_path = self.subject.clean()

        expected_path = os.path.join('tests', 'fixtures', 'xz',
                                     'federal-senate-reimbursements.xz')
        self.assertEqual(reimbursement_path, expected_path)

        os.remove(expected_path)


class TestFederalSenateDataset(TestCase):

    @classmethod
    def setUpClass(cls):
        cls.expected_files = ['federal-senate-2008.csv',
                              'federal-senate-2009.csv']

    @patch('serenata_toolbox.federal_senate.dataset.urlretrieve')
    def test_fetch_files_from_S3(self, mocked_url_retrieve):
        path = gettempdir()
        self.subject = Dataset(path)

        retrieved_files, _ = self.subject.fetch()

        self.assertTrue(mocked_url_retrieve.called)
        self.assertEqual(mocked_url_retrieve.call_count,
                         len(self.subject.years))
        for retrieved_file, expected_file in zip(retrieved_files,
                                                 self.expected_files):
            self.assertIn(expected_file, retrieved_file)

    @patch('serenata_toolbox.federal_senate.dataset.urlretrieve')
    def test_fetch_raises_HTTPError(self, mocked_url_retrieve):
        mocked_url_retrieve.side_effect = urllib.error.HTTPError(
            None, None, None, None, None)
        self.path = gettempdir()
        self.subject = Dataset(self.path, [2007])

        with self.assertRaises(urllib.error.HTTPError) as context:
            self.subject.fetch()

        self.assertTrue(isinstance(context.exception, urllib.error.HTTPError))

    @patch('serenata_toolbox.federal_senate.dataset.urlretrieve')
    def test_fetch_raises_URLError(self, mocked_url_retrieve):
        mocked_url_retrieve.side_effect = urllib.error.URLError('tests reason')
        path = gettempdir()
        subject = Dataset(path, [2007])

        with self.assertRaises(urllib.error.URLError) as context:
            subject.fetch()

        self.assertTrue(isinstance(context.exception, urllib.error.URLError))

    def test_dataset_translation(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'),
                               [2008])
        expected_files = ['federal-senate-2008.csv']

        translated_files, _ = self.subject.translate()

        for translated_file, expected_file in zip(translated_files,
                                                  expected_files):
            self.assertIn(expected_file, translated_file)

    def test_if_translation_happened_as_expected(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'),
                               [2008])
        file_path = os.path.join(self.subject.path, 'federal-senate-2008.csv')
        federal_senate_2008 = pd.read_csv(file_path,
                                          sep=';',
                                          encoding='ISO-8859-1',
                                          skiprows=1)

        self.assertIsNotNone(federal_senate_2008['ANO'],
                             "expects 'ANO' as column in this dataset")

        self.subject.translate()

        translated_file_path = os.path.join(self.subject.path,
                                            'federal-senate-2008.xz')
        translated_federal_senate_2008 = pd.read_csv(translated_file_path,
                                                     encoding='utf-8')

        self.assertIsNotNone(translated_federal_senate_2008['year'],
                             "expects 'year' as column in this dataset")

        os.remove(os.path.join(self.subject.path, 'federal-senate-2008.xz'))

    def test_dataset_translation_failing_to_find_file(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'csv'),
                               [2007])

        with self.assertRaises(FileNotFoundError) as context:
            self.subject.translate()

        self.assertTrue(isinstance(context.exception, FileNotFoundError))

    def test_dataset_cleanup(self):
        self.subject = Dataset(os.path.join('tests', 'fixtures', 'xz'),
                               [2009])

        reimbursement_path = self.subject.clean()

        expected_path = os.path.join('tests', 'fixtures', 'xz',
                                     'federal-senate-reimbursements.xz')
        self.assertEqual(reimbursement_path, expected_path)

        os.remove(expected_path)


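# A sketch of the Dataset interface exercised by the tests above. This is an
# inferred outline, not the real implementation in
# serenata_toolbox.federal_senate.dataset; the constructor signature and the
# ellipsis bodies are assumptions for illustration only.
class DatasetSketch:

    def __init__(self, path, years=None):
        self.path = path    # directory holding the CSV and XZ files
        self.years = years  # years to process (older versions used year_range)

    def fetch(self):
        """Download federal-senate-<year>.csv files; return (retrieved, not_found)."""
        ...

    def translate(self):
        """Write English federal-senate-<year>.xz files; raise FileNotFoundError for missing CSVs."""
        ...

    def clean(self):
        """Consolidate translated files; return the path to federal-senate-reimbursements.xz."""
        ...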