Esempi in Python per Dataset, esempi in Python per serenata_toolbox.chamber_of_deputies.dataset.Dataset

Esempio n. 1

0

Mostra file

File: test_chamber_of_deputies_dataset.py Progetto: turicas/serenata-toolbox

 def setUp(self):
     """Prepare the scratch directory, the Dataset under test and fixture paths.

     A fresh temporary directory (prefix ``serenata-``) is created for every
     test and its location is printed. The ``[2017]`` passed to ``Dataset``
     is presumably the list of years to handle -- confirm against Dataset.
     """
     # Fixtures are resolved relative to the current working directory.
     self.fixtures_path = os.path.join(
         'tests', 'fixtures', 'chamber_of_deputies')
     self.years = [2017]

     scratch_dir = mkdtemp(prefix='serenata-')
     self.path = scratch_dir
     print(scratch_dir)
     self.subject = Dataset(scratch_dir, [2017])

Esempio n. 2

0

Mostra file

File: adapter.py Progetto: rlimax/rosie

 def update_datasets(self):
     """Refresh the datasets stored under ``self.path``.

     Ensures the target directory exists, runs the Dataset steps in order
     (fetch, convert_to_csv, translate, clean) and finally fetches
     ``self.COMPANIES_DATASET`` into the same directory.
     """
     os.makedirs(self.path, exist_ok=True)

     dataset = Dataset(self.path)
     # The steps must run in exactly this order; each name is looked up
     # right before it is called.
     for step_name in ('fetch', 'convert_to_csv', 'translate', 'clean'):
         getattr(dataset, step_name)()

     fetch(self.COMPANIES_DATASET, self.path)

Esempio n. 3

0

Mostra file

File: test_chamber_of_deputies_dataset.py Progetto: darkmoonzika/serenata-toolbox

class TestChamberOfDeputiesDataset(TestCase):
    """Integration test for the fetch -> translate -> clean steps of Dataset."""

    def setUp(self):
        """Create a scratch directory and a Dataset instance pointing at it."""
        self.path = mkdtemp(prefix='serenata-')
        print(self.path)
        self.subject = Dataset(self.path)
        # Every year from 2009 up to and including the current one.
        self.years = list(range(2009, date.today().year + 1))

    def tearDown(self):
        """Remove the scratch directory, ignoring any removal errors."""
        rmtree(self.path, ignore_errors=True)

    def _assert_file_exists(self, name):
        """Assert that `name` exists inside the scratch dir; return its path."""
        file_path = os.path.join(self.path, name)
        # unittest assertions (unlike bare `assert`) survive `python -O` and
        # report which file was missing.
        self.assertTrue(os.path.exists(file_path),
                        '{} was not created'.format(file_path))
        return file_path

    def test_fetch_translate_clean_integration(self):
        """Run the whole pipeline and check the files produced by each step."""
        self.subject.fetch()
        files = ["Ano-{}.csv".format(n) for n in self.years]
        files.append('datasets-format.html')
        for name in files:
            self._assert_file_exists(name)

        self.subject.translate()
        for year in self.years:
            self._assert_file_exists("reimbursements-{}.xz".format(year))

        self.subject.clean()
        file_path = self._assert_file_exists('reimbursements.xz')

        # test for subquota translation
        dataset = pd.read_csv(file_path, compression='xz')
        all_subquotas = [
            'Maintenance of office supporting parliamentary activity',
            'Locomotion, meal and lodging',
            'Fuels and lubricants',
            'Consultancy, research and technical work',
            'Publicity of parliamentary activity',
            'Purchase of office supplies',
            'Software purchase or renting; Postal services; Subscriptions',
            'Security service provided by specialized company',
            'Flight tickets',
            'Telecommunication',
            'Postal services',
            'Publication subscriptions',
            'Congressperson meal',
            'Lodging, except for congressperson from Distrito Federal',
            'Automotive vehicle renting or watercraft charter',
            'Aircraft renting or charter of aircraft',
            'Automotive vehicle renting or charter',
            'Watercraft renting or charter',
            'Taxi, toll and parking',
            'Terrestrial, maritime and fluvial tickets',
            'Participation in course, talk or similar event',
            'Flight ticket issue',
        ]

        # Each distinct description found in the cleaned file must be one of
        # the known English names; subTest reports every offender, not just
        # the first one.
        for subquota in pd.unique(dataset['subquota_description']):
            with self.subTest(subquota=subquota):
                self.assertIn(subquota, all_subquotas)

Esempio n. 4

0

Mostra file

File: test_chamber_of_deputies_dataset.py Progetto: turicas/serenata-toolbox

    def test_translate_csv_with_reimbursement_with_net_value_with_decimal_comma(
            self):
        """Translating a CSV with decimal commas yields the decimal-point fixture.

        The input fixture is passed to ``Dataset('')._translate_file`` and the
        decompressed content of the file it returns is compared, as text,
        with the expected fixture.
        """
        csv_with_decimal_comma = os.path.join(self.fixtures_path,
                                              'Ano-with-decimal-comma.csv')
        path_with_decimal_point = os.path.join(
            self.fixtures_path, 'reimbursements-with-decimal-point.csv')
        # Read the expectation with the same codec used to decode the output
        # below; relying on the platform default encoding makes the
        # comparison locale-dependent.
        with open(path_with_decimal_point, 'r',
                  encoding='utf-8') as csv_expected:
            expected = csv_expected.read()

        xz_path = Dataset('')._translate_file(csv_with_decimal_comma)
        with lzma.open(xz_path) as xz_file:
            output = xz_file.read().decode('utf-8')
        self.assertEqual(output, expected)

Esempio n. 5

0

Mostra file

class TestChamberOfDeputiesDataset(TestCase):
    """Integration test for the fetch -> translate -> clean steps of Dataset."""

    def setUp(self):
        """Create a scratch directory and a Dataset instance pointing at it."""
        self.path = mkdtemp(prefix='serenata-')
        print(self.path)
        self.subject = Dataset(self.path)
        # Every year from 2009 up to and including the current one.
        self.years = list(range(2009, date.today().year + 1))

    def tearDown(self):
        """Remove the scratch directory, ignoring any removal errors."""
        rmtree(self.path, ignore_errors=True)

    def _assert_file_exists(self, name):
        """Assert that `name` exists inside the scratch dir; return its path."""
        file_path = os.path.join(self.path, name)
        # unittest assertions (unlike bare `assert`) survive `python -O` and
        # report which file was missing.
        self.assertTrue(os.path.exists(file_path),
                        '{} was not created'.format(file_path))
        return file_path

    def test_fetch_translate_clean_integration(self):
        """Run the whole pipeline and check the files produced by each step."""
        self.subject.fetch()
        # NOTE(review): only the 2017 CSV is checked after fetch, while the
        # translate step below is checked for every year in self.years --
        # confirm this asymmetry is intended.
        for name in ('Ano-2017.csv', 'datasets-format.html'):
            self._assert_file_exists(name)

        self.subject.translate()
        for year in self.years:
            self._assert_file_exists("reimbursements-{}.xz".format(year))

        self.subject.clean()
        file_path = self._assert_file_exists('reimbursements.xz')

        # test for subquota translation
        dataset = pd.read_csv(file_path, compression='xz')
        all_subquotas = [
            'Maintenance of office supporting parliamentary activity',
            'Locomotion, meal and lodging',
            'Fuels and lubricants',
            'Consultancy, research and technical work',
            'Publicity of parliamentary activity',
            'Purchase of office supplies',
            'Software purchase or renting; Postal services; Subscriptions',
            'Security service provided by specialized company',
            'Flight tickets',
            'Telecommunication',
            'Postal services',
            'Publication subscriptions',
            'Congressperson meal',
            'Lodging, except for congressperson from Distrito Federal',
            'Automotive vehicle renting or watercraft charter',
            'Aircraft renting or charter of aircraft',
            'Automotive vehicle renting or charter',
            'Watercraft renting or charter',
            'Taxi, toll and parking',
            'Terrestrial, maritime and fluvial tickets',
            'Participation in course, talk or similar event',
            'Flight ticket issue',
        ]

        # Each distinct description found in the cleaned file must be one of
        # the known English names; subTest reports every offender, not just
        # the first one.
        for subquota in pd.unique(dataset['subquota_description']):
            with self.subTest(subquota=subquota):
                self.assertIn(subquota, all_subquotas)

Esempio n. 6

0

Mostra file

File: test_chamber_of_deputies_dataset.py Progetto: darkmoonzika/serenata-toolbox

 def setUp(self):
     """Create a scratch directory and a Dataset instance pointing at it.

     The directory location is printed, and ``self.years`` is set to every
     year from 2009 up to and including the current one.
     """
     scratch_dir = mkdtemp(prefix='serenata-')
     self.path = scratch_dir
     print(scratch_dir)
     self.subject = Dataset(scratch_dir)

     current_year = date.today().year
     self.years = list(range(2009, current_year + 1))

Esempio n. 7

0

Mostra file

File: test_chamber_of_deputies_dataset.py Progetto: darkmoonzika/serenata-toolbox

 def setUp(self):
     """Prepare the scratch directory, the Dataset under test and fixture paths.

     A fresh temporary directory (prefix ``serenata-``) is created for every
     test and its location is printed. The ``[2017]`` passed to ``Dataset``
     is presumably the list of years to handle -- confirm against Dataset.
     """
     # Fixtures are resolved relative to the current working directory.
     fixture_parts = ('tests', 'fixtures', 'chamber_of_deputies')
     self.fixtures_path = os.path.join(*fixture_parts)
     self.years = [2017]

     scratch_dir = mkdtemp(prefix='serenata-')
     self.path = scratch_dir
     print(scratch_dir)
     self.subject = Dataset(scratch_dir, [2017])

Esempio n. 8

0

Mostra file

File: test_chamber_of_deputies_dataset.py Progetto: darkmoonzika/serenata-toolbox

class TestChamberOfDeputiesDataset(TestCase):
    """Tests for Dataset's fetch, translate and clean steps using 2017 fixtures."""

    def setUp(self):
        """Create a scratch directory, the Dataset under test and fixture paths."""
        self.path = mkdtemp(prefix='serenata-')
        print(self.path)
        self.subject = Dataset(self.path, [2017])
        self.years = [2017]
        # Fixtures are resolved relative to the current working directory.
        self.fixtures_path = os.path.join('tests', 'fixtures', 'chamber_of_deputies')

    def tearDown(self):
        """Remove the scratch directory, ignoring any removal errors."""
        rmtree(self.path, ignore_errors=True)

    @patch('serenata_toolbox.chamber_of_deputies.dataset.urlretrieve')
    def test_fetch_chambers_of_deputies_datasets(self, mocked_urlretrieve):
        """fetch() calls urlretrieve once per expected file and reports them."""
        path_to_2017_dataset_zip = os.path.join(self.fixtures_path, 'Ano-2017.zip')
        path_to_dataset_format_html = os.path.join(self.fixtures_path, 'datasets-format.html')
        # urlretrieve is mocked, so the files are placed in the scratch
        # directory beforehand.
        copy(path_to_2017_dataset_zip, self.path)
        copy(path_to_dataset_format_html, self.path)
        expected_files = ['Ano-2017.zip', 'datasets-format.html']

        retrieved_files = self.subject.fetch()

        self.assertTrue(mocked_urlretrieve.called)
        self.assertEqual(mocked_urlretrieve.call_count, len(expected_files))
        for retrieved_file, expected_file in zip(retrieved_files, expected_files):
            self.assertIn(expected_file, retrieved_file)

    def test_translate_2017_dataset(self):
        """translate() turns the 2017 CSV into an .xz with translated columns."""
        xz_path = os.path.join(self.path, 'reimbursements-2017.xz')
        csv_path = os.path.join(self.fixtures_path, 'Ano-2017.csv')
        copy(csv_path, self.path)

        data_frame_2017_as_csv = self._read_csv(csv_path)
        self._assert_that_the_columns_are_as_expected_before_translation(data_frame_2017_as_csv)

        self.subject.translate()
        reimbursements_2017 = self._read_xz(xz_path)

        self._assert_that_the_columns_are_as_expected_after_translation(reimbursements_2017)

    def test_clean_2017_reimbursements(self):
        """clean() writes reimbursements.xz containing only known subquotas."""
        copy(os.path.join(self.fixtures_path, 'reimbursements-2017.xz'), self.path)
        file_path = os.path.join(self.path, 'reimbursements.xz')

        self.subject.clean()

        # unittest assertions (unlike bare `assert`) survive `python -O` and
        # produce a useful failure message.
        self.assertTrue(os.path.exists(file_path),
                        '{} was not created'.format(file_path))

        dataset = pd.read_csv(file_path, compression='xz')
        all_subquotas = [subquota[1] for subquota in self.subject.subquotas]

        for subquota in pd.unique(dataset['subquota_description']):
            # Label the sub-test so a failure names the offending value.
            with self.subTest(subquota=subquota):
                self.assertIn(subquota, all_subquotas)

    def _read_csv(self, path):
        """Load a semicolon-delimited, decimal-comma CSV into a DataFrame.

        The listed identifier columns are read as strings. The builtin
        ``str`` is used because the ``np.str`` alias was removed from NumPy.
        """
        return pd.read_csv(path,
                           decimal=',',
                           encoding='utf-8',
                           delimiter=";",
                           quoting=csv.QUOTE_NONE,
                           dtype={'ideDocumento': str,
                                  'idecadastro': str,
                                  'nuCarteiraParlamentar': str,
                                  'codLegislatura': str,
                                  'txtCNPJCPF': str,
                                  'numRessarcimento': str},
                           )

    def _read_xz(self, filepath):
        """Load a translated reimbursements file into a DataFrame.

        The listed columns are read as strings. No ``compression`` argument
        is passed, so pandas applies its default handling to ``filepath``.
        """
        dtype = {
            'applicant_id': str,
            'batch_number': str,
            'cnpj_cpf': str,
            'congressperson_document': str,
            'congressperson_id': str,
            'document_id': str,
            'document_number': str,
            'document_type': str,
            'leg_of_the_trip': str,
            'passenger': str,
            'reimbursement_number': str,
            'subquota_group_description': str,
            'subquota_group_id': str,
            'subquota_number': str,
            'term_id': str,
        }
        return pd.read_csv(filepath, dtype=dtype)

    def _assert_that_the_columns_are_as_expected_before_translation(self, data_frame_2017):
        """Check every key of ``translate_columns`` is a column of the frame."""
        expected_columns = self.subject.translate_columns.keys()

        for column in expected_columns:
            with self.subTest(column=column):
                self.assertIn(column, data_frame_2017.columns)

    def _assert_that_the_columns_are_as_expected_after_translation(self, reimbursements):
        """Check every value of ``translate_columns`` is a column of the frame."""
        expected_columns = self.subject.translate_columns.values()

        for column in expected_columns:
            with self.subTest(column=column):
                self.assertIn(column, reimbursements.columns)

Esempio n. 9

0

Mostra file

 def setUp(self):
     """Create a scratch directory and a Dataset instance pointing at it.

     The directory location is printed, and ``self.years`` is set to every
     year from 2009 up to and including the current one.
     """
     first_year = 2009
     last_year = date.today().year
     self.years = list(range(first_year, last_year + 1))

     self.path = mkdtemp(prefix='serenata-')
     print(self.path)
     self.subject = Dataset(self.path)

Esempio n. 10

0

Mostra file

File: test_chamber_of_deputies_dataset.py Progetto: turicas/serenata-toolbox

class TestChamberOfDeputiesDataset(TestCase):
    """Tests for Dataset's fetch, translate and clean steps using 2017 fixtures."""

    def setUp(self):
        """Create a scratch directory, the Dataset under test and fixture paths."""
        self.path = mkdtemp(prefix='serenata-')
        print(self.path)
        self.subject = Dataset(self.path, [2017])
        self.years = [2017]
        # Fixtures are resolved relative to the current working directory.
        self.fixtures_path = os.path.join('tests', 'fixtures',
                                          'chamber_of_deputies')

    def tearDown(self):
        """Remove the scratch directory, ignoring any removal errors."""
        rmtree(self.path, ignore_errors=True)

    @patch('serenata_toolbox.chamber_of_deputies.dataset.urlretrieve')
    def test_fetch_chambers_of_deputies_datasets(self, mocked_urlretrieve):
        """fetch() calls urlretrieve once per expected file and reports them."""
        path_to_2017_dataset_zip = os.path.join(self.fixtures_path,
                                                'Ano-2017.zip')
        path_to_dataset_format_html = os.path.join(self.fixtures_path,
                                                   'datasets-format.html')
        # urlretrieve is mocked, so the files are placed in the scratch
        # directory beforehand.
        copy(path_to_2017_dataset_zip, self.path)
        copy(path_to_dataset_format_html, self.path)
        expected_files = ['Ano-2017.zip', 'datasets-format.html']

        retrieved_files = self.subject.fetch()

        self.assertTrue(mocked_urlretrieve.called)
        self.assertEqual(mocked_urlretrieve.call_count, len(expected_files))
        for retrieved_file, expected_file in zip(retrieved_files,
                                                 expected_files):
            self.assertIn(expected_file, retrieved_file)

    def test_translate_2017_dataset(self):
        """translate() turns the 2017 CSV into an .xz with translated columns."""
        xz_path = os.path.join(self.path, 'reimbursements-2017.xz')
        csv_path = os.path.join(self.fixtures_path, 'Ano-2017.csv')
        copy(csv_path, self.path)

        data_frame_2017_as_csv = self._read_csv(csv_path)
        self._assert_that_the_columns_are_as_expected_before_translation(
            data_frame_2017_as_csv)

        self.subject.translate()
        reimbursements_2017 = self._read_xz(xz_path)

        self._assert_that_the_columns_are_as_expected_after_translation(
            reimbursements_2017)

    def test_clean_2017_reimbursements(self):
        """clean() writes reimbursements.xz containing only known subquotas."""
        copy(os.path.join(self.fixtures_path, 'reimbursements-2017.xz'),
             self.path)
        file_path = os.path.join(self.path, 'reimbursements.xz')

        self.subject.clean()

        # unittest assertions (unlike bare `assert`) survive `python -O` and
        # produce a useful failure message.
        self.assertTrue(os.path.exists(file_path),
                        '{} was not created'.format(file_path))

        dataset = pd.read_csv(file_path, compression='xz')
        all_subquotas = [subquota[1] for subquota in self.subject.subquotas]

        for subquota in pd.unique(dataset['subquota_description']):
            # Label the sub-test so a failure names the offending value.
            with self.subTest(subquota=subquota):
                self.assertIn(subquota, all_subquotas)

    def test_translate_csv_with_reimbursement_with_net_value_with_decimal_comma(
            self):
        """Translating a CSV with decimal commas yields the decimal-point fixture."""
        csv_with_decimal_comma = os.path.join(self.fixtures_path,
                                              'Ano-with-decimal-comma.csv')
        path_with_decimal_point = os.path.join(
            self.fixtures_path, 'reimbursements-with-decimal-point.csv')
        # Read the expectation with the same codec used to decode the output
        # below; relying on the platform default encoding makes the
        # comparison locale-dependent.
        with open(path_with_decimal_point, 'r',
                  encoding='utf-8') as csv_expected:
            expected = csv_expected.read()

        xz_path = Dataset('')._translate_file(csv_with_decimal_comma)
        with lzma.open(xz_path) as xz_file:
            output = xz_file.read().decode('utf-8')
        self.assertEqual(output, expected)

    def _read_csv(self, path):
        """Load a semicolon-delimited, decimal-comma CSV into a DataFrame.

        The listed identifier columns are read as strings. The builtin
        ``str`` is used because the ``np.str`` alias was removed from NumPy.
        """
        return pd.read_csv(
            path,
            decimal=',',
            encoding='utf-8',
            delimiter=";",
            quoting=csv.QUOTE_NONE,
            dtype={
                'ideDocumento': str,
                'idecadastro': str,
                'nuCarteiraParlamentar': str,
                'codLegislatura': str,
                'txtCNPJCPF': str,
                'numRessarcimento': str,
            },
        )

    def _read_xz(self, filepath):
        """Load a translated reimbursements file into a DataFrame.

        The listed columns are read as strings. No ``compression`` argument
        is passed, so pandas applies its default handling to ``filepath``.
        """
        dtype = {
            'applicant_id': str,
            'batch_number': str,
            'cnpj_cpf': str,
            'congressperson_document': str,
            'congressperson_id': str,
            'document_id': str,
            'document_number': str,
            'document_type': str,
            'leg_of_the_trip': str,
            'passenger': str,
            'reimbursement_number': str,
            'subquota_group_description': str,
            'subquota_group_id': str,
            'subquota_number': str,
            'term_id': str,
        }
        return pd.read_csv(filepath, dtype=dtype)

    def _assert_that_the_columns_are_as_expected_before_translation(
            self, data_frame_2017):
        """Check every key of ``translate_columns`` is a column of the frame."""
        expected_columns = self.subject.translate_columns.keys()

        for column in expected_columns:
            with self.subTest(column=column):
                self.assertIn(column, data_frame_2017.columns)

    def _assert_that_the_columns_are_as_expected_after_translation(
            self, reimbursements):
        """Check every value of ``translate_columns`` is a column of the frame."""
        expected_columns = self.subject.translate_columns.values()

        for column in expected_columns:
            with self.subTest(column=column):
                self.assertIn(column, reimbursements.columns)