def test_pipeline(self, app_with_db):
    """Process the BACI fixture CSV and compare both the L0 and L1 tables to the same rows."""
    dbi = app_with_db.dbi
    pipeline = DITBACIPipeline(dbi)
    pipeline.process(FileInfo.from_path('tests/fixtures/dit/baci/baci.csv'))

    # L0 and L1 are both compared against this identical set of rows
    baci_rows = [
        (1995, 10519, 4, 251, 1.548, 0.051),
        (1995, 30110, 4, 381, 1.249, 0.01),
    ]
    for table in (pipeline._l0_table, pipeline._l1_table):
        assert rows_equal_table(dbi, baci_rows, table, pipeline)
def test_pipeline(self, add_world_bank_raw_tariff):
    """Load raw tariffs into L0, transform them into L1, then re-run after adding a raw tariff.

    The second transform run is expected to update the first two L1 rows
    with the newly added raw tariff values.
    """
    raw_pipeline = WorldBankTariffPipeline(self.dbi, force=True)
    raw_pipeline.process(FileInfo.from_path(file_1))

    # check L0
    l0_rows = [
        (48, 1999, 201, 0, 'AHS', 5, 20),
        (262, 2005, 201, 380, 'BND', 40, 40),
        (266, 1998, 201, 0, 'AHS', 20, 20),
    ]
    assert rows_equal_table(self.dbi, l0_rows, raw_pipeline._l0_table, raw_pipeline)

    # check L1
    transform_pipeline = WorldBankTariffTransformPipeline(self.dbi, force=True)
    transform_pipeline.process()
    l1_rows_first_run = [
        (201, 12, 24, 2000, None, None, None, None, None, None, None, None),
        (201, 12, 24, 2001, None, None, None, None, None, None, None, None),
    ]
    assert rows_equal_table(
        self.dbi,
        l1_rows_first_run,
        transform_pipeline._l1_table,
        transform_pipeline,
        top_rows=2,
    )

    # check second run with different raw tariff updates L1
    extra_raw_tariff = {
        'reporter': 12,
        'year': 2000,
        'product': 201,
        'partner': 24,
        'duty_type': 'AHS',
        'simple_average': 10,
        'number_of_total_lines': 8,
    }
    add_world_bank_raw_tariff([extra_raw_tariff])
    transform_pipeline.process()
    l1_rows_second_run = [
        (201, 12, 24, 2000, 10, 10, None, None, None, None, None, None),
        (201, 12, 24, 2001, 10, None, None, None, None, None, None, None),
    ]
    assert rows_equal_table(
        self.dbi,
        l1_rows_second_run,
        transform_pipeline._l1_table,
        transform_pipeline,
        top_rows=2,
    )
def test_transform_of_datafile_continue(self, continue_transform, expected_rows, mocker, add_dit_baci):
    """Run the transform after a partial transform and compare L1 to the expected rows.

    ``self.partial_transform_data()`` is executed first; the transform is then
    run with ``_get_products`` patched to return products 301 and 401 and with
    the parametrized ``continue_transform`` flag.
    """
    required_countries = [
        ('BRA', 76, True),
        ('ZAF', 710, True),
        ('DZA', 12, True),
        ('AGO', 24, True),
        ('AUS', 36, True),
    ]
    patch_years(mocker, ('2018', '2018'))
    patch_required_countries(mocker, countries=required_countries)

    self.partial_transform_data()

    # NOTE(review): the assert is kept inside the patched scope — confirm this
    # matches the original layout.
    with mock.patch(
        'app.etl.organisation.world_bank.WorldBankTariffTransformPipeline._get_products',
        return_value=[['301'], ['401']],
    ):
        transform = WorldBankTariffTransformPipeline(
            self.dbi,
            force=False,
            continue_transform=continue_transform,
        )
        transform.process()
        assert rows_equal_table(self.dbi, expected_rows, transform._l1_table, transform)
def test_pipeline_happy_path(self, app_with_db):
    """Process the bound rates fixture and verify the resulting L0 and L1 rows."""
    dbi = app_with_db.dbi
    pipeline = WorldBankBoundRatesPipeline(dbi)
    pipeline.process(FileInfo.from_path('tests/fixtures/world_bank/bound_rates.csv'))

    # check L0
    l0_rows = [('H0', 4, 10111, 10, 2)]
    assert rows_equal_table(dbi, l0_rows, pipeline._l0_table, pipeline)

    # check L1
    l1_rows = [(4, 10111, 10)]
    assert rows_equal_table(dbi, l1_rows, pipeline._l1_table, pipeline)
def test_raw_to_events(app_with_db):
    """Process the 2016 exporters mock archive and verify the rows in the L1 table."""
    dbi = app_with_db.dbi
    archive = FileInfo.from_path('tests/fixtures/hmrc/exporters/exporters_2016_mock.zip')
    pipeline = HMRCExportersPipeline(dbi)
    pipeline.process(archive)

    l1_rows = [
        (
            '2016-01-01 00:00:00',
            'aaa limited',
            '01 street name street line london',
            'BB11BB',
            ['12345670'],
        ),
        (
            '2016-01-01 00:00:00',
            'abc company ltd',
            '1 - 3 street avenue house name city name',
            'AA11AA',
            ['12345678', '23456789'],
        ),
        (
            '2016-02-01 00:00:00',
            'bbb limited',
            'aaa house 2 street name city county',
            'AA111AA',
            ['98765432', '12345678'],
        ),
    ]
    assert rows_equal_table(dbi, l1_rows, pipeline._l1_table, pipeline)
def test_pipeline_multiple_nomen_codes(self, app_with_db):
    """Process bound rates given under several nomen codes and verify L0 and L1.

    L0 is expected to hold one row per nomen code, while L1 is expected to
    hold only the bound rate belonging to the highest nomen code.
    """
    dbi = app_with_db.dbi
    pipeline = WorldBankBoundRatesPipeline(dbi)
    pipeline.process(
        FileInfo.from_path('tests/fixtures/world_bank/bound_rates_multiple_nomen_codes.csv')
    )

    # check L0
    l0_rows = [
        ('H0', 4, 10111, 10, 2),
        ('H1', 4, 10111, 5, 2),
        ('H2', 4, 10111, 7, 2),
    ]
    assert rows_equal_table(dbi, l0_rows, pipeline._l0_table, pipeline)

    # check L1 has bound rate with highest nomen code
    l1_rows = [(4, 10111, 7)]
    assert rows_equal_table(dbi, l1_rows, pipeline._l1_table, pipeline)
def test_loading_new_data_into_existing_schema(app_with_db):
    """Process the same exporters archive twice and verify L1 holds every row twice.

    The second run reuses the rewound data stream of the first file under a
    different name; its rows should be appended to the existing L1 table
    rather than the schema being re-created.
    """
    dbi = app_with_db.dbi
    first_file = FileInfo.from_path('tests/fixtures/hmrc/exporters/exporters_2016_mock.zip')
    pipeline = HMRCExportersPipeline(dbi)
    pipeline.process(first_file)

    # rewind the stream so it can be processed again as a second, renamed file
    first_file.data.seek(0)
    second_file = FileInfo(first_file.name + '2', first_file.data)
    pipeline.process(second_file)

    # check L1 (when loading new data, it should be appended (not re-creating all schema))
    l1_rows = [
        (
            '2016-01-01 00:00:00',
            'aaa limited',
            '01 street name street line london',
            'BB11BB',
            ['12345670'],
        ),
        (
            '2016-01-01 00:00:00',
            'aaa limited',
            '01 street name street line london',
            'BB11BB',
            ['12345670'],
        ),
        (
            '2016-01-01 00:00:00',
            'abc company ltd',
            '1 - 3 street avenue house name city name',
            'AA11AA',
            ['12345678', '23456789'],
        ),
        (
            '2016-01-01 00:00:00',
            'abc company ltd',
            '1 - 3 street avenue house name city name',
            'AA11AA',
            ['12345678', '23456789'],
        ),
        (
            '2016-02-01 00:00:00',
            'bbb limited',
            'aaa house 2 street name city county',
            'AA111AA',
            ['98765432', '12345678'],
        ),
        (
            '2016-02-01 00:00:00',
            'bbb limited',
            'aaa house 2 street name city county',
            'AA111AA',
            ['98765432', '12345678'],
        ),
    ]
    assert rows_equal_table(dbi, l1_rows, pipeline._l1_table, pipeline)
def test_pipeline(self, app_with_db):
    """Process the Comtrade country list fixture and compare L0 and L1 to the same rows."""
    dbi = app_with_db.dbi
    pipeline = ComtradeCountryCodeAndISOPipeline(dbi)
    pipeline.process(
        FileInfo.from_path('tests/fixtures/comtrade/country_code_and_iso/country_list.csv')
    )

    country_rows = [
        (0, 'World', 'World', 'World', 'World', 'WL', 'WLD', '1962', '2061'),
        (4, 'Afghanistan', 'Afghanistan', 'Afghanistan', None, 'AF', 'AFG', '1962', '2061'),
        (
            899,
            'Areas, nes',
            'Areas, not elsewhere specified',
            'Areas, nes',
            None,
            None,
            None,
            '1962',
            '2061',
        ),
        (
            918,
            'European Union',
            'European Union',
            'European Union',
            None,
            'EU',
            'EUR',
            None,
            None,
        ),
    ]

    # check L0 first, then L1, against the same rows
    for table in (pipeline._l0_table, pipeline._l1_table):
        assert rows_equal_table(dbi, country_rows, table, pipeline)
def partial_transform_data(self):
    """Load the three-product raw tariff file and transform only product 201.

    ``_get_products`` is patched to return just product 201 and
    ``finish_processing`` is patched to return ``None``; afterwards the L1
    temp table is expected to hold ``PRODUCT_201_ROWS`` while the L1 table
    itself is expected to be empty.
    """
    raw_pipeline = WorldBankTariffPipeline(self.dbi, force=True)
    raw_pipeline.process(FileInfo.from_path(country_to_country_three_products))

    products_patch = mock.patch(
        'app.etl.organisation.world_bank.WorldBankTariffTransformPipeline._get_products',
        return_value=[['201']],
    )
    finish_patch = mock.patch(
        'app.etl.organisation.world_bank.WorldBankTariffTransformPipeline'
        '.finish_processing',
        return_value=None,
    )
    # NOTE(review): the asserts are kept inside the patched scope — confirm
    # this matches the original layout.
    with products_patch, finish_patch:
        transform = WorldBankTariffTransformPipeline(
            self.dbi,
            force=False,
            continue_transform=True,
        )
        transform.process()
        assert rows_equal_table(self.dbi, PRODUCT_201_ROWS, transform._l1_temp_table, transform)
        assert rows_equal_table(self.dbi, [], transform._l1_table, transform)
def test_one_datafile(self, app_with_db):
    """Process a single postcode snapshot and compare L0 and L1 to the same rows."""
    dbi = app_with_db.dbi
    pipeline = DITReferencePostcodesPipeline(dbi, force=False)
    pipeline.process(FileInfo.from_path(snapshot1))

    postcode_rows = [
        (
            'AB10 1AA',
            'S12000033',
            'Aberdeen City',
            'S99999999',
            None,
            'S99999999',
            None,
            'S99999999',
            'Scotland',
            '394251',
            '0806376',
            '2011-09-01',
            '2016-10-01',
        ),
        (
            'HU4 7SW',
            'E06000011',
            'East Riding of Yorkshire',
            'E37000018',
            'Humber',
            'E37000039',
            'York, North Yorkshire and East Riding',
            'E12000003',
            'Yorkshire and The Humber',
            '504860',
            '0429160',
            '1980-01-01',
            None,
        ),
    ]
    for table in (pipeline._l0_table, pipeline._l1_table):
        assert rows_equal_table(dbi, postcode_rows, table, pipeline)
def test_transform_of_datafile(
    self,
    raw_tariffs,
    bound_tariffs,
    year_range,
    required_countries,
    only_products,
    expected_rows,
    add_world_bank_raw_tariff,
    add_world_bank_bound_rates,
    mocker,
):
    """Transform the supplied raw and bound tariffs and compare L1 to the expected rows.

    The year range and the required countries are patched before the supplied
    tariffs are added; the transform is then run with ``force=True`` and
    ``products=only_products``.
    """
    # patch the pipeline configuration before adding any data
    patch_years(mocker, year_range)
    patch_required_countries(mocker, required_countries)

    add_world_bank_raw_tariff(raw_tariffs)
    add_world_bank_bound_rates(bound_tariffs)

    transform = WorldBankTariffTransformPipeline(
        self.dbi,
        force=True,
        products=only_products,
    )
    transform.process()

    assert rows_equal_table(self.dbi, expected_rows, transform._l1_table, transform)
def test_pipeline(self, app_with_db):
    """Process the EU spine fixture and verify the L0 rows and the first five L1 rows."""
    dbi = app_with_db.dbi
    pipeline = DITEUCountryMembershipPipeline(dbi)
    pipeline.process(
        FileInfo.from_path('tests/fixtures/dit/eu_country_membership/eu_spine.csv')
    )

    # check L0
    # Each row is (country, iso3) followed by a run of None values and a run
    # of 'EUN' values; the runs are spelled as repetitions instead of one
    # literal per value.
    # NOTE(review): run lengths must match the original literal tuples exactly
    # (61 trailing values per row) — confirm when applying.
    l0_rows = [
        ('Austria', 'AUT') + (None,) * 37 + ('EUN',) * 24,
        ('Belgium', 'BEL') + ('EUN',) * 61,
        ('Bulgaria', 'BGR') + (None,) * 49 + ('EUN',) * 12,
    ]
    assert rows_equal_table(dbi, l0_rows, pipeline._l0_table, pipeline)

    # check L1
    l1_rows = [
        ('Austria', 'AUT', 1958, None),
        ('Austria', 'AUT', 1959, None),
        ('Austria', 'AUT', 1960, None),
        ('Austria', 'AUT', 1961, None),
        ('Austria', 'AUT', 1962, None),
    ]
    assert rows_equal_table(dbi, l1_rows, pipeline._l1_table, pipeline, top_rows=5)
def test_new_data(self, app_with_db):
    """Process two postcode directory files in turn and verify the L0 and L1 rows.

    The pipeline is created with ``force=False`` and ``delete_previous=True``.
    Compared with L0, each expected L1 row carries ISO-formatted dates in the
    fourth and fifth positions and one additional trailing date value.
    """
    dbi = app_with_db.dbi
    pipeline = ONSPostcodeDirectoryPipeline(dbi, force=False, delete_previous=True)
    pipeline.process(FileInfo.from_path(file1))
    pipeline.process(FileInfo.from_path(file2))

    # check L0
    l0_rows = [
        (
            'AB1 0AA', 'AB1 0AA', 'AB1 0AA', '198001', None,
            'S99999999', 'S99999999', 'S12000033', 'S13002843', 'S99999999',
            '0', '385386', '0801193', '1',
            'S08000020', 'S99999999', 'S92000003', 'S99999999', '0',
            'S14000002', 'S15000001', 'S09000001', 'S22000047', 'S03000012',
            'S31000935', '99ZZ00', 'S00001364', '01C30', 'S99999999',
            'S01000011', 'S02000007', '6', '3C2', 'S00090303',
            'S01006514', 'S02001237', 'S34002990', 'S03000012',
            'S99999999', 'S99999999', '3', '1C3', '57.101474', '-2.242851',
            'S99999999', 'S99999999', 'S23000009', '6808', 'S99999999', 'S99999999',
        ),
        (
            'AB1 0AA', 'AB1 0AA', 'AB1 0AA', '200001', '199606',
            'S99999999', 'S99999999', 'S12000033', 'S13002843', 'S99999999',
            '0', '385386', '0801193', '1',
            'S08000020', 'S99999999', 'S92000003', 'S99999999', '0',
            'S14000002', 'S15000001', 'S09000001', 'S22000047', 'S03000012',
            'S31000935', '99ZZ00', 'S00001364', '01C30', 'S99999999',
            'S01000011', 'S02000007', '6', '3C2', 'S00090303',
            'S01006514', 'S02001237', 'S34002990', 'S03000012',
            'S99999999', 'S99999999', '3', '1C3', '57.101474', '-2.242851',
            'S99999999', 'S99999999', 'S23000009', '6808', 'S99999999', 'S99999999',
        ),
        (
            'AB1 0AB', 'AB1 0AB', 'AB1 0AB', '198001', '199606',
            'S99999999', 'S99999999', 'S12000033', 'S13002843', 'S99999999',
            '0', '385177', '0801314', '1',
            'S08000020', 'S99999999', 'S92000003', 'S99999999', '0',
            'S14000002', 'S15000001', 'S09000001', 'S22000047', 'S03000012',
            'S31000935', '99ZZ00', 'S00001270', '01C31', 'S99999999',
            'S01000011', 'S02000007', '6', '4B3', 'S00090303',
            'S01006514', 'S02001237', 'S34002990', 'S03000012',
            'S99999999', 'S99999999', '3', '1C3', '57.102554', '-2.246308',
            'S99999999', 'S99999999', 'S23000009', '6808', 'S99999999', 'S99999999',
        ),
        (
            'AB1 0AB', 'AB1 0AB', 'AB1 0AB', '198001', '199606',
            'S99999999', 'S99999999', 'S12000033', 'S13002843', 'S99999999',
            '0', '385177', '0801314', '1',
            'S08000020', 'S99999999', 'S92000003', 'S99999999', '0',
            'S14000002', 'S15000001', 'S09000001', 'S22000047', 'S03000012',
            'S31000935', '99ZZ00', 'S00001270', '01C31', 'S99999999',
            'S01000011', 'S02000007', '6', '4B3', 'S00090303',
            'S01006514', 'S02001237', 'S34002990', 'S03000012',
            'S99999999', 'S99999999', '3', '1C3', '57.102554', '-2.246308',
            'S99999999', 'S99999999', 'S23000009', '6808', 'S99999999', 'S99999999',
        ),
        (
            'AB1 0AD', 'AB1 0AD', 'AB1 0AD', '198001', '199606',
            'S99999999', 'S99999999', 'S12000033', 'S13002843', 'S99999999',
            '0', '385053', '0801092', '1',
            'S08000020', 'S99999999', 'S92000003', 'S99999999', '0',
            'S14000002', 'S15000001', 'S09000001', 'S22000047', 'S03000012',
            'S31000935', '99ZZ00', 'S00001364', '01C30', 'S99999999',
            'S01000011', 'S02000007', '6', '3C2', 'S00090399',
            'S01006514', 'S02001237', 'S34003015', 'S03000012',
            'S99999999', 'S99999999', '3', '6A1', '57.100556', '-2.248342',
            'S99999999', 'S99999999', 'S23000009', '6808', 'S99999999', 'S99999999',
        ),
    ]
    assert rows_equal_table(dbi, l0_rows, pipeline._l0_table, pipeline)

    # check L1
    l1_rows = [
        (
            'AB1 0AA', 'AB1 0AA', 'AB1 0AA', '1980-01-01', None,
            'S99999999', 'S99999999', 'S12000033', 'S13002843', 'S99999999',
            '0', '385386', '0801193', '1',
            'S08000020', 'S99999999', 'S92000003', 'S99999999', '0',
            'S14000002', 'S15000001', 'S09000001', 'S22000047', 'S03000012',
            'S31000935', '99ZZ00', 'S00001364', '01C30', 'S99999999',
            'S01000011', 'S02000007', '6', '3C2', 'S00090303',
            'S01006514', 'S02001237', 'S34002990', 'S03000012',
            'S99999999', 'S99999999', '3', '1C3', '57.101474', '-2.242851',
            'S99999999', 'S99999999', 'S23000009', '6808', 'S99999999', 'S99999999',
            '2019-05-01',
        ),
        (
            'AB1 0AA', 'AB1 0AA', 'AB1 0AA', '2000-01-01', '1996-06-01',
            'S99999999', 'S99999999', 'S12000033', 'S13002843', 'S99999999',
            '0', '385386', '0801193', '1',
            'S08000020', 'S99999999', 'S92000003', 'S99999999', '0',
            'S14000002', 'S15000001', 'S09000001', 'S22000047', 'S03000012',
            'S31000935', '99ZZ00', 'S00001364', '01C30', 'S99999999',
            'S01000011', 'S02000007', '6', '3C2', 'S00090303',
            'S01006514', 'S02001237', 'S34002990', 'S03000012',
            'S99999999', 'S99999999', '3', '1C3', '57.101474', '-2.242851',
            'S99999999', 'S99999999', 'S23000009', '6808', 'S99999999', 'S99999999',
            '2019-07-01',
        ),
        (
            'AB1 0AB', 'AB1 0AB', 'AB1 0AB', '1980-01-01', '1996-06-01',
            'S99999999', 'S99999999', 'S12000033', 'S13002843', 'S99999999',
            '0', '385177', '0801314', '1',
            'S08000020', 'S99999999', 'S92000003', 'S99999999', '0',
            'S14000002', 'S15000001', 'S09000001', 'S22000047', 'S03000012',
            'S31000935', '99ZZ00', 'S00001270', '01C31', 'S99999999',
            'S01000011', 'S02000007', '6', '4B3', 'S00090303',
            'S01006514', 'S02001237', 'S34002990', 'S03000012',
            'S99999999', 'S99999999', '3', '1C3', '57.102554', '-2.246308',
            'S99999999', 'S99999999', 'S23000009', '6808', 'S99999999', 'S99999999',
            '2019-05-01',
        ),
        (
            'AB1 0AB', 'AB1 0AB', 'AB1 0AB', '1980-01-01', '1996-06-01',
            'S99999999', 'S99999999', 'S12000033', 'S13002843', 'S99999999',
            '0', '385177', '0801314', '1',
            'S08000020', 'S99999999', 'S92000003', 'S99999999', '0',
            'S14000002', 'S15000001', 'S09000001', 'S22000047', 'S03000012',
            'S31000935', '99ZZ00', 'S00001270', '01C31', 'S99999999',
            'S01000011', 'S02000007', '6', '4B3', 'S00090303',
            'S01006514', 'S02001237', 'S34002990', 'S03000012',
            'S99999999', 'S99999999', '3', '1C3', '57.102554', '-2.246308',
            'S99999999', 'S99999999', 'S23000009', '6808', 'S99999999', 'S99999999',
            '2019-07-01',
        ),
        (
            'AB1 0AD', 'AB1 0AD', 'AB1 0AD', '1980-01-01', '1996-06-01',
            'S99999999', 'S99999999', 'S12000033', 'S13002843', 'S99999999',
            '0', '385053', '0801092', '1',
            'S08000020', 'S99999999', 'S92000003', 'S99999999', '0',
            'S14000002', 'S15000001', 'S09000001', 'S22000047', 'S03000012',
            'S31000935', '99ZZ00', 'S00001364', '01C30', 'S99999999',
            'S01000011', 'S02000007', '6', '3C2', 'S00090399',
            'S01006514', 'S02001237', 'S34003015', 'S03000012',
            'S99999999', 'S99999999', '3', '6A1', '57.100556', '-2.248342',
            'S99999999', 'S99999999', 'S23000009', '6808', 'S99999999', 'S99999999',
            '2019-07-01',
        ),
    ]
    assert rows_equal_table(dbi, l1_rows, pipeline._l1_table, pipeline)