def run(params, **kwargs): source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml")) db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) dtype = { 'time_id': 'UInt32', 'trade_flow_id': 'UInt8', 'reporter_id': 'UInt32', 'partner_id': 'UInt32', 'qty_unit_id': 'String', 'qty_unit': 'String', 'qty': 'Float64', 'netweight_kg': 'Float64', 'trade_value_us_dollars': 'UInt64', 'hs6_id': 'UInt32' } download_data = DownloadStep(connector=source_connector) unzip_step = UnzipStep(pattern=r"\.csv$") extract_step = ExtractStep() load_step = LoadStep( "trade_i_comtrade_m_hs", db_connector, if_exists="append", dtype=dtype, pk=['reporter_id', 'trade_flow_id', 'time_id'], nullable_list=['qty', 'trade_value_us_dollars'] ) pp = AdvancedPipelineExecutor(params) pp = pp.next(download_data).foreach(unzip_step).next(extract_step).next(load_step) return pp.run_pipeline()
def run(params, **kwargs): source_connector = Connector.fetch( params.get("source_connector"), open("etl/countries/russia/conns.yaml")) db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) dtype = copy.deepcopy(DTYPE) dtype['hs6_id'] = 'String' download_data = DownloadStep(connector=source_connector) extract_step = ExtractStep() load_step = LoadStep("trade_s_rus_m_hs", db_connector, if_exists="append", dtype=dtype, pk=[ 'trade_flow_id', 'time_id', 'country_id', 'region_id', 'district_id', 'hs10_id' ], nullable_list=['unit_short_name']) pp = AdvancedPipelineExecutor(params) pp = pp.next(download_data).next(extract_step).next(load_step) return pp.run_pipeline()
def run(params, **kwargs): source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml")) db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) dtype = { 'id': 'String', 'id_num': 'UInt32', 'iso3': 'String', 'iso2': 'String', 'continent': 'String', 'color': 'String', 'name': 'String', } download_data = DownloadStep(connector=source_connector) extract_step = ExtractStep() load_step = LoadStep("dim_shared_countries", db_connector, if_exists="append", dtype=dtype, pk=['id_num']) pp = AdvancedPipelineExecutor(params) pp = pp.next(download_data).next(extract_step).next(load_step) return pp.run_pipeline()
def run(params, **kwargs): source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml")) db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) dtype = { 'year': 'UInt32', 'hs6_id': 'UInt32', 'exporter': 'UInt32', 'importer': 'UInt32', 'trade_value_thou_us_dollars': 'Float64', 'trade_value_us_dollars': 'Float64', 'qty_tons': 'Float64', } download_data = DownloadStep(connector=source_connector) unzip_step = UnzipStep(pattern=r"\.csv$") extract_step = ExtractStep() load_step = LoadStep( "trade_i_baci_a_{}".format(params['hs_code']), db_connector, if_exists="append", dtype=dtype, pk=['exporter', 'importer', 'year'], nullable_list=['qty_tons'] ) pp = AdvancedPipelineExecutor(params) pp = pp.next(download_data).foreach(unzip_step).next(extract_step).next(load_step) return pp.run_pipeline()
def run(params, **kwargs): source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml")) db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) dtype = { 'chapter': 'String', 'chapter_name': 'String', 'chapter_id': 'UInt8', 'hs2': 'String', 'hs2_name': 'String', 'hs2_id': 'UInt8', 'hs4': 'String', 'hs4_name': 'String', 'hs4_id': 'UInt16', 'hs6': 'String', 'hs6_name': 'String', 'hs6_id': 'UInt32', } download_data = DownloadStep(connector=source_connector) extract_step = ExtractStep() load_step = LoadStep( "dim_shared_hs{}".format(params['hs_code']), db_connector, if_exists="append", dtype=dtype, pk=['hs6_id'] ) pp = AdvancedPipelineExecutor(params) pp = pp.next(download_data).next(extract_step).next(load_step) return pp.run_pipeline()
def run(params, **kwargs): source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml")) db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) dtype = { 'year': 'UInt16', 'trade_flow_id': 'UInt8', 'reporter_id': 'UInt32', 'partner_id': 'UInt32', 'service_id': 'UInt16', 'trade_value_us_dollars': 'Int64', } download_data = DownloadStep(connector=source_connector) unzip_step = UnzipStep(pattern=r"\.csv$") extract_step = ExtractStep() load_step = LoadStep("services_i_comtrade_a_eb02", db_connector, if_exists="append", dtype=dtype, pk=['reporter_id', 'trade_flow_id', 'year'], nullable_list=['trade_value_us_dollars']) pp = AdvancedPipelineExecutor(params) pp = pp.next(download_data).foreach(unzip_step).next( extract_step).next(load_step) return pp.run_pipeline()
def run(params, **kwargs): source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml")) db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) dtype = { 'aggregate_level': 'UInt8', 'service_id': 'UInt16', 'service': 'String', } download_data = DownloadStep(connector=source_connector) unzip_step = UnzipStep(pattern=r"\.csv$") extract_step = ExtractStep() consolidation_step = ConsolidationStep() load_step = LoadStep("dim_shared_eb02", db_connector, if_exists="append", dtype=dtype, pk=['service_id'], nullable_list=['aggregate_level']) for year in range(2000, 2017 + 1): params['year'] = year pp = AdvancedPipelineExecutor(params) pp = pp.next(download_data).foreach(unzip_step).next(extract_step) pp.run_pipeline() pp = AdvancedPipelineExecutor(params) pp = pp.next(consolidation_step).next(load_step) return pp.run_pipeline()
def run(params, **kwargs): source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml")) db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) dtype = { 'geo_id': 'String', 'short_name': 'String', 'table_name': 'String', 'long_name': 'String', 'two_alpha_code': 'String', 'two_alpha_code_lower': 'String', 'currency_unit': 'String', 'special_notes': 'String', 'region': 'String', 'income_group': 'String', 'wb_2_code': 'String', 'national_accounts_base_year': 'String', 'national_accounts_reference_year': 'String', 'sna_price_valuation': 'String', 'lending_category': 'String', 'other_groups': 'String', 'system_of_national_accounts': 'String', 'alternative_conversion_factor': 'String', 'ppp_survey_year': 'String', 'balance_of_payments_manual_in_use': 'String', 'external_debt_reporting_status': 'String', 'system_of_trade': 'String', 'government_accounting_concept': 'String', 'imf_data_dissemination_standard': 'String', 'latest_population_census': 'String', 'latest_household_survey': 'String', 'income_and_expenditure_source': 'String', 'vital_registration_complete': 'String', 'latest_agricultural_census': 'String', 'latest_industrial_data': 'UInt32', 'latest_trade_data': 'UInt32', } nullable_list = list(dtype.keys())[3:] download_data = DownloadStep(connector=source_connector) extract_step = ExtractStep() load_step = LoadStep("dim_shared_geo", db_connector, if_exists="append", dtype=dtype, pk=['geo_id'], nullable_list=nullable_list) pp = AdvancedPipelineExecutor(params) pp = pp.next(download_data).next(extract_step).next(load_step) return pp.run_pipeline()
def run(params, **kwargs): source_connector = grab_connector(__file__, params.get("source-connector")) postgres_connector = grab_connector(__file__, "postgres-local") step1 = DownloadStep(connector=source_connector) step2 = TransformStep() step3 = LoadStep("brazil_ncm", postgres_connector, if_exists="append") pipeline = AdvancedPipelineExecutor(params) pipeline = pipeline.next(step1).next(step2).next(step3) return pipeline.run_pipeline()
def run(params, **kwargs): source_connector = grab_connector(__file__, params.get("source-connector")) postgres_connector = grab_connector(__file__, "postgres-local") step1 = DownloadStep(connector=source_connector) step2 = LoadStep(params.get("dim") + "_table", postgres_connector, if_exists="replace") pipeline = AdvancedPipelineExecutor(params) pipeline = pipeline.next(step1).next(step2) return pipeline.run_pipeline()
def run(params, **kwargs): source_connector = Connector.fetch(params.get("source_connector"), open("etl/countries/russia/conns.yaml")) db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) download_data = DownloadStep(connector=source_connector) extract_step = ExtractStep() load_step = LoadStep( "dim_rus_countries", db_connector, if_exists="append", dtype=DTYPE, pk=['id'], nullable_list=['name'] ) pp = AdvancedPipelineExecutor(params) pp = pp.next(download_data).next(extract_step).next(load_step) return pp.run_pipeline()
def run(params, **kwargs): source_connector = grab_connector(__file__, params.get("source")) db_connector = grab_connector(__file__, params.get("db")) step1 = DownloadStep(connector=source_connector) step2 = TransformStep() step3 = LoadStep("trade_i_mdic_m_hs", db_connector, if_exists="append", pk=["time_id"]) pipeline = AdvancedPipelineExecutor(params) pipeline = pipeline.next(step1).next(step2).next(step3) return pipeline.run_pipeline()
def run(params, **kwargs): source_connector = Connector.fetch(params.get("source_connector"), open("etl/countries/sweden/conns.yaml")) db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) download_data = DownloadStep(connector=source_connector) extract_step = ExtractStep() load_step = LoadStep( "trade_s_swe_m_hs", db_connector, if_exists="append", dtype=DTYPE, pk=['hs6_id', 'trade_flow_id', 'partner_iso3'], nullable_list=['amount'] ) pp = AdvancedPipelineExecutor(params) pp = pp.next(download_data).next(extract_step).next(load_step) return pp.run_pipeline()
def run(params, **kwargs): db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) dtype = { 'trade_flow_id': 'UInt8', 'trade_flow_name': 'String', } extract_step = ExtractStep() load_step = LoadStep( "dim_shared_trade_flow", db_connector, if_exists="append", dtype=dtype, pk=['trade_flow_id'] ) pp = AdvancedPipelineExecutor(params) pp = pp.next(extract_step).next(load_step) return pp.run_pipeline()
def run(params, **kwargs): source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml")) db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) dtype = { 'indicator_id': 'String', 'topic': 'String', 'indicator_name': 'String', 'short_definition': 'String', 'long_definition': 'String', 'unit_of_measure': 'String', 'periodicity': 'String', 'base_period': 'String', 'other_notes': 'String', 'aggregation_method': 'String', 'limitations_and_expectations': 'String', 'notes_from_original_source': 'String', 'general_comments': 'String', 'source': 'String', 'statistical_concept_and_methodology': 'String', 'development_relevance': 'String', 'related_source_links': 'String', 'other_web_links': 'String', 'related_indicators': 'String', 'license_type': 'String' } nullable_list = list(dtype.keys())[2:] download_data = DownloadStep(connector=source_connector) extract_step = ExtractStep() load_step = LoadStep("dim_shared_indicators", db_connector, if_exists="append", dtype=dtype, pk=['indicator_id'], nullable_list=nullable_list) pp = AdvancedPipelineExecutor(params) pp = pp.next(download_data).next(extract_step).next(load_step) return pp.run_pipeline()
def run(params, **kwargs): db_connector = grab_connector(__file__, params.get("db")) step1 = TimeCreationStep() step2 = LoadStep("dim_shared_time", db_connector, if_exists="replace", pk=["time_id"]) step3 = ProductCreationStep() step4 = LoadStep("dim_shared_products", db_connector, if_exists="replace", pk=["hs4_id"]) step5 = CountryCreationStep() step6 = LoadStep("dim_shared_countries", db_connector, if_exists="replace", pk=["id"]) step7 = StateCreationStep() step8 = LoadStep("dim_shared_states", db_connector, if_exists="replace", pk=["state_id"]) step9 = MunicipalityCreationStep() step10 = LoadStep("dim_shared_municipalities", db_connector, if_exists="replace", pk=["municipality_id"]) step11 = FlowCreationStep() step12 = LoadStep("dim_shared_flow", db_connector, if_exists="replace", pk=["flow_id"]) pipeline = AdvancedPipelineExecutor(params) pipeline = pipeline.next(step1).next(step2).next(step3).next( step4).next(step5).next(step6).next(step7).next(step8).next( step9).next(step10).next(step11).next(step12) return pipeline.run_pipeline()
def run(params, **kwargs): source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml")) db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml")) dtype = { 'geo_id': 'String', 'indicator_id': 'String', 'year': 'UInt32', 'mea': 'Float64' } download_data = DownloadStep(connector=source_connector) extract_step = ExtractStep() load_step = LoadStep( "indicators_i_wdi_a", db_connector, if_exists="append", dtype=dtype, pk=['geo_id'], nullable_list=['mea'] ) pp = AdvancedPipelineExecutor(params) pp = pp.next(download_data).next(extract_step).next(load_step) return pp.run_pipeline()