def run(params, **kwargs):
    source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml"))
    db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml"))

    dtype = {
        'chapter': 'String',
        'chapter_name': 'String',
        'chapter_id': 'UInt8',
        'hs2': 'String',
        'hs2_name': 'String',
        'hs2_id': 'UInt8',
        'hs4': 'String',
        'hs4_name': 'String',
        'hs4_id': 'UInt16',
        'hs6': 'String',
        'hs6_name': 'String',
        'hs6_id': 'UInt32',
    }

    download_data = DownloadStep(connector=source_connector)
    extract_step = ExtractStep()
    load_step = LoadStep(
        "dim_shared_hs{}".format(params['hs_code']), db_connector,
        if_exists="append", dtype=dtype, pk=['hs6_id']
    )

    pp = AdvancedPipelineExecutor(params)
    pp = pp.next(download_data).next(extract_step).next(load_step)

    return pp.run_pipeline()
def run(params, **kwargs):
    source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml"))
    db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml"))

    dtype = {
        'id': 'String',
        'id_num': 'UInt32',
        'iso3': 'String',
        'iso2': 'String',
        'continent': 'String',
        'color': 'String',
        'name': 'String',
    }

    download_data = DownloadStep(connector=source_connector)
    extract_step = ExtractStep()
    load_step = LoadStep("dim_shared_countries", db_connector, if_exists="append", dtype=dtype, pk=['id_num'])

    pp = AdvancedPipelineExecutor(params)
    pp = pp.next(download_data).next(extract_step).next(load_step)

    return pp.run_pipeline()
def run(params, **kwargs):
    source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml"))
    db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml"))

    dtype = {
        'aggregate_level': 'UInt8',
        'service_id': 'UInt16',
        'service': 'String',
    }

    download_data = DownloadStep(connector=source_connector)
    unzip_step = UnzipStep(pattern=r"\.csv$")
    extract_step = ExtractStep()
    consolidation_step = ConsolidationStep()
    load_step = LoadStep(
        "dim_shared_eb02", db_connector, if_exists="append", dtype=dtype,
        pk=['service_id'], nullable_list=['aggregate_level']
    )

    # Download and extract one year at a time, then consolidate and load once.
    for year in range(2000, 2017 + 1):
        params['year'] = year
        pp = AdvancedPipelineExecutor(params)
        pp = pp.next(download_data).foreach(unzip_step).next(extract_step)
        pp.run_pipeline()

    pp = AdvancedPipelineExecutor(params)
    pp = pp.next(consolidation_step).next(load_step)

    return pp.run_pipeline()
def run(params, **kwargs):
    source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml"))
    db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml"))

    dtype = {
        'time_id': 'UInt32',
        'trade_flow_id': 'UInt8',
        'reporter_id': 'UInt32',
        'partner_id': 'UInt32',
        'qty_unit_id': 'String',
        'qty_unit': 'String',
        'qty': 'Float64',
        'netweight_kg': 'Float64',
        'trade_value_us_dollars': 'UInt64',
        'hs6_id': 'UInt32'
    }

    download_data = DownloadStep(connector=source_connector)
    unzip_step = UnzipStep(pattern=r"\.csv$")
    extract_step = ExtractStep()
    load_step = LoadStep(
        "trade_i_comtrade_m_hs", db_connector, if_exists="append", dtype=dtype,
        pk=['reporter_id', 'trade_flow_id', 'time_id'],
        nullable_list=['qty', 'trade_value_us_dollars']
    )

    pp = AdvancedPipelineExecutor(params)
    pp = pp.next(download_data).foreach(unzip_step).next(extract_step).next(load_step)

    return pp.run_pipeline()
def run(params, **kwargs):
    source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml"))
    db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml"))

    dtype = {
        'year': 'UInt32',
        'hs6_id': 'UInt32',
        'exporter': 'UInt32',
        'importer': 'UInt32',
        'trade_value_thou_us_dollars': 'Float64',
        'trade_value_us_dollars': 'Float64',
        'qty_tons': 'Float64',
    }

    download_data = DownloadStep(connector=source_connector)
    unzip_step = UnzipStep(pattern=r"\.csv$")
    extract_step = ExtractStep()
    load_step = LoadStep(
        "trade_i_baci_a_{}".format(params['hs_code']), db_connector,
        if_exists="append", dtype=dtype,
        pk=['exporter', 'importer', 'year'],
        nullable_list=['qty_tons']
    )

    pp = AdvancedPipelineExecutor(params)
    pp = pp.next(download_data).foreach(unzip_step).next(extract_step).next(load_step)

    return pp.run_pipeline()
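# A minimal sketch of how a run()-style entry point such as the BACI pipeline
# above might be invoked directly. The connector names and the "92" HS revision
# below are assumptions for illustration only; the real values depend on the
# keys defined in etl/conns.yaml and on how the ETL runner passes params.
if __name__ == "__main__":
    run({
        "source_connector": "baci-source",       # assumed key in etl/conns.yaml
        "db_connector": "clickhouse-database",   # assumed key in etl/conns.yaml
        "hs_code": "92",                         # assumed HS revision suffix
    })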
def run(params, **kwargs):
    source_connector = Connector.fetch(params.get("source_connector"), open("etl/countries/russia/conns.yaml"))
    db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml"))

    dtype = copy.deepcopy(DTYPE)
    dtype['hs6_id'] = 'String'

    download_data = DownloadStep(connector=source_connector)
    extract_step = ExtractStep()
    load_step = LoadStep(
        "trade_s_rus_m_hs", db_connector, if_exists="append", dtype=dtype,
        pk=['trade_flow_id', 'time_id', 'country_id', 'region_id', 'district_id', 'hs10_id'],
        nullable_list=['unit_short_name']
    )

    pp = AdvancedPipelineExecutor(params)
    pp = pp.next(download_data).next(extract_step).next(load_step)

    return pp.run_pipeline()
def run(params, **kwargs):
    source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml"))
    db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml"))

    dtype = {
        'year': 'UInt16',
        'trade_flow_id': 'UInt8',
        'reporter_id': 'UInt32',
        'partner_id': 'UInt32',
        'service_id': 'UInt16',
        'trade_value_us_dollars': 'Int64',
    }

    download_data = DownloadStep(connector=source_connector)
    unzip_step = UnzipStep(pattern=r"\.csv$")
    extract_step = ExtractStep()
    load_step = LoadStep(
        "services_i_comtrade_a_eb02", db_connector, if_exists="append", dtype=dtype,
        pk=['reporter_id', 'trade_flow_id', 'year'],
        nullable_list=['trade_value_us_dollars']
    )

    pp = AdvancedPipelineExecutor(params)
    pp = pp.next(download_data).foreach(unzip_step).next(extract_step).next(load_step)

    return pp.run_pipeline()
def run(params, **kwargs):
    source_connector = Connector.fetch(params.get("source_connector"), open("etl/conns.yaml"))
    db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml"))

    dtype = {
        'geo_id': 'String',
        'short_name': 'String',
        'table_name': 'String',
        'long_name': 'String',
        'two_alpha_code': 'String',
        'two_alpha_code_lower': 'String',
        'currency_unit': 'String',
        'special_notes': 'String',
        'region': 'String',
        'income_group': 'String',
        'wb_2_code': 'String',
        'national_accounts_base_year': 'String',
        'national_accounts_reference_year': 'String',
        'sna_price_valuation': 'String',
        'lending_category': 'String',
        'other_groups': 'String',
        'system_of_national_accounts': 'String',
        'alternative_conversion_factor': 'String',
        'ppp_survey_year': 'String',
        'balance_of_payments_manual_in_use': 'String',
        'external_debt_reporting_status': 'String',
        'system_of_trade': 'String',
        'government_accounting_concept': 'String',
        'imf_data_dissemination_standard': 'String',
        'latest_population_census': 'String',
        'latest_household_survey': 'String',
        'income_and_expenditure_source': 'String',
        'vital_registration_complete': 'String',
        'latest_agricultural_census': 'String',
        'latest_industrial_data': 'UInt32',
        'latest_trade_data': 'UInt32',
    }

    # Every column after the first three (geo_id, short_name, table_name) is nullable.
    nullable_list = list(dtype.keys())[3:]

    download_data = DownloadStep(connector=source_connector)
    extract_step = ExtractStep()
    load_step = LoadStep(
        "dim_shared_geo", db_connector, if_exists="append", dtype=dtype,
        pk=['geo_id'], nullable_list=nullable_list
    )

    pp = AdvancedPipelineExecutor(params)
    pp = pp.next(download_data).next(extract_step).next(load_step)

    return pp.run_pipeline()
def run(params, **kwargs):
    source_connector = Connector.fetch(params.get("source_connector"), open("etl/countries/russia/conns.yaml"))
    db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml"))

    download_data = DownloadStep(connector=source_connector)
    extract_step = ExtractStep()
    load_step = LoadStep(
        "dim_rus_countries", db_connector, if_exists="append", dtype=DTYPE,
        pk=['id'], nullable_list=['name']
    )

    pp = AdvancedPipelineExecutor(params)
    pp = pp.next(download_data).next(extract_step).next(load_step)

    return pp.run_pipeline()
def steps(params):
    db_connector = Connector.fetch('clickhouse-database', open('../conns.yaml'))

    dtype = {
        'ent_id': 'UInt8',
        'mun_id': 'UInt16',
        'sex': 'UInt8',
        'level': 'String',
        'person_type': 'UInt8',
        'age_range': 'UInt8',
        'count': 'UInt32'
    }

    download_step = DownloadStep(
        connector=['wellness-ent-total', 'wellness-mun-total'],
        connector_path='conns.yaml',
        force=True
    )
    read_step = ReadStep()
    transform_step = TransformStep()
    load_step = LoadStep(
        'wellness_credits', db_connector, dtype=dtype, if_exists='drop', pk=['ent_id']
    )

    return [download_step, read_step, transform_step, load_step]
def steps(params):
    db_connector = Connector.fetch("clickhouse-database", open("../conns.yaml"))

    dtypes = {
        "bimester_id": "UInt32",
        "number_companies": "UInt64",
        "harmonization_payment": "Float64",
        "payment_contribution_with_credit": "Float64",
        "payment_contribution_without_credit": "Float64",
        "nat_id": "String"
    }

    download_step = DownloadStep(connector="payment-entity-credits", connector_path="conns.yaml", force=True)
    transform_step = TransformStep()
    load_step = LoadStep(
        "infonavit_payment_entity_credits", db_connector, if_exists="drop",
        pk=["bimester_id"], dtype=dtypes
    )

    return [download_step, transform_step, load_step]
def steps(params, **kwargs):
    db_connector = Connector.fetch("clickhouse-database", open("../conns.yaml"))

    dtypes = {
        "mun_id": "UInt16",
        "national_industry_id": "String",
        "year": "UInt16"
    }

    download_step = DownloadStep(
        connector="dataset",
        connector_path="conns.yaml"
    )

    # Definition of each step
    transform_step = MultiStep()
    load_step = LoadStep(
        "inegi_economic_census", db_connector, dtype=dtypes, if_exists="drop",
        pk=["national_industry_id", "mun_id", "year"],
        nullable_list=["m000a", "p000c", "a800a", "q000d", "p000a", "p000b", "p030c",
                       "a511a", "m050a", "j203a", "j300a", "j400a", "j500a", "j600a",
                       "k010a", "k020a", "k030a", "k311a", "k041a", "k610a", "k620a",
                       "k060a", "k070a", "k810a", "k910a", "k950a", "k096a", "k976a",
                       "m010a", "m030a", "m090a", "p100a", "p100b", "p030a", "p030b",
                       "q010a", "q020a", "q030a", "q400a", "q900a"]
    )

    return [download_step, transform_step, load_step]
def steps(params):
    db_connector = Connector.fetch("clickhouse-database", open("../conns.yaml"))

    dtype = {
        "ent_id": "UInt8",
        "population": "UInt64",
        "illiterate_population": "Float32",
        "population_6_14_school": "Float32",
        "population_15_incomplete_school": "Float32",
        "no_health_services": "Float32",
        "dirt_floor": "Float32",
        "no_toilet": "Float32",
        "no_water_supply_network": "Float32",
        "no_sewer_system": "Float32",
        "no_electrical_energy": "Float32",
        "no_washing_machine": "Float32",
        "no_fridge": "Float32",
        "social_lag_index": "Float32",
        "social_lag_degree": "UInt8",
        "year": "UInt16"
    }

    download_step = DownloadStep(connector="social-lag-data", connector_path="conns.yaml")
    transform_step = TransformStep()
    load_step = LoadStep(
        "coneval_social_lag_ent", db_connector, if_exists="append",
        pk=["ent_id", "year"], dtype=dtype, nullable_list=["population"]
    )

    return [download_step, transform_step, load_step]
def steps(params):
    db_connector = Connector.fetch('clickhouse-database', open('../conns.yaml'))

    dtype = {
        "system_id": "UInt8",
        "system_en": "String",
        "system_es": "String",
        "subsystem_id": "UInt8",
        "subsystem_en": "String",
        "subsystem_es": "String",
        "product_id": "UInt16",
        "product_en": "String",
        "product_es": "String",
    }

    download_step = DownloadStep(connector='product-dim', connector_path='conns.yaml')
    transform_step = TransformStep()
    load_step = LoadStep(
        "dim_sniim_products", db_connector, dtype=dtype, if_exists="drop", pk=["product_id"]
    )

    return [download_step, transform_step, load_step]
def steps(params):
    db_connector = Connector.fetch('clickhouse-database', open('../conns.yaml'))

    dtype = {
        'state_id': 'UInt8',
        'product_id': 'UInt16',
        'mark_id': 'UInt16',
        'hs2_id': 'UInt16',
        'unit_id': 'UInt8',
        'time_id': 'UInt32',
        'value': 'Float32',
    }

    download_step = WildcardDownloadStep(connector='sniim-data', connector_path='conns.yaml', force=True)
    extract_step = ExtractStep()
    transform_step = TransformStep(connector=db_connector)
    load_step = LoadStep(
        'sniim_products', db_connector, if_exists='drop', dtype=dtype,
        pk=['time_id', 'state_id', 'product_id', 'mark_id', 'hs2_id']
    )

    return [download_step, extract_step, transform_step, load_step]
def steps(params):
    db_connector = Connector.fetch('clickhouse-database', open('../conns.yaml'))

    dtype = {
        'chapter': 'UInt8',
        'chapter_es': 'String',
        'chapter_en': 'String',
        'chapter_es_short': 'String',
        'chapter_en_short': 'String',
        'hs2_id': 'UInt16',
        'hs2_es': 'String',
        'hs2_en': 'String',
        'hs2_es_short': 'String',
        'hs2_en_short': 'String',
        'hs4_id': 'UInt32',
        'hs4_es': 'String',
        'hs4_en': 'String',
        'hs4_es_short': 'String',
        'hs4_en_short': 'String'
    }

    download_step = DownloadStep(
        connector='hs6-2012',
        connector_path='conns.yaml'
    )
    transform_step = TransformStep()
    load_step = LoadStep(
        'dim_shared_hs12_4digit', db_connector, if_exists='drop',
        pk=['hs4_id', 'hs2_id', 'chapter'], dtype=dtype
    )

    return [download_step, transform_step, load_step]
def run(params, **kwargs):
    source_connector = Connector.fetch(params.get("source_connector"), open("etl/countries/sweden/conns.yaml"))
    db_connector = Connector.fetch(params.get("db_connector"), open("etl/conns.yaml"))

    download_data = DownloadStep(connector=source_connector)
    extract_step = ExtractStep()
    load_step = LoadStep(
        "trade_s_swe_m_hs", db_connector, if_exists="append", dtype=DTYPE,
        pk=['hs6_id', 'trade_flow_id', 'partner_iso3'],
        nullable_list=['amount']
    )

    pp = AdvancedPipelineExecutor(params)
    pp = pp.next(download_data).next(extract_step).next(load_step)

    return pp.run_pipeline()
def steps(params):
    db_connector = Connector.fetch('clickhouse-database', open('../conns.yaml'))

    # The geography column name and its ClickHouse type come from the pipeline
    # params, so the same steps serve several target tables.
    dtype = {
        'product_level': 'UInt8',
        params.get('name') + '_id': params.get('type'),
        'hs2_id': 'UInt16',
        'hs4_id': 'UInt32',
        'hs6_id': 'UInt32',
        'flow_id': 'UInt8',
        'partner_country': 'String',
        'value': 'UInt64',
        'month_id': 'UInt32',
        'year': 'UInt16',
        'url': 'String'
    }

    read_step = ReadStep()
    download_step = DownloadStep(connector='foreign-trade', connector_path='conns.yaml')
    transform_step = TransformStep()
    load_step = LoadStep(
        params.get('table') + params.get('name'), db_connector, if_exists='append',
        pk=[params.get('name') + '_id', 'partner_country', 'month_id', 'year',
            'hs2_id', 'hs4_id', 'hs6_id', 'product_level'],
        dtype=dtype
    )

    return [download_step, read_step, transform_step, load_step]
def steps(params):
    db_connector = Connector.fetch('clickhouse-database', open('../conns.yaml'))

    dtype = {
        "section_id": "UInt8",
        "section": "String",
        "section_en": "String",
        "group_id": "UInt8",
        "group": "String",
        "group_en": "String",
        "sub_group_id": "UInt16",
        "sub_group": "String",
        "sub_group_en": "String"
    }

    dl_step = DownloadStep(
        connector="dim-enigh-expense-items",
        connector_path="conns.yaml",
        force=True
    )
    read_step = ReadStep()
    load_step = LoadStep(
        'dim_expense_items', db_connector, if_exists='drop', dtype=dtype, pk=['sub_group_id']
    )

    return [dl_step, read_step, load_step]
def steps(params):
    db_connector = Connector.fetch("clickhouse-database", open("../conns.yaml"))

    dtype = {
        "mun_id": "UInt16",
        "clues_id": "String",
        "codigo_postal": "UInt32",
        "institution_id": "String",
        "type_id": "UInt8",
        "estrato_id": "UInt8",
        "total_consultorios": "Float32",
        "total_camas": "Float32",
        "count": "UInt16",
        "latitud": "String",
        "longitud": "String",
        "publication_time": "String"
    }

    download_step = DownloadStep(connector="establishments-data", connector_path="conns.yaml", force=True)
    transform_step = TransformStep()
    load_step = LoadStep(
        "health_establishments", db_connector, if_exists="drop",
        pk=["mun_id"], dtype=dtype, nullable_list=["codigo_postal"]
    )

    return [download_step, transform_step, load_step]
def steps(params):
    db_connector = Connector.fetch("clickhouse-database", open('../conns.yaml'))

    dtypes = {
        'month_id': 'UInt32',
        'trade_flow_id': 'UInt8',
        'hs2_id': 'UInt16',
        'value': 'UInt64'
    }

    download_step = DownloadStep(connector='product', connector_path='conns.yaml', force=True)
    read_step = ReadStep()
    xform_step = XformStep(connector=db_connector)
    ld_step = LoadStep(
        'inegi_foreign_trade_product', db_connector, if_exists="drop",
        pk=['month_id', 'hs2_id', 'trade_flow_id'], dtype=dtypes
    )

    return [download_step, read_step, xform_step, ld_step]
def steps(params):
    db_connector = Connector.fetch("clickhouse-database", open("../../conns.yaml"))

    dtype = {
        "age": "UInt8",
        "sex_id": "UInt8",
        "social_security": "UInt8",
        "cie10": "String",
        "date_id": "UInt32",
        "mun_id": "UInt16",
        "attention_time": "UInt16",
        "count": "UInt16",
        "over_time": "UInt8"
    }

    download_step = DownloadStep(connector="emergency-data", connector_path="conns.yaml")
    transform_step = TransformStep()
    load_step = LoadStep(
        "dgis_emergency", db_connector, if_exists="append",
        pk=["sex_id", "mun_id"], dtype=dtype, nullable_list=["date_id", "social_security"]
    )

    return [download_step, transform_step, load_step]
def steps(params):
    db_connector = Connector.fetch('clickhouse-database', open('../conns.yaml'))

    dtype = {
        'mun_id': 'UInt16',
        'sex': 'UInt8',
        'year': 'UInt16',
        'age': 'UInt8',
        'population': 'UInt32'
    }

    download_step = DownloadStep(
        connector=['population-data-1', 'population-data-2'],
        connector_path="conns.yaml"
    )
    transform_step = TransformStep()
    load_step = LoadStep(
        'legacy_population_projection', db_connector, if_exists='drop',
        pk=['mun_id'], dtype=dtype
    )

    return [download_step, transform_step, load_step]
def steps(params):
    db_connector = Connector.fetch('clickhouse-database', open('../conns.yaml'))

    dtype = {
        "nat_id": "String",
        "ent_id": "UInt8",
        "year": "UInt16",
        "decile": "String",
        "income_source": "UInt8",
        "value": "UInt32"
    }

    dl_step = DownloadStep(connector="enigh-income-expenses", connector_path="conns.yaml", force=True)
    read_step = ReadStep()
    transform_step = TransformStep()
    load_step = LoadStep(
        'inegi_enigh_income_source', db_connector, if_exists='append',
        dtype=dtype, pk=['ent_id']
    )

    return [dl_step, read_step, transform_step, load_step]
def steps(params):
    db_connector = Connector.fetch("clickhouse-database", open("../conns.yaml"))

    dtypes = {
        "income_id": "UInt8",
        "month_id": "UInt32",
        "credits_number": "UInt32",
        "check_amount": "Float64",
        "infonavit_credit_amount": "Float64",
        "subsidy_number": "UInt16",
        "subsidy_amount": "Float64",
        "nat_id": "String"
    }

    download_step = DownloadStep(connector="income-level-credits", connector_path="conns.yaml", force=True)
    transform_step = TransformStep()
    load_step = LoadStep(
        "infonavit_income_credits", db_connector, if_exists="drop",
        pk=["income_id"], dtype=dtypes
    )

    return [download_step, transform_step, load_step]
def steps(params):
    db_connector = Connector.fetch('clickhouse-database', open('../conns.yaml'))

    dtype = {
        'campus_id': 'UInt64',
        'campus_name': 'String',
        'institution_id': 'UInt32',
        'institution_name': 'String'
    }

    download_step = DownloadStep(connector='dim-institution', connector_path='conns.yaml', force=True)
    read_step = ReadStep()
    transform_step = TransformStep()
    load_step = LoadStep(
        'dim_anuies_institutions', db_connector, if_exists='drop',
        pk=['campus_id', 'institution_id'], dtype=dtype
    )

    return [download_step, read_step, transform_step, load_step]
def steps(params, **kwargs):
    # Use of connectors specified in the conns.yaml file
    db_connector = Connector.fetch('clickhouse-database', open('../conns.yaml'))

    dtype = {
        'chapter_id': 'String',
        'category_es': 'String',
        'category_en': 'String',
        'cie10_3digit': 'String',
        'cie10_3digit_es': 'String',
        'cie10_3digit_en': 'String',
        'cie10_4digit': 'String',
        'cie10_4digit_es': 'String',
        'cie10_4digit_en': 'String'
    }

    # Definition of each step
    download_step = DownloadStep(connector="cie10", connector_path="conns.yaml")
    read_step = ReadStep()
    clean_step = CleanStep()
    load_step = LoadStep(
        'dim_shared_cie10', db_connector, if_exists='drop',
        pk=['chapter_id', 'cie10_3digit', 'cie10_4digit'], dtype=dtype
    )

    return [download_step, read_step, clean_step, load_step]
def steps(params, **kwargs):
    db_connector = Connector.fetch('clickhouse-database', open('../conns.yaml'))

    dtypes = {
        'sector_id': 'String',
        'subsector_id': 'String',
        'industry_group_id': 'String',
        'naics_industry_id': 'String',
        'national_industry_id': 'String'
    }

    download_step = DownloadStep(connector='naics-scian-codes', connector_path="conns.yaml")
    read_step = ReadStep(connector=db_connector)
    load_step = LoadStep(
        'dim_shared_industry_economic_census', db_connector, dtype=dtypes, if_exists='drop',
        pk=['sector_id', 'subsector_id', 'industry_group_id',
            'naics_industry_id', 'national_industry_id']
    )

    return [download_step, read_step, load_step]
def steps(params):
    db_connector = Connector.fetch("clickhouse-database", open("../conns.yaml"))

    dtypes = {
        "month_id": "UInt32",
        "beneficiaries_count": "UInt32",
        "contribution_amount_viv92": "Float64",
        "subaccount_balance_regime97": "Float64",
        "participation_amount_viv92": "Float64",
        "participation_amount_viv97": "Float64",
        "contribution_amount_viv92_imss": "Float64",
        "subaccount_balance_regime97_imss": "Float64",
        "participation_amount_viv92_imss": "Float64",
        "participation_amount_viv97_imss": "Float64",
        "nat_id": "String"
    }

    download_step = DownloadStep(connector="housing-participation-credits", connector_path="conns.yaml", force=True)
    transform_step = TransformStep()
    load_step = LoadStep(
        "infonavit_housing_participation_credits", db_connector, if_exists="drop",
        pk=["month_id"], dtype=dtypes
    )

    return [download_step, transform_step, load_step]
def steps(params):
    db_connector = Connector.fetch("clickhouse-database", open("../conns.yaml"))

    dtype = {
        "job_id": "UInt8",
        "national_job": "UInt8",
        "pay_mode": "UInt8",
        "contract": "UInt8",
        "contract_type": "UInt8",
        "worked_hours": "Float32",
        "sinco_id": "UInt16",
        "scian_id": "String",
        "business_type": "UInt8",
        "business_size": "UInt8",
        "eco_stratum": "UInt8",
        "mun_id": "UInt16",
        "population": "UInt16",
        "sex": "UInt8",
        "age": "UInt8",
        "year": "UInt16"
    }

    download_step = DownloadStep(
        connector=["enigh-job", "enigh-housing", "enigh-population", "enigh-jobs-expenses"],
        connector_path="conns.yaml"
    )
    transform_step = TransformStep()
    load_step = LoadStep(
        "inegi_enigh_jobs", db_connector, if_exists="append",
        pk=["mun_id", "sex"], dtype=dtype,
        nullable_list=["pay_mode", "contract", "contract_type", "business_type"]
    )

    return [download_step, transform_step, load_step]
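# A minimal sketch of what the custom step classes referenced throughout these
# pipelines (ReadStep, TransformStep, ExtractStep, CleanStep, ...) tend to look
# like, assuming the bamboo-lib pattern of subclassing PipelineStep and
# overriding run_step(prev, params). The import path, the pandas usage, and the
# class shown here are illustrative assumptions, not the project's actual code.
import pandas as pd

from bamboo_lib.models import PipelineStep


class TransformStep(PipelineStep):
    def run_step(self, prev, params):
        # `prev` is whatever the previous step returned, typically a local file
        # path produced by DownloadStep.
        df = pd.read_csv(prev)
        # ...column renames, type casts, and reshaping would go here...
        return df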