コード例 #1
0
    def steps(params):
        """Assemble the steps that build the ``tic_*`` region, variable and fact tables.

        Args:
            params: Mapping of run parameters. ``"output-db"`` is forwarded to
                ``grab_connector``; ``"ingest"`` decides whether the load steps
                are included.

        Returns:
            list: The open and tidy steps plus the three table-building steps,
            each followed by its ``LoadStep`` only when
            ``params.get("ingest") == True``.
        """
        db_connector = grab_connector(__file__, params.get("output-db"))

        open_step = OpenStep()
        tidy_step = TidyStep()

        # Region dimension
        region_step = RegionDimensionStep()
        region_dtype = {"region_id": "UInt8", "region_name": "String"}
        load_region = LoadStep(
            table_name="tic_dim_region",
            connector=db_connector,
            if_exists="drop",
            pk=["region_id"],
            dtype=region_dtype,
            nullable_list=[],
        )

        # Variable dimension
        variable_step = VariableDimensionStep()
        variable_dtype = {
            "response_id": "UInt8",
            "variable_name": "String",
            "response_name": "String",
            "combined": "String",
        }
        load_variable = LoadStep(
            table_name="tic_dim_variable",
            connector=db_connector,
            if_exists="drop",
            pk=["response_id"],
            dtype=variable_dtype,
            nullable_list=[],
        )

        # Fact table
        fact_step = FactTableStep()
        # NOTE(review): "year" is typed UInt8 (0-255) -- confirm the values are
        # not four-digit years.
        fact_dtype = {
            "region_id": "UInt8",
            "data_origin_id": "UInt8",
            "response_id": "UInt8",
            "year": "UInt8",
            "percentage": "Float64",
        }
        load_fact = LoadStep(
            table_name="tic_fact",
            connector=db_connector,
            if_exists="drop",
            pk=["region_id"],
            dtype=fact_dtype,
            nullable_list=[],
        )

        # The loose ``== True`` comparison is kept as-is: it accepts True (and
        # values equal to it, such as 1) but not arbitrary truthy values.
        ingest_requested = params.get("ingest") == True  # noqa: E712
        if not ingest_requested:
            return [open_step, tidy_step, region_step, variable_step, fact_step]

        return [
            open_step, tidy_step,
            region_step, load_region,
            variable_step, load_variable,
            fact_step, load_fact,
        ]
コード例 #2
0
ファイル: fdi_10.py プロジェクト: anabcm/data-etl
    def steps(params):
        """Build the pipeline for one of the three ``fdi_10_*`` tables.

        Args:
            params: Mapping of run parameters; ``"table"`` selects the output
                and is compared against ``10.1``, ``10.2`` and ``10.3``.

        Returns:
            list | None: ``[download_step, transform_step, load_step]`` for the
            selected table, or ``None`` when ``"table"`` matches none of the
            three values.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        download_step = DownloadStep(connector='fdi-data',
                                     connector_path='conns.yaml',
                                     force=True)

        transform_101_step = Transform_101_Step()
        transform_102_step = Transform_102_Step()
        transform_103_step = Transform_103_Step()

        table = params.get('table')

        if table == 10.1:
            load_step = LoadStep('fdi_10_year_country',
                                 db_connector,
                                 if_exists='drop',
                                 pk=['country', 'year'],
                                 dtype={
                                     'year': 'UInt16',
                                     'count': 'UInt16',
                                     'value_c': 'Float32'
                                 })
            return [download_step, transform_101_step, load_step]

        if table == 10.2:
            load_step = LoadStep('fdi_10_year_country_investment',
                                 db_connector,
                                 if_exists='drop',
                                 pk=['country', 'year'],
                                 dtype={
                                     'year': 'UInt16',
                                     'investment_type': 'UInt8',
                                     'count': 'UInt16',
                                     'value_c': 'Float32'
                                 })
            return [download_step, transform_102_step, load_step]

        if table == 10.3:
            load_step = LoadStep('fdi_10_year_investment',
                                 db_connector,
                                 if_exists='drop',
                                 pk=['year'],
                                 dtype={
                                     'year': 'UInt16',
                                     'investment_type': 'UInt8',
                                     'count': 'UInt16',
                                     'value_c': 'Float32'
                                 })
            return [download_step, transform_103_step, load_step]

        # No recognised table: keep the original fall-through result, but make
        # it visible to the reader.
        return None
コード例 #3
0
    def steps(params):
        """Build the budget-transparency pipeline (four dimensions plus the fact table).

        The ``dt_fun``, ``dt_dep``, ``dt_exp``, ``dt_cap`` and ``dt_fact`` dtype
        mappings are defined outside this method.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: Download and read steps, then each dimension step followed by
            its ``LoadStep``, ending with the fact step and its ``LoadStep``.
        """
        connector = grab_connector('etl/budget_transparency/conns.yaml',
                                   'budget-transparency-wildcard')

        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open("etl/conns.yaml") as conns_file:
            db_connector = Connector.fetch("clickhouse-database", conns_file)

        download_step = WildcardDownloadStep(connector=connector)

        read_step = ReadStep(save_result_key="global_df")

        dim_fun_step = DimFunStep(save_result_key="dim_fun")

        dim_dep_step = DimDepStep(save_result_key="dim_dep")

        dim_exp_step = DimExpTypeStep(save_result_key="dim_exp")

        dim_cap_step = DimCapStep(save_result_key="dim_cap")

        fact_step = FactStep()

        ld_fun = LoadStep('dim_fun_budget_transparency',
                          db_connector,
                          if_exists="drop",
                          pk=['subfunction_id'],
                          dtype=dt_fun)

        ld_dep = LoadStep('dim_dep_budget_transparency',
                          db_connector,
                          if_exists="drop",
                          pk=['department_id'],
                          dtype=dt_dep)

        ld_exp = LoadStep('dim_exp_budget_transparency',
                          db_connector,
                          if_exists="drop",
                          pk=['exp_type_id'],
                          dtype=dt_exp)

        ld_cap = LoadStep('dim_cap_budget_transparency',
                          db_connector,
                          if_exists="drop",
                          pk=['concept_id'],
                          dtype=dt_cap)

        ld_fact = LoadStep('budget_transparency',
                           db_connector,
                           if_exists="drop",
                           pk=['quarter_id'],
                           dtype=dt_fact)

        return [
            download_step, read_step, dim_fun_step, ld_fun, dim_dep_step,
            ld_dep, dim_exp_step, ld_exp, dim_cap_step, ld_cap, fact_step,
            ld_fact
        ]
コード例 #4
0
    def run(params, **kwargs):
        """Download, extract and append rows to ``dim_shared_countries``.

        Args:
            params: Mapping of run parameters. ``"source_connector"`` and
                ``"db_connector"`` name the connectors looked up in
                ``etl/conns.yaml``; the mapping is also handed to
                ``AdvancedPipelineExecutor``.
            **kwargs: Accepted but not used by this method.

        Returns:
            Whatever ``AdvancedPipelineExecutor.run_pipeline()`` returns.
        """
        # Each lookup gets its own short-lived handle so nothing is left open;
        # Connector.fetch is expected to have read the file by the time it
        # returns.
        with open("etl/conns.yaml") as conns_file:
            source_connector = Connector.fetch(params.get("source_connector"),
                                               conns_file)
        with open("etl/conns.yaml") as conns_file:
            db_connector = Connector.fetch(params.get("db_connector"),
                                           conns_file)

        dtype = {
            'id': 'String',
            'id_num': 'UInt32',
            'iso3': 'String',
            'iso2': 'String',
            'continent': 'String',
            'color': 'String',
            'name': 'String',
        }

        download_data = DownloadStep(connector=source_connector)
        extract_step = ExtractStep()
        load_step = LoadStep("dim_shared_countries",
                             db_connector,
                             if_exists="append",
                             dtype=dtype,
                             pk=['id_num'])

        pp = AdvancedPipelineExecutor(params)
        pp = pp.next(download_data).next(extract_step).next(load_step)

        return pp.run_pipeline()
コード例 #5
0
    def steps(params):
        """Build the download/transform/load steps for ``dim_shared_hs12_4digit``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        dtype = {
            'chapter':          'UInt8',
            'chapter_es':       'String',
            'chapter_en':       'String',
            'chapter_es_short': 'String',
            'chapter_en_short': 'String',
            'hs2_id':           'UInt16',
            'hs2_es':           'String',
            'hs2_en':           'String',
            'hs2_es_short':     'String',
            'hs2_en_short':     'String',
            'hs4_id':           'UInt32',
            'hs4_es':           'String',
            'hs4_en':           'String',
            'hs4_es_short':     'String',
            'hs4_en_short':     'String'
        }
        download_step = DownloadStep(
            connector='hs6-2012',
            connector_path='conns.yaml'
        )
        transform_step = TransformStep()
        load_step = LoadStep(
            'dim_shared_hs12_4digit',
            db_connector,
            if_exists='drop',
            pk=['hs4_id', 'hs2_id', 'chapter'],
            dtype=dtype
        )

        return [download_step, transform_step, load_step]
コード例 #6
0
    def steps(params):
        """Build the download/transform/load steps for ``legacy_population_projection``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``; the download
            step is configured with two connectors.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        dtype = {
            'mun_id': 'UInt16',
            'sex': 'UInt8',
            'year': 'UInt16',
            'age': 'UInt8',
            'population': 'UInt32'
        }

        download_step = DownloadStep(
            connector=['population-data-1', 'population-data-2'],
            connector_path="conns.yaml")

        transform_step = TransformStep()

        load_step = LoadStep('legacy_population_projection',
                             db_connector,
                             if_exists='drop',
                             pk=['mun_id'],
                             dtype=dtype)

        return [download_step, transform_step, load_step]
コード例 #7
0
    def steps(params):
        """Build the download/transform/load steps for ``infonavit_payment_entity_credits``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open("../conns.yaml") as conns_file:
            db_connector = Connector.fetch("clickhouse-database", conns_file)

        dtypes = {
            "bimester_id": "UInt32",
            "number_companies": "UInt64",
            "harmonization_payment": "Float64",
            "payment_contribution_with_credit": "Float64",
            "payment_contribution_without_credit": "Float64",
            "nat_id": "String"
        }

        download_step = DownloadStep(connector="payment-entity-credits",
                                     connector_path="conns.yaml",
                                     force=True)

        transform_step = TransformStep()

        load_step = LoadStep("infonavit_payment_entity_credits",
                             db_connector,
                             if_exists="drop",
                             pk=["bimester_id"],
                             dtype=dtypes)

        return [download_step, transform_step, load_step]
コード例 #8
0
    def steps(params):
        """Build the download/transform/load steps for ``coneval_social_lag_ent``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``; the load step
            appends to the table and marks ``population`` as nullable.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open("../conns.yaml") as conns_file:
            db_connector = Connector.fetch("clickhouse-database", conns_file)

        dtype = {
            "ent_id": "UInt8",
            "population": "UInt64",
            "illiterate_population": "Float32",
            "population_6_14_school": "Float32",
            "population_15_incomplete_school": "Float32",
            "no_health_services": "Float32",
            "dirt_floor": "Float32",
            "no_toilet": "Float32",
            "no_water_supply_network": "Float32",
            "no_sewer_system": "Float32",
            "no_electrical_energy": "Float32",
            "no_washing_machine": "Float32",
            "no_fridge": "Float32",
            "social_lag_index": "Float32",
            "social_lag_degree": "UInt8",
            "year": "UInt16"
        }

        download_step = DownloadStep(connector="social-lag-data",
                                     connector_path="conns.yaml")
        transform_step = TransformStep()
        load_step = LoadStep("coneval_social_lag_ent",
                             db_connector,
                             if_exists="append",
                             pk=["ent_id", "year"],
                             dtype=dtype,
                             nullable_list=["population"])

        return [download_step, transform_step, load_step]
コード例 #9
0
ファイル: dim_institutions.py プロジェクト: anabcm/data-etl
    def steps(params):
        """Build the download/read/transform/load steps for ``dim_anuies_institutions``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, read_step, transform_step, load_step]``.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        dtype = {
            'campus_id': 'UInt64',
            'campus_name': 'String',
            'institution_id': 'UInt32',
            'institution_name': 'String'
        }

        download_step = DownloadStep(connector='dim-institution',
                                     connector_path='conns.yaml',
                                     force=True)

        read_step = ReadStep()
        transform_step = TransformStep()

        load_step = LoadStep('dim_anuies_institutions',
                             db_connector,
                             if_exists='drop',
                             pk=['campus_id', 'institution_id'],
                             dtype=dtype)

        return [download_step, read_step, transform_step, load_step]
コード例 #10
0
    def steps(params):
        """Build the download/transform/load steps for ``dgis_emergency``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``; the load step
            appends to the table and marks ``date_id`` and ``social_security``
            as nullable.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open("../../conns.yaml") as conns_file:
            db_connector = Connector.fetch("clickhouse-database", conns_file)

        dtype = {
            "age": "UInt8",
            "sex_id": "UInt8",
            "social_security": "UInt8",
            "cie10": "String",
            "date_id": "UInt32",
            "mun_id": "UInt16",
            "attention_time": "UInt16",
            "count": "UInt16",
            "over_time": "UInt8"
        }

        download_step = DownloadStep(connector="emergency-data",
                                     connector_path="conns.yaml")
        transform_step = TransformStep()
        load_step = LoadStep("dgis_emergency",
                             db_connector,
                             if_exists="append",
                             pk=["sex_id", "mun_id"],
                             dtype=dtype,
                             nullable_list=["date_id", "social_security"])

        return [download_step, transform_step, load_step]
コード例 #11
0
    def steps(params):
        """Build the download/extract/transform/load steps for ``sniim_products``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, extract_step, transform_step, load_step]``;
            the transform step also receives the database connector.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        dtype = {
            'state_id': 'UInt8',
            'product_id': 'UInt16',
            'mark_id': 'UInt16',
            'hs2_id': 'UInt16',
            'unit_id': 'UInt8',
            'time_id': 'UInt32',
            'value': 'Float32',
        }

        download_step = WildcardDownloadStep(connector='sniim-data',
                                             connector_path='conns.yaml',
                                             force=True)

        extract_step = ExtractStep()
        transform_step = TransformStep(connector=db_connector)

        load_step = LoadStep(
            'sniim_products',
            db_connector,
            if_exists='drop',
            dtype=dtype,
            pk=['time_id', 'state_id', 'product_id', 'mark_id', 'hs2_id'])

        return [download_step, extract_step, transform_step, load_step]
コード例 #12
0
    def steps(params):
        """Build the download/transform/load steps for ``dgis_pregnancy_mortality``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``; the download
            step is configured with two connectors.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open("../../conns.yaml") as conns_file:
            db_connector = Connector.fetch("clickhouse-database", conns_file)

        dtype = {
            "age":                  "UInt8",
            "marital_status":       "UInt8",
            "occupation":           "UInt8",
            "academic_degree":      "UInt8",
            "social_security":      "UInt8",
            "medical_center":       "UInt8",
            "year_decease":         "UInt16",
            "cie10":                "String",
            "year_of_register":     "UInt16",
            "mun_residence_id":     "UInt16",
            "mun_happening_id":     "UInt16",
            "count":                "UInt8"
        }

        download_step = DownloadStep(
            connector=["pregnancy-mortality-data", "dim-pregnancy-mortality"],
            connector_path="conns.yaml",
            force=True
        )
        transform_step = TransformStep()
        load_step = LoadStep(
            "dgis_pregnancy_mortality",
            db_connector,
            if_exists="drop",
            pk=["mun_residence_id", "year_decease"],
            dtype=dtype
        )

        return [download_step, transform_step, load_step]
コード例 #13
0
    def steps(params):
        """Build the download/read/transform/load steps for ``banxico_trade_flow``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, read_step, transform_step, load_step]``;
            the transform step also receives the database connector.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        dtype = {
            'iso3': 'String',
            'trade_flow_id': 'UInt8',
            'month_id': 'UInt32',
            'trade_value': 'UInt64'
        }

        download_step = DownloadStep(
            connector=['exports-destiny-monthly', 'imports-origin-monthly'],
            connector_path='conns.yaml',
            force=True)

        read_step = ReadStep()
        transform_step = TransformStep(connector=db_connector)

        load_step = LoadStep('banxico_trade_flow',
                             db_connector,
                             dtype=dtype,
                             if_exists='drop',
                             pk=['iso3'])

        return [download_step, read_step, transform_step, load_step]
コード例 #14
0
    def steps(params):
        """Build the download/transform/load steps for ``fdi_4``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open("../conns.yaml") as conns_file:
            db_connector = Connector.fetch("clickhouse-database", conns_file)

        dtype = {
            'ent_id':          'UInt8',
            'year':            'UInt16',
            'country':         'String',
            'investment_type': 'String',
            'count':           'UInt16',
            'value_c':         'Float32'
        }

        download_step = DownloadStep(
            connector="fdi-data",
            connector_path="conns.yaml",
            force=True
        )

        transform_step = TransformStep()
        load_step = LoadStep(
            'fdi_4', db_connector, if_exists="drop",
            pk=['ent_id', 'country'], dtype=dtype
        )

        return [download_step, transform_step, load_step]
コード例 #15
0
    def steps(params):
        """Build the steps that load a ``trade_i_baci_a_<hs_code>`` table.

        Args:
            params: Mapping of run parameters. ``"db_connector"`` names the
                connector looked up in ``../conns.yaml``; ``params['hs_code']``
                is formatted into the target table name.

        Returns:
            list: ``[download_step, extract_step, unzip_step, transform_step,
            load_step]``.

        Raises:
            KeyError: If ``params`` has no ``'hs_code'`` entry.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open("../conns.yaml") as conns_file:
            db_connector = Connector.fetch(params.get("db_connector"),
                                           conns_file)

        dtype = {
            'year': 'UInt16',
            'hs_master_id': 'UInt32',
            'exporter': 'String',
            'importer': 'String',
            'value': 'Float64',
            'quantity': 'Float64',
            'version': 'DateTime',
        }

        download_step = WildcardDownloadStep(connector="baci-yearly",
                                             connector_path="conns.yaml")
        extract_step = ExtractStep()
        unzip_step = UnzipToFolderStep(compression='zip',
                                       target_folder_path='temp/')
        transform_step = TransformStep(connector=db_connector)
        # The load step appends rows and passes "ReplacingMergeTree" as the
        # engine with 'version' as its parameter.
        load_step = LoadStep(
            "trade_i_baci_a_{}".format(params['hs_code']),
            db_connector,
            if_exists="append",
            dtype=dtype,
            pk=['year', 'exporter', 'importer', 'hs_master_id'],
            nullable_list=['quantity'],
            engine="ReplacingMergeTree",
            engine_params='version')

        return [
            download_step, extract_step, unzip_step, transform_step, load_step
        ]
コード例 #16
0
    def steps(params):
        """Build the download/transform/load steps for ``infonavit_housing_participation_credits``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open("../conns.yaml") as conns_file:
            db_connector = Connector.fetch("clickhouse-database", conns_file)

        dtypes = {
            "month_id": "UInt32",
            "beneficiaries_count": "UInt32",
            "contribution_amount_viv92": "Float64",
            "subaccount_balance_regime97": "Float64",
            "participation_amount_viv92": "Float64",
            "participation_amount_viv97": "Float64",
            "contribution_amount_viv92_imss": "Float64",
            "subaccount_balance_regime97_imss": "Float64",
            "participation_amount_viv92_imss": "Float64",
            "participation_amount_viv97_imss": "Float64",
            "nat_id": "String"
        }

        download_step = DownloadStep(connector="housing-participation-credits",
                                     connector_path="conns.yaml",
                                     force=True)

        transform_step = TransformStep()

        load_step = LoadStep("infonavit_housing_participation_credits",
                             db_connector,
                             if_exists="drop",
                             pk=["month_id"],
                             dtype=dtypes)

        return [download_step, transform_step, load_step]
コード例 #17
0
    def steps(params):
        """Build the download/read/transform/load steps for ``inegi_enigh_income_source``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[dl_step, read_step, transform_step, load_step]``; the load
            step appends to the table.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        dtype = {
            "nat_id": "String",
            "ent_id": "UInt8",
            "year": "UInt16",
            "decile": "String",
            "income_source": "UInt8",
            "value": "UInt32"
        }

        dl_step = DownloadStep(connector="enigh-income-expenses",
                               connector_path="conns.yaml",
                               force=True)
        read_step = ReadStep()
        transform_step = TransformStep()
        load_step = LoadStep('inegi_enigh_income_source',
                             db_connector,
                             if_exists='append',
                             dtype=dtype,
                             pk=['ent_id'])

        return [dl_step, read_step, transform_step, load_step]
コード例 #18
0
    def steps(params):
        """Build the download/transform/load steps for ``inegi_enigh_jobs``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``; the download
            step is configured with four connectors and the load step appends
            to the table.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open("../conns.yaml") as conns_file:
            db_connector = Connector.fetch("clickhouse-database", conns_file)

        dtype = {
            "job_id":                          "UInt8",
            "national_job":                    "UInt8",
            "pay_mode":                        "UInt8",
            "contract":                        "UInt8",
            "contract_type":                   "UInt8",
            "worked_hours":                    "Float32",
            "sinco_id":                        "UInt16",
            "scian_id":                        "String",
            "business_type":                   "UInt8",
            "business_size":                   "UInt8",
            "eco_stratum":                     "UInt8",
            "mun_id":                          "UInt16",
            "population":                      "UInt16",
            "sex":                             "UInt8",
            "age":                             "UInt8",
            "year":                            "UInt16"
        }

        download_step = DownloadStep(
            connector=["enigh-job", "enigh-housing", "enigh-population", "enigh-jobs-expenses"],
            connector_path="conns.yaml"
        )
        transform_step = TransformStep()
        load_step = LoadStep(
            "inegi_enigh_jobs",
            db_connector,
            if_exists="append",
            pk=["mun_id", "sex"],
            dtype=dtype,
            nullable_list=["pay_mode", "contract", "contract_type", "business_type"]
        )

        return [download_step, transform_step, load_step]
コード例 #19
0
    def steps(params, **kwargs):
        """Build the download/read/load steps for ``dim_shared_industry_economic_census``.

        Args:
            params: Run parameters; not read by this method.
            **kwargs: Accepted but not used by this method.

        Returns:
            list: ``[download_step, read_step, load_step]``; the read step also
            receives the database connector.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        dtypes = {
            'sector_id': 'String',
            'subsector_id': 'String',
            'industry_group_id': 'String',
            'naics_industry_id': 'String',
            'national_industry_id': 'String'
        }
        download_step = DownloadStep(connector='naics-scian-codes',
                                     connector_path="conns.yaml")
        read_step = ReadStep(connector=db_connector)
        load_step = LoadStep('dim_shared_industry_economic_census',
                             db_connector,
                             dtype=dtypes,
                             if_exists='drop',
                             pk=[
                                 'sector_id', 'subsector_id',
                                 'industry_group_id', 'naics_industry_id',
                                 'national_industry_id'
                             ])

        return [download_step, read_step, load_step]
コード例 #20
0
    def steps(params):
        """Build the download/transform/load steps for ``dim_sniim_products``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        dtype = {
            "system_id": "UInt8",
            "system_en": "String",
            "system_es": "String",
            "subsystem_id": "UInt8",
            "subsystem_en": "String",
            "subsystem_es": "String",
            "product_id": "UInt16",
            "product_en": "String",
            "product_es": "String",
        }

        download_step = DownloadStep(connector='product-dim',
                                     connector_path='conns.yaml')

        transform_step = TransformStep()

        load_step = LoadStep("dim_sniim_products",
                             db_connector,
                             dtype=dtype,
                             if_exists="drop",
                             pk=["product_id"])

        return [download_step, transform_step, load_step]
コード例 #21
0
    def steps(params, **kwargs):
        """Build the download/transform/load steps for ``inegi_economic_census``.

        Args:
            params: Run parameters; not read by this method.
            **kwargs: Accepted but not used by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``, where the
            transform step is a ``MultiStep`` instance.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open("../conns.yaml") as conns_file:
            db_connector = Connector.fetch("clickhouse-database", conns_file)

        # Only the key columns get an explicit dtype here.
        dtypes = {
            "mun_id":                  "UInt16",
            "national_industry_id":    "String",
            "year":                    "UInt16"
        }

        download_step = DownloadStep(
            connector="dataset",
            connector_path="conns.yaml"
        )

        # Definition of each step
        transform_step = MultiStep()
        load_step = LoadStep(
            "inegi_economic_census", db_connector, dtype=dtypes, if_exists="drop",
            pk=["national_industry_id", "mun_id", "year"],
            nullable_list=["m000a", "p000c", "a800a", "q000d", "p000a", "p000b", "p030c", "a511a", "m050a", "j203a",
                           "j300a", "j400a", "j500a", "j600a", "k010a", "k020a", "k030a", "k311a", "k041a", "k610a",
                           "k620a", "k060a", "k070a", "k810a", "k910a", "k950a", "k096a", "k976a", "m010a", "m030a",
                           "m090a", "p100a", "p100b", "p030a", "p030b", "q010a", "q020a", "q030a", "q400a", "q900a"]
        )

        return [download_step, transform_step, load_step]
コード例 #22
0
    def run(params, **kwargs):
        """Download, extract and append rows to ``trade_s_rus_m_hs``.

        Args:
            params: Mapping of run parameters. ``"source_connector"`` is looked
                up in ``etl/countries/russia/conns.yaml`` and
                ``"db_connector"`` in ``etl/conns.yaml``; the mapping is also
                handed to ``AdvancedPipelineExecutor``.
            **kwargs: Accepted but not used by this method.

        Returns:
            Whatever ``AdvancedPipelineExecutor.run_pipeline()`` returns.
        """
        # Each lookup gets its own short-lived handle so nothing is left open;
        # Connector.fetch is expected to have read the file by the time it
        # returns.
        with open("etl/countries/russia/conns.yaml") as source_conns:
            source_connector = Connector.fetch(params.get("source_connector"),
                                               source_conns)
        with open("etl/conns.yaml") as db_conns:
            db_connector = Connector.fetch(params.get("db_connector"),
                                           db_conns)

        # Work on a private copy so the shared DTYPE mapping is not mutated.
        dtype = copy.deepcopy(DTYPE)
        # NOTE(review): the override targets 'hs6_id' while the primary key
        # below lists 'hs10_id' -- confirm this is intended.
        dtype['hs6_id'] = 'String'

        download_data = DownloadStep(connector=source_connector)
        extract_step = ExtractStep()
        load_step = LoadStep("trade_s_rus_m_hs",
                             db_connector,
                             if_exists="append",
                             dtype=dtype,
                             pk=[
                                 'trade_flow_id', 'time_id', 'country_id',
                                 'region_id', 'district_id', 'hs10_id'
                             ],
                             nullable_list=['unit_short_name'])

        pp = AdvancedPipelineExecutor(params)
        pp = pp.next(download_data).next(extract_step).next(load_step)

        return pp.run_pipeline()
コード例 #23
0
ファイル: wellness.py プロジェクト: anabcm/data-etl
    def steps(params):
        """Build the download/read/transform/load steps for ``wellness_credits``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, read_step, transform_step, load_step]``;
            the download step is configured with two connectors.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        dtype = {
            'ent_id':       'UInt8',
            'mun_id':       'UInt16',
            'sex':          'UInt8',
            'level':        'String',
            'person_type':  'UInt8',
            'age_range':    'UInt8',
            'count':        'UInt32'
        }

        download_step = DownloadStep(
            connector=['wellness-ent-total', 'wellness-mun-total'],
            connector_path='conns.yaml',
            force=True
        )

        read_step = ReadStep()

        transform_step = TransformStep()

        load_step = LoadStep(
            'wellness_credits', db_connector, dtype=dtype, if_exists='drop',
            pk=['ent_id']
        )

        return [download_step, read_step, transform_step, load_step]
コード例 #24
0
    def steps(params):
        """Build the download/read/transform/load steps for ``inegi_foreign_trade_product``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, read_step, xform_step, ld_step]``; the
            transform step also receives the database connector.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch("clickhouse-database", conns_file)

        dtypes = {
            'month_id': 'UInt32',
            'trade_flow_id': 'UInt8',
            'hs2_id': 'UInt16',
            'value': 'UInt64'
        }

        download_step = DownloadStep(connector='product',
                                     connector_path='conns.yaml',
                                     force=True)

        read_step = ReadStep()

        xform_step = XformStep(connector=db_connector)

        ld_step = LoadStep('inegi_foreign_trade_product',
                           db_connector,
                           if_exists="drop",
                           pk=['month_id', 'hs2_id', 'trade_flow_id'],
                           dtype=dtypes)

        return [download_step, read_step, xform_step, ld_step]
コード例 #25
0
    def steps(params):
        """Build the download/transform/load steps for ``infonavit_income_credits``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open("../conns.yaml") as conns_file:
            db_connector = Connector.fetch("clickhouse-database", conns_file)

        dtypes = {
            "income_id": "UInt8",
            "month_id": "UInt32",
            "credits_number": "UInt32",
            "check_amount": "Float64",
            "infonavit_credit_amount": "Float64",
            "subsidy_number": "UInt16",
            "subsidy_amount": "Float64",
            "nat_id": "String"
        }

        download_step = DownloadStep(connector="income-level-credits",
                                     connector_path="conns.yaml",
                                     force=True)

        transform_step = TransformStep()

        load_step = LoadStep("infonavit_income_credits",
                             db_connector,
                             if_exists="drop",
                             pk=["income_id"],
                             dtype=dtypes)

        return [download_step, transform_step, load_step]
コード例 #26
0
    def steps(params):
        """Build the download/transform/load steps for ``health_establishments``.

        Args:
            params: Run parameters; not read by this method.

        Returns:
            list: ``[download_step, transform_step, load_step]``; the load step
            marks ``codigo_postal`` as nullable.
        """
        # Scope the YAML handle so it is closed instead of leaked;
        # Connector.fetch is expected to have read it by the time it returns.
        with open("../conns.yaml") as conns_file:
            db_connector = Connector.fetch("clickhouse-database", conns_file)

        dtype = {
            "mun_id": "UInt16",
            "clues_id": "String",
            "codigo_postal": "UInt32",
            "institution_id": "String",
            "type_id": "UInt8",
            "estrato_id": "UInt8",
            "total_consultorios": "Float32",
            "total_camas": "Float32",
            "count": "UInt16",
            "latitud": "String",
            "longitud": "String",
            "publication_time": "String"
        }

        download_step = DownloadStep(connector="establishments-data",
                                     connector_path="conns.yaml",
                                     force=True)

        transform_step = TransformStep()

        load_step = LoadStep("health_establishments",
                             db_connector,
                             if_exists="drop",
                             pk=["mun_id"],
                             dtype=dtype,
                             nullable_list=["codigo_postal"])

        return [download_step, transform_step, load_step]
コード例 #27
0
    def steps(params, **kwargs):
        """Build the download/read/clean/load steps for ``dim_shared_cie10``.

        Args:
            params: Run parameters; not read by this method.
            **kwargs: Accepted but not used by this method.

        Returns:
            list: ``[download_step, read_step, clean_step, load_step]``.
        """
        # Use of connectors specified in the conns.yaml file. The handle is
        # scoped so it is closed instead of leaked; Connector.fetch is expected
        # to have read it by the time it returns.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        dtype = {
            'chapter_id': 'String',
            'category_es': 'String',
            'category_en': 'String',
            'cie10_3digit': 'String',
            'cie10_3digit_es': 'String',
            'cie10_3digit_en': 'String',
            'cie10_4digit': 'String',
            'cie10_4digit_es': 'String',
            'cie10_4digit_en': 'String'
        }

        # Definition of each step
        download_step = DownloadStep(connector="cie10",
                                     connector_path="conns.yaml")
        read_step = ReadStep()
        clean_step = CleanStep()
        load_step = LoadStep('dim_shared_cie10',
                             db_connector,
                             if_exists='drop',
                             pk=['chapter_id', 'cie10_3digit', 'cie10_4digit'],
                             dtype=dtype)

        return [download_step, read_step, clean_step, load_step]
コード例 #28
0
    def steps(params):
        """Build the steps that append foreign-trade rows to a per-level table.

        Args:
            params: Mapping read with ``.get``. The keys used are ``'name'``
                (prefix of the ``<name>_id`` column and suffix of the table
                name), ``'type'`` (type name of the ``<name>_id`` column) and
                ``'table'`` (prefix of the table name). ``'name'`` and
                ``'table'`` must be strings, since they are concatenated.

        Returns:
            list: The download, read, transform and load steps, in that order.
        """
        # Scope the connections file to a ``with`` block so the handle is
        # closed once the connector has been fetched instead of being leaked.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        # Built once so the dtype key and the primary-key entry always agree.
        level_id_column = params.get('name') + '_id'

        # Column name -> type name mapping handed to the load step.
        dtype = {
            'product_level': 'UInt8',
            level_id_column: params.get('type'),
            'hs2_id': 'UInt16',
            'hs4_id': 'UInt32',
            'hs6_id': 'UInt32',
            'flow_id': 'UInt8',
            'partner_country': 'String',
            'value': 'UInt64',
            'month_id': 'UInt32',
            'year': 'UInt16',
            'url': 'String'
        }

        read_step = ReadStep()

        download_step = DownloadStep(connector='foreign-trade',
                                     connector_path='conns.yaml')

        transform_step = TransformStep()
        # The table name is ``params['table']`` followed by ``params['name']``;
        # rows are appended to it.
        load_step = LoadStep(params.get('table') + params.get('name'),
                             db_connector,
                             if_exists='append',
                             pk=[
                                 level_id_column, 'partner_country',
                                 'month_id', 'year', 'hs2_id', 'hs4_id',
                                 'hs6_id', 'product_level'
                             ],
                             dtype=dtype)

        return [download_step, read_step, transform_step, load_step]
コード例 #29
0
    def run(params, **kwargs):
        """Assemble and run the pipeline loading ``services_i_comtrade_a_eb02``.

        Args:
            params: Mapping read with ``.get``. ``"source_connector"`` and
                ``"db_connector"`` name the connectors to fetch from
                ``etl/conns.yaml``. The whole mapping is also passed to
                ``AdvancedPipelineExecutor``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            Whatever ``run_pipeline()`` returns for the assembled pipeline.
        """
        # Each fetch gets its own freshly opened handle, and each handle is
        # closed as soon as its connector has been fetched instead of leaked.
        with open("etl/conns.yaml") as conns_file:
            source_connector = Connector.fetch(params.get("source_connector"),
                                               conns_file)
        with open("etl/conns.yaml") as conns_file:
            db_connector = Connector.fetch(params.get("db_connector"),
                                           conns_file)

        # Column name -> type name mapping handed to the load step.
        dtype = {
            'year': 'UInt16',
            'trade_flow_id': 'UInt8',
            'reporter_id': 'UInt32',
            'partner_id': 'UInt32',
            'service_id': 'UInt16',
            'trade_value_us_dollars': 'Int64',
        }

        download_data = DownloadStep(connector=source_connector)
        unzip_step = UnzipStep(pattern=r"\.csv$")
        extract_step = ExtractStep()
        # ``trade_value_us_dollars`` is the only column declared as nullable.
        load_step = LoadStep("services_i_comtrade_a_eb02",
                             db_connector,
                             if_exists="append",
                             dtype=dtype,
                             pk=['reporter_id', 'trade_flow_id', 'year'],
                             nullable_list=['trade_value_us_dollars'])

        # Chain: download -> foreach(unzip) -> extract -> load.
        pp = AdvancedPipelineExecutor(params)
        pp = pp.next(download_data).foreach(unzip_step).next(
            extract_step).next(load_step)

        return pp.run_pipeline()
コード例 #30
0
    def steps(params):
        """Build the steps that load the ``dim_expense_items`` table.

        Args:
            params: Pipeline parameters. Not read by this method.

        Returns:
            list: The download, read and load steps, in that order.
        """
        # Scope the connections file to a ``with`` block so the handle is
        # closed once the connector has been fetched instead of being leaked.
        with open('../conns.yaml') as conns_file:
            db_connector = Connector.fetch('clickhouse-database', conns_file)

        # Column name -> type name mapping handed to the load step.
        dtype = {
            "section_id": "UInt8",
            "section": "String",
            "section_en": "String",
            "group_id": "UInt8",
            "group": "String",
            "group_en": "String",
            "sub_group_id": "UInt16",
            "sub_group": "String",
            "sub_group_en": "String"
        }

        dl_step = DownloadStep(
            connector="dim-enigh-expense-items",
            connector_path="conns.yaml",
            force=True
        )
        read_step = ReadStep()
        load_step = LoadStep(
            'dim_expense_items', db_connector, if_exists='drop', dtype=dtype,
            pk=['sub_group_id']
        )

        return [dl_step, read_step, load_step]