def main(): """ starting import """ # get argument parser = argparse.ArgumentParser() parser.add_argument("input_json", type=str, help="JSON file to process") args = parser.parse_args() # transform data into dataframe for row in data_iter(args.input_json): subset_amterdam.append(row) result = pd.DataFrame(subset_amterdam) # lower all column names result.columns = result.columns.str.lower() # aggregating numbers per group result = result.groupby( ['date_of_publication', 'municipality_code', 'municipality_name', 'province']).agg( total_reported = ('total_reported','sum'), hospital_admission = ('hospital_admission','sum'), deceased = ('deceased','sum'), ).reset_index() log.info(f"Starting import {args.input_json}") engine = get_engine() result.to_sql( "corona_gevallen_new", engine, dtype={ "index": Integer(), "date_of_publication": Date(), "municipality_code": String(), "municipality_name": String(), "province": String(), "total_reported": Integer(), "hospital_admission": Integer(), "deceased": Integer(), }, ) log.info("Data loaded into DB") ProvenanceRenameOperator( task_id="provenance_rename", dataset_name="corona", prefix_table_name="corona_", postfix_table_name="_new", rename_indexes=False, pg_schema="public", ).execute() log.info("Renamed columns based on provenance.")
def import_scooter_data(
    table_name: str,
    felyx_api_endpoint: str,
    felyx_api_header: Dict,
    ridecheck_token_endpoint: str,
    ridecheck_token_payload: Dict,
    ridecheck_token_header: Dict,
    ridecheck_data_endpoint: str,
    ridecheck_data_header: Dict,
) -> None:
    """Unions the felyx and RIDECHECK resources and imports the data into a database table.

    Args:
        felyx_api_endpoint: a URL specification for the data service to call
        felyx_api_header: a dictionary of header key-value pairs used when calling the data endpoint
        ridecheck_token_endpoint: a URL specification for the token service to retrieve a (time limited) access key
        ridecheck_token_payload: a dictionary of key-value pairs used in the request body when calling the token service
        ridecheck_token_header: a dictionary of header key-value pairs used when calling the token endpoint
        ridecheck_data_endpoint: a URL specification of the data service to call
        ridecheck_data_header: a dictionary of header key-value pairs used when calling the data endpoint

    Executes:
        SQL insert statements of scooter data into the database table
    """
    ridecheck = get_data_ridecheck(
        ridecheck_token_endpoint,
        ridecheck_token_payload,
        ridecheck_token_header,
        ridecheck_data_endpoint,
        ridecheck_data_header,
    )
    felyx = get_data_scooter_felyx(felyx_api_endpoint, felyx_api_header)

    # Union both sources and turn the row objects into a dataframe
    scooter_dataset = chain(ridecheck, felyx)
    df = pd.DataFrame([vars(row) for row in scooter_dataset])
    df["geometrie"] = df["geometrie"].apply(lambda g: WKTElement(g.wkt, srid=4326))

    db_engine = get_engine()
    df.to_sql(
        table_name,
        db_engine,
        if_exists="replace",
        index=False,
        dtype={"geometrie": Geometry(geometry_type="POINT", srid=4326)},
    )
    with db_engine.connect() as connection:
        connection.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY (id)")
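# get_data_ridecheck and get_data_scooter_felyx are assumed to yield simple row
# objects whose attributes map one-to-one onto table columns (hence the
# vars(row) call) and whose geometrie attribute is a shapely geometry (hence
# g.wkt). A purely hypothetical sketch of such a row type, for illustration
# only (assumes `from dataclasses import dataclass` and
# `from shapely.geometry import Point`):
@dataclass
class ScooterRow:
    id: str
    provider: str
    geometrie: Point  # shapely Point; converted to a WKTElement before loading

# vars(ScooterRow(...)) returns the fields as a dict, which is exactly what
# pd.DataFrame([...]) consumes above.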
def load_grex_from_dwh(table_name):
    db_engine = get_engine()
    dwh_ora_engine = get_ora_engine("oracle_dwh_stadsdelen")

    with dwh_ora_engine.connect() as connection:
        df = pd.read_sql(
            """
            SELECT PLANNR as ID
                 , PLANNAAM
                 , STARTDATUM
                 , PLANSTATUS
                 , OPPERVLAKTE
                 , GEOMETRIE_WKT AS GEOMETRY
              FROM DMDATA.GREX_GV_PLANNEN_V2
            """,
            connection,
            index_col="id",
            coerce_float=True,
            params=None,
            parse_dates=["startdatum"],
            columns=None,
            chunksize=None,
        )

        df["geometry"] = df["geometry"].apply(wkt_loads_wrapped)

        grex_rapportage_dtype = {
            "id": Integer(),
            "plannaam": Text(),
            "startdatum": Date(),
            "planstatus": Text(),
            "oppervlakte": Float(),
            "geometry": Geometry(geometry_type="GEOMETRY", srid=4326),
        }
        df.to_sql(table_name, db_engine, if_exists="replace", dtype=grex_rapportage_dtype)

    with db_engine.connect() as connection:
        connection.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY (id)")
        connection.execute(
            f"""
            UPDATE {table_name}
               SET geometry = ST_CollectionExtract(ST_Makevalid(geometry), 3)
             WHERE ST_IsValid(geometry) = False
                OR ST_GeometryType(geometry) != 'ST_MultiPolygon';
            COMMIT;
            """
        )
        connection.execute(
            f"""
            ALTER TABLE {table_name}
            ALTER COLUMN geometry TYPE geometry(MultiPolygon,28992)
            USING ST_Transform(geometry,28992);
            """
        )
        connection.execute(f"DELETE FROM {table_name} WHERE geometry is NULL")
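# wkt_loads_wrapped is a project helper that is not shown here. A minimal
# sketch of what these loaders appear to assume -- parsing a WKT string with
# shapely and wrapping it as a geoalchemy2 WKTElement in SRID 4326 while
# tolerating empty values -- could be the following (hypothetical, the real
# helper may differ; assumes `from shapely import wkt` and
# `from geoalchemy2.elements import WKTElement`):
def wkt_loads_wrapped(value):
    # Rows without a geometry stay NULL and are removed in post-processing.
    if not value:
        return None
    geometry = wkt.loads(value)  # parse the WKT string into a shapely geometry
    return WKTElement(geometry.wkt, srid=4326)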
def _load_geojson(postgres_conn_id):
    """As airflow executes tasks on different hosts, these steps need to happen
    in a single call.

    Otherwise the (large) file would be downloaded by one host and stored in
    the XCom table to be shared between tasks.
    """
    tmp_dir = Path(f"/tmp/{dag_id}")
    tmp_dir.mkdir(parents=True, exist_ok=True)

    # 1. download files
    files = {}
    for route in ROUTES:
        dest = f"{tmp_dir}/{route.name}.geojson"
        logger.info("Downloading %s to %s", route.url, dest)
        download_file(route.url, dest, http_conn_id=None)
        files[route.name] = dest

    # 2. generate schema ("schema introspect geojson *.geojson")
    schema = introspect_geojson_files("gevaarlijke-routes", files=list(files.values()))
    schema = DatasetSchema.from_dict(schema)  # TODO: move to schema-tools?

    # XXX This is not running as one transaction atm, but autocommitting per chunk
    # 3. import data
    db_engine = get_engine()
    importer = GeoJSONImporter(schema, db_engine, logger=logger)
    for route in ROUTES:
        geojson_path = files[route.name]
        logger.info("Importing %s into %s", route.name, route.tmp_db_table_name)
        importer.generate_db_objects(
            table_name=route.schema_table_name,
            db_table_name=route.tmp_db_table_name,
            truncate=True,  # when reexecuting the same task
            ind_tables=True,
            ind_extra_index=False,
        )
        importer.load_file(geojson_path)

        if route.post_process:
            hook = PostgresHook(postgres_conn_id=postgres_conn_id)
            hook.run(route.post_process)
def load_grex(input_csv, table_name):
    db_engine = get_engine()
    df = pd.read_csv(
        input_csv,
        delimiter=";",
        decimal=",",
        encoding="latin_1",
        parse_dates=["STARTDATUM"],
        dayfirst=True,
        index_col="PLANNR",
    )
    df.rename(
        columns=lambda x: "geometry" if x == "GEOMETRIE" else x.lower(), inplace=True
    )
    df.index.name = "id"
    df["geometry"] = df["geometry"].apply(wkt_loads_wrapped)

    grex_rapportage_dtype = {
        "id": Integer(),
        "plannaam": Text(),
        "planbeheercode": Text(),
        "startdatum": Date(),
        "planstatus": Text(),
        "oppervlakte": Float(),
        "geometry": Geometry(geometry_type="MULTIPOLYGON", srid=4326),
    }
    df.to_sql(table_name, db_engine, if_exists="replace", dtype=grex_rapportage_dtype)

    with db_engine.connect() as connection:
        connection.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY (id)")
        connection.execute(
            f"""
            ALTER TABLE {table_name}
            ALTER COLUMN geometry TYPE geometry(MultiPolygon,28992)
            USING ST_Transform(geometry,28992)
            """
        )
        connection.execute(f"DELETE FROM {table_name} WHERE geometry is NULL")
        connection.execute(
            f"""
            UPDATE {table_name}
               SET geometry = ST_CollectionExtract(ST_Makevalid(geometry), 3)
             WHERE ST_IsValid(geometry) = False;
            """
        )
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("input_csv", type=str, help="CSV file to process")
    args = parser.parse_args()

    df = pd.read_csv(
        args.input_csv,
        sep=";",
        names=["organisatie", "type_interventie", "aantal", "week_nummer", "jaar"],
        converters={
            "organisatie": strip,
            "type_interventie": strip,
        },
        header=0,
    )
    df.index.name = "id"

    engine = get_engine()
    df.to_sql(
        "corona_handhaving_new",
        engine,
        dtype={
            "id": Integer(),
            "aantal": Integer(),
            "week_nummer": Integer(),
            "jaar": Integer(),
        },
    )
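# The strip converter referenced above is assumed to be a tiny project helper
# that trims whitespace from CSV cells; a hypothetical sketch:
def strip(value: str) -> str:
    """Trim surrounding whitespace from a CSV cell."""
    return value.strip()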
def import_auto_data(
    table_name: str,
    mywheels_api_endpoint: str,
    mywheels_api_header: Dict,
    mywheels_api_payload: Dict,
) -> None:
    """Unions the MYWHEELS and [...] resources and imports the data into a database table.

    Args:
        mywheels_api_endpoint: a URL specification for the data service to call
        mywheels_api_header: a dictionary of header key-value pairs used when calling the data endpoint
        mywheels_api_payload: a dictionary of key-value pairs used in the request body when calling the token service

    Executes:
        SQL insert statements of auto data into the database table
    """
    mywheels = get_data_auto_mywheels(
        mywheels_api_endpoint, mywheels_api_header, mywheels_api_payload
    )
    auto_dataset = [row for row in mywheels]
    df = pd.DataFrame([vars(row) for row in auto_dataset])
    df["geometrie"] = df["geometrie"].apply(lambda g: WKTElement(g.wkt, srid=4326))

    db_engine = get_engine()
    df.to_sql(
        table_name,
        db_engine,
        if_exists="replace",
        index=False,
        dtype={"geometrie": Geometry(geometry_type="POINT", srid=4326)},
    )
    with db_engine.connect() as connection:
        connection.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY (id)")
def load_from_dwh(table_name):
    db_engine = get_engine()
    dwh_ora_engine = get_ora_engine("oracle_dwh_stadsdelen")

    with dwh_ora_engine.connect() as connection:
        df = pd.read_sql(
            """
            select ZAAKNUMMER
                 , ZAAKNAAM
                 , ADRES
                 , ZAAK_CATEGORIE
                 , ZAAK_SPECIFICATIE
                 , BEGINDATUM
                 , EINDDATUM
                 , OPENINGSTIJDEN_ZO_DO_VAN
                 , OPENINGSTIJDEN_ZO_DO_TOT
                 , OPENINGSTIJDEN_VR_ZA_VAN
                 , OPENINGSTIJDEN_VR_ZA_TOT
                 , O_TIJDEN_TERRAS_ZO_DO_VAN
                 , O_TIJDEN_TERRAS_ZO_DO_TOT
                 , O_TIJDEN_TERRAS_VR_ZA_VAN
                 , O_TIJDEN_TERRAS_VR_ZA_TOT
                 , LOCATIE_WKT AS LOCATIE
                 , TERRASGEOMETRIE_WKT AS TERRASGEOMETRIE
                 , POSTCODE
                 , STATUS_VERGUNNING
                 , STATUS_TIJDELIJK_TERRAS
                 , TOESTEMMING_TIJDELIJK_TERRAS
                 , PUBL_BESLUIT_TIJDELIJK_TERRAS
                 , TIJDELIJK_TERRAS_DETAILS
                 , STATUS_VERLENG_TIJDELK_TERRAS
                 , VERLENG_TIJDELK_TERRAS_DETAIL
              from DMDATA.HORECA_EXPLOITATIEVERGUNNING_V
            """,
            connection,
            coerce_float=True,
            params=None,
            parse_dates=["begindatum", "einddatum"],
            columns=None,
            chunksize=None,
        )

        df["locatie"] = df["locatie"].apply(wkt_loads_wrapped)
        df["terrasgeometrie"] = df["terrasgeometrie"].apply(wkt_loads_wrapped)

        dtype = {
            "zaaknummer": Integer(),
            "zaaknaam": Text(),
            "zaak_categorie": Text(),
            "zaak_specificatie": Text(),
            "begindatum": Date(),
            "einddatum": Date(),
            "openingstijden_zo_do_van": Text(),
            "openingstijden_zo_do_tot": Text(),
            "openingstijden_vr_za_van": Text(),
            "openingstijden_vr_za_tot": Text(),
            "o_tijden_terras_zo_do_van": Text(),
            "o_tijden_terras_zo_do_tot": Text(),
            "o_tijden_terras_vr_za_van": Text(),
            "o_tijden_terras_vr_za_tot": Text(),
            "locatie": Geometry(geometry_type="POINT", srid=4326),
            "terrasgeometrie": Geometry(geometry_type="MULTIPOLYGON", srid=4326),
            "postcode": Text(),
            "status_vergunning": Text(),
            "status_tijdelijk_terras": Text(),
            "toestemming_tijdelijk_terras": Text(),
            "publ_besluit_tijdelijk_terras": Text(),
            "tijdelijk_terras_details": Text(),
            "status_verleng_tijdelk_terras": Text(),
            "verleng_tijdelk_terras_detail": Text(),
        }
        df.to_sql(
            table_name, db_engine, if_exists="replace", index_label="id", dtype=dtype
        )

    with db_engine.connect() as connection:
        connection.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY (id)")
        connection.execute(
            f"""
            ALTER TABLE {table_name}
            ALTER COLUMN terrasgeometrie TYPE geometry(MultiPolygon,28992)
            USING ST_Transform(terrasgeometrie,28992)
            """
        )
        connection.execute(
            f"""
            ALTER TABLE {table_name}
            ALTER COLUMN locatie TYPE geometry(Point,28992)
            USING ST_Transform(locatie,28992)
            """
        )
        # connection.execute(f"DELETE FROM {table_name} WHERE terrasgeometrie is NULL")
        connection.execute(
            f"""
            UPDATE {table_name}
               SET terrasgeometrie = ST_CollectionExtract(ST_Makevalid(terrasgeometrie), 3)
             WHERE ST_IsValid(terrasgeometrie) = False;
            """
        )
        connection.execute(
            f"""
            ALTER TABLE {table_name}
            RENAME COLUMN VERLENG_TIJDELK_TERRAS_DETAIL to VERLENGING_TIJDELIJK_TERRAS_DETAILS;
            ALTER TABLE {table_name}
            RENAME COLUMN STATUS_VERLENG_TIJDELK_TERRAS to STATUS_VERLENGING_TIJDELIJK_TERRAS;
            """
        )
def load_from_dwh(table_name: str) -> None:
    """Loads data from an Oracle database source into a Postgres database.

    Args:
        table_name: The target table where the source data will be stored

    Executes:
        SQL INSERT statements for the data and, as post-processing, an ALTER
        statement to add a constraint.

    Note: The SQL processing is done with SQLAlchemy
    Note2: Data is filtered on unit OIS 'Onderzoek, Informatie en Statistiek' by code 380000
    """
    db_engine = get_engine()
    dwh_ora_engine = get_ora_engine("oracle_dwh_ami")
    date_fmt = "%Y%d%m"

    with dwh_ora_engine.connect() as connection:
        df = pd.read_sql(
            """
            SELECT gb.DATUM_BOEKING as "datum_boeking"
                  ,gb.BEDRAG_BOEKING as "bedrag_boeking"
                  ,gb.OMSCHRIJVING as "boeking_omschrijving"
                  ,gb.OPMERKING as "boeking_opmerking"
                  ,gbr.HOOFDREKENING_CODE as "hoofdrekening_code"
                  ,gbr.REKENING_OMSCHRIJVING as "rekening_omschrijving"
                  ,gbr.BEDRIJFSEENHEID_CODE as "bedrijfseenheid_code"
                  ,gbr.BEDRIJFSEENHEID_OMSCHRIJVING as "bedrijfseenheid_omschrijving"
                  ,gb.SUBGROOTBOEK as "subgrootboek"
                  ,gb.SUBGROOTBOEKTYPE as "subgrootboektype"
                  ,vpr.VPL_VERPLICHTINGNUMMER as "vpl_verplichtingnummer"
                  ,vpr.VPL_OMSCHRIJVING as "vpl_omschrijving"
                  ,vpr.VPL_JAAR as "vpl_jaar"
                  ,vpr.VPL_INKOOPCONTRACTCODE as "vpl_inkoopcontractcode"
                  ,vpr.VPL_STATUS as "vpl_status"
                  ,vpr.VPR_DTM_LSTE_AFBOEKING as "vpr_datum_laatste_afboeking"
                  ,vpr.VPR_BEDRAG_OORSPRONKELIJK as "vpr_bedrag_oorspronkelijk"
                  ,vpr.VPR_BEDRAG_AFGEBOEKT as "vpr_bedrag_afgeboekt"
                  ,vpr.VPR_BEDRAG_RESTANT as "vpr_bedrag_restant"
                  ,vpr.VPR_REGELNUMMER as "vpr_regelnummer"
                  ,vpr.VPR_OMSCHRIJVING as "vpr_omschrijving"
                  ,vpr.VPR_OPMERKING as "vpr_opmerking"
                  ,vpr.VPR_DATUM_GROOTBOEK as "vpr_datum_grootboek"
                  ,vpr.VPR_DATUMVERPLICHTING as "vpr_datumverplichting"
                  ,wor.CODE as "werkorder_code"
                  ,wor.OMSCHRIJVING as "werkorder_omschrijving"
                  ,wor.STATUS as "werkorder_status"
                  ,wor.STATUS_OPMERKING as "werkorder_status_opmerking"
                  ,rel.code as "relatie_code"
                  ,rel.naam as "relatie_naam"
                  ,rel.IND_CREDITEUR as "indicatie_crediteur"
                  ,rel.IND_DEBITEUR as "indicatie_debiteur"
                  ,org.SUBTEAM_CODE as "subteam_code"
                  ,org.SUBTEAM_NAAM as "subteam_naam"
                  ,org.TEAM_CODE as "team_code"
                  ,org.TEAM_NAAM as "team_naam"
                  ,org.AFDELING_CODE as "afdeling_code"
                  ,org.AFDELING_NAAM as "afdeling_naam"
                  ,org.RVE_CODE as "rve_code"
                  ,org.RVE_NAAM as "rve_naam"
                  ,org.CLUSTER_CODE as "cluster_code"
                  ,org.CLUSTER_NAAM as "cluster_naam"
            FROM DATAHUB.FI2_DHB_FCT_GROOTBOEK GB
            INNER JOIN DATAHUB.FI2_DHB_DIM_RELATIE REL ON REL.ID = GB.REL_ID
            INNER JOIN DATAHUB.FI2_DHB_DIM_ORGANISATIE org ON ORG.ID = GB.ORG_ID
            INNER JOIN DATAHUB.FI2_DHB_DIM_GROOTBOEKREKENING gbr ON gbr.ID = GB.gbr_ID
            INNER JOIN DATAHUB.FI2_DHB_DIM_VERPLICHTINGREGEL vpr ON vpr.ID = GB.VPR_ID
            INNER JOIN DATAHUB.FI2_DHB_DIM_WERKORDER wor ON wor.ID = GB.WOR_ID
            WHERE 1=1
              AND RVE_CODE = '380000'
            """,
            connection,
            coerce_float=True,
            params=None,
            parse_dates={
                "datum_boeking": date_fmt,
                "vpr_datum_grootboek": date_fmt,
                "vpr_datumverplichting": date_fmt,
                "vpr_datum_laatste_afboeking": date_fmt,
            },
            columns=None,
            chunksize=None,
        )

        dtype = {
            "datum_boeking": Date(),
            "bedrag_boeking": Numeric(),
            "boeking_omschrijving": Text(),
            "boeking_opmerking": Text(),
            "hoofdrekening_code": Text(),
            "rekening_omschrijving": Text(),
            "bedrijfseenheid_code": Text(),
            "bedrijfseenheid_omschrijving": Text(),
            "subgrootboek": Text(),
            "subgrootboektype": Text(),
            "vpl_verplichtingnummer": Text(),
            "vpl_omschrijving": Text(),
            "vpl_jaar": Integer(),
            "vpl_inkoopcontractcode": Text(),
            "vpl_status": Text(),
            "vpr_datum_laatste_afboeking": Date(),
            "vpr_bedrag_oorspronkelijk": Numeric(),
            "vpr_bedrag_afgeboekt": Numeric(),
            "vpr_bedrag_restant": Numeric(),
            "vpr_regelnummer": Integer(),
            "vpr_omschrijving": Text(),
            "vpr_opmerking": Text(),
            "vpr_datum_grootboek": Date(),
            "vpr_datumverplichting": Date(),
            "werkorder_code": Text(),
            "werkorder_omschrijving": Text(),
            "werkorder_status": Text(),
            "werkorder_status_opmerking": Text(),
            "relatie_code": Text(),
            "relatie_naam": Text(),
            "indicatie_crediteur": Boolean(),
            "indicatie_debiteur": Boolean(),
            "subteam_code": Text(),
            "subteam_naam": Text(),
            "team_code": Text(),
            "team_naam": Text(),
            "afdeling_code": Text(),
            "afdeling_naam": Text(),
            "rve_code": Text(),
            "rve_naam": Text(),
            "cluster_code": Text(),
            "cluster_naam": Text(),
        }
        df.to_sql(
            table_name,
            db_engine,
            if_exists="replace",
            index_label="id",
            index=True,
            dtype=dtype,
        )

    with db_engine.connect() as connection:
        connection.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY (ID)")
def load_from_dwh(table_name: str) -> None:
    """Loads data from an Oracle database source into a Postgres database.

    Args:
        table_name: The target table where the source data will be stored

    Executes:
        SQL INSERT statements for the data and, as post-processing, an ALTER
        statement to add a constraint.

    Note: The SQL processing is done with SQLAlchemy
    """
    db_engine = get_engine()
    dwh_ora_engine = get_ora_engine("oracle_dwh_stadsdelen")

    with dwh_ora_engine.connect() as connection:
        df = pd.read_sql(
            """
            select ADWH_VERSIE_ID
                 , SOORT_WERKZAAMHEDEN
                 , KENTEKEN
                 , ADWH_KENTEKEN
                 , CATEGORIE
                 , ADWH_ACTIVITEIT
                 , WERKZAAMHEDEN_CODE
                 , WERKZAAMHEDEN_OMSCHRIJVING
                 , WERKZAAMHEDEN_DATUM
                 , WERKZAAMHEDEN_DATUM_REF_ID
                 , WERKZAAMHEDEN_STARTTIJD
                 , WERKZAAMHEDEN_EINDTIJD
                 , WERKZAAMHEDEN_UREN_GEPLAND
                 , PAUZE_STARTTIJD
                 , PAUZE_EINDTIJD
                 , PAUZE_UREN_GEPLAND
                 , INHUUR
                 , FASE
                 , MEMO
                 , TEAM
                 , AANTAL_MEDEWERKERS
                 , UREN_INZET_MEDEWERKER_INTERN
                 , UREN_INZET_MEDEWERKER_INHUUR
                 , UREN_INZET_VOERTUIG
                 , AANTAL_MEDEWERKERS_INTERN
                 , AANTAL_MEDEWERKERS_INHUUR
                 , ADWH_LAATST_GEZIEN
                 , ADWH_LAATST_GEZIEN_BRON
              from DMDATA.AFVAL_INZML_VOERTUIGPLAN_V2
            """,
            connection,
            coerce_float=True,
            params=None,
            parse_dates=[
                "WERKZAAMHEDEN_DATUM",
                "ADWH_LAATST_GEZIEN",
                "ADWH_LAATST_GEZIEN_BRON",
            ],
            columns=None,
            chunksize=None,
        )

        dtype = {
            "ADWH_VERSIE_ID": Integer(),
            "SOORT_WERKZAAMHEDEN": Text(),
            "KENTEKEN": Text(),
            "ADWH_KENTEKEN": Text(),
            "CATEGORIE": Text(),
            "ADWH_ACTIVITEIT": Text(),
            "WERKZAAMHEDEN_CODE": Text(),
            "WERKZAAMHEDEN_OMSCHRIJVING": Text(),
            "WERKZAAMHEDEN_DATUM": Date(),
            "WERKZAAMHEDEN_DATUM_REF_ID": Integer(),
            "WERKZAAMHEDEN_STARTTIJD": Text(),
            "WERKZAAMHEDEN_EINDTIJD": Text(),
            "WERKZAAMHEDEN_UREN_GEPLAND": Numeric(),
            "PAUZE_STARTTIJD": Text(),
            "PAUZE_EINDTIJD": Text(),
            "PAUZE_UREN_GEPLAND": Text(),
            "INHUUR": Text(),
            "FASE": Text(),
            "MEMO": Text(),
            "TEAM": Text(),
            "AANTAL_MEDEWERKERS": Text(),
            "UREN_INZET_MEDEWERKER_INTERN": Numeric(),
            "UREN_INZET_MEDEWERKER_INHUUR": Numeric(),
            "UREN_INZET_VOERTUIG": Numeric(),
            "AANTAL_MEDEWERKERS_INTERN": Numeric(),
            "AANTAL_MEDEWERKERS_INHUUR": Numeric(),
            "ADWH_LAATST_GEZIEN": DateTime(),
            "ADWH_LAATST_GEZIEN_BRON": DateTime(),
        }
        df.to_sql(table_name, db_engine, if_exists="replace", dtype=dtype, index=False)

    with db_engine.connect() as connection:
        connection.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY (ADWH_VERSIE_ID)")
def load_from_dwh(table_name: str) -> None:
    """Loads data from an Oracle database source into a Postgres database.

    Args:
        table_name: The target table where the source data will be stored

    Executes:
        SQL INSERT statements for the data and, as post-processing, an ALTER
        statement to add a constraint.

    Note: The SQL processing is done with SQLAlchemy
    """
    db_engine = get_engine()
    dwh_ora_engine = get_ora_engine("oracle_dwh_stadsdelen")

    with dwh_ora_engine.connect() as connection:
        df = pd.read_sql(
            """
            select "ID"
                  ,"DATUM"
                  ,"DAG_VAN_WEEK_NUMMER"
                  ,"DAG_VAN_WEEK_NAAM"
                  ,"DAG_VAN_WEEK_KNAAM"
                  ,"VORIGE_DAG_VAN_WEEK_NAAM"
                  ,"VORIGE_DAG_VAN_WEEK_KNAAM"
                  ,"WEEKEND_IND"
                  ,"FEESTDAG_IND"
                  ,"FEESTDAG_ADAM_IND"
                  ,"AANTAL_WERKDAGEN"
                  ,"AANTAL_WERKDAGEN_ADAM"
                  ,"SEIZOEN"
                  ,"WEEK_IN_MAAND_NUMMER"
                  ,"WEEK_IN_MAAND_START_DATUM"
                  ,"WEEK_IN_MAAND_EINDE_DATUM"
                  ,"WEEK_IN_JAAR_NUMMER"
                  ,"ISO_WEEK_NUMMER"
                  ,"JAAR_ISO_WEEKNR"
                  ,"ISO_WEEK_START_DATUM"
                  ,"ISO_WEEK_EINDE_DATUM"
                  ,"DAG_VAN_MAAND_NUMMER"
                  ,"MAAND_WAARDE"
                  ,"MAAND_NAAM"
                  ,"MAAND_KNAAM"
                  ,"JAARMAAND"
                  ,"MAAND_START_DATUM"
                  ,"MAAND_EINDE_DATUM"
                  ,"DAGEN_IN_MAAND"
                  ,"LAATSTE_DAG_VAN_MAAND_IND"
                  ,"DAG_VAN_KWARTAAL_NUMMER"
                  ,"KWARTAAL_WAARDE"
                  ,"KWARTAAL_NAAM"
                  ,"JAARKWARTAAL"
                  ,"KWARTAAL_START_DATUM"
                  ,"KWARTAAL_EINDE_DATUM"
                  ,"DAGEN_IN_KWARTAAL"
                  ,"LAATSTE_DAG_VAN_KWARTAAL_IND"
                  ,"TERTAAL"
                  ,"JAAR_TERTAAL"
                  ,"DAG_IN_TERTAAL_NUMMER"
                  ,"DAGEN_IN_TERTAAL"
                  ,"LAATSTE_DAG_VAN_TERTAAL_IND"
                  ,"DAG_VAN_JAAR_NUMMER"
                  ,"JAAR_WAARDE"
                  ,"JAAR_NAAM"
                  ,"JAAR_KNAAM"
                  ,"JAAR_START_DATUM"
                  ,"JAAR_EINDE_DATUM"
                  ,"DAGEN_IN_JAAR"
                  ,"LAATSTE_DAG_VAN_JAAR_IND"
                  ,"JAARNAAM_KORT"
                  ,"JULIAANSE_DATUM"
                  ,CAST(NULL AS NUMBER) AS "SCHOOLVAKANTIE_NL_NOORD_IND"
                  ,CAST(NULL AS NUMBER) AS "SCHOOLVAKANTIE_NL_MIDDEN_IND"
                  ,CAST(NULL AS NUMBER) AS "SCHOOLVAKANTIE_NL_ZUID_IND"
                  ,"NIVEAUCODE"
              from DMDATA.ALG_DIM_DATUM_V2
            """,
            connection,
            coerce_float=True,
            params=None,
            parse_dates=[
                "DATUM",
                "ISO_WEEK_START_DATUM",
                "ISO_WEEK_EINDE_DATUM",
                "WEEK_IN_MAAND_START_DATUM",
                "WEEK_IN_MAAND_EINDE_DATUM",
                "MAAND_START_DATUM",
                "MAAND_EINDE_DATUM",
                "KWARTAAL_START_DATUM",
                "KWARTAAL_EINDE_DATUM",
                "JAAR_START_DATUM",
                "JAAR_EINDE_DATUM",
            ],
            columns=None,
            chunksize=None,
        )

        dtype = {
            "ID": Integer(),
            "DATUM": Date(),
            "DAG_VAN_WEEK_NUMMER": Integer(),
            "DAG_VAN_WEEK_NAAM": Text(),
            "DAG_VAN_WEEK_KNAAM": Text(),
            "VORIGE_DAG_VAN_WEEK_NAAM": Text(),
            "VORIGE_DAG_VAN_WEEK_KNAAM": Text(),
            "WEEKEND_IND": Integer(),
            "FEESTDAG_IND": Integer(),
            "FEESTDAG_ADAM_IND": Integer(),
            "AANTAL_WERKDAGEN": Integer(),
            "AANTAL_WERKDAGEN_ADAM": Integer(),
            "SEIZOEN": Text(),
            "WEEK_IN_MAAND_NUMMER": Integer(),
            "WEEK_IN_MAAND_START_DATUM": Date(),
            "WEEK_IN_MAAND_EINDE_DATUM": Date(),
            "WEEK_IN_JAAR_NUMMER": Integer(),
            "ISO_WEEK_NUMMER": Integer(),
            "JAAR_ISO_WEEKNR": Integer(),
            "ISO_WEEK_START_DATUM": Date(),
            "ISO_WEEK_EINDE_DATUM": Date(),
            "DAG_VAN_MAAND_NUMMER": Integer(),
            "MAAND_WAARDE": Integer(),
            "MAAND_NAAM": Text(),
            "MAAND_KNAAM": Text(),
            "JAARMAAND": Integer(),
            "MAAND_START_DATUM": Text(),
            "MAAND_EINDE_DATUM": Text(),
            "DAGEN_IN_MAAND": Integer(),
            "LAATSTE_DAG_VAN_MAAND_IND": Integer(),
            "DAG_VAN_KWARTAAL_NUMMER": Integer(),
            "KWARTAAL_WAARDE": Integer(),
            "KWARTAAL_NAAM": Text(),
            "JAARKWARTAAL": Integer(),
            "KWARTAAL_START_DATUM": Date(),
            "KWARTAAL_EINDE_DATUM": Date(),
            "DAGEN_IN_KWARTAAL": Integer(),
            "LAATSTE_DAG_VAN_KWARTAAL_IND": Integer(),
            "TERTAAL": Integer(),
            "JAAR_TERTAAL": Text(),
            "DAG_IN_TERTAAL_NUMMER": Integer(),
            "DAGEN_IN_TERTAAL": Integer(),
            "LAATSTE_DAG_VAN_TERTAAL_IND": Integer(),
            "DAG_VAN_JAAR_NUMMER": Integer(),
            "JAAR_WAARDE": Integer(),
            "JAAR_NAAM": Text(),
            "JAAR_KNAAM": Text(),
            "JAAR_START_DATUM": Date(),
            "JAAR_EINDE_DATUM": Date(),
            "DAGEN_IN_JAAR": Integer(),
            "LAATSTE_DAG_VAN_JAAR_IND": Integer(),
            "JAARNAAM_KORT": Text(),
            "JULIAANSE_DATUM": Integer(),
            "SCHOOLVAKANTIE_NL_NOORD_IND": Integer(),
            "SCHOOLVAKANTIE_NL_MIDDEN_IND": Integer(),
            "SCHOOLVAKANTIE_NL_ZUID_IND": Integer(),
            "NIVEAUCODE": Text(),
        }
        df.to_sql(table_name, db_engine, if_exists="replace", dtype=dtype, index=False)

    with db_engine.connect() as connection:
        connection.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY (ID)")