def main():
    date, dfs = extract_db_data(glob.glob(os.path.join(credentials.data_path_db, "*.zip")))
    dfs['df_lab'] = extract_lab_data(os.path.join(credentials.data_path_xml, "*.xml"))
    add_global_dfs(dfs)
    convert_datetime_columns(dfs)
    # conn = create_db('../tests/fixtures/coronavirus_massentests.db', dfs)
    for report_def in [{'file_name': 'massentests_pool.csv', 'table_name': table_names[0]},
                       {'file_name': 'massentests_single.csv', 'table_name': table_names[1]}]:
        report = calculate_report(report_def['table_name'])
        export_file = os.path.join(credentials.export_path, report_def['file_name'])
        logging.info(f'Exporting data derived from table {report_def["table_name"]} to file {export_file}...')
        report.to_csv(export_file, index=False)
        common.upload_ftp(export_file, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                          'gd_gs/coronavirus_massenteststs')
    # conn.close()
    logging.info('Job successful!')
def main():
    data_file_names = credentials.data_orig
    abst_date, concatenated_df = calculate_kennzahlen(data_file_names)
    export_file_name = os.path.join(credentials.path, 'data-processing-output', f'Abstimmungen_{abst_date}.csv')
    print(f'Exporting to {export_file_name}...')
    concatenated_df.to_csv(export_file_name, index=False)
    common.upload_ftp(export_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                      'wahlen_abstimmungen/abstimmungen')
    print('Job successful!')
def export_data(df, df_agg):
    agg_export_file_name = os.path.join(credentials.impftermine_path, 'export', 'impftermine_agg.csv')
    print(f'Exporting resulting data to {agg_export_file_name}...')
    df_agg.to_csv(agg_export_file_name, index=False)
    common.upload_ftp(agg_export_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                      'md/covid19_vacc')
    raw_export_file = os.path.join(credentials.impftermine_path, 'export', 'impftermine.csv')
    print(f'Exporting resulting data to {raw_export_file}...')
    df[['date', 'Birthdate', 'birthday', 'age', 'age_group', 'has_appointments']].to_csv(raw_export_file, index=False)
def main():
    pysqldf = lambda q: sqldf(q, globals())
    df_bs_long_all = get_raw_df()
    df_bs_perc = get_reporting_df(df_bs_long_all)
    for dataset in [{'dataframe': df_bs_long_all, 'filename': 'vaccinations_by_age_group.csv'},
                    {'dataframe': df_bs_perc, 'filename': 'vaccination_report_bs_age_group_long.csv'}]:
        export_file_name = os.path.join(credentials.vmdl_path, dataset['filename'])
        print(f'Exporting resulting data to {export_file_name}...')
        dataset['dataframe'].to_csv(export_file_name, index=False)
        common.upload_ftp(export_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                          'bag/vmdl')
    print('Job successful!')
def get_bag_data(dataset_name, url, suffix):
    print(f'Reading current csv from {url} into data frame...')
    df = common.pandas_read_csv(url)
    print('Checking which column contains the date...')
    date_column = 'datum' if 'datum' in df.columns else 'date'
    print(f'Dropping lines with empty value in date column "{date_column}"...')
    print(f'{df[date_column].isna()}')
    df = df.dropna(subset=[date_column])
    print('Calculating columns...')
    if 'weekly' not in suffix:
        print('Date column is regarded as being a calendar day, calculating dayofweek, wochentag, week...')
        df['dayofweek'] = pd.to_datetime(df[date_column]).dt.dayofweek
        df['wochentag'] = df['dayofweek'].apply(lambda x: common.weekdays_german[x])
        # Series.dt.week is deprecated in newer pandas versions, use isocalendar().week instead
        df['week'] = pd.to_datetime(df[date_column]).dt.isocalendar().week
    else:
        print('Date column is regarded as being a week number. Calculating year, week...')
        df['year'] = df[date_column].astype(str).str.slice(stop=4)
        df['week'] = df[date_column].astype(str).str.slice(start=-2)
    suffix_string = f'_{suffix}' if suffix != '' else ''
    export_file_name = os.path.join(credentials.path, f'covid19_{dataset_name}{suffix_string}.csv')
    print(f'Exporting to file {export_file_name}...')
    df.to_csv(export_file_name, index=False)
    common.upload_ftp(export_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, 'bag')
lots_file_name = f'{credentials.path}csv/lots/parkendd-lots.csv'
print('Processing data...')
for lot in parsed['lots']:
    lot['last_downloaded'] = parsed['last_downloaded']
    lot['last_updated'] = parsed['last_updated']
normalized = pd.json_normalize(parsed, record_path='lots')
normalized['title'] = "Parkhaus " + normalized['name']
normalized['id2'] = normalized['id'].str.replace('baselparkhaus', '')
normalized['link'] = "https://www.parkleitsystem-basel.ch/parkhaus/" + normalized['id2']
normalized['description'] = 'Anzahl freie Parkplätze: ' + normalized['free'].astype(str)
normalized['published'] = normalized['last_downloaded']
print(f'Creating lots file and saving as {lots_file_name}...')
lots = normalized[['address', 'id', 'lot_type', 'name', 'total', 'last_downloaded', 'last_updated',
                   'coords.lat', 'coords.lng', 'title', 'id2', 'link', 'published']]
lots.to_csv(lots_file_name, index=False)
values_file_name = f'{credentials.path}csv/values/parkendd-{str(datetime.now()).replace(":", "")}.csv'
print(f'Creating values file and saving as {values_file_name}...')
values = normalized[['published', 'free', 'id', 'id2']]
values.to_csv(values_file_name, index=False)
common.upload_ftp(lots_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                  'parkendd/csv/lots')
folder = datetime.now().strftime('%Y-%m')
common.ensure_ftp_dir(credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                      f'parkendd/csv/values/{folder}')
common.upload_ftp(values_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                  f'parkendd/csv/values/{folder}')
print('Job successful!')
df = pd.read_csv(filename, sep=';')
df = df.rename(columns={
    'sterbe_datum': 'Date_of_death',
    'melde_datum': 'Date',
    'sterbe_alter': 'AgeYear',
    'geschlecht': 'Gender',
    'vorerkrankung': 'PreExistingCond',
    'verstorbene_kumuliert': 'ncumul_deceased'
})
df['Source'] = 'https://www.gesundheit.bs.ch'
df['Area'] = 'Canton_BS'
df['NewDeaths'] = 1
df = df.sort_values(by=['Date'])
df['CaseNumberPerDay'] = df.groupby(['Date']).cumcount() + 1
# Limit columns to be exported
df = df[['Date', 'Area', 'AgeYear', 'Gender', 'NewDeaths', 'PreExistingCond', 'Source',
         'ncumul_deceased', 'CaseNumberPerDay']]
export_filename = os.path.join(credentials.export_path, credentials.export_filename_gestorbene)
print(f'Exporting data to {export_filename}...')
df.to_csv(export_filename, index=False)
common.upload_ftp(export_filename, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                  'md/covid19_cases')
print('Job successful!')
# df_append['ndiff_conf'] = df_diff.ncumul_conf
# df_append['ndiff_released'] = df_diff.ncumul_released
# df_append['ndiff_deceased'] = df_diff.ncumul_deceased
df_append['ndiff_confirmed_non_resident'] = df_diff.ncumul_confirmed_non_resident
print('Changing column order and keeping only necessary columns...')
df_append = df_append[[
    'date', 'time', 'abbreviation_canton_and_fl', 'ncumul_tested', 'ncumul_conf', 'new_hosp', 'current_hosp',
    'current_icu', 'current_vent', 'ncumul_released', 'ncumul_deceased', 'source', 'current_isolated',
    'current_quarantined', 'ncumul_confirmed_non_resident', 'current_hosp_non_resident',
    'current_quarantined_riskareatravel', 'current_quarantined_total', 'current_hosp_resident',
    'ndiff_conf', 'ndiff_released', 'ndiff_deceased', 'ndiff_confirmed_non_resident', 'test_date'
]]
print('Removing test_date column for the moment...')
df_append = df_append.drop(columns=['test_date'])
# export_filename = os.path.join(credentials.path, credentials.filename).replace('.csv', f'_{latest_date}.csv')
export_filename = os.path.join(credentials.path, credentials.filename)
print(f'Exporting csv to {export_filename}')
df_append.to_csv(export_filename, index=False)
common.upload_ftp(export_filename, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                  'covid19bs/auto_generated')
print('Job successful!')
def main():
    # file_path = vmdl_extract.retrieve_vmdl_data()
    file_path = vmdl.file_path()
    print('Reading data into dataframe...')
    df = pd.read_csv(file_path, sep=';')
    # df['vacc_date_dt'] = pd.to_datetime(df.vacc_date, format='%Y-%m-%dT%H:%M:%S.%f%z')
    df['vacc_day'] = df.vacc_date.str.slice(stop=10)

    print('Executing calculations...')
    pysqldf = lambda q: sqldf(q, globals())
    print('Filter by BS and vacc_date, sum type 1 and 99, create "other" type, count persons...')
    df_bs = sqldf(f'''
        select * from df
        where reporting_unit_location_ctn = "BS"
        and vacc_day < "{vmdl.today_string()}"''')
    df_bs_by = sqldf('''
        select vacc_day, vacc_count,
            case reporting_unit_location_type
                when 1 then "vacc_centre"
                when 99 then "vacc_centre"
                when 6 then "hosp"
                else "other"
            end as location_type,
            count(person_anonymised_id) as count
        from df_bs
        group by vacc_day, vacc_count, location_type
        order by vacc_day asc;''')

    print('Create empty table of all combinations...')
    df_all_days = pd.DataFrame(data=pd.date_range(start=df_bs.vacc_day.min(),
                                                  end=vmdl.yesterday_string()).astype(str),
                               columns=['vacc_day'])
    df_all_vacc_count = sqldf('select distinct vacc_count from df;')
    df_all_location_type = sqldf('select distinct location_type from df_bs_by')
    df_all_comb = sqldf('select * from df_all_days cross join df_all_vacc_count cross join df_all_location_type;')

    print('Adding days without vaccinations...')
    df_bs_by_all = df_all_comb.merge(df_bs_by, on=['vacc_day', 'vacc_count', 'location_type'], how='outer').fillna(0)

    print('Pivoting...')
    df_pivot_table = df_bs_by_all.pivot_table(values='count', index=['vacc_day'],
                                              columns=['location_type', 'vacc_count'], fill_value=0)
    # Replace the 2-level column names with a string that concatenates both strings
    df_pivot_table.columns = ["_".join(str(c) for c in col) for col in df_pivot_table.columns.values]
    df_pivot = df_pivot_table.reset_index()

    print('Ensure columns exist...')
    for column_name in ['other_1', 'other_2',
                        'in_aph_verabreichte_impfungen_pro_tag',
                        'im_aph_mit_erster_dosis_geimpfte_personen_pro_tag',
                        'im_aph_mit_zweiter_dosis_geimpfte_personen_pro_tag']:
        if column_name not in df_pivot.columns:
            df_pivot[column_name] = 0

    print('Calculating columns...')
    df_pivot['hosp'] = df_pivot.hosp_1 + df_pivot.hosp_2
    df_pivot['vacc_centre'] = df_pivot.vacc_centre_1 + df_pivot.vacc_centre_2
    df_pivot['other'] = df_pivot.other_1 + df_pivot.other_2
    df_pivot['vacc_count_1'] = df_pivot.hosp_1 + df_pivot.vacc_centre_1 + df_pivot.other_1
    df_pivot['vacc_count_2'] = df_pivot.hosp_2 + df_pivot.vacc_centre_2 + df_pivot.other_2
    df_pivot['cum_1'] = df_pivot.vacc_count_1.cumsum()
    df_pivot['cum_2'] = df_pivot.vacc_count_2.cumsum()
    df_pivot['only_1'] = df_pivot.cum_1 - df_pivot.cum_2
    df_pivot['total'] = df_pivot.hosp + df_pivot.vacc_centre + df_pivot.other
    df_pivot['total_cum'] = df_pivot.total.cumsum()

    print('Renaming and restricting columns for export...')
    df_export = df_pivot.rename(columns={
        'vacc_day': 'datum',
        'hosp_1': 'im_spital_mit_erster_dosis_geimpfte_personen_pro_tag',
        'hosp_2': 'im_spital_mit_zweiter_dosis_geimpfte_personen_pro_tag',
        'vacc_centre_1': 'im_impfzentrum_mit_erster_dosis_geimpfte_personen_pro_tag',
        'vacc_centre_2': 'im_impfzentrum_mit_zweiter_dosis_geimpfte_personen_pro_tag',
        'other_1': 'anderswo_mit_erster_dosis_geimpfte_personen_pro_tag',
        'other_2': 'anderswo_mit_zweiter_dosis_geimpfte_personen_pro_tag',
        'hosp': 'im_spital_verabreichte_impfungen_pro_tag',
        'vacc_centre': 'im_impfzentrum_verabreichte_impfungen_pro_tag',
        'other': 'anderswo_verabreichte_impfungen_pro_tag',
        'vacc_count_1': 'total_mit_erster_dosis_geimpfte_personen_pro_tag',
        'vacc_count_2': 'total_mit_zweiter_dosis_geimpfte_personen_pro_tag',
        'cum_1': 'total_personen_mit_erster_dosis',
        'cum_2': 'total_personen_mit_zweiter_dosis',
        'only_1': 'total_personen_mit_ausschliesslich_erster_dosis',
        'total': 'total_verabreichte_impfungen_pro_tag',
        'total_cum': 'total_verabreichte_impfungen',
    })
    df_export = df_export[[
        'datum',
        'total_verabreichte_impfungen',
        'total_personen_mit_erster_dosis',
        'total_personen_mit_ausschliesslich_erster_dosis',
        'total_personen_mit_zweiter_dosis',
        'im_impfzentrum_verabreichte_impfungen_pro_tag',
        'im_impfzentrum_mit_erster_dosis_geimpfte_personen_pro_tag',
        'im_impfzentrum_mit_zweiter_dosis_geimpfte_personen_pro_tag',
        'in_aph_verabreichte_impfungen_pro_tag',
        'im_aph_mit_erster_dosis_geimpfte_personen_pro_tag',
        'im_aph_mit_zweiter_dosis_geimpfte_personen_pro_tag',
        'im_spital_verabreichte_impfungen_pro_tag',
        'im_spital_mit_erster_dosis_geimpfte_personen_pro_tag',
        'im_spital_mit_zweiter_dosis_geimpfte_personen_pro_tag',
        'anderswo_verabreichte_impfungen_pro_tag',
        'anderswo_mit_erster_dosis_geimpfte_personen_pro_tag',
        'anderswo_mit_zweiter_dosis_geimpfte_personen_pro_tag',
        'total_verabreichte_impfungen_pro_tag',
    ]]
    export_file_name = os.path.join(credentials.vmdl_path, 'vaccination_report_bs.csv')
    print(f'Exporting resulting data to {export_file_name}...')
    df_export.to_csv(export_file_name, index=False)
    common.upload_ftp(export_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, 'bag/vmdl')
    print('Job successful!')
                        header=None)
print('Merging data...')
merged_df = abfluss_df.merge(pegel_df, on=['datum', 'zeit', 'intervall'], how='outer')
print('Processing data...')
merged_df = merged_df.loc[merged_df.intervall == 5]
print('Fixing entries with zeit == 24:00...')
# Replacing 24:00 with 23:59
merged_df.loc[merged_df.zeit == '24:00', 'zeit'] = '23:59'
# Time is given in MEZ (UTC+1), thus use 'Etc/GMT-1' according to https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# merged_df['timestamp'] = pd.to_datetime(merged_df.datum + ' ' + merged_df.zeit, format='%d.%m.%Y %H:%M').dt.tz_localize('Europe/Zurich')
merged_df['timestamp'] = pd.to_datetime(merged_df.datum + ' ' + merged_df.zeit,
                                        format='%d.%m.%Y %H:%M').dt.tz_localize('Etc/GMT-1')
# Adding a minute to entries with time 23:59, then replacing 23:59 with 24:00 again
merged_df.timestamp = np.where(merged_df.zeit != '23:59', merged_df.timestamp,
                               merged_df.timestamp + pd.Timedelta(minutes=1))
merged_df.zeit = np.where(merged_df.zeit == '23:59', '24:00', merged_df.zeit)
merged_filename = os.path.join(local_path,
                               f'2289_pegel_abfluss_{datetime.today().strftime("%Y-%m-%dT%H-%M-%S")}.csv')
merged_df.to_csv(merged_filename, index=False)
common.upload_ftp(merged_filename, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                  credentials.ftp_remote_dir)
print('Job successful!')
                       rsuffix='wv', lsuffix='points')
    print('Spatially joining points with Bezirk...')
    gdf_wv_bez = gpd.sjoin(gdf_wv, df_bez, how='left', op="within", rsuffix='bez', lsuffix='points')
    print('Dropping unnecessary columns...')
    gdf_wv_bez.drop(columns=['index_wv', 'index_bez', 'wov_id_points'], inplace=True)
    # todo: Find nearest Wohnviertel / Bezirk of points outside of those shapes (Rhein, outside of BS territory)
    # e.g. see https://gis.stackexchange.com/a/342489
    timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    file_path = os.path.join(credentials.path, f'{timestamp}_{credentials.filename}')
    print(f'Exporting data to {file_path}...')
    gdf_wv_bez.to_csv(file_path, index=False, date_format='%Y-%m-%dT%H:%M:%S%z')
    common.upload_ftp(file_path, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                      'tba/illegale-deponien')
    print('Job successful!')
else:
    raise Exception(f'HTTP error getting values from API: {r.status_code}')
pd.options.mode.chained_assignment = None  # Switch off warnings, see https://stackoverflow.com/a/53954986
# Make sure we have a list present, otherwise return None, see https://stackoverflow.com/a/12709152/5005585
live_val['meta.rain.1h.val'] = live_df['meta.rain24h.vals'].apply(lambda x: x[23] if isinstance(x, list) else None)
live_val.to_csv(filename_val, index=False)
map_df = live_df[['name.original', 'name.custom', 'dates.min_date', 'dates.max_date', 'position.altitude',
                  'config.timezone_offset', 'position.geo.coordinates']]
print('Stations with name.custom of length 1 are not live yet, filter those out...')
# For some reason we have to filter > 2 here
# map_df['name.custom.len'] = map_df['name.custom'].str.len()
live_map = map_df.loc[map_df['name.custom'].str.len() > 2]
# Let's better do this in ODS, it gets nasty here for some reason.
# print('Reversing coordinates for ods...')
# live_map['coords'] = df['position.geo.coordinates'].apply(lambda x: [x[1], x[0]])
filename_stations_map = f'{credentials.path}csv/map/stations.csv'
print(f'Saving minimized table of station data for map creation to {filename_stations_map}')
live_map.to_csv(filename_stations_map, index=False)
# print("Retrieving last hour's data from all live stations from API...")
# for station in df['name.original']:
#     # Get last data point from each station. See https://api.fieldclimate.com/v1/docs/#info-understanding-your-device
#     (pretty_resp, station_df) = call_fieldclimate_api('/data/normal/' + station + '/hourly/last/1h',
#                                                       publicKey, privateKey, f'station--{station}--{datetime.now()}')
common.upload_ftp(filename_stations_map, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, 'map')
common.ensure_ftp_dir(credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, f'val/{folder}')
common.upload_ftp(filename_val, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, f'val/{folder}')
print('Job successful!')
import common
import os
from ods_catalog import credentials

url = 'https://data.bs.ch/explore/dataset/100055/download/?format=csv&use_labels_for_header=true&refine.visibility=domain&refine.publishing_published=True'
file = os.path.join(credentials.path, credentials.filename)
print(f'Downloading {file} from {url}...')
r = common.requests_get(url, auth=(credentials.ods_user, credentials.ods_password))
# Use a context manager so the file handle is closed after writing
with open(file, 'wb') as f:
    f.write(r.content)
common.upload_ftp(filename=file, server=credentials.ftp_server, user=credentials.ftp_user,
                  password=credentials.ftp_pass, remote_path=credentials.ftp_path)
except KeyError as e:
    print(f'No file found with keys {(station, date_string)}, ignoring...')
all_data = all_data[['station_id', 'timestamp', 'Value', 'Latitude', 'Longitude', 'EUI', 'LocalDateTime']]
today_data_file = os.path.join(credentials.path, 'schall_aktuell.csv')
print(f"Exporting yesterday's and today's data to {today_data_file}...")
all_data.to_csv(today_data_file, index=False)
# todo: Simplify code by pushing yesterday's and today's data to ODS in one batch (as in lufthygiene_pm25)
print('Creating stations file from current data file...')
df_stations = all_data.drop_duplicates(['EUI'])[['station_id', 'Latitude', 'Longitude', 'EUI']]
stations_file = os.path.join(credentials.path, 'stations/stations.csv')
print(f'Exporting stations file to {stations_file}...')
df_stations.to_csv(stations_file, index=False)
common.upload_ftp(stations_file, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                  credentials.ftp_remote_path_stations)
common.upload_ftp(today_data_file, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                  credentials.ftp_remote_path_vals)
print('Job successful!')
import common
import os
from tba_abfuhrtermine import credentials

common.upload_ftp(os.path.join(credentials.path, credentials.filename), credentials.ftp_server,
                  credentials.ftp_user, credentials.ftp_pass, 'tba/abfuhrtermine')
print('Job successful!')
df_bs = df_bs.rename(columns={
    'entries_neg': 'negative_tests',
    'entries_pos': 'positive_tests',
    'entries': 'total_tests'
})
print('Calculating columns...')
df_bs['dayofweek'] = pd.to_datetime(df_bs['datum']).dt.dayofweek + 1
df_bs['weekday_nr'] = pd.to_datetime(df_bs['datum']).dt.dayofweek
df_bs['woche'] = pd.to_datetime(df_bs['datum']).dt.isocalendar().week
export_file_name = os.path.join(credentials.path, credentials.file_name)
print(f'Exporting to file {export_file_name}...')
df_bs.to_csv(export_file_name, index=False)
common.upload_ftp(export_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                  'bag_coronavirus_tests')
pcr_antigen_path = os.path.join(credentials.path, 'covid19_testPcrAntigen.csv')
print(f'Reading pcr/antigen csv from {pcr_antigen_path} into data frame...')
df_pcr_antigen = pd.read_csv(pcr_antigen_path)
df_type = df_pcr_antigen[['datum', 'entries', 'entries_neg', 'entries_pos', 'nachweismethode', 'geoRegion']]
df_type_bs = df_type.query("geoRegion == 'BS'").copy(deep=False)
df_type_bs['positivity_rate'] = df_type_bs.entries_pos / df_type_bs.entries
df_type_bs['positivity_rate_percent'] = df_type_bs.positivity_rate * 100
df_pivot = df_type_bs.pivot_table(index=['datum', 'geoRegion'], columns=['nachweismethode'], values=[
    'dest_dir': 'veranstaltungen'
}, {
    'file': 'Bevoelkerung/01bevoelkerung_monat_nach_bezirk.csv',
    'dest_dir': 'bevoelkerung'
}, {
    'file': 'Bevoelkerung/02bevoelkerung_jahr_nach_CH_A_geschlecht.csv',
    'dest_dir': 'bevoelkerung'
}, {
    'file': 'Bevoelkerung/03bevoelkerung_jahr_nach_heimat_geschlecht.csv',
    'dest_dir': 'bevoelkerung'
}, {
    'file': 'Bevoelkerung/04bevoelkerung_jahr_nach_vorname.csv',
    'dest_dir': 'bevoelkerung'
}, {
    'file': 'Bevoelkerung/05bevoelkerung_jahr_nach_nachname.csv',
    'dest_dir': 'bevoelkerung'
},
]
for upload in uploads:
    common.upload_ftp(os.path.join(credentials.path_work, upload['file']), credentials.ftp_server,
                      credentials.ftp_user, credentials.ftp_pass, upload['dest_dir'])
print('Job successful!')
print('Connecting to DB...')
con = pg.connect(credentials.pg_connection)
print('Reading data into dataframe...')
df = psql.read_sql('SELECT *, ST_AsGeoJSON(the_geom) as the_geom_json, ST_AsEWKT(the_geom) as the_geom_EWKT, '
                   'ST_AsText(the_geom) as the_geom_WKT FROM projekte.geschwindigkeitsmonitoring', con)
con.close()
df_metadata = df[['ID', 'the_geom', 'Strasse', 'Strasse_Nr', 'Ort', 'Zone',
                  'Richtung_1', 'Fzg_1', 'V50_1', 'V85_1', 'Ue_Quote_1',
                  'Richtung_2', 'Fzg_2', 'V50_2', 'V85_2', 'Ue_Quote_2',
                  'Messbeginn', 'Messende']]
metadata_filename = os.path.join(credentials.path, credentials.filename.replace('.csv', '_metadata.csv'))
print(f'Exporting data to {metadata_filename}...')
df_metadata.to_csv(metadata_filename, index=False)
common.upload_ftp(filename=metadata_filename, server=credentials.ftp_server, user=credentials.ftp_user,
                  password=credentials.ftp_pass, remote_path=credentials.ftp_remote_path_metadata)
print('Creating dataframe with one row per Messung-ID and Richtung-ID...')
# Manual stacking of the columns for Richtung 1 and 2
df_richtung1 = df_metadata[['ID', 'Richtung_1', 'Fzg_1', 'V50_1', 'V85_1', 'Ue_Quote_1']]
df_richtung1 = df_richtung1.rename(columns={'ID': 'Messung-ID', 'Richtung_1': 'Richtung', 'Fzg_1': 'Fzg',
                                            'V50_1': 'V50', 'V85_1': 'V85', 'Ue_Quote_1': 'Ue_Quote'})
df_richtung1['Richtung ID'] = 1
df_richtung2 = df_metadata[['ID', 'Richtung_2', 'Fzg_2', 'V50_2', 'V85_2', 'Ue_Quote_2']]
df_richtung2 = df_richtung2.rename(columns={'ID': 'Messung-ID', 'Richtung_2': 'Richtung', 'Fzg_2': 'Fzg',
                                            'V50_2': 'V50', 'V85_2': 'V85', 'Ue_Quote_2': 'Ue_Quote'})
df_richtung2['Richtung ID'] = 2
# DataFrame.append is deprecated in newer pandas versions, use pd.concat instead
df_richtung = pd.concat([df_richtung1, df_richtung2])
df_richtung = df_richtung.sort_values(by=['Messung-ID', 'Richtung ID'])
# Changing column order
df_richtung = df_richtung[['Messung-ID', 'Richtung ID', 'Richtung', 'Fzg', 'V50', 'V85', 'Ue_Quote']]
richtung_filename = os.path.join(credentials.path, credentials.filename.replace('.csv', '_richtung.csv'))
print(f'Exporting richtung data to {richtung_filename}...')
    'Iterating over each canton, sorting, adding missing dates, then filling the value gaps using ffill()...')
cantons = df.abbreviation_canton_and_fl.unique()
df_filled = pd.DataFrame(columns=df.columns)
for canton in cantons:
    print(f'Working through canton {canton}...')
    df_canton = df[df.abbreviation_canton_and_fl == canton].sort_values(by='date')
    df_canton_filled = df_canton.set_index('date').reindex(date_range).ffill().reset_index().rename(
        columns={'index': 'date'})
    print('Getting rid of rows with empty date...')
    df_canton_filled.dropna(subset=['abbreviation_canton_and_fl'], inplace=True)
    print('Calculating differences between rows in new columns...')
    df_canton_diff = df_canton_filled.drop(columns=['abbreviation_canton_and_fl']).diff()
    df_canton_filled['ndiff_conf'] = df_canton_diff.ncumul_conf
    df_canton_filled['ndiff_released'] = df_canton_diff.ncumul_released
    df_canton_filled['ndiff_deceased'] = df_canton_diff.ncumul_deceased
    # DataFrame.append is deprecated in newer pandas versions, use pd.concat instead
    df_filled = pd.concat([df_filled, df_canton_filled], ignore_index=True)
filename = os.path.join(credentials.path, credentials.filename)
print(f'Exporting data to {filename}')
df_filled.to_csv(filename, index=False)
common.upload_ftp(filename, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, 'covid19dashboard')
print('Job successful!')
# df_fret.to_csv('C:/dev/workspace/data-processing/euroairport/data/fret.csv', index=False)
# df_mvt.to_csv('C:/dev/workspace/data-processing/euroairport/data/mvt.csv', index=False)
print('Merging data frames into one again...')
df_merged1 = pd.merge(df_pax, df_fret, on=['date', 'Kategorie'], how='outer')
df_merged = pd.merge(df_merged1, df_mvt, on=['date', 'Kategorie'], how='outer')
print('Sorting...')
df_sort = df_merged.sort_values(by=['date', 'Kategorie'], ascending=False)
print('Replacing French with German words in Kategorie...')
df_german = df_sort.replace({
    'Kategorie': {
        'PAX': 'Passagierverkehr',
        'FRET_EXPRESS': 'Fracht Express',
        'FRET_CARGO': 'Fracht Cargo',
        'AUTRES': 'Andere Kategorien'
    }
})
print('Removing totals...')
df_nototal = df_german[df_german.Kategorie != "Total"]
export_file_name = os.path.join(credentials.path, credentials.data_export)
print(f'Exporting to {export_file_name}...')
df_nototal.to_csv(export_file_name, index=False)
common.upload_ftp(export_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, '')
print('Job successful!')
# Include all files with shpfile's name
files_to_zip = glob.glob(os.path.join(path, shpfilename_noext + '.*'))
for file_to_zip in files_to_zip:
    # Do not add the zip file into the zip file...
    if not file_to_zip.endswith('.zip'):
        # todo: uncomment to create zip files
        zipf.write(file_to_zip, os.path.split(file_to_zip)[1])
        pass
zipf.close()
# Upload zip file to ftp server
ftp_remote_dir = 'harvesters/GVA/data'
if not no_file_copy:
    common.upload_ftp(zipfilepath, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass,
                      ftp_remote_dir)
# Load metadata from geocat.ch
# See documentation at https://www.geocat.admin.ch/de/dokumentation/csw.html
# For unknown reasons (probably proxy-related), requests always returns http error 404,
# so we have to revert to launching curl in a subprocess
# curl -X GET "https://www.geocat.ch/geonetwork/srv/api/0.1/records/289b9c0c-a1bb-4ffc-ba09-c1e41dc7138a" -H "accept: application/json" -H "Accept: application/xml" -H "X-XSRF-TOKEN: a1284e46-b378-42a4-ac6a-d48069e05494"
# resp = requests.get('https://www.geocat.ch/geonetwork/srv/api/0.1/records/2899c0c-a1bb-4ffc-ba09-c1e41dc7138a', params={'accept': 'application/json'}, proxies={'https': credentials.proxy})
# resp = requests.get('https://www.geocat.ch/geonetwork/srv/api/0.1/records/2899c0c-a1bb-4ffc-ba09-c1e41dc7138a', headers={'accept': 'application/xml, application/json'}, proxies={'https': credentials.proxy})
# cmd = 'curl -X GET "https://www.geocat.ch/geonetwork/srv/api/0.1/records/289b9c0c-a1bb-4ffc-ba09-c1e41dc7138a" -H "accept: application/json" -H "accept: application/json" -k'
# args = shlex.split(cmd)
# In some geocat URLs there's a tab character, remove it.
geocat_uid = row['geocat'].rsplit('/', 1)[-1].replace('\t', '')
geocat_url = f'https://www.geocat.ch/geonetwork/srv/api/0.1/records/{geocat_uid}'
print(f'Getting metadata from {geocat_url}...')