Example #1
def main():
    date, dfs = extract_db_data(
        glob.glob(os.path.join(credentials.data_path_db, "*.zip")))
    dfs['df_lab'] = extract_lab_data(
        os.path.join(credentials.data_path_xml, "*.xml"))
    add_global_dfs(dfs)
    convert_datetime_columns(dfs)
    # conn = create_db('../tests/fixtures/coronavirus_massentests.db', dfs)
    for report_def in [{
            'file_name': 'massentests_pool.csv',
            'table_name': table_names[0]
    }, {
            'file_name': 'massentests_single.csv',
            'table_name': table_names[1]
    }]:
        report = calculate_report(report_def['table_name'])
        export_file = os.path.join(credentials.export_path,
                                   report_def['file_name'])
        logging.info(
            f'Exporting data derived from table {report_def["table_name"]} to file {export_file}...'
        )
        report.to_csv(export_file, index=False)
        common.upload_ftp(export_file, credentials.ftp_server,
                          credentials.ftp_user, credentials.ftp_pass,
                          'gd_gs/coronavirus_massenteststs')
    # conn.close()
    logging.info(f'Job successful!')
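
Every example in this section ships its exports through a shared common.upload_ftp(file, server, user, password, remote_dir) helper whose implementation is not part of these excerpts. A minimal sketch of what such a helper could look like, assuming plain ftplib and the positional signature visible in the calls (names and details are illustrative, not the project's actual code):

import ftplib
import os


def upload_ftp(filename, server, user, password, remote_path):
    # Hypothetical stand-in for common.upload_ftp; the real helper is not
    # shown in these excerpts.
    with ftplib.FTP(server, user, password) as ftp:
        ftp.cwd(remote_path)
        with open(filename, 'rb') as file:
            ftp.storbinary(f'STOR {os.path.basename(filename)}', file)
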
Example #2
def main():
    data_file_names = credentials.data_orig
    abst_date, concatenated_df = calculate_kennzahlen(data_file_names)

    export_file_name = os.path.join(credentials.path, 'data-processing-output', f'Abstimmungen_{abst_date}.csv')
    print(f'Exporting to {export_file_name}...')
    concatenated_df.to_csv(export_file_name, index=False)

    common.upload_ftp(export_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, 'wahlen_abstimmungen/abstimmungen')
    print('Job successful!')
Example #3
def export_data(df, df_agg):
    agg_export_file_name = os.path.join(credentials.impftermine_path, 'export',
                                        f'impftermine_agg.csv')
    print(f'Exporting resulting data to {agg_export_file_name}...')
    df_agg.to_csv(agg_export_file_name, index=False)
    common.upload_ftp(agg_export_file_name, credentials.ftp_server,
                      credentials.ftp_user, credentials.ftp_pass,
                      'md/covid19_vacc')
    raw_export_file = os.path.join(credentials.impftermine_path, 'export',
                                   f'impftermine.csv')
    print(f'Exporting resulting data to {raw_export_file}...')
    df[[
        'date', 'Birthdate', 'birthday', 'age', 'age_group', 'has_appointments'
    ]].to_csv(raw_export_file, index=False)
def main():
    pysqldf = lambda q: sqldf(q, globals())
    df_bs_long_all = get_raw_df()
    df_bs_perc = get_reporting_df(df_bs_long_all)
    for dataset in [{
            'dataframe': df_bs_long_all,
            'filename': f'vaccinations_by_age_group.csv'
    }, {
            'dataframe': df_bs_perc,
            'filename': f'vaccination_report_bs_age_group_long.csv'
    }]:
        export_file_name = os.path.join(credentials.vmdl_path,
                                        dataset['filename'])
        print(f'Exporting resulting data to {export_file_name}...')
        dataset['dataframe'].to_csv(export_file_name, index=False)
        common.upload_ftp(export_file_name, credentials.ftp_server,
                          credentials.ftp_user, credentials.ftp_pass,
                          'bag/vmdl')
    print(f'Job successful!')
def get_bag_data(dataset_name, url, suffix):
    print(f'Reading current csv from {url} into data frame...')
    df = common.pandas_read_csv(url)
    print(f'Checking which column contains the date...')
    date_column = 'datum' if 'datum' in df.columns else 'date'
    print(f'Dropping lines with empty value in date column "{date_column}"...')
    print(f'{df[date_column].isna()}')
    df = df.dropna(subset=[date_column])
    print(f'Calculating columns...')
    if 'weekly' not in suffix:
        print(f'Date column is regarded as being a calendar day, calculating dayofweek, wochentag, week...')
        df['dayofweek'] = pd.to_datetime(df[date_column]).dt.dayofweek
        df['wochentag'] = df['dayofweek'].apply(lambda x: common.weekdays_german[x])
        # .dt.week is deprecated in newer pandas; isocalendar().week is the current API (cf. Example #16)
        df['week'] = pd.to_datetime(df[date_column]).dt.isocalendar().week
    else:
        print(f'Date column is regarded as being a week number. Calculating year, week...')
        df['year'] = df[date_column].astype(str).str.slice(stop=4)
        df['week'] = df[date_column].astype(str).str.slice(start=-2)
    suffix_string = f'_{suffix}' if suffix != '' else ''
    export_file_name = os.path.join(credentials.path, f'covid19_{dataset_name}{suffix_string}.csv')
    print(f'Exporting to file {export_file_name}...')
    df.to_csv(export_file_name, index=False)
    common.upload_ftp(export_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, 'bag')
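
The weekly branch of get_bag_data assumes the date column carries combined year/week values and simply slices the string. A tiny illustration of that slicing, assuming made-up values like 202105:

import pandas as pd

# Made-up week numbers in the YYYYWW form the slicing above assumes.
df = pd.DataFrame({'datum': [202105, 202106, 202153]})
as_str = df['datum'].astype(str)
df['year'] = as_str.str.slice(stop=4)    # '2021'
df['week'] = as_str.str.slice(start=-2)  # '05', '06', '53'
print(df)
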
Example #6
lots_file_name = f'{credentials.path}csv/lots/parkendd-lots.csv'
print(f'Processing data...')
for lot in parsed['lots']:
    lot['last_downloaded'] = parsed['last_downloaded']
    lot['last_updated'] = parsed['last_updated']

normalized = pd.json_normalize(parsed, record_path='lots')
normalized['title'] = "Parkhaus " + normalized['name']
normalized['id2'] = normalized['id'].str.replace('baselparkhaus', '')
normalized['link'] = "https://www.parkleitsystem-basel.ch/parkhaus/" + normalized['id2']
normalized['description'] = 'Anzahl freie Parkplätze: ' + normalized['free'].astype(str)
normalized['published'] = normalized['last_downloaded']

print(f'Creating lots file and saving as {lots_file_name}...')
lots = normalized[['address','id','lot_type','name','total','last_downloaded','last_updated','coords.lat','coords.lng','title','id2','link','published']]
lots.to_csv(lots_file_name, index=False)

values_file_name = f'{credentials.path}csv/values/parkendd-{str(datetime.now()).replace(":", "")}.csv'
print(f'Creating values file and saving as {values_file_name}...')
values = normalized[['published', 'free', 'id', 'id2']]
values.to_csv(values_file_name, index=False)

common.upload_ftp(lots_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, 'parkendd/csv/lots')
folder = datetime.now().strftime('%Y-%m')
common.ensure_ftp_dir(credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, f'parkendd/csv/values/{folder}')
common.upload_ftp(values_file_name, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, f'parkendd/csv/values/{folder}')

print('Job successful!')
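
Example #6 relies on pd.json_normalize to flatten the nested coords object of each lot into the coords.lat / coords.lng columns selected above. A self-contained illustration with a made-up, ParkenDD-shaped payload:

import pandas as pd

# Made-up payload shaped like the parsed ParkenDD response used above.
parsed = {
    'last_downloaded': '2021-03-01T10:00:00',
    'last_updated': '2021-03-01T09:55:00',
    'lots': [{
        'address': 'Beispielstrasse 1', 'id': 'baselparkhausbeispiel',
        'lot_type': 'Parkhaus', 'name': 'Beispiel', 'free': 77, 'total': 840,
        'coords': {'lat': 47.55, 'lng': 7.59}
    }],
}
for lot in parsed['lots']:
    lot['last_downloaded'] = parsed['last_downloaded']
    lot['last_updated'] = parsed['last_updated']

normalized = pd.json_normalize(parsed, record_path='lots')
print(normalized[['id', 'free', 'coords.lat', 'coords.lng', 'last_downloaded']])
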

Example #7
df = pd.read_csv(filename, sep=';')
df = df.rename(
    columns={
        'sterbe_datum': 'Date_of_death',
        'melde_datum': 'Date',
        'sterbe_alter': 'AgeYear',
        'geschlecht': 'Gender',
        'vorerkrankung': 'PreExistingCond',
        'verstorbene_kumuliert': 'ncumul_deceased'
    })
df['Source'] = 'https://www.gesundheit.bs.ch'
df['Area'] = 'Canton_BS'
df['NewDeaths'] = 1
df = df.sort_values(by=['Date'])
df['CaseNumberPerDay'] = df.groupby(['Date']).cumcount() + 1

# Limit columns to be exported
df = df[[
    'Date', 'Area', 'AgeYear', 'Gender', 'NewDeaths', 'PreExistingCond',
    'Source', 'ncumul_deceased', 'CaseNumberPerDay'
]]

export_filename = os.path.join(credentials.export_path,
                               credentials.export_filename_gestorbene)
print(f'Exporting data to {export_filename}...')
df.to_csv(export_filename, index=False)
common.upload_ftp(export_filename, credentials.ftp_server,
                  credentials.ftp_user, credentials.ftp_pass,
                  'md/covid19_cases')
print('Job successful!')
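
CaseNumberPerDay above numbers the rows within each reporting date; a minimal illustration of the groupby().cumcount() + 1 idiom with made-up dates:

import pandas as pd

df = pd.DataFrame({'Date': ['2021-01-01', '2021-01-01', '2021-01-02']})
df['CaseNumberPerDay'] = df.groupby(['Date']).cumcount() + 1
print(df)  # 1, 2 for 2021-01-01 and 1 for 2021-01-02
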
Example #8
#df_append['ndiff_conf'] = df_diff.ncumul_conf
#df_append['ndiff_released'] = df_diff.ncumul_released
#df_append['ndiff_deceased'] = df_diff.ncumul_deceased
df_append[
    'ndiff_confirmed_non_resident'] = df_diff.ncumul_confirmed_non_resident

print(f'Changing column order and keeping only necessary columns...')
df_append = df_append[[
    'date', 'time', 'abbreviation_canton_and_fl', 'ncumul_tested',
    'ncumul_conf', 'new_hosp', 'current_hosp', 'current_icu', 'current_vent',
    'ncumul_released', 'ncumul_deceased', 'source', 'current_isolated',
    'current_quarantined', 'ncumul_confirmed_non_resident',
    'current_hosp_non_resident', 'current_quarantined_riskareatravel',
    'current_quarantined_total', 'current_hosp_resident', 'ndiff_conf',
    'ndiff_released', 'ndiff_deceased', 'ndiff_confirmed_non_resident',
    'test_date'
]]

print(f'Removing test_date column for the moment...')
df_append = df_append.drop(columns=['test_date'])

# export_filename = os.path.join(credentials.path, credentials.filename).replace('.csv', f'_{latest_date}.csv')
export_filename = os.path.join(credentials.path, credentials.filename)
print(f'Exporting csv to {export_filename}')
df_append.to_csv(export_filename, index=False)

common.upload_ftp(export_filename, credentials.ftp_server,
                  credentials.ftp_user, credentials.ftp_pass,
                  'covid19bs/auto_generated')
print('Job successful!')
Example #9
def main():
    # file_path = vmdl_extract.retrieve_vmdl_data()
    file_path = vmdl.file_path()

    print(f'Reading data into dataframe...')
    df = pd.read_csv(file_path, sep=';')
    # df['vacc_date_dt'] = pd.to_datetime(df.vacc_date, format='%Y-%m-%dT%H:%M:%S.%f%z')
    df['vacc_day'] = df.vacc_date.str.slice(stop=10)

    print(f'Executing calculations...')
    pysqldf = lambda q: sqldf(q, globals())

    print(
        f'Filter by BS and vacc_date, sum type 1 and 99, create "other" type, count persons...'
    )
    df_bs = sqldf(f'''
        select * 
        from df 
        where reporting_unit_location_ctn = "BS" and vacc_day < "{vmdl.today_string()}"'''
                  )

    df_bs_by = sqldf('''
        select vacc_day, vacc_count, 
        case reporting_unit_location_type 
            when 1  then "vacc_centre" 
            when 99 then "vacc_centre" 
            when 6  then "hosp" 
            else "other" 
            end as location_type, 
        count(person_anonymised_id) as count 
        from df_bs 
        group by vacc_day, vacc_count, location_type
        order by vacc_day asc;''')

    print(f'Create empty table of all combinations...')
    df_all_days = pd.DataFrame(data=pd.date_range(
        start=df_bs.vacc_day.min(), end=vmdl.yesterday_string()).astype(str),
                               columns=['vacc_day'])
    df_all_vacc_count = sqldf('select distinct vacc_count from df;')
    df_all_location_type = sqldf('select distinct location_type from df_bs_by')
    df_all_comb = sqldf(
        'select * from df_all_days cross join df_all_vacc_count cross join df_all_location_type;'
    )

    print(f'Adding days without vaccinations...')
    df_bs_by_all = df_all_comb.merge(
        df_bs_by, on=['vacc_day', 'vacc_count',
                      'location_type'], how='outer').fillna(0)

    print(f'Pivoting...')
    df_pivot_table = df_bs_by_all.pivot_table(
        values='count',
        index=['vacc_day'],
        columns=['location_type', 'vacc_count'],
        fill_value=0)
    # Replace the 2-level column names with a string that concatenates both strings
    df_pivot_table.columns = [
        "_".join(str(c) for c in col) for col in df_pivot_table.columns.values
    ]
    df_pivot = df_pivot_table.reset_index()

    print(f'Ensure columns exist...')
    for column_name in [
            'other_1',
            'other_2',
            'in_aph_verabreichte_impfungen_pro_tag',
            'im_aph_mit_erster_dosis_geimpfte_personen_pro_tag',
            'im_aph_mit_zweiter_dosis_geimpfte_personen_pro_tag',
    ]:
        if column_name not in df_pivot.columns:
            df_pivot[column_name] = 0

    print(f'Calculating columns...')
    df_pivot['hosp'] = df_pivot.hosp_1 + df_pivot.hosp_2
    df_pivot['vacc_centre'] = df_pivot.vacc_centre_1 + df_pivot.vacc_centre_2
    df_pivot['other'] = df_pivot.other_1 + df_pivot.other_2
    df_pivot[
        'vacc_count_1'] = df_pivot.hosp_1 + df_pivot.vacc_centre_1 + df_pivot.other_1
    df_pivot[
        'vacc_count_2'] = df_pivot.hosp_2 + df_pivot.vacc_centre_2 + df_pivot.other_2
    df_pivot['cum_1'] = df_pivot.vacc_count_1.cumsum()
    df_pivot['cum_2'] = df_pivot.vacc_count_2.cumsum()
    df_pivot['only_1'] = df_pivot.cum_1 - df_pivot.cum_2
    df_pivot['total'] = df_pivot.hosp + df_pivot.vacc_centre + df_pivot.other
    df_pivot['total_cum'] = df_pivot.total.cumsum()

    print(f'Renaming and restricting columns for export...')
    df_export = df_pivot.rename(
        columns={
            'vacc_day': 'datum',
            'hosp_1': 'im_spital_mit_erster_dosis_geimpfte_personen_pro_tag',
            'hosp_2': 'im_spital_mit_zweiter_dosis_geimpfte_personen_pro_tag',
            'vacc_centre_1':
            'im_impfzentrum_mit_erster_dosis_geimpfte_personen_pro_tag',
            'vacc_centre_2':
            'im_impfzentrum_mit_zweiter_dosis_geimpfte_personen_pro_tag',
            'other_1': 'anderswo_mit_erster_dosis_geimpfte_personen_pro_tag',
            'other_2': 'anderswo_mit_zweiter_dosis_geimpfte_personen_pro_tag',
            'hosp': 'im_spital_verabreichte_impfungen_pro_tag',
            'vacc_centre': 'im_impfzentrum_verabreichte_impfungen_pro_tag',
            'other': 'anderswo_verabreichte_impfungen_pro_tag',
            'vacc_count_1': 'total_mit_erster_dosis_geimpfte_personen_pro_tag',
            'vacc_count_2':
            'total_mit_zweiter_dosis_geimpfte_personen_pro_tag',
            'cum_1': 'total_personen_mit_erster_dosis',
            'cum_2': 'total_personen_mit_zweiter_dosis',
            'only_1': 'total_personen_mit_ausschliesslich_erster_dosis',
            'total': 'total_verabreichte_impfungen_pro_tag',
            'total_cum': 'total_verabreichte_impfungen',
        })

    df_export = df_export[[
        'datum',
        'total_verabreichte_impfungen',
        'total_personen_mit_erster_dosis',
        'total_personen_mit_ausschliesslich_erster_dosis',
        'total_personen_mit_zweiter_dosis',
        'im_impfzentrum_verabreichte_impfungen_pro_tag',
        'im_impfzentrum_mit_erster_dosis_geimpfte_personen_pro_tag',
        'im_impfzentrum_mit_zweiter_dosis_geimpfte_personen_pro_tag',
        'in_aph_verabreichte_impfungen_pro_tag',
        'im_aph_mit_erster_dosis_geimpfte_personen_pro_tag',
        'im_aph_mit_zweiter_dosis_geimpfte_personen_pro_tag',
        'im_spital_verabreichte_impfungen_pro_tag',
        'im_spital_mit_erster_dosis_geimpfte_personen_pro_tag',
        'im_spital_mit_zweiter_dosis_geimpfte_personen_pro_tag',
        'anderswo_verabreichte_impfungen_pro_tag',
        'anderswo_mit_erster_dosis_geimpfte_personen_pro_tag',
        'anderswo_mit_zweiter_dosis_geimpfte_personen_pro_tag',
        'total_verabreichte_impfungen_pro_tag',
    ]]

    export_file_name = os.path.join(credentials.vmdl_path,
                                    f'vaccination_report_bs.csv')
    print(f'Exporting resulting data to {export_file_name}...')
    df_export.to_csv(export_file_name, index=False)
    common.upload_ftp(export_file_name, credentials.ftp_server,
                      credentials.ftp_user, credentials.ftp_pass, 'bag/vmdl')
    print(f'Job successful!')
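
Example #9 builds the table of all day/dose/location combinations with pandasql cross joins; the same table can also be produced in pandas alone, for instance with pd.MultiIndex.from_product. A small standalone sketch with made-up values:

import pandas as pd

days = pd.date_range(start='2021-01-01', end='2021-01-03').astype(str)
vacc_counts = [1, 2]
location_types = ['vacc_centre', 'hosp', 'other']
df_all_comb = pd.MultiIndex.from_product(
    [days, vacc_counts, location_types],
    names=['vacc_day', 'vacc_count', 'location_type']).to_frame(index=False)
print(df_all_comb)
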
Example #10
    header=None)

print(f'Merging data...')
merged_df = abfluss_df.merge(pegel_df,
                             on=['datum', 'zeit', 'intervall'],
                             how='outer')

print('Processing data...')
merged_df = merged_df.loc[merged_df.intervall == 5]
print(f'Fixing entries with zeit == 24:00...')
# Replacing 24:00 with 23:59
merged_df.loc[merged_df.zeit == '24:00', 'zeit'] = '23:59'
# Time is given in MEZ (UTC+1) thus use 'Etc/GMT-1' according to https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# merged_df['timestamp'] = pd.to_datetime(merged_df.datum + ' ' + merged_df.zeit, format='%d.%m.%Y %H:%M').dt.tz_localize('Europe/Zurich')
merged_df['timestamp'] = pd.to_datetime(
    merged_df.datum + ' ' + merged_df.zeit,
    format='%d.%m.%Y %H:%M').dt.tz_localize('Etc/GMT-1')
# Adding a minute to entries with time 23:59 then replacing 23:59 with 24:00 again
merged_df.timestamp = np.where(merged_df.zeit != '23:59', merged_df.timestamp,
                               merged_df.timestamp + pd.Timedelta(minutes=1))
merged_df.zeit = np.where(merged_df.zeit == '23:59', '24:00', merged_df.zeit)
merged_filename = os.path.join(
    local_path,
    f'2289_pegel_abfluss_{datetime.today().strftime("%Y-%m-%dT%H-%M-%S")}.csv')
merged_df.to_csv(merged_filename, index=False)

common.upload_ftp(merged_filename, credentials.ftp_server,
                  credentials.ftp_user, credentials.ftp_pass,
                  credentials.ftp_remote_dir)
print('Job successful!')
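
The tz_localize('Etc/GMT-1') call above depends on the POSIX sign convention of the tz database, where Etc/GMT-1 denotes fixed UTC+1 without daylight saving. A quick check of that behaviour:

import pandas as pd

ts = pd.to_datetime('01.02.2021 12:00',
                    format='%d.%m.%Y %H:%M').tz_localize('Etc/GMT-1')
print(ts)                    # 2021-02-01 12:00:00+01:00
print(ts.tz_convert('UTC'))  # 2021-02-01 11:00:00+00:00
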
Example #11
                           rsuffix='wv',
                           lsuffix='points')
        print('Spatially joining points with Bezirk...')
        gdf_wv_bez = gpd.sjoin(gdf_wv,
                               df_bez,
                               how='left',
                               op="within",
                               rsuffix='bez',
                               lsuffix='points')
        print('Dropping unnecessary columns...')
        gdf_wv_bez.drop(columns=['index_wv', 'index_bez', 'wov_id_points'],
                        inplace=True)

        # todo: Find nearest Wohnviertel / Bezirk of points outside of those shapes (Rhein, Outside of BS territory)
        # e.g. see https://gis.stackexchange.com/a/342489

        timestamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
        file_path = os.path.join(credentials.path,
                                 f'{timestamp}_{credentials.filename}')
        print(f'Exporting data to {file_path}...')
        gdf_wv_bez.to_csv(file_path,
                          index=False,
                          date_format='%Y-%m-%dT%H:%M:%S%z')

        common.upload_ftp(file_path, credentials.ftp_server,
                          credentials.ftp_user, credentials.ftp_pass,
                          'tba/illegale-deponien')
        print('Job successful!')
else:
    raise Exception(f'HTTP error getting values from API: {r.status_code}')
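
The todo in Example #11 asks for the nearest Wohnviertel/Bezirk of points that fall outside all polygons (Rhein, outside of BS territory). Newer geopandas versions (0.10 and later) provide gpd.sjoin_nearest for exactly that; a standalone sketch with made-up geometries, not the project's actual data:

import geopandas as gpd
from shapely.geometry import Point, Polygon

# Two made-up districts and one point that lies outside both of them.
districts = gpd.GeoDataFrame(
    {'bezirk': ['A', 'B']},
    geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
              Polygon([(2, 0), (3, 0), (3, 1), (2, 1)])],
    crs='EPSG:2056')
points = gpd.GeoDataFrame(geometry=[Point(1.4, 0.5)], crs='EPSG:2056')
# Requires geopandas >= 0.10; use a projected CRS so distances are meaningful.
joined = gpd.sjoin_nearest(points, districts, how='left')
print(joined['bezirk'])  # nearest district is 'A'
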
Example #12
pd.options.mode.chained_assignment = None  # Switch off warnings, see https://stackoverflow.com/a/53954986
# make sure we have a list present, otherwise return None, see https://stackoverflow.com/a/12709152/5005585
live_val['meta.rain.1h.val'] = live_df['meta.rain24h.vals'].apply(lambda x: x[23] if isinstance(x, list) else None)
live_val.to_csv(filename_val, index=False)

map_df = live_df[['name.original', 'name.custom', 'dates.min_date', 'dates.max_date', 'position.altitude', 'config.timezone_offset', 'position.geo.coordinates']]
print('Stations with name.custom of length 1 are not live yet, filter those out...')
# For some reason we have to filter > 2 here
# map_df['name.custom.len'] = map_df['name.custom'].str.len()
live_map = map_df.loc[map_df['name.custom'].str.len() > 2]

# let's better do this in ODS, it gets nasty here for some reason.
# print('Reversing coordinates for ods...')
# live_map['coords'] = df['position.geo.coordinates'].apply(lambda x: [x[1], x[0]])
filename_stations_map = f'{credentials.path}csv/map/stations.csv'
print(f'Saving minimized table of station data for map creation to {filename_stations_map}')
live_map.to_csv(filename_stations_map, index=False)

# print("Retrieving last hour's data from all live stations from API...")
# for station in df['name.original']:
#     # get last data point from each station. See https://api.fieldclimate.com/v1/docs/#info-understanding-your-device
#     (pretty_resp, station_df) = call_fieldclimate_api('/data/normal/' + station + '/hourly/last/1h',
#                                                       publicKey, privateKey, f'station--{station}--{datetime.now()}')


common.upload_ftp(filename_stations_map, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, 'map')
common.ensure_ftp_dir(credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, f'val/{folder}')
common.upload_ftp(filename_val, credentials.ftp_server, credentials.ftp_user, credentials.ftp_pass, f'val/{folder}')

print('Job successful!')
Example #13
import common
import os
from ods_catalog import credentials
url = 'https://data.bs.ch/explore/dataset/100055/download/?format=csv&use_labels_for_header=true&refine.visibility=domain&refine.publishing_published=True'
file = os.path.join(credentials.path, credentials.filename)
print(f'Downloading {file} from {url}...')
r = common.requests_get(url, auth=(credentials.ods_user, credentials.ods_password))
with open(file, 'wb') as f:
    f.write(r.content)
common.upload_ftp(filename=file, server=credentials.ftp_server, user=credentials.ftp_user, password=credentials.ftp_pass, remote_path=credentials.ftp_path)
Example #14
        except KeyError as e:
            print(
                f'No file found with keys {(station, date_string)}, ignoring...'
            )

all_data = all_data[[
    'station_id', 'timestamp', 'Value', 'Latitude', 'Longitude', 'EUI',
    'LocalDateTime'
]]
today_data_file = os.path.join(credentials.path, f'schall_aktuell.csv')
print(f"Exporting yesterday's and today's data to {today_data_file}...")
all_data.to_csv(today_data_file, index=False)

# todo: Simplify code by pushing yesterday's and today's data to ODS in one batch (as in lufthygiene_pm25)

print('Creating stations file from current data file...')
df_stations = all_data.drop_duplicates(
    ['EUI'])[['station_id', 'Latitude', 'Longitude', 'EUI']]
stations_file = os.path.join(credentials.path, 'stations/stations.csv')
print(f'Exporting stations file to {stations_file}...')
df_stations.to_csv(stations_file, index=False)

common.upload_ftp(stations_file, credentials.ftp_server, credentials.ftp_user,
                  credentials.ftp_pass,
                  f'{credentials.ftp_remote_path_stations}')
common.upload_ftp(today_data_file, credentials.ftp_server,
                  credentials.ftp_user, credentials.ftp_pass,
                  credentials.ftp_remote_path_vals)

print('Job successful!')
Example #15
import common
import os
from tba_abfuhrtermine import credentials

common.upload_ftp(os.path.join(credentials.path, credentials.filename),
                  credentials.ftp_server, credentials.ftp_user,
                  credentials.ftp_pass, 'tba/abfuhrtermine')
print('Job successful!')
Example #16
df_bs = df_bs.rename(
    columns={
        'entries_neg': 'negative_tests',
        'entries_pos': 'positive_tests',
        'entries': 'total_tests'
    })
print(f'Calculating columns...')
df_bs['dayofweek'] = pd.to_datetime(df_bs['datum']).dt.dayofweek + 1
df_bs['weekday_nr'] = pd.to_datetime(df_bs['datum']).dt.dayofweek
df_bs['woche'] = pd.to_datetime(df_bs['datum']).dt.isocalendar().week

export_file_name = os.path.join(credentials.path, credentials.file_name)
print(f'Exporting to file {export_file_name}...')
df_bs.to_csv(export_file_name, index=False)
common.upload_ftp(export_file_name, credentials.ftp_server,
                  credentials.ftp_user, credentials.ftp_pass,
                  'bag_coronavirus_tests')

pcr_antigen_path = os.path.join(credentials.path, 'covid19_testPcrAntigen.csv')
print(f'Reading pcr/antigen csv from {pcr_antigen_path} into data frame...')
df_pcr_antigen = pd.read_csv(pcr_antigen_path)
df_type = df_pcr_antigen[[
    'datum', 'entries', 'entries_neg', 'entries_pos', 'nachweismethode',
    'geoRegion'
]]
df_type_bs = df_type.query("geoRegion == 'BS'").copy(deep=False)
df_type_bs['positivity_rate'] = df_type_bs.entries_pos / df_type_bs.entries
df_type_bs['positivity_rate_percent'] = df_type_bs.positivity_rate * 100
df_pivot = df_type_bs.pivot_table(index=['datum', 'geoRegion'],
                                  columns=['nachweismethode'],
                                  values=[
Example #17
        'dest_dir': 'veranstaltungen'
    },
    {
        'file': 'Bevoelkerung/01bevoelkerung_monat_nach_bezirk.csv',
        'dest_dir': 'bevoelkerung'
    },
    {
        'file': 'Bevoelkerung/02bevoelkerung_jahr_nach_CH_A_geschlecht.csv',
        'dest_dir': 'bevoelkerung'
    },
    {
        'file': 'Bevoelkerung/03bevoelkerung_jahr_nach_heimat_geschlecht.csv',
        'dest_dir': 'bevoelkerung'
    },
    {
        'file': 'Bevoelkerung/04bevoelkerung_jahr_nach_vorname.csv',
        'dest_dir': 'bevoelkerung'
    },
    {
        'file': 'Bevoelkerung/05bevoelkerung_jahr_nach_nachname.csv',
        'dest_dir': 'bevoelkerung'
    },
]

for upload in uploads:
    common.upload_ftp(os.path.join(credentials.path_work, upload['file']),
                      credentials.ftp_server, credentials.ftp_user,
                      credentials.ftp_pass, upload['dest_dir'])

print('Job successful!')
Example #18

print(f'Connecting to DB...')
con = pg.connect(credentials.pg_connection)
print(f'Reading data into dataframe...')
df = psql.read_sql('SELECT *, ST_AsGeoJSON(the_geom) as the_geom_json, ST_AsEWKT(the_geom) as the_geom_EWKT, ST_AsText(the_geom) as the_geom_WKT FROM projekte.geschwindigkeitsmonitoring', con)
con.close()

df_metadata = df[['ID', 'the_geom', 'Strasse', 'Strasse_Nr', 'Ort', 'Zone',
       'Richtung_1', 'Fzg_1', 'V50_1', 'V85_1', 'Ue_Quote_1',
       'Richtung_2', 'Fzg_2', 'V50_2', 'V85_2', 'Ue_Quote_2', 'Messbeginn', 'Messende'
      ]]
metadata_filename = os.path.join(credentials.path, credentials.filename.replace('.csv', '_metadata.csv'))
print(f'Exporting data to {metadata_filename}...')
df_metadata.to_csv(metadata_filename, index=False)
common.upload_ftp(filename=metadata_filename, server=credentials.ftp_server, user=credentials.ftp_user, password=credentials.ftp_pass, remote_path=credentials.ftp_remote_path_metadata)

print(f'Creating dataframe with one row per Messung-ID and Richtung-ID...')
# Manual stacking of the columns for Richtung 1 and 2
df_richtung1 = df_metadata[['ID', 'Richtung_1', 'Fzg_1', 'V50_1', 'V85_1', 'Ue_Quote_1']]
df_richtung1 = df_richtung1.rename(columns={'ID': 'Messung-ID', 'Richtung_1': 'Richtung', 'Fzg_1': 'Fzg', 'V50_1': 'V50', 'V85_1': 'V85', 'Ue_Quote_1': 'Ue_Quote'})
df_richtung1['Richtung ID'] = 1
df_richtung2 = df_metadata[['ID', 'Richtung_2', 'Fzg_2', 'V50_2', 'V85_2', 'Ue_Quote_2']]
df_richtung2 = df_richtung2.rename(columns={'ID': 'Messung-ID', 'Richtung_2': 'Richtung', 'Fzg_2': 'Fzg', 'V50_2': 'V50', 'V85_2': 'V85', 'Ue_Quote_2': 'Ue_Quote'})
df_richtung2['Richtung ID'] = 2
# DataFrame.append was removed in pandas 2.0; concatenate the two direction frames instead
df_richtung = pd.concat([df_richtung1, df_richtung2])
df_richtung = df_richtung.sort_values(by=['Messung-ID', 'Richtung ID'])
# Changing column order
df_richtung = df_richtung[['Messung-ID', 'Richtung ID', 'Richtung', 'Fzg', 'V50', 'V85', 'Ue_Quote']]
richtung_filename = os.path.join(credentials.path, credentials.filename.replace('.csv', '_richtung.csv'))
print(f'Exporting richtung data to {richtung_filename}...')
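
Example #18 stacks the Richtung_1/Richtung_2 column groups by hand; pd.wide_to_long can perform the same reshaping in a single call. A sketch with made-up measurements that mirror the column layout above:

import pandas as pd

df_metadata = pd.DataFrame({
    'ID': [1001, 1002],
    'Richtung_1': ['stadteinwärts', 'Nord'], 'Fzg_1': [1200, 800],
    'V50_1': [32, 45], 'V85_1': [38, 52], 'Ue_Quote_1': [0.05, 0.12],
    'Richtung_2': ['stadtauswärts', 'Süd'], 'Fzg_2': [1100, 750],
    'V50_2': [31, 44], 'V85_2': [37, 51], 'Ue_Quote_2': [0.04, 0.10],
})
df_richtung = pd.wide_to_long(
    df_metadata,
    stubnames=['Richtung', 'Fzg', 'V50', 'V85', 'Ue_Quote'],
    i='ID', j='Richtung ID', sep='_').reset_index()
df_richtung = df_richtung.rename(columns={'ID': 'Messung-ID'})
print(df_richtung.sort_values(by=['Messung-ID', 'Richtung ID']))
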
Example #19
    'Iterating over each canton, sorting, adding missing dates, then filling the value gaps using ffill()...'
)
cantons = df.abbreviation_canton_and_fl.unique()
df_filled = pd.DataFrame(columns=df.columns)
for canton in cantons:
    print(f'Working through canton {canton}...')
    df_canton = df[df.abbreviation_canton_and_fl == canton].sort_values(
        by='date')
    df_canton_filled = df_canton.set_index('date').reindex(
        date_range).ffill().reset_index().rename(columns={'index': 'date'})

    print('Getting rid of rows with empty date...')
    df_canton_filled.dropna(subset=['abbreviation_canton_and_fl'],
                            inplace=True)

    print('Calculating differences between rows in new columns...')
    df_canton_diff = df_canton_filled.drop(
        columns=['abbreviation_canton_and_fl']).diff()
    df_canton_filled['ndiff_conf'] = df_canton_diff.ncumul_conf
    df_canton_filled['ndiff_released'] = df_canton_diff.ncumul_released
    df_canton_filled['ndiff_deceased'] = df_canton_diff.ncumul_deceased

    # DataFrame.append was removed in pandas 2.0; use pd.concat instead
    df_filled = pd.concat([df_filled, df_canton_filled], ignore_index=True)

filename = os.path.join(credentials.path, credentials.filename)
print(f'Exporting data to {filename}')
df_filled.to_csv(filename, index=False)

common.upload_ftp(filename, credentials.ftp_server, credentials.ftp_user,
                  credentials.ftp_pass, 'covid19dashboard')
print('Job successful!')
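
Example #19 concatenates the per-canton frame onto df_filled inside the loop; an alternative that avoids repeated concatenation is to collect the frames in a list and concatenate once at the end. A standalone sketch of that pattern with made-up data:

import pandas as pd

# Made-up frame mirroring the structure used above.
date_range = pd.date_range('2021-01-01', '2021-01-04')
df = pd.DataFrame({
    'date': pd.to_datetime(['2021-01-01', '2021-01-03', '2021-01-02']),
    'abbreviation_canton_and_fl': ['BS', 'BS', 'BL'],
    'ncumul_conf': [10, 12, 5],
})
frames = []
for canton in df.abbreviation_canton_and_fl.unique():
    df_canton = df[df.abbreviation_canton_and_fl == canton].sort_values(by='date')
    df_canton_filled = (df_canton.set_index('date').reindex(date_range)
                        .ffill().reset_index().rename(columns={'index': 'date'}))
    frames.append(df_canton_filled)
df_filled = pd.concat(frames, ignore_index=True)
print(df_filled)
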
Example #20
# df_fret.to_csv('C:/dev/workspace/data-processing/euroairport/data/fret.csv', index=False)
# df_mvt.to_csv('C:/dev/workspace/data-processing/euroairport/data/mvt.csv', index=False)

print('Merging data frames into one again...')
df_merged1 = pd.merge(df_pax, df_fret, on=['date', 'Kategorie'], how='outer')
df_merged = pd.merge(df_merged1, df_mvt, on=['date', 'Kategorie'], how='outer')

print('Sorting...')
df_sort = df_merged.sort_values(by=['date', 'Kategorie'], ascending=False)

print('Replacing french with german words in Kategorie...')
df_german = df_sort.replace({
    'Kategorie': {
        'PAX': 'Passagierverkehr',
        'FRET_EXPRESS': 'Fracht Express',
        'FRET_CARGO': 'Fracht Cargo',
        'AUTRES': 'Andere Kategorien'
    }
})

print('Removing Totals...')
df_nototal = df_german[df_german.Kategorie != "Total"]

export_file_name = os.path.join(credentials.path, credentials.data_export)
print(f'Exporting to {export_file_name}...')
df_nototal.to_csv(export_file_name, index=False)

common.upload_ftp(export_file_name, credentials.ftp_server,
                  credentials.ftp_user, credentials.ftp_pass, '')
print('Job successful!')
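
Example #20 chains two pd.merge calls; with more than three frames the same outer join can be folded over a list with functools.reduce. A small sketch with made-up frames:

import functools
import pandas as pd

df_pax = pd.DataFrame({'date': ['2021-01'], 'Kategorie': ['PAX'], 'pax': [1000]})
df_fret = pd.DataFrame({'date': ['2021-01'], 'Kategorie': ['FRET_CARGO'], 'fret': [50]})
df_mvt = pd.DataFrame({'date': ['2021-01'], 'Kategorie': ['PAX'], 'mvt': [12]})
df_merged = functools.reduce(
    lambda left, right: pd.merge(left, right, on=['date', 'Kategorie'], how='outer'),
    [df_pax, df_fret, df_mvt])
print(df_merged)
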
Example #21
                # Include all files with shpfile's name
                files_to_zip = glob.glob(
                    os.path.join(path, shpfilename_noext + '.*'))
                for file_to_zip in files_to_zip:
                    # Do not add the zip file into the zip file...
                    if not file_to_zip.endswith('.zip'):
                        zipf.write(file_to_zip, os.path.split(file_to_zip)[1])
                zipf.close()

                # Upload zip file to ftp server
                ftp_remote_dir = 'harvesters/GVA/data'
                if not no_file_copy:
                    common.upload_ftp(zipfilepath, credentials.ftp_server,
                                      credentials.ftp_user,
                                      credentials.ftp_pass, ftp_remote_dir)

                # Load metadata from geocat.ch
                # See documentation at https://www.geocat.admin.ch/de/dokumentation/csw.html
                # For unknown reasons (probably proxy-related), requests always returns http error 404, so we have to revert to launching curl in a subprocess
                # curl -X GET "https://www.geocat.ch/geonetwork/srv/api/0.1/records/289b9c0c-a1bb-4ffc-ba09-c1e41dc7138a" -H "accept: application/json" -H "Accept: application/xml" -H "X-XSRF-TOKEN: a1284e46-b378-42a4-ac6a-d48069e05494"
                # resp = requests.get('https://www.geocat.ch/geonetwork/srv/api/0.1/records/2899c0c-a1bb-4ffc-ba09-c1e41dc7138a', params={'accept': 'application/json'}, proxies={'https': credentials.proxy})
                # resp = requests.get('https://www.geocat.ch/geonetwork/srv/api/0.1/records/2899c0c-a1bb-4ffc-ba09-c1e41dc7138a', headers={'accept': 'application/xml, application/json'}, proxies={'https': credentials.proxy})
                # cmd = 'curl -X GET "https://www.geocat.ch/geonetwork/srv/api/0.1/records/289b9c0c-a1bb-4ffc-ba09-c1e41dc7138a" -H "accept: application/json" -H "accept: application/json" -k'
                # args = shlex.split(cmd)

                # In some geocat URLs there's a tab character, remove it.
                geocat_uid = row['geocat'].rsplit('/', 1)[-1].replace('\t', '')
                geocat_url = f'https://www.geocat.ch/geonetwork/srv/api/0.1/records/{geocat_uid}'
                print(f'Getting metadata from {geocat_url}...')