コード例 #1
0
def computeMunicipalityCases(update_time):
    """Merge municipality case and vaccination CSVs into one file.

    Reads the confirmed, active, deceased and vaccination-by-municipality
    CSVs (each indexed by ``date``), relabels their columns so they can sit
    side by side, left-joins everything onto the confirmed frame and writes
    the column-sorted result to ``csv/municipality-cases.csv``.

    Args:
        update_time: Not used by this function.
    """
    filename = 'csv/municipality-cases.csv'
    print("Processing", filename)
    # Hash taken before the file is rewritten; handed to
    # write_timestamp_file at the end.
    old_hash = sha1sum(filename)

    def load(path):
        # All inputs share the same layout: a 'date' index column.
        return pd.read_csv(path, index_col='date')

    def suffix_columns(frame, suffix):
        # Relabel in place by appending `suffix` to every column label.
        frame.columns = [str(label) + suffix for label in frame.columns]

    confirmed = load('csv/municipality-confirmed.csv')
    active = load('csv/municipality-active.csv')
    deceased = load('csv/municipality-deceased.csv')
    vaccinated = load('csv/vaccination-by_municipality.csv')

    suffix_columns(confirmed, '.cases.confirmed.todate')
    suffix_columns(active, '.cases.active')
    suffix_columns(deceased, '.deceased.todate')

    def vaccination_label(label):
        # Same three substitutions, applied in the same order, as a single
        # per-label mapping.
        label = label.replace('vaccination.region', 'region')
        label = label.replace('1st.todate', 'vaccinated.1st.todate')
        return label.replace('2nd.todate', 'vaccinated.2nd.todate')

    # Keep only columns whose label contains 'date', then relabel them.
    vaccinated = vaccinated.filter(like='date', axis='columns')
    vaccinated = vaccinated.rename(mapper=vaccination_label, axis='columns')

    merged = confirmed
    for part in (active, deceased, vaccinated):
        merged = merged.join(part)
    merged = merged.sort_index(axis=1)

    merged.to_csv(filename, float_format='%.0f', index_label='date')
    write_timestamp_file(filename=filename, old_hash=old_hash)
コード例 #2
0
def computeVaccination(update_time):
    """Build ``csv/vaccination.csv`` from administered and delivered data.

    Outer-joins the administered and delivered CSVs on ``date``, derives a
    cumulative ``.delivered.todate`` column per manufacturer plus their
    overall total, fixes the column order and writes the result.

    Args:
        update_time: Not used by this function.
    """
    filename = 'csv/vaccination.csv'
    print("Processing", filename)
    old_hash = sha1sum(filename)

    administered = pd.read_csv('csv/vaccination-administered.csv',
                               index_col='date')
    delivered = pd.read_csv('csv/vaccination-delivered.csv', index_col='date')
    merged = administered.join(delivered, how='outer')

    manufacturers = ('pfizer', 'moderna', 'az')

    # Running total of deliveries per manufacturer; a running total of 0 is
    # turned into a missing value.
    for maker in manufacturers:
        per_day = merged[f'vaccination.{maker}.delivered']
        merged[f'vaccination.{maker}.delivered.todate'] = (
            per_day.fillna(0).cumsum().replace({0: None}).astype('Int64'))

    # Sum of the per-manufacturer running totals, treating gaps as 0.
    overall = merged['vaccination.pfizer.delivered.todate']
    for maker in manufacturers[1:]:
        overall = overall.add(merged[f'vaccination.{maker}.delivered.todate'],
                              fill_value=0)
    merged['vaccination.delivered.todate'] = overall.astype('Int64')

    # Output column order.
    column_order = [
        'vaccination.administered',
        'vaccination.administered.todate',
        'vaccination.administered2nd',
        'vaccination.administered2nd.todate',
        'vaccination.used.todate',
        'vaccination.delivered.todate',
        'vaccination.pfizer.delivered',
        'vaccination.pfizer.delivered.todate',
        'vaccination.moderna.delivered',
        'vaccination.moderna.delivered.todate',
        'vaccination.az.delivered',
        'vaccination.az.delivered.todate',
    ]
    merged = merged.reindex(column_order, axis='columns')

    merged.to_csv(filename, float_format='%.0f', line_terminator='\r\n')
    write_timestamp_file(filename=filename, old_hash=old_hash)
コード例 #3
0
ファイル: update_vaccination.py プロジェクト: sledilnik/data
def import_nijz_dash_vacc_delivered():
    """Fetch delivered-vaccine data and write it to CSV.

    Loads the per-manufacturer supply data from ``cepimose``, relabels each
    column to ``vaccination.<name>.delivered``, selects the known
    manufacturers in a fixed order, passes the frame through
    ``adjust_vacc_delivered`` and writes ``csv/vaccination-delivered.csv``.
    """
    filename = "csv/vaccination-delivered.csv"

    supplied = cepimose.vaccines_supplied_by_manufacturer()
    df = pd.DataFrame.from_dict(supplied)
    df = df.set_index('date')
    df = df.rename(columns=lambda m: f'vaccination.{m}.delivered')

    # Only the keys are used below; they fix the output column order.
    manufacturersMap = {
        "pfizer": cepimose.data.Manufacturer.PFIZER,
        "moderna": cepimose.data.Manufacturer.MODERNA,
        "az": cepimose.data.Manufacturer.AZ,
        "janssen": cepimose.data.Manufacturer.JANSSEN,
        "novavax": cepimose.data.Manufacturer.NOVAVAX,
    }
    columns = [f'vaccination.{m}.delivered' for m in manufacturersMap]

    df = adjust_vacc_delivered(df[columns])

    old_hash = sha1sum(filename)
    # Force integers, turn zeros into missing values, and drop rows that
    # have no value left at all.
    as_integers = df.fillna(0).round().astype('Int64')
    non_empty = as_integers.replace({0: None}).dropna(thresh=1)
    non_empty.to_csv(filename,
                     date_format="%Y-%m-%d",
                     line_terminator='\r\n')
    write_timestamp_file(filename, old_hash)
コード例 #4
0
def import_sheet(update_time, sheet, range, filename, **kwargs):
    """Download a spreadsheet range into a CSV file.

    Ensures the target directory exists, delegates the download to
    ``sheet2csv.sheet2csv`` and then calls ``write_timestamp_file`` with the
    hash taken before the download.

    Args:
        update_time: Not used by this function.
        sheet: Passed to ``sheet2csv.sheet2csv`` as ``id``.
        range: Passed to ``sheet2csv.sheet2csv`` as ``range``.
        filename: Path of the CSV file to write.
        **kwargs: Forwarded unchanged to ``sheet2csv.sheet2csv``.

    Raises:
        Exception: Whatever ``sheet2csv.sheet2csv`` raises is re-raised after
            a failure message has been printed.
    """
    print("Processing", filename)

    target_dir = pathlib.Path(os.path.dirname(filename))
    target_dir.mkdir(parents=True, exist_ok=True)

    old_hash = sha1sum(filename)
    try:
        sheet2csv.sheet2csv(id=sheet,
                            range=range,
                            api_key=GOOGLE_API_KEY,
                            filename=filename,
                            **kwargs)
    except Exception as err:
        print(f"Failed to import {filename}")
        raise err
    write_timestamp_file(filename=filename, old_hash=old_hash)
コード例 #5
0
ファイル: update_vaccination.py プロジェクト: sledilnik/data
def import_nijz_dash_vacc_by_age():
    """Fetch cumulative vaccinations per age group and write them to CSV.

    For every age group returned by ``cepimose.vaccinations_by_age_group()``
    the first/second/third dose columns are relabelled to
    ``vaccination.age.<group>.<n>.todate`` and outer-joined into one frame,
    which is written to ``csv/vaccination-by_age.csv``.
    """
    filename = "csv/vaccination-by_age.csv"

    # cepimose age-group enum -> label used in the output column names.
    ageGroups = {
        cepimose.data.AgeGroup.GROUP_0_11: "0-11",
        cepimose.data.AgeGroup.GROUP_12_17: "12-17",
        cepimose.data.AgeGroup.GROUP_18_24: "18-24",
        cepimose.data.AgeGroup.GROUP_25_29: "25-29",
        cepimose.data.AgeGroup.GROUP_30_34: "30-34",
        cepimose.data.AgeGroup.GROUP_35_39: "35-39",
        cepimose.data.AgeGroup.GROUP_40_44: "40-44",
        cepimose.data.AgeGroup.GROUP_45_49: "45-49",
        cepimose.data.AgeGroup.GROUP_50_54: "50-54",
        cepimose.data.AgeGroup.GROUP_55_59: "55-59",
        cepimose.data.AgeGroup.GROUP_60_64: "60-64",
        cepimose.data.AgeGroup.GROUP_65_69: "65-69",
        cepimose.data.AgeGroup.GROUP_70_74: "70-74",
        cepimose.data.AgeGroup.GROUP_75_79: "75-79",
        cepimose.data.AgeGroup.GROUP_80_84: "80-84",
        cepimose.data.AgeGroup.GROUP_85_89: "85-89",
        cepimose.data.AgeGroup.GROUP_90: "90+"
    }
    dose_labels = (('first', '1st'), ('second', '2nd'), ('third', '3rd'))

    df = pd.DataFrame()
    vByAgeGroups = cepimose.vaccinations_by_age_group()
    for ag in vByAgeGroups:
        label = ageGroups[ag]
        rows = vByAgeGroups[ag]
        print(f"Joining {label} ({ag}): {len(rows)} rows:")

        renames = {
            f'{ordinal}_dose': f'vaccination.age.{label}.{short}.todate'
            for ordinal, short in dose_labels
        }
        agData = pd.DataFrame.from_dict(rows).set_index('date')
        agData = agData.rename(columns=renames)
        print(agData)
        print(agData.describe())

        df = df.join(agData, how='outer')

    print(df)
    print(df.describe())

    old_hash = sha1sum(filename)
    # Integers with zeros replaced by missing values.
    output = df.astype('Int64').replace({0: None})
    output.to_csv(filename, date_format='%Y-%m-%d')
    write_timestamp_file(filename, old_hash)
コード例 #6
0
def computeMunicipalityCases(update_time):
    """Merge the municipality confirmed/active/deceased CSVs into one file.

    Each input is read with ``date`` as index, its column labels get a
    metric-specific suffix, and the three frames are left-joined onto the
    confirmed frame. The column-sorted result is written to
    ``csv/municipality-cases.csv``.

    Args:
        update_time: Not used by this function.
    """
    filename = 'csv/municipality-cases.csv'
    print("Processing", filename)
    old_hash = sha1sum(filename)

    # (input file, suffix appended to every column label)
    sources = (
        ('csv/municipality-confirmed.csv', '.cases.confirmed.todate'),
        ('csv/municipality-active.csv', '.cases.active'),
        ('csv/municipality-deceased.csv', '.deceased.todate'),
    )
    confirmed, active, deceased = (
        pd.read_csv(path, index_col='date').add_suffix(suffix)
        for path, suffix in sources
    )

    merged = confirmed.join(active).join(deceased)
    merged = merged.sort_index(axis=1)
    merged.to_csv(filename, float_format='%.0f', index_label='date')
    write_timestamp_file(filename=filename, old_hash=old_hash)
コード例 #7
0
ファイル: update_vaccination.py プロジェクト: sledilnik/data
def import_nijz_dash_vacc_administred():
    """Fetch administered-dose data and write cumulative + daily values.

    The cumulative first/second/third dose series from
    ``cepimose.vaccinations_by_day()`` are relabelled, per-day values are
    derived from them by differencing, and both sets of columns are written
    to ``csv/vaccination-administered.csv``.
    """
    filename = "csv/vaccination-administered.csv"

    # source column -> cumulative output column
    cumulative_names = {
        'first_dose': 'vaccination.administered.todate',
        'second_dose': 'vaccination.administered2nd.todate',
        'third_dose': 'vaccination.administered3rd.todate',
    }
    # cumulative output column -> per-day output column (drop '.todate')
    daily_names = {
        todate: todate[:-len('.todate')]
        for todate in cumulative_names.values()
    }

    df = pd.DataFrame.from_dict(cepimose.vaccinations_by_day())
    df = df.set_index('date').rename(columns=cumulative_names)

    # An all-zero row is prepended so diff() yields a value for the first
    # real row; the dummy row itself is dropped again afterwards.
    dummy_date = datetime.datetime(2020, 12, 26)
    dummy_row = pd.DataFrame(
        {todate: 0 for todate in cumulative_names.values()},
        index=[dummy_date])

    df_diff = pd.concat([dummy_row, df]).diff()
    df_diff = df_diff.drop(labels=[dummy_date])
    df_diff = df_diff.rename(columns=daily_names).astype('Int64')

    # Cumulative and per-day columns side by side, matched on the index.
    df = df.merge(df_diff, left_index=True, right_index=True)

    # Per dose: daily column first, cumulative column second.
    ordered = [
        label
        for todate in cumulative_names.values()
        for label in (daily_names[todate], todate)
    ]
    df = df[ordered].astype('Int64')

    old_hash = sha1sum(filename)
    # Zeros become pd.NA so they are not written to the CSV.
    df.replace(0, pd.NA).to_csv(filename, date_format='%Y-%m-%d')
    write_timestamp_file(filename, old_hash)
コード例 #8
0
def computeRegionCases(update_time):
    """Merge the region confirmed/active/deceased CSVs into one file.

    In each input a substring of every column label is replaced so that the
    label names its metric, one resulting column is dropped, and the three
    frames are left-joined onto the confirmed frame. The column-sorted
    result is written to ``csv/region-cases.csv``.

    Args:
        update_time: Not used by this function.
    """
    filename = 'csv/region-cases.csv'
    print("Processing", filename)
    old_hash = sha1sum(filename)

    confirmed = pd.read_csv('csv/region-confirmed.csv', index_col='date')
    active = pd.read_csv('csv/region-active.csv', index_col='date')
    deceased = pd.read_csv('csv/region-deceased.csv', index_col='date')

    def relabel(frame, old, new, unwanted):
        # Substitute `old` with `new` in every column label, then drop the
        # column that ends up being called `unwanted`.
        relabelled = frame.rename(mapper=lambda x: x.replace(old, new),
                                  axis='columns')
        return relabelled.drop(unwanted, axis='columns')

    confirmed = relabel(confirmed, 'todate', 'cases.confirmed.todate',
                        'region.cases.confirmed.todate')
    active = relabel(active, 'active', 'cases.active', 'region.cases.active')
    deceased = relabel(deceased, 'todate', 'deceased.todate',
                       'region.deceased.todate')

    merged = confirmed.join(active).join(deceased)
    merged = merged.sort_index(axis=1)
    merged.to_csv(filename, float_format='%.0f', index_label='date')
    write_timestamp_file(filename=filename, old_hash=old_hash)
コード例 #9
0
def computeCases(update_time):
    """Update ``csv/cases.csv`` from lab-test and patient data.

    Two steps are performed:

    1. LAB: if ``csv/lab-tests.csv`` has one date that is missing from
       ``csv/cases.csv``, a row for that date is appended and its
       ``cases.confirmed``, ``cases.confirmed.todate``, ``cases.active`` and
       ``cases.closed.todate`` values are filled in; the file is written.
    2. HOS: ``cases.recovered.todate`` is recomputed from
       ``csv/patients.csv`` and the file is written (again).

    Both writes pass the same pre-update hash to ``write_timestamp_file``.

    Args:
        update_time: Not used by this function.

    Raises:
        AssertionError: If more than one relevant date is missing from
            ``csv/cases.csv``.
    """
    filename = 'csv/cases.csv'
    print("Processing", filename)

    # LAB (9:00): cases.confirmed, cases.confirmed.todate, cases.active, cases.closed
    df_cases = pd.read_csv(filename, index_col='date')
    df_cases_old_hash = sha1sum(filename)
    df_lab_tests = pd.read_csv('csv/lab-tests.csv', index_col='date').replace({None: 0})

    # Early dates present in lab-tests.csv that are deliberately not carried
    # over into cases.csv.
    irrelevant_early_days = {
        '2020-02-02', '2020-02-09', '2020-02-16', '2020-02-23', '2020-02-24', '2020-02-25', '2020-02-26',
        '2020-02-27', '2020-02-28', '2020-02-29', '2020-03-01', '2020-03-02', '2020-03-03'
    }
    date_diff = [
        day for day in df_lab_tests.index.difference(df_cases.index)
        if day not in irrelevant_early_days
    ]
    assert len(date_diff) <= 1, 'The date difference between lab-tests.csv and cases.csv is more than one day.'
    if len(date_diff) > 0:
        # DataFrame.append was removed in pandas 2.0; pd.concat performs the
        # same row-wise concatenation and is available in every version.
        new_rows = pd.DataFrame(index=date_diff, columns=df_cases.columns)
        df_cases = pd.concat([df_cases, new_rows])
        date = date_diff[0]  # equals index of -1
        # only manipulate last row
        df_cases.at[date, 'cases.confirmed'] = df_lab_tests.at[date, 'tests.positive'] + df_lab_tests.at[date, 'tests.hagt.positive']
        # previous row's running total plus today's confirmed cases
        df_cases.at[date, 'cases.confirmed.todate'] = df_cases.iloc[-2, df_cases.columns.get_loc('cases.confirmed.todate')] + df_cases.at[date, 'cases.confirmed']

        # active cases = confirmed cases summed over a 14-row window; the
        # helper column is only needed to read off the value for the new row
        df_cases['cases.active.temp'] = df_cases['cases.confirmed'].rolling(window=14).sum()
        df_cases.at[date, 'cases.active'] = df_cases.at[date, 'cases.active.temp']
        df_cases.drop('cases.active.temp', axis='columns', inplace=True)

        df_cases.at[date, 'cases.closed.todate'] = df_cases.at[date, 'cases.confirmed.todate'] - df_cases.at[date, 'cases.active']

        # TODO use common function for writing CSV
        df_cases.index.rename('date', inplace=True)  # name it explicitly otherwise it doesn't show up in csv
        # NOTE(review): `line_terminator` was renamed to `lineterminator` in
        # pandas 1.5; left unchanged here because the new spelling is not
        # accepted by older pandas releases.
        df_cases.replace({0: None}).astype('Int64').to_csv(filename, line_terminator='\r\n')
        write_timestamp_file(filename=filename, old_hash=df_cases_old_hash)

    # HOS (10:30): cases.recovered.todate
    df_patients = pd.read_csv('csv/patients.csv', index_col='date')
    # closed cases minus the deceased count taken from the following row
    # of patients.csv (shift(-1))
    df_cases['cases.recovered.todate'] = df_cases['cases.closed.todate'] - df_patients['state.deceased.todate'].shift(-1)
    df_cases = df_cases.reindex([
        'cases.confirmed', 'cases.confirmed.todate', 'cases.active', 'cases.closed.todate', 'cases.recovered.todate',
        'cases.rh.occupant.confirmed.todate', 'cases.hs.employee.confirmed.todate', 'cases.rh.employee.confirmed.todate'
    ], axis='columns')

    df_cases.replace({0: None}).astype('Int64').to_csv(filename, line_terminator='\r\n')
    write_timestamp_file(filename=filename, old_hash=df_cases_old_hash)
コード例 #10
0
def import_nijz_dash_vacc_delivered():
    """Fetch delivered and used vaccine data and write it to CSV.

    Starts from the per-manufacturer supply data, joins in each
    manufacturer's supplied/used series from ``cepimose`` and writes the
    ``.delivered`` and ``.used.todate`` columns to
    ``csv/vaccination-delivered.csv``.
    """
    filename = "csv/vaccination-delivered.csv"

    supplied = cepimose.vaccines_supplied_by_manufacturer()
    df = pd.DataFrame.from_dict(supplied).set_index('date')
    df = df.rename(columns=lambda m: f'vaccination.{m}.delivered')

    manufacturersMap = {
        "pfizer": cepimose.data.Manufacturer.PFIZER,
        "moderna": cepimose.data.Manufacturer.MODERNA,
        "az": cepimose.data.Manufacturer.AZ,
        "janssen": cepimose.data.Manufacturer.JANSSEN,
    }

    manufacturers_supplied_used = (
        cepimose.vaccinations_by_manufacturer_supplied_used())

    columns = []
    for m, manufacturer in manufacturersMap.items():
        renames = {
            'supplied': f'vaccination.{m}.delivered.todate',
            'used': f'vaccination.{m}.used.todate',
        }
        df_supplied_used = pd.DataFrame.from_dict(
            manufacturers_supplied_used[manufacturer])
        df_supplied_used = df_supplied_used.rename(columns=renames)
        df = df.join(df_supplied_used.set_index('date'))

        # The joined '.delivered.todate' column is not part of the output.
        columns += [
            f'vaccination.{m}.delivered',
            f'vaccination.{m}.used.todate',
        ]

    # Select the output columns in their final order.
    df = df[columns]

    old_hash = sha1sum(filename)
    # Force integers; zeros become missing values.
    output = df.fillna(0).round().astype('Int64').replace({0: None})
    output.to_csv(filename, date_format="%Y-%m-%d", line_terminator='\r\n')
    write_timestamp_file(filename, old_hash)
コード例 #11
0
ファイル: update_vaccination.py プロジェクト: sledilnik/data
def computeVaccination(update_time):
    """Build ``csv/vaccination.csv`` from the individual vaccination CSVs.

    Outer-joins the administered, used-by-manufacturer, delivered and by-age
    CSVs on ``date``, derives a cumulative ``.delivered.todate`` column per
    manufacturer plus their overall total, fixes the column order and writes
    the result.

    Args:
        update_time: Not used by this function.
    """
    filename = 'csv/vaccination.csv'
    print("Processing", filename)
    old_hash = sha1sum(filename)

    def load(path):
        return pd.read_csv(path, index_col='date')

    df_a = load('csv/vaccination-administered.csv')
    df_d = load('csv/vaccination-delivered.csv')
    df_m = load('csv/vaccination-used_by_manufacturer.csv')
    df_g = load('csv/vaccination-by_age.csv')

    merged = df_a
    for other in (df_m, df_d, df_g):
        merged = merged.join(other, how='outer')

    manufacturers = ('pfizer', 'moderna', 'az', 'janssen', 'novavax')
    age_groups = ('0-11', '12-17', '18-24', '25-29', '30-34', '35-39',
                  '40-44', '45-49', '50-54', '55-59', '60-64', '65-69',
                  '70-74', '75-79', '80-84', '85-89', '90+')
    doses = ('1st', '2nd', '3rd')

    # Running total of deliveries per manufacturer; a running total of 0 is
    # turned into a missing value.
    for maker in manufacturers:
        per_day = merged[f'vaccination.{maker}.delivered']
        merged[f'vaccination.{maker}.delivered.todate'] = (
            per_day.fillna(0).cumsum().replace({0: None}).astype('Int64'))

    # Sum of the per-manufacturer running totals, treating gaps as 0.
    overall = merged['vaccination.pfizer.delivered.todate']
    for maker in manufacturers[1:]:
        overall = overall.add(merged[f'vaccination.{maker}.delivered.todate'],
                              fill_value=0)
    merged['vaccination.delivered.todate'] = overall.astype('Int64')

    # Output column order: administered, used, delivered, then age groups.
    column_order = [
        'vaccination.administered',
        'vaccination.administered.todate',
        'vaccination.administered2nd',
        'vaccination.administered2nd.todate',
        'vaccination.administered3rd',
        'vaccination.administered3rd.todate',
        'vaccination.used.todate',
    ]
    column_order += [
        f'vaccination.{maker}.used.todate' for maker in manufacturers
    ]
    column_order.append('vaccination.delivered.todate')
    for maker in manufacturers:
        column_order += [
            f'vaccination.{maker}.delivered',
            f'vaccination.{maker}.delivered.todate',
        ]
    column_order += [
        f'vaccination.age.{group}.{dose}.todate'
        for group in age_groups
        for dose in doses
    ]
    merged = merged.reindex(column_order, axis='columns')

    merged.to_csv(filename, float_format='%.0f', line_terminator='\r\n')
    write_timestamp_file(filename=filename, old_hash=old_hash)
コード例 #12
0
def import_nijz_dash_labtests():
    """Add or update the latest day of lab-test data in ``csv/lab-tests.csv``.

    Builds a one-row frame for the timestamp reported by
    ``cepimose.lab_end_timestamp()``, overlays it on the existing CSV,
    recomputes the cumulative ``.todate`` columns and writes the file back.
    If the reported date is older than yesterday the process exits with
    status 1 before anything is written.

    Returns:
        The value of ``cepimose.lab_cases_confirmed()``.
    """
    filenameByDay = "csv/lab-tests.csv"
    print("Processing", filenameByDay)

    df_existing = pd.read_csv(filenameByDay,
                              index_col='date',
                              parse_dates=['date'])
    print(df_existing)
    d = cepimose.lab_end_timestamp()

    print(f"Adding/updating lab test data for {d.date()}")

    # Start from the existing column set with every value None, then fill in
    # what is available. The positive counts are not set here.
    day_data = dict.fromkeys(df_existing.tail(1), None)
    pcr_performed = cepimose.lab_PCR_tests_performed()
    hat_performed = cepimose.lab_HAT_tests_performed()
    day_data.update({
        'tests.regular.performed': pcr_performed,
        'tests.hagt.performed': hat_performed,
        'tests.performed': pcr_performed,
    })

    df_day_data = pd.DataFrame([day_data], index=[d])
    df_day_data.index.name = 'date'
    print(df_day_data)

    # New values take precedence; gaps fall back to the existing data.
    df_updated = df_day_data.combine_first(df_existing)
    df_updated = df_updated.fillna(0).round().replace({0: None})
    df_updated = df_updated.astype('Int64')

    # Recalculate the cumulative .todate columns from the per-day columns.
    per_day_columns = (
        'tests.regular.performed',
        'tests.regular.positive',
        'tests.hagt.performed',
        'tests.performed',
        'tests.positive',
    )
    for column in per_day_columns:
        running = df_updated[column].fillna(0).cumsum()
        df_updated[f'{column}.todate'] = (
            running.replace({0: None}).astype('Int64'))

    print(df_updated)

    yesterday = date.today() - timedelta(days=1)
    if d.date() < yesterday:
        print(
            f"ABORTING update with too old date {d.date()} ({d}). Now is {datetime.now()}"
        )
        print(
            f"Cases {cepimose.lab_cases_confirmed()}, PCR: {cepimose.lab_PCR_tests_performed()} HAT: {cepimose.lab_HAT_tests_performed()}"
        )
        exit(1)

    old_hash = sha1sum(filenameByDay)
    df_updated.to_csv(filenameByDay,
                      date_format='%Y-%m-%d',
                      line_terminator='\r\n')
    write_timestamp_file(filenameByDay, old_hash)

    # PCR+HAT positive is confirmed cases
    return cepimose.lab_cases_confirmed()
コード例 #13
0
ファイル: update_vaccination.py プロジェクト: sledilnik/data
def import_nijz_dash_vacc_by_municipalities():
    """Fetch vaccination counts per municipality and write two CSVs.

    Every row from ``cepimose.vaccinations_by_municipalities_share()`` is
    matched by name against ``csv/dict-municipality.csv``. The latest
    snapshot is written to ``csv/vaccination-by_municipality-latest.csv``,
    and today's dose counts are merged into the daily history file
    ``csv/vaccination-by_municipality.csv``.

    Raises:
        Exception: If a row matches no municipality, or more than one.
    """
    filename = "csv/vaccination-by_municipality-latest.csv"
    filenameByDay = "csv/vaccination-by_municipality.csv"
    print("Processing", filename)
    print("Processing", filenameByDay)

    municipalities = pd.read_csv("csv/dict-municipality.csv", index_col="id")[[
        "region", "iso_code", "name", "name_alt", "population"
    ]]

    # Upper-cased variants used for matching.
    municipalities['name_search'] = municipalities['name'].str.upper()
    municipalities['name_alt'] = municipalities['name_alt'].str.upper()
    municipalities['name_id'] = municipalities.index.str.upper()

    for row in cepimose.vaccinations_by_municipalities_share():
        nameNormalized = row.name.upper().replace('-', ' - ')

        # Lookups are tried in this order; the first non-empty hit wins.
        lookups = (
            ('name_search', nameNormalized),
            ('name_alt', nameNormalized),
            ('name_id', nameNormalized),
            ('name_search', nameNormalized.replace("SV. ", "SVETA ")),
        )
        for column, wanted in lookups:
            mun = municipalities.loc[municipalities[column] == wanted]
            if not mun.empty:
                break

        if mun.empty:
            raise Exception(f'No municipality match: {row.name}')

        match_count = len(mun.index)
        if match_count > 1:
            raise Exception(f'{match_count} municipalities match: {row.name}')

        munId = mun.to_records()[0].id
        # The population is overwritten with the value from the fetched row,
        # which could differ from the one in dict-municipality.csv.
        fetched_values = {
            'population': row.population,
            '1st.todate': row.dose1,
            '1st.share.todate': round(row.share1, 5),
            '2nd.todate': row.dose2,
            '2nd.share.todate': round(row.share2, 5),
        }
        for column, value in fetched_values.items():
            municipalities.loc[munId, column] = value

    # Drop the helper columns used for matching.
    municipalities = municipalities[[
        'region', 'iso_code', 'name', 'population', '1st.todate',
        '1st.share.todate', '2nd.todate', '2nd.share.todate'
    ]]
    municipalities['1st.todate'] = municipalities['1st.todate'].astype('Int64')
    municipalities['2nd.todate'] = municipalities['2nd.todate'].astype('Int64')
    # Keep only rows with at least four non-missing values.
    municipalities.dropna(thresh=4, inplace=True)
    print(municipalities)

    old_hash = sha1sum(filename)
    municipalities.to_csv(filename)
    write_timestamp_file(filename, old_hash)

    # Daily history: one wide row for today, holding the cumulative dose
    # counts only.
    today_data = {}
    for mun_id, m in municipalities.iterrows():
        fieldPrefix = f'vaccination.region.{m["region"]}.{mun_id}.'
        today_data.update({
            f'{fieldPrefix}1st.todate': m["1st.todate"],
            f'{fieldPrefix}2nd.todate': m["2nd.todate"],
        })

    df_today = pd.DataFrame([today_data], index=[datetime.date.today()])
    df_today.index.name = 'date'

    # The history file must already exist; it is read and extended here.
    df_existing = pd.read_csv(filenameByDay, index_col=0, parse_dates=[0])

    # Today's values take precedence; gaps fall back to the existing data.
    df_updated = df_today.combine_first(df_existing)
    df_updated = df_updated.fillna(0).round().replace({0: None})
    df_updated = df_updated.astype('Int64')
    print(df_updated)

    old_hash = sha1sum(filenameByDay)
    df_updated.to_csv(filenameByDay, date_format='%Y-%m-%d')
    write_timestamp_file(filenameByDay, old_hash)
コード例 #14
0
ファイル: update_vaccination.py プロジェクト: sledilnik/data
def import_nijz_dash_vacc_by_region():
    """Fetch vaccinations per region and write daily + cumulative values.

    For each region the cumulative first/second/third dose series from
    ``cepimose.vaccinations_by_region_by_day()`` is differenced to obtain
    per-day values, relabelled to ``vaccination.region.<code>.<n>[.todate]``
    and outer-joined into one frame, which is written to
    ``csv/vaccination-by_region.csv``.
    """
    filename = "csv/vaccination-by_region.csv"
    print("Processing", filename)

    df = pd.DataFrame()
    vaccByRegion = cepimose.vaccinations_by_region_by_day()

    # cepimose region enum -> region code used in the output column names;
    # the dict order determines the column order.
    regions = {
        cepimose.data.Region.OBALNOKRASKA: "kp",
        cepimose.data.Region.GORISKA: "ng",
        cepimose.data.Region.PRIMORSKONOTRANJSKA: "po",
        cepimose.data.Region.GORENJSKA: "kr",
        cepimose.data.Region.OSREDNJESLOVENSKA: "lj",
        cepimose.data.Region.JUGOVZHODNASLOVENIJA: "nm",
        cepimose.data.Region.POSAVSKA: "kk",
        cepimose.data.Region.ZASAVSKA: "za",
        cepimose.data.Region.SAVINJSKA: "ce",
        cepimose.data.Region.KOROSKA: "sg",
        cepimose.data.Region.PODRAVSKA: "mb",
        cepimose.data.Region.POMURSKA: "ms",
    }
    doses = (('first', '1st'), ('second', '2nd'), ('third', '3rd'))

    for reg in regions:
        code = regions[reg]
        print(f"Joining {code} ({reg}): {len(vaccByRegion[reg])} rows:")

        regData = pd.DataFrame.from_dict(vaccByRegion[reg]).set_index('date')

        # Per-day values are the differences of the cumulative series.
        for ordinal, _ in doses:
            regData[f'{ordinal}_diff'] = regData[f'{ordinal}_dose'].diff()

        # Per dose: daily column first, cumulative column second.
        renames = {}
        for ordinal, short in doses:
            renames[f'{ordinal}_diff'] = f'vaccination.region.{code}.{short}'
            renames[f'{ordinal}_dose'] = (
                f'vaccination.region.{code}.{short}.todate')

        regData = regData[list(renames)].rename(columns=renames)
        print(regData)
        print(regData.describe())

        df = df.join(regData, how='outer')

    print(df)
    print(df.describe())

    old_hash = sha1sum(filename)
    # Force integers; zeros become missing values.
    output = df.fillna(0).round().astype('Int64').replace({0: None})
    output.to_csv(filename, date_format="%Y-%m-%d", line_terminator='\r\n')
    write_timestamp_file(filename, old_hash)
コード例 #15
0
ファイル: update_vaccination.py プロジェクト: sledilnik/data
def import_nijz_dash_vacc_used_by_manufacturer():
    """Rebuild csv/vaccination-used_by_manufacturer.csv.

    Loads the per-manufacturer "used" figures returned by
    ``cepimose.vaccinations_by_manufacturer_used()``, indexes them by their
    ``date`` field and renames the manufacturer fields to
    ``vaccination.<maker>.used``.  For every manufacturer a running total
    (``.todate``) is added, followed by an all-manufacturer daily sum and
    its running total.  Zeros are blanked out before writing.  If the frame
    holds a row for today whose overall daily value is missing (zeros have
    already been blanked at that point), that row is removed.  Finally the
    CSV is written and ``write_timestamp_file`` is called with the hash
    taken just before the write.
    """
    filename = "csv/vaccination-used_by_manufacturer.csv"

    # source field -> (daily column, running-total column)
    makers = {
        'pfizer': ('vaccination.pfizer.used',
                   'vaccination.pfizer.used.todate'),
        'moderna': ('vaccination.moderna.used',
                    'vaccination.moderna.used.todate'),
        'az': ('vaccination.az.used',
               'vaccination.az.used.todate'),
        'janssen': ('vaccination.janssen.used',
                    'vaccination.janssen.used.todate'),
        'novavax': ('vaccination.novavax.used',
                    'vaccination.novavax.used.todate'),
    }

    def running_total(series):
        # cumulative sum with gaps treated as 0; leading zeros become NA
        return series.fillna(0).cumsum().replace({0: None}).astype('Int64')

    raw = pd.DataFrame.from_dict(cepimose.vaccinations_by_manufacturer_used())
    df = raw.set_index('date')
    df = df.rename(
        columns={source: daily for source, (daily, _) in makers.items()})
    df = df.astype('Int64')

    for daily, todate in makers.values():
        df[todate] = running_total(df[daily])

    # total doses used per day across all manufacturers (missing counts as 0)
    overall = None
    for daily, _ in makers.values():
        filled = df[daily].fillna(0)
        overall = filled if overall is None else overall + filled
    df['vaccination.used'] = overall
    df['vaccination.used.todate'] = running_total(df['vaccination.used'])
    df = df.astype('Int64')

    # fixed column order: overall pair first, then one pair per manufacturer
    ordered = ['vaccination.used', 'vaccination.used.todate']
    for pair in makers.values():
        ordered.extend(pair)
    df = df[ordered]
    df = df.replace({0: None}).astype('Int64')

    # discard today's row when it carries no overall value yet
    today = datetime.date.today().isoformat()
    if today in df.index:
        if pd.isna(df.loc[today]['vaccination.used']):
            df = df.drop(df.index[df.index == today])

    # write csv; 0 is swapped for pd.NA so it does not get written out
    old_hash = sha1sum(filename)
    df.replace(0, pd.NA).to_csv(filename, date_format='%Y-%m-%d')
    write_timestamp_file(filename, old_hash)
コード例 #16
0
ファイル: update.py プロジェクト: trnsik/data
def computeStats(update_time):
    """Assemble csv/stats.csv from the per-topic CSV files.

    The phase dictionary (csv/dict-phases.csv) is expanded to one row per
    day, from its earliest ``date.from`` up to the latest date found in
    csv/patients.csv, forward-filling the phase id.  Selected columns from
    the patients, regions, age, retirement-home, lab-test and cases files
    are then joined onto it.  A derived column
    ``cases.unclassified.confirmed.todate`` is computed as confirmed cases
    minus the three classified categories.  Rows are numbered with a
    ``day`` index starting at -8, columns are put into a fixed order, and
    the result is written with CRLF line endings.

    ``update_time`` is accepted but not used inside this function.
    """
    filename = 'csv/stats.csv'
    print("Processing", filename)
    old_hash = sha1sum(filename)

    patient_cols = [
        'state.in_hospital', 'state.in_hospital.todate', 'state.icu',
        'state.critical', 'state.out_of_hospital.todate',
        'state.deceased.todate', 'state.recovered.todate'
    ]
    lab_test_cols = [
        'tests.performed',
        'tests.performed.todate',
        'tests.positive',
        'tests.positive.todate',
        'tests.regular.performed',
        'tests.regular.performed.todate',
        'tests.regular.positive',
        'tests.regular.positive.todate',
        'tests.hagt.performed',
        'tests.hagt.performed.todate',
        'tests.hagt.positive',
        'tests.hagt.positive.todate',
        'tests.ns-apr20.performed',
        'tests.ns-apr20.performed.todate',
        'tests.ns-apr20.positive',
        'tests.ns-apr20.positive.todate',
    ]
    case_cols = [
        'cases.confirmed',
        'cases.confirmed.todate',
        'cases.active',
        'cases.recovered.todate',
        'cases.closed.todate',
        'cases.hs.employee.confirmed.todate',
        'cases.rh.employee.confirmed.todate',
        'cases.rh.occupant.confirmed.todate',
    ]

    patients = pd.read_csv('csv/patients.csv',
                           index_col='date',
                           parse_dates=['date'])[patient_cols]

    # one row per calendar day, carrying the most recent phase id forward
    phases = pd.read_csv('csv/dict-phases.csv',
                         index_col='date.from',
                         parse_dates=['date.from'])
    phases = phases.rename(columns={'id': 'phase'})[['phase']]
    every_day = pd.date_range(phases.index.min(),
                              patients.index.max(),
                              freq='D')
    phases = phases.reindex(every_day, method='ffill')
    phases.index.name = 'date'

    # NOTE(review): only the two reads above pass parse_dates; the frames
    # below keep whatever index dtype read_csv infers. Confirm the joins
    # align on date as intended with the pandas version in use.
    regions = pd.read_csv('csv/regions-cases.csv', index_col='date')
    age_cases = pd.read_csv('csv/age-cases.csv', index_col='date')
    age_deceased = pd.read_csv('csv/age-deceased.csv', index_col='date')
    rh_deceased = pd.read_csv('csv/rh-deceased.csv', index_col='date')
    lab_tests = pd.read_csv('csv/lab-tests.csv',
                            index_col='date')[lab_test_cols]
    cases = pd.read_csv('csv/cases.csv', index_col='date')[case_cols]

    merged = phases
    for frame in (patients, regions, age_cases, age_deceased, rh_deceased,
                  lab_tests, cases):
        merged = merged.join(frame)

    # confirmed cases not attributed to any of the classified groups
    unclassified = merged['cases.confirmed.todate']
    for classified in ('cases.hs.employee.confirmed.todate',
                       'cases.rh.employee.confirmed.todate',
                       'cases.rh.occupant.confirmed.todate'):
        unclassified = unclassified.sub(merged[classified], fill_value=0)
    merged['cases.unclassified.confirmed.todate'] = unclassified

    # running day number becomes the index; the date turns into a column
    merged.insert(loc=0, column='day', value=range(-8, -8 + len(merged)))
    merged = merged.reset_index().set_index('day')

    column_order = [
        'date', 'phase', 'tests.performed.todate', 'tests.performed',
        'tests.positive.todate', 'tests.positive',
        'tests.regular.performed.todate', 'tests.regular.performed',
        'tests.regular.positive.todate', 'tests.regular.positive',
        'tests.hagt.performed', 'tests.hagt.performed.todate',
        'tests.hagt.positive', 'tests.hagt.positive.todate',
        'tests.ns-apr20.performed.todate', 'tests.ns-apr20.performed',
        'tests.ns-apr20.positive.todate', 'tests.ns-apr20.positive',
        'cases.confirmed.todate', 'cases.confirmed', 'cases.active',
        'cases.recovered.todate', 'cases.closed.todate',
        'cases.hs.employee.confirmed.todate',
        'cases.rh.employee.confirmed.todate',
        'cases.rh.occupant.confirmed.todate',
        'cases.unclassified.confirmed.todate', 'state.in_hospital',
        'state.icu', 'state.critical', 'state.in_hospital.todate',
        'state.out_of_hospital.todate', 'state.deceased.todate',
        'state.recovered.todate', 'region.lj.todate', 'region.ce.todate',
        'region.mb.todate', 'region.ms.todate', 'region.kr.todate',
        'region.nm.todate', 'region.za.todate', 'region.sg.todate',
        'region.po.todate', 'region.ng.todate', 'region.kp.todate',
        'region.kk.todate', 'region.foreign.todate',
        'region.unknown.todate', 'region.todate', 'age.0-4.todate',
        'age.5-14.todate', 'age.15-24.todate', 'age.25-34.todate',
        'age.35-44.todate', 'age.45-54.todate', 'age.55-64.todate',
        'age.65-74.todate', 'age.75-84.todate', 'age.85+.todate',
        'age.todate', 'age.female.0-4.todate', 'age.female.5-14.todate',
        'age.female.15-24.todate', 'age.female.25-34.todate',
        'age.female.35-44.todate', 'age.female.45-54.todate',
        'age.female.55-64.todate', 'age.female.65-74.todate',
        'age.female.75-84.todate', 'age.female.85+.todate',
        'age.female.todate', 'age.male.0-4.todate', 'age.male.5-14.todate',
        'age.male.15-24.todate', 'age.male.25-34.todate',
        'age.male.35-44.todate', 'age.male.45-54.todate',
        'age.male.55-64.todate', 'age.male.65-74.todate',
        'age.male.75-84.todate', 'age.male.85+.todate', 'age.male.todate',
        'age.unknown.0-4.todate', 'age.unknown.5-14.todate',
        'age.unknown.15-24.todate', 'age.unknown.25-34.todate',
        'age.unknown.35-44.todate', 'age.unknown.45-54.todate',
        'age.unknown.55-64.todate', 'age.unknown.65-74.todate',
        'age.unknown.75-84.todate', 'age.unknown.85+.todate',
        'age.unknown.todate', 'deceased.0-4.todate',
        'deceased.5-14.todate', 'deceased.15-24.todate',
        'deceased.25-34.todate', 'deceased.35-44.todate',
        'deceased.45-54.todate', 'deceased.55-64.todate',
        'deceased.65-74.todate', 'deceased.75-84.todate',
        'deceased.85+.todate', 'deceased.todate',
        'deceased.female.0-4.todate', 'deceased.female.5-14.todate',
        'deceased.female.15-24.todate', 'deceased.female.25-34.todate',
        'deceased.female.35-44.todate', 'deceased.female.45-54.todate',
        'deceased.female.55-64.todate', 'deceased.female.65-74.todate',
        'deceased.female.75-84.todate', 'deceased.female.85+.todate',
        'deceased.female.todate', 'deceased.male.0-4.todate',
        'deceased.male.5-14.todate', 'deceased.male.15-24.todate',
        'deceased.male.25-34.todate', 'deceased.male.35-44.todate',
        'deceased.male.45-54.todate', 'deceased.male.55-64.todate',
        'deceased.male.65-74.todate', 'deceased.male.75-84.todate',
        'deceased.male.85+.todate', 'deceased.male.todate',
        'deceased.rhoccupant.todate', 'deceased.other.todate'
    ]
    merged = merged.reindex(columns=column_order)

    merged.to_csv(filename, float_format='%.0f', line_terminator='\r\n')
    write_timestamp_file(filename=filename, old_hash=old_hash)
コード例 #17
0
def import_nijz_dash_vacc_by_age():
    """Add today's per-age-group vaccination totals to the by-age CSV.

    Reads the existing csv/vaccination-by_age.csv, builds a single row
    dated today from ``cepimose.vaccinations_by_age()`` (first- and
    second-dose counts per age group), adds four aggregate columns
    ("18-64" and "65+", each for 1st and 2nd dose), and merges that row
    into the existing data, with today's values taking precedence where
    both have one.  The result is written back in a fixed column order
    and ``write_timestamp_file`` is called with the pre-write hash.
    """
    filename = "csv/vaccination-by_age.csv"
    df_existing = pd.read_csv(filename, index_col=0, parse_dates=[0])

    today_data = {}
    for row in cepimose.vaccinations_by_age():
        group = row.age_group
        today_data["vaccination.age.{}.1st.todate".format(
            group)] = row.count_first
        today_data["vaccination.age.{}.2nd.todate".format(
            group)] = row.count_second

    df_today = pd.DataFrame([today_data], index=[datetime.date.today()])
    df_today.index.name = 'date'

    def columns_for(dose: str, senior: bool):
        """List today's columns for `dose` on one side of the 65 split.

        Column names look like ``vaccination.age.<range>.<dose>.todate``;
        the lower bound of ``<range>`` decides the side.
        """
        chosen = []
        for name in df_today.columns:
            parts = name.split('.')
            lower_bound = int(parts[2].split('-')[0].strip('+'))
            if (lower_bound >= 65) == senior and parts[3] == dose:
                chosen.append(name)
        return chosen

    # All selections are resolved before any aggregate column is added, so
    # the aggregates never feed into one another.
    # NOTE(review): the "18-64" selection takes every group whose lower
    # bound is below 65, which would also pick up a 0-17 group if the
    # source reports one (col_order below lists it) - confirm intended.
    aggregates = {
        'vaccination.age.18-64.1st.todate': columns_for('1st', False),
        'vaccination.age.18-64.2nd.todate': columns_for('2nd', False),
        'vaccination.age.65+.1st.todate': columns_for('1st', True),
        'vaccination.age.65+.2nd.todate': columns_for('2nd', True),
    }
    for target, sources in aggregates.items():
        df_today[target] = df_today[sources].sum(axis=1)

    # today's row wins over an existing row for the same date
    df_updated = df_today.combine_first(df_existing).astype('Int64')

    col_order = [
        'vaccination.age.0-17.1st.todate', 'vaccination.age.0-17.2nd.todate',
        'vaccination.age.18-24.1st.todate', 'vaccination.age.18-24.2nd.todate',
        'vaccination.age.25-29.1st.todate', 'vaccination.age.25-29.2nd.todate',
        'vaccination.age.30-34.1st.todate', 'vaccination.age.30-34.2nd.todate',
        'vaccination.age.35-39.1st.todate', 'vaccination.age.35-39.2nd.todate',
        'vaccination.age.40-44.1st.todate', 'vaccination.age.40-44.2nd.todate',
        'vaccination.age.45-49.1st.todate', 'vaccination.age.45-49.2nd.todate',
        'vaccination.age.50-54.1st.todate', 'vaccination.age.50-54.2nd.todate',
        'vaccination.age.55-59.1st.todate', 'vaccination.age.55-59.2nd.todate',
        'vaccination.age.60-64.1st.todate', 'vaccination.age.60-64.2nd.todate',
        'vaccination.age.65-69.1st.todate', 'vaccination.age.65-69.2nd.todate',
        'vaccination.age.70-74.1st.todate', 'vaccination.age.70-74.2nd.todate',
        'vaccination.age.75-79.1st.todate', 'vaccination.age.75-79.2nd.todate',
        'vaccination.age.80-84.1st.todate', 'vaccination.age.80-84.2nd.todate',
        'vaccination.age.85-89.1st.todate', 'vaccination.age.85-89.2nd.todate',
        'vaccination.age.90+.1st.todate', 'vaccination.age.90+.2nd.todate',
        'vaccination.age.18-64.1st.todate', 'vaccination.age.18-64.2nd.todate',
        'vaccination.age.65+.1st.todate', 'vaccination.age.65+.2nd.todate'
    ]
    df_updated = df_updated[col_order]

    old_hash = sha1sum(filename)
    df_updated.astype('Int64').to_csv(filename, date_format='%Y-%m-%d')
    write_timestamp_file(filename, old_hash)