def main():
    """Command-line entry point for rendering the animated vaccination plots.

    Parses the command-line options, decides what to show and passes the
    date range between the parsed ``from_date`` and ``to_date`` to
    ``parallel_render``.

    If ``--type`` is given, a single area is shown and ``--area`` must be
    supplied as well; the output name is ``<area>_<type>``. Otherwise the
    selection for ``--group`` (default ``everything``) is used and the output
    is named after the group.
    """
    parser = ArgumentParser()
    parser.add_argument('--group',
                        choices=['first', 'second', 'everything'],
                        default='everything')
    parser.add_argument('--area')
    parser.add_argument('-t', '--type', choices=['first', 'second'])
    # NOTE(review): this call presumably defines --from-date, since
    # ``args.from_date`` is read below -- confirm against add_date_arg.
    add_date_arg(parser,
                 help='first release to use',
                 default=earliest_vaccination)
    add_date_arg(parser,
                 '--to-date',
                 default=find_latest('vaccination_cum_*')[1])
    parser.add_argument('--duration', type=float, default=0.2)
    parser.add_argument('--raise-errors', action='store_true')
    args = parser.parse_args()

    if args.type:
        # --area has no default, so without this check a missing --area
        # would fail below with an AttributeError on ``None.split``.
        if not args.area:
            parser.error('--area is required when --type is given')
        # Turn a hyphenated area such as "some-area" into "Some Area".
        area_title = ' '.join(w.capitalize() for w in args.area.split('-'))
        to_show = tuple_product_array([area_title], args.type.capitalize())
        name = f'{args.area}_{args.type}'
    else:
        to_show = selection_mapping(args.from_date)[args.group]
        name = args.group

    dates = pd.date_range(args.from_date, args.to_date)
    parallel_render(f'animated_vaccinations_{name}',
                    partial(render_plots, to_show, args.to_date),
                    dates,
                    duration=args.duration,
                    raise_errors=args.raise_errors)
Beispiel #2
0
def summary_data(series,
                 data_date=None,
                 start=None,
                 end=None,
                 nation='england'):
    """Read the metrics of *series* from a nation's CSV file, divided by 7.

    When *data_date* is ``None`` or ``'*'`` the file is located with
    ``find_latest`` and its date is used; otherwise the file name is built
    from *nation* and the date part of *data_date* under ``base_path``.

    Returns a ``(data, data_date)`` tuple, where ``data`` is the result of
    ``read_csv`` for the metrics of *series*, divided by 7.
    """
    if data_date not in (None, '*'):
        file_date = pd.to_datetime(data_date).date()
        data_path = base_path / f'{nation}_{file_date}.csv'
    else:
        data_path, data_date = find_latest(f'{nation}_*.csv')

    wanted_metrics = [s_.metric for s_ in series]
    frame = read_csv(data_path,
                     start,
                     end,
                     wanted_metrics,
                     index_col=[date_col])
    return frame / 7, data_date
Beispiel #3
0
def weekly_data():
    """Return cumulative first and second dose counts per area.

    Reads the ``vaccination_old_style_2021-04-08.csv`` file, keeps the rows
    where both weekly dose columns are present, renames those columns to
    ``any_cov`` and ``full_cov`` and returns their cumulative sums grouped by
    the last index level (``area_code``), indexed by date and area code.
    """
    first_dose = 'weeklyPeopleVaccinatedFirstDoseByVaccinationDate'
    second_dose = 'weeklyPeopleVaccinatedSecondDoseByVaccinationDate'
    key_columns = [date_col, area_code]

    raw_path, _ = find_latest('vaccination_old_style_2021-04-08.csv')
    raw = read_csv(raw_path)
    # Sort in place so that the cumulative sums below run in date order.
    raw.sort_values(key_columns, inplace=True)

    weekly = raw[key_columns + [first_dose, second_dose]].dropna()
    renamed = weekly.rename(
        columns={first_dose: any_cov, second_dose: full_cov},
        errors='raise',
    )
    indexed = renamed.set_index(key_columns)
    return indexed.groupby(level=-1).cumsum()
Beispiel #4
0
def raw_vaccination_data(dt='*', sanity_checks: bool = True):
    """Load a ``vaccination_<date>.csv`` file, sorted by date and area code.

    *dt* selects the file: ``'*'`` is turned into the ``'????-*'`` pattern,
    anything else is converted to a date and used in the file name passed to
    ``find_latest``.

    With *sanity_checks* enabled, asserts that either the cumulative
    "complete dose" column is entirely null, or that on rows where all four
    complete/second dose columns are present the complete and second dose
    values agree for both the cumulative and the new figures.

    Returns a ``(raw, data_date)`` tuple.
    """
    dt = '????-*' if dt == '*' else pd.to_datetime(dt).date()
    data_path, data_date = find_latest(f'vaccination_{dt}.csv')
    raw = read_csv(data_path)
    raw.sort_values([date_col, area_code], inplace=True)

    if not sanity_checks:
        return raw, data_date

    compared_columns = [
        complete_dose_publish_cum, second_dose_publish_cum,
        complete_dose_publish_new, second_dose_publish_new
    ]
    # Only rows where all four columns have a value can be compared.
    complete = raw[compared_columns].dropna(how='any')
    cum_matches = complete[complete_dose_publish_cum] == complete[
        second_dose_publish_cum]
    new_matches = complete[complete_dose_publish_new] == complete[
        second_dose_publish_new]
    cum_equal = cum_matches.all()
    new_equal = new_matches.all()
    assert raw[complete_dose_publish_cum].isnull().all() or (cum_equal
                                                             and new_equal)

    return raw, data_date
Beispiel #5
0
def best_data(dt='*',
              area_type=ltla,
              areas=None,
              earliest=None,
              days=None,
              metric=new_cases_by_specimen_date,
              file_prefix: str = None,
              metrics=(),
              date_index=False):
    """Load a data file for *area_type*, filter it and check it has data.

    :param dt: date part of the file name handed to ``find_latest``;
        must be ``'*'`` when *area_type* is ``msoa``.
    :param area_type: area type to load; also the default *file_prefix*.
    :param areas: optional collection of area codes to keep.
    :param earliest: optional earliest date to keep (inclusive).
    :param days: if given, overrides *earliest* with midnight *days* days
        before the date of the data file.
    :param metric: single metric to require when *metrics* is empty.
    :param file_prefix: file name prefix; defaults to *area_type*.
    :param metrics: metrics that must be present; defaults to ``[metric]``.
    :param date_index: if true, index the result by date, sorted.
    :returns: a ``(data, data_date)`` tuple.
    :raises FileNotFoundError: if no matching file exists and the
        ``coronavirus-cases_*`` fallback does not apply.
    :raises NoData: if the filtered data is empty, or any requested metric
        is absent or empty.
    """
    metrics = list(metrics) if metrics else [metric]
    if file_prefix is None:
        file_prefix = area_type
    if area_type == msoa:
        assert dt == '*'
        data_path = base_path / 'msoa_composite.csv'
        data = read_csv(data_path)
        data_date = pd.to_datetime(data.iloc[-1][release_timestamp])
    else:
        try:
            data_path, data_date = find_latest(f'{file_prefix}_{dt}.csv')
        except FileNotFoundError:
            # The fallback file only carries new cases by specimen date, so
            # it is usable only when that is the sole requested metric.
            # Compare the normalised ``metrics`` list: comparing the scalar
            # ``metric`` with a list is always unequal, which made this
            # fallback unreachable.
            if metrics != [new_cases_by_specimen_date]:
                raise
            area_type_filter = area_type_filters.get(area_type)
            if area_type_filter is None:
                raise
            data_path, data_date = find_latest(f'coronavirus-cases_{dt}.csv')
            data = pd.read_csv(data_path, parse_dates=[specimen_date])
            data = data[data['Area type'].isin(area_type_filter)]
            # Rename the fallback file's columns to the names used for the
            # filtering and metric checks below.
            data.rename(inplace=True,
                        errors='raise',
                        columns={
                            area:
                            area_name,
                            code:
                            area_code,
                            specimen_date:
                            date_col,
                            'Daily lab-confirmed cases':
                            new_cases_by_specimen_date,
                        })
        else:
            data = read_csv(data_path)

    if days:
        earliest = datetime.combine(data_date - timedelta(days=days),
                                    datetime.min.time())
    if earliest:
        data = data[data[date_col] >= pd.to_datetime(earliest)]

    if areas:
        data = data[data[area_code].isin(areas)]

    missing = []
    if data.empty:
        missing = metrics
    else:
        # Use a distinct loop name so the ``metric`` parameter is not shadowed.
        for wanted in metrics:
            series = data.get(wanted)
            if series is None or series.empty:
                missing.append(wanted)
    if missing:
        missing = ', '.join(missing)
        raise NoData(
            f'No {missing} for {file_prefix} in {areas} available in {data_path}'
        )

    if date_index:
        data = data.set_index(date_col).sort_index()

    return data, data_date
Beispiel #6
0
def latest_map_data():
    """Load the ``zoe_prevalence_map_*.pickle`` file found by ``find_latest``.

    The pickle is read with ``read_pickle`` and passed through ``convert_df``
    using the ``'the_geom_webmercator'`` column name.

    Returns a ``(date, converted_frame)`` tuple; note that the date comes
    first here.
    """
    pickle_path, map_date = find_latest('zoe_prevalence_map_*.pickle',
                                        date_index=-2)
    converted = convert_df(read_pickle(pickle_path), 'the_geom_webmercator')
    return map_date, converted
Beispiel #7
0
def check_path(path):
    """Run a ``Checker`` over the rows of the file matching *path*'s name.

    Only the final component of *path* is used; ``find_latest`` resolves it
    to the actual file and its date. Every row from ``tqdm_dict_reader``,
    passed through ``add_blank_rows``, is added to the checker before
    ``check()`` is called.
    """
    resolved_path, resolved_date = find_latest(Path(path).name)
    checker = Checker(resolved_date, resolved_path)
    rows = add_blank_rows(tqdm_dict_reader(resolved_path))
    for row in rows:
        checker.add_row(row)
    checker.check()
Beispiel #8
0
def load_prevalence(source, dates, data_date):
    """Load the ``<source>_*.csv`` file found on or before *data_date*.

    *dates* is the name of the column that is parsed as dates and used as
    the index.

    Returns a ``(data, data_date)`` tuple, where ``data`` is sorted by its
    index and ``data_date`` is the date reported by ``find_latest``.
    """
    csv_path, found_date = find_latest(f'{source}_*.csv',
                                       on_or_before=data_date)
    frame = pd.read_csv(csv_path, index_col=[dates], parse_dates=[dates])
    return frame.sort_index(), found_date