def main():
    """Command-line entry point for rendering animated vaccination plots.

    Two modes are supported:

    * ``-t/--type`` together with ``--area``: a single area/type combination
      is shown, and the output name is ``<area>_<type>``.
    * otherwise: the plots come from ``selection_mapping`` for the chosen
      ``--group``, and the output name is the group name.

    The date range from ``args.from_date`` to ``args.to_date`` is handed to
    ``parallel_render`` together with a partially-applied ``render_plots``.
    """
    parser = ArgumentParser()
    parser.add_argument('--group', choices=['first', 'second', 'everything'],
                        default='everything')
    parser.add_argument('--area')
    parser.add_argument('-t', '--type', choices=['first', 'second'])
    # NOTE(review): presumably this registers the option read back below as
    # ``args.from_date`` -- confirm against add_date_arg.
    add_date_arg(parser, help='first release to use', default=earliest_vaccination)
    add_date_arg(parser, '--to-date', default=find_latest('vaccination_cum_*')[1])
    parser.add_argument('--duration', type=float, default=0.2)
    parser.add_argument('--raise-errors', action='store_true')
    args = parser.parse_args()

    if args.type:
        # --area has no default, so without this guard ``args.area.split``
        # would fail with an AttributeError on None.
        if args.area is None:
            parser.error('--area is required when -t/--type is given')
        # 'some-area-name' -> 'Some Area Name'
        area_title = ' '.join(w.capitalize() for w in args.area.split('-'))
        to_show = tuple_product_array([area_title], args.type.capitalize())
        name = f'{args.area}_{args.type}'
    else:
        to_show = selection_mapping(args.from_date)[args.group]
        name = args.group

    dates = pd.date_range(args.from_date, args.to_date)
    parallel_render(f'animated_vaccinations_{name}',
                    partial(render_plots, to_show, args.to_date),
                    dates, duration=args.duration, raise_errors=args.raise_errors)
def summary_data(series, data_date=None, start=None, end=None, nation='england'):
    """Load the summary CSV for ``nation`` and return it divided by 7.

    :param series: iterable of objects with a ``metric`` attribute; those
        metrics are passed on to ``read_csv``.
    :param data_date: release to load. ``None`` or ``'*'`` means whatever
        ``find_latest`` returns for ``<nation>_*.csv``; anything else is parsed
        with ``pd.to_datetime`` and used to build the file name.
    :param start: passed through to ``read_csv``.
    :param end: passed through to ``read_csv``.
    :param nation: file name prefix of the CSV.
    :return: ``(data, data_date)`` where ``data`` is the loaded frame with
        every value divided by 7.
    """
    use_latest = data_date in (None, '*')
    if use_latest:
        csv_path, data_date = find_latest(f'{nation}_*.csv')
    else:
        release_day = pd.to_datetime(data_date).date()
        csv_path = base_path / f'{nation}_{release_day}.csv'

    wanted_metrics = [item.metric for item in series]
    frame = read_csv(csv_path, start, end, wanted_metrics, index_col=[date_col])
    # NOTE(review): the division by 7 presumably converts weekly sums to
    # daily averages -- confirm against the data source.
    return frame / 7, data_date
def weekly_data():
    """Return cumulative first/second dose figures from the old-style file.

    The two weekly dose columns are selected alongside the date and area code,
    rows with missing values are dropped, and the columns are renamed to
    ``any_cov`` and ``full_cov``. The result is indexed by (date, area code)
    and cumulatively summed within each area code.
    """
    first_weekly = 'weeklyPeopleVaccinatedFirstDoseByVaccinationDate'
    second_weekly = 'weeklyPeopleVaccinatedSecondDoseByVaccinationDate'

    source_path, _ = find_latest('vaccination_old_style_2021-04-08.csv')
    ordered = read_csv(source_path).sort_values([date_col, area_code])

    doses = ordered[[date_col, area_code, first_weekly, second_weekly]].dropna()
    doses = doses.rename(
        columns={first_weekly: any_cov, second_weekly: full_cov},
        errors='raise',
    )

    indexed = doses.set_index([date_col, area_code])
    # level=-1 is the area code: accumulate each area separately.
    return indexed.groupby(level=-1).cumsum()
def raw_vaccination_data(dt='*', sanity_checks: bool = True):
    """Load a raw vaccination CSV, sorted by date and area code.

    :param dt: release to load. ``'*'`` is turned into the glob ``'????-*'``;
        anything else is parsed with ``pd.to_datetime`` and reduced to a date.
    :param sanity_checks: when true, verify that the "complete dose" columns
        agree with the "second dose" columns wherever all four are present.
    :return: ``(raw, data_date)`` as located by ``find_latest``.
    :raises AssertionError: if ``sanity_checks`` is true, the cumulative
        complete-dose column has data, and the figures disagree.
    """
    if dt == '*':
        dt = '????-*'
    else:
        dt = pd.to_datetime(dt).date()
    data_path, data_date = find_latest(f'vaccination_{dt}.csv')
    raw = read_csv(data_path)
    raw.sort_values([date_col, area_code], inplace=True)

    if sanity_checks:
        complete = raw[[
            complete_dose_publish_cum,
            second_dose_publish_cum,
            complete_dose_publish_new,
            second_dose_publish_new,
        ]].dropna(how='any')
        cum_equal = (
            complete[complete_dose_publish_cum] == complete[second_dose_publish_cum]
        ).all()
        new_equal = (
            complete[complete_dose_publish_new] == complete[second_dose_publish_new]
        ).all()
        no_complete_data = raw[complete_dose_publish_cum].isnull().all()
        # Raised explicitly rather than via ``assert`` so the check still runs
        # under ``python -O``; the exception type is unchanged for callers.
        if not (no_complete_data or (cum_equal and new_equal)):
            raise AssertionError(
                f'complete and second dose figures differ in {data_path}'
            )
    return raw, data_date
def best_data(dt='*', area_type=ltla, areas=None, earliest=None, days=None,
              metric=new_cases_by_specimen_date, file_prefix: str = None, metrics=(),
              date_index=False):
    """Load the data file for ``area_type`` and filter it.

    :param dt: release date pattern used in the file name; must be ``'*'``
        when ``area_type`` is ``msoa``.
    :param area_type: kind of area to load; also the default file prefix.
    :param areas: optional collection of area codes to keep.
    :param earliest: optional earliest date to keep (inclusive).
    :param days: if given, overrides ``earliest`` with ``data_date - days``.
    :param metric: single metric required, used only when ``metrics`` is empty.
    :param file_prefix: file name prefix; defaults to ``area_type``.
    :param metrics: metrics that must be present in the data.
    :param date_index: when true, index the result by date and sort it.
    :return: ``(data, data_date)``.
    :raises FileNotFoundError: when no matching file exists and the
        ``coronavirus-cases_*`` fallback does not apply.
    :raises NoData: when the filtered data is empty or lacks a requested metric.
    """
    metrics = list(metrics) if metrics else [metric]
    if file_prefix is None:
        file_prefix = area_type

    if area_type == msoa:
        assert dt == '*'
        data_path = base_path / 'msoa_composite.csv'
        data = read_csv(data_path)
        data_date = pd.to_datetime(data.iloc[-1][release_timestamp])
    else:
        try:
            data_path, data_date = find_latest(f'{file_prefix}_{dt}.csv')
        except FileNotFoundError:
            # The fallback file only carries daily lab-confirmed cases, so it
            # is usable only when that is the sole metric requested. Compare
            # the normalised ``metrics`` list: comparing the scalar ``metric``
            # to a list is always unequal and would make this unreachable.
            if metrics != [new_cases_by_specimen_date]:
                raise
            area_type_filter = area_type_filters.get(area_type)
            if area_type_filter is None:
                raise
            data_path, data_date = find_latest(f'coronavirus-cases_{dt}.csv')
            data = pd.read_csv(data_path, parse_dates=[specimen_date])
            data = data[data['Area type'].isin(area_type_filter)]
            # Assign rather than rename in place: ``data`` is a filtered
            # selection of the frame that was read.
            data = data.rename(errors='raise', columns={
                area: area_name,
                code: area_code,
                specimen_date: date_col,
                'Daily lab-confirmed cases': new_cases_by_specimen_date,
            })
        else:
            data = read_csv(data_path)

    if days:
        earliest = datetime.combine(data_date - timedelta(days=days), datetime.min.time())
    if earliest:
        data = data[data[date_col] >= pd.to_datetime(earliest)]
    if areas:
        data = data[data[area_code].isin(areas)]

    if data.empty:
        missing = metrics
    else:
        missing = []
        for wanted in metrics:
            column = data.get(wanted)
            if column is None or column.empty:
                missing.append(wanted)

    if missing:
        missing_text = ', '.join(missing)
        raise NoData(
            f'No {missing_text} for {file_prefix} in {areas} available in {data_path}'
        )

    if date_index:
        data = data.set_index(date_col).sort_index()
    return data, data_date
def latest_map_data():
    """Load the file found for ``zoe_prevalence_map_*.pickle``.

    :return: ``(dt, gdf)`` -- the date reported by ``find_latest`` and the
        unpickled frame after ``convert_df`` has been applied with the
        ``'the_geom_webmercator'`` column name.
    """
    # NOTE(review): this unpickles a file from disk; make sure the file only
    # ever comes from a trusted source.
    pickle_path, release = find_latest('zoe_prevalence_map_*.pickle', date_index=-2)
    geo_frame = convert_df(read_pickle(pickle_path), 'the_geom_webmercator')
    return release, geo_frame
def check_path(path):
    """Run a ``Checker`` over every row of the file found for ``path``.

    Only the final component of ``path`` is passed to ``find_latest``; the
    path and date it returns are used from then on. Rows are read through
    ``tqdm_dict_reader``, passed through ``add_blank_rows``, fed to the
    checker one at a time, and ``check()`` is called at the end.
    """
    latest_path, release = find_latest(Path(path).name)
    checker = Checker(release, latest_path)
    rows = add_blank_rows(tqdm_dict_reader(latest_path))
    for record in rows:
        checker.add_row(record)
    checker.check()
def load_prevalence(source, dates, data_date):
    """Read the ``<source>_*.csv`` file located by ``find_latest``.

    :param source: file name prefix.
    :param dates: name of the column to parse as dates and use as the index.
    :param data_date: passed to ``find_latest`` as ``on_or_before``.
    :return: ``(data, data_date)`` -- the frame sorted by its index, and the
        date reported by ``find_latest``.
    """
    pattern = f'{source}_*.csv'
    csv_path, data_date = find_latest(pattern, on_or_before=data_date)
    frame = pd.read_csv(csv_path, parse_dates=[dates], index_col=[dates])
    ordered = frame.sort_index()
    return ordered, data_date