def get_cumulative_tests_chart_data(json, dates):
    log_status("get_cumulative_tests_chart_data()")

    # Count totals for every day
    date_counts = defaultdict(int)
    dates_within_range = set(str(date.date()) for date in dates)
    date_start = str(dates[0].date())
    count_before_date_range = 0
    for res in json:
        date = res["StatisticsDate"]
        if date in dates_within_range:
            date_counts[date] += 1
        elif date < date_start:
            # Count tests taken before the chart's date range separately
            count_before_date_range += 1

    tests = [date_counts[str(date.date())] for date in dates]

    # Attribute everything before the first date to the first date
    tests[0] += count_before_date_range

    return {"testsAdministered": list(np.cumsum(tests))}
def get_confirmed_cases_by_county(json, county_mapping):
    log_status("get_confirmed_cases_by_county()")

    chart_counties = [
        "Harjumaa", "Hiiumaa", "Ida-Virumaa", "Jõgevamaa", "Järvamaa",
        "Läänemaa", "Lääne-Virumaa", "Põlvamaa", "Pärnumaa", "Raplamaa",
        "Saaremaa", "Tartumaa", "Valgamaa", "Viljandimaa", "Võrumaa",
        "Info puudulik",
    ]

    # Count positive test totals for every county
    counts = defaultdict(int)
    for res in json:
        if res["ResultValue"] == "P":
            county = county_mapping[res["County"]]
            counts[county] += 1

    return [counts[county] for county in chart_counties]
def download_data(url, destination):
    log_status(f"Downloading {url} to {destination}")
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        with open(destination, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
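# Usage sketch (hypothetical paths; illustration only, not part of the pipeline):
# streaming in 8 KiB chunks keeps memory flat for large files, and downloading to
# a ".tmp" file first, as main() below does, keeps a half-written file from
# replacing good data if the transfer fails mid-stream.
def _example_download():
    download_data("https://example.com/data.json", "data.json.tmp")
    move("data.json.tmp", "data.json")  # assumes the shutil-style move() used in main()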
def get_new_cases_per_day_chart_data(data):
    log_status("get_new_cases_per_day_chart_data()")
    # Daily new cases are the first difference of the cumulative case series
    cases = np.diff(data["cases"], prepend=0)
    return {"confirmedCases": list(cases)}
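# Sanity-check sketch with made-up numbers: np.diff with prepend=0 turns a
# cumulative series back into daily increments, the inverse of np.cumsum.
def _check_new_cases_diff():
    cumulative = [1, 3, 6, 6]
    daily = list(np.diff(cumulative, prepend=0))
    assert daily == [1, 2, 3, 0]
    assert list(np.cumsum(daily)) == cumulative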
def main():
    """Run script"""
    log_dir = os.path.join(args.log_dir, args.meta_model, args.suffix)

    data = DataContainer(
        root=args.root,
        num_pretrain_alphabets=args.num_pretrain,
        num_classes=args.classes,
        seed=args.seed,
        num_workers=args.workers,
        pin_memory=True,
    )

    ###########################################################################

    def evaluate(model, case, step):
        """Run final evaluation"""
        if args.write_ival > 0:
            torch.save(model, join(log_dir, 'model.pth.tar'))

        if case == 'test':
            iterator = data.test
        else:
            iterator = data.val
        iterator = iterator(args.task_batch_size, args.task_val_steps, args.multi_head)

        pp('Evaluating on {} tasks'.format(case))

        results = []
        for i, task in enumerate(iterator):
            # Reload the saved model so each task starts from the same weights
            if args.write_ival > 0:
                task_model = torch.load(join(log_dir, 'model.pth.tar'))
            else:
                task_model = model

            t = time.time()
            task_results = task_model([task], meta_train=False)
            t = time.time() - t

            results.append(task_results)
            if args.log_ival > 0:
                log_status(task_results, 'task={}'.format(i), t)

        if args.log_ival > 0:
            log_status(consolidate(results), 'task avg', t)
        if args.write_ival > 0:
            write_val_res(results, step, case, log_dir)
        pp('Done')
def train(model_object, model, train_loader, test_loader=None):
    if model_object.device.lower() != 'cpu':
        model_object.device = 'cuda:0'

    if model_object.log_path and model_object.verbose > 0:
        log_status(model_object.log_path,
                   'model_params: {}'.format(str(model_object.params)),
                   init=False)

    model = model.to(model_object.device)
    model_object.optimizer = optim.Adam(model.parameters(),
                                        lr=model_object.lr,
                                        weight_decay=model_object.weight_decay)
    kwargs = {
        'pos_weight': torch.Tensor([model_object.pos_weight]).to(model_object.device),
        'reduction': 'mean'
    }
    model_object.criterion = LossGenie('BCEWithLogitsLoss', **kwargs)

    # assisting modules
    model_object.lr_scheduler = optim.lr_scheduler.StepLR(
        model_object.optimizer, step_size=model_object.lr_decay_freq, gamma=0.5)
    model_object.early_stopper = EarlyStopping(model_object.model_dir,
                                               patience=model_object.patience,
                                               verbose=True)
    model_object.train_metrics = metrics.BinaryClfMetrics()
    model_object.test_metrics = metrics.BinaryClfMetrics()

    for ep in range(1, model_object.epoch + 1):
        # train for an epoch
        train_epoch(ep, model_object, model, train_loader, test_loader)

        # validate for an epoch
        if test_loader is not None:
            valid_epoch(ep, model_object, model, test_loader)

        # return if early stopping has triggered
        if model_object.early_stopper.early_stop:
            return model

        model_object.lr_scheduler.step()
        if model_object.log_path and model_object.verbose > 0:
            log_status(model_object.log_path,
                       'current lr: {}'.format(model_object.lr_scheduler.get_lr()),
                       init=False)

    return model
def train_epoch(ep, model_object, model, train_loader, test_loader=None):
    model.train()
    train_timer = Timer('M')
    model_object.criterion.train()
    model_object.train_metrics.reset()

    print_counter = 0  # counter for printing
    for datum in train_loader:
        model_object.optimizer.zero_grad()
        loss, prob, tgt, _ = predict(ep, model_object, model, datum)
        loss.backward()

        # gradient clipping
        clip_grad_norm_(model.parameters(), 10)
        model_object.train_metrics.collect(y_prob=prob, y_true=tgt)

        # update network
        model_object.optimizer.step()

        print_counter += 1
        if (print_counter % model_object.print_freq == 0
                and model_object.log_path is not None
                and model_object.verbose > 0):
            train_auc = model_object.train_metrics.roc_auc()
            train_ap = model_object.train_metrics.average_precision()
            status = ('[epoch {}], train batch {} - batch loss: {:.5f}, '
                      'running train auc: {:.5f}, running train ap: {:.5f} '
                      '- time taken: {:.2f} mins')
            status = status.format(ep, model_object.criterion.n_batch_train,
                                   model_object.criterion.get_running_loss(),
                                   train_auc, train_ap, train_timer.time())
            log_status(model_object.log_path, status, init=False)

    # evaluate on validation set
    if test_loader is not None:
        train_timer.pause()
        valid_epoch(ep, model_object, model, test_loader)
        train_timer.resume()
        model.train()

    # print and log status
    train_loss = model_object.criterion.get_running_loss()
    train_auc = model_object.train_metrics.roc_auc()
    train_acc = model_object.train_metrics.accuracy()
    train_ap = model_object.train_metrics.average_precision()
    if model_object.log_path and model_object.verbose > 0:
        status = ('[epoch {}], train loss: {:.5f}, train acc: {:.5f}, '
                  'train auc: {:.5f}, train ap: {:.5f}, time taken: {:.2f} mins')
        status = status.format(ep, train_loss, train_acc, train_auc, train_ap,
                               train_timer.time_since_start())
        log_status(model_object.log_path, status, init=False)

    # reset running criterion losses and timer
    model_object.criterion.reset_train()
    train_timer.reset()
def get_in_intensive_data(json, manual_data):
    log_status("get_in_intensive_data()")

    if not isinstance(json, list) or not isinstance(manual_data, dict):
        return False

    data = get_dict_with_dates_and_key(json, "IsInIntensive")
    # Start from the manually-tracked values and overwrite with API data
    output = manual_data
    for day in data:
        if isinstance(data[day], str):
            output[day] = int(data[day])
    return output
def get_json_data(url):
    max_retries = 3
    for retry in range(1, max_retries + 1):
        try:
            # Request remote data
            response = requests.get(url=url)

            # Process response
            if response.status_code == 200:
                return response.json()
            log_status('Endpoint unavailable. Status code: ' + str(response.status_code))
        except Exception:
            # Log error
            log_status('Error when retrieving remote data:')
            log_status(traceback.format_exc())

        # Retry?
        if retry < max_retries:
            log_status("Retrying...")
            sleep(5)

    # Unable to get remote data
    return None
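# Usage sketch (hypothetical endpoint and helper, for illustration only):
# get_json_data() returns the decoded JSON on success and None after three
# failed attempts, so callers must handle the None case explicitly, as the
# data-update main() below does.
def _example_fetch():
    payload = get_json_data("https://example.com/api/v1/stats")
    if payload is None:
        log_status("Giving up: endpoint unreachable")
    return payload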
def get_positive_negative_chart_data(json, mapping):
    """
    Compile data for the "Positive and negative tests by county" chart.
    """
    log_status("get_positive_negative_chart_data()")

    # Define counties (in order)
    chart_counties = [
        "Info puudulik", "Harjumaa", "Hiiumaa", "Ida-Virumaa", "Jõgevamaa",
        "Järvamaa", "Läänemaa", "Lääne-Virumaa", "Põlvamaa", "Pärnumaa",
        "Raplamaa", "Saaremaa", "Tartumaa", "Valgamaa", "Viljandimaa",
        "Võrumaa",
    ]

    results = [d["ResultValue"] for d in json]
    county = [mapping[d["County"]] for d in json]
    df = pd.DataFrame({"County": county, "ResultValue": results})

    pos_results = df[df.ResultValue == "P"].groupby("County").count()
    neg_results = df[df.ResultValue == "N"].groupby("County").count()
    pos_results.rename(columns={"ResultValue": "Positive"}, inplace=True)
    neg_results.rename(columns={"ResultValue": "Negative"}, inplace=True)

    # Outer join so counties with only positives or only negatives are kept
    end_df = pos_results.join(neg_results, how="outer")
    end_df.fillna(0, inplace=True)
    # Cast both count columns back to int (fillna promotes them to float)
    end_df["Positive"] = end_df[["Positive"]].astype("int")
    end_df["Negative"] = end_df[["Negative"]].astype("int")

    county_positive = []
    county_negative = []
    for order in chart_counties:
        county_positive.append(end_df.loc[order, "Positive"])
        county_negative.append(end_df.loc[order, "Negative"])

    return {"negative": county_negative, "positive": county_positive}
def get_municipality_data(json_test_location, county_mapping):
    log_status("get_municipality_data()")

    municipalities_array = []
    yesterday = datetime.strftime(datetime.today() - timedelta(1), "%Y-%m-%d")

    # These communes are reported as several district-level rows; sum each into one entry
    communes_that_are_summed = ["Tallinn", "Pärnu linn", "Saaremaa vald"]
    communes_that_are_summed_data = {}

    for result in json_test_location:
        if result["StatisticsDate"] == yesterday and result["ResultValue"] == "P":
            if result["Commune"] in communes_that_are_summed:
                if result["Commune"] in communes_that_are_summed_data:
                    communes_that_are_summed_data[result["Commune"]]["range_start"] += result["TotalCasesFrom"]
                    communes_that_are_summed_data[result["Commune"]]["range_end"] += result["TotalCasesTo"]
                else:
                    communes_that_are_summed_data[result["Commune"]] = {
                        "range_start": result["TotalCasesFrom"],
                        "range_end": result["TotalCasesTo"],
                        "County": county_mapping[result["County"]],
                        "Commune": result["Commune"],
                        "ResultValue": result["ResultValue"],
                    }
            else:
                county = county_mapping[result["County"]]
                municipalities_array.append([
                    county,
                    result["Commune"],
                    result["Village"],
                    result["ResultValue"],
                    result["TotalCasesFrom"],
                    result["TotalCasesTo"],
                ])

    for commune in communes_that_are_summed_data.values():
        if commune["range_end"] > 0:
            municipalities_array.append([
                commune["County"],
                commune["Commune"],
                "",
                commune["ResultValue"],
                commune["range_start"],
                commune["range_end"],
            ])

    return {"municipalitiesData": municipalities_array}
def get_on_ventilation_data(json):
    log_status("get_on_ventilation_data()")

    if not isinstance(json, list):
        return False

    output = {}
    data = get_dict_with_dates_and_key(json, "IsOnVentilation")
    for day in data:
        # Treat missing values as zero patients on ventilation
        if data[day] is None:
            data[day] = "0"
        if isinstance(data[day], str):
            output[day] = int(data[day])
    return output
def valid_epoch(ep, model_object, model, test_loader):
    model.eval()
    model_object.criterion.eval()
    valid_timer = Timer('M')

    # formatters for debugging purposes
    model_object.valid_formatter = InferenceFormatter()
    model_object.valid_formatter_target = InferenceFormatter()
    model_object.test_metrics.reset()

    for datum in test_loader:
        loss, prob, tgt, meta = predict(ep, model_object, model, datum)
        model_object.test_metrics.collect(y_prob=prob, y_true=tgt)
        model_object.valid_formatter.collect(prob, meta)
        model_object.valid_formatter_target.collect(tgt, meta)

    valid_loss = model_object.criterion.get_running_loss()
    valid_auc = model_object.test_metrics.roc_auc()
    valid_acc = model_object.test_metrics.accuracy()
    valid_ap = model_object.test_metrics.average_precision()

    status = ('[epoch {}], valid loss: {:.5f}, valid acc: {:.5f}, '
              'valid auc: {:.5f}, valid ap: {:0.5f}, time taken: {:.2f} mins')
    status = status.format(ep, valid_loss, valid_acc, valid_auc, valid_ap,
                           valid_timer.time())

    if model_object.log_path:
        if model_object.verbose > 0:
            log_status(model_object.log_path, status, init=False)
        if model_object.verbose > 1:
            log_status(model_object.log_path,
                       str(model_object.test_metrics.top_k_percentile(k=20, step=1).to_string()),
                       init=False)

    model_object.early_stopper(model_object.criterion.get_last_valid_loss(),
                               model, model_object)

    # reset validation losses and set criterion back to training mode
    model_object.criterion.reset_test()
    model_object.criterion.train()
def get_tests_per_day_chart_data(json, dates):
    log_status("get_tests_per_day_chart_data()")

    # Count total and positive tests for every day
    date_counts = defaultdict(int)
    date_positive = defaultdict(int)
    dates_within_range = set(str(date.date()) for date in dates)
    date_start = str(dates[0].date())
    count_before_date_range = 0
    count_positive_before_date_range = 0
    for res in json:
        date = str(pd.to_datetime(res["StatisticsDate"]).date())
        if date in dates_within_range:
            date_counts[date] += 1
            if res["ResultValue"] == "P":
                date_positive[date] += 1
        elif date < date_start:
            # Count tests taken before the chart's date range separately
            count_before_date_range += 1
            if res["ResultValue"] == "P":
                count_positive_before_date_range += 1

    tests = []
    positive_tests = []
    for date in dates:
        tests.append(date_counts[str(date.date())])
        positive_tests.append(date_positive[str(date.date())])

    # Attribute everything before the first date to the first date
    tests[0] += count_before_date_range
    positive_tests[0] += count_positive_before_date_range

    positive_test_percentage = list(
        np.round(np.array(positive_tests) / np.array(tests) * 100, 2)
    )

    return {
        "positiveTestsPerDay": positive_tests,
        "negativeTestsPerDay": list(np.array(tests) - np.array(positive_tests)),
        "positiveTestsPercentage": positive_test_percentage,
        "positiveTestAverage14Percent": np.round(
            np.average(positive_test_percentage[-14:]), 2
        ),
    }
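# Minimal sketch exercising get_tests_per_day_chart_data() with synthetic
# records (hypothetical data, not TEHIK output): two tests on the first day,
# one positive, give a 50% positive rate; one negative test on the second day
# gives 0%.
def _check_tests_per_day():
    dates = pd.date_range(start="2021-01-01", end="2021-01-02")
    records = [
        {"StatisticsDate": "2021-01-01", "ResultValue": "P"},
        {"StatisticsDate": "2021-01-01", "ResultValue": "N"},
        {"StatisticsDate": "2021-01-02", "ResultValue": "N"},
    ]
    data = get_tests_per_day_chart_data(records, dates)
    assert data["positiveTestsPerDay"] == [1, 0]
    assert data["positiveTestsPercentage"] == [50.0, 0.0]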
def get_vaccinated_people_chart_data(json, dates):
    log_status("get_vaccinated_people_chart_data()")

    date_counts_progress = defaultdict(int)
    date_counts_completed = defaultdict(int)
    date_counts = defaultdict(int)
    dates_within_range = set(str(date.date()) for date in dates)

    json_progress = [x for x in json
                     if x["MeasurementType"] == "Vaccinated" and x["VaccinationSeries"] == 1]
    json_completed = [x for x in json
                      if x["MeasurementType"] == "FullyVaccinated" and x["VaccinationSeries"] == 1]

    # People whose vaccination is in progress: vaccinated minus fully vaccinated, per day
    for res in json:
        date = res["StatisticsDate"]
        if date in dates_within_range:
            if res["MeasurementType"] == "FullyVaccinated" and res["VaccinationSeries"] == 1:
                date_counts_progress[date] -= res["DailyCount"]
            elif res["MeasurementType"] == "Vaccinated" and res["VaccinationSeries"] == 1:
                date_counts_progress[date] += res["DailyCount"]

    for res in json_completed:
        date = res["StatisticsDate"]
        if date in dates_within_range:
            date_counts_completed[date] += res["DailyCount"]

    for res in json_progress:
        date = res["StatisticsDate"]
        if date in dates_within_range:
            date_counts[date] += res["DailyCount"]

    vacc_progress = []
    vacc_completed = []
    vacc = []
    for date in dates:
        vacc_progress.append(date_counts_progress[str(date.date())])
        vacc_completed.append(date_counts_completed[str(date.date())])
        vacc.append(date_counts[str(date.date())])

    return {
        "vaccinesProgress": list(np.cumsum(vacc_progress)),
        "vaccinesCompleted": list(np.cumsum(vacc_completed)),
        "vaccinesAll": list(np.cumsum(vacc)),
    }
def get_cumulative_cases_chart_data(test_results, dates, tests_per_day_data):
    log_status("get_cumulative_cases_chart_data()")

    date_counts = defaultdict(int)
    for res in test_results:
        if res["ResultValue"] == "P":
            date = res["StatisticsDate"]
            date_counts[date] += 1

    confirmed_cases = [date_counts[str(date.date())] for date in dates]
    cases = np.cumsum(confirmed_cases)

    # Active cases: incremental 14-day sliding sum of new positive tests
    positive_per_day = tests_per_day_data["positiveTestsPerDay"]
    new_cases_14 = [positive_per_day[0]]
    for i in range(1, 14):
        new_cases_14.append(new_cases_14[i - 1] + positive_per_day[i])
    for i in range(14, len(positive_per_day)):
        new_cases_14.append(new_cases_14[i - 1]
                            - positive_per_day[i - 14]
                            + positive_per_day[i])

    # From https://www.stat.ee/en/find-statistics/statistics-theme/population/population-figure
    estonian_population = 1_328_976
    per_100k_multiplier = 100_000 / estonian_population
    new_cases_14_per_100k = [
        round(active_cases * per_100k_multiplier, 2) for active_cases in new_cases_14
    ]

    return {
        "cases": list(cases),
        "active": new_cases_14,
        "active100k": new_cases_14_per_100k,
    }
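# Self-check sketch (hypothetical helper, not part of the pipeline): the
# incremental 14-day window above should match a pandas rolling sum whose
# window grows from 1 to 14 at the start of the series.
def _check_active_window(positive_per_day):
    rolling = pd.Series(positive_per_day).rolling(14, min_periods=1).sum()
    manual = [positive_per_day[0]]
    for i in range(1, len(positive_per_day)):
        manual.append(manual[-1]
                      + positive_per_day[i]
                      - (positive_per_day[i - 14] if i >= 14 else 0))
    assert manual == list(map(int, rolling))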
def scrape_deaths():
    # Load content from Terviseamet's Covid dashboard and parse it
    log_status("Scraping data on deaths from " + TERVISEAMET_COVID_DASHBOARD)
    html = requests.get(TERVISEAMET_COVID_DASHBOARD).text
    soup = BeautifulSoup(html, "html.parser")

    # Extract number of deaths from page content and update JSON data on deaths
    deaths_container = soup.select(DEATHS_SELECTOR)
    if len(deaths_container) > 0:
        try:
            # Get number of deaths and the current date
            deaths_count = int(deaths_container[0].text.strip())
            current_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")

            # Load existing deaths data
            json_deaths = read_json_from_file(DEATHS_PATH)

            # Add new entry to deaths data for current date
            deaths_output = {}
            if len(json_deaths):
                deaths_output = json_deaths
            deaths_output[current_date] = deaths_count

            # Save data on deaths
            save_as_json(DEATHS_PATH + ".tmp", deaths_output)

            # Log status
            log_status("Successfully scraped deaths. Total deaths: " + str(deaths_count))
        except Exception:
            # Log error
            error_message = "Error when scraping data on deaths"
            log_status(error_message + ":")
            log_status(traceback.format_exc())
            raise Exception(error_message)
    else:
        # Log error
        error_message = "Error: could not find page element with data on deaths"
        log_status(error_message)
        raise Exception(error_message)
def get_infection_count_by_county(json, county_mapping) -> list:
    log_status("get_infection_count_by_county()")

    # Ordering of counties for the chart
    map_counties = [
        "Harjumaa", "Hiiumaa", "Ida-Virumaa", "Jõgevamaa", "Järvamaa",
        "Läänemaa", "Lääne-Virumaa", "Põlvamaa", "Pärnumaa", "Raplamaa",
        "Saaremaa", "Tartumaa", "Valgamaa", "Viljandimaa", "Võrumaa",
    ]
    # Set for fast membership tests
    set_map_counties = set(map_counties)

    counts = defaultdict(int)
    for res in json:
        # Count positive tests, mapping counties and excluding unknown ones
        if res["ResultValue"] == "P":
            county = county_mapping[res["County"]]
            if county in set_map_counties:
                counts[county] += 1

    # Create a list of [county, count, county] triples, as in the current JSON
    return [[county, counts[county], county] for county in map_counties]
def get_hospital_data(json_hospitalisation, start_date):
    log_status("get_hospital_data()")

    hospitalizations = []
    active_hospitalizations = []
    intensive = []
    start_date = datetime.strptime(start_date, "%Y-%m-%d")

    for result in json_hospitalisation:
        statistics_date = datetime.strptime(result["StatisticsDate"].split("T")[0], "%Y-%m-%d")
        if statistics_date >= start_date:
            hospitalizations.append(int(result["Hospitalised"]))
            active_hospitalizations.append(int(result["ActivelyHospitalised"]))
            if result["IsInIntensive"] is not None:
                intensive.append(int(result["IsInIntensive"]))
            else:
                intensive.append(None)

    return {
        "hospitalizations": hospitalizations,
        "activehospitalizations": active_hospitalizations,
        "intensive": intensive,
    }
def get_county_by_day(json, dates, county_mapping, county_sizes):
    log_status("get_county_by_day()")

    chart_counties = [
        "Harjumaa", "Hiiumaa", "Ida-Virumaa", "Jõgevamaa", "Järvamaa",
        "Läänemaa", "Lääne-Virumaa", "Põlvamaa", "Pärnumaa", "Raplamaa",
        "Saaremaa", "Tartumaa", "Valgamaa", "Viljandimaa", "Võrumaa",
    ]

    # Count positive tests per (county, date) pair
    county_date_counts = defaultdict(int)
    for res in json:
        if res["ResultValue"] == "P":
            date = res["StatisticsDate"]
            county = county_mapping[res["County"]]
            if county in chart_counties:
                county_date_counts[(county, date)] += 1

    county_by_day = {}
    new_county_by_day = {}
    map_playback = []
    map_playback_10k = []
    for county in chart_counties:
        per_day_county = []
        per_day_county_10k = []
        for date in dates:
            val = county_date_counts[(county, str(date.date()))]
            per_day_county.append(val)
            per_day_county_10k.append(val / county_sizes[county] * 10000)

        map_playback.append({
            "MNIMI": county,
            "sequence": list(np.cumsum(per_day_county)),
            "drilldown": county,
        })
        map_playback_10k.append({
            "MNIMI": county,
            "sequence": list(np.round(np.cumsum(per_day_county_10k), 2)),
            "drilldown": county,
        })

        # Keep both the daily and the cumulative series
        new_county_by_day[county] = list(per_day_county)
        county_by_day[county] = list(np.cumsum(per_day_county))

    # New cases in each county on the most recent day
    county_by_day_new = [
        county_by_day[county][-1] - county_by_day[county][-2]
        for county in chart_counties
    ]

    return {
        "countyByDay": county_by_day,
        "countyByDayNew": county_by_day_new,
        "newCountyByDay": new_county_by_day,
        "mapPlayback": map_playback,
        "mapPlayback10k": map_playback_10k,
    }
def main():
    # Log status
    log_status("Starting to generate chart data at " + str(TODAY_DMYHM))

    # 1. Create date ranges for charts
    log_status("Creating date ranges for charts")
    case_dates = pd.date_range(start=DATE_SETTINGS["first_case_date"], end=YESTERDAY_YMD)
    vaccination_dates = pd.date_range(start=DATE_SETTINGS["vaccination_start_date"], end=YESTERDAY_YMD)

    # 2. Calculate data related to deaths
    try:
        deaths = read_json_from_file(DEATHS_PATH)
        manual_data = read_json_from_file(MANUAL_DATA_PATH)
    except Exception:
        # Log error
        log_status('Error when loading local data:')
        log_status(traceback.format_exc())
        exit()

    log_status("Calculating data related to deaths")
    manual_data["deceased"].update(deaths)
    deceased = list(manual_data["deceased"].values())
    n_deaths = deceased[-1]
    n_deaths_change = int(deceased[-1]) - int(deceased[-2])

    # 3. Calculate data related to test results

    # Define columns to import
    column_list = ['Gender', 'AgeGroup', 'County', 'ResultValue', 'StatisticsDate']
    test_results = get_json_from_csv_file(TEST_RESULTS_PATH, column_list)

    log_status("Calculating data related to test results")

    # Find count of confirmed cases
    n_confirmed_cases = np.sum([res["ResultValue"] == "P" for res in test_results])

    # Find total number of tests
    n_tests_administered = len(test_results)
    log_status("Total number of tests: " + str(n_tests_administered))

    infections_by_county = get_infection_count_by_county(test_results, county_mapping)
    county_by_day = get_county_by_day(test_results, case_dates, county_mapping, county_sizes)
    confirmed_cases_by_county = get_confirmed_cases_by_county(test_results, county_mapping)
    tests_per_day_chart_data = get_tests_per_day_chart_data(test_results, case_dates)
    cumulative_cases_chart_data = get_cumulative_cases_chart_data(
        test_results, case_dates, tests_per_day_chart_data
    )
    cumulative_tests_chart_data = get_cumulative_tests_chart_data(test_results, case_dates)
    positive_test_by_age_chart_data = get_positive_tests_by_age_chart_data(test_results)
    positive_negative_chart_data = get_positive_negative_chart_data(test_results, county_mapping)
    county_daily_active = get_county_daily_active(test_results, case_dates, county_mapping, county_sizes)

    # Delete test result data from memory
    del test_results

    infections_by_county_10000 = get_infections_data_by_count_10000(infections_by_county, county_sizes)
    tests_pop_ratio = get_test_data_pop_ratio(infections_by_county_10000)
    new_cases_per_day_chart_data = get_new_cases_per_day_chart_data(cumulative_cases_chart_data)

    n_active_cases = cumulative_cases_chart_data["active"][-1]
    n_active_cases_change = (cumulative_cases_chart_data["active"][-1]
                             - cumulative_cases_chart_data["active"][-2])
    per_100k = cumulative_cases_chart_data["active100k"][-1]

    active_infections_by_county = [
        {"MNIMI": k, "sequence": v, "drilldown": k}
        for k, v in county_daily_active["countyByDayActive"].items()
    ]
    active_infections_by_county_100k = [
        [k, round(v[-1] / county_sizes[k] * 100000, 2)]
        for k, v in county_daily_active["countyByDayActive"].items()
    ]

    # 4. Calculate data related to test locations
    test_locations = read_json_from_file(TEST_LOCATIONS_PATH)
    municipalities_data = get_municipality_data(test_locations, county_mapping)

    # Delete test location data from memory
    del test_locations

    # 5. Calculate data related to hospitalisation
    hospitalization = read_json_from_file(HOSPITALIZATION_PATH)

    log_status("Calculating data related to hospitalisation")

    # Set hospitalised and ICU time series
    hospital = get_hospital_data(hospitalization, DATE_SETTINGS["first_case_date"])
    # TODO: Based on cross-checking with the hospitalisation data published by TEHIK, the data
    #   listed in the manual_data.json file with the field name "intensive" appears to show the
    #   number of patients on ventilation. We should fix the terminology and make sure that the
    #   intensive and on-ventilation statistics are being calculated correctly.
    intensive = list(get_in_intensive_data(hospitalization, manual_data["intensive"]).values())
    on_ventilation = list(get_on_ventilation_data(hospitalization).values())

    # Delete hospitalization data from memory
    del hospitalization

    hospitalised = hospital["activehospitalizations"]
    n_on_ventilation = on_ventilation[-1]
    n_on_ventilation_change = int(on_ventilation[-1]) - int(on_ventilation[-2])

    # 6. Calculate data related to vaccination
    vaccination = read_json_from_file(VACCINATIONS_PATH)

    log_status("Calculating data related to vaccination")

    vaccinated_people_chart_data = get_vaccinated_people_chart_data(vaccination, vaccination_dates)
    last_day_vaccination_data = [x for x in vaccination
                                 if x["MeasurementType"] == "Vaccinated"
                                 and x["VaccinationSeries"] == 1][-1]
    last_day_completed_vaccination_data = [x for x in vaccination
                                           if x["MeasurementType"] == "FullyVaccinated"
                                           and x["VaccinationSeries"] == 1][-1]
    last_day_doses_administered_data = [x for x in vaccination
                                        if x["MeasurementType"] == "DosesAdministered"
                                        and x["VaccinationSeries"] == 1][-1]

    # Delete vaccination data from memory
    del vaccination

    n_fully_vaccinated = last_day_completed_vaccination_data["TotalCount"]
    n_fully_vaccinated_change = last_day_completed_vaccination_data["DailyCount"]
    n_fully_vaccinated_percentage = last_day_completed_vaccination_data["PopulationCoverage"]
    n_vaccinated_at_least_one_dose = last_day_vaccination_data["TotalCount"]
    n_vaccinated_at_least_one_dose_change = last_day_vaccination_data["DailyCount"]
    n_vaccinated_at_least_one_dose_percentage = last_day_vaccination_data["PopulationCoverage"]
    # vaccination_number_total = (n_vaccinated_at_least_one_dose - n_fully_vaccinated)
    # vaccination_number_last_day = (n_vaccinated_at_least_one_dose_change - n_fully_vaccinated_change)

    # 7. Create and save final JSON
    log_status("Compiling final JSON")
    final_json = {
        "updatedOn": TODAY_DMYHM,
        "confirmedCasesNumber": str(n_confirmed_cases),
        # TODO: For consistency, we should include the change in the number of confirmed cases as well.
        "hospitalisedNumber": str(hospital["activehospitalizations"][-1]),
        "hospitalChanged": str(hospital["activehospitalizations"][-1]
                               - hospital["activehospitalizations"][-2]),
        "onVentilation": on_ventilation,
        "onVentilationNumber": n_on_ventilation,
        "onVentilationChanged": n_on_ventilation_change,
        "deceased": deceased,
        "deceasedNumber": str(n_deaths),
        "deceasedChanged": str(n_deaths_change),
        "testsAdministeredNumber": str(n_tests_administered),
        # TODO: For consistency, we should include the change in the number of tests as well.
        "activeCasesNumber": str(n_active_cases),
        "activeChanged": str(n_active_cases_change),
        "perHundred": str(per_100k),  # TODO: This should be given a clearer name.
        "dates2": [str(x.date()) for x in case_dates],  # TODO: Change key to "caseDates"
        "dates3": [str(x.date()) for x in vaccination_dates],  # TODO: Change key to "vaccinationDates"
        "counties": counties,
        "age_groups": age_groups,
        "dataInfectionsByCounty": infections_by_county,
        "dataInfectionsByCounty10000": infections_by_county_10000,
        "dataActiveInfectionsByCounty100k": active_infections_by_county_100k,
        "dataActiveInfectionsByCounty": active_infections_by_county,
        "dataTestsPopRatio": tests_pop_ratio,
        "countyByDay": county_by_day,
        "dataCountyDailyActive": county_daily_active,
        "dataConfirmedCasesByCounty": confirmed_cases_by_county,
        "dataCumulativeCasesChart": cumulative_cases_chart_data,
        "dataNewCasesPerDayChart": new_cases_per_day_chart_data,
        "dataCumulativeTestsChart": cumulative_tests_chart_data,
        "dataTestsPerDayChart": tests_per_day_chart_data,
        "dataPositiveTestsByAgeChart": positive_test_by_age_chart_data,
        "dataPositiveNegativeChart": positive_negative_chart_data,
        "dataVaccinatedPeopleChart": vaccinated_people_chart_data,
        "dataMunicipalities": municipalities_data,
        "hospital": hospital,  # TODO: Rename this to make it clearer what data it contains.
        # "vaccinationNumberTotal": vaccination_number_total,
        # "vaccinationNumberLastDay": vaccination_number_last_day,
        "fullyVaccinatedNumber": n_fully_vaccinated,
        "fullyVaccinatedNumberChange": n_fully_vaccinated_change,
        "fullyVaccinatedNumberPercentage": n_fully_vaccinated_percentage,
        "vaccinatedAtLeastOneDoseNumber": n_vaccinated_at_least_one_dose,
        "vaccinatedAtLeastOneDoseChange": n_vaccinated_at_least_one_dose_change,
        "vaccinatedAtLeastOneDosePercentage": n_vaccinated_at_least_one_dose_percentage,
    }

    # Dump JSON output
    log_status("Dumping JSON output")
    save_as_json(OUTPUT_FILE_LOCATION, final_json)

    # Log finish time
    finish = datetime.today().astimezone(ESTONIA_TIMEZONE).strftime("%d/%m/%Y, %H:%M")
    log_status("Finished update process at " + finish)
                                  args.task_train_steps, args.multi_head)
            results = model(task_batch, meta_train=True)
            train_step += 1

            if train_step % args.write_ival == 0:
                write_train_res(results, train_step, log_dir)

            if train_step % args.test_ival == 0:
                evaluate(model, 'val', train_step)
                pp("Resuming training")

            if args.log_ival > 0 and train_step % args.log_ival == 0:
                t = (time.time() - t) / args.log_ival
                log_status(results, 'step={}'.format(train_step), t)
                t = time.time()

            # NaN is the only value not equal to itself; stop on a diverged loss
            if results.train_loss != results.train_loss:
                break

            if train_step == args.meta_train_steps:
                break
    except KeyboardInterrupt:
        pp('Meta-training stopped.')
    else:
        pp('Meta-training complete.')

    try:
        model = torch.load(join(log_dir, 'model.pth.tar'))
def train(args):
    # make datasets for training and validation
    assert args.lr_train_path is not None
    assert args.hr_train_path is not None
    assert args.lr_val_path is not None
    assert args.hr_val_path is not None

    # patch the train data for training
    train_dataset = SRDataset(lr_path=args.lr_train_path,
                              hr_path=args.hr_train_path,
                              patch_size=args.patch_size,
                              scale=args.scale,
                              aug=args.augment,
                              normalization=args.normalization,
                              need_patch=True,
                              suffix=args.suffix)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.n_threads)
    val_dataset = SRDataset(lr_path=args.lr_val_path,
                            hr_path=args.hr_val_path,
                            patch_size=args.patch_size,
                            scale=args.scale,
                            normalization=args.normalization,
                            need_patch=True,
                            suffix=args.suffix)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False,
                                num_workers=args.n_threads)

    # check logs
    check_logs(args)
    writer = SummaryWriter(log_dir=args.tblog)

    # check for gpu
    device = check_hardware(args)

    # check the model
    module = import_module('model.' + args.model.lower())
    model = module.wrapper(args)

    # continue training or not
    start_epoch = 0
    best_val_psnr = -1.0
    best_val_loss = 1e8
    if args.continue_train:
        status_ = load_status(args.status_logger)
        args.lr = status_['lr']
        start_epoch = status_['epoch']
        best_val_loss = status_['best_val_loss']
        pretrained_dict = torch.load(status_['last_weight_pth'])
        model_dict = model.state_dict()
        # keep only the weights whose names match the current model
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        logger.info(
            f"Load model from {status_['last_weight_pth']} for continuing train."
        )

    if not args.cpu:
        model = model.to(device)

    # check the optimizer
    optimizer = check_optimizer_(args, model)

    # check the lr schedule
    lr_schedule = StepLR(optimizer, args.decay_step, args.gamma)

    # check the loss
    criterion = check_loss_(args)

    # iterate to train the model, with validation after every epoch
    for epoch in range(start_epoch, args.epochs):
        torch.cuda.empty_cache()
        train_loss = 0.0
        model.train()
        for batch, data in enumerate(train_dataloader):
            x = data['lr'].to(device)
            y = data['hr'].to(device)

            # perform forward calculation
            y_hat = model(x)
            loss_ = criterion(y_hat, y)
            train_loss += loss_.item()
            logger.info("Epoch-%d-Batch-%d, train loss: %.4f" %
                        (epoch, batch, loss_.item()))
            writer.add_scalar('Train/Batchloss', loss_.item(),
                              global_step=epoch * len(train_dataloader) + batch)

            # perform backward calculation
            optimizer.zero_grad()
            loss_.backward()

            # perform gradient clipping
            if args.gclip > 0:
                nn.utils.clip_grad_value_(model.parameters(), args.gclip)
            optimizer.step()

        train_loss = train_loss / (batch + 1)
        logger.info("Epoch-%d, train loss: %.4f" % (epoch, train_loss))
        writer.add_scalar('Train/Epochloss', train_loss, global_step=epoch)

        # validation
        model.eval()
        with torch.no_grad():
            val_loss = 0.0
            val_psnr = 0.0
            for batch, data in enumerate(val_dataloader):
                x = data['lr'].to(device)
                y = data['hr'].to(device)
                y_hat = model(x)
                loss_ = criterion(y_hat, y)
                val_loss += loss_.item()

                # save the intermediate result for visualisation
                y = y[0].detach().cpu().numpy()
                y_hat = y_hat[0].detach().cpu().numpy()
                y = np.transpose(y, (1, 2, 0))
                y_hat = np.transpose(y_hat, (1, 2, 0))
                # if args.normalization == 1:
                #     y = y * 255.0
                #     y_hat = y_hat * 255.0
                y = denormalize_(y, args.normalization)
                y_hat = denormalize_(y_hat, args.normalization)
                # clipping is important: without it, anomalous RGB noise survives
                y = np.clip(y, 0.0, 255.0)
                y_hat = np.clip(y_hat, 0.0, 255.0)
                _res = np.concatenate([y_hat, y], axis=1).astype(np.uint8)
                cv2.imwrite(
                    os.path.join(args.log_img_root, f'{epoch}_{batch}.png'),
                    _res)
            val_loss = val_loss / (batch + 1)
            logger.info("Epoch-%d, validation loss: %.4f" % (epoch, val_loss))
            writer.add_scalar('Val/loss', val_loss, global_step=epoch)

        # adjust the learning rate
        lr_schedule.step(epoch=epoch)
        writer.add_scalar('Train/lr', lr_schedule.get_lr()[0], global_step=epoch)

        # save the model parameters with the best validation loss
        if best_val_loss > val_loss:
            best_val_loss = val_loss
            model.eval().cpu()
            torch.save(model.state_dict(), args.weight_pth)
            logger.info(f"Save {args.weight_pth}")
            model.to(device).train()

        # log the training status
        model.eval().cpu()
        torch.save(model.state_dict(), args.status_pth)
        model.to(device).train()
        status_ = {
            'epoch': epoch,
            'lr': lr_schedule.get_lr()[0],
            'best_val_loss': best_val_loss,
            'last_weight_pth': args.status_pth,
        }
        log_status(args.status_logger, **status_)
def main():
    # Log status
    log_status("Starting data update process at " + str(today))

    # Get current number of deaths from Terviseamet's Covid dashboard
    try:
        scrape_deaths()
    except Exception:
        log_status("Aborting data update.")
        exit()

    # Load data from external services
    log_status("Downloading data from TEHIK: Test results")
    json_testing = get_json_data(TESTING_ENDPOINT)
    log_status("Downloading data from TEHIK: Location data")
    json_test_location = get_json_data(TEST_LOCATION_ENDPOINT)
    log_status("Downloading data from TEHIK: Hospitalisation data")
    json_hospitalisation = get_json_data(HOSPITALISATION_ENDPOINT)
    log_status("Downloading data from TEHIK: Vaccination data")
    json_vaccination = get_json_data(VACCINATION_ENDPOINT)

    # Validate data from remote endpoints
    # TODO: Add checks that the testing and vaccination data are up to date. We will need to adopt
    #   a different approach than for the test location and hospitalisation data due to the fact
    #   that the data structure of the JSON is different. Checking the "Last-Modified" header of
    #   the response may be the way to go and would handle the possibility that there are no tests
    #   or vaccinations on a particular day.
    ok = True
    if json_testing is None:
        log_status("Unable to retrieve testing data")
        ok = False
    if json_test_location is None:
        log_status("Unable to retrieve location data")
        ok = False
    elif not is_up_to_date(json_test_location, "LastStatisticsDate"):
        log_status("Location data is not up-to-date")
        ok = False
    if json_hospitalisation is None:
        log_status("Unable to retrieve hospitalisation data")
        ok = False
    elif not is_up_to_date(json_hospitalisation, "LastLoadStatisticsDate"):
        log_status("Hospitalisation data is not up-to-date")
        ok = False
    if json_vaccination is None:
        log_status("Unable to retrieve vaccination data")
        ok = False
    # TODO: Review whether this check is needed. I have commented it out for now.
    # if not is_header_last_modified_up_to_date(TEST_LOCATION_ENDPOINT):
    #     log_status("Location data last modified is not up-to-date")
    #     ok = False

    if not ok:
        log_status("One or more of the TEHIK APIs has not been updated or could not be retrieved.")
        log_status("Aborting data update.")
        exit()

    # Load locally-stored data
    log_status("Loading local data files")
    try:
        json_deaths = read_json_from_file(DEATHS_FILE_LOCATION)
        json_manual = read_json_from_file(MANUAL_DATA_FILE_LOCATION)
    except Exception:
        # Log error
        log_status('Error when loading local data:')
        log_status(traceback.format_exc())
        exit()

    # Log status
    log_status("Calculating main statistics")

    # Statsbar
    # Find count of confirmed cases
    n_confirmed_cases = np.sum([res["ResultValue"] == "P" for res in json_testing])

    # Find total number of tests
    n_tests_administered = len(json_testing)

    # Create date ranges for charts
    # dates1 = pd.date_range(start=DATE_SETTINGS["dates1_start"], end=yesterday)
    dates2 = pd.date_range(start=DATE_SETTINGS["dates2_start"], end=yesterday)
    dates3 = pd.date_range(start=DATE_SETTINGS["dates3_start"], end=yesterday)

    # Set recovered, deceased, hospitalised and ICU time series
    hospital = get_hospital_data(json_hospitalisation, DATE_SETTINGS["dates2_start"])
    recovered = hospital["discharged"]
    json_manual["deceased"].update(json_deaths)
    deceased = list(json_manual["deceased"].values())
    hospitalised = hospital["activehospitalizations"]
    # TODO: Based on cross-checking with the hospitalisation data published by TEHIK, the data
    #   listed in the manual_data.json file with the field name "intensive" appears to show the
    #   number of patients on ventilation. We should fix the terminology and make sure that the
    #   intensive and on-ventilation statistics are being calculated correctly.
    intensive = list(get_in_intensive_data(json_hospitalisation, json_manual["intensive"]).values())
    on_ventilation = list(get_on_ventilation_data(json_hospitalisation).values())

    n_deaths = deceased[-1]
    n_deaths_change = int(deceased[-1]) - int(deceased[-2])

    # Get data for each chart
    log_status("Calculating data for charts")
    infections_by_county = get_infection_count_by_county(json_testing, county_mapping)
    infections_by_county_10000 = get_infections_data_by_count_10000(infections_by_county, county_sizes)
    tests_pop_ratio = get_test_data_pop_ratio(infections_by_county_10000)
    county_by_day = get_county_by_day(json_testing, dates2, county_mapping, county_sizes)
    confirmed_cases_by_county = get_confirmed_cases_by_county(json_testing, county_mapping)
    cumulative_cases_chart_data = get_cumulative_cases_chart_data(
        json_testing, recovered, deceased, hospitalised, intensive, on_ventilation, dates2)
    new_cases_per_day_chart_data = get_new_cases_per_day_chart_data(cumulative_cases_chart_data)
    cumulative_tests_chart_data = get_cumulative_tests_chart_data(json_testing, dates2)
    tests_per_day_chart_data = get_tests_per_day_chart_data(json_testing, dates2)
    positive_test_by_age_chart_data = get_positive_tests_by_age_chart_data(json_testing)
    positive_negative_chart_data = get_positive_negative_chart_data(json_testing, county_mapping)
    vaccinated_people_chart_data = get_vaccinated_people_chart_data(json_vaccination, dates3)
    county_daily_active = get_county_daily_active(json_testing, dates2, county_mapping, county_sizes)
    n_active_cases = cumulative_cases_chart_data["active"][-1]
    n_active_cases_change = (cumulative_cases_chart_data["active"][-1]
                             - cumulative_cases_chart_data["active"][-2])
    active_infections_by_county = [
        {"MNIMI": k, "sequence": v, "drilldown": k}
        for k, v in county_daily_active["countyByDayActive"].items()
    ]
    active_infections_by_county_100k = [
        [k, round(v[-1] / county_sizes[k] * 100000, 2)]
        for k, v in county_daily_active["countyByDayActive"].items()
    ]
    municipalities_data = get_municipality_data(json_test_location, county_mapping)
    per_100k = cumulative_cases_chart_data["active100k"][-1]

    # Calculate vaccination data
    log_status("Calculating vaccination data")
    last_day_vaccination_data = [x for x in json_vaccination
                                 if x["MeasurementType"] == "Vaccinated"][-1]
    last_day_completed_vaccination_data = [x for x in json_vaccination
                                           if x["MeasurementType"] == "FullyVaccinated"][-1]
    # TODO: Doses administered
    # last_day_doses_administered_data = [x for x in json_vaccination
    #                                     if x['MeasurementType'] == 'DosesAdministered'][-1]
    completed_vaccination_number_total = last_day_completed_vaccination_data["TotalCount"]
    completed_vaccination_number_last_day = last_day_completed_vaccination_data["DailyCount"]
    all_vaccination_number_total = last_day_vaccination_data["TotalCount"]
    all_vaccination_number_last_day = last_day_vaccination_data["DailyCount"]
    vaccination_number_total = (all_vaccination_number_total
                                - completed_vaccination_number_total)
    vaccination_number_last_day = (all_vaccination_number_last_day
                                   - completed_vaccination_number_last_day)
    fully_vaccinated_from_total_vaccinated_percentage = round(
        completed_vaccination_number_total * 100 / all_vaccination_number_total, 2)

    # Create dictionary for final JSON
    log_status("Compiling final JSON")
    final_json = {
        "updatedOn": today,
        "confirmedCasesNumber": str(n_confirmed_cases),
        # TODO: For consistency, we should include the change in the number of confirmed cases as well.
        "hospitalisedNumber": str(hospital["activehospitalizations"][-1]),
        "hospitalChanged": str(hospital["activehospitalizations"][-1]
                               - hospital["activehospitalizations"][-2]),
        "deceasedNumber": str(n_deaths),
        "deceasedChanged": str(n_deaths_change),
        "recoveredNumber": str(hospital["discharged"][-1]),
        "recoveredChanged": str(hospital["discharged"][-1] - hospital["discharged"][-2]),
        "testsAdministeredNumber": str(n_tests_administered),
        # TODO: For consistency, we should include the change in the number of tests as well.
        "activeCasesNumber": str(n_active_cases),
        "activeChanged": str(n_active_cases_change),
        "perHundred": str(per_100k),  # TODO: This should be given a clearer name.
        # TODO: I can't find anywhere in the app where "dates1" is used. Is it needed?
        #   Commented out for now.
        # "dates1": [str(x.date()) for x in dates1],
        "dates2": [str(x.date()) for x in dates2],
        "dates3": [str(x.date()) for x in dates3],
        "counties": counties,
        "age_groups": age_groups,
        "dataInfectionsByCounty": infections_by_county,
        "dataInfectionsByCounty10000": infections_by_county_10000,
        "dataActiveInfectionsByCounty100k": active_infections_by_county_100k,
        "dataActiveInfectionsByCounty": active_infections_by_county,
        "dataTestsPopRatio": tests_pop_ratio,
        "countyByDay": county_by_day,
        "dataCountyDailyActive": county_daily_active,
        "dataConfirmedCasesByCounties": confirmed_cases_by_county,
        "dataCumulativeCasesChart": cumulative_cases_chart_data,
        "dataNewCasesPerDayChart": new_cases_per_day_chart_data,
        "dataCumulativeTestsChart": cumulative_tests_chart_data,
        "dataTestsPerDayChart": tests_per_day_chart_data,
        "dataPositiveTestsByAgeChart": positive_test_by_age_chart_data,
        "dataPositiveNegativeChart": positive_negative_chart_data,
        "dataVaccinatedPeopleChart": vaccinated_people_chart_data,
        "dataMunicipalities": municipalities_data,
        "hospital": hospital,  # TODO: Rename this to make it clearer what data it contains.
        "vaccinationNumberTotal": vaccination_number_total,
        "vaccinationNumberLastDay": vaccination_number_last_day,
        "completedVaccinationNumberTotal": completed_vaccination_number_total,
        "completedVaccinationNumberLastDay": completed_vaccination_number_last_day,
        "allVaccinationNumberTotal": all_vaccination_number_total,
        "allVaccinationNumberLastDay": all_vaccination_number_last_day,
        "allVaccinationFromPopulationPercentage": last_day_vaccination_data["PopulationCoverage"],
        "completelyVaccinatedFromTotalVaccinatedPercentage": fully_vaccinated_from_total_vaccinated_percentage,
    }

    # Dump JSON output
    log_status("Dumping JSON output")
    save_as_json(OUTPUT_FILE_LOCATION, final_json)

    # Log finish time
    finish = datetime.today().astimezone(estonian_timezone).strftime("%d/%m/%Y, %H:%M")
    log_status("Finished update process at " + finish)
def get_positive_tests_by_age_chart_data(json):
    log_status("get_positive_tests_by_age_chart_data()")

    results = [d["ResultValue"] for d in json]
    genders = [d["Gender"] for d in json]
    age_groups = [d["AgeGroup"] for d in json]
    df = pd.DataFrame({"Gender": genders, "AgeGroup": age_groups, "ResultValue": results})

    pos_results = df[df.ResultValue == "P"].groupby(["Gender", "AgeGroup"]).count()
    neg_results = df[df.ResultValue == "N"].groupby(["Gender", "AgeGroup"]).count()
    pos_results.rename(columns={"ResultValue": "Positive"}, inplace=True)
    neg_results.rename(columns={"ResultValue": "Negative"}, inplace=True)

    end_df = pos_results.join(neg_results, how="outer")
    end_df.fillna(0, inplace=True)
    end_df["Positive"] = end_df[["Positive"]].astype("int")

    # (Gender, AgeGroup) keys in chart order; "M" is male, "N" is female
    age_bracket_order = [
        "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39",
        "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74",
        "75-79", "80-84", "üle 85",
    ]
    male_order = [("M", age) for age in age_bracket_order]
    female_order = [("N", age) for age in age_bracket_order]

    # Collect positive and negative counts per gender and age bracket
    malePositive = []
    maleNegative = []
    femalePositive = []
    femaleNegative = []
    for i in range(len(male_order)):
        malePositive.append(end_df.loc[male_order[i], "Positive"])
        maleNegative.append(end_df.loc[male_order[i], "Negative"])
        femalePositive.append(end_df.loc[female_order[i], "Positive"])
        femaleNegative.append(end_df.loc[female_order[i], "Negative"])

    femaleTotal = sum(femalePositive) + sum(femaleNegative)
    maleTotal = sum(malePositive) + sum(maleNegative)
    malePositiveTotal = sum(malePositive)
    femalePositiveTotal = sum(femalePositive)
    maleNegativeTotal = sum(maleNegative)
    femaleNegativeTotal = sum(femaleNegative)

    return {
        "malePositive": malePositive,
        "maleNegative": maleNegative,
        "maleTotal": maleTotal,
        "malePositiveTotal": malePositiveTotal,
        "maleNegativeTotal": maleNegativeTotal,
        "femalePositive": femalePositive,
        "femaleNegative": femaleNegative,
        "femaleTotal": femaleTotal,
        "femalePositiveTotal": femalePositiveTotal,
        "femaleNegativeTotal": femaleNegativeTotal,
    }
def main():
    deaths = False
    try:
        scrape_deaths()
        deaths = True
    except Exception:
        log_status("Failed to scrape deaths")

    # Download data from external services
    log_status("Downloading data from TEHIK: Test results")
    download_data(TESTING_ENDPOINT, TEST_RESULTS_PATH + ".tmp")
    log_status("Downloading data from TEHIK: Location data")
    download_data(TEST_LOCATION_ENDPOINT, TEST_LOCATIONS_PATH + ".tmp")
    log_status("Downloading data from TEHIK: Hospitalization data")
    download_data(HOSPITALIZATION_ENDPOINT, HOSPITALIZATION_PATH + ".tmp")
    log_status("Downloading data from TEHIK: Vaccination data")
    download_data(VACCINATION_ENDPOINT, VACCINATIONS_PATH + ".tmp")

    # Validate data from remote endpoints
    #
    # TODO: Add checks that the testing and vaccination data are up to date. We will need to adopt
    #   a different approach than for the test location and hospitalisation data due to the fact
    #   that the data structure of the JSON is different. Checking the "Last-Modified" header of
    #   the response may be the way to go and would handle the possibility that there are no tests
    #   or vaccinations on a particular day.
    hospitalization = read_json_from_file(HOSPITALIZATION_PATH + ".tmp")
    if not is_up_to_date(hospitalization, "LastLoadStatisticsDate"):
        raise Exception("Hospitalization data is not up-to-date")

    # Replace the old files only after every download has succeeded
    log_status("All OK, replacing old files with downloaded files")
    if deaths:
        move(DEATHS_PATH + ".tmp", DEATHS_PATH)
    move(TEST_RESULTS_PATH + ".tmp", TEST_RESULTS_PATH)
    move(TEST_LOCATIONS_PATH + ".tmp", TEST_LOCATIONS_PATH)
    move(HOSPITALIZATION_PATH + ".tmp", HOSPITALIZATION_PATH)
    move(VACCINATIONS_PATH + ".tmp", VACCINATIONS_PATH)
def get_infections_data_by_count_10000(infections_by_county, county_sizes):
    log_status("get_infections_data_by_count_10000()")
    # Each entry is a [county, count, county] triple; rescale the count per 10,000 residents
    return [
        [county, round(value / county_sizes[county] * 10000, 2), county]
        for county, value, _ in infections_by_county
    ]
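# Worked sketch with made-up figures ("Näidismaa" is a hypothetical county, not
# real data): 500 infections among 200,000 residents map to
# 500 / 200000 * 10000 = 25.0 cases per 10,000 residents.
def _check_per_10000_scaling():
    sizes = {"Näidismaa": 200_000}
    scaled = get_infections_data_by_count_10000([["Näidismaa", 500, "Näidismaa"]], sizes)
    assert scaled == [["Näidismaa", 25.0, "Näidismaa"]]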
def get_county_daily_active(json, dates, county_mapping, county_sizes):
    log_status("get_county_daily_active()")

    chart_counties = [
        "Harjumaa", "Hiiumaa", "Ida-Virumaa", "Jõgevamaa", "Järvamaa",
        "Läänemaa", "Lääne-Virumaa", "Põlvamaa", "Pärnumaa", "Raplamaa",
        "Saaremaa", "Tartumaa", "Valgamaa", "Viljandimaa", "Võrumaa",
    ]

    # Count positive tests per (county, date) pair
    county_date_counts = defaultdict(int)
    for res in json:
        if res["ResultValue"] == "P":
            date = res["StatisticsDate"]
            county = county_mapping[res["County"]]
            if county in chart_counties:
                county_date_counts[(county, date)] += 1

    county_by_day = {}
    active_map_100k_playback = []
    for county in chart_counties:
        per_day_county = []
        active_per_day_county_100k = []
        for date in dates:
            val = county_date_counts[(county, str(date.date()))]
            per_day_county.append(val)
            active_per_day_county_100k.append(val / county_sizes[county] * 100000)

        # Active cases: rolling 14-day sum of new positive tests
        county_by_day[county] = list(
            map(int, pd.Series(per_day_county).rolling(14, min_periods=0).sum())
        )
        active_map_100k_playback.append({
            "MNIMI": county,
            "sequence": list(
                round(
                    pd.Series(active_per_day_county_100k).rolling(14, min_periods=0).sum(),
                    1,
                )
            ),
            "drilldown": county,
        })

    return {
        "countyByDayActive": county_by_day,
        "activeMap100kPlayback": active_map_100k_playback,
    }