def test_initial_window(self):
    """Check the initial window of the sample graph.

    Verifies the first and last included dates reported by the
    ``DataManager`` and that the change sets returned by
    ``get_initial_window`` equal the expected list.
    """
    data_manager = DataManager("data/test_data/sample_graph.json", 300)
    change_sets = data_manager.get_initial_window()

    assert (
        max_of_day(datetime(2018, 11, 17))
        == data_manager.get_first_included_date()
    ), "Sample graph starts on 17 Nov 2018, not on {}".format(
        data_manager.get_first_included_date()
    )
    # The failure message must report the *last* included date, since that is
    # the value being checked here.
    assert (
        max_of_day(datetime(2019, 9, 12))
        == data_manager.get_last_included_date()
    ), "Sample graph's initial window ends on 12 Sep 2019, not on {}".format(
        data_manager.get_last_included_date()
    )

    expected_change_sets = [
        ChangeSet(
            commit_hash="CS0",
            date=max_of_day(str_to_date("2018-11-17T12:00:00Z")),
            author="d1",
            issues=["I0"],
            code_changes=[CodeChange("F0", "ADD")],
            num_files_in_project=1,
        ),
        ChangeSet(
            commit_hash="CS1",
            date=max_of_day(str_to_date("2019-01-15T12:00:00Z")),
            author="d1",
            issues=["I1"],
            code_changes=[
                CodeChange("F1", "ADD"),
                CodeChange("F2", "ADD"),
            ],
            num_files_in_project=3,
        ),
        ChangeSet(
            commit_hash="CS2",
            date=max_of_day(str_to_date("2019-02-14T12:00:00Z")),
            author="d2",
            issues=["I2"],
            code_changes=[
                CodeChange("F2", "MODIFY"),
                CodeChange("F3", "ADD"),
            ],
            num_files_in_project=4,
        ),
        ChangeSet(
            commit_hash="CS3",
            date=max_of_day(str_to_date("2019-05-15T12:00:00Z")),
            author="d3",
            issues=["I2"],
            code_changes=[CodeChange("F4", "ADD")],
            num_files_in_project=5,
        ),
    ]
    assert change_sets == expected_change_sets, "Change sets are not as expected"
def from_json(json_obj: typing.Dict, chores_listing: typing.List[Chore], housemates: typing.List[Housemate]):
    """Build a ``ChoreOccurrence`` from its JSON dictionary form.

    Parameters
    ----------
    json_obj
        Mapping with the keys ``base_chore_key``, ``happened_on``,
        ``was_deep_clean`` and ``cleaner_name``.
    chores_listing
        Chores to search; the first one whose ``key`` equals
        ``json_obj["base_chore_key"]`` is used.
    housemates
        Housemates to search; the first one whose ``name`` equals
        ``json_obj["cleaner_name"]`` is used.

    Raises
    ------
    KeyError
        If ``json_obj`` lacks one of the required keys.
    ValueError
        If no chore or housemate matches the referenced key / name.
    """
    base_chore_key: str = json_obj["base_chore_key"]
    # ``filter`` returns an iterator on Python 3 and cannot be indexed, so
    # take the first match with ``next`` instead.
    base_chore = next(
        (chore for chore in chores_listing if chore.key == base_chore_key), None
    )
    if base_chore is None:
        raise ValueError(f"No chore with key {base_chore_key!r} in chores listing")

    happened_on: date = util.str_to_date(json_obj["happened_on"])
    was_deep_clean: bool = json_obj["was_deep_clean"]

    cleaner_name: str = json_obj["cleaner_name"]
    cleaner = next(
        (person for person in housemates if person.name == cleaner_name), None
    )
    if cleaner is None:
        raise ValueError(f"No housemate named {cleaner_name!r}")

    return ChoreOccurrence(base_chore, happened_on, was_deep_clean, cleaner)
def parse_adjustments(adjustments_path):
    """Read the adjustments CSV and return a list of ``AdjustmentRecord``.

    Every row produced by ``parse_csv(adjustments_path)`` contributes one
    record: its ``'day'`` field is converted with ``str_to_date`` and its
    ``'delta'`` field with ``str_to_timedelta``.
    """
    records = []
    for row in parse_csv(adjustments_path):
        day = str_to_date(row['day'])
        delta = str_to_timedelta(row['delta'])
        records.append(AdjustmentRecord(day=day, delta=delta))
    return records
def convert_str_value_to_correct_type(param_value, old_value, use_timedelta=False):
    """Convert param_value to the same type as old_value.

    Parameters
    ----------
    param_value
        The new value, normally a string from the command line.
    old_value
        The existing value whose type decides the conversion
        (``bool``, ``int``, ``float`` or ``datetime.date``).
    use_timedelta : bool
        Only used when ``old_value`` is a date: if true, ``param_value`` is
        read as a whole number of days and a ``datetime.timedelta`` is
        returned instead of a date.

    Raises
    ------
    ValueError
        If a string given for a boolean parameter is not a recognised
        true/false spelling (or the int/float conversion fails).
    NotImplementedError
        If ``old_value`` has an unsupported type.
    """
    # bool is handled first: it is a subclass of int, and ``bool("False")``
    # is True, so strings need explicit parsing.
    if isinstance(old_value, bool):
        if isinstance(param_value, str):
            normalized = param_value.strip().lower()
            if normalized in ("true", "t", "yes", "y", "on", "1"):
                return True
            if normalized in ("false", "f", "no", "n", "off", "0", ""):
                return False
            raise ValueError(f"Cannot interpret {param_value!r} as a boolean")
        return bool(param_value)

    for primitive_type in (int, float):
        if isinstance(old_value, primitive_type):
            return primitive_type(param_value)

    if isinstance(old_value, datetime.date):
        if use_timedelta:
            return datetime.timedelta(days=int(param_value))
        return str_to_date(param_value)

    raise NotImplementedError(f"Unknown type for value: {type(old_value)}")
def _generate_date_to_change_sets(dataset_path):
    """
    Generate a dictionary for the pairs of date and change sets committed that date.

    The result is memoized in the module-level ``cache`` keyed by
    ``dataset_path``. Every day between the earliest and the latest change
    set date that has no change set is mapped to an empty list.

    Parameters
    ----------
    dataset_path:
        Path of a JSON file with a top-level ``"change_sets"`` list.

    Returns
    -------
    dict:
        A sorted (by date) dictionary for date and change sets pairs.
    """
    if dataset_path in cache:
        return cache[dataset_path]

    with open(dataset_path, encoding="utf8") as f:
        change_set_jsons = json.load(f)["change_sets"]

    date_to_change_sets = defaultdict(list)
    for change_set_json in change_set_jsons:
        code_changes = [
            CodeChange(
                code_change["file_path"],
                code_change["change_type"],
                code_change.get("old_file_path", None),
            )
            for code_change in change_set_json["code_changes"]
        ]
        change_set = ChangeSet(
            change_set_json["commit_hash"],
            change_set_json["author"],
            max_of_day(str_to_date(change_set_json["date"])),
            change_set_json["issues"],
            code_changes,
            change_set_json["num_current_files"],
        )
        date_to_change_sets[max_of_day(change_set.date)].append(change_set)

    # Fill the blanks with empty lists. The bounds are taken with min/max so
    # the fill does not depend on the order of the change sets in the file,
    # and an empty dataset is simply left empty.
    if date_to_change_sets:
        date = min(date_to_change_sets)
        last_date = max(date_to_change_sets)
        while date < last_date:
            date_to_change_sets.setdefault(date, [])
            date += timedelta(days=1)

    change_sets = sort_dict(date_to_change_sets)
    cache[dataset_path] = change_sets
    return change_sets
def convert_mean_params_to_params_dict(mean_params):
    """Turn [param_name, param_value_raw] pairs into a name -> value dict.

    Values that ``str_to_date`` can parse are stored as the parsed date;
    every other value is stored unchanged.

    Parameters
    ----------
    mean_params : list
        list of [param_name, param_value_raw] pairs
    """

    def _date_or_raw(raw):
        # A TypeError/ValueError from the parser means "not a date": keep raw.
        try:
            return str_to_date(raw)
        except (TypeError, ValueError):
            return raw

    return {name: _date_or_raw(raw) for name, raw in mean_params}
def main(): try: settings, args = process_command_line() except ValueError, value_error: logging.critical(value_error.message) print value_error.message sys.exit(1) conn = pyodbc.connect("DRIVER={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=" + PATH_TO_DB) cursor = conn.cursor() dates_to_parse = [] if settings.flag == "i": dates_to_parse = [date.fromordinal(str_to_date(settings.start_date).toordinal() + i) for i in xrange(str_to_date(settings.end_date).toordinal() - str_to_date(settings.start_date).toordinal() + 1)] elif settings.flag == "d": dates_to_parse = list( reversed([date.fromordinal(str_to_date(settings.start_date).toordinal() - i) for i in xrange(config.getint("Parse", "n_days_back") + 1)])) result_counter = 0 logging.info("Parsing has been started: starting_date=" + str(settings.start_date) + ".") for m in CURRENCY.keys(): for _date in dates_to_parse: try: result = soup_parse_result(m, _date) print result if result:
# Set up logging, load previously stored history data and decide whether new
# weekly data has to be generated.
logger = init_logging()
logger.info("log started")
history_check_result = load_history_data(data_file_name)
# turn it on when needed
# history_check_result = de_dupe(history_check_result)
if history_check_result:
    # History exists: it unpacks into the stored rows and the division list.
    history_data, list_of_divisions = history_check_result
    # First field of the last history row; it is parsed with
    # util.str_to_date below.
    previous_date = history_data[-1][0]
    # NOTE(review): this call goes through the `logging` module directly,
    # while every other message here uses `logger` - confirm this is intended.
    logging.info('load history data, most recent data on day of{}'.format(
        previous_date))
    # Compare today's date with the date of the most recent history row.
    # If more than 13 days have passed, build a generator of new weekly data
    # starting from that date; otherwise there is nothing new to fetch.
    # (presumably the generator yields (new data, list of divisions) tuples -
    # verify against generator_weekly_data)
    if datetime.date.today() - util.str_to_date(
            previous_date) > datetime.timedelta(days=13):
        logger.info('getting data since {} from DB'.format(previous_date))
        gen_tuple_data_div = generator_weekly_data(
            db_config, date_since=util.str_to_date(previous_date))
    else:
        logger.info(
            'last reporting date {}, run report 7 days after '.format(
                previous_date))
        # NOTE(review): gen_tuple_data_div is not assigned on this path;
        # confirm that later code checks update_history before using it.
        update_history = False
        logger.info('no new data, exiting')
else:
    # First run: no stored history was loaded, so start from an empty history
    # and generate weekly data without a start date.
    history_data = []
    gen_tuple_data_div = generator_weekly_data(db_config)
def main(args):
    """Fit four model parameters to reported deaths by grid search and report.

    Steps:

    1. Build the parameter dictionary (from ``args.best_params_dir`` or the
       hard-coded defaults) and apply command-line overrides.
    2. Read the cumulative deaths of ``args.subregion`` from
       ``../data/timeseries_prov/mortality_timeseries_prov.csv``.
    3. Run the simulation once as a baseline, then grid-search
       ``INITIAL_R_0``, ``LOCKDOWN_R_0``, ``DAILY_IMPORTS`` and
       ``MORTALITY_RATE`` for the lowest mean squared error between projected
       and reported cumulative deaths up to the last reported date.
    4. Re-run the best parameter set, print a summary, save a plot and
       optionally a CSV (``args.save_csv_fname``).

    Parameters
    ----------
    args
        Parsed command-line namespace.

    Raises
    ------
    ValueError
        If the mortality file has no rows for ``args.subregion`` or the last
        reported date is not one of the simulated dates.
    """
    # itertools is only needed here, so import locally.
    from itertools import product

    country = args.country
    region = args.region
    subregion = args.subregion
    skip_hospitalizations = args.skip_hospitalizations
    quarantine_perc = args.quarantine_perc
    quarantine_effectiveness = args.quarantine_effectiveness
    verbose = args.verbose

    if country != "US" and not region:
        region = "ALL"

    best_params_type = args.best_params_type
    assert best_params_type in ["mean", "median", "top", "top10"], best_params_type

    if args.best_params_dir:
        # Load parameters from file
        best_params = load_best_params_from_file(
            current_path / args.best_params_dir, country, region, subregion
        )
        simulation_start_date = str_to_date(best_params["first_date"])
        simulation_create_date = str_to_date(best_params["date"])
        simulation_end_date = str_to_date(best_params["projection_end_date"])

        region_params = {"population": best_params["population"]}
        # mean_params, median_params, top_params, or top10_params
        params_type_name = f"{best_params_type}_params"
        if verbose:
            print("best params type:", best_params_type)
        params_dict = convert_mean_params_to_params_dict(best_params[params_type_name])
    else:
        # You can hard code your own parameters if you do not want to use the
        # preset parameters. This can be especially useful for
        # regions/countries where we do not have projections. Then simply run
        # `python run_simulation.py -v` to use these parameters.
        simulation_start_date = datetime.date(2020, 2, 1)
        simulation_create_date = datetime.date.today()  # not used so can also be None
        simulation_end_date = datetime.date(2020, 10, 1)

        region_params = {"population": 332000000}
        params_dict = {
            "INITIAL_R_0": 2.24,
            "LOCKDOWN_R_0": 0.9,
            "INFLECTION_DAY": datetime.date(2020, 3, 18),
            "RATE_OF_INFLECTION": 0.25,
            "LOCKDOWN_FATIGUE": 1.0,
            "DAILY_IMPORTS": 500,
            "MORTALITY_RATE": 0.01,
            "REOPEN_DATE": datetime.date(2020, 5, 20),
            "REOPEN_SHIFT_DAYS": 0,
            "REOPEN_R": 1.2,
            "REOPEN_INFLECTION": 0.3,
            "POST_REOPEN_EQUILIBRIUM_R": 1.0,
            "FALL_R_MULTIPLIER": 1.001,
        }

    if args.simulation_start_date:
        simulation_start_date = str_to_date(args.simulation_start_date)
    if args.simulation_end_date:
        simulation_end_date = str_to_date(args.simulation_end_date)

    if args.set_param:
        print("---------------------------------------")
        print("Overwriting params from command line...")
        for param_name, param_value in args.set_param:
            assert param_name in params_dict, f"Unrecognized param: {param_name}"
            old_value = params_dict[param_name]
            new_value = convert_str_value_to_correct_type(param_value, old_value)
            print(f"Setting {param_name} to: {new_value}")
            params_dict[param_name] = new_value

    if args.change_param:
        print("---------------------------------------")
        print("Changing params from command line...")
        for param_name, value_change in args.change_param:
            assert param_name in params_dict, f"Unrecognized param: {param_name}"
            old_value = params_dict[param_name]
            new_value = old_value + convert_str_value_to_correct_type(
                value_change, old_value, use_timedelta=True
            )
            print(f"Changing {param_name} from {old_value} to {new_value}")
            params_dict[param_name] = new_value

    region_model = RegionModel(
        country,
        region,
        subregion,
        simulation_start_date,
        simulation_create_date,
        simulation_end_date,
        region_params,
        compute_hospitalizations=(not skip_hospitalizations),
    )

    if quarantine_perc > 0:
        print(f"Quarantine percentage: {quarantine_perc:.0%}")
        print(f"Quarantine effectiveness: {quarantine_effectiveness:.0%}")
        assert quarantine_effectiveness in [0.025, 0.1, 0.25, 0.5], (
            "must specify --quarantine_effectiveness percentage."
            " Possible values: [0.025, 0.1, 0.25, 0.5]"
        )
        quarantine_effectiveness_to_reduction_idx = {0.025: 0, 0.1: 1, 0.25: 2, 0.5: 3}
        region_model.quarantine_fraction = quarantine_perc
        region_model.reduction_idx = quarantine_effectiveness_to_reduction_idx[
            quarantine_effectiveness
        ]

    if verbose:
        print("================================")
        print(region_model)
        print("================================")
        print("Parameters:")
        for param_name, param_value in params_dict.items():
            print(f"{param_name:<25s} : {param_value}")

    # Reported cumulative deaths for the requested province, in file order.
    real_death_all = []
    real_date = None
    with open(
        current_path / "../data/timeseries_prov/mortality_timeseries_prov.csv", "r"
    ) as csvfile:
        for row in csv.DictReader(csvfile):
            if row["province"] == args.subregion:
                real_death_all.append(float(row["cumulative_deaths"]))
                # The date text is sliced as day [:2], month [3:5], year [-4:].
                reported = row["date_death_report"]
                real_date = datetime.date(
                    int(reported[-4:]), int(reported[3:5]), int(reported[:2])
                )
    if real_date is None:
        raise ValueError(
            f"No mortality data found for province: {args.subregion}"
        )

    if verbose:
        print("--------------------------")
        print("Running simulation...")
        print("--------------------------")

    # Baseline run with the unmodified parameters.
    t = time.time()
    region_model.init_params(tuple(params_dict.items()))
    dates, infections, hospitalizations, deaths = run(region_model)

    # Index of the last reported date inside the simulated date range.
    matches = np.where(dates == real_date)[0]
    if matches.size != 1:
        raise ValueError(
            f"Last reported date {real_date} is not within the simulated dates"
        )
    ind = matches.item()
    window = ind + 1

    # Left-pad the shorter series with zeros so both cover the same days.
    real_death_all = np.array(real_death_all, dtype=float)
    if window >= len(real_death_all):
        real_death_all = np.concatenate(
            (np.zeros(window - len(real_death_all)), real_death_all)
        )
    days = len(real_death_all)

    def _mean_squared_error(daily_deaths):
        """Return the MSE between projected and reported cumulative deaths."""
        deaths_proj = daily_deaths.cumsum()[:window]
        if days > len(deaths_proj):
            deaths_proj = np.concatenate(
                (np.zeros(days - len(deaths_proj)), deaths_proj)
            )
        return np.sum((real_death_all - deaths_proj) ** 2) / days

    tuned_names = ("INITIAL_R_0", "LOCKDOWN_R_0", "DAILY_IMPORTS", "MORTALITY_RATE")
    # Start from the baseline so a best parameter set always exists, even if
    # no grid point improves on it.
    best_parameters = {name: params_dict[name] for name in tuned_names}
    best_error = _mean_squared_error(deaths)

    IR = params_dict["INITIAL_R_0"]
    LR = params_dict["LOCKDOWN_R_0"]
    DI = params_dict["DAILY_IMPORTS"]
    MR = params_dict["MORTALITY_RATE"]
    grid = product(
        np.linspace(IR * 0.7, IR),
        np.linspace(LR * 0.95, LR * 1.05, 20),
        np.linspace(DI, DI * 1.35),
        np.linspace(MR * 1.5, MR * 2.5, 20),
    )
    for candidate in grid:
        params_dict.update(zip(tuned_names, candidate))
        region_model.init_params(tuple(params_dict.items()))
        _, _, _, candidate_deaths = run(region_model)
        error = _mean_squared_error(candidate_deaths)
        if error <= best_error:
            best_error = error
            best_parameters = dict(zip(tuned_names, candidate))
            print("Finding new optimum with error:", best_error)
            print(best_parameters)
    print(time.time() - t)

    # Re-run with the best parameters so that the reported series and the
    # per-day arrays stored on region_model all describe the same simulation.
    params_dict.update(best_parameters)
    region_model.init_params(tuple(params_dict.items()))
    dates, infections, hospitalizations, deaths = run(region_model)

    # The following are lists with length N, where N is the number of days
    # from simulation_start_date to simulation_end_date.
    #   dates            : datetime.date objects representing day i
    #   infections       : number of new infections on day i
    #   hospitalizations : occupied hospital beds on day i
    #   deaths           : number of new deaths on day i
    assert len(dates) == len(infections) == len(hospitalizations) == len(deaths)
    assert dates[0] == simulation_start_date
    assert dates[-1] == simulation_end_date

    deaths_total = deaths.cumsum()
    if verbose:
        infections_total = infections.cumsum()
        for i in range(len(dates)):
            hospitalization_str = ""
            if not skip_hospitalizations:
                hospitalization_str = (
                    f"Hospital beds in use: {hospitalizations[i]:,.0f} - "
                )
            daily_str = (
                f"{i+1:<3} - {dates[i]} - "
                f"New / total infections: {infections[i]:,.0f} / {infections_total[i]:,.0f} - "
                f"{hospitalization_str}"
                f"New / total deaths: {deaths[i]:,.2f} / {deaths_total[i]:,.1f} - "
                f"Mean R: {region_model.effective_r_arr[i]:.3f} - "
                f"IFR: {region_model.ifr_arr[i]:.2%}"
            )
            print(daily_str)  # comment out to spare console buffer
    print("-------------------------------------")
    print(f"End of simulation : {region_model.projection_end_date}")
    print(f"Total infections : {infections.sum():,.0f}")
    if not skip_hospitalizations:
        print(f"Peak hospital beds used : {hospitalizations.max():,.0f}")
    print(f"Total deaths : {deaths.sum():,.0f}")

    # Projected (best fit) versus reported cumulative deaths.
    plt.plot(deaths_total, color="blue", linewidth=3.0, linestyle="-.", label="Proj.")
    plt.plot(real_death_all, color="red", linewidth=3.0, linestyle="--", label="True")
    plt.xlabel("Days")
    plt.ylabel("Death")
    plt.title("Death Proj. Result")
    plt.legend(loc="best")
    plt.savefig(
        current_path / f"../output/Proj_{args.country}_{args.subregion}.png", dpi=1200
    )
    plt.close()

    if args.save_csv_fname:
        dates_str = np.array(list(map(str, dates)))
        combined_arr = np.vstack(
            (
                dates_str,
                infections,
                hospitalizations,
                deaths,
                region_model.effective_r_arr,
            )
        ).T
        headers = "dates,infections,hospitalizations,deaths,mean_r_t"
        np.savetxt(
            args.save_csv_fname, combined_arr, "%s", delimiter=",", header=headers
        )
        print("----------\nSaved file to:", args.save_csv_fname)
def main(args):
    """Run one simulation for the region described by ``args`` and report it.

    Parameters come either from the best-params file found through
    ``args.best_params_dir`` or from the hard-coded defaults below, and can be
    overridden with ``args.set_param`` / ``args.change_param``. A summary is
    printed (with a per-day listing when ``args.verbose`` is set) and, if
    ``args.save_csv_fname`` is given, the daily series are written as CSV.
    """
    verbose = args.verbose
    skip_hospitalizations = args.skip_hospitalizations
    quarantine_perc = args.quarantine_perc
    quarantine_effectiveness = args.quarantine_effectiveness
    country, region, subregion = args.country, args.region, args.subregion

    # Outside the US a missing region is replaced by "ALL".
    if not region and country != "US":
        region = "ALL"

    best_params_type = args.best_params_type
    assert best_params_type in ["mean", "median", "top", "top10"], best_params_type

    if args.best_params_dir:
        # Parameters and simulation dates are read from the best-params file.
        loaded = load_best_params_from_file(
            args.best_params_dir, country, region, subregion
        )
        simulation_start_date = str_to_date(loaded["first_date"])
        simulation_create_date = str_to_date(loaded["date"])
        simulation_end_date = str_to_date(loaded["projection_end_date"])
        region_params = {"population": loaded["population"]}

        if verbose:
            print("best params type:", best_params_type)
        # One of mean_params, median_params, top_params or top10_params.
        params_dict = convert_mean_params_to_params_dict(
            loaded[f"{best_params_type}_params"]
        )
    else:
        # You can hard code your own parameters if you do not want to use the
        # preset parameters. This can be especially useful for
        # regions/countries where we do not have projections. Then simply run
        # `python run_simulation.py -v` to use these parameters.
        simulation_start_date = datetime.date(2020, 2, 1)
        simulation_create_date = datetime.date.today()  # not used so can also be None
        simulation_end_date = datetime.date(2020, 10, 1)
        region_params = {"population": 332000000}
        params_dict = {
            "INITIAL_R_0": 2.24,
            "LOCKDOWN_R_0": 0.9,
            "INFLECTION_DAY": datetime.date(2020, 3, 18),
            "RATE_OF_INFLECTION": 0.25,
            "LOCKDOWN_FATIGUE": 1.0,
            "DAILY_IMPORTS": 500,
            "MORTALITY_RATE": 0.01,
            "REOPEN_DATE": datetime.date(2020, 5, 20),
            "REOPEN_SHIFT_DAYS": 0,
            "REOPEN_R": 1.2,
            "REOPEN_INFLECTION": 0.3,
            "POST_REOPEN_EQUILIBRIUM_R": 1.0,
            "FALL_R_MULTIPLIER": 1.001,
        }

    # Command-line date overrides.
    start_override = args.simulation_start_date
    if start_override:
        simulation_start_date = str_to_date(start_override)
    end_override = args.simulation_end_date
    if end_override:
        simulation_end_date = str_to_date(end_override)

    # Absolute overrides: each value is converted to the type of the old one.
    if args.set_param:
        print("---------------------------------------")
        print("Overwriting params from command line...")
        for param_name, param_value in args.set_param:
            assert param_name in params_dict, f"Unrecognized param: {param_name}"
            current = params_dict[param_name]
            replacement = convert_str_value_to_correct_type(param_value, current)
            print(f"Setting {param_name} to: {replacement}")
            params_dict[param_name] = replacement

    # Relative overrides: the converted change is added to the old value.
    if args.change_param:
        print("---------------------------------------")
        print("Changing params from command line...")
        for param_name, value_change in args.change_param:
            assert param_name in params_dict, f"Unrecognized param: {param_name}"
            current = params_dict[param_name]
            delta = convert_str_value_to_correct_type(
                value_change, current, use_timedelta=True
            )
            updated = current + delta
            print(f"Changing {param_name} from {current} to {updated}")
            params_dict[param_name] = updated

    region_model = RegionModel(
        country,
        region,
        subregion,
        simulation_start_date,
        simulation_create_date,
        simulation_end_date,
        region_params,
        compute_hospitalizations=(not skip_hospitalizations),
    )

    if quarantine_perc > 0:
        print(f"Quarantine percentage: {quarantine_perc:.0%}")
        print(f"Quarantine effectiveness: {quarantine_effectiveness:.0%}")
        assert quarantine_effectiveness in [0.025, 0.1, 0.25, 0.5], (
            "must specify --quarantine_effectiveness percentage."
            " Possible values: [0.025, 0.1, 0.25, 0.5]"
        )
        reduction_idx_by_effectiveness = {0.025: 0, 0.1: 1, 0.25: 2, 0.5: 3}
        region_model.quarantine_fraction = quarantine_perc
        region_model.reduction_idx = reduction_idx_by_effectiveness[
            quarantine_effectiveness
        ]

    if verbose:
        print("================================")
        print(region_model)
        print("================================")
        print("Parameters:")
        for name, value in params_dict.items():
            print(f"{name:<25s} : {value}")

    # Add params to region_model
    region_model.init_params(tuple(params_dict.items()))

    if verbose:
        print("--------------------------")
        print("Running simulation...")
        print("--------------------------")

    # Run simulation
    dates, infections, hospitalizations, deaths = run(region_model)

    # The following are lists with length N, where N is the number of days
    # from simulation_start_date to simulation_end_date.
    #   dates            : datetime.date objects representing day i
    #   infections       : number of new infections on day i
    #   hospitalizations : occupied hospital beds on day i
    #   deaths           : number of new deaths on day i
    assert len(dates) == len(infections) == len(hospitalizations) == len(deaths)
    assert dates[0] == simulation_start_date
    assert dates[-1] == simulation_end_date

    if verbose:
        infections_total = infections.cumsum()
        deaths_total = deaths.cumsum()
        for i, day in enumerate(dates):
            hospitalization_str = (
                ""
                if skip_hospitalizations
                else f"Hospital beds in use: {hospitalizations[i]:,.0f} - "
            )
            print(
                f"{i+1:<3} - {day} - "
                f"New / total infections: {infections[i]:,.0f} / {infections_total[i]:,.0f} - "
                f"{hospitalization_str}"
                f"New / total deaths: {deaths[i]:,.2f} / {deaths_total[i]:,.1f} - "
                f"Mean R: {region_model.effective_r_arr[i]:.3f} - "
                f"IFR: {region_model.ifr_arr[i]:.2%}"
            )  # comment out to spare console buffer

    print("-------------------------------------")
    print(f"End of simulation : {region_model.projection_end_date}")
    print(f"Total infections : {infections.sum():,.0f}")
    if not skip_hospitalizations:
        print(f"Peak hospital beds used : {hospitalizations.max():,.0f}")
    print(f"Total deaths : {deaths.sum():,.0f}")

    csv_path = args.save_csv_fname
    if csv_path:
        # One output row per day: the columns are stacked, then transposed.
        columns = (
            np.array([str(day) for day in dates]),
            infections,
            hospitalizations,
            deaths,
            region_model.effective_r_arr,
        )
        table = np.vstack(columns).T
        np.savetxt(
            csv_path,
            table,
            fmt="%s",
            delimiter=",",
            header="dates,infections,hospitalizations,deaths,mean_r_t",
        )
        print("----------\nSaved file to:", csv_path)
def get_IRR(self, av_t1, av_t2, t1, t2):
    """Return the annualized rate of return between two dated values.

    ``t1`` and ``t2`` are parsed with ``util.str_to_date``; the ratio
    ``av_t2 / av_t1`` is raised to the inverse of the elapsed time in
    years (days / 365) and 1 is subtracted.
    """
    end_date = util.str_to_date(t2)
    start_date = util.str_to_date(t1)
    elapsed_years = (end_date - start_date).days / 365.0
    growth = av_t2 / av_t1
    return growth ** (1.0 / elapsed_years) - 1.0