def __init__(
    self,
    state,
    output_interval_days=4,
    run_mode="can-before",
    output_dir=None,
    jhu_dataset=None,
    cds_dataset=None,
    include_imputed=False,
):
    """Set up per-state pipeline inputs: datasets, timeseries frames, and the inference whitelist.

    Args:
        state: Full state name; abbreviation is resolved via `us.states.lookup`.
        output_interval_days: Spacing of output rows, in days.
        run_mode: Converted to a `RunMode` enum value.
        output_dir: Destination directory for generated output (may be None).
        jhu_dataset: Optional pre-loaded JHU dataset; defaults to the local copy.
        cds_dataset: Optional pre-loaded CDS dataset; defaults to the local copy.
        include_imputed: Whether imputed data points are included downstream.
    """
    self.state = state
    self.state_abbreviation = us.states.lookup(state).abbr
    self.output_interval_days = output_interval_days
    self.output_dir = output_dir
    self.run_mode = RunMode(run_mode)
    self.include_imputed = include_imputed

    self.population_data = FIPSPopulation.local().population()
    self.jhu_local = jhu_dataset or JHUDataset.local()
    self.cds_dataset = cds_dataset or CDSDataset.local()

    # County-level frame aggregated from both sources; timestamps truncated to midnight.
    county_frame = build_aggregate_county_data_frame(self.jhu_local, self.cds_dataset)
    county_frame["date"] = county_frame["date"].dt.normalize()
    self.county_timeseries = county_frame

    # State-level frame from JHU; timestamps truncated to midnight as well.
    state_frame = self.jhu_local.timeseries().state_data
    state_frame["date"] = state_frame["date"].dt.normalize()
    self.state_timeseries = state_frame

    # Keep only rows flagged as safe for inference.
    whitelist = load_data.load_whitelist()
    self.df_whitelist = whitelist[whitelist["inference_ok"] == True]
def __init__(
    self,
    state,
    output_interval_days=4,
    run_mode="can-before",
    output_dir=None,
    jhu_dataset=None,
    cds_dataset=None,
    include_imputed=False,
):
    """Set up per-state pipeline inputs: datasets and normalized timeseries frames.

    Args:
        state: Full state name; abbreviation is resolved via `us.states.lookup`.
        output_interval_days: Spacing of output rows, in days.
        run_mode: Converted to a `RunMode` enum value.
        output_dir: Destination directory for generated output (may be None).
        jhu_dataset: Optional pre-loaded JHU dataset; defaults to the local copy.
        cds_dataset: Optional pre-loaded CDS dataset; defaults to the local copy.
        include_imputed: Whether imputed data points are included downstream.
    """
    self.output_interval_days = output_interval_days
    self.state = state
    self.run_mode = RunMode(run_mode)
    self.include_imputed = include_imputed
    self.state_abbreviation = us.states.lookup(state).abbr
    self.population_data = FIPSPopulation.local().population()
    self.output_dir = output_dir
    self.jhu_local = jhu_dataset or JHUDataset.local()
    self.cds_dataset = cds_dataset or CDSDataset.local()

    # County-level frame aggregated from both sources; timestamps truncated
    # to midnight so dates compare cleanly.
    self.county_timeseries = build_aggregate_county_data_frame(
        self.jhu_local, self.cds_dataset
    )
    self.county_timeseries["date"] = self.county_timeseries["date"].dt.normalize()

    state_timeseries = self.jhu_local.timeseries().get_subset(AggregationLevel.STATE)
    # BUG FIX: the original assigned the normalized `date` Series itself to
    # `self.state_timeseries`, losing the DataFrame. Keep the frame and
    # normalize its `date` column in place (mirrors the county handling above).
    self.state_timeseries = state_timeseries.data
    self.state_timeseries["date"] = self.state_timeseries["date"].dt.normalize()
def _cache_global_datasets():
    """Warm dataset caches before forking so subprocesses inherit populated caches."""
    # These builders are invoked purely for their caching side effect;
    # the return values are deliberately discarded.
    combined_datasets.build_us_latest_with_all_fields()
    combined_datasets.build_us_timeseries_with_all_fields()

    global nyt_dataset, cds_dataset
    cds_dataset = CDSDataset.local() if cds_dataset is None else cds_dataset
    nyt_dataset = NYTimesDataset.local() if nyt_dataset is None else nyt_dataset
def _cache_global_datasets():
    """Lazily populate the module-level CDS and NYT dataset globals."""
    global nyt_dataset, cds_dataset
    cds_dataset = CDSDataset.local() if cds_dataset is None else cds_dataset
    nyt_dataset = NYTimesDataset.load() if nyt_dataset is None else nyt_dataset
def get_cds():
    """Return the CDS test fields with `date` reformatted as mm/dd/yy strings.

    Returns:
        A DataFrame restricted to `CDSDataset.TEST_FIELDS`, with the `date`
        column converted from datetimes to "%m/%d/%y" strings.
    """
    # Copy before mutating: `CDSDataset.local().data` is a shared frame, and
    # the original code rewrote its `date` column in place, corrupting the
    # dataset for every later consumer (and risking SettingWithCopyWarning
    # on the subsequent column subset).
    cds_df = CDSDataset.local().data.copy()
    cds_df["date"] = cds_df.date.apply(lambda x: x.strftime("%m/%d/%y"))
    return cds_df[CDSDataset.TEST_FIELDS]