def parse_cities_request(self, response):
    """Yield one registral request per (city, year, epi-week) combination."""
    cities = json.loads(response.body)
    today = date_utils.today()
    current_week = Week.fromdate(today)
    # Two passes are required because the specific days covered by an
    # epidemiological week differ between 2019 and 2020.  The API labels
    # the current year's data "2020" and the previous year's "2019", which
    # we exploit to pull each year's chart separately.
    for city in cities:
        for year in (2020, 2019):
            for weeknum in range(1, current_week.week):
                # Weeks more than 4 behind the current one are stable
                # enough to be served from cache.
                cacheable = (current_week.week - weeknum) > 4
                yield self.make_registral_request(
                    city=city,
                    ep_week=Week(year, weeknum),
                    callback=self.parse_registral_request,
                    dont_cache=not cacheable,
                )
def __init__(self, modelweek=None, epiweek=None):
    """Build a week object from exactly one of *modelweek* or *epiweek*.

    modelweek : int, optional
        Number of weeks since the reference week (1970 epi week 1).
    epiweek : str or int, optional
        Epi week in YYYYWW form (e.g. "202012" or 202012).

    Raises
    ------
    self.NoData
        When neither argument is given.
    self.Toomuchdata
        When both arguments are given.
    """
    from epiweeks import Week, Year
    # Reference point for model-week numbering.
    self.refew = Week(1970, 1)
    self.refmodelweek = 0
    if modelweek is None and epiweek is None:
        raise self.NoData("Please enter either a model week or an epiweek")
    elif modelweek is not None and epiweek is not None:
        # BUG FIX: the original message was garbled ("Please ejust one of ...").
        raise self.Toomuchdata(
            "Please enter just one of a model week or an epiweek")
    elif modelweek is None:
        self.epiweek = epiweek
        # Accept both "YYYYWW" strings and YYYYWW integers.
        if type(epiweek) is str:
            self.epiWeek = Week(int(epiweek[:4]), int(epiweek[4:]))
        elif type(epiweek) is int:
            epiweek = str(epiweek)
            self.epiWeek = Week(int(epiweek[:4]), int(epiweek[4:]))
        self.fromEpiWeek2ModelWeek()
    elif epiweek is None:
        self.modelweek = modelweek
        # Derives self.epiweek from the model week, then parses it.
        self.fromModelWeek2EpiWeek()
        self.epiWeek = Week(int(str(self.epiweek)[:4]),
                            int(str(self.epiweek)[4:]))
    self.year = self.epiWeek.year
    self.week = self.epiWeek.week
    self.toFrom40Week()
def get_newunit(value):
    """Map a raw date string onto the configured time unit label.

    NOTE(review): relies on `unit`, `weekasdate`, `time_cols` and `pd`
    from the enclosing scope — presumably a nested helper inside a
    larger transformation function; confirm against the surrounding code.
    """
    # Only values starting with a digit are treated as dates; anything
    # else falls through to the bottom branch unchanged.
    if value[0].isdecimal():
        date = pd.to_datetime(value)
        if unit == 'week':
            epiweek = str(Week.fromdate(date, system="cdc"))  # get epiweeks
            year, week = epiweek[:4], epiweek[-2:]
            if weekasdate in ['start', 'end']:
                # Represent the week by its start or end calendar date.
                if weekasdate == 'start':
                    epiweek = str(Week(int(year), int(week)).startdate())
                else:
                    epiweek = str(Week(int(year), int(week)).enddate())
            else:
                # Label form, e.g. "2020_EW12".
                epiweek = year + '_' + 'EW' + week
            # Track every distinct time label seen so far.
            if epiweek not in time_cols:
                time_cols.append(epiweek)
            return epiweek
        elif unit == 'month':
            year_month = date.strftime("%Y-%m")
            if year_month not in time_cols:
                time_cols.append(year_month)
            return year_month
        elif unit == 'year':
            year = date.strftime("%Y")
            if year not in time_cols:
                time_cols.append(year)
            return year
        elif unit == 'full':
            # Single aggregate bucket; implicitly returns None for any
            # other unit value.
            return 'total'
    else:
        if unit == 'full':
            return 'total'
        else:
            return value
def week_value_to_week(value: int) -> Week:
    """Decode a YYYYWW integer into a Week, clamping out-of-range years."""
    year = value // 100
    week = value % 100
    # Clamp below the representable range of datetime.date.
    if year < date.min.year:
        return Week(date.min.year, 1)
    # Clamp above as well; the -1 margin exists because Week internally
    # performs some checks against year + 1.
    if year > date.max.year - 1:
        return Week(date.max.year - 1, 1)
    return Week(year=year, week=week)
def year_week(y, w):
    """Return the Wednesday (start date + 3 days) of epi week *w* in year *y*.

    Returns '' when either input is blank/missing or cannot be parsed.
    """
    try:
        if w not in ['', np.nan, None] and y not in ['', np.nan, None]:
            week = Week(int(y), int(w))
            # Mid-week anchor: three days after the week's start date.
            date = week.startdate() + timedelta(3)
            return date
        else:
            return ''
    # Narrowed from a bare `except:`: keep the best-effort contract but
    # do not swallow SystemExit / KeyboardInterrupt.
    except Exception:
        return ''
def add_epi_dates(df):
    '''
    Adds epi_week and epi_year to dataframe.

    The epi week is computed once per row (the original called
    Week.fromdate twice per row) and then split into its week and year
    components; columns are reordered with the epi fields first.
    '''
    # Compute each row's epi week a single time and reuse it.
    weeks = df.date.apply(Week.fromdate)
    df['epi_week'] = weeks.apply(lambda w: w.week)
    df['epi_year'] = weeks.apply(lambda w: w.year)
    df = df[['epi_week', 'epi_year', 'date', 'location', 'location_name',
             'cum_death', 'inc_death', 'cum_case', 'inc_case']]
    return df
def grabForecastWeeks():
    """Return every epi week from 2019 EW40 through the current week as
    YYYYWW integers."""
    from epiweeks import Week, Year
    current = Week.thisweek()
    # Walk forward one week at a time from the season start.
    week = Week(2019, 40)
    weeks = [week]
    while week < current:
        week += 1
        weeks.append(week)
    return [int("{:04d}{:02d}".format(w.year, w.week)) for w in weeks]
def addLag(obs):
    """Attach the surveillance week (issue week + lag) to an observation.

    obs.EW is a YYYYWW epi week and obs.lag a number of weeks; the result
    is stored in obs['surveillanceWeek'] as a zero-padded YYYYWW string.
    """
    EW = str(obs.EW)
    lag = int(obs.lag)
    yr, wk = int(EW[:4]), int(EW[4:])
    surveillanceWeek = Week(yr, wk) + lag
    # BUG FIX: "{:d}{:d}" produced ambiguous values such as "20205" for
    # 2020 EW05, which cannot be split back into YYYY/WW the way this
    # very function parses obs.EW; zero-pad the week number instead.
    obs['surveillanceWeek'] = "{:04d}{:02d}".format(surveillanceWeek.year,
                                                    surveillanceWeek.week)
    return obs
def get_coverage(dashboard_signal: DashboardSignal,
                 metadata) -> List[DashboardSignalCoverage]:
    """Get the most recent coverage for the signal.

    Downloads the per-geo-type count CSV for the signal, parses its
    time_value column (daily YYYYMMDD or weekly YYYYWW epi weeks), and
    returns one DashboardSignalCoverage row per CSV row.
    """
    count_by_geo_type_df = pd.read_csv(
        COVERAGE_URL.format(source=dashboard_signal.source,
                            signal=dashboard_signal.covidcast_signal))
    try:
        # Daily signals: time_value is a YYYYMMDD integer.
        count_by_geo_type_df["time_value"] = count_by_geo_type_df[
            "time_value"].apply(
                lambda x: pd.to_datetime(str(x), format="%Y%m%d"))
    except ValueError:
        # Weekly signals: time_value is a YYYYWW epi week; use the week's
        # start date.  (Narrowed from a bare `except:` so unrelated
        # errors — e.g. a missing column — still propagate.)
        count_by_geo_type_df["time_value"] = count_by_geo_type_df[
            "time_value"].apply(
                lambda x: pd.to_datetime(Week(x // 100, x % 100).startdate()))
    signal_coverage_list = []
    for _, row in count_by_geo_type_df.iterrows():
        # NOTE(review): geo_type is hard-coded to 'county' even though the
        # file holds counts by geo type — confirm this is intentional.
        signal_coverage = DashboardSignalCoverage(
            signal_id=dashboard_signal.db_id,
            date=row['time_value'].date(),
            geo_type='county',
            count=row['count'])
        signal_coverage_list.append(signal_coverage)
    return signal_coverage_list
def computeLogScores(forecastsAndILI, forecastWeek, iliEW):
    """Compute log scores for the forecast bins that bracket the observed wILI.

    Returns an empty DataFrame when no forecast rows are supplied.
    """
    if forecastsAndILI.shape[0] == 0:
        return pd.DataFrame()
    from datetime import datetime
    calendarEW = Week.thisweek()
    # BUG FIX: weekday is a method — the original referenced it without
    # calling it (`.weekday`), so the weekend branch below could never run.
    dayOfWeek = datetime.today().weekday()
    if dayOfWeek in {5, 6}:  # Saturday or Sunday
        # Reference from the next week; Week arithmetic also handles the
        # year-end rollover that a naive `week + 1` would get wrong.
        nextEW = calendarEW + 1
        calendarWeek = "{:d}{:d}".format(nextEW.year, nextEW.week)
    else:
        calendarWeek = "{:d}{:d}".format(calendarEW.year, calendarEW.week)
    # Keep only the probability bin that contains the observed wILI value.
    subsetToProbabilities = forecastsAndILI.loc[
        (forecastsAndILI.bin_start_incl <= forecastsAndILI.wili)
        & (forecastsAndILI.bin_end_notincl > forecastsAndILI.wili), :]
    subsetToProbabilities['logScore'] = np.log(
        [float(x) for x in subsetToProbabilities.value])
    logScores = subsetToProbabilities.loc[:, [
        'model', 'location', 'target', 'region', 'lag', 'releaseEW',
        'releaseDate', 'wili', 'logScore'
    ]]
    logScores[
        'surveillanceWeek'] = forecastWeek  # this is the most recent week of data available
    logScores[
        'calendarWeek'] = calendarWeek  # this is the present week in real-time
    logScores['targetWeek'] = iliEW  # this is the target week of forecasting
    return logScores
def _date_to_api_string(date: date, time_type: str = "day") -> str:  # pylint: disable=W0621
    """Convert a date object to a YYYYMMDD or YYYYWW string expected by the API.

    :param date: the calendar date to format.
    :param time_type: "day" for YYYYMMDD, "week" for the CDC epi-week form.
    :raises ValueError: for any other time_type (the original fell
        through and raised UnboundLocalError instead).
    """
    if time_type == "day":
        return date.strftime("%Y%m%d")
    if time_type == "week":
        return Week.fromdate(date).cdcformat()
    raise ValueError(f"unsupported time_type: {time_type!r}")
def export_csv(df, geo_name, sensor, export_dir, start_date):
    """Export data set in format expected for ingestion by the API.

    One CSV file is written per unique weekly timestamp on or after
    start_date, named "weekly_YYYYWW_{geo_name}_{sensor}.csv".

    Parameters
    ----------
    df: pd.DataFrame
        data frame with columns "geo_id", "timestamp", and "val"
    geo_name: str
        name of the geographic region, such as "state" or "hrr"
    sensor: str
        name of the sensor; only used for naming the output file
    export_dir: str
        path to location where the output CSV files to be uploaded should be stored
    start_date: datetime.datetime
        The first date to report
    """
    # Docstring fix: the original documented an `end_date` parameter that
    # does not exist in the signature.
    df = df.copy()
    df = df[df["timestamp"] >= start_date]
    for date in df["timestamp"].unique():
        # Name each output file after the epi week of its timestamp.
        t = Week.fromdate(pd.to_datetime(str(date)))
        date_short = "weekly_" + str(t.year) + str(t.week).zfill(2)
        export_fn = f"{date_short}_{geo_name}_{sensor}.csv"
        result_df = df[df["timestamp"] == date][[
            "geo_id", "val", "se", "sample_size"
        ]]
        result_df.to_csv(f"{export_dir}/{export_fn}",
                         index=False,
                         float_format="%.8f")
def plot_cummulative_sampling_fraction( df ):
    """Plot the weekly fraction of new cases that were sequenced.

    Expects *df* with columns "date", "new_cases" and "new_sequences";
    returns a plotly Figure with a log-scaled percentage y-axis.
    """
    # Bucket rows by epi week, represented by the week's start date.
    df["epiweek"] = df["date"].apply( lambda x: Week.fromdate(x).startdate() )
    plot_df = df.groupby( "epiweek" ).agg( new_cases = ("new_cases", "sum"), new_sequences = ("new_sequences", "sum" ) )
    # The fraction is undefined for weeks with no sequences.
    plot_df = plot_df.loc[plot_df["new_sequences"]>0]
    plot_df["fraction"] = plot_df["new_sequences"] / plot_df["new_cases"]
    plot_df = plot_df.reset_index()
    fig = go.Figure()
    fig.add_trace(
        go.Scattergl(
            x=plot_df["epiweek"],
            y=plot_df["fraction"],
            mode='lines',
            name='Fraction',
            line={ "color" : '#767676', "width" : 4 }
        )
    )
    _add_date_formating( fig )
    fig.update_layout( yaxis_tickformat='.1%' )
    # Log10 limits from positive, finite fractions.
    # NOTE(review): min_lim/max_lim are computed but never applied to an
    # axis range below — possibly leftover code; confirm.
    cleaned_array = np.log10( plot_df.loc[plot_df["fraction"] > 0, "fraction"] )
    cleaned_array = cleaned_array[~np.isinf( cleaned_array )]
    min_lim = np.floor( cleaned_array.min() )
    max_lim = np.ceil( cleaned_array.max() )
    fig.update_yaxes( type="log", title="<b>Cases sequenced (%)</b>" )
    fig.update_xaxes( range=get_date_limits( plot_df["epiweek"] ) )
    return fig
def computeTargetILIepiWeek(row):
    """Derive the target epi week from the surveillance week plus the
    'N wk ahead' horizon and store it as a YYYYWW int in row['targetWeek']."""
    surveillance = str(int(row.surveillanceWeek))
    horizon = int(row.Target.replace(' wk ahead', ''))
    # Split the YYYYWW value, step the epi week forward by the horizon.
    target = Week(int(surveillance[:4]), int(surveillance[4:])) + horizon
    row['targetWeek'] = int("{:04d}{:02d}".format(target.year, target.week))
    return row
def parse_filtered_metadata(metadata_file, tip_to_tree, label_fields, tree_fields, table_fields, database_date_column):
    """Parse the filtered metadata CSV into per-query taxon objects.

    Returns (query_dict, query_id_dict, tree_to_tip, closest_seqs):
    mappings from query name and query id to taxon, tree name to its list
    of taxa, and the set of closest-database-sequence names.

    NOTE(review): `taxon` and `convert_date` are defined elsewhere in the
    project; the CSV is assumed to carry the columns referenced below.
    """
    query_dict = {}
    query_id_dict = {}
    closest_seqs = set()
    tree_to_tip = defaultdict(list)
    # First pass only captures the header names.
    # NOTE(review): `headers` is never used afterwards — confirm whether
    # this first read is still needed.
    with open(metadata_file, "r", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        headers = reader.fieldnames
    with open(metadata_file, "r", encoding="utf-8") as f:
        in_data = csv.DictReader(f)
        for sequence in in_data:
            country = sequence["country"]
            query_id = sequence['query_id']
            query_name = sequence['query']
            closest_name = sequence["closest"]
            sample_date = sequence[database_date_column] #this may need to be flexible if using a different background database
            closest_distance = sequence["SNPdistance"]
            snps = sequence['SNPs']
            # Only the first row per query_id is kept.
            if query_id not in query_id_dict: #it's in the fasta file and in the db, this should take the db
                new_taxon = taxon(query_name, country, label_fields, tree_fields, table_fields)
                new_taxon.query_id = query_id
                if query_name == closest_name: #if it's in database, get its sample date
                    new_taxon.in_db = True
                    new_taxon.sample_date = sample_date
                    # Epi week of the database sample date.
                    new_taxon.epiweek = Week.fromdate(convert_date(sample_date))
                    new_taxon.closest = "NA"
                else:
                    # Query sequence: record its nearest database match.
                    new_taxon.closest = closest_name
                    new_taxon.closest_distance = closest_distance
                    new_taxon.snps = snps
                    closest_seqs.add(closest_name)
                # Attach the taxon to the tree containing its tip, if any.
                if query_name in tip_to_tree:
                    relevant_tree = tip_to_tree[query_name]
                else:
                    relevant_tree = "NA"
                new_taxon.tree = relevant_tree
                tree_to_tip[relevant_tree].append(new_taxon)
                query_dict[query_name] = new_taxon
                query_id_dict[query_id] = new_taxon
    return query_dict, query_id_dict, tree_to_tip, closest_seqs
def get_epiweeks(value):
    """Convert a date-like string to a '<year>_EW<week>' CDC epi-week
    label, recording new labels in ew_cols; non-dates pass through."""
    # Guard clause: anything not starting with a digit is returned as-is.
    if not value[0].isdecimal():
        return value
    parsed = pd.to_datetime(value)
    cdc_week = str(Week.fromdate(parsed, system="cdc"))  # get epiweeks, YYYYWW
    label = cdc_week[:4] + '_' + 'EW' + cdc_week[-2:]
    if label not in ew_cols:
        ew_cols.append(label)
    return label
def __init__(self, record_id, country, introduction, acctrans, metadata): #phylotype):
    """Initialise a record for one sequence.

    NOTE(review): metadata appears to be (date-or-"None", epiweek number,
    adm2, global lineage) — confirm against the caller.
    """
    self.id = record_id
    self.introduction = introduction
    #self.phylotype = phylotype
    self.country = country
    self.acctrans = acctrans
    # A literal string "None" marks a missing collection date.
    if metadata[0] == "None":
        self.date = "NA"
    else:
        self.date = metadata[0]
    self.adm2 = metadata[2]
    self.global_lineage = metadata[3]
    epiweek_prep = metadata[1]
    # Epi-week bookkeeping: non-zero weeks belong to 2020, week "0" is
    # mapped to the last week of 2019, and a blank field means unknown.
    if epiweek_prep != "0" and epiweek_prep != "":
        self.epiweek = Week(2020, int(float(epiweek_prep)))
    elif epiweek_prep == "0":
        self.epiweek = Week(2019, 52)
    elif epiweek_prep == "":
        self.epiweek = "NA"
    self.unclear = False
    # Dates are expected in ISO "YYYY-MM-DD" form; "/" indicates a
    # malformed input (only reported, not rejected).
    if "/" in self.date:
        print("ERROR DATE FORMAT INCORRECT")
        # self.date_dt = dateparser.parse(self.date, settings={"DATE_ORDER":'DMY'}).date()
        # date_bits = self.date.split("/")
    # date_bits = self.date.split("-")
    # self.date_dt = dateparser.parse(self.date).date()
    date_bits = self.date.split("-")
    if len(date_bits) == 3:
        self.date_dt = dt.date(int(date_bits[0]), int(date_bits[1]), int(date_bits[2]))
    else:
        # Covers both "NA" and partial dates.
        self.date_dt = "NA"
def __init__(self, record_id, lineage, metadata):
    """Initialise a lineage record; metadata is (country, date, epiweek)."""
    self.id = record_id
    self.lineage = lineage
    self.country = metadata[0]
    self.date = metadata[1]
    raw_week = metadata[2]
    if raw_week == "":
        # Blank field: epi week unknown.
        self.epiweek = "NA"
    elif float(raw_week) != 0.0:
        self.epiweek = Week(2020, int(float(raw_week)))
    else:
        # Week 0 is mapped to the last week of 2019.
        self.epiweek = Week(2019, 52)
    self.get_date_loc()
    self.travel_history = False
def recurse(node):
    """Walk the tree depth-first, validating and collecting epiweek values."""
    value = node.get("node_attrs", {}).get(EPIKEY, {}).get("value", False)
    if value:
        # we validate using both the epiweeks package and a regex (epiweeks will perform coercion of non-valid data into valid data)
        if not re.match(r'^(\d{4})(\d{2})$', value):
            # BUG FIX: the message previously said "YYYYMM"; the regex
            # (and CDC epi weeks) require year + two-digit week, YYYYWW.
            raise (ValueError(
                f"Epiweek value {value} was not in format YYYYWW."))
        week = Week.fromstring(
            value, system="cdc")  # raises ValueError if not valid
        observed_values.add(week)
    for child in node.get("children", []):
        recurse(child)
def addEpiWeek(self):
    """Add a zero-padded YYYYWW 'epiweek' column derived from self.data.date.

    If the column already exists it is only cast to str.  Returns self
    for chaining.
    """
    if 'epiweek' in self.data:
        self.data.epiweek = self.data.epiweek.astype(str)
        return self
    from epiweeks import Week, Year
    import datetime
    epiweeks = []
    for dt in self.data.date:
        yr, mnth, day = dt.year, dt.month, dt.day
        # BUG FIX: Week.fromdate() takes a date object, not three ints —
        # the original Week.fromdate(yr, mnth, day) call raises TypeError.
        epiweek = Week.fromdate(datetime.date(yr, mnth, day))
        epiweeks.append("{:04d}{:02d}".format(epiweek.year, epiweek.week))
    self.data['epiweek'] = epiweeks
    return self
def _parse_datetimes(date_int: int,
                     time_type: str,
                     date_format: str = "%Y%m%d") -> Union[pd.Timestamp, float]:
    """Convert a date or epiweek int into a timestamp.

    Datetimes (length 8) are converted to their corresponding date, while
    epiweeks (length 6, CDC system) are converted to the date of the
    start of the week.  Returns nan for any other time_type.

    :param date_int: Int representation of date (or epi week).
    :param time_type: "day" or "week".
    :param date_format: String of the date format to parse.
    :returns: Timestamp, or np.nan for unsupported time types.
    """
    # Annotation fix: the original `Union[pd.Timestamp]` hid the np.nan
    # (float) return path.
    date_str = str(date_int)
    if time_type == "day":
        return pd.to_datetime(date_str, format=date_format)
    if time_type == "week":
        epiwk = Week(int(date_str[:4]), int(date_str[-2:]))
        return pd.to_datetime(epiwk.startdate())
    return np.nan
def fromEpiWeek2ModelWeek(self, week):
    """Return the number of weeks between 1970 EW1 and *week*.

    Accepts a Week instance, a "YYYYWW" string, or a YYYYWW integer.
    """
    from epiweeks import Week, Year
    # Normalise the input to a Week instance.
    if type(week) is int:
        week = str(week)
    if type(week) is str:
        week = Week(int(week[:4]), int(week[4:]))
    # Count whole years first, then step week by week to the target.
    count = 0
    cursor = Week(1970, 1)
    while cursor.year < week.year:
        count += Year(cursor.year).totalweeks
        cursor = Week(cursor.year + 1, 1)
    while cursor < week:
        count += 1
        cursor += 1
    return count
def toFrom40Week(self):
    """
    A variable from40 is created as the number of weeks from the current
    year or, if epiweek's week is less than 40, the past year's epidemic
    week number 40 to epiweek.
    """
    yr, wk = self.year, self.week
    # BUG FIX: week 40 itself is zero weeks from week 40, but the original
    # `wk > 40` test sent it into the previous-year branch (~52 weeks),
    # contradicting the docstring ("less than 40" -> past year).
    if wk >= 40:
        self.from40 = wk - 40
    else:
        # Count forward from last season's week 40 to this epi week.
        _ref40week = Week(yr - 1, 40)
        _from40 = 0
        while _ref40week < self.epiWeek:
            _ref40week += 1
            _from40 += 1
        self.from40 = _from40
def get_week_just_from_date(self, date):
    """Return the (leap-adjusted) epi-week number for a calendar date.

    NOTE(review): self.dates appears to map a year to the start date of
    that year's reporting window, and self.get_leap() to return a
    timedelta correction — confirm against the class definition.
    """
    year_date = date.year
    year = year_date
    # Pick the reporting year whose window [dates[y], dates[y+1]) holds
    # the date; otherwise attribute the date to the following year.
    if date >= self.dates[year_date] and date < self.dates[year_date + 1]:
        year = year_date
    else:
        year = year_date + 1
    leap = self.get_leap(year)
    # Shift the date by the leap correction before computing the week.
    epi_date = date + timedelta(days=leap.days)
    epi_week = Week.fromdate(epi_date)
    return epi_week.week
def regulation_release(state, grid, config, parameters, current_time):
    """Compute the expected regulated reservoir release for this timestep.

    Release blends a prerelease schedule, the long-term mean flow, and
    the scheduled flow for the current period, scaled by the storage
    ratio k, following Biemans (2011).  Writes state.reservoir_release
    in place.
    """
    # compute the expected monthly release based on Biemans (2011)
    # TODO this is still written assuming monthly, but here's the epiweek for when that is relevant
    epiweek = Week.fromdate(current_time).week
    month = current_time.month
    streamflow_time_name = config.get(
        'water_management.reservoirs.streamflow_time_resolution')
    # initialize to the average flow
    state.reservoir_release = grid.reservoir_streamflow_schedule.mean(
        dim=streamflow_time_name).values
    # TODO what is k
    # NOTE(review): k is the start-of-operational-year storage divided by
    # parameter-scaled capacity — presumably the hedging factor from
    # Biemans (2011); confirm against the paper.
    k = state.reservoir_storage_operation_year_start / (
        parameters.reservoir_regulation_release_parameter *
        grid.reservoir_storage_capacity)
    # TODO what is factor
    # factor only kicks in when runoff capacity exceeds the configured
    # condition threshold; otherwise it is zero.
    factor = np.where(
        grid.reservoir_runoff_capacity >
        parameters.reservoir_runoff_capacity_condition,
        (2.0 / grid.reservoir_runoff_capacity)**2.0, 0)
    # release is some combination of prerelease, average flow in the time period, and total average flow
    state.reservoir_release = np.where(
        (grid.reservoir_use_electricity > 0) |
        (grid.reservoir_use_irrigation > 0),
        np.where(
            grid.reservoir_runoff_capacity <= 2.0,
            k * grid.reservoir_prerelease_schedule.sel({
                streamflow_time_name: month
            }).values,
            k * factor * grid.reservoir_prerelease_schedule.sel({
                streamflow_time_name: month
            }).values + (1 - factor) * grid.reservoir_streamflow_schedule.sel({
                streamflow_time_name: month
            }).values),
        np.where(
            grid.reservoir_runoff_capacity <= 2.0,
            k * grid.reservoir_streamflow_schedule.mean(
                dim=streamflow_time_name).values,
            k * factor * grid.reservoir_streamflow_schedule.mean(
                dim=streamflow_time_name).values +
            (1 - factor) * grid.reservoir_streamflow_schedule.sel({
                streamflow_time_name: month
            }).values))
def generate_week_periods(open_future_periods, page_limit, begin_period, direction, direction_change): weeks_to_display = {} # When the user first visits the period screen the begin_period variable is empty. # Therefore, use the current week as default. week = Week.thisweek("iso") + open_future_periods # If begin_period variable has a date, use it to calculate the weeks to display. if begin_period != '': week = Week.fromdate(datetime.datetime.strptime(begin_period, '%Y-%m-%d'), 'iso') # This logic is to fix week discrepancy when a user clicks + and changes the direction and press - or vice versa if direction_change: if direction == '+': week += page_limit - 1 if direction == '-': week -= page_limit - 1 # We should not open future dates for data entry. The -1 is to prevent from opening this week. if direction == '+' and week + page_limit > Week.thisweek("iso") + open_future_periods: week = Week.thisweek("iso") + open_future_periods - page_limit - 1 rng = range(page_limit, 0, -1) if direction == '+' else range(page_limit) for key, i in enumerate(rng): w = week + i if direction == '+' else week - (i + 1) weeks_to_display[str(key + 1)] = { "period": w.isoformat(), "display": "W{} - {} - {}".format(w.weektuple()[1], w.startdate(), w.enddate()) } # Take the first week to calculate the beginning period in the next screen. if direction == '+' and i == page_limit: begin_period = str(w.enddate()) # Take the final week to calculate the beginning week in the next screen. if direction == '-' and i == page_limit - 1: begin_period = str(w.startdate()) return begin_period, weeks_to_display
def __init__(self, record_id, country, lineage, metadata, pillar, sub_date):
    """Initialise a record for one sequence.

    metadata is (collection date or "None", epiweek number as str, adm2,
    global lineage, sequencing centre); sub_date is the "YYYY-MM-DD"
    submission date.
    """
    self.id = record_id
    self.lineage = lineage
    self.country = country
    self.pillar2 = pillar
    # BUG FIX: when metadata[0] == "None" the original set only self.date
    # and left the local `date` unbound, so the "/" check below raised
    # NameError (and self.date was never set in the other branch).
    if metadata[0] == "None":
        date = "NA"
    else:
        date = metadata[0]
    self.date = date
    self.adm2 = metadata[2]
    self.global_lineage = metadata[3]
    self.sequencing_centre = metadata[4]
    if metadata[1] != "":
        epiweek = int(metadata[1])
        if epiweek > 0 and epiweek < 54:
            self.epiweek = Week(2020, epiweek)
        elif epiweek >= 53:
            # Weeks beyond 53 roll over into 2021.
            self.epiweek = Week(2021, epiweek - 53)
        elif epiweek == 0:
            # Week 0 maps to the last week of 2019.
            self.epiweek = Week(2019, 52)
    else:
        self.epiweek = "NA"
    # Dates are expected in ISO "YYYY-MM-DD" form; "/" indicates a
    # malformed input (only reported, not rejected).
    if "/" in date:
        print("ERROR DATE FORMAT INCORRECT")
    try:
        self.date_dt = dt.datetime.strptime(date, "%Y-%m-%d").date()
    # Narrowed from a bare except: unparseable (or "NA") dates become "NA".
    except (ValueError, TypeError):
        self.date_dt = "NA"
    try:
        self.sub_date = dt.datetime.strptime(sub_date, "%Y-%m-%d").date()
    except (ValueError, TypeError):
        self.sub_date = "NA"
def get_weekdates_range(self, year, week):
    """Return (first_day, last_day) calendar dates of epi week *week* in
    *year*, shifted by the year's leap correction.

    NOTE(review): self.get_leap() is assumed to return a timedelta.
    """
    firstdayofweek = Week(int(year), int(week)).startdate()
    # An epi week spans 7 days, so the last day is start + 6.  (The
    # original used timedelta(days=6.9); date arithmetic ignores the
    # fractional day, so this is behaviour-preserving.)
    lastdayofweek = firstdayofweek + timedelta(days=6)
    # The original `if leap != timedelta(0)` had byte-identical branches,
    # so the shift is applied unconditionally (a zero leap is a no-op).
    leap = self.get_leap(year)
    firstdayofweek = firstdayofweek + timedelta(days=leap.days)
    lastdayofweek = lastdayofweek + timedelta(days=leap.days)
    return (firstdayofweek, lastdayofweek)
def reservoir_release(state, grid, config, parameters, current_time):
    """Compute reservoir release for the current timestep.

    Resets the start-of-operational-year storage when the operational
    year begins, then delegates to regulation_release and storage_targets
    (both mutate *state* in place).
    """
    # compute release from reservoirs
    # TODO so much logic was dependent on monthly, so still assuming monthly for now, but here's the epiweek for when that is relevant
    epiweek = Week.fromdate(current_time).week
    month = current_time.month
    # if it's the start of the operational year for the reservoir, set it's start of op year storage to the current storage
    state.reservoir_storage_operation_year_start = np.where(
        state.reservoir_month_start_operations == month,
        state.reservoir_storage,
        state.reservoir_storage_operation_year_start)
    regulation_release(state, grid, config, parameters, current_time)
    storage_targets(state, grid, config, parameters, current_time)
def week_split_count(df):
    """Count closings per ISO week.

    Returns a frame with columns 'ds' (the week's end date) and 'y'
    (number of closings in that week).

    NOTE(review): rows falling in ISO week 53 receive NaN in 'ds' via the
    index-aligned assignment from the filtered frame and are then dropped
    by the groupby — presumably intentional; confirm.
    """
    w = df[['CloseDate','ClosePrice']]
    # ISO week / year of each closing date, as strings first.
    w['week']=pd.to_datetime(w.CloseDate).apply(lambda x: '{}'.format(x.week))
    w['year']=pd.to_datetime(w.CloseDate).apply(lambda x: '{}'.format(x.year))
    ww = w[['week','year']]
    ww.week=ww.week.apply(int)
    ww.year=ww.year.apply(int)
    # Drop ISO week 53 before computing each week's end date.
    ww=ww[ww.week!=53]
    ww['enddate'] = ww.apply(lambda row: pd.to_datetime(Week(row.year, row.week, 'iso').enddate()),axis=1)
    # Index-aligned assignment: week-53 rows get NaN here.
    w['ds'] = ww['enddate']
    w=w[['ds','ClosePrice']]
    w.rename(columns={'ClosePrice':'y'},inplace=True)
    # Count closings per week-end date (NaN 'ds' rows are excluded).
    w=w.groupby('ds').count()
    w.reset_index(inplace=True)
    w['ds'] = w['ds'].apply(lambda x: pd.to_datetime(x))
    return w