Example #1
    def parse_cities_request(self, response):
        cities = json.loads(response.body)

        today = date_utils.today()
        current_week = Week.fromdate(today)

        # We have to do separate passes for 2019 and 2020, since the specific days of
        # each epidemiological week differ between the two years.
        #
        # The API seems to return the data from the current year as "2020" and the previous one as "2019",
        # so we exploit that to extract the data only from the "2020" chart.

        for city in cities:
            for year in [2020, 2019]:
                for weeknum in range(1, current_week.week):
                    ep_week = Week(year, weeknum)

                    # Cache more than 4 weeks ago
                    should_cache = (current_week.week - weeknum) > 4
                    yield self.make_registral_request(
                        city=city,
                        ep_week=ep_week,
                        callback=self.parse_registral_request,
                        dont_cache=not should_cache,
                    )
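For reference, a minimal standalone sketch (run outside Scrapy, with a print in place of the spider's make_registral_request) of how the Week arithmetic above enumerates epidemiological weeks and decides which requests may be cached:

from datetime import date
from epiweeks import Week

current_week = Week.fromdate(date.today())

for year in (2020, 2019):
    for weeknum in range(1, current_week.week):
        ep_week = Week(year, weeknum)
        # weeks more than 4 behind the current week are old enough to cache
        should_cache = (current_week.week - weeknum) > 4
        print(ep_week.cdcformat(), "cacheable" if should_cache else "fresh")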
Example #2
    def __init__(self, modelweek=None, epiweek=None):
        from epiweeks import Week, Year
        self.refew = Week(1970, 1)
        self.refmodelweek = 0

        if modelweek is None and epiweek is None:
            raise self.NoData("Please enter either a model week or an epiweek")
        elif modelweek is not None and epiweek is not None:
            raise self.Toomuchdata(
                "Please enter just one of a model week or an epiweek")
        elif modelweek is None:
            self.epiweek = epiweek
            if type(epiweek) is str:
                self.epiWeek = Week(int(epiweek[:4]), int(epiweek[4:]))
            elif type(epiweek) is int:
                epiweek = str(epiweek)
                self.epiWeek = Week(int(epiweek[:4]), int(epiweek[4:]))
            self.fromEpiWeek2ModelWeek()
        elif epiweek is None:
            self.modelweek = modelweek
            self.fromModelWeek2EpiWeek()
            self.epiWeek = Week(int(str(self.epiweek)[:4]),
                                int(str(self.epiweek)[4:]))

        self.year = self.epiWeek.year
        self.week = self.epiWeek.week

        self.toFrom40Week()
Example #3
 def get_newunit(value):
     # `unit`, `weekasdate`, and `time_cols` come from the enclosing scope
     if value[0].isdecimal():
         date = pd.to_datetime(value)
         if unit == 'week':
             epiweek = str(Week.fromdate(date, system="cdc")) # get epiweeks
             year, week = epiweek[:4], epiweek[-2:]
             if weekasdate in ['start', 'end']:
                 if weekasdate == 'start':
                     epiweek = str(Week(int(year), int(week)).startdate())
                 else:
                     epiweek = str(Week(int(year), int(week)).enddate())
             else:
                 epiweek = year + '_' + 'EW' + week
             if epiweek not in time_cols:
                 time_cols.append(epiweek)
             return epiweek
         elif unit == 'month':
             year_month = date.strftime("%Y-%m")
             if year_month not in time_cols:
                 time_cols.append(year_month)
             return year_month
         elif unit == 'year':
             year = date.strftime("%Y")
             if year not in time_cols:
                 time_cols.append(year)
             return year
         elif unit == 'full':
             return 'total'
     else:
         if unit == 'full':
             return 'total'
         else:
             return value
Example #4
def week_value_to_week(value: int) -> Week:
    year, week = value // 100, value % 100
    if year < date.min.year:
        return Week(date.min.year, 1)
    if year > date.max.year - 1:
        return Week(
            date.max.year - 1,
            1)  # minus 1 since internally it does some checks with a year + 1
    return Week(year=year, week=week)
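A quick usage sketch (input values chosen for illustration), showing the normal path and the clamping described in the comment:

from epiweeks import Week

w = week_value_to_week(202012)
print((w.year, w.week))    # (2020, 12)

w = week_value_to_week(999905)   # year 9999 is above date.max.year - 1
print((w.year, w.week))    # (9998, 1)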
Example #5
 def year_week(y, w):
     try:
         if w not in ['', np.nan, None] and y not in ['', np.nan, None]:
             week = Week(int(y), int(w))
             date = week.startdate() + timedelta(3)
             return date
         else:
             return ''
     except:
         return ''
def add_epi_dates(df):
    '''
    Adds epi_week and epi_year to dataframe.
    '''
    df['epi_week'] = df.date.apply(lambda x: Week.fromdate(x).week)
    df['epi_year'] = df.date.apply(lambda x: Week.fromdate(x).year)

    df = df[['epi_week', 'epi_year', 'date', 'location', 'location_name', 
             'cum_death', 'inc_death', 'cum_case', 'inc_case']]
    return df
def grabForecastWeeks():
    from epiweeks import Week, Year
    thisWeek = Week.thisweek()

    weeks = [Week(2019, 40)]
    while weeks[-1] < thisWeek:
        weeks.append(weeks[-1] + 1)
    formattedWeeks = [
        int("{:04d}{:02d}".format(x.year, x.week)) for x in weeks
    ]
    return formattedWeeks
 def addLag(obs):
     EW = str(obs.EW)
     lag = int(obs.lag)
     yr,wk = int(EW[:4]),int(EW[4:])
     surveillanceWeek = Week(yr,wk) + lag
     obs['surveillanceWeek'] = "{:04d}{:02d}".format(surveillanceWeek.year, surveillanceWeek.week)
     return obs
def get_coverage(dashboard_signal: DashboardSignal,
                 metadata) -> List[DashboardSignalCoverage]:
    """Get the most recent coverage for the signal."""
    count_by_geo_type_df = pd.read_csv(
        COVERAGE_URL.format(source=dashboard_signal.source,
                            signal=dashboard_signal.covidcast_signal))
    try:
        count_by_geo_type_df["time_value"] = count_by_geo_type_df[
            "time_value"].apply(
                lambda x: pd.to_datetime(str(x), format="%Y%m%d"))
    except ValueError:  # time_value is an epiweek (YYYYWW), not a YYYYMMDD date
        count_by_geo_type_df["time_value"] = count_by_geo_type_df[
            "time_value"].apply(
                lambda x: pd.to_datetime(Week(x // 100, x % 100).startdate()))

    signal_coverage_list = []

    for _, row in count_by_geo_type_df.iterrows():
        signal_coverage = DashboardSignalCoverage(
            signal_id=dashboard_signal.db_id,
            date=row['time_value'].date(),
            geo_type='county',
            count=row['count'])
        signal_coverage_list.append(signal_coverage)

    return signal_coverage_list
def computeLogScores(forecastsAndILI, forecastWeek, iliEW):
    if forecastsAndILI.shape[0] == 0:
        return pd.DataFrame()

    from datetime import datetime
    calendarEW = Week.thisweek()
    dayOfWeek = datetime.today().weekday()
    if dayOfWeek in {5, 6}:  # Saturday or Sunday
        nextEW = calendarEW + 1  # should be referenced from the next week
        calendarWeek = "{:04d}{:02d}".format(nextEW.year, nextEW.week)
    else:
        calendarWeek = "{:04d}{:02d}".format(calendarEW.year, calendarEW.week)

    subsetToProbabilities = forecastsAndILI.loc[
        (forecastsAndILI.bin_start_incl <= forecastsAndILI.wili)
        & (forecastsAndILI.bin_end_notincl > forecastsAndILI.wili), :]
    subsetToProbabilities['logScore'] = np.log(
        [float(x) for x in subsetToProbabilities.value])
    logScores = subsetToProbabilities.loc[:, [
        'model', 'location', 'target', 'region', 'lag', 'releaseEW',
        'releaseDate', 'wili', 'logScore'
    ]]
    logScores[
        'surveillanceWeek'] = forecastWeek  # this is the most recent week of data available
    logScores[
        'calendarWeek'] = calendarWeek  # this is the present week in real-time
    logScores['targetWeek'] = iliEW  # this is the target week of forecasting

    return logScores
Example #11
def _date_to_api_string(date: date, time_type: str = "day") -> str:  # pylint: disable=W0621
    """Convert a date object to a YYYYMMDD or YYYYMM string expected by the API."""
    if time_type == "day":
        date_str = date.strftime("%Y%m%d")
    elif time_type == "week":
        date_str = Week.fromdate(date).cdcformat()
    return date_str
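A small usage sketch (dates chosen arbitrarily):

from datetime import date

print(_date_to_api_string(date(2021, 3, 10)))           # "20210310"
print(_date_to_api_string(date(2021, 3, 10), "week"))   # "202110" (CDC epiweek 10 of 2021)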
Example #12
def export_csv(df, geo_name, sensor, export_dir, start_date):
    """Export data set in format expected for injestion by the API.

    Parameters
    ----------
    df: pd.DataFrame
        data frame with columns "geo_id", "timestamp", and "val"
    geo_name: str
        name of the geographic region, such as "state" or "hrr"
    sensor: str
        name of the sensor; only used for naming the output file
    export_dir: str
        path to location where the output CSV files to be uploaded should be stored
    start_date: datetime.datetime
        The first date to report
    """
    df = df.copy()
    df = df[df["timestamp"] >= start_date]

    for date in df["timestamp"].unique():
        t = Week.fromdate(pd.to_datetime(str(date)))
        date_short = "weekly_" + str(t.year) + str(t.week).zfill(2)
        export_fn = f"{date_short}_{geo_name}_{sensor}.csv"
        result_df = df[df["timestamp"] == date][[
            "geo_id", "val", "se", "sample_size"
        ]]
        result_df.to_csv(f"{export_dir}/{export_fn}",
                         index=False,
                         float_format="%.8f")
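A hypothetical call (the toy DataFrame and the /tmp output directory are made up for illustration) that writes one weekly file per distinct timestamp:

import pandas as pd
from datetime import datetime

df = pd.DataFrame({
    "geo_id": ["ca", "ny"],
    "timestamp": pd.to_datetime(["2020-11-02", "2020-11-02"]),
    "val": [1.5, 2.5],
    "se": [0.1, 0.2],
    "sample_size": [100, 200],
})
export_csv(df, "state", "my_sensor", "/tmp", datetime(2020, 1, 1))
# should produce /tmp/weekly_202045_state_my_sensor.csv (2020-11-02 falls in CDC epiweek 45)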
Example #13
def plot_cummulative_sampling_fraction( df ):
    df["epiweek"] = df["date"].apply( lambda x: Week.fromdate(x).startdate() )
    plot_df = df.groupby( "epiweek" ).agg( new_cases = ("new_cases", "sum"), new_sequences = ("new_sequences", "sum" ) )
    plot_df = plot_df.loc[plot_df["new_sequences"]>0]
    plot_df["fraction"] = plot_df["new_sequences"] / plot_df["new_cases"]
    plot_df = plot_df.reset_index()

    fig = go.Figure()
    fig.add_trace( go.Scattergl( x=plot_df["epiweek"], y=plot_df["fraction"],
                                 mode='lines',
                                 name='Fraction',
                                 line={ "color" : '#767676', "width" : 4 } ) )

    _add_date_formating( fig )

    fig.update_layout( yaxis_tickformat='.1%' )

    cleaned_array = np.log10( plot_df.loc[plot_df["fraction"] > 0, "fraction"] )
    cleaned_array = cleaned_array[~np.isinf( cleaned_array )]

    min_lim = np.floor( cleaned_array.min() )
    max_lim = np.ceil( cleaned_array.max() )

    fig.update_yaxes( type="log", title="<b>Cases sequenced (%)</b>" )
    fig.update_xaxes( range=get_date_limits( plot_df["epiweek"] ) )

    return fig
def computeTargetILIepiWeek(row):
    forecastWeek = int(row.surveillanceWeek)
    weekAhead = int(row.Target.replace(' wk ahead', ''))
    iliYear, iliWeek = int(str(forecastWeek)[:4]), int(str(forecastWeek)[4:])
    iliEW = Week(iliYear, iliWeek) + int(weekAhead)
    iliEW = int("{:04d}{:02d}".format(iliEW.year, iliEW.week))
    row['targetWeek'] = iliEW
    return row
Example #15
def parse_filtered_metadata(metadata_file, tip_to_tree, label_fields, tree_fields, table_fields, database_date_column):
    
    query_dict = {}
    query_id_dict = {}

    closest_seqs = set()

    tree_to_tip = defaultdict(list)

    with open(metadata_file, "r", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        headers = reader.fieldnames   

    with open(metadata_file, "r", encoding="utf-8") as f:
        in_data = csv.DictReader(f)
        for sequence in in_data:
            
            country = sequence["country"]
            query_id = sequence['query_id']
            query_name = sequence['query']
            closest_name = sequence["closest"]
            
            sample_date = sequence[database_date_column] #this may need to be flexible if using a different background database

            closest_distance = sequence["SNPdistance"]
            snps = sequence['SNPs']

            if query_id not in query_id_dict: #it's in the fasta file and in the db, this should take the db
               
                new_taxon = taxon(query_name, country, label_fields, tree_fields, table_fields)

                new_taxon.query_id = query_id

                if query_name == closest_name: #if it's in database, get its sample date
                    new_taxon.in_db = True
                    new_taxon.sample_date = sample_date
                    new_taxon.epiweek = Week.fromdate(convert_date(sample_date))
                    new_taxon.closest = "NA"
                else:
                    new_taxon.closest = closest_name
                    new_taxon.closest_distance = closest_distance
                    new_taxon.snps = snps
                    closest_seqs.add(closest_name)
                    
                if query_name in tip_to_tree:
                    relevant_tree = tip_to_tree[query_name]
                else:
                    relevant_tree = "NA"
                new_taxon.tree = relevant_tree

                tree_to_tip[relevant_tree].append(new_taxon)
            
                query_dict[query_name] = new_taxon
                query_id_dict[query_id] = new_taxon
            
    return query_dict, query_id_dict, tree_to_tip, closest_seqs
Example #16
 def get_epiweeks(value):
     if value[0].isdecimal():
         date = pd.to_datetime(value)
         epiweek = str(Week.fromdate(date, system="cdc"))  # get epiweeks
         epiweek = epiweek[:4] + '_' + 'EW' + epiweek[-2:]
         if epiweek not in ew_cols:
             ew_cols.append(epiweek)
         return epiweek
     else:
         return value
    def __init__(self, record_id, country, introduction, acctrans, metadata): #phylotype):
    
        self.id = record_id
        self.introduction = introduction
        #self.phylotype = phylotype
        self.country = country
        self.acctrans = acctrans

        if metadata[0] == "None":
            self.date = "NA"
        else:
            self.date = metadata[0]

        self.adm2 = metadata[2]
        self.global_lineage = metadata[3]

        epiweek_prep = metadata[1]
        if epiweek_prep != "0" and epiweek_prep != "":
            self.epiweek = Week(2020, int(float(epiweek_prep)))
        elif epiweek_prep == "0":
            self.epiweek = Week(2019, 52)
        elif epiweek_prep == "":
            self.epiweek = "NA"


        self.unclear = False

        
        if "/" in self.date:
            print("ERROR DATE FORMAT INCORRECT")
        #     self.date_dt = dateparser.parse(self.date,  settings={"DATE_ORDER":'DMY'}).date()
        #     date_bits = self.date.split("/")

        # date_bits = self.date.split("-")
        # self.date_dt = dateparser.parse(self.date).date()

        date_bits = self.date.split("-")

        if len(date_bits) == 3:
            self.date_dt = dt.date(int(date_bits[0]), int(date_bits[1]), int(date_bits[2]))
        else:
            self.date_dt = "NA"
Example #18
    def __init__(self, record_id, lineage, metadata):
    
        self.id = record_id
        self.lineage = lineage
        
        self.country = metadata[0]
        self.date = metadata[1]

        epiweek_prep = metadata[2]
        if epiweek_prep != "":
            if float(epiweek_prep) != 0.0:
                self.epiweek = Week(2020, int(float(epiweek_prep)))
            else:
                self.epiweek = Week(2019, 52)
        else:
            self.epiweek = "NA"

        self.get_date_loc()

        self.travel_history = False
Example #19
 def recurse(node):
     value = node.get("node_attrs", {}).get(EPIKEY, {}).get("value", False)
     if value:
         # we validate using both the epiweeks package and a regex (epiweeks will perform coercion of non-valid data into valid data)
         if not re.match(r'^(\d{4})(\d{2})$', value):
             raise ValueError(
                 f"Epiweek value {value} was not in format YYYYWW.")
         week = Week.fromstring(
             value, system="cdc")  # raises ValueError if not valid
         observed_values.add(week)
     for child in node.get("children", []):
         recurse(child)
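The same two-step check (regex first, then epiweeks parsing) as a standalone sketch on a single value; the helper name is hypothetical:

import re
from epiweeks import Week

def validate_epiweek(value: str) -> Week:
    # the regex rejects strings that epiweeks would silently coerce into a valid week
    if not re.match(r'^(\d{4})(\d{2})$', value):
        raise ValueError(f"Epiweek value {value} was not in format YYYYWW.")
    return Week.fromstring(value, system="cdc")  # raises ValueError if not a valid week

print(validate_epiweek("202042").weektuple())  # (2020, 42)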
Example #20
 def addEpiWeek(self):
     if 'epiweek' in self.data:
         self.data.epiweek = self.data.epiweek.astype(str)
         return self
     from datetime import date
     from epiweeks import Week, Year
     epiweeks = []
     for dt in self.data.date:
         yr, mnth, day = dt.year, dt.month, dt.day
         epiweek = Week.fromdate(date(yr, mnth, day))  # Week.fromdate expects a date object
         epiweeks.append("{:04d}{:02d}".format(epiweek.year, epiweek.week))
     self.data['epiweek'] = epiweeks
     return self
Example #21
def _parse_datetimes(date_int: int,
                     time_type: str,
                     date_format: str = "%Y%m%d") -> Union[pd.Timestamp, float]:  # nan is returned as a float
    """Convert a date or epiweeks string into timestamp objects.

    Datetimes (length 8) are converted to their corresponding date, while epiweeks (length 6)
    are converted to the date of the start of the week. Returns nan otherwise

    Epiweeks use the CDC format.

    :param date_int: Int representation of date.
    :param time_type: Either "day" or "week".
    :param date_format: String of the date format to parse.
    :returns: Timestamp, or nan for an unrecognized time_type.
    """
    date_str = str(date_int)
    if time_type == "day":
        return pd.to_datetime(date_str, format=date_format)
    if time_type == "week":
        epiwk = Week(int(date_str[:4]), int(date_str[-2:]))
        return pd.to_datetime(epiwk.startdate())
    return np.nan
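Usage sketch, assuming the usual imports (pandas as pd, numpy as np, and Week from epiweeks):

print(_parse_datetimes(20200310, "day"))    # 2020-03-10 00:00:00
print(_parse_datetimes(202012, "week"))     # 2020-03-15, the start of CDC epiweek 2020W12
print(_parse_datetimes(20200310, "month"))  # nan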
Example #22
    def fromEpiWeek2ModelWeek(self, week):
        from epiweeks import Week, Year
        if type(week) is Week:
            pass
        elif type(week) is str:
            week = Week(int(week[:4]), int(week[4:]))
        elif type(week) is int:
            week = str(week)
            week = Week(int(week[:4]), int(week[4:]))

        numWeeks = 0
        w = Week(1970, 1)
        while True:
            if w.year < week.year:
                numWeeks += Year(w.year).totalweeks()
                w = Week(w.year + 1, 1)
            else:
                break
        while w < week:
            numWeeks += 1
            w += 1
        return numWeeks
Example #23
 def toFrom40Week(self):
     """ A variable from40 is created as the number of weeks from the current year or, if epiweek's week is less than 40, the past year's epidemic week number 40 to epiweek.
     """
     yr, wk = self.year, self.week
     if wk > 40:
         self.from40 = wk - 40
     else:
         _ref40week = Week(yr - 1, 40)
         _from40 = 0
         while _ref40week < self.epiWeek:
             _ref40week += 1
             _from40 += 1
         self.from40 = _from40
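A standalone sketch of the same season-offset computation (the function name here is hypothetical, not part of the class above):

from epiweeks import Week

def weeks_from_week40(ep_week):
    """Weeks elapsed since the most recent epidemic week 40."""
    if ep_week.week > 40:
        return ep_week.week - 40
    ref = Week(ep_week.year - 1, 40)
    n = 0
    while ref < ep_week:
        ref += 1
        n += 1
    return n

print(weeks_from_week40(Week(2020, 45)))  # 5
print(weeks_from_week40(Week(2021, 2)))   # 15 (2020 has 53 CDC weeks)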
Example #24
    def get_week_just_from_date(self, date):
        year_date = date.year
        year = year_date

        if date >= self.dates[year_date] and date < self.dates[year_date + 1]:
            year = year_date
        else:
            year = year_date + 1

        leap = self.get_leap(year)
        epi_date = date + timedelta(days=leap.days)
        epi_week = Week.fromdate(epi_date)
        return epi_week.week
Example #25
def regulation_release(state, grid, config, parameters, current_time):
    # compute the expected monthly release based on Biemans (2011)

    # TODO this is still written assuming monthly, but here's the epiweek for when that is relevant
    epiweek = Week.fromdate(current_time).week
    month = current_time.month
    streamflow_time_name = config.get(
        'water_management.reservoirs.streamflow_time_resolution')

    # initialize to the average flow
    state.reservoir_release = grid.reservoir_streamflow_schedule.mean(
        dim=streamflow_time_name).values

    # TODO what is k
    k = state.reservoir_storage_operation_year_start / (
        parameters.reservoir_regulation_release_parameter *
        grid.reservoir_storage_capacity)

    # TODO what is factor
    factor = np.where(
        grid.reservoir_runoff_capacity >
        parameters.reservoir_runoff_capacity_condition,
        (2.0 / grid.reservoir_runoff_capacity)**2.0, 0)

    # release is some combination of prerelease, average flow in the time period, and total average flow
    state.reservoir_release = np.where(
        (grid.reservoir_use_electricity > 0) |
        (grid.reservoir_use_irrigation > 0),
        np.where(
            grid.reservoir_runoff_capacity <= 2.0,
            k *
            grid.reservoir_prerelease_schedule.sel({
                streamflow_time_name: month
            }).values,
            k * factor *
            grid.reservoir_prerelease_schedule.sel({
                streamflow_time_name: month
            }).values + (1 - factor) * grid.reservoir_streamflow_schedule.sel({
                streamflow_time_name:
                month
            }).values),
        np.where(
            grid.reservoir_runoff_capacity <= 2.0,
            k * grid.reservoir_streamflow_schedule.mean(
                dim=streamflow_time_name).values,
            k * factor * grid.reservoir_streamflow_schedule.mean(
                dim=streamflow_time_name).values +
            (1 - factor) * grid.reservoir_streamflow_schedule.sel({
                streamflow_time_name:
                month
            }).values))
Example #26
def generate_week_periods(open_future_periods, page_limit, begin_period, direction, direction_change):
    weeks_to_display = {}

    # When the user first visits the period screen the begin_period variable is empty.
    # Therefore, use the current week as default.
    week = Week.thisweek("iso") + open_future_periods

    # If begin_period variable has a date, use it to calculate the weeks to display.
    if begin_period != '':
        week = Week.fromdate(datetime.datetime.strptime(begin_period, '%Y-%m-%d'), 'iso')
        # This logic fixes the week discrepancy when a user clicks + and then changes direction by pressing -, or vice versa.
        if direction_change:
            if direction == '+':
                week += page_limit - 1
            if direction == '-':
                week -= page_limit - 1

    # We should not open future dates for data entry. The -1 prevents opening this week.
    if direction == '+' and week + page_limit > Week.thisweek("iso") + open_future_periods:
        week = Week.thisweek("iso") + open_future_periods - page_limit - 1

    rng = range(page_limit, 0, -1) if direction == '+' else range(page_limit)

    for key, i in enumerate(rng):
        w = week + i if direction == '+' else week - (i + 1)
        weeks_to_display[str(key + 1)] = {
            "period": w.isoformat(),
            "display": "W{} - {} - {}".format(w.weektuple()[1], w.startdate(), w.enddate())
        }

        # Take the first week to calculate the beginning period in the next screen.
        if direction == '+' and i == page_limit:
            begin_period = str(w.enddate())
        # Take the final week to calculate the beginning week in the next screen.
        if direction == '-' and i == page_limit - 1:
            begin_period = str(w.startdate())

    return begin_period, weeks_to_display
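A hypothetical first-visit call (argument values are made up for illustration), paging backwards from the current ISO week:

begin, weeks = generate_week_periods(open_future_periods=0, page_limit=4,
                                     begin_period='', direction='-',
                                     direction_change=False)
for key, info in weeks.items():
    print(key, info["period"], info["display"])
# begin is the start date of the earliest displayed week, used as begin_period on the next screen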
    def __init__(self, record_id, country, lineage, metadata, pillar, sub_date):
    
        self.id = record_id
        self.lineage = lineage
        self.country = country
        self.pillar2 = pillar

        if metadata[0] == "None":
            self.date = "NA"
        else:
            date = metadata[0]

        self.adm2 = metadata[2]
        self.global_lineage = metadata[3]
        self.sequencing_centre = metadata[4]

        if metadata[1] != "":
            epiweek = int(metadata[1])
            if epiweek > 0 and epiweek < 54:
                self.epiweek = Week(2020, epiweek)
            elif epiweek >= 53:
                self.epiweek = Week(2021, epiweek - 53)
            elif epiweek == 0:
                self.epiweek = Week(2019, 52)
        else:
            self.epiweek = "NA"
        
        if "/" in date:
            print("ERROR DATE FORMAT INCORRECT")
        
        try:
            self.date_dt = dt.datetime.strptime(date, "%Y-%m-%d").date()
        except:
            self.date_dt = "NA"   
        try:
            self.sub_date = dt.datetime.strptime(sub_date, "%Y-%m-%d").date()
        except:
            self.sub_date = "NA"
Example #28
    def get_weekdates_range(self, year, week):
        firstdayofweek = Week(int(year), int(week)).startdate()
        lastdayofweek = firstdayofweek + timedelta(days=6)  # 6 days after the first day = last day of the week

        leap = self.get_leap(year)

        # shift both dates by the leap offset for this year (a zero offset leaves them unchanged)
        firstdayofweek = firstdayofweek + timedelta(days=leap.days)
        lastdayofweek = lastdayofweek + timedelta(days=leap.days)

        return (firstdayofweek, lastdayofweek)
Example #29
def reservoir_release(state, grid, config, parameters, current_time):
    # compute release from reservoirs

    # TODO so much logic was dependent on monthly, so still assuming monthly for now, but here's the epiweek for when that is relevant
    epiweek = Week.fromdate(current_time).week
    month = current_time.month

    # if it's the start of the operational year for the reservoir, set its start of op year storage to the current storage
    state.reservoir_storage_operation_year_start = np.where(
        state.reservoir_month_start_operations == month,
        state.reservoir_storage, state.reservoir_storage_operation_year_start)

    regulation_release(state, grid, config, parameters, current_time)

    storage_targets(state, grid, config, parameters, current_time)
def week_split_count(df):
    w = df[['CloseDate','ClosePrice']].copy()  # copy so the column assignments below do not warn
    w['week']=pd.to_datetime(w.CloseDate).apply(lambda x: '{}'.format(x.week))
    w['year']=pd.to_datetime(w.CloseDate).apply(lambda x: '{}'.format(x.year))
    ww = w[['week','year']].copy()
    ww.week=ww.week.apply(int)
    ww.year=ww.year.apply(int)
    ww=ww[ww.week!=53]
    ww['enddate'] = ww.apply(lambda row: pd.to_datetime(Week(row.year, row.week, 'iso').enddate()),axis=1)
    w['ds'] = ww['enddate']
    w=w[['ds','ClosePrice']]
    w.rename(columns={'ClosePrice':'y'},inplace=True)
    w=w.groupby('ds').count()
    w.reset_index(inplace=True)
    w['ds'] = w['ds'].apply(lambda x: pd.to_datetime(x))
    return w