예제 #1
0
    def _deserialize_cache(self, cache_result: dict):
        """Expands a cached payload back into date-keyed timelines per location.

        The cached "confirmed" and "deaths" sequences of each location are walked
        in lockstep; the n-th pair is keyed by the payload-level "first_date" plus
        n days. Missing/falsy counts are stored as 0.

        Arguments:
            cache_result {dict} -- cached payload holding a "locations" mapping and a "first_date" entry.

        Returns:
            dict -- location id mapped to a copy of its cached entry whose "confirmed" and "deaths" values are {formatted date: int} maps.
        """
        # The start date is read once from the payload and shared by all locations
        start = datetime.strptime(
            Functions.get_formatted_date(cache_result.get("first_date")), "%Y-%m-%d"
        )
        one_day = timedelta(days=1)

        deserialized = {}
        for location_id, entry in cache_result["locations"].items():
            confirmed_by_date, deaths_by_date = {}, {}
            day = start  # Restart the calendar for each location

            for confirmed, deaths in zip(entry["confirmed"], entry["deaths"]):
                day_key = Functions.to_format_date(day)
                confirmed_by_date[day_key] = int(confirmed or 0)
                deaths_by_date[day_key] = int(deaths or 0)
                day += one_day

            # Shallow copy of the cached entry with the timelines swapped in
            deserialized[location_id] = {
                **entry,
                "confirmed": confirmed_by_date,
                "deaths": deaths_by_date,
            }

        return deserialized
예제 #2
0
    def _serialize_entry(
        self,
        location_id: str,
        grouped_locations: object,
        confirmed_map: dict,
        deaths_map: dict,
    ) -> dict:
        """Builds a JSON-friendly dictionary for one location entry. Used for caching.

        Arguments:
            location_id {str} -- location id of the entry.
            grouped_locations {object} -- mapping of location id to its unserialized entry.
            confirmed_map {dict} -- map of timestamps to confirmed cases; its first key supplies "first_date".
            deaths_map {dict} -- map of timestamps to deaths.

        Returns:
            dict -- copy of the location entry with "first_date" plus the confirmed/deaths values flattened to lists.
        """
        # Start from a shallow copy so the grouped entry itself is not modified
        serialized = {**grouped_locations[location_id]}

        # The first key of the confirmed map is taken as the start of the timeline
        serialized["first_date"] = Functions.get_formatted_date(
            next(iter(confirmed_map)), "%Y-%m-%d")

        # Only the values are stored; the dates are implied by first_date + position
        serialized["confirmed"] = [*confirmed_map.values()]
        serialized["deaths"] = [*deaths_map.values()]
        return serialized
예제 #3
0
    async def get_state_data(self):
        """Aggregates county-level results into one entry per state.

        County results and state properties are fetched concurrently. Counties
        are grouped by the first two "@"-separated parts of their id, and their
        confirmed/deaths timelines are summed date by date.

        Returns:
            dict values view, str -- the aggregated JhuLocation entries and the last updated value reported by the data service.
        """
        (results_by_county, last_updated), state_data = await asyncio.gather(
            self.DATA_SERVICE.get_data(self.ENDPOINT),
            self.LOCATION_SERVICE.get_state_data(),
        )

        # Aggregate results on a per state basis
        state_results = {}
        for county_result in results_by_county:
            state_key = tuple(county_result.id.split("@")[:2])

            aggregate = state_results.get(state_key)
            if aggregate is None:
                # Fall back to empty properties when the state is unknown
                if state_key in state_data:
                    props = state_data[state_key]
                else:
                    props = LocationProperties()

                aggregate = JhuLocation(
                    id=Functions.to_location_id(state_key),
                    uid=props.uid,
                    iso2=props.iso2,
                    iso3=props.iso3,
                    code3=props.code3,
                    fips=props.fips,
                    county=props.admin2,
                    state=county_result.state,
                    country=county_result.country,
                    latitude=props.coordinates.latitude,
                    longitude=props.coordinates.longitude,
                    last_updated=last_updated,
                    timelines={
                        "confirmed": {},
                        "deaths": {}
                    },
                    latest=None,
                )
                state_results[state_key] = aggregate

            # Add this county's counts to the running per-date totals
            for stat in ("confirmed", "deaths"):
                totals = aggregate.timelines[stat]
                for day, count in county_result.timelines[stat].category.items():
                    totals[day] = totals.get(day, 0) + count

        # Remap dicts to Category and derive the latest statistics
        for aggregate in state_results.values():
            timelines = aggregate.timelines
            timelines["confirmed"] = Category(timelines["confirmed"])
            timelines["deaths"] = Category(timelines["deaths"])
            aggregate.latest = Statistics(
                timelines["confirmed"].latest,
                timelines["deaths"].latest).to_dict()

        return state_results.values(), last_updated
예제 #4
0
def test__get_formatted_date__given_empty_initial_date__success():
    """An empty initial date formats the patched utcnow() value (TEST_DATETIME)."""
    with mock.patch("backend.utils.functions.datetime") as patched_datetime:
        # Pin "now" so the expected string below is deterministic
        patched_datetime.utcnow.return_value = TEST_DATETIME
        formatted = Functions.get_formatted_date("")

    assert formatted == "2020-05-21"
예제 #5
0
    async def get_data(self, endpoint: str, data_type: str = ""):
        """Retrieves data from the New York Times and returns it as a list of results.

        The work is delegated to two helpers:
            - _group_locations_cached groups the data by location and reports whether it came from cache.
            - _build_results_cached turns the grouped data into the final results.

        Arguments:
            endpoint {str} -- string that represents endpoint to get data from.

        Keyword Arguments:
            data_type {str} -- forwarded unchanged to both helpers (default: {''})

        Returns:
            Location[], str -- returns list of location stats and the last updated date.
        """
        # NOTE(review): Container is not referenced in this method; the import is kept as-is
        from backend.utils.containers import Container

        # Step 1: grouped data plus a flag telling whether it was served from cache
        grouping = await self._group_locations_cached(endpoint, data_type)
        grouped_locations, from_cache = grouping

        # Step 2: stamp the results with the date computed for this call
        last_updated = Functions.get_formatted_date()
        locations = await self._build_results_cached(
            grouped_locations,
            data_type,
            last_updated,
            from_cache,
        )

        logger.info("Finished transforming NYT results.")
        return locations, last_updated
예제 #6
0
    async def get_country_data(self) -> (List[JhuLocation], str):
        """Sums every county-level timeline into a single nationwide entry.

        Notes: Function currently designed only for US data; the identifiers and
        coordinates of the aggregate entry are hardcoded.

        Returns:
            List[JhuLocation], str -- one-element list holding the aggregate entry, and the last updated value reported by the data service.
        """
        (county_payload, ) = await asyncio.gather(
            self.DATA_SERVICE.get_data(self.ENDPOINT), )
        results_by_county, last_updated = county_payload

        us_total = JhuLocation(
            id=Functions.to_location_id(("US", )),
            uid="840",
            iso2="US",
            iso3="USA",
            code3="USA",
            fips="",
            county="",
            state="",
            country="US",
            latitude="37.0902",  # TODO: Do not hardcode
            longitude="-95.7129",
            last_updated=last_updated,
            timelines={
                "confirmed": {},
                "deaths": {}
            },
            latest=None,
        )

        # Accumulate per-date totals across all counties, one statistic at a time
        for county in results_by_county:
            for stat in ("confirmed", "deaths"):
                running = us_total.timelines[stat]
                for day, count in county.timelines[stat].category.items():
                    running[day] = running.get(day, 0) + count

        # Wrap the plain dicts in Category and derive the latest statistics
        timelines = us_total.timelines
        timelines["confirmed"] = Category(timelines["confirmed"])
        timelines["deaths"] = Category(timelines["deaths"])
        us_total.latest = Statistics(
            timelines["confirmed"].latest,
            timelines["deaths"].latest,
        ).to_dict()

        return [us_total], last_updated
예제 #7
0
    def _deserialize_data(self, cached_result: dict) -> object:
        """Rebuilds date-keyed timelines from the data stored in cache.

        Each location carries its own "first_date"; the n-th confirmed/deaths
        pair of that location is keyed by first_date plus n days. Missing/falsy
        counts are stored as 0.

        Arguments:
            cached_result {dict} -- serialized version of data, with a "locations" mapping.

        Returns:
            object -- dict of location id to a copy of its entry with "confirmed" and "deaths" as {"YYYY-MM-DD": int} maps.
        """
        one_day = timedelta(days=1)
        deserialized = {}

        # Snapshot the items so the loop works on a fixed list of locations
        for location_id, entry in list(cached_result["locations"].items()):
            confirmed_by_date, deaths_by_date = {}, {}
            day = datetime.strptime(
                Functions.get_formatted_date(entry["first_date"]),
                "%Y-%m-%d",
            )

            for confirmed, deaths in zip(entry["confirmed"], entry["deaths"]):
                # First 10 chars of str(datetime) are the YYYY-MM-DD part (avoids strftime)
                day_key = str(day)[:10]
                confirmed_by_date[day_key] = int(confirmed or 0)
                deaths_by_date[day_key] = int(deaths or 0)
                day += one_day

            # Shallow copy of the cached entry with the timelines swapped in
            deserialized[location_id] = {
                **entry,
                "confirmed": confirmed_by_date,
                "deaths": deaths_by_date,
            }

        return deserialized
예제 #8
0
def test__to_location_tuple__given_valid_params__success(
        location_id, expected):
    """A valid location id converts to the expected tuple."""
    actual = Functions.to_location_tuple(location_id)
    assert actual == expected
예제 #9
0
    async def get_data(
        self, endpoint: str, data_type: str = ""
    ) -> (List[JhuLocation], str):
        """Method that retrieves data from JHU CSSEGSI.

        Fetches the "confirmed" and "deaths" CSV data concurrently, merges them
        per location and wraps each location in a JhuLocation. The duration of
        the fetch and merge steps is logged in milliseconds.

        Arguments:
            endpoint {str} -- string that represents endpoint to get data from.
            data_type {str} -- not read by this method's body.

        Returns:
            Location[], str -- returns list of location stats and the last updated date.
        """
        stat_names = ("confirmed", "deaths")

        # Step 1: download both statistics at the same time
        _start = time.time() * 1000.0
        promises = await asyncio.gather(
            *(self._fetch_csv_data(endpoint, stat) for stat in stat_names)
        )
        _end = time.time() * 1000.0
        logger.info(f"Elapsed _fetch_csv_data for all stats {str(_end-_start)}ms")

        # Step 2: label each download with its statistic, then merge per location
        _start = time.time() * 1000.0
        tagged_promises = self._tag_promised_results(
            list(stat_names), promises
        )  # [("confirmed", ...), ...]
        location_result = await self._zip_results(tagged_promises)
        _end = time.time() * 1000.0
        logger.info(f"Elapsed _zip_results for all stats {str(_end-_start)}ms")

        # Step 3: one JhuLocation per merged entry, keyed by location id
        last_updated = Functions.get_formatted_date()
        locations = []

        for location_id, events in location_result.items():
            confirmed = Category(events["confirmed"])
            deaths = Category(events["deaths"])

            jhu_location = JhuLocation(
                id=location_id,
                uid=events["uid"],
                iso2=events["iso2"],
                iso3=events["iso3"],
                code3=events["code3"],
                fips=events["FIPS"],
                county=events["Admin2"],
                state=events["Province_State"],
                country=events["Country_Region"],
                latitude=events["Lat"],
                longitude=events["Long_"],
                last_updated=last_updated,
                timelines={"confirmed": confirmed, "deaths": deaths},
                latest=Statistics(
                    confirmed=confirmed.latest, deaths=deaths.latest
                ).to_dict(),
            )
            locations.append(jhu_location)

        logger.info("Finished transforming JHU results.")
        return locations, last_updated
예제 #10
0
def test__try_getattr__success(obj, attr, expected):
    """try_getattr returns the expected value for the given object/attribute pair."""
    actual = Functions.try_getattr(obj, attr)
    assert actual == expected
예제 #11
0
    def _populate_location_result(
        self,
        stat: str,
        locations: List[dict],
        location_result: dict,
        to_serialize: dict,
    ):
        """Fills both output maps with the timeline of one statistic for every location.

        Arguments:
            stat {str} -- Statistic we are populating; used as the key of the timeline inside each entry.
            locations {List[dict]} -- List of maps representing location info, including the date columns.
            location_result {dict} -- Map of finalized location data; timelines are stored as {formatted date: int}.
            to_serialize {dict} -- Map with a "locations" entry mirroring location_result but holding timelines as plain lists, plus a "first_date" set once from the first date column seen.
        """
        id_columns = ("Country_Region", "Province_State", "Admin2")
        # Columns copied under their own name; "UID" is stored under "uid"
        copied_columns = (
            "iso2",
            "iso3",
            "code3",
            "FIPS",
            "Admin2",
            "Province_State",
            "Country_Region",
            "Lat",
            "Long_",
        )

        for location in locations:
            location_tuple = tuple(
                self._get_field_from_map(location, column) for column in id_columns
            )
            serialized_id = Functions.to_location_id(location_tuple)
            dates = self._filter_date_columns(location.items())

            # First time this location is seen: create its entry in both maps
            if serialized_id not in location_result:
                entry = {"uid": self._get_field_from_map(location, "UID")}
                for column in copied_columns:
                    entry[column] = self._get_field_from_map(location, column)
                entry["confirmed"] = {}
                entry["deaths"] = {}

                location_result[serialized_id] = entry
                to_serialize["locations"][serialized_id] = dict(entry)

            # Record the count of every date column under its reformatted date
            timeline = location_result[serialized_id][stat]
            for raw_date, amount in dates.items():
                count = int(amount or 0)
                timeline[Functions.get_formatted_date(raw_date, "%m/%d/%y")] = count

            # The serialized form keeps only the counts, in insertion order
            to_serialize["locations"][serialized_id][stat] = list(timeline.values())

            # Track the first date with new object entry
            if "first_date" not in to_serialize:
                to_serialize["first_date"] = Functions.get_formatted_date(
                    next(iter(dates)), "%m/%d/%y"
                )
예제 #12
0
async def get_all(
    request: Request,
    source: Source = Source.NYT,
    fips: str = None,
    state: str = None,
    timelines: bool = False,
    properties: bool = False,
):
    """Returns state-level locations, filtered by the request's query parameters.

    Every query parameter other than "source", "timelines" and "properties" is
    treated as an attribute filter: a location is kept when the attribute
    (looked up through Functions.try_getattr) stringifies to "__IGNORE__" or
    matches the parameter value case-insensitively.

    Arguments:
        request {Request} -- incoming request; supplies the query parameters and the services on request.state.
        source {Source} -- not read directly by this function's body.
        fips {str} -- not read directly; any filtering comes from the raw query parameters.
        state {str} -- not read directly; any filtering comes from the raw query parameters.
        timelines {bool} -- forwarded to each location's to_dict().
        properties {bool} -- when true, state properties are fetched and applied to matching locations; also forwarded to to_dict().

    Returns:
        dict -- {"latest": summed statistics, "locations": list of location dicts}.
    """
    # Everything left after dropping the non-filter parameters is a filter
    filters = dict(request.query_params)
    for reserved in ("source", "timelines", "properties"):
        filters.pop(reserved, None)

    # Fetch data
    location_data, _ = await request.state.data_source.get_state_data()

    # TODO: Refactor filtering
    for key, value in filters.items():
        key = key.lower()
        # Remove access to private/internal fields (strips leading/trailing underscores)
        value = value.lower().strip("__")

        # State abbreviations are expanded before comparing
        if key == "state" and value.upper() in STATE_ABBR__STATE_NAME:
            value = get_state_name(value)

        location_data = [
            location for location in location_data
            if str(Functions.try_getattr(location, key)) == "__IGNORE__"
            or str(Functions.try_getattr(location, key)).lower() == str(value)
        ]

    # Totals over whatever survived the filters
    latest = Statistics(
        confirmed=sum(location.timelines["confirmed"].latest
                      for location in location_data),
        deaths=sum(location.timelines["deaths"].latest
                   for location in location_data),
    )

    state_data_map = None
    if properties:
        state_data_map = await request.state.location_data_service.get_state_data()

    locations_response = []
    for location in location_data:
        state_key = (location.country, location.state)
        if properties and state_key in state_data_map:
            location.set_properties(state_data_map[state_key].to_dict())
        locations_response.append(location.to_dict(timelines, properties))

    return {
        "latest": latest.to_dict(),
        "locations": locations_response,
    }
예제 #13
0
def test__to_location_tuple__given_invalid_params__error(location_id):
    """An invalid location id makes to_location_tuple raise ValueError."""
    expected_error = ValueError
    with pytest.raises(expected_error):
        Functions.to_location_tuple(location_id)
예제 #14
0
def test__to_format_date__success(datetime, expected):
    """to_format_date renders the given datetime as the expected string."""
    formatted = Functions.to_format_date(datetime)
    assert formatted == expected
예제 #15
0
def test__to_location_id__given_invalid_parameters__error(tuple_id):
    """An invalid tuple makes to_location_id raise ValueError."""
    # No assert on the call: pytest.raises alone decides the outcome, so a
    # missing exception is reported as "DID NOT RAISE" rather than as an
    # unrelated AssertionError on the return value.
    with pytest.raises(ValueError):
        Functions.to_location_id(tuple_id)
예제 #16
0
def test__to_location_id__given_valid_parameters__success(
        tuple_id, expected_value):
    """A valid tuple converts to the expected location id."""
    location_id = Functions.to_location_id(tuple_id)
    assert location_id == expected_value
예제 #17
0
def test__get_formatted_date__given_valid_parameters__error(
        initial_date, format):
    """The given date/format pair makes get_formatted_date raise ValueError.

    NOTE(review): the test name says "valid_parameters" although a ValueError is
    expected -- presumably "invalid" was meant; the name is kept so test ids stay stable.
    """
    # No assert on the call: pytest.raises alone decides the outcome, so a
    # missing exception is reported as "DID NOT RAISE" rather than as an
    # unrelated AssertionError on the return value.
    with pytest.raises(ValueError):
        Functions.get_formatted_date(initial_date, format)
예제 #18
0
def test__get_formatted_date__given_valid_parameters__success(
        initial_date, format, expected_value):
    """A valid date/format pair is reformatted to the expected string."""
    formatted = Functions.get_formatted_date(initial_date, format)
    assert formatted == expected_value