def _deserialize_cache(self, cache_result: dict):
    """Rebuild per-location timeline maps from a serialized cache entry.

    The cached form stores each location's "confirmed"/"deaths" as flat
    lists anchored at the shared "first_date"; this expands them back into
    {formatted-date: count} maps.
    """
    start = datetime.strptime(
        Functions.get_formatted_date(cache_result.get("first_date")), "%Y-%m-%d"
    )

    deserialized = {}
    for location_id, entry in cache_result["locations"].items():
        confirmed_timeline = {}
        deaths_timeline = {}
        # Consecutive list positions correspond to consecutive days,
        # each location's series restarting at the shared first date.
        for offset, (confirmed, deaths) in enumerate(
            zip(entry["confirmed"], entry["deaths"])
        ):
            day = Functions.to_format_date(start + timedelta(days=offset))
            confirmed_timeline[day] = int(confirmed or 0)
            deaths_timeline[day] = int(deaths or 0)

        deserialized[location_id] = {
            **entry,
            "confirmed": confirmed_timeline,
            "deaths": deaths_timeline,
        }

    return deserialized
def _serialize_entry(
    self,
    location_id: str,
    grouped_locations: object,
    confirmed_map: dict,
    deaths_map: dict,
) -> dict:
    """Flatten one location's data into a JSON-serializable dict for caching.

    Arguments:
        location_id {str} -- location id of the entry.
        grouped_locations {object} -- unserialized entry for this location.
        confirmed_map {dict} -- map containing timestamps for confirmed cases.
        deaths_map {dict} -- map containing timestamps for deaths.

    Returns:
        dict -- serialized version of this location entry.
    """
    entry = {**grouped_locations[location_id]}
    # The first key of the confirmed map anchors the timeline; only the
    # counts themselves are stored, as flat day-ordered lists.
    first_key = next(iter(confirmed_map))
    entry["first_date"] = Functions.get_formatted_date(first_key, "%Y-%m-%d")
    entry["confirmed"] = list(confirmed_map.values())
    entry["deaths"] = list(deaths_map.values())
    return entry
async def get_state_data(self):
    """Fetch county-level results and roll them up into per-state totals.

    Returns:
        (values view of JhuLocation, str) -- aggregated state locations and
            the last-updated timestamp reported by the data service.
    """
    county_promise, state_promise = await asyncio.gather(
        self.DATA_SERVICE.get_data(self.ENDPOINT),
        self.LOCATION_SERVICE.get_state_data(),
    )
    results_by_county, last_updated = county_promise
    state_data = state_promise

    # Aggregate results on a per state basis
    state_results = {}
    for result in results_by_county:
        key = tuple(result.id.split("@")[:2])

        if key not in state_results:
            # Fall back to empty properties when the state is unknown.
            props = state_data[key] if key in state_data else LocationProperties()
            state_results[key] = JhuLocation(
                id=Functions.to_location_id(key),
                uid=props.uid,
                iso2=props.iso2,
                iso3=props.iso3,
                code3=props.code3,
                fips=props.fips,
                county=props.admin2,
                state=result.state,
                country=result.country,
                latitude=props.coordinates.latitude,
                longitude=props.coordinates.longitude,
                last_updated=last_updated,
                timelines={"confirmed": {}, "deaths": {}},
                latest=None,
            )

        # Sum this county's daily counts into the state's running timelines.
        aggregate = state_results[key]
        for stat in ("confirmed", "deaths"):
            timeline = aggregate.timelines[stat]
            for day, count in result.timelines[stat].category.items():
                timeline[day] = timeline.get(day, 0) + count

    # Remap plain dicts to Category and derive the latest totals.
    for state in state_results.values():
        state.timelines["confirmed"] = Category(state.timelines["confirmed"])
        state.timelines["deaths"] = Category(state.timelines["deaths"])
        state.latest = Statistics(
            state.timelines["confirmed"].latest,
            state.timelines["deaths"].latest,
        ).to_dict()

    return state_results.values(), last_updated
def test__get_formatted_date__given_empty_initial_date__success():
    """An empty initial date falls back to the (mocked) current UTC date."""
    with mock.patch("backend.utils.functions.datetime") as datetime_mock:
        datetime_mock.utcnow.return_value = TEST_DATETIME
        formatted = Functions.get_formatted_date("")

    assert formatted == "2020-05-21"
async def get_data(self, endpoint: str, data_type: str = ""):
    """Method that retrieves data from the New York Times.

    It:
        - Gets all the data aggregated by location_id (state, county, fips)
          from cache or from source.
        - Builds the result as a consumable list, storing it if it is new.

    Arguments:
        endpoint {str} -- string that represents endpoint to get data from.

    Keyword Arguments:
        data_type {str} -- cache-key discriminator for the data
            (default: {''})

    Returns:
        Location[], str -- returns list of location stats and the last
            updated date.
    """
    # NOTE(review): Container is never referenced below; the import may be
    # kept for its side effects (dependency wiring) — confirm before removing.
    from backend.utils.containers import Container

    # Aggregate data by location, consulting the cache before the source.
    grouped_locations, from_cache = await self._group_locations_cached(
        endpoint, data_type
    )

    last_updated = Functions.get_formatted_date()
    locations = await self._build_results_cached(
        grouped_locations, data_type, last_updated, from_cache
    )

    logger.info("Finished transforming NYT results.")
    return locations, last_updated
async def get_country_data(self) -> (List[JhuLocation], str):
    """Roll all county-level US results into a single country-wide location.

    Notes: Function currently designed only for US data.
    """
    gathered = await asyncio.gather(
        self.DATA_SERVICE.get_data(self.ENDPOINT),
    )
    results_by_county, last_updated = gathered[0]

    us_location = JhuLocation(
        id=Functions.to_location_id(("US", )),
        uid="840",
        iso2="US",
        iso3="USA",
        code3="USA",
        fips="",
        county="",
        state="",
        country="US",
        latitude="37.0902",  # TODO: Do not hardcode
        longitude="-95.7129",
        last_updated=last_updated,
        timelines={"confirmed": {}, "deaths": {}},
        latest=None,
    )

    # Sum every county's daily counts into the national timelines.
    for county in results_by_county:
        for stat in ("confirmed", "deaths"):
            timeline = us_location.timelines[stat]
            for day, count in county.timelines[stat].category.items():
                timeline[day] = timeline.get(day, 0) + count

    # Remap plain dicts to Category and derive the latest totals.
    us_location.timelines["confirmed"] = Category(
        us_location.timelines["confirmed"])
    us_location.timelines["deaths"] = Category(
        us_location.timelines["deaths"])
    us_location.latest = Statistics(
        us_location.timelines["confirmed"].latest,
        us_location.timelines["deaths"].latest,
    ).to_dict()

    return [us_location], last_updated
def _deserialize_data(self, cached_result: dict) -> object:
    """Deserializes the data stored in cache.

    Each cached location stores its "confirmed"/"deaths" timelines as flat
    day-ordered lists anchored at that location's own "first_date"; this
    expands them back into {"YYYY-MM-DD": count} maps.

    Arguments:
        cached_result {dict} -- serialized version of data.

    Returns:
        object -- deserialized data.
    """
    result = {}
    for location, entry in cached_result["locations"].items():
        first_date = datetime.strptime(
            Functions.get_formatted_date(entry["first_date"]), "%Y-%m-%d"
        )

        confirmed_map, deaths_map = {}, {}
        # Consecutive list positions correspond to consecutive days.
        for offset, (confirmed, deaths) in enumerate(
            zip(entry["confirmed"], entry["deaths"])
        ):
            # date.isoformat() is exactly "YYYY-MM-DD" — same output as the
            # previous str(datetime)[:10] slicing trick, but explicit.
            day = (first_date + timedelta(days=offset)).date().isoformat()
            confirmed_map[day] = int(confirmed or 0)
            deaths_map[day] = int(deaths or 0)

        # Clone the entry, replacing the flat lists with the expanded maps.
        result[location] = {
            **entry,
            "confirmed": confirmed_map,
            "deaths": deaths_map,
        }

    return result
def test__to_location_tuple__given_valid_params__success(
        location_id, expected):
    """A well-formed location id converts into the expected tuple."""
    actual = Functions.to_location_tuple(location_id)
    assert actual == expected
async def get_data(
    self, endpoint: str, data_type: str = ""
) -> (List[JhuLocation], str):
    """Method that retrieves data from JHU CSSEGSI.

    Arguments:
        endpoint {str} -- string that represents endpoint to get data from.
        data_type {str} -- string that represents type of data being
            fetched. Used as key for cache.

    Returns:
        Location[], str -- returns list of location stats and the last
            updated date.
    """
    # Fetch both statistics concurrently, timing the round trip in ms.
    _start = time.time() * 1000.0
    promises = await asyncio.gather(
        self._fetch_csv_data(endpoint, "confirmed"),
        self._fetch_csv_data(endpoint, "deaths"),
    )
    _end = time.time() * 1000.0
    logger.info(f"Elapsed _fetch_csv_data for all stats {str(_end-_start)}ms")

    _start = time.time() * 1000.0
    # Tag each result with its statistic, e.g. [("confirmed", ...), ...],
    # then merge into the final map of datapoints keyed by location.
    tagged_promises = self._tag_promised_results(
        ["confirmed", "deaths"], promises
    )
    location_result = await self._zip_results(tagged_promises)
    _end = time.time() * 1000.0
    logger.info(f"Elapsed _zip_results for all stats {str(_end-_start)}ms")

    last_updated = Functions.get_formatted_date()
    locations = []
    for location_id, events in location_result.items():
        confirmed = Category(events["confirmed"])
        deaths = Category(events["deaths"])
        locations.append(
            JhuLocation(
                id=location_id,
                uid=events["uid"],
                iso2=events["iso2"],
                iso3=events["iso3"],
                code3=events["code3"],
                fips=events["FIPS"],
                county=events["Admin2"],
                state=events["Province_State"],
                country=events["Country_Region"],
                latitude=events["Lat"],
                longitude=events["Long_"],
                last_updated=last_updated,
                timelines={"confirmed": confirmed, "deaths": deaths},
                latest=Statistics(
                    confirmed=confirmed.latest, deaths=deaths.latest
                ).to_dict(),
            )
        )

    logger.info("Finished transforming JHU results.")
    return locations, last_updated
def test__try_getattr__success(obj, attr, expected):
    """Looking up *attr* on *obj* yields the expected value."""
    actual = Functions.try_getattr(obj, attr)
    assert actual == expected
def _populate_location_result(
    self,
    stat: str,
    locations: List[dict],
    location_result: dict,
    to_serialize: dict,
):
    """Populates map with information for given location with timeline data.

    Arguments:
        stat {str} -- Statistic we are populating, eg. "Confirmed".
        locations {List[dict]} -- List of maps representing location info.
            Here, data that does not exist is None and needs to be
            transformed to "".
        location_result {dict} -- Map of finalized location data to put
            data in.
        to_serialize {dict} -- cache-bound mirror of location_result; each
            stat timeline is stored as a flat list of counts plus a shared
            top-level "first_date" anchor.
    """
    for location in locations:
        # Identity is (country, state, county); missing fields become "".
        location_tuple = (
            self._get_field_from_map(location, "Country_Region"),
            self._get_field_from_map(location, "Province_State"),
            self._get_field_from_map(location, "Admin2"),
        )
        serialized_id = Functions.to_location_id(location_tuple)
        # Only the date-like columns of the row carry timeline values.
        dates = self._filter_date_columns(location.items())

        if serialized_id not in location_result:
            location_result[serialized_id] = {
                "uid": self._get_field_from_map(location, "UID"),
                "iso2": self._get_field_from_map(location, "iso2"),
                "iso3": self._get_field_from_map(location, "iso3"),
                "code3": self._get_field_from_map(location, "code3"),
                "FIPS": self._get_field_from_map(location, "FIPS"),
                "Admin2": self._get_field_from_map(location, "Admin2"),
                "Province_State": self._get_field_from_map(
                    location, "Province_State"
                ),
                "Country_Region": self._get_field_from_map(
                    location, "Country_Region"
                ),
                "Lat": self._get_field_from_map(location, "Lat"),
                "Long_": self._get_field_from_map(location, "Long_"),
                "confirmed": {},
                "deaths": {},
            }
            # Snapshot the metadata for the cache copy; the stat entry is
            # replaced below with the flat list form.
            to_serialize["locations"][serialized_id] = {
                **location_result[serialized_id],
            }

        # Missing/None amounts are coerced to 0.
        for date, amount in dates.items():
            location_result[serialized_id][stat][
                Functions.get_formatted_date(date, "%m/%d/%y")
            ] = int(amount or 0)

        to_serialize["locations"][serialized_id][stat] = list(
            location_result[serialized_id][stat].values()
        )

        # Track the first date with new object entry
        # NOTE(review): recorded once, from the first row processed — this
        # assumes every row shares the same date columns; confirm upstream.
        if "first_date" not in to_serialize:
            to_serialize["first_date"] = Functions.get_formatted_date(
                next(iter(dates)), "%m/%d/%y"
            )
async def get_all( request: Request, source: Source = Source.NYT, fips: str = None, state: str = None, timelines: bool = False, properties: bool = False, ): params_dict = dict(request.query_params) # Remove unfiltered parameters params_dict.pop("source", None) params_dict.pop("timelines", None) params_dict.pop("properties", None) # Fetch data data_source_service = request.state.data_source location_data, _ = await data_source_service.get_state_data() # TODO: Refactor filtering for key, value in params_dict.items(): key = key.lower() value = value.lower().strip( "__") # Remove access to private/internal fields if key == "state" and value.upper() in STATE_ABBR__STATE_NAME: value = get_state_name(value) location_data = list( filter( lambda location: str(Functions.try_getattr(location, key)) == "__IGNORE__" or str(Functions.try_getattr(location, key) ).lower() == str(value), location_data, )) latest = Statistics( confirmed=sum( map(lambda location: location.timelines["confirmed"].latest, location_data)), deaths=sum( map(lambda location: location.timelines["deaths"].latest, location_data)), ) state_data_map = None if properties: location_data_service = request.state.location_data_service state_data_map = await location_data_service.get_state_data() locations_response = [] for location in location_data: key = ( location.country, location.state, ) if properties and key in state_data_map: location.set_properties(state_data_map[key].to_dict()) result = location.to_dict(timelines, properties) locations_response.append(result) return { "latest": latest.to_dict(), "locations": locations_response, }
def test__to_location_tuple__given_invalid_params__error(location_id):
    """A malformed location id raises ValueError."""
    with pytest.raises(ValueError):
        Functions.to_location_tuple(location_id)
def test__to_format_date__success(datetime, expected):
    """Formatting a datetime yields the expected string."""
    # NOTE(review): the parameter shadows the `datetime` module inside this
    # test; renaming requires touching the parametrize decorator as well.
    actual = Functions.to_format_date(datetime)
    assert actual == expected
def test__to_location_id__given_invalid_parameters__error(tuple_id):
    """An invalid tuple id raises ValueError."""
    with pytest.raises(ValueError):
        # No `assert` here: inside pytest.raises the assertion would never
        # execute once the exception fires, so it only added noise.
        Functions.to_location_id(tuple_id)
def test__to_location_id__given_valid_parameters__success(
        tuple_id, expected_value):
    """A valid tuple id serializes into the expected location id."""
    actual = Functions.to_location_id(tuple_id)
    assert actual == expected_value
def test__get_formatted_date__given_valid_parameters__error(
        initial_date, format):
    """Date/format combinations that fail to parse raise ValueError.

    NOTE(review): despite "valid_parameters" in the name, this case covers
    failing inputs — consider renaming (in the decorator too) to match the
    sibling invalid-parameter tests.
    """
    with pytest.raises(ValueError):
        # No `assert` here: inside pytest.raises the assertion would never
        # execute once the exception fires, so it only added noise.
        Functions.get_formatted_date(initial_date, format)
def test__get_formatted_date__given_valid_parameters__success(
        initial_date, format, expected_value):
    """A parseable date and format produce the expected formatted string."""
    actual = Functions.get_formatted_date(initial_date, format)
    assert actual == expected_value