def merge_state_data(state): gs_data = google_data.get_base_data()[ state] # Get data from old Google Spreadsheets original_reports = gs_data["reports"] original_cases = gs_data["cases"] db_data = StateSpreadsheet.objects.get_state_data( state) # Get data from database new_reports = db_data["reports"] new_cases = db_data["cases"] # Update original reports (GS) with new ones (DB), removing GS reports for # dates which show up in DB. new_reports_dates = set(row["date"] for row in new_reports) original_reports_filtered = [ row for row in original_reports if row["date"] not in new_reports_dates ] final_reports = original_reports_filtered + new_reports # Update original cases (GS) with new ones (DB), overwritting GS cases for # dates which show up in DB. final_cases = [] original_data_errors = SpreadsheetValidationErrors() for row in original_cases: row = row.copy() city = row["municipio"] if city not in [TOTAL_LINE_DISPLAY, UNDEFINED_DISPLAY]: city_info = get_city_info(city, state) if city_info: city = city_info.city else: msg = f'Nome inválido de cidade "{city}" na planilha do Google.' original_data_errors.new_error(msg) for date, values_for_date in new_cases.items(): date_str = f"{date.day:02d}_{date.month:02d}" city_on_date = values_for_date.get(city, {}) row[f"confirmados_{date_str}"] = city_on_date.get( "confirmed", None) row[f"mortes_{date_str}"] = city_on_date.get("deaths", None) final_cases.append(row) ordered_cases = [] for row in final_cases: ordered_cases.append(row_with_sorted_columns(row)) original_data_errors.raise_if_errors() return {"reports": final_reports, "cases": ordered_cases}
def test_always_use_ibge_data_to_format_the_city_name(self):
    """A lowercased city name in the sheet is normalized via IBGE data."""
    self.content = self.content.replace("Abatiá,9,1", "abatiá,9,1")
    imported = rows.import_from_csv(self.file_from_content)

    results, warnings = format_spreadsheet_rows_as_dict(
        imported, self.date, self.uf)

    wanted = {
        "city": "Abatiá",
        "city_ibge_code": get_city_info("Abatiá", "PR").city_ibge_code,
        "confirmed": 9,
        "date": self.date.isoformat(),
        "deaths": 1,
        "place_type": "city",
        "state": "PR",
    }
    assert wanted in results
def test_line_can_have_none_for_all_values_if_city_has_no_cases_yet(self):
    """Empty confirmed/deaths cells for a city are imported as zeros."""
    self.content = self.content.replace("Abatiá,9,1", "Abatiá,,")
    self.content = self.content.replace("TOTAL NO ESTADO,102,32",
                                        "TOTAL NO ESTADO,93,31")
    imported = rows.import_from_csv(self.file_from_content)

    results, warnings = format_spreadsheet_rows_as_dict(
        imported, self.date, self.uf)

    wanted = {
        "city": "Abatiá",
        "city_ibge_code": get_city_info("Abatiá", "PR").city_ibge_code,
        "confirmed": 0,
        "date": self.date.isoformat(),
        "deaths": 0,
        "place_type": "city",
        "state": "PR",
    }
    assert wanted in results
def _parse_city_data(city, confirmed, deaths, date, state):
    """Build the result dict for one spreadsheet line.

    The line may be a regular city, the state-total line or the
    "undefined" line; each gets its own ``place_type``/IBGE-code handling.
    """
    row = {
        "city": city,
        "confirmed": confirmed,
        "date": date.isoformat(),
        "deaths": deaths,
        "place_type": "city",
        "state": state,
    }

    if city == TOTAL_LINE_DISPLAY:
        # State totals: no city name, state-level IBGE code.
        row["city"] = None
        row["place_type"] = "state"
        row["city_ibge_code"] = get_state_info(state).state_ibge_code
    elif city == UNDEFINED_DISPLAY:
        row["city_ibge_code"] = None
    else:
        info = get_city_info(city, state)
        # Unresolvable names are flagged with INVALID_CITY_CODE.
        row["city_ibge_code"] = getattr(info, "city_ibge_code", INVALID_CITY_CODE)
        row["city"] = getattr(info, "city", INVALID_CITY_CODE)

    return row
def test_allow_zero_as_a_valid_value(self):
    """Explicit zeros in confirmed/deaths must be kept, not dropped."""
    pristine = self.content
    base = {
        "city": "Abatiá",
        "city_ibge_code": get_city_info("Abatiá", "PR").city_ibge_code,
        "confirmed": 9,
        "date": self.date.isoformat(),
        "deaths": 1,
        "place_type": "city",
        "state": "PR",
    }

    # zero confirmed cases
    self.content = pristine.replace("TOTAL NO ESTADO,102,32",
                                    "TOTAL NO ESTADO,93,31")
    self.content = self.content.replace("Abatiá,9,1", "Abatiá,0,0")
    wanted = dict(base, confirmed=0, deaths=0)
    imported = rows.import_from_csv(self.file_from_content)
    results, warnings = format_spreadsheet_rows_as_dict(
        imported, self.date, self.uf)
    assert wanted in results

    # zero deaths
    self.content = pristine.replace("TOTAL NO ESTADO,102,32",
                                    "TOTAL NO ESTADO,102,31")
    self.content = self.content.replace("Abatiá,9,1", "Abatiá,9,0")
    wanted = dict(base, deaths=0)
    imported = rows.import_from_csv(self.file_from_content)
    results, warnings = format_spreadsheet_rows_as_dict(
        imported, self.date, self.uf)
    assert wanted in results
def test_format_valid_list_of_rows(self):
    """A well-formed sheet yields total line, undefined line, then cities."""
    imported = rows.import_from_csv(self.file_from_content)
    iso_date = self.date.isoformat()
    data, warnings = format_spreadsheet_rows_as_dict(
        imported, self.date, self.uf)

    per_city = [
        ("Abatiá", 9, 1),
        ("Adrianópolis", 11, 2),
        ("Agudos do Sul", 12, 3),
        ("Almirante Tamandaré", 8, 4),
        ("Altamira do Paraná", 13, 5),
        ("Alto Paraíso", 47, 15),
    ]
    expected = [
        {
            "city": None,
            "city_ibge_code": 41,
            "confirmed": 102,
            "date": iso_date,
            "deaths": 32,
            "place_type": "state",
            "state": "PR",
        },
        {
            "city": "Importados/Indefinidos",
            "city_ibge_code": None,
            "confirmed": 2,
            "date": iso_date,
            "deaths": 2,
            "place_type": "city",
            "state": "PR",
        },
    ]
    for name, confirmed, deaths in per_city:
        expected.append({
            "city": name,
            "city_ibge_code": get_city_info(name, self.uf).city_ibge_code,
            "confirmed": confirmed,
            "date": iso_date,
            "deaths": deaths,
            "place_type": "city",
            "state": "PR",
        })

    assert data == expected
    assert ["warning"] == warnings
def test_no_city_if_unexisting_state():
    """An unknown state abbreviation yields no city info."""
    assert get_city_info("Nova Friburgo", "XX") is None
def test_get_info_from_city_if_correct_data():
    """A valid city/state pair resolves to its IBGE code."""
    assert get_city_info("Nova Friburgo", "RJ").city_ibge_code == 3303401
def test_get_city_info_even_if_wrong_letter_cases():
    """Lookup is case-insensitive for both city and state."""
    assert get_city_info("nova friburgo", "rj").city_ibge_code == 3303401
def test_get_info_from_city_if_correct_data():
    """A valid city/state pair resolves to its IBGE code."""
    info = get_city_info("Nova Friburgo", "RJ")
    assert info.city_ibge_code == 3303401
def test_format_valid_list_of_rows(self):
    """A well-formed sheet yields total line, undefined line, then cities."""
    file_rows = rows.import_from_csv(self.file_from_content)
    iso = self.date.isoformat()
    data, warnings = format_spreadsheet_rows_as_dict(
        file_rows, self.date, self.uf)

    def city_row(name, confirmed, deaths):
        # One expected per-city entry, IBGE code resolved on the fly.
        return {
            "city": name,
            "city_ibge_code": get_city_info(name, self.uf).city_ibge_code,
            "confirmed": confirmed,
            "date": iso,
            "deaths": deaths,
            "place_type": "city",
            "state": "PR",
        }

    expected = [
        {
            "city": None,
            "city_ibge_code": 41,
            "confirmed": 102,
            "date": iso,
            "deaths": 32,
            "place_type": "state",
            "state": "PR",
        },
        {
            "city": "Importados/Indefinidos",
            "city_ibge_code": None,
            "confirmed": 2,
            "date": iso,
            "deaths": 2,
            "place_type": "city",
            "state": "PR",
        },
        city_row("Abatiá", 9, 1),
        city_row("Adrianópolis", 11, 2),
        city_row("Agudos do Sul", 12, 3),
        city_row("Almirante Tamandaré", 8, 4),
        city_row("Altamira do Paraná", 13, 5),
        city_row("Alto Paraíso", 47, 15),
    ]

    assert data == expected
    assert ["warning"] == warnings
def setUp(self):
    """Prepare fixture data for one PR-state spreadsheet (saved via baker)."""
    self.uf = "PR"
    per_city = [
        ("Abatiá", 9, 1),
        ("Adrianópolis", 11, 2),
        ("Agudos do Sul", 12, 3),
        ("Almirante Tamandaré", 8, 4),
        ("Altamira do Paraná", 13, 5),
        ("Alto Paraíso", 47, 15),
    ]
    self.cities_cases = [
        {"city": name, "confirmed": confirmed, "deaths": deaths}
        for name, confirmed, deaths in per_city
    ]
    self.today = date.today()
    iso_today = self.today.isoformat()
    self.undefined_data = {
        "city": "Importados/Indefinidos",
        "city_ibge_code": None,
        "confirmed": 2,
        "date": iso_today,
        "deaths": 2,
        "place_type": "city",
        "state": self.uf,
    }
    self.total_data = {
        "city": None,
        "city_ibge_code": 41,
        "confirmed": 102,
        "date": iso_today,
        "deaths": 32,
        "place_type": "state",
        "state": self.uf,
    }
    self.cities_data = [{
        "city": entry["city"],
        "city_ibge_code": get_city_info(entry["city"], self.uf).city_ibge_code,
        "confirmed": entry["confirmed"],
        "date": iso_today,
        "deaths": entry["deaths"],
        "place_type": "city",
        "state": self.uf,
    } for entry in self.cities_cases]
    self.spreadsheet = baker.make(StateSpreadsheet,
                                  state=self.uf,
                                  date=self.today)
def merge_state_data(state):
    """Merge a state's reports and cases from the legacy Google Spreadsheets
    with the newer data stored in the database.

    Database entries win: reports for dates present in the DB replace the
    spreadsheet reports for those dates, and per-date case columns coming
    from the DB overwrite the spreadsheet values. Cities that only appear in
    the DB data get brand-new rows appended.

    Raises the accumulated ``SpreadsheetValidationErrors`` if any city name
    in the Google data cannot be resolved via ``get_city_info``.
    """
    gs_data = google_data.get_base_data()[
        state]  # Get data from old Google Spreadsheets
    original_reports = gs_data["reports"]
    # Fix format of old data from Google Spreadsheets (add year to date),
    # discarding synthetic "field_*" keys produced by the importer.
    original_cases = [{
        fix_key(key): value
        for key, value in city_cases.items() if not key.startswith("field_")
    } for city_cases in gs_data["cases"]]
    db_data = StateSpreadsheet.objects.get_state_data(
        state)  # Get data from database
    new_reports = db_data["reports"]
    # new_cases maps date -> {city_name: {"confirmed": ..., "deaths": ...}}
    new_cases = db_data["cases"]
    # Update original reports (GS) with new ones (DB), removing GS reports for
    # dates which show up in DB.
    new_reports_dates = set(row["date"] for row in new_reports)
    original_reports_filtered = [
        row for row in original_reports if row["date"] not in new_reports_dates
    ]
    final_reports = original_reports_filtered + new_reports

    # Update original cases (GS) with new ones (DB), overwriting GS cases for
    # dates which show up in DB.
    final_cases = []
    original_data_errors = SpreadsheetValidationErrors()
    original_cities = set()
    for row in original_cases:
        row = row.copy()
        city = row["municipio"]
        if city not in [TOTAL_LINE_DISPLAY, UNDEFINED_DISPLAY]:
            city_info = get_city_info(city, state)
            if city_info:
                city = city_info.city  # use the canonical (IBGE) spelling
            else:
                # Runtime error message (pt-BR): "Invalid city name ... in the
                # Google spreadsheet." Collected, not raised immediately.
                msg = f'Nome inválido de cidade "{city}" na planilha do Google.'
                original_data_errors.new_error(msg)
        # NOTE(review): indentation reconstructed — assumed every row's city
        # (including total/undefined display lines) is tracked here so the
        # "new cities" pass below can skip them; confirm against upstream.
        original_cities.add(city)
        for date, values_for_date in new_cases.items():
            date_str = date.isoformat()  # YYYY-MM-DD
            city_on_date = values_for_date.get(city, {})
            row[f"confirmados_{date_str}"] = city_on_date.get(
                "confirmed", None)
            row[f"mortes_{date_str}"] = city_on_date.get("deaths", None)
        final_cases.append(row)

    # recent IBGE data can add new cities that weren't present in the original data
    new_rows = {}
    for date, values_for_date in new_cases.items():
        date_str = date.isoformat()  # YYYY-MM-DD
        for city, data in values_for_date.items():
            city_info = get_city_info(city, state)
            if city_info:
                city = city_info.city
            if city in original_cities:
                continue
            elif city not in new_rows:
                new_rows[city] = {"municipio": city}
            row = new_rows[city]
            row[f"confirmados_{date_str}"] = data.get("confirmed", None)
            row[f"mortes_{date_str}"] = data.get("deaths", None)
    if new_rows:
        final_cases.extend(new_rows.values())

    ordered_cases = []
    for row in final_cases:
        row = row_with_sorted_columns(row)
        city_info = get_city_info(row["municipio"], state)
        if city_info:
            row["municipio"] = city_info.city  # normalize name in the output
        ordered_cases.append(row)
    # Raise only after processing everything so all bad names are reported.
    original_data_errors.raise_if_errors()
    return {"reports": final_reports, "cases": ordered_cases}
def test_no_city_if_unexisting_state():
    """An unknown state abbreviation yields no city info."""
    result = get_city_info("Nova Friburgo", "XX")
    assert result is None
def setUp(self):
    """Prepare fixture data for one PR-state spreadsheet (unsaved instance)."""
    self.uf = "PR"
    self.cities_cases = []
    for name, confirmed, deaths in (
        ("Abatiá", 9, 1),
        ("Adrianópolis", 11, 2),
        ("Agudos do Sul", 12, 3),
        ("Almirante Tamandaré", 8, 4),
        ("Altamira do Paraná", 13, 5),
        ("Alto Paraíso", 47, 15),
    ):
        self.cities_cases.append(
            {"city": name, "confirmed": confirmed, "deaths": deaths})
    self.today = date.today()
    iso_today = self.today.isoformat()
    self.undefined_data = {
        "city": "Importados/Indefinidos",
        "city_ibge_code": None,
        "confirmed": 2,
        "date": iso_today,
        "deaths": 2,
        "place_type": "city",
        "state": self.uf,
    }
    self.total_data = {
        "city": None,
        "city_ibge_code": 41,
        "confirmed": 102,
        "date": iso_today,
        "deaths": 32,
        "place_type": "state",
        "state": self.uf,
    }
    self.cities_data = [{
        "city": entry["city"],
        "city_ibge_code": get_city_info(entry["city"], self.uf).city_ibge_code,
        "confirmed": entry["confirmed"],
        "date": iso_today,
        "deaths": entry["deaths"],
        "place_type": "city",
        "state": self.uf,
    } for entry in self.cities_cases]
    # prepare() builds the instance without hitting the database
    self.spreadsheet = baker.prepare(StateSpreadsheet,
                                     state=self.uf,
                                     date=self.today)
def test_no_city_if_unexisting_city():
    """A city that does not belong to the given state yields no info."""
    assert get_city_info("Nova Friburgo", "RR") is None
def get_state_data(self, state):
    """Return all state cases, grouped by date.

    Output: ``{"reports": [...], "cases": {date: {city_name: {"confirmed",
    "deaths"}}}}``. Spreadsheets come from
    ``deployable_for_state`` and, per date, a spreadsheet with full
    per-city data takes precedence over one that only carries the state
    total (``only_with_total_entry``).
    """
    from covid19.spreadsheet_validator import TOTAL_LINE_DISPLAY
    from brazil_data.cities import get_city_info

    cases, reports = defaultdict(dict), {}
    qs = self.get_queryset()
    spreadsheets = qs.deployable_for_state(state,
                                           avoid_peer_review_dupes=False,
                                           only_active=False)
    # Dates for which we have so far only seen a total-only spreadsheet;
    # a later spreadsheet may still supply the per-city breakdown.
    dates_only_with_total = set()
    for spreadsheet in spreadsheets:
        date = spreadsheet.date
        # Skip this spreadsheet when the date is already fully covered...
        if date in cases and date not in dates_only_with_total:
            continue
        # ...or when it would only add another total to a covered date.
        elif date in cases and spreadsheet.only_with_total_entry:
            continue
        # Group all notes for a same URL to avoid repeated entries for date/url
        report_data = reports.get(date, defaultdict(list))
        for url in spreadsheet.boletim_urls:
            report_data[url].append(spreadsheet.boletim_notes or "")
        reports[date] = report_data
        if spreadsheet.only_with_total_entry:
            # Only the state-total line is available for this spreadsheet.
            rows = [spreadsheet.get_total_data()]
            dates_only_with_total.add(date)
        elif date in dates_only_with_total:
            # A previous sheet gave only the total; take per-city rows now.
            # NOTE(review): presumably table_data_by_city excludes the total
            # line already stored for this date — confirm.
            rows = spreadsheet.table_data_by_city.values()
            dates_only_with_total.remove(date)
        else:
            rows = spreadsheet.table_data
        for row in rows:
            city = row["city"]
            if city is None:
                city = TOTAL_LINE_DISPLAY
            elif city != "Importados/Indefinidos":
                city = get_city_info(city, state).city  # Fix city name
            cases[date][city] = {
                "confirmed": row["confirmed"],
                "deaths": row["deaths"],
            }
    # reports entries should be returned as a list
    reports_as_list = []
    for date, urls in reports.items():
        for url, notes in urls.items():
            reports_as_list.append({
                "date": date,
                "url": url,
                # Concatenate distinct non-empty notes for this date/url pair
                "notes": "\n".join([n.strip() for n in notes if n.strip()])
            })
    return {
        "reports": reports_as_list,
        "cases": cases,
    }
def test_no_city_if_unexisting_city():
    """A city that does not belong to the given state yields no info."""
    result = get_city_info("Nova Friburgo", "RR")
    assert result is None