Example #1
    def test_400_if_spreadsheet_validation_error(self, mock_merge):
        exception = SpreadsheetValidationErrors()
        exception.new_error("error 1")
        exception.new_error("error 2")
        mock_merge.side_effect = exception

        self.url = reverse("covid19:spreadsheet_proxy", args=["RJ"])
        response = self.client.get(self.url)

        assert 400 == response.status_code
        assert ["error 1", "error 2"] == sorted(response.json()["errors"])
Example #2
    def test_list_all_errors_from_the_import_process(self, mocked_format):
        exception = SpreadsheetValidationErrors()
        exception.new_error("Error 1")
        exception.new_error("Error 2")
        mocked_format.side_effect = exception

        form = StateSpreadsheetForm(self.data, self.file_data, user=self.user)
        assert not form.is_valid()

        assert 2 == len(form.errors["__all__"])
        assert "Error 1" in form.errors["__all__"]
        assert "Error 2" in form.errors["__all__"]
Example #3
def validate_historical_data(spreadsheet):
    """
    Validate the spreadsheet against historical data in the database.
    If any invalid data, it'll raise a SpreadsheetValidationErrors
    If valid data, returns a list with eventual warning messages
    """

    def lower_numbers(previous, data):
        if not previous:
            return False
        return data["confirmed"] < previous.confirmed or data["deaths"] < previous.deaths

    warnings = []
    clean_results = spreadsheet.table_data
    validation_errors = SpreadsheetValidationErrors()
    covid19_stats = Covid19Stats()
    s_date = spreadsheet.date
    has_only_total = False
    total_data = spreadsheet.get_total_data()
    if len(spreadsheet.table_data) == 1 and total_data:
        has_only_total = True

    city_entries = covid19_stats.most_recent_city_entries_for_state(spreadsheet.state, s_date)
    state_entry = covid19_stats.most_recent_state_entry(spreadsheet.state, s_date)

    for entry in city_entries:
        city_data = spreadsheet.get_data_from_city(entry.city_ibge_code)
        if not has_only_total and not city_data and (entry.confirmed or entry.deaths):
            validation_errors.new_error(f"{entry.city} possui dados históricos e não está presente na planilha.")
            continue
        elif not city_data:  # previous entry for the city has 0 deaths and 0 confirmed
            data = _parse_city_data(entry.city, entry.confirmed, entry.deaths, s_date, entry.state)
            clean_results.append(data)
            if not has_only_total:
                warnings.append(
                    f"{entry.city} possui dados históricos zerados/nulos, não presente na planilha e foi adicionado."
                )
        elif lower_numbers(entry, city_data):
            warnings.append(f"Números de confirmados ou óbitos em {entry.city} é menor que o anterior.")

    if has_only_total:
        if state_entry:
            warnings.append(
                f"Planilha importada somente com dados totais. Dados de cidades foram reutilizados da importação do dia {state_entry.date}."
            )
        else:
            warnings.append(f"Planilha importada somente com dados totais.")

    if lower_numbers(state_entry, total_data):
        warnings.append(f"Números de confirmados ou óbitos totais é menor que o total anterior.")

    validation_errors.raise_if_errors()

    spreadsheet.table_data = clean_results
    return warnings
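Callers of validate_historical_data get two distinct channels: hard failures arrive as a raised SpreadsheetValidationErrors, while soft issues come back as the returned warnings list. An illustrative caller, assuming the error_messages attribute sketched earlier (this is not project code):

def import_with_history_check(spreadsheet):
    # Illustrative only: errors abort the import, warnings are just reported.
    try:
        warnings = validate_historical_data(spreadsheet)
    except SpreadsheetValidationErrors as exc:
        # e.g. a city with historical numbers that is missing from the upload
        return {"ok": False, "errors": list(exc.error_messages)}
    # e.g. lower numbers than the previous entry, or a totals-only import
    return {"ok": True, "warnings": warnings}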
Example #4
def merge_state_data(state):
    # Get data from old Google Spreadsheets
    gs_data = google_data.get_base_data()[state]
    original_reports = gs_data["reports"]
    original_cases = gs_data["cases"]

    # Get data from database
    db_data = StateSpreadsheet.objects.get_state_data(state)
    new_reports = db_data["reports"]
    new_cases = db_data["cases"]

    # Update original reports (GS) with new ones (DB), removing GS reports for
    # dates which show up in DB.
    new_reports_dates = set(row["date"] for row in new_reports)
    original_reports_filtered = [
        row for row in original_reports if row["date"] not in new_reports_dates
    ]
    final_reports = original_reports_filtered + new_reports

    # Update original cases (GS) with new ones (DB), overwriting GS cases for
    # dates which show up in DB.
    final_cases = []
    original_data_errors = SpreadsheetValidationErrors()
    for row in original_cases:
        row = row.copy()
        city = row["municipio"]
        if city not in [TOTAL_LINE_DISPLAY, UNDEFINED_DISPLAY]:
            city_info = get_city_info(city, state)
            if city_info:
                city = city_info.city
            else:
                msg = f'Nome inválido de cidade "{city}" na planilha do Google.'
                original_data_errors.new_error(msg)

        for date, values_for_date in new_cases.items():
            date_str = f"{date.day:02d}_{date.month:02d}"
            city_on_date = values_for_date.get(city, {})
            row[f"confirmados_{date_str}"] = city_on_date.get(
                "confirmed", None)
            row[f"mortes_{date_str}"] = city_on_date.get("deaths", None)
        final_cases.append(row)

    ordered_cases = []
    for row in final_cases:
        ordered_cases.append(row_with_sorted_columns(row))

    original_data_errors.raise_if_errors()
    return {"reports": final_reports, "cases": ordered_cases}
Example #5
def format_spreadsheet_rows_as_dict(rows_table, date, state, skip_sum_cases=False, skip_sum_deaths=False):
    """
    Receives rows.Table object, a date and a brazilan UF, validates the data
    and returns tuble with 2 lists:
        - valid and formated results data
        - warnings about the data

    This is an auxiliary method used by covid19.forms.StateSpreadsheetForm with the uploaded file
    """
    validation_errors = SpreadsheetValidationErrors()
    field_names = rows_table.field_names

    try:
        confirmed_attr = _get_column_name(field_names, ["confirmados", "confirmado", "casos_confirmados"])
    except ValueError as e:
        validation_errors.new_error(str(e))
    try:
        deaths_attr = _get_column_name(field_names, ["obitos", "obito", "morte", "mortes"])
    except ValueError as e:
        validation_errors.new_error(str(e))
    try:
        city_attr = _get_column_name(field_names, ["municipio", "cidade"])
    except ValueError as e:
        validation_errors.new_error(str(e))

    # field types can't be checked if any required column is missing
    validation_errors.raise_if_errors()

    if rows_table.fields[confirmed_attr] != IntegerField:
        validation_errors.new_error('A coluna "Confirmados" precisa ter somente números inteiros')
    if rows_table.fields[deaths_attr] != IntegerField:
        validation_errors.new_error('A coluna "Mortes" precisa ter somente números inteiros')

    results, warnings = [], []
    has_total, has_undefined = False, False
    total_cases, total_deaths = 0, 0
    sum_cases, sum_deaths = 0, 0
    processed_cities = set()
    for entry in rows_table:
        city = getattr(entry, city_attr, None)
        confirmed = getattr(entry, confirmed_attr, None)
        deaths = getattr(entry, deaths_attr, None)
        if not city:
            if confirmed or deaths:
                msg = "Uma ou mais linhas com a coluna de cidade vazia possuem números de confirmados ou óbitos"
                validation_errors.new_error(msg)
            continue

        if city in processed_cities:
            validation_errors.new_error(f"Mais de uma entrada para {city}")

        processed_cities.add(city)
        is_undefined = city == UNDEFINED_DISPLAY
        if is_undefined:
            has_undefined = True
        elif city == TOTAL_LINE_DISPLAY:
            has_total = True

        if (confirmed is None and deaths is not None) or (deaths is None and confirmed is not None):
            validation_errors.new_error(f"Dados de casos ou óbitos incompletos na linha {city}")
        if confirmed is None or deaths is None:
            continue

        try:
            if deaths > confirmed:
                if is_undefined:
                    warnings.append(f"{city} com número óbitos maior que de casos confirmados.")
                else:
                    msg = f"Valor de óbitos maior que casos confirmados na linha {city} da planilha"
                    validation_errors.new_error(msg)
            elif deaths < 0 or confirmed < 0:
                validation_errors.new_error(f"Valores negativos na linha {city} da planilha")
        except TypeError:
            validation_errors.new_error(f"Provavelmente há uma fórmula na linha {city} da planilha")
            continue

        result = _parse_city_data(city, confirmed, deaths, date, state)
        if result["city_ibge_code"] == INVALID_CITY_CODE:
            validation_errors.new_error(f"{city} não pertence à UF {state}")
            continue

        if result["place_type"] == "state":
            total_cases, total_deaths = confirmed, deaths
        else:
            sum_cases += confirmed
            sum_deaths += deaths

        results.append(result)

    if not has_total:
        validation_errors.new_error(f'A linha "{TOTAL_LINE_DISPLAY}" está faltando na planilha')
    if not has_undefined and len(results) > 1:
        validation_errors.new_error(f'A linha "{UNDEFINED_DISPLAY}" está faltando na planilha')

    if skip_sum_cases:
        warnings.append("A checagem da soma de casos por cidade com o valor total foi desativada.")
    elif sum_cases and sum_cases != total_cases:
        validation_errors.new_error(f"A soma de casos ({sum_cases}) difere da entrada total ({total_cases}).")
    if skip_sum_deaths:
        warnings.append("A checagem da soma de óbitos por cidade com o valor total foi desativada.")
    elif sum_deaths and sum_deaths != total_deaths:
        validation_errors.new_error(f"A soma de mortes ({sum_deaths}) difere da entrada total ({total_deaths}).")

    validation_errors.raise_if_errors()

    # this is hacky, I know, but I wanted to centralize all kinds of validation inside this function
    on_going_spreadsheet = StateSpreadsheet(state=state, date=date)
    on_going_spreadsheet.table_data = results
    warnings.extend(validate_historical_data(on_going_spreadsheet))
    return on_going_spreadsheet.table_data, warnings
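The three try/except blocks at the top rely on _get_column_name raising ValueError with a readable message when none of the accepted column names is present. A minimal sketch of that behaviour, inferred only from the call sites above (the real helper and its message wording may differ):

def _get_column_name_sketch(field_names, candidates):
    # Return the first accepted column name found in the header row;
    # otherwise raise ValueError, which the caller turns into a validation error.
    for candidate in candidates:
        if candidate in field_names:
            return candidate
    raise ValueError(f"The spreadsheet must have one of these columns: {candidates}")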
Example #6
def merge_state_data(state):
    # Get data from old Google Spreadsheets
    gs_data = google_data.get_base_data()[state]
    original_reports = gs_data["reports"]
    # Fix format of old data from Google Spreadsheets (add year to date)
    original_cases = [
        {
            fix_key(key): value
            for key, value in city_cases.items()
            if not key.startswith("field_")
        }
        for city_cases in gs_data["cases"]
    ]

    # Get data from database
    db_data = StateSpreadsheet.objects.get_state_data(state)
    new_reports = db_data["reports"]
    new_cases = db_data["cases"]

    # Update original reports (GS) with new ones (DB), removing GS reports for
    # dates which show up in DB.
    new_reports_dates = set(row["date"] for row in new_reports)
    original_reports_filtered = [
        row for row in original_reports if row["date"] not in new_reports_dates
    ]
    final_reports = original_reports_filtered + new_reports

    # Update original cases (GS) with new ones (DB), overwriting GS cases for
    # dates which show up in DB.
    final_cases = []
    original_data_errors = SpreadsheetValidationErrors()
    original_cities = set()
    for row in original_cases:
        row = row.copy()
        city = row["municipio"]
        if city not in [TOTAL_LINE_DISPLAY, UNDEFINED_DISPLAY]:
            city_info = get_city_info(city, state)
            if city_info:
                city = city_info.city
            else:
                msg = f'Nome inválido de cidade "{city}" na planilha do Google.'
                original_data_errors.new_error(msg)

        original_cities.add(city)
        for date, values_for_date in new_cases.items():
            date_str = date.isoformat()  # YYYY-MM-DD
            city_on_date = values_for_date.get(city, {})
            row[f"confirmados_{date_str}"] = city_on_date.get(
                "confirmed", None)
            row[f"mortes_{date_str}"] = city_on_date.get("deaths", None)
        final_cases.append(row)

    # recent IBGE data can add new cities that weren't present in the original data
    new_rows = {}
    for date, values_for_date in new_cases.items():
        date_str = date.isoformat()  # YYYY-MM-DD
        for city, data in values_for_date.items():
            city_info = get_city_info(city, state)
            if city_info:
                city = city_info.city

            if city in original_cities:
                continue
            elif city not in new_rows:
                new_rows[city] = {"municipio": city}

            row = new_rows[city]
            row[f"confirmados_{date_str}"] = data.get("confirmed", None)
            row[f"mortes_{date_str}"] = data.get("deaths", None)

    if new_rows:
        final_cases.extend(new_rows.values())

    ordered_cases = []
    for row in final_cases:
        row = row_with_sorted_columns(row)
        city_info = get_city_info(row["municipio"], state)
        if city_info:
            row["municipio"] = city_info.city
        ordered_cases.append(row)

    original_data_errors.raise_if_errors()
    return {"reports": final_reports, "cases": ordered_cases}
Example #7
def validate_historical_data(spreadsheet):
    """
    Validate the spreadsheet against historical data in the database.
    If any invalid data, it'll raise a SpreadsheetValidationErrors
    If valid data, returns a list with eventual warning messages
    """
    def lower_numbers(previous, data):
        if not previous:
            return False
        return data["confirmed"] < previous["confirmed"] or data[
            "deaths"] < previous["deaths"]

    warnings = []
    clean_results = spreadsheet.table_data
    validation_errors = SpreadsheetValidationErrors()
    s_date = spreadsheet.date
    has_only_total = False
    total_data = spreadsheet.get_total_data()
    if len(spreadsheet.table_data) == 1 and total_data:
        has_only_total = True

    city_entries, state_entry = [], {}
    most_recent = StateSpreadsheet.objects.most_recent_deployed(
        spreadsheet.state, spreadsheet.date)
    if most_recent:
        state_entry = most_recent.get_total_data()
        city_entries = most_recent.table_data_by_city.values()

    for entry in city_entries:
        city_data = spreadsheet.get_data_from_city(entry["city_ibge_code"])
        if not has_only_total and not city_data and (entry["confirmed"]
                                                     or entry["deaths"]):
            validation_errors.new_error(
                f"{entry['city']} possui dados históricos e não está presente na planilha."
            )
            continue
        elif not city_data:  # previous entry for the city has 0 deaths and 0 confirmed
            data = _parse_city_data(entry["city"], entry["confirmed"],
                                    entry["deaths"], s_date, entry["state"])
            clean_results.append(data)
            if not has_only_total:
                warnings.append(
                    f"{entry['city']} possui dados históricos zerados/nulos, não presente na planilha e foi adicionado."
                )
        elif lower_numbers(entry, city_data):
            warnings.append(
                f"Números de confirmados ou óbitos em {entry['city']} é menor que o anterior."
            )

    if has_only_total:
        if state_entry:
            warnings.append(
                f"{StateSpreadsheet.ONLY_WITH_TOTAL_WARNING} Dados de cidades foram reutilizados da importação do dia {state_entry['date']}."
            )
        else:
            warnings.append(StateSpreadsheet.ONLY_WITH_TOTAL_WARNING)

    if lower_numbers(state_entry, total_data):
        warnings.append(
            "Números de confirmados ou óbitos totais é menor que o total anterior."
        )

    validation_errors.raise_if_errors()

    spreadsheet.table_data = clean_results
    return warnings
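Both versions of validate_historical_data look rows up with spreadsheet.get_data_from_city(city_ibge_code). A minimal sketch of such a lookup over the table_data rows, inferred from the dictionary keys used in this example (the project's method is attached to StateSpreadsheet and may be implemented differently):

def get_data_from_city_sketch(table_data, city_ibge_code):
    # Return the uploaded row for the given IBGE city code, or None if the
    # city is absent from the spreadsheet.
    for row in table_data:
        if row.get("city_ibge_code") == city_ibge_code:
            return row
    return None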