Example No. 1
    def parse_cities_request(self, response):
        """Yield one registral request per (city, year, epidemiological week)."""
        cities = json.loads(response.body)
        current_week = Week.fromdate(date_utils.today())

        # Two passes are needed (2019 and 2020) because the specific days that
        # make up an epidemiological week differ between the years.  The API
        # appears to label the current year's data as "2020" and the previous
        # year's as "2019", which is what lets us pull data only from the
        # "2020" chart.
        combos = (
            (city, Week(year, weeknum))
            for city in cities
            for year in (2020, 2019)
            for weeknum in range(1, current_week.week)
        )
        for city, ep_week in combos:
            # Only weeks more than 4 weeks in the past get cached.
            cacheable = (current_week.week - ep_week.week) > 4
            yield self.make_registral_request(
                city=city,
                ep_week=ep_week,
                callback=self.parse_registral_request,
                dont_cache=not cacheable,
            )
Example No. 2
    def generate_documents(self, applicant: Applicant):
        """Render every DOCUMENTS template for *applicant* into a per-applicant directory."""
        target_dir = GENERATED_DIR / applicant.contact_info.corporate_email
        target_dir.mkdir(exist_ok=True)

        def relatives_of(*kinds):
            # Pick out the applicant's family members matching the given types.
            return [member for member in applicant.family if member.type in kinds]

        payload = applicant.dict()

        # The photo is stored base64-encoded; decode it to a binary file so the
        # templates' dummy image can be replaced with it.
        encoded_photo = payload.pop("photo")
        photo_path = target_dir / "photo"
        with open(photo_path, "wb") as photo_file:
            photo_file.write(base64.b64decode(encoded_photo.encode()))

        # NOTE: payload is merged last so its keys take precedence, exactly as
        # the applicant data dictates.
        context = {
            "date": today(),
            "full_name": applicant.full_name,
            "parents": relatives_of(RelativeType.FATHER, RelativeType.MOTHER),
            "siblings": relatives_of(RelativeType.BROTHER, RelativeType.SISTER),
        }
        context.update(payload)

        for english_name, russian_name in DOCUMENTS:
            document = DocxTemplate(TEMPLATES_DIR / english_name)
            document.render(context, self.jinja_env)
            document.replace_media(DUMMY_IMAGE, photo_path)
            document.save(target_dir / russian_name)
Example No. 3
 def start_requests(self):
     """Yield one covid/respiratory-death request per (date, state, search, cause)."""
     today = date_utils.today()
     # `date_utils.date_range` excludes its end, so pass tomorrow to include
     # today's data.
     end = today + datetime.timedelta(days=1)
     # Ordered plan equivalent to the nested search/cause branches:
     # respiratory deaths per cause first, then covid deaths (no cause).
     search_plan = (
         ("death-respiratory", "pneumonia"),
         ("death-respiratory", "insuficiencia_respiratoria"),
         ("death-covid", None),
     )
     for day in date_utils.date_range(datetime.date(2020, 1, 1), end):
         # Only dates older than 30 days get cached — recent data is likely
         # to change, historical data is not.
         historical = today - day > datetime.timedelta(days=30)
         for state in STATES:
             for search, cause in search_plan:
                 yield self.make_covid_request(
                     start_date=day,
                     end_date=day,
                     date_type="data_ocorrido",
                     search=search,
                     cause=cause,
                     state=state,
                     callback=self.parse_covid_request,
                     dont_cache=not historical,
                 )
Example No. 4
 def start_requests(self):
     """Yield one state-confirmed-cases request per day from start_date up to today (exclusive)."""
     for day in date_utils.date_range(self.start_date, date_utils.today()):
         # The request's date travels along in the meta row so the parser can
         # attach it to the parsed records.
         row_meta = {"row": {"date": day}}
         yield self.make_state_confirmed_request(
             day,
             callback=self.parse_state_confirmed,
             meta=row_meta,
         )
Example No. 5
class TotalDeathsSpider(BaseRegistroCivilSpider):
    """Scrapes per-city total death records from the Registro Civil API."""

    name = "obitos_totais"
    base_url = "https://transparencia.registrocivil.org.br/api/record/death"
    start_date = datetime.date(2015, 1, 1)
    end_date = date_utils.today()

    def make_state_request(self,
                           start_date,
                           end_date,
                           state,
                           callback,
                           dont_cache=False):
        """Build a request for one state's death records in [start_date, end_date]."""
        query = [
            ("start_date", str(start_date)),
            ("end_date", str(end_date)),
            ("state", state),
        ]
        url = urljoin(self.base_url, "?" + urlencode(query))
        meta = {
            "row": qs_to_dict(query),
            "dont_cache": dont_cache,
        }
        return self.make_request(url=url, callback=callback, meta=meta)

    def start_requests_after_login(self):
        """Yield one monthly request per state over the whole period."""
        one_day = datetime.timedelta(days=1)
        today = date_utils.today()
        # `date_utils.date_range` excludes the last date, so add one day to
        # `end_date` to include it.
        months = date_utils.date_range(self.start_date,
                                       self.end_date + one_day,
                                       interval="monthly")
        for month_start in months:
            # Only dates older than 30 days get cached — recent data may
            # still change, historical data is unlikely to.
            historical = today - month_start > datetime.timedelta(days=30)
            month_end = date_utils.next_date(month_start, "monthly") - one_day
            for state in STATES:
                yield self.make_state_request(
                    start_date=month_start,
                    end_date=month_end,
                    state=state,
                    callback=self.parse,
                    dont_cache=not historical,
                )

    def parse(self, response):
        """Emit one row per city, merged with the request's meta row."""
        meta = response.meta["row"]
        for record in json.loads(response.body)["data"]:
            record.update(meta)
            record["city"] = record.pop("name")
            record["deaths_total"] = record.pop("total")
            yield record
Example No. 6
 def start_requests(self):
     """Yield one registral request per (date, state) from 2020-01-01 through today."""
     today = date_utils.today()
     # `date_utils.date_range` excludes its end, so pass tomorrow to include
     # today's data.
     end = today + datetime.timedelta(days=1)
     for day in date_utils.date_range(datetime.date(2020, 1, 1), end):
         # Only dates older than 30 days get cached — historical data is
         # unlikely to change, recent data is.
         historical = today - day > datetime.timedelta(days=30)
         for state in STATES:
             yield self.make_registral_request(
                 start_date=day,
                 end_date=day,
                 state=state,
                 callback=self.parse_registral_request,
                 dont_cache=not historical,
             )
Example No. 7
 def start_requests_after_login(self):
     """Yield one monthly per-state request covering start_date..end_date."""
     one_day = datetime.timedelta(days=1)
     today = date_utils.today()
     # `date_utils.date_range` excludes the last date, so add one day to
     # `end_date` to include it.
     months = date_utils.date_range(self.start_date,
                                    self.end_date + one_day,
                                    interval="monthly")
     for month_start in months:
         # Only dates older than 30 days get cached — recent data may still
         # change, historical data is unlikely to.
         historical = today - month_start > datetime.timedelta(days=30)
         month_end = date_utils.next_date(month_start, "monthly") - one_day
         for state in STATES:
             yield self.make_state_request(
                 start_date=month_start,
                 end_date=month_end,
                 state=state,
                 callback=self.parse,
                 dont_cache=not historical,
             )
Example No. 8
def convert_file(filename):
    """Convert a registral deaths CSV into accumulated per-cause rows.

    Reads *filename* (columns: date, state, cause, total), groups records by
    (state, day-of-year) and yields one dict per group containing, for each
    year in ``YEAR_CHOICES``, the new and accumulated death counts per cause,
    per-year totals, and the matching 2019/2020 epidemiological weeks.

    NOTE(review): relies on module-level names not visible in this chunk
    (`one_day`, `today`, `brazilian_epidemiological_week`,
    `get_death_cause_key`, `year_causes_keys`, the ``*_CHOICES`` constants
    and the `rows` library) — confirm they are defined/imported above.
    """
    # There are some missing data on the registral, so default all to None
    # Multiple passes to keep the same column ordering
    all_keys = []
    for prefix in PREFIX_CHOICES:
        all_keys.extend(year_causes_keys(prefix, YEAR_CHOICES))
        all_keys.extend([f"{prefix}_total_{year}" for year in YEAR_CHOICES])
    # Accumulated "deaths_*" columns default to 0; "new_deaths_*" to None.
    base_row = {}
    for key in all_keys:
        base_row[key] = 0 if key.startswith("deaths_") else None

    table_types = {
        "date": rows.fields.DateField,
        "state": rows.fields.TextField,
        "cause": rows.fields.TextField,
        "total": rows.fields.IntegerField,
    }
    table = rows.import_from_csv(filename, force_types=table_types)
    # Key on (state, day-of-year mapped into 2020) so records from different
    # years land in the same group; 2020 is a leap year, so 29 Feb is valid.
    row_key = lambda row: (row.state, datetime.date(2020, row.date.month, row.date.day))
    # groupby requires its input sorted by the same key.
    table = sorted(table, key=row_key)
    accumulated = Counter()
    last_day = today()
    for key, state_data in groupby(table, key=row_key):
        state, date = key
        row = {
            "date": date,
            "state": state,
        }
        try:
            this_day_in_2019 = datetime.date(2019, date.month, date.day)
        except ValueError:  # This day does not exist in 2019 (29 February)
            yesterday = date - one_day
            this_day_in_2019 = datetime.date(2019, yesterday.month, yesterday.day)
        row["epidemiological_week_2019"] = brazilian_epidemiological_week(this_day_in_2019)[1]
        row["epidemiological_week_2020"] = brazilian_epidemiological_week(date)[1]
        row.update(base_row)

        # Zero sum of new deaths for this state in all years (will accumulate)
        for year in YEAR_CHOICES:
            accumulated[(year, state, "new-total")] = 0

        # For each death cause in this date/state, fill `row` and accumulate
        filled_causes = set()
        for item in state_data:
            cause = item.cause
            year = item.date.year
            key_new = get_death_cause_key("new_deaths", cause, year)
            new_deaths = item.total
            if key_new is None:
                # No column exists for this (cause, year): report (but do not
                # abort) if the source still claims deaths for it.
                if new_deaths > 0:
                    #raise RuntimeError(f"Cannot have new_deaths > 0 when key for (new_deaths, {cause}, {year}) is None")
                    print(f"ERROR converting {item}: new_deaths > 0 but key is None")
                    continue
                else:
                    continue
            accumulated_key = (year, state, cause)
            accumulated_key_total = (year, state, "total")
            accumulated_key_new_total = (year, state, "new-total")
            accumulated[accumulated_key] += new_deaths
            accumulated[accumulated_key_total] += new_deaths
            accumulated[accumulated_key_new_total] += new_deaths
            row[key_new] = new_deaths
            row[get_death_cause_key("deaths", cause, year)] = accumulated[accumulated_key]
            filled_causes.add((year, cause))

        # Fill other deaths_* (accumulated) values with the last available data
        # if not filled by the state_data for this date.
        for cause in RESPIRATORY_DEATH_CAUSES:
            for year in YEAR_CHOICES:
                if (year, cause) in filled_causes:
                    continue
                accumulated_key = (year, state, cause)
                key_name = get_death_cause_key("deaths", cause, year)
                if key_name is None:
                    continue
                row[key_name] = accumulated[accumulated_key]

        # Fill year totals (new and accumulated) for state
        for year in YEAR_CHOICES:
            # Future dates in the current year have no "new" data yet.
            if year == last_day.year and date > last_day:
                new_total = None
            else:
                new_total = accumulated[(year, state, "new-total")]
            total = accumulated[(year, state, "total")]
            row[get_death_cause_key("new_deaths", "total", year)] = new_total
            row[get_death_cause_key("deaths", "total", year)] = total

        yield row