Example 1

def _commit_person(person: SchemaPersonType, system_level: SystemLevel,
                   ingest_time: datetime.datetime):
    """Merges the given person into a fresh session, records historical
    snapshots, and commits the result at the given system level."""
    act_session = SessionFactory.for_schema_base(
        schema_base_for_system_level(system_level))
    merged_person = act_session.merge(person)

    metadata = IngestMetadata(region='somewhere',
                              jurisdiction_id='12345',
                              ingest_time=ingest_time,
                              system_level=system_level)
    update_historical_snapshots(act_session, [merged_person], [], metadata)

    act_session.commit()
    act_session.close()
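
A minimal call-site sketch for the helper above. The schema `Person` class and its constructor arguments are assumptions for illustration; only `SystemLevel` and `_commit_person` appear in the snippet itself.

import datetime

# Hypothetical usage: commit one schema person at the county level,
# stamped with the current UTC time as the ingest time.
person = schema.Person(full_name="DOE, JANE")  # assumed schema class
_commit_person(person,
               system_level=SystemLevel.COUNTY,
               ingest_time=datetime.datetime.utcnow())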
Example 2
def store_scraper_success(scraper_success: ScraperSuccessModel,
                          jurisdiction_id: str) -> bool:
    """Store a scraper success event"""

    jurisdiction_id = validate_jid(jurisdiction_id)

    ss = ScraperSuccessEntry(
        jid=jurisdiction_id,
        date=scraper_success.date,
    )

    session = SessionFactory.for_schema_base(
        schema_base_for_system_level(SystemLevel.COUNTY))
    session.add(ss)
    session.commit()

    return True
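
Unlike Example 1, this helper commits without an explicit `session.close()`. A defensive variant, sketched here as an assumption rather than the project's actual pattern, would release the session even if the commit raises:

def store_scraper_success_safely(scraper_success: ScraperSuccessModel,
                                 jurisdiction_id: str) -> bool:
    """Hypothetical variant of the helper above with explicit cleanup."""
    jurisdiction_id = validate_jid(jurisdiction_id)
    session = SessionFactory.for_schema_base(
        schema_base_for_system_level(SystemLevel.COUNTY))
    try:
        session.add(ScraperSuccessEntry(jid=jurisdiction_id,
                                        date=scraper_success.date))
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
    return True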
Example 3
def store_single_count(sc: SingleCount, jurisdiction_id: str) -> bool:
    """Store a single count"""

    jurisdiction_id = validate_jid(jurisdiction_id)

    sca = SingleCountAggregate(
        jid=jurisdiction_id,
        ethnicity=sc.ethnicity.value if sc.ethnicity else None,
        gender=sc.gender.value if sc.gender else None,
        race=sc.race.value if sc.race else None,
        count=sc.count,
        date=sc.date,
    )

    session = SessionFactory.for_schema_base(
        schema_base_for_system_level(SystemLevel.COUNTY))
    session.add(sca)
    session.commit()

    return True
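
A hypothetical call site for the function above, assuming `SingleCount` is a simple value object with the fields read here; neither its constructor nor the jurisdiction-id format is confirmed by the snippet:

import datetime

sc = SingleCount(
    count=311,                       # total count for the day
    ethnicity=None,                  # demographic breakdowns are optional
    gender=None,
    race=None,
    date=datetime.date(2020, 1, 1),
)
store_single_count(sc, "01001001")  # jid value is illustrative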
Example 4
def store_single_count(sc: SingleCount, jurisdiction_id: str) -> bool:
    """Store a single count"""

    jurisdiction_id = validate_jid(jurisdiction_id)

    sca = SingleCountAggregate(
        jid=jurisdiction_id,
        ethnicity=sc.ethnicity.value if sc.ethnicity else None,
        gender=sc.gender.value if sc.gender else None,
        race=sc.race.value if sc.race else None,
        count=sc.count,
        date=sc.date,
    )

    logging.info("Writing single count to the database: %s", to_string(sc))
    if not should_persist():
        return True

    session = SessionFactory.for_schema_base(
        schema_base_for_system_level(SystemLevel.COUNTY))
    session.add(sca)
    session.commit()

    return True
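
Example 4 adds a `should_persist()` gate before any database work. That helper is not shown in these examples; the `write()` docstrings below suggest a policy along these lines, offered purely as an assumption:

import os

def should_persist() -> bool:
    # Assumed policy, reconstructed from the write() docstrings below:
    # persist when in prod, or when 'PERSIST_LOCALLY' is set to true.
    in_prod = os.environ.get("ENV") == "production"  # prod check is assumed
    persist_locally = os.environ.get("PERSIST_LOCALLY", "").lower() == "true"
    return in_prod or persist_locally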
Example 5
def write(ingest_info: IngestInfo, metadata: IngestMetadata) -> bool:
    """
    If in prod or if 'PERSIST_LOCALLY' is set to true, persist each person in
    the ingest_info. If a person with the given surname/birthday already
    exists, then update that person.

    Otherwise, simply log the given ingest_infos for debugging.
    """
    ingest_info_validator.validate(ingest_info)

    mtags = {
        monitoring.TagKey.SHOULD_PERSIST: _should_persist(),
        monitoring.TagKey.PERSISTED: False
    }
    total_people = _get_total_people(ingest_info, metadata)
    with monitoring.measurements(mtags) as measurements:

        # Convert the people one at a time and count the errors as they happen.
        conversion_result: IngestInfoConversionResult = \
            ingest_info_converter.convert_to_persistence_entities(ingest_info,
                                                                  metadata)

        people, data_validation_errors = entity_validator.validate(
            conversion_result.people)
        logging.info(
            "Converted [%s] people with [%s] enum_parsing_errors, [%s]"
            " general_parsing_errors, [%s] protected_class_errors and "
            "[%s] data_validation_errors", len(people),
            conversion_result.enum_parsing_errors,
            conversion_result.general_parsing_errors,
            conversion_result.protected_class_errors, data_validation_errors)
        measurements.measure_int_put(m_people, len(people))

        if _should_abort(total_root_entities=total_people,
                         conversion_result=conversion_result,
                         data_validation_errors=data_validation_errors):
            #  TODO(#1665): remove once dangling PERSIST session investigation
            #   is complete.
            logging.info("_should_abort_ was true after converting people")
            return False

        if not _should_persist():
            return True

        persisted = False

        session = SessionFactory.for_schema_base(
            schema_base_for_system_level(metadata.system_level))

        try:
            logging.info("Starting entity matching")

            entity_matching_output = entity_matching.match(
                session, metadata.region, people)
            people = entity_matching_output.people
            total_root_entities = total_people \
                if metadata.system_level == SystemLevel.COUNTY \
                else entity_matching_output.total_root_entities
            logging.info("Completed entity matching with [%s] errors",
                         entity_matching_output.error_count)
            logging.info(
                "Completed entity matching and have [%s] total people "
                "to commit to DB", len(people))
            if _should_abort(
                    total_root_entities=total_root_entities,
                    conversion_result=conversion_result,
                    entity_matching_errors=entity_matching_output.error_count,
                    data_validation_errors=data_validation_errors):
                #  TODO(#1665): remove once dangling PERSIST session
                #   investigation is complete.
                logging.info("_should_abort_ was true after entity matching")
                return False

            database.write_people(
                session,
                people,
                metadata,
                orphaned_entities=entity_matching_output.orphaned_entities)
            logging.info("Successfully wrote to the database")
            session.commit()

            persisted = True
            mtags[monitoring.TagKey.PERSISTED] = True
        except Exception as e:
            logging.exception("An exception was raised in write(): [%s]",
                              type(e).__name__)
            # Record the error type that happened and increment the counter
            mtags[monitoring.TagKey.ERROR] = type(e).__name__
            measurements.measure_int_put(m_errors, 1)
            session.rollback()
            raise
        finally:
            session.close()
        return persisted
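
Both versions of `write()` delegate the abort decision to `_should_abort()`, which is not defined in these examples. Since it receives error counts alongside a total root-entity count, one plausible shape is a ratio threshold; everything in this sketch, including the threshold, is conjecture:

def _should_abort(total_root_entities: int,
                  conversion_result: IngestInfoConversionResult,
                  entity_matching_errors: int = 0,
                  data_validation_errors: int = 0,
                  **_kwargs) -> bool:  # absorbs the extra args in Example 6
    """Hypothetical sketch: abort when errors are too large a share of input."""
    if total_root_entities == 0:
        return False
    errors = (conversion_result.enum_parsing_errors
              + conversion_result.general_parsing_errors
              + conversion_result.protected_class_errors
              + entity_matching_errors
              + data_validation_errors)
    return errors / total_root_entities > 0.5  # illustrative threshold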
Example 6
def write(
    ingest_info: IngestInfo,
    ingest_metadata: IngestMetadata,
    run_txn_fn: Callable[
        [Session, MeasurementMap, Callable[[Session], bool], Optional[int]],
        bool] = retry_transaction,
) -> bool:
    """
    If in prod or if 'PERSIST_LOCALLY' is set to true, persist each person in
    the ingest_info. If a person with the given surname/birthday already exists,
    then update that person.

    Otherwise, simply log the given ingest_infos for debugging

    `run_txn_fn` is exposed primarily for testing and should typically be left as `retry_transaction`. `run_txn_fn`
    must handle the coordination of the transaction including, when to run the body of the transaction and when to
    commit, rollback, or close the session.
    """
    ingest_info_validator.validate(ingest_info)

    mtags: Dict[str, Union[bool, str]] = {
        monitoring.TagKey.SHOULD_PERSIST: should_persist(),
        monitoring.TagKey.PERSISTED: False,
    }
    total_people = _get_total_people(ingest_info, ingest_metadata)
    with monitoring.measurements(mtags) as measurements:

        # Convert the people one at a time and count the errors as they happen.
        conversion_result: IngestInfoConversionResult = (
            ingest_info_converter.convert_to_persistence_entities(
                ingest_info, ingest_metadata))

        people, data_validation_errors = entity_validator.validate(
            conversion_result.people)
        logging.info(
            "Converted [%s] people with [%s] enum_parsing_errors, [%s]"
            " general_parsing_errors, [%s] protected_class_errors and "
            "[%s] data_validation_errors",
            len(people),
            conversion_result.enum_parsing_errors,
            conversion_result.general_parsing_errors,
            conversion_result.protected_class_errors,
            data_validation_errors,
        )
        measurements.measure_int_put(m_people, len(people))

        if _should_abort(
                total_root_entities=total_people,
                system_level=ingest_metadata.system_level,
                conversion_result=conversion_result,
                region_code=ingest_metadata.region,
                data_validation_errors=data_validation_errors,
        ):
            #  TODO(#1665): remove once dangling PERSIST session investigation
            #   is complete.
            logging.info("_should_abort_ was true after converting people")
            return False

        if not should_persist():
            return True

        @trace.span
        def match_and_write_people(session: Session) -> bool:
            logging.info("Starting entity matching")

            entity_matching_output = entity_matching.match(
                session, ingest_metadata.region, people)
            output_people = entity_matching_output.people
            total_root_entities = (total_people if ingest_metadata.system_level
                                   == SystemLevel.COUNTY else
                                   entity_matching_output.total_root_entities)
            logging.info(
                "Completed entity matching with [%s] errors",
                entity_matching_output.error_count,
            )
            logging.info(
                "Completed entity matching and have [%s] total people "
                "to commit to DB",
                len(output_people),
            )
            if _should_abort(
                    total_root_entities=total_root_entities,
                    system_level=ingest_metadata.system_level,
                    conversion_result=conversion_result,
                    region_code=ingest_metadata.region,
                    entity_matching_errors=entity_matching_output.error_count,
            ):
                #  TODO(#1665): remove once dangling PERSIST session
                #   investigation is complete.
                logging.info("_should_abort_ was true after entity matching")
                return False

            database_invariant_errors = (
                database_invariant_validator.validate_invariants(
                    session,
                    ingest_metadata.system_level,
                    ingest_metadata.region,
                    output_people,
                ))

            if _should_abort(
                    total_root_entities=total_root_entities,
                    system_level=ingest_metadata.system_level,
                    conversion_result=conversion_result,
                    region_code=ingest_metadata.region,
                    database_invariant_errors=database_invariant_errors,
            ):
                logging.info(
                    "_should_abort_ was true after database invariant validation"
                )
                return False

            database.write_people(
                session,
                output_people,
                ingest_metadata,
                orphaned_entities=entity_matching_output.orphaned_entities,
            )
            logging.info("Successfully wrote to the database")
            return True

        try:
            if not run_txn_fn(
                    SessionFactory.for_schema_base(
                        schema_base_for_system_level(
                            ingest_metadata.system_level)),
                    measurements,
                    match_and_write_people,
                    5,
            ):
                return False

            mtags[monitoring.TagKey.PERSISTED] = True
        except Exception as e:
            logging.exception("An exception was raised in write(): [%s]",
                              type(e).__name__)
            # Record the error type that happened and increment the counter
            mtags[monitoring.TagKey.ERROR] = type(e).__name__
            measurements.measure_int_put(m_errors, 1)
            raise
        return True
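
The default `run_txn_fn`, `retry_transaction`, is referenced but not defined in these examples. Given the declared signature and the docstring's contract (the runner owns commit, rollback, and close), a compatible implementation might look like the following sketch; the retry condition and the treatment of `MeasurementMap` as an opaque type are assumptions:

from typing import Callable, Optional

import sqlalchemy.exc
from sqlalchemy.orm import Session


def retry_transaction(
    session: Session,
    measurements: "MeasurementMap",  # project-specific type, treated as opaque
    txn_body: Callable[[Session], bool],
    max_retries: Optional[int],
) -> bool:
    """Hypothetical runner matching the run_txn_fn contract above."""
    attempts = 0
    try:
        while True:
            attempts += 1
            try:
                if not txn_body(session):
                    session.rollback()
                    return False
                session.commit()
                return True
            except sqlalchemy.exc.DBAPIError:
                # Assumed policy: roll back and retry transient database
                # errors until the optional retry budget is exhausted.
                session.rollback()
                if max_retries is not None and attempts > max_retries:
                    raise
    finally:
        session.close()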