Esempio n. 1
0
def test_get_state_table_class_with_name() -> None:
    """A known entity class name resolves to the matching state schema class."""
    resolved_class = get_state_database_entity_with_name("StateSupervisionViolation")
    expected_class = state_schema.StateSupervisionViolation

    assert resolved_class == expected_class
Esempio n. 2
0
    def __init__(
        self,
        dataset: Optional[str],
        root_entity_class: Type[state_entities.Entity],
        unifying_id_field: str,
        build_related_entities: bool,
        state_code: str,
        unifying_id_field_filter_set: Optional[Set[int]] = None,
    ):
        """Stores and validates the configuration of this PTransform.

        Arguments:
            dataset: Name of the BigQuery dataset to read from. Must be
                non-empty.
            root_entity_class: Entity class (state entity layer) of the root
                entity to build. Its class name is used to look up the
                corresponding schema class.
            unifying_id_field: Column/attribute name of the id that ties the
                related entities to the root entity (usually 'person_id').
                The root schema class must expose an attribute of this name.
            build_related_entities: When True, also builds and attaches all
                forward-edge children of the root entity.
            state_code: The state code to filter all results by.
            unifying_id_field_filter_set: When non-empty, only entities that
                can be connected to a root entity with one of these unifying
                ids are built.

        Raises:
            ValueError: If root_entity_class, dataset or unifying_id_field is
                falsy, or if the root schema class has no attribute named
                unifying_id_field.
        """
        super().__init__()
        self._dataset = dataset

        if not root_entity_class:
            raise ValueError(
                f"{self.__class__.__name__}: Expecting root_entity_class to be not None."
            )

        self._root_entity_class = root_entity_class

        # The schema class is looked up by the entity class's name.
        root_schema_class: Type[StateBase] = (
            schema_utils.get_state_database_entity_with_name(
                root_entity_class.__name__
            )
        )
        self._root_schema_class = root_schema_class
        self._root_table_name = root_schema_class.__tablename__

        self._unifying_id_field = unifying_id_field
        self._build_related_entities = build_related_entities
        self._unifying_id_field_filter_set = unifying_id_field_filter_set
        self._state_code = state_code

        if not dataset:
            raise ValueError("No valid data source passed to the pipeline.")

        _validate_schema_entity_pair(root_schema_class, root_entity_class)

        if not unifying_id_field:
            raise ValueError("No valid unifying_id_field passed to the pipeline.")

        if hasattr(root_schema_class, unifying_id_field):
            return

        raise ValueError(
            f"Root entity class [{root_schema_class.__name__}] does not have unifying id field "
            f"[{unifying_id_field}]"
        )
    def __init__(
        self,
        dataset: Optional[str],
        entity_class: Type[state_entities.Entity],
        unifying_id_field: str,
        parent_id_field: Optional[str],
        unifying_id_field_filter_set: Optional[Set[int]],
        state_code: Optional[str],
    ):
        """Records the extraction settings and derives schema metadata.

        Besides storing the arguments, this looks up the schema class whose
        name matches `entity_class.__name__` and caches its `__tablename__`
        together with the entity class's id field name.

        Arguments:
            dataset: Name of the dataset to read from.
            entity_class: Entity class of the entities to extract.
            unifying_id_field: Name of the unifying id field.
            parent_id_field: Name of the parent's id field, if any.
            unifying_id_field_filter_set: Optional set of unifying ids stored
                for later filtering.
            state_code: Optional state code stored for later filtering.
        """
        super(_ExtractEntityBase, self).__init__()

        self._dataset = dataset
        self._unifying_id_field = unifying_id_field
        self._unifying_id_field_filter_set = unifying_id_field_filter_set
        self._parent_id_field = parent_id_field
        self._entity_class = entity_class

        # The schema class is resolved from the entity class's name.
        schema_class: Type[StateBase] = (
            schema_utils.get_state_database_entity_with_name(entity_class.__name__)
        )
        self._schema_class = schema_class
        self._entity_table_name = schema_class.__tablename__
        self._entity_id_field = entity_class.get_class_id_name()

        self._state_code = state_code
    def collect_view_builders(self) -> List[SimpleBigQueryViewBuilder]:
        """Builds one metadata view builder per state person column.

        Enum properties are processed first, then non-enum properties; each
        group uses its own description and query template. Properties listed
        in METADATA_EXCLUDED_PROPERTIES are skipped.

        Returns:
            The view builders, enum columns first, then non-enum columns.
        """
        column_checker = BigQueryTableChecker("state", STATE_PERSON_TABLE_NAME)
        person_entity = get_state_database_entity_with_name(STATE_PERSON_ENTITY_NAME)

        # (property-name getter, description template, query template)
        column_groups = (
            (
                get_enum_property_names,
                STATE_PERSON_ENUM_COLUMN_DESCRIPTION_TEMPLATE,
                STATE_PERSON_ENUM_QUERY_TEMPLATE,
            ),
            (
                get_non_enum_property_names,
                STATE_PERSON_NON_ENUM_COLUMN_DESCRIPTION_TEMPLATE,
                STATE_PERSON_NON_ENUM_QUERY_TEMPLATE,
            ),
        )

        view_builders: List[SimpleBigQueryViewBuilder] = []
        for list_property_names, description_template, query_template in column_groups:
            for column in list_property_names(person_entity):
                if column in METADATA_EXCLUDED_PROPERTIES:
                    continue

                view_builders.append(
                    SimpleBigQueryViewBuilder(
                        dataset_id=VIEWS_DATASET,
                        view_id=f"ingest_state_metadata__{STATE_PERSON_TABLE_NAME}__{column}",
                        description=description_template.format(col=column),
                        view_query_template=query_template,
                        table_name=STATE_PERSON_TABLE_NAME,
                        column_name=column,
                        should_build_predicate=column_checker.get_has_column_predicate(
                            column
                        ),
                    )
                )

        return view_builders
Esempio n. 5
0
def test_get_state_table_class_with_name_invalid_name():
    """Looking up a name with no matching schema class raises LookupError."""
    unknown_class_name = 'XXX'

    with pytest.raises(LookupError):
        get_state_database_entity_with_name(unknown_class_name)
Esempio n. 6
0
def test_get_state_table_class_with_name():
    """A known entity class name resolves to the matching state schema class."""
    resolved_class = get_state_database_entity_with_name('StateSupervisionViolation')
    expected_class = state_schema.StateSupervisionViolation

    assert resolved_class == expected_class
    def expand(self, input_or_inputs):
        """Applies one extraction transform per forward-edge relationship.

        Iterates over the relationship properties of the parent schema class.
        For each property whose target class is ranked lower than the parent
        (per the state direction checker), an extraction transform labelled
        "Extract <property_name>" is applied to `input_or_inputs`.

        Returns:
            A dict mapping each forward-edge property name to the result of
            applying its extraction transform. Properties that are not
            forward edges are omitted.
        """
        relationship_properties = (
            self._parent_schema_class.get_relationship_property_names_and_properties()
        )

        extracted_by_property = {}
        for property_name, property_object in relationship_properties.items():
            # Name of the class the relationship points at
            related_class_name = property_object.argument.arg

            related_entity_class = entity_utils.get_entity_class_in_module_with_name(
                state_entities, related_class_name
            )
            related_schema_class = schema_utils.get_state_database_entity_with_name(
                related_class_name
            )

            edge_checker = SchemaEdgeDirectionChecker.state_direction_checker()
            if not edge_checker.is_higher_ranked(
                self._parent_schema_class, related_schema_class
            ):
                # Only forward edges are extracted
                continue

            # Arguments common to every extraction transform below
            shared_kwargs = {
                "dataset": self._dataset,
                "entity_class": related_entity_class,
                "unifying_id_field": self._unifying_id_field,
                "parent_id_field": self._parent_id_field,
                "unifying_id_field_filter_set": self._unifying_id_field_filter_set,
                "state_code": self._state_code,
            }

            if property_object.secondary is not None:
                # Many-to-many: joined through the relationship's secondary table
                extractor = _ExtractEntityWithAssociationTable(
                    association_table=property_object.secondary.name,
                    association_table_parent_id_field=self._parent_id_field,
                    association_table_entity_id_field=(
                        related_entity_class.get_class_id_name()
                    ),
                    **shared_kwargs,
                )
            elif property_object.uselist:
                # 1-to-many
                extractor = _ExtractEntity(**shared_kwargs)
            else:
                # 1-to-1 (from the parent class perspective): the parent's own
                # table is used as the association table
                extractor = _ExtractEntityWithAssociationTable(
                    association_table=self._parent_schema_class.__tablename__,
                    association_table_parent_id_field=self._parent_id_field,
                    association_table_entity_id_field=property_object.key + '_id',
                    **shared_kwargs,
                )

            extracted_by_property[property_name] = (
                input_or_inputs | f"Extract {property_name}" >> extractor
            )

        return extracted_by_property
def _get_all_columns_for_table(table_name: str) -> List[str]:
    """Returns the sorted column property names of the looked-up schema class.

    NOTE(review): `table_name` is passed straight to
    get_state_database_entity_with_name; confirm callers supply the name
    that helper expects.
    """
    column_names = get_state_database_entity_with_name(
        table_name
    ).get_column_property_names()
    return sorted(column_names)
Esempio n. 9
0
    def expand(self, input_or_inputs):
        """Applies one extraction transform per available relationship.

        Iterates over the relationship properties of the root schema class.
        A property is processed when a dataset is configured or when the
        related table's name is present in the data dict; otherwise it is
        skipped. Each processed property gets an extraction transform
        labelled "Extract <property_name>" applied to `input_or_inputs`.

        Returns:
            A dict mapping each processed property name to the result of
            applying its extraction transform.
        """
        relationship_properties = (
            self._root_schema_class.get_relationship_property_names_and_properties()
        )

        extracted_by_property = {}
        for property_name, property_object in relationship_properties.items():
            # Name of the class the relationship points at
            related_class_name = property_object.argument.arg

            related_entity_class = entity_utils.get_entity_class_in_module_with_name(
                state_entities, related_class_name
            )
            related_schema_class = schema_utils.get_state_database_entity_with_name(
                related_class_name
            )
            related_table_name = related_schema_class.__tablename__

            if not self._dataset and related_table_name not in self._data_dict:
                # No data source available for this table
                continue

            # Arguments common to every extraction transform below
            shared_kwargs = {
                "dataset": self._dataset,
                "data_dict": self._data_dict,
                "table_name": related_table_name,
                "entity_class": related_entity_class,
                "root_id_field": self._root_id_field,
                "unifying_id_field": self._unifying_id_field,
            }

            if property_object.secondary is not None:
                # Many-to-many: joined through the relationship's secondary table
                extractor = _ExtractEntityWithAssociationTable(
                    associated_id_field=related_entity_class.get_class_id_name(),
                    association_table=property_object.secondary.name,
                    **shared_kwargs,
                )
            elif property_object.uselist:
                # 1-to-many
                extractor = _ExtractEntity(**shared_kwargs)
            else:
                # 1-to-1 (from root schema class perspective): the root's own
                # table is used as the association table
                extractor = _ExtractEntityWithAssociationTable(
                    associated_id_field=property_object.key + '_id',
                    association_table=self._root_schema_class.__tablename__,
                    **shared_kwargs,
                )

            extracted_by_property[property_name] = (
                input_or_inputs | f"Extract {property_name}" >> extractor
            )

        return extracted_by_property