def test_get_state_table_class_with_name() -> None:
    """A known schema class name resolves to the matching state schema class."""
    looked_up = get_state_database_entity_with_name("StateSupervisionViolation")
    expected = state_schema.StateSupervisionViolation
    assert looked_up == expected
def __init__(
    self,
    dataset: Optional[str],
    root_entity_class: Type[state_entities.Entity],
    unifying_id_field: str,
    build_related_entities: bool,
    state_code: str,
    unifying_id_field_filter_set: Optional[Set[int]] = None,
):
    """Validates and stores the configuration for this PTransform.

    Args:
        dataset: Name of the BigQuery dataset to read from.
        root_entity_class: Entity class (from the state entity layer) of the
            root entity to build.
        unifying_id_field: Column / attribute name of the id used to connect
            related entities to the root entity. It must exist on the root
            schema class; this is usually 'person_id'.
        build_related_entities: When True, all forward-edge children of the
            root entity are also built and attached.
        state_code: State code that all results are filtered by.
        unifying_id_field_filter_set: When non-empty, only entities that can
            be connected to a root entity with one of these unifying ids are
            built.

    Raises:
        ValueError: If root_entity_class, dataset or unifying_id_field is
            falsy, or if the root schema class has no attribute named
            unifying_id_field.
    """
    super().__init__()
    self._dataset = dataset

    if not root_entity_class:
        raise ValueError(
            f"{self.__class__.__name__}: Expecting root_entity_class to be not None."
        )
    self._root_entity_class = root_entity_class

    # Resolve the schema class that shares its name with the entity class.
    root_schema_class: Type[StateBase] = (
        schema_utils.get_state_database_entity_with_name(
            self._root_entity_class.__name__
        )
    )
    self._root_schema_class = root_schema_class
    self._root_table_name = root_schema_class.__tablename__

    self._unifying_id_field = unifying_id_field
    self._build_related_entities = build_related_entities
    self._unifying_id_field_filter_set = unifying_id_field_filter_set
    self._state_code = state_code

    # The remaining checks run only after all attributes have been stored.
    if not dataset:
        raise ValueError("No valid data source passed to the pipeline.")

    _validate_schema_entity_pair(self._root_schema_class, self._root_entity_class)

    if not unifying_id_field:
        raise ValueError("No valid unifying_id_field passed to the pipeline.")

    root_has_unifying_field = hasattr(self._root_schema_class, unifying_id_field)
    if not root_has_unifying_field:
        raise ValueError(
            f"Root entity class [{self._root_schema_class.__name__}] does not have unifying id field "
            f"[{unifying_id_field}]"
        )
def __init__(
    self,
    dataset: Optional[str],
    entity_class: Type[state_entities.Entity],
    unifying_id_field: str,
    parent_id_field: Optional[str],
    unifying_id_field_filter_set: Optional[Set[int]],
    state_code: Optional[str],
):
    """Stores the extraction settings and derives schema metadata.

    The schema class is looked up by the name of ``entity_class``; its
    ``__tablename__`` and the entity class's id field name are cached on the
    instance alongside the arguments passed in.
    """
    super(_ExtractEntityBase, self).__init__()

    # Values taken directly from the caller.
    self._dataset = dataset
    self._unifying_id_field = unifying_id_field
    self._unifying_id_field_filter_set = unifying_id_field_filter_set
    self._parent_id_field = parent_id_field
    self._entity_class = entity_class

    # Values derived from the entity class.
    schema_class: Type[StateBase] = (
        schema_utils.get_state_database_entity_with_name(
            self._entity_class.__name__
        )
    )
    self._schema_class = schema_class
    self._entity_table_name = schema_class.__tablename__
    self._entity_id_field = self._entity_class.get_class_id_name()

    self._state_code = state_code
def collect_view_builders(self) -> List[SimpleBigQueryViewBuilder]:
    """Builds one view builder per non-excluded column of the state person table.

    Builders for enum properties come first, followed by builders for
    non-enum properties. Columns listed in METADATA_EXCLUDED_PROPERTIES are
    skipped in both groups.
    """
    column_checker = BigQueryTableChecker("state", STATE_PERSON_TABLE_NAME)
    person_entity = get_state_database_entity_with_name(STATE_PERSON_ENTITY_NAME)

    def _make_builder(
        col: str, description_template: str, query_template: str
    ) -> SimpleBigQueryViewBuilder:
        # Both column groups differ only in the description/query templates.
        return SimpleBigQueryViewBuilder(
            dataset_id=VIEWS_DATASET,
            view_id=f"ingest_state_metadata__{STATE_PERSON_TABLE_NAME}__{col}",
            description=description_template.format(col=col),
            view_query_template=query_template,
            table_name=STATE_PERSON_TABLE_NAME,
            column_name=col,
            should_build_predicate=column_checker.get_has_column_predicate(col),
        )

    view_builders: List[SimpleBigQueryViewBuilder] = []

    for col in get_enum_property_names(person_entity):
        if col in METADATA_EXCLUDED_PROPERTIES:
            continue
        view_builders.append(
            _make_builder(
                col,
                STATE_PERSON_ENUM_COLUMN_DESCRIPTION_TEMPLATE,
                STATE_PERSON_ENUM_QUERY_TEMPLATE,
            )
        )

    for col in get_non_enum_property_names(person_entity):
        if col in METADATA_EXCLUDED_PROPERTIES:
            continue
        view_builders.append(
            _make_builder(
                col,
                STATE_PERSON_NON_ENUM_COLUMN_DESCRIPTION_TEMPLATE,
                STATE_PERSON_NON_ENUM_QUERY_TEMPLATE,
            )
        )

    return view_builders
def test_get_state_table_class_with_name_invalid_name():
    """An unknown class name makes the lookup raise LookupError."""
    unknown_name = 'XXX'
    expected_error = pytest.raises(LookupError)
    with expected_error:
        get_state_database_entity_with_name(unknown_name)
def test_get_state_table_class_with_name():
    """A known schema class name resolves to the matching state schema class."""
    resolved_class = get_state_database_entity_with_name('StateSupervisionViolation')
    assert resolved_class == state_schema.StateSupervisionViolation
def expand(self, input_or_inputs):
    """Applies one extraction transform per forward-edge relationship.

    Walks the relationship properties of the parent schema class. For each
    property whose schema class the direction checker ranks below the parent
    schema class, an extraction transform is applied to ``input_or_inputs``
    under the label ``Extract <property_name>``.

    Args:
        input_or_inputs: The value each extraction transform is applied to.

    Returns:
        A dict mapping relationship property name to the result of applying
        that property's extraction transform. Properties that are not
        forward edges are left out.
    """
    names_to_properties = (
        self._parent_schema_class.get_relationship_property_names_and_properties()
    )

    # The checker does not depend on the property being inspected, so it is
    # built once instead of once per loop iteration.
    direction_checker = SchemaEdgeDirectionChecker.state_direction_checker()

    properties_dict = {}
    for property_name, property_object in names_to_properties.items():
        # Get class name associated with the property
        property_class_name = property_object.argument.arg
        property_entity_class = entity_utils.get_entity_class_in_module_with_name(
            state_entities, property_class_name
        )
        property_schema_class = schema_utils.get_state_database_entity_with_name(
            property_class_name
        )

        is_property_forward_edge = direction_checker.is_higher_ranked(
            self._parent_schema_class, property_schema_class
        )
        if not is_property_forward_edge:
            # Only forward edges get an entry in the result.
            continue

        # Keyword arguments every extraction transform below receives.
        common_kwargs = dict(
            dataset=self._dataset,
            entity_class=property_entity_class,
            unifying_id_field=self._unifying_id_field,
            parent_id_field=self._parent_id_field,
            unifying_id_field_filter_set=self._unifying_id_field_filter_set,
            state_code=self._state_code,
        )

        if property_object.secondary is not None:
            # Many-to-many relationship: join through the secondary table.
            extractor = _ExtractEntityWithAssociationTable(
                association_table=property_object.secondary.name,
                association_table_parent_id_field=self._parent_id_field,
                association_table_entity_id_field=(
                    property_entity_class.get_class_id_name()
                ),
                **common_kwargs,
            )
        elif property_object.uselist:
            # 1-to-many relationship
            extractor = _ExtractEntity(**common_kwargs)
        else:
            # 1-to-1 relationship (from parent class perspective): the parent
            # table itself is used as the association table, keyed by the
            # '<property key>_id' column.
            extractor = _ExtractEntityWithAssociationTable(
                association_table=self._parent_schema_class.__tablename__,
                association_table_parent_id_field=self._parent_id_field,
                association_table_entity_id_field=property_object.key + '_id',
                **common_kwargs,
            )

        # Extract the cross-entity relationship
        properties_dict[property_name] = (
            input_or_inputs | f"Extract {property_name}" >> extractor
        )

    return properties_dict
def _get_all_columns_for_table(table_name: str) -> List[str]:
    """Returns the sorted column property names of the entity found for table_name.

    The entity is resolved by passing ``table_name`` to
    ``get_state_database_entity_with_name``.
    """
    entity_type = get_state_database_entity_with_name(table_name)
    column_names = list(entity_type.get_column_property_names())
    column_names.sort()
    return column_names
def expand(self, input_or_inputs):
    """Applies one extraction transform per relationship of the root schema class.

    A relationship property is processed only when a dataset is configured
    or the related table's name is present in the data dict. Each processed
    property maps to the result of applying its extraction transform to
    ``input_or_inputs`` under the label ``Extract <property_name>``.

    Returns:
        A dict from relationship property name to the extracted entities.
    """
    relationship_properties = (
        self._root_schema_class.get_relationship_property_names_and_properties()
    )

    extracted_by_property = {}
    for property_name, property_object in relationship_properties.items():
        # Get class name associated with the property
        related_class_name = property_object.argument.arg
        related_entity_class = entity_utils.get_entity_class_in_module_with_name(
            state_entities, related_class_name
        )
        related_schema_class = schema_utils.get_state_database_entity_with_name(
            related_class_name
        )
        related_table_name = related_schema_class.__tablename__

        # Skip tables that have no data source available.
        if not self._dataset and related_table_name not in self._data_dict:
            continue

        if property_object.secondary is not None:
            # Many-to-many relationship
            secondary_table = property_object.secondary.name
            related_id_field = related_entity_class.get_class_id_name()
            extractor = _ExtractEntityWithAssociationTable(
                dataset=self._dataset,
                data_dict=self._data_dict,
                table_name=related_table_name,
                entity_class=related_entity_class,
                root_id_field=self._root_id_field,
                associated_id_field=related_id_field,
                association_table=secondary_table,
                unifying_id_field=self._unifying_id_field,
            )
        elif property_object.uselist:
            # 1-to-many relationship
            extractor = _ExtractEntity(
                dataset=self._dataset,
                data_dict=self._data_dict,
                table_name=related_table_name,
                entity_class=related_entity_class,
                unifying_id_field=self._unifying_id_field,
                root_id_field=self._root_id_field,
            )
        else:
            # 1-to-1 relationship (from root schema class perspective)
            root_table = self._root_schema_class.__tablename__
            related_id_field = property_object.key + '_id'
            extractor = _ExtractEntityWithAssociationTable(
                dataset=self._dataset,
                data_dict=self._data_dict,
                table_name=related_table_name,
                entity_class=related_entity_class,
                root_id_field=self._root_id_field,
                associated_id_field=related_id_field,
                association_table=root_table,
                unifying_id_field=self._unifying_id_field,
            )

        # Extract the cross-entity relationship
        extracted_by_property[property_name] = (
            input_or_inputs | f"Extract {property_name}" >> extractor
        )

    return extracted_by_property