def test_detects_many_to_one():
    """
    The model identified by (adset_id, ad_id) must get a many-to-one left join
    to the model identified by adset_id only, joined on adset_id.
    """
    # Attributes are created once and shared between both models.
    attrs = {
        'adset_id': FdqModelAttribute(data_reference='"adset_id"', field_map=['adset_id']),
        'ad_id': FdqModelAttribute(data_reference='"ad_id"', field_map=['ad_id']),
        'spend': FdqModelAttribute(data_reference='"spend"', field_map=['spend']),
        'impressions': FdqModelAttribute(data_reference='"impressions"', field_map=['impressions']),
    }

    # "One" side: identified by adset_id alone.
    adset_model = create_temp_internal_from_api_model(
        name='model_1',
        data_source='source.db.schema.table',
        fields=[attrs['adset_id'], attrs['spend']],
        identifiers=['adset_id'],
    )
    # "Many" side: its identifiers are a superset of the adset model's.
    ad_model = create_temp_internal_from_api_model(
        name='model_2',
        data_source='source.db.schema.second_table',
        fields=[attrs['ad_id'], attrs['adset_id'], attrs['spend'], attrs['impressions']],
        identifiers=['adset_id', 'ad_id'],
    )

    expected_join = FdqModelJoin(
        to_model=adset_model.model_name,
        relationship=FdqModelJoinRelationship.many_to_one,
        fields=['adset_id'],
        join_type=JoinType.left,
    ).dict(by_alias=True)

    detected = detect_joins([adset_model, ad_model])

    assert detected == {ad_model.model_name: [expected_join]}
def detect_joins(models: List[FdqModel]) -> Dict[str, List[Dict[str, Any]]]:
    """
    Suggest joins between models by comparing their identifier sets.

    Every unordered pair of models is inspected once:

    - equal identifier sets produce a one-to-one join from the first model
      of the pair to the second one;
    - when one identifier set is a strict subset of the other, a many-to-one
      join is produced from the model with more identifiers to the model with
      fewer, joined on the smaller identifier set.

    A pair in which either model has no identifiers is skipped with a warning.
    All suggested joins use ``JoinType.left``.

    :param models: models to inspect; only ``model_name`` and ``identifiers``
        are read from each of them
    :return: mapping of model name to the list of joins starting at that model,
        each serialized with ``FdqModelJoin.dict(by_alias=True)``. Models with
        no detected join are not present in the mapping.
    """
    joins: Dict[str, List[Dict[str, Any]]] = defaultdict(list)

    def _add_join(source, target, relationship, ids):
        # Sort the ids: they come from a set, whose iteration order is arbitrary,
        # and the suggested join definition should be stable between runs.
        join = FdqModelJoin(
            to_model=target.model_name,
            relationship=relationship,
            fields=sorted(ids),
            join_type=JoinType.left,
        )
        joins[source.model_name].append(join.dict(by_alias=True))

    for model, another_model in combinations(models, 2):
        left_ids = set(model.identifiers)
        right_ids = set(another_model.identifiers)

        if not left_ids:
            logger.warning(f'Model {model.model_name} has no ids defined')
            continue
        if not right_ids:
            # Report the model that actually lacks identifiers (the second of the pair).
            logger.warning(f'Model {another_model.model_name} has no ids defined')
            continue

        if left_ids == right_ids:
            logger.info(
                f'Found possible join: one {model.model_name} to one {another_model.model_name}'
            )
            _add_join(model, another_model, FdqModelJoinRelationship.one_to_one, left_ids)
        elif left_ids.issubset(right_ids):
            # The second model is the more granular one, so the join starts from it.
            logger.info(
                f'Found possible join: many {another_model.model_name} to one {model.model_name}'
            )
            _add_join(another_model, model, FdqModelJoinRelationship.many_to_one, left_ids)
        elif right_ids.issubset(left_ids):
            logger.info(
                f'Found possible join: many {model.model_name} to one {another_model.model_name}'
            )
            _add_join(model, another_model, FdqModelJoinRelationship.many_to_one, right_ids)

    return dict(joins)
def from_internal(cls, model_join: ModelJoin, virtual_data_source: str) -> 'FdqModelJoin':
    """
    Build the API model join from a Husky model join.

    The virtual data source prefix is removed from the target model name and
    from every taxon. A join without taxons yields an empty taxon list.

    :param model_join: internal (Husky) join definition to convert
    :param virtual_data_source: virtual data source whose prefix gets removed
    :return: API join created through ``FdqModelJoin.construct``
    """

    def _strip_prefix(name):
        return remove_virtual_data_source_prefix(virtual_data_source, name)

    join_type = model_join.join_type
    target_model = _strip_prefix(model_join.to_model)
    # Re-create the relationship as the API enum from the internal enum's value.
    api_relationship = FdqModelJoinRelationship(model_join.relationship.value)
    internal_taxons = model_join.taxons or []
    api_taxons = [_strip_prefix(taxon) for taxon in internal_taxons]

    return FdqModelJoin.construct(
        join_type=join_type,
        to_model=target_model,
        relationship=api_relationship,
        taxons=api_taxons,
    )
def test_api_model_join_to_internal():
    """
    Mapping an API join to the internal join must prefix the target model and
    every taxon with the virtual data source and carry over the enum values.
    """

    def _prefixed(name):
        return prefix_with_virtual_data_source(_VIRTUAL_DATA_SOURCE, name)

    api_join = FdqModelJoin(
        join_type=JoinType.left,
        to_model='model_1',
        fields=['taxon_slug', 'taxon_slug_2'],
        relationship=FdqModelJoinRelationship.many_to_one,
    )

    internal_join = FdqModelJoinMapper.to_internal(api_join, _VIRTUAL_DATA_SOURCE)
    actual = internal_join.to_primitive()

    expected = {
        'direction': None,
        'join_type': api_join.join_type.value,
        'to_model': _prefixed(api_join.to_model),
        'relationship': api_join.relationship.value,
        'taxons': [_prefixed(taxon) for taxon in api_join.taxons],
    }

    assert actual == expected
FdqModel( model_name='api-model-slug-2', data_source='physical.table2', fields=[ FdqModelAttribute(data_reference='"col_name"', field_map=['taxon_slug_3']), FdqModelAttribute( data_reference='"col_name_2"', field_map=['taxon_slug', 'taxon_slug_2'], ), ], identifiers=['taxon_slug', 'taxon_slug_2'], joins=[ FdqModelJoin( join_type=JoinType.left, to_model='model_1', fields=['taxon_slug', 'taxon_slug_2'], relationship=FdqModelJoinRelationship.many_to_one, ) ], visibility=ModelVisibility.hidden, ), ], ) def test_api_model_to_internal(api_model): husky_model = FdqModelMapper.to_internal(api_model, _VIRTUAL_DATA_SOURCE, 'company_id') assert husky_model.to_primitive() == { 'data_sources': [_VIRTUAL_DATA_SOURCE], 'fully_qualified_name_parts': api_model.data_source.split('.'), 'model_type':