Exemple #1
0
def test_detects_many_to_one():
    """
    A model whose identifiers extend another model's identifiers
    should be reported as joining many-to-one to that other model.
    """
    # Attributes shared between the two models are created once and reused.
    adset_id = FdqModelAttribute(data_reference='"adset_id"', field_map=['adset_id'])
    ad_id = FdqModelAttribute(data_reference='"ad_id"', field_map=['ad_id'])
    spend = FdqModelAttribute(data_reference='"spend"', field_map=['spend'])
    impressions = FdqModelAttribute(data_reference='"impressions"', field_map=['impressions'])

    # "One" side: identified by adset_id only.
    parent = create_temp_internal_from_api_model(
        name='model_1',
        data_source='source.db.schema.table',
        fields=[adset_id, spend],
        identifiers=['adset_id'],
    )
    # "Many" side: identified by adset_id plus ad_id.
    child = create_temp_internal_from_api_model(
        name='model_2',
        data_source='source.db.schema.second_table',
        fields=[ad_id, adset_id, spend, impressions],
        identifiers=['adset_id', 'ad_id'],
    )

    detected = detect_joins([parent, child])

    expected_join = FdqModelJoin(
        to_model=parent.model_name,
        relationship=FdqModelJoinRelationship.many_to_one,
        fields=['adset_id'],
        join_type=JoinType.left,
    ).dict(by_alias=True)
    assert detected == {child.model_name: [expected_join]}
Exemple #2
0
def detect_joins(models: List[FdqModel]) -> Dict[str, List[Dict[str, Any]]]:
    """
    Suggest joins between models by comparing their identifier sets.

    Every unordered pair of models is inspected once:

    - equal identifier sets produce a one-to-one join from the first model
      of the pair to the second;
    - when one identifier set is a strict subset of the other, the model with
      the larger set gets a many-to-one join to the model with the smaller
      set, on the smaller set's identifiers;
    - pairs where either model has no identifiers are skipped with a warning,
      and pairs with unrelated identifier sets produce nothing.

    :param models: models to inspect
    :return: mapping from the name of the model owning the join to the list
        of suggested joins, each serialized with ``dict(by_alias=True)``.
        Models without any suggested join are absent from the mapping.
    """
    joins = defaultdict(list)

    for model, another_model in combinations(models, 2):
        left_ids = set(model.identifiers)
        right_ids = set(another_model.identifiers)

        if not left_ids:
            logger.warning(f'Model {model.model_name} has no ids defined')
            continue
        if not right_ids:
            # Report the model that actually lacks identifiers (the second of the pair).
            logger.warning(f'Model {another_model.model_name} has no ids defined')
            continue

        if left_ids == right_ids:
            owner, target = model, another_model
            cardinality = 'one'
            relationship = FdqModelJoinRelationship.one_to_one
            join_ids = left_ids
        elif left_ids.issubset(right_ids):
            # The second model is more granular, so it is the "many" side.
            owner, target = another_model, model
            cardinality = 'many'
            relationship = FdqModelJoinRelationship.many_to_one
            join_ids = left_ids
        elif right_ids.issubset(left_ids):
            # The first model is more granular, so it is the "many" side.
            owner, target = model, another_model
            cardinality = 'many'
            relationship = FdqModelJoinRelationship.many_to_one
            join_ids = right_ids
        else:
            # Identifier sets are unrelated; nothing to suggest for this pair.
            continue

        logger.info(
            f'Found possible join: {cardinality} {owner.model_name} to one {target.model_name}'
        )
        join = FdqModelJoin(
            to_model=target.model_name,
            relationship=relationship,
            # Sets have no stable iteration order; sort so the output is deterministic.
            fields=sorted(join_ids),
            join_type=JoinType.left,
        )
        joins[owner.model_name].append(join.dict(by_alias=True))

    return dict(joins)
Exemple #3
0
 def from_internal(cls, model_join: ModelJoin,
                   virtual_data_source: str) -> 'FdqModelJoin':
     """
     Build the API model join from a Husky model join.

     The target model name and every taxon are passed through
     ``remove_virtual_data_source_prefix`` with the given virtual data
     source; a missing taxon list is treated as empty.
     """
     join_type = model_join.join_type
     target_model = remove_virtual_data_source_prefix(virtual_data_source,
                                                      model_join.to_model)
     relationship = FdqModelJoinRelationship(model_join.relationship.value)

     stripped_taxons = []
     for internal_taxon in model_join.taxons or []:
         stripped_taxons.append(
             remove_virtual_data_source_prefix(virtual_data_source,
                                               internal_taxon))

     return FdqModelJoin.construct(
         join_type=join_type,
         to_model=target_model,
         relationship=relationship,
         taxons=stripped_taxons,
     )
def test_api_model_join_to_internal():
    """
    Mapping an API join to the internal join keeps join type and relationship
    values and expects the target model and taxons wrapped by
    ``prefix_with_virtual_data_source``.
    """
    api_join = FdqModelJoin(
        join_type=JoinType.left,
        to_model='model_1',
        fields=['taxon_slug', 'taxon_slug_2'],
        relationship=FdqModelJoinRelationship.many_to_one,
    )

    internal_join = FdqModelJoinMapper.to_internal(api_join, _VIRTUAL_DATA_SOURCE)
    actual = internal_join.to_primitive()

    expected = {
        'direction': None,
        'join_type': api_join.join_type.value,
        'to_model': prefix_with_virtual_data_source(_VIRTUAL_DATA_SOURCE, api_join.to_model),
        'relationship': api_join.relationship.value,
        'taxons': [
            prefix_with_virtual_data_source(_VIRTUAL_DATA_SOURCE, slug)
            for slug in api_join.taxons
        ],
    }
    assert actual == expected
        FdqModel(
            model_name='api-model-slug-2',
            data_source='physical.table2',
            fields=[
                FdqModelAttribute(data_reference='"col_name"',
                                  field_map=['taxon_slug_3']),
                FdqModelAttribute(
                    data_reference='"col_name_2"',
                    field_map=['taxon_slug', 'taxon_slug_2'],
                ),
            ],
            identifiers=['taxon_slug', 'taxon_slug_2'],
            joins=[
                FdqModelJoin(
                    join_type=JoinType.left,
                    to_model='model_1',
                    fields=['taxon_slug', 'taxon_slug_2'],
                    relationship=FdqModelJoinRelationship.many_to_one,
                )
            ],
            visibility=ModelVisibility.hidden,
        ),
    ],
)
def test_api_model_to_internal(api_model):
    husky_model = FdqModelMapper.to_internal(api_model, _VIRTUAL_DATA_SOURCE,
                                             'company_id')
    assert husky_model.to_primitive() == {
        'data_sources': [_VIRTUAL_DATA_SOURCE],
        'fully_qualified_name_parts':
        api_model.data_source.split('.'),
        'model_type':