Esempio n. 1
0
def test_detects_many_to_one():
    """Check that a many-to-one join is detected between two models sharing ``adset_id``.

    ``model_1`` is identified by ``adset_id`` alone, while ``model_2`` is
    identified by ``adset_id`` plus ``ad_id``. The expected result is a single
    left join from ``model_2`` to ``model_1`` on ``adset_id`` with a
    many-to-one relationship.
    """
    adset_key = FdqModelAttribute(data_reference='"adset_id"', field_map=['adset_id'])
    ad_key = FdqModelAttribute(data_reference='"ad_id"', field_map=['ad_id'])
    spend = FdqModelAttribute(data_reference='"spend"', field_map=['spend'])
    impressions = FdqModelAttribute(data_reference='"impressions"', field_map=['impressions'])

    # The "one" side: a single identifier.
    parent_model = create_temp_internal_from_api_model(
        name='model_1',
        data_source='source.db.schema.table',
        fields=[adset_key, spend],
        identifiers=['adset_id'],
    )
    # The "many" side: composite identifier that includes the parent's identifier.
    child_model = create_temp_internal_from_api_model(
        name='model_2',
        data_source='source.db.schema.second_table',
        fields=[ad_key, adset_key, spend, impressions],
        identifiers=['adset_id', 'ad_id'],
    )

    detected = detect_joins([parent_model, child_model])

    expected_join = FdqModelJoin(
        to_model=parent_model.model_name,
        relationship=FdqModelJoinRelationship.many_to_one,
        fields=['adset_id'],
        join_type=JoinType.left,
    ).dict(by_alias=True)
    assert detected == {child_model.model_name: [expected_join]}
Esempio n. 2
0
def detect_joins_task(detect_joins_job):
    """Run join detection for the job's virtual data source and record the outcome on the job.

    Loads the models for ``detect_joins_job.virtual_data_source`` scoped to
    ``detect_joins_job.company_id``, converts each one with
    ``FdqModelMapper.from_internal`` and passes them to ``detect_joins``.

    On success the detected joins are stored in ``detect_joins_job.joins`` and
    ``detect_joins_job.status`` is set to ``'COMPLETED'``.

    On any ``Exception`` the status is set to ``'FAILED'``, the failure is
    logged together with its traceback, and the exception is re-raised.

    :param detect_joins_job: job object exposing ``job_id``,
        ``virtual_data_source`` and ``company_id``; its ``joins`` and
        ``status`` attributes are written by this function.
    :raises Exception: whatever the model loading, mapping or join detection raised.
    """
    try:
        logger.info(
            f'job_id={detect_joins_job.job_id} Fetching models for vds {detect_joins_job.virtual_data_source} '
            f'under company {detect_joins_job.company_id}'
        )
        husky_models = ModelRetriever.load_models(
            {detect_joins_job.virtual_data_source}, Scope(company_id=detect_joins_job.company_id)
        )
        models = [FdqModelMapper.from_internal(husky_model) for husky_model in husky_models]

        logger.info(
            f'job_id={detect_joins_job.job_id} Running join detection for {detect_joins_job.virtual_data_source} '
            f'under company {detect_joins_job.company_id}'
        )

        detected_joins = detect_joins(models=models)
        detect_joins_job.joins = detected_joins
        detect_joins_job.status = 'COMPLETED'
        logger.info(
            f'Joins for {detect_joins_job.virtual_data_source} '
            f'under company {detect_joins_job.company_id} detected sucessfully job_id={detect_joins_job.job_id} '
        )
    except Exception:
        detect_joins_job.status = 'FAILED'
        # logger.exception keeps the traceback next to the job identifiers, so the
        # cause of the failure can be read from this log line alone.
        logger.exception(
            f'Failed detecting joins for {detect_joins_job.virtual_data_source} '
            f'under company {detect_joins_job.company_id} job_id={detect_joins_job.job_id} '
        )
        raise  # Let the celery handler report the failure
Esempio n. 3
0
def test_does_not_find_joins_on_models_without_ids():
    """Models created without explicit identifiers must yield an empty join mapping."""
    model_names = ('first_model', 'another_model', 'third_model')
    models_without_ids = [create_temp_internal_from_api_model(name=model_name) for model_name in model_names]

    detected = detect_joins(models=models_without_ids)

    assert detected == {}
Esempio n. 4
0
def test_does_not_find_joins_with_no_matching_ids():
    """Two models whose identifiers do not overlap must yield an empty join mapping.

    Both models share the non-identifier ``spend`` field, while ``model_1`` is
    identified by ``some_id`` and ``model_2`` by ``another_id``.
    """
    first_key = FdqModelAttribute(data_reference='"some_id"', field_map=['some_id'])
    second_key = FdqModelAttribute(data_reference='"another_id"', field_map=['another_id'])
    shared_metric = FdqModelAttribute(data_reference='"spend"', field_map=['spend'])

    first_model = create_temp_internal_from_api_model(
        name='model_1',
        data_source='source.db.schema.table',
        fields=[first_key, shared_metric],
        identifiers=['some_id'],
    )
    second_model = create_temp_internal_from_api_model(
        name='model_2',
        data_source='source.db.schema.table',
        fields=[second_key, shared_metric],
        identifiers=['another_id'],
    )

    detected = detect_joins(models=[first_model, second_model])

    assert detected == {}
Esempio n. 5
0
def test_does_not_find_joins_on_one_model():
    """A single model has nothing to join with, so the result must be empty."""
    only_model = create_temp_internal_from_api_model()

    detected = detect_joins(models=[only_model])

    assert detected == {}
Esempio n. 6
0
def test_does_not_find_joins_on_empty_models():
    """An empty list of models must yield an empty join mapping."""
    no_models = []

    detected = detect_joins(no_models)

    assert detected == {}