Exemple #1
0
def detect_joins_task(detect_joins_job):
    """Run join detection for a job and record the outcome on the job object.

    Loads the models for the job's virtual data source (scoped to the job's
    company), converts each one with ``FdqModelMapper.from_internal`` and
    passes them to ``detect_joins``.

    On success the result is stored in ``detect_joins_job.joins`` and
    ``detect_joins_job.status`` is set to ``'COMPLETED'``. On any exception
    the status is set to ``'FAILED'``, the failure is logged together with
    its traceback, and the exception is re-raised.

    :param detect_joins_job: job object providing ``job_id``,
        ``virtual_data_source`` and ``company_id``; its ``joins`` and
        ``status`` attributes are written by this function.
    :raises Exception: re-raises whatever model loading, mapping or join
        detection raised.
    """
    try:
        logger.info(
            f'job_id={detect_joins_job.job_id} Fetching models for vds {detect_joins_job.virtual_data_source} '
            f'under company {detect_joins_job.company_id}'
        )
        husky_models = ModelRetriever.load_models(
            {detect_joins_job.virtual_data_source}, Scope(company_id=detect_joins_job.company_id)
        )
        models = [FdqModelMapper.from_internal(husky_model) for husky_model in husky_models]

        logger.info(
            f'job_id={detect_joins_job.job_id} Running join detection for {detect_joins_job.virtual_data_source} '
            f'under company {detect_joins_job.company_id}'
        )

        detected_joins = detect_joins(models=models)
        detect_joins_job.joins = detected_joins
        detect_joins_job.status = 'COMPLETED'
        logger.info(
            f'Joins for {detect_joins_job.virtual_data_source} '
            f'under company {detect_joins_job.company_id} detected sucessfully job_id={detect_joins_job.job_id} '
        )
    except Exception:
        detect_joins_job.status = 'FAILED'
        # logger.exception logs at ERROR level and attaches the active
        # traceback, so the failure cause is visible in this job's log entry.
        logger.exception(
            f'Failed detecting joins for {detect_joins_job.virtual_data_source} '
            f'under company {detect_joins_job.company_id} job_id={detect_joins_job.job_id} '
        )
        raise  # Let the celery handler report the failure
Exemple #2
0
    def test_filters_data_sources(self, _retriever_mock):
        """Requesting 'some-other-source' yields exactly the 'some_other_model' entry."""
        scope = Scope({'company_id': 'company_2', 'project_id': 'project_2'})
        requested_sources = {'some-other-source'}

        loaded = ModelRetriever.load_models(requested_sources, scope)
        assert len(loaded) == 1

        # Look up the expected entry in the mocked response by its name.
        expected = next(candidate for candidate in mock_response if candidate.name == 'some_other_model')
        assert loaded[0] == expected
Exemple #3
0
    def test_includes_generally_available_models(self, _retriever_mock):
        """With no data-source filter, five models load and all have ``available`` visibility."""
        scope = Scope({'company_id': 'company_2', 'project_id': 'project_2'})
        loaded = ModelRetriever.load_models(set(), scope)

        loaded_names = {model.name for model in loaded}
        assert all(model.visibility is ModelVisibility.available for model in loaded)
        assert 'an-invalid-model-that-shouldnt-be-displayed' not in loaded_names
        assert len(loaded) == 5
Exemple #4
0
    def test_specific_model_name(self, _retriever_mock):
        """Passing a model name as the third argument returns only that model."""
        wanted_name = 'specific_snap_model'
        scope = Scope({'company_id': 'company_2', 'project_id': 'project_2'})

        loaded = ModelRetriever.load_models(set(), scope, wanted_name)

        assert len(loaded) == 1
        assert loaded[0].name == wanted_name
Exemple #5
0
    def test_filters_on_scope(self, _retriever_mock):
        """For company_1/project_1 exactly two models load: mock_response entries 0 and 2."""
        scope = Scope({'company_id': 'company_1', 'project_id': 'project_1'})

        loaded = ModelRetriever.load_models(set(), scope)
        assert len(loaded) == 2

        for position in (0, 2):
            assert mock_response[position] in loaded

        # NOTE(review): this only checks that the mocked response contains a
        # truthy 'company_wide_model' entry; it does not look at `loaded`.
        # Possibly `... in loaded` was intended - confirm before changing.
        assert next(entry for entry in mock_response if entry.name == 'company_wide_model')
Exemple #6
0
    def test_includes_experimental_if_scope_asks_for_them(
            self, _retriever_mock):
        """A scope with experimental model visibility loads six models, including the experimental one."""
        scope_settings = {
            'company_id': 'company_2',
            'project_id': 'project_2',
            'model_visibility': ModelVisibility.experimental,
        }
        scope = Scope(scope_settings)

        loaded = ModelRetriever.load_models(set(), scope)
        loaded_names = {entry.name for entry in loaded}

        assert len(loaded) == 6, 'All available and experimental models are visible'
        assert 'an-experimental-model' in loaded_names
Exemple #7
0
    def test_model_augment(self, _retriever_mock):
        """The loaded 'specific_snap_model' exposes 'data_source' and 'date_hour' attributes by taxon."""
        wanted_name = 'specific_snap_model'
        scope = Scope({'company_id': 'company_2', 'project_id': 'project_2'})

        loaded = ModelRetriever.load_models(set(), scope, wanted_name)
        assert len(loaded) == 1

        snap_model = loaded[0]
        assert snap_model.name == wanted_name

        # Each taxon slug must resolve to an attribute carrying that same slug.
        for taxon_slug in ('data_source', 'date_hour'):
            assert snap_model.get_attribute_by_taxon(taxon_slug).taxon == taxon_slug
    def build_query(
        ctx: HuskyQueryContext,
        subrequest: InternalDataRequest,
        query_info: QueryInfo,
        preloaded_taxons: TaxonMap,
        dimension_templates: Optional[List[SqlFormulaTemplate]] = None,
        filter_templates: Optional[TaxonToTemplate] = None,
    ) -> Dataframe:
        """
        Build the select query and then the projection query for a single-data-source subrequest.

        The models used by the select query are recorded on ``query_info.definition``
        under the ``effectively_used_models`` key.

        :param ctx Query context; passed to the select builder, and its dialect is used when logging compiled queries.
        :param subrequest Request providing scope, properties, preaggregation filters, ordering, limit and offset.
        :param query_info Object whose ``definition`` attribute is overwritten by this call.
        :param preloaded_taxons Taxons handed to the taxon manager.
        :param dimension_templates Sql column templates to select; treated as an empty list when falsy.
        :param filter_templates Filter templates keyed by taxon slug, referenced from scope or preagg filters;
            treated as an empty dict when falsy.
        :return Dataframe produced by the projection builder.
        :raises MultipleDataSources when the subrequest does not list exactly one data source.
        """
        if not dimension_templates:
            dimension_templates = []
        if not filter_templates:
            filter_templates = {}

        # Fetch Taxons
        taxon_manager = SimpleTaxonManager.initialize(
            subrequest, dimension_templates, filter_templates, preloaded_taxons
        )

        properties = subrequest.properties
        data_sources = set(properties.data_sources)
        if len(properties.data_sources) != 1:
            # Joining across data sources is more complex and not implemented yet.
            raise MultipleDataSources(data_sources)
        data_source = properties.data_sources[0]

        # Build the graph from the models available for this data source and scope.
        available_models = ModelRetriever.load_models(data_sources, subrequest.scope, properties.model_name)
        model_graph = GraphBuilder.create_with_models(available_models)

        # Create Select Query
        select_builder = SelectBuilder(
            ctx,
            subrequest.scope,
            taxon_manager.graph_select_taxons,
            taxon_manager.projection_taxons,
            model_graph,
            data_source,
            subrequest.preaggregation_filters,
            dimension_templates,
            filter_templates,
        )
        select_query, taxon_model_info_map, effectively_used_models = select_builder.get_query()

        query_info.definition = QueryDefinition({'effectively_used_models': effectively_used_models})

        logger.debug('Select Query: %s', compile_query(select_query, ctx.dialect))

        # Create Projection Query
        projected = ProjectionBuilder.query(
            select_query,
            taxon_model_info_map,
            taxon_manager.projection_taxons,
            properties.data_source,
            subrequest.order_by,
            subrequest.limit,
            subrequest.offset,
            dimension_templates,
        )

        logger.debug('Projection Query: %s', compile_query(projected.query, ctx.dialect))
        return projected
Exemple #9
0
    def test_visibility(self, _retriever_mock):
        """Loading models for 'another-special-data-source' raises ModelNotFoundException."""
        from panoramic.cli.husky.service.utils.exceptions import ModelNotFoundException

        # Build the scope outside the assertRaises context so that only the
        # load_models call itself can satisfy the expected exception.
        scope = Scope(dict(company_id='company_2', project_id='project_2'))

        with self.assertRaises(ModelNotFoundException):
            ModelRetriever.load_models({'another-special-data-source'}, scope)