def detect_joins_task(detect_joins_job):
    """Run join detection for a single job and record the outcome on the job.

    Loads all models for the job's virtual data source within the company
    scope, maps them to FDQ models, runs join detection over them, and stores
    the detected joins with a COMPLETED status on the job.  On any failure the
    job status is set to FAILED, the error is logged with its traceback, and
    the exception is re-raised so the celery handler reports the failure.
    """
    try:
        logger.info(
            f'job_id={detect_joins_job.job_id} Fetching models for vds {detect_joins_job.virtual_data_source} '
            f'under company {detect_joins_job.company_id}'
        )
        husky_models = ModelRetriever.load_models(
            {detect_joins_job.virtual_data_source}, Scope(company_id=detect_joins_job.company_id)
        )
        models = [FdqModelMapper.from_internal(husky_model) for husky_model in husky_models]
        logger.info(
            f'job_id={detect_joins_job.job_id} Running join detection for {detect_joins_job.virtual_data_source} '
            f'under company {detect_joins_job.company_id}'
        )
        detected_joins = detect_joins(models=models)
        detect_joins_job.joins = detected_joins
        detect_joins_job.status = 'COMPLETED'
        logger.info(
            f'Joins for {detect_joins_job.virtual_data_source} '
            # fixed typo: "sucessfully" -> "successfully"
            f'under company {detect_joins_job.company_id} detected successfully job_id={detect_joins_job.job_id} '
        )
    except Exception:
        detect_joins_job.status = 'FAILED'
        # logger.exception (vs logger.error) also records the stack trace.
        logger.exception(
            f'Failed detecting joins for {detect_joins_job.virtual_data_source} '
            f'under company {detect_joins_job.company_id} job_id={detect_joins_job.job_id} '
        )
        raise  # Let the celery handler report the failure
def test_filters_data_sources(self, _retriever_mock):
    """Only the model belonging to the requested data source is returned."""
    requested_scope = Scope(dict(company_id='company_2', project_id='project_2'))
    loaded = ModelRetriever.load_models({'some-other-source'}, requested_scope)
    expected = next(x for x in mock_response if x.name == 'some_other_model')
    assert len(loaded) == 1
    assert loaded[0] == expected
def setUp(self):
    """Build the graph of mock models and a default scope for each test."""
    super().setUp()
    mock_models = [
        get_mock_entity_model(),
        get_mock_metric_model(),
        get_mock_metric_gender_model(),
    ]
    self.graph = GraphBuilder(mock_models).build_graph()
    self.scope = Scope(dict(company_id='10', project_id='10'))
def test_includes_generally_available_models(self, _retriever_mock):
    """Without explicit data sources, only generally-available models load."""
    scope = Scope(dict(company_id='company_2', project_id='project_2'))
    models = ModelRetriever.load_models(set(), scope)
    model_names = {x.name for x in models}
    # Generator form avoids materializing a throwaway list inside all() (C419).
    assert all(x.visibility is ModelVisibility.available for x in models)
    assert 'an-invalid-model-that-shouldnt-be-displayed' not in model_names
    assert len(models) == 5
def test_specific_model_name(self, _retriever_mock):
    """Requesting a model by name returns exactly that one model."""
    scope = Scope(dict(company_id='company_2', project_id='project_2'))
    loaded = ModelRetriever.load_models(set(), scope, 'specific_snap_model')
    assert len(loaded) == 1
    only_model = loaded[0]
    assert only_model.name == 'specific_snap_model'
def test_filters_on_scope(self, _retriever_mock):
    """Models are filtered down to those matching the requested scope."""
    scope = Scope(dict(company_id='company_1', project_id='project_1'))
    models = ModelRetriever.load_models(set(), scope)
    assert len(models) == 2
    assert mock_response[0] in models
    assert mock_response[2] in models
    # any() fails with an AssertionError instead of an opaque StopIteration
    # when the fixture is missing the company-wide model.
    assert any(x.name == 'company_wide_model' for x in mock_response)
def setUp(self) -> None:
    """Prepare the scope, request properties and model graph shared by tests."""
    super().setUp()
    self._scope = Scope(dict(company_id='10', project_id='10'))
    self._properties = DataRequestProperties(dict(data_sources=[MOCK_DATA_SOURCE_NAME]))
    mock_models = [
        get_mock_entity_model(),
        get_mock_metric_model(),
        get_mock_metric_gender_model(),
    ]
    self._graph = GraphBuilder(mock_models).build_graph()
def test_includes_experimental_if_scope_asks_for_them(self, _retriever_mock):
    """Experimental models become visible when the scope opts into them."""
    scope = Scope(dict(
        company_id='company_2',
        project_id='project_2',
        model_visibility=ModelVisibility.experimental,
    ))
    loaded = ModelRetriever.load_models(set(), scope)
    loaded_names = {model.name for model in loaded}
    assert len(loaded) == 6, 'All available and experimental models are visible'
    assert 'an-experimental-model' in loaded_names
def test_model_augment(self, _retriever_mock):
    """The loaded model is augmented with the expected taxon attributes."""
    scope = Scope(dict(company_id='company_2', project_id='project_2'))
    loaded = ModelRetriever.load_models(set(), scope, 'specific_snap_model')
    assert len(loaded) == 1
    augmented = loaded[0]
    assert augmented.name == 'specific_snap_model'
    for taxon in ('data_source', 'date_hour'):
        assert augmented.get_attribute_by_taxon(taxon).taxon == taxon
def test_scope_filters(self):
    """Scope preaggregation filters are rendered as row filters on the query."""
    scope_filter = TaxonValueFilterClause({
        'type': FilterClauseType.TAXON_VALUE.value,
        'taxon': 'account_id',
        'operator': SimpleFilterOperator.EQ.value,
        'value': '10',
    }).to_native()
    scope = Scope(dict(company_id='10', project_id='10', preaggregation_filters=scope_filter))
    model = get_mock_metric_model()
    base_query = select([literal_column('test')])
    model_info = TaxonModelInfo('acc_id_column', model.name, ValueQuantityType.scalar)
    guarded_query = ScopeGuard.add_scope_row_filters(
        SNOWFLAKE_HUSKY_CONTEXT, scope, base_query, dict(account_id=model_info)
    )
    # The taxon filter on account_id shows up as a WHERE clause on its column.
    assert compile_query(guarded_query) == "SELECT test \nWHERE acc_id_column = '10'"
def test_visibility(self, _retriever_mock):
    """Loading a hidden data source raises ModelNotFoundException."""
    from panoramic.cli.husky.service.utils.exceptions import ModelNotFoundException

    # Build the scope outside the context manager so the assertRaises block
    # covers only the call that is expected to raise.
    scope = Scope(dict(company_id='company_2', project_id='project_2'))
    with self.assertRaises(ModelNotFoundException):
        ModelRetriever.load_models({'another-special-data-source'}, scope)