def test_get_explores_throws_if_model_or_explore_does_not_exist(
    fc: fetcher.Fetcher, model: Optional[str], explore: Optional[str], msg: str
):
    """fetcher.get_explores() should throw if an explore/model is not found."""
    with pytest.raises(exceptions.NotFoundError) as exc:
        fc.get_explores(model=model, explore=explore)
    assert msg in str(exc.value)
def _analyze_explores(self, model=None, explore=None, sortkey=None,
                      limit=None, min_queries=0, timeframe=90):
    explores = fetcher.get_explores(self, model=model, explore=explore, verbose=1)
    explores_usage = {}
    info = []
    for e in explores:
        # in case explore does not exist (bug - #32748)
        if e is None:
            continue
        _used_fields = fetcher.get_used_explore_fields(
            self, e['model_name'], e['scopes'], timeframe, min_queries)
        used_fields = list(_used_fields.keys())
        exposed_fields = fetcher.get_explore_fields(self, explore=e,
                                                    scoped_names=1)
        unused_fields = set(exposed_fields) - set(used_fields)
        field_count = len(exposed_fields)
        query_count = fetcher.get_used_explores(self,
                                                model=e['model_name'],
                                                explore=e['name'])
        all_joins = set(e['scopes'])
        all_joins.remove(e['name'])
        used_joins = set([i.split('.')[2] for i in used_fields])
        unused_joins = len(list(all_joins - used_joins))
        has_description = 'Yes' if e['description'] else 'No'
        if query_count.get(e['name']):
            query_count = query_count[e['name']]
        else:
            query_count = 0
        info.append({
            'model': e['model_name'],
            'explore': e['name'],
            'is_hidden': e['hidden'],
            'has_description': has_description,
            'join_count': len(all_joins),
            'unused_joins': unused_joins,
            'field_count': field_count,
            'unused_fields': len(unused_fields),
            'query_count': query_count
        })
    if not info:
        self.analyze_logger.error('No matching explores found')
        raise Exception('No matching explores found')
    valid_values = list(info[0].keys())
    info = dc.sort(info, valid_values, sortkey)
    info = dc.limit(info, limit=limit)
    return info
def _analyze_models(self, project=None, model=None, sortkey=None, limit=None,
                    timeframe=90, min_queries=0):
    models = fetcher.get_models(self, project=project, model=model, verbose=1)
    used_models = fetcher.get_used_models(self, timeframe, min_queries)
    info = []
    for m in models:
        explore_count = len(m['explores'])
        if m['name'] in used_models:
            query_run_count = used_models[m['name']]
        else:
            query_run_count = 0
        unused_explores = fetcher.get_unused_explores(
            self, m['name'], timeframe, min_queries)
        info.append({
            'project': m['project_name'],
            'model': m['name'],
            'explore_count': explore_count,
            'unused_explores': len(unused_explores),
            'query_run_count': query_run_count
        })
    valid_values = list(info[0].keys())
    info = dc.sort(info, valid_values, sortkey)
    info = dc.limit(info, limit=limit)
    return info
def _analyze_projects(self, project=None, sortkey=None, limit=None):
    projects = fetcher.get_project_files(self, project=project)
    info = []
    for p in projects:
        metadata = list(
            map(
                lambda x: 'model' if x['type'] == 'model' else
                ('view' if x['type'] == 'view' else None), p['files']))
        model_count = metadata.count('model')
        view_count = metadata.count('view')
        git_tests = fetcher.test_git_connection(self, p['name'])
        info.append({
            'project': p['name'],
            'model_count': model_count,
            'view_count': view_count,
            'git_connection_status': git_tests,
            'pull_request_mode': p['pr_mode'],
            'validation_required': p['validation_required']
        })
    valid_values = list(info[0].keys())
    info = dc.sort(info, valid_values, sortkey)
    info = dc.limit(info, limit=limit)
    return info
def test_get_models_throws_if_project_does_not_exist(
    fc: fetcher.Fetcher, project, model
):
    """fetcher.get_models() should throw if a project is not found."""
    with pytest.raises(exceptions.NotFoundError) as exc:
        fc.get_models(project=project, model=model)
    assert "An error occured while getting projects." in str(exc.value)
def test_get_explore_join_stats(fc: fetcher.Fetcher, test_model):
    """fetcher.get_explore_join_stats() should return the stats of all joins
    in an explore.
    """
    explore = fc.get_explores(
        model=test_model["name"], explore="explore_2_joins_1_used"
    )[0]
    field_stats = {
        "explore_2_joins_1_used.d1": 10,
        "explore_2_joins_1_used.d2": 5,
        "explore_2_joins_1_used.d3": 0,
        "explore_2_joins_1_used.m1": 0,
        "join1.d1": 10,
        "join1.d2": 10,
        "join1.d3": 10,
        "join1.m1": 0,
        "join2.d1": 0,
        "join2.d2": 0,
        "join2.d3": 0,
        "join2.m1": 0,
    }
    join_stats = fc.get_explore_join_stats(explore=explore, field_stats=field_stats)
    assert isinstance(join_stats, dict)
    assert len(join_stats) == 2
    assert join_stats == {"join1": 30, "join2": 0}
def _vacuum_models(self, project=None, model=None, timeframe=90, min_queries=0):
    if model is None:
        model = fetcher.get_models(self, project=project)
    else:
        model = model.split()
    used_models = fetcher.get_used_models(self, timeframe)
    info = []
    for m in model:
        explores = [
            e['name'] for e in fetcher.get_explores(self, model=m, verbose=1)
        ]
        unused_explores = fetcher.get_unused_explores(
            self, m, timeframe, min_queries)
        query_run_count = used_models[m] if m in used_models.keys() else 0
        unused_explores = '\n'.join(unused_explores)
        info.append({
            'model': m,
            'unused_explores': unused_explores or 'None',
            'model_query_run_count': query_run_count
        })
    return info
def _analyze_fields(self, model=None, explore=None, sortkey=None, limit=None,
                    min_queries=0, timeframe=90):
    print('Retrieving explores for fields...')
    explores = fetcher.get_explores(self, model=model, explore=explore, verbose=1)
    info = []
    progress = 1
    for e in explores:
        # in case explore does not exist, skip before referencing its fields
        if e is None:
            continue
        print('Analyzing {}.{}, {} of {} explores'.format(
            e['model_name'], e['name'], progress, len(explores)))
        _used_fields = fetcher.get_used_explore_fields(
            self, e['model_name'], e['scopes'], timeframe, min_queries)
        used_fields = list(_used_fields.keys())
        exposed_fields = fetcher.get_explore_fields(self, explore=e,
                                                    scoped_names=1)
        unused_fields = set(exposed_fields) - set(used_fields)
        field_count = len(exposed_fields)
        missing_description = 0
        dimensions = 0
        measures = 0
        for dim in e['fields']['dimensions']:
            dimensions += 1
            if not dim['description']:
                missing_description += 1
        for measure in e['fields']['measures']:
            measures += 1
            if not measure['description']:
                missing_description += 1
        info.append({
            'model': e['model_name'],
            'explore': e['name'],
            'field_count': field_count,
            'unused_fields': len(unused_fields),
            'missing_description': missing_description,
            'dimensions': dimensions,
            'measures': measures
        })
        progress += 1
    if not info:
        self.analyze_logger.error('No matching explores found')
        raise Exception('No matching explores found')
    valid_values = list(info[0].keys())
    info = styler.sort(info, valid_values, sortkey)
    info = styler.limit(info, limit=limit)
    return info
def test_sort(
    fc: fetcher.Fetcher,
    sortkey: Tuple[str, str],
    expected_output: Sequence[Dict[str, Union[int, str, bool]]],
):
    fc.sortkey = sortkey
    result = fc._sort(DATA)
    assert result == expected_output
def _vacuum_explores(self, model=None, explore=None, timeframe=90, min_queries=0):
    explores = fetcher.get_explores(self, model=model, explore=explore, verbose=1)
    info = []
    for e in explores:
        # get field usage from i__looker using all the views inside explore
        # returns fields in the form of model.explore.view.field
        _used_fields = fetcher.get_used_explore_fields(self, e['model_name'],
                                                       e['scopes'], timeframe,
                                                       min_queries)
        used_fields = list(_used_fields.keys())
        # get field picker fields in the form of model.explore.view.field
        exposed_fields = fetcher.get_explore_fields(self, explore=e,
                                                    scoped_names=1)
        _unused_fields = set(exposed_fields) - set(used_fields)
        # remove scoping
        all_joins = set(e['scopes'])
        all_joins.remove(e['name'])
        used_joins = set([i.split('.')[2] for i in used_fields])
        _unused_joins = list(all_joins - used_joins)
        unused_joins = '\n'.join(_unused_joins) or 'N/A'
        # only keep fields that belong to used joins (unused join fields
        # don't matter) if there's at least one used join (including the
        # base view). else don't match anything
        temp = list(used_joins)
        temp.append(e['name'])
        pattern = '|'.join(temp) or 'ALL'
        unused_fields = []
        if pattern != 'ALL':
            for field in _unused_fields:
                f = re.match(r'^({0}).*'.format(pattern),
                             '.'.join(field.split('.')[2:]))
                if f is not None:
                    unused_fields.append(f.group(0))
            unused_fields = sorted(unused_fields)
            unused_fields = '\n'.join(unused_fields)
        else:
            unused_fields = color.format(pattern, 'fail', 'color')
        info.append({
            'model': e['model_name'],
            'explore': e['name'],
            'unused_joins': unused_joins,
            'unused_fields': unused_fields
        })
    if not info:
        self.vacuum_logger.error('No matching explores found')
        raise Exception('No matching explores found')
    return info
def test_limit(
    fc: fetcher.Fetcher,
    limit: Optional[int],
    input_data: Sequence[Dict[str, Union[bool, int, str]]],
    expected_result: Sequence[int],
):
    fc.limit = limit
    result = fc._limit(input_data)
    assert result == expected_result
def test_get_explores_filters(fc: fetcher.Fetcher):
    """fetcher.get_explores() should be able to filter on model and/or explore."""
    explores = fc.get_explores(model="henry_dusty")
    assert all(e.model_name == "henry_dusty" for e in explores)

    explores = fc.get_explores(model="henry_qa", explore="explore_2_joins_all_used")
    assert all(
        e.model_name == "henry_qa" and e.name == "explore_2_joins_all_used"
        for e in explores
    )
def test_get_models_filters(fc: fetcher.Fetcher, test_project_name, test_model):
    """fetcher.get_models() should be able to filter on project or model."""
    ml = fc.get_models(project=test_project_name)
    assert all(m.project_name == test_project_name for m in ml)

    ml = fc.get_models(model=test_model["name"])
    assert all(m.name == test_model["name"] for m in ml)

    ml = fc.get_models(project=test_project_name, model=test_model["name"])
    assert all(
        m.project_name == test_project_name and m.name == test_model["name"]
        for m in ml
    )
def test_get_explore_fields_gets_fields(
    fc: fetcher.Fetcher, test_model, test_explores_stats
):
    """fetcher.get_explore_fields() should return an explore's fields."""
    test_explore = test_explores_stats[0]
    explore = fc.get_explores(model=test_model["name"], explore=test_explore["name"])
    assert isinstance(explore, list)
    explore = explore[0]
    assert isinstance(explore, models.LookmlModelExplore)
    assert explore.model_name == test_model["name"]
    assert explore.name == test_explore["name"]
    fields = fc.get_explore_fields(explore)
    assert isinstance(fields, list)
    assert fields == test_explore["all_fields"]
def test_get_explore_fields_gets_fields_for_dimension_or_measure_only_explores(
    fc: fetcher.Fetcher, test_model, test_dimensions_or_measures_only_explores
):
    """fetcher.get_explore_fields() should return when an explore has only
    dimensions or only measures.
    """
    expected = test_dimensions_or_measures_only_explores[0]
    explore = fc.get_explores(model=test_model["name"], explore=expected["name"])
    assert isinstance(explore, list)
    actual = explore[0]
    assert actual.name == expected["name"]
    assert not (actual.fields.dimensions and actual.fields.measures)
    expected_fields = [f["name"] for f in expected["fields"]]
    actual_fields = fc.get_explore_fields(actual)
    assert actual_fields == expected_fields
def test_get_used_models(fc: fetcher.Fetcher, test_model):
    """fetcher.get_used_models() should return models that have queries against them."""
    used_models = fc.get_used_models()
    assert isinstance(used_models, dict)
    assert len(used_models) > 0
    assert all(type(model_name) == str for model_name in used_models.keys())
    assert all(type(query_count) == int for query_count in used_models.values())
    assert test_model["name"] in used_models.keys()
def test_filter(
    fc: fetcher.Fetcher,
    data: Optional[Dict[str, int]],
    condition: Callable,
    expected_output: Dict[str, int],
):
    result = fc._filter(data, condition)
    assert result == expected_output
def _analyze_models(self, project=None, model=None, sortkey=None, limit=None,
                    timeframe=90, min_queries=0):
    print('fetching all models...')
    models = fetcher.get_models(self, project=project, model=model, verbose=1)
    print('complete.')
    print('fetching used models...')
    used_models = fetcher.get_used_models(self, timeframe, min_queries)
    print('complete.')
    info = []
    # progress is reported against all fetched models, not just the used ones
    total = len(models)
    complete = 1
    for m in models:
        print('Processing {} of {} models'.format(complete, total))
        explore_count = len(m['explores'])
        if m['name'] in used_models:
            query_run_count = used_models[m['name']]
        else:
            query_run_count = 0
        unused_explores = fetcher.get_unused_explores(
            self, m['name'], timeframe, min_queries)
        info.append({
            'project': m['project_name'],
            'model': m['name'],
            'explore_count': explore_count,
            'unused_explores': len(unused_explores),
            'query_run_count': query_run_count
        })
        complete += 1
    valid_values = list(info[0].keys())
    info = styler.sort(info, valid_values, sortkey)
    info = styler.limit(info, limit=limit)
    return info
def test_get_explore_field_stats(
    fc: fetcher.Fetcher,
    looker_sdk: methods.LookerSDK,
    test_model,
    test_used_explore_names,
    test_explores_stats,
):
    """fetcher.get_explore_field_stats() should get the stats of all fields
    in an explore.
    """
    explore = fc.get_explores(
        model=test_model["name"], explore=test_used_explore_names[0]
    )[0]
    actual_stats = fc.get_explore_field_stats(explore)
    assert isinstance(actual_stats, dict)

    for e in test_explores_stats:
        if e["name"] == test_used_explore_names[0]:
            expected_stats = e
    assert all(actual_stats[k] == 0 for k in expected_stats["unused_fields"])
    assert all(actual_stats[k] > 0 for k in expected_stats["used_fields"])
def test_get_used_explores(fc: fetcher.Fetcher, test_model, test_used_explore_names):
    """fetcher.get_used_explores() should return all used explores."""
    used_explores = fc.get_used_explores(model=test_model["name"])
    assert isinstance(used_explores, dict)
    assert all(e in test_used_explore_names for e in used_explores)
def test_get_projects_returns_projects(fc: fetcher.Fetcher):
    """fetcher.get_projects() should return a list of projects."""
    projects = fc.get_projects()
    assert isinstance(projects, list)
    assert isinstance(projects[0], models.Project)
def test_get_explores(fc: fetcher.Fetcher):
    """fetcher.get_explores() should return a list of explores."""
    explores = fc.get_explores()
    assert isinstance(explores, list)
    assert len(explores) > 0
    assert isinstance(explores[0], models.LookmlModelExplore)
def test_get_models_returns_models(fc: fetcher.Fetcher):
    """fetcher.get_models() should return a list of models."""
    ml = fc.get_models()
    assert isinstance(ml, list)
    assert isinstance(ml[0], models.LookmlModel)
def test_get_projects_throws_if_project_does_not_exist(fc: fetcher.Fetcher):
    """fetcher.get_projects() should error if the project filter is invalid."""
    with pytest.raises(exceptions.NotFoundError) as exc:
        fc.get_projects("BadProject")
    assert "An error occured while getting projects." in str(exc.value)
def test_get_unused_explores(fc: fetcher.Fetcher, test_model, test_unused_explores):
    """fetcher.get_unused_explores() should return all unused explores."""
    unused_explores = fc.get_unused_explores(model=test_model["name"])
    assert all(e in test_unused_explores for e in unused_explores)
def test_get_projects_filters(fc: fetcher.Fetcher, test_project_name):
    """fetcher.get_projects() should be able to filter on project."""
    projects = fc.get_projects(test_project_name)
    assert isinstance(projects, list)
    assert len(projects) == 1
    assert projects[0].name == test_project_name
def test_sort_throws_for_invalid_sort_keys(
    fc: fetcher.Fetcher, sortkey: Tuple[str, str]
):
    with pytest.raises(KeyError):
        fc.sortkey = sortkey
        fc._sort(DATA)
def _vacuum_fields(self, model=None, explore=None, timeframe=90, min_queries=0):
    explores = fetcher.get_explores(self, model=model, explore=explore, verbose=1)
    info = []
    master_exposed_fields = set()
    master_used_fields = set()
    distinct_views = set()
    progress = 1
    for e in explores:
        print('Analyzing {}.{}, {} of {} explores'.format(
            e['model_name'], e['name'], progress, len(explores)))
        # get field usage from i__looker using all the views inside explore
        # returns fields in the form of model.explore.view.field
        _used_fields = fetcher.get_used_explore_fields(
            self, e['model_name'], e['scopes'], timeframe, min_queries)
        used_fields = list(_used_fields.keys())
        # get field picker fields in the form of model.explore.view.field
        exposed_fields = fetcher.get_explore_fields(self, explore=e,
                                                    scoped_names=1)
        _unused_fields = set(exposed_fields) - set(used_fields)
        # Get fields used in joins
        for join in e['joins']:
            if join['sql_on'] is not None:
                f = re.findall(r'\{(.*?)\}', join['sql_on'])
                for field in f:
                    master_used_fields.add(field)
                    distinct_views.add(field.split('.')[0])
        # Get used fields
        for field in used_fields:
            field = '.'.join(field.split('.')[2:])
            master_used_fields.add(field)
            distinct_views.add(field.split('.')[0])
        # Get all fields
        for field in exposed_fields:
            # strip out the model and explore
            field = '.'.join(field.split('.')[2:])
            master_exposed_fields.add(field)
            distinct_views.add(field.split('.')[0])
        progress += 1
    # Fields to ignore if they contain the following:
    ignore_list = ['week', 'quarter', 'year', 'month', 'raw', 'date', 'time']
    # Get all unused fields and then organize them by their view
    master_unused_fields = master_exposed_fields - master_used_fields
    for view in sorted(list(distinct_views)):
        if any(char.isdigit() for char in view):
            continue
        unused_fields = []
        for field in master_unused_fields:
            # always keep id fields and basic count fields
            field_name = field.split('.')[1]
            if (field_name == 'id' or field_name == 'count'
                    or 'id' in field_name.split('_')):
                continue
            elif any(ignore in field for ignore in ignore_list):
                continue
            if field.split('.')[0] == view:
                unused_fields.append(field)
        unused_fields = '\n'.join(unused_fields)
        # only report views that still have unused fields after filtering
        if unused_fields:
            info.append({'view': view, 'unused_fields': unused_fields})
    if not info:
        self.vacuum_logger.error('No matching explores found')
        raise Exception('No matching explores found')
    return info