예제 #1
0
def test_get_explores_throws_if_model_or_explore_does_not_exist(
    fc: fetcher.Fetcher, model: Optional[str], explore: Optional[str], msg: str
):
    """Check that get_explores() raises NotFoundError for the given filters.

    The text of the raised error must contain ``msg``.
    """
    filters = {"model": model, "explore": explore}
    with pytest.raises(exceptions.NotFoundError) as raised:
        fc.get_explores(**filters)
    error_text = str(raised.value)
    assert msg in error_text
예제 #2
0
    def _analyze_explores(self,
                          model=None,
                          explore=None,
                          sortkey=None,
                          limit=None,
                          min_queries=0,
                          timeframe=90):
        """Summarise joins, fields and query usage for each matching explore.

        Explores are looked up with ``fetcher.get_explores`` using the
        ``model`` and ``explore`` filters; ``timeframe`` and ``min_queries``
        are forwarded to ``fetcher.get_used_explore_fields``. The rows are
        passed through ``dc.sort`` (with ``sortkey``) and ``dc.limit`` (with
        ``limit``) before being returned.

        Raises:
            Exception: when no row could be built
                ('No matching explores found').
        """
        rows = []
        matches = fetcher.get_explores(self,
                                       model=model,
                                       explore=explore,
                                       verbose=1)
        for exp in matches:
            # in case explore does not exist (bug - #32748)
            if exp is None:
                continue

            usage = fetcher.get_used_explore_fields(
                self, exp['model_name'], exp['scopes'], timeframe, min_queries)
            used = list(usage.keys())
            exposed = fetcher.get_explore_fields(self,
                                                 explore=exp,
                                                 scoped_names=1)
            never_queried = set(exposed).difference(used)
            exposed_total = len(exposed)
            explore_usage = fetcher.get_used_explores(self,
                                                      model=exp['model_name'],
                                                      explore=exp['name'])

            # every scope other than the explore's own name counts as a join
            joins = set(exp['scopes'])
            joins.remove(exp['name'])
            # the third dot-separated component of a used field names its view
            joins_in_use = {name.split('.')[2] for name in used}
            idle_join_total = len(joins - joins_in_use)

            if exp['description']:
                described = 'Yes'
            else:
                described = 'No'

            # a missing or falsy usage entry is reported as zero queries
            runs = explore_usage.get(exp['name']) or 0

            rows.append({
                'model': exp['model_name'],
                'explore': exp['name'],
                'is_hidden': exp['hidden'],
                'has_description': described,
                'join_count': len(joins),
                'unused_joins': idle_join_total,
                'field_count': exposed_total,
                'unused_fields': len(never_queried),
                'query_count': runs
            })

        if not rows:
            self.analyze_logger.error('No matching explores found')
            raise Exception('No matching explores found')

        columns = list(rows[0].keys())
        ordered = dc.sort(rows, columns, sortkey)
        return dc.limit(ordered, limit=limit)
예제 #3
0
 def _analyze_models(self,
                     project=None,
                     model=None,
                     sortkey=None,
                     limit=None,
                     timeframe=90,
                     min_queries=0):
     """Build one summary row per model returned by ``fetcher.get_models``.

     Each row holds the project name, the model name, the number of explores,
     the number of unused explores (from ``fetcher.get_unused_explores``) and
     the query count taken from ``fetcher.get_used_models`` (0 when the model
     is absent from it). Rows go through ``dc.sort`` and ``dc.limit`` before
     being returned.
     """
     matched_models = fetcher.get_models(self,
                                         project=project,
                                         model=model,
                                         verbose=1)
     usage = fetcher.get_used_models(self, timeframe, min_queries)

     rows = []
     for entry in matched_models:
         explore_total = len(entry['explores'])
         name = entry['name']
         runs = usage[name] if name in usage else 0
         idle_explores = fetcher.get_unused_explores(
             self, name, timeframe, min_queries)
         rows.append(dict(project=entry['project_name'],
                          model=entry['name'],
                          explore_count=explore_total,
                          unused_explores=len(idle_explores),
                          query_run_count=runs))

     # the first row's keys are handed to dc.sort as the valid sort columns
     columns = list(rows[0].keys())
     ordered = dc.sort(rows, columns, sortkey)
     return dc.limit(ordered, limit=limit)
예제 #4
0
    def _analyze_projects(self, project=None, sortkey=None, limit=None):
        """Build one summary row per project from ``fetcher.get_project_files``.

        Each row counts the project's files whose ``type`` is ``'model'`` or
        ``'view'``, stores the result of ``fetcher.test_git_connection`` and
        copies the project's ``pr_mode`` and ``validation_required`` values.
        Rows go through ``dc.sort`` and ``dc.limit`` before being returned.
        """
        rows = []
        for proj in fetcher.get_project_files(self, project=project):
            # only the 'model' and 'view' file types are counted below
            file_types = [entry['type'] for entry in proj['files']]
            models_found = file_types.count('model')
            views_found = file_types.count('view')

            git_status = fetcher.test_git_connection(self, proj['name'])
            rows.append(dict(project=proj['name'],
                             model_count=models_found,
                             view_count=views_found,
                             git_connection_status=git_status,
                             pull_request_mode=proj['pr_mode'],
                             validation_required=proj['validation_required']))

        # the first row's keys are handed to dc.sort as the valid sort columns
        columns = list(rows[0].keys())
        ordered = dc.sort(rows, columns, sortkey)
        return dc.limit(ordered, limit=limit)
예제 #5
0
def test_get_models_throws_if_project_does_not_exist(
    fc: fetcher.Fetcher, project, model
):
    """Check that get_models() raises NotFoundError for the given filters.

    The raised error must carry the "getting projects" message asserted below.
    """
    filters = {"project": project, "model": model}
    with pytest.raises(exceptions.NotFoundError) as raised:
        fc.get_models(**filters)
    error_text = str(raised.value)
    assert "An error occured while getting projects." in error_text
예제 #6
0
def test_get_explore_join_stats(fc: fetcher.Fetcher, test_model):
    """Check the per-join totals returned by get_explore_join_stats().

    The field stats are supplied by hand; the expected result holds one entry
    for ``join1`` and one for ``join2`` and none for the explore itself.
    """
    matches = fc.get_explores(
        model=test_model["name"], explore="explore_2_joins_1_used"
    )
    explore = matches[0]
    field_stats = {
        # fields scoped to the explore name
        "explore_2_joins_1_used.d1": 10,
        "explore_2_joins_1_used.d2": 5,
        "explore_2_joins_1_used.d3": 0,
        "explore_2_joins_1_used.m1": 0,
        # join1
        "join1.d1": 10,
        "join1.d2": 10,
        "join1.d3": 10,
        "join1.m1": 0,
        # join2
        "join2.d1": 0,
        "join2.d2": 0,
        "join2.d3": 0,
        "join2.m1": 0,
    }
    expected = {"join1": 30, "join2": 0}

    join_stats = fc.get_explore_join_stats(explore=explore, field_stats=field_stats)

    assert isinstance(join_stats, dict)
    assert len(join_stats) == 2
    assert join_stats == expected
예제 #7
0
    def _vacuum_models(self,
                       project=None,
                       model=None,
                       timeframe=90,
                       min_queries=0):
        """List the unused explores and the query count of each model.

        When ``model`` is None the models come from ``fetcher.get_models`` for
        ``project``; otherwise ``model`` is split on whitespace into names.
        Returns a list of dicts with the keys ``model``, ``unused_explores``
        (newline-joined, or the string 'None' when empty) and
        ``model_query_run_count``.
        """
        if model is not None:
            model_names = model.split()
        else:
            model_names = fetcher.get_models(self, project=project)

        usage = fetcher.get_used_models(self, timeframe)

        report = []
        for name in model_names:
            # NOTE(review): these explore names are fetched but never read
            # below; the call is kept so behaviour stays unchanged.
            _explore_names = [
                found['name']
                for found in fetcher.get_explores(self, model=name, verbose=1)
            ]
            idle_explores = fetcher.get_unused_explores(
                self, name, timeframe, min_queries)
            runs = usage[name] if name in usage else 0
            idle_text = '\n'.join(idle_explores)
            report.append(dict(model=name,
                               unused_explores=idle_text or 'None',
                               model_query_run_count=runs))

        return report
예제 #8
0
파일: analyze.py 프로젝트: envoy/henry
    def _analyze_fields(self,
                        model=None,
                        explore=None,
                        sortkey=None,
                        limit=None,
                        min_queries=0,
                        timeframe=90):
        """Report field counts and description coverage for each explore.

        Explores are looked up with ``fetcher.get_explores`` using the
        ``model`` and ``explore`` filters; ``timeframe`` and ``min_queries``
        are forwarded to ``fetcher.get_used_explore_fields``. Progress is
        printed to stdout. The rows are passed through ``styler.sort`` (with
        ``sortkey``) and ``styler.limit`` (with ``limit``) before being
        returned.

        Raises:
            Exception: when no row could be built
                ('No matching explores found').
        """
        print('Retrieving explores for fields...')
        explores = fetcher.get_explores(self,
                                        model=model,
                                        explore=explore,
                                        verbose=1)
        info = []
        progress = 1
        for e in explores:
            # Skip missing explores before touching their keys: the progress
            # message below subscripts ``e`` and would raise on None.
            if e is None:
                continue

            print('Analyzing {}.{}, {} of {} explores'.format(
                e['model_name'], e['name'], progress, len(explores)))

            _used_fields = fetcher.get_used_explore_fields(
                self, e['model_name'], e['scopes'], timeframe, min_queries)
            used_fields = list(_used_fields.keys())
            exposed_fields = fetcher.get_explore_fields(self,
                                                        explore=e,
                                                        scoped_names=1)
            unused_fields = set(exposed_fields) - set(used_fields)
            field_count = len(exposed_fields)

            # Count dimensions and measures, and how many of them have an
            # empty or missing description value.
            missing_description = 0
            dimensions = 0
            measures = 0
            for dim in e['fields']['dimensions']:
                dimensions += 1
                if not dim['description']:
                    missing_description += 1
            for measure in e['fields']['measures']:
                measures += 1
                if not measure['description']:
                    missing_description += 1

            info.append({
                'model': e['model_name'],
                'explore': e['name'],
                'field_count': field_count,
                'unused_fields': len(unused_fields),
                'missing_description': missing_description,
                'dimensions': dimensions,
                'measures': measures
            })
            progress += 1

        if not info:
            self.analyze_logger.error('No matching explores found')
            raise Exception('No matching explores found')
        # the first row's keys are handed to styler.sort as valid sort columns
        valid_values = list(info[0].keys())
        info = styler.sort(info, valid_values, sortkey)
        info = styler.limit(info, limit=limit)
        return info
예제 #9
0
def test_sort(
    fc: fetcher.Fetcher,
    sortkey: Tuple[str, str],
    expected_output: Sequence[Dict[str, Union[int, str, bool]]],
):
    """After ``fc.sortkey`` is set, ``fc._sort(DATA)`` must equal ``expected_output``."""
    fc.sortkey = sortkey
    assert fc._sort(DATA) == expected_output
예제 #10
0
    def _vacuum_explores(self, model=None, explore=None, timeframe=90,
                         min_queries=0):
        """List the unused joins and unused fields of each matching explore.

        Explores are looked up with ``fetcher.get_explores`` using the
        ``model`` and ``explore`` filters; ``timeframe`` and ``min_queries``
        are forwarded to ``fetcher.get_used_explore_fields``.

        Returns a list of dicts with the keys ``model``, ``explore``,
        ``unused_joins`` (newline-joined, 'N/A' when empty) and
        ``unused_fields`` (newline-joined ``view.field`` names).

        Raises:
            KeyError: if an explore's name is not among its ``scopes``
                (from ``set.remove``).
            Exception: when no row could be built
                ('No matching explores found').
        """
        explores = fetcher.get_explores(self,
                                        model=model,
                                        explore=explore,
                                        verbose=1)
        info = []
        for e in explores:
            # get field usage from i__looker using all the views inside explore
            # returns fields in the form of model.explore.view.field
            _used_fields = fetcher.get_used_explore_fields(self,
                                                           e['model_name'],
                                                           e['scopes'],
                                                           timeframe,
                                                           min_queries)
            used_fields = list(_used_fields.keys())
            # get field picker fields in the form of model.explore.view.field
            exposed_fields = fetcher.get_explore_fields(self,
                                                        explore=e,
                                                        scoped_names=1)
            _unused_fields = set(exposed_fields) - set(used_fields)

            # every scope other than the explore's own name counts as a join
            all_joins = set(e['scopes'])
            all_joins.remove(e['name'])
            used_joins = {i.split('.')[2] for i in used_fields}

            # sorted so the report does not depend on set iteration order
            unused_joins = '\n'.join(sorted(all_joins - used_joins)) or 'N/A'

            # only keep fields that belong to used joins (unused joins fields
            # don't matter), including the base view. The view name is compared
            # exactly: a prefix match would also keep e.g. 'order_items.*'
            # whenever a view called 'order' is in use.
            kept_views = used_joins | {e['name']}
            if any(kept_views):
                unused_fields = []
                for field in _unused_fields:
                    # remove scoping (model.explore.) -> view.field
                    unscoped = '.'.join(field.split('.')[2:])
                    if unscoped.split('.')[0] in kept_views:
                        unused_fields.append(unscoped)
                unused_fields = '\n'.join(sorted(unused_fields))
            else:
                # degenerate case: there is no non-empty view name to match on
                unused_fields = color.format('ALL', 'fail', 'color')

            info.append({
                'model': e['model_name'],
                'explore': e['name'],
                'unused_joins': unused_joins,
                'unused_fields': unused_fields
            })
        if not info:
            self.vacuum_logger.error('No matching explores found')
            raise Exception('No matching explores found')
        return info
예제 #11
0
def test_limit(
    fc: fetcher.Fetcher,
    limit: Optional[int],
    input_data: Sequence[Dict[str, Union[bool, int, str]]],
    expected_result: Sequence[int],
):
    """After ``fc.limit`` is set, ``fc._limit(input_data)`` must equal ``expected_result``."""
    fc.limit = limit
    assert fc._limit(input_data) == expected_result
예제 #12
0
def test_get_explores_filters(fc: fetcher.Fetcher):
    """Check that get_explores() honours its model and explore filters."""
    # filter on the model only
    for found in fc.get_explores(model="henry_dusty"):
        assert found.model_name == "henry_dusty"

    # filter on both the model and the explore
    narrowed = fc.get_explores(model="henry_qa", explore="explore_2_joins_all_used")
    for found in narrowed:
        assert found.model_name == "henry_qa"
        assert found.name == "explore_2_joins_all_used"
예제 #13
0
def test_get_models_filters(fc: fetcher.Fetcher, test_project_name, test_model):
    """Check that get_models() honours its project and model filters."""
    # project filter only
    for found in fc.get_models(project=test_project_name):
        assert found.project_name == test_project_name

    # model filter only
    for found in fc.get_models(model=test_model["name"]):
        assert found.name == test_model["name"]

    # both filters together
    both = fc.get_models(project=test_project_name, model=test_model["name"])
    for found in both:
        assert found.project_name == test_project_name
        assert found.name == test_model["name"]
예제 #14
0
def test_get_explore_fields_gets_fields(
    fc: fetcher.Fetcher, test_model, test_explores_stats
):
    """Check that get_explore_fields() returns the expected field list.

    The explore is first fetched through get_explores() and checked for its
    type, model name and explore name.
    """
    expected = test_explores_stats[0]

    found = fc.get_explores(model=test_model["name"], explore=expected["name"])
    assert isinstance(found, list)

    first = found[0]
    assert isinstance(first, models.LookmlModelExplore)
    assert first.model_name == test_model["name"]
    assert first.name == expected["name"]

    field_names = fc.get_explore_fields(first)
    assert isinstance(field_names, list)
    assert field_names == expected["all_fields"]
예제 #15
0
def test_get_explore_fields_gets_fields_for_dimension_or_measure_only_explores(
    fc: fetcher.Fetcher, test_model, test_dimensions_or_measures_only_explores
):
    """Check get_explore_fields() on an explore lacking dimensions or measures.

    The fetched explore must not have both dimensions and measures, and the
    returned field names must equal the names listed in the fixture.
    """
    wanted = test_dimensions_or_measures_only_explores[0]

    found = fc.get_explores(model=test_model["name"], explore=wanted["name"])
    assert isinstance(found, list)

    actual = found[0]
    assert actual.name == wanted["name"]
    # at least one of the two field groups has to be empty
    assert not actual.fields.dimensions or not actual.fields.measures

    wanted_names = [entry["name"] for entry in wanted["fields"]]
    assert fc.get_explore_fields(actual) == wanted_names
예제 #16
0
def test_get_used_models(fc: fetcher.Fetcher, test_model):
    """Check that get_used_models() returns a non-empty str -> int dict.

    The test model's name must be one of the keys.
    """
    usage = fc.get_used_models()
    assert isinstance(usage, dict)
    assert len(usage) > 0
    # exact type checks on purpose: subclasses (e.g. bool for int) do not pass
    for name in usage:
        assert type(name) is str
    for count in usage.values():
        assert type(count) is int
    assert test_model["name"] in usage
예제 #17
0
def test_filter(
    fc: fetcher.Fetcher,
    data: Optional[Dict[str, int]],
    condition: Callable,
    expected_output: Dict[str, int],
):
    """``fc._filter(data, condition)`` must equal ``expected_output``."""
    assert fc._filter(data, condition) == expected_output
예제 #18
0
파일: analyze.py 프로젝트: envoy/henry
    def _analyze_models(self,
                        project=None,
                        model=None,
                        sortkey=None,
                        limit=None,
                        timeframe=90,
                        min_queries=0):
        """Build one summary row per model returned by ``fetcher.get_models``.

        Each row holds the project name, the model name, the number of
        explores, the number of unused explores (from
        ``fetcher.get_unused_explores``) and the query count taken from
        ``fetcher.get_used_models`` (0 when the model is absent from it).
        Progress is printed to stdout. Rows go through ``styler.sort`` (with
        ``sortkey``) and ``styler.limit`` (with ``limit``) before being
        returned.

        Raises:
            IndexError: when no row was built, because the sort columns are
                read from the first row.
        """
        print('fetching all models...')
        # materialised so the number of models can be reported as progress
        models = list(fetcher.get_models(self,
                                         project=project,
                                         model=model,
                                         verbose=1))
        print('complete.')
        print('fetching used models...')
        used_models = fetcher.get_used_models(self, timeframe, min_queries)
        print('complete.')
        info = []

        # Progress is measured against the models being iterated, not against
        # the number of used models.
        total = len(models)
        for complete, m in enumerate(models, start=1):
            print('Processing {} of {} models'.format(complete, total))
            explore_count = len(m['explores'])
            name = m['name']
            query_run_count = used_models[name] if name in used_models else 0
            unused_explores = fetcher.get_unused_explores(
                self, name, timeframe, min_queries)
            info.append({
                'project': m['project_name'],
                'model': name,
                'explore_count': explore_count,
                'unused_explores': len(unused_explores),
                'query_run_count': query_run_count
            })
        # the first row's keys are handed to styler.sort as valid sort columns
        valid_values = list(info[0].keys())
        info = styler.sort(info, valid_values, sortkey)
        info = styler.limit(info, limit=limit)
        return info
예제 #19
0
def test_get_explore_field_stats(
    fc: fetcher.Fetcher,
    looker_sdk: methods.LookerSDK,
    test_model,
    test_used_explore_names,
    test_explores_stats,
):
    """Check the per-field stats returned by get_explore_field_stats().

    For the first used explore, every field listed as unused must have a stat
    of 0 and every field listed as used must have a stat above 0.
    """
    model_name = test_model["name"]
    target_name = test_used_explore_names[0]

    explore = fc.get_explores(model=model_name, explore=target_name)[0]
    actual_stats = fc.get_explore_field_stats(explore)
    assert isinstance(actual_stats, dict)

    # pick the fixture entry describing the explore under test
    for candidate in test_explores_stats:
        if candidate["name"] == target_name:
            expected_stats = candidate

    for field in expected_stats["unused_fields"]:
        assert actual_stats[field] == 0
    for field in expected_stats["used_fields"]:
        assert actual_stats[field] > 0
예제 #20
0
def test_get_used_explores(fc: fetcher.Fetcher, test_model, test_used_explore_names):
    """Check that get_used_explores() returns a dict of known used explores."""
    usage = fc.get_used_explores(model=test_model["name"])
    assert isinstance(usage, dict)
    for explore_name in usage:
        assert explore_name in test_used_explore_names
예제 #21
0
def test_get_projects_returns_projects(fc: fetcher.Fetcher):
    """Check that get_projects() returns a list whose first item is a Project."""
    found = fc.get_projects()
    assert isinstance(found, list)
    first = found[0]
    assert isinstance(first, models.Project)
예제 #22
0
def test_get_explores(fc: fetcher.Fetcher):
    """Check that get_explores() returns a non-empty list of explores.

    Only the first item's type is checked.
    """
    found = fc.get_explores()
    assert isinstance(found, list)
    assert len(found) > 0
    first = found[0]
    assert isinstance(first, models.LookmlModelExplore)
예제 #23
0
def test_get_models_returns_models(fc: fetcher.Fetcher):
    """Check that get_models() returns a list whose first item is a LookmlModel."""
    found = fc.get_models()
    assert isinstance(found, list)
    first = found[0]
    assert isinstance(first, models.LookmlModel)
예제 #24
0
def test_get_projects_throws_if_project_does_not_exist(fc: fetcher.Fetcher):
    """Check that get_projects() raises NotFoundError for an unknown project."""
    unknown_project = "BadProject"
    with pytest.raises(exceptions.NotFoundError) as raised:
        fc.get_projects(unknown_project)
    error_text = str(raised.value)
    assert "An error occured while getting projects." in error_text
예제 #25
0
def test_get_unused_explores(fc: fetcher.Fetcher, test_model, test_unused_explores):
    """Check that get_unused_explores() only returns known unused explores."""
    for explore_name in fc.get_unused_explores(model=test_model["name"]):
        assert explore_name in test_unused_explores
예제 #26
0
def test_get_projects_filters(fc: fetcher.Fetcher, test_project_name):
    """Check that get_projects() returns exactly the project asked for."""
    found = fc.get_projects(test_project_name)
    assert isinstance(found, list)
    assert len(found) == 1
    only_project = found[0]
    assert only_project.name == test_project_name
예제 #27
0
def test_sort_throws_for_invalid_sort_keys(
    fc: fetcher.Fetcher, sortkey: Tuple[str, str]
):
    """Setting ``fc.sortkey`` and calling ``fc._sort(DATA)`` must raise KeyError."""
    expect_key_error = pytest.raises(KeyError)
    # the assignment stays inside the context so an error raised there counts
    with expect_key_error:
        fc.sortkey = sortkey
        fc._sort(DATA)
예제 #28
0
    def _vacuum_fields(self,
                       model=None,
                       explore=None,
                       timeframe=90,
                       min_queries=0):
        """List, per view, the exposed fields that are never used.

        Explores are looked up with ``fetcher.get_explores`` using the
        ``model`` and ``explore`` filters; ``timeframe`` and ``min_queries``
        are forwarded to ``fetcher.get_used_explore_fields``. A field counts
        as used when it appears in the query usage or inside ``{...}`` in a
        join's ``sql_on``. Progress is printed to stdout.

        Returns a list of ``{'view': ..., 'unused_fields': ...}`` dicts, one
        per view whose name contains no digit, sorted by view name.
        ``unused_fields`` is a newline-joined, sorted list of ``view.field``
        names and is an empty string when the view has nothing to report.

        Raises:
            Exception: when no row could be built
                ('No matching explores found').
        """
        explores = fetcher.get_explores(self,
                                        model=model,
                                        explore=explore,
                                        verbose=1)
        info = []
        master_exposed_fields = set()
        master_used_fields = set()
        distinct_views = set()
        for progress, e in enumerate(explores, start=1):
            print('Analyzing {}.{}, {} of {} explores'.format(
                e['model_name'], e['name'], progress, len(explores)))
            # get field usage from i__looker using all the views inside explore
            # returns fields in the form of model.explore.view.field
            _used_fields = fetcher.get_used_explore_fields(
                self, e['model_name'], e['scopes'], timeframe, min_queries)
            used_fields = list(_used_fields.keys())

            # get field picker fields in the form of model.explore.view.field
            exposed_fields = fetcher.get_explore_fields(self,
                                                        explore=e,
                                                        scoped_names=1)

            # Get fields used in joins: everything written between braces
            for join in e['joins']:
                if join['sql_on'] is not None:
                    for field in re.findall(r'\{(.*?)\}', join['sql_on']):
                        master_used_fields.add(field)
                        distinct_views.add(field.split('.')[0])
            # Get used fields
            for field in used_fields:
                # strip out the model and explore
                field = '.'.join(field.split('.')[2:])
                master_used_fields.add(field)
                distinct_views.add(field.split('.')[0])
            # Get all fields
            for field in exposed_fields:
                # strip out the model and explore
                field = '.'.join(field.split('.')[2:])
                master_exposed_fields.add(field)
                distinct_views.add(field.split('.')[0])

        # Fields to ignore if they contain the following:
        ignore_list = (
            'week', 'quarter', 'year', 'month', 'raw', 'date', 'time'
        )

        # Filter the unused fields once and group them by view. Iterating in
        # sorted order keeps the report independent of set iteration order.
        unused_by_view = {}
        for field in sorted(master_exposed_fields - master_used_fields):
            parts = field.split('.')
            field_name = parts[1]
            # always keep id fields and basic count fields
            if field_name == 'count' or 'id' in field_name.split('_'):
                continue
            if any(ignore in field for ignore in ignore_list):
                continue
            unused_by_view.setdefault(parts[0], []).append(field)

        for view in sorted(distinct_views):
            if any(char.isdigit() for char in view):
                continue
            # a row is emitted even when the view has no unused field left
            info.append({
                'view': view,
                'unused_fields': '\n'.join(unused_by_view.get(view, []))
            })
        if not info:
            self.vacuum_logger.error('No matching explores found')
            raise Exception('No matching explores found')
        return info