Esempio n. 1
0
def test_run_field_rules():
    """Field-level rules run against the first view of a minimal file."""
    cfg = {
        "rules": {
            "field_level_rules": [
                {"name": "DescriptionRule", "run": True},
                {"name": "YesNoNameRule", "run": True},
            ]
        },
    }
    linter = LookMlLinter(cfg)
    first_view = LookML("test/minimal_multiline.view.lkml").views()[0]

    print("v", first_view)

    results = linter.run_field_rules(first_view, "dimension", "dimensions", "xxx", [])
    expected = {
        "file": "xxx",
        "rule": "DescriptionRule",
        "passed": 1,
        "type": "dimension",
        "fieldname": "city_code",
    }
    assert results[0] == expected
Esempio n. 2
0
def test_finish_up():
    """NoOrphansRule flags the orphaned view after all files are processed."""
    cfg = {
        "parser": "lookml-parser",
        "infile_globs": ["test/test_orphans_repo/*.*.lkml"],
        "tmp_file": "test/parsed_lookml.json",
        "rules": {"other_rules": [{"name": "NoOrphansRule", "run": True}]},
    }
    lookml = LookML(cfg)
    rule = NoOrphansRule(cfg)
    for pattern in cfg["infile_globs"]:
        for path in glob.glob(pattern):
            rule.process_file(lookml.get_json_representation(path))
    file_out = rule.finish_up([])
    expected = [{
        "file": "test/test_orphans_repo/orphan.view.lkml",
        "rule": rule.name(),
        "passed": 0,
    }]
    assert file_out == expected

    # clean up the intermediate parse output, if it was written
    if os.path.exists(cfg["tmp_file"]):
        os.remove(cfg["tmp_file"])
Esempio n. 3
0
def test_run_field_rules():
    """Same scenario with the config built from a prepared rule list."""
    field_rules = [
        {"name": "DescriptionRule", "run": True},
        {"name": "YesNoNameRule", "run": True},
    ]
    linter = LookMlLinter({"rules": {"field_level_rules": field_rules}})
    view = LookML("test/minimal_multiline.view.lkml").views()[0]

    print("v", view)

    out = linter.run_field_rules(view, "dimension", "dimensions", "xxx", [])
    assert out[0] == {
        "file": "xxx",
        "rule": "DescriptionRule",
        "passed": 1,
        "type": "dimension",
        "fieldname": "city_code",
    }
Esempio n. 4
0
def test_run_file_rules():
    """Only the rules with run=True produce output records."""
    cfg = {
        "rules": {
            "file_level_rules": [
                {"name": "DataSourceRule", "run": True},
                {"name": "OneViewPerFileRule", "run": False},
                {"name": "FilenameViewnameMatchRule", "run": True},
            ]
        },
    }
    linter = LookMlLinter(cfg)

    rule = DataSourceRule()
    lookml = LookML("test/minimal_multiline.view.lkml")
    results = linter.run_file_rules(lookml, "xxx", [])
    assert len(results) == 2
    assert results[0] == {"file": "xxx", "passed": 1, "rule": "DataSourceRule"}
    assert results[1] == {
        "file": "xxx",
        "passed": 0,
        "rule": "FilenameViewnameMatchRule",
    }
Esempio n. 5
0
def test_run_file_rules():
    """File rules run against the parsed JSON representation."""
    cfg = {
        "parser": "lookml-parser",
        "tmp_file": "parsed_lookml.json",
        "rules": {
            "file_level_rules": [
                {"name": "DataSourceRule", "run": True},
                {"name": "OneViewPerFileRule", "run": False},
                {"name": "FilenameViewnameMatchRule", "run": True},
            ]
        },
    }
    linter = LookMlLinter(cfg)

    rule = DataSourceRule()
    parsed = LookML(cfg).get_json_representation("test/minimal_multiline.lkml")
    results = linter.run_file_rules(parsed, "xxx", [])
    assert len(results) == 2
    assert results[0] == {"file": "xxx", "passed": 1, "rule": "DataSourceRule"}
    assert results[1] == {
        "file": "xxx",
        "passed": 0,
        "rule": "FilenameViewnameMatchRule",
    }
    # clean up the intermediate parse output, if it was written
    if os.path.exists(cfg["tmp_file"]):
        os.remove(cfg["tmp_file"])
Esempio n. 6
0
def get_json_from_lookml(raw_lookml, user_defined_filename=None):
    """Write raw LookML to a temp file, parse it to JSON, and clean up.

    Args:
        raw_lookml (str): LookML source text to parse
        user_defined_filename (str): optional path for the temp file;
            defaults to test/tmp.view.lkml

    Returns:
        JSON representation of the parsed LookML
    """
    target = user_defined_filename or "test/tmp.view.lkml"

    with open(target, "w") as fh:
        fh.write(raw_lookml)

    cfg = {"parser": "lookml-parser", "tmp_file": "test/parsed_lookml.json"}
    parsed = LookML(cfg).get_json_representation(target)

    # remove both the temp source and the intermediate parse output
    teardown(target)
    teardown(cfg["tmp_file"])
    return parsed
Esempio n. 7
0
    def __init__(self, config):
        """Set up the grapher from a JSON configuration.

        Args:
            config (JSON): JSON configuration

        """
        self.config = config
        self.lookml = LookML(config)

        # edge lists: (model, explore) and (explore, view) name pairs
        self.models_to_explores = []
        self.explores_to_views = []

        # maps each node name to its node type
        self.node_map = {}
Esempio n. 8
0
def test_process_file5(config):
    """A file with no models, views, or explores raises an Exception."""
    grapher = LookMlGrapher(config)
    with pytest.raises(Exception) as excinfo:
        grapher.process_lookml(LookML("test/empty.view.lkml"))
    assert 'No models, views, or explores? test/empty.view.lkml' in str(
        excinfo.value)
Esempio n. 9
0
def test_run_field_rules():
    """Field rules run against a view taken from the JSON representation."""
    cfg = {
        "parser": "lookml-parser",
        "tmp_file": "parsed_lookml.json",
        "rules": {
            "field_level_rules": [
                {"name": "DescriptionRule", "run": True},
                {"name": "YesNoNameRule", "run": True},
            ]
        },
    }
    linter = LookMlLinter(cfg)
    parsed = LookML(cfg).get_json_representation("test/minimal_multiline.lkml")
    first_view = parsed["files"][0]["views"][0]

    out = linter.run_field_rules(first_view, "dimension", "dimensions", "xxx", [])
    assert out[0] == {
        "file": "xxx",
        "rule": "DescriptionRule",
        "passed": 1,
        "type": "dimension",
        "fieldname": "city_code",
    }
    # clean up the intermediate parse output, if it was written
    if os.path.exists(cfg["tmp_file"]):
        os.remove(cfg["tmp_file"])
Esempio n. 10
0
def test_process_file(config):
    """Processing a model file populates both edge lists."""
    grapher = LookMlGrapher(config)
    assert grapher.models_to_explores == []
    assert grapher.explores_to_views == []
    lookml = LookML("test/grapher_lkml/some_model.model.lkml")
    grapher.process_lookml(lookml)
    assert grapher.models_to_explores == [("some_model", "some_explore")]
    assert grapher.explores_to_views == [
        ("some_explore", "some_view"),
        ("some_explore", "some_other_view"),
    ]
Esempio n. 11
0
def test_process_file3(config):
    """Processing a view file registers the view in the node map."""
    grapher = LookMlGrapher(config)
    assert grapher.node_map == {}
    grapher.process_lookml(LookML("test/grapher_lkml/some_view.view.lkml"))

    assert "some_view" in grapher.node_map
    assert grapher.node_map["some_view"] == NodeType.VIEW
Esempio n. 12
0
def test_init2():
    """LookML() must reject a filename with an unsupported extension.

    Creates a throwaway file with a bogus extension, asserts the constructor
    raises, and always removes the file afterwards.
    """
    filename = "somefile.xxx"
    if not os.path.exists(filename):
        # touch an empty file with an unsupported extension
        with open(filename, 'w'):
            pass

    try:
        with pytest.raises(Exception) as e:
            LookML(filename)
        assert 'Unsupported filename somefile.xxx' in str(e.value)
    finally:
        # clean up even if the assertions fail, so reruns start from scratch
        os.remove(filename)
Esempio n. 13
0
def test_process_file4(config):
    """An explore file adds explore->view edges but no model->explore edges."""
    grapher = LookMlGrapher(config)
    assert grapher.models_to_explores == []
    assert grapher.explores_to_views == []
    grapher.process_lookml(LookML("test/grapher_lkml/some_explore.explore.lkml"))
    assert grapher.models_to_explores == []
    assert grapher.explores_to_views == [("some_explore", "some_view"),
                                         ("some_explore", "some_other_view")]
Esempio n. 14
0
    def modify(self, infilepath, outfilepath):
        '''modify the LookML

        Notes:
            default behavior is to match on full path when matching LookML files
            with the definitions source. However, you can configure to match on
            LookML file basename by setting ``"use_basename": true`` in the config

        Args:
            infilepath (str): path to input LookML file
            outfilepath (str): path of updated LookML to write to

        Returns:
            nothing. Writes out modified file contents to file

        '''
        writer = FileModifier(infilepath)
        lookml = LookML(infilepath)

        # Definitions for this file. In some cases we might not easily know the
        # full path (such as full_auto_updater.sh which uses a timestamp in the
        # git clone), so the config can request matching on basename instead.
        if 'use_basename' in self.config and self.config['use_basename']:
            logging.info("Matching files based on basename")
            match_key = os.path.basename(infilepath)
        else:
            match_key = infilepath
        defs = self.definitions[self.definitions.file == match_key]

        for entry in defs.T.to_dict().values():
            logging.info("Processing %s: %s", entry['type'], entry['name'])

            current_desc, found = self.find_description(lookml, entry['type'], entry['name'])
            line_count = len(current_desc.split("\n"))

            if found:
                logging.info("Existing description for %s.%s: '%s'", entry['type'], entry['name'], current_desc)
            else:
                logging.info("No description for %s.%s", entry['type'], entry['name'])

            target_desc = entry['definition']
            if current_desc == target_desc:
                continue  # description already matches the source of truth

            if found:
                logging.info("Update needed: %s.%s -> '%s'", entry['type'], entry['name'], target_desc)
                logging.info("This is %d line existing description", line_count)
            else:
                logging.info("Injection needed: %s.%s -> '%s'", entry['type'], entry['name'], target_desc)

            writer.modify(line_count, entry['type'], entry['name'], target_desc, found)

        writer.write(outfilepath)
Esempio n. 15
0
def get_json_from_lookml(raw_lookml, user_defined_filename=None):
    """Persist raw LookML to disk, parse it, and return the JSON payload.

    Args:
        raw_lookml (str): LookML source text to parse
        user_defined_filename (str): optional path for the temp file;
            defaults to test/tmp.view.lkml

    Returns:
        JSON representation of the parsed LookML
    """
    target = user_defined_filename or "test/tmp.view.lkml"

    with open(target, "w") as fh:
        fh.write(raw_lookml)

    parsed = LookML(target).json_data
    teardown(target)
    return parsed
Esempio n. 16
0
def test_process_explores(config):
    """process_explores records model->explore and explore->view edges."""
    grapher = LookMlGrapher(config)
    lookml = LookML("test/grapher_lkml/some_model.model.lkml")

    model_name = lookml.base_name
    first_explore = lookml.json_data['explores'][0]

    assert grapher.models_to_explores == []
    assert grapher.explores_to_views == []

    grapher.process_explores(model_name, first_explore)

    assert grapher.models_to_explores == [('some_model', 'some_explore')]
    assert grapher.explores_to_views == [('some_explore', 'some_view'),
                                         ('some_explore', 'some_other_view')]
Esempio n. 17
0
def test_process_explores(config):
    """Duplicate of the explore-edge test with double-quoted literals."""
    grapher = LookMlGrapher(config)
    lookml = LookML("test/grapher_lkml/some_model.model.lkml")
    model_name = lookml.base_name
    first_explore = lookml.json_data["explores"][0]

    assert grapher.models_to_explores == []
    assert grapher.explores_to_views == []

    grapher.process_explores(model_name, first_explore)

    assert grapher.models_to_explores == [("some_model", "some_explore")]
    expected_edges = [
        ("some_explore", "some_view"),
        ("some_explore", "some_other_view"),
    ]
    assert grapher.explores_to_views == expected_edges
Esempio n. 18
0
    def extract_graph_info(self, globstrings):
        '''process a list of fileglobs to extract nodes, edges, and orphaned views
            Args:
                globstrings (list): list of globstrings
            Returns:
                nothing but side effect is that nodes are stored in self.node_map and
                self.models_to_explores and self.views_to_explores are completed
        '''
        for pattern in globstrings:
            matches = glob.glob(pattern)
            if matches == []:
                raise Exception("Invalid glob %s" % pattern)

            for path in matches:
                assert os.path.exists(path)
                logging.info("Processing " + path)
                self.process_lookml(LookML(path))
        self.tag_orphans()
Esempio n. 19
0
 def extract_graph_info(self, globstrings):
     """process a list of fileglobs to extract nodes, edges, and orphaned views
         Args:
             globstrings (list): list of globstrings
         Returns:
             nothing but side effect is that nodes are stored in self.node_map and
             self.models_to_explores and self.views_to_explores are completed
     """
     for pattern in globstrings:
         for path in glob.glob(pattern):
             assert os.path.exists(path)
             log.info("Processing " + path)
             self.process_lookml(LookML(path))
     # an empty node_map means the globs matched no LookML files at all
     if not self.node_map:
         raise NoLookMLFilesFound(globstrings)
     self.tag_orphans()
Esempio n. 20
0
def test_finish_up():
    """NoOrphansRule reports the orphaned view via process_lookml."""
    cfg = {
        "infile_globs": ["test/test_orphans_repo/*.*.lkml"],
        "rules": {"other_rules": [{"name": "NoOrphansRule", "run": True}]},
    }
    rule = NoOrphansRule(cfg)
    for pattern in cfg["infile_globs"]:
        for path in glob.glob(pattern):
            rule.process_lookml(LookML(path))
    file_out = rule.finish_up([])
    expected = [{
        "file": "test/test_orphans_repo/orphan.view.lkml",
        "rule": rule.name(),
        "passed": 0,
    }]
    assert file_out == expected
Esempio n. 21
0
def test_init():
    """Constructing LookML with a nonexistent path raises an Exception."""
    with pytest.raises(Exception) as excinfo:
        LookML("doesnotexist")
    assert "Filename does not exist: doesnotexist" in str(excinfo.value)
Esempio n. 22
0
    def run(self):
        '''
            run the set of file and field-level rules against all files in the file glob

            Returns:
                (file_out, field_out): lists of file-level and field-level rule
                result records. Side effect: writes results to CSV and/or
                BigQuery, as specified in the config
        '''

        file_out = []
        field_out = []

        timestr = datetime.datetime.now().isoformat()

        # NoOrphansRule has to see every file before it can judge, so it is
        # fed incrementally in the loop and finished up at the end
        no_orphans_rule = None
        if "NoOrphansRule" in self.other_rules_to_run():
            no_orphans_rule = NoOrphansRule(self.config)

        globstrings = self.config['infile_globs']
        for globstring in globstrings:
            filepaths = glob.glob(globstring)
            for filepath in filepaths:

                simple_filepath = os.path.basename(filepath)

                logging.info("Processing %s", filepath)

                lookml = LookML(filepath)

                file_out = self.run_file_rules(lookml, simple_filepath,
                                               file_out)

                if lookml.has_views():
                    # field rules inspect the first view of the file only
                    v = lookml.views()[0]
                    field_out = self.run_field_rules(v, 'dimension',
                                                     'dimensions',
                                                     simple_filepath,
                                                     field_out)
                    field_out = self.run_field_rules(v, 'dimension_group',
                                                     'dimension_groups',
                                                     simple_filepath,
                                                     field_out)
                    field_out = self.run_field_rules(v, 'measure', 'measures',
                                                     simple_filepath,
                                                     field_out)

                if no_orphans_rule:
                    no_orphans_rule.process_lookml(lookml)

            # tag each record created above with the glob it came from
            for record in field_out + file_out:
                if 'glob' not in record:
                    record['glob'] = globstring

        # for this rule, we can only assess who failed after all files are processed
        if no_orphans_rule:
            file_out = no_orphans_rule.finish_up(file_out)

        # NOTE(review): 'simple_biquery' looks like a typo for 'simple_bigquery'
        # but it is the key existing configs use, so it must stay as-is
        if 'simple_biquery' in self.config['output']:
            simple_bq_writer = SimpleBqWriter()
        if 'bigquery' in self.config['output']:
            bq_writer = BqWriter()

        if len(file_out) > 0:
            df = pd.DataFrame(file_out)
            df['time'] = timestr
            df['repo'] = self.config['git']['url']

            if 'csv' in self.config['output']:
                self.write_file_csv(df)

            if 'simple_biquery' in self.config['output']:
                simple_bq_writer.upload(df, self.config,
                                        'file_destination_table')

            if 'bigquery' in self.config['output']:
                bq_writer.upload(df, self.config, 'file_destination_table')

        if len(field_out) > 0:
            df = pd.DataFrame(field_out)
            df['time'] = timestr
            df['repo'] = self.config['git']['url']

            if 'csv' in self.config['output']:
                self.write_field_csv(df)

            if 'simple_biquery' in self.config['output']:
                simple_bq_writer.upload(df, self.config,
                                        'field_destination_table')

            if 'bigquery' in self.config['output']:
                bq_writer.upload(df, self.config, 'field_destination_table')

        return file_out, field_out
Esempio n. 23
0
def get_lookml_from_raw_lookml(raw_lookml, type):
    filename = "test/" + type + ".lkml"
    with open(filename, "w") as text_file:
        text_file.write(raw_lookml)
    lookml = LookML(filename)
    return lookml
Esempio n. 24
0
class LookMlGrapher():
    '''A LookML Grapher that parses a set of LookML files specified in some config
        and creates an image showing the relationship among the models, explores and views
    '''
    def __init__(self, config):
        '''instantiate this grapher

        Args:
            config (JSON): JSON configuration

        '''
        self.config = config
        self.lookml = LookML(config)

        # list of edge pair names
        self.models_to_explores = []
        self.explores_to_views = []

        # dict of node names with their type
        self.node_map = {}

    def plot_graph(self,
                   g,
                   filename,
                   title,
                   node_size=500,
                   label_font_size=12,
                   text_angle=0,
                   image_width=16,
                   image_height=12):
        '''plot the graph and write to file

        Args:
            g (networkx): networkx graph object
            filename (str): path to write image to
            title (str): title to add to chart
            node_size (int): node size
            label_font_size (int): font size
            text_angle (int): angle to rotate. This is angle in degrees counter clockwise from east
            image_width (int): width of image in inches
            image_height (int): height of image in inches

        Returns:
            nothing but does write image to file

        '''
        # map nodes to a color for their node type
        # https://stackoverflow.com/questions/27030473/how-to-set-colors-for-nodes-in-networkx-python
        color_map = []
        colors = ['#b3cde3', '#ccebc5', '#decbe4', '#FFA500']
        for node in g:
            if self.node_map[node] == NodeType.MODEL:
                color_map.append(colors[0])
            elif self.node_map[node] == NodeType.EXPLORE:
                color_map.append(colors[1])
            elif self.node_map[node] == NodeType.VIEW:
                color_map.append(colors[2])
            else:
                color_map.append(colors[3])

        fig = plt.figure(figsize=(image_width, image_height))
        ax = plt.subplot(111)

        try:
            import pydot
            from networkx.drawing.nx_pydot import graphviz_layout
        except ImportError:  # pragma: no cover
            raise ImportError(
                "Requires Graphviz and either PyGraphviz or pydot"
            )  # pragma: no cover

        #pos = nx.spring_layout(g)
        #pos = nx.circular_layout(g)
        #pos = nx.kamada_kawai_layout(g)
        #pos = nx.shell_layout(g)
        #pos = nx.spectral_layout(g)
        # NOTE: graphviz_layout takes no seed parameter (dot is deterministic);
        # passing one raises TypeError on modern networkx
        pos = graphviz_layout(g, prog='dot')
        nx.draw(g,
                pos,
                node_size=node_size,
                node_color=color_map,
                edge_color='#939393',
                font_size=9,
                font_weight='bold')

        # NOTE: draw_networkx_labels takes no with_labels parameter; it always
        # draws labels, so the former with_labels=False kwarg was a no-op
        text = nx.draw_networkx_labels(g,
                                       pos,
                                       font_size=label_font_size)
        for _, t in text.items():
            t.set_rotation(text_angle)

        plt.axis('off')
        plt.title(title, fontsize=20)
        plt.tight_layout()
        plt.savefig(filename, format="PNG")
        logging.info("Graph written to %s", filename)

    def tag_orphans(self):
        '''find any orphaned views and tag them as orphan node type

        Returns:
            nothing but side effect is that any orphans are tagged in the node map

        '''
        referenced_views = set([v[1] for v in self.explores_to_views])
        view_names = set(
            [k for k in self.node_map if self.node_map[k] == NodeType.VIEW])
        orphans = view_names - referenced_views
        for orphan in orphans:
            self.node_map[orphan] = NodeType.ORPHAN

    def orphans(self):
        '''retrieve the set of orphaned views (if any) from the set of files

        Prerequisites:
            tag_orphans() has been called

        Returns:
            set of view names (if any)

        '''
        return set(
            [k for k in self.node_map if self.node_map[k] == NodeType.ORPHAN])

    def create_graph(self):
        '''add nodes and edges to a graph

        Returns:
            instance of networkx graph

        '''
        g = nx.DiGraph()
        [g.add_node(node_name) for node_name in self.node_map]
        [g.add_edge(p[0], p[1]) for p in self.models_to_explores]
        [g.add_edge(p[0], p[1]) for p in self.explores_to_views]
        return g

    def process_explores(self, m, e):
        '''extract the views referenced by these explores and
        add them to node map and add explore-->view or model-->explores

        Args:
            m (str): model
            e (str): explore

        Returns:
            nothing. Side effect is to add to maps

        '''
        explore_name = e['_explore']
        self.node_map[explore_name] = NodeType.EXPLORE
        if m and '_model' in m:
            self.models_to_explores.append((m['_model'], explore_name))
        if 'from' in e:
            # this is the first view mentioned
            self.explores_to_views.append((explore_name, e['from']))
            #logging.info("Adding %s %s" % (explore_name, e['from']))

            # but there could be more mentioned in the list (if any) of joins
            if 'join' in e:
                for k in e['join']:
                    if not k.startswith('_'):
                        if 'from' in e['join'][k]:
                            # this is an edge from explore to a view contained within joined views
                            self.explores_to_views.append(
                                (explore_name, e['join'][k]['from']))

    def process_file(self, filepath, json_data=None):
        '''given a filepath to a LookML file, extract the views, models, explores as the nodes
        as well as any model-->explore and explore-->view edges

        Args:
            filepath (str): path to LookML file
            json_data (JSON): chunk of JSONified LookML code

        Returns:
            nothing but stores node names and their types as well as edges

        '''
        assert filepath or json_data
        if filepath and not json_data:
            logging.info("Processing %s", filepath)
            json_data = self.lookml.get_json_representation(filepath)

        if 'views' in json_data['files'][0]:
            for v in json_data['files'][0]['views']:
                self.node_map[v['_view']] = NodeType.VIEW
        elif 'models' in json_data['files'][0]:
            for m in json_data['files'][0]['models']:
                self.node_map[m['_model']] = NodeType.MODEL
                [self.process_explores(m, e) for e in m['explores']]
        elif 'explores' in json_data['files'][0]:
            for e in json_data['files'][0]['explores']:
                self.process_explores(None, e)
        else:
            #logging.error("Issue with %s", filepath)
            # BUGFIX: was Exception("... %s", filepath) which stores a tuple
            # instead of formatting, so str(e) never contained the filepath
            raise Exception("No models, views, or explores? %s" % filepath)

    def extract_graph_info(self, globstrings):
        '''given a list of fileglobs, process them to extract list of nodes and edges, and orphaned views

            Args:
                globstrings (list): list of globstrings

            Returns:
                nothing but side effect is that nodes are stored in self.node_map and self.models_to_explores
                and self.explores_to_views are completed
        '''
        for globstring in globstrings:
            if list(glob.glob(globstring)) == []:
                raise Exception("Invalid glob %s" % globstring)

            for filepath in glob.glob(globstring):
                assert os.path.exists(filepath)
                self.process_file(filepath)
        self.tag_orphans()

    def run(self):
        '''process the set of files and create an image of the graph

            Returns:
                nothing. Saves an image file, specified in the config
        '''
        timestr = datetime.datetime.now().strftime("%Y-%m-%d")
        globstrings = self.config['infile_globs']
        self.extract_graph_info(globstrings)
        g = self.create_graph()

        args = {}
        args['g'] = g
        args['filename'] = self.config['output']
        args['title'] = " ".join(globstrings) + " as of " + timestr
        if 'options' in self.config:
            args.update(self.config['options'])

        # lazy %-style args so formatting only happens if the record is emitted
        logging.info("Setting the following options: %s", args)

        self.plot_graph(**args)