def test_run_field_rules():
    config = {
        "rules": {
            "field_level_rules": [
                {"name": "DescriptionRule", "run": True},
                {"name": "YesNoNameRule", "run": True},
            ]
        },
    }

    linter = LookMlLinter(config)
    lookml = LookML("test/minimal_multiline.view.lkml")
    v = lookml.views()[0]
    out = linter.run_field_rules(v, 'dimension', 'dimensions', "xxx", [])
    assert out[0] == {
        'file': 'xxx',
        'rule': 'DescriptionRule',
        'passed': 1,
        'type': 'dimension',
        'fieldname': 'city_code'
    }

def test_finish_up():
    config = {
        "parser": "lookml-parser",
        "infile_globs": ["test/test_orphans_repo/*.*.lkml"],
        "tmp_file": "test/parsed_lookml.json",
        "rules": {
            "other_rules": [{"name": "NoOrphansRule", "run": True}]
        },
    }

    lookml = LookML(config)
    rule = NoOrphansRule(config)
    globstrings = config['infile_globs']
    for globstring in globstrings:
        filepaths = glob.glob(globstring)
        for filepath in filepaths:
            json_data = lookml.get_json_representation(filepath)
            rule.process_file(json_data)

    file_out = rule.finish_up([])

    assert file_out == [{
        "file": "test/test_orphans_repo/orphan.view.lkml",
        "rule": rule.name(),
        "passed": 0
    }]

    if os.path.exists(config['tmp_file']):
        os.remove(config['tmp_file'])

def test_run_file_rules():
    config = {
        "rules": {
            "file_level_rules": [
                {"name": "DataSourceRule", "run": True},
                {"name": "OneViewPerFileRule", "run": False},
                {"name": "FilenameViewnameMatchRule", "run": True},
            ]
        },
    }

    linter = LookMlLinter(config)
    lookml = LookML("test/minimal_multiline.view.lkml")
    out = linter.run_file_rules(lookml, "xxx", [])
    assert len(out) == 2
    assert out[0] == {"file": "xxx", "passed": 1, "rule": "DataSourceRule"}
    assert out[1] == {
        "file": "xxx",
        "passed": 0,
        "rule": "FilenameViewnameMatchRule"
    }

def test_run_file_rules():
    config = {
        "parser": "lookml-parser",
        "tmp_file": "parsed_lookml.json",
        "rules": {
            "file_level_rules": [
                {"name": "DataSourceRule", "run": True},
                {"name": "OneViewPerFileRule", "run": False},
                {"name": "FilenameViewnameMatchRule", "run": True},
            ]
        },
    }

    linter = LookMlLinter(config)
    json_data = LookML(config).get_json_representation(
        "test/minimal_multiline.lkml")
    out = linter.run_file_rules(json_data, "xxx", [])
    assert len(out) == 2
    assert out[0] == {'file': 'xxx', 'passed': 1, 'rule': 'DataSourceRule'}
    assert out[1] == {
        'file': 'xxx',
        'passed': 0,
        'rule': 'FilenameViewnameMatchRule'
    }

    if os.path.exists(config['tmp_file']):
        os.remove(config['tmp_file'])

def get_json_from_lookml(raw_lookml, user_defined_filename=None):
    filename = "test/tmp.view.lkml"
    if user_defined_filename:
        filename = user_defined_filename

    with open(filename, "w") as text_file:
        text_file.write(raw_lookml)

    config = {"parser": "lookml-parser", "tmp_file": "test/parsed_lookml.json"}
    lookml = LookML(config)
    json_data = lookml.get_json_representation(filename)
    teardown(filename)
    teardown(config['tmp_file'])
    return json_data

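# A usage sketch for the helper above. The inline LookML snippet is
# hypothetical (modeled on the city_code dimension asserted elsewhere in
# these tests), and running it requires the external lookml-parser tool
# named in the config:
#
#   raw_lookml = """view: city {
#     dimension: city_code {
#       type: string
#       description: "3-letter city code"
#       sql: ${TABLE}.city_code ;;
#     }
#   }"""
#   json_data = get_json_from_lookml(raw_lookml)
#   assert 'files' in json_data  # lookml-parser wraps output in a 'files' list
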
def test_process_file5(config):
    grapher = LookMlGrapher(config)
    with pytest.raises(Exception) as e:
        lookml = LookML("test/empty.view.lkml")
        grapher.process_lookml(lookml)
    assert 'No models, views, or explores? test/empty.view.lkml' in str(
        e.value)

def test_run_field_rules():
    config = {
        "parser": "lookml-parser",
        "tmp_file": "parsed_lookml.json",
        "rules": {
            "field_level_rules": [
                {"name": "DescriptionRule", "run": True},
                {"name": "YesNoNameRule", "run": True},
            ]
        },
    }

    linter = LookMlLinter(config)
    json_data = LookML(config).get_json_representation(
        "test/minimal_multiline.lkml")
    v = json_data['files'][0]['views'][0]
    out = linter.run_field_rules(v, 'dimension', 'dimensions', "xxx", [])
    assert out[0] == {
        'file': 'xxx',
        'rule': 'DescriptionRule',
        'passed': 1,
        'type': 'dimension',
        'fieldname': 'city_code'
    }

    if os.path.exists(config['tmp_file']):
        os.remove(config['tmp_file'])

def test_process_file(config):
    grapher = LookMlGrapher(config)
    assert grapher.models_to_explores == []
    assert grapher.explores_to_views == []
    grapher.process_lookml(LookML("test/grapher_lkml/some_model.model.lkml"))
    assert grapher.models_to_explores == [('some_model', 'some_explore')]
    assert grapher.explores_to_views == [('some_explore', 'some_view'),
                                         ('some_explore', 'some_other_view')]

def test_process_file3(config):
    grapher = LookMlGrapher(config)
    assert grapher.node_map == {}
    lookml = LookML("test/grapher_lkml/some_view.view.lkml")
    grapher.process_lookml(lookml)
    assert 'some_view' in grapher.node_map
    assert grapher.node_map['some_view'] == NodeType.VIEW

def test_init2():
    filename = "somefile.xxx"
    if not os.path.exists(filename):
        with open(filename, 'w'):
            pass
    with pytest.raises(Exception) as e:
        LookML(filename)
    assert 'Unsupported filename somefile.xxx' in str(e.value)
    os.remove(filename)

def test_process_file4(config):
    grapher = LookMlGrapher(config)
    assert grapher.models_to_explores == []
    assert grapher.explores_to_views == []
    lookml = LookML("test/grapher_lkml/some_explore.explore.lkml")
    grapher.process_lookml(lookml)
    assert grapher.models_to_explores == []
    assert grapher.explores_to_views == [
        ("some_explore", "some_view"),
        ("some_explore", "some_other_view"),
    ]

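# For orientation, a plausible reconstruction of
# test/grapher_lkml/some_explore.explore.lkml, consistent with the assertions
# above. The real fixture is not shown in this excerpt, so treat this as an
# assumption (the sql_on clause in particular is hypothetical):
#
#   explore: some_explore {
#     from: some_view
#     join: some_other_view {
#       from: some_other_view
#       sql_on: ${some_view.id} = ${some_other_view.id} ;;
#     }
#   }
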
def modify(self, infilepath, outfilepath):
    '''modify the LookML

    Notes:
        default behavior is to match on full path when matching LookML
        files with the definitions source. However, you can configure to
        match on LookML file basename by setting ``"use_basename": true``
        in the config

    Args:
        infilepath (str): path to input LookML file
        outfilepath (str): path of updated LookML to write to

    Returns:
        nothing. Writes out modified file contents to file
    '''
    modifier = FileModifier(infilepath)

    lookml = LookML(infilepath)

    # get definitions for this file. In some cases, we might not
    # easily know the full path (such as full_auto_updater.sh which
    # uses timestamp in the git clone). Thus, just match on basename
    if 'use_basename' in self.config and self.config['use_basename']:
        logging.info("Matching files based on basename")
        defs = self.definitions[self.definitions.file ==
                                os.path.basename(infilepath)]
    else:
        defs = self.definitions[self.definitions.file == infilepath]

    for definition in defs.T.to_dict().values():
        logging.info("Processing %s: %s", definition['type'],
                     definition['name'])

        description, has_key = self.find_description(lookml,
                                                     definition['type'],
                                                     definition['name'])
        num_lines = len(description.split("\n"))

        if has_key:
            logging.info("Existing description for %s.%s: '%s'",
                         definition['type'], definition['name'], description)
        else:
            logging.info("No description for %s.%s", definition['type'],
                         definition['name'])

        expected_description = definition['definition']

        if description != expected_description:
            if has_key:
                logging.info("Update needed: %s.%s -> '%s'",
                             definition['type'], definition['name'],
                             expected_description)
                logging.info("Existing description is %d lines", num_lines)
            else:
                logging.info("Injection needed: %s.%s -> '%s'",
                             definition['type'], definition['name'],
                             expected_description)

            modifier.modify(num_lines, definition['type'],
                            definition['name'], expected_description,
                            has_key)

    modifier.write(outfilepath)

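# A minimal sketch of the `self.definitions` DataFrame that modify() filters
# and iterates over. The column names ('file', 'type', 'name', 'definition')
# are inferred from the lookups above; the row values are hypothetical:
#
#   import pandas as pd
#
#   definitions = pd.DataFrame([{
#       'file': 'test/minimal_multiline.view.lkml',
#       'type': 'dimension',
#       'name': 'city_code',
#       'definition': '3-letter city code',  # the desired description text
#   }])
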
def get_json_from_lookml(raw_lookml, user_defined_filename=None):
    filename = "test/tmp.view.lkml"
    if user_defined_filename:
        filename = user_defined_filename

    with open(filename, "w") as text_file:
        text_file.write(raw_lookml)

    lookml = LookML(filename)
    json_data = lookml.json_data
    teardown(filename)
    return json_data

def test_process_explores(config):
    grapher = LookMlGrapher(config)
    lookml = LookML("test/grapher_lkml/some_model.model.lkml")
    m = lookml.base_name
    e = lookml.json_data['explores'][0]
    assert grapher.models_to_explores == []
    assert grapher.explores_to_views == []
    grapher.process_explores(m, e)
    assert grapher.models_to_explores == [('some_model', 'some_explore')]
    assert grapher.explores_to_views == [('some_explore', 'some_view'),
                                         ('some_explore', 'some_other_view')]

def extract_graph_info(self, globstrings):
    '''given a list of fileglobs, process them to extract list of nodes
    and edges, and orphaned views

    Args:
        globstrings (list): list of globstrings

    Returns:
        nothing but side effect is that nodes are stored in self.node_map
        and self.models_to_explores and self.explores_to_views are
        completed
    '''
    for globstring in globstrings:
        if list(glob.glob(globstring)) == []:
            raise Exception("Invalid glob %s" % globstring)

        for filepath in glob.glob(globstring):
            assert os.path.exists(filepath)
            logging.info("Processing " + filepath)
            lookml = LookML(filepath)
            self.process_lookml(lookml)
    self.tag_orphans()

def extract_graph_info(self, globstrings):
    """given a list of fileglobs, process them to extract list of nodes
    and edges, and orphaned views

    Args:
        globstrings (list): list of globstrings

    Returns:
        nothing but side effect is that nodes are stored in self.node_map
        and self.models_to_explores and self.explores_to_views are
        completed
    """
    for globstring in globstrings:
        for filepath in glob.glob(globstring):
            assert os.path.exists(filepath)
            log.info("Processing " + filepath)
            lookml = LookML(filepath)
            self.process_lookml(lookml)

    if not self.node_map:
        # node_map is empty, which means we found no LookML files
        raise NoLookMLFilesFound(globstrings)

    self.tag_orphans()

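# NoLookMLFilesFound is raised above but not defined in this excerpt. A
# minimal sketch of such an exception (an assumption, not necessarily the
# library's actual definition):
class NoLookMLFilesFound(Exception):
    '''raised when a set of globstrings matches no LookML files'''

    def __init__(self, globstrings):
        super().__init__("No LookML files found in globs: %s" %
                         ", ".join(globstrings))
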
def test_finish_up():
    config = {
        "infile_globs": ["test/test_orphans_repo/*.*.lkml"],
        "rules": {
            "other_rules": [{"name": "NoOrphansRule", "run": True}]
        },
    }

    rule = NoOrphansRule(config)
    globstrings = config["infile_globs"]
    for globstring in globstrings:
        filepaths = glob.glob(globstring)
        for filepath in filepaths:
            rule.process_lookml(LookML(filepath))

    file_out = rule.finish_up([])

    assert file_out == [{
        "file": "test/test_orphans_repo/orphan.view.lkml",
        "rule": rule.name(),
        "passed": 0,
    }]

def test_init():
    with pytest.raises(Exception) as e:
        LookML("doesnotexist")
    assert 'Filename does not exist: doesnotexist' in str(e.value)

def run(self):
    '''run the set of file- and field-level rules against all files
    in the file glob

    Returns:
        tuple of (file_out, field_out) lists of rule results. Also saves
        two CSV files, specified in the config
    '''
    file_out = []
    field_out = []

    timestr = datetime.datetime.now().isoformat()

    no_orphans_rule = None
    if "NoOrphansRule" in self.other_rules_to_run():
        no_orphans_rule = NoOrphansRule(self.config)

    globstrings = self.config['infile_globs']
    for globstring in globstrings:
        filepaths = glob.glob(globstring)
        for filepath in filepaths:
            simple_filepath = os.path.basename(filepath)
            logging.info("Processing %s", filepath)

            lookml = LookML(filepath)

            file_out = self.run_file_rules(lookml, simple_filepath, file_out)

            if lookml.has_views():
                v = lookml.views()[0]
                field_out = self.run_field_rules(v, 'dimension', 'dimensions',
                                                 simple_filepath, field_out)
                field_out = self.run_field_rules(v, 'dimension_group',
                                                 'dimension_groups',
                                                 simple_filepath, field_out)
                field_out = self.run_field_rules(v, 'measure', 'measures',
                                                 simple_filepath, field_out)

            if no_orphans_rule:
                no_orphans_rule.process_lookml(lookml)

        # add some metadata for each of the records we created above
        [
            f.update({'glob': globstring})
            for f in field_out + file_out if 'glob' not in f
        ]

    # for this rule, we can only assess which files failed after all
    # files have been processed
    if no_orphans_rule:
        file_out = no_orphans_rule.finish_up(file_out)

    if 'simple_biquery' in self.config['output']:
        simple_bq_writer = SimpleBqWriter()
    if 'bigquery' in self.config['output']:
        bq_writer = BqWriter()

    if len(file_out) > 0:
        df = pd.DataFrame(file_out)
        df['time'] = timestr
        df['repo'] = self.config['git']['url']

        if 'csv' in self.config['output']:
            self.write_file_csv(df)
        if 'simple_biquery' in self.config['output']:
            simple_bq_writer.upload(df, self.config, 'file_destination_table')
        if 'bigquery' in self.config['output']:
            bq_writer.upload(df, self.config, 'file_destination_table')

    if len(field_out) > 0:
        df = pd.DataFrame(field_out)
        df['time'] = timestr
        df['repo'] = self.config['git']['url']

        if 'csv' in self.config['output']:
            self.write_field_csv(df)
        if 'simple_biquery' in self.config['output']:
            simple_bq_writer.upload(df, self.config,
                                    'field_destination_table')
        if 'bigquery' in self.config['output']:
            bq_writer.upload(df, self.config, 'field_destination_table')

    return file_out, field_out

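# A minimal sketch of a config that run() can consume, based on the keys read
# above ('infile_globs', 'output', 'git') and the rule lists consumed by
# run_file_rules / run_field_rules. The shape of the 'csv' output entry is
# hypothetical; check write_file_csv / write_field_csv for the exact keys:
#
#   config = {
#       "infile_globs": ["test/*.view.lkml"],
#       "output": {"csv": {}},  # presence of 'csv' enables CSV output
#       "git": {"url": "https://github.com/example/lookml-repo"},
#       "rules": {
#           "file_level_rules": [{"name": "DataSourceRule", "run": True}],
#           "field_level_rules": [{"name": "DescriptionRule", "run": True}],
#           "other_rules": [{"name": "NoOrphansRule", "run": True}],
#       },
#   }
#   file_out, field_out = LookMlLinter(config).run()
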
def get_lookml_from_raw_lookml(raw_lookml, type):
    filename = "test/" + type + ".lkml"
    with open(filename, "w") as text_file:
        text_file.write(raw_lookml)
    lookml = LookML(filename)
    return lookml

class LookMlGrapher():
    '''A LookML Grapher that parses a set of LookML files specified in some
    config and creates an image showing the relationship among the models,
    explores and views
    '''

    def __init__(self, config):
        '''instantiate this grapher

        Args:
            config (JSON): JSON configuration
        '''
        self.config = config
        self.lookml = LookML(config)

        # list of edge pair names
        self.models_to_explores = []
        self.explores_to_views = []

        # dict of node names with their type
        self.node_map = {}

    def plot_graph(self,
                   g,
                   filename,
                   title,
                   node_size=500,
                   label_font_size=12,
                   text_angle=0,
                   image_width=16,
                   image_height=12):
        '''plot the graph and write to file

        Args:
            g (networkx): networkx graph object
            filename (str): path to write image to
            title (str): title to add to chart
            node_size (int): node size
            label_font_size (int): font size
            text_angle (int): angle to rotate. This is angle in degrees
                counter clockwise from east
            image_width (int): width of image in inches
            image_height (int): height of image in inches

        Returns:
            nothing but does write image to file
        '''
        # map nodes to a color for their node type
        # https://stackoverflow.com/questions/27030473/how-to-set-colors-for-nodes-in-networkx-python
        color_map = []
        colors = ['#b3cde3', '#ccebc5', '#decbe4', '#FFA500']
        for node in g:
            if self.node_map[node] == NodeType.MODEL:
                color_map.append(colors[0])
            elif self.node_map[node] == NodeType.EXPLORE:
                color_map.append(colors[1])
            elif self.node_map[node] == NodeType.VIEW:
                color_map.append(colors[2])
            else:
                color_map.append(colors[3])

        fig = plt.figure(figsize=(image_width, image_height))
        ax = plt.subplot(111)

        try:
            import pydot
            from networkx.drawing.nx_pydot import graphviz_layout
        except ImportError:  # pragma: no cover
            raise ImportError(
                "Requires Graphviz and either PyGraphviz or pydot"
            )  # pragma: no cover

        # other layouts considered: spring_layout, circular_layout,
        # kamada_kawai_layout, shell_layout, spectral_layout
        pos = graphviz_layout(g, prog='dot')

        nx.draw(g,
                pos,
                node_size=node_size,
                node_color=color_map,
                edge_color='#939393',
                font_size=9,
                font_weight='bold')

        text = nx.draw_networkx_labels(g, pos, font_size=label_font_size)
        for _, t in text.items():
            t.set_rotation(text_angle)

        plt.axis('off')
        plt.title(title, fontsize=20)
        plt.tight_layout()
        plt.savefig(filename, format="PNG")
        logging.info("Graph written to %s", filename)

    def tag_orphans(self):
        '''find any orphaned views and tag them as orphan node type

        Returns:
            nothing but side effect is that any orphans are tagged in
            the node map
        '''
        referenced_views = set([v[1] for v in self.explores_to_views])
        view_names = set(
            [k for k in self.node_map if self.node_map[k] == NodeType.VIEW])
        orphans = view_names - referenced_views
        for orphan in orphans:
            self.node_map[orphan] = NodeType.ORPHAN

    def orphans(self):
        '''retrieve the set of orphaned views (if any) from the set of files

        Prerequisites:
            tag_orphans() has been called

        Returns:
            set of view names (if any)
        '''
        return set(
            [k for k in self.node_map if self.node_map[k] == NodeType.ORPHAN])

    def create_graph(self):
        '''add nodes and edges to a graph

        Returns:
            instance of networkx graph
        '''
        g = nx.DiGraph()
        [g.add_node(node_name) for node_name in self.node_map]
        [g.add_edge(p[0], p[1]) for p in self.models_to_explores]
        [g.add_edge(p[0], p[1]) for p in self.explores_to_views]
        return g

    def process_explores(self, m, e):
        '''extract the views referenced by these explores, add them to the
        node map, and add explore-->view and model-->explore edges

        Args:
            m (str): model
            e (str): explore

        Returns:
            nothing. Side effect is to add to maps
        '''
        explore_name = e['_explore']
        self.node_map[explore_name] = NodeType.EXPLORE
        if m and '_model' in m:
            self.models_to_explores.append((m['_model'], explore_name))
        if 'from' in e:
            # this is the first view mentioned
            self.explores_to_views.append((explore_name, e['from']))

        # but there could be more mentioned in the list (if any) of joins
        if 'join' in e:
            for k in e['join']:
                if not k.startswith('_'):
                    if 'from' in e['join'][k]:
                        # this is an edge from explore to a view contained
                        # within joined views
                        self.explores_to_views.append(
                            (explore_name, e['join'][k]['from']))

    def process_file(self, filepath, json_data=None):
        '''given a filepath to a LookML file, extract the views, models,
        explores as the nodes as well as any model-->explore and
        explore-->view edges

        Args:
            filepath (str): path to LookML file
            json_data (JSON): chunk of JSONified LookML code

        Returns:
            nothing but stores node names and their types as well as edges
        '''
        assert filepath or json_data
        if filepath and not json_data:
            logging.info("Processing %s", filepath)
            json_data = self.lookml.get_json_representation(filepath)

        if 'views' in json_data['files'][0]:
            for v in json_data['files'][0]['views']:
                self.node_map[v['_view']] = NodeType.VIEW
        elif 'models' in json_data['files'][0]:
            for m in json_data['files'][0]['models']:
                self.node_map[m['_model']] = NodeType.MODEL
                [self.process_explores(m, e) for e in m['explores']]
        elif 'explores' in json_data['files'][0]:
            for e in json_data['files'][0]['explores']:
                self.process_explores(None, e)
        else:
            raise Exception("No models, views, or explores? %s" % filepath)

    def extract_graph_info(self, globstrings):
        '''given a list of fileglobs, process them to extract list of nodes
        and edges, and orphaned views

        Args:
            globstrings (list): list of globstrings

        Returns:
            nothing but side effect is that nodes are stored in
            self.node_map and self.models_to_explores and
            self.explores_to_views are completed
        '''
        for globstring in globstrings:
            if list(glob.glob(globstring)) == []:
                raise Exception("Invalid glob %s" % globstring)

            for filepath in glob.glob(globstring):
                assert os.path.exists(filepath)
                self.process_file(filepath)
        self.tag_orphans()

    def run(self):
        '''process the set of files and create an image of the graph

        Returns:
            nothing. Saves an image file, specified in the config
        '''
        timestr = datetime.datetime.now().strftime("%Y-%m-%d")

        globstrings = self.config['infile_globs']
        self.extract_graph_info(globstrings)
        g = self.create_graph()

        args = {}
        args['g'] = g
        args['filename'] = self.config['output']
        args['title'] = " ".join(globstrings) + " as of " + timestr
        if 'options' in self.config:
            args.update(self.config['options'])
        logging.info("Setting the following options: %s", args)

        self.plot_graph(**args)

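# A minimal end-to-end usage sketch for LookMlGrapher. The 'parser' and
# 'tmp_file' keys mirror the configs used in the tests above; the glob,
# output filename, and plot options are hypothetical ('options' is merged
# into the plot_graph() arguments by run()):
#
#   config = {
#       "parser": "lookml-parser",
#       "tmp_file": "parsed_lookml.json",
#       "infile_globs": ["test/grapher_lkml/*.lkml"],
#       "output": "graph.png",
#       "options": {"node_size": 400, "label_font_size": 10},
#   }
#   grapher = LookMlGrapher(config)
#   grapher.run()  # parses all matched files and writes graph.png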