def _construct_datalineage_urn(self, sql_table_name: str, looker_view: LookerView) -> str:
    """Return the URN of the upstream dataset that ``sql_table_name`` refers to.

    Two kinds of references are handled:

    * ``<view>.SQL_TABLE_NAME`` (matched case-insensitively) is the alias
      Looker uses to reference another derived table. The upstream is then a
      Looker view living in the same project and model as ``looker_view``.
      See https://docs.looker.com/data-modeling/learning-lookml/derived-tables#syntax_for_referencing_a_derived_table
    * Anything else is treated as a table on the view's connection; the name
      is fully qualified and lower-cased before the dataset URN is built.

    Args:
        sql_table_name: Table reference as it appears in the view.
        looker_view: The view that owns the reference; supplies the
            connection and the project/model used for derived-table lookups.

    Returns:
        The URN string of the upstream dataset.
    """
    logger.debug(f"sql_table_name={sql_table_name}")
    connection_def: LookerConnectionDefinition = looker_view.connection

    derived_table_alias = re.fullmatch(
        r"\w+\.SQL_TABLE_NAME", sql_table_name, flags=re.I
    )

    if derived_table_alias is None:
        # Plain table reference: put it in canonical form (db and schema
        # names added) before turning it into a platform dataset URN.
        qualified_name = self._generate_fully_qualified_name(
            sql_table_name, connection_def
        )
        return builder.make_dataset_urn_with_platform_instance(
            platform=connection_def.platform,
            name=qualified_name.lower(),
            platform_instance=connection_def.platform_instance,
            env=connection_def.platform_env or self.source_config.env,
        )

    # Cascading derived table: the part before the dot is the name of the
    # upstream view, resolved against the current view's project and model.
    upstream_view_name = sql_table_name.lower().partition(".")[0]
    upstream_view_id = LookerViewId(
        project_name=looker_view.id.project_name,
        model_name=looker_view.id.model_name,
        view_name=upstream_view_name,
    )
    return upstream_view_id.get_urn(self.source_config)
def from_looker_dict(
    cls,
    project_name: str,
    model_name: str,
    looker_view: dict,
    connection: LookerConnectionDefinition,
    looker_viewfile: LookerViewFile,
    looker_viewfile_loader: LookerViewFileLoader,
    reporter: LookMLSourceReport,
    parse_table_names_from_sql: bool = False,
    sql_parser_path: str = "datahub.utilities.sql_parser.DefaultSQLParser",
) -> Optional["LookerView"]:
    """Build a ``LookerView`` from the parsed LookML dictionary of one view.

    Args:
        project_name: Project the view belongs to (used in the view id).
        model_name: Model the view belongs to (used in the view id).
        looker_view: Parsed view definition; must contain ``"name"`` and may
            contain ``"derived_table"``, ``"dimensions"``,
            ``"dimension_groups"`` and ``"measures"``.
        connection: Connection definition stored on the resulting view.
        looker_viewfile: File the view was read from; provides the absolute
            path and raw content stored on the resulting view.
        looker_viewfile_loader: Loader used while resolving ``extends``.
        reporter: Report object handed to the helpers that resolve
            ``extends`` and extract metadata from SQL.
        parse_table_names_from_sql: Forwarded to the SQL metadata extraction
            for derived tables.
        sql_parser_path: Forwarded to the SQL metadata extraction for
            derived tables.

    Returns:
        The constructed ``LookerView``. For a derived table the view also
        carries ``view_details`` (logic, language, materialization) whenever
        the derived table defines ``sql`` or ``explore_source``.
    """
    view_name = looker_view["name"]
    logger.debug(f"Handling view {view_name} in model {model_name}")

    # The sql_table_name might be defined in another view and this view is
    # extending that view, so we resolve this field while taking that into
    # account.
    sql_table_name: Optional[str] = LookerView.get_including_extends(
        view_name=view_name,
        looker_view=looker_view,
        connection=connection,
        looker_viewfile=looker_viewfile,
        looker_viewfile_loader=looker_viewfile_loader,
        field="sql_table_name",
        reporter=reporter,
    )
    # Some sql_table_name fields contain quotes like: optimizely."group",
    # just remove the quotes.
    if sql_table_name is not None:
        sql_table_name = sql_table_name.replace('"', "").replace("`", "")

    derived_table = looker_view.get("derived_table", None)

    dimensions = cls._get_fields(
        looker_view.get("dimensions", []), ViewFieldType.DIMENSION
    )
    dimension_groups = cls._get_fields(
        looker_view.get("dimension_groups", []), ViewFieldType.DIMENSION_GROUP
    )
    measures = cls._get_fields(
        looker_view.get("measures", []), ViewFieldType.MEASURE
    )
    fields: List[ViewField] = dimensions + dimension_groups + measures

    # Parse SQL from derived tables to extract dependencies.
    if derived_table is not None:
        fields, sql_table_names = cls._extract_metadata_from_sql_query(
            reporter,
            parse_table_names_from_sql,
            sql_parser_path,
            view_name,
            sql_table_name,
            derived_table,
            fields,
        )

        # Also store the view logic and materialization. When both keys are
        # present, explore_source takes precedence over sql.
        if "explore_source" in derived_table:
            view_logic = str(derived_table["explore_source"])
            view_lang = "lookml"
        elif "sql" in derived_table:
            view_logic = derived_table["sql"]
            view_lang = "sql"
        else:
            # A derived table may define neither key (for example one built
            # with `sql_create`). Previously this left view_logic/view_lang
            # unbound and crashed with UnboundLocalError further down.
            view_logic = None
            view_lang = None

        # Any persistence trigger marks the derived table as materialized...
        materialized = any(
            key in derived_table
            for key in ("datagroup_trigger", "sql_trigger_value", "persist_for")
        )
        # ...but an explicit materialized_view setting has the final word.
        if "materialized_view" in derived_table:
            materialized = derived_table["materialized_view"] == "yes"

        # view_details is only supplied when the view logic is known; it is
        # omitted otherwise, exactly like the non-derived case below.
        optional_kwargs = {}
        if view_lang is None:
            logger.warning(
                f"Derived table of view {view_name} defines neither 'sql' nor "
                "'explore_source'; view logic will not be recorded"
            )
        else:
            optional_kwargs["view_details"] = ViewProperties(
                materialized=materialized,
                viewLogic=view_logic,
                viewLanguage=view_lang,
            )

        return LookerView(
            id=LookerViewId(
                project_name=project_name,
                model_name=model_name,
                view_name=view_name,
            ),
            absolute_file_path=looker_viewfile.absolute_file_path,
            connection=connection,
            sql_table_names=sql_table_names,
            fields=fields,
            raw_file_content=looker_viewfile.raw_file_content,
            **optional_kwargs,
        )

    # If not a derived table, then this view essentially wraps an existing
    # object in the database.
    if sql_table_name is not None:
        # If sql_table_name is set, there is a single dependency in the
        # view, on the sql_table_name.
        sql_table_names = [sql_table_name]
    else:
        # Otherwise, default to the view name as per the docs:
        # https://docs.looker.com/reference/view-params/sql_table_name-for-view
        sql_table_names = [view_name]

    return LookerView(
        id=LookerViewId(
            project_name=project_name,
            model_name=model_name,
            view_name=view_name,
        ),
        absolute_file_path=looker_viewfile.absolute_file_path,
        sql_table_names=sql_table_names,
        connection=connection,
        fields=fields,
        raw_file_content=looker_viewfile.raw_file_content,
    )
def from_looker_dict(
    cls,
    project_name: str,
    model_name: str,
    looker_view: dict,
    connection: LookerConnectionDefinition,
    looker_viewfile: LookerViewFile,
    looker_viewfile_loader: LookerViewFileLoader,
    reporter: LookMLSourceReport,
    parse_table_names_from_sql: bool = False,
    sql_parser_path: str = "datahub.utilities.sql_parser.DefaultSQLParser",
) -> Optional["LookerView"]:
    """Create a ``LookerView`` from the parsed LookML dictionary of one view.

    The view's upstream table list is chosen as follows:

    * derived table, SQL parsing enabled and a ``sql`` key present: the
      tables returned by ``cls._get_sql_table_names`` for that query;
    * any other derived table: an empty list;
    * regular view: the (extends-aware, quote-stripped) ``sql_table_name``
      or, when none is set, the view name itself.

    Args:
        project_name: Project the view belongs to (used in the view id).
        model_name: Model the view belongs to (used in the view id).
        looker_view: Parsed view definition; must contain ``"name"``.
        connection: Connection definition stored on the resulting view.
        looker_viewfile: File the view was read from; provides the absolute
            path and raw content stored on the resulting view.
        looker_viewfile_loader: Loader used while resolving ``extends``.
        reporter: Report object handed to the ``extends`` resolution.
        parse_table_names_from_sql: Whether to parse a derived table's SQL
            for upstream table names.
        sql_parser_path: Parser identifier passed to the SQL table-name
            extraction.

    Returns:
        The constructed ``LookerView``.
    """
    view_name = looker_view["name"]
    logger.debug(f"Handling view {view_name} in model {model_name}")

    # sql_table_name may be inherited from a view this one extends, so it is
    # looked up through the extends chain rather than read directly.
    resolved_table: Optional[str] = LookerView.get_including_extends(
        view_name=view_name,
        looker_view=looker_view,
        connection=connection,
        looker_viewfile=looker_viewfile,
        looker_viewfile_loader=looker_viewfile_loader,
        field="sql_table_name",
        reporter=reporter,
    )
    if resolved_table is not None:
        # Names such as optimizely."group" carry quoting characters; drop
        # both double quotes and backticks.
        resolved_table = resolved_table.replace('"', "").replace("`", "")

    derived_table = looker_view.get("derived_table", None)

    fields: List[ViewField] = [
        *cls._get_fields(
            looker_view.get("dimensions", []), ViewFieldType.DIMENSION
        ),
        *cls._get_fields(
            looker_view.get("dimension_groups", []),
            ViewFieldType.DIMENSION_GROUP,
        ),
        *cls._get_fields(
            looker_view.get("measures", []), ViewFieldType.MEASURE
        ),
    ]

    if derived_table is None:
        # Not a derived table: the view wraps one existing database object.
        # Without an explicit sql_table_name the view name is the default:
        # https://docs.looker.com/reference/view-params/sql_table_name-for-view
        upstream_tables = [
            view_name if resolved_table is None else resolved_table
        ]
    elif parse_table_names_from_sql and "sql" in derived_table:
        logger.debug(
            f"Parsing sql from derived table section of view: {view_name}"
        )
        # Dependencies are the tables referenced by the derived table query.
        upstream_tables = cls._get_sql_table_names(
            derived_table["sql"], sql_parser_path
        )
    else:
        # Derived table whose SQL is not parsed: no known dependencies.
        upstream_tables = []

    return LookerView(
        id=LookerViewId(
            project_name=project_name,
            model_name=model_name,
            view_name=view_name,
        ),
        absolute_file_path=looker_viewfile.absolute_file_path,
        connection=connection,
        sql_table_names=upstream_tables,
        fields=fields,
        raw_file_content=looker_viewfile.raw_file_content,
    )