def _extract_metadata_from_sql_query( cls: Type, reporter: SourceReport, parse_table_names_from_sql: bool, sql_parser_path: str, view_name: str, sql_table_name: Optional[str], derived_table: dict, fields: List[ViewField], ) -> Tuple[List[ViewField], List[str]]: sql_table_names: List[str] = [] if parse_table_names_from_sql and "sql" in derived_table: logger.debug( f"Parsing sql from derived table section of view: {view_name}") sql_query = derived_table["sql"] # Skip queries that contain liquid variables. We currently don't parse them correctly if "{%" in sql_query: logger.debug( f"{view_name}: Skipping sql_query parsing since it contains liquid variables" ) return fields, sql_table_names # Looker supports sql fragments that omit the SELECT and FROM parts of the query # Add those in if we detect that it is missing if not re.search(r"SELECT\s", sql_query, flags=re.I): # add a SELECT clause at the beginning sql_query = "SELECT " + sql_query if not re.search(r"FROM\s", sql_query, flags=re.I): # add a FROM clause at the end sql_query = f"{sql_query} FROM {sql_table_name if sql_table_name is not None else view_name}" # Get the list of tables in the query try: sql_info = cls._get_sql_info(sql_query, sql_parser_path) sql_table_names = sql_info.table_names column_names = sql_info.column_names if fields == []: # it seems like the view is defined purely as sql, let's try using the column names to populate the schema fields = [ # set types to unknown for now as our sql parser doesn't give us column types yet ViewField(c, "unknown", "", ViewFieldType.UNKNOWN) for c in column_names ] except Exception as e: reporter.report_warning( f"looker-view-{view_name}", f"Failed to parse sql query, lineage will not be accurate. Exception: {e}", ) return fields, sql_table_names
def _get_fields(cls, field_list: List[Dict], type_cls: ViewFieldType) -> List[ViewField]: fields = [] for field_dict in field_list: is_primary_key = field_dict.get("primary_key", "no") == "yes" name = field_dict["name"] native_type = field_dict.get("type", "string") description = field_dict.get("description", "") field = ViewField( name=name, type=native_type, description=description, is_primary_key=is_primary_key, field_type=type_cls, ) fields.append(field) return fields