Esempio n. 1
0
    def _extract_metadata_from_sql_query(
        cls: Type,
        reporter: SourceReport,
        parse_table_names_from_sql: bool,
        sql_parser_path: str,
        view_name: str,
        sql_table_name: Optional[str],
        derived_table: dict,
        fields: List[ViewField],
    ) -> Tuple[List[ViewField], List[str]]:
        """Extract table names (and fallback fields) from a derived table's SQL.

        Parsing only happens when ``parse_table_names_from_sql`` is true and
        ``derived_table`` has a ``"sql"`` entry; otherwise ``fields`` is
        returned untouched together with an empty table-name list.

        Args:
            reporter: Receives a warning when the SQL cannot be parsed.
            parse_table_names_from_sql: Master switch for SQL parsing.
            sql_parser_path: Forwarded unchanged to ``cls._get_sql_info``.
            view_name: Used in log/warning messages and as the fallback table
                name when a FROM clause has to be synthesized.
            sql_table_name: Preferred table name for a synthesized FROM clause;
                ``view_name`` is used when this is ``None``.
            derived_table: Mapping that may carry the query under ``"sql"``.
            fields: Fields already known for the view. When empty, they are
                replaced by one ``ViewField`` per column name reported by the
                parser.

        Returns:
            A ``(fields, sql_table_names)`` tuple. ``sql_table_names`` is empty
            when parsing was skipped or failed before table names were read.
        """
        sql_table_names: List[str] = []
        if not parse_table_names_from_sql or "sql" not in derived_table:
            return fields, sql_table_names

        logger.debug(
            f"Parsing sql from derived table section of view: {view_name}")
        sql_query = derived_table["sql"]

        # Skip queries that contain liquid variables. We currently don't parse them correctly
        if "{%" in sql_query:
            logger.debug(
                f"{view_name}: Skipping sql_query parsing since it contains liquid variables"
            )
            return fields, sql_table_names

        # Looker supports sql fragments that omit the SELECT and FROM parts of
        # the query; add those in when they are missing. The probes are
        # anchored with \b so that identifiers which merely end in the keyword
        # (e.g. "valid_from", "is_select") are not mistaken for the clause.
        if not re.search(r"\bSELECT\s", sql_query, flags=re.I):
            # add a SELECT clause at the beginning
            sql_query = "SELECT " + sql_query
        if not re.search(r"\bFROM\s", sql_query, flags=re.I):
            # add a FROM clause at the end
            sql_query = f"{sql_query} FROM {sql_table_name if sql_table_name is not None else view_name}"

        # Get the list of tables in the query. This is best-effort: any parser
        # failure is reported as a warning instead of aborting ingestion.
        try:
            sql_info = cls._get_sql_info(sql_query, sql_parser_path)
            sql_table_names = sql_info.table_names
            column_names = sql_info.column_names
            if not fields:
                # it seems like the view is defined purely as sql, let's try using the column names to populate the schema
                fields = [
                    # set types to unknown for now as our sql parser doesn't give us column types yet
                    ViewField(c, "unknown", "", ViewFieldType.UNKNOWN)
                    for c in column_names
                ]
        except Exception as e:
            reporter.report_warning(
                f"looker-view-{view_name}",
                f"Failed to parse sql query, lineage will not be accurate. Exception: {e}",
            )

        return fields, sql_table_names
Esempio n. 2
0
 def _get_fields(cls, field_list: List[Dict],
                 type_cls: ViewFieldType) -> List[ViewField]:
     """Convert raw field dictionaries into ``ViewField`` objects.

     Each entry must provide ``"name"``. ``"type"`` defaults to ``"string"``
     and ``"description"`` to an empty string. An entry counts as a primary
     key only when its ``"primary_key"`` value is exactly ``"yes"``. Every
     resulting field is tagged with ``type_cls`` as its ``field_type``.
     """
     return [
         ViewField(
             is_primary_key=entry.get("primary_key", "no") == "yes",
             name=entry["name"],
             type=entry.get("type", "string"),
             description=entry.get("description", ""),
             field_type=type_cls,
         )
         for entry in field_list
     ]