def _get_vds_dependency_paths(self, vds):
    """Return the paths of the datasets that the given VDS depends on.

    The dependencies are derived from the VDS SQL text when the source is
    flagged as CE or graph support is switched off in the configuration,
    and also when the graph lookup returns ``None``. Otherwise they are the
    normalized ``path`` values of the ``parents`` reported by the graph.

    :param vds: VDS entity; ``vds['sql']`` and ``vds['id']`` are read.
    :return: result of ``parse_sql.tables_in_query`` or a list of
        normalized parent paths.
    """
    self._logger.debug(
        "_get_vds_dependency_paths: processing vds: " + self._utils.get_entity_desc(vds))

    graph_unavailable = self._config.source_ce or not self._config.source_graph_support
    if graph_unavailable:
        return parse_sql.tables_in_query(vds['sql'])

    graph = self._dremio_env.get_catalog_entity_graph_by_id(vds['id'])
    if graph is None:
        # The graph lookup yielded nothing: warn and fall back to the SQL text.
        self._logger.warn(
            "Could not receive Graph via API. Try to set graph_api_support to False in the job configuration.")
        return parse_sql.tables_in_query(vds['sql'])

    return [self._utils.normalize_path(parent['path']) for parent in graph['parents']]
def _get_vds_dependency_paths(self, vds):
    """Return the dependency paths of the given VDS.

    When the source is not CE and ``self._d.vds_parents`` is populated, the
    ``parents`` of the entry whose ``path`` equals ``vds['path']`` are
    returned. In every other case the dependencies are derived from the
    VDS SQL text. That includes the case where no entry matches this VDS,
    which previously fell off the end of the loop and returned ``None``.

    :param vds: VDS entity; ``vds['path']`` and ``vds['sql']`` are read.
    :return: the matching entry's ``parents`` or the result of
        ``parse_sql.tables_in_query``; never ``None``.
    """
    # CE does not support graph
    if not self._is_source_ce() and self._d.vds_parents:
        for vds_entry in self._d.vds_parents:
            if vds_entry['path'] == vds['path']:
                return vds_entry['parents']
    # Either graph data is not usable at all or it has no entry for this
    # VDS: derive the dependencies from the SQL text instead.
    return parse_sql.tables_in_query(vds['sql'])
def _process_sql(self, sql, sql_context=None):
    """Discover and process every dataset referenced by a SQL statement.

    Each table path found in ``sql`` is prefixed with the normalized
    ``sql_context`` plus ``"/"`` when a context is given, and handed to
    ``_discover_dependencies``. Afterwards every entry of ``_pds_list`` and
    then of ``_vds_list`` is processed, and finally the file is written.

    :param sql: SQL text to analyse.
    :param sql_context: optional context used to prefix the table paths.
    """
    schema = "" if sql_context is None else self._utils.normalize_path(sql_context) + "/"

    # Collect all PDS and VDS with the entire dependency hierarchy
    for table_path in parse_sql.tables_in_query(sql):
        self._discover_dependencies(schema + table_path)

    # Create SQL statements for all dependencies
    for physical_dataset in self._pds_list:
        self._process_pds(physical_dataset)
    for virtual_dataset in self._vds_list:
        self._process_vds(virtual_dataset)

    # Write file
    self._write_file()
def _discover_dependencies(self, path):
    """Collect the dataset at ``path`` and, recursively, what it depends on.

    A physical dataset is appended to ``_pds_list`` and ends the recursion.
    A virtual dataset is appended to ``_vds_list`` and every table found in
    its SQL is resolved to an absolute path (using the dataset's SQL
    context) and discovered in turn.

    An unresolved path or an unknown entity type is reported through
    ``self._logger.fatal``. If that call returns, the method stops here
    rather than going on to read ``dataset['sql']``; previously that would
    have subscripted ``None`` or traversed an entity already reported as
    unsupported.

    :param path: catalog path of the dataset to look up.
    """
    dataset = self._dremio_env.get_catalog_entity_by_path(path)
    if dataset is None:
        self._logger.fatal(
            "_discover_dependencies: Could not resolve dependency: " + path)
        # Nothing to traverse without an entity.
        return

    dataset_type = dataset['type']
    if dataset_type == 'PHYSICAL_DATASET':
        self._pds_list.append(dataset)
        return
    if dataset_type != 'VIRTUAL_DATASET':
        self._logger.fatal(
            "_discover_dependencies: Unknown Entity Type: " + dataset_type)
        # Do not traverse an entity that was just reported as unsupported.
        return
    self._vds_list.append(dataset)

    # Process recursive dependencies
    for dependency_path in parse_sql.tables_in_query(dataset['sql']):
        sql_context = self._utils.get_sql_context(dataset)
        self._discover_dependencies(
            self._utils.get_absolute_path(dependency_path, sql_context))