def _serialize_popular_tables(self, entities: list) -> List[PopularTable]:
    """
    Convert Atlas table entities into PopularTable objects.

    :param entities: List of entities from atlas client
    :return: a list of PopularTable objects
    """
    serialized = []
    for entity in entities:
        attrs = entity.attributes
        qn_parts = parse_table_qualified_name(
            qualified_name=attrs.get(self.QN_KEY))
        # Prefer the name parsed out of the qualified name; fall back to
        # the entity's own 'name' attribute.
        serialized.append(PopularTable(
            database=entity.typeName,
            cluster=qn_parts.get("cluster_name", ''),
            schema=qn_parts.get("db_name", ''),
            name=qn_parts.get("table_name") or attrs.get('name'),
            description=attrs.get('description') or attrs.get('comment')))
    return serialized
def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
    """
    Fetch the tables with the highest popularity score via Atlas basic search.

    :param num_entries: Number of popular tables to fetch
    :return: A List of popular tables instances
    """
    popular_query_params = {
        'typeName': 'Table',
        'sortBy': 'popularityScore',
        'sortOrder': 'DESCENDING',
        'excludeDeletedEntities': True,
        'limit': num_entries
    }
    search_results = self._driver.search_basic.create(
        data=popular_query_params)

    # Delegate to the shared serializer instead of duplicating the
    # entity -> PopularTable mapping inline.
    return self._serialize_popular_tables(search_results.entities)
def _prepare_tables(self, response: EntityCollection, enhance_metadata: bool = False) -> List[Table]:
    """
    Render Table objects from an Atlas {response} with table entities.

    :param response: Collection of Atlas Entities
    :param enhance_metadata: Should Atlas be queried to acquire complete entity
    definitions (search might not return all available attributes)
    :return: List of Table objects
    """
    tables: List[Table] = []

    # When enhancement is requested, re-query Atlas by GUID (including
    # relationship information) to get full entity definitions — Atlas DSL
    # search returns entities with a minimal set of attributes.
    if enhance_metadata:
        guids = [hit.guid for hit in response]
        entities = self._extract_entities(
            self.atlas.entity_bulk(guid=guids, ignoreRelationships=False))
    else:
        entities = response

    for entity in entities:
        attrs = entity.attributes
        qn = parse_table_qualified_name(
            qualified_name=attrs.get(self.ATLAS_QN_ATTRIBUTE))

        name = qn.get('table_name') or attrs.get('name')
        schema = qn.get('db_name', '')
        cluster = qn.get('cluster_name', '')

        tags: List[Tag] = [
            Tag(tag_name=classification)
            for classification in entity.classificationNames or list()
        ]
        # Badges currently mirror the tags (same list object).
        badges: List[Tag] = tags

        tables.append(Table(
            name=name,
            key=f"{entity.typeName}://{cluster}.{schema}/{name}",
            description=attrs.get('description'),
            cluster=cluster,
            database=entity.typeName,
            schema=schema,
            tags=tags,
            badges=badges,
            column_names=[],
            last_updated_timestamp=attrs.get('updateTime')))

    return tables
def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
    """
    Fetch the top {num_entries} tables ranked by popularity score, resolved
    through their table_metadata entities.

    :param num_entries: Number of popular tables to fetch
    :return: A List of popular tables instances
    """
    query_metadata_ids = {
        'query': f'FROM Table SELECT metadata.__guid '
                 f'ORDERBY popularityScore desc '
                 f'LIMIT {num_entries}'
    }

    popular_tables: List[PopularTable] = []
    for metadata in self._get_metadata_entities(query_metadata_ids):
        # The metadata entity points back at its table via 'parentEntity'.
        parent = metadata.relationshipAttributes.get("parentEntity")
        attrs = parent.get(self.ATTRS_KEY)
        qn = parse_table_qualified_name(
            qualified_name=attrs.get(self.QN_KEY))

        popular_tables.append(PopularTable(
            database=parent.get("typeName"),
            cluster=qn.get("cluster_name", ''),
            schema=qn.get("db_name", ''),
            name=qn.get("table_name") or attrs.get('name'),
            description=attrs.get('description')))

    return popular_tables
def get_table(self, *, table_uri: str) -> Table:
    """
    Gathers all the information needed for the Table Detail Page.

    :param table_uri: URI of the table entity to look up
    :return: A Table object with all the information available
    or gathered from different entities.
    :raises BadRequest: when a required attribute is missing on the entity
    """
    entity = self._get_table_entity(table_uri=table_uri)
    table_details = entity.entity

    try:
        attrs = table_details[self.ATTRS_KEY]

        programmatic_descriptions = self._get_programmatic_descriptions(
            attrs.get('parameters'))
        table_qn = parse_table_qualified_name(
            qualified_name=attrs.get(self.QN_KEY))

        # 'classifications' may be present with a None value, hence `or list()`.
        tags = [
            Tag(tag_name=classification.get('typeName'), tag_type="default")
            for classification in table_details.get("classifications") or list()
        ]

        columns = self._serialize_columns(entity=entity)

        reports_guids = [
            report.get("guid")
            for report in attrs.get("reports") or list()
        ]

        return Table(
            database=table_details.get('typeName'),
            cluster=table_qn.get('cluster_name', ''),
            schema=table_qn.get('db_name', ''),
            name=attrs.get('name') or table_qn.get("table_name", ''),
            tags=tags,
            description=attrs.get('description') or attrs.get('comment'),
            owners=[User(email=attrs.get('owner'))],
            resource_reports=self._get_reports(guids=reports_guids),
            columns=columns,
            table_readers=self._get_readers(attrs.get(self.QN_KEY)),
            last_updated_timestamp=self._parse_date(
                table_details.get('updateTime')),
            programmatic_descriptions=programmatic_descriptions)
    except KeyError as ex:
        LOGGER.exception(
            'Error while accessing table information. {}'.format(str(ex)))
        raise BadRequest(
            'Some of the required attributes '
            'are missing in : ( {table_uri} )'.format(table_uri=table_uri))
def _parse_results(self, response: EntityCollection) -> List[Table]:
    """
    Map an Atlas search {response} with table entities to Table objects.

    Search hits only carry GUIDs, so a bulk entity fetch is issued first to
    obtain the full attribute set for each table.

    :param response: Collection of Atlas search hits
    :return: list of tables
    """
    table_results: List[Table] = []
    ids = [hit.guid for hit in response]

    # Receive all entities, with attributes.
    # FixMe: Can ask for the Description and Qualified Name
    # FixMe: in DSL query above, once it uses indexes
    entities = self._entities(
        self.atlas.entity_bulk(guid=ids, ignoreRelationships=True))

    # NOTE: the loop variable is named `entity` (not `table`) so the
    # Table(...) result below does not shadow the entity being read.
    for entity in entities:
        table_attrs = entity.attributes
        table_qn = parse_table_qualified_name(
            qualified_name=table_attrs.get(self.QN_KEY))

        table_name = table_qn.get("table_name") or table_attrs.get('name')
        db_name = table_qn.get("db_name", '')
        db_cluster = table_qn.get("cluster_name", '')

        # 'classifications' may be present with a None value, hence `or list()`.
        tags: List[Tag] = [
            Tag(tag_name=classification.get('typeName'))
            for classification in table_attrs.get("classifications") or list()
        ]

        # TODO: populate badges and column names once available/needed.
        badges: List[Tag] = []
        columns: List[str] = []

        table_results.append(Table(
            name=table_name,
            key=f"{entity.typeName}://{db_cluster}.{db_name}/{table_name}",
            description=table_attrs.get('description'),
            cluster=db_cluster,
            database=entity.typeName,
            schema=db_name,
            column_names=columns,
            tags=tags,
            badges=badges,
            last_updated_timestamp=table_attrs.get('updateTime')))

    return table_results
def get_table_metadata(self, table_entity):
    """
    Build a table_metadata entity dict for the given table entity.

    The metadata qualified name follows the pattern:
    database.table.metadata@cluster
    """
    table_info = parse_table_qualified_name(
        table_entity.attributes.get("qualifiedName"))
    metadata_qn = f'{table_info["db_name"]}.{table_info["table_name"]}.metadata@{table_info["cluster_name"]}'

    return {
        'typeName': 'table_metadata',
        'attributes': {
            'qualifiedName': metadata_qn,
            # New metadata entities start with a zero popularity score.
            'popularityScore': 0,
            'table': {'guid': table_entity.guid},
        },
    }
def _fetch_tables(self, query_params: Dict) -> Tuple[List[Table], int]:
    """
    Execute an Atlas Basic Search and serialize the hits into Table objects.

    :param query_params: A dictionary of query parameter need to pass to
    Basic Search Post method of Atlas.
    :return: list of tables, along with the approximate count
    """
    try:
        # Fetch the table entities based on query terms
        table_results = self.atlas.search_basic.create(data=query_params)
    except BadRequest as ex:
        LOGGER.error(f"Fetching Tables Failed : {str(ex)}")
        return [], 0

    # Truthiness check covers both an empty collection and None;
    # `not len(...)` would raise TypeError when `entities` is None.
    if not table_results.entities:
        return [], 0

    # noinspection PyProtectedMember
    tables_count = table_results._data.get("approximateCount")

    tables = []
    for entity in table_results.entities:
        table_attrs = entity.attributes
        table_qn = parse_table_qualified_name(
            qualified_name=table_attrs.get(self.QN_KEY))

        table_name = table_qn.get("table_name") or table_attrs.get('name')
        db_name = table_qn.get("db_name", '')
        db_cluster = table_qn.get("cluster_name", '')

        tables.append(Table(
            name=table_name,
            key=f"{entity.typeName}://{db_cluster}.{db_name}/{table_name}",
            description=table_attrs.get('description') or table_attrs.get('comment'),
            cluster=db_cluster,
            database=entity.typeName,
            schema=db_name,
            column_names=[],
            tags=[],
            badges=[],
            last_updated_timestamp=table_attrs.get('updateTime')))

    return tables, tables_count
if not _regex_result: qn_regex = re.compile( r""" ^(?P<column_name>.*)@(?P<cluster_name>.*?)$ """, re.X) _regex_result = apply_qn_regex(qualified_name, qn_regex) if not _regex_result: qn_regex = re.compile(r""" ^(?P<column_name>.*)$ """, re.X) _regex_result = apply_qn_regex(qualified_name, qn_regex) _regex_result = _regex_result.groupdict() qn_dict = { 'column_name': _regex_result.get('column_name', qualified_name), 'table_name': _regex_result.get('table_name', "default"), 'db_name': _regex_result.get('db_name', "default"), 'cluster_name': _regex_result.get('cluster_name', "default"), } return qn_dict x = 'sakila.customer.address@clx' print(parse_column_qualified_name(x)) table = 'sakila.customer@clx' print(parse_table_qualified_name(table))
def test_parse_table_qn(self):
    """A fully qualified 'db.table@cluster' name parses into all three parts."""
    qualified_name = f'{DB}.{TB}@{CL}'
    qn_dict = parse_table_qualified_name(qualified_name)

    assert qn_dict['db_name'] == DB
    assert qn_dict['cluster_name'] == CL
    assert qn_dict['table_name'] == TB
def test_parse_table_qn_only_table(self):
    """A bare table name falls back to the default db and cluster."""
    # No format call needed — the qualified name is just the table name.
    qn_dict = parse_table_qualified_name(TB)

    assert qn_dict['db_name'] == DEFAULT_DB_CLUSTER
    assert qn_dict['cluster_name'] == DEFAULT_DB_CLUSTER
    assert qn_dict['table_name'] == TB
def test_parse_table_qn_without_cluster(self):
    """'db.table' without a cluster suffix falls back to the default cluster."""
    qualified_name = f'{DB}.{TB}'
    qn_dict = parse_table_qualified_name(qualified_name)

    assert qn_dict['db_name'] == DB
    assert qn_dict['cluster_name'] == DEFAULT_DB_CLUSTER
    assert qn_dict['table_name'] == TB
def test_parse_table_qn_without_db(self):
    """'table@cluster' without a db prefix falls back to the default db."""
    qualified_name = f'{TB}@{CL}'
    qn_dict = parse_table_qualified_name(qualified_name)

    assert qn_dict['db_name'] == DEFAULT_DB_CLUSTER
    assert qn_dict['cluster_name'] == CL
    assert qn_dict['table_name'] == TB