예제 #1
0
    def _serialize_popular_tables(self, entities: list) -> List[PopularTable]:
        """
        Convert Atlas table entities into PopularTable objects.

        Cluster, schema and table name come from the parsed qualified name of
        each entity. The entity ``name`` attribute is used when the qualified
        name yields no table name, and ``comment`` is used when
        ``description`` is empty.

        :param entities: List of entities from atlas client
        :return: a list of PopularTable objects, in the order of ``entities``
        """
        def _to_popular_table(entity):
            # Map a single Atlas entity onto a PopularTable.
            attrs = entity.attributes
            qn_parts = parse_table_qualified_name(
                qualified_name=attrs.get(self.QN_KEY))

            name = qn_parts.get("table_name") or attrs.get('name')
            schema = qn_parts.get("db_name", '')
            cluster = qn_parts.get("cluster_name", '')

            return PopularTable(
                database=entity.typeName,
                cluster=cluster,
                schema=schema,
                name=name,
                description=attrs.get('description') or attrs.get('comment'))

        return [_to_popular_table(entity) for entity in entities]
    def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
        """
        Fetch the most popular tables from Atlas.

        Runs a basic search for non-deleted ``Table`` entities sorted by
        ``popularityScore`` in descending order, limited to ``num_entries``
        hits. The hits are converted by ``_serialize_popular_tables`` so the
        entity-to-PopularTable mapping is defined in a single place.

        :param num_entries: Number of popular tables to fetch
        :return: A List of popular tables instances
        """
        popular_query_params = {
            'typeName': 'Table',
            'sortBy': 'popularityScore',
            'sortOrder': 'DESCENDING',
            'excludeDeletedEntities': True,
            'limit': num_entries
        }
        search_results = self._driver.search_basic.create(
            data=popular_query_params)

        # Delegate instead of repeating the serialization loop inline.
        return self._serialize_popular_tables(search_results.entities)
예제 #3
0
    def _prepare_tables(self,
                        response: EntityCollection,
                        enhance_metadata: bool = False) -> List[Table]:
        """
        Render Table objects from an Atlas {response} holding table entities.

        :param response: Collection of Atlas Entities
        :param enhance_metadata: When True, Atlas is queried again (bulk lookup by guid, relationships included)
        and the entities extracted from that answer are used instead of the ones in {response}, since a search
        might not return all available attributes
        :return: List of Table objects
        """
        if enhance_metadata:
            # Entities returned by e.g. Atlas DSL carry a minimal amount of attributes, so the complete
            # definitions (along with relationship information) are collected with one bulk call.
            guids = [hit.guid for hit in response]
            entities = self._extract_entities(
                self.atlas.entity_bulk(guid=guids, ignoreRelationships=False))
        else:
            entities = response

        tables = []
        for entity in entities:
            attrs = entity.attributes
            qn_parts = parse_table_qualified_name(
                qualified_name=attrs.get(self.ATLAS_QN_ATTRIBUTE))

            # Fall back to the plain 'name' attribute when the qualified name gives no table name.
            name = qn_parts.get('table_name') or attrs.get('name')
            schema = qn_parts.get('db_name', '')
            cluster = qn_parts.get('cluster_name', '')

            # classificationNames may be None, hence the ``or []``.
            tags: List[Tag] = [
                Tag(tag_name=classification)
                for classification in entity.classificationNames or []
            ]
            # Badges are the very same list object as the tags.
            badges: List[Tag] = tags

            tables.append(Table(
                name=name,
                key=f"{entity.typeName}://{cluster}.{schema}/{name}",
                description=attrs.get('description'),
                cluster=cluster,
                database=entity.typeName,
                schema=schema,
                tags=tags,
                badges=badges,
                column_names=[],
                last_updated_timestamp=attrs.get('updateTime')))

        return tables
예제 #4
0
    def get_popular_tables(self, *, num_entries: int) -> List[PopularTable]:
        """
        Build PopularTable objects from the metadata entities returned for a
        DSL query ordered by ``popularityScore`` (descending).

        Each metadata entity is expected to expose its table under the
        "parentEntity" relationship attribute; that table supplies the
        attributes and type name used for the PopularTable.

        :param num_entries: Number of popular tables to fetch
        :return: A List of popular tables instances
        """
        dsl_query = ('FROM Table SELECT metadata.__guid '
                     'ORDERBY popularityScore desc '
                     f'LIMIT {num_entries}')

        results = []
        for metadata_entity in self._get_metadata_entities({'query': dsl_query}):
            parent = metadata_entity.relationshipAttributes.get("parentEntity")
            parent_attrs = parent.get(self.ATTRS_KEY)

            qn_parts = parse_table_qualified_name(
                qualified_name=parent_attrs.get(self.QN_KEY))

            # Prefer the table name from the qualified name, else the 'name' attribute.
            name = qn_parts.get("table_name") or parent_attrs.get('name')
            schema = qn_parts.get("db_name", '')
            cluster = qn_parts.get("cluster_name", '')

            results.append(PopularTable(
                database=parent.get("typeName"),
                cluster=cluster,
                schema=schema,
                name=name,
                description=parent_attrs.get('description')))

        return results
    def get_table(self, *, table_uri: str) -> Table:
        """
        Gathers all the information needed for the Table Detail Page.

        :param table_uri: URI of the table; handed to ``_get_table_entity`` to look up the entity
        :return: A Table object with all the information available
        or gathered from different entities.
        :raises BadRequest: when a required key is missing while reading the
        entity; the original KeyError is chained as the cause.
        """
        entity = self._get_table_entity(table_uri=table_uri)
        table_details = entity.entity

        try:
            attrs = table_details[self.ATTRS_KEY]

            programmatic_descriptions = self._get_programmatic_descriptions(
                attrs.get('parameters'))

            table_qn = parse_table_qualified_name(
                qualified_name=attrs.get(self.QN_KEY))

            # Using ``or`` in case the key 'classifications' is there with a None value
            tags = [
                Tag(tag_name=classification.get('typeName'),
                    tag_type="default")
                for classification in table_details.get("classifications") or list()
            ]

            columns = self._serialize_columns(entity=entity)

            # 'reports' may be absent or None; only the guids are needed to fetch the reports.
            reports_guids = [
                report.get("guid")
                for report in attrs.get("reports") or list()
            ]

            table = Table(
                database=table_details.get('typeName'),
                cluster=table_qn.get('cluster_name', ''),
                schema=table_qn.get('db_name', ''),
                name=attrs.get('name') or table_qn.get("table_name", ''),
                tags=tags,
                description=attrs.get('description') or attrs.get('comment'),
                owners=[User(email=attrs.get('owner'))],
                resource_reports=self._get_reports(guids=reports_guids),
                columns=columns,
                table_readers=self._get_readers(attrs.get(self.QN_KEY)),
                last_updated_timestamp=self._parse_date(
                    table_details.get('updateTime')),
                programmatic_descriptions=programmatic_descriptions)

            return table
        except KeyError as ex:
            LOGGER.exception(
                'Error while accessing table information. {}'.format(str(ex)))
            # Chain the KeyError so the missing key stays visible in the traceback.
            raise BadRequest(
                'Some of the required attributes '
                'are missing in : ( {table_uri} )'.format(table_uri=table_uri)) from ex
예제 #6
0
    def _parse_results(self, response: EntityCollection) -> List[Table]:
        """
        Map an atlas {response} with table entities onto Table objects.

        The guids of the hits are used to bulk-fetch the entities
        (relationships ignored) before the mapping takes place.

        :param response: collection of hits, each exposing a ``guid``
        :return: list of tables
        """
        guids = [hit.guid for hit in response]

        # Receive all entities, with attributes
        # FixMe: Can ask for the Description and Qualified Name
        # FixMe: in DSL query above, once it uses indexes
        entities = self._entities(
            self.atlas.entity_bulk(guid=guids, ignoreRelationships=True))

        tables = []
        for entity in entities:
            attrs = entity.attributes

            qn_parts = parse_table_qualified_name(
                qualified_name=attrs.get(self.QN_KEY))

            name = qn_parts.get("table_name") or attrs.get('name')
            schema = qn_parts.get("db_name", '')
            cluster = qn_parts.get("cluster_name", '')

            # Using ``or`` in case the key 'classifications' is there with a None value
            tags: List[Tag] = [
                Tag(tag_name=classification.get('typeName'))
                for classification in attrs.get("classifications") or []
            ]

            # TODO need to populate these
            badges: List[Tag] = []

            # TODO: Implement columns: Not sure if we need this for the search results.
            # (An earlier sketch resolved each entry of attrs['columns'] through the
            # referred entities and collected the column names.)
            column_names: List[str] = []

            tables.append(Table(
                name=name,
                key=f"{entity.typeName}://{cluster}.{schema}/{name}",
                description=attrs.get('description'),
                cluster=cluster,
                database=entity.typeName,
                schema=schema,
                column_names=column_names,
                tags=tags,
                badges=badges,
                last_updated_timestamp=attrs.get('updateTime')))

        return tables
예제 #7
0
    def get_table_metadata(self, table_entity):
        """
        Build the ``table_metadata`` entity definition for a table entity.

        The metadata qualified name has the form
        database.table.metadata@cluster

        :param table_entity: entity exposing ``attributes`` (with "qualifiedName") and ``guid``
        :return: dict with typeName 'table_metadata', a popularityScore of 0
        and a reference to the table by guid
        """
        qn_parts = parse_table_qualified_name(
            table_entity.attributes.get("qualifiedName"))
        guid = table_entity.guid

        db_name = qn_parts["db_name"]
        table_name = qn_parts["table_name"]
        cluster_name = qn_parts["cluster_name"]

        attributes = {
            'qualifiedName': f'{db_name}.{table_name}.metadata@{cluster_name}',
            'popularityScore': 0,
            'table': {'guid': guid},
        }
        return {'typeName': 'table_metadata', 'attributes': attributes}
예제 #8
0
    def _fetch_tables(self, query_params: Dict) -> Tuple[List[Table], int]:
        """
        Run an Atlas basic search and map the hits onto Table objects.

        :param query_params: A dictionary of query parameter need to pass
        to Basic Search Post method of Atlas.
        :return: list of tables, along with the approximate count (the
        "approximateCount" value of the raw search response). ``([], 0)`` is
        returned when the search raises BadRequest or yields no entities.
        """
        try:
            # Fetch the table entities based on query terms
            search_results = self.atlas.search_basic.create(data=query_params)
        except BadRequest as ex:
            LOGGER.error(f"Fetching Tables Failed : {str(ex)}")
            return [], 0

        entities = search_results.entities
        # Truthiness covers an empty collection as well as a missing (None) one,
        # where len() would raise TypeError.
        if not entities:
            return [], 0

        # noinspection PyProtectedMember
        tables_count = search_results._data.get("approximateCount")

        tables = []
        for entity in entities:
            entity_attrs = entity.attributes

            table_qn = parse_table_qualified_name(
                qualified_name=entity_attrs.get(self.QN_KEY))

            # Prefer the table name from the qualified name, else the 'name' attribute.
            table_name = table_qn.get("table_name") or entity_attrs.get('name')
            db_name = table_qn.get("db_name", '')
            db_cluster = table_qn.get("cluster_name", '')

            # Appended directly so the loop variable is never rebound.
            tables.append(Table(
                name=table_name,
                key=f"{entity.typeName}://{db_cluster}.{db_name}/{table_name}",
                description=entity_attrs.get('description')
                or entity_attrs.get('comment'),
                cluster=db_cluster,
                database=entity.typeName,
                schema=db_name,
                column_names=[],
                tags=[],
                badges=[],
                last_updated_timestamp=entity_attrs.get('updateTime')))

        return tables, tables_count
예제 #9
0
    if not _regex_result:
        qn_regex = re.compile(
            r"""
        ^(?P<column_name>.*)@(?P<cluster_name>.*?)$
        """, re.X)
        _regex_result = apply_qn_regex(qualified_name, qn_regex)

    if not _regex_result:
        qn_regex = re.compile(r"""
        ^(?P<column_name>.*)$
        """, re.X)
        _regex_result = apply_qn_regex(qualified_name, qn_regex)

    _regex_result = _regex_result.groupdict()

    qn_dict = {
        'column_name': _regex_result.get('column_name', qualified_name),
        'table_name': _regex_result.get('table_name', "default"),
        'db_name': _regex_result.get('db_name', "default"),
        'cluster_name': _regex_result.get('cluster_name', "default"),
    }

    return qn_dict


# Demo: print what parse_column_qualified_name returns for a sample qualified name.
x = 'sakila.customer.address@clx'
print(parse_column_qualified_name(x))

# Demo: print what parse_table_qualified_name returns for a sample qualified name.
table = 'sakila.customer@clx'
print(parse_table_qualified_name(table))
예제 #10
0
 def test_parse_table_qn(self):
     """A '<db>.<table>@<cluster>' qualified name yields all three parts."""
     parsed = parse_table_qualified_name(f'{DB}.{TB}@{CL}')
     assert parsed['db_name'] == DB
     assert parsed['cluster_name'] == CL
     assert parsed['table_name'] == TB
예제 #11
0
 def test_parse_table_qn_only_table(self):
     """A bare table name gets the default db and cluster."""
     parsed = parse_table_qualified_name(f'{TB}')
     assert parsed['db_name'] == DEFAULT_DB_CLUSTER
     assert parsed['cluster_name'] == DEFAULT_DB_CLUSTER
     assert parsed['table_name'] == TB
예제 #12
0
 def test_parse_table_qn_without_cluster(self):
     """A '<db>.<table>' qualified name gets the default cluster."""
     parsed = parse_table_qualified_name(f'{DB}.{TB}')
     assert parsed['db_name'] == DB
     assert parsed['cluster_name'] == DEFAULT_DB_CLUSTER
     assert parsed['table_name'] == TB
예제 #13
0
 def test_parse_table_qn_without_db(self):
     """A '<table>@<cluster>' qualified name gets the default db."""
     parsed = parse_table_qualified_name(f'{TB}@{CL}')
     assert parsed['db_name'] == DEFAULT_DB_CLUSTER
     assert parsed['cluster_name'] == CL
     assert parsed['table_name'] == TB