def make_entries_for_database(self, database_metadata):
        """Build the Data Catalog entry that represents a Hive database.

        :param database_metadata: object read for its ``name``, ``desc``
            and ``uri`` attributes.
        :return: entry_id, entry
        """
        # Hive is case insensitive, so the entry id is always lowercased.
        lowered_id = f'{database_metadata.name}'.lower()

        db_entry = datacatalog.Entry()
        db_entry.user_specified_system = 'hive'
        db_entry.user_specified_type = 'database'
        db_entry.display_name = database_metadata.name
        db_entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            lowered_id)

        # Only string descriptions are copied over to the entry.
        description = database_metadata.desc
        if isinstance(description, str):
            db_entry.description = description

        db_entry.linked_resource = (
            f'//{self.__metadata_host_server}//{database_metadata.uri}')

        return lowered_id, db_entry
Esempio n. 2
0
    def test_run_should_process_scrape_prepare_ingest_workflow(
            self, mock_scraper, mock_assembled_entry_factory, mock_mapper,
            mock_cleaner, mock_ingestor):
        """Check that ``run()`` triggers every workflow step exactly once."""

        fake_assembled_entry = prepare.AssembledEntryData(
            'test-entry-id-1', datacatalog.Entry(), [])
        entry_factory = mock_assembled_entry_factory.return_value
        entry_factory.make_entries_for_dashboards.return_value = [
            fake_assembled_entry
        ]

        synchronizer = dashboards_synchronizer.DashboardsSynchronizer(
            tableau_server_address='test-server',
            tableau_api_version='test-api-version',
            tableau_username='******',
            tableau_password='******',
            tableau_site='test-site',
            datacatalog_project_id='test-project-id',
            datacatalog_location_id='test-location-id')
        synchronizer.run()

        # Scrape.
        mock_scraper.return_value.scrape_dashboards.assert_called_once()
        # Prepare.
        entry_factory.make_entries_for_dashboards.assert_called_once()
        tag_mapper = mock_mapper.return_value
        tag_mapper.fulfill_tag_fields.assert_called_once()
        # Clean up and ingest.
        metadata_cleaner = mock_cleaner.return_value
        metadata_cleaner.delete_obsolete_metadata.assert_called_once()
        metadata_ingestor = mock_ingestor.return_value
        metadata_ingestor.ingest_metadata.assert_called_once()
Esempio n. 3
0
    def make_entry_for_measure(self, measure_metadata):
        """Assemble a Data Catalog entry for a Qlik Sense Measure.

        :param measure_metadata: mapping read for its ``app``, ``qInfo``
            and ``qMetaDef`` items.
        :return: generated_id, entry
        """
        app_id = measure_metadata.get('app').get('id')
        measure_id = measure_metadata.get('qInfo').get('qId')
        # Measure IDs are usually 7-letter strings, so they are prefixed
        # with the App ID to prevent overlapping.
        generated_id = self.__format_id(constants.ENTRY_ID_PART_MEASURE,
                                        f'{app_id}_{measure_id}')

        meta_def = measure_metadata.get('qMetaDef')

        entry = datacatalog.Entry()
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            generated_id)
        entry.user_specified_type = constants.USER_SPECIFIED_TYPE_MEASURE
        entry.user_specified_system = self.__user_specified_system
        entry.display_name = self._format_display_name(meta_def.get('title'))
        entry.description = meta_def.get('description')

        # linked_resource stays empty: Qlik Sense offers no way to jump
        # directly to a Measure 'edit' page.
        #
        # create_time and update_time stay empty as well: the Measure
        # metadata carries no such info.

        return generated_id, entry
Esempio n. 4
0
    def make_entry_for_look(self, look):
        """Assemble a Data Catalog entry for a Look.

        :param look: object read for its ``id``, ``title``, ``created_at``
            and ``updated_at`` attributes.
        :return: generated_id, entry
        """
        generated_id = self.__format_id(constants.ENTRY_ID_PART_LOOK, look.id)

        entry = datacatalog.Entry()
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            generated_id)
        entry.user_specified_type = constants.USER_SPECIFIED_TYPE_LOOK
        entry.user_specified_system = self.__user_specified_system
        entry.display_name = self._format_display_name(look.title)
        entry.linked_resource = f'{self.__instance_url}/looks/{look.id}'

        create_time = timestamp_pb2.Timestamp()
        create_time.FromDatetime(look.created_at)
        entry.source_system_timestamps.create_time = create_time

        # Without an update date, the creation date is used instead.
        last_change = look.updated_at or look.created_at
        update_time = timestamp_pb2.Timestamp()
        # TODO Evaluate/remove "+ 10" after b/144041881 has been closed.
        update_time.FromDatetime(last_change + datetime.timedelta(seconds=10))
        entry.source_system_timestamps.update_time = update_time

        return generated_id, entry
    def make_entry_for_visualization(self, visualization_metadata):
        """Assemble a Data Catalog entry for a Visualization.

        :param visualization_metadata: mapping read for its ``qInfo`` and
            ``qMetaDef`` items.
        :return: generated_id, entry
        """
        viz_id = visualization_metadata.get('qInfo').get('qId')
        generated_id = self.__format_id(
            constants.ENTRY_ID_PART_VISUALIZATION, viz_id)
        meta_def = visualization_metadata.get('qMetaDef')

        entry = datacatalog.Entry()
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            generated_id)
        entry.user_specified_type = constants.USER_SPECIFIED_TYPE_VISUALIZATION
        entry.user_specified_system = self.__user_specified_system
        entry.display_name = self._format_display_name(meta_def.get('title'))
        entry.description = meta_def.get('description')

        # linked_resource stays empty because Data Catalog currently
        # rejects ``?`` and ``=`` in that field. Once
        # https://issuetracker.google.com/issues/176912978
        # is fixed, the statements below can be enabled:
        #
        # app_id = visualization_metadata.get('app').get('id')
        # entry.linked_resource = f'{self.__site_url}/sense/single' \
        #                         f'?appid={app_id}' \
        #                         f'&obj={viz_id}'

        # create_time and update_time stay empty because the Visualization
        # metadata carries no such info.

        return generated_id, entry
    def create_entry_user_defined_type(cls,
                                       entry_type,
                                       system,
                                       display_name,
                                       name,
                                       description,
                                       linked_resource,
                                       create_time_seconds,
                                       update_time_seconds,
                                       columns=None):
        """Build an entry with a user specified type and system.

        The create/update times are given in seconds; ``columns``, when
        provided and non-empty, are appended to the entry schema.

        :return: the assembled entry
        """

        def _to_timestamp(seconds):
            # Wrap a seconds value into a protobuf Timestamp.
            timestamp = timestamp_pb2.Timestamp()
            timestamp.FromSeconds(seconds)
            return timestamp

        new_entry = datacatalog.Entry()
        new_entry.user_specified_type = entry_type
        new_entry.user_specified_system = system
        new_entry.display_name = display_name
        new_entry.name = name

        create_time = _to_timestamp(create_time_seconds)
        update_time = _to_timestamp(update_time_seconds)
        new_entry.source_system_timestamps.create_time = create_time
        new_entry.source_system_timestamps.update_time = update_time

        new_entry.description = description
        new_entry.linked_resource = linked_resource

        if columns:
            new_entry.schema.columns.extend(columns)

        return new_entry
Esempio n. 7
0
    def make_entry_for_dashboard(self, dashboard):
        """Assemble a Data Catalog entry for a Dashboard.

        Timestamps are only set when the dashboard has a creation date;
        otherwise an info message is logged.

        :return: generated_id, entry
        """
        generated_id = self.__format_id(constants.ENTRY_ID_PART_DASHBOARD,
                                        dashboard.id)

        entry = datacatalog.Entry()
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            generated_id)
        entry.user_specified_type = constants.USER_SPECIFIED_TYPE_DASHBOARD
        entry.user_specified_system = self.__user_specified_system
        entry.display_name = self._format_display_name(dashboard.title)
        entry.linked_resource = \
            f'{self.__instance_url}/dashboards/{dashboard.id}'

        created_at = dashboard.created_at
        if not created_at:  # LookML dashboards come with None
            logging.info('Dashboard "%s" has no created_at information!',
                         dashboard.id)
        else:
            create_time = timestamp_pb2.Timestamp()
            create_time.FromDatetime(created_at)
            entry.source_system_timestamps.create_time = create_time

            update_time = timestamp_pb2.Timestamp()
            # TODO Evaluate/remove "+ 10" after b/144041881 has been closed.
            update_time.FromDatetime(created_at +
                                     datetime.timedelta(seconds=10))
            entry.source_system_timestamps.update_time = update_time

        return generated_id, entry
    def create_user_defined_entry(cls,
                                  entry_id,
                                  entry_type,
                                  system,
                                  display_name,
                                  name,
                                  description,
                                  linked_resource,
                                  create_time_seconds,
                                  update_time_seconds,
                                  tags=None):
        """Build a user defined entry wrapped in an ``AssembledEntryData``.

        The create/update times are given in seconds.

        :return: ``ingest.AssembledEntryData(entry_id, entry, tags)``
        """

        def _to_timestamp(seconds):
            # Wrap a seconds value into a protobuf Timestamp.
            timestamp = timestamp_pb2.Timestamp()
            timestamp.FromSeconds(seconds)
            return timestamp

        new_entry = datacatalog.Entry()
        new_entry.user_specified_type = entry_type
        new_entry.user_specified_system = system
        new_entry.display_name = display_name
        new_entry.name = name

        create_time = _to_timestamp(create_time_seconds)
        update_time = _to_timestamp(update_time_seconds)
        new_entry.source_system_timestamps.create_time = create_time
        new_entry.source_system_timestamps.update_time = update_time

        new_entry.description = description
        new_entry.linked_resource = linked_resource

        return ingest.AssembledEntryData(entry_id, new_entry, tags)
Esempio n. 9
0
    def make_entry_for_entity(self, entity):
        """Assemble a Data Catalog entry from an entity mapping.

        :param entity: mapping read for its ``guid`` and ``data`` items;
            ``data`` must provide ``typeName``.
        :return: generated_id, entry
        """
        entity_guid, entity_data = entity['guid'], entity['data']
        normalized_type = \
            attr_normalizer.DataCatalogAttributeNormalizer.format_name(
                entity_data['typeName'])

        entry = datacatalog.Entry()
        entry.user_specified_system = self.__user_specified_system
        entry.user_specified_type = normalized_type

        raw_id, name, location, columns = self.__get_entry_attributes(
            entry, entity_guid, entity_data)

        # Ids can be reused between types, so the type is prepended.
        generated_id = f'{normalized_type}_{raw_id}'

        # The remaining fields are filled in by helpers that receive the
        # entry under construction.
        self.__set_entry_names(entry, generated_id, name)
        self.__set_linked_resource(entry, generated_id, location,
                                   normalized_type)
        self.__set_source_timestamp_fields(entry, entity_data, generated_id)
        self.__create_schema(entry, columns)

        return generated_id, entry
Esempio n. 10
0
    def make_entry_for_sql_object(self, sql_object_key, sql_object_type,
                                  sql_object_item):
        """Assemble a Data Catalog entry for a SQL object.

        :param sql_object_key: key used to look up the object config.
        :param sql_object_type: value stored as the user specified type.
        :param sql_object_item: mapping holding the object's name and the
            values consumed by the timestamp/description helpers.
        :return: entry_id, entry
        """
        metadata_def = self.__sql_objects_config[sql_object_key][
            constants.SQL_OBJECT_ITEM_METADATA_DEF_KEY]
        object_name = sql_object_item[constants.SQL_OBJECT_ITEM_NAME]
        entry_id = self._format_id(object_name)

        entry = datacatalog.Entry()
        entry.user_specified_type = sql_object_type
        # The entry group id doubles as the user specified system.
        entry.user_specified_system = self.__entry_group_id
        entry.display_name = self._format_display_name(object_name)

        model_fields = self.__filter_entry_model_fields(
            metadata_def[constants.SQL_OBJECT_FIELDS])
        self.__set_entry_system_timestamps(entry, model_fields,
                                           sql_object_item)
        self.__set_entry_description(entry, model_fields, sql_object_item)

        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            entry_id)
        entry.linked_resource = (
            f'{self.__entry_resource_url_prefix}/{entry_id}')

        return entry_id, entry
Esempio n. 11
0
    def make_entry_for_stream(self, stream_metadata):
        """Assemble a Data Catalog entry for a Stream.

        :param stream_metadata: mapping read for its ``id``, ``name``,
            ``createdDate`` and ``modifiedDate`` items.
        :return: generated_id, entry
        """

        def _to_timestamp(date_string):
            # Parse an incoming date string into a protobuf Timestamp.
            parsed = datetime.strptime(
                date_string, self.__INCOMING_TIMESTAMP_UTC_FORMAT)
            timestamp = timestamp_pb2.Timestamp()
            timestamp.FromDatetime(parsed)
            return timestamp

        stream_id = stream_metadata.get('id')
        generated_id = self.__format_id(constants.ENTRY_ID_PART_STREAM,
                                        stream_id)

        entry = datacatalog.Entry()
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            generated_id)
        entry.user_specified_type = constants.USER_SPECIFIED_TYPE_STREAM
        entry.user_specified_system = self.__user_specified_system
        entry.display_name = self._format_display_name(
            stream_metadata.get('name'))
        entry.linked_resource = f'{self.__site_url}/hub/stream/{stream_id}'

        created_date = stream_metadata.get('createdDate')
        entry.source_system_timestamps.create_time = \
            _to_timestamp(created_date)

        # Without a modification date, the creation date is used instead.
        modified_date = stream_metadata.get('modifiedDate') or created_date
        entry.source_system_timestamps.update_time = \
            _to_timestamp(modified_date)

        return generated_id, entry
    def make_entries_for_database(self, database_metadata):
        """Build the Data Catalog entry that represents a Hive database.

        The entry id comes from the lowercased database name, passed
        through ``_format_id_with_hashing``.

        :return: entry_id, entry
        """
        database_name = database_metadata.name
        entry_id = self._format_id_with_hashing(
            database_name.lower(),
            regex_pattern=self.__ENTRY_ID_INVALID_CHARS_REGEX_PATTERN)

        entry = datacatalog.Entry()
        entry.user_specified_system = 'hive'
        entry.user_specified_type = 'database'
        entry.display_name = self._format_display_name(database_name)
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            entry_id)

        # Only string descriptions are copied over to the entry.
        description = database_metadata.desc
        if isinstance(description, str):
            entry.description = description

        raw_resource = (
            f'//{self.__metadata_host_server}//{database_metadata.uri}')
        entry.linked_resource = self._format_linked_resource(raw_resource)

        return entry_id, entry
    def make_entry_for_tables(self, table, table_container_name):
        """Assemble a Data Catalog entry, schema included, from a table dict.

         :param table: dict read for ``name`` and ``columns`` and,
             optionally, ``desc``, ``create_time`` and ``update_time``.
         :param table_container_name: prefix used to build the entry id.
         :return: entry_id, entry
        """

        def _make_column_schema(column):
            # Missing (NA) column descriptions become empty strings.
            raw_desc = column.get('desc')
            column_desc = '' if pd.isna(raw_desc) else raw_desc
            return datacatalog.ColumnSchema(
                column=self._format_id(column['name']),
                description=column_desc,
                type=DataCatalogEntryFactory.__format_entry_column_type(
                    column['type']))

        table_name = table['name']
        entry_id = self._format_id(f'{table_container_name}__{table_name}')

        entry = datacatalog.Entry()
        entry.user_specified_type = \
            self.__metadata_definition['table_def']['type']
        # The entry group id doubles as the user specified system.
        entry.user_specified_system = self.__entry_group_id
        entry.display_name = self._format_display_name(table_name)
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            entry_id)

        # Missing (NA) table descriptions become empty strings.
        table_desc = table.get('desc')
        entry.description = '' if pd.isna(table_desc) else table_desc

        formatted_table_name = self._format_id(table_name)
        entry.linked_resource = (
            f'//{self.__metadata_host_server}//{formatted_table_name}')

        create_seconds, update_seconds = \
            DataCatalogEntryFactory.__convert_source_system_timestamp_fields(
                table.get('create_time'),
                table.get('update_time'))
        # Timestamps are set only when both values are available.
        if create_seconds and update_seconds:
            create_time = timestamp_pb2.Timestamp()
            create_time.FromSeconds(create_seconds)
            entry.source_system_timestamps.create_time = create_time

            update_time = timestamp_pb2.Timestamp()
            update_time.FromSeconds(update_seconds)
            entry.source_system_timestamps.update_time = update_time

        entry.schema.columns.extend(
            [_make_column_schema(column) for column in table['columns']])

        return entry_id, entry
def make_fake_entry(entry_id: str,
                    linked_resource: Optional[str] = None,
                    user_specified_type: Optional[str] = None) -> Entry:
    """Return an Entry named ``fake_entries/<entry_id>`` for testing.

    The entry gets the given linked resource and user specified type,
    plus an empty ``Schema``.
    """
    fake_entry = datacatalog.Entry()
    fake_entry.name = 'fake_entries/{}'.format(entry_id)
    fake_entry.linked_resource = linked_resource
    fake_entry.user_specified_type = user_specified_type
    fake_entry.schema = datacatalog.Schema()

    return fake_entry
    def test_lookup_entry_should_return_datacatalog_client_result(self):
        """The facade must hand back the entry returned by the client."""
        expected_entry = datacatalog.Entry()
        expected_entry.linked_resource = 'linked_resource'
        self.__datacatalog_client.lookup_entry.return_value = expected_entry

        returned = self.__datacatalog_facade.lookup_entry('linked_resource')

        self.assertEqual(expected_entry, returned)
    def make_entry_for_table(self, table_metadata, database_name):
        """Assemble a Data Catalog entry, schema included, for a Hive table.

        :param table_metadata: object read for its ``name``,
            ``create_time`` and ``table_storages`` attributes.
        :param database_name: prefix used to build the entry id.
        :return: entry_id, entry
        """
        # Hive is case insensitive, so the entry id is always lowercased.
        entry_id = f'{database_name}__{table_metadata.name}'.lower()

        entry = datacatalog.Entry()
        entry.user_specified_system = 'hive'
        entry.user_specified_type = 'table'
        entry.display_name = table_metadata.name
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            entry_id)

        # Only the first table_storage relationship is used for now;
        # partitioned tables might come with more than one record.
        first_storage = table_metadata.table_storages[0]
        entry.linked_resource = (
            f'//{self.__metadata_host_server}//{first_storage.location}')

        create_time = timestamp_pb2.Timestamp()
        create_time.FromSeconds(table_metadata.create_time)
        entry.source_system_timestamps.create_time = create_time

        update_seconds = (
            DataCatalogEntryFactory.__extract_update_time_from_table_metadata(
                table_metadata))
        if update_seconds is None:
            # No update info available: reuse the creation timestamp.
            update_time = create_time
        else:
            update_time = timestamp_pb2.Timestamp()
            update_time.FromSeconds(update_seconds)
        entry.source_system_timestamps.update_time = update_time

        entry.schema.columns.extend([
            datacatalog.ColumnSchema(
                column=column.name,
                type=DataCatalogEntryFactory.__format_entry_column_type(
                    column.type),
                description=column.comment)
            for column in first_storage.columns
        ])

        return entry_id, entry
Esempio n. 17
0
    def test_print_dependency_finder_results_should_get_asset_metadata_values(
            self, mock_get_asset_metadata_value):
        """Printing must read the dashboard title and datasource values."""

        # given
        find_results = {'entries/test-entry': (datacatalog.Entry(), [])}

        # when
        printer = addons.ElastiCubeDependencyPrinter
        printer.print_dependency_finder_results(find_results)

        # then
        expected_calls = [
            mock.call([], 'dashboard_title'),
            mock.call([], 'datasource'),
        ]
        mock_get_asset_metadata_value.assert_has_calls(expected_calls)
    def make_entry_for_sheet(self, sheet_metadata, workbook_metadata):
        """Assemble a Data Catalog entry for a Sheet.

        :param sheet_metadata: mapping read for its ``luid``, ``id``,
            ``name``, ``path``, ``createdAt`` and ``updatedAt`` items.
        :param workbook_metadata: passed on to build the site content URL
            used in the linked resource.
        :return: generated_id, entry
        """

        def _to_timestamp(date_string):
            # Parse an incoming date string into a protobuf Timestamp.
            parsed = datetime.strptime(
                date_string, self.__INCOMING_TIMESTAMP_FORMAT)
            timestamp = timestamp_pb2.Timestamp()
            timestamp.FromDatetime(parsed)
            return timestamp

        sheet_name = sheet_metadata.get('name')

        luid = sheet_metadata.get('luid')
        generated_id = self.__format_id(luid or sheet_metadata.get('id'))
        if not luid:
            logging.info(
                'Sheet "%s" is hidden in the Workbook and does not have an'
                ' luid. Using its id attribute as a fallback...', sheet_name)

        entry = datacatalog.Entry()
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            generated_id)
        entry.user_specified_type = constants.USER_SPECIFIED_TYPE_SHEET
        entry.user_specified_system = self.__user_specified_system
        entry.display_name = self._format_display_name(sheet_name)

        # A null path means the Sheet is either hidden and included in a
        # Dashboard, or deleted from the server while still remaining in
        # the Workbook.
        path = sheet_metadata.get('path')
        if path:
            site_content_url = self.__format_site_content_url(
                workbook_metadata)
            entry.linked_resource = \
                f'{self.__server_address}/#{site_content_url}/views/{path}'

        # Each timestamp is set only when its source date is available.
        created_at = sheet_metadata.get('createdAt')
        if created_at:
            create_time = _to_timestamp(created_at)
            entry.source_system_timestamps.create_time = create_time

        updated_at = sheet_metadata.get('updatedAt')
        if updated_at:
            update_time = _to_timestamp(updated_at)
            entry.source_system_timestamps.update_time = update_time

        return generated_id, entry
    def make_entry_for_table(self, table_metadata, database_name):
        """Assemble a Data Catalog entry, schema included, for a Hive table.

        :param table_metadata: object read for its ``name``,
            ``create_time`` and ``table_storages`` attributes.
        :param database_name: used, along with the table metadata, to
            build the entry id.
        :return: entry_id, entry
        """
        entry_id = self.__make_entry_id_for_table(database_name,
                                                  table_metadata)

        entry = datacatalog.Entry()
        entry.user_specified_system = 'hive'
        entry.user_specified_type = 'table'
        entry.display_name = self._format_display_name(table_metadata.name)
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            entry_id)

        # Only the first table storage record is taken into account.
        first_storage = table_metadata.table_storages[0]
        raw_resource = (
            f'//{self.__metadata_host_server}//{first_storage.location}')
        entry.linked_resource = self._format_linked_resource(raw_resource)

        create_time = timestamp_pb2.Timestamp()
        create_time.FromSeconds(table_metadata.create_time)
        entry.source_system_timestamps.create_time = create_time

        update_seconds = (
            DataCatalogEntryFactory.__extract_update_time_from_table_metadata(
                table_metadata))
        if update_seconds is None:
            # No update info available: reuse the creation timestamp.
            update_time = create_time
        else:
            update_time = timestamp_pb2.Timestamp()
            update_time.FromSeconds(update_seconds)
        entry.source_system_timestamps.update_time = update_time

        entry.schema.columns.extend([
            datacatalog.ColumnSchema(
                column=column.name,
                type=DataCatalogEntryFactory.__format_entry_column_type(
                    column.type),
                description=column.comment)
            for column in first_storage.columns
        ])

        return entry_id, entry
Esempio n. 20
0
    def test_print_dependency_finder_results_should_filter_jaql_tags(
            self, mock_filter_jaql_tags):
        """Printing must run the found tags through the JAQL tags filter."""

        # given
        tag = mocks.make_fake_tag()
        find_results = {'entries/test-entry': (datacatalog.Entry(), [tag])}

        # when
        printer = addons.ElastiCubeDependencyPrinter
        printer.print_dependency_finder_results(find_results)

        # then
        mock_filter_jaql_tags.assert_called_once_with([tag])
Esempio n. 21
0
    def make_entry_for_custom_property_definition(
            self, custom_property_def_metadata):
        """Assemble a Data Catalog entry for a Custom Property Definition.

        :param custom_property_def_metadata: mapping read for its ``id``,
            ``name``, ``description``, ``createdDate`` and
            ``modifiedDate`` items.
        :return: generated_id, entry
        """

        def _to_timestamp(date_string):
            # Parse an incoming date string into a protobuf Timestamp.
            parsed = datetime.strptime(
                date_string, self.__INCOMING_TIMESTAMP_UTC_FORMAT)
            timestamp = timestamp_pb2.Timestamp()
            timestamp.FromDatetime(parsed)
            return timestamp

        metadata = custom_property_def_metadata

        generated_id = self.__format_id(
            constants.ENTRY_ID_PART_CUSTOM_PROPERTY_DEFINITION,
            metadata.get('id'))

        entry = datacatalog.Entry()
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            generated_id)
        entry.user_specified_type = \
            constants.USER_SPECIFIED_TYPE_CUSTOM_PROPERTY_DEFINITION
        entry.user_specified_system = self.__user_specified_system
        entry.display_name = self._format_display_name(metadata.get('name'))
        entry.description = metadata.get('description')

        # linked_resource stays empty because there is no way to jump
        # directly to an 'edit' page in the QlikView Management Console
        # (QMC). The ID we see in the URL of the Custom Property Definition
        # edit page is generated at the client side as a wrapper around the
        # object. The reason for this is: if someone selects a bunch of
        # things in the QMC, it can't pick one, or have a list, so it
        # generates a new 'synthetic' key for the edit page.
        # -- from the Qlik Analytics Platform Architecture Team

        created_date = metadata.get('createdDate')
        entry.source_system_timestamps.create_time = \
            _to_timestamp(created_date)

        # Without a modification date, the creation date is used instead.
        modified_date = metadata.get('modifiedDate') or created_date
        entry.source_system_timestamps.update_time = \
            _to_timestamp(modified_date)

        return generated_id, entry
Esempio n. 22
0
    def make_entry_for_folder(self, folder):
        """Assemble a Data Catalog entry for a Folder.

        :param folder: object read for its ``id`` and ``name`` attributes.
        :return: generated_id, entry
        """
        folder_id = folder.id
        generated_id = self.__format_id(constants.ENTRY_ID_PART_FOLDER,
                                        folder_id)

        folder_entry = datacatalog.Entry()
        folder_entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            generated_id)
        folder_entry.user_specified_type = constants.USER_SPECIFIED_TYPE_FOLDER
        folder_entry.user_specified_system = self.__user_specified_system
        folder_entry.display_name = self._format_display_name(folder.name)
        folder_entry.linked_resource = \
            f'{self.__instance_url}/folders/{folder_id}'

        return generated_id, folder_entry
    def make_entry_for_widget(
            self, widget_metadata: Dict[str, Any]) -> Tuple[str, Entry]:
        """Assemble a Data Catalog entry, schema included, for a Widget.

        :param widget_metadata: mapping read for its ``oid``, ``title``,
            ``desc``, ``dashboardid``, ``created`` and ``lastUpdated``
            items; also passed on to build the entry schema.
        :return: generated_id, entry
        """

        def _to_timestamp(date_string):
            # Parse an incoming date string into a protobuf Timestamp.
            parsed = datetime.strptime(
                date_string, self.__INCOMING_TIMESTAMP_UTC_FORMAT)
            timestamp = timestamp_pb2.Timestamp()
            timestamp.FromDatetime(parsed)
            return timestamp

        widget_id = widget_metadata.get('oid')
        generated_id = self.__format_id(constants.ENTRY_ID_PART_WIDGET,
                                        widget_id)

        entry = datacatalog.Entry()
        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            generated_id)
        entry.user_specified_type = constants.USER_SPECIFIED_TYPE_WIDGET
        entry.user_specified_system = self.__user_specified_system

        # Untitled widgets get a placeholder display name.
        title = widget_metadata.get('title') or self.__UNNAMED
        entry.display_name = self._format_display_name(title)
        entry.description = widget_metadata.get('desc')

        dashboard_id = widget_metadata.get('dashboardid')
        entry.linked_resource = (
            f'{self.__server_address}/app/main#/dashboards/{dashboard_id}'
            f'/widgets/{widget_id}')

        # Timestamps are set only when a creation date is available.
        created = widget_metadata.get('created')
        if created:
            entry.source_system_timestamps.create_time = \
                _to_timestamp(created)

            # Without an update date, the creation date is used instead.
            last_updated = widget_metadata.get('lastUpdated') or created
            entry.source_system_timestamps.update_time = \
                _to_timestamp(last_updated)

        entry.schema = self.__make_schema_for_widget(widget_metadata)

        return generated_id, entry
Esempio n. 24
0
    def make_entry_for_query(self, query):
        """Assemble a Data Catalog entry for a Query.

        :param query: object read for its ``id``, ``model``, ``view`` and
            ``share_url`` attributes.
        :return: generated_id, entry
        """
        query_id = query.id
        generated_id = self.__format_id(constants.ENTRY_ID_PART_QUERY,
                                        query_id)

        query_entry = datacatalog.Entry()
        query_entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            generated_id)
        query_entry.user_specified_type = constants.USER_SPECIFIED_TYPE_QUERY
        query_entry.user_specified_system = self.__user_specified_system

        # The display name is composed from the id, model and explore.
        readable_name = \
            f'Query {query_id} - model {query.model} - explore {query.view}'
        query_entry.display_name = self._format_display_name(readable_name)

        query_entry.linked_resource = query.share_url

        return generated_id, query_entry
Esempio n. 25
0
    def test_print_dependency_finder_results_should_tabulate_tags_data(
            self, mock_filter_jaql_tags, mock_tabulate):
        """Printing must tabulate the tags kept by the JAQL tags filter."""

        # given
        tag_fields = [('table', 'test-table'), ('column', 'test-column')]
        tag = mocks.make_fake_tag(string_fields=tag_fields)
        mock_filter_jaql_tags.return_value = [tag]
        find_results = {'entries/test-entry': (datacatalog.Entry(), [tag])}

        # when
        printer = addons.ElastiCubeDependencyPrinter
        printer.print_dependency_finder_results(find_results)

        # then
        mock_filter_jaql_tags.assert_called_once_with([tag])
        mock_tabulate.assert_called_once()
    def make_entries_for_table_container(self, table_container):
        """Assemble a Data Catalog entry from a table container dict.

         :param table_container: dict read for ``name`` and, optionally,
             ``desc``, ``create_time`` and ``update_time``.
         :return: entry_id, entry
        """
        container_name = table_container['name']
        entry_id = self._format_id(container_name)

        entry = datacatalog.Entry()
        entry.user_specified_type = \
            self.__metadata_definition['table_container_def']['type']
        # The entry group id doubles as the user specified system.
        entry.user_specified_system = self.__entry_group_id
        entry.display_name = self._format_display_name(container_name)

        create_seconds, update_seconds = \
            DataCatalogEntryFactory.__convert_source_system_timestamp_fields(
                table_container.get('create_time'),
                table_container.get('update_time'))
        # Timestamps are set only when both values are available.
        if create_seconds and update_seconds:
            create_time = timestamp_pb2.Timestamp()
            create_time.FromSeconds(create_seconds)
            entry.source_system_timestamps.create_time = create_time

            update_time = timestamp_pb2.Timestamp()
            update_time.FromSeconds(update_seconds)
            entry.source_system_timestamps.update_time = update_time

        # Missing (NA) descriptions become empty strings.
        container_desc = table_container.get('desc')
        entry.description = '' if pd.isna(container_desc) else container_desc

        entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id, self.__location_id, self.__entry_group_id,
            entry_id)
        entry.linked_resource = (
            f'//{self.__metadata_host_server}//{entry_id}')

        return entry_id, entry
    def make_entry_for_folder(
            self, folder_metadata: Dict[str, Any]) -> Tuple[str, Entry]:
        """Build a Data Catalog entry from a folder metadata dict.

        :param folder_metadata: dict from which ``oid``, ``name``,
            ``created`` and ``lastUpdated`` are read.
        :return: tuple ``(generated_id, entry)``.
        """

        def to_timestamp(raw_value):
            # Parse an incoming UTC-format string into a protobuf Timestamp.
            parsed = datetime.strptime(raw_value,
                                       self.__INCOMING_TIMESTAMP_UTC_FORMAT)
            stamp = timestamp_pb2.Timestamp()
            stamp.FromDatetime(parsed)
            return stamp

        folder_entry = datacatalog.Entry()

        oid = folder_metadata.get('oid')
        folder_name = folder_metadata.get('name')

        # The root folder's ``oid`` field is not fulfilled, so its name is
        # used to build the id instead.
        generated_id = self.__format_id(constants.ENTRY_ID_PART_FOLDER,
                                        oid or folder_name)
        folder_entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id,
            self.__location_id,
            self.__entry_group_id,
            generated_id,
        )

        folder_entry.user_specified_type = constants.USER_SPECIFIED_TYPE_FOLDER
        folder_entry.user_specified_system = self.__user_specified_system

        folder_entry.display_name = self._format_display_name(folder_name)

        # Only folders that have an ``oid`` get a linked resource.
        if oid:
            folder_entry.linked_resource = \
                f'{self.__server_address}/app/main#/home/{oid}'

        created = folder_metadata.get('created')
        if not created:
            # Without a creation date no source system timestamps are set.
            return generated_id, folder_entry

        folder_entry.source_system_timestamps.create_time = \
            to_timestamp(created)

        # Fall back to the creation date when there is no last update date.
        last_updated = folder_metadata.get('lastUpdated') or created
        folder_entry.source_system_timestamps.update_time = \
            to_timestamp(last_updated)

        return generated_id, folder_entry
    def make_entry_for_workbook(self, workbook_metadata):
        """Build a Data Catalog entry from a workbook metadata dict.

        :param workbook_metadata: dict from which ``luid``, ``name``,
            ``description``, ``vizportalUrlId``, ``createdAt`` and
            ``updatedAt`` are read.
        :return: tuple ``(generated_id, entry)``.
        """

        def to_timestamp(raw_value):
            # Parse an incoming timestamp string into a protobuf Timestamp.
            parsed = datetime.strptime(raw_value,
                                       self.__INCOMING_TIMESTAMP_FORMAT)
            stamp = timestamp_pb2.Timestamp()
            stamp.FromDatetime(parsed)
            return stamp

        workbook_entry = datacatalog.Entry()

        workbook_entry_id = self.__format_id(workbook_metadata.get('luid'))
        workbook_entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id,
            self.__location_id,
            self.__entry_group_id,
            workbook_entry_id,
        )

        workbook_entry.user_specified_type = \
            constants.USER_SPECIFIED_TYPE_WORKBOOK
        workbook_entry.user_specified_system = self.__user_specified_system

        workbook_name = workbook_metadata.get('name')
        workbook_entry.display_name = self._format_display_name(workbook_name)
        workbook_entry.description = workbook_metadata.get('description')

        # The linked resource is set only when a ``vizportalUrlId`` exists.
        vizportal_url_id = workbook_metadata.get('vizportalUrlId')
        if vizportal_url_id:
            site_content_url = self.__format_site_content_url(
                workbook_metadata)
            workbook_entry.linked_resource = \
                f'{self.__server_address}/' \
                f'#{site_content_url}/workbooks/{vizportal_url_id}'

        workbook_entry.source_system_timestamps.create_time = to_timestamp(
            workbook_metadata.get('createdAt'))
        workbook_entry.source_system_timestamps.update_time = to_timestamp(
            workbook_metadata.get('updatedAt'))

        return workbook_entry_id, workbook_entry
Esempio n. 29
0
    def make_entry_for_sheet(self, sheet_metadata):
        """Build a Data Catalog entry from a sheet metadata dict.

        :param sheet_metadata: dict from which ``qInfo.qId``, ``qMeta``
            (``title``, ``description``, ``createdDate``, ``modifiedDate``)
            and ``app.id`` are read.
        :return: tuple ``(generated_id, entry)``.
        """

        def to_timestamp(raw_value):
            # Parse an incoming UTC-format string into a protobuf Timestamp.
            parsed = datetime.strptime(raw_value,
                                       self.__INCOMING_TIMESTAMP_UTC_FORMAT)
            stamp = timestamp_pb2.Timestamp()
            stamp.FromDatetime(parsed)
            return stamp

        sheet_entry = datacatalog.Entry()

        q_info = sheet_metadata.get('qInfo')
        sheet_id = q_info.get('qId')
        generated_id = self.__format_id(constants.ENTRY_ID_PART_SHEET,
                                        sheet_id)
        sheet_entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id,
            self.__location_id,
            self.__entry_group_id,
            generated_id,
        )

        sheet_entry.user_specified_type = constants.USER_SPECIFIED_TYPE_SHEET
        sheet_entry.user_specified_system = self.__user_specified_system

        q_meta = sheet_metadata.get('qMeta')
        sheet_entry.display_name = self._format_display_name(
            q_meta.get('title'))
        sheet_entry.description = q_meta.get('description')

        app_id = sheet_metadata.get('app').get('id')
        sheet_entry.linked_resource = \
            f'{self.__site_url}/sense/app/{app_id}/sheet/{sheet_id}'

        created_date = q_meta.get('createdDate')
        sheet_entry.source_system_timestamps.create_time = \
            to_timestamp(created_date)

        # Fall back to the creation date when there is no modified date.
        last_change_date = q_meta.get('modifiedDate') or \
            q_meta.get('createdDate')
        sheet_entry.source_system_timestamps.update_time = \
            to_timestamp(last_change_date)

        return generated_id, sheet_entry
Esempio n. 30
0
    def make_entry_for_dashboard_element(self, element):
        """Build a Data Catalog entry for a dashboard element.

        Elements with neither a ``title`` nor a ``title_text`` are skipped:
        a warning is logged and ``(None, None)`` is returned.

        :param element: object exposing ``id``, ``title`` and ``title_text``.
        :return: tuple ``(generated_id, entry)``, or ``(None, None)`` when
            the element has no usable title.
        """
        # Prefer ``title``; fall back to ``title_text`` when it is falsy.
        resolved_title = element.title or element.title_text

        if not resolved_title:
            logging.warning(
                'Dashboard Element "%s" has no title nor title_text'
                ' and will be skipped!', element.id)
            return None, None

        element_entry_id = self.__format_id(
            constants.ENTRY_ID_PART_DASHBOARD_ELEMENT, element.id)

        element_entry = datacatalog.Entry()
        element_entry.name = datacatalog.DataCatalogClient.entry_path(
            self.__project_id,
            self.__location_id,
            self.__entry_group_id,
            element_entry_id,
        )

        element_entry.user_specified_type = \
            constants.USER_SPECIFIED_TYPE_DASHBOARD_ELEMENT
        element_entry.user_specified_system = self.__user_specified_system

        element_entry.display_name = self._format_display_name(resolved_title)

        return element_entry_id, element_entry