def make_entries_for_database(self, database_metadata):
    """Create a Data Catalog entry for a Hive database.

    :param database_metadata: object exposing ``name``, ``desc`` and
        ``uri`` attributes.
    :return: entry_id, entry
    """
    # Force lowercase since hive is case insensitive.
    # (The original wrapped the name in a redundant '{}'.format() call.)
    entry_id = database_metadata.name.lower()

    entry = datacatalog.Entry()
    entry.user_specified_type = 'database'
    entry.user_specified_system = 'hive'
    entry.display_name = database_metadata.name
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        entry_id)

    database_desc = database_metadata.desc
    # desc may come in as a non-string value (e.g. NaN); only set the
    # description when it is an actual string.
    if isinstance(database_desc, str):
        entry.description = database_desc

    entry.linked_resource = \
        '//{}//{}'.format(
            self.__metadata_host_server, database_metadata.uri
        )

    return entry_id, entry
def test_run_should_process_scrape_prepare_ingest_workflow(
        self, mock_scraper, mock_assembled_entry_factory, mock_mapper,
        mock_cleaner, mock_ingestor):
    """run() should trigger scrape -> prepare -> map -> clean -> ingest.

    The mock arguments are presumably injected by stacked ``mock.patch``
    decorators outside this view (bottom-up order) — confirm at the
    class/method decorators.
    """
    assembled_entry_factory = mock_assembled_entry_factory.return_value
    # Minimal fake assembled entry so downstream stages have data.
    assembled_entry_factory.make_entries_for_dashboards.return_value = [
        (prepare.AssembledEntryData('test-entry-id-1', datacatalog.Entry(),
                                    []))
    ]

    dashboards_synchronizer.DashboardsSynchronizer(
        tableau_server_address='test-server',
        tableau_api_version='test-api-version',
        tableau_username='******',
        tableau_password='******',
        tableau_site='test-site',
        datacatalog_project_id='test-project-id',
        datacatalog_location_id='test-location-id').run()

    # Every workflow stage must have been invoked exactly once.
    scraper = mock_scraper.return_value
    scraper.scrape_dashboards.assert_called_once()
    assembled_entry_factory.make_entries_for_dashboards\
        .assert_called_once()
    mock_mapper.return_value.fulfill_tag_fields.assert_called_once()
    mock_cleaner.return_value.delete_obsolete_metadata.assert_called_once()
    mock_ingestor.return_value.ingest_metadata.assert_called_once()
def make_entry_for_measure(self, measure_metadata):
    """Build a Data Catalog entry for a Qlik Measure.

    :param measure_metadata: dict with ``app``, ``qInfo`` and
        ``qMetaDef`` keys.
    :return: generated_id, entry
    """
    app_metadata = measure_metadata.get('app')
    measure_id = measure_metadata.get('qInfo').get('qId')

    # The Measure ID is usually a 7 letters string, so the App ID is
    # prepended to prevent overlapping.
    generated_id = self.__format_id(
        constants.ENTRY_ID_PART_MEASURE,
        f'{app_metadata.get("id")}_{measure_id}')

    entry = datacatalog.Entry()
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        generated_id)
    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = constants.USER_SPECIFIED_TYPE_MEASURE

    meta_def = measure_metadata.get('qMetaDef')
    entry.display_name = self._format_display_name(meta_def.get('title'))
    entry.description = meta_def.get('description')

    # The linked_resource field is not fulfilled because there is no way to
    # jump directly to a Measure 'edit' page in Qlik Sense.
    # The create_time and update_time fields are not fulfilled because
    # there is no such info in the Measure metadata.
    return generated_id, entry
def make_entry_for_look(self, look):
    """Create a Data Catalog entry for a Looker Look.

    :param look: Looker SDK Look object (``id``, ``title``,
        ``created_at`` and ``updated_at`` are read).
    :return: generated_id, entry
    """
    entry = datacatalog.Entry()

    generated_id = self.__format_id(constants.ENTRY_ID_PART_LOOK, look.id)
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        generated_id)

    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = constants.USER_SPECIFIED_TYPE_LOOK

    entry.display_name = self._format_display_name(look.title)
    entry.linked_resource = f'{self.__instance_url}/looks/{look.id}'

    created_timestamp = timestamp_pb2.Timestamp()
    created_timestamp.FromDatetime(look.created_at)
    entry.source_system_timestamps.create_time = created_timestamp

    # Fall back to created_at when the Look was never updated.
    updated_datetime = \
        look.updated_at if look.updated_at else look.created_at
    updated_timestamp = timestamp_pb2.Timestamp()
    # TODO Evaluate/remove "+ 10" after b/144041881 has been closed.
    updated_timestamp.FromDatetime(updated_datetime +
                                   datetime.timedelta(seconds=10))
    entry.source_system_timestamps.update_time = updated_timestamp

    return generated_id, entry
def make_entry_for_visualization(self, visualization_metadata):
    """Create a Data Catalog entry for a Qlik Visualization.

    :param visualization_metadata: dict with ``qInfo`` and ``qMetaDef``
        keys (``app`` is only needed by the commented-out code below).
    :return: generated_id, entry
    """
    entry = datacatalog.Entry()

    viz_id = visualization_metadata.get('qInfo').get('qId')
    generated_id = self.__format_id(constants.ENTRY_ID_PART_VISUALIZATION,
                                    viz_id)
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        generated_id)

    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = constants.USER_SPECIFIED_TYPE_VISUALIZATION

    q_meta_def = visualization_metadata.get('qMetaDef')
    entry.display_name = self._format_display_name(q_meta_def.get('title'))
    entry.description = q_meta_def.get('description')

    # The linked_resource field is not fulfilled because Data Catalog
    # currently does not accept ``?`` and ``=`` in the field value.
    # The below statements can be uncommented once
    # https://issuetracker.google.com/issues/176912978
    # has been fixed.
    #
    # app_id = visualization_metadata.get('app').get('id')
    # entry.linked_resource = f'{self.__site_url}/sense/single' \
    #                         f'?appid={app_id}' \
    #                         f'&obj={viz_id}'

    # The create_time and update_time fields are not fulfilled because
    # there is no such info in the Visualization metadata.
    return generated_id, entry
def create_entry_user_defined_type(cls, entry_type, system, display_name,
                                   name, description, linked_resource,
                                   create_time_seconds, update_time_seconds,
                                   columns=None):
    """Assemble a user-defined-type Data Catalog entry from scalar fields.

    :param columns: optional iterable of ColumnSchema to attach.
    :return: the assembled Entry
    """
    entry = datacatalog.Entry()

    entry.name = name
    entry.display_name = display_name
    entry.user_specified_type = entry_type
    entry.user_specified_system = system
    entry.description = description
    entry.linked_resource = linked_resource

    # Convert the epoch-seconds inputs into protobuf Timestamps.
    create_ts = timestamp_pb2.Timestamp()
    create_ts.FromSeconds(create_time_seconds)
    entry.source_system_timestamps.create_time = create_ts

    update_ts = timestamp_pb2.Timestamp()
    update_ts.FromSeconds(update_time_seconds)
    entry.source_system_timestamps.update_time = update_ts

    if columns:
        entry.schema.columns.extend(columns)

    return entry
def make_entry_for_dashboard(self, dashboard):
    """Create a Data Catalog entry for a Looker Dashboard.

    :param dashboard: Looker SDK Dashboard object (``id``, ``title`` and
        ``created_at`` are read).
    :return: generated_id, entry
    """
    entry = datacatalog.Entry()

    generated_id = self.__format_id(constants.ENTRY_ID_PART_DASHBOARD,
                                    dashboard.id)
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        generated_id)

    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = constants.USER_SPECIFIED_TYPE_DASHBOARD

    entry.display_name = self._format_display_name(dashboard.title)
    entry.linked_resource = \
        f'{self.__instance_url}/dashboards/{dashboard.id}'

    if dashboard.created_at:  # LookML dashboards come with None
        created_timestamp = timestamp_pb2.Timestamp()
        created_timestamp.FromDatetime(dashboard.created_at)
        entry.source_system_timestamps.create_time = created_timestamp

        updated_timestamp = timestamp_pb2.Timestamp()
        # TODO Evaluate/remove "+ 10" after b/144041881 has been closed.
        updated_timestamp.FromDatetime(dashboard.created_at +
                                       datetime.timedelta(seconds=10))
        entry.source_system_timestamps.update_time = updated_timestamp
    else:
        logging.info('Dashboard "%s" has no created_at information!',
                     dashboard.id)

    return generated_id, entry
def create_user_defined_entry(cls, entry_id, entry_type, system,
                              display_name, name, description,
                              linked_resource, create_time_seconds,
                              update_time_seconds, tags=None):
    """Build an AssembledEntryData wrapping a user-defined-type entry.

    :param tags: optional tags forwarded to the AssembledEntryData.
    :return: ingest.AssembledEntryData
    """
    entry = datacatalog.Entry()

    entry.name = name
    entry.display_name = display_name
    entry.user_specified_type = entry_type
    entry.user_specified_system = system
    entry.description = description
    entry.linked_resource = linked_resource

    # Epoch seconds -> protobuf Timestamps.
    create_ts = timestamp_pb2.Timestamp()
    create_ts.FromSeconds(create_time_seconds)
    entry.source_system_timestamps.create_time = create_ts

    update_ts = timestamp_pb2.Timestamp()
    update_ts.FromSeconds(update_time_seconds)
    entry.source_system_timestamps.update_time = update_ts

    return ingest.AssembledEntryData(entry_id, entry, tags)
def make_entry_for_entity(self, entity):
    """Create a Data Catalog entry for an Atlas-style entity dict.

    :param entity: dict with ``guid`` and ``data`` (raw attributes,
        including ``typeName``).
    :return: generated_id, entry
    """
    entry = datacatalog.Entry()
    guid = entity['guid']
    data = entity['data']

    type_name = attr_normalizer.DataCatalogAttributeNormalizer.format_name(
        data['typeName'])

    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = type_name

    # NOTE(review): the helper receives ``entry`` and returns derived
    # attributes — presumably it also mutates the entry in place; confirm
    # in its definition.
    generated_id, name, location, columns = self.__get_entry_attributes(
        entry, guid, data)

    # ADD type to generated_id since ids can be reused between types.
    generated_id = '{}_{}'.format(type_name, generated_id)

    self.__set_entry_names(entry, generated_id, name)
    self.__set_linked_resource(entry, generated_id, location, type_name)
    self.__set_source_timestamp_fields(entry, data, generated_id)
    self.__create_schema(entry, columns)

    return generated_id, entry
def make_entry_for_sql_object(self, sql_object_key, sql_object_type,
                              sql_object_item):
    """Create a Data Catalog entry for a user-configured SQL object.

    :param sql_object_key: key into the SQL objects config.
    :param sql_object_type: value for the entry's user_specified_type.
    :param sql_object_item: scraped metadata for the SQL object.
    :return: entry_id, entry
    """
    sql_object_config = self.__sql_objects_config[sql_object_key]
    metadata_def = sql_object_config[
        constants.SQL_OBJECT_ITEM_METADATA_DEF_KEY]

    name = sql_object_item[constants.SQL_OBJECT_ITEM_NAME]
    entry_id = self._format_id(name)

    entry = datacatalog.Entry()
    entry.user_specified_type = sql_object_type
    entry.user_specified_system = self.__entry_group_id
    entry.display_name = self._format_display_name(name)

    sql_object_fields = metadata_def[constants.SQL_OBJECT_FIELDS]
    # Narrow the configured fields before filling timestamps/description
    # (helper semantics not visible here — see its definition).
    sql_object_fields = self.__filter_entry_model_fields(sql_object_fields)

    self.__set_entry_system_timestamps(entry, sql_object_fields,
                                       sql_object_item)
    self.__set_entry_description(entry, sql_object_fields, sql_object_item)

    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        entry_id)
    entry.linked_resource = '{}/{}'.format(
        self.__entry_resource_url_prefix, entry_id)

    return entry_id, entry
def make_entry_for_stream(self, stream_metadata):
    """Build a Data Catalog entry for a Qlik Stream.

    :param stream_metadata: dict with ``id``, ``name``, ``createdDate``
        and optional ``modifiedDate``.
    :return: generated_id, entry
    """
    stream_id = stream_metadata.get('id')
    generated_id = self.__format_id(constants.ENTRY_ID_PART_STREAM,
                                    stream_id)

    entry = datacatalog.Entry()
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        generated_id)
    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = constants.USER_SPECIFIED_TYPE_STREAM
    entry.display_name = self._format_display_name(
        stream_metadata.get('name'))
    entry.linked_resource = f'{self.__site_url}' \
                            f'/hub/stream/{stream_id}'

    created_date = stream_metadata.get('createdDate')
    create_ts = timestamp_pb2.Timestamp()
    create_ts.FromDatetime(
        datetime.strptime(created_date,
                          self.__INCOMING_TIMESTAMP_UTC_FORMAT))
    entry.source_system_timestamps.create_time = create_ts

    # Fall back to the creation date when the stream was never modified.
    resolved_modified_date = stream_metadata.get(
        'modifiedDate') or created_date
    update_ts = timestamp_pb2.Timestamp()
    update_ts.FromDatetime(
        datetime.strptime(resolved_modified_date,
                          self.__INCOMING_TIMESTAMP_UTC_FORMAT))
    entry.source_system_timestamps.update_time = update_ts

    return generated_id, entry
def make_entries_for_database(self, database_metadata):
    """Create a Data Catalog entry for a Hive database.

    :param database_metadata: object exposing ``name``, ``desc`` and
        ``uri`` attributes.
    :return: entry_id, entry
    """
    # Lowercase before hashing: Hive identifiers are case insensitive.
    entry_id = self._format_id_with_hashing(
        database_metadata.name.lower(),
        regex_pattern=self.__ENTRY_ID_INVALID_CHARS_REGEX_PATTERN)

    entry = datacatalog.Entry()
    entry.user_specified_type = 'database'
    entry.user_specified_system = 'hive'
    entry.display_name = self._format_display_name(database_metadata.name)
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        entry_id)

    database_desc = database_metadata.desc
    # desc may be a non-string (e.g. NaN); only set when it is a str.
    if isinstance(database_desc, str):
        entry.description = database_desc

    entry.linked_resource = \
        self._format_linked_resource('//{}//{}'.format(
            self.__metadata_host_server, database_metadata.uri
        ))

    return entry_id, entry
def make_entry_for_tables(self, table, table_container_name):
    """Create Datacatalog entries from a table dict.

    :param table: table metadata dict (``name``, ``columns`` and
        optionally ``desc``, ``create_time``, ``update_time``).
    :param table_container_name: name of the parent container, used as a
        prefix of the entry id.
    :return: entry_id, entry
    """
    entry_id = self._format_id('{}__{}'.format(table_container_name,
                                               table['name']))
    entry = datacatalog.Entry()

    entry.user_specified_type = self.__metadata_definition['table_def'][
        'type']
    entry.user_specified_system = self.__entry_group_id

    entry.display_name = self._format_display_name(table['name'])

    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        entry_id)

    # Scraped values may be NaN (pandas); normalize to empty string.
    desc = table.get('desc')
    if pd.isna(desc):
        desc = ''
    entry.description = desc

    entry.linked_resource = '//{}//{}'.format(
        self.__metadata_host_server, self._format_id(table['name']))

    create_time, update_time = \
        DataCatalogEntryFactory.__convert_source_system_timestamp_fields(
            table.get('create_time'),
            table.get('update_time'))
    # Timestamps are only set when both values could be resolved.
    if create_time and update_time:
        created_timestamp = timestamp_pb2.Timestamp()
        created_timestamp.FromSeconds(create_time)
        entry.source_system_timestamps.create_time = created_timestamp

        updated_timestamp = timestamp_pb2.Timestamp()
        updated_timestamp.FromSeconds(update_time)
        entry.source_system_timestamps.update_time = updated_timestamp

    columns = []
    for column in table['columns']:
        # Same NaN normalization for per-column descriptions.
        desc = column.get('desc')
        if pd.isna(desc):
            desc = ''
        columns.append(
            datacatalog.ColumnSchema(
                column=self._format_id(column['name']),
                description=desc,
                type=DataCatalogEntryFactory.__format_entry_column_type(
                    column['type'])))
    entry.schema.columns.extend(columns)

    return entry_id, entry
def make_fake_entry(entry_id: str,
                    linked_resource: Optional[str] = None,
                    user_specified_type: Optional[str] = None) -> Entry:
    """Build a minimal fake Entry for use in tests.

    :param entry_id: suffix of the fake entry name.
    :param linked_resource: optional linked resource value.
    :param user_specified_type: optional user-specified type value.
    :return: the fake Entry.
    """
    entry = datacatalog.Entry()
    entry.name = f'fake_entries/{entry_id}'
    # Proto string fields reject None; only assign the optional values
    # when provided, so the declared Optional defaults are actually
    # usable (the previous code assigned None unconditionally).
    if linked_resource is not None:
        entry.linked_resource = linked_resource
    if user_specified_type is not None:
        entry.user_specified_type = user_specified_type
    entry.schema = datacatalog.Schema()
    return entry
def test_lookup_entry_should_return_datacatalog_client_result(self):
    """lookup_entry must pass through whatever the client returns."""
    expected_entry = datacatalog.Entry()
    expected_entry.linked_resource = 'linked_resource'

    self.__datacatalog_client.lookup_entry.return_value = expected_entry

    actual_entry = self.__datacatalog_facade.lookup_entry(
        'linked_resource')

    self.assertEqual(expected_entry, actual_entry)
def make_entry_for_table(self, table_metadata, database_name): entry_id = '{}__{}'.format(database_name, table_metadata.name) # Force lowercase since hive is case insensitive entry_id = entry_id.lower() entry = datacatalog.Entry() entry.user_specified_type = 'table' entry.user_specified_system = 'hive' entry.display_name = table_metadata.name entry.name = datacatalog.DataCatalogClient.entry_path( self.__project_id, self.__location_id, self.__entry_group_id, entry_id) # For now we are using the first table_storage relationship, # with table partitions we might have to deal # with more than one record table_storage = table_metadata.table_storages[0] entry.linked_resource = \ '//{}//{}'.format(self.__metadata_host_server, table_storage.location) created_timestamp = timestamp_pb2.Timestamp() created_timestamp.FromSeconds(table_metadata.create_time) entry.source_system_timestamps.create_time = created_timestamp update_time_seconds = \ DataCatalogEntryFactory. \ __extract_update_time_from_table_metadata(table_metadata) if update_time_seconds is not None: updated_timestamp = timestamp_pb2.Timestamp() updated_timestamp.FromSeconds(update_time_seconds) entry.source_system_timestamps.update_time = updated_timestamp else: entry.source_system_timestamps.update_time = created_timestamp columns = [] for column in table_storage.columns: columns.append( datacatalog.ColumnSchema( column=column.name, type=DataCatalogEntryFactory.__format_entry_column_type( column.type), description=column.comment)) entry.schema.columns.extend(columns) return entry_id, entry
def test_print_dependency_finder_results_should_get_asset_metadata_values(
        self, mock_get_asset_metadata_value):
    """Printing results should look up the asset title and datasource."""
    # given
    find_results = {'entries/test-entry': (datacatalog.Entry(), [])}

    # when
    addons.ElastiCubeDependencyPrinter.print_dependency_finder_results(
        find_results)

    # then
    expected_calls = [
        mock.call([], 'dashboard_title'),
        mock.call([], 'datasource'),
    ]
    mock_get_asset_metadata_value.assert_has_calls(expected_calls)
def make_entry_for_sheet(self, sheet_metadata, workbook_metadata):
    """Create a Data Catalog entry for a Tableau Sheet.

    :param sheet_metadata: dict with ``luid``/``id``, ``name``, ``path``
        and optional ``createdAt``/``updatedAt``.
    :param workbook_metadata: owning workbook metadata, used to build
        the site content URL.
    :return: generated_id, entry
    """
    entry = datacatalog.Entry()

    luid = sheet_metadata.get('luid')
    if luid:
        generated_id = self.__format_id(luid)
    else:
        # Hidden sheets have no luid; fall back to the transient id.
        generated_id = self.__format_id(sheet_metadata.get('id'))
        logging.info(
            'Sheet "%s" is hidden in the Workbook and does not have an'
            ' luid. Using its id attribute as a fallback...',
            sheet_metadata.get('name'))

    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        generated_id)

    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = constants.USER_SPECIFIED_TYPE_SHEET

    entry.display_name = self._format_display_name(
        sheet_metadata.get('name'))

    # A null path means the Sheet is hidden and included in a Dashboard,
    # or deleted from the server but still remains in the Workbook.
    path = sheet_metadata.get('path')
    if path:
        site_content_url = self.__format_site_content_url(
            workbook_metadata)
        entry.linked_resource = \
            f'{self.__server_address}/#{site_content_url}/views/{path}'

    created_at = sheet_metadata.get('createdAt')
    if created_at:
        created_datetime = datetime.strptime(
            created_at, self.__INCOMING_TIMESTAMP_FORMAT)
        create_timestamp = timestamp_pb2.Timestamp()
        create_timestamp.FromDatetime(created_datetime)
        entry.source_system_timestamps.create_time = create_timestamp

    updated_at = sheet_metadata.get('updatedAt')
    if updated_at:
        updated_datetime = datetime.strptime(
            updated_at, self.__INCOMING_TIMESTAMP_FORMAT)
        update_timestamp = timestamp_pb2.Timestamp()
        update_timestamp.FromDatetime(updated_datetime)
        entry.source_system_timestamps.update_time = update_timestamp

    return generated_id, entry
def make_entry_for_table(self, table_metadata, database_name):
    """Create a Data Catalog entry for a Hive table.

    :param table_metadata: object exposing ``name``, ``create_time`` and
        ``table_storages`` (each with ``location`` and ``columns``).
    :param database_name: name of the owning database.
    :return: entry_id, entry
    """
    entry_id = self.__make_entry_id_for_table(database_name,
                                              table_metadata)

    entry = datacatalog.Entry()
    entry.user_specified_type = 'table'
    entry.user_specified_system = 'hive'
    entry.display_name = self._format_display_name(table_metadata.name)
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        entry_id)

    # Only the first table_storage relationship is used; with table
    # partitions there might be more than one record.
    table_storage = table_metadata.table_storages[0]
    entry.linked_resource = \
        self._format_linked_resource(
            '//{}//{}'.format(self.__metadata_host_server,
                              table_storage.location))

    created_timestamp = timestamp_pb2.Timestamp()
    created_timestamp.FromSeconds(table_metadata.create_time)
    entry.source_system_timestamps.create_time = created_timestamp

    update_time_seconds = \
        DataCatalogEntryFactory. \
        __extract_update_time_from_table_metadata(table_metadata)
    # Fall back to the creation timestamp when no update time is found.
    if update_time_seconds is not None:
        updated_timestamp = timestamp_pb2.Timestamp()
        updated_timestamp.FromSeconds(update_time_seconds)
        entry.source_system_timestamps.update_time = updated_timestamp
    else:
        entry.source_system_timestamps.update_time = created_timestamp

    columns = []
    for column in table_storage.columns:
        columns.append(
            datacatalog.ColumnSchema(
                column=column.name,
                type=DataCatalogEntryFactory.__format_entry_column_type(
                    column.type),
                description=column.comment))
    entry.schema.columns.extend(columns)

    return entry_id, entry
def test_print_dependency_finder_results_should_filter_jaql_tags(
        self, mock_filter_jaql_tags):
    """Printing results should filter the JAQL tags of each entry."""
    # given
    tag = mocks.make_fake_tag()
    find_results = {'entries/test-entry': (datacatalog.Entry(), [tag])}

    # when
    addons.ElastiCubeDependencyPrinter.print_dependency_finder_results(
        find_results)

    # then
    mock_filter_jaql_tags.assert_called_once_with([tag])
def make_entry_for_custom_property_definition(
        self, custom_property_def_metadata):
    """Create a Data Catalog entry for a Qlik Custom Property Definition.

    :param custom_property_def_metadata: dict with ``id``, ``name``,
        ``description``, ``createdDate`` and optional ``modifiedDate``.
    :return: generated_id, entry
    """
    entry = datacatalog.Entry()

    generated_id = self.__format_id(
        constants.ENTRY_ID_PART_CUSTOM_PROPERTY_DEFINITION,
        custom_property_def_metadata.get('id'))
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        generated_id)

    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = \
        constants.USER_SPECIFIED_TYPE_CUSTOM_PROPERTY_DEFINITION

    entry.display_name = self._format_display_name(
        custom_property_def_metadata.get('name'))
    entry.description = custom_property_def_metadata.get('description')

    # The linked_resource field is not fulfilled because there is no way to
    # jump directly to an 'edit' page in the QlikView Management Console
    # (QMC). The ID we see in the URL of the Custom Property
    # Definition edit page is generated at the client side as a wrapper
    # around the object. The reason for this is: if someone select a bunch
    # of things in the QMC, it can't pick one, or have a list, so it
    # generates a new 'synthetic' key for the edit page.
    # -- from the Qlik Analytics Platform Architecture Team

    created_datetime = datetime.strptime(
        custom_property_def_metadata.get('createdDate'),
        self.__INCOMING_TIMESTAMP_UTC_FORMAT)
    create_timestamp = timestamp_pb2.Timestamp()
    create_timestamp.FromDatetime(created_datetime)
    entry.source_system_timestamps.create_time = create_timestamp

    # Fall back to the creation date when never modified.
    modified_date = custom_property_def_metadata.get('modifiedDate')
    resolved_modified_date = \
        modified_date or custom_property_def_metadata.get('createdDate')
    modified_datetime = datetime.strptime(
        resolved_modified_date, self.__INCOMING_TIMESTAMP_UTC_FORMAT)
    update_timestamp = timestamp_pb2.Timestamp()
    update_timestamp.FromDatetime(modified_datetime)
    entry.source_system_timestamps.update_time = update_timestamp

    return generated_id, entry
def make_entry_for_folder(self, folder):
    """Build a Data Catalog entry for a Looker Folder.

    :param folder: Looker folder object (``id`` and ``name`` are read).
    :return: entry_id, entry
    """
    entry_id = self.__format_id(constants.ENTRY_ID_PART_FOLDER, folder.id)

    entry = datacatalog.Entry()
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        entry_id)
    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = constants.USER_SPECIFIED_TYPE_FOLDER
    entry.display_name = self._format_display_name(folder.name)
    entry.linked_resource = f'{self.__instance_url}/folders/{folder.id}'

    return entry_id, entry
def make_entry_for_widget(
        self, widget_metadata: Dict[str, Any]) -> Tuple[str, Entry]:
    """Create a Data Catalog entry for a Sisense Widget.

    :param widget_metadata: dict with ``oid``, ``dashboardid`` and
        optional ``title``, ``desc``, ``created``, ``lastUpdated``.
    :return: generated_id, entry
    """
    entry = datacatalog.Entry()

    widget_id = widget_metadata.get('oid')
    generated_id = self.__format_id(constants.ENTRY_ID_PART_WIDGET,
                                    widget_id)
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        generated_id)

    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = constants.USER_SPECIFIED_TYPE_WIDGET

    # Untitled widgets get a placeholder display name.
    entry.display_name = self._format_display_name(
        widget_metadata.get('title') or self.__UNNAMED)
    entry.description = widget_metadata.get('desc')

    entry.linked_resource = f'{self.__server_address}' \
                            f'/app/main#/dashboards' \
                            f'/{widget_metadata.get("dashboardid")}' \
                            f'/widgets/{widget_id}'

    if widget_metadata.get('created'):
        created_datetime = datetime.strptime(
            widget_metadata.get('created'),
            self.__INCOMING_TIMESTAMP_UTC_FORMAT)
        create_timestamp = timestamp_pb2.Timestamp()
        create_timestamp.FromDatetime(created_datetime)
        entry.source_system_timestamps.create_time = create_timestamp

        # Fall back to the creation date when never updated.
        modified_date = widget_metadata.get('lastUpdated')
        resolved_modified_date = modified_date or widget_metadata.get(
            'created')
        modified_datetime = datetime.strptime(
            resolved_modified_date, self.__INCOMING_TIMESTAMP_UTC_FORMAT)
        update_timestamp = timestamp_pb2.Timestamp()
        update_timestamp.FromDatetime(modified_datetime)
        entry.source_system_timestamps.update_time = update_timestamp

    entry.schema = self.__make_schema_for_widget(widget_metadata)

    return generated_id, entry
def make_entry_for_query(self, query):
    """Build a Data Catalog entry for a Looker Query.

    :param query: Looker query object (``id``, ``model``, ``view`` and
        ``share_url`` are read).
    :return: entry_id, entry
    """
    entry_id = self.__format_id(constants.ENTRY_ID_PART_QUERY, query.id)

    entry = datacatalog.Entry()
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        entry_id)
    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = constants.USER_SPECIFIED_TYPE_QUERY
    entry.display_name = self._format_display_name(
        f'Query {query.id} - model {query.model} - explore {query.view}')
    entry.linked_resource = query.share_url

    return entry_id, entry
def test_print_dependency_finder_results_should_tabulate_tags_data(
        self, mock_filter_jaql_tags, mock_tabulate):
    """Printing results should tabulate the filtered tags' data."""
    # given
    tag = mocks.make_fake_tag(string_fields=[('table', 'test-table'),
                                             ('column', 'test-column')])
    mock_filter_jaql_tags.return_value = [tag]
    find_results = {'entries/test-entry': (datacatalog.Entry(), [tag])}

    # when
    addons.ElastiCubeDependencyPrinter.print_dependency_finder_results(
        find_results)

    # then
    mock_filter_jaql_tags.assert_called_once_with([tag])
    mock_tabulate.assert_called_once()
def make_entries_for_table_container(self, table_container):
    """Create Datacatalog entries from a table container dict.

    :param table_container: container metadata dict (``name`` and
        optionally ``desc``, ``create_time``, ``update_time``).
    :return: entry_id, entry
    """
    entry_id = self._format_id(table_container['name'])
    entry = datacatalog.Entry()

    entry.user_specified_type = self.__metadata_definition[
        'table_container_def']['type']
    entry.user_specified_system = self.__entry_group_id

    entry.display_name = self._format_display_name(table_container['name'])

    create_time, update_time = \
        DataCatalogEntryFactory.__convert_source_system_timestamp_fields(
            table_container.get('create_time'),
            table_container.get('update_time'))
    # Timestamps are only set when both values could be resolved.
    if create_time and update_time:
        created_timestamp = timestamp_pb2.Timestamp()
        created_timestamp.FromSeconds(create_time)
        entry.source_system_timestamps.create_time = created_timestamp

        updated_timestamp = timestamp_pb2.Timestamp()
        updated_timestamp.FromSeconds(update_time)
        entry.source_system_timestamps.update_time = updated_timestamp

    # Scraped values may be NaN (pandas); normalize to empty string.
    desc = table_container.get('desc')
    if pd.isna(desc):
        desc = ''
    entry.description = desc

    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        entry_id)
    entry.linked_resource = '//{}//{}'.format(self.__metadata_host_server,
                                              entry_id)

    return entry_id, entry
def make_entry_for_folder( self, folder_metadata: Dict[str, Any]) -> Tuple[str, Entry]: entry = datacatalog.Entry() # The root folder's ``oid`` field is not fulfilled. folder_id = folder_metadata.get('oid') or folder_metadata.get('name') generated_id = self.__format_id(constants.ENTRY_ID_PART_FOLDER, folder_id) entry.name = datacatalog.DataCatalogClient.entry_path( self.__project_id, self.__location_id, self.__entry_group_id, generated_id) entry.user_specified_system = self.__user_specified_system entry.user_specified_type = constants.USER_SPECIFIED_TYPE_FOLDER entry.display_name = self._format_display_name( folder_metadata.get('name')) if folder_metadata.get('oid'): entry.linked_resource = f'{self.__server_address}' \ f'/app/main#/home' \ f'/{folder_metadata.get("oid")}' if folder_metadata.get('created'): created_datetime = datetime.strptime( folder_metadata.get('created'), self.__INCOMING_TIMESTAMP_UTC_FORMAT) create_timestamp = timestamp_pb2.Timestamp() create_timestamp.FromDatetime(created_datetime) entry.source_system_timestamps.create_time = create_timestamp modified_date = folder_metadata.get('lastUpdated') resolved_modified_date = modified_date or folder_metadata.get( 'created') modified_datetime = datetime.strptime( resolved_modified_date, self.__INCOMING_TIMESTAMP_UTC_FORMAT) update_timestamp = timestamp_pb2.Timestamp() update_timestamp.FromDatetime(modified_datetime) entry.source_system_timestamps.update_time = update_timestamp return generated_id, entry
def make_entry_for_workbook(self, workbook_metadata):
    """Build a Data Catalog entry for a Tableau Workbook.

    :param workbook_metadata: dict with ``luid``, ``name``,
        ``description``, ``createdAt``, ``updatedAt`` and optional
        ``vizportalUrlId``.
    :return: entry_id, entry
    """
    entry_id = self.__format_id(workbook_metadata.get('luid'))

    entry = datacatalog.Entry()
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        entry_id)
    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = constants.USER_SPECIFIED_TYPE_WORKBOOK
    entry.display_name = self._format_display_name(
        workbook_metadata.get('name'))
    entry.description = workbook_metadata.get('description')

    vizportal_url_id = workbook_metadata.get('vizportalUrlId')
    if vizportal_url_id:
        site_content_url = self.__format_site_content_url(
            workbook_metadata)
        entry.linked_resource = f'{self.__server_address}/' \
                                f'#{site_content_url}/workbooks/{vizportal_url_id}'

    create_ts = timestamp_pb2.Timestamp()
    create_ts.FromDatetime(
        datetime.strptime(workbook_metadata.get('createdAt'),
                          self.__INCOMING_TIMESTAMP_FORMAT))
    entry.source_system_timestamps.create_time = create_ts

    update_ts = timestamp_pb2.Timestamp()
    update_ts.FromDatetime(
        datetime.strptime(workbook_metadata.get('updatedAt'),
                          self.__INCOMING_TIMESTAMP_FORMAT))
    entry.source_system_timestamps.update_time = update_ts

    return entry_id, entry
def make_entry_for_sheet(self, sheet_metadata):
    """Create a Data Catalog entry for a Qlik Sheet.

    :param sheet_metadata: dict with ``qInfo``, ``qMeta`` and ``app``
        keys.
    :return: generated_id, entry
    """
    entry = datacatalog.Entry()

    sheet_id = sheet_metadata.get('qInfo').get('qId')
    generated_id = self.__format_id(constants.ENTRY_ID_PART_SHEET,
                                    sheet_id)
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        generated_id)

    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = constants.USER_SPECIFIED_TYPE_SHEET

    q_meta = sheet_metadata.get('qMeta')
    entry.display_name = self._format_display_name(q_meta.get('title'))
    entry.description = q_meta.get('description')

    entry.linked_resource = f'{self.__site_url}' \
                            f'/sense/app/' \
                            f'{sheet_metadata.get("app").get("id")}' \
                            f'/sheet/{sheet_id}'

    created_datetime = datetime.strptime(
        q_meta.get('createdDate'), self.__INCOMING_TIMESTAMP_UTC_FORMAT)
    create_timestamp = timestamp_pb2.Timestamp()
    create_timestamp.FromDatetime(created_datetime)
    entry.source_system_timestamps.create_time = create_timestamp

    # Fall back to the creation date when the sheet was never modified.
    modified_date = q_meta.get('modifiedDate')
    resolved_modified_date = modified_date or q_meta.get('createdDate')
    modified_datetime = datetime.strptime(
        resolved_modified_date, self.__INCOMING_TIMESTAMP_UTC_FORMAT)
    update_timestamp = timestamp_pb2.Timestamp()
    update_timestamp.FromDatetime(modified_datetime)
    entry.source_system_timestamps.update_time = update_timestamp

    return generated_id, entry
def make_entry_for_dashboard_element(self, element):
    """Create a Data Catalog entry for a Dashboard Element.

    Elements with neither ``title`` nor ``title_text`` are skipped.

    :param element: dashboard element object (``id``, ``title`` and
        ``title_text`` are read).
    :return: generated_id, entry — or (None, None) when skipped.
    """
    title = element.title if element.title else element.title_text
    # ``not title`` already covers both None and the empty string; the
    # previous extra ``title == ''`` comparison was redundant.
    if not title:
        logging.warning(
            'Dashboard Element "%s" has no title nor title_text'
            ' and will be skipped!', element.id)
        return None, None

    entry = datacatalog.Entry()

    generated_id = self.__format_id(
        constants.ENTRY_ID_PART_DASHBOARD_ELEMENT, element.id)
    entry.name = datacatalog.DataCatalogClient.entry_path(
        self.__project_id, self.__location_id, self.__entry_group_id,
        generated_id)

    entry.user_specified_system = self.__user_specified_system
    entry.user_specified_type = \
        constants.USER_SPECIFIED_TYPE_DASHBOARD_ELEMENT

    entry.display_name = self._format_display_name(title)

    return generated_id, entry