def __delete_entries_and_groups(project_ids):
    """Delete the Apache Atlas entries found by a catalog search, then
    the entry groups those entries belonged to.

    The search uses the query ``system=apache_atlas`` scoped to the given
    projects. An entry group is deleted only when at least one of its
    entries was successfully deleted by this call. API errors raised by
    the delete calls are logged as warnings and do not stop the cleanup.

    :param project_ids: Iterable of project ids to include in the
        search scope.
    """
    logging.info('\nStarting to clean up the catalog...')

    scope = datacatalog.SearchCatalogRequest.Scope()
    scope.include_project_ids.extend(project_ids)

    request = datacatalog.SearchCatalogRequest()
    request.scope = scope
    request.query = 'system=apache_atlas'
    request.page_size = 1000

    search_results = __datacatalog.search_catalog(request)

    # Compiled once; the named group captures everything that precedes
    # the "/entries/" segment of an entry resource name.
    datacatalog_entry_name_pattern = re.compile(
        '(?P<entry_group_name>.+?)/entries/(.+?)')

    entry_group_names = set()
    for result in search_results:
        entry_name = result.relative_resource_name
        try:
            __datacatalog.delete_entry(name=entry_name)
        except exceptions.GoogleAPICallError as e:
            logging.warning('Exception deleting entry: %s', str(e))
            # The group of an entry that could not be deleted is not
            # scheduled for deletion.
            continue
        logging.info('Entry deleted: %s', entry_name)

        match = datacatalog_entry_name_pattern.match(entry_name)
        if match is None:
            # re.match returns None when the name has no "/entries/"
            # segment; skip it instead of failing with AttributeError,
            # which the handler above would not catch.
            logging.warning(
                'Could not extract the entry group name from: %s',
                entry_name)
            continue
        entry_group_names.add(match.group('entry_group_name'))

    # Delete any pre-existing Entry Groups.
    for entry_group_name in entry_group_names:
        try:
            __datacatalog.delete_entry_group(name=entry_group_name)
            logging.info('--> Entry Group deleted: %s', entry_group_name)
        except exceptions.GoogleAPICallError as e:
            logging.warning('Exception deleting entry group: %s', str(e))
def test_entries_should_exist_after_connector_execution(self):
    """Assert that a catalog search for ``system=redshift`` returns at
    least one result.

    The search is scoped to the project named by the
    ``REDSHIFT2DC_DATACATALOG_PROJECT_ID`` environment variable.
    """
    search_scope = datacatalog.SearchCatalogRequest.Scope()
    project_id = os.environ['REDSHIFT2DC_DATACATALOG_PROJECT_ID']
    search_scope.include_project_ids.append(project_id)

    search_request = datacatalog.SearchCatalogRequest()
    search_request.scope = search_scope
    search_request.query = 'system=redshift'
    search_request.page_size = 1000

    # Drain the search response into a list so the results can be counted.
    found = list(datacatalog_client.search_catalog(search_request))
    self.assertGreater(len(found), 0)
def test_looker_entries_should_not_exist_after_cleanup(self):
    """Assert that a catalog search for ``system=looker`` returns no
    results.

    The search is scoped to the project named by the
    ``LOOKER2DC_DATACATALOG_PROJECT_ID`` environment variable.
    """
    search_scope = datacatalog.SearchCatalogRequest.Scope()
    project_id = os.environ['LOOKER2DC_DATACATALOG_PROJECT_ID']
    search_scope.include_project_ids.append(project_id)

    search_request = datacatalog.SearchCatalogRequest()
    search_request.scope = search_scope
    search_request.query = 'system=looker'
    search_request.page_size = 1000

    # Drain the search response into a list so the results can be counted.
    leftovers = list(datacatalog_client.search_catalog(search_request))
    self.assertEqual(len(leftovers), 0)
def __delete_tag_templates(project_id, location_id):
    """Delete the Qlik tag templates of a project.

    Two sets of template names are removed, in this order: the names
    returned by a catalog search for tag templates named
    "Qlik Custom Property", then a fixed list of template ids resolved
    against the given project and location.

    :param project_id: Project used for the search scope and to build
        the fixed template names.
    :param location_id: Location used to build the fixed template names.
    """
    search_scope = datacatalog.SearchCatalogRequest.Scope()
    search_scope.include_project_ids.append(project_id)

    search_request = datacatalog.SearchCatalogRequest()
    search_request.scope = search_scope
    search_request.query = 'type=TAG_TEMPLATE name:\"Qlik Custom Property\"'
    search_request.page_size = 1000

    # Dynamic Tag Template names: whatever the search returns.
    names_to_delete = [
        hit.relative_resource_name
        for hit in __datacatalog.search_catalog(search_request)
    ]

    # Static Tag Template names: built from a fixed list of ids.
    static_template_ids = (
        'qlik_app_metadata',
        'qlik_custom_property_definition_metadata',
        'qlik_dimension_metadata',
        'qlik_measure_metadata',
        'qlik_visualization_metadata',
        'qlik_sheet_metadata',
        'qlik_stream_metadata',
    )
    names_to_delete.extend(
        datacatalog.DataCatalogClient.tag_template_path(
            project_id, location_id, template_id)
        for template_id in static_template_ids)

    for template_name in names_to_delete:
        __delete_tag_template(template_name)
def search_catalog(self, query):
    """Run a Data Catalog search scoped to this object's project id.

    :param query: The query string.
    :return: A list with the search results.
    """
    search_scope = datacatalog.SearchCatalogRequest.Scope()
    search_scope.include_project_ids.append(self.__project_id)

    search_request = datacatalog.SearchCatalogRequest()
    search_request.scope = search_scope
    search_request.query = query
    search_request.page_size = 1000

    # Consume the whole response so callers get a plain list.
    response = self.__datacatalog.search_catalog(search_request)
    return list(response)
def __delete_tag_templates(project_id):
    """Delete the tag templates returned by a catalog search for
    ``type=TAG_TEMPLATE name:apache_atlas`` in the given project.

    Each template is deleted with ``force=True``. API errors raised by a
    delete call are logged as warnings and the loop moves on to the next
    template.

    :param project_id: Project id to include in the search scope.
    """
    search_scope = datacatalog.SearchCatalogRequest.Scope()
    search_scope.include_project_ids.append(project_id)

    search_request = datacatalog.SearchCatalogRequest()
    search_request.scope = search_scope
    search_request.query = 'type=TAG_TEMPLATE name:apache_atlas'
    search_request.page_size = 1000

    for hit in __datacatalog.search_catalog(search_request):
        template_name = hit.relative_resource_name
        try:
            __datacatalog.delete_tag_template(name=template_name,
                                              force=True)
            logging.info('--> Tag Template deleted: %s', template_name)
        except exceptions.GoogleAPICallError as e:
            logging.warning('Exception deleting Tag Template: %s', str(e))
def __delete_entries_and_groups(project_ids):
    """Delete the Qlik entries found by a catalog search, then the entry
    groups those entries belonged to.

    The search uses the query ``system=qlik`` scoped to the given
    projects. Progress and failures are printed; any exception raised
    while handling a single entry or entry group is printed and the
    loop continues with the next item.

    :param project_ids: Iterable of project ids to include in the
        search scope.
    """
    search_scope = datacatalog.SearchCatalogRequest.Scope()
    search_scope.include_project_ids.extend(project_ids)

    search_request = datacatalog.SearchCatalogRequest()
    search_request.scope = search_scope
    search_request.query = 'system=qlik'
    search_request.page_size = 1000

    # TODO Replace "search entries" by "list entries by group"
    #  when/if it becomes available.
    # All results are collected up front, before anything is deleted.
    found_entries = list(__datacatalog.search_catalog(search_request))

    entry_name_pattern = '(?P<entry_group_name>.+?)/entries/(.+?)'
    groups_to_delete = set()
    for found in found_entries:
        try:
            __datacatalog.delete_entry(name=found.relative_resource_name)
            print('Entry deleted: {}'.format(found.relative_resource_name))
            # The group name is everything before the "/entries/" segment.
            name_match = re.match(pattern=entry_name_pattern,
                                  string=found.relative_resource_name)
            groups_to_delete.add(name_match.group('entry_group_name'))
        except Exception as e:
            print('Exception deleting Entry')
            print(e)

    # Delete any pre-existing Entry Groups.
    for group_name in groups_to_delete:
        try:
            __datacatalog.delete_entry_group(name=group_name)
            print('--> Entry Group deleted: {}'.format(group_name))
        except Exception as e:
            print('Exception deleting Entry Group')
            print(e)